OxRDF: drops lasso dependency

pull/435/head
Tpt 2 years ago committed by Thomas Tanon
parent 21994d39fd
commit a164b268c2
  1. 32
      Cargo.lock
  2. 2
      lib/oxrdf/Cargo.toml
  3. 165
      lib/oxrdf/src/interning.rs

32
Cargo.lock generated

@ -8,17 +8,6 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]] [[package]]
name = "aho-corasick" name = "aho-corasick"
version = "0.7.20" version = "0.7.20"
@ -597,15 +586,6 @@ version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
[[package]]
name = "hashbrown"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
dependencies = [
"ahash",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.12.3" version = "0.12.3"
@ -689,7 +669,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399"
dependencies = [ dependencies = [
"autocfg", "autocfg",
"hashbrown 0.12.3", "hashbrown",
] ]
[[package]] [[package]]
@ -778,15 +758,6 @@ dependencies = [
"winapi-build", "winapi-build",
] ]
[[package]]
name = "lasso"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aeb7b21a526375c5ca55f1a6dfd4e1fad9fa4edd750f530252a718a44b2608f0"
dependencies = [
"hashbrown 0.11.2",
]
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.4.0" version = "1.4.0"
@ -1053,7 +1024,6 @@ checksum = "bb175ec8981211357b7b379869c2f8d555881c55ea62311428ec0de46d89bd5c"
name = "oxrdf" name = "oxrdf"
version = "0.1.5-dev" version = "0.1.5-dev"
dependencies = [ dependencies = [
"lasso",
"oxilangtag", "oxilangtag",
"oxiri", "oxiri",
"oxsdatatypes", "oxsdatatypes",

@ -21,9 +21,7 @@ rdf-star = []
rand = "0.8" rand = "0.8"
oxilangtag = "0.1" oxilangtag = "0.1"
oxiri = "0.2" oxiri = "0.2"
oxsdatatypes = { version = "0.1.1", path="../oxsdatatypes", optional = true } oxsdatatypes = { version = "0.1.1", path="../oxsdatatypes", optional = true }
lasso = { version = "0.6", features = ["inline-more"] }
[package.metadata.docs.rs] [package.metadata.docs.rs]
all-features = true all-features = true

@ -1,108 +1,175 @@
//! Interning of RDF elements using Rodeo //! Interning of RDF elements using Rodeo
use crate::*; use crate::*;
use lasso::{Key, Rodeo, Spur}; use std::collections::hash_map::{Entry, HashMap, RandomState};
#[cfg(feature = "rdf-star")] use std::hash::{BuildHasher, Hasher};
use std::collections::HashMap;
#[derive(Debug, Default)] #[derive(Debug, Default)]
pub struct Interner { pub struct Interner {
strings: Rodeo, hasher: RandomState,
string_for_hash: HashMap<u64, String, IdentityHasherBuilder>,
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
triples: HashMap<InternedTriple, Triple>, triples: HashMap<InternedTriple, Triple>,
} }
impl Interner {
    /// Returns the key of `value`, interning the string first if it is not stored yet.
    ///
    /// The key is the hash of the string. When the slot for that hash is already
    /// taken by a *different* string, the next slot is probed (linear probing) until
    /// either the string itself or a free slot is found.
    fn get_or_intern(&mut self, value: &str) -> Key {
        let mut hash = self.hash(value);
        loop {
            match self.string_for_hash.entry(hash) {
                Entry::Vacant(e) => {
                    e.insert(value.into());
                    return Key(hash);
                }
                Entry::Occupied(e) => {
                    if e.get() == value {
                        return Key(hash);
                    }
                    // Collision with another string: move to the next slot and let the
                    // outer loop fetch the entry for the *new* hash. Re-checking the same
                    // occupied entry here would never terminate.
                    // Wrap before reaching `u64::MAX` so that value is never handed out.
                    hash = if hash == u64::MAX - 1 { 0 } else { hash + 1 };
                }
            }
        }
    }

    /// Returns the key of `value` if it has already been interned, `None` otherwise.
    ///
    /// Follows the same probing sequence as `get_or_intern`: starting from the hash of
    /// the string, slots are visited in order until the string or an empty slot is found.
    fn get(&self, value: &str) -> Option<Key> {
        let mut hash = self.hash(value);
        loop {
            // An empty slot ends the probe sequence: the string was never interned.
            let v = self.string_for_hash.get(&hash)?;
            if v == value {
                return Some(Key(hash));
            }
            hash = if hash == u64::MAX - 1 { 0 } else { hash + 1 };
        }
    }

    /// Hashes `value` with this interner's hasher.
    ///
    /// The result is never `u64::MAX`: that value is remapped to `0`.
    fn hash(&self, value: &str) -> u64 {
        let mut hasher = self.hasher.build_hasher();
        hasher.write(value.as_bytes());
        let hash = hasher.finish();
        if hash == u64::MAX {
            0
        } else {
            hash
        }
    }

    /// Returns the string stored under `key`.
    ///
    /// # Panics
    ///
    /// Panics if no string is stored under `key` in this interner.
    fn resolve(&self, key: &Key) -> &str {
        self.string_for_hash
            .get(&key.0)
            .expect("Interned key not found")
    }
}
/// Identifier of an interned string: a thin wrapper around a `u64`.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct Key(u64);

impl Key {
    /// The smallest key, wrapping `0`.
    fn first() -> Self {
        Key(u64::MIN)
    }

    /// The key directly after this one; the largest key is returned unchanged
    /// instead of overflowing.
    fn next(self) -> Self {
        match self.0.checked_add(1) {
            Some(following) => Key(following),
            None => self,
        }
    }

    /// The largest key, wrapping `u64::MAX`.
    fn impossible() -> Self {
        Self(u64::MAX)
    }
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct InternedNamedNode { pub struct InternedNamedNode {
id: Spur, id: Key,
} }
impl InternedNamedNode { impl InternedNamedNode {
pub fn encoded_into(named_node: NamedNodeRef<'_>, interner: &mut Interner) -> Self { pub fn encoded_into(named_node: NamedNodeRef<'_>, interner: &mut Interner) -> Self {
Self { Self {
id: interner.strings.get_or_intern(named_node.as_str()), id: interner.get_or_intern(named_node.as_str()),
} }
} }
pub fn encoded_from(named_node: NamedNodeRef<'_>, interner: &Interner) -> Option<Self> { pub fn encoded_from(named_node: NamedNodeRef<'_>, interner: &Interner) -> Option<Self> {
Some(Self { Some(Self {
id: interner.strings.get(named_node.as_str())?, id: interner.get(named_node.as_str())?,
}) })
} }
pub fn decode_from<'a>(&self, interner: &'a Interner) -> NamedNodeRef<'a> { pub fn decode_from<'a>(&self, interner: &'a Interner) -> NamedNodeRef<'a> {
NamedNodeRef::new_unchecked(interner.strings.resolve(&self.id)) NamedNodeRef::new_unchecked(interner.resolve(&self.id))
} }
pub fn first() -> Self { pub fn first() -> Self {
Self { id: fist_spur() } Self { id: Key::first() }
} }
pub fn next(self) -> Self { pub fn next(self) -> Self {
Self { Self { id: self.id.next() }
id: next_spur(self.id),
}
} }
pub fn impossible() -> Self { pub fn impossible() -> Self {
Self { Self {
id: impossible_spur(), id: Key::impossible(),
} }
} }
} }
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct InternedBlankNode { pub struct InternedBlankNode {
id: Spur, id: Key,
} }
impl InternedBlankNode { impl InternedBlankNode {
pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Interner) -> Self { pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Interner) -> Self {
Self { Self {
id: interner.strings.get_or_intern(blank_node.as_str()), id: interner.get_or_intern(blank_node.as_str()),
} }
} }
pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Interner) -> Option<Self> { pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Interner) -> Option<Self> {
Some(Self { Some(Self {
id: interner.strings.get(blank_node.as_str())?, id: interner.get(blank_node.as_str())?,
}) })
} }
pub fn decode_from<'a>(&self, interner: &'a Interner) -> BlankNodeRef<'a> { pub fn decode_from<'a>(&self, interner: &'a Interner) -> BlankNodeRef<'a> {
BlankNodeRef::new_unchecked(interner.strings.resolve(&self.id)) BlankNodeRef::new_unchecked(interner.resolve(&self.id))
} }
pub fn next(self) -> Self { pub fn next(self) -> Self {
Self { Self { id: self.id.next() }
id: next_spur(self.id),
}
} }
} }
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub enum InternedLiteral { pub enum InternedLiteral {
String { String {
value_id: Spur, value_id: Key,
}, },
LanguageTaggedString { LanguageTaggedString {
value_id: Spur, value_id: Key,
language_id: Spur, language_id: Key,
}, },
TypedLiteral { TypedLiteral {
value_id: Spur, value_id: Key,
datatype: InternedNamedNode, datatype: InternedNamedNode,
}, },
} }
impl InternedLiteral { impl InternedLiteral {
pub fn encoded_into(literal: LiteralRef<'_>, interner: &mut Interner) -> Self { pub fn encoded_into(literal: LiteralRef<'_>, interner: &mut Interner) -> Self {
let value_id = interner.strings.get_or_intern(literal.value()); let value_id = interner.get_or_intern(literal.value());
if literal.is_plain() { if literal.is_plain() {
if let Some(language) = literal.language() { if let Some(language) = literal.language() {
Self::LanguageTaggedString { Self::LanguageTaggedString {
value_id, value_id,
language_id: interner.strings.get_or_intern(language), language_id: interner.get_or_intern(language),
} }
} else { } else {
Self::String { value_id } Self::String { value_id }
@ -116,12 +183,12 @@ impl InternedLiteral {
} }
pub fn encoded_from(literal: LiteralRef<'_>, interner: &Interner) -> Option<Self> { pub fn encoded_from(literal: LiteralRef<'_>, interner: &Interner) -> Option<Self> {
let value_id = interner.strings.get(literal.value())?; let value_id = interner.get(literal.value())?;
Some(if literal.is_plain() { Some(if literal.is_plain() {
if let Some(language) = literal.language() { if let Some(language) = literal.language() {
Self::LanguageTaggedString { Self::LanguageTaggedString {
value_id, value_id,
language_id: interner.strings.get(language)?, language_id: interner.get(language)?,
} }
} else { } else {
Self::String { value_id } Self::String { value_id }
@ -137,17 +204,17 @@ impl InternedLiteral {
pub fn decode_from<'a>(&self, interner: &'a Interner) -> LiteralRef<'a> { pub fn decode_from<'a>(&self, interner: &'a Interner) -> LiteralRef<'a> {
match self { match self {
InternedLiteral::String { value_id } => { InternedLiteral::String { value_id } => {
LiteralRef::new_simple_literal(interner.strings.resolve(value_id)) LiteralRef::new_simple_literal(interner.resolve(value_id))
} }
InternedLiteral::LanguageTaggedString { InternedLiteral::LanguageTaggedString {
value_id, value_id,
language_id, language_id,
} => LiteralRef::new_language_tagged_literal_unchecked( } => LiteralRef::new_language_tagged_literal_unchecked(
interner.strings.resolve(value_id), interner.resolve(value_id),
interner.strings.resolve(language_id), interner.resolve(language_id),
), ),
InternedLiteral::TypedLiteral { value_id, datatype } => LiteralRef::new_typed_literal( InternedLiteral::TypedLiteral { value_id, datatype } => LiteralRef::new_typed_literal(
interner.strings.resolve(value_id), interner.resolve(value_id),
datatype.decode_from(interner), datatype.decode_from(interner),
), ),
} }
@ -156,14 +223,14 @@ impl InternedLiteral {
pub fn next(&self) -> Self { pub fn next(&self) -> Self {
match self { match self {
Self::String { value_id } => Self::String { Self::String { value_id } => Self::String {
value_id: next_spur(*value_id), value_id: value_id.next(),
}, },
Self::LanguageTaggedString { Self::LanguageTaggedString {
value_id, value_id,
language_id, language_id,
} => Self::LanguageTaggedString { } => Self::LanguageTaggedString {
value_id: *value_id, value_id: *value_id,
language_id: next_spur(*language_id), language_id: language_id.next(),
}, },
Self::TypedLiteral { value_id, datatype } => Self::TypedLiteral { Self::TypedLiteral { value_id, datatype } => Self::TypedLiteral {
value_id: *value_id, value_id: *value_id,
@ -414,14 +481,32 @@ impl InternedTriple {
} }
} }
fn fist_spur() -> Spur { #[derive(Default)]
Spur::try_from_usize(0).unwrap() struct IdentityHasherBuilder {}
impl BuildHasher for IdentityHasherBuilder {
type Hasher = IdentityHasher;
fn build_hasher(&self) -> IdentityHasher {
IdentityHasher::default()
}
} }
fn next_spur(value: Spur) -> Spur { #[derive(Default)]
Spur::try_from_usize(value.into_usize() + 1).unwrap() struct IdentityHasher {
value: u64,
} }
fn impossible_spur() -> Spur { impl Hasher for IdentityHasher {
Spur::try_from_usize((u32::MAX - 10).try_into().unwrap()).unwrap() fn finish(&self) -> u64 {
self.value
}
fn write(&mut self, _bytes: &[u8]) {
unimplemented!()
}
fn write_u64(&mut self, i: u64) {
self.value = i
}
} }

Loading…
Cancel
Save