OxRDF: drops lasso dependency

pull/435/head
Tpt 2 years ago committed by Thomas Tanon
parent 21994d39fd
commit a164b268c2
  1. 32
      Cargo.lock
  2. 2
      lib/oxrdf/Cargo.toml
  3. 165
      lib/oxrdf/src/interning.rs

32
Cargo.lock generated

@ -8,17 +8,6 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "aho-corasick"
version = "0.7.20"
@ -597,15 +586,6 @@ version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
[[package]]
name = "hashbrown"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
dependencies = [
"ahash",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
@ -689,7 +669,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399"
dependencies = [
"autocfg",
"hashbrown 0.12.3",
"hashbrown",
]
[[package]]
@ -778,15 +758,6 @@ dependencies = [
"winapi-build",
]
[[package]]
name = "lasso"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aeb7b21a526375c5ca55f1a6dfd4e1fad9fa4edd750f530252a718a44b2608f0"
dependencies = [
"hashbrown 0.11.2",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
@ -1053,7 +1024,6 @@ checksum = "bb175ec8981211357b7b379869c2f8d555881c55ea62311428ec0de46d89bd5c"
name = "oxrdf"
version = "0.1.5-dev"
dependencies = [
"lasso",
"oxilangtag",
"oxiri",
"oxsdatatypes",

@ -21,9 +21,7 @@ rdf-star = []
rand = "0.8"
oxilangtag = "0.1"
oxiri = "0.2"
oxsdatatypes = { version = "0.1.1", path="../oxsdatatypes", optional = true }
lasso = { version = "0.6", features = ["inline-more"] }
[package.metadata.docs.rs]
all-features = true

@ -1,108 +1,175 @@
//! Interning of RDF elements using a hash-keyed string table
use crate::*;
use lasso::{Key, Rodeo, Spur};
#[cfg(feature = "rdf-star")]
use std::collections::HashMap;
use std::collections::hash_map::{Entry, HashMap, RandomState};
use std::hash::{BuildHasher, Hasher};
#[derive(Debug, Default)]
pub struct Interner {
strings: Rodeo,
hasher: RandomState,
string_for_hash: HashMap<u64, String, IdentityHasherBuilder>,
#[cfg(feature = "rdf-star")]
triples: HashMap<InternedTriple, Triple>,
}
impl Interner {
    /// Returns the [`Key`] under which `value` is stored, inserting `value` first if it is
    /// not interned yet.
    ///
    /// The key is the string's hash. When that slot already holds a *different* string the
    /// next slot is tried (see [`Self::next_slot`]) until either the string itself or a free
    /// slot is found.
    fn get_or_intern(&mut self, value: &str) -> Key {
        let mut hash = self.hash(value);
        loop {
            match self.string_for_hash.entry(hash) {
                Entry::Vacant(slot) => {
                    slot.insert(value.into());
                    return Key(hash);
                }
                Entry::Occupied(slot) => {
                    if slot.get() == value {
                        return Key(hash);
                    }
                }
            }
            // Collision with another string: the map must be queried again with the new
            // hash. Re-checking the same occupied entry would never terminate.
            hash = Self::next_slot(hash);
        }
    }

    /// Looks up the [`Key`] of an already interned `value`.
    ///
    /// Follows the same probe sequence as [`Self::get_or_intern`] and returns `None` as soon
    /// as an empty slot is reached, i.e. when `value` has never been interned.
    fn get(&self, value: &str) -> Option<Key> {
        let mut hash = self.hash(value);
        loop {
            if self.string_for_hash.get(&hash)? == value {
                return Some(Key(hash));
            }
            hash = Self::next_slot(hash);
        }
    }

    /// Hashes `value` with this interner's own `RandomState`.
    ///
    /// `u64::MAX` is remapped to `0` so that it is never handed out as a key; that value is
    /// the one used by `Key::impossible()`.
    fn hash(&self, value: &str) -> u64 {
        let mut hasher = self.hasher.build_hasher();
        hasher.write(value.as_bytes());
        let hash = hasher.finish();
        if hash == u64::MAX {
            0
        } else {
            hash
        }
    }

    /// Returns the string stored under `key`.
    ///
    /// # Panics
    ///
    /// Panics if `key` is not present in this interner.
    fn resolve(&self, key: &Key) -> &str {
        self.string_for_hash
            .get(&key.0)
            .expect("Interned key not found")
    }

    /// Next slot of the probe sequence: `hash + 1`, wrapping to `0` before `u64::MAX` is
    /// reached so that `u64::MAX` stays unused.
    fn next_slot(hash: u64) -> u64 {
        if hash >= u64::MAX - 1 {
            0
        } else {
            hash + 1
        }
    }
}
/// Identifier of an interned string: the `u64` slot it occupies in the interner's table.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct Key(u64);

impl Key {
    /// The smallest key.
    fn first() -> Self {
        Self(u64::MIN)
    }

    /// The key directly after this one; stays at `u64::MAX` once it is reached.
    fn next(self) -> Self {
        let Self(raw) = self;
        Self(raw.saturating_add(1))
    }

    /// The key wrapping `u64::MAX`.
    fn impossible() -> Self {
        Self(u64::MAX)
    }
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct InternedNamedNode {
id: Spur,
id: Key,
}
impl InternedNamedNode {
pub fn encoded_into(named_node: NamedNodeRef<'_>, interner: &mut Interner) -> Self {
Self {
id: interner.strings.get_or_intern(named_node.as_str()),
id: interner.get_or_intern(named_node.as_str()),
}
}
pub fn encoded_from(named_node: NamedNodeRef<'_>, interner: &Interner) -> Option<Self> {
Some(Self {
id: interner.strings.get(named_node.as_str())?,
id: interner.get(named_node.as_str())?,
})
}
pub fn decode_from<'a>(&self, interner: &'a Interner) -> NamedNodeRef<'a> {
NamedNodeRef::new_unchecked(interner.strings.resolve(&self.id))
NamedNodeRef::new_unchecked(interner.resolve(&self.id))
}
pub fn first() -> Self {
Self { id: fist_spur() }
Self { id: Key::first() }
}
pub fn next(self) -> Self {
Self {
id: next_spur(self.id),
}
Self { id: self.id.next() }
}
pub fn impossible() -> Self {
Self {
id: impossible_spur(),
id: Key::impossible(),
}
}
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct InternedBlankNode {
id: Spur,
id: Key,
}
impl InternedBlankNode {
pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Interner) -> Self {
Self {
id: interner.strings.get_or_intern(blank_node.as_str()),
id: interner.get_or_intern(blank_node.as_str()),
}
}
pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Interner) -> Option<Self> {
Some(Self {
id: interner.strings.get(blank_node.as_str())?,
id: interner.get(blank_node.as_str())?,
})
}
pub fn decode_from<'a>(&self, interner: &'a Interner) -> BlankNodeRef<'a> {
BlankNodeRef::new_unchecked(interner.strings.resolve(&self.id))
BlankNodeRef::new_unchecked(interner.resolve(&self.id))
}
pub fn next(self) -> Self {
Self {
id: next_spur(self.id),
}
Self { id: self.id.next() }
}
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub enum InternedLiteral {
String {
value_id: Spur,
value_id: Key,
},
LanguageTaggedString {
value_id: Spur,
language_id: Spur,
value_id: Key,
language_id: Key,
},
TypedLiteral {
value_id: Spur,
value_id: Key,
datatype: InternedNamedNode,
},
}
impl InternedLiteral {
pub fn encoded_into(literal: LiteralRef<'_>, interner: &mut Interner) -> Self {
let value_id = interner.strings.get_or_intern(literal.value());
let value_id = interner.get_or_intern(literal.value());
if literal.is_plain() {
if let Some(language) = literal.language() {
Self::LanguageTaggedString {
value_id,
language_id: interner.strings.get_or_intern(language),
language_id: interner.get_or_intern(language),
}
} else {
Self::String { value_id }
@ -116,12 +183,12 @@ impl InternedLiteral {
}
pub fn encoded_from(literal: LiteralRef<'_>, interner: &Interner) -> Option<Self> {
let value_id = interner.strings.get(literal.value())?;
let value_id = interner.get(literal.value())?;
Some(if literal.is_plain() {
if let Some(language) = literal.language() {
Self::LanguageTaggedString {
value_id,
language_id: interner.strings.get(language)?,
language_id: interner.get(language)?,
}
} else {
Self::String { value_id }
@ -137,17 +204,17 @@ impl InternedLiteral {
pub fn decode_from<'a>(&self, interner: &'a Interner) -> LiteralRef<'a> {
match self {
InternedLiteral::String { value_id } => {
LiteralRef::new_simple_literal(interner.strings.resolve(value_id))
LiteralRef::new_simple_literal(interner.resolve(value_id))
}
InternedLiteral::LanguageTaggedString {
value_id,
language_id,
} => LiteralRef::new_language_tagged_literal_unchecked(
interner.strings.resolve(value_id),
interner.strings.resolve(language_id),
interner.resolve(value_id),
interner.resolve(language_id),
),
InternedLiteral::TypedLiteral { value_id, datatype } => LiteralRef::new_typed_literal(
interner.strings.resolve(value_id),
interner.resolve(value_id),
datatype.decode_from(interner),
),
}
@ -156,14 +223,14 @@ impl InternedLiteral {
pub fn next(&self) -> Self {
match self {
Self::String { value_id } => Self::String {
value_id: next_spur(*value_id),
value_id: value_id.next(),
},
Self::LanguageTaggedString {
value_id,
language_id,
} => Self::LanguageTaggedString {
value_id: *value_id,
language_id: next_spur(*language_id),
language_id: language_id.next(),
},
Self::TypedLiteral { value_id, datatype } => Self::TypedLiteral {
value_id: *value_id,
@ -414,14 +481,32 @@ impl InternedTriple {
}
}
fn fist_spur() -> Spur {
Spur::try_from_usize(0).unwrap()
#[derive(Default)]
struct IdentityHasherBuilder {}
impl BuildHasher for IdentityHasherBuilder {
type Hasher = IdentityHasher;
fn build_hasher(&self) -> IdentityHasher {
IdentityHasher::default()
}
}
fn next_spur(value: Spur) -> Spur {
Spur::try_from_usize(value.into_usize() + 1).unwrap()
/// Hasher state used for the interner's `u64`-keyed table: it stores a single `u64`
/// and hands it back unchanged from `finish` (see the `Hasher` impl in this file).
#[derive(Default)]
struct IdentityHasher {
// Last value received through `write_u64`; `0` for a default-constructed hasher.
value: u64,
}
fn impossible_spur() -> Spur {
Spur::try_from_usize((u32::MAX - 10).try_into().unwrap()).unwrap()
impl Hasher for IdentityHasher {
    /// Returns the stored value as-is.
    fn finish(&self) -> u64 {
        self.value
    }

    /// Raw byte input is not supported by this hasher.
    ///
    /// # Panics
    ///
    /// Always panics.
    fn write(&mut self, _bytes: &[u8]) {
        unimplemented!()
    }

    /// Stores `key` so that `finish` returns it unchanged.
    fn write_u64(&mut self, key: u64) {
        self.value = key;
    }
}

Loading…
Cancel
Save