diff --git a/lib/src/sparql/dataset.rs b/lib/src/sparql/dataset.rs index c410e671..8275b480 100644 --- a/lib/src/sparql/dataset.rs +++ b/lib/src/sparql/dataset.rs @@ -1,16 +1,17 @@ use crate::sparql::algebra::QueryDataset; use crate::sparql::EvaluationError; use crate::store::numeric_encoder::{ - EncodedQuad, EncodedTerm, ReadEncoder, StrContainer, StrEncodingAware, StrId, StrLookup, + EncodedQuad, EncodedTerm, ReadEncoder, StrContainer, StrEncodingAware, StrHash, StrId, + StrLookup, }; use crate::store::ReadableEncodedStore; -use lasso::{Rodeo, Spur}; use std::cell::RefCell; +use std::collections::HashMap; use std::iter::{empty, once, Once}; pub(crate) struct DatasetView { store: S, - extra: RefCell, + extra: RefCell>, dataset: EncodedDatasetSpec, } @@ -44,7 +45,7 @@ impl DatasetView { }; Ok(Self { store, - extra: RefCell::new(Rodeo::default()), + extra: RefCell::new(HashMap::default()), dataset, }) } @@ -160,14 +161,13 @@ impl StrLookup for DatasetView { fn get_str(&self, id: DatasetStrId) -> Result, EvaluationError> { match id { DatasetStrId::Store(id) => self.store.get_str(id).map_err(|e| e.into()), - DatasetStrId::Temporary(id) => { - Ok(self.extra.borrow().try_resolve(&id).map(|e| e.to_owned())) - } + DatasetStrId::Temporary(id) => Ok(self.extra.borrow().get(&id).cloned()), } } fn get_str_id(&self, value: &str) -> Result>, EvaluationError> { - if let Some(id) = self.extra.borrow().get(value) { + let id = StrHash::new(value); + if self.extra.borrow().contains_key(&id) { Ok(Some(DatasetStrId::Temporary(id))) } else { Ok(self @@ -272,9 +272,12 @@ impl<'a, S: ReadableEncodedStore> StrContainer for &'a DatasetView { if let Some(id) = self.store.get_str_id(value).map_err(|e| e.into())? { Ok(DatasetStrId::Store(id)) } else { - Ok(DatasetStrId::Temporary( - self.extra.borrow_mut().get_or_intern(value), - )) + let hash = StrHash::new(value); + self.extra + .borrow_mut() + .entry(hash) + .or_insert_with(|| value.to_owned()); + Ok(DatasetStrId::Temporary(hash)) } } } @@ -282,7 +285,7 @@ impl<'a, S: ReadableEncodedStore> StrContainer for &'a DatasetView { #[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)] pub enum DatasetStrId { Store(I), - Temporary(Spur), + Temporary(StrHash), } impl StrId for DatasetStrId {} diff --git a/lib/src/store/binary_encoder.rs b/lib/src/store/binary_encoder.rs index 95a81866..59863874 100644 --- a/lib/src/store/binary_encoder.rs +++ b/lib/src/store/binary_encoder.rs @@ -1,9 +1,7 @@ use crate::error::invalid_data_error; use crate::model::xsd::*; -use crate::store::numeric_encoder::StrId; +use crate::store::numeric_encoder::StrHash; use crate::store::small_string::SmallString; -use siphasher::sip128::{Hasher128, SipHasher24}; -use std::hash::Hasher; use std::io; use std::io::{Cursor, Read}; use std::mem::size_of; @@ -51,36 +49,6 @@ const TYPE_DURATION_LITERAL: u8 = 42; const TYPE_YEAR_MONTH_DURATION_LITERAL: u8 = 43; const TYPE_DAY_TIME_DURATION_LITERAL: u8 = 44; -#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)] -#[repr(transparent)] -pub struct StrHash { - hash: u128, -} - -impl StrHash { - pub fn new(value: &str) -> Self { - let mut hasher = SipHasher24::new(); - hasher.write(value.as_bytes()); - Self { - hash: hasher.finish128().into(), - } - } - - #[inline] - pub fn from_be_bytes(bytes: [u8; 16]) -> Self { - Self { - hash: u128::from_be_bytes(bytes), - } - } - - #[inline] - pub fn to_be_bytes(&self) -> [u8; 16] { - self.hash.to_be_bytes() - } -} - -impl StrId for StrHash {} - #[derive(Clone, Copy)] pub enum QuadEncoding { Spog, diff --git a/lib/src/store/numeric_encoder.rs b/lib/src/store/numeric_encoder.rs index 787252d8..f7821203 100644 --- a/lib/src/store/numeric_encoder.rs +++ b/lib/src/store/numeric_encoder.rs @@ -7,6 +7,7 @@ use crate::sparql::EvaluationError; use crate::store::small_string::SmallString; use rand::random; use rio_api::model as rio; +use siphasher::sip128::{Hasher128, SipHasher24}; use std::collections::HashMap; use std::convert::{TryFrom, TryInto}; use std::error::Error; @@ -17,6 +18,36 @@ use std::{fmt, io, str}; pub trait StrId: Eq + Debug + Copy + Hash {} +#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)] +#[repr(transparent)] +pub struct StrHash { + hash: u128, +} + +impl StrHash { + pub fn new(value: &str) -> Self { + let mut hasher = SipHasher24::new(); + hasher.write(value.as_bytes()); + Self { + hash: hasher.finish128().into(), + } + } + + #[inline] + pub fn from_be_bytes(bytes: [u8; 16]) -> Self { + Self { + hash: u128::from_be_bytes(bytes), + } + } + + #[inline] + pub fn to_be_bytes(&self) -> [u8; 16] { + self.hash.to_be_bytes() + } +} + +impl StrId for StrHash {} + #[derive(Debug, Clone, Copy)] pub enum EncodedTerm { DefaultGraph, diff --git a/lib/src/store/sled.rs b/lib/src/store/sled.rs index cf006512..ab85844f 100644 --- a/lib/src/store/sled.rs +++ b/lib/src/store/sled.rs @@ -9,7 +9,7 @@ use crate::sparql::{ }; use crate::store::binary_encoder::*; use crate::store::numeric_encoder::{ - Decoder, ReadEncoder, StrContainer, StrEncodingAware, StrLookup, WriteEncoder, + Decoder, ReadEncoder, StrContainer, StrEncodingAware, StrHash, StrLookup, WriteEncoder, }; use crate::store::{ dump_dataset, dump_graph, get_encoded_quad_pattern, load_dataset, load_graph,