Uses StrHash everywhere

pull/171/head
Tpt 3 years ago
parent 13ee6bf80c
commit 7280823444
  1. 27
      lib/src/sparql/dataset.rs
  2. 34
      lib/src/store/binary_encoder.rs
  3. 31
      lib/src/store/numeric_encoder.rs
  4. 2
      lib/src/store/sled.rs

@ -1,16 +1,17 @@
use crate::sparql::algebra::QueryDataset; use crate::sparql::algebra::QueryDataset;
use crate::sparql::EvaluationError; use crate::sparql::EvaluationError;
use crate::store::numeric_encoder::{ use crate::store::numeric_encoder::{
EncodedQuad, EncodedTerm, ReadEncoder, StrContainer, StrEncodingAware, StrId, StrLookup, EncodedQuad, EncodedTerm, ReadEncoder, StrContainer, StrEncodingAware, StrHash, StrId,
StrLookup,
}; };
use crate::store::ReadableEncodedStore; use crate::store::ReadableEncodedStore;
use lasso::{Rodeo, Spur};
use std::cell::RefCell; use std::cell::RefCell;
use std::collections::HashMap;
use std::iter::{empty, once, Once}; use std::iter::{empty, once, Once};
pub(crate) struct DatasetView<S: ReadableEncodedStore> { pub(crate) struct DatasetView<S: ReadableEncodedStore> {
store: S, store: S,
extra: RefCell<Rodeo>, extra: RefCell<HashMap<StrHash, String>>,
dataset: EncodedDatasetSpec<S::StrId>, dataset: EncodedDatasetSpec<S::StrId>,
} }
@ -44,7 +45,7 @@ impl<S: ReadableEncodedStore> DatasetView<S> {
}; };
Ok(Self { Ok(Self {
store, store,
extra: RefCell::new(Rodeo::default()), extra: RefCell::new(HashMap::default()),
dataset, dataset,
}) })
} }
@ -160,14 +161,13 @@ impl<S: ReadableEncodedStore> StrLookup for DatasetView<S> {
fn get_str(&self, id: DatasetStrId<S::StrId>) -> Result<Option<String>, EvaluationError> { fn get_str(&self, id: DatasetStrId<S::StrId>) -> Result<Option<String>, EvaluationError> {
match id { match id {
DatasetStrId::Store(id) => self.store.get_str(id).map_err(|e| e.into()), DatasetStrId::Store(id) => self.store.get_str(id).map_err(|e| e.into()),
DatasetStrId::Temporary(id) => { DatasetStrId::Temporary(id) => Ok(self.extra.borrow().get(&id).cloned()),
Ok(self.extra.borrow().try_resolve(&id).map(|e| e.to_owned()))
}
} }
} }
fn get_str_id(&self, value: &str) -> Result<Option<DatasetStrId<S::StrId>>, EvaluationError> { fn get_str_id(&self, value: &str) -> Result<Option<DatasetStrId<S::StrId>>, EvaluationError> {
if let Some(id) = self.extra.borrow().get(value) { let id = StrHash::new(value);
if self.extra.borrow().contains_key(&id) {
Ok(Some(DatasetStrId::Temporary(id))) Ok(Some(DatasetStrId::Temporary(id)))
} else { } else {
Ok(self Ok(self
@ -272,9 +272,12 @@ impl<'a, S: ReadableEncodedStore> StrContainer for &'a DatasetView<S> {
if let Some(id) = self.store.get_str_id(value).map_err(|e| e.into())? { if let Some(id) = self.store.get_str_id(value).map_err(|e| e.into())? {
Ok(DatasetStrId::Store(id)) Ok(DatasetStrId::Store(id))
} else { } else {
Ok(DatasetStrId::Temporary( let hash = StrHash::new(value);
self.extra.borrow_mut().get_or_intern(value), self.extra
)) .borrow_mut()
.entry(hash)
.or_insert_with(|| value.to_owned());
Ok(DatasetStrId::Temporary(hash))
} }
} }
} }
@ -282,7 +285,7 @@ impl<'a, S: ReadableEncodedStore> StrContainer for &'a DatasetView<S> {
#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)]
pub enum DatasetStrId<I: StrId> { pub enum DatasetStrId<I: StrId> {
Store(I), Store(I),
Temporary(Spur), Temporary(StrHash),
} }
impl<I: StrId> StrId for DatasetStrId<I> {} impl<I: StrId> StrId for DatasetStrId<I> {}

@ -1,9 +1,7 @@
use crate::error::invalid_data_error; use crate::error::invalid_data_error;
use crate::model::xsd::*; use crate::model::xsd::*;
use crate::store::numeric_encoder::StrId; use crate::store::numeric_encoder::StrHash;
use crate::store::small_string::SmallString; use crate::store::small_string::SmallString;
use siphasher::sip128::{Hasher128, SipHasher24};
use std::hash::Hasher;
use std::io; use std::io;
use std::io::{Cursor, Read}; use std::io::{Cursor, Read};
use std::mem::size_of; use std::mem::size_of;
@ -51,36 +49,6 @@ const TYPE_DURATION_LITERAL: u8 = 42;
const TYPE_YEAR_MONTH_DURATION_LITERAL: u8 = 43; const TYPE_YEAR_MONTH_DURATION_LITERAL: u8 = 43;
const TYPE_DAY_TIME_DURATION_LITERAL: u8 = 44; const TYPE_DAY_TIME_DURATION_LITERAL: u8 = 44;
/// A 128-bit hash of a string, usable wherever a `StrId` is required.
///
/// `#[repr(transparent)]` gives this wrapper the same layout as its single
/// `u128` field.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
#[repr(transparent)]
pub struct StrHash {
    hash: u128,
}

impl StrHash {
    /// Hashes the UTF-8 bytes of `value` with a hasher built by
    /// `SipHasher24::new()` (no explicit key is supplied here) and keeps the
    /// resulting 128-bit digest.
    pub fn new(value: &str) -> Self {
        let mut sip = SipHasher24::new();
        sip.write(value.as_bytes());
        let digest: u128 = sip.finish128().into();
        Self { hash: digest }
    }

    /// Rebuilds a `StrHash` from its 16-byte big-endian representation.
    #[inline]
    pub fn from_be_bytes(bytes: [u8; 16]) -> Self {
        let hash = u128::from_be_bytes(bytes);
        Self { hash }
    }

    /// Returns the hash as 16 bytes in big-endian order, i.e. the inverse of
    /// `from_be_bytes`.
    #[inline]
    pub fn to_be_bytes(&self) -> [u8; 16] {
        u128::to_be_bytes(self.hash)
    }
}

// Marker impl: lets `StrHash` be used as a string identifier type.
impl StrId for StrHash {}
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
pub enum QuadEncoding { pub enum QuadEncoding {
Spog, Spog,

@ -7,6 +7,7 @@ use crate::sparql::EvaluationError;
use crate::store::small_string::SmallString; use crate::store::small_string::SmallString;
use rand::random; use rand::random;
use rio_api::model as rio; use rio_api::model as rio;
use siphasher::sip128::{Hasher128, SipHasher24};
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::{TryFrom, TryInto}; use std::convert::{TryFrom, TryInto};
use std::error::Error; use std::error::Error;
@ -17,6 +18,36 @@ use std::{fmt, io, str};
pub trait StrId: Eq + Debug + Copy + Hash {} pub trait StrId: Eq + Debug + Copy + Hash {}
/// A 128-bit hash of a string, usable wherever a `StrId` is required.
///
/// `#[repr(transparent)]` gives this wrapper the same layout as its single
/// `u128` field.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
#[repr(transparent)]
pub struct StrHash {
    hash: u128,
}

impl StrHash {
    /// Hashes the UTF-8 bytes of `value` with a hasher built by
    /// `SipHasher24::new()` (no explicit key is supplied here) and keeps the
    /// resulting 128-bit digest.
    pub fn new(value: &str) -> Self {
        let mut sip = SipHasher24::new();
        sip.write(value.as_bytes());
        let digest: u128 = sip.finish128().into();
        Self { hash: digest }
    }

    /// Rebuilds a `StrHash` from its 16-byte big-endian representation.
    #[inline]
    pub fn from_be_bytes(bytes: [u8; 16]) -> Self {
        let hash = u128::from_be_bytes(bytes);
        Self { hash }
    }

    /// Returns the hash as 16 bytes in big-endian order, i.e. the inverse of
    /// `from_be_bytes`.
    #[inline]
    pub fn to_be_bytes(&self) -> [u8; 16] {
        u128::to_be_bytes(self.hash)
    }
}

// Marker impl: lets `StrHash` be used as a string identifier type.
impl StrId for StrHash {}
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub enum EncodedTerm<I: StrId> { pub enum EncodedTerm<I: StrId> {
DefaultGraph, DefaultGraph,

@ -9,7 +9,7 @@ use crate::sparql::{
}; };
use crate::store::binary_encoder::*; use crate::store::binary_encoder::*;
use crate::store::numeric_encoder::{ use crate::store::numeric_encoder::{
Decoder, ReadEncoder, StrContainer, StrEncodingAware, StrLookup, WriteEncoder, Decoder, ReadEncoder, StrContainer, StrEncodingAware, StrHash, StrLookup, WriteEncoder,
}; };
use crate::store::{ use crate::store::{
dump_dataset, dump_graph, get_encoded_quad_pattern, load_dataset, load_graph, dump_dataset, dump_graph, get_encoded_quad_pattern, load_dataset, load_graph,

Loading…
Cancel
Save