Uses StrHash everywhere

pull/171/head
Tpt 4 years ago
parent 13ee6bf80c
commit 7280823444
  1. 27
      lib/src/sparql/dataset.rs
  2. 34
      lib/src/store/binary_encoder.rs
  3. 31
      lib/src/store/numeric_encoder.rs
  4. 2
      lib/src/store/sled.rs

@ -1,16 +1,17 @@
use crate::sparql::algebra::QueryDataset;
use crate::sparql::EvaluationError;
use crate::store::numeric_encoder::{
EncodedQuad, EncodedTerm, ReadEncoder, StrContainer, StrEncodingAware, StrId, StrLookup,
EncodedQuad, EncodedTerm, ReadEncoder, StrContainer, StrEncodingAware, StrHash, StrId,
StrLookup,
};
use crate::store::ReadableEncodedStore;
use lasso::{Rodeo, Spur};
use std::cell::RefCell;
use std::collections::HashMap;
use std::iter::{empty, once, Once};
/// Wraps a readable encoded store `S` together with a table of extra strings
/// and the encoded dataset specification.
pub(crate) struct DatasetView<S: ReadableEncodedStore> {
// The wrapped store; string lookups for `DatasetStrId::Store` ids are delegated to it.
store: S,
// NOTE(review): `extra` is declared twice below. This looks like the old
// (`Rodeo`) and new (`HashMap<StrHash, String>`) sides of a diff rendered
// without +/- markers — confirm against the commit before relying on either.
extra: RefCell<Rodeo>,
extra: RefCell<HashMap<StrHash, String>>,
dataset: EncodedDatasetSpec<S::StrId>,
}
@ -44,7 +45,7 @@ impl<S: ReadableEncodedStore> DatasetView<S> {
};
Ok(Self {
store,
extra: RefCell::new(Rodeo::default()),
extra: RefCell::new(HashMap::default()),
dataset,
})
}
@ -160,14 +161,13 @@ impl<S: ReadableEncodedStore> StrLookup for DatasetView<S> {
/// Resolves a string id to its string value.
///
/// `Store` ids are looked up in the wrapped store (its error is converted with
/// `into()`); `Temporary` ids are looked up in `self.extra`. Returns `Ok(None)`
/// when the id is unknown to the table that was consulted.
fn get_str(&self, id: DatasetStrId<S::StrId>) -> Result<Option<String>, EvaluationError> {
match id {
DatasetStrId::Store(id) => self.store.get_str(id).map_err(|e| e.into()),
// NOTE(review): two `Temporary` arms follow. This looks like the old
// (`try_resolve`, interner-based) and new (`get(..).cloned()`, map-based)
// sides of a diff rendered without +/- markers — confirm against the commit.
DatasetStrId::Temporary(id) => {
Ok(self.extra.borrow().try_resolve(&id).map(|e| e.to_owned()))
}
DatasetStrId::Temporary(id) => Ok(self.extra.borrow().get(&id).cloned()),
}
}
fn get_str_id(&self, value: &str) -> Result<Option<DatasetStrId<S::StrId>>, EvaluationError> {
if let Some(id) = self.extra.borrow().get(value) {
let id = StrHash::new(value);
if self.extra.borrow().contains_key(&id) {
Ok(Some(DatasetStrId::Temporary(id)))
} else {
Ok(self
@ -272,9 +272,12 @@ impl<'a, S: ReadableEncodedStore> StrContainer for &'a DatasetView<S> {
if let Some(id) = self.store.get_str_id(value).map_err(|e| e.into())? {
Ok(DatasetStrId::Store(id))
} else {
Ok(DatasetStrId::Temporary(
self.extra.borrow_mut().get_or_intern(value),
))
let hash = StrHash::new(value);
self.extra
.borrow_mut()
.entry(hash)
.or_insert_with(|| value.to_owned());
Ok(DatasetStrId::Temporary(hash))
}
}
}
@ -282,7 +285,7 @@ impl<'a, S: ReadableEncodedStore> StrContainer for &'a DatasetView<S> {
/// String id that is either an id of the underlying store (`Store`) or an id
/// for a string kept only in the view's own extra table (`Temporary`).
#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)]
pub enum DatasetStrId<I: StrId> {
Store(I),
// NOTE(review): `Temporary` appears twice. This looks like the old (`Spur`)
// and new (`StrHash`) sides of a diff rendered without +/- markers —
// confirm against the commit.
Temporary(Spur),
Temporary(StrHash),
}
// Marks `DatasetStrId` itself as usable wherever a `StrId` is required.
impl<I: StrId> StrId for DatasetStrId<I> {}

@ -1,9 +1,7 @@
use crate::error::invalid_data_error;
use crate::model::xsd::*;
use crate::store::numeric_encoder::StrId;
use crate::store::numeric_encoder::StrHash;
use crate::store::small_string::SmallString;
use siphasher::sip128::{Hasher128, SipHasher24};
use std::hash::Hasher;
use std::io;
use std::io::{Cursor, Read};
use std::mem::size_of;
@ -51,36 +49,6 @@ const TYPE_DURATION_LITERAL: u8 = 42;
const TYPE_YEAR_MONTH_DURATION_LITERAL: u8 = 43;
const TYPE_DAY_TIME_DURATION_LITERAL: u8 = 44;
/// 128-bit hash of a string, used as that string's identifier.
#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)]
#[repr(transparent)]
pub struct StrHash {
    hash: u128,
}

impl StrHash {
    /// Hashes the UTF-8 bytes of `value` with a `SipHasher24` built by
    /// `SipHasher24::new()` and keeps the 128-bit result.
    pub fn new(value: &str) -> Self {
        let mut state = SipHasher24::new();
        state.write(value.as_bytes());
        let hash: u128 = state.finish128().into();
        Self { hash }
    }

    /// Rebuilds a hash from its 16-byte big-endian representation.
    #[inline]
    pub fn from_be_bytes(bytes: [u8; 16]) -> Self {
        let hash = u128::from_be_bytes(bytes);
        Self { hash }
    }

    /// Returns the hash as 16 bytes in big-endian order.
    #[inline]
    pub fn to_be_bytes(&self) -> [u8; 16] {
        u128::to_be_bytes(self.hash)
    }
}

impl StrId for StrHash {}
#[derive(Clone, Copy)]
pub enum QuadEncoding {
Spog,

@ -7,6 +7,7 @@ use crate::sparql::EvaluationError;
use crate::store::small_string::SmallString;
use rand::random;
use rio_api::model as rio;
use siphasher::sip128::{Hasher128, SipHasher24};
use std::collections::HashMap;
use std::convert::{TryFrom, TryInto};
use std::error::Error;
@ -17,6 +18,36 @@ use std::{fmt, io, str};
/// Marker trait for string identifier types: it declares no methods and only
/// requires `Eq`, `Debug`, `Copy` and `Hash`.
pub trait StrId: Eq + Debug + Copy + Hash {}
/// 128-bit hash of a string, used as that string's identifier.
#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)]
#[repr(transparent)]
pub struct StrHash {
    hash: u128,
}

impl StrHash {
    /// Hashes the UTF-8 bytes of `value` with a `SipHasher24` built by
    /// `SipHasher24::new()` and keeps the 128-bit result.
    pub fn new(value: &str) -> Self {
        let mut state = SipHasher24::new();
        state.write(value.as_bytes());
        let hash: u128 = state.finish128().into();
        Self { hash }
    }

    /// Rebuilds a hash from its 16-byte big-endian representation.
    #[inline]
    pub fn from_be_bytes(bytes: [u8; 16]) -> Self {
        let hash = u128::from_be_bytes(bytes);
        Self { hash }
    }

    /// Returns the hash as 16 bytes in big-endian order.
    #[inline]
    pub fn to_be_bytes(&self) -> [u8; 16] {
        u128::to_be_bytes(self.hash)
    }
}

impl StrId for StrHash {}
#[derive(Debug, Clone, Copy)]
pub enum EncodedTerm<I: StrId> {
DefaultGraph,

@ -9,7 +9,7 @@ use crate::sparql::{
};
use crate::store::binary_encoder::*;
use crate::store::numeric_encoder::{
Decoder, ReadEncoder, StrContainer, StrEncodingAware, StrLookup, WriteEncoder,
Decoder, ReadEncoder, StrContainer, StrEncodingAware, StrHash, StrLookup, WriteEncoder,
};
use crate::store::{
dump_dataset, dump_graph, get_encoded_quad_pattern, load_dataset, load_graph,

Loading…
Cancel
Save