From 68aab2563cbe6d15568ca58f780925fa046622ef Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 22 Apr 2021 18:49:37 +0200 Subject: [PATCH] Simplifies string storage traits --- lib/src/sparql/dataset.rs | 41 +++++++-------- lib/src/storage/binary_encoder.rs | 38 ++++++-------- lib/src/storage/mod.rs | 50 ++++++------------ lib/src/storage/numeric_encoder.rs | 32 ++++++------ lib/src/store.rs | 83 ++++++++---------------------- 5 files changed, 86 insertions(+), 158 deletions(-) diff --git a/lib/src/sparql/dataset.rs b/lib/src/sparql/dataset.rs index f54cc2f9..2e782308 100644 --- a/lib/src/sparql/dataset.rs +++ b/lib/src/sparql/dataset.rs @@ -1,10 +1,11 @@ use crate::sparql::algebra::QueryDataset; use crate::sparql::EvaluationError; use crate::storage::numeric_encoder::{ - EncodedQuad, EncodedTerm, ReadEncoder, StrContainer, StrEncodingAware, StrHash, StrLookup, + EncodedQuad, EncodedTerm, ReadEncoder, StrContainer, StrHash, StrLookup, }; use crate::storage::Storage; use std::cell::RefCell; +use std::collections::hash_map::Entry; use std::collections::HashMap; use std::iter::empty; @@ -154,40 +155,34 @@ impl DatasetView { } } -impl StrEncodingAware for DatasetView { +impl StrLookup for DatasetView { type Error = EvaluationError; -} -impl StrLookup for DatasetView { - fn get_str(&self, id: StrHash) -> Result, EvaluationError> { - Ok(if let Some(value) = self.extra.borrow().get(&id) { + fn get_str(&self, key: StrHash) -> Result, EvaluationError> { + Ok(if let Some(value) = self.extra.borrow().get(&key) { Some(value.clone()) } else { - self.storage.get_str(id)? + self.storage.get_str(key)? }) } - fn get_str_id(&self, value: &str) -> Result, EvaluationError> { - let id = StrHash::new(value); - Ok(if self.extra.borrow().contains_key(&id) { - Some(id) - } else { - self.storage.get_str_id(value)? - }) + fn contains_str(&self, key: StrHash) -> Result { + Ok(self.extra.borrow().contains_key(&key) || self.storage.contains_str(key)?) } } impl StrContainer for DatasetView { - fn insert_str(&self, value: &str) -> Result { - if let Some(hash) = self.storage.get_str_id(value)? { - Ok(hash) + fn insert_str(&self, key: StrHash, value: &str) -> Result { + if self.storage.contains_str(key)? { + Ok(false) } else { - let hash = StrHash::new(value); - self.extra - .borrow_mut() - .entry(hash) - .or_insert_with(|| value.to_owned()); - Ok(hash) + match self.extra.borrow_mut().entry(key) { + Entry::Occupied(_) => Ok(false), + Entry::Vacant(entry) => { + entry.insert(value.to_owned()); + Ok(true) + } + } } } } diff --git a/lib/src/storage/binary_encoder.rs b/lib/src/storage/binary_encoder.rs index 1ddb729c..80c453ba 100644 --- a/lib/src/storage/binary_encoder.rs +++ b/lib/src/storage/binary_encoder.rs @@ -628,43 +628,37 @@ pub fn write_term(sink: &mut Vec, term: EncodedTerm) { mod tests { use super::*; use crate::storage::numeric_encoder::*; + use std::cell::RefCell; + use std::collections::hash_map::Entry; use std::collections::HashMap; use std::convert::Infallible; - use std::sync::RwLock; #[derive(Default)] struct MemoryStrStore { - id2str: RwLock>, + id2str: RefCell>, } - impl StrEncodingAware for MemoryStrStore { + impl StrLookup for MemoryStrStore { type Error = Infallible; - } - impl StrLookup for MemoryStrStore { - fn get_str(&self, id: StrHash) -> Result, Infallible> { - Ok(self.id2str.read().unwrap().get(&id).cloned()) + fn get_str(&self, key: StrHash) -> Result, Infallible> { + Ok(self.id2str.borrow().get(&key).cloned()) } - fn get_str_id(&self, value: &str) -> Result, Infallible> { - let id = StrHash::new(value); - Ok(if self.id2str.read().unwrap().contains_key(&id) { - Some(id) - } else { - None - }) + fn contains_str(&self, key: StrHash) -> Result { + Ok(self.id2str.borrow().contains_key(&key)) } } impl StrContainer for MemoryStrStore { - fn insert_str(&self, value: &str) -> Result { - let key = StrHash::new(value); - self.id2str - .write() - .unwrap() - .entry(key) - .or_insert_with(|| value.to_owned()); - Ok(key) + fn insert_str(&self, key: StrHash, value: &str) -> Result { + match self.id2str.borrow_mut().entry(key) { + Entry::Occupied(_) => Ok(false), + Entry::Vacant(entry) => { + entry.insert(value.to_owned()); + Ok(true) + } + } } } diff --git a/lib/src/storage/mod.rs b/lib/src/storage/mod.rs index 21e6f938..2de3c5e4 100644 --- a/lib/src/storage/mod.rs +++ b/lib/src/storage/mod.rs @@ -18,9 +18,7 @@ use crate::storage::binary_encoder::{ LATEST_STORAGE_VERSION, WRITTEN_TERM_MAX_SIZE, }; use crate::storage::io::StoreOrParseError; -use crate::storage::numeric_encoder::{ - EncodedQuad, EncodedTerm, StrContainer, StrEncodingAware, StrHash, StrLookup, -}; +use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, StrContainer, StrHash, StrLookup}; mod binary_encoder; pub(crate) mod io; @@ -991,57 +989,39 @@ impl From> for Sled2ConflictableTransactionEr } } -impl StrEncodingAware for Storage { +impl StrLookup for Storage { type Error = std::io::Error; -} -impl StrLookup for Storage { - fn get_str(&self, id: StrHash) -> Result, std::io::Error> { - self.get_str(id) + fn get_str(&self, key: StrHash) -> Result, std::io::Error> { + self.get_str(key) } - fn get_str_id(&self, value: &str) -> Result, std::io::Error> { - let key = StrHash::new(value); - Ok(if self.contains_str(key)? { - Some(key) - } else { - None - }) + fn contains_str(&self, key: StrHash) -> Result { + self.contains_str(key) } } impl StrContainer for Storage { - fn insert_str(&self, value: &str) -> Result { - let key = StrHash::new(value); - self.insert_str(key, value)?; - Ok(key) + fn insert_str(&self, key: StrHash, value: &str) -> Result { + self.insert_str(key, value) } } -impl<'a> StrEncodingAware for StorageTransaction<'a> { +impl<'a> StrLookup for StorageTransaction<'a> { type Error = UnabortableTransactionError; -} -impl<'a> StrLookup for StorageTransaction<'a> { - fn get_str(&self, id: StrHash) -> Result, UnabortableTransactionError> { - self.get_str(id) + fn get_str(&self, key: StrHash) -> Result, UnabortableTransactionError> { + self.get_str(key) } - fn get_str_id(&self, value: &str) -> Result, UnabortableTransactionError> { - let key = StrHash::new(value); - Ok(if self.contains_str(key)? { - Some(key) - } else { - None - }) + fn contains_str(&self, key: StrHash) -> Result { + self.contains_str(key) } } impl<'a> StrContainer for StorageTransaction<'a> { - fn insert_str(&self, value: &str) -> Result { - let key = StrHash::new(value); - self.insert_str(key, value)?; - Ok(key) + fn insert_str(&self, key: StrHash, value: &str) -> Result { + self.insert_str(key, value) } } diff --git a/lib/src/storage/numeric_encoder.rs b/lib/src/storage/numeric_encoder.rs index 5c718cce..b6113a0f 100644 --- a/lib/src/storage/numeric_encoder.rs +++ b/lib/src/storage/numeric_encoder.rs @@ -486,27 +486,20 @@ impl EncodedQuad { } } -pub(crate) trait StrEncodingAware { - //TODO: rename +pub(crate) trait StrLookup { type Error: Error + Into + 'static; -} - -impl<'a, T: StrEncodingAware> StrEncodingAware for &'a T { - type Error = T::Error; -} -pub(crate) trait StrLookup: StrEncodingAware { - fn get_str(&self, id: StrHash) -> Result, Self::Error>; + fn get_str(&self, key: StrHash) -> Result, Self::Error>; - fn get_str_id(&self, value: &str) -> Result, Self::Error>; + fn contains_str(&self, key: StrHash) -> Result; } -pub(crate) trait StrContainer: StrEncodingAware { - fn insert_str(&self, value: &str) -> Result; +pub(crate) trait StrContainer: StrLookup { + fn insert_str(&self, key: StrHash, value: &str) -> Result; } /// Tries to encode a term based on the existing strings (does not insert anything) -pub(crate) trait ReadEncoder: StrEncodingAware { +pub(crate) trait ReadEncoder: StrLookup { fn get_encoded_named_node( &self, named_node: NamedNodeRef<'_>, @@ -738,12 +731,17 @@ pub(crate) trait ReadEncoder: StrEncodingAware { impl ReadEncoder for S { fn get_encoded_str(&self, value: &str) -> Result, Self::Error> { - self.get_str_id(value) + let key = StrHash::new(value); + Ok(if self.contains_str(key)? { + Some(key) + } else { + None + }) } } /// Encodes a term and insert strings if needed -pub(crate) trait WriteEncoder: StrEncodingAware { +pub(crate) trait WriteEncoder: StrContainer { fn encode_named_node(&self, named_node: NamedNodeRef<'_>) -> Result { self.encode_rio_named_node(named_node.into()) } @@ -999,7 +997,9 @@ pub(crate) trait WriteEncoder: StrEncodingAware { impl WriteEncoder for S { fn encode_str(&self, value: &str) -> Result { - self.insert_str(value) + let key = StrHash::new(value); + self.insert_str(key, value)?; + Ok(key) } } diff --git a/lib/src/store.rs b/lib/src/store.rs index fc776799..d2c5da45 100644 --- a/lib/src/store.rs +++ b/lib/src/store.rs @@ -42,10 +42,7 @@ use crate::sparql::{ UpdateOptions, }; use crate::storage::io::{dump_dataset, dump_graph, load_dataset, load_graph}; -use crate::storage::numeric_encoder::{ - Decoder, EncodedTerm, ReadEncoder, StrContainer, StrEncodingAware, StrHash, StrLookup, - WriteEncoder, -}; +use crate::storage::numeric_encoder::{Decoder, EncodedTerm, ReadEncoder, WriteEncoder}; pub use crate::storage::ConflictableTransactionError; pub use crate::storage::TransactionError; pub use crate::storage::UnabortableTransactionError; @@ -247,7 +244,7 @@ impl Store { /// Checks if this store contains a given quad pub fn contains<'a>(&self, quad: impl Into>) -> Result { - if let Some(quad) = self.get_encoded_quad(quad.into())? { + if let Some(quad) = self.storage.get_encoded_quad(quad.into())? { self.storage.contains(&quad) } else { Ok(false) @@ -431,7 +428,7 @@ impl Store { /// It might leave the store in a bad state if a crash happens during the insertion. /// Use a (memory greedy) [transaction](Store::transaction()) if you do not want that. pub fn insert<'a>(&self, quad: impl Into>) -> Result { - let quad = self.encode_quad(quad.into())?; + let quad = self.storage.encode_quad(quad.into())?; self.storage.insert(&quad) } @@ -443,7 +440,7 @@ impl Store { /// It might leave the store in a bad state if a crash happens during the removal. /// Use a (memory greedy) [transaction](Store::transaction()) if you do not want that. pub fn remove<'a>(&self, quad: impl Into>) -> Result { - if let Some(quad) = self.get_encoded_quad(quad.into())? { + if let Some(quad) = self.storage.get_encoded_quad(quad.into())? { self.storage.remove(&quad) } else { Ok(false) @@ -540,7 +537,10 @@ impl Store { &self, graph_name: impl Into>, ) -> Result { - if let Some(graph_name) = self.get_encoded_named_or_blank_node(graph_name.into())? { + if let Some(graph_name) = self + .storage + .get_encoded_named_or_blank_node(graph_name.into())? + { self.storage.contains_named_graph(graph_name) } else { Ok(false) @@ -566,7 +566,7 @@ impl Store { &self, graph_name: impl Into>, ) -> Result { - let graph_name = self.encode_named_or_blank_node(graph_name.into())?; + let graph_name = self.storage.encode_named_or_blank_node(graph_name.into())?; self.storage.insert_named_graph(graph_name) } @@ -592,7 +592,7 @@ impl Store { &self, graph_name: impl Into>, ) -> Result<(), io::Error> { - if let Some(graph_name) = self.get_encoded_graph_name(graph_name.into())? { + if let Some(graph_name) = self.storage.get_encoded_graph_name(graph_name.into())? { self.storage.clear_graph(graph_name) } else { Ok(()) @@ -623,7 +623,10 @@ impl Store { &self, graph_name: impl Into>, ) -> Result { - if let Some(graph_name) = self.get_encoded_named_or_blank_node(graph_name.into())? { + if let Some(graph_name) = self + .storage + .get_encoded_named_or_blank_node(graph_name.into())? + { self.storage.remove_named_graph(graph_name) } else { Ok(false) @@ -661,28 +664,6 @@ impl fmt::Display for Store { } } -impl StrEncodingAware for Store { - type Error = io::Error; -} - -impl StrLookup for Store { - fn get_str(&self, id: StrHash) -> Result, io::Error> { - self.storage.get_str(id) - } - - fn get_str_id(&self, value: &str) -> Result, io::Error> { - self.storage.get_str_id(value) - } -} - -impl<'a> StrContainer for &'a Store { - fn insert_str(&self, value: &str) -> Result { - let key = StrHash::new(value); - self.storage.insert_str(key, value)?; - Ok(key) - } -} - /// Allows inserting and deleting quads during an ACID transaction with the [`Store`]. pub struct Transaction<'a> { storage: StorageTransaction<'a>, @@ -792,7 +773,7 @@ impl Transaction<'_> { &self, quad: impl Into>, ) -> Result { - let quad = self.encode_quad(quad.into())?; + let quad = self.storage.encode_quad(quad.into())?; self.storage.insert(&quad) } @@ -803,7 +784,7 @@ impl Transaction<'_> { &self, quad: impl Into>, ) -> Result { - if let Some(quad) = self.get_encoded_quad(quad.into())? { + if let Some(quad) = self.storage.get_encoded_quad(quad.into())? { self.storage.remove(&quad) } else { Ok(false) @@ -817,33 +798,11 @@ impl Transaction<'_> { &self, graph_name: impl Into>, ) -> Result { - let graph_name = self.encode_named_or_blank_node(graph_name.into())?; + let graph_name = self.storage.encode_named_or_blank_node(graph_name.into())?; self.storage.insert_named_graph(graph_name) } } -impl<'a> StrEncodingAware for &'a Transaction<'a> { - type Error = UnabortableTransactionError; -} - -impl<'a> StrLookup for &'a Transaction<'a> { - fn get_str(&self, id: StrHash) -> Result, UnabortableTransactionError> { - self.storage.get_str(id) - } - - fn get_str_id(&self, value: &str) -> Result, UnabortableTransactionError> { - self.storage.get_str_id(value) - } -} - -impl<'a> StrContainer for &'a Transaction<'a> { - fn insert_str(&self, value: &str) -> Result { - let key = StrHash::new(value); - self.storage.insert_str(key, value)?; - Ok(key) - } -} - /// An iterator returning the quads contained in a [`Store`]. pub struct QuadIter { inner: QuadIterInner, @@ -864,7 +823,7 @@ impl Iterator for QuadIter { fn next(&mut self) -> Option> { match &mut self.inner { QuadIterInner::Quads { iter, store } => Some(match iter.next()? { - Ok(quad) => store.decode_quad(&quad).map_err(|e| e.into()), + Ok(quad) => store.storage.decode_quad(&quad).map_err(|e| e.into()), Err(error) => Err(error), }), QuadIterInner::Error(iter) => iter.next().map(Err), @@ -884,9 +843,9 @@ impl Iterator for GraphNameIter { fn next(&mut self) -> Option> { Some( - self.iter - .next()? - .and_then(|graph_name| Ok(self.store.decode_named_or_blank_node(graph_name)?)), + self.iter.next()?.and_then(|graph_name| { + Ok(self.store.storage.decode_named_or_blank_node(graph_name)?) + }), ) }