Simplifies string storage traits

pull/171/head
Tpt 4 years ago
parent 0efc5b4654
commit 68aab2563c
  1. 41
      lib/src/sparql/dataset.rs
  2. 38
      lib/src/storage/binary_encoder.rs
  3. 50
      lib/src/storage/mod.rs
  4. 32
      lib/src/storage/numeric_encoder.rs
  5. 83
      lib/src/store.rs

@ -1,10 +1,11 @@
use crate::sparql::algebra::QueryDataset; use crate::sparql::algebra::QueryDataset;
use crate::sparql::EvaluationError; use crate::sparql::EvaluationError;
use crate::storage::numeric_encoder::{ use crate::storage::numeric_encoder::{
EncodedQuad, EncodedTerm, ReadEncoder, StrContainer, StrEncodingAware, StrHash, StrLookup, EncodedQuad, EncodedTerm, ReadEncoder, StrContainer, StrHash, StrLookup,
}; };
use crate::storage::Storage; use crate::storage::Storage;
use std::cell::RefCell; use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::HashMap; use std::collections::HashMap;
use std::iter::empty; use std::iter::empty;
@ -154,40 +155,34 @@ impl DatasetView {
} }
} }
impl StrEncodingAware for DatasetView { impl StrLookup for DatasetView {
type Error = EvaluationError; type Error = EvaluationError;
}
impl StrLookup for DatasetView { fn get_str(&self, key: StrHash) -> Result<Option<String>, EvaluationError> {
fn get_str(&self, id: StrHash) -> Result<Option<String>, EvaluationError> { Ok(if let Some(value) = self.extra.borrow().get(&key) {
Ok(if let Some(value) = self.extra.borrow().get(&id) {
Some(value.clone()) Some(value.clone())
} else { } else {
self.storage.get_str(id)? self.storage.get_str(key)?
}) })
} }
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, EvaluationError> { fn contains_str(&self, key: StrHash) -> Result<bool, EvaluationError> {
let id = StrHash::new(value); Ok(self.extra.borrow().contains_key(&key) || self.storage.contains_str(key)?)
Ok(if self.extra.borrow().contains_key(&id) {
Some(id)
} else {
self.storage.get_str_id(value)?
})
} }
} }
impl StrContainer for DatasetView { impl StrContainer for DatasetView {
fn insert_str(&self, value: &str) -> Result<StrHash, EvaluationError> { fn insert_str(&self, key: StrHash, value: &str) -> Result<bool, EvaluationError> {
if let Some(hash) = self.storage.get_str_id(value)? { if self.storage.contains_str(key)? {
Ok(hash) Ok(false)
} else { } else {
let hash = StrHash::new(value); match self.extra.borrow_mut().entry(key) {
self.extra Entry::Occupied(_) => Ok(false),
.borrow_mut() Entry::Vacant(entry) => {
.entry(hash) entry.insert(value.to_owned());
.or_insert_with(|| value.to_owned()); Ok(true)
Ok(hash) }
}
} }
} }
} }

@ -628,43 +628,37 @@ pub fn write_term(sink: &mut Vec<u8>, term: EncodedTerm) {
mod tests { mod tests {
use super::*; use super::*;
use crate::storage::numeric_encoder::*; use crate::storage::numeric_encoder::*;
use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::Infallible; use std::convert::Infallible;
use std::sync::RwLock;
#[derive(Default)] #[derive(Default)]
struct MemoryStrStore { struct MemoryStrStore {
id2str: RwLock<HashMap<StrHash, String>>, id2str: RefCell<HashMap<StrHash, String>>,
} }
impl StrEncodingAware for MemoryStrStore { impl StrLookup for MemoryStrStore {
type Error = Infallible; type Error = Infallible;
}
impl StrLookup for MemoryStrStore { fn get_str(&self, key: StrHash) -> Result<Option<String>, Infallible> {
fn get_str(&self, id: StrHash) -> Result<Option<String>, Infallible> { Ok(self.id2str.borrow().get(&key).cloned())
Ok(self.id2str.read().unwrap().get(&id).cloned())
} }
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, Infallible> { fn contains_str(&self, key: StrHash) -> Result<bool, Infallible> {
let id = StrHash::new(value); Ok(self.id2str.borrow().contains_key(&key))
Ok(if self.id2str.read().unwrap().contains_key(&id) {
Some(id)
} else {
None
})
} }
} }
impl StrContainer for MemoryStrStore { impl StrContainer for MemoryStrStore {
fn insert_str(&self, value: &str) -> Result<StrHash, Infallible> { fn insert_str(&self, key: StrHash, value: &str) -> Result<bool, Infallible> {
let key = StrHash::new(value); match self.id2str.borrow_mut().entry(key) {
self.id2str Entry::Occupied(_) => Ok(false),
.write() Entry::Vacant(entry) => {
.unwrap() entry.insert(value.to_owned());
.entry(key) Ok(true)
.or_insert_with(|| value.to_owned()); }
Ok(key) }
} }
} }

@ -18,9 +18,7 @@ use crate::storage::binary_encoder::{
LATEST_STORAGE_VERSION, WRITTEN_TERM_MAX_SIZE, LATEST_STORAGE_VERSION, WRITTEN_TERM_MAX_SIZE,
}; };
use crate::storage::io::StoreOrParseError; use crate::storage::io::StoreOrParseError;
use crate::storage::numeric_encoder::{ use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, StrContainer, StrHash, StrLookup};
EncodedQuad, EncodedTerm, StrContainer, StrEncodingAware, StrHash, StrLookup,
};
mod binary_encoder; mod binary_encoder;
pub(crate) mod io; pub(crate) mod io;
@ -991,57 +989,39 @@ impl<T> From<ConflictableTransactionError<T>> for Sled2ConflictableTransactionEr
} }
} }
impl StrEncodingAware for Storage { impl StrLookup for Storage {
type Error = std::io::Error; type Error = std::io::Error;
}
impl StrLookup for Storage { fn get_str(&self, key: StrHash) -> Result<Option<String>, std::io::Error> {
fn get_str(&self, id: StrHash) -> Result<Option<String>, std::io::Error> { self.get_str(key)
self.get_str(id)
} }
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, std::io::Error> { fn contains_str(&self, key: StrHash) -> Result<bool, std::io::Error> {
let key = StrHash::new(value); self.contains_str(key)
Ok(if self.contains_str(key)? {
Some(key)
} else {
None
})
} }
} }
impl StrContainer for Storage { impl StrContainer for Storage {
fn insert_str(&self, value: &str) -> Result<StrHash, std::io::Error> { fn insert_str(&self, key: StrHash, value: &str) -> Result<bool, std::io::Error> {
let key = StrHash::new(value); self.insert_str(key, value)
self.insert_str(key, value)?;
Ok(key)
} }
} }
impl<'a> StrEncodingAware for StorageTransaction<'a> { impl<'a> StrLookup for StorageTransaction<'a> {
type Error = UnabortableTransactionError; type Error = UnabortableTransactionError;
}
impl<'a> StrLookup for StorageTransaction<'a> { fn get_str(&self, key: StrHash) -> Result<Option<String>, UnabortableTransactionError> {
fn get_str(&self, id: StrHash) -> Result<Option<String>, UnabortableTransactionError> { self.get_str(key)
self.get_str(id)
} }
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, UnabortableTransactionError> { fn contains_str(&self, key: StrHash) -> Result<bool, UnabortableTransactionError> {
let key = StrHash::new(value); self.contains_str(key)
Ok(if self.contains_str(key)? {
Some(key)
} else {
None
})
} }
} }
impl<'a> StrContainer for StorageTransaction<'a> { impl<'a> StrContainer for StorageTransaction<'a> {
fn insert_str(&self, value: &str) -> Result<StrHash, UnabortableTransactionError> { fn insert_str(&self, key: StrHash, value: &str) -> Result<bool, UnabortableTransactionError> {
let key = StrHash::new(value); self.insert_str(key, value)
self.insert_str(key, value)?;
Ok(key)
} }
} }

@ -486,27 +486,20 @@ impl EncodedQuad {
} }
} }
pub(crate) trait StrEncodingAware { pub(crate) trait StrLookup {
//TODO: rename
type Error: Error + Into<EvaluationError> + 'static; type Error: Error + Into<EvaluationError> + 'static;
}
impl<'a, T: StrEncodingAware> StrEncodingAware for &'a T {
type Error = T::Error;
}
pub(crate) trait StrLookup: StrEncodingAware { fn get_str(&self, key: StrHash) -> Result<Option<String>, Self::Error>;
fn get_str(&self, id: StrHash) -> Result<Option<String>, Self::Error>;
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, Self::Error>; fn contains_str(&self, key: StrHash) -> Result<bool, Self::Error>;
} }
pub(crate) trait StrContainer: StrEncodingAware { pub(crate) trait StrContainer: StrLookup {
fn insert_str(&self, value: &str) -> Result<StrHash, Self::Error>; fn insert_str(&self, key: StrHash, value: &str) -> Result<bool, Self::Error>;
} }
/// Tries to encode a term based on the existing strings (does not insert anything) /// Tries to encode a term based on the existing strings (does not insert anything)
pub(crate) trait ReadEncoder: StrEncodingAware { pub(crate) trait ReadEncoder: StrLookup {
fn get_encoded_named_node( fn get_encoded_named_node(
&self, &self,
named_node: NamedNodeRef<'_>, named_node: NamedNodeRef<'_>,
@ -738,12 +731,17 @@ pub(crate) trait ReadEncoder: StrEncodingAware {
impl<S: StrLookup> ReadEncoder for S { impl<S: StrLookup> ReadEncoder for S {
fn get_encoded_str(&self, value: &str) -> Result<Option<StrHash>, Self::Error> { fn get_encoded_str(&self, value: &str) -> Result<Option<StrHash>, Self::Error> {
self.get_str_id(value) let key = StrHash::new(value);
Ok(if self.contains_str(key)? {
Some(key)
} else {
None
})
} }
} }
/// Encodes a term and inserts strings if needed /// Encodes a term and inserts strings if needed
pub(crate) trait WriteEncoder: StrEncodingAware { pub(crate) trait WriteEncoder: StrContainer {
fn encode_named_node(&self, named_node: NamedNodeRef<'_>) -> Result<EncodedTerm, Self::Error> { fn encode_named_node(&self, named_node: NamedNodeRef<'_>) -> Result<EncodedTerm, Self::Error> {
self.encode_rio_named_node(named_node.into()) self.encode_rio_named_node(named_node.into())
} }
@ -999,7 +997,9 @@ pub(crate) trait WriteEncoder: StrEncodingAware {
impl<S: StrContainer> WriteEncoder for S { impl<S: StrContainer> WriteEncoder for S {
fn encode_str(&self, value: &str) -> Result<StrHash, Self::Error> { fn encode_str(&self, value: &str) -> Result<StrHash, Self::Error> {
self.insert_str(value) let key = StrHash::new(value);
self.insert_str(key, value)?;
Ok(key)
} }
} }

@ -42,10 +42,7 @@ use crate::sparql::{
UpdateOptions, UpdateOptions,
}; };
use crate::storage::io::{dump_dataset, dump_graph, load_dataset, load_graph}; use crate::storage::io::{dump_dataset, dump_graph, load_dataset, load_graph};
use crate::storage::numeric_encoder::{ use crate::storage::numeric_encoder::{Decoder, EncodedTerm, ReadEncoder, WriteEncoder};
Decoder, EncodedTerm, ReadEncoder, StrContainer, StrEncodingAware, StrHash, StrLookup,
WriteEncoder,
};
pub use crate::storage::ConflictableTransactionError; pub use crate::storage::ConflictableTransactionError;
pub use crate::storage::TransactionError; pub use crate::storage::TransactionError;
pub use crate::storage::UnabortableTransactionError; pub use crate::storage::UnabortableTransactionError;
@ -247,7 +244,7 @@ impl Store {
/// Checks if this store contains a given quad /// Checks if this store contains a given quad
pub fn contains<'a>(&self, quad: impl Into<QuadRef<'a>>) -> Result<bool, io::Error> { pub fn contains<'a>(&self, quad: impl Into<QuadRef<'a>>) -> Result<bool, io::Error> {
if let Some(quad) = self.get_encoded_quad(quad.into())? { if let Some(quad) = self.storage.get_encoded_quad(quad.into())? {
self.storage.contains(&quad) self.storage.contains(&quad)
} else { } else {
Ok(false) Ok(false)
@ -431,7 +428,7 @@ impl Store {
/// It might leave the store in a bad state if a crash happens during the insertion. /// It might leave the store in a bad state if a crash happens during the insertion.
/// Use a (memory greedy) [transaction](Store::transaction()) if you do not want that. /// Use a (memory greedy) [transaction](Store::transaction()) if you do not want that.
pub fn insert<'a>(&self, quad: impl Into<QuadRef<'a>>) -> Result<bool, io::Error> { pub fn insert<'a>(&self, quad: impl Into<QuadRef<'a>>) -> Result<bool, io::Error> {
let quad = self.encode_quad(quad.into())?; let quad = self.storage.encode_quad(quad.into())?;
self.storage.insert(&quad) self.storage.insert(&quad)
} }
@ -443,7 +440,7 @@ impl Store {
/// It might leave the store in a bad state if a crash happens during the removal. /// It might leave the store in a bad state if a crash happens during the removal.
/// Use a (memory greedy) [transaction](Store::transaction()) if you do not want that. /// Use a (memory greedy) [transaction](Store::transaction()) if you do not want that.
pub fn remove<'a>(&self, quad: impl Into<QuadRef<'a>>) -> Result<bool, io::Error> { pub fn remove<'a>(&self, quad: impl Into<QuadRef<'a>>) -> Result<bool, io::Error> {
if let Some(quad) = self.get_encoded_quad(quad.into())? { if let Some(quad) = self.storage.get_encoded_quad(quad.into())? {
self.storage.remove(&quad) self.storage.remove(&quad)
} else { } else {
Ok(false) Ok(false)
@ -540,7 +537,10 @@ impl Store {
&self, &self,
graph_name: impl Into<NamedOrBlankNodeRef<'a>>, graph_name: impl Into<NamedOrBlankNodeRef<'a>>,
) -> Result<bool, io::Error> { ) -> Result<bool, io::Error> {
if let Some(graph_name) = self.get_encoded_named_or_blank_node(graph_name.into())? { if let Some(graph_name) = self
.storage
.get_encoded_named_or_blank_node(graph_name.into())?
{
self.storage.contains_named_graph(graph_name) self.storage.contains_named_graph(graph_name)
} else { } else {
Ok(false) Ok(false)
@ -566,7 +566,7 @@ impl Store {
&self, &self,
graph_name: impl Into<NamedOrBlankNodeRef<'a>>, graph_name: impl Into<NamedOrBlankNodeRef<'a>>,
) -> Result<bool, io::Error> { ) -> Result<bool, io::Error> {
let graph_name = self.encode_named_or_blank_node(graph_name.into())?; let graph_name = self.storage.encode_named_or_blank_node(graph_name.into())?;
self.storage.insert_named_graph(graph_name) self.storage.insert_named_graph(graph_name)
} }
@ -592,7 +592,7 @@ impl Store {
&self, &self,
graph_name: impl Into<GraphNameRef<'a>>, graph_name: impl Into<GraphNameRef<'a>>,
) -> Result<(), io::Error> { ) -> Result<(), io::Error> {
if let Some(graph_name) = self.get_encoded_graph_name(graph_name.into())? { if let Some(graph_name) = self.storage.get_encoded_graph_name(graph_name.into())? {
self.storage.clear_graph(graph_name) self.storage.clear_graph(graph_name)
} else { } else {
Ok(()) Ok(())
@ -623,7 +623,10 @@ impl Store {
&self, &self,
graph_name: impl Into<NamedOrBlankNodeRef<'a>>, graph_name: impl Into<NamedOrBlankNodeRef<'a>>,
) -> Result<bool, io::Error> { ) -> Result<bool, io::Error> {
if let Some(graph_name) = self.get_encoded_named_or_blank_node(graph_name.into())? { if let Some(graph_name) = self
.storage
.get_encoded_named_or_blank_node(graph_name.into())?
{
self.storage.remove_named_graph(graph_name) self.storage.remove_named_graph(graph_name)
} else { } else {
Ok(false) Ok(false)
@ -661,28 +664,6 @@ impl fmt::Display for Store {
} }
} }
impl StrEncodingAware for Store {
type Error = io::Error;
}
impl StrLookup for Store {
fn get_str(&self, id: StrHash) -> Result<Option<String>, io::Error> {
self.storage.get_str(id)
}
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, io::Error> {
self.storage.get_str_id(value)
}
}
impl<'a> StrContainer for &'a Store {
fn insert_str(&self, value: &str) -> Result<StrHash, io::Error> {
let key = StrHash::new(value);
self.storage.insert_str(key, value)?;
Ok(key)
}
}
/// Allows inserting and deleting quads during an ACID transaction with the [`Store`]. /// Allows inserting and deleting quads during an ACID transaction with the [`Store`].
pub struct Transaction<'a> { pub struct Transaction<'a> {
storage: StorageTransaction<'a>, storage: StorageTransaction<'a>,
@ -792,7 +773,7 @@ impl Transaction<'_> {
&self, &self,
quad: impl Into<QuadRef<'a>>, quad: impl Into<QuadRef<'a>>,
) -> Result<bool, UnabortableTransactionError> { ) -> Result<bool, UnabortableTransactionError> {
let quad = self.encode_quad(quad.into())?; let quad = self.storage.encode_quad(quad.into())?;
self.storage.insert(&quad) self.storage.insert(&quad)
} }
@ -803,7 +784,7 @@ impl Transaction<'_> {
&self, &self,
quad: impl Into<QuadRef<'a>>, quad: impl Into<QuadRef<'a>>,
) -> Result<bool, UnabortableTransactionError> { ) -> Result<bool, UnabortableTransactionError> {
if let Some(quad) = self.get_encoded_quad(quad.into())? { if let Some(quad) = self.storage.get_encoded_quad(quad.into())? {
self.storage.remove(&quad) self.storage.remove(&quad)
} else { } else {
Ok(false) Ok(false)
@ -817,33 +798,11 @@ impl Transaction<'_> {
&self, &self,
graph_name: impl Into<NamedOrBlankNodeRef<'a>>, graph_name: impl Into<NamedOrBlankNodeRef<'a>>,
) -> Result<bool, UnabortableTransactionError> { ) -> Result<bool, UnabortableTransactionError> {
let graph_name = self.encode_named_or_blank_node(graph_name.into())?; let graph_name = self.storage.encode_named_or_blank_node(graph_name.into())?;
self.storage.insert_named_graph(graph_name) self.storage.insert_named_graph(graph_name)
} }
} }
impl<'a> StrEncodingAware for &'a Transaction<'a> {
type Error = UnabortableTransactionError;
}
impl<'a> StrLookup for &'a Transaction<'a> {
fn get_str(&self, id: StrHash) -> Result<Option<String>, UnabortableTransactionError> {
self.storage.get_str(id)
}
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, UnabortableTransactionError> {
self.storage.get_str_id(value)
}
}
impl<'a> StrContainer for &'a Transaction<'a> {
fn insert_str(&self, value: &str) -> Result<StrHash, UnabortableTransactionError> {
let key = StrHash::new(value);
self.storage.insert_str(key, value)?;
Ok(key)
}
}
/// An iterator returning the quads contained in a [`Store`]. /// An iterator returning the quads contained in a [`Store`].
pub struct QuadIter { pub struct QuadIter {
inner: QuadIterInner, inner: QuadIterInner,
@ -864,7 +823,7 @@ impl Iterator for QuadIter {
fn next(&mut self) -> Option<Result<Quad, io::Error>> { fn next(&mut self) -> Option<Result<Quad, io::Error>> {
match &mut self.inner { match &mut self.inner {
QuadIterInner::Quads { iter, store } => Some(match iter.next()? { QuadIterInner::Quads { iter, store } => Some(match iter.next()? {
Ok(quad) => store.decode_quad(&quad).map_err(|e| e.into()), Ok(quad) => store.storage.decode_quad(&quad).map_err(|e| e.into()),
Err(error) => Err(error), Err(error) => Err(error),
}), }),
QuadIterInner::Error(iter) => iter.next().map(Err), QuadIterInner::Error(iter) => iter.next().map(Err),
@ -884,9 +843,9 @@ impl Iterator for GraphNameIter {
fn next(&mut self) -> Option<Result<NamedOrBlankNode, io::Error>> { fn next(&mut self) -> Option<Result<NamedOrBlankNode, io::Error>> {
Some( Some(
self.iter self.iter.next()?.and_then(|graph_name| {
.next()? Ok(self.store.storage.decode_named_or_blank_node(graph_name)?)
.and_then(|graph_name| Ok(self.store.decode_named_or_blank_node(graph_name)?)), }),
) )
} }

Loading…
Cancel
Save