Simplifies string storage traits

pull/171/head
Tpt 3 years ago
parent 0efc5b4654
commit 68aab2563c
  1. 41
      lib/src/sparql/dataset.rs
  2. 38
      lib/src/storage/binary_encoder.rs
  3. 50
      lib/src/storage/mod.rs
  4. 32
      lib/src/storage/numeric_encoder.rs
  5. 83
      lib/src/store.rs

@ -1,10 +1,11 @@
use crate::sparql::algebra::QueryDataset;
use crate::sparql::EvaluationError;
use crate::storage::numeric_encoder::{
EncodedQuad, EncodedTerm, ReadEncoder, StrContainer, StrEncodingAware, StrHash, StrLookup,
EncodedQuad, EncodedTerm, ReadEncoder, StrContainer, StrHash, StrLookup,
};
use crate::storage::Storage;
use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::iter::empty;
@ -154,40 +155,34 @@ impl DatasetView {
}
}
impl StrEncodingAware for DatasetView {
impl StrLookup for DatasetView {
type Error = EvaluationError;
}
impl StrLookup for DatasetView {
fn get_str(&self, id: StrHash) -> Result<Option<String>, EvaluationError> {
Ok(if let Some(value) = self.extra.borrow().get(&id) {
fn get_str(&self, key: StrHash) -> Result<Option<String>, EvaluationError> {
Ok(if let Some(value) = self.extra.borrow().get(&key) {
Some(value.clone())
} else {
self.storage.get_str(id)?
self.storage.get_str(key)?
})
}
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, EvaluationError> {
let id = StrHash::new(value);
Ok(if self.extra.borrow().contains_key(&id) {
Some(id)
} else {
self.storage.get_str_id(value)?
})
fn contains_str(&self, key: StrHash) -> Result<bool, EvaluationError> {
Ok(self.extra.borrow().contains_key(&key) || self.storage.contains_str(key)?)
}
}
impl StrContainer for DatasetView {
fn insert_str(&self, value: &str) -> Result<StrHash, EvaluationError> {
if let Some(hash) = self.storage.get_str_id(value)? {
Ok(hash)
fn insert_str(&self, key: StrHash, value: &str) -> Result<bool, EvaluationError> {
if self.storage.contains_str(key)? {
Ok(false)
} else {
let hash = StrHash::new(value);
self.extra
.borrow_mut()
.entry(hash)
.or_insert_with(|| value.to_owned());
Ok(hash)
match self.extra.borrow_mut().entry(key) {
Entry::Occupied(_) => Ok(false),
Entry::Vacant(entry) => {
entry.insert(value.to_owned());
Ok(true)
}
}
}
}
}

@ -628,43 +628,37 @@ pub fn write_term(sink: &mut Vec<u8>, term: EncodedTerm) {
mod tests {
use super::*;
use crate::storage::numeric_encoder::*;
use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::convert::Infallible;
use std::sync::RwLock;
#[derive(Default)]
struct MemoryStrStore {
id2str: RwLock<HashMap<StrHash, String>>,
id2str: RefCell<HashMap<StrHash, String>>,
}
impl StrEncodingAware for MemoryStrStore {
impl StrLookup for MemoryStrStore {
type Error = Infallible;
}
impl StrLookup for MemoryStrStore {
fn get_str(&self, id: StrHash) -> Result<Option<String>, Infallible> {
Ok(self.id2str.read().unwrap().get(&id).cloned())
fn get_str(&self, key: StrHash) -> Result<Option<String>, Infallible> {
Ok(self.id2str.borrow().get(&key).cloned())
}
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, Infallible> {
let id = StrHash::new(value);
Ok(if self.id2str.read().unwrap().contains_key(&id) {
Some(id)
} else {
None
})
fn contains_str(&self, key: StrHash) -> Result<bool, Infallible> {
Ok(self.id2str.borrow().contains_key(&key))
}
}
impl StrContainer for MemoryStrStore {
fn insert_str(&self, value: &str) -> Result<StrHash, Infallible> {
let key = StrHash::new(value);
self.id2str
.write()
.unwrap()
.entry(key)
.or_insert_with(|| value.to_owned());
Ok(key)
fn insert_str(&self, key: StrHash, value: &str) -> Result<bool, Infallible> {
match self.id2str.borrow_mut().entry(key) {
Entry::Occupied(_) => Ok(false),
Entry::Vacant(entry) => {
entry.insert(value.to_owned());
Ok(true)
}
}
}
}

@ -18,9 +18,7 @@ use crate::storage::binary_encoder::{
LATEST_STORAGE_VERSION, WRITTEN_TERM_MAX_SIZE,
};
use crate::storage::io::StoreOrParseError;
use crate::storage::numeric_encoder::{
EncodedQuad, EncodedTerm, StrContainer, StrEncodingAware, StrHash, StrLookup,
};
use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, StrContainer, StrHash, StrLookup};
mod binary_encoder;
pub(crate) mod io;
@ -991,57 +989,39 @@ impl<T> From<ConflictableTransactionError<T>> for Sled2ConflictableTransactionEr
}
}
impl StrEncodingAware for Storage {
impl StrLookup for Storage {
type Error = std::io::Error;
}
impl StrLookup for Storage {
fn get_str(&self, id: StrHash) -> Result<Option<String>, std::io::Error> {
self.get_str(id)
fn get_str(&self, key: StrHash) -> Result<Option<String>, std::io::Error> {
self.get_str(key)
}
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, std::io::Error> {
let key = StrHash::new(value);
Ok(if self.contains_str(key)? {
Some(key)
} else {
None
})
fn contains_str(&self, key: StrHash) -> Result<bool, std::io::Error> {
self.contains_str(key)
}
}
impl StrContainer for Storage {
fn insert_str(&self, value: &str) -> Result<StrHash, std::io::Error> {
let key = StrHash::new(value);
self.insert_str(key, value)?;
Ok(key)
fn insert_str(&self, key: StrHash, value: &str) -> Result<bool, std::io::Error> {
self.insert_str(key, value)
}
}
impl<'a> StrEncodingAware for StorageTransaction<'a> {
impl<'a> StrLookup for StorageTransaction<'a> {
type Error = UnabortableTransactionError;
}
impl<'a> StrLookup for StorageTransaction<'a> {
fn get_str(&self, id: StrHash) -> Result<Option<String>, UnabortableTransactionError> {
self.get_str(id)
fn get_str(&self, key: StrHash) -> Result<Option<String>, UnabortableTransactionError> {
self.get_str(key)
}
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, UnabortableTransactionError> {
let key = StrHash::new(value);
Ok(if self.contains_str(key)? {
Some(key)
} else {
None
})
fn contains_str(&self, key: StrHash) -> Result<bool, UnabortableTransactionError> {
self.contains_str(key)
}
}
impl<'a> StrContainer for StorageTransaction<'a> {
fn insert_str(&self, value: &str) -> Result<StrHash, UnabortableTransactionError> {
let key = StrHash::new(value);
self.insert_str(key, value)?;
Ok(key)
fn insert_str(&self, key: StrHash, value: &str) -> Result<bool, UnabortableTransactionError> {
self.insert_str(key, value)
}
}

@ -486,27 +486,20 @@ impl EncodedQuad {
}
}
pub(crate) trait StrEncodingAware {
//TODO: rename
pub(crate) trait StrLookup {
type Error: Error + Into<EvaluationError> + 'static;
}
impl<'a, T: StrEncodingAware> StrEncodingAware for &'a T {
type Error = T::Error;
}
pub(crate) trait StrLookup: StrEncodingAware {
fn get_str(&self, id: StrHash) -> Result<Option<String>, Self::Error>;
fn get_str(&self, key: StrHash) -> Result<Option<String>, Self::Error>;
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, Self::Error>;
fn contains_str(&self, key: StrHash) -> Result<bool, Self::Error>;
}
pub(crate) trait StrContainer: StrEncodingAware {
fn insert_str(&self, value: &str) -> Result<StrHash, Self::Error>;
pub(crate) trait StrContainer: StrLookup {
fn insert_str(&self, key: StrHash, value: &str) -> Result<bool, Self::Error>;
}
/// Tries to encode a term based on the existing strings (does not insert anything)
pub(crate) trait ReadEncoder: StrEncodingAware {
pub(crate) trait ReadEncoder: StrLookup {
fn get_encoded_named_node(
&self,
named_node: NamedNodeRef<'_>,
@ -738,12 +731,17 @@ pub(crate) trait ReadEncoder: StrEncodingAware {
impl<S: StrLookup> ReadEncoder for S {
fn get_encoded_str(&self, value: &str) -> Result<Option<StrHash>, Self::Error> {
self.get_str_id(value)
let key = StrHash::new(value);
Ok(if self.contains_str(key)? {
Some(key)
} else {
None
})
}
}
/// Encodes a term and insert strings if needed
pub(crate) trait WriteEncoder: StrEncodingAware {
pub(crate) trait WriteEncoder: StrContainer {
fn encode_named_node(&self, named_node: NamedNodeRef<'_>) -> Result<EncodedTerm, Self::Error> {
self.encode_rio_named_node(named_node.into())
}
@ -999,7 +997,9 @@ pub(crate) trait WriteEncoder: StrEncodingAware {
impl<S: StrContainer> WriteEncoder for S {
fn encode_str(&self, value: &str) -> Result<StrHash, Self::Error> {
self.insert_str(value)
let key = StrHash::new(value);
self.insert_str(key, value)?;
Ok(key)
}
}

@ -42,10 +42,7 @@ use crate::sparql::{
UpdateOptions,
};
use crate::storage::io::{dump_dataset, dump_graph, load_dataset, load_graph};
use crate::storage::numeric_encoder::{
Decoder, EncodedTerm, ReadEncoder, StrContainer, StrEncodingAware, StrHash, StrLookup,
WriteEncoder,
};
use crate::storage::numeric_encoder::{Decoder, EncodedTerm, ReadEncoder, WriteEncoder};
pub use crate::storage::ConflictableTransactionError;
pub use crate::storage::TransactionError;
pub use crate::storage::UnabortableTransactionError;
@ -247,7 +244,7 @@ impl Store {
/// Checks if this store contains a given quad
pub fn contains<'a>(&self, quad: impl Into<QuadRef<'a>>) -> Result<bool, io::Error> {
if let Some(quad) = self.get_encoded_quad(quad.into())? {
if let Some(quad) = self.storage.get_encoded_quad(quad.into())? {
self.storage.contains(&quad)
} else {
Ok(false)
@ -431,7 +428,7 @@ impl Store {
/// It might leave the store in a bad state if a crash happens during the insertion.
/// Use a (memory greedy) [transaction](Store::transaction()) if you do not want that.
pub fn insert<'a>(&self, quad: impl Into<QuadRef<'a>>) -> Result<bool, io::Error> {
let quad = self.encode_quad(quad.into())?;
let quad = self.storage.encode_quad(quad.into())?;
self.storage.insert(&quad)
}
@ -443,7 +440,7 @@ impl Store {
/// It might leave the store in a bad state if a crash happens during the removal.
/// Use a (memory greedy) [transaction](Store::transaction()) if you do not want that.
pub fn remove<'a>(&self, quad: impl Into<QuadRef<'a>>) -> Result<bool, io::Error> {
if let Some(quad) = self.get_encoded_quad(quad.into())? {
if let Some(quad) = self.storage.get_encoded_quad(quad.into())? {
self.storage.remove(&quad)
} else {
Ok(false)
@ -540,7 +537,10 @@ impl Store {
&self,
graph_name: impl Into<NamedOrBlankNodeRef<'a>>,
) -> Result<bool, io::Error> {
if let Some(graph_name) = self.get_encoded_named_or_blank_node(graph_name.into())? {
if let Some(graph_name) = self
.storage
.get_encoded_named_or_blank_node(graph_name.into())?
{
self.storage.contains_named_graph(graph_name)
} else {
Ok(false)
@ -566,7 +566,7 @@ impl Store {
&self,
graph_name: impl Into<NamedOrBlankNodeRef<'a>>,
) -> Result<bool, io::Error> {
let graph_name = self.encode_named_or_blank_node(graph_name.into())?;
let graph_name = self.storage.encode_named_or_blank_node(graph_name.into())?;
self.storage.insert_named_graph(graph_name)
}
@ -592,7 +592,7 @@ impl Store {
&self,
graph_name: impl Into<GraphNameRef<'a>>,
) -> Result<(), io::Error> {
if let Some(graph_name) = self.get_encoded_graph_name(graph_name.into())? {
if let Some(graph_name) = self.storage.get_encoded_graph_name(graph_name.into())? {
self.storage.clear_graph(graph_name)
} else {
Ok(())
@ -623,7 +623,10 @@ impl Store {
&self,
graph_name: impl Into<NamedOrBlankNodeRef<'a>>,
) -> Result<bool, io::Error> {
if let Some(graph_name) = self.get_encoded_named_or_blank_node(graph_name.into())? {
if let Some(graph_name) = self
.storage
.get_encoded_named_or_blank_node(graph_name.into())?
{
self.storage.remove_named_graph(graph_name)
} else {
Ok(false)
@ -661,28 +664,6 @@ impl fmt::Display for Store {
}
}
impl StrEncodingAware for Store {
type Error = io::Error;
}
impl StrLookup for Store {
fn get_str(&self, id: StrHash) -> Result<Option<String>, io::Error> {
self.storage.get_str(id)
}
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, io::Error> {
self.storage.get_str_id(value)
}
}
impl<'a> StrContainer for &'a Store {
fn insert_str(&self, value: &str) -> Result<StrHash, io::Error> {
let key = StrHash::new(value);
self.storage.insert_str(key, value)?;
Ok(key)
}
}
/// Allows inserting and deleting quads during an ACID transaction with the [`Store`].
pub struct Transaction<'a> {
storage: StorageTransaction<'a>,
@ -792,7 +773,7 @@ impl Transaction<'_> {
&self,
quad: impl Into<QuadRef<'a>>,
) -> Result<bool, UnabortableTransactionError> {
let quad = self.encode_quad(quad.into())?;
let quad = self.storage.encode_quad(quad.into())?;
self.storage.insert(&quad)
}
@ -803,7 +784,7 @@ impl Transaction<'_> {
&self,
quad: impl Into<QuadRef<'a>>,
) -> Result<bool, UnabortableTransactionError> {
if let Some(quad) = self.get_encoded_quad(quad.into())? {
if let Some(quad) = self.storage.get_encoded_quad(quad.into())? {
self.storage.remove(&quad)
} else {
Ok(false)
@ -817,33 +798,11 @@ impl Transaction<'_> {
&self,
graph_name: impl Into<NamedOrBlankNodeRef<'a>>,
) -> Result<bool, UnabortableTransactionError> {
let graph_name = self.encode_named_or_blank_node(graph_name.into())?;
let graph_name = self.storage.encode_named_or_blank_node(graph_name.into())?;
self.storage.insert_named_graph(graph_name)
}
}
impl<'a> StrEncodingAware for &'a Transaction<'a> {
type Error = UnabortableTransactionError;
}
impl<'a> StrLookup for &'a Transaction<'a> {
fn get_str(&self, id: StrHash) -> Result<Option<String>, UnabortableTransactionError> {
self.storage.get_str(id)
}
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, UnabortableTransactionError> {
self.storage.get_str_id(value)
}
}
impl<'a> StrContainer for &'a Transaction<'a> {
fn insert_str(&self, value: &str) -> Result<StrHash, UnabortableTransactionError> {
let key = StrHash::new(value);
self.storage.insert_str(key, value)?;
Ok(key)
}
}
/// An iterator returning the quads contained in a [`Store`].
pub struct QuadIter {
inner: QuadIterInner,
@ -864,7 +823,7 @@ impl Iterator for QuadIter {
fn next(&mut self) -> Option<Result<Quad, io::Error>> {
match &mut self.inner {
QuadIterInner::Quads { iter, store } => Some(match iter.next()? {
Ok(quad) => store.decode_quad(&quad).map_err(|e| e.into()),
Ok(quad) => store.storage.decode_quad(&quad).map_err(|e| e.into()),
Err(error) => Err(error),
}),
QuadIterInner::Error(iter) => iter.next().map(Err),
@ -884,9 +843,9 @@ impl Iterator for GraphNameIter {
fn next(&mut self) -> Option<Result<NamedOrBlankNode, io::Error>> {
Some(
self.iter
.next()?
.and_then(|graph_name| Ok(self.store.decode_named_or_blank_node(graph_name)?)),
self.iter.next()?.and_then(|graph_name| {
Ok(self.store.storage.decode_named_or_blank_node(graph_name)?)
}),
)
}

Loading…
Cancel
Save