From 8606877e337df2aea24dea2893cc11b7f495c52b Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 6 Jun 2021 20:23:12 +0200 Subject: [PATCH] Simplifies term encoding code --- lib/src/sparql/dataset.rs | 70 +------- lib/src/storage/binary_encoder.rs | 19 +-- lib/src/storage/mod.rs | 52 +++--- lib/src/storage/numeric_encoder.rs | 258 +++++++++-------------------- 4 files changed, 115 insertions(+), 284 deletions(-) diff --git a/lib/src/sparql/dataset.rs b/lib/src/sparql/dataset.rs index 8dc9696f..a4fb2c43 100644 --- a/lib/src/sparql/dataset.rs +++ b/lib/src/sparql/dataset.rs @@ -1,10 +1,13 @@ use crate::model::TermRef; use crate::sparql::algebra::QueryDataset; use crate::sparql::EvaluationError; -use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, StrHash, StrLookup}; +use crate::storage::numeric_encoder::{ + insert_term_values, EncodedQuad, EncodedTerm, StrHash, StrLookup, +}; use crate::storage::Storage; use std::cell::RefCell; use std::collections::HashMap; +use std::convert::Infallible; use std::iter::empty; pub(crate) struct DatasetView { @@ -139,69 +142,14 @@ impl DatasetView { pub fn encode_term<'a>(&self, term: impl Into>) -> EncodedTerm { let term = term.into(); let encoded = term.into(); - self.insert_term_values(term, &encoded); + insert_term_values::(term, &encoded, |key, value| { + self.insert_str(key, value); + Ok(()) + }) + .unwrap(); // Can not fail encoded } - fn insert_term_values(&self, term: TermRef<'_>, encoded: &EncodedTerm) { - match (term, encoded) { - (TermRef::NamedNode(node), EncodedTerm::NamedNode { iri_id }) => { - self.insert_str(iri_id, node.as_str()); - } - (TermRef::BlankNode(node), EncodedTerm::BigBlankNode { id_id }) => { - self.insert_str(id_id, node.as_str()); - } - (TermRef::Literal(literal), EncodedTerm::BigStringLiteral { value_id }) => { - self.insert_str(value_id, literal.value()); - } - ( - TermRef::Literal(literal), - EncodedTerm::SmallBigLangStringLiteral { language_id, .. }, - ) => { - if let Some(language) = literal.language() { - self.insert_str(language_id, language) - } - } - ( - TermRef::Literal(literal), - EncodedTerm::BigSmallLangStringLiteral { value_id, .. }, - ) => { - self.insert_str(value_id, literal.value()); - } - ( - TermRef::Literal(literal), - EncodedTerm::BigBigLangStringLiteral { - value_id, - language_id, - }, - ) => { - self.insert_str(value_id, literal.value()); - if let Some(language) = literal.language() { - self.insert_str(language_id, language) - } - } - (TermRef::Literal(literal), EncodedTerm::SmallTypedLiteral { datatype_id, .. }) => { - self.insert_str(datatype_id, literal.datatype().as_str()); - } - ( - TermRef::Literal(literal), - EncodedTerm::BigTypedLiteral { - value_id, - datatype_id, - }, - ) => { - self.insert_str(value_id, literal.value()); - self.insert_str(datatype_id, literal.datatype().as_str()); - } - (TermRef::Triple(triple), EncodedTerm::Triple(encoded)) => { - self.insert_term_values(triple.subject.as_ref().into(), &encoded.subject); - self.insert_term_values(triple.predicate.as_ref().into(), &encoded.predicate); - self.insert_term_values(triple.object.as_ref(), &encoded.object); - } - _ => (), - } - } - pub fn insert_str(&self, key: &StrHash, value: &str) { if matches!(self.storage.contains_str(key), Ok(true)) { return; diff --git a/lib/src/storage/binary_encoder.rs b/lib/src/storage/binary_encoder.rs index db1a111e..51165ca1 100644 --- a/lib/src/storage/binary_encoder.rs +++ b/lib/src/storage/binary_encoder.rs @@ -643,7 +643,6 @@ mod tests { use super::*; use crate::storage::numeric_encoder::*; use std::cell::RefCell; - use std::collections::hash_map::Entry; use std::collections::HashMap; use std::convert::Infallible; @@ -664,15 +663,15 @@ mod tests { } } - impl StrContainer for MemoryStrStore { - fn insert_str(&self, key: &StrHash, value: &str) -> Result { - match self.id2str.borrow_mut().entry(*key) { - Entry::Occupied(_) => Ok(false), - Entry::Vacant(entry) => { - entry.insert(value.to_owned()); - Ok(true) - } - } + impl TermEncoder for MemoryStrStore { + type Error = Infallible; + + fn insert_str(&self, key: &StrHash, value: &str) -> Result<(), Infallible> { + self.id2str + .borrow_mut() + .entry(*key) + .or_insert_with(|| value.to_owned()); + Ok(()) } } diff --git a/lib/src/storage/mod.rs b/lib/src/storage/mod.rs index d4c2d744..f4e49b9b 100644 --- a/lib/src/storage/mod.rs +++ b/lib/src/storage/mod.rs @@ -19,9 +19,7 @@ use crate::storage::binary_encoder::{ LATEST_STORAGE_VERSION, WRITTEN_TERM_MAX_SIZE, }; use crate::storage::io::StoreOrParseError; -use crate::storage::numeric_encoder::{ - EncodedQuad, EncodedTerm, StrContainer, StrHash, StrLookup, WriteEncoder, -}; +use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, StrHash, StrLookup, TermEncoder}; mod binary_encoder; pub(crate) mod io; @@ -671,10 +669,6 @@ impl Storage { pub fn contains_str(&self, key: &StrHash) -> std::io::Result { Ok(self.id2str.contains_key(key.to_be_bytes())?) } - - pub fn insert_str(&self, key: &StrHash, value: &str) -> std::io::Result { - Ok(self.id2str.insert(key.to_be_bytes(), value)?.is_none()) - } } pub struct ChainedDecodingQuadIterator { @@ -872,7 +866,7 @@ impl<'a> StorageTransaction<'a> { &self, graph_name: NamedOrBlankNodeRef<'_>, ) -> Result { - let graph_name = self.encode_named_or_blank_node(graph_name)?; + let graph_name = self.encode_term(graph_name)?; Ok(self.graphs.insert(encode_term(&graph_name), &[])?.is_none()) } @@ -887,14 +881,6 @@ impl<'a> StorageTransaction<'a> { pub fn contains_str(&self, key: &StrHash) -> Result { Ok(self.id2str.get(key.to_be_bytes())?.is_some()) } - - pub fn insert_str( - &self, - key: &StrHash, - value: &str, - ) -> Result { - Ok(self.id2str.insert(&key.to_be_bytes(), value)?.is_none()) - } } /// Error returned by a Sled transaction @@ -1061,12 +1047,6 @@ impl StrLookup for Storage { } } -impl StrContainer for Storage { - fn insert_str(&self, key: &StrHash, value: &str) -> std::io::Result { - self.insert_str(key, value) - } -} - impl<'a> StrLookup for StorageTransaction<'a> { type Error = UnabortableTransactionError; @@ -1079,34 +1059,46 @@ impl<'a> StrLookup for StorageTransaction<'a> { } } -impl<'a> StrContainer for StorageTransaction<'a> { - fn insert_str(&self, key: &StrHash, value: &str) -> Result { - self.insert_str(key, value) +impl TermEncoder for Storage { + type Error = std::io::Error; + + fn insert_str(&self, key: &StrHash, value: &str) -> std::io::Result<()> { + self.id2str.insert(key.to_be_bytes(), value)?; + Ok(()) + } +} + +impl<'a> TermEncoder for StorageTransaction<'a> { + type Error = UnabortableTransactionError; + + fn insert_str(&self, key: &StrHash, value: &str) -> Result<(), UnabortableTransactionError> { + self.id2str.insert(&key.to_be_bytes(), value)?; + Ok(()) } } -pub(crate) trait StorageLike: StrLookup + StrContainer { +pub(crate) trait StorageLike: StrLookup { fn insert(&self, quad: QuadRef<'_>) -> Result; fn remove(&self, quad: QuadRef<'_>) -> Result; } impl StorageLike for Storage { - fn insert(&self, quad: QuadRef<'_>) -> Result { + fn insert(&self, quad: QuadRef<'_>) -> std::io::Result { self.insert(quad) } - fn remove(&self, quad: QuadRef<'_>) -> Result { + fn remove(&self, quad: QuadRef<'_>) -> std::io::Result { self.remove(quad) } } impl<'a> StorageLike for StorageTransaction<'a> { - fn insert(&self, quad: QuadRef<'_>) -> Result { + fn insert(&self, quad: QuadRef<'_>) -> Result { self.insert(quad) } - fn remove(&self, quad: QuadRef<'_>) -> Result { + fn remove(&self, quad: QuadRef<'_>) -> Result { self.remove(quad) } } diff --git a/lib/src/storage/numeric_encoder.rs b/lib/src/storage/numeric_encoder.rs index f31ea107..525a7e01 100644 --- a/lib/src/storage/numeric_encoder.rs +++ b/lib/src/storage/numeric_encoder.rs @@ -657,7 +657,7 @@ impl From> for EncodedQuad { } } -pub(crate) trait StrLookup { +pub trait StrLookup { type Error: Error + Into + 'static; fn get_str(&self, key: &StrHash) -> Result, Self::Error>; @@ -665,200 +665,92 @@ pub(crate) trait StrLookup { fn contains_str(&self, key: &StrHash) -> Result; } -pub(crate) trait StrContainer: StrLookup { - fn insert_str(&self, key: &StrHash, value: &str) -> Result; -} +pub(super) trait TermEncoder { + type Error; -/// Encodes a term and insert strings if needed -pub(crate) trait WriteEncoder: StrContainer { - fn encode_named_node(&self, named_node: NamedNodeRef<'_>) -> Result { - Ok(EncodedTerm::NamedNode { - iri_id: self.encode_str(named_node.as_str())?, - }) + fn insert_str(&self, key: &StrHash, value: &str) -> Result<(), Self::Error>; + + fn encode_term<'a>(&self, term: impl Into>) -> Result { + let term = term.into(); + let encoded = term.into(); + insert_term_values(term, &encoded, |key, value| self.insert_str(key, value))?; + Ok(encoded) } - fn encode_blank_node(&self, blank_node: BlankNodeRef<'_>) -> Result { - Ok(if let Some(id) = blank_node.id() { - EncodedTerm::NumericalBlankNode { id } - } else { - let id = blank_node.as_str(); - if let Ok(id) = id.try_into() { - EncodedTerm::SmallBlankNode(id) - } else { - EncodedTerm::BigBlankNode { - id_id: self.encode_str(id)?, - } - } + fn encode_quad(&self, quad: QuadRef<'_>) -> Result { + Ok(EncodedQuad { + subject: self.encode_term(quad.subject)?, + predicate: self.encode_term(quad.predicate)?, + object: self.encode_term(quad.object)?, + graph_name: match quad.graph_name { + GraphNameRef::NamedNode(graph_name) => self.encode_term(graph_name)?, + GraphNameRef::BlankNode(graph_name) => self.encode_term(graph_name)?, + GraphNameRef::DefaultGraph => EncodedTerm::DefaultGraph, + }, }) } +} - fn encode_literal(&self, literal: LiteralRef<'_>) -> Result { - Ok(if literal.is_plain() { +pub fn insert_term_values Result<(), E> + Copy>( + term: TermRef<'_>, + encoded: &EncodedTerm, + insert_str: F, +) -> Result<(), E> { + match (term, encoded) { + (TermRef::NamedNode(node), EncodedTerm::NamedNode { iri_id }) => { + insert_str(iri_id, node.as_str())?; + } + (TermRef::BlankNode(node), EncodedTerm::BigBlankNode { id_id }) => { + insert_str(id_id, node.as_str())?; + } + (TermRef::Literal(literal), EncodedTerm::BigStringLiteral { value_id }) => { + insert_str(value_id, literal.value())?; + } + (TermRef::Literal(literal), EncodedTerm::SmallBigLangStringLiteral { language_id, .. }) => { if let Some(language) = literal.language() { - if let Ok(value) = SmallString::try_from(literal.value()) { - if let Ok(language) = SmallString::try_from(language) { - EncodedTerm::SmallSmallLangStringLiteral { value, language } - } else { - EncodedTerm::SmallBigLangStringLiteral { - value, - language_id: self.encode_str(language)?, - } - } - } else if let Ok(language) = SmallString::try_from(language) { - EncodedTerm::BigSmallLangStringLiteral { - value_id: self.encode_str(literal.value())?, - language, - } - } else { - EncodedTerm::BigBigLangStringLiteral { - value_id: self.encode_str(literal.value())?, - language_id: self.encode_str(language)?, - } - } - } else if let Ok(value) = SmallString::try_from(literal.value()) { - EncodedTerm::SmallStringLiteral(value) - } else { - EncodedTerm::BigStringLiteral { - value_id: self.encode_str(literal.value())?, - } + insert_str(language_id, language)?; } - } else { - match match literal.datatype().as_str() { - "http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(literal.value()), - "http://www.w3.org/2001/XMLSchema#string" => { - Some(if let Ok(value) = SmallString::try_from(literal.value()) { - EncodedTerm::SmallStringLiteral(value) - } else { - EncodedTerm::BigStringLiteral { - value_id: self.encode_str(literal.value())?, - } - }) - } - "http://www.w3.org/2001/XMLSchema#float" => parse_float_str(literal.value()), - "http://www.w3.org/2001/XMLSchema#double" => parse_double_str(literal.value()), - "http://www.w3.org/2001/XMLSchema#integer" - | "http://www.w3.org/2001/XMLSchema#byte" - | "http://www.w3.org/2001/XMLSchema#short" - | "http://www.w3.org/2001/XMLSchema#int" - | "http://www.w3.org/2001/XMLSchema#long" - | "http://www.w3.org/2001/XMLSchema#unsignedByte" - | "http://www.w3.org/2001/XMLSchema#unsignedShort" - | "http://www.w3.org/2001/XMLSchema#unsignedInt" - | "http://www.w3.org/2001/XMLSchema#unsignedLong" - | "http://www.w3.org/2001/XMLSchema#positiveInteger" - | "http://www.w3.org/2001/XMLSchema#negativeInteger" - | "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" - | "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => { - parse_integer_str(literal.value()) - } - "http://www.w3.org/2001/XMLSchema#decimal" => parse_decimal_str(literal.value()), - "http://www.w3.org/2001/XMLSchema#dateTime" - | "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => { - parse_date_time_str(literal.value()) - } - "http://www.w3.org/2001/XMLSchema#time" => parse_time_str(literal.value()), - "http://www.w3.org/2001/XMLSchema#date" => parse_date_str(literal.value()), - "http://www.w3.org/2001/XMLSchema#gYearMonth" => { - parse_g_year_month_str(literal.value()) - } - "http://www.w3.org/2001/XMLSchema#gYear" => parse_g_year_str(literal.value()), - "http://www.w3.org/2001/XMLSchema#gMonthDay" => { - parse_g_month_day_str(literal.value()) - } - "http://www.w3.org/2001/XMLSchema#gDay" => parse_g_day_str(literal.value()), - "http://www.w3.org/2001/XMLSchema#gMonth" => parse_g_month_str(literal.value()), - "http://www.w3.org/2001/XMLSchema#duration" => parse_duration_str(literal.value()), - "http://www.w3.org/2001/XMLSchema#yearMonthDuration" => { - parse_year_month_duration_str(literal.value()) - } - "http://www.w3.org/2001/XMLSchema#dayTimeDuration" => { - parse_day_time_duration_str(literal.value()) - } - _ => None, - } { - Some(v) => v, - None => { - if let Ok(value) = SmallString::try_from(literal.value()) { - EncodedTerm::SmallTypedLiteral { - value, - datatype_id: self.encode_str(literal.datatype().as_str())?, - } - } else { - EncodedTerm::BigTypedLiteral { - value_id: self.encode_str(literal.value())?, - datatype_id: self.encode_str(literal.datatype().as_str())?, - } - } - } + } + (TermRef::Literal(literal), EncodedTerm::BigSmallLangStringLiteral { value_id, .. }) => { + insert_str(value_id, literal.value())?; + } + ( + TermRef::Literal(literal), + EncodedTerm::BigBigLangStringLiteral { + value_id, + language_id, + }, + ) => { + insert_str(value_id, literal.value())?; + if let Some(language) = literal.language() { + insert_str(language_id, language)? } - }) - } - - fn encode_named_or_blank_node( - &self, - term: NamedOrBlankNodeRef<'_>, - ) -> Result { - match term { - NamedOrBlankNodeRef::NamedNode(named_node) => self.encode_named_node(named_node), - NamedOrBlankNodeRef::BlankNode(blank_node) => self.encode_blank_node(blank_node), } - } - - fn encode_subject(&self, term: SubjectRef<'_>) -> Result { - match term { - SubjectRef::NamedNode(named_node) => self.encode_named_node(named_node), - SubjectRef::BlankNode(blank_node) => self.encode_blank_node(blank_node), - SubjectRef::Triple(triple) => Ok(EncodedTerm::Triple(Rc::new( - self.encode_triple(triple.as_ref())?, - ))), + (TermRef::Literal(literal), EncodedTerm::SmallTypedLiteral { datatype_id, .. }) => { + insert_str(datatype_id, literal.datatype().as_str())?; } - } - - fn encode_term(&self, term: TermRef<'_>) -> Result { - match term { - TermRef::NamedNode(named_node) => self.encode_named_node(named_node), - TermRef::BlankNode(blank_node) => self.encode_blank_node(blank_node), - TermRef::Literal(literal) => self.encode_literal(literal), - TermRef::Triple(triple) => Ok(EncodedTerm::Triple(Rc::new( - self.encode_triple(triple.as_ref())?, - ))), + ( + TermRef::Literal(literal), + EncodedTerm::BigTypedLiteral { + value_id, + datatype_id, + }, + ) => { + insert_str(value_id, literal.value())?; + insert_str(datatype_id, literal.datatype().as_str())?; } - } - - fn encode_graph_name(&self, name: GraphNameRef<'_>) -> Result { - match name { - GraphNameRef::NamedNode(named_node) => self.encode_named_node(named_node), - GraphNameRef::BlankNode(blank_node) => self.encode_blank_node(blank_node), - GraphNameRef::DefaultGraph => Ok(EncodedTerm::DefaultGraph), + (TermRef::Triple(triple), EncodedTerm::Triple(encoded)) => { + insert_term_values(triple.subject.as_ref().into(), &encoded.subject, insert_str)?; + insert_term_values( + triple.predicate.as_ref().into(), + &encoded.predicate, + insert_str, + )?; + insert_term_values(triple.object.as_ref(), &encoded.object, insert_str)?; } + _ => (), } - - fn encode_triple(&self, quad: TripleRef<'_>) -> Result { - Ok(EncodedTriple { - subject: self.encode_subject(quad.subject)?, - predicate: self.encode_named_node(quad.predicate)?, - object: self.encode_term(quad.object)?, - }) - } - - fn encode_quad(&self, quad: QuadRef<'_>) -> Result { - Ok(EncodedQuad { - subject: self.encode_subject(quad.subject)?, - predicate: self.encode_named_node(quad.predicate)?, - object: self.encode_term(quad.object)?, - graph_name: self.encode_graph_name(quad.graph_name)?, - }) - } - - fn encode_str(&self, value: &str) -> Result; -} - -impl WriteEncoder for S { - fn encode_str(&self, value: &str) -> Result { - let key = StrHash::new(value); - self.insert_str(&key, value)?; - Ok(key) - } + Ok(()) } pub fn parse_boolean_str(value: &str) -> Option { @@ -932,7 +824,7 @@ pub fn parse_day_time_duration_str(value: &str) -> Option { value.parse().map(EncodedTerm::DayTimeDurationLiteral).ok() } -pub(crate) trait Decoder: StrLookup { +pub trait Decoder: StrLookup { fn decode_term(&self, encoded: &EncodedTerm) -> Result>; fn decode_subject(&self, encoded: &EncodedTerm) -> Result> { @@ -1112,7 +1004,7 @@ fn get_required_str( } #[derive(Debug)] -pub(crate) enum DecoderError { +pub enum DecoderError { Store(E), Decoder { msg: String }, }