diff --git a/lib/src/sparql/dataset.rs b/lib/src/sparql/dataset.rs index e3e5c604..9ea0f76a 100644 --- a/lib/src/sparql/dataset.rs +++ b/lib/src/sparql/dataset.rs @@ -1,7 +1,9 @@ use crate::model::TermRef; use crate::sparql::algebra::QueryDataset; use crate::sparql::EvaluationError; -use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup}; +use crate::storage::numeric_encoder::{ + insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup, TermDecoder, +}; use crate::storage::{StorageError, StorageReader}; use std::cell::RefCell; use std::collections::hash_map::Entry; @@ -159,6 +161,10 @@ impl DatasetView { encoded } + pub fn term_decoder(&self) -> TermDecoder { + TermDecoder::new(self) + } + pub fn insert_str(&self, key: &StrHash, value: &str) { if let Entry::Vacant(e) = self.extra.borrow_mut().entry(*key) { if !matches!(self.reader.contains_str(key), Ok(true)) { diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 842dbe16..5834a01e 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -659,7 +659,9 @@ impl SimpleEvaluator { let service_name = get_pattern_value(service_name, from) .ok_or_else(|| EvaluationError::msg("The SERVICE name is not bound"))?; if let QueryResults::Solutions(iter) = self.service_handler.handle( - self.dataset.decode_named_node(&service_name)?, + self.dataset + .term_decoder() + .decode_named_node(&service_name)?, Query { inner: spargebra::Query::Select { dataset: None, @@ -2029,7 +2031,7 @@ impl SimpleEvaluator { Rc::new(move |tuple| { let args = args .iter() - .map(|f| dataset.decode_term(&f(tuple)?).ok()) + .map(|f| dataset.term_decoder().decode_term(&f(tuple)?).ok()) .collect::>>()?; Some(dataset.encode_term(&function(&args)?)) }) @@ -2307,9 +2309,10 @@ fn decode_bindings( variables, Box::new(iter.map(move |values| { let mut result = vec![None; tuple_size]; + let decoder = dataset.term_decoder(); for (i, value) in values?.iter().enumerate() { if let Some(term) = value { - result[i] = Some(dataset.decode_term(&term)?) + result[i] = Some(decoder.decode_term(&term)?) } } Ok(result) @@ -2569,9 +2572,10 @@ fn cmp_terms(dataset: &DatasetView, a: Option<&EncodedTerm>, b: Option<&EncodedT b => { if let Some(ord) = partial_cmp_literals(dataset, a, b) { ord - } else if let (Ok(Term::Literal(a)), Ok(Term::Literal(b))) = - (dataset.decode_term(a), dataset.decode_term(b)) - { + } else if let (Ok(Term::Literal(a)), Ok(Term::Literal(b))) = ( + dataset.term_decoder().decode_term(a), + dataset.term_decoder().decode_term(b), + ) { (a.value(), a.datatype(), a.language()).cmp(&( b.value(), b.datatype(), @@ -3323,7 +3327,7 @@ impl Iterator for ConstructIterator { get_triple_template_value(&template.object, &tuple, &mut self.bnodes), ) { self.buffered_results.push(decode_triple( - &*self.eval.dataset, + &self.eval.dataset.term_decoder(), &subject, &predicate, &object, @@ -3373,8 +3377,8 @@ fn new_bnode() -> EncodedTerm { EncodedTerm::NumericalBlankNode { id: random() } } -fn decode_triple( - decoder: &D, +fn decode_triple( + decoder: &TermDecoder, subject: &EncodedTerm, predicate: &EncodedTerm, object: &EncodedTerm, @@ -3402,6 +3406,7 @@ impl Iterator for DescribeIterator { Ok(quad) => self .eval .dataset + .term_decoder() .decode_quad(&quad) .map(|q| q.into()) .map_err(|e| e.into()), diff --git a/lib/src/sparql/update.rs b/lib/src/sparql/update.rs index 74e70db2..1c2f53ed 100644 --- a/lib/src/sparql/update.rs +++ b/lib/src/sparql/update.rs @@ -8,7 +8,7 @@ use crate::sparql::http::Client; use crate::sparql::plan::EncodedTuple; use crate::sparql::plan_builder::PlanBuilder; use crate::sparql::{EvaluationError, Update, UpdateOptions}; -use crate::storage::numeric_encoder::{Decoder, EncodedTerm}; +use crate::storage::numeric_encoder::EncodedTerm; use crate::storage::StorageWriter; use oxiri::Iri; use spargebra::algebra::{GraphPattern, GraphTarget}; @@ -374,7 +374,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> { .map(|t| t.into()) } TermPattern::Variable(v) => Self::lookup_variable(v, variables, values) - .map(|node| dataset.decode_term(&node)) + .map(|node| dataset.term_decoder().decode_term(&node)) .transpose()?, }) } @@ -388,7 +388,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> { Ok(match term { NamedNodePattern::NamedNode(term) => Some(term.clone()), NamedNodePattern::Variable(v) => Self::lookup_variable(v, variables, values) - .map(|node| dataset.decode_named_node(&node)) + .map(|node| dataset.term_decoder().decode_named_node(&node)) .transpose()?, }) } @@ -407,7 +407,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> { Ok(if node == EncodedTerm::DefaultGraph { OxGraphName::DefaultGraph } else { - dataset.decode_named_node(&node)?.into() + dataset.term_decoder().decode_named_node(&node)?.into() }) }) .transpose(), @@ -507,7 +507,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> { .map(|t| t.into()) } GroundTermPattern::Variable(v) => Self::lookup_variable(v, variables, values) - .map(|node| dataset.decode_term(&node)) + .map(|node| dataset.term_decoder().decode_term(&node)) .transpose()?, }) } diff --git a/lib/src/storage/binary_encoder.rs b/lib/src/storage/binary_encoder.rs index 2041d051..babd3547 100644 --- a/lib/src/storage/binary_encoder.rs +++ b/lib/src/storage/binary_encoder.rs @@ -742,11 +742,12 @@ mod tests { ) .into(), ]; + let decoder = TermDecoder::new(&store); for term in terms { let encoded = term.as_ref().into(); store.insert_term(term.as_ref(), &encoded); assert_eq!(encoded, term.as_ref().into()); - assert_eq!(term, store.decode_term(&encoded).unwrap()); + assert_eq!(term, decoder.decode_term(&encoded).unwrap()); let mut buffer = Vec::new(); write_term(&mut buffer, &encoded); diff --git a/lib/src/storage/mod.rs b/lib/src/storage/mod.rs index 2e7baa29..b6859381 100644 --- a/lib/src/storage/mod.rs +++ b/lib/src/storage/mod.rs @@ -10,7 +10,7 @@ use crate::storage::binary_encoder::{ }; pub use crate::storage::error::{CorruptionError, LoaderError, SerializerError, StorageError}; use crate::storage::numeric_encoder::{ - insert_term, Decoder, EncodedQuad, EncodedTerm, StrHash, StrLookup, + insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup, TermDecoder, }; use backend::{ColumnFamily, ColumnFamilyDefinition, Db, Iter}; use std::cmp::{max, min}; @@ -294,6 +294,10 @@ pub struct StorageReader { } impl StorageReader { + pub fn term_decoder(&self) -> TermDecoder { + TermDecoder::new(self) + } + pub fn len(&self) -> Result { Ok(self.reader.len(&self.storage.gspo_cf)? + self.reader.len(&self.storage.dspo_cf)?) } @@ -654,7 +658,7 @@ impl StorageReader { } for spo in self.dspo_quads(&[]) { let spo = spo?; - self.decode_quad(&spo)?; // We ensure that the quad is readable + self.term_decoder().decode_quad(&spo)?; // We ensure that the quad is readable if !self.storage.db.contains_key( &self.storage.dpos_cf, &encode_term_triple(&spo.predicate, &spo.object, &spo.subject), @@ -684,7 +688,7 @@ impl StorageReader { } for gspo in self.gspo_quads(&[]) { let gspo = gspo?; - self.decode_quad(&gspo)?; // We ensure that the quad is readable + self.term_decoder().decode_quad(&gspo)?; // We ensure that the quad is readable if !self.storage.db.contains_key( &self.storage.gpos_cf, &encode_term_quad( diff --git a/lib/src/storage/numeric_encoder.rs b/lib/src/storage/numeric_encoder.rs index 5b5e0099..d3f92729 100644 --- a/lib/src/storage/numeric_encoder.rs +++ b/lib/src/storage/numeric_encoder.rs @@ -819,10 +819,95 @@ pub fn parse_day_time_duration_str(value: &str) -> Option { value.parse().map(EncodedTerm::DayTimeDurationLiteral).ok() } -pub trait Decoder: StrLookup { - fn decode_term(&self, encoded: &EncodedTerm) -> Result; +pub struct TermDecoder<'a, S: StrLookup> { + lookup: &'a S, +} + +impl<'a, S: StrLookup> TermDecoder<'a, S> { + pub fn new(lookup: &'a S) -> Self { + Self { lookup } + } + + pub fn decode_term(&self, encoded: &EncodedTerm) -> Result { + match encoded { + EncodedTerm::DefaultGraph => { + Err(CorruptionError::msg("The default graph tag is not a valid term").into()) + } + EncodedTerm::NamedNode { iri_id } => { + Ok(NamedNode::new_unchecked(get_required_str(self.lookup, iri_id)?).into()) + } + EncodedTerm::NumericalBlankNode { id } => Ok(BlankNode::new_from_unique_id(*id).into()), + EncodedTerm::SmallBlankNode(id) => Ok(BlankNode::new_unchecked(id.as_str()).into()), + EncodedTerm::BigBlankNode { id_id } => { + Ok(BlankNode::new_unchecked(get_required_str(self.lookup, id_id)?).into()) + } + EncodedTerm::SmallStringLiteral(value) => { + Ok(Literal::new_simple_literal(*value).into()) + } + EncodedTerm::BigStringLiteral { value_id } => { + Ok(Literal::new_simple_literal(get_required_str(self.lookup, value_id)?).into()) + } + EncodedTerm::SmallSmallLangStringLiteral { value, language } => { + Ok(Literal::new_language_tagged_literal_unchecked(*value, *language).into()) + } + EncodedTerm::SmallBigLangStringLiteral { value, language_id } => { + Ok(Literal::new_language_tagged_literal_unchecked( + *value, + get_required_str(self.lookup, language_id)?, + ) + .into()) + } + EncodedTerm::BigSmallLangStringLiteral { value_id, language } => { + Ok(Literal::new_language_tagged_literal_unchecked( + get_required_str(self.lookup, value_id)?, + *language, + ) + .into()) + } + EncodedTerm::BigBigLangStringLiteral { + value_id, + language_id, + } => Ok(Literal::new_language_tagged_literal_unchecked( + get_required_str(self.lookup, value_id)?, + get_required_str(self.lookup, language_id)?, + ) + .into()), + EncodedTerm::SmallTypedLiteral { value, datatype_id } => { + Ok(Literal::new_typed_literal( + *value, + NamedNode::new_unchecked(get_required_str(self.lookup, datatype_id)?), + ) + .into()) + } + EncodedTerm::BigTypedLiteral { + value_id, + datatype_id, + } => Ok(Literal::new_typed_literal( + get_required_str(self.lookup, value_id)?, + NamedNode::new_unchecked(get_required_str(self.lookup, datatype_id)?), + ) + .into()), + EncodedTerm::BooleanLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::FloatLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::DoubleLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::IntegerLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::DecimalLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::DateTimeLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::DateLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::TimeLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::GYearMonthLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::GYearLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::GMonthDayLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::GDayLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::GMonthLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::DurationLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::YearMonthDurationLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::DayTimeDurationLiteral(value) => Ok(Literal::from(*value).into()), + EncodedTerm::Triple(triple) => Ok(self.decode_triple(triple)?.into()), + } + } - fn decode_subject(&self, encoded: &EncodedTerm) -> Result { + pub fn decode_subject(&self, encoded: &EncodedTerm) -> Result { match self.decode_term(encoded)? { Term::NamedNode(named_node) => Ok(named_node.into()), Term::BlankNode(blank_node) => Ok(blank_node.into()), @@ -834,7 +919,7 @@ pub trait Decoder: StrLookup { } } - fn decode_named_or_blank_node( + pub fn decode_named_or_blank_node( &self, encoded: &EncodedTerm, ) -> Result { @@ -852,7 +937,7 @@ pub trait Decoder: StrLookup { } } - fn decode_named_node(&self, encoded: &EncodedTerm) -> Result { + pub fn decode_named_node(&self, encoded: &EncodedTerm) -> Result { match self.decode_term(encoded)? { Term::NamedNode(named_node) => Ok(named_node), Term::BlankNode(_) => Err(CorruptionError::msg( @@ -868,7 +953,7 @@ pub trait Decoder: StrLookup { } } - fn decode_triple(&self, encoded: &EncodedTriple) -> Result { + pub fn decode_triple(&self, encoded: &EncodedTriple) -> Result { Ok(Triple::new( self.decode_subject(&encoded.subject)?, self.decode_named_node(&encoded.predicate)?, @@ -876,7 +961,7 @@ pub trait Decoder: StrLookup { )) } - fn decode_quad(&self, encoded: &EncodedQuad) -> Result { + pub fn decode_quad(&self, encoded: &EncodedQuad) -> Result { Ok(Quad::new( self.decode_subject(&encoded.subject)?, self.decode_named_node(&encoded.predicate)?, @@ -903,87 +988,6 @@ pub trait Decoder: StrLookup { } } -impl Decoder for S { - fn decode_term(&self, encoded: &EncodedTerm) -> Result { - match encoded { - EncodedTerm::DefaultGraph => { - Err(CorruptionError::msg("The default graph tag is not a valid term").into()) - } - EncodedTerm::NamedNode { iri_id } => { - Ok(NamedNode::new_unchecked(get_required_str(self, iri_id)?).into()) - } - EncodedTerm::NumericalBlankNode { id } => Ok(BlankNode::new_from_unique_id(*id).into()), - EncodedTerm::SmallBlankNode(id) => Ok(BlankNode::new_unchecked(id.as_str()).into()), - EncodedTerm::BigBlankNode { id_id } => { - Ok(BlankNode::new_unchecked(get_required_str(self, id_id)?).into()) - } - EncodedTerm::SmallStringLiteral(value) => { - Ok(Literal::new_simple_literal(*value).into()) - } - EncodedTerm::BigStringLiteral { value_id } => { - Ok(Literal::new_simple_literal(get_required_str(self, value_id)?).into()) - } - EncodedTerm::SmallSmallLangStringLiteral { value, language } => { - Ok(Literal::new_language_tagged_literal_unchecked(*value, *language).into()) - } - EncodedTerm::SmallBigLangStringLiteral { value, language_id } => { - Ok(Literal::new_language_tagged_literal_unchecked( - *value, - get_required_str(self, language_id)?, - ) - .into()) - } - EncodedTerm::BigSmallLangStringLiteral { value_id, language } => { - Ok(Literal::new_language_tagged_literal_unchecked( - get_required_str(self, value_id)?, - *language, - ) - .into()) - } - EncodedTerm::BigBigLangStringLiteral { - value_id, - language_id, - } => Ok(Literal::new_language_tagged_literal_unchecked( - get_required_str(self, value_id)?, - get_required_str(self, language_id)?, - ) - .into()), - EncodedTerm::SmallTypedLiteral { value, datatype_id } => { - Ok(Literal::new_typed_literal( - *value, - NamedNode::new_unchecked(get_required_str(self, datatype_id)?), - ) - .into()) - } - EncodedTerm::BigTypedLiteral { - value_id, - datatype_id, - } => Ok(Literal::new_typed_literal( - get_required_str(self, value_id)?, - NamedNode::new_unchecked(get_required_str(self, datatype_id)?), - ) - .into()), - EncodedTerm::BooleanLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::FloatLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::DoubleLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::IntegerLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::DecimalLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::DateTimeLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::DateLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::TimeLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::GYearMonthLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::GYearLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::GMonthDayLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::GDayLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::GMonthLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::DurationLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::YearMonthDurationLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::DayTimeDurationLiteral(value) => Ok(Literal::from(*value).into()), - EncodedTerm::Triple(triple) => Ok(self.decode_triple(triple)?.into()), - } - } -} - fn get_required_str(lookup: &L, id: &StrHash) -> Result { Ok(lookup.get_str(id)?.ok_or_else(|| { CorruptionError::new(format!( diff --git a/lib/src/store.rs b/lib/src/store.rs index ae1a8591..eb57e097 100644 --- a/lib/src/store.rs +++ b/lib/src/store.rs @@ -32,7 +32,7 @@ use crate::sparql::{ evaluate_query, evaluate_update, EvaluationError, Query, QueryOptions, QueryResults, Update, UpdateOptions, }; -use crate::storage::numeric_encoder::{Decoder, EncodedQuad, EncodedTerm}; +use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm}; #[cfg(not(target_arch = "wasm32"))] use crate::storage::StorageBulkLoader; use crate::storage::{ @@ -1238,7 +1238,7 @@ impl Iterator for QuadIter { fn next(&mut self) -> Option> { Some(match self.iter.next()? { - Ok(quad) => self.reader.decode_quad(&quad), + Ok(quad) => self.reader.term_decoder().decode_quad(&quad), Err(error) => Err(error), }) } @@ -1254,11 +1254,11 @@ impl Iterator for GraphNameIter { type Item = Result; fn next(&mut self) -> Option> { - Some( - self.iter - .next()? - .and_then(|graph_name| self.reader.decode_named_or_blank_node(&graph_name)), - ) + Some(self.iter.next()?.and_then(|graph_name| { + self.reader + .term_decoder() + .decode_named_or_blank_node(&graph_name) + })) } fn size_hint(&self) -> (usize, Option) {