From 5038d95a00b3b6d20168c52a2196468e1204fd9a Mon Sep 17 00:00:00 2001 From: Tpt Date: Fri, 31 Jul 2020 15:38:51 +0200 Subject: [PATCH] Allows each store to use its own ID builder --- lib/src/sparql/dataset.rs | 173 +++++ lib/src/sparql/eval.rs | 493 ++++++++------ lib/src/sparql/mod.rs | 34 +- lib/src/sparql/plan.rs | 412 +++++------- lib/src/sparql/plan_builder.rs | 123 ++-- lib/src/store/memory.rs | 98 ++- lib/src/store/mod.rs | 72 ++- lib/src/store/numeric_encoder.rs | 1043 +++++++++++++++++------------- lib/src/store/rocksdb.rs | 328 +++++++--- lib/src/store/sled.rs | 108 +++- wikibase/src/loader.rs | 2 +- 11 files changed, 1772 insertions(+), 1114 deletions(-) create mode 100644 lib/src/sparql/dataset.rs diff --git a/lib/src/sparql/dataset.rs b/lib/src/sparql/dataset.rs new file mode 100644 index 00000000..abaa7936 --- /dev/null +++ b/lib/src/sparql/dataset.rs @@ -0,0 +1,173 @@ +use crate::sparql::EvaluationError; +use crate::store::numeric_encoder::{ + EncodedQuad, EncodedTerm, MemoryStrStore, StrContainer, StrHash, StrId, StrLookup, + WithStoreError, +}; +use crate::store::ReadableEncodedStore; +use std::cell::RefCell; +use std::iter::empty; + +pub(crate) struct DatasetView { + store: S, + extra: RefCell, + default_graph_as_union: bool, +} + +impl DatasetView { + pub fn new(store: S, default_graph_as_union: bool) -> Self { + Self { + store, + extra: RefCell::new(MemoryStrStore::default()), + default_graph_as_union, + } + } +} + +impl WithStoreError for DatasetView { + type Error = EvaluationError; + type StrId = DatasetStrId; +} + +impl StrLookup for DatasetView { + fn get_str(&self, id: DatasetStrId) -> Result, EvaluationError> { + match id { + DatasetStrId::Store(id) => self.store.get_str(id).map_err(|e| e.into()), + DatasetStrId::Temporary(id) => Ok(self.extra.borrow().get_str(id)?), + } + } + + fn get_str_id(&self, value: &str) -> Result>, EvaluationError> { + if let Some(id) = self.extra.borrow().get_str_id(value)? { + Ok(Some(DatasetStrId::Temporary(id))) + } else { + Ok(self + .store + .get_str_id(value) + .map_err(|e| e.into())? + .map(DatasetStrId::Store)) + } + } +} + +impl ReadableEncodedStore for DatasetView { + type QuadsIter = + Box>, EvaluationError>>>; + + fn encoded_quads_for_pattern( + &self, + subject: Option>, + predicate: Option>, + object: Option>, + graph_name: Option>, + ) -> Box>, EvaluationError>>> + { + if let Some((subject, predicate, object, graph_name)) = + try_map_quad_pattern(subject, predicate, object, graph_name) + { + if graph_name == None { + Box::new( + map_iter( + self.store + .encoded_quads_for_pattern(subject, predicate, object, None), + ) + .filter(|quad| match quad { + Err(_) => true, + Ok(quad) => quad.graph_name != EncodedTerm::DefaultGraph, + }), + ) + } else if graph_name == Some(EncodedTerm::DefaultGraph) && self.default_graph_as_union { + Box::new( + map_iter( + self.store + .encoded_quads_for_pattern(subject, predicate, object, None), + ) + .map(|quad| { + let quad = quad?; + Ok(EncodedQuad::new( + quad.subject, + quad.predicate, + quad.object, + EncodedTerm::DefaultGraph, + )) + }), + ) + } else { + Box::new(map_iter(self.store.encoded_quads_for_pattern( + subject, predicate, object, graph_name, + ))) + } + } else { + Box::new(empty()) + } + } +} + +fn map_iter<'a, I: StrId>( + iter: impl Iterator, impl Into>> + 'a, +) -> impl Iterator>, EvaluationError>> + 'a { + iter.map(|t| { + t.map(|q| EncodedQuad { + subject: q.subject.map_id(DatasetStrId::Store), + predicate: q.predicate.map_id(DatasetStrId::Store), + object: q.object.map_id(DatasetStrId::Store), + graph_name: q.graph_name.map_id(DatasetStrId::Store), + }) + .map_err(|e| e.into()) + }) +} + +type QuadPattern = ( + Option>, + Option>, + Option>, + Option>, +); + +fn try_map_quad_pattern( + subject: Option>>, + predicate: Option>>, + object: Option>>, + graph_name: Option>>, +) -> Option> { + Some(( + transpose(subject.map(|t| t.try_map_id(unwrap_store_id)))?, + transpose(predicate.map(|t| t.try_map_id(unwrap_store_id)))?, + transpose(object.map(|t| t.try_map_id(unwrap_store_id)))?, + transpose(graph_name.map(|t| t.try_map_id(unwrap_store_id)))?, + )) +} + +fn transpose(o: Option>) -> Option> { + match o { + Some(Some(v)) => Some(Some(v)), + Some(None) => None, + None => Some(None), + } +} + +fn unwrap_store_id(id: DatasetStrId) -> Option { + match id { + DatasetStrId::Store(id) => Some(id), + DatasetStrId::Temporary(_) => None, + } +} + +impl<'a, S: ReadableEncodedStore> StrContainer for &'a DatasetView { + fn insert_str(&mut self, value: &str) -> Result { + if let Some(id) = self.store.get_str_id(value).map_err(|e| e.into())? { + Ok(DatasetStrId::Store(id)) + } else { + Ok(DatasetStrId::Temporary( + self.extra.borrow_mut().insert_str(value)?, + )) + } + } +} + +#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)] +pub enum DatasetStrId { + Store(I), + Temporary(StrHash), +} + +impl StrId for DatasetStrId {} diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index c2dc2f93..0499bc6c 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -28,16 +28,16 @@ use std::str; const REGEX_SIZE_LIMIT: usize = 1_000_000; -type EncodedTuplesIterator = Box>>; +type EncodedTuplesIterator = Box, EvaluationError>>>; -pub(crate) struct SimpleEvaluator { - dataset: Rc>, +pub(crate) struct SimpleEvaluator { + dataset: Rc, base_iri: Option>>, now: DateTime, service_handler: Rc>, } -impl Clone for SimpleEvaluator { +impl Clone for SimpleEvaluator { fn clone(&self) -> Self { Self { dataset: self.dataset.clone(), @@ -48,9 +48,12 @@ impl Clone for SimpleEvaluator { } } -impl SimpleEvaluator { +impl + 'static> SimpleEvaluator +where + for<'a> &'a S: StrContainer, +{ pub fn new( - dataset: Rc>, + dataset: Rc, base_iri: Option>>, service_handler: Rc>, ) -> Self { @@ -64,7 +67,7 @@ impl SimpleEvaluator { pub fn evaluate_select_plan( &self, - plan: &PlanNode, + plan: &PlanNode, variables: Rc>, ) -> Result { let iter = self.eval_plan(plan, EncodedTuple::with_capacity(variables.len())); @@ -73,7 +76,10 @@ impl SimpleEvaluator { )) } - pub fn evaluate_ask_plan(&self, plan: &PlanNode) -> Result { + pub fn evaluate_ask_plan( + &self, + plan: &PlanNode, + ) -> Result { let from = EncodedTuple::with_capacity(plan.maybe_bound_variables().len()); match self.eval_plan(plan, from).next() { Some(Ok(_)) => Ok(QueryResult::Boolean(true)), @@ -84,8 +90,8 @@ impl SimpleEvaluator { pub fn evaluate_construct_plan( &self, - plan: &PlanNode, - construct: Rc>, + plan: &PlanNode, + construct: Rc>>, ) -> Result { let from = EncodedTuple::with_capacity(plan.maybe_bound_variables().len()); Ok(QueryResult::Graph(QueryTriplesIterator { @@ -99,7 +105,10 @@ impl SimpleEvaluator { })) } - pub fn evaluate_describe_plan(&self, plan: &PlanNode) -> Result { + pub fn evaluate_describe_plan( + &self, + plan: &PlanNode, + ) -> Result { let from = EncodedTuple::with_capacity(plan.maybe_bound_variables().len()); Ok(QueryResult::Graph(QueryTriplesIterator { iter: Box::new(DescribeIterator { @@ -110,7 +119,11 @@ impl SimpleEvaluator { })) } - fn eval_plan(&self, node: &PlanNode, from: EncodedTuple) -> EncodedTuplesIterator { + fn eval_plan( + &self, + node: &PlanNode, + from: EncodedTuple, + ) -> EncodedTuplesIterator { match node { PlanNode::Init => Box::new(once(Ok(from))), PlanNode::StaticBindings { tuples } => Box::new(tuples.clone().into_iter().map(Ok)), @@ -152,12 +165,13 @@ impl SimpleEvaluator { let object = *object; let graph_name = *graph_name; Box::new(self.eval_plan(child, from).flat_map_ok(move |tuple| { - let mut iter = eval.dataset.quads_for_pattern( - get_pattern_value(&subject, &tuple), - get_pattern_value(&predicate, &tuple), - get_pattern_value(&object, &tuple), - get_pattern_value(&graph_name, &tuple), - ); + let mut iter: Box> = + Box::new(eval.dataset.encoded_quads_for_pattern( + get_pattern_value(&subject, &tuple), + get_pattern_value(&predicate, &tuple), + get_pattern_value(&object, &tuple), + get_pattern_value(&graph_name, &tuple), + )); if subject.is_var() && subject == predicate { iter = Box::new(iter.filter(|quad| match quad { Err(_) => true, @@ -196,7 +210,7 @@ impl SimpleEvaluator { })) } } - let iter: EncodedTuplesIterator = Box::new(iter.map(move |quad| { + let iter: EncodedTuplesIterator<_> = Box::new(iter.map(move |quad| { let quad = quad?; let mut new_tuple = tuple.clone(); put_pattern_value(&subject, quad.subject, &mut new_tuple); @@ -227,7 +241,7 @@ impl SimpleEvaluator { if let Some(graph_name) = get_pattern_value(&graph_name, &tuple) { graph_name } else { - let result: EncodedTuplesIterator = + let result: EncodedTuplesIterator<_> = Box::new(once(Err(EvaluationError::msg( "Unknown graph name is not allowed when evaluating property path", )))); @@ -431,8 +445,10 @@ impl SimpleEvaluator { let key_mapping = key_mapping.clone(); let aggregates = aggregates.clone(); let mut errors = Vec::default(); - let mut accumulators_for_group = - HashMap::>, Vec>>::default(); + let mut accumulators_for_group = HashMap::< + Vec>>, + Vec>>, + >::default(); self.eval_plan(child, from) .filter_map(|result| match result { Ok(result) => Some(result), @@ -498,11 +514,11 @@ impl SimpleEvaluator { fn evaluate_service( &self, - service_name: &PatternValue, + service_name: &PatternValue, graph_pattern: Rc, variables: Rc>, - from: &EncodedTuple, - ) -> Result { + from: &EncodedTuple, + ) -> Result, EvaluationError> { if let QueryResult::Solutions(iter) = self.service_handler.handle( self.dataset.decode_named_node( get_pattern_value(service_name, from) @@ -526,7 +542,7 @@ impl SimpleEvaluator { &self, function: &PlanAggregationFunction, distinct: bool, - ) -> Box { + ) -> Box + 'static> { match function { PlanAggregationFunction::Count => { if distinct { @@ -567,14 +583,14 @@ impl SimpleEvaluator { fn eval_path_from( &self, - path: &PlanPropertyPath, - start: EncodedTerm, - graph_name: EncodedTerm, - ) -> Box>> { + path: &PlanPropertyPath, + start: EncodedTerm, + graph_name: EncodedTerm, + ) -> Box, EvaluationError>>> { match path { PlanPropertyPath::PredicatePath(p) => Box::new( self.dataset - .quads_for_pattern(Some(start), Some(*p), None, Some(graph_name)) + .encoded_quads_for_pattern(Some(start), Some(*p), None, Some(graph_name)) .map(|t| Ok(t?.object)), ), PlanPropertyPath::InversePath(p) => self.eval_path_to(p, start, graph_name), @@ -612,7 +628,7 @@ impl SimpleEvaluator { let ps = ps.clone(); Box::new( self.dataset - .quads_for_pattern(Some(start), None, None, Some(graph_name)) + .encoded_quads_for_pattern(Some(start), None, None, Some(graph_name)) .filter_map(move |t| match t { Ok(t) => { if ps.contains(&t.predicate) { @@ -630,14 +646,14 @@ impl SimpleEvaluator { fn eval_path_to( &self, - path: &PlanPropertyPath, - end: EncodedTerm, - graph_name: EncodedTerm, - ) -> Box>> { + path: &PlanPropertyPath, + end: EncodedTerm, + graph_name: EncodedTerm, + ) -> Box, EvaluationError>>> { match path { PlanPropertyPath::PredicatePath(p) => Box::new( self.dataset - .quads_for_pattern(None, Some(*p), Some(end), Some(graph_name)) + .encoded_quads_for_pattern(None, Some(*p), Some(end), Some(graph_name)) .map(|t| Ok(t?.subject)), ), PlanPropertyPath::InversePath(p) => self.eval_path_from(p, end, graph_name), @@ -675,7 +691,7 @@ impl SimpleEvaluator { let ps = ps.clone(); Box::new( self.dataset - .quads_for_pattern(None, None, Some(end), Some(graph_name)) + .encoded_quads_for_pattern(None, None, Some(end), Some(graph_name)) .filter_map(move |t| match t { Ok(t) => { if ps.contains(&t.predicate) { @@ -693,13 +709,17 @@ impl SimpleEvaluator { fn eval_open_path( &self, - path: &PlanPropertyPath, - graph_name: EncodedTerm, - ) -> Box>> { + path: &PlanPropertyPath, + graph_name: EncodedTerm, + ) -> Box< + dyn Iterator< + Item = Result<(EncodedTerm, EncodedTerm), EvaluationError>, + >, + > { match path { PlanPropertyPath::PredicatePath(p) => Box::new( self.dataset - .quads_for_pattern(None, Some(*p), None, Some(graph_name)) + .encoded_quads_for_pattern(None, Some(*p), None, Some(graph_name)) .map(|t| t.map(|t| (t.subject, t.object))), ), PlanPropertyPath::InversePath(p) => Box::new( @@ -751,7 +771,7 @@ impl SimpleEvaluator { let ps = ps.clone(); Box::new( self.dataset - .quads_for_pattern(None, None, None, Some(graph_name)) + .encoded_quads_for_pattern(None, None, None, Some(graph_name)) .filter_map(move |t| match t { Ok(t) => { if ps.contains(&t.predicate) { @@ -769,10 +789,11 @@ impl SimpleEvaluator { fn get_subject_or_object_identity_pairs( &self, - graph_name: EncodedTerm, - ) -> impl Iterator> { + graph_name: EncodedTerm, + ) -> impl Iterator, EncodedTerm), EvaluationError>> + { self.dataset - .quads_for_pattern(None, None, None, Some(graph_name)) + .encoded_quads_for_pattern(None, None, None, Some(graph_name)) .flat_map_ok(|t| once(Ok(t.subject)).chain(once(Ok(t.object)))) .map(|e| e.map(|e| (e, e))) } @@ -780,9 +801,9 @@ impl SimpleEvaluator { #[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)] fn eval_expression( &self, - expression: &PlanExpression, - tuple: &EncodedTuple, - ) -> Option { + expression: &PlanExpression, + tuple: &EncodedTuple, + ) -> Option> { match expression { PlanExpression::Constant(t) => Some(*t), PlanExpression::Variable(v) => tuple.get(*v), @@ -1040,11 +1061,12 @@ impl SimpleEvaluator { } } PlanExpression::BNode(id) => match id { - Some(id) => Some( - (&BlankNode::new(self.to_simple_string(self.eval_expression(id, tuple)?)?) - .ok()?) - .into(), - ), + Some(id) => { + let bnode = + BlankNode::new(self.to_simple_string(self.eval_expression(id, tuple)?)?) + .ok()?; + Some(self.dataset.as_ref().encode_blank_node(&bnode).ok()?) + } None => Some(EncodedTerm::InlineBlankNode { id: random::(), }), @@ -1340,8 +1362,8 @@ impl SimpleEvaluator { } else { None }?; - self.dataset - .encoder() + let mut encoder = self.dataset.as_ref(); + encoder .encode_rio_literal(rio::Literal::Typed { value: &value, datatype: rio::NamedNode { iri: &datatype }, @@ -1498,7 +1520,7 @@ impl SimpleEvaluator { } } - fn to_bool(&self, term: EncodedTerm) -> Option { + fn to_bool(&self, term: EncodedTerm) -> Option { match term { EncodedTerm::BooleanLiteral(value) => Some(value), EncodedTerm::StringLiteral { value_id } => { @@ -1512,7 +1534,7 @@ impl SimpleEvaluator { } } - fn to_string_id(&self, term: EncodedTerm) -> Option { + fn to_string_id(&self, term: EncodedTerm) -> Option { match term { EncodedTerm::DefaultGraph => None, EncodedTerm::NamedNode { iri_id } => Some(iri_id), @@ -1538,7 +1560,7 @@ impl SimpleEvaluator { } } - fn to_simple_string(&self, term: EncodedTerm) -> Option { + fn to_simple_string(&self, term: EncodedTerm) -> Option { if let EncodedTerm::StringLiteral { value_id } = term { self.dataset.get_str(value_id).ok()? } else { @@ -1546,7 +1568,7 @@ impl SimpleEvaluator { } } - fn to_simple_string_id(&self, term: EncodedTerm) -> Option { + fn to_simple_string_id(&self, term: EncodedTerm) -> Option { if let EncodedTerm::StringLiteral { value_id } = term { Some(value_id) } else { @@ -1554,7 +1576,7 @@ impl SimpleEvaluator { } } - fn to_string(&self, term: EncodedTerm) -> Option { + fn to_string(&self, term: EncodedTerm) -> Option { match term { EncodedTerm::StringLiteral { value_id } | EncodedTerm::LangStringLiteral { value_id, .. } => { @@ -1564,7 +1586,10 @@ impl SimpleEvaluator { } } - fn to_string_and_language(&self, term: EncodedTerm) -> Option<(String, Option)> { + fn to_string_and_language( + &self, + term: EncodedTerm, + ) -> Option<(String, Option)> { match term { EncodedTerm::StringLiteral { value_id } => { Some((self.dataset.get_str(value_id).ok()??, None)) @@ -1577,26 +1602,34 @@ impl SimpleEvaluator { } } - fn build_named_node(&self, iri: &str) -> Option { + fn build_named_node(&self, iri: &str) -> Option> { Some(EncodedTerm::NamedNode { iri_id: self.build_string_id(iri)?, }) } - fn build_string_literal(&self, value: &str) -> Option { + fn build_string_literal(&self, value: &str) -> Option> { Some(EncodedTerm::StringLiteral { value_id: self.build_string_id(value)?, }) } - fn build_lang_string_literal(&self, value: &str, language_id: StrHash) -> Option { + fn build_lang_string_literal( + &self, + value: &str, + language_id: S::StrId, + ) -> Option> { Some(EncodedTerm::LangStringLiteral { value_id: self.build_string_id(value)?, language_id, }) } - fn build_plain_literal(&self, value: &str, language: Option) -> Option { + fn build_plain_literal( + &self, + value: &str, + language: Option, + ) -> Option> { if let Some(language_id) = language { self.build_lang_string_literal(value, language_id) } else { @@ -1604,11 +1637,11 @@ impl SimpleEvaluator { } } - fn build_string_id(&self, value: &str) -> Option { - self.dataset.encoder().insert_str(value).ok() + fn build_string_id(&self, value: &str) -> Option { + self.dataset.as_ref().encode_str(value).ok() } - fn build_language_id(&self, value: EncodedTerm) -> Option { + fn build_language_id(&self, value: EncodedTerm) -> Option { let mut language = self.to_simple_string(value)?; language.make_ascii_lowercase(); self.build_string_id(LanguageTag::parse(language).ok()?.as_str()) @@ -1616,9 +1649,9 @@ impl SimpleEvaluator { fn to_argument_compatible_strings( &self, - arg1: EncodedTerm, - arg2: EncodedTerm, - ) -> Option<(String, String, Option)> { + arg1: EncodedTerm, + arg2: EncodedTerm, + ) -> Option<(String, String, Option)> { let (value1, language1) = self.to_string_and_language(arg1)?; let (value2, language2) = self.to_string_and_language(arg2)?; if language2.is_none() || language1 == language2 { @@ -1628,7 +1661,11 @@ impl SimpleEvaluator { } } - fn compile_pattern(&self, pattern: EncodedTerm, flags: Option) -> Option { + fn compile_pattern( + &self, + pattern: EncodedTerm, + flags: Option>, + ) -> Option { // TODO Avoid to compile the regex each time let pattern = self.to_simple_string(pattern)?; let mut regex_builder = RegexBuilder::new(&pattern); @@ -1659,9 +1696,9 @@ impl SimpleEvaluator { fn parse_numeric_operands( &self, - e1: &PlanExpression, - e2: &PlanExpression, - tuple: &EncodedTuple, + e1: &PlanExpression, + e2: &PlanExpression, + tuple: &EncodedTuple, ) -> Option { NumericBinaryOperands::new( self.eval_expression(e1, tuple)?, @@ -1671,7 +1708,7 @@ impl SimpleEvaluator { fn decode_bindings( &self, - iter: EncodedTuplesIterator, + iter: EncodedTuplesIterator, variables: Rc>, ) -> QuerySolutionsIterator { let eval = self.clone(); @@ -1695,10 +1732,10 @@ impl SimpleEvaluator { &self, variables: Rc>, iter: QuerySolutionsIterator, - ) -> EncodedTuplesIterator { + ) -> EncodedTuplesIterator { let eval = self.clone(); Box::new(iter.map(move |solution| { - let mut encoder = eval.dataset.encoder(); + let mut encoder = eval.dataset.as_ref(); let mut encoded_terms = EncodedTuple::with_capacity(variables.len()); for (variable, term) in solution?.iter() { put_variable_value( @@ -1717,7 +1754,7 @@ impl SimpleEvaluator { clippy::cast_possible_truncation, clippy::cast_precision_loss )] - fn equals(&self, a: EncodedTerm, b: EncodedTerm) -> Option { + fn equals(&self, a: EncodedTerm, b: EncodedTerm) -> Option { match a { EncodedTerm::DefaultGraph | EncodedTerm::NamedNode { .. } @@ -1815,9 +1852,9 @@ impl SimpleEvaluator { fn cmp_according_to_expression( &self, - tuple_a: &EncodedTuple, - tuple_b: &EncodedTuple, - expression: &PlanExpression, + tuple_a: &EncodedTuple, + tuple_b: &EncodedTuple, + expression: &PlanExpression, ) -> Ordering { self.cmp_terms( self.eval_expression(expression, tuple_a), @@ -1825,7 +1862,11 @@ impl SimpleEvaluator { ) } - fn cmp_terms(&self, a: Option, b: Option) -> Ordering { + fn cmp_terms( + &self, + a: Option>, + b: Option>, + ) -> Ordering { match (a, b) { (Some(a), Some(b)) => match a { EncodedTerm::InlineBlankNode { .. } | EncodedTerm::NamedBlankNode { .. } => { @@ -1858,7 +1899,11 @@ impl SimpleEvaluator { } #[allow(clippy::cast_precision_loss)] - fn partial_cmp_literals(&self, a: EncodedTerm, b: EncodedTerm) -> Option { + fn partial_cmp_literals( + &self, + a: EncodedTerm, + b: EncodedTerm, + ) -> Option { match a { EncodedTerm::StringLiteral { value_id: a } => { if let EncodedTerm::StringLiteral { value_id: b } = b { @@ -1938,7 +1983,7 @@ impl SimpleEvaluator { } } - fn compare_str_ids(&self, a: StrHash, b: StrHash) -> Option { + fn compare_str_ids(&self, a: S::StrId, b: S::StrId) -> Option { Some( self.dataset .get_str(a) @@ -1947,13 +1992,17 @@ impl SimpleEvaluator { ) } - fn hash(&self, arg: &PlanExpression, tuple: &EncodedTuple) -> Option { + fn hash( + &self, + arg: &PlanExpression, + tuple: &EncodedTuple, + ) -> Option> { let input = self.to_simple_string(self.eval_expression(arg, tuple)?)?; let hash = hex::encode(H::new().chain(input.as_str()).finalize()); self.build_string_literal(&hash) } - fn datatype(&self, value: EncodedTerm) -> Option { + fn datatype(&self, value: EncodedTerm) -> Option> { //TODO: optimize? match value { EncodedTerm::NamedNode { .. } @@ -2029,7 +2078,7 @@ enum NumericBinaryOperands { impl NumericBinaryOperands { #[allow(clippy::cast_precision_loss)] - fn new(a: EncodedTerm, b: EncodedTerm) -> Option { + fn new(a: EncodedTerm, b: EncodedTerm) -> Option { match (a, b) { (EncodedTerm::FloatLiteral(v1), EncodedTerm::FloatLiteral(v2)) => { Some(NumericBinaryOperands::Float(v1, v2)) @@ -2147,25 +2196,32 @@ impl NumericBinaryOperands { } } -fn get_pattern_value(selector: &PatternValue, tuple: &EncodedTuple) -> Option { +fn get_pattern_value( + selector: &PatternValue, + tuple: &EncodedTuple, +) -> Option> { match selector { PatternValue::Constant(term) => Some(*term), PatternValue::Variable(v) => tuple.get(*v), } } -fn put_pattern_value(selector: &PatternValue, value: EncodedTerm, tuple: &mut EncodedTuple) { +fn put_pattern_value( + selector: &PatternValue, + value: EncodedTerm, + tuple: &mut EncodedTuple, +) { match selector { PatternValue::Constant(_) => (), PatternValue::Variable(v) => tuple.set(*v, value), } } -fn put_variable_value( +fn put_variable_value( selector: &Variable, variables: &[Variable], - value: EncodedTerm, - tuple: &mut EncodedTuple, + value: EncodedTerm, + tuple: &mut EncodedTuple, ) { for (i, v) in variables.iter().enumerate() { if selector == v { @@ -2175,13 +2231,17 @@ fn put_variable_value( } } -fn unbind_variables(binding: &mut EncodedTuple, variables: &[usize]) { +fn unbind_variables(binding: &mut EncodedTuple, variables: &[usize]) { for var in variables { binding.unset(*var) } } -fn combine_tuples(mut a: EncodedTuple, b: &EncodedTuple, vars: &[usize]) -> Option { +fn combine_tuples( + mut a: EncodedTuple, + b: &EncodedTuple, + vars: &[usize], +) -> Option> { for var in vars { if let Some(b_value) = b.get(*var) { if let Some(a_value) = a.get(*var) { @@ -2196,7 +2256,10 @@ fn combine_tuples(mut a: EncodedTuple, b: &EncodedTuple, vars: &[usize]) -> Opti Some(a) } -pub fn are_compatible_and_not_disjointed(a: &EncodedTuple, b: &EncodedTuple) -> bool { +pub fn are_compatible_and_not_disjointed( + a: &EncodedTuple, + b: &EncodedTuple, +) -> bool { let mut found_intersection = false; for (a_value, b_value) in a.iter().zip(b.iter()) { if let (Some(a_value), Some(b_value)) = (a_value, b_value) { @@ -2209,16 +2272,16 @@ pub fn are_compatible_and_not_disjointed(a: &EncodedTuple, b: &EncodedTuple) -> found_intersection } -struct JoinIterator { - left: Vec, - right_iter: EncodedTuplesIterator, - buffered_results: Vec>, +struct JoinIterator { + left: Vec>, + right_iter: EncodedTuplesIterator, + buffered_results: Vec, EvaluationError>>, } -impl Iterator for JoinIterator { - type Item = Result; +impl Iterator for JoinIterator { + type Item = Result, EvaluationError>; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option, EvaluationError>> { loop { if let Some(result) = self.buffered_results.pop() { return Some(result); @@ -2236,15 +2299,15 @@ impl Iterator for JoinIterator { } } -struct AntiJoinIterator { - left_iter: EncodedTuplesIterator, - right: Vec, +struct AntiJoinIterator { + left_iter: EncodedTuplesIterator, + right: Vec>, } -impl Iterator for AntiJoinIterator { - type Item = Result; +impl Iterator for AntiJoinIterator { + type Item = Result, EvaluationError>; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option, EvaluationError>> { loop { match self.left_iter.next()? { Ok(left_tuple) => { @@ -2263,15 +2326,18 @@ impl Iterator for AntiJoinIterator { struct LeftJoinIterator { eval: SimpleEvaluator, - right_plan: Rc, - left_iter: EncodedTuplesIterator, - current_right: EncodedTuplesIterator, + right_plan: Rc>, + left_iter: EncodedTuplesIterator, + current_right: EncodedTuplesIterator, } -impl Iterator for LeftJoinIterator { - type Item = Result; +impl + 'static> Iterator for LeftJoinIterator +where + for<'a> &'a S: StrContainer, +{ + type Item = Result, EvaluationError>; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option, EvaluationError>> { if let Some(tuple) = self.current_right.next() { return Some(tuple); } @@ -2291,17 +2357,20 @@ impl Iterator for LeftJoinIterator { struct BadLeftJoinIterator { eval: SimpleEvaluator, - right_plan: Rc, - left_iter: EncodedTuplesIterator, - current_left: Option, - current_right: EncodedTuplesIterator, + right_plan: Rc>, + left_iter: EncodedTuplesIterator, + current_left: Option>, + current_right: EncodedTuplesIterator, problem_vars: Rc>, } -impl Iterator for BadLeftJoinIterator { - type Item = Result; +impl + 'static> Iterator for BadLeftJoinIterator +where + for<'a> &'a S: StrContainer, +{ + type Item = Result, EvaluationError>; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option, EvaluationError>> { while let Some(right_tuple) = self.current_right.next() { match right_tuple { Ok(right_tuple) => { @@ -2343,16 +2412,19 @@ impl Iterator for BadLeftJoinIterator { struct UnionIterator { eval: SimpleEvaluator, - plans: Vec>, - input: EncodedTuple, - current_iterator: EncodedTuplesIterator, + plans: Vec>>, + input: EncodedTuple, + current_iterator: EncodedTuplesIterator, current_plan: usize, } -impl Iterator for UnionIterator { - type Item = Result; +impl + 'static> Iterator for UnionIterator +where + for<'a> &'a S: StrContainer, +{ + type Item = Result, EvaluationError>; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option, EvaluationError>> { loop { if let Some(tuple) = self.current_iterator.next() { return Some(tuple); @@ -2370,13 +2442,13 @@ impl Iterator for UnionIterator { struct ConstructIterator { eval: SimpleEvaluator, - iter: EncodedTuplesIterator, - template: Rc>, + iter: EncodedTuplesIterator, + template: Rc>>, buffered_results: Vec>, - bnodes: Vec, + bnodes: Vec>, } -impl Iterator for ConstructIterator { +impl + 'static> Iterator for ConstructIterator { type Item = Result; fn next(&mut self) -> Option> { @@ -2409,28 +2481,32 @@ impl Iterator for ConstructIterator { } } -fn get_triple_template_value( - selector: &TripleTemplateValue, - tuple: &EncodedTuple, - bnodes: &mut Vec, -) -> Option { +fn get_triple_template_value( + selector: &TripleTemplateValue, + tuple: &EncodedTuple, + bnodes: &mut Vec>, +) -> Option> { match selector { TripleTemplateValue::Constant(term) => Some(*term), TripleTemplateValue::Variable(v) => tuple.get(*v), TripleTemplateValue::BlankNode(id) => { if *id >= bnodes.len() { - bnodes.resize_with(*id, BlankNode::default) + bnodes.resize_with(*id, new_bnode) } - Some((&bnodes[*id]).into()) + Some(bnodes[*id]) } } } -fn decode_triple( - decoder: &impl Decoder, - subject: EncodedTerm, - predicate: EncodedTerm, - object: EncodedTerm, +fn new_bnode() -> EncodedTerm { + EncodedTerm::InlineBlankNode { id: random() } +} + +fn decode_triple( + decoder: &D, + subject: EncodedTerm, + predicate: EncodedTerm, + object: EncodedTerm, ) -> Result { Ok(Triple::new( decoder.decode_named_or_blank_node(subject)?, @@ -2441,11 +2517,11 @@ fn decode_triple( struct DescribeIterator { eval: SimpleEvaluator, - iter: EncodedTuplesIterator, - quads: Box>>, + iter: EncodedTuplesIterator, + quads: Box, EvaluationError>>>, } -impl Iterator for DescribeIterator { +impl + 'static> Iterator for DescribeIterator { type Item = Result; fn next(&mut self) -> Option> { @@ -2467,10 +2543,12 @@ impl Iterator for DescribeIterator { }; for subject in tuple.iter() { if let Some(subject) = subject { - self.quads = - self.eval - .dataset - .quads_for_pattern(Some(subject), None, None, None); + self.quads = Box::new(self.eval.dataset.encoded_quads_for_pattern( + Some(subject), + None, + None, + None, + )); } } } @@ -2621,19 +2699,19 @@ impl< } } -trait Accumulator { - fn add(&mut self, element: Option); +trait Accumulator { + fn add(&mut self, element: Option>); - fn state(&self) -> Option; + fn state(&self) -> Option>; } #[derive(Default, Debug)] -struct DistinctAccumulator { - seen: HashSet>, +struct DistinctAccumulator> { + seen: HashSet>>, inner: T, } -impl DistinctAccumulator { +impl> DistinctAccumulator { fn new(inner: T) -> Self { Self { seen: HashSet::default(), @@ -2642,14 +2720,14 @@ impl DistinctAccumulator { } } -impl Accumulator for DistinctAccumulator { - fn add(&mut self, element: Option) { +impl> Accumulator for DistinctAccumulator { + fn add(&mut self, element: Option>) { if self.seen.insert(element) { self.inner.add(element) } } - fn state(&self) -> Option { + fn state(&self) -> Option> { self.inner.state() } } @@ -2659,22 +2737,22 @@ struct CountAccumulator { count: i64, } -impl Accumulator for CountAccumulator { - fn add(&mut self, _element: Option) { +impl Accumulator for CountAccumulator { + fn add(&mut self, _element: Option>) { self.count += 1; } - fn state(&self) -> Option { + fn state(&self) -> Option> { Some(self.count.into()) } } #[derive(Debug)] -struct SumAccumulator { - sum: Option, +struct SumAccumulator { + sum: Option>, } -impl Default for SumAccumulator { +impl Default for SumAccumulator { fn default() -> Self { Self { sum: Some(0.into()), @@ -2682,8 +2760,8 @@ impl Default for SumAccumulator { } } -impl Accumulator for SumAccumulator { - fn add(&mut self, element: Option) { +impl Accumulator for SumAccumulator { + fn add(&mut self, element: Option>) { if let Some(sum) = self.sum { if let Some(operands) = element.and_then(|e| NumericBinaryOperands::new(sum, e)) { //TODO: unify with addition? @@ -2701,24 +2779,33 @@ impl Accumulator for SumAccumulator { } } - fn state(&self) -> Option { + fn state(&self) -> Option> { self.sum } } -#[derive(Debug, Default)] -struct AvgAccumulator { - sum: SumAccumulator, +#[derive(Debug)] +struct AvgAccumulator { + sum: SumAccumulator, count: CountAccumulator, } -impl Accumulator for AvgAccumulator { - fn add(&mut self, element: Option) { +impl Default for AvgAccumulator { + fn default() -> Self { + Self { + sum: SumAccumulator::default(), + count: CountAccumulator::default(), + } + } +} + +impl Accumulator for AvgAccumulator { + fn add(&mut self, element: Option>) { self.sum.add(element); self.count.add(element); } - fn state(&self) -> Option { + fn state(&self) -> Option> { let sum = self.sum.state()?; let count = self.count.state()?; if count == EncodedTerm::from(0) { @@ -2742,7 +2829,7 @@ impl Accumulator for AvgAccumulator { #[allow(clippy::option_option)] struct MinAccumulator { eval: SimpleEvaluator, - min: Option>, + min: Option>>, } impl MinAccumulator { @@ -2751,8 +2838,12 @@ impl MinAccumulator { } } -impl Accumulator for MinAccumulator { - fn add(&mut self, element: Option) { +impl + 'static> Accumulator + for MinAccumulator +where + for<'a> &'a S: StrContainer, +{ + fn add(&mut self, element: Option>) { if let Some(min) = self.min { if self.eval.cmp_terms(element, min) == Ordering::Less { self.min = Some(element) @@ -2762,7 +2853,7 @@ impl Accumulator for MinAccumulator { } } - fn state(&self) -> Option { + fn state(&self) -> Option> { self.min.and_then(|v| v) } } @@ -2770,7 +2861,7 @@ impl Accumulator for MinAccumulator { #[allow(clippy::option_option)] struct MaxAccumulator { eval: SimpleEvaluator, - max: Option>, + max: Option>>, } impl MaxAccumulator { @@ -2779,8 +2870,12 @@ impl MaxAccumulator { } } -impl Accumulator for MaxAccumulator { - fn add(&mut self, element: Option) { +impl + 'static> Accumulator + for MaxAccumulator +where + for<'a> &'a S: StrContainer, +{ + fn add(&mut self, element: Option>) { if let Some(max) = self.max { if self.eval.cmp_terms(element, max) == Ordering::Greater { self.max = Some(element) @@ -2790,24 +2885,30 @@ impl Accumulator for MaxAccumulator { } } - fn state(&self) -> Option { + fn state(&self) -> Option> { self.max.and_then(|v| v) } } -#[derive(Default, Debug)] -struct SampleAccumulator { - value: Option, +#[derive(Debug)] +struct SampleAccumulator { + value: Option>, } -impl Accumulator for SampleAccumulator { - fn add(&mut self, element: Option) { +impl Default for SampleAccumulator { + fn default() -> Self { + Self { value: None } + } +} + +impl Accumulator for SampleAccumulator { + fn add(&mut self, element: Option>) { if element.is_some() { self.value = element } } - fn state(&self) -> Option { + fn state(&self) -> Option> { self.value } } @@ -2816,7 +2917,7 @@ impl Accumulator for SampleAccumulator { struct GroupConcatAccumulator { eval: SimpleEvaluator, concat: Option, - language: Option>, + language: Option>, separator: Rc, } @@ -2831,8 +2932,12 @@ impl GroupConcatAccumulator { } } -impl Accumulator for GroupConcatAccumulator { - fn add(&mut self, element: Option) { +impl + 'static> Accumulator + for GroupConcatAccumulator +where + for<'a> &'a S: StrContainer, +{ + fn add(&mut self, element: Option>) { if let Some(concat) = self.concat.as_mut() { if let Some(element) = element { if let Some((value, e_language)) = self.eval.to_string_and_language(element) { @@ -2850,7 +2955,7 @@ impl Accumulator for GroupConcatAccumulator Option { + fn state(&self) -> Option> { self.concat.as_ref().and_then(|result| { self.eval .build_plain_literal(result, self.language.and_then(|v| v)) diff --git a/lib/src/sparql/mod.rs b/lib/src/sparql/mod.rs index 2b75ae77..a8c66a46 100644 --- a/lib/src/sparql/mod.rs +++ b/lib/src/sparql/mod.rs @@ -1,6 +1,7 @@ //! [SPARQL](https://www.w3.org/TR/sparql11-overview/) implementation. mod algebra; +mod dataset; mod error; mod eval; mod json_results; @@ -16,8 +17,7 @@ use crate::sparql::eval::SimpleEvaluator; pub use crate::sparql::model::QuerySolution; pub use crate::sparql::model::QuerySolutionsIterator; pub use crate::sparql::model::QueryTriplesIterator; -use crate::sparql::plan::TripleTemplate; -use crate::sparql::plan::{DatasetView, PlanNode}; +use crate::sparql::plan::{PlanNode, TripleTemplate}; use crate::sparql::plan_builder::PlanBuilder; use crate::store::ReadableEncodedStore; use std::convert::TryInto; @@ -28,10 +28,12 @@ pub use crate::sparql::model::QueryResult; pub use crate::sparql::model::QueryResultFormat; #[deprecated(note = "Use QueryResultFormat instead")] pub type QueryResultSyntax = QueryResultFormat; +use crate::sparql::dataset::DatasetView; pub use crate::sparql::error::EvaluationError; pub use crate::sparql::model::Variable; pub use crate::sparql::parser::ParseError; pub use crate::sparql::parser::Query; +use crate::store::numeric_encoder::WithStoreError; use std::error::Error; /// A prepared [SPARQL query](https://www.w3.org/TR/sparql11-query/) @@ -48,22 +50,22 @@ pub(crate) struct SimplePreparedQuery( #[derive(Clone)] enum SimplePreparedQueryAction { Select { - plan: Rc, + plan: Rc as WithStoreError>::StrId>>, variables: Rc>, - evaluator: SimpleEvaluator, + evaluator: SimpleEvaluator>, }, Ask { - plan: Rc, - evaluator: SimpleEvaluator, + plan: Rc as WithStoreError>::StrId>>, + evaluator: SimpleEvaluator>, }, Construct { - plan: Rc, - construct: Rc>, - evaluator: SimpleEvaluator, + plan: Rc as WithStoreError>::StrId>>, + construct: Rc as WithStoreError>::StrId>>>, + evaluator: SimpleEvaluator>, }, Describe { - plan: Rc, - evaluator: SimpleEvaluator, + plan: Rc as WithStoreError>::StrId>>, + evaluator: SimpleEvaluator>, }, } @@ -78,7 +80,7 @@ impl SimplePreparedQuery { QueryVariants::Select { algebra, base_iri, .. } => { - let (plan, variables) = PlanBuilder::build(dataset.encoder(), &algebra)?; + let (plan, variables) = PlanBuilder::build(dataset.as_ref(), &algebra)?; SimplePreparedQueryAction::Select { plan: Rc::new(plan), variables: Rc::new(variables), @@ -88,7 +90,7 @@ impl SimplePreparedQuery { QueryVariants::Ask { algebra, base_iri, .. } => { - let (plan, _) = PlanBuilder::build(dataset.encoder(), &algebra)?; + let (plan, _) = PlanBuilder::build(dataset.as_ref(), &algebra)?; SimplePreparedQueryAction::Ask { plan: Rc::new(plan), evaluator: SimpleEvaluator::new(dataset, base_iri, options.service_handler), @@ -100,11 +102,11 @@ impl SimplePreparedQuery { base_iri, .. } => { - let (plan, variables) = PlanBuilder::build(dataset.encoder(), &algebra)?; + let (plan, variables) = PlanBuilder::build(dataset.as_ref(), &algebra)?; SimplePreparedQueryAction::Construct { plan: Rc::new(plan), construct: Rc::new(PlanBuilder::build_graph_template( - dataset.encoder(), + dataset.as_ref(), &construct, variables, )?), @@ -114,7 +116,7 @@ impl SimplePreparedQuery { QueryVariants::Describe { algebra, base_iri, .. } => { - let (plan, _) = PlanBuilder::build(dataset.encoder(), &algebra)?; + let (plan, _) = PlanBuilder::build(dataset.as_ref(), &algebra)?; SimplePreparedQueryAction::Describe { plan: Rc::new(plan), evaluator: SimpleEvaluator::new(dataset, base_iri, options.service_handler), diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index b08d7a66..88e47e91 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -1,96 +1,89 @@ -use crate::error::UnwrapInfallible; use crate::sparql::algebra::GraphPattern; -use crate::sparql::error::EvaluationError; use crate::sparql::model::Variable; -use crate::store::numeric_encoder::{ - EncodedQuad, EncodedTerm, Encoder, MemoryStrStore, StrContainer, StrHash, StrLookup, - WithStoreError, -}; -use crate::store::ReadableEncodedStore; -use std::cell::{RefCell, RefMut}; +use crate::store::numeric_encoder::{EncodedTerm, StrId}; use std::collections::BTreeSet; use std::rc::Rc; #[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum PlanNode { +pub enum PlanNode { Init, StaticBindings { - tuples: Vec, + tuples: Vec>, }, Service { - service_name: PatternValue, + service_name: PatternValue, variables: Rc>, - child: Rc, + child: Rc>, graph_pattern: Rc, silent: bool, }, QuadPatternJoin { - child: Rc, - subject: PatternValue, - predicate: PatternValue, - object: PatternValue, - graph_name: PatternValue, + child: Rc>, + subject: PatternValue, + predicate: PatternValue, + object: PatternValue, + graph_name: PatternValue, }, PathPatternJoin { - child: Rc, - subject: PatternValue, - path: Rc, - object: PatternValue, - graph_name: PatternValue, + child: Rc>, + subject: PatternValue, + path: Rc>, + object: PatternValue, + graph_name: PatternValue, }, Join { - left: Rc, - right: Rc, + left: Rc>, + right: Rc>, }, AntiJoin { - left: Rc, - right: Rc, + left: Rc>, + right: Rc>, }, Filter { - child: Rc, - expression: Rc, + child: Rc>, + expression: Rc>, }, Union { - children: Vec>, + children: Vec>>, }, LeftJoin { - left: Rc, - right: Rc, + left: Rc>, + right: Rc>, possible_problem_vars: Rc>, //Variables that should not be part of the entry of the left join }, Extend { - child: Rc, + child: Rc>, position: usize, - expression: Rc, + expression: Rc>, }, Sort { - child: Rc, - by: Vec, + child: Rc>, + by: Vec>, }, HashDeduplicate { - child: Rc, + child: Rc>, }, Skip { - child: Rc, + child: Rc>, count: usize, }, Limit { - child: Rc, + child: Rc>, count: usize, }, Project { - child: Rc, + child: Rc>, mapping: Rc>, // pairs of (variable key in child, variable key in output) }, Aggregate { // By definition the group by key are the range 0..key_mapping.len() - child: Rc, + child: Rc>, key_mapping: Rc>, // aggregate key pairs of (variable key in child, variable key in output) - aggregates: Rc>, + aggregates: Rc, usize)>>, }, } -impl PlanNode { +impl PlanNode { /// Returns variables that might be bound in the result set pub fn maybe_bound_variables(&self) -> BTreeSet { let mut set = BTreeSet::default(); @@ -201,12 +194,12 @@ impl PlanNode { } #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] -pub enum PatternValue { - Constant(EncodedTerm), +pub enum PatternValue { + Constant(EncodedTerm), Variable(usize), } -impl PatternValue { +impl PatternValue { pub fn is_var(&self) -> bool { match self { PatternValue::Constant(_) => false, @@ -216,108 +209,108 @@ impl PatternValue { } #[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum PlanExpression { - Constant(EncodedTerm), +pub enum PlanExpression { + Constant(EncodedTerm), Variable(usize), - Exists(Rc), - Or(Box, Box), - And(Box, Box), - Equal(Box, Box), - NotEqual(Box, Box), - Greater(Box, Box), - GreaterOrEq(Box, Box), - Lower(Box, Box), - LowerOrEq(Box, Box), - In(Box, Vec), - Add(Box, Box), - Sub(Box, Box), - Mul(Box, Box), - Div(Box, Box), - UnaryPlus(Box), - UnaryMinus(Box), - UnaryNot(Box), - Str(Box), - Lang(Box), - LangMatches(Box, Box), - Datatype(Box), + Exists(Rc>), + Or(Box>, Box>), + And(Box>, Box>), + Equal(Box>, Box>), + NotEqual(Box>, Box>), + Greater(Box>, Box>), + GreaterOrEq(Box>, Box>), + Lower(Box>, Box>), + LowerOrEq(Box>, Box>), + In(Box>, Vec>), + Add(Box>, Box>), + Sub(Box>, Box>), + Mul(Box>, Box>), + Div(Box>, Box>), + UnaryPlus(Box>), + UnaryMinus(Box>), + UnaryNot(Box>), + Str(Box>), + Lang(Box>), + LangMatches(Box>, Box>), + Datatype(Box>), Bound(usize), - IRI(Box), - BNode(Option>), + IRI(Box>), + BNode(Option>>), Rand, - Abs(Box), - Ceil(Box), - Floor(Box), - Round(Box), - Concat(Vec), + Abs(Box>), + Ceil(Box>), + Floor(Box>), + Round(Box>), + Concat(Vec>), SubStr( - Box, - Box, - Option>, + Box>, + Box>, + Option>>, ), - StrLen(Box), + StrLen(Box>), Replace( - Box, - Box, - Box, - Option>, + Box>, + Box>, + Box>, + Option>>, ), - UCase(Box), - LCase(Box), - EncodeForURI(Box), - Contains(Box, Box), - StrStarts(Box, Box), - StrEnds(Box, Box), - StrBefore(Box, Box), - StrAfter(Box, Box), - Year(Box), - Month(Box), - Day(Box), - Hours(Box), - Minutes(Box), - Seconds(Box), - Timezone(Box), - Tz(Box), + UCase(Box>), + LCase(Box>), + EncodeForURI(Box>), + Contains(Box>, Box>), + StrStarts(Box>, Box>), + StrEnds(Box>, Box>), + StrBefore(Box>, Box>), + StrAfter(Box>, Box>), + Year(Box>), + Month(Box>), + Day(Box>), + Hours(Box>), + Minutes(Box>), + Seconds(Box>), + Timezone(Box>), + Tz(Box>), Now, UUID, StrUUID, - MD5(Box), - SHA1(Box), - SHA256(Box), - SHA384(Box), - SHA512(Box), - Coalesce(Vec), + MD5(Box>), + SHA1(Box>), + SHA256(Box>), + SHA384(Box>), + SHA512(Box>), + Coalesce(Vec>), If( - Box, - Box, - Box, + Box>, + Box>, + Box>, ), - StrLang(Box, Box), - StrDT(Box, Box), - SameTerm(Box, Box), - IsIRI(Box), - IsBlank(Box), - IsLiteral(Box), - IsNumeric(Box), + StrLang(Box>, Box>), + StrDT(Box>, Box>), + SameTerm(Box>, Box>), + IsIRI(Box>), + IsBlank(Box>), + IsLiteral(Box>), + IsNumeric(Box>), Regex( - Box, - Box, - Option>, + Box>, + Box>, + Option>>, ), - BooleanCast(Box), - DoubleCast(Box), - FloatCast(Box), - DecimalCast(Box), - IntegerCast(Box), - DateCast(Box), - TimeCast(Box), - DateTimeCast(Box), - DurationCast(Box), - YearMonthDurationCast(Box), - DayTimeDurationCast(Box), - StringCast(Box), + BooleanCast(Box>), + DoubleCast(Box>), + FloatCast(Box>), + DecimalCast(Box>), + IntegerCast(Box>), + DateCast(Box>), + TimeCast(Box>), + DateTimeCast(Box>), + DurationCast(Box>), + YearMonthDurationCast(Box>), + DayTimeDurationCast(Box>), + StringCast(Box>), } -impl PlanExpression { +impl PlanExpression { pub fn add_maybe_bound_variables(&self, set: &mut BTreeSet) { match self { PlanExpression::Variable(v) | PlanExpression::Bound(v) => { @@ -434,9 +427,9 @@ impl PlanExpression { } #[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub struct PlanAggregation { +pub struct PlanAggregation { pub function: PlanAggregationFunction, - pub parameter: Option, + pub parameter: Option>, pub distinct: bool, } @@ -452,43 +445,43 @@ pub enum PlanAggregationFunction { } #[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum PlanPropertyPath { - PredicatePath(EncodedTerm), - InversePath(Rc), - SequencePath(Rc, Rc), - AlternativePath(Rc, Rc), - ZeroOrMorePath(Rc), - OneOrMorePath(Rc), - ZeroOrOnePath(Rc), - NegatedPropertySet(Rc>), +pub enum PlanPropertyPath { + PredicatePath(EncodedTerm), + InversePath(Rc>), + SequencePath(Rc>, Rc>), + AlternativePath(Rc>, Rc>), + ZeroOrMorePath(Rc>), + OneOrMorePath(Rc>), + ZeroOrOnePath(Rc>), + NegatedPropertySet(Rc>>), } #[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum Comparator { - Asc(PlanExpression), - Desc(PlanExpression), +pub enum Comparator { + Asc(PlanExpression), + Desc(PlanExpression), } #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] -pub struct TripleTemplate { - pub subject: TripleTemplateValue, - pub predicate: TripleTemplateValue, - pub object: TripleTemplateValue, +pub struct TripleTemplate { + pub subject: TripleTemplateValue, + pub predicate: TripleTemplateValue, + pub object: TripleTemplateValue, } #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] -pub enum TripleTemplateValue { - Constant(EncodedTerm), +pub enum TripleTemplateValue { + Constant(EncodedTerm), BlankNode(usize), Variable(usize), } #[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub struct EncodedTuple { - inner: Vec>, +pub struct EncodedTuple { + inner: Vec>>, } -impl EncodedTuple { +impl EncodedTuple { pub fn with_capacity(capacity: usize) -> Self { Self { inner: Vec::with_capacity(capacity), @@ -503,15 +496,15 @@ impl EncodedTuple { self.inner.get(index).map_or(false, Option::is_some) } - pub fn get(&self, index: usize) -> Option { + pub fn get(&self, index: usize) -> Option> { self.inner.get(index).cloned().unwrap_or(None) } - pub fn iter<'a>(&'a self) -> impl Iterator> + 'a { + pub fn iter<'a>(&'a self) -> impl Iterator>> + 'a { self.inner.iter().cloned() } - pub fn set(&mut self, index: usize, value: EncodedTerm) { + pub fn set(&mut self, index: usize, value: EncodedTerm) { if self.inner.len() <= index { self.inner.resize(index + 1, None); } @@ -524,7 +517,7 @@ impl EncodedTuple { } } - pub fn combine_with(&self, other: &EncodedTuple) -> Option { + pub fn combine_with(&self, other: &EncodedTuple) -> Option { if self.inner.len() < other.inner.len() { let mut result = other.inner.to_owned(); for (key, self_value) in self.inner.iter().enumerate() { @@ -558,106 +551,3 @@ impl EncodedTuple { } } } - -pub(crate) struct DatasetView { - store: S, - extra: RefCell, - default_graph_as_union: bool, -} - -impl DatasetView { - pub fn new(store: S, default_graph_as_union: bool) -> Self { - Self { - store, - extra: RefCell::new(MemoryStrStore::default()), - default_graph_as_union, - } - } - - pub fn quads_for_pattern( - &self, - subject: Option, - predicate: Option, - object: Option, - graph_name: Option, - ) -> Box>> { - if graph_name == None { - Box::new( - map_iter_err( - self.store - .encoded_quads_for_pattern(subject, predicate, object, None), - ) - .filter(|quad| match quad { - Err(_) => true, - Ok(quad) => quad.graph_name != EncodedTerm::DefaultGraph, - }), - ) - } else if graph_name == Some(EncodedTerm::DefaultGraph) && self.default_graph_as_union { - Box::new( - map_iter_err( - self.store - .encoded_quads_for_pattern(subject, predicate, object, None), - ) - .map(|quad| { - let quad = quad?; - Ok(EncodedQuad::new( - quad.subject, - quad.predicate, - quad.object, - EncodedTerm::DefaultGraph, - )) - }), - ) - } else { - Box::new(map_iter_err(self.store.encoded_quads_for_pattern( - subject, predicate, object, graph_name, - ))) - } - } - - pub fn encoder<'a>(&'a self) -> impl Encoder + StrContainer + 'a { - DatasetViewStrContainer { - store: &self.store, - extra: self.extra.borrow_mut(), - } - } -} - -fn map_iter_err<'a, T>( - iter: impl Iterator>> + 'a, -) -> impl Iterator> + 'a { - iter.map(|e| e.map_err(|e| e.into())) -} - -impl WithStoreError for DatasetView { - type Error = S::Error; -} -impl StrLookup for DatasetView { - fn get_str(&self, id: StrHash) -> Result, Self::Error> { - if let Some(value) = self.extra.borrow().get_str(id).unwrap_infallible() { - Ok(Some(value)) - } else { - self.store.get_str(id) - } - } -} - -struct DatasetViewStrContainer<'a, S: ReadableEncodedStore> { - store: &'a S, - extra: RefMut<'a, MemoryStrStore>, -} - -impl<'a, S: ReadableEncodedStore> WithStoreError for DatasetViewStrContainer<'a, S> { - type Error = S::Error; -} - -impl<'a, S: ReadableEncodedStore> StrContainer for DatasetViewStrContainer<'a, S> { - fn insert_str(&mut self, value: &str) -> Result { - let key = StrHash::new(value); - if self.store.get_str(key)?.is_none() { - Ok(self.extra.insert_str(value).unwrap_infallible()) - } else { - Ok(key) - } - } -} diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index 0bcfa2d6..6ade4421 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -1,21 +1,21 @@ -use crate::model::{BlankNode, Term}; +use crate::model::{BlankNode, Literal, NamedNode, Term}; use crate::sparql::algebra::*; use crate::sparql::error::EvaluationError; use crate::sparql::model::*; use crate::sparql::plan::*; -use crate::store::numeric_encoder::{EncodedTerm, Encoder}; +use crate::store::numeric_encoder::{EncodedTerm, WriteEncoder}; use std::collections::{BTreeSet, HashSet}; use std::rc::Rc; -pub(crate) struct PlanBuilder { +pub(crate) struct PlanBuilder { encoder: E, } -impl PlanBuilder { +impl> PlanBuilder { pub fn build( encoder: E, pattern: &GraphPattern, - ) -> Result<(PlanNode, Vec), EvaluationError> { + ) -> Result<(PlanNode, Vec), EvaluationError> { let mut variables = Vec::default(); let plan = PlanBuilder { encoder }.build_for_graph_pattern( pattern, @@ -29,7 +29,7 @@ impl PlanBuilder { encoder: E, template: &[TriplePattern], mut variables: Vec, - ) -> Result, EvaluationError> { + ) -> Result>, EvaluationError> { PlanBuilder { encoder }.build_for_graph_template(template, &mut variables) } @@ -37,8 +37,8 @@ impl PlanBuilder { &mut self, pattern: &GraphPattern, variables: &mut Vec, - graph_name: PatternValue, - ) -> Result { + graph_name: PatternValue, + ) -> Result, EvaluationError> { Ok(match pattern { GraphPattern::BGP(p) => self.build_for_bgp(p, variables, graph_name)?, GraphPattern::Join(a, b) => PlanNode::Join { @@ -217,8 +217,8 @@ impl PlanBuilder { &mut self, p: &[TripleOrPathPattern], variables: &mut Vec, - graph_name: PatternValue, - ) -> Result { + graph_name: PatternValue, + ) -> Result, EvaluationError> { let mut plan = PlanNode::Init; for pattern in sort_bgp(p) { plan = match pattern { @@ -244,11 +244,14 @@ impl PlanBuilder { Ok(plan) } - fn build_for_path(&mut self, path: &PropertyPath) -> Result { + fn build_for_path( + &mut self, + path: &PropertyPath, + ) -> Result, EvaluationError> { Ok(match path { - PropertyPath::PredicatePath(p) => PlanPropertyPath::PredicatePath( - self.encoder.encode_named_node(p).map_err(|e| e.into())?, - ), + PropertyPath::PredicatePath(p) => { + PlanPropertyPath::PredicatePath(self.build_named_node(p)?) + } PropertyPath::InversePath(p) => { PlanPropertyPath::InversePath(Rc::new(self.build_for_path(p)?)) } @@ -271,7 +274,7 @@ impl PlanBuilder { } PropertyPath::NegatedPropertySet(p) => PlanPropertyPath::NegatedPropertySet(Rc::new( p.iter() - .map(|p| self.encoder.encode_named_node(p).map_err(|e| e.into())) + .map(|p| self.build_named_node(p)) .collect::, _>>()?, )), }) @@ -281,15 +284,11 @@ impl PlanBuilder { &mut self, expression: &Expression, variables: &mut Vec, - graph_name: PatternValue, - ) -> Result { + graph_name: PatternValue, + ) -> Result, EvaluationError> { Ok(match expression { - Expression::NamedNode(node) => PlanExpression::Constant( - self.encoder.encode_named_node(node).map_err(|e| e.into())?, - ), - Expression::Literal(l) => { - PlanExpression::Constant(self.encoder.encode_literal(l).map_err(|e| e.into())?) - } + Expression::NamedNode(node) => PlanExpression::Constant(self.build_named_node(node)?), + Expression::Literal(l) => PlanExpression::Constant(self.build_literal(l)?), Expression::Variable(v) => PlanExpression::Variable(variable_key(variables, v)), Expression::Or(a, b) => PlanExpression::Or( Box::new(self.build_for_expression(a, variables, graph_name)?), @@ -695,11 +694,11 @@ impl PlanBuilder { fn build_cast( &mut self, parameters: &[Expression], - constructor: impl Fn(Box) -> PlanExpression, + constructor: impl Fn(Box>) -> PlanExpression, variables: &mut Vec, - graph_name: PatternValue, + graph_name: PatternValue, name: &'static str, - ) -> Result { + ) -> Result, EvaluationError> { if parameters.len() == 1 { Ok(constructor(Box::new(self.build_for_expression( ¶meters[0], @@ -718,8 +717,8 @@ impl PlanBuilder { &mut self, l: &[Expression], variables: &mut Vec, - graph_name: PatternValue, - ) -> Result, EvaluationError> { + graph_name: PatternValue, + ) -> Result>, EvaluationError> { l.iter() .map(|e| self.build_for_expression(e, variables, graph_name)) .collect() @@ -729,7 +728,7 @@ impl PlanBuilder { &mut self, term_or_variable: &TermOrVariable, variables: &mut Vec, - ) -> Result { + ) -> Result, EvaluationError> { Ok(match term_or_variable { TermOrVariable::Variable(variable) => { PatternValue::Variable(variable_key(variables, variable)) @@ -738,9 +737,7 @@ impl PlanBuilder { PatternValue::Variable(variable_key(variables, &Variable::new(bnode.as_str()))) //TODO: very bad hack to convert bnode to variable } - TermOrVariable::Term(term) => { - PatternValue::Constant(self.encoder.encode_term(term).map_err(|e| e.into())?) - } + TermOrVariable::Term(term) => PatternValue::Constant(self.build_term(term)?), }) } @@ -748,13 +745,11 @@ impl PlanBuilder { &mut self, named_node_or_variable: &NamedNodeOrVariable, variables: &mut Vec, - ) -> Result { + ) -> Result, EvaluationError> { Ok(match named_node_or_variable { - NamedNodeOrVariable::NamedNode(named_node) => PatternValue::Constant( - self.encoder - .encode_named_node(named_node) - .map_err(|e| e.into())?, - ), + NamedNodeOrVariable::NamedNode(named_node) => { + PatternValue::Constant(self.build_named_node(named_node)?) + } NamedNodeOrVariable::Variable(variable) => { PatternValue::Variable(variable_key(variables, variable)) } @@ -765,7 +760,7 @@ impl PlanBuilder { &mut self, bindings: &StaticBindings, variables: &mut Vec, - ) -> Result, EvaluationError> { + ) -> Result>, EvaluationError> { let bindings_variables_keys = bindings .variables() .iter() @@ -777,10 +772,7 @@ impl PlanBuilder { let mut result = EncodedTuple::with_capacity(variables.len()); for (key, value) in values.iter().enumerate() { if let Some(term) = value { - result.set( - bindings_variables_keys[key], - self.encoder.encode_term(term).map_err(|e| e.into())?, - ); + result.set(bindings_variables_keys[key], self.build_term(term)?); } } Ok(result) @@ -792,8 +784,8 @@ impl PlanBuilder { &mut self, aggregate: &Aggregation, variables: &mut Vec, - graph_name: PatternValue, - ) -> Result { + graph_name: PatternValue, + ) -> Result, EvaluationError> { Ok(match aggregate { Aggregation::Count(e, distinct) => PlanAggregation { function: PlanAggregationFunction::Count, @@ -842,7 +834,7 @@ impl PlanBuilder { &mut self, template: &[TriplePattern], variables: &mut Vec, - ) -> Result, EvaluationError> { + ) -> Result>, EvaluationError> { let mut bnodes = Vec::default(); template .iter() @@ -870,7 +862,7 @@ impl PlanBuilder { term_or_variable: &TermOrVariable, variables: &mut Vec, bnodes: &mut Vec, - ) -> Result { + ) -> Result, EvaluationError> { Ok(match term_or_variable { TermOrVariable::Variable(variable) => { TripleTemplateValue::Variable(variable_key(variables, variable)) @@ -878,9 +870,7 @@ impl PlanBuilder { TermOrVariable::Term(Term::BlankNode(bnode)) => { TripleTemplateValue::BlankNode(bnode_key(bnodes, bnode)) } - TermOrVariable::Term(term) => { - TripleTemplateValue::Constant(self.encoder.encode_term(term).map_err(|e| e.into())?) - } + TermOrVariable::Term(term) => TripleTemplateValue::Constant(self.build_term(term)?), }) } @@ -888,23 +878,23 @@ impl PlanBuilder { &mut self, named_node_or_variable: &NamedNodeOrVariable, variables: &mut Vec, - ) -> Result { + ) -> Result, EvaluationError> { Ok(match named_node_or_variable { NamedNodeOrVariable::Variable(variable) => { TripleTemplateValue::Variable(variable_key(variables, variable)) } - NamedNodeOrVariable::NamedNode(term) => TripleTemplateValue::Constant( - self.encoder.encode_named_node(term).map_err(|e| e.into())?, - ), + NamedNodeOrVariable::NamedNode(term) => { + TripleTemplateValue::Constant(self.build_named_node(term)?) + } }) } fn convert_pattern_value_id( &self, - from_value: PatternValue, + from_value: PatternValue, from: &[Variable], to: &mut Vec, - ) -> PatternValue { + ) -> PatternValue { match from_value { PatternValue::Constant(v) => PatternValue::Constant(v), PatternValue::Variable(from_id) => { @@ -933,7 +923,11 @@ impl PlanBuilder { } } - fn add_left_join_problematic_variables(&self, node: &PlanNode, set: &mut BTreeSet) { + fn add_left_join_problematic_variables( + &self, + node: &PlanNode, + set: &mut BTreeSet, + ) { match node { PlanNode::Init | PlanNode::StaticBindings { .. } @@ -995,6 +989,21 @@ impl PlanBuilder { } } } + + fn build_named_node( + &mut self, + term: &NamedNode, + ) -> Result, EvaluationError> { + Ok(self.encoder.encode_named_node(term)?) + } + + fn build_literal(&mut self, term: &Literal) -> Result, EvaluationError> { + Ok(self.encoder.encode_literal(term)?) + } + + fn build_term(&mut self, term: &Term) -> Result, EvaluationError> { + Ok(self.encoder.encode_term(term)?) + } } fn variable_key(variables: &mut Vec, variable: &Variable) -> usize { diff --git a/lib/src/store/memory.rs b/lib/src/store/memory.rs index b120cbc9..0e494bdc 100644 --- a/lib/src/store/memory.rs +++ b/lib/src/store/memory.rs @@ -4,9 +4,13 @@ use crate::error::UnwrapInfallible; use crate::io::{DatasetFormat, GraphFormat}; use crate::model::*; use crate::sparql::{EvaluationError, Query, QueryOptions, QueryResult, SimplePreparedQuery}; -use crate::store::numeric_encoder::*; +use crate::store::numeric_encoder::{ + write_term, Decoder, ReadEncoder, StrContainer, StrHash, StrLookup, WithStoreError, + WriteEncoder, WRITTEN_TERM_MAX_SIZE, +}; use crate::store::{ - dump_dataset, dump_graph, load_dataset, load_graph, ReadableEncodedStore, WritableEncodedStore, + dump_dataset, dump_graph, get_encoded_quad_pattern, load_dataset, load_graph, + ReadableEncodedStore, WritableEncodedStore, }; use std::collections::hash_map::DefaultHasher; use std::collections::{HashMap, HashSet}; @@ -55,6 +59,8 @@ type TrivialHashMap = HashMap>; type TrivialHashSet = HashSet>; type TripleMap = TrivialHashMap>>; type QuadMap = TrivialHashMap>; +type EncodedTerm = crate::store::numeric_encoder::EncodedTerm; +type EncodedQuad = crate::store::numeric_encoder::EncodedQuad; #[derive(Default)] struct MemoryStoreIndexes { @@ -169,22 +175,25 @@ impl MemoryStore { object: Option<&Term>, graph_name: Option<&GraphName>, ) -> impl Iterator { - let subject = subject.map(|s| s.into()); - let predicate = predicate.map(|p| p.into()); - let object = object.map(|o| o.into()); - let graph_name = graph_name.map(|g| g.into()); + let quads = if let Some((subject, predicate, object, graph_name)) = + get_encoded_quad_pattern(self, subject, predicate, object, graph_name) + .unwrap_infallible() + { + self.encoded_quads_for_pattern_inner(subject, predicate, object, graph_name) + } else { + Vec::new() + }; let this = self.clone(); - self.encoded_quads_for_pattern_inner(subject, predicate, object, graph_name) - .into_iter() - .map( - move |quad| this.decode_quad(&quad).unwrap(), // Could not fail - ) + quads.into_iter().map( + move |quad| this.decode_quad(&quad).unwrap(), // Could not fail + ) } /// Checks if this store contains a given quad pub fn contains(&self, quad: &Quad) -> bool { - let quad = quad.into(); - self.contains_encoded(&quad) + self.get_encoded_quad(quad) + .unwrap_infallible() + .map_or(false, |q| self.contains_encoded(&q)) } /// Returns the number of quads in the store @@ -238,7 +247,7 @@ impl MemoryStore { let mut transaction = MemoryTransaction { store: self, ops: Vec::new(), - strings: Vec::new(), + strings: TrivialHashMap::default(), }; f(&mut transaction)?; transaction.commit(); @@ -317,16 +326,17 @@ impl MemoryStore { /// Adds a quad to this store. #[allow(clippy::needless_pass_by_value)] pub fn insert(&self, quad: Quad) { - let mut store = self; - let quad = store.encode_quad(&quad).unwrap_infallible(); - store.insert_encoded(&quad).unwrap_infallible(); + let mut indexes = self.indexes_mut(); + let quad = indexes.encode_quad(&quad).unwrap_infallible(); + indexes.insert_encoded(&quad).unwrap_infallible(); } /// Removes a quad from this store. pub fn remove(&self, quad: &Quad) { - let mut store = self; - let quad = quad.into(); - store.remove_encoded(&quad).unwrap_infallible(); + let mut indexes = self.indexes_mut(); + if let Some(quad) = indexes.get_encoded_quad(quad).unwrap_infallible() { + indexes.remove_encoded(&quad).unwrap_infallible(); + } } /// Returns if the current dataset is [isomorphic](https://www.w3.org/TR/rdf11-concepts/#dfn-dataset-isomorphism) with another one. @@ -689,16 +699,17 @@ impl MemoryStore { impl WithStoreError for MemoryStore { type Error = Infallible; -} - -impl<'a> WithStoreError for &'a MemoryStore { - type Error = Infallible; + type StrId = StrHash; } impl StrLookup for MemoryStore { fn get_str(&self, id: StrHash) -> Result, Infallible> { self.indexes().get_str(id) } + + fn get_str_id(&self, value: &str) -> Result, Infallible> { + self.indexes().get_str_id(value) + } } impl<'a> StrContainer for &'a MemoryStore { @@ -737,6 +748,7 @@ impl<'a> WritableEncodedStore for &'a MemoryStore { impl WithStoreError for MemoryStoreIndexes { type Error = Infallible; + type StrId = StrHash; } impl StrLookup for MemoryStoreIndexes { @@ -744,6 +756,15 @@ impl StrLookup for MemoryStoreIndexes { //TODO: avoid copy by adding a lifetime limit to get_str Ok(self.id2str.get(&id).cloned()) } + + fn get_str_id(&self, value: &str) -> Result, Infallible> { + let id = StrHash::new(value); + Ok(if self.id2str.contains_key(&id) { + Some(id) + } else { + None + }) + } } impl StrContainer for MemoryStoreIndexes { @@ -941,7 +962,7 @@ impl MemoryPreparedQuery { pub struct MemoryTransaction<'a> { store: &'a MemoryStore, ops: Vec, - strings: Vec<(StrHash, String)>, + strings: TrivialHashMap, } enum TransactionOp { @@ -1022,8 +1043,9 @@ impl<'a> MemoryTransaction<'a> { /// Removes a quad from this store during the transaction. pub fn remove(&mut self, quad: &Quad) { - let quad = quad.into(); - self.remove_encoded(&quad).unwrap_infallible(); + if let Some(quad) = self.get_encoded_quad(quad).unwrap_infallible() { + self.remove_encoded(&quad).unwrap_infallible(); + } } fn commit(self) { @@ -1038,14 +1060,34 @@ impl<'a> MemoryTransaction<'a> { } } +impl StrLookup for MemoryTransaction<'_> { + fn get_str(&self, id: StrHash) -> Result, Infallible> { + if let Some(str) = self.strings.get(&id) { + Ok(Some(str.clone())) + } else { + self.store.get_str(id) + } + } + + fn get_str_id(&self, value: &str) -> Result, Infallible> { + let id = StrHash::new(value); + if self.strings.contains_key(&id) { + Ok(Some(id)) + } else { + self.store.get_str_id(value) + } + } +} + impl WithStoreError for MemoryTransaction<'_> { type Error = Infallible; + type StrId = StrHash; } impl StrContainer for MemoryTransaction<'_> { fn insert_str(&mut self, value: &str) -> Result { let key = StrHash::new(value); - self.strings.push((key, value.to_owned())); + self.strings.insert(key, value.to_owned()); Ok(key) } } diff --git a/lib/src/store/mod.rs b/lib/src/store/mod.rs index fb9872a2..7daf5b87 100644 --- a/lib/src/store/mod.rs +++ b/lib/src/store/mod.rs @@ -34,21 +34,21 @@ use std::io::{BufRead, Write}; use std::iter::Iterator; pub(crate) trait ReadableEncodedStore: StrLookup { - type QuadsIter: Iterator> + 'static; + type QuadsIter: Iterator, Self::Error>> + 'static; fn encoded_quads_for_pattern( &self, - subject: Option, - predicate: Option, - object: Option, - graph_name: Option, + subject: Option>, + predicate: Option>, + object: Option>, + graph_name: Option>, ) -> Self::QuadsIter; } pub(crate) trait WritableEncodedStore: StrContainer { - fn insert_encoded(&mut self, quad: &EncodedQuad) -> Result<(), Self::Error>; + fn insert_encoded(&mut self, quad: &EncodedQuad) -> Result<(), Self::Error>; - fn remove_encoded(&mut self, quad: &EncodedQuad) -> Result<(), Self::Error>; + fn remove_encoded(&mut self, quad: &EncodedQuad) -> Result<(), Self::Error>; } fn load_graph( @@ -239,3 +239,61 @@ impl> From> for io::Error { } } } + +type QuadPattern = ( + Option>, + Option>, + Option>, + Option>, +); + +fn get_encoded_quad_pattern( + encoder: &E, + subject: Option<&NamedOrBlankNode>, + predicate: Option<&NamedNode>, + object: Option<&Term>, + graph_name: Option<&GraphName>, +) -> Result>, E::Error> { + Ok(Some(( + if let Some(subject) = transpose( + subject + .map(|t| encoder.get_encoded_named_or_blank_node(t)) + .transpose()?, + ) { + subject + } else { + return Ok(None); + }, + if let Some(predicate) = transpose( + predicate + .map(|t| encoder.get_encoded_named_node(t)) + .transpose()?, + ) { + predicate + } else { + return Ok(None); + }, + if let Some(object) = transpose(object.map(|t| encoder.get_encoded_term(t)).transpose()?) { + object + } else { + return Ok(None); + }, + if let Some(graph_name) = transpose( + graph_name + .map(|t| encoder.get_encoded_graph_name(t)) + .transpose()?, + ) { + graph_name + } else { + return Ok(None); + }, + ))) +} + +fn transpose(o: Option>) -> Option> { + match o { + Some(Some(v)) => Some(Some(v)), + Some(None) => None, + None => Some(None), + } +} diff --git a/lib/src/store/numeric_encoder.rs b/lib/src/store/numeric_encoder.rs index 2f488220..9013ad9b 100644 --- a/lib/src/store/numeric_encoder.rs +++ b/lib/src/store/numeric_encoder.rs @@ -10,13 +10,24 @@ use siphasher::sip128::{Hasher128, SipHasher24}; use std::collections::HashMap; use std::convert::Infallible; use std::error::Error; +use std::fmt::Debug; use std::hash::Hash; use std::hash::Hasher; use std::io::Read; use std::mem::size_of; use std::{fmt, io, str}; -#[derive(Ord, PartialOrd, Eq, PartialEq, Debug, Copy, Clone, Hash)] +pub trait StrId: Eq + Debug + Copy + Hash {} + +pub trait SerializableStrId: StrId { + fn len() -> usize; + + fn from_be_bytes(bytes: &[u8]) -> Self; + + fn push_be_bytes(&self, buffer: &mut Vec); +} + +#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)] #[repr(transparent)] pub struct StrHash { hash: u128, @@ -44,6 +55,26 @@ impl StrHash { } } +impl StrId for StrHash {} + +impl SerializableStrId for StrHash { + fn len() -> usize { + 16 + } + + fn from_be_bytes(bytes: &[u8]) -> Self { + let mut hash = [0; 16]; + hash.copy_from_slice(bytes); + Self { + hash: u128::from_be_bytes(hash), + } + } + + fn push_be_bytes(&self, buffer: &mut Vec) { + buffer.extend_from_slice(&self.to_be_bytes()) + } +} + const TYPE_DEFAULT_GRAPH_ID: u8 = 0; const TYPE_NAMED_NODE_ID: u8 = 1; const TYPE_INLINE_BLANK_NODE_ID: u8 = 2; @@ -65,28 +96,14 @@ const TYPE_YEAR_MONTH_DURATION_LITERAL: u8 = 17; const TYPE_DAY_TIME_DURATION_LITERAL: u8 = 18; #[derive(Debug, Clone, Copy)] -pub enum EncodedTerm { +pub enum EncodedTerm { DefaultGraph, - NamedNode { - iri_id: StrHash, - }, - InlineBlankNode { - id: u128, - }, - NamedBlankNode { - id_id: StrHash, - }, - StringLiteral { - value_id: StrHash, - }, - LangStringLiteral { - value_id: StrHash, - language_id: StrHash, - }, - TypedLiteral { - value_id: StrHash, - datatype_id: StrHash, - }, + NamedNode { iri_id: I }, + InlineBlankNode { id: u128 }, + NamedBlankNode { id_id: I }, + StringLiteral { value_id: I }, + LangStringLiteral { value_id: I, language_id: I }, + TypedLiteral { value_id: I, datatype_id: I }, BooleanLiteral(bool), FloatLiteral(f32), DoubleLiteral(f64), @@ -100,402 +117,353 @@ pub enum EncodedTerm { DayTimeDurationLiteral(DayTimeDuration), } -impl PartialEq for EncodedTerm { +impl PartialEq for EncodedTerm { fn eq(&self, other: &Self) -> bool { match (self, other) { - (EncodedTerm::DefaultGraph, EncodedTerm::DefaultGraph) => true, - ( - EncodedTerm::NamedNode { iri_id: iri_id_a }, - EncodedTerm::NamedNode { iri_id: iri_id_b }, - ) => iri_id_a == iri_id_b, - ( - EncodedTerm::InlineBlankNode { id: id_a }, - EncodedTerm::InlineBlankNode { id: id_b }, - ) => id_a == id_b, - ( - EncodedTerm::NamedBlankNode { id_id: id_a }, - EncodedTerm::NamedBlankNode { id_id: id_b }, - ) => id_a == id_b, + (Self::DefaultGraph, Self::DefaultGraph) => true, + (Self::NamedNode { iri_id: iri_id_a }, Self::NamedNode { iri_id: iri_id_b }) => { + iri_id_a == iri_id_b + } + (Self::InlineBlankNode { id: id_a }, Self::InlineBlankNode { id: id_b }) => { + id_a == id_b + } + (Self::NamedBlankNode { id_id: id_a }, Self::NamedBlankNode { id_id: id_b }) => { + id_a == id_b + } ( - EncodedTerm::StringLiteral { + Self::StringLiteral { value_id: value_id_a, }, - EncodedTerm::StringLiteral { + Self::StringLiteral { value_id: value_id_b, }, ) => value_id_a == value_id_b, ( - EncodedTerm::LangStringLiteral { + Self::LangStringLiteral { value_id: value_id_a, language_id: language_id_a, }, - EncodedTerm::LangStringLiteral { + Self::LangStringLiteral { value_id: value_id_b, language_id: language_id_b, }, ) => value_id_a == value_id_b && language_id_a == language_id_b, ( - EncodedTerm::TypedLiteral { + Self::TypedLiteral { value_id: value_id_a, datatype_id: datatype_id_a, }, - EncodedTerm::TypedLiteral { + Self::TypedLiteral { value_id: value_id_b, datatype_id: datatype_id_b, }, ) => value_id_a == value_id_b && datatype_id_a == datatype_id_b, - (EncodedTerm::BooleanLiteral(a), EncodedTerm::BooleanLiteral(b)) => a == b, - (EncodedTerm::FloatLiteral(a), EncodedTerm::FloatLiteral(b)) => { + (Self::BooleanLiteral(a), Self::BooleanLiteral(b)) => a == b, + (Self::FloatLiteral(a), Self::FloatLiteral(b)) => { if a.is_nan() { b.is_nan() } else { a == b } } - (EncodedTerm::DoubleLiteral(a), EncodedTerm::DoubleLiteral(b)) => { + (Self::DoubleLiteral(a), Self::DoubleLiteral(b)) => { if a.is_nan() { b.is_nan() } else { a == b } } - (EncodedTerm::IntegerLiteral(a), EncodedTerm::IntegerLiteral(b)) => a == b, - (EncodedTerm::DecimalLiteral(a), EncodedTerm::DecimalLiteral(b)) => a == b, - (EncodedTerm::DateLiteral(a), EncodedTerm::DateLiteral(b)) => a == b, - (EncodedTerm::TimeLiteral(a), EncodedTerm::TimeLiteral(b)) => a == b, - (EncodedTerm::DateTimeLiteral(a), EncodedTerm::DateTimeLiteral(b)) => a == b, - (EncodedTerm::DurationLiteral(a), EncodedTerm::DurationLiteral(b)) => a == b, - ( - EncodedTerm::YearMonthDurationLiteral(a), - EncodedTerm::YearMonthDurationLiteral(b), - ) => a == b, - (EncodedTerm::DayTimeDurationLiteral(a), EncodedTerm::DayTimeDurationLiteral(b)) => { - a == b - } + (Self::IntegerLiteral(a), Self::IntegerLiteral(b)) => a == b, + (Self::DecimalLiteral(a), Self::DecimalLiteral(b)) => a == b, + (Self::DateLiteral(a), Self::DateLiteral(b)) => a == b, + (Self::TimeLiteral(a), Self::TimeLiteral(b)) => a == b, + (Self::DateTimeLiteral(a), Self::DateTimeLiteral(b)) => a == b, + (Self::DurationLiteral(a), Self::DurationLiteral(b)) => a == b, + (Self::YearMonthDurationLiteral(a), Self::YearMonthDurationLiteral(b)) => a == b, + (Self::DayTimeDurationLiteral(a), Self::DayTimeDurationLiteral(b)) => a == b, (_, _) => false, } } } -impl Eq for EncodedTerm {} +impl Eq for EncodedTerm {} -impl Hash for EncodedTerm { +impl Hash for EncodedTerm { fn hash(&self, state: &mut H) { match self { - EncodedTerm::NamedNode { iri_id } => iri_id.hash(state), - EncodedTerm::InlineBlankNode { id } => id.hash(state), - EncodedTerm::NamedBlankNode { id_id } => id_id.hash(state), - EncodedTerm::DefaultGraph => (), - EncodedTerm::StringLiteral { value_id } => value_id.hash(state), - EncodedTerm::LangStringLiteral { + Self::NamedNode { iri_id } => iri_id.hash(state), + Self::InlineBlankNode { id } => id.hash(state), + Self::NamedBlankNode { id_id } => id_id.hash(state), + Self::DefaultGraph => (), + Self::StringLiteral { value_id } => value_id.hash(state), + Self::LangStringLiteral { value_id, language_id, } => { value_id.hash(state); language_id.hash(state); } - EncodedTerm::TypedLiteral { + Self::TypedLiteral { value_id, datatype_id, } => { value_id.hash(state); datatype_id.hash(state); } - EncodedTerm::BooleanLiteral(value) => value.hash(state), - EncodedTerm::FloatLiteral(value) => state.write(&value.to_ne_bytes()), - EncodedTerm::DoubleLiteral(value) => state.write(&value.to_ne_bytes()), - EncodedTerm::IntegerLiteral(value) => value.hash(state), - EncodedTerm::DecimalLiteral(value) => value.hash(state), - EncodedTerm::DateLiteral(value) => value.hash(state), - EncodedTerm::TimeLiteral(value) => value.hash(state), - EncodedTerm::DateTimeLiteral(value) => value.hash(state), - EncodedTerm::DurationLiteral(value) => value.hash(state), - EncodedTerm::YearMonthDurationLiteral(value) => value.hash(state), - EncodedTerm::DayTimeDurationLiteral(value) => value.hash(state), + Self::BooleanLiteral(value) => value.hash(state), + Self::FloatLiteral(value) => state.write(&value.to_ne_bytes()), + Self::DoubleLiteral(value) => state.write(&value.to_ne_bytes()), + Self::IntegerLiteral(value) => value.hash(state), + Self::DecimalLiteral(value) => value.hash(state), + Self::DateLiteral(value) => value.hash(state), + Self::TimeLiteral(value) => value.hash(state), + Self::DateTimeLiteral(value) => value.hash(state), + Self::DurationLiteral(value) => value.hash(state), + Self::YearMonthDurationLiteral(value) => value.hash(state), + Self::DayTimeDurationLiteral(value) => value.hash(state), } } } -impl EncodedTerm { +impl EncodedTerm { pub fn is_named_node(&self) -> bool { match self { - EncodedTerm::NamedNode { .. } => true, + Self::NamedNode { .. } => true, _ => false, } } pub fn is_blank_node(&self) -> bool { match self { - EncodedTerm::InlineBlankNode { .. } | EncodedTerm::NamedBlankNode { .. } => true, + Self::InlineBlankNode { .. } | Self::NamedBlankNode { .. } => true, _ => false, } } pub fn is_literal(&self) -> bool { match self { - EncodedTerm::StringLiteral { .. } - | EncodedTerm::LangStringLiteral { .. } - | EncodedTerm::TypedLiteral { .. } - | EncodedTerm::BooleanLiteral(_) - | EncodedTerm::FloatLiteral(_) - | EncodedTerm::DoubleLiteral(_) - | EncodedTerm::IntegerLiteral(_) - | EncodedTerm::DecimalLiteral(_) - | EncodedTerm::DateLiteral(_) - | EncodedTerm::TimeLiteral(_) - | EncodedTerm::DateTimeLiteral(_) - | EncodedTerm::DurationLiteral(_) - | EncodedTerm::YearMonthDurationLiteral(_) - | EncodedTerm::DayTimeDurationLiteral(_) => true, + Self::StringLiteral { .. } + | Self::LangStringLiteral { .. } + | Self::TypedLiteral { .. } + | Self::BooleanLiteral(_) + | Self::FloatLiteral(_) + | Self::DoubleLiteral(_) + | Self::IntegerLiteral(_) + | Self::DecimalLiteral(_) + | Self::DateLiteral(_) + | Self::TimeLiteral(_) + | Self::DateTimeLiteral(_) + | Self::DurationLiteral(_) + | Self::YearMonthDurationLiteral(_) + | Self::DayTimeDurationLiteral(_) => true, _ => false, } } fn type_id(&self) -> u8 { match self { - EncodedTerm::DefaultGraph { .. } => TYPE_DEFAULT_GRAPH_ID, - EncodedTerm::NamedNode { .. } => TYPE_NAMED_NODE_ID, - EncodedTerm::InlineBlankNode { .. } => TYPE_INLINE_BLANK_NODE_ID, - EncodedTerm::NamedBlankNode { .. } => TYPE_NAMED_BLANK_NODE_ID, - EncodedTerm::StringLiteral { .. } => TYPE_STRING_LITERAL, - EncodedTerm::LangStringLiteral { .. } => TYPE_LANG_STRING_LITERAL_ID, - EncodedTerm::TypedLiteral { .. } => TYPE_TYPED_LITERAL_ID, - EncodedTerm::BooleanLiteral(true) => TYPE_BOOLEAN_LITERAL_TRUE, - EncodedTerm::BooleanLiteral(false) => TYPE_BOOLEAN_LITERAL_FALSE, - EncodedTerm::FloatLiteral(_) => TYPE_FLOAT_LITERAL, - EncodedTerm::DoubleLiteral(_) => TYPE_DOUBLE_LITERAL, - EncodedTerm::IntegerLiteral(_) => TYPE_INTEGER_LITERAL, - EncodedTerm::DecimalLiteral(_) => TYPE_DECIMAL_LITERAL, - EncodedTerm::DateLiteral(_) => TYPE_DATE_LITERAL, - EncodedTerm::TimeLiteral(_) => TYPE_TIME_LITERAL, - EncodedTerm::DateTimeLiteral(_) => TYPE_DATE_TIME_LITERAL, - EncodedTerm::DurationLiteral(_) => TYPE_DURATION_LITERAL, - EncodedTerm::YearMonthDurationLiteral(_) => TYPE_YEAR_MONTH_DURATION_LITERAL, - EncodedTerm::DayTimeDurationLiteral(_) => TYPE_DAY_TIME_DURATION_LITERAL, + Self::DefaultGraph { .. } => TYPE_DEFAULT_GRAPH_ID, + Self::NamedNode { .. } => TYPE_NAMED_NODE_ID, + Self::InlineBlankNode { .. } => TYPE_INLINE_BLANK_NODE_ID, + Self::NamedBlankNode { .. } => TYPE_NAMED_BLANK_NODE_ID, + Self::StringLiteral { .. } => TYPE_STRING_LITERAL, + Self::LangStringLiteral { .. } => TYPE_LANG_STRING_LITERAL_ID, + Self::TypedLiteral { .. } => TYPE_TYPED_LITERAL_ID, + Self::BooleanLiteral(true) => TYPE_BOOLEAN_LITERAL_TRUE, + Self::BooleanLiteral(false) => TYPE_BOOLEAN_LITERAL_FALSE, + Self::FloatLiteral(_) => TYPE_FLOAT_LITERAL, + Self::DoubleLiteral(_) => TYPE_DOUBLE_LITERAL, + Self::IntegerLiteral(_) => TYPE_INTEGER_LITERAL, + Self::DecimalLiteral(_) => TYPE_DECIMAL_LITERAL, + Self::DateLiteral(_) => TYPE_DATE_LITERAL, + Self::TimeLiteral(_) => TYPE_TIME_LITERAL, + Self::DateTimeLiteral(_) => TYPE_DATE_TIME_LITERAL, + Self::DurationLiteral(_) => TYPE_DURATION_LITERAL, + Self::YearMonthDurationLiteral(_) => TYPE_YEAR_MONTH_DURATION_LITERAL, + Self::DayTimeDurationLiteral(_) => TYPE_DAY_TIME_DURATION_LITERAL, } } + + pub fn map_id(self, mapping: impl Fn(I) -> J) -> EncodedTerm { + match self { + Self::DefaultGraph { .. } => EncodedTerm::DefaultGraph, + Self::NamedNode { iri_id } => EncodedTerm::NamedNode { + iri_id: mapping(iri_id), + }, + Self::InlineBlankNode { id } => EncodedTerm::InlineBlankNode { id }, + Self::NamedBlankNode { id_id } => EncodedTerm::NamedBlankNode { + id_id: mapping(id_id), + }, + Self::StringLiteral { value_id } => EncodedTerm::StringLiteral { + value_id: mapping(value_id), + }, + Self::LangStringLiteral { + value_id, + language_id, + } => EncodedTerm::LangStringLiteral { + value_id: mapping(value_id), + language_id: mapping(language_id), + }, + Self::TypedLiteral { + value_id, + datatype_id, + } => EncodedTerm::TypedLiteral { + value_id: mapping(value_id), + datatype_id: mapping(datatype_id), + }, + Self::BooleanLiteral(value) => EncodedTerm::BooleanLiteral(value), + Self::FloatLiteral(value) => EncodedTerm::FloatLiteral(value), + Self::DoubleLiteral(value) => EncodedTerm::DoubleLiteral(value), + Self::IntegerLiteral(value) => EncodedTerm::IntegerLiteral(value), + Self::DecimalLiteral(value) => EncodedTerm::DecimalLiteral(value), + Self::DateLiteral(value) => EncodedTerm::DateLiteral(value), + Self::TimeLiteral(value) => EncodedTerm::TimeLiteral(value), + Self::DateTimeLiteral(value) => EncodedTerm::DateTimeLiteral(value), + Self::DurationLiteral(value) => EncodedTerm::DurationLiteral(value), + Self::YearMonthDurationLiteral(value) => EncodedTerm::YearMonthDurationLiteral(value), + Self::DayTimeDurationLiteral(value) => EncodedTerm::DayTimeDurationLiteral(value), + } + } + + pub fn try_map_id(self, mapping: impl Fn(I) -> Option) -> Option> { + Some(match self { + Self::DefaultGraph { .. } => EncodedTerm::DefaultGraph, + Self::NamedNode { iri_id } => EncodedTerm::NamedNode { + iri_id: mapping(iri_id)?, + }, + Self::InlineBlankNode { id } => EncodedTerm::InlineBlankNode { id }, + Self::NamedBlankNode { id_id } => EncodedTerm::NamedBlankNode { + id_id: mapping(id_id)?, + }, + Self::StringLiteral { value_id } => EncodedTerm::StringLiteral { + value_id: mapping(value_id)?, + }, + Self::LangStringLiteral { + value_id, + language_id, + } => EncodedTerm::LangStringLiteral { + value_id: mapping(value_id)?, + language_id: mapping(language_id)?, + }, + Self::TypedLiteral { + value_id, + datatype_id, + } => EncodedTerm::TypedLiteral { + value_id: mapping(value_id)?, + datatype_id: mapping(datatype_id)?, + }, + Self::BooleanLiteral(value) => EncodedTerm::BooleanLiteral(value), + Self::FloatLiteral(value) => EncodedTerm::FloatLiteral(value), + Self::DoubleLiteral(value) => EncodedTerm::DoubleLiteral(value), + Self::IntegerLiteral(value) => EncodedTerm::IntegerLiteral(value), + Self::DecimalLiteral(value) => EncodedTerm::DecimalLiteral(value), + Self::DateLiteral(value) => EncodedTerm::DateLiteral(value), + Self::TimeLiteral(value) => EncodedTerm::TimeLiteral(value), + Self::DateTimeLiteral(value) => EncodedTerm::DateTimeLiteral(value), + Self::DurationLiteral(value) => EncodedTerm::DurationLiteral(value), + Self::YearMonthDurationLiteral(value) => EncodedTerm::YearMonthDurationLiteral(value), + Self::DayTimeDurationLiteral(value) => EncodedTerm::DayTimeDurationLiteral(value), + }) + } } -impl From for EncodedTerm { +impl From for EncodedTerm { fn from(value: bool) -> Self { - EncodedTerm::BooleanLiteral(value) + Self::BooleanLiteral(value) } } -impl From for EncodedTerm { +impl From for EncodedTerm { fn from(value: i64) -> Self { - EncodedTerm::IntegerLiteral(value) + Self::IntegerLiteral(value) } } -impl From for EncodedTerm { +impl From for EncodedTerm { fn from(value: i32) -> Self { - EncodedTerm::IntegerLiteral(value.into()) + Self::IntegerLiteral(value.into()) } } -impl From for EncodedTerm { +impl From for EncodedTerm { fn from(value: u32) -> Self { - EncodedTerm::IntegerLiteral(value.into()) + Self::IntegerLiteral(value.into()) } } -impl From for EncodedTerm { +impl From for EncodedTerm { fn from(value: u8) -> Self { - EncodedTerm::IntegerLiteral(value.into()) + Self::IntegerLiteral(value.into()) } } -impl From for EncodedTerm { +impl From for EncodedTerm { fn from(value: f32) -> Self { - EncodedTerm::FloatLiteral(value) + Self::FloatLiteral(value) } } -impl From for EncodedTerm { +impl From for EncodedTerm { fn from(value: f64) -> Self { - EncodedTerm::DoubleLiteral(value) + Self::DoubleLiteral(value) } } -impl From for EncodedTerm { +impl From for EncodedTerm { fn from(value: Decimal) -> Self { - EncodedTerm::DecimalLiteral(value) + Self::DecimalLiteral(value) } } -impl From for EncodedTerm { +impl From for EncodedTerm { fn from(value: Date) -> Self { - EncodedTerm::DateLiteral(value) + Self::DateLiteral(value) } } -impl From