diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index b7a2435f..0ebb0211 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -1,6 +1,5 @@ use crate::model::vocab::{rdf, xsd}; -use crate::model::{BlankNode, LiteralRef, NamedNodeRef}; -use crate::model::{NamedNode, Term, Triple}; +use crate::model::{BlankNode, LiteralRef, NamedNode, NamedNodeRef, Term, Triple}; use crate::sparql::algebra::{Query, QueryDataset}; use crate::sparql::dataset::DatasetView; use crate::sparql::error::EvaluationError; @@ -110,8 +109,8 @@ impl SimpleEvaluator { node: &PlanNode, ) -> Rc EncodedTuplesIterator> { match node { - PlanNode::StaticBindings { tuples } => { - let tuples = tuples.clone(); + PlanNode::StaticBindings { encoded_tuples, .. } => { + let tuples = encoded_tuples.clone(); Rc::new(move |from| { Box::new( tuples @@ -162,10 +161,10 @@ impl SimpleEvaluator { object, graph_name, } => { - let subject = subject.clone(); - let predicate = predicate.clone(); - let object = object.clone(); - let graph_name = graph_name.clone(); + let subject = TupleSelector::from(subject); + let predicate = TupleSelector::from(predicate); + let object = TupleSelector::from(object); + let graph_name = TupleSelector::from(graph_name); let dataset = self.dataset.clone(); Rc::new(move |from| { let iter = dataset.encoded_quads_for_pattern( @@ -197,10 +196,10 @@ impl SimpleEvaluator { object, graph_name, } => { - let subject = subject.clone(); + let subject = TupleSelector::from(subject); let path = path.clone(); - let object = object.clone(); - let graph_name = graph_name.clone(); + let object = TupleSelector::from(object); + let graph_name = TupleSelector::from(graph_name); let dataset = self.dataset.clone(); Rc::new(move |from| { let input_subject = get_pattern_value(&subject, &from); @@ -530,11 +529,11 @@ impl SimpleEvaluator { } PlanNode::Extend { child, - position, + variable, expression, } => { let child = self.plan_evaluator(child); - let position = *position; + let position = variable.encoded; let expression = self.expression_evaluator(expression); Rc::new(move |from| { let expression = expression.clone(); @@ -634,8 +633,8 @@ impl SimpleEvaluator { let mapping = mapping.clone(); let mut input_tuple = EncodedTuple::with_capacity(mapping.len()); for (input_key, output_key) in mapping.iter() { - if let Some(value) = from.get(*output_key) { - input_tuple.set(*input_key, value.clone()); + if let Some(value) = from.get(output_key.encoded) { + input_tuple.set(input_key.encoded, value.clone()); } } Box::new(child(input_tuple).filter_map(move |tuple| { @@ -643,14 +642,15 @@ impl SimpleEvaluator { Ok(tuple) => { let mut output_tuple = from.clone(); for (input_key, output_key) in mapping.iter() { - if let Some(value) = tuple.get(*input_key) { - if let Some(existing_value) = output_tuple.get(*output_key) + if let Some(value) = tuple.get(input_key.encoded) { + if let Some(existing_value) = + output_tuple.get(output_key.encoded) { if existing_value != value { return None; // Conflict } } else { - output_tuple.set(*output_key, value.clone()); + output_tuple.set(output_key.encoded, value.clone()); } } } @@ -688,7 +688,7 @@ impl SimpleEvaluator { }) .collect(); let accumulator_variables: Vec<_> = - aggregates.iter().map(|(_, var)| *var).collect(); + aggregates.iter().map(|(_, var)| var.encoded).collect(); Rc::new(move |from| { let tuple_size = from.capacity(); let key_variables = key_variables.clone(); @@ -707,7 +707,7 @@ impl SimpleEvaluator { //TODO avoid copy for key? let key = key_variables .iter() - .map(|v| tuple.get(*v).cloned()) + .map(|v| tuple.get(v.encoded).cloned()) .collect(); let key_accumulators = @@ -739,7 +739,7 @@ impl SimpleEvaluator { let mut result = EncodedTuple::with_capacity(tuple_size); for (variable, value) in key_variables.iter().zip(key) { if let Some(value) = value { - result.set(*variable, value); + result.set(variable.encoded, value); } } for (accumulator, variable) in @@ -765,7 +765,7 @@ impl SimpleEvaluator { variables: Rc>, from: &EncodedTuple, ) -> Result { - let service_name = get_pattern_value(service_name, from) + let service_name = get_pattern_value(&service_name.into(), from) .ok_or_else(|| EvaluationError::msg("The SERVICE name is not bound"))?; if let QueryResults::Solutions(iter) = self.service_handler.handle( self.dataset.decode_named_node(&service_name)?, @@ -851,12 +851,16 @@ impl SimpleEvaluator { expression: &PlanExpression, ) -> Rc Option> { match expression { - PlanExpression::Constant(t) => { - let t = t.clone(); + PlanExpression::NamedNode(t) => { + let t = t.encoded.clone(); + Rc::new(move |_| Some(t.clone())) + } + PlanExpression::Literal(t) => { + let t = t.encoded.clone(); Rc::new(move |_| Some(t.clone())) } PlanExpression::Variable(v) => { - let v = *v; + let v = v.encoded; Rc::new(move |tuple| tuple.get(v).cloned()) } PlanExpression::Exists(plan) => { @@ -1158,7 +1162,7 @@ impl SimpleEvaluator { Rc::new(move |tuple| datatype(&dataset, &e(tuple)?)) } PlanExpression::Bound(v) => { - let v = *v; + let v = v.encoded; Rc::new(move |tuple| Some(tuple.contains(v).into())) } PlanExpression::Iri(e) => { @@ -2924,14 +2928,32 @@ impl NumericBinaryOperands { } } -fn get_pattern_value<'a>( - selector: &'a PatternValue, - tuple: &'a EncodedTuple, -) -> Option { +#[derive(Clone)] +enum TupleSelector { + Constant(EncodedTerm), + Variable(usize), + TriplePattern(Rc), +} + +impl From<&PatternValue> for TupleSelector { + fn from(value: &PatternValue) -> Self { + match value { + PatternValue::Constant(c) => Self::Constant(c.encoded.clone()), + PatternValue::Variable(v) => Self::Variable(v.encoded), + PatternValue::TriplePattern(p) => Self::TriplePattern(Rc::new(TripleTupleSelector { + subject: (&p.subject).into(), + predicate: (&p.predicate).into(), + object: (&p.object).into(), + })), + } + } +} + +fn get_pattern_value(selector: &TupleSelector, tuple: &EncodedTuple) -> Option { match selector { - PatternValue::Constant(term) => Some(term.clone()), - PatternValue::Variable(v) => tuple.get(*v).cloned(), - PatternValue::Triple(triple) => Some( + TupleSelector::Constant(c) => Some(c.clone()), + TupleSelector::Variable(v) => tuple.get(*v).cloned(), + TupleSelector::TriplePattern(triple) => Some( EncodedTriple { subject: get_pattern_value(&triple.subject, tuple)?, predicate: get_pattern_value(&triple.predicate, tuple)?, @@ -2942,20 +2964,26 @@ fn get_pattern_value<'a>( } } +struct TripleTupleSelector { + subject: TupleSelector, + predicate: TupleSelector, + object: TupleSelector, +} + fn put_pattern_value( - selector: &PatternValue, + selector: &TupleSelector, value: EncodedTerm, tuple: &mut EncodedTuple, ) -> Option<()> { match selector { - PatternValue::Constant(c) => { + TupleSelector::Constant(c) => { if *c == value { Some(()) } else { None } } - PatternValue::Variable(v) => { + TupleSelector::Variable(v) => { if let Some(old) = tuple.get(*v) { if value == *old { Some(()) @@ -2967,7 +2995,7 @@ fn put_pattern_value( Some(()) } } - PatternValue::Triple(triple) => { + TupleSelector::TriplePattern(triple) => { if let EncodedTerm::Triple(value) = value { put_pattern_value(&triple.subject, value.subject.clone(), tuple)?; put_pattern_value(&triple.predicate, value.predicate.clone(), tuple)?; @@ -3022,7 +3050,12 @@ impl PathEvaluator { Ok(match path { PlanPropertyPath::Path(p) => self .dataset - .encoded_quads_for_pattern(Some(start), Some(p), Some(end), Some(graph_name)) + .encoded_quads_for_pattern( + Some(start), + Some(&p.encoded), + Some(end), + Some(graph_name), + ) .next() .transpose()? .is_some(), @@ -3074,7 +3107,7 @@ impl PathEvaluator { .encoded_quads_for_pattern(Some(start), None, Some(end), Some(graph_name)) .find_map(move |t| match t { Ok(t) => { - if ps.contains(&t.predicate) { + if ps.iter().any(|p| p.encoded == t.predicate) { None } else { Some(Ok(())) @@ -3096,7 +3129,7 @@ impl PathEvaluator { match path { PlanPropertyPath::Path(p) => Box::new( self.dataset - .encoded_quads_for_pattern(Some(start), Some(p), Some(end), None) + .encoded_quads_for_pattern(Some(start), Some(&p.encoded), Some(end), None) .map(|t| Ok(t?.graph_name)), ), PlanPropertyPath::Reverse(p) => self.eval_closed_in_unknown_graph(p, end, start), @@ -3178,7 +3211,7 @@ impl PathEvaluator { .encoded_quads_for_pattern(Some(start), None, Some(end), None) .filter_map(move |t| match t { Ok(t) => { - if ps.contains(&t.predicate) { + if ps.iter().any(|p| p.encoded == t.predicate) { None } else { Some(Ok(t.graph_name)) @@ -3200,7 +3233,12 @@ impl PathEvaluator { match path { PlanPropertyPath::Path(p) => Box::new( self.dataset - .encoded_quads_for_pattern(Some(start), Some(p), None, Some(graph_name)) + .encoded_quads_for_pattern( + Some(start), + Some(&p.encoded), + None, + Some(graph_name), + ) .map(|t| Ok(t?.object)), ), PlanPropertyPath::Reverse(p) => self.eval_to_in_graph(p, start, graph_name), @@ -3253,7 +3291,7 @@ impl PathEvaluator { .encoded_quads_for_pattern(Some(start), None, None, Some(graph_name)) .filter_map(move |t| match t { Ok(t) => { - if ps.contains(&t.predicate) { + if ps.iter().any(|p| p.encoded == t.predicate) { None } else { Some(Ok(t.object)) @@ -3274,7 +3312,7 @@ impl PathEvaluator { match path { PlanPropertyPath::Path(p) => Box::new( self.dataset - .encoded_quads_for_pattern(Some(start), Some(p), None, None) + .encoded_quads_for_pattern(Some(start), Some(&p.encoded), None, None) .map(|t| { let t = t?; Ok((t.object, t.graph_name)) @@ -3340,7 +3378,7 @@ impl PathEvaluator { .encoded_quads_for_pattern(Some(start), None, None, None) .filter_map(move |t| match t { Ok(t) => { - if ps.contains(&t.predicate) { + if ps.iter().any(|p| p.encoded == t.predicate) { None } else { Some(Ok((t.object, t.graph_name))) @@ -3362,7 +3400,7 @@ impl PathEvaluator { match path { PlanPropertyPath::Path(p) => Box::new( self.dataset - .encoded_quads_for_pattern(None, Some(p), Some(end), Some(graph_name)) + .encoded_quads_for_pattern(None, Some(&p.encoded), Some(end), Some(graph_name)) .map(|t| Ok(t?.subject)), ), PlanPropertyPath::Reverse(p) => self.eval_from_in_graph(p, end, graph_name), @@ -3414,7 +3452,7 @@ impl PathEvaluator { .encoded_quads_for_pattern(None, None, Some(end), Some(graph_name)) .filter_map(move |t| match t { Ok(t) => { - if ps.contains(&t.predicate) { + if ps.iter().any(|p| p.encoded == t.predicate) { None } else { Some(Ok(t.subject)) @@ -3434,7 +3472,7 @@ impl PathEvaluator { match path { PlanPropertyPath::Path(p) => Box::new( self.dataset - .encoded_quads_for_pattern(None, Some(p), Some(end), None) + .encoded_quads_for_pattern(None, Some(&p.encoded), Some(end), None) .map(|t| { let t = t?; Ok((t.subject, t.graph_name)) @@ -3500,7 +3538,7 @@ impl PathEvaluator { .encoded_quads_for_pattern(Some(end), None, None, None) .filter_map(move |t| match t { Ok(t) => { - if ps.contains(&t.predicate) { + if ps.iter().any(|p| p.encoded == t.predicate) { None } else { Some(Ok((t.subject, t.graph_name))) @@ -3521,7 +3559,7 @@ impl PathEvaluator { match path { PlanPropertyPath::Path(p) => Box::new( self.dataset - .encoded_quads_for_pattern(None, Some(p), None, Some(graph_name)) + .encoded_quads_for_pattern(None, Some(&p.encoded), None, Some(graph_name)) .map(|t| t.map(|t| (t.subject, t.object))), ), PlanPropertyPath::Reverse(p) => Box::new( @@ -3578,7 +3616,7 @@ impl PathEvaluator { .encoded_quads_for_pattern(None, None, None, Some(graph_name)) .filter_map(move |t| match t { Ok(t) => { - if ps.contains(&t.predicate) { + if ps.iter().any(|p| p.encoded == t.predicate) { None } else { Some(Ok((t.subject, t.object))) @@ -3599,7 +3637,7 @@ impl PathEvaluator { match path { PlanPropertyPath::Path(p) => Box::new( self.dataset - .encoded_quads_for_pattern(None, Some(p), None, None) + .encoded_quads_for_pattern(None, Some(&p.encoded), None, None) .map(|t| t.map(|t| (t.subject, t.object, t.graph_name))), ), PlanPropertyPath::Reverse(p) => Box::new( @@ -3653,7 +3691,7 @@ impl PathEvaluator { .encoded_quads_for_pattern(None, None, None, None) .filter_map(move |t| match t { Ok(t) => { - if ps.contains(&t.predicate) { + if ps.iter().any(|p| p.encoded == t.predicate) { None } else { Some(Ok((t.subject, t.object, t.graph_name))) @@ -4081,13 +4119,13 @@ fn get_triple_template_value<'a>( bnodes: &'a mut Vec, ) -> Option { match selector { - TripleTemplateValue::Constant(term) => Some(term.clone()), - TripleTemplateValue::Variable(v) => tuple.get(*v).cloned(), - TripleTemplateValue::BlankNode(id) => { - if *id >= bnodes.len() { - bnodes.resize_with(*id + 1, new_bnode) + TripleTemplateValue::Constant(term) => Some(term.encoded.clone()), + TripleTemplateValue::Variable(v) => tuple.get(v.encoded).cloned(), + TripleTemplateValue::BlankNode(bnode) => { + if bnode.encoded >= bnodes.len() { + bnodes.resize_with(bnode.encoded + 1, new_bnode) } - Some(bnodes[*id].clone()) + Some(bnodes[bnode.encoded].clone()) } TripleTemplateValue::Triple(triple) => Some( EncodedTriple { diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index fdb0922e..0671f473 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -1,8 +1,9 @@ -use crate::model::NamedNode; +use crate::model::{BlankNode, Literal, NamedNode, Term, Triple}; +use crate::sparql::Variable; use crate::storage::numeric_encoder::EncodedTerm; -use oxrdf::Variable; use regex::Regex; use spargebra::algebra::GraphPattern; +use spargebra::term::GroundTerm; use std::cmp::max; use std::collections::btree_map::Entry; use std::collections::{BTreeMap, BTreeSet}; @@ -11,7 +12,9 @@ use std::rc::Rc; #[derive(Debug, Clone)] pub enum PlanNode { StaticBindings { - tuples: Vec, + encoded_tuples: Vec, + variables: Vec, + plain_bindings: Vec>>, }, Service { service_name: PatternValue, @@ -68,7 +71,7 @@ pub enum PlanNode { }, Extend { child: Box, - position: usize, + variable: PlanVariable, expression: Box, }, Sort { @@ -92,13 +95,13 @@ pub enum PlanNode { }, Project { child: Box, - mapping: Rc>, // pairs of (variable key in child, variable key in output) + mapping: Rc>, // pairs of (variable key in child, variable key in output) }, Aggregate { // By definition the group by key are the range 0..key_mapping.len() child: Box, - key_variables: Rc>, - aggregates: Rc>, + key_variables: Rc>, + aggregates: Rc>, }, } @@ -114,8 +117,8 @@ impl PlanNode { pub fn lookup_used_variables(&self, callback: &mut impl FnMut(usize)) { match self { - Self::StaticBindings { tuples } => { - for tuple in tuples { + Self::StaticBindings { encoded_tuples, .. } => { + for tuple in encoded_tuples { for (key, value) in tuple.iter().enumerate() { if value.is_some() { callback(key); @@ -130,16 +133,16 @@ impl PlanNode { graph_name, } => { if let PatternValue::Variable(var) = subject { - callback(*var); + callback(var.encoded); } if let PatternValue::Variable(var) = predicate { - callback(*var); + callback(var.encoded); } if let PatternValue::Variable(var) = object { - callback(*var); + callback(var.encoded); } if let PatternValue::Variable(var) = graph_name { - callback(*var); + callback(var.encoded); } } Self::PathPattern { @@ -149,13 +152,13 @@ impl PlanNode { .. } => { if let PatternValue::Variable(var) = subject { - callback(*var); + callback(var.encoded); } if let PatternValue::Variable(var) = object { - callback(*var); + callback(var.encoded); } if let PatternValue::Variable(var) = graph_name { - callback(*var); + callback(var.encoded); } } Self::Filter { child, expression } => { @@ -185,10 +188,10 @@ impl PlanNode { } Self::Extend { child, - position, + variable, expression, } => { - callback(*position); + callback(variable.encoded); expression.lookup_used_variables(callback); child.lookup_used_variables(callback); } @@ -203,15 +206,15 @@ impl PlanNode { .. } => { if let PatternValue::Variable(v) = service_name { - callback(*v); + callback(v.encoded); } child.lookup_used_variables(callback); } Self::Project { mapping, child } => { let child_bound = child.used_variables(); for (child_i, output_i) in mapping.iter() { - if child_bound.contains(child_i) { - callback(*output_i); + if child_bound.contains(&child_i.encoded) { + callback(output_i.encoded); } } } @@ -221,10 +224,10 @@ impl PlanNode { .. } => { for var in key_variables.iter() { - callback(*var); + callback(var.encoded); } for (_, var) in aggregates.iter() { - callback(*var); + callback(var.encoded); } } } @@ -243,10 +246,10 @@ impl PlanNode { pub fn lookup_always_bound_variables(&self, callback: &mut impl FnMut(usize)) { match self { - Self::StaticBindings { tuples } => { + Self::StaticBindings { encoded_tuples, .. } => { let mut variables = BTreeMap::default(); // value true iff always bound - let max_tuple_length = tuples.iter().map(|t| t.capacity()).fold(0, max); - for tuple in tuples { + let max_tuple_length = encoded_tuples.iter().map(|t| t.capacity()).fold(0, max); + for tuple in encoded_tuples { for key in 0..max_tuple_length { match variables.entry(key) { Entry::Vacant(e) => { @@ -273,16 +276,16 @@ impl PlanNode { graph_name, } => { if let PatternValue::Variable(var) = subject { - callback(*var); + callback(var.encoded); } if let PatternValue::Variable(var) = predicate { - callback(*var); + callback(var.encoded); } if let PatternValue::Variable(var) = object { - callback(*var); + callback(var.encoded); } if let PatternValue::Variable(var) = graph_name { - callback(*var); + callback(var.encoded); } } Self::PathPattern { @@ -292,13 +295,13 @@ impl PlanNode { .. } => { if let PatternValue::Variable(var) = subject { - callback(*var); + callback(var.encoded); } if let PatternValue::Variable(var) = object { - callback(*var); + callback(var.encoded); } if let PatternValue::Variable(var) = graph_name { - callback(*var); + callback(var.encoded); } } Self::Filter { child, .. } => { @@ -327,12 +330,15 @@ impl PlanNode { } Self::Extend { child, - position, + variable, expression, } => { - if matches!(expression.as_ref(), PlanExpression::Constant(_)) { + if matches!( + expression.as_ref(), + PlanExpression::NamedNode(_) | PlanExpression::Literal(_) + ) { // TODO: more cases? - callback(*position); + callback(variable.encoded); } child.lookup_always_bound_variables(callback); } @@ -351,8 +357,8 @@ impl PlanNode { Self::Project { mapping, child } => { let child_bound = child.always_bound_variables(); for (child_i, output_i) in mapping.iter() { - if child_bound.contains(child_i) { - callback(*output_i); + if child_bound.contains(&child_i.encoded) { + callback(output_i.encoded); } } } @@ -373,11 +379,25 @@ impl PlanNode { } } +#[derive(Debug, Clone)] +pub struct PlanTerm { + pub encoded: EncodedTerm, + pub plain: T, +} + #[derive(Debug, Clone)] pub enum PatternValue { - Constant(EncodedTerm), - Variable(usize), - Triple(Box), + Constant(PlanTerm), + Variable(PlanVariable), + TriplePattern(Box), +} + +#[derive(Debug, Clone)] +pub enum PatternValueConstant { + NamedNode(NamedNode), + Literal(Literal), + Triple(Box), + DefaultGraph, } #[derive(Debug, Clone)] @@ -387,10 +407,17 @@ pub struct TriplePatternValue { pub object: PatternValue, } +#[derive(Debug, Clone)] +pub struct PlanVariable

{ + pub encoded: usize, + pub plain: P, +} + #[derive(Debug, Clone)] pub enum PlanExpression { - Constant(EncodedTerm), - Variable(usize), + NamedNode(PlanTerm), + Literal(PlanTerm), + Variable(PlanVariable), Exists(Rc), Or(Box, Box), And(Box, Box), @@ -410,7 +437,7 @@ pub enum PlanExpression { Lang(Box), LangMatches(Box, Box), Datatype(Box), - Bound(usize), + Bound(PlanVariable), Iri(Box), BNode(Option>), Rand, @@ -483,9 +510,10 @@ impl PlanExpression { pub fn lookup_used_variables(&self, callback: &mut impl FnMut(usize)) { match self { Self::Variable(v) | Self::Bound(v) => { - callback(*v); + callback(v.encoded); } - Self::Constant(_) + Self::NamedNode(_) + | Self::Literal(_) | Self::Rand | Self::Now | Self::Uuid @@ -615,14 +643,14 @@ pub enum PlanAggregationFunction { #[derive(Debug, Clone)] pub enum PlanPropertyPath { - Path(EncodedTerm), + Path(PlanTerm), Reverse(Rc), Sequence(Rc, Rc), Alternative(Rc, Rc), ZeroOrMore(Rc), OneOrMore(Rc), ZeroOrOne(Rc), - NegatedPropertySet(Rc>), + NegatedPropertySet(Rc>>), } #[derive(Debug, Clone)] @@ -640,9 +668,9 @@ pub struct TripleTemplate { #[derive(Debug, Clone)] pub enum TripleTemplateValue { - Constant(EncodedTerm), - BlankNode(usize), - Variable(usize), + Constant(PlanTerm), + BlankNode(PlanVariable), + Variable(PlanVariable), Triple(Box), } diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index f2c1dc57..598d8b3e 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -37,7 +37,10 @@ impl<'a> PlanBuilder<'a> { .build_for_graph_pattern( pattern, &mut variables, - &PatternValue::Constant(EncodedTerm::DefaultGraph), + &PatternValue::Constant(PlanTerm { + encoded: EncodedTerm::DefaultGraph, + plain: PatternValueConstant::DefaultGraph, + }), )?; let plan = if !without_optimizations && !is_cardinality_meaningful { // let's reduce downstream task. @@ -125,13 +128,13 @@ impl<'a> PlanBuilder<'a> { PlanNode::HashLeftJoin { left: Box::new(left), right: Box::new(right), - expression: Box::new( - expression - .as_ref() - .map_or(Ok(PlanExpression::Constant(true.into())), |e| { - self.build_for_expression(e, variables, graph_name) - })?, - ), + expression: Box::new(expression.as_ref().map_or( + Ok(PlanExpression::Literal(PlanTerm { + encoded: true.into(), + plain: true.into(), + })), + |e| self.build_for_expression(e, variables, graph_name), + )?), } } } @@ -171,7 +174,7 @@ impl<'a> PlanBuilder<'a> { expression, } => PlanNode::Extend { child: Box::new(self.build_for_graph_pattern(inner, variables, graph_name)?), - position: variable_key(variables, variable), + variable: build_plan_variable(variables, variable), expression: Box::new(self.build_for_expression(expression, variables, graph_name)?), }, GraphPattern::Minus { left, right } => PlanNode::AntiJoin { @@ -200,14 +203,18 @@ impl<'a> PlanBuilder<'a> { aggregates, } => PlanNode::Aggregate { child: Box::new(self.build_for_graph_pattern(inner, variables, graph_name)?), - key_variables: Rc::new(by.iter().map(|k| variable_key(variables, k)).collect()), + key_variables: Rc::new( + by.iter() + .map(|k| build_plan_variable(variables, k)) + .collect(), + ), aggregates: Rc::new( aggregates .iter() .map(|(v, a)| { Ok(( self.build_for_aggregate(a, variables, graph_name)?, - variable_key(variables, v), + build_plan_variable(variables, v), )) }) .collect::, EvaluationError>>()?, @@ -216,9 +223,36 @@ impl<'a> PlanBuilder<'a> { GraphPattern::Values { variables: table_variables, bindings, - } => PlanNode::StaticBindings { - tuples: self.encode_bindings(table_variables, bindings, variables), - }, + } => { + let bindings_variables = table_variables + .iter() + .map(|v| build_plan_variable(variables, v)) + .collect::>(); + let encoded_tuples = bindings + .iter() + .map(|row| { + let mut result = EncodedTuple::with_capacity(variables.len()); + for (key, value) in row.iter().enumerate() { + if let Some(term) = value { + result.set( + bindings_variables[key].encoded, + match term { + GroundTerm::NamedNode(node) => self.build_term(node), + GroundTerm::Literal(literal) => self.build_term(literal), + GroundTerm::Triple(triple) => self.build_triple(triple), + }, + ); + } + } + result + }) + .collect(); + PlanNode::StaticBindings { + encoded_tuples, + variables: bindings_variables, + plain_bindings: bindings.clone(), + } + } GraphPattern::OrderBy { inner, expression } => { let condition: Result, EvaluationError> = expression .iter() @@ -242,7 +276,7 @@ impl<'a> PlanBuilder<'a> { } => { let mut inner_variables = projection.clone(); let inner_graph_name = - Self::convert_pattern_value_id(graph_name, variables, &mut inner_variables); + Self::convert_pattern_value_id(graph_name, &mut inner_variables); PlanNode::Project { child: Box::new(self.build_for_graph_pattern( inner, @@ -254,7 +288,13 @@ impl<'a> PlanBuilder<'a> { .iter() .enumerate() .map(|(new_variable, variable)| { - (new_variable, variable_key(variables, variable)) + ( + PlanVariable { + encoded: new_variable, + plain: variable.clone(), + }, + build_plan_variable(variables, variable), + ) }) .collect(), ), @@ -306,13 +346,18 @@ impl<'a> PlanBuilder<'a> { }) .reduce(|a, b| self.new_join(a, b)) .unwrap_or_else(|| PlanNode::StaticBindings { - tuples: vec![EncodedTuple::with_capacity(variables.len())], + encoded_tuples: vec![EncodedTuple::with_capacity(variables.len())], + variables: Vec::new(), + plain_bindings: vec![Vec::new()], }) } fn build_for_path(&self, path: &PropertyPathExpression) -> PlanPropertyPath { match path { - PropertyPathExpression::NamedNode(p) => PlanPropertyPath::Path(self.build_term(p)), + PropertyPathExpression::NamedNode(p) => PlanPropertyPath::Path(PlanTerm { + encoded: self.build_term(p), + plain: p.clone(), + }), PropertyPathExpression::Reverse(p) => { PlanPropertyPath::Reverse(Rc::new(self.build_for_path(p))) } @@ -333,9 +378,16 @@ impl<'a> PlanBuilder<'a> { PropertyPathExpression::ZeroOrOne(p) => { PlanPropertyPath::ZeroOrOne(Rc::new(self.build_for_path(p))) } - PropertyPathExpression::NegatedPropertySet(p) => PlanPropertyPath::NegatedPropertySet( - Rc::new(p.iter().map(|p| self.build_term(p)).collect()), - ), + PropertyPathExpression::NegatedPropertySet(p) => { + PlanPropertyPath::NegatedPropertySet(Rc::new( + p.iter() + .map(|p| PlanTerm { + encoded: self.build_term(p), + plain: p.clone(), + }) + .collect(), + )) + } } } @@ -346,9 +398,15 @@ impl<'a> PlanBuilder<'a> { graph_name: &PatternValue, ) -> Result { Ok(match expression { - Expression::NamedNode(node) => PlanExpression::Constant(self.build_term(node)), - Expression::Literal(l) => PlanExpression::Constant(self.build_term(l)), - Expression::Variable(v) => PlanExpression::Variable(variable_key(variables, v)), + Expression::NamedNode(node) => PlanExpression::NamedNode(PlanTerm { + encoded: self.build_term(node), + plain: node.clone(), + }), + Expression::Literal(l) => PlanExpression::Literal(PlanTerm { + encoded: self.build_term(l), + plain: l.clone(), + }), + Expression::Variable(v) => PlanExpression::Variable(build_plan_variable(variables, v)), Expression::Or(a, b) => PlanExpression::Or( Box::new(self.build_for_expression(a, variables, graph_name)?), Box::new(self.build_for_expression(b, variables, graph_name)?), @@ -393,7 +451,12 @@ impl<'a> PlanBuilder<'a> { .reduce(|a: Result<_, EvaluationError>, b| { Ok(PlanExpression::Or(Box::new(a?), Box::new(b?))) }) - .unwrap_or_else(|| Ok(PlanExpression::Constant(false.into())))? + .unwrap_or_else(|| { + Ok(PlanExpression::Literal(PlanTerm { + encoded: false.into(), + plain: false.into(), + })) + })? } Expression::Add(a, b) => PlanExpression::Add( Box::new(self.build_for_expression(a, variables, graph_name)?), @@ -824,7 +887,7 @@ impl<'a> PlanBuilder<'a> { } } }, - Expression::Bound(v) => PlanExpression::Bound(variable_key(variables, v)), + Expression::Bound(v) => PlanExpression::Bound(build_plan_variable(variables, v)), Expression::If(a, b, c) => PlanExpression::If( Box::new(self.build_for_expression(a, variables, graph_name)?), Box::new(self.build_for_expression(b, variables, graph_name)?), @@ -883,17 +946,23 @@ impl<'a> PlanBuilder<'a> { ) -> PatternValue { match term_or_variable { TermPattern::Variable(variable) => { - PatternValue::Variable(variable_key(variables, variable)) + PatternValue::Variable(build_plan_variable(variables, variable)) } - TermPattern::NamedNode(node) => PatternValue::Constant(self.build_term(node)), + TermPattern::NamedNode(node) => PatternValue::Constant(PlanTerm { + encoded: self.build_term(node), + plain: PatternValueConstant::NamedNode(node.clone()), + }), TermPattern::BlankNode(bnode) => { - PatternValue::Variable(variable_key( + PatternValue::Variable(build_plan_variable( variables, &Variable::new_unchecked(bnode.as_str()), )) //TODO: very bad hack to convert bnode to variable } - TermPattern::Literal(literal) => PatternValue::Constant(self.build_term(literal)), + TermPattern::Literal(literal) => PatternValue::Constant(PlanTerm { + encoded: self.build_term(literal), + plain: PatternValueConstant::Literal(literal.clone()), + }), TermPattern::Triple(triple) => { match ( self.pattern_value_from_term_or_variable(&triple.subject, variables), @@ -901,19 +970,48 @@ impl<'a> PlanBuilder<'a> { self.pattern_value_from_term_or_variable(&triple.object, variables), ) { ( - PatternValue::Constant(subject), - PatternValue::Constant(predicate), - PatternValue::Constant(object), - ) => PatternValue::Constant( - EncodedTriple { - subject, - predicate, - object, + PatternValue::Constant(PlanTerm { + encoded: encoded_subject, + plain: plain_subject, + }), + PatternValue::Constant(PlanTerm { + encoded: encoded_predicate, + plain: plain_predicate, + }), + PatternValue::Constant(PlanTerm { + encoded: encoded_object, + plain: plain_object, + }), + ) => PatternValue::Constant(PlanTerm { + encoded: EncodedTriple { + subject: encoded_subject, + predicate: encoded_predicate, + object: encoded_object, } .into(), - ), + plain: PatternValueConstant::Triple(Box::new(Triple { + subject: match plain_subject { + PatternValueConstant::NamedNode(s) => s.into(), + PatternValueConstant::Triple(s) => s.into(), + PatternValueConstant::Literal(_) + | PatternValueConstant::DefaultGraph => unreachable!(), + }, + predicate: match plain_predicate { + PatternValueConstant::NamedNode(s) => s, + PatternValueConstant::Literal(_) + | PatternValueConstant::Triple(_) + | PatternValueConstant::DefaultGraph => unreachable!(), + }, + object: match plain_object { + PatternValueConstant::NamedNode(s) => s.into(), + PatternValueConstant::Literal(s) => s.into(), + PatternValueConstant::Triple(s) => s.into(), + PatternValueConstant::DefaultGraph => unreachable!(), + }, + })), + }), (subject, predicate, object) => { - PatternValue::Triple(Box::new(TriplePatternValue { + PatternValue::TriplePattern(Box::new(TriplePatternValue { subject, predicate, object, @@ -930,45 +1028,16 @@ impl<'a> PlanBuilder<'a> { variables: &mut Vec, ) -> PatternValue { match named_node_or_variable { - NamedNodePattern::NamedNode(named_node) => { - PatternValue::Constant(self.build_term(named_node)) - } + NamedNodePattern::NamedNode(named_node) => PatternValue::Constant(PlanTerm { + encoded: self.build_term(named_node), + plain: PatternValueConstant::NamedNode(named_node.clone()), + }), NamedNodePattern::Variable(variable) => { - PatternValue::Variable(variable_key(variables, variable)) + PatternValue::Variable(build_plan_variable(variables, variable)) } } } - fn encode_bindings( - &self, - table_variables: &[Variable], - rows: &[Vec>], - variables: &mut Vec, - ) -> Vec { - let bindings_variables_keys = table_variables - .iter() - .map(|v| variable_key(variables, v)) - .collect::>(); - rows.iter() - .map(move |row| { - let mut result = EncodedTuple::with_capacity(variables.len()); - for (key, value) in row.iter().enumerate() { - if let Some(term) = value { - result.set( - bindings_variables_keys[key], - match term { - GroundTerm::NamedNode(node) => self.build_term(node), - GroundTerm::Literal(literal) => self.build_term(literal), - GroundTerm::Triple(triple) => self.build_triple(triple), - }, - ); - } - } - result - }) - .collect() - } - fn build_for_aggregate( &self, aggregate: &AggregateExpression, @@ -1059,15 +1128,20 @@ impl<'a> PlanBuilder<'a> { ) -> TripleTemplateValue { match term_or_variable { TermPattern::Variable(variable) => { - TripleTemplateValue::Variable(variable_key(variables, variable)) - } - TermPattern::NamedNode(node) => TripleTemplateValue::Constant(self.build_term(node)), - TermPattern::BlankNode(bnode) => { - TripleTemplateValue::BlankNode(bnode_key(bnodes, bnode)) - } - TermPattern::Literal(literal) => { - TripleTemplateValue::Constant(self.build_term(literal)) + TripleTemplateValue::Variable(build_plan_variable(variables, variable)) } + TermPattern::NamedNode(node) => TripleTemplateValue::Constant(PlanTerm { + encoded: self.build_term(node), + plain: node.clone().into(), + }), + TermPattern::BlankNode(bnode) => TripleTemplateValue::BlankNode(PlanVariable { + encoded: bnode_key(bnodes, bnode), + plain: bnode.clone(), + }), + TermPattern::Literal(literal) => TripleTemplateValue::Constant(PlanTerm { + encoded: self.build_term(literal), + plain: literal.clone().into(), + }), TermPattern::Triple(triple) => match ( self.template_value_from_term_or_variable(&triple.subject, variables, bnodes), self.template_value_from_named_node_or_variable(&triple.predicate, variables), @@ -1077,14 +1151,30 @@ impl<'a> PlanBuilder<'a> { TripleTemplateValue::Constant(subject), TripleTemplateValue::Constant(predicate), TripleTemplateValue::Constant(object), - ) => TripleTemplateValue::Constant( - EncodedTriple { - subject, - predicate, - object, + ) => TripleTemplateValue::Constant(PlanTerm { + encoded: EncodedTriple { + subject: subject.encoded, + predicate: predicate.encoded, + object: object.encoded, } .into(), - ), + plain: Triple { + subject: match subject.plain { + Term::NamedNode(node) => node.into(), + Term::BlankNode(node) => node.into(), + Term::Literal(_) => unreachable!(), + Term::Triple(node) => node.into(), + }, + predicate: match predicate.plain { + Term::NamedNode(node) => node, + Term::BlankNode(_) | Term::Literal(_) | Term::Triple(_) => { + unreachable!() + } + }, + object: object.plain, + } + .into(), + }), (subject, predicate, object) => { TripleTemplateValue::Triple(Box::new(TripleTemplate { subject, @@ -1103,35 +1193,34 @@ impl<'a> PlanBuilder<'a> { ) -> TripleTemplateValue { match named_node_or_variable { NamedNodePattern::Variable(variable) => { - TripleTemplateValue::Variable(variable_key(variables, variable)) - } - NamedNodePattern::NamedNode(term) => { - TripleTemplateValue::Constant(self.build_term(term)) + TripleTemplateValue::Variable(build_plan_variable(variables, variable)) } + NamedNodePattern::NamedNode(term) => TripleTemplateValue::Constant(PlanTerm { + encoded: self.build_term(term), + plain: term.clone().into(), + }), } } - fn convert_pattern_value_id( - from_value: &PatternValue, - from: &[Variable], - to: &mut Vec, - ) -> PatternValue { + fn convert_pattern_value_id(from_value: &PatternValue, to: &mut Vec) -> PatternValue { match from_value { - PatternValue::Constant(v) => PatternValue::Constant(v.clone()), + PatternValue::Constant(c) => PatternValue::Constant(c.clone()), PatternValue::Variable(from_id) => { - PatternValue::Variable(Self::convert_variable_id(*from_id, from, to)) + PatternValue::Variable(Self::convert_plan_variable(from_id, to)) + } + PatternValue::TriplePattern(triple) => { + PatternValue::TriplePattern(Box::new(TriplePatternValue { + subject: Self::convert_pattern_value_id(&triple.subject, to), + predicate: Self::convert_pattern_value_id(&triple.predicate, to), + object: Self::convert_pattern_value_id(&triple.object, to), + })) } - PatternValue::Triple(triple) => PatternValue::Triple(Box::new(TriplePatternValue { - subject: Self::convert_pattern_value_id(&triple.subject, from, to), - predicate: Self::convert_pattern_value_id(&triple.predicate, from, to), - object: Self::convert_pattern_value_id(&triple.object, from, to), - })), } } - fn convert_variable_id(from_id: usize, from: &[Variable], to: &mut Vec) -> usize { - if let Some(to_id) = to.iter().enumerate().find_map(|(to_id, var)| { - if *var == from[from_id] { + fn convert_plan_variable(from_variable: &PlanVariable, to: &mut Vec) -> PlanVariable { + let encoded = if let Some(to_id) = to.iter().enumerate().find_map(|(to_id, var)| { + if *var == from_variable.plain { Some(to_id) } else { None @@ -1141,6 +1230,10 @@ impl<'a> PlanBuilder<'a> { } else { to.push(Variable::new_unchecked(format!("{:x}", random::()))); to.len() - 1 + }; + PlanVariable { + encoded, + plain: from_variable.plain.clone(), } } @@ -1228,8 +1321,8 @@ impl<'a> PlanBuilder<'a> { let mut child_bound = BTreeSet::new(); Self::add_left_join_problematic_variables(child, &mut child_bound); for (child_i, output_i) in mapping.iter() { - if child_bound.contains(child_i) { - set.insert(*output_i); + if child_bound.contains(&child_i.encoded) { + set.insert(output_i.encoded); } } } @@ -1238,10 +1331,10 @@ impl<'a> PlanBuilder<'a> { aggregates, .. } => { - set.extend(key_variables.iter()); + set.extend(key_variables.iter().map(|v| v.encoded)); //TODO: This is too harsh for (_, var) in aggregates.iter() { - set.insert(*var); + set.insert(var.encoded); } } } @@ -1371,21 +1464,21 @@ impl<'a> PlanBuilder<'a> { PlanNode::Extend { child, expression, - position, + variable: position, } => { //TODO: handle the case where the filter generates an expression variable if filter_variables.iter().all(|v| child.is_variable_bound(*v)) { PlanNode::Extend { child: Box::new(self.push_filter(child, filter)), expression, - position, + variable: position, } } else { PlanNode::Filter { child: Box::new(PlanNode::Extend { child, expression, - position, + variable: position, }), expression: filter, } @@ -1439,13 +1532,17 @@ impl<'a> PlanBuilder<'a> { } } -fn variable_key(variables: &mut Vec, variable: &Variable) -> usize { - match slice_key(variables, variable) { +fn build_plan_variable(variables: &mut Vec, variable: &Variable) -> PlanVariable { + let encoded = match slice_key(variables, variable) { Some(key) => key, None => { variables.push(variable.clone()); variables.len() - 1 } + }; + PlanVariable { + plain: variable.clone(), + encoded, } }