From 4f7e396af0b4b0d079837a9de1f49ce7a89d6af8 Mon Sep 17 00:00:00 2001 From: Tpt Date: Fri, 1 Oct 2021 21:54:50 +0200 Subject: [PATCH] Introduces PlanNode::ForLoopJoin Makes for loop join more explicit and usable in more places --- lib/src/sparql/eval.rs | 192 +++++++++++++++------------------ lib/src/sparql/plan.rs | 27 +++-- lib/src/sparql/plan_builder.rs | 104 ++++++++---------- 3 files changed, 146 insertions(+), 177 deletions(-) diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 43a54713..15419ef5 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -105,7 +105,6 @@ impl SimpleEvaluator { node: &PlanNode, ) -> Rc EncodedTuplesIterator> { match node { - PlanNode::Init => Rc::new(move |from| Box::new(once(Ok(from)))), PlanNode::StaticBindings { tuples } => { let tuples = tuples.clone(); Rc::new(move |from| { @@ -152,150 +151,126 @@ impl SimpleEvaluator { } }) } - PlanNode::QuadPatternJoin { - child, + PlanNode::QuadPattern { subject, predicate, object, graph_name, } => { - let child = self.plan_evaluator(child); let subject = subject.clone(); let predicate = predicate.clone(); let object = object.clone(); let graph_name = graph_name.clone(); let dataset = self.dataset.clone(); Rc::new(move |from| { + let iter = dataset.encoded_quads_for_pattern( + get_pattern_value(&subject, &from).as_ref(), + get_pattern_value(&predicate, &from).as_ref(), + get_pattern_value(&object, &from).as_ref(), + get_pattern_value(&graph_name, &from).as_ref(), + ); let subject = subject.clone(); let predicate = predicate.clone(); let object = object.clone(); let graph_name = graph_name.clone(); - let dataset = dataset.clone(); - Box::new(child(from).flat_map_ok(move |tuple| { - let iter = dataset.encoded_quads_for_pattern( - get_pattern_value(&subject, &tuple).as_ref(), - get_pattern_value(&predicate, &tuple).as_ref(), - get_pattern_value(&object, &tuple).as_ref(), - get_pattern_value(&graph_name, &tuple).as_ref(), - ); - let subject = subject.clone(); - let predicate = predicate.clone(); - let object = object.clone(); - let graph_name = graph_name.clone(); - let iter: EncodedTuplesIterator = - Box::new(iter.filter_map(move |quad| match quad { - Ok(quad) => { - let mut new_tuple = tuple.clone(); - put_pattern_value(&subject, quad.subject, &mut new_tuple)?; - put_pattern_value(&predicate, quad.predicate, &mut new_tuple)?; - put_pattern_value(&object, quad.object, &mut new_tuple)?; - put_pattern_value( - &graph_name, - quad.graph_name, - &mut new_tuple, - )?; - Some(Ok(new_tuple)) - } - Err(error) => Some(Err(error)), - })); - iter + Box::new(iter.filter_map(move |quad| match quad { + Ok(quad) => { + let mut new_tuple = from.clone(); + put_pattern_value(&subject, quad.subject, &mut new_tuple)?; + put_pattern_value(&predicate, quad.predicate, &mut new_tuple)?; + put_pattern_value(&object, quad.object, &mut new_tuple)?; + put_pattern_value(&graph_name, quad.graph_name, &mut new_tuple)?; + Some(Ok(new_tuple)) + } + Err(error) => Some(Err(error)), })) }) } - PlanNode::PathPatternJoin { - child, + PlanNode::PathPattern { subject, path, object, graph_name, } => { - let child = self.plan_evaluator(child); let eval = self.clone(); let subject = subject.clone(); let path = path.clone(); let object = object.clone(); let graph_name = graph_name.clone(); Rc::new(move |from| { - let eval = eval.clone(); - let subject = subject.clone(); - let path = path.clone(); - let object = object.clone(); - let graph_name = graph_name.clone(); - Box::new(child(from).flat_map_ok(move |tuple| { - let input_subject = get_pattern_value(&subject, &tuple); - let input_object = get_pattern_value(&object, &tuple); - let input_graph_name = - if let Some(graph_name) = get_pattern_value(&graph_name, &tuple) { - graph_name - } else { - let result: EncodedTuplesIterator = - Box::new(once(Err(EvaluationError::msg( - "Unknown graph name is not allowed when evaluating property path", - )))); - return result; - }; - match (input_subject, input_object) { - (Some(input_subject), Some(input_object)) => Box::new( + let input_subject = get_pattern_value(&subject, &from); + let input_object = get_pattern_value(&object, &from); + let input_graph_name = + if let Some(graph_name) = get_pattern_value(&graph_name, &from) { + graph_name + } else { + let result: EncodedTuplesIterator = + Box::new(once(Err(EvaluationError::msg( + "Unknown graph name is not allowed when evaluating property path", + )))); + return result; + }; + match (input_subject, input_object) { + (Some(input_subject), Some(input_object)) => Box::new( + eval.eval_path_from(&path, &input_subject, &input_graph_name) + .filter_map(move |o| match o { + Ok(o) => { + if o == input_object { + Some(Ok(from.clone())) + } else { + None + } + } + Err(error) => Some(Err(error)), + }), + ), + (Some(input_subject), None) => { + let object = object.clone(); + Box::new( eval.eval_path_from(&path, &input_subject, &input_graph_name) .filter_map(move |o| match o { Ok(o) => { - if o == input_object { - Some(Ok(tuple.clone())) - } else { - None - } + let mut new_tuple = from.clone(); + put_pattern_value(&object, o, &mut new_tuple)?; + Some(Ok(new_tuple)) } Err(error) => Some(Err(error)), }), - ), - (Some(input_subject), None) => { - let object = object.clone(); - Box::new( - eval.eval_path_from(&path, &input_subject, &input_graph_name) - .filter_map(move |o| match o { - Ok(o) => { - let mut new_tuple = tuple.clone(); - put_pattern_value(&object, o, &mut new_tuple)?; - Some(Ok(new_tuple)) - } - Err(error) => Some(Err(error)), - }), - ) - } - (None, Some(input_object)) => { - let subject = subject.clone(); - Box::new( - eval.eval_path_to(&path, &input_object, &input_graph_name) - .filter_map(move |s| match s { - Ok(s) => { - let mut new_tuple = tuple.clone(); - put_pattern_value(&subject, s, &mut new_tuple)?; - Some(Ok(new_tuple)) - } - Err(error) => Some(Err(error)), - }), - ) - } - (None, None) => { - let subject = subject.clone(); - let object = object.clone(); - Box::new(eval.eval_open_path(&path, &input_graph_name).filter_map( - move |so| match so { - Ok((s, o)) => { - let mut new_tuple = tuple.clone(); + ) + } + (None, Some(input_object)) => { + let subject = subject.clone(); + Box::new( + eval.eval_path_to(&path, &input_object, &input_graph_name) + .filter_map(move |s| match s { + Ok(s) => { + let mut new_tuple = from.clone(); put_pattern_value(&subject, s, &mut new_tuple)?; - put_pattern_value(&object, o, &mut new_tuple)?; Some(Ok(new_tuple)) } Err(error) => Some(Err(error)), - }, - )) - } + }), + ) } - })) + (None, None) => { + let subject = subject.clone(); + let object = object.clone(); + Box::new(eval.eval_open_path(&path, &input_graph_name).filter_map( + move |so| match so { + Ok((s, o)) => { + let mut new_tuple = from.clone(); + put_pattern_value(&subject, s, &mut new_tuple)?; + put_pattern_value(&object, o, &mut new_tuple)?; + Some(Ok(new_tuple)) + } + Err(error) => Some(Err(error)), + }, + )) + } + } }) } - PlanNode::Join { left, right } => { + PlanNode::HashJoin { left, right } => { let left = self.plan_evaluator(left); let right = self.plan_evaluator(right); Rc::new(move |from| { @@ -317,6 +292,17 @@ impl SimpleEvaluator { }) }) } + PlanNode::ForLoopJoin { left, right } => { + let left = self.plan_evaluator(left); + let right = self.plan_evaluator(right); + Rc::new(move |from| { + let right = right.clone(); + Box::new(left(from).flat_map(move |t| match t { + Ok(t) => right(t), + Err(e) => Box::new(once(Err(e))), + })) + }) + } PlanNode::AntiJoin { left, right } => { let left = self.plan_evaluator(left); let right = self.plan_evaluator(right); diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index de7140df..b5fa317b 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -6,7 +6,6 @@ use std::rc::Rc; #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub enum PlanNode { - Init, StaticBindings { tuples: Vec, }, @@ -17,21 +16,23 @@ pub enum PlanNode { graph_pattern: Rc, silent: bool, }, - QuadPatternJoin { - child: Rc, + QuadPattern { subject: PatternValue, predicate: PatternValue, object: PatternValue, graph_name: PatternValue, }, - PathPatternJoin { - child: Rc, + PathPattern { subject: PatternValue, path: Rc, object: PatternValue, graph_name: PatternValue, }, - Join { + HashJoin { + left: Rc, + right: Rc, + }, + ForLoopJoin { left: Rc, right: Rc, }, @@ -93,7 +94,6 @@ impl PlanNode { pub fn add_maybe_bound_variables(&self, set: &mut BTreeSet) { match self { - PlanNode::Init => (), PlanNode::StaticBindings { tuples } => { for tuple in tuples { for (key, value) in tuple.iter().enumerate() { @@ -103,8 +103,7 @@ impl PlanNode { } } } - PlanNode::QuadPatternJoin { - child, + PlanNode::QuadPattern { subject, predicate, object, @@ -122,10 +121,8 @@ impl PlanNode { if let PatternValue::Variable(var) = graph_name { set.insert(*var); } - child.add_maybe_bound_variables(set); } - PlanNode::PathPatternJoin { - child, + PlanNode::PathPattern { subject, object, graph_name, @@ -140,7 +137,6 @@ impl PlanNode { if let PatternValue::Variable(var) = graph_name { set.insert(*var); } - child.add_maybe_bound_variables(set); } PlanNode::Filter { child, expression } => { expression.add_maybe_bound_variables(set); @@ -151,8 +147,9 @@ impl PlanNode { child.add_maybe_bound_variables(set); } } - PlanNode::Join { left, right, .. } - | PlanNode::AntiJoin { left, right, .. } + PlanNode::HashJoin { left, right } + | PlanNode::ForLoopJoin { left, right, .. } + | PlanNode::AntiJoin { left, right } | PlanNode::LeftJoin { left, right, .. } => { left.add_maybe_bound_variables(set); right.add_maybe_bound_variables(set); diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index 1bd1404e..09086df6 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -43,12 +43,47 @@ impl<'a> PlanBuilder<'a> { graph_name: &PatternValue, ) -> Result { Ok(match pattern { - GraphPattern::Bgp { .. } - | GraphPattern::Path { .. } - | GraphPattern::Sequence { .. } => { - self.build_sequence(PlanNode::Init, pattern, variables, graph_name)? - } - GraphPattern::Join { left, right } => PlanNode::Join { + GraphPattern::Bgp { patterns } => sort_bgp(patterns) + .iter() + .map(|triple| PlanNode::QuadPattern { + subject: self.pattern_value_from_term_or_variable(&triple.subject, variables), + predicate: self + .pattern_value_from_named_node_or_variable(&triple.predicate, variables), + object: self.pattern_value_from_term_or_variable(&triple.object, variables), + graph_name: graph_name.clone(), + }) + .reduce(|left, right| PlanNode::ForLoopJoin { + left: Rc::new(left), + right: Rc::new(right), + }) + .unwrap_or_else(|| PlanNode::StaticBindings { + tuples: vec![EncodedTuple::with_capacity(variables.len())], + }), + GraphPattern::Path { + subject, + path, + object, + } => PlanNode::PathPattern { + subject: self.pattern_value_from_term_or_variable(subject, variables), + path: Rc::new(self.build_for_path(path)), + object: self.pattern_value_from_term_or_variable(object, variables), + graph_name: graph_name.clone(), + }, + GraphPattern::Sequence(elements) => elements + .iter() + .map(|e| self.build_for_graph_pattern(e, variables, graph_name)) + .reduce(|left, right| { + Ok(PlanNode::ForLoopJoin { + left: Rc::new(left?), + right: Rc::new(right?), + }) + }) + .unwrap_or_else(|| { + Ok(PlanNode::StaticBindings { + tuples: vec![EncodedTuple::with_capacity(variables.len())], + }) + })?, + GraphPattern::Join { left, right } => PlanNode::HashJoin { left: Rc::new(self.build_for_graph_pattern(left, variables, graph_name)?), right: Rc::new(self.build_for_graph_pattern(right, variables, graph_name)?), }, @@ -255,54 +290,6 @@ impl<'a> PlanBuilder<'a> { }) } - fn build_sequence( - &mut self, - mut plan: PlanNode, - pattern: &GraphPattern, - variables: &mut Vec, - graph_name: &PatternValue, - ) -> Result { - match pattern { - GraphPattern::Bgp { patterns } => { - for triple in sort_bgp(patterns) { - plan = PlanNode::QuadPatternJoin { - child: Rc::new(plan), - subject: self - .pattern_value_from_term_or_variable(&triple.subject, variables), - predicate: self.pattern_value_from_named_node_or_variable( - &triple.predicate, - variables, - ), - object: self.pattern_value_from_term_or_variable(&triple.object, variables), - graph_name: graph_name.clone(), - } - } - Ok(plan) - } - GraphPattern::Path { - subject, - path, - object, - } => Ok(PlanNode::PathPatternJoin { - child: Rc::new(plan), - subject: self.pattern_value_from_term_or_variable(subject, variables), - path: Rc::new(self.build_for_path(path)), - object: self.pattern_value_from_term_or_variable(object, variables), - graph_name: graph_name.clone(), - }), - GraphPattern::Graph { inner, name } => { - let graph_name = self.pattern_value_from_named_node_or_variable(name, variables); - self.build_sequence(plan, inner, variables, &graph_name) - } - GraphPattern::Sequence(elements) => elements.iter().fold(Ok(plan), |plan, element| { - self.build_sequence(plan?, element, variables, graph_name) - }), - _ => Err(EvaluationError::msg( - "Unexpected element in a sequence: {:?}.", - )), - } - } - fn build_for_path(&mut self, path: &PropertyPathExpression) -> PlanPropertyPath { match path { PropertyPathExpression::NamedNode(p) => { @@ -1073,10 +1060,9 @@ impl<'a> PlanBuilder<'a> { fn add_left_join_problematic_variables(&self, node: &PlanNode, set: &mut BTreeSet) { match node { - PlanNode::Init - | PlanNode::StaticBindings { .. } - | PlanNode::QuadPatternJoin { .. } - | PlanNode::PathPatternJoin { .. } => (), + PlanNode::StaticBindings { .. } + | PlanNode::QuadPattern { .. } + | PlanNode::PathPattern { .. } => (), PlanNode::Filter { child, expression } => { expression.add_maybe_bound_variables(set); //TODO: only if it is not already bound self.add_left_join_problematic_variables(&*child, set); @@ -1086,7 +1072,7 @@ impl<'a> PlanBuilder<'a> { self.add_left_join_problematic_variables(child, set); } } - PlanNode::Join { left, right, .. } => { + PlanNode::HashJoin { left, right } | PlanNode::ForLoopJoin { left, right } => { self.add_left_join_problematic_variables(&*left, set); self.add_left_join_problematic_variables(&*right, set); }