From 5f79c408bcdb56fa4a833d8ded5a275c243a9af5 Mon Sep 17 00:00:00 2001 From: Tpt Date: Mon, 6 Sep 2021 17:04:17 +0200 Subject: [PATCH] Adds GraphPattern::Sequence Safe version of GraphPattern::Join --- lib/src/sparql/plan_builder.rs | 91 +++++++++++++++++----------------- spargebra/src/algebra.rs | 21 ++++++++ spargebra/src/parser.rs | 61 ++++++++++++++++------- 3 files changed, 111 insertions(+), 62 deletions(-) diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index f7beecef..27e2a5d9 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -43,41 +43,13 @@ impl<'a> PlanBuilder<'a> { graph_name: &PatternValue, ) -> Result { Ok(match pattern { - GraphPattern::Bgp(p) => self.build_for_bgp(p, variables, graph_name), - GraphPattern::Path { - subject, - path, - object, - } => PlanNode::PathPatternJoin { - child: Rc::new(PlanNode::Init), - subject: self.pattern_value_from_term_or_variable(subject, variables), - path: Rc::new(self.build_for_path(path)), - object: self.pattern_value_from_term_or_variable(object, variables), - graph_name: graph_name.clone(), - }, - GraphPattern::Join { left, right } => { - //TODO: improve - if let GraphPattern::Path { - subject, - path, - object, - } = right.as_ref() - { - let left = self.build_for_graph_pattern(left, variables, graph_name)?; - PlanNode::PathPatternJoin { - child: Rc::new(left), - subject: self.pattern_value_from_term_or_variable(subject, variables), - path: Rc::new(self.build_for_path(path)), - object: self.pattern_value_from_term_or_variable(object, variables), - graph_name: graph_name.clone(), - } - } else { - PlanNode::Join { - left: Rc::new(self.build_for_graph_pattern(left, variables, graph_name)?), - right: Rc::new(self.build_for_graph_pattern(right, variables, graph_name)?), - } - } + GraphPattern::Bgp(_) | GraphPattern::Path { .. } | GraphPattern::Sequence { .. } => { + self.build_sequence(PlanNode::Init, pattern, variables, graph_name)? 
} + GraphPattern::Join { left, right } => PlanNode::Join { + left: Rc::new(self.build_for_graph_pattern(left, variables, graph_name)?), + right: Rc::new(self.build_for_graph_pattern(right, variables, graph_name)?), + }, GraphPattern::LeftJoin { left, right, expr } => { let left = self.build_for_graph_pattern(left, variables, graph_name)?; let right = self.build_for_graph_pattern(right, variables, graph_name)?; @@ -271,24 +243,53 @@ impl<'a> PlanBuilder<'a> { }) } - fn build_for_bgp( + fn build_sequence( &mut self, - p: &[TriplePattern], + mut plan: PlanNode, + pattern: &GraphPattern, variables: &mut Vec, graph_name: &PatternValue, - ) -> PlanNode { - let mut plan = PlanNode::Init; - for pattern in sort_bgp(p) { - plan = PlanNode::QuadPatternJoin { + ) -> Result { + match pattern { + GraphPattern::Bgp(p) => { + for triple in sort_bgp(p) { + plan = PlanNode::QuadPatternJoin { + child: Rc::new(plan), + subject: self + .pattern_value_from_term_or_variable(&triple.subject, variables), + predicate: self.pattern_value_from_named_node_or_variable( + &triple.predicate, + variables, + ), + object: self.pattern_value_from_term_or_variable(&triple.object, variables), + graph_name: graph_name.clone(), + } + } + Ok(plan) + } + GraphPattern::Path { + subject, + path, + object, + } => Ok(PlanNode::PathPatternJoin { child: Rc::new(plan), - subject: self.pattern_value_from_term_or_variable(&pattern.subject, variables), - predicate: self - .pattern_value_from_named_node_or_variable(&pattern.predicate, variables), - object: self.pattern_value_from_term_or_variable(&pattern.object, variables), + subject: self.pattern_value_from_term_or_variable(subject, variables), + path: Rc::new(self.build_for_path(path)), + object: self.pattern_value_from_term_or_variable(object, variables), graph_name: graph_name.clone(), + }), + GraphPattern::Graph { inner, graph_name } => { + let graph_name = + self.pattern_value_from_named_node_or_variable(graph_name, variables); + self.build_sequence(plan, 
inner, variables, &graph_name) } + GraphPattern::Sequence(elements) => elements.iter().fold(Ok(plan), |plan, element| { + self.build_sequence(plan?, element, variables, graph_name) + }), + _ => Err(EvaluationError::msg(format!( + "Unexpected element in a sequence: {:?}.", pattern + ))), } - plan } fn build_for_path(&mut self, path: &PropertyPathExpression) -> PlanPropertyPath { diff --git a/spargebra/src/algebra.rs b/spargebra/src/algebra.rs index 4a297cc5..4c3eda43 100644 --- a/spargebra/src/algebra.rs +++ b/spargebra/src/algebra.rs @@ -470,6 +470,9 @@ pub enum GraphPattern { path: PropertyPathExpression, object: TermPattern, }, + /// A set of SPARQL patterns that can be evaluated sequentially + /// It is a safe case of [Join](https://www.w3.org/TR/sparql11-query/#defn_algJoin) + Sequence(Vec), /// [Join](https://www.w3.org/TR/sparql11-query/#defn_algJoin) Join { left: Box, right: Box }, /// [LeftJoin](https://www.w3.org/TR/sparql11-query/#defn_algLeftJoin) @@ -548,6 +551,13 @@ impl fmt::Debug for GraphPattern { path, object, } => write!(f, "(path {:?} {:?} {:?})", subject, path, object), + Self::Sequence(elements) => { + write!(f, "(sequence")?; + for e in elements { + write!(f, " {:?}", e)?; + } + write!(f, ")") + } Self::Join { left, right } => write!(f, "(join {:?} {:?})", left, right), Self::LeftJoin { left, right, expr } => { if let Some(expr) = expr { @@ -665,6 +675,12 @@ impl fmt::Display for GraphPattern { path, object, } => write!(f, "{} {} {} .", subject, path, object), + Self::Sequence(elements) => { + for e in elements { + write!(f, "{} ", e)?; + } + Ok(()) + } Self::Join { left, right } => { if matches!( right.as_ref(), @@ -796,6 +812,11 @@ impl GraphPattern { add_triple_pattern_variables(o, vars) } } + Self::Sequence(elements) => { + for e in elements { + e.add_visible_variables(vars); + } + } Self::Join { left, right } | Self::LeftJoin { left, right, .. 
} | Self::Union { left, right } => { diff --git a/spargebra/src/parser.rs b/spargebra/src/parser.rs index 79327521..c7bdf5c7 100644 --- a/spargebra/src/parser.rs +++ b/spargebra/src/parser.rs @@ -11,6 +11,7 @@ use std::borrow::Cow; use std::collections::{HashMap, HashSet}; use std::convert::{TryFrom, TryInto}; use std::error::Error; +use std::mem::take; use std::str::Chars; use std::str::FromStr; use std::{char, fmt}; @@ -250,8 +251,8 @@ fn add_triple_to_triple_or_path_patterns( } fn build_bgp(patterns: Vec) -> GraphPattern { - let mut bgp = Vec::with_capacity(patterns.len()); - let mut paths = Vec::with_capacity(patterns.len()); + let mut bgp = Vec::new(); + let mut elements = Vec::with_capacity(patterns.len()); for pattern in patterns { match pattern { TripleOrPathPattern::Triple(t) => bgp.push(t), @@ -259,21 +260,22 @@ fn build_bgp(patterns: Vec) -> GraphPattern { subject, path, object, - } => paths.push((subject, path, object)), + } => { + if !bgp.is_empty() { + elements.push(GraphPattern::Bgp(take(&mut bgp))); + } + elements.push(GraphPattern::Path { + subject, + path, + object, + }) + } } } - let mut graph_pattern = GraphPattern::Bgp(bgp); - for (subject, path, object) in paths { - graph_pattern = new_join( - graph_pattern, - GraphPattern::Path { - subject, - path, - object, - }, - ) + if !bgp.is_empty() { + elements.push(GraphPattern::Bgp(bgp)); } - graph_pattern + new_sequence(elements) } enum TripleOrPathPattern { @@ -364,12 +366,29 @@ fn new_join(l: GraphPattern, r: GraphPattern) -> GraphPattern { } } - //Merge BGPs + // Some optimizations + // TODO: move to a specific optimizer pass match (l, r) { (GraphPattern::Bgp(mut pl), GraphPattern::Bgp(pr)) => { pl.extend(pr); GraphPattern::Bgp(pl) } + (GraphPattern::Sequence(mut e1), GraphPattern::Sequence(e2)) => { + e1.extend(e2); + GraphPattern::Sequence(e1) + } + (GraphPattern::Sequence(mut e), r) + if matches!(r, GraphPattern::Bgp(_) | GraphPattern::Path { .. 
}) => + { + e.push(r); + GraphPattern::Sequence(e) + } + (l, GraphPattern::Sequence(mut e)) + if matches!(l, GraphPattern::Bgp(_) | GraphPattern::Path { .. }) => + { + e.insert(0, l); + GraphPattern::Sequence(e) + } ( GraphPattern::Graph { graph_name: g1, @@ -393,6 +412,14 @@ fn new_join(l: GraphPattern, r: GraphPattern) -> GraphPattern { } } +fn new_sequence(elements: Vec) -> GraphPattern { + match elements.len() { + 0 => GraphPattern::Bgp(vec![]), + 1 => elements.into_iter().next().unwrap(), + _ => GraphPattern::Sequence(elements), + } +} + fn not_empty_fold( iter: impl Iterator, combine: impl Fn(T, T) -> T, @@ -1185,14 +1212,14 @@ parser! { //[40] rule DeleteWhere() -> Vec = i("DELETE") _ i("WHERE") _ d:QuadPattern() {? - let pattern = d.iter().map(|q| { + let pattern = new_sequence(d.iter().map(|q| { let bgp = GraphPattern::Bgp(vec![TriplePattern::new(q.subject.clone(), q.predicate.clone(), q.object.clone())]); match &q.graph_name { GraphNamePattern::NamedNode(graph_name) => GraphPattern::Graph { graph_name: graph_name.clone().into(), inner: Box::new(bgp) }, GraphNamePattern::DefaultGraph => bgp, GraphNamePattern::Variable(graph_name) => GraphPattern::Graph { graph_name: graph_name.clone().into(), inner: Box::new(bgp) }, } - }).fold(GraphPattern::Bgp(Vec::new()), new_join); + }).collect()); let delete = d.into_iter().map(GroundQuadPattern::try_from).collect::,_>>().map_err(|_| "Blank nodes are not allowed in DELETE WHERE")?; Ok(vec![GraphUpdateOperation::DeleteInsert { delete,