From 15819907afdbefa7205372ca5798a537dffe44de Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 27 Oct 2022 15:55:57 +0200 Subject: [PATCH] Adds experimental OX_LATERAL operation "FOO OX_LATERAL(?v1 ?vn) SUBSELECT" means that for all bindings emitted from FOO SUBSELECT is going to be called with ?v1 ?vn already set from the binding from FOO --- lib/Cargo.toml | 2 +- lib/spargebra/Cargo.toml | 1 + lib/spargebra/src/algebra.rs | 74 ++++++++ lib/spargebra/src/parser.rs | 69 ++++++- lib/src/sparql/eval.rs | 52 +++--- lib/src/sparql/plan.rs | 169 ++++++++++-------- lib/src/sparql/plan_builder.rs | 74 +++++--- .../sparql-results/manifest.ttl | 2 +- .../oxigraph-tests/sparql/lateral/basic.rq | 6 + .../oxigraph-tests/sparql/lateral/basic.srx | 41 +++++ .../sparql/lateral/basic_input.ttl | 5 + .../sparql/lateral/basic_optional.rq | 6 + .../sparql/lateral/basic_optional.srx | 46 +++++ .../sparql/lateral/explicit_aggregate.rq | 6 + .../sparql/lateral/explicit_aggregate.srx | 25 +++ .../sparql/lateral/implicit_aggregate.rq | 6 + .../sparql/lateral/implicit_aggregate.srx | 30 ++++ .../sparql/lateral/manifest.ttl | 44 +++++ testsuite/oxigraph-tests/sparql/manifest.ttl | 1 + 19 files changed, 528 insertions(+), 131 deletions(-) create mode 100644 testsuite/oxigraph-tests/sparql/lateral/basic.rq create mode 100644 testsuite/oxigraph-tests/sparql/lateral/basic.srx create mode 100644 testsuite/oxigraph-tests/sparql/lateral/basic_input.ttl create mode 100644 testsuite/oxigraph-tests/sparql/lateral/basic_optional.rq create mode 100644 testsuite/oxigraph-tests/sparql/lateral/basic_optional.srx create mode 100644 testsuite/oxigraph-tests/sparql/lateral/explicit_aggregate.rq create mode 100644 testsuite/oxigraph-tests/sparql/lateral/explicit_aggregate.srx create mode 100644 testsuite/oxigraph-tests/sparql/lateral/implicit_aggregate.rq create mode 100644 testsuite/oxigraph-tests/sparql/lateral/implicit_aggregate.srx create mode 100644 testsuite/oxigraph-tests/sparql/lateral/manifest.ttl diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 51a097af..0048f224 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -38,7 +38,7 @@ siphasher = "0.3" lazy_static = "1" sysinfo = "0.26" oxrdf = { version = "0.1.0", path="oxrdf", features = ["rdf-star"] } -spargebra = { version = "0.3.0-alpha", path="spargebra", features = ["rdf-star"] } +spargebra = { version = "0.3.0-alpha", path="spargebra", features = ["rdf-star", "ex-lateral"] } sparesults = { version = "0.1.1", path="sparesults", features = ["rdf-star"] } [target.'cfg(not(target_arch = "wasm32"))'.dependencies] diff --git a/lib/spargebra/Cargo.toml b/lib/spargebra/Cargo.toml index 21a1cd49..5cf4977e 100644 --- a/lib/spargebra/Cargo.toml +++ b/lib/spargebra/Cargo.toml @@ -15,6 +15,7 @@ edition = "2021" [features] default = [] rdf-star = ["oxrdf/rdf-star"] +ex-lateral = [] [dependencies] peg = "0.8" diff --git a/lib/spargebra/src/algebra.rs b/lib/spargebra/src/algebra.rs index 1460d3c0..ddd3d392 100644 --- a/lib/spargebra/src/algebra.rs +++ b/lib/spargebra/src/algebra.rs @@ -519,6 +519,8 @@ pub enum GraphPattern { path: PropertyPathExpression, object: TermPattern, }, + /// A sequence of operation (lateral join): execute left and for each element execute right + Sequence { left: Box, right: Box }, /// [Join](https://www.w3.org/TR/sparql11-query/#defn_algJoin). Join { left: Box, right: Box }, /// [LeftJoin](https://www.w3.org/TR/sparql11-query/#defn_algLeftJoin). @@ -557,6 +559,8 @@ pub enum GraphPattern { Project { inner: Box, variables: Vec, + #[cfg(feature = "ex-lateral")] + lateral_variables: Vec, }, /// [Distinct](https://www.w3.org/TR/sparql11-query/#defn_algDistinct). Distinct { inner: Box }, @@ -614,6 +618,13 @@ impl GraphPattern { right.fmt_sse(f)?; write!(f, ")") } + Self::Sequence { left, right } => { + write!(f, "(sequence ")?; + left.fmt_sse(f)?; + write!(f, " ")?; + right.fmt_sse(f)?; + write!(f, ")") + } Self::LeftJoin { left, right, @@ -739,6 +750,34 @@ impl GraphPattern { inner.fmt_sse(f)?; write!(f, ")") } + #[cfg(feature = "ex-lateral")] + Self::Project { + inner, + variables, + lateral_variables, + } => { + write!(f, "(project (")?; + for (i, v) in variables.iter().enumerate() { + if i > 0 { + write!(f, " ")?; + } + write!(f, "{}", v)?; + } + write!(f, ") ")?; + if !lateral_variables.is_empty() { + write!(f, "(lateral (")?; + for (i, v) in lateral_variables.iter().enumerate() { + if i > 0 { + write!(f, " ")?; + } + write!(f, "{}", v)?; + } + write!(f, ")) ")?; + } + inner.fmt_sse(f)?; + write!(f, ")") + } + #[cfg(not(feature = "ex-lateral"))] Self::Project { inner, variables } => { write!(f, "(project (")?; for (i, v) in variables.iter().enumerate() { @@ -806,6 +845,9 @@ impl fmt::Display for GraphPattern { write!(f, "{} {}", left, right) } } + Self::Sequence { left, right } => { + write!(f, "{} {}", left, right) + } Self::LeftJoin { left, right, @@ -935,6 +977,7 @@ impl GraphPattern { } } Self::Join { left, right } + | Self::Sequence { left, right } | Self::LeftJoin { left, right, .. } | Self::Union { left, right } => { left.lookup_in_scope_variables(callback); @@ -1021,6 +1064,8 @@ impl<'a> fmt::Display for SparqlGraphRootPattern<'a> { let mut start = 0; let mut length = None; let mut project: &[Variable] = &[]; + #[cfg(feature = "ex-lateral")] + let mut lateral: &[Variable] = &[]; let mut child = self.pattern; loop { @@ -1029,6 +1074,17 @@ impl<'a> fmt::Display for SparqlGraphRootPattern<'a> { order = Some(expression); child = inner; } + #[cfg(feature = "ex-lateral")] + GraphPattern::Project { + inner, + variables, + lateral_variables, + } if project.is_empty() => { + project = variables; + lateral = lateral_variables; + child = inner; + } + #[cfg(not(feature = "ex-lateral"))] GraphPattern::Project { inner, variables } if project.is_empty() => { project = variables; child = inner; @@ -1051,6 +1107,20 @@ impl<'a> fmt::Display for SparqlGraphRootPattern<'a> { child = inner; } p => { + #[cfg(feature = "ex-lateral")] + let mut closing_brackets = false; + #[cfg(feature = "ex-lateral")] + if !lateral.is_empty() { + write!(f, "OX_LATERAL (")?; + for (i, v) in lateral.iter().enumerate() { + if i > 0 { + write!(f, " ")?; + } + write!(f, "{}", v)?; + } + write!(f, ") {{")?; + closing_brackets = true; + } write!(f, "SELECT")?; if distinct { write!(f, " DISTINCT")?; @@ -1081,6 +1151,10 @@ impl<'a> fmt::Display for SparqlGraphRootPattern<'a> { if let Some(length) = length { write!(f, " LIMIT {}", length)?; } + #[cfg(feature = "ex-lateral")] + if closing_brackets { + write!(f, "}}")?; + } return Ok(()); } } diff --git a/lib/spargebra/src/parser.rs b/lib/spargebra/src/parser.rs index 98cfe9e0..e7ea423d 100644 --- a/lib/spargebra/src/parser.rs +++ b/lib/spargebra/src/parser.rs @@ -363,10 +363,25 @@ enum PartialGraphPattern { Minus(GraphPattern), Bind(Expression, Variable), Filter(Expression), + #[cfg(feature = "ex-lateral")] + Lateral(GraphPattern, Vec), Other(GraphPattern), } fn new_join(l: GraphPattern, r: GraphPattern) -> GraphPattern { + new_join_like(l, r, |left, right| GraphPattern::Join { left, right }) +} + +#[cfg(feature = "ex-lateral")] +fn new_sequence(l: GraphPattern, r: GraphPattern) -> GraphPattern { + new_join_like(l, r, |left, right| GraphPattern::Sequence { left, right }) +} + +fn new_join_like( + l: GraphPattern, + r: GraphPattern, + cons: impl FnOnce(Box, Box) -> GraphPattern, +) -> GraphPattern { //Avoid to output empty BGPs if let GraphPattern::Bgp { patterns: pl } = &l { if pl.is_empty() { @@ -389,10 +404,7 @@ fn new_join(l: GraphPattern, r: GraphPattern) -> GraphPattern { { other } - (l, r) => GraphPattern::Join { - left: Box::new(l), - right: Box::new(r), - }, + (l, r) => cons(Box::new(l), Box::new(r)), } } @@ -562,6 +574,8 @@ fn build_select( m = GraphPattern::Project { inner: Box::new(m), variables: pv, + #[cfg(feature = "ex-lateral")] + lateral_variables: Vec::new(), }; } match select.option { @@ -581,6 +595,40 @@ fn build_select( Ok(m) } +#[cfg(feature = "ex-lateral")] +fn insert_lateral_variables( + pattern: GraphPattern, + new_lateral_variables: Vec, +) -> Result { + match pattern { + GraphPattern::Project { + inner, + variables, + lateral_variables, + } if lateral_variables.is_empty() => Ok(GraphPattern::Project { + inner, + variables, + lateral_variables: new_lateral_variables, + }), + GraphPattern::Distinct { inner } => Ok(GraphPattern::Distinct { + inner: Box::new(insert_lateral_variables(*inner, new_lateral_variables)?), + }), + GraphPattern::Reduced { inner } => Ok(GraphPattern::Reduced { + inner: Box::new(insert_lateral_variables(*inner, new_lateral_variables)?), + }), + GraphPattern::Slice { + inner, + start, + length, + } => Ok(GraphPattern::Slice { + inner: Box::new(insert_lateral_variables(*inner, new_lateral_variables)?), + start, + length, + }), + _ => Err("Not able to parse properly OX_LATERAL"), + } +} + fn are_variables_bound(expression: &Expression, variables: &HashSet) -> bool { match expression { Expression::NamedNode(_) @@ -1375,6 +1423,9 @@ parser! { } else { expr }), + #[cfg(feature = "ex-lateral")] + PartialGraphPattern::Lateral(p, mut variables) => + g = new_sequence(g, insert_lateral_variables(p, variables)?), PartialGraphPattern::Other(e) => g = new_join(g, e), } } @@ -1400,7 +1451,15 @@ parser! { rule TriplesBlock_inner() -> Vec = _ h:TriplesSameSubjectPath() _ { h } //[56] - rule GraphPatternNotTriples() -> PartialGraphPattern = GroupOrUnionGraphPattern() / OptionalGraphPattern() / MinusGraphPattern() / GraphGraphPattern() / ServiceGraphPattern() / Filter() / Bind() / InlineData() + rule GraphPatternNotTriples() -> PartialGraphPattern = GroupOrUnionGraphPattern() / OptionalGraphPattern() / MinusGraphPattern() / GraphGraphPattern() / ServiceGraphPattern() / Filter() / Bind() / InlineData() / OxLateral() + + rule OxLateral() -> PartialGraphPattern = i("OX_LATERAL") _ "(" _ vs:OxLateral_variable()* _ ")" _ "{" _ s:SubSelect() _ "}" {? + #[cfg(feature = "ex-lateral")]{ Ok(PartialGraphPattern::Lateral(s, vs)) } + #[cfg(not(feature = "ex-lateral"))]{ Err("The 'OX_LATERAL' syntax is not enabled") } + } + rule OxLateral_variable() -> Variable = v:Var() _ { + v + } //[57] rule OptionalGraphPattern() -> PartialGraphPattern = i("OPTIONAL") _ p:GroupGraphPattern() { diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 6c76c055..603b82d0 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -526,37 +526,43 @@ impl SimpleEvaluator { let count = *count; Rc::new(move |from| Box::new(child(from).take(count))) } - PlanNode::Project { child, mapping } => { + PlanNode::Project { + child, + mapping, + lateral_mapping, + } => { let child = self.plan_evaluator(child); let mapping = mapping.clone(); + let lateral_mapping = lateral_mapping.clone(); Rc::new(move |from| { let mapping = mapping.clone(); - Box::new( - child(EncodedTuple::with_capacity(mapping.len())).filter_map( - move |tuple| { - match tuple { - Ok(tuple) => { - let mut output_tuple = from.clone(); - for (input_key, output_key) in mapping.iter() { - if let Some(value) = tuple.get(*input_key) { - if let Some(existing_value) = - output_tuple.get(*output_key) - { - if existing_value != value { - return None; // Conflict - } - } else { - output_tuple.set(*output_key, value.clone()); - } + let mut initial_mapping = EncodedTuple::with_capacity(mapping.len()); + for (input_key, output_key) in lateral_mapping.iter() { + if let Some(value) = from.get(*output_key) { + initial_mapping.set(*input_key, value.clone()) + } + } + Box::new(child(initial_mapping).filter_map(move |tuple| { + match tuple { + Ok(tuple) => { + let mut output_tuple = from.clone(); + for (input_key, output_key) in mapping.iter() { + if let Some(value) = tuple.get(*input_key) { + if let Some(existing_value) = output_tuple.get(*output_key) + { + if existing_value != value { + return None; // Conflict } + } else { + output_tuple.set(*output_key, value.clone()); } - Some(Ok(output_tuple)) } - Err(e) => Some(Err(e)), } - }, - ), - ) + Some(Ok(output_tuple)) + } + Err(e) => Some(Err(e)), + } + })) }) } PlanNode::Aggregate { diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index b5fd578a..6465ec9f 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -86,6 +86,7 @@ pub enum PlanNode { Project { child: Box, mapping: Rc>, // pairs of (variable key in child, variable key in output) + lateral_mapping: Rc>, // pairs of (variable key in child, variable key in output) }, Aggregate { // By definition the group by key are the range 0..key_mapping.len() @@ -99,19 +100,17 @@ impl PlanNode { /// Returns variables that might be bound in the result set pub fn used_variables(&self) -> BTreeSet { let mut set = BTreeSet::default(); - self.lookup_used_variables(&mut |v| { - set.insert(v); - }); + self.add_used_variables(&mut set); set } - pub fn lookup_used_variables(&self, callback: &mut impl FnMut(usize)) { + pub fn add_used_variables(&self, set: &mut BTreeSet) { match self { Self::StaticBindings { tuples } => { for tuple in tuples { for (key, value) in tuple.iter().enumerate() { if value.is_some() { - callback(key); + set.insert(key); } } } @@ -123,16 +122,16 @@ impl PlanNode { graph_name, } => { if let PatternValue::Variable(var) = subject { - callback(*var); + set.insert(*var); } if let PatternValue::Variable(var) = predicate { - callback(*var); + set.insert(*var); } if let PatternValue::Variable(var) = object { - callback(*var); + set.insert(*var); } if let PatternValue::Variable(var) = graph_name { - callback(*var); + set.insert(*var); } } Self::PathPattern { @@ -142,60 +141,69 @@ impl PlanNode { .. } => { if let PatternValue::Variable(var) = subject { - callback(*var); + set.insert(*var); } if let PatternValue::Variable(var) = object { - callback(*var); + set.insert(*var); } if let PatternValue::Variable(var) = graph_name { - callback(*var); + set.insert(*var); } } Self::Filter { child, expression } => { - expression.lookup_used_variables(callback); - child.lookup_used_variables(callback); + expression.add_used_variables(set); + child.add_used_variables(set); } Self::Union { children } => { for child in children.iter() { - child.lookup_used_variables(callback); + child.add_used_variables(set); } } Self::HashJoin { left, right } | Self::ForLoopJoin { left, right, .. } | Self::AntiJoin { left, right } | Self::LeftJoin { left, right, .. } => { - left.lookup_used_variables(callback); - right.lookup_used_variables(callback); + left.add_used_variables(set); + right.add_used_variables(set); } Self::Extend { child, position, expression, } => { - callback(*position); - expression.lookup_used_variables(callback); - child.lookup_used_variables(callback); + set.insert(*position); + expression.add_used_variables(set); + child.add_used_variables(set); } Self::Sort { child, .. } | Self::HashDeduplicate { child } | Self::Reduced { child } | Self::Skip { child, .. } - | Self::Limit { child, .. } => child.lookup_used_variables(callback), + | Self::Limit { child, .. } => child.add_used_variables(set), Self::Service { child, service_name, .. } => { if let PatternValue::Variable(v) = service_name { - callback(*v); + set.insert(*v); } - child.lookup_used_variables(callback); + child.add_used_variables(set); } - Self::Project { mapping, child } => { - let child_bound = child.used_variables(); + Self::Project { + mapping, + child, + lateral_mapping, + } => { + let mut child_bound = child.used_variables(); + for (child_i, output_i) in lateral_mapping.iter() { + if set.contains(output_i) { + child_bound.insert(*child_i); + } + } for (child_i, output_i) in mapping.iter() { if child_bound.contains(child_i) { - callback(*output_i); + set.insert(*output_i); } } } @@ -205,10 +213,10 @@ impl PlanNode { .. } => { for var in key_variables.iter() { - callback(*var); + set.insert(*var); } for (_, var) in aggregates.iter() { - callback(*var); + set.insert(*var); } } } @@ -219,13 +227,11 @@ impl PlanNode { /// (subset because this function is not perfect yet) pub fn always_bound_variables(&self) -> BTreeSet { let mut set = BTreeSet::default(); - self.lookup_always_bound_variables(&mut |v| { - set.insert(v); - }); + self.add_always_bound_variables(&mut set); set } - pub fn lookup_always_bound_variables(&self, callback: &mut impl FnMut(usize)) { + pub fn add_always_bound_variables(&self, set: &mut BTreeSet) { match self { Self::StaticBindings { tuples } => { let mut variables = BTreeMap::default(); // value true iff always bound @@ -246,7 +252,7 @@ impl PlanNode { } for (k, v) in variables { if v { - callback(k); + set.insert(k); } } } @@ -257,16 +263,16 @@ impl PlanNode { graph_name, } => { if let PatternValue::Variable(var) = subject { - callback(*var); + set.insert(*var); } if let PatternValue::Variable(var) = predicate { - callback(*var); + set.insert(*var); } if let PatternValue::Variable(var) = object { - callback(*var); + set.insert(*var); } if let PatternValue::Variable(var) = graph_name { - callback(*var); + set.insert(*var); } } Self::PathPattern { @@ -276,18 +282,18 @@ impl PlanNode { .. } => { if let PatternValue::Variable(var) = subject { - callback(*var); + set.insert(*var); } if let PatternValue::Variable(var) = object { - callback(*var); + set.insert(*var); } if let PatternValue::Variable(var) = graph_name { - callback(*var); + set.insert(*var); } } Self::Filter { child, .. } => { //TODO: have a look at the expression to know if it filters out unbound variables - child.lookup_always_bound_variables(callback); + child.add_always_bound_variables(set); } Self::Union { children } => { if let Some(vars) = children @@ -296,16 +302,16 @@ impl PlanNode { .reduce(|a, b| a.intersection(&b).copied().collect()) { for v in vars { - callback(v); + set.insert(v); } } } Self::HashJoin { left, right } | Self::ForLoopJoin { left, right, .. } => { - left.lookup_always_bound_variables(callback); - right.lookup_always_bound_variables(callback); + left.add_always_bound_variables(set); + right.add_always_bound_variables(set); } Self::AntiJoin { left, .. } | Self::LeftJoin { left, .. } => { - left.lookup_always_bound_variables(callback); + left.add_always_bound_variables(set); } Self::Extend { child, @@ -314,27 +320,37 @@ impl PlanNode { } => { if matches!(expression.as_ref(), PlanExpression::Constant(_)) { // TODO: more cases? - callback(*position); + set.insert(*position); } - child.lookup_always_bound_variables(callback); + child.add_always_bound_variables(set); } Self::Sort { child, .. } | Self::HashDeduplicate { child } | Self::Reduced { child } | Self::Skip { child, .. } - | Self::Limit { child, .. } => child.lookup_always_bound_variables(callback), + | Self::Limit { child, .. } => child.add_always_bound_variables(set), Self::Service { child, silent, .. } => { if *silent { // none, might return a null tuple } else { - child.lookup_always_bound_variables(callback) + child.add_always_bound_variables(set) } } - Self::Project { mapping, child } => { - let child_bound = child.always_bound_variables(); + Self::Project { + mapping, + child, + lateral_mapping, + } => { + let mut child_bound = BTreeSet::new(); + for (child_i, output_i) in lateral_mapping.iter() { + if set.contains(output_i) { + child_bound.insert(*child_i); + } + } + child.add_always_bound_variables(&mut child_bound); for (child_i, output_i) in mapping.iter() { if child_bound.contains(child_i) { - callback(*output_i); + set.insert(*output_i); } } } @@ -344,14 +360,12 @@ impl PlanNode { } } - pub fn is_variable_bound(&self, variable: usize) -> bool { - let mut found = false; - self.lookup_always_bound_variables(&mut |v| { - if v == variable { - found = true; - } - }); - found + pub fn are_all_variable_bound<'a>( + &self, + variables: impl IntoIterator, + ) -> bool { + let bound = self.always_bound_variables(); + variables.into_iter().all(|v| bound.contains(v)) } } @@ -459,10 +473,17 @@ pub enum PlanExpression { } impl PlanExpression { - pub fn lookup_used_variables(&self, callback: &mut impl FnMut(usize)) { + /// Returns variables that are used in the expression + pub fn used_variables(&self) -> BTreeSet { + let mut set = BTreeSet::default(); + self.add_used_variables(&mut set); + set + } + + pub fn add_used_variables(&self, set: &mut BTreeSet) { match self { Self::Variable(v) | Self::Bound(v) => { - callback(*v); + set.insert(*v); } Self::Constant(_) | Self::Rand @@ -518,7 +539,7 @@ impl PlanExpression { | Self::DurationCast(e) | Self::YearMonthDurationCast(e) | Self::DayTimeDurationCast(e) - | Self::StringCast(e) => e.lookup_used_variables(callback), + | Self::StringCast(e) => e.add_used_variables(set), Self::Or(a, b) | Self::And(a, b) | Self::Equal(a, b) @@ -541,31 +562,31 @@ impl PlanExpression { | Self::SameTerm(a, b) | Self::SubStr(a, b, None) | Self::Regex(a, b, None) => { - a.lookup_used_variables(callback); - b.lookup_used_variables(callback); + a.add_used_variables(set); + b.add_used_variables(set); } Self::If(a, b, c) | Self::SubStr(a, b, Some(c)) | Self::Regex(a, b, Some(c)) | Self::Replace(a, b, c, None) | Self::Triple(a, b, c) => { - a.lookup_used_variables(callback); - b.lookup_used_variables(callback); - c.lookup_used_variables(callback); + a.add_used_variables(set); + b.add_used_variables(set); + c.add_used_variables(set); } Self::Replace(a, b, c, Some(d)) => { - a.lookup_used_variables(callback); - b.lookup_used_variables(callback); - c.lookup_used_variables(callback); - d.lookup_used_variables(callback); + a.add_used_variables(set); + b.add_used_variables(set); + c.add_used_variables(set); + d.add_used_variables(set); } Self::Concat(es) | Self::Coalesce(es) | Self::CustomFunction(_, es) => { for e in es { - e.lookup_used_variables(callback); + e.add_used_variables(set); } } Self::Exists(e) => { - e.lookup_used_variables(callback); + e.add_used_variables(set); } } } diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index fd96b0af..d223f213 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -93,6 +93,10 @@ impl<'a> PlanBuilder<'a> { self.build_for_graph_pattern(left, variables, graph_name)?, self.build_for_graph_pattern(right, variables, graph_name)?, ), + GraphPattern::Sequence { left, right } => PlanNode::ForLoopJoin { + left: Box::new(self.build_for_graph_pattern(left, variables, graph_name)?), + right: Box::new(self.build_for_graph_pattern(right, variables, graph_name)?), + }, GraphPattern::LeftJoin { left, right, @@ -220,6 +224,7 @@ impl<'a> PlanBuilder<'a> { GraphPattern::Project { inner, variables: projection, + lateral_variables, } => { let mut inner_variables = projection.clone(); let inner_graph_name = @@ -239,6 +244,17 @@ impl<'a> PlanBuilder<'a> { }) .collect(), ), + lateral_mapping: Rc::new( + lateral_variables + .iter() + .map(|variable| { + ( + variable_key(&mut inner_variables, variable), + variable_key(variables, variable), + ) + }) + .collect(), + ), } } GraphPattern::Distinct { inner } => PlanNode::HashDeduplicate { @@ -1054,11 +1070,11 @@ impl<'a> PlanBuilder<'a> { | PlanNode::PathPattern { .. } => (), PlanNode::Filter { child, expression } => { let always_already_bound = child.always_bound_variables(); - expression.lookup_used_variables(&mut |v| { + for v in expression.used_variables() { if !always_already_bound.contains(&v) { set.insert(v); } - }); + } Self::add_left_join_problematic_variables(child, set); } PlanNode::Union { children } => { @@ -1075,20 +1091,20 @@ impl<'a> PlanBuilder<'a> { } PlanNode::LeftJoin { left, right, .. } => { Self::add_left_join_problematic_variables(left, set); - right.lookup_used_variables(&mut |v| { - set.insert(v); - }); + right.add_used_variables(set); } PlanNode::Extend { - child, expression, .. + child, + expression, + position, } => { let always_already_bound = child.always_bound_variables(); - expression.lookup_used_variables(&mut |v| { + for v in expression.used_variables() { if !always_already_bound.contains(&v) { set.insert(v); } - }); - Self::add_left_join_problematic_variables(child, set); + } + set.insert(*position); //TODO: too strict Self::add_left_join_problematic_variables(child, set); } PlanNode::Sort { child, .. } @@ -1100,18 +1116,25 @@ impl<'a> PlanBuilder<'a> { } PlanNode::Service { child, silent, .. } => { if *silent { - child.lookup_used_variables(&mut |v| { - set.insert(v); - }); + child.add_used_variables(set); } else { Self::add_left_join_problematic_variables(child, set) } } - PlanNode::Project { mapping, child } => { - let mut child_bound = BTreeSet::new(); - Self::add_left_join_problematic_variables(child, &mut child_bound); + PlanNode::Project { + mapping, + child, + lateral_mapping, + } => { + let mut child_problematic_set = BTreeSet::new(); + for (child_i, output_i) in lateral_mapping.iter() { + if set.contains(output_i) { + child_problematic_set.insert(*child_i); + } + } + Self::add_left_join_problematic_variables(child, &mut child_problematic_set); for (child_i, output_i) in mapping.iter() { - if child_bound.contains(child_i) { + if child_problematic_set.contains(child_i) { set.insert(*output_i); } } @@ -1194,14 +1217,11 @@ impl<'a> PlanBuilder<'a> { if let PlanExpression::And(f1, f2) = *filter { return Self::push_filter(Box::new(Self::push_filter(node, f1)), f2); } - let mut filter_variables = BTreeSet::new(); - filter.lookup_used_variables(&mut |v| { - filter_variables.insert(v); - }); + let filter_variables = filter.used_variables(); match *node { PlanNode::HashJoin { left, right } => { - if filter_variables.iter().all(|v| left.is_variable_bound(*v)) { - if filter_variables.iter().all(|v| right.is_variable_bound(*v)) { + if left.are_all_variable_bound(&filter_variables) { + if right.are_all_variable_bound(&filter_variables) { PlanNode::HashJoin { left: Box::new(Self::push_filter(left, filter.clone())), right: Box::new(Self::push_filter(right, filter)), @@ -1212,7 +1232,7 @@ impl<'a> PlanBuilder<'a> { right, } } - } else if filter_variables.iter().all(|v| right.is_variable_bound(*v)) { + } else if right.are_all_variable_bound(&filter_variables) { PlanNode::HashJoin { left, right: Box::new(Self::push_filter(right, filter)), @@ -1225,12 +1245,12 @@ impl<'a> PlanBuilder<'a> { } } PlanNode::ForLoopJoin { left, right } => { - if filter_variables.iter().all(|v| left.is_variable_bound(*v)) { + if left.are_all_variable_bound(&filter_variables) { PlanNode::ForLoopJoin { left: Box::new(Self::push_filter(left, filter)), right, } - } else if filter_variables.iter().all(|v| right.is_variable_bound(*v)) { + } else if right.are_all_variable_bound(&filter_variables) { PlanNode::ForLoopJoin { //TODO: should we do that always? left, @@ -1249,7 +1269,7 @@ impl<'a> PlanBuilder<'a> { position, } => { //TODO: handle the case where the filter generates an expression variable - if filter_variables.iter().all(|v| child.is_variable_bound(*v)) { + if child.are_all_variable_bound(&filter_variables) { PlanNode::Extend { child: Box::new(Self::push_filter(child, filter)), expression, @@ -1267,7 +1287,7 @@ impl<'a> PlanBuilder<'a> { } } PlanNode::Filter { child, expression } => { - if filter_variables.iter().all(|v| child.is_variable_bound(*v)) { + if child.are_all_variable_bound(&filter_variables) { PlanNode::Filter { child: Box::new(Self::push_filter(child, filter)), expression, diff --git a/testsuite/oxigraph-tests/sparql-results/manifest.ttl b/testsuite/oxigraph-tests/sparql-results/manifest.ttl index 03e276ec..f6e99bc3 100644 --- a/testsuite/oxigraph-tests/sparql-results/manifest.ttl +++ b/testsuite/oxigraph-tests/sparql-results/manifest.ttl @@ -5,7 +5,7 @@ @prefix ox: . <> rdf:type mf:Manifest ; - rdfs:label "Oxigraph SPARQL resutls tests" ; + rdfs:label "Oxigraph SPARQL results tests" ; mf:entries ( :results_json_duplicated_variables diff --git a/testsuite/oxigraph-tests/sparql/lateral/basic.rq b/testsuite/oxigraph-tests/sparql/lateral/basic.rq new file mode 100644 index 00000000..b84e4862 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql/lateral/basic.rq @@ -0,0 +1,6 @@ +PREFIX ex: + +SELECT ?s ?o WHERE { + ?s a ex:T. + OX_LATERAL(?s) {SELECT ?o WHERE { ?s ex:p ?o } ORDER BY ?o LIMIT 2} +} \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/lateral/basic.srx b/testsuite/oxigraph-tests/sparql/lateral/basic.srx new file mode 100644 index 00000000..6c9cf41f --- /dev/null +++ b/testsuite/oxigraph-tests/sparql/lateral/basic.srx @@ -0,0 +1,41 @@ + + + + + + + + + + http://example.org/s1 + + + 11 + + + + + http://example.org/s1 + + + 12 + + + + + http://example.org/s2 + + + 21 + + + + + http://example.org/s2 + + + 22 + + + + \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/lateral/basic_input.ttl b/testsuite/oxigraph-tests/sparql/lateral/basic_input.ttl new file mode 100644 index 00000000..c3c9fb07 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql/lateral/basic_input.ttl @@ -0,0 +1,5 @@ +@prefix ex: . + +ex:s1 a ex:T ; ex:p 11 , 12 , 13 . +ex:s2 a ex:T ; ex:p 21 , 22 , 23 . +ex:s3 a ex:T . \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/lateral/basic_optional.rq b/testsuite/oxigraph-tests/sparql/lateral/basic_optional.rq new file mode 100644 index 00000000..283a5a5c --- /dev/null +++ b/testsuite/oxigraph-tests/sparql/lateral/basic_optional.rq @@ -0,0 +1,6 @@ +PREFIX ex: + +SELECT ?s ?o WHERE { + ?s a ex:T. + OPTIONAL { OX_LATERAL(?s) {SELECT ?o WHERE { ?s ex:p ?o } ORDER BY ?o LIMIT 2} } +} \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/lateral/basic_optional.srx b/testsuite/oxigraph-tests/sparql/lateral/basic_optional.srx new file mode 100644 index 00000000..11630eb9 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql/lateral/basic_optional.srx @@ -0,0 +1,46 @@ + + + + + + + + + + http://example.org/s1 + + + 11 + + + + + http://example.org/s1 + + + 12 + + + + + http://example.org/s2 + + + 21 + + + + + http://example.org/s2 + + + 22 + + + + + http://example.org/s3 + + + + \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/lateral/explicit_aggregate.rq b/testsuite/oxigraph-tests/sparql/lateral/explicit_aggregate.rq new file mode 100644 index 00000000..0228e61d --- /dev/null +++ b/testsuite/oxigraph-tests/sparql/lateral/explicit_aggregate.rq @@ -0,0 +1,6 @@ +PREFIX ex: + +SELECT ?s ?c WHERE { + ?s a ex:T. + OX_LATERAL(?s) {SELECT (MAX(?o) AS ?c) WHERE { ?s ex:p ?o } GROUP BY ?s} +} \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/lateral/explicit_aggregate.srx b/testsuite/oxigraph-tests/sparql/lateral/explicit_aggregate.srx new file mode 100644 index 00000000..8523de72 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql/lateral/explicit_aggregate.srx @@ -0,0 +1,25 @@ + + + + + + + + + + http://example.org/s1 + + + 13 + + + + + http://example.org/s2 + + + 23 + + + + \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/lateral/implicit_aggregate.rq b/testsuite/oxigraph-tests/sparql/lateral/implicit_aggregate.rq new file mode 100644 index 00000000..e53b3bc3 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql/lateral/implicit_aggregate.rq @@ -0,0 +1,6 @@ +PREFIX ex: + +SELECT ?s ?c WHERE { + ?s a ex:T. + OX_LATERAL(?s) {SELECT (MAX(?o) AS ?c) WHERE { ?s ex:p ?o }} +} \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/lateral/implicit_aggregate.srx b/testsuite/oxigraph-tests/sparql/lateral/implicit_aggregate.srx new file mode 100644 index 00000000..aeec93c3 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql/lateral/implicit_aggregate.srx @@ -0,0 +1,30 @@ + + + + + + + + + + http://example.org/s1 + + + 13 + + + + + http://example.org/s2 + + + 23 + + + + + http://example.org/s3 + + + + \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/lateral/manifest.ttl b/testsuite/oxigraph-tests/sparql/lateral/manifest.ttl new file mode 100644 index 00000000..3b68bf58 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql/lateral/manifest.ttl @@ -0,0 +1,44 @@ +@prefix rdf: . +@prefix : . +@prefix rdfs: . +@prefix mf: . +@prefix qt: . +@prefix ut: . + +<> rdf:type mf:Manifest ; + rdfs:label "Oxigraph OX_LATERAL feature SPARQL tests" ; + mf:entries + ( + :basic + :basic_optional + :implicit_aggregate + :explicit_aggregate + ) . + +:basic rdf:type mf:QueryEvaluationTest ; + mf:name "Basic OX_LATERAL test" ; + mf:action + [ qt:query ; + qt:data ] ; + mf:result . + +:basic_optional rdf:type mf:QueryEvaluationTest ; + mf:name "Basic OX_LATERAL test inside optional" ; + mf:action + [ qt:query ; + qt:data ] ; + mf:result . + +:implicit_aggregate rdf:type mf:QueryEvaluationTest ; + mf:name "OX_LATERAL test with implicit aggregate" ; + mf:action + [ qt:query ; + qt:data ] ; + mf:result . + +:explicit_aggregate rdf:type mf:QueryEvaluationTest ; + mf:name "OX_LATERAL test with explicit aggregate" ; + mf:action + [ qt:query ; + qt:data ] ; + mf:result . diff --git a/testsuite/oxigraph-tests/sparql/manifest.ttl b/testsuite/oxigraph-tests/sparql/manifest.ttl index 621c63e9..540fe294 100644 --- a/testsuite/oxigraph-tests/sparql/manifest.ttl +++ b/testsuite/oxigraph-tests/sparql/manifest.ttl @@ -7,6 +7,7 @@ <> rdf:type mf:Manifest ; rdfs:label "Oxigraph SPARQL tests" ; + mf:include ( ) ; mf:entries ( :small_unicode_escape_with_multibytes_char