diff --git a/lib/src/sparql/algebra.rs b/lib/src/sparql/algebra.rs index e5c4773e..19b57f03 100644 --- a/lib/src/sparql/algebra.rs +++ b/lib/src/sparql/algebra.rs @@ -554,7 +554,7 @@ impl fmt::Display for Function { pub enum GraphPattern { BGP(Vec), Join(Box, Box), - LeftJoin(Box, Box, Expression), + LeftJoin(Box, Box, Option), Filter(Expression, Box), Union(Box, Box), Graph(NamedNodeOrVariable, Box), @@ -582,7 +582,13 @@ impl fmt::Display for GraphPattern { .join(" . ") ), GraphPattern::Join(a, b) => write!(f, "Join({}, {})", a, b), - GraphPattern::LeftJoin(a, b, e) => write!(f, "LeftJoin({}, {}, {})", a, b, e), + GraphPattern::LeftJoin(a, b, e) => { + if let Some(e) = e { + write!(f, "LeftJoin({}, {}, {})", a, b, e) + } else { + write!(f, "LeftJoin({}, {})", a, b) + } + } GraphPattern::Filter(e, p) => write!(f, "Filter({}, {})", e, p), GraphPattern::Union(a, b) => write!(f, "Union({}, {})", a, b), GraphPattern::Graph(g, p) => write!(f, "Graph({}, {})", g, p), @@ -750,13 +756,24 @@ impl<'a> fmt::Display for SparqlGraphPattern<'a> { SparqlGraphPattern(&*a), SparqlGraphPattern(&*b) ), - GraphPattern::LeftJoin(a, b, e) => write!( - f, - "{} OPTIONAL {{ {} FILTER({}) }}", - SparqlGraphPattern(&*a), - SparqlGraphPattern(&*b), - SparqlExpression(e) - ), + GraphPattern::LeftJoin(a, b, e) => { + if let Some(e) = e { + write!( + f, + "{} OPTIONAL {{ {} FILTER({}) }}", + SparqlGraphPattern(&*a), + SparqlGraphPattern(&*b), + SparqlExpression(e) + ) + } else { + write!( + f, + "{} OPTIONAL {{ {} }}", + SparqlGraphPattern(&*a), + SparqlGraphPattern(&*b) + ) + } + } GraphPattern::Filter(e, p) => write!( f, "{} FILTER({})", diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 3414b593..c513fba8 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -303,22 +303,21 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { right, possible_problem_vars, } => { - let problem_vars = bind_variables_in_set(&from, possible_problem_vars); - let mut filtered_from = from.clone(); - unbind_variables(&mut filtered_from, &problem_vars); - let iter = LeftJoinIterator { - eval: self, - right_plan: &*right, - left_iter: self.eval_plan(&*left, filtered_from), - current_right: Box::new(empty()), - }; - if problem_vars.is_empty() { - Box::new(iter) + if possible_problem_vars.is_empty() { + Box::new(LeftJoinIterator { + eval: self, + right_plan: &*right, + left_iter: self.eval_plan(&*left, from), + current_right: Box::new(empty()), + }) } else { Box::new(BadLeftJoinIterator { - input: from, - iter, - problem_vars, + eval: self, + right_plan: &*right, + left_iter: self.eval_plan(&*left, from), + current_left: None, + current_right: Box::new(empty()), + problem_vars: possible_problem_vars.as_slice(), }) } } @@ -1959,19 +1958,27 @@ fn put_variable_value( } } -fn bind_variables_in_set(binding: &EncodedTuple, set: &[usize]) -> Vec { - set.iter() - .cloned() - .filter(|key| binding.contains(*key)) - .collect() -} - fn unbind_variables(binding: &mut EncodedTuple, variables: &[usize]) { for var in variables { binding.unset(*var) } } +fn combine_tuples(mut a: EncodedTuple, b: &EncodedTuple, vars: &[usize]) -> Option { + for var in vars { + if let Some(b_value) = b.get(*var) { + if let Some(a_value) = a.get(*var) { + if a_value != b_value { + return None; + } + } else { + a.set(*var, b_value); + } + } + } + Some(a) +} + pub fn are_compatible_and_not_disjointed(a: &EncodedTuple, b: &EncodedTuple) -> bool { let mut found_intersection = false; for (a_value, b_value) in a.iter().zip(b.iter()) { @@ -2066,37 +2073,53 @@ impl<'a, S: StoreConnection> Iterator for LeftJoinIterator<'a, S> { } struct BadLeftJoinIterator<'a, S: StoreConnection> { - input: EncodedTuple, - iter: LeftJoinIterator<'a, S>, - problem_vars: Vec, + eval: &'a SimpleEvaluator, + right_plan: &'a PlanNode, + left_iter: EncodedTuplesIterator<'a>, + current_left: Option, + current_right: EncodedTuplesIterator<'a>, + problem_vars: &'a [usize], } impl<'a, S: StoreConnection> Iterator for BadLeftJoinIterator<'a, S> { type Item = Result; fn next(&mut self) -> Option> { - loop { - match self.iter.next()? { - Ok(mut tuple) => { - let mut conflict = false; - for problem_var in &self.problem_vars { - if let Some(input_value) = self.input.get(*problem_var) { - if let Some(result_value) = tuple.get(*problem_var) { - if input_value != result_value { - conflict = true; - continue; //Binding conflict - } - } else { - tuple.set(*problem_var, input_value); + while let Some(right_tuple) = self.current_right.next() { + match right_tuple { + Ok(right_tuple) => { + if let Some(combined) = combine_tuples( + right_tuple, + self.current_left.as_ref().unwrap(), + self.problem_vars, + ) { + return Some(Ok(combined)); + } + } + Err(error) => return Some(Err(error)), + } + } + match self.left_iter.next()? { + Ok(left_tuple) => { + let mut filtered_left = left_tuple.clone(); + unbind_variables(&mut filtered_left, self.problem_vars); + self.current_right = self.eval.eval_plan(self.right_plan, filtered_left); + while let Some(right_tuple) = self.current_right.next() { + match right_tuple { + Ok(right_tuple) => { + if let Some(combined) = + combine_tuples(right_tuple, &left_tuple, self.problem_vars) + { + self.current_left = Some(left_tuple); + return Some(Ok(combined)); } } - } - if !conflict { - return Some(Ok(tuple)); + Err(error) => return Some(Err(error)), } } - Err(error) => return Some(Err(error)), + Some(Ok(left_tuple)) } + Err(error) => Some(Err(error)), } } } diff --git a/lib/src/sparql/parser.rs b/lib/src/sparql/parser.rs index 3f59d5db..25f0c290 100644 --- a/lib/src/sparql/parser.rs +++ b/lib/src/sparql/parser.rs @@ -132,7 +132,7 @@ impl> From> for FocusedTripleOrPathPattern #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] enum PartialGraphPattern { - Optional(GraphPattern), + Optional(GraphPattern, Option), Minus(GraphPattern), Bind(Expression, Variable), Filter(Expression), @@ -776,13 +776,8 @@ parser! { let mut g = GraphPattern::default(); for e in p { match e { - PartialGraphPattern::Optional(p) => match p { - GraphPattern::Filter(f, a2) => { - g = GraphPattern::LeftJoin(Box::new(g), a2, f) - } - a => { - g = GraphPattern::LeftJoin(Box::new(g), Box::new(a), Literal::from(true).into()) - } + PartialGraphPattern::Optional(p, f) => { + g = GraphPattern::LeftJoin(Box::new(g), Box::new(p), f) } PartialGraphPattern::Minus(p) => { g = GraphPattern::Minus(Box::new(g), Box::new(p)) @@ -834,7 +829,11 @@ parser! { //[57] rule OptionalGraphPattern() -> PartialGraphPattern = i("OPTIONAL") _ p:GroupGraphPattern() { - PartialGraphPattern::Optional(p) + if let GraphPattern::Filter(f, p) = p { + PartialGraphPattern::Optional(*p, Some(f)) + } else { + PartialGraphPattern::Optional(p, None) + } } //[58] diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index 606300d0..5577d032 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -96,7 +96,7 @@ impl PlanNode { set } - fn add_maybe_bound_variables(&self, set: &mut BTreeSet) { + pub fn add_maybe_bound_variables(&self, set: &mut BTreeSet) { match self { PlanNode::Init => (), PlanNode::StaticBindings { tuples } => { @@ -147,7 +147,10 @@ impl PlanNode { } child.add_maybe_bound_variables(set); } - PlanNode::Filter { child, .. } => child.add_maybe_bound_variables(set), + PlanNode::Filter { child, expression } => { + expression.add_maybe_bound_variables(set); + child.add_maybe_bound_variables(set); + } PlanNode::Union { children } => { for child in children { child.add_maybe_bound_variables(set); @@ -160,9 +163,12 @@ impl PlanNode { right.add_maybe_bound_variables(set); } PlanNode::Extend { - child, position, .. + child, + position, + expression, } => { set.insert(*position); + expression.add_maybe_bound_variables(set); child.add_maybe_bound_variables(set); } PlanNode::Service { child, .. } @@ -307,6 +313,120 @@ pub enum PlanExpression { StringCast(Box), } +impl PlanExpression { + pub fn add_maybe_bound_variables(&self, set: &mut BTreeSet) { + match self { + PlanExpression::Variable(v) | PlanExpression::Bound(v) => { + set.insert(*v); + } + PlanExpression::Constant(_) + | PlanExpression::Rand + | PlanExpression::Now + | PlanExpression::UUID + | PlanExpression::StrUUID + | PlanExpression::BNode(None) => (), + PlanExpression::UnaryPlus(e) + | PlanExpression::UnaryMinus(e) + | PlanExpression::UnaryNot(e) + | PlanExpression::BNode(Some(e)) + | PlanExpression::Str(e) + | PlanExpression::Lang(e) + | PlanExpression::Datatype(e) + | PlanExpression::IRI(e) + | PlanExpression::Abs(e) + | PlanExpression::Ceil(e) + | PlanExpression::Floor(e) + | PlanExpression::Round(e) + | PlanExpression::UCase(e) + | PlanExpression::LCase(e) + | PlanExpression::StrLen(e) + | PlanExpression::EncodeForURI(e) + | PlanExpression::Year(e) + | PlanExpression::Month(e) + | PlanExpression::Day(e) + | PlanExpression::Hours(e) + | PlanExpression::Minutes(e) + | PlanExpression::Seconds(e) + | PlanExpression::Timezone(e) + | PlanExpression::Tz(e) + | PlanExpression::MD5(e) + | PlanExpression::SHA1(e) + | PlanExpression::SHA256(e) + | PlanExpression::SHA384(e) + | PlanExpression::SHA512(e) + | PlanExpression::IsIRI(e) + | PlanExpression::IsBlank(e) + | PlanExpression::IsLiteral(e) + | PlanExpression::IsNumeric(e) + | PlanExpression::BooleanCast(e) + | PlanExpression::DoubleCast(e) + | PlanExpression::FloatCast(e) + | PlanExpression::DecimalCast(e) + | PlanExpression::IntegerCast(e) + | PlanExpression::DateCast(e) + | PlanExpression::TimeCast(e) + | PlanExpression::DateTimeCast(e) + | PlanExpression::DurationCast(e) + | PlanExpression::StringCast(e) => e.add_maybe_bound_variables(set), + PlanExpression::Or(a, b) + | PlanExpression::And(a, b) + | PlanExpression::Equal(a, b) + | PlanExpression::NotEqual(a, b) + | PlanExpression::Greater(a, b) + | PlanExpression::GreaterOrEq(a, b) + | PlanExpression::Lower(a, b) + | PlanExpression::LowerOrEq(a, b) + | PlanExpression::Add(a, b) + | PlanExpression::Sub(a, b) + | PlanExpression::Mul(a, b) + | PlanExpression::Div(a, b) + | PlanExpression::LangMatches(a, b) + | PlanExpression::Contains(a, b) + | PlanExpression::StrStarts(a, b) + | PlanExpression::StrEnds(a, b) + | PlanExpression::StrBefore(a, b) + | PlanExpression::StrAfter(a, b) + | PlanExpression::StrLang(a, b) + | PlanExpression::StrDT(a, b) + | PlanExpression::SameTerm(a, b) + | PlanExpression::SubStr(a, b, None) + | PlanExpression::Regex(a, b, None) => { + a.add_maybe_bound_variables(set); + b.add_maybe_bound_variables(set); + } + PlanExpression::If(a, b, c) + | PlanExpression::SubStr(a, b, Some(c)) + | PlanExpression::Regex(a, b, Some(c)) + | PlanExpression::Replace(a, b, c, None) => { + a.add_maybe_bound_variables(set); + b.add_maybe_bound_variables(set); + c.add_maybe_bound_variables(set); + } + PlanExpression::Replace(a, b, c, Some(d)) => { + a.add_maybe_bound_variables(set); + b.add_maybe_bound_variables(set); + c.add_maybe_bound_variables(set); + d.add_maybe_bound_variables(set); + } + + PlanExpression::Concat(es) | PlanExpression::Coalesce(es) => { + for e in es { + e.add_maybe_bound_variables(set); + } + } + PlanExpression::In(a, bs) => { + a.add_maybe_bound_variables(set); + for b in bs { + b.add_maybe_bound_variables(set); + } + } + PlanExpression::Exists(e) => { + e.add_maybe_bound_variables(set); + } + } + } +} + #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PlanAggregation { pub function: PlanAggregationFunction, diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index ae1c9130..9f17cf87 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -1,5 +1,4 @@ use crate::model::vocab::xsd; -use crate::model::Literal; use crate::sparql::algebra::*; use crate::sparql::model::*; use crate::sparql::plan::PlanPropertyPath; @@ -7,7 +6,7 @@ use crate::sparql::plan::*; use crate::store::numeric_encoder::{Encoder, ENCODED_DEFAULT_GRAPH}; use crate::Error; use crate::Result; -use std::collections::HashSet; +use std::collections::BTreeSet; pub struct PlanBuilder { encoder: E, @@ -47,25 +46,24 @@ impl PlanBuilder { GraphPattern::LeftJoin(a, b, e) => { let left = self.build_for_graph_pattern(a, variables, graph_name)?; let right = self.build_for_graph_pattern(b, variables, graph_name)?; + + let mut possible_problem_vars = BTreeSet::new(); + self.add_left_join_problematic_variables(&right, &mut possible_problem_vars); + //We add the extra filter if needed - let right = if *e == Expression::from(Literal::from(true)) { - right - } else { + let right = if let Some(e) = e { PlanNode::Filter { child: Box::new(right), expression: self.build_for_expression(e, variables, graph_name)?, } + } else { + right }; - let possible_problem_vars = right - .maybe_bound_variables() - .difference(&left.maybe_bound_variables()) - .cloned() - .collect(); PlanNode::LeftJoin { left: Box::new(left), right: Box::new(right), - possible_problem_vars, + possible_problem_vars: possible_problem_vars.into_iter().collect(), } } GraphPattern::Filter(e, p) => PlanNode::Filter { @@ -909,6 +907,69 @@ impl PlanBuilder { to.len() - 1 } } + + fn add_left_join_problematic_variables(&self, node: &PlanNode, set: &mut BTreeSet) { + match node { + PlanNode::Init + | PlanNode::StaticBindings { .. } + | PlanNode::QuadPatternJoin { .. } + | PlanNode::PathPatternJoin { .. } => (), + PlanNode::Filter { child, expression } => { + expression.add_maybe_bound_variables(set); //TODO: only if it is not already bound + self.add_left_join_problematic_variables(&*child, set); + } + PlanNode::Union { children } => { + for child in children { + self.add_left_join_problematic_variables(&*child, set); + } + } + PlanNode::Join { left, right, .. } => { + self.add_left_join_problematic_variables(&*left, set); + self.add_left_join_problematic_variables(&*right, set); + } + PlanNode::AntiJoin { left, .. } => { + self.add_left_join_problematic_variables(&*left, set); + } + PlanNode::LeftJoin { left, right, .. } => { + self.add_left_join_problematic_variables(&*left, set); + right.add_maybe_bound_variables(set); + } + PlanNode::Extend { + child, expression, .. + } => { + expression.add_maybe_bound_variables(set); //TODO: only if it is not already bound + self.add_left_join_problematic_variables(&*child, set); + self.add_left_join_problematic_variables(&*child, set); + } + PlanNode::Service { child, .. } + | PlanNode::Sort { child, .. } + | PlanNode::HashDeduplicate { child } + | PlanNode::Skip { child, .. } + | PlanNode::Limit { child, .. } => { + self.add_left_join_problematic_variables(&*child, set) + } + PlanNode::Project { mapping, child } => { + let mut child_bound = BTreeSet::new(); + self.add_left_join_problematic_variables(&*child, &mut child_bound); + for (child_i, output_i) in mapping.iter() { + if child_bound.contains(child_i) { + set.insert(*output_i); + } + } + } + PlanNode::Aggregate { + key_mapping, + aggregates, + .. + } => { + set.extend(key_mapping.iter().map(|(_, o)| o)); + //TODO: This is too harsh + for (_, var) in aggregates { + set.insert(*var); + } + } + } + } } fn variable_key(variables: &mut Vec, variable: &Variable) -> usize { @@ -931,7 +992,7 @@ fn slice_key(slice: &[T], element: &T) -> Option { } fn sort_bgp(p: &[TripleOrPathPattern]) -> Vec<&TripleOrPathPattern> { - let mut assigned_variables = HashSet::default(); + let mut assigned_variables = BTreeSet::default(); let mut new_p: Vec<_> = p.iter().collect(); for i in 0..new_p.len() { @@ -947,7 +1008,7 @@ fn sort_bgp(p: &[TripleOrPathPattern]) -> Vec<&TripleOrPathPattern> { fn count_pattern_binds( pattern: &TripleOrPathPattern, - assigned_variables: &HashSet<&Variable>, + assigned_variables: &BTreeSet<&Variable>, ) -> u8 { let mut count = 12; if let TermOrVariable::Variable(v) = pattern.subject() { @@ -980,7 +1041,7 @@ fn count_pattern_binds( fn add_pattern_variables<'a>( pattern: &'a TripleOrPathPattern, - variables: &mut HashSet<&'a Variable>, + variables: &mut BTreeSet<&'a Variable>, ) { if let TermOrVariable::Variable(v) = pattern.subject() { variables.insert(v); diff --git a/lib/tests/rdf-tests b/lib/tests/rdf-tests index 280e9de3..dc237e31 160000 --- a/lib/tests/rdf-tests +++ b/lib/tests/rdf-tests @@ -1 +1 @@ -Subproject commit 280e9de3aaefa6b292a151bd455204d49a0c09db +Subproject commit dc237e319e6562f2913341f6ba964ecbcbbf4499 diff --git a/lib/tests/sparql_test_cases.rs b/lib/tests/sparql_test_cases.rs index 94681655..6ee725f6 100644 --- a/lib/tests/sparql_test_cases.rs +++ b/lib/tests/sparql_test_cases.rs @@ -78,6 +78,7 @@ fn sparql_w3c_query_evaluation_testsuite() -> Result<()> { "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest.ttl", "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest.ttl", "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest.ttl", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest.ttl", "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/reduced/manifest.ttl", "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/regex/manifest.ttl", "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/solution-seq/manifest.ttl", @@ -134,7 +135,8 @@ fn sparql_w3c_query_evaluation_testsuite() -> Result<()> { NamedNode::parse("http://www.w3.org/2009/sparql/docs/tests/data-sparql11/service/manifest#service5").unwrap(), // We use XSD 1.1 equality on dates NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#date-2").unwrap(), - + // We choose to simplify first the nested group patterns in OPTIONAL + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-005-not-simplified").unwrap(), ]; let tests: Result> = manifest_10_urls