From e1332e648755ccc87021284569e105826eb06625 Mon Sep 17 00:00:00 2001 From: Tpt Date: Fri, 12 Oct 2018 16:03:00 +0200 Subject: [PATCH] Makes more SPARQL tests pass --- lib/src/sparql/algebra.rs | 9 ++- lib/src/sparql/eval.rs | 122 ++++++++++++++++++++++++++++++--- lib/src/sparql/plan.rs | 19 +++-- lib/tests/sparql_test_cases.rs | 16 +++++ 4 files changed, 146 insertions(+), 20 deletions(-) diff --git a/lib/src/sparql/algebra.rs b/lib/src/sparql/algebra.rs index a1390f16..3e12ecf8 100644 --- a/lib/src/sparql/algebra.rs +++ b/lib/src/sparql/algebra.rs @@ -1083,9 +1083,12 @@ impl<'a> fmt::Display for SparqlGraphPattern<'a> { } Ok(()) } - GraphPattern::Join(a, b) => { - write!(f, "{} {}", SparqlGraphPattern(&*a), SparqlGraphPattern(&*b)) - } + GraphPattern::Join(a, b) => write!( + f, + "{{ {} }} {{ {} }}", + SparqlGraphPattern(&*a), + SparqlGraphPattern(&*b) + ), GraphPattern::LeftJoin(a, b, e) => write!( f, "{} OPTIONAL {{ {} FILTER({}) }}", diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 7c4116bd..e86dd36c 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -92,6 +92,23 @@ impl SimpleEvaluator { Ok(quad) => quad.predicate == quad.object, })) } + if let Some(graph_name) = graph_name { + if graph_name.is_var() { + iter = Box::new(iter.filter(|quad| match quad { + Err(_) => true, + Ok(quad) => { + quad.graph_name != ENCODED_DEFAULT_GRAPH + } + })) + } + } else { + iter = Box::new(iter.filter(|quad| match quad { + Err(_) => true, + Ok(quad) => { + quad.graph_name == ENCODED_DEFAULT_GRAPH + } + })) + } Box::new(iter.map(move |quad| { let quad = quad?; let mut new_tuple = tuple.clone(); @@ -124,6 +141,25 @@ impl SimpleEvaluator { }), ) } + PlanNode::Join { left, right } => { + //TODO: very dumb implementation + let left_iter = self.eval_plan(*left, from.clone()); + let mut left_values = Vec::with_capacity(left_iter.size_hint().0); + let mut errors = Vec::default(); + for result in left_iter { + match result { + Ok(result) => { + left_values.push(result); + } + Err(error) => errors.push(Err(error)), + } + } + Box::new(JoinIterator { + left: left_values, + right_iter: self.eval_plan(*right, from), + buffered_results: errors, + }) + } PlanNode::LeftJoin { left, right } => Box::new(LeftJoinIterator { eval: self.clone(), right_plan: *right, @@ -205,11 +241,7 @@ impl SimpleEvaluator { ) -> Option { match expression { PlanExpression::Constant(t) => Some(*t), - PlanExpression::Variable(v) => if *v < tuple.len() { - tuple[*v] - } else { - None - }, + PlanExpression::Variable(v) => get_tuple_value(*v, tuple), PlanExpression::Or(a, b) => match self.to_bool(self.eval_expression(a, tuple)?) { Some(true) => Some(true.into()), Some(false) => self.eval_expression(b, tuple), @@ -297,7 +329,7 @@ impl SimpleEvaluator { _ => None, }, PlanExpression::Datatype(e) => self.eval_expression(e, tuple)?.datatype(), - PlanExpression::Bound(v) => Some((*v < tuple.len() && tuple[*v].is_some()).into()), + PlanExpression::Bound(v) => Some(has_tuple_value(*v, tuple).into()), PlanExpression::IRI(e) => match self.eval_expression(e, tuple)? { EncodedTerm::NamedNode { iri_id } => Some(EncodedTerm::NamedNode { iri_id }), EncodedTerm::SimpleLiteral { value_id } @@ -559,17 +591,29 @@ enum NumericBinaryOperands { Decimal(Decimal, Decimal), } +fn get_tuple_value(variable: usize, tuple: &[Option]) -> Option { + if variable < tuple.len() { + tuple[variable] + } else { + None + } +} + +fn has_tuple_value(variable: usize, tuple: &[Option]) -> bool { + if variable < tuple.len() { + tuple[variable].is_some() + } else { + false + } +} + fn get_pattern_value( selector: &PatternValue, tuple: &[Option], ) -> Option { match selector { PatternValue::Constant(term) => Some(*term), - PatternValue::Variable(v) => if *v < tuple.len() { - tuple[*v] - } else { - None - }, + PatternValue::Variable(v) => get_tuple_value(*v, tuple), } } @@ -591,6 +635,62 @@ fn put_value(position: usize, value: EncodedTerm, tuple: &mut EncodedTuple) { } } +fn combine_tuples(a: &[Option], b: &[Option]) -> Option { + if a.len() < b.len() { + let mut result = b.to_owned(); + for (key, a_value) in a.into_iter().enumerate() { + if let Some(a_value) = a_value { + match b[key] { + Some(ref b_value) => if a_value != b_value { + return None; + }, + None => result[key] = Some(*a_value), + } + } + } + Some(result) + } else { + let mut result = a.to_owned(); + for (key, b_value) in b.into_iter().enumerate() { + if let Some(b_value) = b_value { + match a[key] { + Some(ref a_value) => if a_value != b_value { + return None; + }, + None => result[key] = Some(*b_value), + } + } + } + Some(result) + } +} + +struct JoinIterator { + left: Vec, + right_iter: EncodedTuplesIterator, + buffered_results: Vec>, +} + +impl Iterator for JoinIterator { + type Item = Result; + + fn next(&mut self) -> Option> { + if let Some(result) = self.buffered_results.pop() { + return Some(result); + } + let right_tuple = match self.right_iter.next()? { + Ok(right_tuple) => right_tuple, + Err(error) => return Some(Err(error)), + }; + for left_tuple in &self.left { + if let Some(result_tuple) = combine_tuples(left_tuple, &right_tuple) { + self.buffered_results.push(Ok(result_tuple)) + } + } + self.next() + } +} + struct LeftJoinIterator { eval: SimpleEvaluator, right_plan: PlanNode, diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index 1896fe38..c8c7e5ba 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -20,6 +20,10 @@ pub enum PlanNode { object: PatternValue, graph_name: Option, }, + Join { + left: Box, + right: Box, + }, Filter { child: Box, expression: PlanExpression, @@ -202,12 +206,15 @@ impl<'a, S: EncodedQuadsStore> PlanBuilder<'a, S> { } plan } - GraphPattern::Join(a, b) => self.build_for_graph_pattern( - b, - self.build_for_graph_pattern(a, input, variables, graph_name)?, - variables, - graph_name, - )?, + GraphPattern::Join(a, b) => PlanNode::Join { + left: Box::new(self.build_for_graph_pattern( + a, + input.clone(), + variables, + graph_name, + )?), + right: Box::new(self.build_for_graph_pattern(b, input, variables, graph_name)?), + }, GraphPattern::LeftJoin(a, b, e) => { let right = Box::new(self.build_for_graph_pattern( b, diff --git a/lib/tests/sparql_test_cases.rs b/lib/tests/sparql_test_cases.rs index 531c591a..5f1662f2 100644 --- a/lib/tests/sparql_test_cases.rs +++ b/lib/tests/sparql_test_cases.rs @@ -80,6 +80,8 @@ fn sparql_w3c_syntax_testsuite() { #[test] fn sparql_w3c_query_evaluation_testsuite() { let manifest_10_urls = vec![ + Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest.ttl") + .unwrap(), Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/basic/manifest.ttl") .unwrap(), Url::parse( @@ -88,13 +90,23 @@ fn sparql_w3c_query_evaluation_testsuite() { Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/cast/manifest.ttl").unwrap(), Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest.ttl") .unwrap(), + Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest.ttl").unwrap(), Url::parse( "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest.ttl", ).unwrap(), Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest.ttl") .unwrap(), + Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/triple-match/manifest.ttl") + .unwrap(), ]; let test_blacklist = vec![ + // Bad nested optionals + NamedNode::from_str( + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#nested-opt-1", + ).unwrap(), + NamedNode::from_str( + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#nested-opt-2", + ).unwrap(), //Multiple writing of the same xsd:integer. Our system does strong normalization. NamedNode::from_str( "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-1", @@ -102,6 +114,10 @@ fn sparql_w3c_query_evaluation_testsuite() { NamedNode::from_str( "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-9", ).unwrap(), + //URI normalization: we are normalizing more strongly + NamedNode::from_str( + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest#normalization-3", + ).unwrap(), //Test on curly brace scoping with OPTIONAL filter NamedNode::from_str( "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-005-not-simplified",