From 0fd98c701ac195ca690c2fa1ac0965e0d0bbd92b Mon Sep 17 00:00:00 2001 From: Tpt Date: Tue, 6 Nov 2018 16:40:48 +0100 Subject: [PATCH] Implements SPARQL ORDER BY --- lib/src/sparql/eval.rs | 93 +++++++++++++++++++++++++++++++--- lib/src/sparql/plan.rs | 28 +++++++++- lib/tests/sparql_test_cases.rs | 26 +++++++--- 3 files changed, 131 insertions(+), 16 deletions(-) diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 5cf2d395..4e04deaf 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -235,6 +235,41 @@ impl SimpleEvaluator { }), ) } + PlanNode::Sort { child, by } => { + let iter = self.eval_plan(&*child, from); + let mut values = Vec::with_capacity(iter.size_hint().0); + let mut errors = Vec::default(); + for result in iter { + match result { + Ok(result) => { + values.push(result); + } + Err(error) => errors.push(Err(error)), + } + } + values.sort_unstable_by(|a, b| { + for comp in by { + match comp { + Comparator::Asc(expression) => { + match self.cmp_according_to_expression(a, b, &expression) { + Ordering::Greater => return Ordering::Greater, + Ordering::Less => return Ordering::Less, + Ordering::Equal => (), + } + } + Comparator::Desc(expression) => { + match self.cmp_according_to_expression(a, b, &expression) { + Ordering::Greater => return Ordering::Less, + Ordering::Less => return Ordering::Greater, + Ordering::Equal => (), + } + } + } + } + Ordering::Equal + }); + Box::new(errors.into_iter().chain(values.into_iter().map(Ok))) + } PlanNode::HashDeduplicate { child } => { let iter = self.eval_plan(&*child, from); let mut values = HashSet::with_capacity(iter.size_hint().0); @@ -293,22 +328,22 @@ impl SimpleEvaluator { PlanExpression::Equal(a, b) => { let a = self.eval_expression(a, tuple)?; let b = self.eval_expression(b, tuple)?; - Some((a == b || self.partial_cmp_terms(a, b) == Some(Ordering::Equal)).into()) + Some((a == b || self.partial_cmp_literals(a, b) == Some(Ordering::Equal)).into()) } PlanExpression::NotEqual(a, b) => { let a = self.eval_expression(a, tuple)?; let b = self.eval_expression(b, tuple)?; - Some((a != b && self.partial_cmp_terms(a, b) != Some(Ordering::Equal)).into()) + Some((a != b && self.partial_cmp_literals(a, b) != Some(Ordering::Equal)).into()) } PlanExpression::Greater(a, b) => Some( - (self.partial_cmp_terms( + (self.partial_cmp_literals( self.eval_expression(a, tuple)?, self.eval_expression(b, tuple)?, )? == Ordering::Greater) .into(), ), PlanExpression::GreaterOrEq(a, b) => Some( - match self.partial_cmp_terms( + match self.partial_cmp_literals( self.eval_expression(a, tuple)?, self.eval_expression(b, tuple)?, )? { @@ -317,14 +352,14 @@ impl SimpleEvaluator { }.into(), ), PlanExpression::Lower(a, b) => Some( - (self.partial_cmp_terms( + (self.partial_cmp_literals( self.eval_expression(a, tuple)?, self.eval_expression(b, tuple)?, )? == Ordering::Less) .into(), ), PlanExpression::LowerOrEq(a, b) => Some( - match self.partial_cmp_terms( + match self.partial_cmp_literals( self.eval_expression(a, tuple)?, self.eval_expression(b, tuple)?, )? { @@ -680,12 +715,46 @@ impl SimpleEvaluator { ) } - fn partial_cmp_terms(&self, a: EncodedTerm, b: EncodedTerm) -> Option { + fn cmp_according_to_expression( + &self, + tuple_a: &[Option], + tuple_b: &[Option], + expression: &PlanExpression, + ) -> Ordering { + match ( + self.eval_expression(expression, tuple_a), + self.eval_expression(expression, tuple_b), + ) { + (Some(a), Some(b)) => match a { + EncodedTerm::BlankNode(a) => if let EncodedTerm::BlankNode(b) = b { + a.cmp(&b) + } else { + Ordering::Less + }, + EncodedTerm::NamedNode { iri_id: a } => match b { + EncodedTerm::NamedNode { iri_id: b } => { + self.compare_str_ids(a, b).unwrap_or(Ordering::Equal) + } + EncodedTerm::BlankNode(_) => Ordering::Greater, + _ => Ordering::Less, + }, + a => match b { + EncodedTerm::NamedNode { .. } | EncodedTerm::BlankNode(_) => Ordering::Greater, + b => self.partial_cmp_literals(a, b).unwrap_or(Ordering::Equal), + }, + }, + (Some(_), None) => Ordering::Greater, + (None, Some(_)) => Ordering::Less, + (None, None) => Ordering::Equal, + } + } + + fn partial_cmp_literals(&self, a: EncodedTerm, b: EncodedTerm) -> Option { match a { EncodedTerm::SimpleLiteral { value_id: a } | EncodedTerm::StringLiteral { value_id: a } => match b { EncodedTerm::SimpleLiteral { value_id: b } - | EncodedTerm::StringLiteral { value_id: b } => a.partial_cmp(&b), + | EncodedTerm::StringLiteral { value_id: b } => self.compare_str_ids(a, b), _ => None, }, EncodedTerm::FloatLiteral(a) => match b { @@ -719,6 +788,14 @@ impl SimpleEvaluator { _ => None, } } + + fn compare_str_ids(&self, a: u64, b: u64) -> Option { + if let (Ok(Some(a)), Ok(Some(b))) = (self.store.get_bytes(a), self.store.get_bytes(b)) { + Some(a.cmp(&b)) + } else { + None + } + } } enum NumericBinaryOperands { diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index 0832fd85..17a71c1c 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -43,6 +43,10 @@ pub enum PlanNode { position: usize, expression: PlanExpression, }, + Sort { + child: Box, + by: Vec, + }, HashDeduplicate { child: Box, }, @@ -124,6 +128,7 @@ impl PlanNode { set.insert(*position); child.add_variables(set); } + PlanNode::Sort { child, .. } => child.add_variables(set), PlanNode::HashDeduplicate { child } => child.add_variables(set), PlanNode::Skip { child, .. } => child.add_variables(set), PlanNode::Limit { child, .. } => child.add_variables(set), @@ -294,6 +299,12 @@ impl PlanExpression { } } +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum Comparator { + Asc(PlanExpression), + Desc(PlanExpression), +} + pub struct PlanBuilder<'a, S: EncodedQuadsStore> { store: &'a S, } @@ -417,8 +428,21 @@ impl<'a, S: EncodedQuadsStore> PlanBuilder<'a, S> { tuples: self.encode_bindings(bs, variables)?, }, GraphPattern::OrderBy(l, o) => { - self.build_for_graph_pattern(l, input, variables, graph_name)? - } //TODO + let by: Result> = o + .into_iter() + .map(|comp| match comp { + OrderComparator::Asc(e) => { + Ok(Comparator::Asc(self.build_for_expression(e, variables)?)) + } + OrderComparator::Desc(e) => { + Ok(Comparator::Desc(self.build_for_expression(e, variables)?)) + } + }).collect(); + PlanNode::Sort { + child: Box::new(self.build_for_graph_pattern(l, input, variables, graph_name)?), + by: by?, + } + } GraphPattern::Project(l, new_variables) => PlanNode::Project { child: Box::new(self.build_for_graph_pattern( l, diff --git a/lib/tests/sparql_test_cases.rs b/lib/tests/sparql_test_cases.rs index 1e06bbf9..8539d425 100644 --- a/lib/tests/sparql_test_cases.rs +++ b/lib/tests/sparql_test_cases.rs @@ -111,10 +111,12 @@ fn sparql_w3c_query_evaluation_testsuite() { ).unwrap(), Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest.ttl") .unwrap(), + Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/sort/manifest.ttl").unwrap(), Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/triple-match/manifest.ttl") .unwrap(), - Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/type-promotion/manifest.ttl") - .unwrap(), + Url::parse( + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/type-promotion/manifest.ttl", + ).unwrap(), ]; let test_blacklist = vec![ //Multiple writing of the same xsd:integer. Our system does strong normalization. @@ -228,10 +230,15 @@ fn sparql_w3c_query_evaluation_testsuite() { test, error ), Ok(result) => { - let actual_graph = to_graph(result).unwrap(); let expected_graph = client .load_sparql_query_result_graph(test.result.clone().unwrap()) .unwrap(); + let with_order = expected_graph + .triples_for_predicate(&rs::INDEX) + .unwrap() + .next() + .is_some(); + let actual_graph = to_graph(result, with_order).unwrap(); assert!( actual_graph.is_isomorphic(&expected_graph).unwrap(), "Failure on {}.\nExpected file:\n{}\nOutput file:\n{}\nParsed query:\n{}\nData:\n{}\n", @@ -279,7 +286,7 @@ impl RDFClient { fn load_sparql_query_result_graph(&self, url: Url) -> Result { if url.as_str().ends_with(".srx") { - to_graph(read_xml_results(BufReader::new(self.get(&url)?))?) + to_graph(read_xml_results(BufReader::new(self.get(&url)?))?, false) } else { self.load_graph(url) } @@ -330,7 +337,7 @@ mod rs { } } -fn to_graph(result: QueryResult) -> Result { +fn to_graph(result: QueryResult, with_order: bool) -> Result { match result { QueryResult::Graph(graph) => Ok(graph), QueryResult::Boolean(value) => { @@ -364,7 +371,7 @@ fn to_graph(result: QueryResult) -> Result { Literal::new_simple_literal(variable.name()?), ))?; } - for binding_values in iter { + for (i, binding_values) in iter.enumerate() { let binding_values = binding_values?; let solution = BlankNode::default(); graph.insert(&Triple::new( @@ -392,6 +399,13 @@ fn to_graph(result: QueryResult) -> Result { ))?; } } + if with_order { + graph.insert(&Triple::new( + solution.clone(), + rs::INDEX.clone(), + Literal::from((i + 1) as i128), + ))?; + } } Ok(graph) }