Implements SPARQL ORDER BY

pull/10/head
Tpt 6 years ago
parent 6e2471de0b
commit 0fd98c701a
  1. 93
      lib/src/sparql/eval.rs
  2. 28
      lib/src/sparql/plan.rs
  3. 26
      lib/tests/sparql_test_cases.rs

@ -235,6 +235,41 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
}), }),
) )
} }
PlanNode::Sort { child, by } => {
    // Sorting needs every solution at once, so the child iterator is
    // drained completely before anything is emitted.
    let iter = self.eval_plan(&*child, from);
    let mut values = Vec::with_capacity(iter.size_hint().0);
    let mut errors = Vec::default();
    for result in iter {
        match result {
            Ok(tuple) => values.push(tuple),
            // Errors are kept aside (still wrapped in `Err`) and are
            // not part of the sort.
            Err(error) => errors.push(Err(error)),
        }
    }
    values.sort_unstable_by(|a, b| {
        // Walk the comparators in order; the first one that does not
        // report `Equal` decides. The chain is lazy, so later
        // comparators are only evaluated when earlier ones tie.
        by.iter()
            .map(|comp| match comp {
                Comparator::Asc(expression) => {
                    self.cmp_according_to_expression(a, b, expression)
                }
                Comparator::Desc(expression) => {
                    self.cmp_according_to_expression(a, b, expression).reverse()
                }
            })
            .find(|ordering| *ordering != Ordering::Equal)
            .unwrap_or(Ordering::Equal)
    });
    // Collected errors are yielded first, followed by the sorted solutions.
    Box::new(errors.into_iter().chain(values.into_iter().map(Ok)))
}
PlanNode::HashDeduplicate { child } => { PlanNode::HashDeduplicate { child } => {
let iter = self.eval_plan(&*child, from); let iter = self.eval_plan(&*child, from);
let mut values = HashSet::with_capacity(iter.size_hint().0); let mut values = HashSet::with_capacity(iter.size_hint().0);
@ -293,22 +328,22 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
PlanExpression::Equal(a, b) => { PlanExpression::Equal(a, b) => {
let a = self.eval_expression(a, tuple)?; let a = self.eval_expression(a, tuple)?;
let b = self.eval_expression(b, tuple)?; let b = self.eval_expression(b, tuple)?;
Some((a == b || self.partial_cmp_terms(a, b) == Some(Ordering::Equal)).into()) Some((a == b || self.partial_cmp_literals(a, b) == Some(Ordering::Equal)).into())
} }
PlanExpression::NotEqual(a, b) => { PlanExpression::NotEqual(a, b) => {
let a = self.eval_expression(a, tuple)?; let a = self.eval_expression(a, tuple)?;
let b = self.eval_expression(b, tuple)?; let b = self.eval_expression(b, tuple)?;
Some((a != b && self.partial_cmp_terms(a, b) != Some(Ordering::Equal)).into()) Some((a != b && self.partial_cmp_literals(a, b) != Some(Ordering::Equal)).into())
} }
PlanExpression::Greater(a, b) => Some( PlanExpression::Greater(a, b) => Some(
(self.partial_cmp_terms( (self.partial_cmp_literals(
self.eval_expression(a, tuple)?, self.eval_expression(a, tuple)?,
self.eval_expression(b, tuple)?, self.eval_expression(b, tuple)?,
)? == Ordering::Greater) )? == Ordering::Greater)
.into(), .into(),
), ),
PlanExpression::GreaterOrEq(a, b) => Some( PlanExpression::GreaterOrEq(a, b) => Some(
match self.partial_cmp_terms( match self.partial_cmp_literals(
self.eval_expression(a, tuple)?, self.eval_expression(a, tuple)?,
self.eval_expression(b, tuple)?, self.eval_expression(b, tuple)?,
)? { )? {
@ -317,14 +352,14 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
}.into(), }.into(),
), ),
PlanExpression::Lower(a, b) => Some( PlanExpression::Lower(a, b) => Some(
(self.partial_cmp_terms( (self.partial_cmp_literals(
self.eval_expression(a, tuple)?, self.eval_expression(a, tuple)?,
self.eval_expression(b, tuple)?, self.eval_expression(b, tuple)?,
)? == Ordering::Less) )? == Ordering::Less)
.into(), .into(),
), ),
PlanExpression::LowerOrEq(a, b) => Some( PlanExpression::LowerOrEq(a, b) => Some(
match self.partial_cmp_terms( match self.partial_cmp_literals(
self.eval_expression(a, tuple)?, self.eval_expression(a, tuple)?,
self.eval_expression(b, tuple)?, self.eval_expression(b, tuple)?,
)? { )? {
@ -680,12 +715,46 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
) )
} }
// NOTE(review): the next line fuses two function headers on a single line
// (the start of `partial_cmp_terms` followed by the start of
// `cmp_according_to_expression`) — presumably residue of a side-by-side
// diff rendering; confirm against the real file.
//
// `cmp_according_to_expression` evaluates `expression` against `tuple_a` and
// `tuple_b` and returns how the two results order relative to each other.
//
// As implemented below:
//   * no result (`None`) orders before any result (`Some`);
//   * blank nodes order before every other kind of term;
//   * named nodes order after blank nodes and before the remaining terms;
//   * the remaining terms are compared with `partial_cmp_literals`.
// Whenever a helper comparison returns `None`, the pair is treated as
// `Ordering::Equal`.
fn partial_cmp_terms(&self, a: EncodedTerm, b: EncodedTerm) -> Option<Ordering> { fn cmp_according_to_expression(
&self,
tuple_a: &[Option<EncodedTerm>],
tuple_b: &[Option<EncodedTerm>],
expression: &PlanExpression,
) -> Ordering {
match (
self.eval_expression(expression, tuple_a),
self.eval_expression(expression, tuple_b),
) {
// Both sides produced a term: dispatch on the kind of the left one.
(Some(a), Some(b)) => match a {
// Blank node vs. blank node: compare the wrapped values directly;
// a blank node is `Less` than anything that is not a blank node.
EncodedTerm::BlankNode(a) => if let EncodedTerm::BlankNode(b) = b {
a.cmp(&b)
} else {
Ordering::Less
},
EncodedTerm::NamedNode { iri_id: a } => match b {
EncodedTerm::NamedNode { iri_id: b } => {
// Compare via `compare_str_ids`; if it returns `None`, fall back to `Equal`.
self.compare_str_ids(a, b).unwrap_or(Ordering::Equal)
}
EncodedTerm::BlankNode(_) => Ordering::Greater,
_ => Ordering::Less,
},
// Left side is neither a blank node nor a named node.
a => match b {
EncodedTerm::NamedNode { .. } | EncodedTerm::BlankNode(_) => Ordering::Greater,
// Pairs that `partial_cmp_literals` cannot order are treated as equal.
b => self.partial_cmp_literals(a, b).unwrap_or(Ordering::Equal),
},
},
// A side without a result orders before a side that has one.
(Some(_), None) => Ordering::Greater,
(None, Some(_)) => Ordering::Less,
(None, None) => Ordering::Equal,
}
}
fn partial_cmp_literals(&self, a: EncodedTerm, b: EncodedTerm) -> Option<Ordering> {
match a { match a {
EncodedTerm::SimpleLiteral { value_id: a } EncodedTerm::SimpleLiteral { value_id: a }
| EncodedTerm::StringLiteral { value_id: a } => match b { | EncodedTerm::StringLiteral { value_id: a } => match b {
EncodedTerm::SimpleLiteral { value_id: b } EncodedTerm::SimpleLiteral { value_id: b }
| EncodedTerm::StringLiteral { value_id: b } => a.partial_cmp(&b), | EncodedTerm::StringLiteral { value_id: b } => self.compare_str_ids(a, b),
_ => None, _ => None,
}, },
EncodedTerm::FloatLiteral(a) => match b { EncodedTerm::FloatLiteral(a) => match b {
@ -719,6 +788,14 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
_ => None, _ => None,
} }
} }
/// Compares the values that the store holds for the ids `a` and `b`.
///
/// Both ids are resolved through `self.store.get_bytes`, and the two
/// resolved values are compared with their `Ord` implementation.
///
/// Returns `None` when either lookup returns an error or finds no value.
fn compare_str_ids(&self, a: u64, b: u64) -> Option<Ordering> {
    // Perform both lookups before inspecting either result.
    let left = self.store.get_bytes(a);
    let right = self.store.get_bytes(b);
    // `.ok()?` discards a lookup error, the second `?` a missing value.
    let left = left.ok()??;
    let right = right.ok()??;
    Some(left.cmp(&right))
}
} }
enum NumericBinaryOperands { enum NumericBinaryOperands {

@ -43,6 +43,10 @@ pub enum PlanNode {
position: usize, position: usize,
expression: PlanExpression, expression: PlanExpression,
}, },
Sort {
child: Box<PlanNode>,
by: Vec<Comparator>,
},
HashDeduplicate { HashDeduplicate {
child: Box<PlanNode>, child: Box<PlanNode>,
}, },
@ -124,6 +128,7 @@ impl PlanNode {
set.insert(*position); set.insert(*position);
child.add_variables(set); child.add_variables(set);
} }
PlanNode::Sort { child, .. } => child.add_variables(set),
PlanNode::HashDeduplicate { child } => child.add_variables(set), PlanNode::HashDeduplicate { child } => child.add_variables(set),
PlanNode::Skip { child, .. } => child.add_variables(set), PlanNode::Skip { child, .. } => child.add_variables(set),
PlanNode::Limit { child, .. } => child.add_variables(set), PlanNode::Limit { child, .. } => child.add_variables(set),
@ -294,6 +299,12 @@ impl PlanExpression {
} }
} }
/// A single sort criterion: an expression plus the direction in which
/// its values are to be ordered.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub enum Comparator {
    /// Order by the expression's value, ascending.
    Asc(PlanExpression),
    /// Order by the expression's value, descending.
    Desc(PlanExpression),
}
pub struct PlanBuilder<'a, S: EncodedQuadsStore> { pub struct PlanBuilder<'a, S: EncodedQuadsStore> {
store: &'a S, store: &'a S,
} }
@ -417,8 +428,21 @@ impl<'a, S: EncodedQuadsStore> PlanBuilder<'a, S> {
tuples: self.encode_bindings(bs, variables)?, tuples: self.encode_bindings(bs, variables)?,
}, },
GraphPattern::OrderBy(l, o) => { GraphPattern::OrderBy(l, o) => {
self.build_for_graph_pattern(l, input, variables, graph_name)? let by: Result<Vec<_>> = o
} //TODO .into_iter()
.map(|comp| match comp {
OrderComparator::Asc(e) => {
Ok(Comparator::Asc(self.build_for_expression(e, variables)?))
}
OrderComparator::Desc(e) => {
Ok(Comparator::Desc(self.build_for_expression(e, variables)?))
}
}).collect();
PlanNode::Sort {
child: Box::new(self.build_for_graph_pattern(l, input, variables, graph_name)?),
by: by?,
}
}
GraphPattern::Project(l, new_variables) => PlanNode::Project { GraphPattern::Project(l, new_variables) => PlanNode::Project {
child: Box::new(self.build_for_graph_pattern( child: Box::new(self.build_for_graph_pattern(
l, l,

@ -111,10 +111,12 @@ fn sparql_w3c_query_evaluation_testsuite() {
).unwrap(), ).unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest.ttl") Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest.ttl")
.unwrap(), .unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/sort/manifest.ttl").unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/triple-match/manifest.ttl") Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/triple-match/manifest.ttl")
.unwrap(), .unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/type-promotion/manifest.ttl") Url::parse(
.unwrap(), "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/type-promotion/manifest.ttl",
).unwrap(),
]; ];
let test_blacklist = vec![ let test_blacklist = vec![
//Multiple writing of the same xsd:integer. Our system does strong normalization. //Multiple writing of the same xsd:integer. Our system does strong normalization.
@ -228,10 +230,15 @@ fn sparql_w3c_query_evaluation_testsuite() {
test, error test, error
), ),
Ok(result) => { Ok(result) => {
let actual_graph = to_graph(result).unwrap();
let expected_graph = client let expected_graph = client
.load_sparql_query_result_graph(test.result.clone().unwrap()) .load_sparql_query_result_graph(test.result.clone().unwrap())
.unwrap(); .unwrap();
let with_order = expected_graph
.triples_for_predicate(&rs::INDEX)
.unwrap()
.next()
.is_some();
let actual_graph = to_graph(result, with_order).unwrap();
assert!( assert!(
actual_graph.is_isomorphic(&expected_graph).unwrap(), actual_graph.is_isomorphic(&expected_graph).unwrap(),
"Failure on {}.\nExpected file:\n{}\nOutput file:\n{}\nParsed query:\n{}\nData:\n{}\n", "Failure on {}.\nExpected file:\n{}\nOutput file:\n{}\nParsed query:\n{}\nData:\n{}\n",
@ -279,7 +286,7 @@ impl RDFClient {
fn load_sparql_query_result_graph(&self, url: Url) -> Result<MemoryGraph> { fn load_sparql_query_result_graph(&self, url: Url) -> Result<MemoryGraph> {
if url.as_str().ends_with(".srx") { if url.as_str().ends_with(".srx") {
to_graph(read_xml_results(BufReader::new(self.get(&url)?))?) to_graph(read_xml_results(BufReader::new(self.get(&url)?))?, false)
} else { } else {
self.load_graph(url) self.load_graph(url)
} }
@ -330,7 +337,7 @@ mod rs {
} }
} }
fn to_graph(result: QueryResult) -> Result<MemoryGraph> { fn to_graph(result: QueryResult, with_order: bool) -> Result<MemoryGraph> {
match result { match result {
QueryResult::Graph(graph) => Ok(graph), QueryResult::Graph(graph) => Ok(graph),
QueryResult::Boolean(value) => { QueryResult::Boolean(value) => {
@ -364,7 +371,7 @@ fn to_graph(result: QueryResult) -> Result<MemoryGraph> {
Literal::new_simple_literal(variable.name()?), Literal::new_simple_literal(variable.name()?),
))?; ))?;
} }
for binding_values in iter { for (i, binding_values) in iter.enumerate() {
let binding_values = binding_values?; let binding_values = binding_values?;
let solution = BlankNode::default(); let solution = BlankNode::default();
graph.insert(&Triple::new( graph.insert(&Triple::new(
@ -392,6 +399,13 @@ fn to_graph(result: QueryResult) -> Result<MemoryGraph> {
))?; ))?;
} }
} }
if with_order {
graph.insert(&Triple::new(
solution.clone(),
rs::INDEX.clone(),
Literal::from((i + 1) as i128),
))?;
}
} }
Ok(graph) Ok(graph)
} }

Loading…
Cancel
Save