SPARQL: Adds an option to disable the optimizer

Provides a plain evaluator without any trick
pull/322/head
Tpt 2 years ago committed by Thomas Tanon
parent 2ca8bd19d3
commit d7e4d5583f
  1. 102
      lib/src/sparql/eval.rs
  2. 46
      lib/src/sparql/mod.rs
  3. 23
      lib/src/sparql/plan.rs
  4. 127
      lib/src/sparql/plan_builder.rs
  5. 1
      lib/src/sparql/update.rs
  6. 33
      testsuite/src/sparql_evaluator.rs

@ -444,7 +444,39 @@ impl SimpleEvaluator {
})
}
}
PlanNode::LeftJoin {
PlanNode::HashLeftJoin {
left,
right,
expression,
} => {
let join_keys: Vec<_> = left
.always_bound_variables()
.intersection(&right.always_bound_variables())
.copied()
.collect();
let left = self.plan_evaluator(left);
let right = self.plan_evaluator(right);
let expression = self.expression_evaluator(expression);
// Real hash join
Rc::new(move |from| {
let mut errors = Vec::default();
let mut right_values = EncodedTupleSet::new(join_keys.clone());
right_values.extend(right(from.clone()).filter_map(|result| match result {
Ok(result) => Some(result),
Err(error) => {
errors.push(Err(error));
None
}
}));
Box::new(HashLeftJoinIterator {
left_iter: left(from),
right: right_values,
buffered_results: errors,
expression: expression.clone(),
})
})
}
PlanNode::ForLoopLeftJoin {
left,
right,
possible_problem_vars,
@ -454,7 +486,7 @@ impl SimpleEvaluator {
let possible_problem_vars = possible_problem_vars.clone();
Rc::new(move |from| {
if possible_problem_vars.is_empty() {
Box::new(LeftJoinIterator {
Box::new(ForLoopLeftJoinIterator {
right_evaluator: right.clone(),
left_iter: left(from),
current_right: Box::new(empty()),
@ -3572,11 +3604,58 @@ impl Iterator for HashJoinIterator {
Ok(left_tuple) => left_tuple,
Err(error) => return Some(Err(error)),
};
for right_tuple in self.right.get(&left_tuple) {
if let Some(result_tuple) = left_tuple.combine_with(right_tuple) {
self.buffered_results.push(Ok(result_tuple))
self.buffered_results.extend(
self.right
.get(&left_tuple)
.iter()
.filter_map(|right_tuple| left_tuple.combine_with(right_tuple).map(Ok)),
)
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
(
0,
self.left_iter.size_hint().1.map(|v| v * self.right.len()),
)
}
}
struct HashLeftJoinIterator {
left_iter: EncodedTuplesIterator,
right: EncodedTupleSet,
buffered_results: Vec<Result<EncodedTuple, EvaluationError>>,
expression: Rc<dyn Fn(&EncodedTuple) -> Option<EncodedTerm>>,
}
impl Iterator for HashLeftJoinIterator {
type Item = Result<EncodedTuple, EvaluationError>;
fn next(&mut self) -> Option<Result<EncodedTuple, EvaluationError>> {
loop {
if let Some(result) = self.buffered_results.pop() {
return Some(result);
}
let left_tuple = match self.left_iter.next()? {
Ok(left_tuple) => left_tuple,
Err(error) => return Some(Err(error)),
};
self.buffered_results.extend(
self.right
.get(&left_tuple)
.iter()
.filter_map(|right_tuple| left_tuple.combine_with(right_tuple))
.filter(|tuple| {
(self.expression)(tuple)
.and_then(|term| to_bool(&term))
.unwrap_or(false)
})
.map(Ok),
);
if self.buffered_results.is_empty() {
// We have not manage to join with anything
return Some(Ok(left_tuple));
}
}
}
@ -3588,21 +3667,23 @@ impl Iterator for HashJoinIterator {
}
}
struct LeftJoinIterator {
struct ForLoopLeftJoinIterator {
right_evaluator: Rc<dyn Fn(EncodedTuple) -> EncodedTuplesIterator>,
left_iter: EncodedTuplesIterator,
current_right: EncodedTuplesIterator,
}
impl Iterator for LeftJoinIterator {
impl Iterator for ForLoopLeftJoinIterator {
type Item = Result<EncodedTuple, EvaluationError>;
fn next(&mut self) -> Option<Result<EncodedTuple, EvaluationError>> {
if let Some(tuple) = self.current_right.next() {
return Some(tuple);
}
match self.left_iter.next()? {
Ok(left_tuple) => {
let left_tuple = match self.left_iter.next()? {
Ok(left_tuple) => left_tuple,
Err(error) => return Some(Err(error)),
};
self.current_right = (self.right_evaluator)(left_tuple.clone());
if let Some(right_tuple) = self.current_right.next() {
Some(right_tuple)
@ -3610,9 +3691,6 @@ impl Iterator for LeftJoinIterator {
Some(Ok(left_tuple))
}
}
Err(error) => Some(Err(error)),
}
}
}
struct BadLeftJoinIterator {

@ -43,8 +43,13 @@ pub(crate) fn evaluate_query(
spargebra::Query::Select {
pattern, base_iri, ..
} => {
let (plan, variables) =
PlanBuilder::build(&dataset, &pattern, true, &options.custom_functions)?;
let (plan, variables) = PlanBuilder::build(
&dataset,
&pattern,
true,
&options.custom_functions,
options.without_optimizations,
)?;
Ok(SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
@ -56,8 +61,13 @@ pub(crate) fn evaluate_query(
spargebra::Query::Ask {
pattern, base_iri, ..
} => {
let (plan, _) =
PlanBuilder::build(&dataset, &pattern, false, &options.custom_functions)?;
let (plan, _) = PlanBuilder::build(
&dataset,
&pattern,
false,
&options.custom_functions,
options.without_optimizations,
)?;
SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
@ -72,13 +82,19 @@ pub(crate) fn evaluate_query(
base_iri,
..
} => {
let (plan, variables) =
PlanBuilder::build(&dataset, &pattern, false, &options.custom_functions)?;
let (plan, variables) = PlanBuilder::build(
&dataset,
&pattern,
false,
&options.custom_functions,
options.without_optimizations,
)?;
let construct = PlanBuilder::build_graph_template(
&dataset,
&template,
variables,
&options.custom_functions,
options.without_optimizations,
);
Ok(SimpleEvaluator::new(
Rc::new(dataset),
@ -91,8 +107,13 @@ pub(crate) fn evaluate_query(
spargebra::Query::Describe {
pattern, base_iri, ..
} => {
let (plan, _) =
PlanBuilder::build(&dataset, &pattern, false, &options.custom_functions)?;
let (plan, _) = PlanBuilder::build(
&dataset,
&pattern,
false,
&options.custom_functions,
options.without_optimizations,
)?;
Ok(SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
@ -128,6 +149,7 @@ pub struct QueryOptions {
custom_functions: HashMap<NamedNode, Rc<dyn Fn(&[Term]) -> Option<Term>>>,
http_timeout: Option<Duration>,
http_redirection_limit: usize,
without_optimizations: bool,
}
impl QueryOptions {
@ -213,6 +235,14 @@ impl QueryOptions {
}
})
}
#[doc(hidden)]
#[inline]
#[must_use]
pub fn without_optimizations(mut self) -> Self {
self.without_optimizations = true;
self
}
}
/// Options for SPARQL update evaluation.

@ -53,8 +53,14 @@ pub enum PlanNode {
Union {
children: Vec<Self>,
},
/// hash left join
HashLeftJoin {
left: Box<Self>,
right: Box<Self>,
expression: Box<PlanExpression>,
},
/// right nested in left loop
LeftJoin {
ForLoopLeftJoin {
left: Box<Self>,
right: Box<Self>,
possible_problem_vars: Rc<Vec<usize>>, //Variables that should not be part of the entry of the left join
@ -163,9 +169,18 @@ impl PlanNode {
Self::HashJoin { left, right }
| Self::ForLoopJoin { left, right, .. }
| Self::AntiJoin { left, right }
| Self::LeftJoin { left, right, .. } => {
| Self::ForLoopLeftJoin { left, right, .. } => {
left.lookup_used_variables(callback);
right.lookup_used_variables(callback);
}
Self::HashLeftJoin {
left,
right,
expression,
} => {
left.lookup_used_variables(callback);
right.lookup_used_variables(callback);
expression.lookup_used_variables(callback);
}
Self::Extend {
child,
@ -304,7 +319,9 @@ impl PlanNode {
left.lookup_always_bound_variables(callback);
right.lookup_always_bound_variables(callback);
}
Self::AntiJoin { left, .. } | Self::LeftJoin { left, .. } => {
Self::AntiJoin { left, .. }
| Self::HashLeftJoin { left, .. }
| Self::ForLoopLeftJoin { left, .. } => {
left.lookup_always_bound_variables(callback);
}
Self::Extend {

@ -15,6 +15,7 @@ use std::rc::Rc;
pub struct PlanBuilder<'a> {
dataset: &'a DatasetView,
custom_functions: &'a HashMap<NamedNode, Rc<dyn Fn(&[OxTerm]) -> Option<OxTerm>>>,
with_optimizations: bool,
}
impl<'a> PlanBuilder<'a> {
@ -23,25 +24,27 @@ impl<'a> PlanBuilder<'a> {
pattern: &GraphPattern,
is_cardinality_meaningful: bool,
custom_functions: &'a HashMap<NamedNode, Rc<dyn Fn(&[OxTerm]) -> Option<OxTerm>>>,
without_optimizations: bool,
) -> Result<(PlanNode, Vec<Variable>), EvaluationError> {
let mut variables = Vec::default();
let plan = PlanBuilder {
dataset,
custom_functions,
with_optimizations: !without_optimizations,
}
.build_for_graph_pattern(
pattern,
&mut variables,
&PatternValue::Constant(EncodedTerm::DefaultGraph),
)?;
let plan = if is_cardinality_meaningful {
plan
} else {
let plan = if !without_optimizations && !is_cardinality_meaningful {
// let's reduce downstream task.
// TODO: avoid if already REDUCED or DISTINCT
PlanNode::Reduced {
child: Box::new(plan),
}
} else {
plan
};
Ok((plan, variables))
}
@ -51,10 +54,12 @@ impl<'a> PlanBuilder<'a> {
template: &[TriplePattern],
mut variables: Vec<Variable>,
custom_functions: &'a HashMap<NamedNode, Rc<dyn Fn(&[OxTerm]) -> Option<OxTerm>>>,
without_optimizations: bool,
) -> Vec<TripleTemplate> {
PlanBuilder {
dataset,
custom_functions,
with_optimizations: !without_optimizations,
}
.build_for_graph_template(template, &mut variables)
}
@ -66,19 +71,13 @@ impl<'a> PlanBuilder<'a> {
graph_name: &PatternValue,
) -> Result<PlanNode, EvaluationError> {
Ok(match pattern {
GraphPattern::Bgp { patterns } => sort_bgp(patterns)
.iter()
.map(|triple| PlanNode::QuadPattern {
subject: self.pattern_value_from_term_or_variable(&triple.subject, variables),
predicate: self
.pattern_value_from_named_node_or_variable(&triple.predicate, variables),
object: self.pattern_value_from_term_or_variable(&triple.object, variables),
graph_name: graph_name.clone(),
})
.reduce(Self::new_join)
.unwrap_or_else(|| PlanNode::StaticBindings {
tuples: vec![EncodedTuple::with_capacity(variables.len())],
}),
GraphPattern::Bgp { patterns } => {
if self.with_optimizations {
self.build_for_bgp(sort_bgp(patterns), variables, graph_name)
} else {
self.build_for_bgp(patterns, variables, graph_name)
}
}
GraphPattern::Path {
subject,
path,
@ -89,7 +88,7 @@ impl<'a> PlanBuilder<'a> {
object: self.pattern_value_from_term_or_variable(object, variables),
graph_name: graph_name.clone(),
},
GraphPattern::Join { left, right } => Self::new_join(
GraphPattern::Join { left, right } => self.new_join(
self.build_for_graph_pattern(left, variables, graph_name)?,
self.build_for_graph_pattern(right, variables, graph_name)?,
),
@ -103,24 +102,38 @@ impl<'a> PlanBuilder<'a> {
let mut possible_problem_vars = BTreeSet::new();
Self::add_left_join_problematic_variables(&right, &mut possible_problem_vars);
if self.with_optimizations {
// TODO: don't use if SERVICE is inside of for loop
//We add the extra filter if needed
let right = if let Some(expr) = expression {
Self::push_filter(
self.push_filter(
Box::new(right),
Box::new(self.build_for_expression(expr, variables, graph_name)?),
)
} else {
right
};
PlanNode::LeftJoin {
PlanNode::ForLoopLeftJoin {
left: Box::new(left),
right: Box::new(right),
possible_problem_vars: Rc::new(possible_problem_vars.into_iter().collect()),
}
} else {
PlanNode::HashLeftJoin {
left: Box::new(left),
right: Box::new(right),
expression: Box::new(
expression
.as_ref()
.map_or(Ok(PlanExpression::Constant(true.into())), |e| {
self.build_for_expression(e, variables, graph_name)
})?,
),
}
}
}
GraphPattern::Filter { expr, inner } => Self::push_filter(
GraphPattern::Filter { expr, inner } => self.push_filter(
Box::new(self.build_for_graph_pattern(inner, variables, graph_name)?),
Box::new(self.build_for_expression(expr, variables, graph_name)?),
),
@ -270,6 +283,27 @@ impl<'a> PlanBuilder<'a> {
})
}
fn build_for_bgp<'b>(
&self,
patterns: impl IntoIterator<Item = &'b TriplePattern>,
variables: &mut Vec<Variable>,
graph_name: &PatternValue,
) -> PlanNode {
patterns
.into_iter()
.map(|triple| PlanNode::QuadPattern {
subject: self.pattern_value_from_term_or_variable(&triple.subject, variables),
predicate: self
.pattern_value_from_named_node_or_variable(&triple.predicate, variables),
object: self.pattern_value_from_term_or_variable(&triple.object, variables),
graph_name: graph_name.clone(),
})
.reduce(|a, b| self.new_join(a, b))
.unwrap_or_else(|| PlanNode::StaticBindings {
tuples: vec![EncodedTuple::with_capacity(variables.len())],
})
}
fn build_for_path(&self, path: &PropertyPathExpression) -> PlanPropertyPath {
match path {
PropertyPathExpression::NamedNode(p) => PlanPropertyPath::Path(self.build_term(p)),
@ -1073,11 +1107,24 @@ impl<'a> PlanBuilder<'a> {
PlanNode::AntiJoin { left, .. } => {
Self::add_left_join_problematic_variables(left, set);
}
PlanNode::LeftJoin { left, right, .. } => {
PlanNode::ForLoopLeftJoin { left, right, .. } => {
Self::add_left_join_problematic_variables(left, set);
right.lookup_used_variables(&mut |v| {
set.insert(v);
});
}
PlanNode::HashLeftJoin {
left,
right,
expression,
} => {
Self::add_left_join_problematic_variables(left, set);
right.lookup_used_variables(&mut |v| {
set.insert(v);
});
expression.lookup_used_variables(&mut |v| {
set.insert(v);
});
}
PlanNode::Extend {
child, expression, ..
@ -1130,8 +1177,9 @@ impl<'a> PlanBuilder<'a> {
}
}
fn new_join(mut left: PlanNode, mut right: PlanNode) -> PlanNode {
if Self::is_fit_for_for_loop_join(&left)
fn new_join(&self, mut left: PlanNode, mut right: PlanNode) -> PlanNode {
if self.with_optimizations
&& Self::is_fit_for_for_loop_join(&left)
&& Self::is_fit_for_for_loop_join(&right)
&& Self::has_some_common_variables(&left, &right)
{
@ -1178,7 +1226,8 @@ impl<'a> PlanBuilder<'a> {
}
PlanNode::Union { children } => children.iter().all(Self::is_fit_for_for_loop_join),
PlanNode::AntiJoin { .. }
| PlanNode::LeftJoin { .. }
| PlanNode::HashLeftJoin { .. }
| PlanNode::ForLoopLeftJoin { .. }
| PlanNode::Service { .. }
| PlanNode::Sort { .. }
| PlanNode::HashDeduplicate { .. }
@ -1190,9 +1239,15 @@ impl<'a> PlanBuilder<'a> {
}
}
fn push_filter(node: Box<PlanNode>, filter: Box<PlanExpression>) -> PlanNode {
fn push_filter(&self, node: Box<PlanNode>, filter: Box<PlanExpression>) -> PlanNode {
if !self.with_optimizations {
return PlanNode::Filter {
child: node,
expression: filter,
};
}
if let PlanExpression::And(f1, f2) = *filter {
return Self::push_filter(Box::new(Self::push_filter(node, f1)), f2);
return self.push_filter(Box::new(self.push_filter(node, f1)), f2);
}
let mut filter_variables = BTreeSet::new();
filter.lookup_used_variables(&mut |v| {
@ -1203,19 +1258,19 @@ impl<'a> PlanBuilder<'a> {
if filter_variables.iter().all(|v| left.is_variable_bound(*v)) {
if filter_variables.iter().all(|v| right.is_variable_bound(*v)) {
PlanNode::HashJoin {
left: Box::new(Self::push_filter(left, filter.clone())),
right: Box::new(Self::push_filter(right, filter)),
left: Box::new(self.push_filter(left, filter.clone())),
right: Box::new(self.push_filter(right, filter)),
}
} else {
PlanNode::HashJoin {
left: Box::new(Self::push_filter(left, filter)),
left: Box::new(self.push_filter(left, filter)),
right,
}
}
} else if filter_variables.iter().all(|v| right.is_variable_bound(*v)) {
PlanNode::HashJoin {
left,
right: Box::new(Self::push_filter(right, filter)),
right: Box::new(self.push_filter(right, filter)),
}
} else {
PlanNode::Filter {
@ -1227,14 +1282,14 @@ impl<'a> PlanBuilder<'a> {
PlanNode::ForLoopJoin { left, right } => {
if filter_variables.iter().all(|v| left.is_variable_bound(*v)) {
PlanNode::ForLoopJoin {
left: Box::new(Self::push_filter(left, filter)),
left: Box::new(self.push_filter(left, filter)),
right,
}
} else if filter_variables.iter().all(|v| right.is_variable_bound(*v)) {
PlanNode::ForLoopJoin {
//TODO: should we do that always?
left,
right: Box::new(Self::push_filter(right, filter)),
right: Box::new(self.push_filter(right, filter)),
}
} else {
PlanNode::Filter {
@ -1251,7 +1306,7 @@ impl<'a> PlanBuilder<'a> {
//TODO: handle the case where the filter generates an expression variable
if filter_variables.iter().all(|v| child.is_variable_bound(*v)) {
PlanNode::Extend {
child: Box::new(Self::push_filter(child, filter)),
child: Box::new(self.push_filter(child, filter)),
expression,
position,
}
@ -1269,7 +1324,7 @@ impl<'a> PlanBuilder<'a> {
PlanNode::Filter { child, expression } => {
if filter_variables.iter().all(|v| child.is_variable_bound(*v)) {
PlanNode::Filter {
child: Box::new(Self::push_filter(child, filter)),
child: Box::new(self.push_filter(child, filter)),
expression,
}
} else {
@ -1282,7 +1337,7 @@ impl<'a> PlanBuilder<'a> {
PlanNode::Union { children } => PlanNode::Union {
children: children
.into_iter()
.map(|c| Self::push_filter(Box::new(c), filter.clone()))
.map(|c| self.push_filter(Box::new(c), filter.clone()))
.collect(),
},
node => PlanNode::Filter {

@ -123,6 +123,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
algebra,
false,
&self.options.query_options.custom_functions,
!self.options.query_options.without_optimizations,
)?;
let evaluator = SimpleEvaluator::new(
dataset.clone(),

@ -208,19 +208,30 @@ fn evaluate_evaluation_test(test: &Test) -> Result<()> {
}
}
}
match store.query_opt(query, options) {
Err(error) => Err(anyhow!(
for with_query_optimizer in [true, false] {
let mut options = options.clone();
if !with_query_optimizer {
options = options.without_optimizations();
}
match store.query_opt(query.clone(), options) {
Err(error) => {
return Err(anyhow!(
"Failure to execute query of {} with error: {}",
test,
error
)),
))
}
Ok(actual_results) => {
let expected_results = load_sparql_query_result(test.result.as_ref().unwrap())
let expected_results = load_sparql_query_result(
test.result.as_ref().unwrap(),
)
.map_err(|e| {
anyhow!("Error constructing expected graph for {}: {}", test, e)
})?;
let with_order =
if let StaticQueryResults::Solutions { ordered, .. } = &expected_results {
let with_order = if let StaticQueryResults::Solutions { ordered, .. } =
&expected_results
{
*ordered
} else {
false
@ -228,19 +239,19 @@ fn evaluate_evaluation_test(test: &Test) -> Result<()> {
let actual_results =
StaticQueryResults::from_query_results(actual_results, with_order)?;
if are_query_results_isomorphic(&expected_results, &actual_results) {
Ok(())
} else {
Err(anyhow!("Failure on {}.\nExpected file:\n{}\nOutput file:\n{}\nParsed query:\n{}\nData:\n{}\n",
if !are_query_results_isomorphic(&expected_results, &actual_results) {
return Err(anyhow!("Failure on {}.\nExpected file:\n{}\nOutput file:\n{}\nParsed query:\n{}\nData:\n{}\n",
test,
expected_results,
actual_results,
Query::parse(&read_file_to_string(query_file)?, Some(query_file)).unwrap(),
store
))
));
}
}
}
}
Ok(())
}
}
}

Loading…
Cancel
Save