Makes sparopt figure out good join keys

pull/572/head
Thomas 1 year ago committed by Thomas Tanon
parent cdabe52847
commit c31ba0e823
  1. 62
      lib/sparopt/src/algebra.rs
  2. 191
      lib/sparopt/src/optimizer.rs
  3. 11
      lib/sparopt/src/type_inference.rs
  4. 22
      lib/src/sparql/eval.rs
  5. 139
      lib/src/sparql/plan.rs
  6. 38
      lib/src/sparql/plan_builder.rs

@ -660,6 +660,7 @@ pub enum GraphPattern {
left: Box<Self>, left: Box<Self>,
right: Box<Self>, right: Box<Self>,
expression: Expression, expression: Expression,
algorithm: LeftJoinAlgorithm,
}, },
/// Lateral join i.e. evaluate right for each result row of left /// Lateral join i.e. evaluate right for each result row of left
#[cfg(feature = "sep-0006")] #[cfg(feature = "sep-0006")]
@ -678,7 +679,11 @@ pub enum GraphPattern {
expression: Expression, expression: Expression,
}, },
/// [Minus](https://www.w3.org/TR/sparql11-query/#defn_algMinus). /// [Minus](https://www.w3.org/TR/sparql11-query/#defn_algMinus).
Minus { left: Box<Self>, right: Box<Self> }, Minus {
left: Box<Self>,
right: Box<Self>,
algorithm: MinusAlgorithm,
},
/// A table used to provide inline values /// A table used to provide inline values
Values { Values {
variables: Vec<Variable>, variables: Vec<Variable>,
@ -784,7 +789,12 @@ impl GraphPattern {
} }
} }
pub fn left_join(left: Self, right: Self, expression: Expression) -> Self { pub fn left_join(
left: Self,
right: Self,
expression: Expression,
algorithm: LeftJoinAlgorithm,
) -> Self {
let expression_ebv = expression.effective_boolean_value(); let expression_ebv = expression.effective_boolean_value();
if left.is_empty() if left.is_empty()
|| right.is_empty() || right.is_empty()
@ -801,10 +811,11 @@ impl GraphPattern {
} else { } else {
expression expression
}, },
algorithm,
} }
} }
pub fn minus(left: Self, right: Self) -> Self { pub fn minus(left: Self, right: Self, algorithm: MinusAlgorithm) -> Self {
if left.is_empty() { if left.is_empty() {
return Self::empty(); return Self::empty();
} }
@ -814,6 +825,7 @@ impl GraphPattern {
Self::Minus { Self::Minus {
left: Box::new(left), left: Box::new(left),
right: Box::new(right), right: Box::new(right),
algorithm,
} }
} }
@ -1046,7 +1058,7 @@ impl GraphPattern {
child.lookup_used_variables(callback); child.lookup_used_variables(callback);
} }
} }
Self::Join { left, right, .. } | Self::Minus { left, right } => { Self::Join { left, right, .. } | Self::Minus { left, right, .. } => {
left.lookup_used_variables(callback); left.lookup_used_variables(callback);
right.lookup_used_variables(callback); right.lookup_used_variables(callback);
} }
@ -1059,6 +1071,7 @@ impl GraphPattern {
left, left,
right, right,
expression, expression,
..
} => { } => {
expression.lookup_used_variables(callback); expression.lookup_used_variables(callback);
left.lookup_used_variables(callback); left.lookup_used_variables(callback);
@ -1148,6 +1161,7 @@ impl GraphPattern {
|| true.into(), || true.into(),
|e| Expression::from_sparql_algebra(e, graph_name), |e| Expression::from_sparql_algebra(e, graph_name),
), ),
algorithm: LeftJoinAlgorithm::default(),
}, },
#[cfg(feature = "sep-0006")] #[cfg(feature = "sep-0006")]
AlGraphPattern::Lateral { left, right } => Self::Lateral { AlGraphPattern::Lateral { left, right } => Self::Lateral {
@ -1179,6 +1193,7 @@ impl GraphPattern {
AlGraphPattern::Minus { left, right } => Self::Minus { AlGraphPattern::Minus { left, right } => Self::Minus {
left: Box::new(Self::from_sparql_algebra(left, graph_name, blank_nodes)), left: Box::new(Self::from_sparql_algebra(left, graph_name, blank_nodes)),
right: Box::new(Self::from_sparql_algebra(right, graph_name, blank_nodes)), right: Box::new(Self::from_sparql_algebra(right, graph_name, blank_nodes)),
algorithm: MinusAlgorithm::default(),
}, },
AlGraphPattern::Values { AlGraphPattern::Values {
variables, variables,
@ -1365,6 +1380,7 @@ impl From<&GraphPattern> for AlGraphPattern {
left, left,
right, right,
expression, expression,
..
} => { } => {
let empty_expr = if let Expression::Literal(l) = expression { let empty_expr = if let Expression::Literal(l) = expression {
l.datatype() == xsd::BOOLEAN && l.value() == "true" l.datatype() == xsd::BOOLEAN && l.value() == "true"
@ -1418,7 +1434,7 @@ impl From<&GraphPattern> for AlGraphPattern {
expression: expression.into(), expression: expression.into(),
variable: variable.clone(), variable: variable.clone(),
}, },
GraphPattern::Minus { left, right } => Self::Minus { GraphPattern::Minus { left, right, .. } => Self::Minus {
left: Box::new(left.as_ref().into()), left: Box::new(left.as_ref().into()),
right: Box::new(right.as_ref().into()), right: Box::new(right.as_ref().into()),
}, },
@ -1478,14 +1494,44 @@ impl From<&GraphPattern> for AlGraphPattern {
} }
/// The join algorithm used (c.f. [`GraphPattern::Join`]). /// The join algorithm used (c.f. [`GraphPattern::Join`]).
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum JoinAlgorithm { pub enum JoinAlgorithm {
HashBuildLeftProbeRight, HashBuildLeftProbeRight { keys: Vec<Variable> },
} }
impl Default for JoinAlgorithm { impl Default for JoinAlgorithm {
fn default() -> Self { fn default() -> Self {
Self::HashBuildLeftProbeRight Self::HashBuildLeftProbeRight {
keys: Vec::default(),
}
}
}
/// The left join algorithm used (c.f. [`GraphPattern::LeftJoin`]).
///
/// Carried alongside the `left`, `right` and `expression` fields of
/// [`GraphPattern::LeftJoin`] so that later stages know how the join is to be evaluated.
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum LeftJoinAlgorithm {
/// The plan builder turns this variant into a `PlanNode::HashLeftJoin`.
/// NOTE(review): the name suggests the hash table is built from the right operand and
/// probed with rows of the left operand; confirm against the evaluator.
///
/// `keys` lists the variables to use as join keys. The optimizer fills it with the
/// variables that are never undefined on both sides and are not already bound by the
/// input; an empty list is used when nothing has been computed yet.
HashBuildRightProbeLeft { keys: Vec<Variable> },
}
// Default used when no join keys have been computed yet, e.g. when a pattern is
// converted from the `spargebra` algebra before the optimizer has run.
impl Default for LeftJoinAlgorithm {
/// Returns `HashBuildRightProbeLeft` with an empty list of join keys.
fn default() -> Self {
Self::HashBuildRightProbeLeft {
keys: Vec::default(),
}
}
}
/// The minus algorithm used (c.f. [`GraphPattern::Minus`]).
///
/// Carried alongside the `left` and `right` fields of [`GraphPattern::Minus`] so that
/// later stages know how the difference is to be evaluated.
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum MinusAlgorithm {
/// The plan builder turns this variant into a `PlanNode::AntiJoin`.
/// NOTE(review): the name suggests the hash table is built from the right operand and
/// probed with rows of the left operand; confirm against the evaluator.
///
/// `keys` lists the variables to use as join keys. The optimizer fills it with the
/// variables that are never undefined on both sides and are not already bound by the
/// input; an empty list is used when nothing has been computed yet.
HashBuildRightProbeLeft { keys: Vec<Variable> },
}
impl Default for MinusAlgorithm {
fn default() -> Self {
Self::HashBuildRightProbeLeft {
keys: Vec::default(),
}
} }
} }

@ -1,7 +1,10 @@
use crate::algebra::{Expression, GraphPattern, JoinAlgorithm, OrderExpression}; use crate::algebra::{
Expression, GraphPattern, JoinAlgorithm, LeftJoinAlgorithm, MinusAlgorithm, OrderExpression,
};
use crate::type_inference::{ use crate::type_inference::{
infer_expression_type, infer_graph_pattern_types, VariableType, VariableTypes, infer_expression_type, infer_graph_pattern_types, VariableType, VariableTypes,
}; };
use oxrdf::Variable;
use spargebra::algebra::PropertyPathExpression; use spargebra::algebra::PropertyPathExpression;
use spargebra::term::{GroundTermPattern, NamedNodePattern}; use spargebra::term::{GroundTermPattern, NamedNodePattern};
use std::cmp::{max, min}; use std::cmp::{max, min};
@ -53,6 +56,7 @@ impl Optimizer {
left, left,
right, right,
expression, expression,
algorithm,
} => { } => {
let left = Self::normalize_pattern(*left, input_types); let left = Self::normalize_pattern(*left, input_types);
let right = Self::normalize_pattern(*right, input_types); let right = Self::normalize_pattern(*right, input_types);
@ -62,6 +66,7 @@ impl Optimizer {
left, left,
right, right,
Self::normalize_expression(expression, &inner_types), Self::normalize_expression(expression, &inner_types),
algorithm,
) )
} }
#[cfg(feature = "sep-0006")] #[cfg(feature = "sep-0006")]
@ -103,9 +108,14 @@ impl Optimizer {
GraphPattern::extend(inner, variable, expression) GraphPattern::extend(inner, variable, expression)
} }
} }
GraphPattern::Minus { left, right } => GraphPattern::minus( GraphPattern::Minus {
left,
right,
algorithm,
} => GraphPattern::minus(
Self::normalize_pattern(*left, input_types), Self::normalize_pattern(*left, input_types),
Self::normalize_pattern(*right, input_types), Self::normalize_pattern(*right, input_types),
algorithm,
), ),
GraphPattern::Values { GraphPattern::Values {
variables, variables,
@ -336,6 +346,7 @@ impl Optimizer {
left, left,
right, right,
expression, expression,
algorithm,
} => { } => {
let left_types = infer_graph_pattern_types(&left, input_types.clone()); let left_types = infer_graph_pattern_types(&left, input_types.clone());
let right_types = infer_graph_pattern_types(&right, input_types.clone()); let right_types = infer_graph_pattern_types(&right, input_types.clone());
@ -364,13 +375,19 @@ impl Optimizer {
Self::push_filters(*left, left_filters, input_types), Self::push_filters(*left, left_filters, input_types),
Self::push_filters(*right, right_filters, input_types), Self::push_filters(*right, right_filters, input_types),
expression, expression,
algorithm,
), ),
Expression::and_all(final_filters), Expression::and_all(final_filters),
) )
} }
GraphPattern::Minus { left, right } => GraphPattern::minus( GraphPattern::Minus {
left,
right,
algorithm,
} => GraphPattern::minus(
Self::push_filters(*left, filters, input_types), Self::push_filters(*left, filters, input_types),
Self::push_filters(*right, Vec::new(), input_types), Self::push_filters(*right, Vec::new(), input_types),
algorithm,
), ),
GraphPattern::Extend { GraphPattern::Extend {
inner, inner,
@ -503,11 +520,7 @@ impl Optimizer {
.enumerate() .enumerate()
.filter_map(|(i, v)| v.then(|| i)) .filter_map(|(i, v)| v.then(|| i))
.filter(|i| { .filter(|i| {
count_common_variables( has_common_variables(&output_types, &to_reorder_types[*i], input_types)
&output_types,
&to_reorder_types[*i],
input_types,
) > 0
}) })
.min_by_key(|i| { .min_by_key(|i| {
// Estimation of the join cost // Estimation of the join cost
@ -527,10 +540,14 @@ impl Optimizer {
} else { } else {
estimate_join_cost( estimate_join_cost(
&output, &output,
&output_types,
&to_reorder[*i], &to_reorder[*i],
&to_reorder_types[*i], &JoinAlgorithm::HashBuildLeftProbeRight {
JoinAlgorithm::HashBuildLeftProbeRight, keys: join_key_variables(
&output_types,
&to_reorder_types[*i],
input_types,
),
},
input_types, input_types,
) )
} }
@ -547,7 +564,13 @@ impl Optimizer {
GraphPattern::join( GraphPattern::join(
output, output,
next, next,
JoinAlgorithm::HashBuildLeftProbeRight, JoinAlgorithm::HashBuildLeftProbeRight {
keys: join_key_variables(
&output_types,
&to_reorder_types[next_id],
input_types,
),
},
) )
}; };
} }
@ -556,7 +579,13 @@ impl Optimizer {
output = GraphPattern::join( output = GraphPattern::join(
output, output,
next, next,
JoinAlgorithm::HashBuildLeftProbeRight, JoinAlgorithm::HashBuildLeftProbeRight {
keys: join_key_variables(
&output_types,
&to_reorder_types[next_id],
input_types,
),
},
); );
} }
output_types.intersect_with(to_reorder_types[next_id].clone()); output_types.intersect_with(to_reorder_types[next_id].clone());
@ -566,12 +595,25 @@ impl Optimizer {
output_cartesian_product_joins output_cartesian_product_joins
.into_iter() .into_iter()
.reduce(|left, right| { .reduce(|left, right| {
let keys = join_key_variables(
&infer_graph_pattern_types(&left, input_types.clone()),
&infer_graph_pattern_types(&right, input_types.clone()),
input_types,
);
if estimate_graph_pattern_size(&left, input_types) if estimate_graph_pattern_size(&left, input_types)
<= estimate_graph_pattern_size(&right, input_types) <= estimate_graph_pattern_size(&right, input_types)
{ {
GraphPattern::join(left, right, JoinAlgorithm::HashBuildLeftProbeRight) GraphPattern::join(
left,
right,
JoinAlgorithm::HashBuildLeftProbeRight { keys },
)
} else { } else {
GraphPattern::join(right, left, JoinAlgorithm::HashBuildLeftProbeRight) GraphPattern::join(
right,
left,
JoinAlgorithm::HashBuildLeftProbeRight { keys },
)
} }
}) })
.unwrap() .unwrap()
@ -588,15 +630,16 @@ impl Optimizer {
left, left,
right, right,
expression, expression,
..
} => { } => {
let left = Self::reorder_joins(*left, input_types); let left = Self::reorder_joins(*left, input_types);
let left_types = infer_graph_pattern_types(&left, input_types.clone());
let right = Self::reorder_joins(*right, input_types); let right = Self::reorder_joins(*right, input_types);
let right_types = infer_graph_pattern_types(&right, input_types.clone());
#[cfg(feature = "sep-0006")] #[cfg(feature = "sep-0006")]
{ {
let left_types = infer_graph_pattern_types(&left, input_types.clone());
let right_types = infer_graph_pattern_types(&right, input_types.clone());
if is_fit_for_for_loop_join(&right, input_types, &left_types) if is_fit_for_for_loop_join(&right, input_types, &left_types)
&& count_common_variables(&left_types, &right_types, input_types) > 0 && has_common_variables(&left_types, &right_types, input_types)
{ {
return GraphPattern::lateral( return GraphPattern::lateral(
left, left,
@ -604,16 +647,33 @@ impl Optimizer {
GraphPattern::empty_singleton(), GraphPattern::empty_singleton(),
right, right,
expression, expression,
LeftJoinAlgorithm::HashBuildRightProbeLeft { keys: Vec::new() },
), ),
); );
} }
} }
GraphPattern::left_join(left, right, expression) GraphPattern::left_join(
left,
right,
expression,
LeftJoinAlgorithm::HashBuildRightProbeLeft {
keys: join_key_variables(&left_types, &right_types, input_types),
},
)
}
GraphPattern::Minus { left, right, .. } => {
let left = Self::reorder_joins(*left, input_types);
let left_types = infer_graph_pattern_types(&left, input_types.clone());
let right = Self::reorder_joins(*right, input_types);
let right_types = infer_graph_pattern_types(&right, input_types.clone());
GraphPattern::minus(
left,
right,
MinusAlgorithm::HashBuildRightProbeLeft {
keys: join_key_variables(&left_types, &right_types, input_types),
},
)
} }
GraphPattern::Minus { left, right } => GraphPattern::minus(
Self::reorder_joins(*left, input_types),
Self::reorder_joins(*right, input_types),
),
GraphPattern::Extend { GraphPattern::Extend {
inner, inner,
expression, expression,
@ -685,6 +745,7 @@ fn is_fit_for_for_loop_join(
left, left,
right, right,
expression, expression,
..
} => { } => {
if !is_fit_for_for_loop_join(left, global_input_types, entry_types) { if !is_fit_for_for_loop_join(left, global_input_types, entry_types) {
return false; return false;
@ -802,17 +863,28 @@ fn is_expression_fit_for_for_loop_join(
} }
} }
fn count_common_variables( fn has_common_variables(
left: &VariableTypes, left: &VariableTypes,
right: &VariableTypes, right: &VariableTypes,
input_types: &VariableTypes, input_types: &VariableTypes,
) -> usize { ) -> bool {
// TODO: we should be smart and count as shared variables FILTER(?a = ?b) // TODO: we should be smart and count as shared variables FILTER(?a = ?b)
left.iter().any(|(variable, left_type)| {
!left_type.undef && !right.get(variable).undef && input_types.get(variable).undef
})
}
fn join_key_variables(
left: &VariableTypes,
right: &VariableTypes,
input_types: &VariableTypes,
) -> Vec<Variable> {
left.iter() left.iter()
.filter(|(variable, left_type)| { .filter(|(variable, left_type)| {
!left_type.undef && !right.get(variable).undef && input_types.get(variable).undef !left_type.undef && !right.get(variable).undef && input_types.get(variable).undef
}) })
.count() .map(|(variable, _)| variable.clone())
.collect()
} }
fn estimate_graph_pattern_size(pattern: &GraphPattern, input_types: &VariableTypes) -> usize { fn estimate_graph_pattern_size(pattern: &GraphPattern, input_types: &VariableTypes) -> usize {
@ -842,35 +914,26 @@ fn estimate_graph_pattern_size(pattern: &GraphPattern, input_types: &VariableTyp
left, left,
right, right,
algorithm, algorithm,
} => { } => estimate_join_cost(left, right, algorithm, input_types),
let left_types = infer_graph_pattern_types(left, input_types.clone()); GraphPattern::LeftJoin {
let right_types = infer_graph_pattern_types(right, input_types.clone()); left,
estimate_join_cost( right,
left, algorithm,
&left_types, ..
right, } => match algorithm {
&right_types, LeftJoinAlgorithm::HashBuildRightProbeLeft { keys } => {
*algorithm, let left_size = estimate_graph_pattern_size(left, input_types);
input_types, max(
) left_size,
} left_size
GraphPattern::LeftJoin { left, right, .. } => { .saturating_mul(estimate_graph_pattern_size(
let left_size = estimate_graph_pattern_size(left, input_types); right,
let left_types = infer_graph_pattern_types(left, input_types.clone()); &infer_graph_pattern_types(right, input_types.clone()),
let right_types = infer_graph_pattern_types(right, input_types.clone()); ))
max( .saturating_div(1_000_usize.saturating_pow(keys.len().try_into().unwrap())),
left_size, )
left_size }
.saturating_mul(estimate_graph_pattern_size(right, &right_types)) },
.saturating_div(
1_000_usize.saturating_pow(
count_common_variables(&left_types, &right_types, input_types)
.try_into()
.unwrap(),
),
),
)
}
#[cfg(feature = "sep-0006")] #[cfg(feature = "sep-0006")]
GraphPattern::Lateral { left, right } => estimate_lateral_cost( GraphPattern::Lateral { left, right } => estimate_lateral_cost(
left, left,
@ -908,22 +971,16 @@ fn estimate_graph_pattern_size(pattern: &GraphPattern, input_types: &VariableTyp
fn estimate_join_cost( fn estimate_join_cost(
left: &GraphPattern, left: &GraphPattern,
left_types: &VariableTypes,
right: &GraphPattern, right: &GraphPattern,
right_types: &VariableTypes, algorithm: &JoinAlgorithm,
algorithm: JoinAlgorithm,
input_types: &VariableTypes, input_types: &VariableTypes,
) -> usize { ) -> usize {
match algorithm { match algorithm {
JoinAlgorithm::HashBuildLeftProbeRight => estimate_graph_pattern_size(left, input_types) JoinAlgorithm::HashBuildLeftProbeRight { keys } => {
.saturating_mul(estimate_graph_pattern_size(right, input_types)) estimate_graph_pattern_size(left, input_types)
.saturating_div( .saturating_mul(estimate_graph_pattern_size(right, input_types))
1_000_usize.saturating_pow( .saturating_div(1_000_usize.saturating_pow(keys.len().try_into().unwrap()))
count_common_variables(left_types, right_types, input_types) }
.try_into()
.unwrap(),
),
),
} }
} }
fn estimate_lateral_cost( fn estimate_lateral_cost(

@ -124,11 +124,20 @@ pub fn infer_graph_pattern_types(
} }
types types
} }
GraphPattern::Service { name, inner, .. } => { GraphPattern::Service {
name,
inner,
silent,
} => {
let parent_types = types.clone();
let mut types = infer_graph_pattern_types(inner, types); let mut types = infer_graph_pattern_types(inner, types);
if let NamedNodePattern::Variable(v) = name { if let NamedNodePattern::Variable(v) = name {
types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE) types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE)
} }
if *silent {
// On failure, single empty solution
types.union_with(parent_types);
}
types types
} }
} }

@ -380,12 +380,9 @@ impl SimpleEvaluator {
PlanNode::HashJoin { PlanNode::HashJoin {
probe_child, probe_child,
build_child, build_child,
keys,
} => { } => {
let join_keys: Vec<_> = probe_child let join_keys = keys.iter().map(|v| v.encoded).collect::<Vec<_>>();
.always_bound_variables()
.intersection(&build_child.always_bound_variables())
.copied()
.collect();
let (probe, probe_stats) = self.plan_evaluator(probe_child); let (probe, probe_stats) = self.plan_evaluator(probe_child);
stat_children.push(probe_stats); stat_children.push(probe_stats);
let (build, build_stats) = self.plan_evaluator(build_child); let (build, build_stats) = self.plan_evaluator(build_child);
@ -444,12 +441,8 @@ impl SimpleEvaluator {
})) }))
}) })
} }
PlanNode::AntiJoin { left, right } => { PlanNode::AntiJoin { left, right, keys } => {
let join_keys: Vec<_> = left let join_keys = keys.iter().map(|v| v.encoded).collect::<Vec<_>>();
.always_bound_variables()
.intersection(&right.always_bound_variables())
.copied()
.collect();
let (left, left_stats) = self.plan_evaluator(left); let (left, left_stats) = self.plan_evaluator(left);
stat_children.push(left_stats); stat_children.push(left_stats);
let (right, right_stats) = self.plan_evaluator(right); let (right, right_stats) = self.plan_evaluator(right);
@ -487,12 +480,9 @@ impl SimpleEvaluator {
left, left,
right, right,
expression, expression,
keys,
} => { } => {
let join_keys: Vec<_> = left let join_keys = keys.iter().map(|v| v.encoded).collect::<Vec<_>>();
.always_bound_variables()
.intersection(&right.always_bound_variables())
.copied()
.collect();
let (left, left_stats) = self.plan_evaluator(left); let (left, left_stats) = self.plan_evaluator(left);
stat_children.push(left_stats); stat_children.push(left_stats);
let (right, right_stats) = self.plan_evaluator(right); let (right, right_stats) = self.plan_evaluator(right);

@ -4,9 +4,7 @@ use crate::storage::numeric_encoder::EncodedTerm;
use regex::Regex; use regex::Regex;
use spargebra::algebra::GraphPattern; use spargebra::algebra::GraphPattern;
use spargebra::term::GroundTerm; use spargebra::term::GroundTerm;
use std::cmp::max; use std::collections::BTreeSet;
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, BTreeSet};
use std::fmt; use std::fmt;
use std::rc::Rc; use std::rc::Rc;
@ -40,6 +38,7 @@ pub enum PlanNode {
HashJoin { HashJoin {
probe_child: Rc<Self>, probe_child: Rc<Self>,
build_child: Rc<Self>, build_child: Rc<Self>,
keys: Vec<PlanVariable>,
}, },
/// Right nested in left loop /// Right nested in left loop
ForLoopJoin { ForLoopJoin {
@ -50,6 +49,7 @@ pub enum PlanNode {
AntiJoin { AntiJoin {
left: Rc<Self>, left: Rc<Self>,
right: Rc<Self>, right: Rc<Self>,
keys: Vec<PlanVariable>,
}, },
Filter { Filter {
child: Rc<Self>, child: Rc<Self>,
@ -63,6 +63,7 @@ pub enum PlanNode {
left: Rc<Self>, left: Rc<Self>,
right: Rc<Self>, right: Rc<Self>,
expression: Box<PlanExpression>, expression: Box<PlanExpression>,
keys: Vec<PlanVariable>,
}, },
/// right nested in left loop /// right nested in left loop
ForLoopLeftJoin { ForLoopLeftJoin {
@ -159,9 +160,10 @@ impl PlanNode {
Self::HashJoin { Self::HashJoin {
probe_child: left, probe_child: left,
build_child: right, build_child: right,
..
} }
| Self::ForLoopJoin { left, right, .. } | Self::ForLoopJoin { left, right, .. }
| Self::AntiJoin { left, right } | Self::AntiJoin { left, right, .. }
| Self::ForLoopLeftJoin { left, right, .. } => { | Self::ForLoopLeftJoin { left, right, .. } => {
left.lookup_used_variables(callback); left.lookup_used_variables(callback);
right.lookup_used_variables(callback); right.lookup_used_variables(callback);
@ -170,6 +172,7 @@ impl PlanNode {
left, left,
right, right,
expression, expression,
..
} => { } => {
left.lookup_used_variables(callback); left.lookup_used_variables(callback);
right.lookup_used_variables(callback); right.lookup_used_variables(callback);
@ -219,134 +222,6 @@ impl PlanNode {
} }
} }
} }
/// Collects into a set the variables that are guaranteed to be bound in every result row.
///
/// The result is a conservative subset: the underlying lookup does not yet recognise
/// every case, so a variable missing from the set may still always be bound.
pub fn always_bound_variables(&self) -> BTreeSet<usize> {
    let mut bound = BTreeSet::new();
    // The lookup may report the same variable several times; the set deduplicates them.
    self.lookup_always_bound_variables(&mut |variable| {
        bound.insert(variable);
    });
    bound
}
/// Walks this plan node and calls `callback` with the encoded index of each variable
/// that is known to be bound in every result row.
///
/// The same index may be reported more than once (for instance when both sides of a
/// join bind it); callers that need a set should deduplicate, as
/// `always_bound_variables` does. Variants that cannot guarantee anything report nothing.
pub fn lookup_always_bound_variables(&self, callback: &mut impl FnMut(usize)) {
match self {
// Inline bindings: a position counts only if every tuple contains a value for it.
Self::StaticBindings { encoded_tuples, .. } => {
let mut variables = BTreeMap::default(); // value true iff always bound
// Tuples may have different capacities; scan up to the largest one.
let max_tuple_length = encoded_tuples
.iter()
.map(EncodedTuple::capacity)
.fold(0, max);
for tuple in encoded_tuples {
for key in 0..max_tuple_length {
match variables.entry(key) {
// First tuple seen for this position: record whether it is set.
Entry::Vacant(e) => {
e.insert(tuple.contains(key));
}
// Later tuples can only downgrade the position to "not always bound".
Entry::Occupied(mut e) => {
if !tuple.contains(key) {
e.insert(false);
}
}
}
}
}
for (k, v) in variables {
if v {
callback(k);
}
}
}
// Quad patterns: forward the callback to every position of the pattern.
// NOTE(review): presumably `lookup_variables` reports the variable held by the
// position, if any; its definition is not visible here.
Self::QuadPattern {
subject,
predicate,
object,
graph_name,
} => {
subject.lookup_variables(callback);
predicate.lookup_variables(callback);
object.lookup_variables(callback);
graph_name.lookup_variables(callback);
}
// Path patterns: same as quad patterns, minus the predicate position.
Self::PathPattern {
subject,
object,
graph_name,
..
} => {
subject.lookup_variables(callback);
object.lookup_variables(callback);
graph_name.lookup_variables(callback);
}
// A filter only removes rows, so whatever the child guarantees still holds.
Self::Filter { child, .. } => {
//TODO: have a look at the expression to know if it filters out unbound variables
child.lookup_always_bound_variables(callback);
}
// Union: only variables guaranteed by every branch are reported.
Self::Union { children } => {
if let Some(vars) = children
.iter()
.map(|c| c.always_bound_variables())
.reduce(|a, b| a.intersection(&b).copied().collect())
{
for v in vars {
callback(v);
}
}
}
// Inner joins: variables guaranteed by either side are reported.
Self::HashJoin {
probe_child: left,
build_child: right,
}
| Self::ForLoopJoin { left, right, .. } => {
left.lookup_always_bound_variables(callback);
right.lookup_always_bound_variables(callback);
}
// Anti joins and left joins: only the left side is consulted.
Self::AntiJoin { left, .. }
| Self::HashLeftJoin { left, .. }
| Self::ForLoopLeftJoin { left, .. } => {
left.lookup_always_bound_variables(callback);
}
// Extend: the new variable is reported only when the expression is a constant
// named node or literal; other expressions are not analysed.
Self::Extend {
child,
variable,
expression,
} => {
if matches!(
expression.as_ref(),
PlanExpression::NamedNode(_) | PlanExpression::Literal(_)
) {
// TODO: more cases?
callback(variable.encoded);
}
child.lookup_always_bound_variables(callback);
}
// These wrappers delegate directly to their child.
Self::Sort { child, .. }
| Self::HashDeduplicate { child }
| Self::Reduced { child }
| Self::Skip { child, .. }
| Self::Limit { child, .. } => child.lookup_always_bound_variables(callback),
// A silent service reports nothing; otherwise the child is consulted.
Self::Service { child, silent, .. } => {
if *silent {
// none, might return a null tuple
} else {
child.lookup_always_bound_variables(callback)
}
}
// Projection: report the output variable of each mapping pair whose child-side
// variable is always bound in the child.
Self::Project { mapping, child } => {
let child_bound = child.always_bound_variables();
for (child_i, output_i) in mapping.iter() {
if child_bound.contains(&child_i.encoded) {
callback(output_i.encoded);
}
}
}
// Aggregates are not analysed yet, so nothing is reported.
Self::Aggregate { .. } => {
//TODO
}
}
}
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]

@ -106,25 +106,37 @@ impl<'a> PlanBuilder<'a> {
right, right,
algorithm, algorithm,
} => match algorithm { } => match algorithm {
JoinAlgorithm::HashBuildLeftProbeRight => PlanNode::HashJoin { JoinAlgorithm::HashBuildLeftProbeRight { keys } => PlanNode::HashJoin {
build_child: Rc::new(self.build_for_graph_pattern(left, variables)?), build_child: Rc::new(self.build_for_graph_pattern(left, variables)?),
probe_child: Rc::new(self.build_for_graph_pattern(right, variables)?), probe_child: Rc::new(self.build_for_graph_pattern(right, variables)?),
keys: keys
.iter()
.map(|v| build_plan_variable(variables, v))
.collect(),
}, },
}, },
GraphPattern::LeftJoin { GraphPattern::LeftJoin {
left, left,
right, right,
expression, expression,
} => PlanNode::HashLeftJoin { algorithm,
left: Rc::new(self.build_for_graph_pattern(left, variables)?), } => match algorithm {
right: Rc::new(self.build_for_graph_pattern(right, variables)?), LeftJoinAlgorithm::HashBuildRightProbeLeft { keys } => PlanNode::HashLeftJoin {
expression: Box::new(self.build_for_expression(expression, variables)?), left: Rc::new(self.build_for_graph_pattern(left, variables)?),
right: Rc::new(self.build_for_graph_pattern(right, variables)?),
expression: Box::new(self.build_for_expression(expression, variables)?),
keys: keys
.iter()
.map(|v| build_plan_variable(variables, v))
.collect(),
},
}, },
GraphPattern::Lateral { left, right } => { GraphPattern::Lateral { left, right } => {
if let GraphPattern::LeftJoin { if let GraphPattern::LeftJoin {
left: nested_left, left: nested_left,
right: nested_right, right: nested_right,
expression, expression,
..
} = right.as_ref() } = right.as_ref()
{ {
if nested_left.is_empty_singleton() { if nested_left.is_empty_singleton() {
@ -167,9 +179,19 @@ impl<'a> PlanBuilder<'a> {
variable: build_plan_variable(variables, variable), variable: build_plan_variable(variables, variable),
expression: Box::new(self.build_for_expression(expression, variables)?), expression: Box::new(self.build_for_expression(expression, variables)?),
}, },
GraphPattern::Minus { left, right } => PlanNode::AntiJoin { GraphPattern::Minus {
left: Rc::new(self.build_for_graph_pattern(left, variables)?), left,
right: Rc::new(self.build_for_graph_pattern(right, variables)?), right,
algorithm,
} => match algorithm {
MinusAlgorithm::HashBuildRightProbeLeft { keys } => PlanNode::AntiJoin {
left: Rc::new(self.build_for_graph_pattern(left, variables)?),
right: Rc::new(self.build_for_graph_pattern(right, variables)?),
keys: keys
.iter()
.map(|v| build_plan_variable(variables, v))
.collect(),
},
}, },
GraphPattern::Service { GraphPattern::Service {
name, name,

Loading…
Cancel
Save