use crate::algebra::{ Expression, GraphPattern, JoinAlgorithm, LeftJoinAlgorithm, MinusAlgorithm, OrderExpression, }; use crate::type_inference::{ infer_expression_type, infer_graph_pattern_types, VariableType, VariableTypes, }; use oxrdf::Variable; use spargebra::algebra::PropertyPathExpression; use spargebra::term::{GroundTermPattern, NamedNodePattern}; use std::cmp::{max, min}; pub struct Optimizer; impl Optimizer { pub fn optimize_graph_pattern(pattern: GraphPattern) -> GraphPattern { let pattern = Self::normalize_pattern(pattern, &VariableTypes::default()); let pattern = Self::reorder_joins(pattern, &VariableTypes::default()); Self::push_filters(pattern, Vec::new(), &VariableTypes::default()) } /// Normalize the pattern, discarding any join ordering information fn normalize_pattern(pattern: GraphPattern, input_types: &VariableTypes) -> GraphPattern { match pattern { GraphPattern::QuadPattern { subject, predicate, object, graph_name, } => GraphPattern::QuadPattern { subject, predicate, object, graph_name, }, GraphPattern::Path { subject, path, object, graph_name, } => GraphPattern::Path { subject, path, object, graph_name, }, GraphPattern::Join { left, right, algorithm, } => GraphPattern::join( Self::normalize_pattern(*left, input_types), Self::normalize_pattern(*right, input_types), algorithm, ), GraphPattern::LeftJoin { left, right, expression, algorithm, } => { let left = Self::normalize_pattern(*left, input_types); let right = Self::normalize_pattern(*right, input_types); let mut inner_types = infer_graph_pattern_types(&left, input_types.clone()); inner_types.intersect_with(infer_graph_pattern_types(&right, input_types.clone())); GraphPattern::left_join( left, right, Self::normalize_expression(expression, &inner_types), algorithm, ) } #[cfg(feature = "sep-0006")] GraphPattern::Lateral { left, right } => { let left = Self::normalize_pattern(*left, input_types); let left_types = infer_graph_pattern_types(&left, input_types.clone()); let right = Self::normalize_pattern(*right, &left_types); GraphPattern::lateral(left, right) } GraphPattern::Filter { inner, expression } => { let inner = Self::normalize_pattern(*inner, input_types); let inner_types = infer_graph_pattern_types(&inner, input_types.clone()); let expression = Self::normalize_expression(expression, &inner_types); let expression_type = infer_expression_type(&expression, &inner_types); if expression_type == VariableType::UNDEF { GraphPattern::empty() } else { GraphPattern::filter(inner, expression) } } GraphPattern::Union { inner } => GraphPattern::union_all( inner .into_iter() .map(|e| Self::normalize_pattern(e, input_types)), ), GraphPattern::Extend { inner, variable, expression, } => { let inner = Self::normalize_pattern(*inner, input_types); let inner_types = infer_graph_pattern_types(&inner, input_types.clone()); let expression = Self::normalize_expression(expression, &inner_types); let expression_type = infer_expression_type(&expression, &inner_types); if expression_type == VariableType::UNDEF { // TODO: valid? inner } else { GraphPattern::extend(inner, variable, expression) } } GraphPattern::Minus { left, right, algorithm, } => GraphPattern::minus( Self::normalize_pattern(*left, input_types), Self::normalize_pattern(*right, input_types), algorithm, ), GraphPattern::Values { variables, bindings, } => GraphPattern::values(variables, bindings), GraphPattern::OrderBy { inner, expression } => { let inner = Self::normalize_pattern(*inner, input_types); let inner_types = infer_graph_pattern_types(&inner, input_types.clone()); GraphPattern::order_by( inner, expression .into_iter() .map(|e| match e { OrderExpression::Asc(e) => { OrderExpression::Asc(Self::normalize_expression(e, &inner_types)) } OrderExpression::Desc(e) => { OrderExpression::Desc(Self::normalize_expression(e, &inner_types)) } }) .collect(), ) } GraphPattern::Project { inner, variables } => { GraphPattern::project(Self::normalize_pattern(*inner, input_types), variables) } GraphPattern::Distinct { inner } => { GraphPattern::distinct(Self::normalize_pattern(*inner, input_types)) } GraphPattern::Reduced { inner } => { GraphPattern::reduced(Self::normalize_pattern(*inner, input_types)) } GraphPattern::Slice { inner, start, length, } => GraphPattern::slice(Self::normalize_pattern(*inner, input_types), start, length), GraphPattern::Group { inner, variables, aggregates, } => { // TODO: min, max and sample don't care about DISTINCT GraphPattern::group( Self::normalize_pattern(*inner, input_types), variables, aggregates, ) } GraphPattern::Service { name, inner, silent, } => GraphPattern::service(Self::normalize_pattern(*inner, input_types), name, silent), } } fn normalize_expression(expression: Expression, types: &VariableTypes) -> Expression { match expression { Expression::NamedNode(node) => node.into(), Expression::Literal(literal) => literal.into(), Expression::Variable(variable) => variable.into(), Expression::Or(inner) => Expression::or_all( inner .into_iter() .map(|e| Self::normalize_expression(e, types)), ), Expression::And(inner) => Expression::and_all( inner .into_iter() .map(|e| Self::normalize_expression(e, types)), ), Expression::Equal(left, right) => { let left = Self::normalize_expression(*left, types); let left_types = infer_expression_type(&left, types); let right = Self::normalize_expression(*right, types); let right_types = infer_expression_type(&right, types); #[allow(unused_mut)] let mut must_use_equal = left_types.literal && right_types.literal; #[cfg(feature = "rdf-star")] { must_use_equal = must_use_equal || left_types.triple && right_types.triple; } if must_use_equal { Expression::equal(left, right) } else { Expression::same_term(left, right) } } Expression::SameTerm(left, right) => Expression::same_term( Self::normalize_expression(*left, types), Self::normalize_expression(*right, types), ), Expression::Greater(left, right) => Expression::greater( Self::normalize_expression(*left, types), Self::normalize_expression(*right, types), ), Expression::GreaterOrEqual(left, right) => Expression::greater_or_equal( Self::normalize_expression(*left, types), Self::normalize_expression(*right, types), ), Expression::Less(left, right) => Expression::less( Self::normalize_expression(*left, types), Self::normalize_expression(*right, types), ), Expression::LessOrEqual(left, right) => Expression::less_or_equal( Self::normalize_expression(*left, types), Self::normalize_expression(*right, types), ), Expression::Add(left, right) => { Self::normalize_expression(*left, types) + Self::normalize_expression(*right, types) } Expression::Subtract(left, right) => { Self::normalize_expression(*left, types) - Self::normalize_expression(*right, types) } Expression::Multiply(left, right) => { Self::normalize_expression(*left, types) * Self::normalize_expression(*right, types) } Expression::Divide(left, right) => { Self::normalize_expression(*left, types) / Self::normalize_expression(*right, types) } Expression::UnaryPlus(inner) => { Expression::unary_plus(Self::normalize_expression(*inner, types)) } Expression::UnaryMinus(inner) => -Self::normalize_expression(*inner, types), Expression::Not(inner) => !Self::normalize_expression(*inner, types), Expression::Exists(inner) => Expression::exists(Self::normalize_pattern(*inner, types)), Expression::Bound(variable) => { let t = types.get(&variable); if !t.undef { true.into() } else if t == VariableType::UNDEF { false.into() } else { Expression::Bound(variable) } } Expression::If(cond, then, els) => Expression::if_cond( Self::normalize_expression(*cond, types), Self::normalize_expression(*then, types), Self::normalize_expression(*els, types), ), Expression::Coalesce(inners) => Expression::coalesce( inners .into_iter() .map(|e| Self::normalize_expression(e, types)) .collect(), ), Expression::FunctionCall(name, args) => Expression::call( name, args.into_iter() .map(|e| Self::normalize_expression(e, types)) .collect(), ), } } fn push_filters( pattern: GraphPattern, mut filters: Vec, input_types: &VariableTypes, ) -> GraphPattern { match pattern { GraphPattern::QuadPattern { .. } | GraphPattern::Path { .. } | GraphPattern::Values { .. } => { GraphPattern::filter(pattern, Expression::and_all(filters)) } GraphPattern::Join { left, right, algorithm, } => { let left_types = infer_graph_pattern_types(&left, input_types.clone()); let right_types = infer_graph_pattern_types(&right, input_types.clone()); let mut left_filters = Vec::new(); let mut right_filters = Vec::new(); let mut final_filters = Vec::new(); for filter in filters { let push_left = are_all_expression_variables_bound(&filter, &left_types); let push_right = are_all_expression_variables_bound(&filter, &right_types); if push_left { if push_right { left_filters.push(filter.clone()); right_filters.push(filter); } else { left_filters.push(filter); } } else if push_right { right_filters.push(filter); } else { final_filters.push(filter); } } GraphPattern::filter( GraphPattern::join( Self::push_filters(*left, left_filters, input_types), Self::push_filters(*right, right_filters, input_types), algorithm, ), Expression::and_all(final_filters), ) } #[cfg(feature = "sep-0006")] GraphPattern::Lateral { left, right } => { let left_types = infer_graph_pattern_types(&left, input_types.clone()); let mut left_filters = Vec::new(); let mut right_filters = Vec::new(); for filter in filters { let push_left = are_all_expression_variables_bound(&filter, &left_types); if push_left { left_filters.push(filter); } else { right_filters.push(filter); } } let left = Self::push_filters(*left, left_filters, input_types); let right = Self::push_filters(*right, right_filters, &left_types); if let GraphPattern::Filter { inner: right, expression, } = right { // We prefer to have filter out of the lateral rather than inside the right part GraphPattern::filter(GraphPattern::lateral(left, *right), expression) } else { GraphPattern::lateral(left, right) } } GraphPattern::LeftJoin { left, right, expression, algorithm, } => { let left_types = infer_graph_pattern_types(&left, input_types.clone()); let right_types = infer_graph_pattern_types(&right, input_types.clone()); let mut left_filters = Vec::new(); let mut right_filters = Vec::new(); let mut final_filters = Vec::new(); for filter in filters { let push_left = are_all_expression_variables_bound(&filter, &left_types); if push_left { left_filters.push(filter); } else { final_filters.push(filter); } } let expression = if expression.effective_boolean_value().is_none() && (are_all_expression_variables_bound(&expression, &right_types) || are_no_expression_variables_bound(&expression, &left_types)) { right_filters.push(expression); true.into() } else { expression }; GraphPattern::filter( GraphPattern::left_join( Self::push_filters(*left, left_filters, input_types), Self::push_filters(*right, right_filters, input_types), expression, algorithm, ), Expression::and_all(final_filters), ) } GraphPattern::Minus { left, right, algorithm, } => GraphPattern::minus( Self::push_filters(*left, filters, input_types), Self::push_filters(*right, Vec::new(), input_types), algorithm, ), GraphPattern::Extend { inner, expression, variable, } => { // TODO: handle the case where the filter overrides an expression variable (should not happen in SPARQL but allowed in the algebra) let mut inner_filters = Vec::new(); let mut final_filters = Vec::new(); for filter in filters { let extend_variable_used = filter.used_variables().into_iter().any(|v| *v == variable); if extend_variable_used { final_filters.push(filter); } else { inner_filters.push(filter); } } GraphPattern::filter( GraphPattern::extend( Self::push_filters(*inner, inner_filters, input_types), variable, expression, ), Expression::and_all(final_filters), ) } GraphPattern::Filter { inner, expression } => { if let Expression::And(expressions) = expression { filters.extend(expressions) } else { filters.push(expression) }; Self::push_filters(*inner, filters, input_types) } GraphPattern::Union { inner } => GraphPattern::union_all( inner .into_iter() .map(|c| Self::push_filters(c, filters.clone(), input_types)), ), GraphPattern::Slice { inner, start, length, } => GraphPattern::filter( GraphPattern::slice( Self::push_filters(*inner, Vec::new(), input_types), start, length, ), Expression::and_all(filters), ), GraphPattern::Distinct { inner } => { GraphPattern::distinct(Self::push_filters(*inner, filters, input_types)) } GraphPattern::Reduced { inner } => { GraphPattern::reduced(Self::push_filters(*inner, filters, input_types)) } GraphPattern::Project { inner, variables } => { GraphPattern::project(Self::push_filters(*inner, filters, input_types), variables) } GraphPattern::OrderBy { inner, expression } => { GraphPattern::order_by(Self::push_filters(*inner, filters, input_types), expression) } GraphPattern::Service { inner, name, silent, } => GraphPattern::service( Self::push_filters(*inner, filters, input_types), name, silent, ), GraphPattern::Group { inner, variables, aggregates, } => GraphPattern::filter( GraphPattern::group( Self::push_filters(*inner, Vec::new(), input_types), variables, aggregates, ), Expression::and_all(filters), ), } } fn reorder_joins(pattern: GraphPattern, input_types: &VariableTypes) -> GraphPattern { match pattern { GraphPattern::QuadPattern { .. } | GraphPattern::Path { .. } | GraphPattern::Values { .. } => pattern, GraphPattern::Join { left, right, .. } => { // We flatten the join operation let mut to_reorder = Vec::new(); let mut todo = vec![*right, *left]; while let Some(e) = todo.pop() { if let GraphPattern::Join { left, right, .. } = e { todo.push(*right); todo.push(*left); } else { to_reorder.push(e); } } // We do first type inference let to_reorder_types = to_reorder .iter() .map(|p| infer_graph_pattern_types(p, input_types.clone())) .collect::>(); // We do greedy join reordering let mut output_cartesian_product_joins = Vec::new(); let mut not_yet_reordered_ids = vec![true; to_reorder.len()]; // We look for the next connected component to reorder and pick the smallest element while let Some(next_entry_id) = not_yet_reordered_ids .iter() .enumerate() .filter(|(_, v)| **v) .map(|(i, _)| i) .min_by_key(|i| estimate_graph_pattern_size(&to_reorder[*i], input_types)) { not_yet_reordered_ids[next_entry_id] = false; // It's now done let mut output = to_reorder[next_entry_id].clone(); let mut output_types = to_reorder_types[next_entry_id].clone(); // We look for an other child to join with that does not blow up the join cost while let Some(next_id) = not_yet_reordered_ids .iter() .enumerate() .filter(|(_, v)| **v) .map(|(i, _)| i) .filter(|i| { has_common_variables(&output_types, &to_reorder_types[*i], input_types) }) .min_by_key(|i| { // Estimation of the join cost if cfg!(feature = "sep-0006") && is_fit_for_for_loop_join( &to_reorder[*i], input_types, &output_types, ) { estimate_lateral_cost( &output, &output_types, &to_reorder[*i], input_types, ) } else { estimate_join_cost( &output, &to_reorder[*i], &JoinAlgorithm::HashBuildLeftProbeRight { keys: join_key_variables( &output_types, &to_reorder_types[*i], input_types, ), }, input_types, ) } }) { not_yet_reordered_ids[next_id] = false; // It's now done let next = to_reorder[next_id].clone(); #[cfg(feature = "sep-0006")] { output = if is_fit_for_for_loop_join(&next, input_types, &output_types) { GraphPattern::lateral(output, next) } else { GraphPattern::join( output, next, JoinAlgorithm::HashBuildLeftProbeRight { keys: join_key_variables( &output_types, &to_reorder_types[next_id], input_types, ), }, ) }; } #[cfg(not(feature = "sep-0006"))] { output = GraphPattern::join( output, next, JoinAlgorithm::HashBuildLeftProbeRight { keys: join_key_variables( &output_types, &to_reorder_types[next_id], input_types, ), }, ); } output_types.intersect_with(to_reorder_types[next_id].clone()); } output_cartesian_product_joins.push(output); } output_cartesian_product_joins .into_iter() .reduce(|left, right| { let keys = join_key_variables( &infer_graph_pattern_types(&left, input_types.clone()), &infer_graph_pattern_types(&right, input_types.clone()), input_types, ); if estimate_graph_pattern_size(&left, input_types) <= estimate_graph_pattern_size(&right, input_types) { GraphPattern::join( left, right, JoinAlgorithm::HashBuildLeftProbeRight { keys }, ) } else { GraphPattern::join( right, left, JoinAlgorithm::HashBuildLeftProbeRight { keys }, ) } }) .unwrap() } #[cfg(feature = "sep-0006")] GraphPattern::Lateral { left, right } => { let left_types = infer_graph_pattern_types(&left, input_types.clone()); GraphPattern::lateral( Self::reorder_joins(*left, input_types), Self::reorder_joins(*right, &left_types), ) } GraphPattern::LeftJoin { left, right, expression, .. } => { let left = Self::reorder_joins(*left, input_types); let left_types = infer_graph_pattern_types(&left, input_types.clone()); let right = Self::reorder_joins(*right, input_types); let right_types = infer_graph_pattern_types(&right, input_types.clone()); #[cfg(feature = "sep-0006")] { if is_fit_for_for_loop_join(&right, input_types, &left_types) && has_common_variables(&left_types, &right_types, input_types) { return GraphPattern::lateral( left, GraphPattern::left_join( GraphPattern::empty_singleton(), right, expression, LeftJoinAlgorithm::HashBuildRightProbeLeft { keys: Vec::new() }, ), ); } } GraphPattern::left_join( left, right, expression, LeftJoinAlgorithm::HashBuildRightProbeLeft { keys: join_key_variables(&left_types, &right_types, input_types), }, ) } GraphPattern::Minus { left, right, .. } => { let left = Self::reorder_joins(*left, input_types); let left_types = infer_graph_pattern_types(&left, input_types.clone()); let right = Self::reorder_joins(*right, input_types); let right_types = infer_graph_pattern_types(&right, input_types.clone()); GraphPattern::minus( left, right, MinusAlgorithm::HashBuildRightProbeLeft { keys: join_key_variables(&left_types, &right_types, input_types), }, ) } GraphPattern::Extend { inner, expression, variable, } => GraphPattern::extend( Self::reorder_joins(*inner, input_types), variable, expression, ), GraphPattern::Filter { inner, expression } => { GraphPattern::filter(Self::reorder_joins(*inner, input_types), expression) } GraphPattern::Union { inner } => GraphPattern::union_all( inner .into_iter() .map(|c| Self::reorder_joins(c, input_types)), ), GraphPattern::Slice { inner, start, length, } => GraphPattern::slice(Self::reorder_joins(*inner, input_types), start, length), GraphPattern::Distinct { inner } => { GraphPattern::distinct(Self::reorder_joins(*inner, input_types)) } GraphPattern::Reduced { inner } => { GraphPattern::reduced(Self::reorder_joins(*inner, input_types)) } GraphPattern::Project { inner, variables } => { GraphPattern::project(Self::reorder_joins(*inner, input_types), variables) } GraphPattern::OrderBy { inner, expression } => { GraphPattern::order_by(Self::reorder_joins(*inner, input_types), expression) } GraphPattern::Service { inner, name, silent, } => GraphPattern::service(Self::reorder_joins(*inner, input_types), name, silent), GraphPattern::Group { inner, variables, aggregates, } => GraphPattern::group( Self::reorder_joins(*inner, input_types), variables, aggregates, ), } } } fn is_fit_for_for_loop_join( pattern: &GraphPattern, global_input_types: &VariableTypes, entry_types: &VariableTypes, ) -> bool { // TODO: think more about it match pattern { GraphPattern::Values { .. } | GraphPattern::QuadPattern { .. } | GraphPattern::Path { .. } => true, #[cfg(feature = "sep-0006")] GraphPattern::Lateral { left, right } => { is_fit_for_for_loop_join(left, global_input_types, entry_types) && is_fit_for_for_loop_join(right, global_input_types, entry_types) } GraphPattern::LeftJoin { left, right, expression, .. } => { if !is_fit_for_for_loop_join(left, global_input_types, entry_types) { return false; } // It is not ok to transform into for loop join if right binds a variable also bound by the entry part of the for loop join let mut left_types = infer_graph_pattern_types(left, global_input_types.clone()); let right_types = infer_graph_pattern_types(right, global_input_types.clone()); if right_types.iter().any(|(variable, t)| { *t != VariableType::UNDEF && left_types.get(variable).undef && entry_types.get(variable) != VariableType::UNDEF }) { return false; } // We don't forget the final expression left_types.intersect_with(right_types); is_expression_fit_for_for_loop_join(expression, &left_types, entry_types) } GraphPattern::Union { inner } => inner .iter() .all(|i| is_fit_for_for_loop_join(i, global_input_types, entry_types)), GraphPattern::Filter { inner, expression } => { is_fit_for_for_loop_join(inner, global_input_types, entry_types) && is_expression_fit_for_for_loop_join( expression, &infer_graph_pattern_types(inner, global_input_types.clone()), entry_types, ) } GraphPattern::Extend { inner, expression, variable, } => { is_fit_for_for_loop_join(inner, global_input_types, entry_types) && entry_types.get(variable) == VariableType::UNDEF && is_expression_fit_for_for_loop_join( expression, &infer_graph_pattern_types(inner, global_input_types.clone()), entry_types, ) } GraphPattern::Join { .. } | GraphPattern::Minus { .. } | GraphPattern::Service { .. } | GraphPattern::OrderBy { .. } | GraphPattern::Distinct { .. } | GraphPattern::Reduced { .. } | GraphPattern::Slice { .. } | GraphPattern::Project { .. } | GraphPattern::Group { .. } => false, } } fn are_all_expression_variables_bound( expression: &Expression, variable_types: &VariableTypes, ) -> bool { expression .used_variables() .into_iter() .all(|v| !variable_types.get(v).undef) } fn are_no_expression_variables_bound( expression: &Expression, variable_types: &VariableTypes, ) -> bool { expression .used_variables() .into_iter() .all(|v| variable_types.get(v) == VariableType::UNDEF) } fn is_expression_fit_for_for_loop_join( expression: &Expression, input_types: &VariableTypes, entry_types: &VariableTypes, ) -> bool { match expression { Expression::NamedNode(_) | Expression::Literal(_) => true, Expression::Variable(v) | Expression::Bound(v) => { !input_types.get(v).undef || entry_types.get(v) == VariableType::UNDEF } Expression::Or(inner) | Expression::And(inner) | Expression::Coalesce(inner) | Expression::FunctionCall(_, inner) => inner .iter() .all(|e| is_expression_fit_for_for_loop_join(e, input_types, entry_types)), Expression::Equal(a, b) | Expression::SameTerm(a, b) | Expression::Greater(a, b) | Expression::GreaterOrEqual(a, b) | Expression::Less(a, b) | Expression::LessOrEqual(a, b) | Expression::Add(a, b) | Expression::Subtract(a, b) | Expression::Multiply(a, b) | Expression::Divide(a, b) => { is_expression_fit_for_for_loop_join(a, input_types, entry_types) && is_expression_fit_for_for_loop_join(b, input_types, entry_types) } Expression::UnaryPlus(e) | Expression::UnaryMinus(e) | Expression::Not(e) => { is_expression_fit_for_for_loop_join(e, input_types, entry_types) } Expression::If(a, b, c) => { is_expression_fit_for_for_loop_join(a, input_types, entry_types) && is_expression_fit_for_for_loop_join(b, input_types, entry_types) && is_expression_fit_for_for_loop_join(c, input_types, entry_types) } Expression::Exists(inner) => is_fit_for_for_loop_join(inner, input_types, entry_types), } } fn has_common_variables( left: &VariableTypes, right: &VariableTypes, input_types: &VariableTypes, ) -> bool { // TODO: we should be smart and count as shared variables FILTER(?a = ?b) left.iter().any(|(variable, left_type)| { !left_type.undef && !right.get(variable).undef && input_types.get(variable).undef }) } fn join_key_variables( left: &VariableTypes, right: &VariableTypes, input_types: &VariableTypes, ) -> Vec { left.iter() .filter(|(variable, left_type)| { !left_type.undef && !right.get(variable).undef && input_types.get(variable).undef }) .map(|(variable, _)| variable.clone()) .collect() } fn estimate_graph_pattern_size(pattern: &GraphPattern, input_types: &VariableTypes) -> usize { match pattern { GraphPattern::Values { bindings, .. } => bindings.len(), GraphPattern::QuadPattern { subject, predicate, object, .. } => estimate_triple_pattern_size( is_term_pattern_bound(subject, input_types), is_named_node_pattern_bound(predicate, input_types), is_term_pattern_bound(object, input_types), ), GraphPattern::Path { subject, path, object, .. } => estimate_path_size( is_term_pattern_bound(subject, input_types), path, is_term_pattern_bound(object, input_types), ), GraphPattern::Join { left, right, algorithm, } => estimate_join_cost(left, right, algorithm, input_types), GraphPattern::LeftJoin { left, right, algorithm, .. } => match algorithm { LeftJoinAlgorithm::HashBuildRightProbeLeft { keys } => { let left_size = estimate_graph_pattern_size(left, input_types); max( left_size, left_size .saturating_mul(estimate_graph_pattern_size( right, &infer_graph_pattern_types(right, input_types.clone()), )) .saturating_div(1_000_usize.saturating_pow(keys.len().try_into().unwrap())), ) } }, #[cfg(feature = "sep-0006")] GraphPattern::Lateral { left, right } => estimate_lateral_cost( left, &infer_graph_pattern_types(left, input_types.clone()), right, input_types, ), GraphPattern::Union { inner } => inner .iter() .map(|inner| estimate_graph_pattern_size(inner, input_types)) .fold(0, usize::saturating_add), GraphPattern::Minus { left, .. } => estimate_graph_pattern_size(left, input_types), GraphPattern::Filter { inner, .. } | GraphPattern::Extend { inner, .. } | GraphPattern::OrderBy { inner, .. } | GraphPattern::Project { inner, .. } | GraphPattern::Distinct { inner, .. } | GraphPattern::Reduced { inner, .. } | GraphPattern::Group { inner, .. } | GraphPattern::Service { inner, .. } => estimate_graph_pattern_size(inner, input_types), GraphPattern::Slice { inner, start, length, } => { let inner = estimate_graph_pattern_size(inner, input_types); if let Some(length) = length { min(inner, *length - *start) } else { inner } } } } fn estimate_join_cost( left: &GraphPattern, right: &GraphPattern, algorithm: &JoinAlgorithm, input_types: &VariableTypes, ) -> usize { match algorithm { JoinAlgorithm::HashBuildLeftProbeRight { keys } => { estimate_graph_pattern_size(left, input_types) .saturating_mul(estimate_graph_pattern_size(right, input_types)) .saturating_div(1_000_usize.saturating_pow(keys.len().try_into().unwrap())) } } } fn estimate_lateral_cost( left: &GraphPattern, left_types: &VariableTypes, right: &GraphPattern, input_types: &VariableTypes, ) -> usize { estimate_graph_pattern_size(left, input_types) .saturating_mul(estimate_graph_pattern_size(right, left_types)) } fn estimate_triple_pattern_size( subject_bound: bool, predicate_bound: bool, object_bound: bool, ) -> usize { match (subject_bound, predicate_bound, object_bound) { (true, true, true) => 1, (true, true, false) => 10, (true, false, true) => 2, (false, true, true) => 10_000, (true, false, false) => 100, (false, false, false) => 1_000_000_000, (false, true, false) => 1_000_000, (false, false, true) => 100_000, } } fn estimate_path_size(start_bound: bool, path: &PropertyPathExpression, end_bound: bool) -> usize { match path { PropertyPathExpression::NamedNode(_) => { estimate_triple_pattern_size(start_bound, true, end_bound) } PropertyPathExpression::Reverse(p) => estimate_path_size(end_bound, p, start_bound), PropertyPathExpression::Sequence(a, b) => { // We do a for loop join in the best direction min( estimate_path_size(start_bound, a, false) .saturating_mul(estimate_path_size(true, b, end_bound)), estimate_path_size(start_bound, a, true) .saturating_mul(estimate_path_size(false, b, end_bound)), ) } PropertyPathExpression::Alternative(a, b) => estimate_path_size(start_bound, a, end_bound) .saturating_add(estimate_path_size(start_bound, b, end_bound)), PropertyPathExpression::ZeroOrMore(p) => { if start_bound && end_bound { 1 } else if start_bound || end_bound { estimate_path_size(start_bound, p, end_bound).saturating_mul(1000) } else { 1_000_000_000 } } PropertyPathExpression::OneOrMore(p) => { if start_bound && end_bound { 1 } else { estimate_path_size(start_bound, p, end_bound).saturating_mul(1000) } } PropertyPathExpression::ZeroOrOne(p) => { if start_bound && end_bound { 1 } else if start_bound || end_bound { estimate_path_size(start_bound, p, end_bound) } else { 1_000_000_000 } } PropertyPathExpression::NegatedPropertySet(_) => { estimate_triple_pattern_size(start_bound, false, end_bound) } } } fn is_term_pattern_bound(pattern: &GroundTermPattern, input_types: &VariableTypes) -> bool { match pattern { GroundTermPattern::NamedNode(_) | GroundTermPattern::Literal(_) => true, GroundTermPattern::Variable(v) => !input_types.get(v).undef, #[cfg(feature = "rdf-star")] GroundTermPattern::Triple(t) => { is_term_pattern_bound(&t.subject, input_types) && is_named_node_pattern_bound(&t.predicate, input_types) && is_term_pattern_bound(&t.object, input_types) } } } fn is_named_node_pattern_bound(pattern: &NamedNodePattern, input_types: &VariableTypes) -> bool { match pattern { NamedNodePattern::NamedNode(_) => true, NamedNodePattern::Variable(v) => !input_types.get(v).undef, } }