use crate::model::Term as OxTerm;
use crate::sparql::dataset::DatasetView;
use crate::sparql::error::EvaluationError;
use crate::sparql::plan::*;
use crate::storage::numeric_encoder::{EncodedTerm, EncodedTriple};
use oxrdf::vocab::xsd;
use oxrdf::TermRef;
use rand::random;
use spargebra::algebra::*;
use spargebra::term::*;
use std::collections::{BTreeSet, HashMap, HashSet};
use std::mem::swap;
use std::rc::Rc;

// NOTE(review): in this copy of the file several generic argument lists look
// stripped (e.g. `HashMap Option>>`, bare `Vec` / `Result`, `random::()`) and
// `¶meters` looks like a mangled `&parameters`. The code tokens below are
// reproduced exactly as found — confirm against the upstream file before
// relying on any signature.

/// Builds `PlanNode` / `PlanExpression` trees from `spargebra` algebra values
/// (`GraphPattern`, `Expression`, `AggregateExpression`, ...).
pub struct PlanBuilder<'a> {
    dataset: &'a DatasetView,
    // Looked up by function name when a `Function::Custom` call is translated.
    custom_functions: &'a HashMap Option>>,
}

impl<'a> PlanBuilder<'a> {
    /// Builds the evaluation plan for `pattern`.
    ///
    /// The pattern is translated with the default graph
    /// (`EncodedTerm::DefaultGraph`) as the initial graph name. On success the
    /// plan is returned together with the variable list that was passed
    /// (initially empty) through the whole translation.
    ///
    /// When `is_cardinality_meaningful` is `false`, the resulting plan is
    /// wrapped in a `PlanNode::Reduced` node.
    ///
    /// # Errors
    ///
    /// Propagates any `EvaluationError` raised while translating the pattern.
    pub fn build(
        dataset: &'a DatasetView,
        pattern: &GraphPattern,
        is_cardinality_meaningful: bool,
        custom_functions: &'a HashMap Option>>,
    ) -> Result<(PlanNode, Vec), EvaluationError> {
        let mut variables = Vec::default();
        let plan = PlanBuilder {
            dataset,
            custom_functions,
        }
        .build_for_graph_pattern(
            pattern,
            &mut variables,
            &PatternValue::Constant(EncodedTerm::DefaultGraph),
        )?;
        let plan = if is_cardinality_meaningful {
            plan
        } else {
            // let's reduce downstream task.
            // TODO: avoid if already REDUCED or DISTINCT
            PlanNode::Reduced {
                child: Box::new(plan),
            }
        };
        Ok((plan, variables))
    }

    /// Builds the triple templates for `template`.
    ///
    /// `variables` is taken by value and used as the starting variable list
    /// when resolving the variables that appear in the template.
    pub fn build_graph_template(
        dataset: &'a DatasetView,
        template: &[TriplePattern],
        mut variables: Vec,
        custom_functions: &'a HashMap Option>>,
    ) -> Vec {
        PlanBuilder {
            dataset,
            custom_functions,
        }
        .build_for_graph_template(template, &mut variables)
    }

    /// Recursively translates a `GraphPattern` into a `PlanNode`.
    ///
    /// * `variables` - the variable list of the current scope; it is handed to
    ///   `variable_key` (defined outside this view — presumably it appends
    ///   unknown variables and returns their index; verify) for every variable
    ///   encountered.
    /// * `graph_name` - the graph position given to the quad and path patterns
    ///   that are created; it is replaced while translating the inside of a
    ///   `GraphPattern::Graph`.
    ///
    /// # Errors
    ///
    /// Propagates the errors of nested pattern, expression and aggregate
    /// translation.
    fn build_for_graph_pattern(
        &mut self,
        pattern: &GraphPattern,
        variables: &mut Vec,
        graph_name: &PatternValue,
    ) -> Result {
        Ok(match pattern {
            // One quad pattern per triple (in the order returned by `sort_bgp`),
            // folded together with `new_join`. A BGP without any triple falls
            // back to a `StaticBindings` node holding one freshly created tuple.
            GraphPattern::Bgp { patterns } => sort_bgp(patterns)
                .iter()
                .map(|triple| PlanNode::QuadPattern {
                    subject: self.pattern_value_from_term_or_variable(&triple.subject, variables),
                    predicate: self
                        .pattern_value_from_named_node_or_variable(&triple.predicate, variables),
                    object: self.pattern_value_from_term_or_variable(&triple.object, variables),
                    graph_name: graph_name.clone(),
                })
                .reduce(Self::new_join)
                .unwrap_or_else(|| PlanNode::StaticBindings {
                    tuples: vec![EncodedTuple::with_capacity(variables.len())],
                }),
            GraphPattern::Path {
                subject,
                path,
                object,
            } => PlanNode::PathPattern {
                subject: self.pattern_value_from_term_or_variable(subject, variables),
                path: Rc::new(self.build_for_path(path)),
                object: self.pattern_value_from_term_or_variable(object, variables),
                graph_name: graph_name.clone(),
            },
            GraphPattern::Join { left, right } => Self::new_join(
                self.build_for_graph_pattern(left, variables, graph_name)?,
                self.build_for_graph_pattern(right, variables, graph_name)?,
            ),
            GraphPattern::Sequence { left, right } => PlanNode::ForLoopJoin {
                left: Box::new(self.build_for_graph_pattern(left, variables, graph_name)?),
                right: Box::new(self.build_for_graph_pattern(right, variables, graph_name)?),
            },
            GraphPattern::LeftSequence { left, right } => PlanNode::LeftJoin {
                left: Box::new(self.build_for_graph_pattern(left, variables, graph_name)?),
                right: Box::new(self.build_for_graph_pattern(right, variables, graph_name)?),
                possible_problem_vars: Rc::new(Vec::new()),
            },
            GraphPattern::LeftJoin {
                left,
                right,
                expression,
            } => {
                let left = self.build_for_graph_pattern(left, variables, graph_name)?;
                let right = self.build_for_graph_pattern(right, variables, graph_name)?;

                // The problematic variables are collected from the right side
                // before the optional filter expression is pushed into it.
                let mut possible_problem_vars = BTreeSet::new();
                Self::add_left_join_problematic_variables(&right, &mut possible_problem_vars);

                //We add the extra filter if needed
                let right = if let Some(expr) = expression {
                    Self::push_filter(
                        Box::new(right),
                        Box::new(self.build_for_expression(expr, variables, graph_name)?),
                    )
                } else {
                    right
                };

                PlanNode::LeftJoin {
                    left: Box::new(left),
                    right: Box::new(right),
                    possible_problem_vars: Rc::new(possible_problem_vars.into_iter().collect()),
                }
            }
            GraphPattern::Filter { expr, inner } => Self::push_filter(
                Box::new(self.build_for_graph_pattern(inner, variables, graph_name)?),
                Box::new(self.build_for_expression(expr, variables, graph_name)?),
            ),
            GraphPattern::Union { left, right } => {
                //We flatten the UNIONs
                // Nested unions are unrolled with an explicit stack so that every
                // non-union operand becomes a direct child of one `Union` node.
                let mut stack: Vec<&GraphPattern> = vec![left, right];
                let mut children = vec![];
                loop {
                    match stack.pop() {
                        None => break,
                        Some(GraphPattern::Union { left, right }) => {
                            stack.push(left);
                            stack.push(right);
                        }
                        Some(p) => {
                            children.push(self.build_for_graph_pattern(p, variables, graph_name)?)
                        }
                    }
                }
                PlanNode::Union { children }
            }
            GraphPattern::Graph { name, inner } => {
                // The inner pattern is built with `name` as its graph name.
                let graph_name = self.pattern_value_from_named_node_or_variable(name, variables);
                self.build_for_graph_pattern(inner, variables, &graph_name)?
            }
            GraphPattern::Extend {
                inner,
                variable,
                expression,
            } => PlanNode::Extend {
                child: Box::new(self.build_for_graph_pattern(inner, variables, graph_name)?),
                position: variable_key(variables, variable),
                expression: Box::new(self.build_for_expression(expression, variables, graph_name)?),
            },
            GraphPattern::Minus { left, right } => PlanNode::AntiJoin {
                left: Box::new(self.build_for_graph_pattern(left, variables, graph_name)?),
                right: Box::new(self.build_for_graph_pattern(right, variables, graph_name)?),
            },
            GraphPattern::Service {
                name,
                inner,
                silent,
            } => {
                // Child building should be at the beginning in order for `variables` to be filled
                let child = self.build_for_graph_pattern(inner, variables, graph_name)?;
                let service_name = self.pattern_value_from_named_node_or_variable(name, variables);
                PlanNode::Service {
                    service_name,
                    // Snapshot of the variable list at this point of the build.
                    variables: Rc::new(variables.clone()),
                    child: Box::new(child),
                    // The untranslated inner pattern is kept alongside the child plan.
                    graph_pattern: Rc::new(inner.as_ref().clone()),
                    silent: *silent,
                }
            }
            GraphPattern::Group {
                inner,
                variables: by,
                aggregates,
            } => PlanNode::Aggregate {
                child: Box::new(self.build_for_graph_pattern(inner, variables, graph_name)?),
                key_variables: Rc::new(by.iter().map(|k| variable_key(variables, k)).collect()),
                // Each entry pairs the translated aggregate with the key of the
                // variable it is associated with in the algebra.
                aggregates: Rc::new(
                    aggregates
                        .iter()
                        .map(|(v, a)| {
                            Ok((
                                self.build_for_aggregate(a, variables, graph_name)?,
                                variable_key(variables, v),
                            ))
                        })
                        .collect::, EvaluationError>>()?,
                ),
            },
            GraphPattern::Values {
                variables: table_variables,
                bindings,
            } => PlanNode::StaticBindings {
                tuples: self.encode_bindings(table_variables, bindings, variables),
            },
            GraphPattern::OrderBy { inner, expression } => {
                // The comparators are built before the child plan.
                let condition: Result, EvaluationError> = expression
                    .iter()
                    .map(|comp| match comp {
                        OrderExpression::Asc(e) => Ok(Comparator::Asc(
                            self.build_for_expression(e, variables, graph_name)?,
                        )),
                        OrderExpression::Desc(e) => Ok(Comparator::Desc(
                            self.build_for_expression(e, variables, graph_name)?,
                        )),
                    })
                    .collect();
                PlanNode::Sort {
                    child: Box::new(self.build_for_graph_pattern(inner, variables, graph_name)?),
                    by: condition?,
                }
            }
            GraphPattern::Project {
                inner,
                variables: projection,
                lateral_variables,
            } => {
                // The child is built against its own variable list, seeded with
                // the projected variables, and with the graph name re-expressed
                // in terms of that list.
                let mut inner_variables = projection.clone();
                let inner_graph_name =
                    Self::convert_pattern_value_id(graph_name, variables, &mut inner_variables);
                PlanNode::Project {
                    child: Box::new(self.build_for_graph_pattern(
                        inner,
                        &mut inner_variables,
                        &inner_graph_name,
                    )?),
                    // (index in `projection`, key of the same variable in the outer list)
                    mapping: Rc::new(
                        projection
                            .iter()
                            .enumerate()
                            .map(|(new_variable, variable)| {
                                (new_variable, variable_key(variables, variable))
                            })
                            .collect(),
                    ),
                    // (key in the inner list, key in the outer list)
                    lateral_mapping: Rc::new(
                        lateral_variables
                            .iter()
                            .map(|variable| {
                                (
                                    variable_key(&mut inner_variables, variable),
                                    variable_key(variables, variable),
                                )
                            })
                            .collect(),
                    ),
                }
            }
            GraphPattern::Distinct { inner } => PlanNode::HashDeduplicate {
                child: Box::new(self.build_for_graph_pattern(inner, variables, graph_name)?),
            },
            GraphPattern::Reduced { inner } => PlanNode::Reduced {
                child: Box::new(self.build_for_graph_pattern(inner, variables, graph_name)?),
            },
            GraphPattern::Slice {
                inner,
                start,
                length,
            } => {
                // `Skip` (only for a non-zero start) is applied first, then `Limit`.
                let mut plan = self.build_for_graph_pattern(inner, variables, graph_name)?;
                if *start > 0 {
                    plan = PlanNode::Skip {
                        child: Box::new(plan),
                        count: *start,
                    };
                }
                if let Some(length) = length {
                    plan = PlanNode::Limit {
                        child: Box::new(plan),
                        count: *length,
                    };
                }
                plan
            }
        })
    }

    /// Recursively translates a property path expression into a
    /// `PlanPropertyPath`, variant by variant.
    fn build_for_path(&mut self, path: &PropertyPathExpression) -> PlanPropertyPath {
        match path {
            PropertyPathExpression::NamedNode(p) => PlanPropertyPath::Path(self.build_term(p)),
            PropertyPathExpression::Reverse(p) => {
                PlanPropertyPath::Reverse(Rc::new(self.build_for_path(p)))
            }
            PropertyPathExpression::Alternative(a, b) => PlanPropertyPath::Alternative(
                Rc::new(self.build_for_path(a)),
                Rc::new(self.build_for_path(b)),
            ),
            PropertyPathExpression::Sequence(a, b) => PlanPropertyPath::Sequence(
                Rc::new(self.build_for_path(a)),
                Rc::new(self.build_for_path(b)),
            ),
            PropertyPathExpression::ZeroOrMore(p) => {
                PlanPropertyPath::ZeroOrMore(Rc::new(self.build_for_path(p)))
            }
            PropertyPathExpression::OneOrMore(p) => {
                PlanPropertyPath::OneOrMore(Rc::new(self.build_for_path(p)))
            }
            PropertyPathExpression::ZeroOrOne(p) => {
                PlanPropertyPath::ZeroOrOne(Rc::new(self.build_for_path(p)))
            }
            PropertyPathExpression::NegatedPropertySet(p) => PlanPropertyPath::NegatedPropertySet(
                Rc::new(p.iter().map(|p| self.build_term(p)).collect()),
            ),
        }
    }

    /// Recursively translates an `Expression` into a `PlanExpression`.
    ///
    /// Built-in function calls read their arguments by index
    /// (`parameters[0]`, `parameters[1]`, ...), so this relies on the call
    /// carrying at least the expected number of arguments — presumably
    /// guaranteed by whatever produced the algebra; verify.
    ///
    /// # Errors
    ///
    /// Returns an error for a `Function::Custom` that is neither registered in
    /// `custom_functions` nor one of the xsd casts handled here, for an xsd
    /// cast whose parameter count is not exactly one, and for any error raised
    /// while translating nested expressions or `EXISTS` patterns.
    fn build_for_expression(
        &mut self,
        expression: &Expression,
        variables: &mut Vec,
        graph_name: &PatternValue,
    ) -> Result {
        Ok(match expression {
            Expression::NamedNode(node) => PlanExpression::Constant(self.build_term(node)),
            Expression::Literal(l) => PlanExpression::Constant(self.build_term(l)),
            Expression::Variable(v) => PlanExpression::Variable(variable_key(variables, v)),
            Expression::Or(a, b) => PlanExpression::Or(
                Box::new(self.build_for_expression(a, variables, graph_name)?),
                Box::new(self.build_for_expression(b, variables, graph_name)?),
            ),
            Expression::And(a, b) => PlanExpression::And(
                Box::new(self.build_for_expression(a, variables, graph_name)?),
                Box::new(self.build_for_expression(b, variables, graph_name)?),
            ),
            Expression::Equal(a, b) => PlanExpression::Equal(
                Box::new(self.build_for_expression(a, variables, graph_name)?),
                Box::new(self.build_for_expression(b, variables, graph_name)?),
            ),
            Expression::SameTerm(a, b) => PlanExpression::SameTerm(
                Box::new(self.build_for_expression(a, variables, graph_name)?),
                Box::new(self.build_for_expression(b, variables, graph_name)?),
            ),
            Expression::Greater(a, b) => PlanExpression::Greater(
                Box::new(self.build_for_expression(a, variables, graph_name)?),
                Box::new(self.build_for_expression(b, variables, graph_name)?),
            ),
            Expression::GreaterOrEqual(a, b) => PlanExpression::GreaterOrEqual(
                Box::new(self.build_for_expression(a, variables, graph_name)?),
                Box::new(self.build_for_expression(b, variables, graph_name)?),
            ),
            Expression::Less(a, b) => PlanExpression::Less(
                Box::new(self.build_for_expression(a, variables, graph_name)?),
                Box::new(self.build_for_expression(b, variables, graph_name)?),
            ),
            Expression::LessOrEqual(a, b) => PlanExpression::LessOrEqual(
                Box::new(self.build_for_expression(a, variables, graph_name)?),
                Box::new(self.build_for_expression(b, variables, graph_name)?),
            ),
            Expression::In(e, l) => {
                // `e IN (v1, v2, ...)` becomes `e = v1 || e = v2 || ...`;
                // an empty list becomes the constant `false`.
                let e = self.build_for_expression(e, variables, graph_name)?;
                l.iter()
                    .map(|v| {
                        Ok(PlanExpression::Equal(
                            Box::new(e.clone()),
                            Box::new(self.build_for_expression(v, variables, graph_name)?),
                        ))
                    })
                    .reduce(|a: Result<_, EvaluationError>, b| {
                        Ok(PlanExpression::Or(Box::new(a?), Box::new(b?)))
                    })
                    .unwrap_or_else(|| Ok(PlanExpression::Constant(false.into())))?
            }
            Expression::Add(a, b) => PlanExpression::Add(
                Box::new(self.build_for_expression(a, variables, graph_name)?),
                Box::new(self.build_for_expression(b, variables, graph_name)?),
            ),
            Expression::Subtract(a, b) => PlanExpression::Subtract(
                Box::new(self.build_for_expression(a, variables, graph_name)?),
                Box::new(self.build_for_expression(b, variables, graph_name)?),
            ),
            Expression::Multiply(a, b) => PlanExpression::Multiply(
                Box::new(self.build_for_expression(a, variables, graph_name)?),
                Box::new(self.build_for_expression(b, variables, graph_name)?),
            ),
            Expression::Divide(a, b) => PlanExpression::Divide(
                Box::new(self.build_for_expression(a, variables, graph_name)?),
                Box::new(self.build_for_expression(b, variables, graph_name)?),
            ),
            Expression::UnaryPlus(e) => PlanExpression::UnaryPlus(Box::new(
                self.build_for_expression(e, variables, graph_name)?,
            )),
            Expression::UnaryMinus(e) => PlanExpression::UnaryMinus(Box::new(
                self.build_for_expression(e, variables, graph_name)?,
            )),
            Expression::Not(e) => PlanExpression::Not(Box::new(
                self.build_for_expression(e, variables, graph_name)?,
            )),
            Expression::FunctionCall(function, parameters) => match function {
                Function::Str => PlanExpression::Str(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Lang => PlanExpression::Lang(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::LangMatches => PlanExpression::LangMatches(
                    Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?),
                    Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?),
                ),
                Function::Datatype => PlanExpression::Datatype(Box::new(
                    self.build_for_expression(¶meters[0], variables, graph_name)?,
                )),
                Function::Iri => PlanExpression::Iri(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                // The single argument of BNODE is optional.
                Function::BNode => PlanExpression::BNode(match parameters.get(0) {
                    Some(e) => Some(Box::new(
                        self.build_for_expression(e, variables, graph_name)?,
                    )),
                    None => None,
                }),
                Function::Rand => PlanExpression::Rand,
                Function::Abs => PlanExpression::Abs(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Ceil => PlanExpression::Ceil(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Floor => PlanExpression::Floor(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Round => PlanExpression::Round(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Concat => {
                    PlanExpression::Concat(self.expression_list(parameters, variables, graph_name)?)
                }
                // The third argument is optional.
                Function::SubStr => PlanExpression::SubStr(
                    Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?),
                    Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?),
                    match parameters.get(2) {
                        Some(flags) => Some(Box::new(
                            self.build_for_expression(flags, variables, graph_name)?,
                        )),
                        None => None,
                    },
                ),
                Function::StrLen => PlanExpression::StrLen(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                // The fourth argument (flags) is optional.
                Function::Replace => PlanExpression::Replace(
                    Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?),
                    Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?),
                    Box::new(self.build_for_expression(¶meters[2], variables, graph_name)?),
                    match parameters.get(3) {
                        Some(flags) => Some(Box::new(
                            self.build_for_expression(flags, variables, graph_name)?,
                        )),
                        None => None,
                    },
                ),
                Function::UCase => PlanExpression::UCase(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::LCase => PlanExpression::LCase(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::EncodeForUri => PlanExpression::EncodeForUri(Box::new(
                    self.build_for_expression(¶meters[0], variables, graph_name)?,
                )),
                Function::Contains => PlanExpression::Contains(
                    Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?),
                    Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?),
                ),
                Function::StrStarts => PlanExpression::StrStarts(
                    Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?),
                    Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?),
                ),
                Function::StrEnds => PlanExpression::StrEnds(
                    Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?),
                    Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?),
                ),
                Function::StrBefore => PlanExpression::StrBefore(
                    Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?),
                    Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?),
                ),
                Function::StrAfter => PlanExpression::StrAfter(
                    Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?),
                    Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?),
                ),
                Function::Year => PlanExpression::Year(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Month => PlanExpression::Month(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Day => PlanExpression::Day(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Hours => PlanExpression::Hours(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Minutes => PlanExpression::Minutes(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Seconds => PlanExpression::Seconds(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Timezone => PlanExpression::Timezone(Box::new(
                    self.build_for_expression(¶meters[0], variables, graph_name)?,
                )),
                Function::Tz => PlanExpression::Tz(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Now => PlanExpression::Now,
                Function::Uuid => PlanExpression::Uuid,
                Function::StrUuid => PlanExpression::StrUuid,
                Function::Md5 => PlanExpression::Md5(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Sha1 => PlanExpression::Sha1(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Sha256 => PlanExpression::Sha256(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Sha384 => PlanExpression::Sha384(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Sha512 => PlanExpression::Sha512(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::StrLang => PlanExpression::StrLang(
                    Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?),
                    Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?),
                ),
                Function::StrDt => PlanExpression::StrDt(
                    Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?),
                    Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?),
                ),
                Function::IsIri => PlanExpression::IsIri(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::IsBlank => PlanExpression::IsBlank(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::IsLiteral => PlanExpression::IsLiteral(Box::new(
                    self.build_for_expression(¶meters[0], variables, graph_name)?,
                )),
                Function::IsNumeric => PlanExpression::IsNumeric(Box::new(
                    self.build_for_expression(¶meters[0], variables, graph_name)?,
                )),
                // The third argument (flags) is optional.
                Function::Regex => PlanExpression::Regex(
                    Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?),
                    Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?),
                    match parameters.get(2) {
                        Some(flags) => Some(Box::new(
                            self.build_for_expression(flags, variables, graph_name)?,
                        )),
                        None => None,
                    },
                ),
                Function::Triple => PlanExpression::Triple(
                    Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?),
                    Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?),
                    Box::new(self.build_for_expression(¶meters[2], variables, graph_name)?),
                ),
                Function::Subject => PlanExpression::Subject(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::Predicate => PlanExpression::Predicate(Box::new(
                    self.build_for_expression(¶meters[0], variables, graph_name)?,
                )),
                Function::Object => PlanExpression::Object(Box::new(self.build_for_expression(
                    ¶meters[0],
                    variables,
                    graph_name,
                )?)),
                Function::IsTriple => PlanExpression::IsTriple(Box::new(
                    self.build_for_expression(¶meters[0], variables, graph_name)?,
                )),
                Function::Custom(name) => {
                    // A function registered in `custom_functions` is checked first,
                    // so it takes precedence over the xsd casts handled below.
                    if self.custom_functions.contains_key(name) {
                        PlanExpression::CustomFunction(
                            name.clone(),
                            parameters
                                .iter()
                                .map(|p| self.build_for_expression(p, variables, graph_name))
                                .collect::, EvaluationError>>()?,
                        )
                    } else if name.as_ref() == xsd::BOOLEAN {
                        self.build_cast(
                            parameters,
                            PlanExpression::BooleanCast,
                            variables,
                            graph_name,
                            "boolean",
                        )?
                    } else if name.as_ref() == xsd::DOUBLE {
                        self.build_cast(
                            parameters,
                            PlanExpression::DoubleCast,
                            variables,
                            graph_name,
                            "double",
                        )?
                    } else if name.as_ref() == xsd::FLOAT {
                        self.build_cast(
                            parameters,
                            PlanExpression::FloatCast,
                            variables,
                            graph_name,
                            "float",
                        )?
                    } else if name.as_ref() == xsd::DECIMAL {
                        self.build_cast(
                            parameters,
                            PlanExpression::DecimalCast,
                            variables,
                            graph_name,
                            "decimal",
                        )?
                    } else if name.as_ref() == xsd::INTEGER {
                        self.build_cast(
                            parameters,
                            PlanExpression::IntegerCast,
                            variables,
                            graph_name,
                            "integer",
                        )?
                    } else if name.as_ref() == xsd::DATE {
                        self.build_cast(
                            parameters,
                            PlanExpression::DateCast,
                            variables,
                            graph_name,
                            "date",
                        )?
                    } else if name.as_ref() == xsd::TIME {
                        self.build_cast(
                            parameters,
                            PlanExpression::TimeCast,
                            variables,
                            graph_name,
                            "time",
                        )?
                    } else if name.as_ref() == xsd::DATE_TIME {
                        self.build_cast(
                            parameters,
                            PlanExpression::DateTimeCast,
                            variables,
                            graph_name,
                            "dateTime",
                        )?
                    } else if name.as_ref() == xsd::DURATION {
                        self.build_cast(
                            parameters,
                            PlanExpression::DurationCast,
                            variables,
                            graph_name,
                            "duration",
                        )?
                    } else if name.as_ref() == xsd::YEAR_MONTH_DURATION {
                        self.build_cast(
                            parameters,
                            PlanExpression::YearMonthDurationCast,
                            variables,
                            graph_name,
                            "yearMonthDuration",
                        )?
                    } else if name.as_ref() == xsd::DAY_TIME_DURATION {
                        self.build_cast(
                            parameters,
                            PlanExpression::DayTimeDurationCast,
                            variables,
                            graph_name,
                            "dayTimeDuration",
                        )?
                    } else if name.as_ref() == xsd::STRING {
                        self.build_cast(
                            parameters,
                            PlanExpression::StringCast,
                            variables,
                            graph_name,
                            "string",
                        )?
                    } else {
                        return Err(EvaluationError::msg(format!(
                            "Not supported custom function {}",
                            expression
                        )));
                    }
                }
            },
            Expression::Bound(v) => PlanExpression::Bound(variable_key(variables, v)),
            Expression::If(a, b, c) => PlanExpression::If(
                Box::new(self.build_for_expression(a, variables, graph_name)?),
                Box::new(self.build_for_expression(b, variables, graph_name)?),
                Box::new(self.build_for_expression(c, variables, graph_name)?),
            ),
            Expression::Exists(n) => {
                let mut variables = variables.clone(); // Do not expose the exists variables outside
                PlanExpression::Exists(Rc::new(self.build_for_graph_pattern(
                    n,
                    &mut variables,
                    graph_name,
                )?))
            }
            Expression::Coalesce(parameters) => {
                PlanExpression::Coalesce(self.expression_list(parameters, variables, graph_name)?)
            }
        })
    }

    /// Builds a cast expression by wrapping the single translated parameter
    /// with `constructor`.
    ///
    /// `name` is only used in the error message.
    ///
    /// # Errors
    ///
    /// Returns an error when `parameters` does not contain exactly one
    /// expression, or when translating that expression fails.
    fn build_cast(
        &mut self,
        parameters: &[Expression],
        constructor: impl Fn(Box) -> PlanExpression,
        variables: &mut Vec,
        graph_name: &PatternValue,
        name: &'static str,
    ) -> Result {
        if parameters.len() == 1 {
            Ok(constructor(Box::new(self.build_for_expression(
                ¶meters[0],
                variables,
                graph_name,
            )?)))
        } else {
            Err(EvaluationError::msg(format!(
                "The xsd:{} casting takes only one parameter",
                name
            )))
        }
    }

    /// Translates every expression of `l`, in order, stopping at the first
    /// error.
    fn expression_list(
        &mut self,
        l: &[Expression],
        variables: &mut Vec,
        graph_name: &PatternValue,
    ) -> Result, EvaluationError> {
        l.iter()
            .map(|e| self.build_for_expression(e, variables, graph_name))
            .collect()
    }

    /// Converts a term pattern into a `PatternValue`.
    ///
    /// Named nodes and literals become constants, variables become variable
    /// keys, and blank nodes are turned into variables named after the blank
    /// node identifier. A quoted triple whose three components are all
    /// constants is folded into a single constant; otherwise it becomes a
    /// `PatternValue::Triple`.
    fn pattern_value_from_term_or_variable(
        &mut self,
        term_or_variable: &TermPattern,
        variables: &mut Vec,
    ) -> PatternValue {
        match term_or_variable {
            TermPattern::Variable(variable) => {
                PatternValue::Variable(variable_key(variables, variable))
            }
            TermPattern::NamedNode(node) => PatternValue::Constant(self.build_term(node)),
            TermPattern::BlankNode(bnode) => {
                PatternValue::Variable(variable_key(
                    variables,
                    &Variable::new_unchecked(bnode.as_str()),
                ))
                //TODO: very bad hack to convert bnode to variable
            }
            TermPattern::Literal(literal) => PatternValue::Constant(self.build_term(literal)),
            TermPattern::Triple(triple) => {
                match (
                    self.pattern_value_from_term_or_variable(&triple.subject, variables),
                    self.pattern_value_from_named_node_or_variable(&triple.predicate, variables),
                    self.pattern_value_from_term_or_variable(&triple.object, variables),
                ) {
                    (
                        PatternValue::Constant(subject),
                        PatternValue::Constant(predicate),
                        PatternValue::Constant(object),
                    ) => PatternValue::Constant(
                        EncodedTriple {
                            subject,
                            predicate,
                            object,
                        }
                        .into(),
                    ),
                    (subject, predicate, object) => {
                        PatternValue::Triple(Box::new(TriplePatternValue {
                            subject,
                            predicate,
                            object,
                        }))
                    }
                }
            }
        }
    }

    /// Converts a named-node-or-variable pattern into a constant or a variable
    /// `PatternValue`.
    fn pattern_value_from_named_node_or_variable(
        &mut self,
        named_node_or_variable: &NamedNodePattern,
        variables: &mut Vec,
    ) -> PatternValue {
        match named_node_or_variable {
            NamedNodePattern::NamedNode(named_node) => {
                PatternValue::Constant(self.build_term(named_node))
            }
            NamedNodePattern::Variable(variable) => {
                PatternValue::Variable(variable_key(variables, variable))
            }
        }
    }

    /// Encodes the rows of a `VALUES` table into tuples.
    ///
    /// The key of each table variable is resolved once up front. Each row then
    /// yields one tuple in which only the cells holding `Some` term are set,
    /// at the key of the column's variable.
    fn encode_bindings(
        &mut self,
        table_variables: &[Variable],
        rows: &[Vec>],
        variables: &mut Vec,
    ) -> Vec {
        let bindings_variables_keys = table_variables
            .iter()
            .map(|v| variable_key(variables, v))
            .collect::>();
        rows.iter()
            .map(move |row| {
                let mut result = EncodedTuple::with_capacity(variables.len());
                for (key, value) in row.iter().enumerate() {
                    if let Some(term) = value {
                        result.set(
                            bindings_variables_keys[key],
                            match term {
                                GroundTerm::NamedNode(node) => self.build_term(node),
                                GroundTerm::Literal(literal) => self.build_term(literal),
                                GroundTerm::Triple(triple) => self.build_triple(triple),
                            },
                        );
                    }
                }
                result
            })
            .collect()
    }

    /// Translates an aggregate expression into a `PlanAggregation`.
    ///
    /// `COUNT` is the only aggregate whose parameter is optional.
    /// `GROUP_CONCAT` falls back to a single space when no separator is given.
    ///
    /// # Errors
    ///
    /// Returns an error for `AggregateExpression::Custom` and propagates
    /// errors from translating the aggregated expression.
    fn build_for_aggregate(
        &mut self,
        aggregate: &AggregateExpression,
        variables: &mut Vec,
        graph_name: &PatternValue,
    ) -> Result {
        match aggregate {
            AggregateExpression::Count { expr, distinct } => Ok(PlanAggregation {
                function: PlanAggregationFunction::Count,
                parameter: match expr {
                    Some(expr) => Some(self.build_for_expression(expr, variables, graph_name)?),
                    None => None,
                },
                distinct: *distinct,
            }),
            AggregateExpression::Sum { expr, distinct } => Ok(PlanAggregation {
                function: PlanAggregationFunction::Sum,
                parameter: Some(self.build_for_expression(expr, variables, graph_name)?),
                distinct: *distinct,
            }),
            AggregateExpression::Min { expr, distinct } => Ok(PlanAggregation {
                function: PlanAggregationFunction::Min,
                parameter: Some(self.build_for_expression(expr, variables, graph_name)?),
                distinct: *distinct,
            }),
            AggregateExpression::Max { expr, distinct } => Ok(PlanAggregation {
                function: PlanAggregationFunction::Max,
                parameter: Some(self.build_for_expression(expr, variables, graph_name)?),
                distinct: *distinct,
            }),
            AggregateExpression::Avg { expr, distinct } => Ok(PlanAggregation {
                function: PlanAggregationFunction::Avg,
                parameter: Some(self.build_for_expression(expr, variables, graph_name)?),
                distinct: *distinct,
            }),
            AggregateExpression::Sample { expr, distinct } => Ok(PlanAggregation {
                function: PlanAggregationFunction::Sample,
                parameter: Some(self.build_for_expression(expr, variables, graph_name)?),
                distinct: *distinct,
            }),
            AggregateExpression::GroupConcat {
                expr,
                distinct,
                separator,
            } => Ok(PlanAggregation {
                function: PlanAggregationFunction::GroupConcat {
                    separator: Rc::new(separator.clone().unwrap_or_else(|| " ".to_owned())),
                },
                parameter: Some(self.build_for_expression(expr, variables, graph_name)?),
                distinct: *distinct,
            }),
            AggregateExpression::Custom { .. } => Err(EvaluationError::msg(
                "Custom aggregation functions are not supported yet",
            )),
        }
    }

    /// Translates every triple pattern of `template` into a `TripleTemplate`.
    ///
    /// A single blank node list is shared by all triples of the template and
    /// handed to `bnode_key` (defined outside this view).
    fn build_for_graph_template(
        &mut self,
        template: &[TriplePattern],
        variables: &mut Vec,
    ) -> Vec {
        let mut bnodes = Vec::default();
        template
            .iter()
            .map(|triple| TripleTemplate {
                subject: self.template_value_from_term_or_variable(
                    &triple.subject,
                    variables,
                    &mut bnodes,
                ),
                predicate: self
                    .template_value_from_named_node_or_variable(&triple.predicate, variables),
                object: self.template_value_from_term_or_variable(
                    &triple.object,
                    variables,
                    &mut bnodes,
                ),
            })
            .collect()
    }

    /// Converts a term pattern of a template into a `TripleTemplateValue`.
    ///
    /// Unlike in query patterns, blank nodes stay blank nodes here and are
    /// keyed through `bnodes`. A quoted triple whose three components are all
    /// constants is folded into a single constant.
    fn template_value_from_term_or_variable(
        &mut self,
        term_or_variable: &TermPattern,
        variables: &mut Vec,
        bnodes: &mut Vec,
    ) -> TripleTemplateValue {
        match term_or_variable {
            TermPattern::Variable(variable) => {
                TripleTemplateValue::Variable(variable_key(variables, variable))
            }
            TermPattern::NamedNode(node) => TripleTemplateValue::Constant(self.build_term(node)),
            TermPattern::BlankNode(bnode) => {
                TripleTemplateValue::BlankNode(bnode_key(bnodes, bnode))
            }
            TermPattern::Literal(literal) => {
                TripleTemplateValue::Constant(self.build_term(literal))
            }
            TermPattern::Triple(triple) => match (
                self.template_value_from_term_or_variable(&triple.subject, variables, bnodes),
                self.template_value_from_named_node_or_variable(&triple.predicate, variables),
                self.template_value_from_term_or_variable(&triple.object, variables, bnodes),
            ) {
                (
                    TripleTemplateValue::Constant(subject),
                    TripleTemplateValue::Constant(predicate),
                    TripleTemplateValue::Constant(object),
                ) => TripleTemplateValue::Constant(
                    EncodedTriple {
                        subject,
                        predicate,
                        object,
                    }
                    .into(),
                ),
                (subject, predicate, object) => {
                    TripleTemplateValue::Triple(Box::new(TripleTemplate {
                        subject,
                        predicate,
                        object,
                    }))
                }
            },
        }
    }

    /// Converts a named-node-or-variable pattern of a template into a constant
    /// or a variable `TripleTemplateValue`.
    fn template_value_from_named_node_or_variable(
        &mut self,
        named_node_or_variable: &NamedNodePattern,
        variables: &mut Vec,
    ) -> TripleTemplateValue {
        match named_node_or_variable {
            NamedNodePattern::Variable(variable) => {
                TripleTemplateValue::Variable(variable_key(variables, variable))
            }
            NamedNodePattern::NamedNode(term) => {
                TripleTemplateValue::Constant(self.build_term(term))
            }
        }
    }

    /// Re-expresses `from_value`, whose variable ids index into `from`, with
    /// ids that index into `to`.
    ///
    /// Constants are cloned unchanged; triples are converted component by
    /// component.
    fn convert_pattern_value_id(
        from_value: &PatternValue,
        from: &[Variable],
        to: &mut Vec,
    ) -> PatternValue {
        match from_value {
            PatternValue::Constant(v) => PatternValue::Constant(v.clone()),
            PatternValue::Variable(from_id) => {
                PatternValue::Variable(Self::convert_variable_id(*from_id, from, to))
            }
            PatternValue::Triple(triple) => PatternValue::Triple(Box::new(TriplePatternValue {
                subject: Self::convert_pattern_value_id(&triple.subject, from, to),
                predicate: Self::convert_pattern_value_id(&triple.predicate, from, to),
                object: Self::convert_pattern_value_id(&triple.object, from, to),
            })),
        }
    }

    /// Returns the index in `to` of the variable equal to `from[from_id]`.
    ///
    /// When `to` holds no such variable, a new variable with a randomly
    /// generated hexadecimal name is appended to `to` and its index is
    /// returned instead.
    fn convert_variable_id(from_id: usize, from: &[Variable], to: &mut Vec) -> usize {
        if let Some(to_id) = to.iter().enumerate().find_map(|(to_id, var)| {
            if *var == from[from_id] {
                Some(to_id)
            } else {
                None
            }
        }) {
            to_id
        } else {
            to.push(Variable::new_unchecked(format!("{:x}", random::())));
            to.len() - 1
        }
    }

    /// Collects into `set` the variable keys of `node` that are recorded as
    /// `possible_problem_vars` when `node` is the right side of a left join.
    ///
    /// The plan is walked recursively; see the per-variant comments for what
    /// each kind of node contributes.
    fn add_left_join_problematic_variables(node: &PlanNode, set: &mut BTreeSet) {
        match node {
            // Leaf patterns contribute nothing.
            PlanNode::StaticBindings { .. }
            | PlanNode::QuadPattern { .. }
            | PlanNode::PathPattern { .. } => (),
            PlanNode::Filter { child, expression } => {
                // Variables used by the filter that the child does not always bind.
                let always_already_bound = child.always_bound_variables();
                for v in expression.used_variables() {
                    if !always_already_bound.contains(&v) {
                        set.insert(v);
                    }
                }
                Self::add_left_join_problematic_variables(child, set);
            }
            PlanNode::Union { children } => {
                for child in children.iter() {
                    Self::add_left_join_problematic_variables(child, set);
                }
            }
            PlanNode::HashJoin { left, right } | PlanNode::ForLoopJoin { left, right } => {
                Self::add_left_join_problematic_variables(left, set);
                Self::add_left_join_problematic_variables(right, set);
            }
            // Only the left side of an anti join is inspected.
            PlanNode::AntiJoin { left, .. } => {
                Self::add_left_join_problematic_variables(left, set);
            }
            // For a nested left join, every variable used by its right side is added.
            PlanNode::LeftJoin { left, right, .. } => {
                Self::add_left_join_problematic_variables(left, set);
                right.add_used_variables(set);
            }
            PlanNode::Extend {
                child,
                expression,
                position,
            } => {
                let always_already_bound = child.always_bound_variables();
                for v in expression.used_variables() {
                    if !always_already_bound.contains(&v) {
                        set.insert(v);
                    }
                }
                set.insert(*position); //TODO: too strict
                Self::add_left_join_problematic_variables(child, set);
            }
            PlanNode::Sort { child, .. }
            | PlanNode::HashDeduplicate { child }
            | PlanNode::Reduced { child }
            | PlanNode::Skip { child, .. }
            | PlanNode::Limit { child, .. } => {
                Self::add_left_join_problematic_variables(child, set)
            }
            PlanNode::Service { child, silent, .. } => {
                // A silent service contributes every variable used by its child.
                if *silent {
                    child.add_used_variables(set);
                } else {
                    Self::add_left_join_problematic_variables(child, set)
                }
            }
            PlanNode::Project {
                mapping,
                child,
                lateral_mapping,
            } => {
                // The child uses its own variable numbering: collect its problematic
                // keys separately, then translate them through `mapping`.
                let mut child_problematic_set = BTreeSet::new();
                for (child_i, output_i) in lateral_mapping.iter() {
                    set.insert(*output_i);
                    child_problematic_set.insert(*child_i);
                }
                Self::add_left_join_problematic_variables(child, &mut child_problematic_set);
                for (child_i, output_i) in mapping.iter() {
                    if child_problematic_set.contains(child_i) {
                        set.insert(*output_i);
                    }
                }
            }
            PlanNode::Aggregate {
                key_variables,
                aggregates,
                ..
            } => {
                set.extend(key_variables.iter()); //TODO: This is too harsh
                for (_, var) in aggregates.iter() {
                    set.insert(*var);
                }
            }
        }
    }

    /// Joins two plans, choosing between a for-loop join and a hash join.
    ///
    /// A `ForLoopJoin` is used when both sides pass
    /// `is_fit_for_for_loop_join` and share at least one always-bound
    /// variable; otherwise a `HashJoin` is built. In both cases the operands
    /// may be swapped depending on which one is a `StaticBindings` node.
    fn new_join(mut left: PlanNode, mut right: PlanNode) -> PlanNode {
        if Self::is_fit_for_for_loop_join(&left)
            && Self::is_fit_for_for_loop_join(&right)
            && Self::has_some_common_variables(&left, &right)
        {
            // We first use VALUES to filter the following patterns evaluation
            if matches!(right, PlanNode::StaticBindings { .. }) {
                swap(&mut left, &mut right);
            }
            PlanNode::ForLoopJoin {
                left: Box::new(left),
                right: Box::new(right),
            }
        } else {
            // Let's avoid materializing right if left is already materialized
            // TODO: be smarter and reuse already existing materialization
            if matches!(left, PlanNode::StaticBindings { .. }) {
                swap(&mut left, &mut right);
            }
            PlanNode::HashJoin {
                left: Box::new(left),
                right: Box::new(right),
            }
        }
    }

    /// Returns `true` when the always-bound variable sets of `left` and
    /// `right` intersect.
    fn has_some_common_variables(left: &PlanNode, right: &PlanNode) -> bool {
        left.always_bound_variables()
            .intersection(&right.always_bound_variables())
            .next()
            .is_some()
    }

    /// Returns `true` when `node` may take part in a `ForLoopJoin` built by
    /// `new_join`.
    ///
    /// Leaf patterns and existing for-loop joins qualify; hash joins, filters,
    /// extends and unions qualify when all their children do; every other
    /// node kind does not.
    fn is_fit_for_for_loop_join(node: &PlanNode) -> bool {
        //TODO: think more about it
        match node {
            PlanNode::StaticBindings { .. }
            | PlanNode::QuadPattern { .. }
            | PlanNode::PathPattern { .. }
            | PlanNode::ForLoopJoin { .. } => true,
            PlanNode::HashJoin { left, right } => {
                Self::is_fit_for_for_loop_join(left) && Self::is_fit_for_for_loop_join(right)
            }
            PlanNode::Filter { child, .. } | PlanNode::Extend { child, .. } => {
                Self::is_fit_for_for_loop_join(child)
            }
            PlanNode::Union { children } => children.iter().all(Self::is_fit_for_for_loop_join),
            PlanNode::AntiJoin { .. }
            | PlanNode::LeftJoin { .. }
            | PlanNode::Service { .. }
            | PlanNode::Sort { .. }
            | PlanNode::HashDeduplicate { .. }
            | PlanNode::Reduced { .. }
            | PlanNode::Skip { .. }
            | PlanNode::Limit { .. }
            | PlanNode::Project { .. }
            | PlanNode::Aggregate { .. } => false,
        }
    }

    fn push_filter(node: Box, filter: Box) -> PlanNode {
        if let PlanExpression::And(f1, f2) = *filter {
            return Self::push_filter(Box::new(Self::push_filter(node, f1)), f2);
        }
        let filter_variables = filter.used_variables();
        match *node {
            PlanNode::HashJoin { left, right } => {
                if left.are_all_variable_bound(&filter_variables) {
                    if right.are_all_variable_bound(&filter_variables) {
                        PlanNode::HashJoin {
                            left: Box::new(Self::push_filter(left, filter.clone())),
                            right: Box::new(Self::push_filter(right, filter)),
                        }
                    } else {
                        PlanNode::HashJoin {
                            left: Box::new(Self::push_filter(left, filter)),
                            right,
                        }
                    }
                } else if right.are_all_variable_bound(&filter_variables) {
                    PlanNode::HashJoin {
                        left,
                        right: Box::new(Self::push_filter(right, filter)),
                    }
                } else {
                    PlanNode::Filter {
                        child: Box::new(PlanNode::HashJoin { left, right }),
                        expression: filter,
                    }
                }
            }
            PlanNode::ForLoopJoin { left, right } => {
                if left.are_all_variable_bound(&filter_variables) {
                    PlanNode::ForLoopJoin {
                        left: Box::new(Self::push_filter(left, filter)),
                        right,
                    }
                } else if right.are_all_variable_bound(&filter_variables) {
                    PlanNode::ForLoopJoin {
                        //TODO: should we do that always?
left, right: Box::new(Self::push_filter(right, filter)), } } else { PlanNode::Filter { child: Box::new(PlanNode::HashJoin { left, right }), expression: filter, } } } PlanNode::Extend { child, expression, position, } => { //TODO: handle the case where the filter generates an expression variable if child.are_all_variable_bound(&filter_variables) { PlanNode::Extend { child: Box::new(Self::push_filter(child, filter)), expression, position, } } else { PlanNode::Filter { child: Box::new(PlanNode::Extend { child, expression, position, }), expression: filter, } } } PlanNode::Filter { child, expression } => { if child.are_all_variable_bound(&filter_variables) { PlanNode::Filter { child: Box::new(Self::push_filter(child, filter)), expression, } } else { PlanNode::Filter { child, expression: Box::new(PlanExpression::And(expression, filter)), } } } PlanNode::Union { children } => PlanNode::Union { children: children .into_iter() .map(|c| Self::push_filter(Box::new(c), filter.clone())) .collect(), }, node => PlanNode::Filter { //TODO: more? 
child: Box::new(node), expression: filter, }, } } fn build_term<'b>(&mut self, term: impl Into>) -> EncodedTerm { self.dataset.encode_term(term) } fn build_triple(&mut self, triple: &GroundTriple) -> EncodedTerm { EncodedTriple::new( match &triple.subject { GroundSubject::NamedNode(node) => self.build_term(node), GroundSubject::Triple(triple) => self.build_triple(triple), }, self.build_term(&triple.predicate), match &triple.object { GroundTerm::NamedNode(node) => self.build_term(node), GroundTerm::Literal(literal) => self.build_term(literal), GroundTerm::Triple(triple) => self.build_triple(triple), }, ) .into() } } fn variable_key(variables: &mut Vec, variable: &Variable) -> usize { match slice_key(variables, variable) { Some(key) => key, None => { variables.push(variable.clone()); variables.len() - 1 } } } fn bnode_key(blank_nodes: &mut Vec, blank_node: &BlankNode) -> usize { match slice_key(blank_nodes, blank_node) { Some(key) => key, None => { blank_nodes.push(blank_node.clone()); blank_nodes.len() - 1 } } } fn slice_key(slice: &[T], element: &T) -> Option { for (i, item) in slice.iter().enumerate() { if item == element { return Some(i); } } None } fn sort_bgp(p: &[TriplePattern]) -> Vec<&TriplePattern> { let mut assigned_variables = HashSet::default(); let mut assigned_blank_nodes = HashSet::default(); let mut new_p: Vec<_> = p.iter().collect(); for i in 0..new_p.len() { new_p[i..].sort_by(|p1, p2| { estimate_pattern_cost(p1, &assigned_variables, &assigned_blank_nodes).cmp( &estimate_pattern_cost(p2, &assigned_variables, &assigned_blank_nodes), ) }); add_pattern_variables(new_p[i], &mut assigned_variables, &mut assigned_blank_nodes); } new_p } fn estimate_pattern_cost( pattern: &TriplePattern, assigned_variables: &HashSet<&Variable>, assigned_blank_nodes: &HashSet<&BlankNode>, ) -> u32 { let mut count = 0; match &pattern.subject { TermPattern::NamedNode(_) | TermPattern::Literal(_) => count += 1, TermPattern::BlankNode(bnode) => { if 
!assigned_blank_nodes.contains(bnode) { count += 4; } } TermPattern::Variable(v) => { if !assigned_variables.contains(v) { count += 4; } } TermPattern::Triple(t) => { count += estimate_pattern_cost(t, assigned_variables, assigned_blank_nodes) } } if let NamedNodePattern::Variable(v) = &pattern.predicate { if !assigned_variables.contains(v) { count += 4; } } else { count += 1; } match &pattern.object { TermPattern::NamedNode(_) | TermPattern::Literal(_) => count += 1, TermPattern::BlankNode(bnode) => { if !assigned_blank_nodes.contains(bnode) { count += 4; } } TermPattern::Variable(v) => { if !assigned_variables.contains(v) { count += 4; } } TermPattern::Triple(t) => { count += estimate_pattern_cost(t, assigned_variables, assigned_blank_nodes) } } count } fn add_pattern_variables<'a>( pattern: &'a TriplePattern, variables: &mut HashSet<&'a Variable>, blank_nodes: &mut HashSet<&'a BlankNode>, ) { match &pattern.subject { TermPattern::NamedNode(_) | TermPattern::Literal(_) => (), TermPattern::BlankNode(bnode) => { blank_nodes.insert(bnode); } TermPattern::Variable(v) => { variables.insert(v); } TermPattern::Triple(t) => add_pattern_variables(t, variables, blank_nodes), } if let NamedNodePattern::Variable(v) = &pattern.predicate { variables.insert(v); } match &pattern.object { TermPattern::NamedNode(_) | TermPattern::Literal(_) => (), TermPattern::BlankNode(bnode) => { blank_nodes.insert(bnode); } TermPattern::Variable(v) => { variables.insert(v); } TermPattern::Triple(t) => add_pattern_variables(t, variables, blank_nodes), } }