diff --git a/lib/spargebra/src/parser.rs b/lib/spargebra/src/parser.rs index 01c4bd69..da28f7d8 100644 --- a/lib/spargebra/src/parser.rs +++ b/lib/spargebra/src/parser.rs @@ -2474,7 +2474,7 @@ parser! { rule Rule() -> Rule = i("IF") _ body:ConstructTemplate() _ i("THEN") _ head:ConstructTemplate() {? Ok(Rule { - body: GraphPattern::Bgp { patterns: body }, + body, head: head.into_iter().map(GroundTriplePattern::try_from).collect::>().map_err(|_| "Blank nodes are not allowed in rules head")? }) } diff --git a/lib/spargebra/src/rule.rs b/lib/spargebra/src/rule.rs index 8f5f8144..8b7050d8 100644 --- a/lib/spargebra/src/rule.rs +++ b/lib/spargebra/src/rule.rs @@ -1,5 +1,4 @@ #![cfg_attr(not(feature = "rules"), allow(dead_code))] -use crate::algebra::*; use crate::parser::{parse_rule_set, ParseError}; use crate::term::*; use std::fmt; @@ -78,7 +77,7 @@ pub struct Rule { /// The construction template. pub head: Vec, /// The rule body graph pattern. - pub body: GraphPattern, + pub body: Vec, } impl Rule { @@ -99,22 +98,22 @@ impl Rule { } t.fmt_sse(f)?; } - write!(f, ") ")?; - self.body.fmt_sse(f)?; - write!(f, ")") + write!(f, ") (bgp")?; + for pattern in &self.body { + write!(f, " ")?; + pattern.fmt_sse(f)?; + } + write!(f, "))") } } impl fmt::Display for Rule { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "IF {{ {} }} THEN {{ ", - SparqlGraphRootPattern { - pattern: &self.body, - dataset: None - } - )?; + write!(f, "IF {{ ")?; + for triple in self.body.iter() { + write!(f, "{triple} . ")?; + } + write!(f, "}} THEN {{ ")?; for triple in self.head.iter() { write!(f, "{triple} . ")?; } diff --git a/lib/sparopt/Cargo.toml b/lib/sparopt/Cargo.toml index 0e1593bc..822858b5 100644 --- a/lib/sparopt/Cargo.toml +++ b/lib/sparopt/Cargo.toml @@ -15,10 +15,11 @@ rust-version = "1.60" [features] default = [] +fixed-point = [] rdf-star = ["oxrdf/rdf-star", "spargebra/rdf-star"] sep-0002 = ["spargebra/sep-0002"] sep-0006 = ["spargebra/sep-0006"] -rules = ["spargebra/rules"] +rules = ["spargebra/rules", "fixed-point"] [dependencies] oxrdf = { version = "0.1.5", path="../oxrdf" } diff --git a/lib/sparopt/src/algebra.rs b/lib/sparopt/src/algebra.rs index aa4b8def..3ce9db72 100644 --- a/lib/sparopt/src/algebra.rs +++ b/lib/sparopt/src/algebra.rs @@ -66,8 +66,8 @@ pub enum Expression { impl Expression { pub(crate) fn effective_boolean_value(&self) -> Option { match self { - Expression::NamedNode(_) => Some(true), - Expression::Literal(literal) => { + Self::NamedNode(_) => Some(true), + Self::Literal(literal) => { if literal.datatype() == xsd::BOOLEAN { match literal.value() { "true" | "1" => Some(true), @@ -212,6 +212,27 @@ impl From for Expression { } } +impl From for Expression { + fn from(value: NamedNodePattern) -> Self { + match value { + NamedNodePattern::NamedNode(value) => value.into(), + NamedNodePattern::Variable(variable) => variable.into(), + } + } +} + +impl From for Expression { + fn from(value: GroundTermPattern) -> Self { + match value { + GroundTermPattern::NamedNode(value) => value.into(), + GroundTermPattern::Literal(value) => value.into(), + #[cfg(feature = "rdf-star")] + GroundTermPattern::Triple(value) => (*value).into(), + GroundTermPattern::Variable(variable) => variable.into(), + } + } +} + #[cfg(feature = "rdf-star")] impl From for Expression { fn from(value: GroundTriple) -> Self { @@ -226,6 +247,20 @@ impl From for Expression { } } +#[cfg(feature = "rdf-star")] +impl From for Expression { + fn from(value: GroundTriplePattern) -> Self { + Self::FunctionCall( + Function::Triple, + vec![ + value.subject.into(), + value.predicate.into(), + value.object.into(), + ], + ) + } +} + impl From for Expression { fn from(value: Variable) -> Self { Self::Variable(value) @@ -394,6 +429,15 @@ pub enum GraphPattern { inner: Box, silent: bool, }, + /// Fix point operator + #[cfg(feature = "fixed-point")] + FixedPoint { + id: FixedPointId, + /// invariant: the inner plans should always return the all the listed variables and only them + variables: Vec, + constant: Box, + recursive: Box, + }, } impl GraphPattern { @@ -644,6 +688,18 @@ impl GraphPattern { silent, } } + + #[cfg(feature = "fixed-point")] + pub(crate) fn fixed_point( + id: FixedPointId, + inner: FixedPointGraphPattern, + variables: Vec, + ) -> Self { + FixedPointGraphPattern::fixed_point(id, inner, variables) + .try_into() + .unwrap() + } + fn from_sparql_algebra( pattern: &AlGraphPattern, graph_name: Option<&NamedNodePattern>, @@ -803,7 +859,7 @@ impl GraphPattern { } } - fn triple_pattern_from_algebra( + pub(crate) fn triple_pattern_from_algebra( pattern: &TriplePattern, blank_nodes: &mut HashMap, ) -> (GroundTermPattern, NamedNodePattern, GroundTermPattern) { @@ -1001,6 +1057,406 @@ impl From<&GraphPattern> for AlGraphPattern { name: name.clone(), silent: *silent, }, + #[cfg(feature = "fixed-point")] + GraphPattern::FixedPoint { .. } => unimplemented!(), + } + } +} + +#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +pub struct FixedPointId(pub usize); + +/// A SPARQL query [graph pattern](https://www.w3.org/TR/sparql11-query/#sparqlQuery) inside of a fixed-point operation. +#[cfg(feature = "fixed-point")] +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum FixedPointGraphPattern { + /// A [basic graph pattern](https://www.w3.org/TR/sparql11-query/#defn_BasicGraphPattern). + QuadPattern { + subject: GroundTermPattern, + predicate: NamedNodePattern, + object: GroundTermPattern, + graph_name: Option, + }, + /// [Join](https://www.w3.org/TR/sparql11-query/#defn_algJoin). + Join { + left: Box, + right: Box, + }, + /// [Filter](https://www.w3.org/TR/sparql11-query/#defn_algFilter). + Filter { + expression: FixedPointExpression, + inner: Box, + }, + /// [Union](https://www.w3.org/TR/sparql11-query/#defn_algUnion). + Union { + inner: Vec, + }, + /// [Extend](https://www.w3.org/TR/sparql11-query/#defn_extend). + Extend { + inner: Box, + variable: Variable, + expression: FixedPointExpression, + }, + /// A table used to provide inline values + Values { + variables: Vec, + bindings: Vec>>, + }, + /// [Project](https://www.w3.org/TR/sparql11-query/#defn_algProjection). + Project { + inner: Box, + variables: Vec, + }, + /// Inner fixed point operator + FixedPoint { + id: FixedPointId, + /// invariant: the inner plans should always return the all the listed variables and only them + variables: Vec, + constant: Box, + recursive: Box, + }, + FixedPointEntry(FixedPointId), +} + +#[cfg(feature = "fixed-point")] +impl FixedPointGraphPattern { + pub(crate) fn empty() -> Self { + Self::Values { + variables: Vec::new(), + bindings: Vec::new(), + } + } + + /// Check if the pattern is the empty table + fn is_empty(&self) -> bool { + if let Self::Values { bindings, .. } = self { + bindings.is_empty() + } else { + false + } + } + + pub(crate) fn singleton() -> Self { + Self::Values { + variables: Vec::new(), + bindings: vec![Vec::new()], + } + } + + pub(crate) fn join(left: Self, right: Self) -> Self { + if left.is_empty() || right.is_empty() { + return Self::empty(); + } + Self::Join { + left: Box::new(left), + right: Box::new(right), + } + } + + pub(crate) fn union(left: Self, right: Self) -> Self { + if left.is_empty() { + return right; + } + if right.is_empty() { + return left; + } + Self::Union { + inner: match (left, right) { + (Self::Union { inner: mut left }, Self::Union { inner: right }) => { + left.extend(right); + left + } + (Self::Union { inner: mut left }, right) => { + left.push(right); + left + } + (left, Self::Union { inner: mut right }) => { + right.insert(0, left); + right + } + (left, right) => vec![left, right], + }, + } + } + + pub(crate) fn extend( + inner: Self, + variable: Variable, + expression: FixedPointExpression, + ) -> Self { + if inner.is_empty() { + return Self::empty(); + } + Self::Extend { + inner: Box::new(inner), + variable, + expression, + } + } + + pub(crate) fn project(inner: Self, variables: Vec) -> Self { + if inner.is_empty() { + return Self::empty(); + } + Self::Project { + inner: Box::new(inner), + variables, + } + } + + #[cfg(feature = "fixed-point")] + pub(crate) fn fixed_point(id: FixedPointId, inner: Self, variables: Vec) -> Self { + if inner.is_empty() { + return Self::empty(); + } + let children = if let Self::Union { inner } = inner { + inner + } else { + vec![inner] + }; + let mut constant = Self::empty(); + let mut recursive = Self::empty(); + for child in children { + if child.is_recursion_used(&id) { + recursive = Self::union(recursive, child); + } else { + constant = Self::union(constant, child); + } + } + if recursive.is_empty() { + return constant; + } + Self::FixedPoint { + id, + variables, + constant: Box::new(constant), + recursive: Box::new(recursive), + } + } + + fn is_recursion_used(&self, id: &FixedPointId) -> bool { + match self { + Self::QuadPattern { .. } | Self::Values { .. } => false, + Self::Filter { inner, .. } + | Self::Extend { inner, .. } + | Self::Project { inner, .. } => Self::is_recursion_used(inner, id), + Self::Join { left, right } => { + Self::is_recursion_used(left, id) || Self::is_recursion_used(right, id) + } + Self::Union { inner } => inner.iter().any(|p| Self::is_recursion_used(p, id)), + Self::FixedPoint { + constant, + recursive, + .. + } => Self::is_recursion_used(constant, id) || Self::is_recursion_used(recursive, id), + Self::FixedPointEntry(tid) => id == tid, + } + } +} + +impl TryFrom for GraphPattern { + type Error = (); + + fn try_from(pattern: FixedPointGraphPattern) -> Result { + Ok(match pattern { + FixedPointGraphPattern::QuadPattern { + subject, + predicate, + object, + graph_name, + } => Self::QuadPattern { + subject, + predicate, + object, + graph_name, + }, + FixedPointGraphPattern::Join { left, right } => Self::Join { + left: Box::new((*left).try_into()?), + right: Box::new((*right).try_into()?), + }, + FixedPointGraphPattern::Union { inner } => Self::Union { + inner: inner + .into_iter() + .map(TryInto::try_into) + .collect::>()?, + }, + FixedPointGraphPattern::Filter { inner, expression } => Self::Filter { + expression: expression.into(), + inner: Box::new((*inner).try_into()?), + }, + FixedPointGraphPattern::Extend { + inner, + variable, + expression, + } => Self::Extend { + expression: expression.into(), + variable, + inner: Box::new((*inner).try_into()?), + }, + FixedPointGraphPattern::Values { + variables, + bindings, + } => Self::Values { + variables, + bindings, + }, + FixedPointGraphPattern::Project { inner, variables } => Self::Project { + variables, + inner: Box::new((*inner).try_into()?), + }, + FixedPointGraphPattern::FixedPoint { + id, + variables, + constant, + recursive, + } => Self::FixedPoint { + id, + variables, + constant, + recursive, + }, + FixedPointGraphPattern::FixedPointEntry(_) => return Err(()), + }) + } +} + +/// An [expression](https://www.w3.org/TR/sparql11-query/#expressions) inside of a fix point. +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum FixedPointExpression { + NamedNode(NamedNode), + Literal(Literal), + Variable(Variable), + /// [Logical-or](https://www.w3.org/TR/sparql11-query/#func-logical-or). + Or(Box, Box), + /// [Logical-and](https://www.w3.org/TR/sparql11-query/#func-logical-and). + And(Box, Box), + /// [sameTerm](https://www.w3.org/TR/sparql11-query/#func-sameTerm). + SameTerm(Box, Box), + /// [IN](https://www.w3.org/TR/sparql11-query/#func-in) + In(Box, Vec), + /// [fn:not](https://www.w3.org/TR/xpath-functions/#func-not). + Not(Box), + /// A regular function call. + FunctionCall(Function, Vec), +} + +impl From for FixedPointExpression { + fn from(value: NamedNode) -> Self { + Self::NamedNode(value) + } +} + +impl From for FixedPointExpression { + fn from(value: Literal) -> Self { + Self::Literal(value) + } +} + +impl From for FixedPointExpression { + fn from(value: GroundSubject) -> Self { + match value { + GroundSubject::NamedNode(value) => value.into(), + #[cfg(feature = "rdf-star")] + GroundSubject::Triple(value) => (*value).into(), + } + } +} + +impl From for FixedPointExpression { + fn from(value: GroundTerm) -> Self { + match value { + GroundTerm::NamedNode(value) => value.into(), + GroundTerm::Literal(value) => value.into(), + #[cfg(feature = "rdf-star")] + GroundTerm::Triple(value) => (*value).into(), + } + } +} + +impl From for FixedPointExpression { + fn from(value: NamedNodePattern) -> Self { + match value { + NamedNodePattern::NamedNode(value) => value.into(), + NamedNodePattern::Variable(variable) => variable.into(), + } + } +} + +impl From for FixedPointExpression { + fn from(value: GroundTermPattern) -> Self { + match value { + GroundTermPattern::NamedNode(value) => value.into(), + GroundTermPattern::Literal(value) => value.into(), + #[cfg(feature = "rdf-star")] + GroundTermPattern::Triple(value) => (*value).into(), + GroundTermPattern::Variable(variable) => variable.into(), + } + } +} + +#[cfg(feature = "rdf-star")] +impl From for FixedPointExpression { + fn from(value: GroundTriple) -> Self { + Self::FunctionCall( + Function::Triple, + vec![ + value.subject.into(), + value.predicate.into(), + value.object.into(), + ], + ) + } +} + +#[cfg(feature = "rdf-star")] +impl From for FixedPointExpression { + fn from(value: GroundTriplePattern) -> Self { + Self::FunctionCall( + Function::Triple, + vec![ + value.subject.into(), + value.predicate.into(), + value.object.into(), + ], + ) + } +} + +impl From for FixedPointExpression { + fn from(value: Variable) -> Self { + Self::Variable(value) + } +} + +impl From for FixedPointExpression { + fn from(value: bool) -> Self { + Literal::from(value).into() + } +} + +impl From for Expression { + fn from(expression: FixedPointExpression) -> Self { + match expression { + FixedPointExpression::NamedNode(node) => Self::NamedNode(node), + FixedPointExpression::Literal(literal) => Self::Literal(literal), + FixedPointExpression::Variable(variable) => Self::Variable(variable), + FixedPointExpression::Or(left, right) => { + Self::Or(Box::new((*left).into()), Box::new((*right).into())) + } + FixedPointExpression::And(left, right) => { + Self::And(Box::new((*left).into()), Box::new((*right).into())) + } + FixedPointExpression::SameTerm(left, right) => { + Self::SameTerm(Box::new((*left).into()), Box::new((*right).into())) + } + FixedPointExpression::In(left, right) => Self::In( + Box::new((*left).into()), + right.into_iter().map(Into::into).collect(), + ), + FixedPointExpression::Not(inner) => Self::Not(Box::new((*inner).into())), + FixedPointExpression::FunctionCall(name, args) => { + Self::FunctionCall(name, args.into_iter().map(Into::into).collect()) + } } } } diff --git a/lib/sparopt/src/optimizer.rs b/lib/sparopt/src/optimizer.rs index ff76a700..56f42705 100644 --- a/lib/sparopt/src/optimizer.rs +++ b/lib/sparopt/src/optimizer.rs @@ -1,4 +1,4 @@ -use crate::algebra::{Expression, GraphPattern}; +use crate::algebra::{Expression, FixedPointGraphPattern, GraphPattern}; #[derive(Default)] pub struct Optimizer {} @@ -99,6 +99,20 @@ impl Optimizer { inner, silent, } => GraphPattern::service(Self::normalize_pattern(*inner), name, silent), + #[cfg(feature = "fixed-point")] + GraphPattern::FixedPoint { + id, + variables, + constant, + recursive, + } => { + GraphPattern::fixed_point( + id, + FixedPointGraphPattern::union(*constant, *recursive), + variables, + ) + //TODO: recursive normalization + } } } diff --git a/lib/sparopt/src/reasoning.rs b/lib/sparopt/src/reasoning.rs index ecc8085f..be609097 100644 --- a/lib/sparopt/src/reasoning.rs +++ b/lib/sparopt/src/reasoning.rs @@ -8,7 +8,7 @@ use std::collections::hash_map::Entry; use std::collections::HashMap; pub struct QueryRewriter { - rules: Vec<(Vec, GraphPattern)>, + rules: Vec<(Vec, Vec)>, } impl QueryRewriter { @@ -17,7 +17,23 @@ impl QueryRewriter { rules: rule_set .rules .into_iter() - .map(|rule| (rule.head, (&rule.body).into())) + .map(|rule| { + (rule.head, { + let mut blank_nodes = HashMap::new(); + rule.body + .iter() + .map(|p| { + let (subject, predicate, object) = + GraphPattern::triple_pattern_from_algebra(p, &mut blank_nodes); + GroundTriplePattern { + subject, + predicate, + object, + } + }) + .collect() + }) + }) .collect(), } } @@ -30,7 +46,16 @@ impl QueryRewriter { predicate, object, graph_name, - } => self.rewrite_quad_pattern(subject, predicate, object, graph_name.as_ref()), + } => self + .rewrite_quad_pattern( + subject, + predicate, + object, + graph_name.as_ref(), + &mut Vec::new(), + ) + .try_into() + .unwrap(), GraphPattern::Path { .. } => todo!(), GraphPattern::Join { left, right } => GraphPattern::join( self.rewrite_graph_pattern(left), @@ -106,6 +131,7 @@ impl QueryRewriter { silent, name, } => GraphPattern::service(self.rewrite_graph_pattern(inner), name.clone(), *silent), + GraphPattern::FixedPoint { .. } => todo!(), } } @@ -115,9 +141,98 @@ impl QueryRewriter { predicate: &NamedNodePattern, object: &GroundTermPattern, graph_name: Option<&NamedNodePattern>, - ) -> GraphPattern { + possible_fixed_points: &mut Vec<( + GroundTermPattern, + NamedNodePattern, + GroundTermPattern, + Option, + FixedPointId, + )>, + ) -> FixedPointGraphPattern { + // We check if we are in a loop + for ( + fixed_point_subject, + fixed_point_predicate, + fixed_point_object, + fixed_point_graph_name, + fixed_point_id, + ) in possible_fixed_points.iter() + { + let mut variable_mapping = Vec::new(); + if let (GroundTermPattern::Variable(from), GroundTermPattern::Variable(to)) = + (fixed_point_subject, subject) + { + variable_mapping.push((from.clone(), to.clone())); + } else if fixed_point_subject == subject { + // Ok + } else { + continue; // Not compatible + } + if let (NamedNodePattern::Variable(from), NamedNodePattern::Variable(to)) = + (fixed_point_predicate, predicate) + { + variable_mapping.push((from.clone(), to.clone())); + } else if fixed_point_predicate == predicate { + // Ok + } else { + continue; // Not compatible + } + if let (GroundTermPattern::Variable(from), GroundTermPattern::Variable(to)) = + (fixed_point_object, object) + { + variable_mapping.push((from.clone(), to.clone())); + } else if fixed_point_object == object { + // Ok + } else { + continue; // Not compatible + } + if let (Some(NamedNodePattern::Variable(from)), Some(NamedNodePattern::Variable(to))) = + (fixed_point_graph_name, graph_name) + { + variable_mapping.push((from.clone(), to.clone())); + } else if fixed_point_graph_name.as_ref() == graph_name { + // Ok + } else { + continue; // Not compatible + } + let mut plan = FixedPointGraphPattern::FixedPointEntry(*fixed_point_id); + for (from, to) in &variable_mapping { + if from != to { + plan = FixedPointGraphPattern::extend(plan, to.clone(), from.clone().into()); + } + } + return FixedPointGraphPattern::project( + plan, + variable_mapping.into_iter().map(|(_, v)| v).collect(), + ); + } + + let new_fixed_point_id = FixedPointId(possible_fixed_points.len()); + possible_fixed_points.push(( + subject.clone(), + predicate.clone(), + object.clone(), + graph_name.cloned(), + new_fixed_point_id, + )); + + // We get the output variables list: + let mut output_variables = Vec::new(); + if let GroundTermPattern::Variable(v) = subject { + output_variables.push(v.clone()); + } + if let NamedNodePattern::Variable(v) = predicate { + output_variables.push(v.clone()); + } + if let GroundTermPattern::Variable(v) = object { + output_variables.push(v.clone()); + } + if let Some(NamedNodePattern::Variable(v)) = graph_name { + output_variables.push(v.clone()); + } + // We rewrite based on rules - let mut graph_pattern = GraphPattern::QuadPattern { + let mut pattern = FixedPointGraphPattern::QuadPattern { subject: subject.clone(), predicate: predicate.clone(), object: object.clone(), @@ -125,36 +240,50 @@ impl QueryRewriter { }; for (rule_head, rule_body) in &self.rules { for head_pattern in rule_head { - if let Some(nested) = self.apply_rule_for_quad_pattern( + if let Some(nested) = self.apply_rule_on_quad_pattern( subject, predicate, object, graph_name, head_pattern, rule_body, + possible_fixed_points, ) { - graph_pattern = GraphPattern::union(graph_pattern, nested); + pattern = FixedPointGraphPattern::union( + pattern, + FixedPointGraphPattern::project(nested, output_variables.clone()), + ); } } } - graph_pattern + possible_fixed_points.pop(); + FixedPointGraphPattern::fixed_point(new_fixed_point_id, pattern, output_variables) } /// Attempts to use a given rule to get new facts for a triple pattern - fn apply_rule_for_quad_pattern( + fn apply_rule_on_quad_pattern( &self, subject: &GroundTermPattern, predicate: &NamedNodePattern, object: &GroundTermPattern, graph_name: Option<&NamedNodePattern>, head: &GroundTriplePattern, - body: &GraphPattern, - ) -> Option { + body: &[GroundTriplePattern], + possible_fixed_points: &mut Vec<( + GroundTermPattern, + NamedNodePattern, + GroundTermPattern, + Option, + FixedPointId, + )>, + ) -> Option { let head_unification = Self::unify_triple_pattern( subject.clone(), + head.subject.clone(), predicate.clone(), + head.predicate.clone(), object.clone(), - head.clone(), + head.object.clone(), )?; // We build a nested query // from is the parent query and to is the nested one @@ -189,9 +318,14 @@ impl QueryRewriter { }, } } - let mut plan = self.rewrite_rule_body(body, graph_name, &mut replacements_in_rule)?; + let mut plan = self.rewrite_rule_body( + body, + graph_name, + &mut replacements_in_rule, + possible_fixed_points, + )?; for (variable, value) in final_binds { - plan = GraphPattern::extend( + plan = FixedPointGraphPattern::extend( plan, variable, match value { @@ -205,81 +339,33 @@ impl QueryRewriter { fn rewrite_rule_body<'a>( &self, - pattern: &'a GraphPattern, + body: &'a [GroundTriplePattern], parent_graph_name: Option<&'a NamedNodePattern>, replacements_in_rule: &mut HashMap, - ) -> Option { - Some(match pattern { - GraphPattern::QuadPattern { - subject, - predicate, - object, - graph_name, - } => self.rewrite_quad_pattern( - &Self::apply_replacement_on_term_pattern(subject, replacements_in_rule)?, - &Self::apply_replacement_on_named_node_pattern(predicate, replacements_in_rule)?, - &Self::apply_replacement_on_term_pattern(object, replacements_in_rule)?, - if let Some(graph_name) = graph_name { - Some(Self::apply_replacement_on_named_node_pattern( - graph_name, - replacements_in_rule, - )?) - } else { - parent_graph_name.cloned() - } - .as_ref(), - ), - GraphPattern::Join { left, right } => GraphPattern::join( - self.rewrite_rule_body(left, parent_graph_name, replacements_in_rule)?, - self.rewrite_rule_body(right, parent_graph_name, replacements_in_rule)?, - ), - GraphPattern::Values { - variables, - bindings, - } => { - let variable_mapping = variables - .iter() - .map(|v| { - replacements_in_rule - .entry(v.clone()) - .or_insert_with(|| TermOrVariable::Variable(new_var())) - .clone() - }) - .collect::>(); - GraphPattern::Values { - variables: variable_mapping - .iter() - .filter_map(|v| match v { - TermOrVariable::Term(_) => None, - TermOrVariable::Variable(v) => Some(v.clone()), - }) - .collect(), - bindings: bindings - .iter() - .filter_map(|binding| { - let mut new_binding = Vec::with_capacity(binding.len()); - for (variable, value) in variable_mapping.iter().zip(binding) { - match variable { - TermOrVariable::Variable(_) => new_binding.push(value.clone()), - TermOrVariable::Term(cst) => { - let compatible = if let Some(value) = value { - cst == value - } else { - true - }; - if !compatible { - return None; - } - } - } - } - Some(new_binding) - }) - .collect(), - } - } - _ => unreachable!("Not allowed by the parser yet: {pattern:?}"), - }) + possible_fixed_points: &mut Vec<( + GroundTermPattern, + NamedNodePattern, + GroundTermPattern, + Option, + FixedPointId, + )>, + ) -> Option { + let mut patterns = Vec::new(); + for p in body { + patterns.push(self.rewrite_quad_pattern( + &Self::apply_replacement_on_term_pattern(&p.subject, replacements_in_rule)?, + &Self::apply_replacement_on_named_node_pattern(&p.predicate, replacements_in_rule)?, + &Self::apply_replacement_on_term_pattern(&p.object, replacements_in_rule)?, + parent_graph_name, + possible_fixed_points, + )); + } + Some( + patterns + .into_iter() + .reduce(FixedPointGraphPattern::join) + .unwrap_or_else(FixedPointGraphPattern::singleton), + ) } fn apply_replacement_on_named_node_pattern( @@ -336,16 +422,18 @@ impl QueryRewriter { fn unify_triple_pattern( from_subject: GroundTermPattern, + to_subject: GroundTermPattern, from_predicate: NamedNodePattern, + to_predicate: NamedNodePattern, from_object: GroundTermPattern, - to: GroundTriplePattern, + to_object: GroundTermPattern, ) -> Option> { - let mut mapping = Self::unify_ground_term_pattern(from_subject, to.subject)?; + let mut mapping = Self::unify_ground_term_pattern(from_subject, to_subject)?; mapping.extend(Self::unify_named_node_pattern( from_predicate, - to.predicate, + to_predicate, )?); - mapping.extend(Self::unify_ground_term_pattern(from_object, to.object)?); + mapping.extend(Self::unify_ground_term_pattern(from_object, to_object)?); Some(mapping) } diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 7cce17d6..4a57a30e 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -36,6 +36,8 @@ const REGEX_SIZE_LIMIT: usize = 1_000_000; type EncodedTuplesIterator = Box>>; type CustomFunctionRegistry = HashMap Option>>; +type FixedPointEvaluationFn = + Rc>>) -> EncodedTuplesIterator>; #[derive(Clone)] pub struct SimpleEvaluator { @@ -144,6 +146,7 @@ impl SimpleEvaluator { Rc, ) { let mut stat_children = Vec::new(); + let mut fixed_point_stat_children = Vec::new(); let mut evaluator: Rc EncodedTuplesIterator> = match node.as_ref() { PlanNode::StaticBindings { encoded_tuples, .. } => { let tuples = encoded_tuples.clone(); @@ -817,10 +820,26 @@ impl SimpleEvaluator { ) }) } + PlanNode::FixedPoint { + id, + constant, + recursive, + } => { + let (constant, constant_stats) = self.fixed_point_plan_evaluator(constant.clone()); + fixed_point_stat_children.push(constant_stats); + let (recursive, recursive_stats) = + self.fixed_point_plan_evaluator(recursive.clone()); + fixed_point_stat_children.push(recursive_stats); + let id = *id; + Rc::new(move |from| { + evaluate_fixed_point(id, &constant, &recursive, &from, &HashMap::new()) + }) + } }; let stats = Rc::new(PlanNodeWithStats { node, children: stat_children, + fixed_point_children: fixed_point_stat_children, exec_count: Cell::new(0), exec_duration: Cell::new(std::time::Duration::from_secs(0)), }); @@ -841,6 +860,258 @@ impl SimpleEvaluator { (evaluator, stats) } + pub fn fixed_point_plan_evaluator( + &self, + node: Rc, + ) -> (FixedPointEvaluationFn, Rc) { + let mut stat_children = Vec::new(); + let mut evaluator: Rc< + dyn Fn(EncodedTuple, HashMap>>) -> EncodedTuplesIterator, + > = match node.as_ref() { + FixedPointPlanNode::StaticBindings { encoded_tuples, .. } => { + let tuples = encoded_tuples.clone(); + Rc::new(move |from, _| { + Box::new( + tuples + .iter() + .filter_map(move |t| Some(Ok(t.combine_with(&from)?))) + .collect::>() + .into_iter(), + ) + }) + } + FixedPointPlanNode::QuadPattern { + subject, + predicate, + object, + graph_name, + } => { + let subject = TupleSelector::from(subject); + let predicate = TupleSelector::from(predicate); + let object = TupleSelector::from(object); + let graph_name = TupleSelector::from(graph_name); + let dataset = self.dataset.clone(); + Rc::new(move |from, _| { + let iter = dataset.encoded_quads_for_pattern( + get_pattern_value(&subject, &from).as_ref(), + get_pattern_value(&predicate, &from).as_ref(), + get_pattern_value(&object, &from).as_ref(), + get_pattern_value(&graph_name, &from).as_ref(), + ); + let subject = subject.clone(); + let predicate = predicate.clone(); + let object = object.clone(); + let graph_name = graph_name.clone(); + Box::new(iter.filter_map(move |quad| match quad { + Ok(quad) => { + let mut new_tuple = from.clone(); + put_pattern_value(&subject, quad.subject, &mut new_tuple)?; + put_pattern_value(&predicate, quad.predicate, &mut new_tuple)?; + put_pattern_value(&object, quad.object, &mut new_tuple)?; + put_pattern_value(&graph_name, quad.graph_name, &mut new_tuple)?; + Some(Ok(new_tuple)) + } + Err(error) => Some(Err(error)), + })) + }) + } + FixedPointPlanNode::HashJoin { left, right } => { + let join_keys: Vec<_> = left + .used_variables() + .intersection(&right.used_variables()) + .copied() + .collect(); + let (left, left_stats) = self.fixed_point_plan_evaluator(left.clone()); + stat_children.push(left_stats); + let (right, right_stats) = self.fixed_point_plan_evaluator(right.clone()); + stat_children.push(right_stats); + if join_keys.is_empty() { + // Cartesian product + Rc::new(move |from, fixed_point_values| { + let mut errors = Vec::default(); + let right_values = right(from.clone(), fixed_point_values.clone()) + .filter_map(|result| match result { + Ok(result) => Some(result), + Err(error) => { + errors.push(Err(error)); + None + } + }) + .collect::>(); + Box::new(CartesianProductJoinIterator { + left_iter: left(from, fixed_point_values), + right: right_values, + buffered_results: errors, + }) + }) + } else { + // Real hash join + Rc::new(move |from, fixed_point_values| { + let mut errors = Vec::default(); + let mut right_values = EncodedTupleSet::new(join_keys.clone()); + right_values.extend( + right(from.clone(), fixed_point_values.clone()).filter_map(|result| { + match result { + Ok(result) => Some(result), + Err(error) => { + errors.push(Err(error)); + None + } + } + }), + ); + Box::new(HashJoinIterator { + left_iter: left(from, fixed_point_values), + right: right_values, + buffered_results: errors, + }) + }) + } + } + FixedPointPlanNode::Filter { child, expression } => { + let (child, child_stats) = self.fixed_point_plan_evaluator(child.clone()); + stat_children.push(child_stats); + let expression = self.expression_evaluator(expression); + Rc::new(move |from, fixed_point_values| { + let expression = expression.clone(); + Box::new(child(from, fixed_point_values).filter(move |tuple| { + match tuple { + Ok(tuple) => expression(tuple) + .and_then(|term| to_bool(&term)) + .unwrap_or(false), + Err(_) => true, + } + })) + }) + } + FixedPointPlanNode::Union { children } => { + let children: Vec<_> = children + .iter() + .map(|child| { + let (child, child_stats) = self.fixed_point_plan_evaluator(child.clone()); + stat_children.push(child_stats); + child + }) + .collect(); + Rc::new(move |from, fixed_point_values| { + Box::new(FixedPointUnionIterator { + plans: children.clone(), + input: (from, fixed_point_values), + current_iterator: Box::new(empty()), + current_plan: 0, + }) + }) + } + FixedPointPlanNode::Extend { + child, + variable, + expression, + } => { + let (child, child_stats) = self.fixed_point_plan_evaluator(child.clone()); + stat_children.push(child_stats); + let position = variable.encoded; + let expression = self.expression_evaluator(expression); + Rc::new(move |from, fixed_point_values| { + let expression = expression.clone(); + Box::new(child(from, fixed_point_values).map(move |tuple| { + let mut tuple = tuple?; + if let Some(value) = expression(&tuple) { + tuple.set(position, value); + } + Ok(tuple) + })) + }) + } + FixedPointPlanNode::Project { child, mapping } => { + let (child, child_stats) = self.fixed_point_plan_evaluator(child.clone()); + stat_children.push(child_stats); + let mapping = mapping.clone(); + Rc::new(move |from, fixed_point_values| { + let mapping = mapping.clone(); + let mut input_tuple = EncodedTuple::with_capacity(mapping.len()); + for (input_key, output_key) in mapping.iter() { + if let Some(value) = from.get(output_key.encoded) { + input_tuple.set(input_key.encoded, value.clone()); + } + } + Box::new( + child(input_tuple, fixed_point_values).filter_map(move |tuple| { + match tuple { + Ok(tuple) => { + let mut output_tuple = from.clone(); + for (input_key, output_key) in mapping.iter() { + if let Some(value) = tuple.get(input_key.encoded) { + if let Some(existing_value) = + output_tuple.get(output_key.encoded) + { + if existing_value != value { + return None; // Conflict + } + } else { + output_tuple.set(output_key.encoded, value.clone()); + } + } + } + Some(Ok(output_tuple)) + } + Err(e) => Some(Err(e)), + } + }), + ) + }) + } + FixedPointPlanNode::FixedPoint { + id, + constant, + recursive, + } => { + let (constant, constant_stats) = self.fixed_point_plan_evaluator(constant.clone()); + stat_children.push(constant_stats); + let (recursive, recursive_stats) = + self.fixed_point_plan_evaluator(recursive.clone()); + stat_children.push(recursive_stats); + let id = *id; + Rc::new(move |from, fixed_point_values| { + evaluate_fixed_point(id, &constant, &recursive, &from, &fixed_point_values) + }) + } + FixedPointPlanNode::FixedPointEntry { id, .. } => { + let id = *id; + Rc::new(move |from, fixed_point_values| { + Box::new( + fixed_point_values[&id] + .as_ref() + .clone() + .into_iter() + .filter_map(move |t| t.combine_with(&from)) + .map(Ok), + ) + }) + } + }; + let stats = Rc::new(FixedPointPlanNodeWithStats { + node, + children: stat_children, + exec_count: Cell::new(0), + exec_duration: Cell::new(std::time::Duration::from_secs(0)), + }); + if self.run_stats { + let stats = stats.clone(); + evaluator = Rc::new(move |tuple, fixed_point_values| { + let start = Timer::now(); + let inner = evaluator(tuple, fixed_point_values); + stats + .exec_duration + .set(stats.exec_duration.get() + start.elapsed()); + Box::new(FixedPointStatsIterator { + inner, + stats: stats.clone(), + }) + }) + } + (evaluator, stats) + } + fn evaluate_service( &self, service_name: &PatternValue, @@ -3883,6 +4154,52 @@ impl PathEvaluator { } } +fn evaluate_fixed_point( + id: usize, + constant: &FixedPointEvaluationFn, + recursive: &FixedPointEvaluationFn, + from: &EncodedTuple, + fixed_point_values: &HashMap>>, +) -> EncodedTuplesIterator { + // Naive algorithm. We should at least be semi-naive + let mut errors = Vec::new(); + let mut all_results = constant(from.clone(), HashMap::new()) + .filter_map(|result| match result { + Ok(result) => Some(result), + Err(error) => { + errors.push(error); + None + } + }) + .collect::>(); + let mut new_set = all_results.iter().cloned().collect::>(); + while !new_set.is_empty() { + let mut fixed_point_values = fixed_point_values.clone(); + fixed_point_values.insert(id, Rc::new(all_results.iter().cloned().collect())); + new_set = recursive(from.clone(), fixed_point_values) + .filter_map(|result| match result { + Ok(result) => { + if all_results.insert(result.clone()) { + Some(result) + } else { + None + } + } + Err(error) => { + errors.push(error); + None + } + }) + .collect(); + } + Box::new( + errors + .into_iter() + .map(Err) + .chain(all_results.into_iter().map(Ok)), + ) +} + struct CartesianProductJoinIterator { left_iter: EncodedTuplesIterator, right: Vec, @@ -4109,6 +4426,31 @@ impl Iterator for UnionIterator { } } +struct FixedPointUnionIterator { + plans: Vec, + input: (EncodedTuple, HashMap>>), + current_iterator: EncodedTuplesIterator, + current_plan: usize, +} + +impl Iterator for FixedPointUnionIterator { + type Item = Result; + + fn next(&mut self) -> Option> { + loop { + if let Some(tuple) = self.current_iterator.next() { + return Some(tuple); + } + if self.current_plan >= self.plans.len() { + return None; + } + self.current_iterator = + self.plans[self.current_plan](self.input.0.clone(), self.input.1.clone()); + self.current_plan += 1; + } + } +} + struct ConsecutiveDeduplication { inner: EncodedTuplesIterator, current: Option, @@ -4802,6 +5144,27 @@ impl Iterator for StatsIterator { } } +struct FixedPointStatsIterator { + inner: EncodedTuplesIterator, + stats: Rc, +} + +impl Iterator for FixedPointStatsIterator { + type Item = Result; + + fn next(&mut self) -> Option> { + let start = Timer::now(); + let result = self.inner.next(); + self.stats + .exec_duration + .set(self.stats.exec_duration.get() + start.elapsed()); + if matches!(result, Some(Ok(_))) { + self.stats.exec_count.set(self.stats.exec_count.get() + 1); + } + result + } +} + #[cfg(all(target_family = "wasm", target_os = "unknown"))] pub struct Timer { timestamp_ms: f64, diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index bdd4b9e2..44780bfe 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -107,6 +107,11 @@ pub enum PlanNode { key_variables: Rc>, aggregates: Rc>, }, + FixedPoint { + id: usize, + constant: Rc, + recursive: Rc, + }, } impl PlanNode { @@ -218,6 +223,14 @@ impl PlanNode { callback(var.encoded); } } + Self::FixedPoint { + constant, + recursive, + .. + } => { + constant.lookup_used_variables(callback); + recursive.lookup_used_variables(callback); + } } } @@ -336,7 +349,7 @@ impl PlanNode { } } } - Self::Aggregate { .. } => { + Self::Aggregate { .. } | Self::FixedPoint { .. } => { //TODO } } @@ -353,6 +366,138 @@ impl PlanNode { } } +#[derive(Debug)] +pub enum FixedPointPlanNode { + StaticBindings { + encoded_tuples: Vec, + variables: Vec, + plain_bindings: Vec>>, + }, + QuadPattern { + subject: PatternValue, + predicate: PatternValue, + object: PatternValue, + graph_name: PatternValue, + }, + /// Streams left and materializes right join + HashJoin { + left: Rc, + right: Rc, + }, + Filter { + child: Rc, + expression: PlanExpression, + }, + Union { + children: Vec>, + }, + Extend { + child: Rc, + variable: PlanVariable, + expression: PlanExpression, + }, + Project { + child: Rc, + mapping: Rc>, // pairs of (variable key in child, variable key in output) + }, + FixedPoint { + id: usize, + constant: Rc, + recursive: Rc, + }, + FixedPointEntry { + id: usize, + variables: Vec, + }, +} + +impl FixedPointPlanNode { + /// Returns variables that might be bound in the result set + pub fn used_variables(&self) -> BTreeSet { + let mut set = BTreeSet::default(); + self.lookup_used_variables(&mut |v| { + set.insert(v); + }); + set + } + + pub fn lookup_used_variables(&self, callback: &mut impl FnMut(usize)) { + match self { + Self::StaticBindings { encoded_tuples, .. } => { + for tuple in encoded_tuples { + for (key, value) in tuple.iter().enumerate() { + if value.is_some() { + callback(key); + } + } + } + } + Self::QuadPattern { + subject, + predicate, + object, + graph_name, + } => { + if let PatternValue::Variable(var) = subject { + callback(var.encoded); + } + if let PatternValue::Variable(var) = predicate { + callback(var.encoded); + } + if let PatternValue::Variable(var) = object { + callback(var.encoded); + } + if let PatternValue::Variable(var) = graph_name { + callback(var.encoded); + } + } + Self::Filter { child, expression } => { + expression.lookup_used_variables(callback); + child.lookup_used_variables(callback); + } + Self::Union { children } => { + for child in children.iter() { + child.lookup_used_variables(callback); + } + } + Self::HashJoin { left, right } => { + left.lookup_used_variables(callback); + right.lookup_used_variables(callback); + } + Self::Extend { + child, + variable, + expression, + } => { + callback(variable.encoded); + expression.lookup_used_variables(callback); + child.lookup_used_variables(callback); + } + Self::Project { mapping, child } => { + let child_bound = child.used_variables(); + for (child_i, output_i) in mapping.iter() { + if child_bound.contains(&child_i.encoded) { + callback(output_i.encoded); + } + } + } + Self::FixedPoint { + constant, + recursive, + .. + } => { + constant.lookup_used_variables(callback); + recursive.lookup_used_variables(callback); + } + Self::FixedPointEntry { variables, .. } => { + for variable in variables { + callback(variable.encoded); + } + } + } + } +} + #[derive(Debug, Clone)] pub struct PlanTerm { pub encoded: EncodedTerm, @@ -1005,7 +1150,8 @@ impl IntoIterator for EncodedTuple { pub struct PlanNodeWithStats { pub node: Rc, - pub children: Vec>, + pub children: Vec>, + pub fixed_point_children: Vec>, pub exec_count: Cell, pub exec_duration: Cell, } @@ -1032,6 +1178,9 @@ impl PlanNodeWithStats { for child in &self.children { child.json_node(writer, with_stats)?; } + for child in &self.fixed_point_children { + child.json_node(writer, with_stats)?; + } writer.write_event(JsonEvent::EndArray)?; writer.write_event(JsonEvent::EndObject) } @@ -1123,11 +1272,110 @@ impl PlanNodeWithStats { ) } PlanNode::Union { .. } => "Union".to_owned(), + PlanNode::FixedPoint { id, .. } => format!("FixedPoint{id}"), } } } impl fmt::Debug for PlanNodeWithStats { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut obj = f.debug_struct("Node"); + obj.field("name", &self.node_label()); + if self.exec_duration.get() > Duration::default() { + obj.field("number of results", &self.exec_count.get()); + obj.field("duration in seconds", &self.exec_duration.get()); + } + if !self.children.is_empty() { + obj.field("children", &self.children); + } + if !self.fixed_point_children.is_empty() { + obj.field("children", &self.fixed_point_children); + } + obj.finish() + } +} + +pub struct FixedPointPlanNodeWithStats { + pub node: Rc, + pub children: Vec>, + pub exec_count: Cell, + pub exec_duration: Cell, +} + +impl FixedPointPlanNodeWithStats { + pub fn json_node( + &self, + writer: &mut JsonWriter, + with_stats: bool, + ) -> io::Result<()> { + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("name"))?; + writer.write_event(JsonEvent::String(&self.node_label()))?; + if with_stats { + writer.write_event(JsonEvent::ObjectKey("number of results"))?; + writer.write_event(JsonEvent::Number(&self.exec_count.get().to_string()))?; + writer.write_event(JsonEvent::ObjectKey("duration in seconds"))?; + writer.write_event(JsonEvent::Number( + &self.exec_duration.get().as_secs_f32().to_string(), + ))?; + } + writer.write_event(JsonEvent::ObjectKey("children"))?; + writer.write_event(JsonEvent::StartArray)?; + for child in &self.children { + child.json_node(writer, with_stats)?; + } + writer.write_event(JsonEvent::EndArray)?; + writer.write_event(JsonEvent::EndObject) + } + + fn node_label(&self) -> String { + match self.node.as_ref() { + FixedPointPlanNode::Extend { + expression, + variable, + .. + } => format!("Extend({expression} -> {variable})"), + FixedPointPlanNode::Filter { expression, .. } => format!("Filter({expression})"), + + FixedPointPlanNode::HashJoin { .. } => "HashJoin".to_owned(), + FixedPointPlanNode::Project { mapping, .. } => { + format!( + "Project({})", + mapping + .iter() + .map(|(f, t)| if f.plain == t.plain { + f.to_string() + } else { + format!("{f} -> {t}") + }) + .collect::>() + .join(", ") + ) + } + FixedPointPlanNode::QuadPattern { + subject, + predicate, + object, + graph_name, + } => format!("QuadPattern({subject} {predicate} {object} {graph_name})"), + FixedPointPlanNode::StaticBindings { variables, .. } => { + format!( + "StaticBindings({})", + variables + .iter() + .map(|v| v.to_string()) + .collect::>() + .join(", ") + ) + } + FixedPointPlanNode::Union { .. } => "Union".to_owned(), + FixedPointPlanNode::FixedPoint { id, .. } => format!("FixedPoint{id}"), + FixedPointPlanNode::FixedPointEntry { id, .. } => format!("FixedPointEntry{id}"), + } + } +} + +impl fmt::Debug for FixedPointPlanNodeWithStats { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut obj = f.debug_struct("Node"); obj.field("name", &self.node_label()); diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index 78795c7a..b5b7f55a 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -320,6 +320,254 @@ impl<'a> PlanBuilder<'a> { } plan } + GraphPattern::FixedPoint { + variables: fixed_point_variables, + constant, + recursive, + id, + } => { + let mut fixed_points = HashMap::new(); + fixed_points.insert( + *id, + fixed_point_variables + .iter() + .enumerate() + .map(|(new_variable, variable)| PlanVariable { + encoded: new_variable, + plain: variable.clone(), + }) + .collect(), + ); + let plan = PlanNode::Project { + mapping: Rc::new( + fixed_point_variables + .iter() + .enumerate() + .map(|(new_variable, variable)| { + ( + PlanVariable { + encoded: new_variable, + plain: variable.clone(), + }, + build_plan_variable(variables, variable), + ) + }) + .collect(), + ), + child: Rc::new(PlanNode::FixedPoint { + id: id.0, + constant: Rc::new(self.build_for_fixed_point_graph_pattern( + constant, + &mut fixed_point_variables.clone(), + &mut fixed_points, + )?), + recursive: Rc::new(self.build_for_fixed_point_graph_pattern( + recursive, + &mut fixed_point_variables.clone(), + &mut fixed_points, + )?), + }), + }; + plan + } + }) + } + + fn build_for_fixed_point_graph_pattern( + &self, + pattern: &FixedPointGraphPattern, + variables: &mut Vec, + fixed_points: &mut HashMap>, + ) -> Result { + Ok(match pattern { + FixedPointGraphPattern::QuadPattern { + subject, + predicate, + object, + graph_name, + } => FixedPointPlanNode::QuadPattern { + subject: self.pattern_value_from_ground_term_pattern(subject, variables), + predicate: self.pattern_value_from_named_node_or_variable(predicate, variables), + object: self.pattern_value_from_ground_term_pattern(object, variables), + graph_name: graph_name.as_ref().map_or( + PatternValue::Constant(PlanTerm { + encoded: EncodedTerm::DefaultGraph, + plain: PatternValueConstant::DefaultGraph, + }), + |g| self.pattern_value_from_named_node_or_variable(g, variables), + ), + }, + FixedPointGraphPattern::Join { left, right } => { + let mut left = + self.build_for_fixed_point_graph_pattern(left, variables, fixed_points)?; + let mut right = + self.build_for_fixed_point_graph_pattern(right, variables, fixed_points)?; + // Let's avoid materializing right if left is already materialized + // TODO: be smarter and reuse already existing materialization + if matches!(left, FixedPointPlanNode::StaticBindings { .. }) { + swap(&mut left, &mut right); + } + FixedPointPlanNode::HashJoin { + left: Rc::new(left), + right: Rc::new(right), + } + } + FixedPointGraphPattern::Filter { inner, expression } => FixedPointPlanNode::Filter { + child: Rc::new(self.build_for_fixed_point_graph_pattern( + inner, + variables, + fixed_points, + )?), + expression: self.build_for_fixed_point_expression(expression, variables)?, + }, + FixedPointGraphPattern::Union { inner } => FixedPointPlanNode::Union { + children: inner + .iter() + .map(|p| { + Ok(Rc::new(self.build_for_fixed_point_graph_pattern( + p, + variables, + fixed_points, + )?)) + }) + .collect::>()?, + }, + FixedPointGraphPattern::Extend { + inner, + variable, + expression, + } => FixedPointPlanNode::Extend { + child: Rc::new(self.build_for_fixed_point_graph_pattern( + inner, + variables, + fixed_points, + )?), + variable: build_plan_variable(variables, variable), + expression: self.build_for_fixed_point_expression(expression, variables)?, + }, + FixedPointGraphPattern::Values { + variables: table_variables, + bindings, + } => { + let bindings_variables = table_variables + .iter() + .map(|v| build_plan_variable(variables, v)) + .collect::>(); + let encoded_tuples = bindings + .iter() + .map(|row| { + let mut result = EncodedTuple::with_capacity(variables.len()); + for (key, value) in row.iter().enumerate() { + if let Some(term) = value { + result.set( + bindings_variables[key].encoded, + match term { + GroundTerm::NamedNode(node) => self.build_term(node), + GroundTerm::Literal(literal) => self.build_term(literal), + GroundTerm::Triple(triple) => self.build_triple(triple), + }, + ); + } + } + result + }) + .collect(); + FixedPointPlanNode::StaticBindings { + encoded_tuples, + variables: bindings_variables, + plain_bindings: bindings.clone(), + } + } + FixedPointGraphPattern::Project { + inner, + variables: projection, + } => { + let mut inner_variables = projection.clone(); + FixedPointPlanNode::Project { + child: Rc::new(self.build_for_fixed_point_graph_pattern( + inner, + &mut inner_variables, + fixed_points, + )?), + mapping: Rc::new( + projection + .iter() + .enumerate() + .map(|(new_variable, variable)| { + ( + PlanVariable { + encoded: new_variable, + plain: variable.clone(), + }, + build_plan_variable(variables, variable), + ) + }) + .collect(), + ), + } + } + FixedPointGraphPattern::FixedPoint { + variables: fixed_point_variables, + constant, + recursive, + id, + } => { + fixed_points.insert( + *id, + fixed_point_variables + .iter() + .enumerate() + .map(|(new_variable, variable)| PlanVariable { + encoded: new_variable, + plain: variable.clone(), + }) + .collect(), + ); + let plan = FixedPointPlanNode::Project { + mapping: Rc::new( + fixed_point_variables + .iter() + .enumerate() + .map(|(new_variable, variable)| { + ( + PlanVariable { + encoded: new_variable, + plain: variable.clone(), + }, + build_plan_variable(variables, variable), + ) + }) + .collect(), + ), + child: Rc::new(FixedPointPlanNode::FixedPoint { + id: id.0, + constant: Rc::new(self.build_for_fixed_point_graph_pattern( + constant, + &mut fixed_point_variables.clone(), + fixed_points, + )?), + recursive: Rc::new(self.build_for_fixed_point_graph_pattern( + recursive, + &mut fixed_point_variables.clone(), + fixed_points, + )?), + }), + }; + fixed_points.remove(id); + plan + } + FixedPointGraphPattern::FixedPointEntry(id) => { + let fixed_point_variable = if let Some(fixed_point_variable) = fixed_points.get(id) + { + fixed_point_variable + } else { + return Err(EvaluationError::msg("Invalid fixed point")); + }; + FixedPointPlanNode::FixedPointEntry { + id: id.0, + variables: fixed_point_variable.clone(), + } + } }) } @@ -758,6 +1006,72 @@ impl<'a> PlanBuilder<'a> { }) } + fn build_for_fixed_point_expression( + &self, + expression: &FixedPointExpression, + variables: &mut Vec, + ) -> Result { + Ok(match expression { + FixedPointExpression::NamedNode(node) => PlanExpression::NamedNode(PlanTerm { + encoded: self.build_term(node), + plain: node.clone(), + }), + FixedPointExpression::Literal(l) => PlanExpression::Literal(PlanTerm { + encoded: self.build_term(l), + plain: l.clone(), + }), + FixedPointExpression::Variable(v) => { + PlanExpression::Variable(build_plan_variable(variables, v)) + } + FixedPointExpression::Or(a, b) => PlanExpression::Or( + Box::new(self.build_for_fixed_point_expression(a, variables)?), + Box::new(self.build_for_fixed_point_expression(b, variables)?), + ), + FixedPointExpression::And(a, b) => PlanExpression::And( + Box::new(self.build_for_fixed_point_expression(a, variables)?), + Box::new(self.build_for_fixed_point_expression(b, variables)?), + ), + FixedPointExpression::SameTerm(a, b) => PlanExpression::SameTerm( + Box::new(self.build_for_fixed_point_expression(a, variables)?), + Box::new(self.build_for_fixed_point_expression(b, variables)?), + ), + FixedPointExpression::In(e, l) => { + let e = self.build_for_fixed_point_expression(e, variables)?; + l.iter() + .map(|v| { + Ok(PlanExpression::Equal( + Box::new(e.clone()), + Box::new(self.build_for_fixed_point_expression(v, variables)?), + )) + }) + .reduce(|a: Result<_, EvaluationError>, b| { + Ok(PlanExpression::Or(Box::new(a?), Box::new(b?))) + }) + .unwrap_or_else(|| { + Ok(PlanExpression::Literal(PlanTerm { + encoded: false.into(), + plain: false.into(), + })) + })? + } + FixedPointExpression::Not(e) => PlanExpression::Not(Box::new( + self.build_for_fixed_point_expression(e, variables)?, + )), + FixedPointExpression::FunctionCall(function, parameters) => match function { + Function::Triple => PlanExpression::Triple( + Box::new(self.build_for_fixed_point_expression(¶meters[0], variables)?), + Box::new(self.build_for_fixed_point_expression(¶meters[1], variables)?), + Box::new(self.build_for_fixed_point_expression(¶meters[2], variables)?), + ), + name => { + return Err(EvaluationError::msg(format!( + "Not supported custom function {name}" + ))) + } + }, + }) + } + fn build_cast( &self, parameters: &[Expression], @@ -1141,6 +1455,9 @@ impl<'a> PlanBuilder<'a> { set.insert(var.encoded); } } + PlanNode::FixedPoint { .. } => { + // All variables must be bound + } } } @@ -1204,7 +1521,8 @@ impl<'a> PlanBuilder<'a> { | PlanNode::Skip { .. } | PlanNode::Limit { .. } | PlanNode::Project { .. } - | PlanNode::Aggregate { .. } => false, + | PlanNode::Aggregate { .. } + | PlanNode::FixedPoint { .. } => false, } } diff --git a/testsuite/oxigraph-tests/sparql-reasoning/manifest.ttl b/testsuite/oxigraph-tests/sparql-reasoning/manifest.ttl index d10eb8a3..f72b5ec0 100644 --- a/testsuite/oxigraph-tests/sparql-reasoning/manifest.ttl +++ b/testsuite/oxigraph-tests/sparql-reasoning/manifest.ttl @@ -12,6 +12,7 @@ :simple_type_inheritance :simple_fact :with_graph_name + :simple_recursion ) . :simple_type_inheritance rdf:type ox:SparqlRuleEvaluationTest ; @@ -37,3 +38,11 @@ qt:graphData ] ; ox:rulesData ; mf:result . + +:simple_recursion rdf:type ox:SparqlRuleEvaluationTest ; + mf:name "Simple recursion" ; + mf:action + [ qt:query ; + qt:data ] ; + ox:rulesData ; + mf:result . \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql-reasoning/simple_recursion.rq b/testsuite/oxigraph-tests/sparql-reasoning/simple_recursion.rq new file mode 100644 index 00000000..331ae9f4 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-reasoning/simple_recursion.rq @@ -0,0 +1,5 @@ +PREFIX ex: + +SELECT DISTINCT * WHERE { + ?s ex:includedIn ?o +} diff --git a/testsuite/oxigraph-tests/sparql-reasoning/simple_recursion.rr b/testsuite/oxigraph-tests/sparql-reasoning/simple_recursion.rr new file mode 100644 index 00000000..9bbdab04 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-reasoning/simple_recursion.rr @@ -0,0 +1,3 @@ +PREFIX ex: + +IF { ?s ex:includedIn ?m . ?m ex:includedIn ?o } THEN { ?s ex:includedIn ?o } diff --git a/testsuite/oxigraph-tests/sparql-reasoning/simple_recursion.srj b/testsuite/oxigraph-tests/sparql-reasoning/simple_recursion.srj new file mode 100644 index 00000000..e8fb7398 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-reasoning/simple_recursion.srj @@ -0,0 +1,69 @@ +{ + "head": { + "vars": ["s", "o"] + }, + "results": { + "bindings": [ + { + "s": { + "type": "uri", + "value": "http://example.org/Bar" + }, + "o": { + "type": "uri", + "value": "http://example.org/Foo" + } + }, + { + "s": { + "type": "uri", + "value": "http://example.org/Baz" + }, + "o": { + "type": "uri", + "value": "http://example.org/Foo" + } + }, + { + "s": { + "type": "uri", + "value": "http://example.org/Baz" + }, + "o": { + "type": "uri", + "value": "http://example.org/Bar" + } + }, + { + "s": { + "type": "uri", + "value": "http://example.org/Baa" + }, + "o": { + "type": "uri", + "value": "http://example.org/Foo" + } + }, + { + "s": { + "type": "uri", + "value": "http://example.org/Baa" + }, + "o": { + "type": "uri", + "value": "http://example.org/Bar" + } + }, + { + "s": { + "type": "uri", + "value": "http://example.org/Baa" + }, + "o": { + "type": "uri", + "value": "http://example.org/Baz" + } + } + ] + } +} diff --git a/testsuite/oxigraph-tests/sparql-reasoning/simple_recursion.ttl b/testsuite/oxigraph-tests/sparql-reasoning/simple_recursion.ttl new file mode 100644 index 00000000..952668dd --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-reasoning/simple_recursion.ttl @@ -0,0 +1,4 @@ +PREFIX ex: +ex:Bar ex:includedIn ex:Foo . +ex:Baz ex:includedIn ex:Bar . +ex:Baa ex:includedIn ex:Baz .