diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 65b9102a..5add21ce 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,6 +36,8 @@ jobs: working-directory: ./lib/sparesults - run: cargo clippy working-directory: ./lib/spargebra + - run: cargo clippy + working-directory: ./lib/sparopt - run: cargo clippy --all-targets --all-features clippy_wasm_js: @@ -76,6 +78,8 @@ jobs: working-directory: ./lib/sparesults - run: cargo clippy -- -D warnings -D clippy::all working-directory: ./lib/spargebra + - run: cargo clippy -- -D warnings -D clippy::all + working-directory: ./lib/sparopt - run: cargo clippy --all-targets -- -D warnings -D clippy::all working-directory: ./server @@ -119,7 +123,7 @@ jobs: - run: rustup update - uses: Swatinem/rust-cache@v2 - run: cargo install cargo-semver-checks || true - - run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph_js --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server + - run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph_js --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server --exclude sparopt test_linux: runs-on: ubuntu-latest diff --git a/Cargo.lock b/Cargo.lock index 29d3744b..649bd0aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -958,6 +958,7 @@ dependencies = [ "siphasher", "sparesults", "spargebra", + "sparopt", "zstd", ] @@ -998,6 +999,8 @@ dependencies = [ "anyhow", "clap", "oxigraph", + "spargebra", + "sparopt", "text-diff", "time", ] @@ -1613,6 +1616,15 @@ dependencies = [ "rand", ] +[[package]] +name = "sparopt" +version = "0.1.0-alpha.1-dev" +dependencies = [ + "oxrdf", + "rand", + "spargebra", +] + [[package]] name = "sparql-smith" version = "0.1.0-alpha.5-dev" diff --git a/Cargo.toml b/Cargo.toml index 75a171d3..041afb39 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "lib/oxsdatatypes", "lib/spargebra", "lib/sparesults", + 
"lib/sparopt", "lib/sparql-smith", "oxrocksdb-sys", "python", diff --git a/lib/Cargo.toml b/lib/Cargo.toml index d67b1349..100be488 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -41,6 +41,7 @@ json-event-parser = "0.1" oxrdf = { version = "0.2.0-alpha.1-dev", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] } oxsdatatypes = { version = "0.2.0-alpha.1-dev", path="oxsdatatypes" } spargebra = { version = "0.3.0-alpha.1-dev", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] } +sparopt = { version = "0.1.0-alpha.1-dev", path="sparopt", features = ["rdf-star", "sep-0002", "sep-0006"] } sparesults = { version = "0.2.0-alpha.1-dev", path="sparesults", features = ["rdf-star"] } [target.'cfg(not(target_family = "wasm"))'.dependencies] diff --git a/lib/spargebra/src/term.rs b/lib/spargebra/src/term.rs index 3ef91f68..8c41fc1b 100644 --- a/lib/spargebra/src/term.rs +++ b/lib/spargebra/src/term.rs @@ -541,6 +541,19 @@ impl From for TermPattern { } } +impl From for TermPattern { + #[inline] + fn from(element: GroundTermPattern) -> Self { + match element { + GroundTermPattern::NamedNode(node) => node.into(), + GroundTermPattern::Literal(literal) => literal.into(), + #[cfg(feature = "rdf-star")] + GroundTermPattern::Triple(t) => TriplePattern::from(*t).into(), + GroundTermPattern::Variable(variable) => variable.into(), + } + } +} + impl TryFrom for Subject { type Error = (); @@ -799,6 +812,17 @@ impl From for TriplePattern { } } +impl From for TriplePattern { + #[inline] + fn from(triple: GroundTriplePattern) -> Self { + Self { + subject: triple.subject.into(), + predicate: triple.predicate, + object: triple.object.into(), + } + } +} + impl TryFrom for Triple { type Error = (); diff --git a/lib/sparopt/Cargo.toml b/lib/sparopt/Cargo.toml new file mode 100644 index 00000000..3e30b5ac --- /dev/null +++ b/lib/sparopt/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "sparopt" +version = "0.1.0-alpha.1-dev" +authors = ["Tpt "] +license = "MIT OR Apache-2.0" 
+readme = "README.md" +keywords = ["SPARQL"] +repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/sparopt" +homepage = "https://oxigraph.org/" +description = """ +A SPARQL optimizer +""" +edition = "2021" +rust-version = "1.60" + +[features] +default = [] +rdf-star = ["oxrdf/rdf-star", "spargebra/rdf-star"] +sep-0002 = ["spargebra/sep-0002"] +sep-0006 = ["spargebra/sep-0006"] + +[dependencies] +oxrdf = { version = "0.2.0-alpha.1-dev", path="../oxrdf" } +rand = "0.8" +spargebra = { version = "0.3.0-alpha.1-dev", path="../spargebra" } + +[package.metadata.docs.rs] +all-features = true diff --git a/lib/sparopt/README.md b/lib/sparopt/README.md new file mode 100644 index 00000000..7a0ac67b --- /dev/null +++ b/lib/sparopt/README.md @@ -0,0 +1,33 @@ +sparopt +======= + +[![Latest Version](https://img.shields.io/crates/v/sparopt.svg)](https://crates.io/crates/sparopt) +[![Released API docs](https://docs.rs/sparopt/badge.svg)](https://docs.rs/sparopt) +[![Crates.io downloads](https://img.shields.io/crates/d/sparopt)](https://crates.io/crates/sparopt) +[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) +[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) + +sparopt is a work-in-progress [SPARQL Query](https://www.w3.org/TR/sparql11-query/) optimizer. + +It relies on the output of [spargebra](https://crates.io/crates/spargebra). + +Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#sparql-star) is also available behind the `rdf-star` feature. + +This crate is intended to be a building block for SPARQL implementations in Rust like [Oxigraph](https://oxigraph.org). 
+ + +## License + +This project is licensed under either of + +* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or + `<http://www.apache.org/licenses/LICENSE-2.0>`) +* MIT license ([LICENSE-MIT](../LICENSE-MIT) or + `<http://opensource.org/licenses/MIT>`) + +at your option. + + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. diff --git a/lib/sparopt/src/algebra.rs b/lib/sparopt/src/algebra.rs new file mode 100644 index 00000000..65c4c618 --- /dev/null +++ b/lib/sparopt/src/algebra.rs @@ -0,0 +1,1712 @@ +//! [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery) representation. + +use oxrdf::vocab::xsd; +use rand::random; +use spargebra::algebra::{ + AggregateExpression as AlAggregateExpression, Expression as AlExpression, + GraphPattern as AlGraphPattern, OrderExpression as AlOrderExpression, +}; +pub use spargebra::algebra::{Function, PropertyPathExpression}; +use spargebra::term::{BlankNode, GroundSubject, TermPattern, TriplePattern}; +pub use spargebra::term::{ + GroundTerm, GroundTermPattern, Literal, NamedNode, NamedNodePattern, Variable, +}; +#[cfg(feature = "rdf-star")] +use spargebra::term::{GroundTriple, GroundTriplePattern}; +use std::collections::hash_map::DefaultHasher; +use std::collections::{HashMap, HashSet}; +use std::hash::{Hash, Hasher}; +use std::ops::{Add, BitAnd, BitOr, Div, Mul, Neg, Not, Sub}; + +/// An [expression](https://www.w3.org/TR/sparql11-query/#expressions). +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum Expression { + NamedNode(NamedNode), + Literal(Literal), + Variable(Variable), + /// [Logical-or](https://www.w3.org/TR/sparql11-query/#func-logical-or). + Or(Vec), + /// [Logical-and](https://www.w3.org/TR/sparql11-query/#func-logical-and). + And(Vec), + /// [RDFterm-equal](https://www.w3.org/TR/sparql11-query/#func-RDFterm-equal) and all the XSD equalities. 
+ Equal(Box, Box), + /// [sameTerm](https://www.w3.org/TR/sparql11-query/#func-sameTerm). + SameTerm(Box, Box), + /// [op:numeric-greater-than](https://www.w3.org/TR/xpath-functions-31/#func-numeric-greater-than) and other XSD greater than operators. + Greater(Box, Box), + /// Greater than or equal comparison, converted from the `GreaterOrEqual` expression of spargebra. + GreaterOrEqual(Box, Box), + /// [op:numeric-less-than](https://www.w3.org/TR/xpath-functions-31/#func-numeric-less-than) and other XSD less than operators. + Less(Box, Box), + /// Less than or equal comparison, converted from the `LessOrEqual` expression of spargebra. + LessOrEqual(Box, Box), + /// [op:numeric-add](https://www.w3.org/TR/xpath-functions-31/#func-numeric-add) and other XSD additions. + Add(Box, Box), + /// [op:numeric-subtract](https://www.w3.org/TR/xpath-functions-31/#func-numeric-subtract) and other XSD subtractions. + Subtract(Box, Box), + /// [op:numeric-multiply](https://www.w3.org/TR/xpath-functions-31/#func-numeric-multiply) and other XSD multiplications. + Multiply(Box, Box), + /// [op:numeric-divide](https://www.w3.org/TR/xpath-functions-31/#func-numeric-divide) and other XSD divisions. + Divide(Box, Box), + /// [op:numeric-unary-plus](https://www.w3.org/TR/xpath-functions-31/#func-numeric-unary-plus) and other XSD unary plus. + UnaryPlus(Box), + /// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions-31/#func-numeric-unary-minus) and other XSD unary minus. + UnaryMinus(Box), + /// [fn:not](https://www.w3.org/TR/xpath-functions-31/#func-not). + Not(Box), + /// [EXISTS](https://www.w3.org/TR/sparql11-query/#func-filter-exists). + Exists(Box), + /// [BOUND](https://www.w3.org/TR/sparql11-query/#func-bound). + Bound(Variable), + /// [IF](https://www.w3.org/TR/sparql11-query/#func-if). + If(Box, Box, Box), + /// [COALESCE](https://www.w3.org/TR/sparql11-query/#func-coalesce). + Coalesce(Vec), + /// A regular function call. 
+ FunctionCall(Function, Vec), +} + +impl Expression { + pub fn or_all(args: impl IntoIterator) -> Self { + let args = args.into_iter(); + let mut all = Vec::with_capacity(args.size_hint().0); + for arg in args { + if let Some(ebv) = arg.effective_boolean_value() { + if ebv { + return true.into(); + } + // We ignore false values + } else if let Self::Or(args) = arg { + all.extend(args); + } else { + all.push(arg); + } + } + match all.len() { + 0 => false.into(), + 1 => { + let result = all.pop().unwrap(); + if result.returns_boolean() { + result // It's already casted to boolean + } else { + Self::And(vec![result]) + } + } + _ => Self::Or(order_vec(all)), + } + } + + pub fn and_all(args: impl IntoIterator) -> Self { + let args = args.into_iter(); + let mut all = Vec::with_capacity(args.size_hint().0); + for arg in args { + if let Some(ebv) = arg.effective_boolean_value() { + if !ebv { + return false.into(); + } + // We ignore true values + } else if let Self::And(args) = arg { + all.extend(args); + } else { + all.push(arg); + } + } + match all.len() { + 0 => true.into(), + 1 => { + let result = all.pop().unwrap(); + if result.returns_boolean() { + result + } else { + Self::And(vec![result]) + } + } + _ => Self::And(order_vec(all)), + } + } + + pub fn equal(left: Self, right: Self) -> Self { + match (left, right) { + (Self::NamedNode(left), Self::NamedNode(right)) => (left == right).into(), + (Self::Literal(left), Self::Literal(right)) if left == right => true.into(), + (left, right) => { + let (left, right) = order_pair(left, right); + Self::Equal(Box::new(left), Box::new(right)) + } + } + } + + pub fn same_term(left: Self, right: Self) -> Self { + match (left, right) { + (Self::NamedNode(left), Self::NamedNode(right)) => (left == right).into(), + (Self::Literal(left), Self::Literal(right)) if left == right => true.into(), + (left, right) => { + let (left, right) = order_pair(left, right); + Self::SameTerm(Box::new(left), Box::new(right)) + } + } + } + + pub fn 
greater(left: Self, right: Self) -> Self { + Self::Greater(Box::new(left), Box::new(right)) + } + + pub fn greater_or_equal(left: Self, right: Self) -> Self { + Self::GreaterOrEqual(Box::new(left), Box::new(right)) + } + + pub fn less(left: Self, right: Self) -> Self { + Self::Less(Box::new(left), Box::new(right)) + } + + pub fn less_or_equal(left: Self, right: Self) -> Self { + Self::LessOrEqual(Box::new(left), Box::new(right)) + } + + pub fn unary_plus(inner: Self) -> Self { + Self::UnaryPlus(Box::new(inner)) + } + + pub fn exists(inner: GraphPattern) -> Self { + if inner.is_empty() { + return false.into(); + } + if inner.is_empty_singleton() { + return true.into(); + } + Self::Exists(Box::new(inner)) + } + + pub fn if_cond(cond: Self, then: Self, els: Self) -> Self { + match cond.effective_boolean_value() { + Some(true) => then, + Some(false) => els, + None => Self::If(Box::new(cond), Box::new(then), Box::new(els)), + } + } + + pub fn coalesce(args: Vec) -> Self { + Self::Coalesce(args) + } + + pub fn call(name: Function, args: Vec) -> Self { + Self::FunctionCall(name, args) + } + + pub fn effective_boolean_value(&self) -> Option { + if let Self::Literal(literal) = self { + match literal.datatype() { + xsd::BOOLEAN => match literal.value() { + "true" | "1" => Some(true), + "false" | "0" => Some(false), + _ => None, //TODO + }, + xsd::STRING => Some(!literal.value().is_empty()), + _ => None, //TODO + } + } else { + None + } + } + + pub fn used_variables(&self) -> HashSet<&Variable> { + let mut variables = HashSet::new(); + self.lookup_used_variables(&mut |v| { + variables.insert(v); + }); + variables + } + + pub fn lookup_used_variables<'a>(&'a self, callback: &mut impl FnMut(&'a Variable)) { + match self { + Self::NamedNode(_) | Self::Literal(_) => {} + Self::Variable(v) | Self::Bound(v) => callback(v), + Self::Or(inner) + | Self::And(inner) + | Self::Coalesce(inner) + | Self::FunctionCall(_, inner) => { + for i in inner { + i.lookup_used_variables(callback); + 
} + } + Self::Equal(a, b) + | Self::SameTerm(a, b) + | Self::Greater(a, b) + | Self::GreaterOrEqual(a, b) + | Self::Less(a, b) + | Self::LessOrEqual(a, b) + | Self::Add(a, b) + | Self::Subtract(a, b) + | Self::Multiply(a, b) + | Self::Divide(a, b) => { + a.lookup_used_variables(callback); + b.lookup_used_variables(callback); + } + Self::UnaryPlus(i) | Self::UnaryMinus(i) | Self::Not(i) => { + i.lookup_used_variables(callback) + } + Self::Exists(e) => e.lookup_used_variables(callback), + Self::If(a, b, c) => { + a.lookup_used_variables(callback); + b.lookup_used_variables(callback); + c.lookup_used_variables(callback); + } + } + } + + fn from_sparql_algebra( + expression: &AlExpression, + graph_name: Option<&NamedNodePattern>, + ) -> Self { + match expression { + AlExpression::NamedNode(node) => Self::NamedNode(node.clone()), + AlExpression::Literal(literal) => Self::Literal(literal.clone()), + AlExpression::Variable(variable) => Self::Variable(variable.clone()), + AlExpression::Or(left, right) => Self::Or(vec![ + Self::from_sparql_algebra(left, graph_name), + Self::from_sparql_algebra(right, graph_name), + ]), + AlExpression::And(left, right) => Self::And(vec![ + Self::from_sparql_algebra(left, graph_name), + Self::from_sparql_algebra(right, graph_name), + ]), + AlExpression::Equal(left, right) => Self::Equal( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::SameTerm(left, right) => Self::SameTerm( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::Greater(left, right) => Self::Greater( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::GreaterOrEqual(left, right) => Self::GreaterOrEqual( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + 
AlExpression::Less(left, right) => Self::Less( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::LessOrEqual(left, right) => Self::LessOrEqual( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::In(left, right) => { + let left = Self::from_sparql_algebra(left, graph_name); + match right.len() { + 0 => Self::if_cond(left, false.into(), false.into()), + 1 => Self::Equal( + Box::new(left), + Box::new(Self::from_sparql_algebra(&right[0], graph_name)), + ), + _ => Self::Or( + right + .iter() + .map(|e| { + Self::Equal( + Box::new(left.clone()), + Box::new(Self::from_sparql_algebra(e, graph_name)), + ) + }) + .collect(), + ), + } + } + AlExpression::Add(left, right) => Self::Add( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::Subtract(left, right) => Self::Subtract( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::Multiply(left, right) => Self::Multiply( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::Divide(left, right) => Self::Divide( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::UnaryPlus(inner) => { + Self::UnaryPlus(Box::new(Self::from_sparql_algebra(inner, graph_name))) + } + AlExpression::UnaryMinus(inner) => { + Self::UnaryMinus(Box::new(Self::from_sparql_algebra(inner, graph_name))) + } + AlExpression::Not(inner) => { + Self::Not(Box::new(Self::from_sparql_algebra(inner, graph_name))) + } + AlExpression::Exists(inner) => Self::Exists(Box::new( + GraphPattern::from_sparql_algebra(inner, graph_name, &mut HashMap::new()), + )), + 
AlExpression::Bound(variable) => Self::Bound(variable.clone()), + AlExpression::If(cond, yes, no) => Self::If( + Box::new(Self::from_sparql_algebra(cond, graph_name)), + Box::new(Self::from_sparql_algebra(yes, graph_name)), + Box::new(Self::from_sparql_algebra(no, graph_name)), + ), + AlExpression::Coalesce(inner) => Self::Coalesce( + inner + .iter() + .map(|e| Self::from_sparql_algebra(e, graph_name)) + .collect(), + ), + AlExpression::FunctionCall(name, args) => Self::FunctionCall( + name.clone(), + args.iter() + .map(|e| Self::from_sparql_algebra(e, graph_name)) + .collect(), + ), + } + } + + fn returns_boolean(&self) -> bool { + match self { + Expression::Or(_) + | Expression::And(_) + | Expression::Equal(_, _) + | Expression::SameTerm(_, _) + | Expression::Greater(_, _) + | Expression::GreaterOrEqual(_, _) + | Expression::Less(_, _) + | Expression::LessOrEqual(_, _) + | Expression::Not(_) + | Expression::Exists(_) + | Expression::Bound(_) + | Expression::FunctionCall( + Function::IsBlank | Function::IsIri | Function::IsLiteral | Function::IsNumeric, + _, + ) => true, + #[cfg(feature = "rdf-star")] + Expression::FunctionCall(Function::IsTriple, _) => true, + Expression::Literal(literal) => literal.datatype() == xsd::BOOLEAN, + Expression::If(_, a, b) => a.returns_boolean() && b.returns_boolean(), + _ => false, + } + } +} + +impl From for Expression { + fn from(value: NamedNode) -> Self { + Self::NamedNode(value) + } +} + +impl From for Expression { + fn from(value: Literal) -> Self { + Self::Literal(value) + } +} + +impl From for Expression { + fn from(value: GroundSubject) -> Self { + match value { + GroundSubject::NamedNode(value) => value.into(), + #[cfg(feature = "rdf-star")] + GroundSubject::Triple(value) => (*value).into(), + } + } +} + +impl From for Expression { + fn from(value: GroundTerm) -> Self { + match value { + GroundTerm::NamedNode(value) => value.into(), + GroundTerm::Literal(value) => value.into(), + #[cfg(feature = "rdf-star")] + 
GroundTerm::Triple(value) => (*value).into(), + } + } +} + +impl From for Expression { + fn from(value: NamedNodePattern) -> Self { + match value { + NamedNodePattern::NamedNode(value) => value.into(), + NamedNodePattern::Variable(variable) => variable.into(), + } + } +} + +impl From for Expression { + fn from(value: GroundTermPattern) -> Self { + match value { + GroundTermPattern::NamedNode(value) => value.into(), + GroundTermPattern::Literal(value) => value.into(), + #[cfg(feature = "rdf-star")] + GroundTermPattern::Triple(value) => (*value).into(), + GroundTermPattern::Variable(variable) => variable.into(), + } + } +} + +#[cfg(feature = "rdf-star")] +impl From for Expression { + fn from(value: GroundTriple) -> Self { + Self::FunctionCall( + Function::Triple, + vec![ + value.subject.into(), + value.predicate.into(), + value.object.into(), + ], + ) + } +} + +#[cfg(feature = "rdf-star")] +impl From for Expression { + fn from(value: GroundTriplePattern) -> Self { + Self::FunctionCall( + Function::Triple, + vec![ + value.subject.into(), + value.predicate.into(), + value.object.into(), + ], + ) + } +} + +impl From for Expression { + fn from(value: Variable) -> Self { + Self::Variable(value) + } +} + +impl From for Expression { + fn from(value: bool) -> Self { + Literal::from(value).into() + } +} + +impl From<&Expression> for AlExpression { + fn from(expression: &Expression) -> Self { + match expression { + Expression::NamedNode(node) => Self::NamedNode(node.clone()), + Expression::Literal(literal) => Self::Literal(literal.clone()), + Expression::Variable(variable) => Self::Variable(variable.clone()), + Expression::Or(inner) => inner + .iter() + .map(Into::into) + .reduce(|a, b| Self::Or(Box::new(a), Box::new(b))) + .unwrap_or_else(|| Literal::from(false).into()), + Expression::And(inner) => inner + .iter() + .map(Into::into) + .reduce(|a, b| Self::And(Box::new(a), Box::new(b))) + .unwrap_or_else(|| Literal::from(true).into()), + Expression::Equal(left, right) => 
Self::Equal( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::SameTerm(left, right) => Self::SameTerm( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::Greater(left, right) => Self::Greater( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::GreaterOrEqual(left, right) => Self::GreaterOrEqual( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::Less(left, right) => Self::Less( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::LessOrEqual(left, right) => Self::LessOrEqual( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::Add(left, right) => Self::Add( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::Subtract(left, right) => Self::Subtract( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::Multiply(left, right) => Self::Multiply( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::Divide(left, right) => Self::Divide( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::UnaryPlus(inner) => Self::UnaryPlus(Box::new(inner.as_ref().into())), + Expression::UnaryMinus(inner) => Self::UnaryMinus(Box::new(inner.as_ref().into())), + Expression::Not(inner) => Self::Not(Box::new(inner.as_ref().into())), + Expression::Exists(inner) => Self::Exists(Box::new(inner.as_ref().into())), + Expression::Bound(variable) => Self::Bound(variable.clone()), + Expression::If(cond, yes, no) => Self::If( + Box::new(cond.as_ref().into()), + Box::new(yes.as_ref().into()), + Box::new(no.as_ref().into()), + ), + Expression::Coalesce(inner) => Self::Coalesce(inner.iter().map(Into::into).collect()), + Expression::FunctionCall(name, args) => { + Self::FunctionCall(name.clone(), args.iter().map(Into::into).collect()) + } + } + } +} + 
+impl BitAnd for Expression { + type Output = Self; + + fn bitand(self, rhs: Self) -> Self::Output { + Self::and_all([self, rhs]) + } +} + +impl BitOr for Expression { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self { + Self::or_all([self, rhs]) + } +} + +impl Not for Expression { + type Output = Self; + + fn not(self) -> Self { + if let Some(v) = self.effective_boolean_value() { + (!v).into() + } else if let Self::Not(v) = self { + if v.returns_boolean() { + *v + } else { + Self::And(vec![*v]) + } + } else { + Self::Not(Box::new(self)) + } + } +} + +impl Add for Expression { + type Output = Self; + + fn add(self, rhs: Self) -> Self { + let (left, right) = order_pair(self, rhs); + Self::Add(Box::new(left), Box::new(right)) + } +} + +impl Sub for Expression { + type Output = Self; + + fn sub(self, rhs: Self) -> Self { + Self::Subtract(Box::new(self), Box::new(rhs)) + } +} + +impl Mul for Expression { + type Output = Self; + + fn mul(self, rhs: Self) -> Self { + let (left, right) = order_pair(self, rhs); + Self::Multiply(Box::new(left), Box::new(right)) + } +} + +impl Div for Expression { + type Output = Self; + + fn div(self, rhs: Self) -> Self { + Self::Divide(Box::new(self), Box::new(rhs)) + } +} + +impl Neg for Expression { + type Output = Self; + + fn neg(self) -> Self { + Self::UnaryMinus(Box::new(self)) + } +} + +/// A SPARQL query [graph pattern](https://www.w3.org/TR/sparql11-query/#sparqlQuery). +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum GraphPattern { + /// A [basic graph pattern](https://www.w3.org/TR/sparql11-query/#defn_BasicGraphPattern). + QuadPattern { + subject: GroundTermPattern, + predicate: NamedNodePattern, + object: GroundTermPattern, + graph_name: Option, + }, + /// A [property path pattern](https://www.w3.org/TR/sparql11-query/#defn_evalPP_predicate). 
+ Path { + subject: GroundTermPattern, + path: PropertyPathExpression, + object: GroundTermPattern, + graph_name: Option, + }, + /// [Join](https://www.w3.org/TR/sparql11-query/#defn_algJoin). + Join { + left: Box, + right: Box, + algorithm: JoinAlgorithm, + }, + /// [LeftJoin](https://www.w3.org/TR/sparql11-query/#defn_algLeftJoin). + LeftJoin { + left: Box, + right: Box, + expression: Expression, + }, + /// Lateral join i.e. evaluate right for all result row of left + #[cfg(feature = "sep-0006")] + Lateral { left: Box, right: Box }, + /// [Filter](https://www.w3.org/TR/sparql11-query/#defn_algFilter). + Filter { + expression: Expression, + inner: Box, + }, + /// [Union](https://www.w3.org/TR/sparql11-query/#defn_algUnion). + Union { inner: Vec }, + /// [Extend](https://www.w3.org/TR/sparql11-query/#defn_extend). + Extend { + inner: Box, + variable: Variable, + expression: Expression, + }, + /// [Minus](https://www.w3.org/TR/sparql11-query/#defn_algMinus). + Minus { left: Box, right: Box }, + /// A table used to provide inline values + Values { + variables: Vec, + bindings: Vec>>, + }, + /// [OrderBy](https://www.w3.org/TR/sparql11-query/#defn_algOrdered). + OrderBy { + inner: Box, + expression: Vec, + }, + /// [Project](https://www.w3.org/TR/sparql11-query/#defn_algProjection). + Project { + inner: Box, + variables: Vec, + }, + /// [Distinct](https://www.w3.org/TR/sparql11-query/#defn_algDistinct). + Distinct { inner: Box }, + /// [Reduced](https://www.w3.org/TR/sparql11-query/#defn_algReduced). + Reduced { inner: Box }, + /// [Slice](https://www.w3.org/TR/sparql11-query/#defn_algSlice). + Slice { + inner: Box, + start: usize, + length: Option, + }, + /// [Group](https://www.w3.org/TR/sparql11-query/#aggregateAlgebra). + Group { + inner: Box, + variables: Vec, + aggregates: Vec<(Variable, AggregateExpression)>, + }, + /// [Service](https://www.w3.org/TR/sparql11-federated-query/#defn_evalService). 
+ Service { + name: NamedNodePattern, + inner: Box, + silent: bool, + }, +} + +impl GraphPattern { + pub fn empty() -> Self { + Self::Values { + variables: Vec::new(), + bindings: Vec::new(), + } + } + + /// Checks if the pattern is the empty table, i.e. a `Values` pattern without any binding row. + fn is_empty(&self) -> bool { + if let Self::Values { bindings, .. } = self { + bindings.is_empty() + } else { + false + } + } + + /// Builds the table made of no variable and of a single binding row that binds nothing. + pub fn empty_singleton() -> Self { + Self::Values { + variables: Vec::new(), + bindings: vec![Vec::new()], + } + } + + /// Checks if the pattern is a `Values` table with exactly one row in which every value is unbound. + pub fn is_empty_singleton(&self) -> bool { + if let Self::Values { bindings, .. } = self { + bindings.len() == 1 && bindings.iter().all(|b| b.iter().all(Option::is_none)) + } else { + false + } + } + + /// Builds a join between `left` and `right`. + /// + /// Returns the empty table if one of the operands is the empty table + /// and the other operand if one of them is the empty singleton. + pub fn join(left: Self, right: Self, algorithm: JoinAlgorithm) -> Self { + if left.is_empty() || right.is_empty() { + return Self::empty(); + } + if left.is_empty_singleton() { + return right; + } + if right.is_empty_singleton() { + return left; + } + Self::Join { + left: Box::new(left), + right: Box::new(right), + algorithm, + } + } + + /// Builds a lateral join, applying the same shortcuts as `join`: + /// the empty table if an operand is the empty table, the other operand if one is the empty singleton. + #[cfg(feature = "sep-0006")] + pub fn lateral(left: Self, right: Self) -> Self { + if left.is_empty() || right.is_empty() { + return Self::empty(); + } + if left.is_empty_singleton() { + return right; + } + if right.is_empty_singleton() { + return left; + } + Self::Lateral { + left: Box::new(left), + right: Box::new(right), + } + } + + /// Builds a left join between `left` and `right` filtered by `expression`. + /// + /// Returns `left` unchanged if `left` or `right` is the empty table, if `right` is the empty singleton + /// or if the effective boolean value of `expression` is known to be `false`. + /// An expression whose effective boolean value is known to be `true` is replaced by the `true` literal. + pub fn left_join(left: Self, right: Self, expression: Expression) -> Self { + let expression_ebv = expression.effective_boolean_value(); + if left.is_empty() + || right.is_empty() + || right.is_empty_singleton() + || expression_ebv == Some(false) + { + return left; + } + Self::LeftJoin { + left: Box::new(left), + right: Box::new(right), + expression: if expression_ebv == Some(true) { + true.into() + } else { + expression + }, + } + } + + /// Builds a minus: the empty table if `left` is the empty table and `left` itself if `right` is the empty table. + pub fn minus(left: Self, right: Self) -> Self { + if left.is_empty() { + return Self::empty(); + } + if right.is_empty() { + return left; + } + Self::Minus { + left: 
Box::new(left), + right: Box::new(right), + } + } + + pub fn union(left: Self, right: Self) -> Self { + Self::union_all([left, right]) + } + + pub fn union_all(args: impl IntoIterator) -> Self { + let args = args.into_iter(); + let mut all = Vec::with_capacity(args.size_hint().0); + for arg in args { + if arg.is_empty() { + continue; + } + if let Self::Union { inner } = arg { + all.extend(inner); + } else { + all.push(arg); + } + } + if all.is_empty() { + GraphPattern::empty() + } else { + Self::Union { + inner: order_vec(all), + } + } + } + + pub fn filter(inner: Self, expression: Expression) -> Self { + if inner.is_empty() { + return Self::empty(); + } + // We unwrap singleton And + let expression = match expression { + Expression::And(mut l) if l.len() == 1 => l.pop().unwrap(), + e => e, + }; + match expression.effective_boolean_value() { + Some(true) => inner, + Some(false) => Self::empty(), + None => match inner { + Self::Filter { + inner, + expression: e2, + } => Self::Filter { + inner, + expression: expression & e2, + }, + inner => Self::Filter { + inner: Box::new(inner), + expression, + }, + }, + } + } + + pub fn extend(inner: Self, variable: Variable, expression: Expression) -> Self { + if inner.is_empty() { + return Self::empty(); + } + Self::Extend { + inner: Box::new(inner), + variable, + expression, + } + } + + pub fn values( + mut variables: Vec, + mut bindings: Vec>>, + ) -> Self { + let empty_rows = (0..variables.len()) + .filter(|row| !bindings.iter().any(|binding| binding.get(*row).is_some())) + .collect::>(); + if !empty_rows.is_empty() { + // We remove empty rows + variables = variables + .into_iter() + .enumerate() + .filter_map(|(i, v)| { + if empty_rows.contains(&i) { + None + } else { + Some(v) + } + }) + .collect(); + bindings = bindings + .into_iter() + .map(|binding| { + binding + .into_iter() + .enumerate() + .filter_map(|(i, v)| { + if empty_rows.contains(&i) { + None + } else { + Some(v) + } + }) + .collect() + }) + .collect(); + } + 
Self::Values { + variables, + bindings, + } + } + + pub fn order_by(inner: Self, expression: Vec) -> Self { + if inner.is_empty() { + return Self::empty(); + } + if expression.is_empty() { + return inner; + } + Self::OrderBy { + inner: Box::new(inner), + expression, + } + } + + pub fn project(inner: Self, variables: Vec) -> Self { + Self::Project { + inner: Box::new(inner), + variables, + } + } + + pub fn distinct(inner: Self) -> Self { + if inner.is_empty() { + return Self::empty(); + } + Self::Distinct { + inner: Box::new(inner), + } + } + + pub fn reduced(inner: Self) -> Self { + if inner.is_empty() { + return Self::empty(); + } + Self::Reduced { + inner: Box::new(inner), + } + } + + pub fn slice(inner: Self, start: usize, length: Option) -> Self { + if inner.is_empty() { + return Self::empty(); + } + if start == 0 && length.is_none() { + return inner; + } + Self::Slice { + inner: Box::new(inner), + start, + length, + } + } + + pub fn group( + inner: Self, + variables: Vec, + aggregates: Vec<(Variable, AggregateExpression)>, + ) -> Self { + if inner.is_empty() { + return Self::empty(); + } + Self::Group { + inner: Box::new(inner), + variables, + aggregates, + } + } + + pub fn service(inner: Self, name: NamedNodePattern, silent: bool) -> Self { + if inner.is_empty() { + return Self::empty(); + } + Self::Service { + inner: Box::new(inner), + name, + silent, + } + } + + pub fn lookup_used_variables<'a>(&'a self, callback: &mut impl FnMut(&'a Variable)) { + match self { + Self::Values { variables, .. } | Self::Project { variables, .. 
} => {
                for v in variables {
                    callback(v);
                }
            }
            Self::QuadPattern {
                subject,
                predicate,
                object,
                graph_name,
            } => {
                lookup_term_pattern_variables(subject, callback);
                if let NamedNodePattern::Variable(v) = predicate {
                    callback(v);
                }
                lookup_term_pattern_variables(object, callback);
                if let Some(NamedNodePattern::Variable(v)) = graph_name {
                    callback(v);
                }
            }
            Self::Path {
                subject,
                object,
                graph_name,
                ..
            } => {
                lookup_term_pattern_variables(subject, callback);
                lookup_term_pattern_variables(object, callback);
                if let Some(NamedNodePattern::Variable(v)) = graph_name {
                    callback(v);
                }
            }
            Self::Filter { inner, expression } => {
                expression.lookup_used_variables(callback);
                inner.lookup_used_variables(callback);
            }
            Self::Union { inner } => {
                for child in inner {
                    child.lookup_used_variables(callback);
                }
            }
            Self::Join { left, right, .. } | Self::Minus { left, right } => {
                left.lookup_used_variables(callback);
                right.lookup_used_variables(callback);
            }
            #[cfg(feature = "sep-0006")]
            Self::Lateral { left, right } => {
                left.lookup_used_variables(callback);
                right.lookup_used_variables(callback);
            }
            Self::LeftJoin {
                left,
                right,
                expression,
            } => {
                expression.lookup_used_variables(callback);
                left.lookup_used_variables(callback);
                right.lookup_used_variables(callback);
            }
            Self::Extend {
                inner,
                variable,
                expression,
            } => {
                callback(variable);
                expression.lookup_used_variables(callback);
                inner.lookup_used_variables(callback);
            }
            Self::OrderBy { inner, .. }
            | Self::Distinct { inner }
            | Self::Reduced { inner }
            | Self::Slice { inner, .. } => inner.lookup_used_variables(callback),
            Self::Service { inner, name, .. } => {
                if let NamedNodePattern::Variable(v) = name {
                    callback(v);
                }
                inner.lookup_used_variables(callback);
            }
            Self::Group {
                variables,
                aggregates,
                ..
            } => {
                // Only the output of the grouping is reported, not what is used inside of it
                for v in variables {
                    callback(v);
                }
                for (v, _) in aggregates {
                    callback(v);
                }
            }
        }
    }

    /// Converts a spargebra graph pattern into this algebra.
    ///
    /// * `graph_name` is the graph the triple and path patterns are attached to. `Graph`
    ///   nodes are not kept: their name is pushed down to the patterns they contain.
    /// * `blank_nodes` maps the blank nodes already met to the variable replacing them.
    ///   A sub-select (`Project`) is converted with a fresh map.
    fn from_sparql_algebra(
        pattern: &AlGraphPattern,
        graph_name: Option<&NamedNodePattern>,
        blank_nodes: &mut HashMap<BlankNode, Variable>,
    ) -> Self {
        match pattern {
            // A basic graph pattern becomes a left-deep chain of joins between its triples
            AlGraphPattern::Bgp { patterns } => patterns
                .iter()
                .map(|p| {
                    let (subject, predicate, object) =
                        Self::triple_pattern_from_algebra(p, blank_nodes);
                    Self::QuadPattern {
                        subject,
                        predicate,
                        object,
                        graph_name: graph_name.cloned(),
                    }
                })
                .reduce(|a, b| Self::Join {
                    left: Box::new(a),
                    right: Box::new(b),
                    algorithm: JoinAlgorithm::default(),
                })
                .unwrap_or_else(Self::empty_singleton),
            AlGraphPattern::Path {
                subject,
                path,
                object,
            } => Self::Path {
                subject: Self::term_pattern_from_algebra(subject, blank_nodes),
                path: path.clone(),
                object: Self::term_pattern_from_algebra(object, blank_nodes),
                graph_name: graph_name.cloned(),
            },
            AlGraphPattern::Join { left, right } => Self::Join {
                left: Box::new(Self::from_sparql_algebra(left, graph_name, blank_nodes)),
                right: Box::new(Self::from_sparql_algebra(right, graph_name, blank_nodes)),
                algorithm: JoinAlgorithm::default(),
            },
            AlGraphPattern::LeftJoin {
                left,
                right,
                expression,
            } => Self::LeftJoin {
                left: Box::new(Self::from_sparql_algebra(left, graph_name, blank_nodes)),
                right: Box::new(Self::from_sparql_algebra(right, graph_name, blank_nodes)),
                // No condition is encoded as the constant `true`
                expression: expression.as_ref().map_or_else(
                    || true.into(),
                    |e| Expression::from_sparql_algebra(e, graph_name),
                ),
            },
            #[cfg(feature = "sep-0006")]
            AlGraphPattern::Lateral { left, right } => Self::Lateral {
                left: Box::new(Self::from_sparql_algebra(left, graph_name, blank_nodes)),
                right: Box::new(Self::from_sparql_algebra(right, graph_name, blank_nodes)),
            },
            AlGraphPattern::Filter { inner, expr } => Self::Filter {
                inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)),
                expression: Expression::from_sparql_algebra(expr, graph_name),
            },
            AlGraphPattern::Union { left, right } => Self::Union {
                inner: vec![
                    Self::from_sparql_algebra(left, graph_name, blank_nodes),
                    Self::from_sparql_algebra(right, graph_name, blank_nodes),
                ],
            },
            AlGraphPattern::Graph { inner, name } => {
                Self::from_sparql_algebra(inner, Some(name), blank_nodes)
            }
            AlGraphPattern::Extend {
                inner,
                expression,
                variable,
            } => Self::Extend {
                inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)),
                expression: Expression::from_sparql_algebra(expression, graph_name),
                variable: variable.clone(),
            },
            AlGraphPattern::Minus { left, right } => Self::Minus {
                left: Box::new(Self::from_sparql_algebra(left, graph_name, blank_nodes)),
                right: Box::new(Self::from_sparql_algebra(right, graph_name, blank_nodes)),
            },
            AlGraphPattern::Values {
                variables,
                bindings,
            } => Self::Values {
                variables: variables.clone(),
                bindings: bindings.clone(),
            },
            AlGraphPattern::OrderBy { inner, expression } => Self::OrderBy {
                inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)),
                expression: expression
                    .iter()
                    .map(|e| OrderExpression::from_sparql_algebra(e, graph_name))
                    .collect(),
            },
            AlGraphPattern::Project { inner, variables } => {
                // A graph name variable that is not projected is not visible from inside
                // of the sub-select: we replace it by a fresh variable there
                let graph_name = if let Some(NamedNodePattern::Variable(graph_name)) = graph_name {
                    Some(NamedNodePattern::Variable(
                        if variables.contains(graph_name) {
                            graph_name.clone()
                        } else {
                            new_var()
                        },
                    ))
                } else {
                    graph_name.cloned()
                };
                Self::Project {
                    inner: Box::new(Self::from_sparql_algebra(
                        inner,
                        graph_name.as_ref(),
                        &mut HashMap::new(),
                    )),
                    variables: variables.clone(),
                }
            }
            AlGraphPattern::Distinct { inner } => Self::Distinct {
                inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)),
            },
            // REDUCED keeps its own variant instead of being silently upgraded to DISTINCT
            AlGraphPattern::Reduced { inner } => Self::Reduced {
                inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)),
            },
            AlGraphPattern::Slice {
                inner,
                start,
                length,
            } => Self::Slice {
                inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)),
                start: *start,
                length: *length,
            },
            AlGraphPattern::Group {
                inner,
                variables,
                aggregates,
            } => Self::Group {
                inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)),
                variables: variables.clone(),
                aggregates: aggregates
                    .iter()
                    .map(|(var, expr)| {
                        (
                            var.clone(),
                            AggregateExpression::from_sparql_algebra(expr, graph_name),
                        )
                    })
                    .collect(),
            },
            AlGraphPattern::Service {
                inner,
                name,
                silent,
            } => Self::Service {
                inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)),
                name: name.clone(),
                silent: *silent,
            },
        }
    }

    /// Converts the three components of a triple pattern, replacing blank nodes by variables.
    fn triple_pattern_from_algebra(
        pattern: &TriplePattern,
        blank_nodes: &mut HashMap<BlankNode, Variable>,
    ) -> (GroundTermPattern, NamedNodePattern, GroundTermPattern) {
        (
            Self::term_pattern_from_algebra(&pattern.subject, blank_nodes),
            pattern.predicate.clone(),
            Self::term_pattern_from_algebra(&pattern.object, blank_nodes),
        )
    }

    /// Converts a term pattern into a blank-node-free one.
    ///
    /// Each blank node is replaced by a variable created with `new_var`. The variable is
    /// recorded in `blank_nodes` so that the same blank node always gives the same variable.
    fn term_pattern_from_algebra(
        pattern: &TermPattern,
        blank_nodes: &mut HashMap<BlankNode, Variable>,
    ) -> GroundTermPattern {
        match pattern {
            TermPattern::NamedNode(node) => node.clone().into(),
            TermPattern::BlankNode(node) => blank_nodes
                .entry(node.clone())
                .or_insert_with(new_var)
                .clone()
                .into(),
            TermPattern::Literal(literal) => literal.clone().into(),
            #[cfg(feature = "rdf-star")]
            TermPattern::Triple(pattern) => {
                let (subject, predicate, object) =
                    Self::triple_pattern_from_algebra(pattern, blank_nodes);
                GroundTriplePattern {
                    subject,
                    predicate,
                    object,
                }
                .into()
            }
            TermPattern::Variable(variable) => variable.clone().into(),
        }
    }
}

impl From<&AlGraphPattern> for GraphPattern {
    /// Converts a full query pattern: no active graph and no known blank node yet.
    fn from(pattern: &AlGraphPattern) -> Self {
        Self::from_sparql_algebra(pattern, None, &mut HashMap::new())
    }
}

impl From<&GraphPattern> for AlGraphPattern {
    /// Converts back to the spargebra algebra.
    ///
    /// Graph names are turned back into `Graph` nodes around each quad or path pattern,
    /// and joins of basic graph patterns are merged into a single basic graph pattern.
    fn from(pattern: &GraphPattern) -> Self {
        match pattern {
            GraphPattern::QuadPattern {
                subject,
                predicate,
                object,
                graph_name,
            } => {
                let pattern = Self::Bgp {
                    patterns: vec![TriplePattern {
                        subject: subject.clone().into(),
                        predicate: predicate.clone(),
                        object: object.clone().into(),
                    }],
                };
                if let Some(graph_name) = graph_name {
                    Self::Graph {
                        inner: Box::new(pattern),
                        name: graph_name.clone(),
                    }
                } else {
                    pattern
                }
            }
            GraphPattern::Path {
                subject,
                path,
                object,
                graph_name,
            } => {
                let pattern = Self::Path {
                    subject: subject.clone().into(),
                    path: path.clone(),
                    object: object.clone().into(),
                };
                if let Some(graph_name) = graph_name {
                    Self::Graph {
                        inner: Box::new(pattern),
                        name: graph_name.clone(),
                    }
                } else {
                    pattern
                }
            }
            GraphPattern::Join { left, right, .. } => {
                match (left.as_ref().into(), right.as_ref().into()) {
                    (Self::Bgp { patterns: mut left }, Self::Bgp { patterns: right }) => {
                        left.extend(right);
                        Self::Bgp { patterns: left }
                    }
                    (left, right) => Self::Join {
                        left: Box::new(left),
                        right: Box::new(right),
                    },
                }
            }
            GraphPattern::LeftJoin {
                left,
                right,
                expression,
            } => {
                // The constant `true` stands for "no condition"
                let empty_expr = if let Expression::Literal(l) = expression {
                    l.datatype() == xsd::BOOLEAN && l.value() == "true"
                } else {
                    false
                };
                Self::LeftJoin {
                    left: Box::new(left.as_ref().into()),
                    right: Box::new(right.as_ref().into()),
                    expression: if empty_expr {
                        None
                    } else {
                        Some(expression.into())
                    },
                }
            }
            #[cfg(feature = "sep-0006")]
            GraphPattern::Lateral { left, right } => {
                match (left.as_ref().into(), right.as_ref().into()) {
                    (Self::Bgp { patterns: mut left }, Self::Bgp { patterns: right }) => {
                        left.extend(right);
                        Self::Bgp { patterns: left }
                    }
                    (left, right) => Self::Lateral {
                        left: Box::new(left),
                        right: Box::new(right),
                    },
                }
            }
            GraphPattern::Filter { inner, expression } => Self::Filter {
                inner: Box::new(inner.as_ref().into()),
                expr: expression.into(),
            },
            // The n-ary union is folded into a left-deep tree of binary unions;
            // a union without any operand is written as a `VALUES` without any row
            GraphPattern::Union { inner } => inner
                .iter()
                .map(Into::into)
                .reduce(|a, b| Self::Union {
                    left: Box::new(a),
                    right: Box::new(b),
                })
                .unwrap_or_else(|| Self::Values {
                    variables: Vec::new(),
                    bindings: Vec::new(),
                }),
            GraphPattern::Extend {
                inner,
                expression,
                variable,
            } => Self::Extend {
                inner: Box::new(inner.as_ref().into()),
                expression: expression.into(),
                variable: variable.clone(),
            },
            GraphPattern::Minus { left, right } => Self::Minus {
                left: Box::new(left.as_ref().into()),
                right: Box::new(right.as_ref().into()),
            },
            GraphPattern::Values {
                variables,
                bindings,
            } => Self::Values {
                variables: variables.clone(),
                bindings: bindings.clone(),
            },
            GraphPattern::OrderBy { inner, expression } => Self::OrderBy {
                inner: Box::new(inner.as_ref().into()),
                expression: expression.iter().map(Into::into).collect(),
            },
            GraphPattern::Project { inner, variables } => Self::Project {
                inner: Box::new(inner.as_ref().into()),
                variables: variables.clone(),
            },
            GraphPattern::Distinct { inner } => Self::Distinct {
                inner: Box::new(inner.as_ref().into()),
            },
            // REDUCED is written back as REDUCED and not as DISTINCT
            GraphPattern::Reduced { inner } => Self::Reduced {
                inner: Box::new(inner.as_ref().into()),
            },
            GraphPattern::Slice {
                inner,
                start,
                length,
            } => Self::Slice {
                inner: Box::new(inner.as_ref().into()),
                start: *start,
                length: *length,
            },
            GraphPattern::Group {
                inner,
                variables,
                aggregates,
            } => Self::Group {
                inner: Box::new(inner.as_ref().into()),
                variables: variables.clone(),
                aggregates: aggregates
                    .iter()
                    .map(|(var, expr)| (var.clone(), expr.into()))
                    .collect(),
            },
            GraphPattern::Service {
                inner,
                name,
                silent,
            } => Self::Service {
                inner: Box::new(inner.as_ref().into()),
                name: name.clone(),
                silent: *silent,
            },
        }
    }
}

/// The join algorithm used (c.f. [`GraphPattern::Join`]).
+#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +pub enum JoinAlgorithm { + HashBuildLeftProbeRight, +} + +impl Default for JoinAlgorithm { + fn default() -> Self { + Self::HashBuildLeftProbeRight + } +} + +/// A set function used in aggregates (c.f. [`GraphPattern::Group`]). +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum AggregateExpression { + /// [Count](https://www.w3.org/TR/sparql11-query/#defn_aggCount). + Count { + expr: Option>, + distinct: bool, + }, + /// [Sum](https://www.w3.org/TR/sparql11-query/#defn_aggSum). + Sum { + expr: Box, + distinct: bool, + }, + /// [Avg](https://www.w3.org/TR/sparql11-query/#defn_aggAvg). + Avg { + expr: Box, + distinct: bool, + }, + /// [Min](https://www.w3.org/TR/sparql11-query/#defn_aggMin). + Min { + expr: Box, + distinct: bool, + }, + /// [Max](https://www.w3.org/TR/sparql11-query/#defn_aggMax). + Max { + expr: Box, + distinct: bool, + }, + /// [GroupConcat](https://www.w3.org/TR/sparql11-query/#defn_aggGroupConcat). + GroupConcat { + expr: Box, + distinct: bool, + separator: Option, + }, + /// [Sample](https://www.w3.org/TR/sparql11-query/#defn_aggSample). + Sample { + expr: Box, + distinct: bool, + }, + /// Custom function. 
+ Custom { + name: NamedNode, + expr: Box, + distinct: bool, + }, +} + +impl AggregateExpression { + fn from_sparql_algebra( + expression: &AlAggregateExpression, + graph_name: Option<&NamedNodePattern>, + ) -> Self { + match expression { + AlAggregateExpression::Count { expr, distinct } => Self::Count { + expr: expr + .as_ref() + .map(|e| Box::new(Expression::from_sparql_algebra(e, graph_name))), + distinct: *distinct, + }, + AlAggregateExpression::Sum { expr, distinct } => Self::Sum { + expr: Box::new(Expression::from_sparql_algebra(expr, graph_name)), + distinct: *distinct, + }, + AlAggregateExpression::Avg { expr, distinct } => Self::Avg { + expr: Box::new(Expression::from_sparql_algebra(expr, graph_name)), + distinct: *distinct, + }, + AlAggregateExpression::Min { expr, distinct } => Self::Min { + expr: Box::new(Expression::from_sparql_algebra(expr, graph_name)), + distinct: *distinct, + }, + AlAggregateExpression::Max { expr, distinct } => Self::Max { + expr: Box::new(Expression::from_sparql_algebra(expr, graph_name)), + distinct: *distinct, + }, + AlAggregateExpression::GroupConcat { + expr, + distinct, + separator, + } => Self::GroupConcat { + expr: Box::new(Expression::from_sparql_algebra(expr, graph_name)), + distinct: *distinct, + separator: separator.clone(), + }, + AlAggregateExpression::Sample { expr, distinct } => Self::Sample { + expr: Box::new(Expression::from_sparql_algebra(expr, graph_name)), + distinct: *distinct, + }, + AlAggregateExpression::Custom { + name, + expr, + distinct, + } => Self::Custom { + name: name.clone(), + expr: Box::new(Expression::from_sparql_algebra(expr, graph_name)), + distinct: *distinct, + }, + } + } +} + +impl From<&AggregateExpression> for AlAggregateExpression { + fn from(expression: &AggregateExpression) -> Self { + match expression { + AggregateExpression::Count { expr, distinct } => Self::Count { + expr: expr.as_ref().map(|e| Box::new(e.as_ref().into())), + distinct: *distinct, + }, + AggregateExpression::Sum { 
expr, distinct } => Self::Sum { + expr: Box::new(expr.as_ref().into()), + distinct: *distinct, + }, + AggregateExpression::Avg { expr, distinct } => Self::Avg { + expr: Box::new(expr.as_ref().into()), + distinct: *distinct, + }, + AggregateExpression::Min { expr, distinct } => Self::Min { + expr: Box::new(expr.as_ref().into()), + distinct: *distinct, + }, + AggregateExpression::Max { expr, distinct } => Self::Max { + expr: Box::new(expr.as_ref().into()), + distinct: *distinct, + }, + AggregateExpression::GroupConcat { + expr, + distinct, + separator, + } => Self::GroupConcat { + expr: Box::new(expr.as_ref().into()), + distinct: *distinct, + separator: separator.clone(), + }, + AggregateExpression::Sample { expr, distinct } => Self::Sample { + expr: Box::new(expr.as_ref().into()), + distinct: *distinct, + }, + AggregateExpression::Custom { + name, + expr, + distinct, + } => Self::Custom { + name: name.clone(), + expr: Box::new(expr.as_ref().into()), + distinct: *distinct, + }, + } + } +} + +/// An ordering comparator used by [`GraphPattern::OrderBy`]. 
+#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum OrderExpression { + /// Ascending order + Asc(Expression), + /// Descending order + Desc(Expression), +} + +impl OrderExpression { + fn from_sparql_algebra( + expression: &AlOrderExpression, + graph_name: Option<&NamedNodePattern>, + ) -> Self { + match expression { + AlOrderExpression::Asc(e) => Self::Asc(Expression::from_sparql_algebra(e, graph_name)), + AlOrderExpression::Desc(e) => { + Self::Desc(Expression::from_sparql_algebra(e, graph_name)) + } + } + } +} + +impl From<&OrderExpression> for AlOrderExpression { + fn from(expression: &OrderExpression) -> Self { + match expression { + OrderExpression::Asc(e) => Self::Asc(e.into()), + OrderExpression::Desc(e) => Self::Desc(e.into()), + } + } +} + +fn new_var() -> Variable { + Variable::new_unchecked(format!("{:x}", random::())) +} + +fn order_pair(a: T, b: T) -> (T, T) { + if hash(&a) <= hash(&b) { + (a, b) + } else { + (b, a) + } +} + +fn order_vec(mut vec: Vec) -> Vec { + vec.sort_unstable_by_key(|a| hash(a)); + vec +} + +fn hash(v: impl Hash) -> u64 { + let mut hasher = DefaultHasher::new(); + v.hash(&mut hasher); + hasher.finish() +} + +fn lookup_term_pattern_variables<'a>( + pattern: &'a GroundTermPattern, + callback: &mut impl FnMut(&'a Variable), +) { + if let GroundTermPattern::Variable(v) = pattern { + callback(v); + } + #[cfg(feature = "rdf-star")] + if let GroundTermPattern::Triple(t) = pattern { + lookup_term_pattern_variables(&t.subject, callback); + if let NamedNodePattern::Variable(v) = &t.predicate { + callback(v); + } + lookup_term_pattern_variables(&t.object, callback); + } +} diff --git a/lib/sparopt/src/lib.rs b/lib/sparopt/src/lib.rs new file mode 100644 index 00000000..d6f62207 --- /dev/null +++ b/lib/sparopt/src/lib.rs @@ -0,0 +1,5 @@ +pub use crate::optimizer::Optimizer; + +pub mod algebra; +mod optimizer; +mod type_inference; diff --git a/lib/sparopt/src/optimizer.rs b/lib/sparopt/src/optimizer.rs new file mode 100644 index 
00000000..91ff65d0 --- /dev/null +++ b/lib/sparopt/src/optimizer.rs @@ -0,0 +1,1022 @@ +use crate::algebra::{Expression, GraphPattern, JoinAlgorithm, OrderExpression}; +use crate::type_inference::{ + infer_expression_type, infer_graph_pattern_types, VariableType, VariableTypes, +}; +use spargebra::algebra::PropertyPathExpression; +use spargebra::term::{GroundTermPattern, NamedNodePattern}; +use std::cmp::{max, min}; + +pub struct Optimizer; + +impl Optimizer { + pub fn optimize_graph_pattern(pattern: GraphPattern) -> GraphPattern { + let pattern = Self::normalize_pattern(pattern, &VariableTypes::default()); + let pattern = Self::reorder_joins(pattern, &VariableTypes::default()); + Self::push_filters(pattern, Vec::new(), &VariableTypes::default()) + } + + /// Normalize the pattern, discarding any join ordering information + fn normalize_pattern(pattern: GraphPattern, input_types: &VariableTypes) -> GraphPattern { + match pattern { + GraphPattern::QuadPattern { + subject, + predicate, + object, + graph_name, + } => GraphPattern::QuadPattern { + subject, + predicate, + object, + graph_name, + }, + GraphPattern::Path { + subject, + path, + object, + graph_name, + } => GraphPattern::Path { + subject, + path, + object, + graph_name, + }, + GraphPattern::Join { + left, + right, + algorithm, + } => GraphPattern::join( + Self::normalize_pattern(*left, input_types), + Self::normalize_pattern(*right, input_types), + algorithm, + ), + GraphPattern::LeftJoin { + left, + right, + expression, + } => { + let left = Self::normalize_pattern(*left, input_types); + let right = Self::normalize_pattern(*right, input_types); + let mut inner_types = infer_graph_pattern_types(&left, input_types.clone()); + inner_types.intersect_with(infer_graph_pattern_types(&right, input_types.clone())); + GraphPattern::left_join( + left, + right, + Self::normalize_expression(expression, &inner_types), + ) + } + #[cfg(feature = "sep-0006")] + GraphPattern::Lateral { left, right } => { + let left = 
Self::normalize_pattern(*left, input_types); + let left_types = infer_graph_pattern_types(&left, input_types.clone()); + let right = Self::normalize_pattern(*right, &left_types); + GraphPattern::lateral(left, right) + } + GraphPattern::Filter { inner, expression } => { + let inner = Self::normalize_pattern(*inner, input_types); + let inner_types = infer_graph_pattern_types(&inner, input_types.clone()); + let expression = Self::normalize_expression(expression, &inner_types); + let expression_type = infer_expression_type(&expression, &inner_types); + if expression_type == VariableType::UNDEF { + GraphPattern::empty() + } else { + GraphPattern::filter(inner, expression) + } + } + GraphPattern::Union { inner } => GraphPattern::union_all( + inner + .into_iter() + .map(|e| Self::normalize_pattern(e, input_types)), + ), + GraphPattern::Extend { + inner, + variable, + expression, + } => { + let inner = Self::normalize_pattern(*inner, input_types); + let inner_types = infer_graph_pattern_types(&inner, input_types.clone()); + let expression = Self::normalize_expression(expression, &inner_types); + let expression_type = infer_expression_type(&expression, &inner_types); + if expression_type == VariableType::UNDEF { + //TODO: valid? 
+ inner + } else { + GraphPattern::extend(inner, variable, expression) + } + } + GraphPattern::Minus { left, right } => GraphPattern::minus( + Self::normalize_pattern(*left, input_types), + Self::normalize_pattern(*right, input_types), + ), + GraphPattern::Values { + variables, + bindings, + } => GraphPattern::values(variables, bindings), + GraphPattern::OrderBy { inner, expression } => { + let inner = Self::normalize_pattern(*inner, input_types); + let inner_types = infer_graph_pattern_types(&inner, input_types.clone()); + GraphPattern::order_by( + inner, + expression + .into_iter() + .map(|e| match e { + OrderExpression::Asc(e) => { + OrderExpression::Asc(Self::normalize_expression(e, &inner_types)) + } + OrderExpression::Desc(e) => { + OrderExpression::Desc(Self::normalize_expression(e, &inner_types)) + } + }) + .collect(), + ) + } + GraphPattern::Project { inner, variables } => { + GraphPattern::project(Self::normalize_pattern(*inner, input_types), variables) + } + GraphPattern::Distinct { inner } => { + GraphPattern::distinct(Self::normalize_pattern(*inner, input_types)) + } + GraphPattern::Reduced { inner } => { + GraphPattern::reduced(Self::normalize_pattern(*inner, input_types)) + } + GraphPattern::Slice { + inner, + start, + length, + } => GraphPattern::slice(Self::normalize_pattern(*inner, input_types), start, length), + GraphPattern::Group { + inner, + variables, + aggregates, + } => GraphPattern::group( + Self::normalize_pattern(*inner, input_types), + variables, + aggregates, + ), + GraphPattern::Service { + name, + inner, + silent, + } => GraphPattern::service(Self::normalize_pattern(*inner, input_types), name, silent), + } + } + + fn normalize_expression(expression: Expression, types: &VariableTypes) -> Expression { + match expression { + Expression::NamedNode(node) => node.into(), + Expression::Literal(literal) => literal.into(), + Expression::Variable(variable) => variable.into(), + Expression::Or(inner) => Expression::or_all( + inner + 
.into_iter() + .map(|e| Self::normalize_expression(e, types)), + ), + Expression::And(inner) => Expression::and_all( + inner + .into_iter() + .map(|e| Self::normalize_expression(e, types)), + ), + Expression::Equal(left, right) => { + let left = Self::normalize_expression(*left, types); + let left_types = infer_expression_type(&left, types); + let right = Self::normalize_expression(*right, types); + let right_types = infer_expression_type(&right, types); + #[allow(unused_mut)] + let mut must_use_equal = left_types.literal && right_types.literal; + #[cfg(feature = "rdf-star")] + { + must_use_equal = must_use_equal || left_types.triple && right_types.triple; + } + if must_use_equal { + Expression::equal(left, right) + } else { + Expression::same_term(left, right) + } + } + Expression::SameTerm(left, right) => Expression::same_term( + Self::normalize_expression(*left, types), + Self::normalize_expression(*right, types), + ), + Expression::Greater(left, right) => Expression::greater( + Self::normalize_expression(*left, types), + Self::normalize_expression(*right, types), + ), + Expression::GreaterOrEqual(left, right) => Expression::greater_or_equal( + Self::normalize_expression(*left, types), + Self::normalize_expression(*right, types), + ), + Expression::Less(left, right) => Expression::less( + Self::normalize_expression(*left, types), + Self::normalize_expression(*right, types), + ), + Expression::LessOrEqual(left, right) => Expression::less_or_equal( + Self::normalize_expression(*left, types), + Self::normalize_expression(*right, types), + ), + Expression::Add(left, right) => { + Self::normalize_expression(*left, types) + Self::normalize_expression(*right, types) + } + Expression::Subtract(left, right) => { + Self::normalize_expression(*left, types) - Self::normalize_expression(*right, types) + } + Expression::Multiply(left, right) => { + Self::normalize_expression(*left, types) * Self::normalize_expression(*right, types) + } + Expression::Divide(left, right) => { + 
Self::normalize_expression(*left, types) / Self::normalize_expression(*right, types) + } + Expression::UnaryPlus(inner) => { + Expression::unary_plus(Self::normalize_expression(*inner, types)) + } + Expression::UnaryMinus(inner) => -Self::normalize_expression(*inner, types), + Expression::Not(inner) => !Self::normalize_expression(*inner, types), + Expression::Exists(inner) => Expression::exists(Self::normalize_pattern(*inner, types)), + Expression::Bound(variable) => { + let t = types.get(&variable); + if !t.undef { + true.into() + } else if t == VariableType::UNDEF { + false.into() + } else { + Expression::Bound(variable) + } + } + Expression::If(cond, then, els) => Expression::if_cond( + Self::normalize_expression(*cond, types), + Self::normalize_expression(*then, types), + Self::normalize_expression(*els, types), + ), + Expression::Coalesce(inners) => Expression::coalesce( + inners + .into_iter() + .map(|e| Self::normalize_expression(e, types)) + .collect(), + ), + Expression::FunctionCall(name, args) => Expression::call( + name, + args.into_iter() + .map(|e| Self::normalize_expression(e, types)) + .collect(), + ), + } + } + + fn push_filters( + pattern: GraphPattern, + filters: Vec, + input_types: &VariableTypes, + ) -> GraphPattern { + match pattern { + pattern @ (GraphPattern::QuadPattern { .. } + | GraphPattern::Path { .. } + | GraphPattern::Values { .. 
}) => { + GraphPattern::filter(pattern, Expression::and_all(filters)) + } + GraphPattern::Join { + left, + right, + algorithm, + } => { + let left_types = infer_graph_pattern_types(&left, input_types.clone()); + let right_types = infer_graph_pattern_types(&right, input_types.clone()); + let mut left_filters = Vec::new(); + let mut right_filters = Vec::new(); + let mut final_filters = Vec::new(); + for filter in filters { + let push_left = are_all_expression_variables_bound(&filter, &left_types); + let push_right = are_all_expression_variables_bound(&filter, &right_types); + if push_left { + if push_right { + left_filters.push(filter.clone()); + right_filters.push(filter); + } else { + left_filters.push(filter); + } + } else if push_right { + right_filters.push(filter); + } else { + final_filters.push(filter); + } + } + GraphPattern::filter( + GraphPattern::join( + Self::push_filters(*left, left_filters, input_types), + Self::push_filters(*right, right_filters, input_types), + algorithm, + ), + Expression::and_all(final_filters), + ) + } + #[cfg(feature = "sep-0006")] + GraphPattern::Lateral { left, right } => { + let left_types = infer_graph_pattern_types(&left, input_types.clone()); + let mut left_filters = Vec::new(); + let mut right_filters = Vec::new(); + for filter in filters { + let push_left = are_all_expression_variables_bound(&filter, &left_types); + if push_left { + left_filters.push(filter); + } else { + right_filters.push(filter); + } + } + let left = Self::push_filters(*left, left_filters, input_types); + let right = Self::push_filters(*right, right_filters, &left_types); + if let GraphPattern::Filter { + inner: right, + expression, + } = right + { + // We prefer to have filter out of the lateral rather than inside the right part + GraphPattern::filter(GraphPattern::lateral(left, *right), expression) + } else { + GraphPattern::lateral(left, right) + } + } + GraphPattern::LeftJoin { + left, + right, + expression, + } => { + let left_types = 
infer_graph_pattern_types(&left, input_types.clone()); + let right_types = infer_graph_pattern_types(&right, input_types.clone()); + let mut left_filters = Vec::new(); + let mut right_filters = Vec::new(); + let mut final_filters = Vec::new(); + for filter in filters { + let push_left = are_all_expression_variables_bound(&filter, &left_types); + if push_left { + left_filters.push(filter); + } else { + final_filters.push(filter); + } + } + let expression = if expression.effective_boolean_value().is_none() + && (are_all_expression_variables_bound(&expression, &right_types) + || are_no_expression_variables_bound(&expression, &left_types)) + { + right_filters.push(expression); + true.into() + } else { + expression + }; + GraphPattern::filter( + GraphPattern::left_join( + Self::push_filters(*left, left_filters, input_types), + Self::push_filters(*right, right_filters, input_types), + expression, + ), + Expression::and_all(final_filters), + ) + } + GraphPattern::Minus { left, right } => GraphPattern::minus( + Self::push_filters(*left, filters, input_types), + Self::push_filters(*right, Vec::new(), input_types), + ), + GraphPattern::Extend { + inner, + expression, + variable, + } => { + //TODO: handle the case where the filter overrides an expression variable (should not happen in SPARQL but allowed in the algebra) + let mut inner_filters = Vec::new(); + let mut final_filters = Vec::new(); + for filter in filters { + let extend_variable_used = + filter.used_variables().into_iter().any(|v| *v == variable); + if extend_variable_used { + final_filters.push(filter); + } else { + inner_filters.push(filter); + } + } + GraphPattern::filter( + GraphPattern::extend( + Self::push_filters(*inner, inner_filters, input_types), + variable, + expression, + ), + Expression::and_all(final_filters), + ) + } + GraphPattern::Filter { inner, expression } => { + let mut filters = filters; + if let Expression::And(expressions) = expression { + filters.extend(expressions) + } else { + 
filters.push(expression) + }; + Self::push_filters(*inner, filters, input_types) + } + GraphPattern::Union { inner } => GraphPattern::union_all( + inner + .into_iter() + .map(|c| Self::push_filters(c, filters.clone(), input_types)), + ), + GraphPattern::Slice { + inner, + start, + length, + } => GraphPattern::filter( + GraphPattern::slice( + Self::push_filters(*inner, Vec::new(), input_types), + start, + length, + ), + Expression::and_all(filters), + ), + GraphPattern::Distinct { inner } => { + GraphPattern::distinct(Self::push_filters(*inner, filters, input_types)) + } + GraphPattern::Reduced { inner } => { + GraphPattern::reduced(Self::push_filters(*inner, filters, input_types)) + } + GraphPattern::Project { inner, variables } => { + GraphPattern::project(Self::push_filters(*inner, filters, input_types), variables) + } + GraphPattern::OrderBy { inner, expression } => { + GraphPattern::order_by(Self::push_filters(*inner, filters, input_types), expression) + } + GraphPattern::Service { + inner, + name, + silent, + } => GraphPattern::service( + Self::push_filters(*inner, filters, input_types), + name, + silent, + ), + GraphPattern::Group { + inner, + variables, + aggregates, + } => GraphPattern::filter( + GraphPattern::group( + Self::push_filters(*inner, Vec::new(), input_types), + variables, + aggregates, + ), + Expression::and_all(filters), + ), + } + } + + fn reorder_joins(pattern: GraphPattern, input_types: &VariableTypes) -> GraphPattern { + match pattern { + pattern @ (GraphPattern::QuadPattern { .. } + | GraphPattern::Path { .. } + | GraphPattern::Values { .. }) => pattern, + GraphPattern::Join { left, right, .. } => { + // We flatten the join operation + let mut to_reorder = Vec::new(); + let mut todo = vec![*right, *left]; + while let Some(e) = todo.pop() { + if let GraphPattern::Join { left, right, .. 
} = e { + todo.push(*right); + todo.push(*left); + } else { + to_reorder.push(e); + } + } + + // We do first type inference + let to_reorder_types = to_reorder + .iter() + .map(|p| infer_graph_pattern_types(p, input_types.clone())) + .collect::>(); + + // We do greedy join reordering + let mut output_cartesian_product_joins = Vec::new(); + let mut not_yet_reordered_ids = vec![true; to_reorder.len()]; + // We look for the next connected component to reorder and pick the smallest element + while let Some(next_entry_id) = not_yet_reordered_ids + .iter() + .enumerate() + .filter_map(|(i, v)| v.then(|| i)) + .min_by_key(|i| estimate_graph_pattern_size(&to_reorder[*i], input_types)) + { + not_yet_reordered_ids[next_entry_id] = false; // It's now done + let mut output = to_reorder[next_entry_id].clone(); + let mut output_types = to_reorder_types[next_entry_id].clone(); + // We look for an other child to join with that does not blow up the join cost + while let Some(next_id) = not_yet_reordered_ids + .iter() + .enumerate() + .filter_map(|(i, v)| v.then(|| i)) + .filter(|i| { + count_common_variables( + &output_types, + &to_reorder_types[*i], + input_types, + ) > 0 + }) + .min_by_key(|i| { + // Estimation of the join cost + if cfg!(feature = "sep-0006") + && is_fit_for_for_loop_join( + &to_reorder[*i], + input_types, + &output_types, + ) + { + estimate_lateral_cost( + &output, + &output_types, + &to_reorder[*i], + input_types, + ) + } else { + estimate_join_cost( + &output, + &output_types, + &to_reorder[*i], + &to_reorder_types[*i], + JoinAlgorithm::HashBuildLeftProbeRight, + input_types, + ) + } + }) + { + not_yet_reordered_ids[next_id] = false; // It's now done + let next = to_reorder[next_id].clone(); + #[cfg(feature = "sep-0006")] + { + output = if is_fit_for_for_loop_join(&next, input_types, &output_types) + { + GraphPattern::lateral(output, next) + } else { + GraphPattern::join( + output, + next, + JoinAlgorithm::HashBuildLeftProbeRight, + ) + }; + } + 
#[cfg(not(feature = "sep-0006"))] + { + output = GraphPattern::join( + output, + next, + JoinAlgorithm::HashBuildLeftProbeRight, + ); + } + output_types.intersect_with(to_reorder_types[next_id].clone()); + } + output_cartesian_product_joins.push(output); + } + output_cartesian_product_joins + .into_iter() + .reduce(|left, right| { + if estimate_graph_pattern_size(&left, input_types) + <= estimate_graph_pattern_size(&right, input_types) + { + GraphPattern::join(left, right, JoinAlgorithm::HashBuildLeftProbeRight) + } else { + GraphPattern::join(right, left, JoinAlgorithm::HashBuildLeftProbeRight) + } + }) + .unwrap() + } + #[cfg(feature = "sep-0006")] + GraphPattern::Lateral { left, right } => { + let left_types = infer_graph_pattern_types(&left, input_types.clone()); + GraphPattern::lateral( + Self::reorder_joins(*left, input_types), + Self::reorder_joins(*right, &left_types), + ) + } + GraphPattern::LeftJoin { + left, + right, + expression, + } => { + let left = Self::reorder_joins(*left, input_types); + let right = Self::reorder_joins(*right, input_types); + #[cfg(feature = "sep-0006")] + { + let left_types = infer_graph_pattern_types(&left, input_types.clone()); + let right_types = infer_graph_pattern_types(&right, input_types.clone()); + if is_fit_for_for_loop_join(&right, input_types, &left_types) + && count_common_variables(&left_types, &right_types, input_types) > 0 + { + return GraphPattern::lateral( + left, + GraphPattern::left_join( + GraphPattern::empty_singleton(), + right, + expression, + ), + ); + } + } + GraphPattern::left_join(left, right, expression) + } + GraphPattern::Minus { left, right } => GraphPattern::minus( + Self::reorder_joins(*left, input_types), + Self::reorder_joins(*right, input_types), + ), + GraphPattern::Extend { + inner, + expression, + variable, + } => GraphPattern::extend( + Self::reorder_joins(*inner, input_types), + variable, + expression, + ), + GraphPattern::Filter { inner, expression } => { + 
GraphPattern::filter(Self::reorder_joins(*inner, input_types), expression) + } + GraphPattern::Union { inner } => GraphPattern::union_all( + inner + .into_iter() + .map(|c| Self::reorder_joins(c, input_types)), + ), + GraphPattern::Slice { + inner, + start, + length, + } => GraphPattern::slice(Self::reorder_joins(*inner, input_types), start, length), + GraphPattern::Distinct { inner } => { + GraphPattern::distinct(Self::reorder_joins(*inner, input_types)) + } + GraphPattern::Reduced { inner } => { + GraphPattern::reduced(Self::reorder_joins(*inner, input_types)) + } + GraphPattern::Project { inner, variables } => { + GraphPattern::project(Self::reorder_joins(*inner, input_types), variables) + } + GraphPattern::OrderBy { inner, expression } => { + GraphPattern::order_by(Self::reorder_joins(*inner, input_types), expression) + } + GraphPattern::Service { + inner, + name, + silent, + } => GraphPattern::service(Self::reorder_joins(*inner, input_types), name, silent), + GraphPattern::Group { + inner, + variables, + aggregates, + } => GraphPattern::group( + Self::reorder_joins(*inner, input_types), + variables, + aggregates, + ), + } + } +} + +fn is_fit_for_for_loop_join( + pattern: &GraphPattern, + global_input_types: &VariableTypes, + entry_types: &VariableTypes, +) -> bool { + //TODO: think more about it + match pattern { + GraphPattern::Values { .. } + | GraphPattern::QuadPattern { .. } + | GraphPattern::Path { .. 
} => true, + #[cfg(feature = "sep-0006")] + GraphPattern::Lateral { left, right } => { + is_fit_for_for_loop_join(left, global_input_types, entry_types) + && is_fit_for_for_loop_join(right, global_input_types, entry_types) + } + GraphPattern::LeftJoin { + left, + right, + expression, + } => { + if !is_fit_for_for_loop_join(left, global_input_types, entry_types) { + return false; + } + + // It is not ok to transform into for loop join if right binds a variable also bound by the entry part of the for loop join + let mut left_types = infer_graph_pattern_types(left, global_input_types.clone()); + let right_types = infer_graph_pattern_types(right, global_input_types.clone()); + if right_types.iter().any(|(variable, t)| { + *t != VariableType::UNDEF + && left_types.get(variable).undef + && entry_types.get(variable) != VariableType::UNDEF + }) { + return false; + } + + // We don't forget the final expression + left_types.intersect_with(right_types); + is_expression_fit_for_for_loop_join(expression, &left_types, entry_types) + } + GraphPattern::Union { inner } => inner + .iter() + .all(|i| is_fit_for_for_loop_join(i, global_input_types, entry_types)), + GraphPattern::Filter { inner, expression } => { + is_fit_for_for_loop_join(inner, global_input_types, entry_types) + && is_expression_fit_for_for_loop_join( + expression, + &infer_graph_pattern_types(inner, global_input_types.clone()), + entry_types, + ) + } + GraphPattern::Extend { + inner, + expression, + variable, + } => { + is_fit_for_for_loop_join(inner, global_input_types, entry_types) + && entry_types.get(variable) == VariableType::UNDEF + && is_expression_fit_for_for_loop_join( + expression, + &infer_graph_pattern_types(inner, global_input_types.clone()), + entry_types, + ) + } + GraphPattern::Join { .. } + | GraphPattern::Minus { .. } + | GraphPattern::Service { .. } + | GraphPattern::OrderBy { .. } + | GraphPattern::Distinct { .. } + | GraphPattern::Reduced { .. } + | GraphPattern::Slice { .. 
} + | GraphPattern::Project { .. } + | GraphPattern::Group { .. } => false, + } +} + +fn are_all_expression_variables_bound( + expression: &Expression, + variable_types: &VariableTypes, +) -> bool { + expression + .used_variables() + .into_iter() + .all(|v| !variable_types.get(v).undef) +} + +fn are_no_expression_variables_bound( + expression: &Expression, + variable_types: &VariableTypes, +) -> bool { + expression + .used_variables() + .into_iter() + .all(|v| variable_types.get(v) == VariableType::UNDEF) +} + +fn is_expression_fit_for_for_loop_join( + expression: &Expression, + input_types: &VariableTypes, + entry_types: &VariableTypes, +) -> bool { + match expression { + Expression::NamedNode(_) | Expression::Literal(_) => true, + Expression::Variable(v) | Expression::Bound(v) => { + !input_types.get(v).undef || entry_types.get(v) == VariableType::UNDEF + } + Expression::Or(inner) + | Expression::And(inner) + | Expression::Coalesce(inner) + | Expression::FunctionCall(_, inner) => inner + .iter() + .all(|e| is_expression_fit_for_for_loop_join(e, input_types, entry_types)), + Expression::Equal(a, b) + | Expression::SameTerm(a, b) + | Expression::Greater(a, b) + | Expression::GreaterOrEqual(a, b) + | Expression::Less(a, b) + | Expression::LessOrEqual(a, b) + | Expression::Add(a, b) + | Expression::Subtract(a, b) + | Expression::Multiply(a, b) + | Expression::Divide(a, b) => { + is_expression_fit_for_for_loop_join(a, input_types, entry_types) + && is_expression_fit_for_for_loop_join(b, input_types, entry_types) + } + Expression::UnaryPlus(e) | Expression::UnaryMinus(e) | Expression::Not(e) => { + is_expression_fit_for_for_loop_join(e, input_types, entry_types) + } + Expression::If(a, b, c) => { + is_expression_fit_for_for_loop_join(a, input_types, entry_types) + && is_expression_fit_for_for_loop_join(b, input_types, entry_types) + && is_expression_fit_for_for_loop_join(c, input_types, entry_types) + } + Expression::Exists(inner) => is_fit_for_for_loop_join(inner, 
input_types, entry_types), + } +} + +fn count_common_variables( + left: &VariableTypes, + right: &VariableTypes, + input_types: &VariableTypes, +) -> usize { + // TODO: we should be smart and count as shared variables FILTER(?a = ?b) + left.iter() + .filter(|(variable, left_type)| { + !left_type.undef && !right.get(variable).undef && input_types.get(variable).undef + }) + .count() +} + +fn estimate_graph_pattern_size(pattern: &GraphPattern, input_types: &VariableTypes) -> usize { + match pattern { + GraphPattern::Values { bindings, .. } => bindings.len(), + GraphPattern::QuadPattern { + subject, + predicate, + object, + .. + } => estimate_triple_pattern_size( + is_term_pattern_bound(subject, input_types), + is_named_node_pattern_bound(predicate, input_types), + is_term_pattern_bound(object, input_types), + ), + GraphPattern::Path { + subject, + path, + object, + .. + } => estimate_path_size( + is_term_pattern_bound(subject, input_types), + path, + is_term_pattern_bound(object, input_types), + ), + GraphPattern::Join { + left, + right, + algorithm, + } => { + let left_types = infer_graph_pattern_types(left, input_types.clone()); + let right_types = infer_graph_pattern_types(right, input_types.clone()); + estimate_join_cost( + left, + &left_types, + right, + &right_types, + *algorithm, + input_types, + ) + } + GraphPattern::LeftJoin { left, right, .. 
} => { + let left_size = estimate_graph_pattern_size(left, input_types); + let left_types = infer_graph_pattern_types(left, input_types.clone()); + let right_types = infer_graph_pattern_types(right, input_types.clone()); + max( + left_size, + left_size + .saturating_mul(estimate_graph_pattern_size(right, &right_types)) + .saturating_div( + 1_000_usize.saturating_pow( + count_common_variables(&left_types, &right_types, input_types) + .try_into() + .unwrap(), + ), + ), + ) + } + #[cfg(feature = "sep-0006")] + GraphPattern::Lateral { left, right } => estimate_lateral_cost( + left, + &infer_graph_pattern_types(left, input_types.clone()), + right, + input_types, + ), + GraphPattern::Union { inner } => inner + .iter() + .map(|inner| estimate_graph_pattern_size(inner, input_types)) + .fold(0, usize::saturating_add), + GraphPattern::Minus { left, .. } => estimate_graph_pattern_size(left, input_types), + GraphPattern::Filter { inner, .. } + | GraphPattern::Extend { inner, .. } + | GraphPattern::OrderBy { inner, .. } + | GraphPattern::Project { inner, .. } + | GraphPattern::Distinct { inner, .. } + | GraphPattern::Reduced { inner, .. } + | GraphPattern::Group { inner, .. } + | GraphPattern::Service { inner, .. 
} => estimate_graph_pattern_size(inner, input_types), + GraphPattern::Slice { + inner, + start, + length, + } => { + let inner = estimate_graph_pattern_size(inner, input_types); + if let Some(length) = length { + min(inner, *length - *start) + } else { + inner + } + } + } +} + +fn estimate_join_cost( + left: &GraphPattern, + left_types: &VariableTypes, + right: &GraphPattern, + right_types: &VariableTypes, + algorithm: JoinAlgorithm, + input_types: &VariableTypes, +) -> usize { + match algorithm { + JoinAlgorithm::HashBuildLeftProbeRight => estimate_graph_pattern_size(left, input_types) + .saturating_mul(estimate_graph_pattern_size(right, input_types)) + .saturating_div( + 1_000_usize.saturating_pow( + count_common_variables(left_types, right_types, input_types) + .try_into() + .unwrap(), + ), + ), + } +} +fn estimate_lateral_cost( + left: &GraphPattern, + left_types: &VariableTypes, + right: &GraphPattern, + input_types: &VariableTypes, +) -> usize { + estimate_graph_pattern_size(left, input_types) + .saturating_mul(estimate_graph_pattern_size(right, left_types)) +} + +fn estimate_triple_pattern_size( + subject_bound: bool, + predicate_bound: bool, + object_bound: bool, +) -> usize { + match (subject_bound, predicate_bound, object_bound) { + (true, true, true) => 1, + (true, true, false) => 10, + (true, false, true) => 2, + (false, true, true) => 10_000, + (true, false, false) => 100, + (false, false, false) => 1_000_000_000, + (false, true, false) => 1_000_000, + (false, false, true) => 100_000, + } +} + +fn estimate_path_size(start_bound: bool, path: &PropertyPathExpression, end_bound: bool) -> usize { + match path { + PropertyPathExpression::NamedNode(_) => { + estimate_triple_pattern_size(start_bound, true, end_bound) + } + PropertyPathExpression::Reverse(p) => estimate_path_size(end_bound, p, start_bound), + PropertyPathExpression::Sequence(a, b) => { + // We do a for loop join in the best direction + min( + estimate_path_size(start_bound, a, false) + 
.saturating_mul(estimate_path_size(true, b, end_bound)), + estimate_path_size(start_bound, a, true) + .saturating_mul(estimate_path_size(false, b, end_bound)), + ) + } + PropertyPathExpression::Alternative(a, b) => estimate_path_size(start_bound, a, end_bound) + .saturating_add(estimate_path_size(start_bound, b, end_bound)), + PropertyPathExpression::ZeroOrMore(p) => { + if start_bound && end_bound { + 1 + } else if start_bound || end_bound { + estimate_path_size(start_bound, p, end_bound).saturating_mul(1000) + } else { + 1_000_000_000 + } + } + PropertyPathExpression::OneOrMore(p) => { + if start_bound && end_bound { + 1 + } else { + estimate_path_size(start_bound, p, end_bound).saturating_mul(1000) + } + } + PropertyPathExpression::ZeroOrOne(p) => { + if start_bound && end_bound { + 1 + } else if start_bound || end_bound { + estimate_path_size(start_bound, p, end_bound) + } else { + 1_000_000_000 + } + } + PropertyPathExpression::NegatedPropertySet(_) => { + estimate_triple_pattern_size(start_bound, false, end_bound) + } + } +} + +fn is_term_pattern_bound(pattern: &GroundTermPattern, input_types: &VariableTypes) -> bool { + match pattern { + GroundTermPattern::NamedNode(_) | GroundTermPattern::Literal(_) => true, + GroundTermPattern::Variable(v) => !input_types.get(v).undef, + #[cfg(feature = "rdf-star")] + GroundTermPattern::Triple(t) => { + is_term_pattern_bound(&t.subject, input_types) + && is_named_node_pattern_bound(&t.predicate, input_types) + && is_term_pattern_bound(&t.object, input_types) + } + } +} + +fn is_named_node_pattern_bound(pattern: &NamedNodePattern, input_types: &VariableTypes) -> bool { + match pattern { + NamedNodePattern::NamedNode(_) => true, + NamedNodePattern::Variable(v) => !input_types.get(v).undef, + } +} diff --git a/lib/sparopt/src/type_inference.rs b/lib/sparopt/src/type_inference.rs new file mode 100644 index 00000000..421fd756 --- /dev/null +++ b/lib/sparopt/src/type_inference.rs @@ -0,0 +1,451 @@ +use 
crate::algebra::{Expression, GraphPattern}; +use oxrdf::Variable; +use spargebra::algebra::Function; +use spargebra::term::{GroundTerm, GroundTermPattern, NamedNodePattern}; +use std::collections::HashMap; +use std::ops::{BitAnd, BitOr}; + +pub fn infer_graph_pattern_types( + pattern: &GraphPattern, + mut types: VariableTypes, +) -> VariableTypes { + match pattern { + GraphPattern::QuadPattern { + subject, + predicate, + object, + graph_name, + } => { + add_ground_term_pattern_types(subject, &mut types, false); + if let NamedNodePattern::Variable(v) = predicate { + types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE) + } + add_ground_term_pattern_types(object, &mut types, true); + if let Some(NamedNodePattern::Variable(v)) = graph_name { + types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE) + } + types + } + GraphPattern::Path { + subject, + object, + graph_name, + .. + } => { + add_ground_term_pattern_types(subject, &mut types, false); + add_ground_term_pattern_types(object, &mut types, true); + if let Some(NamedNodePattern::Variable(v)) = graph_name { + types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE) + } + types + } + GraphPattern::Join { left, right, .. } => { + let mut output_types = infer_graph_pattern_types(left, types.clone()); + output_types.intersect_with(infer_graph_pattern_types(right, types)); + output_types + } + #[cfg(feature = "sep-0006")] + GraphPattern::Lateral { left, right } => { + infer_graph_pattern_types(right, infer_graph_pattern_types(left, types)) + } + GraphPattern::LeftJoin { left, right, .. } => { + let mut right_types = infer_graph_pattern_types(right, types.clone()); //TODO: expression + for t in right_types.inner.values_mut() { + t.undef = true; // Right might be unset + } + let mut output_types = infer_graph_pattern_types(left, types); + output_types.intersect_with(right_types); + output_types + } + GraphPattern::Minus { left, .. 
} => infer_graph_pattern_types(left, types), + GraphPattern::Union { inner } => inner + .iter() + .map(|inner| infer_graph_pattern_types(inner, types.clone())) + .reduce(|mut a, b| { + a.union_with(b); + a + }) + .unwrap_or_default(), + GraphPattern::Extend { + inner, + variable, + expression, + } => { + let mut types = infer_graph_pattern_types(inner, types); + types.intersect_variable_with( + variable.clone(), + infer_expression_type(expression, &types), + ); + types + } + GraphPattern::Filter { inner, .. } => infer_graph_pattern_types(inner, types), + GraphPattern::Project { inner, variables } => VariableTypes { + inner: infer_graph_pattern_types(inner, types) + .inner + .into_iter() + .filter(|(v, _)| variables.contains(v)) + .collect(), + }, + GraphPattern::Distinct { inner } + | GraphPattern::Reduced { inner } + | GraphPattern::OrderBy { inner, .. } + | GraphPattern::Slice { inner, .. } => infer_graph_pattern_types(inner, types), + GraphPattern::Group { + inner, + variables, + aggregates, + } => { + let types = infer_graph_pattern_types(inner, types); + VariableTypes { + inner: infer_graph_pattern_types(inner, types) + .inner + .into_iter() + .filter(|(v, _)| variables.contains(v)) + .chain(aggregates.iter().map(|(v, _)| (v.clone(), VariableType::ANY))) //TODO: guess from aggregate + .collect(), + } + } + GraphPattern::Values { + variables, + bindings, + } => { + for (i, v) in variables.iter().enumerate() { + let mut t = VariableType::default(); + for binding in bindings { + match binding[i] { + Some(GroundTerm::NamedNode(_)) => t.named_node = true, + Some(GroundTerm::Literal(_)) => t.literal = true, + #[cfg(feature = "rdf-star")] + Some(GroundTerm::Triple(_)) => t.triple = true, + None => t.undef = true, + } + } + types.intersect_variable_with(v.clone(), t) + } + types + } + GraphPattern::Service { name, inner, .. 
} => { + let mut types = infer_graph_pattern_types(inner, types); + if let NamedNodePattern::Variable(v) = name { + types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE) + } + types + } + } +} + +fn add_ground_term_pattern_types( + pattern: &GroundTermPattern, + types: &mut VariableTypes, + is_object: bool, +) { + if let GroundTermPattern::Variable(v) = pattern { + types.intersect_variable_with( + v.clone(), + if is_object { + VariableType::TERM + } else { + VariableType::SUBJECT + }, + ) + } + #[cfg(feature = "rdf-star")] + if let GroundTermPattern::Triple(t) = pattern { + add_ground_term_pattern_types(&t.subject, types, false); + if let NamedNodePattern::Variable(v) = &t.predicate { + types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE) + } + add_ground_term_pattern_types(&t.object, types, true); + } +} + +pub fn infer_expression_type(expression: &Expression, types: &VariableTypes) -> VariableType { + match expression { + Expression::NamedNode(_) => VariableType::NAMED_NODE, + Expression::Literal(_) | Expression::Exists(_) | Expression::Bound(_) => { + VariableType::LITERAL + } + Expression::Variable(v) => types.get(v), + Expression::FunctionCall(Function::Datatype | Function::Iri, _) => { + VariableType::NAMED_NODE | VariableType::UNDEF + } + #[cfg(feature = "rdf-star")] + Expression::FunctionCall(Function::Predicate, _) => { + VariableType::NAMED_NODE | VariableType::UNDEF + } + Expression::FunctionCall(Function::BNode, _) => { + VariableType::BLANK_NODE | VariableType::UNDEF + } + Expression::Or(_) + | Expression::And(_) + | Expression::Equal(_, _) + | Expression::Greater(_, _) + | Expression::GreaterOrEqual(_, _) + | Expression::Less(_, _) + | Expression::LessOrEqual(_, _) + | Expression::Add(_, _) + | Expression::Subtract(_, _) + | Expression::Multiply(_, _) + | Expression::Divide(_, _) + | Expression::UnaryPlus(_) + | Expression::UnaryMinus(_) + | Expression::Not(_) + | Expression::FunctionCall( + Function::Str + | Function::Lang 
+ | Function::LangMatches + | Function::Rand + | Function::Abs + | Function::Ceil + | Function::Floor + | Function::Round + | Function::Concat + | Function::SubStr + | Function::StrLen + | Function::Replace + | Function::UCase + | Function::LCase + | Function::EncodeForUri + | Function::Contains + | Function::StrStarts + | Function::StrEnds + | Function::StrBefore + | Function::StrAfter + | Function::Year + | Function::Month + | Function::Day + | Function::Hours + | Function::Minutes + | Function::Seconds + | Function::Timezone + | Function::Tz + | Function::Now + | Function::Uuid + | Function::StrUuid + | Function::Md5 + | Function::Sha1 + | Function::Sha256 + | Function::Sha384 + | Function::Sha512 + | Function::StrLang + | Function::StrDt + | Function::IsIri + | Function::IsBlank + | Function::IsLiteral + | Function::IsNumeric + | Function::Regex, + _, + ) => VariableType::LITERAL | VariableType::UNDEF, + #[cfg(feature = "sep-0002")] + Expression::FunctionCall(Function::Adjust, _) => { + VariableType::LITERAL | VariableType::UNDEF + } + #[cfg(feature = "rdf-star")] + Expression::FunctionCall(Function::IsTriple, _) => { + VariableType::LITERAL | VariableType::UNDEF + } + Expression::SameTerm(left, right) => { + if infer_expression_type(left, types).undef || infer_expression_type(right, types).undef + { + VariableType::LITERAL | VariableType::UNDEF + } else { + VariableType::LITERAL + } + } + Expression::If(_, then, els) => { + infer_expression_type(then, types) | infer_expression_type(els, types) + } + Expression::Coalesce(inner) => { + let mut t = VariableType::UNDEF; + for e in inner { + let new = infer_expression_type(e, types); + t = t | new; + if !new.undef { + t.undef = false; + return t; + } + } + t + } + #[cfg(feature = "rdf-star")] + Expression::FunctionCall(Function::Triple, _) => VariableType::TRIPLE | VariableType::UNDEF, + #[cfg(feature = "rdf-star")] + Expression::FunctionCall(Function::Subject, _) => { + VariableType::SUBJECT | VariableType::UNDEF 
+ } + #[cfg(feature = "rdf-star")] + Expression::FunctionCall(Function::Object, _) => VariableType::TERM | VariableType::UNDEF, + Expression::FunctionCall(Function::Custom(_), _) => VariableType::ANY, + } +} + +#[derive(Default, Clone, Debug)] +pub struct VariableTypes { + inner: HashMap, +} + +impl VariableTypes { + pub fn get(&self, variable: &Variable) -> VariableType { + self.inner + .get(variable) + .copied() + .unwrap_or(VariableType::UNDEF) + } + + pub fn iter(&self) -> impl Iterator { + self.inner.iter() + } + + pub fn intersect_with(&mut self, other: Self) { + for (v, t) in other.inner { + self.intersect_variable_with(v, t); + } + } + + pub fn union_with(&mut self, other: Self) { + for (v, t) in &mut self.inner { + if other.get(v).undef { + t.undef = true; // Might be undefined + } + } + for (v, mut t) in other.inner { + self.inner + .entry(v) + .and_modify(|ex| *ex = *ex | t) + .or_insert({ + t.undef = true; + t + }); + } + } + + fn intersect_variable_with(&mut self, variable: Variable, t: VariableType) { + let t = self.get(&variable) & t; + if t != VariableType::UNDEF { + self.inner.insert(variable, t); + } + } +} + +#[derive(Clone, Copy, Eq, PartialEq, Debug, Default)] +pub struct VariableType { + pub undef: bool, + pub named_node: bool, + pub blank_node: bool, + pub literal: bool, + #[cfg(feature = "rdf-star")] + pub triple: bool, +} + +impl VariableType { + pub const UNDEF: Self = Self { + undef: true, + named_node: false, + blank_node: false, + literal: false, + #[cfg(feature = "rdf-star")] + triple: false, + }; + + const NAMED_NODE: Self = Self { + undef: false, + named_node: true, + blank_node: false, + literal: false, + #[cfg(feature = "rdf-star")] + triple: false, + }; + + const BLANK_NODE: Self = Self { + undef: false, + named_node: false, + blank_node: true, + literal: false, + #[cfg(feature = "rdf-star")] + triple: false, + }; + + const LITERAL: Self = Self { + undef: false, + named_node: false, + blank_node: false, + literal: true, + 
#[cfg(feature = "rdf-star")] + triple: false, + }; + + #[cfg(feature = "rdf-star")] + const TRIPLE: Self = Self { + undef: false, + named_node: false, + blank_node: false, + literal: false, + triple: true, + }; + + const SUBJECT: Self = Self { + undef: false, + named_node: true, + blank_node: true, + literal: false, + #[cfg(feature = "rdf-star")] + triple: true, + }; + + const TERM: Self = Self { + undef: false, + named_node: true, + blank_node: true, + literal: true, + #[cfg(feature = "rdf-star")] + triple: true, + }; + + const ANY: Self = Self { + undef: true, + named_node: true, + blank_node: true, + literal: true, + #[cfg(feature = "rdf-star")] + triple: true, + }; +} + +impl BitOr for VariableType { + type Output = Self; + + fn bitor(self, other: Self) -> Self { + Self { + undef: self.undef || other.undef, + named_node: self.named_node || other.named_node, + blank_node: self.blank_node || other.blank_node, + literal: self.literal || other.literal, + #[cfg(feature = "rdf-star")] + triple: self.triple || other.triple, + } + } +} + +impl BitAnd for VariableType { + type Output = Self; + + #[allow(clippy::nonminimal_bool)] + fn bitand(self, other: Self) -> Self { + Self { + undef: self.undef && other.undef, + named_node: self.named_node && other.named_node + || (self.undef && other.named_node) + || (self.named_node && other.undef), + blank_node: self.blank_node && other.blank_node + || (self.undef && other.blank_node) + || (self.blank_node && other.undef), + literal: self.literal && other.literal + || (self.undef && other.literal) + || (self.literal && other.undef), + #[cfg(feature = "rdf-star")] + triple: self.triple && other.triple + || (self.undef && other.triple) + || (self.triple && other.undef), + } + } +} diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index f4620f56..f9a8690b 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -379,21 +379,24 @@ impl SimpleEvaluator { } }) } - PlanNode::HashJoin { left, right } => { - let 
join_keys: Vec<_> = left + PlanNode::HashJoin { + probe_child, + build_child, + } => { + let join_keys: Vec<_> = probe_child .always_bound_variables() - .intersection(&right.always_bound_variables()) + .intersection(&build_child.always_bound_variables()) .copied() .collect(); - let (left, left_stats) = self.plan_evaluator(Rc::clone(left)); - stat_children.push(left_stats); - let (right, right_stats) = self.plan_evaluator(Rc::clone(right)); - stat_children.push(right_stats); + let (probe, probe_stats) = self.plan_evaluator(Rc::clone(probe_child)); + stat_children.push(probe_stats); + let (build, build_stats) = self.plan_evaluator(Rc::clone(build_child)); + stat_children.push(build_stats); if join_keys.is_empty() { // Cartesian product Rc::new(move |from| { let mut errors = Vec::default(); - let right_values = right(from.clone()) + let build_values = build(from.clone()) .filter_map(|result| match result { Ok(result) => Some(result), Err(error) => { @@ -403,8 +406,8 @@ impl SimpleEvaluator { }) .collect::>(); Box::new(CartesianProductJoinIterator { - left_iter: left(from), - right: right_values, + probe_iter: probe(from), + built: build_values, buffered_results: errors, }) }) @@ -412,8 +415,8 @@ impl SimpleEvaluator { // Real hash join Rc::new(move |from| { let mut errors = Vec::default(); - let mut right_values = EncodedTupleSet::new(join_keys.clone()); - right_values.extend(right(from.clone()).filter_map( + let mut built_values = EncodedTupleSet::new(join_keys.clone()); + built_values.extend(build(from.clone()).filter_map( |result| match result { Ok(result) => Some(result), Err(error) => { @@ -423,8 +426,8 @@ impl SimpleEvaluator { }, )); Box::new(HashJoinIterator { - left_iter: left(from), - right: right_values, + probe_iter: probe(from), + built: built_values, buffered_results: errors, }) }) @@ -516,33 +519,17 @@ impl SimpleEvaluator { }) }) } - PlanNode::ForLoopLeftJoin { - left, - right, - possible_problem_vars, - } => { + PlanNode::ForLoopLeftJoin { left, right 
} => { let (left, left_stats) = self.plan_evaluator(Rc::clone(left)); stat_children.push(left_stats); let (right, right_stats) = self.plan_evaluator(Rc::clone(right)); stat_children.push(right_stats); - let possible_problem_vars = Rc::clone(possible_problem_vars); Rc::new(move |from| { - if possible_problem_vars.is_empty() { - Box::new(ForLoopLeftJoinIterator { - right_evaluator: Rc::clone(&right), - left_iter: left(from), - current_right: Box::new(empty()), - }) - } else { - Box::new(BadForLoopLeftJoinIterator { - from_tuple: from.clone(), - right_evaluator: Rc::clone(&right), - left_iter: left(from), - current_left: EncodedTuple::with_capacity(0), - current_right: Box::new(empty()), - problem_vars: Rc::clone(&possible_problem_vars), - }) - } + Box::new(ForLoopLeftJoinIterator { + right_evaluator: Rc::clone(&right), + left_iter: left(from), + current_right: Box::new(empty()), + }) }) } PlanNode::Filter { child, expression } => { @@ -3887,8 +3874,8 @@ impl PathEvaluator { } struct CartesianProductJoinIterator { - left_iter: EncodedTuplesIterator, - right: Vec, + probe_iter: EncodedTuplesIterator, + built: Vec, buffered_results: Vec>, } @@ -3900,12 +3887,12 @@ impl Iterator for CartesianProductJoinIterator { if let Some(result) = self.buffered_results.pop() { return Some(result); } - let left_tuple = match self.left_iter.next()? { - Ok(left_tuple) => left_tuple, + let probe_tuple = match self.probe_iter.next()? 
{ + Ok(probe_tuple) => probe_tuple, Err(error) => return Some(Err(error)), }; - for right_tuple in &self.right { - if let Some(result_tuple) = left_tuple.combine_with(right_tuple) { + for built_tuple in &self.built { + if let Some(result_tuple) = probe_tuple.combine_with(built_tuple) { self.buffered_results.push(Ok(result_tuple)) } } @@ -3913,17 +3900,17 @@ impl Iterator for CartesianProductJoinIterator { } fn size_hint(&self) -> (usize, Option) { - let (min, max) = self.left_iter.size_hint(); + let (min, max) = self.probe_iter.size_hint(); ( - min.saturating_mul(self.right.len()), - max.map(|v| v.saturating_mul(self.right.len())), + min.saturating_mul(self.built.len()), + max.map(|v| v.saturating_mul(self.built.len())), ) } } struct HashJoinIterator { - left_iter: EncodedTuplesIterator, - right: EncodedTupleSet, + probe_iter: EncodedTuplesIterator, + built: EncodedTupleSet, buffered_results: Vec>, } @@ -3935,15 +3922,15 @@ impl Iterator for HashJoinIterator { if let Some(result) = self.buffered_results.pop() { return Some(result); } - let left_tuple = match self.left_iter.next()? { - Ok(left_tuple) => left_tuple, + let probe_tuple = match self.probe_iter.next()? 
{ + Ok(probe_tuple) => probe_tuple, Err(error) => return Some(Err(error)), }; self.buffered_results.extend( - self.right - .get(&left_tuple) + self.built + .get(&probe_tuple) .iter() - .filter_map(|right_tuple| left_tuple.combine_with(right_tuple).map(Ok)), + .filter_map(|built_tuple| probe_tuple.combine_with(built_tuple).map(Ok)), ) } } @@ -3951,10 +3938,10 @@ impl Iterator for HashJoinIterator { fn size_hint(&self) -> (usize, Option) { ( 0, - self.left_iter + self.probe_iter .size_hint() .1 - .map(|v| v.saturating_mul(self.right.len())), + .map(|v| v.saturating_mul(self.built.len())), ) } } @@ -4034,58 +4021,6 @@ impl Iterator for ForLoopLeftJoinIterator { } } -struct BadForLoopLeftJoinIterator { - from_tuple: EncodedTuple, - right_evaluator: Rc EncodedTuplesIterator>, - left_iter: EncodedTuplesIterator, - current_left: EncodedTuple, - current_right: EncodedTuplesIterator, - problem_vars: Rc<[usize]>, -} - -impl Iterator for BadForLoopLeftJoinIterator { - type Item = Result; - - fn next(&mut self) -> Option> { - for right_tuple in &mut self.current_right { - match right_tuple { - Ok(right_tuple) => { - if let Some(combined) = right_tuple.combine_with(&self.current_left) { - return Some(Ok(combined)); - } - } - Err(error) => return Some(Err(error)), - } - } - match self.left_iter.next()? 
{ - Ok(left_tuple) => { - let mut right_input = self.from_tuple.clone(); - for (var, val) in left_tuple.iter().enumerate() { - if let Some(val) = val { - if !self.problem_vars.contains(&var) { - right_input.set(var, val); - } - } - } - self.current_right = (self.right_evaluator)(right_input); - for right_tuple in &mut self.current_right { - match right_tuple { - Ok(right_tuple) => { - if let Some(combined) = right_tuple.combine_with(&left_tuple) { - self.current_left = left_tuple; - return Some(Ok(combined)); - } - } - Err(error) => return Some(Err(error)), - } - } - Some(Ok(left_tuple)) - } - Err(error) => Some(Err(error)), - } - } -} - struct UnionIterator { plans: Vec EncodedTuplesIterator>>, input: EncodedTuple, diff --git a/lib/src/sparql/mod.rs b/lib/src/sparql/mod.rs index 93fda294..9612d514 100644 --- a/lib/src/sparql/mod.rs +++ b/lib/src/sparql/mod.rs @@ -105,7 +105,6 @@ pub(crate) fn evaluate_query( &template, variables, &options.custom_functions, - options.without_optimizations, ); let planning_duration = start_planning.elapsed(); let (results, explanation) = SimpleEvaluator::new( diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index 88469433..af51e7a4 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -41,8 +41,8 @@ pub enum PlanNode { }, /// Streams left and materializes right join HashJoin { - left: Rc, - right: Rc, + probe_child: Rc, + build_child: Rc, }, /// Right nested in left loop ForLoopJoin { @@ -71,7 +71,6 @@ pub enum PlanNode { ForLoopLeftJoin { left: Rc, right: Rc, - possible_problem_vars: Rc<[usize]>, //Variables that should not be part of the entry of the left join }, Extend { child: Rc, @@ -160,7 +159,10 @@ impl PlanNode { child.lookup_used_variables(callback); } } - Self::HashJoin { left, right } + Self::HashJoin { + probe_child: left, + build_child: right, + } | Self::ForLoopJoin { left, right, .. } | Self::AntiJoin { left, right } | Self::ForLoopLeftJoin { left, right, .. 
} => { @@ -296,7 +298,11 @@ impl PlanNode { } } } - Self::HashJoin { left, right } | Self::ForLoopJoin { left, right, .. } => { + Self::HashJoin { + probe_child: left, + build_child: right, + } + | Self::ForLoopJoin { left, right, .. } => { left.lookup_always_bound_variables(callback); right.lookup_always_bound_variables(callback); } @@ -344,16 +350,6 @@ impl PlanNode { } } } - - pub fn is_variable_bound(&self, variable: usize) -> bool { - let mut found = false; - self.lookup_always_bound_variables(&mut |v| { - if v == variable { - found = true; - } - }); - found - } } #[derive(Debug, Clone)] diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index 7d985504..ec93bb7f 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -5,43 +5,37 @@ use crate::sparql::eval::compile_pattern; use crate::sparql::plan::*; use crate::storage::numeric_encoder::{EncodedTerm, EncodedTriple}; use oxrdf::vocab::xsd; -use oxrdf::TermRef; -use rand::random; +use oxrdf::{BlankNode, Term, TermRef, Triple}; use regex::Regex; -use spargebra::algebra::*; -use spargebra::term::*; -use std::collections::{BTreeSet, HashMap, HashSet}; -use std::mem::swap; +use spargebra::term::{GroundSubject, GroundTriple, TermPattern, TriplePattern}; +use sparopt::algebra::*; +use sparopt::Optimizer; +use std::collections::HashMap; use std::rc::Rc; pub struct PlanBuilder<'a> { dataset: &'a DatasetView, custom_functions: &'a HashMap Option>>, - with_optimizations: bool, } impl<'a> PlanBuilder<'a> { pub fn build( dataset: &'a DatasetView, - pattern: &GraphPattern, + pattern: &spargebra::algebra::GraphPattern, is_cardinality_meaningful: bool, custom_functions: &'a HashMap Option>>, without_optimizations: bool, ) -> Result<(PlanNode, Vec), EvaluationError> { + let mut pattern = GraphPattern::from(pattern); + if !without_optimizations { + pattern = Optimizer::optimize_graph_pattern(pattern); + } let mut variables = Vec::default(); let plan = PlanBuilder { dataset, 
custom_functions, - with_optimizations: !without_optimizations, } - .build_for_graph_pattern( - pattern, - &mut variables, - &PatternValue::Constant(PlanTerm { - encoded: EncodedTerm::DefaultGraph, - plain: PatternValueConstant::DefaultGraph, - }), - )?; + .build_for_graph_pattern(&pattern, &mut variables)?; let plan = if !without_optimizations && !is_cardinality_meaningful { // let's reduce downstream task. // TODO: avoid if already REDUCED or DISTINCT @@ -59,12 +53,10 @@ impl<'a> PlanBuilder<'a> { template: &[TriplePattern], mut variables: Vec, custom_functions: &'a HashMap Option>>, - without_optimizations: bool, ) -> Vec { PlanBuilder { dataset, custom_functions, - with_optimizations: !without_optimizations, } .build_for_graph_template(template, &mut variables) } @@ -73,112 +65,111 @@ impl<'a> PlanBuilder<'a> { &self, pattern: &GraphPattern, variables: &mut Vec, - graph_name: &PatternValue, ) -> Result { Ok(match pattern { - GraphPattern::Bgp { patterns } => { - if self.with_optimizations { - self.build_for_bgp(sort_bgp(patterns), variables, graph_name) - } else { - self.build_for_bgp(patterns, variables, graph_name) - } - } + GraphPattern::QuadPattern { + subject, + predicate, + object, + graph_name, + } => PlanNode::QuadPattern { + subject: self.pattern_value_from_ground_term_pattern(subject, variables), + predicate: self.pattern_value_from_named_node_or_variable(predicate, variables), + object: self.pattern_value_from_ground_term_pattern(object, variables), + graph_name: graph_name.as_ref().map_or( + PatternValue::Constant(PlanTerm { + encoded: EncodedTerm::DefaultGraph, + plain: PatternValueConstant::DefaultGraph, + }), + |g| self.pattern_value_from_named_node_or_variable(g, variables), + ), + }, GraphPattern::Path { subject, path, object, + graph_name, } => PlanNode::PathPattern { - subject: self.pattern_value_from_term_or_variable(subject, variables), + subject: self.pattern_value_from_ground_term_pattern(subject, variables), path: 
Rc::new(self.build_for_path(path)), - object: self.pattern_value_from_term_or_variable(object, variables), - graph_name: graph_name.clone(), + object: self.pattern_value_from_ground_term_pattern(object, variables), + graph_name: graph_name.as_ref().map_or( + PatternValue::Constant(PlanTerm { + encoded: EncodedTerm::DefaultGraph, + plain: PatternValueConstant::DefaultGraph, + }), + |g| self.pattern_value_from_named_node_or_variable(g, variables), + ), + }, + GraphPattern::Join { + left, + right, + algorithm, + } => match algorithm { + JoinAlgorithm::HashBuildLeftProbeRight => PlanNode::HashJoin { + build_child: Rc::new(self.build_for_graph_pattern(left, variables)?), + probe_child: Rc::new(self.build_for_graph_pattern(right, variables)?), + }, }, - GraphPattern::Join { left, right } => self.new_join( - self.build_for_graph_pattern(left, variables, graph_name)?, - self.build_for_graph_pattern(right, variables, graph_name)?, - ), GraphPattern::LeftJoin { left, right, expression, - } => { - let left = self.build_for_graph_pattern(left, variables, graph_name)?; - let right = self.build_for_graph_pattern(right, variables, graph_name)?; - - if self.with_optimizations && Self::can_use_for_loop_left_join(&right) { - let mut possible_problem_vars = BTreeSet::new(); - Self::add_left_join_problematic_variables(&right, &mut possible_problem_vars); - - //We add the extra filter if needed - let right = if let Some(expr) = expression { - self.push_filter( - Rc::new(right), - Box::new(self.build_for_expression(expr, variables, graph_name)?), - ) + } => PlanNode::HashLeftJoin { + left: Rc::new(self.build_for_graph_pattern(left, variables)?), + right: Rc::new(self.build_for_graph_pattern(right, variables)?), + expression: Box::new(self.build_for_expression(expression, variables)?), + }, + GraphPattern::Lateral { left, right } => { + if let GraphPattern::LeftJoin { + left: nested_left, + right: nested_right, + expression, + } = right.as_ref() + { + if nested_left.is_empty_singleton() 
{ + // We are in a ForLoopLeftJoin + let right = + GraphPattern::filter(nested_right.as_ref().clone(), expression.clone()); + PlanNode::ForLoopLeftJoin { + left: Rc::new(self.build_for_graph_pattern(left, variables)?), + right: Rc::new(self.build_for_graph_pattern(&right, variables)?), + } } else { - right - }; - PlanNode::ForLoopLeftJoin { - left: Rc::new(left), - right: Rc::new(right), - possible_problem_vars: possible_problem_vars.into_iter().collect(), + PlanNode::ForLoopJoin { + left: Rc::new(self.build_for_graph_pattern(left, variables)?), + right: Rc::new(self.build_for_graph_pattern(right, variables)?), + } } } else { - PlanNode::HashLeftJoin { - left: Rc::new(left), - right: Rc::new(right), - expression: Box::new(expression.as_ref().map_or( - Ok(PlanExpression::Literal(PlanTerm { - encoded: true.into(), - plain: true.into(), - })), - |e| self.build_for_expression(e, variables, graph_name), - )?), + PlanNode::ForLoopJoin { + left: Rc::new(self.build_for_graph_pattern(left, variables)?), + right: Rc::new(self.build_for_graph_pattern(right, variables)?), } } } - GraphPattern::Lateral { left, right } => PlanNode::ForLoopJoin { - left: Rc::new(self.build_for_graph_pattern(left, variables, graph_name)?), - right: Rc::new(self.build_for_graph_pattern(right, variables, graph_name)?), + GraphPattern::Filter { expression, inner } => PlanNode::Filter { + child: Rc::new(self.build_for_graph_pattern(inner, variables)?), + expression: Box::new(self.build_for_expression(expression, variables)?), + }, + GraphPattern::Union { inner } => PlanNode::Union { + children: inner + .iter() + .map(|p| Ok(Rc::new(self.build_for_graph_pattern(p, variables)?))) + .collect::>()?, }, - GraphPattern::Filter { expr, inner } => self.push_filter( - Rc::new(self.build_for_graph_pattern(inner, variables, graph_name)?), - Box::new(self.build_for_expression(expr, variables, graph_name)?), - ), - GraphPattern::Union { left, right } => { - //We flatten the UNION - let mut stack: 
Vec<&GraphPattern> = vec![left, right]; - let mut children = vec![]; - loop { - match stack.pop() { - None => break, - Some(GraphPattern::Union { left, right }) => { - stack.push(left); - stack.push(right); - } - Some(p) => children.push(Rc::new( - self.build_for_graph_pattern(p, variables, graph_name)?, - )), - } - } - PlanNode::Union { children } - } - GraphPattern::Graph { name, inner } => { - let graph_name = self.pattern_value_from_named_node_or_variable(name, variables); - self.build_for_graph_pattern(inner, variables, &graph_name)? - } GraphPattern::Extend { inner, variable, expression, } => PlanNode::Extend { - child: Rc::new(self.build_for_graph_pattern(inner, variables, graph_name)?), + child: Rc::new(self.build_for_graph_pattern(inner, variables)?), variable: build_plan_variable(variables, variable), - expression: Box::new(self.build_for_expression(expression, variables, graph_name)?), + expression: Box::new(self.build_for_expression(expression, variables)?), }, GraphPattern::Minus { left, right } => PlanNode::AntiJoin { - left: Rc::new(self.build_for_graph_pattern(left, variables, graph_name)?), - right: Rc::new(self.build_for_graph_pattern(right, variables, graph_name)?), + left: Rc::new(self.build_for_graph_pattern(left, variables)?), + right: Rc::new(self.build_for_graph_pattern(right, variables)?), }, GraphPattern::Service { name, @@ -186,13 +177,13 @@ impl<'a> PlanBuilder<'a> { silent, } => { // Child building should be at the begging in order for `variables` to be filled - let child = self.build_for_graph_pattern(inner, variables, graph_name)?; + let child = self.build_for_graph_pattern(inner, variables)?; let service_name = self.pattern_value_from_named_node_or_variable(name, variables); PlanNode::Service { service_name, variables: Rc::from(variables.as_slice()), child: Rc::new(child), - graph_pattern: Rc::new(inner.as_ref().clone()), + graph_pattern: Rc::new(inner.as_ref().into()), silent: *silent, } } @@ -201,7 +192,7 @@ impl<'a> 
PlanBuilder<'a> { variables: by, aggregates, } => PlanNode::Aggregate { - child: Rc::new(self.build_for_graph_pattern(inner, variables, graph_name)?), + child: Rc::new(self.build_for_graph_pattern(inner, variables)?), key_variables: by .iter() .map(|k| build_plan_variable(variables, k)) @@ -210,7 +201,7 @@ impl<'a> PlanBuilder<'a> { .iter() .map(|(v, a)| { Ok(( - self.build_for_aggregate(a, variables, graph_name)?, + self.build_for_aggregate(a, variables)?, build_plan_variable(variables, v), )) }) @@ -253,16 +244,16 @@ impl<'a> PlanBuilder<'a> { let condition: Result, EvaluationError> = expression .iter() .map(|comp| match comp { - OrderExpression::Asc(e) => Ok(Comparator::Asc( - self.build_for_expression(e, variables, graph_name)?, - )), - OrderExpression::Desc(e) => Ok(Comparator::Desc( - self.build_for_expression(e, variables, graph_name)?, - )), + OrderExpression::Asc(e) => { + Ok(Comparator::Asc(self.build_for_expression(e, variables)?)) + } + OrderExpression::Desc(e) => { + Ok(Comparator::Desc(self.build_for_expression(e, variables)?)) + } }) .collect(); PlanNode::Sort { - child: Rc::new(self.build_for_graph_pattern(inner, variables, graph_name)?), + child: Rc::new(self.build_for_graph_pattern(inner, variables)?), by: condition?, } } @@ -271,14 +262,8 @@ impl<'a> PlanBuilder<'a> { variables: projection, } => { let mut inner_variables = projection.clone(); - let inner_graph_name = - Self::convert_pattern_value_id(graph_name, &mut inner_variables); PlanNode::Project { - child: Rc::new(self.build_for_graph_pattern( - inner, - &mut inner_variables, - &inner_graph_name, - )?), + child: Rc::new(self.build_for_graph_pattern(inner, &mut inner_variables)?), mapping: projection .iter() .enumerate() @@ -295,17 +280,17 @@ impl<'a> PlanBuilder<'a> { } } GraphPattern::Distinct { inner } => PlanNode::HashDeduplicate { - child: Rc::new(self.build_for_graph_pattern(inner, variables, graph_name)?), + child: Rc::new(self.build_for_graph_pattern(inner, variables)?), }, 
GraphPattern::Reduced { inner } => PlanNode::Reduced { - child: Rc::new(self.build_for_graph_pattern(inner, variables, graph_name)?), + child: Rc::new(self.build_for_graph_pattern(inner, variables)?), }, GraphPattern::Slice { inner, start, length, } => { - let mut plan = self.build_for_graph_pattern(inner, variables, graph_name)?; + let mut plan = self.build_for_graph_pattern(inner, variables)?; if *start > 0 { plan = PlanNode::Skip { child: Rc::new(plan), @@ -323,29 +308,6 @@ impl<'a> PlanBuilder<'a> { }) } - fn build_for_bgp<'b>( - &self, - patterns: impl IntoIterator, - variables: &mut Vec, - graph_name: &PatternValue, - ) -> PlanNode { - patterns - .into_iter() - .map(|triple| PlanNode::QuadPattern { - subject: self.pattern_value_from_term_or_variable(&triple.subject, variables), - predicate: self - .pattern_value_from_named_node_or_variable(&triple.predicate, variables), - object: self.pattern_value_from_term_or_variable(&triple.object, variables), - graph_name: graph_name.clone(), - }) - .reduce(|a, b| self.new_join(a, b)) - .unwrap_or_else(|| PlanNode::StaticBindings { - encoded_tuples: vec![EncodedTuple::with_capacity(variables.len())], - variables: Vec::new(), - plain_bindings: vec![Vec::new()], - }) - } - fn build_for_path(&self, path: &PropertyPathExpression) -> PlanPropertyPath { match path { PropertyPathExpression::NamedNode(p) => PlanPropertyPath::Path(PlanTerm { @@ -387,7 +349,6 @@ impl<'a> PlanBuilder<'a> { &self, expression: &Expression, variables: &mut Vec, - graph_name: &PatternValue, ) -> Result { Ok(match expression { Expression::NamedNode(node) => PlanExpression::NamedNode(PlanTerm { @@ -399,382 +360,270 @@ impl<'a> PlanBuilder<'a> { plain: l.clone(), }), Expression::Variable(v) => PlanExpression::Variable(build_plan_variable(variables, v)), - Expression::Or(a, b) => PlanExpression::Or(vec![ - self.build_for_expression(a, variables, graph_name)?, - self.build_for_expression(b, variables, graph_name)?, - ]), - Expression::And(a, b) => 
PlanExpression::And(vec![ - self.build_for_expression(a, variables, graph_name)?, - self.build_for_expression(b, variables, graph_name)?, - ]), + Expression::Or(inner) => PlanExpression::Or( + inner + .iter() + .map(|e| self.build_for_expression(e, variables)) + .collect::>()?, + ), + Expression::And(inner) => PlanExpression::And( + inner + .iter() + .map(|e| self.build_for_expression(e, variables)) + .collect::>()?, + ), Expression::Equal(a, b) => PlanExpression::Equal( - Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), + Box::new(self.build_for_expression(a, variables)?), + Box::new(self.build_for_expression(b, variables)?), ), Expression::SameTerm(a, b) => PlanExpression::SameTerm( - Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), + Box::new(self.build_for_expression(a, variables)?), + Box::new(self.build_for_expression(b, variables)?), ), Expression::Greater(a, b) => PlanExpression::Greater( - Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), + Box::new(self.build_for_expression(a, variables)?), + Box::new(self.build_for_expression(b, variables)?), ), Expression::GreaterOrEqual(a, b) => PlanExpression::GreaterOrEqual( - Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), + Box::new(self.build_for_expression(a, variables)?), + Box::new(self.build_for_expression(b, variables)?), ), Expression::Less(a, b) => PlanExpression::Less( - Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), + Box::new(self.build_for_expression(a, variables)?), + Box::new(self.build_for_expression(b, variables)?), ), Expression::LessOrEqual(a, b) => PlanExpression::LessOrEqual( - 
Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), + Box::new(self.build_for_expression(a, variables)?), + Box::new(self.build_for_expression(b, variables)?), ), - Expression::In(e, l) => { - let e = self.build_for_expression(e, variables, graph_name)?; - if l.is_empty() { - // False except on error - PlanExpression::If( - Box::new(e), - Box::new(PlanExpression::Literal(PlanTerm { - encoded: false.into(), - plain: false.into(), - })), - Box::new(PlanExpression::Literal(PlanTerm { - encoded: false.into(), - plain: false.into(), - })), - ) - } else { - PlanExpression::Or( - l.iter() - .map(|v| { - Ok(PlanExpression::Equal( - Box::new(e.clone()), - Box::new(self.build_for_expression(v, variables, graph_name)?), - )) - }) - .collect::>()?, - ) - } - } Expression::Add(a, b) => PlanExpression::Add( - Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), + Box::new(self.build_for_expression(a, variables)?), + Box::new(self.build_for_expression(b, variables)?), ), Expression::Subtract(a, b) => PlanExpression::Subtract( - Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), + Box::new(self.build_for_expression(a, variables)?), + Box::new(self.build_for_expression(b, variables)?), ), Expression::Multiply(a, b) => PlanExpression::Multiply( - Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), + Box::new(self.build_for_expression(a, variables)?), + Box::new(self.build_for_expression(b, variables)?), ), Expression::Divide(a, b) => PlanExpression::Divide( - Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), + Box::new(self.build_for_expression(a, variables)?), + 
Box::new(self.build_for_expression(b, variables)?), ), - Expression::UnaryPlus(e) => PlanExpression::UnaryPlus(Box::new( - self.build_for_expression(e, variables, graph_name)?, - )), - Expression::UnaryMinus(e) => PlanExpression::UnaryMinus(Box::new( - self.build_for_expression(e, variables, graph_name)?, - )), - Expression::Not(e) => PlanExpression::Not(Box::new( - self.build_for_expression(e, variables, graph_name)?, - )), + Expression::UnaryPlus(e) => { + PlanExpression::UnaryPlus(Box::new(self.build_for_expression(e, variables)?)) + } + Expression::UnaryMinus(e) => { + PlanExpression::UnaryMinus(Box::new(self.build_for_expression(e, variables)?)) + } + Expression::Not(e) => { + PlanExpression::Not(Box::new(self.build_for_expression(e, variables)?)) + } Expression::FunctionCall(function, parameters) => match function { - Function::Str => PlanExpression::Str(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::Lang => PlanExpression::Lang(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), + Function::Str => PlanExpression::Str(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Lang => PlanExpression::Lang(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), Function::LangMatches => PlanExpression::LangMatches( - Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), - Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[0], variables)?), + Box::new(self.build_for_expression(¶meters[1], variables)?), ), Function::Datatype => PlanExpression::Datatype(Box::new( - self.build_for_expression(¶meters[0], variables, graph_name)?, + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Iri => PlanExpression::Iri(Box::new( + self.build_for_expression(¶meters[0], variables)?, )), - Function::Iri => PlanExpression::Iri(Box::new(self.build_for_expression( - 
¶meters[0], - variables, - graph_name, - )?)), Function::BNode => PlanExpression::BNode(match parameters.get(0) { - Some(e) => Some(Box::new( - self.build_for_expression(e, variables, graph_name)?, - )), + Some(e) => Some(Box::new(self.build_for_expression(e, variables)?)), None => None, }), Function::Rand => PlanExpression::Rand, - Function::Abs => PlanExpression::Abs(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::Ceil => PlanExpression::Ceil(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::Floor => PlanExpression::Floor(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::Round => PlanExpression::Round(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), + Function::Abs => PlanExpression::Abs(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Ceil => PlanExpression::Ceil(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Floor => PlanExpression::Floor(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Round => PlanExpression::Round(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), Function::Concat => { - PlanExpression::Concat(self.expression_list(parameters, variables, graph_name)?) + PlanExpression::Concat(self.expression_list(parameters, variables)?) 
} Function::SubStr => PlanExpression::SubStr( - Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), - Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[0], variables)?), + Box::new(self.build_for_expression(¶meters[1], variables)?), match parameters.get(2) { - Some(flags) => Some(Box::new( - self.build_for_expression(flags, variables, graph_name)?, - )), + Some(flags) => Some(Box::new(self.build_for_expression(flags, variables)?)), None => None, }, ), - Function::StrLen => PlanExpression::StrLen(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), + Function::StrLen => PlanExpression::StrLen(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), Function::Replace => { if let Some(static_regex) = compile_static_pattern_if_exists(¶meters[1], parameters.get(3)) { PlanExpression::StaticReplace( - Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?), + Box::new(self.build_for_expression(¶meters[0], variables)?), static_regex, - Box::new(self.build_for_expression( - ¶meters[2], - variables, - graph_name, - )?), + Box::new(self.build_for_expression(¶meters[2], variables)?), ) } else { PlanExpression::DynamicReplace( - Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?), - Box::new(self.build_for_expression( - ¶meters[1], - variables, - graph_name, - )?), - Box::new(self.build_for_expression( - ¶meters[2], - variables, - graph_name, - )?), + Box::new(self.build_for_expression(¶meters[0], variables)?), + Box::new(self.build_for_expression(¶meters[1], variables)?), + Box::new(self.build_for_expression(¶meters[2], variables)?), match parameters.get(3) { - Some(flags) => Some(Box::new( - self.build_for_expression(flags, variables, graph_name)?, - )), + Some(flags) => { + Some(Box::new(self.build_for_expression(flags, variables)?)) + } None => None, }, ) } } - Function::UCase => 
PlanExpression::UCase(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::LCase => PlanExpression::LCase(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), + Function::UCase => PlanExpression::UCase(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::LCase => PlanExpression::LCase(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), Function::EncodeForUri => PlanExpression::EncodeForUri(Box::new( - self.build_for_expression(¶meters[0], variables, graph_name)?, + self.build_for_expression(¶meters[0], variables)?, )), Function::Contains => PlanExpression::Contains( - Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), - Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[0], variables)?), + Box::new(self.build_for_expression(¶meters[1], variables)?), ), Function::StrStarts => PlanExpression::StrStarts( - Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), - Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[0], variables)?), + Box::new(self.build_for_expression(¶meters[1], variables)?), ), Function::StrEnds => PlanExpression::StrEnds( - Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), - Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[0], variables)?), + Box::new(self.build_for_expression(¶meters[1], variables)?), ), Function::StrBefore => PlanExpression::StrBefore( - Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), - Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[0], variables)?), + Box::new(self.build_for_expression(¶meters[1], variables)?), ), Function::StrAfter => 
PlanExpression::StrAfter( - Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), - Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[0], variables)?), + Box::new(self.build_for_expression(¶meters[1], variables)?), ), - Function::Year => PlanExpression::Year(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::Month => PlanExpression::Month(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::Day => PlanExpression::Day(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::Hours => PlanExpression::Hours(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::Minutes => PlanExpression::Minutes(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::Seconds => PlanExpression::Seconds(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), + Function::Year => PlanExpression::Year(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Month => PlanExpression::Month(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Day => PlanExpression::Day(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Hours => PlanExpression::Hours(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Minutes => PlanExpression::Minutes(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Seconds => PlanExpression::Seconds(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), Function::Timezone => PlanExpression::Timezone(Box::new( - self.build_for_expression(¶meters[0], variables, graph_name)?, + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Tz => PlanExpression::Tz(Box::new( + 
self.build_for_expression(¶meters[0], variables)?, )), - Function::Tz => PlanExpression::Tz(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), Function::Now => PlanExpression::Now, Function::Uuid => PlanExpression::Uuid, Function::StrUuid => PlanExpression::StrUuid, - Function::Md5 => PlanExpression::Md5(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::Sha1 => PlanExpression::Sha1(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::Sha256 => PlanExpression::Sha256(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::Sha384 => PlanExpression::Sha384(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::Sha512 => PlanExpression::Sha512(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), + Function::Md5 => PlanExpression::Md5(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Sha1 => PlanExpression::Sha1(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Sha256 => PlanExpression::Sha256(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Sha384 => PlanExpression::Sha384(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Sha512 => PlanExpression::Sha512(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), Function::StrLang => PlanExpression::StrLang( - Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), - Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[0], variables)?), + Box::new(self.build_for_expression(¶meters[1], variables)?), ), Function::StrDt => PlanExpression::StrDt( - Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), - Box::new(self.build_for_expression(¶meters[1], 
variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[0], variables)?), + Box::new(self.build_for_expression(¶meters[1], variables)?), ), - Function::IsIri => PlanExpression::IsIri(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), - Function::IsBlank => PlanExpression::IsBlank(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), + Function::IsIri => PlanExpression::IsIri(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), + Function::IsBlank => PlanExpression::IsBlank(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), Function::IsLiteral => PlanExpression::IsLiteral(Box::new( - self.build_for_expression(¶meters[0], variables, graph_name)?, + self.build_for_expression(¶meters[0], variables)?, )), Function::IsNumeric => PlanExpression::IsNumeric(Box::new( - self.build_for_expression(¶meters[0], variables, graph_name)?, + self.build_for_expression(¶meters[0], variables)?, )), Function::Regex => { if let Some(static_regex) = compile_static_pattern_if_exists(¶meters[1], parameters.get(2)) { PlanExpression::StaticRegex( - Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?), + Box::new(self.build_for_expression(¶meters[0], variables)?), static_regex, ) } else { PlanExpression::DynamicRegex( - Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?), - Box::new(self.build_for_expression( - ¶meters[1], - variables, - graph_name, - )?), + Box::new(self.build_for_expression(¶meters[0], variables)?), + Box::new(self.build_for_expression(¶meters[1], variables)?), match parameters.get(2) { - Some(flags) => Some(Box::new( - self.build_for_expression(flags, variables, graph_name)?, - )), + Some(flags) => { + Some(Box::new(self.build_for_expression(flags, variables)?)) + } None => None, }, ) } } Function::Triple => PlanExpression::Triple( - Box::new(self.build_for_expression(¶meters[0], variables, 
graph_name)?), - Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), - Box::new(self.build_for_expression(¶meters[2], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[0], variables)?), + Box::new(self.build_for_expression(¶meters[1], variables)?), + Box::new(self.build_for_expression(¶meters[2], variables)?), ), - Function::Subject => PlanExpression::Subject(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), + Function::Subject => PlanExpression::Subject(Box::new( + self.build_for_expression(¶meters[0], variables)?, + )), Function::Predicate => PlanExpression::Predicate(Box::new( - self.build_for_expression(¶meters[0], variables, graph_name)?, + self.build_for_expression(¶meters[0], variables)?, + )), + Function::Object => PlanExpression::Object(Box::new( + self.build_for_expression(¶meters[0], variables)?, )), - Function::Object => PlanExpression::Object(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?)), Function::IsTriple => PlanExpression::IsTriple(Box::new( - self.build_for_expression(¶meters[0], variables, graph_name)?, + self.build_for_expression(¶meters[0], variables)?, )), Function::Adjust => PlanExpression::Adjust( - Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), - Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[0], variables)?), + Box::new(self.build_for_expression(¶meters[1], variables)?), ), Function::Custom(name) => { if self.custom_functions.contains_key(name) { @@ -782,7 +631,7 @@ impl<'a> PlanBuilder<'a> { name.clone(), parameters .iter() - .map(|p| self.build_for_expression(p, variables, graph_name)) + .map(|p| self.build_for_expression(p, variables)) .collect::, EvaluationError>>()?, ) } else if name.as_ref() == xsd::BOOLEAN { @@ -790,7 +639,6 @@ impl<'a> PlanBuilder<'a> { parameters, PlanExpression::BooleanCast, variables, - graph_name, 
"boolean", )? } else if name.as_ref() == xsd::DOUBLE { @@ -798,23 +646,15 @@ impl<'a> PlanBuilder<'a> { parameters, PlanExpression::DoubleCast, variables, - graph_name, "double", )? } else if name.as_ref() == xsd::FLOAT { - self.build_cast( - parameters, - PlanExpression::FloatCast, - variables, - graph_name, - "float", - )? + self.build_cast(parameters, PlanExpression::FloatCast, variables, "float")? } else if name.as_ref() == xsd::DECIMAL { self.build_cast( parameters, PlanExpression::DecimalCast, variables, - graph_name, "decimal", )? } else if name.as_ref() == xsd::INTEGER { @@ -822,31 +662,17 @@ impl<'a> PlanBuilder<'a> { parameters, PlanExpression::IntegerCast, variables, - graph_name, "integer", )? } else if name.as_ref() == xsd::DATE { - self.build_cast( - parameters, - PlanExpression::DateCast, - variables, - graph_name, - "date", - )? + self.build_cast(parameters, PlanExpression::DateCast, variables, "date")? } else if name.as_ref() == xsd::TIME { - self.build_cast( - parameters, - PlanExpression::TimeCast, - variables, - graph_name, - "time", - )? + self.build_cast(parameters, PlanExpression::TimeCast, variables, "time")? } else if name.as_ref() == xsd::DATE_TIME { self.build_cast( parameters, PlanExpression::DateTimeCast, variables, - graph_name, "dateTime", )? } else if name.as_ref() == xsd::DURATION { @@ -854,7 +680,6 @@ impl<'a> PlanBuilder<'a> { parameters, PlanExpression::DurationCast, variables, - graph_name, "duration", )? } else if name.as_ref() == xsd::YEAR_MONTH_DURATION { @@ -862,7 +687,6 @@ impl<'a> PlanBuilder<'a> { parameters, PlanExpression::YearMonthDurationCast, variables, - graph_name, "yearMonthDuration", )? } else if name.as_ref() == xsd::DAY_TIME_DURATION { @@ -870,7 +694,6 @@ impl<'a> PlanBuilder<'a> { parameters, PlanExpression::DayTimeDurationCast, variables, - graph_name, "dayTimeDuration", )? 
} else if name.as_ref() == xsd::STRING { @@ -878,32 +701,27 @@ impl<'a> PlanBuilder<'a> { parameters, PlanExpression::StringCast, variables, - graph_name, "string", )? } else { return Err(EvaluationError::msg(format!( - "Not supported custom function {expression}" + "Not supported custom function {name}" ))); } } }, Expression::Bound(v) => PlanExpression::Bound(build_plan_variable(variables, v)), Expression::If(a, b, c) => PlanExpression::If( - Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), - Box::new(self.build_for_expression(c, variables, graph_name)?), + Box::new(self.build_for_expression(a, variables)?), + Box::new(self.build_for_expression(b, variables)?), + Box::new(self.build_for_expression(c, variables)?), ), Expression::Exists(n) => { let mut variables = variables.clone(); // Do not expose the exists variables outside - PlanExpression::Exists(Rc::new(self.build_for_graph_pattern( - n, - &mut variables, - graph_name, - )?)) + PlanExpression::Exists(Rc::new(self.build_for_graph_pattern(n, &mut variables)?)) } Expression::Coalesce(parameters) => { - PlanExpression::Coalesce(self.expression_list(parameters, variables, graph_name)?) + PlanExpression::Coalesce(self.expression_list(parameters, variables)?) 
} }) } @@ -913,15 +731,12 @@ impl<'a> PlanBuilder<'a> { parameters: &[Expression], constructor: impl Fn(Box) -> PlanExpression, variables: &mut Vec, - graph_name: &PatternValue, name: &'static str, ) -> Result { if parameters.len() == 1 { - Ok(constructor(Box::new(self.build_for_expression( - ¶meters[0], - variables, - graph_name, - )?))) + Ok(constructor(Box::new( + self.build_for_expression(¶meters[0], variables)?, + ))) } else { Err(EvaluationError::msg(format!( "The xsd:{name} casting takes only one parameter" @@ -933,42 +748,34 @@ impl<'a> PlanBuilder<'a> { &self, l: &[Expression], variables: &mut Vec, - graph_name: &PatternValue, ) -> Result, EvaluationError> { l.iter() - .map(|e| self.build_for_expression(e, variables, graph_name)) + .map(|e| self.build_for_expression(e, variables)) .collect() } - fn pattern_value_from_term_or_variable( + fn pattern_value_from_ground_term_pattern( &self, - term_or_variable: &TermPattern, + term_pattern: &GroundTermPattern, variables: &mut Vec, ) -> PatternValue { - match term_or_variable { - TermPattern::Variable(variable) => { + match term_pattern { + GroundTermPattern::Variable(variable) => { PatternValue::Variable(build_plan_variable(variables, variable)) } - TermPattern::NamedNode(node) => PatternValue::Constant(PlanTerm { + GroundTermPattern::NamedNode(node) => PatternValue::Constant(PlanTerm { encoded: self.build_term(node), plain: PatternValueConstant::NamedNode(node.clone()), }), - TermPattern::BlankNode(bnode) => { - PatternValue::Variable(build_plan_variable( - variables, - &Variable::new_unchecked(bnode.as_str()), - )) - //TODO: very bad hack to convert bnode to variable - } - TermPattern::Literal(literal) => PatternValue::Constant(PlanTerm { + GroundTermPattern::Literal(literal) => PatternValue::Constant(PlanTerm { encoded: self.build_term(literal), plain: PatternValueConstant::Literal(literal.clone()), }), - TermPattern::Triple(triple) => { + GroundTermPattern::Triple(triple) => { match ( - 
self.pattern_value_from_term_or_variable(&triple.subject, variables), + self.pattern_value_from_ground_term_pattern(&triple.subject, variables), self.pattern_value_from_named_node_or_variable(&triple.predicate, variables), - self.pattern_value_from_term_or_variable(&triple.object, variables), + self.pattern_value_from_ground_term_pattern(&triple.object, variables), ) { ( PatternValue::Constant(PlanTerm { @@ -1043,40 +850,39 @@ impl<'a> PlanBuilder<'a> { &self, aggregate: &AggregateExpression, variables: &mut Vec, - graph_name: &PatternValue, ) -> Result { match aggregate { AggregateExpression::Count { expr, distinct } => Ok(PlanAggregation { function: PlanAggregationFunction::Count, parameter: match expr { - Some(expr) => Some(self.build_for_expression(expr, variables, graph_name)?), + Some(expr) => Some(self.build_for_expression(expr, variables)?), None => None, }, distinct: *distinct, }), AggregateExpression::Sum { expr, distinct } => Ok(PlanAggregation { function: PlanAggregationFunction::Sum, - parameter: Some(self.build_for_expression(expr, variables, graph_name)?), + parameter: Some(self.build_for_expression(expr, variables)?), distinct: *distinct, }), AggregateExpression::Min { expr, distinct } => Ok(PlanAggregation { function: PlanAggregationFunction::Min, - parameter: Some(self.build_for_expression(expr, variables, graph_name)?), + parameter: Some(self.build_for_expression(expr, variables)?), distinct: *distinct, }), AggregateExpression::Max { expr, distinct } => Ok(PlanAggregation { function: PlanAggregationFunction::Max, - parameter: Some(self.build_for_expression(expr, variables, graph_name)?), + parameter: Some(self.build_for_expression(expr, variables)?), distinct: *distinct, }), AggregateExpression::Avg { expr, distinct } => Ok(PlanAggregation { function: PlanAggregationFunction::Avg, - parameter: Some(self.build_for_expression(expr, variables, graph_name)?), + parameter: Some(self.build_for_expression(expr, variables)?), distinct: *distinct, }), 
AggregateExpression::Sample { expr, distinct } => Ok(PlanAggregation { function: PlanAggregationFunction::Sample, - parameter: Some(self.build_for_expression(expr, variables, graph_name)?), + parameter: Some(self.build_for_expression(expr, variables)?), distinct: *distinct, }), AggregateExpression::GroupConcat { @@ -1087,7 +893,7 @@ impl<'a> PlanBuilder<'a> { function: PlanAggregationFunction::GroupConcat { separator: Rc::from(separator.as_deref().unwrap_or(" ")), }, - parameter: Some(self.build_for_expression(expr, variables, graph_name)?), + parameter: Some(self.build_for_expression(expr, variables)?), distinct: *distinct, }), AggregateExpression::Custom { .. } => Err(EvaluationError::msg( @@ -1203,348 +1009,6 @@ impl<'a> PlanBuilder<'a> { } } - fn convert_pattern_value_id(from_value: &PatternValue, to: &mut Vec) -> PatternValue { - match from_value { - PatternValue::Constant(c) => PatternValue::Constant(c.clone()), - PatternValue::Variable(from_id) => { - PatternValue::Variable(Self::convert_plan_variable(from_id, to)) - } - PatternValue::TriplePattern(triple) => { - PatternValue::TriplePattern(Box::new(TriplePatternValue { - subject: Self::convert_pattern_value_id(&triple.subject, to), - predicate: Self::convert_pattern_value_id(&triple.predicate, to), - object: Self::convert_pattern_value_id(&triple.object, to), - })) - } - } - } - - fn convert_plan_variable(from_variable: &PlanVariable, to: &mut Vec) -> PlanVariable { - let encoded = if let Some(to_id) = to - .iter() - .enumerate() - .find_map(|(to_id, var)| (*var == from_variable.plain).then_some(to_id)) - { - to_id - } else { - to.push(Variable::new_unchecked(format!("{:x}", random::()))); - to.len() - 1 - }; - PlanVariable { - encoded, - plain: from_variable.plain.clone(), - } - } - - fn can_use_for_loop_left_join(node: &PlanNode) -> bool { - // We forbid MINUS, SERVICE and everything that affects cardinality in for loop left joins - match node { - PlanNode::StaticBindings { .. 
} - | PlanNode::QuadPattern { .. } - | PlanNode::PathPattern { .. } => true, - PlanNode::Filter { child, .. } - | PlanNode::Extend { child, .. } - | PlanNode::Sort { child, .. } - | PlanNode::Project { child, .. } - | PlanNode::Aggregate { child, .. } => Self::can_use_for_loop_left_join(child), - PlanNode::Union { children } => { - children.iter().all(|c| Self::can_use_for_loop_left_join(c)) - } - PlanNode::HashJoin { left, right } - | PlanNode::ForLoopJoin { left, right } - | PlanNode::ForLoopLeftJoin { left, right, .. } - | PlanNode::HashLeftJoin { left, right, .. } => { - Self::can_use_for_loop_left_join(left) && Self::can_use_for_loop_left_join(right) - } - PlanNode::AntiJoin { .. } - | PlanNode::Service { .. } - | PlanNode::HashDeduplicate { .. } - | PlanNode::Reduced { .. } - | PlanNode::Skip { .. } - | PlanNode::Limit { .. } => false, - } - } - - fn add_left_join_problematic_variables(node: &PlanNode, set: &mut BTreeSet) { - match node { - PlanNode::StaticBindings { .. } - | PlanNode::QuadPattern { .. } - | PlanNode::PathPattern { .. } => (), - PlanNode::Filter { child, expression } => { - let always_already_bound = child.always_bound_variables(); - expression.lookup_used_variables(&mut |v| { - if !always_already_bound.contains(&v) { - set.insert(v); - } - }); - Self::add_left_join_problematic_variables(child, set); - } - PlanNode::Union { children } => { - for child in children.iter() { - Self::add_left_join_problematic_variables(child, set); - } - } - PlanNode::HashJoin { left, right } | PlanNode::ForLoopJoin { left, right } => { - Self::add_left_join_problematic_variables(left, set); - Self::add_left_join_problematic_variables(right, set); - } - PlanNode::AntiJoin { left, .. } => { - Self::add_left_join_problematic_variables(left, set); - } - PlanNode::ForLoopLeftJoin { left, right, .. 
} => { - Self::add_left_join_problematic_variables(left, set); - right.lookup_used_variables(&mut |v| { - set.insert(v); - }); - } - PlanNode::HashLeftJoin { - left, - right, - expression, - } => { - Self::add_left_join_problematic_variables(left, set); - right.lookup_used_variables(&mut |v| { - set.insert(v); - }); - let always_already_bound = left.always_bound_variables(); - expression.lookup_used_variables(&mut |v| { - if !always_already_bound.contains(&v) { - set.insert(v); - } - }); - } - PlanNode::Extend { - child, expression, .. - } => { - let always_already_bound = child.always_bound_variables(); - expression.lookup_used_variables(&mut |v| { - if !always_already_bound.contains(&v) { - set.insert(v); - } - }); - Self::add_left_join_problematic_variables(child, set); - Self::add_left_join_problematic_variables(child, set); - } - PlanNode::Sort { child, .. } - | PlanNode::HashDeduplicate { child } - | PlanNode::Reduced { child } - | PlanNode::Project { child, .. } => { - Self::add_left_join_problematic_variables(child, set); - } - PlanNode::Skip { child, .. } | PlanNode::Limit { child, .. } => { - // Any variable might affect arity - child.lookup_used_variables(&mut |v| { - set.insert(v); - }) - } - PlanNode::Service { child, silent, .. } => { - if *silent { - child.lookup_used_variables(&mut |v| { - set.insert(v); - }); - } else { - Self::add_left_join_problematic_variables(child, set) - } - } - PlanNode::Aggregate { - key_variables, - aggregates, - .. - } => { - set.extend(key_variables.iter().map(|v| v.encoded)); - //TODO: This is too harsh - for (_, var) in aggregates.iter() { - set.insert(var.encoded); - } - } - } - } - - fn new_join(&self, mut left: PlanNode, mut right: PlanNode) -> PlanNode { - // We first use VALUES to filter the following patterns evaluation - if matches!(right, PlanNode::StaticBindings { .. 
}) { - swap(&mut left, &mut right); - } - - if self.with_optimizations - && Self::is_fit_for_for_loop_join(&right) - && Self::has_some_common_variables(&left, &right) - { - PlanNode::ForLoopJoin { - left: Rc::new(left), - right: Rc::new(right), - } - } else { - // Let's avoid materializing right if left is already materialized - // TODO: be smarter and reuse already existing materialization - if matches!(left, PlanNode::StaticBindings { .. }) { - swap(&mut left, &mut right); - } - PlanNode::HashJoin { - left: Rc::new(left), - right: Rc::new(right), - } - } - } - - fn has_some_common_variables(left: &PlanNode, right: &PlanNode) -> bool { - left.always_bound_variables() - .intersection(&right.always_bound_variables()) - .next() - .is_some() - } - - fn is_fit_for_for_loop_join(node: &PlanNode) -> bool { - //TODO: think more about it - match node { - PlanNode::StaticBindings { .. } - | PlanNode::QuadPattern { .. } - | PlanNode::PathPattern { .. } => true, - PlanNode::ForLoopJoin { left, right } | PlanNode::HashJoin { left, right } => { - Self::is_fit_for_for_loop_join(left) && Self::is_fit_for_for_loop_join(right) - } - PlanNode::Filter { child, .. } | PlanNode::Extend { child, .. } => { - Self::is_fit_for_for_loop_join(child) - } - PlanNode::Union { children } => { - children.iter().all(|c| Self::is_fit_for_for_loop_join(c)) - } - PlanNode::AntiJoin { .. } - | PlanNode::HashLeftJoin { .. } - | PlanNode::ForLoopLeftJoin { .. } - | PlanNode::Service { .. } - | PlanNode::Sort { .. } - | PlanNode::HashDeduplicate { .. } - | PlanNode::Reduced { .. } - | PlanNode::Skip { .. } - | PlanNode::Limit { .. } - | PlanNode::Project { .. } - | PlanNode::Aggregate { .. 
} => false, - } - } - - fn push_filter(&self, node: Rc, filter: Box) -> PlanNode { - if !self.with_optimizations { - return PlanNode::Filter { - child: node, - expression: filter, - }; - } - if let PlanExpression::And(filters) = *filter { - return filters - .into_iter() - .fold((*node.as_ref()).clone(), |acc, f| { - self.push_filter(Rc::new(acc), Box::new(f)) - }); - } - let mut filter_variables = BTreeSet::new(); - filter.lookup_used_variables(&mut |v| { - filter_variables.insert(v); - }); - match node.as_ref() { - PlanNode::HashJoin { left, right } => { - if filter_variables.iter().all(|v| left.is_variable_bound(*v)) { - if filter_variables.iter().all(|v| right.is_variable_bound(*v)) { - PlanNode::HashJoin { - left: Rc::new(self.push_filter(Rc::clone(left), filter.clone())), - right: Rc::new(self.push_filter(Rc::clone(right), filter)), - } - } else { - PlanNode::HashJoin { - left: Rc::new(self.push_filter(Rc::clone(left), filter)), - right: Rc::clone(right), - } - } - } else if filter_variables.iter().all(|v| right.is_variable_bound(*v)) { - PlanNode::HashJoin { - left: Rc::clone(left), - right: Rc::new(self.push_filter(Rc::clone(right), filter)), - } - } else { - PlanNode::Filter { - child: Rc::new(PlanNode::HashJoin { - left: Rc::clone(left), - right: Rc::clone(right), - }), - expression: filter, - } - } - } - PlanNode::ForLoopJoin { left, right } => { - if filter_variables.iter().all(|v| left.is_variable_bound(*v)) { - PlanNode::ForLoopJoin { - left: Rc::new(self.push_filter(Rc::clone(left), filter)), - right: Rc::clone(right), - } - } else if filter_variables.iter().all(|v| right.is_variable_bound(*v)) { - PlanNode::ForLoopJoin { - //TODO: should we do that always? 
- left: Rc::clone(left), - right: Rc::new(self.push_filter(Rc::clone(right), filter)), - } - } else { - PlanNode::Filter { - child: Rc::new(PlanNode::HashJoin { - left: Rc::clone(left), - right: Rc::clone(right), - }), - expression: filter, - } - } - } - PlanNode::Extend { - child, - expression, - variable, - } => { - //TODO: handle the case where the filter generates an expression variable - if filter_variables.iter().all(|v| child.is_variable_bound(*v)) { - PlanNode::Extend { - child: Rc::new(self.push_filter(Rc::clone(child), filter)), - expression: expression.clone(), - variable: variable.clone(), - } - } else { - PlanNode::Filter { - child: Rc::new(PlanNode::Extend { - child: Rc::clone(child), - expression: expression.clone(), - variable: variable.clone(), - }), - expression: filter, - } - } - } - PlanNode::Filter { child, expression } => { - if filter_variables.iter().all(|v| child.is_variable_bound(*v)) { - PlanNode::Filter { - child: Rc::new(self.push_filter(Rc::clone(child), filter)), - expression: expression.clone(), - } - } else { - PlanNode::Filter { - child: Rc::clone(child), - expression: Box::new(PlanExpression::And(vec![ - *expression.clone(), - *filter, - ])), - } - } - } - PlanNode::Union { children } => PlanNode::Union { - children: children - .iter() - .map(|c| Rc::new(self.push_filter(Rc::clone(c), filter.clone()))) - .collect(), - }, - _ => PlanNode::Filter { - //TODO: more? 
- child: node, - expression: filter, - }, - } - } - fn build_term<'b>(&self, term: impl Into>) -> EncodedTerm { self.dataset.encode_term(term) } @@ -1597,101 +1061,6 @@ fn slice_key(slice: &[T], element: &T) -> Option { None } -fn sort_bgp(p: &[TriplePattern]) -> Vec<&TriplePattern> { - let mut assigned_variables = HashSet::default(); - let mut assigned_blank_nodes = HashSet::default(); - let mut new_p: Vec<_> = p.iter().collect(); - - for i in 0..new_p.len() { - new_p[i..].sort_by(|p1, p2| { - estimate_pattern_cost(p1, &assigned_variables, &assigned_blank_nodes).cmp( - &estimate_pattern_cost(p2, &assigned_variables, &assigned_blank_nodes), - ) - }); - add_pattern_variables(new_p[i], &mut assigned_variables, &mut assigned_blank_nodes); - } - - new_p -} - -fn estimate_pattern_cost( - pattern: &TriplePattern, - assigned_variables: &HashSet<&Variable>, - assigned_blank_nodes: &HashSet<&BlankNode>, -) -> u32 { - let mut count = 0; - match &pattern.subject { - TermPattern::NamedNode(_) | TermPattern::Literal(_) => count += 1, - TermPattern::BlankNode(bnode) => { - if !assigned_blank_nodes.contains(bnode) { - count += 4; - } - } - TermPattern::Variable(v) => { - if !assigned_variables.contains(v) { - count += 4; - } - } - TermPattern::Triple(t) => { - count += estimate_pattern_cost(t, assigned_variables, assigned_blank_nodes) - } - } - if let NamedNodePattern::Variable(v) = &pattern.predicate { - if !assigned_variables.contains(v) { - count += 4; - } - } else { - count += 1; - } - match &pattern.object { - TermPattern::NamedNode(_) | TermPattern::Literal(_) => count += 1, - TermPattern::BlankNode(bnode) => { - if !assigned_blank_nodes.contains(bnode) { - count += 4; - } - } - TermPattern::Variable(v) => { - if !assigned_variables.contains(v) { - count += 4; - } - } - TermPattern::Triple(t) => { - count += estimate_pattern_cost(t, assigned_variables, assigned_blank_nodes) - } - } - count -} - -fn add_pattern_variables<'a>( - pattern: &'a TriplePattern, - variables: &mut 
HashSet<&'a Variable>, - blank_nodes: &mut HashSet<&'a BlankNode>, -) { - match &pattern.subject { - TermPattern::NamedNode(_) | TermPattern::Literal(_) => (), - TermPattern::BlankNode(bnode) => { - blank_nodes.insert(bnode); - } - TermPattern::Variable(v) => { - variables.insert(v); - } - TermPattern::Triple(t) => add_pattern_variables(t, variables, blank_nodes), - } - if let NamedNodePattern::Variable(v) = &pattern.predicate { - variables.insert(v); - } - match &pattern.object { - TermPattern::NamedNode(_) | TermPattern::Literal(_) => (), - TermPattern::BlankNode(bnode) => { - blank_nodes.insert(bnode); - } - TermPattern::Variable(v) => { - variables.insert(v); - } - TermPattern::Triple(t) => add_pattern_variables(t, variables, blank_nodes), - } -} - fn compile_static_pattern_if_exists( pattern: &Expression, options: Option<&Expression>, diff --git a/testsuite/Cargo.toml b/testsuite/Cargo.toml index 92305371..20579b1f 100644 --- a/testsuite/Cargo.toml +++ b/testsuite/Cargo.toml @@ -16,4 +16,6 @@ anyhow = "1" clap = { version = "4", features = ["derive"] } time = { version = "0.3", features = ["formatting"] } oxigraph = { path = "../lib" } +sparopt = { path = "../lib/sparopt" } +spargebra = { path = "../lib/spargebra" } text-diff = "0.4" diff --git a/testsuite/oxigraph-tests/sparql-optimization/bgp_join_reordering_input.rq b/testsuite/oxigraph-tests/sparql-optimization/bgp_join_reordering_input.rq new file mode 100644 index 00000000..c71b46cb --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/bgp_join_reordering_input.rq @@ -0,0 +1,9 @@ +PREFIX ex: + +SELECT ?s ?o WHERE { + ?m2 ex:p2 ?o . + ?s ex:p1 ?m1 , ?m2 . + ?m1 ex:p2 ?o . + ?s ex:p1prime ?m1 . + ?s a ex:C . 
+} diff --git a/testsuite/oxigraph-tests/sparql-optimization/bgp_join_reordering_output.rq b/testsuite/oxigraph-tests/sparql-optimization/bgp_join_reordering_output.rq new file mode 100644 index 00000000..040c11e3 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/bgp_join_reordering_output.rq @@ -0,0 +1,10 @@ +PREFIX ex: + +SELECT ?s ?o WHERE { + ?s a ex:C . + ?s ex:p1 ?m1 . + ?s ex:p1prime ?m1 . + ?s ex:p1 ?m2 . + ?m2 ex:p2 ?o . + ?m1 ex:p2 ?o . +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/bind_always_false_input.rq b/testsuite/oxigraph-tests/sparql-optimization/bind_always_false_input.rq new file mode 100644 index 00000000..2ebda3b4 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/bind_always_false_input.rq @@ -0,0 +1,4 @@ +SELECT ?a ?o WHERE { + ?s ?p ?o . + FILTER(BOUND(?a)) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/bind_always_false_output.rq b/testsuite/oxigraph-tests/sparql-optimization/bind_always_false_output.rq new file mode 100644 index 00000000..493c0c64 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/bind_always_false_output.rq @@ -0,0 +1,3 @@ +SELECT ?a ?o WHERE { + VALUES () {} +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/bind_always_true_input.rq b/testsuite/oxigraph-tests/sparql-optimization/bind_always_true_input.rq new file mode 100644 index 00000000..1230938d --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/bind_always_true_input.rq @@ -0,0 +1,4 @@ +SELECT ?s WHERE { + ?s ?p ?o . 
+ FILTER(BOUND(?s)) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/bind_always_true_output.rq b/testsuite/oxigraph-tests/sparql-optimization/bind_always_true_output.rq new file mode 100644 index 00000000..89aeaa3d --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/bind_always_true_output.rq @@ -0,0 +1,3 @@ +SELECT ?s WHERE { + ?s ?p ?o +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/empty_union_input.rq b/testsuite/oxigraph-tests/sparql-optimization/empty_union_input.rq new file mode 100644 index 00000000..dc8de6f4 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/empty_union_input.rq @@ -0,0 +1,3 @@ +SELECT ?o WHERE { + { ?s ?p ?o } UNION { VALUES () {} } +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/empty_union_output.rq b/testsuite/oxigraph-tests/sparql-optimization/empty_union_output.rq new file mode 100644 index 00000000..e0da523f --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/empty_union_output.rq @@ -0,0 +1,3 @@ +SELECT ?o WHERE { + ?s ?p ?o . +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/equal_to_same_term_input.rq b/testsuite/oxigraph-tests/sparql-optimization/equal_to_same_term_input.rq new file mode 100644 index 00000000..015a2163 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/equal_to_same_term_input.rq @@ -0,0 +1,5 @@ +SELECT ?s1 ?s2 ?o1 ?o2 WHERE { + ?s1 ?p1 ?o1 . + ?s2 ?p2 ?o2 . + FILTER(?p1 = ?p2) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/equal_to_same_term_output.rq b/testsuite/oxigraph-tests/sparql-optimization/equal_to_same_term_output.rq new file mode 100644 index 00000000..8e3c96c7 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/equal_to_same_term_output.rq @@ -0,0 +1,5 @@ +SELECT ?s1 ?s2 ?o1 ?o2 WHERE { + ?s1 ?p1 ?o1 . + ?s2 ?p2 ?o2 . 
+ FILTER(sameTerm(?p2, ?p1)) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/exists_always_false_input.rq b/testsuite/oxigraph-tests/sparql-optimization/exists_always_false_input.rq new file mode 100644 index 00000000..e3615fa7 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/exists_always_false_input.rq @@ -0,0 +1,4 @@ +SELECT ?s WHERE { + ?s ?p ?o . + FILTER(EXISTS { VALUES () {}}) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/exists_always_false_output.rq b/testsuite/oxigraph-tests/sparql-optimization/exists_always_false_output.rq new file mode 100644 index 00000000..13583411 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/exists_always_false_output.rq @@ -0,0 +1,3 @@ +SELECT ?s WHERE { + VALUES() {} +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/false_and_something_input.rq b/testsuite/oxigraph-tests/sparql-optimization/false_and_something_input.rq new file mode 100644 index 00000000..2123e97a --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/false_and_something_input.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . + FILTER(false && ?o1 = ?o2) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/false_and_something_output.rq b/testsuite/oxigraph-tests/sparql-optimization/false_and_something_output.rq new file mode 100644 index 00000000..ba012e0a --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/false_and_something_output.rq @@ -0,0 +1,3 @@ +SELECT ?o1 ?o2 WHERE { + VALUES () {} +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/false_or_something_input.rq b/testsuite/oxigraph-tests/sparql-optimization/false_or_something_input.rq new file mode 100644 index 00000000..3710d7eb --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/false_or_something_input.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . 
+ FILTER(false || ?o1 = ?o2) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/false_or_something_output.rq b/testsuite/oxigraph-tests/sparql-optimization/false_or_something_output.rq new file mode 100644 index 00000000..95741032 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/false_or_something_output.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . + FILTER(?o2 = ?o1) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/if_always_false_input.rq b/testsuite/oxigraph-tests/sparql-optimization/if_always_false_input.rq new file mode 100644 index 00000000..6c8aa537 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/if_always_false_input.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . + FILTER(IF(false, ?o1 = ?o2, ?o1 != ?o2)) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/if_always_false_output.rq b/testsuite/oxigraph-tests/sparql-optimization/if_always_false_output.rq new file mode 100644 index 00000000..c64ec6ea --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/if_always_false_output.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . + FILTER(?o2 != ?o1) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/if_always_true_input.rq b/testsuite/oxigraph-tests/sparql-optimization/if_always_true_input.rq new file mode 100644 index 00000000..52a63e4b --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/if_always_true_input.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . + FILTER(IF(true, ?o1 = ?o2, ?o1 != ?o2)) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/if_always_true_output.rq b/testsuite/oxigraph-tests/sparql-optimization/if_always_true_output.rq new file mode 100644 index 00000000..95741032 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/if_always_true_output.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . 
+ FILTER(?o2 = ?o1) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/manifest.ttl b/testsuite/oxigraph-tests/sparql-optimization/manifest.ttl new file mode 100644 index 00000000..c509fe79 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/manifest.ttl @@ -0,0 +1,132 @@ +@prefix rdf: . +@prefix : . +@prefix rdfs: . +@prefix mf: . +@prefix ox: . + +<> rdf:type mf:Manifest ; + rdfs:label "Oxigraph SPARQL optimization tests" ; + mf:entries + ( + :unbound_filter + :unbound_bind + :something_or_true + :true_or_something + :something_or_false + :false_or_something + :something_and_true + :true_and_something + :something_and_false + :false_and_something + :equal_to_same_term + :bind_always_true + :bind_always_false + :if_always_true + :if_always_false + :exists_always_false + :push_filter + :push_optional_filter + :empty_union + :bgp_join_reordering + ) . + + +:unbound_filter rdf:type ox:QueryOptimizationTest ; + mf:name "unbound variable in filter" ; + mf:action ; + mf:result . + +:unbound_bind rdf:type ox:QueryOptimizationTest ; + mf:name "unbound variable in bind" ; + mf:action ; + mf:result . + +:something_or_true rdf:type ox:QueryOptimizationTest ; + mf:name "something || true" ; + mf:action ; + mf:result . + +:true_or_something rdf:type ox:QueryOptimizationTest ; + mf:name "true || something" ; + mf:action ; + mf:result . + +:something_or_false rdf:type ox:QueryOptimizationTest ; + mf:name "something || false" ; + mf:action ; + mf:result . + +:false_or_something rdf:type ox:QueryOptimizationTest ; + mf:name "false || something" ; + mf:action ; + mf:result . + +:something_and_true rdf:type ox:QueryOptimizationTest ; + mf:name "something && true" ; + mf:action ; + mf:result . + +:true_and_something rdf:type ox:QueryOptimizationTest ; + mf:name "true && something" ; + mf:action ; + mf:result . + +:something_and_false rdf:type ox:QueryOptimizationTest ; + mf:name "something && false" ; + mf:action ; + mf:result .
+ +:false_and_something rdf:type ox:QueryOptimizationTest ; + mf:name "false && something" ; + mf:action ; + mf:result . + +:equal_to_same_term a ox:QueryOptimizationTest ; + mf:name "equal to same term" ; + mf:action ; + mf:result . + +:bind_always_true rdf:type ox:QueryOptimizationTest ; + mf:name "BIND() always true" ; + mf:action ; + mf:result . + +:bind_always_false rdf:type ox:QueryOptimizationTest ; + mf:name "BIND() always false" ; + mf:action ; + mf:result . + +:if_always_true rdf:type ox:QueryOptimizationTest ; + mf:name "IF() always true" ; + mf:action ; + mf:result . + +:if_always_false rdf:type ox:QueryOptimizationTest ; + mf:name "IF() always false" ; + mf:action ; + mf:result . + +:exists_always_false rdf:type ox:QueryOptimizationTest ; + mf:name "EXISTS {} always false" ; + mf:action ; + mf:result . + +:push_filter rdf:type ox:QueryOptimizationTest ; + mf:name "push filter down" ; + mf:action ; + mf:result . + +:push_optional_filter rdf:type ox:QueryOptimizationTest ; + mf:name "push OPTIONAL filter down" ; + mf:action ; + mf:result . + +:empty_union rdf:type ox:QueryOptimizationTest ; + mf:name "empty UNION" ; + mf:action ; + mf:result . + +:bgp_join_reordering rdf:type ox:QueryOptimizationTest ; + mf:name "BGP join reordering" ; + mf:action ; + mf:result . diff --git a/testsuite/oxigraph-tests/sparql-optimization/push_filter_input.rq b/testsuite/oxigraph-tests/sparql-optimization/push_filter_input.rq new file mode 100644 index 00000000..58e64a8e --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/push_filter_input.rq @@ -0,0 +1,11 @@ +PREFIX : + +SELECT ?o1 ?o2 ?o4 ?o5 WHERE { + ?s :p1 ?o1 ; :p4 ?o4 ; :p5 ?o5 . 
+ LATERAL { ?s :p2 ?o2 } + MINUS { ?s :p3 ?o3 } + FILTER(?o1 = 1) + FILTER(?o2 = 2) + FILTER(?o4 = 4) + FILTER(?o1 = ?o5) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/push_filter_output.rq b/testsuite/oxigraph-tests/sparql-optimization/push_filter_output.rq new file mode 100644 index 00000000..3cecb4b3 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/push_filter_output.rq @@ -0,0 +1,18 @@ +PREFIX : + +SELECT ?o1 ?o2 ?o4 ?o5 WHERE { + { + { + { + { ?s :p1 ?o1 FILTER(1 = ?o1) } + LATERAL { ?s :p4 ?o4 } + FILTER(?o4 = 4) + } + LATERAL { ?s :p5 ?o5 } + FILTER(?o5 = ?o1) + } + LATERAL { ?s :p2 ?o2 } + FILTER(?o2 = 2) + } + MINUS { ?s :p3 ?o3 } +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/push_optional_filter_input.rq b/testsuite/oxigraph-tests/sparql-optimization/push_optional_filter_input.rq new file mode 100644 index 00000000..a78e08e3 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/push_optional_filter_input.rq @@ -0,0 +1,5 @@ +SELECT ?s ?o WHERE { + ?s a ?t . + OPTIONAL { { ?s ?p ?o } UNION { ?s ?p ?o2 } FILTER(?o = 1) } + OPTIONAL { { ?s ?p ?o } UNION { ?s ?p2 ?o2 } FILTER(?o = ?t) } +} \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql-optimization/push_optional_filter_output.rq b/testsuite/oxigraph-tests/sparql-optimization/push_optional_filter_output.rq new file mode 100644 index 00000000..067f0d87 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/push_optional_filter_output.rq @@ -0,0 +1,5 @@ +SELECT ?s ?o WHERE { + ?s a ?t . 
+ LATERAL { VALUES () {()} OPTIONAL { { ?s ?p ?o FILTER(1 = ?o) } UNION { ?s ?p ?o2 FILTER(1 = ?o) } } } + LATERAL { VALUES () {()} OPTIONAL { { ?s ?p ?o } UNION { ?s ?p2 ?o2 } FILTER(?t = ?o) } } +} \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql-optimization/something_and_false_input.rq b/testsuite/oxigraph-tests/sparql-optimization/something_and_false_input.rq new file mode 100644 index 00000000..c99ca675 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/something_and_false_input.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . + FILTER(?o1 = ?o2 && false) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/something_and_false_output.rq b/testsuite/oxigraph-tests/sparql-optimization/something_and_false_output.rq new file mode 100644 index 00000000..ba012e0a --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/something_and_false_output.rq @@ -0,0 +1,3 @@ +SELECT ?o1 ?o2 WHERE { + VALUES () {} +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/something_and_true_input.rq b/testsuite/oxigraph-tests/sparql-optimization/something_and_true_input.rq new file mode 100644 index 00000000..de09d711 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/something_and_true_input.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . + FILTER(?o1 = ?o2 && true) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/something_and_true_output.rq b/testsuite/oxigraph-tests/sparql-optimization/something_and_true_output.rq new file mode 100644 index 00000000..95741032 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/something_and_true_output.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . 
+ FILTER(?o2 = ?o1) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/something_or_false_input.rq b/testsuite/oxigraph-tests/sparql-optimization/something_or_false_input.rq new file mode 100644 index 00000000..495bbd1b --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/something_or_false_input.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . + FILTER(?o1 = ?o2 || false) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/something_or_false_output.rq b/testsuite/oxigraph-tests/sparql-optimization/something_or_false_output.rq new file mode 100644 index 00000000..95741032 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/something_or_false_output.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . + FILTER(?o2 = ?o1) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/something_or_true_input.rq b/testsuite/oxigraph-tests/sparql-optimization/something_or_true_input.rq new file mode 100644 index 00000000..1a1c3ca6 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/something_or_true_input.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . + FILTER(?o1 = ?o2 || true) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/something_or_true_output.rq b/testsuite/oxigraph-tests/sparql-optimization/something_or_true_output.rq new file mode 100644 index 00000000..7af27485 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/something_or_true_output.rq @@ -0,0 +1,3 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/true_and_something_input.rq b/testsuite/oxigraph-tests/sparql-optimization/true_and_something_input.rq new file mode 100644 index 00000000..3e34f239 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/true_and_something_input.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . 
+ FILTER(true && ?o1 = ?o2) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/true_and_something_output.rq b/testsuite/oxigraph-tests/sparql-optimization/true_and_something_output.rq new file mode 100644 index 00000000..95741032 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/true_and_something_output.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . + FILTER(?o2 = ?o1) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/true_or_something_input.rq b/testsuite/oxigraph-tests/sparql-optimization/true_or_something_input.rq new file mode 100644 index 00000000..a47c54f7 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/true_or_something_input.rq @@ -0,0 +1,4 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . + FILTER(true || ?o1 = ?o2) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/true_or_something_output.rq b/testsuite/oxigraph-tests/sparql-optimization/true_or_something_output.rq new file mode 100644 index 00000000..7af27485 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/true_or_something_output.rq @@ -0,0 +1,3 @@ +SELECT ?o1 ?o2 WHERE { + ?s ?p ?o1 , ?o2 . 
+} diff --git a/testsuite/oxigraph-tests/sparql-optimization/unbound_bind_input.rq b/testsuite/oxigraph-tests/sparql-optimization/unbound_bind_input.rq new file mode 100644 index 00000000..25808725 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/unbound_bind_input.rq @@ -0,0 +1,3 @@ +SELECT ?o WHERE { + BIND(?a AS ?o) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/unbound_bind_output.rq b/testsuite/oxigraph-tests/sparql-optimization/unbound_bind_output.rq new file mode 100644 index 00000000..7ef17038 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/unbound_bind_output.rq @@ -0,0 +1,3 @@ +SELECT ?o WHERE { + VALUES () { () } +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/unbound_filter_input.rq b/testsuite/oxigraph-tests/sparql-optimization/unbound_filter_input.rq new file mode 100644 index 00000000..273ed694 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/unbound_filter_input.rq @@ -0,0 +1,4 @@ +SELECT ?o WHERE { + ?s ?p ?o . 
+ FILTER(?a) +} diff --git a/testsuite/oxigraph-tests/sparql-optimization/unbound_filter_output.rq b/testsuite/oxigraph-tests/sparql-optimization/unbound_filter_output.rq new file mode 100644 index 00000000..36ac8cf8 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql-optimization/unbound_filter_output.rq @@ -0,0 +1,3 @@ +SELECT ?o WHERE { + VALUES () {} +} diff --git a/testsuite/src/sparql_evaluator.rs b/testsuite/src/sparql_evaluator.rs index 4790d175..dc68dff4 100644 --- a/testsuite/src/sparql_evaluator.rs +++ b/testsuite/src/sparql_evaluator.rs @@ -8,6 +8,7 @@ use oxigraph::model::vocab::*; use oxigraph::model::*; use oxigraph::sparql::*; use oxigraph::store::Store; +use sparopt::Optimizer; use std::collections::HashMap; use std::fmt::Write; use std::io::{self, Cursor}; @@ -67,6 +68,10 @@ pub fn register_sparql_tests(evaluator: &mut TestEvaluator) { "https://github.com/oxigraph/oxigraph/tests#NegativeTsvResultsSyntaxTest", evaluate_negative_tsv_result_syntax_test, ); + evaluator.register( + "https://github.com/oxigraph/oxigraph/tests#QueryOptimizationTest", + evaluate_query_optimization_test, + ); } fn evaluate_positive_syntax_test(test: &Test) -> Result<()> { @@ -717,3 +722,54 @@ fn load_dataset_to_store(url: &str, store: &Store) -> Result<()> { }?; Ok(()) } + +fn evaluate_query_optimization_test(test: &Test) -> Result<()> { + let action = test + .action + .as_deref() + .ok_or_else(|| anyhow!("No action found for test {test}"))?; + let actual = (&Optimizer::optimize_graph_pattern( + (&if let spargebra::Query::Select { pattern, .. } = + spargebra::Query::parse(&read_file_to_string(action)?, Some(action))? + { + pattern + } else { + bail!("Only SELECT queries are supported in query sparql-optimization tests") + }) + .into(), + )) + .into(); + let result = test + .result + .as_ref() + .ok_or_else(|| anyhow!("No tests result found"))?; + let expected = if let spargebra::Query::Select { pattern, .. 
} = + spargebra::Query::parse(&read_file_to_string(result)?, Some(result))? + { + pattern + } else { + bail!("Only SELECT queries are supported in query sparql-optimization tests") + }; + if expected == actual { + Ok(()) + } else { + bail!( + "Failure on {test}.\nDiff:\n{}\n", + format_diff( + &spargebra::Query::Select { + pattern: expected, + dataset: None, + base_iri: None + } + .to_sse(), + &spargebra::Query::Select { + pattern: actual, + dataset: None, + base_iri: None + } + .to_sse(), + "query" + ) + ) + } +} diff --git a/testsuite/tests/oxigraph.rs b/testsuite/tests/oxigraph.rs index 04360a9e..a37bfab2 100644 --- a/testsuite/tests/oxigraph.rs +++ b/testsuite/tests/oxigraph.rs @@ -3,10 +3,10 @@ use oxigraph_testsuite::evaluator::TestEvaluator; use oxigraph_testsuite::manifest::TestManifest; use oxigraph_testsuite::sparql_evaluator::register_sparql_tests; -fn run_testsuite(manifest_urls: Vec<&str>) -> Result<()> { +fn run_testsuite(manifest_url: &str) -> Result<()> { let mut evaluator = TestEvaluator::default(); register_sparql_tests(&mut evaluator); - let manifest = TestManifest::new(manifest_urls); + let manifest = TestManifest::new([manifest_url]); let results = evaluator.evaluate(manifest)?; let mut errors = Vec::default(); @@ -27,14 +27,15 @@ fn run_testsuite(manifest_urls: Vec<&str>) -> Result<()> { #[test] fn oxigraph_sparql_testsuite() -> Result<()> { - run_testsuite(vec![ - "https://github.com/oxigraph/oxigraph/tests/sparql/manifest.ttl", - ]) + run_testsuite("https://github.com/oxigraph/oxigraph/tests/sparql/manifest.ttl") } #[test] fn oxigraph_sparql_results_testsuite() -> Result<()> { - run_testsuite(vec![ - "https://github.com/oxigraph/oxigraph/tests/sparql-results/manifest.ttl", - ]) + run_testsuite("https://github.com/oxigraph/oxigraph/tests/sparql-results/manifest.ttl") +} + +#[test] +fn oxigraph_optimizer_testsuite() -> Result<()> { + run_testsuite("https://github.com/oxigraph/oxigraph/tests/sparql-optimization/manifest.ttl") } diff --git 
a/testsuite/tests/sparql.rs b/testsuite/tests/sparql.rs index ca59e397..d058bd71 100644 --- a/testsuite/tests/sparql.rs +++ b/testsuite/tests/sparql.rs @@ -67,6 +67,7 @@ fn sparql10_w3c_query_evaluation_testsuite() -> Result<()> { // We choose to simplify first the nested group patterns in OPTIONAL "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-005-not-simplified", // This test relies on naive iteration on the input file + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/reduced/manifest#reduced-1", "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/reduced/manifest#reduced-2" ]) }