From be4a5b0b6bb36682a344540ea6f1a369bb63f972 Mon Sep 17 00:00:00 2001 From: Tpt Date: Tue, 30 Mar 2021 19:01:31 +0200 Subject: [PATCH] Moves SPARQL parser and algebra to an independent crate --- Cargo.toml | 1 + lib/Cargo.toml | 3 +- lib/src/sparql/algebra.rs | 1813 +------------------ lib/src/sparql/eval.rs | 14 +- lib/src/sparql/mod.rs | 47 +- lib/src/sparql/model.rs | 6 - lib/src/sparql/plan.rs | 2 +- lib/src/sparql/plan_builder.rs | 101 +- lib/src/sparql/update.rs | 280 +-- spargebra/Cargo.toml | 19 + spargebra/README.md | 40 + spargebra/src/algebra.rs | 1331 ++++++++++++++ spargebra/src/lib.rs | 36 + {lib/src/sparql => spargebra/src}/parser.rs | 296 +-- spargebra/src/query.rs | 181 ++ spargebra/src/term.rs | 426 +++++ spargebra/src/update.rs | 199 ++ testsuite/tests/sparql.rs | 7 +- 18 files changed, 2747 insertions(+), 2055 deletions(-) create mode 100644 spargebra/Cargo.toml create mode 100644 spargebra/README.md create mode 100644 spargebra/src/algebra.rs create mode 100644 spargebra/src/lib.rs rename {lib/src/sparql => spargebra/src}/parser.rs (89%) create mode 100644 spargebra/src/query.rs create mode 100644 spargebra/src/term.rs create mode 100644 spargebra/src/update.rs diff --git a/Cargo.toml b/Cargo.toml index 62e4bb84..27e4311d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ members = [ "lib", "python", "server", + "spargebra", "testsuite", "wikibase" ] diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 75a479f7..920bd531 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -37,13 +37,14 @@ rio_turtle = "0.5" rio_xml = "0.5" hex = "0.4" nom = "6" -peg = "0.7" siphasher = "0.3" lasso = {version="0.5", features=["multi-threaded", "inline-more"]} sophia_api = { version = "0.6.2", optional = true } http = "0.2" httparse = { version = "1", optional = true } native-tls = { version = "0.2", optional = true } +spargebra = { version = "0.1", path="../spargebra" } + [dev-dependencies] rayon = "1" diff --git a/lib/src/sparql/algebra.rs 
b/lib/src/sparql/algebra.rs index f7e415fb..44f17ae2 100644 --- a/lib/src/sparql/algebra.rs +++ b/lib/src/sparql/algebra.rs @@ -5,14 +5,9 @@ //! Warning: this implementation is an unstable work in progress use crate::model::*; -use crate::sparql::model::*; -use crate::sparql::parser::{parse_query, parse_update, ParseError}; -use oxiri::Iri; -use rio_api::model as rio; -use std::collections::BTreeSet; +use spargebra::GraphUpdateOperation; use std::convert::TryFrom; use std::fmt; -use std::rc::Rc; use std::str::FromStr; /// A parsed [SPARQL query](https://www.w3.org/TR/sparql11-query/) @@ -27,175 +22,69 @@ use std::str::FromStr; /// assert_eq!(query.to_string(), query_str); /// /// // We edit the query dataset specification -/// query.dataset_mut().set_default_graph(vec![NamedNode::new("http://example.com").unwrap().into()]); -/// assert_eq!(query.to_string(), "SELECT ?s ?p ?o FROM WHERE { ?s ?p ?o . }"); +/// let default = vec![NamedNode::new("http://example.com")?.into()]; +/// query.dataset_mut().set_default_graph(default.clone()); +/// assert_eq!(query.dataset().default_graph_graphs(), Some(default.as_slice())); /// # Result::Ok::<_, Box>(()) /// ``` #[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum Query { - /// [SELECT](https://www.w3.org/TR/sparql11-query/#select) - Select { - /// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) - dataset: QueryDataset, - /// The query selection graph pattern - pattern: GraphPattern, - /// The query base IRI - base_iri: Option>, - }, - /// [CONSTRUCT](https://www.w3.org/TR/sparql11-query/#construct) - Construct { - /// The query construction template - template: Vec, - /// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) - dataset: QueryDataset, - /// The query selection graph pattern - pattern: GraphPattern, - /// The query base IRI - base_iri: Option>, - }, - /// [DESCRIBE](https://www.w3.org/TR/sparql11-query/#describe) - Describe { - 
/// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) - dataset: QueryDataset, - /// The query selection graph pattern - pattern: GraphPattern, - /// The query base IRI - base_iri: Option>, - }, - /// [ASK](https://www.w3.org/TR/sparql11-query/#ask) - Ask { - /// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) - dataset: QueryDataset, - /// The query selection graph pattern - pattern: Rc, - /// The query base IRI - base_iri: Option>, - }, +pub struct Query { + pub(super) inner: spargebra::Query, + pub(super) dataset: QueryDataset, } impl Query { /// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query - pub fn parse(query: &str, base_iri: Option<&str>) -> Result { - parse_query(query, base_iri) + pub fn parse(query: &str, base_iri: Option<&str>) -> Result { + let query = spargebra::Query::parse(query, base_iri)?; + Ok(Self { + dataset: QueryDataset::from_algebra(match &query { + spargebra::Query::Select { dataset, .. } => dataset, + spargebra::Query::Construct { dataset, .. } => dataset, + spargebra::Query::Describe { dataset, .. } => dataset, + spargebra::Query::Ask { dataset, .. } => dataset, + }), + inner: query, + }) } /// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) pub fn dataset(&self) -> &QueryDataset { - match self { - Query::Select { dataset, .. } => dataset, - Query::Construct { dataset, .. } => dataset, - Query::Describe { dataset, .. } => dataset, - Query::Ask { dataset, .. } => dataset, - } + &self.dataset } /// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) pub fn dataset_mut(&mut self) -> &mut QueryDataset { - match self { - Query::Select { dataset, .. } => dataset, - Query::Construct { dataset, .. } => dataset, - Query::Describe { dataset, .. } => dataset, - Query::Ask { dataset, .. 
} => dataset, - } + &mut self.dataset } } impl fmt::Display for Query { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Query::Select { - dataset, - pattern, - base_iri, - } => { - if let Some(base_iri) = base_iri { - writeln!(f, "BASE <{}>", base_iri)?; - } - write!(f, "{}", SparqlGraphRootPattern { pattern, dataset }) - } - Query::Construct { - template, - dataset, - pattern, - base_iri, - } => { - if let Some(base_iri) = base_iri { - writeln!(f, "BASE <{}>", base_iri)?; - } - write!(f, "CONSTRUCT {{ ")?; - for triple in template.iter() { - write!(f, "{} ", SparqlTriplePattern(triple))?; - } - write!( - f, - "}}{} WHERE {{ {} }}", - dataset, - SparqlGraphRootPattern { - pattern, - dataset: &QueryDataset::default() - } - ) - } - Query::Describe { - dataset, - pattern, - base_iri, - } => { - if let Some(base_iri) = base_iri { - writeln!(f, "BASE <{}>", base_iri.as_str())?; - } - write!( - f, - "DESCRIBE *{} WHERE {{ {} }}", - dataset, - SparqlGraphRootPattern { - pattern, - dataset: &QueryDataset::default() - } - ) - } - Query::Ask { - dataset, - pattern, - base_iri, - } => { - if let Some(base_iri) = base_iri { - writeln!(f, "BASE <{}>", base_iri)?; - } - write!( - f, - "ASK{} WHERE {{ {} }}", - dataset, - SparqlGraphRootPattern { - pattern, - dataset: &QueryDataset::default() - } - ) - } - } + self.inner.fmt(f) //TODO: override } } impl FromStr for Query { - type Err = ParseError; + type Err = spargebra::ParseError; - fn from_str(query: &str) -> Result { + fn from_str(query: &str) -> Result { Self::parse(query, None) } } impl<'a> TryFrom<&'a str> for Query { - type Error = ParseError; + type Error = spargebra::ParseError; - fn try_from(query: &str) -> Result { + fn try_from(query: &str) -> Result { Self::from_str(query) } } impl<'a> TryFrom<&'a String> for Query { - type Error = ParseError; + type Error = spargebra::ParseError; - fn try_from(query: &String) -> Result { + fn try_from(query: &String) -> Result { Self::from_str(query) } } 
@@ -213,1442 +102,71 @@ impl<'a> TryFrom<&'a String> for Query { /// ``` #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct Update { - /// The update base IRI - pub(super) base_iri: Option>, - /// The [update operations](https://www.w3.org/TR/sparql11-update/#formalModelGraphUpdate) - pub(super) operations: Vec, + pub(super) inner: spargebra::Update, + pub(super) using_datasets: Vec>, } impl Update { /// Parses a SPARQL update with an optional base IRI to resolve relative IRIs in the query - pub fn parse(update: &str, base_iri: Option<&str>) -> Result { - parse_update(update, base_iri) + pub fn parse(update: &str, base_iri: Option<&str>) -> Result { + let update = spargebra::Update::parse(update, base_iri)?; + Ok(Self { + using_datasets: update + .operations + .iter() + .map(|operation| { + if let GraphUpdateOperation::DeleteInsert { using, .. } = operation { + Some(QueryDataset::from_algebra(using)) + } else { + None + } + }) + .collect(), + inner: update, + }) } /// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) in [DELETE/INSERT operations](https://www.w3.org/TR/sparql11-update/#deleteInsert). pub fn using_datasets(&self) -> impl Iterator { - self.operations.iter().filter_map(|operation| { - if let GraphUpdateOperation::DeleteInsert { using, .. } = operation { - Some(using) - } else { - None - } - }) + self.using_datasets.iter().filter_map(|q| q.as_ref()) } /// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) in [DELETE/INSERT operations](https://www.w3.org/TR/sparql11-update/#deleteInsert). pub fn using_datasets_mut(&mut self) -> impl Iterator { - self.operations.iter_mut().filter_map(|operation| { - if let GraphUpdateOperation::DeleteInsert { using, .. 
} = operation { - Some(using) - } else { - None - } - }) + self.using_datasets.iter_mut().filter_map(|q| q.as_mut()) } } impl fmt::Display for Update { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if let Some(base_iri) = &self.base_iri { - writeln!(f, "BASE <{}>", base_iri)?; - } - for update in &self.operations { - writeln!(f, "{} ;", update)?; - } - Ok(()) + self.inner.fmt(f) } } impl FromStr for Update { - type Err = ParseError; + type Err = spargebra::ParseError; - fn from_str(update: &str) -> Result { + fn from_str(update: &str) -> Result { Self::parse(update, None) } } impl<'a> TryFrom<&'a str> for Update { - type Error = ParseError; + type Error = spargebra::ParseError; - fn try_from(update: &str) -> Result { + fn try_from(update: &str) -> Result { Self::from_str(update) } } impl<'a> TryFrom<&'a String> for Update { - type Error = ParseError; + type Error = spargebra::ParseError; - fn try_from(update: &String) -> Result { + fn try_from(update: &String) -> Result { Self::from_str(update) } } -/// The union of [`NamedNode`]s and [`Variable`]s -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum NamedNodeOrVariable { - NamedNode(NamedNode), - Variable(Variable), -} - -impl fmt::Display for NamedNodeOrVariable { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - NamedNodeOrVariable::NamedNode(node) => node.fmt(f), - NamedNodeOrVariable::Variable(var) => var.fmt(f), - } - } -} - -impl From for NamedNodeOrVariable { - fn from(node: NamedNode) -> Self { - NamedNodeOrVariable::NamedNode(node) - } -} - -impl From for NamedNodeOrVariable { - fn from(var: Variable) -> Self { - NamedNodeOrVariable::Variable(var) - } -} - -/// The union of [`Term`]s and [`Variable`]s -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum TermOrVariable { - Term(Term), - Variable(Variable), -} - -impl fmt::Display for TermOrVariable { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - TermOrVariable::Term(term) => 
term.fmt(f), - TermOrVariable::Variable(var) => var.fmt(f), - } - } -} - -impl From for TermOrVariable { - fn from(node: NamedNode) -> Self { - TermOrVariable::Term(node.into()) - } -} - -impl From for TermOrVariable { - fn from(node: BlankNode) -> Self { - TermOrVariable::Term(node.into()) - } -} - -impl From for TermOrVariable { - fn from(literal: Literal) -> Self { - TermOrVariable::Term(literal.into()) - } -} - -impl From for TermOrVariable { - fn from(var: Variable) -> Self { - TermOrVariable::Variable(var) - } -} - -impl From for TermOrVariable { - fn from(term: Term) -> Self { - TermOrVariable::Term(term) - } -} - -impl From for TermOrVariable { - fn from(element: NamedNodeOrVariable) -> Self { - match element { - NamedNodeOrVariable::NamedNode(node) => TermOrVariable::Term(node.into()), - NamedNodeOrVariable::Variable(var) => TermOrVariable::Variable(var), - } - } -} - -/// A [triple pattern](https://www.w3.org/TR/sparql11-query/#defn_TriplePattern) -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub struct TriplePattern { - pub subject: TermOrVariable, - pub predicate: NamedNodeOrVariable, - pub object: TermOrVariable, -} - -impl TriplePattern { - pub(crate) fn new( - subject: impl Into, - predicate: impl Into, - object: impl Into, - ) -> Self { - Self { - subject: subject.into(), - predicate: predicate.into(), - object: object.into(), - } - } -} - -impl fmt::Display for TriplePattern { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "(triple {} {} {})", - self.subject, self.predicate, self.object - ) - } -} - -struct SparqlTriplePattern<'a>(&'a TriplePattern); - -impl<'a> fmt::Display for SparqlTriplePattern<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{} {} {} .", - self.0.subject, self.0.predicate, self.0.object - ) - } -} - -/// A [triple pattern](https://www.w3.org/TR/sparql11-query/#defn_TriplePattern) in a specific graph -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub struct 
QuadPattern { - pub subject: TermOrVariable, - pub predicate: NamedNodeOrVariable, - pub object: TermOrVariable, - pub graph_name: Option, -} - -impl QuadPattern { - pub(crate) fn new( - subject: impl Into, - predicate: impl Into, - object: impl Into, - graph_name: Option, - ) -> Self { - Self { - subject: subject.into(), - predicate: predicate.into(), - object: object.into(), - graph_name, - } - } -} - -impl fmt::Display for QuadPattern { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if let Some(graph_name) = &self.graph_name { - write!( - f, - "(graph {} (triple {} {} {}))", - graph_name, self.subject, self.predicate, self.object - ) - } else { - write!( - f, - "(triple {} {} {})", - self.subject, self.predicate, self.object - ) - } - } -} - -struct SparqlQuadPattern<'a>(&'a QuadPattern); - -impl<'a> fmt::Display for SparqlQuadPattern<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if let Some(graph_name) = &self.0.graph_name { - write!( - f, - "GRAPH {} {{ {} {} {} }}", - graph_name, self.0.subject, self.0.predicate, self.0.object - ) - } else { - write!( - f, - "{} {} {} .", - self.0.subject, self.0.predicate, self.0.object - ) - } - } -} - -/// A [property path expression](https://www.w3.org/TR/sparql11-query/#defn_PropertyPathExpr) -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum PropertyPathExpression { - NamedNode(NamedNode), - Reverse(Box), - Sequence(Box, Box), - Alternative(Box, Box), - ZeroOrMore(Box), - OneOrMore(Box), - ZeroOrOne(Box), - NegatedPropertySet(Vec), -} - -impl fmt::Display for PropertyPathExpression { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - PropertyPathExpression::NamedNode(p) => p.fmt(f), - PropertyPathExpression::Reverse(p) => write!(f, "(reverse {})", p), - PropertyPathExpression::Alternative(a, b) => write!(f, "(alt {} {})", a, b), - PropertyPathExpression::Sequence(a, b) => write!(f, "(seq {} {})", a, b), - PropertyPathExpression::ZeroOrMore(p) => 
write!(f, "(path* {})", p), - PropertyPathExpression::OneOrMore(p) => write!(f, "(path+ {})", p), - PropertyPathExpression::ZeroOrOne(p) => write!(f, "(path? {})", p), - PropertyPathExpression::NegatedPropertySet(p) => { - write!(f, "(notoneof ")?; - for p in p { - write!(f, " {}", p)?; - } - write!(f, ")") - } - } - } -} - -struct SparqlPropertyPath<'a>(&'a PropertyPathExpression); - -impl<'a> fmt::Display for SparqlPropertyPath<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.0 { - PropertyPathExpression::NamedNode(p) => p.fmt(f), - PropertyPathExpression::Reverse(p) => write!(f, "^{}", SparqlPropertyPath(&*p)), - PropertyPathExpression::Sequence(a, b) => write!( - f, - "({} / {})", - SparqlPropertyPath(&*a), - SparqlPropertyPath(&*b) - ), - PropertyPathExpression::Alternative(a, b) => write!( - f, - "({} | {})", - SparqlPropertyPath(&*a), - SparqlPropertyPath(&*b) - ), - PropertyPathExpression::ZeroOrMore(p) => write!(f, "{}*", SparqlPropertyPath(&*p)), - PropertyPathExpression::OneOrMore(p) => write!(f, "{}+", SparqlPropertyPath(&*p)), - PropertyPathExpression::ZeroOrOne(p) => write!(f, "{}?", SparqlPropertyPath(&*p)), - PropertyPathExpression::NegatedPropertySet(p) => write!( - f, - "!({})", - p.iter() - .map(|v| v.to_string()) - .collect::>() - .join(" | ") - ), - } - } -} - -impl From for PropertyPathExpression { - fn from(p: NamedNode) -> Self { - PropertyPathExpression::NamedNode(p) - } -} - -/// An [expression](https://www.w3.org/TR/sparql11-query/#expressions) -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum Expression { - NamedNode(NamedNode), - Literal(Literal), - Variable(Variable), - /// [Logical-or](https://www.w3.org/TR/sparql11-query/#func-logical-or) - Or(Box, Box), - /// [Logical-and](https://www.w3.org/TR/sparql11-query/#func-logical-and) - And(Box, Box), - /// [RDFterm-equal](https://www.w3.org/TR/sparql11-query/#func-RDFterm-equal) and all the XSD equalities - Equal(Box, Box), - /// 
[sameTerm](https://www.w3.org/TR/sparql11-query/#func-sameTerm) - SameTerm(Box, Box), - /// [op:numeric-greater-than](https://www.w3.org/TR/xpath-functions/#func-numeric-greater-than) and other XSD greater than operators - Greater(Box, Box), - GreaterOrEqual(Box, Box), - /// [op:numeric-less-than](https://www.w3.org/TR/xpath-functions/#func-numeric-less-than) and other XSD greater than operators - Less(Box, Box), - LessOrEqual(Box, Box), - /// [IN](https://www.w3.org/TR/sparql11-query/#func-in) - In(Box, Vec), - /// [op:numeric-add](https://www.w3.org/TR/xpath-functions/#func-numeric-add) and other XSD additions - Add(Box, Box), - /// [op:numeric-subtract](https://www.w3.org/TR/xpath-functions/#func-numeric-subtract) and other XSD subtractions - Subtract(Box, Box), - /// [op:numeric-multiply](https://www.w3.org/TR/xpath-functions/#func-numeric-multiply) and other XSD multiplications - Multiply(Box, Box), - /// [op:numeric-divide](https://www.w3.org/TR/xpath-functions/#func-numeric-divide) and other XSD divides - Divide(Box, Box), - /// [op:numeric-unary-plus](https://www.w3.org/TR/xpath-functions/#func-numeric-unary-plus) and other XSD unary plus - UnaryPlus(Box), - /// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions/#func-numeric-unary-minus) and other XSD unary minus - UnaryMinus(Box), - /// [fn:not](https://www.w3.org/TR/xpath-functions/#func-not) - Not(Box), - /// [EXISTS](https://www.w3.org/TR/sparql11-query/#func-filter-exists) - Exists(Box), - /// [BOUND](https://www.w3.org/TR/sparql11-query/#func-bound) - Bound(Variable), - /// [IF](https://www.w3.org/TR/sparql11-query/#func-if) - If(Box, Box, Box), - /// [COALESCE](https://www.w3.org/TR/sparql11-query/#func-coalesce) - Coalesce(Vec), - /// A regular function call - FunctionCall(Function, Vec), -} - -impl fmt::Display for Expression { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Expression::NamedNode(node) => node.fmt(f), - Expression::Literal(l) => l.fmt(f), 
- Expression::Variable(var) => var.fmt(f), - Expression::Or(a, b) => write!(f, "(|| {} {})", a, b), - Expression::And(a, b) => write!(f, "(&& {} {})", a, b), - Expression::Equal(a, b) => write!(f, "(= {} {})", a, b), - Expression::SameTerm(a, b) => write!(f, "(sameTerm {} {})", a, b), - Expression::Greater(a, b) => write!(f, "(> {} {})", a, b), - Expression::GreaterOrEqual(a, b) => write!(f, "(>= {} {})", a, b), - Expression::Less(a, b) => write!(f, "(< {} {})", a, b), - Expression::LessOrEqual(a, b) => write!(f, "(<= {} {})", a, b), - Expression::In(a, b) => { - write!(f, "(in {}", a)?; - for p in b { - write!(f, " {}", p)?; - } - write!(f, ")") - } - Expression::Add(a, b) => write!(f, "(+ {} {})", a, b), - Expression::Subtract(a, b) => write!(f, "(- {} {})", a, b), - Expression::Multiply(a, b) => write!(f, "(* {} {})", a, b), - Expression::Divide(a, b) => write!(f, "(/ {} {})", a, b), - Expression::UnaryPlus(e) => write!(f, "(+ {})", e), - Expression::UnaryMinus(e) => write!(f, "(- {})", e), - Expression::Not(e) => write!(f, "(! 
{})", e), - Expression::FunctionCall(function, parameters) => { - write!(f, "({}", function)?; - for p in parameters { - write!(f, " {}", p)?; - } - write!(f, ")") - } - Expression::Exists(p) => write!(f, "(exists {})", p), - Expression::Bound(v) => write!(f, "(bound {})", v), - Expression::If(a, b, c) => write!(f, "(if {} {} {})", a, b, c), - Expression::Coalesce(parameters) => { - write!(f, "(coalesce")?; - for p in parameters { - write!(f, " {}", p)?; - } - write!(f, ")") - } - } - } -} - -impl From for Expression { - fn from(p: NamedNode) -> Self { - Expression::NamedNode(p) - } -} - -impl From for Expression { - fn from(p: Literal) -> Self { - Expression::Literal(p) - } -} - -impl From for Expression { - fn from(v: Variable) -> Self { - Expression::Variable(v) - } -} - -struct SparqlExpression<'a>(&'a Expression); - -impl<'a> fmt::Display for SparqlExpression<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.0 { - Expression::NamedNode(node) => node.fmt(f), - Expression::Literal(l) => l.fmt(f), - Expression::Variable(var) => var.fmt(f), - Expression::Or(a, b) => write!( - f, - "({} || {})", - SparqlExpression(&*a), - SparqlExpression(&*b) - ), - Expression::And(a, b) => write!( - f, - "({} && {})", - SparqlExpression(&*a), - SparqlExpression(&*b) - ), - Expression::Equal(a, b) => { - write!(f, "({} = {})", SparqlExpression(&*a), SparqlExpression(&*b)) - } - Expression::SameTerm(a, b) => { - write!( - f, - "sameTerm({}, {})", - SparqlExpression(&*a), - SparqlExpression(&*b) - ) - } - Expression::Greater(a, b) => { - write!(f, "({} > {})", SparqlExpression(&*a), SparqlExpression(&*b)) - } - Expression::GreaterOrEqual(a, b) => write!( - f, - "({} >= {})", - SparqlExpression(&*a), - SparqlExpression(&*b) - ), - Expression::Less(a, b) => { - write!(f, "({} < {})", SparqlExpression(&*a), SparqlExpression(&*b)) - } - Expression::LessOrEqual(a, b) => write!( - f, - "({} <= {})", - SparqlExpression(&*a), - SparqlExpression(&*b) - ), - 
Expression::In(a, b) => { - write!(f, "({} IN ", SparqlExpression(&*a))?; - write_arg_list(b.iter().map(|p| SparqlExpression(&*p)), f)?; - write!(f, ")") - } - Expression::Add(a, b) => { - write!(f, "{} + {}", SparqlExpression(&*a), SparqlExpression(&*b)) - } - Expression::Subtract(a, b) => { - write!(f, "{} - {}", SparqlExpression(&*a), SparqlExpression(&*b)) - } - Expression::Multiply(a, b) => { - write!(f, "{} * {}", SparqlExpression(&*a), SparqlExpression(&*b)) - } - Expression::Divide(a, b) => { - write!(f, "{} / {}", SparqlExpression(&*a), SparqlExpression(&*b)) - } - Expression::UnaryPlus(e) => write!(f, "+{}", SparqlExpression(&*e)), - Expression::UnaryMinus(e) => write!(f, "-{}", SparqlExpression(&*e)), - Expression::Not(e) => match e.as_ref() { - Expression::Exists(p) => write!(f, "NOT EXISTS {{ {} }}", SparqlGraphPattern(&*p)), - e => write!(f, "!{}", SparqlExpression(&*e)), - }, - Expression::FunctionCall(function, parameters) => { - write!(f, "{}", function)?; - write_arg_list(parameters.iter().map(|p| SparqlExpression(&*p)), f) - } - Expression::Bound(v) => write!(f, "BOUND({})", v), - Expression::Exists(p) => write!(f, "EXISTS {{ {} }}", SparqlGraphPattern(&*p)), - Expression::If(a, b, c) => write!( - f, - "IF({}, {}, {})", - SparqlExpression(&*a), - SparqlExpression(&*b), - SparqlExpression(&*c) - ), - Expression::Coalesce(parameters) => { - write!(f, "COALESCE")?; - write_arg_list(parameters.iter().map(|p| SparqlExpression(&*p)), f) - } - } - } -} - -fn write_arg_list( - params: impl IntoIterator, - f: &mut fmt::Formatter<'_>, -) -> fmt::Result { - write!(f, "(")?; - let mut cont = false; - for p in params { - if cont { - write!(f, ", ")?; - } - p.fmt(f)?; - cont = true; - } - write!(f, ")") -} - -/// A function name -#[allow(clippy::upper_case_acronyms)] //TODO: Fix on the next breaking release -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum Function { - Str, - Lang, - LangMatches, - Datatype, - IRI, - BNode, - Rand, - Abs, - Ceil, - 
Floor, - Round, - Concat, - SubStr, - StrLen, - Replace, - UCase, - LCase, - EncodeForURI, - Contains, - StrStarts, - StrEnds, - StrBefore, - StrAfter, - Year, - Month, - Day, - Hours, - Minutes, - Seconds, - Timezone, - Tz, - Now, - UUID, - StrUUID, - MD5, - SHA1, - SHA256, - SHA384, - SHA512, - StrLang, - StrDT, - IsIRI, - IsBlank, - IsLiteral, - IsNumeric, - Regex, - Custom(NamedNode), -} - -impl fmt::Display for Function { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Function::Str => write!(f, "STR"), - Function::Lang => write!(f, "LANG"), - Function::LangMatches => write!(f, "LANGMATCHES"), - Function::Datatype => write!(f, "DATATYPE"), - Function::IRI => write!(f, "IRI"), - Function::BNode => write!(f, "BNODE"), - Function::Rand => write!(f, "RAND"), - Function::Abs => write!(f, "ABS"), - Function::Ceil => write!(f, "CEIL"), - Function::Floor => write!(f, "FLOOR"), - Function::Round => write!(f, "ROUND"), - Function::Concat => write!(f, "CONCAT"), - Function::SubStr => write!(f, "SUBSTR"), - Function::StrLen => write!(f, "STRLEN"), - Function::Replace => write!(f, "REPLACE"), - Function::UCase => write!(f, "UCASE"), - Function::LCase => write!(f, "LCASE"), - Function::EncodeForURI => write!(f, "ENCODE_FOR_URI"), - Function::Contains => write!(f, "CONTAINS"), - Function::StrStarts => write!(f, "STRSTATS"), - Function::StrEnds => write!(f, "STRENDS"), - Function::StrBefore => write!(f, "STRBEFORE"), - Function::StrAfter => write!(f, "STRAFTER"), - Function::Year => write!(f, "YEAR"), - Function::Month => write!(f, "MONTH"), - Function::Day => write!(f, "DAY"), - Function::Hours => write!(f, "HOURS"), - Function::Minutes => write!(f, "MINUTES"), - Function::Seconds => write!(f, "SECONDS"), - Function::Timezone => write!(f, "TIMEZONE"), - Function::Tz => write!(f, "TZ"), - Function::Now => write!(f, "NOW"), - Function::UUID => write!(f, "UUID"), - Function::StrUUID => write!(f, "STRUUID"), - Function::MD5 => write!(f, "MD5"), - 
Function::SHA1 => write!(f, "SHA1"), - Function::SHA256 => write!(f, "SHA256"), - Function::SHA384 => write!(f, "SHA384"), - Function::SHA512 => write!(f, "SHA512"), - Function::StrLang => write!(f, "STRLANG"), - Function::StrDT => write!(f, "STRDT"), - Function::IsIRI => write!(f, "isIRI"), - Function::IsBlank => write!(f, "isBLANK"), - Function::IsLiteral => write!(f, "isLITERAL"), - Function::IsNumeric => write!(f, "isNUMERIC"), - Function::Regex => write!(f, "REGEX"), - Function::Custom(iri) => iri.fmt(f), - } - } -} - -/// A SPARQL query [graph pattern](https://www.w3.org/TR/sparql11-query/#sparqlQuery) -#[allow(clippy::upper_case_acronyms)] //TODO: Fix on the next breaking release -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum GraphPattern { - /// A [basic graph pattern](https://www.w3.org/TR/sparql11-query/#defn_BasicGraphPattern) - BGP(Vec), - /// A [property path pattern](https://www.w3.org/TR/sparql11-query/#defn_evalPP_predicate) - Path { - subject: TermOrVariable, - path: PropertyPathExpression, - object: TermOrVariable, - }, - /// [Join](https://www.w3.org/TR/sparql11-query/#defn_algJoin) - Join { - left: Box, - right: Box, - }, - /// [LeftJoin](https://www.w3.org/TR/sparql11-query/#defn_algLeftJoin) - LeftJoin { - left: Box, - right: Box, - expr: Option, - }, - /// [Filter](https://www.w3.org/TR/sparql11-query/#defn_algFilter) - Filter { - expr: Expression, - inner: Box, - }, - /// [Union](https://www.w3.org/TR/sparql11-query/#defn_algUnion) - Union { - left: Box, - right: Box, - }, - Graph { - graph_name: NamedNodeOrVariable, - inner: Box, - }, - /// [Extend](https://www.w3.org/TR/sparql11-query/#defn_extend) - Extend { - inner: Box, - var: Variable, - expr: Expression, - }, - /// [Minus](https://www.w3.org/TR/sparql11-query/#defn_algMinus) - Minus { - left: Box, - right: Box, - }, - /// A table used to provide inline values - Table { - variables: Vec, - rows: Vec>>, - }, - /// [OrderBy](https://www.w3.org/TR/sparql11-query/#defn_algOrdered) 
- OrderBy { - inner: Box, - condition: Vec, - }, - /// [Project](https://www.w3.org/TR/sparql11-query/#defn_algProjection) - Project { - inner: Box, - projection: Vec, - }, - /// [Distinct](https://www.w3.org/TR/sparql11-query/#defn_algDistinct) - Distinct { inner: Box }, - /// [Reduced](https://www.w3.org/TR/sparql11-query/#defn_algReduced) - Reduced { inner: Box }, - /// [Slice](https://www.w3.org/TR/sparql11-query/#defn_algSlice) - Slice { - inner: Box, - start: usize, - length: Option, - }, - /// [Group](https://www.w3.org/TR/sparql11-federated-query/#aggregateAlgebra) - Group { - inner: Box, - by: Vec, - aggregates: Vec<(Variable, AggregationFunction)>, - }, - /// [Service](https://www.w3.org/TR/sparql11-federated-query/#defn_evalService) - Service { - name: NamedNodeOrVariable, - pattern: Box, - silent: bool, - }, -} - -impl fmt::Display for GraphPattern { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - GraphPattern::BGP(p) => { - write!(f, "(bgp")?; - for pattern in p { - write!(f, " {}", pattern)?; - } - write!(f, ")") - } - GraphPattern::Path { - subject, - path, - object, - } => write!(f, "(path {} {} {})", subject, path, object), - GraphPattern::Join { left, right } => write!(f, "(join {} {})", left, right), - GraphPattern::LeftJoin { left, right, expr } => { - if let Some(expr) = expr { - write!(f, "(leftjoin {} {} {})", left, right, expr) - } else { - write!(f, "(leftjoin {} {})", left, right) - } - } - GraphPattern::Filter { expr, inner } => write!(f, "(filter {} {})", expr, inner), - GraphPattern::Union { left, right } => write!(f, "(union {} {})", left, right), - GraphPattern::Graph { graph_name, inner } => { - write!(f, "(graph {} {})", graph_name, inner) - } - GraphPattern::Extend { inner, var, expr } => { - write!(f, "(extend ({} {}) {})", var, expr, inner) - } - GraphPattern::Minus { left, right } => write!(f, "(minus {} {})", left, right), - GraphPattern::Service { - name, - pattern, - silent, - } => { - if *silent 
{ - write!(f, "(service silent {} {})", name, pattern) - } else { - write!(f, "(service {} {})", name, pattern) - } - } - GraphPattern::Group { - inner, - by, - aggregates, - } => write!( - f, - "(group ({}) ({}) {})", - by.iter() - .map(|v| v.as_str()) - .collect::>() - .join(" "), - aggregates - .iter() - .map(|(a, v)| format!("({} {})", v, a)) - .collect::>() - .join(" "), - inner - ), - GraphPattern::Table { variables, rows } => { - write!(f, "(table (vars")?; - for var in variables { - write!(f, " {}", var)?; - } - write!(f, ")")?; - for row in rows { - write!(f, " (row")?; - for (value, var) in row.iter().zip(variables) { - if let Some(value) = value { - write!(f, " ({} {})", var, value)?; - } - } - write!(f, ")")?; - } - write!(f, ")") - } - GraphPattern::OrderBy { inner, condition } => write!( - f, - "(order ({}) {})", - condition - .iter() - .map(|c| c.to_string()) - .collect::>() - .join(" "), - inner - ), - GraphPattern::Project { inner, projection } => write!( - f, - "(project ({}) {})", - projection - .iter() - .map(|v| v.to_string()) - .collect::>() - .join(" "), - inner - ), - GraphPattern::Distinct { inner } => write!(f, "(distinct {})", inner), - GraphPattern::Reduced { inner } => write!(f, "(reduced {})", inner), - GraphPattern::Slice { - inner, - start, - length, - } => write!( - f, - "(slice {} {} {})", - start, - length - .map(|l| l.to_string()) - .unwrap_or_else(|| '_'.to_string()), - inner - ), - } - } -} - -impl Default for GraphPattern { - fn default() -> Self { - GraphPattern::BGP(Vec::default()) - } -} - -impl GraphPattern { - pub fn visible_variables(&self) -> BTreeSet<&Variable> { - let mut vars = BTreeSet::default(); - self.add_visible_variables(&mut vars); - vars - } - - fn add_visible_variables<'a>(&'a self, vars: &mut BTreeSet<&'a Variable>) { - match self { - GraphPattern::BGP(p) => { - for pattern in p { - if let TermOrVariable::Variable(s) = &pattern.subject { - vars.insert(s); - } - if let NamedNodeOrVariable::Variable(p) = 
&pattern.predicate { - vars.insert(p); - } - if let TermOrVariable::Variable(o) = &pattern.object { - vars.insert(o); - } - } - } - GraphPattern::Path { - subject, object, .. - } => { - if let TermOrVariable::Variable(s) = subject { - vars.insert(s); - } - if let TermOrVariable::Variable(o) = object { - vars.insert(o); - } - } - GraphPattern::Join { left, right } - | GraphPattern::LeftJoin { left, right, .. } - | GraphPattern::Union { left, right } => { - left.add_visible_variables(vars); - right.add_visible_variables(vars); - } - GraphPattern::Filter { inner, .. } => inner.add_visible_variables(vars), - GraphPattern::Graph { graph_name, inner } => { - if let NamedNodeOrVariable::Variable(ref g) = graph_name { - vars.insert(g); - } - inner.add_visible_variables(vars); - } - GraphPattern::Extend { inner, var, .. } => { - vars.insert(var); - inner.add_visible_variables(vars); - } - GraphPattern::Minus { left, .. } => left.add_visible_variables(vars), - GraphPattern::Service { pattern, .. } => pattern.add_visible_variables(vars), - GraphPattern::Group { by, aggregates, .. } => { - vars.extend(by); - for (v, _) in aggregates { - vars.insert(v); - } - } - GraphPattern::Table { variables, .. } => vars.extend(variables), - GraphPattern::Project { projection, .. } => vars.extend(projection.iter()), - GraphPattern::OrderBy { inner, .. } - | GraphPattern::Distinct { inner } - | GraphPattern::Reduced { inner } - | GraphPattern::Slice { inner, .. } => inner.add_visible_variables(vars), - } - } -} - -struct SparqlGraphPattern<'a>(&'a GraphPattern); - -impl<'a> fmt::Display for SparqlGraphPattern<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.0 { - GraphPattern::BGP(p) => { - for pattern in p { - write!(f, "{}", SparqlTriplePattern(pattern))? 
- } - Ok(()) - } - GraphPattern::Path { - subject, - path, - object, - } => write!(f, "{} {} {} .", subject, SparqlPropertyPath(path), object), - GraphPattern::Join { left, right } => write!( - f, - "{} {}", - SparqlGraphPattern(&*left), - SparqlGraphPattern(&*right) - ), - GraphPattern::LeftJoin { left, right, expr } => { - if let Some(expr) = expr { - write!( - f, - "{} OPTIONAL {{ {} FILTER({}) }}", - SparqlGraphPattern(&*left), - SparqlGraphPattern(&*right), - SparqlExpression(expr) - ) - } else { - write!( - f, - "{} OPTIONAL {{ {} }}", - SparqlGraphPattern(&*left), - SparqlGraphPattern(&*right) - ) - } - } - GraphPattern::Filter { expr, inner } => write!( - f, - "{} FILTER({})", - SparqlGraphPattern(&*inner), - SparqlExpression(expr) - ), - GraphPattern::Union { left, right } => write!( - f, - "{{ {} }} UNION {{ {} }}", - SparqlGraphPattern(&*left), - SparqlGraphPattern(&*right), - ), - GraphPattern::Graph { graph_name, inner } => { - write!( - f, - "GRAPH {} {{ {} }}", - graph_name, - SparqlGraphPattern(&*inner) - ) - } - GraphPattern::Extend { inner, var, expr } => write!( - f, - "{} BIND({} AS {})", - SparqlGraphPattern(&*inner), - SparqlExpression(expr), - var - ), - GraphPattern::Minus { left, right } => write!( - f, - "{} MINUS {{ {} }}", - SparqlGraphPattern(&*left), - SparqlGraphPattern(&*right) - ), - GraphPattern::Service { - name, - pattern, - silent, - } => { - if *silent { - write!( - f, - "SERVICE SILENT {} {{ {} }}", - name, - SparqlGraphPattern(&*pattern) - ) - } else { - write!( - f, - "SERVICE {} {{ {} }}", - name, - SparqlGraphPattern(&*pattern) - ) - } - } - GraphPattern::Table { variables, rows } => { - write!(f, "VALUES ( ")?; - for var in variables { - write!(f, "{} ", var)?; - } - write!(f, ") {{ ")?; - for row in rows { - write!(f, "( ")?; - for val in row { - match val { - Some(val) => write!(f, "{} ", val), - None => write!(f, "UNDEF "), - }?; - } - write!(f, ") ")?; - } - write!(f, " }}") - } - GraphPattern::Group { - inner, - by, 
- aggregates, - } => write!( - f, - "{{ SELECT {} WHERE {{ {} }} GROUP BY {} }}", - aggregates - .iter() - .map(|(v, a)| format!("({} AS {})", SparqlAggregationFunction(a), v)) - .chain(by.iter().map(|e| e.to_string())) - .collect::>() - .join(" "), - SparqlGraphPattern(&*inner), - by.iter() - .map(|e| format!("({})", e.to_string())) - .collect::>() - .join(" ") - ), - p => write!( - f, - "{{ {} }}", - SparqlGraphRootPattern { - pattern: p, - dataset: &QueryDataset::default() - } - ), - } - } -} - -struct SparqlGraphRootPattern<'a> { - pattern: &'a GraphPattern, - dataset: &'a QueryDataset, -} - -impl<'a> fmt::Display for SparqlGraphRootPattern<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut distinct = false; - let mut reduced = false; - let mut order = None; - let mut start = 0; - let mut length = None; - let mut project: &[Variable] = &[]; - - let mut child = self.pattern; - loop { - match child { - GraphPattern::OrderBy { inner, condition } => { - order = Some(condition); - child = &*inner; - } - GraphPattern::Project { inner, projection } if project.is_empty() => { - project = projection; - child = &*inner; - } - GraphPattern::Distinct { inner } => { - distinct = true; - child = &*inner; - } - GraphPattern::Reduced { inner } => { - reduced = true; - child = &*inner; - } - GraphPattern::Slice { - inner, - start: s, - length: l, - } => { - start = *s; - length = *l; - child = inner; - } - p => { - write!(f, "SELECT ")?; - if distinct { - write!(f, "DISTINCT ")?; - } - if reduced { - write!(f, "REDUCED ")?; - } - write!( - f, - "{}{} WHERE {{ {} }}", - build_sparql_select_arguments(project), - self.dataset, - SparqlGraphPattern(p) - )?; - if let Some(order) = order { - write!( - f, - " ORDER BY {}", - order - .iter() - .map(|c| SparqlOrderComparator(c).to_string()) - .collect::>() - .join(" ") - )?; - } - if start > 0 { - write!(f, " OFFSET {}", start)?; - } - if let Some(length) = length { - write!(f, " LIMIT {}", length)?; - } - 
return Ok(()); - } - } - } - } -} - -fn build_sparql_select_arguments(args: &[Variable]) -> String { - if args.is_empty() { - "*".to_owned() - } else { - args.iter() - .map(|v| v.to_string()) - .collect::>() - .join(" ") - } -} - -/// A set function used in aggregates (c.f. [`GraphPattern::Group`]) -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum AggregationFunction { - /// [Count](https://www.w3.org/TR/sparql11-query/#defn_aggCount) - Count { - expr: Option>, - distinct: bool, - }, - /// [Sum](https://www.w3.org/TR/sparql11-query/#defn_aggSum) - Sum { - expr: Box, - distinct: bool, - }, - /// [Avg](https://www.w3.org/TR/sparql11-query/#defn_aggAvg) - Avg { - expr: Box, - distinct: bool, - }, - /// [Min](https://www.w3.org/TR/sparql11-query/#defn_aggMin) - Min { - expr: Box, - distinct: bool, - }, - /// [Max](https://www.w3.org/TR/sparql11-query/#defn_aggMax) - Max { - expr: Box, - distinct: bool, - }, - /// [GroupConcat](https://www.w3.org/TR/sparql11-query/#defn_aggGroupConcat) - GroupConcat { - expr: Box, - distinct: bool, - separator: Option, - }, - /// [Sample](https://www.w3.org/TR/sparql11-query/#defn_aggSample) - Sample { - expr: Box, - distinct: bool, - }, - /// Custom function - Custom { - name: NamedNode, - expr: Box, - distinct: bool, - }, -} - -impl fmt::Display for AggregationFunction { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - AggregationFunction::Count { expr, distinct } => { - if *distinct { - if let Some(expr) = expr { - write!(f, "(count distinct {})", expr) - } else { - write!(f, "(count distinct)") - } - } else if let Some(expr) = expr { - write!(f, "(count {})", expr) - } else { - write!(f, "(count)") - } - } - AggregationFunction::Sum { expr, distinct } => { - if *distinct { - write!(f, "(sum distinct {})", expr) - } else { - write!(f, "(sum {})", expr) - } - } - AggregationFunction::Avg { expr, distinct } => { - if *distinct { - write!(f, "(avg distinct {})", expr) - } else { - write!(f, "(avg {})", 
expr) - } - } - AggregationFunction::Min { expr, distinct } => { - if *distinct { - write!(f, "(min distinct {})", expr) - } else { - write!(f, "(min {})", expr) - } - } - AggregationFunction::Max { expr, distinct } => { - if *distinct { - write!(f, "(max distinct {})", expr) - } else { - write!(f, "(max {})", expr) - } - } - AggregationFunction::Sample { expr, distinct } => { - if *distinct { - write!(f, "(sample distinct {})", expr) - } else { - write!(f, "(sample {})", expr) - } - } - AggregationFunction::GroupConcat { - expr, - distinct, - separator, - } => { - if *distinct { - if let Some(separator) = separator { - write!(f, "(group_concat distinct {} {})", expr, fmt_str(separator)) - } else { - write!(f, "(group_concat distinct {})", expr) - } - } else if let Some(separator) = separator { - write!(f, "(group_concat {} {})", expr, fmt_str(separator)) - } else { - write!(f, "(group_concat {})", expr) - } - } - AggregationFunction::Custom { - name, - expr, - distinct, - } => { - if *distinct { - write!(f, "({} distinct {})", name, expr) - } else { - write!(f, "({} {})", name, expr) - } - } - } - } -} - -struct SparqlAggregationFunction<'a>(&'a AggregationFunction); - -impl<'a> fmt::Display for SparqlAggregationFunction<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.0 { - AggregationFunction::Count { expr, distinct } => { - if *distinct { - if let Some(expr) = expr { - write!(f, "COUNT(DISTINCT {})", SparqlExpression(expr)) - } else { - write!(f, "COUNT(DISTINCT *)") - } - } else if let Some(expr) = expr { - write!(f, "COUNT({})", SparqlExpression(expr)) - } else { - write!(f, "COUNT(*)") - } - } - AggregationFunction::Sum { expr, distinct } => { - if *distinct { - write!(f, "SUM(DISTINCT {})", SparqlExpression(expr)) - } else { - write!(f, "SUM({})", SparqlExpression(expr)) - } - } - AggregationFunction::Min { expr, distinct } => { - if *distinct { - write!(f, "MIN(DISTINCT {})", SparqlExpression(expr)) - } else { - write!(f, 
"MIN({})", SparqlExpression(expr)) - } - } - AggregationFunction::Max { expr, distinct } => { - if *distinct { - write!(f, "MAX(DISTINCT {})", SparqlExpression(expr)) - } else { - write!(f, "MAX({})", SparqlExpression(expr)) - } - } - AggregationFunction::Avg { expr, distinct } => { - if *distinct { - write!(f, "AVG(DISTINCT {})", SparqlExpression(expr)) - } else { - write!(f, "AVG({})", SparqlExpression(expr)) - } - } - AggregationFunction::Sample { expr, distinct } => { - if *distinct { - write!(f, "SAMPLE(DISTINCT {})", SparqlExpression(expr)) - } else { - write!(f, "SAMPLE({})", SparqlExpression(expr)) - } - } - AggregationFunction::GroupConcat { - expr, - distinct, - separator, - } => { - if *distinct { - if let Some(separator) = separator { - write!( - f, - "GROUP_CONCAT(DISTINCT {}; SEPARATOR = {})", - SparqlExpression(expr), - fmt_str(separator) - ) - } else { - write!(f, "GROUP_CONCAT(DISTINCT {})", SparqlExpression(expr)) - } - } else if let Some(separator) = separator { - write!( - f, - "GROUP_CONCAT({}; SEPARATOR = {})", - SparqlExpression(expr), - fmt_str(separator) - ) - } else { - write!(f, "GROUP_CONCAT({})", SparqlExpression(expr)) - } - } - AggregationFunction::Custom { - name, - expr, - distinct, - } => { - if *distinct { - write!(f, "{}(DISTINCT {})", name, SparqlExpression(expr)) - } else { - write!(f, "{}({})", name, SparqlExpression(expr)) - } - } - } - } -} - -fn fmt_str(value: &str) -> rio::Literal<'_> { - rio::Literal::Simple { value } -} - -/// An ordering comparator used by [`GraphPattern::OrderBy`] -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum OrderComparator { - /// Ascending order - Asc(Expression), - /// Descending order - Desc(Expression), -} - -impl fmt::Display for OrderComparator { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - OrderComparator::Asc(e) => write!(f, "(asc {})", e), - OrderComparator::Desc(e) => write!(f, "(desc {})", e), - } - } -} - -struct SparqlOrderComparator<'a>(&'a 
OrderComparator); - -impl<'a> fmt::Display for SparqlOrderComparator<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.0 { - OrderComparator::Asc(e) => write!(f, "ASC({})", SparqlExpression(e)), - OrderComparator::Desc(e) => write!(f, "DESC({})", SparqlExpression(e)), - } - } -} - /// A SPARQL query [dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct QueryDataset { @@ -1656,16 +174,39 @@ pub struct QueryDataset { named: Option>, } -impl Default for QueryDataset { - fn default() -> Self { +impl QueryDataset { + pub(crate) fn new() -> Self { Self { - default: Some(vec![GraphName::DefaultGraph]), + default: None, named: None, } } -} -impl QueryDataset { + fn from_algebra(inner: &Option) -> Self { + if let Some(inner) = inner { + Self { + default: Some( + inner + .default + .iter() + .map(|g| NamedNode::new_unchecked(&g.iri).into()) + .collect(), + ), + named: inner.named.as_ref().map(|named| { + named + .iter() + .map(|g| NamedNode::new_unchecked(&g.iri).into()) + .collect() + }), + } + } else { + Self { + default: Some(vec![GraphName::DefaultGraph]), + named: None, + } + } + } + /// Checks if this dataset specification is the default one /// (i.e. the default graph is the store default graph and all the store named graphs are available) /// @@ -1703,8 +244,9 @@ impl QueryDataset { /// use oxigraph::sparql::Query; /// /// let mut query = Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?; - /// query.dataset_mut().set_default_graph(vec![NamedNode::new("http://example.com")?.into()]); - /// assert_eq!(query.to_string(), "SELECT ?s ?p ?o FROM WHERE { ?s ?p ?o . 
}"); + /// let default = vec![NamedNode::new("http://example.com")?.into()]; + /// query.dataset_mut().set_default_graph(default.clone()); + /// assert_eq!(query.dataset().default_graph_graphs(), Some(default.as_slice())); /// /// # Result::Ok::<_, Box>(()) /// ``` @@ -1724,8 +266,9 @@ impl QueryDataset { /// use oxigraph::sparql::Query; /// /// let mut query = Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?; - /// query.dataset_mut().set_available_named_graphs(vec![NamedNode::new("http://example.com")?.into()]); - /// assert_eq!(query.to_string(), "SELECT ?s ?p ?o FROM NAMED WHERE { ?s ?p ?o . }"); + /// let named = vec![NamedNode::new("http://example.com")?.into()]; + /// query.dataset_mut().set_available_named_graphs(named.clone()); + /// assert_eq!(query.dataset().available_named_graphs(), Some(named.as_slice())); /// /// # Result::Ok::<_, Box>(()) /// ``` @@ -1733,183 +276,3 @@ impl QueryDataset { self.named = Some(named_graphs); } } - -impl fmt::Display for QueryDataset { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - //TODO: does not encode everything - if let Some(graphs) = &self.default { - for g in graphs { - if !g.is_default_graph() { - write!(f, " FROM {}", g)?; - } - } - } - if let Some(graphs) = &self.named { - for g in graphs { - write!(f, " FROM NAMED {}", g)?; - } - } - Ok(()) - } -} - -/// The [graph update operations](https://www.w3.org/TR/sparql11-update/#formalModelGraphUpdate) -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum GraphUpdateOperation { - /// [insert data](https://www.w3.org/TR/sparql11-update/#def_insertdataoperation) - InsertData { data: Vec }, - /// [delete data](https://www.w3.org/TR/sparql11-update/#def_deletedataoperation) - DeleteData { data: Vec }, - /// [delete insert](https://www.w3.org/TR/sparql11-update/#def_deleteinsertoperation) - DeleteInsert { - delete: Vec, - insert: Vec, - using: QueryDataset, - pattern: Box, - }, - /// 
[load](https://www.w3.org/TR/sparql11-update/#def_loadoperation) - Load { - silent: bool, - from: NamedNode, - to: Option, - }, - /// [clear](https://www.w3.org/TR/sparql11-update/#def_clearoperation) - Clear { silent: bool, graph: GraphTarget }, - /// [create](https://www.w3.org/TR/sparql11-update/#def_createoperation) - Create { silent: bool, graph: NamedNode }, - /// [drop](https://www.w3.org/TR/sparql11-update/#def_dropoperation) - Drop { silent: bool, graph: GraphTarget }, -} - -impl fmt::Display for GraphUpdateOperation { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - GraphUpdateOperation::InsertData { data } => { - writeln!(f, "INSERT DATA {{")?; - write_quads(data, f)?; - write!(f, "}}") - } - GraphUpdateOperation::DeleteData { data } => { - writeln!(f, "DELETE DATA {{")?; - write_quads(data, f)?; - write!(f, "}}") - } - GraphUpdateOperation::DeleteInsert { - delete, - insert, - using, - pattern, - } => { - if !delete.is_empty() { - writeln!(f, "DELETE {{")?; - for quad in delete { - writeln!(f, "\t{}", SparqlQuadPattern(quad))?; - } - writeln!(f, "}}")?; - } - if !insert.is_empty() { - writeln!(f, "INSERT {{")?; - for quad in insert { - writeln!(f, "\t{}", SparqlQuadPattern(quad))?; - } - writeln!(f, "}}")?; - } - if let Some(using_default) = using.default_graph_graphs() { - for g in using_default { - if !g.is_default_graph() { - writeln!(f, "USING {}", g)?; - } - } - } - if let Some(using_named) = using.available_named_graphs() { - for g in using_named { - writeln!(f, "USING NAMED {}", g)?; - } - } - write!( - f, - "WHERE {{ {} }}", - SparqlGraphRootPattern { - pattern, - dataset: &QueryDataset::default() - } - ) - } - GraphUpdateOperation::Load { silent, from, to } => { - write!(f, "LOAD ")?; - if *silent { - write!(f, "SILENT ")?; - } - write!(f, "{}", from)?; - if let Some(to) = to { - write!(f, " INTO GRAPH {}", to)?; - } - Ok(()) - } - GraphUpdateOperation::Clear { silent, graph } => { - write!(f, "CLEAR ")?; - if 
*silent { - write!(f, "SILENT ")?; - } - write!(f, "{}", graph) - } - GraphUpdateOperation::Create { silent, graph } => { - write!(f, "CREATE ")?; - if *silent { - write!(f, "SILENT ")?; - } - write!(f, "GRAPH {}", graph) - } - GraphUpdateOperation::Drop { silent, graph } => { - write!(f, "DROP ")?; - if *silent { - write!(f, "SILENT ")?; - } - write!(f, "{}", graph) - } - } - } -} - -fn write_quads(quads: &[Quad], f: &mut fmt::Formatter<'_>) -> fmt::Result { - for quad in quads { - if quad.graph_name == GraphName::DefaultGraph { - writeln!(f, "\t{} {} {} .", quad.subject, quad.predicate, quad.object)?; - } else { - writeln!( - f, - "\tGRAPH {} {{ {} {} {} }}", - quad.graph_name, quad.subject, quad.predicate, quad.object - )?; - } - } - Ok(()) -} - -/// A target RDF graph for update operations -/// -/// Could be a specific graph, all named graphs or the complete dataset. -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub enum GraphTarget { - NamedNode(NamedNode), - DefaultGraph, - NamedGraphs, - AllGraphs, -} - -impl fmt::Display for GraphTarget { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::NamedNode(node) => write!(f, "GRAPH {}", node), - Self::DefaultGraph => write!(f, "DEFAULT"), - Self::NamedGraphs => write!(f, "NAMED"), - Self::AllGraphs => write!(f, "ALL"), - } - } -} - -impl From for GraphTarget { - fn from(node: NamedNode) -> Self { - Self::NamedNode(node) - } -} diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index fa8497c4..05946a27 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -2,7 +2,7 @@ use crate::model::vocab::{rdf, xsd}; use crate::model::xsd::*; use crate::model::Triple; use crate::model::{BlankNode, LiteralRef, NamedNodeRef}; -use crate::sparql::algebra::{GraphPattern, Query, QueryDataset}; +use crate::sparql::algebra::{Query, QueryDataset}; use crate::sparql::error::EvaluationError; use crate::sparql::model::*; use crate::sparql::plan::*; @@ -18,6 +18,7 @@ use 
rand::random; use regex::{Regex, RegexBuilder}; use sha1::Sha1; use sha2::{Sha256, Sha384, Sha512}; +use spargebra::algebra::GraphPattern; use std::cmp::Ordering; use std::collections::{HashMap, HashSet}; use std::convert::{TryFrom, TryInto}; @@ -510,10 +511,13 @@ where get_pattern_value(service_name, from) .ok_or_else(|| EvaluationError::msg("The SERVICE name is not bound"))?, )?, - Query::Select { - dataset: QueryDataset::default(), - pattern: graph_pattern.clone(), - base_iri: self.base_iri.as_ref().map(|iri| iri.as_ref().clone()), + Query { + inner: spargebra::Query::Select { + dataset: None, + pattern: graph_pattern.clone(), + base_iri: self.base_iri.as_ref().map(|iri| iri.as_ref().clone()), + }, + dataset: QueryDataset::new(), }, )? { Ok(self.encode_bindings(variables, iter)) diff --git a/lib/src/sparql/mod.rs b/lib/src/sparql/mod.rs index 1f7b1b8c..4728513c 100644 --- a/lib/src/sparql/mod.rs +++ b/lib/src/sparql/mod.rs @@ -10,7 +10,6 @@ mod eval; mod http; mod json_results; mod model; -mod parser; mod plan; mod plan_builder; mod service; @@ -27,13 +26,13 @@ pub use crate::sparql::model::QuerySolution; pub use crate::sparql::model::QuerySolutionIter; pub use crate::sparql::model::QueryTripleIter; pub use crate::sparql::model::{Variable, VariableNameParseError}; -pub use crate::sparql::parser::ParseError; use crate::sparql::plan_builder::PlanBuilder; pub use crate::sparql::service::ServiceHandler; use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler}; use crate::sparql::update::SimpleUpdateEvaluator; use crate::store::numeric_encoder::StrContainer; use crate::store::{ReadableEncodedStore, StoreOrParseError, WritableEncodedStore}; +pub use spargebra::ParseError; use std::convert::TryInto; use std::io; use std::rc::Rc; @@ -43,27 +42,31 @@ pub(crate) fn evaluate_query( query: impl TryInto>, options: QueryOptions, ) -> Result { - match query.try_into().map_err(|e| e.into())? 
{ - Query::Select { - pattern, - base_iri, - dataset, + let query = query.try_into().map_err(|e| e.into())?; + let dataset = DatasetView::new(store, &query.dataset)?; + match query.inner { + spargebra::Query::Select { + pattern, base_iri, .. } => { - let dataset = DatasetView::new(store, &dataset)?; let (plan, variables) = PlanBuilder::build(&dataset, &pattern)?; SimpleEvaluator::new( Rc::new(dataset), base_iri.map(Rc::new), options.service_handler, ) - .evaluate_select_plan(&plan, Rc::new(variables)) + .evaluate_select_plan( + &plan, + Rc::new( + variables + .into_iter() + .map(|v| Variable::new_unchecked(v.name)) + .collect(), + ), + ) } - Query::Ask { - pattern, - base_iri, - dataset, + spargebra::Query::Ask { + pattern, base_iri, .. } => { - let dataset = DatasetView::new(store, &dataset)?; let (plan, _) = PlanBuilder::build(&dataset, &pattern)?; SimpleEvaluator::new( Rc::new(dataset), @@ -72,13 +75,12 @@ pub(crate) fn evaluate_query( ) .evaluate_ask_plan(&plan) } - Query::Construct { + spargebra::Query::Construct { template, pattern, base_iri, - dataset, + .. } => { - let dataset = DatasetView::new(store, &dataset)?; let (plan, variables) = PlanBuilder::build(&dataset, &pattern)?; let construct = PlanBuilder::build_graph_template(&dataset, &template, variables)?; SimpleEvaluator::new( @@ -88,12 +90,9 @@ pub(crate) fn evaluate_query( ) .evaluate_construct_plan(&plan, construct) } - Query::Describe { - pattern, - base_iri, - dataset, + spargebra::Query::Describe { + pattern, base_iri, .. 
} => { - let dataset = DatasetView::new(store, &dataset)?; let (plan, _) = PlanBuilder::build(&dataset, &pattern)?; SimpleEvaluator::new( Rc::new(dataset), @@ -192,6 +191,6 @@ pub(crate) fn evaluate_update< where io::Error: From>, { - SimpleUpdateEvaluator::new(read, write, update.base_iri.map(Rc::new), options) - .eval_all(&update.operations) + SimpleUpdateEvaluator::new(read, write, update.inner.base_iri.map(Rc::new), options) + .eval_all(&update.inner.operations, &update.using_datasets) } diff --git a/lib/src/sparql/model.rs b/lib/src/sparql/model.rs index 9ce64431..6bef3b27 100644 --- a/lib/src/sparql/model.rs +++ b/lib/src/sparql/model.rs @@ -6,7 +6,6 @@ use crate::sparql::csv_results::{read_tsv_results, write_csv_results, write_tsv_ use crate::sparql::error::EvaluationError; use crate::sparql::json_results::write_json_results; use crate::sparql::xml_results::{read_xml_results, write_xml_results}; -use rand::random; use std::error::Error; use std::io::{BufRead, Write}; use std::rc::Rc; @@ -443,11 +442,6 @@ impl Variable { pub fn into_string(self) -> String { self.name } - - #[inline] - pub(crate) fn new_random() -> Self { - Self::new_unchecked(format!("{:x}", random::())) - } } impl fmt::Display for Variable { diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index 10fbc369..f3fef050 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -1,6 +1,6 @@ -use crate::sparql::algebra::GraphPattern; use crate::sparql::model::Variable; use crate::store::numeric_encoder::EncodedTerm; +use spargebra::algebra::GraphPattern; use std::collections::BTreeSet; use std::rc::Rc; diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index d13891d0..e3a75bac 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -1,9 +1,11 @@ -use crate::model::{BlankNode, Literal, NamedNode, Term}; -use crate::sparql::algebra::*; +use crate::model::{LiteralRef, NamedNodeRef}; use crate::sparql::error::EvaluationError; 
-use crate::sparql::model::*; +use crate::sparql::model::Variable as OxVariable; use crate::sparql::plan::*; use crate::store::numeric_encoder::{EncodedTerm, WriteEncoder}; +use rand::random; +use spargebra::algebra::*; +use spargebra::term::*; use std::collections::{BTreeSet, HashSet}; use std::rc::Rc; @@ -40,7 +42,7 @@ impl> PlanBuilder { graph_name: PatternValue, ) -> Result { Ok(match pattern { - GraphPattern::BGP(p) => self.build_for_bgp(p, variables, graph_name)?, + GraphPattern::Bgp(p) => self.build_for_bgp(p, variables, graph_name)?, GraphPattern::Path { subject, path, @@ -147,7 +149,12 @@ impl> PlanBuilder { self.pattern_value_from_named_node_or_variable(name, variables)?; PlanNode::Service { service_name, - variables: Rc::new(variables.clone()), + variables: Rc::new( + variables + .iter() + .map(|v| OxVariable::new_unchecked(v.name.clone())) + .collect(), + ), child: Rc::new(child), graph_pattern: Rc::new(*pattern.clone()), silent: *silent, @@ -411,7 +418,7 @@ impl> PlanBuilder { Function::Datatype => PlanExpression::Datatype(Box::new( self.build_for_expression(¶meters[0], variables, graph_name)?, )), - Function::IRI => PlanExpression::Iri(Box::new(self.build_for_expression( + Function::Iri => PlanExpression::Iri(Box::new(self.build_for_expression( ¶meters[0], variables, graph_name, @@ -482,7 +489,7 @@ impl> PlanBuilder { variables, graph_name, )?)), - Function::EncodeForURI => PlanExpression::EncodeForUri(Box::new( + Function::EncodeForUri => PlanExpression::EncodeForUri(Box::new( self.build_for_expression(¶meters[0], variables, graph_name)?, )), Function::Contains => PlanExpression::Contains( @@ -544,29 +551,29 @@ impl> PlanBuilder { graph_name, )?)), Function::Now => PlanExpression::Now, - Function::UUID => PlanExpression::Uuid, - Function::StrUUID => PlanExpression::StrUuid, - Function::MD5 => PlanExpression::Md5(Box::new(self.build_for_expression( + Function::Uuid => PlanExpression::Uuid, + Function::StrUuid => PlanExpression::StrUuid, + 
Function::Md5 => PlanExpression::Md5(Box::new(self.build_for_expression( ¶meters[0], variables, graph_name, )?)), - Function::SHA1 => PlanExpression::Sha1(Box::new(self.build_for_expression( + Function::Sha1 => PlanExpression::Sha1(Box::new(self.build_for_expression( ¶meters[0], variables, graph_name, )?)), - Function::SHA256 => PlanExpression::Sha256(Box::new(self.build_for_expression( + Function::Sha256 => PlanExpression::Sha256(Box::new(self.build_for_expression( ¶meters[0], variables, graph_name, )?)), - Function::SHA384 => PlanExpression::Sha384(Box::new(self.build_for_expression( + Function::Sha384 => PlanExpression::Sha384(Box::new(self.build_for_expression( ¶meters[0], variables, graph_name, )?)), - Function::SHA512 => PlanExpression::Sha512(Box::new(self.build_for_expression( + Function::Sha512 => PlanExpression::Sha512(Box::new(self.build_for_expression( ¶meters[0], variables, graph_name, @@ -575,11 +582,11 @@ impl> PlanBuilder { Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), ), - Function::StrDT => PlanExpression::StrDt( + Function::StrDt => PlanExpression::StrDt( Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), ), - Function::IsIRI => PlanExpression::IsIri(Box::new(self.build_for_expression( + Function::IsIri => PlanExpression::IsIri(Box::new(self.build_for_expression( ¶meters[0], variables, graph_name, @@ -606,7 +613,7 @@ impl> PlanBuilder { }, ), Function::Custom(name) => { - if name == "http://www.w3.org/2001/XMLSchema#boolean" { + if name.iri == "http://www.w3.org/2001/XMLSchema#boolean" { self.build_cast( parameters, PlanExpression::BooleanCast, @@ -614,7 +621,7 @@ impl> PlanBuilder { graph_name, "boolean", )? 
- } else if name == "http://www.w3.org/2001/XMLSchema#double" { + } else if name.iri == "http://www.w3.org/2001/XMLSchema#double" { self.build_cast( parameters, PlanExpression::DoubleCast, @@ -622,7 +629,7 @@ impl> PlanBuilder { graph_name, "double", )? - } else if name == "http://www.w3.org/2001/XMLSchema#float" { + } else if name.iri == "http://www.w3.org/2001/XMLSchema#float" { self.build_cast( parameters, PlanExpression::FloatCast, @@ -630,7 +637,7 @@ impl> PlanBuilder { graph_name, "float", )? - } else if name == "http://www.w3.org/2001/XMLSchema#decimal" { + } else if name.iri == "http://www.w3.org/2001/XMLSchema#decimal" { self.build_cast( parameters, PlanExpression::DecimalCast, @@ -638,7 +645,7 @@ impl> PlanBuilder { graph_name, "decimal", )? - } else if name == "http://www.w3.org/2001/XMLSchema#integer" { + } else if name.iri == "http://www.w3.org/2001/XMLSchema#integer" { self.build_cast( parameters, PlanExpression::IntegerCast, @@ -646,7 +653,7 @@ impl> PlanBuilder { graph_name, "integer", )? - } else if name == "http://www.w3.org/2001/XMLSchema#date" { + } else if name.iri == "http://www.w3.org/2001/XMLSchema#date" { self.build_cast( parameters, PlanExpression::DateCast, @@ -654,7 +661,7 @@ impl> PlanBuilder { graph_name, "date", )? - } else if name == "http://www.w3.org/2001/XMLSchema#time" { + } else if name.iri == "http://www.w3.org/2001/XMLSchema#time" { self.build_cast( parameters, PlanExpression::TimeCast, @@ -662,7 +669,7 @@ impl> PlanBuilder { graph_name, "time", )? - } else if name == "http://www.w3.org/2001/XMLSchema#dateTime" { + } else if name.iri == "http://www.w3.org/2001/XMLSchema#dateTime" { self.build_cast( parameters, PlanExpression::DateTimeCast, @@ -670,7 +677,7 @@ impl> PlanBuilder { graph_name, "dateTime", )? 
- } else if name == "http://www.w3.org/2001/XMLSchema#duration" { + } else if name.iri == "http://www.w3.org/2001/XMLSchema#duration" { self.build_cast( parameters, PlanExpression::DurationCast, @@ -678,7 +685,7 @@ impl> PlanBuilder { graph_name, "duration", )? - } else if name == "http://www.w3.org/2001/XMLSchema#yearMonthDuration" { + } else if name.iri == "http://www.w3.org/2001/XMLSchema#yearMonthDuration" { self.build_cast( parameters, PlanExpression::YearMonthDurationCast, @@ -686,7 +693,7 @@ impl> PlanBuilder { graph_name, "yearMonthDuration", )? - } else if name == "http://www.w3.org/2001/XMLSchema#dayTimeDuration" { + } else if name.iri == "http://www.w3.org/2001/XMLSchema#dayTimeDuration" { self.build_cast( parameters, PlanExpression::DayTimeDurationCast, @@ -694,7 +701,7 @@ impl> PlanBuilder { graph_name, "dayTimeDuration", )? - } else if name == "http://www.w3.org/2001/XMLSchema#string" { + } else if name.iri == "http://www.w3.org/2001/XMLSchema#string" { self.build_cast( parameters, PlanExpression::StringCast, @@ -770,7 +777,9 @@ impl> PlanBuilder { TermOrVariable::Term(Term::BlankNode(bnode)) => { PatternValue::Variable(variable_key( variables, - &Variable::new_unchecked(bnode.as_str()), + &Variable { + name: bnode.id.clone(), + }, )) //TODO: very bad hack to convert bnode to variable } @@ -796,7 +805,7 @@ impl> PlanBuilder { fn encode_bindings( &mut self, table_variables: &[Variable], - rows: &[Vec>], + rows: &[Vec>], variables: &mut Vec, ) -> Result, EvaluationError> { let bindings_variables_keys = table_variables @@ -808,7 +817,13 @@ impl> PlanBuilder { let mut result = EncodedTuple::with_capacity(variables.len()); for (key, value) in row.iter().enumerate() { if let Some(term) = value { - result.set(bindings_variables_keys[key], self.build_term(term)?); + result.set( + bindings_variables_keys[key], + match term { + NamedNodeOrLiteral::NamedNode(node) => self.build_named_node(node), + NamedNodeOrLiteral::Literal(literal) => 
self.build_literal(literal), + }?, + ); } } Ok(result) @@ -961,7 +976,9 @@ impl> PlanBuilder { }) { to_id } else { - to.push(Variable::new_random()); + to.push(Variable { + name: format!("{:x}", random::()), + }); to.len() - 1 } } @@ -1030,15 +1047,29 @@ impl> PlanBuilder { } fn build_named_node(&mut self, node: &NamedNode) -> Result { - self.encoder.encode_named_node(node.as_ref()) + self.encoder + .encode_named_node(NamedNodeRef::new_unchecked(node.iri.as_str())) } fn build_literal(&mut self, literal: &Literal) -> Result { - self.encoder.encode_literal(literal.as_ref()) + self.encoder.encode_literal(match literal { + Literal::Simple { value } => LiteralRef::new_simple_literal(value), + Literal::LanguageTaggedString { value, language } => { + LiteralRef::new_language_tagged_literal_unchecked(value, language.as_str()) + } + Literal::Typed { value, datatype } => LiteralRef::new_typed_literal( + value, + NamedNodeRef::new_unchecked(datatype.iri.as_str()), + ), + }) } fn build_term(&mut self, term: &Term) -> Result { - self.encoder.encode_term(term.as_ref()) + match term { + Term::NamedNode(node) => self.build_named_node(node), + Term::BlankNode(_) => Err(EvaluationError::msg("Unexpected blank node")), + Term::Literal(literal) => self.build_literal(literal), + } } } diff --git a/lib/src/sparql/update.rs b/lib/src/sparql/update.rs index c7692dce..b1ab80a5 100644 --- a/lib/src/sparql/update.rs +++ b/lib/src/sparql/update.rs @@ -1,16 +1,13 @@ use crate::error::{invalid_data_error, invalid_input_error}; use crate::io::GraphFormat; -use crate::model::{BlankNode, GraphNameRef, NamedNode, NamedOrBlankNode, Quad, Term}; -use crate::sparql::algebra::{ - GraphPattern, GraphTarget, GraphUpdateOperation, NamedNodeOrVariable, QuadPattern, - QueryDataset, TermOrVariable, -}; +use crate::model::{BlankNode as OxBlankNode, GraphNameRef, LiteralRef, NamedNodeRef}; +use crate::sparql::algebra::QueryDataset; use crate::sparql::dataset::DatasetView; use 
crate::sparql::eval::SimpleEvaluator; use crate::sparql::http::Client; use crate::sparql::plan::EncodedTuple; use crate::sparql::plan_builder::PlanBuilder; -use crate::sparql::{EvaluationError, UpdateOptions, Variable}; +use crate::sparql::{EvaluationError, UpdateOptions}; use crate::store::numeric_encoder::{ EncodedQuad, EncodedTerm, ReadEncoder, StrContainer, StrLookup, WriteEncoder, }; @@ -18,6 +15,12 @@ use crate::store::{load_graph, ReadableEncodedStore, StoreOrParseError, Writable use http::header::{ACCEPT, CONTENT_TYPE, USER_AGENT}; use http::{Method, Request, StatusCode}; use oxiri::Iri; +use spargebra::algebra::{GraphPattern, GraphTarget, QuadPattern}; +use spargebra::term::{ + BlankNode, GraphName, Literal, NamedNode, NamedNodeOrVariable, NamedOrBlankNode, Quad, Term, + TermOrVariable, Variable, +}; +use spargebra::GraphUpdateOperation; use std::collections::HashMap; use std::io; use std::rc::Rc; @@ -53,23 +56,31 @@ where } } - pub fn eval_all(&mut self, updates: &[GraphUpdateOperation]) -> Result<(), EvaluationError> { - for update in updates { - self.eval(update)?; + pub fn eval_all( + &mut self, + updates: &[GraphUpdateOperation], + using_datasets: &[Option], + ) -> Result<(), EvaluationError> { + for (update, using_dataset) in updates.iter().zip(using_datasets) { + self.eval(update, using_dataset)?; } Ok(()) } - fn eval(&mut self, update: &GraphUpdateOperation) -> Result<(), EvaluationError> { + fn eval( + &mut self, + update: &GraphUpdateOperation, + using_dataset: &Option, + ) -> Result<(), EvaluationError> { match update { GraphUpdateOperation::InsertData { data } => self.eval_insert_data(data), GraphUpdateOperation::DeleteData { data } => self.eval_delete_data(data), GraphUpdateOperation::DeleteInsert { delete, insert, - using, pattern, - } => self.eval_delete_insert(delete, insert, using, pattern), + .. 
+ } => self.eval_delete_insert(delete, insert, using_dataset.as_ref().unwrap(), pattern), GraphUpdateOperation::Load { silent, from, to } => { if let Err(error) = self.eval_load(from, to) { if *silent { @@ -176,7 +187,7 @@ where ) -> Result<(), EvaluationError> { let request = Request::builder() .method(Method::GET) - .uri(from.as_str()) + .uri(&from.iri) .header( ACCEPT, "application/n-triples, text/turtle, application/rdf+xml", @@ -207,7 +218,7 @@ where )) })?; let to_graph_name = if let Some(graph_name) = to { - graph_name.as_ref().into() + NamedNodeRef::new_unchecked(&graph_name.iri).into() } else { GraphNameRef::DefaultGraph }; @@ -216,17 +227,14 @@ where response.into_body(), format, to_graph_name, - Some(from.as_str()), + Some(&from.iri), ) .map_err(io::Error::from)?; Ok(()) } fn eval_create(&mut self, graph: &NamedNode, silent: bool) -> Result<(), EvaluationError> { - let encoded_graph_name = self - .write - .encode_named_node(graph.as_ref()) - .map_err(to_eval_error)?; + let encoded_graph_name = self.encode_named_node_for_insertion(graph)?; if self .read .contains_encoded_named_graph(encoded_graph_name) @@ -250,11 +258,7 @@ where fn eval_clear(&mut self, graph: &GraphTarget, silent: bool) -> Result<(), EvaluationError> { match graph { GraphTarget::NamedNode(graph_name) => { - if let Some(graph_name) = self - .read - .get_encoded_named_node(graph_name.as_ref()) - .map_err(to_eval_error)? - { + if let Some(graph_name) = self.encode_named_node_for_deletion(graph_name)? { if self .read .contains_encoded_named_graph(graph_name) @@ -305,11 +309,7 @@ where fn eval_drop(&mut self, graph: &GraphTarget, silent: bool) -> Result<(), EvaluationError> { match graph { GraphTarget::NamedNode(graph_name) => { - if let Some(graph_name) = self - .read - .get_encoded_named_node(graph_name.as_ref()) - .map_err(to_eval_error)? - { + if let Some(graph_name) = self.encode_named_node_for_deletion(graph_name)? 
{ if self .read .contains_encoded_named_graph(graph_name) @@ -350,34 +350,36 @@ where fn encode_quad_for_insertion( &mut self, quad: &Quad, - bnodes: &mut HashMap, + bnodes: &mut HashMap, ) -> Result, EvaluationError> { Ok(Some(EncodedQuad { subject: match &quad.subject { NamedOrBlankNode::NamedNode(subject) => { - self.write.encode_named_node(subject.as_ref()) + self.encode_named_node_for_insertion(subject)? } NamedOrBlankNode::BlankNode(subject) => self .write - .encode_blank_node(bnodes.entry(subject.clone()).or_default().as_ref()), - } - .map_err(to_eval_error)?, + .encode_blank_node(bnodes.entry(subject.clone()).or_default().as_ref()) + .map_err(to_eval_error)?, + }, predicate: self .write - .encode_named_node(quad.predicate.as_ref()) + .encode_named_node(NamedNodeRef::new_unchecked(&quad.predicate.iri)) .map_err(to_eval_error)?, object: match &quad.object { - Term::NamedNode(object) => self.write.encode_named_node(object.as_ref()), + Term::NamedNode(object) => self.encode_named_node_for_insertion(object)?, Term::BlankNode(object) => self .write - .encode_blank_node(bnodes.entry(object.clone()).or_default().as_ref()), - Term::Literal(object) => self.write.encode_literal(object.as_ref()), - } - .map_err(to_eval_error)?, - graph_name: self - .write - .encode_graph_name(quad.graph_name.as_ref()) - .map_err(to_eval_error)?, + .encode_blank_node(bnodes.entry(object.clone()).or_default().as_ref()) + .map_err(to_eval_error)?, + Term::Literal(object) => self.encode_literal_for_insertion(object)?, + }, + graph_name: match &quad.graph_name { + GraphName::NamedNode(graph_name) => { + self.encode_named_node_for_insertion(graph_name)? 
+ } + GraphName::DefaultGraph => EncodedTerm::DefaultGraph, + }, })) } @@ -386,35 +388,41 @@ where quad: &QuadPattern, variables: &[Variable], values: &[Option], - bnodes: &mut HashMap, + bnodes: &mut HashMap, ) -> Result, EvaluationError> { Ok(Some(EncodedQuad { - subject: if let Some(subject) = - self.encode_term_for_insertion(&quad.subject, variables, values, bnodes, |t| { - t.is_named_node() || t.is_blank_node() - })? { + subject: if let Some(subject) = self.encode_term_or_var_for_insertion( + &quad.subject, + variables, + values, + bnodes, + |t| t.is_named_node() || t.is_blank_node(), + )? { subject } else { return Ok(None); }, predicate: if let Some(predicate) = - self.encode_named_node_for_insertion(&quad.predicate, variables, values)? + self.encode_named_node_or_var_for_insertion(&quad.predicate, variables, values)? { predicate } else { return Ok(None); }, - object: if let Some(object) = - self.encode_term_for_insertion(&quad.object, variables, values, bnodes, |t| { - !t.is_default_graph() - })? { + object: if let Some(object) = self.encode_term_or_var_for_insertion( + &quad.object, + variables, + values, + bnodes, + |t| !t.is_default_graph(), + )? { object } else { return Ok(None); }, graph_name: if let Some(graph_name) = &quad.graph_name { if let Some(graph_name) = - self.encode_named_node_for_insertion(graph_name, variables, values)? + self.encode_named_node_or_var_for_insertion(graph_name, variables, values)? 
{ graph_name } else { @@ -426,24 +434,23 @@ where })) } - fn encode_term_for_insertion( + fn encode_term_or_var_for_insertion( &mut self, term: &TermOrVariable, variables: &[Variable], values: &[Option], - bnodes: &mut HashMap, + bnodes: &mut HashMap, validate: impl FnOnce(&EncodedTerm) -> bool, ) -> Result, EvaluationError> { Ok(match term { - TermOrVariable::Term(term) => Some( - self.write - .encode_term(if let Term::BlankNode(bnode) = term { - bnodes.entry(bnode.clone()).or_default().as_ref().into() - } else { - term.as_ref() - }) + TermOrVariable::Term(term) => Some(match term { + Term::NamedNode(term) => self.encode_named_node_for_insertion(term)?, + Term::BlankNode(bnode) => self + .write + .encode_blank_node(bnodes.entry(bnode.clone()).or_default().as_ref()) .map_err(to_eval_error)?, - ), + Term::Literal(term) => self.encode_literal_for_insertion(term)?, + }), TermOrVariable::Variable(v) => { if let Some(Some(term)) = variables .iter() @@ -462,18 +469,16 @@ where }) } - fn encode_named_node_for_insertion( + fn encode_named_node_or_var_for_insertion( &mut self, term: &NamedNodeOrVariable, variables: &[Variable], values: &[Option], ) -> Result, EvaluationError> { Ok(match term { - NamedNodeOrVariable::NamedNode(term) => Some( - self.write - .encode_named_node(term.into()) - .map_err(to_eval_error)?, - ), + NamedNodeOrVariable::NamedNode(term) => { + Some(self.encode_named_node_for_insertion(term)?) 
+ } NamedNodeOrVariable::Variable(v) => { if let Some(Some(term)) = variables .iter() @@ -492,43 +497,77 @@ where }) } + fn encode_named_node_for_insertion( + &mut self, + term: &NamedNode, + ) -> Result { + self.write + .encode_named_node(NamedNodeRef::new_unchecked(&term.iri)) + .map_err(to_eval_error) + } + + fn encode_literal_for_insertion( + &mut self, + term: &Literal, + ) -> Result { + self.write + .encode_literal(match term { + Literal::Simple { value } => LiteralRef::new_simple_literal(value), + Literal::LanguageTaggedString { value, language } => { + LiteralRef::new_language_tagged_literal_unchecked(value, language) + } + Literal::Typed { value, datatype } => { + LiteralRef::new_typed_literal(value, NamedNodeRef::new_unchecked(&datatype.iri)) + } + }) + .map_err(to_eval_error) + } + fn encode_quad_for_deletion( &mut self, quad: &Quad, ) -> Result, EvaluationError> { Ok(Some(EncodedQuad { - subject: if let Some(subject) = self - .read - .get_encoded_named_or_blank_node(quad.subject.as_ref()) - .map_err(to_eval_error)? - { + subject: if let Some(subject) = match &quad.subject { + NamedOrBlankNode::NamedNode(subject) => { + self.encode_named_node_for_deletion(subject)? + } + NamedOrBlankNode::BlankNode(_) => { + return Err(EvaluationError::msg( + "Blank nodes are not allowed in DELETE DATA", + )) + } + } { subject } else { return Ok(None); }, - predicate: if let Some(predicate) = self - .read - .get_encoded_named_node(quad.predicate.as_ref()) - .map_err(to_eval_error)? + predicate: if let Some(predicate) = + self.encode_named_node_for_deletion(&quad.predicate)? { predicate } else { return Ok(None); }, - object: if let Some(object) = self - .read - .get_encoded_term(quad.object.as_ref()) - .map_err(to_eval_error)? 
- { + object: if let Some(object) = match &quad.object { + Term::NamedNode(object) => self.encode_named_node_for_deletion(object)?, + Term::BlankNode(_) => { + return Err(EvaluationError::msg( + "Blank nodes are not allowed in DELETE DATA", + )) + } + Term::Literal(object) => self.encode_literal_for_deletion(object)?, + } { object } else { return Ok(None); }, - graph_name: if let Some(graph_name) = self - .read - .get_encoded_graph_name(quad.graph_name.as_ref()) - .map_err(to_eval_error)? - { + graph_name: if let Some(graph_name) = match &quad.graph_name { + GraphName::NamedNode(graph_name) => { + self.encode_named_node_for_deletion(graph_name)? + } + GraphName::DefaultGraph => Some(EncodedTerm::DefaultGraph), + } { graph_name } else { return Ok(None); @@ -544,21 +583,21 @@ where ) -> Result, EvaluationError> { Ok(Some(EncodedQuad { subject: if let Some(subject) = - self.encode_term_for_deletion(&quad.subject, variables, values)? + self.encode_term_or_var_for_deletion(&quad.subject, variables, values)? { subject } else { return Ok(None); }, predicate: if let Some(predicate) = - self.encode_named_node_for_deletion(&quad.predicate, variables, values)? + self.encode_named_node_or_var_for_deletion(&quad.predicate, variables, values)? { predicate } else { return Ok(None); }, object: if let Some(object) = - self.encode_term_for_deletion(&quad.object, variables, values)? + self.encode_term_or_var_for_deletion(&quad.object, variables, values)? { object } else { @@ -566,7 +605,7 @@ where }, graph_name: if let Some(graph_name) = &quad.graph_name { if let Some(graph_name) = - self.encode_named_node_for_deletion(graph_name, variables, values)? + self.encode_named_node_or_var_for_deletion(graph_name, variables, values)? 
{ graph_name } else { @@ -578,24 +617,20 @@ where })) } - fn encode_term_for_deletion( + fn encode_term_or_var_for_deletion( &self, term: &TermOrVariable, variables: &[Variable], values: &[Option], ) -> Result, EvaluationError> { match term { - TermOrVariable::Term(term) => { - if term.is_blank_node() { - Err(EvaluationError::msg( - "Blank node are not allowed in deletion patterns", - )) - } else { - self.read - .get_encoded_term(term.into()) - .map_err(to_eval_error) - } - } + TermOrVariable::Term(term) => match term { + Term::NamedNode(term) => self.encode_named_node_for_deletion(term), + Term::BlankNode(_) => Err(EvaluationError::msg( + "Blank nodes are not allowed in DELETE patterns", + )), + Term::Literal(term) => self.encode_literal_for_deletion(term), + }, TermOrVariable::Variable(v) => Ok( if let Some(Some(term)) = variables .iter() @@ -610,17 +645,14 @@ where } } - fn encode_named_node_for_deletion( + fn encode_named_node_or_var_for_deletion( &self, term: &NamedNodeOrVariable, variables: &[Variable], values: &[Option], ) -> Result, EvaluationError> { Ok(match term { - NamedNodeOrVariable::NamedNode(term) => self - .read - .get_encoded_named_node(term.into()) - .map_err(to_eval_error)?, + NamedNodeOrVariable::NamedNode(term) => self.encode_named_node_for_deletion(term)?, NamedNodeOrVariable::Variable(v) => { if let Some(Some(term)) = variables .iter() @@ -638,6 +670,32 @@ where } }) } + + fn encode_named_node_for_deletion( + &self, + term: &NamedNode, + ) -> Result, EvaluationError> { + self.read + .get_encoded_named_node(NamedNodeRef::new_unchecked(&term.iri)) + .map_err(to_eval_error) + } + + fn encode_literal_for_deletion( + &self, + term: &Literal, + ) -> Result, EvaluationError> { + self.read + .get_encoded_literal(match term { + Literal::Simple { value } => LiteralRef::new_simple_literal(value), + Literal::LanguageTaggedString { value, language } => { + LiteralRef::new_language_tagged_literal_unchecked(value, language) + } + Literal::Typed { value, 
datatype } => { + LiteralRef::new_typed_literal(value, NamedNodeRef::new_unchecked(&datatype.iri)) + } + }) + .map_err(to_eval_error) + } } fn to_eval_error(e: impl Into) -> EvaluationError { diff --git a/spargebra/Cargo.toml b/spargebra/Cargo.toml new file mode 100644 index 00000000..5935ff2c --- /dev/null +++ b/spargebra/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "spargebra" +version = "0.1.0" +authors = ["Tpt "] +license = "MIT OR Apache-2.0" +readme = "README.md" +keywords = ["SPARQL"] +repository = "https://github.com/oxigraph/oxigraph/tree/master/spargebra" +homepage = "https://oxigraph.org/" +description = """ +A SPARQL parser +""" +edition = "2018" + +[dependencies] +peg = "0.7" +rand = "0.8" +oxiri = "0.1" +oxilangtag = "0.1" diff --git a/spargebra/README.md b/spargebra/README.md new file mode 100644 index 00000000..519209c7 --- /dev/null +++ b/spargebra/README.md @@ -0,0 +1,40 @@ +Spargebra +======== + +[![Latest Version](https://img.shields.io/crates/v/spargebra.svg)](https://crates.io/crates/spargebra) +[![Released API docs](https://docs.rs/spargebra/badge.svg)](https://docs.rs/spargebra) +[![Crates.io downloads](https://img.shields.io/crates/d/spargebra)](https://crates.io/crates/spargebra) +[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) +[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) + +Spargebra is a [SPARQL](https://www.w3.org/TR/sparql11-overview/) parser. + +It supports both [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/) and [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/). + +This crate is intended to be a building piece for SPARQL implementations in Rust like [Oxigraph](https://oxigraph.org). + +Usage example: + +```rust +use spargebra::Query; + +let query_str = "SELECT ?s ?p ?o WHERE { ?s ?p ?o . 
}"; +let mut query = Query::parse(query_str, None)?; +assert_eq!(query.to_string(), query_str); +``` + +## License + +This project is licensed under either of + +* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) +* MIT license ([LICENSE-MIT](../LICENSE-MIT) or + http://opensource.org/licenses/MIT) + +at your option. + + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. diff --git a/spargebra/src/algebra.rs b/spargebra/src/algebra.rs new file mode 100644 index 00000000..4a572c64 --- /dev/null +++ b/spargebra/src/algebra.rs @@ -0,0 +1,1331 @@ +//! [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery) representation + +use crate::term::print_quoted_str; +use crate::term::*; +use std::collections::BTreeSet; +use std::fmt; + +/// A [triple pattern](https://www.w3.org/TR/sparql11-query/#defn_TriplePattern) +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct TriplePattern { + pub subject: TermOrVariable, + pub predicate: NamedNodeOrVariable, + pub object: TermOrVariable, +} + +impl TriplePattern { + pub(crate) fn new( + subject: impl Into, + predicate: impl Into, + object: impl Into, + ) -> Self { + Self { + subject: subject.into(), + predicate: predicate.into(), + object: object.into(), + } + } +} + +impl fmt::Display for TriplePattern { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "(triple {} {} {})", + self.subject, self.predicate, self.object + ) + } +} + +pub(crate) struct SparqlTriplePattern<'a>(pub(crate) &'a TriplePattern); + +impl<'a> fmt::Display for SparqlTriplePattern<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{} {} {} .", + self.0.subject, self.0.predicate, self.0.object + ) + } +} + +/// A
[triple pattern](https://www.w3.org/TR/sparql11-query/#defn_TriplePattern) in a specific graph +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct QuadPattern { + pub subject: TermOrVariable, + pub predicate: NamedNodeOrVariable, + pub object: TermOrVariable, + pub graph_name: Option, +} + +impl QuadPattern { + pub(crate) fn new( + subject: impl Into, + predicate: impl Into, + object: impl Into, + graph_name: Option, + ) -> Self { + Self { + subject: subject.into(), + predicate: predicate.into(), + object: object.into(), + graph_name, + } + } +} + +impl fmt::Display for QuadPattern { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(graph_name) = &self.graph_name { + write!( + f, + "(graph {} (triple {} {} {}))", + graph_name, self.subject, self.predicate, self.object + ) + } else { + write!( + f, + "(triple {} {} {})", + self.subject, self.predicate, self.object + ) + } + } +} + +pub(crate) struct SparqlQuadPattern<'a>(pub(crate) &'a QuadPattern); + +impl<'a> fmt::Display for SparqlQuadPattern<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(graph_name) = &self.0.graph_name { + write!( + f, + "GRAPH {} {{ {} {} {} }}", + graph_name, self.0.subject, self.0.predicate, self.0.object + ) + } else { + write!( + f, + "{} {} {} .", + self.0.subject, self.0.predicate, self.0.object + ) + } + } +} + +/// A [property path expression](https://www.w3.org/TR/sparql11-query/#defn_PropertyPathExpr) +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum PropertyPathExpression { + NamedNode(NamedNode), + Reverse(Box), + Sequence(Box, Box), + Alternative(Box, Box), + ZeroOrMore(Box), + OneOrMore(Box), + ZeroOrOne(Box), + NegatedPropertySet(Vec), +} + +impl fmt::Display for PropertyPathExpression { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PropertyPathExpression::NamedNode(p) => p.fmt(f), + PropertyPathExpression::Reverse(p) => write!(f, "(reverse {})", p), + 
PropertyPathExpression::Alternative(a, b) => write!(f, "(alt {} {})", a, b), + PropertyPathExpression::Sequence(a, b) => write!(f, "(seq {} {})", a, b), + PropertyPathExpression::ZeroOrMore(p) => write!(f, "(path* {})", p), + PropertyPathExpression::OneOrMore(p) => write!(f, "(path+ {})", p), + PropertyPathExpression::ZeroOrOne(p) => write!(f, "(path? {})", p), + PropertyPathExpression::NegatedPropertySet(p) => { + write!(f, "(notoneof ")?; + for p in p { + write!(f, " {}", p)?; + } + write!(f, ")") + } + } + } +} + +struct SparqlPropertyPath<'a>(&'a PropertyPathExpression); + +impl<'a> fmt::Display for SparqlPropertyPath<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.0 { + PropertyPathExpression::NamedNode(p) => p.fmt(f), + PropertyPathExpression::Reverse(p) => write!(f, "^{}", SparqlPropertyPath(&*p)), + PropertyPathExpression::Sequence(a, b) => write!( + f, + "({} / {})", + SparqlPropertyPath(&*a), + SparqlPropertyPath(&*b) + ), + PropertyPathExpression::Alternative(a, b) => write!( + f, + "({} | {})", + SparqlPropertyPath(&*a), + SparqlPropertyPath(&*b) + ), + PropertyPathExpression::ZeroOrMore(p) => write!(f, "{}*", SparqlPropertyPath(&*p)), + PropertyPathExpression::OneOrMore(p) => write!(f, "{}+", SparqlPropertyPath(&*p)), + PropertyPathExpression::ZeroOrOne(p) => write!(f, "{}?", SparqlPropertyPath(&*p)), + PropertyPathExpression::NegatedPropertySet(p) => write!( + f, + "!({})", + p.iter() + .map(|v| v.to_string()) + .collect::>() + .join(" | ") + ), + } + } +} + +impl From for PropertyPathExpression { + fn from(p: NamedNode) -> Self { + PropertyPathExpression::NamedNode(p) + } +} + +/// An [expression](https://www.w3.org/TR/sparql11-query/#expressions) +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum Expression { + NamedNode(NamedNode), + Literal(Literal), + Variable(Variable), + /// [Logical-or](https://www.w3.org/TR/sparql11-query/#func-logical-or) + Or(Box, Box), + /// 
[Logical-and](https://www.w3.org/TR/sparql11-query/#func-logical-and) + And(Box, Box), + /// [RDFterm-equal](https://www.w3.org/TR/sparql11-query/#func-RDFterm-equal) and all the XSD equalities + Equal(Box, Box), + /// [sameTerm](https://www.w3.org/TR/sparql11-query/#func-sameTerm) + SameTerm(Box, Box), + /// [op:numeric-greater-than](https://www.w3.org/TR/xpath-functions/#func-numeric-greater-than) and other XSD greater than operators + Greater(Box, Box), + GreaterOrEqual(Box, Box), + /// [op:numeric-less-than](https://www.w3.org/TR/xpath-functions/#func-numeric-less-than) and other XSD less than operators + Less(Box, Box), + LessOrEqual(Box, Box), + /// [IN](https://www.w3.org/TR/sparql11-query/#func-in) + In(Box, Vec), + /// [op:numeric-add](https://www.w3.org/TR/xpath-functions/#func-numeric-add) and other XSD additions + Add(Box, Box), + /// [op:numeric-subtract](https://www.w3.org/TR/xpath-functions/#func-numeric-subtract) and other XSD subtractions + Subtract(Box, Box), + /// [op:numeric-multiply](https://www.w3.org/TR/xpath-functions/#func-numeric-multiply) and other XSD multiplications + Multiply(Box, Box), + /// [op:numeric-divide](https://www.w3.org/TR/xpath-functions/#func-numeric-divide) and other XSD divides + Divide(Box, Box), + /// [op:numeric-unary-plus](https://www.w3.org/TR/xpath-functions/#func-numeric-unary-plus) and other XSD unary plus + UnaryPlus(Box), + /// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions/#func-numeric-unary-minus) and other XSD unary minus + UnaryMinus(Box), + /// [fn:not](https://www.w3.org/TR/xpath-functions/#func-not) + Not(Box), + /// [EXISTS](https://www.w3.org/TR/sparql11-query/#func-filter-exists) + Exists(Box), + /// [BOUND](https://www.w3.org/TR/sparql11-query/#func-bound) + Bound(Variable), + /// [IF](https://www.w3.org/TR/sparql11-query/#func-if) + If(Box, Box, Box), + /// [COALESCE](https://www.w3.org/TR/sparql11-query/#func-coalesce) + Coalesce(Vec), + /// A regular function call +
FunctionCall(Function, Vec), +} + +impl fmt::Display for Expression { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Expression::NamedNode(node) => node.fmt(f), + Expression::Literal(l) => l.fmt(f), + Expression::Variable(var) => var.fmt(f), + Expression::Or(a, b) => write!(f, "(|| {} {})", a, b), + Expression::And(a, b) => write!(f, "(&& {} {})", a, b), + Expression::Equal(a, b) => write!(f, "(= {} {})", a, b), + Expression::SameTerm(a, b) => write!(f, "(sameTerm {} {})", a, b), + Expression::Greater(a, b) => write!(f, "(> {} {})", a, b), + Expression::GreaterOrEqual(a, b) => write!(f, "(>= {} {})", a, b), + Expression::Less(a, b) => write!(f, "(< {} {})", a, b), + Expression::LessOrEqual(a, b) => write!(f, "(<= {} {})", a, b), + Expression::In(a, b) => { + write!(f, "(in {}", a)?; + for p in b { + write!(f, " {}", p)?; + } + write!(f, ")") + } + Expression::Add(a, b) => write!(f, "(+ {} {})", a, b), + Expression::Subtract(a, b) => write!(f, "(- {} {})", a, b), + Expression::Multiply(a, b) => write!(f, "(* {} {})", a, b), + Expression::Divide(a, b) => write!(f, "(/ {} {})", a, b), + Expression::UnaryPlus(e) => write!(f, "(+ {})", e), + Expression::UnaryMinus(e) => write!(f, "(- {})", e), + Expression::Not(e) => write!(f, "(! 
{})", e), + Expression::FunctionCall(function, parameters) => { + write!(f, "({}", function)?; + for p in parameters { + write!(f, " {}", p)?; + } + write!(f, ")") + } + Expression::Exists(p) => write!(f, "(exists {})", p), + Expression::Bound(v) => write!(f, "(bound {})", v), + Expression::If(a, b, c) => write!(f, "(if {} {} {})", a, b, c), + Expression::Coalesce(parameters) => { + write!(f, "(coalesce")?; + for p in parameters { + write!(f, " {}", p)?; + } + write!(f, ")") + } + } + } +} + +impl From for Expression { + fn from(p: NamedNode) -> Self { + Expression::NamedNode(p) + } +} + +impl From for Expression { + fn from(p: Literal) -> Self { + Expression::Literal(p) + } +} + +impl From for Expression { + fn from(v: Variable) -> Self { + Expression::Variable(v) + } +} + +struct SparqlExpression<'a>(&'a Expression); + +impl<'a> fmt::Display for SparqlExpression<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.0 { + Expression::NamedNode(node) => node.fmt(f), + Expression::Literal(l) => l.fmt(f), + Expression::Variable(var) => var.fmt(f), + Expression::Or(a, b) => write!( + f, + "({} || {})", + SparqlExpression(&*a), + SparqlExpression(&*b) + ), + Expression::And(a, b) => write!( + f, + "({} && {})", + SparqlExpression(&*a), + SparqlExpression(&*b) + ), + Expression::Equal(a, b) => { + write!(f, "({} = {})", SparqlExpression(&*a), SparqlExpression(&*b)) + } + Expression::SameTerm(a, b) => { + write!( + f, + "sameTerm({}, {})", + SparqlExpression(&*a), + SparqlExpression(&*b) + ) + } + Expression::Greater(a, b) => { + write!(f, "({} > {})", SparqlExpression(&*a), SparqlExpression(&*b)) + } + Expression::GreaterOrEqual(a, b) => write!( + f, + "({} >= {})", + SparqlExpression(&*a), + SparqlExpression(&*b) + ), + Expression::Less(a, b) => { + write!(f, "({} < {})", SparqlExpression(&*a), SparqlExpression(&*b)) + } + Expression::LessOrEqual(a, b) => write!( + f, + "({} <= {})", + SparqlExpression(&*a), + SparqlExpression(&*b) + ), + 
Expression::In(a, b) => { + write!(f, "({} IN ", SparqlExpression(&*a))?; + write_arg_list(b.iter().map(|p| SparqlExpression(&*p)), f)?; + write!(f, ")") + } + Expression::Add(a, b) => { + write!(f, "{} + {}", SparqlExpression(&*a), SparqlExpression(&*b)) + } + Expression::Subtract(a, b) => { + write!(f, "{} - {}", SparqlExpression(&*a), SparqlExpression(&*b)) + } + Expression::Multiply(a, b) => { + write!(f, "{} * {}", SparqlExpression(&*a), SparqlExpression(&*b)) + } + Expression::Divide(a, b) => { + write!(f, "{} / {}", SparqlExpression(&*a), SparqlExpression(&*b)) + } + Expression::UnaryPlus(e) => write!(f, "+{}", SparqlExpression(&*e)), + Expression::UnaryMinus(e) => write!(f, "-{}", SparqlExpression(&*e)), + Expression::Not(e) => match e.as_ref() { + Expression::Exists(p) => write!(f, "NOT EXISTS {{ {} }}", SparqlGraphPattern(&*p)), + e => write!(f, "!{}", SparqlExpression(&*e)), + }, + Expression::FunctionCall(function, parameters) => { + write!(f, "{}", function)?; + write_arg_list(parameters.iter().map(|p| SparqlExpression(&*p)), f) + } + Expression::Bound(v) => write!(f, "BOUND({})", v), + Expression::Exists(p) => write!(f, "EXISTS {{ {} }}", SparqlGraphPattern(&*p)), + Expression::If(a, b, c) => write!( + f, + "IF({}, {}, {})", + SparqlExpression(&*a), + SparqlExpression(&*b), + SparqlExpression(&*c) + ), + Expression::Coalesce(parameters) => { + write!(f, "COALESCE")?; + write_arg_list(parameters.iter().map(|p| SparqlExpression(&*p)), f) + } + } + } +} + +fn write_arg_list( + params: impl IntoIterator, + f: &mut fmt::Formatter<'_>, +) -> fmt::Result { + write!(f, "(")?; + let mut cont = false; + for p in params { + if cont { + write!(f, ", ")?; + } + p.fmt(f)?; + cont = true; + } + write!(f, ")") +} + +/// A function name +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum Function { + Str, + Lang, + LangMatches, + Datatype, + Iri, + BNode, + Rand, + Abs, + Ceil, + Floor, + Round, + Concat, + SubStr, + StrLen, + Replace, + UCase, + LCase, + 
EncodeForUri, + Contains, + StrStarts, + StrEnds, + StrBefore, + StrAfter, + Year, + Month, + Day, + Hours, + Minutes, + Seconds, + Timezone, + Tz, + Now, + Uuid, + StrUuid, + Md5, + Sha1, + Sha256, + Sha384, + Sha512, + StrLang, + StrDt, + IsIri, + IsBlank, + IsLiteral, + IsNumeric, + Regex, + Custom(NamedNode), +} + +impl fmt::Display for Function { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Function::Str => write!(f, "STR"), + Function::Lang => write!(f, "LANG"), + Function::LangMatches => write!(f, "LANGMATCHES"), + Function::Datatype => write!(f, "DATATYPE"), + Function::Iri => write!(f, "IRI"), + Function::BNode => write!(f, "BNODE"), + Function::Rand => write!(f, "RAND"), + Function::Abs => write!(f, "ABS"), + Function::Ceil => write!(f, "CEIL"), + Function::Floor => write!(f, "FLOOR"), + Function::Round => write!(f, "ROUND"), + Function::Concat => write!(f, "CONCAT"), + Function::SubStr => write!(f, "SUBSTR"), + Function::StrLen => write!(f, "STRLEN"), + Function::Replace => write!(f, "REPLACE"), + Function::UCase => write!(f, "UCASE"), + Function::LCase => write!(f, "LCASE"), + Function::EncodeForUri => write!(f, "ENCODE_FOR_URI"), + Function::Contains => write!(f, "CONTAINS"), + Function::StrStarts => write!(f, "STRSTARTS"), + Function::StrEnds => write!(f, "STRENDS"), + Function::StrBefore => write!(f, "STRBEFORE"), + Function::StrAfter => write!(f, "STRAFTER"), + Function::Year => write!(f, "YEAR"), + Function::Month => write!(f, "MONTH"), + Function::Day => write!(f, "DAY"), + Function::Hours => write!(f, "HOURS"), + Function::Minutes => write!(f, "MINUTES"), + Function::Seconds => write!(f, "SECONDS"), + Function::Timezone => write!(f, "TIMEZONE"), + Function::Tz => write!(f, "TZ"), + Function::Now => write!(f, "NOW"), + Function::Uuid => write!(f, "UUID"), + Function::StrUuid => write!(f, "STRUUID"), + Function::Md5 => write!(f, "MD5"), + Function::Sha1 => write!(f, "SHA1"), + Function::Sha256 => write!(f, "SHA256"),
+ Function::Sha384 => write!(f, "SHA384"), + Function::Sha512 => write!(f, "SHA512"), + Function::StrLang => write!(f, "STRLANG"), + Function::StrDt => write!(f, "STRDT"), + Function::IsIri => write!(f, "isIRI"), + Function::IsBlank => write!(f, "isBLANK"), + Function::IsLiteral => write!(f, "isLITERAL"), + Function::IsNumeric => write!(f, "isNUMERIC"), + Function::Regex => write!(f, "REGEX"), + Function::Custom(iri) => iri.fmt(f), + } + } +} + +/// A SPARQL query [graph pattern](https://www.w3.org/TR/sparql11-query/#sparqlQuery) +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum GraphPattern { + /// A [basic graph pattern](https://www.w3.org/TR/sparql11-query/#defn_BasicGraphPattern) + Bgp(Vec), + /// A [property path pattern](https://www.w3.org/TR/sparql11-query/#defn_evalPP_predicate) + Path { + subject: TermOrVariable, + path: PropertyPathExpression, + object: TermOrVariable, + }, + /// [Join](https://www.w3.org/TR/sparql11-query/#defn_algJoin) + Join { + left: Box, + right: Box, + }, + /// [LeftJoin](https://www.w3.org/TR/sparql11-query/#defn_algLeftJoin) + LeftJoin { + left: Box, + right: Box, + expr: Option, + }, + /// [Filter](https://www.w3.org/TR/sparql11-query/#defn_algFilter) + Filter { + expr: Expression, + inner: Box, + }, + /// [Union](https://www.w3.org/TR/sparql11-query/#defn_algUnion) + Union { + left: Box, + right: Box, + }, + Graph { + graph_name: NamedNodeOrVariable, + inner: Box, + }, + /// [Extend](https://www.w3.org/TR/sparql11-query/#defn_extend) + Extend { + inner: Box, + var: Variable, + expr: Expression, + }, + /// [Minus](https://www.w3.org/TR/sparql11-query/#defn_algMinus) + Minus { + left: Box, + right: Box, + }, + /// A table used to provide inline values + Table { + variables: Vec, + rows: Vec>>, + }, + /// [OrderBy](https://www.w3.org/TR/sparql11-query/#defn_algOrdered) + OrderBy { + inner: Box, + condition: Vec, + }, + /// [Project](https://www.w3.org/TR/sparql11-query/#defn_algProjection) + Project { + inner: Box, + 
projection: Vec<Variable>, + }, + /// [Distinct](https://www.w3.org/TR/sparql11-query/#defn_algDistinct) + Distinct { inner: Box<GraphPattern> }, + /// [Reduced](https://www.w3.org/TR/sparql11-query/#defn_algReduced) + Reduced { inner: Box<GraphPattern> }, + /// [Slice](https://www.w3.org/TR/sparql11-query/#defn_algSlice) + Slice { + inner: Box<GraphPattern>, + start: usize, + length: Option<usize>, + }, + /// [Group](https://www.w3.org/TR/sparql11-query/#aggregateAlgebra) + Group { + inner: Box<GraphPattern>, + by: Vec<Variable>, + aggregates: Vec<(Variable, AggregationFunction)>, + }, + /// [Service](https://www.w3.org/TR/sparql11-federated-query/#defn_evalService) + Service { + name: NamedNodeOrVariable, + pattern: Box<GraphPattern>, + silent: bool, + }, +} + +impl fmt::Display for GraphPattern { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + GraphPattern::Bgp(p) => { + write!(f, "(bgp")?; + for pattern in p { + write!(f, " {}", pattern)?; + } + write!(f, ")") + } + GraphPattern::Path { + subject, + path, + object, + } => write!(f, "(path {} {} {})", subject, path, object), + GraphPattern::Join { left, right } => write!(f, "(join {} {})", left, right), + GraphPattern::LeftJoin { left, right, expr } => { + if let Some(expr) = expr { + write!(f, "(leftjoin {} {} {})", left, right, expr) + } else { + write!(f, "(leftjoin {} {})", left, right) + } + } + GraphPattern::Filter { expr, inner } => write!(f, "(filter {} {})", expr, inner), + GraphPattern::Union { left, right } => write!(f, "(union {} {})", left, right), + GraphPattern::Graph { graph_name, inner } => { + write!(f, "(graph {} {})", graph_name, inner) + } + GraphPattern::Extend { inner, var, expr } => { + write!(f, "(extend ({} {}) {})", var, expr, inner) + } + GraphPattern::Minus { left, right } => write!(f, "(minus {} {})", left, right), + GraphPattern::Service { + name, + pattern, + silent, + } => { + if *silent { + write!(f, "(service silent {} {})", name, pattern) + } else { + write!(f, "(service {} {})", name, pattern) + } + } + GraphPattern::Group {
inner, + by, + aggregates, + } => write!( + f, + "(group ({}) ({}) {})", + by.iter() + .map(|v| v.name.as_str()) + .collect::>() + .join(" "), + aggregates + .iter() + .map(|(a, v)| format!("({} {})", v, a)) + .collect::>() + .join(" "), + inner + ), + GraphPattern::Table { variables, rows } => { + write!(f, "(table (vars")?; + for var in variables { + write!(f, " {}", var)?; + } + write!(f, ")")?; + for row in rows { + write!(f, " (row")?; + for (value, var) in row.iter().zip(variables) { + if let Some(value) = value { + write!(f, " ({} {})", var, value)?; + } + } + write!(f, ")")?; + } + write!(f, ")") + } + GraphPattern::OrderBy { inner, condition } => write!( + f, + "(order ({}) {})", + condition + .iter() + .map(|c| c.to_string()) + .collect::>() + .join(" "), + inner + ), + GraphPattern::Project { inner, projection } => write!( + f, + "(project ({}) {})", + projection + .iter() + .map(|v| v.to_string()) + .collect::>() + .join(" "), + inner + ), + GraphPattern::Distinct { inner } => write!(f, "(distinct {})", inner), + GraphPattern::Reduced { inner } => write!(f, "(reduced {})", inner), + GraphPattern::Slice { + inner, + start, + length, + } => write!( + f, + "(slice {} {} {})", + start, + length + .map(|l| l.to_string()) + .unwrap_or_else(|| '_'.to_string()), + inner + ), + } + } +} + +impl Default for GraphPattern { + fn default() -> Self { + GraphPattern::Bgp(Vec::default()) + } +} + +impl GraphPattern { + pub fn visible_variables(&self) -> BTreeSet<&Variable> { + let mut vars = BTreeSet::default(); + self.add_visible_variables(&mut vars); + vars + } + + fn add_visible_variables<'a>(&'a self, vars: &mut BTreeSet<&'a Variable>) { + match self { + GraphPattern::Bgp(p) => { + for pattern in p { + if let TermOrVariable::Variable(s) = &pattern.subject { + vars.insert(s); + } + if let NamedNodeOrVariable::Variable(p) = &pattern.predicate { + vars.insert(p); + } + if let TermOrVariable::Variable(o) = &pattern.object { + vars.insert(o); + } + } + } + 
GraphPattern::Path { + subject, object, .. + } => { + if let TermOrVariable::Variable(s) = subject { + vars.insert(s); + } + if let TermOrVariable::Variable(o) = object { + vars.insert(o); + } + } + GraphPattern::Join { left, right } + | GraphPattern::LeftJoin { left, right, .. } + | GraphPattern::Union { left, right } => { + left.add_visible_variables(vars); + right.add_visible_variables(vars); + } + GraphPattern::Filter { inner, .. } => inner.add_visible_variables(vars), + GraphPattern::Graph { graph_name, inner } => { + if let NamedNodeOrVariable::Variable(ref g) = graph_name { + vars.insert(g); + } + inner.add_visible_variables(vars); + } + GraphPattern::Extend { inner, var, .. } => { + vars.insert(var); + inner.add_visible_variables(vars); + } + GraphPattern::Minus { left, .. } => left.add_visible_variables(vars), + GraphPattern::Service { pattern, .. } => pattern.add_visible_variables(vars), + GraphPattern::Group { by, aggregates, .. } => { + vars.extend(by); + for (v, _) in aggregates { + vars.insert(v); + } + } + GraphPattern::Table { variables, .. } => vars.extend(variables), + GraphPattern::Project { projection, .. } => vars.extend(projection.iter()), + GraphPattern::OrderBy { inner, .. } + | GraphPattern::Distinct { inner } + | GraphPattern::Reduced { inner } + | GraphPattern::Slice { inner, .. } => inner.add_visible_variables(vars), + } + } +} + +struct SparqlGraphPattern<'a>(&'a GraphPattern); + +impl<'a> fmt::Display for SparqlGraphPattern<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.0 { + GraphPattern::Bgp(p) => { + for pattern in p { + write!(f, "{}", SparqlTriplePattern(pattern))? 
+ } + Ok(()) + } + GraphPattern::Path { + subject, + path, + object, + } => write!(f, "{} {} {} .", subject, SparqlPropertyPath(path), object), + GraphPattern::Join { left, right } => write!( + f, + "{} {}", + SparqlGraphPattern(&*left), + SparqlGraphPattern(&*right) + ), + GraphPattern::LeftJoin { left, right, expr } => { + if let Some(expr) = expr { + write!( + f, + "{} OPTIONAL {{ {} FILTER({}) }}", + SparqlGraphPattern(&*left), + SparqlGraphPattern(&*right), + SparqlExpression(expr) + ) + } else { + write!( + f, + "{} OPTIONAL {{ {} }}", + SparqlGraphPattern(&*left), + SparqlGraphPattern(&*right) + ) + } + } + GraphPattern::Filter { expr, inner } => write!( + f, + "{} FILTER({})", + SparqlGraphPattern(&*inner), + SparqlExpression(expr) + ), + GraphPattern::Union { left, right } => write!( + f, + "{{ {} }} UNION {{ {} }}", + SparqlGraphPattern(&*left), + SparqlGraphPattern(&*right), + ), + GraphPattern::Graph { graph_name, inner } => { + write!( + f, + "GRAPH {} {{ {} }}", + graph_name, + SparqlGraphPattern(&*inner) + ) + } + GraphPattern::Extend { inner, var, expr } => write!( + f, + "{} BIND({} AS {})", + SparqlGraphPattern(&*inner), + SparqlExpression(expr), + var + ), + GraphPattern::Minus { left, right } => write!( + f, + "{} MINUS {{ {} }}", + SparqlGraphPattern(&*left), + SparqlGraphPattern(&*right) + ), + GraphPattern::Service { + name, + pattern, + silent, + } => { + if *silent { + write!( + f, + "SERVICE SILENT {} {{ {} }}", + name, + SparqlGraphPattern(&*pattern) + ) + } else { + write!( + f, + "SERVICE {} {{ {} }}", + name, + SparqlGraphPattern(&*pattern) + ) + } + } + GraphPattern::Table { variables, rows } => { + write!(f, "VALUES ( ")?; + for var in variables { + write!(f, "{} ", var)?; + } + write!(f, ") {{ ")?; + for row in rows { + write!(f, "( ")?; + for val in row { + match val { + Some(val) => write!(f, "{} ", val), + None => write!(f, "UNDEF "), + }?; + } + write!(f, ") ")?; + } + write!(f, " }}") + } + GraphPattern::Group { + inner, + by, 
+ aggregates, + } => write!( + f, + "{{ SELECT {} WHERE {{ {} }} GROUP BY {} }}", + aggregates + .iter() + .map(|(v, a)| format!("({} AS {})", SparqlAggregationFunction(a), v)) + .chain(by.iter().map(|e| e.to_string())) + .collect::>() + .join(" "), + SparqlGraphPattern(&*inner), + by.iter() + .map(|e| format!("({})", e.to_string())) + .collect::>() + .join(" ") + ), + p => write!( + f, + "{{ {} }}", + SparqlGraphRootPattern { + pattern: p, + dataset: None + } + ), + } + } +} + +pub(crate) struct SparqlGraphRootPattern<'a> { + pub(crate) pattern: &'a GraphPattern, + pub(crate) dataset: Option<&'a QueryDataset>, +} + +impl<'a> fmt::Display for SparqlGraphRootPattern<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut distinct = false; + let mut reduced = false; + let mut order = None; + let mut start = 0; + let mut length = None; + let mut project: &[Variable] = &[]; + + let mut child = self.pattern; + loop { + match child { + GraphPattern::OrderBy { inner, condition } => { + order = Some(condition); + child = &*inner; + } + GraphPattern::Project { inner, projection } if project.is_empty() => { + project = projection; + child = &*inner; + } + GraphPattern::Distinct { inner } => { + distinct = true; + child = &*inner; + } + GraphPattern::Reduced { inner } => { + reduced = true; + child = &*inner; + } + GraphPattern::Slice { + inner, + start: s, + length: l, + } => { + start = *s; + length = *l; + child = inner; + } + p => { + write!(f, "SELECT ")?; + if distinct { + write!(f, "DISTINCT ")?; + } + if reduced { + write!(f, "REDUCED ")?; + } + build_sparql_select_arguments(project).fmt(f)?; + if let Some(dataset) = self.dataset { + dataset.fmt(f)?; + } + write!(f, " WHERE {{ {} }}", SparqlGraphPattern(p))?; + if let Some(order) = order { + write!( + f, + " ORDER BY {}", + order + .iter() + .map(|c| SparqlOrderComparator(c).to_string()) + .collect::>() + .join(" ") + )?; + } + if start > 0 { + write!(f, " OFFSET {}", start)?; + } + if let 
Some(length) = length { + write!(f, " LIMIT {}", length)?; + } + return Ok(()); + } + } + } + } +} + +fn build_sparql_select_arguments(args: &[Variable]) -> String { + if args.is_empty() { + "*".to_owned() + } else { + args.iter() + .map(|v| v.to_string()) + .collect::>() + .join(" ") + } +} + +/// A set function used in aggregates (c.f. [`GraphPattern::Group`]) +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum AggregationFunction { + /// [Count](https://www.w3.org/TR/sparql11-query/#defn_aggCount) + Count { + expr: Option>, + distinct: bool, + }, + /// [Sum](https://www.w3.org/TR/sparql11-query/#defn_aggSum) + Sum { + expr: Box, + distinct: bool, + }, + /// [Avg](https://www.w3.org/TR/sparql11-query/#defn_aggAvg) + Avg { + expr: Box, + distinct: bool, + }, + /// [Min](https://www.w3.org/TR/sparql11-query/#defn_aggMin) + Min { + expr: Box, + distinct: bool, + }, + /// [Max](https://www.w3.org/TR/sparql11-query/#defn_aggMax) + Max { + expr: Box, + distinct: bool, + }, + /// [GroupConcat](https://www.w3.org/TR/sparql11-query/#defn_aggGroupConcat) + GroupConcat { + expr: Box, + distinct: bool, + separator: Option, + }, + /// [Sample](https://www.w3.org/TR/sparql11-query/#defn_aggSample) + Sample { + expr: Box, + distinct: bool, + }, + /// Custom function + Custom { + name: NamedNode, + expr: Box, + distinct: bool, + }, +} + +impl fmt::Display for AggregationFunction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + AggregationFunction::Count { expr, distinct } => { + if *distinct { + if let Some(expr) = expr { + write!(f, "(count distinct {})", expr) + } else { + write!(f, "(count distinct)") + } + } else if let Some(expr) = expr { + write!(f, "(count {})", expr) + } else { + write!(f, "(count)") + } + } + AggregationFunction::Sum { expr, distinct } => { + if *distinct { + write!(f, "(sum distinct {})", expr) + } else { + write!(f, "(sum {})", expr) + } + } + AggregationFunction::Avg { expr, distinct } => { + if *distinct { + 
write!(f, "(avg distinct {})", expr) + } else { + write!(f, "(avg {})", expr) + } + } + AggregationFunction::Min { expr, distinct } => { + if *distinct { + write!(f, "(min distinct {})", expr) + } else { + write!(f, "(min {})", expr) + } + } + AggregationFunction::Max { expr, distinct } => { + if *distinct { + write!(f, "(max distinct {})", expr) + } else { + write!(f, "(max {})", expr) + } + } + AggregationFunction::Sample { expr, distinct } => { + if *distinct { + write!(f, "(sample distinct {})", expr) + } else { + write!(f, "(sample {})", expr) + } + } + AggregationFunction::GroupConcat { + expr, + distinct, + separator, + } => { + if *distinct { + if let Some(separator) = separator { + write!(f, "(group_concat distinct {} ", expr)?; + print_quoted_str(separator, f)?; + write!(f, ")") + } else { + write!(f, "(group_concat distinct {})", expr) + } + } else if let Some(separator) = separator { + write!(f, "(group_concat {} ", expr)?; + print_quoted_str(separator, f)?; + write!(f, ")") + } else { + write!(f, "(group_concat {})", expr) + } + } + AggregationFunction::Custom { + name, + expr, + distinct, + } => { + if *distinct { + write!(f, "({} distinct {})", name, expr) + } else { + write!(f, "({} {})", name, expr) + } + } + } + } +} + +struct SparqlAggregationFunction<'a>(&'a AggregationFunction); + +impl<'a> fmt::Display for SparqlAggregationFunction<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.0 { + AggregationFunction::Count { expr, distinct } => { + if *distinct { + if let Some(expr) = expr { + write!(f, "COUNT(DISTINCT {})", SparqlExpression(expr)) + } else { + write!(f, "COUNT(DISTINCT *)") + } + } else if let Some(expr) = expr { + write!(f, "COUNT({})", SparqlExpression(expr)) + } else { + write!(f, "COUNT(*)") + } + } + AggregationFunction::Sum { expr, distinct } => { + if *distinct { + write!(f, "SUM(DISTINCT {})", SparqlExpression(expr)) + } else { + write!(f, "SUM({})", SparqlExpression(expr)) + } + } + 
AggregationFunction::Min { expr, distinct } => { + if *distinct { + write!(f, "MIN(DISTINCT {})", SparqlExpression(expr)) + } else { + write!(f, "MIN({})", SparqlExpression(expr)) + } + } + AggregationFunction::Max { expr, distinct } => { + if *distinct { + write!(f, "MAX(DISTINCT {})", SparqlExpression(expr)) + } else { + write!(f, "MAX({})", SparqlExpression(expr)) + } + } + AggregationFunction::Avg { expr, distinct } => { + if *distinct { + write!(f, "AVG(DISTINCT {})", SparqlExpression(expr)) + } else { + write!(f, "AVG({})", SparqlExpression(expr)) + } + } + AggregationFunction::Sample { expr, distinct } => { + if *distinct { + write!(f, "SAMPLE(DISTINCT {})", SparqlExpression(expr)) + } else { + write!(f, "SAMPLE({})", SparqlExpression(expr)) + } + } + AggregationFunction::GroupConcat { + expr, + distinct, + separator, + } => { + if *distinct { + if let Some(separator) = separator { + write!( + f, + "GROUP_CONCAT(DISTINCT {}; SEPARATOR = ", + SparqlExpression(expr) + )?; + print_quoted_str(separator, f)?; + write!(f, ")") + } else { + write!(f, "GROUP_CONCAT(DISTINCT {})", SparqlExpression(expr)) + } + } else if let Some(separator) = separator { + write!(f, "GROUP_CONCAT({}; SEPARATOR = ", SparqlExpression(expr))?; + print_quoted_str(separator, f)?; + write!(f, ")") + } else { + write!(f, "GROUP_CONCAT({})", SparqlExpression(expr)) + } + } + AggregationFunction::Custom { + name, + expr, + distinct, + } => { + if *distinct { + write!(f, "{}(DISTINCT {})", name, SparqlExpression(expr)) + } else { + write!(f, "{}({})", name, SparqlExpression(expr)) + } + } + } + } +} + +/// An ordering comparator used by [`GraphPattern::OrderBy`] +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum OrderComparator { + /// Ascending order + Asc(Expression), + /// Descending order + Desc(Expression), +} + +impl fmt::Display for OrderComparator { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + OrderComparator::Asc(e) => write!(f, "(asc {})", e), + 
OrderComparator::Desc(e) => write!(f, "(desc {})", e), + } + } +} + +struct SparqlOrderComparator<'a>(&'a OrderComparator); + +impl<'a> fmt::Display for SparqlOrderComparator<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.0 { + OrderComparator::Asc(e) => write!(f, "ASC({})", SparqlExpression(e)), + OrderComparator::Desc(e) => write!(f, "DESC({})", SparqlExpression(e)), + } + } +} + +/// A SPARQL query [dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct QueryDataset { + pub default: Vec, + pub named: Option>, +} + +impl fmt::Display for QueryDataset { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for g in &self.default { + write!(f, " FROM {}", g)?; + } + if let Some(named) = &self.named { + for g in named { + write!(f, " FROM NAMED {}", g)?; + } + } + Ok(()) + } +} + +/// A target RDF graph for update operations +/// +/// Could be a specific graph, all named graphs or the complete dataset. +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum GraphTarget { + NamedNode(NamedNode), + DefaultGraph, + NamedGraphs, + AllGraphs, +} + +impl fmt::Display for GraphTarget { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(node) => write!(f, "GRAPH {}", node), + Self::DefaultGraph => write!(f, "DEFAULT"), + Self::NamedGraphs => write!(f, "NAMED"), + Self::AllGraphs => write!(f, "ALL"), + } + } +} + +impl From for GraphTarget { + fn from(node: NamedNode) -> Self { + Self::NamedNode(node) + } +} diff --git a/spargebra/src/lib.rs b/spargebra/src/lib.rs new file mode 100644 index 00000000..77c3f693 --- /dev/null +++ b/spargebra/src/lib.rs @@ -0,0 +1,36 @@ +//! This crate provides [SPARQL 1.1](http://www.w3.org/TR/sparql11-overview/) query and update parsers. +//! The emitted tree is based on [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery) objects. +//! +//! 
The API entry point for SPARQL queries is [`Query`] and the API entry point for SPARQL updates is [`Update`]. +//! +//! This crate is intended to be a building piece for SPARQL implementations in Rust like [Oxigraph](https://oxigraph.org). +//! +//! Usage example: +//! ``` +//! use spargebra::Query; +//! +//! let query_str = "SELECT ?s ?p ?o WHERE { ?s ?p ?o . }"; +//! let mut query = Query::parse(query_str, None)?; +//! assert_eq!(query.to_string(), query_str); +//! # Result::Ok::<_, spargebra::ParseError>(()) +//! ``` +#![deny( + future_incompatible, + nonstandard_style, + rust_2018_idioms, + missing_copy_implementations, + trivial_casts, + trivial_numeric_casts, + unsafe_code, + unused_qualifications +)] + +pub mod algebra; +mod parser; +mod query; +pub mod term; +mod update; + +pub use parser::ParseError; +pub use query::*; +pub use update::*; diff --git a/lib/src/sparql/parser.rs b/spargebra/src/parser.rs similarity index 89% rename from lib/src/sparql/parser.rs rename to spargebra/src/parser.rs index 67d5ad72..4caf5623 100644 --- a/lib/src/sparql/parser.rs +++ b/spargebra/src/parser.rs @@ -1,15 +1,15 @@ -use crate::model::vocab::rdf; -use crate::model::vocab::xsd; -use crate::model::*; -use crate::sparql::algebra::*; -use crate::sparql::model::*; +use crate::algebra::*; +use crate::query::*; +use crate::term::*; +use crate::update::*; +use oxilangtag::LanguageTag; use oxiri::{Iri, IriParseError}; use peg::parser; use peg::str::LineCol; +use rand::random; use std::borrow::Cow; use std::collections::{HashMap, HashSet}; use std::error::Error; -use std::rc::Rc; use std::str::Chars; use std::str::FromStr; use std::{char, fmt}; @@ -156,7 +156,7 @@ fn add_to_triple_or_path_patterns( add_to_triple_or_path_patterns(object, *p, subject, patterns) } PropertyPathExpression::Sequence(a, b) => { - let middle = BlankNode::default(); + let middle = bnode(); add_to_triple_or_path_patterns(subject, *a, middle.clone().into(), patterns); 
add_to_triple_or_path_patterns(middle.into(), *b, object, patterns); } @@ -182,7 +182,7 @@ fn build_bgp(patterns: Vec) -> GraphPattern { } => paths.push((subject, path, object)), } } - let mut graph_pattern = GraphPattern::BGP(bgp); + let mut graph_pattern = GraphPattern::Bgp(bgp); for (subject, path, object) in paths { graph_pattern = new_join( graph_pattern, @@ -263,12 +263,12 @@ enum PartialGraphPattern { fn new_join(l: GraphPattern, r: GraphPattern) -> GraphPattern { //Avoid to output empty BGPs - if let GraphPattern::BGP(pl) = &l { + if let GraphPattern::Bgp(pl) = &l { if pl.is_empty() { return r; } } - if let GraphPattern::BGP(pr) = &r { + if let GraphPattern::Bgp(pr) = &r { if pr.is_empty() { return l; } @@ -276,9 +276,9 @@ fn new_join(l: GraphPattern, r: GraphPattern) -> GraphPattern { //Merge BGPs match (l, r) { - (GraphPattern::BGP(mut pl), GraphPattern::BGP(pr)) => { + (GraphPattern::Bgp(mut pl), GraphPattern::Bgp(pr)) => { pl.extend(pr); - GraphPattern::BGP(pl) + GraphPattern::Bgp(pl) } ( GraphPattern::Graph { @@ -354,10 +354,17 @@ fn build_select( //GROUP BY let aggregates = state.aggregates.pop().unwrap_or_else(Vec::default); if group.is_none() && !aggregates.is_empty() { - let const_variable = Variable::new_random(); + let const_variable = variable(); group = Some(( vec![const_variable.clone()], - vec![(Literal::from(1).into(), const_variable)], + vec![( + Literal::Typed { + value: "1".into(), + datatype: iri("http://www.w3.org/2001/XMLSchema#integer"), + } + .into(), + const_variable, + )], )); } @@ -448,20 +455,20 @@ fn build_select( } fn copy_graph(from: Option, to: Option) -> GraphUpdateOperation { - let bgp = GraphPattern::BGP(vec![TriplePattern::new( - Variable::new_unchecked("s"), - Variable::new_unchecked("p"), - Variable::new_unchecked("o"), + let bgp = GraphPattern::Bgp(vec![TriplePattern::new( + Variable { name: "s".into() }, + Variable { name: "p".into() }, + Variable { name: "o".into() }, )]); GraphUpdateOperation::DeleteInsert { delete: 
Vec::new(), insert: vec![QuadPattern::new( - Variable::new_unchecked("s"), - Variable::new_unchecked("p"), - Variable::new_unchecked("o"), + Variable { name: "s".into() }, + Variable { name: "p".into() }, + Variable { name: "o".into() }, to, )], - using: QueryDataset::default(), + using: None, pattern: Box::new(if let Some(from) = from { GraphPattern::Graph { graph_name: from.into(), @@ -502,7 +509,7 @@ impl ParserState { .find_map(|(v, a)| if a == &agg { Some(v) } else { None }) .cloned() .unwrap_or_else(|| { - let new_var = Variable::new_random(); + let new_var = variable(); aggregates.push((new_var.clone(), agg)); new_var })) @@ -713,6 +720,22 @@ pub fn unescape_pn_local(input: &str) -> Cow<'_, str> { unescape_characters(input, &UNESCAPE_PN_CHARACTERS, &UNESCAPE_PN_REPLACEMENT) } +fn iri(value: impl Into) -> NamedNode { + NamedNode { iri: value.into() } +} + +fn bnode() -> BlankNode { + BlankNode { + id: format!("{:x}", random::()), + } +} + +fn variable() -> Variable { + Variable { + name: format!("{:x}", random::()), + } +} + parser! { //See https://www.w3.org/TR/turtle/#sec-grammar grammar parser(state: &mut ParserState) for str { @@ -791,7 +814,7 @@ parser! { dataset: d, pattern: build_select( Selection::default(), - GraphPattern::BGP(c), + GraphPattern::Bgp(c), g, h, o, l, v, state ), base_iri: state.base_iri.clone() @@ -815,7 +838,7 @@ parser! { pattern: build_select(Selection { option: SelectionOption::Default, variables: Some(p.into_iter().map(|var_or_iri| match var_or_iri { - NamedNodeOrVariable::NamedNode(n) => SelectionMember::Expression(n.into(), Variable::new_random()), + NamedNodeOrVariable::NamedNode(n) => SelectionMember::Expression(n.into(), variable()), NamedNodeOrVariable::Variable(v) => SelectionMember::Variable(v) }).collect()) }, w.unwrap_or_else(GraphPattern::default), g, h, o, l, v, state), @@ -828,40 +851,40 @@ parser! { rule AskQuery() -> Query = i("ASK") _ d:DatasetClauses() w:WhereClause() _ g:GroupClause()? _ h:HavingClause()? 
_ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() { Query::Ask { dataset: d, - pattern: Rc::new(build_select(Selection::default(), w, g, h, o, l, v, state)), + pattern: build_select(Selection::default(), w, g, h, o, l, v, state), base_iri: state.base_iri.clone() } } //[13] - rule DatasetClause() -> (Option, Option) = i("FROM") _ d:(DefaultGraphClause() / NamedGraphClause()) { d } - rule DatasetClauses() -> QueryDataset = d:DatasetClause() ** (_) { - let mut dataset = QueryDataset::default(); - if !d.is_empty() { - let mut default = Vec::new(); - let mut named = Vec::new(); - for (d, n) in d { - if let Some(d) = d { - default.push(d); - } - if let Some(n) = n { - named.push(n); - } + rule DatasetClause() -> (Option, Option) = i("FROM") _ d:(DefaultGraphClause() / NamedGraphClause()) { d } + rule DatasetClauses() -> Option = d:DatasetClause() ** (_) { + if d.is_empty() { + return None; + } + let mut default = Vec::new(); + let mut named = Vec::new(); + for (d, n) in d { + if let Some(d) = d { + default.push(d); + } + if let Some(n) = n { + named.push(n); } - dataset.set_default_graph(default); - dataset.set_available_named_graphs(named); } - dataset + Some(QueryDataset { + default, named: Some(named) + }) } //[14] - rule DefaultGraphClause() -> (Option, Option) = s:SourceSelector() { - (Some(s.into()), None) + rule DefaultGraphClause() -> (Option, Option) = s:SourceSelector() { + (Some(s), None) } //[15] - rule NamedGraphClause() -> (Option, Option) = i("NAMED") _ s:SourceSelector() { - (None, Some(s.into())) + rule NamedGraphClause() -> (Option, Option) = i("NAMED") _ s:SourceSelector() { + (None, Some(s)) } //[16] @@ -879,7 +902,7 @@ parser! { if let Expression::Variable(v) = e { v } else { - let v = vo.unwrap_or_else(Variable::new_random); + let v = vo.unwrap_or_else(variable); projections.push((e, v.clone())); v } @@ -970,7 +993,7 @@ parser! 
{ if from == to { Vec::new() // identity case } else { - let bgp = GraphPattern::BGP(vec![TriplePattern::new(Variable::new_unchecked("s"), Variable::new_unchecked("p"), Variable::new_unchecked("o"))]); + let bgp = GraphPattern::Bgp(vec![TriplePattern::new(Variable { name: "s".into() }, Variable { name: "p".into() }, Variable { name: "o".into() })]); vec![copy_graph(from, to.map(NamedNodeOrVariable::NamedNode))] } } @@ -981,7 +1004,7 @@ parser! { if from == to { Vec::new() // identity case } else { - let bgp = GraphPattern::BGP(vec![TriplePattern::new(Variable::new_unchecked("s"), Variable::new_unchecked("p"), Variable::new_unchecked("o"))]); + let bgp = GraphPattern::Bgp(vec![TriplePattern::new(Variable { name: "s".into() }, Variable { name: "p".into() }, Variable { name: "o".into() })]); vec![GraphUpdateOperation::Drop { silent: true, graph: to.clone().map_or(GraphTarget::DefaultGraph, GraphTarget::NamedNode) }, copy_graph(from.clone(), to.map(NamedNodeOrVariable::NamedNode)), GraphUpdateOperation::Drop { silent, graph: from.map_or(GraphTarget::DefaultGraph, GraphTarget::NamedNode) }] } } @@ -992,7 +1015,7 @@ parser! { if from == to { Vec::new() // identity case } else { - let bgp = GraphPattern::BGP(vec![TriplePattern::new(Variable::new_unchecked("s"), Variable::new_unchecked("p"), Variable::new_unchecked("o"))]); + let bgp = GraphPattern::Bgp(vec![TriplePattern::new(Variable { name: "s".into() }, Variable { name: "p".into() }, Variable{ name: "o".into() })]); vec![GraphUpdateOperation::Drop { silent: true, graph: to.clone().map_or(GraphTarget::DefaultGraph, GraphTarget::NamedNode) }, copy_graph(from, to.map(NamedNodeOrVariable::NamedNode))] } } @@ -1004,7 +1027,7 @@ parser! { //[39] rule DeleteData() -> Vec = i("DELETE") _ i("DATA") _ data:QuadData() {? 
- if data.iter().any(|quad| quad.subject.is_blank_node() || quad.object.is_blank_node() || quad.graph_name.is_blank_node()) { + if data.iter().any(|quad| matches!(quad.subject, NamedOrBlankNode::BlankNode(_)) || matches!(quad.object, Term::BlankNode(_))) { Err("Blank nodes are not allowed in DELETE DATA") } else { Ok(vec![GraphUpdateOperation::DeleteData { data }]) @@ -1017,17 +1040,17 @@ parser! { Err("Blank nodes are not allowed in DELETE WHERE") } else { let pattern = d.iter().map(|q| { - let bgp = GraphPattern::BGP(vec![TriplePattern::new(q.subject.clone(), q.predicate.clone(), q.object.clone())]); + let bgp = GraphPattern::Bgp(vec![TriplePattern::new(q.subject.clone(), q.predicate.clone(), q.object.clone())]); if let Some(graph_name) = &q.graph_name { GraphPattern::Graph { graph_name: graph_name.clone(), inner: Box::new(bgp) } } else { bgp } - }).fold(GraphPattern::BGP(Vec::new()), new_join); + }).fold(GraphPattern::Bgp(Vec::new()), new_join); Ok(vec![GraphUpdateOperation::DeleteInsert { delete: d, insert: Vec::new(), - using: QueryDataset::default(), + using: None, pattern: Box::new(pattern) }]) } @@ -1040,21 +1063,21 @@ parser! { let mut insert = insert.unwrap_or_else(Vec::new); let mut pattern = pattern; - let mut using = QueryDataset::default(); - if !u.is_empty() { - let mut using_default = Vec::new(); - let mut using_named = Vec::new(); + let mut using = if u.is_empty() { + None + } else { + let mut default = Vec::new(); + let mut named = Vec::new(); for (d, n) in u { if let Some(d) = d { - using_default.push(d) + default.push(d) } if let Some(n) = n { - using_named.push(n) + named.push(n) } } - using.set_default_graph(using_default); - using.set_available_named_graphs(using_named); - } + Some(QueryDataset { default, named: Some(named) }) + }; if let Some(with) = with { // We inject WITH everywhere @@ -1068,8 +1091,8 @@ parser! 
{ } else { q }).collect(); - if using.is_default_dataset() { - using.set_default_graph(vec![with.into()]); + if using.is_none() { + using = Some(QueryDataset { default: vec![with], named: None }); } } @@ -1104,12 +1127,12 @@ parser! { rule InsertClause() -> Vec = i("INSERT") _ q:QuadPattern() { q } //[44] - rule UsingClause() -> (Option, Option) = i("USING") _ d:(UsingClause_default() / UsingClause_named()) { d } - rule UsingClause_default() -> (Option, Option) = i:iri() { - (Some(i.into()), None) + rule UsingClause() -> (Option, Option) = i("USING") _ d:(UsingClause_default() / UsingClause_named()) { d } + rule UsingClause_default() -> (Option, Option) = i:iri() { + (Some(i), None) } - rule UsingClause_named() -> (Option, Option) = i("NAMED") _ i:iri() { - (None, Some(i.into())) + rule UsingClause_named() -> (Option, Option) = i("NAMED") _ i:iri() { + (None, Some(i)) } //[45] @@ -1282,21 +1305,21 @@ parser! { } //[63] - rule InlineDataOneVar() -> (Vec, Vec>>) = var:Var() _ "{" _ d:InlineDataOneVar_value()* "}" { + rule InlineDataOneVar() -> (Vec, Vec>>) = var:Var() _ "{" _ d:InlineDataOneVar_value()* "}" { (vec![var], d) } - rule InlineDataOneVar_value() -> Vec> = t:DataBlockValue() _ { vec![t] } + rule InlineDataOneVar_value() -> Vec> = t:DataBlockValue() _ { vec![t] } //[64] - rule InlineDataFull() -> (Vec, Vec>>) = "(" _ vars:InlineDataFull_var()* _ ")" _ "{" _ val:InlineDataFull_values()* "}" { + rule InlineDataFull() -> (Vec, Vec>>) = "(" _ vars:InlineDataFull_var()* _ ")" _ "{" _ val:InlineDataFull_values()* "}" { (vars, val) } rule InlineDataFull_var() -> Variable = v:Var() _ { v } - rule InlineDataFull_values() -> Vec> = "(" _ v:InlineDataFull_value()* _ ")" _ { v } - rule InlineDataFull_value() -> Option = v:DataBlockValue() _ { v } + rule InlineDataFull_values() -> Vec> = "(" _ v:InlineDataFull_value()* _ ")" _ { v } + rule InlineDataFull_value() -> Option = v:DataBlockValue() _ { v } //[65] - rule DataBlockValue() -> Option = + rule DataBlockValue() -> 
Option = i:iri() { Some(i.into()) } / l:RDFLiteral() { Some(l.into()) } / l:NumericLiteral() { Some(l.into()) } / @@ -1331,7 +1354,7 @@ parser! { //[71] rule ArgList() -> Vec = - "(" _ i("DISTINCT")? _ e:ArgList_item() **<1,> ("," _) _ ")" { e } / + "(" _ e:ArgList_item() **<1,> ("," _) _ ")" { e } / NIL() { Vec::new() } rule ArgList_item() -> Expression = e:Expression() _ { e } @@ -1393,7 +1416,7 @@ parser! { } //[78] - rule Verb() -> NamedNodeOrVariable = VarOrIri() / "a" { rdf::TYPE.into_owned().into() } + rule Verb() -> NamedNodeOrVariable = VarOrIri() / "a" { iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").into() } //[79] rule ObjectList() -> FocusedTriplePattern> = o:ObjectList_item() **<1,> ("," _) { @@ -1513,7 +1536,7 @@ parser! { //[94] rule PathPrimary() -> PropertyPathExpression = v:iri() { v.into() } / - "a" { rdf::TYPE.into_owned().into() } / + "a" { iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").into() } / "!" _ p:PathNegatedPropertySet() { p } / "(" _ p:Path() _ ")" { p } @@ -1550,9 +1573,9 @@ parser! { //[96] rule PathOneInPropertySet() -> Either = "^" _ v:iri() { Either::Right(v) } / - "^" _ "a" { Either::Right(rdf::TYPE.into_owned()) } / + "^" _ "a" { Either::Right(iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")) } / v:iri() { Either::Left(v) } / - "a" { Either::Left(rdf::TYPE.into_owned()) } + "a" { Either::Left(iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")) } //[98] rule TriplesNode() -> FocusedTriplePattern = Collection() / BlankNodePropertyList() @@ -1560,7 +1583,7 @@ parser! { //[99] rule BlankNodePropertyList() -> FocusedTriplePattern = "[" _ po:PropertyListNotEmpty() _ "]" { let mut patterns: Vec = Vec::default(); - let mut bnode = TermOrVariable::from(BlankNode::default()); + let mut bnode = TermOrVariable::from(bnode()); for (p, os) in po.focus { for o in os { patterns.push(TriplePattern::new(bnode.clone(), p.clone(), o)); @@ -1578,7 +1601,7 @@ parser! 
{ //[101] rule BlankNodePropertyListPath() -> FocusedTripleOrPathPattern = "[" _ po:PropertyListPathNotEmpty() _ "]" { let mut patterns: Vec = Vec::default(); - let mut bnode = TermOrVariable::from(BlankNode::default()); + let mut bnode = TermOrVariable::from(bnode()); for (p, os) in po.focus { for o in os { add_to_triple_or_path_patterns(bnode.clone(), p.clone(), o, &mut patterns); @@ -1593,11 +1616,11 @@ parser! { //[102] rule Collection() -> FocusedTriplePattern = "(" _ o:Collection_item()+ ")" { let mut patterns: Vec = Vec::default(); - let mut current_list_node = TermOrVariable::from(rdf::NIL.into_owned()); + let mut current_list_node = TermOrVariable::from(iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")); for objWithPatterns in o.into_iter().rev() { - let new_blank_node = TermOrVariable::from(BlankNode::default()); - patterns.push(TriplePattern::new(new_blank_node.clone(), rdf::FIRST.into_owned(), objWithPatterns.focus.clone())); - patterns.push(TriplePattern::new(new_blank_node.clone(), rdf::REST.into_owned(), current_list_node)); + let new_blank_node = TermOrVariable::from(bnode()); + patterns.push(TriplePattern::new(new_blank_node.clone(), iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#first"), objWithPatterns.focus.clone())); + patterns.push(TriplePattern::new(new_blank_node.clone(), iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"), current_list_node)); current_list_node = new_blank_node; patterns.extend_from_slice(&objWithPatterns.patterns); } @@ -1611,11 +1634,11 @@ parser! 
{ //[103] rule CollectionPath() -> FocusedTripleOrPathPattern = "(" _ o:CollectionPath_item()+ _ ")" { let mut patterns: Vec = Vec::default(); - let mut current_list_node = TermOrVariable::from(rdf::NIL.into_owned()); + let mut current_list_node = TermOrVariable::from(iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")); for objWithPatterns in o.into_iter().rev() { - let new_blank_node = TermOrVariable::from(BlankNode::default()); - patterns.push(TriplePattern::new(new_blank_node.clone(), rdf::FIRST.into_owned(), objWithPatterns.focus.clone()).into()); - patterns.push(TriplePattern::new(new_blank_node.clone(), rdf::REST.into_owned(), current_list_node).into()); + let new_blank_node = TermOrVariable::from(bnode()); + patterns.push(TriplePattern::new(new_blank_node.clone(), iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#first"), objWithPatterns.focus.clone()).into()); + patterns.push(TriplePattern::new(new_blank_node.clone(), iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"), current_list_node).into()); current_list_node = new_blank_node; patterns.extend(objWithPatterns.patterns); } @@ -1647,7 +1670,7 @@ parser! { i:iri() { i.into() } //[108] - rule Var() -> Variable = v:(VAR1() / VAR2()) { Variable::new_unchecked(v) } + rule Var() -> Variable = name:(VAR1() / VAR2()) { Variable { name: name.into() } } //[109] rule GraphTerm() -> Term = @@ -1656,7 +1679,7 @@ parser! { l:NumericLiteral() { l.into() } / l:BooleanLiteral() { l.into() } / b:BlankNode() { b.into() } / - NIL() { rdf::NIL.into() } + NIL() { iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil").into() } //[110] rule Expression() -> Expression = e:ConditionalOrExpression() {e} @@ -1749,7 +1772,7 @@ parser! 
{ i("LANGMATCHES") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::LangMatches, vec![a, b]) } / i("DATATYPE") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Datatype, vec![e]) } / i("BOUND") _ "(" _ v:Var() _ ")" { Expression::Bound(v) } / - (i("IRI") / i("URI")) _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::IRI, vec![e]) } / + (i("IRI") / i("URI")) _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Iri, vec![e]) } / i("BNODE") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::BNode, vec![e]) } / i("BNODE") NIL() { Expression::FunctionCall(Function::BNode, vec![]) } / i("RAND") _ NIL() { Expression::FunctionCall(Function::Rand, vec![]) } / @@ -1763,7 +1786,7 @@ parser! { StrReplaceExpression() / i("UCASE") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::UCase, vec![e]) } / i("LCASE") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::LCase, vec![e]) } / - i("ENCODE_FOR_URI") "(" _ e: Expression() _ ")" { Expression::FunctionCall(Function::EncodeForURI, vec![e]) } / + i("ENCODE_FOR_URI") "(" _ e: Expression() _ ")" { Expression::FunctionCall(Function::EncodeForUri, vec![e]) } / i("CONTAINS") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::Contains, vec![a, b]) } / i("STRSTARTS") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::StrStarts, vec![a, b]) } / i("STRENDS") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::StrEnds, vec![a, b]) } / @@ -1778,19 +1801,19 @@ parser! 
{ i("TIMEZONE") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Timezone, vec![e]) } / i("TZ") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Tz, vec![e]) } / i("NOW") _ NIL() { Expression::FunctionCall(Function::Now, vec![]) } / - i("UUID") _ NIL() { Expression::FunctionCall(Function::UUID, vec![]) }/ - i("STRUUID") _ NIL() { Expression::FunctionCall(Function::StrUUID, vec![]) } / - i("MD5") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::MD5, vec![e]) } / - i("SHA1") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::SHA1, vec![e]) } / - i("SHA256") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::SHA256, vec![e]) } / - i("SHA384") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::SHA384, vec![e]) } / - i("SHA512") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::SHA512, vec![e]) } / + i("UUID") _ NIL() { Expression::FunctionCall(Function::Uuid, vec![]) }/ + i("STRUUID") _ NIL() { Expression::FunctionCall(Function::StrUuid, vec![]) } / + i("MD5") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Md5, vec![e]) } / + i("SHA1") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Sha1, vec![e]) } / + i("SHA256") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Sha256, vec![e]) } / + i("SHA384") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Sha384, vec![e]) } / + i("SHA512") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Sha512, vec![e]) } / i("COALESCE") e:ExpressionList() { Expression::Coalesce(e) } / i("IF") _ "(" _ a:Expression() _ "," _ b:Expression() _ "," _ c:Expression() _ ")" { Expression::If(Box::new(a), Box::new(b), Box::new(c)) } / i("STRLANG") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::StrLang, vec![a, b]) } / - i("STRDT") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::StrDT, vec![a, 
b]) } / + i("STRDT") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::StrDt, vec![a, b]) } / i("sameTerm") "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::SameTerm(Box::new(a), Box::new(b)) } / - (i("isIRI") / i("isURI")) _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::IsIRI, vec![e]) } / + (i("isIRI") / i("isURI")) _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::IsIri, vec![e]) } / i("isBLANK") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::IsBlank, vec![e]) } / i("isLITERAL") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::IsLiteral, vec![e]) } / i("isNUMERIC") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::IsNumeric, vec![e]) } / @@ -1853,52 +1876,43 @@ parser! { //[129] rule RDFLiteral() -> Literal = - v:String() _ "^^" _ t:iri() { Literal::new_typed_literal(v, t) } / - v:String() _ l:LANGTAG() {? Literal::new_language_tagged_literal(v, l).map_err(|_| "language tag parsing failed") } / - v:String() { Literal::new_simple_literal(v) } + value:String() _ "^^" _ datatype:iri() { Literal::Typed { value, datatype } } / + value:String() _ language:LANGTAG() { Literal::LanguageTaggedString { value, language: language.into_inner() } } / + value:String() { Literal::Simple { value } } //[130] rule NumericLiteral() -> Literal = NumericLiteralUnsigned() / NumericLiteralPositive() / NumericLiteralNegative() //[131] rule NumericLiteralUnsigned() -> Literal = - d:$(DOUBLE()) {? 
match f64::from_str(d) { - Ok(value) => Ok(value.into()), - Err(_) => Err("Invalid xsd:double()") - } } / - d:$(DECIMAL()) { Literal::new_typed_literal(d, xsd::DECIMAL) } / - i:$(INTEGER()) { Literal::new_typed_literal(i, xsd::INTEGER) } + d:$(DOUBLE()) { Literal::Typed { value: d.into(), datatype: iri("http://www.w3.org/2001/XMLSchema#double") } } / + d:$(DECIMAL()) { Literal::Typed { value: d.into(), datatype: iri("http://www.w3.org/2001/XMLSchema#decimal") } } / + i:$(INTEGER()) { Literal::Typed { value: i.into(), datatype: iri("http://www.w3.org/2001/XMLSchema#integer") } } //[132] rule NumericLiteralPositive() -> Literal = - d:$(DOUBLE_POSITIVE()) {? match f64::from_str(d) { - Ok(value) => Ok(value.into()), - Err(_) => Err("Invalid xsd:double()") - } } / - d:$(DECIMAL_POSITIVE()) { Literal::new_typed_literal(d, xsd::DECIMAL) } / - i:$(INTEGER_POSITIVE()) { Literal::new_typed_literal(i, xsd::INTEGER) } + d:$(DOUBLE_POSITIVE()) { Literal::Typed { value: d.into(), datatype: iri("http://www.w3.org/2001/XMLSchema#double") } } / + d:$(DECIMAL_POSITIVE()) { Literal::Typed { value: d.into(), datatype: iri("http://www.w3.org/2001/XMLSchema#decimal") } } / + i:$(INTEGER_POSITIVE()) { Literal::Typed { value: i.into(), datatype: iri("http://www.w3.org/2001/XMLSchema#integer") } } //[133] rule NumericLiteralNegative() -> Literal = - d:$(DOUBLE_NEGATIVE()) {? 
match f64::from_str(d) { - Ok(value) => Ok(value.into()), - Err(_) => Err("Invalid xsd:double()") - } } / - d:$(DECIMAL_NEGATIVE()) { Literal::new_typed_literal(d, xsd::DECIMAL) } / - i:$(INTEGER_NEGATIVE()) { Literal::new_typed_literal(i, xsd::INTEGER) } + d:$(DOUBLE_NEGATIVE()) { Literal::Typed { value: d.into(), datatype: iri("http://www.w3.org/2001/XMLSchema#double") } } / + d:$(DECIMAL_NEGATIVE()) { Literal::Typed { value: d.into(), datatype: iri("http://www.w3.org/2001/XMLSchema#decimal") } } / + i:$(INTEGER_NEGATIVE()) { Literal::Typed { value: i.into(), datatype: iri("http://www.w3.org/2001/XMLSchema#integer") } } //[134] rule BooleanLiteral() -> Literal = - "true" { true.into() } / - "false" { false.into() } + "true" { Literal::Typed { value: "true".into(), datatype: iri("http://www.w3.org/2001/XMLSchema#boolean") } } / + "false" { Literal::Typed { value: "false".into(), datatype: iri("http://www.w3.org/2001/XMLSchema#boolean") } } //[135] rule String() -> String = STRING_LITERAL_LONG1() / STRING_LITERAL_LONG2() / STRING_LITERAL1() / STRING_LITERAL2() //[136] rule iri() -> NamedNode = i:(IRIREF() / PrefixedName()) { - NamedNode::new_from_iri(i) + iri(i.into_inner()) } //[137] @@ -1910,19 +1924,15 @@ parser! { } } //[138] - rule BlankNode() -> BlankNode = - b:BLANK_NODE_LABEL() {? - match BlankNode::new(b) { - Ok(node) => if state.used_bnodes.contains(&node) { - Err("Already used blank node id") - } else { - state.currently_used_bnodes.insert(node.clone()); - Ok(node) - }, - Err(_) => Err("Invalid blank node identifier") - } - } / - ANON() { BlankNode::default() } + rule BlankNode() -> BlankNode = id:BLANK_NODE_LABEL() {? + let node = BlankNode { id: id.to_owned() }; + if state.used_bnodes.contains(&node) { + Err("Already used blank node id") + } else { + state.currently_used_bnodes.insert(node.clone()); + Ok(node) + } + } / ANON() { bnode() } //[139] rule IRIREF() -> Iri = "<" i:$((!['>'] [_])*) ">" {? @@ -1957,8 +1967,8 @@ parser! 
{ rule VAR2() -> &'input str = "$" v:$(VARNAME()) { v } //[145] - rule LANGTAG() -> String = "@" l:$(['a' ..= 'z' | 'A' ..= 'Z']+ ("-" ['a' ..= 'z' | 'A' ..= 'Z' | '0' ..= '9']+)*) { - l.to_ascii_lowercase() + rule LANGTAG() -> LanguageTag = "@" l:$(['a' ..= 'z' | 'A' ..= 'Z']+ ("-" ['a' ..= 'z' | 'A' ..= 'Z' | '0' ..= '9']+)*) {? + LanguageTag::parse(l.to_ascii_lowercase()).map_err(|_| "language tag parsing failed") } //[146] diff --git a/spargebra/src/query.rs b/spargebra/src/query.rs new file mode 100644 index 00000000..16106adf --- /dev/null +++ b/spargebra/src/query.rs @@ -0,0 +1,181 @@ +use crate::algebra::*; +use crate::parser::{parse_query, ParseError}; +use oxiri::Iri; +use std::convert::TryFrom; +use std::fmt; +use std::str::FromStr; + +/// A parsed [SPARQL query](https://www.w3.org/TR/sparql11-query/) +/// +/// ``` +/// use spargebra::Query; +/// +/// let query_str = "SELECT ?s ?p ?o WHERE { ?s ?p ?o . }"; +/// let mut query = Query::parse(query_str, None)?; +/// assert_eq!(query.to_string(), query_str); +/// # Result::Ok::<_, spargebra::ParseError>(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum Query { + /// [SELECT](https://www.w3.org/TR/sparql11-query/#select) + Select { + /// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) + dataset: Option, + /// The query selection graph pattern + pattern: GraphPattern, + /// The query base IRI + base_iri: Option>, + }, + /// [CONSTRUCT](https://www.w3.org/TR/sparql11-query/#construct) + Construct { + /// The query construction template + template: Vec, + /// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) + dataset: Option, + /// The query selection graph pattern + pattern: GraphPattern, + /// The query base IRI + base_iri: Option>, + }, + /// [DESCRIBE](https://www.w3.org/TR/sparql11-query/#describe) + Describe { + /// The [query dataset 
specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) + dataset: Option, + /// The query selection graph pattern + pattern: GraphPattern, + /// The query base IRI + base_iri: Option>, + }, + /// [ASK](https://www.w3.org/TR/sparql11-query/#ask) + Ask { + /// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) + dataset: Option, + /// The query selection graph pattern + pattern: GraphPattern, + /// The query base IRI + base_iri: Option>, + }, +} + +impl Query { + /// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query + pub fn parse(query: &str, base_iri: Option<&str>) -> Result { + parse_query(query, base_iri) + } +} + +impl fmt::Display for Query { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Query::Select { + dataset, + pattern, + base_iri, + } => { + if let Some(base_iri) = base_iri { + writeln!(f, "BASE <{}>", base_iri)?; + } + write!( + f, + "{}", + SparqlGraphRootPattern { + pattern, + dataset: dataset.as_ref() + } + ) + } + Query::Construct { + template, + dataset, + pattern, + base_iri, + } => { + if let Some(base_iri) = base_iri { + writeln!(f, "BASE <{}>", base_iri)?; + } + write!(f, "CONSTRUCT {{ ")?; + for triple in template.iter() { + write!(f, "{} ", SparqlTriplePattern(triple))?; + } + write!(f, "}}")?; + if let Some(dataset) = dataset { + dataset.fmt(f)?; + } + write!( + f, + " WHERE {{ {} }}", + SparqlGraphRootPattern { + pattern, + dataset: None + } + ) + } + Query::Describe { + dataset, + pattern, + base_iri, + } => { + if let Some(base_iri) = base_iri { + writeln!(f, "BASE <{}>", base_iri.as_str())?; + } + write!(f, "DESCRIBE *")?; + if let Some(dataset) = dataset { + dataset.fmt(f)?; + } + write!( + f, + " WHERE {{ {} }}", + SparqlGraphRootPattern { + pattern, + dataset: None + } + ) + } + Query::Ask { + dataset, + pattern, + base_iri, + } => { + if let Some(base_iri) = base_iri { + writeln!(f, "BASE <{}>", base_iri)?; + } 
+ write!(f, "ASK")?; + if let Some(dataset) = dataset { + dataset.fmt(f)?; + } + write!( + f, + " WHERE {{ {} }}", + SparqlGraphRootPattern { + pattern, + dataset: None + } + ) + } + } + } +} + +impl FromStr for Query { + type Err = ParseError; + + fn from_str(query: &str) -> Result { + Self::parse(query, None) + } +} + +impl<'a> TryFrom<&'a str> for Query { + type Error = ParseError; + + fn try_from(query: &str) -> Result { + Self::from_str(query) + } +} + +impl<'a> TryFrom<&'a String> for Query { + type Error = ParseError; + + fn try_from(query: &String) -> Result { + Self::from_str(query) + } +} diff --git a/spargebra/src/term.rs b/spargebra/src/term.rs new file mode 100644 index 00000000..5e55d150 --- /dev/null +++ b/spargebra/src/term.rs @@ -0,0 +1,426 @@ +//! Data structures for [RDF 1.1 Concepts](https://www.w3.org/TR/rdf11-concepts/) like IRI, literal or triples. + +use std::fmt; +use std::fmt::Write; + +/// An RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri). +/// +/// The default string formatter is returning an N-Triples, Turtle and SPARQL compatible representation. +/// +/// ``` +/// use spargebra::term::NamedNode; +/// +/// assert_eq!( +/// "", +/// NamedNode { iri: "http://example.com/foo".into() }.to_string() +/// ) +/// ``` +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub struct NamedNode { + /// The [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) itself. + pub iri: String, +} + +impl fmt::Display for NamedNode { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "<{}>", self.iri) + } +} + +/// An RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). +/// +/// +/// The default string formatter is returning an N-Triples, Turtle and SPARQL compatible representation. 
+/// +/// ``` +/// use spargebra::term::BlankNode; +/// +/// assert_eq!( +/// "_:a1", +/// BlankNode { id: "a1".into() }.to_string() +/// ) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct BlankNode { + /// The [blank node identifier](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node-identifier). + pub id: String, +} + +impl fmt::Display for BlankNode { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "_:{}", self.id) + } +} + +/// An RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal). +/// +/// The default string formatter is returning an N-Triples, Turtle and SPARQL compatible representation. +/// +/// The language tags should be lowercased [as suggested by the RDF specification](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). +/// +/// ``` +/// use spargebra::term::NamedNode; +/// use spargebra::term::Literal; +/// +/// assert_eq!( +/// "\"foo\\nbar\"", +/// Literal::Simple { value: "foo\nbar".into() }.to_string() +/// ); +/// +/// assert_eq!( +/// "\"1999-01-01\"^^", +/// Literal::Typed { value: "1999-01-01".into(), datatype: NamedNode { iri: "http://www.w3.org/2001/XMLSchema#date".into() }}.to_string() +/// ); +/// +/// assert_eq!( +/// "\"foo\"@en", +/// Literal::LanguageTaggedString { value: "foo".into(), language: "en".into() }.to_string() +/// ); +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum Literal { + /// A [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) without datatype or language form. + Simple { + /// The [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form). + value: String, + }, + /// A [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) + LanguageTaggedString { + /// The [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form). + value: String, + /// The [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag). 
+ language: String, + }, + /// A literal with an explicit datatype + Typed { + /// The [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form). + value: String, + /// The [datatype IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri). + datatype: NamedNode, + }, +} + +impl fmt::Display for Literal { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Literal::Simple { value } => print_quoted_str(value, f), + Literal::LanguageTaggedString { value, language } => { + print_quoted_str(value, f)?; + write!(f, "@{}", language) + } + Literal::Typed { value, datatype } => { + print_quoted_str(value, f)?; + write!(f, "^^{}", datatype) + } + } + } +} + +/// A [SPARQL query variable](https://www.w3.org/TR/sparql11-query/#sparqlQueryVariables). +/// +/// ``` +/// use spargebra::term::Variable; +/// +/// assert_eq!( +/// "?foo", +/// Variable { name: "foo".into() }.to_string() +/// ); +/// ``` +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub struct Variable { + pub name: String, +} + +impl fmt::Display for Variable { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "?{}", self.name) + } +} + +/// The union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). +/// +/// The default string formatter is returning an N-Triples, Turtle and SPARQL compatible representation. 
+#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum NamedOrBlankNode { + NamedNode(NamedNode), + BlankNode(BlankNode), +} + +impl fmt::Display for NamedOrBlankNode { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + NamedOrBlankNode::NamedNode(node) => node.fmt(f), + NamedOrBlankNode::BlankNode(node) => node.fmt(f), + } + } +} + +impl From for NamedOrBlankNode { + #[inline] + fn from(node: NamedNode) -> Self { + NamedOrBlankNode::NamedNode(node) + } +} + +impl From for NamedOrBlankNode { + #[inline] + fn from(node: BlankNode) -> Self { + NamedOrBlankNode::BlankNode(node) + } +} + +/// The union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal). +/// +/// The default string formatter is returning an N-Triples, Turtle and SPARQL compatible representation. +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum NamedNodeOrLiteral { + NamedNode(NamedNode), + Literal(Literal), +} + +impl fmt::Display for NamedNodeOrLiteral { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + NamedNodeOrLiteral::NamedNode(node) => node.fmt(f), + NamedNodeOrLiteral::Literal(literal) => literal.fmt(f), + } + } +} + +impl From for NamedNodeOrLiteral { + #[inline] + fn from(node: NamedNode) -> Self { + NamedNodeOrLiteral::NamedNode(node) + } +} + +impl From for NamedNodeOrLiteral { + #[inline] + fn from(literal: Literal) -> Self { + NamedNodeOrLiteral::Literal(literal) + } +} + +/// An RDF [term](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term). +/// +/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) and [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal). +/// +/// The default string formatter is returning an N-Triples, Turtle and SPARQL compatible representation. 
+#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum Term { + NamedNode(NamedNode), + BlankNode(BlankNode), + Literal(Literal), +} + +impl fmt::Display for Term { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Term::NamedNode(node) => node.fmt(f), + Term::BlankNode(node) => node.fmt(f), + Term::Literal(literal) => literal.fmt(f), + } + } +} + +impl From for Term { + #[inline] + fn from(node: NamedNode) -> Self { + Term::NamedNode(node) + } +} + +impl From for Term { + #[inline] + fn from(node: BlankNode) -> Self { + Term::BlankNode(node) + } +} + +impl From for Term { + #[inline] + fn from(literal: Literal) -> Self { + Term::Literal(literal) + } +} + +impl From for Term { + #[inline] + fn from(resource: NamedOrBlankNode) -> Self { + match resource { + NamedOrBlankNode::NamedNode(node) => Term::NamedNode(node), + NamedOrBlankNode::BlankNode(node) => Term::BlankNode(node), + } + } +} + +/// A possible graph name. +/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph). +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum GraphName { + NamedNode(NamedNode), + DefaultGraph, +} + +impl fmt::Display for GraphName { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + GraphName::NamedNode(node) => node.fmt(f), + GraphName::DefaultGraph => write!(f, "DEFAULT"), + } + } +} + +impl From for GraphName { + #[inline] + fn from(node: NamedNode) -> Self { + GraphName::NamedNode(node) + } +} + +/// A [RDF triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) in a [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). +/// +/// The default string formatter is returning a N-Quads representation. 
+/// +/// ``` +/// use spargebra::term::NamedNode; +/// use spargebra::term::Quad; +/// +/// assert_eq!( +/// " .", +/// Quad { +/// subject: NamedNode { iri: "http://example.com/foo".into() }.into(), +/// predicate: NamedNode { iri: "http://schema.org/sameAs".into() }, +/// object: NamedNode { iri: "http://example.com/foo".into() }.into(), +/// graph_name: NamedNode { iri: "http://example.com/".into() }.into(), +/// }.to_string() +/// ) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct Quad { + pub subject: NamedOrBlankNode, + pub predicate: NamedNode, + pub object: Term, + pub graph_name: GraphName, +} + +impl fmt::Display for Quad { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.graph_name == GraphName::DefaultGraph { + write!(f, "{} {} {} .", self.subject, self.predicate, self.object) + } else { + write!( + f, + "{} {} {} {} .", + self.subject, self.predicate, self.object, self.graph_name + ) + } + } +} + +/// The union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and [variables](https://www.w3.org/TR/sparql11-query/#sparqlQueryVariables). +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum NamedNodeOrVariable { + NamedNode(NamedNode), + Variable(Variable), +} + +impl fmt::Display for NamedNodeOrVariable { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + NamedNodeOrVariable::NamedNode(node) => node.fmt(f), + NamedNodeOrVariable::Variable(var) => var.fmt(f), + } + } +} + +impl From for NamedNodeOrVariable { + fn from(node: NamedNode) -> Self { + NamedNodeOrVariable::NamedNode(node) + } +} + +impl From for NamedNodeOrVariable { + fn from(var: Variable) -> Self { + NamedNodeOrVariable::Variable(var) + } +} + +/// The union of [terms](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term) and [variables](https://www.w3.org/TR/sparql11-query/#sparqlQueryVariables). 
+#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum TermOrVariable { + Term(Term), + Variable(Variable), +} + +impl fmt::Display for TermOrVariable { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TermOrVariable::Term(term) => term.fmt(f), + TermOrVariable::Variable(var) => var.fmt(f), + } + } +} + +impl From for TermOrVariable { + fn from(node: NamedNode) -> Self { + TermOrVariable::Term(node.into()) + } +} + +impl From for TermOrVariable { + fn from(node: BlankNode) -> Self { + TermOrVariable::Term(node.into()) + } +} + +impl From for TermOrVariable { + fn from(literal: Literal) -> Self { + TermOrVariable::Term(literal.into()) + } +} + +impl From for TermOrVariable { + fn from(var: Variable) -> Self { + TermOrVariable::Variable(var) + } +} + +impl From for TermOrVariable { + fn from(term: Term) -> Self { + TermOrVariable::Term(term) + } +} + +impl From for TermOrVariable { + fn from(element: NamedNodeOrVariable) -> Self { + match element { + NamedNodeOrVariable::NamedNode(node) => TermOrVariable::Term(node.into()), + NamedNodeOrVariable::Variable(var) => TermOrVariable::Variable(var), + } + } +} + +#[inline] +pub(crate) fn print_quoted_str(string: &str, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_char('"')?; + for c in string.chars() { + match c { + '\n' => f.write_str("\\n"), + '\r' => f.write_str("\\r"), + '"' => f.write_str("\\\""), + '\\' => f.write_str("\\\\"), + c => f.write_char(c), + }?; + } + f.write_char('"') +} diff --git a/spargebra/src/update.rs b/spargebra/src/update.rs new file mode 100644 index 00000000..656691ab --- /dev/null +++ b/spargebra/src/update.rs @@ -0,0 +1,199 @@ +use crate::algebra::*; +use crate::parser::{parse_update, ParseError}; +use crate::term::*; +use oxiri::Iri; +use std::convert::TryFrom; +use std::fmt; +use std::str::FromStr; + +/// A parsed [SPARQL update](https://www.w3.org/TR/sparql11-update/) +/// +/// ``` +/// use spargebra::Update; +/// +/// let update_str = "CLEAR ALL ;"; 
+/// let update = Update::parse(update_str, None)?; +/// assert_eq!(update.to_string().trim(), update_str); +/// # Result::Ok::<_, spargebra::ParseError>(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct Update { + /// The update base IRI + pub base_iri: Option>, + /// The [update operations](https://www.w3.org/TR/sparql11-update/#formalModelGraphUpdate) + pub operations: Vec, +} + +impl Update { + /// Parses a SPARQL update with an optional base IRI to resolve relative IRIs in the query + pub fn parse(update: &str, base_iri: Option<&str>) -> Result { + parse_update(update, base_iri) + } +} + +impl fmt::Display for Update { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(base_iri) = &self.base_iri { + writeln!(f, "BASE <{}>", base_iri)?; + } + for update in &self.operations { + writeln!(f, "{} ;", update)?; + } + Ok(()) + } +} + +impl FromStr for Update { + type Err = ParseError; + + fn from_str(update: &str) -> Result { + Self::parse(update, None) + } +} + +impl<'a> TryFrom<&'a str> for Update { + type Error = ParseError; + + fn try_from(update: &str) -> Result { + Self::from_str(update) + } +} + +impl<'a> TryFrom<&'a String> for Update { + type Error = ParseError; + + fn try_from(update: &String) -> Result { + Self::from_str(update) + } +} + +/// The [graph update operations](https://www.w3.org/TR/sparql11-update/#formalModelGraphUpdate) +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum GraphUpdateOperation { + /// [insert data](https://www.w3.org/TR/sparql11-update/#def_insertdataoperation) + InsertData { data: Vec }, + /// [delete data](https://www.w3.org/TR/sparql11-update/#def_deletedataoperation) + DeleteData { data: Vec }, + /// [delete insert](https://www.w3.org/TR/sparql11-update/#def_deleteinsertoperation) + DeleteInsert { + delete: Vec, + insert: Vec, + using: Option, + pattern: Box, + }, + /// [load](https://www.w3.org/TR/sparql11-update/#def_loadoperation) + Load { + silent: bool, + from: 
NamedNode, + to: Option, + }, + /// [clear](https://www.w3.org/TR/sparql11-update/#def_clearoperation) + Clear { silent: bool, graph: GraphTarget }, + /// [create](https://www.w3.org/TR/sparql11-update/#def_createoperation) + Create { silent: bool, graph: NamedNode }, + /// [drop](https://www.w3.org/TR/sparql11-update/#def_dropoperation) + Drop { silent: bool, graph: GraphTarget }, +} + +impl fmt::Display for GraphUpdateOperation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + GraphUpdateOperation::InsertData { data } => { + writeln!(f, "INSERT DATA {{")?; + write_quads(data, f)?; + write!(f, "}}") + } + GraphUpdateOperation::DeleteData { data } => { + writeln!(f, "DELETE DATA {{")?; + write_quads(data, f)?; + write!(f, "}}") + } + GraphUpdateOperation::DeleteInsert { + delete, + insert, + using, + pattern, + } => { + if !delete.is_empty() { + writeln!(f, "DELETE {{")?; + for quad in delete { + writeln!(f, "\t{}", SparqlQuadPattern(quad))?; + } + writeln!(f, "}}")?; + } + if !insert.is_empty() { + writeln!(f, "INSERT {{")?; + for quad in insert { + writeln!(f, "\t{}", SparqlQuadPattern(quad))?; + } + writeln!(f, "}}")?; + } + if let Some(using) = using { + for g in &using.default { + writeln!(f, "USING {}", g)?; + } + if let Some(named) = &using.named { + for g in named { + writeln!(f, "USING NAMED {}", g)?; + } + } + } + write!( + f, + "WHERE {{ {} }}", + SparqlGraphRootPattern { + pattern, + dataset: None + } + ) + } + GraphUpdateOperation::Load { silent, from, to } => { + write!(f, "LOAD ")?; + if *silent { + write!(f, "SILENT ")?; + } + write!(f, "{}", from)?; + if let Some(to) = to { + write!(f, " INTO GRAPH {}", to)?; + } + Ok(()) + } + GraphUpdateOperation::Clear { silent, graph } => { + write!(f, "CLEAR ")?; + if *silent { + write!(f, "SILENT ")?; + } + write!(f, "{}", graph) + } + GraphUpdateOperation::Create { silent, graph } => { + write!(f, "CREATE ")?; + if *silent { + write!(f, "SILENT ")?; + } + write!(f, "GRAPH {}", 
graph) + } + GraphUpdateOperation::Drop { silent, graph } => { + write!(f, "DROP ")?; + if *silent { + write!(f, "SILENT ")?; + } + write!(f, "{}", graph) + } + } + } +} + +fn write_quads(quads: &[Quad], f: &mut fmt::Formatter<'_>) -> fmt::Result { + for quad in quads { + if quad.graph_name == GraphName::DefaultGraph { + writeln!(f, "\t{} {} {} .", quad.subject, quad.predicate, quad.object)?; + } else { + writeln!( + f, + "\tGRAPH {} {{ {} {} {} }}", + quad.graph_name, quad.subject, quad.predicate, quad.object + )?; + } + } + Ok(()) +} diff --git a/testsuite/tests/sparql.rs b/testsuite/tests/sparql.rs index 2b10b63f..0c6ce486 100644 --- a/testsuite/tests/sparql.rs +++ b/testsuite/tests/sparql.rs @@ -24,10 +24,9 @@ fn sparql10_w3c_query_syntax_testsuite() -> Result<()> { run_testsuite( "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/manifest-syntax.ttl", vec![ - //Bad SPARQL query that should be rejected by the parser - "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql4/manifest#syn-bad-38", - "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql4/manifest#syn-bad-34", - "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql3/manifest#syn-bad-26", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql4/manifest#syn-bad-38", // bnode scope + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql4/manifest#syn-bad-34", // bnode scope + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql3/manifest#syn-bad-26", // tokenizer ], ) }