From d933660ed7bb43b633ede8195d07d7ea08a2f251 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 27 May 2018 12:05:01 +0200 Subject: [PATCH] Adds a basic incomplete SPARQL parser --- build.rs | 1 + src/lib.rs | 1 + src/rio/turtle/turtle_grammar.rustpeg | 2 +- src/sparql/ast.rs | 644 +++++++++++++++++ src/sparql/mod.rs | 3 + src/sparql/model.rs | 115 +++ src/sparql/parser.rs | 247 +++++++ src/sparql/sparql_grammar.rustpeg | 977 ++++++++++++++++++++++++++ src/utils.rs | 6 + tests/client.rs | 52 ++ tests/rdf_test_cases.rs | 46 +- tests/sparql_test_cases.rs | 69 ++ 12 files changed, 2121 insertions(+), 42 deletions(-) create mode 100644 src/sparql/ast.rs create mode 100644 src/sparql/mod.rs create mode 100644 src/sparql/model.rs create mode 100644 src/sparql/parser.rs create mode 100644 src/sparql/sparql_grammar.rustpeg create mode 100644 tests/client.rs create mode 100644 tests/sparql_test_cases.rs diff --git a/build.rs b/build.rs index c63acd89..8324c913 100644 --- a/build.rs +++ b/build.rs @@ -3,4 +3,5 @@ extern crate peg; fn main() { peg::cargo_build("src/rio/ntriples/ntriples_grammar.rustpeg"); peg::cargo_build("src/rio/turtle/turtle_grammar.rustpeg"); + peg::cargo_build("src/sparql/sparql_grammar.rustpeg"); } diff --git a/src/lib.rs b/src/lib.rs index 586d4b87..08669af7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,5 +5,6 @@ extern crate uuid; pub mod model; pub mod rio; +pub mod sparql; pub mod store; mod utils; diff --git a/src/rio/turtle/turtle_grammar.rustpeg b/src/rio/turtle/turtle_grammar.rustpeg index b4ffabb4..93201d17 100644 --- a/src/rio/turtle/turtle_grammar.rustpeg +++ b/src/rio/turtle/turtle_grammar.rustpeg @@ -227,7 +227,7 @@ STRING_LITERAL_LONG_SINGLE_QUOTE_inner -> String = a:$(("''" / "'")?) 
b:(STRING_ } STRING_LITERAL_LONG_SINGLE_QUOTE_simple_char -> char = c:$([^'\u{005c}]) { c.chars().next().unwrap() } -//[25]abc""def''ghi" +//[25] STRING_LITERAL_LONG_QUOTE -> String = "\"\"\"" l:(STRING_LITERAL_LONG_QUOTE_inner*) "\"\"\"" { l.into_iter().collect() } diff --git a/src/sparql/ast.rs b/src/sparql/ast.rs new file mode 100644 index 00000000..4f0151d6 --- /dev/null +++ b/src/sparql/ast.rs @@ -0,0 +1,644 @@ +use model::data::*; +use sparql::model::*; +use std::fmt; +use std::ops::Add; +use utils::Escaper; + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub struct TriplePattern { + subject: TermOrVariable, + predicate: NamedNodeOrVariable, + object: TermOrVariable, +} + +impl TriplePattern { + pub fn new( + subject: impl Into, + predicate: impl Into, + object: impl Into, + ) -> Self { + Self { + subject: subject.into(), + predicate: predicate.into(), + object: object.into(), + } + } +} + +impl fmt::Display for TriplePattern { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} {} {}", self.subject, self.predicate, self.object) + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub enum PropertyPath { + PredicatePath(NamedNodeOrVariable), + InversePath(Box), + SequencePath(Vec), + AlternativePath(Vec), + ZeroOrMorePath(Box), + OneOrMorePath(Box), + ZeroOrOnePath(Box), + NegatedPath(Box), +} + +impl fmt::Display for PropertyPath { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + PropertyPath::PredicatePath(p) => write!(f, "{}", p), + PropertyPath::InversePath(p) => write!(f, "^{}", p), + PropertyPath::SequencePath(ps) => write!( + f, + "({})", + ps.iter() + .map(|v| v.to_string()) + .collect::>() + .join(" / ") + ), + PropertyPath::AlternativePath(ps) => write!( + f, + "({})", + ps.iter() + .map(|v| v.to_string()) + .collect::>() + .join(" | ") + ), + PropertyPath::ZeroOrMorePath(p) => write!(f, "{}*", p), + PropertyPath::OneOrMorePath(p) => write!(f, "{}+", p), + 
PropertyPath::ZeroOrOnePath(p) => write!(f, "{}?", p), + PropertyPath::NegatedPath(p) => write!(f, "!{}", p), + } + } +} + +impl From for PropertyPath { + fn from(p: NamedNodeOrVariable) -> Self { + PropertyPath::PredicatePath(p) + } +} + +impl From for PropertyPath { + fn from(p: NamedNode) -> Self { + PropertyPath::PredicatePath(p.into()) + } +} + +impl From for PropertyPath { + fn from(p: Variable) -> Self { + PropertyPath::PredicatePath(p.into()) + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub struct PropertyPathPattern { + subject: TermOrVariable, + path: PropertyPath, + object: TermOrVariable, +} + +impl PropertyPathPattern { + pub fn new( + subject: impl Into, + path: impl Into, + object: impl Into, + ) -> Self { + Self { + subject: subject.into(), + path: path.into(), + object: object.into(), + } + } +} + +impl fmt::Display for PropertyPathPattern { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} {} {}", self.subject, self.path, self.object) + } +} + +impl From for PropertyPathPattern { + fn from(p: TriplePattern) -> Self { + Self { + subject: p.subject, + path: p.predicate.into(), + object: p.object, + } + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub enum Expression { + ConstantExpression(TermOrVariable), + OrExpression(Vec), + AndExpression(Vec), + EqualExpression(Box, Box), + NotEqualExpression(Box, Box), + GreaterExpression(Box, Box), + GreaterOrEqExpression(Box, Box), + LowerExpression(Box, Box), + LowerOrEqExpression(Box, Box), + InExpression(Box, Vec), + NotInExpression(Box, Vec), + AddExpression(Box, Box), + SubExpression(Box, Box), + MulExpression(Box, Box), + DivExpression(Box, Box), + UnaryPlusExpression(Box), + UnaryMinusExpression(Box), + UnaryNotExpression(Box), + StrFunctionCall(Box), + LangFunctionCall(Box), + LangMatchesFunctionCall(Box, Box), + DatatypeFunctionCall(Box), + BoundFunctionCall(Variable), + IRIFunctionCall(Box), + BNodeFunctionCall(Option>), + 
RandFunctionCall(), + AbsFunctionCall(Box), + CeilFunctionCall(Box), + FloorFunctionCall(Box), + RoundFunctionCall(Box), + ConcatFunctionCall(Vec), + SubStrFunctionCall(Box, Box, Option>), + StrLenFunctionCall(Box), + ReplaceFunctionCall( + Box, + Box, + Box, + Option>, + ), + UCaseFunctionCall(Box), + LCaseFunctionCall(Box), + EncodeForURIFunctionCall(Box), + ContainsFunctionCall(Box, Box), + StrStartsFunctionCall(Box, Box), + StrEndsFunctionCall(Box, Box), + StrBeforeFunctionCall(Box, Box), + StrAfterFunctionCall(Box, Box), + YearFunctionCall(Box), + MonthFunctionCall(Box), + DayFunctionCall(Box), + HoursFunctionCall(Box), + MinutesFunctionCall(Box), + SecondsFunctionCall(Box), + TimezoneFunctionCall(Box), + NowFunctionCall(), + UUIDFunctionCall(), + StrUUIDFunctionCall(), + MD5FunctionCall(Box), + SHA1FunctionCall(Box), + SHA256FunctionCall(Box), + SHA384FunctionCall(Box), + SHA512FunctionCall(Box), + CoalesceFunctionCall(Vec), + IfFunctionCall(Box, Box, Box), + StrLangFunctionCall(Box, Box), + StrDTFunctionCall(Box, Box), + SameTermFunctionCall(Box, Box), + IsIRIFunctionCall(Box), + IsBlankFunctionCall(Box), + IsLiteralFunctionCall(Box), + IsNumericFunctionCall(Box), + RegexFunctionCall(Box, Box, Option>), + CustomFunctionCall(NamedNode, Vec), + ExistsFunctionCall(Box), + NotExistsFunctionCall(Box), + CountAggregate(Option>, bool), + SumAggregate(Box, bool), + MinAggregate(Box, bool), + MaxAggregate(Box, bool), + AvgAggregate(Box, bool), + SampleAggregate(Box, bool), + GroupConcatAggregate(Box, bool, Option), +} + +impl fmt::Display for Expression { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Expression::ConstantExpression(t) => write!(f, "{}", t), + Expression::OrExpression(e) => write!( + f, + "({})", + e.iter() + .map(|v| v.to_string()) + .collect::>() + .join(" || ") + ), + Expression::AndExpression(e) => write!( + f, + "({})", + e.iter() + .map(|v| v.to_string()) + .collect::>() + .join(" && ") + ), + 
Expression::EqualExpression(a, b) => write!(f, "{} = {}", a, b), + Expression::NotEqualExpression(a, b) => write!(f, "{} != {}", a, b), + Expression::GreaterExpression(a, b) => write!(f, "{} > {}", a, b), + Expression::GreaterOrEqExpression(a, b) => write!(f, "{} >= {}", a, b), + Expression::LowerExpression(a, b) => write!(f, "{} < {}", a, b), + Expression::LowerOrEqExpression(a, b) => write!(f, "{} <= {}", a, b), + Expression::InExpression(a, b) => write!( + f, + "{} IN ({})", + a, + b.iter() + .map(|v| v.to_string()) + .collect::>() + .join(", ") + ), + Expression::NotInExpression(a, b) => write!( + f, + "{} NOT IN ({})", + a, + b.iter() + .map(|v| v.to_string()) + .collect::>() + .join(", ") + ), + Expression::AddExpression(a, b) => write!(f, "{} + {}", a, b), + Expression::SubExpression(a, b) => write!(f, "{} - {}", a, b), + Expression::MulExpression(a, b) => write!(f, "{} * {}", a, b), + Expression::DivExpression(a, b) => write!(f, "{} / {}", a, b), + Expression::UnaryPlusExpression(e) => write!(f, "+{}", e), + Expression::UnaryMinusExpression(e) => write!(f, "-{}", e), + Expression::UnaryNotExpression(e) => write!(f, "!{}", e), + Expression::StrFunctionCall(e) => write!(f, "STR({})", e), + Expression::LangFunctionCall(e) => write!(f, "LANG({})", e), + Expression::LangMatchesFunctionCall(a, b) => write!(f, "LANGMATCHES({}, {})", a, b), + Expression::DatatypeFunctionCall(e) => write!(f, "DATATYPE({})", e), + Expression::BoundFunctionCall(v) => write!(f, "BOUND({})", v), + Expression::IRIFunctionCall(e) => write!(f, "IRI({})", e), + Expression::BNodeFunctionCall(v) => v.as_ref() + .map(|id| write!(f, "BOUND({})", id)) + .unwrap_or_else(|| write!(f, "BOUND()")), + Expression::RandFunctionCall() => write!(f, "RAND()"), + Expression::AbsFunctionCall(e) => write!(f, "ABS({})", e), + Expression::CeilFunctionCall(e) => write!(f, "CEIL({})", e), + Expression::FloorFunctionCall(e) => write!(f, "FLOOR({})", e), + Expression::RoundFunctionCall(e) => write!(f, "ROUND({})", 
e), + Expression::ConcatFunctionCall(e) => write!( + f, + "CONCAT({})", + e.iter() + .map(|v| v.to_string()) + .collect::>() + .join(", ") + ), + Expression::SubStrFunctionCall(a, b, c) => c.as_ref() + .map(|cv| write!(f, "SUBSTR({}, {}, {})", a, b, cv)) + .unwrap_or_else(|| write!(f, "SUBSTR({}, {})", a, b)), + Expression::StrLenFunctionCall(e) => write!(f, "STRLEN({})", e), + Expression::ReplaceFunctionCall(a, b, c, d) => d.as_ref() + .map(|dv| write!(f, "REPLACE({}, {}, {}, {})", a, b, c, dv)) + .unwrap_or_else(|| write!(f, "REPLACE({}, {}, {})", a, b, c)), + Expression::UCaseFunctionCall(e) => write!(f, "UCASE({})", e), + Expression::LCaseFunctionCall(e) => write!(f, "LCASE({})", e), + Expression::EncodeForURIFunctionCall(e) => write!(f, "ENCODE_FOR_URI({})", e), + Expression::ContainsFunctionCall(a, b) => write!(f, "CONTAINS({}, {})", a, b), + Expression::StrStartsFunctionCall(a, b) => write!(f, "STRSTATS({}, {})", a, b), + Expression::StrEndsFunctionCall(a, b) => write!(f, "STRENDS({}, {})", a, b), + Expression::StrBeforeFunctionCall(a, b) => write!(f, "STRBEFORE({}, {})", a, b), + Expression::StrAfterFunctionCall(a, b) => write!(f, "STRAFTER({}, {})", a, b), + Expression::YearFunctionCall(e) => write!(f, "YEAR({})", e), + Expression::MonthFunctionCall(e) => write!(f, "MONTH({})", e), + Expression::DayFunctionCall(e) => write!(f, "DAY({})", e), + Expression::HoursFunctionCall(e) => write!(f, "HOURS({})", e), + Expression::MinutesFunctionCall(e) => write!(f, "MINUTES({})", e), + Expression::SecondsFunctionCall(e) => write!(f, "SECONDS({})", e), + Expression::TimezoneFunctionCall(e) => write!(f, "TIMEZONE({})", e), + Expression::NowFunctionCall() => write!(f, "NOW()"), + Expression::UUIDFunctionCall() => write!(f, "UUID()"), + Expression::StrUUIDFunctionCall() => write!(f, "STRUUID()"), + Expression::MD5FunctionCall(e) => write!(f, "MD5({})", e), + Expression::SHA1FunctionCall(e) => write!(f, "SHA1({})", e), + Expression::SHA256FunctionCall(e) => write!(f, 
"SHA256({})", e), + Expression::SHA384FunctionCall(e) => write!(f, "SHA384({})", e), + Expression::SHA512FunctionCall(e) => write!(f, "SHA512({})", e), + Expression::CoalesceFunctionCall(e) => write!( + f, + "COALESCE({})", + e.iter() + .map(|v| v.to_string()) + .collect::>() + .join(", ") + ), + Expression::IfFunctionCall(a, b, c) => write!(f, "IF({}, {}, {})", a, b, c), + Expression::StrLangFunctionCall(a, b) => write!(f, "STRLANG({}, {})", a, b), + Expression::StrDTFunctionCall(a, b) => write!(f, "STRDT({}, {})", a, b), + Expression::SameTermFunctionCall(a, b) => write!(f, "sameTerm({}, {})", a, b), + Expression::IsIRIFunctionCall(e) => write!(f, "isIRI({})", e), + Expression::IsBlankFunctionCall(e) => write!(f, "isBLANK({})", e), + Expression::IsLiteralFunctionCall(e) => write!(f, "isLITERAL({})", e), + Expression::IsNumericFunctionCall(e) => write!(f, "isNUMERIC({})", e), + Expression::RegexFunctionCall(a, b, c) => c.as_ref() + .map(|cv| write!(f, "REGEX({}, {}, {})", a, b, cv)) + .unwrap_or_else(|| write!(f, "REGEX({}, {})", a, b)), + Expression::CustomFunctionCall(iri, args) => write!( + f, + "{}({})", + iri, + args.iter() + .map(|v| v.to_string()) + .collect::>() + .join(", ") + ), + Expression::ExistsFunctionCall(p) => write!(f, "EXISTS {{ {} }}", p), + Expression::NotExistsFunctionCall(p) => write!(f, "NOT EXISTS {{ {} }}", p), + Expression::CountAggregate(e, distinct) => if *distinct { + e.as_ref() + .map(|ex| write!(f, "COUNT(DISTINCT {})", ex)) + .unwrap_or_else(|| write!(f, "COUNT(DISTINCT *)")) + } else { + e.as_ref() + .map(|ex| write!(f, "COUNT({})", ex)) + .unwrap_or_else(|| write!(f, "COUNT(*)")) + }, + Expression::SumAggregate(e, distinct) => if *distinct { + write!(f, "SUM(DISTINCT {})", e) + } else { + write!(f, "SUM({})", e) + }, + Expression::MinAggregate(e, distinct) => if *distinct { + write!(f, "MIN(DISTINCT {})", e) + } else { + write!(f, "MIN({})", e) + }, + Expression::MaxAggregate(e, distinct) => if *distinct { + write!(f, 
"MAX(DISTINCT {})", e) + } else { + write!(f, "MAX({})", e) + }, + Expression::AvgAggregate(e, distinct) => if *distinct { + write!(f, "AVG(DISTINCT {})", e) + } else { + write!(f, "AVG({})", e) + }, + Expression::SampleAggregate(e, distinct) => if *distinct { + write!(f, "SAMPLE(DISTINCT {})", e) + } else { + write!(f, "SAMPLE({})", e) + }, + Expression::GroupConcatAggregate(e, distinct, sep) => if *distinct { + sep.as_ref() + .map(|s| { + write!( + f, + "GROUP_CONCAT(DISTINCT {}; SEPARATOR = \"{}\")", + e, + s.escape() + ) + }) + .unwrap_or_else(|| write!(f, "GROUP_CONCAT(DISTINCT {})", e)) + } else { + sep.as_ref() + .map(|s| write!(f, "GROUP_CONCAT({}; SEPARATOR = \"{}\")", e, s.escape())) + .unwrap_or_else(|| write!(f, "GROUP_CONCAT({})", e)) + }, + } + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub enum GraphPattern { + GroupPattern(Vec), + PropertyPathPattern(PropertyPathPattern), + OptionalPattern(Box), + UnionPattern(Vec), + GraphPattern(NamedNodeOrVariable, Box), + BindPattern(Expression, Variable), + ValuesPattern(Vec, Vec>>), + GroupByPattern(Expression), + HavingPattern(Expression), + MinusPattern(Box), + FilterPattern(Expression), + SubSelectPattern { + selection: Selection, + filter: Box, + }, + ServicePattern(NamedNodeOrVariable, Box), +} + +impl Default for GraphPattern { + fn default() -> Self { + GraphPattern::GroupPattern(Vec::default()) + } +} + +impl From for GraphPattern { + fn from(p: PropertyPathPattern) -> Self { + GraphPattern::PropertyPathPattern(p) + } +} + +impl fmt::Display for GraphPattern { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + GraphPattern::GroupPattern(p) => write!( + f, + "{}", + p.iter() + .map(|v| v.to_string()) + .collect::>() + .join(" . 
") + ), + GraphPattern::PropertyPathPattern(p) => write!(f, "{}", p), + GraphPattern::OptionalPattern(p) => write!(f, "OPTIONAL {{ {} }}", p), + GraphPattern::UnionPattern(ps) => write!( + f, + "{{ {} }}", + ps.iter() + .map(|p| p.to_string()) + .collect::>() + .join(" } UNION { ") + ), + GraphPattern::GraphPattern(g, p) => write!(f, "GRAPH {} {{ {} }}", g, p), + GraphPattern::BindPattern(e, v) => write!(f, "BIND({} AS {})", e, v), + GraphPattern::ValuesPattern(vars, vals) => write!( + f, + "VALUES ({}) {{ {} }}", + vars.iter() + .map(|v| v.to_string()) + .collect::>() + .join(" "), + vals.iter() + .map(|r| format!( + "({})", + r.iter() + .map(|vop| vop.as_ref() + .map(|v| v.to_string()) + .unwrap_or_else(|| "UNDEF".to_string())) + .collect::>() + .join(" ") + )) + .collect::>() + .join(" ") + ), + GraphPattern::GroupByPattern(g) => write!(f, "GROUP BY ({})", g), + GraphPattern::HavingPattern(e) => write!(f, "HAVING({})", e), + GraphPattern::MinusPattern(p) => write!(f, "MINUS {{ {} }}", p), + GraphPattern::FilterPattern(p) => write!(f, "FILTER({})", p), + GraphPattern::SubSelectPattern { selection, filter } => { + write!(f, "{{ SELECT {} WHERE {{ {} }} }}", selection, filter) + } + GraphPattern::ServicePattern(s, p) => write!(f, "SERVICE {} {{ {} }}", s, p), + } + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash, Default)] +pub struct Dataset { + pub default: Vec, + pub named: Vec, +} + +impl Dataset { + pub fn new_with_default(graph: NamedNode) -> Self { + Self { + default: vec![graph], + named: Vec::default(), + } + } + + pub fn new_with_named(graph: NamedNode) -> Self { + Self { + default: Vec::default(), + named: vec![graph], + } + } +} + +impl Add for Dataset { + type Output = Self; + + fn add(mut self, rhs: Dataset) -> Self { + self.default.extend_from_slice(&rhs.default); + self.named.extend_from_slice(&rhs.named); + self + } +} + +impl fmt::Display for Dataset { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for g in 
&self.default { + write!(f, "FROM {} ", g)?; + } + for g in &self.named { + write!(f, "FROM NAMED {} ", g)?; + } + Ok(()) + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub enum SelectionOption { + Distinct, + Reduced, + Default, +} + +impl fmt::Display for SelectionOption { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + match self { + SelectionOption::Distinct => write!(f, "DISTINCT"), + SelectionOption::Reduced => write!(f, "REDUCED"), + SelectionOption::Default => Ok(()), + } + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub enum SelectionMember { + Variable(Variable), + Expression(Expression, Variable), +} + +impl fmt::Display for SelectionMember { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + match self { + SelectionMember::Variable(v) => write!(f, "{}", v), + SelectionMember::Expression(e, v) => write!(f, "({} AS {})", e, v), + } + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub struct Selection { + pub option: SelectionOption, + pub variables: Option>, +} + +impl fmt::Display for Selection { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + self.variables + .as_ref() + .map(|vars| { + write!( + f, + "{} {}", + self.option, + vars.iter() + .map(|v| v.to_string()) + .collect::>() + .join(" ") + ) + }) + .unwrap_or_else(|| write!(f, "{} *", self.option)) + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub enum Query { + SelectQuery { + selection: Selection, + dataset: Dataset, + filter: GraphPattern, + }, + ConstructQuery { + construct: Vec, + dataset: Dataset, + filter: GraphPattern, + }, + DescribeQuery { + dataset: Dataset, + filter: GraphPattern, + }, + AskQuery { + dataset: Dataset, + filter: GraphPattern, + }, +} + +impl fmt::Display for Query { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Query::SelectQuery { + selection, + dataset, + filter, + } => 
write!(f, "SELECT {} {} WHERE {{ {} }}", selection, dataset, filter), + Query::ConstructQuery { + construct, + dataset, + filter, + } => write!( + f, + "CONSTRUCT {{ {} }} {} WHERE {{ {} }}", + construct + .iter() + .map(|t| t.to_string()) + .collect::>() + .join(" . "), + dataset, + filter + ), + Query::DescribeQuery { dataset, filter } => { + write!(f, "DESCRIBE {} WHERE {{ {} }}", dataset, filter) + } + Query::AskQuery { dataset, filter } => { + write!(f, "ASK {} WHERE {{ {} }}", dataset, filter) + } + } + } +} diff --git a/src/sparql/mod.rs b/src/sparql/mod.rs new file mode 100644 index 00000000..55886577 --- /dev/null +++ b/src/sparql/mod.rs @@ -0,0 +1,3 @@ +pub mod ast; +pub mod model; +pub mod parser; diff --git a/src/sparql/model.rs b/src/sparql/model.rs new file mode 100644 index 00000000..1b9bfbfa --- /dev/null +++ b/src/sparql/model.rs @@ -0,0 +1,115 @@ +use model::data::*; +use std::fmt; +use uuid::Uuid; + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub struct Variable { + name: String, +} + +impl Variable { + pub fn new(name: impl Into) -> Self { + Self { name: name.into() } + } +} + +impl fmt::Display for Variable { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "?{}", self.name) + } +} + +impl Default for Variable { + fn default() -> Self { + Self { + name: Uuid::new_v4().to_string(), + } + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub enum NamedNodeOrVariable { + NamedNode(NamedNode), + Variable(Variable), +} + +impl fmt::Display for NamedNodeOrVariable { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + NamedNodeOrVariable::NamedNode(node) => write!(f, "{}", node), + NamedNodeOrVariable::Variable(var) => write!(f, "{}", var), + } + } +} + +impl From for NamedNodeOrVariable { + fn from(node: NamedNode) -> Self { + NamedNodeOrVariable::NamedNode(node) + } +} + +impl From for NamedNodeOrVariable { + fn from(var: Variable) -> Self { + 
NamedNodeOrVariable::Variable(var) + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub enum TermOrVariable { + Term(Term), + Variable(Variable), +} + +impl fmt::Display for TermOrVariable { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TermOrVariable::Term(node) => write!(f, "{}", node), + TermOrVariable::Variable(var) => write!(f, "{}", var), + } + } +} + +impl From for TermOrVariable { + fn from(node: NamedNode) -> Self { + TermOrVariable::Term(node.into()) + } +} + +impl From for TermOrVariable { + fn from(node: BlankNode) -> Self { + TermOrVariable::Term(node.into()) + } +} + +impl From for TermOrVariable { + fn from(literal: Literal) -> Self { + TermOrVariable::Term(literal.into()) + } +} + +impl From for TermOrVariable { + fn from(node: NamedOrBlankNode) -> Self { + TermOrVariable::Term(node.into()) + } +} + +impl From for TermOrVariable { + fn from(node: Term) -> Self { + TermOrVariable::Term(node) + } +} + +impl From for TermOrVariable { + fn from(var: Variable) -> Self { + TermOrVariable::Variable(var) + } +} + +impl From for TermOrVariable { + fn from(element: NamedNodeOrVariable) -> Self { + match element { + NamedNodeOrVariable::NamedNode(node) => TermOrVariable::Term(node.into()), + NamedNodeOrVariable::Variable(var) => TermOrVariable::Variable(var), + } + } +} diff --git a/src/sparql/parser.rs b/src/sparql/parser.rs new file mode 100644 index 00000000..3291ef26 --- /dev/null +++ b/src/sparql/parser.rs @@ -0,0 +1,247 @@ +use std::borrow::Cow; +use std::char; +use std::str::Chars; + +mod grammar { + use model::data::*; + use rio::RioError; + use rio::RioResult; + use sparql::ast::*; + use sparql::model::*; + use sparql::parser::unescape_unicode_codepoints; + use std::borrow::Cow; + use std::collections::BTreeMap; + use std::collections::HashMap; + use std::io::BufReader; + use std::io::Read; + use url::ParseOptions; + use url::Url; + + struct FocusedTriplePattern { + focus: F, + patterns: Vec, + } + + 
impl FocusedTriplePattern { + fn new(focus: F) -> Self { + Self { + focus, + patterns: Vec::default(), + } + } + } + + impl Default for FocusedTriplePattern { + fn default() -> Self { + Self { + focus: F::default(), + patterns: Vec::default(), + } + } + } + + impl From> for FocusedTriplePattern> { + fn from(input: FocusedTriplePattern) -> Self { + Self { + focus: vec![input.focus], + patterns: input.patterns, + } + } + } + + struct FocusedPropertyPathPattern { + focus: F, + patterns: Vec, + } + + impl FocusedPropertyPathPattern { + fn new(focus: F) -> Self { + Self { + focus, + patterns: Vec::default(), + } + } + } + + impl Default for FocusedPropertyPathPattern { + fn default() -> Self { + Self { + focus: F::default(), + patterns: Vec::default(), + } + } + } + + impl From> for FocusedPropertyPathPattern> { + fn from(input: FocusedPropertyPathPattern) -> Self { + Self { + focus: vec![input.focus], + patterns: input.patterns, + } + } + } + + impl> From> for FocusedPropertyPathPattern { + fn from(input: FocusedTriplePattern) -> Self { + Self { + focus: input.focus.into(), + patterns: input.patterns.into_iter().map(|p| p.into()).collect(), + } + } + } + + impl From> for GraphPattern { + fn from(input: FocusedPropertyPathPattern) -> Self { + if input.patterns.len() == 1 { + input.patterns[0].clone().into() + } else { + GraphPattern::GroupPattern(input.patterns.into_iter().map(|p| p.into()).collect()) + } + } + } + + fn flatten_group_pattern(v: impl Iterator) -> GraphPattern { + let l: Vec = v.into_iter() + .flat_map(|p| { + if let GraphPattern::GroupPattern(v2) = p { + v2.into_iter() + } else { + vec![p].into_iter() + } + }) + .collect(); + if l.len() == 1 { + l[0].clone() + } else { + GraphPattern::GroupPattern(l) + } + } + + pub struct ParserState { + base_uri: Option, + namespaces: HashMap, + bnodes_map: BTreeMap, + } + + impl ParserState { + fn url_parser<'a>(&'a self) -> ParseOptions<'a> { + Url::options().base_url(self.base_uri.as_ref()) + } + } + + 
include!(concat!(env!("OUT_DIR"), "/sparql_grammar.rs")); + + pub fn read_sparql_query<'a, R: Read + 'a>( + source: R, + base_uri: impl Into>, + ) -> RioResult { + let mut state = ParserState { + base_uri: base_uri.into(), + namespaces: HashMap::default(), + bnodes_map: BTreeMap::default(), + }; + + let mut string_buffer = String::default(); + BufReader::new(source).read_to_string(&mut string_buffer)?; + + match QueryUnit( + &unescape_unicode_codepoints(Cow::from(string_buffer)), + &mut state, + ) { + Ok(query) => Ok(query), + Err(error) => Err(RioError::new(error)), + } + } +} + +pub use sparql::parser::grammar::read_sparql_query; + +fn needs_unescape_unicode_codepoints(input: &str) -> bool { + let bytes = input.as_bytes(); + for i in 1..bytes.len() { + if (bytes[i] == ('u' as u8) || bytes[i] == ('U' as u8)) && bytes[i - 1] == ('/' as u8) { + return true; + } + } + return false; +} + +struct UnescapeUnicodeCharIterator<'a> { + iter: Chars<'a>, + buffer: String, +} + +impl<'a> UnescapeUnicodeCharIterator<'a> { + fn new(string: &'a str) -> Self { + Self { + iter: string.chars(), + buffer: String::with_capacity(9), + } + } +} + +impl<'a> Iterator for UnescapeUnicodeCharIterator<'a> { + type Item = char; + + fn next(&mut self) -> Option { + if !self.buffer.is_empty() { + return Some(self.buffer.remove(0)); + } + match self.iter.next()? 
{ + '\\' => match self.iter.next() { + Some('u') => { + self.buffer.push('u'); + for _ in 0..4 { + if let Some(c) = self.iter.next() { + self.buffer.push(c); + } else { + return Some('\\'); + } + } + if let Some(c) = u32::from_str_radix(&self.buffer[1..5], 16) + .ok() + .and_then(char::from_u32) + { + self.buffer.clear(); + Some(c) + } else { + Some('\\') + } + } + Some('U') => { + self.buffer.push('U'); + for _ in 0..8 { + if let Some(c) = self.iter.next() { + self.buffer.push(c); + } else { + return Some('\\'); + } + } + if let Some(c) = u32::from_str_radix(&self.buffer[1..9], 16) + .ok() + .and_then(char::from_u32) + { + self.buffer.clear(); + Some(c) + } else { + Some('\\') + } + } + Some(c) => { + self.buffer.push(c); + Some('\\') + } + None => Some('\\'), + }, + c => Some(c), + } + } +} + +fn unescape_unicode_codepoints<'a>(input: Cow<'a, str>) -> Cow<'a, str> { + if needs_unescape_unicode_codepoints(&input) { + UnescapeUnicodeCharIterator::new(&input).collect() + } else { + input + } +} diff --git a/src/sparql/sparql_grammar.rustpeg b/src/sparql/sparql_grammar.rustpeg new file mode 100644 index 00000000..12de5577 --- /dev/null +++ b/src/sparql/sparql_grammar.rustpeg @@ -0,0 +1,977 @@ +//See https://www.w3.org/TR/turtle/#sec-grammar + +use std::char; +use model::vocab::rdf; +use model::vocab::xsd; +use std::iter; +use std::iter::once; + +#![arguments(state: &mut ParserState)] + + +//[1] +#[pub] +QueryUnit -> Query = Query + +//[2] +Query -> Query = _ Prologue _ q:(SelectQuery / ConstructQuery / DescribeQuery / AskQuery) _ ValuesClause _ { //TODO: ValuesClause + q +} + +//[4] +Prologue -> () = (BaseDecl _ / PrefixDecl _)* + +//[5] +BaseDecl -> () = "BASE"i _ i:IRIREF {? 
+ match state.url_parser().parse(&i) { + Ok(url) => { + state.base_uri = Some(url); + Ok(()) + }, + Err(error) => Err("IRI parsing failed") + } +} + +//[6] +PrefixDecl -> () = "PREFIX"i _ ns:PNAME_NS _ i:IRIREF { + state.namespaces.insert(ns.into(), i.into()); +} + +//[7] +SelectQuery -> Query = s:SelectClause _ d:DatasetClauses _ f:WhereClause _ SolutionModifier { //TODO: Modifier + Query::SelectQuery { + selection: s, + dataset: d, + filter: f + } +} + +//[8] +SubSelect -> GraphPattern = s:SelectClause _ f:WhereClause _ SolutionModifier _ ValuesClause { //TODO: Modifiers + GraphPattern::SubSelectPattern { + selection: s, + filter: Box::new(f) + } +} + +//[9] +SelectClause -> Selection = "SELECT"i _ o:SelectClause_option _ v:SelectClause_variables { + Selection { + option: o, + variables: v + } +} +SelectClause_option -> SelectionOption = + "DISTINCT"i { SelectionOption::Distinct } / + "REDUCED"i { SelectionOption::Reduced } / + { SelectionOption::Default } +SelectClause_variables -> Option> = + '*' { None } / + p:SelectClause_member+ { Some(p) } +SelectClause_member -> SelectionMember = + v:Var _ { SelectionMember::Variable(v) } / + '(' _ e:Expression _ "AS"i _ v:Var _ ')' _ { SelectionMember::Expression(e, v) } + +//[10] +ConstructQuery -> Query = + "CONSTRUCT"i _ c:ConstructTemplate _ d:DatasetClauses _ f:WhereClause _ SolutionModifier { + Query::ConstructQuery { construct: c, dataset: d, filter: f } + } / + "CONSTRUCT"i _ d:DatasetClauses _ "WHERE"i _ '{' _ c:ConstructQuery_optional_triple_template _ '}' _ SolutionModifier { + Query::ConstructQuery { + construct: c.clone(), + dataset: d, + filter: flatten_group_pattern(c.into_iter().map(|p| PropertyPathPattern::from(p).into())) + } + } + +ConstructQuery_optional_triple_template -> Vec = TriplesTemplate / { Vec::default() } + +//[11] +DescribeQuery -> Query = "DESCRIBE"i _ ('*' / (VarOrIri _)+) _ d:DatasetClauses f:WhereClause? 
_ SolutionModifier { + Query::DescribeQuery { + dataset: d, + filter: f.unwrap_or_else(GraphPattern::default) + } +} + +//[12] +AskQuery -> Query = "ASK"i _ d:DatasetClauses f:WhereClause _ SolutionModifier { + Query::AskQuery { + dataset: d, + filter: f + } +} + +//[13] +DatasetClause -> Dataset = "FROM"i _ d:(DefaultGraphClause / NamedGraphClause) { d } +DatasetClauses -> Dataset = d:DatasetClauses_item* { + d.into_iter().fold(Dataset::default(), |mut a, b| a + b) +} +DatasetClauses_item -> Dataset = d:DatasetClause _ { d } +//[14] +DefaultGraphClause -> Dataset = s:SourceSelector { + Dataset::new_with_default(s) +} + +//[15] +NamedGraphClause -> Dataset = "NAMED"i _ s:SourceSelector { + Dataset::new_with_named(s) +} + +//[16] +SourceSelector -> NamedNode = iri + +//[17] +WhereClause -> GraphPattern = "WHERE"i? _ p:GroupGraphPattern { p } + +//[18] +SolutionModifier -> () = GroupClause? _ HavingClause? _ OrderClause? _ LimitOffsetClauses? + +//[19] +GroupClause -> () = "GROUP"i _ "BY"i _ (GroupCondition _)+ + +//[20] +GroupCondition -> () = BuiltInCall / FunctionCall / '(' _ Expression _ ("AS"i _ Var _)? ')' / Var + +//[21] +HavingClause -> Expression = "HAVING"i _ c:HavingCondition+ { + if c.len() == 1 { + c[0].clone() + } else { + Expression::AndExpression(c) + } +} + +//[22] +HavingCondition -> Expression = Constraint + +//[23] +OrderClause -> () = "ORDER"i "BY"i _ OrderCondition+ + +//[24] +OrderCondition -> () = (( "ASC"i / "DESC"i) _ BrackettedExpression) / (Constraint / Var) + +//[25] +LimitOffsetClauses -> () = LimitClause _ OffsetClause? / OffsetClause _ LimitClause? + +//[26] +LimitClause -> () = "LIMIT"i _ INTEGER + +//[27] +OffsetClause -> () = "OFFSET"i _ INTEGER + +//[28] +ValuesClause -> Option = + "VALUES"i _ p:DataBlock { Some(p) } / + { None } + +//[52] +TriplesTemplate -> Vec = p:TriplesTemplate_item **<1,> ('.' _) '.'? 
{
+    p.into_iter().flat_map(|c| c.into_iter()).collect()
+}
+TriplesTemplate_item -> Vec = p:TriplesSameSubject _ { p }
+
+//[53]
+// A plain group is tried first; a `{ SELECT ... }` sub-query is the fallback alternative.
+GroupGraphPattern -> GraphPattern =
+    '{' _ p:GroupGraphPatternSub _ '}' { p } /
+    '{' _ p:SubSelect _ '}' { p }
+
+//[54]
+GroupGraphPatternSub -> GraphPattern = a:TriplesBlock? _ b:GroupGraphPatternSub_item* {
+    // Optional leading triples block, then the patterns of every item in source order.
+    let mut list: Vec<_> = a.into_iter().collect();
+    for v in b {
+        // `v` is owned, so its elements are moved instead of cloned.
+        list.extend(v);
+    }
+    flatten_group_pattern(list.into_iter())
+}
+// One non-triples pattern, optionally followed by '.' and a further triples block.
+GroupGraphPatternSub_item -> Vec = a:GraphPatternNotTriples _ ('.' _)? b:TriplesBlock? _ {
+    let mut result = vec![a];
+    // Appends the triples block when present (an `Option` extends by zero or one element).
+    result.extend(b);
+    result
+}
+
+//[55]
+TriplesBlock -> GraphPattern = h:TriplesSameSubjectPath _ t:TriplesBlock_tail? {
+    match t {
+        Some(l) => flatten_group_pattern(vec![h, l].into_iter()),
+        None => flatten_group_pattern(once(h))
+    }
+}
+// A '.' with nothing after it yields an empty group pattern.
+TriplesBlock_tail -> GraphPattern = '.' _ t:TriplesBlock? _ {
+    t.unwrap_or_else(|| GraphPattern::GroupPattern(Vec::default()))
+}
+
+//[56]
+GraphPatternNotTriples -> GraphPattern = GroupOrUnionGraphPattern / OptionalGraphPattern / MinusGraphPattern / GraphGraphPattern / ServiceGraphPattern / Filter / Bind / InlineData
+
+//[57]
+OptionalGraphPattern -> GraphPattern = "OPTIONAL"i _ p:GroupGraphPattern {
+    GraphPattern::OptionalPattern(Box::new(p))
+}
+
+//[58]
+GraphGraphPattern -> GraphPattern = "GRAPH"i _ g:VarOrIri _ p:GroupGraphPattern {
+    GraphPattern::GraphPattern(g, Box::new(p))
+}
+
+//[59]
+ServiceGraphPattern -> GraphPattern = "SERVICE"i _ "SILENT"i?
_ s:VarOrIri _ p:GroupGraphPattern {
+    GraphPattern::ServicePattern(s, Box::new(p))
+}
+
+//[60]
+Bind -> GraphPattern = "BIND"i _ '(' _ e:Expression _ "AS"i _ v:Var _ ')' {
+    GraphPattern::BindPattern(e, v)
+}
+
+//[61]
+InlineData -> GraphPattern = "VALUES"i _ p:DataBlock { p }
+
+//[62]
+DataBlock -> GraphPattern = InlineDataOneVar / InlineDataFull
+
+//[63]
+// Single-variable form: every value becomes a one-element row.
+InlineDataOneVar -> GraphPattern = v:Var _ '{' _ d:InlineDataOneVar_value* '}' {
+    GraphPattern::ValuesPattern(vec![v], d)
+}
+// Consumes the whitespace after the value so that values can be separated by spaces
+// and the closing '}' may be preceded by whitespace (same as InlineDataFull_value).
+InlineDataOneVar_value -> Vec> = t:DataBlockValue _ { vec![t] }
+
+//[64]
+// Multi-variable form: a parenthesised variable list followed by parenthesised rows.
+InlineDataFull -> GraphPattern = '(' _ var:InlineDataFull_var* _ ')' _ '{' _ val:InlineDataFull_values* '}' {
+    GraphPattern::ValuesPattern(var, val)
+}
+InlineDataFull_var -> Variable = v:Var _ { v }
+InlineDataFull_values -> Vec> = '(' _ v:InlineDataFull_value* _ ')' _ { v }
+InlineDataFull_value -> Option = v:DataBlockValue _ { v }
+
+//[65]
+// `UNDEF` is represented as `None`.
+DataBlockValue -> Option =
+    i:iri { Some(i.into()) } /
+    l:RDFLiteral { Some(l.into()) } /
+    l:NumericLiteral { Some(l.into()) } /
+    l:BooleanLiteral { Some(l.into()) } /
+    "UNDEF"i { None }
+
+//[66]
+MinusGraphPattern -> GraphPattern = "MINUS"i _ p: GroupGraphPattern {
+    GraphPattern::MinusPattern(Box::new(p))
+}
+
+//[67]
+// A single group is returned unwrapped; several are wrapped in a `UnionPattern`.
+GroupOrUnionGraphPattern -> GraphPattern = p:GroupOrUnionGraphPattern_item **<1,> ("UNION"i _) {
+    if p.len() == 1 {
+        p[0].clone()
+    } else {
+        GraphPattern::UnionPattern(p)
+    }
+}
+GroupOrUnionGraphPattern_item -> GraphPattern = p:GroupGraphPattern _ { p }
+
+//[68]
+Filter -> GraphPattern = "FILTER"i _ c:Constraint { GraphPattern::FilterPattern(c) }
+
+//[69]
+Constraint -> Expression = BrackettedExpression / BuiltInCall / FunctionCall
+
+//[70]
+FunctionCall -> Expression = f: iri _ a: ArgList {
+    Expression::CustomFunctionCall(f, a.into())
+}
+
+//[71]
+// The DISTINCT keyword is accepted (case-insensitively, like every other keyword) but ignored.
+ArgList -> Vec = //TODO: support DISTINCT
+    NIL { Vec::new() } /
+    '(' _ "DISTINCT"i?
_ e:ArgList_item **<1,> (',' _) _ ')' { e }
+ArgList_item -> Expression = e:Expression _ { e }
+
+//[72]
+ExpressionList -> Vec =
+    NIL { Vec::default() } /
+    '(' _ e:ExpressionList_item **<1,> (',' _) ')' { e }
+ExpressionList_item -> Expression = e:Expression _ { e }
+
+//[73]
+ConstructTemplate -> Vec = '{' _ t:ConstructTriples _ '}' { t }
+
+//[74]
+// A trailing '.' after the last triple is accepted, as in TriplesTemplate.
+ConstructTriples -> Vec = p:ConstructTriples_item ** ('.' _) '.'? {
+    p.into_iter().flat_map(|c| c.into_iter()).collect()
+}
+ConstructTriples_item -> Vec = t:TriplesSameSubject _ { t }
+
+//[75]
+// Expands `subject verb objects` into one triple pattern per (verb, object) pair.
+// Patterns produced while parsing nested nodes come first in the result.
+TriplesSameSubject -> Vec =
+    s:VarOrTerm _ po:PropertyListNotEmpty {
+        let mut patterns = po.patterns;
+        for (p, os) in po.focus {
+            for o in os {
+                patterns.push(TriplePattern::new(s.clone(), p.clone(), o))
+            }
+        }
+        patterns.into_iter().map(|p| p.into()).collect()
+    } /
+    s:TriplesNode _ po:PropertyList {
+        let mut patterns = s.patterns;
+        patterns.extend_from_slice(&po.patterns);
+        for (p, os) in po.focus {
+            for o in os {
+                patterns.push(TriplePattern::new(s.focus.clone(), p.clone(), o))
+            }
+        }
+        patterns.into_iter().map(|p| p.into()).collect()
+    }
+
+//[76]
+PropertyList -> FocusedTriplePattern)>> =
+    PropertyListNotEmpty /
+    { FocusedTriplePattern::default() }
+
+//[77]
+// Collects the (verb, objects) pairs in `focus` and the nested patterns in `patterns`.
+PropertyListNotEmpty -> FocusedTriplePattern)>> = l:PropertyListNotEmpty_item **<1,> (';' _) {
+    l.into_iter().fold(FocusedTriplePattern::)>>::default(), |mut a, b| {
+        a.focus.push(b.focus);
+        a.patterns.extend_from_slice(&b.patterns);
+        a
+    })
+}
+PropertyListNotEmpty_item -> FocusedTriplePattern<(NamedNodeOrVariable,Vec)> = p:Verb _ o:ObjectList _ {
+    FocusedTriplePattern {
+        focus: (p, o.focus),
+        patterns: o.patterns
+    }
+}
+
+//[78]
+// VarOrIri is tried first: with ordered choice, a leading 'a' alternative would also match
+// the first letter of a prefixed name such as `a:b` and the rule would never reach the IRI.
+// A bare `a` is neither a variable nor an IRI, so it still falls through to rdf:type.
+Verb -> NamedNodeOrVariable = VarOrIri / 'a' { rdf::TYPE.clone().into() }
+
+//[79]
+ObjectList -> FocusedTriplePattern> = o:ObjectList_item **<1,> (',' _) {
+    o.into_iter().fold(FocusedTriplePattern::>::default(), |mut a, b| {
+        a.focus.push(b.focus);
+        a.patterns.extend_from_slice(&b.patterns);
+        a
+    })
+}
+ObjectList_item -> FocusedTriplePattern = o:Object _ { o }
+
+//[80]
+Object -> FocusedTriplePattern = GraphNode
+
+//[81]
+// Expands `subject path objects` into one property path pattern per (path, object) pair.
+// Patterns produced while parsing nested nodes come first; the whole list is then
+// passed to `flatten_group_pattern`.
+TriplesSameSubjectPath -> GraphPattern =
+    s:VarOrTerm _ po:PropertyListPathNotEmpty {
+        let mut patterns = po.patterns;
+        for (p, os) in po.focus {
+            for o in os {
+                patterns.push(PropertyPathPattern::new(s.clone(), p.clone(), o))
+            }
+        }
+        flatten_group_pattern(patterns.into_iter().map(|p| p.into()))
+    } /
+    s:TriplesNodePath _ po:PropertyListPath {
+        let mut patterns = s.patterns;
+        patterns.extend_from_slice(&po.patterns);
+        for (p, os) in po.focus {
+            for o in os {
+                patterns.push(PropertyPathPattern::new(s.focus.clone(), p.clone(), o))
+            }
+        }
+        flatten_group_pattern(patterns.into_iter().map(|p| p.into()))
+    }
+
+//[82]
+PropertyListPath -> FocusedPropertyPathPattern)>> =
+    PropertyListPathNotEmpty /
+    { FocusedPropertyPathPattern::default() }
+
+//[83]
+// The first (verb, objects) pair seeds the fold; each following `;` item is appended to it.
+// Items are parsed with ObjectList, so their patterns are converted with `.into()` here.
+PropertyListPathNotEmpty -> FocusedPropertyPathPattern)>> = hp:(VerbPath / VerbSimple) _ ho:ObjectListPath _ t:PropertyListPathNotEmpty_item* {
+    t.into_iter().fold(FocusedPropertyPathPattern {
+        focus: vec![(hp, ho.focus)],
+        patterns: ho.patterns
+    }, |mut a, b| {
+        a.focus.push(b.focus);
+        a.patterns.extend(b.patterns.into_iter().map(|v| v.into()));
+        a
+    })
+}
+PropertyListPathNotEmpty_item -> FocusedTriplePattern<(PropertyPath,Vec)> = ';' _ p:(VerbPath / VerbSimple) _ o:ObjectList _ { //TODO: make values after ';' optional
+    FocusedTriplePattern {
+        focus: (p, o.focus),
+        patterns: o.patterns
+    }
+}
+
+//[84]
+VerbPath -> PropertyPath = Path
+
+//[85]
+VerbSimple -> PropertyPath = v:Var {
+    v.into()
+}
+
+//[86]
+// Collects the object nodes in `focus` and the patterns they generated in `patterns`.
+ObjectListPath -> FocusedPropertyPathPattern> = o:ObjectPath_item **<1,> (',' _) {
+    o.into_iter().fold(FocusedPropertyPathPattern::>::default(), |mut a, b| {
+        a.focus.push(b.focus);
+        a.patterns.extend_from_slice(&b.patterns);
+        a
+    })
+}
+ObjectPath_item -> FocusedPropertyPathPattern = o:ObjectPath _ { o }
+
+//[87]
+ObjectPath -> FocusedPropertyPathPattern =
GraphNodePath
+
+//[88]
+Path -> PropertyPath = PathAlternative
+
+//[89]
+// Branches separated by '|' form an alternative path; a single branch is returned unwrapped.
+PathAlternative -> PropertyPath = p:PathAlternative_item **<1,> ('|' _) {
+    if p.len() == 1 {
+        p[0].clone()
+    } else {
+        PropertyPath::AlternativePath(p)
+    }
+}
+PathAlternative_item -> PropertyPath = p:PathSequence _ { p }
+
+//[90]
+// Steps separated by '/' form a sequence path; a single step is returned unwrapped.
+PathSequence -> PropertyPath = p:PathSequence_item **<1,> ('/' _) {
+    if p.len() == 1 {
+        p[0].clone()
+    } else {
+        PropertyPath::SequencePath(p)
+    }
+}
+PathSequence_item -> PropertyPath = p:PathEltOrInverse _ { p }
+
+//[91]
+// Optional path modifier after the primary; no whitespace is accepted before '?' (see TODO).
+PathElt -> PropertyPath =
+    p:PathPrimary '?' { PropertyPath::ZeroOrOnePath(Box::new(p)) } / //TODO: allow space before "?"
+    p:PathPrimary _ '*' { PropertyPath::ZeroOrMorePath(Box::new(p)) } /
+    p:PathPrimary _ '+' { PropertyPath::OneOrMorePath(Box::new(p)) } /
+    PathPrimary
+
+//[92]
+PathEltOrInverse -> PropertyPath =
+    '^' _ p:PathElt { PropertyPath::InversePath(Box::new(p)) } /
+    PathElt
+
+//[94]
+PathPrimary -> PropertyPath =
+    v:Verb { v.into() } /
+    '!'
_ p:PathNegatedPropertySet { PropertyPath::NegatedPath(Box::new(p)) } /
+    '(' _ p:Path _ ')' { p }
+
+//[95]
+// The parenthesised form is represented as an alternative path of its members.
+PathNegatedPropertySet -> PropertyPath =
+    '(' _ p:PathNegatedPropertySet_item **<1,> ('|' _) ')' { PropertyPath::AlternativePath(p) } /
+    PathOneInPropertySet
+PathNegatedPropertySet_item -> PropertyPath = p:PathOneInPropertySet _ { p }
+
+//[96]
+PathOneInPropertySet -> PropertyPath =
+    '^' _ v:Verb { PropertyPath::InversePath(Box::new(v.into())) } /
+    v:Verb { v.into() }
+
+//[98]
+TriplesNode -> FocusedTriplePattern = Collection / BlankNodePropertyList
+
+//[99]
+// `[ verb objects ; ... ]`: a fresh blank node is the subject of every listed pair.
+BlankNodePropertyList -> FocusedTriplePattern = '[' _ po:PropertyListNotEmpty _ ']' {
+    // Start from the patterns generated by nested nodes in the object positions,
+    // otherwise triples of inner collections / blank node lists would be lost.
+    let mut patterns = po.patterns;
+    let bnode = TermOrVariable::from(BlankNode::default());
+    for (p, os) in po.focus {
+        for o in os {
+            patterns.push(TriplePattern::new(bnode.clone(), p.clone(), o));
+        }
+    }
+    FocusedTriplePattern {
+        focus: bnode,
+        patterns
+    }
+}
+
+//[100]
+TriplesNodePath -> FocusedPropertyPathPattern = CollectionPath / BlankNodePropertyListPath
+
+//[101]
+// Path flavour of BlankNodePropertyList; nested patterns are kept in the same way.
+BlankNodePropertyListPath -> FocusedPropertyPathPattern = '[' _ po:PropertyListPathNotEmpty _ ']' {
+    let mut patterns = po.patterns;
+    let bnode = TermOrVariable::from(BlankNode::default());
+    for (p, os) in po.focus {
+        for o in os {
+            patterns.push(PropertyPathPattern::new(bnode.clone(), p.clone(), o));
+        }
+    }
+    FocusedPropertyPathPattern {
+        focus: bnode,
+        patterns
+    }
+}
+
+//[102]
+// Builds the rdf:first / rdf:rest chain from the last element back to the first,
+// starting with rdf:nil as the tail.
+Collection -> FocusedTriplePattern = '(' _ o:Collection_item+ ')' {
+    let mut patterns: Vec = Vec::default();
+    let mut current_list_node = TermOrVariable::from(rdf::NIL.clone());
+    for objWithPatterns in o.into_iter().rev() {
+        let new_blank_node = TermOrVariable::from(BlankNode::default());
+        patterns.push(TriplePattern::new(new_blank_node.clone(), rdf::FIRST.clone(), objWithPatterns.focus.clone()));
+        patterns.push(TriplePattern::new(new_blank_node.clone(), rdf::REST.clone(), current_list_node));
+        current_list_node =
new_blank_node;
+        patterns.extend_from_slice(&objWithPatterns.patterns);
+    }
+    FocusedTriplePattern {
+        focus: current_list_node,
+        patterns
+    }
+}
+Collection_item -> FocusedTriplePattern = o:GraphNode _ { o }
+
+//[103]
+// Path flavour of Collection: builds the rdf:first / rdf:rest chain from the last
+// element back to the first, starting with rdf:nil as the tail.
+CollectionPath -> FocusedPropertyPathPattern = '(' _ o:CollectionPath_item+ _ ')' {
+    let mut patterns: Vec = Vec::default();
+    let mut current_list_node = TermOrVariable::from(rdf::NIL.clone());
+    for objWithPatterns in o.into_iter().rev() {
+        let new_blank_node = TermOrVariable::from(BlankNode::default());
+        patterns.push(PropertyPathPattern::new(new_blank_node.clone(), rdf::FIRST.clone(), objWithPatterns.focus.clone()));
+        patterns.push(PropertyPathPattern::new(new_blank_node.clone(), rdf::REST.clone(), current_list_node));
+        current_list_node = new_blank_node;
+        patterns.extend_from_slice(&objWithPatterns.patterns);
+    }
+    FocusedPropertyPathPattern {
+        focus: current_list_node,
+        patterns
+    }
+}
+CollectionPath_item -> FocusedPropertyPathPattern = p:GraphNodePath _ { p }
+
+//[104]
+GraphNode -> FocusedTriplePattern =
+    t:VarOrTerm { FocusedTriplePattern::new(t) } /
+    TriplesNode
+
+//[105]
+GraphNodePath -> FocusedPropertyPathPattern =
+    t:VarOrTerm { FocusedPropertyPathPattern::new(t.into()) } /
+    TriplesNodePath
+
+//[106]
+VarOrTerm -> TermOrVariable =
+    v:Var { v.into() } /
+    t:GraphTerm { t.into() }
+
+//[107]
+VarOrIri -> NamedNodeOrVariable =
+    v:Var { v.into() } /
+    i:iri { i.into() }
+
+//[108]
+Var -> Variable = v:(VAR1 / VAR2) { Variable::new(v) }
+
+//[109]
+// The empty collection `()` denotes rdf:nil, the same term the collection rules
+// use as list terminator, not a fresh blank node.
+GraphTerm -> Term =
+    i:iri { i.into() } /
+    l:RDFLiteral { l.into() } /
+    l:NumericLiteral { l.into() } /
+    l:BooleanLiteral { l.into() } /
+    b:BlankNode { b.into() } /
+    NIL { rdf::NIL.clone().into() }
+
+//[110]
+Expression -> Expression = e:ConditionalOrExpression {e}
+
+//[111]
+// A single operand is returned unwrapped; several are wrapped in an `OrExpression`.
+ConditionalOrExpression -> Expression = e:ConditionalOrExpression_item **<1,> ("||" _) {
+    if e.len() == 1 {
+        e[0].clone()
+    } else {
+        Expression::OrExpression(e)
+    }
+}
+ConditionalOrExpression_item -> Expression = e:ConditionalAndExpression _ { e }
+
+//[112]
+// A single operand is returned unwrapped; several are wrapped in an `AndExpression`.
+ConditionalAndExpression -> Expression = e:ConditionalAndExpression_item **<1,> ("&&" _) {
+    if e.len() == 1 {
+        e[0].clone()
+    } else {
+        Expression::AndExpression(e)
+    }
+}
+ConditionalAndExpression_item -> Expression = e:ValueLogical _ { e }
+
+//[113]
+ValueLogical -> Expression = RelationalExpression
+
+//[114]
+// Ordered choice: every alternative parses the left operand again from the same position;
+// the bare NumericExpression at the end is the fallback when no operator follows.
+RelationalExpression -> Expression =
+    a:NumericExpression _ "=" _ b:NumericExpression { Expression::EqualExpression(Box::new(a), Box::new(b)) } /
+    a:NumericExpression _ "!=" _ b:NumericExpression { Expression::NotEqualExpression(Box::new(a), Box::new(b)) } /
+    a:NumericExpression _ ">" _ b:NumericExpression { Expression::GreaterExpression(Box::new(a), Box::new(b)) } /
+    a:NumericExpression _ ">=" _ b:NumericExpression { Expression::GreaterOrEqExpression(Box::new(a), Box::new(b)) } /
+    a:NumericExpression _ "<" _ b:NumericExpression { Expression::LowerExpression(Box::new(a), Box::new(b)) } /
+    a:NumericExpression _ "<=" _ b:NumericExpression { Expression::LowerOrEqExpression(Box::new(a), Box::new(b)) } /
+    a:NumericExpression _ "IN"i _ b:ExpressionList { Expression::InExpression(Box::new(a), b) } /
+    a:NumericExpression _ "NOT"i _ "IN"i _ b:ExpressionList { Expression::NotInExpression(Box::new(a), b) } /
+    NumericExpression
+
+//[115]
+NumericExpression -> Expression = AdditiveExpression
+
+//[116]
+// NOTE(review): the rule recurses on the right operand, so `a - b - c` is built as
+// `a - (b - c)` - confirm whether right-associativity is intended here.
+AdditiveExpression -> Expression =
+    a:MultiplicativeExpression _ '+' _ b:AdditiveExpression { Expression::AddExpression(Box::new(a), Box::new(b)) } /
+    a:MultiplicativeExpression _ '-' _ b:AdditiveExpression { Expression::SubExpression(Box::new(a), Box::new(b)) } /
+    MultiplicativeExpression
+
+//[117]
+// NOTE(review): right-recursive like AdditiveExpression, so `a / b / c` is built as `a / (b / c)`.
+MultiplicativeExpression -> Expression =
+    a:UnaryExpression _ '*' _ b:MultiplicativeExpression { Expression::MulExpression(Box::new(a), Box::new(b)) } /
+    a:UnaryExpression _ '/' _ b:MultiplicativeExpression { Expression::DivExpression(Box::new(a), Box::new(b)) }
/
+    UnaryExpression
+
+//[118]
+UnaryExpression -> Expression =
+    '!' _ e:PrimaryExpression { Expression::UnaryNotExpression(Box::new(e)) } /
+    '+' _ e:PrimaryExpression { Expression::UnaryPlusExpression(Box::new(e)) } /
+    '-' _ e:PrimaryExpression { Expression::UnaryMinusExpression(Box::new(e)) } /
+    PrimaryExpression
+
+//[119]
+// Literals, IRIs and variables are all wrapped in `ConstantExpression`.
+PrimaryExpression -> Expression =
+    BrackettedExpression /
+    BuiltInCall /
+    iriOrFunction /
+    l:RDFLiteral { Expression::ConstantExpression(l.into()) } /
+    l:NumericLiteral { Expression::ConstantExpression(l.into()) } /
+    l:BooleanLiteral { Expression::ConstantExpression(l.into()) } /
+    v:Var { Expression::ConstantExpression(v.into()) }
+
+//[120]
+BrackettedExpression -> Expression = '(' _ e:Expression _ ')' { e }
+
+//[121]
+// Every keyword is followed by `_` so that whitespace is accepted before the argument list.
+// A keyword that is a prefix of another one (STR / STRLEN, LANG / LANGMATCHES) is safe to
+// list first: its alternative fails on the missing '(' and the next alternative is tried.
+BuiltInCall -> Expression =
+    Aggregate /
+    "STR"i _ '(' _ e:Expression _ ')' { Expression::StrFunctionCall(Box::new(e)) } /
+    "LANG"i _ '(' _ e:Expression _ ')' { Expression::LangFunctionCall(Box::new(e)) } /
+    "LANGMATCHES"i _ '(' _ a:Expression _ ',' _ b:Expression _ ')' { Expression::LangMatchesFunctionCall(Box::new(a), Box::new(b)) } /
+    "DATATYPE"i _ '(' _ e:Expression _ ')' { Expression::DatatypeFunctionCall(Box::new(e)) } /
+    "BOUND"i _ '(' _ v:Var _ ')' { Expression::BoundFunctionCall(v) } /
+    ("IRI"i / "URI"i) _ '(' _ e:Expression _ ')' { Expression::IRIFunctionCall(Box::new(e)) } /
+    "BNODE"i _ '(' _ e:Expression _ ')' { Expression::BNodeFunctionCall(Some(Box::new(e))) } /
+    "BNODE"i _ NIL { Expression::BNodeFunctionCall(None) } /
+    "RAND"i _ NIL { Expression::RandFunctionCall() } /
+    "ABS"i _ '(' _ e:Expression _ ')' { Expression::AbsFunctionCall(Box::new(e)) } /
+    "CEIL"i _ '(' _ e:Expression _ ')' { Expression::CeilFunctionCall(Box::new(e)) } /
+    "FLOOR"i _ '(' _ e:Expression _ ')' { Expression::FloorFunctionCall(Box::new(e)) } /
+    "ROUND"i _ '(' _ e:Expression _ ')' { Expression::RoundFunctionCall(Box::new(e)) } /
+    "CONCAT"i _ e:ExpressionList { Expression::ConcatFunctionCall(e) } /
+    SubstringExpression
/
+    // String, date/time and hash built-ins. Keywords are matched case-insensitively and
+    // followed by `_` so that whitespace is accepted before the opening parenthesis.
+    "STRLEN"i _ '(' _ e: Expression _ ')' { Expression::StrLenFunctionCall(Box::new(e)) } /
+    StrReplaceExpression /
+    "UCASE"i _ '(' _ e:Expression _ ')' { Expression::UCaseFunctionCall(Box::new(e)) } /
+    "LCASE"i _ '(' _ e:Expression _ ')' { Expression::LCaseFunctionCall(Box::new(e)) } /
+    "ENCODE_FOR_URI"i _ '(' _ e: Expression _ ')' { Expression::EncodeForURIFunctionCall(Box::new(e)) } /
+    "CONTAINS"i _ '(' _ a:Expression _ ',' _ b:Expression _ ')' { Expression::ContainsFunctionCall(Box::new(a), Box::new(b)) } /
+    "STRSTARTS"i _ '(' _ a:Expression _ ',' _ b:Expression _ ')' { Expression::StrStartsFunctionCall(Box::new(a), Box::new(b)) } /
+    "STRENDS"i _ '(' _ a:Expression _ ',' _ b:Expression _ ')' { Expression::StrEndsFunctionCall(Box::new(a), Box::new(b)) } /
+    "STRBEFORE"i _ '(' _ a:Expression _ ',' _ b:Expression _ ')' { Expression::StrBeforeFunctionCall(Box::new(a), Box::new(b)) } /
+    "STRAFTER"i _ '(' _ a:Expression _ ',' _ b:Expression _ ')' { Expression::StrAfterFunctionCall(Box::new(a), Box::new(b)) } /
+    "YEAR"i _ '(' _ e:Expression _ ')' { Expression::YearFunctionCall(Box::new(e)) } /
+    "MONTH"i _ '(' _ e:Expression _ ')' { Expression::MonthFunctionCall(Box::new(e)) } /
+    "DAY"i _ '(' _ e:Expression _ ')' { Expression::DayFunctionCall(Box::new(e)) } /
+    "HOURS"i _ '(' _ e:Expression _ ')' { Expression::HoursFunctionCall(Box::new(e)) } /
+    "MINUTES"i _ '(' _ e:Expression _ ')' { Expression::MinutesFunctionCall(Box::new(e)) } /
+    "SECONDS"i _ '(' _ e:Expression _ ')' { Expression::SecondsFunctionCall(Box::new(e)) } /
+    ("TIMEZONE"i / "TZ"i) _ '(' _ e:Expression _ ')' { Expression::TimezoneFunctionCall(Box::new(e)) } /
+    "NOW"i _ NIL { Expression::NowFunctionCall() } /
+    "UUID"i _ NIL { Expression::UUIDFunctionCall() }/
+    "STRUUID"i _ NIL { Expression::StrUUIDFunctionCall() } /
+    "MD5"i _ '(' _ e:Expression _ ')' { Expression::MD5FunctionCall(Box::new(e)) } /
+    "SHA1"i _ '(' _ e:Expression _ ')' { Expression::SHA1FunctionCall(Box::new(e)) } /
+    "SHA256"i _ '(' _
e:Expression _ ')' { Expression::SHA256FunctionCall(Box::new(e)) } /
+    // As for the other built-ins, `_` after the keyword accepts whitespace before '('.
+    "SHA384"i _ '(' _ e:Expression _ ')' { Expression::SHA384FunctionCall(Box::new(e)) } /
+    "SHA512"i _ '(' _ e:Expression _ ')' { Expression::SHA512FunctionCall(Box::new(e)) } /
+    "COALESCE"i _ e:ExpressionList { Expression::CoalesceFunctionCall(e) } /
+    "IF"i _ '(' _ a:Expression _ ',' _ b:Expression _ ',' _ c:Expression _ ')' { Expression::IfFunctionCall(Box::new(a), Box::new(b), Box::new(c)) } /
+    "STRLANG"i _ '(' _ a:Expression _ ',' _ b:Expression _ ')' { Expression::StrLangFunctionCall(Box::new(a), Box::new(b)) } /
+    "STRDT"i _ '(' _ a:Expression _ ',' _ b:Expression _ ')' { Expression::StrDTFunctionCall(Box::new(a), Box::new(b)) } /
+    "sameTerm"i _ '(' _ a:Expression _ ',' _ b:Expression _ ')' { Expression::SameTermFunctionCall(Box::new(a), Box::new(b)) } /
+    ("isIRI"i / "isURI"i) _ '(' _ e:Expression _ ')' { Expression::IsIRIFunctionCall(Box::new(e)) } /
+    "isBLANK"i _ '(' _ e:Expression _ ')' { Expression::IsBlankFunctionCall(Box::new(e)) } /
+    "isLITERAL"i _ '(' _ e:Expression _ ')' { Expression::IsLiteralFunctionCall(Box::new(e)) } /
+    "isNUMERIC"i _ '(' _ e:Expression _ ')' { Expression::IsNumericFunctionCall(Box::new(e)) } /
+    RegexExpression /
+    ExistsFunc /
+    NotExistsFunc
+
+//[122]
+// The three-argument form is tried first; the third argument is `None` in the two-argument form.
+RegexExpression -> Expression =
+    "REGEX"i _ '(' _ a:Expression _ ',' _ b:Expression _ ',' _ c:Expression _ ')' { Expression::RegexFunctionCall(Box::new(a), Box::new(b), Some(Box::new(c))) } /
+    "REGEX"i _ '(' _ a:Expression _ ',' _ b:Expression _ ')' { Expression::RegexFunctionCall(Box::new(a), Box::new(b), None) }
+
+//[123]
+SubstringExpression -> Expression =
+    "SUBSTR"i _ '(' _ a:Expression _ ',' _ b:Expression _ ',' _ c:Expression _ ')' { Expression::SubStrFunctionCall(Box::new(a), Box::new(b), Some(Box::new(c))) } /
+    "SUBSTR"i _ '(' _ a:Expression _ ',' _ b:Expression _ ')' { Expression::SubStrFunctionCall(Box::new(a), Box::new(b), None) }
+
+
+//[124]
+StrReplaceExpression -> Expression =
+    "REPLACE"i _ '(' _
a:Expression _ ',' _ b:Expression _ ',' _ c:Expression _ ',' _ d:Expression _ ')' { Expression::ReplaceFunctionCall(Box::new(a), Box::new(b), Box::new(c), Some(Box::new(d))) } /
+    "REPLACE"i _ '(' _ a:Expression _ ',' _ b:Expression _ ',' _ c:Expression _ ')' { Expression::ReplaceFunctionCall(Box::new(a), Box::new(b), Box::new(c), None) }
+
+//[125]
+ExistsFunc -> Expression = "EXISTS"i _ p:GroupGraphPattern { Expression::ExistsFunctionCall(Box::new(p)) }
+
+//[126]
+NotExistsFunc -> Expression = "NOT"i _ "EXISTS"i _ p:GroupGraphPattern { Expression::NotExistsFunctionCall(Box::new(p)) }
+
+//[127]
+// For each aggregate the DISTINCT alternative is listed first and passes `true` as the
+// boolean argument; the plain alternative passes `false`. `COUNT(*)` has no expression (`None`).
+Aggregate -> Expression =
+    "COUNT"i _ '(' _ "DISTINCT"i _ '*' _ ')' { Expression::CountAggregate(None, true) } /
+    "COUNT"i _ '(' _ "DISTINCT"i _ e:Expression _ ')' { Expression::CountAggregate(Some(Box::new(e)), true) } /
+    "COUNT"i _ '(' _ '*' _ ')' { Expression::CountAggregate(None, false) } /
+    "COUNT"i _ '(' _ e:Expression _ ')' { Expression::CountAggregate(Some(Box::new(e)), false) } /
+    "SUM"i _ '(' _ "DISTINCT"i _ e:Expression _ ')' { Expression::SumAggregate(Box::new(e), true) } /
+    "SUM"i _ '(' _ e:Expression _ ')' { Expression::SumAggregate(Box::new(e), false) } /
+    "MIN"i _ '(' _ "DISTINCT"i _ e:Expression _ ')' { Expression::MinAggregate(Box::new(e), true) } /
+    "MIN"i _ '(' _ e:Expression _ ')' { Expression::MinAggregate(Box::new(e), false) } /
+    "MAX"i _ '(' _ "DISTINCT"i _ e:Expression _ ')' { Expression::MaxAggregate(Box::new(e), true) } /
+    "MAX"i _ '(' _ e:Expression _ ')' { Expression::MaxAggregate(Box::new(e), false) } /
+    "AVG"i _ '(' _ "DISTINCT"i _ e:Expression _ ')' { Expression::AvgAggregate(Box::new(e), true) } /
+    "AVG"i _ '(' _ e:Expression _ ')' { Expression::AvgAggregate(Box::new(e), false) } /
+    "SAMPLE"i _ '(' _ "DISTINCT"i _ e:Expression _ ')' { Expression::SampleAggregate(Box::new(e), true) } /
+    "SAMPLE"i _ '(' _ e:Expression _ ')' { Expression::SampleAggregate(Box::new(e), false) } /
+    "GROUP_CONCAT"i _ '(' _ "DISTINCT"i _
e:Expression _ ';' _ 'SEPARATOR'i _ '=' _ s:String _ ')' { Expression::GroupConcatAggregate(Box::new(e), true, Some(s)) } /
+    "GROUP_CONCAT"i _ '(' _ "DISTINCT"i _ e:Expression _ ')' { Expression::GroupConcatAggregate(Box::new(e), true, None) } /
+    // No DISTINCT keyword in the two alternatives below, so the distinct flag is `false`.
+    "GROUP_CONCAT"i _ '(' _ e:Expression _ ';' _ 'SEPARATOR'i _ '=' _ s:String _ ')' { Expression::GroupConcatAggregate(Box::new(e), false, Some(s)) } /
+    "GROUP_CONCAT"i _ '(' _ e:Expression _ ')' { Expression::GroupConcatAggregate(Box::new(e), false, None) }
+
+//[128]
+// A function call is tried first; a bare IRI is the fallback.
+iriOrFunction -> Expression =
+    FunctionCall /
+    i:iri { Expression::ConstantExpression(i.into()) }
+
+//[129]
+// Typed literal, then language-tagged literal, then plain string.
+RDFLiteral -> Literal =
+    v:String _ "^^" _ t:iri { Literal::new_typed_literal(v, t) } /
+    v:String _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } /
+    v:String { v.into() }
+
+//[130]
+NumericLiteral -> Literal = NumericLiteralUnsigned / NumericLiteralPositive / NumericLiteralNegative
+
+//[131]
+// DOUBLE and DECIMAL are tried before INTEGER so that the longest form wins.
+NumericLiteralUnsigned -> Literal =
+    d:$(DOUBLE) { Literal::new_typed_literal(d, xsd::DOUBLE.clone()) } /
+    d:$(DECIMAL) { Literal::new_typed_literal(d, xsd::DECIMAL.clone()) } /
+    i:$(INTEGER) { Literal::new_typed_literal(i, xsd::INTEGER.clone()) }
+
+//[132]
+NumericLiteralPositive -> Literal =
+    d:$(DOUBLE_POSITIVE) { Literal::new_typed_literal(d, xsd::DOUBLE.clone()) } /
+    d:$(DECIMAL_POSITIVE) { Literal::new_typed_literal(d, xsd::DECIMAL.clone()) } /
+    i:$(INTEGER_POSITIVE) { Literal::new_typed_literal(i, xsd::INTEGER.clone()) }
+
+
+//[133]
+NumericLiteralNegative -> Literal =
+    d:$(DOUBLE_NEGATIVE) { Literal::new_typed_literal(d, xsd::DOUBLE.clone()) } /
+    d:$(DECIMAL_NEGATIVE) { Literal::new_typed_literal(d, xsd::DECIMAL.clone()) } /
+    i:$(INTEGER_NEGATIVE) { Literal::new_typed_literal(i, xsd::INTEGER.clone()) }
+
+//[134]
+BooleanLiteral -> Literal =
+    "true" { true.into() } /
+    "false" { false.into() }
+
+//[135]
+// The long (triple-quoted) forms must be tried first: with ordered choice the short
+// forms would otherwise match the first two quotes of `'''` / `"""` as an empty string.
+String -> String = STRING_LITERAL_LONG1 / STRING_LITERAL_LONG2 / STRING_LITERAL1 / STRING_LITERAL2
+
+//[136]
+iri ->
NamedNode = i:(IRIREF / PrefixedName) {?
+    // The parse error itself is not reported, only a fixed message.
+    match state.url_parser().parse(&i) {
+        Ok(url) => Ok(NamedNode::new(url)),
+        Err(_) => Err("IRI parsing failed")
+    }
+}
+
+//[137]
+// A prefix alone (`ns:`) expands to the namespace IRI registered for it.
+PrefixedName -> String = PNAME_LN /
+    ns:PNAME_NS {? state.namespaces.get(ns).cloned().ok_or("Prefix not found") }
+
+//[138]
+// The same label always maps to the same blank node within one parse (`state.bnodes_map`);
+// `[]` yields a fresh blank node each time.
+BlankNode -> BlankNode =
+    b:BLANK_NODE_LABEL { state.bnodes_map.entry(b.to_string()).or_insert_with(BlankNode::default).clone() } /
+    ANON { BlankNode::default() }
+
+//[139]
+// Returns the text between the angle brackets.
+IRIREF -> String = "<" i:$(([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}])*) ">" {
+    i.to_owned()
+}
+
+//[140]
+// The returned slice includes the trailing ':'.
+PNAME_NS -> &'input str = ns:$(PN_PREFIX? ":") {
+    ns
+}
+
+//[141]
+// Concatenates the namespace IRI registered for the prefix with the local part.
+PNAME_LN -> String = ns:$(PNAME_NS) local:PN_LOCAL {?
+    state.namespaces.get(ns).map(|v| v.clone() + &local).ok_or("Prefix not found")
+}
+
+//[142]
+// Returns the label without the leading "_:".
+BLANK_NODE_LABEL -> &'input str = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) {
+    b
+}
+
+//[143]
+VAR1 -> &'input str = '?' v:$(VARNAME) { v }
+
+//[144]
+VAR2 -> &'input str = '$' v:$(VARNAME) { v }
+
+//[145]
+// Returns the tag without the leading '@'.
+LANGTAG -> &'input str = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {
+    l
+}
+
+//[146]
+INTEGER -> () = [0-9]+
+
+//[147]
+DECIMAL -> () = [0-9]* '.' [0-9]+
+
+//[148]
+DOUBLE -> () = ([0-9]+ "." [0-9]* / "."? [0-9]+) EXPONENT
+
+//[149]
+INTEGER_POSITIVE -> () = '+' _ INTEGER
+
+//[150]
+DECIMAL_POSITIVE -> () = '+' _ DECIMAL
+
+//[151]
+DOUBLE_POSITIVE -> () = '+' _ DOUBLE
+
+//[152]
+INTEGER_NEGATIVE -> () = '-' _ INTEGER
+
+//[153]
+DECIMAL_NEGATIVE -> () = '-' _ DECIMAL
+
+//[154]
+DOUBLE_NEGATIVE -> () = '-' _ DOUBLE
+
+//[155]
+EXPONENT -> () = [eE] [+-]?
[0-9]+
+
+//[156]
+// Single-quoted string: plain characters and escapes are collected into the result.
+STRING_LITERAL1 -> String = "'" l:((STRING_LITERAL1_simple_char / ECHAR)*) "'" {
+    l.into_iter().collect()
+}
+// Any character except the single quote, backslash, line feed and carriage return.
+STRING_LITERAL1_simple_char -> char = c:$([^'\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() }
+
+
+//[157]
+// Double-quoted string: plain characters and escapes are collected into the result.
+STRING_LITERAL2 -> String = "\"" l:((STRING_LITERAL2_simple_char / ECHAR)*) "\"" {
+    l.into_iter().collect()
+}
+// Any character except the double quote, backslash, line feed and carriage return.
+STRING_LITERAL2_simple_char -> char = c:$([^"\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() }
+
+//[158]
+STRING_LITERAL_LONG1 -> String = "'''" l:(STRING_LITERAL_LONG1_inner*) "'''" {
+    l.into_iter().collect()
+}
+// Up to two quotes followed by one non-quote character (or escape); the quotes are kept.
+STRING_LITERAL_LONG1_inner -> String = a:$(("''" / "'")?) b:(STRING_LITERAL_LONG1_simple_char / ECHAR) {
+    let mut s = a.to_string();
+    s.push(b);
+    s
+}
+// Any character except the single quote and backslash (line breaks are allowed).
+STRING_LITERAL_LONG1_simple_char -> char = c:$([^'\u{005c}]) { c.chars().next().unwrap() }
+
+//[159]
+STRING_LITERAL_LONG2 -> String = "\"\"\"" l:(STRING_LITERAL_LONG2_inner*) "\"\"\"" {
+    l.into_iter().collect()
+}
+STRING_LITERAL_LONG2_inner -> String = a:$(("\"\"" / "\"")?)
b:(STRING_LITERAL_LONG2_simple_char / ECHAR) {
+    let mut s = a.to_string();
+    s.push(b);
+    s
+}
+// Any character except the double quote and backslash (line breaks are allowed).
+STRING_LITERAL_LONG2_simple_char -> char = c:$([^"\u{005c}]) { c.chars().next().unwrap() }
+
+//[160]
+// Maps the character after the backslash to the character it denotes.
+ECHAR -> char = "\\" c:$([tbnrf"'\\]) {
+    match c {
+        "t" => '\u{0009}',
+        "b" => '\u{0008}',
+        "n" => '\u{000A}',
+        "r" => '\u{000D}',
+        "f" => '\u{000C}',
+        "\"" => '\u{0022}',
+        "'" => '\u{0027}',
+        "\\" => '\u{005C}',
+        _ => panic!("unexpected escaped char") // not possible
+    }
+}
+
+//[161]
+NIL -> () = "(" WS* ")"
+
+//[162]
+WS -> () = #quiet<[\u{20}\u{9}\u{D}\u{A}]>
+
+//[163]
+ANON -> () = '[' WS* ']'
+
+//[164]
+PN_CHARS_BASE -> () = [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}]
+
+//[165]
+PN_CHARS_U -> () = '_' / PN_CHARS_BASE
+
+//[166]
+VARNAME -> () = ([0-9] / PN_CHARS_U) ([0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U)*
+
+//[167]
+PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U
+
+//[168]
+PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("."+ PN_CHARS+)*
+
+//[169]
+// First character, then a run of non-dot characters, then dot-separated continuations.
+PN_LOCAL -> String = f:PN_LOCAL_first c:(PN_LOCAL_next*) e:(PN_LOCAL_next_dot*) {
+    f.to_string() + &c.concat() + &e.concat()
+}
+PN_LOCAL_first -> String =
+    c:$(":" / [0-9] / PN_CHARS_U) { c.into() } /
+    PLX
+PN_LOCAL_next -> String =
+    c:$(":" / PN_CHARS) { c.into() } /
+    PLX
+// Dots are only part of the local name when at least one more character follows them
+// (as in PN_PREFIX); otherwise the '.' ending a triple in `:o.` would be swallowed.
+PN_LOCAL_next_dot -> String = d:$('.'+) f:PN_LOCAL_next+ { d.to_string() + &f.concat()}
+
+//[170]
+PLX -> String =
+    p:$(PERCENT) { p.into() } /
+    e:PN_LOCAL_ESC { iter::once(e).collect() }
+
+//[171]
+PERCENT -> () = "%" HEX HEX
+
+//[172]
+HEX -> () = ([0-9A-Fa-f])
+
+//[173]
+// Returns the escaped character without the backslash.
+PN_LOCAL_ESC -> char = "\\" c:$([_~\.\-!$&'()*+,;=/?#@%:]) { c.chars().next().unwrap() } //TODO: added '/' to make tests pass but is it valid?
+
+// Optional run of whitespace characters and comments, allowed between tokens
+_ = #quiet<([ \t\n\r] / comment)*>
+// A '#' comment running up to (not including) the end of the line
+comment = #quiet<"#" [^\r\n]*>
diff --git a/src/utils.rs b/src/utils.rs
index eb0960b5..5f1a00fb 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -8,6 +8,12 @@ impl<'a> Escaper for &'a str {
     }
 }
 
+impl Escaper for String {
+    fn escape(&self) -> String {
+        self.chars().flat_map(EscapeRDF::new).collect()
+    }
+}
+
 /// Customized version of EscapeDefault of the Rust standard library
 struct EscapeRDF {
     state: EscapeRdfState,
diff --git a/tests/client.rs b/tests/client.rs
new file mode 100644
index 00000000..2b8f5859
--- /dev/null
+++ b/tests/client.rs
@@ -0,0 +1,52 @@
+extern crate reqwest;
+extern crate rudf;
+extern crate url;
+
+use reqwest::Client;
+use reqwest::Response;
+use rudf::rio::ntriples::read_ntriples;
+use rudf::rio::turtle::read_turtle;
+use rudf::rio::RioError;
+use rudf::rio::RioResult;
+use rudf::sparql::ast::Query;
+use rudf::sparql::parser::read_sparql_query;
+use rudf::store::memory::MemoryGraph;
+use std::error::Error;
+use url::Url;
+
+pub struct RDFClient {
+    client: Client,
+}
+
+impl Default for RDFClient {
+    fn default() -> Self {
+        Self {
+            client: Client::new(),
+        }
+    }
+}
+
+impl RDFClient {
+    pub fn load_turtle(&self, url: Url) -> RioResult {
+        Ok(read_turtle(self.get(&url)?, Some(url))?.collect())
+    }
+
+    pub fn load_ntriples(&self, url: Url) -> RioResult {
+        read_ntriples(self.get(&url)?).collect()
+    }
+
+    pub fn load_sparql_query(&self, url: Url) -> RioResult {
+        read_sparql_query(self.get(&url)?, Some(url))
+    }
+
+    fn get(&self, url: &Url) -> RioResult {
+        match self.client.get(url.clone()).send() {
+            Ok(response) => Ok(response),
+            Err(error) => if error.description() == "message is incomplete" {
+                self.get(url)
+            } else {
+                Err(RioError::new(error))
+            },
+        }
+    }
+}
diff --git a/tests/rdf_test_cases.rs b/tests/rdf_test_cases.rs
index eea5764e..dafd1a0f 100644
--- a/tests/rdf_test_cases.rs
+++ b/tests/rdf_test_cases.rs
@@ -1,57 +1,21 @@
+//!
Integration tests based on [RDF 1.1 Test Cases](https://www.w3.org/TR/rdf11-testcases/)
+
 #[macro_use]
 extern crate lazy_static;
 extern crate reqwest;
 extern crate rudf;
 extern crate url;
 
-use reqwest::Client;
-use reqwest::Response;
+mod client;
+
+use client::RDFClient;
 use rudf::model::data::*;
 use rudf::model::vocab::rdf;
 use rudf::model::vocab::rdfs;
-use rudf::rio::ntriples::read_ntriples;
-use rudf::rio::turtle::read_turtle;
-use rudf::rio::RioError;
-use rudf::rio::RioResult;
 use rudf::store::isomorphism::GraphIsomorphism;
-use rudf::store::memory::MemoryGraph;
-use std::error::Error;
 use std::str::FromStr;
 use url::Url;
 
-struct RDFClient {
-    client: Client,
-}
-
-impl Default for RDFClient {
-    fn default() -> Self {
-        Self {
-            client: Client::new(),
-        }
-    }
-}
-
-impl RDFClient {
-    fn load_turtle(&self, url: Url) -> RioResult {
-        Ok(read_turtle(self.get(&url)?, Some(url))?.collect())
-    }
-
-    fn load_ntriples(&self, url: Url) -> RioResult {
-        read_ntriples(self.get(&url)?).collect()
-    }
-
-    fn get(&self, url: &Url) -> RioResult {
-        match self.client.get(url.clone()).send() {
-            Ok(response) => Ok(response),
-            Err(error) => if error.description() == "message is incomplete" {
-                self.get(url)
-            } else {
-                Err(RioError::new(error))
-            },
-        }
-    }
-}
-
 mod mf {
     use rudf::model::data::NamedNode;
     use std::str::FromStr;
diff --git a/tests/sparql_test_cases.rs b/tests/sparql_test_cases.rs
new file mode 100644
index 00000000..43001b5b
--- /dev/null
+++ b/tests/sparql_test_cases.rs
@@ -0,0 +1,69 @@
+//! Integration tests based on [SPARQL 1.1 Test Cases](https://www.w3.org/2009/sparql/docs/tests/)
+
+#[macro_use]
+extern crate lazy_static;
+extern crate reqwest;
+extern crate rudf;
+extern crate url;
+
+mod client;
+
+use client::RDFClient;
+use rudf::model::data::*;
+use rudf::model::vocab::rdf;
+use rudf::sparql::parser::read_sparql_query;
+use url::Url;
+
+mod mf {
+    use rudf::model::data::NamedNode;
+    use std::str::FromStr;
+
+    lazy_static!
{
+        pub static ref ACTION: NamedNode = NamedNode::from_str(
+            "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action"
+        ).unwrap();
+        pub static ref RESULT: NamedNode = NamedNode::from_str(
+            "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#result"
+        ).unwrap();
+        pub static ref POSITIVE_SYNTAX_TEST_11: NamedNode = NamedNode::from_str(
+            "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#PositiveSyntaxTest11"
+        ).unwrap();
+    }
+}
+
+#[test]
+fn sparql_w3c_syntax_testsuite() {
+    let manifest_url = Url::parse(
+        "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/syntax-query/manifest.ttl",
+    ).unwrap();
+    let client = RDFClient::default();
+    let manifest = client.load_turtle(manifest_url.clone()).unwrap();
+    let mf_positive_syntax_test = Term::from(mf::POSITIVE_SYNTAX_TEST_11.clone());
+
+    manifest
+        .subjects_for_predicate_object(&rdf::TYPE, &mf_positive_syntax_test)
+        .for_each(|test| {
+            if let Some(Term::NamedNode(file)) =
+                manifest.object_for_subject_predicate(test, &mf::ACTION)
+            {
+                match client.load_sparql_query(file.url().clone()) {
+                    Err(error) => panic!(
+                        "Failure on positive syntax file {} with error: {}",
+                        file,
+                        error
+                    ),
+                    Ok(query) => {
+                        // Round trip: the serialized form of the parsed query must parse again.
+                        if let Err(error) = read_sparql_query(query.to_string().as_bytes(), None) {
+                            panic!(
+                                "Failure to deserialize \"{}\" of file {} with error: {}",
+                                query.to_string(),
+                                file,
+                                error
+                            )
+                        }
+                    }
+                }
+            }
+        });
+}