Fixes bugs in SPARQL parser and makes it slightly faster

pull/10/head
Tpt 5 years ago
parent 45d78d490d
commit 27d0c2a67a
  1. 4
      lib/src/sparql/parser.rs
  2. 92
      lib/src/sparql/sparql_grammar.rustpeg
  3. 10
      lib/tests/sparql_test_cases.rs

@ -17,8 +17,8 @@ mod grammar {
use lazy_static::lazy_static; use lazy_static::lazy_static;
use std::borrow::Cow; use std::borrow::Cow;
use std::char; use std::char;
use std::collections::BTreeMap;
use std::collections::HashMap; use std::collections::HashMap;
use std::collections::{BTreeMap, BTreeSet};
use std::str::Chars; use std::str::Chars;
struct FocusedTriplePattern<F> { struct FocusedTriplePattern<F> {
@ -295,6 +295,7 @@ mod grammar {
base_iri: Option<Iri>, base_iri: Option<Iri>,
namespaces: HashMap<String, String>, namespaces: HashMap<String, String>,
bnodes_map: BTreeMap<String, BlankNode>, bnodes_map: BTreeMap<String, BlankNode>,
used_bnodes: BTreeSet<String>,
aggregations: BTreeMap<Aggregation, Variable>, aggregations: BTreeMap<Aggregation, Variable>,
} }
@ -543,6 +544,7 @@ mod grammar {
}, },
namespaces: HashMap::default(), namespaces: HashMap::default(),
bnodes_map: BTreeMap::default(), bnodes_map: BTreeMap::default(),
used_bnodes: BTreeSet::default(),
aggregations: BTreeMap::default(), aggregations: BTreeMap::default(),
}; };

@ -240,16 +240,23 @@ GroupGraphPatternSub -> GraphPattern = a:TriplesBlock? _ b:GroupGraphPatternSub_
PartialGraphPattern::Bind(expr, var) => { PartialGraphPattern::Bind(expr, var) => {
g = GraphPattern::Extend(Box::new(g), var, expr) g = GraphPattern::Extend(Box::new(g), var, expr)
} }
PartialGraphPattern::Filter(expr) => match filter { PartialGraphPattern::Filter(expr) => filter = Some(if let Some(f) = filter {
Some(f) => { filter = Some(Expression::And(Box::new(f), Box::new(expr))) }, Expression::And(Box::new(f), Box::new(expr))
None => { filter = Some(expr) } } else {
}, expr
}),
PartialGraphPattern::Other(e) => g = new_join(g, e), PartialGraphPattern::Other(e) => g = new_join(g, e),
} }
} }
match filter {
Some(filter) => GraphPattern::Filter(filter, Box::new(g)), // We deal with blank nodes aliases rule (TODO: partial for now)
None => g state.used_bnodes.extend(state.bnodes_map.keys().cloned());
state.bnodes_map.clear();
if let Some(filter) = filter {
GraphPattern::Filter(filter, Box::new(g))
} else {
g
} }
} }
GroupGraphPatternSub_item -> Vec<PartialGraphPattern> = a:GraphPatternNotTriples _ ('.' _)? b:TriplesBlock? _ { GroupGraphPatternSub_item -> Vec<PartialGraphPattern> = a:GraphPatternNotTriples _ ('.' _)? b:TriplesBlock? _ {
@ -353,7 +360,7 @@ FunctionCall -> Expression = f: iri _ a: ArgList {
//[71] //[71]
ArgList -> Vec<Expression> = //TODO: support DISTINCT ArgList -> Vec<Expression> = //TODO: support DISTINCT
'(' _ 'DISTINCT'? _ e:ArgList_item **<1,> (',' _) _ ')' { e } / '(' _ 'DISTINCT'i? _ e:ArgList_item **<1,> (',' _) _ ')' { e } /
NIL { Vec::new() } NIL { Vec::new() }
ArgList_item -> Expression = e:Expression _ { e } ArgList_item -> Expression = e:Expression _ { e }
@ -367,7 +374,7 @@ ExpressionList_item -> Expression = e:Expression _ { e }
ConstructTemplate -> Vec<TriplePattern> = '{' _ t:ConstructTriples _ '}' { t } ConstructTemplate -> Vec<TriplePattern> = '{' _ t:ConstructTriples _ '}' { t }
//[74] //[74]
ConstructTriples -> Vec<TriplePattern> = p:ConstructTriples_item ** ('.' _) { ConstructTriples -> Vec<TriplePattern> = p:ConstructTriples_item ** ('.' _) '.'? {
p.into_iter().flat_map(|c| c.into_iter()).collect() p.into_iter().flat_map(|c| c.into_iter()).collect()
} }
ConstructTriples_item -> Vec<TriplePattern> = t:TriplesSameSubject _ { t } ConstructTriples_item -> Vec<TriplePattern> = t:TriplesSameSubject _ { t }
@ -415,7 +422,7 @@ PropertyListNotEmpty_item -> FocusedTriplePattern<(NamedNodeOrVariable,Vec<TermO
} }
//[78] //[78]
Verb -> NamedNodeOrVariable = 'a' { rdf::TYPE.clone().into() } / VarOrIri Verb -> NamedNodeOrVariable = VarOrIri / 'a' { rdf::TYPE.clone().into() }
//[79] //[79]
ObjectList -> FocusedTriplePattern<Vec<TermOrVariable>> = o:ObjectList_item **<1,> (',' _) { ObjectList -> FocusedTriplePattern<Vec<TermOrVariable>> = o:ObjectList_item **<1,> (',' _) {
@ -534,8 +541,8 @@ PathEltOrInverse -> PropertyPath =
//[94] //[94]
PathPrimary -> PropertyPath = PathPrimary -> PropertyPath =
'a' { rdf::TYPE.clone().into() } /
v:iri { v.into() } / v:iri { v.into() } /
'a' { rdf::TYPE.clone().into() } /
'!' _ p:PathNegatedPropertySet { p } / '!' _ p:PathNegatedPropertySet { p } /
'(' _ p:Path _ ')' { p } '(' _ p:Path _ ')' { p }
@ -571,10 +578,10 @@ PathNegatedPropertySet_item -> Either<NamedNode,NamedNode> = p:PathOneInProperty
//[96] //[96]
PathOneInPropertySet -> Either<NamedNode,NamedNode> = PathOneInPropertySet -> Either<NamedNode,NamedNode> =
'^' _ 'a' { Either::Right(rdf::TYPE.clone()) } /
'^' _ v:iri { Either::Right(v) } / '^' _ v:iri { Either::Right(v) } /
'a' { Either::Left(rdf::TYPE.clone()) } / '^' _ 'a' { Either::Right(rdf::TYPE.clone()) } /
v:iri { Either::Left(v) } v:iri { Either::Left(v) } /
'a' { Either::Left(rdf::TYPE.clone()) }
//[98] //[98]
TriplesNode -> FocusedTriplePattern<TermOrVariable> = Collection / BlankNodePropertyList TriplesNode -> FocusedTriplePattern<TermOrVariable> = Collection / BlankNodePropertyList
@ -700,12 +707,17 @@ ValueLogical -> Expression = RelationalExpression
//[114] //[114]
RelationalExpression -> Expression = RelationalExpression -> Expression =
a:NumericExpression _ "=" _ b:NumericExpression { Expression::Equal(Box::new(a), Box::new(b)) } / a:NumericExpression _ s: $("=" / "!=" / ">=" / ">" / "<=" / "<") _ b:NumericExpression {
a:NumericExpression _ "!=" _ b:NumericExpression { Expression::NotEqual(Box::new(a), Box::new(b)) } / match s {
a:NumericExpression _ ">" _ b:NumericExpression { Expression::Greater(Box::new(a), Box::new(b)) } / "=" => Expression::Equal(Box::new(a), Box::new(b)),
a:NumericExpression _ ">=" _ b:NumericExpression { Expression::GreaterOrEq(Box::new(a), Box::new(b)) } / "!=" => Expression::NotEqual(Box::new(a), Box::new(b)),
a:NumericExpression _ "<" _ b:NumericExpression { Expression::Lower(Box::new(a), Box::new(b)) } / ">" => Expression::Greater(Box::new(a), Box::new(b)),
a:NumericExpression _ "<=" _ b:NumericExpression { Expression::LowerOrEq(Box::new(a), Box::new(b)) } / ">=" => Expression::GreaterOrEq(Box::new(a), Box::new(b)),
"<" => Expression::Lower(Box::new(a), Box::new(b)),
"<=" => Expression::LowerOrEq(Box::new(a), Box::new(b)),
_ => panic!() // Could never happen
}
} /
a:NumericExpression _ "IN"i _ b:ExpressionList { Expression::In(Box::new(a), b) } / a:NumericExpression _ "IN"i _ b:ExpressionList { Expression::In(Box::new(a), b) } /
a:NumericExpression _ "NOT"i _ "IN"i _ b:ExpressionList { Expression::NotIn(Box::new(a), b) } / a:NumericExpression _ "NOT"i _ "IN"i _ b:ExpressionList { Expression::NotIn(Box::new(a), b) } /
NumericExpression NumericExpression
@ -715,22 +727,28 @@ NumericExpression -> Expression = AdditiveExpression
//[116] //[116]
AdditiveExpression -> Expression = AdditiveExpression -> Expression =
a:MultiplicativeExpression _ '+' _ b:AdditiveExpression { Expression::Add(Box::new(a), Box::new(b)) } / a:MultiplicativeExpression _ s: $('+' / '-') _ b:AdditiveExpression { match s {
a:MultiplicativeExpression _ '-' _ b:AdditiveExpression { Expression::Sub(Box::new(a), Box::new(b)) } / "+" => Expression::Add(Box::new(a), Box::new(b)),
MultiplicativeExpression "-" => Expression::Sub(Box::new(a), Box::new(b)),
_ => panic!() // Could never happen
} } / MultiplicativeExpression
//[117] //[117]
MultiplicativeExpression -> Expression = MultiplicativeExpression -> Expression =
a:UnaryExpression _ '*' _ b:MultiplicativeExpression { Expression::Mul(Box::new(a), Box::new(b)) } / a:UnaryExpression _ s: $('*' / '/') _ b:MultiplicativeExpression { match s {
a:UnaryExpression _ '/' _ b:MultiplicativeExpression { Expression::Div(Box::new(a), Box::new(b)) } / "*" => Expression::Mul(Box::new(a), Box::new(b)),
UnaryExpression "/" => Expression::Div(Box::new(a), Box::new(b)),
_ => panic!() // Could never happen
} } / UnaryExpression
//[118] //[118]
UnaryExpression -> Expression = UnaryExpression -> Expression = s: $('!' / '+' / '-')? _ e:PrimaryExpression { match s {
'!' _ e:PrimaryExpression { Expression::UnaryNot(Box::new(e)) } / "!" => Expression::UnaryNot(Box::new(e)),
'+' _ e:PrimaryExpression { Expression::UnaryPlus(Box::new(e)) } / "+" => Expression::UnaryPlus(Box::new(e)),
'-' _ e:PrimaryExpression { Expression::UnaryMinus(Box::new(e)) } / "-" => Expression::UnaryMinus(Box::new(e)),
PrimaryExpression "" => e,
_ => panic!() // Could never happen
} }
//[119] //[119]
PrimaryExpression -> Expression = PrimaryExpression -> Expression =
@ -913,7 +931,13 @@ PrefixedName -> Iri = PNAME_LN /
//[138] //[138]
BlankNode -> BlankNode = BlankNode -> BlankNode =
b:BLANK_NODE_LABEL { state.bnodes_map.entry(b.to_string()).or_insert_with(BlankNode::default).clone() } / b:BLANK_NODE_LABEL {?
if state.used_bnodes.contains(b) {
Err("Already used blank node id")
} else {
Ok(state.bnodes_map.entry(b.to_string()).or_insert_with(BlankNode::default).clone())
}
} /
ANON { BlankNode::default() } ANON { BlankNode::default() }
//[139] //[139]
@ -922,12 +946,12 @@ IRIREF -> Iri = "<" i:$(([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}])*) ">" {?
} }
//[140] //[140]
PNAME_NS -> &'input str = ns:$(PN_PREFIX? ":") { PNAME_NS -> &'input str = ns:$(PN_PREFIX?) ':' {
ns ns
} }
//[141] //[141]
PNAME_LN -> Iri = ns:$(PNAME_NS) local:$(PN_LOCAL) {? PNAME_LN -> Iri = ns:PNAME_NS local:$(PN_LOCAL) {?
if let Some(iri) = state.namespaces.get(ns).map(|v| v.clone() + &unescape_pn_local(local)) { if let Some(iri) = state.namespaces.get(ns).map(|v| v.clone() + &unescape_pn_local(local)) {
Iri::parse(iri).map_err(|_| "IRI parsing failed") Iri::parse(iri).map_err(|_| "IRI parsing failed")
} else { } else {

@ -17,19 +17,9 @@ fn sparql_w3c_syntax_testsuite() -> Result<()> {
let manifest_10_url = "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/manifest-syntax.ttl"; let manifest_10_url = "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/manifest-syntax.ttl";
let manifest_11_url = let manifest_11_url =
"http://www.w3.org/2009/sparql/docs/tests/data-sparql11/syntax-query/manifest.ttl"; "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/syntax-query/manifest.ttl";
let test_blacklist = vec![
NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql2/manifest#syntax-form-construct02").unwrap(),
NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql2/manifest#syntax-form-construct04").unwrap(),
NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql2/manifest#syntax-function-04").unwrap(),
NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql1/manifest#syntax-qname-04").unwrap(),
];
for test_result in TestManifest::new(manifest_10_url).chain(TestManifest::new(manifest_11_url)) for test_result in TestManifest::new(manifest_10_url).chain(TestManifest::new(manifest_11_url))
{ {
let test = test_result.unwrap(); let test = test_result.unwrap();
if test_blacklist.contains(&test.id) {
continue;
}
if test.kind == "PositiveSyntaxTest" || test.kind == "PositiveSyntaxTest11" { if test.kind == "PositiveSyntaxTest" || test.kind == "PositiveSyntaxTest11" {
match Query::parse(&read_file_to_string(&test.query)?, Some(&test.query)) { match Query::parse(&read_file_to_string(&test.query)?, Some(&test.query)) {
Err(error) => assert!(false, "Failure on {} with error: {}", test, error), Err(error) => assert!(false, "Failure on {} with error: {}", test, error),

Loading…
Cancel
Save