From 7ed4252ad8a91bf1d31a92cb6a9dd12101a400cc Mon Sep 17 00:00:00 2001 From: Tpt Date: Tue, 18 Sep 2018 17:37:37 +0200 Subject: [PATCH] Adds beginning of SPARQL evaluation --- src/model/dataset.rs | 4 + src/model/named_node.rs | 6 + src/sparql/algebra.rs | 155 +++++++++++--- src/sparql/mod.rs | 2 +- src/sparql/model.rs | 64 ------ src/sparql/parser.rs | 1 - src/sparql/sparql_grammar.rustpeg | 42 ++-- src/sparql/xml_results.rs | 333 ++++++++++++++++++++++++++++++ src/store/mod.rs | 1 + src/store/sparql.rs | 315 ++++++++++++++++++++++++++++ src/store/store.rs | 11 +- tests/sparql_test_cases.rs | 242 +++++++++++++++++++--- 12 files changed, 1023 insertions(+), 153 deletions(-) delete mode 100644 src/sparql/model.rs create mode 100644 src/sparql/xml_results.rs create mode 100644 src/store/sparql.rs diff --git a/src/model/dataset.rs b/src/model/dataset.rs index fb2009bb..df5584b0 100644 --- a/src/model/dataset.rs +++ b/src/model/dataset.rs @@ -1,5 +1,7 @@ use errors::*; use model::*; +use sparql::algebra::QueryResult; +use std::io::Read; /// Trait for [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-graph) pub trait Graph { @@ -148,4 +150,6 @@ pub trait Dataset { fn len(&self) -> Result; fn is_empty(&self) -> Result; + + fn query(&self, query: impl Read) -> Result; } diff --git a/src/model/named_node.rs b/src/model/named_node.rs index 88b5ac90..2bcb90f4 100644 --- a/src/model/named_node.rs +++ b/src/model/named_node.rs @@ -48,6 +48,12 @@ impl From for NamedNode { } } +impl From for Url { + fn from(named_node: NamedNode) -> Self { + Arc::try_unwrap(named_node.iri).unwrap_or_else(|iri| (*iri).clone()) + } +} + impl FromStr for NamedNode { type Err = Error; diff --git a/src/sparql/algebra.rs b/src/sparql/algebra.rs index 9198c660..ef7fc863 100644 --- a/src/sparql/algebra.rs +++ b/src/sparql/algebra.rs @@ -1,9 +1,10 @@ +use errors::*; use model::*; -use sparql::model::*; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::fmt; use std::ops::Add; +use store::MemoryGraph; use utils::Escaper; use uuid::Uuid; @@ -25,6 +26,13 @@ impl Variable { _ => false, } } + + pub fn name(&self) -> Result<&str> { + match self { + Variable::Variable { name } => Ok(name), + _ => Err(format!("The variable {} has no name", self).into()), + } + } } impl fmt::Display for Variable { @@ -78,16 +86,14 @@ impl From for NamedNodeOrVariable { #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub enum TermOrVariable { - NamedNode(NamedNode), - Literal(Literal), + Term(Term), Variable(Variable), } impl fmt::Display for TermOrVariable { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - TermOrVariable::NamedNode(node) => write!(f, "{}", node), - TermOrVariable::Literal(node) => write!(f, "{}", node), + TermOrVariable::Term(term) => write!(f, "{}", term), TermOrVariable::Variable(var) => write!(f, "{}", var), } } @@ -95,7 +101,7 @@ impl fmt::Display for TermOrVariable { impl From for TermOrVariable { fn from(node: NamedNode) -> Self { - TermOrVariable::NamedNode(node) + TermOrVariable::Term(node.into()) } } @@ -107,7 +113,7 @@ impl From for TermOrVariable { impl From for TermOrVariable { fn from(literal: Literal) -> Self { - TermOrVariable::Literal(literal) + TermOrVariable::Term(literal.into()) } } @@ -120,9 +126,9 @@ impl From for TermOrVariable { impl From for TermOrVariable { fn from(term: Term) -> Self { match term { - Term::NamedNode(node) => TermOrVariable::NamedNode(node), + Term::NamedNode(node) => TermOrVariable::Term(node.into()), Term::BlankNode(node) => TermOrVariable::Variable(node.into()), - Term::Literal(literal) => TermOrVariable::Literal(literal), + Term::Literal(literal) => TermOrVariable::Term(literal.into()), } } } @@ -130,12 +136,87 @@ impl From for TermOrVariable { impl From for TermOrVariable { fn from(element: NamedNodeOrVariable) -> Self { match element { - NamedNodeOrVariable::NamedNode(node) => TermOrVariable::NamedNode(node), + NamedNodeOrVariable::NamedNode(node) => TermOrVariable::Term(node.into()), NamedNodeOrVariable::Variable(var) => TermOrVariable::Variable(var), } } } +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub struct StaticBindings { + variables: Vec, + values: Vec>>, +} + +impl StaticBindings { + pub fn new(variables: Vec, values: Vec>>) -> Self { + Self { variables, values } + } + + pub fn variables(&self) -> &[Variable] { + &*self.variables + } + + pub fn variables_iter(&self) -> impl Iterator { + self.variables.iter() + } + + pub fn values_iter(&self) -> impl Iterator>> { + self.values.iter() + } + + pub fn into_iterator(self) -> BindingsIterator { + BindingsIterator { + variables: self.variables, + iter: Box::new(self.values.into_iter().map(Ok)), + } + } + + pub fn is_empty(&self) -> bool { + self.values.is_empty() + } +} + +impl Default for StaticBindings { + fn default() -> Self { + StaticBindings { + variables: Vec::default(), + values: Vec::default(), + } + } +} + +pub struct BindingsIterator { + variables: Vec, + iter: Box>>>>, +} + +impl BindingsIterator { + pub fn new( + variables: Vec, + iter: Box>>>>, + ) -> Self { + Self { variables, iter } + } + + pub fn variables(&self) -> &[Variable] { + &*self.variables + } + + pub fn into_values_iter(self) -> Box>>>> { + self.iter + } + + pub fn destruct( + self, + ) -> ( + Vec, + Box>>>>, + ) { + (self.variables, self.iter) + } +} + #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub struct TriplePattern { pub subject: TermOrVariable, @@ -1061,7 +1142,7 @@ impl fmt::Display for GroupPattern { #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub enum ListPattern { - Data(Vec), + Data(StaticBindings), ToList(MultiSetPattern), OrderBy(Box, Vec), Project(Box, Vec), @@ -1073,14 +1154,20 @@ pub enum ListPattern { impl fmt::Display for ListPattern { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - ListPattern::Data(bs) => write!( - f, - "{{ {} }}", - bs.iter() - .map(|c| c.to_string()) - .collect::>() - .join(" ") - ), + ListPattern::Data(bs) => { + let variables = bs.variables(); + write!(f, "{{ ")?; + for values in bs.values_iter() { + write!(f, "{{")?; + for i in 0..values.len() { + if let Some(ref val) = values[i] { + write!(f, " {} → {} ", variables[i], val)?; + } + } + write!(f, "}}")?; + } + write!(f, "}}") + } ListPattern::ToList(l) => write!(f, "{}", l), ListPattern::OrderBy(l, o) => write!( f, @@ -1117,7 +1204,7 @@ impl fmt::Display for ListPattern { impl Default for ListPattern { fn default() -> Self { - ListPattern::Data(Vec::default()) + ListPattern::Data(StaticBindings::default()) } } @@ -1136,13 +1223,7 @@ impl ListPattern { fn add_visible_variables<'a>(&'a self, vars: &mut BTreeSet<&'a Variable>) { match self { - ListPattern::Data(b) => { - for binding in b { - for (var, _) in binding { - vars.insert(var); - } - } - } + ListPattern::Data(b) => vars.extend(b.variables_iter()), ListPattern::ToList(p) => p.add_visible_variables(vars), ListPattern::OrderBy(l, _) => l.add_visible_variables(vars), ListPattern::Project(_, pv) => vars.extend(pv.iter()), @@ -1164,18 +1245,18 @@ impl<'a> fmt::Display for SparqlListPattern<'a> { ListPattern::Data(bs) => if bs.is_empty() { Ok(()) } else { - let vars: Vec<&Variable> = bs[0].iter().map(|(v, _)| v).collect(); write!(f, "VALUES ( ")?; - for var in &vars { + for var in bs.variables() { write!(f, "{} ", var)?; } write!(f, ") {{ ")?; - for b in bs { + for values in bs.values_iter() { write!(f, "( ")?; - for var in &vars { - b.get(var) - .map(|v| write!(f, "{} ", v)) - .unwrap_or_else(|| write!(f, "UNDEF "))?; + for val in values { + match val { + Some(val) => write!(f, "{} ", val), + None => write!(f, "UNDEF "), + }?; } write!(f, ") ")?; } @@ -1573,3 +1654,9 @@ impl fmt::Display for Query { } } } + +pub enum QueryResult { + Bindings(BindingsIterator), + Boolean(bool), + Graph(MemoryGraph), +} diff --git a/src/sparql/mod.rs b/src/sparql/mod.rs index 47813087..7c71662e 100644 --- a/src/sparql/mod.rs +++ b/src/sparql/mod.rs @@ -1,3 +1,3 @@ pub mod algebra; -pub mod model; pub mod parser; +pub mod xml_results; diff --git a/src/sparql/model.rs b/src/sparql/model.rs deleted file mode 100644 index 36cd8c7a..00000000 --- a/src/sparql/model.rs +++ /dev/null @@ -1,64 +0,0 @@ -use model::*; -use sparql::algebra::TermOrVariable; -use sparql::algebra::Variable; -use std::collections::BTreeMap; -use std::fmt; - -#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] -pub struct Binding(BTreeMap); - -impl Binding { - pub fn insert(&mut self, var: Variable, value: Term) { - self.0.insert(var, value); - } - - pub fn get<'a>(&'a self, key: &'a Variable) -> Option<&'a Term> { - self.0.get(key) - } - - pub fn get_or_constant<'a>(&'a self, key: &'a TermOrVariable) -> Option { - match key { - TermOrVariable::NamedNode(node) => Some(node.clone().into()), - TermOrVariable::Literal(literal) => Some(literal.clone().into()), - TermOrVariable::Variable(v) => self.get(v).cloned(), - } - } - - pub fn iter(&self) -> <&BTreeMap as IntoIterator>::IntoIter { - self.0.iter() - } -} - -impl Default for Binding { - fn default() -> Self { - Binding(BTreeMap::default()) - } -} - -impl IntoIterator for Binding { - type Item = (Variable, Term); - type IntoIter = as IntoIterator>::IntoIter; - - fn into_iter(self) -> as IntoIterator>::IntoIter { - self.0.into_iter() - } -} - -impl<'a> IntoIterator for &'a Binding { - type Item = (&'a Variable, &'a Term); - type IntoIter = <&'a BTreeMap as IntoIterator>::IntoIter; - - fn into_iter(self) -> <&'a BTreeMap as IntoIterator>::IntoIter { - self.0.iter() - } -} - -impl fmt::Display for Binding { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{{")?; - for (var, val) in self { - write!(f, " {} → {} ", var, val)?; - } - write!(f, "}}") - } -} diff --git a/src/sparql/parser.rs b/src/sparql/parser.rs index ac9855c6..cb5ea015 100644 --- a/src/sparql/parser.rs +++ b/src/sparql/parser.rs @@ -15,7 +15,6 @@ mod grammar { use rio::utils::unescape_characters; use rio::utils::unescape_unicode_codepoints; use sparql::algebra::*; - use sparql::model::*; use std::borrow::Cow; use std::collections::BTreeMap; use std::collections::HashMap; diff --git a/src/sparql/sparql_grammar.rustpeg b/src/sparql/sparql_grammar.rustpeg index 1f76afad..66b82bf0 100644 --- a/src/sparql/sparql_grammar.rustpeg +++ b/src/sparql/sparql_grammar.rustpeg @@ -312,28 +312,14 @@ DataBlock -> MultiSetPattern = l:(InlineDataOneVar / InlineDataFull) { } //[63] -InlineDataOneVar -> Vec = var:Var _ '{' _ d:InlineDataOneVar_value* '}' { - d.into_iter().map(|val| { - let mut bindings = Binding::default(); - if let Some(v) = val { - bindings.insert(var.clone(), v); - } - bindings - }).collect() +InlineDataOneVar -> StaticBindings = var:Var _ '{' _ d:InlineDataOneVar_value* '}' { + StaticBindings::new(vec![var], d) } -InlineDataOneVar_value -> Option = t:DataBlockValue { t } +InlineDataOneVar_value -> Vec> = t:DataBlockValue { vec![t] } //[64] -InlineDataFull -> Vec = '(' _ vars:InlineDataFull_var* _ ')' _ '{' _ val:InlineDataFull_values* '}' { - val.into_iter().map(|vals| { - let mut bindings = Binding::default(); - for (var, val) in vars.iter().zip(vals.into_iter()) { - if let Some(v) = val { - bindings.insert(var.clone(), v); - } - } - bindings - }).collect() +InlineDataFull -> StaticBindings = '(' _ vars:InlineDataFull_var* _ ')' _ '{' _ val:InlineDataFull_values* '}' { + StaticBindings::new(vars, val) } InlineDataFull_var -> Variable = v:Var _ { v } InlineDataFull_values -> Vec> = '(' _ v:InlineDataFull_value* _ ')' _ { v } @@ -700,7 +686,7 @@ GraphTerm -> Term = l:NumericLiteral { l.into() } / l:BooleanLiteral { l.into() } / b:BlankNode { b.into() } / - NIL { BlankNode::default().into() } + NIL { rdf::NIL.clone().into() } //[110] Expression -> Expression = e:ConditionalOrExpression {e} @@ -962,28 +948,28 @@ LANGTAG -> &'input str = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) { INTEGER -> () = [0-9]+ //[147] -DECIMAL -> () = [0-9]* '.' [0-9]+ +DECIMAL -> () = ([0-9]+ "." [0-9]* / [0-9]* "." [0-9]+) //[148] -DOUBLE -> () = ([0-9]+ "."? [0-9]* / "." [0-9]+) EXPONENT +DOUBLE -> () = ([0-9]+ "." [0-9]* / "." [0-9]+ / [0-9]+) EXPONENT //[149] -INTEGER_POSITIVE -> () = '+' _ INTEGER +INTEGER_POSITIVE -> () = "+" _ INTEGER //[150] -DECIMAL_POSITIVE -> () = '+' _ DECIMAL +DECIMAL_POSITIVE -> () = "+" _ DECIMAL //[151] -DOUBLE_POSITIVE -> () = '+' _ DOUBLE +DOUBLE_POSITIVE -> () = "+" _ DOUBLE //[152] -INTEGER_NEGATIVE -> () = '-' _ INTEGER +INTEGER_NEGATIVE -> () = "-" _ INTEGER //[153] -DECIMAL_NEGATIVE -> () = '-' _ DECIMAL +DECIMAL_NEGATIVE -> () = "-" _ DECIMAL //[154] -DOUBLE_NEGATIVE -> () = '-' _ DOUBLE +DOUBLE_NEGATIVE -> () = "-" _ DOUBLE //[155] EXPONENT -> () = [eE] [+-]? [0-9]+ diff --git a/src/sparql/xml_results.rs b/src/sparql/xml_results.rs new file mode 100644 index 00000000..5b9cd3e4 --- /dev/null +++ b/src/sparql/xml_results.rs @@ -0,0 +1,333 @@ +use errors::*; +use model::*; +use quick_xml::events::Event; +use quick_xml::Reader; +use sparql::algebra::BindingsIterator; +use sparql::algebra::QueryResult; +use sparql::algebra::Variable; +use std::collections::BTreeMap; +use std::io::BufRead; +use std::iter::empty; +use std::str::FromStr; + +pub fn read_xml_results(source: impl BufRead + 'static) -> Result { + enum State { + Start, + Sparql, + Head, + AfterHead, + Boolean, + } + + let mut reader = Reader::from_reader(source); + reader.trim_text(true); + + let mut buffer = Vec::default(); + let mut namespace_buffer = Vec::default(); + let mut variables: Vec = Vec::default(); + let mut state = State::Start; + + //Read header + loop { + let event = { + let (ns, event) = reader.read_namespaced_event(&mut buffer, &mut namespace_buffer)?; + if let Some(ns) = ns { + if ns != b"http://www.w3.org/2005/sparql-results#".as_ref() { + return Err(format!( + "Unexpected namespace found in RDF/XML query result: {}", + reader.decode(ns) + ).into()); + } + } + event + }; + match event { + Event::Start(event) => match state { + State::Start => { + if event.name() == b"sparql" { + state = State::Sparql; + } else { + return Err(format!("Expecting tag, found {}", reader.decode(event.name())).into()); + } + } + State::Sparql => { + if event.name() == b"head" { + state = State::Head; + } else { + return Err(format!("Expecting tag, found {}", reader.decode(event.name())).into()); + } + } + State::Head => if event.name() == b"variable" || event.name() == b"link" { + return Err(" and tag should be autoclosing".into()); + } else { + return Err(format!("Expecting or tag, found {}", reader.decode(event.name())).into()); + }, + State::AfterHead => { + if event.name() == b"boolean" { + state = State::Boolean + } else if event.name() == b"results" { + let mut mapping = BTreeMap::default(); + for (i,var) in variables.iter().enumerate() { + mapping.insert(var.as_bytes().to_vec(), i); + } + return Ok(QueryResult::Bindings(BindingsIterator::new( + variables.into_iter().map(Variable::new).collect(), + Box::new(ResultsIterator { + reader, + buffer: Vec::default(), + namespace_buffer, + mapping, + bnodes_map: BTreeMap::default(), + }), + ))); + } else if event.name() != b"link" && event.name() != b"results" && event.name() != b"boolean" { + return Err(format!("Expecting sparql tag, found {}", reader.decode(event.name())).into()); + } + } + State::Boolean => return Err(format!("Unexpected tag inside of tag: {}", reader.decode(event.name())).into()) + }, + Event::Empty(event) => match state { + State::Head => { + if event.name() == b"variable" { + let name = event.attributes() + .filter(|attr| attr.is_ok()) + .map(|attr| attr.unwrap()) + .find(|attr| attr.key == b"name") + .ok_or("No name attribute found for the tag"); + variables.push(name?.unescape_and_decode_value(&reader)?); + } else if event.name() == b"link" { + // no op + } else { + return Err(format!("Expecting or tag, found {}", reader.decode(event.name())).into()); + } + }, + State::AfterHead => { + if event.name() == b"results" { + return Ok(QueryResult::Bindings(BindingsIterator::new( + variables.into_iter().map(Variable::new).collect(), + Box::new(empty()), + ))) + } else { + return Err(format!("Unexpected autoclosing tag <{}>", reader.decode(event.name())).into()) + } + } + _ => return Err(format!("Unexpected autoclosing tag <{}>", reader.decode(event.name())).into()) + }, + Event::Text(event) => { + let value = event.unescaped()?; + return match state { + State::Boolean => { + return if value.as_ref() == b"true" { + Ok(QueryResult::Boolean(true)) + } else if value.as_ref() == b"false" { + Ok(QueryResult::Boolean(false)) + } else { + Err(format!("Unexpected boolean value. Found {}", reader.decode(&value)).into()) + }; + } + _ => Err(format!("Unexpected textual value found: {}", reader.decode(&value)).into()) + }; + }, + Event::End(_) => match state { + State::Head => state = State::AfterHead, + _ => { + return Err("Unexpected early file end. All results file should have a and a or tag".into()); + } + }, + Event::Eof => return Err("Unexpected early file end. All results file should have a and a or tag".into()), + _ => (), + } + } +} + +struct ResultsIterator { + reader: Reader, + buffer: Vec, + namespace_buffer: Vec, + mapping: BTreeMap, usize>, + bnodes_map: BTreeMap, BlankNode>, +} + +impl Iterator for ResultsIterator { + type Item = Result>>; + + fn next(&mut self) -> Option>>> { + enum State { + Start, + Result, + Binding, + Uri, + BNode, + Literal, + End, + } + let mut state = State::Start; + + let mut new_bindings = Vec::default(); + new_bindings.resize(self.mapping.len(), None); + + let mut current_var = None; + let mut term: Option = None; + let mut lang = None; + let mut datatype = None; + loop { + let (ns, event) = match self + .reader + .read_namespaced_event(&mut self.buffer, &mut self.namespace_buffer) + { + Ok(v) => v, + Err(error) => return Some(Err(error.into())), + }; + if let Some(ns) = ns { + if ns != b"http://www.w3.org/2005/sparql-results#".as_ref() { + return Some(Err(format!( + "Unexpected namespace found in RDF/XML query result: {}", + self.reader.decode(ns) + ).into())); + } + } + match event { + Event::Start(event) => match state { + State::Start => if event.name() == b"result" { + state = State::Result; + } else { + return Some(Err(format!( + "Expecting , found {}", + self.reader.decode(event.name()) + ).into())); + }, + State::Result => if event.name() == b"binding" { + match event + .attributes() + .filter(|attr| attr.is_ok()) + .map(|attr| attr.unwrap()) + .find(|attr| attr.key == b"name") + { + Some(attr) => match attr.unescaped_value() { + Ok(var) => current_var = Some(var.to_vec()), + Err(error) => return Some(Err(error.into())), + }, + None => { + return Some(Err( + "No name attribute found for the tag".into() + )) + } + } + state = State::Binding; + } else { + return Some(Err(format!( + "Expecting , found {}", + self.reader.decode(event.name()) + ).into())); + }, + State::Binding => { + if term.is_some() { + return Some(Err( + "There is already a value for the current binding".into() + )); + } + if event.name() == b"uri" { + state = State::Uri; + } else if event.name() == b"bnode" { + state = State::BNode; + } else if event.name() == b"literal" { + for attr in event.attributes() { + if let Ok(attr) = attr { + if attr.key == b"xml:lang" { + match attr.unescape_and_decode_value(&self.reader) { + Ok(val) => lang = Some(val), + Err(error) => return Some(Err(error.into())), + } + } else if attr.key == b"datatype" { + match attr.unescaped_value() { + Ok(val) => { + match NamedNode::from_str(&self.reader.decode(&val)) + { + Ok(dt) => datatype = Some(dt), + Err(error) => return Some(Err(error)), + } + } + Err(error) => return Some(Err(error.into())), + } + } + } + } + state = State::Literal; + } else { + return Some(Err(format!( + "Expecting , or found {}", + self.reader.decode(event.name()) + ).into())); + } + } + _ => (), + }, + Event::Text(event) => match event.unescaped() { + Ok(data) => match state { + State::Uri => match NamedNode::from_str(&self.reader.decode(&data)) { + Ok(named_node) => term = Some(named_node.into()), + Err(error) => return Some(Err(error)), + }, + State::BNode => { + term = Some( + self.bnodes_map + .entry(data.to_vec()) + .or_insert_with(BlankNode::default) + .clone() + .into(), + ) + } + State::Literal => { + let value = self.reader.decode(&data).to_string(); + term = Some( + match datatype { + Some(ref datatype) => { + Literal::new_typed_literal(value, datatype.clone()) + } + None => match lang { + Some(ref lang) => Literal::new_language_tagged_literal( + value, + lang.clone(), + ), + None => Literal::new_simple_literal(value), + }, + }.into(), + ) + } + _ => { + return Some(Err(format!( + "Unexpected textual value found: {}", + self.reader.decode(&data) + ).into())) + } + }, + Err(error) => return Some(Err(error.into())), + }, + Event::End(_) => match state { + State::Start => state = State::End, + State::Result => return Some(Ok(new_bindings)), + State::Binding => { + match (¤t_var, &term) { + (Some(var), Some(term)) => { + new_bindings[self.mapping[var]] = Some(term.clone()) + } + (Some(var), None) => { + return Some(Err(format!( + "No variable found for variable {}", + self.reader.decode(&var) + ).into())) + } + _ => return Some(Err("No name found for tag".into())), + } + term = None; + state = State::Result; + } + State::Uri | State::BNode | State::Literal => state = State::Binding, + _ => (), + }, + Event::Eof => return None, + _ => (), + } + } + } +} diff --git a/src/store/mod.rs b/src/store/mod.rs index ef240169..25d932e5 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -2,6 +2,7 @@ pub mod isomorphism; mod memory; mod numeric_encoder; mod rocksdb; +mod sparql; mod store; pub use store::memory::MemoryDataset; diff --git a/src/store/sparql.rs b/src/store/sparql.rs new file mode 100644 index 00000000..4edf5525 --- /dev/null +++ b/src/store/sparql.rs @@ -0,0 +1,315 @@ +use errors::*; +use sparql::algebra::*; +use std::iter::once; +use std::iter::Iterator; +use std::sync::Arc; +use store::numeric_encoder::EncodedTerm; +use store::store::EncodedQuadsStore; + +type EncodedBinding = Vec>; + +struct EncodedBindingsIterator { + variables: Vec, + iter: Box>>, +} + +impl EncodedBindingsIterator { + fn take(self, n: usize) -> Self { + EncodedBindingsIterator { + variables: self.variables, + iter: Box::new(self.iter.take(n)), + } + } + + fn skip(self, n: usize) -> Self { + EncodedBindingsIterator { + variables: self.variables, + iter: Box::new(self.iter.skip(n)), + } + } + + fn project(self, on_variables: Vec) -> Self { + let EncodedBindingsIterator { variables, iter } = self; + let projection: Vec<(usize, usize)> = on_variables + .iter() + .enumerate() + .flat_map(|(new_pos, v)| slice_key(&variables, v).map(|old_pos| (old_pos, new_pos))) + .collect(); + let new_len = on_variables.len(); + EncodedBindingsIterator { + variables: on_variables, + iter: Box::new(iter.map(move |binding| { + let binding = binding?; + let mut new_binding = Vec::with_capacity(new_len); + new_binding.resize(new_len, None); + for (old_pos, new_pos) in &projection { + new_binding[*new_pos] = binding[*old_pos]; + } + Ok(new_binding) + })), + } + } +} + +impl Default for EncodedBindingsIterator { + fn default() -> Self { + EncodedBindingsIterator { + variables: Vec::default(), + iter: Box::new(once(Ok(Vec::default()))), + } + } +} + +fn slice_key(slice: &[T], element: &T) -> Option { + for (i, item) in slice.iter().enumerate() { + if item == element { + return Some(i); + } + } + None +} + +pub struct SparqlEvaluator { + store: Arc, +} + +impl SparqlEvaluator { + pub fn new(store: Arc) -> Self { + Self { store } + } + + pub fn evaluate(&self, query: &Query) -> Result { + match query { + Query::SelectQuery { algebra, dataset } => { + Ok(QueryResult::Bindings(self.decode_bindings( + self.eval_list_pattern(algebra, EncodedBindingsIterator::default())?, + ))) + } + _ => unimplemented!(), + } + } + + fn eval_list_pattern( + &self, + pattern: &ListPattern, + from: EncodedBindingsIterator, + ) -> Result { + match pattern { + ListPattern::Data(bs) => Ok(self.encode_bindings(bs)), + ListPattern::ToList(l) => self.eval_multi_set_pattern(l, from), + ListPattern::OrderBy(l, o) => self.eval_list_pattern(l, from), //TODO + ListPattern::Project(l, new_variables) => Ok(self + .eval_list_pattern(l, from)? + .project(new_variables.to_vec())), + ListPattern::Distinct(l) => self.eval_list_pattern(l, from), //TODO + ListPattern::Reduced(l) => self.eval_list_pattern(l, from), + ListPattern::Slice(l, start, length) => { + let mut iter = self.eval_list_pattern(l, from)?; + if *start > 0 { + iter = iter.skip(*start); + } + if let Some(length) = length { + iter = iter.take(*length); + } + Ok(iter) + } + } + } + + fn eval_multi_set_pattern( + &self, + pattern: &MultiSetPattern, + from: EncodedBindingsIterator, + ) -> Result { + match pattern { + MultiSetPattern::BGP(p) => { + let mut iter = from; + for pattern in p { + iter = match pattern { + TripleOrPathPattern::Triple(pattern) => { + self.eval_triple_pattern(pattern, iter) + } + TripleOrPathPattern::Path(pattern) => self.eval_path_pattern(pattern, iter), + }?; + } + Ok(iter) + } + MultiSetPattern::Join(a, b) => { + self.eval_multi_set_pattern(b, self.eval_multi_set_pattern(a, from)?) + } + MultiSetPattern::LeftJoin(a, b, e) => unimplemented!(), + MultiSetPattern::Filter(e, p) => unimplemented!(), + MultiSetPattern::Union(a, b) => unimplemented!(), + MultiSetPattern::Graph(g, p) => unimplemented!(), + MultiSetPattern::Extend(p, v, e) => unimplemented!(), + MultiSetPattern::Minus(a, b) => unimplemented!(), + MultiSetPattern::ToMultiSet(l) => self.eval_list_pattern(l, from), + MultiSetPattern::Service(n, p, s) => unimplemented!(), + MultiSetPattern::AggregateJoin(g, a) => unimplemented!(), + } + } + + fn eval_triple_pattern( + &self, + pattern: &TriplePattern, + from: EncodedBindingsIterator, + ) -> Result { + let EncodedBindingsIterator { + mut variables, + iter: from_iter, + } = from; + let subject = + self.binding_value_lookup_from_term_or_variable(&pattern.subject, &mut variables)?; + let predicate = self + .binding_value_lookup_from_named_node_or_variable(&pattern.predicate, &mut variables)?; + let object = + self.binding_value_lookup_from_term_or_variable(&pattern.object, &mut variables)?; + + let store = self.store.clone(); + let variables_len = variables.len(); + Ok(EncodedBindingsIterator { + variables, + iter: Box::new(from_iter.flat_map(move |binding| { + let result: Box>> = match binding { + Ok(mut binding) => { + match store.quads_for_pattern( + subject.get(&binding), + predicate.get(&binding), + object.get(&binding), + None, //TODO + ) { + Ok(iter) => Box::new(iter.map(move |quad| { + let quad = quad?; + let mut binding = binding.clone(); + binding.resize(variables_len, None); + subject.put(quad.subject, &mut binding); + predicate.put(quad.predicate, &mut binding); + object.put(quad.object, &mut binding); + Ok(binding) + })), + Err(error) => Box::new(once(Err(error))), + } + } + Err(error) => Box::new(once(Err(error))), + }; + result + })), + }) + } + + fn eval_path_pattern( + &self, + pattern: &PathPattern, + from: EncodedBindingsIterator, + ) -> Result { + unimplemented!() + } + + fn binding_value_lookup_from_term_or_variable( + &self, + term_or_variable: &TermOrVariable, + variables: &mut Vec, + ) -> Result { + Ok(match term_or_variable { + TermOrVariable::Term(term) => { + BindingValueLookup::Constant(self.store.encoder().encode_term(term)?) + } + TermOrVariable::Variable(variable) => { + BindingValueLookup::Variable(match slice_key(variables, variable) { + Some(key) => key, + None => { + variables.push(variable.clone()); + variables.len() - 1 + } + }) + } + }) + } + + fn binding_value_lookup_from_named_node_or_variable( + &self, + named_node_or_variable: &NamedNodeOrVariable, + variables: &mut Vec, + ) -> Result { + Ok(match named_node_or_variable { + NamedNodeOrVariable::NamedNode(named_node) => { + BindingValueLookup::Constant(self.store.encoder().encode_named_node(named_node)?) + } + NamedNodeOrVariable::Variable(variable) => { + BindingValueLookup::Variable(match slice_key(variables, variable) { + Some(key) => key, + None => { + variables.push(variable.clone()); + variables.len() - 1 + } + }) + } + }) + } + + fn encode_bindings(&self, bindings: &StaticBindings) -> EncodedBindingsIterator { + let encoder = self.store.encoder(); + let encoded_values: Vec> = bindings + .values_iter() + .map(move |values| { + let mut result = Vec::with_capacity(values.len()); + for value in values { + result.push(match value { + Some(term) => Some(encoder.encode_term(term)?), + None => None, + }); + } + Ok(result) + }).collect(); + EncodedBindingsIterator { + variables: bindings.variables().to_vec(), + iter: Box::new(encoded_values.into_iter()), + } + } + + fn decode_bindings(&self, iter: EncodedBindingsIterator) -> BindingsIterator { + let store = self.store.clone(); + let EncodedBindingsIterator { variables, iter } = iter; + BindingsIterator::new( + variables, + Box::new(iter.map(move |values| { + let values = values?; + let encoder = store.encoder(); + let mut result = Vec::with_capacity(values.len()); + for value in values { + result.push(match value { + Some(term) => Some(encoder.decode_term(term)?), + None => None, + }); + } + Ok(result) + })), + ) + } +} + +#[derive(Clone, Copy)] +enum BindingValueLookup { + Constant(EncodedTerm), + Variable(usize), +} + +impl BindingValueLookup { + fn get(&self, binding: &[Option]) -> Option { + match self { + BindingValueLookup::Constant(term) => Some(*term), + BindingValueLookup::Variable(v) => if *v < binding.len() { + binding[*v] + } else { + None + }, + } + } + + fn put(&self, value: EncodedTerm, binding: &mut EncodedBinding) { + match self { + BindingValueLookup::Constant(_) => (), + BindingValueLookup::Variable(v) => binding[*v] = Some(value), + } + } +} diff --git a/src/store/store.rs b/src/store/store.rs index b0c442bd..1f71492b 100644 --- a/src/store/store.rs +++ b/src/store/store.rs @@ -1,16 +1,20 @@ use errors::*; use model::*; +use sparql::algebra::QueryResult; +use sparql::parser::read_sparql_query; use std::fmt; +use std::io::Read; use std::iter::empty; use std::iter::once; use std::iter::FromIterator; use std::iter::Iterator; use std::sync::Arc; use store::numeric_encoder::*; +use store::sparql::SparqlEvaluator; /// Defines the Store traits that is used to have efficient binary storage -pub trait EncodedQuadsStore: BytesStore + Sized { +pub trait EncodedQuadsStore: BytesStore + Sized + 'static { type QuadsIterator: Iterator> + 'static; type QuadsForSubjectIterator: Iterator> + 'static; type QuadsForSubjectPredicateIterator: Iterator> + 'static; @@ -345,6 +349,11 @@ impl Dataset for StoreDataset { fn is_empty(&self) -> Result { Ok(self.store.quads()?.any(|_| true)) } + + fn query(&self, query: impl Read) -> Result { + let query = read_sparql_query(query, None)?; + SparqlEvaluator::new(self.store.clone()).evaluate(&query) + } } impl fmt::Display for StoreDataset { diff --git a/tests/sparql_test_cases.rs b/tests/sparql_test_cases.rs index 3d72b3a2..e6a3e427 100644 --- a/tests/sparql_test_cases.rs +++ b/tests/sparql_test_cases.rs @@ -1,5 +1,4 @@ ///! Integration tests based on [SPARQL 1.1 Test Cases](https://www.w3.org/2009/sparql/docs/tests/README.html) - #[macro_use] extern crate lazy_static; extern crate reqwest; @@ -12,11 +11,14 @@ use rudf::errors::*; use rudf::model::vocab::rdf; use rudf::model::vocab::rdfs; use rudf::model::*; -use rudf::rio::ntriples::read_ntriples; use rudf::rio::turtle::read_turtle; use rudf::rio::xml::read_rdf_xml; use rudf::sparql::algebra::Query; +use rudf::sparql::algebra::QueryResult; use rudf::sparql::parser::read_sparql_query; +use rudf::sparql::xml_results::read_xml_results; +use rudf::store::isomorphism::GraphIsomorphism; +use rudf::store::MemoryDataset; use rudf::store::MemoryGraph; use std::error::Error; use std::fmt; @@ -37,7 +39,6 @@ fn sparql_w3c_syntax_testsuite() { //TODO: Deserialization of the serialization failing: NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql2/manifest#syntax-form-construct04").unwrap(), NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql2/manifest#syntax-function-04").unwrap(), - NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql1/manifest#syntax-lit-08").unwrap(), NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql1/manifest#syntax-qname-04").unwrap(), ]; let client = RDFClient::default(); @@ -50,7 +51,7 @@ fn sparql_w3c_syntax_testsuite() { continue; } if test.kind == "PositiveSyntaxTest" || test.kind == "PositiveSyntaxTest11" { - match client.load_sparql_query(test.action.clone()) { + match client.load_sparql_query(test.query.clone()) { Err(error) => assert!(false, "Failure on {} with error: {}", test, error), Ok(query) => { if let Err(error) = read_sparql_query(query.to_string().as_bytes(), None) { @@ -66,7 +67,7 @@ fn sparql_w3c_syntax_testsuite() { } } else if test.kind == "NegativeSyntaxTest" || test.kind == "NegativeSyntaxTest11" { //TODO - if let Ok(result) = client.load_sparql_query(test.action.clone()) { + if let Ok(result) = client.load_sparql_query(test.query.clone()) { eprintln!("Failure on {}. The output tree is: {}", test, result); } } else { @@ -75,6 +76,63 @@ fn sparql_w3c_syntax_testsuite() { } } +#[test] +fn sparql_w3c_query_evaluation_testsuite() { + let manifest_10_url = + Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/basic/manifest.ttl") + .unwrap(); + + let test_blacklist = vec![]; + let client = RDFClient::default(); + + for test_result in TestManifest::new(&client, manifest_10_url) { + let test = test_result.unwrap(); + if test_blacklist.contains(&test.id) { + continue; + } + if test.kind == "QueryEvaluationTest" { + let data = match &test.data { + Some(data) => { + let dataset = MemoryDataset::default(); + let dataset_default = dataset.default_graph(); + client + .load_graph(data.clone()) + .unwrap() + .iter() + .unwrap() + .for_each(|triple| dataset_default.insert(&triple.unwrap()).unwrap()); + dataset + } + None => MemoryDataset::default(), + }; + match data.query(client.get(&test.query).unwrap()) { + Err(error) => assert!( + false, + "Failure to parse query of {} with error: {}", + test, error + ), + Ok(result) => { + let actual_graph = to_graph(result).unwrap(); + let expected_graph = client + .load_sparql_query_result_graph(test.result.clone().unwrap()) + .unwrap(); + assert!( + actual_graph.is_isomorphic(&expected_graph).unwrap(), + "Failure on {}. Expected file:\n{}\nOutput file:\n{}\nParsed query:\n{}\nData:\n{}\n", + test, + expected_graph, + actual_graph, + client.load_sparql_query(test.query.clone()).unwrap(), + data + ) + } + } + } else { + assert!(false, "Not supported test: {}", test); + } + } +} + pub struct RDFClient { client: Client, } @@ -88,20 +146,26 @@ impl Default for RDFClient { } impl RDFClient { - pub fn load_turtle(&self, url: Url) -> Result { - Ok(read_turtle(self.get(&url)?, Some(url))?.collect()) - } - - pub fn load_ntriples(&self, url: Url) -> Result { - read_ntriples(self.get(&url)?).collect() + fn load_graph(&self, url: Url) -> Result { + if url.as_str().ends_with(".ttl") { + Ok(read_turtle(self.get(&url)?, Some(url))?.collect()) + } else if url.as_str().ends_with(".rdf") { + read_rdf_xml(BufReader::new(self.get(&url)?), Some(url)).collect() + } else { + Err(format!("Serialization type not found for {}", url).into()) + } } - pub fn load_rdf_xml(&self, url: Url) -> Result { - read_rdf_xml(BufReader::new(self.get(&url)?), Some(url)).collect() + fn load_sparql_query(&self, url: Url) -> Result { + read_sparql_query(self.get(&url)?, Some(url)) } - pub fn load_sparql_query(&self, url: Url) -> Result { - read_sparql_query(self.get(&url)?, Some(url)) + fn load_sparql_query_result_graph(&self, url: Url) -> Result { + if url.as_str().ends_with(".srx") { + to_graph(read_xml_results(BufReader::new(self.get(&url)?))?) + } else { + self.load_graph(url) + } } fn get(&self, url: &Url) -> Result { @@ -117,12 +181,96 @@ impl RDFClient { } } +mod rs { + use rudf::model::NamedNode; + use std::str::FromStr; + + lazy_static! { + pub static ref RESULT_SET: NamedNode = + NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/result-set#ResultSet") + .unwrap(); + pub static ref RESULT_VARIABLE: NamedNode = NamedNode::from_str( + "http://www.w3.org/2001/sw/DataAccess/tests/result-set#resultVariable" + ).unwrap(); + pub static ref SOLUTION: NamedNode = + NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/result-set#solution") + .unwrap(); + pub static ref BINDING: NamedNode = + NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/result-set#binding") + .unwrap(); + pub static ref VALUE: NamedNode = + NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/result-set#value") + .unwrap(); + pub static ref VARIABLE: NamedNode = + NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/result-set#variable") + .unwrap(); + pub static ref INDEX: NamedNode = + NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/result-set#index") + .unwrap(); + } +} + +fn to_graph(result: QueryResult) -> Result { + match result { + QueryResult::Graph(graph) => Ok(graph), + QueryResult::Boolean(_) => unimplemented!(), + QueryResult::Bindings(bindings) => { + let graph = MemoryGraph::default(); + let result_set = BlankNode::default(); + graph.insert(&Triple::new( + result_set.clone(), + rdf::TYPE.clone(), + rs::RESULT_SET.clone(), + ))?; + let (variables, iter) = bindings.destruct(); + for variable in &variables { + graph.insert(&Triple::new( + result_set.clone(), + rs::RESULT_VARIABLE.clone(), + Literal::from(variable.name()?), + ))?; + } + for binding_values in iter { + let binding_values = binding_values?; + let solution = BlankNode::default(); + graph.insert(&Triple::new( + result_set.clone(), + rs::SOLUTION.clone(), + solution.clone(), + ))?; + for i in 0..variables.len() { + if let Some(ref value) = binding_values[i] { + let binding = BlankNode::default(); + graph.insert(&Triple::new( + solution.clone(), + rs::BINDING.clone(), + binding.clone(), + ))?; + graph.insert(&Triple::new( + binding.clone(), + rs::VALUE.clone(), + value.clone(), + ))?; + graph.insert(&Triple::new( + binding.clone(), + rs::VARIABLE.clone(), + Literal::from(variables[i].name()?), + ))?; + } + } + } + Ok(graph) + } + } +} + pub struct Test { pub id: NamedNode, pub kind: String, pub name: Option, pub comment: Option, - pub action: Url, + pub query: Url, + pub data: Option, pub result: Option, } @@ -135,7 +283,13 @@ impl fmt::Display for Test { for comment in &self.comment { write!(f, " with comment \"{}\"", comment)?; } - write!(f, " on file \"{}\"", self.action)?; + write!(f, " on query {}", self.query)?; + for data in &self.data { + write!(f, " with data {}", data)?; + } + for result in &self.result { + write!(f, " and expected result {}", result)?; + } Ok(()) } } @@ -181,6 +335,20 @@ pub mod mf { } } +pub mod qt { + use rudf::model::NamedNode; + use std::str::FromStr; + + lazy_static! { + pub static ref QUERY: NamedNode = + NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/test-query#query") + .unwrap(); + pub static ref DATA: NamedNode = + NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/test-query#data") + .unwrap(); + } +} + impl<'a> Iterator for TestManifest<'a> { type Item = Result; @@ -215,21 +383,46 @@ impl<'a> Iterator for TestManifest<'a> { Some(Term::Literal(c)) => Some(c.value().to_string()), _ => None, }; - let action = match self + let (query, data) = match self .graph .object_for_subject_predicate(&test_subject, &*mf::ACTION) .unwrap() { - Some(Term::NamedNode(n)) => n.url().clone(), + Some(Term::NamedNode(n)) => (n.into(), None), + Some(Term::BlankNode(n)) => { + let n = n.into(); + let query = match self + .graph + .object_for_subject_predicate(&n, &qt::QUERY) + .unwrap() + { + Some(Term::NamedNode(q)) => q.into(), + Some(_) => return Some(Err("invalid query".into())), + None => return Some(Err("query not found".into())), + }; + let data = match self + .graph + .object_for_subject_predicate(&n, &qt::DATA) + .unwrap() + { + Some(Term::NamedNode(q)) => Some(q.into()), + _ => None, + }; + (query, data) + } Some(_) => return Some(Err("invalid action".into())), - None => return Some(Err("action not found".into())), + None => { + return Some(Err( + format!("action not found for test {}", test_subject).into() + )) + } }; let result = match self .graph .object_for_subject_predicate(&test_subject, &*mf::RESULT) .unwrap() { - Some(Term::NamedNode(n)) => Some(n.url().clone()), + Some(Term::NamedNode(n)) => Some(n.into()), Some(_) => return Some(Err("invalid result".into())), None => None, }; @@ -238,7 +431,8 @@ impl<'a> Iterator for TestManifest<'a> { kind, name, comment, - action, + query, + data, result, })) } @@ -247,7 +441,7 @@ impl<'a> Iterator for TestManifest<'a> { match self.manifests_to_do.pop() { Some(url) => { let manifest = NamedOrBlankNode::from(NamedNode::new(url.clone())); - match self.client.load_turtle(url) { + match self.client.load_graph(url) { Ok(g) => g .iter() .unwrap() @@ -289,7 +483,7 @@ impl<'a> Iterator for TestManifest<'a> { Some(term) => { return Some(Err( format!("Invalid tests list. Got term {}", term).into() - )) + )); } None => (), }