Adds beginning of SPARQL evaluation

pull/10/head
Tpt 6 years ago
parent cdafcfc2cc
commit 7ed4252ad8
  1. 4
      src/model/dataset.rs
  2. 6
      src/model/named_node.rs
  3. 155
      src/sparql/algebra.rs
  4. 2
      src/sparql/mod.rs
  5. 64
      src/sparql/model.rs
  6. 1
      src/sparql/parser.rs
  7. 42
      src/sparql/sparql_grammar.rustpeg
  8. 333
      src/sparql/xml_results.rs
  9. 1
      src/store/mod.rs
  10. 315
      src/store/sparql.rs
  11. 11
      src/store/store.rs
  12. 242
      tests/sparql_test_cases.rs

@ -1,5 +1,7 @@
use errors::*;
use model::*;
use sparql::algebra::QueryResult;
use std::io::Read;
/// Trait for [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-graph)
pub trait Graph {
@ -148,4 +150,6 @@ pub trait Dataset {
fn len(&self) -> Result<usize>;
fn is_empty(&self) -> Result<bool>;
fn query(&self, query: impl Read) -> Result<QueryResult>;
}

@ -48,6 +48,12 @@ impl From<Url> for NamedNode {
}
}
/// Turns a `NamedNode` back into the `Url` that holds its IRI.
///
/// The IRI lives behind an `Arc`: when this node is the only owner the `Url`
/// is moved out without copying, otherwise the shared value is cloned.
impl From<NamedNode> for Url {
    fn from(named_node: NamedNode) -> Self {
        match Arc::try_unwrap(named_node.iri) {
            Ok(iri) => iri,
            Err(shared) => (*shared).clone(),
        }
    }
}
impl FromStr for NamedNode {
type Err = Error;

@ -1,9 +1,10 @@
use errors::*;
use model::*;
use sparql::model::*;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::fmt;
use std::ops::Add;
use store::MemoryGraph;
use utils::Escaper;
use uuid::Uuid;
@ -25,6 +26,13 @@ impl Variable {
_ => false,
}
}
/// Returns the name of this variable.
///
/// Fails for every variant other than `Variable::Variable`, since those
/// carry no user-visible name.
pub fn name(&self) -> Result<&str> {
    if let Variable::Variable { name } = self {
        Ok(name)
    } else {
        Err(format!("The variable {} has no name", self).into())
    }
}
}
impl fmt::Display for Variable {
@ -78,16 +86,14 @@ impl From<Variable> for NamedNodeOrVariable {
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub enum TermOrVariable {
NamedNode(NamedNode),
Literal(Literal),
Term(Term),
Variable(Variable),
}
impl fmt::Display for TermOrVariable {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
TermOrVariable::NamedNode(node) => write!(f, "{}", node),
TermOrVariable::Literal(node) => write!(f, "{}", node),
TermOrVariable::Term(term) => write!(f, "{}", term),
TermOrVariable::Variable(var) => write!(f, "{}", var),
}
}
@ -95,7 +101,7 @@ impl fmt::Display for TermOrVariable {
impl From<NamedNode> for TermOrVariable {
fn from(node: NamedNode) -> Self {
TermOrVariable::NamedNode(node)
TermOrVariable::Term(node.into())
}
}
@ -107,7 +113,7 @@ impl From<BlankNode> for TermOrVariable {
impl From<Literal> for TermOrVariable {
fn from(literal: Literal) -> Self {
TermOrVariable::Literal(literal)
TermOrVariable::Term(literal.into())
}
}
@ -120,9 +126,9 @@ impl From<Variable> for TermOrVariable {
impl From<Term> for TermOrVariable {
fn from(term: Term) -> Self {
match term {
Term::NamedNode(node) => TermOrVariable::NamedNode(node),
Term::NamedNode(node) => TermOrVariable::Term(node.into()),
Term::BlankNode(node) => TermOrVariable::Variable(node.into()),
Term::Literal(literal) => TermOrVariable::Literal(literal),
Term::Literal(literal) => TermOrVariable::Term(literal.into()),
}
}
}
@ -130,12 +136,87 @@ impl From<Term> for TermOrVariable {
impl From<NamedNodeOrVariable> for TermOrVariable {
fn from(element: NamedNodeOrVariable) -> Self {
match element {
NamedNodeOrVariable::NamedNode(node) => TermOrVariable::NamedNode(node),
NamedNodeOrVariable::NamedNode(node) => TermOrVariable::Term(node.into()),
NamedNodeOrVariable::Variable(var) => TermOrVariable::Variable(var),
}
}
}
/// A fully materialized table of solutions.
///
/// `variables` is the header of the table and each entry of `values` is one
/// row; the value at position `i` of a row belongs to `variables[i]`.
/// `None` marks a variable that is left unbound in that row.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct StaticBindings {
    variables: Vec<Variable>,
    values: Vec<Vec<Option<Term>>>,
}

impl StaticBindings {
    /// Builds a table from its header and its rows.
    pub fn new(variables: Vec<Variable>, values: Vec<Vec<Option<Term>>>) -> Self {
        StaticBindings { variables, values }
    }

    /// The header of the table, in column order.
    pub fn variables(&self) -> &[Variable] {
        self.variables.as_slice()
    }

    /// Iterates over the header of the table, in column order.
    pub fn variables_iter(&self) -> impl Iterator<Item = &Variable> {
        self.variables.iter()
    }

    /// Iterates over the rows of the table.
    pub fn values_iter(&self) -> impl Iterator<Item = &Vec<Option<Term>>> {
        self.values.iter()
    }

    /// Consumes the table and exposes it as a (never failing) `BindingsIterator`.
    pub fn into_iterator(self) -> BindingsIterator {
        let StaticBindings { variables, values } = self;
        let rows = values.into_iter().map(Ok);
        BindingsIterator {
            variables,
            iter: Box::new(rows),
        }
    }

    /// `true` when the table has no row (it may still have a header).
    pub fn is_empty(&self) -> bool {
        self.values.is_empty()
    }
}
impl Default for StaticBindings {
fn default() -> Self {
StaticBindings {
variables: Vec::default(),
values: Vec::default(),
}
}
}
/// A lazily produced table of solutions.
///
/// `variables` is the header; `iter` yields the rows one by one, each row
/// being either the values aligned with `variables` or an error.
pub struct BindingsIterator {
    variables: Vec<Variable>,
    iter: Box<dyn Iterator<Item = Result<Vec<Option<Term>>>>>,
}

impl BindingsIterator {
    /// Wraps a header and a boxed row iterator.
    pub fn new(
        variables: Vec<Variable>,
        iter: Box<dyn Iterator<Item = Result<Vec<Option<Term>>>>>,
    ) -> Self {
        BindingsIterator { variables, iter }
    }

    /// The header of the table, in column order.
    pub fn variables(&self) -> &[Variable] {
        self.variables.as_slice()
    }

    /// Drops the header and returns only the row iterator.
    pub fn into_values_iter(self) -> Box<dyn Iterator<Item = Result<Vec<Option<Term>>>>> {
        let BindingsIterator { iter, .. } = self;
        iter
    }

    /// Splits the value into its header and its row iterator.
    pub fn destruct(
        self,
    ) -> (
        Vec<Variable>,
        Box<dyn Iterator<Item = Result<Vec<Option<Term>>>>>,
    ) {
        let BindingsIterator { variables, iter } = self;
        (variables, iter)
    }
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct TriplePattern {
pub subject: TermOrVariable,
@ -1061,7 +1142,7 @@ impl fmt::Display for GroupPattern {
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub enum ListPattern {
Data(Vec<Binding>),
Data(StaticBindings),
ToList(MultiSetPattern),
OrderBy(Box<ListPattern>, Vec<OrderComparator>),
Project(Box<ListPattern>, Vec<Variable>),
@ -1073,14 +1154,20 @@ pub enum ListPattern {
impl fmt::Display for ListPattern {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
ListPattern::Data(bs) => write!(
f,
"{{ {} }}",
bs.iter()
.map(|c| c.to_string())
.collect::<Vec<String>>()
.join(" ")
),
ListPattern::Data(bs) => {
let variables = bs.variables();
write!(f, "{{ ")?;
for values in bs.values_iter() {
write!(f, "{{")?;
for i in 0..values.len() {
if let Some(ref val) = values[i] {
write!(f, " {} → {} ", variables[i], val)?;
}
}
write!(f, "}}")?;
}
write!(f, "}}")
}
ListPattern::ToList(l) => write!(f, "{}", l),
ListPattern::OrderBy(l, o) => write!(
f,
@ -1117,7 +1204,7 @@ impl fmt::Display for ListPattern {
impl Default for ListPattern {
fn default() -> Self {
ListPattern::Data(Vec::default())
ListPattern::Data(StaticBindings::default())
}
}
@ -1136,13 +1223,7 @@ impl ListPattern {
fn add_visible_variables<'a>(&'a self, vars: &mut BTreeSet<&'a Variable>) {
match self {
ListPattern::Data(b) => {
for binding in b {
for (var, _) in binding {
vars.insert(var);
}
}
}
ListPattern::Data(b) => vars.extend(b.variables_iter()),
ListPattern::ToList(p) => p.add_visible_variables(vars),
ListPattern::OrderBy(l, _) => l.add_visible_variables(vars),
ListPattern::Project(_, pv) => vars.extend(pv.iter()),
@ -1164,18 +1245,18 @@ impl<'a> fmt::Display for SparqlListPattern<'a> {
ListPattern::Data(bs) => if bs.is_empty() {
Ok(())
} else {
let vars: Vec<&Variable> = bs[0].iter().map(|(v, _)| v).collect();
write!(f, "VALUES ( ")?;
for var in &vars {
for var in bs.variables() {
write!(f, "{} ", var)?;
}
write!(f, ") {{ ")?;
for b in bs {
for values in bs.values_iter() {
write!(f, "( ")?;
for var in &vars {
b.get(var)
.map(|v| write!(f, "{} ", v))
.unwrap_or_else(|| write!(f, "UNDEF "))?;
for val in values {
match val {
Some(val) => write!(f, "{} ", val),
None => write!(f, "UNDEF "),
}?;
}
write!(f, ") ")?;
}
@ -1573,3 +1654,9 @@ impl fmt::Display for Query {
}
}
}
/// The outcome of a query evaluation or of parsing a query results file.
pub enum QueryResult {
/// A table of variable bindings (what the evaluator returns for `Query::SelectQuery`).
Bindings(BindingsIterator),
/// A single boolean (what the XML results reader returns for a `<boolean>` element).
Boolean(bool),
/// An RDF graph. NOTE(review): presumably meant for graph-producing query forms — nothing visible here builds it yet.
Graph(MemoryGraph),
}

@ -1,3 +1,3 @@
pub mod algebra;
pub mod model;
pub mod parser;
pub mod xml_results;

@ -1,64 +0,0 @@
use model::*;
use sparql::algebra::TermOrVariable;
use sparql::algebra::Variable;
use std::collections::BTreeMap;
use std::fmt;
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct Binding(BTreeMap<Variable, Term>);
impl Binding {
pub fn insert(&mut self, var: Variable, value: Term) {
self.0.insert(var, value);
}
pub fn get<'a>(&'a self, key: &'a Variable) -> Option<&'a Term> {
self.0.get(key)
}
pub fn get_or_constant<'a>(&'a self, key: &'a TermOrVariable) -> Option<Term> {
match key {
TermOrVariable::NamedNode(node) => Some(node.clone().into()),
TermOrVariable::Literal(literal) => Some(literal.clone().into()),
TermOrVariable::Variable(v) => self.get(v).cloned(),
}
}
pub fn iter(&self) -> <&BTreeMap<Variable, Term> as IntoIterator>::IntoIter {
self.0.iter()
}
}
impl Default for Binding {
fn default() -> Self {
Binding(BTreeMap::default())
}
}
impl IntoIterator for Binding {
type Item = (Variable, Term);
type IntoIter = <BTreeMap<Variable, Term> as IntoIterator>::IntoIter;
fn into_iter(self) -> <BTreeMap<Variable, Term> as IntoIterator>::IntoIter {
self.0.into_iter()
}
}
impl<'a> IntoIterator for &'a Binding {
type Item = (&'a Variable, &'a Term);
type IntoIter = <&'a BTreeMap<Variable, Term> as IntoIterator>::IntoIter;
fn into_iter(self) -> <&'a BTreeMap<Variable, Term> as IntoIterator>::IntoIter {
self.0.iter()
}
}
impl fmt::Display for Binding {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{{")?;
for (var, val) in self {
write!(f, " {} → {} ", var, val)?;
}
write!(f, "}}")
}
}

@ -15,7 +15,6 @@ mod grammar {
use rio::utils::unescape_characters;
use rio::utils::unescape_unicode_codepoints;
use sparql::algebra::*;
use sparql::model::*;
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::collections::HashMap;

@ -312,28 +312,14 @@ DataBlock -> MultiSetPattern = l:(InlineDataOneVar / InlineDataFull) {
}
//[63]
InlineDataOneVar -> Vec<Binding> = var:Var _ '{' _ d:InlineDataOneVar_value* '}' {
d.into_iter().map(|val| {
let mut bindings = Binding::default();
if let Some(v) = val {
bindings.insert(var.clone(), v);
}
bindings
}).collect()
InlineDataOneVar -> StaticBindings = var:Var _ '{' _ d:InlineDataOneVar_value* '}' {
StaticBindings::new(vec![var], d)
}
InlineDataOneVar_value -> Option<Term> = t:DataBlockValue { t }
InlineDataOneVar_value -> Vec<Option<Term>> = t:DataBlockValue { vec![t] }
//[64]
InlineDataFull -> Vec<Binding> = '(' _ vars:InlineDataFull_var* _ ')' _ '{' _ val:InlineDataFull_values* '}' {
val.into_iter().map(|vals| {
let mut bindings = Binding::default();
for (var, val) in vars.iter().zip(vals.into_iter()) {
if let Some(v) = val {
bindings.insert(var.clone(), v);
}
}
bindings
}).collect()
InlineDataFull -> StaticBindings = '(' _ vars:InlineDataFull_var* _ ')' _ '{' _ val:InlineDataFull_values* '}' {
StaticBindings::new(vars, val)
}
InlineDataFull_var -> Variable = v:Var _ { v }
InlineDataFull_values -> Vec<Option<Term>> = '(' _ v:InlineDataFull_value* _ ')' _ { v }
@ -700,7 +686,7 @@ GraphTerm -> Term =
l:NumericLiteral { l.into() } /
l:BooleanLiteral { l.into() } /
b:BlankNode { b.into() } /
NIL { BlankNode::default().into() }
NIL { rdf::NIL.clone().into() }
//[110]
Expression -> Expression = e:ConditionalOrExpression {e}
@ -962,28 +948,28 @@ LANGTAG -> &'input str = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {
INTEGER -> () = [0-9]+
//[147]
DECIMAL -> () = [0-9]* '.' [0-9]+
DECIMAL -> () = ([0-9]+ "." [0-9]* / [0-9]* "." [0-9]+)
//[148]
DOUBLE -> () = ([0-9]+ "."? [0-9]* / "." [0-9]+) EXPONENT
DOUBLE -> () = ([0-9]+ "." [0-9]* / "." [0-9]+ / [0-9]+) EXPONENT
//[149]
INTEGER_POSITIVE -> () = '+' _ INTEGER
INTEGER_POSITIVE -> () = "+" _ INTEGER
//[150]
DECIMAL_POSITIVE -> () = '+' _ DECIMAL
DECIMAL_POSITIVE -> () = "+" _ DECIMAL
//[151]
DOUBLE_POSITIVE -> () = '+' _ DOUBLE
DOUBLE_POSITIVE -> () = "+" _ DOUBLE
//[152]
INTEGER_NEGATIVE -> () = '-' _ INTEGER
INTEGER_NEGATIVE -> () = "-" _ INTEGER
//[153]
DECIMAL_NEGATIVE -> () = '-' _ DECIMAL
DECIMAL_NEGATIVE -> () = "-" _ DECIMAL
//[154]
DOUBLE_NEGATIVE -> () = '-' _ DOUBLE
DOUBLE_NEGATIVE -> () = "-" _ DOUBLE
//[155]
EXPONENT -> () = [eE] [+-]? [0-9]+

@ -0,0 +1,333 @@
use errors::*;
use model::*;
use quick_xml::events::Event;
use quick_xml::Reader;
use sparql::algebra::BindingsIterator;
use sparql::algebra::QueryResult;
use sparql::algebra::Variable;
use std::collections::BTreeMap;
use std::io::BufRead;
use std::iter::empty;
use std::str::FromStr;
/// Reads a document in the SPARQL Query Results XML Format.
///
/// Only the header is consumed eagerly:
/// * for a `<boolean>` document the parsed value is returned as
///   `QueryResult::Boolean`;
/// * for a `<results>` document a `QueryResult::Bindings` is returned whose
///   rows are parsed lazily from `source` by `ResultsIterator`.
///
/// Returns an error if an element is in an unexpected namespace, if the
/// elements do not come in the expected order, or if the XML itself is invalid.
pub fn read_xml_results(source: impl BufRead + 'static) -> Result<QueryResult> {
    enum State {
        Start,
        Sparql,
        Head,
        AfterHead,
        Boolean,
    }

    let mut reader = Reader::from_reader(source);
    reader.trim_text(true);

    let mut buffer = Vec::default();
    let mut namespace_buffer = Vec::default();
    let mut variables: Vec<String> = Vec::default();
    let mut state = State::Start;

    //Read header
    loop {
        // The namespace slice is only needed for the check below; keeping it
        // inside this block lets `reader` and `namespace_buffer` be moved
        // into the `ResultsIterator` further down.
        let event = {
            let (ns, event) = reader.read_namespaced_event(&mut buffer, &mut namespace_buffer)?;
            if let Some(ns) = ns {
                if ns != b"http://www.w3.org/2005/sparql-results#".as_ref() {
                    return Err(format!(
                        "Unexpected namespace found in SPARQL XML query result: {}",
                        reader.decode(ns)
                    ).into());
                }
            }
            event
        };
        match event {
            Event::Start(event) => match state {
                State::Start => {
                    if event.name() == b"sparql" {
                        state = State::Sparql;
                    } else {
                        return Err(format!("Expecting <sparql> tag, found {}", reader.decode(event.name())).into());
                    }
                }
                State::Sparql => {
                    if event.name() == b"head" {
                        state = State::Head;
                    } else {
                        return Err(format!("Expecting <head> tag, found {}", reader.decode(event.name())).into());
                    }
                }
                State::Head => if event.name() == b"variable" || event.name() == b"link" {
                    return Err("<variable> and <link> tag should be autoclosing".into());
                } else {
                    return Err(format!("Expecting <variable> or <link> tag, found {}", reader.decode(event.name())).into());
                },
                State::AfterHead => {
                    if event.name() == b"boolean" {
                        state = State::Boolean
                    } else if event.name() == b"results" {
                        // Column index of each declared variable, keyed by its raw name.
                        let mapping: BTreeMap<Vec<u8>, usize> = variables
                            .iter()
                            .enumerate()
                            .map(|(i, var)| (var.as_bytes().to_vec(), i))
                            .collect();
                        return Ok(QueryResult::Bindings(BindingsIterator::new(
                            variables.into_iter().map(Variable::new).collect(),
                            Box::new(ResultsIterator {
                                reader,
                                // `buffer` is still borrowed by `event`, so the iterator gets a fresh one.
                                buffer: Vec::default(),
                                namespace_buffer,
                                mapping,
                                bnodes_map: BTreeMap::default(),
                            }),
                        )));
                    } else if event.name() != b"link" {
                        return Err(format!("Expecting sparql tag, found {}", reader.decode(event.name())).into());
                    }
                }
                State::Boolean => return Err(format!("Unexpected tag inside of <boolean> tag: {}", reader.decode(event.name())).into())
            },
            Event::Empty(event) => match state {
                State::Sparql => {
                    // `<head/>`: a header that declares no variable at all.
                    if event.name() == b"head" {
                        state = State::AfterHead;
                    } else {
                        return Err(format!("Expecting <head> tag, found {}", reader.decode(event.name())).into());
                    }
                }
                State::Head => {
                    if event.name() == b"variable" {
                        let name = event.attributes()
                            .filter_map(|attr| attr.ok())
                            .find(|attr| attr.key == b"name")
                            .ok_or("No name attribute found for the <variable> tag");
                        variables.push(name?.unescape_and_decode_value(&reader)?);
                    } else if event.name() == b"link" {
                        // no op
                    } else {
                        return Err(format!("Expecting <variable> or <link> tag, found {}", reader.decode(event.name())).into());
                    }
                },
                State::AfterHead => {
                    if event.name() == b"results" {
                        // `<results/>`: a result set without any row.
                        return Ok(QueryResult::Bindings(BindingsIterator::new(
                            variables.into_iter().map(Variable::new).collect(),
                            Box::new(empty()),
                        )))
                    } else {
                        return Err(format!("Unexpected autoclosing tag <{}>", reader.decode(event.name())).into())
                    }
                }
                _ => return Err(format!("Unexpected autoclosing tag <{}>", reader.decode(event.name())).into())
            },
            Event::Text(event) => {
                let value = event.unescaped()?;
                return match state {
                    State::Boolean => {
                        if value.as_ref() == b"true" {
                            Ok(QueryResult::Boolean(true))
                        } else if value.as_ref() == b"false" {
                            Ok(QueryResult::Boolean(false))
                        } else {
                            Err(format!("Unexpected boolean value. Found {}", reader.decode(&value)).into())
                        }
                    }
                    _ => Err(format!("Unexpected textual value found: {}", reader.decode(&value)).into())
                };
            },
            Event::End(_) => match state {
                State::Head => state = State::AfterHead,
                _ => {
                    return Err("Unexpected early file end. All results file should have a <head> and a <results> or <boolean> tag".into());
                }
            },
            Event::Eof => return Err("Unexpected early file end. All results file should have a <head> and a <results> or <boolean> tag".into()),
            _ => (),
        }
    }
}
/// Lazily parses the `<result>` elements that follow the header of a SPARQL XML results document.
struct ResultsIterator<R: BufRead> {
// XML reader the events are pulled from.
reader: Reader<R>,
// Scratch buffer handed to the reader for the event content.
buffer: Vec<u8>,
// Scratch buffer handed to the reader for namespace resolution.
namespace_buffer: Vec<u8>,
// Raw variable name -> position of that variable in the produced rows.
mapping: BTreeMap<Vec<u8>, usize>,
// Blank node label found in the file -> blank node created for it, so that a label seen twice gives the same node.
bnodes_map: BTreeMap<Vec<u8>, BlankNode>,
}
impl<R: BufRead> Iterator for ResultsIterator<R> {
type Item = Result<Vec<Option<Term>>>;
fn next(&mut self) -> Option<Result<Vec<Option<Term>>>> {
enum State {
Start,
Result,
Binding,
Uri,
BNode,
Literal,
End,
}
let mut state = State::Start;
let mut new_bindings = Vec::default();
new_bindings.resize(self.mapping.len(), None);
let mut current_var = None;
let mut term: Option<Term> = None;
let mut lang = None;
let mut datatype = None;
loop {
let (ns, event) = match self
.reader
.read_namespaced_event(&mut self.buffer, &mut self.namespace_buffer)
{
Ok(v) => v,
Err(error) => return Some(Err(error.into())),
};
if let Some(ns) = ns {
if ns != b"http://www.w3.org/2005/sparql-results#".as_ref() {
return Some(Err(format!(
"Unexpected namespace found in RDF/XML query result: {}",
self.reader.decode(ns)
).into()));
}
}
match event {
Event::Start(event) => match state {
State::Start => if event.name() == b"result" {
state = State::Result;
} else {
return Some(Err(format!(
"Expecting <result>, found {}",
self.reader.decode(event.name())
).into()));
},
State::Result => if event.name() == b"binding" {
match event
.attributes()
.filter(|attr| attr.is_ok())
.map(|attr| attr.unwrap())
.find(|attr| attr.key == b"name")
{
Some(attr) => match attr.unescaped_value() {
Ok(var) => current_var = Some(var.to_vec()),
Err(error) => return Some(Err(error.into())),
},
None => {
return Some(Err(
"No name attribute found for the <binding> tag".into()
))
}
}
state = State::Binding;
} else {
return Some(Err(format!(
"Expecting <binding>, found {}",
self.reader.decode(event.name())
).into()));
},
State::Binding => {
if term.is_some() {
return Some(Err(
"There is already a value for the current binding".into()
));
}
if event.name() == b"uri" {
state = State::Uri;
} else if event.name() == b"bnode" {
state = State::BNode;
} else if event.name() == b"literal" {
for attr in event.attributes() {
if let Ok(attr) = attr {
if attr.key == b"xml:lang" {
match attr.unescape_and_decode_value(&self.reader) {
Ok(val) => lang = Some(val),
Err(error) => return Some(Err(error.into())),
}
} else if attr.key == b"datatype" {
match attr.unescaped_value() {
Ok(val) => {
match NamedNode::from_str(&self.reader.decode(&val))
{
Ok(dt) => datatype = Some(dt),
Err(error) => return Some(Err(error)),
}
}
Err(error) => return Some(Err(error.into())),
}
}
}
}
state = State::Literal;
} else {
return Some(Err(format!(
"Expecting <uri>, <bnode> or <literal> found {}",
self.reader.decode(event.name())
).into()));
}
}
_ => (),
},
Event::Text(event) => match event.unescaped() {
Ok(data) => match state {
State::Uri => match NamedNode::from_str(&self.reader.decode(&data)) {
Ok(named_node) => term = Some(named_node.into()),
Err(error) => return Some(Err(error)),
},
State::BNode => {
term = Some(
self.bnodes_map
.entry(data.to_vec())
.or_insert_with(BlankNode::default)
.clone()
.into(),
)
}
State::Literal => {
let value = self.reader.decode(&data).to_string();
term = Some(
match datatype {
Some(ref datatype) => {
Literal::new_typed_literal(value, datatype.clone())
}
None => match lang {
Some(ref lang) => Literal::new_language_tagged_literal(
value,
lang.clone(),
),
None => Literal::new_simple_literal(value),
},
}.into(),
)
}
_ => {
return Some(Err(format!(
"Unexpected textual value found: {}",
self.reader.decode(&data)
).into()))
}
},
Err(error) => return Some(Err(error.into())),
},
Event::End(_) => match state {
State::Start => state = State::End,
State::Result => return Some(Ok(new_bindings)),
State::Binding => {
match (&current_var, &term) {
(Some(var), Some(term)) => {
new_bindings[self.mapping[var]] = Some(term.clone())
}
(Some(var), None) => {
return Some(Err(format!(
"No variable found for variable {}",
self.reader.decode(&var)
).into()))
}
_ => return Some(Err("No name found for <binding> tag".into())),
}
term = None;
state = State::Result;
}
State::Uri | State::BNode | State::Literal => state = State::Binding,
_ => (),
},
Event::Eof => return None,
_ => (),
}
}
}
}

@ -2,6 +2,7 @@ pub mod isomorphism;
mod memory;
mod numeric_encoder;
mod rocksdb;
mod sparql;
mod store;
pub use store::memory::MemoryDataset;

@ -0,0 +1,315 @@
use errors::*;
use sparql::algebra::*;
use std::iter::once;
use std::iter::Iterator;
use std::sync::Arc;
use store::numeric_encoder::EncodedTerm;
use store::store::EncodedQuadsStore;
type EncodedBinding = Vec<Option<EncodedTerm>>;
/// A lazily produced table of solutions whose terms are in their encoded form.
///
/// `variables` is the header; each row yielded by `iter` stores the value of
/// `variables[i]` at position `i` (`None` when unbound).
struct EncodedBindingsIterator {
    variables: Vec<Variable>,
    iter: Box<dyn Iterator<Item = Result<EncodedBinding>>>,
}

impl EncodedBindingsIterator {
    /// Keeps at most the first `n` rows.
    fn take(self, n: usize) -> Self {
        EncodedBindingsIterator {
            variables: self.variables,
            iter: Box::new(self.iter.take(n)),
        }
    }

    /// Drops the first `n` rows.
    fn skip(self, n: usize) -> Self {
        EncodedBindingsIterator {
            variables: self.variables,
            iter: Box::new(self.iter.skip(n)),
        }
    }

    /// Re-arranges every row so that its columns are exactly `on_variables`.
    ///
    /// Variables of `on_variables` that are unknown to the input stay unbound.
    fn project(self, on_variables: Vec<Variable>) -> Self {
        let EncodedBindingsIterator { variables, iter } = self;
        // (position in the input row, position in the output row)
        let projection: Vec<(usize, usize)> = on_variables
            .iter()
            .enumerate()
            .filter_map(|(new_pos, v)| slice_key(&variables, v).map(|old_pos| (old_pos, new_pos)))
            .collect();
        let new_len = on_variables.len();
        EncodedBindingsIterator {
            variables: on_variables,
            iter: Box::new(iter.map(move |binding| {
                let binding = binding?;
                let mut new_binding = vec![None; new_len];
                for &(old_pos, new_pos) in &projection {
                    // A row may be shorter than the header: a missing trailing
                    // value is an unbound variable, not a reason to panic.
                    new_binding[new_pos] = binding.get(old_pos).and_then(|value| *value);
                }
                Ok(new_binding)
            })),
        }
    }
}
impl Default for EncodedBindingsIterator {
fn default() -> Self {
EncodedBindingsIterator {
variables: Vec::default(),
iter: Box::new(once(Ok(Vec::default()))),
}
}
}
/// Returns the index of the first item of `slice` equal to `element`, if any.
fn slice_key<T: Eq>(slice: &[T], element: &T) -> Option<usize> {
    slice.iter().position(|item| item == element)
}
/// Evaluates parsed SPARQL queries against the quads of an `EncodedQuadsStore`.
pub struct SparqlEvaluator<S: EncodedQuadsStore> {
// Shared handle on the store; it is cloned into the lazy result iterators.
store: Arc<S>,
}
impl<S: EncodedQuadsStore> SparqlEvaluator<S> {
pub fn new(store: Arc<S>) -> Self {
Self { store }
}
/// Evaluates `query` against the store.
///
/// Only `Query::SelectQuery` is handled for now; any other kind of query is
/// reported as an error instead of aborting the caller with a panic.
pub fn evaluate(&self, query: &Query) -> Result<QueryResult> {
    match query {
        Query::SelectQuery { algebra, .. } => {
            let solutions =
                self.eval_list_pattern(algebra, EncodedBindingsIterator::default())?;
            Ok(QueryResult::Bindings(self.decode_bindings(solutions)))
        }
        _ => Err("Only SELECT queries can be evaluated for now".into()),
    }
}
/// Evaluates a list pattern, starting from the solutions in `from`.
///
/// Ordering and duplicate removal are not applied yet: `OrderBy`, `Distinct`
/// and `Reduced` simply evaluate the pattern they wrap.
fn eval_list_pattern(
    &self,
    pattern: &ListPattern,
    from: EncodedBindingsIterator,
) -> Result<EncodedBindingsIterator> {
    match pattern {
        ListPattern::Data(bs) => Ok(self.encode_bindings(bs)),
        ListPattern::ToList(l) => self.eval_multi_set_pattern(l, from),
        ListPattern::OrderBy(l, _) => self.eval_list_pattern(l, from), //TODO
        ListPattern::Project(l, new_variables) => {
            let inner = self.eval_list_pattern(l, from)?;
            Ok(inner.project(new_variables.clone()))
        }
        ListPattern::Distinct(l) => self.eval_list_pattern(l, from), //TODO
        ListPattern::Reduced(l) => self.eval_list_pattern(l, from),
        ListPattern::Slice(l, start, length) => {
            let all = self.eval_list_pattern(l, from)?;
            let after_offset = if *start > 0 { all.skip(*start) } else { all };
            Ok(match length {
                Some(length) => after_offset.take(*length),
                None => after_offset,
            })
        }
    }
}
/// Evaluates a multiset pattern, starting from the solutions in `from`.
///
/// Basic graph patterns, joins and nested list patterns are supported. The
/// other kinds of pattern are valid SPARQL that is not implemented yet: they
/// are reported as an error rather than by panicking.
fn eval_multi_set_pattern(
    &self,
    pattern: &MultiSetPattern,
    from: EncodedBindingsIterator,
) -> Result<EncodedBindingsIterator> {
    match pattern {
        MultiSetPattern::BGP(p) => {
            // Each triple (or path) pattern refines the solutions of the previous ones.
            let mut iter = from;
            for pattern in p {
                iter = match pattern {
                    TripleOrPathPattern::Triple(pattern) => {
                        self.eval_triple_pattern(pattern, iter)
                    }
                    TripleOrPathPattern::Path(pattern) => self.eval_path_pattern(pattern, iter),
                }?;
            }
            Ok(iter)
        }
        MultiSetPattern::Join(a, b) => {
            let left = self.eval_multi_set_pattern(a, from)?;
            self.eval_multi_set_pattern(b, left)
        }
        MultiSetPattern::ToMultiSet(l) => self.eval_list_pattern(l, from),
        MultiSetPattern::LeftJoin(..)
        | MultiSetPattern::Filter(..)
        | MultiSetPattern::Union(..)
        | MultiSetPattern::Graph(..)
        | MultiSetPattern::Extend(..)
        | MultiSetPattern::Minus(..)
        | MultiSetPattern::Service(..)
        | MultiSetPattern::AggregateJoin(..) => Err(format!(
            "The evaluation of the pattern {} is not supported yet",
            pattern
        ).into()),
    }
}
/// Extends every solution of `from` with the quads of the store matching `pattern`.
///
/// Variables of the pattern not yet present in the header of `from` are
/// appended to it, so the produced rows may be longer than the input rows.
fn eval_triple_pattern(
&self,
pattern: &TriplePattern,
from: EncodedBindingsIterator,
) -> Result<EncodedBindingsIterator> {
let EncodedBindingsIterator {
mut variables,
iter: from_iter,
} = from;
// For each position of the pattern: either an encoded constant or the column of a variable.
let subject =
self.binding_value_lookup_from_term_or_variable(&pattern.subject, &mut variables)?;
let predicate = self
.binding_value_lookup_from_named_node_or_variable(&pattern.predicate, &mut variables)?;
let object =
self.binding_value_lookup_from_term_or_variable(&pattern.object, &mut variables)?;
let store = self.store.clone();
// Length of the header once the variables of this pattern have been registered.
let variables_len = variables.len();
Ok(EncodedBindingsIterator {
variables,
iter: Box::new(from_iter.flat_map(move |binding| {
let result: Box<dyn Iterator<Item = Result<EncodedBinding>>> = match binding {
Ok(mut binding) => {
// Positions already bound in the input row (or constant) restrict the lookup; the others are wildcards.
match store.quads_for_pattern(
subject.get(&binding),
predicate.get(&binding),
object.get(&binding),
None, //TODO
) {
Ok(iter) => Box::new(iter.map(move |quad| {
let quad = quad?;
let mut binding = binding.clone();
// Input rows may predate the variables added above: pad them with unbound slots.
binding.resize(variables_len, None);
// NOTE(review): when the same variable appears at two positions the later `put` overwrites the earlier one without comparing the values — confirm this is intended.
subject.put(quad.subject, &mut binding);
predicate.put(quad.predicate, &mut binding);
object.put(quad.object, &mut binding);
Ok(binding)
})),
Err(error) => Box::new(once(Err(error))),
}
}
// Errors of the input are forwarded as they are.
Err(error) => Box::new(once(Err(error))),
};
result
})),
})
}
/// Evaluates a property path pattern.
///
/// Property paths are not implemented yet: an error is returned so that a
/// query using them fails cleanly instead of panicking.
fn eval_path_pattern(
    &self,
    pattern: &PathPattern,
    from: EncodedBindingsIterator,
) -> Result<EncodedBindingsIterator> {
    Err("The evaluation of property paths is not supported yet".into())
}
/// Builds the lookup for a subject or object position of a triple pattern.
///
/// A term is encoded once and becomes a constant. A variable becomes the
/// index of its column in `variables`, and is appended to `variables` first
/// if it is not there yet.
fn binding_value_lookup_from_term_or_variable(
    &self,
    term_or_variable: &TermOrVariable,
    variables: &mut Vec<Variable>,
) -> Result<BindingValueLookup> {
    let variable = match term_or_variable {
        TermOrVariable::Term(term) => {
            let encoded = self.store.encoder().encode_term(term)?;
            return Ok(BindingValueLookup::Constant(encoded));
        }
        TermOrVariable::Variable(variable) => variable,
    };
    let position = if let Some(position) = slice_key(variables, variable) {
        position
    } else {
        variables.push(variable.clone());
        variables.len() - 1
    };
    Ok(BindingValueLookup::Variable(position))
}
/// Builds the lookup for the predicate position of a triple pattern.
///
/// A named node is encoded once and becomes a constant. A variable becomes
/// the index of its column in `variables`, and is appended to `variables`
/// first if it is not there yet.
fn binding_value_lookup_from_named_node_or_variable(
    &self,
    named_node_or_variable: &NamedNodeOrVariable,
    variables: &mut Vec<Variable>,
) -> Result<BindingValueLookup> {
    let variable = match named_node_or_variable {
        NamedNodeOrVariable::NamedNode(named_node) => {
            let encoded = self.store.encoder().encode_named_node(named_node)?;
            return Ok(BindingValueLookup::Constant(encoded));
        }
        NamedNodeOrVariable::Variable(variable) => variable,
    };
    let position = if let Some(position) = slice_key(variables, variable) {
        position
    } else {
        variables.push(variable.clone());
        variables.len() - 1
    };
    Ok(BindingValueLookup::Variable(position))
}
/// Encodes a static table of solutions with the encoder of the store.
///
/// The whole table is encoded eagerly; a row that fails to encode is kept
/// as an error at its position.
fn encode_bindings(&self, bindings: &StaticBindings) -> EncodedBindingsIterator {
    let encoder = self.store.encoder();
    let encoded_rows: Vec<Result<EncodedBinding>> = bindings
        .values_iter()
        .map(move |row| {
            let mut encoded_row = Vec::with_capacity(row.len());
            for value in row {
                let encoded = if let Some(term) = value {
                    Some(encoder.encode_term(term)?)
                } else {
                    None
                };
                encoded_row.push(encoded);
            }
            Ok(encoded_row)
        }).collect();
    EncodedBindingsIterator {
        variables: bindings.variables().to_vec(),
        iter: Box::new(encoded_rows.into_iter()),
    }
}
/// Turns encoded solutions back into terms.
///
/// The decoding is lazy: each row is decoded with the encoder of the store
/// when it is pulled from the returned iterator.
fn decode_bindings(&self, iter: EncodedBindingsIterator) -> BindingsIterator {
    let EncodedBindingsIterator { variables, iter } = iter;
    let store = self.store.clone();
    let decoded_rows = iter.map(move |encoded_row| {
        let encoded_row = encoded_row?;
        let encoder = store.encoder();
        let mut row = Vec::with_capacity(encoded_row.len());
        for value in encoded_row {
            let decoded = if let Some(term) = value {
                Some(encoder.decode_term(term)?)
            } else {
                None
            };
            row.push(decoded);
        }
        Ok(row)
    });
    BindingsIterator::new(variables, Box::new(decoded_rows))
}
}
/// How one position of a triple pattern relates to a row of solutions:
/// either a fixed encoded term or the column of a variable.
#[derive(Clone, Copy)]
enum BindingValueLookup {
    Constant(EncodedTerm),
    Variable(usize),
}

impl BindingValueLookup {
    /// The value to look for: the constant itself, or what `binding` holds
    /// for the variable (`None` when unbound or when the row is too short).
    fn get(&self, binding: &[Option<EncodedTerm>]) -> Option<EncodedTerm> {
        match *self {
            BindingValueLookup::Constant(term) => Some(term),
            BindingValueLookup::Variable(position) => {
                binding.get(position).and_then(|value| *value)
            }
        }
    }

    /// Stores `value` in the column of the variable; does nothing for a constant.
    fn put(&self, value: EncodedTerm, binding: &mut EncodedBinding) {
        if let BindingValueLookup::Variable(position) = *self {
            binding[position] = Some(value);
        }
    }
}

@ -1,16 +1,20 @@
use errors::*;
use model::*;
use sparql::algebra::QueryResult;
use sparql::parser::read_sparql_query;
use std::fmt;
use std::io::Read;
use std::iter::empty;
use std::iter::once;
use std::iter::FromIterator;
use std::iter::Iterator;
use std::sync::Arc;
use store::numeric_encoder::*;
use store::sparql::SparqlEvaluator;
/// Defines the store trait that is used to provide efficient binary storage
pub trait EncodedQuadsStore: BytesStore + Sized {
pub trait EncodedQuadsStore: BytesStore + Sized + 'static {
type QuadsIterator: Iterator<Item = Result<EncodedQuad>> + 'static;
type QuadsForSubjectIterator: Iterator<Item = Result<EncodedQuad>> + 'static;
type QuadsForSubjectPredicateIterator: Iterator<Item = Result<EncodedQuad>> + 'static;
@ -345,6 +349,11 @@ impl<S: EncodedQuadsStore> Dataset for StoreDataset<S> {
/// Returns `true` when the store holds no quad at all.
///
/// Only the first item of the quads iterator is pulled; an error on that
/// item is returned to the caller.
fn is_empty(&self) -> Result<bool> {
    match self.store.quads()?.next() {
        Some(quad) => quad.map(|_| false),
        None => Ok(true),
    }
}
/// Parses the SPARQL query read from `query` (without base IRI) and
/// evaluates it against this dataset's store.
fn query(&self, query: impl Read) -> Result<QueryResult> {
    let parsed = read_sparql_query(query, None)?;
    let evaluator = SparqlEvaluator::new(self.store.clone());
    evaluator.evaluate(&parsed)
}
}
impl<S: EncodedQuadsStore> fmt::Display for StoreDataset<S> {

@ -1,5 +1,4 @@
//! Integration tests based on [SPARQL 1.1 Test Cases](https://www.w3.org/2009/sparql/docs/tests/README.html)
#[macro_use]
extern crate lazy_static;
extern crate reqwest;
@ -12,11 +11,14 @@ use rudf::errors::*;
use rudf::model::vocab::rdf;
use rudf::model::vocab::rdfs;
use rudf::model::*;
use rudf::rio::ntriples::read_ntriples;
use rudf::rio::turtle::read_turtle;
use rudf::rio::xml::read_rdf_xml;
use rudf::sparql::algebra::Query;
use rudf::sparql::algebra::QueryResult;
use rudf::sparql::parser::read_sparql_query;
use rudf::sparql::xml_results::read_xml_results;
use rudf::store::isomorphism::GraphIsomorphism;
use rudf::store::MemoryDataset;
use rudf::store::MemoryGraph;
use std::error::Error;
use std::fmt;
@ -37,7 +39,6 @@ fn sparql_w3c_syntax_testsuite() {
//TODO: Deserialization of the serialization failing:
NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql2/manifest#syntax-form-construct04").unwrap(),
NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql2/manifest#syntax-function-04").unwrap(),
NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql1/manifest#syntax-lit-08").unwrap(),
NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql1/manifest#syntax-qname-04").unwrap(),
];
let client = RDFClient::default();
@ -50,7 +51,7 @@ fn sparql_w3c_syntax_testsuite() {
continue;
}
if test.kind == "PositiveSyntaxTest" || test.kind == "PositiveSyntaxTest11" {
match client.load_sparql_query(test.action.clone()) {
match client.load_sparql_query(test.query.clone()) {
Err(error) => assert!(false, "Failure on {} with error: {}", test, error),
Ok(query) => {
if let Err(error) = read_sparql_query(query.to_string().as_bytes(), None) {
@ -66,7 +67,7 @@ fn sparql_w3c_syntax_testsuite() {
}
} else if test.kind == "NegativeSyntaxTest" || test.kind == "NegativeSyntaxTest11" {
//TODO
if let Ok(result) = client.load_sparql_query(test.action.clone()) {
if let Ok(result) = client.load_sparql_query(test.query.clone()) {
eprintln!("Failure on {}. The output tree is: {}", test, result);
}
} else {
@ -75,6 +76,63 @@ fn sparql_w3c_syntax_testsuite() {
}
}
/// Runs the query evaluation tests of the "basic" manifest: loads the data,
/// evaluates the query and compares the result, as a graph, with the expected one.
#[test]
fn sparql_w3c_query_evaluation_testsuite() {
    let manifest_10_url =
        Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/basic/manifest.ttl")
            .unwrap();
    let test_blacklist = vec![];
    let client = RDFClient::default();

    for test_result in TestManifest::new(&client, manifest_10_url) {
        let test = test_result.unwrap();
        if test_blacklist.contains(&test.id) {
            continue;
        }
        if test.kind != "QueryEvaluationTest" {
            panic!("Not supported test: {}", test);
        }

        // The test data, when there is some, goes into the default graph.
        let data = MemoryDataset::default();
        if let Some(data_url) = &test.data {
            let default_graph = data.default_graph();
            client
                .load_graph(data_url.clone())
                .unwrap()
                .iter()
                .unwrap()
                .for_each(|triple| default_graph.insert(&triple.unwrap()).unwrap());
        }

        let result = match data.query(client.get(&test.query).unwrap()) {
            Ok(result) => result,
            Err(error) => panic!(
                "Failure to parse query of {} with error: {}",
                test, error
            ),
        };
        let actual_graph = to_graph(result).unwrap();
        let expected_graph = client
            .load_sparql_query_result_graph(test.result.clone().unwrap())
            .unwrap();
        assert!(
            actual_graph.is_isomorphic(&expected_graph).unwrap(),
            "Failure on {}. Expected file:\n{}\nOutput file:\n{}\nParsed query:\n{}\nData:\n{}\n",
            test,
            expected_graph,
            actual_graph,
            client.load_sparql_query(test.query.clone()).unwrap(),
            data
        )
    }
}
/// Helper used by the test suites to fetch remote test resources (manifests, data files,
/// queries and expected results) and parse them; see the methods of `impl RDFClient`.
pub struct RDFClient {
// Wrapped client. NOTE(review): presumably the HTTP client `get` relies on to download
// URLs; the body of `get` is not visible here, confirm against it.
client: Client,
}
@ -88,20 +146,26 @@ impl Default for RDFClient {
}
impl RDFClient {
pub fn load_turtle(&self, url: Url) -> Result<MemoryGraph> {
Ok(read_turtle(self.get(&url)?, Some(url))?.collect())
}
pub fn load_ntriples(&self, url: Url) -> Result<MemoryGraph> {
read_ntriples(self.get(&url)?).collect()
fn load_graph(&self, url: Url) -> Result<MemoryGraph> {
if url.as_str().ends_with(".ttl") {
Ok(read_turtle(self.get(&url)?, Some(url))?.collect())
} else if url.as_str().ends_with(".rdf") {
read_rdf_xml(BufReader::new(self.get(&url)?), Some(url)).collect()
} else {
Err(format!("Serialization type not found for {}", url).into())
}
}
pub fn load_rdf_xml(&self, url: Url) -> Result<MemoryGraph> {
read_rdf_xml(BufReader::new(self.get(&url)?), Some(url)).collect()
fn load_sparql_query(&self, url: Url) -> Result<Query> {
read_sparql_query(self.get(&url)?, Some(url))
}
pub fn load_sparql_query(&self, url: Url) -> Result<Query> {
read_sparql_query(self.get(&url)?, Some(url))
fn load_sparql_query_result_graph(&self, url: Url) -> Result<MemoryGraph> {
if url.as_str().ends_with(".srx") {
to_graph(read_xml_results(BufReader::new(self.get(&url)?))?)
} else {
self.load_graph(url)
}
}
fn get(&self, url: &Url) -> Result<Response> {
@ -117,12 +181,96 @@ impl RDFClient {
}
}
/// Terms of the `http://www.w3.org/2001/sw/DataAccess/tests/result-set#` vocabulary,
/// used by `to_graph` to describe SPARQL bindings as an RDF graph.
mod rs {
    use rudf::model::NamedNode;
    use std::str::FromStr;

    /// Parses a hard-coded IRI, panicking if it is not valid.
    fn iri(value: &str) -> NamedNode {
        NamedNode::from_str(value).unwrap()
    }

    lazy_static! {
        pub static ref RESULT_SET: NamedNode =
            iri("http://www.w3.org/2001/sw/DataAccess/tests/result-set#ResultSet");
        pub static ref RESULT_VARIABLE: NamedNode =
            iri("http://www.w3.org/2001/sw/DataAccess/tests/result-set#resultVariable");
        pub static ref SOLUTION: NamedNode =
            iri("http://www.w3.org/2001/sw/DataAccess/tests/result-set#solution");
        pub static ref BINDING: NamedNode =
            iri("http://www.w3.org/2001/sw/DataAccess/tests/result-set#binding");
        pub static ref VALUE: NamedNode =
            iri("http://www.w3.org/2001/sw/DataAccess/tests/result-set#value");
        pub static ref VARIABLE: NamedNode =
            iri("http://www.w3.org/2001/sw/DataAccess/tests/result-set#variable");
        pub static ref INDEX: NamedNode =
            iri("http://www.w3.org/2001/sw/DataAccess/tests/result-set#index");
    }
}
/// Converts a query result into an RDF graph so that results can be compared by isomorphism.
///
/// * `QueryResult::Graph` is returned unchanged.
/// * `QueryResult::Bindings` is described with the terms of the `rs` module: a blank node
///   typed `RESULT_SET`, one `RESULT_VARIABLE` literal per variable, one `SOLUTION` blank
///   node per row and, for each bound variable of a row, a `BINDING` blank node carrying
///   its `VALUE` and its `VARIABLE` name.
/// * `QueryResult::Boolean` is not supported yet and panics.
///
/// Errors raised while inserting triples, while reading a row, or when a variable has no
/// name are propagated to the caller.
fn to_graph(result: QueryResult) -> Result<MemoryGraph> {
    let bindings = match result {
        QueryResult::Graph(graph) => return Ok(graph),
        QueryResult::Boolean(_) => unimplemented!(),
        QueryResult::Bindings(bindings) => bindings,
    };

    let graph = MemoryGraph::default();
    let result_set = BlankNode::default();
    graph.insert(&Triple::new(
        result_set.clone(),
        rdf::TYPE.clone(),
        rs::RESULT_SET.clone(),
    ))?;

    let (variables, solutions) = bindings.destruct();

    // Declare every projected variable on the result set node.
    for variable in &variables {
        let declaration = Triple::new(
            result_set.clone(),
            rs::RESULT_VARIABLE.clone(),
            Literal::from(variable.name()?),
        );
        graph.insert(&declaration)?;
    }

    for row in solutions {
        let row = row?;
        let solution = BlankNode::default();
        graph.insert(&Triple::new(
            result_set.clone(),
            rs::SOLUTION.clone(),
            solution.clone(),
        ))?;

        // Rows are positional: the value at index `position` belongs to `variables[position]`.
        // Unbound positions produce no binding node at all.
        for (position, variable) in variables.iter().enumerate() {
            if let Some(value) = &row[position] {
                let binding = BlankNode::default();
                graph.insert(&Triple::new(
                    solution.clone(),
                    rs::BINDING.clone(),
                    binding.clone(),
                ))?;
                graph.insert(&Triple::new(
                    binding.clone(),
                    rs::VALUE.clone(),
                    value.clone(),
                ))?;
                graph.insert(&Triple::new(
                    binding.clone(),
                    rs::VARIABLE.clone(),
                    Literal::from(variable.name()?),
                ))?;
            }
        }
    }

    Ok(graph)
}
pub struct Test {
pub id: NamedNode,
pub kind: String,
pub name: Option<String>,
pub comment: Option<String>,
pub action: Url,
pub query: Url,
pub data: Option<Url>,
pub result: Option<Url>,
}
@ -135,7 +283,13 @@ impl fmt::Display for Test {
for comment in &self.comment {
write!(f, " with comment \"{}\"", comment)?;
}
write!(f, " on file \"{}\"", self.action)?;
write!(f, " on query {}", self.query)?;
for data in &self.data {
write!(f, " with data {}", data)?;
}
for result in &self.result {
write!(f, " and expected result {}", result)?;
}
Ok(())
}
}
@ -181,6 +335,20 @@ pub mod mf {
}
}
/// Terms of the `http://www.w3.org/2001/sw/DataAccess/tests/test-query#` vocabulary,
/// used when reading a test action to find its query and data files.
pub mod qt {
    use rudf::model::NamedNode;
    use std::str::FromStr;

    /// Parses a hard-coded IRI, panicking if it is not valid.
    fn iri(value: &str) -> NamedNode {
        NamedNode::from_str(value).unwrap()
    }

    lazy_static! {
        pub static ref QUERY: NamedNode =
            iri("http://www.w3.org/2001/sw/DataAccess/tests/test-query#query");
        pub static ref DATA: NamedNode =
            iri("http://www.w3.org/2001/sw/DataAccess/tests/test-query#data");
    }
}
impl<'a> Iterator for TestManifest<'a> {
type Item = Result<Test>;
@ -215,21 +383,46 @@ impl<'a> Iterator for TestManifest<'a> {
Some(Term::Literal(c)) => Some(c.value().to_string()),
_ => None,
};
let action = match self
let (query, data) = match self
.graph
.object_for_subject_predicate(&test_subject, &*mf::ACTION)
.unwrap()
{
Some(Term::NamedNode(n)) => n.url().clone(),
Some(Term::NamedNode(n)) => (n.into(), None),
Some(Term::BlankNode(n)) => {
let n = n.into();
let query = match self
.graph
.object_for_subject_predicate(&n, &qt::QUERY)
.unwrap()
{
Some(Term::NamedNode(q)) => q.into(),
Some(_) => return Some(Err("invalid query".into())),
None => return Some(Err("query not found".into())),
};
let data = match self
.graph
.object_for_subject_predicate(&n, &qt::DATA)
.unwrap()
{
Some(Term::NamedNode(q)) => Some(q.into()),
_ => None,
};
(query, data)
}
Some(_) => return Some(Err("invalid action".into())),
None => return Some(Err("action not found".into())),
None => {
return Some(Err(
format!("action not found for test {}", test_subject).into()
))
}
};
let result = match self
.graph
.object_for_subject_predicate(&test_subject, &*mf::RESULT)
.unwrap()
{
Some(Term::NamedNode(n)) => Some(n.url().clone()),
Some(Term::NamedNode(n)) => Some(n.into()),
Some(_) => return Some(Err("invalid result".into())),
None => None,
};
@ -238,7 +431,8 @@ impl<'a> Iterator for TestManifest<'a> {
kind,
name,
comment,
action,
query,
data,
result,
}))
}
@ -247,7 +441,7 @@ impl<'a> Iterator for TestManifest<'a> {
match self.manifests_to_do.pop() {
Some(url) => {
let manifest = NamedOrBlankNode::from(NamedNode::new(url.clone()));
match self.client.load_turtle(url) {
match self.client.load_graph(url) {
Ok(g) => g
.iter()
.unwrap()
@ -289,7 +483,7 @@ impl<'a> Iterator for TestManifest<'a> {
Some(term) => {
return Some(Err(
format!("Invalid tests list. Got term {}", term).into()
))
));
}
None => (),
}

Loading…
Cancel
Save