Adds support for SPARQL DISTINCT and UNION evaluation

Uses the Literal::new_simple_literal constructor where relevant
pull/10/head
Tpt 6 years ago
parent 5cff79c372
commit c1ea33b5aa
  1. 2
      src/rio/ntriples/ntriples_grammar.rustpeg
  2. 2
      src/rio/turtle/turtle_grammar.rustpeg
  3. 2
      src/sparql/sparql_grammar.rustpeg
  4. 38
      src/sparql/xml_results.rs
  5. 30
      src/store/numeric_encoder.rs
  6. 89
      src/store/sparql.rs
  7. 16
      tests/sparql_test_cases.rs

@ -33,7 +33,7 @@ object -> Term =
literal -> Literal = literal -> Literal =
v: STRING_LITERAL_QUOTE _ "^^" _ t:IRIREF { Literal::new_typed_literal(v, t) } / v: STRING_LITERAL_QUOTE _ "^^" _ t:IRIREF { Literal::new_typed_literal(v, t) } /
v: STRING_LITERAL_QUOTE _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } / v: STRING_LITERAL_QUOTE _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } /
v: STRING_LITERAL_QUOTE { v.into() } v: STRING_LITERAL_QUOTE { Literal::new_simple_literal(v) }
//[144s] //[144s]

@ -128,7 +128,7 @@ NumericLiteral -> Literal =
RDFLiteral -> Literal = RDFLiteral -> Literal =
v:String _ "^^" _ t:iri { Literal::new_typed_literal(v, t) } / v:String _ "^^" _ t:iri { Literal::new_typed_literal(v, t) } /
v:String _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } / v:String _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } /
v:String { v.into() } v:String { Literal::new_simple_literal(v) }
//[133s] //[133s]
BooleanLiteral -> Literal = BooleanLiteral -> Literal =

@ -864,7 +864,7 @@ iriOrFunction -> Expression = i: iri _ a: ArgList? {
RDFLiteral -> Literal = RDFLiteral -> Literal =
v:String _ "^^" _ t:iri { Literal::new_typed_literal(v, t) } / v:String _ "^^" _ t:iri { Literal::new_typed_literal(v, t) } /
v:String _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } / v:String _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } /
v:String { v.into() } v:String { Literal::new_simple_literal(v) }
//[130] //[130]
NumericLiteral -> Literal = NumericLiteralUnsigned / NumericLiteralPositive / NumericLiteralNegative NumericLiteral -> Literal = NumericLiteralUnsigned / NumericLiteralPositive / NumericLiteralNegative

@ -279,20 +279,7 @@ impl<R: BufRead> Iterator for ResultsIterator<R> {
} }
State::Literal => { State::Literal => {
let value = self.reader.decode(&data).to_string(); let value = self.reader.decode(&data).to_string();
term = Some( term = Some(build_literal(value, &lang, &datatype).into());
match datatype {
Some(ref datatype) => {
Literal::new_typed_literal(value, datatype.clone())
}
None => match lang {
Some(ref lang) => Literal::new_language_tagged_literal(
value,
lang.clone(),
),
None => Literal::new_simple_literal(value),
},
}.into(),
)
} }
_ => { _ => {
return Some(Err(format!( return Some(Err(format!(
@ -322,7 +309,14 @@ impl<R: BufRead> Iterator for ResultsIterator<R> {
term = None; term = None;
state = State::Result; state = State::Result;
} }
State::Uri | State::BNode | State::Literal => state = State::Binding, State::Uri | State::BNode => state = State::Binding,
State::Literal => {
if term.is_none() {
//We default to the empty literal
term = Some(build_literal("", &lang, &datatype).into())
}
state = State::Binding;
}
_ => (), _ => (),
}, },
Event::Eof => return None, Event::Eof => return None,
@ -331,3 +325,17 @@ impl<R: BufRead> Iterator for ResultsIterator<R> {
} }
} }
} }
/// Builds the literal described by a SPARQL XML results `<literal>` element.
///
/// A datatype, when present, takes precedence over a language tag; when
/// neither is given a simple literal is produced.
fn build_literal(
    value: impl Into<String>,
    lang: &Option<String>,
    datatype: &Option<NamedNode>,
) -> Literal {
    match (datatype, lang) {
        // The datatype wins even if a language tag was also supplied.
        (Some(datatype_iri), _) => Literal::new_typed_literal(value, datatype_iri.clone()),
        (None, Some(language_tag)) => {
            Literal::new_language_tagged_literal(value, language_tag.clone())
        }
        (None, None) => Literal::new_simple_literal(value),
    }
}

@ -19,8 +19,9 @@ pub trait BytesStore {
const TYPE_DEFAULT_GRAPH_ID: u8 = 0; const TYPE_DEFAULT_GRAPH_ID: u8 = 0;
const TYPE_NAMED_NODE_ID: u8 = 1; const TYPE_NAMED_NODE_ID: u8 = 1;
const TYPE_BLANK_NODE_ID: u8 = 2; const TYPE_BLANK_NODE_ID: u8 = 2;
const TYPE_LANG_STRING_LITERAL_ID: u8 = 3; const TYPE_SIMPLE_LITERAL_ID: u8 = 3;
const TYPE_TYPED_LITERAL_ID: u8 = 4; const TYPE_LANG_STRING_LITERAL_ID: u8 = 4;
const TYPE_TYPED_LITERAL_ID: u8 = 5;
pub static ENCODED_DEFAULT_GRAPH: EncodedTerm = EncodedTerm::DefaultGraph {}; pub static ENCODED_DEFAULT_GRAPH: EncodedTerm = EncodedTerm::DefaultGraph {};
@ -29,6 +30,7 @@ pub enum EncodedTerm {
DefaultGraph {}, DefaultGraph {},
NamedNode { iri_id: u64 }, NamedNode { iri_id: u64 },
BlankNode(Uuid), BlankNode(Uuid),
SimpleLiteral { value_id: u64 },
LangStringLiteral { value_id: u64, language_id: u64 }, LangStringLiteral { value_id: u64, language_id: u64 },
TypedLiteral { value_id: u64, datatype_id: u64 }, TypedLiteral { value_id: u64, datatype_id: u64 },
} }
@ -39,6 +41,7 @@ impl EncodedTerm {
EncodedTerm::DefaultGraph { .. } => TYPE_DEFAULT_GRAPH_ID, EncodedTerm::DefaultGraph { .. } => TYPE_DEFAULT_GRAPH_ID,
EncodedTerm::NamedNode { .. } => TYPE_NAMED_NODE_ID, EncodedTerm::NamedNode { .. } => TYPE_NAMED_NODE_ID,
EncodedTerm::BlankNode(_) => TYPE_BLANK_NODE_ID, EncodedTerm::BlankNode(_) => TYPE_BLANK_NODE_ID,
EncodedTerm::SimpleLiteral { .. } => TYPE_SIMPLE_LITERAL_ID,
EncodedTerm::LangStringLiteral { .. } => TYPE_LANG_STRING_LITERAL_ID, EncodedTerm::LangStringLiteral { .. } => TYPE_LANG_STRING_LITERAL_ID,
EncodedTerm::TypedLiteral { .. } => TYPE_TYPED_LITERAL_ID, EncodedTerm::TypedLiteral { .. } => TYPE_TYPED_LITERAL_ID,
} }
@ -88,6 +91,9 @@ impl<R: Read> TermReader for R {
self.read_exact(&mut uuid_buffer)?; self.read_exact(&mut uuid_buffer)?;
Ok(EncodedTerm::BlankNode(Uuid::from_bytes(uuid_buffer))) Ok(EncodedTerm::BlankNode(Uuid::from_bytes(uuid_buffer)))
} }
TYPE_SIMPLE_LITERAL_ID => Ok(EncodedTerm::SimpleLiteral {
value_id: self.read_u64::<NetworkEndian>()?,
}),
TYPE_LANG_STRING_LITERAL_ID => Ok(EncodedTerm::LangStringLiteral { TYPE_LANG_STRING_LITERAL_ID => Ok(EncodedTerm::LangStringLiteral {
language_id: self.read_u64::<NetworkEndian>()?, language_id: self.read_u64::<NetworkEndian>()?,
value_id: self.read_u64::<NetworkEndian>()?, value_id: self.read_u64::<NetworkEndian>()?,
@ -154,6 +160,9 @@ impl<R: Write> TermWriter for R {
EncodedTerm::DefaultGraph {} => {} EncodedTerm::DefaultGraph {} => {}
EncodedTerm::NamedNode { iri_id } => self.write_u64::<NetworkEndian>(iri_id)?, EncodedTerm::NamedNode { iri_id } => self.write_u64::<NetworkEndian>(iri_id)?,
EncodedTerm::BlankNode(id) => self.write_all(id.as_bytes())?, EncodedTerm::BlankNode(id) => self.write_all(id.as_bytes())?,
EncodedTerm::SimpleLiteral { value_id } => {
self.write_u64::<NetworkEndian>(value_id)?;
}
EncodedTerm::LangStringLiteral { EncodedTerm::LangStringLiteral {
value_id, value_id,
language_id, language_id,
@ -217,17 +226,23 @@ impl<S: BytesStore> Encoder<S> {
} }
pub fn encode_literal(&self, literal: &Literal) -> Result<EncodedTerm> { pub fn encode_literal(&self, literal: &Literal) -> Result<EncodedTerm> {
Ok(if literal.is_plain() {
if let Some(language) = literal.language() { if let Some(language) = literal.language() {
Ok(EncodedTerm::LangStringLiteral { EncodedTerm::LangStringLiteral {
value_id: self.encode_str_value(&literal.value())?, value_id: self.encode_str_value(&literal.value())?,
language_id: self.encode_str_value(language)?, language_id: self.encode_str_value(language)?,
}) }
} else { } else {
Ok(EncodedTerm::TypedLiteral { EncodedTerm::SimpleLiteral {
value_id: self.encode_str_value(&literal.value())?,
}
}
} else {
EncodedTerm::TypedLiteral {
value_id: self.encode_str_value(&literal.value())?, value_id: self.encode_str_value(&literal.value())?,
datatype_id: self.encode_str_value(literal.datatype().as_str())?, datatype_id: self.encode_str_value(literal.datatype().as_str())?,
})
} }
})
} }
pub fn encode_named_or_blank_node(&self, term: &NamedOrBlankNode) -> Result<EncodedTerm> { pub fn encode_named_or_blank_node(&self, term: &NamedOrBlankNode) -> Result<EncodedTerm> {
@ -277,6 +292,9 @@ impl<S: BytesStore> Encoder<S> {
Ok(NamedNode::from(self.decode_url_value(iri_id)?).into()) Ok(NamedNode::from(self.decode_url_value(iri_id)?).into())
} }
EncodedTerm::BlankNode(id) => Ok(BlankNode::from(id).into()), EncodedTerm::BlankNode(id) => Ok(BlankNode::from(id).into()),
EncodedTerm::SimpleLiteral { value_id } => {
Ok(Literal::new_simple_literal(self.decode_str_value(value_id)?).into())
}
EncodedTerm::LangStringLiteral { EncodedTerm::LangStringLiteral {
value_id, value_id,
language_id, language_id,

@ -1,4 +1,5 @@
use sparql::algebra::*; use sparql::algebra::*;
use std::collections::BTreeSet;
use std::iter::once; use std::iter::once;
use std::iter::Iterator; use std::iter::Iterator;
use std::sync::Arc; use std::sync::Arc;
@ -49,6 +50,85 @@ impl EncodedBindingsIterator {
})), })),
} }
} }
/// Removes duplicate solutions from this iterator (SPARQL `DISTINCT`).
///
/// The underlying iterator is drained eagerly. Every error is kept and
/// emitted first; the successful bindings follow, each one only once and in
/// the order of its first occurrence, so that an ordering established
/// upstream (e.g. by `ORDER BY`) is preserved.
fn unique(self) -> Self {
    let EncodedBindingsIterator { variables, iter } = self;
    // `seen` is only used for membership tests; `oks` keeps the surviving
    // bindings in input order (iterating the set would sort them instead).
    let mut seen = BTreeSet::default();
    let mut oks = Vec::default();
    let mut errors = Vec::default();
    for element in iter {
        match element {
            Ok(ok) => {
                // Clone only the bindings that are actually new.
                if !seen.contains(&ok) {
                    seen.insert(ok.clone());
                    oks.push(ok);
                }
            }
            Err(error) => errors.push(error),
        }
    }
    EncodedBindingsIterator {
        variables,
        iter: Box::new(errors.into_iter().map(Err).chain(oks.into_iter().map(Ok))),
    }
}
/// Concatenates two solution sequences (SPARQL `UNION`).
///
/// The resulting variable list is the variables of `self` followed by the
/// variables of `other` that `self` does not already have. Bindings coming
/// from `self` keep their layout and are padded with unbound (`None`) slots
/// for the newly added variables; bindings coming from `other` are remapped
/// into the merged layout. Errors from either side are passed through.
fn chain(self, other: Self) -> Self {
    let EncodedBindingsIterator {
        variables: variables1,
        iter: iter1,
    } = self;
    let EncodedBindingsIterator {
        variables: variables2,
        iter: iter2,
    } = other;

    // map_2_to_1[i] is the position, in the merged variable list, of the
    // i-th variable of `other`.
    let mut variables = variables1;
    let mut map_2_to_1 = Vec::with_capacity(variables2.len());
    for var in variables2 {
        map_2_to_1.push(match slice_key(&variables, &var) {
            Some(key) => key,
            None => {
                variables.push(var);
                variables.len() - 1
            }
        })
    }
    let variables_len = variables.len();

    EncodedBindingsIterator {
        variables,
        iter: Box::new(
            iter1
                .map(move |binding| {
                    binding.map(|mut binding| {
                        // Variables only known to `other` are unbound here.
                        binding.resize(variables_len, None);
                        binding
                    })
                })
                .chain(iter2.map(move |binding| {
                    let binding = binding?;
                    // Start from an all-unbound row: starting from a copy of
                    // `binding` would leave values of `other`'s layout in
                    // slots that belong to unrelated variables of `self`.
                    let mut new_binding = vec![None; variables_len];
                    for (new_key, value) in map_2_to_1.iter().zip(binding) {
                        new_binding[*new_key] = value;
                    }
                    Ok(new_binding)
                })),
        ),
    }
}
/// Splits this iterator into two iterators over the same solutions.
///
/// The source is fully consumed up front. Both halves replay every
/// successful binding in input order; the errors that were encountered are
/// handed to the second half only, where they are emitted before the
/// bindings.
fn duplicate(self) -> (Self, Self) {
    let EncodedBindingsIterator { variables, iter } = self;
    //TODO: optimize
    let mut bindings = Vec::default();
    let mut failures = Vec::default();
    iter.for_each(|element| match element {
        Ok(binding) => bindings.push(binding),
        Err(failure) => failures.push(failure),
    });

    let first = EncodedBindingsIterator {
        variables: variables.clone(),
        iter: Box::new(bindings.clone().into_iter().map(Ok)),
    };
    let second = EncodedBindingsIterator {
        variables,
        iter: Box::new(
            failures
                .into_iter()
                .map(Err)
                .chain(bindings.into_iter().map(Ok)),
        ),
    };
    (first, second)
}
} }
impl Default for EncodedBindingsIterator { impl Default for EncodedBindingsIterator {
@ -101,7 +181,7 @@ impl<S: EncodedQuadsStore> SparqlEvaluator<S> {
ListPattern::Project(l, new_variables) => Ok(self ListPattern::Project(l, new_variables) => Ok(self
.eval_list_pattern(l, from)? .eval_list_pattern(l, from)?
.project(new_variables.to_vec())), .project(new_variables.to_vec())),
ListPattern::Distinct(l) => self.eval_list_pattern(l, from), //TODO ListPattern::Distinct(l) => Ok(self.eval_list_pattern(l, from)?.unique()),
ListPattern::Reduced(l) => self.eval_list_pattern(l, from), ListPattern::Reduced(l) => self.eval_list_pattern(l, from),
ListPattern::Slice(l, start, length) => { ListPattern::Slice(l, start, length) => {
let mut iter = self.eval_list_pattern(l, from)?; let mut iter = self.eval_list_pattern(l, from)?;
@ -139,7 +219,12 @@ impl<S: EncodedQuadsStore> SparqlEvaluator<S> {
} }
MultiSetPattern::LeftJoin(a, b, e) => unimplemented!(), MultiSetPattern::LeftJoin(a, b, e) => unimplemented!(),
MultiSetPattern::Filter(e, p) => unimplemented!(), MultiSetPattern::Filter(e, p) => unimplemented!(),
MultiSetPattern::Union(a, b) => unimplemented!(), MultiSetPattern::Union(a, b) => {
let (from1, from2) = from.duplicate();
Ok(self
.eval_multi_set_pattern(a, from1)?
.chain(self.eval_multi_set_pattern(b, from2)?))
}
MultiSetPattern::Graph(g, p) => unimplemented!(), MultiSetPattern::Graph(g, p) => unimplemented!(),
MultiSetPattern::Extend(p, v, e) => unimplemented!(), MultiSetPattern::Extend(p, v, e) => unimplemented!(),
MultiSetPattern::Minus(a, b) => unimplemented!(), MultiSetPattern::Minus(a, b) => unimplemented!(),

@ -83,8 +83,18 @@ fn sparql_w3c_query_evaluation_testsuite() {
.unwrap(), .unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/triple-match/manifest.ttl") Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/triple-match/manifest.ttl")
.unwrap(), .unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest.ttl")
.unwrap(),
];
let test_blacklist = vec![
//With LeftJoin
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-4",
).unwrap(),
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#no-distinct-4",
).unwrap(),
]; ];
let test_blacklist = vec![];
let client = RDFClient::default(); let client = RDFClient::default();
for test_result in manifest_10_urls for test_result in manifest_10_urls
@ -232,7 +242,7 @@ fn to_graph(result: QueryResult) -> Result<MemoryGraph> {
graph.insert(&Triple::new( graph.insert(&Triple::new(
result_set.clone(), result_set.clone(),
rs::RESULT_VARIABLE.clone(), rs::RESULT_VARIABLE.clone(),
Literal::from(variable.name()?), Literal::new_simple_literal(variable.name()?),
))?; ))?;
} }
for binding_values in iter { for binding_values in iter {
@ -259,7 +269,7 @@ fn to_graph(result: QueryResult) -> Result<MemoryGraph> {
graph.insert(&Triple::new( graph.insert(&Triple::new(
binding.clone(), binding.clone(),
rs::VARIABLE.clone(), rs::VARIABLE.clone(),
Literal::from(variables[i].name()?), Literal::new_simple_literal(variables[i].name()?),
))?; ))?;
} }
} }

Loading…
Cancel
Save