diff --git a/src/rio/ntriples/ntriples_grammar.rustpeg b/src/rio/ntriples/ntriples_grammar.rustpeg index 6f5993ba..ec081bde 100644 --- a/src/rio/ntriples/ntriples_grammar.rustpeg +++ b/src/rio/ntriples/ntriples_grammar.rustpeg @@ -33,7 +33,7 @@ object -> Term = literal -> Literal = v: STRING_LITERAL_QUOTE _ "^^" _ t:IRIREF { Literal::new_typed_literal(v, t) } / v: STRING_LITERAL_QUOTE _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } / - v: STRING_LITERAL_QUOTE { v.into() } + v: STRING_LITERAL_QUOTE { Literal::new_simple_literal(v) } //[144s] diff --git a/src/rio/turtle/turtle_grammar.rustpeg b/src/rio/turtle/turtle_grammar.rustpeg index 9dc78e0a..2e1cd70a 100644 --- a/src/rio/turtle/turtle_grammar.rustpeg +++ b/src/rio/turtle/turtle_grammar.rustpeg @@ -128,7 +128,7 @@ NumericLiteral -> Literal = RDFLiteral -> Literal = v:String _ "^^" _ t:iri { Literal::new_typed_literal(v, t) } / v:String _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } / - v:String { v.into() } + v:String { Literal::new_simple_literal(v) } //[133s] BooleanLiteral -> Literal = diff --git a/src/sparql/sparql_grammar.rustpeg b/src/sparql/sparql_grammar.rustpeg index 66b82bf0..2dadb2cc 100644 --- a/src/sparql/sparql_grammar.rustpeg +++ b/src/sparql/sparql_grammar.rustpeg @@ -864,7 +864,7 @@ iriOrFunction -> Expression = i: iri _ a: ArgList? { RDFLiteral -> Literal = v:String _ "^^" _ t:iri { Literal::new_typed_literal(v, t) } / v:String _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } / - v:String { v.into() } + v:String { Literal::new_simple_literal(v) } //[130] NumericLiteral -> Literal = NumericLiteralUnsigned / NumericLiteralPositive / NumericLiteralNegative diff --git a/src/sparql/xml_results.rs b/src/sparql/xml_results.rs index 02d42d04..8bba79df 100644 --- a/src/sparql/xml_results.rs +++ b/src/sparql/xml_results.rs @@ -279,20 +279,7 @@ impl Iterator for ResultsIterator { } State::Literal => { let value = self.reader.decode(&data).to_string(); - term = Some( - match datatype { - Some(ref datatype) => { - Literal::new_typed_literal(value, datatype.clone()) - } - None => match lang { - Some(ref lang) => Literal::new_language_tagged_literal( - value, - lang.clone(), - ), - None => Literal::new_simple_literal(value), - }, - }.into(), - ) + term = Some(build_literal(value, &lang, &datatype).into()); } _ => { return Some(Err(format!( @@ -322,7 +309,14 @@ impl Iterator for ResultsIterator { term = None; state = State::Result; } - State::Uri | State::BNode | State::Literal => state = State::Binding, + State::Uri | State::BNode => state = State::Binding, + State::Literal => { + if term.is_none() { + //We default to the empty literal + term = Some(build_literal("", &lang, &datatype).into()) + } + state = State::Binding; + } _ => (), }, Event::Eof => return None, @@ -331,3 +325,17 @@ impl Iterator for ResultsIterator { } } } + +fn build_literal( + value: impl Into, + lang: &Option, + datatype: &Option, +) -> Literal { + match datatype { + Some(datatype) => Literal::new_typed_literal(value, datatype.clone()), + None => match lang { + Some(lang) => Literal::new_language_tagged_literal(value, lang.clone()), + None => Literal::new_simple_literal(value), + }, + } +} diff --git a/src/store/numeric_encoder.rs b/src/store/numeric_encoder.rs index 2d5a919d..20757131 100644 --- a/src/store/numeric_encoder.rs +++ b/src/store/numeric_encoder.rs @@ -19,8 +19,9 @@ pub trait BytesStore { const TYPE_DEFAULT_GRAPH_ID: u8 = 0; const TYPE_NAMED_NODE_ID: u8 = 1; const TYPE_BLANK_NODE_ID: u8 = 2; -const TYPE_LANG_STRING_LITERAL_ID: u8 = 3; -const TYPE_TYPED_LITERAL_ID: u8 = 4; +const TYPE_SIMPLE_LITERAL_ID: u8 = 3; +const TYPE_LANG_STRING_LITERAL_ID: u8 = 4; +const TYPE_TYPED_LITERAL_ID: u8 = 5; pub static ENCODED_DEFAULT_GRAPH: EncodedTerm = EncodedTerm::DefaultGraph {}; @@ -29,6 +30,7 @@ pub enum EncodedTerm { DefaultGraph {}, NamedNode { iri_id: u64 }, BlankNode(Uuid), + SimpleLiteral { value_id: u64 }, LangStringLiteral { value_id: u64, language_id: u64 }, TypedLiteral { value_id: u64, datatype_id: u64 }, } @@ -39,6 +41,7 @@ impl EncodedTerm { EncodedTerm::DefaultGraph { .. } => TYPE_DEFAULT_GRAPH_ID, EncodedTerm::NamedNode { .. } => TYPE_NAMED_NODE_ID, EncodedTerm::BlankNode(_) => TYPE_BLANK_NODE_ID, + EncodedTerm::SimpleLiteral { .. } => TYPE_SIMPLE_LITERAL_ID, EncodedTerm::LangStringLiteral { .. } => TYPE_LANG_STRING_LITERAL_ID, EncodedTerm::TypedLiteral { .. } => TYPE_TYPED_LITERAL_ID, } @@ -88,6 +91,9 @@ impl TermReader for R { self.read_exact(&mut uuid_buffer)?; Ok(EncodedTerm::BlankNode(Uuid::from_bytes(uuid_buffer))) } + TYPE_SIMPLE_LITERAL_ID => Ok(EncodedTerm::SimpleLiteral { + value_id: self.read_u64::()?, + }), TYPE_LANG_STRING_LITERAL_ID => Ok(EncodedTerm::LangStringLiteral { language_id: self.read_u64::()?, value_id: self.read_u64::()?, @@ -154,6 +160,9 @@ impl TermWriter for R { EncodedTerm::DefaultGraph {} => {} EncodedTerm::NamedNode { iri_id } => self.write_u64::(iri_id)?, EncodedTerm::BlankNode(id) => self.write_all(id.as_bytes())?, + EncodedTerm::SimpleLiteral { value_id } => { + self.write_u64::(value_id)?; + } EncodedTerm::LangStringLiteral { value_id, language_id, @@ -217,17 +226,23 @@ impl Encoder { } pub fn encode_literal(&self, literal: &Literal) -> Result { - if let Some(language) = literal.language() { - Ok(EncodedTerm::LangStringLiteral { - value_id: self.encode_str_value(&literal.value())?, - language_id: self.encode_str_value(language)?, - }) + Ok(if literal.is_plain() { + if let Some(language) = literal.language() { + EncodedTerm::LangStringLiteral { + value_id: self.encode_str_value(&literal.value())?, + language_id: self.encode_str_value(language)?, + } + } else { + EncodedTerm::SimpleLiteral { + value_id: self.encode_str_value(&literal.value())?, + } + } } else { - Ok(EncodedTerm::TypedLiteral { + EncodedTerm::TypedLiteral { value_id: self.encode_str_value(&literal.value())?, datatype_id: self.encode_str_value(literal.datatype().as_str())?, - }) - } + } + }) } pub fn encode_named_or_blank_node(&self, term: &NamedOrBlankNode) -> Result { @@ -277,6 +292,9 @@ impl Encoder { Ok(NamedNode::from(self.decode_url_value(iri_id)?).into()) } EncodedTerm::BlankNode(id) => Ok(BlankNode::from(id).into()), + EncodedTerm::SimpleLiteral { value_id } => { + Ok(Literal::new_simple_literal(self.decode_str_value(value_id)?).into()) + } EncodedTerm::LangStringLiteral { value_id, language_id, diff --git a/src/store/sparql.rs b/src/store/sparql.rs index 98dd165a..2f91aec2 100644 --- a/src/store/sparql.rs +++ b/src/store/sparql.rs @@ -1,4 +1,5 @@ use sparql::algebra::*; +use std::collections::BTreeSet; use std::iter::once; use std::iter::Iterator; use std::sync::Arc; @@ -49,6 +50,85 @@ impl EncodedBindingsIterator { })), } } + + fn unique(self) -> Self { + let EncodedBindingsIterator { variables, iter } = self; + let mut oks = BTreeSet::default(); + let mut errors = Vec::default(); + for element in iter { + match element { + Ok(ok) => { + oks.insert(ok); + } + Err(error) => errors.push(error), + } + } + EncodedBindingsIterator { + variables, + iter: Box::new(errors.into_iter().map(Err).chain(oks.into_iter().map(Ok))), + } + } + + fn chain(self, other: Self) -> Self { + let EncodedBindingsIterator { + variables: variables1, + iter: iter1, + } = self; + let EncodedBindingsIterator { + variables: variables2, + iter: iter2, + } = other; + + let mut variables = variables1; + let mut map_2_to_1 = Vec::with_capacity(variables2.len()); + for var in variables2 { + map_2_to_1.push(match slice_key(&variables, &var) { + Some(key) => key, + None => { + variables.push(var); + variables.len() - 1 + } + }) + } + let variables_len = variables.len(); + EncodedBindingsIterator { + variables, + iter: Box::new(iter1.chain(iter2.map(move |binding| { + let binding = binding?; + let mut new_binding = binding.clone(); + new_binding.resize(variables_len, None); + for (old_key, new_key) in map_2_to_1.iter().enumerate() { + new_binding[*new_key] = binding[old_key]; + } + Ok(new_binding) + }))), + } + } + + fn duplicate(self) -> (Self, Self) { + let EncodedBindingsIterator { variables, iter } = self; + //TODO: optimize + let mut oks = Vec::default(); + let mut errors = Vec::default(); + for element in iter { + match element { + Ok(ok) => { + oks.push(ok); + } + Err(error) => errors.push(error), + } + } + ( + EncodedBindingsIterator { + variables: variables.clone(), + iter: Box::new(oks.clone().into_iter().map(Ok)), + }, + EncodedBindingsIterator { + variables: variables, + iter: Box::new(errors.into_iter().map(Err).chain(oks.into_iter().map(Ok))), + }, + ) + } } impl Default for EncodedBindingsIterator { @@ -101,7 +181,7 @@ impl SparqlEvaluator { ListPattern::Project(l, new_variables) => Ok(self .eval_list_pattern(l, from)? .project(new_variables.to_vec())), - ListPattern::Distinct(l) => self.eval_list_pattern(l, from), //TODO + ListPattern::Distinct(l) => Ok(self.eval_list_pattern(l, from)?.unique()), ListPattern::Reduced(l) => self.eval_list_pattern(l, from), ListPattern::Slice(l, start, length) => { let mut iter = self.eval_list_pattern(l, from)?; @@ -139,7 +219,12 @@ impl SparqlEvaluator { } MultiSetPattern::LeftJoin(a, b, e) => unimplemented!(), MultiSetPattern::Filter(e, p) => unimplemented!(), - MultiSetPattern::Union(a, b) => unimplemented!(), + MultiSetPattern::Union(a, b) => { + let (from1, from2) = from.duplicate(); + Ok(self + .eval_multi_set_pattern(a, from1)? + .chain(self.eval_multi_set_pattern(b, from2)?)) + } MultiSetPattern::Graph(g, p) => unimplemented!(), MultiSetPattern::Extend(p, v, e) => unimplemented!(), MultiSetPattern::Minus(a, b) => unimplemented!(), diff --git a/tests/sparql_test_cases.rs b/tests/sparql_test_cases.rs index c1da9e12..3f1f9e00 100644 --- a/tests/sparql_test_cases.rs +++ b/tests/sparql_test_cases.rs @@ -83,8 +83,18 @@ fn sparql_w3c_query_evaluation_testsuite() { .unwrap(), Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/triple-match/manifest.ttl") .unwrap(), + Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest.ttl") + .unwrap(), + ]; + let test_blacklist = vec![ + //With LeftJoin + NamedNode::from_str( + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-4", + ).unwrap(), + NamedNode::from_str( + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#no-distinct-4", + ).unwrap(), ]; - let test_blacklist = vec![]; let client = RDFClient::default(); for test_result in manifest_10_urls @@ -232,7 +242,7 @@ fn to_graph(result: QueryResult) -> Result { graph.insert(&Triple::new( result_set.clone(), rs::RESULT_VARIABLE.clone(), - Literal::from(variable.name()?), + Literal::new_simple_literal(variable.name()?), ))?; } for binding_values in iter { @@ -259,7 +269,7 @@ fn to_graph(result: QueryResult) -> Result { graph.insert(&Triple::new( binding.clone(), rs::VARIABLE.clone(), - Literal::from(variables[i].name()?), + Literal::new_simple_literal(variables[i].name()?), ))?; } }