JSON SPARQL results: allow the "head" key to be at the end of the document

In this case the results are not parsed in a streaming fashion: all bindings are buffered in memory until the "head" key has been read.
pull/435/head
Tpt 2 years ago committed by Thomas Tanon
parent c40c81447e
commit bdb803dab5
  1. 502
      lib/sparesults/src/json.rs
  2. 16
      testsuite/oxigraph-tests/sparql-results/late_head.srj
  3. 16
      testsuite/oxigraph-tests/sparql-results/late_head_expected.srj
  4. 9
      testsuite/oxigraph-tests/sparql-results/manifest.ttl
  5. 16
      testsuite/oxigraph-tests/sparql-results/typed_literal_expected.srj
  6. 27
      testsuite/src/sparql_evaluator.rs

@ -7,6 +7,7 @@ use oxrdf::Variable;
use oxrdf::*; use oxrdf::*;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::io::{self, BufRead, Write}; use std::io::{self, BufRead, Write};
use std::mem::take;
/// This limit is set in order to avoid stack overflow error when parsing nested triples due to too many recursive calls. /// This limit is set in order to avoid stack overflow error when parsing nested triples due to too many recursive calls.
/// The actual limit value is a wet finger compromise between not failing to parse valid files and avoiding to trigger stack overflow errors. /// The actual limit value is a wet finger compromise between not failing to parse valid files and avoiding to trigger stack overflow errors.
@ -138,6 +139,8 @@ impl<R: BufRead> JsonQueryResultsReader<R> {
let mut reader = JsonReader::from_reader(source); let mut reader = JsonReader::from_reader(source);
let mut buffer = Vec::default(); let mut buffer = Vec::default();
let mut variables = None; let mut variables = None;
let mut buffered_bindings: Option<Vec<_>> = None;
let mut output_iter = None;
if reader.read_event(&mut buffer)? != JsonEvent::StartObject { if reader.read_event(&mut buffer)? != JsonEvent::StartObject {
return Err(SyntaxError::msg("SPARQL JSON results should be an object").into()); return Err(SyntaxError::msg("SPARQL JSON results should be an object").into());
@ -148,7 +151,24 @@ impl<R: BufRead> JsonQueryResultsReader<R> {
match event { match event {
JsonEvent::ObjectKey(key) => match key { JsonEvent::ObjectKey(key) => match key {
"head" => { "head" => {
variables = Some(read_head(&mut reader, &mut buffer)?); let extracted_variables = read_head(&mut reader, &mut buffer)?;
if let Some(buffered_bindings) = buffered_bindings.take() {
let mut mapping = BTreeMap::default();
for (i, var) in extracted_variables.iter().enumerate() {
mapping.insert(var.as_str().to_string(), i);
}
output_iter = Some(Self::Solutions {
variables: extracted_variables,
solutions: JsonSolutionsReader {
kind: JsonSolutionsReaderKind::Buffered {
bindings: buffered_bindings.into_iter(),
},
mapping,
},
});
} else {
variables = Some(extracted_variables);
}
} }
"results" => { "results" => {
if reader.read_event(&mut buffer)? != JsonEvent::StartObject { if reader.read_event(&mut buffer)? != JsonEvent::StartObject {
@ -169,24 +189,45 @@ impl<R: BufRead> JsonQueryResultsReader<R> {
if reader.read_event(&mut buffer)? != JsonEvent::StartArray { if reader.read_event(&mut buffer)? != JsonEvent::StartArray {
return Err(SyntaxError::msg("'bindings' should be an object").into()); return Err(SyntaxError::msg("'bindings' should be an object").into());
} }
return if let Some(variables) = variables { if let Some(variables) = variables {
let mut mapping = BTreeMap::default(); let mut mapping = BTreeMap::default();
for (i, var) in variables.iter().enumerate() { for (i, var) in variables.iter().enumerate() {
mapping.insert(var.as_str().to_string(), i); mapping.insert(var.as_str().to_string(), i);
} }
Ok(Self::Solutions { return Ok(Self::Solutions {
variables, variables,
solutions: JsonSolutionsReader { solutions: JsonSolutionsReader {
reader, kind: JsonSolutionsReaderKind::Streaming { reader, buffer },
buffer,
mapping, mapping,
}, },
}) });
} else { } else {
Err(SyntaxError::msg( // We buffer all results before being able to read the header
"SPARQL tuple query results should contain a head key", let mut bindings = Vec::new();
) let mut variables = Vec::new();
.into()) let mut values = Vec::new();
loop {
match reader.read_event(&mut buffer)? {
JsonEvent::StartObject => (),
JsonEvent::EndObject => {
bindings.push((take(&mut variables), take(&mut values)));
}
JsonEvent::EndArray | JsonEvent::Eof => {
buffered_bindings = Some(bindings);
break;
}
JsonEvent::ObjectKey(key) => {
variables.push(key.to_string());
values.push(read_value(&mut reader, &mut buffer, 0)?);
}
_ => {
return Err(SyntaxError::msg(
"Invalid result serialization",
)
.into())
}
}
}
}; };
} }
"boolean" => { "boolean" => {
@ -203,17 +244,16 @@ impl<R: BufRead> JsonQueryResultsReader<R> {
.into()); .into());
} }
}, },
JsonEvent::EndObject => { JsonEvent::EndObject => (),
return Err(SyntaxError::msg(
"SPARQL results should contain a bindings key or a boolean key",
)
.into())
}
JsonEvent::Eof => { JsonEvent::Eof => {
return Err(SyntaxError::msg( return if let Some(output_iter) = output_iter {
"Unexpected end of JSON object without 'results' or 'boolean' key", Ok(output_iter)
) } else {
.into()) Err(SyntaxError::msg(
"Unexpected end of JSON object without 'results' or 'boolean' key",
)
.into())
}
} }
_ => return Err(SyntaxError::msg("Invalid SPARQL results serialization").into()), _ => return Err(SyntaxError::msg("Invalid SPARQL results serialization").into()),
} }
@ -222,226 +262,254 @@ impl<R: BufRead> JsonQueryResultsReader<R> {
} }
pub struct JsonSolutionsReader<R: BufRead> { pub struct JsonSolutionsReader<R: BufRead> {
reader: JsonReader<R>,
buffer: Vec<u8>,
mapping: BTreeMap<String, usize>, mapping: BTreeMap<String, usize>,
kind: JsonSolutionsReaderKind<R>,
}
enum JsonSolutionsReaderKind<R: BufRead> {
Streaming {
reader: JsonReader<R>,
buffer: Vec<u8>,
},
Buffered {
bindings: std::vec::IntoIter<(Vec<String>, Vec<Term>)>,
},
} }
impl<R: BufRead> JsonSolutionsReader<R> { impl<R: BufRead> JsonSolutionsReader<R> {
pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, ParseError> { pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, ParseError> {
let mut new_bindings = vec![None; self.mapping.len()]; match &mut self.kind {
loop { JsonSolutionsReaderKind::Streaming { reader, buffer } => {
match self.reader.read_event(&mut self.buffer)? { let mut new_bindings = vec![None; self.mapping.len()];
JsonEvent::StartObject => (), loop {
JsonEvent::EndObject => return Ok(Some(new_bindings)), match reader.read_event(buffer)? {
JsonEvent::EndArray | JsonEvent::Eof => return Ok(None), JsonEvent::StartObject => (),
JsonEvent::ObjectKey(key) => { JsonEvent::EndObject => return Ok(Some(new_bindings)),
let k = *self.mapping.get(key).ok_or_else(|| { JsonEvent::EndArray | JsonEvent::Eof => return Ok(None),
SyntaxError::msg(format!( JsonEvent::ObjectKey(key) => {
"The variable {key} has not been defined in the header" let k = *self.mapping.get(key).ok_or_else(|| {
)) SyntaxError::msg(format!(
})?; "The variable {key} has not been defined in the header"
new_bindings[k] = Some(self.read_value(0)?) ))
})?;
new_bindings[k] = Some(read_value(reader, buffer, 0)?)
}
_ => return Err(SyntaxError::msg("Invalid result serialization").into()),
}
} }
_ => return Err(SyntaxError::msg("Invalid result serialization").into()), }
JsonSolutionsReaderKind::Buffered { bindings } => {
Ok(if let Some((variables, values)) = bindings.next() {
let mut new_bindings = vec![None; self.mapping.len()];
for (variable, value) in variables.into_iter().zip(values) {
let k = *self.mapping.get(&variable).ok_or_else(|| {
SyntaxError::msg(format!(
"The variable {variable} has not been defined in the header"
))
})?;
new_bindings[k] = Some(value)
}
Some(new_bindings)
} else {
None
})
} }
} }
} }
}
fn read_value(&mut self, number_of_recursive_calls: usize) -> Result<Term, ParseError> { fn read_value<R: BufRead>(
if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES { reader: &mut JsonReader<R>,
return Err(SyntaxError::msg(format!( buffer: &mut Vec<u8>,
"Too many nested triples ({MAX_NUMBER_OF_NESTED_TRIPLES}). The parser fails here to avoid a stack overflow." number_of_recursive_calls: usize,
)) ) -> Result<Term, ParseError> {
if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES {
return Err(SyntaxError::msg(format!(
"Too many nested triples ({MAX_NUMBER_OF_NESTED_TRIPLES}). The parser fails here to avoid a stack overflow."
))
.into()); .into());
} }
enum Type { enum Type {
Uri, Uri,
BNode, BNode,
Literal, Literal,
#[cfg(feature = "rdf-star")]
Triple,
}
#[derive(Eq, PartialEq)]
enum State {
Type,
Value,
Lang,
Datatype,
}
let mut state = None;
let mut t = None;
let mut value = None;
let mut lang = None;
let mut datatype = None;
#[cfg(feature = "rdf-star")]
let mut subject = None;
#[cfg(feature = "rdf-star")]
let mut predicate = None;
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
let mut object = None; Triple,
if self.reader.read_event(&mut self.buffer)? != JsonEvent::StartObject { }
return Err(SyntaxError::msg("Term serializations should be an object").into()); #[derive(Eq, PartialEq)]
} enum State {
loop { Type,
match self.reader.read_event(&mut self.buffer)? { Value,
JsonEvent::ObjectKey(key) => match key { Lang,
"type" => state = Some(State::Type), Datatype,
"value" => state = Some(State::Value), }
"xml:lang" => state = Some(State::Lang), let mut state = None;
"datatype" => state = Some(State::Datatype), let mut t = None;
#[cfg(feature = "rdf-star")] let mut value = None;
"subject" => subject = Some(self.read_value(number_of_recursive_calls + 1)?), let mut lang = None;
#[cfg(feature = "rdf-star")] let mut datatype = None;
"predicate" => { #[cfg(feature = "rdf-star")]
predicate = Some(self.read_value(number_of_recursive_calls + 1)?) let mut subject = None;
} #[cfg(feature = "rdf-star")]
#[cfg(feature = "rdf-star")] let mut predicate = None;
"object" => object = Some(self.read_value(number_of_recursive_calls + 1)?), #[cfg(feature = "rdf-star")]
_ => { let mut object = None;
return Err(SyntaxError::msg(format!( if reader.read_event(buffer)? != JsonEvent::StartObject {
"Unexpected key in term serialization: '{key}'" return Err(SyntaxError::msg("Term serializations should be an object").into());
)) }
.into()) loop {
} match reader.read_event(buffer)? {
}, JsonEvent::ObjectKey(key) => match key {
JsonEvent::StartObject => { "type" => state = Some(State::Type),
if state != Some(State::Value) { "value" => state = Some(State::Value),
return Err(SyntaxError::msg( "xml:lang" => state = Some(State::Lang),
"Unexpected nested object in term serialization", "datatype" => state = Some(State::Datatype),
) #[cfg(feature = "rdf-star")]
.into()); "subject" => {
} subject = Some(read_value(reader, buffer, number_of_recursive_calls + 1)?)
} }
JsonEvent::String(s) => match state { #[cfg(feature = "rdf-star")]
Some(State::Type) => { "predicate" => {
match s { predicate = Some(read_value(reader, buffer, number_of_recursive_calls + 1)?)
"uri" => t = Some(Type::Uri), }
"bnode" => t = Some(Type::BNode), #[cfg(feature = "rdf-star")]
"literal" | "typed-literal" => t = Some(Type::Literal), "object" => {
#[cfg(feature = "rdf-star")] object = Some(read_value(reader, buffer, number_of_recursive_calls + 1)?)
"triple" => t = Some(Type::Triple), }
_ => { _ => {
return Err(SyntaxError::msg(format!( return Err(SyntaxError::msg(format!(
"Unexpected term type: '{s}'" "Unexpected key in term serialization: '{key}'"
)) ))
.into()) .into())
} }
}; },
state = None; JsonEvent::StartObject => {
} if state != Some(State::Value) {
Some(State::Value) => { return Err(
value = Some(s.to_owned()); SyntaxError::msg("Unexpected nested object in term serialization").into(),
state = None; );
} }
Some(State::Lang) => { }
lang = Some(s.to_owned()); JsonEvent::String(s) => match state {
state = None; Some(State::Type) => {
} match s {
Some(State::Datatype) => { "uri" => t = Some(Type::Uri),
datatype = "bnode" => t = Some(Type::BNode),
Some(NamedNode::new(s).map_err(|e| { "literal" | "typed-literal" => t = Some(Type::Literal),
SyntaxError::msg(format!("Invalid datatype IRI: {e}")) #[cfg(feature = "rdf-star")]
})?); "triple" => t = Some(Type::Triple),
state = None; _ => {
} return Err(
_ => (), // impossible SyntaxError::msg(format!("Unexpected term type: '{s}'")).into()
},
JsonEvent::EndObject => {
if let Some(s) = state {
if s == State::Value {
state = None; //End of triple
} else {
return Err(SyntaxError::msg(
"Term description values should be string",
) )
.into());
} }
};
state = None;
}
Some(State::Value) => {
value = Some(s.to_owned());
state = None;
}
Some(State::Lang) => {
lang = Some(s.to_owned());
state = None;
}
Some(State::Datatype) => {
datatype = Some(
NamedNode::new(s)
.map_err(|e| SyntaxError::msg(format!("Invalid datatype IRI: {e}")))?,
);
state = None;
}
_ => (), // impossible
},
JsonEvent::EndObject => {
if let Some(s) = state {
if s == State::Value {
state = None; //End of triple
} else { } else {
return match t { return Err(
None => Err(SyntaxError::msg( SyntaxError::msg("Term description values should be string").into()
"Term serialization should have a 'type' key", );
) }
.into()), } else {
Some(Type::Uri) => Ok(NamedNode::new(value.ok_or_else(|| { return match t {
SyntaxError::msg("uri serialization should have a 'value' key") None => Err(SyntaxError::msg(
})?) "Term serialization should have a 'type' key",
.map_err(|e| SyntaxError::msg(format!("Invalid uri value: {e}")))? )
.into()), .into()),
Some(Type::BNode) => Ok(BlankNode::new(value.ok_or_else(|| { Some(Type::Uri) => Ok(NamedNode::new(value.ok_or_else(|| {
SyntaxError::msg("bnode serialization should have a 'value' key") SyntaxError::msg("uri serialization should have a 'value' key")
})?) })?)
.map_err(|e| SyntaxError::msg(format!("Invalid bnode value: {e}")))? .map_err(|e| SyntaxError::msg(format!("Invalid uri value: {e}")))?
.into()), .into()),
Some(Type::Literal) => { Some(Type::BNode) => Ok(BlankNode::new(value.ok_or_else(|| {
let value = value.ok_or_else(|| { SyntaxError::msg("bnode serialization should have a 'value' key")
SyntaxError::msg( })?)
"literal serialization should have a 'value' key", .map_err(|e| SyntaxError::msg(format!("Invalid bnode value: {e}")))?
) .into()),
})?; Some(Type::Literal) => {
Ok(match lang { let value = value.ok_or_else(|| {
Some(lang) => { SyntaxError::msg("literal serialization should have a 'value' key")
if let Some(datatype) = datatype { })?;
if datatype.as_ref() != rdf::LANG_STRING { Ok(match lang {
return Err(SyntaxError::msg(format!( Some(lang) => {
"xml:lang value '{lang}' provided with the datatype {datatype}" if let Some(datatype) = datatype {
)).into()) if datatype.as_ref() != rdf::LANG_STRING {
} return Err(SyntaxError::msg(format!(
} "xml:lang value '{lang}' provided with the datatype {datatype}"
Literal::new_language_tagged_literal(value, &lang).map_err(|e| { )).into())
SyntaxError::msg(format!("Invalid xml:lang value '{lang}': {e}"))
})?
}
None => if let Some(datatype) = datatype {
Literal::new_typed_literal(value, datatype)
} else {
Literal::new_simple_literal(value)
} }
} }
.into()) Literal::new_language_tagged_literal(value, &lang).map_err(|e| {
SyntaxError::msg(format!("Invalid xml:lang value '{lang}': {e}"))
})?
}
None => if let Some(datatype) = datatype {
Literal::new_typed_literal(value, datatype)
} else {
Literal::new_simple_literal(value)
}
} }
#[cfg(feature = "rdf-star")] .into())
Some(Type::Triple) => Ok(Triple::new( }
match subject.ok_or_else(|| { #[cfg(feature = "rdf-star")]
SyntaxError::msg( Some(Type::Triple) => Ok(Triple::new(
"triple serialization should have a 'subject' key", match subject.ok_or_else(|| {
) SyntaxError::msg("triple serialization should have a 'subject' key")
})? { })? {
Term::NamedNode(subject) => subject.into(), Term::NamedNode(subject) => subject.into(),
Term::BlankNode(subject) => subject.into(), Term::BlankNode(subject) => subject.into(),
Term::Triple(subject) => Subject::Triple(subject), Term::Triple(subject) => Subject::Triple(subject),
Term::Literal(_) => { Term::Literal(_) => {
return Err(SyntaxError::msg( return Err(SyntaxError::msg(
"The 'subject' value should not be a literal", "The 'subject' value should not be a literal",
)
.into())
}
},
match predicate.ok_or_else(|| {
SyntaxError::msg(
"triple serialization should have a 'predicate' key",
) )
})? { .into())
Term::NamedNode(predicate) => predicate, }
_ => { },
return Err(SyntaxError::msg( match predicate.ok_or_else(|| {
"The 'predicate' value should be a uri", SyntaxError::msg(
) "triple serialization should have a 'predicate' key",
.into()) )
} })? {
}, Term::NamedNode(predicate) => predicate,
object.ok_or_else(|| { _ => {
SyntaxError::msg( return Err(SyntaxError::msg(
"triple serialization should have a 'object' key", "The 'predicate' value should be a uri",
) )
})?, .into())
) }
.into()), },
}; object.ok_or_else(|| {
} SyntaxError::msg("triple serialization should have a 'object' key")
})?,
)
.into()),
};
} }
_ => return Err(SyntaxError::msg("Invalid term serialization").into()),
} }
_ => return Err(SyntaxError::msg("Invalid term serialization").into()),
} }
} }
} }

@ -0,0 +1,16 @@
{
"results": {
"bindings": [
{},
{
"s": {
"type": "literal",
"value": "foo"
}
}
]
},
"head": {
"vars": ["s"]
}
}

@ -0,0 +1,16 @@
{
"head": {
"vars": ["s"]
},
"results": {
"bindings": [
{},
{
"s": {
"type": "literal",
"value": "foo"
}
}
]
}
}

@ -14,6 +14,7 @@
:results_json_ignored_keys :results_json_ignored_keys
:results_xml_ignored_keys :results_xml_ignored_keys
:results_json_typed_literal :results_json_typed_literal
:late_head
) . ) .
:results_json_duplicated_variables rdf:type ox:NegativeJsonResultsSyntaxTest ; :results_json_duplicated_variables rdf:type ox:NegativeJsonResultsSyntaxTest ;
@ -38,4 +39,10 @@
:results_json_typed_literal rdf:type ox:PositiveJsonResultsSyntaxTest ; :results_json_typed_literal rdf:type ox:PositiveJsonResultsSyntaxTest ;
mf:name "typed-literal term type is allowed" ; mf:name "typed-literal term type is allowed" ;
mf:action <typed_literal.srj> . mf:action <typed_literal.srj> ;
mf:result <typed_literal_expected.srj> .
:late_head rdf:type ox:PositiveJsonResultsSyntaxTest ;
mf:name "head after the list of results" ;
mf:action <late_head.srj> ;
mf:result <late_head_expected.srj> .

@ -0,0 +1,16 @@
{
"head": {
"vars": ["s"]
},
"results": {
"bindings": [
{
"s": {
"type": "literal",
"value": "foo",
"datatype": "http://example.com/dt"
}
}
]
}
}

@ -127,22 +127,25 @@ fn evaluate_negative_tsv_result_syntax_test(test: &Test) -> Result<()> {
} }
fn result_syntax_check(test: &Test, format: QueryResultsFormat) -> Result<()> { fn result_syntax_check(test: &Test, format: QueryResultsFormat) -> Result<()> {
let results_file = test let action_file = test
.action .action
.as_deref() .as_deref()
.ok_or_else(|| anyhow!("No action found for test {test}"))?; .ok_or_else(|| anyhow!("No action found for test {test}"))?;
match QueryResults::read(Cursor::new(read_file_to_string(results_file)?), format)? { let actual_results = StaticQueryResults::from_query_results(
QueryResults::Solutions(solutions) => { QueryResults::read(Cursor::new(read_file_to_string(action_file)?), format)?,
for s in solutions { true,
s?; )?;
} if let Some(result_file) = test.result.as_deref() {
} let expected_results = StaticQueryResults::from_query_results(
QueryResults::Graph(triples) => { QueryResults::read(Cursor::new(read_file_to_string(result_file)?), format)?,
for t in triples { true,
t?; )?;
} if !are_query_results_isomorphic(&expected_results, &actual_results) {
bail!(
"Failure on {test}.\n{}\n",
results_diff(expected_results, actual_results),
);
} }
QueryResults::Boolean(_) => (),
} }
Ok(()) Ok(())
} }

Loading…
Cancel
Save