JSON SPARQL results: allow the "head" key to appear at the end of the document

In this case the parser cannot stream: all bindings are buffered until the "head" key has been read
pull/435/head
Tpt 2 years ago committed by Thomas Tanon
parent c40c81447e
commit bdb803dab5
  1. 168
      lib/sparesults/src/json.rs
  2. 16
      testsuite/oxigraph-tests/sparql-results/late_head.srj
  3. 16
      testsuite/oxigraph-tests/sparql-results/late_head_expected.srj
  4. 9
      testsuite/oxigraph-tests/sparql-results/manifest.ttl
  5. 16
      testsuite/oxigraph-tests/sparql-results/typed_literal_expected.srj
  6. 27
      testsuite/src/sparql_evaluator.rs

@ -7,6 +7,7 @@ use oxrdf::Variable;
use oxrdf::*; use oxrdf::*;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::io::{self, BufRead, Write}; use std::io::{self, BufRead, Write};
use std::mem::take;
/// This limit is set in order to avoid stack overflow errors when parsing nested triples due to too many recursive calls. /// This limit is set in order to avoid stack overflow errors when parsing nested triples due to too many recursive calls.
/// The actual limit value is a rule-of-thumb compromise between not failing to parse valid files and not triggering stack overflow errors. /// The actual limit value is a rule-of-thumb compromise between not failing to parse valid files and not triggering stack overflow errors.
@ -138,6 +139,8 @@ impl<R: BufRead> JsonQueryResultsReader<R> {
let mut reader = JsonReader::from_reader(source); let mut reader = JsonReader::from_reader(source);
let mut buffer = Vec::default(); let mut buffer = Vec::default();
let mut variables = None; let mut variables = None;
let mut buffered_bindings: Option<Vec<_>> = None;
let mut output_iter = None;
if reader.read_event(&mut buffer)? != JsonEvent::StartObject { if reader.read_event(&mut buffer)? != JsonEvent::StartObject {
return Err(SyntaxError::msg("SPARQL JSON results should be an object").into()); return Err(SyntaxError::msg("SPARQL JSON results should be an object").into());
@ -148,7 +151,24 @@ impl<R: BufRead> JsonQueryResultsReader<R> {
match event { match event {
JsonEvent::ObjectKey(key) => match key { JsonEvent::ObjectKey(key) => match key {
"head" => { "head" => {
variables = Some(read_head(&mut reader, &mut buffer)?); let extracted_variables = read_head(&mut reader, &mut buffer)?;
if let Some(buffered_bindings) = buffered_bindings.take() {
let mut mapping = BTreeMap::default();
for (i, var) in extracted_variables.iter().enumerate() {
mapping.insert(var.as_str().to_string(), i);
}
output_iter = Some(Self::Solutions {
variables: extracted_variables,
solutions: JsonSolutionsReader {
kind: JsonSolutionsReaderKind::Buffered {
bindings: buffered_bindings.into_iter(),
},
mapping,
},
});
} else {
variables = Some(extracted_variables);
}
} }
"results" => { "results" => {
if reader.read_event(&mut buffer)? != JsonEvent::StartObject { if reader.read_event(&mut buffer)? != JsonEvent::StartObject {
@ -169,24 +189,45 @@ impl<R: BufRead> JsonQueryResultsReader<R> {
if reader.read_event(&mut buffer)? != JsonEvent::StartArray { if reader.read_event(&mut buffer)? != JsonEvent::StartArray {
return Err(SyntaxError::msg("'bindings' should be an object").into()); return Err(SyntaxError::msg("'bindings' should be an object").into());
} }
return if let Some(variables) = variables { if let Some(variables) = variables {
let mut mapping = BTreeMap::default(); let mut mapping = BTreeMap::default();
for (i, var) in variables.iter().enumerate() { for (i, var) in variables.iter().enumerate() {
mapping.insert(var.as_str().to_string(), i); mapping.insert(var.as_str().to_string(), i);
} }
Ok(Self::Solutions { return Ok(Self::Solutions {
variables, variables,
solutions: JsonSolutionsReader { solutions: JsonSolutionsReader {
reader, kind: JsonSolutionsReaderKind::Streaming { reader, buffer },
buffer,
mapping, mapping,
}, },
}) });
} else { } else {
Err(SyntaxError::msg( // We buffer all results before being able to read the header
"SPARQL tuple query results should contain a head key", let mut bindings = Vec::new();
let mut variables = Vec::new();
let mut values = Vec::new();
loop {
match reader.read_event(&mut buffer)? {
JsonEvent::StartObject => (),
JsonEvent::EndObject => {
bindings.push((take(&mut variables), take(&mut values)));
}
JsonEvent::EndArray | JsonEvent::Eof => {
buffered_bindings = Some(bindings);
break;
}
JsonEvent::ObjectKey(key) => {
variables.push(key.to_string());
values.push(read_value(&mut reader, &mut buffer, 0)?);
}
_ => {
return Err(SyntaxError::msg(
"Invalid result serialization",
) )
.into()) .into())
}
}
}
}; };
} }
"boolean" => { "boolean" => {
@ -203,18 +244,17 @@ impl<R: BufRead> JsonQueryResultsReader<R> {
.into()); .into());
} }
}, },
JsonEvent::EndObject => { JsonEvent::EndObject => (),
return Err(SyntaxError::msg(
"SPARQL results should contain a bindings key or a boolean key",
)
.into())
}
JsonEvent::Eof => { JsonEvent::Eof => {
return Err(SyntaxError::msg( return if let Some(output_iter) = output_iter {
Ok(output_iter)
} else {
Err(SyntaxError::msg(
"Unexpected end of JSON object without 'results' or 'boolean' key", "Unexpected end of JSON object without 'results' or 'boolean' key",
) )
.into()) .into())
} }
}
_ => return Err(SyntaxError::msg("Invalid SPARQL results serialization").into()), _ => return Err(SyntaxError::msg("Invalid SPARQL results serialization").into()),
} }
} }
@ -222,16 +262,27 @@ impl<R: BufRead> JsonQueryResultsReader<R> {
} }
pub struct JsonSolutionsReader<R: BufRead> { pub struct JsonSolutionsReader<R: BufRead> {
mapping: BTreeMap<String, usize>,
kind: JsonSolutionsReaderKind<R>,
}
enum JsonSolutionsReaderKind<R: BufRead> {
Streaming {
reader: JsonReader<R>, reader: JsonReader<R>,
buffer: Vec<u8>, buffer: Vec<u8>,
mapping: BTreeMap<String, usize>, },
Buffered {
bindings: std::vec::IntoIter<(Vec<String>, Vec<Term>)>,
},
} }
impl<R: BufRead> JsonSolutionsReader<R> { impl<R: BufRead> JsonSolutionsReader<R> {
pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, ParseError> { pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, ParseError> {
match &mut self.kind {
JsonSolutionsReaderKind::Streaming { reader, buffer } => {
let mut new_bindings = vec![None; self.mapping.len()]; let mut new_bindings = vec![None; self.mapping.len()];
loop { loop {
match self.reader.read_event(&mut self.buffer)? { match reader.read_event(buffer)? {
JsonEvent::StartObject => (), JsonEvent::StartObject => (),
JsonEvent::EndObject => return Ok(Some(new_bindings)), JsonEvent::EndObject => return Ok(Some(new_bindings)),
JsonEvent::EndArray | JsonEvent::Eof => return Ok(None), JsonEvent::EndArray | JsonEvent::Eof => return Ok(None),
@ -241,14 +292,37 @@ impl<R: BufRead> JsonSolutionsReader<R> {
"The variable {key} has not been defined in the header" "The variable {key} has not been defined in the header"
)) ))
})?; })?;
new_bindings[k] = Some(self.read_value(0)?) new_bindings[k] = Some(read_value(reader, buffer, 0)?)
} }
_ => return Err(SyntaxError::msg("Invalid result serialization").into()), _ => return Err(SyntaxError::msg("Invalid result serialization").into()),
} }
} }
} }
JsonSolutionsReaderKind::Buffered { bindings } => {
Ok(if let Some((variables, values)) = bindings.next() {
let mut new_bindings = vec![None; self.mapping.len()];
for (variable, value) in variables.into_iter().zip(values) {
let k = *self.mapping.get(&variable).ok_or_else(|| {
SyntaxError::msg(format!(
"The variable {variable} has not been defined in the header"
))
})?;
new_bindings[k] = Some(value)
}
Some(new_bindings)
} else {
None
})
}
}
}
}
fn read_value(&mut self, number_of_recursive_calls: usize) -> Result<Term, ParseError> { fn read_value<R: BufRead>(
reader: &mut JsonReader<R>,
buffer: &mut Vec<u8>,
number_of_recursive_calls: usize,
) -> Result<Term, ParseError> {
if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES { if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES {
return Err(SyntaxError::msg(format!( return Err(SyntaxError::msg(format!(
"Too many nested triples ({MAX_NUMBER_OF_NESTED_TRIPLES}). The parser fails here to avoid a stack overflow." "Too many nested triples ({MAX_NUMBER_OF_NESTED_TRIPLES}). The parser fails here to avoid a stack overflow."
@ -280,24 +354,28 @@ impl<R: BufRead> JsonSolutionsReader<R> {
let mut predicate = None; let mut predicate = None;
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
let mut object = None; let mut object = None;
if self.reader.read_event(&mut self.buffer)? != JsonEvent::StartObject { if reader.read_event(buffer)? != JsonEvent::StartObject {
return Err(SyntaxError::msg("Term serializations should be an object").into()); return Err(SyntaxError::msg("Term serializations should be an object").into());
} }
loop { loop {
match self.reader.read_event(&mut self.buffer)? { match reader.read_event(buffer)? {
JsonEvent::ObjectKey(key) => match key { JsonEvent::ObjectKey(key) => match key {
"type" => state = Some(State::Type), "type" => state = Some(State::Type),
"value" => state = Some(State::Value), "value" => state = Some(State::Value),
"xml:lang" => state = Some(State::Lang), "xml:lang" => state = Some(State::Lang),
"datatype" => state = Some(State::Datatype), "datatype" => state = Some(State::Datatype),
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
"subject" => subject = Some(self.read_value(number_of_recursive_calls + 1)?), "subject" => {
subject = Some(read_value(reader, buffer, number_of_recursive_calls + 1)?)
}
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
"predicate" => { "predicate" => {
predicate = Some(self.read_value(number_of_recursive_calls + 1)?) predicate = Some(read_value(reader, buffer, number_of_recursive_calls + 1)?)
} }
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
"object" => object = Some(self.read_value(number_of_recursive_calls + 1)?), "object" => {
object = Some(read_value(reader, buffer, number_of_recursive_calls + 1)?)
}
_ => { _ => {
return Err(SyntaxError::msg(format!( return Err(SyntaxError::msg(format!(
"Unexpected key in term serialization: '{key}'" "Unexpected key in term serialization: '{key}'"
@ -307,10 +385,9 @@ impl<R: BufRead> JsonSolutionsReader<R> {
}, },
JsonEvent::StartObject => { JsonEvent::StartObject => {
if state != Some(State::Value) { if state != Some(State::Value) {
return Err(SyntaxError::msg( return Err(
"Unexpected nested object in term serialization", SyntaxError::msg("Unexpected nested object in term serialization").into(),
) );
.into());
} }
} }
JsonEvent::String(s) => match state { JsonEvent::String(s) => match state {
@ -322,10 +399,9 @@ impl<R: BufRead> JsonSolutionsReader<R> {
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
"triple" => t = Some(Type::Triple), "triple" => t = Some(Type::Triple),
_ => { _ => {
return Err(SyntaxError::msg(format!( return Err(
"Unexpected term type: '{s}'" SyntaxError::msg(format!("Unexpected term type: '{s}'")).into()
)) )
.into())
} }
}; };
state = None; state = None;
@ -339,10 +415,10 @@ impl<R: BufRead> JsonSolutionsReader<R> {
state = None; state = None;
} }
Some(State::Datatype) => { Some(State::Datatype) => {
datatype = datatype = Some(
Some(NamedNode::new(s).map_err(|e| { NamedNode::new(s)
SyntaxError::msg(format!("Invalid datatype IRI: {e}")) .map_err(|e| SyntaxError::msg(format!("Invalid datatype IRI: {e}")))?,
})?); );
state = None; state = None;
} }
_ => (), // impossible _ => (), // impossible
@ -352,10 +428,9 @@ impl<R: BufRead> JsonSolutionsReader<R> {
if s == State::Value { if s == State::Value {
state = None; //End of triple state = None; //End of triple
} else { } else {
return Err(SyntaxError::msg( return Err(
"Term description values should be string", SyntaxError::msg("Term description values should be string").into()
) );
.into());
} }
} else { } else {
return match t { return match t {
@ -375,9 +450,7 @@ impl<R: BufRead> JsonSolutionsReader<R> {
.into()), .into()),
Some(Type::Literal) => { Some(Type::Literal) => {
let value = value.ok_or_else(|| { let value = value.ok_or_else(|| {
SyntaxError::msg( SyntaxError::msg("literal serialization should have a 'value' key")
"literal serialization should have a 'value' key",
)
})?; })?;
Ok(match lang { Ok(match lang {
Some(lang) => { Some(lang) => {
@ -403,9 +476,7 @@ impl<R: BufRead> JsonSolutionsReader<R> {
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
Some(Type::Triple) => Ok(Triple::new( Some(Type::Triple) => Ok(Triple::new(
match subject.ok_or_else(|| { match subject.ok_or_else(|| {
SyntaxError::msg( SyntaxError::msg("triple serialization should have a 'subject' key")
"triple serialization should have a 'subject' key",
)
})? { })? {
Term::NamedNode(subject) => subject.into(), Term::NamedNode(subject) => subject.into(),
Term::BlankNode(subject) => subject.into(), Term::BlankNode(subject) => subject.into(),
@ -431,9 +502,7 @@ impl<R: BufRead> JsonSolutionsReader<R> {
} }
}, },
object.ok_or_else(|| { object.ok_or_else(|| {
SyntaxError::msg( SyntaxError::msg("triple serialization should have a 'object' key")
"triple serialization should have a 'object' key",
)
})?, })?,
) )
.into()), .into()),
@ -444,7 +513,6 @@ impl<R: BufRead> JsonSolutionsReader<R> {
} }
} }
} }
}
fn read_head<R: BufRead>( fn read_head<R: BufRead>(
reader: &mut JsonReader<R>, reader: &mut JsonReader<R>,

@ -0,0 +1,16 @@
{
"results": {
"bindings": [
{},
{
"s": {
"type": "literal",
"value": "foo"
}
}
]
},
"head": {
"vars": ["s"]
}
}

@ -0,0 +1,16 @@
{
"head": {
"vars": ["s"]
},
"results": {
"bindings": [
{},
{
"s": {
"type": "literal",
"value": "foo"
}
}
]
}
}

@ -14,6 +14,7 @@
:results_json_ignored_keys :results_json_ignored_keys
:results_xml_ignored_keys :results_xml_ignored_keys
:results_json_typed_literal :results_json_typed_literal
:late_head
) . ) .
:results_json_duplicated_variables rdf:type ox:NegativeJsonResultsSyntaxTest ; :results_json_duplicated_variables rdf:type ox:NegativeJsonResultsSyntaxTest ;
@ -38,4 +39,10 @@
:results_json_typed_literal rdf:type ox:PositiveJsonResultsSyntaxTest ; :results_json_typed_literal rdf:type ox:PositiveJsonResultsSyntaxTest ;
mf:name "typed-literal term type is allowed" ; mf:name "typed-literal term type is allowed" ;
mf:action <typed_literal.srj> . mf:action <typed_literal.srj> ;
mf:result <typed_literal_expected.srj> .
:late_head rdf:type ox:PositiveJsonResultsSyntaxTest ;
mf:name "head after the list of results" ;
mf:action <late_head.srj> ;
mf:result <late_head_expected.srj> .

@ -0,0 +1,16 @@
{
"head": {
"vars": ["s"]
},
"results": {
"bindings": [
{
"s": {
"type": "literal",
"value": "foo",
"datatype": "http://example.com/dt"
}
}
]
}
}

@ -127,22 +127,25 @@ fn evaluate_negative_tsv_result_syntax_test(test: &Test) -> Result<()> {
} }
fn result_syntax_check(test: &Test, format: QueryResultsFormat) -> Result<()> { fn result_syntax_check(test: &Test, format: QueryResultsFormat) -> Result<()> {
let results_file = test let action_file = test
.action .action
.as_deref() .as_deref()
.ok_or_else(|| anyhow!("No action found for test {test}"))?; .ok_or_else(|| anyhow!("No action found for test {test}"))?;
match QueryResults::read(Cursor::new(read_file_to_string(results_file)?), format)? { let actual_results = StaticQueryResults::from_query_results(
QueryResults::Solutions(solutions) => { QueryResults::read(Cursor::new(read_file_to_string(action_file)?), format)?,
for s in solutions { true,
s?; )?;
} if let Some(result_file) = test.result.as_deref() {
} let expected_results = StaticQueryResults::from_query_results(
QueryResults::Graph(triples) => { QueryResults::read(Cursor::new(read_file_to_string(result_file)?), format)?,
for t in triples { true,
t?; )?;
} if !are_query_results_isomorphic(&expected_results, &actual_results) {
bail!(
"Failure on {test}.\n{}\n",
results_diff(expected_results, actual_results),
);
} }
QueryResults::Boolean(_) => (),
} }
Ok(()) Ok(())
} }

Loading…
Cancel
Save