From bdb803dab547fe792a81e42bb957ef1189043166 Mon Sep 17 00:00:00 2001
From: Tpt
Date: Thu, 16 Mar 2023 20:05:46 +0100
Subject: [PATCH] JSON SPARQL results: allows the "head" key to be at the end
 of the document

Does not do streaming parsing in this case
---
 lib/sparesults/src/json.rs                    | 502 ++++++++++--------
 .../sparql-results/late_head.srj              |  16 +
 .../sparql-results/late_head_expected.srj     |  16 +
 .../sparql-results/manifest.ttl               |   9 +-
 .../sparql-results/typed_literal_expected.srj |  16 +
 testsuite/src/sparql_evaluator.rs             |  27 +-
 6 files changed, 356 insertions(+), 230 deletions(-)
 create mode 100644 testsuite/oxigraph-tests/sparql-results/late_head.srj
 create mode 100644 testsuite/oxigraph-tests/sparql-results/late_head_expected.srj
 create mode 100644 testsuite/oxigraph-tests/sparql-results/typed_literal_expected.srj

diff --git a/lib/sparesults/src/json.rs b/lib/sparesults/src/json.rs
index ded01047..cbcf4b8a 100644
--- a/lib/sparesults/src/json.rs
+++ b/lib/sparesults/src/json.rs
@@ -7,6 +7,7 @@ use oxrdf::Variable;
 use oxrdf::*;
 use std::collections::BTreeMap;
 use std::io::{self, BufRead, Write};
+use std::mem::take;
 
 /// This limit is set in order to avoid stack overflow error when parsing nested triples due to too many recursive calls.
 /// The actual limit value is a wet finger compromise between not failing to parse valid files and avoiding to trigger stack overflow errors.
@@ -138,6 +139,8 @@ impl<R: BufRead> JsonQueryResultsReader<R> {
         let mut reader = JsonReader::from_reader(source);
         let mut buffer = Vec::default();
         let mut variables = None;
+        let mut buffered_bindings: Option<Vec<(Vec<String>, Vec<Term>)>> = None;
+        let mut output_iter = None;
         if reader.read_event(&mut buffer)? != JsonEvent::StartObject {
             return Err(SyntaxError::msg("SPARQL JSON results should be an object").into());
         }
@@ -148,7 +151,24 @@
             match event {
                 JsonEvent::ObjectKey(key) => match key {
                     "head" => {
-                        variables = Some(read_head(&mut reader, &mut buffer)?);
+                        let extracted_variables = read_head(&mut reader, &mut buffer)?;
+                        if let Some(buffered_bindings) = buffered_bindings.take() {
+                            let mut mapping = BTreeMap::default();
+                            for (i, var) in extracted_variables.iter().enumerate() {
+                                mapping.insert(var.as_str().to_string(), i);
+                            }
+                            output_iter = Some(Self::Solutions {
+                                variables: extracted_variables,
+                                solutions: JsonSolutionsReader {
+                                    kind: JsonSolutionsReaderKind::Buffered {
+                                        bindings: buffered_bindings.into_iter(),
+                                    },
+                                    mapping,
+                                },
+                            });
+                        } else {
+                            variables = Some(extracted_variables);
+                        }
                     }
                     "results" => {
                         if reader.read_event(&mut buffer)? != JsonEvent::StartObject {
@@ -169,24 +189,45 @@
                         if reader.read_event(&mut buffer)? != JsonEvent::StartArray {
                             return Err(SyntaxError::msg("'bindings' should be an object").into());
                         }
-                        return if let Some(variables) = variables {
+                        if let Some(variables) = variables {
                             let mut mapping = BTreeMap::default();
                             for (i, var) in variables.iter().enumerate() {
                                 mapping.insert(var.as_str().to_string(), i);
                             }
-                            Ok(Self::Solutions {
+                            return Ok(Self::Solutions {
                                 variables,
                                 solutions: JsonSolutionsReader {
-                                    reader,
-                                    buffer,
+                                    kind: JsonSolutionsReaderKind::Streaming { reader, buffer },
                                     mapping,
                                 },
-                            })
+                            });
                         } else {
-                            Err(SyntaxError::msg(
-                                "SPARQL tuple query results should contain a head key",
-                            )
-                            .into())
+                            // We buffer all results before being able to read the header
+                            let mut bindings = Vec::new();
+                            let mut variables = Vec::new();
+                            let mut values = Vec::new();
+                            loop {
+                                match reader.read_event(&mut buffer)? {
+                                    JsonEvent::StartObject => (),
+                                    JsonEvent::EndObject => {
+                                        bindings.push((take(&mut variables), take(&mut values)));
+                                    }
+                                    JsonEvent::EndArray | JsonEvent::Eof => {
+                                        buffered_bindings = Some(bindings);
+                                        break;
+                                    }
+                                    JsonEvent::ObjectKey(key) => {
+                                        variables.push(key.to_string());
+                                        values.push(read_value(&mut reader, &mut buffer, 0)?);
+                                    }
+                                    _ => {
+                                        return Err(SyntaxError::msg(
+                                            "Invalid result serialization",
+                                        )
+                                        .into())
+                                    }
+                                }
+                            }
                         };
                     }
                     "boolean" => {
@@ -203,17 +244,16 @@
                             .into());
                         }
                     },
-                    JsonEvent::EndObject => {
-                        return Err(SyntaxError::msg(
-                            "SPARQL results should contain a bindings key or a boolean key",
-                        )
-                        .into())
-                    }
+                    JsonEvent::EndObject => (),
                     JsonEvent::Eof => {
-                        return Err(SyntaxError::msg(
-                            "Unexpected end of JSON object without 'results' or 'boolean' key",
-                        )
-                        .into())
+                        return if let Some(output_iter) = output_iter {
+                            Ok(output_iter)
+                        } else {
+                            Err(SyntaxError::msg(
+                                "Unexpected end of JSON object without 'results' or 'boolean' key",
+                            )
+                            .into())
+                        }
                     }
                     _ => return Err(SyntaxError::msg("Invalid SPARQL results serialization").into()),
                 }
             }
@@ -222,226 +262,254 @@
 }
 
 pub struct JsonSolutionsReader<R: BufRead> {
-    reader: JsonReader<R>,
-    buffer: Vec<u8>,
     mapping: BTreeMap<String, usize>,
+    kind: JsonSolutionsReaderKind<R>,
+}
+
+enum JsonSolutionsReaderKind<R: BufRead> {
+    Streaming {
+        reader: JsonReader<R>,
+        buffer: Vec<u8>,
+    },
+    Buffered {
+        bindings: std::vec::IntoIter<(Vec<String>, Vec<Term>)>,
+    },
 }
 
 impl<R: BufRead> JsonSolutionsReader<R> {
     pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, ParseError> {
-        let mut new_bindings = vec![None; self.mapping.len()];
-        loop {
-            match self.reader.read_event(&mut self.buffer)? {
-                JsonEvent::StartObject => (),
-                JsonEvent::EndObject => return Ok(Some(new_bindings)),
-                JsonEvent::EndArray | JsonEvent::Eof => return Ok(None),
-                JsonEvent::ObjectKey(key) => {
-                    let k = *self.mapping.get(key).ok_or_else(|| {
-                        SyntaxError::msg(format!(
-                            "The variable {key} has not been defined in the header"
-                        ))
-                    })?;
-                    new_bindings[k] = Some(self.read_value(0)?)
+        match &mut self.kind {
+            JsonSolutionsReaderKind::Streaming { reader, buffer } => {
+                let mut new_bindings = vec![None; self.mapping.len()];
+                loop {
+                    match reader.read_event(buffer)? {
+                        JsonEvent::StartObject => (),
+                        JsonEvent::EndObject => return Ok(Some(new_bindings)),
+                        JsonEvent::EndArray | JsonEvent::Eof => return Ok(None),
+                        JsonEvent::ObjectKey(key) => {
+                            let k = *self.mapping.get(key).ok_or_else(|| {
+                                SyntaxError::msg(format!(
+                                    "The variable {key} has not been defined in the header"
+                                ))
+                            })?;
+                            new_bindings[k] = Some(read_value(reader, buffer, 0)?)
+                        }
+                        _ => return Err(SyntaxError::msg("Invalid result serialization").into()),
+                    }
                 }
-                _ => return Err(SyntaxError::msg("Invalid result serialization").into()),
+            }
+            JsonSolutionsReaderKind::Buffered { bindings } => {
+                Ok(if let Some((variables, values)) = bindings.next() {
+                    let mut new_bindings = vec![None; self.mapping.len()];
+                    for (variable, value) in variables.into_iter().zip(values) {
+                        let k = *self.mapping.get(&variable).ok_or_else(|| {
+                            SyntaxError::msg(format!(
+                                "The variable {variable} has not been defined in the header"
+                            ))
+                        })?;
+                        new_bindings[k] = Some(value)
+                    }
+                    Some(new_bindings)
+                } else {
+                    None
+                })
             }
         }
     }
+}
 
-    fn read_value(&mut self, number_of_recursive_calls: usize) -> Result<Term, ParseError> {
-        if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES {
-            return Err(SyntaxError::msg(format!(
-                "Too many nested triples ({MAX_NUMBER_OF_NESTED_TRIPLES}). The parser fails here to avoid a stack overflow."
-            ))
-            .into());
-        }
-        enum Type {
-            Uri,
-            BNode,
-            Literal,
-            #[cfg(feature = "rdf-star")]
-            Triple,
-        }
-        #[derive(Eq, PartialEq)]
-        enum State {
-            Type,
-            Value,
-            Lang,
-            Datatype,
-        }
-        let mut state = None;
-        let mut t = None;
-        let mut value = None;
-        let mut lang = None;
-        let mut datatype = None;
-        #[cfg(feature = "rdf-star")]
-        let mut subject = None;
-        #[cfg(feature = "rdf-star")]
-        let mut predicate = None;
-        #[cfg(feature = "rdf-star")]
-        let mut object = None;
-        if self.reader.read_event(&mut self.buffer)? != JsonEvent::StartObject {
-            return Err(SyntaxError::msg("Term serializations should be an object").into());
-        }
-        loop {
-            match self.reader.read_event(&mut self.buffer)? {
-                JsonEvent::ObjectKey(key) => match key {
-                    "type" => state = Some(State::Type),
-                    "value" => state = Some(State::Value),
-                    "xml:lang" => state = Some(State::Lang),
-                    "datatype" => state = Some(State::Datatype),
-                    #[cfg(feature = "rdf-star")]
-                    "subject" => subject = Some(self.read_value(number_of_recursive_calls + 1)?),
-                    #[cfg(feature = "rdf-star")]
-                    "predicate" => {
-                        predicate = Some(self.read_value(number_of_recursive_calls + 1)?)
-                    }
-                    #[cfg(feature = "rdf-star")]
-                    "object" => object = Some(self.read_value(number_of_recursive_calls + 1)?),
-                    _ => {
-                        return Err(SyntaxError::msg(format!(
-                            "Unexpected key in term serialization: '{key}'"
-                        ))
-                        .into())
-                    }
-                },
-                JsonEvent::StartObject => {
-                    if state != Some(State::Value) {
-                        return Err(SyntaxError::msg(
-                            "Unexpected nested object in term serialization",
-                        )
-                        .into());
-                    }
-                }
-                JsonEvent::String(s) => match state {
-                    Some(State::Type) => {
-                        match s {
-                            "uri" => t = Some(Type::Uri),
-                            "bnode" => t = Some(Type::BNode),
-                            "literal" | "typed-literal" => t = Some(Type::Literal),
-                            #[cfg(feature = "rdf-star")]
-                            "triple" => t = Some(Type::Triple),
-                            _ => {
-                                return Err(SyntaxError::msg(format!(
-                                    "Unexpected term type: '{s}'"
-                                ))
-                                .into())
-                            }
-                        };
-                        state = None;
-                    }
-                    Some(State::Value) => {
-                        value = Some(s.to_owned());
-                        state = None;
-                    }
-                    Some(State::Lang) => {
-                        lang = Some(s.to_owned());
-                        state = None;
-                    }
-                    Some(State::Datatype) => {
-                        datatype =
-                            Some(NamedNode::new(s).map_err(|e| {
-                                SyntaxError::msg(format!("Invalid datatype IRI: {e}"))
-                            })?);
-                        state = None;
-                    }
-                    _ => (), // impossible
-                },
-                JsonEvent::EndObject => {
-                    if let Some(s) = state {
-                        if s == State::Value {
-                            state = None; //End of triple
-                        } else {
-                            return Err(SyntaxError::msg(
-                                "Term description values should be string",
-                            )
-                            .into());
-                        }
-                    } else {
-                        return match t {
-                            None => Err(SyntaxError::msg(
-                                "Term serialization should have a 'type' key",
-                            )
-                            .into()),
-                            Some(Type::Uri) => Ok(NamedNode::new(value.ok_or_else(|| {
-                                SyntaxError::msg("uri serialization should have a 'value' key")
-                            })?)
-                            .map_err(|e| SyntaxError::msg(format!("Invalid uri value: {e}")))?
-                            .into()),
-                            Some(Type::BNode) => Ok(BlankNode::new(value.ok_or_else(|| {
-                                SyntaxError::msg("bnode serialization should have a 'value' key")
-                            })?)
-                            .map_err(|e| SyntaxError::msg(format!("Invalid bnode value: {e}")))?
-                            .into()),
-                            Some(Type::Literal) => {
-                                let value = value.ok_or_else(|| {
-                                    SyntaxError::msg(
-                                        "literal serialization should have a 'value' key",
-                                    )
-                                })?;
-                                Ok(match lang {
-                                    Some(lang) => {
-                                        if let Some(datatype) = datatype {
-                                            if datatype.as_ref() != rdf::LANG_STRING {
-                                                return Err(SyntaxError::msg(format!(
-                                                    "xml:lang value '{lang}' provided with the datatype {datatype}"
-                                                )).into())
-                                            }
-                                        }
-                                        Literal::new_language_tagged_literal(value, &lang).map_err(|e| {
-                                            SyntaxError::msg(format!("Invalid xml:lang value '{lang}': {e}"))
-                                        })?
-                                    }
-                                    None => if let Some(datatype) = datatype {
-                                        Literal::new_typed_literal(value, datatype)
-                                    } else {
-                                        Literal::new_simple_literal(value)
-                                    }
-                                }
-                                .into())
-                            }
-                            #[cfg(feature = "rdf-star")]
-                            Some(Type::Triple) => Ok(Triple::new(
-                                match subject.ok_or_else(|| {
-                                    SyntaxError::msg(
-                                        "triple serialization should have a 'subject' key",
-                                    )
-                                })? {
-                                    Term::NamedNode(subject) => subject.into(),
-                                    Term::BlankNode(subject) => subject.into(),
-                                    Term::Triple(subject) => Subject::Triple(subject),
-                                    Term::Literal(_) => {
-                                        return Err(SyntaxError::msg(
-                                            "The 'subject' value should not be a literal",
-                                        )
-                                        .into())
-                                    }
-                                },
-                                match predicate.ok_or_else(|| {
-                                    SyntaxError::msg(
-                                        "triple serialization should have a 'predicate' key",
-                                    )
-                                })? {
-                                    Term::NamedNode(predicate) => predicate,
-                                    _ => {
-                                        return Err(SyntaxError::msg(
-                                            "The 'predicate' value should be a uri",
-                                        )
-                                        .into())
-                                    }
-                                },
-                                object.ok_or_else(|| {
-                                    SyntaxError::msg(
-                                        "triple serialization should have a 'object' key",
-                                    )
-                                })?,
-                            )
-                            .into()),
-                        };
-                    }
-                }
-                _ => return Err(SyntaxError::msg("Invalid term serialization").into()),
-            }
-        }
-    }
-}
+fn read_value<R: BufRead>(
+    reader: &mut JsonReader<R>,
+    buffer: &mut Vec<u8>,
+    number_of_recursive_calls: usize,
+) -> Result<Term, ParseError> {
+    if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES {
+        return Err(SyntaxError::msg(format!(
+            "Too many nested triples ({MAX_NUMBER_OF_NESTED_TRIPLES}). The parser fails here to avoid a stack overflow."
+        ))
+        .into());
+    }
+    enum Type {
+        Uri,
+        BNode,
+        Literal,
+        #[cfg(feature = "rdf-star")]
+        Triple,
+    }
+    #[derive(Eq, PartialEq)]
+    enum State {
+        Type,
+        Value,
+        Lang,
+        Datatype,
+    }
+    let mut state = None;
+    let mut t = None;
+    let mut value = None;
+    let mut lang = None;
+    let mut datatype = None;
+    #[cfg(feature = "rdf-star")]
+    let mut subject = None;
+    #[cfg(feature = "rdf-star")]
+    let mut predicate = None;
+    #[cfg(feature = "rdf-star")]
+    let mut object = None;
+    if reader.read_event(buffer)? != JsonEvent::StartObject {
+        return Err(SyntaxError::msg("Term serializations should be an object").into());
+    }
+    loop {
+        match reader.read_event(buffer)? {
+            JsonEvent::ObjectKey(key) => match key {
+                "type" => state = Some(State::Type),
+                "value" => state = Some(State::Value),
+                "xml:lang" => state = Some(State::Lang),
+                "datatype" => state = Some(State::Datatype),
+                #[cfg(feature = "rdf-star")]
+                "subject" => {
+                    subject = Some(read_value(reader, buffer, number_of_recursive_calls + 1)?)
+                }
+                #[cfg(feature = "rdf-star")]
+                "predicate" => {
+                    predicate = Some(read_value(reader, buffer, number_of_recursive_calls + 1)?)
+                }
+                #[cfg(feature = "rdf-star")]
+                "object" => {
+                    object = Some(read_value(reader, buffer, number_of_recursive_calls + 1)?)
+                }
+                _ => {
+                    return Err(SyntaxError::msg(format!(
+                        "Unexpected key in term serialization: '{key}'"
+                    ))
+                    .into())
+                }
+            },
+            JsonEvent::StartObject => {
+                if state != Some(State::Value) {
+                    return Err(
+                        SyntaxError::msg("Unexpected nested object in term serialization").into(),
+                    );
+                }
+            }
+            JsonEvent::String(s) => match state {
+                Some(State::Type) => {
+                    match s {
+                        "uri" => t = Some(Type::Uri),
+                        "bnode" => t = Some(Type::BNode),
+                        "literal" | "typed-literal" => t = Some(Type::Literal),
+                        #[cfg(feature = "rdf-star")]
+                        "triple" => t = Some(Type::Triple),
+                        _ => {
+                            return Err(
+                                SyntaxError::msg(format!("Unexpected term type: '{s}'")).into()
+                            )
+                        }
+                    };
+                    state = None;
+                }
+                Some(State::Value) => {
+                    value = Some(s.to_owned());
+                    state = None;
+                }
+                Some(State::Lang) => {
+                    lang = Some(s.to_owned());
+                    state = None;
+                }
+                Some(State::Datatype) => {
+                    datatype = Some(
+                        NamedNode::new(s)
+                            .map_err(|e| SyntaxError::msg(format!("Invalid datatype IRI: {e}")))?,
+                    );
+                    state = None;
+                }
+                _ => (), // impossible
+            },
+            JsonEvent::EndObject => {
+                if let Some(s) = state {
+                    if s == State::Value {
+                        state = None; //End of triple
+                    } else {
+                        return Err(
+                            SyntaxError::msg("Term description values should be string").into()
+                        );
+                    }
+                } else {
+                    return match t {
+                        None => Err(SyntaxError::msg(
+                            "Term serialization should have a 'type' key",
+                        )
+                        .into()),
+                        Some(Type::Uri) => Ok(NamedNode::new(value.ok_or_else(|| {
+                            SyntaxError::msg("uri serialization should have a 'value' key")
+                        })?)
+                        .map_err(|e| SyntaxError::msg(format!("Invalid uri value: {e}")))?
+                        .into()),
+                        Some(Type::BNode) => Ok(BlankNode::new(value.ok_or_else(|| {
+                            SyntaxError::msg("bnode serialization should have a 'value' key")
+                        })?)
+                        .map_err(|e| SyntaxError::msg(format!("Invalid bnode value: {e}")))?
+                        .into()),
+                        Some(Type::Literal) => {
+                            let value = value.ok_or_else(|| {
+                                SyntaxError::msg("literal serialization should have a 'value' key")
+                            })?;
+                            Ok(match lang {
+                                Some(lang) => {
+                                    if let Some(datatype) = datatype {
+                                        if datatype.as_ref() != rdf::LANG_STRING {
+                                            return Err(SyntaxError::msg(format!(
+                                                "xml:lang value '{lang}' provided with the datatype {datatype}"
+                                            )).into())
+                                        }
+                                    }
+                                    Literal::new_language_tagged_literal(value, &lang).map_err(|e| {
+                                        SyntaxError::msg(format!("Invalid xml:lang value '{lang}': {e}"))
+                                    })?
+                                }
+                                None => if let Some(datatype) = datatype {
+                                    Literal::new_typed_literal(value, datatype)
+                                } else {
+                                    Literal::new_simple_literal(value)
+                                }
+                            }
+                            .into())
+                        }
+                        #[cfg(feature = "rdf-star")]
+                        Some(Type::Triple) => Ok(Triple::new(
+                            match subject.ok_or_else(|| {
+                                SyntaxError::msg("triple serialization should have a 'subject' key")
+                            })? {
+                                Term::NamedNode(subject) => subject.into(),
+                                Term::BlankNode(subject) => subject.into(),
+                                Term::Triple(subject) => Subject::Triple(subject),
+                                Term::Literal(_) => {
+                                    return Err(SyntaxError::msg(
+                                        "The 'subject' value should not be a literal",
+                                    )
+                                    .into())
+                                }
+                            },
+                            match predicate.ok_or_else(|| {
+                                SyntaxError::msg(
+                                    "triple serialization should have a 'predicate' key",
+                                )
+                            })? {
+                                Term::NamedNode(predicate) => predicate,
+                                _ => {
+                                    return Err(SyntaxError::msg(
+                                        "The 'predicate' value should be a uri",
+                                    )
+                                    .into())
+                                }
+                            },
+                            object.ok_or_else(|| {
+                                SyntaxError::msg("triple serialization should have a 'object' key")
+                            })?,
+                        )
+                        .into()),
+                    };
+                }
+            }
+            _ => return Err(SyntaxError::msg("Invalid term serialization").into()),
+        }
+    }
+}
diff --git a/testsuite/oxigraph-tests/sparql-results/late_head.srj b/testsuite/oxigraph-tests/sparql-results/late_head.srj
new file mode 100644
index 00000000..89c68ace
--- /dev/null
+++ b/testsuite/oxigraph-tests/sparql-results/late_head.srj
@@ -0,0 +1,16 @@
+{
+    "results": {
+        "bindings": [
+            {},
+            {
+                "s": {
+                    "type": "literal",
+                    "value": "foo"
+                }
+            }
+        ]
+    },
+    "head": {
+        "vars": ["s"]
+    }
+}
diff --git a/testsuite/oxigraph-tests/sparql-results/late_head_expected.srj b/testsuite/oxigraph-tests/sparql-results/late_head_expected.srj
new file mode 100644
index 00000000..c1113e69
--- /dev/null
+++ b/testsuite/oxigraph-tests/sparql-results/late_head_expected.srj
@@ -0,0 +1,16 @@
+{
+    "head": {
+        "vars": ["s"]
+    },
+    "results": {
+        "bindings": [
+            {},
+            {
+                "s": {
+                    "type": "literal",
+                    "value": "foo"
+                }
+            }
+        ]
+    }
+}
diff --git a/testsuite/oxigraph-tests/sparql-results/manifest.ttl b/testsuite/oxigraph-tests/sparql-results/manifest.ttl
index 4e9c6473..55b4f7df 100644
--- a/testsuite/oxigraph-tests/sparql-results/manifest.ttl
+++ b/testsuite/oxigraph-tests/sparql-results/manifest.ttl
@@ -14,6 +14,7 @@
     :results_json_ignored_keys
     :results_xml_ignored_keys
     :results_json_typed_literal
+    :late_head
 ) .
 
 :results_json_duplicated_variables rdf:type ox:NegativeJsonResultsSyntaxTest ;
@@ -38,4 +39,10 @@
 :results_json_typed_literal rdf:type ox:PositiveJsonResultsSyntaxTest ;
     mf:name "typed-literal term type is allowed" ;
-    mf:action <typed_literal.srj> .
+    mf:action <typed_literal.srj> ;
+    mf:result <typed_literal_expected.srj> .
+
+:late_head rdf:type ox:PositiveJsonResultsSyntaxTest ;
+    mf:name "head after the list of results" ;
+    mf:action <late_head.srj> ;
+    mf:result <late_head_expected.srj> .
diff --git a/testsuite/oxigraph-tests/sparql-results/typed_literal_expected.srj b/testsuite/oxigraph-tests/sparql-results/typed_literal_expected.srj
new file mode 100644
index 00000000..895693cf
--- /dev/null
+++ b/testsuite/oxigraph-tests/sparql-results/typed_literal_expected.srj
@@ -0,0 +1,16 @@
+{
+    "head": {
+        "vars": ["s"]
+    },
+    "results": {
+        "bindings": [
+            {
+                "s": {
+                    "type": "literal",
+                    "value": "foo",
+                    "datatype": "http://example.com/dt"
+                }
+            }
+        ]
+    }
+}
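The fixtures above are what the evaluator change below consumes: it parses the action file and checks it is isomorphic to the expected file. Outside the testsuite, the same "late head" document can be read through the sparesults public API. The following is only an illustrative sketch (not part of this patch), assuming the crate's QueryResultsParser::from_format / read_results entry points as exposed by sparesults 0.1:

use sparesults::{QueryResultsFormat, QueryResultsParser, QueryResultsReader};
use std::error::Error;

fn main() -> Result<(), Box<dyn Error>> {
    // A document whose "head" key only appears after the bindings,
    // mirroring testsuite/oxigraph-tests/sparql-results/late_head.srj.
    let json = br#"{"results":{"bindings":[{},{"s":{"type":"literal","value":"foo"}}]},"head":{"vars":["s"]}}"#;
    let parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
    if let QueryResultsReader::Solutions(solutions) = parser.read_results(json.as_slice())? {
        for solution in solutions {
            let solution = solution?;
            // First row has no binding, second row binds ?s to "foo".
            println!("{:?}", solution.get("s"));
        }
    }
    Ok(())
}

Because the head arrives last, the parser takes the buffered (non-streaming) path and holds all bindings in memory before yielding them.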
diff --git a/testsuite/src/sparql_evaluator.rs b/testsuite/src/sparql_evaluator.rs
index e0b5fea6..b01aa2dd 100644
--- a/testsuite/src/sparql_evaluator.rs
+++ b/testsuite/src/sparql_evaluator.rs
@@ -127,22 +127,25 @@ fn evaluate_negative_tsv_result_syntax_test(test: &Test) -> Result<()> {
 }
 
 fn result_syntax_check(test: &Test, format: QueryResultsFormat) -> Result<()> {
-    let results_file = test
+    let action_file = test
         .action
         .as_deref()
         .ok_or_else(|| anyhow!("No action found for test {test}"))?;
-    match QueryResults::read(Cursor::new(read_file_to_string(results_file)?), format)? {
-        QueryResults::Solutions(solutions) => {
-            for s in solutions {
-                s?;
-            }
-        }
-        QueryResults::Graph(triples) => {
-            for t in triples {
-                t?;
-            }
+    let actual_results = StaticQueryResults::from_query_results(
+        QueryResults::read(Cursor::new(read_file_to_string(action_file)?), format)?,
+        true,
+    )?;
+    if let Some(result_file) = test.result.as_deref() {
+        let expected_results = StaticQueryResults::from_query_results(
+            QueryResults::read(Cursor::new(read_file_to_string(result_file)?), format)?,
+            true,
+        )?;
+        if !are_query_results_isomorphic(&expected_results, &actual_results) {
+            bail!(
+                "Failure on {test}.\n{}\n",
+                results_diff(expected_results, actual_results),
+            );
         }
-        QueryResults::Boolean(_) => (),
     }
     Ok(())
 }
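The intent of the change is that the buffered (head-last) path is observationally identical to the streaming (head-first) path. A quick way to convince yourself of that outside the testsuite is to parse the same bindings in both orders and compare the rows. Again a sketch under the same assumption about the sparesults public API, not code from this patch:

use sparesults::{QueryResultsFormat, QueryResultsParser, QueryResultsReader};
use std::error::Error;

fn collect_s(json: &[u8]) -> Result<Vec<Option<String>>, Box<dyn Error>> {
    let parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
    let mut rows = Vec::new();
    if let QueryResultsReader::Solutions(solutions) = parser.read_results(json)? {
        for solution in solutions {
            // Record ?s (if bound) as a string for this row.
            rows.push(solution?.get("s").map(ToString::to_string));
        }
    }
    Ok(rows)
}

fn main() -> Result<(), Box<dyn Error>> {
    let head_first = br#"{"head":{"vars":["s"]},"results":{"bindings":[{},{"s":{"type":"literal","value":"foo"}}]}}"#;
    let head_last = br#"{"results":{"bindings":[{},{"s":{"type":"literal","value":"foo"}}]},"head":{"vars":["s"]}}"#;
    // Streaming (head first) and buffered (head last) parsing
    // should yield the same sequence of solutions.
    assert_eq!(collect_s(head_first)?, collect_s(head_last)?);
    Ok(())
}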