From cde2672cdd7b23f461fac64f4bd105cbb4a354c6 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 30 May 2021 09:29:58 +0200 Subject: [PATCH] Adds JSON deserializer Closes #47 --- lib/Cargo.toml | 1 + lib/src/sparql/json_results.rs | 375 ++++++++++++++++++++++++++------- lib/src/sparql/model.rs | 78 ++++++- lib/src/sparql/xml_results.rs | 6 +- testsuite/tests/sparql.rs | 5 +- 5 files changed, 377 insertions(+), 88 deletions(-) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 8ee256da..48809c72 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -45,6 +45,7 @@ sophia_api = { version = "0.6.2", optional = true } http = "0.2" httparse = { version = "1", optional = true } native-tls = { version = "0.2", optional = true } +json-event-parser = "0.1" [target.'cfg(target_arch = "wasm32")'.dependencies] js-sys = "0.3" diff --git a/lib/src/sparql/json_results.rs b/lib/src/sparql/json_results.rs index 609aa888..0650f3e7 100644 --- a/lib/src/sparql/json_results.rs +++ b/lib/src/sparql/json_results.rs @@ -1,80 +1,89 @@ //! Implementation of [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) -use crate::error::invalid_input_error; +use crate::error::{invalid_data_error, invalid_input_error}; use crate::model::*; use crate::sparql::error::EvaluationError; use crate::sparql::model::*; -use std::io::Write; +use json_event_parser::{JsonEvent, JsonReader, JsonWriter}; +use std::collections::BTreeMap; +use std::io; +use std::io::{BufRead, Write}; +use std::rc::Rc; -pub fn write_json_results( - results: QueryResults, - mut sink: impl Write, -) -> Result<(), EvaluationError> { +pub fn write_json_results(results: QueryResults, sink: impl Write) -> Result<(), EvaluationError> { + let mut writer = JsonWriter::from_writer(sink); match results { QueryResults::Boolean(value) => { - sink.write_all(b"{\"head\":{},\"boolean\":")?; - sink.write_all(if value { b"true" } else { b"false" })?; - sink.write_all(b"}")?; + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("head"))?; + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::EndObject)?; + writer.write_event(JsonEvent::ObjectKey("boolean"))?; + writer.write_event(JsonEvent::Boolean(value))?; + writer.write_event(JsonEvent::EndObject)?; Ok(()) } QueryResults::Solutions(solutions) => { - sink.write_all(b"{\"head\":{\"vars\":[")?; - let mut start_vars = true; + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("head"))?; + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("vars"))?; + writer.write_event(JsonEvent::StartArray)?; for variable in solutions.variables() { - if start_vars { - start_vars = false; - } else { - sink.write_all(b",")?; - } - write_escaped_json_string(variable.as_str(), &mut sink)?; + writer.write_event(JsonEvent::String(variable.as_str()))?; } - sink.write_all(b"]},\"results\":{\"bindings\":[")?; - let mut start_bindings = true; + writer.write_event(JsonEvent::EndArray)?; + writer.write_event(JsonEvent::EndObject)?; + writer.write_event(JsonEvent::ObjectKey("results"))?; + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("bindings"))?; + writer.write_event(JsonEvent::StartArray)?; for solution in solutions { - if start_bindings { - start_bindings = false; - } else { - sink.write_all(b",")?; - } - sink.write_all(b"{")?; + writer.write_event(JsonEvent::StartObject)?; let solution = solution?; - let mut start_binding = true; for (variable, value) in solution.iter() { - if start_binding { - start_binding = false; - } else { - sink.write_all(b",")?; - } - write_escaped_json_string(variable.as_str(), &mut sink)?; + writer.write_event(JsonEvent::ObjectKey(variable.as_str()))?; match value { Term::NamedNode(uri) => { - sink.write_all(b":{\"type\":\"uri\",\"value\":")?; - write_escaped_json_string(uri.as_str(), &mut sink)?; - sink.write_all(b"}")?; + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("type"))?; + writer.write_event(JsonEvent::String("uri"))?; + writer.write_event(JsonEvent::ObjectKey("value"))?; + writer.write_event(JsonEvent::String(uri.as_str()))?; + writer.write_event(JsonEvent::EndObject)?; } Term::BlankNode(bnode) => { - sink.write_all(b":{\"type\":\"bnode\",\"value\":")?; - write_escaped_json_string(bnode.as_str(), &mut sink)?; - sink.write_all(b"}")?; + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("type"))?; + writer.write_event(JsonEvent::String("bnode"))?; + writer.write_event(JsonEvent::ObjectKey("value"))?; + writer.write_event(JsonEvent::String(bnode.as_str()))?; + writer.write_event(JsonEvent::EndObject)?; } Term::Literal(literal) => { - sink.write_all(b":{\"type\":\"literal\",\"value\":")?; - write_escaped_json_string(literal.value(), &mut sink)?; + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("type"))?; + writer.write_event(JsonEvent::String("literal"))?; + writer.write_event(JsonEvent::ObjectKey("value"))?; + writer.write_event(JsonEvent::String(literal.value()))?; if let Some(language) = literal.language() { - sink.write_all(b",\"xml:lang\":")?; - write_escaped_json_string(language, &mut sink)?; + writer.write_event(JsonEvent::ObjectKey("xml:lang"))?; + writer.write_event(JsonEvent::String(language))?; } else if !literal.is_plain() { - sink.write_all(b",\"datatype\":")?; - write_escaped_json_string(literal.datatype().as_str(), &mut sink)?; + writer.write_event(JsonEvent::ObjectKey("datatype"))?; + writer + .write_event(JsonEvent::String(literal.datatype().as_str()))?; } - sink.write_all(b"}")?; + writer.write_event(JsonEvent::EndObject)?; } } } - sink.write_all(b"}")?; + writer.write_event(JsonEvent::EndObject)?; } - sink.write_all(b"]}}")?; + writer.write_event(JsonEvent::EndArray)?; + writer.write_event(JsonEvent::EndObject)?; + writer.write_event(JsonEvent::EndObject)?; Ok(()) } QueryResults::Graph(_) => Err(invalid_input_error( @@ -84,37 +93,253 @@ pub fn write_json_results( } } -fn write_escaped_json_string(s: &str, mut sink: impl Write) -> Result<(), EvaluationError> { - sink.write_all(b"\"")?; - for c in s.chars() { - match c { - '\\' => sink.write_all(b"\\\\"), - '"' => sink.write_all(b"\\\""), - c => { - if c < char::from(32) { - match c { - '\u{08}' => sink.write_all(b"\\b"), - '\u{0C}' => sink.write_all(b"\\f"), - '\n' => sink.write_all(b"\\n"), - '\r' => sink.write_all(b"\\r"), - '\t' => sink.write_all(b"\\t"), - c => { - let mut c = c as u8; - let mut result = [b'\\', b'u', 0, 0, 0, 0]; - for i in (2..6).rev() { - let ch = c % 16; - result[i] = ch + if ch < 10 { b'0' } else { b'A' }; - c /= 16; - } - sink.write_all(&result) +pub fn read_json_results(source: impl BufRead + 'static) -> Result { + let mut reader = JsonReader::from_reader(source); + let mut buffer = Vec::default(); + let mut variables = None; + + if reader.read_event(&mut buffer)? != JsonEvent::StartObject { + return Err(invalid_data_error( + "SPARQL JSON results should be an object", + )); + } + + loop { + let event = reader.read_event(&mut buffer)?; + match event { + JsonEvent::ObjectKey(key) => match key { + "head" => variables = Some(read_head(&mut reader, &mut buffer)?), + "results" => { + if reader.read_event(&mut buffer)? != JsonEvent::StartObject { + return Err(invalid_data_error("'results' should be an object")); + } + if reader.read_event(&mut buffer)? != JsonEvent::ObjectKey("bindings") { + return Err(invalid_data_error( + "'results' should contain a 'bindings' key", + )); + } + if reader.read_event(&mut buffer)? != JsonEvent::StartArray { + return Err(invalid_data_error("'bindings' should be an object")); + } + return if let Some(variables) = variables { + let mut mapping = BTreeMap::default(); + for (i, var) in variables.iter().enumerate() { + mapping.insert(var.clone(), i); } + Ok(QueryResults::Solutions(QuerySolutionIter::new( + Rc::new( + variables + .into_iter() + .map(Variable::new) + .collect::, _>>() + .map_err(invalid_data_error)?, + ), + Box::new(ResultsIterator { + reader, + buffer, + mapping, + }), + ))) + } else { + Err(invalid_data_error( + "SPARQL tuple query results should contain a head key", + )) + }; + } + "boolean" => { + return if let JsonEvent::Boolean(v) = reader.read_event(&mut buffer)? { + Ok(QueryResults::Boolean(v)) + } else { + Err(invalid_data_error("Unexpected boolean value")) } - } else { - write!(sink, "{}", c) } + _ => { + return Err(invalid_data_error(format!( + "Expecting head or result key, found {}", + key + ))); + } + }, + JsonEvent::EndObject => { + return Err(invalid_data_error( + "SPARQL results should contain a bindings key or a boolean key", + )) } - }?; + JsonEvent::Eof => return Err(io::Error::from(io::ErrorKind::UnexpectedEof)), + _ => return Err(invalid_data_error("Invalid SPARQL results serialization")), + } + } +} + +fn read_head( + reader: &mut JsonReader, + buffer: &mut Vec, +) -> io::Result> { + if reader.read_event(buffer)? != JsonEvent::StartObject { + return Err(invalid_data_error("head should be an object")); + } + let mut variables = None; + loop { + match reader.read_event(buffer)? { + JsonEvent::ObjectKey(key) => match key { + "vars" => variables = Some(read_string_array(reader, buffer)?), + "link" => { + read_string_array(reader, buffer)?; + } + _ => { + return Err(invalid_data_error(format!( + "Unexpected key in head: '{}'", + key + ))) + } + }, + JsonEvent::EndObject => return Ok(variables.unwrap_or_else(Vec::new)), + _ => return Err(invalid_data_error("Invalid head serialization")), + } + } +} + +fn read_string_array( + reader: &mut JsonReader, + buffer: &mut Vec, +) -> io::Result> { + if reader.read_event(buffer)? != JsonEvent::StartArray { + return Err(invalid_data_error("Variable list should be an array")); + } + let mut elements = Vec::new(); + loop { + match reader.read_event(buffer)? { + JsonEvent::String(s) => { + elements.push(s.into()); + } + JsonEvent::EndArray => return Ok(elements), + _ => return Err(invalid_data_error("Variable names should be strings")), + } + } +} + +struct ResultsIterator { + reader: JsonReader, + buffer: Vec, + mapping: BTreeMap, +} + +impl Iterator for ResultsIterator { + type Item = Result>, EvaluationError>; + + fn next(&mut self) -> Option>, EvaluationError>> { + self.read_next().map_err(EvaluationError::from).transpose() + } +} + +impl ResultsIterator { + fn read_next(&mut self) -> io::Result>>> { + let mut new_bindings = vec![None; self.mapping.len()]; + loop { + match self.reader.read_event(&mut self.buffer)? { + JsonEvent::StartObject => (), + JsonEvent::EndObject => return Ok(Some(new_bindings)), + JsonEvent::EndArray | JsonEvent::Eof => return Ok(None), + JsonEvent::ObjectKey(key) => { + let k = *self.mapping.get(key).ok_or_else(|| { + invalid_data_error(format!( + "The variable {} has not been defined in the header", + key + )) + })?; + new_bindings[k] = Some(self.read_value()?) + } + _ => return Err(invalid_data_error("Invalid result serialization")), + } + } + } + fn read_value(&mut self) -> io::Result { + enum Type { + Uri, + BNode, + Literal, + } + enum State { + Type, + Value, + Lang, + Datatype, + } + let mut state = None; + let mut t = None; + let mut value = None; + let mut lang = None; + let mut datatype = None; + if self.reader.read_event(&mut self.buffer)? != JsonEvent::StartObject { + return Err(invalid_data_error( + "Term serializations should be an object", + )); + } + loop { + match self.reader.read_event(&mut self.buffer)? { + JsonEvent::ObjectKey(key) => match key { + "type" => state = Some(State::Type), + "value" => state = Some(State::Value), + "xml:lang" => state = Some(State::Lang), + "datatype" => state = Some(State::Datatype), + _ => { + return Err(invalid_data_error(format!( + "Unexpected key in term serialization: '{}'", + key + ))) + } + }, + JsonEvent::String(s) => match state { + None => (), // impossible + Some(State::Type) => match s { + "uri" => t = Some(Type::Uri), + "bnode" => t = Some(Type::BNode), + "literal" => t = Some(Type::Literal), + _ => { + return Err(invalid_data_error(format!( + "Unexpected term type: '{}'", + s + ))) + } + }, + Some(State::Value) => value = Some(s.to_owned()), + Some(State::Lang) => lang = Some(s.to_owned()), + Some(State::Datatype) => datatype = Some(s.to_owned()), + }, + JsonEvent::EndObject => { + let value = value.ok_or_else(|| { + invalid_data_error("Term serialization should have a value key") + })?; + return match t { + None => Err(invalid_data_error( + "Term serialization should have a type key", + )), + Some(Type::Uri) => Ok(NamedNode::new(value) + .map_err(|e| invalid_data_error(format!("Invalid uri value: {}", e)))? + .into()), + Some(Type::BNode) => Ok(BlankNode::new(value) + .map_err(|e| invalid_data_error(format!("Invalid bnode value: {}", e)))? + .into()), + Some(Type::Literal) => Ok(match datatype { + Some(datatype) => Literal::new_typed_literal( + value, + NamedNode::new(datatype).map_err(|e| { + invalid_data_error(format!("Invalid datatype value: {}", e)) + })?, + ), + None => match lang { + Some(lang) => Literal::new_language_tagged_literal(value, lang) + .map_err(|e| { + invalid_data_error(format!("Invalid xml:lang value: {}", e)) + })?, + None => Literal::new_simple_literal(value), + }, + } + .into()), + }; + } + _ => return Err(invalid_data_error("Invalid term serialization")), + } + } } - sink.write_all(b"\"")?; - Ok(()) } diff --git a/lib/src/sparql/model.rs b/lib/src/sparql/model.rs index f8ff1605..ec056a6b 100644 --- a/lib/src/sparql/model.rs +++ b/lib/src/sparql/model.rs @@ -4,7 +4,7 @@ use crate::io::GraphSerializer; use crate::model::*; use crate::sparql::csv_results::{read_tsv_results, write_csv_results, write_tsv_results}; use crate::sparql::error::EvaluationError; -use crate::sparql::json_results::write_json_results; +use crate::sparql::json_results::{read_json_results, write_json_results}; use crate::sparql::xml_results::{read_xml_results, write_xml_results}; use rand::random; use std::error::Error; @@ -30,11 +30,9 @@ impl QueryResults { ) -> Result { match format { QueryResultsFormat::Xml => read_xml_results(reader), - QueryResultsFormat::Json => Err(invalid_input_error( - "JSON SPARQL results format parsing has not been implemented yet", - )), //TODO: implement + QueryResultsFormat::Json => read_json_results(reader), QueryResultsFormat::Csv => Err(invalid_input_error( - "CSV and TSV SPARQL results format parsing is not implemented", + "CSV SPARQL results format parsing is not implemented", )), QueryResultsFormat::Tsv => read_tsv_results(reader), } @@ -520,3 +518,73 @@ impl fmt::Display for VariableNameParseError { } impl Error for VariableNameParseError {} + +#[test] +fn test_serialization_rountrip() -> Result<(), EvaluationError> { + use std::io::Cursor; + use std::str; + + for format in &[ + QueryResultsFormat::Xml, + QueryResultsFormat::Json, + QueryResultsFormat::Tsv, + ] { + let results = vec![ + QueryResults::Boolean(true), + QueryResults::Boolean(false), + QueryResults::Solutions(QuerySolutionIter::new( + Rc::new(vec![ + Variable::new_unchecked("foo"), + Variable::new_unchecked("bar"), + ]), + Box::new( + vec![ + Ok(vec![None, None]), + Ok(vec![ + Some(NamedNode::new_unchecked("http://example.com").into()), + None, + ]), + Ok(vec![ + None, + Some(NamedNode::new_unchecked("http://example.com").into()), + ]), + Ok(vec![ + Some(BlankNode::new_unchecked("foo").into()), + Some(BlankNode::new_unchecked("bar").into()), + ]), + Ok(vec![Some(Literal::new_simple_literal("foo").into()), None]), + Ok(vec![ + Some( + Literal::new_language_tagged_literal_unchecked("foo", "fr").into(), + ), + None, + ]), + Ok(vec![ + Some(Literal::from(1).into()), + Some(Literal::from(true).into()), + ]), + Ok(vec![ + Some(Literal::from(1.33).into()), + Some(Literal::from(false).into()), + ]), + ] + .into_iter(), + ), + )), + ]; + + for ex in results { + let mut buffer = Vec::new(); + ex.write(&mut buffer, *format)?; + let ex2 = QueryResults::read(Cursor::new(buffer.clone()), *format)?; + let mut buffer2 = Vec::new(); + ex2.write(&mut buffer2, *format)?; + assert_eq!( + str::from_utf8(&buffer).unwrap(), + str::from_utf8(&buffer2).unwrap() + ); + } + } + + Ok(()) +} diff --git a/lib/src/sparql/xml_results.rs b/lib/src/sparql/xml_results.rs index 88a55d5f..e0ca3617 100644 --- a/lib/src/sparql/xml_results.rs +++ b/lib/src/sparql/xml_results.rs @@ -318,8 +318,7 @@ impl ResultsIterator { } let mut state = State::Start; - let mut new_bindings = Vec::default(); - new_bindings.resize(self.mapping.len(), None); + let mut new_bindings = vec![None; self.mapping.len()]; let mut current_var = None; let mut term: Option = None; @@ -474,13 +473,12 @@ impl ResultsIterator { State::Result => return Ok(Some(new_bindings)), State::Binding => { if let Some(var) = ¤t_var { - new_bindings[self.mapping[var]] = term.clone() + new_bindings[self.mapping[var]] = term.take() } else { return Err( invalid_data_error("No name found for tag").into() ); } - term = None; state = State::Result; } State::Uri | State::BNode => state = State::Binding, diff --git a/testsuite/tests/sparql.rs b/testsuite/tests/sparql.rs index 2b10b63f..f73b4f94 100644 --- a/testsuite/tests/sparql.rs +++ b/testsuite/tests/sparql.rs @@ -85,15 +85,12 @@ fn sparql11_query_w3c_evaluation_testsuite() -> Result<()> { "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/syntax-query/manifest#test_61a", "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/syntax-query/manifest#test_62a", "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/syntax-query/manifest#test_65", - // SPARQL 1.1 JSON query results deserialization is not implemented yet - "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/aggregates/manifest#agg-empty-group-count-1", - "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/aggregates/manifest#agg-empty-group-count-2", //BNODE() scope is currently wrong "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/functions/manifest#bnode01", //Property path with unbound graph name are not supported yet "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/property-path/manifest#pp35", //SERVICE name from a BGP - "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/service/manifest#service5" + "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/service/manifest#service5", ], ) }