Adds JSON deserializer

Closes #47
pull/107/head
Tpt 4 years ago
parent 7b98d58b10
commit cde2672cdd
  1. 1
      lib/Cargo.toml
  2. 375
      lib/src/sparql/json_results.rs
  3. 78
      lib/src/sparql/model.rs
  4. 6
      lib/src/sparql/xml_results.rs
  5. 5
      testsuite/tests/sparql.rs

@ -45,6 +45,7 @@ sophia_api = { version = "0.6.2", optional = true }
http = "0.2" http = "0.2"
httparse = { version = "1", optional = true } httparse = { version = "1", optional = true }
native-tls = { version = "0.2", optional = true } native-tls = { version = "0.2", optional = true }
json-event-parser = "0.1"
[target.'cfg(target_arch = "wasm32")'.dependencies] [target.'cfg(target_arch = "wasm32")'.dependencies]
js-sys = "0.3" js-sys = "0.3"

@ -1,80 +1,89 @@
//! Implementation of [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) //! Implementation of [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/)
use crate::error::invalid_input_error; use crate::error::{invalid_data_error, invalid_input_error};
use crate::model::*; use crate::model::*;
use crate::sparql::error::EvaluationError; use crate::sparql::error::EvaluationError;
use crate::sparql::model::*; use crate::sparql::model::*;
use std::io::Write; use json_event_parser::{JsonEvent, JsonReader, JsonWriter};
use std::collections::BTreeMap;
use std::io;
use std::io::{BufRead, Write};
use std::rc::Rc;
pub fn write_json_results( pub fn write_json_results(results: QueryResults, sink: impl Write) -> Result<(), EvaluationError> {
results: QueryResults, let mut writer = JsonWriter::from_writer(sink);
mut sink: impl Write,
) -> Result<(), EvaluationError> {
match results { match results {
QueryResults::Boolean(value) => { QueryResults::Boolean(value) => {
sink.write_all(b"{\"head\":{},\"boolean\":")?; writer.write_event(JsonEvent::StartObject)?;
sink.write_all(if value { b"true" } else { b"false" })?; writer.write_event(JsonEvent::ObjectKey("head"))?;
sink.write_all(b"}")?; writer.write_event(JsonEvent::StartObject)?;
writer.write_event(JsonEvent::EndObject)?;
writer.write_event(JsonEvent::ObjectKey("boolean"))?;
writer.write_event(JsonEvent::Boolean(value))?;
writer.write_event(JsonEvent::EndObject)?;
Ok(()) Ok(())
} }
QueryResults::Solutions(solutions) => { QueryResults::Solutions(solutions) => {
sink.write_all(b"{\"head\":{\"vars\":[")?; writer.write_event(JsonEvent::StartObject)?;
let mut start_vars = true; writer.write_event(JsonEvent::ObjectKey("head"))?;
writer.write_event(JsonEvent::StartObject)?;
writer.write_event(JsonEvent::ObjectKey("vars"))?;
writer.write_event(JsonEvent::StartArray)?;
for variable in solutions.variables() { for variable in solutions.variables() {
if start_vars { writer.write_event(JsonEvent::String(variable.as_str()))?;
start_vars = false;
} else {
sink.write_all(b",")?;
}
write_escaped_json_string(variable.as_str(), &mut sink)?;
} }
sink.write_all(b"]},\"results\":{\"bindings\":[")?; writer.write_event(JsonEvent::EndArray)?;
let mut start_bindings = true; writer.write_event(JsonEvent::EndObject)?;
writer.write_event(JsonEvent::ObjectKey("results"))?;
writer.write_event(JsonEvent::StartObject)?;
writer.write_event(JsonEvent::ObjectKey("bindings"))?;
writer.write_event(JsonEvent::StartArray)?;
for solution in solutions { for solution in solutions {
if start_bindings { writer.write_event(JsonEvent::StartObject)?;
start_bindings = false;
} else {
sink.write_all(b",")?;
}
sink.write_all(b"{")?;
let solution = solution?; let solution = solution?;
let mut start_binding = true;
for (variable, value) in solution.iter() { for (variable, value) in solution.iter() {
if start_binding { writer.write_event(JsonEvent::ObjectKey(variable.as_str()))?;
start_binding = false;
} else {
sink.write_all(b",")?;
}
write_escaped_json_string(variable.as_str(), &mut sink)?;
match value { match value {
Term::NamedNode(uri) => { Term::NamedNode(uri) => {
sink.write_all(b":{\"type\":\"uri\",\"value\":")?; writer.write_event(JsonEvent::StartObject)?;
write_escaped_json_string(uri.as_str(), &mut sink)?; writer.write_event(JsonEvent::ObjectKey("type"))?;
sink.write_all(b"}")?; writer.write_event(JsonEvent::String("uri"))?;
writer.write_event(JsonEvent::ObjectKey("value"))?;
writer.write_event(JsonEvent::String(uri.as_str()))?;
writer.write_event(JsonEvent::EndObject)?;
} }
Term::BlankNode(bnode) => { Term::BlankNode(bnode) => {
sink.write_all(b":{\"type\":\"bnode\",\"value\":")?; writer.write_event(JsonEvent::StartObject)?;
write_escaped_json_string(bnode.as_str(), &mut sink)?; writer.write_event(JsonEvent::ObjectKey("type"))?;
sink.write_all(b"}")?; writer.write_event(JsonEvent::String("bnode"))?;
writer.write_event(JsonEvent::ObjectKey("value"))?;
writer.write_event(JsonEvent::String(bnode.as_str()))?;
writer.write_event(JsonEvent::EndObject)?;
} }
Term::Literal(literal) => { Term::Literal(literal) => {
sink.write_all(b":{\"type\":\"literal\",\"value\":")?; writer.write_event(JsonEvent::StartObject)?;
write_escaped_json_string(literal.value(), &mut sink)?; writer.write_event(JsonEvent::ObjectKey("type"))?;
writer.write_event(JsonEvent::String("literal"))?;
writer.write_event(JsonEvent::ObjectKey("value"))?;
writer.write_event(JsonEvent::String(literal.value()))?;
if let Some(language) = literal.language() { if let Some(language) = literal.language() {
sink.write_all(b",\"xml:lang\":")?; writer.write_event(JsonEvent::ObjectKey("xml:lang"))?;
write_escaped_json_string(language, &mut sink)?; writer.write_event(JsonEvent::String(language))?;
} else if !literal.is_plain() { } else if !literal.is_plain() {
sink.write_all(b",\"datatype\":")?; writer.write_event(JsonEvent::ObjectKey("datatype"))?;
write_escaped_json_string(literal.datatype().as_str(), &mut sink)?; writer
.write_event(JsonEvent::String(literal.datatype().as_str()))?;
} }
sink.write_all(b"}")?; writer.write_event(JsonEvent::EndObject)?;
} }
} }
} }
sink.write_all(b"}")?; writer.write_event(JsonEvent::EndObject)?;
} }
sink.write_all(b"]}}")?; writer.write_event(JsonEvent::EndArray)?;
writer.write_event(JsonEvent::EndObject)?;
writer.write_event(JsonEvent::EndObject)?;
Ok(()) Ok(())
} }
QueryResults::Graph(_) => Err(invalid_input_error( QueryResults::Graph(_) => Err(invalid_input_error(
@ -84,37 +93,253 @@ pub fn write_json_results(
} }
} }
fn write_escaped_json_string(s: &str, mut sink: impl Write) -> Result<(), EvaluationError> { pub fn read_json_results(source: impl BufRead + 'static) -> Result<QueryResults, io::Error> {
sink.write_all(b"\"")?; let mut reader = JsonReader::from_reader(source);
for c in s.chars() { let mut buffer = Vec::default();
match c { let mut variables = None;
'\\' => sink.write_all(b"\\\\"),
'"' => sink.write_all(b"\\\""), if reader.read_event(&mut buffer)? != JsonEvent::StartObject {
c => { return Err(invalid_data_error(
if c < char::from(32) { "SPARQL JSON results should be an object",
match c { ));
'\u{08}' => sink.write_all(b"\\b"), }
'\u{0C}' => sink.write_all(b"\\f"),
'\n' => sink.write_all(b"\\n"), loop {
'\r' => sink.write_all(b"\\r"), let event = reader.read_event(&mut buffer)?;
'\t' => sink.write_all(b"\\t"), match event {
c => { JsonEvent::ObjectKey(key) => match key {
let mut c = c as u8; "head" => variables = Some(read_head(&mut reader, &mut buffer)?),
let mut result = [b'\\', b'u', 0, 0, 0, 0]; "results" => {
for i in (2..6).rev() { if reader.read_event(&mut buffer)? != JsonEvent::StartObject {
let ch = c % 16; return Err(invalid_data_error("'results' should be an object"));
result[i] = ch + if ch < 10 { b'0' } else { b'A' }; }
c /= 16; if reader.read_event(&mut buffer)? != JsonEvent::ObjectKey("bindings") {
} return Err(invalid_data_error(
sink.write_all(&result) "'results' should contain a 'bindings' key",
));
}
if reader.read_event(&mut buffer)? != JsonEvent::StartArray {
return Err(invalid_data_error("'bindings' should be an object"));
}
return if let Some(variables) = variables {
let mut mapping = BTreeMap::default();
for (i, var) in variables.iter().enumerate() {
mapping.insert(var.clone(), i);
} }
Ok(QueryResults::Solutions(QuerySolutionIter::new(
Rc::new(
variables
.into_iter()
.map(Variable::new)
.collect::<Result<Vec<_>, _>>()
.map_err(invalid_data_error)?,
),
Box::new(ResultsIterator {
reader,
buffer,
mapping,
}),
)))
} else {
Err(invalid_data_error(
"SPARQL tuple query results should contain a head key",
))
};
}
"boolean" => {
return if let JsonEvent::Boolean(v) = reader.read_event(&mut buffer)? {
Ok(QueryResults::Boolean(v))
} else {
Err(invalid_data_error("Unexpected boolean value"))
} }
} else {
write!(sink, "{}", c)
} }
_ => {
return Err(invalid_data_error(format!(
"Expecting head or result key, found {}",
key
)));
}
},
JsonEvent::EndObject => {
return Err(invalid_data_error(
"SPARQL results should contain a bindings key or a boolean key",
))
} }
}?; JsonEvent::Eof => return Err(io::Error::from(io::ErrorKind::UnexpectedEof)),
_ => return Err(invalid_data_error("Invalid SPARQL results serialization")),
}
}
}
/// Parses the `head` object of a SPARQL JSON results document.
///
/// The reader must be positioned right after the `head` key. The returned
/// vector holds the strings listed under `vars`; it is empty when the object
/// has no `vars` key. A `link` array is read and discarded.
///
/// # Errors
///
/// Fails if the value is not an object, if a key other than `vars` or `link`
/// is found, if one of the two arrays is malformed, or if the underlying
/// reader reports an error.
fn read_head<R: BufRead>(
    reader: &mut JsonReader<R>,
    buffer: &mut Vec<u8>,
) -> io::Result<Vec<String>> {
    match reader.read_event(buffer)? {
        JsonEvent::StartObject => {}
        _ => return Err(invalid_data_error("head should be an object")),
    }

    let mut vars: Option<Vec<String>> = None;
    loop {
        // Anything other than a key or the closing brace is malformed input.
        let key = match reader.read_event(buffer)? {
            JsonEvent::ObjectKey(key) => key,
            JsonEvent::EndObject => return Ok(vars.unwrap_or_default()),
            _ => return Err(invalid_data_error("Invalid head serialization")),
        };

        if key == "vars" {
            vars = Some(read_string_array(reader, buffer)?);
        } else if key == "link" {
            // The links are validated as a string array but not kept.
            read_string_array(reader, buffer)?;
        } else {
            return Err(invalid_data_error(format!(
                "Unexpected key in head: '{}'",
                key
            )));
        }
    }
}
/// Reads a JSON array whose elements are all strings and returns them in order.
///
/// # Errors
///
/// Fails if the next event is not the start of an array, if any element is
/// not a string, or if the underlying reader reports an error.
fn read_string_array<R: BufRead>(
    reader: &mut JsonReader<R>,
    buffer: &mut Vec<u8>,
) -> io::Result<Vec<String>> {
    match reader.read_event(buffer)? {
        JsonEvent::StartArray => {}
        _ => return Err(invalid_data_error("Variable list should be an array")),
    }

    let mut items = Vec::new();
    loop {
        match reader.read_event(buffer)? {
            JsonEvent::String(item) => items.push(String::from(item)),
            JsonEvent::EndArray => break Ok(items),
            _ => break Err(invalid_data_error("Variable names should be strings")),
        }
    }
}
/// State needed to pull solutions one at a time out of the `bindings` array
/// of a SPARQL JSON results document.
struct ResultsIterator<R: BufRead> {
/// JSON event reader; `read_json_results` hands it over after consuming the
/// `[` that opens the `bindings` array.
reader: JsonReader<R>,
/// Scratch buffer passed to every `read_event` call.
buffer: Vec<u8>,
/// Maps each variable name declared in the head to its index in a solution row.
mapping: BTreeMap<String, usize>,
}
impl<R: BufRead> Iterator for ResultsIterator<R> {
type Item = Result<Vec<Option<Term>>, EvaluationError>;
fn next(&mut self) -> Option<Result<Vec<Option<Term>>, EvaluationError>> {
self.read_next().map_err(EvaluationError::from).transpose()
}
}
impl<R: BufRead> ResultsIterator<R> {
/// Reads the next solution from the `bindings` array.
///
/// Returns `Ok(Some(row))` with one slot per entry of `self.mapping` (unbound
/// variables stay `None`), or `Ok(None)` once the array is exhausted.
///
/// Calling this again after it has returned `Ok(None)` keeps returning
/// `Ok(None)`: the `}` tokens that close the `results` object and the root
/// object are not mistaken for the end of an (empty) solution.
///
/// # Errors
///
/// Fails if a binding uses a variable that is absent from `self.mapping`, if
/// a term cannot be read by `read_value`, if an unexpected JSON event is
/// found, or if the underlying reader reports an error.
fn read_next(&mut self) -> io::Result<Option<Vec<Option<Term>>>> {
    let mut new_bindings = vec![None; self.mapping.len()];
    // Becomes true once the `{` opening a solution has been seen in this call.
    let mut in_solution = false;
    loop {
        match self.reader.read_event(&mut self.buffer)? {
            JsonEvent::StartObject => in_solution = true,
            JsonEvent::EndObject => {
                // A closing brace without a matching opening one belongs to an
                // enclosing object located after the `bindings` array, so the
                // iteration is over rather than a solution being complete.
                return Ok(if in_solution {
                    Some(new_bindings)
                } else {
                    None
                });
            }
            JsonEvent::EndArray | JsonEvent::Eof => return Ok(None),
            JsonEvent::ObjectKey(key) => {
                let k = *self.mapping.get(key).ok_or_else(|| {
                    invalid_data_error(format!(
                        "The variable {} has not been defined in the header",
                        key
                    ))
                })?;
                new_bindings[k] = Some(self.read_value()?)
            }
            _ => return Err(invalid_data_error("Invalid result serialization")),
        }
    }
}
fn read_value(&mut self) -> io::Result<Term> {
enum Type {
Uri,
BNode,
Literal,
}
enum State {
Type,
Value,
Lang,
Datatype,
}
let mut state = None;
let mut t = None;
let mut value = None;
let mut lang = None;
let mut datatype = None;
if self.reader.read_event(&mut self.buffer)? != JsonEvent::StartObject {
return Err(invalid_data_error(
"Term serializations should be an object",
));
}
loop {
match self.reader.read_event(&mut self.buffer)? {
JsonEvent::ObjectKey(key) => match key {
"type" => state = Some(State::Type),
"value" => state = Some(State::Value),
"xml:lang" => state = Some(State::Lang),
"datatype" => state = Some(State::Datatype),
_ => {
return Err(invalid_data_error(format!(
"Unexpected key in term serialization: '{}'",
key
)))
}
},
JsonEvent::String(s) => match state {
None => (), // impossible
Some(State::Type) => match s {
"uri" => t = Some(Type::Uri),
"bnode" => t = Some(Type::BNode),
"literal" => t = Some(Type::Literal),
_ => {
return Err(invalid_data_error(format!(
"Unexpected term type: '{}'",
s
)))
}
},
Some(State::Value) => value = Some(s.to_owned()),
Some(State::Lang) => lang = Some(s.to_owned()),
Some(State::Datatype) => datatype = Some(s.to_owned()),
},
JsonEvent::EndObject => {
let value = value.ok_or_else(|| {
invalid_data_error("Term serialization should have a value key")
})?;
return match t {
None => Err(invalid_data_error(
"Term serialization should have a type key",
)),
Some(Type::Uri) => Ok(NamedNode::new(value)
.map_err(|e| invalid_data_error(format!("Invalid uri value: {}", e)))?
.into()),
Some(Type::BNode) => Ok(BlankNode::new(value)
.map_err(|e| invalid_data_error(format!("Invalid bnode value: {}", e)))?
.into()),
Some(Type::Literal) => Ok(match datatype {
Some(datatype) => Literal::new_typed_literal(
value,
NamedNode::new(datatype).map_err(|e| {
invalid_data_error(format!("Invalid datatype value: {}", e))
})?,
),
None => match lang {
Some(lang) => Literal::new_language_tagged_literal(value, lang)
.map_err(|e| {
invalid_data_error(format!("Invalid xml:lang value: {}", e))
})?,
None => Literal::new_simple_literal(value),
},
}
.into()),
};
}
_ => return Err(invalid_data_error("Invalid term serialization")),
}
}
} }
sink.write_all(b"\"")?;
Ok(())
} }

@ -4,7 +4,7 @@ use crate::io::GraphSerializer;
use crate::model::*; use crate::model::*;
use crate::sparql::csv_results::{read_tsv_results, write_csv_results, write_tsv_results}; use crate::sparql::csv_results::{read_tsv_results, write_csv_results, write_tsv_results};
use crate::sparql::error::EvaluationError; use crate::sparql::error::EvaluationError;
use crate::sparql::json_results::write_json_results; use crate::sparql::json_results::{read_json_results, write_json_results};
use crate::sparql::xml_results::{read_xml_results, write_xml_results}; use crate::sparql::xml_results::{read_xml_results, write_xml_results};
use rand::random; use rand::random;
use std::error::Error; use std::error::Error;
@ -30,11 +30,9 @@ impl QueryResults {
) -> Result<Self, io::Error> { ) -> Result<Self, io::Error> {
match format { match format {
QueryResultsFormat::Xml => read_xml_results(reader), QueryResultsFormat::Xml => read_xml_results(reader),
QueryResultsFormat::Json => Err(invalid_input_error( QueryResultsFormat::Json => read_json_results(reader),
"JSON SPARQL results format parsing has not been implemented yet",
)), //TODO: implement
QueryResultsFormat::Csv => Err(invalid_input_error( QueryResultsFormat::Csv => Err(invalid_input_error(
"CSV and TSV SPARQL results format parsing is not implemented", "CSV SPARQL results format parsing is not implemented",
)), )),
QueryResultsFormat::Tsv => read_tsv_results(reader), QueryResultsFormat::Tsv => read_tsv_results(reader),
} }
@ -520,3 +518,73 @@ impl fmt::Display for VariableNameParseError {
} }
impl Error for VariableNameParseError {} impl Error for VariableNameParseError {}
/// Writes each sample result, reads it back in the same format, writes it
/// again and checks that both serializations are identical.
#[test]
fn test_serialization_rountrip() -> Result<(), EvaluationError> {
    use std::io::Cursor;
    use std::str;

    // Only the formats that can be both written and read back are exercised.
    let formats = [
        QueryResultsFormat::Xml,
        QueryResultsFormat::Json,
        QueryResultsFormat::Tsv,
    ];

    for &format in &formats {
        // Sample solutions covering unbound slots, IRIs, blank nodes and
        // simple, language-tagged and typed literals.
        let rows: Vec<Result<Vec<Option<Term>>, EvaluationError>> = vec![
            Ok(vec![None, None]),
            Ok(vec![
                Some(NamedNode::new_unchecked("http://example.com").into()),
                None,
            ]),
            Ok(vec![
                None,
                Some(NamedNode::new_unchecked("http://example.com").into()),
            ]),
            Ok(vec![
                Some(BlankNode::new_unchecked("foo").into()),
                Some(BlankNode::new_unchecked("bar").into()),
            ]),
            Ok(vec![Some(Literal::new_simple_literal("foo").into()), None]),
            Ok(vec![
                Some(Literal::new_language_tagged_literal_unchecked("foo", "fr").into()),
                None,
            ]),
            Ok(vec![
                Some(Literal::from(1).into()),
                Some(Literal::from(true).into()),
            ]),
            Ok(vec![
                Some(Literal::from(1.33).into()),
                Some(Literal::from(false).into()),
            ]),
        ];

        let examples = vec![
            QueryResults::Boolean(true),
            QueryResults::Boolean(false),
            QueryResults::Solutions(QuerySolutionIter::new(
                Rc::new(vec![
                    Variable::new_unchecked("foo"),
                    Variable::new_unchecked("bar"),
                ]),
                Box::new(rows.into_iter()),
            )),
        ];

        for example in examples {
            let mut first_pass = Vec::new();
            example.write(&mut first_pass, format)?;

            let reparsed = QueryResults::read(Cursor::new(first_pass.clone()), format)?;
            let mut second_pass = Vec::new();
            reparsed.write(&mut second_pass, format)?;

            assert_eq!(
                str::from_utf8(&first_pass).unwrap(),
                str::from_utf8(&second_pass).unwrap()
            );
        }
    }
    Ok(())
}

@ -318,8 +318,7 @@ impl<R: BufRead> ResultsIterator<R> {
} }
let mut state = State::Start; let mut state = State::Start;
let mut new_bindings = Vec::default(); let mut new_bindings = vec![None; self.mapping.len()];
new_bindings.resize(self.mapping.len(), None);
let mut current_var = None; let mut current_var = None;
let mut term: Option<Term> = None; let mut term: Option<Term> = None;
@ -474,13 +473,12 @@ impl<R: BufRead> ResultsIterator<R> {
State::Result => return Ok(Some(new_bindings)), State::Result => return Ok(Some(new_bindings)),
State::Binding => { State::Binding => {
if let Some(var) = &current_var { if let Some(var) = &current_var {
new_bindings[self.mapping[var]] = term.clone() new_bindings[self.mapping[var]] = term.take()
} else { } else {
return Err( return Err(
invalid_data_error("No name found for <binding> tag").into() invalid_data_error("No name found for <binding> tag").into()
); );
} }
term = None;
state = State::Result; state = State::Result;
} }
State::Uri | State::BNode => state = State::Binding, State::Uri | State::BNode => state = State::Binding,

@ -85,15 +85,12 @@ fn sparql11_query_w3c_evaluation_testsuite() -> Result<()> {
"http://www.w3.org/2009/sparql/docs/tests/data-sparql11/syntax-query/manifest#test_61a", "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/syntax-query/manifest#test_61a",
"http://www.w3.org/2009/sparql/docs/tests/data-sparql11/syntax-query/manifest#test_62a", "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/syntax-query/manifest#test_62a",
"http://www.w3.org/2009/sparql/docs/tests/data-sparql11/syntax-query/manifest#test_65", "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/syntax-query/manifest#test_65",
// SPARQL 1.1 JSON query results deserialization is not implemented yet
"http://www.w3.org/2009/sparql/docs/tests/data-sparql11/aggregates/manifest#agg-empty-group-count-1",
"http://www.w3.org/2009/sparql/docs/tests/data-sparql11/aggregates/manifest#agg-empty-group-count-2",
//BNODE() scope is currently wrong //BNODE() scope is currently wrong
"http://www.w3.org/2009/sparql/docs/tests/data-sparql11/functions/manifest#bnode01", "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/functions/manifest#bnode01",
//Property path with unbound graph name are not supported yet //Property path with unbound graph name are not supported yet
"http://www.w3.org/2009/sparql/docs/tests/data-sparql11/property-path/manifest#pp35", "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/property-path/manifest#pp35",
//SERVICE name from a BGP //SERVICE name from a BGP
"http://www.w3.org/2009/sparql/docs/tests/data-sparql11/service/manifest#service5" "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/service/manifest#service5",
], ],
) )
} }

Loading…
Cancel
Save