From 01a33192eb966aa78c34658aac2ef533b8c244be Mon Sep 17 00:00:00 2001 From: Tpt Date: Sat, 18 Dec 2021 22:12:59 +0100 Subject: [PATCH] Adds a public API for SPARQL query results I/O --- lib/src/io/read.rs | 69 +++- lib/src/io/write.rs | 7 +- lib/src/sparql/csv_results.rs | 307 -------------- lib/src/sparql/io/csv.rs | 306 ++++++++++++++ lib/src/sparql/io/json.rs | 703 +++++++++++++++++++++++++++++++++ lib/src/sparql/io/mod.rs | 337 ++++++++++++++++ lib/src/sparql/io/xml.rs | 597 ++++++++++++++++++++++++++++ lib/src/sparql/json_results.rs | 455 --------------------- lib/src/sparql/mod.rs | 15 +- lib/src/sparql/model.rs | 170 ++------ lib/src/sparql/xml_results.rs | 674 ------------------------------- 11 files changed, 2055 insertions(+), 1585 deletions(-) delete mode 100644 lib/src/sparql/csv_results.rs create mode 100644 lib/src/sparql/io/csv.rs create mode 100644 lib/src/sparql/io/json.rs create mode 100644 lib/src/sparql/io/mod.rs create mode 100644 lib/src/sparql/io/xml.rs delete mode 100644 lib/src/sparql/json_results.rs delete mode 100644 lib/src/sparql/xml_results.rs diff --git a/lib/src/io/read.rs b/lib/src/io/read.rs index 61ee8c3c..e4763c9e 100644 --- a/lib/src/io/read.rs +++ b/lib/src/io/read.rs @@ -38,7 +38,7 @@ pub struct GraphParser { } impl GraphParser { - /// Builds a parser for the given format + /// Builds a parser for the given format. pub fn from_format(format: GraphFormat) -> Self { Self { format, @@ -46,7 +46,7 @@ impl GraphParser { } } - /// Provides an IRI that could be used to resolve the file relative IRIs + /// Provides an IRI that could be used to resolve the file relative IRIs. /// /// ``` /// use oxigraph::io::{GraphFormat, GraphParser}; @@ -66,7 +66,7 @@ impl GraphParser { Ok(self) } - /// Executes the parsing itself on a [`BufRead`](std::io::BufRead) implementation and returns an iterator of triples + /// Executes the parsing itself on a [`BufRead`](std::io::BufRead) implementation and returns an iterator of triples. #[allow(clippy::unnecessary_wraps)] pub fn read_triples(&self, reader: R) -> Result, ParserError> { Ok(TripleReader { @@ -187,7 +187,7 @@ pub struct DatasetParser { } impl DatasetParser { - /// Builds a parser for the given format + /// Builds a parser for the given format. pub fn from_format(format: DatasetFormat) -> Self { Self { format, @@ -195,7 +195,7 @@ impl DatasetParser { } } - /// Provides an IRI that could be used to resolve the file relative IRIs + /// Provides an IRI that could be used to resolve the file relative IRIs. /// /// ``` /// use oxigraph::io::{DatasetFormat, DatasetParser}; @@ -215,7 +215,7 @@ impl DatasetParser { Ok(self) } - /// Executes the parsing itself on a [`BufRead`](std::io::BufRead) implementation and returns an iterator of quads + /// Executes the parsing itself on a [`BufRead`](std::io::BufRead) implementation and returns an iterator of quads. #[allow(clippy::unnecessary_wraps)] pub fn read_quads(&self, reader: R) -> Result, ParserError> { Ok(QuadReader { @@ -442,12 +442,26 @@ impl From for ParserError { } } +impl From for ParserError { + fn from(error: TermParseError) -> Self { + Self::Syntax(SyntaxError { + inner: SyntaxErrorKind::Term(error), + }) + } +} + impl From for ParserError { fn from(error: io::Error) -> Self { Self::Io(error) } } +impl From for ParserError { + fn from(error: SyntaxError) -> Self { + Self::Syntax(error) + } +} + impl From for io::Error { fn from(error: ParserError) -> Self { match error { @@ -457,17 +471,40 @@ impl From for io::Error { } } +impl From for ParserError { + fn from(error: quick_xml::Error) -> Self { + match error { + quick_xml::Error::Io(error) => Self::Io(error), + error => Self::Syntax(SyntaxError { + inner: SyntaxErrorKind::Xml(error), + }), + } + } +} + /// An error in the syntax of the parsed file #[derive(Debug)] pub struct SyntaxError { - inner: SyntaxErrorKind, + pub(crate) inner: SyntaxErrorKind, } #[derive(Debug)] -enum SyntaxErrorKind { +pub(crate) enum SyntaxErrorKind { Turtle(TurtleError), RdfXml(RdfXmlError), BaseIri { iri: String, error: IriParseError }, + Xml(quick_xml::Error), + Term(TermParseError), + Msg { msg: String }, +} + +impl SyntaxError { + /// Builds an error from a printable error message. + pub(crate) fn msg(msg: impl Into) -> Self { + Self { + inner: SyntaxErrorKind::Msg { msg: msg.into() }, + } + } } impl fmt::Display for SyntaxError { @@ -478,6 +515,9 @@ impl fmt::Display for SyntaxError { SyntaxErrorKind::BaseIri { iri, error } => { write!(f, "Invalid base IRI '{}': {}", iri, error) } + SyntaxErrorKind::Xml(e) => e.fmt(f), + SyntaxErrorKind::Term(e) => e.fmt(f), + SyntaxErrorKind::Msg { msg } => f.write_str(msg), } } } @@ -487,7 +527,9 @@ impl Error for SyntaxError { match &self.inner { SyntaxErrorKind::Turtle(e) => Some(e), SyntaxErrorKind::RdfXml(e) => Some(e), - SyntaxErrorKind::BaseIri { .. } => None, + SyntaxErrorKind::Xml(e) => Some(e), + SyntaxErrorKind::Term(e) => Some(e), + SyntaxErrorKind::BaseIri { .. } | SyntaxErrorKind::Msg { .. } => None, } } } @@ -501,6 +543,15 @@ impl From for io::Error { io::ErrorKind::InvalidInput, format!("Invalid IRI '{}': {}", iri, error), ), + SyntaxErrorKind::Xml(error) => match error { + quick_xml::Error::Io(error) => error, + quick_xml::Error::UnexpectedEof(error) => { + Self::new(io::ErrorKind::UnexpectedEof, error) + } + error => Self::new(io::ErrorKind::InvalidData, error), + }, + SyntaxErrorKind::Term(error) => Self::new(io::ErrorKind::InvalidData, error), + SyntaxErrorKind::Msg { msg } => Self::new(io::ErrorKind::InvalidData, msg), } } } diff --git a/lib/src/io/write.rs b/lib/src/io/write.rs index 11c10824..1c689c0b 100644 --- a/lib/src/io/write.rs +++ b/lib/src/io/write.rs @@ -6,8 +6,7 @@ use crate::model::*; use rio_api::formatter::TriplesFormatter; use rio_api::model as rio; use rio_xml::RdfXmlFormatter; -use std::io; -use std::io::Write; +use std::io::{self, Write}; /// A serializer for RDF graph serialization formats. /// @@ -43,7 +42,7 @@ impl GraphSerializer { Self { format } } - /// Returns a `TripleWriter` allowing writing triples into the given [`Write`](std::io::Write) implementation + /// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`](std::io::Write) implementation pub fn triple_writer(&self, writer: W) -> io::Result> { Ok(TripleWriter { formatter: match self.format { @@ -182,7 +181,7 @@ impl DatasetSerializer { Self { format } } - /// Returns a `QuadWriter` allowing writing triples into the given [`Write`](std::io::Write) implementation + /// Returns a [`QuadWriter`] allowing writing triples into the given [`Write`](std::io::Write) implementation #[allow(clippy::unnecessary_wraps)] pub fn quad_writer(&self, writer: W) -> io::Result> { Ok(QuadWriter { diff --git a/lib/src/sparql/csv_results.rs b/lib/src/sparql/csv_results.rs deleted file mode 100644 index d2d3a87f..00000000 --- a/lib/src/sparql/csv_results.rs +++ /dev/null @@ -1,307 +0,0 @@ -//! Implementation of [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/) - -use crate::error::invalid_data_error; -use crate::model::{vocab::xsd, *}; -use crate::sparql::error::EvaluationError; -use crate::sparql::model::*; -use std::io::{self, BufRead, Write}; -use std::rc::Rc; -use std::str::FromStr; - -pub fn write_csv_results( - results: QueryResults, - mut sink: impl Write, -) -> Result<(), EvaluationError> { - match results { - QueryResults::Boolean(value) => { - sink.write_all(if value { b"true" } else { b"false" })?; - } - QueryResults::Solutions(solutions) => { - let mut start_vars = true; - for variable in solutions.variables() { - if start_vars { - start_vars = false; - } else { - sink.write_all(b",")?; - } - sink.write_all(variable.as_str().as_bytes())?; - } - - for solution in solutions { - let solution = solution?; - sink.write_all(b"\r\n")?; - let mut start_binding = true; - for value in solution.values() { - if start_binding { - start_binding = false; - } else { - sink.write_all(b",")?; - } - if let Some(value) = value { - write_csv_term(value, &mut sink)?; - } - } - } - } - QueryResults::Graph(g) => { - sink.write_all(b"subject,predicate,object")?; - for t in g { - let t = t?; - sink.write_all(b"\r\n")?; - write_csv_term(&t.subject, &mut sink)?; - sink.write_all(b",")?; - write_csv_term(&t.predicate, &mut sink)?; - sink.write_all(b",")?; - write_csv_term(&t.object, &mut sink)?; - } - } - } - Ok(()) -} - -fn write_csv_term<'a>(term: impl Into>, sink: &mut impl Write) -> io::Result<()> { - match term.into() { - TermRef::NamedNode(uri) => sink.write_all(uri.as_str().as_bytes()), - TermRef::BlankNode(bnode) => { - sink.write_all(b"_:")?; - sink.write_all(bnode.as_str().as_bytes()) - } - TermRef::Literal(literal) => write_escaped_csv_string(literal.value(), sink), - TermRef::Triple(triple) => { - write_csv_term(&triple.subject, sink)?; - sink.write_all(b" ")?; - write_csv_term(&triple.predicate, sink)?; - sink.write_all(b" ")?; - write_csv_term(&triple.object, sink) - } - } -} - -fn write_escaped_csv_string(s: &str, sink: &mut impl Write) -> io::Result<()> { - if s.bytes().any(|c| matches!(c, b'"' | b',' | b'\n' | b'\r')) { - sink.write_all(b"\"")?; - for c in s.bytes() { - if c == b'\"' { - sink.write_all(b"\"\"") - } else { - sink.write_all(&[c]) - }?; - } - sink.write_all(b"\"") - } else { - sink.write_all(s.as_bytes()) - } -} - -pub fn write_tsv_results( - results: QueryResults, - mut sink: impl Write, -) -> Result<(), EvaluationError> { - match results { - QueryResults::Boolean(value) => { - sink.write_all(if value { b"true" } else { b"false" })?; - } - QueryResults::Solutions(solutions) => { - let mut start_vars = true; - for variable in solutions.variables() { - if start_vars { - start_vars = false; - } else { - sink.write_all(b"\t")?; - } - sink.write_all(b"?")?; - sink.write_all(variable.as_str().as_bytes())?; - } - - for solution in solutions { - let solution = solution?; - sink.write_all(b"\n")?; - let mut start_binding = true; - for value in solution.values() { - if start_binding { - start_binding = false; - } else { - sink.write_all(b"\t")?; - } - if let Some(value) = value { - write_tsv_term(value, &mut sink)?; - } - } - } - } - QueryResults::Graph(g) => { - sink.write_all(b"subject\tpredicate\tobject")?; - for t in g { - let t = t?; - sink.write_all(b"\n")?; - write_tsv_term(&t.subject, &mut sink)?; - sink.write_all(b"\t")?; - write_tsv_term(&t.predicate, &mut sink)?; - sink.write_all(b"\t")?; - write_tsv_term(&t.object, &mut sink)?; - } - } - } - Ok(()) -} - -fn write_tsv_term<'a>(term: impl Into>, sink: &mut impl Write) -> io::Result<()> { - //TODO: full Turtle serialization - match term.into() { - TermRef::NamedNode(node) => write!(sink, "<{}>", node.as_str()), - TermRef::BlankNode(node) => write!(sink, "_:{}", node.as_str()), - TermRef::Literal(literal) => match literal.datatype() { - xsd::BOOLEAN => match literal.value() { - "true" | "1" => sink.write_all(b"true"), - "false" | "0" => sink.write_all(b"false"), - _ => sink.write_all(literal.to_string().as_bytes()), - }, - xsd::INTEGER => { - if literal.value().bytes().all(|c| matches!(c, b'0'..=b'9')) { - sink.write_all(literal.value().as_bytes()) - } else { - sink.write_all(literal.to_string().as_bytes()) - } - } - _ => sink.write_all(literal.to_string().as_bytes()), - }, - TermRef::Triple(triple) => { - sink.write_all(b"<<")?; - write_tsv_term(&triple.subject, sink)?; - sink.write_all(b" ")?; - write_tsv_term(&triple.predicate, sink)?; - sink.write_all(b" ")?; - write_tsv_term(&triple.object, sink)?; - sink.write_all(b">>")?; - Ok(()) - } - } -} - -pub fn read_tsv_results(mut source: impl BufRead + 'static) -> io::Result { - let mut buffer = String::new(); - - // We read the header - source.read_line(&mut buffer)?; - if buffer.trim().eq_ignore_ascii_case("true") { - return Ok(QueryResults::Boolean(true)); - } - if buffer.trim().eq_ignore_ascii_case("false") { - return Ok(QueryResults::Boolean(false)); - } - let variables = buffer - .split('\t') - .map(|v| Variable::from_str(v.trim()).map_err(invalid_data_error)) - .collect::>>()?; - - Ok(QueryResults::Solutions(QuerySolutionIter::new( - Rc::new(variables), - Box::new(TsvResultsIterator { source, buffer }), - ))) -} - -struct TsvResultsIterator { - source: R, - buffer: String, -} - -impl Iterator for TsvResultsIterator { - type Item = Result>, EvaluationError>; - - fn next(&mut self) -> Option>, EvaluationError>> { - self.read_next().transpose() - } -} - -impl TsvResultsIterator { - fn read_next(&mut self) -> Result>>, EvaluationError> { - self.buffer.clear(); - if self.source.read_line(&mut self.buffer)? == 0 { - return Ok(None); - } - Ok(Some( - self.buffer - .split('\t') - .map(|v| { - let v = v.trim(); - if v.is_empty() { - Ok(None) - } else { - Ok(Some(Term::from_str(v).map_err(invalid_data_error)?)) - } - }) - .collect::, EvaluationError>>()?, - )) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::rc::Rc; - use std::str; - - fn build_example() -> QueryResults { - QuerySolutionIter::new( - Rc::new(vec![ - Variable::new_unchecked("x"), - Variable::new_unchecked("literal"), - ]), - Box::new( - vec![ - Ok(vec![ - Some(NamedNode::new_unchecked("http://example/x").into()), - Some(Literal::new_simple_literal("String").into()), - ]), - Ok(vec![ - Some(NamedNode::new_unchecked("http://example/x").into()), - Some(Literal::new_simple_literal("String-with-dquote\"").into()), - ]), - Ok(vec![ - Some(BlankNode::new_unchecked("b0").into()), - Some(Literal::new_simple_literal("Blank node").into()), - ]), - Ok(vec![ - None, - Some(Literal::new_simple_literal("Missing 'x'").into()), - ]), - Ok(vec![None, None]), - Ok(vec![ - Some(NamedNode::new_unchecked("http://example/x").into()), - None, - ]), - Ok(vec![ - Some(BlankNode::new_unchecked("b1").into()), - Some( - Literal::new_language_tagged_literal_unchecked( - "String-with-lang", - "en", - ) - .into(), - ), - ]), - Ok(vec![ - Some(BlankNode::new_unchecked("b1").into()), - Some(Literal::new_typed_literal("123", xsd::INTEGER).into()), - ]), - ] - .into_iter(), - ), - ) - .into() - } - - #[test] - fn test_csv_serialization() { - let mut sink = Vec::new(); - write_csv_results(build_example(), &mut sink).unwrap(); - assert_eq!(str::from_utf8(&sink).unwrap(), "x,literal\r\nhttp://example/x,String\r\nhttp://example/x,\"String-with-dquote\"\"\"\r\n_:b0,Blank node\r\n,Missing 'x'\r\n,\r\nhttp://example/x,\r\n_:b1,String-with-lang\r\n_:b1,123"); - } - - #[test] - fn test_tsv_serialization() { - let mut sink = Vec::new(); - write_tsv_results(build_example(), &mut sink).unwrap(); - assert_eq!(str::from_utf8(&sink).unwrap(), "?x\t?literal\n\t\"String\"\n\t\"String-with-dquote\\\"\"\n_:b0\t\"Blank node\"\n\t\"Missing 'x'\"\n\t\n\t\n_:b1\t\"String-with-lang\"@en\n_:b1\t123"); - } -} diff --git a/lib/src/sparql/io/csv.rs b/lib/src/sparql/io/csv.rs new file mode 100644 index 00000000..80a4a7f5 --- /dev/null +++ b/lib/src/sparql/io/csv.rs @@ -0,0 +1,306 @@ +//! Implementation of [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/) + +use crate::io::read::{ParserError, SyntaxError}; +use crate::model::{vocab::xsd, *}; +use crate::sparql::model::Variable; +use std::io::{self, BufRead, Write}; +use std::str::FromStr; + +pub fn write_boolean_csv_result(mut sink: W, value: bool) -> io::Result { + sink.write_all(if value { b"true" } else { b"false" })?; + Ok(sink) +} + +pub struct CsvSolutionsWriter { + sink: W, +} + +impl CsvSolutionsWriter { + pub fn start(mut sink: W, variables: &[Variable]) -> io::Result { + let mut start_vars = true; + for variable in variables { + if start_vars { + start_vars = false; + } else { + sink.write_all(b",")?; + } + sink.write_all(variable.as_str().as_bytes())?; + } + Ok(Self { sink }) + } + + pub fn write<'a>( + &mut self, + solution: impl IntoIterator>>, + ) -> io::Result<()> { + self.sink.write_all(b"\r\n")?; + let mut start_binding = true; + for value in solution { + if start_binding { + start_binding = false; + } else { + self.sink.write_all(b",")?; + } + if let Some(value) = value { + write_csv_term(value, &mut self.sink)?; + } + } + Ok(()) + } + + pub fn finish(self) -> W { + self.sink + } +} + +fn write_csv_term<'a>(term: impl Into>, sink: &mut impl Write) -> io::Result<()> { + match term.into() { + TermRef::NamedNode(uri) => sink.write_all(uri.as_str().as_bytes()), + TermRef::BlankNode(bnode) => { + sink.write_all(b"_:")?; + sink.write_all(bnode.as_str().as_bytes()) + } + TermRef::Literal(literal) => write_escaped_csv_string(literal.value(), sink), + TermRef::Triple(triple) => { + write_csv_term(&triple.subject, sink)?; + sink.write_all(b" ")?; + write_csv_term(&triple.predicate, sink)?; + sink.write_all(b" ")?; + write_csv_term(&triple.object, sink) + } + } +} + +fn write_escaped_csv_string(s: &str, sink: &mut impl Write) -> io::Result<()> { + if s.bytes().any(|c| matches!(c, b'"' | b',' | b'\n' | b'\r')) { + sink.write_all(b"\"")?; + for c in s.bytes() { + if c == b'\"' { + sink.write_all(b"\"\"") + } else { + sink.write_all(&[c]) + }?; + } + sink.write_all(b"\"") + } else { + sink.write_all(s.as_bytes()) + } +} + +pub fn write_boolean_tsv_result(mut sink: W, value: bool) -> io::Result { + sink.write_all(if value { b"true" } else { b"false" })?; + Ok(sink) +} + +pub struct TsvSolutionsWriter { + sink: W, +} + +impl TsvSolutionsWriter { + pub fn start(mut sink: W, variables: &[Variable]) -> io::Result { + let mut start_vars = true; + for variable in variables { + if start_vars { + start_vars = false; + } else { + sink.write_all(b"\t")?; + } + sink.write_all(b"?")?; + sink.write_all(variable.as_str().as_bytes())?; + } + Ok(Self { sink }) + } + + pub fn write<'a>( + &mut self, + solution: impl IntoIterator>>, + ) -> io::Result<()> { + self.sink.write_all(b"\n")?; + let mut start_binding = true; + for value in solution { + if start_binding { + start_binding = false; + } else { + self.sink.write_all(b"\t")?; + } + if let Some(value) = value { + write_tsv_term(value, &mut self.sink)?; + } + } + Ok(()) + } + + pub fn finish(self) -> W { + self.sink + } +} + +fn write_tsv_term<'a>(term: impl Into>, sink: &mut impl Write) -> io::Result<()> { + //TODO: full Turtle serialization + match term.into() { + TermRef::NamedNode(node) => write!(sink, "<{}>", node.as_str()), + TermRef::BlankNode(node) => write!(sink, "_:{}", node.as_str()), + TermRef::Literal(literal) => match literal.datatype() { + xsd::BOOLEAN => match literal.value() { + "true" | "1" => sink.write_all(b"true"), + "false" | "0" => sink.write_all(b"false"), + _ => sink.write_all(literal.to_string().as_bytes()), + }, + xsd::INTEGER => { + if literal.value().bytes().all(|c| matches!(c, b'0'..=b'9')) { + sink.write_all(literal.value().as_bytes()) + } else { + sink.write_all(literal.to_string().as_bytes()) + } + } + _ => sink.write_all(literal.to_string().as_bytes()), + }, + TermRef::Triple(triple) => { + sink.write_all(b"<<")?; + write_tsv_term(&triple.subject, sink)?; + sink.write_all(b" ")?; + write_tsv_term(&triple.predicate, sink)?; + sink.write_all(b" ")?; + write_tsv_term(&triple.object, sink)?; + sink.write_all(b">>")?; + Ok(()) + } + } +} + +pub enum TsvQueryResultsReader { + Solutions { + variables: Vec, + solutions: TsvSolutionsReader, + }, + Boolean(bool), +} + +impl TsvQueryResultsReader { + pub fn read(mut source: R) -> Result { + let mut buffer = String::new(); + + // We read the header + source.read_line(&mut buffer)?; + if buffer.trim().eq_ignore_ascii_case("true") { + return Ok(Self::Boolean(true)); + } + if buffer.trim().eq_ignore_ascii_case("false") { + return Ok(Self::Boolean(false)); + } + let variables = buffer + .split('\t') + .map(|v| { + Variable::from_str(v.trim()) + .map_err(|e| SyntaxError::msg(format!("Invalid variable name '{}': {}", v, e))) + }) + .collect::, _>>()?; + + Ok(Self::Solutions { + variables, + solutions: TsvSolutionsReader { source, buffer }, + }) + } +} + +pub struct TsvSolutionsReader { + source: R, + buffer: String, +} + +impl TsvSolutionsReader { + pub fn read_next(&mut self) -> Result>>, ParserError> { + self.buffer.clear(); + if self.source.read_line(&mut self.buffer)? == 0 { + return Ok(None); + } + Ok(Some( + self.buffer + .split('\t') + .map(|v| { + let v = v.trim(); + if v.is_empty() { + Ok(None) + } else { + Ok(Some( + Term::from_str(v).map_err(|e| SyntaxError::msg(e.to_string()))?, + )) + } + }) + .collect::>()?, + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::str; + + fn build_example() -> (Vec, Vec>>) { + ( + vec![ + Variable::new_unchecked("x"), + Variable::new_unchecked("literal"), + ], + vec![ + vec![ + Some(NamedNode::new_unchecked("http://example/x").into()), + Some(Literal::new_simple_literal("String").into()), + ], + vec![ + Some(NamedNode::new_unchecked("http://example/x").into()), + Some(Literal::new_simple_literal("String-with-dquote\"").into()), + ], + vec![ + Some(BlankNode::new_unchecked("b0").into()), + Some(Literal::new_simple_literal("Blank node").into()), + ], + vec![ + None, + Some(Literal::new_simple_literal("Missing 'x'").into()), + ], + vec![None, None], + vec![ + Some(NamedNode::new_unchecked("http://example/x").into()), + None, + ], + vec![ + Some(BlankNode::new_unchecked("b1").into()), + Some( + Literal::new_language_tagged_literal_unchecked("String-with-lang", "en") + .into(), + ), + ], + vec![ + Some(BlankNode::new_unchecked("b1").into()), + Some(Literal::new_typed_literal("123", xsd::INTEGER).into()), + ], + ], + ) + } + + #[test] + fn test_csv_serialization() -> io::Result<()> { + let (variables, solutions) = build_example(); + let mut writer = CsvSolutionsWriter::start(Vec::new(), &variables)?; + for solution in &solutions { + writer.write(solution.iter().map(|t| t.as_ref().map(|t| t.as_ref())))?; + } + let result = writer.finish(); + assert_eq!(str::from_utf8(&result).unwrap(), "x,literal\r\nhttp://example/x,String\r\nhttp://example/x,\"String-with-dquote\"\"\"\r\n_:b0,Blank node\r\n,Missing 'x'\r\n,\r\nhttp://example/x,\r\n_:b1,String-with-lang\r\n_:b1,123"); + Ok(()) + } + + #[test] + fn test_tsv_serialization() -> io::Result<()> { + let (variables, solutions) = build_example(); + let mut writer = TsvSolutionsWriter::start(Vec::new(), &variables)?; + for solution in &solutions { + writer.write(solution.iter().map(|t| t.as_ref().map(|t| t.as_ref())))?; + } + let result = writer.finish(); + assert_eq!(str::from_utf8(&result).unwrap(), "?x\t?literal\n\t\"String\"\n\t\"String-with-dquote\\\"\"\n_:b0\t\"Blank node\"\n\t\"Missing 'x'\"\n\t\n\t\n_:b1\t\"String-with-lang\"@en\n_:b1\t123"); + Ok(()) + } +} diff --git a/lib/src/sparql/io/json.rs b/lib/src/sparql/io/json.rs new file mode 100644 index 00000000..f6271058 --- /dev/null +++ b/lib/src/sparql/io/json.rs @@ -0,0 +1,703 @@ +//! Implementation of [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) + +use crate::io::read::{ParserError, SyntaxError}; +use crate::model::vocab::rdf; +use crate::model::*; +use crate::sparql::error::EvaluationError; +use crate::sparql::model::Variable; +use json_event_parser::{JsonEvent, JsonReader, JsonWriter}; +use std::collections::BTreeMap; +use std::io::{self, BufRead, Write}; + +pub fn write_boolean_json_result(sink: W, value: bool) -> io::Result { + let mut writer = JsonWriter::from_writer(sink); + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("head"))?; + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::EndObject)?; + writer.write_event(JsonEvent::ObjectKey("boolean"))?; + writer.write_event(JsonEvent::Boolean(value))?; + writer.write_event(JsonEvent::EndObject)?; + Ok(writer.into_inner()) +} + +pub struct JsonSolutionsWriter { + writer: JsonWriter, + variables: Vec, +} + +impl JsonSolutionsWriter { + pub fn start(sink: W, variables: &[Variable]) -> io::Result { + let mut writer = JsonWriter::from_writer(sink); + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("head"))?; + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("vars"))?; + writer.write_event(JsonEvent::StartArray)?; + for variable in variables { + writer.write_event(JsonEvent::String(variable.as_str()))?; + } + writer.write_event(JsonEvent::EndArray)?; + writer.write_event(JsonEvent::EndObject)?; + writer.write_event(JsonEvent::ObjectKey("results"))?; + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("bindings"))?; + writer.write_event(JsonEvent::StartArray)?; + Ok(Self { + writer, + variables: variables.to_vec(), + }) + } + + pub fn write<'a>( + &mut self, + solution: impl IntoIterator>>, + ) -> io::Result<()> { + self.writer.write_event(JsonEvent::StartObject)?; + for (value, variable) in solution.into_iter().zip(&self.variables) { + if let Some(value) = value { + self.writer + .write_event(JsonEvent::ObjectKey(variable.as_str()))?; + write_json_term(value, &mut self.writer)?; + } + } + self.writer.write_event(JsonEvent::EndObject)?; + Ok(()) + } + + pub fn finish(mut self) -> io::Result { + self.writer.write_event(JsonEvent::EndArray)?; + self.writer.write_event(JsonEvent::EndObject)?; + self.writer.write_event(JsonEvent::EndObject)?; + Ok(self.writer.into_inner()) + } +} + +fn write_json_term( + term: TermRef<'_>, + writer: &mut JsonWriter, +) -> Result<(), EvaluationError> { + match term { + TermRef::NamedNode(uri) => { + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("type"))?; + writer.write_event(JsonEvent::String("uri"))?; + writer.write_event(JsonEvent::ObjectKey("value"))?; + writer.write_event(JsonEvent::String(uri.as_str()))?; + writer.write_event(JsonEvent::EndObject)?; + } + TermRef::BlankNode(bnode) => { + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("type"))?; + writer.write_event(JsonEvent::String("bnode"))?; + writer.write_event(JsonEvent::ObjectKey("value"))?; + writer.write_event(JsonEvent::String(bnode.as_str()))?; + writer.write_event(JsonEvent::EndObject)?; + } + TermRef::Literal(literal) => { + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("type"))?; + writer.write_event(JsonEvent::String("literal"))?; + writer.write_event(JsonEvent::ObjectKey("value"))?; + writer.write_event(JsonEvent::String(literal.value()))?; + if let Some(language) = literal.language() { + writer.write_event(JsonEvent::ObjectKey("xml:lang"))?; + writer.write_event(JsonEvent::String(language))?; + } else if !literal.is_plain() { + writer.write_event(JsonEvent::ObjectKey("datatype"))?; + writer.write_event(JsonEvent::String(literal.datatype().as_str()))?; + } + writer.write_event(JsonEvent::EndObject)?; + } + TermRef::Triple(triple) => { + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("type"))?; + writer.write_event(JsonEvent::String("triple"))?; + writer.write_event(JsonEvent::ObjectKey("value"))?; + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("subject"))?; + write_json_term(triple.subject.as_ref().into(), writer)?; + writer.write_event(JsonEvent::ObjectKey("predicate"))?; + write_json_term(triple.predicate.as_ref().into(), writer)?; + writer.write_event(JsonEvent::ObjectKey("object"))?; + write_json_term(triple.object.as_ref(), writer)?; + writer.write_event(JsonEvent::EndObject)?; + writer.write_event(JsonEvent::EndObject)?; + } + } + Ok(()) +} + +pub enum JsonQueryResultsReader { + Solutions { + variables: Vec, + solutions: JsonSolutionsReader, + }, + Boolean(bool), +} + +impl JsonQueryResultsReader { + pub fn read(source: R) -> Result { + let mut reader = JsonReader::from_reader(source); + let mut buffer = Vec::default(); + let mut variables = None; + + if reader.read_event(&mut buffer)? != JsonEvent::StartObject { + return Err(SyntaxError::msg("SPARQL JSON results should be an object").into()); + } + + loop { + let event = reader.read_event(&mut buffer)?; + match event { + JsonEvent::ObjectKey(key) => match key { + "head" => variables = Some(read_head(&mut reader, &mut buffer)?), + "results" => { + if reader.read_event(&mut buffer)? != JsonEvent::StartObject { + return Err(SyntaxError::msg("'results' should be an object").into()); + } + if reader.read_event(&mut buffer)? != JsonEvent::ObjectKey("bindings") { + return Err(SyntaxError::msg( + "'results' should contain a 'bindings' key", + ) + .into()); + } + if reader.read_event(&mut buffer)? != JsonEvent::StartArray { + return Err(SyntaxError::msg("'bindings' should be an object").into()); + } + return if let Some(variables) = variables { + let mut mapping = BTreeMap::default(); + for (i, var) in variables.iter().enumerate() { + mapping.insert(var.clone(), i); + } + Ok(Self::Solutions { + variables: variables + .into_iter() + .map(|v| { + Variable::new(v).map_err(|e| { + SyntaxError::msg(format!( + "Invalid variable name: {}", + e + )) + }) + }) + .collect::, _>>()?, + solutions: JsonSolutionsReader { + reader, + buffer, + mapping, + }, + }) + } else { + Err(SyntaxError::msg( + "SPARQL tuple query results should contain a head key", + ) + .into()) + }; + } + "boolean" => { + return if let JsonEvent::Boolean(v) = reader.read_event(&mut buffer)? { + Ok(Self::Boolean(v)) + } else { + Err(SyntaxError::msg("Unexpected boolean value").into()) + } + } + _ => { + return Err(SyntaxError::msg(format!( + "Expecting head or result key, found {}", + key + )) + .into()); + } + }, + JsonEvent::EndObject => { + return Err(SyntaxError::msg( + "SPARQL results should contain a bindings key or a boolean key", + ) + .into()) + } + JsonEvent::Eof => { + return Err(SyntaxError::msg( + "Unexpected end of JSON object without 'results' or 'boolean' key", + ) + .into()) + } + _ => return Err(SyntaxError::msg("Invalid SPARQL results serialization").into()), + } + } + } +} + +pub struct JsonSolutionsReader { + reader: JsonReader, + buffer: Vec, + mapping: BTreeMap, +} + +impl JsonSolutionsReader { + pub fn read_next(&mut self) -> Result>>, ParserError> { + let mut new_bindings = vec![None; self.mapping.len()]; + loop { + match self.reader.read_event(&mut self.buffer)? { + JsonEvent::StartObject => (), + JsonEvent::EndObject => return Ok(Some(new_bindings)), + JsonEvent::EndArray | JsonEvent::Eof => return Ok(None), + JsonEvent::ObjectKey(key) => { + let k = *self.mapping.get(key).ok_or_else(|| { + SyntaxError::msg(format!( + "The variable {} has not been defined in the header", + key + )) + })?; + new_bindings[k] = Some(self.read_value()?) + } + _ => return Err(SyntaxError::msg("Invalid result serialization").into()), + } + } + } + + fn read_value(&mut self) -> Result { + enum Type { + Uri, + BNode, + Literal, + Triple, + } + #[derive(Eq, PartialEq)] + enum State { + Type, + Value, + Lang, + Datatype, + } + let mut state = None; + let mut t = None; + let mut value = None; + let mut lang = None; + let mut datatype = None; + let mut subject = None; + let mut predicate = None; + let mut object = None; + if self.reader.read_event(&mut self.buffer)? != JsonEvent::StartObject { + return Err(SyntaxError::msg("Term serializations should be an object").into()); + } + loop { + match self.reader.read_event(&mut self.buffer)? { + JsonEvent::ObjectKey(key) => match key { + "type" => state = Some(State::Type), + "value" => state = Some(State::Value), + "xml:lang" => state = Some(State::Lang), + "datatype" => state = Some(State::Datatype), + "subject" => subject = Some(self.read_value()?), + "predicate" => predicate = Some(self.read_value()?), + "object" => object = Some(self.read_value()?), + _ => { + return Err(SyntaxError::msg(format!( + "Unexpected key in term serialization: '{}'", + key + )) + .into()) + } + }, + JsonEvent::StartObject => { + if state != Some(State::Value) { + return Err(SyntaxError::msg( + "Unexpected nested object in term serialization", + ) + .into()); + } + } + JsonEvent::String(s) => match state { + Some(State::Type) => { + match s { + "uri" => t = Some(Type::Uri), + "bnode" => t = Some(Type::BNode), + "literal" => t = Some(Type::Literal), + "triple" => t = Some(Type::Triple), + _ => { + return Err(SyntaxError::msg(format!( + "Unexpected term type: '{}'", + s + )) + .into()) + } + }; + state = None; + } + Some(State::Value) => { + value = Some(s.to_owned()); + state = None; + } + Some(State::Lang) => { + lang = Some(s.to_owned()); + state = None; + } + Some(State::Datatype) => { + datatype = Some(NamedNode::new(s).map_err(|e| { + SyntaxError::msg(format!("Invalid datatype IRI: {}", e)) + })?); + state = None; + } + _ => (), // impossible + }, + JsonEvent::EndObject => { + if let Some(s) = state { + if s == State::Value { + state = None; //End of triple + } else { + return Err(SyntaxError::msg( + "Term description values should be string", + ) + .into()); + } + } else { + return match t { + None => Err(SyntaxError::msg( + "Term serialization should have a 'type' key", + ) + .into()), + Some(Type::Uri) => Ok(NamedNode::new(value.ok_or_else(|| { + SyntaxError::msg("uri serialization should have a 'value' key") + })?) + .map_err(|e| SyntaxError::msg(format!("Invalid uri value: {}", e)))? + .into()), + Some(Type::BNode) => Ok(BlankNode::new(value.ok_or_else(|| { + SyntaxError::msg("bnode serialization should have a 'value' key") + })?) + .map_err(|e| SyntaxError::msg(format!("Invalid bnode value: {}", e)))? + .into()), + Some(Type::Literal) => { + let value = value.ok_or_else(|| { + SyntaxError::msg( + "literal serialization should have a 'value' key", + ) + })?; + Ok(match lang { + Some(lang) => { + if let Some(datatype) = datatype { + if datatype.as_ref() != rdf::LANG_STRING { + return Err(SyntaxError::msg(format!( + "xml:lang value '{}' provided with the datatype {}", + lang, datatype + )).into()) + } + } + Literal::new_language_tagged_literal(value, &lang).map_err(|e| { + SyntaxError::msg(format!("Invalid xml:lang value '{}': {}", lang, e)) + })? + } + None => if let Some(datatype) = datatype { + Literal::new_typed_literal(value, datatype) + } else { + Literal::new_simple_literal(value) + } + } + .into()) + } + Some(Type::Triple) => Ok(Triple::new( + match subject.ok_or_else(|| { + SyntaxError::msg( + "triple serialization should have a 'subject' key", + ) + })? { + Term::NamedNode(subject) => subject.into(), + Term::BlankNode(subject) => subject.into(), + Term::Triple(subject) => Subject::Triple(subject), + Term::Literal(_) => { + return Err(SyntaxError::msg( + "The 'subject' value should not be a literal", + ) + .into()) + } + }, + match predicate.ok_or_else(|| { + SyntaxError::msg( + "triple serialization should have a 'predicate' key", + ) + })? { + Term::NamedNode(predicate) => predicate, + _ => { + return Err(SyntaxError::msg( + "The 'predicate' value should be a uri", + ) + .into()) + } + }, + object.ok_or_else(|| { + SyntaxError::msg( + "triple serialization should have a 'object' key", + ) + })?, + ) + .into()), + }; + } + } + _ => return Err(SyntaxError::msg("Invalid term serialization").into()), + } + } + } +} + +fn read_head( + reader: &mut JsonReader, + buffer: &mut Vec, +) -> Result, ParserError> { + if reader.read_event(buffer)? != JsonEvent::StartObject { + return Err(SyntaxError::msg("head should be an object").into()); + } + let mut variables = None; + loop { + match reader.read_event(buffer)? { + JsonEvent::ObjectKey(key) => match key { + "vars" => variables = Some(read_string_array(reader, buffer)?), + "link" => { + read_string_array(reader, buffer)?; + } + _ => { + return Err( + SyntaxError::msg(format!("Unexpected key in head: '{}'", key)).into(), + ) + } + }, + JsonEvent::EndObject => return Ok(variables.unwrap_or_else(Vec::new)), + _ => return Err(SyntaxError::msg("Invalid head serialization").into()), + } + } +} + +fn read_string_array( + reader: &mut JsonReader, + buffer: &mut Vec, +) -> Result, ParserError> { + if reader.read_event(buffer)? != JsonEvent::StartArray { + return Err(SyntaxError::msg("Variable list should be an array").into()); + } + let mut elements = Vec::new(); + loop { + match reader.read_event(buffer)? { + JsonEvent::String(s) => { + elements.push(s.into()); + } + JsonEvent::EndArray => return Ok(elements), + _ => return Err(SyntaxError::msg("Variable names should be strings").into()), + } + } +} + +struct ResultsIterator { + reader: JsonReader, + buffer: Vec, + mapping: BTreeMap, +} + +impl Iterator for ResultsIterator { + type Item = Result>, EvaluationError>; + + fn next(&mut self) -> Option>, EvaluationError>> { + self.read_next().map_err(EvaluationError::from).transpose() + } +} + +impl ResultsIterator { + fn read_next(&mut self) -> Result>>, ParserError> { + let mut new_bindings = vec![None; self.mapping.len()]; + loop { + match self.reader.read_event(&mut self.buffer)? { + JsonEvent::StartObject => (), + JsonEvent::EndObject => return Ok(Some(new_bindings)), + JsonEvent::EndArray | JsonEvent::Eof => return Ok(None), + JsonEvent::ObjectKey(key) => { + let k = *self.mapping.get(key).ok_or_else(|| { + SyntaxError::msg(format!( + "The variable {} has not been defined in the header", + key + )) + })?; + new_bindings[k] = Some(self.read_value()?) + } + _ => return Err(SyntaxError::msg("Invalid result serialization").into()), + } + } + } + fn read_value(&mut self) -> Result { + enum Type { + Uri, + BNode, + Literal, + Triple, + } + #[derive(Eq, PartialEq)] + enum State { + Type, + Value, + Lang, + Datatype, + } + let mut state = None; + let mut t = None; + let mut value = None; + let mut lang = None; + let mut datatype = None; + let mut subject = None; + let mut predicate = None; + let mut object = None; + if self.reader.read_event(&mut self.buffer)? != JsonEvent::StartObject { + return Err(SyntaxError::msg("Term serializations should be an object").into()); + } + loop { + match self.reader.read_event(&mut self.buffer)? { + JsonEvent::ObjectKey(key) => match key { + "type" => state = Some(State::Type), + "value" => state = Some(State::Value), + "xml:lang" => state = Some(State::Lang), + "datatype" => state = Some(State::Datatype), + "subject" => subject = Some(self.read_value()?), + "predicate" => predicate = Some(self.read_value()?), + "object" => object = Some(self.read_value()?), + _ => { + return Err(SyntaxError::msg(format!( + "Unexpected key in term serialization: '{}'", + key + )) + .into()) + } + }, + JsonEvent::StartObject => { + if state != Some(State::Value) { + return Err(SyntaxError::msg( + "Unexpected nested object in term serialization", + ) + .into()); + } + } + JsonEvent::String(s) => match state { + Some(State::Type) => { + match s { + "uri" => t = Some(Type::Uri), + "bnode" => t = Some(Type::BNode), + "literal" => t = Some(Type::Literal), + "triple" => t = Some(Type::Triple), + _ => { + return Err(SyntaxError::msg(format!( + "Unexpected term type: '{}'", + s + )) + .into()) + } + }; + state = None; + } + Some(State::Value) => { + value = Some(s.to_owned()); + state = None; + } + Some(State::Lang) => { + lang = Some(s.to_owned()); + state = None; + } + Some(State::Datatype) => { + datatype = Some(NamedNode::new(s).map_err(|e| { + SyntaxError::msg(format!("Invalid datatype value: {}", e)) + })?); + state = None; + } + _ => (), // impossible + }, + JsonEvent::EndObject => { + if let Some(s) = state { + if s == State::Value { + state = None; //End of triple + } else { + return Err(SyntaxError::msg( + "Term description values should be string", + ) + .into()); + } + } else { + return match t { + None => Err(SyntaxError::msg( + "Term serialization should have a 'type' key", + ) + .into()), + Some(Type::Uri) => Ok(NamedNode::new(value.ok_or_else(|| { + SyntaxError::msg("uri serialization should have a 'value' key") + })?) + .map_err(|e| SyntaxError::msg(format!("Invalid uri value: {}", e)))? + .into()), + Some(Type::BNode) => Ok(BlankNode::new(value.ok_or_else(|| { + SyntaxError::msg("bnode serialization should have a 'value' key") + })?) + .map_err(|e| SyntaxError::msg(format!("Invalid bnode value: {}", e)))? + .into()), + Some(Type::Literal) => { + let value = value.ok_or_else(|| { + SyntaxError::msg( + "literal serialization should have a 'value' key", + ) + })?; + Ok(match lang { + Some(lang) => { + if let Some(datatype) = datatype { + if datatype.as_ref() != rdf::LANG_STRING { + return Err(SyntaxError::msg(format!( + "xml:lang value '{}' provided with the datatype {}", + lang, datatype + )).into()) + } + } + Literal::new_language_tagged_literal(value, &lang).map_err(|e| { + SyntaxError::msg(format!("Invalid xml:lang value '{}': {}", lang, e)) + })? + } + None => if let Some(datatype) = datatype { + Literal::new_typed_literal(value, datatype) + } else { + Literal::new_simple_literal(value) + } + } + .into()) + } + Some(Type::Triple) => Ok(Triple::new( + match subject.ok_or_else(|| { + SyntaxError::msg( + "triple serialization should have a 'subject' key", + ) + })? { + Term::NamedNode(subject) => subject.into(), + Term::BlankNode(subject) => subject.into(), + Term::Triple(subject) => Subject::Triple(subject), + Term::Literal(_) => { + return Err(SyntaxError::msg( + "The 'subject' value should not be a literal", + ) + .into()) + } + }, + match predicate.ok_or_else(|| { + SyntaxError::msg( + "triple serialization should have a 'predicate' key", + ) + })? { + Term::NamedNode(predicate) => predicate, + _ => { + return Err(SyntaxError::msg( + "The 'predicate' value should be a uri", + ) + .into()) + } + }, + object.ok_or_else(|| { + SyntaxError::msg( + "triple serialization should have a 'object' key", + ) + })?, + ) + .into()), + }; + } + } + _ => return Err(SyntaxError::msg("Invalid term serialization").into()), + } + } + } +} diff --git a/lib/src/sparql/io/mod.rs b/lib/src/sparql/io/mod.rs new file mode 100644 index 00000000..4c84e26e --- /dev/null +++ b/lib/src/sparql/io/mod.rs @@ -0,0 +1,337 @@ +mod csv; +mod json; +mod xml; + +use crate::io::read::{ParserError, SyntaxError}; +use crate::model::{Term, TermRef}; +use crate::sparql::io::csv::*; +use crate::sparql::io::json::*; +use crate::sparql::io::xml::*; +use crate::sparql::{EvaluationError, QueryResults, QuerySolution, QuerySolutionIter, Variable}; +use std::io::{self, BufRead, Write}; +use std::rc::Rc; + +/// [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats. +#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +#[non_exhaustive] +pub enum QueryResultsFormat { + /// [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/) + Xml, + /// [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) + Json, + /// [SPARQL Query Results CSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) + Csv, + /// [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) + Tsv, +} + +impl QueryResultsFormat { + /// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/). + /// + /// ``` + /// use oxigraph::sparql::QueryResultsFormat; + /// + /// assert_eq!(QueryResultsFormat::Json.iri(), "http://www.w3.org/ns/formats/SPARQL_Results_JSON") + /// ``` + #[inline] + pub fn iri(self) -> &'static str { + match self { + QueryResultsFormat::Xml => "http://www.w3.org/ns/formats/SPARQL_Results_XML", + QueryResultsFormat::Json => "http://www.w3.org/ns/formats/SPARQL_Results_JSON", + QueryResultsFormat::Csv => "http://www.w3.org/ns/formats/SPARQL_Results_CSV", + QueryResultsFormat::Tsv => "http://www.w3.org/ns/formats/SPARQL_Results_TSV", + } + } + /// The format [IANA media type](https://tools.ietf.org/html/rfc2046). + /// + /// ``` + /// use oxigraph::sparql::QueryResultsFormat; + /// + /// assert_eq!(QueryResultsFormat::Json.media_type(), "application/sparql-results+json") + /// ``` + #[inline] + pub fn media_type(self) -> &'static str { + match self { + QueryResultsFormat::Xml => "application/sparql-results+xml", + QueryResultsFormat::Json => "application/sparql-results+json", + QueryResultsFormat::Csv => "text/csv; charset=utf-8", + QueryResultsFormat::Tsv => "text/tab-separated-values; charset=utf-8", + } + } + + /// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension. + /// + /// ``` + /// use oxigraph::sparql::QueryResultsFormat; + /// + /// assert_eq!(QueryResultsFormat::Json.file_extension(), "srj") + /// ``` + #[inline] + pub fn file_extension(self) -> &'static str { + match self { + QueryResultsFormat::Xml => "srx", + QueryResultsFormat::Json => "srj", + QueryResultsFormat::Csv => "csv", + QueryResultsFormat::Tsv => "tsv", + } + } + + /// Looks for a known format from a media type. + /// + /// It supports some media type aliases. + /// For example "application/xml" is going to return `Xml` even if it is not its canonical media type. + /// + /// Example: + /// ``` + /// use oxigraph::sparql::QueryResultsFormat; + /// + /// assert_eq!(QueryResultsFormat::from_media_type("application/sparql-results+json; charset=utf-8"), Some(QueryResultsFormat::Json)) + /// ``` + pub fn from_media_type(media_type: &str) -> Option { + match media_type.split(';').next()?.trim() { + "application/sparql-results+xml" | "application/xml" | "text/xml" => Some(Self::Xml), + "application/sparql-results+json" | "application/json" | "text/json" => { + Some(Self::Json) + } + "text/csv" => Some(Self::Csv), + "text/tab-separated-values" | "text/tsv" => Some(Self::Tsv), + _ => None, + } + } + + /// Looks for a known format from an extension. + /// + /// It supports some aliases. + /// + /// Example: + /// ``` + /// use oxigraph::sparql::QueryResultsFormat; + /// + /// assert_eq!(QueryResultsFormat::from_extension("json"), Some(QueryResultsFormat::Json)) + /// ``` + pub fn from_extension(extension: &str) -> Option { + match extension { + "srx" | "xml" => Some(Self::Xml), + "srj" | "json" => Some(Self::Json), + "csv" | "txt" => Some(Self::Csv), + "tsv" => Some(Self::Tsv), + _ => None, + } + } +} + +/// Parsers for [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats. +/// +/// It currently supports the following formats: +/// * [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/) ([`QueryResultsFormat::Xml`](QueryResultsFormat::Xml)) +/// * [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) ([`QueryResultsFormat::Json`](QueryResultsFormat::Json)) +/// * [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Tsv`](QueryResultsFormat::Tsv)) +#[allow(missing_copy_implementations)] +pub struct QueryResultsParser { + format: QueryResultsFormat, +} + +impl QueryResultsParser { + /// Builds a parser for the given format. + pub fn from_format(format: QueryResultsFormat) -> Self { + Self { format } + } + + pub fn read_results( + &self, + reader: R, + ) -> Result, ParserError> { + Ok(match self.format { + QueryResultsFormat::Xml => match XmlQueryResultsReader::read(reader)? { + XmlQueryResultsReader::Boolean(r) => QueryResultsReader::Boolean(r), + XmlQueryResultsReader::Solutions { + solutions, + variables, + } => QueryResultsReader::Solutions(SolutionsReader { + variables: Rc::new(variables), + solutions: SolutionsReaderKind::Xml(solutions), + }), + }, + QueryResultsFormat::Json => match JsonQueryResultsReader::read(reader)? { + JsonQueryResultsReader::Boolean(r) => QueryResultsReader::Boolean(r), + JsonQueryResultsReader::Solutions { + solutions, + variables, + } => QueryResultsReader::Solutions(SolutionsReader { + variables: Rc::new(variables), + solutions: SolutionsReaderKind::Json(solutions), + }), + }, + QueryResultsFormat::Csv => return Err(SyntaxError::msg("CSV SPARQL results syntax is lossy and can't be parsed to a proper RDF representation").into()), + QueryResultsFormat::Tsv => match TsvQueryResultsReader::read(reader)? { + TsvQueryResultsReader::Boolean(r) => QueryResultsReader::Boolean(r), + TsvQueryResultsReader::Solutions { + solutions, + variables, + } => QueryResultsReader::Solutions(SolutionsReader { + variables: Rc::new(variables), + solutions: SolutionsReaderKind::Tsv(solutions), + }), + }, + }) + } +} + +pub enum QueryResultsReader { + Solutions(SolutionsReader), + Boolean(bool), +} + +pub struct SolutionsReader { + variables: Rc>, + solutions: SolutionsReaderKind, +} + +enum SolutionsReaderKind { + Xml(XmlSolutionsReader), + Json(JsonSolutionsReader), + Tsv(TsvSolutionsReader), +} + +impl SolutionsReader { + #[inline] + pub fn variables(&self) -> &[Variable] { + &self.variables + } +} + +impl Iterator for SolutionsReaderKind { + type Item = Result>, ParserError>; + + fn next(&mut self) -> Option>, ParserError>> { + match self { + Self::Xml(reader) => reader.read_next(), + Self::Json(reader) => reader.read_next(), + Self::Tsv(reader) => reader.read_next(), + } + .transpose() + } +} + +impl Iterator for SolutionsReader { + type Item = Result; + + fn next(&mut self) -> Option> { + Some(self.solutions.next()?.map(|values| QuerySolution { + values, + variables: self.variables.clone(), + })) + } +} + +impl From> for QuerySolutionIter { + fn from(reader: SolutionsReader) -> Self { + Self::new( + reader.variables.clone(), + Box::new(reader.solutions.map(|r| r.map_err(EvaluationError::from))), + ) + } +} + +impl From> for QueryResults { + fn from(reader: QueryResultsReader) -> Self { + match reader { + QueryResultsReader::Solutions(s) => Self::Solutions(s.into()), + QueryResultsReader::Boolean(v) => Self::Boolean(v), + } + } +} + +/// A serializer for [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats. +/// +/// It currently supports the following formats: +/// * [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/) ([`QueryResultsFormat::Xml`](QueryResultsFormat::Xml)) +/// * [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) ([`QueryResultsFormat::Json`](QueryResultsFormat::Json)) +/// * [SPARQL Query Results CSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Csv`](QueryResultsFormat::Csv)) +/// * [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Tsv`](QueryResultsFormat::Tsv)) +#[allow(missing_copy_implementations)] +pub struct QueryResultsSerializer { + format: QueryResultsFormat, +} + +impl QueryResultsSerializer { + /// Builds a serializer for the given format + pub fn from_format(format: QueryResultsFormat) -> Self { + Self { format } + } + + pub fn write_boolean_result(&self, writer: W, value: bool) -> io::Result { + match self.format { + QueryResultsFormat::Xml => write_boolean_xml_result(writer, value), + QueryResultsFormat::Json => write_boolean_json_result(writer, value), + QueryResultsFormat::Csv => write_boolean_csv_result(writer, value), + QueryResultsFormat::Tsv => write_boolean_tsv_result(writer, value), + } + } + + /// Returns a `SolutionsWriter` allowing writing query solutions into the given [`Write`](std::io::Write) implementation + pub fn solutions_writer( + &self, + writer: W, + variables: &[Variable], + ) -> io::Result> { + Ok(SolutionsWriter { + formatter: match self.format { + QueryResultsFormat::Xml => { + SolutionsWriterKind::Xml(XmlSolutionsWriter::start(writer, variables)?) + } + QueryResultsFormat::Json => { + SolutionsWriterKind::Json(JsonSolutionsWriter::start(writer, variables)?) + } + QueryResultsFormat::Csv => { + SolutionsWriterKind::Csv(CsvSolutionsWriter::start(writer, variables)?) + } + QueryResultsFormat::Tsv => { + SolutionsWriterKind::Tsv(TsvSolutionsWriter::start(writer, variables)?) + } + }, + }) + } +} + +/// Allows writing query results. +/// Could be built using a [`QueryResultsSerializer`]. +/// +/// Warning: Do not forget to run the [`finish`](SolutionsWriter::finish()) method to properly write the last bytes of the file. +#[must_use] +pub struct SolutionsWriter { + formatter: SolutionsWriterKind, +} + +enum SolutionsWriterKind { + Xml(XmlSolutionsWriter), + Json(JsonSolutionsWriter), + Csv(CsvSolutionsWriter), + Tsv(TsvSolutionsWriter), +} + +impl SolutionsWriter { + /// Writes a solution + pub fn write<'a>( + &mut self, + solution: impl IntoIterator>>, + ) -> io::Result<()> { + match &mut self.formatter { + SolutionsWriterKind::Xml(writer) => writer.write(solution), + SolutionsWriterKind::Json(writer) => writer.write(solution), + SolutionsWriterKind::Csv(writer) => writer.write(solution), + SolutionsWriterKind::Tsv(writer) => writer.write(solution), + } + } + + /// Writes the last bytes of the file + pub fn finish(self) -> io::Result<()> { + match self.formatter { + SolutionsWriterKind::Xml(write) => write.finish()?, + SolutionsWriterKind::Json(write) => write.finish()?, + SolutionsWriterKind::Csv(write) => write.finish(), + SolutionsWriterKind::Tsv(write) => write.finish(), + }; + Ok(()) + } +} diff --git a/lib/src/sparql/io/xml.rs b/lib/src/sparql/io/xml.rs new file mode 100644 index 00000000..742a81f2 --- /dev/null +++ b/lib/src/sparql/io/xml.rs @@ -0,0 +1,597 @@ +//! Implementation of [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/) + +use crate::io::read::{ParserError, SyntaxError}; +use crate::model::vocab::rdf; +use crate::model::*; +use crate::sparql::model::Variable; +use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; +use quick_xml::Reader; +use quick_xml::Writer; +use std::collections::BTreeMap; +use std::io::{self, BufRead, Write}; + +pub fn write_boolean_xml_result(sink: W, value: bool) -> io::Result { + do_write_boolean_xml_result(sink, value).map_err(map_xml_error) +} + +fn do_write_boolean_xml_result(sink: W, value: bool) -> Result { + let mut writer = Writer::new(sink); + writer.write_event(Event::Decl(BytesDecl::new(b"1.0", None, None)))?; + let mut sparql_open = BytesStart::borrowed_name(b"sparql"); + sparql_open.push_attribute(("xmlns", "http://www.w3.org/2005/sparql-results#")); + writer.write_event(Event::Start(sparql_open))?; + writer.write_event(Event::Start(BytesStart::borrowed_name(b"head")))?; + writer.write_event(Event::End(BytesEnd::borrowed(b"head")))?; + writer.write_event(Event::Start(BytesStart::borrowed_name(b"boolean")))?; + writer.write_event(Event::Text(BytesText::from_plain_str(if value { + "true" + } else { + "false" + })))?; + writer.write_event(Event::End(BytesEnd::borrowed(b"boolean")))?; + writer.write_event(Event::End(BytesEnd::borrowed(b"sparql")))?; + Ok(writer.into_inner()) +} + +pub struct XmlSolutionsWriter { + writer: Writer, + variables: Vec, +} + +impl XmlSolutionsWriter { + pub fn start(sink: W, variables: &[Variable]) -> io::Result { + Self::do_start(sink, variables).map_err(map_xml_error) + } + + fn do_start(sink: W, variables: &[Variable]) -> Result { + let mut writer = Writer::new(sink); + writer.write_event(Event::Decl(BytesDecl::new(b"1.0", None, None)))?; + let mut sparql_open = BytesStart::borrowed_name(b"sparql"); + sparql_open.push_attribute(("xmlns", "http://www.w3.org/2005/sparql-results#")); + writer.write_event(Event::Start(sparql_open))?; + writer.write_event(Event::Start(BytesStart::borrowed_name(b"head")))?; + for variable in variables { + let mut variable_tag = BytesStart::borrowed_name(b"variable"); + variable_tag.push_attribute(("name", variable.as_str())); + writer.write_event(Event::Empty(variable_tag))?; + } + writer.write_event(Event::End(BytesEnd::borrowed(b"head")))?; + writer.write_event(Event::Start(BytesStart::borrowed_name(b"results")))?; + Ok(Self { + writer, + variables: variables.to_vec(), + }) + } + + pub fn write<'a>( + &mut self, + solution: impl IntoIterator>>, + ) -> io::Result<()> { + self.do_write(solution).map_err(map_xml_error) + } + + fn do_write<'a>( + &mut self, + solution: impl IntoIterator>>, + ) -> Result<(), quick_xml::Error> { + self.writer + .write_event(Event::Start(BytesStart::borrowed_name(b"result")))?; + for (value, variable) in solution.into_iter().zip(&self.variables) { + if let Some(value) = value { + let mut binding_tag = BytesStart::borrowed_name(b"binding"); + binding_tag.push_attribute(("name", variable.as_str())); + self.writer.write_event(Event::Start(binding_tag))?; + write_xml_term(value, &mut self.writer)?; + self.writer + .write_event(Event::End(BytesEnd::borrowed(b"binding")))?; + } + } + self.writer + .write_event(Event::End(BytesEnd::borrowed(b"result"))) + } + + pub fn finish(self) -> io::Result { + self.do_finish().map_err(map_xml_error) + } + + fn do_finish(mut self) -> Result { + self.writer + .write_event(Event::End(BytesEnd::borrowed(b"results")))?; + self.writer + .write_event(Event::End(BytesEnd::borrowed(b"sparql")))?; + Ok(self.writer.into_inner()) + } +} + +fn write_xml_term( + term: TermRef<'_>, + writer: &mut Writer, +) -> Result<(), quick_xml::Error> { + match term { + TermRef::NamedNode(uri) => { + writer.write_event(Event::Start(BytesStart::borrowed_name(b"uri")))?; + writer.write_event(Event::Text(BytesText::from_plain_str(uri.as_str())))?; + writer.write_event(Event::End(BytesEnd::borrowed(b"uri")))?; + } + TermRef::BlankNode(bnode) => { + writer.write_event(Event::Start(BytesStart::borrowed_name(b"bnode")))?; + writer.write_event(Event::Text(BytesText::from_plain_str(bnode.as_str())))?; + writer.write_event(Event::End(BytesEnd::borrowed(b"bnode")))?; + } + TermRef::Literal(literal) => { + let mut literal_tag = BytesStart::borrowed_name(b"literal"); + if let Some(language) = literal.language() { + literal_tag.push_attribute(("xml:lang", language)); + } else if !literal.is_plain() { + literal_tag.push_attribute(("datatype", literal.datatype().as_str())); + } + writer.write_event(Event::Start(literal_tag))?; + writer.write_event(Event::Text(BytesText::from_plain_str(literal.value())))?; + writer.write_event(Event::End(BytesEnd::borrowed(b"literal")))?; + } + TermRef::Triple(triple) => { + writer.write_event(Event::Start(BytesStart::borrowed_name(b"triple")))?; + writer.write_event(Event::Start(BytesStart::borrowed_name(b"subject")))?; + write_xml_term(triple.subject.as_ref().into(), writer)?; + writer.write_event(Event::End(BytesEnd::borrowed(b"subject")))?; + writer.write_event(Event::Start(BytesStart::borrowed_name(b"predicate")))?; + write_xml_term(triple.predicate.as_ref().into(), writer)?; + writer.write_event(Event::End(BytesEnd::borrowed(b"predicate")))?; + writer.write_event(Event::Start(BytesStart::borrowed_name(b"object")))?; + write_xml_term(triple.object.as_ref(), writer)?; + writer.write_event(Event::End(BytesEnd::borrowed(b"object")))?; + writer.write_event(Event::End(BytesEnd::borrowed(b"triple")))?; + } + } + Ok(()) +} + +pub enum XmlQueryResultsReader { + Solutions { + variables: Vec, + solutions: XmlSolutionsReader, + }, + Boolean(bool), +} + +impl XmlQueryResultsReader { + pub fn read(source: R) -> Result { + enum State { + Start, + Sparql, + Head, + AfterHead, + Boolean, + } + + let mut reader = Reader::from_reader(source); + reader.trim_text(true); + reader.expand_empty_elements(true); + + let mut buffer = Vec::default(); + let mut namespace_buffer = Vec::default(); + let mut variables = Vec::default(); + let mut state = State::Start; + + //Read header + loop { + let event = { + let (ns, event) = + reader.read_namespaced_event(&mut buffer, &mut namespace_buffer)?; + if let Some(ns) = ns { + if ns != b"http://www.w3.org/2005/sparql-results#".as_ref() { + return Err(SyntaxError::msg(format!( + "Unexpected namespace found in RDF/XML query result: {}", + reader.decode(ns)? + )) + .into()); + } + } + event + }; + match event { + Event::Start(event) => match state { + State::Start => { + if event.name() == b"sparql" { + state = State::Sparql; + } else { + return Err(SyntaxError::msg(format!("Expecting tag, found {}", reader.decode(event.name())?)).into()); + } + } + State::Sparql => { + if event.name() == b"head" { + state = State::Head; + } else { + return Err(SyntaxError::msg(format!("Expecting tag, found {}", reader.decode(event.name())?)).into()); + } + } + State::Head => { + if event.name() == b"variable" { + let name = event.attributes() + .filter_map(std::result::Result::ok) + .find(|attr| attr.key == b"name") + .ok_or_else(|| SyntaxError::msg("No name attribute found for the tag"))? + .unescape_and_decode_value(&reader)?; + variables.push(Variable::new(name).map_err(|e| SyntaxError::msg(format!("Invalid variable name: {}", e)))?); + } else if event.name() == b"link" { + // no op + } else { + return Err(SyntaxError::msg(format!("Expecting or tag, found {}", reader.decode(event.name())?)).into()); + } + } + State::AfterHead => { + if event.name() == b"boolean" { + state = State::Boolean + } else if event.name() == b"results" { + let mut mapping = BTreeMap::default(); + for (i, var) in variables.iter().enumerate() { + mapping.insert(var.as_str().as_bytes().to_vec(), i); + } + return Ok(Self::Solutions { variables, + solutions: XmlSolutionsReader { + reader, + buffer, + namespace_buffer, + mapping, + stack: Vec::new(), + subject_stack: Vec::new(), + predicate_stack: Vec::new(), + object_stack: Vec::new(), + }}); + } else if event.name() != b"link" && event.name() != b"results" && event.name() != b"boolean" { + return Err(SyntaxError::msg(format!("Expecting sparql tag, found {}", reader.decode(event.name())?)).into()); + } + } + State::Boolean => return Err(SyntaxError::msg(format!("Unexpected tag inside of tag: {}", reader.decode(event.name())?)).into()) + }, + Event::Text(event) => { + let value = event.unescaped()?; + return match state { + State::Boolean => { + return if value.as_ref() == b"true" { + Ok(Self::Boolean(true)) + } else if value.as_ref() == b"false" { + Ok(Self::Boolean(false)) + } else { + Err(SyntaxError::msg(format!("Unexpected boolean value. Found {}", reader.decode(&value)?)).into()) + }; + } + _ => Err(SyntaxError::msg(format!("Unexpected textual value found: {}", reader.decode(&value)?)).into()) + }; + }, + Event::End(event) => { + if let State::Head = state { + if event.name() == b"head" { + state = State::AfterHead + } + } else { + return Err(SyntaxError::msg("Unexpected early file end. All results file should have a and a or tag").into()); + } + }, + Event::Eof => return Err(SyntaxError::msg("Unexpected early file end. All results file should have a and a or tag").into()), + _ => (), + } + } + } +} + +enum State { + Start, + Result, + Binding, + Uri, + BNode, + Literal, + Triple, + Subject, + Predicate, + Object, + End, +} + +pub struct XmlSolutionsReader { + reader: Reader, + buffer: Vec, + namespace_buffer: Vec, + mapping: BTreeMap, usize>, + stack: Vec, + subject_stack: Vec, + predicate_stack: Vec, + object_stack: Vec, +} + +impl XmlSolutionsReader { + pub fn read_next(&mut self) -> Result>>, ParserError> { + let mut state = State::Start; + + let mut new_bindings = vec![None; self.mapping.len()]; + + let mut current_var = None; + let mut term: Option = None; + let mut lang = None; + let mut datatype = None; + loop { + let (ns, event) = self + .reader + .read_namespaced_event(&mut self.buffer, &mut self.namespace_buffer)?; + if let Some(ns) = ns { + if ns != b"http://www.w3.org/2005/sparql-results#".as_ref() { + return Err(SyntaxError::msg(format!( + "Unexpected namespace found in RDF/XML query result: {}", + self.reader.decode(ns)? + )) + .into()); + } + } + match event { + Event::Start(event) => match state { + State::Start => { + if event.name() == b"result" { + state = State::Result; + } else { + return Err(SyntaxError::msg(format!( + "Expecting , found {}", + self.reader.decode(event.name())? + )) + .into()); + } + } + State::Result => { + if event.name() == b"binding" { + match event + .attributes() + .filter_map(std::result::Result::ok) + .find(|attr| attr.key == b"name") + { + Some(attr) => current_var = Some(attr.unescaped_value()?.to_vec()), + None => { + return Err(SyntaxError::msg( + "No name attribute found for the tag", + ) + .into()); + } + } + state = State::Binding; + } else { + return Err(SyntaxError::msg(format!( + "Expecting , found {}", + self.reader.decode(event.name())? + )) + .into()); + } + } + State::Binding | State::Subject | State::Predicate | State::Object => { + if term.is_some() { + return Err(SyntaxError::msg( + "There is already a value for the current binding", + ) + .into()); + } + self.stack.push(state); + if event.name() == b"uri" { + state = State::Uri; + } else if event.name() == b"bnode" { + state = State::BNode; + } else if event.name() == b"literal" { + for attr in event.attributes().flatten() { + if attr.key == b"xml:lang" { + lang = Some(attr.unescape_and_decode_value(&self.reader)?); + } else if attr.key == b"datatype" { + let iri = attr.unescape_and_decode_value(&self.reader)?; + datatype = Some(NamedNode::new(&iri).map_err(|e| { + SyntaxError::msg(format!( + "Invalid datatype IRI '{}': {}", + iri, e + )) + })?); + } + } + state = State::Literal; + } else if event.name() == b"triple" { + state = State::Triple; + } else { + return Err(SyntaxError::msg(format!( + "Expecting , or found {}", + self.reader.decode(event.name())? + )) + .into()); + } + } + State::Triple => { + if event.name() == b"subject" { + state = State::Subject + } else if event.name() == b"predicate" { + state = State::Predicate + } else if event.name() == b"object" { + state = State::Object + } else { + return Err(SyntaxError::msg(format!( + "Expecting , or found {}", + self.reader.decode(event.name())? + )) + .into()); + } + } + _ => (), + }, + Event::Text(event) => { + let data = event.unescaped()?; + match state { + State::Uri => { + let iri = self.reader.decode(&data)?; + term = Some( + NamedNode::new(iri) + .map_err(|e| { + SyntaxError::msg(format!( + "Invalid IRI value '{}': {}", + iri, e + )) + })? + .into(), + ) + } + State::BNode => { + let bnode = self.reader.decode(&data)?; + term = Some( + BlankNode::new(bnode) + .map_err(|e| { + SyntaxError::msg(format!( + "Invalid blank node value '{}': {}", + bnode, e + )) + })? + .into(), + ) + } + State::Literal => { + term = Some( + build_literal( + self.reader.decode(&data)?, + lang.take(), + datatype.take(), + )? + .into(), + ); + } + _ => { + return Err(SyntaxError::msg(format!( + "Unexpected textual value found: {}", + self.reader.decode(&data)? + )) + .into()); + } + } + } + Event::End(_) => match state { + State::Start => state = State::End, + State::Result => return Ok(Some(new_bindings)), + State::Binding => { + if let Some(var) = ¤t_var { + if let Some(var) = self.mapping.get(var) { + new_bindings[*var] = term.take() + } else { + return Err( + SyntaxError::msg(format!("The variable '{}' is used in a binding but not declared in the variables list", self.reader.decode(var)?)).into() + ); + } + } else { + return Err(SyntaxError::msg("No name found for tag").into()); + } + state = State::Result; + } + State::Subject => { + if let Some(subject) = term.take() { + self.subject_stack.push(subject) + } + state = State::Triple; + } + State::Predicate => { + if let Some(predicate) = term.take() { + self.predicate_stack.push(predicate) + } + state = State::Triple; + } + State::Object => { + if let Some(object) = term.take() { + self.object_stack.push(object) + } + state = State::Triple; + } + State::Uri => state = self.stack.pop().unwrap(), + State::BNode => { + if term.is_none() { + //We default to a random bnode + term = Some(BlankNode::default().into()) + } + state = self.stack.pop().unwrap() + } + State::Literal => { + if term.is_none() { + //We default to the empty literal + term = Some(build_literal("", lang.take(), datatype.take())?.into()) + } + state = self.stack.pop().unwrap(); + } + State::Triple => { + if let (Some(subject), Some(predicate), Some(object)) = ( + self.subject_stack.pop(), + self.predicate_stack.pop(), + self.object_stack.pop(), + ) { + term = Some( + Triple::new( + match subject { + Term::NamedNode(subject) => subject.into(), + Term::BlankNode(subject) => subject.into(), + Term::Triple(subject) => Subject::Triple(subject), + Term::Literal(_) => { + return Err(SyntaxError::msg( + "The value should not be a ", + ) + .into()) + } + }, + match predicate { + Term::NamedNode(predicate) => predicate, + _ => { + return Err(SyntaxError::msg( + "The value should be an ", + ) + .into()) + } + }, + object, + ) + .into(), + ); + state = self.stack.pop().unwrap(); + } else { + return Err( + SyntaxError::msg("A should contain a , a and an ").into() + ); + } + } + State::End => (), + }, + Event::Eof => return Ok(None), + _ => (), + } + } + } +} + +fn build_literal( + value: impl Into, + lang: Option, + datatype: Option, +) -> Result { + match lang { + Some(lang) => { + if let Some(datatype) = datatype { + if datatype.as_ref() != rdf::LANG_STRING { + return Err(SyntaxError::msg(format!( + "xml:lang value '{}' provided with the datatype {}", + lang, datatype + )) + .into()); + } + } + Literal::new_language_tagged_literal(value, &lang).map_err(|e| { + SyntaxError::msg(format!("Invalid xml:lang value '{}': {}", lang, e)).into() + }) + } + None => Ok(if let Some(datatype) = datatype { + Literal::new_typed_literal(value, datatype) + } else { + Literal::new_simple_literal(value) + }), + } +} + +fn map_xml_error(error: quick_xml::Error) -> io::Error { + match error { + quick_xml::Error::Io(error) => error, + quick_xml::Error::UnexpectedEof(_) => io::Error::new(io::ErrorKind::UnexpectedEof, error), + _ => io::Error::new(io::ErrorKind::InvalidData, error), + } +} diff --git a/lib/src/sparql/json_results.rs b/lib/src/sparql/json_results.rs deleted file mode 100644 index 896484b7..00000000 --- a/lib/src/sparql/json_results.rs +++ /dev/null @@ -1,455 +0,0 @@ -//! Implementation of [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) - -use crate::error::{invalid_data_error, invalid_input_error}; -use crate::model::vocab::rdf; -use crate::model::*; -use crate::sparql::error::EvaluationError; -use crate::sparql::model::*; -use json_event_parser::{JsonEvent, JsonReader, JsonWriter}; -use std::collections::BTreeMap; -use std::io; -use std::io::{BufRead, Write}; -use std::rc::Rc; - -pub fn write_json_results(results: QueryResults, sink: impl Write) -> Result<(), EvaluationError> { - let mut writer = JsonWriter::from_writer(sink); - match results { - QueryResults::Boolean(value) => { - writer.write_event(JsonEvent::StartObject)?; - writer.write_event(JsonEvent::ObjectKey("head"))?; - writer.write_event(JsonEvent::StartObject)?; - writer.write_event(JsonEvent::EndObject)?; - writer.write_event(JsonEvent::ObjectKey("boolean"))?; - writer.write_event(JsonEvent::Boolean(value))?; - writer.write_event(JsonEvent::EndObject)?; - Ok(()) - } - QueryResults::Solutions(solutions) => { - writer.write_event(JsonEvent::StartObject)?; - writer.write_event(JsonEvent::ObjectKey("head"))?; - writer.write_event(JsonEvent::StartObject)?; - writer.write_event(JsonEvent::ObjectKey("vars"))?; - writer.write_event(JsonEvent::StartArray)?; - for variable in solutions.variables() { - writer.write_event(JsonEvent::String(variable.as_str()))?; - } - writer.write_event(JsonEvent::EndArray)?; - writer.write_event(JsonEvent::EndObject)?; - writer.write_event(JsonEvent::ObjectKey("results"))?; - writer.write_event(JsonEvent::StartObject)?; - writer.write_event(JsonEvent::ObjectKey("bindings"))?; - writer.write_event(JsonEvent::StartArray)?; - for solution in solutions { - writer.write_event(JsonEvent::StartObject)?; - - let solution = solution?; - for (variable, value) in solution.iter() { - writer.write_event(JsonEvent::ObjectKey(variable.as_str()))?; - write_json_term(value.as_ref(), &mut writer)?; - } - writer.write_event(JsonEvent::EndObject)?; - } - writer.write_event(JsonEvent::EndArray)?; - writer.write_event(JsonEvent::EndObject)?; - writer.write_event(JsonEvent::EndObject)?; - Ok(()) - } - QueryResults::Graph(_) => Err(invalid_input_error( - "Graphs could not be formatted to SPARQL query results XML format", - ) - .into()), - } -} - -fn write_json_term( - term: TermRef<'_>, - writer: &mut JsonWriter, -) -> Result<(), EvaluationError> { - match term { - TermRef::NamedNode(uri) => { - writer.write_event(JsonEvent::StartObject)?; - writer.write_event(JsonEvent::ObjectKey("type"))?; - writer.write_event(JsonEvent::String("uri"))?; - writer.write_event(JsonEvent::ObjectKey("value"))?; - writer.write_event(JsonEvent::String(uri.as_str()))?; - writer.write_event(JsonEvent::EndObject)?; - } - TermRef::BlankNode(bnode) => { - writer.write_event(JsonEvent::StartObject)?; - writer.write_event(JsonEvent::ObjectKey("type"))?; - writer.write_event(JsonEvent::String("bnode"))?; - writer.write_event(JsonEvent::ObjectKey("value"))?; - writer.write_event(JsonEvent::String(bnode.as_str()))?; - writer.write_event(JsonEvent::EndObject)?; - } - TermRef::Literal(literal) => { - writer.write_event(JsonEvent::StartObject)?; - writer.write_event(JsonEvent::ObjectKey("type"))?; - writer.write_event(JsonEvent::String("literal"))?; - writer.write_event(JsonEvent::ObjectKey("value"))?; - writer.write_event(JsonEvent::String(literal.value()))?; - if let Some(language) = literal.language() { - writer.write_event(JsonEvent::ObjectKey("xml:lang"))?; - writer.write_event(JsonEvent::String(language))?; - } else if !literal.is_plain() { - writer.write_event(JsonEvent::ObjectKey("datatype"))?; - writer.write_event(JsonEvent::String(literal.datatype().as_str()))?; - } - writer.write_event(JsonEvent::EndObject)?; - } - TermRef::Triple(triple) => { - writer.write_event(JsonEvent::StartObject)?; - writer.write_event(JsonEvent::ObjectKey("type"))?; - writer.write_event(JsonEvent::String("triple"))?; - writer.write_event(JsonEvent::ObjectKey("value"))?; - writer.write_event(JsonEvent::StartObject)?; - writer.write_event(JsonEvent::ObjectKey("subject"))?; - write_json_term(triple.subject.as_ref().into(), writer)?; - writer.write_event(JsonEvent::ObjectKey("predicate"))?; - write_json_term(triple.predicate.as_ref().into(), writer)?; - writer.write_event(JsonEvent::ObjectKey("object"))?; - write_json_term(triple.object.as_ref(), writer)?; - writer.write_event(JsonEvent::EndObject)?; - writer.write_event(JsonEvent::EndObject)?; - } - } - Ok(()) -} - -pub fn read_json_results(source: impl BufRead + 'static) -> io::Result { - let mut reader = JsonReader::from_reader(source); - let mut buffer = Vec::default(); - let mut variables = None; - - if reader.read_event(&mut buffer)? != JsonEvent::StartObject { - return Err(invalid_data_error( - "SPARQL JSON results should be an object", - )); - } - - loop { - let event = reader.read_event(&mut buffer)?; - match event { - JsonEvent::ObjectKey(key) => match key { - "head" => variables = Some(read_head(&mut reader, &mut buffer)?), - "results" => { - if reader.read_event(&mut buffer)? != JsonEvent::StartObject { - return Err(invalid_data_error("'results' should be an object")); - } - if reader.read_event(&mut buffer)? != JsonEvent::ObjectKey("bindings") { - return Err(invalid_data_error( - "'results' should contain a 'bindings' key", - )); - } - if reader.read_event(&mut buffer)? != JsonEvent::StartArray { - return Err(invalid_data_error("'bindings' should be an object")); - } - return if let Some(variables) = variables { - let mut mapping = BTreeMap::default(); - for (i, var) in variables.iter().enumerate() { - mapping.insert(var.clone(), i); - } - Ok(QueryResults::Solutions(QuerySolutionIter::new( - Rc::new( - variables - .into_iter() - .map(Variable::new) - .collect::, _>>() - .map_err(invalid_data_error)?, - ), - Box::new(ResultsIterator { - reader, - buffer, - mapping, - }), - ))) - } else { - Err(invalid_data_error( - "SPARQL tuple query results should contain a head key", - )) - }; - } - "boolean" => { - return if let JsonEvent::Boolean(v) = reader.read_event(&mut buffer)? { - Ok(QueryResults::Boolean(v)) - } else { - Err(invalid_data_error("Unexpected boolean value")) - } - } - _ => { - return Err(invalid_data_error(format!( - "Expecting head or result key, found {}", - key - ))); - } - }, - JsonEvent::EndObject => { - return Err(invalid_data_error( - "SPARQL results should contain a bindings key or a boolean key", - )) - } - JsonEvent::Eof => return Err(io::Error::from(io::ErrorKind::UnexpectedEof)), - _ => return Err(invalid_data_error("Invalid SPARQL results serialization")), - } - } -} - -fn read_head( - reader: &mut JsonReader, - buffer: &mut Vec, -) -> io::Result> { - if reader.read_event(buffer)? != JsonEvent::StartObject { - return Err(invalid_data_error("head should be an object")); - } - let mut variables = None; - loop { - match reader.read_event(buffer)? { - JsonEvent::ObjectKey(key) => match key { - "vars" => variables = Some(read_string_array(reader, buffer)?), - "link" => { - read_string_array(reader, buffer)?; - } - _ => { - return Err(invalid_data_error(format!( - "Unexpected key in head: '{}'", - key - ))) - } - }, - JsonEvent::EndObject => return Ok(variables.unwrap_or_else(Vec::new)), - _ => return Err(invalid_data_error("Invalid head serialization")), - } - } -} - -fn read_string_array( - reader: &mut JsonReader, - buffer: &mut Vec, -) -> io::Result> { - if reader.read_event(buffer)? != JsonEvent::StartArray { - return Err(invalid_data_error("Variable list should be an array")); - } - let mut elements = Vec::new(); - loop { - match reader.read_event(buffer)? { - JsonEvent::String(s) => { - elements.push(s.into()); - } - JsonEvent::EndArray => return Ok(elements), - _ => return Err(invalid_data_error("Variable names should be strings")), - } - } -} - -struct ResultsIterator { - reader: JsonReader, - buffer: Vec, - mapping: BTreeMap, -} - -impl Iterator for ResultsIterator { - type Item = Result>, EvaluationError>; - - fn next(&mut self) -> Option>, EvaluationError>> { - self.read_next().map_err(EvaluationError::from).transpose() - } -} - -impl ResultsIterator { - fn read_next(&mut self) -> io::Result>>> { - let mut new_bindings = vec![None; self.mapping.len()]; - loop { - match self.reader.read_event(&mut self.buffer)? { - JsonEvent::StartObject => (), - JsonEvent::EndObject => return Ok(Some(new_bindings)), - JsonEvent::EndArray | JsonEvent::Eof => return Ok(None), - JsonEvent::ObjectKey(key) => { - let k = *self.mapping.get(key).ok_or_else(|| { - invalid_data_error(format!( - "The variable {} has not been defined in the header", - key - )) - })?; - new_bindings[k] = Some(self.read_value()?) - } - _ => return Err(invalid_data_error("Invalid result serialization")), - } - } - } - fn read_value(&mut self) -> io::Result { - enum Type { - Uri, - BNode, - Literal, - Triple, - } - #[derive(Eq, PartialEq)] - enum State { - Type, - Value, - Lang, - Datatype, - } - let mut state = None; - let mut t = None; - let mut value = None; - let mut lang = None; - let mut datatype = None; - let mut subject = None; - let mut predicate = None; - let mut object = None; - if self.reader.read_event(&mut self.buffer)? != JsonEvent::StartObject { - return Err(invalid_data_error( - "Term serializations should be an object", - )); - } - loop { - match self.reader.read_event(&mut self.buffer)? { - JsonEvent::ObjectKey(key) => match key { - "type" => state = Some(State::Type), - "value" => state = Some(State::Value), - "xml:lang" => state = Some(State::Lang), - "datatype" => state = Some(State::Datatype), - "subject" => subject = Some(self.read_value()?), - "predicate" => predicate = Some(self.read_value()?), - "object" => object = Some(self.read_value()?), - _ => { - return Err(invalid_data_error(format!( - "Unexpected key in term serialization: '{}'", - key - ))) - } - }, - JsonEvent::StartObject => { - if state != Some(State::Value) { - return Err(invalid_data_error( - "Unexpected nested object in term serialization", - )); - } - } - JsonEvent::String(s) => match state { - Some(State::Type) => { - match s { - "uri" => t = Some(Type::Uri), - "bnode" => t = Some(Type::BNode), - "literal" => t = Some(Type::Literal), - "triple" => t = Some(Type::Triple), - _ => { - return Err(invalid_data_error(format!( - "Unexpected term type: '{}'", - s - ))) - } - }; - state = None; - } - Some(State::Value) => { - value = Some(s.to_owned()); - state = None; - } - Some(State::Lang) => { - lang = Some(s.to_owned()); - state = None; - } - Some(State::Datatype) => { - datatype = Some(NamedNode::new(s).map_err(|e| { - invalid_data_error(format!("Invalid datatype value: {}", e)) - })?); - state = None; - } - _ => (), // impossible - }, - JsonEvent::EndObject => { - if let Some(s) = state { - if s == State::Value { - state = None; //End of triple - } else { - return Err(invalid_data_error( - "Term description values should be string", - )); - } - } else { - return match t { - None => Err(invalid_data_error( - "Term serialization should have a 'type' key", - )), - Some(Type::Uri) => Ok(NamedNode::new(value.ok_or_else(|| { - invalid_data_error("uri serialization should have a 'value' key") - })?) - .map_err(|e| invalid_data_error(format!("Invalid uri value: {}", e)))? - .into()), - Some(Type::BNode) => Ok(BlankNode::new(value.ok_or_else(|| { - invalid_data_error("bnode serialization should have a 'value' key") - })?) - .map_err(|e| invalid_data_error(format!("Invalid bnode value: {}", e)))? - .into()), - Some(Type::Literal) => { - let value = value.ok_or_else(|| { - invalid_data_error( - "literal serialization should have a 'value' key", - ) - })?; - Ok(match lang { - Some(lang) => { - if let Some(datatype) = datatype { - if datatype.as_ref() != rdf::LANG_STRING { - return Err(invalid_data_error(format!( - "xml:lang value '{}' provided with the datatype {}", - lang, datatype - ))) - } - } - Literal::new_language_tagged_literal(value, &lang).map_err(|e| { - invalid_data_error(format!("Invalid xml:lang value '{}': {}", lang, e)) - })? - } - None => if let Some(datatype) = datatype { - Literal::new_typed_literal(value, datatype) - } else { - Literal::new_simple_literal(value) - } - } - .into()) - } - Some(Type::Triple) => Ok(Triple::new( - match subject.ok_or_else(|| { - invalid_data_error( - "triple serialization should have a 'subject' key", - ) - })? { - Term::NamedNode(subject) => subject.into(), - Term::BlankNode(subject) => subject.into(), - Term::Triple(subject) => Subject::Triple(subject), - Term::Literal(_) => { - return Err(invalid_data_error( - "The 'subject' value should not be a literal", - )) - } - }, - match predicate.ok_or_else(|| { - invalid_data_error( - "triple serialization should have a 'predicate' key", - ) - })? { - Term::NamedNode(predicate) => predicate, - _ => { - return Err(invalid_data_error( - "The 'predicate' value should be a uri", - )) - } - }, - object.ok_or_else(|| { - invalid_data_error( - "triple serialization should have a 'object' key", - ) - })?, - ) - .into()), - }; - } - } - _ => return Err(invalid_data_error("Invalid term serialization")), - } - } - } -} diff --git a/lib/src/sparql/mod.rs b/lib/src/sparql/mod.rs index cdf83ec8..8cff5100 100644 --- a/lib/src/sparql/mod.rs +++ b/lib/src/sparql/mod.rs @@ -3,30 +3,27 @@ //! Stores execute SPARQL. See [`Store`](crate::store::Store::query()) for an example. mod algebra; -mod csv_results; mod dataset; mod error; mod eval; mod http; -mod json_results; +pub mod io; mod model; mod plan; mod plan_builder; mod service; mod update; -mod xml_results; use crate::model::{NamedNode, Term}; pub use crate::sparql::algebra::{Query, Update}; use crate::sparql::dataset::DatasetView; pub use crate::sparql::error::EvaluationError; use crate::sparql::eval::SimpleEvaluator; -pub use crate::sparql::model::QueryResults; -pub use crate::sparql::model::QueryResultsFormat; -pub use crate::sparql::model::QuerySolution; -pub use crate::sparql::model::QuerySolutionIter; -pub use crate::sparql::model::QueryTripleIter; -pub use crate::sparql::model::{Variable, VariableNameParseError}; +pub use crate::sparql::io::QueryResultsFormat; +pub use crate::sparql::model::{ + QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter, Variable, + VariableNameParseError, +}; use crate::sparql::plan_builder::PlanBuilder; pub use crate::sparql::service::ServiceHandler; use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler}; diff --git a/lib/src/sparql/model.rs b/lib/src/sparql/model.rs index 551b18d4..28f912f7 100644 --- a/lib/src/sparql/model.rs +++ b/lib/src/sparql/model.rs @@ -2,10 +2,8 @@ use crate::error::invalid_input_error; use crate::io::GraphFormat; use crate::io::GraphSerializer; use crate::model::*; -use crate::sparql::csv_results::{read_tsv_results, write_csv_results, write_tsv_results}; use crate::sparql::error::EvaluationError; -use crate::sparql::json_results::{read_json_results, write_json_results}; -use crate::sparql::xml_results::{read_xml_results, write_xml_results}; +use crate::sparql::io::{QueryResultsFormat, QueryResultsParser, QueryResultsSerializer}; use std::error::Error; use std::io::{BufRead, Write}; use std::rc::Rc; @@ -24,14 +22,9 @@ pub enum QueryResults { impl QueryResults { /// Reads a SPARQL query results serialization. pub fn read(reader: impl BufRead + 'static, format: QueryResultsFormat) -> io::Result { - match format { - QueryResultsFormat::Xml => read_xml_results(reader), - QueryResultsFormat::Json => read_json_results(reader), - QueryResultsFormat::Csv => Err(invalid_input_error( - "CSV SPARQL results format parsing is not implemented", - )), - QueryResultsFormat::Tsv => read_tsv_results(reader), - } + Ok(QueryResultsParser::from_format(format) + .read_results(reader)? + .into()) } /// Writes the query results (solutions or boolean). @@ -57,12 +50,44 @@ impl QueryResults { writer: impl Write, format: QueryResultsFormat, ) -> Result<(), EvaluationError> { - match format { - QueryResultsFormat::Xml => write_xml_results(self, writer), - QueryResultsFormat::Json => write_json_results(self, writer), - QueryResultsFormat::Csv => write_csv_results(self, writer), - QueryResultsFormat::Tsv => write_tsv_results(self, writer), + let serializer = QueryResultsSerializer::from_format(format); + match self { + Self::Boolean(value) => { + serializer.write_boolean_result(writer, value)?; + } + QueryResults::Solutions(solutions) => { + let mut writer = serializer.solutions_writer(writer, solutions.variables())?; + for solution in solutions { + writer.write( + solution? + .values + .iter() + .map(|t| t.as_ref().map(|t| t.as_ref())), + )?; + } + writer.finish()?; + } + QueryResults::Graph(triples) => { + let mut writer = serializer.solutions_writer( + writer, + &[ + Variable::new_unchecked("subject"), + Variable::new_unchecked("predicate"), + Variable::new_unchecked("object"), + ], + )?; + for triple in triples { + let triple = triple?; + writer.write([ + Some(triple.subject.as_ref().into()), + Some(triple.predicate.as_ref().into()), + Some(triple.object.as_ref()), + ])?; + } + writer.finish()?; + } } + Ok(()) } /// Writes the graph query results. @@ -113,115 +138,6 @@ impl From for QueryResults { } } -/// [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats. -#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] -#[non_exhaustive] -pub enum QueryResultsFormat { - /// [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/) - Xml, - /// [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) - Json, - /// [SPARQL Query Results CSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) - Csv, - /// [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) - Tsv, -} - -impl QueryResultsFormat { - /// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/). - /// - /// ``` - /// use oxigraph::sparql::QueryResultsFormat; - /// - /// assert_eq!(QueryResultsFormat::Json.iri(), "http://www.w3.org/ns/formats/SPARQL_Results_JSON") - /// ``` - #[inline] - pub fn iri(self) -> &'static str { - match self { - QueryResultsFormat::Xml => "http://www.w3.org/ns/formats/SPARQL_Results_XML", - QueryResultsFormat::Json => "http://www.w3.org/ns/formats/SPARQL_Results_JSON", - QueryResultsFormat::Csv => "http://www.w3.org/ns/formats/SPARQL_Results_CSV", - QueryResultsFormat::Tsv => "http://www.w3.org/ns/formats/SPARQL_Results_TSV", - } - } - /// The format [IANA media type](https://tools.ietf.org/html/rfc2046). - /// - /// ``` - /// use oxigraph::sparql::QueryResultsFormat; - /// - /// assert_eq!(QueryResultsFormat::Json.media_type(), "application/sparql-results+json") - /// ``` - #[inline] - pub fn media_type(self) -> &'static str { - match self { - QueryResultsFormat::Xml => "application/sparql-results+xml", - QueryResultsFormat::Json => "application/sparql-results+json", - QueryResultsFormat::Csv => "text/csv; charset=utf-8", - QueryResultsFormat::Tsv => "text/tab-separated-values; charset=utf-8", - } - } - - /// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension. - /// - /// ``` - /// use oxigraph::sparql::QueryResultsFormat; - /// - /// assert_eq!(QueryResultsFormat::Json.file_extension(), "srj") - /// ``` - #[inline] - pub fn file_extension(self) -> &'static str { - match self { - QueryResultsFormat::Xml => "srx", - QueryResultsFormat::Json => "srj", - QueryResultsFormat::Csv => "csv", - QueryResultsFormat::Tsv => "tsv", - } - } - - /// Looks for a known format from a media type. - /// - /// It supports some media type aliases. - /// For example "application/xml" is going to return `Xml` even if it is not its canonical media type. - /// - /// Example: - /// ``` - /// use oxigraph::sparql::QueryResultsFormat; - /// - /// assert_eq!(QueryResultsFormat::from_media_type("application/sparql-results+json; charset=utf-8"), Some(QueryResultsFormat::Json)) - /// ``` - pub fn from_media_type(media_type: &str) -> Option { - match media_type.split(';').next()?.trim() { - "application/sparql-results+xml" | "application/xml" | "text/xml" => Some(Self::Xml), - "application/sparql-results+json" | "application/json" | "text/json" => { - Some(Self::Json) - } - "text/csv" => Some(Self::Csv), - "text/tab-separated-values" | "text/tsv" => Some(Self::Tsv), - _ => None, - } - } - - /// Looks for a known format from an extension. - /// - /// It supports some aliases. - /// - /// Example: - /// ``` - /// use oxigraph::sparql::QueryResultsFormat; - /// - /// assert_eq!(QueryResultsFormat::from_extension("json"), Some(QueryResultsFormat::Json)) - /// ``` - pub fn from_extension(extension: &str) -> Option { - match extension { - "srx" | "xml" => Some(Self::Xml), - "srj" | "json" => Some(Self::Json), - "csv" | "txt" => Some(Self::Csv), - "tsv" => Some(Self::Tsv), - _ => None, - } - } -} - /// An iterator over [`QuerySolution`]s. /// /// ``` @@ -288,8 +204,8 @@ impl Iterator for QuerySolutionIter { /// /// It is the equivalent of a row in SQL. pub struct QuerySolution { - values: Vec>, - variables: Rc>, + pub(super) values: Vec>, + pub(super) variables: Rc>, } impl QuerySolution { diff --git a/lib/src/sparql/xml_results.rs b/lib/src/sparql/xml_results.rs deleted file mode 100644 index 51e0aff0..00000000 --- a/lib/src/sparql/xml_results.rs +++ /dev/null @@ -1,674 +0,0 @@ -//! Implementation of [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/) - -use crate::error::{invalid_data_error, invalid_input_error}; -use crate::model::vocab::rdf; -use crate::model::*; -use crate::sparql::error::EvaluationError; -use crate::sparql::model::*; -use quick_xml::events::BytesDecl; -use quick_xml::events::BytesEnd; -use quick_xml::events::BytesStart; -use quick_xml::events::BytesText; -use quick_xml::events::Event; -use quick_xml::Reader; -use quick_xml::Writer; -use std::collections::BTreeMap; -use std::io; -use std::io::BufRead; -use std::io::Write; -use std::iter::empty; -use std::rc::Rc; - -pub fn write_xml_results(results: QueryResults, sink: impl Write) -> Result<(), EvaluationError> { - match results { - QueryResults::Boolean(value) => { - write_boolean(value, sink).map_err(map_xml_error)?; - Ok(()) - } - QueryResults::Solutions(solutions) => write_solutions(solutions, sink), - QueryResults::Graph(_) => Err(invalid_input_error( - "Graphs could not be formatted to SPARQL query results XML format", - ) - .into()), - } -} - -fn write_boolean(value: bool, sink: impl Write) -> Result<(), quick_xml::Error> { - let mut writer = Writer::new(sink); - writer.write_event(Event::Decl(BytesDecl::new(b"1.0", None, None)))?; - let mut sparql_open = BytesStart::borrowed_name(b"sparql"); - sparql_open.push_attribute(("xmlns", "http://www.w3.org/2005/sparql-results#")); - writer.write_event(Event::Start(sparql_open))?; - writer.write_event(Event::Start(BytesStart::borrowed_name(b"head")))?; - writer.write_event(Event::End(BytesEnd::borrowed(b"head")))?; - writer.write_event(Event::Start(BytesStart::borrowed_name(b"boolean")))?; - writer.write_event(Event::Text(BytesText::from_plain_str(if value { - "true" - } else { - "false" - })))?; - writer.write_event(Event::End(BytesEnd::borrowed(b"boolean")))?; - writer.write_event(Event::End(BytesEnd::borrowed(b"sparql")))?; - Ok(()) -} - -fn write_solutions(solutions: QuerySolutionIter, sink: impl Write) -> Result<(), EvaluationError> { - let mut writer = Writer::new(sink); - writer - .write_event(Event::Decl(BytesDecl::new(b"1.0", None, None))) - .map_err(map_xml_error)?; - let mut sparql_open = BytesStart::borrowed_name(b"sparql"); - sparql_open.push_attribute(("xmlns", "http://www.w3.org/2005/sparql-results#")); - writer - .write_event(Event::Start(sparql_open)) - .map_err(map_xml_error)?; - writer - .write_event(Event::Start(BytesStart::borrowed_name(b"head"))) - .map_err(map_xml_error)?; - for variable in solutions.variables() { - let mut variable_tag = BytesStart::borrowed_name(b"variable"); - variable_tag.push_attribute(("name", variable.as_str())); - writer - .write_event(Event::Empty(variable_tag)) - .map_err(map_xml_error)?; - } - writer - .write_event(Event::End(BytesEnd::borrowed(b"head"))) - .map_err(map_xml_error)?; - writer - .write_event(Event::Start(BytesStart::borrowed_name(b"results"))) - .map_err(map_xml_error)?; - for solution in solutions { - let solution = solution?; - writer - .write_event(Event::Start(BytesStart::borrowed_name(b"result"))) - .map_err(map_xml_error)?; - for (variable, value) in solution.iter() { - let mut binding_tag = BytesStart::borrowed_name(b"binding"); - binding_tag.push_attribute(("name", variable.as_str())); - writer - .write_event(Event::Start(binding_tag)) - .map_err(map_xml_error)?; - write_xml_term(value.as_ref(), &mut writer)?; - writer - .write_event(Event::End(BytesEnd::borrowed(b"binding"))) - .map_err(map_xml_error)?; - } - writer - .write_event(Event::End(BytesEnd::borrowed(b"result"))) - .map_err(map_xml_error)?; - } - writer - .write_event(Event::End(BytesEnd::borrowed(b"results"))) - .map_err(map_xml_error)?; - writer - .write_event(Event::End(BytesEnd::borrowed(b"sparql"))) - .map_err(map_xml_error)?; - Ok(()) -} - -fn write_xml_term( - term: TermRef<'_>, - writer: &mut Writer, -) -> Result<(), EvaluationError> { - match term { - TermRef::NamedNode(uri) => { - writer - .write_event(Event::Start(BytesStart::borrowed_name(b"uri"))) - .map_err(map_xml_error)?; - writer - .write_event(Event::Text(BytesText::from_plain_str(uri.as_str()))) - .map_err(map_xml_error)?; - writer - .write_event(Event::End(BytesEnd::borrowed(b"uri"))) - .map_err(map_xml_error)?; - } - TermRef::BlankNode(bnode) => { - writer - .write_event(Event::Start(BytesStart::borrowed_name(b"bnode"))) - .map_err(map_xml_error)?; - writer - .write_event(Event::Text(BytesText::from_plain_str(bnode.as_str()))) - .map_err(map_xml_error)?; - writer - .write_event(Event::End(BytesEnd::borrowed(b"bnode"))) - .map_err(map_xml_error)?; - } - TermRef::Literal(literal) => { - let mut literal_tag = BytesStart::borrowed_name(b"literal"); - if let Some(language) = literal.language() { - literal_tag.push_attribute(("xml:lang", language)); - } else if !literal.is_plain() { - literal_tag.push_attribute(("datatype", literal.datatype().as_str())); - } - writer - .write_event(Event::Start(literal_tag)) - .map_err(map_xml_error)?; - writer - .write_event(Event::Text(BytesText::from_plain_str(literal.value()))) - .map_err(map_xml_error)?; - writer - .write_event(Event::End(BytesEnd::borrowed(b"literal"))) - .map_err(map_xml_error)?; - } - TermRef::Triple(triple) => { - writer - .write_event(Event::Start(BytesStart::borrowed_name(b"triple"))) - .map_err(map_xml_error)?; - writer - .write_event(Event::Start(BytesStart::borrowed_name(b"subject"))) - .map_err(map_xml_error)?; - write_xml_term(triple.subject.as_ref().into(), writer)?; - writer - .write_event(Event::End(BytesEnd::borrowed(b"subject"))) - .map_err(map_xml_error)?; - writer - .write_event(Event::Start(BytesStart::borrowed_name(b"predicate"))) - .map_err(map_xml_error)?; - write_xml_term(triple.predicate.as_ref().into(), writer)?; - writer - .write_event(Event::End(BytesEnd::borrowed(b"predicate"))) - .map_err(map_xml_error)?; - writer - .write_event(Event::Start(BytesStart::borrowed_name(b"object"))) - .map_err(map_xml_error)?; - write_xml_term(triple.object.as_ref(), writer)?; - writer - .write_event(Event::End(BytesEnd::borrowed(b"object"))) - .map_err(map_xml_error)?; - writer - .write_event(Event::End(BytesEnd::borrowed(b"triple"))) - .map_err(map_xml_error)?; - } - } - Ok(()) -} - -pub fn read_xml_results(source: impl BufRead + 'static) -> io::Result { - enum State { - Start, - Sparql, - Head, - AfterHead, - Boolean, - } - - let mut reader = Reader::from_reader(source); - reader.trim_text(true); - - let mut buffer = Vec::default(); - let mut namespace_buffer = Vec::default(); - let mut variables: Vec = Vec::default(); - let mut state = State::Start; - - //Read header - loop { - let event = { - let (ns, event) = reader - .read_namespaced_event(&mut buffer, &mut namespace_buffer) - .map_err(map_xml_error)?; - if let Some(ns) = ns { - if ns != b"http://www.w3.org/2005/sparql-results#".as_ref() { - return Err(invalid_data_error(format!( - "Unexpected namespace found in RDF/XML query result: {}", - reader.decode(ns).map_err(map_xml_error)? - ))); - } - } - event - }; - match event { - Event::Start(event) => match state { - State::Start => { - if event.name() == b"sparql" { - state = State::Sparql; - } else { - return Err(invalid_data_error(format!("Expecting tag, found {}", reader.decode(event.name()).map_err(map_xml_error)?))); - } - } - State::Sparql => { - if event.name() == b"head" { - state = State::Head; - } else { - return Err(invalid_data_error(format!("Expecting tag, found {}", reader.decode(event.name()).map_err(map_xml_error)?))); - } - } - State::Head => { - if event.name() == b"variable" { - let name = event.attributes() - .filter_map(std::result::Result::ok) - .find(|attr| attr.key == b"name") - .ok_or_else(|| invalid_data_error("No name attribute found for the tag"))?; - variables.push(name.unescape_and_decode_value(&reader).map_err(map_xml_error)?); - } else if event.name() == b"link" { - // no op - } else { - return Err(invalid_data_error(format!("Expecting or tag, found {}", reader.decode(event.name()).map_err(map_xml_error)?))); - } - } - State::AfterHead => { - if event.name() == b"boolean" { - state = State::Boolean - } else if event.name() == b"results" { - let mut mapping = BTreeMap::default(); - for (i,var) in variables.iter().enumerate() { - mapping.insert(var.as_bytes().to_vec(), i); - } - return Ok(QueryResults::Solutions(QuerySolutionIter::new( - Rc::new(variables.into_iter().map(Variable::new).collect::,_>>().map_err(invalid_data_error)?), - Box::new(ResultsIterator { - reader, - buffer, - namespace_buffer, - mapping, - stack: Vec::new(), - subject_stack: Vec::new(), - predicate_stack: Vec::new(), - object_stack:Vec::new(), - }), - ))); - } else if event.name() != b"link" && event.name() != b"results" && event.name() != b"boolean" { - return Err(invalid_data_error(format!("Expecting sparql tag, found {}", reader.decode(event.name()).map_err(map_xml_error)?))); - } - } - State::Boolean => return Err(invalid_data_error(format!("Unexpected tag inside of tag: {}", reader.decode(event.name()).map_err(map_xml_error)?))) - }, - Event::Empty(event) => match state { - State::Sparql => { - if event.name() == b"head" { - state = State::AfterHead; - } else { - return Err(invalid_data_error(format!("Expecting tag, found {}", reader.decode(event.name()).map_err(map_xml_error)?))); - } - } - State::Head => { - if event.name() == b"variable" { - let name = event.attributes() - .filter_map(std::result::Result::ok) - .find(|attr| attr.key == b"name") - .ok_or_else(|| invalid_data_error("No name attribute found for the tag"))?; - variables.push(name.unescape_and_decode_value(&reader).map_err(map_xml_error)?); - } else if event.name() == b"link" { - // no op - } else { - return Err(invalid_data_error(format!("Expecting or tag, found {}", reader.decode(event.name()).map_err(map_xml_error)?))); - } - }, - State::AfterHead => { - return if event.name() == b"results" { - Ok(QueryResults::Solutions(QuerySolutionIter::new( - Rc::new(variables.into_iter().map(Variable::new).collect::,_>>().map_err(invalid_data_error)?), - Box::new(empty()), - ))) - } else { - Err(invalid_data_error(format!("Unexpected autoclosing tag <{}>", reader.decode(event.name()).map_err(map_xml_error)?))) - } - } - _ => return Err(invalid_data_error(format!("Unexpected autoclosing tag <{}>", reader.decode(event.name()).map_err(map_xml_error)?))) - }, - Event::Text(event) => { - let value = event.unescaped().map_err(map_xml_error)?; - return match state { - State::Boolean => { - return if value.as_ref() == b"true" { - Ok(QueryResults::Boolean(true)) - } else if value.as_ref() == b"false" { - Ok(QueryResults::Boolean(false)) - } else { - Err(invalid_data_error(format!("Unexpected boolean value. Found {}", reader.decode(&value).map_err(map_xml_error)?))) - }; - } - _ => Err(invalid_data_error(format!("Unexpected textual value found: {}", reader.decode(&value).map_err(map_xml_error)?))) - }; - }, - Event::End(_) => if let State::Head = state { - state = State::AfterHead; - } else { - return Err(invalid_data_error("Unexpected early file end. All results file should have a and a or tag")); - }, - Event::Eof => return Err(invalid_data_error("Unexpected early file end. All results file should have a and a or tag")), - _ => (), - } - } -} - -enum State { - Start, - Result, - Binding, - Uri, - BNode, - Literal, - Triple, - Subject, - Predicate, - Object, - End, -} - -struct ResultsIterator { - reader: Reader, - buffer: Vec, - namespace_buffer: Vec, - mapping: BTreeMap, usize>, - stack: Vec, - subject_stack: Vec, - predicate_stack: Vec, - object_stack: Vec, -} - -impl Iterator for ResultsIterator { - type Item = Result>, EvaluationError>; - - fn next(&mut self) -> Option>, EvaluationError>> { - self.read_next().transpose() - } -} - -impl ResultsIterator { - fn read_next(&mut self) -> Result>>, EvaluationError> { - let mut state = State::Start; - - let mut new_bindings = vec![None; self.mapping.len()]; - - let mut current_var = None; - let mut term: Option = None; - let mut lang = None; - let mut datatype = None; - loop { - let (ns, event) = self - .reader - .read_namespaced_event(&mut self.buffer, &mut self.namespace_buffer) - .map_err(map_xml_error)?; - if let Some(ns) = ns { - if ns != b"http://www.w3.org/2005/sparql-results#".as_ref() { - return Err(invalid_data_error(format!( - "Unexpected namespace found in RDF/XML query result: {}", - self.reader.decode(ns).map_err(map_xml_error)? - )) - .into()); - } - } - match event { - Event::Start(event) => match state { - State::Start => { - if event.name() == b"result" { - state = State::Result; - } else { - return Err(invalid_data_error(format!( - "Expecting , found {}", - self.reader.decode(event.name()).map_err(map_xml_error)? - )) - .into()); - } - } - State::Result => { - if event.name() == b"binding" { - match event - .attributes() - .filter_map(std::result::Result::ok) - .find(|attr| attr.key == b"name") - { - Some(attr) => { - current_var = Some( - attr.unescaped_value().map_err(map_xml_error)?.to_vec(), - ) - } - None => { - return Err(invalid_data_error( - "No name attribute found for the tag", - ) - .into()); - } - } - state = State::Binding; - } else { - return Err(invalid_data_error(format!( - "Expecting , found {}", - self.reader.decode(event.name()).map_err(map_xml_error)? - )) - .into()); - } - } - State::Binding | State::Subject | State::Predicate | State::Object => { - if term.is_some() { - return Err(invalid_data_error( - "There is already a value for the current binding", - ) - .into()); - } - self.stack.push(state); - if event.name() == b"uri" { - state = State::Uri; - } else if event.name() == b"bnode" { - state = State::BNode; - } else if event.name() == b"literal" { - for attr in event.attributes().flatten() { - if attr.key == b"xml:lang" { - lang = Some( - attr.unescape_and_decode_value(&self.reader) - .map_err(map_xml_error)?, - ); - } else if attr.key == b"datatype" { - let iri = attr - .unescape_and_decode_value(&self.reader) - .map_err(map_xml_error)?; - datatype = Some(NamedNode::new(&iri).map_err(|e| { - invalid_data_error(format!( - "Invalid datatype IRI '{}': {}", - iri, e - )) - })?); - } - } - state = State::Literal; - } else if event.name() == b"triple" { - state = State::Triple; - } else { - return Err(invalid_data_error(format!( - "Expecting , or found {}", - self.reader.decode(event.name()).map_err(map_xml_error)? - )) - .into()); - } - } - State::Triple => { - if event.name() == b"subject" { - state = State::Subject - } else if event.name() == b"predicate" { - state = State::Predicate - } else if event.name() == b"object" { - state = State::Object - } else { - return Err(invalid_data_error(format!( - "Expecting , or found {}", - self.reader.decode(event.name()).map_err(map_xml_error)? - )) - .into()); - } - } - _ => (), - }, - Event::Text(event) => { - let data = event.unescaped().map_err(map_xml_error)?; - match state { - State::Uri => { - let iri = self.reader.decode(&data).map_err(map_xml_error)?; - term = Some( - NamedNode::new(iri) - .map_err(|e| { - invalid_data_error(format!( - "Invalid IRI value '{}': {}", - iri, e - )) - })? - .into(), - ) - } - State::BNode => { - let bnode = self.reader.decode(&data).map_err(map_xml_error)?; - term = Some( - BlankNode::new(bnode) - .map_err(|e| { - invalid_data_error(format!( - "Invalid blank node value '{}': {}", - bnode, e - )) - })? - .into(), - ) - } - State::Literal => { - term = Some( - build_literal( - self.reader.decode(&data).map_err(map_xml_error)?, - lang.take(), - datatype.take(), - )? - .into(), - ); - } - _ => { - return Err(invalid_data_error(format!( - "Unexpected textual value found: {}", - self.reader.decode(&data).map_err(map_xml_error)? - )) - .into()); - } - } - } - Event::End(_) => match state { - State::Start => state = State::End, - State::Result => return Ok(Some(new_bindings)), - State::Binding => { - if let Some(var) = ¤t_var { - if let Some(var) = self.mapping.get(var) { - new_bindings[*var] = term.take() - } else { - return Err( - invalid_data_error(format!("The variable '{}' is used in a binding but not declared in the variables list", self.reader.decode(var).map_err(map_xml_error)?)).into() - ); - } - } else { - return Err( - invalid_data_error("No name found for tag").into() - ); - } - state = State::Result; - } - State::Subject => { - if let Some(subject) = term.take() { - self.subject_stack.push(subject) - } - state = State::Triple; - } - State::Predicate => { - if let Some(predicate) = term.take() { - self.predicate_stack.push(predicate) - } - state = State::Triple; - } - State::Object => { - if let Some(object) = term.take() { - self.object_stack.push(object) - } - state = State::Triple; - } - State::Uri => state = self.stack.pop().unwrap(), - State::BNode => { - if term.is_none() { - //We default to a random bnode - term = Some(BlankNode::default().into()) - } - state = self.stack.pop().unwrap() - } - State::Literal => { - if term.is_none() { - //We default to the empty literal - term = Some(build_literal("", lang.take(), datatype.take())?.into()) - } - state = self.stack.pop().unwrap(); - } - State::Triple => { - if let (Some(subject), Some(predicate), Some(object)) = ( - self.subject_stack.pop(), - self.predicate_stack.pop(), - self.object_stack.pop(), - ) { - term = Some( - Triple::new( - match subject { - Term::NamedNode(subject) => subject.into(), - Term::BlankNode(subject) => subject.into(), - Term::Triple(subject) => Subject::Triple(subject), - Term::Literal(_) => { - return Err(invalid_data_error( - "The value should not be a ", - ) - .into()) - } - }, - match predicate { - Term::NamedNode(predicate) => predicate, - _ => { - return Err(invalid_data_error( - "The value should be an ", - ) - .into()) - } - }, - object, - ) - .into(), - ); - state = self.stack.pop().unwrap(); - } else { - return Err( - invalid_data_error("A should contain a , a and an ").into() - ); - } - } - State::End => (), - }, - Event::Eof => return Ok(None), - _ => (), - } - } - } -} - -fn build_literal( - value: impl Into, - lang: Option, - datatype: Option, -) -> Result { - match lang { - Some(lang) => { - if let Some(datatype) = datatype { - if datatype.as_ref() != rdf::LANG_STRING { - return Err(invalid_data_error(format!( - "xml:lang value '{}' provided with the datatype {}", - lang, datatype - )) - .into()); - } - } - Literal::new_language_tagged_literal(value, &lang).map_err(|e| { - invalid_data_error(format!("Invalid xml:lang value '{}': {}", lang, e)).into() - }) - } - None => Ok(if let Some(datatype) = datatype { - Literal::new_typed_literal(value, datatype) - } else { - Literal::new_simple_literal(value) - }), - } -} - -fn map_xml_error(error: quick_xml::Error) -> io::Error { - match error { - quick_xml::Error::Io(error) => error, - quick_xml::Error::UnexpectedEof(_) => io::Error::new(io::ErrorKind::UnexpectedEof, error), - _ => invalid_data_error(error), - } -}