Adds a public API for SPARQL query results I/O

pull/190/head
Tpt 3 years ago
parent a33dbb6d06
commit 01a33192eb
  1. 69
      lib/src/io/read.rs
  2. 7
      lib/src/io/write.rs
  3. 307
      lib/src/sparql/csv_results.rs
  4. 306
      lib/src/sparql/io/csv.rs
  5. 703
      lib/src/sparql/io/json.rs
  6. 337
      lib/src/sparql/io/mod.rs
  7. 597
      lib/src/sparql/io/xml.rs
  8. 455
      lib/src/sparql/json_results.rs
  9. 15
      lib/src/sparql/mod.rs
  10. 170
      lib/src/sparql/model.rs
  11. 674
      lib/src/sparql/xml_results.rs

@ -38,7 +38,7 @@ pub struct GraphParser {
}
impl GraphParser {
/// Builds a parser for the given format
/// Builds a parser for the given format.
pub fn from_format(format: GraphFormat) -> Self {
Self {
format,
@ -46,7 +46,7 @@ impl GraphParser {
}
}
/// Provides an IRI that could be used to resolve the file relative IRIs
/// Provides an IRI that could be used to resolve the file relative IRIs.
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
@ -66,7 +66,7 @@ impl GraphParser {
Ok(self)
}
/// Executes the parsing itself on a [`BufRead`](std::io::BufRead) implementation and returns an iterator of triples
/// Executes the parsing itself on a [`BufRead`](std::io::BufRead) implementation and returns an iterator of triples.
#[allow(clippy::unnecessary_wraps)]
pub fn read_triples<R: BufRead>(&self, reader: R) -> Result<TripleReader<R>, ParserError> {
Ok(TripleReader {
@ -187,7 +187,7 @@ pub struct DatasetParser {
}
impl DatasetParser {
/// Builds a parser for the given format
/// Builds a parser for the given format.
pub fn from_format(format: DatasetFormat) -> Self {
Self {
format,
@ -195,7 +195,7 @@ impl DatasetParser {
}
}
/// Provides an IRI that could be used to resolve the file relative IRIs
/// Provides an IRI that could be used to resolve the file relative IRIs.
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
@ -215,7 +215,7 @@ impl DatasetParser {
Ok(self)
}
/// Executes the parsing itself on a [`BufRead`](std::io::BufRead) implementation and returns an iterator of quads
/// Executes the parsing itself on a [`BufRead`](std::io::BufRead) implementation and returns an iterator of quads.
#[allow(clippy::unnecessary_wraps)]
pub fn read_quads<R: BufRead>(&self, reader: R) -> Result<QuadReader<R>, ParserError> {
Ok(QuadReader {
@ -442,12 +442,26 @@ impl From<RdfXmlError> for ParserError {
}
}
impl From<TermParseError> for ParserError {
fn from(error: TermParseError) -> Self {
Self::Syntax(SyntaxError {
inner: SyntaxErrorKind::Term(error),
})
}
}
impl From<io::Error> for ParserError {
fn from(error: io::Error) -> Self {
Self::Io(error)
}
}
impl From<SyntaxError> for ParserError {
fn from(error: SyntaxError) -> Self {
Self::Syntax(error)
}
}
impl From<ParserError> for io::Error {
fn from(error: ParserError) -> Self {
match error {
@ -457,17 +471,40 @@ impl From<ParserError> for io::Error {
}
}
impl From<quick_xml::Error> for ParserError {
fn from(error: quick_xml::Error) -> Self {
match error {
quick_xml::Error::Io(error) => Self::Io(error),
error => Self::Syntax(SyntaxError {
inner: SyntaxErrorKind::Xml(error),
}),
}
}
}
/// An error in the syntax of the parsed file
#[derive(Debug)]
pub struct SyntaxError {
inner: SyntaxErrorKind,
pub(crate) inner: SyntaxErrorKind,
}
#[derive(Debug)]
enum SyntaxErrorKind {
pub(crate) enum SyntaxErrorKind {
Turtle(TurtleError),
RdfXml(RdfXmlError),
BaseIri { iri: String, error: IriParseError },
Xml(quick_xml::Error),
Term(TermParseError),
Msg { msg: String },
}
impl SyntaxError {
    /// Builds an error from a printable error message.
    ///
    /// The message is stored as-is and is what `Display` prints for this error.
    pub(crate) fn msg(msg: impl Into<String>) -> Self {
        let msg = msg.into();
        let inner = SyntaxErrorKind::Msg { msg };
        Self { inner }
    }
}
impl fmt::Display for SyntaxError {
@ -478,6 +515,9 @@ impl fmt::Display for SyntaxError {
SyntaxErrorKind::BaseIri { iri, error } => {
write!(f, "Invalid base IRI '{}': {}", iri, error)
}
SyntaxErrorKind::Xml(e) => e.fmt(f),
SyntaxErrorKind::Term(e) => e.fmt(f),
SyntaxErrorKind::Msg { msg } => f.write_str(msg),
}
}
}
@ -487,7 +527,9 @@ impl Error for SyntaxError {
match &self.inner {
SyntaxErrorKind::Turtle(e) => Some(e),
SyntaxErrorKind::RdfXml(e) => Some(e),
SyntaxErrorKind::BaseIri { .. } => None,
SyntaxErrorKind::Xml(e) => Some(e),
SyntaxErrorKind::Term(e) => Some(e),
SyntaxErrorKind::BaseIri { .. } | SyntaxErrorKind::Msg { .. } => None,
}
}
}
@ -501,6 +543,15 @@ impl From<SyntaxError> for io::Error {
io::ErrorKind::InvalidInput,
format!("Invalid IRI '{}': {}", iri, error),
),
SyntaxErrorKind::Xml(error) => match error {
quick_xml::Error::Io(error) => error,
quick_xml::Error::UnexpectedEof(error) => {
Self::new(io::ErrorKind::UnexpectedEof, error)
}
error => Self::new(io::ErrorKind::InvalidData, error),
},
SyntaxErrorKind::Term(error) => Self::new(io::ErrorKind::InvalidData, error),
SyntaxErrorKind::Msg { msg } => Self::new(io::ErrorKind::InvalidData, msg),
}
}
}

@ -6,8 +6,7 @@ use crate::model::*;
use rio_api::formatter::TriplesFormatter;
use rio_api::model as rio;
use rio_xml::RdfXmlFormatter;
use std::io;
use std::io::Write;
use std::io::{self, Write};
/// A serializer for RDF graph serialization formats.
///
@ -43,7 +42,7 @@ impl GraphSerializer {
Self { format }
}
/// Returns a `TripleWriter` allowing writing triples into the given [`Write`](std::io::Write) implementation
/// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`](std::io::Write) implementation
pub fn triple_writer<W: Write>(&self, writer: W) -> io::Result<TripleWriter<W>> {
Ok(TripleWriter {
formatter: match self.format {
@ -182,7 +181,7 @@ impl DatasetSerializer {
Self { format }
}
/// Returns a `QuadWriter` allowing writing triples into the given [`Write`](std::io::Write) implementation
/// Returns a [`QuadWriter`] allowing writing quads into the given [`Write`](std::io::Write) implementation
#[allow(clippy::unnecessary_wraps)]
pub fn quad_writer<W: Write>(&self, writer: W) -> io::Result<QuadWriter<W>> {
Ok(QuadWriter {

@ -1,307 +0,0 @@
//! Implementation of [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/)
use crate::error::invalid_data_error;
use crate::model::{vocab::xsd, *};
use crate::sparql::error::EvaluationError;
use crate::sparql::model::*;
use std::io::{self, BufRead, Write};
use std::rc::Rc;
use std::str::FromStr;
/// Writes `results` to `sink` in the SPARQL 1.1 CSV results format.
///
/// * Boolean results are written as the bare text `true` or `false`.
/// * Solutions are written as a header row of variable names followed by one
///   row per solution; unbound values produce an empty cell.
/// * Graph results are written as `subject,predicate,object` rows.
///
/// Rows are separated by `\r\n` and no trailing line break is written.
/// Errors raised by the sink or by the result iterators are propagated.
pub fn write_csv_results(
    results: QueryResults,
    mut sink: impl Write,
) -> Result<(), EvaluationError> {
    match results {
        QueryResults::Boolean(value) => {
            let text: &[u8] = if value { b"true" } else { b"false" };
            sink.write_all(text)?;
        }
        QueryResults::Solutions(solutions) => {
            // Header row.
            let mut needs_separator = false;
            for variable in solutions.variables() {
                if needs_separator {
                    sink.write_all(b",")?;
                }
                needs_separator = true;
                sink.write_all(variable.as_str().as_bytes())?;
            }
            // One row per solution, each preceded by its line break.
            for solution in solutions {
                let solution = solution?;
                sink.write_all(b"\r\n")?;
                let mut needs_separator = false;
                for value in solution.values() {
                    if needs_separator {
                        sink.write_all(b",")?;
                    }
                    needs_separator = true;
                    if let Some(term) = value {
                        write_csv_term(term, &mut sink)?;
                    }
                }
            }
        }
        QueryResults::Graph(triples) => {
            sink.write_all(b"subject,predicate,object")?;
            for triple in triples {
                let triple = triple?;
                sink.write_all(b"\r\n")?;
                write_csv_term(&triple.subject, &mut sink)?;
                sink.write_all(b",")?;
                write_csv_term(&triple.predicate, &mut sink)?;
                sink.write_all(b",")?;
                write_csv_term(&triple.object, &mut sink)?;
            }
        }
    }
    Ok(())
}
/// Writes a single term as a CSV cell.
///
/// Named nodes are written as their bare IRI, blank nodes with a `_:` prefix,
/// literals as their (escaped) lexical value, and triples as their three
/// components separated by single spaces.
fn write_csv_term<'a>(term: impl Into<TermRef<'a>>, sink: &mut impl Write) -> io::Result<()> {
    let term = term.into();
    match term {
        TermRef::NamedNode(node) => {
            let iri = node.as_str();
            sink.write_all(iri.as_bytes())
        }
        TermRef::BlankNode(node) => {
            sink.write_all(b"_:")?;
            let id = node.as_str();
            sink.write_all(id.as_bytes())
        }
        TermRef::Literal(literal) => {
            let lexical = literal.value();
            write_escaped_csv_string(lexical, sink)
        }
        TermRef::Triple(triple) => {
            write_csv_term(&triple.subject, sink)?;
            sink.write_all(b" ")?;
            write_csv_term(&triple.predicate, sink)?;
            sink.write_all(b" ")?;
            write_csv_term(&triple.object, sink)
        }
    }
}
/// Writes `s` as a CSV field.
///
/// The text is written unchanged unless it contains a double quote, a comma,
/// a line feed or a carriage return; in that case it is wrapped in double
/// quotes and every embedded double quote is doubled.
fn write_escaped_csv_string(s: &str, sink: &mut impl Write) -> io::Result<()> {
    let needs_quoting = s
        .bytes()
        .any(|byte| byte == b'"' || byte == b',' || byte == b'\n' || byte == b'\r');
    if !needs_quoting {
        return sink.write_all(s.as_bytes());
    }
    sink.write_all(b"\"")?;
    for byte in s.bytes() {
        match byte {
            b'"' => sink.write_all(b"\"\"")?,
            other => sink.write_all(&[other])?,
        }
    }
    sink.write_all(b"\"")
}
/// Writes `results` to `sink` in the SPARQL 1.1 TSV results format.
///
/// * Boolean results are written as the bare text `true` or `false`.
/// * Solutions are written as a header row of `?`-prefixed variable names
///   followed by one row per solution; unbound values produce an empty cell.
/// * Graph results are written as `subject\tpredicate\tobject` rows.
///
/// Rows are separated by `\n` and no trailing line break is written.
/// Errors raised by the sink or by the result iterators are propagated.
pub fn write_tsv_results(
    results: QueryResults,
    mut sink: impl Write,
) -> Result<(), EvaluationError> {
    match results {
        QueryResults::Boolean(value) => {
            let text: &[u8] = if value { b"true" } else { b"false" };
            sink.write_all(text)?;
        }
        QueryResults::Solutions(solutions) => {
            // Header row.
            let mut needs_separator = false;
            for variable in solutions.variables() {
                if needs_separator {
                    sink.write_all(b"\t")?;
                }
                needs_separator = true;
                sink.write_all(b"?")?;
                sink.write_all(variable.as_str().as_bytes())?;
            }
            // One row per solution, each preceded by its line break.
            for solution in solutions {
                let solution = solution?;
                sink.write_all(b"\n")?;
                let mut needs_separator = false;
                for value in solution.values() {
                    if needs_separator {
                        sink.write_all(b"\t")?;
                    }
                    needs_separator = true;
                    if let Some(term) = value {
                        write_tsv_term(term, &mut sink)?;
                    }
                }
            }
        }
        QueryResults::Graph(triples) => {
            sink.write_all(b"subject\tpredicate\tobject")?;
            for triple in triples {
                let triple = triple?;
                sink.write_all(b"\n")?;
                write_tsv_term(&triple.subject, &mut sink)?;
                sink.write_all(b"\t")?;
                write_tsv_term(&triple.predicate, &mut sink)?;
                sink.write_all(b"\t")?;
                write_tsv_term(&triple.object, &mut sink)?;
            }
        }
    }
    Ok(())
}
/// Writes a single term as a TSV cell.
///
/// Named nodes are wrapped in `<...>`, blank nodes get a `_:` prefix and
/// triples are wrapped in `<< ... >>`. Booleans whose value is `true`/`1` or
/// `false`/`0` and integers made only of ASCII digits are written bare; every
/// other literal is written through its `Display` implementation.
fn write_tsv_term<'a>(term: impl Into<TermRef<'a>>, sink: &mut impl Write) -> io::Result<()> {
    //TODO: full Turtle serialization
    match term.into() {
        TermRef::NamedNode(node) => write!(sink, "<{}>", node.as_str()),
        TermRef::BlankNode(node) => write!(sink, "_:{}", node.as_str()),
        TermRef::Literal(literal) => {
            let lexical = literal.value();
            // Bare form to use instead of the full literal serialization, if any.
            let bare = match literal.datatype() {
                xsd::BOOLEAN => match lexical {
                    "true" | "1" => Some("true"),
                    "false" | "0" => Some("false"),
                    _ => None,
                },
                xsd::INTEGER if lexical.bytes().all(|b| b.is_ascii_digit()) => Some(lexical),
                _ => None,
            };
            match bare {
                Some(text) => sink.write_all(text.as_bytes()),
                None => sink.write_all(literal.to_string().as_bytes()),
            }
        }
        TermRef::Triple(triple) => {
            sink.write_all(b"<<")?;
            write_tsv_term(&triple.subject, sink)?;
            sink.write_all(b" ")?;
            write_tsv_term(&triple.predicate, sink)?;
            sink.write_all(b" ")?;
            write_tsv_term(&triple.object, sink)?;
            sink.write_all(b">>")
        }
    }
}
/// Starts reading TSV query results from `source`.
///
/// The first line is read eagerly. If, once trimmed, it is `true` or `false`
/// (ASCII case-insensitive) a boolean result is returned. Otherwise it is
/// split on tabs and each trimmed cell is parsed as a variable; the remaining
/// lines are then read lazily as solutions.
pub fn read_tsv_results(mut source: impl BufRead + 'static) -> io::Result<QueryResults> {
    let mut buffer = String::new();
    // We read the header
    source.read_line(&mut buffer)?;
    let header = buffer.trim();
    if header.eq_ignore_ascii_case("true") {
        return Ok(QueryResults::Boolean(true));
    }
    if header.eq_ignore_ascii_case("false") {
        return Ok(QueryResults::Boolean(false));
    }
    let mut variables = Vec::new();
    for name in buffer.split('\t') {
        let variable = Variable::from_str(name.trim()).map_err(invalid_data_error)?;
        variables.push(variable);
    }
    let rows = TsvResultsIterator { source, buffer };
    Ok(QueryResults::Solutions(QuerySolutionIter::new(
        Rc::new(variables),
        Box::new(rows),
    )))
}
/// Iterator over the solution rows of a TSV results document.
struct TsvResultsIterator<R: BufRead> {
    /// Input positioned after the header line.
    source: R,
    /// Line buffer reused for every row.
    buffer: String,
}

impl<R: BufRead> Iterator for TsvResultsIterator<R> {
    type Item = Result<Vec<Option<Term>>, EvaluationError>;

    fn next(&mut self) -> Option<Result<Vec<Option<Term>>, EvaluationError>> {
        match self.read_next() {
            Ok(Some(row)) => Some(Ok(row)),
            Ok(None) => None,
            Err(error) => Some(Err(error)),
        }
    }
}

impl<R: BufRead> TsvResultsIterator<R> {
    /// Reads the next line and parses it as a row.
    ///
    /// Returns `Ok(None)` when the input is exhausted. Each tab-separated cell
    /// is trimmed; an empty cell is an unbound value, anything else is parsed
    /// as a term.
    fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, EvaluationError> {
        self.buffer.clear();
        if self.source.read_line(&mut self.buffer)? == 0 {
            return Ok(None);
        }
        let mut row = Vec::new();
        for cell in self.buffer.split('\t') {
            let cell = cell.trim();
            let value = if cell.is_empty() {
                None
            } else {
                Some(Term::from_str(cell).map_err(invalid_data_error)?)
            };
            row.push(value);
        }
        Ok(Some(row))
    }
}
// Serialization tests for the CSV and TSV writers, both run on the same fixture.
#[cfg(test)]
mod tests {
use super::*;
use std::rc::Rc;
use std::str;
/// Builds solutions over the variables `x` and `literal` that cover IRIs,
/// blank nodes, unbound values and simple, quoted, language-tagged and
/// integer literals.
fn build_example() -> QueryResults {
QuerySolutionIter::new(
Rc::new(vec![
Variable::new_unchecked("x"),
Variable::new_unchecked("literal"),
]),
Box::new(
vec![
Ok(vec![
Some(NamedNode::new_unchecked("http://example/x").into()),
Some(Literal::new_simple_literal("String").into()),
]),
Ok(vec![
Some(NamedNode::new_unchecked("http://example/x").into()),
Some(Literal::new_simple_literal("String-with-dquote\"").into()),
]),
Ok(vec![
Some(BlankNode::new_unchecked("b0").into()),
Some(Literal::new_simple_literal("Blank node").into()),
]),
Ok(vec![
None,
Some(Literal::new_simple_literal("Missing 'x'").into()),
]),
Ok(vec![None, None]),
Ok(vec![
Some(NamedNode::new_unchecked("http://example/x").into()),
None,
]),
Ok(vec![
Some(BlankNode::new_unchecked("b1").into()),
Some(
Literal::new_language_tagged_literal_unchecked(
"String-with-lang",
"en",
)
.into(),
),
]),
Ok(vec![
Some(BlankNode::new_unchecked("b1").into()),
Some(Literal::new_typed_literal("123", xsd::INTEGER).into()),
]),
]
.into_iter(),
),
)
.into()
}
// Expected CSV: `\r\n`-separated rows, quoted field with doubled quotes, no trailing line break.
#[test]
fn test_csv_serialization() {
let mut sink = Vec::new();
write_csv_results(build_example(), &mut sink).unwrap();
assert_eq!(str::from_utf8(&sink).unwrap(), "x,literal\r\nhttp://example/x,String\r\nhttp://example/x,\"String-with-dquote\"\"\"\r\n_:b0,Blank node\r\n,Missing 'x'\r\n,\r\nhttp://example/x,\r\n_:b1,String-with-lang\r\n_:b1,123");
}
// Expected TSV: `?`-prefixed header, `\n`-separated rows, `<...>` IRIs, bare integer.
#[test]
fn test_tsv_serialization() {
let mut sink = Vec::new();
write_tsv_results(build_example(), &mut sink).unwrap();
assert_eq!(str::from_utf8(&sink).unwrap(), "?x\t?literal\n<http://example/x>\t\"String\"\n<http://example/x>\t\"String-with-dquote\\\"\"\n_:b0\t\"Blank node\"\n\t\"Missing 'x'\"\n\t\n<http://example/x>\t\n_:b1\t\"String-with-lang\"@en\n_:b1\t123");
}
}

@ -0,0 +1,306 @@
//! Implementation of [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/)
use crate::io::read::{ParserError, SyntaxError};
use crate::model::{vocab::xsd, *};
use crate::sparql::model::Variable;
use std::io::{self, BufRead, Write};
use std::str::FromStr;
/// Writes a boolean query result as CSV (the bare text `true` or `false`)
/// and hands the sink back to the caller.
pub fn write_boolean_csv_result<W: Write>(mut sink: W, value: bool) -> io::Result<W> {
    let text: &[u8] = if value { b"true" } else { b"false" };
    sink.write_all(text).map(|()| sink)
}
/// Streaming writer for query solutions in the SPARQL CSV results format.
///
/// Create it with [`CsvSolutionsWriter::start`], call
/// [`CsvSolutionsWriter::write`] once per solution and recover the sink with
/// [`CsvSolutionsWriter::finish`].
pub struct CsvSolutionsWriter<W: Write> {
    sink: W,
}

impl<W: Write> CsvSolutionsWriter<W> {
    /// Writes the header row (comma-separated variable names, no line break)
    /// and returns the writer.
    pub fn start(mut sink: W, variables: &[Variable]) -> io::Result<Self> {
        for (position, variable) in variables.iter().enumerate() {
            if position > 0 {
                sink.write_all(b",")?;
            }
            sink.write_all(variable.as_str().as_bytes())?;
        }
        Ok(Self { sink })
    }

    /// Writes one solution row, preceded by `\r\n`.
    ///
    /// `None` values are written as empty cells.
    pub fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = Option<TermRef<'a>>>,
    ) -> io::Result<()> {
        self.sink.write_all(b"\r\n")?;
        for (position, value) in solution.into_iter().enumerate() {
            if position > 0 {
                self.sink.write_all(b",")?;
            }
            if let Some(term) = value {
                write_csv_term(term, &mut self.sink)?;
            }
        }
        Ok(())
    }

    /// Returns the underlying sink; nothing more is written.
    pub fn finish(self) -> W {
        self.sink
    }
}
/// Writes a single term as a CSV cell.
///
/// Named nodes are written as their IRI, blank nodes with a `_:` prefix,
/// literals as their lexical value, and triples as their three components
/// separated by single spaces.
///
/// IRIs go through the same field escaping as literal values: a comma is a
/// legal IRI character, and writing it raw would split the cell in two.
fn write_csv_term<'a>(term: impl Into<TermRef<'a>>, sink: &mut impl Write) -> io::Result<()> {
    match term.into() {
        TermRef::NamedNode(uri) => write_escaped_csv_string(uri.as_str(), sink),
        TermRef::BlankNode(bnode) => {
            sink.write_all(b"_:")?;
            sink.write_all(bnode.as_str().as_bytes())
        }
        TermRef::Literal(literal) => write_escaped_csv_string(literal.value(), sink),
        TermRef::Triple(triple) => {
            write_csv_term(&triple.subject, sink)?;
            sink.write_all(b" ")?;
            write_csv_term(&triple.predicate, sink)?;
            sink.write_all(b" ")?;
            write_csv_term(&triple.object, sink)
        }
    }
}
/// Writes `s` as a CSV field.
///
/// The text is written unchanged unless it contains a double quote, a comma,
/// a line feed or a carriage return; in that case it is wrapped in double
/// quotes and every embedded double quote is doubled.
///
/// The quoted form is written in whole chunks (the text between two quotes)
/// rather than byte by byte, so an unbuffered sink does not receive one write
/// per character.
fn write_escaped_csv_string(s: &str, sink: &mut impl Write) -> io::Result<()> {
    if !s.bytes().any(|c| matches!(c, b'"' | b',' | b'\n' | b'\r')) {
        return sink.write_all(s.as_bytes());
    }
    sink.write_all(b"\"")?;
    // `split` always yields at least one (possibly empty) chunk; every
    // following chunk was preceded by a quote in the input.
    let mut chunks = s.split('"');
    if let Some(first) = chunks.next() {
        sink.write_all(first.as_bytes())?;
    }
    for chunk in chunks {
        sink.write_all(b"\"\"")?;
        sink.write_all(chunk.as_bytes())?;
    }
    sink.write_all(b"\"")
}
/// Writes a boolean query result as TSV (the bare text `true` or `false`)
/// and hands the sink back to the caller.
pub fn write_boolean_tsv_result<W: Write>(mut sink: W, value: bool) -> io::Result<W> {
    let text = if value { "true" } else { "false" };
    sink.write_all(text.as_bytes())?;
    Ok(sink)
}
/// Streaming writer for query solutions in the SPARQL TSV results format.
///
/// Create it with [`TsvSolutionsWriter::start`], call
/// [`TsvSolutionsWriter::write`] once per solution and recover the sink with
/// [`TsvSolutionsWriter::finish`].
pub struct TsvSolutionsWriter<W: Write> {
    sink: W,
}

impl<W: Write> TsvSolutionsWriter<W> {
    /// Writes the header row (tab-separated, `?`-prefixed variable names, no
    /// line break) and returns the writer.
    pub fn start(mut sink: W, variables: &[Variable]) -> io::Result<Self> {
        for (position, variable) in variables.iter().enumerate() {
            if position > 0 {
                sink.write_all(b"\t")?;
            }
            sink.write_all(b"?")?;
            sink.write_all(variable.as_str().as_bytes())?;
        }
        Ok(Self { sink })
    }

    /// Writes one solution row, preceded by `\n`.
    ///
    /// `None` values are written as empty cells.
    pub fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = Option<TermRef<'a>>>,
    ) -> io::Result<()> {
        self.sink.write_all(b"\n")?;
        for (position, value) in solution.into_iter().enumerate() {
            if position > 0 {
                self.sink.write_all(b"\t")?;
            }
            if let Some(term) = value {
                write_tsv_term(term, &mut self.sink)?;
            }
        }
        Ok(())
    }

    /// Returns the underlying sink; nothing more is written.
    pub fn finish(self) -> W {
        self.sink
    }
}
/// Writes a single term as a TSV cell.
///
/// Named nodes are wrapped in `<...>`, blank nodes get a `_:` prefix and
/// triples are wrapped in `<< ... >>`. Booleans whose value is `true`/`1` or
/// `false`/`0` and integers made of one or more ASCII digits are written
/// bare; every other literal is written through its `Display` implementation.
fn write_tsv_term<'a>(term: impl Into<TermRef<'a>>, sink: &mut impl Write) -> io::Result<()> {
    //TODO: full Turtle serialization
    match term.into() {
        TermRef::NamedNode(node) => write!(sink, "<{}>", node.as_str()),
        TermRef::BlankNode(node) => write!(sink, "_:{}", node.as_str()),
        TermRef::Literal(literal) => match literal.datatype() {
            xsd::BOOLEAN => match literal.value() {
                "true" | "1" => sink.write_all(b"true"),
                "false" | "0" => sink.write_all(b"false"),
                _ => sink.write_all(literal.to_string().as_bytes()),
            },
            xsd::INTEGER => {
                let value = literal.value();
                // An empty lexical form must not take the bare path: it would
                // produce an empty cell, which is how an unbound value is written.
                if !value.is_empty() && value.bytes().all(|c| c.is_ascii_digit()) {
                    sink.write_all(value.as_bytes())
                } else {
                    sink.write_all(literal.to_string().as_bytes())
                }
            }
            _ => sink.write_all(literal.to_string().as_bytes()),
        },
        TermRef::Triple(triple) => {
            sink.write_all(b"<<")?;
            write_tsv_term(&triple.subject, sink)?;
            sink.write_all(b" ")?;
            write_tsv_term(&triple.predicate, sink)?;
            sink.write_all(b" ")?;
            write_tsv_term(&triple.object, sink)?;
            sink.write_all(b">>")?;
            Ok(())
        }
    }
}
/// Result of reading the first line of a SPARQL TSV results document.
pub enum TsvQueryResultsReader<R: BufRead> {
    /// A solutions document: the header variables and a reader for the rows.
    Solutions {
        variables: Vec<Variable>,
        solutions: TsvSolutionsReader<R>,
    },
    /// A boolean document.
    Boolean(bool),
}

impl<R: BufRead> TsvQueryResultsReader<R> {
    /// Reads the first line of `source` and decides what kind of result follows.
    ///
    /// If the trimmed line is `true` or `false` (ASCII case-insensitive) a
    /// boolean result is returned. Otherwise the line is split on tabs and
    /// each trimmed cell is parsed as a variable.
    ///
    /// # Errors
    ///
    /// Returns an I/O error if reading fails, or a syntax error naming the
    /// offending (trimmed) cell if it is not a valid variable.
    pub fn read(mut source: R) -> Result<Self, ParserError> {
        let mut buffer = String::new();
        // We read the header
        source.read_line(&mut buffer)?;
        let header = buffer.trim();
        if header.eq_ignore_ascii_case("true") {
            return Ok(Self::Boolean(true));
        }
        if header.eq_ignore_ascii_case("false") {
            return Ok(Self::Boolean(false));
        }
        let variables = buffer
            .split('\t')
            .map(|v| {
                // Report the same text that was parsed, without the
                // surrounding whitespace or the trailing line break.
                let v = v.trim();
                Variable::from_str(v)
                    .map_err(|e| SyntaxError::msg(format!("Invalid variable name '{}': {}", v, e)))
            })
            .collect::<Result<Vec<_>, _>>()?;
        Ok(Self::Solutions {
            variables,
            solutions: TsvSolutionsReader { source, buffer },
        })
    }
}
/// Reader for the solution rows of a TSV results document.
pub struct TsvSolutionsReader<R: BufRead> {
    /// Input positioned after the header line.
    source: R,
    /// Line buffer reused for every row.
    buffer: String,
}

impl<R: BufRead> TsvSolutionsReader<R> {
    /// Reads the next line and parses it as a row.
    ///
    /// Returns `Ok(None)` when the input is exhausted. Each tab-separated cell
    /// is trimmed; an empty cell is an unbound value, anything else is parsed
    /// as a term, and a parsing failure is reported as a syntax error.
    pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, ParserError> {
        self.buffer.clear();
        if self.source.read_line(&mut self.buffer)? == 0 {
            return Ok(None);
        }
        let mut row = Vec::new();
        for cell in self.buffer.split('\t') {
            let cell = cell.trim();
            let value = if cell.is_empty() {
                None
            } else {
                Some(Term::from_str(cell).map_err(|e| SyntaxError::msg(e.to_string()))?)
            };
            row.push(value);
        }
        Ok(Some(row))
    }
}
// Serialization tests for the CSV and TSV solution writers, both run on the same fixture.
#[cfg(test)]
mod tests {
use super::*;
use std::str;
/// Returns the variables `x` and `literal` plus rows that cover IRIs, blank
/// nodes, unbound values and simple, quoted, language-tagged and integer
/// literals.
fn build_example() -> (Vec<Variable>, Vec<Vec<Option<Term>>>) {
(
vec![
Variable::new_unchecked("x"),
Variable::new_unchecked("literal"),
],
vec![
vec![
Some(NamedNode::new_unchecked("http://example/x").into()),
Some(Literal::new_simple_literal("String").into()),
],
vec![
Some(NamedNode::new_unchecked("http://example/x").into()),
Some(Literal::new_simple_literal("String-with-dquote\"").into()),
],
vec![
Some(BlankNode::new_unchecked("b0").into()),
Some(Literal::new_simple_literal("Blank node").into()),
],
vec![
None,
Some(Literal::new_simple_literal("Missing 'x'").into()),
],
vec![None, None],
vec![
Some(NamedNode::new_unchecked("http://example/x").into()),
None,
],
vec![
Some(BlankNode::new_unchecked("b1").into()),
Some(
Literal::new_language_tagged_literal_unchecked("String-with-lang", "en")
.into(),
),
],
vec![
Some(BlankNode::new_unchecked("b1").into()),
Some(Literal::new_typed_literal("123", xsd::INTEGER).into()),
],
],
)
}
// Expected CSV: `\r\n`-separated rows, quoted field with doubled quotes, no trailing line break.
#[test]
fn test_csv_serialization() -> io::Result<()> {
let (variables, solutions) = build_example();
let mut writer = CsvSolutionsWriter::start(Vec::new(), &variables)?;
for solution in &solutions {
// Borrow each owned `Option<Term>` as the `Option<TermRef>` the writer expects.
writer.write(solution.iter().map(|t| t.as_ref().map(|t| t.as_ref())))?;
}
let result = writer.finish();
assert_eq!(str::from_utf8(&result).unwrap(), "x,literal\r\nhttp://example/x,String\r\nhttp://example/x,\"String-with-dquote\"\"\"\r\n_:b0,Blank node\r\n,Missing 'x'\r\n,\r\nhttp://example/x,\r\n_:b1,String-with-lang\r\n_:b1,123");
Ok(())
}
// Expected TSV: `?`-prefixed header, `\n`-separated rows, `<...>` IRIs, bare integer.
#[test]
fn test_tsv_serialization() -> io::Result<()> {
let (variables, solutions) = build_example();
let mut writer = TsvSolutionsWriter::start(Vec::new(), &variables)?;
for solution in &solutions {
// Borrow each owned `Option<Term>` as the `Option<TermRef>` the writer expects.
writer.write(solution.iter().map(|t| t.as_ref().map(|t| t.as_ref())))?;
}
let result = writer.finish();
assert_eq!(str::from_utf8(&result).unwrap(), "?x\t?literal\n<http://example/x>\t\"String\"\n<http://example/x>\t\"String-with-dquote\\\"\"\n_:b0\t\"Blank node\"\n\t\"Missing 'x'\"\n\t\n<http://example/x>\t\n_:b1\t\"String-with-lang\"@en\n_:b1\t123");
Ok(())
}
}

@ -0,0 +1,703 @@
//! Implementation of [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/)
use crate::io::read::{ParserError, SyntaxError};
use crate::model::vocab::rdf;
use crate::model::*;
use crate::sparql::error::EvaluationError;
use crate::sparql::model::Variable;
use json_event_parser::{JsonEvent, JsonReader, JsonWriter};
use std::collections::BTreeMap;
use std::io::{self, BufRead, Write};
/// Writes a boolean query result as a SPARQL JSON results document, i.e. the
/// object `{"head": {}, "boolean": <value>}`, and hands the sink back.
pub fn write_boolean_json_result<W: Write>(sink: W, value: bool) -> io::Result<W> {
    let mut json = JsonWriter::from_writer(sink);
    json.write_event(JsonEvent::StartObject)?;

    // "head": {}
    json.write_event(JsonEvent::ObjectKey("head"))?;
    json.write_event(JsonEvent::StartObject)?;
    json.write_event(JsonEvent::EndObject)?;

    // "boolean": <value>
    json.write_event(JsonEvent::ObjectKey("boolean"))?;
    json.write_event(JsonEvent::Boolean(value))?;

    json.write_event(JsonEvent::EndObject)?;
    Ok(json.into_inner())
}
/// Streaming writer for query solutions in the SPARQL JSON results format.
///
/// Create it with [`JsonSolutionsWriter::start`], call
/// [`JsonSolutionsWriter::write`] once per solution and close the document
/// with [`JsonSolutionsWriter::finish`].
pub struct JsonSolutionsWriter<W: Write> {
    writer: JsonWriter<W>,
    /// Header variables, paired positionally with the values of each solution.
    variables: Vec<Variable>,
}

impl<W: Write> JsonSolutionsWriter<W> {
    /// Writes the document prologue, `{"head": {"vars": [...]}, "results":
    /// {"bindings": [`, and returns the writer.
    pub fn start(sink: W, variables: &[Variable]) -> io::Result<Self> {
        let mut json = JsonWriter::from_writer(sink);
        json.write_event(JsonEvent::StartObject)?;

        // "head": {"vars": [...]}
        json.write_event(JsonEvent::ObjectKey("head"))?;
        json.write_event(JsonEvent::StartObject)?;
        json.write_event(JsonEvent::ObjectKey("vars"))?;
        json.write_event(JsonEvent::StartArray)?;
        for variable in variables {
            let name = variable.as_str();
            json.write_event(JsonEvent::String(name))?;
        }
        json.write_event(JsonEvent::EndArray)?;
        json.write_event(JsonEvent::EndObject)?;

        // "results": {"bindings": [
        json.write_event(JsonEvent::ObjectKey("results"))?;
        json.write_event(JsonEvent::StartObject)?;
        json.write_event(JsonEvent::ObjectKey("bindings"))?;
        json.write_event(JsonEvent::StartArray)?;

        Ok(Self {
            writer: json,
            variables: variables.to_vec(),
        })
    }

    /// Writes one solution as a JSON object keyed by variable name.
    ///
    /// Values are paired with the header variables by position; `None`
    /// values are left out of the object.
    pub fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = Option<TermRef<'a>>>,
    ) -> io::Result<()> {
        self.writer.write_event(JsonEvent::StartObject)?;
        for (value, variable) in solution.into_iter().zip(&self.variables) {
            let term = match value {
                Some(term) => term,
                None => continue,
            };
            self.writer
                .write_event(JsonEvent::ObjectKey(variable.as_str()))?;
            write_json_term(term, &mut self.writer)?;
        }
        self.writer.write_event(JsonEvent::EndObject)?;
        Ok(())
    }

    /// Closes the `bindings` array and the two enclosing objects, then
    /// returns the underlying sink.
    pub fn finish(mut self) -> io::Result<W> {
        self.writer.write_event(JsonEvent::EndArray)?;
        self.writer.write_event(JsonEvent::EndObject)?;
        self.writer.write_event(JsonEvent::EndObject)?;
        Ok(self.writer.into_inner())
    }
}
fn write_json_term(
term: TermRef<'_>,
writer: &mut JsonWriter<impl Write>,
) -> Result<(), EvaluationError> {
match term {
TermRef::NamedNode(uri) => {
writer.write_event(JsonEvent::StartObject)?;
writer.write_event(JsonEvent::ObjectKey("type"))?;
writer.write_event(JsonEvent::String("uri"))?;
writer.write_event(JsonEvent::ObjectKey("value"))?;
writer.write_event(JsonEvent::String(uri.as_str()))?;
writer.write_event(JsonEvent::EndObject)?;
}
TermRef::BlankNode(bnode) => {
writer.write_event(JsonEvent::StartObject)?;
writer.write_event(JsonEvent::ObjectKey("type"))?;
writer.write_event(JsonEvent::String("bnode"))?;
writer.write_event(JsonEvent::ObjectKey("value"))?;
writer.write_event(JsonEvent::String(bnode.as_str()))?;
writer.write_event(JsonEvent::EndObject)?;
}
TermRef::Literal(literal) => {
writer.write_event(JsonEvent::StartObject)?;
writer.write_event(JsonEvent::ObjectKey("type"))?;
writer.write_event(JsonEvent::String("literal"))?;
writer.write_event(JsonEvent::ObjectKey("value"))?;
writer.write_event(JsonEvent::String(literal.value()))?;
if let Some(language) = literal.language() {
writer.write_event(JsonEvent::ObjectKey("xml:lang"))?;
writer.write_event(JsonEvent::String(language))?;
} else if !literal.is_plain() {
writer.write_event(JsonEvent::ObjectKey("datatype"))?;
writer.write_event(JsonEvent::String(literal.datatype().as_str()))?;
}
writer.write_event(JsonEvent::EndObject)?;
}
TermRef::Triple(triple) => {
writer.write_event(JsonEvent::StartObject)?;
writer.write_event(JsonEvent::ObjectKey("type"))?;
writer.write_event(JsonEvent::String("triple"))?;
writer.write_event(JsonEvent::ObjectKey("value"))?;
writer.write_event(JsonEvent::StartObject)?;
writer.write_event(JsonEvent::ObjectKey("subject"))?;
write_json_term(triple.subject.as_ref().into(), writer)?;
writer.write_event(JsonEvent::ObjectKey("predicate"))?;
write_json_term(triple.predicate.as_ref().into(), writer)?;
writer.write_event(JsonEvent::ObjectKey("object"))?;
write_json_term(triple.object.as_ref(), writer)?;
writer.write_event(JsonEvent::EndObject)?;
writer.write_event(JsonEvent::EndObject)?;
}
}
Ok(())
}
/// Result of reading the beginning of a SPARQL JSON results document.
pub enum JsonQueryResultsReader<R: BufRead> {
    /// A solutions document: the header variables and a reader for the bindings.
    Solutions {
        variables: Vec<Variable>,
        solutions: JsonSolutionsReader<R>,
    },
    /// A boolean document.
    Boolean(bool),
}

impl<R: BufRead> JsonQueryResultsReader<R> {
    /// Reads `source` up to the start of the `bindings` array or up to the
    /// `boolean` value.
    ///
    /// The top-level keys are read in document order: `head` records the
    /// variable names, `results` must open an object whose first key is
    /// `bindings` holding an array, and `boolean` must hold a JSON boolean.
    ///
    /// # Errors
    ///
    /// Returns an I/O error if reading fails and a syntax error if the
    /// document does not have the expected shape, if `results` is found
    /// before any `head`, or if a variable name is invalid.
    pub fn read(source: R) -> Result<Self, ParserError> {
        let mut reader = JsonReader::from_reader(source);
        let mut buffer = Vec::default();
        let mut variables = None;

        if reader.read_event(&mut buffer)? != JsonEvent::StartObject {
            return Err(SyntaxError::msg("SPARQL JSON results should be an object").into());
        }

        loop {
            let event = reader.read_event(&mut buffer)?;
            match event {
                JsonEvent::ObjectKey(key) => match key {
                    "head" => variables = Some(read_head(&mut reader, &mut buffer)?),
                    "results" => {
                        if reader.read_event(&mut buffer)? != JsonEvent::StartObject {
                            return Err(SyntaxError::msg("'results' should be an object").into());
                        }
                        if reader.read_event(&mut buffer)? != JsonEvent::ObjectKey("bindings") {
                            return Err(SyntaxError::msg(
                                "'results' should contain a 'bindings' key",
                            )
                            .into());
                        }
                        if reader.read_event(&mut buffer)? != JsonEvent::StartArray {
                            return Err(SyntaxError::msg("'bindings' should be an array").into());
                        }
                        return if let Some(variables) = variables {
                            // Maps each variable name to its column in the solution rows.
                            let mut mapping = BTreeMap::default();
                            for (i, var) in variables.iter().enumerate() {
                                mapping.insert(var.clone(), i);
                            }
                            Ok(Self::Solutions {
                                variables: variables
                                    .into_iter()
                                    .map(|v| {
                                        Variable::new(v).map_err(|e| {
                                            SyntaxError::msg(format!(
                                                "Invalid variable name: {}",
                                                e
                                            ))
                                        })
                                    })
                                    .collect::<Result<Vec<_>, _>>()?,
                                solutions: JsonSolutionsReader {
                                    reader,
                                    buffer,
                                    mapping,
                                },
                            })
                        } else {
                            Err(SyntaxError::msg(
                                "SPARQL tuple query results should contain a head key",
                            )
                            .into())
                        };
                    }
                    "boolean" => {
                        return if let JsonEvent::Boolean(v) = reader.read_event(&mut buffer)? {
                            Ok(Self::Boolean(v))
                        } else {
                            Err(SyntaxError::msg("Unexpected boolean value").into())
                        }
                    }
                    _ => {
                        return Err(SyntaxError::msg(format!(
                            "Expecting head or result key, found {}",
                            key
                        ))
                        .into());
                    }
                },
                JsonEvent::EndObject => {
                    return Err(SyntaxError::msg(
                        "SPARQL results should contain a bindings key or a boolean key",
                    )
                    .into())
                }
                JsonEvent::Eof => {
                    return Err(SyntaxError::msg(
                        "Unexpected end of JSON object without 'results' or 'boolean' key",
                    )
                    .into())
                }
                _ => return Err(SyntaxError::msg("Invalid SPARQL results serialization").into()),
            }
        }
    }
}
/// Reader for the solutions of a SPARQL JSON results document.
pub struct JsonSolutionsReader<R: BufRead> {
// JSON event source.
reader: JsonReader<R>,
// Scratch buffer handed to every `read_event` call.
buffer: Vec<u8>,
// Variable name -> index of its value in the rows returned by `read_next`.
mapping: BTreeMap<String, usize>,
}
impl<R: BufRead> JsonSolutionsReader<R> {
/// Reads the next solution.
///
/// Returns a row with one slot per known variable (`None` when the variable
/// is not bound in this solution), or `Ok(None)` when the end of an array or
/// the end of the input is reached. A key that is not in `mapping` is a
/// syntax error.
pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, ParserError> {
let mut new_bindings = vec![None; self.mapping.len()];
loop {
match self.reader.read_event(&mut self.buffer)? {
// Start of the solution object: nothing to record yet.
JsonEvent::StartObject => (),
JsonEvent::EndObject => return Ok(Some(new_bindings)),
JsonEvent::EndArray | JsonEvent::Eof => return Ok(None),
JsonEvent::ObjectKey(key) => {
let k = *self.mapping.get(key).ok_or_else(|| {
SyntaxError::msg(format!(
"The variable {} has not been defined in the header",
key
))
})?;
new_bindings[k] = Some(self.read_value()?)
}
_ => return Err(SyntaxError::msg("Invalid result serialization").into()),
}
}
}
/// Reads one term object and builds the corresponding term.
///
/// The keys `type`, `value`, `xml:lang` and `datatype` are accepted in any
/// order; the term is assembled when the closing `EndObject` is reached.
/// The keys `subject`, `predicate` and `object` are read recursively and are
/// used to build a triple term.
fn read_value(&mut self) -> Result<Term, ParserError> {
// Kind of term announced by the "type" key.
enum Type {
Uri,
BNode,
Literal,
Triple,
}
// Key whose value is expected next; `None` once that value has been consumed.
#[derive(Eq, PartialEq)]
enum State {
Type,
Value,
Lang,
Datatype,
}
let mut state = None;
let mut t = None;
let mut value = None;
let mut lang = None;
let mut datatype = None;
let mut subject = None;
let mut predicate = None;
let mut object = None;
if self.reader.read_event(&mut self.buffer)? != JsonEvent::StartObject {
return Err(SyntaxError::msg("Term serializations should be an object").into());
}
loop {
match self.reader.read_event(&mut self.buffer)? {
JsonEvent::ObjectKey(key) => match key {
"type" => state = Some(State::Type),
"value" => state = Some(State::Value),
"xml:lang" => state = Some(State::Lang),
"datatype" => state = Some(State::Datatype),
// Components of a triple term: each one is itself a term object.
"subject" => subject = Some(self.read_value()?),
"predicate" => predicate = Some(self.read_value()?),
"object" => object = Some(self.read_value()?),
_ => {
return Err(SyntaxError::msg(format!(
"Unexpected key in term serialization: '{}'",
key
))
.into())
}
},
// A nested object is only accepted as the content of "value" (triple terms).
JsonEvent::StartObject => {
if state != Some(State::Value) {
return Err(SyntaxError::msg(
"Unexpected nested object in term serialization",
)
.into());
}
}
JsonEvent::String(s) => match state {
Some(State::Type) => {
match s {
"uri" => t = Some(Type::Uri),
"bnode" => t = Some(Type::BNode),
"literal" => t = Some(Type::Literal),
"triple" => t = Some(Type::Triple),
_ => {
return Err(SyntaxError::msg(format!(
"Unexpected term type: '{}'",
s
))
.into())
}
};
state = None;
}
Some(State::Value) => {
value = Some(s.to_owned());
state = None;
}
Some(State::Lang) => {
lang = Some(s.to_owned());
state = None;
}
Some(State::Datatype) => {
datatype = Some(NamedNode::new(s).map_err(|e| {
SyntaxError::msg(format!("Invalid datatype IRI: {}", e))
})?);
state = None;
}
_ => (), // impossible
},
JsonEvent::EndObject => {
if let Some(s) = state {
// Still waiting for a value: only the nested "value" object of a
// triple may end here, any other pending key lacked its string.
if s == State::Value {
state = None; //End of triple
} else {
return Err(SyntaxError::msg(
"Term description values should be string",
)
.into());
}
} else {
// End of the term object itself: build the term from what was collected.
return match t {
None => Err(SyntaxError::msg(
"Term serialization should have a 'type' key",
)
.into()),
Some(Type::Uri) => Ok(NamedNode::new(value.ok_or_else(|| {
SyntaxError::msg("uri serialization should have a 'value' key")
})?)
.map_err(|e| SyntaxError::msg(format!("Invalid uri value: {}", e)))?
.into()),
Some(Type::BNode) => Ok(BlankNode::new(value.ok_or_else(|| {
SyntaxError::msg("bnode serialization should have a 'value' key")
})?)
.map_err(|e| SyntaxError::msg(format!("Invalid bnode value: {}", e)))?
.into()),
Some(Type::Literal) => {
let value = value.ok_or_else(|| {
SyntaxError::msg(
"literal serialization should have a 'value' key",
)
})?;
Ok(match lang {
Some(lang) => {
// A language tag may only come with the rdf:langString datatype.
if let Some(datatype) = datatype {
if datatype.as_ref() != rdf::LANG_STRING {
return Err(SyntaxError::msg(format!(
"xml:lang value '{}' provided with the datatype {}",
lang, datatype
)).into())
}
}
Literal::new_language_tagged_literal(value, &lang).map_err(|e| {
SyntaxError::msg(format!("Invalid xml:lang value '{}': {}", lang, e))
})?
}
None => if let Some(datatype) = datatype {
Literal::new_typed_literal(value, datatype)
} else {
Literal::new_simple_literal(value)
}
}
.into())
}
Some(Type::Triple) => Ok(Triple::new(
// The subject may be a named node, a blank node or a triple.
match subject.ok_or_else(|| {
SyntaxError::msg(
"triple serialization should have a 'subject' key",
)
})? {
Term::NamedNode(subject) => subject.into(),
Term::BlankNode(subject) => subject.into(),
Term::Triple(subject) => Subject::Triple(subject),
Term::Literal(_) => {
return Err(SyntaxError::msg(
"The 'subject' value should not be a literal",
)
.into())
}
},
// The predicate must be a named node.
match predicate.ok_or_else(|| {
SyntaxError::msg(
"triple serialization should have a 'predicate' key",
)
})? {
Term::NamedNode(predicate) => predicate,
_ => {
return Err(SyntaxError::msg(
"The 'predicate' value should be a uri",
)
.into())
}
},
object.ok_or_else(|| {
SyntaxError::msg(
"triple serialization should have a 'object' key",
)
})?,
)
.into()),
};
}
}
_ => return Err(SyntaxError::msg("Invalid term serialization").into()),
}
}
}
}
/// Reads the `head` object of a SPARQL JSON results document.
///
/// Returns the strings listed under the `vars` key, or an empty list if the key is absent.
/// The content of the `link` key is read and discarded.
///
/// # Errors
/// Fails if `head` is not an object, if it contains a key other than `vars` or `link`,
/// or if one of these two keys is not an array of strings.
fn read_head<R: BufRead>(
    reader: &mut JsonReader<R>,
    buffer: &mut Vec<u8>,
) -> Result<Vec<String>, ParserError> {
    if !matches!(reader.read_event(buffer)?, JsonEvent::StartObject) {
        return Err(SyntaxError::msg("head should be an object").into());
    }
    // Stays empty when the head carries no `vars` key.
    let mut names = Vec::new();
    loop {
        match reader.read_event(buffer)? {
            JsonEvent::ObjectKey("vars") => names = read_string_array(reader, buffer)?,
            JsonEvent::ObjectKey("link") => {
                // Links are validated but not kept.
                read_string_array(reader, buffer)?;
            }
            JsonEvent::ObjectKey(other) => {
                return Err(
                    SyntaxError::msg(format!("Unexpected key in head: '{}'", other)).into(),
                )
            }
            JsonEvent::EndObject => return Ok(names),
            _ => return Err(SyntaxError::msg("Invalid head serialization").into()),
        }
    }
}
/// Reads a JSON array whose elements are all strings and returns them in order.
///
/// # Errors
/// Fails if the next event is not the start of an array or if an element is not a string.
fn read_string_array<R: BufRead>(
    reader: &mut JsonReader<R>,
    buffer: &mut Vec<u8>,
) -> Result<Vec<String>, ParserError> {
    if !matches!(reader.read_event(buffer)?, JsonEvent::StartArray) {
        return Err(SyntaxError::msg("Variable list should be an array").into());
    }
    let mut items = Vec::new();
    loop {
        match reader.read_event(buffer)? {
            JsonEvent::String(item) => items.push(item.to_owned()),
            JsonEvent::EndArray => break Ok(items),
            _ => break Err(SyntaxError::msg("Variable names should be strings").into()),
        }
    }
}
/// Streaming reader of the solutions of a SPARQL JSON results document.
///
/// Each call to `read_next` consumes the JSON events of one solution object.
struct ResultsIterator<R: BufRead> {
/// Source of the JSON events.
reader: JsonReader<R>,
/// Scratch buffer handed to every `read_event` call.
buffer: Vec<u8>,
/// Maps a variable name to the position of its value in the produced rows.
mapping: BTreeMap<String, usize>,
}
impl<R: BufRead> Iterator for ResultsIterator<R> {
type Item = Result<Vec<Option<Term>>, EvaluationError>;
fn next(&mut self) -> Option<Result<Vec<Option<Term>>, EvaluationError>> {
self.read_next().map_err(EvaluationError::from).transpose()
}
}
impl<R: BufRead> ResultsIterator<R> {
/// Reads the next solution object.
///
/// Returns one slot per declared variable (`None` for unbound variables),
/// or `Ok(None)` once the end of the array or of the input is reached.
///
/// # Errors
/// Fails if a key has not been declared in the header or if the serialization is invalid.
fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, ParserError> {
    let mut row = vec![None; self.mapping.len()];
    loop {
        // Position in `row` of the variable whose value comes next.
        let slot = match self.reader.read_event(&mut self.buffer)? {
            JsonEvent::StartObject => continue,
            JsonEvent::EndObject => return Ok(Some(row)),
            JsonEvent::EndArray | JsonEvent::Eof => return Ok(None),
            JsonEvent::ObjectKey(name) => match self.mapping.get(name) {
                Some(position) => *position,
                None => {
                    return Err(SyntaxError::msg(format!(
                        "The variable {} has not been defined in the header",
                        name
                    ))
                    .into())
                }
            },
            _ => return Err(SyntaxError::msg("Invalid result serialization").into()),
        };
        row[slot] = Some(self.read_value()?);
    }
}
/// Reads a single RDF term serialized as a JSON object and converts it into a [`Term`].
///
/// Recognized keys are `type`, `value`, `xml:lang` and `datatype`, plus `subject`,
/// `predicate` and `object`, whose values are parsed by recursive calls.
///
/// # Errors
/// Fails on any other key, on an unknown `type`, on a missing mandatory key,
/// on an invalid IRI, blank node identifier or language tag,
/// and on a triple whose subject is a literal or whose predicate is not a `uri`.
fn read_value(&mut self) -> Result<Term, ParserError> {
// Kind of term announced by the `type` key.
enum Type {
Uri,
BNode,
Literal,
Triple,
}
// Key whose string value is expected next.
#[derive(Eq, PartialEq)]
enum State {
Type,
Value,
Lang,
Datatype,
}
let mut state = None;
let mut t = None;
let mut value = None;
let mut lang = None;
let mut datatype = None;
let mut subject = None;
let mut predicate = None;
let mut object = None;
if self.reader.read_event(&mut self.buffer)? != JsonEvent::StartObject {
return Err(SyntaxError::msg("Term serializations should be an object").into());
}
loop {
match self.reader.read_event(&mut self.buffer)? {
// A key either records which string is awaited or, for the components of a triple,
// triggers the immediate reading of the nested term.
JsonEvent::ObjectKey(key) => match key {
"type" => state = Some(State::Type),
"value" => state = Some(State::Value),
"xml:lang" => state = Some(State::Lang),
"datatype" => state = Some(State::Datatype),
"subject" => subject = Some(self.read_value()?),
"predicate" => predicate = Some(self.read_value()?),
"object" => object = Some(self.read_value()?),
_ => {
return Err(SyntaxError::msg(format!(
"Unexpected key in term serialization: '{}'",
key
))
.into())
}
},
// Only `value` may hold a nested object: it then carries the
// `subject`, `predicate` and `object` keys handled above.
JsonEvent::StartObject => {
if state != Some(State::Value) {
return Err(SyntaxError::msg(
"Unexpected nested object in term serialization",
)
.into());
}
}
// A string is stored according to the key that precedes it, then the pending key is cleared.
JsonEvent::String(s) => match state {
Some(State::Type) => {
match s {
"uri" => t = Some(Type::Uri),
"bnode" => t = Some(Type::BNode),
"literal" => t = Some(Type::Literal),
"triple" => t = Some(Type::Triple),
_ => {
return Err(SyntaxError::msg(format!(
"Unexpected term type: '{}'",
s
))
.into())
}
};
state = None;
}
Some(State::Value) => {
value = Some(s.to_owned());
state = None;
}
Some(State::Lang) => {
lang = Some(s.to_owned());
state = None;
}
Some(State::Datatype) => {
datatype = Some(NamedNode::new(s).map_err(|e| {
SyntaxError::msg(format!("Invalid datatype value: {}", e))
})?);
state = None;
}
_ => (), // impossible
},
JsonEvent::EndObject => {
if let Some(s) = state {
// A pending `value` key means that this brace closes the nested object
// opened for `value`, not the term itself.
if s == State::Value {
state = None; //End of triple
} else {
return Err(SyntaxError::msg(
"Term description values should be string",
)
.into());
}
} else {
// No key is pending: the term object is complete and is built from the collected fields.
return match t {
None => Err(SyntaxError::msg(
"Term serialization should have a 'type' key",
)
.into()),
Some(Type::Uri) => Ok(NamedNode::new(value.ok_or_else(|| {
SyntaxError::msg("uri serialization should have a 'value' key")
})?)
.map_err(|e| SyntaxError::msg(format!("Invalid uri value: {}", e)))?
.into()),
Some(Type::BNode) => Ok(BlankNode::new(value.ok_or_else(|| {
SyntaxError::msg("bnode serialization should have a 'value' key")
})?)
.map_err(|e| SyntaxError::msg(format!("Invalid bnode value: {}", e)))?
.into()),
Some(Type::Literal) => {
let value = value.ok_or_else(|| {
SyntaxError::msg(
"literal serialization should have a 'value' key",
)
})?;
Ok(match lang {
Some(lang) => {
// A language tag is only accepted without datatype or with rdf:langString.
if let Some(datatype) = datatype {
if datatype.as_ref() != rdf::LANG_STRING {
return Err(SyntaxError::msg(format!(
"xml:lang value '{}' provided with the datatype {}",
lang, datatype
)).into())
}
}
Literal::new_language_tagged_literal(value, &lang).map_err(|e| {
SyntaxError::msg(format!("Invalid xml:lang value '{}': {}", lang, e))
})?
}
None => if let Some(datatype) = datatype {
Literal::new_typed_literal(value, datatype)
} else {
Literal::new_simple_literal(value)
}
}
.into())
}
Some(Type::Triple) => Ok(Triple::new(
// The subject may be anything but a literal.
match subject.ok_or_else(|| {
SyntaxError::msg(
"triple serialization should have a 'subject' key",
)
})? {
Term::NamedNode(subject) => subject.into(),
Term::BlankNode(subject) => subject.into(),
Term::Triple(subject) => Subject::Triple(subject),
Term::Literal(_) => {
return Err(SyntaxError::msg(
"The 'subject' value should not be a literal",
)
.into())
}
},
// The predicate has to be a named node.
match predicate.ok_or_else(|| {
SyntaxError::msg(
"triple serialization should have a 'predicate' key",
)
})? {
Term::NamedNode(predicate) => predicate,
_ => {
return Err(SyntaxError::msg(
"The 'predicate' value should be a uri",
)
.into())
}
},
object.ok_or_else(|| {
SyntaxError::msg(
"triple serialization should have a 'object' key",
)
})?,
)
.into()),
};
}
}
// Every other event is rejected.
_ => return Err(SyntaxError::msg("Invalid term serialization").into()),
}
}
}
}

@ -0,0 +1,337 @@
mod csv;
mod json;
mod xml;
use crate::io::read::{ParserError, SyntaxError};
use crate::model::{Term, TermRef};
use crate::sparql::io::csv::*;
use crate::sparql::io::json::*;
use crate::sparql::io::xml::*;
use crate::sparql::{EvaluationError, QueryResults, QuerySolution, QuerySolutionIter, Variable};
use std::io::{self, BufRead, Write};
use std::rc::Rc;
/// [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
pub enum QueryResultsFormat {
    /// [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/)
    Xml,
    /// [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/)
    Json,
    /// [SPARQL Query Results CSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/)
    Csv,
    /// [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/)
    Tsv,
}

impl QueryResultsFormat {
    /// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/).
    ///
    /// ```
    /// use oxigraph::sparql::QueryResultsFormat;
    ///
    /// assert_eq!(QueryResultsFormat::Json.iri(), "http://www.w3.org/ns/formats/SPARQL_Results_JSON")
    /// ```
    #[inline]
    pub fn iri(self) -> &'static str {
        match self {
            Self::Xml => "http://www.w3.org/ns/formats/SPARQL_Results_XML",
            Self::Json => "http://www.w3.org/ns/formats/SPARQL_Results_JSON",
            Self::Csv => "http://www.w3.org/ns/formats/SPARQL_Results_CSV",
            Self::Tsv => "http://www.w3.org/ns/formats/SPARQL_Results_TSV",
        }
    }

    /// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
    ///
    /// ```
    /// use oxigraph::sparql::QueryResultsFormat;
    ///
    /// assert_eq!(QueryResultsFormat::Json.media_type(), "application/sparql-results+json")
    /// ```
    #[inline]
    pub fn media_type(self) -> &'static str {
        match self {
            Self::Xml => "application/sparql-results+xml",
            Self::Json => "application/sparql-results+json",
            Self::Csv => "text/csv; charset=utf-8",
            Self::Tsv => "text/tab-separated-values; charset=utf-8",
        }
    }

    /// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension.
    ///
    /// ```
    /// use oxigraph::sparql::QueryResultsFormat;
    ///
    /// assert_eq!(QueryResultsFormat::Json.file_extension(), "srj")
    /// ```
    #[inline]
    pub fn file_extension(self) -> &'static str {
        match self {
            Self::Xml => "srx",
            Self::Json => "srj",
            Self::Csv => "csv",
            Self::Tsv => "tsv",
        }
    }

    /// Looks for a known format from a media type.
    ///
    /// It supports some media type aliases.
    /// For example "application/xml" is going to return `Xml` even if it is not its canonical media type.
    ///
    /// Parameters (everything after the first `;`) and surrounding whitespace are ignored
    /// and the comparison is case-insensitive, as media type names are.
    /// Returns `None` if the media type is not known.
    ///
    /// Example:
    /// ```
    /// use oxigraph::sparql::QueryResultsFormat;
    ///
    /// assert_eq!(QueryResultsFormat::from_media_type("application/sparql-results+json; charset=utf-8"), Some(QueryResultsFormat::Json));
    /// assert_eq!(QueryResultsFormat::from_media_type("Application/JSON"), Some(QueryResultsFormat::Json))
    /// ```
    pub fn from_media_type(media_type: &str) -> Option<Self> {
        // Canonical media types and their aliases.
        const MEDIA_TYPES: [(&str, QueryResultsFormat); 9] = [
            ("application/sparql-results+xml", QueryResultsFormat::Xml),
            ("application/xml", QueryResultsFormat::Xml),
            ("text/xml", QueryResultsFormat::Xml),
            ("application/sparql-results+json", QueryResultsFormat::Json),
            ("application/json", QueryResultsFormat::Json),
            ("text/json", QueryResultsFormat::Json),
            ("text/csv", QueryResultsFormat::Csv),
            ("text/tab-separated-values", QueryResultsFormat::Tsv),
            ("text/tsv", QueryResultsFormat::Tsv),
        ];
        // Only the `type/subtype` part identifies the format.
        let essence = media_type.split(';').next()?.trim();
        MEDIA_TYPES
            .iter()
            .find(|(name, _)| name.eq_ignore_ascii_case(essence))
            .map(|&(_, format)| format)
    }

    /// Looks for a known format from an extension.
    ///
    /// It supports some aliases and the comparison is case-insensitive.
    /// Returns `None` if the extension is not known.
    ///
    /// Example:
    /// ```
    /// use oxigraph::sparql::QueryResultsFormat;
    ///
    /// assert_eq!(QueryResultsFormat::from_extension("json"), Some(QueryResultsFormat::Json))
    /// ```
    pub fn from_extension(extension: &str) -> Option<Self> {
        // Canonical extensions and their aliases.
        const EXTENSIONS: [(&str, QueryResultsFormat); 7] = [
            ("srx", QueryResultsFormat::Xml),
            ("xml", QueryResultsFormat::Xml),
            ("srj", QueryResultsFormat::Json),
            ("json", QueryResultsFormat::Json),
            ("csv", QueryResultsFormat::Csv),
            ("txt", QueryResultsFormat::Csv),
            ("tsv", QueryResultsFormat::Tsv),
        ];
        EXTENSIONS
            .iter()
            .find(|(name, _)| name.eq_ignore_ascii_case(extension))
            .map(|&(_, format)| format)
    }
}
/// Parsers for [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats.
///
/// It currently supports the following formats:
/// * [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/) ([`QueryResultsFormat::Xml`](QueryResultsFormat::Xml))
/// * [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) ([`QueryResultsFormat::Json`](QueryResultsFormat::Json))
/// * [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Tsv`](QueryResultsFormat::Tsv))
///
/// Reading [`QueryResultsFormat::Csv`](QueryResultsFormat::Csv) is rejected with a syntax error because the CSV syntax is lossy.
// NOTE(review): the type is presumably kept non-`Copy` on purpose so that options can be added later - confirm.
#[allow(missing_copy_implementations)]
pub struct QueryResultsParser {
/// Serialization format the parser reads.
format: QueryResultsFormat,
}
impl QueryResultsParser {
    /// Builds a parser for the given format.
    pub fn from_format(format: QueryResultsFormat) -> Self {
        Self { format }
    }

    /// Starts reading a query results document from a [`BufRead`](std::io::BufRead) implementation.
    ///
    /// Returns [`QueryResultsReader::Boolean`] for a boolean result or
    /// [`QueryResultsReader::Solutions`] wrapping a reader of the solutions.
    ///
    /// # Errors
    /// Fails if the format-specific reader fails,
    /// and always fails for [`QueryResultsFormat::Csv`] because this syntax is lossy.
    pub fn read_results<R: BufRead>(
        &self,
        reader: R,
    ) -> Result<QueryResultsReader<R>, ParserError> {
        // Boolean results return early: only solutions need the shared wrapper built below.
        let (variables, solutions) = match self.format {
            QueryResultsFormat::Xml => match XmlQueryResultsReader::read(reader)? {
                XmlQueryResultsReader::Boolean(value) => {
                    return Ok(QueryResultsReader::Boolean(value))
                }
                XmlQueryResultsReader::Solutions {
                    variables,
                    solutions,
                } => (variables, SolutionsReaderKind::Xml(solutions)),
            },
            QueryResultsFormat::Json => match JsonQueryResultsReader::read(reader)? {
                JsonQueryResultsReader::Boolean(value) => {
                    return Ok(QueryResultsReader::Boolean(value))
                }
                JsonQueryResultsReader::Solutions {
                    variables,
                    solutions,
                } => (variables, SolutionsReaderKind::Json(solutions)),
            },
            QueryResultsFormat::Csv => {
                return Err(SyntaxError::msg("CSV SPARQL results syntax is lossy and can't be parsed to a proper RDF representation").into())
            }
            QueryResultsFormat::Tsv => match TsvQueryResultsReader::read(reader)? {
                TsvQueryResultsReader::Boolean(value) => {
                    return Ok(QueryResultsReader::Boolean(value))
                }
                TsvQueryResultsReader::Solutions {
                    variables,
                    solutions,
                } => (variables, SolutionsReaderKind::Tsv(solutions)),
            },
        };
        Ok(QueryResultsReader::Solutions(SolutionsReader {
            variables: Rc::new(variables),
            solutions,
        }))
    }
}
/// The outcome of [`QueryResultsParser::read_results`]:
/// either a reader of solutions or an already decoded boolean.
pub enum QueryResultsReader<R: BufRead> {
/// A reader of the solutions of the results document.
Solutions(SolutionsReader<R>),
/// The value of a boolean result.
Boolean(bool),
}
/// An iterator over the solutions of a query results document.
///
/// Each item is a [`QuerySolution`] sharing the variable list of the reader.
pub struct SolutionsReader<R: BufRead> {
/// Variables of the solutions, shared with every returned [`QuerySolution`].
variables: Rc<Vec<Variable>>,
/// Format-specific reader producing the raw rows.
solutions: SolutionsReaderKind<R>,
}
/// The format-specific reader wrapped by a [`SolutionsReader`].
///
/// There is no CSV variant: [`QueryResultsParser::read_results`] rejects this format.
enum SolutionsReaderKind<R: BufRead> {
Xml(XmlSolutionsReader<R>),
Json(JsonSolutionsReader<R>),
Tsv(TsvSolutionsReader<R>),
}
impl<R: BufRead> SolutionsReader<R> {
    /// The variables of the solutions, in the order of the values of each solution.
    #[inline]
    pub fn variables(&self) -> &[Variable] {
        self.variables.as_slice()
    }
}
impl<R: BufRead> Iterator for SolutionsReaderKind<R> {
    type Item = Result<Vec<Option<Term>>, ParserError>;

    /// Delegates to the `read_next` method of the wrapped reader.
    fn next(&mut self) -> Option<Self::Item> {
        let outcome = match self {
            Self::Xml(inner) => inner.read_next(),
            Self::Json(inner) => inner.read_next(),
            Self::Tsv(inner) => inner.read_next(),
        };
        // `Ok(None)` marks the end of the solutions.
        outcome.transpose()
    }
}
impl<R: BufRead> Iterator for SolutionsReader<R> {
type Item = Result<QuerySolution, ParserError>;
fn next(&mut self) -> Option<Result<QuerySolution, ParserError>> {
Some(self.solutions.next()?.map(|values| QuerySolution {
values,
variables: self.variables.clone(),
}))
}
}
impl<R: BufRead + 'static> From<SolutionsReader<R>> for QuerySolutionIter {
fn from(reader: SolutionsReader<R>) -> Self {
Self::new(
reader.variables.clone(),
Box::new(reader.solutions.map(|r| r.map_err(EvaluationError::from))),
)
}
}
impl<R: BufRead + 'static> From<QueryResultsReader<R>> for QueryResults {
fn from(reader: QueryResultsReader<R>) -> Self {
match reader {
QueryResultsReader::Solutions(s) => Self::Solutions(s.into()),
QueryResultsReader::Boolean(v) => Self::Boolean(v),
}
}
}
/// A serializer for [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats.
///
/// It currently supports the following formats:
/// * [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/) ([`QueryResultsFormat::Xml`](QueryResultsFormat::Xml))
/// * [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) ([`QueryResultsFormat::Json`](QueryResultsFormat::Json))
/// * [SPARQL Query Results CSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Csv`](QueryResultsFormat::Csv))
/// * [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Tsv`](QueryResultsFormat::Tsv))
// NOTE(review): the type is presumably kept non-`Copy` on purpose so that options can be added later - confirm.
#[allow(missing_copy_implementations)]
pub struct QueryResultsSerializer {
/// Serialization format the serializer writes.
format: QueryResultsFormat,
}
impl QueryResultsSerializer {
    /// Builds a serializer for the given format.
    pub fn from_format(format: QueryResultsFormat) -> Self {
        Self { format }
    }

    /// Writes a boolean query result into the given [`Write`](std::io::Write) implementation
    /// and gives the writer back.
    pub fn write_boolean_result<W: Write>(&self, writer: W, value: bool) -> io::Result<W> {
        let format = self.format;
        match format {
            QueryResultsFormat::Xml => write_boolean_xml_result(writer, value),
            QueryResultsFormat::Json => write_boolean_json_result(writer, value),
            QueryResultsFormat::Csv => write_boolean_csv_result(writer, value),
            QueryResultsFormat::Tsv => write_boolean_tsv_result(writer, value),
        }
    }

    /// Returns a `SolutionsWriter` allowing writing query solutions into the given [`Write`](std::io::Write) implementation.
    ///
    /// The beginning of the document, including the `variables`, is written by this call.
    pub fn solutions_writer<W: Write>(
        &self,
        writer: W,
        variables: &[Variable],
    ) -> io::Result<SolutionsWriter<W>> {
        let formatter = match self.format {
            QueryResultsFormat::Xml => {
                SolutionsWriterKind::Xml(XmlSolutionsWriter::start(writer, variables)?)
            }
            QueryResultsFormat::Json => {
                SolutionsWriterKind::Json(JsonSolutionsWriter::start(writer, variables)?)
            }
            QueryResultsFormat::Csv => {
                SolutionsWriterKind::Csv(CsvSolutionsWriter::start(writer, variables)?)
            }
            QueryResultsFormat::Tsv => {
                SolutionsWriterKind::Tsv(TsvSolutionsWriter::start(writer, variables)?)
            }
        };
        Ok(SolutionsWriter { formatter })
    }
}
/// Allows writing query results.
/// Could be built using a [`QueryResultsSerializer`].
///
/// Warning: Do not forget to run the [`finish`](SolutionsWriter::finish()) method to properly write the last bytes of the file.
#[must_use]
pub struct SolutionsWriter<W: Write> {
/// Format-specific writer every call is delegated to.
formatter: SolutionsWriterKind<W>,
}
/// The format-specific writer wrapped by a [`SolutionsWriter`], one variant per [`QueryResultsFormat`].
enum SolutionsWriterKind<W: Write> {
Xml(XmlSolutionsWriter<W>),
Json(JsonSolutionsWriter<W>),
Csv(CsvSolutionsWriter<W>),
Tsv(TsvSolutionsWriter<W>),
}
impl<W: Write> SolutionsWriter<W> {
    /// Writes a solution.
    ///
    /// `solution` provides one optional term per variable.
    pub fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = Option<TermRef<'a>>>,
    ) -> io::Result<()> {
        match self.formatter {
            SolutionsWriterKind::Xml(ref mut inner) => inner.write(solution),
            SolutionsWriterKind::Json(ref mut inner) => inner.write(solution),
            SolutionsWriterKind::Csv(ref mut inner) => inner.write(solution),
            SolutionsWriterKind::Tsv(ref mut inner) => inner.write(solution),
        }
    }

    /// Writes the last bytes of the file.
    pub fn finish(self) -> io::Result<()> {
        // The value returned by the wrapped writers is dropped.
        match self.formatter {
            SolutionsWriterKind::Xml(inner) => {
                inner.finish()?;
            }
            SolutionsWriterKind::Json(inner) => {
                inner.finish()?;
            }
            SolutionsWriterKind::Csv(inner) => {
                inner.finish();
            }
            SolutionsWriterKind::Tsv(inner) => {
                inner.finish();
            }
        }
        Ok(())
    }
}

@ -0,0 +1,597 @@
//! Implementation of [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/)
use crate::io::read::{ParserError, SyntaxError};
use crate::model::vocab::rdf;
use crate::model::*;
use crate::sparql::model::Variable;
use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use quick_xml::Reader;
use quick_xml::Writer;
use std::collections::BTreeMap;
use std::io::{self, BufRead, Write};
pub fn write_boolean_xml_result<W: Write>(sink: W, value: bool) -> io::Result<W> {
do_write_boolean_xml_result(sink, value).map_err(map_xml_error)
}
/// Writes the XML declaration, a `<sparql>` root with an empty `<head>`
/// and a `<boolean>` element holding `true` or `false`.
fn do_write_boolean_xml_result<W: Write>(sink: W, value: bool) -> Result<W, quick_xml::Error> {
    let text = if value { "true" } else { "false" };
    let mut root = BytesStart::borrowed_name(b"sparql");
    root.push_attribute(("xmlns", "http://www.w3.org/2005/sparql-results#"));

    let mut xml = Writer::new(sink);
    xml.write_event(Event::Decl(BytesDecl::new(b"1.0", None, None)))?;
    xml.write_event(Event::Start(root))?;

    xml.write_event(Event::Start(BytesStart::borrowed_name(b"head")))?;
    xml.write_event(Event::End(BytesEnd::borrowed(b"head")))?;

    xml.write_event(Event::Start(BytesStart::borrowed_name(b"boolean")))?;
    xml.write_event(Event::Text(BytesText::from_plain_str(text)))?;
    xml.write_event(Event::End(BytesEnd::borrowed(b"boolean")))?;

    xml.write_event(Event::End(BytesEnd::borrowed(b"sparql")))?;
    Ok(xml.into_inner())
}
/// Writer of query solutions using the SPARQL Query Results XML Format.
///
/// `start` writes the document header, `write` one `<result>` element per solution
/// and `finish` the closing tags.
pub struct XmlSolutionsWriter<W: Write> {
/// XML event writer wrapping the output.
writer: Writer<W>,
/// Variables declared in the header, paired by position with the values of each solution.
variables: Vec<Variable>,
}
impl<W: Write> XmlSolutionsWriter<W> {
pub fn start(sink: W, variables: &[Variable]) -> io::Result<Self> {
Self::do_start(sink, variables).map_err(map_xml_error)
}
fn do_start(sink: W, variables: &[Variable]) -> Result<Self, quick_xml::Error> {
let mut writer = Writer::new(sink);
writer.write_event(Event::Decl(BytesDecl::new(b"1.0", None, None)))?;
let mut sparql_open = BytesStart::borrowed_name(b"sparql");
sparql_open.push_attribute(("xmlns", "http://www.w3.org/2005/sparql-results#"));
writer.write_event(Event::Start(sparql_open))?;
writer.write_event(Event::Start(BytesStart::borrowed_name(b"head")))?;
for variable in variables {
let mut variable_tag = BytesStart::borrowed_name(b"variable");
variable_tag.push_attribute(("name", variable.as_str()));
writer.write_event(Event::Empty(variable_tag))?;
}
writer.write_event(Event::End(BytesEnd::borrowed(b"head")))?;
writer.write_event(Event::Start(BytesStart::borrowed_name(b"results")))?;
Ok(Self {
writer,
variables: variables.to_vec(),
})
}
pub fn write<'a>(
&mut self,
solution: impl IntoIterator<Item = Option<TermRef<'a>>>,
) -> io::Result<()> {
self.do_write(solution).map_err(map_xml_error)
}
fn do_write<'a>(
&mut self,
solution: impl IntoIterator<Item = Option<TermRef<'a>>>,
) -> Result<(), quick_xml::Error> {
self.writer
.write_event(Event::Start(BytesStart::borrowed_name(b"result")))?;
for (value, variable) in solution.into_iter().zip(&self.variables) {
if let Some(value) = value {
let mut binding_tag = BytesStart::borrowed_name(b"binding");
binding_tag.push_attribute(("name", variable.as_str()));
self.writer.write_event(Event::Start(binding_tag))?;
write_xml_term(value, &mut self.writer)?;
self.writer
.write_event(Event::End(BytesEnd::borrowed(b"binding")))?;
}
}
self.writer
.write_event(Event::End(BytesEnd::borrowed(b"result")))
}
pub fn finish(self) -> io::Result<W> {
self.do_finish().map_err(map_xml_error)
}
fn do_finish(mut self) -> Result<W, quick_xml::Error> {
self.writer
.write_event(Event::End(BytesEnd::borrowed(b"results")))?;
self.writer
.write_event(Event::End(BytesEnd::borrowed(b"sparql")))?;
Ok(self.writer.into_inner())
}
}
/// Writes the XML serialization of a term.
///
/// Named nodes become `<uri>`, blank nodes `<bnode>`, literals `<literal>`
/// (with an `xml:lang` or a `datatype` attribute when relevant) and triples `<triple>`
/// elements whose `<subject>`, `<predicate>` and `<object>` children are written recursively.
fn write_xml_term(
    term: TermRef<'_>,
    writer: &mut Writer<impl Write>,
) -> Result<(), quick_xml::Error> {
    /// Writes `<tag>text</tag>`.
    fn write_text_element(
        writer: &mut Writer<impl Write>,
        tag: &[u8],
        text: &str,
    ) -> Result<(), quick_xml::Error> {
        writer.write_event(Event::Start(BytesStart::borrowed_name(tag)))?;
        writer.write_event(Event::Text(BytesText::from_plain_str(text)))?;
        writer.write_event(Event::End(BytesEnd::borrowed(tag)))
    }

    match term {
        TermRef::NamedNode(uri) => write_text_element(writer, b"uri", uri.as_str()),
        TermRef::BlankNode(bnode) => write_text_element(writer, b"bnode", bnode.as_str()),
        TermRef::Literal(literal) => {
            let mut tag = BytesStart::borrowed_name(b"literal");
            match literal.language() {
                Some(language) => tag.push_attribute(("xml:lang", language)),
                // Plain literals carry no attribute at all.
                None if !literal.is_plain() => {
                    tag.push_attribute(("datatype", literal.datatype().as_str()))
                }
                None => {}
            }
            writer.write_event(Event::Start(tag))?;
            writer.write_event(Event::Text(BytesText::from_plain_str(literal.value())))?;
            writer.write_event(Event::End(BytesEnd::borrowed(b"literal")))
        }
        TermRef::Triple(triple) => {
            writer.write_event(Event::Start(BytesStart::borrowed_name(b"triple")))?;

            writer.write_event(Event::Start(BytesStart::borrowed_name(b"subject")))?;
            write_xml_term(triple.subject.as_ref().into(), writer)?;
            writer.write_event(Event::End(BytesEnd::borrowed(b"subject")))?;

            writer.write_event(Event::Start(BytesStart::borrowed_name(b"predicate")))?;
            write_xml_term(triple.predicate.as_ref().into(), writer)?;
            writer.write_event(Event::End(BytesEnd::borrowed(b"predicate")))?;

            writer.write_event(Event::Start(BytesStart::borrowed_name(b"object")))?;
            write_xml_term(triple.object.as_ref(), writer)?;
            writer.write_event(Event::End(BytesEnd::borrowed(b"object")))?;

            writer.write_event(Event::End(BytesEnd::borrowed(b"triple")))
        }
    }
}
/// The outcome of reading the beginning of a SPARQL XML results document.
pub enum XmlQueryResultsReader<R: BufRead> {
/// The document contains solutions: `<results>` has been opened and the solutions are still to be read.
Solutions {
/// Variables declared in the `<head>` element, in document order.
variables: Vec<Variable>,
/// Reader of the solutions themselves.
solutions: XmlSolutionsReader<R>,
},
/// The document is a boolean result with this value.
Boolean(bool),
}
impl<R: BufRead> XmlQueryResultsReader<R> {
/// Reads the beginning of a SPARQL XML results document.
///
/// Returns [`Self::Boolean`] as soon as the content of the `<boolean>` element has been read,
/// or [`Self::Solutions`] when the opening `<results>` tag is reached,
/// handing the XML reader and its buffers over to the returned [`XmlSolutionsReader`].
///
/// # Errors
/// Fails on an unexpected namespace, tag or text, on a `<variable>` without a valid `name`,
/// and if the input ends before `<results>` or the boolean value is found.
pub fn read(source: R) -> Result<Self, ParserError> {
// Position inside the document header.
enum State {
Start,
Sparql,
Head,
AfterHead,
Boolean,
}
let mut reader = Reader::from_reader(source);
// NOTE(review): per the quick-xml documentation these options trim the whitespace around text
// and report `<tag/>` as a start event followed by an end event - confirm for the version in use.
reader.trim_text(true);
reader.expand_empty_elements(true);
let mut buffer = Vec::default();
let mut namespace_buffer = Vec::default();
let mut variables = Vec::default();
let mut state = State::Start;
//Read header
loop {
let event = {
let (ns, event) =
reader.read_namespaced_event(&mut buffer, &mut namespace_buffer)?;
// Whenever a namespace is reported, it has to be the SPARQL results one.
if let Some(ns) = ns {
if ns != b"http://www.w3.org/2005/sparql-results#".as_ref() {
return Err(SyntaxError::msg(format!(
"Unexpected namespace found in RDF/XML query result: {}",
reader.decode(ns)?
))
.into());
}
}
event
};
match event {
Event::Start(event) => match state {
State::Start => {
if event.name() == b"sparql" {
state = State::Sparql;
} else {
return Err(SyntaxError::msg(format!("Expecting <sparql> tag, found {}", reader.decode(event.name())?)).into());
}
}
State::Sparql => {
if event.name() == b"head" {
state = State::Head;
} else {
return Err(SyntaxError::msg(format!("Expecting <head> tag, found {}", reader.decode(event.name())?)).into());
}
}
State::Head => {
if event.name() == b"variable" {
// Attributes that fail to parse are skipped by the `filter_map`.
let name = event.attributes()
.filter_map(std::result::Result::ok)
.find(|attr| attr.key == b"name")
.ok_or_else(|| SyntaxError::msg("No name attribute found for the <variable> tag"))?
.unescape_and_decode_value(&reader)?;
variables.push(Variable::new(name).map_err(|e| SyntaxError::msg(format!("Invalid variable name: {}", e)))?);
} else if event.name() == b"link" {
// no op
} else {
return Err(SyntaxError::msg(format!("Expecting <variable> or <link> tag, found {}", reader.decode(event.name())?)).into());
}
}
State::AfterHead => {
if event.name() == b"boolean" {
state = State::Boolean
} else if event.name() == b"results" {
// The header is complete: index the variables by name and hand over to the solutions reader.
let mut mapping = BTreeMap::default();
for (i, var) in variables.iter().enumerate() {
mapping.insert(var.as_str().as_bytes().to_vec(), i);
}
return Ok(Self::Solutions { variables,
solutions: XmlSolutionsReader {
reader,
buffer,
namespace_buffer,
mapping,
stack: Vec::new(),
subject_stack: Vec::new(),
predicate_stack: Vec::new(),
object_stack: Vec::new(),
}});
// NOTE(review): the `results` and `boolean` comparisons are always true here because of
// the two branches above; only the `link` comparison has an effect.
} else if event.name() != b"link" && event.name() != b"results" && event.name() != b"boolean" {
return Err(SyntaxError::msg(format!("Expecting sparql tag, found {}", reader.decode(event.name())?)).into());
}
}
State::Boolean => return Err(SyntaxError::msg(format!("Unexpected tag inside of <boolean> tag: {}", reader.decode(event.name())?)).into())
},
// Text is only accepted as the content of `<boolean>` and ends the reading.
Event::Text(event) => {
let value = event.unescaped()?;
return match state {
State::Boolean => {
return if value.as_ref() == b"true" {
Ok(Self::Boolean(true))
} else if value.as_ref() == b"false" {
Ok(Self::Boolean(false))
} else {
Err(SyntaxError::msg(format!("Unexpected boolean value. Found {}", reader.decode(&value)?)).into())
};
}
_ => Err(SyntaxError::msg(format!("Unexpected textual value found: {}", reader.decode(&value)?)).into())
};
},
// Inside the head only `</head>` changes the state and the other closing tags are ignored;
// a closing tag in any other state is an error.
Event::End(event) => {
if let State::Head = state {
if event.name() == b"head" {
state = State::AfterHead
}
} else {
return Err(SyntaxError::msg("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag").into());
}
},
Event::Eof => return Err(SyntaxError::msg("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag").into()),
// Every other kind of event is ignored.
_ => (),
}
}
}
}
/// Position of [`XmlSolutionsReader::read_next`] inside the `<results>` element.
enum State {
/// Outside of any `<result>` element.
Start,
/// Directly inside a `<result>` element.
Result,
/// Directly inside a `<binding>` element.
Binding,
/// Inside a `<uri>` element.
Uri,
/// Inside a `<bnode>` element.
BNode,
/// Inside a `<literal>` element.
Literal,
/// Directly inside a `<triple>` element.
Triple,
/// Inside the `<subject>` element of a triple.
Subject,
/// Inside the `<predicate>` element of a triple.
Predicate,
/// Inside the `<object>` element of a triple.
Object,
/// A closing tag has been seen outside of any `<result>`: no solution is left.
End,
}
/// Reader of the solutions of a SPARQL XML results document.
///
/// It is built by [`XmlQueryResultsReader::read`] once the opening `<results>` tag has been read.
pub struct XmlSolutionsReader<R: BufRead> {
/// Source of the XML events.
reader: Reader<R>,
/// Event buffer handed to every `read_namespaced_event` call.
buffer: Vec<u8>,
/// Namespace buffer handed to every `read_namespaced_event` call.
namespace_buffer: Vec<u8>,
/// Maps the bytes of a variable name to the position of its value in the produced rows.
mapping: BTreeMap<Vec<u8>, usize>,
/// States to come back to when the term element being read is closed.
stack: Vec<State>,
/// Subjects of the triples being read.
subject_stack: Vec<Term>,
/// Predicates of the triples being read.
predicate_stack: Vec<Term>,
/// Objects of the triples being read.
object_stack: Vec<Term>,
}
impl<R: BufRead> XmlSolutionsReader<R> {
/// Reads the next `<result>` element.
///
/// Returns one slot per declared variable (`None` for the variables without binding),
/// or `Ok(None)` when the end of the input is reached.
///
/// # Errors
/// Fails on an unexpected namespace, tag or text, on a `<binding>` without `name` or naming
/// an undeclared variable, on an invalid term and on an incomplete or invalid `<triple>`.
pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, ParserError> {
let mut state = State::Start;
let mut new_bindings = vec![None; self.mapping.len()];
// Name of the variable of the `<binding>` being read.
let mut current_var = None;
// Last term read, until it is stored in a binding or in a triple component.
let mut term: Option<Term> = None;
// Attributes of the `<literal>` being read.
let mut lang = None;
let mut datatype = None;
loop {
let (ns, event) = self
.reader
.read_namespaced_event(&mut self.buffer, &mut self.namespace_buffer)?;
// Whenever a namespace is reported, it has to be the SPARQL results one.
if let Some(ns) = ns {
if ns != b"http://www.w3.org/2005/sparql-results#".as_ref() {
return Err(SyntaxError::msg(format!(
"Unexpected namespace found in RDF/XML query result: {}",
self.reader.decode(ns)?
))
.into());
}
}
match event {
Event::Start(event) => match state {
State::Start => {
if event.name() == b"result" {
state = State::Result;
} else {
return Err(SyntaxError::msg(format!(
"Expecting <result>, found {}",
self.reader.decode(event.name())?
))
.into());
}
}
State::Result => {
if event.name() == b"binding" {
// Attributes that fail to parse are skipped by the `filter_map`.
match event
.attributes()
.filter_map(std::result::Result::ok)
.find(|attr| attr.key == b"name")
{
Some(attr) => current_var = Some(attr.unescaped_value()?.to_vec()),
None => {
return Err(SyntaxError::msg(
"No name attribute found for the <binding> tag",
)
.into());
}
}
state = State::Binding;
} else {
return Err(SyntaxError::msg(format!(
"Expecting <binding>, found {}",
self.reader.decode(event.name())?
))
.into());
}
}
// A term element is expected: the current state is saved to be restored when the term is closed.
State::Binding | State::Subject | State::Predicate | State::Object => {
if term.is_some() {
return Err(SyntaxError::msg(
"There is already a value for the current binding",
)
.into());
}
self.stack.push(state);
if event.name() == b"uri" {
state = State::Uri;
} else if event.name() == b"bnode" {
state = State::BNode;
} else if event.name() == b"literal" {
for attr in event.attributes().flatten() {
if attr.key == b"xml:lang" {
lang = Some(attr.unescape_and_decode_value(&self.reader)?);
} else if attr.key == b"datatype" {
let iri = attr.unescape_and_decode_value(&self.reader)?;
datatype = Some(NamedNode::new(&iri).map_err(|e| {
SyntaxError::msg(format!(
"Invalid datatype IRI '{}': {}",
iri, e
))
})?);
}
}
state = State::Literal;
} else if event.name() == b"triple" {
state = State::Triple;
} else {
return Err(SyntaxError::msg(format!(
"Expecting <uri>, <bnode> or <literal> found {}",
self.reader.decode(event.name())?
))
.into());
}
}
State::Triple => {
if event.name() == b"subject" {
state = State::Subject
} else if event.name() == b"predicate" {
state = State::Predicate
} else if event.name() == b"object" {
state = State::Object
} else {
return Err(SyntaxError::msg(format!(
"Expecting <subject>, <predicate> or <object> found {}",
self.reader.decode(event.name())?
))
.into());
}
}
// Opening tags are ignored in the other states.
_ => (),
},
// Text is only accepted as the content of a `<uri>`, `<bnode>` or `<literal>` element.
Event::Text(event) => {
let data = event.unescaped()?;
match state {
State::Uri => {
let iri = self.reader.decode(&data)?;
term = Some(
NamedNode::new(iri)
.map_err(|e| {
SyntaxError::msg(format!(
"Invalid IRI value '{}': {}",
iri, e
))
})?
.into(),
)
}
State::BNode => {
let bnode = self.reader.decode(&data)?;
term = Some(
BlankNode::new(bnode)
.map_err(|e| {
SyntaxError::msg(format!(
"Invalid blank node value '{}': {}",
bnode, e
))
})?
.into(),
)
}
State::Literal => {
term = Some(
build_literal(
self.reader.decode(&data)?,
lang.take(),
datatype.take(),
)?
.into(),
);
}
_ => {
return Err(SyntaxError::msg(format!(
"Unexpected textual value found: {}",
self.reader.decode(&data)?
))
.into());
}
}
}
// The name of the closing tag is not checked: only the current state matters.
Event::End(_) => match state {
State::Start => state = State::End,
State::Result => return Ok(Some(new_bindings)),
State::Binding => {
if let Some(var) = &current_var {
if let Some(var) = self.mapping.get(var) {
new_bindings[*var] = term.take()
} else {
return Err(
SyntaxError::msg(format!("The variable '{}' is used in a binding but not declared in the variables list", self.reader.decode(var)?)).into()
);
}
} else {
return Err(SyntaxError::msg("No name found for <binding> tag").into());
}
state = State::Result;
}
// The term of a triple component is kept on its stack until the `<triple>` is closed.
State::Subject => {
if let Some(subject) = term.take() {
self.subject_stack.push(subject)
}
state = State::Triple;
}
State::Predicate => {
if let Some(predicate) = term.take() {
self.predicate_stack.push(predicate)
}
state = State::Triple;
}
State::Object => {
if let Some(object) = term.take() {
self.object_stack.push(object)
}
state = State::Triple;
}
// The `unwrap` calls below rely on the state pushed when the term element was opened.
State::Uri => state = self.stack.pop().unwrap(),
State::BNode => {
if term.is_none() {
//We default to a random bnode
term = Some(BlankNode::default().into())
}
state = self.stack.pop().unwrap()
}
State::Literal => {
if term.is_none() {
//We default to the empty literal
term = Some(build_literal("", lang.take(), datatype.take())?.into())
}
state = self.stack.pop().unwrap();
}
State::Triple => {
if let (Some(subject), Some(predicate), Some(object)) = (
self.subject_stack.pop(),
self.predicate_stack.pop(),
self.object_stack.pop(),
) {
term = Some(
Triple::new(
// The subject may be anything but a literal.
match subject {
Term::NamedNode(subject) => subject.into(),
Term::BlankNode(subject) => subject.into(),
Term::Triple(subject) => Subject::Triple(subject),
Term::Literal(_) => {
return Err(SyntaxError::msg(
"The <subject> value should not be a <literal>",
)
.into())
}
},
// The predicate has to be a named node.
match predicate {
Term::NamedNode(predicate) => predicate,
_ => {
return Err(SyntaxError::msg(
"The <predicate> value should be an <uri>",
)
.into())
}
},
object,
)
.into(),
);
state = self.stack.pop().unwrap();
} else {
return Err(
SyntaxError::msg("A <triple> should contain a <subject>, a <predicate> and an <object>").into()
);
}
}
State::End => (),
},
Event::Eof => return Ok(None),
// Every other kind of event is ignored.
_ => (),
}
}
}
}
/// Builds the [`Literal`] described by a lexical value, an optional `xml:lang` tag and an
/// optional datatype.
///
/// * Without a language tag, a typed literal is built if a datatype is given and a simple
///   literal otherwise.
/// * With a language tag, a language-tagged literal is built. A datatype is only tolerated
///   next to the tag if it is `rdf:langString`.
///
/// # Errors
///
/// Returns a syntax error if a language tag comes with a datatype other than
/// `rdf:langString`, or if the language tag is rejected by
/// `Literal::new_language_tagged_literal`.
fn build_literal(
    value: impl Into<String>,
    lang: Option<String>,
    datatype: Option<NamedNode>,
) -> Result<Literal, ParserError> {
    // Simplest case first: no language tag, the datatype alone decides the literal kind.
    let lang = match lang {
        Some(lang) => lang,
        None => {
            return Ok(match datatype {
                Some(datatype) => Literal::new_typed_literal(value, datatype),
                None => Literal::new_simple_literal(value),
            });
        }
    };

    // A language tag is incompatible with every datatype but rdf:langString.
    if let Some(datatype) = datatype {
        if datatype.as_ref() != rdf::LANG_STRING {
            return Err(SyntaxError::msg(format!(
                "xml:lang value '{}' provided with the datatype {}",
                lang, datatype
            ))
            .into());
        }
    }

    Literal::new_language_tagged_literal(value, &lang).map_err(|e| {
        SyntaxError::msg(format!("Invalid xml:lang value '{}': {}", lang, e)).into()
    })
}
fn map_xml_error(error: quick_xml::Error) -> io::Error {
match error {
quick_xml::Error::Io(error) => error,
quick_xml::Error::UnexpectedEof(_) => io::Error::new(io::ErrorKind::UnexpectedEof, error),
_ => io::Error::new(io::ErrorKind::InvalidData, error),
}
}

@ -1,455 +0,0 @@
//! Implementation of [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/)
use crate::error::{invalid_data_error, invalid_input_error};
use crate::model::vocab::rdf;
use crate::model::*;
use crate::sparql::error::EvaluationError;
use crate::sparql::model::*;
use json_event_parser::{JsonEvent, JsonReader, JsonWriter};
use std::collections::BTreeMap;
use std::io;
use std::io::{BufRead, Write};
use std::rc::Rc;
/// Serializes `results` to `sink` using the
/// [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/).
///
/// Boolean results are written as an empty `head` object followed by a `boolean` member.
/// Solutions are written as a `head` object listing the variables in `vars`, followed by a
/// `results` object whose `bindings` array holds one object per solution.
///
/// # Errors
///
/// Returns an error if writing to `sink` fails, if a solution of the iterator is an error,
/// or if `results` is a graph, which this format cannot represent.
pub fn write_json_results(results: QueryResults, sink: impl Write) -> Result<(), EvaluationError> {
    let mut writer = JsonWriter::from_writer(sink);
    match results {
        QueryResults::Boolean(value) => {
            writer.write_event(JsonEvent::StartObject)?;
            writer.write_event(JsonEvent::ObjectKey("head"))?;
            writer.write_event(JsonEvent::StartObject)?;
            writer.write_event(JsonEvent::EndObject)?;
            writer.write_event(JsonEvent::ObjectKey("boolean"))?;
            writer.write_event(JsonEvent::Boolean(value))?;
            writer.write_event(JsonEvent::EndObject)?;
            Ok(())
        }
        QueryResults::Solutions(solutions) => {
            writer.write_event(JsonEvent::StartObject)?;

            // "head": {"vars": [...]}
            writer.write_event(JsonEvent::ObjectKey("head"))?;
            writer.write_event(JsonEvent::StartObject)?;
            writer.write_event(JsonEvent::ObjectKey("vars"))?;
            writer.write_event(JsonEvent::StartArray)?;
            for variable in solutions.variables() {
                writer.write_event(JsonEvent::String(variable.as_str()))?;
            }
            writer.write_event(JsonEvent::EndArray)?;
            writer.write_event(JsonEvent::EndObject)?;

            // "results": {"bindings": [...]}
            writer.write_event(JsonEvent::ObjectKey("results"))?;
            writer.write_event(JsonEvent::StartObject)?;
            writer.write_event(JsonEvent::ObjectKey("bindings"))?;
            writer.write_event(JsonEvent::StartArray)?;
            for solution in solutions {
                // Fail before opening the object so that an evaluation error does not leave
                // a dangling `{` in the output.
                let solution = solution?;
                writer.write_event(JsonEvent::StartObject)?;
                for (variable, value) in solution.iter() {
                    writer.write_event(JsonEvent::ObjectKey(variable.as_str()))?;
                    write_json_term(value.as_ref(), &mut writer)?;
                }
                writer.write_event(JsonEvent::EndObject)?;
            }
            writer.write_event(JsonEvent::EndArray)?;
            writer.write_event(JsonEvent::EndObject)?;

            writer.write_event(JsonEvent::EndObject)?;
            Ok(())
        }
        QueryResults::Graph(_) => Err(invalid_input_error(
            "Graphs could not be formatted to SPARQL query results JSON format",
        )
        .into()),
    }
}
fn write_json_term(
term: TermRef<'_>,
writer: &mut JsonWriter<impl Write>,
) -> Result<(), EvaluationError> {
match term {
TermRef::NamedNode(uri) => {
writer.write_event(JsonEvent::StartObject)?;
writer.write_event(JsonEvent::ObjectKey("type"))?;
writer.write_event(JsonEvent::String("uri"))?;
writer.write_event(JsonEvent::ObjectKey("value"))?;
writer.write_event(JsonEvent::String(uri.as_str()))?;
writer.write_event(JsonEvent::EndObject)?;
}
TermRef::BlankNode(bnode) => {
writer.write_event(JsonEvent::StartObject)?;
writer.write_event(JsonEvent::ObjectKey("type"))?;
writer.write_event(JsonEvent::String("bnode"))?;
writer.write_event(JsonEvent::ObjectKey("value"))?;
writer.write_event(JsonEvent::String(bnode.as_str()))?;
writer.write_event(JsonEvent::EndObject)?;
}
TermRef::Literal(literal) => {
writer.write_event(JsonEvent::StartObject)?;
writer.write_event(JsonEvent::ObjectKey("type"))?;
writer.write_event(JsonEvent::String("literal"))?;
writer.write_event(JsonEvent::ObjectKey("value"))?;
writer.write_event(JsonEvent::String(literal.value()))?;
if let Some(language) = literal.language() {
writer.write_event(JsonEvent::ObjectKey("xml:lang"))?;
writer.write_event(JsonEvent::String(language))?;
} else if !literal.is_plain() {
writer.write_event(JsonEvent::ObjectKey("datatype"))?;
writer.write_event(JsonEvent::String(literal.datatype().as_str()))?;
}
writer.write_event(JsonEvent::EndObject)?;
}
TermRef::Triple(triple) => {
writer.write_event(JsonEvent::StartObject)?;
writer.write_event(JsonEvent::ObjectKey("type"))?;
writer.write_event(JsonEvent::String("triple"))?;
writer.write_event(JsonEvent::ObjectKey("value"))?;
writer.write_event(JsonEvent::StartObject)?;
writer.write_event(JsonEvent::ObjectKey("subject"))?;
write_json_term(triple.subject.as_ref().into(), writer)?;
writer.write_event(JsonEvent::ObjectKey("predicate"))?;
write_json_term(triple.predicate.as_ref().into(), writer)?;
writer.write_event(JsonEvent::ObjectKey("object"))?;
write_json_term(triple.object.as_ref(), writer)?;
writer.write_event(JsonEvent::EndObject)?;
writer.write_event(JsonEvent::EndObject)?;
}
}
Ok(())
}
/// Parses a [SPARQL Query Results JSON](https://www.w3.org/TR/sparql11-results-json/) document.
///
/// The top-level object is read key by key:
/// * `head` is read eagerly and provides the variable list,
/// * `boolean` immediately yields a [`QueryResults::Boolean`],
/// * `results` must start with a `bindings` array. The function then returns a
///   [`QueryResults::Solutions`] whose iterator reads the solutions lazily from `source`.
///
/// # Errors
///
/// Returns an `InvalidData` error when the document does not have the expected structure
/// (including a `results` key found before `head`) or when a variable name is invalid, an
/// `UnexpectedEof` error when the input stops before a `results` or `boolean` key, and any
/// error raised by the JSON reader.
pub fn read_json_results(source: impl BufRead + 'static) -> io::Result<QueryResults> {
    let mut reader = JsonReader::from_reader(source);
    let mut buffer = Vec::default();
    let mut variables = None;

    if reader.read_event(&mut buffer)? != JsonEvent::StartObject {
        return Err(invalid_data_error(
            "SPARQL JSON results should be an object",
        ));
    }

    loop {
        let event = reader.read_event(&mut buffer)?;
        match event {
            JsonEvent::ObjectKey(key) => match key {
                "head" => variables = Some(read_head(&mut reader, &mut buffer)?),
                "results" => {
                    if reader.read_event(&mut buffer)? != JsonEvent::StartObject {
                        return Err(invalid_data_error("'results' should be an object"));
                    }
                    if reader.read_event(&mut buffer)? != JsonEvent::ObjectKey("bindings") {
                        return Err(invalid_data_error(
                            "'results' should contain a 'bindings' key",
                        ));
                    }
                    if reader.read_event(&mut buffer)? != JsonEvent::StartArray {
                        return Err(invalid_data_error("'bindings' should be an array"));
                    }

                    // The solutions are decoded against the header, so it has to come first.
                    let variables = variables.ok_or_else(|| {
                        invalid_data_error("SPARQL tuple query results should contain a head key")
                    })?;
                    // Position of each variable inside a solution.
                    let mapping = variables
                        .iter()
                        .enumerate()
                        .map(|(i, var)| (var.clone(), i))
                        .collect::<BTreeMap<_, _>>();
                    let variables = variables
                        .into_iter()
                        .map(Variable::new)
                        .collect::<Result<Vec<_>, _>>()
                        .map_err(invalid_data_error)?;
                    // The reader is handed over to the iterator, which continues from the
                    // first element of the `bindings` array.
                    return Ok(QueryResults::Solutions(QuerySolutionIter::new(
                        Rc::new(variables),
                        Box::new(ResultsIterator {
                            reader,
                            buffer,
                            mapping,
                        }),
                    )));
                }
                "boolean" => {
                    return if let JsonEvent::Boolean(v) = reader.read_event(&mut buffer)? {
                        Ok(QueryResults::Boolean(v))
                    } else {
                        Err(invalid_data_error("Unexpected boolean value"))
                    }
                }
                _ => {
                    return Err(invalid_data_error(format!(
                        "Expecting 'head', 'results' or 'boolean' key, found '{}'",
                        key
                    )));
                }
            },
            JsonEvent::EndObject => {
                return Err(invalid_data_error(
                    "SPARQL results should contain a bindings key or a boolean key",
                ))
            }
            JsonEvent::Eof => return Err(io::Error::from(io::ErrorKind::UnexpectedEof)),
            _ => return Err(invalid_data_error("Invalid SPARQL results serialization")),
        }
    }
}
/// Reads the `head` object of a SPARQL JSON results document and returns the content of its
/// `vars` array.
///
/// The `link` array is read and discarded. An empty list is returned when `vars` is absent.
///
/// # Errors
///
/// Fails if the value is not an object, if it contains a key other than `vars` and `link`,
/// or if one of the two arrays is malformed.
fn read_head<R: BufRead>(
    reader: &mut JsonReader<R>,
    buffer: &mut Vec<u8>,
) -> io::Result<Vec<String>> {
    if reader.read_event(buffer)? != JsonEvent::StartObject {
        return Err(invalid_data_error("head should be an object"));
    }

    // Stays empty if the header declares no "vars" key.
    let mut declared = Vec::new();
    loop {
        match reader.read_event(buffer)? {
            JsonEvent::EndObject => return Ok(declared),
            JsonEvent::ObjectKey(key) => match key {
                "vars" => declared = read_string_array(reader, buffer)?,
                "link" => {
                    // Links are validated but not kept.
                    read_string_array(reader, buffer)?;
                }
                _ => {
                    return Err(invalid_data_error(format!(
                        "Unexpected key in head: '{}'",
                        key
                    )))
                }
            },
            _ => return Err(invalid_data_error("Invalid head serialization")),
        }
    }
}
/// Reads a JSON array made only of strings and returns its elements in order.
///
/// # Errors
///
/// Fails if the next event is not the start of an array or if an element is not a string.
fn read_string_array<R: BufRead>(
    reader: &mut JsonReader<R>,
    buffer: &mut Vec<u8>,
) -> io::Result<Vec<String>> {
    if reader.read_event(buffer)? != JsonEvent::StartArray {
        return Err(invalid_data_error("Variable list should be an array"));
    }

    let mut elements = Vec::new();
    loop {
        let element = match reader.read_event(buffer)? {
            JsonEvent::String(element) => element.to_owned(),
            JsonEvent::EndArray => break,
            _ => return Err(invalid_data_error("Variable names should be strings")),
        };
        elements.push(element);
    }
    Ok(elements)
}
/// Lazy reader of the solutions of a SPARQL JSON results document.
///
/// It is built by `read_json_results` right after the start of the `bindings` array has been read.
struct ResultsIterator<R: BufRead> {
/// JSON event reader over the input.
reader: JsonReader<R>,
/// Buffer handed to every `read_event` call of `reader`.
buffer: Vec<u8>,
/// Maps each variable name declared in the header to its position in a solution.
mapping: BTreeMap<String, usize>,
}
impl<R: BufRead> Iterator for ResultsIterator<R> {
type Item = Result<Vec<Option<Term>>, EvaluationError>;
fn next(&mut self) -> Option<Result<Vec<Option<Term>>, EvaluationError>> {
self.read_next().map_err(EvaluationError::from).transpose()
}
}
impl<R: BufRead> ResultsIterator<R> {
/// Reads the next solution object of the `bindings` array.
///
/// Returns `Ok(Some(_))` with one slot per declared variable (`None` for the variables
/// missing from the object) when an object is closed, and `Ok(None)` when the end of the
/// array or of the input is reached.
///
/// Fails if a key is not a variable declared in the header or if the serialization is invalid.
fn read_next(&mut self) -> io::Result<Option<Vec<Option<Term>>>> {
let mut new_bindings = vec![None; self.mapping.len()];
loop {
match self.reader.read_event(&mut self.buffer)? {
JsonEvent::StartObject => (),
JsonEvent::EndObject => return Ok(Some(new_bindings)),
JsonEvent::EndArray | JsonEvent::Eof => return Ok(None),
JsonEvent::ObjectKey(key) => {
// The key is a variable name: look up its slot in the solution.
let k = *self.mapping.get(key).ok_or_else(|| {
invalid_data_error(format!(
"The variable {} has not been defined in the header",
key
))
})?;
new_bindings[k] = Some(self.read_value()?)
}
_ => return Err(invalid_data_error("Invalid result serialization")),
}
}
}
/// Reads a single term serialization (a JSON object with a `type` key) and builds the
/// corresponding [`Term`].
///
/// The keys may come in any order: their values are collected first and the term is only
/// built when the object is closed. The `subject`, `predicate` and `object` keys are read
/// recursively.
///
/// Fails on an unknown key or term type, on a missing mandatory key, and on invalid IRI,
/// blank node, language tag or triple component values.
fn read_value(&mut self) -> io::Result<Term> {
// Term kind announced by the "type" key.
enum Type {
Uri,
BNode,
Literal,
Triple,
}
// Key whose value is expected next.
#[derive(Eq, PartialEq)]
enum State {
Type,
Value,
Lang,
Datatype,
}
let mut state = None;
let mut t = None;
let mut value = None;
let mut lang = None;
let mut datatype = None;
let mut subject = None;
let mut predicate = None;
let mut object = None;
if self.reader.read_event(&mut self.buffer)? != JsonEvent::StartObject {
return Err(invalid_data_error(
"Term serializations should be an object",
));
}
loop {
match self.reader.read_event(&mut self.buffer)? {
JsonEvent::ObjectKey(key) => match key {
"type" => state = Some(State::Type),
"value" => state = Some(State::Value),
"xml:lang" => state = Some(State::Lang),
"datatype" => state = Some(State::Datatype),
// The components of a triple are themselves term serializations.
"subject" => subject = Some(self.read_value()?),
"predicate" => predicate = Some(self.read_value()?),
"object" => object = Some(self.read_value()?),
_ => {
return Err(invalid_data_error(format!(
"Unexpected key in term serialization: '{}'",
key
)))
}
},
JsonEvent::StartObject => {
// A nested object is only accepted as the content of the "value" key.
if state != Some(State::Value) {
return Err(invalid_data_error(
"Unexpected nested object in term serialization",
));
}
}
JsonEvent::String(s) => match state {
Some(State::Type) => {
match s {
"uri" => t = Some(Type::Uri),
"bnode" => t = Some(Type::BNode),
"literal" => t = Some(Type::Literal),
"triple" => t = Some(Type::Triple),
_ => {
return Err(invalid_data_error(format!(
"Unexpected term type: '{}'",
s
)))
}
};
state = None;
}
Some(State::Value) => {
value = Some(s.to_owned());
state = None;
}
Some(State::Lang) => {
lang = Some(s.to_owned());
state = None;
}
Some(State::Datatype) => {
datatype = Some(NamedNode::new(s).map_err(|e| {
invalid_data_error(format!("Invalid datatype value: {}", e))
})?);
state = None;
}
_ => (), // impossible
},
JsonEvent::EndObject => {
if let Some(s) = state {
// An object closed while a key is still pending: only valid for the
// nested object of "value".
if s == State::Value {
state = None; //End of triple
} else {
return Err(invalid_data_error(
"Term description values should be string",
));
}
} else {
// End of the term object itself: build the term from the collected parts.
return match t {
None => Err(invalid_data_error(
"Term serialization should have a 'type' key",
)),
Some(Type::Uri) => Ok(NamedNode::new(value.ok_or_else(|| {
invalid_data_error("uri serialization should have a 'value' key")
})?)
.map_err(|e| invalid_data_error(format!("Invalid uri value: {}", e)))?
.into()),
Some(Type::BNode) => Ok(BlankNode::new(value.ok_or_else(|| {
invalid_data_error("bnode serialization should have a 'value' key")
})?)
.map_err(|e| invalid_data_error(format!("Invalid bnode value: {}", e)))?
.into()),
Some(Type::Literal) => {
let value = value.ok_or_else(|| {
invalid_data_error(
"literal serialization should have a 'value' key",
)
})?;
Ok(match lang {
Some(lang) => {
// A language tag is only compatible with the rdf:langString datatype.
if let Some(datatype) = datatype {
if datatype.as_ref() != rdf::LANG_STRING {
return Err(invalid_data_error(format!(
"xml:lang value '{}' provided with the datatype {}",
lang, datatype
)))
}
}
Literal::new_language_tagged_literal(value, &lang).map_err(|e| {
invalid_data_error(format!("Invalid xml:lang value '{}': {}", lang, e))
})?
}
None => if let Some(datatype) = datatype {
Literal::new_typed_literal(value, datatype)
} else {
Literal::new_simple_literal(value)
}
}
.into())
}
Some(Type::Triple) => Ok(Triple::new(
match subject.ok_or_else(|| {
invalid_data_error(
"triple serialization should have a 'subject' key",
)
})? {
Term::NamedNode(subject) => subject.into(),
Term::BlankNode(subject) => subject.into(),
Term::Triple(subject) => Subject::Triple(subject),
Term::Literal(_) => {
return Err(invalid_data_error(
"The 'subject' value should not be a literal",
))
}
},
match predicate.ok_or_else(|| {
invalid_data_error(
"triple serialization should have a 'predicate' key",
)
})? {
Term::NamedNode(predicate) => predicate,
_ => {
return Err(invalid_data_error(
"The 'predicate' value should be a uri",
))
}
},
object.ok_or_else(|| {
invalid_data_error(
"triple serialization should have a 'object' key",
)
})?,
)
.into()),
};
}
}
_ => return Err(invalid_data_error("Invalid term serialization")),
}
}
}
}

@ -3,30 +3,27 @@
//! Stores execute SPARQL. See [`Store`](crate::store::Store::query()) for an example.
mod algebra;
mod csv_results;
mod dataset;
mod error;
mod eval;
mod http;
mod json_results;
pub mod io;
mod model;
mod plan;
mod plan_builder;
mod service;
mod update;
mod xml_results;
use crate::model::{NamedNode, Term};
pub use crate::sparql::algebra::{Query, Update};
use crate::sparql::dataset::DatasetView;
pub use crate::sparql::error::EvaluationError;
use crate::sparql::eval::SimpleEvaluator;
pub use crate::sparql::model::QueryResults;
pub use crate::sparql::model::QueryResultsFormat;
pub use crate::sparql::model::QuerySolution;
pub use crate::sparql::model::QuerySolutionIter;
pub use crate::sparql::model::QueryTripleIter;
pub use crate::sparql::model::{Variable, VariableNameParseError};
pub use crate::sparql::io::QueryResultsFormat;
pub use crate::sparql::model::{
QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter, Variable,
VariableNameParseError,
};
use crate::sparql::plan_builder::PlanBuilder;
pub use crate::sparql::service::ServiceHandler;
use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler};

@ -2,10 +2,8 @@ use crate::error::invalid_input_error;
use crate::io::GraphFormat;
use crate::io::GraphSerializer;
use crate::model::*;
use crate::sparql::csv_results::{read_tsv_results, write_csv_results, write_tsv_results};
use crate::sparql::error::EvaluationError;
use crate::sparql::json_results::{read_json_results, write_json_results};
use crate::sparql::xml_results::{read_xml_results, write_xml_results};
use crate::sparql::io::{QueryResultsFormat, QueryResultsParser, QueryResultsSerializer};
use std::error::Error;
use std::io::{BufRead, Write};
use std::rc::Rc;
@ -24,14 +22,9 @@ pub enum QueryResults {
impl QueryResults {
/// Reads a SPARQL query results serialization.
///
/// The parsing is delegated to the [`QueryResultsParser`] built for `format` and its output
/// is converted into a [`QueryResults`].
///
/// # Errors
///
/// Returns the error raised by `QueryResultsParser::read_results`.
pub fn read(reader: impl BufRead + 'static, format: QueryResultsFormat) -> io::Result<Self> {
    // The per-format dispatch (`read_xml_results`, `read_json_results`, `read_tsv_results`)
    // is superseded by the parser: keeping it in front of this expression cannot type-check.
    Ok(QueryResultsParser::from_format(format)
        .read_results(reader)?
        .into())
}
/// Writes the query results (solutions or boolean).
@ -57,12 +50,44 @@ impl QueryResults {
writer: impl Write,
format: QueryResultsFormat,
) -> Result<(), EvaluationError> {
match format {
QueryResultsFormat::Xml => write_xml_results(self, writer),
QueryResultsFormat::Json => write_json_results(self, writer),
QueryResultsFormat::Csv => write_csv_results(self, writer),
QueryResultsFormat::Tsv => write_tsv_results(self, writer),
let serializer = QueryResultsSerializer::from_format(format);
match self {
Self::Boolean(value) => {
serializer.write_boolean_result(writer, value)?;
}
QueryResults::Solutions(solutions) => {
let mut writer = serializer.solutions_writer(writer, solutions.variables())?;
for solution in solutions {
writer.write(
solution?
.values
.iter()
.map(|t| t.as_ref().map(|t| t.as_ref())),
)?;
}
writer.finish()?;
}
QueryResults::Graph(triples) => {
let mut writer = serializer.solutions_writer(
writer,
&[
Variable::new_unchecked("subject"),
Variable::new_unchecked("predicate"),
Variable::new_unchecked("object"),
],
)?;
for triple in triples {
let triple = triple?;
writer.write([
Some(triple.subject.as_ref().into()),
Some(triple.predicate.as_ref().into()),
Some(triple.object.as_ref()),
])?;
}
writer.finish()?;
}
}
Ok(())
}
/// Writes the graph query results.
@ -113,115 +138,6 @@ impl From<QuerySolutionIter> for QueryResults {
}
}
/// [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
pub enum QueryResultsFormat {
    /// [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/)
    Xml,
    /// [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/)
    Json,
    /// [SPARQL Query Results CSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/)
    Csv,
    /// [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/)
    Tsv,
}

impl QueryResultsFormat {
    /// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/).
    ///
    /// ```
    /// use oxigraph::sparql::QueryResultsFormat;
    ///
    /// assert_eq!(QueryResultsFormat::Json.iri(), "http://www.w3.org/ns/formats/SPARQL_Results_JSON")
    /// ```
    #[inline]
    pub fn iri(self) -> &'static str {
        match self {
            Self::Xml => "http://www.w3.org/ns/formats/SPARQL_Results_XML",
            Self::Json => "http://www.w3.org/ns/formats/SPARQL_Results_JSON",
            Self::Csv => "http://www.w3.org/ns/formats/SPARQL_Results_CSV",
            Self::Tsv => "http://www.w3.org/ns/formats/SPARQL_Results_TSV",
        }
    }

    /// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
    ///
    /// ```
    /// use oxigraph::sparql::QueryResultsFormat;
    ///
    /// assert_eq!(QueryResultsFormat::Json.media_type(), "application/sparql-results+json")
    /// ```
    #[inline]
    pub fn media_type(self) -> &'static str {
        match self {
            Self::Xml => "application/sparql-results+xml",
            Self::Json => "application/sparql-results+json",
            Self::Csv => "text/csv; charset=utf-8",
            Self::Tsv => "text/tab-separated-values; charset=utf-8",
        }
    }

    /// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension.
    ///
    /// ```
    /// use oxigraph::sparql::QueryResultsFormat;
    ///
    /// assert_eq!(QueryResultsFormat::Json.file_extension(), "srj")
    /// ```
    #[inline]
    pub fn file_extension(self) -> &'static str {
        match self {
            Self::Xml => "srx",
            Self::Json => "srj",
            Self::Csv => "csv",
            Self::Tsv => "tsv",
        }
    }

    /// Looks for a known format from a media type.
    ///
    /// It supports some media type aliases.
    /// For example "application/xml" is going to return `Xml` even if it is not its canonical media type.
    ///
    /// Parameters (everything after the first `;`) and surrounding whitespace are ignored and
    /// the comparison is ASCII case-insensitive, because media type names are case-insensitive.
    ///
    /// Example:
    /// ```
    /// use oxigraph::sparql::QueryResultsFormat;
    ///
    /// assert_eq!(QueryResultsFormat::from_media_type("application/sparql-results+json; charset=utf-8"), Some(QueryResultsFormat::Json))
    /// ```
    pub fn from_media_type(media_type: &str) -> Option<Self> {
        let essence = media_type.split(';').next()?.trim().to_ascii_lowercase();
        match essence.as_str() {
            "application/sparql-results+xml" | "application/xml" | "text/xml" => Some(Self::Xml),
            "application/sparql-results+json" | "application/json" | "text/json" => {
                Some(Self::Json)
            }
            "text/csv" => Some(Self::Csv),
            "text/tab-separated-values" | "text/tsv" => Some(Self::Tsv),
            _ => None,
        }
    }

    /// Looks for a known format from an extension.
    ///
    /// It supports some aliases and the comparison is ASCII case-insensitive, so that
    /// `"JSON"` is recognized like `"json"`.
    ///
    /// Example:
    /// ```
    /// use oxigraph::sparql::QueryResultsFormat;
    ///
    /// assert_eq!(QueryResultsFormat::from_extension("json"), Some(QueryResultsFormat::Json))
    /// ```
    pub fn from_extension(extension: &str) -> Option<Self> {
        match extension.to_ascii_lowercase().as_str() {
            "srx" | "xml" => Some(Self::Xml),
            "srj" | "json" => Some(Self::Json),
            "csv" | "txt" => Some(Self::Csv),
            "tsv" => Some(Self::Tsv),
            _ => None,
        }
    }
}
/// An iterator over [`QuerySolution`]s.
///
/// ```
@ -288,8 +204,8 @@ impl Iterator for QuerySolutionIter {
///
/// It is the equivalent of a row in SQL.
pub struct QuerySolution {
values: Vec<Option<Term>>,
variables: Rc<Vec<Variable>>,
pub(super) values: Vec<Option<Term>>,
pub(super) variables: Rc<Vec<Variable>>,
}
impl QuerySolution {

@ -1,674 +0,0 @@
//! Implementation of [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/)
use crate::error::{invalid_data_error, invalid_input_error};
use crate::model::vocab::rdf;
use crate::model::*;
use crate::sparql::error::EvaluationError;
use crate::sparql::model::*;
use quick_xml::events::BytesDecl;
use quick_xml::events::BytesEnd;
use quick_xml::events::BytesStart;
use quick_xml::events::BytesText;
use quick_xml::events::Event;
use quick_xml::Reader;
use quick_xml::Writer;
use std::collections::BTreeMap;
use std::io;
use std::io::BufRead;
use std::io::Write;
use std::iter::empty;
use std::rc::Rc;
/// Serializes `results` to `sink` using the
/// [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/).
///
/// Booleans are handled by `write_boolean` and solutions by `write_solutions`.
///
/// # Errors
///
/// Returns the error of the underlying writer, or an invalid input error if `results` is a
/// graph.
pub fn write_xml_results(results: QueryResults, sink: impl Write) -> Result<(), EvaluationError> {
    let solutions = match results {
        QueryResults::Boolean(value) => {
            write_boolean(value, sink).map_err(map_xml_error)?;
            return Ok(());
        }
        QueryResults::Solutions(solutions) => solutions,
        QueryResults::Graph(_) => {
            return Err(invalid_input_error(
                "Graphs could not be formatted to SPARQL query results XML format",
            )
            .into())
        }
    };
    write_solutions(solutions, sink)
}
/// Writes a boolean result as a complete SPARQL XML results document: an XML declaration,
/// then a `<sparql>` root holding an empty `<head>` and a `<boolean>` element whose text is
/// `true` or `false`.
fn write_boolean(value: bool, sink: impl Write) -> Result<(), quick_xml::Error> {
    let text = if value { "true" } else { "false" };

    let mut root = BytesStart::borrowed_name(b"sparql");
    root.push_attribute(("xmlns", "http://www.w3.org/2005/sparql-results#"));

    // The whole document, in writing order.
    let events = vec![
        Event::Decl(BytesDecl::new(b"1.0", None, None)),
        Event::Start(root),
        Event::Start(BytesStart::borrowed_name(b"head")),
        Event::End(BytesEnd::borrowed(b"head")),
        Event::Start(BytesStart::borrowed_name(b"boolean")),
        Event::Text(BytesText::from_plain_str(text)),
        Event::End(BytesEnd::borrowed(b"boolean")),
        Event::End(BytesEnd::borrowed(b"sparql")),
    ];

    let mut writer = Writer::new(sink);
    for event in events {
        writer.write_event(event)?;
    }
    Ok(())
}
fn write_solutions(solutions: QuerySolutionIter, sink: impl Write) -> Result<(), EvaluationError> {
let mut writer = Writer::new(sink);
writer
.write_event(Event::Decl(BytesDecl::new(b"1.0", None, None)))
.map_err(map_xml_error)?;
let mut sparql_open = BytesStart::borrowed_name(b"sparql");
sparql_open.push_attribute(("xmlns", "http://www.w3.org/2005/sparql-results#"));
writer
.write_event(Event::Start(sparql_open))
.map_err(map_xml_error)?;
writer
.write_event(Event::Start(BytesStart::borrowed_name(b"head")))
.map_err(map_xml_error)?;
for variable in solutions.variables() {
let mut variable_tag = BytesStart::borrowed_name(b"variable");
variable_tag.push_attribute(("name", variable.as_str()));
writer
.write_event(Event::Empty(variable_tag))
.map_err(map_xml_error)?;
}
writer
.write_event(Event::End(BytesEnd::borrowed(b"head")))
.map_err(map_xml_error)?;
writer
.write_event(Event::Start(BytesStart::borrowed_name(b"results")))
.map_err(map_xml_error)?;
for solution in solutions {
let solution = solution?;
writer
.write_event(Event::Start(BytesStart::borrowed_name(b"result")))
.map_err(map_xml_error)?;
for (variable, value) in solution.iter() {
let mut binding_tag = BytesStart::borrowed_name(b"binding");
binding_tag.push_attribute(("name", variable.as_str()));
writer
.write_event(Event::Start(binding_tag))
.map_err(map_xml_error)?;
write_xml_term(value.as_ref(), &mut writer)?;
writer
.write_event(Event::End(BytesEnd::borrowed(b"binding")))
.map_err(map_xml_error)?;
}
writer
.write_event(Event::End(BytesEnd::borrowed(b"result")))
.map_err(map_xml_error)?;
}
writer
.write_event(Event::End(BytesEnd::borrowed(b"results")))
.map_err(map_xml_error)?;
writer
.write_event(Event::End(BytesEnd::borrowed(b"sparql")))
.map_err(map_xml_error)?;
Ok(())
}
fn write_xml_term(
term: TermRef<'_>,
writer: &mut Writer<impl Write>,
) -> Result<(), EvaluationError> {
match term {
TermRef::NamedNode(uri) => {
writer
.write_event(Event::Start(BytesStart::borrowed_name(b"uri")))
.map_err(map_xml_error)?;
writer
.write_event(Event::Text(BytesText::from_plain_str(uri.as_str())))
.map_err(map_xml_error)?;
writer
.write_event(Event::End(BytesEnd::borrowed(b"uri")))
.map_err(map_xml_error)?;
}
TermRef::BlankNode(bnode) => {
writer
.write_event(Event::Start(BytesStart::borrowed_name(b"bnode")))
.map_err(map_xml_error)?;
writer
.write_event(Event::Text(BytesText::from_plain_str(bnode.as_str())))
.map_err(map_xml_error)?;
writer
.write_event(Event::End(BytesEnd::borrowed(b"bnode")))
.map_err(map_xml_error)?;
}
TermRef::Literal(literal) => {
let mut literal_tag = BytesStart::borrowed_name(b"literal");
if let Some(language) = literal.language() {
literal_tag.push_attribute(("xml:lang", language));
} else if !literal.is_plain() {
literal_tag.push_attribute(("datatype", literal.datatype().as_str()));
}
writer
.write_event(Event::Start(literal_tag))
.map_err(map_xml_error)?;
writer
.write_event(Event::Text(BytesText::from_plain_str(literal.value())))
.map_err(map_xml_error)?;
writer
.write_event(Event::End(BytesEnd::borrowed(b"literal")))
.map_err(map_xml_error)?;
}
TermRef::Triple(triple) => {
writer
.write_event(Event::Start(BytesStart::borrowed_name(b"triple")))
.map_err(map_xml_error)?;
writer
.write_event(Event::Start(BytesStart::borrowed_name(b"subject")))
.map_err(map_xml_error)?;
write_xml_term(triple.subject.as_ref().into(), writer)?;
writer
.write_event(Event::End(BytesEnd::borrowed(b"subject")))
.map_err(map_xml_error)?;
writer
.write_event(Event::Start(BytesStart::borrowed_name(b"predicate")))
.map_err(map_xml_error)?;
write_xml_term(triple.predicate.as_ref().into(), writer)?;
writer
.write_event(Event::End(BytesEnd::borrowed(b"predicate")))
.map_err(map_xml_error)?;
writer
.write_event(Event::Start(BytesStart::borrowed_name(b"object")))
.map_err(map_xml_error)?;
write_xml_term(triple.object.as_ref(), writer)?;
writer
.write_event(Event::End(BytesEnd::borrowed(b"object")))
.map_err(map_xml_error)?;
writer
.write_event(Event::End(BytesEnd::borrowed(b"triple")))
.map_err(map_xml_error)?;
}
}
Ok(())
}
pub fn read_xml_results(source: impl BufRead + 'static) -> io::Result<QueryResults> {
enum State {
Start,
Sparql,
Head,
AfterHead,
Boolean,
}
let mut reader = Reader::from_reader(source);
reader.trim_text(true);
let mut buffer = Vec::default();
let mut namespace_buffer = Vec::default();
let mut variables: Vec<String> = Vec::default();
let mut state = State::Start;
//Read header
loop {
let event = {
let (ns, event) = reader
.read_namespaced_event(&mut buffer, &mut namespace_buffer)
.map_err(map_xml_error)?;
if let Some(ns) = ns {
if ns != b"http://www.w3.org/2005/sparql-results#".as_ref() {
return Err(invalid_data_error(format!(
"Unexpected namespace found in RDF/XML query result: {}",
reader.decode(ns).map_err(map_xml_error)?
)));
}
}
event
};
match event {
Event::Start(event) => match state {
State::Start => {
if event.name() == b"sparql" {
state = State::Sparql;
} else {
return Err(invalid_data_error(format!("Expecting <sparql> tag, found {}", reader.decode(event.name()).map_err(map_xml_error)?)));
}
}
State::Sparql => {
if event.name() == b"head" {
state = State::Head;
} else {
return Err(invalid_data_error(format!("Expecting <head> tag, found {}", reader.decode(event.name()).map_err(map_xml_error)?)));
}
}
State::Head => {
if event.name() == b"variable" {
let name = event.attributes()
.filter_map(std::result::Result::ok)
.find(|attr| attr.key == b"name")
.ok_or_else(|| invalid_data_error("No name attribute found for the <variable> tag"))?;
variables.push(name.unescape_and_decode_value(&reader).map_err(map_xml_error)?);
} else if event.name() == b"link" {
// no op
} else {
return Err(invalid_data_error(format!("Expecting <variable> or <link> tag, found {}", reader.decode(event.name()).map_err(map_xml_error)?)));
}
}
State::AfterHead => {
if event.name() == b"boolean" {
state = State::Boolean
} else if event.name() == b"results" {
let mut mapping = BTreeMap::default();
for (i,var) in variables.iter().enumerate() {
mapping.insert(var.as_bytes().to_vec(), i);
}
return Ok(QueryResults::Solutions(QuerySolutionIter::new(
Rc::new(variables.into_iter().map(Variable::new).collect::<Result<Vec<_>,_>>().map_err(invalid_data_error)?),
Box::new(ResultsIterator {
reader,
buffer,
namespace_buffer,
mapping,
stack: Vec::new(),
subject_stack: Vec::new(),
predicate_stack: Vec::new(),
object_stack:Vec::new(),
}),
)));
} else if event.name() != b"link" && event.name() != b"results" && event.name() != b"boolean" {
return Err(invalid_data_error(format!("Expecting sparql tag, found {}", reader.decode(event.name()).map_err(map_xml_error)?)));
}
}
State::Boolean => return Err(invalid_data_error(format!("Unexpected tag inside of <boolean> tag: {}", reader.decode(event.name()).map_err(map_xml_error)?)))
},
Event::Empty(event) => match state {
State::Sparql => {
if event.name() == b"head" {
state = State::AfterHead;
} else {
return Err(invalid_data_error(format!("Expecting <head> tag, found {}", reader.decode(event.name()).map_err(map_xml_error)?)));
}
}
State::Head => {
if event.name() == b"variable" {
let name = event.attributes()
.filter_map(std::result::Result::ok)
.find(|attr| attr.key == b"name")
.ok_or_else(|| invalid_data_error("No name attribute found for the <variable> tag"))?;
variables.push(name.unescape_and_decode_value(&reader).map_err(map_xml_error)?);
} else if event.name() == b"link" {
// no op
} else {
return Err(invalid_data_error(format!("Expecting <variable> or <link> tag, found {}", reader.decode(event.name()).map_err(map_xml_error)?)));
}
},
State::AfterHead => {
return if event.name() == b"results" {
Ok(QueryResults::Solutions(QuerySolutionIter::new(
Rc::new(variables.into_iter().map(Variable::new).collect::<Result<Vec<_>,_>>().map_err(invalid_data_error)?),
Box::new(empty()),
)))
} else {
Err(invalid_data_error(format!("Unexpected autoclosing tag <{}>", reader.decode(event.name()).map_err(map_xml_error)?)))
}
}
_ => return Err(invalid_data_error(format!("Unexpected autoclosing tag <{}>", reader.decode(event.name()).map_err(map_xml_error)?)))
},
Event::Text(event) => {
let value = event.unescaped().map_err(map_xml_error)?;
return match state {
State::Boolean => {
return if value.as_ref() == b"true" {
Ok(QueryResults::Boolean(true))
} else if value.as_ref() == b"false" {
Ok(QueryResults::Boolean(false))
} else {
Err(invalid_data_error(format!("Unexpected boolean value. Found {}", reader.decode(&value).map_err(map_xml_error)?)))
};
}
_ => Err(invalid_data_error(format!("Unexpected textual value found: {}", reader.decode(&value).map_err(map_xml_error)?)))
};
},
Event::End(_) => if let State::Head = state {
state = State::AfterHead;
} else {
return Err(invalid_data_error("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag"));
},
Event::Eof => return Err(invalid_data_error("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag")),
_ => (),
}
}
}
/// Position of the `<result>` reader inside the XML document.
///
/// `ResultsIterator::read_next` starts every call in [`State::Start`] and moves
/// between these states as start tags, text and end tags are read.
enum State {
    /// Initial state of each `read_next` call: a `<result>` start tag is expected.
    Start,
    /// Inside a `<result>` element: a `<binding>` start tag is expected.
    Result,
    /// Inside a `<binding>` element: a term element is expected.
    Binding,
    /// Inside a `<uri>` element: its text is parsed as an IRI.
    Uri,
    /// Inside a `<bnode>` element: its text is parsed as a blank node identifier.
    BNode,
    /// Inside a `<literal>` element: its text is the literal value.
    Literal,
    /// Inside a `<triple>` element: `<subject>`, `<predicate>` or `<object>` is expected.
    Triple,
    /// Inside the `<subject>` element of a `<triple>`.
    Subject,
    /// Inside the `<predicate>` element of a `<triple>`.
    Predicate,
    /// Inside the `<object>` element of a `<triple>`.
    Object,
    /// Entered when an end tag is read while in [`State::Start`];
    /// start and end tags read afterwards are ignored.
    End,
}
/// Iterator over the solutions of a SPARQL XML results document.
///
/// Each item is one solution: a vector with one optional [`Term`] per declared variable.
struct ResultsIterator<R: BufRead> {
    /// XML reader the events are pulled from.
    reader: Reader<R>,
    /// Buffer handed to the reader to hold the content of the events.
    buffer: Vec<u8>,
    /// Buffer handed to the reader for namespace resolution.
    namespace_buffer: Vec<u8>,
    /// Maps a variable name (as raw bytes) to its index in the solution vector.
    mapping: BTreeMap<Vec<u8>, usize>,
    /// States to go back to when the term element currently being read is closed.
    stack: Vec<State>,
    /// Subjects read for the `<triple>` elements currently being built.
    subject_stack: Vec<Term>,
    /// Predicates read for the `<triple>` elements currently being built.
    predicate_stack: Vec<Term>,
    /// Objects read for the `<triple>` elements currently being built.
    object_stack: Vec<Term>,
}
impl<R: BufRead> Iterator for ResultsIterator<R> {
    type Item = Result<Vec<Option<Term>>, EvaluationError>;

    /// Yields the next solution, an error, or `None` once `read_next` reports
    /// that there is nothing left to read.
    fn next(&mut self) -> Option<Self::Item> {
        match self.read_next() {
            Ok(Some(solution)) => Some(Ok(solution)),
            Ok(None) => None,
            Err(error) => Some(Err(error)),
        }
    }
}
impl<R: BufRead> ResultsIterator<R> {
    /// Reads XML events until a complete `<result>` element has been consumed.
    ///
    /// Returns `Ok(Some(bindings))` where `bindings` has one slot per entry of
    /// `self.mapping` (unbound variables stay `None`), or `Ok(None)` when the
    /// end of the input is reached.
    ///
    /// # Errors
    ///
    /// Returns an error if the XML reader fails, if an element lives in a namespace
    /// other than the SPARQL results one, or if the document structure is not the
    /// expected one (unexpected tag or text, undeclared variable, invalid IRI,
    /// blank node or language tag, incomplete `<triple>`...).
    fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, EvaluationError> {
        let mut state = State::Start;
        let mut new_bindings = vec![None; self.mapping.len()];
        let mut current_var = None;
        let mut term: Option<Term> = None;
        let mut lang = None;
        let mut datatype = None;
        loop {
            // The reader appends each event to the buffer: drop the previous event
            // (nothing borrows it anymore) so memory use does not grow with the input.
            self.buffer.clear();
            let (ns, event) = self
                .reader
                .read_namespaced_event(&mut self.buffer, &mut self.namespace_buffer)
                .map_err(map_xml_error)?;
            if let Some(ns) = ns {
                if ns != b"http://www.w3.org/2005/sparql-results#".as_ref() {
                    return Err(invalid_data_error(format!(
                        "Unexpected namespace found in SPARQL XML query results: {}",
                        self.reader.decode(ns).map_err(map_xml_error)?
                    ))
                    .into());
                }
            }
            // NOTE(review): self-closing elements (`Event::Empty`, e.g. `<literal/>`) are not
            // matched below and fall in the catch-all arm, unless the reader has been
            // configured to expand them into start/end pairs. To be confirmed.
            match event {
                Event::Start(event) => match state {
                    State::Start => {
                        if event.name() == b"result" {
                            state = State::Result;
                        } else {
                            return Err(invalid_data_error(format!(
                                "Expecting <result>, found {}",
                                self.reader.decode(event.name()).map_err(map_xml_error)?
                            ))
                            .into());
                        }
                    }
                    State::Result => {
                        if event.name() == b"binding" {
                            match event
                                .attributes()
                                .filter_map(std::result::Result::ok)
                                .find(|attr| attr.key == b"name")
                            {
                                Some(attr) => {
                                    current_var = Some(
                                        attr.unescaped_value().map_err(map_xml_error)?.to_vec(),
                                    )
                                }
                                None => {
                                    return Err(invalid_data_error(
                                        "No name attribute found for the <binding> tag",
                                    )
                                    .into());
                                }
                            }
                            state = State::Binding;
                        } else {
                            return Err(invalid_data_error(format!(
                                "Expecting <binding>, found {}",
                                self.reader.decode(event.name()).map_err(map_xml_error)?
                            ))
                            .into());
                        }
                    }
                    State::Binding | State::Subject | State::Predicate | State::Object => {
                        if term.is_some() {
                            return Err(invalid_data_error(
                                "There is already a value for the current binding",
                            )
                            .into());
                        }
                        // Remember where to go back to when the term element is closed.
                        self.stack.push(state);
                        if event.name() == b"uri" {
                            state = State::Uri;
                        } else if event.name() == b"bnode" {
                            state = State::BNode;
                        } else if event.name() == b"literal" {
                            for attr in event.attributes().flatten() {
                                if attr.key == b"xml:lang" {
                                    lang = Some(
                                        attr.unescape_and_decode_value(&self.reader)
                                            .map_err(map_xml_error)?,
                                    );
                                } else if attr.key == b"datatype" {
                                    let iri = attr
                                        .unescape_and_decode_value(&self.reader)
                                        .map_err(map_xml_error)?;
                                    datatype = Some(NamedNode::new(&iri).map_err(|e| {
                                        invalid_data_error(format!(
                                            "Invalid datatype IRI '{}': {}",
                                            iri, e
                                        ))
                                    })?);
                                }
                            }
                            state = State::Literal;
                        } else if event.name() == b"triple" {
                            state = State::Triple;
                        } else {
                            return Err(invalid_data_error(format!(
                                "Expecting <uri>, <bnode>, <literal> or <triple>, found {}",
                                self.reader.decode(event.name()).map_err(map_xml_error)?
                            ))
                            .into());
                        }
                    }
                    State::Triple => {
                        if event.name() == b"subject" {
                            state = State::Subject
                        } else if event.name() == b"predicate" {
                            state = State::Predicate
                        } else if event.name() == b"object" {
                            state = State::Object
                        } else {
                            return Err(invalid_data_error(format!(
                                "Expecting <subject>, <predicate> or <object> found {}",
                                self.reader.decode(event.name()).map_err(map_xml_error)?
                            ))
                            .into());
                        }
                    }
                    _ => (),
                },
                Event::Text(event) => {
                    let data = event.unescaped().map_err(map_xml_error)?;
                    match state {
                        State::Uri => {
                            let iri = self.reader.decode(&data).map_err(map_xml_error)?;
                            term = Some(
                                NamedNode::new(iri)
                                    .map_err(|e| {
                                        invalid_data_error(format!(
                                            "Invalid IRI value '{}': {}",
                                            iri, e
                                        ))
                                    })?
                                    .into(),
                            )
                        }
                        State::BNode => {
                            let bnode = self.reader.decode(&data).map_err(map_xml_error)?;
                            term = Some(
                                BlankNode::new(bnode)
                                    .map_err(|e| {
                                        invalid_data_error(format!(
                                            "Invalid blank node value '{}': {}",
                                            bnode, e
                                        ))
                                    })?
                                    .into(),
                            )
                        }
                        State::Literal => {
                            term = Some(
                                build_literal(
                                    self.reader.decode(&data).map_err(map_xml_error)?,
                                    lang.take(),
                                    datatype.take(),
                                )?
                                .into(),
                            );
                        }
                        _ => {
                            return Err(invalid_data_error(format!(
                                "Unexpected textual value found: {}",
                                self.reader.decode(&data).map_err(map_xml_error)?
                            ))
                            .into());
                        }
                    }
                }
                Event::End(_) => match state {
                    State::Start => state = State::End,
                    // End of the <result> element: the solution is complete.
                    State::Result => return Ok(Some(new_bindings)),
                    State::Binding => {
                        if let Some(var) = &current_var {
                            if let Some(var) = self.mapping.get(var) {
                                new_bindings[*var] = term.take()
                            } else {
                                return Err(invalid_data_error(format!(
                                    "The variable '{}' is used in a binding but not declared in the variables list",
                                    self.reader.decode(var).map_err(map_xml_error)?
                                ))
                                .into());
                            }
                        } else {
                            return Err(
                                invalid_data_error("No name found for <binding> tag").into()
                            );
                        }
                        state = State::Result;
                    }
                    State::Subject => {
                        if let Some(subject) = term.take() {
                            self.subject_stack.push(subject)
                        }
                        state = State::Triple;
                    }
                    State::Predicate => {
                        if let Some(predicate) = term.take() {
                            self.predicate_stack.push(predicate)
                        }
                        state = State::Triple;
                    }
                    State::Object => {
                        if let Some(object) = term.take() {
                            self.object_stack.push(object)
                        }
                        state = State::Triple;
                    }
                    // The term states are only entered right after a push on `self.stack`,
                    // so the `pop` calls below always find the state to restore.
                    State::Uri => state = self.stack.pop().unwrap(),
                    State::BNode => {
                        if term.is_none() {
                            // We default to a random bnode
                            term = Some(BlankNode::default().into())
                        }
                        state = self.stack.pop().unwrap()
                    }
                    State::Literal => {
                        if term.is_none() {
                            // We default to the empty literal
                            term = Some(build_literal("", lang.take(), datatype.take())?.into())
                        }
                        state = self.stack.pop().unwrap();
                    }
                    State::Triple => {
                        if let (Some(subject), Some(predicate), Some(object)) = (
                            self.subject_stack.pop(),
                            self.predicate_stack.pop(),
                            self.object_stack.pop(),
                        ) {
                            term = Some(
                                Triple::new(
                                    match subject {
                                        Term::NamedNode(subject) => subject.into(),
                                        Term::BlankNode(subject) => subject.into(),
                                        Term::Triple(subject) => Subject::Triple(subject),
                                        Term::Literal(_) => {
                                            return Err(invalid_data_error(
                                                "The <subject> value should not be a <literal>",
                                            )
                                            .into())
                                        }
                                    },
                                    match predicate {
                                        Term::NamedNode(predicate) => predicate,
                                        _ => {
                                            return Err(invalid_data_error(
                                                "The <predicate> value should be an <uri>",
                                            )
                                            .into())
                                        }
                                    },
                                    object,
                                )
                                .into(),
                            );
                            state = self.stack.pop().unwrap();
                        } else {
                            return Err(invalid_data_error(
                                "A <triple> should contain a <subject>, a <predicate> and an <object>",
                            )
                            .into());
                        }
                    }
                    State::End => (),
                },
                Event::Eof => return Ok(None),
                _ => (),
            }
        }
    }
}
/// Builds a literal from its lexical value and the optional `xml:lang` and
/// `datatype` attribute values of a `<literal>` element.
///
/// Without a language tag the literal is typed if a datatype is given and simple
/// otherwise. With a language tag a language-tagged literal is built.
///
/// # Errors
///
/// Fails if a language tag is given together with a datatype that is not
/// `rdf:langString`, or if the language tag itself is rejected.
fn build_literal(
    value: impl Into<String>,
    lang: Option<String>,
    datatype: Option<NamedNode>,
) -> Result<Literal, EvaluationError> {
    let lang = match lang {
        Some(tag) => tag,
        None => {
            // No language tag: the datatype alone decides the kind of literal.
            return Ok(match datatype {
                Some(datatype) => Literal::new_typed_literal(value, datatype),
                None => Literal::new_simple_literal(value),
            });
        }
    };

    // A language tag is only compatible with the rdf:langString datatype.
    if let Some(datatype) = datatype {
        if datatype.as_ref() != rdf::LANG_STRING {
            return Err(invalid_data_error(format!(
                "xml:lang value '{}' provided with the datatype {}",
                lang, datatype
            ))
            .into());
        }
    }

    match Literal::new_language_tagged_literal(value, &lang) {
        Ok(literal) => Ok(literal),
        Err(e) => {
            Err(invalid_data_error(format!("Invalid xml:lang value '{}': {}", lang, e)).into())
        }
    }
}
fn map_xml_error(error: quick_xml::Error) -> io::Error {
match error {
quick_xml::Error::Io(error) => error,
quick_xml::Error::UnexpectedEof(_) => io::Error::new(io::ErrorKind::UnexpectedEof, error),
_ => invalid_data_error(error),
}
}
Loading…
Cancel
Save