diff --git a/README.md b/README.md index 3dbc43c0..4da0ee77 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ It is split into multiple parts: Oxigraph implements the following specifications: * [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/). * [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio). -* [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/) and [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/). +* [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/). A preliminary benchmark [is provided](bench/README.md). diff --git a/lib/src/sparql/csv_results.rs b/lib/src/sparql/csv_results.rs new file mode 100644 index 00000000..a34adc71 --- /dev/null +++ b/lib/src/sparql/csv_results.rs @@ -0,0 +1,221 @@ +//! 
Implementation of [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/) + +use crate::error::invalid_input_error; +use crate::model::{vocab::xsd, *}; +use crate::sparql::error::EvaluationError; +use crate::sparql::model::*; +use std::io::{self, Write}; + +pub fn write_csv_results( + results: QueryResults, + mut sink: impl Write, +) -> Result<(), EvaluationError> { + match results { + QueryResults::Boolean(_) => Err(invalid_input_error( + "boolean could not be formatted to SPARQL query results CSV format", + ) + .into()), + QueryResults::Solutions(solutions) => { + let mut start_vars = true; + for variable in solutions.variables() { + if start_vars { + start_vars = false; + } else { + sink.write_all(b",")?; + } + sink.write_all(variable.as_str().as_bytes())?; + } + + let size = solutions.variables().len(); + for solution in solutions { + let solution = solution?; + sink.write_all(b"\r\n")?; + let mut start_binding = true; + for i in 0..size { + if start_binding { + start_binding = false; + } else { + sink.write_all(b",")?; + } + if let Some(value) = solution.get(i) { + match value { + Term::NamedNode(uri) => { + write_escaped_csv_string(uri.as_str(), &mut sink)?; + } + Term::BlankNode(bnode) => { + sink.write_all(b"_:")?; + sink.write_all(bnode.as_str().as_bytes())?; + } + Term::Literal(literal) => { + write_escaped_csv_string(literal.value(), &mut sink)?; + } + } + } + } + } + Ok(()) + } + QueryResults::Graph(_) => Err(invalid_input_error( + "Graphs could not be formatted to SPARQL query results CSV format", + ) + .into()), + } +} + +fn write_escaped_csv_string(s: &str, mut sink: impl Write) -> Result<(), io::Error> { + if s.bytes().any(|c| match c { + b'"' | b',' | b'\n' | b'\r' => true, + _ => false, + }) { + sink.write_all(b"\"")?; + for c in s.bytes() { + if c == b'\"' { + sink.write_all(b"\"\"") + } else { + sink.write_all(&[c]) + }?; + } + sink.write_all(b"\"") + } else { + sink.write_all(s.as_bytes()) + } +} + +pub fn 
write_tsv_results( + results: QueryResults, + mut sink: impl Write, +) -> Result<(), EvaluationError> { + match results { + QueryResults::Boolean(_) => Err(invalid_input_error( + "boolean could not be formatted to SPARQL query results TSV format", + ) + .into()), + QueryResults::Solutions(solutions) => { + let mut start_vars = true; + for variable in solutions.variables() { + if start_vars { + start_vars = false; + } else { + sink.write_all(b"\t")?; + } + sink.write_all(b"?")?; + sink.write_all(variable.as_str().as_bytes())?; + } + + let size = solutions.variables().len(); + for solution in solutions { + let solution = solution?; + sink.write_all(b"\n")?; + let mut start_binding = true; + for i in 0..size { + if start_binding { + start_binding = false; + } else { + sink.write_all(b"\t")?; + } + if let Some(value) = solution.get(i) { + //TODO: full Turtle serialization + sink.write_all( + match value { + Term::NamedNode(node) => node.to_string(), + Term::BlankNode(node) => node.to_string(), + Term::Literal(literal) => match literal.datatype() { + xsd::BOOLEAN => match literal.value() { + "true" | "1" => "true".to_owned(), + "false" | "0" => "false".to_owned(), + _ => literal.to_string(), + }, + xsd::INTEGER => { + if literal.value().bytes().all(|c| match c { + b'0'..=b'9' => true, + _ => false, + }) { + literal.value().to_owned() + } else { + literal.to_string() + } + } + _ => literal.to_string(), + }, + } + .as_bytes(), + )?; + } + } + } + Ok(()) + } + QueryResults::Graph(_) => Err(invalid_input_error( + "Graphs could not be formatted to SPARQL query results TSV format", + ) + .into()), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::rc::Rc; + use std::str; + + fn build_example() -> QueryResults { + QuerySolutionIter::new( + Rc::new(vec![Variable::new("x"), Variable::new("literal")]), + Box::new( + vec![ + Ok(vec![ + Some(NamedNode::new_unchecked("http://example/x").into()), + Some(Literal::new_simple_literal("String").into()), + ]), + Ok(vec![ + 
Some(NamedNode::new_unchecked("http://example/x").into()), + Some(Literal::new_simple_literal("String-with-dquote\"").into()), + ]), + Ok(vec![ + Some(BlankNode::new_unchecked("b0").into()), + Some(Literal::new_simple_literal("Blank node").into()), + ]), + Ok(vec![ + None, + Some(Literal::new_simple_literal("Missing 'x'").into()), + ]), + Ok(vec![None, None]), + Ok(vec![ + Some(NamedNode::new_unchecked("http://example/x").into()), + None, + ]), + Ok(vec![ + Some(BlankNode::new_unchecked("b1").into()), + Some( + Literal::new_language_tagged_literal_unchecked( + "String-with-lang", + "en", + ) + .into(), + ), + ]), + Ok(vec![ + Some(BlankNode::new_unchecked("b1").into()), + Some(Literal::new_typed_literal("123", xsd::INTEGER).into()), + ]), + ] + .into_iter(), + ), + ) + .into() + } + + #[test] + fn test_csv_serialization() { + let mut sink = Vec::new(); + write_csv_results(build_example(), &mut sink).unwrap(); + assert_eq!(str::from_utf8(&sink).unwrap(), "x,literal\r\nhttp://example/x,String\r\nhttp://example/x,\"String-with-dquote\"\"\"\r\n_:b0,Blank node\r\n,Missing 'x'\r\n,\r\nhttp://example/x,\r\n_:b1,String-with-lang\r\n_:b1,123"); + } + + #[test] + fn test_tsv_serialization() { + let mut sink = Vec::new(); + write_tsv_results(build_example(), &mut sink).unwrap(); + assert_eq!(str::from_utf8(&sink).unwrap(), "?x\t?literal\n<http://example/x>\t\"String\"\n<http://example/x>\t\"String-with-dquote\\\"\"\n_:b0\t\"Blank node\"\n\t\"Missing 'x'\"\n\t\n<http://example/x>\t\n_:b1\t\"String-with-lang\"@en\n_:b1\t123"); + } +} diff --git a/lib/src/sparql/json_results.rs b/lib/src/sparql/json_results.rs index 3fc084fb..609aa888 100644 --- a/lib/src/sparql/json_results.rs +++ b/lib/src/sparql/json_results.rs @@ -15,6 +15,7 @@ pub fn write_json_results( sink.write_all(b"{\"head\":{},\"boolean\":")?; sink.write_all(if value { b"true" } else { b"false" })?; sink.write_all(b"}")?; + Ok(()) } QueryResults::Solutions(solutions) => { sink.write_all(b"{\"head\":{\"vars\":[")?; @@ -74,15 +75,13 @@ pub fn write_json_results( 
sink.write_all(b"}")?; } sink.write_all(b"]}}")?; + Ok(()) } - QueryResults::Graph(_) => { - return Err(invalid_input_error( - "Graphs could not be formatted to SPARQL query results XML format", - ) - .into()); - } + QueryResults::Graph(_) => Err(invalid_input_error( + "Graphs could not be formatted to SPARQL query results JSON format", + ) + .into()), } - Ok(()) } fn write_escaped_json_string(s: &str, mut sink: impl Write) -> Result<(), EvaluationError> { diff --git a/lib/src/sparql/mod.rs b/lib/src/sparql/mod.rs index 430b2be3..54c48cd2 100644 --- a/lib/src/sparql/mod.rs +++ b/lib/src/sparql/mod.rs @@ -3,6 +3,7 @@ //! Stores execute SPARQL. See [`MemoryStore`](../store/memory/struct.MemoryStore.html#method.query) for an example. mod algebra; +mod csv_results; mod dataset; mod error; mod eval; diff --git a/lib/src/sparql/model.rs b/lib/src/sparql/model.rs index 71dec28d..1c7c9291 100644 --- a/lib/src/sparql/model.rs +++ b/lib/src/sparql/model.rs @@ -2,6 +2,7 @@ use crate::error::invalid_input_error; use crate::io::GraphFormat; use crate::io::GraphSerializer; use crate::model::*; +use crate::sparql::csv_results::{write_csv_results, write_tsv_results}; use crate::sparql::error::EvaluationError; use crate::sparql::json_results::write_json_results; use crate::sparql::xml_results::{read_xml_results, write_xml_results}; @@ -31,6 +32,9 @@ impl QueryResults { QueryResultsFormat::Json => Err(invalid_input_error( "JSON SPARQL results format parsing has not been implemented yet", )), //TODO: implement + QueryResultsFormat::Csv | QueryResultsFormat::Tsv => Err(invalid_input_error( + "CSV and TSV SPARQL results format parsing is not implemented", + )), } } @@ -60,6 +64,8 @@ impl QueryResults { match format { QueryResultsFormat::Xml => write_xml_results(self, writer), QueryResultsFormat::Json => write_json_results(self, writer), + QueryResultsFormat::Csv => write_csv_results(self, writer), + QueryResultsFormat::Tsv => write_tsv_results(self, writer), } } @@ -113,8 +119,6 @@ impl 
From<QuerySolutionIter> for QueryResults { } /// [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats -/// -/// This enumeration is non exhaustive. New formats like CSV will be added in the future. #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] #[non_exhaustive] pub enum QueryResultsFormat { @@ -122,6 +126,10 @@ pub enum QueryResultsFormat { Xml, /// [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) Json, + /// [SPARQL Query Results CSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) + Csv, + /// [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) + Tsv, } impl QueryResultsFormat { @@ -137,6 +145,8 @@ impl QueryResultsFormat { match self { QueryResultsFormat::Xml => "http://www.w3.org/ns/formats/SPARQL_Results_XML", QueryResultsFormat::Json => "http://www.w3.org/ns/formats/SPARQL_Results_JSON", + QueryResultsFormat::Csv => "http://www.w3.org/ns/formats/SPARQL_Results_CSV", + QueryResultsFormat::Tsv => "http://www.w3.org/ns/formats/SPARQL_Results_TSV", } } /// The format [IANA media type](https://tools.ietf.org/html/rfc2046). 
@@ -151,6 +161,8 @@ impl QueryResultsFormat { match self { QueryResultsFormat::Xml => "application/sparql-results+xml", QueryResultsFormat::Json => "application/sparql-results+json", + QueryResultsFormat::Csv => "text/csv; charset=utf-8", + QueryResultsFormat::Tsv => "text/tab-separated-values; charset=utf-8", } } @@ -166,6 +178,8 @@ impl QueryResultsFormat { match self { QueryResultsFormat::Xml => "srx", QueryResultsFormat::Json => "srj", + QueryResultsFormat::Csv => "csv", + QueryResultsFormat::Tsv => "tsv", } } @@ -189,6 +203,8 @@ impl QueryResultsFormat { "application/sparql-results+json" | "application/json" | "text/json" => { Some(QueryResultsFormat::Json) } + "text/csv" => Some(QueryResultsFormat::Csv), + "text/tab-separated-values" | "text/tsv" => Some(QueryResultsFormat::Tsv), _ => None, } } else { @@ -279,7 +295,7 @@ impl QuerySolution { self.values.get(index.index(self)?).and_then(|e| e.as_ref()) } - /// The number of variables which are bind + /// The number of variables which could be bound #[inline] pub fn len(&self) -> usize { self.values.len() diff --git a/server/src/main.rs b/server/src/main.rs index 719bf7a0..ddfe8b86 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -278,6 +278,8 @@ async fn evaluate_sparql_query( &[ QueryResultsFormat::Xml.media_type(), QueryResultsFormat::Json.media_type(), + QueryResultsFormat::Csv.media_type(), + QueryResultsFormat::Tsv.media_type(), ], QueryResultsFormat::from_media_type, )?; diff --git a/wikibase/src/main.rs b/wikibase/src/main.rs index 9e664a7e..f3e18e28 100644 --- a/wikibase/src/main.rs +++ b/wikibase/src/main.rs @@ -212,6 +212,8 @@ async fn evaluate_sparql_query( &[ QueryResultsFormat::Xml.media_type(), QueryResultsFormat::Json.media_type(), + QueryResultsFormat::Csv.media_type(), + QueryResultsFormat::Tsv.media_type(), ], QueryResultsFormat::from_media_type, )?;