Implements CSV and TSV results serialization

pull/51/head
Tpt 4 years ago
parent 39fdf4b16b
commit 21c2f6d870
  1. 2
      README.md
  2. 221
      lib/src/sparql/csv_results.rs
  3. 13
      lib/src/sparql/json_results.rs
  4. 1
      lib/src/sparql/mod.rs
  5. 22
      lib/src/sparql/model.rs
  6. 2
      server/src/main.rs
  7. 2
      wikibase/src/main.rs

@ -33,7 +33,7 @@ It is split into multiple parts:
Oxigraph implements the following specifications:
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio).
* [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/) and [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/).
* [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
A preliminary benchmark [is provided](bench/README.md).

@ -0,0 +1,221 @@
//! Implementation of [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/)
use crate::error::invalid_input_error;
use crate::model::{vocab::xsd, *};
use crate::sparql::error::EvaluationError;
use crate::sparql::model::*;
use std::io::{self, Write};
/// Writes query solutions to `sink` using the
/// [SPARQL 1.1 Query Results CSV format](https://www.w3.org/TR/sparql11-results-csv-tsv/).
///
/// The first row lists the variable names separated by commas. Each solution
/// is then written on its own row; rows are separated by `\r\n` and no line
/// break is written after the last one. An unbound variable yields an empty
/// field. IRIs are written as-is, blank nodes are prefixed with `_:` and only
/// the lexical value of literals is written (this serialization drops the
/// datatype and the language tag).
///
/// # Errors
///
/// Returns an error if `results` is a boolean or a graph, if a solution of
/// the iterator is itself an error, or if writing to `sink` fails.
pub fn write_csv_results(
    results: QueryResults,
    mut sink: impl Write,
) -> Result<(), EvaluationError> {
    match results {
        QueryResults::Boolean(_) => Err(invalid_input_error(
            "boolean could not be formatted to SPARQL query results CSV format",
        )
        .into()),
        QueryResults::Solutions(solutions) => {
            // Header row: the bare variable names.
            for (position, variable) in solutions.variables().iter().enumerate() {
                if position > 0 {
                    sink.write_all(b",")?;
                }
                sink.write_all(variable.as_str().as_bytes())?;
            }

            // The column count is read before the loop because iterating
            // consumes `solutions`.
            let size = solutions.variables().len();
            for solution in solutions {
                let solution = solution?;
                // The line break is written *before* each row so that the
                // output does not end with a trailing line break.
                sink.write_all(b"\r\n")?;
                for i in 0..size {
                    if i > 0 {
                        sink.write_all(b",")?;
                    }
                    if let Some(value) = solution.get(i) {
                        match value {
                            Term::NamedNode(uri) => {
                                // An IRI may contain a comma, so it has to go
                                // through the CSV quoting rules too. IRIs
                                // without special characters are written
                                // unchanged.
                                write_escaped_csv_string(uri.as_str(), &mut sink)?;
                            }
                            Term::BlankNode(bnode) => {
                                sink.write_all(b"_:")?;
                                sink.write_all(bnode.as_str().as_bytes())?;
                            }
                            Term::Literal(literal) => {
                                write_escaped_csv_string(literal.value(), &mut sink)?;
                            }
                        }
                    }
                }
            }
            Ok(())
        }
        QueryResults::Graph(_) => Err(invalid_input_error(
            "Graphs could not be formatted to SPARQL query results CSV format",
        )
        .into()),
    }
}
/// Writes `s` to `sink` as a single CSV field.
///
/// If `s` contains a double quote, a comma, a line feed or a carriage return,
/// the field is wrapped in double quotes and every inner double quote is
/// doubled. Otherwise `s` is written unchanged.
///
/// # Errors
///
/// Returns the I/O error raised by `sink`, if any.
fn write_escaped_csv_string(s: &str, mut sink: impl Write) -> Result<(), io::Error> {
    let needs_quoting = s
        .bytes()
        .any(|c| c == b'"' || c == b',' || c == b'\n' || c == b'\r');
    if !needs_quoting {
        return sink.write_all(s.as_bytes());
    }

    sink.write_all(b"\"")?;
    // Write whole runs between double quotes instead of one byte at a time:
    // every quote found in `s` is replaced by two quotes.
    let mut runs = s.split('"');
    if let Some(first) = runs.next() {
        sink.write_all(first.as_bytes())?;
    }
    for run in runs {
        sink.write_all(b"\"\"")?;
        sink.write_all(run.as_bytes())?;
    }
    sink.write_all(b"\"")
}
/// Writes query solutions to `sink` using the
/// [SPARQL 1.1 Query Results TSV format](https://www.w3.org/TR/sparql11-results-csv-tsv/).
///
/// The first row lists the variable names, each prefixed with `?` and
/// separated by tabs. Each solution is then written on its own row; rows are
/// separated by `\n` and no line break is written after the last one. An
/// unbound variable yields an empty field.
///
/// Terms are written using their `to_string()` form, except for two short
/// forms: `xsd:boolean` literals whose value is `true`/`1` or `false`/`0` are
/// written as `true` or `false`, and `xsd:integer` literals made only of
/// ASCII digits are written as the bare number.
///
/// # Errors
///
/// Returns an error if `results` is a boolean or a graph, if a solution of
/// the iterator is itself an error, or if writing to `sink` fails.
pub fn write_tsv_results(
    results: QueryResults,
    mut sink: impl Write,
) -> Result<(), EvaluationError> {
    match results {
        QueryResults::Boolean(_) => Err(invalid_input_error(
            "boolean could not be formatted to SPARQL query results TSV format",
        )
        .into()),
        QueryResults::Solutions(solutions) => {
            // Header row: `?name` for each variable.
            for (position, variable) in solutions.variables().iter().enumerate() {
                if position > 0 {
                    sink.write_all(b"\t")?;
                }
                sink.write_all(b"?")?;
                sink.write_all(variable.as_str().as_bytes())?;
            }

            // The column count is read before the loop because iterating
            // consumes `solutions`.
            let size = solutions.variables().len();
            for solution in solutions {
                let solution = solution?;
                // The line break is written *before* each row so that the
                // output does not end with a trailing line break.
                sink.write_all(b"\n")?;
                for i in 0..size {
                    if i > 0 {
                        sink.write_all(b"\t")?;
                    }
                    if let Some(value) = solution.get(i) {
                        //TODO: full Turtle serialization
                        // NOTE(review): escaping of tabs inside literals
                        // depends on the `to_string()` implementations, which
                        // are not defined here -- confirm they never emit a
                        // raw tab.
                        let encoded = match value {
                            Term::NamedNode(node) => node.to_string(),
                            Term::BlankNode(node) => node.to_string(),
                            Term::Literal(literal) => match literal.datatype() {
                                xsd::BOOLEAN => match literal.value() {
                                    "true" | "1" => "true".to_owned(),
                                    "false" | "0" => "false".to_owned(),
                                    _ => literal.to_string(),
                                },
                                xsd::INTEGER => {
                                    let lexical = literal.value();
                                    // The short form needs at least one digit:
                                    // an empty lexical form would produce an
                                    // empty cell, which reads back as an
                                    // unbound variable.
                                    if !lexical.is_empty()
                                        && lexical.bytes().all(|c| c.is_ascii_digit())
                                    {
                                        lexical.to_owned()
                                    } else {
                                        literal.to_string()
                                    }
                                }
                                _ => literal.to_string(),
                            },
                        };
                        sink.write_all(encoded.as_bytes())?;
                    }
                }
            }
            Ok(())
        }
        QueryResults::Graph(_) => Err(invalid_input_error(
            "Graphs could not be formatted to SPARQL query results TSV format",
        )
        .into()),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::rc::Rc;

    /// Builds the shared fixture: two variables (`x` and `literal`) and eight
    /// solutions mixing IRIs, blank nodes, simple, language-tagged and typed
    /// literals, and unbound values.
    fn build_example() -> QueryResults {
        let variables = Rc::new(vec![Variable::new("x"), Variable::new("literal")]);
        QuerySolutionIter::new(
            variables,
            Box::new(
                vec![
                    Ok(vec![
                        Some(NamedNode::new_unchecked("http://example/x").into()),
                        Some(Literal::new_simple_literal("String").into()),
                    ]),
                    Ok(vec![
                        Some(NamedNode::new_unchecked("http://example/x").into()),
                        Some(Literal::new_simple_literal("String-with-dquote\"").into()),
                    ]),
                    Ok(vec![
                        Some(BlankNode::new_unchecked("b0").into()),
                        Some(Literal::new_simple_literal("Blank node").into()),
                    ]),
                    Ok(vec![
                        None,
                        Some(Literal::new_simple_literal("Missing 'x'").into()),
                    ]),
                    Ok(vec![None, None]),
                    Ok(vec![
                        Some(NamedNode::new_unchecked("http://example/x").into()),
                        None,
                    ]),
                    Ok(vec![
                        Some(BlankNode::new_unchecked("b1").into()),
                        Some(
                            Literal::new_language_tagged_literal_unchecked(
                                "String-with-lang",
                                "en",
                            )
                            .into(),
                        ),
                    ]),
                    Ok(vec![
                        Some(BlankNode::new_unchecked("b1").into()),
                        Some(Literal::new_typed_literal("123", xsd::INTEGER).into()),
                    ]),
                ]
                .into_iter(),
            ),
        )
        .into()
    }

    /// The CSV output must match the expected document row by row.
    #[test]
    fn test_csv_serialization() {
        let mut buffer = Vec::new();
        write_csv_results(build_example(), &mut buffer).unwrap();
        let actual = String::from_utf8(buffer).unwrap();
        // One entry per row; the last row has no trailing line break.
        let expected = concat!(
            "x,literal\r\n",
            "http://example/x,String\r\n",
            "http://example/x,\"String-with-dquote\"\"\"\r\n",
            "_:b0,Blank node\r\n",
            ",Missing 'x'\r\n",
            ",\r\n",
            "http://example/x,\r\n",
            "_:b1,String-with-lang\r\n",
            "_:b1,123",
        );
        assert_eq!(actual, expected);
    }

    /// The TSV output must match the expected document row by row.
    #[test]
    fn test_tsv_serialization() {
        let mut buffer = Vec::new();
        write_tsv_results(build_example(), &mut buffer).unwrap();
        let actual = String::from_utf8(buffer).unwrap();
        // One entry per row; the last row has no trailing line break.
        let expected = concat!(
            "?x\t?literal\n",
            "<http://example/x>\t\"String\"\n",
            "<http://example/x>\t\"String-with-dquote\\\"\"\n",
            "_:b0\t\"Blank node\"\n",
            "\t\"Missing 'x'\"\n",
            "\t\n",
            "<http://example/x>\t\n",
            "_:b1\t\"String-with-lang\"@en\n",
            "_:b1\t123",
        );
        assert_eq!(actual, expected);
    }
}

@ -15,6 +15,7 @@ pub fn write_json_results(
sink.write_all(b"{\"head\":{},\"boolean\":")?;
sink.write_all(if value { b"true" } else { b"false" })?;
sink.write_all(b"}")?;
Ok(())
}
QueryResults::Solutions(solutions) => {
sink.write_all(b"{\"head\":{\"vars\":[")?;
@ -74,15 +75,13 @@ pub fn write_json_results(
sink.write_all(b"}")?;
}
sink.write_all(b"]}}")?;
Ok(())
}
QueryResults::Graph(_) => {
return Err(invalid_input_error(
"Graphs could not be formatted to SPARQL query results XML format",
)
.into());
}
QueryResults::Graph(_) => Err(invalid_input_error(
"Graphs could not be formatted to SPARQL query results XML format",
)
.into()),
}
Ok(())
}
fn write_escaped_json_string(s: &str, mut sink: impl Write) -> Result<(), EvaluationError> {

@ -3,6 +3,7 @@
//! Stores execute SPARQL. See [`MemoryStore`](../store/memory/struct.MemoryStore.html#method.query) for an example.
mod algebra;
mod csv_results;
mod dataset;
mod error;
mod eval;

@ -2,6 +2,7 @@ use crate::error::invalid_input_error;
use crate::io::GraphFormat;
use crate::io::GraphSerializer;
use crate::model::*;
use crate::sparql::csv_results::{write_csv_results, write_tsv_results};
use crate::sparql::error::EvaluationError;
use crate::sparql::json_results::write_json_results;
use crate::sparql::xml_results::{read_xml_results, write_xml_results};
@ -31,6 +32,9 @@ impl QueryResults {
QueryResultsFormat::Json => Err(invalid_input_error(
"JSON SPARQL results format parsing has not been implemented yet",
)), //TODO: implement
QueryResultsFormat::Csv | QueryResultsFormat::Tsv => Err(invalid_input_error(
"CSV and TSV SPARQL results format parsing is not implemented",
)),
}
}
@ -60,6 +64,8 @@ impl QueryResults {
match format {
QueryResultsFormat::Xml => write_xml_results(self, writer),
QueryResultsFormat::Json => write_json_results(self, writer),
QueryResultsFormat::Csv => write_csv_results(self, writer),
QueryResultsFormat::Tsv => write_tsv_results(self, writer),
}
}
@ -113,8 +119,6 @@ impl From<QuerySolutionIter> for QueryResults {
}
/// [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats
///
/// This enumeration is non exhaustive. New formats like CSV will be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
pub enum QueryResultsFormat {
@ -122,6 +126,10 @@ pub enum QueryResultsFormat {
Xml,
/// [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/)
Json,
/// [SPARQL Query Results CSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/)
Csv,
/// [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/)
Tsv,
}
impl QueryResultsFormat {
@ -137,6 +145,8 @@ impl QueryResultsFormat {
match self {
QueryResultsFormat::Xml => "http://www.w3.org/ns/formats/SPARQL_Results_XML",
QueryResultsFormat::Json => "http://www.w3.org/ns/formats/SPARQL_Results_JSON",
QueryResultsFormat::Csv => "http://www.w3.org/ns/formats/SPARQL_Results_CSV",
QueryResultsFormat::Tsv => "http://www.w3.org/ns/formats/SPARQL_Results_TSV",
}
}
/// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
@ -151,6 +161,8 @@ impl QueryResultsFormat {
match self {
QueryResultsFormat::Xml => "application/sparql-results+xml",
QueryResultsFormat::Json => "application/sparql-results+json",
QueryResultsFormat::Csv => "text/csv; charset=utf-8",
QueryResultsFormat::Tsv => "text/tab-separated-values; charset=utf-8",
}
}
@ -166,6 +178,8 @@ impl QueryResultsFormat {
match self {
QueryResultsFormat::Xml => "srx",
QueryResultsFormat::Json => "srj",
QueryResultsFormat::Csv => "csv",
QueryResultsFormat::Tsv => "tsv",
}
}
@ -189,6 +203,8 @@ impl QueryResultsFormat {
"application/sparql-results+json" | "application/json" | "text/json" => {
Some(QueryResultsFormat::Json)
}
"text/csv" => Some(QueryResultsFormat::Csv),
"text/tab-separated-values" | "text/tsv" => Some(QueryResultsFormat::Tsv),
_ => None,
}
} else {
@ -279,7 +295,7 @@ impl QuerySolution {
self.values.get(index.index(self)?).and_then(|e| e.as_ref())
}
/// The number of variables which are bind
/// The number of variables which could be bound
#[inline]
pub fn len(&self) -> usize {
self.values.len()

@ -278,6 +278,8 @@ async fn evaluate_sparql_query(
&[
QueryResultsFormat::Xml.media_type(),
QueryResultsFormat::Json.media_type(),
QueryResultsFormat::Csv.media_type(),
QueryResultsFormat::Tsv.media_type(),
],
QueryResultsFormat::from_media_type,
)?;

@ -212,6 +212,8 @@ async fn evaluate_sparql_query(
&[
QueryResultsFormat::Xml.media_type(),
QueryResultsFormat::Json.media_type(),
QueryResultsFormat::Csv.media_type(),
QueryResultsFormat::Tsv.media_type(),
],
QueryResultsFormat::from_media_type,
)?;

Loading…
Cancel
Save