TSV SPARQL results: Properly quote \t and use short Turtle serialization everywhere possible

pull/409/head
Tpt 2 years ago committed by Thomas Tanon
parent a271e39fa0
commit a51509dcd3
  1. 151
      lib/sparesults/src/csv.rs

@ -153,25 +153,28 @@ impl<W: Write> TsvSolutionsWriter<W> {
} }
fn write_tsv_term<'a>(term: impl Into<TermRef<'a>>, sink: &mut impl Write) -> io::Result<()> { fn write_tsv_term<'a>(term: impl Into<TermRef<'a>>, sink: &mut impl Write) -> io::Result<()> {
//TODO: full Turtle serialization
match term.into() { match term.into() {
TermRef::NamedNode(node) => write!(sink, "<{}>", node.as_str()), TermRef::NamedNode(node) => write!(sink, "<{}>", node.as_str()),
TermRef::BlankNode(node) => write!(sink, "_:{}", node.as_str()), TermRef::BlankNode(node) => write!(sink, "_:{}", node.as_str()),
TermRef::Literal(literal) => match literal.datatype() { TermRef::Literal(literal) => {
xsd::BOOLEAN => match literal.value() { let value = literal.value();
"true" | "1" => sink.write_all(b"true"), if let Some(language) = literal.language() {
"false" | "0" => sink.write_all(b"false"), write_tsv_quoted_str(value, sink)?;
_ => sink.write_all(literal.to_string().as_bytes()), write!(sink, "@{}", language)
}, } else {
xsd::INTEGER => { match literal.datatype() {
if literal.value().bytes().all(|c| c.is_ascii_digit()) { xsd::BOOLEAN if is_turtle_boolean(value) => sink.write_all(value.as_bytes()),
sink.write_all(literal.value().as_bytes()) xsd::INTEGER if is_turtle_integer(value) => sink.write_all(value.as_bytes()),
} else { xsd::DECIMAL if is_turtle_decimal(value) => sink.write_all(value.as_bytes()),
sink.write_all(literal.to_string().as_bytes()) xsd::DOUBLE if is_turtle_double(value) => sink.write_all(value.as_bytes()),
xsd::STRING => write_tsv_quoted_str(value, sink),
datatype => {
write_tsv_quoted_str(value, sink)?;
write!(sink, "^^<{}>", datatype.as_str())
}
} }
} }
_ => sink.write_all(literal.to_string().as_bytes()), }
},
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
TermRef::Triple(triple) => { TermRef::Triple(triple) => {
sink.write_all(b"<<")?; sink.write_all(b"<<")?;
@ -186,6 +189,92 @@ fn write_tsv_term<'a>(term: impl Into<TermRef<'a>>, sink: &mut impl Write) -> io
} }
} }
/// Writes `string` to `f` as a double-quoted literal.
///
/// Tab, line feed, carriage return, double quote and backslash are emitted as
/// the two-byte escapes `\t`, `\n`, `\r`, `\"` and `\\`; every other byte is
/// copied through unchanged.
///
/// Runs of bytes that need no escaping are written with a single `write_all`
/// call instead of one call per byte, so an unbuffered sink is not hit once
/// per character.
///
/// # Errors
///
/// Returns the first I/O error reported by `f`.
fn write_tsv_quoted_str(string: &str, f: &mut impl Write) -> io::Result<()> {
    f.write_all(b"\"")?;
    let bytes = string.as_bytes();
    // Start of the run of plain bytes that has not been flushed to `f` yet.
    let mut pending_start = 0;
    for (i, &c) in bytes.iter().enumerate() {
        let escaped: &[u8] = match c {
            b'\t' => b"\\t",
            b'\n' => b"\\n",
            b'\r' => b"\\r",
            b'"' => b"\\\"",
            b'\\' => b"\\\\",
            _ => continue,
        };
        // Flush everything before the special byte, then its escape sequence.
        f.write_all(&bytes[pending_start..i])?;
        f.write_all(escaped)?;
        pending_start = i + 1;
    }
    f.write_all(&bytes[pending_start..])?;
    f.write_all(b"\"")
}
/// Returns `true` when `value` is exactly `true` or `false`, the only two
/// spellings accepted here as a bare boolean.
fn is_turtle_boolean(value: &str) -> bool {
    value == "true" || value == "false"
}
/// Checks `value` against the grammar rule quoted below: an optional sign
/// followed by at least one ASCII digit and nothing else.
fn is_turtle_integer(value: &str) -> bool {
    // [19] INTEGER ::= [+-]? [0-9]+
    let digits = match value.as_bytes() {
        // At most one leading sign is consumed.
        [b'+' | b'-', rest @ ..] => rest,
        unsigned => unsigned,
    };
    !digits.is_empty() && digits.iter().all(u8::is_ascii_digit)
}
/// Checks `value` against the grammar rule quoted below: an optional sign,
/// any number of integer digits, a mandatory `.`, then at least one
/// fractional digit and nothing else.
fn is_turtle_decimal(value: &str) -> bool {
    // [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+
    let unsigned = match value.as_bytes() {
        [b'+' | b'-', rest @ ..] => rest,
        all => all,
    };
    // Skip the (possibly empty) integer part.
    let integer_len = unsigned
        .iter()
        .take_while(|b| b.is_ascii_digit())
        .count();
    match &unsigned[integer_len..] {
        [b'.', fraction @ ..] => {
            !fraction.is_empty() && fraction.iter().all(u8::is_ascii_digit)
        }
        _ => false,
    }
}
/// Checks `value` against the grammar rules quoted below: an optional sign, a
/// mantissa with at least one digit before or after an optional `.`, and a
/// mandatory exponent (`e`/`E`, optional sign, at least one digit).
fn is_turtle_double(value: &str) -> bool {
    // [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT)
    // [154s] EXPONENT ::= [eE] [+-]? [0-9]+
    let digit_run = |bytes: &[u8]| bytes.iter().take_while(|b| b.is_ascii_digit()).count();

    let mantissa = match value.as_bytes() {
        [b'+' | b'-', rest @ ..] => rest,
        all => all,
    };

    // Digits before the optional dot.
    let integer_len = digit_run(mantissa);
    let after_integer = &mantissa[integer_len..];

    // Digits after the dot, if there is one.
    let (fraction_len, after_fraction) = match after_integer {
        [b'.', rest @ ..] => {
            let len = digit_run(rest);
            (len, &rest[len..])
        }
        _ => (0, after_integer),
    };

    // The exponent marker is mandatory.
    let exponent = match after_fraction {
        [b'e' | b'E', rest @ ..] => rest,
        _ => return false,
    };
    let exponent_digits = match exponent {
        [b'+' | b'-', rest @ ..] => rest,
        all => all,
    };

    (integer_len > 0 || fraction_len > 0)
        && !exponent_digits.is_empty()
        && exponent_digits.iter().all(u8::is_ascii_digit)
}
pub enum TsvQueryResultsReader<R: BufRead> { pub enum TsvQueryResultsReader<R: BufRead> {
Solutions { Solutions {
variables: Vec<Variable>, variables: Vec<Variable>,
@ -259,6 +348,7 @@ impl<R: BufRead> TsvSolutionsReader<R> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use std::error::Error;
use std::io::Cursor; use std::io::Cursor;
use std::rc::Rc; use std::rc::Rc;
use std::str; use std::str;
@ -302,6 +392,10 @@ mod tests {
Some(BlankNode::new_unchecked("b1").into()), Some(BlankNode::new_unchecked("b1").into()),
Some(Literal::new_typed_literal("123", xsd::INTEGER).into()), Some(Literal::new_typed_literal("123", xsd::INTEGER).into()),
], ],
vec![
None,
Some(Literal::new_simple_literal("escape,\t\r\n").into()),
],
], ],
) )
} }
@ -320,25 +414,44 @@ mod tests {
)?; )?;
} }
let result = writer.finish()?; let result = writer.finish()?;
assert_eq!(str::from_utf8(&result).unwrap(), "x,literal\r\nhttp://example/x,String\r\nhttp://example/x,\"String-with-dquote\"\"\"\r\n_:b0,Blank node\r\n,Missing 'x'\r\n,\r\nhttp://example/x,\r\n_:b1,String-with-lang\r\n_:b1,123"); assert_eq!(str::from_utf8(&result).unwrap(), "x,literal\r\nhttp://example/x,String\r\nhttp://example/x,\"String-with-dquote\"\"\"\r\n_:b0,Blank node\r\n,Missing 'x'\r\n,\r\nhttp://example/x,\r\n_:b1,String-with-lang\r\n_:b1,123\r\n,\"escape,\t\r\n\"");
Ok(()) Ok(())
} }
#[test] #[test]
fn test_tsv_serialization() -> io::Result<()> { fn test_tsv_roundtrip() -> Result<(), Box<dyn Error>> {
let (variables, solutions) = build_example(); let (variables, solutions) = build_example();
// Write
let mut writer = TsvSolutionsWriter::start(Vec::new(), variables.clone())?; let mut writer = TsvSolutionsWriter::start(Vec::new(), variables.clone())?;
let variables = Rc::new(variables); let variables = Rc::new(variables);
for solution in solutions { for solution in &solutions {
writer.write( writer.write(
variables variables
.iter() .iter()
.zip(&solution) .zip(solution)
.filter_map(|(v, s)| s.as_ref().map(|s| (v.as_ref(), s.as_ref()))), .filter_map(|(v, s)| s.as_ref().map(|s| (v.as_ref(), s.as_ref()))),
)?; )?;
} }
let result = writer.finish()?; let result = writer.finish()?;
assert_eq!(str::from_utf8(&result).unwrap(), "?x\t?literal\n<http://example/x>\t\"String\"\n<http://example/x>\t\"String-with-dquote\\\"\"\n_:b0\t\"Blank node\"\n\t\"Missing 'x'\"\n\t\n<http://example/x>\t\n_:b1\t\"String-with-lang\"@en\n_:b1\t123"); assert_eq!(str::from_utf8(&result).unwrap(), "?x\t?literal\n<http://example/x>\t\"String\"\n<http://example/x>\t\"String-with-dquote\\\"\"\n_:b0\t\"Blank node\"\n\t\"Missing 'x'\"\n\t\n<http://example/x>\t\n_:b1\t\"String-with-lang\"@en\n_:b1\t123\n\t\"escape,\\t\\r\\n\"");
// Read
if let TsvQueryResultsReader::Solutions {
solutions: mut solutions_iter,
variables: actual_variables,
} = TsvQueryResultsReader::read(Cursor::new(result))?
{
assert_eq!(actual_variables.as_slice(), variables.as_slice());
let mut rows = Vec::new();
while let Some(row) = solutions_iter.read_next()? {
rows.push(row);
}
assert_eq!(rows, solutions);
} else {
unreachable!()
}
Ok(()) Ok(())
} }

Loading…
Cancel
Save