Fork of https://github.com/oxigraph/oxigraph.git for the purpose of NextGraph project
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
327 lines
11 KiB
327 lines
11 KiB
3 years ago
|
//! Implementation of [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/)
|
||
|
|
||
3 years ago
|
use crate::error::{ParseError, SyntaxError, SyntaxErrorKind};
|
||
3 years ago
|
use oxrdf::Variable;
|
||
3 years ago
|
use oxrdf::{vocab::xsd, *};
|
||
3 years ago
|
use std::io::{self, BufRead, Write};
|
||
|
use std::str::FromStr;
|
||
|
|
||
|
/// Serializes a boolean (e.g. `ASK`) result to `sink` in the CSV flavour.
///
/// Writes the bare token `true` or `false`, without any line terminator,
/// and hands the sink back to the caller on success.
///
/// # Errors
///
/// Returns any I/O error raised while writing to `sink`.
pub fn write_boolean_csv_result<W: Write>(mut sink: W, value: bool) -> io::Result<W> {
    let token: &[u8] = match value {
        true => b"true",
        false => b"false",
    };
    sink.write_all(token)?;
    Ok(sink)
}
|
||
|
|
||
|
pub struct CsvSolutionsWriter<W: Write> {
|
||
|
sink: W,
|
||
3 years ago
|
variables: Vec<Variable>,
|
||
3 years ago
|
}
|
||
|
|
||
|
impl<W: Write> CsvSolutionsWriter<W> {
|
||
3 years ago
|
pub fn start(mut sink: W, variables: Vec<Variable>) -> io::Result<Self> {
|
||
3 years ago
|
let mut start_vars = true;
|
||
3 years ago
|
for variable in &variables {
|
||
3 years ago
|
if start_vars {
|
||
|
start_vars = false;
|
||
|
} else {
|
||
|
sink.write_all(b",")?;
|
||
|
}
|
||
|
sink.write_all(variable.as_str().as_bytes())?;
|
||
|
}
|
||
3 years ago
|
Ok(Self { sink, variables })
|
||
3 years ago
|
}
|
||
|
|
||
|
pub fn write<'a>(
|
||
|
&mut self,
|
||
3 years ago
|
solution: impl IntoIterator<Item = (&'a Variable, &'a Term)>,
|
||
3 years ago
|
) -> io::Result<()> {
|
||
3 years ago
|
let mut values = vec![None; self.variables.len()];
|
||
|
for (variable, value) in solution {
|
||
|
if let Some(position) = self.variables.iter().position(|v| v == variable) {
|
||
|
values[position] = Some(value);
|
||
|
}
|
||
|
}
|
||
3 years ago
|
self.sink.write_all(b"\r\n")?;
|
||
|
let mut start_binding = true;
|
||
3 years ago
|
for value in values {
|
||
3 years ago
|
if start_binding {
|
||
|
start_binding = false;
|
||
|
} else {
|
||
|
self.sink.write_all(b",")?;
|
||
|
}
|
||
|
if let Some(value) = value {
|
||
|
write_csv_term(value, &mut self.sink)?;
|
||
|
}
|
||
|
}
|
||
|
Ok(())
|
||
|
}
|
||
|
|
||
|
pub fn finish(self) -> W {
|
||
|
self.sink
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fn write_csv_term<'a>(term: impl Into<TermRef<'a>>, sink: &mut impl Write) -> io::Result<()> {
|
||
|
match term.into() {
|
||
|
TermRef::NamedNode(uri) => sink.write_all(uri.as_str().as_bytes()),
|
||
|
TermRef::BlankNode(bnode) => {
|
||
|
sink.write_all(b"_:")?;
|
||
|
sink.write_all(bnode.as_str().as_bytes())
|
||
|
}
|
||
|
TermRef::Literal(literal) => write_escaped_csv_string(literal.value(), sink),
|
||
3 years ago
|
#[cfg(feature = "rdf-star")]
|
||
3 years ago
|
TermRef::Triple(triple) => {
|
||
|
write_csv_term(&triple.subject, sink)?;
|
||
|
sink.write_all(b" ")?;
|
||
|
write_csv_term(&triple.predicate, sink)?;
|
||
|
sink.write_all(b" ")?;
|
||
|
write_csv_term(&triple.object, sink)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/// Writes `s` as the content of a CSV field, quoting it only when required.
///
/// A value containing a double quote, a comma, a line feed or a carriage
/// return is wrapped in double quotes and every embedded double quote is
/// doubled. Any other value is written verbatim.
///
/// The quoted path writes whole quote-delimited segments rather than one
/// byte at a time, so the number of `write` calls grows with the number of
/// embedded quotes and not with the length of the value.
///
/// # Errors
///
/// Returns any I/O error raised while writing to `sink`.
fn write_escaped_csv_string(s: &str, sink: &mut impl Write) -> io::Result<()> {
    let needs_quoting = s
        .bytes()
        .any(|c| matches!(c, b'"' | b',' | b'\n' | b'\r'));
    if !needs_quoting {
        return sink.write_all(s.as_bytes());
    }

    sink.write_all(b"\"")?;
    // `split` always yields at least one (possibly empty) segment; every
    // boundary between two segments is exactly one `"` of the input, which
    // has to be emitted doubled.
    let mut segments = s.split('"');
    if let Some(first) = segments.next() {
        sink.write_all(first.as_bytes())?;
    }
    for segment in segments {
        sink.write_all(b"\"\"")?;
        sink.write_all(segment.as_bytes())?;
    }
    sink.write_all(b"\"")
}
|
||
|
|
||
|
/// Serializes a boolean (e.g. `ASK`) result to `sink` in the TSV flavour.
///
/// Writes the bare token `true` or `false`, without any line terminator,
/// and hands the sink back to the caller on success.
///
/// # Errors
///
/// Returns any I/O error raised while writing to `sink`.
pub fn write_boolean_tsv_result<W: Write>(mut sink: W, value: bool) -> io::Result<W> {
    let token: &[u8] = if value { b"true" } else { b"false" };
    sink.write_all(token)?;
    Ok(sink)
}
|
||
|
|
||
|
pub struct TsvSolutionsWriter<W: Write> {
|
||
|
sink: W,
|
||
3 years ago
|
variables: Vec<Variable>,
|
||
3 years ago
|
}
|
||
|
|
||
|
impl<W: Write> TsvSolutionsWriter<W> {
|
||
3 years ago
|
pub fn start(mut sink: W, variables: Vec<Variable>) -> io::Result<Self> {
|
||
3 years ago
|
let mut start_vars = true;
|
||
3 years ago
|
for variable in &variables {
|
||
3 years ago
|
if start_vars {
|
||
|
start_vars = false;
|
||
|
} else {
|
||
|
sink.write_all(b"\t")?;
|
||
|
}
|
||
|
sink.write_all(b"?")?;
|
||
|
sink.write_all(variable.as_str().as_bytes())?;
|
||
|
}
|
||
3 years ago
|
Ok(Self { sink, variables })
|
||
3 years ago
|
}
|
||
|
|
||
|
pub fn write<'a>(
|
||
|
&mut self,
|
||
3 years ago
|
solution: impl IntoIterator<Item = (&'a Variable, &'a Term)>,
|
||
3 years ago
|
) -> io::Result<()> {
|
||
3 years ago
|
let mut values = vec![None; self.variables.len()];
|
||
|
for (variable, value) in solution {
|
||
|
if let Some(position) = self.variables.iter().position(|v| v == variable) {
|
||
|
values[position] = Some(value);
|
||
|
}
|
||
|
}
|
||
3 years ago
|
self.sink.write_all(b"\n")?;
|
||
|
let mut start_binding = true;
|
||
3 years ago
|
for value in values {
|
||
3 years ago
|
if start_binding {
|
||
|
start_binding = false;
|
||
|
} else {
|
||
|
self.sink.write_all(b"\t")?;
|
||
|
}
|
||
|
if let Some(value) = value {
|
||
|
write_tsv_term(value, &mut self.sink)?;
|
||
|
}
|
||
|
}
|
||
|
Ok(())
|
||
|
}
|
||
|
|
||
|
pub fn finish(self) -> W {
|
||
|
self.sink
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fn write_tsv_term<'a>(term: impl Into<TermRef<'a>>, sink: &mut impl Write) -> io::Result<()> {
|
||
|
//TODO: full Turtle serialization
|
||
|
match term.into() {
|
||
|
TermRef::NamedNode(node) => write!(sink, "<{}>", node.as_str()),
|
||
|
TermRef::BlankNode(node) => write!(sink, "_:{}", node.as_str()),
|
||
|
TermRef::Literal(literal) => match literal.datatype() {
|
||
|
xsd::BOOLEAN => match literal.value() {
|
||
|
"true" | "1" => sink.write_all(b"true"),
|
||
|
"false" | "0" => sink.write_all(b"false"),
|
||
|
_ => sink.write_all(literal.to_string().as_bytes()),
|
||
|
},
|
||
|
xsd::INTEGER => {
|
||
|
if literal.value().bytes().all(|c| matches!(c, b'0'..=b'9')) {
|
||
|
sink.write_all(literal.value().as_bytes())
|
||
|
} else {
|
||
|
sink.write_all(literal.to_string().as_bytes())
|
||
|
}
|
||
|
}
|
||
|
_ => sink.write_all(literal.to_string().as_bytes()),
|
||
|
},
|
||
3 years ago
|
#[cfg(feature = "rdf-star")]
|
||
3 years ago
|
TermRef::Triple(triple) => {
|
||
|
sink.write_all(b"<<")?;
|
||
|
write_tsv_term(&triple.subject, sink)?;
|
||
|
sink.write_all(b" ")?;
|
||
|
write_tsv_term(&triple.predicate, sink)?;
|
||
|
sink.write_all(b" ")?;
|
||
|
write_tsv_term(&triple.object, sink)?;
|
||
|
sink.write_all(b">>")?;
|
||
|
Ok(())
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
pub enum TsvQueryResultsReader<R: BufRead> {
|
||
|
Solutions {
|
||
|
variables: Vec<Variable>,
|
||
|
solutions: TsvSolutionsReader<R>,
|
||
|
},
|
||
|
Boolean(bool),
|
||
|
}
|
||
|
|
||
|
impl<R: BufRead> TsvQueryResultsReader<R> {
|
||
3 years ago
|
pub fn read(mut source: R) -> Result<Self, ParseError> {
|
||
3 years ago
|
let mut buffer = String::new();
|
||
|
|
||
|
// We read the header
|
||
|
source.read_line(&mut buffer)?;
|
||
|
if buffer.trim().eq_ignore_ascii_case("true") {
|
||
|
return Ok(Self::Boolean(true));
|
||
|
}
|
||
|
if buffer.trim().eq_ignore_ascii_case("false") {
|
||
|
return Ok(Self::Boolean(false));
|
||
|
}
|
||
|
let variables = buffer
|
||
|
.split('\t')
|
||
|
.map(|v| {
|
||
|
Variable::from_str(v.trim())
|
||
|
.map_err(|e| SyntaxError::msg(format!("Invalid variable name '{}': {}", v, e)))
|
||
|
})
|
||
|
.collect::<Result<Vec<_>, _>>()?;
|
||
|
|
||
|
Ok(Self::Solutions {
|
||
|
variables,
|
||
|
solutions: TsvSolutionsReader { source, buffer },
|
||
|
})
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/// Line-by-line reader of the solution rows of a TSV query results document.
pub struct TsvSolutionsReader<R: BufRead> {
    /// Input positioned after the header row.
    source: R,
    /// Line buffer, cleared and refilled on each row to avoid reallocating.
    buffer: String,
}
|
||
|
|
||
|
impl<R: BufRead> TsvSolutionsReader<R> {
|
||
3 years ago
|
pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, ParseError> {
|
||
3 years ago
|
self.buffer.clear();
|
||
|
if self.source.read_line(&mut self.buffer)? == 0 {
|
||
|
return Ok(None);
|
||
|
}
|
||
|
Ok(Some(
|
||
|
self.buffer
|
||
|
.split('\t')
|
||
|
.map(|v| {
|
||
|
let v = v.trim();
|
||
|
if v.is_empty() {
|
||
|
Ok(None)
|
||
|
} else {
|
||
3 years ago
|
Ok(Some(Term::from_str(v).map_err(|e| SyntaxError {
|
||
|
inner: SyntaxErrorKind::Term(e),
|
||
|
})?))
|
||
3 years ago
|
}
|
||
|
})
|
||
3 years ago
|
.collect::<Result<_, ParseError>>()?,
|
||
3 years ago
|
))
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::QuerySolution;
    use std::rc::Rc;
    use std::str;

    /// Builds a two-column fixture (`x`, `literal`) covering IRIs, blank
    /// nodes, unbound cells, a literal needing CSV escaping, a language-tagged
    /// literal and an integer literal.
    fn build_example() -> (Vec<Variable>, Vec<Vec<Option<Term>>>) {
        let variables = vec![
            Variable::new_unchecked("x"),
            Variable::new_unchecked("literal"),
        ];
        let solutions = vec![
            vec![
                Some(NamedNode::new_unchecked("http://example/x").into()),
                Some(Literal::new_simple_literal("String").into()),
            ],
            vec![
                Some(NamedNode::new_unchecked("http://example/x").into()),
                Some(Literal::new_simple_literal("String-with-dquote\"").into()),
            ],
            vec![
                Some(BlankNode::new_unchecked("b0").into()),
                Some(Literal::new_simple_literal("Blank node").into()),
            ],
            vec![
                None,
                Some(Literal::new_simple_literal("Missing 'x'").into()),
            ],
            vec![None, None],
            vec![
                Some(NamedNode::new_unchecked("http://example/x").into()),
                None,
            ],
            vec![
                Some(BlankNode::new_unchecked("b1").into()),
                Some(
                    Literal::new_language_tagged_literal_unchecked("String-with-lang", "en")
                        .into(),
                ),
            ],
            vec![
                Some(BlankNode::new_unchecked("b1").into()),
                Some(Literal::new_typed_literal("123", xsd::INTEGER).into()),
            ],
        ];
        (variables, solutions)
    }

    /// The CSV writer must produce exactly the expected document, one
    /// expected row per `concat!` fragment.
    #[test]
    fn test_csv_serialization() -> io::Result<()> {
        let (variables, solutions) = build_example();
        let mut writer = CsvSolutionsWriter::start(Vec::new(), variables.clone())?;
        let shared_variables = Rc::new(variables);
        for row in solutions {
            let solution = QuerySolution::from((Rc::clone(&shared_variables), row));
            writer.write(solution.iter())?;
        }
        let bytes = writer.finish();
        let expected = concat!(
            "x,literal\r\n",
            "http://example/x,String\r\n",
            "http://example/x,\"String-with-dquote\"\"\"\r\n",
            "_:b0,Blank node\r\n",
            ",Missing 'x'\r\n",
            ",\r\n",
            "http://example/x,\r\n",
            "_:b1,String-with-lang\r\n",
            "_:b1,123",
        );
        assert_eq!(str::from_utf8(&bytes).unwrap(), expected);
        Ok(())
    }

    /// The TSV writer must produce exactly the expected document, one
    /// expected row per `concat!` fragment.
    #[test]
    fn test_tsv_serialization() -> io::Result<()> {
        let (variables, solutions) = build_example();
        let mut writer = TsvSolutionsWriter::start(Vec::new(), variables.clone())?;
        let shared_variables = Rc::new(variables);
        for row in solutions {
            let solution = QuerySolution::from((Rc::clone(&shared_variables), row));
            writer.write(solution.iter())?;
        }
        let bytes = writer.finish();
        let expected = concat!(
            "?x\t?literal\n",
            "<http://example/x>\t\"String\"\n",
            "<http://example/x>\t\"String-with-dquote\\\"\"\n",
            "_:b0\t\"Blank node\"\n",
            "\t\"Missing 'x'\"\n",
            "\t\n",
            "<http://example/x>\t\n",
            "_:b1\t\"String-with-lang\"@en\n",
            "_:b1\t123",
        );
        assert_eq!(str::from_utf8(&bytes).unwrap(), expected);
        Ok(())
    }
}
|