CSV and TSV SPARQL results: always write a trailing line break

This follows the TSV grammar and probably makes for nicer CSV support too.

Also checks for rows with the wrong number of columns when parsing TSV.
pull/409/head
Tpt 2 years ago committed by Thomas Tanon
parent a51509dcd3
commit 6d09d77c61
  1. 2
      lib/sparesults/README.md
  2. 136
      lib/sparesults/src/csv.rs
  3. 2
      lib/sparesults/src/lib.rs
  4. 6
      server/src/main.rs

@ -50,7 +50,7 @@ assert_eq!(
// And with a set of solutions // And with a set of solutions
assert_eq!( assert_eq!(
convert_json_to_tsv(b"{\"head\":{\"vars\":[\"foo\",\"bar\"]},\"results\":{\"bindings\":[{\"foo\":{\"type\":\"literal\",\"value\":\"test\"}}]}}".as_slice()).unwrap(), convert_json_to_tsv(b"{\"head\":{\"vars\":[\"foo\",\"bar\"]},\"results\":{\"bindings\":[{\"foo\":{\"type\":\"literal\",\"value\":\"test\"}}]}}".as_slice()).unwrap(),
b"?foo\t?bar\n\"test\"\t" b"?foo\t?bar\n\"test\"\t\n"
); );
``` ```

@ -27,6 +27,7 @@ impl<W: Write> CsvSolutionsWriter<W> {
} }
sink.write_all(variable.as_str().as_bytes())?; sink.write_all(variable.as_str().as_bytes())?;
} }
sink.write_all(b"\r\n")?;
Ok(Self { sink, variables }) Ok(Self { sink, variables })
} }
@ -40,7 +41,6 @@ impl<W: Write> CsvSolutionsWriter<W> {
values[position] = Some(value); values[position] = Some(value);
} }
} }
self.sink.write_all(b"\r\n")?;
let mut start_binding = true; let mut start_binding = true;
for value in values { for value in values {
if start_binding { if start_binding {
@ -52,7 +52,7 @@ impl<W: Write> CsvSolutionsWriter<W> {
write_csv_term(value, &mut self.sink)?; write_csv_term(value, &mut self.sink)?;
} }
} }
Ok(()) self.sink.write_all(b"\r\n")
} }
pub fn finish(mut self) -> io::Result<W> { pub fn finish(mut self) -> io::Result<W> {
@ -118,6 +118,7 @@ impl<W: Write> TsvSolutionsWriter<W> {
sink.write_all(b"?")?; sink.write_all(b"?")?;
sink.write_all(variable.as_str().as_bytes())?; sink.write_all(variable.as_str().as_bytes())?;
} }
sink.write_all(b"\n")?;
Ok(Self { sink, variables }) Ok(Self { sink, variables })
} }
@ -131,7 +132,6 @@ impl<W: Write> TsvSolutionsWriter<W> {
values[position] = Some(value); values[position] = Some(value);
} }
} }
self.sink.write_all(b"\n")?;
let mut start_binding = true; let mut start_binding = true;
for value in values { for value in values {
if start_binding { if start_binding {
@ -143,7 +143,7 @@ impl<W: Write> TsvSolutionsWriter<W> {
write_tsv_term(value, &mut self.sink)?; write_tsv_term(value, &mut self.sink)?;
} }
} }
Ok(()) self.sink.write_all(b"\n")
} }
pub fn finish(mut self) -> io::Result<W> { pub fn finish(mut self) -> io::Result<W> {
@ -289,29 +289,42 @@ impl<R: BufRead> TsvQueryResultsReader<R> {
// We read the header // We read the header
source.read_line(&mut buffer)?; source.read_line(&mut buffer)?;
if buffer.trim().eq_ignore_ascii_case("true") { let line = buffer
.as_str()
.trim_matches(|c| matches!(c, ' ' | '\r' | '\n'));
if line.eq_ignore_ascii_case("true") {
return Ok(Self::Boolean(true)); return Ok(Self::Boolean(true));
} }
if buffer.trim().eq_ignore_ascii_case("false") { if line.eq_ignore_ascii_case("false") {
return Ok(Self::Boolean(false)); return Ok(Self::Boolean(false));
} }
let mut variables = Vec::new(); let mut variables = Vec::new();
for v in buffer.split('\t') { if !line.is_empty() {
for v in line.split('\t') {
let v = v.trim(); let v = v.trim();
if v.is_empty() {
return Err(SyntaxError::msg("Empty column on the first row. The first row should be a list of variables like ?foo or $bar").into());
}
let variable = Variable::from_str(v).map_err(|e| { let variable = Variable::from_str(v).map_err(|e| {
SyntaxError::msg(format!("Invalid variable declaration '{v}': {e}")) SyntaxError::msg(format!("Invalid variable declaration '{v}': {e}"))
})?; })?;
if variables.contains(&variable) { if variables.contains(&variable) {
return Err( return Err(SyntaxError::msg(format!(
SyntaxError::msg(format!("The variable {variable} is declared twice")).into(), "The variable {variable} is declared twice"
); ))
.into());
} }
variables.push(variable); variables.push(variable);
} }
}
let column_len = variables.len();
Ok(Self::Solutions { Ok(Self::Solutions {
variables, variables,
solutions: TsvSolutionsReader { source, buffer }, solutions: TsvSolutionsReader {
source,
buffer,
column_len,
},
}) })
} }
} }
@ -319,6 +332,7 @@ impl<R: BufRead> TsvQueryResultsReader<R> {
pub struct TsvSolutionsReader<R: BufRead> { pub struct TsvSolutionsReader<R: BufRead> {
source: R, source: R,
buffer: String, buffer: String,
column_len: usize,
} }
impl<R: BufRead> TsvSolutionsReader<R> { impl<R: BufRead> TsvSolutionsReader<R> {
@ -327,8 +341,8 @@ impl<R: BufRead> TsvSolutionsReader<R> {
if self.source.read_line(&mut self.buffer)? == 0 { if self.source.read_line(&mut self.buffer)? == 0 {
return Ok(None); return Ok(None);
} }
Ok(Some( let elements = self
self.buffer .buffer
.split('\t') .split('\t')
.map(|v| { .map(|v| {
let v = v.trim(); let v = v.trim();
@ -340,8 +354,20 @@ impl<R: BufRead> TsvSolutionsReader<R> {
})?)) })?))
} }
}) })
.collect::<Result<_, ParseError>>()?, .collect::<Result<Vec<_>, ParseError>>()?;
if elements.len() == self.column_len {
Ok(Some(elements))
} else if self.column_len == 0 && elements == [None] {
Ok(Some(Vec::new())) // Zero columns case
} else {
Err(SyntaxError::msg(format!(
"This TSV files has {} columns but we found a row with {} columns: {:?}",
self.column_len,
elements.len(),
self.buffer
)) ))
.into())
}
} }
} }
@ -414,7 +440,7 @@ mod tests {
)?; )?;
} }
let result = writer.finish()?; let result = writer.finish()?;
assert_eq!(str::from_utf8(&result).unwrap(), "x,literal\r\nhttp://example/x,String\r\nhttp://example/x,\"String-with-dquote\"\"\"\r\n_:b0,Blank node\r\n,Missing 'x'\r\n,\r\nhttp://example/x,\r\n_:b1,String-with-lang\r\n_:b1,123\r\n,\"escape,\t\r\n\""); assert_eq!(str::from_utf8(&result).unwrap(), "x,literal\r\nhttp://example/x,String\r\nhttp://example/x,\"String-with-dquote\"\"\"\r\n_:b0,Blank node\r\n,Missing 'x'\r\n,\r\nhttp://example/x,\r\n_:b1,String-with-lang\r\n_:b1,123\r\n,\"escape,\t\r\n\"\r\n");
Ok(()) Ok(())
} }
@ -434,7 +460,7 @@ mod tests {
)?; )?;
} }
let result = writer.finish()?; let result = writer.finish()?;
assert_eq!(str::from_utf8(&result).unwrap(), "?x\t?literal\n<http://example/x>\t\"String\"\n<http://example/x>\t\"String-with-dquote\\\"\"\n_:b0\t\"Blank node\"\n\t\"Missing 'x'\"\n\t\n<http://example/x>\t\n_:b1\t\"String-with-lang\"@en\n_:b1\t123\n\t\"escape,\\t\\r\\n\""); assert_eq!(str::from_utf8(&result).unwrap(), "?x\t?literal\n<http://example/x>\t\"String\"\n<http://example/x>\t\"String-with-dquote\\\"\"\n_:b0\t\"Blank node\"\n\t\"Missing 'x'\"\n\t\n<http://example/x>\t\n_:b1\t\"String-with-lang\"@en\n_:b1\t123\n\t\"escape,\\t\\r\\n\"\n");
// Read // Read
if let TsvQueryResultsReader::Solutions { if let TsvQueryResultsReader::Solutions {
@ -458,7 +484,16 @@ mod tests {
#[test] #[test]
fn test_bad_tsv() { fn test_bad_tsv() {
let mut bad_tsvs = vec![ let mut bad_tsvs = vec![
"?", "?p", "?p?o", "?p\n<", "?p\n_", "?p\n_:", "?p\n\"", "?p\n<<", "?",
"?p",
"?p?o",
"?p\n<",
"?p\n_",
"?p\n_:",
"?p\n\"",
"?p\n<<",
"?p\n1\t2\n",
"?p\n\n",
]; ];
let a_lot_of_strings = format!("?p\n{}\n", "<".repeat(100_000)); let a_lot_of_strings = format!("?p\n{}\n", "<".repeat(100_000));
bad_tsvs.push(&a_lot_of_strings); bad_tsvs.push(&a_lot_of_strings);
@ -470,4 +505,69 @@ mod tests {
} }
} }
} }
#[test]
fn test_no_columns_csv_serialization() -> io::Result<()> {
let mut writer = CsvSolutionsWriter::start(Vec::new(), Vec::new())?;
writer.write([])?;
let result = writer.finish()?;
assert_eq!(str::from_utf8(&result).unwrap(), "\r\n\r\n");
Ok(())
}
#[test]
fn test_no_columns_tsv_serialization() -> io::Result<()> {
let mut writer = TsvSolutionsWriter::start(Vec::new(), Vec::new())?;
writer.write([])?;
let result = writer.finish()?;
assert_eq!(str::from_utf8(&result).unwrap(), "\n\n");
Ok(())
}
#[test]
fn test_no_columns_tsv_parsing() -> io::Result<()> {
if let TsvQueryResultsReader::Solutions {
mut solutions,
variables,
} = TsvQueryResultsReader::read("\n\n".as_bytes())?
{
assert_eq!(variables, Vec::<Variable>::new());
assert_eq!(solutions.read_next()?, Some(Vec::new()));
assert_eq!(solutions.read_next()?, None);
} else {
unreachable!()
}
Ok(())
}
#[test]
fn test_no_results_csv_serialization() -> io::Result<()> {
let result =
CsvSolutionsWriter::start(Vec::new(), vec![Variable::new_unchecked("a")])?.finish()?;
assert_eq!(str::from_utf8(&result).unwrap(), "a\r\n");
Ok(())
}
#[test]
fn test_no_results_tsv_serialization() -> io::Result<()> {
let result =
TsvSolutionsWriter::start(Vec::new(), vec![Variable::new_unchecked("a")])?.finish()?;
assert_eq!(str::from_utf8(&result).unwrap(), "?a\n");
Ok(())
}
#[test]
fn test_no_results_tsv_parsing() -> io::Result<()> {
if let TsvQueryResultsReader::Solutions {
mut solutions,
variables,
} = TsvQueryResultsReader::read("?a\n".as_bytes())?
{
assert_eq!(variables, vec![Variable::new_unchecked("a")]);
assert_eq!(solutions.read_next()?, None);
} else {
unreachable!()
}
Ok(())
}
} }

@ -439,7 +439,7 @@ impl QueryResultsSerializer {
/// let mut writer = json_serializer.solutions_writer(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")])?; /// let mut writer = json_serializer.solutions_writer(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")])?;
/// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test"))))?; /// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test"))))?;
/// writer.finish()?; /// writer.finish()?;
/// assert_eq!(buffer, b"?foo\t?bar\n\"test\"\t"); /// assert_eq!(buffer, b"?foo\t?bar\n\"test\"\t\n");
/// # std::io::Result::Ok(()) /// # std::io::Result::Ok(())
/// ``` /// ```
#[must_use] #[must_use]

@ -1354,7 +1354,7 @@ mod tests {
.build(); .build();
server.test_body( server.test_body(
request, request,
"s,p,o\r\nhttp://example.com,http://example.com,http://example.com", "s,p,o\r\nhttp://example.com,http://example.com,http://example.com\r\n",
) )
} }
@ -1429,7 +1429,7 @@ mod tests {
.build(); .build();
server.test_body( server.test_body(
request, request,
"s,p,o\r\nhttp://example.com,http://example.com,http://example.com", "s,p,o\r\nhttp://example.com,http://example.com,http://example.com\r\n",
) )
} }
@ -1454,7 +1454,7 @@ mod tests {
.with_body("query=SELECT%20?s%20?p%20?o%20WHERE%20{%20?s%20?p%20?o%20}"); .with_body("query=SELECT%20?s%20?p%20?o%20WHERE%20{%20?s%20?p%20?o%20}");
server.test_body( server.test_body(
request, request,
"s,p,o\r\nhttp://example.com,http://example.com,http://example.com", "s,p,o\r\nhttp://example.com,http://example.com,http://example.com\r\n",
) )
} }

Loading…
Cancel
Save