From 6d09d77c61c761bbaab83139df054f8a92fba2d3 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sat, 4 Mar 2023 11:44:21 +0100 Subject: [PATCH] CSV and TSV SPARQL results: always print trailing line jumps Follows the TSV grammar and probably nicer CSV support too Also check for wrong numbers of columns in TSV parsing --- lib/sparesults/README.md | 2 +- lib/sparesults/src/csv.rs | 172 ++++++++++++++++++++++++++++++-------- lib/sparesults/src/lib.rs | 2 +- server/src/main.rs | 6 +- 4 files changed, 141 insertions(+), 41 deletions(-) diff --git a/lib/sparesults/README.md b/lib/sparesults/README.md index bd861ef1..8cf6c3c7 100644 --- a/lib/sparesults/README.md +++ b/lib/sparesults/README.md @@ -50,7 +50,7 @@ assert_eq!( // And with a set of solutions assert_eq!( convert_json_to_tsv(b"{\"head\":{\"vars\":[\"foo\",\"bar\"]},\"results\":{\"bindings\":[{\"foo\":{\"type\":\"literal\",\"value\":\"test\"}}]}}".as_slice()).unwrap(), - b"?foo\t?bar\n\"test\"\t" + b"?foo\t?bar\n\"test\"\t\n" ); ``` diff --git a/lib/sparesults/src/csv.rs b/lib/sparesults/src/csv.rs index aa2cb280..076f1ea3 100644 --- a/lib/sparesults/src/csv.rs +++ b/lib/sparesults/src/csv.rs @@ -27,6 +27,7 @@ impl CsvSolutionsWriter { } sink.write_all(variable.as_str().as_bytes())?; } + sink.write_all(b"\r\n")?; Ok(Self { sink, variables }) } @@ -40,7 +41,6 @@ impl CsvSolutionsWriter { values[position] = Some(value); } } - self.sink.write_all(b"\r\n")?; let mut start_binding = true; for value in values { if start_binding { @@ -52,7 +52,7 @@ impl CsvSolutionsWriter { write_csv_term(value, &mut self.sink)?; } } - Ok(()) + self.sink.write_all(b"\r\n") } pub fn finish(mut self) -> io::Result { @@ -118,6 +118,7 @@ impl TsvSolutionsWriter { sink.write_all(b"?")?; sink.write_all(variable.as_str().as_bytes())?; } + sink.write_all(b"\n")?; Ok(Self { sink, variables }) } @@ -131,7 +132,6 @@ impl TsvSolutionsWriter { values[position] = Some(value); } } - self.sink.write_all(b"\n")?; let mut start_binding = true; for value in values { if start_binding { @@ -143,7 +143,7 @@ impl TsvSolutionsWriter { write_tsv_term(value, &mut self.sink)?; } } - Ok(()) + self.sink.write_all(b"\n") } pub fn finish(mut self) -> io::Result { @@ -289,29 +289,42 @@ impl TsvQueryResultsReader { // We read the header source.read_line(&mut buffer)?; - if buffer.trim().eq_ignore_ascii_case("true") { + let line = buffer + .as_str() + .trim_matches(|c| matches!(c, ' ' | '\r' | '\n')); + if line.eq_ignore_ascii_case("true") { return Ok(Self::Boolean(true)); } - if buffer.trim().eq_ignore_ascii_case("false") { + if line.eq_ignore_ascii_case("false") { return Ok(Self::Boolean(false)); } let mut variables = Vec::new(); - for v in buffer.split('\t') { - let v = v.trim(); - let variable = Variable::from_str(v).map_err(|e| { - SyntaxError::msg(format!("Invalid variable declaration '{v}': {e}")) - })?; - if variables.contains(&variable) { - return Err( - SyntaxError::msg(format!("The variable {variable} is declared twice")).into(), - ); + if !line.is_empty() { + for v in line.split('\t') { + let v = v.trim(); + if v.is_empty() { + return Err(SyntaxError::msg("Empty column on the first row. The first row should be a list of variables like ?foo or $bar").into()); + } + let variable = Variable::from_str(v).map_err(|e| { + SyntaxError::msg(format!("Invalid variable declaration '{v}': {e}")) + })?; + if variables.contains(&variable) { + return Err(SyntaxError::msg(format!( + "The variable {variable} is declared twice" + )) + .into()); + } + variables.push(variable); } - variables.push(variable); } - + let column_len = variables.len(); Ok(Self::Solutions { variables, - solutions: TsvSolutionsReader { source, buffer }, + solutions: TsvSolutionsReader { + source, + buffer, + column_len, + }, }) } } @@ -319,6 +332,7 @@ impl TsvQueryResultsReader { pub struct TsvSolutionsReader { source: R, buffer: String, + column_len: usize, } impl TsvSolutionsReader { @@ -327,21 +341,33 @@ impl TsvSolutionsReader { if self.source.read_line(&mut self.buffer)? == 0 { return Ok(None); } - Ok(Some( - self.buffer - .split('\t') - .map(|v| { - let v = v.trim(); - if v.is_empty() { - Ok(None) - } else { - Ok(Some(Term::from_str(v).map_err(|e| SyntaxError { - inner: SyntaxErrorKind::Term(e), - })?)) - } - }) - .collect::>()?, - )) + let elements = self + .buffer + .split('\t') + .map(|v| { + let v = v.trim(); + if v.is_empty() { + Ok(None) + } else { + Ok(Some(Term::from_str(v).map_err(|e| SyntaxError { + inner: SyntaxErrorKind::Term(e), + })?)) + } + }) + .collect::, ParseError>>()?; + if elements.len() == self.column_len { + Ok(Some(elements)) + } else if self.column_len == 0 && elements == [None] { + Ok(Some(Vec::new())) // Zero columns case + } else { + Err(SyntaxError::msg(format!( + "This TSV files has {} columns but we found a row with {} columns: {:?}", + self.column_len, + elements.len(), + self.buffer + )) + .into()) + } } } @@ -414,7 +440,7 @@ mod tests { )?; } let result = writer.finish()?; - assert_eq!(str::from_utf8(&result).unwrap(), "x,literal\r\nhttp://example/x,String\r\nhttp://example/x,\"String-with-dquote\"\"\"\r\n_:b0,Blank node\r\n,Missing 'x'\r\n,\r\nhttp://example/x,\r\n_:b1,String-with-lang\r\n_:b1,123\r\n,\"escape,\t\r\n\""); + assert_eq!(str::from_utf8(&result).unwrap(), "x,literal\r\nhttp://example/x,String\r\nhttp://example/x,\"String-with-dquote\"\"\"\r\n_:b0,Blank node\r\n,Missing 'x'\r\n,\r\nhttp://example/x,\r\n_:b1,String-with-lang\r\n_:b1,123\r\n,\"escape,\t\r\n\"\r\n"); Ok(()) } @@ -434,7 +460,7 @@ mod tests { )?; } let result = writer.finish()?; - assert_eq!(str::from_utf8(&result).unwrap(), "?x\t?literal\n\t\"String\"\n\t\"String-with-dquote\\\"\"\n_:b0\t\"Blank node\"\n\t\"Missing 'x'\"\n\t\n\t\n_:b1\t\"String-with-lang\"@en\n_:b1\t123\n\t\"escape,\\t\\r\\n\""); + assert_eq!(str::from_utf8(&result).unwrap(), "?x\t?literal\n\t\"String\"\n\t\"String-with-dquote\\\"\"\n_:b0\t\"Blank node\"\n\t\"Missing 'x'\"\n\t\n\t\n_:b1\t\"String-with-lang\"@en\n_:b1\t123\n\t\"escape,\\t\\r\\n\"\n"); // Read if let TsvQueryResultsReader::Solutions { @@ -458,7 +484,16 @@ mod tests { #[test] fn test_bad_tsv() { let mut bad_tsvs = vec![ - "?", "?p", "?p?o", "?p\n<", "?p\n_", "?p\n_:", "?p\n\"", "?p\n<<", + "?", + "?p", + "?p?o", + "?p\n<", + "?p\n_", + "?p\n_:", + "?p\n\"", + "?p\n<<", + "?p\n1\t2\n", + "?p\n\n", ]; let a_lot_of_strings = format!("?p\n{}\n", "<".repeat(100_000)); bad_tsvs.push(&a_lot_of_strings); @@ -470,4 +505,69 @@ mod tests { } } } + + #[test] + fn test_no_columns_csv_serialization() -> io::Result<()> { + let mut writer = CsvSolutionsWriter::start(Vec::new(), Vec::new())?; + writer.write([])?; + let result = writer.finish()?; + assert_eq!(str::from_utf8(&result).unwrap(), "\r\n\r\n"); + Ok(()) + } + + #[test] + fn test_no_columns_tsv_serialization() -> io::Result<()> { + let mut writer = TsvSolutionsWriter::start(Vec::new(), Vec::new())?; + writer.write([])?; + let result = writer.finish()?; + assert_eq!(str::from_utf8(&result).unwrap(), "\n\n"); + Ok(()) + } + + #[test] + fn test_no_columns_tsv_parsing() -> io::Result<()> { + if let TsvQueryResultsReader::Solutions { + mut solutions, + variables, + } = TsvQueryResultsReader::read("\n\n".as_bytes())? + { + assert_eq!(variables, Vec::::new()); + assert_eq!(solutions.read_next()?, Some(Vec::new())); + assert_eq!(solutions.read_next()?, None); + } else { + unreachable!() + } + Ok(()) + } + + #[test] + fn test_no_results_csv_serialization() -> io::Result<()> { + let result = + CsvSolutionsWriter::start(Vec::new(), vec![Variable::new_unchecked("a")])?.finish()?; + assert_eq!(str::from_utf8(&result).unwrap(), "a\r\n"); + Ok(()) + } + + #[test] + fn test_no_results_tsv_serialization() -> io::Result<()> { + let result = + TsvSolutionsWriter::start(Vec::new(), vec![Variable::new_unchecked("a")])?.finish()?; + assert_eq!(str::from_utf8(&result).unwrap(), "?a\n"); + Ok(()) + } + + #[test] + fn test_no_results_tsv_parsing() -> io::Result<()> { + if let TsvQueryResultsReader::Solutions { + mut solutions, + variables, + } = TsvQueryResultsReader::read("?a\n".as_bytes())? + { + assert_eq!(variables, vec![Variable::new_unchecked("a")]); + assert_eq!(solutions.read_next()?, None); + } else { + unreachable!() + } + Ok(()) + } } diff --git a/lib/sparesults/src/lib.rs b/lib/sparesults/src/lib.rs index c747477a..da3bbe20 100644 --- a/lib/sparesults/src/lib.rs +++ b/lib/sparesults/src/lib.rs @@ -439,7 +439,7 @@ impl QueryResultsSerializer { /// let mut writer = json_serializer.solutions_writer(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")])?; /// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test"))))?; /// writer.finish()?; -/// assert_eq!(buffer, b"?foo\t?bar\n\"test\"\t"); +/// assert_eq!(buffer, b"?foo\t?bar\n\"test\"\t\n"); /// # std::io::Result::Ok(()) /// ``` #[must_use] diff --git a/server/src/main.rs b/server/src/main.rs index 80d2585b..b7cb578e 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -1354,7 +1354,7 @@ mod tests { .build(); server.test_body( request, - "s,p,o\r\nhttp://example.com,http://example.com,http://example.com", + "s,p,o\r\nhttp://example.com,http://example.com,http://example.com\r\n", ) } @@ -1429,7 +1429,7 @@ mod tests { .build(); server.test_body( request, - "s,p,o\r\nhttp://example.com,http://example.com,http://example.com", + "s,p,o\r\nhttp://example.com,http://example.com,http://example.com\r\n", ) } @@ -1454,7 +1454,7 @@ mod tests { .with_body("query=SELECT%20?s%20?p%20?o%20WHERE%20{%20?s%20?p%20?o%20}"); server.test_body( request, - "s,p,o\r\nhttp://example.com,http://example.com,http://example.com", + "s,p,o\r\nhttp://example.com,http://example.com,http://example.com\r\n", ) }