|  |  |  | @ -1,10 +1,13 @@ | 
			
		
	
		
			
				
					|  |  |  |  | //! Implementation of [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/)
 | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | use crate::error::{ParseError, SyntaxError, SyntaxErrorKind}; | 
			
		
	
		
			
				
					|  |  |  |  | use memchr::memchr; | 
			
		
	
		
			
				
					|  |  |  |  | use oxrdf::Variable; | 
			
		
	
		
			
				
					|  |  |  |  | use oxrdf::{vocab::xsd, *}; | 
			
		
	
		
			
				
					|  |  |  |  | use std::io::{self, BufRead, Write}; | 
			
		
	
		
			
				
					|  |  |  |  | use std::str::FromStr; | 
			
		
	
		
			
				
					|  |  |  |  | use std::io::{self, BufRead, Read, Write}; | 
			
		
	
		
			
				
					|  |  |  |  | use std::str::{self, FromStr}; | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
/// Upper bound, in bytes, on the size of the line-reading buffer (4096 * 4096 = 16 MiB).
/// When growing the buffer would exceed this, line reading fails with an I/O error
/// instead of allocating more.
const MAX_BUFFER_SIZE: usize = 4096 * 4096;
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | pub fn write_boolean_csv_result<W: Write>(mut sink: W, value: bool) -> io::Result<W> { | 
			
		
	
		
			
				
					|  |  |  |  |     sink.write_all(if value { b"true" } else { b"false" })?; | 
			
		
	
	
		
			
				
					|  |  |  | @ -271,7 +274,7 @@ fn is_turtle_double(value: &str) -> bool { | 
			
		
	
		
			
				
					|  |  |  |  |     (with_before || with_after) && !value.is_empty() && value.iter().all(u8::is_ascii_digit) | 
			
		
	
		
			
				
					|  |  |  |  | } | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | pub enum TsvQueryResultsReader<R: BufRead> { | 
			
		
	
		
			
				
					|  |  |  |  | pub enum TsvQueryResultsReader<R: Read> { | 
			
		
	
		
			
				
					|  |  |  |  |     Solutions { | 
			
		
	
		
			
				
					|  |  |  |  |         variables: Vec<Variable>, | 
			
		
	
		
			
				
					|  |  |  |  |         solutions: TsvSolutionsReader<R>, | 
			
		
	
	
		
			
				
					|  |  |  | @ -279,14 +282,13 @@ pub enum TsvQueryResultsReader<R: BufRead> { | 
			
		
	
		
			
				
					|  |  |  |  |     Boolean(bool), | 
			
		
	
		
			
				
					|  |  |  |  | } | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | impl<R: BufRead> TsvQueryResultsReader<R> { | 
			
		
	
		
			
				
					|  |  |  |  |     pub fn read(mut source: R) -> Result<Self, ParseError> { | 
			
		
	
		
			
				
					|  |  |  |  |         let mut buffer = String::new(); | 
			
		
	
		
			
				
					|  |  |  |  | impl<R: Read> TsvQueryResultsReader<R> { | 
			
		
	
		
			
				
					|  |  |  |  |     pub fn read(read: R) -> Result<Self, ParseError> { | 
			
		
	
		
			
				
					|  |  |  |  |         let mut reader = LineReader::new(read); | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |         // We read the header
 | 
			
		
	
		
			
				
					|  |  |  |  |         source.read_line(&mut buffer)?; | 
			
		
	
		
			
				
					|  |  |  |  |         let line = buffer | 
			
		
	
		
			
				
					|  |  |  |  |             .as_str() | 
			
		
	
		
			
				
					|  |  |  |  |         let line = reader | 
			
		
	
		
			
				
					|  |  |  |  |             .next_line()? | 
			
		
	
		
			
				
					|  |  |  |  |             .trim_matches(|c| matches!(c, ' ' | '\r' | '\n')); | 
			
		
	
		
			
				
					|  |  |  |  |         if line.eq_ignore_ascii_case("true") { | 
			
		
	
		
			
				
					|  |  |  |  |             return Ok(Self::Boolean(true)); | 
			
		
	
	
		
			
				
					|  |  |  | @ -316,29 +318,23 @@ impl<R: BufRead> TsvQueryResultsReader<R> { | 
			
		
	
		
			
				
					|  |  |  |  |         let column_len = variables.len(); | 
			
		
	
		
			
				
					|  |  |  |  |         Ok(Self::Solutions { | 
			
		
	
		
			
				
					|  |  |  |  |             variables, | 
			
		
	
		
			
				
					|  |  |  |  |             solutions: TsvSolutionsReader { | 
			
		
	
		
			
				
					|  |  |  |  |                 source, | 
			
		
	
		
			
				
					|  |  |  |  |                 buffer, | 
			
		
	
		
			
				
					|  |  |  |  |                 column_len, | 
			
		
	
		
			
				
					|  |  |  |  |             }, | 
			
		
	
		
			
				
					|  |  |  |  |             solutions: TsvSolutionsReader { reader, column_len }, | 
			
		
	
		
			
				
					|  |  |  |  |         }) | 
			
		
	
		
			
				
					|  |  |  |  |     } | 
			
		
	
		
			
				
					|  |  |  |  | } | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | pub struct TsvSolutionsReader<R: BufRead> { | 
			
		
	
		
			
				
					|  |  |  |  |     source: R, | 
			
		
	
		
			
				
					|  |  |  |  |     buffer: String, | 
			
		
	
		
			
				
					|  |  |  |  | pub struct TsvSolutionsReader<R: Read> { | 
			
		
	
		
			
				
					|  |  |  |  |     reader: LineReader<R>, | 
			
		
	
		
			
				
					|  |  |  |  |     column_len: usize, | 
			
		
	
		
			
				
					|  |  |  |  | } | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | impl<R: BufRead> TsvSolutionsReader<R> { | 
			
		
	
		
			
				
					|  |  |  |  |     pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, ParseError> { | 
			
		
	
		
			
				
					|  |  |  |  |         self.buffer.clear(); | 
			
		
	
		
			
				
					|  |  |  |  |         if self.source.read_line(&mut self.buffer)? == 0 { | 
			
		
	
		
			
				
					|  |  |  |  |             return Ok(None); | 
			
		
	
		
			
				
					|  |  |  |  |         let line = self.reader.next_line()?; | 
			
		
	
		
			
				
					|  |  |  |  |         if line.is_empty() { | 
			
		
	
		
			
				
					|  |  |  |  |             return Ok(None); // EOF
 | 
			
		
	
		
			
				
					|  |  |  |  |         } | 
			
		
	
		
			
				
					|  |  |  |  |         let elements = self | 
			
		
	
		
			
				
					|  |  |  |  |             .buffer | 
			
		
	
		
			
				
					|  |  |  |  |         let elements = line | 
			
		
	
		
			
				
					|  |  |  |  |             .split('\t') | 
			
		
	
		
			
				
					|  |  |  |  |             .map(|v| { | 
			
		
	
		
			
				
					|  |  |  |  |                 let v = v.trim(); | 
			
		
	
	
		
			
				
					|  |  |  | @ -346,7 +342,10 @@ impl<R: BufRead> TsvSolutionsReader<R> { | 
			
		
	
		
			
				
					|  |  |  |  |                     Ok(None) | 
			
		
	
		
			
				
					|  |  |  |  |                 } else { | 
			
		
	
		
			
				
					|  |  |  |  |                     Ok(Some(Term::from_str(v).map_err(|e| SyntaxError { | 
			
		
	
		
			
				
					|  |  |  |  |                         inner: SyntaxErrorKind::Term(e), | 
			
		
	
		
			
				
					|  |  |  |  |                         inner: SyntaxErrorKind::Term { | 
			
		
	
		
			
				
					|  |  |  |  |                             error: e, | 
			
		
	
		
			
				
					|  |  |  |  |                             term: v.into(), | 
			
		
	
		
			
				
					|  |  |  |  |                         }, | 
			
		
	
		
			
				
					|  |  |  |  |                     })?)) | 
			
		
	
		
			
				
					|  |  |  |  |                 } | 
			
		
	
		
			
				
					|  |  |  |  |             }) | 
			
		
	
	
		
			
				
					|  |  |  | @ -357,16 +356,67 @@ impl<R: BufRead> TsvSolutionsReader<R> { | 
			
		
	
		
			
				
					|  |  |  |  |             Ok(Some(Vec::new())) // Zero columns case
 | 
			
		
	
		
			
				
					|  |  |  |  |         } else { | 
			
		
	
		
			
				
					|  |  |  |  |             Err(SyntaxError::msg(format!( | 
			
		
	
		
			
				
					|  |  |  |  |                 "This TSV files has {} columns but we found a row with {} columns: {:?}", | 
			
		
	
		
			
				
					|  |  |  |  |                 "This TSV files has {} columns but we found a row with {} columns: {}", | 
			
		
	
		
			
				
					|  |  |  |  |                 self.column_len, | 
			
		
	
		
			
				
					|  |  |  |  |                 elements.len(), | 
			
		
	
		
			
				
					|  |  |  |  |                 self.buffer | 
			
		
	
		
			
				
					|  |  |  |  |                 line | 
			
		
	
		
			
				
					|  |  |  |  |             )) | 
			
		
	
		
			
				
					|  |  |  |  |             .into()) | 
			
		
	
		
			
				
					|  |  |  |  |         } | 
			
		
	
		
			
				
					|  |  |  |  |     } | 
			
		
	
		
			
				
					|  |  |  |  | } | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | struct LineReader<R: Read> { | 
			
		
	
		
			
				
					|  |  |  |  |     read: R, | 
			
		
	
		
			
				
					|  |  |  |  |     buffer: Vec<u8>, | 
			
		
	
		
			
				
					|  |  |  |  |     start: usize, | 
			
		
	
		
			
				
					|  |  |  |  |     end: usize, | 
			
		
	
		
			
				
					|  |  |  |  | } | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | impl<R: Read> LineReader<R> { | 
			
		
	
		
			
				
					|  |  |  |  |     fn new(read: R) -> Self { | 
			
		
	
		
			
				
					|  |  |  |  |         Self { | 
			
		
	
		
			
				
					|  |  |  |  |             read, | 
			
		
	
		
			
				
					|  |  |  |  |             buffer: Vec::new(), | 
			
		
	
		
			
				
					|  |  |  |  |             start: 0, | 
			
		
	
		
			
				
					|  |  |  |  |             end: 0, | 
			
		
	
		
			
				
					|  |  |  |  |         } | 
			
		
	
		
			
				
					|  |  |  |  |     } | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     fn next_line(&mut self) -> io::Result<&str> { | 
			
		
	
		
			
				
					|  |  |  |  |         self.buffer.copy_within(self.start..self.end, 0); | 
			
		
	
		
			
				
					|  |  |  |  |         self.end -= self.start; | 
			
		
	
		
			
				
					|  |  |  |  |         self.start = 0; | 
			
		
	
		
			
				
					|  |  |  |  |         let line_end = loop { | 
			
		
	
		
			
				
					|  |  |  |  |             if let Some(eol) = memchr(b'\n', &self.buffer[self.start..self.end]) { | 
			
		
	
		
			
				
					|  |  |  |  |                 break self.start + eol + 1; | 
			
		
	
		
			
				
					|  |  |  |  |             } | 
			
		
	
		
			
				
					|  |  |  |  |             if self.end + 1024 > self.buffer.len() { | 
			
		
	
		
			
				
					|  |  |  |  |                 if self.end + 1024 > MAX_BUFFER_SIZE { | 
			
		
	
		
			
				
					|  |  |  |  |                     return Err(io::Error::new( | 
			
		
	
		
			
				
					|  |  |  |  |                         io::ErrorKind::OutOfMemory, | 
			
		
	
		
			
				
					|  |  |  |  |                         format!("Reached the buffer maximal size of {MAX_BUFFER_SIZE}"), | 
			
		
	
		
			
				
					|  |  |  |  |                     )); | 
			
		
	
		
			
				
					|  |  |  |  |                 } | 
			
		
	
		
			
				
					|  |  |  |  |                 self.buffer.resize(self.end + 1024, b'\0'); | 
			
		
	
		
			
				
					|  |  |  |  |             } | 
			
		
	
		
			
				
					|  |  |  |  |             let read = self.read.read(&mut self.buffer[self.end..])?; | 
			
		
	
		
			
				
					|  |  |  |  |             if read == 0 { | 
			
		
	
		
			
				
					|  |  |  |  |                 break self.end; | 
			
		
	
		
			
				
					|  |  |  |  |             } | 
			
		
	
		
			
				
					|  |  |  |  |             self.end += read; | 
			
		
	
		
			
				
					|  |  |  |  |         }; | 
			
		
	
		
			
				
					|  |  |  |  |         let result = str::from_utf8(&self.buffer[self.start..line_end]).map_err(|e| { | 
			
		
	
		
			
				
					|  |  |  |  |             io::Error::new( | 
			
		
	
		
			
				
					|  |  |  |  |                 io::ErrorKind::InvalidData, | 
			
		
	
		
			
				
					|  |  |  |  |                 format!("Invalid UTF-8 in the TSV file: {e}"), | 
			
		
	
		
			
				
					|  |  |  |  |             ) | 
			
		
	
		
			
				
					|  |  |  |  |         }); | 
			
		
	
		
			
				
					|  |  |  |  |         self.start = line_end; | 
			
		
	
		
			
				
					|  |  |  |  |         result | 
			
		
	
		
			
				
					|  |  |  |  |     } | 
			
		
	
		
			
				
					|  |  |  |  | } | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | #[cfg(test)] | 
			
		
	
		
			
				
					|  |  |  |  | mod tests { | 
			
		
	
		
			
				
					|  |  |  |  |     use super::*; | 
			
		
	
	
		
			
				
					|  |  |  | 
 |