sparesults: use Read instead of BufRead as input type

Allows avoiding an extra intermediate buffer for JSON and TSV
pull/643/head
Tpt 1 year ago committed by Thomas Tanon
parent d280f7d2f7
commit 38844f6436
  1. 18
      lib/sparesults/src/parser.rs
  2. 14
      lib/sparesults/src/xml.rs
  3. 23
      lib/src/sparql/model.rs
  4. 3
      lib/src/sparql/service.rs
  5. 5
      python/src/sparql.rs
  6. 11
      testsuite/src/sparql_evaluator.rs

@ -5,7 +5,7 @@ use crate::json::{JsonQueryResultsReader, JsonSolutionsReader};
use crate::solution::QuerySolution; use crate::solution::QuerySolution;
use crate::xml::{XmlQueryResultsReader, XmlSolutionsReader}; use crate::xml::{XmlQueryResultsReader, XmlSolutionsReader};
use oxrdf::Variable; use oxrdf::Variable;
use std::io::BufRead; use std::io::Read;
use std::rc::Rc; use std::rc::Rc;
/// Parsers for [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats. /// Parsers for [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats.
@ -47,6 +47,8 @@ impl QueryResultsParser {
/// Reads a result file. /// Reads a result file.
/// ///
/// Reads are buffered.
///
/// Example in XML (the API is the same for JSON and TSV): /// Example in XML (the API is the same for JSON and TSV):
/// ``` /// ```
/// use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader}; /// use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader};
@ -68,7 +70,7 @@ impl QueryResultsParser {
/// } /// }
/// # Result::<(),sparesults::ParseError>::Ok(()) /// # Result::<(),sparesults::ParseError>::Ok(())
/// ``` /// ```
pub fn parse_read<R: BufRead>( pub fn parse_read<R: Read>(
&self, &self,
reader: R, reader: R,
) -> Result<FromReadQueryResultsReader<R>, ParseError> { ) -> Result<FromReadQueryResultsReader<R>, ParseError> {
@ -108,7 +110,7 @@ impl QueryResultsParser {
} }
#[deprecated(note = "Use parse_read")] #[deprecated(note = "Use parse_read")]
pub fn read_results<R: BufRead>( pub fn read_results<R: Read>(
&self, &self,
reader: R, reader: R,
) -> Result<FromReadQueryResultsReader<R>, ParseError> { ) -> Result<FromReadQueryResultsReader<R>, ParseError> {
@ -141,7 +143,7 @@ impl QueryResultsParser {
/// } /// }
/// # Result::<(),sparesults::ParseError>::Ok(()) /// # Result::<(),sparesults::ParseError>::Ok(())
/// ``` /// ```
pub enum FromReadQueryResultsReader<R: BufRead> { pub enum FromReadQueryResultsReader<R: Read> {
Solutions(FromReadSolutionsReader<R>), Solutions(FromReadSolutionsReader<R>),
Boolean(bool), Boolean(bool),
} }
@ -165,18 +167,18 @@ pub enum FromReadQueryResultsReader<R: BufRead> {
/// # Result::<(),sparesults::ParseError>::Ok(()) /// # Result::<(),sparesults::ParseError>::Ok(())
/// ``` /// ```
#[allow(clippy::rc_buffer)] #[allow(clippy::rc_buffer)]
pub struct FromReadSolutionsReader<R: BufRead> { pub struct FromReadSolutionsReader<R: Read> {
variables: Rc<Vec<Variable>>, variables: Rc<Vec<Variable>>,
solutions: SolutionsReaderKind<R>, solutions: SolutionsReaderKind<R>,
} }
enum SolutionsReaderKind<R: BufRead> { enum SolutionsReaderKind<R: Read> {
Xml(XmlSolutionsReader<R>), Xml(XmlSolutionsReader<R>),
Json(JsonSolutionsReader<R>), Json(JsonSolutionsReader<R>),
Tsv(TsvSolutionsReader<R>), Tsv(TsvSolutionsReader<R>),
} }
impl<R: BufRead> FromReadSolutionsReader<R> { impl<R: Read> FromReadSolutionsReader<R> {
/// Ordered list of the declared variables at the beginning of the results. /// Ordered list of the declared variables at the beginning of the results.
/// ///
/// Example in TSV (the API is the same for JSON and XML): /// Example in TSV (the API is the same for JSON and XML):
@ -196,7 +198,7 @@ impl<R: BufRead> FromReadSolutionsReader<R> {
} }
} }
impl<R: BufRead> Iterator for FromReadSolutionsReader<R> { impl<R: Read> Iterator for FromReadSolutionsReader<R> {
type Item = Result<QuerySolution, ParseError>; type Item = Result<QuerySolution, ParseError>;
fn next(&mut self) -> Option<Result<QuerySolution, ParseError>> { fn next(&mut self) -> Option<Result<QuerySolution, ParseError>> {

@ -8,7 +8,7 @@ use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use quick_xml::{Reader, Writer}; use quick_xml::{Reader, Writer};
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::io::{self, BufRead, Write}; use std::io::{self, BufReader, Read, Write};
use std::str; use std::str;
use std::sync::Arc; use std::sync::Arc;
@ -157,7 +157,7 @@ fn write_xml_term(
Ok(()) Ok(())
} }
pub enum XmlQueryResultsReader<R: BufRead> { pub enum XmlQueryResultsReader<R: Read> {
Solutions { Solutions {
variables: Vec<Variable>, variables: Vec<Variable>,
solutions: XmlSolutionsReader<R>, solutions: XmlSolutionsReader<R>,
@ -165,7 +165,7 @@ pub enum XmlQueryResultsReader<R: BufRead> {
Boolean(bool), Boolean(bool),
} }
impl<R: BufRead> XmlQueryResultsReader<R> { impl<R: Read> XmlQueryResultsReader<R> {
pub fn read(source: R) -> Result<Self, ParseError> { pub fn read(source: R) -> Result<Self, ParseError> {
enum State { enum State {
Start, Start,
@ -175,7 +175,7 @@ impl<R: BufRead> XmlQueryResultsReader<R> {
Boolean, Boolean,
} }
let mut reader = Reader::from_reader(source); let mut reader = Reader::from_reader(BufReader::new(source));
reader.trim_text(true); reader.trim_text(true);
reader.expand_empty_elements(true); reader.expand_empty_elements(true);
@ -293,8 +293,8 @@ enum State {
End, End,
} }
pub struct XmlSolutionsReader<R: BufRead> { pub struct XmlSolutionsReader<R: Read> {
reader: Reader<R>, reader: Reader<BufReader<R>>,
buffer: Vec<u8>, buffer: Vec<u8>,
mapping: BTreeMap<String, usize>, mapping: BTreeMap<String, usize>,
stack: Vec<State>, stack: Vec<State>,
@ -303,7 +303,7 @@ pub struct XmlSolutionsReader<R: BufRead> {
object_stack: Vec<Term>, object_stack: Vec<Term>,
} }
impl<R: BufRead> XmlSolutionsReader<R> { impl<R: Read> XmlSolutionsReader<R> {
pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, ParseError> { pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, ParseError> {
let mut state = State::Start; let mut state = State::Start;

@ -7,7 +7,7 @@ use crate::sparql::results::{
}; };
use oxrdf::{Variable, VariableRef}; use oxrdf::{Variable, VariableRef};
pub use sparesults::QuerySolution; pub use sparesults::QuerySolution;
use std::io::{BufRead, Write}; use std::io::{Read, Write};
use std::rc::Rc; use std::rc::Rc;
/// Results of a [SPARQL query](https://www.w3.org/TR/sparql11-query/). /// Results of a [SPARQL query](https://www.w3.org/TR/sparql11-query/).
@ -22,12 +22,9 @@ pub enum QueryResults {
impl QueryResults { impl QueryResults {
/// Reads a SPARQL query results serialization. /// Reads a SPARQL query results serialization.
pub fn read( pub fn read(read: impl Read + 'static, format: QueryResultsFormat) -> Result<Self, ParseError> {
reader: impl BufRead + 'static,
format: QueryResultsFormat,
) -> Result<Self, ParseError> {
Ok(QueryResultsParser::from_format(format) Ok(QueryResultsParser::from_format(format)
.parse_read(reader)? .parse_read(read)?
.into()) .into())
} }
@ -51,19 +48,19 @@ impl QueryResults {
/// ``` /// ```
pub fn write( pub fn write(
self, self,
writer: impl Write, write: impl Write,
format: QueryResultsFormat, format: QueryResultsFormat,
) -> Result<(), EvaluationError> { ) -> Result<(), EvaluationError> {
let serializer = QueryResultsSerializer::from_format(format); let serializer = QueryResultsSerializer::from_format(format);
match self { match self {
Self::Boolean(value) => { Self::Boolean(value) => {
serializer serializer
.serialize_boolean_to_write(writer, value) .serialize_boolean_to_write(write, value)
.map_err(EvaluationError::ResultsSerialization)?; .map_err(EvaluationError::ResultsSerialization)?;
} }
Self::Solutions(solutions) => { Self::Solutions(solutions) => {
let mut writer = serializer let mut writer = serializer
.serialize_solutions_to_write(writer, solutions.variables().to_vec()) .serialize_solutions_to_write(write, solutions.variables().to_vec())
.map_err(EvaluationError::ResultsSerialization)?; .map_err(EvaluationError::ResultsSerialization)?;
for solution in solutions { for solution in solutions {
writer writer
@ -80,7 +77,7 @@ impl QueryResults {
let o = VariableRef::new_unchecked("object"); let o = VariableRef::new_unchecked("object");
let mut writer = serializer let mut writer = serializer
.serialize_solutions_to_write( .serialize_solutions_to_write(
writer, write,
vec![s.into_owned(), p.into_owned(), o.into_owned()], vec![s.into_owned(), p.into_owned(), o.into_owned()],
) )
.map_err(EvaluationError::ResultsSerialization)?; .map_err(EvaluationError::ResultsSerialization)?;
@ -150,7 +147,7 @@ impl From<QuerySolutionIter> for QueryResults {
} }
} }
impl<R: BufRead + 'static> From<FromReadQueryResultsReader<R>> for QueryResults { impl<R: Read + 'static> From<FromReadQueryResultsReader<R>> for QueryResults {
fn from(reader: FromReadQueryResultsReader<R>) -> Self { fn from(reader: FromReadQueryResultsReader<R>) -> Self {
match reader { match reader {
FromReadQueryResultsReader::Solutions(s) => Self::Solutions(s.into()), FromReadQueryResultsReader::Solutions(s) => Self::Solutions(s.into()),
@ -211,7 +208,7 @@ impl QuerySolutionIter {
} }
} }
impl<R: BufRead + 'static> From<FromReadSolutionsReader<R>> for QuerySolutionIter { impl<R: Read + 'static> From<FromReadSolutionsReader<R>> for QuerySolutionIter {
fn from(reader: FromReadSolutionsReader<R>) -> Self { fn from(reader: FromReadSolutionsReader<R>) -> Self {
Self { Self {
variables: Rc::new(reader.variables().to_vec()), variables: Rc::new(reader.variables().to_vec()),
@ -279,10 +276,10 @@ mod tests {
#![allow(clippy::panic_in_result_fn)] #![allow(clippy::panic_in_result_fn)]
use super::*; use super::*;
use std::io::Cursor;
#[test] #[test]
fn test_serialization_roundtrip() -> Result<(), EvaluationError> { fn test_serialization_roundtrip() -> Result<(), EvaluationError> {
use std::io::Cursor;
use std::str; use std::str;
for format in [ for format in [

@ -5,7 +5,6 @@ use crate::sparql::http::Client;
use crate::sparql::model::QueryResults; use crate::sparql::model::QueryResults;
use crate::sparql::results::QueryResultsFormat; use crate::sparql::results::QueryResultsFormat;
use std::error::Error; use std::error::Error;
use std::io::BufReader;
use std::time::Duration; use std::time::Duration;
/// Handler for [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE. /// Handler for [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE.
@ -121,6 +120,6 @@ impl ServiceHandler for SimpleServiceHandler {
.map_err(|e| EvaluationError::Service(Box::new(e)))?; .map_err(|e| EvaluationError::Service(Box::new(e)))?;
let format = QueryResultsFormat::from_media_type(&content_type) let format = QueryResultsFormat::from_media_type(&content_type)
.ok_or_else(|| EvaluationError::UnsupportedContentType(content_type))?; .ok_or_else(|| EvaluationError::UnsupportedContentType(content_type))?;
Ok(QueryResults::read(BufReader::new(body), format)?) Ok(QueryResults::read(body, format)?)
} }
} }

@ -17,7 +17,6 @@ use pyo3::exceptions::{
}; };
use pyo3::prelude::*; use pyo3::prelude::*;
use pyo3::types::PyBytes; use pyo3::types::PyBytes;
use std::io::BufReader;
use std::path::PathBuf; use std::path::PathBuf;
use std::vec::IntoIter; use std::vec::IntoIter;
@ -191,7 +190,7 @@ pub struct PyQuerySolutions {
enum PyQuerySolutionsVariant { enum PyQuerySolutionsVariant {
Query(QuerySolutionIter), Query(QuerySolutionIter),
Reader { Reader {
iter: FromReadSolutionsReader<BufReader<PyReadable>>, iter: FromReadSolutionsReader<PyReadable>,
file_path: Option<PathBuf>, file_path: Option<PathBuf>,
}, },
} }
@ -504,7 +503,7 @@ pub fn parse_query_results(
PyReadable::from_data(input) PyReadable::from_data(input)
}; };
let results = QueryResultsParser::from_format(format) let results = QueryResultsParser::from_format(format)
.parse_read(BufReader::new(input)) .parse_read(input)
.map_err(|e| map_query_results_parse_error(e, file_path.clone()))?; .map_err(|e| map_query_results_parse_error(e, file_path.clone()))?;
Ok(match results { Ok(match results {
FromReadQueryResultsReader::Solutions(iter) => PyQuerySolutions { FromReadQueryResultsReader::Solutions(iter) => PyQuerySolutions {

@ -12,7 +12,7 @@ use oxigraph::store::Store;
use sparopt::Optimizer; use sparopt::Optimizer;
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt::Write; use std::fmt::Write;
use std::io::{self, BufReader, Cursor}; use std::io::{self, Cursor};
use std::ops::Deref; use std::ops::Deref;
use std::str::FromStr; use std::str::FromStr;
use std::sync::{Arc, Mutex, OnceLock}; use std::sync::{Arc, Mutex, OnceLock};
@ -106,12 +106,12 @@ fn evaluate_positive_result_syntax_test(test: &Test, format: QueryResultsFormat)
.as_deref() .as_deref()
.ok_or_else(|| anyhow!("No action found"))?; .ok_or_else(|| anyhow!("No action found"))?;
let actual_results = StaticQueryResults::from_query_results( let actual_results = StaticQueryResults::from_query_results(
QueryResults::read(Cursor::new(read_file_to_string(action_file)?), format)?, QueryResults::read(read_file(action_file)?, format)?,
true, true,
)?; )?;
if let Some(result_file) = test.result.as_deref() { if let Some(result_file) = test.result.as_deref() {
let expected_results = StaticQueryResults::from_query_results( let expected_results = StaticQueryResults::from_query_results(
QueryResults::read(Cursor::new(read_file_to_string(result_file)?), format)?, QueryResults::read(read_file(result_file)?, format)?,
true, true,
)?; )?;
ensure!( ensure!(
@ -277,10 +277,7 @@ fn load_sparql_query_result(url: &str) -> Result<StaticQueryResults> {
.rsplit_once('.') .rsplit_once('.')
.and_then(|(_, extension)| QueryResultsFormat::from_extension(extension)) .and_then(|(_, extension)| QueryResultsFormat::from_extension(extension))
{ {
StaticQueryResults::from_query_results( StaticQueryResults::from_query_results(QueryResults::read(read_file(url)?, format)?, false)
QueryResults::read(BufReader::new(read_file(url)?), format)?,
false,
)
} else { } else {
StaticQueryResults::from_graph(&load_graph(url, guess_rdf_format(url)?, false)?) StaticQueryResults::from_graph(&load_graph(url, guess_rdf_format(url)?, false)?)
} }

Loading…
Cancel
Save