From 38844f6436524e72b69985fcc285fad0a91a4713 Mon Sep 17 00:00:00 2001 From: Tpt Date: Tue, 26 Sep 2023 22:04:57 +0200 Subject: [PATCH] sparesults: use Read instead of BufRead as input type This avoids an extra intermediate buffer for JSON and TSV --- lib/sparesults/src/parser.rs | 18 ++++++++++-------- lib/sparesults/src/xml.rs | 14 +++++++------- lib/src/sparql/model.rs | 23 ++++++++++------------- lib/src/sparql/service.rs | 3 +-- python/src/sparql.rs | 5 ++--- testsuite/src/sparql_evaluator.rs | 11 ++++------- 6 files changed, 34 insertions(+), 40 deletions(-) diff --git a/lib/sparesults/src/parser.rs b/lib/sparesults/src/parser.rs index fd7a403d..8833f9ac 100644 --- a/lib/sparesults/src/parser.rs +++ b/lib/sparesults/src/parser.rs @@ -5,7 +5,7 @@ use crate::json::{JsonQueryResultsReader, JsonSolutionsReader}; use crate::solution::QuerySolution; use crate::xml::{XmlQueryResultsReader, XmlSolutionsReader}; use oxrdf::Variable; -use std::io::BufRead; +use std::io::Read; use std::rc::Rc; /// Parsers for [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats. @@ -47,6 +47,8 @@ impl QueryResultsParser { /// Reads a result file. /// + /// Reads are buffered. 
+ /// /// Example in XML (the API is the same for JSON and TSV): /// ``` /// use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader}; @@ -68,7 +70,7 @@ impl QueryResultsParser { /// } /// # Result::<(),sparesults::ParseError>::Ok(()) /// ``` - pub fn parse_read( + pub fn parse_read( &self, reader: R, ) -> Result, ParseError> { @@ -108,7 +110,7 @@ impl QueryResultsParser { } #[deprecated(note = "Use parse_read")] - pub fn read_results( + pub fn read_results( &self, reader: R, ) -> Result, ParseError> { @@ -141,7 +143,7 @@ impl QueryResultsParser { /// } /// # Result::<(),sparesults::ParseError>::Ok(()) /// ``` -pub enum FromReadQueryResultsReader { +pub enum FromReadQueryResultsReader { Solutions(FromReadSolutionsReader), Boolean(bool), } @@ -165,18 +167,18 @@ pub enum FromReadQueryResultsReader { /// # Result::<(),sparesults::ParseError>::Ok(()) /// ``` #[allow(clippy::rc_buffer)] -pub struct FromReadSolutionsReader { +pub struct FromReadSolutionsReader { variables: Rc>, solutions: SolutionsReaderKind, } -enum SolutionsReaderKind { +enum SolutionsReaderKind { Xml(XmlSolutionsReader), Json(JsonSolutionsReader), Tsv(TsvSolutionsReader), } -impl FromReadSolutionsReader { +impl FromReadSolutionsReader { /// Ordered list of the declared variables at the beginning of the results. 
/// /// Example in TSV (the API is the same for JSON and XML): @@ -196,7 +198,7 @@ impl FromReadSolutionsReader { } } -impl Iterator for FromReadSolutionsReader { +impl Iterator for FromReadSolutionsReader { type Item = Result; fn next(&mut self) -> Option> { diff --git a/lib/sparesults/src/xml.rs b/lib/sparesults/src/xml.rs index 84e7e099..73d26fff 100644 --- a/lib/sparesults/src/xml.rs +++ b/lib/sparesults/src/xml.rs @@ -8,7 +8,7 @@ use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use quick_xml::{Reader, Writer}; use std::borrow::Cow; use std::collections::BTreeMap; -use std::io::{self, BufRead, Write}; +use std::io::{self, BufReader, Read, Write}; use std::str; use std::sync::Arc; @@ -157,7 +157,7 @@ fn write_xml_term( Ok(()) } -pub enum XmlQueryResultsReader { +pub enum XmlQueryResultsReader { Solutions { variables: Vec, solutions: XmlSolutionsReader, @@ -165,7 +165,7 @@ pub enum XmlQueryResultsReader { Boolean(bool), } -impl XmlQueryResultsReader { +impl XmlQueryResultsReader { pub fn read(source: R) -> Result { enum State { Start, @@ -175,7 +175,7 @@ impl XmlQueryResultsReader { Boolean, } - let mut reader = Reader::from_reader(source); + let mut reader = Reader::from_reader(BufReader::new(source)); reader.trim_text(true); reader.expand_empty_elements(true); @@ -293,8 +293,8 @@ enum State { End, } -pub struct XmlSolutionsReader { - reader: Reader, +pub struct XmlSolutionsReader { + reader: Reader>, buffer: Vec, mapping: BTreeMap, stack: Vec, @@ -303,7 +303,7 @@ pub struct XmlSolutionsReader { object_stack: Vec, } -impl XmlSolutionsReader { +impl XmlSolutionsReader { pub fn read_next(&mut self) -> Result>>, ParseError> { let mut state = State::Start; diff --git a/lib/src/sparql/model.rs b/lib/src/sparql/model.rs index 8ef06ecb..0c7becc6 100644 --- a/lib/src/sparql/model.rs +++ b/lib/src/sparql/model.rs @@ -7,7 +7,7 @@ use crate::sparql::results::{ }; use oxrdf::{Variable, VariableRef}; pub use sparesults::QuerySolution; -use 
std::io::{BufRead, Write}; +use std::io::{Read, Write}; use std::rc::Rc; /// Results of a [SPARQL query](https://www.w3.org/TR/sparql11-query/). @@ -22,12 +22,9 @@ pub enum QueryResults { impl QueryResults { /// Reads a SPARQL query results serialization. - pub fn read( - reader: impl BufRead + 'static, - format: QueryResultsFormat, - ) -> Result { + pub fn read(read: impl Read + 'static, format: QueryResultsFormat) -> Result { Ok(QueryResultsParser::from_format(format) - .parse_read(reader)? + .parse_read(read)? .into()) } @@ -51,19 +48,19 @@ impl QueryResults { /// ``` pub fn write( self, - writer: impl Write, + write: impl Write, format: QueryResultsFormat, ) -> Result<(), EvaluationError> { let serializer = QueryResultsSerializer::from_format(format); match self { Self::Boolean(value) => { serializer - .serialize_boolean_to_write(writer, value) + .serialize_boolean_to_write(write, value) .map_err(EvaluationError::ResultsSerialization)?; } Self::Solutions(solutions) => { let mut writer = serializer - .serialize_solutions_to_write(writer, solutions.variables().to_vec()) + .serialize_solutions_to_write(write, solutions.variables().to_vec()) .map_err(EvaluationError::ResultsSerialization)?; for solution in solutions { writer @@ -80,7 +77,7 @@ impl QueryResults { let o = VariableRef::new_unchecked("object"); let mut writer = serializer .serialize_solutions_to_write( - writer, + write, vec![s.into_owned(), p.into_owned(), o.into_owned()], ) .map_err(EvaluationError::ResultsSerialization)?; @@ -150,7 +147,7 @@ impl From for QueryResults { } } -impl From> for QueryResults { +impl From> for QueryResults { fn from(reader: FromReadQueryResultsReader) -> Self { match reader { FromReadQueryResultsReader::Solutions(s) => Self::Solutions(s.into()), @@ -211,7 +208,7 @@ impl QuerySolutionIter { } } -impl From> for QuerySolutionIter { +impl From> for QuerySolutionIter { fn from(reader: FromReadSolutionsReader) -> Self { Self { variables: Rc::new(reader.variables().to_vec()), @@ 
-279,10 +276,10 @@ mod tests { #![allow(clippy::panic_in_result_fn)] use super::*; + use std::io::Cursor; #[test] fn test_serialization_roundtrip() -> Result<(), EvaluationError> { - use std::io::Cursor; use std::str; for format in [ diff --git a/lib/src/sparql/service.rs b/lib/src/sparql/service.rs index ae397ee2..dec189ae 100644 --- a/lib/src/sparql/service.rs +++ b/lib/src/sparql/service.rs @@ -5,7 +5,6 @@ use crate::sparql::http::Client; use crate::sparql::model::QueryResults; use crate::sparql::results::QueryResultsFormat; use std::error::Error; -use std::io::BufReader; use std::time::Duration; /// Handler for [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE. @@ -121,6 +120,6 @@ impl ServiceHandler for SimpleServiceHandler { .map_err(|e| EvaluationError::Service(Box::new(e)))?; let format = QueryResultsFormat::from_media_type(&content_type) .ok_or_else(|| EvaluationError::UnsupportedContentType(content_type))?; - Ok(QueryResults::read(BufReader::new(body), format)?) + Ok(QueryResults::read(body, format)?) 
} } diff --git a/python/src/sparql.rs b/python/src/sparql.rs index b30d27fe..963aa0df 100644 --- a/python/src/sparql.rs +++ b/python/src/sparql.rs @@ -17,7 +17,6 @@ use pyo3::exceptions::{ }; use pyo3::prelude::*; use pyo3::types::PyBytes; -use std::io::BufReader; use std::path::PathBuf; use std::vec::IntoIter; @@ -191,7 +190,7 @@ pub struct PyQuerySolutions { enum PyQuerySolutionsVariant { Query(QuerySolutionIter), Reader { - iter: FromReadSolutionsReader>, + iter: FromReadSolutionsReader, file_path: Option, }, } @@ -504,7 +503,7 @@ pub fn parse_query_results( PyReadable::from_data(input) }; let results = QueryResultsParser::from_format(format) - .parse_read(BufReader::new(input)) + .parse_read(input) .map_err(|e| map_query_results_parse_error(e, file_path.clone()))?; Ok(match results { FromReadQueryResultsReader::Solutions(iter) => PyQuerySolutions { diff --git a/testsuite/src/sparql_evaluator.rs b/testsuite/src/sparql_evaluator.rs index 67081be6..14bdb38d 100644 --- a/testsuite/src/sparql_evaluator.rs +++ b/testsuite/src/sparql_evaluator.rs @@ -12,7 +12,7 @@ use oxigraph::store::Store; use sparopt::Optimizer; use std::collections::HashMap; use std::fmt::Write; -use std::io::{self, BufReader, Cursor}; +use std::io::{self, Cursor}; use std::ops::Deref; use std::str::FromStr; use std::sync::{Arc, Mutex, OnceLock}; @@ -106,12 +106,12 @@ fn evaluate_positive_result_syntax_test(test: &Test, format: QueryResultsFormat) .as_deref() .ok_or_else(|| anyhow!("No action found"))?; let actual_results = StaticQueryResults::from_query_results( - QueryResults::read(Cursor::new(read_file_to_string(action_file)?), format)?, + QueryResults::read(read_file(action_file)?, format)?, true, )?; if let Some(result_file) = test.result.as_deref() { let expected_results = StaticQueryResults::from_query_results( - QueryResults::read(Cursor::new(read_file_to_string(result_file)?), format)?, + QueryResults::read(read_file(result_file)?, format)?, true, )?; ensure!( @@ -277,10 +277,7 @@ fn 
load_sparql_query_result(url: &str) -> Result { .rsplit_once('.') .and_then(|(_, extension)| QueryResultsFormat::from_extension(extension)) { - StaticQueryResults::from_query_results( - QueryResults::read(BufReader::new(read_file(url)?), format)?, - false, - ) + StaticQueryResults::from_query_results(QueryResults::read(read_file(url)?, format)?, false) } else { StaticQueryResults::from_graph(&load_graph(url, guess_rdf_format(url)?, false)?) }