From 7d45ea43f5f64b576e57c687c728118f6a51b0f3 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 18 Feb 2024 21:36:15 +0100 Subject: [PATCH] Adds Tokio async to SPARQL XML results parser --- lib/sparesults/src/error.rs | 6 + lib/sparesults/src/parser.rs | 27 +- lib/sparesults/src/xml.rs | 862 +++++++++++++++++++++-------------- 3 files changed, 534 insertions(+), 361 deletions(-) diff --git a/lib/sparesults/src/error.rs b/lib/sparesults/src/error.rs index f26a3349..0c3eab4d 100644 --- a/lib/sparesults/src/error.rs +++ b/lib/sparesults/src/error.rs @@ -47,6 +47,12 @@ impl From for QueryResultsParseError { } } +impl From for QueryResultsParseError { + #[inline] + fn from(error: quick_xml::escape::EscapeError) -> Self { + quick_xml::Error::from(error).into() + } +} /// An error in the syntax of the parsed file. #[derive(Debug, thiserror::Error)] #[error(transparent)] diff --git a/lib/sparesults/src/parser.rs b/lib/sparesults/src/parser.rs index 602a5e61..2ac6d08f 100644 --- a/lib/sparesults/src/parser.rs +++ b/lib/sparesults/src/parser.rs @@ -7,7 +7,9 @@ use crate::json::{ FromTokioAsyncReadJsonQueryResultsReader, FromTokioAsyncReadJsonSolutionsReader, }; use crate::solution::QuerySolution; -use crate::xml::{XmlQueryResultsReader, XmlSolutionsReader}; +use crate::xml::{FromReadXmlQueryResultsReader, FromReadXmlSolutionsReader}; +#[cfg(feature = "async-tokio")] +use crate::xml::{FromTokioAsyncReadXmlQueryResultsReader, FromTokioAsyncReadXmlSolutionsReader}; use oxrdf::Variable; use std::io::Read; use std::sync::Arc; @@ -81,9 +83,9 @@ impl QueryResultsParser { reader: R, ) -> Result, QueryResultsParseError> { Ok(match self.format { - QueryResultsFormat::Xml => match XmlQueryResultsReader::read(reader)? { - XmlQueryResultsReader::Boolean(r) => FromReadQueryResultsReader::Boolean(r), - XmlQueryResultsReader::Solutions { + QueryResultsFormat::Xml => match FromReadXmlQueryResultsReader::read(reader)? { + FromReadXmlQueryResultsReader::Boolean(r) => FromReadQueryResultsReader::Boolean(r), + FromReadXmlQueryResultsReader::Solutions { solutions, variables, } => FromReadQueryResultsReader::Solutions(FromReadSolutionsReader { @@ -128,7 +130,7 @@ impl QueryResultsParser { /// Reads are automatically buffered. /// /// Example in XML (the API is the same for JSON and TSV): - /// ```no_run + /// ``` /// use sparesults::{QueryResultsFormat, QueryResultsParser, FromTokioAsyncReadQueryResultsReader}; /// use oxrdf::{Literal, Variable}; /// @@ -157,7 +159,16 @@ impl QueryResultsParser { reader: R, ) -> Result, QueryResultsParseError> { Ok(match self.format { - QueryResultsFormat::Xml => return Err(QueryResultsSyntaxError::msg("The XML query results parser does not support Tokio AsyncRead yet").into()), + QueryResultsFormat::Xml => match FromTokioAsyncReadXmlQueryResultsReader::read(reader).await? { + FromTokioAsyncReadXmlQueryResultsReader::Boolean(r) => FromTokioAsyncReadQueryResultsReader::Boolean(r), + FromTokioAsyncReadXmlQueryResultsReader::Solutions { + solutions, + variables, + } => FromTokioAsyncReadQueryResultsReader::Solutions(FromTokioAsyncReadSolutionsReader { + variables: variables.into(), + solutions: FromTokioAsyncReadSolutionsReaderKind::Xml(solutions), + }), + }, QueryResultsFormat::Json => match FromTokioAsyncReadJsonQueryResultsReader::read(reader).await? { FromTokioAsyncReadJsonQueryResultsReader::Boolean(r) => FromTokioAsyncReadQueryResultsReader::Boolean(r), FromTokioAsyncReadJsonQueryResultsReader::Solutions { @@ -248,7 +259,7 @@ pub struct FromReadSolutionsReader { } enum FromReadSolutionsReaderKind { - Xml(XmlSolutionsReader), + Xml(FromReadXmlSolutionsReader), Json(FromReadJsonSolutionsReader), Tsv(TsvSolutionsReader), } @@ -381,6 +392,7 @@ pub struct FromTokioAsyncReadSolutionsReader { #[cfg(feature = "async-tokio")] enum FromTokioAsyncReadSolutionsReaderKind { Json(FromTokioAsyncReadJsonSolutionsReader), + Xml(FromTokioAsyncReadXmlSolutionsReader), } #[cfg(feature = "async-tokio")] @@ -422,6 +434,7 @@ impl FromTokioAsyncReadSolutionsReader { Some( match &mut self.solutions { FromTokioAsyncReadSolutionsReaderKind::Json(reader) => reader.read_next().await, + FromTokioAsyncReadSolutionsReaderKind::Xml(reader) => reader.read_next().await, } .transpose()? .map(|values| (Arc::clone(&self.variables), values).into()), diff --git a/lib/sparesults/src/xml.rs b/lib/sparesults/src/xml.rs index 6eb861e2..d229c143 100644 --- a/lib/sparesults/src/xml.rs +++ b/lib/sparesults/src/xml.rs @@ -3,15 +3,15 @@ use crate::error::{QueryResultsParseError, QueryResultsSyntaxError}; use oxrdf::vocab::rdf; use oxrdf::*; +use quick_xml::escape::unescape; use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; -use quick_xml::{Reader, Writer}; -use std::borrow::Cow; +use quick_xml::{Decoder, Reader, Writer}; use std::collections::BTreeMap; use std::io::{self, BufReader, Read, Write}; -use std::str; +use std::mem::take; use std::sync::Arc; #[cfg(feature = "async-tokio")] -use tokio::io::AsyncWrite; +use tokio::io::{AsyncRead, AsyncWrite, BufReader as AsyncBufReader}; pub fn write_boolean_xml_result(write: W, value: bool) -> io::Result { let mut writer = Writer::new(write); @@ -154,7 +154,6 @@ impl InnerXmlSolutionsWriter { } #[allow(clippy::unused_self)] - fn write<'a>( &self, output: &mut Vec>, @@ -218,123 +217,276 @@ fn write_xml_term<'a>(output: &mut Vec>, term: TermRef<'a>) { } } -pub enum XmlQueryResultsReader { +pub enum FromReadXmlQueryResultsReader { Solutions { variables: Vec, - solutions: XmlSolutionsReader, + solutions: FromReadXmlSolutionsReader, }, Boolean(bool), } -impl XmlQueryResultsReader { - pub fn read(source: R) -> Result { - enum State { - Start, - Sparql, - Head, - AfterHead, - Boolean, +impl FromReadXmlQueryResultsReader { + pub fn read(read: R) -> Result { + let mut reader = Reader::from_reader(BufReader::new(read)); + reader.trim_text(true); + reader.expand_empty_elements(true); + let mut reader_buffer = Vec::new(); + let mut inner = XmlInnerQueryResultsReader { + state: ResultsState::Start, + variables: Vec::new(), + decoder: reader.decoder(), + }; + loop { + reader_buffer.clear(); + let event = reader.read_event_into(&mut reader_buffer)?; + if let Some(result) = inner.read_event(event)? { + return Ok(match result { + XmlInnerQueryResults::Solutions { + variables, + solutions, + } => Self::Solutions { + variables, + solutions: FromReadXmlSolutionsReader { + reader, + inner: solutions, + reader_buffer, + }, + }, + XmlInnerQueryResults::Boolean(value) => Self::Boolean(value), + }); + } + } + } +} + +pub struct FromReadXmlSolutionsReader { + reader: Reader>, + inner: XmlInnerSolutionsReader, + reader_buffer: Vec, +} + +impl FromReadXmlSolutionsReader { + pub fn read_next(&mut self) -> Result>>, QueryResultsParseError> { + loop { + self.reader_buffer.clear(); + let event = self.reader.read_event_into(&mut self.reader_buffer)?; + if event == Event::Eof { + return Ok(None); + } + if let Some(solution) = self.inner.read_event(event)? { + return Ok(Some(solution)); + } } + } +} - let mut reader = Reader::from_reader(BufReader::new(source)); +#[cfg(feature = "async-tokio")] +pub enum FromTokioAsyncReadXmlQueryResultsReader { + Solutions { + variables: Vec, + solutions: FromTokioAsyncReadXmlSolutionsReader, + }, + Boolean(bool), +} + +#[cfg(feature = "async-tokio")] +impl FromTokioAsyncReadXmlQueryResultsReader { + pub async fn read(read: R) -> Result { + let mut reader = Reader::from_reader(AsyncBufReader::new(read)); reader.trim_text(true); reader.expand_empty_elements(true); + let mut reader_buffer = Vec::new(); + let mut inner = XmlInnerQueryResultsReader { + state: ResultsState::Start, + variables: Vec::new(), + decoder: reader.decoder(), + }; + loop { + reader_buffer.clear(); + let event = reader.read_event_into_async(&mut reader_buffer).await?; + if let Some(result) = inner.read_event(event)? { + return Ok(match result { + XmlInnerQueryResults::Solutions { + variables, + solutions, + } => Self::Solutions { + variables, + solutions: FromTokioAsyncReadXmlSolutionsReader { + reader, + inner: solutions, + reader_buffer, + }, + }, + XmlInnerQueryResults::Boolean(value) => Self::Boolean(value), + }); + } + } + } +} - let mut buffer = Vec::default(); - let mut variables = Vec::default(); - let mut state = State::Start; +#[cfg(feature = "async-tokio")] +pub struct FromTokioAsyncReadXmlSolutionsReader { + reader: Reader>, + inner: XmlInnerSolutionsReader, + reader_buffer: Vec, +} - // Read header +#[cfg(feature = "async-tokio")] +impl FromTokioAsyncReadXmlSolutionsReader { + pub async fn read_next(&mut self) -> Result>>, QueryResultsParseError> { loop { - buffer.clear(); - let event = reader.read_event_into(&mut buffer)?; - match event { - Event::Start(event) => match state { - State::Start => { - if event.local_name().as_ref() == b"sparql" { - state = State::Sparql; - } else { - return Err(QueryResultsSyntaxError::msg(format!("Expecting tag, found <{}>", decode(&reader, &event.name())?)).into()); - } + self.reader_buffer.clear(); + let event = self + .reader + .read_event_into_async(&mut self.reader_buffer) + .await?; + if event == Event::Eof { + return Ok(None); + } + if let Some(solution) = self.inner.read_event(event)? { + return Ok(Some(solution)); + } + } + } +} + +enum XmlInnerQueryResults { + Solutions { + variables: Vec, + solutions: XmlInnerSolutionsReader, + }, + Boolean(bool), +} + +#[derive(Clone, Copy)] +enum ResultsState { + Start, + Sparql, + Head, + AfterHead, + Boolean, +} + +struct XmlInnerQueryResultsReader { + state: ResultsState, + variables: Vec, + decoder: Decoder, +} + +impl XmlInnerQueryResultsReader { + pub fn read_event( + &mut self, + event: Event<'_>, + ) -> Result, QueryResultsParseError> { + match event { + Event::Start(event) => match self.state { + ResultsState::Start => { + if event.local_name().as_ref() == b"sparql" { + self.state = ResultsState::Sparql; + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg(format!("Expecting tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into()) } - State::Sparql => { - if event.local_name().as_ref() == b"head" { - state = State::Head; - } else { - return Err(QueryResultsSyntaxError::msg(format!("Expecting tag, found <{}>", decode(&reader, &event.name())?)).into()); - } + } + ResultsState::Sparql => { + if event.local_name().as_ref() == b"head" { + self.state = ResultsState::Head; + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg(format!("Expecting tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into()) } - State::Head => { - if event.local_name().as_ref() == b"variable" { - let name = event.attributes() - .filter_map(Result::ok) - .find(|attr| attr.key.local_name().as_ref() == b"name") - .ok_or_else(|| QueryResultsSyntaxError::msg("No name attribute found for the tag"))? - .decode_and_unescape_value(&reader)?; - let variable = Variable::new(name).map_err(|e| QueryResultsSyntaxError::msg(format!("Invalid variable name: {e}")))?; - if variables.contains(&variable) { - return Err(QueryResultsSyntaxError::msg(format!( - "The variable {variable} is declared twice" - )) - .into()); - } - variables.push(variable); - } else if event.local_name().as_ref() == b"link" { - // no op - } else { - return Err(QueryResultsSyntaxError::msg(format!("Expecting or tag, found <{}>", decode(&reader, &event.name())?)).into()); + } + ResultsState::Head => { + if event.local_name().as_ref() == b"variable" { + let name = event.attributes() + .filter_map(Result::ok) + .find(|attr| attr.key.local_name().as_ref() == b"name") + .ok_or_else(|| QueryResultsSyntaxError::msg("No name attribute found for the tag"))?; + let name = unescape(&self.decoder.decode(&name.value)?)?.into_owned(); + let variable = Variable::new(name).map_err(|e| QueryResultsSyntaxError::msg(format!("Invalid variable name: {e}")))?; + if self.variables.contains(&variable) { + return Err(QueryResultsSyntaxError::msg(format!( + "The variable {variable} is declared twice" + )) + .into()); } + self.variables.push(variable); + Ok(None) + } else if event.local_name().as_ref() == b"link" { + // no op + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg(format!("Expecting or tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into()) } - State::AfterHead => { - if event.local_name().as_ref() == b"boolean" { - state = State::Boolean - } else if event.local_name().as_ref() == b"results" { - let mut mapping = BTreeMap::default(); - for (i, var) in variables.iter().enumerate() { - mapping.insert(var.clone().into_string(), i); - } - return Ok(Self::Solutions { variables, - solutions: XmlSolutionsReader { - reader, - buffer, - mapping, - stack: Vec::new(), - subject_stack: Vec::new(), - predicate_stack: Vec::new(), - object_stack: Vec::new(), - }}); - } else if event.local_name().as_ref() != b"link" && event.local_name().as_ref() != b"results" && event.local_name().as_ref() != b"boolean" { - return Err(QueryResultsSyntaxError::msg(format!("Expecting sparql tag, found <{}>", decode(&reader, &event.name())?)).into()); + } + ResultsState::AfterHead => { + if event.local_name().as_ref() == b"boolean" { + self.state = ResultsState::Boolean; + Ok(None) + } else if event.local_name().as_ref() == b"results" { + let mut mapping = BTreeMap::default(); + for (i, var) in self.variables.iter().enumerate() { + mapping.insert(var.clone().into_string(), i); } + Ok(Some(XmlInnerQueryResults::Solutions { + variables: take(&mut self.variables), + solutions: XmlInnerSolutionsReader { + decoder: self.decoder, + mapping, + state_stack: vec![State::Start, State::Start], + new_bindings: Vec::new(), + current_var: None, + term: None, + lang: None, + datatype: None, + subject_stack: Vec::new(), + predicate_stack: Vec::new(), + object_stack: Vec::new(), + }, + })) + } else if event.local_name().as_ref() != b"link" && event.local_name().as_ref() != b"results" && event.local_name().as_ref() != b"boolean" { + Err(QueryResultsSyntaxError::msg(format!("Expecting sparql tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into()) + } else { + Ok(None) } - State::Boolean => return Err(QueryResultsSyntaxError::msg(format!("Unexpected tag inside of tag: <{}>", decode(&reader, &event.name())?)).into()) - }, - Event::Text(event) => { - let value = event.unescape()?; - return match state { - State::Boolean => { - return if value == "true" { - Ok(Self::Boolean(true)) - } else if value == "false" { - Ok(Self::Boolean(false)) - } else { - Err(QueryResultsSyntaxError::msg(format!("Unexpected boolean value. Found '{value}'")).into()) - }; - } - _ => Err(QueryResultsSyntaxError::msg(format!("Unexpected textual value found: '{value}'")).into()) - }; - }, - Event::End(event) => { - if let State::Head = state { - if event.local_name().as_ref() == b"head" { - state = State::AfterHead + } + ResultsState::Boolean => Err(QueryResultsSyntaxError::msg(format!("Unexpected tag inside of tag: <{}>", self.decoder.decode(event.name().as_ref())?)).into()) + }, + Event::Text(event) => { + let value = event.unescape()?; + match self.state { + ResultsState::Boolean => { + if value == "true" { + Ok(Some(XmlInnerQueryResults::Boolean(true))) + } else if value == "false" { + Ok(Some(XmlInnerQueryResults::Boolean(false))) + } else { + Err(QueryResultsSyntaxError::msg(format!("Unexpected boolean value. Found '{value}'")).into()) } - } else { - return Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a and a or tag").into()); } - }, - Event::Eof => return Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a and a or tag").into()), - _ => (), + _ => Err(QueryResultsSyntaxError::msg(format!("Unexpected textual value found: '{value}'")).into()) + } + } + Event::End(event) => { + if let ResultsState::Head = self.state { + if event.local_name().as_ref() == b"head" { + self.state = ResultsState::AfterHead + } + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a and a or tag").into()) + } + } + Event::Eof => Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a and a or tag").into()), + Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => { + Ok(None) + } + Event::Empty(_) => unreachable!("Empty events are expended"), + Event::CData(_) => { + Err(QueryResultsSyntaxError::msg( + " are not supported in SPARQL XML results", + ) + .into()) } } } @@ -351,285 +503,294 @@ enum State { Subject, Predicate, Object, - End, } -pub struct XmlSolutionsReader { - reader: Reader>, - buffer: Vec, +struct XmlInnerSolutionsReader { + decoder: Decoder, mapping: BTreeMap, - stack: Vec, + state_stack: Vec, + new_bindings: Vec>, + current_var: Option, + term: Option, + lang: Option, + datatype: Option, subject_stack: Vec, predicate_stack: Vec, object_stack: Vec, } -impl XmlSolutionsReader { - pub fn read_next(&mut self) -> Result>>, QueryResultsParseError> { - let mut state = State::Start; - - let mut new_bindings = vec![None; self.mapping.len()]; - - let mut current_var = None; - let mut term: Option = None; - let mut lang = None; - let mut datatype = None; - loop { - self.buffer.clear(); - let event = self.reader.read_event_into(&mut self.buffer)?; - match event { - Event::Start(event) => match state { - State::Start => { - if event.local_name().as_ref() == b"result" { - state = State::Result; - } else { - return Err(QueryResultsSyntaxError::msg(format!( - "Expecting , found <{}>", - decode(&self.reader, &event.name())? - )) - .into()); - } - } - State::Result => { - if event.local_name().as_ref() == b"binding" { - match event - .attributes() - .filter_map(Result::ok) - .find(|attr| attr.key.local_name().as_ref() == b"name") - { - Some(attr) => { - current_var = Some( - attr.decode_and_unescape_value(&self.reader)?.to_string(), - ) - } - None => { - return Err(QueryResultsSyntaxError::msg( - "No name attribute found for the tag", - ) - .into()); - } - } - state = State::Binding; - } else { - return Err(QueryResultsSyntaxError::msg(format!( - "Expecting , found <{}>", - decode(&self.reader, &event.name())? - )) - .into()); - } +impl XmlInnerSolutionsReader { + #[allow(clippy::unwrap_in_result)] + pub fn read_event( + &mut self, + event: Event<'_>, + ) -> Result>>, QueryResultsParseError> { + match event { + Event::Start(event) => match self.state_stack.last().unwrap() { + State::Start => { + if event.local_name().as_ref() == b"result" { + self.new_bindings = vec![None; self.mapping.len()]; + self.state_stack.push(State::Result); + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg(format!( + "Expecting , found <{}>", + self.decoder.decode(event.name().as_ref())? + )) + .into()) } - State::Binding | State::Subject | State::Predicate | State::Object => { - if term.is_some() { + } + State::Result => { + if event.local_name().as_ref() == b"binding" { + let Some(attr) = event + .attributes() + .filter_map(Result::ok) + .find(|attr| attr.key.local_name().as_ref() == b"name") + else { return Err(QueryResultsSyntaxError::msg( - "There is already a value for the current binding", + "No name attribute found for the tag", ) .into()); - } - self.stack.push(state); - if event.local_name().as_ref() == b"uri" { - state = State::Uri; - } else if event.local_name().as_ref() == b"bnode" { - state = State::BNode; - } else if event.local_name().as_ref() == b"literal" { - for attr in event.attributes() { - let attr = attr.map_err(quick_xml::Error::from)?; - if attr.key.as_ref() == b"xml:lang" { - lang = Some( - attr.decode_and_unescape_value(&self.reader)?.to_string(), - ); - } else if attr.key.local_name().as_ref() == b"datatype" { - let iri = attr.decode_and_unescape_value(&self.reader)?; - datatype = - Some(NamedNode::new(iri.to_string()).map_err(|e| { - QueryResultsSyntaxError::msg(format!( - "Invalid datatype IRI '{iri}': {e}" - )) - })?); - } - } - state = State::Literal; - } else if event.local_name().as_ref() == b"triple" { - state = State::Triple; - } else { - return Err(QueryResultsSyntaxError::msg(format!( - "Expecting , or found <{}>", - decode(&self.reader, &event.name())? - )) - .into()); - } + }; + self.current_var = + Some(unescape(&self.decoder.decode(&attr.value)?)?.into_owned()); + self.state_stack.push(State::Binding); + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg(format!( + "Expecting , found <{}>", + self.decoder.decode(event.name().as_ref())? + )) + .into()) } - State::Triple => { - if event.local_name().as_ref() == b"subject" { - state = State::Subject - } else if event.local_name().as_ref() == b"predicate" { - state = State::Predicate - } else if event.local_name().as_ref() == b"object" { - state = State::Object - } else { - return Err(QueryResultsSyntaxError::msg(format!( - "Expecting , or found <{}>", - decode(&self.reader, &event.name())? - )) - .into()); - } + } + State::Binding | State::Subject | State::Predicate | State::Object => { + if self.term.is_some() { + return Err(QueryResultsSyntaxError::msg( + "There is already a value for the current binding", + ) + .into()); } - _ => (), - }, - Event::Text(event) => { - let data = event.unescape()?; - match state { - State::Uri => { - term = Some( - NamedNode::new(data.to_string()) - .map_err(|e| { - QueryResultsSyntaxError::msg(format!( - "Invalid IRI value '{data}': {e}" - )) - })? - .into(), - ) - } - State::BNode => { - term = Some( - BlankNode::new(data.to_string()) - .map_err(|e| { + if event.local_name().as_ref() == b"uri" { + self.state_stack.push(State::Uri); + Ok(None) + } else if event.local_name().as_ref() == b"bnode" { + self.state_stack.push(State::BNode); + Ok(None) + } else if event.local_name().as_ref() == b"literal" { + for attr in event.attributes() { + let attr = attr.map_err(quick_xml::Error::from)?; + if attr.key.as_ref() == b"xml:lang" { + self.lang = Some( + unescape(&self.decoder.decode(&attr.value)?)?.into_owned(), + ); + } else if attr.key.local_name().as_ref() == b"datatype" { + let iri = self.decoder.decode(&attr.value)?; + let iri = unescape(&iri)?; + self.datatype = + Some(NamedNode::new(iri.as_ref()).map_err(|e| { QueryResultsSyntaxError::msg(format!( - "Invalid blank node value '{data}': {e}" + "Invalid datatype IRI '{iri}': {e}" )) - })? - .into(), - ) - } - State::Literal => { - term = Some(build_literal(data, lang.take(), datatype.take())?.into()); - } - _ => { - return Err(QueryResultsSyntaxError::msg(format!( - "Unexpected textual value found: {data}" - )) - .into()); - } - } - } - Event::End(_) => match state { - State::Start => state = State::End, - State::Result => return Ok(Some(new_bindings)), - State::Binding => { - if let Some(var) = ¤t_var { - if let Some(var) = self.mapping.get(var) { - new_bindings[*var] = term.take() - } else { - return Err( - QueryResultsSyntaxError::msg(format!("The variable '{var}' is used in a binding but not declared in the variables list")).into() - ); + })?); } - } else { - return Err(QueryResultsSyntaxError::msg( - "No name found for tag", - ) - .into()); - } - state = State::Result; - } - State::Subject => { - if let Some(subject) = term.take() { - self.subject_stack.push(subject) } - state = State::Triple; - } - State::Predicate => { - if let Some(predicate) = term.take() { - self.predicate_stack.push(predicate) - } - state = State::Triple; + self.state_stack.push(State::Literal); + Ok(None) + } else if event.local_name().as_ref() == b"triple" { + self.state_stack.push(State::Triple); + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg(format!( + "Expecting , or found <{}>", + self.decoder.decode(event.name().as_ref())? + )) + .into()) } - State::Object => { - if let Some(object) = term.take() { - self.object_stack.push(object) - } - state = State::Triple; + } + State::Triple => { + if event.local_name().as_ref() == b"subject" { + self.state_stack.push(State::Subject); + Ok(None) + } else if event.local_name().as_ref() == b"predicate" { + self.state_stack.push(State::Predicate); + Ok(None) + } else if event.local_name().as_ref() == b"object" { + self.state_stack.push(State::Object); + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg(format!( + "Expecting , or found <{}>", + self.decoder.decode(event.name().as_ref())? + )) + .into()) } + } + State::Uri => Err(QueryResultsSyntaxError::msg(format!( + " must only contain a string, found <{}>", + self.decoder.decode(event.name().as_ref())? + )) + .into()), + State::BNode => Err(QueryResultsSyntaxError::msg(format!( + " must only contain a string, found <{}>", + self.decoder.decode(event.name().as_ref())? + )) + .into()), + State::Literal => Err(QueryResultsSyntaxError::msg(format!( + " must only contain a string, found <{}>", + self.decoder.decode(event.name().as_ref())? + )) + .into()), + }, + Event::Text(event) => { + let data = event.unescape()?; + match self.state_stack.last().unwrap() { State::Uri => { - state = self - .stack - .pop() - .ok_or_else(|| QueryResultsSyntaxError::msg("Empty stack"))? + self.term = Some( + NamedNode::new(data.to_string()) + .map_err(|e| { + QueryResultsSyntaxError::msg(format!( + "Invalid IRI value '{data}': {e}" + )) + })? + .into(), + ); + Ok(None) } State::BNode => { - if term.is_none() { - // We default to a random bnode - term = Some(BlankNode::default().into()) - } - state = self - .stack - .pop() - .ok_or_else(|| QueryResultsSyntaxError::msg("Empty stack"))? + self.term = Some( + BlankNode::new(data.to_string()) + .map_err(|e| { + QueryResultsSyntaxError::msg(format!( + "Invalid blank node value '{data}': {e}" + )) + })? + .into(), + ); + Ok(None) } State::Literal => { - if term.is_none() { - // We default to the empty literal - term = Some(build_literal("", lang.take(), datatype.take())?.into()) - } - state = self - .stack - .pop() - .ok_or_else(|| QueryResultsSyntaxError::msg("Empty stack"))?; + self.term = Some( + build_literal(data, self.lang.take(), self.datatype.take())?.into(), + ); + Ok(None) } - State::Triple => { - #[cfg(feature = "rdf-star")] - if let (Some(subject), Some(predicate), Some(object)) = ( - self.subject_stack.pop(), - self.predicate_stack.pop(), - self.object_stack.pop(), - ) { - term = Some( - Triple::new( - match subject { - Term::NamedNode(subject) => subject.into(), - Term::BlankNode(subject) => subject.into(), - Term::Triple(subject) => Subject::Triple(subject), - Term::Literal(_) => { - return Err(QueryResultsSyntaxError::msg( - "The value should not be a ", - ) - .into()) - } - }, - match predicate { - Term::NamedNode(predicate) => predicate, - _ => { - return Err(QueryResultsSyntaxError::msg( - "The value should be an ", - ) - .into()) - } - }, - object, - ) - .into(), - ); - state = self - .stack - .pop() - .ok_or_else(|| QueryResultsSyntaxError::msg("Empty stack"))?; + _ => Err(QueryResultsSyntaxError::msg(format!( + "Unexpected textual value found: {data}" + )) + .into()), + } + } + Event::End(_) => match self.state_stack.pop().unwrap() { + State::Start | State::Uri => Ok(None), + State::Result => Ok(Some(take(&mut self.new_bindings))), + State::Binding => { + if let Some(var) = &self.current_var { + if let Some(var) = self.mapping.get(var) { + self.new_bindings[*var] = self.term.take() } else { return Err( - QueryResultsSyntaxError::msg("A should contain a , a and an ").into() + QueryResultsSyntaxError::msg(format!("The variable '{var}' is used in a binding but not declared in the variables list")).into() ); } - #[cfg(not(feature = "rdf-star"))] - { - return Err(QueryResultsSyntaxError::msg( - "The tag is only supported with RDF-star", + } else { + return Err(QueryResultsSyntaxError::msg( + "No name found for tag", + ) + .into()); + } + Ok(None) + } + State::Subject => { + if let Some(subject) = self.term.take() { + self.subject_stack.push(subject) + } + Ok(None) + } + State::Predicate => { + if let Some(predicate) = self.term.take() { + self.predicate_stack.push(predicate) + } + Ok(None) + } + State::Object => { + if let Some(object) = self.term.take() { + self.object_stack.push(object) + } + Ok(None) + } + State::BNode => { + if self.term.is_none() { + // We default to a random bnode + self.term = Some(BlankNode::default().into()) + } + Ok(None) + } + State::Literal => { + if self.term.is_none() { + // We default to the empty literal + self.term = + Some(build_literal("", self.lang.take(), self.datatype.take())?.into()) + } + Ok(None) + } + State::Triple => { + #[cfg(feature = "rdf-star")] + if let (Some(subject), Some(predicate), Some(object)) = ( + self.subject_stack.pop(), + self.predicate_stack.pop(), + self.object_stack.pop(), + ) { + self.term = Some( + Triple::new( + match subject { + Term::NamedNode(subject) => subject.into(), + Term::BlankNode(subject) => subject.into(), + Term::Triple(subject) => Subject::Triple(subject), + Term::Literal(_) => { + return Err(QueryResultsSyntaxError::msg( + "The value should not be a ", + ) + .into()); + } + }, + match predicate { + Term::NamedNode(predicate) => predicate, + _ => { + return Err(QueryResultsSyntaxError::msg( + "The value should be an ", + ) + .into()); + } + }, + object, ) - .into()); - } + .into(), + ); + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg( + "A should contain a , a and an ", + ) + .into()) + } + #[cfg(not(feature = "rdf-star"))] + { + Err(QueryResultsSyntaxError::msg( + "The tag is only supported with RDF-star", + ) + .into()) } - State::End => (), - }, - Event::Eof => return Ok(None), - _ => (), + } + }, + Event::Eof | Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => { + Ok(None) } + Event::Empty(_) => unreachable!("Empty events are expended"), + Event::CData(_) => Err(QueryResultsSyntaxError::msg( + " are not supported in SPARQL XML results", + ) + .into()), } } } @@ -661,13 +822,6 @@ fn build_literal( } } -fn decode<'a, T>( - reader: &Reader, - data: &'a impl AsRef<[u8]>, -) -> Result, QueryResultsParseError> { - Ok(reader.decoder().decode(data.as_ref())?) -} - fn map_xml_error(error: quick_xml::Error) -> io::Error { match error { quick_xml::Error::Io(error) => {