Adds Tokio async to SPARQL XML results parser

pull/776/merge
Tpt 7 months ago committed by Thomas Tanon
parent c13cb8db7c
commit 7d45ea43f5
  1. 6
      lib/sparesults/src/error.rs
  2. 27
      lib/sparesults/src/parser.rs
  3. 512
      lib/sparesults/src/xml.rs

@ -47,6 +47,12 @@ impl From<quick_xml::Error> for QueryResultsParseError {
} }
} }
impl From<quick_xml::escape::EscapeError> for QueryResultsParseError {
#[inline]
fn from(error: quick_xml::escape::EscapeError) -> Self {
quick_xml::Error::from(error).into()
}
}
/// An error in the syntax of the parsed file. /// An error in the syntax of the parsed file.
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
#[error(transparent)] #[error(transparent)]

@ -7,7 +7,9 @@ use crate::json::{
FromTokioAsyncReadJsonQueryResultsReader, FromTokioAsyncReadJsonSolutionsReader, FromTokioAsyncReadJsonQueryResultsReader, FromTokioAsyncReadJsonSolutionsReader,
}; };
use crate::solution::QuerySolution; use crate::solution::QuerySolution;
use crate::xml::{XmlQueryResultsReader, XmlSolutionsReader}; use crate::xml::{FromReadXmlQueryResultsReader, FromReadXmlSolutionsReader};
#[cfg(feature = "async-tokio")]
use crate::xml::{FromTokioAsyncReadXmlQueryResultsReader, FromTokioAsyncReadXmlSolutionsReader};
use oxrdf::Variable; use oxrdf::Variable;
use std::io::Read; use std::io::Read;
use std::sync::Arc; use std::sync::Arc;
@ -81,9 +83,9 @@ impl QueryResultsParser {
reader: R, reader: R,
) -> Result<FromReadQueryResultsReader<R>, QueryResultsParseError> { ) -> Result<FromReadQueryResultsReader<R>, QueryResultsParseError> {
Ok(match self.format { Ok(match self.format {
QueryResultsFormat::Xml => match XmlQueryResultsReader::read(reader)? { QueryResultsFormat::Xml => match FromReadXmlQueryResultsReader::read(reader)? {
XmlQueryResultsReader::Boolean(r) => FromReadQueryResultsReader::Boolean(r), FromReadXmlQueryResultsReader::Boolean(r) => FromReadQueryResultsReader::Boolean(r),
XmlQueryResultsReader::Solutions { FromReadXmlQueryResultsReader::Solutions {
solutions, solutions,
variables, variables,
} => FromReadQueryResultsReader::Solutions(FromReadSolutionsReader { } => FromReadQueryResultsReader::Solutions(FromReadSolutionsReader {
@ -128,7 +130,7 @@ impl QueryResultsParser {
/// Reads are automatically buffered. /// Reads are automatically buffered.
/// ///
/// Example in XML (the API is the same for JSON and TSV): /// Example in XML (the API is the same for JSON and TSV):
/// ```no_run /// ```
/// use sparesults::{QueryResultsFormat, QueryResultsParser, FromTokioAsyncReadQueryResultsReader}; /// use sparesults::{QueryResultsFormat, QueryResultsParser, FromTokioAsyncReadQueryResultsReader};
/// use oxrdf::{Literal, Variable}; /// use oxrdf::{Literal, Variable};
/// ///
@ -157,7 +159,16 @@ impl QueryResultsParser {
reader: R, reader: R,
) -> Result<FromTokioAsyncReadQueryResultsReader<R>, QueryResultsParseError> { ) -> Result<FromTokioAsyncReadQueryResultsReader<R>, QueryResultsParseError> {
Ok(match self.format { Ok(match self.format {
QueryResultsFormat::Xml => return Err(QueryResultsSyntaxError::msg("The XML query results parser does not support Tokio AsyncRead yet").into()), QueryResultsFormat::Xml => match FromTokioAsyncReadXmlQueryResultsReader::read(reader).await? {
FromTokioAsyncReadXmlQueryResultsReader::Boolean(r) => FromTokioAsyncReadQueryResultsReader::Boolean(r),
FromTokioAsyncReadXmlQueryResultsReader::Solutions {
solutions,
variables,
} => FromTokioAsyncReadQueryResultsReader::Solutions(FromTokioAsyncReadSolutionsReader {
variables: variables.into(),
solutions: FromTokioAsyncReadSolutionsReaderKind::Xml(solutions),
}),
},
QueryResultsFormat::Json => match FromTokioAsyncReadJsonQueryResultsReader::read(reader).await? { QueryResultsFormat::Json => match FromTokioAsyncReadJsonQueryResultsReader::read(reader).await? {
FromTokioAsyncReadJsonQueryResultsReader::Boolean(r) => FromTokioAsyncReadQueryResultsReader::Boolean(r), FromTokioAsyncReadJsonQueryResultsReader::Boolean(r) => FromTokioAsyncReadQueryResultsReader::Boolean(r),
FromTokioAsyncReadJsonQueryResultsReader::Solutions { FromTokioAsyncReadJsonQueryResultsReader::Solutions {
@ -248,7 +259,7 @@ pub struct FromReadSolutionsReader<R: Read> {
} }
enum FromReadSolutionsReaderKind<R: Read> { enum FromReadSolutionsReaderKind<R: Read> {
Xml(XmlSolutionsReader<R>), Xml(FromReadXmlSolutionsReader<R>),
Json(FromReadJsonSolutionsReader<R>), Json(FromReadJsonSolutionsReader<R>),
Tsv(TsvSolutionsReader<R>), Tsv(TsvSolutionsReader<R>),
} }
@ -381,6 +392,7 @@ pub struct FromTokioAsyncReadSolutionsReader<R: AsyncRead + Unpin> {
#[cfg(feature = "async-tokio")] #[cfg(feature = "async-tokio")]
enum FromTokioAsyncReadSolutionsReaderKind<R: AsyncRead + Unpin> { enum FromTokioAsyncReadSolutionsReaderKind<R: AsyncRead + Unpin> {
Json(FromTokioAsyncReadJsonSolutionsReader<R>), Json(FromTokioAsyncReadJsonSolutionsReader<R>),
Xml(FromTokioAsyncReadXmlSolutionsReader<R>),
} }
#[cfg(feature = "async-tokio")] #[cfg(feature = "async-tokio")]
@ -422,6 +434,7 @@ impl<R: AsyncRead + Unpin> FromTokioAsyncReadSolutionsReader<R> {
Some( Some(
match &mut self.solutions { match &mut self.solutions {
FromTokioAsyncReadSolutionsReaderKind::Json(reader) => reader.read_next().await, FromTokioAsyncReadSolutionsReaderKind::Json(reader) => reader.read_next().await,
FromTokioAsyncReadSolutionsReaderKind::Xml(reader) => reader.read_next().await,
} }
.transpose()? .transpose()?
.map(|values| (Arc::clone(&self.variables), values).into()), .map(|values| (Arc::clone(&self.variables), values).into()),

@ -3,15 +3,15 @@
use crate::error::{QueryResultsParseError, QueryResultsSyntaxError}; use crate::error::{QueryResultsParseError, QueryResultsSyntaxError};
use oxrdf::vocab::rdf; use oxrdf::vocab::rdf;
use oxrdf::*; use oxrdf::*;
use quick_xml::escape::unescape;
use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use quick_xml::{Reader, Writer}; use quick_xml::{Decoder, Reader, Writer};
use std::borrow::Cow;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::io::{self, BufReader, Read, Write}; use std::io::{self, BufReader, Read, Write};
use std::str; use std::mem::take;
use std::sync::Arc; use std::sync::Arc;
#[cfg(feature = "async-tokio")] #[cfg(feature = "async-tokio")]
use tokio::io::AsyncWrite; use tokio::io::{AsyncRead, AsyncWrite, BufReader as AsyncBufReader};
pub fn write_boolean_xml_result<W: Write>(write: W, value: bool) -> io::Result<W> { pub fn write_boolean_xml_result<W: Write>(write: W, value: bool) -> io::Result<W> {
let mut writer = Writer::new(write); let mut writer = Writer::new(write);
@ -154,7 +154,6 @@ impl InnerXmlSolutionsWriter {
} }
#[allow(clippy::unused_self)] #[allow(clippy::unused_self)]
fn write<'a>( fn write<'a>(
&self, &self,
output: &mut Vec<Event<'a>>, output: &mut Vec<Event<'a>>,
@ -218,17 +217,149 @@ fn write_xml_term<'a>(output: &mut Vec<Event<'a>>, term: TermRef<'a>) {
} }
} }
pub enum XmlQueryResultsReader<R: Read> { pub enum FromReadXmlQueryResultsReader<R: Read> {
Solutions { Solutions {
variables: Vec<Variable>, variables: Vec<Variable>,
solutions: XmlSolutionsReader<R>, solutions: FromReadXmlSolutionsReader<R>,
}, },
Boolean(bool), Boolean(bool),
} }
impl<R: Read> XmlQueryResultsReader<R> { impl<R: Read> FromReadXmlQueryResultsReader<R> {
pub fn read(source: R) -> Result<Self, QueryResultsParseError> { pub fn read(read: R) -> Result<Self, QueryResultsParseError> {
enum State { let mut reader = Reader::from_reader(BufReader::new(read));
reader.trim_text(true);
reader.expand_empty_elements(true);
let mut reader_buffer = Vec::new();
let mut inner = XmlInnerQueryResultsReader {
state: ResultsState::Start,
variables: Vec::new(),
decoder: reader.decoder(),
};
loop {
reader_buffer.clear();
let event = reader.read_event_into(&mut reader_buffer)?;
if let Some(result) = inner.read_event(event)? {
return Ok(match result {
XmlInnerQueryResults::Solutions {
variables,
solutions,
} => Self::Solutions {
variables,
solutions: FromReadXmlSolutionsReader {
reader,
inner: solutions,
reader_buffer,
},
},
XmlInnerQueryResults::Boolean(value) => Self::Boolean(value),
});
}
}
}
}
/// Reader over the solutions of a SPARQL XML results document, fed by a
/// blocking [`Read`] source.
///
/// Built by `FromReadXmlQueryResultsReader::read` once the document header
/// has been consumed.
pub struct FromReadXmlSolutionsReader<R: Read> {
/// quick-xml reader wrapping the buffered source.
reader: Reader<BufReader<R>>,
/// Event consumer that turns XML events into solutions.
inner: XmlInnerSolutionsReader,
/// Scratch buffer handed to quick-xml for each event; cleared before every read.
reader_buffer: Vec<u8>,
}
impl<R: Read> FromReadXmlSolutionsReader<R> {
    /// Reads XML events until one complete solution is available.
    ///
    /// Returns `Ok(Some(bindings))` for the next solution and `Ok(None)` once
    /// the underlying reader reports the end of the file.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while reading an XML event or while the
    /// inner reader processes it.
    pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
        loop {
            // The buffer only needs to live for one event, so it is recycled.
            self.reader_buffer.clear();
            let event = self.reader.read_event_into(&mut self.reader_buffer)?;
            if matches!(event, Event::Eof) {
                return Ok(None);
            }
            let step = self.inner.read_event(event)?;
            if step.is_some() {
                return Ok(step);
            }
        }
    }
}
#[cfg(feature = "async-tokio")]
/// Result of reading the header of a SPARQL XML results document from a
/// Tokio [`AsyncRead`] source.
pub enum FromTokioAsyncReadXmlQueryResultsReader<R: AsyncRead + Unpin> {
/// The document holds solutions.
Solutions {
/// Variables collected from the document header.
variables: Vec<Variable>,
/// Reader that yields the solutions one at a time.
solutions: FromTokioAsyncReadXmlSolutionsReader<R>,
},
/// The document holds a boolean result with this value.
Boolean(bool),
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadXmlQueryResultsReader<R> {
    /// Reads the header of a SPARQL XML results document asynchronously.
    ///
    /// Events are fed to the header reader until it reports either a boolean
    /// value or the start of the solutions. In the latter case the XML reader
    /// and its scratch buffer are handed over to the returned solutions reader.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while reading an XML event or while the
    /// header reader processes it (which includes reaching the end of the
    /// file before a result kind is known).
    pub async fn read(read: R) -> Result<Self, QueryResultsParseError> {
        let mut reader = Reader::from_reader(AsyncBufReader::new(read));
        reader.trim_text(true);
        // Self-closing tags are reported as a start event followed by an end
        // event, so the event consumers never have to handle `Event::Empty`.
        reader.expand_empty_elements(true);

        // NOTE(review): the decoder is copied before any event is read. If
        // quick-xml is built so that the encoding can change after the XML
        // declaration, this copy would not follow it; confirm.
        let mut inner = XmlInnerQueryResultsReader {
            state: ResultsState::Start,
            variables: Vec::new(),
            decoder: reader.decoder(),
        };

        let mut reader_buffer = Vec::new();
        let header = loop {
            reader_buffer.clear();
            let event = reader.read_event_into_async(&mut reader_buffer).await?;
            if let Some(header) = inner.read_event(event)? {
                break header;
            }
        };

        Ok(match header {
            XmlInnerQueryResults::Boolean(value) => Self::Boolean(value),
            XmlInnerQueryResults::Solutions {
                variables,
                solutions,
            } => Self::Solutions {
                variables,
                solutions: FromTokioAsyncReadXmlSolutionsReader {
                    reader,
                    inner: solutions,
                    reader_buffer,
                },
            },
        })
    }
}
#[cfg(feature = "async-tokio")]
/// Reader over the solutions of a SPARQL XML results document, fed by a
/// Tokio [`AsyncRead`] source.
///
/// Built by `FromTokioAsyncReadXmlQueryResultsReader::read` once the document
/// header has been consumed.
pub struct FromTokioAsyncReadXmlSolutionsReader<R: AsyncRead + Unpin> {
/// quick-xml reader wrapping the Tokio-buffered source.
reader: Reader<AsyncBufReader<R>>,
/// Event consumer that turns XML events into solutions.
inner: XmlInnerSolutionsReader,
/// Scratch buffer handed to quick-xml for each event; cleared before every read.
reader_buffer: Vec<u8>,
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadXmlSolutionsReader<R> {
    /// Asynchronously reads XML events until one complete solution is available.
    ///
    /// Returns `Ok(Some(bindings))` for the next solution and `Ok(None)` once
    /// the underlying reader reports the end of the file.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while reading an XML event or while the
    /// inner reader processes it.
    pub async fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
        loop {
            // The buffer only needs to live for one event, so it is recycled.
            self.reader_buffer.clear();
            let event = self
                .reader
                .read_event_into_async(&mut self.reader_buffer)
                .await?;
            match event {
                Event::Eof => return Ok(None),
                other => {
                    let step = self.inner.read_event(other)?;
                    if step.is_some() {
                        return Ok(step);
                    }
                }
            }
        }
    }
}
/// Value returned by `XmlInnerQueryResultsReader::read_event` once the kind of
/// result held by the document is known.
///
/// It carries no I/O handle; the `FromRead…` and `FromTokioAsyncRead…` readers
/// attach their own XML reader and buffer to it.
enum XmlInnerQueryResults {
/// A `<results>` element was opened.
Solutions {
/// Variables collected from the document header.
variables: Vec<Variable>,
/// Event consumer for the solutions that follow.
solutions: XmlInnerSolutionsReader,
},
/// A boolean result with this value was read.
Boolean(bool),
}
#[derive(Clone, Copy)]
enum ResultsState {
Start, Start,
Sparql, Sparql,
Head, Head,
@ -236,105 +367,126 @@ impl<R: Read> XmlQueryResultsReader<R> {
Boolean, Boolean,
} }
let mut reader = Reader::from_reader(BufReader::new(source)); struct XmlInnerQueryResultsReader {
reader.trim_text(true); state: ResultsState,
reader.expand_empty_elements(true); variables: Vec<Variable>,
decoder: Decoder,
let mut buffer = Vec::default(); }
let mut variables = Vec::default();
let mut state = State::Start;
// Read header impl XmlInnerQueryResultsReader {
loop { pub fn read_event(
buffer.clear(); &mut self,
let event = reader.read_event_into(&mut buffer)?; event: Event<'_>,
) -> Result<Option<XmlInnerQueryResults>, QueryResultsParseError> {
match event { match event {
Event::Start(event) => match state { Event::Start(event) => match self.state {
State::Start => { ResultsState::Start => {
if event.local_name().as_ref() == b"sparql" { if event.local_name().as_ref() == b"sparql" {
state = State::Sparql; self.state = ResultsState::Sparql;
Ok(None)
} else { } else {
return Err(QueryResultsSyntaxError::msg(format!("Expecting <sparql> tag, found <{}>", decode(&reader, &event.name())?)).into()); Err(QueryResultsSyntaxError::msg(format!("Expecting <sparql> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
} }
} }
State::Sparql => { ResultsState::Sparql => {
if event.local_name().as_ref() == b"head" { if event.local_name().as_ref() == b"head" {
state = State::Head; self.state = ResultsState::Head;
Ok(None)
} else { } else {
return Err(QueryResultsSyntaxError::msg(format!("Expecting <head> tag, found <{}>", decode(&reader, &event.name())?)).into()); Err(QueryResultsSyntaxError::msg(format!("Expecting <head> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
} }
} }
State::Head => { ResultsState::Head => {
if event.local_name().as_ref() == b"variable" { if event.local_name().as_ref() == b"variable" {
let name = event.attributes() let name = event.attributes()
.filter_map(Result::ok) .filter_map(Result::ok)
.find(|attr| attr.key.local_name().as_ref() == b"name") .find(|attr| attr.key.local_name().as_ref() == b"name")
.ok_or_else(|| QueryResultsSyntaxError::msg("No name attribute found for the <variable> tag"))? .ok_or_else(|| QueryResultsSyntaxError::msg("No name attribute found for the <variable> tag"))?;
.decode_and_unescape_value(&reader)?; let name = unescape(&self.decoder.decode(&name.value)?)?.into_owned();
let variable = Variable::new(name).map_err(|e| QueryResultsSyntaxError::msg(format!("Invalid variable name: {e}")))?; let variable = Variable::new(name).map_err(|e| QueryResultsSyntaxError::msg(format!("Invalid variable name: {e}")))?;
if variables.contains(&variable) { if self.variables.contains(&variable) {
return Err(QueryResultsSyntaxError::msg(format!( return Err(QueryResultsSyntaxError::msg(format!(
"The variable {variable} is declared twice" "The variable {variable} is declared twice"
)) ))
.into()); .into());
} }
variables.push(variable); self.variables.push(variable);
Ok(None)
} else if event.local_name().as_ref() == b"link" { } else if event.local_name().as_ref() == b"link" {
// no op // no op
Ok(None)
} else { } else {
return Err(QueryResultsSyntaxError::msg(format!("Expecting <variable> or <link> tag, found <{}>", decode(&reader, &event.name())?)).into()); Err(QueryResultsSyntaxError::msg(format!("Expecting <variable> or <link> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
} }
} }
State::AfterHead => { ResultsState::AfterHead => {
if event.local_name().as_ref() == b"boolean" { if event.local_name().as_ref() == b"boolean" {
state = State::Boolean self.state = ResultsState::Boolean;
Ok(None)
} else if event.local_name().as_ref() == b"results" { } else if event.local_name().as_ref() == b"results" {
let mut mapping = BTreeMap::default(); let mut mapping = BTreeMap::default();
for (i, var) in variables.iter().enumerate() { for (i, var) in self.variables.iter().enumerate() {
mapping.insert(var.clone().into_string(), i); mapping.insert(var.clone().into_string(), i);
} }
return Ok(Self::Solutions { variables, Ok(Some(XmlInnerQueryResults::Solutions {
solutions: XmlSolutionsReader { variables: take(&mut self.variables),
reader, solutions: XmlInnerSolutionsReader {
buffer, decoder: self.decoder,
mapping, mapping,
stack: Vec::new(), state_stack: vec![State::Start, State::Start],
new_bindings: Vec::new(),
current_var: None,
term: None,
lang: None,
datatype: None,
subject_stack: Vec::new(), subject_stack: Vec::new(),
predicate_stack: Vec::new(), predicate_stack: Vec::new(),
object_stack: Vec::new(), object_stack: Vec::new(),
}}); },
}))
} else if event.local_name().as_ref() != b"link" && event.local_name().as_ref() != b"results" && event.local_name().as_ref() != b"boolean" { } else if event.local_name().as_ref() != b"link" && event.local_name().as_ref() != b"results" && event.local_name().as_ref() != b"boolean" {
return Err(QueryResultsSyntaxError::msg(format!("Expecting sparql tag, found <{}>", decode(&reader, &event.name())?)).into()); Err(QueryResultsSyntaxError::msg(format!("Expecting sparql tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
} else {
Ok(None)
} }
} }
State::Boolean => return Err(QueryResultsSyntaxError::msg(format!("Unexpected tag inside of <boolean> tag: <{}>", decode(&reader, &event.name())?)).into()) ResultsState::Boolean => Err(QueryResultsSyntaxError::msg(format!("Unexpected tag inside of <boolean> tag: <{}>", self.decoder.decode(event.name().as_ref())?)).into())
}, },
Event::Text(event) => { Event::Text(event) => {
let value = event.unescape()?; let value = event.unescape()?;
return match state { match self.state {
State::Boolean => { ResultsState::Boolean => {
return if value == "true" { if value == "true" {
Ok(Self::Boolean(true)) Ok(Some(XmlInnerQueryResults::Boolean(true)))
} else if value == "false" { } else if value == "false" {
Ok(Self::Boolean(false)) Ok(Some(XmlInnerQueryResults::Boolean(false)))
} else { } else {
Err(QueryResultsSyntaxError::msg(format!("Unexpected boolean value. Found '{value}'")).into()) Err(QueryResultsSyntaxError::msg(format!("Unexpected boolean value. Found '{value}'")).into())
}; }
} }
_ => Err(QueryResultsSyntaxError::msg(format!("Unexpected textual value found: '{value}'")).into()) _ => Err(QueryResultsSyntaxError::msg(format!("Unexpected textual value found: '{value}'")).into())
}; }
}, }
Event::End(event) => { Event::End(event) => {
if let State::Head = state { if let ResultsState::Head = self.state {
if event.local_name().as_ref() == b"head" { if event.local_name().as_ref() == b"head" {
state = State::AfterHead self.state = ResultsState::AfterHead
} }
Ok(None)
} else { } else {
return Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag").into()); Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag").into())
} }
}, }
Event::Eof => return Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag").into()), Event::Eof => Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag").into()),
_ => (), Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => {
Ok(None)
}
Event::Empty(_) => unreachable!("Empty events are expended"),
Event::CData(_) => {
Err(QueryResultsSyntaxError::msg(
"<![CDATA[...]]> are not supported in SPARQL XML results",
)
.into())
} }
} }
} }
@ -351,135 +503,150 @@ enum State {
Subject, Subject,
Predicate, Predicate,
Object, Object,
End,
} }
pub struct XmlSolutionsReader<R: Read> { struct XmlInnerSolutionsReader {
reader: Reader<BufReader<R>>, decoder: Decoder,
buffer: Vec<u8>,
mapping: BTreeMap<String, usize>, mapping: BTreeMap<String, usize>,
stack: Vec<State>, state_stack: Vec<State>,
new_bindings: Vec<Option<Term>>,
current_var: Option<String>,
term: Option<Term>,
lang: Option<String>,
datatype: Option<NamedNode>,
subject_stack: Vec<Term>, subject_stack: Vec<Term>,
predicate_stack: Vec<Term>, predicate_stack: Vec<Term>,
object_stack: Vec<Term>, object_stack: Vec<Term>,
} }
impl<R: Read> XmlSolutionsReader<R> { impl XmlInnerSolutionsReader {
pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> { #[allow(clippy::unwrap_in_result)]
let mut state = State::Start; pub fn read_event(
&mut self,
let mut new_bindings = vec![None; self.mapping.len()]; event: Event<'_>,
) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
let mut current_var = None;
let mut term: Option<Term> = None;
let mut lang = None;
let mut datatype = None;
loop {
self.buffer.clear();
let event = self.reader.read_event_into(&mut self.buffer)?;
match event { match event {
Event::Start(event) => match state { Event::Start(event) => match self.state_stack.last().unwrap() {
State::Start => { State::Start => {
if event.local_name().as_ref() == b"result" { if event.local_name().as_ref() == b"result" {
state = State::Result; self.new_bindings = vec![None; self.mapping.len()];
self.state_stack.push(State::Result);
Ok(None)
} else { } else {
return Err(QueryResultsSyntaxError::msg(format!( Err(QueryResultsSyntaxError::msg(format!(
"Expecting <result>, found <{}>", "Expecting <result>, found <{}>",
decode(&self.reader, &event.name())? self.decoder.decode(event.name().as_ref())?
)) ))
.into()); .into())
} }
} }
State::Result => { State::Result => {
if event.local_name().as_ref() == b"binding" { if event.local_name().as_ref() == b"binding" {
match event let Some(attr) = event
.attributes() .attributes()
.filter_map(Result::ok) .filter_map(Result::ok)
.find(|attr| attr.key.local_name().as_ref() == b"name") .find(|attr| attr.key.local_name().as_ref() == b"name")
{ else {
Some(attr) => {
current_var = Some(
attr.decode_and_unescape_value(&self.reader)?.to_string(),
)
}
None => {
return Err(QueryResultsSyntaxError::msg( return Err(QueryResultsSyntaxError::msg(
"No name attribute found for the <binding> tag", "No name attribute found for the <binding> tag",
) )
.into()); .into());
} };
} self.current_var =
state = State::Binding; Some(unescape(&self.decoder.decode(&attr.value)?)?.into_owned());
self.state_stack.push(State::Binding);
Ok(None)
} else { } else {
return Err(QueryResultsSyntaxError::msg(format!( Err(QueryResultsSyntaxError::msg(format!(
"Expecting <binding>, found <{}>", "Expecting <binding>, found <{}>",
decode(&self.reader, &event.name())? self.decoder.decode(event.name().as_ref())?
)) ))
.into()); .into())
} }
} }
State::Binding | State::Subject | State::Predicate | State::Object => { State::Binding | State::Subject | State::Predicate | State::Object => {
if term.is_some() { if self.term.is_some() {
return Err(QueryResultsSyntaxError::msg( return Err(QueryResultsSyntaxError::msg(
"There is already a value for the current binding", "There is already a value for the current binding",
) )
.into()); .into());
} }
self.stack.push(state);
if event.local_name().as_ref() == b"uri" { if event.local_name().as_ref() == b"uri" {
state = State::Uri; self.state_stack.push(State::Uri);
Ok(None)
} else if event.local_name().as_ref() == b"bnode" { } else if event.local_name().as_ref() == b"bnode" {
state = State::BNode; self.state_stack.push(State::BNode);
Ok(None)
} else if event.local_name().as_ref() == b"literal" { } else if event.local_name().as_ref() == b"literal" {
for attr in event.attributes() { for attr in event.attributes() {
let attr = attr.map_err(quick_xml::Error::from)?; let attr = attr.map_err(quick_xml::Error::from)?;
if attr.key.as_ref() == b"xml:lang" { if attr.key.as_ref() == b"xml:lang" {
lang = Some( self.lang = Some(
attr.decode_and_unescape_value(&self.reader)?.to_string(), unescape(&self.decoder.decode(&attr.value)?)?.into_owned(),
); );
} else if attr.key.local_name().as_ref() == b"datatype" { } else if attr.key.local_name().as_ref() == b"datatype" {
let iri = attr.decode_and_unescape_value(&self.reader)?; let iri = self.decoder.decode(&attr.value)?;
datatype = let iri = unescape(&iri)?;
Some(NamedNode::new(iri.to_string()).map_err(|e| { self.datatype =
Some(NamedNode::new(iri.as_ref()).map_err(|e| {
QueryResultsSyntaxError::msg(format!( QueryResultsSyntaxError::msg(format!(
"Invalid datatype IRI '{iri}': {e}" "Invalid datatype IRI '{iri}': {e}"
)) ))
})?); })?);
} }
} }
state = State::Literal; self.state_stack.push(State::Literal);
Ok(None)
} else if event.local_name().as_ref() == b"triple" { } else if event.local_name().as_ref() == b"triple" {
state = State::Triple; self.state_stack.push(State::Triple);
Ok(None)
} else { } else {
return Err(QueryResultsSyntaxError::msg(format!( Err(QueryResultsSyntaxError::msg(format!(
"Expecting <uri>, <bnode> or <literal> found <{}>", "Expecting <uri>, <bnode> or <literal> found <{}>",
decode(&self.reader, &event.name())? self.decoder.decode(event.name().as_ref())?
)) ))
.into()); .into())
} }
} }
State::Triple => { State::Triple => {
if event.local_name().as_ref() == b"subject" { if event.local_name().as_ref() == b"subject" {
state = State::Subject self.state_stack.push(State::Subject);
Ok(None)
} else if event.local_name().as_ref() == b"predicate" { } else if event.local_name().as_ref() == b"predicate" {
state = State::Predicate self.state_stack.push(State::Predicate);
Ok(None)
} else if event.local_name().as_ref() == b"object" { } else if event.local_name().as_ref() == b"object" {
state = State::Object self.state_stack.push(State::Object);
Ok(None)
} else { } else {
return Err(QueryResultsSyntaxError::msg(format!( Err(QueryResultsSyntaxError::msg(format!(
"Expecting <subject>, <predicate> or <object> found <{}>", "Expecting <subject>, <predicate> or <object> found <{}>",
decode(&self.reader, &event.name())? self.decoder.decode(event.name().as_ref())?
)) ))
.into()); .into())
} }
} }
_ => (), State::Uri => Err(QueryResultsSyntaxError::msg(format!(
"<uri> must only contain a string, found <{}>",
self.decoder.decode(event.name().as_ref())?
))
.into()),
// A start tag nested inside <bnode> or <literal> is a syntax error. Each arm
// names its own enclosing element so the message points at the real
// culprit instead of always blaming <uri>.
State::BNode => Err(QueryResultsSyntaxError::msg(format!(
    "<bnode> must only contain a string, found <{}>",
    self.decoder.decode(event.name().as_ref())?
))
.into()),
State::Literal => Err(QueryResultsSyntaxError::msg(format!(
    "<literal> must only contain a string, found <{}>",
    self.decoder.decode(event.name().as_ref())?
))
.into()),
}, },
Event::Text(event) => { Event::Text(event) => {
let data = event.unescape()?; let data = event.unescape()?;
match state { match self.state_stack.last().unwrap() {
State::Uri => { State::Uri => {
term = Some( self.term = Some(
NamedNode::new(data.to_string()) NamedNode::new(data.to_string())
.map_err(|e| { .map_err(|e| {
QueryResultsSyntaxError::msg(format!( QueryResultsSyntaxError::msg(format!(
@ -487,10 +654,11 @@ impl<R: Read> XmlSolutionsReader<R> {
)) ))
})? })?
.into(), .into(),
) );
Ok(None)
} }
State::BNode => { State::BNode => {
term = Some( self.term = Some(
BlankNode::new(data.to_string()) BlankNode::new(data.to_string())
.map_err(|e| { .map_err(|e| {
QueryResultsSyntaxError::msg(format!( QueryResultsSyntaxError::msg(format!(
@ -498,26 +666,28 @@ impl<R: Read> XmlSolutionsReader<R> {
)) ))
})? })?
.into(), .into(),
) );
Ok(None)
} }
State::Literal => { State::Literal => {
term = Some(build_literal(data, lang.take(), datatype.take())?.into()); self.term = Some(
build_literal(data, self.lang.take(), self.datatype.take())?.into(),
);
Ok(None)
} }
_ => { _ => Err(QueryResultsSyntaxError::msg(format!(
return Err(QueryResultsSyntaxError::msg(format!(
"Unexpected textual value found: {data}" "Unexpected textual value found: {data}"
)) ))
.into()); .into()),
} }
} }
} Event::End(_) => match self.state_stack.pop().unwrap() {
Event::End(_) => match state { State::Start | State::Uri => Ok(None),
State::Start => state = State::End, State::Result => Ok(Some(take(&mut self.new_bindings))),
State::Result => return Ok(Some(new_bindings)),
State::Binding => { State::Binding => {
if let Some(var) = &current_var { if let Some(var) = &self.current_var {
if let Some(var) = self.mapping.get(var) { if let Some(var) = self.mapping.get(var) {
new_bindings[*var] = term.take() self.new_bindings[*var] = self.term.take()
} else { } else {
return Err( return Err(
QueryResultsSyntaxError::msg(format!("The variable '{var}' is used in a binding but not declared in the variables list")).into() QueryResultsSyntaxError::msg(format!("The variable '{var}' is used in a binding but not declared in the variables list")).into()
@ -529,51 +699,40 @@ impl<R: Read> XmlSolutionsReader<R> {
) )
.into()); .into());
} }
state = State::Result; Ok(None)
} }
State::Subject => { State::Subject => {
if let Some(subject) = term.take() { if let Some(subject) = self.term.take() {
self.subject_stack.push(subject) self.subject_stack.push(subject)
} }
state = State::Triple; Ok(None)
} }
State::Predicate => { State::Predicate => {
if let Some(predicate) = term.take() { if let Some(predicate) = self.term.take() {
self.predicate_stack.push(predicate) self.predicate_stack.push(predicate)
} }
state = State::Triple; Ok(None)
} }
State::Object => { State::Object => {
if let Some(object) = term.take() { if let Some(object) = self.term.take() {
self.object_stack.push(object) self.object_stack.push(object)
} }
state = State::Triple; Ok(None)
}
State::Uri => {
state = self
.stack
.pop()
.ok_or_else(|| QueryResultsSyntaxError::msg("Empty stack"))?
} }
State::BNode => { State::BNode => {
if term.is_none() { if self.term.is_none() {
// We default to a random bnode // We default to a random bnode
term = Some(BlankNode::default().into()) self.term = Some(BlankNode::default().into())
} }
state = self Ok(None)
.stack
.pop()
.ok_or_else(|| QueryResultsSyntaxError::msg("Empty stack"))?
} }
State::Literal => { State::Literal => {
if term.is_none() { if self.term.is_none() {
// We default to the empty literal // We default to the empty literal
term = Some(build_literal("", lang.take(), datatype.take())?.into()) self.term =
Some(build_literal("", self.lang.take(), self.datatype.take())?.into())
} }
state = self Ok(None)
.stack
.pop()
.ok_or_else(|| QueryResultsSyntaxError::msg("Empty stack"))?;
} }
State::Triple => { State::Triple => {
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
@ -582,7 +741,7 @@ impl<R: Read> XmlSolutionsReader<R> {
self.predicate_stack.pop(), self.predicate_stack.pop(),
self.object_stack.pop(), self.object_stack.pop(),
) { ) {
term = Some( self.term = Some(
Triple::new( Triple::new(
match subject { match subject {
Term::NamedNode(subject) => subject.into(), Term::NamedNode(subject) => subject.into(),
@ -592,7 +751,7 @@ impl<R: Read> XmlSolutionsReader<R> {
return Err(QueryResultsSyntaxError::msg( return Err(QueryResultsSyntaxError::msg(
"The <subject> value should not be a <literal>", "The <subject> value should not be a <literal>",
) )
.into()) .into());
} }
}, },
match predicate { match predicate {
@ -601,35 +760,37 @@ impl<R: Read> XmlSolutionsReader<R> {
return Err(QueryResultsSyntaxError::msg( return Err(QueryResultsSyntaxError::msg(
"The <predicate> value should be an <uri>", "The <predicate> value should be an <uri>",
) )
.into()) .into());
} }
}, },
object, object,
) )
.into(), .into(),
); );
state = self Ok(None)
.stack
.pop()
.ok_or_else(|| QueryResultsSyntaxError::msg("Empty stack"))?;
} else { } else {
return Err( Err(QueryResultsSyntaxError::msg(
QueryResultsSyntaxError::msg("A <triple> should contain a <subject>, a <predicate> and an <object>").into() "A <triple> should contain a <subject>, a <predicate> and an <object>",
); )
.into())
} }
#[cfg(not(feature = "rdf-star"))] #[cfg(not(feature = "rdf-star"))]
{ {
return Err(QueryResultsSyntaxError::msg( Err(QueryResultsSyntaxError::msg(
"The <triple> tag is only supported with RDF-star", "The <triple> tag is only supported with RDF-star",
) )
.into()); .into())
} }
} }
State::End => (),
}, },
Event::Eof => return Ok(None), Event::Eof | Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => {
_ => (), Ok(None)
} }
Event::Empty(_) => unreachable!("Empty events are expended"),
Event::CData(_) => Err(QueryResultsSyntaxError::msg(
"<![CDATA[...]]> are not supported in SPARQL XML results",
)
.into()),
} }
} }
} }
@ -661,13 +822,6 @@ fn build_literal(
} }
} }
fn decode<'a, T>(
reader: &Reader<T>,
data: &'a impl AsRef<[u8]>,
) -> Result<Cow<'a, str>, QueryResultsParseError> {
Ok(reader.decoder().decode(data.as_ref())?)
}
fn map_xml_error(error: quick_xml::Error) -> io::Error { fn map_xml_error(error: quick_xml::Error) -> io::Error {
match error { match error {
quick_xml::Error::Io(error) => { quick_xml::Error::Io(error) => {

Loading…
Cancel
Save