// NOTE(review): this copy of the file looks whitespace-collapsed (many definitions share one
// physical line, so the `//` comments embedded below swallow the code that follows them) and
// text between `<` and `>` appears to have been stripped: generic parameter lists are missing
// (`write: W` with no declaration of `W`, `Vec>`, `io::Result` without a payload) and the XML
// tag names inside several error strings are gone ("Expecting tag, found <{}>").
// Nothing below has been altered; restore the file from its upstream source before building.
//
// Next line: module doc comment and imports; `write_boolean_xml_result` and its tokio twin,
// which both emit the eight events built by `inner_write_boolean_xml_result` (XML declaration,
// `sparql` root carrying the results namespace, an empty `head`, and a `boolean` element whose
// text is "true" or "false") and hand back the wrapped writer; then `ToWriteXmlSolutionsWriter`,
// whose `start` buffers the prologue produced by `InnerXmlSolutionsWriter::start`.
//! Implementation of [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/) use crate::oxrdf::vocab::rdf; use crate::oxrdf::*; use crate::sparesults::error::{QueryResultsParseError, QueryResultsSyntaxError}; use quick_xml::escape::unescape; use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use quick_xml::{Decoder, Error, Reader, Writer}; use std::collections::BTreeMap; use std::io::{self, BufReader, Read, Write}; use std::mem::take; use std::sync::Arc; #[cfg(feature = "async-tokio")] use tokio::io::{AsyncRead, AsyncWrite, BufReader as AsyncBufReader}; pub fn write_boolean_xml_result(write: W, value: bool) -> io::Result { let mut writer = Writer::new(write); for event in inner_write_boolean_xml_result(value) { writer.write_event(event).map_err(map_xml_error)?; } Ok(writer.into_inner()) } #[cfg(feature = "async-tokio")] pub async fn tokio_async_write_boolean_xml_result( write: W, value: bool, ) -> io::Result { let mut writer = Writer::new(write); for event in inner_write_boolean_xml_result(value) { writer .write_event_async(event) .await .map_err(map_xml_error)?; } Ok(writer.into_inner()) } fn inner_write_boolean_xml_result(value: bool) -> [Event<'static>; 8] { [ Event::Decl(BytesDecl::new("1.0", None, None)), Event::Start( BytesStart::new("sparql") .with_attributes([("xmlns", "http://www.w3.org/2005/sparql-results#")]), ), Event::Start(BytesStart::new("head")), Event::End(BytesEnd::new("head")), Event::Start(BytesStart::new("boolean")), Event::Text(BytesText::new(if value { "true" } else { "false" })), Event::End(BytesEnd::new("boolean")), Event::End(BytesEnd::new("sparql")), ] } pub struct ToWriteXmlSolutionsWriter { inner: InnerXmlSolutionsWriter, writer: Writer, } impl ToWriteXmlSolutionsWriter { pub fn start(write: W, variables: &[Variable]) -> io::Result { let mut writer = Writer::new(write); let mut buffer = Vec::with_capacity(48); let inner = InnerXmlSolutionsWriter::start(&mut buffer, variables); 
// Continues `ToWriteXmlSolutionsWriter::start` (flushes the buffered prologue). `write` buffers
// and flushes one solution per call; `finish` emits the closing events and returns the wrapped
// writer; `do_write` writes each buffered event, converting errors with `map_xml_error`.
// `ToTokioAsyncWriteXmlSolutionsWriter` (feature "async-tokio") repeats the same three steps
// with `write_event_async`. `InnerXmlSolutionsWriter` is a field-less, I/O-free core that only
// pushes events into the caller's buffer; its `start` begins here.
Self::do_write(&mut writer, buffer)?; Ok(Self { inner, writer }) } pub fn write<'a>( &mut self, solution: impl IntoIterator, TermRef<'a>)>, ) -> io::Result<()> { let mut buffer = Vec::with_capacity(48); self.inner.write(&mut buffer, solution); Self::do_write(&mut self.writer, buffer) } pub fn finish(mut self) -> io::Result { let mut buffer = Vec::with_capacity(4); self.inner.finish(&mut buffer); Self::do_write(&mut self.writer, buffer)?; Ok(self.writer.into_inner()) } fn do_write(writer: &mut Writer, output: Vec>) -> io::Result<()> { for event in output { writer.write_event(event).map_err(map_xml_error)?; } Ok(()) } } #[cfg(feature = "async-tokio")] pub struct ToTokioAsyncWriteXmlSolutionsWriter { inner: InnerXmlSolutionsWriter, writer: Writer, } #[cfg(feature = "async-tokio")] impl ToTokioAsyncWriteXmlSolutionsWriter { pub async fn start(write: W, variables: &[Variable]) -> io::Result { let mut writer = Writer::new(write); let mut buffer = Vec::with_capacity(48); let inner = InnerXmlSolutionsWriter::start(&mut buffer, variables); Self::do_write(&mut writer, buffer).await?; Ok(Self { inner, writer }) } pub async fn write<'a>( &mut self, solution: impl IntoIterator, TermRef<'a>)>, ) -> io::Result<()> { let mut buffer = Vec::with_capacity(48); self.inner.write(&mut buffer, solution); Self::do_write(&mut self.writer, buffer).await } pub async fn finish(mut self) -> io::Result { let mut buffer = Vec::with_capacity(4); self.inner.finish(&mut buffer); Self::do_write(&mut self.writer, buffer).await?; Ok(self.writer.into_inner()) } async fn do_write(writer: &mut Writer, output: Vec>) -> io::Result<()> { for event in output { writer .write_event_async(event) .await .map_err(map_xml_error)?; } Ok(()) } } struct InnerXmlSolutionsWriter; impl InnerXmlSolutionsWriter { fn start<'a>(output: &mut Vec>, variables: &'a [Variable]) -> Self { output.push(Event::Decl(BytesDecl::new("1.0", None, None))); output.push(Event::Start(BytesStart::new("sparql").with_attributes([( "xmlns", 
// Continues `InnerXmlSolutionsWriter::start`: after the declaration and the namespaced `sparql`
// root it writes `head` with one empty `variable` element (attribute `name`) per variable, then
// opens `results`. `write` wraps each (variable, term) pair in a `binding` element inside one
// `result`; `finish` closes `results` and `sparql`. `write_xml_term` serializes a term as
// `uri`, `bnode` or `literal`; a literal gets `xml:lang` when it has a language, otherwise a
// `datatype` attribute unless `is_plain()` holds. With feature "rdf-star" a triple term is
// written as a `triple` element (continued on the next line).
"http://www.w3.org/2005/sparql-results#", )]))); output.push(Event::Start(BytesStart::new("head"))); for variable in variables { output.push(Event::Empty( BytesStart::new("variable").with_attributes([("name", variable.as_str())]), )); } output.push(Event::End(BytesEnd::new("head"))); output.push(Event::Start(BytesStart::new("results"))); Self {} } #[allow(clippy::unused_self)] fn write<'a>( &self, output: &mut Vec>, solution: impl IntoIterator, TermRef<'a>)>, ) { output.push(Event::Start(BytesStart::new("result"))); for (variable, value) in solution { output.push(Event::Start( BytesStart::new("binding").with_attributes([("name", variable.as_str())]), )); write_xml_term(output, value); output.push(Event::End(BytesEnd::new("binding"))); } output.push(Event::End(BytesEnd::new("result"))); } #[allow(clippy::unused_self)] fn finish(self, output: &mut Vec>) { output.push(Event::End(BytesEnd::new("results"))); output.push(Event::End(BytesEnd::new("sparql"))); } } fn write_xml_term<'a>(output: &mut Vec>, term: TermRef<'a>) { match term { TermRef::NamedNode(uri) => { output.push(Event::Start(BytesStart::new("uri"))); output.push(Event::Text(BytesText::new(uri.as_str()))); output.push(Event::End(BytesEnd::new("uri"))); } TermRef::BlankNode(bnode) => { output.push(Event::Start(BytesStart::new("bnode"))); output.push(Event::Text(BytesText::new(bnode.as_str()))); output.push(Event::End(BytesEnd::new("bnode"))); } TermRef::Literal(literal) => { let mut start = BytesStart::new("literal"); if let Some(language) = literal.language() { start.push_attribute(("xml:lang", language)); } else if !literal.is_plain() { start.push_attribute(("datatype", literal.datatype().as_str())) } output.push(Event::Start(start)); output.push(Event::Text(BytesText::new(literal.value()))); output.push(Event::End(BytesEnd::new("literal"))); } #[cfg(feature = "rdf-star")] TermRef::Triple(triple) => { output.push(Event::Start(BytesStart::new("triple"))); 
// Continues the "rdf-star" arm of `write_xml_term`: `subject`, `predicate` and `object`
// children, each filled by a recursive call. `FromReadXmlQueryResultsReader::read` wraps the
// input in a `BufReader`, enables `trim_text` and `expand_empty_elements`, and feeds events to
// `XmlInnerQueryResultsReader` until it yields either a boolean or the solutions header; in the
// latter case the reader and its buffer move into `FromReadXmlSolutionsReader`, whose
// `read_next` returns `Ok(None)` on `Event::Eof` and otherwise loops until a solution is ready.
output.push(Event::Start(BytesStart::new("subject"))); write_xml_term(output, triple.subject.as_ref().into()); output.push(Event::End(BytesEnd::new("subject"))); output.push(Event::Start(BytesStart::new("predicate"))); write_xml_term(output, triple.predicate.as_ref().into()); output.push(Event::End(BytesEnd::new("predicate"))); output.push(Event::Start(BytesStart::new("object"))); write_xml_term(output, triple.object.as_ref()); output.push(Event::End(BytesEnd::new("object"))); output.push(Event::End(BytesEnd::new("triple"))); } } } pub enum FromReadXmlQueryResultsReader { Solutions { variables: Vec, solutions: FromReadXmlSolutionsReader, }, Boolean(bool), } impl FromReadXmlQueryResultsReader { pub fn read(read: R) -> Result { let mut reader = Reader::from_reader(BufReader::new(read)); reader.trim_text(true); reader.expand_empty_elements(true); let mut reader_buffer = Vec::new(); let mut inner = XmlInnerQueryResultsReader { state: ResultsState::Start, variables: Vec::new(), decoder: reader.decoder(), }; loop { reader_buffer.clear(); let event = reader.read_event_into(&mut reader_buffer)?; if let Some(result) = inner.read_event(event)? { return Ok(match result { XmlInnerQueryResults::Solutions { variables, solutions, } => Self::Solutions { variables, solutions: FromReadXmlSolutionsReader { reader, inner: solutions, reader_buffer, }, }, XmlInnerQueryResults::Boolean(value) => Self::Boolean(value), }); } } } } pub struct FromReadXmlSolutionsReader { reader: Reader>, inner: XmlInnerSolutionsReader, reader_buffer: Vec, } impl FromReadXmlSolutionsReader { pub fn read_next(&mut self) -> Result>>, QueryResultsParseError> { loop { self.reader_buffer.clear(); let event = self.reader.read_event_into(&mut self.reader_buffer)?; if event == Event::Eof { return Ok(None); } if let Some(solution) = self.inner.read_event(event)? 
// Continues `FromReadXmlSolutionsReader::read_next`. The rest of the line is the feature
// "async-tokio" counterpart: same flow, using `AsyncBufReader` and `read_event_into_async`.
{ return Ok(Some(solution)); } } } } #[cfg(feature = "async-tokio")] pub enum FromTokioAsyncReadXmlQueryResultsReader { Solutions { variables: Vec, solutions: FromTokioAsyncReadXmlSolutionsReader, }, Boolean(bool), } #[cfg(feature = "async-tokio")] impl FromTokioAsyncReadXmlQueryResultsReader { pub async fn read(read: R) -> Result { let mut reader = Reader::from_reader(AsyncBufReader::new(read)); reader.trim_text(true); reader.expand_empty_elements(true); let mut reader_buffer = Vec::new(); let mut inner = XmlInnerQueryResultsReader { state: ResultsState::Start, variables: Vec::new(), decoder: reader.decoder(), }; loop { reader_buffer.clear(); let event = reader.read_event_into_async(&mut reader_buffer).await?; if let Some(result) = inner.read_event(event)? { return Ok(match result { XmlInnerQueryResults::Solutions { variables, solutions, } => Self::Solutions { variables, solutions: FromTokioAsyncReadXmlSolutionsReader { reader, inner: solutions, reader_buffer, }, }, XmlInnerQueryResults::Boolean(value) => Self::Boolean(value), }); } } } } #[cfg(feature = "async-tokio")] pub struct FromTokioAsyncReadXmlSolutionsReader { reader: Reader>, inner: XmlInnerSolutionsReader, reader_buffer: Vec, } #[cfg(feature = "async-tokio")] impl FromTokioAsyncReadXmlSolutionsReader { pub async fn read_next(&mut self) -> Result>>, QueryResultsParseError> { loop { self.reader_buffer.clear(); let event = self .reader .read_event_into_async(&mut self.reader_buffer) .await?; if event == Event::Eof { return Ok(None); } if let Some(solution) = self.inner.read_event(event)? 
// Continues the async `read_next`. `XmlInnerQueryResults` is what header parsing produces;
// `ResultsState` tracks the position in the document prologue (Start -> Sparql -> Head ->
// AfterHead, plus Boolean once a `boolean` start tag is seen).
// `XmlInnerQueryResultsReader::read_event`, for start tags: expects local name `sparql`, then
// `head`; inside the head a `variable` must carry a `name` attribute (attributes that fail to
// parse are skipped by `filter_map(Result::ok)`), the name is unescaped and validated with
// `Variable::new`, and a repeated variable is an error; `link` is accepted and ignored.
{ return Ok(Some(solution)); } } } } enum XmlInnerQueryResults { Solutions { variables: Vec, solutions: XmlInnerSolutionsReader, }, Boolean(bool), } #[derive(Clone, Copy)] enum ResultsState { Start, Sparql, Head, AfterHead, Boolean, } struct XmlInnerQueryResultsReader { state: ResultsState, variables: Vec, decoder: Decoder, } impl XmlInnerQueryResultsReader { pub fn read_event( &mut self, event: Event<'_>, ) -> Result, QueryResultsParseError> { match event { Event::Start(event) => match self.state { ResultsState::Start => { if event.local_name().as_ref() == b"sparql" { self.state = ResultsState::Sparql; Ok(None) } else { Err(QueryResultsSyntaxError::msg(format!("Expecting tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into()) } } ResultsState::Sparql => { if event.local_name().as_ref() == b"head" { self.state = ResultsState::Head; Ok(None) } else { Err(QueryResultsSyntaxError::msg(format!("Expecting tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into()) } } ResultsState::Head => { if event.local_name().as_ref() == b"variable" { let name = event.attributes() .filter_map(Result::ok) .find(|attr| attr.key.local_name().as_ref() == b"name") .ok_or_else(|| QueryResultsSyntaxError::msg("No name attribute found for the tag"))?; let name = unescape(&self.decoder.decode(&name.value)?)?.into_owned(); let variable = Variable::new(name).map_err(|e| QueryResultsSyntaxError::msg(format!("Invalid variable name: {e}")))?; if self.variables.contains(&variable) { return Err(QueryResultsSyntaxError::msg(format!( "The variable {variable} is declared twice" )) .into()); } self.variables.push(variable); Ok(None) } else if event.local_name().as_ref() == b"link" { // no op Ok(None) } else { Err(QueryResultsSyntaxError::msg(format!("Expecting or tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into()) } } ResultsState::AfterHead => { if event.local_name().as_ref() == b"boolean" { self.state = ResultsState::Boolean; Ok(None) } else if 
// Continues the `AfterHead` arm: a `results` start tag builds the variable-name -> column-index
// map and returns the solutions reader, seeded with a state stack of two `State::Start` entries;
// any tag other than `link`, `results` or `boolean` is an error (the `results`/`boolean` tests
// in that third condition can no longer be true at that point). Text is only legal in the
// `Boolean` state and must be exactly "true" or "false". An end tag is accepted only while in
// the `Head` state (closing `head` switches to `AfterHead`); elsewhere it is reported as an
// early end of file.
event.local_name().as_ref() == b"results" { let mut mapping = BTreeMap::default(); for (i, var) in self.variables.iter().enumerate() { mapping.insert(var.clone().into_string(), i); } Ok(Some(XmlInnerQueryResults::Solutions { variables: take(&mut self.variables), solutions: XmlInnerSolutionsReader { decoder: self.decoder, mapping, state_stack: vec![State::Start, State::Start], new_bindings: Vec::new(), current_var: None, term: None, lang: None, datatype: None, subject_stack: Vec::new(), predicate_stack: Vec::new(), object_stack: Vec::new(), }, })) } else if event.local_name().as_ref() != b"link" && event.local_name().as_ref() != b"results" && event.local_name().as_ref() != b"boolean" { Err(QueryResultsSyntaxError::msg(format!("Expecting sparql tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into()) } else { Ok(None) } } ResultsState::Boolean => Err(QueryResultsSyntaxError::msg(format!("Unexpected tag inside of tag: <{}>", self.decoder.decode(event.name().as_ref())?)).into()) }, Event::Text(event) => { let value = event.unescape()?; match self.state { ResultsState::Boolean => { if value == "true" { Ok(Some(XmlInnerQueryResults::Boolean(true))) } else if value == "false" { Ok(Some(XmlInnerQueryResults::Boolean(false))) } else { Err(QueryResultsSyntaxError::msg(format!("Unexpected boolean value. Found '{value}'")).into()) } } _ => Err(QueryResultsSyntaxError::msg(format!("Unexpected textual value found: '{value}'")).into()) } } Event::End(event) => { if let ResultsState::Head = self.state { if event.local_name().as_ref() == b"head" { self.state = ResultsState::AfterHead } Ok(None) } else { Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a and a or tag").into()) } } Event::Eof => Err(QueryResultsSyntaxError::msg("Unexpected early file end. 
// Continues `XmlInnerQueryResultsReader::read_event`: EOF before a result is an error;
// comments, declarations, processing instructions and doctypes are ignored; `Event::Empty` is
// marked unreachable (both readers in this file enable `expand_empty_elements`); CDATA is
// rejected. `State` names the element currently open while reading solutions, and
// `XmlInnerSolutionsReader` holds the parsing state: the `state_stack`, the row under
// construction (`new_bindings`), the pending variable name / term / `lang` / `datatype`, and
// per-role stacks used to assemble nested triples.
// `read_event`, start tags: `result` allocates one slot per entry of `mapping`; `binding` must
// have a `name` attribute, stored in `current_var`.
// NOTE(review): `state_stack.last().unwrap()` (and `pop().unwrap()` further down) panic on an
// empty stack. The stack starts with two entries and every end tag pops one, so input with
// further content after those two levels have closed presumably reaches the panic — confirm,
// and consider returning a syntax error instead.
All results file should have a and a or tag").into()), Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => { Ok(None) } Event::Empty(_) => unreachable!("Empty events are expended"), Event::CData(_) => { Err(QueryResultsSyntaxError::msg( " are not supported in SPARQL XML results", ) .into()) } } } } enum State { Start, Result, Binding, Uri, BNode, Literal, Triple, Subject, Predicate, Object, } struct XmlInnerSolutionsReader { decoder: Decoder, mapping: BTreeMap, state_stack: Vec, new_bindings: Vec>, current_var: Option, term: Option, lang: Option, datatype: Option, subject_stack: Vec, predicate_stack: Vec, object_stack: Vec, } impl XmlInnerSolutionsReader { #[allow(clippy::unwrap_in_result)] pub fn read_event( &mut self, event: Event<'_>, ) -> Result>>, QueryResultsParseError> { match event { Event::Start(event) => match self.state_stack.last().unwrap() { State::Start => { if event.local_name().as_ref() == b"result" { self.new_bindings = vec![None; self.mapping.len()]; self.state_stack.push(State::Result); Ok(None) } else { Err(QueryResultsSyntaxError::msg(format!( "Expecting , found <{}>", self.decoder.decode(event.name().as_ref())? )) .into()) } } State::Result => { if event.local_name().as_ref() == b"binding" { let Some(attr) = event .attributes() .filter_map(Result::ok) .find(|attr| attr.key.local_name().as_ref() == b"name") else { return Err(QueryResultsSyntaxError::msg( "No name attribute found for the tag", ) .into()); }; self.current_var = Some(unescape(&self.decoder.decode(&attr.value)?)?.into_owned()); self.state_stack.push(State::Binding); Ok(None) } else { Err(QueryResultsSyntaxError::msg(format!( "Expecting , found <{}>", self.decoder.decode(event.name().as_ref())? 
// Continues the `State::Result` arm. Inside `binding`, `subject`, `predicate` or `object` a
// second value is rejected; otherwise `uri`, `bnode`, `literal` or `triple` is pushed on the
// state stack. For `literal`, `xml:lang` is matched on the full attribute name while `datatype`
// is matched on the local name and must parse with `NamedNode::new`. Inside `triple` only
// `subject`, `predicate` and `object` are accepted. `uri` and `bnode` reject child elements.
)) .into()) } } State::Binding | State::Subject | State::Predicate | State::Object => { if self.term.is_some() { return Err(QueryResultsSyntaxError::msg( "There is already a value for the current binding", ) .into()); } if event.local_name().as_ref() == b"uri" { self.state_stack.push(State::Uri); Ok(None) } else if event.local_name().as_ref() == b"bnode" { self.state_stack.push(State::BNode); Ok(None) } else if event.local_name().as_ref() == b"literal" { for attr in event.attributes() { let attr = attr.map_err(Error::from)?; if attr.key.as_ref() == b"xml:lang" { self.lang = Some( unescape(&self.decoder.decode(&attr.value)?)?.into_owned(), ); } else if attr.key.local_name().as_ref() == b"datatype" { let iri = self.decoder.decode(&attr.value)?; let iri = unescape(&iri)?; self.datatype = Some(NamedNode::new(iri.as_ref()).map_err(|e| { QueryResultsSyntaxError::msg(format!( "Invalid datatype IRI '{iri}': {e}" )) })?); } } self.state_stack.push(State::Literal); Ok(None) } else if event.local_name().as_ref() == b"triple" { self.state_stack.push(State::Triple); Ok(None) } else { Err(QueryResultsSyntaxError::msg(format!( "Expecting , or found <{}>", self.decoder.decode(event.name().as_ref())? )) .into()) } } State::Triple => { if event.local_name().as_ref() == b"subject" { self.state_stack.push(State::Subject); Ok(None) } else if event.local_name().as_ref() == b"predicate" { self.state_stack.push(State::Predicate); Ok(None) } else if event.local_name().as_ref() == b"object" { self.state_stack.push(State::Object); Ok(None) } else { Err(QueryResultsSyntaxError::msg(format!( "Expecting , or found <{}>", self.decoder.decode(event.name().as_ref())? )) .into()) } } State::Uri => Err(QueryResultsSyntaxError::msg(format!( " must only contain a string, found <{}>", self.decoder.decode(event.name().as_ref())? )) .into()), State::BNode => Err(QueryResultsSyntaxError::msg(format!( " must only contain a string, found <{}>", self.decoder.decode(event.name().as_ref())? 
// Continues the `State::BNode` arm; `literal` rejects child elements too. Text events build the
// term for the open `uri` / `bnode` / `literal` and are an error anywhere else. End tags pop the
// state stack: closing `result` returns the finished row; closing `binding` stores the pending
// term in the column that `mapping` gives for `current_var` (unknown or missing variable is an
// error); closing `subject` / `predicate` / `object` moves the pending term to its stack; an
// empty `bnode` becomes `BlankNode::default()` and an empty `literal` the empty string.
)) .into()), State::Literal => Err(QueryResultsSyntaxError::msg(format!( " must only contain a string, found <{}>", self.decoder.decode(event.name().as_ref())? )) .into()), }, Event::Text(event) => { let data = event.unescape()?; match self.state_stack.last().unwrap() { State::Uri => { self.term = Some( NamedNode::new(data.to_string()) .map_err(|e| { QueryResultsSyntaxError::msg(format!( "Invalid IRI value '{data}': {e}" )) })? .into(), ); Ok(None) } State::BNode => { self.term = Some( BlankNode::new(data.to_string()) .map_err(|e| { QueryResultsSyntaxError::msg(format!( "Invalid blank node value '{data}': {e}" )) })? .into(), ); Ok(None) } State::Literal => { self.term = Some( build_literal(data, self.lang.take(), self.datatype.take())?.into(), ); Ok(None) } _ => Err(QueryResultsSyntaxError::msg(format!( "Unexpected textual value found: {data}" )) .into()), } } Event::End(_) => match self.state_stack.pop().unwrap() { State::Start | State::Uri => Ok(None), State::Result => Ok(Some(take(&mut self.new_bindings))), State::Binding => { if let Some(var) = &self.current_var { if let Some(var) = self.mapping.get(var) { self.new_bindings[*var] = self.term.take() } else { return Err( QueryResultsSyntaxError::msg(format!("The variable '{var}' is used in a binding but not declared in the variables list")).into() ); } } else { return Err(QueryResultsSyntaxError::msg( "No name found for tag", ) .into()); } Ok(None) } State::Subject => { if let Some(subject) = self.term.take() { self.subject_stack.push(subject) } Ok(None) } State::Predicate => { if let Some(predicate) = self.term.take() { self.predicate_stack.push(predicate) } Ok(None) } State::Object => { if let Some(object) = self.term.take() { self.object_stack.push(object) } Ok(None) } State::BNode => { if self.term.is_none() { // We default to a random bnode self.term = Some(BlankNode::default().into()) } Ok(None) } State::Literal => { if self.term.is_none() { // We default to the empty literal self.term = 
// Continues the empty-literal default. Closing `triple`: with feature "rdf-star" one term is
// popped from each of the three stacks and combined into a `Triple` (a literal subject or a
// predicate that is not a named node is an error, as is a missing part); without the feature
// the tag is rejected. Remaining event kinds, including `Eof`, are ignored, except `Empty`
// (unreachable) and CDATA (error).
// `build_literal`: a language tag combined with a datatype other than `rdf::LANG_STRING` is an
// error; otherwise it builds a language-tagged, typed or simple literal.
// `map_xml_error` converts a quick-xml `Error` into an `io::Error` (continued on the next line).
Some(build_literal("", self.lang.take(), self.datatype.take())?.into()) } Ok(None) } State::Triple => { #[cfg(feature = "rdf-star")] if let (Some(subject), Some(predicate), Some(object)) = ( self.subject_stack.pop(), self.predicate_stack.pop(), self.object_stack.pop(), ) { self.term = Some( Triple::new( match subject { Term::NamedNode(subject) => subject.into(), Term::BlankNode(subject) => subject.into(), Term::Triple(subject) => Subject::Triple(subject), Term::Literal(_) => { return Err(QueryResultsSyntaxError::msg( "The value should not be a ", ) .into()); } }, match predicate { Term::NamedNode(predicate) => predicate, _ => { return Err(QueryResultsSyntaxError::msg( "The value should be an ", ) .into()); } }, object, ) .into(), ); Ok(None) } else { Err(QueryResultsSyntaxError::msg( "A should contain a , a and an ", ) .into()) } #[cfg(not(feature = "rdf-star"))] { Err(QueryResultsSyntaxError::msg( "The tag is only supported with RDF-star", ) .into()) } } }, Event::Eof | Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => { Ok(None) } Event::Empty(_) => unreachable!("Empty events are expended"), Event::CData(_) => Err(QueryResultsSyntaxError::msg( " are not supported in SPARQL XML results", ) .into()), } } } fn build_literal( value: impl Into, lang: Option, datatype: Option, ) -> Result { match lang { Some(lang) => { if let Some(datatype) = datatype { if datatype.as_ref() != rdf::LANG_STRING { return Err(QueryResultsSyntaxError::msg(format!( "xml:lang value '{lang}' provided with the datatype {datatype}" )) .into()); } } Literal::new_language_tagged_literal(value, &lang).map_err(|e| { QueryResultsSyntaxError::msg(format!("Invalid xml:lang value '{lang}': {e}")).into() }) } None => Ok(if let Some(datatype) = datatype { Literal::new_typed_literal(value, datatype) } else { Literal::new_simple_literal(value) }), } } fn map_xml_error(error: Error) -> io::Error { match error { Error::Io(error) => { Arc::try_unwrap(error).unwrap_or_else(|error| 
// Continues `map_xml_error`: an I/O error is unwrapped from its `Arc` when uniquely owned, else
// rebuilt with the same kind; `UnexpectedEof` maps to `ErrorKind::UnexpectedEof`; everything
// else maps to `ErrorKind::InvalidData`.
io::Error::new(error.kind(), error)) } Error::UnexpectedEof(_) => io::Error::new(io::ErrorKind::UnexpectedEof, error), _ => io::Error::new(io::ErrorKind::InvalidData, error), } }