//! Implementation of [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/) use crate::error::{ParseError, SyntaxError}; use oxrdf::vocab::rdf; use oxrdf::Variable; use oxrdf::*; use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use quick_xml::{Reader, Writer}; use std::borrow::Cow; use std::collections::BTreeMap; use std::io::{self, BufRead, Write}; use std::str; use std::sync::Arc; pub fn write_boolean_xml_result(sink: W, value: bool) -> io::Result { do_write_boolean_xml_result(sink, value).map_err(map_xml_error) } fn do_write_boolean_xml_result(sink: W, value: bool) -> Result { let mut writer = Writer::new(sink); writer.write_event(Event::Decl(BytesDecl::new("1.0", None, None)))?; writer .create_element("sparql") .with_attribute(("xmlns", "http://www.w3.org/2005/sparql-results#")) .write_inner_content(|writer| { writer .create_element("head") .write_text_content(BytesText::new(""))? .create_element("boolean") .write_text_content(BytesText::new(if value { "true" } else { "false" }))?; Ok(()) })?; Ok(writer.into_inner()) } pub struct XmlSolutionsWriter { writer: Writer, } impl XmlSolutionsWriter { pub fn start(sink: W, variables: &[Variable]) -> io::Result { Self::do_start(sink, variables).map_err(map_xml_error) } fn do_start(sink: W, variables: &[Variable]) -> Result { let mut writer = Writer::new(sink); writer.write_event(Event::Decl(BytesDecl::new("1.0", None, None)))?; let mut sparql_open = BytesStart::new("sparql"); sparql_open.push_attribute(("xmlns", "http://www.w3.org/2005/sparql-results#")); writer.write_event(Event::Start(sparql_open))?; writer .create_element("head") .write_inner_content(|writer| { for variable in variables { writer .create_element("variable") .with_attribute(("name", variable.as_str())) .write_empty()?; } Ok(()) })?; writer.write_event(Event::Start(BytesStart::new("results")))?; Ok(Self { writer }) } pub fn write<'a>( &mut self, solution: impl IntoIterator, TermRef<'a>)>, ) -> io::Result<()> { self.do_write(solution).map_err(map_xml_error) } fn do_write<'a>( &mut self, solution: impl IntoIterator, TermRef<'a>)>, ) -> Result<(), quick_xml::Error> { self.writer .create_element("result") .write_inner_content(|writer| { for (variable, value) in solution { writer .create_element("binding") .with_attribute(("name", variable.as_str())) .write_inner_content(|writer| { write_xml_term(value, writer)?; Ok(()) })?; } Ok(()) })?; Ok(()) } pub fn finish(self) -> io::Result { let mut inner = self.do_finish().map_err(map_xml_error)?; inner.flush()?; Ok(inner) } fn do_finish(mut self) -> Result { self.writer .write_event(Event::End(BytesEnd::new("results")))?; self.writer .write_event(Event::End(BytesEnd::new("sparql")))?; Ok(self.writer.into_inner()) } } fn write_xml_term( term: TermRef<'_>, writer: &mut Writer, ) -> Result<(), quick_xml::Error> { match term { TermRef::NamedNode(uri) => { writer .create_element("uri") .write_text_content(BytesText::new(uri.as_str()))?; } TermRef::BlankNode(bnode) => { writer .create_element("bnode") .write_text_content(BytesText::new(bnode.as_str()))?; } TermRef::Literal(literal) => { let element = writer.create_element("literal"); let element = if let Some(language) = literal.language() { element.with_attribute(("xml:lang", language)) } else if !literal.is_plain() { element.with_attribute(("datatype", literal.datatype().as_str())) } else { element }; element.write_text_content(BytesText::new(literal.value()))?; } #[cfg(feature = "rdf-star")] TermRef::Triple(triple) => { writer .create_element("triple") .write_inner_content(|writer| { writer .create_element("subject") .write_inner_content(|writer| { write_xml_term(triple.subject.as_ref().into(), writer) })?; writer .create_element("predicate") .write_inner_content(|writer| { write_xml_term(triple.predicate.as_ref().into(), writer) })?; writer .create_element("object") .write_inner_content(|writer| { write_xml_term(triple.object.as_ref(), writer) })?; Ok(()) })?; } } Ok(()) } pub enum XmlQueryResultsReader { Solutions { variables: Vec, solutions: XmlSolutionsReader, }, Boolean(bool), } impl XmlQueryResultsReader { pub fn read(source: R) -> Result { enum State { Start, Sparql, Head, AfterHead, Boolean, } let mut reader = Reader::from_reader(source); reader.trim_text(true); reader.expand_empty_elements(true); let mut buffer = Vec::default(); let mut variables = Vec::default(); let mut state = State::Start; //Read header loop { buffer.clear(); let event = reader.read_event_into(&mut buffer)?; match event { Event::Start(event) => match state { State::Start => { if event.local_name().as_ref() == b"sparql" { state = State::Sparql; } else { return Err(SyntaxError::msg(format!("Expecting tag, found <{}>", decode(&reader, &event.name())?)).into()); } } State::Sparql => { if event.local_name().as_ref() == b"head" { state = State::Head; } else { return Err(SyntaxError::msg(format!("Expecting tag, found <{}>",decode(&reader, &event.name())?)).into()); } } State::Head => { if event.local_name().as_ref() == b"variable" { let name = event.attributes() .filter_map(Result::ok) .find(|attr| attr.key.local_name().as_ref() == b"name") .ok_or_else(|| SyntaxError::msg("No name attribute found for the tag"))? .decode_and_unescape_value(&reader)?; let variable = Variable::new(name).map_err(|e| SyntaxError::msg(format!("Invalid variable name: {e}")))?; if variables.contains(&variable) { return Err(SyntaxError::msg(format!( "The variable {variable} is declared twice" )) .into()); } variables.push(variable); } else if event.local_name().as_ref() == b"link" { // no op } else { return Err(SyntaxError::msg(format!("Expecting or tag, found <{}>", decode(&reader, &event.name())?)).into()); } } State::AfterHead => { if event.local_name().as_ref() == b"boolean" { state = State::Boolean } else if event.local_name().as_ref() == b"results" { let mut mapping = BTreeMap::default(); for (i, var) in variables.iter().enumerate() { mapping.insert(var.clone().into_string(), i); } return Ok(Self::Solutions { variables, solutions: XmlSolutionsReader { reader, buffer, mapping, stack: Vec::new(), subject_stack: Vec::new(), predicate_stack: Vec::new(), object_stack: Vec::new(), }}); } else if event.local_name().as_ref() != b"link" && event.local_name().as_ref() != b"results" && event.local_name().as_ref() != b"boolean" { return Err(SyntaxError::msg(format!("Expecting sparql tag, found <{}>", decode(&reader, &event.name())?)).into()); } } State::Boolean => return Err(SyntaxError::msg(format!("Unexpected tag inside of tag: <{}>", decode(&reader, &event.name())?)).into()) }, Event::Text(event) => { let value = event.unescape()?; return match state { State::Boolean => { return if value == "true" { Ok(Self::Boolean(true)) } else if value == "false" { Ok(Self::Boolean(false)) } else { Err(SyntaxError::msg(format!("Unexpected boolean value. Found '{value}'")).into()) }; } _ => Err(SyntaxError::msg(format!("Unexpected textual value found: '{value}'")).into()) }; }, Event::End(event) => { if let State::Head = state { if event.local_name().as_ref() == b"head" { state = State::AfterHead } } else { return Err(SyntaxError::msg("Unexpected early file end. All results file should have a and a or tag").into()); } }, Event::Eof => return Err(SyntaxError::msg("Unexpected early file end. All results file should have a and a or tag").into()), _ => (), } } } } enum State { Start, Result, Binding, Uri, BNode, Literal, Triple, Subject, Predicate, Object, End, } pub struct XmlSolutionsReader { reader: Reader, buffer: Vec, mapping: BTreeMap, stack: Vec, subject_stack: Vec, predicate_stack: Vec, object_stack: Vec, } impl XmlSolutionsReader { pub fn read_next(&mut self) -> Result>>, ParseError> { let mut state = State::Start; let mut new_bindings = vec![None; self.mapping.len()]; let mut current_var = None; let mut term: Option = None; let mut lang = None; let mut datatype = None; loop { self.buffer.clear(); let event = self.reader.read_event_into(&mut self.buffer)?; match event { Event::Start(event) => match state { State::Start => { if event.local_name().as_ref() == b"result" { state = State::Result; } else { return Err(SyntaxError::msg(format!( "Expecting , found <{}>", decode(&self.reader, &event.name())? )) .into()); } } State::Result => { if event.local_name().as_ref() == b"binding" { match event .attributes() .filter_map(Result::ok) .find(|attr| attr.key.local_name().as_ref() == b"name") { Some(attr) => { current_var = Some( attr.decode_and_unescape_value(&self.reader)?.to_string(), ) } None => { return Err(SyntaxError::msg( "No name attribute found for the tag", ) .into()); } } state = State::Binding; } else { return Err(SyntaxError::msg(format!( "Expecting , found <{}>", decode(&self.reader, &event.name())? )) .into()); } } State::Binding | State::Subject | State::Predicate | State::Object => { if term.is_some() { return Err(SyntaxError::msg( "There is already a value for the current binding", ) .into()); } self.stack.push(state); if event.local_name().as_ref() == b"uri" { state = State::Uri; } else if event.local_name().as_ref() == b"bnode" { state = State::BNode; } else if event.local_name().as_ref() == b"literal" { for attr in event.attributes().flatten() { if attr.key.as_ref() == b"xml:lang" { lang = Some( attr.decode_and_unescape_value(&self.reader)?.to_string(), ); } else if attr.key.local_name().as_ref() == b"datatype" { let iri = attr.decode_and_unescape_value(&self.reader)?; datatype = Some(NamedNode::new(iri.to_string()).map_err(|e| { SyntaxError::msg(format!( "Invalid datatype IRI '{iri}': {e}" )) })?); } } state = State::Literal; } else if event.local_name().as_ref() == b"triple" { state = State::Triple; } else { return Err(SyntaxError::msg(format!( "Expecting , or found <{}>", decode(&self.reader, &event.name())? )) .into()); } } State::Triple => { if event.local_name().as_ref() == b"subject" { state = State::Subject } else if event.local_name().as_ref() == b"predicate" { state = State::Predicate } else if event.local_name().as_ref() == b"object" { state = State::Object } else { return Err(SyntaxError::msg(format!( "Expecting , or found <{}>", decode(&self.reader, &event.name())? )) .into()); } } _ => (), }, Event::Text(event) => { let data = event.unescape()?; match state { State::Uri => { term = Some( NamedNode::new(data.to_string()) .map_err(|e| { SyntaxError::msg(format!("Invalid IRI value '{data}': {e}")) })? .into(), ) } State::BNode => { term = Some( BlankNode::new(data.to_string()) .map_err(|e| { SyntaxError::msg(format!( "Invalid blank node value '{data}': {e}" )) })? .into(), ) } State::Literal => { term = Some(build_literal(data, lang.take(), datatype.take())?.into()); } _ => { return Err(SyntaxError::msg(format!( "Unexpected textual value found: {data}" )) .into()); } } } Event::End(_) => match state { State::Start => state = State::End, State::Result => return Ok(Some(new_bindings)), State::Binding => { if let Some(var) = ¤t_var { if let Some(var) = self.mapping.get(var) { new_bindings[*var] = term.take() } else { return Err( SyntaxError::msg(format!("The variable '{var}' is used in a binding but not declared in the variables list")).into() ); } } else { return Err(SyntaxError::msg("No name found for tag").into()); } state = State::Result; } State::Subject => { if let Some(subject) = term.take() { self.subject_stack.push(subject) } state = State::Triple; } State::Predicate => { if let Some(predicate) = term.take() { self.predicate_stack.push(predicate) } state = State::Triple; } State::Object => { if let Some(object) = term.take() { self.object_stack.push(object) } state = State::Triple; } State::Uri => { state = self .stack .pop() .ok_or_else(|| SyntaxError::msg("Empty stack"))? } State::BNode => { if term.is_none() { //We default to a random bnode term = Some(BlankNode::default().into()) } state = self .stack .pop() .ok_or_else(|| SyntaxError::msg("Empty stack"))? } State::Literal => { if term.is_none() { //We default to the empty literal term = Some(build_literal("", lang.take(), datatype.take())?.into()) } state = self .stack .pop() .ok_or_else(|| SyntaxError::msg("Empty stack"))?; } State::Triple => { #[cfg(feature = "rdf-star")] if let (Some(subject), Some(predicate), Some(object)) = ( self.subject_stack.pop(), self.predicate_stack.pop(), self.object_stack.pop(), ) { term = Some( Triple::new( match subject { Term::NamedNode(subject) => subject.into(), Term::BlankNode(subject) => subject.into(), Term::Triple(subject) => Subject::Triple(subject), Term::Literal(_) => { return Err(SyntaxError::msg( "The value should not be a ", ) .into()) } }, match predicate { Term::NamedNode(predicate) => predicate, _ => { return Err(SyntaxError::msg( "The value should be an ", ) .into()) } }, object, ) .into(), ); state = self .stack .pop() .ok_or_else(|| SyntaxError::msg("Empty stack"))?; } else { return Err( SyntaxError::msg("A should contain a , a and an ").into() ); } #[cfg(not(feature = "rdf-star"))] { return Err(SyntaxError::msg( "The tag is only supported with RDF-star", ) .into()); } } State::End => (), }, Event::Eof => return Ok(None), _ => (), } } } } fn build_literal( value: impl Into, lang: Option, datatype: Option, ) -> Result { match lang { Some(lang) => { if let Some(datatype) = datatype { if datatype.as_ref() != rdf::LANG_STRING { return Err(SyntaxError::msg(format!( "xml:lang value '{lang}' provided with the datatype {datatype}" )) .into()); } } Literal::new_language_tagged_literal(value, &lang).map_err(|e| { SyntaxError::msg(format!("Invalid xml:lang value '{lang}': {e}")).into() }) } None => Ok(if let Some(datatype) = datatype { Literal::new_typed_literal(value, datatype) } else { Literal::new_simple_literal(value) }), } } fn decode<'a, T>( reader: &Reader, data: &'a impl AsRef<[u8]>, ) -> Result, ParseError> { Ok(reader.decoder().decode(data.as_ref())?) } fn map_xml_error(error: quick_xml::Error) -> io::Error { match error { quick_xml::Error::Io(error) => match Arc::try_unwrap(error) { Ok(error) => error, Err(error) => io::Error::new(error.kind(), error), }, quick_xml::Error::UnexpectedEof(_) => io::Error::new(io::ErrorKind::UnexpectedEof, error), _ => io::Error::new(io::ErrorKind::InvalidData, error), } }