diff --git a/lib/oxrdfxml/src/error.rs b/lib/oxrdfxml/src/error.rs index 413d3c4a..1382844e 100644 --- a/lib/oxrdfxml/src/error.rs +++ b/lib/oxrdfxml/src/error.rs @@ -4,16 +4,91 @@ use std::error::Error; use std::sync::Arc; use std::{fmt, io}; -/// Error that might be returned during parsing. -/// -/// It might wrap an IO error or be a parsing error. +/// Error returned during RDF/XML parsing. #[derive(Debug)] -pub struct RdfXmlError { - pub(crate) kind: RdfXmlErrorKind, +pub enum ParseError { + /// I/O error during parsing (file not found...). + Io(io::Error), + /// An error in the file syntax. + Syntax(SyntaxError), +} + +impl fmt::Display for ParseError { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Io(e) => e.fmt(f), + Self::Syntax(e) => e.fmt(f), + } + } +} + +impl Error for ParseError { + #[inline] + fn source(&self) -> Option<&(dyn Error + 'static)> { + match self { + Self::Io(e) => Some(e), + Self::Syntax(e) => Some(e), + } + } +} + +impl From for ParseError { + #[inline] + fn from(error: io::Error) -> Self { + Self::Io(error) + } +} + +impl From for ParseError { + #[inline] + fn from(error: SyntaxError) -> Self { + Self::Syntax(error) + } +} + +impl From for io::Error { + #[inline] + fn from(error: ParseError) -> Self { + match error { + ParseError::Io(error) => error, + ParseError::Syntax(error) => error.into(), + } + } +} + +impl From for ParseError { + #[inline] + fn from(error: quick_xml::Error) -> Self { + match error { + quick_xml::Error::Io(error) => Self::Io(match Arc::try_unwrap(error) { + Ok(error) => error, + Err(error) => io::Error::new(error.kind(), error), + }), + error => Self::Syntax(SyntaxError { + inner: SyntaxErrorKind::Xml(error), + }), + } + } +} + +impl From for ParseError { + #[inline] + fn from(error: quick_xml::events::attributes::AttrError) -> Self { + Self::Syntax(SyntaxError { + inner: SyntaxErrorKind::XmlAttribute(error), + }) + } +} + +/// An error in the syntax of the parsed file. +#[derive(Debug)] +pub struct SyntaxError { + pub(crate) inner: SyntaxErrorKind, } #[derive(Debug)] -pub(crate) enum RdfXmlErrorKind { +pub enum SyntaxErrorKind { Xml(quick_xml::Error), XmlAttribute(quick_xml::events::attributes::AttrError), InvalidIri { @@ -24,84 +99,67 @@ pub(crate) enum RdfXmlErrorKind { tag: String, error: LanguageTagParseError, }, - Other(String), + Msg { + msg: String, + }, } -impl RdfXmlError { +impl SyntaxError { + /// Builds an error from a printable error message. + #[inline] pub(crate) fn msg(msg: impl Into) -> Self { Self { - kind: RdfXmlErrorKind::Other(msg.into()), + inner: SyntaxErrorKind::Msg { msg: msg.into() }, } } } -impl fmt::Display for RdfXmlError { +impl fmt::Display for SyntaxError { + #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self.kind { - RdfXmlErrorKind::Xml(error) => error.fmt(f), - RdfXmlErrorKind::XmlAttribute(error) => error.fmt(f), - RdfXmlErrorKind::InvalidIri { iri, error } => { + match &self.inner { + SyntaxErrorKind::Xml(error) => error.fmt(f), + SyntaxErrorKind::XmlAttribute(error) => error.fmt(f), + SyntaxErrorKind::InvalidIri { iri, error } => { write!(f, "error while parsing IRI '{}': {}", iri, error) } - RdfXmlErrorKind::InvalidLanguageTag { tag, error } => { + SyntaxErrorKind::InvalidLanguageTag { tag, error } => { write!(f, "error while parsing language tag '{}': {}", tag, error) } - RdfXmlErrorKind::Other(message) => write!(f, "{}", message), + SyntaxErrorKind::Msg { msg } => f.write_str(msg), } } } -impl Error for RdfXmlError { +impl Error for SyntaxError { + #[inline] fn source(&self) -> Option<&(dyn Error + 'static)> { - match &self.kind { - RdfXmlErrorKind::Xml(error) => Some(error), - RdfXmlErrorKind::XmlAttribute(error) => Some(error), - RdfXmlErrorKind::InvalidIri { error, .. } => Some(error), - RdfXmlErrorKind::InvalidLanguageTag { error, .. } => Some(error), - RdfXmlErrorKind::Other(_) => None, - } - } -} - -impl From for RdfXmlError { - fn from(error: quick_xml::Error) -> Self { - Self { - kind: RdfXmlErrorKind::Xml(error), - } - } -} - -impl From for RdfXmlError { - fn from(error: quick_xml::events::attributes::AttrError) -> Self { - Self { - kind: RdfXmlErrorKind::XmlAttribute(error), - } - } -} - -impl From for RdfXmlError { - fn from(error: io::Error) -> Self { - Self { - kind: RdfXmlErrorKind::Xml(quick_xml::Error::Io(Arc::new(error))), + match &self.inner { + SyntaxErrorKind::Xml(error) => Some(error), + SyntaxErrorKind::XmlAttribute(error) => Some(error), + SyntaxErrorKind::InvalidIri { error, .. } => Some(error), + SyntaxErrorKind::InvalidLanguageTag { error, .. } => Some(error), + SyntaxErrorKind::Msg { .. } => None, } } } -impl From for io::Error { - fn from(error: RdfXmlError) -> Self { - match error.kind { - RdfXmlErrorKind::Xml(error) => match error { +impl From for io::Error { + #[inline] + fn from(error: SyntaxError) -> Self { + match error.inner { + SyntaxErrorKind::Xml(error) => match error { quick_xml::Error::Io(error) => match Arc::try_unwrap(error) { Ok(error) => error, - Err(error) => io::Error::new(error.kind(), error), + Err(error) => Self::new(error.kind(), error), }, quick_xml::Error::UnexpectedEof(error) => { - io::Error::new(io::ErrorKind::UnexpectedEof, error) + Self::new(io::ErrorKind::UnexpectedEof, error) } - error => io::Error::new(io::ErrorKind::InvalidData, error), + error => Self::new(io::ErrorKind::InvalidData, error), }, - RdfXmlErrorKind::Other(error) => io::Error::new(io::ErrorKind::InvalidData, error), - _ => io::Error::new(io::ErrorKind::InvalidData, error), + SyntaxErrorKind::Msg { msg } => Self::new(io::ErrorKind::InvalidData, msg), + _ => Self::new(io::ErrorKind::InvalidData, error), } } } diff --git a/lib/oxrdfxml/src/lib.rs b/lib/oxrdfxml/src/lib.rs index e04e0d86..e07e31c8 100644 --- a/lib/oxrdfxml/src/lib.rs +++ b/lib/oxrdfxml/src/lib.rs @@ -10,5 +10,5 @@ mod serializer; mod utils; pub use crate::serializer::{RdfXmlSerializer, ToWriteRdfXmlWriter}; -pub use error::RdfXmlError; +pub use error::{ParseError, SyntaxError}; pub use parser::{FromReadRdfXmlReader, RdfXmlParser}; diff --git a/lib/oxrdfxml/src/parser.rs b/lib/oxrdfxml/src/parser.rs index 3493fe54..a726a7d0 100644 --- a/lib/oxrdfxml/src/parser.rs +++ b/lib/oxrdfxml/src/parser.rs @@ -1,4 +1,4 @@ -use crate::error::{RdfXmlError, RdfXmlErrorKind}; +use crate::error::{ParseError, SyntaxError, SyntaxErrorKind}; use crate::utils::*; use oxilangtag::LanguageTag; use oxiri::{Iri, IriParseError}; @@ -146,9 +146,9 @@ pub struct FromReadRdfXmlReader { } impl Iterator for FromReadRdfXmlReader { - type Item = Result; + type Item = Result; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { loop { if let Some(triple) = self.results.pop() { return Some(Ok(triple)); @@ -168,7 +168,7 @@ impl FromReadRdfXmlReader { self.reader.reader.buffer_position() } - fn parse_step(&mut self) -> Result<(), RdfXmlError> { + fn parse_step(&mut self) -> Result<(), ParseError> { self.reader_buffer.clear(); let event = self .reader @@ -295,7 +295,7 @@ struct RdfXmlReader { } impl RdfXmlReader { - fn parse_event(&mut self, event: Event, results: &mut Vec) -> Result<(), RdfXmlError> { + fn parse_event(&mut self, event: Event, results: &mut Vec) -> Result<(), ParseError> { match event { Event::Start(event) => self.parse_start_event(&event, results), Event::End(event) => self.parse_end_event(&event, results), @@ -306,9 +306,10 @@ impl RdfXmlReader { Event::Decl(decl) => { if let Some(encoding) = decl.encoding() { if !is_utf8(&encoding?) { - return Err(RdfXmlError::msg( + return Err(SyntaxError::msg( "Only UTF-8 is supported by the RDF/XML parser", - )); + ) + .into()); } } Ok(()) @@ -321,7 +322,7 @@ impl RdfXmlReader { } } - fn parse_doctype(&mut self, dt: &BytesText<'_>) -> Result<(), RdfXmlError> { + fn parse_doctype(&mut self, dt: &BytesText<'_>) -> Result<(), ParseError> { // we extract entities for input in self .reader @@ -333,20 +334,20 @@ impl RdfXmlReader { if let Some(input) = input.strip_prefix("!ENTITY") { let input = input.trim_start().strip_prefix('%').unwrap_or(input); let (entity_name, input) = input.trim_start().split_once(|c: char| c.is_ascii_whitespace()).ok_or_else(|| { - RdfXmlError::msg( + SyntaxError::msg( "').ok_or_else(|| { - RdfXmlError::msg("") + SyntaxError::msg("") })?; // Resolves custom entities within the current entity definition. @@ -363,7 +364,7 @@ impl RdfXmlReader { &mut self, event: &BytesStart<'_>, results: &mut Vec, - ) -> Result<(), RdfXmlError> { + ) -> Result<(), ParseError> { #[derive(PartialEq, Eq)] enum RdfXmlParseType { Default, @@ -425,15 +426,15 @@ impl RdfXmlReader { let tag = self.convert_attribute(&attribute)?; language = Some( LanguageTag::parse(tag.to_ascii_lowercase()) - .map_err(|error| RdfXmlError { - kind: RdfXmlErrorKind::InvalidLanguageTag { tag, error }, + .map_err(|error| SyntaxError { + inner: SyntaxErrorKind::InvalidLanguageTag { tag, error }, })? .into_inner(), ); } else if attribute.key.as_ref() == b"xml:base" { let iri = self.convert_attribute(&attribute)?; - base_iri = Some(Iri::parse(iri.clone()).map_err(|error| RdfXmlError { - kind: RdfXmlErrorKind::InvalidIri { iri, error }, + base_iri = Some(Iri::parse(iri.clone()).map_err(|error| SyntaxError { + inner: SyntaxErrorKind::InvalidIri { iri, error }, })?) } else { // We ignore other xml attributes @@ -443,28 +444,31 @@ impl RdfXmlReader { if *attribute_url == *RDF_ID { let mut id = self.convert_attribute(&attribute)?; if !is_nc_name(&id) { - return Err(RdfXmlError::msg(format!( + return Err(SyntaxError::msg(format!( "{} is not a valid rdf:ID value", &id - ))); + )) + .into()); } id.insert(0, '#'); id_attr = Some(id); } else if *attribute_url == *RDF_BAG_ID { let bag_id = self.convert_attribute(&attribute)?; if !is_nc_name(&bag_id) { - return Err(RdfXmlError::msg(format!( + return Err(SyntaxError::msg(format!( "{} is not a valid rdf:bagID value", &bag_id - ))); + )) + .into()); } } else if *attribute_url == *RDF_NODE_ID { let id = self.convert_attribute(&attribute)?; if !is_nc_name(&id) { - return Err(RdfXmlError::msg(format!( + return Err(SyntaxError::msg(format!( "{} is not a valid rdf:nodeID value", &id - ))); + )) + .into()); } node_id_attr = Some(BlankNode::new_unchecked(id)); } else if *attribute_url == *RDF_ABOUT { @@ -483,14 +487,15 @@ impl RdfXmlReader { } else if attribute_url == rdf::TYPE.as_str() { type_attr = Some(attribute); } else if RESERVED_RDF_ATTRIBUTES.contains(&&*attribute_url) { - return Err(RdfXmlError::msg(format!( + return Err(SyntaxError::msg(format!( "{} is not a valid attribute", &attribute_url - ))); + )) + .into()); } else { property_attrs.push(( - NamedNode::new(attribute_url.clone()).map_err(|error| RdfXmlError { - kind: RdfXmlErrorKind::InvalidIri { + NamedNode::new(attribute_url.clone()).map_err(|error| SyntaxError { + inner: SyntaxErrorKind::InvalidIri { iri: attribute_url, error, }, @@ -506,10 +511,11 @@ impl RdfXmlReader { Some(iri) => { let iri = resolve(&base_iri, iri)?; if self.known_rdf_id.contains(iri.as_str()) { - return Err(RdfXmlError::msg(format!( + return Err(SyntaxError::msg(format!( "{} has already been used as rdf:ID value", &iri - ))); + )) + .into()); } self.known_rdf_id.insert(iri.as_str().into()); Some(iri) @@ -547,9 +553,9 @@ impl RdfXmlReader { panic!("ParseTypeLiteralPropertyElt production children should never be considered as a RDF/XML content") } None => { - return Err(RdfXmlError::msg( - "No state in the stack: the XML is not balanced", - )); + return Err( + SyntaxError::msg("No state in the stack: the XML is not balanced").into(), + ); } }; @@ -558,14 +564,15 @@ impl RdfXmlReader { if *tag_name == *RDF_RDF { RdfXmlState::Rdf { base_iri, language } } else if RESERVED_RDF_ELEMENTS.contains(&&*tag_name) { - return Err(RdfXmlError::msg(format!( + return Err(SyntaxError::msg(format!( "Invalid node element tag name: {}", &tag_name - ))); + )) + .into()); } else { Self::build_node_elt( - NamedNode::new(tag_name.clone()).map_err(|error| RdfXmlError { - kind: RdfXmlErrorKind::InvalidIri { + NamedNode::new(tag_name.clone()).map_err(|error| SyntaxError { + inner: SyntaxErrorKind::InvalidIri { iri: tag_name, error, }, @@ -583,14 +590,15 @@ impl RdfXmlReader { } RdfXmlNextProduction::NodeElt => { if RESERVED_RDF_ELEMENTS.contains(&&*tag_name) { - return Err(RdfXmlError::msg(format!( + return Err(SyntaxError::msg(format!( "Invalid property element tag name: {}", &tag_name - ))); + )) + .into()); } Self::build_node_elt( - NamedNode::new(tag_name.clone()).map_err(|error| RdfXmlError { - kind: RdfXmlErrorKind::InvalidIri { + NamedNode::new(tag_name.clone()).map_err(|error| SyntaxError { + inner: SyntaxErrorKind::InvalidIri { iri: tag_name, error, }, @@ -608,10 +616,10 @@ impl RdfXmlReader { RdfXmlNextProduction::PropertyElt { subject } => { let iri = if *tag_name == *RDF_LI { let Some(RdfXmlState::NodeElt { li_counter, .. }) = self.state.last_mut() else { - return Err(RdfXmlError::msg(format!( + return Err(SyntaxError::msg(format!( "Invalid property element tag name: {}", &tag_name - ))); + )).into()); }; *li_counter += 1; NamedNode::new_unchecked(format!( @@ -621,13 +629,14 @@ impl RdfXmlReader { } else if RESERVED_RDF_ELEMENTS.contains(&&*tag_name) || *tag_name == *RDF_DESCRIPTION { - return Err(RdfXmlError::msg(format!( + return Err(SyntaxError::msg(format!( "Invalid property element tag name: {}", &tag_name - ))); + )) + .into()); } else { - NamedNode::new(tag_name.clone()).map_err(|error| RdfXmlError { - kind: RdfXmlErrorKind::InvalidIri { + NamedNode::new(tag_name.clone()).map_err(|error| SyntaxError { + inner: SyntaxErrorKind::InvalidIri { iri: tag_name, error, }, @@ -644,7 +653,7 @@ impl RdfXmlReader { (Some(resource_attr), None) => Subject::from(resource_attr), (None, Some(node_id_attr)) => node_id_attr.into(), (None, None) => BlankNode::default().into(), - (Some(_), Some(_)) => return Err(RdfXmlError::msg("Not both rdf:resource and rdf:nodeID could be set at the same time")) + (Some(_), Some(_)) => return Err(SyntaxError::msg("Not both rdf:resource and rdf:nodeID could be set at the same time").into()) }; Self::emit_property_attrs(&object, property_attrs, &language, results); if let Some(type_attr) = type_attr { @@ -711,7 +720,7 @@ impl RdfXmlReader { &mut self, event: &BytesEnd<'_>, results: &mut Vec, - ) -> Result<(), RdfXmlError> { + ) -> Result<(), ParseError> { //Literal case if self.in_literal_depth > 0 { if let Some(RdfXmlState::ParseTypeLiteralPropertyElt { writer, .. }) = @@ -731,7 +740,7 @@ impl RdfXmlReader { Ok(()) } - fn parse_text_event(&mut self, event: &BytesText<'_>) -> Result<(), RdfXmlError> { + fn parse_text_event(&mut self, event: &BytesText<'_>) -> Result<(), ParseError> { let text = event.unescape_with(|e| self.resolve_entity(e))?.to_string(); match self.state.last_mut() { Some(RdfXmlState::PropertyElt { object, .. }) => { @@ -748,21 +757,18 @@ impl RdfXmlReader { if event.iter().copied().all(is_whitespace) { Ok(()) } else { - Err(RdfXmlError::msg(format!( - "Unexpected text event: '{}'", - text - ))) + Err(SyntaxError::msg(format!("Unexpected text event: '{}'", text)).into()) } } } } - fn resolve_tag_name(&self, qname: QName<'_>) -> Result { + fn resolve_tag_name(&self, qname: QName<'_>) -> Result { let (namespace, local_name) = self.reader.resolve_element(qname); self.resolve_ns_name(namespace, local_name) } - fn resolve_attribute_name(&self, qname: QName<'_>) -> Result { + fn resolve_attribute_name(&self, qname: QName<'_>) -> Result { let (namespace, local_name) = self.reader.resolve_attribute(qname); self.resolve_ns_name(namespace, local_name) } @@ -771,7 +777,7 @@ impl RdfXmlReader { &self, namespace: ResolveResult, local_name: LocalName<'_>, - ) -> Result { + ) -> Result { match namespace { ResolveResult::Bound(ns) => { let mut value = Vec::with_capacity(ns.as_ref().len() + local_name.as_ref().len()); @@ -784,12 +790,13 @@ impl RdfXmlReader { .to_string()) } ResolveResult::Unbound => { - Err(RdfXmlError::msg("XML namespaces are required in RDF/XML")) + Err(SyntaxError::msg("XML namespaces are required in RDF/XML").into()) } - ResolveResult::Unknown(v) => Err(RdfXmlError::msg(format!( + ResolveResult::Unknown(v) => Err(SyntaxError::msg(format!( "Unknown prefix {}:", self.reader.decoder().decode(&v)? - ))), + )) + .into()), } } @@ -804,24 +811,24 @@ impl RdfXmlReader { type_attr: Option, property_attrs: Vec<(NamedNode, String)>, results: &mut Vec, - ) -> Result { + ) -> Result { let subject = match (id_attr, node_id_attr, about_attr) { (Some(id_attr), None, None) => Subject::from(id_attr), (None, Some(node_id_attr), None) => node_id_attr.into(), (None, None, Some(about_attr)) => about_attr.into(), (None, None, None) => BlankNode::default().into(), (Some(_), Some(_), _) => { - return Err(RdfXmlError::msg( + return Err(SyntaxError::msg( "Not both rdf:ID and rdf:nodeID could be set at the same time", )) } (_, Some(_), Some(_)) => { - return Err(RdfXmlError::msg( + return Err(SyntaxError::msg( "Not both rdf:nodeID and rdf:resource could be set at the same time", )) } (Some(_), _, Some(_)) => { - return Err(RdfXmlError::msg( + return Err(SyntaxError::msg( "Not both rdf:ID and rdf:resource could be set at the same time", )) } @@ -870,7 +877,7 @@ impl RdfXmlReader { &mut self, state: RdfXmlState, results: &mut Vec, - ) -> Result<(), RdfXmlError> { + ) -> Result<(), SyntaxError> { match state { RdfXmlState::PropertyElt { iri, @@ -925,7 +932,7 @@ impl RdfXmlReader { if emit { let object = writer.into_inner(); if object.is_empty() { - return Err(RdfXmlError::msg(format!( + return Err(SyntaxError::msg(format!( "No value found for rdf:XMLLiteral value of property {}", iri ))); @@ -935,7 +942,7 @@ impl RdfXmlReader { iri, Literal::new_typed_literal( str::from_utf8(&object).map_err(|_| { - RdfXmlError::msg("The XML literal is not in valid UTF-8".to_owned()) + SyntaxError::msg("The XML literal is not in valid UTF-8".to_owned()) })?, rdf::XML_LITERAL, ), @@ -1008,7 +1015,7 @@ impl RdfXmlReader { } } - fn convert_attribute(&self, attribute: &Attribute) -> Result { + fn convert_attribute(&self, attribute: &Attribute) -> Result { Ok(attribute .decode_and_unescape_value_with(&self.reader, |e| self.resolve_entity(e))? .to_string()) @@ -1018,8 +1025,8 @@ impl RdfXmlReader { &self, base_iri: &Option>, attribute: &Attribute<'_>, - ) -> Result { - resolve(base_iri, self.convert_attribute(attribute)?) + ) -> Result { + Ok(resolve(base_iri, self.convert_attribute(attribute)?)?) } fn resolve_entity(&self, e: &str) -> Option<&str> { @@ -1027,13 +1034,13 @@ impl RdfXmlReader { } } -fn resolve(base_iri: &Option>, relative_iri: String) -> Result { +fn resolve(base_iri: &Option>, relative_iri: String) -> Result { if let Some(base_iri) = base_iri { Ok(NamedNode::new_unchecked( base_iri .resolve(&relative_iri) - .map_err(|error| RdfXmlError { - kind: RdfXmlErrorKind::InvalidIri { + .map_err(|error| SyntaxError { + inner: SyntaxErrorKind::InvalidIri { iri: relative_iri, error, }, @@ -1041,8 +1048,8 @@ fn resolve(base_iri: &Option>, relative_iri: String) -> Result { } impl Iterator for FromReadN3Reader { - type Item = Result; + type Item = Result; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { self.inner.next() } } @@ -406,7 +406,7 @@ impl LowLevelN3Reader { /// /// Returns [`None`] if the parsing is finished or more data is required. /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice). - pub fn read_next(&mut self) -> Option> { + pub fn read_next(&mut self) -> Option> { self.parser.read_next() } } diff --git a/lib/oxttl/src/nquads.rs b/lib/oxttl/src/nquads.rs index ce8c3a2a..4414d3fc 100644 --- a/lib/oxttl/src/nquads.rs +++ b/lib/oxttl/src/nquads.rs @@ -1,7 +1,7 @@ //! A [N-Quads](https://www.w3.org/TR/n-quads/) streaming parser implemented by [`NQuadsParser`]. use crate::line_formats::NQuadsRecognizer; -use crate::toolkit::{FromReadIterator, ParseError, ParseOrIoError, Parser}; +use crate::toolkit::{FromReadIterator, ParseError, Parser, SyntaxError}; use oxrdf::{Quad, QuadRef}; use std::io::{self, Read, Write}; @@ -157,9 +157,9 @@ pub struct FromReadNQuadsReader { } impl Iterator for FromReadNQuadsReader { - type Item = Result; + type Item = Result; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { self.inner.next() } } @@ -226,7 +226,7 @@ impl LowLevelNQuadsReader { /// /// Returns [`None`] if the parsing is finished or more data is required. /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice). - pub fn read_next(&mut self) -> Option> { + pub fn read_next(&mut self) -> Option> { self.parser.read_next() } } diff --git a/lib/oxttl/src/ntriples.rs b/lib/oxttl/src/ntriples.rs index daaa2d5c..cac8cd22 100644 --- a/lib/oxttl/src/ntriples.rs +++ b/lib/oxttl/src/ntriples.rs @@ -2,7 +2,7 @@ //! and a serializer implemented by [`NTriplesSerializer`]. use crate::line_formats::NQuadsRecognizer; -use crate::toolkit::{FromReadIterator, ParseError, ParseOrIoError, Parser}; +use crate::toolkit::{FromReadIterator, ParseError, Parser, SyntaxError}; use oxrdf::{Triple, TripleRef}; use std::io::{self, Read, Write}; @@ -158,9 +158,9 @@ pub struct FromReadNTriplesReader { } impl Iterator for FromReadNTriplesReader { - type Item = Result; + type Item = Result; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { Some(self.inner.next()?.map(Into::into)) } } @@ -227,7 +227,7 @@ impl LowLevelNTriplesReader { /// /// Returns [`None`] if the parsing is finished or more data is required. /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice). - pub fn read_next(&mut self) -> Option> { + pub fn read_next(&mut self) -> Option> { Some(self.parser.read_next()?.map(Into::into)) } } diff --git a/lib/oxttl/src/toolkit/lexer.rs b/lib/oxttl/src/toolkit/lexer.rs index 26906386..3510ab28 100644 --- a/lib/oxttl/src/toolkit/lexer.rs +++ b/lib/oxttl/src/toolkit/lexer.rs @@ -92,6 +92,7 @@ impl Lexer { self.end = self.data.len(); } + #[inline] pub fn end(&mut self) { self.is_ending = true; } diff --git a/lib/oxttl/src/toolkit/mod.rs b/lib/oxttl/src/toolkit/mod.rs index 986504f9..39f9d40d 100644 --- a/lib/oxttl/src/toolkit/mod.rs +++ b/lib/oxttl/src/toolkit/mod.rs @@ -7,5 +7,5 @@ mod parser; pub use self::lexer::{Lexer, LexerError, TokenRecognizer, TokenRecognizerError}; pub use self::parser::{ - FromReadIterator, ParseError, ParseOrIoError, Parser, RuleRecognizer, RuleRecognizerError, + FromReadIterator, ParseError, Parser, RuleRecognizer, RuleRecognizerError, SyntaxError, }; diff --git a/lib/oxttl/src/toolkit/parser.rs b/lib/oxttl/src/toolkit/parser.rs index a02afec4..44c01d5a 100644 --- a/lib/oxttl/src/toolkit/parser.rs +++ b/lib/oxttl/src/toolkit/parser.rs @@ -60,18 +60,20 @@ impl Parser { self.lexer.extend_from_slice(other) } + #[inline] pub fn end(&mut self) { self.lexer.end() } + #[inline] pub fn is_end(&self) -> bool { self.state.is_none() && self.results.is_empty() && self.errors.is_empty() } - pub fn read_next(&mut self) -> Option> { + pub fn read_next(&mut self) -> Option> { loop { if let Some(error) = self.errors.pop() { - return Some(Err(ParseError { + return Some(Err(SyntaxError { position: self.position.clone(), message: error.message, })); @@ -114,33 +116,37 @@ impl Parser { } } -/// An error from parsing. +/// An error in the syntax of the parsed file. /// /// It is composed of a message and a byte range in the input. #[derive(Debug)] -pub struct ParseError { +pub struct SyntaxError { position: Range, message: String, } -impl ParseError { +impl SyntaxError { /// The invalid byte range in the input. + #[inline] pub fn position(&self) -> Range { self.position.clone() } /// The error message. + #[inline] pub fn message(&self) -> &str { &self.message } /// Converts this error to an error message. + #[inline] pub fn into_message(self) -> String { self.message } } -impl fmt::Display for ParseError { +impl fmt::Display for SyntaxError { + #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if self.position.start + 1 == self.position.end { write!( @@ -158,15 +164,17 @@ impl fmt::Display for ParseError { } } -impl Error for ParseError {} +impl Error for SyntaxError {} -impl From for io::Error { - fn from(error: ParseError) -> Self { +impl From for io::Error { + #[inline] + fn from(error: SyntaxError) -> Self { io::Error::new(io::ErrorKind::InvalidData, error) } } -impl From for ParseError { +impl From for SyntaxError { + #[inline] fn from(e: LexerError) -> Self { Self { position: e.position(), @@ -175,48 +183,57 @@ impl From for ParseError { } } -/// The union of [`ParseError`] and [`std::io::Error`]. +/// A parsing error. +/// +/// It is the union of [`SyntaxError`] and [`std::io::Error`]. #[derive(Debug)] -pub enum ParseOrIoError { - Parse(ParseError), +pub enum ParseError { + /// I/O error during parsing (file not found...). Io(io::Error), + /// An error in the file syntax. + Syntax(SyntaxError), } -impl fmt::Display for ParseOrIoError { +impl fmt::Display for ParseError { + #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::Parse(e) => e.fmt(f), Self::Io(e) => e.fmt(f), + Self::Syntax(e) => e.fmt(f), } } } -impl Error for ParseOrIoError { +impl Error for ParseError { + #[inline] fn source(&self) -> Option<&(dyn Error + 'static)> { Some(match self { - Self::Parse(e) => e, Self::Io(e) => e, + Self::Syntax(e) => e, }) } } -impl From for ParseOrIoError { - fn from(error: ParseError) -> Self { - Self::Parse(error) +impl From for ParseError { + #[inline] + fn from(error: SyntaxError) -> Self { + Self::Syntax(error) } } -impl From for ParseOrIoError { +impl From for ParseError { + #[inline] fn from(error: io::Error) -> Self { Self::Io(error) } } -impl From for io::Error { - fn from(error: ParseOrIoError) -> Self { +impl From for io::Error { + #[inline] + fn from(error: ParseError) -> Self { match error { - ParseOrIoError::Parse(e) => e.into(), - ParseOrIoError::Io(e) => e, + ParseError::Syntax(e) => e.into(), + ParseError::Io(e) => e, } } } @@ -227,12 +244,12 @@ pub struct FromReadIterator { } impl Iterator for FromReadIterator { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { while !self.parser.is_end() { if let Some(result) = self.parser.read_next() { - return Some(result.map_err(ParseOrIoError::Parse)); + return Some(result.map_err(ParseError::Syntax)); } if let Err(e) = self.parser.lexer.extend_from_read(&mut self.read) { return Some(Err(e.into())); diff --git a/lib/oxttl/src/trig.rs b/lib/oxttl/src/trig.rs index 311b8f96..62fc331f 100644 --- a/lib/oxttl/src/trig.rs +++ b/lib/oxttl/src/trig.rs @@ -1,7 +1,7 @@ //! A [TriG](https://www.w3.org/TR/trig/) streaming parser implemented by [`TriGParser`]. use crate::terse::TriGRecognizer; -use crate::toolkit::{FromReadIterator, ParseError, ParseOrIoError, Parser}; +use crate::toolkit::{FromReadIterator, ParseError, Parser, SyntaxError}; use oxiri::{Iri, IriParseError}; use oxrdf::{vocab::xsd, GraphName, NamedNode, Quad, QuadRef, Subject, TermRef}; use std::collections::HashMap; @@ -186,9 +186,9 @@ pub struct FromReadTriGReader { } impl Iterator for FromReadTriGReader { - type Item = Result; + type Item = Result; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { self.inner.next() } } @@ -255,7 +255,7 @@ impl LowLevelTriGReader { /// /// Returns [`None`] if the parsing is finished or more data is required. /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice). - pub fn read_next(&mut self) -> Option> { + pub fn read_next(&mut self) -> Option> { self.parser.read_next() } } diff --git a/lib/oxttl/src/turtle.rs b/lib/oxttl/src/turtle.rs index 65967613..3b5639c0 100644 --- a/lib/oxttl/src/turtle.rs +++ b/lib/oxttl/src/turtle.rs @@ -1,7 +1,7 @@ //! A [Turtle](https://www.w3.org/TR/turtle/) streaming parser implemented by [`TurtleParser`]. use crate::terse::TriGRecognizer; -use crate::toolkit::{FromReadIterator, ParseError, ParseOrIoError, Parser}; +use crate::toolkit::{FromReadIterator, ParseError, Parser, SyntaxError}; use crate::trig::{LowLevelTriGWriter, ToWriteTriGWriter}; use crate::TriGSerializer; use oxiri::{Iri, IriParseError}; @@ -187,9 +187,9 @@ pub struct FromReadTurtleReader { } impl Iterator for FromReadTurtleReader { - type Item = Result; + type Item = Result; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { Some(self.inner.next()?.map(Into::into)) } } @@ -256,7 +256,7 @@ impl LowLevelTurtleReader { /// /// Returns [`None`] if the parsing is finished or more data is required. /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice). - pub fn read_next(&mut self) -> Option> { + pub fn read_next(&mut self) -> Option> { Some(self.parser.read_next()?.map(Into::into)) } } diff --git a/lib/sparesults/src/error.rs b/lib/sparesults/src/error.rs index 24ed05fa..fbb45728 100644 --- a/lib/sparesults/src/error.rs +++ b/lib/sparesults/src/error.rs @@ -1,5 +1,6 @@ use oxrdf::TermParseError; use std::error::Error; +use std::sync::Arc; use std::{fmt, io}; /// Error returned during SPARQL result formats format parsing. @@ -59,7 +60,10 @@ impl From for ParseError { #[inline] fn from(error: quick_xml::Error) -> Self { match error { - quick_xml::Error::Io(error) => Self::Io(io::Error::new(error.kind(), error)), + quick_xml::Error::Io(error) => Self::Io(match Arc::try_unwrap(error) { + Ok(error) => error, + Err(error) => io::Error::new(error.kind(), error), + }), error => Self::Syntax(SyntaxError { inner: SyntaxErrorKind::Xml(error), }), @@ -117,7 +121,10 @@ impl From for io::Error { fn from(error: SyntaxError) -> Self { match error.inner { SyntaxErrorKind::Xml(error) => match error { - quick_xml::Error::Io(error) => Self::new(error.kind(), error), + quick_xml::Error::Io(error) => match Arc::try_unwrap(error) { + Ok(error) => error, + Err(error) => Self::new(error.kind(), error), + }, quick_xml::Error::UnexpectedEof(error) => { Self::new(io::ErrorKind::UnexpectedEof, error) } diff --git a/lib/sparesults/src/xml.rs b/lib/sparesults/src/xml.rs index dbc17242..f7e0bb41 100644 --- a/lib/sparesults/src/xml.rs +++ b/lib/sparesults/src/xml.rs @@ -10,6 +10,7 @@ use std::borrow::Cow; use std::collections::BTreeMap; use std::io::{self, BufRead, Write}; use std::str; +use std::sync::Arc; pub fn write_boolean_xml_result(sink: W, value: bool) -> io::Result { do_write_boolean_xml_result(sink, value).map_err(map_xml_error) @@ -604,7 +605,10 @@ fn decode<'a, T>( fn map_xml_error(error: quick_xml::Error) -> io::Error { match error { - quick_xml::Error::Io(error) => io::Error::new(error.kind(), error), + quick_xml::Error::Io(error) => match Arc::try_unwrap(error) { + Ok(error) => error, + Err(error) => io::Error::new(error.kind(), error), + }, quick_xml::Error::UnexpectedEof(_) => io::Error::new(io::ErrorKind::UnexpectedEof, error), _ => io::Error::new(io::ErrorKind::InvalidData, error), } diff --git a/lib/src/io/error.rs b/lib/src/io/error.rs index 6d49148d..bea22d8e 100644 --- a/lib/src/io/error.rs +++ b/lib/src/io/error.rs @@ -1,5 +1,4 @@ use oxiri::IriParseError; -use oxrdfxml::RdfXmlError; use std::error::Error; use std::{fmt, io}; @@ -44,39 +43,40 @@ impl Error for ParseError { } } -impl From for ParseError { +impl From for SyntaxError { #[inline] - fn from(error: oxttl::ParseError) -> Self { - Self::Syntax(SyntaxError { + fn from(error: oxttl::SyntaxError) -> Self { + SyntaxError { inner: SyntaxErrorKind::Turtle(error), - }) + } } } -impl From for ParseError { +impl From for ParseError { #[inline] - fn from(error: oxttl::ParseOrIoError) -> Self { + fn from(error: oxttl::ParseError) -> Self { match error { - oxttl::ParseOrIoError::Parse(e) => e.into(), - oxttl::ParseOrIoError::Io(e) => e.into(), + oxttl::ParseError::Syntax(e) => Self::Syntax(e.into()), + oxttl::ParseError::Io(e) => Self::Io(e), } } } -#[allow(clippy::fallible_impl_from)] -impl From for ParseError { +impl From for SyntaxError { #[inline] - fn from(error: RdfXmlError) -> Self { - let error = io::Error::from(error); - if error.get_ref().map_or( - false, - <(dyn Error + Send + Sync + 'static)>::is::, - ) { - Self::Syntax(SyntaxError { - inner: SyntaxErrorKind::RdfXml(*error.into_inner().unwrap().downcast().unwrap()), - }) - } else { - Self::Io(error) + fn from(error: oxrdfxml::SyntaxError) -> Self { + SyntaxError { + inner: SyntaxErrorKind::RdfXml(error), + } + } +} + +impl From for ParseError { + #[inline] + fn from(error: oxrdfxml::ParseError) -> Self { + match error { + oxrdfxml::ParseError::Syntax(e) => Self::Syntax(e.into()), + oxrdfxml::ParseError::Io(e) => Self::Io(e), } } } @@ -113,8 +113,8 @@ pub struct SyntaxError { #[derive(Debug)] enum SyntaxErrorKind { - Turtle(oxttl::ParseError), - RdfXml(RdfXmlError), + Turtle(oxttl::SyntaxError), + RdfXml(oxrdfxml::SyntaxError), InvalidBaseIri { iri: String, error: IriParseError }, }