From ec5c7c86be7f6380b78e810c9f62dd5a1f7b12a2 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 9 Jan 2022 21:01:45 +0100 Subject: [PATCH] Improves RDF I/O ParseError --- lib/src/io/error.rs | 140 +++++++++++++++++++++++++++++++ lib/src/io/mod.rs | 1 + lib/src/io/read.rs | 174 +++------------------------------------ lib/src/sparql/error.rs | 16 ++-- lib/src/sparql/update.rs | 4 +- lib/src/storage/error.rs | 16 ++-- lib/src/store.rs | 10 +-- python/src/io.rs | 16 ++-- python/src/sparql.rs | 4 +- python/src/store.rs | 4 +- 10 files changed, 189 insertions(+), 196 deletions(-) create mode 100644 lib/src/io/error.rs diff --git a/lib/src/io/error.rs b/lib/src/io/error.rs new file mode 100644 index 00000000..8faa4b73 --- /dev/null +++ b/lib/src/io/error.rs @@ -0,0 +1,140 @@ +use oxiri::IriParseError; +use rio_turtle::TurtleError; +use rio_xml::RdfXmlError; +use std::error::Error; +use std::{fmt, io}; + +/// Error returned during RDF format parsing. +#[derive(Debug)] +pub enum ParseError { + /// I/O error during parsing (file not found...). + Io(io::Error), + /// An error in the file syntax. + Syntax(SyntaxError), +} + +impl ParseError { + pub(crate) fn invalid_base_iri(iri: &str, error: IriParseError) -> Self { + Self::Syntax(SyntaxError { + inner: SyntaxErrorKind::InvalidBaseIri { + iri: iri.to_owned(), + error, + }, + }) + } +} + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Io(e) => e.fmt(f), + Self::Syntax(e) => e.fmt(f), + } + } +} + +impl Error for ParseError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + match self { + Self::Io(e) => Some(e), + Self::Syntax(e) => Some(e), + } + } +} + +#[allow(clippy::fallible_impl_from)] +impl From for ParseError { + fn from(error: TurtleError) -> Self { + let error = io::Error::from(error); + if error.get_ref().map_or(false, |e| e.is::()) { + Self::Syntax(SyntaxError { + inner: SyntaxErrorKind::Turtle(*error.into_inner().unwrap().downcast().unwrap()), + }) + } else { + Self::Io(error) + } + } +} + +#[allow(clippy::fallible_impl_from)] +impl From for ParseError { + fn from(error: RdfXmlError) -> Self { + let error = io::Error::from(error); + if error.get_ref().map_or(false, |e| e.is::()) { + Self::Syntax(SyntaxError { + inner: SyntaxErrorKind::RdfXml(*error.into_inner().unwrap().downcast().unwrap()), + }) + } else { + Self::Io(error) + } + } +} + +impl From for ParseError { + fn from(error: io::Error) -> Self { + Self::Io(error) + } +} + +impl From for ParseError { + fn from(error: SyntaxError) -> Self { + Self::Syntax(error) + } +} + +impl From for io::Error { + fn from(error: ParseError) -> Self { + match error { + ParseError::Io(error) => error, + ParseError::Syntax(error) => error.into(), + } + } +} + +/// An error in the syntax of the parsed file. +#[derive(Debug)] +pub struct SyntaxError { + inner: SyntaxErrorKind, +} + +#[derive(Debug)] +enum SyntaxErrorKind { + Turtle(TurtleError), + RdfXml(RdfXmlError), + InvalidBaseIri { iri: String, error: IriParseError }, +} + +impl fmt::Display for SyntaxError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.inner { + SyntaxErrorKind::Turtle(e) => e.fmt(f), + SyntaxErrorKind::RdfXml(e) => e.fmt(f), + SyntaxErrorKind::InvalidBaseIri { iri, error } => { + write!(f, "Invalid base IRI '{}': {}", iri, error) + } + } + } +} + +impl Error for SyntaxError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + match &self.inner { + SyntaxErrorKind::Turtle(e) => Some(e), + SyntaxErrorKind::RdfXml(e) => Some(e), + SyntaxErrorKind::InvalidBaseIri { .. } => None, + } + } +} + +impl From for io::Error { + fn from(error: SyntaxError) -> Self { + match error.inner { + SyntaxErrorKind::Turtle(error) => error.into(), + SyntaxErrorKind::RdfXml(error) => error.into(), + SyntaxErrorKind::InvalidBaseIri { iri, error } => Self::new( + io::ErrorKind::InvalidInput, + format!("Invalid IRI '{}': {}", iri, error), + ), + } + } +} diff --git a/lib/src/io/mod.rs b/lib/src/io/mod.rs index 884e53b7..6c2c49b4 100644 --- a/lib/src/io/mod.rs +++ b/lib/src/io/mod.rs @@ -1,5 +1,6 @@ //! Utilities to read and write RDF graphs and datasets. +mod error; mod format; pub mod read; pub mod write; diff --git a/lib/src/io/read.rs b/lib/src/io/read.rs index c457a71a..dc82e2c0 100644 --- a/lib/src/io/read.rs +++ b/lib/src/io/read.rs @@ -1,16 +1,15 @@ //! Utilities to read RDF graphs and datasets. +pub use crate::io::error::{ParseError, SyntaxError}; use crate::io::{DatasetFormat, GraphFormat}; use crate::model::*; use oxiri::{Iri, IriParseError}; use rio_api::model as rio; use rio_api::parser::{QuadsParser, TriplesParser}; -use rio_turtle::{NQuadsParser, NTriplesParser, TriGParser, TurtleError, TurtleParser}; -use rio_xml::{RdfXmlError, RdfXmlParser}; +use rio_turtle::{NQuadsParser, NTriplesParser, TriGParser, TurtleParser}; +use rio_xml::RdfXmlParser; use std::collections::HashMap; -use std::error::Error; use std::io::BufRead; -use std::{fmt, io}; /// Parsers for RDF graph serialization formats. /// @@ -68,7 +67,7 @@ impl GraphParser { /// Executes the parsing itself on a [`BufRead`](std::io::BufRead) implementation and returns an iterator of triples. #[allow(clippy::unnecessary_wraps)] - pub fn read_triples(&self, reader: R) -> Result, ParserError> { + pub fn read_triples(&self, reader: R) -> Result, ParseError> { Ok(TripleReader { mapper: RioMapper::default(), parser: match self.format { @@ -115,9 +114,9 @@ enum TripleReaderKind { } impl Iterator for TripleReader { - type Item = Result; + type Item = Result; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { loop { if let Some(r) = self.buffer.pop() { return Some(Ok(r)); @@ -145,9 +144,9 @@ impl TripleReader { parser: &mut P, buffer: &mut Vec, mapper: &mut RioMapper, - ) -> Option> + ) -> Option> where - ParserError: From, + ParseError: From, { if parser.is_end() { None @@ -217,7 +216,7 @@ impl DatasetParser { /// Executes the parsing itself on a [`BufRead`](std::io::BufRead) implementation and returns an iterator of quads. #[allow(clippy::unnecessary_wraps)] - pub fn read_quads(&self, reader: R) -> Result, ParserError> { + pub fn read_quads(&self, reader: R) -> Result, ParseError> { Ok(QuadReader { mapper: RioMapper::default(), parser: match self.format { @@ -260,9 +259,9 @@ enum QuadReaderKind { } impl Iterator for QuadReader { - type Item = Result; + type Item = Result; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { loop { if let Some(r) = self.buffer.pop() { return Some(Ok(r)); @@ -287,9 +286,9 @@ impl QuadReader { parser: &mut P, buffer: &mut Vec, mapper: &mut RioMapper, - ) -> Option> + ) -> Option> where - ParserError: From, + ParseError: From, { if parser.is_end() { None @@ -375,150 +374,3 @@ impl<'a> RioMapper { } } } - -/// Error returned during RDF format parsing. -#[derive(Debug)] -pub enum ParserError { - /// I/O error during parsing (file not found...). - Io(io::Error), - /// An error in the file syntax. - Syntax(SyntaxError), -} - -impl ParserError { - pub(crate) fn invalid_base_iri(iri: &str, error: IriParseError) -> Self { - Self::Syntax(SyntaxError { - inner: SyntaxErrorKind::InvalidBaseIri { - iri: iri.to_owned(), - error, - }, - }) - } -} - -impl fmt::Display for ParserError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Io(e) => e.fmt(f), - Self::Syntax(e) => e.fmt(f), - } - } -} - -impl Error for ParserError { - fn source(&self) -> Option<&(dyn Error + 'static)> { - match self { - Self::Io(e) => Some(e), - Self::Syntax(e) => Some(e), - } - } -} - -#[allow(clippy::fallible_impl_from)] -impl From for ParserError { - fn from(error: TurtleError) -> Self { - let error = io::Error::from(error); - if error.get_ref().map_or(false, |e| e.is::()) { - Self::Syntax(SyntaxError { - inner: SyntaxErrorKind::Turtle(*error.into_inner().unwrap().downcast().unwrap()), - }) - } else { - Self::Io(error) - } - } -} - -#[allow(clippy::fallible_impl_from)] -impl From for ParserError { - fn from(error: RdfXmlError) -> Self { - let error = io::Error::from(error); - if error.get_ref().map_or(false, |e| e.is::()) { - Self::Syntax(SyntaxError { - inner: SyntaxErrorKind::RdfXml(*error.into_inner().unwrap().downcast().unwrap()), - }) - } else { - Self::Io(error) - } - } -} - -impl From for ParserError { - fn from(error: TermParseError) -> Self { - Self::Syntax(SyntaxError { - inner: SyntaxErrorKind::Term(error), - }) - } -} - -impl From for ParserError { - fn from(error: io::Error) -> Self { - Self::Io(error) - } -} - -impl From for ParserError { - fn from(error: SyntaxError) -> Self { - Self::Syntax(error) - } -} - -impl From for io::Error { - fn from(error: ParserError) -> Self { - match error { - ParserError::Io(error) => error, - ParserError::Syntax(error) => error.into(), - } - } -} - -/// An error in the syntax of the parsed file. -#[derive(Debug)] -pub struct SyntaxError { - pub(crate) inner: SyntaxErrorKind, -} - -#[derive(Debug)] -pub(crate) enum SyntaxErrorKind { - Turtle(TurtleError), - RdfXml(RdfXmlError), - InvalidBaseIri { iri: String, error: IriParseError }, - Term(TermParseError), -} - -impl fmt::Display for SyntaxError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self.inner { - SyntaxErrorKind::Turtle(e) => e.fmt(f), - SyntaxErrorKind::RdfXml(e) => e.fmt(f), - SyntaxErrorKind::InvalidBaseIri { iri, error } => { - write!(f, "Invalid base IRI '{}': {}", iri, error) - } - SyntaxErrorKind::Term(e) => e.fmt(f), - } - } -} - -impl Error for SyntaxError { - fn source(&self) -> Option<&(dyn Error + 'static)> { - match &self.inner { - SyntaxErrorKind::Turtle(e) => Some(e), - SyntaxErrorKind::RdfXml(e) => Some(e), - SyntaxErrorKind::Term(e) => Some(e), - SyntaxErrorKind::InvalidBaseIri { .. } => None, - } - } -} - -impl From for io::Error { - fn from(error: SyntaxError) -> Self { - match error.inner { - SyntaxErrorKind::Turtle(error) => error.into(), - SyntaxErrorKind::RdfXml(error) => error.into(), - SyntaxErrorKind::InvalidBaseIri { iri, error } => Self::new( - io::ErrorKind::InvalidInput, - format!("Invalid IRI '{}': {}", iri, error), - ), - SyntaxErrorKind::Term(error) => Self::new(io::ErrorKind::InvalidData, error), - } - } -} diff --git a/lib/src/sparql/error.rs b/lib/src/sparql/error.rs index b5bb3910..3083dc46 100644 --- a/lib/src/sparql/error.rs +++ b/lib/src/sparql/error.rs @@ -1,4 +1,4 @@ -use crate::io::read::ParserError; +use crate::io::read::ParseError; use crate::storage::StorageError; use std::convert::Infallible; use std::error; @@ -14,7 +14,7 @@ pub enum EvaluationError { /// An error from the storage Storage(StorageError), /// An error while parsing an external RDF file - ExternalParser(ParserError), + GraphParsing(ParseError), /// An error while parsing an external result file (likely from a federated query) ResultsParsing(sparesults::ParseError), /// An error returned during store IOs or during results write @@ -39,7 +39,7 @@ impl fmt::Display for EvaluationError { match self { Self::Parsing(error) => error.fmt(f), Self::Storage(error) => error.fmt(f), - Self::ExternalParser(error) => error.fmt(f), + Self::GraphParsing(error) => error.fmt(f), Self::ResultsParsing(error) => error.fmt(f), Self::Io(error) => error.fmt(f), Self::Query(error) => error.fmt(f), @@ -61,7 +61,7 @@ impl error::Error for EvaluationError { match self { Self::Parsing(e) => Some(e), Self::Storage(e) => Some(e), - Self::ExternalParser(e) => Some(e), + Self::GraphParsing(e) => Some(e), Self::ResultsParsing(e) => Some(e), Self::Io(e) => Some(e), Self::Query(e) => Some(e), @@ -118,9 +118,9 @@ impl From for EvaluationError { } } -impl From for EvaluationError { - fn from(error: ParserError) -> Self { - Self::ExternalParser(error) +impl From for EvaluationError { + fn from(error: ParseError) -> Self { + Self::GraphParsing(error) } } @@ -134,7 +134,7 @@ impl From for io::Error { fn from(error: EvaluationError) -> Self { match error { EvaluationError::Parsing(error) => Self::new(io::ErrorKind::InvalidData, error), - EvaluationError::ExternalParser(error) => error.into(), + EvaluationError::GraphParsing(error) => error.into(), EvaluationError::ResultsParsing(error) => error.into(), EvaluationError::Io(error) => error, EvaluationError::Storage(error) => error.into(), diff --git a/lib/src/sparql/update.rs b/lib/src/sparql/update.rs index 1f0c10f8..19e141b1 100644 --- a/lib/src/sparql/update.rs +++ b/lib/src/sparql/update.rs @@ -1,4 +1,4 @@ -use crate::io::read::ParserError; +use crate::io::read::ParseError; use crate::io::{GraphFormat, GraphParser}; use crate::model::{GraphName as OxGraphName, GraphNameRef, Quad as OxQuad}; use crate::sparql::algebra::QueryDataset; @@ -172,7 +172,7 @@ impl SimpleUpdateEvaluator<'_> { if let Some(base_iri) = &self.base_iri { parser = parser .with_base_iri(base_iri.as_str()) - .map_err(|e| ParserError::invalid_base_iri(base_iri, e))?; + .map_err(|e| ParseError::invalid_base_iri(base_iri, e))?; } for t in parser.read_triples(BufReader::new(body))? { self.transaction diff --git a/lib/src/storage/error.rs b/lib/src/storage/error.rs index de875601..ffd3a702 100644 --- a/lib/src/storage/error.rs +++ b/lib/src/storage/error.rs @@ -1,4 +1,4 @@ -use crate::io::read::ParserError; +use crate::io::read::ParseError; use std::error::Error; use std::fmt; use std::io; @@ -113,7 +113,7 @@ impl From for io::Error { #[derive(Debug)] pub enum LoaderError { /// An error raised while reading the file. - Parser(ParserError), + Parsing(ParseError), /// An error raised during the insertion in the store. Storage(StorageError), } @@ -121,7 +121,7 @@ pub enum LoaderError { impl fmt::Display for LoaderError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::Parser(e) => e.fmt(f), + Self::Parsing(e) => e.fmt(f), Self::Storage(e) => e.fmt(f), } } @@ -130,15 +130,15 @@ impl fmt::Display for LoaderError { impl Error for LoaderError { fn source(&self) -> Option<&(dyn Error + 'static)> { match self { - Self::Parser(e) => Some(e), + Self::Parsing(e) => Some(e), Self::Storage(e) => Some(e), } } } -impl From for LoaderError { - fn from(error: ParserError) -> Self { - Self::Parser(error) +impl From for LoaderError { + fn from(error: ParseError) -> Self { + Self::Parsing(error) } } @@ -152,7 +152,7 @@ impl From for io::Error { fn from(error: LoaderError) -> Self { match error { LoaderError::Storage(error) => error.into(), - LoaderError::Parser(error) => error.into(), + LoaderError::Parsing(error) => error.into(), } } } diff --git a/lib/src/store.rs b/lib/src/store.rs index f77f2126..f898c956 100644 --- a/lib/src/store.rs +++ b/lib/src/store.rs @@ -23,7 +23,7 @@ //! }; //! # Result::<_,Box>::Ok(()) //! ``` -use crate::io::read::ParserError; +use crate::io::read::ParseError; use crate::io::{ DatasetFormat, DatasetParser, DatasetSerializer, GraphFormat, GraphParser, GraphSerializer, }; @@ -274,7 +274,7 @@ impl Store { if let Some(base_iri) = base_iri { parser = parser .with_base_iri(base_iri) - .map_err(|e| ParserError::invalid_base_iri(base_iri, e))?; + .map_err(|e| ParseError::invalid_base_iri(base_iri, e))?; } let quads = parser .read_triples(reader)? @@ -323,7 +323,7 @@ impl Store { if let Some(base_iri) = base_iri { parser = parser .with_base_iri(base_iri) - .map_err(|e| ParserError::invalid_base_iri(base_iri, e))?; + .map_err(|e| ParseError::invalid_base_iri(base_iri, e))?; } let quads = parser.read_quads(reader)?.collect::, _>>()?; self.storage.transaction(move |mut t| { @@ -660,7 +660,7 @@ impl Store { if let Some(base_iri) = base_iri { parser = parser .with_base_iri(base_iri) - .map_err(|e| ParserError::invalid_base_iri(base_iri, e))?; + .map_err(|e| ParseError::invalid_base_iri(base_iri, e))?; } bulk_load(&self.storage, parser.read_quads(reader)?) } @@ -708,7 +708,7 @@ impl Store { if let Some(base_iri) = base_iri { parser = parser .with_base_iri(base_iri) - .map_err(|e| ParserError::invalid_base_iri(base_iri, e))?; + .map_err(|e| ParseError::invalid_base_iri(base_iri, e))?; } let to_graph_name = to_graph_name.into(); bulk_load( diff --git a/python/src/io.rs b/python/src/io.rs index 6c954c87..34c00828 100644 --- a/python/src/io.rs +++ b/python/src/io.rs @@ -1,7 +1,7 @@ #![allow(clippy::needless_option_as_deref)] use crate::model::{PyQuad, PyTriple}; -use oxigraph::io::read::{ParserError, QuadReader, TripleReader}; +use oxigraph::io::read::{ParseError, QuadReader, TripleReader}; use oxigraph::io::{ DatasetFormat, DatasetParser, DatasetSerializer, GraphFormat, GraphParser, GraphSerializer, }; @@ -61,7 +61,7 @@ pub fn parse( .map_err(|e| PyValueError::new_err(e.to_string()))?; } Ok(PyTripleReader { - inner: parser.read_triples(input).map_err(map_parser_error)?, + inner: parser.read_triples(input).map_err(map_parse_error)?, } .into_py(py)) } else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) { @@ -72,7 +72,7 @@ pub fn parse( .map_err(|e| PyValueError::new_err(e.to_string()))?; } Ok(PyQuadReader { - inner: parser.read_quads(input).map_err(map_parser_error)?, + inner: parser.read_quads(input).map_err(map_parse_error)?, } .into_py(py)) } else { @@ -158,7 +158,7 @@ impl PyTripleReader { fn __next__(&mut self) -> PyResult> { self.inner .next() - .map(|q| Ok(q.map_err(map_parser_error)?.into())) + .map(|q| Ok(q.map_err(map_parse_error)?.into())) .transpose() } } @@ -177,7 +177,7 @@ impl PyQuadReader { fn __next__(&mut self) -> PyResult> { self.inner .next() - .map(|q| Ok(q.map_err(map_parser_error)?.into())) + .map(|q| Ok(q.map_err(map_parse_error)?.into())) .transpose() } } @@ -247,9 +247,9 @@ pub(crate) fn map_io_err(error: io::Error) -> PyErr { PyIOError::new_err(error.to_string()) } -pub(crate) fn map_parser_error(error: ParserError) -> PyErr { +pub(crate) fn map_parse_error(error: ParseError) -> PyErr { match error { - ParserError::Syntax(error) => PySyntaxError::new_err(error.to_string()), - ParserError::Io(error) => map_io_err(error), + ParseError::Syntax(error) => PySyntaxError::new_err(error.to_string()), + ParseError::Io(error) => map_io_err(error), } } diff --git a/python/src/sparql.rs b/python/src/sparql.rs index 12aa6e00..a1a84f23 100644 --- a/python/src/sparql.rs +++ b/python/src/sparql.rs @@ -1,4 +1,4 @@ -use crate::io::{map_io_err, map_parser_error}; +use crate::io::{map_io_err, map_parse_error}; use crate::map_storage_error; use crate::model::*; use oxigraph::model::Term; @@ -224,7 +224,7 @@ pub(crate) fn map_evaluation_error(error: EvaluationError) -> PyErr { EvaluationError::Parsing(error) => PySyntaxError::new_err(error.to_string()), EvaluationError::Storage(error) => map_storage_error(error), EvaluationError::Io(error) => map_io_err(error), - EvaluationError::ExternalParser(error) => map_parser_error(error), + EvaluationError::GraphParsing(error) => map_parse_error(error), EvaluationError::Query(error) => PyValueError::new_err(error.to_string()), _ => PyRuntimeError::new_err(error.to_string()), } diff --git a/python/src/store.rs b/python/src/store.rs index 7d5d3a7a..31141a7e 100644 --- a/python/src/store.rs +++ b/python/src/store.rs @@ -1,6 +1,6 @@ #![allow(clippy::needless_option_as_deref)] -use crate::io::{map_parser_error, PyFileLike}; +use crate::io::{map_parse_error, PyFileLike}; use crate::model::*; use crate::sparql::*; use oxigraph::io::{DatasetFormat, GraphFormat}; @@ -633,7 +633,7 @@ pub(crate) fn map_storage_error(error: StorageError) -> PyErr { pub(crate) fn map_loader_error(error: LoaderError) -> PyErr { match error { LoaderError::Storage(error) => map_storage_error(error), - LoaderError::Parser(error) => map_parser_error(error), + LoaderError::Parsing(error) => map_parse_error(error), } }