OxTTL: return file position in errors

pull/622/head
Tpt 1 year ago committed by Thomas Tanon
parent 8193cac86d
commit 13c3515d7b
  1. fuzz/fuzz_targets/nquads.rs (10 lines changed)
  2. fuzz/fuzz_targets/trig.rs (10 lines changed)
  3. lib/oxrdfio/src/error.rs (34 lines changed)
  4. lib/oxrdfxml/src/error.rs (12 lines changed)
  5. lib/oxrdfxml/src/parser.rs (6 lines changed)
  6. lib/oxttl/src/lexer.rs (6 lines changed)
  7. lib/oxttl/src/lib.rs (2 lines changed)
  8. lib/oxttl/src/line_formats.rs (8 lines changed)
  9. lib/oxttl/src/n3.rs (4 lines changed)
  10. lib/oxttl/src/terse.rs (20 lines changed)
  11. lib/oxttl/src/toolkit/error.rs (132 lines changed)
  12. lib/oxttl/src/toolkit/lexer.rs (336 lines changed)
  13. lib/oxttl/src/toolkit/mod.rs (8 lines changed)
  14. lib/oxttl/src/toolkit/parser.rs (142 lines changed)
  15. testsuite/oxigraph-tests/parser-error/invalid_iri.nt (2 lines changed)
  16. testsuite/oxigraph-tests/parser-error/invalid_iri_comment.nt (2 lines changed)
  17. testsuite/oxigraph-tests/parser-error/invalid_iri_comment_crlf.nt (2 lines changed)
  18. testsuite/oxigraph-tests/parser-error/invalid_iri_crlf.nt (2 lines changed)
  19. testsuite/oxigraph-tests/parser-error/invalid_iri_error.txt (1 line changed)
  20. testsuite/oxigraph-tests/parser-error/invalid_predicate.nt (2 lines changed)
  21. testsuite/oxigraph-tests/parser-error/invalid_predicate_error.txt (1 line changed)
  22. testsuite/oxigraph-tests/parser-error/invalid_string_escape.nt (1 line changed)
  23. testsuite/oxigraph-tests/parser-error/invalid_string_escape_error.txt (1 line changed)
  24. testsuite/oxigraph-tests/parser-error/manifest.ttl (66 lines changed)
  25. testsuite/oxigraph-tests/parser-error/unexpected_eof.nt (2 lines changed)
  26. testsuite/oxigraph-tests/parser-error/unexpected_eof_crlf.nt (2 lines changed)
  27. testsuite/oxigraph-tests/parser-error/unexpected_eof_error.txt (1 line changed)
  28. testsuite/src/parser_evaluator.rs (17 lines changed)
  29. testsuite/tests/oxigraph.rs (8 lines changed)

@ -2,9 +2,9 @@
use libfuzzer_sys::fuzz_target; use libfuzzer_sys::fuzz_target;
use oxrdf::Quad; use oxrdf::Quad;
use oxttl::{NQuadsParser, NQuadsSerializer, SyntaxError}; use oxttl::{NQuadsParser, NQuadsSerializer};
fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<SyntaxError>) { fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<String>) {
let mut quads = Vec::new(); let mut quads = Vec::new();
let mut errors = Vec::new(); let mut errors = Vec::new();
let mut parser = NQuadsParser::new().with_quoted_triples().parse(); let mut parser = NQuadsParser::new().with_quoted_triples().parse();
@ -13,7 +13,7 @@ fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<Synt
while let Some(result) = parser.read_next() { while let Some(result) = parser.read_next() {
match result { match result {
Ok(quad) => quads.push(quad), Ok(quad) => quads.push(quad),
Err(error) => errors.push(error), Err(error) => errors.push(error.to_string()),
} }
} }
} }
@ -21,7 +21,7 @@ fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<Synt
while let Some(result) = parser.read_next() { while let Some(result) = parser.read_next() {
match result { match result {
Ok(quad) => quads.push(quad), Ok(quad) => quads.push(quad),
Err(error) => errors.push(error), Err(error) => errors.push(error.to_string()),
} }
} }
assert!(parser.is_end()); assert!(parser.is_end());
@ -39,7 +39,7 @@ fuzz_target!(|data: &[u8]| {
.collect::<Vec<_>>() .collect::<Vec<_>>()
.as_slice()]); .as_slice()]);
assert_eq!(quads, quads_without_split); assert_eq!(quads, quads_without_split);
assert_eq!(errors.len(), errors_without_split.len()); assert_eq!(errors, errors_without_split);
// We serialize // We serialize
let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new()); let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new());
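For orientation, here is a minimal sketch of the incremental loop both fuzz targets drive. read_next and is_end appear in the hunks above; extend_from_slice and end are assumed from oxttl's low-level parser API:

use oxttl::NQuadsParser;

fn main() {
    // Feed the document in two chunks, pulling quads out as they complete
    let mut parser = NQuadsParser::new().parse();
    for chunk in [
        b"<http://example.com/s> <http://example.com/p> ".as_slice(),
        b"<http://example.com/o> .\n".as_slice(),
    ] {
        parser.extend_from_slice(chunk);
        while let Some(result) = parser.read_next() {
            match result {
                Ok(quad) => println!("{quad}"),
                // The fuzz targets collect errors as strings so the chunked
                // and unchunked runs can be compared with assert_eq!
                Err(error) => eprintln!("{error}"),
            }
        }
    }
    parser.end();
    while let Some(result) = parser.read_next() {
        let _ = result; // drain the remaining quads or errors
    }
    assert!(parser.is_end());
}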

@ -2,9 +2,9 @@
use libfuzzer_sys::fuzz_target; use libfuzzer_sys::fuzz_target;
use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple}; use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple};
use oxttl::{SyntaxError, TriGParser, TriGSerializer}; use oxttl::{TriGParser, TriGSerializer};
fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<SyntaxError>) { fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<String>) {
let mut quads = Vec::new(); let mut quads = Vec::new();
let mut errors = Vec::new(); let mut errors = Vec::new();
let mut parser = TriGParser::new() let mut parser = TriGParser::new()
@ -17,7 +17,7 @@ fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<Synt
while let Some(result) = parser.read_next() { while let Some(result) = parser.read_next() {
match result { match result {
Ok(quad) => quads.push(quad), Ok(quad) => quads.push(quad),
Err(error) => errors.push(error), Err(error) => errors.push(error.to_string()),
} }
} }
} }
@ -25,7 +25,7 @@ fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<Synt
while let Some(result) = parser.read_next() { while let Some(result) = parser.read_next() {
match result { match result {
Ok(quad) => quads.push(quad), Ok(quad) => quads.push(quad),
Err(error) => errors.push(error), Err(error) => errors.push(error.to_string()),
} }
} }
assert!(parser.is_end()); assert!(parser.is_end());
@ -96,7 +96,7 @@ fuzz_target!(|data: &[u8]| {
String::from_utf8_lossy(&serialize_quads(&quads_without_split)) String::from_utf8_lossy(&serialize_quads(&quads_without_split))
); );
} }
assert_eq!(errors.len(), errors_without_split.len()); assert_eq!(errors, errors_without_split);
// We serialize // We serialize
let new_serialization = serialize_quads(&quads); let new_serialization = serialize_quads(&quads);

@ -1,4 +1,5 @@
use std::error::Error; use std::error::Error;
use std::ops::Range;
use std::{fmt, io}; use std::{fmt, io};
/// Error returned during RDF format parsing. /// Error returned during RDF format parsing.
@ -110,10 +111,33 @@ pub struct SyntaxError {
enum SyntaxErrorKind { enum SyntaxErrorKind {
Turtle(oxttl::SyntaxError), Turtle(oxttl::SyntaxError),
RdfXml(oxrdfxml::SyntaxError), RdfXml(oxrdfxml::SyntaxError),
Msg { msg: &'static str }, Msg { msg: &'static str },
} }
impl SyntaxError {
/// The location of the error inside of the file.
#[inline]
pub fn location(&self) -> Option<Range<TextPosition>> {
match &self.inner {
SyntaxErrorKind::Turtle(e) => {
let location = e.location();
Some(
TextPosition {
line: location.start.line,
column: location.start.column,
offset: location.start.offset,
}..TextPosition {
line: location.end.line,
column: location.end.column,
offset: location.end.offset,
},
)
}
SyntaxErrorKind::RdfXml(_) | SyntaxErrorKind::Msg { .. } => None,
}
}
}
impl fmt::Display for SyntaxError { impl fmt::Display for SyntaxError {
#[inline] #[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@ -146,3 +170,11 @@ impl From<SyntaxError> for io::Error {
} }
} }
} }
/// A position in a text, i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points), and a global file `offset` starting from 0 (in number of bytes).
#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub struct TextPosition {
pub line: u64,
pub column: u64,
pub offset: u64,
}

@ -72,15 +72,6 @@ impl From<quick_xml::Error> for ParseError {
} }
} }
impl From<quick_xml::events::attributes::AttrError> for ParseError {
#[inline]
fn from(error: quick_xml::events::attributes::AttrError) -> Self {
Self::Syntax(SyntaxError {
inner: SyntaxErrorKind::XmlAttribute(error),
})
}
}
/// An error in the syntax of the parsed file. /// An error in the syntax of the parsed file.
#[derive(Debug)] #[derive(Debug)]
pub struct SyntaxError { pub struct SyntaxError {
@ -90,7 +81,6 @@ pub struct SyntaxError {
#[derive(Debug)] #[derive(Debug)]
pub enum SyntaxErrorKind { pub enum SyntaxErrorKind {
Xml(quick_xml::Error), Xml(quick_xml::Error),
XmlAttribute(quick_xml::events::attributes::AttrError),
InvalidIri { InvalidIri {
iri: String, iri: String,
error: IriParseError, error: IriParseError,
@ -119,7 +109,6 @@ impl fmt::Display for SyntaxError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self.inner { match &self.inner {
SyntaxErrorKind::Xml(error) => error.fmt(f), SyntaxErrorKind::Xml(error) => error.fmt(f),
SyntaxErrorKind::XmlAttribute(error) => error.fmt(f),
SyntaxErrorKind::InvalidIri { iri, error } => { SyntaxErrorKind::InvalidIri { iri, error } => {
write!(f, "error while parsing IRI '{iri}': {error}") write!(f, "error while parsing IRI '{iri}': {error}")
} }
@ -136,7 +125,6 @@ impl Error for SyntaxError {
fn source(&self) -> Option<&(dyn Error + 'static)> { fn source(&self) -> Option<&(dyn Error + 'static)> {
match &self.inner { match &self.inner {
SyntaxErrorKind::Xml(error) => Some(error), SyntaxErrorKind::Xml(error) => Some(error),
SyntaxErrorKind::XmlAttribute(error) => Some(error),
SyntaxErrorKind::InvalidIri { error, .. } => Some(error), SyntaxErrorKind::InvalidIri { error, .. } => Some(error),
SyntaxErrorKind::InvalidLanguageTag { error, .. } => Some(error), SyntaxErrorKind::InvalidLanguageTag { error, .. } => Some(error),
SyntaxErrorKind::Msg { .. } => None, SyntaxErrorKind::Msg { .. } => None,

@ -8,7 +8,7 @@ use quick_xml::escape::unescape_with;
use quick_xml::events::attributes::Attribute; use quick_xml::events::attributes::Attribute;
use quick_xml::events::*; use quick_xml::events::*;
use quick_xml::name::{LocalName, QName, ResolveResult}; use quick_xml::name::{LocalName, QName, ResolveResult};
use quick_xml::{NsReader, Writer}; use quick_xml::{Error, NsReader, Writer};
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::io::{BufReader, Read}; use std::io::{BufReader, Read};
use std::str; use std::str;
@ -515,7 +515,7 @@ impl<R> RdfXmlReader<R> {
.to_string(), .to_string(),
); );
for attr in event.attributes() { for attr in event.attributes() {
clean_event.push_attribute(attr?); clean_event.push_attribute(attr.map_err(Error::InvalidAttr)?);
} }
writer.write_event(Event::Start(clean_event))?; writer.write_event(Event::Start(clean_event))?;
self.in_literal_depth += 1; self.in_literal_depth += 1;
@ -544,7 +544,7 @@ impl<R> RdfXmlReader<R> {
let mut type_attr = None; let mut type_attr = None;
for attribute in event.attributes() { for attribute in event.attributes() {
let attribute = attribute?; let attribute = attribute.map_err(Error::InvalidAttr)?;
if attribute.key.as_ref().starts_with(b"xml") { if attribute.key.as_ref().starts_with(b"xml") {
if attribute.key.as_ref() == b"xml:lang" { if attribute.key.as_ref() == b"xml:lang" {
let tag = self.convert_attribute(&attribute)?; let tag = self.convert_attribute(&attribute)?;
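A minimal restatement of the mapping used at the two call sites above: attribute errors are folded into quick_xml::Error via its InvalidAttr variant, which is what allows dropping the dedicated XmlAttribute error variant in error.rs:

use quick_xml::events::attributes::{AttrError, Attribute};
use quick_xml::Error;

// Fold an attribute iteration error into the general quick_xml error type,
// as done inline by `attr.map_err(Error::InvalidAttr)` above
fn wrap(attr: Result<Attribute<'_>, AttrError>) -> Result<Attribute<'_>, Error> {
    attr.map_err(Error::InvalidAttr)
}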

@ -266,7 +266,7 @@ impl N3Lexer {
)); ));
} }
} }
Err(e) => return Some((e.position.end, Err(e))), Err(e) => return Some((e.location.end, Err(e))),
} }
} else if is_ending { } else if is_ending {
while data[..i].ends_with(b".") { while data[..i].ends_with(b".") {
@ -447,7 +447,7 @@ impl N3Lexer {
return Some((i, Ok((buffer, might_be_invalid_iri)))); return Some((i, Ok((buffer, might_be_invalid_iri))));
} }
} }
Err(e) => return Some((e.position.end, Err(e))), Err(e) => return Some((e.location.end, Err(e))),
} }
} else if is_ending { } else if is_ending {
let buffer = if let Some(mut buffer) = buffer { let buffer = if let Some(mut buffer) = buffer {
@ -515,7 +515,7 @@ impl N3Lexer {
} }
i += consumed; i += consumed;
} }
Err(e) => return Some((e.position.end, Err(e))), Err(e) => return Some((e.location.end, Err(e))),
} }
} }
} }

@ -17,7 +17,7 @@ pub mod turtle;
pub use crate::n3::N3Parser; pub use crate::n3::N3Parser;
pub use crate::nquads::{NQuadsParser, NQuadsSerializer}; pub use crate::nquads::{NQuadsParser, NQuadsSerializer};
pub use crate::ntriples::{NTriplesParser, NTriplesSerializer}; pub use crate::ntriples::{NTriplesParser, NTriplesSerializer};
pub use crate::toolkit::{ParseError, SyntaxError}; pub use crate::toolkit::{ParseError, SyntaxError, TextPosition};
pub use crate::trig::{TriGParser, TriGSerializer}; pub use crate::trig::{TriGParser, TriGSerializer};
pub use crate::turtle::{TurtleParser, TurtleSerializer}; pub use crate::turtle::{TurtleParser, TurtleSerializer};
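With TextPosition re-exported, downstream code can point at the exact failing span. A hedged sketch, assuming parse_read is the crate's reader-based entry point:

use oxttl::{ParseError, TurtleParser};

fn main() {
    let data = b"<http://example.com/s> <http:// /p> <http://example.com/o> .";
    for result in TurtleParser::new().parse_read(data.as_slice()) {
        if let Err(ParseError::Syntax(error)) = result {
            let location = error.location();
            // line and column are 0-based; add 1 for human-friendly output
            println!(
                "line {} column {}: {}",
                location.start.line + 1,
                location.start.column + 1,
                error.message()
            );
        }
    }
}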

@ -76,7 +76,7 @@ impl RuleRecognizer for NQuadsRecognizer {
} }
_ => self.error( _ => self.error(
errors, errors,
format!("The subject of a triple should be an IRI or a blank node, {token:?} found"), "The subject of a triple should be an IRI or a blank node, TOKEN found",
), ),
}, },
NQuadsState::ExpectPredicate => match token { NQuadsState::ExpectPredicate => match token {
@ -88,7 +88,7 @@ impl RuleRecognizer for NQuadsRecognizer {
} }
_ => self.error( _ => self.error(
errors, errors,
format!("The predicate of a triple should be an IRI, {token:?} found"), "The predicate of a triple should be an IRI, TOKEN found",
), ),
}, },
NQuadsState::ExpectedObject => match token { NQuadsState::ExpectedObject => match token {
@ -118,7 +118,7 @@ impl RuleRecognizer for NQuadsRecognizer {
} }
_ => self.error( _ => self.error(
errors, errors,
format!("The object of a triple should be an IRI, a blank node or a literal, {token:?} found"), "The object of a triple should be an IRI, a blank node or a literal, TOKEN found",
), ),
}, },
NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value } => match token { NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value } => match token {
@ -159,7 +159,7 @@ impl RuleRecognizer for NQuadsRecognizer {
.push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
self self
} }
_ => self.error(errors, format!("A literal datatype must be an IRI, found {token:?}")), _ => self.error(errors, "A literal datatype must be an IRI, found TOKEN"),
}, },
NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple => { NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple => {
if self.stack.is_empty() { if self.stack.is_empty() {
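The TOKEN placeholder that replaces the format! calls lets recognizers emit static message templates instead of allocating a string on every error path; the parser splices in the offending source text later (see the toolkit/parser.rs hunk below). A sketch of that substitution, with a hypothetical token value:

fn main() {
    // Static template pushed by the recognizer above
    let template = "The predicate of a triple should be an IRI, TOKEN found";
    // Hypothetical value of lexer.last_token_source() for the input `"p"`
    let token_source = "\"p\"";
    assert_eq!(
        template.replace("TOKEN", token_source),
        "The predicate of a triple should be an IRI, \"p\" found"
    );
}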

@ -836,7 +836,7 @@ impl RuleRecognizer for N3Recognizer {
self.stack.push(N3State::FormulaContent); self.stack.push(N3State::FormulaContent);
self self
} }
_ => self.error(errors, format!("This is not a valid RDF value: {token:?}")) _ => self.error(errors, "TOKEN is not a valid RDF value")
} }
} }
N3State::PropertyListMiddle => match token { N3State::PropertyListMiddle => match token {
@ -950,7 +950,7 @@ impl RuleRecognizer for N3Recognizer {
Err(e) => self.error(errors, e) Err(e) => self.error(errors, e)
} }
_ => { _ => {
self.error(errors, format!("Expecting a datatype IRI after '^^, found {token:?}")).recognize_next(token, results, errors) self.error(errors, "Expecting a datatype IRI after '^^, found TOKEN").recognize_next(token, results, errors)
} }
} }
} }

@ -167,7 +167,7 @@ impl RuleRecognizer for TriGRecognizer {
self self
} }
_ => { _ => {
self.error(errors, format!("The token {token:?} is not a valid subject or graph name")) self.error(errors, "TOKEN is not a valid subject or graph name")
} }
} }
TriGState::WrappedGraphOrPredicateObjectList { term } => { TriGState::WrappedGraphOrPredicateObjectList { term } => {
@ -317,7 +317,7 @@ impl RuleRecognizer for TriGRecognizer {
self self
} }
_ => { _ => {
self.error(errors, format!("The token {token:?} is not a valid RDF subject")) self.error(errors, "TOKEN is not a valid RDF subject")
} }
}, },
TriGState::TriplesBlankNodePropertyListCurrent => if token == N3Token::Punctuation("]") { TriGState::TriplesBlankNodePropertyListCurrent => if token == N3Token::Punctuation("]") {
@ -350,7 +350,7 @@ impl RuleRecognizer for TriGRecognizer {
self self
} }
_ => { _ => {
self.error(errors, format!("The token {token:?} is not a valid graph name")) self.error(errors, "TOKEN is not a valid graph name")
} }
} }
TriGState::GraphNameAnonEnd => if token == N3Token::Punctuation("]") { TriGState::GraphNameAnonEnd => if token == N3Token::Punctuation("]") {
@ -456,7 +456,7 @@ impl RuleRecognizer for TriGRecognizer {
Err(e) => self.error(errors, e) Err(e) => self.error(errors, e)
} }
_ => { _ => {
self.error(errors, format!("The token {token:?} is not a valid predicate")) self.error(errors, "TOKEN is not a valid predicate")
} }
} }
// [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal | quotedTriple // [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal | quotedTriple
@ -536,7 +536,7 @@ impl RuleRecognizer for TriGRecognizer {
self self
} }
_ => { _ => {
self.error(errors, format!("This is not a valid RDF object: {token:?}")) self.error(errors, "TOKEN is not a valid RDF object")
} }
} }
@ -637,7 +637,7 @@ impl RuleRecognizer for TriGRecognizer {
Err(e) => self.error(errors, e) Err(e) => self.error(errors, e)
} }
_ => { _ => {
self.error(errors, format!("Expecting a datatype IRI after '^^, found {token:?}")).recognize_next(token, results, errors) self.error(errors, "Expecting a datatype IRI after ^^, found TOKEN").recognize_next(token, results, errors)
} }
} }
} }
@ -653,7 +653,7 @@ impl RuleRecognizer for TriGRecognizer {
if token == N3Token::Punctuation(">>") { if token == N3Token::Punctuation(">>") {
self self
} else { } else {
self.error(errors, format!("Expecting '>>' to close a quoted triple, found {token:?}")) self.error(errors, "Expecting '>>' to close a quoted triple, found TOKEN")
} }
} }
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
@ -670,7 +670,7 @@ impl RuleRecognizer for TriGRecognizer {
if token == N3Token::Punctuation(">>") { if token == N3Token::Punctuation(">>") {
self self
} else { } else {
self.error(errors, format!("Expecting '>>' to close a quoted triple, found {token:?}")) self.error(errors, "Expecting '>>' to close a quoted triple, found TOKEN")
} }
} }
// [28t] qtSubject ::= iri | BlankNode | quotedTriple // [28t] qtSubject ::= iri | BlankNode | quotedTriple
@ -703,7 +703,7 @@ impl RuleRecognizer for TriGRecognizer {
self.stack.push(TriGState::QuotedSubject); self.stack.push(TriGState::QuotedSubject);
self self
} }
_ => self.error(errors, format!("This is not a valid RDF quoted triple subject: {token:?}")) _ => self.error(errors, "TOKEN is not a valid RDF quoted triple subject: TOKEN")
} }
// [29t] qtObject ::= iri | BlankNode | literal | quotedTriple // [29t] qtObject ::= iri | BlankNode | literal | quotedTriple
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
@ -759,7 +759,7 @@ impl RuleRecognizer for TriGRecognizer {
self.stack.push(TriGState::QuotedSubject); self.stack.push(TriGState::QuotedSubject);
self self
} }
_ => self.error(errors, format!("This is not a valid RDF quoted triple object: {token:?}")) _ => self.error(errors, "TOKEN is not a valid RDF quoted triple object")
} }
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
TriGState::QuotedAnonEnd => if token == N3Token::Punctuation("]") { TriGState::QuotedAnonEnd => if token == N3Token::Punctuation("]") {

@ -0,0 +1,132 @@
use std::error::Error;
use std::ops::Range;
use std::{fmt, io};
/// A position in a text, i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points), and a global file `offset` starting from 0 (in number of bytes).
#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub struct TextPosition {
pub line: u64,
pub column: u64,
pub offset: u64,
}
/// An error in the syntax of the parsed file.
///
/// It is composed of a message and a byte range in the input.
#[derive(Debug)]
pub struct SyntaxError {
pub(super) location: Range<TextPosition>,
pub(super) message: String,
}
impl SyntaxError {
/// The location of the error inside of the file.
#[inline]
pub fn location(&self) -> Range<TextPosition> {
self.location.clone()
}
/// The error message.
#[inline]
pub fn message(&self) -> &str {
&self.message
}
}
impl fmt::Display for SyntaxError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.location.start.offset + 1 >= self.location.end.offset {
write!(
f,
"Parser error at line {} column {}: {}",
self.location.start.line + 1,
self.location.start.column + 1,
self.message
)
} else if self.location.start.line == self.location.end.line {
write!(
f,
"Parser error between at line {} between columns {} and column {}: {}",
self.location.start.line + 1,
self.location.start.column + 1,
self.location.end.column + 1,
self.message
)
} else {
write!(
f,
"Parser error between line {} column {} and line {} column {}: {}",
self.location.start.line + 1,
self.location.start.column + 1,
self.location.end.line + 1,
self.location.end.column + 1,
self.message
)
}
}
}
impl Error for SyntaxError {}
impl From<SyntaxError> for io::Error {
#[inline]
fn from(error: SyntaxError) -> Self {
io::Error::new(io::ErrorKind::InvalidData, error)
}
}
/// A parsing error.
///
/// It is the union of [`SyntaxError`] and [`std::io::Error`].
#[derive(Debug)]
pub enum ParseError {
/// I/O error during parsing (file not found...).
Io(io::Error),
/// An error in the file syntax.
Syntax(SyntaxError),
}
impl fmt::Display for ParseError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Io(e) => e.fmt(f),
Self::Syntax(e) => e.fmt(f),
}
}
}
impl Error for ParseError {
#[inline]
fn source(&self) -> Option<&(dyn Error + 'static)> {
Some(match self {
Self::Io(e) => e,
Self::Syntax(e) => e,
})
}
}
impl From<SyntaxError> for ParseError {
#[inline]
fn from(error: SyntaxError) -> Self {
Self::Syntax(error)
}
}
impl From<io::Error> for ParseError {
#[inline]
fn from(error: io::Error) -> Self {
Self::Io(error)
}
}
impl From<ParseError> for io::Error {
#[inline]
fn from(error: ParseError) -> Self {
match error {
ParseError::Syntax(e) => e.into(),
ParseError::Io(e) => e,
}
}
}
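An illustration of how the first two Display branches render, written as a crate-internal test because the fields are pub(super); the positions are made up:

#[test]
fn syntax_error_display() {
    let at = |line, column, offset| TextPosition { line, column, offset };
    let eof = SyntaxError {
        location: at(1, 2, 70)..at(1, 2, 70), // empty range: single-position branch
        message: "Unexpected end of file".into(),
    };
    assert_eq!(
        eof.to_string(),
        "Parser error at line 2 column 3: Unexpected end of file"
    );
    let bad_iri = SyntaxError {
        location: at(1, 23, 94)..at(1, 35, 106), // same line: column-range branch
        message: "Invalid IRI code point ' '".into(),
    };
    assert_eq!(
        bad_iri.to_string(),
        "Parser error at line 2 between columns 24 and 36: Invalid IRI code point ' '"
    );
}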

@ -1,9 +1,10 @@
use memchr::memchr2; use crate::toolkit::error::{SyntaxError, TextPosition};
use memchr::{memchr2, memchr2_iter};
use std::borrow::Cow;
use std::cmp::min; use std::cmp::min;
use std::error::Error;
use std::fmt;
use std::io::{self, Read}; use std::io::{self, Read};
use std::ops::{Range, RangeInclusive}; use std::ops::{Range, RangeInclusive};
use std::str;
#[cfg(feature = "async-tokio")] #[cfg(feature = "async-tokio")]
use tokio::io::{AsyncRead, AsyncReadExt}; use tokio::io::{AsyncRead, AsyncReadExt};
@ -22,14 +23,14 @@ pub trait TokenRecognizer {
} }
pub struct TokenRecognizerError { pub struct TokenRecognizerError {
pub position: Range<usize>, pub location: Range<usize>,
pub message: String, pub message: String,
} }
impl<S: Into<String>> From<(Range<usize>, S)> for TokenRecognizerError { impl<S: Into<String>> From<(Range<usize>, S)> for TokenRecognizerError {
fn from((position, message): (Range<usize>, S)) -> Self { fn from((location, message): (Range<usize>, S)) -> Self {
Self { Self {
position, location,
message: message.into(), message: message.into(),
} }
} }
@ -37,34 +38,37 @@ impl<S: Into<String>> From<(Range<usize>, S)> for TokenRecognizerError {
#[allow(clippy::range_plus_one)] #[allow(clippy::range_plus_one)]
impl<S: Into<String>> From<(RangeInclusive<usize>, S)> for TokenRecognizerError { impl<S: Into<String>> From<(RangeInclusive<usize>, S)> for TokenRecognizerError {
fn from((position, message): (RangeInclusive<usize>, S)) -> Self { fn from((location, message): (RangeInclusive<usize>, S)) -> Self {
(*position.start()..*position.end() + 1, message).into() (*location.start()..*location.end() + 1, message).into()
} }
} }
impl<S: Into<String>> From<(usize, S)> for TokenRecognizerError { impl<S: Into<String>> From<(usize, S)> for TokenRecognizerError {
fn from((position, message): (usize, S)) -> Self { fn from((location, message): (usize, S)) -> Self {
(position..=position, message).into() (location..=location, message).into()
} }
} }
pub struct TokenWithPosition<T> {
pub token: T,
pub position: Range<usize>,
}
pub struct Lexer<R: TokenRecognizer> { pub struct Lexer<R: TokenRecognizer> {
parser: R, parser: R,
data: Vec<u8>, data: Vec<u8>,
start: usize, position: Position,
previous_position: Position, // Lexer position before the last emitted token
is_ending: bool, is_ending: bool,
position: usize,
min_buffer_size: usize, min_buffer_size: usize,
max_buffer_size: usize, max_buffer_size: usize,
is_line_jump_whitespace: bool, is_line_jump_whitespace: bool,
line_comment_start: Option<&'static [u8]>, line_comment_start: Option<&'static [u8]>,
} }
#[derive(Clone, Copy)]
struct Position {
line_start_buffer_offset: usize,
buffer_offset: usize,
global_offset: u64,
global_line: u64,
}
impl<R: TokenRecognizer> Lexer<R> { impl<R: TokenRecognizer> Lexer<R> {
pub fn new( pub fn new(
parser: R, parser: R,
@ -76,9 +80,19 @@ impl<R: TokenRecognizer> Lexer<R> {
Self { Self {
parser, parser,
data: Vec::new(), data: Vec::new(),
start: 0, position: Position {
line_start_buffer_offset: 0,
buffer_offset: 0,
global_offset: 0,
global_line: 0,
},
previous_position: Position {
line_start_buffer_offset: 0,
buffer_offset: 0,
global_offset: 0,
global_line: 0,
},
is_ending: false, is_ending: false,
position: 0,
min_buffer_size, min_buffer_size,
max_buffer_size, max_buffer_size,
is_line_jump_whitespace, is_line_jump_whitespace,
@ -148,24 +162,43 @@ impl<R: TokenRecognizer> Lexer<R> {
Ok(()) Ok(())
} }
pub fn read_next( #[allow(clippy::unwrap_in_result)]
&mut self, pub fn read_next(&mut self, options: &R::Options) -> Option<Result<R::Token<'_>, SyntaxError>> {
options: &R::Options,
) -> Option<Result<TokenWithPosition<R::Token<'_>>, LexerError>> {
self.skip_whitespaces_and_comments()?; self.skip_whitespaces_and_comments()?;
let Some((consumed, result)) = self.previous_position = self.position;
self.parser let Some((consumed, result)) = self.parser.recognize_next_token(
.recognize_next_token(&self.data[self.start..], self.is_ending, options) &self.data[self.position.buffer_offset..],
else { self.is_ending,
options,
) else {
return if self.is_ending { return if self.is_ending {
if self.start == self.data.len() { if self.position.buffer_offset == self.data.len() {
None // We have finished None // We have finished
} else { } else {
let error = LexerError { let (new_line_jumps, new_line_start) =
position: self.position..self.position + (self.data.len() - self.start), Self::find_number_of_line_jumps_and_start_of_last_line(
&self.data[self.position.buffer_offset..],
);
if new_line_jumps > 0 {
self.position.line_start_buffer_offset =
self.position.buffer_offset + new_line_start;
}
self.position.global_offset +=
u64::try_from(self.data.len() - self.position.buffer_offset).unwrap();
self.position.buffer_offset = self.data.len();
self.position.global_line += new_line_jumps;
let new_position = TextPosition {
line: self.position.global_line,
column: Self::column_from_bytes(
&self.data[self.position.line_start_buffer_offset..],
),
offset: self.position.global_offset,
};
let error = SyntaxError {
location: new_position..new_position,
message: "Unexpected end of file".into(), message: "Unexpected end of file".into(),
}; };
self.start = self.data.len(); // We consume everything self.position.buffer_offset = self.data.len(); // We consume everything
Some(Err(error)) Some(Err(error))
} }
} else { } else {
@ -177,44 +210,119 @@ impl<R: TokenRecognizer> Lexer<R> {
"The lexer must consume at least one byte each time" "The lexer must consume at least one byte each time"
); );
debug_assert!( debug_assert!(
self.start + consumed <= self.data.len(), self.position.buffer_offset + consumed <= self.data.len(),
"The lexer tried to consumed {consumed} bytes but only {} bytes are readable", "The lexer tried to consumed {consumed} bytes but only {} bytes are readable",
self.data.len() - self.start self.data.len() - self.position.buffer_offset
);
let (new_line_jumps, new_line_start) =
Self::find_number_of_line_jumps_and_start_of_last_line(
&self.data[self.position.buffer_offset..self.position.buffer_offset + consumed],
); );
let old_position = self.position; if new_line_jumps > 0 {
self.start += consumed; self.position.line_start_buffer_offset = self.position.buffer_offset + new_line_start;
self.position += consumed; }
Some(match result { self.position.buffer_offset += consumed;
Ok(token) => Ok(TokenWithPosition { self.position.global_offset += u64::try_from(consumed).unwrap();
token, self.position.global_line += new_line_jumps;
position: old_position..self.position, Some(result.map_err(|e| SyntaxError {
}), location: self.location_from_buffer_offset_range(e.location),
Err(e) => Err(LexerError {
position: e.position.start + self.position..e.position.end + self.position,
message: e.message, message: e.message,
}), }))
}) }
pub fn location_from_buffer_offset_range(
&self,
offset_range: Range<usize>,
) -> Range<TextPosition> {
let start_offset = self.previous_position.buffer_offset + offset_range.start;
let (start_extra_line_jumps, start_line_start) =
Self::find_number_of_line_jumps_and_start_of_last_line(
&self.data[self.previous_position.buffer_offset..start_offset],
);
let start_line_start = if start_extra_line_jumps > 0 {
start_line_start + self.previous_position.buffer_offset
} else {
self.previous_position.line_start_buffer_offset
};
let end_offset = self.previous_position.buffer_offset + offset_range.end;
let (end_extra_line_jumps, end_line_start) =
Self::find_number_of_line_jumps_and_start_of_last_line(
&self.data[self.previous_position.buffer_offset..end_offset],
);
let end_line_start = if end_extra_line_jumps > 0 {
end_line_start + self.previous_position.buffer_offset
} else {
self.previous_position.line_start_buffer_offset
};
TextPosition {
line: self.previous_position.global_line + start_extra_line_jumps,
column: Self::column_from_bytes(&self.data[start_line_start..start_offset]),
offset: self.previous_position.global_offset
+ u64::try_from(offset_range.start).unwrap(),
}..TextPosition {
line: self.previous_position.global_line + end_extra_line_jumps,
column: Self::column_from_bytes(&self.data[end_line_start..end_offset]),
offset: self.previous_position.global_offset + u64::try_from(offset_range.end).unwrap(),
}
}
pub fn last_token_location(&self) -> Range<TextPosition> {
TextPosition {
line: self.previous_position.global_line,
column: Self::column_from_bytes(
&self.data[self.previous_position.line_start_buffer_offset
..self.previous_position.buffer_offset],
),
offset: self.previous_position.global_offset,
}..TextPosition {
line: self.position.global_line,
column: Self::column_from_bytes(
&self.data[self.position.line_start_buffer_offset..self.position.buffer_offset],
),
offset: self.position.global_offset,
}
}
pub fn last_token_source(&self) -> Cow<'_, str> {
String::from_utf8_lossy(
&self.data[self.previous_position.buffer_offset..self.position.buffer_offset],
)
} }
pub fn is_end(&self) -> bool { pub fn is_end(&self) -> bool {
self.is_ending && self.data.len() == self.start self.is_ending && self.data.len() == self.position.buffer_offset
} }
#[allow(clippy::unwrap_in_result)]
fn skip_whitespaces_and_comments(&mut self) -> Option<()> { fn skip_whitespaces_and_comments(&mut self) -> Option<()> {
loop { loop {
self.skip_whitespaces(); self.skip_whitespaces()?;
let buf = &self.data[self.start..]; let buf = &self.data[self.position.buffer_offset..];
if let Some(line_comment_start) = self.line_comment_start { if let Some(line_comment_start) = self.line_comment_start {
if buf.starts_with(line_comment_start) { if buf.starts_with(line_comment_start) {
// Comment // Comment
if let Some(end) = memchr2(b'\r', b'\n', &buf[line_comment_start.len()..]) { if let Some(end) = memchr2(b'\r', b'\n', &buf[line_comment_start.len()..]) {
self.start += end + line_comment_start.len(); let mut end_position = line_comment_start.len() + end;
self.position += end + line_comment_start.len(); if buf.get(end_position).copied() == Some(b'\r') {
// We look for \n for Windows line end style
if let Some(c) = buf.get(end_position + 1) {
if *c == b'\n' {
end_position += 1;
}
} else if !self.is_ending {
return None; // We need to read more
}
}
let comment_size = end_position + 1;
self.position.buffer_offset += comment_size;
self.position.line_start_buffer_offset = self.position.buffer_offset;
self.position.global_offset += u64::try_from(comment_size).unwrap();
self.position.global_line += 1;
continue; continue;
} }
if self.is_ending { if self.is_ending {
self.start = self.data.len(); // EOF self.position.buffer_offset = self.data.len(); // EOF
return Some(()); return Some(());
} }
return None; // We need more data return None; // We need more data
@ -224,80 +332,98 @@ impl<R: TokenRecognizer> Lexer<R> {
} }
} }
fn skip_whitespaces(&mut self) { fn skip_whitespaces(&mut self) -> Option<()> {
if self.is_line_jump_whitespace { if self.is_line_jump_whitespace {
for (i, c) in self.data[self.start..].iter().enumerate() { let mut i = self.position.buffer_offset;
if !matches!(c, b' ' | b'\t' | b'\r' | b'\n') { while let Some(c) = self.data.get(i) {
self.start += i; match c {
self.position += i; b' ' | b'\t' => {
return; self.position.buffer_offset += 1;
} self.position.global_offset += 1;
}
b'\r' => {
// We look for \n for Windows line end style
let mut increment: u8 = 1;
if let Some(c) = self.data.get(i + 1) {
if *c == b'\n' {
increment += 1;
i += 1;
}
} else if !self.is_ending {
return None; // We need to read more
}
self.position.buffer_offset += usize::from(increment);
self.position.line_start_buffer_offset = self.position.buffer_offset;
self.position.global_offset += u64::from(increment);
self.position.global_line += 1;
}
b'\n' => {
self.position.buffer_offset += 1;
self.position.line_start_buffer_offset = self.position.buffer_offset;
self.position.global_offset += 1;
self.position.global_line += 1;
}
_ => return Some(()),
}
i += 1;
//TODO: SIMD //TODO: SIMD
} }
} else { } else {
for (i, c) in self.data[self.start..].iter().enumerate() { for c in &self.data[self.position.buffer_offset..] {
if !matches!(c, b' ' | b'\t') { if matches!(c, b' ' | b'\t') {
self.start += i; self.position.buffer_offset += 1;
self.position += i; self.position.global_offset += 1;
return; } else {
return Some(());
} }
//TODO: SIMD //TODO: SIMD
} }
} }
// We only have whitespaces Some(())
self.position += self.data.len() - self.start;
self.start = self.data.len();
} }
fn shrink_data(&mut self) { fn shrink_data(&mut self) {
if self.start > 0 { if self.position.line_start_buffer_offset > 0 {
self.data.copy_within(self.start.., 0); self.data
self.data.truncate(self.data.len() - self.start); .copy_within(self.position.line_start_buffer_offset.., 0);
self.start = 0; self.data
.truncate(self.data.len() - self.position.line_start_buffer_offset);
self.position.buffer_offset -= self.position.line_start_buffer_offset;
self.position.line_start_buffer_offset = 0;
self.previous_position = self.position;
} }
} }
}
#[derive(Debug)]
pub struct LexerError {
position: Range<usize>,
message: String,
}
impl LexerError { fn find_number_of_line_jumps_and_start_of_last_line(bytes: &[u8]) -> (u64, usize) {
pub fn position(&self) -> Range<usize> { let mut num_of_jumps = 0;
self.position.clone() let mut last_jump_pos = 0;
let mut previous_cr = 0;
for pos in memchr2_iter(b'\r', b'\n', bytes) {
if bytes[pos] == b'\r' {
previous_cr = pos;
num_of_jumps += 1;
last_jump_pos = pos + 1;
} else {
if previous_cr < pos - 1 {
// We count \r\n as a single line jump
num_of_jumps += 1;
} }
last_jump_pos = pos + 1;
pub fn message(&self) -> &str {
&self.message
} }
pub fn into_message(self) -> String {
self.message
} }
} (num_of_jumps, last_jump_pos)
}
impl fmt::Display for LexerError { fn column_from_bytes(bytes: &[u8]) -> u64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match str::from_utf8(bytes) {
if self.position.start + 1 == self.position.end { Ok(s) => u64::try_from(s.chars().count()).unwrap(),
write!( Err(e) => {
f, if e.valid_up_to() == 0 {
"Lexer error at byte {}: {}", 0
self.position.start, self.message
)
} else { } else {
write!( Self::column_from_bytes(&bytes[..e.valid_up_to()])
f, }
"Lexer error between bytes {} and {}: {}",
self.position.start, self.position.end, self.message
)
} }
} }
}
impl Error for LexerError {
fn description(&self) -> &str {
self.message()
} }
} }
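A standalone restatement of column_from_bytes above, runnable outside the lexer, showing that columns count code points and that invalid UTF-8 (for example a read that stops mid-character) falls back to the longest valid prefix:

fn column_from_bytes(bytes: &[u8]) -> u64 {
    match std::str::from_utf8(bytes) {
        // Columns count code points, not bytes
        Ok(s) => u64::try_from(s.chars().count()).unwrap(),
        // On invalid UTF-8, count only the longest valid prefix; the
        // recursion terminates because the prefix shrinks to empty
        Err(e) => column_from_bytes(&bytes[..e.valid_up_to()]),
    }
}

fn main() {
    assert_eq!(column_from_bytes("fooé".as_bytes()), 4); // 5 bytes, 4 columns
    assert_eq!(column_from_bytes(b"foo"), 3);
}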

@ -2,12 +2,12 @@
//! //!
//! Provides the basic code to write plain Rust lexers and parsers able to read files chunk by chunk. //! Provides the basic code to write plain Rust lexers and parsers able to read files chunk by chunk.
mod error;
mod lexer; mod lexer;
mod parser; mod parser;
pub use self::lexer::{Lexer, LexerError, TokenRecognizer, TokenRecognizerError}; pub use self::error::{ParseError, SyntaxError, TextPosition};
pub use self::lexer::{Lexer, TokenRecognizer, TokenRecognizerError};
#[cfg(feature = "async-tokio")] #[cfg(feature = "async-tokio")]
pub use self::parser::FromTokioAsyncReadIterator; pub use self::parser::FromTokioAsyncReadIterator;
pub use self::parser::{ pub use self::parser::{FromReadIterator, Parser, RuleRecognizer, RuleRecognizerError};
FromReadIterator, ParseError, Parser, RuleRecognizer, RuleRecognizerError, SyntaxError,
};

@ -1,9 +1,6 @@
use crate::toolkit::lexer::TokenWithPosition; use crate::toolkit::error::{ParseError, SyntaxError};
use crate::toolkit::{Lexer, LexerError, TokenRecognizer}; use crate::toolkit::lexer::{Lexer, TokenRecognizer};
use std::error::Error;
use std::io::Read; use std::io::Read;
use std::ops::Range;
use std::{fmt, io};
#[cfg(feature = "async-tokio")] #[cfg(feature = "async-tokio")]
use tokio::io::AsyncRead; use tokio::io::AsyncRead;
@ -42,7 +39,6 @@ pub struct Parser<RR: RuleRecognizer> {
state: Option<RR>, state: Option<RR>,
results: Vec<RR::Output>, results: Vec<RR::Output>,
errors: Vec<RuleRecognizerError>, errors: Vec<RuleRecognizerError>,
position: Range<usize>,
default_lexer_options: <RR::TokenRecognizer as TokenRecognizer>::Options, default_lexer_options: <RR::TokenRecognizer as TokenRecognizer>::Options,
} }
@ -53,7 +49,6 @@ impl<RR: RuleRecognizer> Parser<RR> {
state: Some(recognizer), state: Some(recognizer),
results: vec![], results: vec![],
errors: vec![], errors: vec![],
position: 0..0,
default_lexer_options: <RR::TokenRecognizer as TokenRecognizer>::Options::default(), default_lexer_options: <RR::TokenRecognizer as TokenRecognizer>::Options::default(),
} }
} }
@ -76,8 +71,10 @@ impl<RR: RuleRecognizer> Parser<RR> {
loop { loop {
if let Some(error) = self.errors.pop() { if let Some(error) = self.errors.pop() {
return Some(Err(SyntaxError { return Some(Err(SyntaxError {
position: self.position.clone(), location: self.lexer.last_token_location(),
message: error.message, message: error
.message
.replace("TOKEN", &self.lexer.last_token_source()),
})); }));
} }
if let Some(result) = self.results.pop() { if let Some(result) = self.results.pop() {
@ -89,8 +86,7 @@ impl<RR: RuleRecognizer> Parser<RR> {
.map_or(&self.default_lexer_options, |p| p.lexer_options()), .map_or(&self.default_lexer_options, |p| p.lexer_options()),
) { ) {
match result { match result {
Ok(TokenWithPosition { token, position }) => { Ok(token) => {
self.position = position;
self.state = self.state.take().map(|state| { self.state = self.state.take().map(|state| {
state.recognize_next(token, &mut self.results, &mut self.errors) state.recognize_next(token, &mut self.results, &mut self.errors)
}); });
@ -98,7 +94,7 @@ impl<RR: RuleRecognizer> Parser<RR> {
} }
Err(e) => { Err(e) => {
self.state = self.state.take().map(RR::error_recovery_state); self.state = self.state.take().map(RR::error_recovery_state);
return Some(Err(e.into())); return Some(Err(e));
} }
} }
} }
@ -126,128 +122,6 @@ impl<RR: RuleRecognizer> Parser<RR> {
} }
} }
/// An error in the syntax of the parsed file.
///
/// It is composed of a message and a byte range in the input.
#[derive(Debug)]
pub struct SyntaxError {
position: Range<usize>,
message: String,
}
impl SyntaxError {
/// The invalid byte range in the input.
#[inline]
pub fn position(&self) -> Range<usize> {
self.position.clone()
}
/// The error message.
#[inline]
pub fn message(&self) -> &str {
&self.message
}
/// Converts this error to an error message.
#[inline]
pub fn into_message(self) -> String {
self.message
}
}
impl fmt::Display for SyntaxError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.position.start + 1 == self.position.end {
write!(
f,
"Parser error at byte {}: {}",
self.position.start, self.message
)
} else {
write!(
f,
"Parser error between bytes {} and {}: {}",
self.position.start, self.position.end, self.message
)
}
}
}
impl Error for SyntaxError {}
impl From<SyntaxError> for io::Error {
#[inline]
fn from(error: SyntaxError) -> Self {
io::Error::new(io::ErrorKind::InvalidData, error)
}
}
impl From<LexerError> for SyntaxError {
#[inline]
fn from(e: LexerError) -> Self {
Self {
position: e.position(),
message: e.into_message(),
}
}
}
/// A parsing error.
///
/// It is the union of [`SyntaxError`] and [`std::io::Error`].
#[derive(Debug)]
pub enum ParseError {
/// I/O error during parsing (file not found...).
Io(io::Error),
/// An error in the file syntax.
Syntax(SyntaxError),
}
impl fmt::Display for ParseError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Io(e) => e.fmt(f),
Self::Syntax(e) => e.fmt(f),
}
}
}
impl Error for ParseError {
#[inline]
fn source(&self) -> Option<&(dyn Error + 'static)> {
Some(match self {
Self::Io(e) => e,
Self::Syntax(e) => e,
})
}
}
impl From<SyntaxError> for ParseError {
#[inline]
fn from(error: SyntaxError) -> Self {
Self::Syntax(error)
}
}
impl From<io::Error> for ParseError {
#[inline]
fn from(error: io::Error) -> Self {
Self::Io(error)
}
}
impl From<ParseError> for io::Error {
#[inline]
fn from(error: ParseError) -> Self {
match error {
ParseError::Syntax(e) => e.into(),
ParseError::Io(e) => e,
}
}
}
pub struct FromReadIterator<R: Read, RR: RuleRecognizer> { pub struct FromReadIterator<R: Read, RR: RuleRecognizer> {
read: R, read: R,
parser: Parser<RR>, parser: Parser<RR>,

@ -0,0 +1,2 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o> .
<http://example.com/s> <http:// /p> <http://example.com/o> .

@ -0,0 +1,2 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o> . # foo
<http://example.com/s> <http:// /p> <http://example.com/o> .

@ -0,0 +1,2 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o> . # foo
<http://example.com/s> <http:// /p> <http://example.com/o> .

@ -0,0 +1,2 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o> .
<http://example.com/s> <http:// /p> <http://example.com/o> .

@ -0,0 +1 @@
Parser error at line 2 between columns 24 and 36: Invalid IRI code point ' '

@ -0,0 +1,2 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o> .
<http://example.com/s> "p" <http://example.com/o> .

@ -0,0 +1 @@
Parser error at line 2 between columns 24 and 27: "p" is not a valid predicate

@ -0,0 +1 @@
<http://example.com/s> <http://example.com/p> "fooé \a baré" .

@ -0,0 +1 @@
Parser error at line 1 between columns 53 and 55: Unexpected escape character '\a'

@ -0,0 +1,66 @@
@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix rdft: <http://www.w3.org/ns/rdftest#> .
<>
rdf:type mf:Manifest ;
rdfs:comment "Oxigraph parser error test cases" ;
mf:entries (
<#invalid_iri>
<#invalid_iri_crlf>
<#invalid_iri_comment>
<#invalid_iri_comment_crlf>
<#invalid_string_escape>
<#unexpected_eof>
<#unexpected_eof_crlf>
<#invalid_predicate>
) .
<#invalid_iri>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "bad IRI" ;
mf:action <invalid_iri.nt> ;
mf:result <invalid_iri_error.txt> .
<#invalid_iri_crlf>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "bad IRI" ;
mf:action <invalid_iri_crlf.nt> ;
mf:result <invalid_iri_error.txt> .
<#invalid_iri_comment>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "bad IRI" ;
mf:action <invalid_iri_comment.nt> ;
mf:result <invalid_iri_error.txt> .
<#invalid_iri_comment_crlf>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "bad IRI" ;
mf:action <invalid_iri_comment_crlf.nt> ;
mf:result <invalid_iri_error.txt> .
<#invalid_string_escape>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "bad string escape" ;
mf:action <invalid_string_escape.nt> ;
mf:result <invalid_string_escape_error.txt> .
<#unexpected_eof>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "unexpected end of file" ;
mf:action <unexpected_eof.nt> ;
mf:result <unexpected_eof_error.txt> .
<#unexpected_eof_crlf>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "unexpected end of file" ;
mf:action <unexpected_eof_crlf.nt> ;
mf:result <unexpected_eof_error.txt> .
<#invalid_predicate>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "invalid predicate" ;
mf:action <invalid_predicate.nt> ;
mf:result <invalid_predicate_error.txt> .

@ -0,0 +1,2 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o

@ -0,0 +1,2 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o

@ -0,0 +1 @@
Parser error at line 2 column 3: Unexpected end of file

@ -1,8 +1,8 @@
use crate::evaluator::TestEvaluator; use crate::evaluator::TestEvaluator;
use crate::files::{guess_rdf_format, load_dataset, load_n3}; use crate::files::{guess_rdf_format, load_dataset, load_n3, read_file_to_string};
use crate::manifest::Test; use crate::manifest::Test;
use crate::report::dataset_diff; use crate::report::{dataset_diff, format_diff};
use anyhow::{anyhow, ensure, Result}; use anyhow::{anyhow, bail, ensure, Result};
use oxigraph::io::RdfFormat; use oxigraph::io::RdfFormat;
use oxigraph::model::{BlankNode, Dataset, Quad}; use oxigraph::model::{BlankNode, Dataset, Quad};
use oxttl::n3::{N3Quad, N3Term}; use oxttl::n3::{N3Quad, N3Term};
@ -116,10 +116,17 @@ fn evaluate_negative_syntax_test(test: &Test, format: RdfFormat) -> Result<()> {
.action .action
.as_deref() .as_deref()
.ok_or_else(|| anyhow!("No action found"))?; .ok_or_else(|| anyhow!("No action found"))?;
let Err(error) = load_dataset(action, format, false) else {
bail!("File parsed without errors even if it should not");
};
if let Some(result) = &test.result {
let expected = read_file_to_string(result)?;
ensure!( ensure!(
load_dataset(action, format, false).is_err(), expected == error.to_string(),
"File parsed without errors even if it should not" "Not expected error message:\n{}",
format_diff(&expected, &error.to_string(), "message")
); );
}
Ok(()) Ok(())
} }

@ -20,6 +20,14 @@ fn oxigraph_parser_recovery_testsuite() -> Result<()> {
) )
} }
#[test]
fn oxigraph_parser_error_testsuite() -> Result<()> {
check_testsuite(
"https://github.com/oxigraph/oxigraph/tests/parser-error/manifest.ttl",
&[],
)
}
#[test] #[test]
fn oxigraph_sparql_testsuite() -> Result<()> { fn oxigraph_sparql_testsuite() -> Result<()> {
check_testsuite( check_testsuite(
