@@ -1,9 +1,10 @@
-use memchr::memchr2;
+use crate::toolkit::error::{SyntaxError, TextPosition};
+use memchr::{memchr2, memchr2_iter};
+use std::borrow::Cow;
 use std::cmp::min;
-use std::error::Error;
-use std::fmt;
 use std::io::{self, Read};
 use std::ops::{Range, RangeInclusive};
+use std::str;
 #[cfg(feature = "async-tokio")]
 use tokio::io::{AsyncRead, AsyncReadExt};
@@ -22,14 +23,14 @@ pub trait TokenRecognizer {
 }
 
 pub struct TokenRecognizerError {
-    pub position: Range<usize>,
+    pub location: Range<usize>,
     pub message: String,
 }
 
 impl<S: Into<String>> From<(Range<usize>, S)> for TokenRecognizerError {
-    fn from((position, message): (Range<usize>, S)) -> Self {
+    fn from((location, message): (Range<usize>, S)) -> Self {
         Self {
-            position,
+            location,
             message: message.into(),
         }
     }
@@ -37,34 +38,37 @@ impl<S: Into<String>> From<(Range<usize>, S)> for TokenRecognizerError {
 #[allow(clippy::range_plus_one)]
 impl<S: Into<String>> From<(RangeInclusive<usize>, S)> for TokenRecognizerError {
-    fn from((position, message): (RangeInclusive<usize>, S)) -> Self {
-        (*position.start()..*position.end() + 1, message).into()
+    fn from((location, message): (RangeInclusive<usize>, S)) -> Self {
+        (*location.start()..*location.end() + 1, message).into()
     }
 }
 
 impl<S: Into<String>> From<(usize, S)> for TokenRecognizerError {
-    fn from((position, message): (usize, S)) -> Self {
-        (position..=position, message).into()
+    fn from((location, message): (usize, S)) -> Self {
+        (location..=location, message).into()
     }
 }
 
-pub struct TokenWithPosition<T> {
-    pub token: T,
-    pub position: Range<usize>,
-}
-
 pub struct Lexer<R: TokenRecognizer> {
     parser: R,
     data: Vec<u8>,
-    start: usize,
+    position: Position,
+    previous_position: Position, // Lexer position before the last emitted token
     is_ending: bool,
-    position: usize,
     min_buffer_size: usize,
     max_buffer_size: usize,
     is_line_jump_whitespace: bool,
     line_comment_start: Option<&'static [u8]>,
 }
 
+#[derive(Clone, Copy)]
+struct Position {
+    line_start_buffer_offset: usize,
+    buffer_offset: usize,
+    global_offset: u64,
+    global_line: u64,
+}
+
 impl<R: TokenRecognizer> Lexer<R> {
     pub fn new(
         parser: R,
@@ -76,9 +80,19 @@ impl<R: TokenRecognizer> Lexer<R> {
         Self {
             parser,
             data: Vec::new(),
-            start: 0,
+            position: Position {
+                line_start_buffer_offset: 0,
+                buffer_offset: 0,
+                global_offset: 0,
+                global_line: 0,
+            },
+            previous_position: Position {
+                line_start_buffer_offset: 0,
+                buffer_offset: 0,
+                global_offset: 0,
+                global_line: 0,
+            },
             is_ending: false,
-            position: 0,
             min_buffer_size,
             max_buffer_size,
             is_line_jump_whitespace,
@@ -148,24 +162,43 @@ impl<R: TokenRecognizer> Lexer<R> {
         Ok(())
     }
 
-    pub fn read_next(
-        &mut self,
-        options: &R::Options,
-    ) -> Option<Result<TokenWithPosition<R::Token<'_>>, LexerError>> {
+    #[allow(clippy::unwrap_in_result)]
+    pub fn read_next(&mut self, options: &R::Options) -> Option<Result<R::Token<'_>, SyntaxError>> {
         self.skip_whitespaces_and_comments()?;
-        let Some((consumed, result)) =
-            self.parser
-                .recognize_next_token(&self.data[self.start..], self.is_ending, options)
-        else {
+        self.previous_position = self.position;
+        let Some((consumed, result)) = self.parser.recognize_next_token(
+            &self.data[self.position.buffer_offset..],
+            self.is_ending,
+            options,
+        ) else {
             return if self.is_ending {
-                if self.start == self.data.len() {
+                if self.position.buffer_offset == self.data.len() {
                     None // We have finished
                 } else {
-                    let error = LexerError {
-                        position: self.position..self.position + (self.data.len() - self.start),
+                    let (new_line_jumps, new_line_start) =
+                        Self::find_number_of_line_jumps_and_start_of_last_line(
+                            &self.data[self.position.buffer_offset..],
+                        );
+                    if new_line_jumps > 0 {
+                        self.position.line_start_buffer_offset =
+                            self.position.buffer_offset + new_line_start;
+                    }
+                    self.position.global_offset +=
+                        u64::try_from(self.data.len() - self.position.buffer_offset).unwrap();
+                    self.position.buffer_offset = self.data.len();
+                    self.position.global_line += new_line_jumps;
+                    let new_position = TextPosition {
+                        line: self.position.global_line,
+                        column: Self::column_from_bytes(
+                            &self.data[self.position.line_start_buffer_offset..],
+                        ),
+                        offset: self.position.global_offset,
+                    };
+                    let error = SyntaxError {
+                        location: new_position..new_position,
                         message: "Unexpected end of file".into(),
                     };
-                    self.start = self.data.len(); // We consume everything
+                    self.position.buffer_offset = self.data.len(); // We consume everything
                     Some(Err(error))
                 }
             } else {
@@ -177,44 +210,119 @@
             "The lexer must consume at least one byte each time"
         );
         debug_assert!(
-            self.start + consumed <= self.data.len(),
+            self.position.buffer_offset + consumed <= self.data.len(),
             "The lexer tried to consume {consumed} bytes but only {} bytes are readable",
-            self.data.len() - self.start
+            self.data.len() - self.position.buffer_offset
         );
-        let old_position = self.position;
-        self.start += consumed;
-        self.position += consumed;
-        Some(match result {
-            Ok(token) => Ok(TokenWithPosition {
-                token,
-                position: old_position..self.position,
-            }),
-            Err(e) => Err(LexerError {
-                position: e.position.start + self.position..e.position.end + self.position,
-                message: e.message,
-            }),
-        })
+        let (new_line_jumps, new_line_start) =
+            Self::find_number_of_line_jumps_and_start_of_last_line(
+                &self.data[self.position.buffer_offset..self.position.buffer_offset + consumed],
+            );
+        if new_line_jumps > 0 {
+            self.position.line_start_buffer_offset = self.position.buffer_offset + new_line_start;
+        }
+        self.position.buffer_offset += consumed;
+        self.position.global_offset += u64::try_from(consumed).unwrap();
+        self.position.global_line += new_line_jumps;
+        Some(result.map_err(|e| SyntaxError {
+            location: self.location_from_buffer_offset_range(e.location),
+            message: e.message,
+        }))
     }
 
+    pub fn location_from_buffer_offset_range(
+        &self,
+        offset_range: Range<usize>,
+    ) -> Range<TextPosition> {
+        let start_offset = self.previous_position.buffer_offset + offset_range.start;
+        let (start_extra_line_jumps, start_line_start) =
+            Self::find_number_of_line_jumps_and_start_of_last_line(
+                &self.data[self.previous_position.buffer_offset..start_offset],
+            );
+        let start_line_start = if start_extra_line_jumps > 0 {
+            start_line_start + self.previous_position.buffer_offset
+        } else {
+            self.previous_position.line_start_buffer_offset
+        };
+        let end_offset = self.previous_position.buffer_offset + offset_range.end;
+        let (end_extra_line_jumps, end_line_start) =
+            Self::find_number_of_line_jumps_and_start_of_last_line(
+                &self.data[self.previous_position.buffer_offset..end_offset],
+            );
+        let end_line_start = if end_extra_line_jumps > 0 {
+            end_line_start + self.previous_position.buffer_offset
+        } else {
+            self.previous_position.line_start_buffer_offset
+        };
+        TextPosition {
+            line: self.previous_position.global_line + start_extra_line_jumps,
+            column: Self::column_from_bytes(&self.data[start_line_start..start_offset]),
+            offset: self.previous_position.global_offset
+                + u64::try_from(offset_range.start).unwrap(),
+        }..TextPosition {
+            line: self.previous_position.global_line + end_extra_line_jumps,
+            column: Self::column_from_bytes(&self.data[end_line_start..end_offset]),
+            offset: self.previous_position.global_offset + u64::try_from(offset_range.end).unwrap(),
+        }
+    }
+
+    pub fn last_token_location(&self) -> Range<TextPosition> {
+        TextPosition {
+            line: self.previous_position.global_line,
+            column: Self::column_from_bytes(
+                &self.data[self.previous_position.line_start_buffer_offset
+                    ..self.previous_position.buffer_offset],
+            ),
+            offset: self.previous_position.global_offset,
+        }..TextPosition {
+            line: self.position.global_line,
+            column: Self::column_from_bytes(
+                &self.data[self.position.line_start_buffer_offset..self.position.buffer_offset],
+            ),
+            offset: self.position.global_offset,
+        }
+    }
+
+    pub fn last_token_source(&self) -> Cow<'_, str> {
+        String::from_utf8_lossy(
+            &self.data[self.previous_position.buffer_offset..self.position.buffer_offset],
+        )
+    }
+
     pub fn is_end(&self) -> bool {
-        self.is_ending && self.data.len() == self.start
+        self.is_ending && self.data.len() == self.position.buffer_offset
     }
 
+    #[allow(clippy::unwrap_in_result)]
     fn skip_whitespaces_and_comments(&mut self) -> Option<()> {
         loop {
-            self.skip_whitespaces();
+            self.skip_whitespaces()?;
-            let buf = &self.data[self.start..];
+            let buf = &self.data[self.position.buffer_offset..];
             if let Some(line_comment_start) = self.line_comment_start {
                 if buf.starts_with(line_comment_start) {
                     // Comment
                     if let Some(end) = memchr2(b'\r', b'\n', &buf[line_comment_start.len()..]) {
-                        self.start += end + line_comment_start.len();
-                        self.position += end + line_comment_start.len();
+                        let mut end_position = line_comment_start.len() + end;
+                        if buf.get(end_position).copied() == Some(b'\r') {
+                            // We look for \n for Windows line end style
+                            if let Some(c) = buf.get(end_position + 1) {
+                                if *c == b'\n' {
+                                    end_position += 1;
+                                }
+                            } else if !self.is_ending {
+                                return None; // We need to read more
+                            }
+                        }
+                        let comment_size = end_position + 1;
+                        self.position.buffer_offset += comment_size;
+                        self.position.line_start_buffer_offset = self.position.buffer_offset;
+                        self.position.global_offset += u64::try_from(comment_size).unwrap();
+                        self.position.global_line += 1;
                         continue;
                     }
                     if self.is_ending {
-                        self.start = self.data.len(); // EOF
+                        self.position.buffer_offset = self.data.len(); // EOF
                         return Some(());
                     }
                     return None; // We need more data
@@ -224,80 +332,98 @@ impl<R: TokenRecognizer> Lexer<R> {
         }
     }
 
-    fn skip_whitespaces(&mut self) {
+    fn skip_whitespaces(&mut self) -> Option<()> {
         if self.is_line_jump_whitespace {
-            for (i, c) in self.data[self.start..].iter().enumerate() {
-                if !matches!(c, b' ' | b'\t' | b'\r' | b'\n') {
-                    self.start += i;
-                    self.position += i;
-                    return;
+            let mut i = self.position.buffer_offset;
+            while let Some(c) = self.data.get(i) {
+                match c {
+                    b' ' | b'\t' => {
+                        self.position.buffer_offset += 1;
+                        self.position.global_offset += 1;
+                    }
+                    b'\r' => {
+                        // We look for \n for Windows line end style
+                        let mut increment: u8 = 1;
+                        if let Some(c) = self.data.get(i + 1) {
+                            if *c == b'\n' {
+                                increment += 1;
+                                i += 1;
+                            }
+                        } else if !self.is_ending {
+                            return None; // We need to read more
+                        }
+                        self.position.buffer_offset += usize::from(increment);
+                        self.position.line_start_buffer_offset = self.position.buffer_offset;
+                        self.position.global_offset += u64::from(increment);
+                        self.position.global_line += 1;
+                    }
+                    b'\n' => {
+                        self.position.buffer_offset += 1;
+                        self.position.line_start_buffer_offset = self.position.buffer_offset;
+                        self.position.global_offset += 1;
+                        self.position.global_line += 1;
+                    }
+                    _ => return Some(()),
                 }
+                i += 1;
                 //TODO: SIMD
             }
         } else {
-            for (i, c) in self.data[self.start..].iter().enumerate() {
-                if !matches!(c, b' ' | b'\t') {
-                    self.start += i;
-                    self.position += i;
-                    return;
+            for c in &self.data[self.position.buffer_offset..] {
+                if matches!(c, b' ' | b'\t') {
+                    self.position.buffer_offset += 1;
+                    self.position.global_offset += 1;
+                } else {
+                    return Some(());
                 }
                 //TODO: SIMD
             }
         }
-        // We only have whitespaces
-        self.position += self.data.len() - self.start;
-        self.start = self.data.len();
+        Some(())
     }
 
     fn shrink_data(&mut self) {
-        if self.start > 0 {
-            self.data.copy_within(self.start.., 0);
-            self.data.truncate(self.data.len() - self.start);
-            self.start = 0;
+        if self.position.line_start_buffer_offset > 0 {
+            self.data
+                .copy_within(self.position.line_start_buffer_offset.., 0);
+            self.data
+                .truncate(self.data.len() - self.position.line_start_buffer_offset);
+            self.position.buffer_offset -= self.position.line_start_buffer_offset;
+            self.position.line_start_buffer_offset = 0;
+            self.previous_position = self.position;
         }
     }
-}
 
-#[derive(Debug)]
-pub struct LexerError {
-    position: Range<usize>,
-    message: String,
-}
-
-impl LexerError {
-    pub fn position(&self) -> Range<usize> {
-        self.position.clone()
-    }
-
-    pub fn message(&self) -> &str {
-        &self.message
-    }
-
-    pub fn into_message(self) -> String {
-        self.message
-    }
-}
-
-impl fmt::Display for LexerError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        if self.position.start + 1 == self.position.end {
-            write!(
-                f,
-                "Lexer error at byte {}: {}",
-                self.position.start, self.message
-            )
-        } else {
-            write!(
-                f,
-                "Lexer error between bytes {} and {}: {}",
-                self.position.start, self.position.end, self.message
-            )
+    fn find_number_of_line_jumps_and_start_of_last_line(bytes: &[u8]) -> (u64, usize) {
+        let mut num_of_jumps = 0;
+        let mut last_jump_pos = 0;
+        let mut previous_cr = 0;
+        for pos in memchr2_iter(b'\r', b'\n', bytes) {
+            if bytes[pos] == b'\r' {
+                previous_cr = pos;
+                num_of_jumps += 1;
+                last_jump_pos = pos + 1;
+            } else {
+                if previous_cr < pos - 1 {
+                    // We count \r\n as a single line jump
+                    num_of_jumps += 1;
+                }
+                last_jump_pos = pos + 1;
+            }
         }
+        (num_of_jumps, last_jump_pos)
     }
-}
 
-impl Error for LexerError {
-    fn description(&self) -> &str {
-        self.message()
+    fn column_from_bytes(bytes: &[u8]) -> u64 {
+        match str::from_utf8(bytes) {
+            Ok(s) => u64::try_from(s.chars().count()).unwrap(),
+            Err(e) => {
+                if e.valid_up_to() == 0 {
+                    0
+                } else {
+                    Self::column_from_bytes(&bytes[..e.valid_up_to()])
+                }
+            }
+        }
     }
 }
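
Reviewer note (not part of the patch): the new position tracking rests on two private helpers, find_number_of_line_jumps_and_start_of_last_line and column_from_bytes. The standalone sketch below models the same accounting so it can be run and experimented with in isolation; the function names, the Option-based previous_cr guard, and the sample input are this sketch's own choices, not code taken from the patch. It assumes only the memchr crate the patch already depends on.

// Standalone sketch of the line/column accounting introduced above.
use memchr::memchr2_iter;
use std::str;

// Counts line jumps in a byte chunk, treating "\r\n" as a single jump, and
// returns the byte offset at which the last line starts.
fn line_jumps_and_last_line_start(bytes: &[u8]) -> (u64, usize) {
    let mut jumps = 0;
    let mut last_line_start = 0;
    let mut previous_cr = None; // Option instead of a 0 sentinel, a choice of this sketch
    for pos in memchr2_iter(b'\r', b'\n', bytes) {
        if bytes[pos] == b'\r' {
            previous_cr = Some(pos);
            jumps += 1;
        } else if pos == 0 || previous_cr != Some(pos - 1) {
            jumps += 1; // a lone \n; a directly preceding \r already counted \r\n
        }
        last_line_start = pos + 1;
    }
    (jumps, last_line_start)
}

// Columns count Unicode scalar values since the line start, not bytes; a
// partial UTF-8 sequence at the end of the buffer falls back to the valid
// prefix, mirroring the recovery strategy of column_from_bytes.
fn column_from_bytes(bytes: &[u8]) -> u64 {
    match str::from_utf8(bytes) {
        Ok(s) => s.chars().count() as u64,
        Err(e) => column_from_bytes(&bytes[..e.valid_up_to()]),
    }
}

fn main() {
    let chunk: &[u8] = b"SELECT *\r\nWHERE { ?s ?p \xC3\xA9"; // ends mid-line with 'e' + accent (U+00E9)
    let (jumps, line_start) = line_jumps_and_last_line_start(chunk);
    assert_eq!(jumps, 1); // "\r\n" is one line jump, not two
    assert_eq!(column_from_bytes(&chunk[line_start..]), 15); // 15 chars, although 16 bytes
    println!("line jumps: {jumps}, column on last line: {}", column_from_bytes(&chunk[line_start..]));
}

This is also why shrink_data now discards only bytes before line_start_buffer_offset rather than before buffer_offset: the column of the current position is recomputed from the start of the current line, so those bytes must stay in the buffer.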