From 13c3515d7b1b537dd08ef0e4391afa3e347d14f9 Mon Sep 17 00:00:00 2001
From: Tpt
Date: Sat, 2 Sep 2023 17:12:43 +0200
Subject: [PATCH] OxTTL: return file position in errors

---
 fuzz/fuzz_targets/nquads.rs                   |  10 +-
 fuzz/fuzz_targets/trig.rs                     |  10 +-
 lib/oxrdfio/src/error.rs                      |  34 +-
 lib/oxrdfxml/src/error.rs                     |  12 -
 lib/oxrdfxml/src/parser.rs                    |   6 +-
 lib/oxttl/src/lexer.rs                        |   6 +-
 lib/oxttl/src/lib.rs                          |   2 +-
 lib/oxttl/src/line_formats.rs                 |   8 +-
 lib/oxttl/src/n3.rs                           |   4 +-
 lib/oxttl/src/terse.rs                        |  20 +-
 lib/oxttl/src/toolkit/error.rs                | 132 +++++++
 lib/oxttl/src/toolkit/lexer.rs                | 344 ++++++++++++------
 lib/oxttl/src/toolkit/mod.rs                  |   8 +-
 lib/oxttl/src/toolkit/parser.rs               | 142 +-------
 .../parser-error/invalid_iri.nt               |   2 +
 .../parser-error/invalid_iri_comment.nt       |   2 +
 .../parser-error/invalid_iri_comment_crlf.nt  |   2 +
 .../parser-error/invalid_iri_crlf.nt          |   2 +
 .../parser-error/invalid_iri_error.txt        |   1 +
 .../parser-error/invalid_predicate.nt         |   2 +
 .../parser-error/invalid_predicate_error.txt  |   1 +
 .../parser-error/invalid_string_escape.nt     |   1 +
 .../invalid_string_escape_error.txt           |   1 +
 .../oxigraph-tests/parser-error/manifest.ttl  |  66 ++++
 .../parser-error/unexpected_eof.nt            |   2 +
 .../parser-error/unexpected_eof_crlf.nt       |   2 +
 .../parser-error/unexpected_eof_error.txt     |   1 +
 testsuite/src/parser_evaluator.rs             |  21 +-
 testsuite/tests/oxigraph.rs                   |   8 +
 29 files changed, 552 insertions(+), 300 deletions(-)
 create mode 100644 lib/oxttl/src/toolkit/error.rs
 create mode 100644 testsuite/oxigraph-tests/parser-error/invalid_iri.nt
 create mode 100644 testsuite/oxigraph-tests/parser-error/invalid_iri_comment.nt
 create mode 100644 testsuite/oxigraph-tests/parser-error/invalid_iri_comment_crlf.nt
 create mode 100644 testsuite/oxigraph-tests/parser-error/invalid_iri_crlf.nt
 create mode 100644 testsuite/oxigraph-tests/parser-error/invalid_iri_error.txt
 create mode 100644 testsuite/oxigraph-tests/parser-error/invalid_predicate.nt
 create mode 100644 testsuite/oxigraph-tests/parser-error/invalid_predicate_error.txt
 create mode 100644 testsuite/oxigraph-tests/parser-error/invalid_string_escape.nt
 create mode 100644 testsuite/oxigraph-tests/parser-error/invalid_string_escape_error.txt
 create mode 100644 testsuite/oxigraph-tests/parser-error/manifest.ttl
 create mode 100644 testsuite/oxigraph-tests/parser-error/unexpected_eof.nt
 create mode 100644 testsuite/oxigraph-tests/parser-error/unexpected_eof_crlf.nt
 create mode 100644 testsuite/oxigraph-tests/parser-error/unexpected_eof_error.txt

diff --git a/fuzz/fuzz_targets/nquads.rs b/fuzz/fuzz_targets/nquads.rs
index a7de4913..c964e229 100644
--- a/fuzz/fuzz_targets/nquads.rs
+++ b/fuzz/fuzz_targets/nquads.rs
@@ -2,9 +2,9 @@
 use libfuzzer_sys::fuzz_target;
 use oxrdf::Quad;
-use oxttl::{NQuadsParser, NQuadsSerializer, SyntaxError};
+use oxttl::{NQuadsParser, NQuadsSerializer};
 
-fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<SyntaxError>) {
+fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<String>) {
     let mut quads = Vec::new();
     let mut errors = Vec::new();
     let mut parser = NQuadsParser::new().with_quoted_triples().parse();
@@ -13,7 +13,7 @@ fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<Sy
             match result {
                 Ok(quad) => quads.push(quad),
-                Err(error) => errors.push(error),
+                Err(error) => errors.push(error.to_string()),
             }
         }
     }
@@ -21,7 +21,7 @@ fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<Sy
         match result {
             Ok(quad) => quads.push(quad),
-            Err(error) => errors.push(error),
+            Err(error) => errors.push(error.to_string()),
         }
     }
     assert!(parser.is_end());
@@ -39,7 +39,7 @@ fuzz_target!(|data: &[u8]| {
         .collect::<Vec<_>>()
         .as_slice()]);
     assert_eq!(quads, quads_without_split);
-    assert_eq!(errors.len(), errors_without_split.len());
+    assert_eq!(errors, errors_without_split);
 
     // We serialize
     let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new());
diff --git a/fuzz/fuzz_targets/trig.rs b/fuzz/fuzz_targets/trig.rs
index 6a930a97..c0713e69 100644
--- a/fuzz/fuzz_targets/trig.rs
+++ b/fuzz/fuzz_targets/trig.rs
@@ -2,9 +2,9 @@
 use libfuzzer_sys::fuzz_target;
 use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple};
-use oxttl::{SyntaxError, TriGParser, TriGSerializer};
+use oxttl::{TriGParser, TriGSerializer};
 
-fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<SyntaxError>) {
+fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<String>) {
     let mut quads = Vec::new();
     let mut errors = Vec::new();
     let mut parser = TriGParser::new()
@@ -17,7 +17,7 @@ fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<Sy
             match result {
                 Ok(quad) => quads.push(quad),
-                Err(error) => errors.push(error),
+                Err(error) => errors.push(error.to_string()),
             }
         }
     }
@@ -25,7 +25,7 @@ fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<Sy
         match result {
             Ok(quad) => quads.push(quad),
-            Err(error) => errors.push(error),
+            Err(error) => errors.push(error.to_string()),
         }
     }
     assert!(parser.is_end());
@@ -96,7 +96,7 @@ fuzz_target!(|data: &[u8]| {
             String::from_utf8_lossy(&serialize_quads(&quads_without_split))
         );
     }
-    assert_eq!(errors.len(), errors_without_split.len());
+    assert_eq!(errors, errors_without_split);
 
     // We serialize
     let new_serialization = serialize_quads(&quads);

diff --git a/lib/oxrdfio/src/error.rs b/lib/oxrdfio/src/error.rs
index ac8173a7..235ba1b7 100644
--- a/lib/oxrdfio/src/error.rs
+++ b/lib/oxrdfio/src/error.rs
@@ -1,4 +1,5 @@
 use std::error::Error;
+use std::ops::Range;
 use std::{fmt, io};
 
 /// Error returned during RDF format parsing.
@@ -110,10 +111,33 @@ pub struct SyntaxError {
 enum SyntaxErrorKind {
     Turtle(oxttl::SyntaxError),
     RdfXml(oxrdfxml::SyntaxError),
-    Msg { msg: &'static str },
 }
 
+impl SyntaxError {
+    /// The location of the error inside the file.
+    #[inline]
+    pub fn location(&self) -> Option<Range<TextPosition>> {
+        match &self.inner {
+            SyntaxErrorKind::Turtle(e) => {
+                let location = e.location();
+                Some(
+                    TextPosition {
+                        line: location.start.line,
+                        column: location.start.column,
+                        offset: location.start.offset,
+                    }..TextPosition {
+                        line: location.end.line,
+                        column: location.end.column,
+                        offset: location.end.offset,
+                    },
+                )
+            }
+            SyntaxErrorKind::RdfXml(_) | SyntaxErrorKind::Msg { .. } => None,
+        }
+    }
+}
+
 impl fmt::Display for SyntaxError {
     #[inline]
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -146,3 +170,11 @@ impl From<ParseError> for io::Error {
     }
 }
+
+/// A position in a text, i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points), and a global file `offset` starting from 0 (in number of bytes).
+#[derive(Eq, PartialEq, Debug, Clone, Copy)]
+pub struct TextPosition {
+    pub line: u64,
+    pub column: u64,
+    pub offset: u64,
+}
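The `location()` accessor above is the only public API addition to oxrdfio in this patch. A minimal consumer-side sketch of how the optional positions might be rendered for diagnostics; the helper name and output format are illustrative, not part of the patch:

    use oxrdfio::SyntaxError;
    use std::ops::Range;

    // Hypothetical helper: render the optional error location for diagnostics.
    // TextPosition lines and columns are 0-based, hence the + 1 for display.
    fn describe_location(error: &SyntaxError) -> String {
        match error.location() {
            Some(Range { start, end }) => format!(
                "from line {} column {} to line {} column {} (bytes {}..{})",
                start.line + 1,
                start.column + 1,
                end.line + 1,
                end.column + 1,
                start.offset,
                end.offset
            ),
            // RDF/XML and ad-hoc message errors carry no location yet.
            None => "at an unknown position".to_owned(),
        }
    }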
diff --git a/lib/oxrdfxml/src/error.rs b/lib/oxrdfxml/src/error.rs
index fd561be6..cb9eb9c4 100644
--- a/lib/oxrdfxml/src/error.rs
+++ b/lib/oxrdfxml/src/error.rs
@@ -72,15 +72,6 @@ impl From<quick_xml::Error> for ParseError {
     }
 }
 
-impl From<quick_xml::events::attributes::AttrError> for ParseError {
-    #[inline]
-    fn from(error: quick_xml::events::attributes::AttrError) -> Self {
-        Self::Syntax(SyntaxError {
-            inner: SyntaxErrorKind::XmlAttribute(error),
-        })
-    }
-}
-
 /// An error in the syntax of the parsed file.
 #[derive(Debug)]
 pub struct SyntaxError {
@@ -90,7 +81,6 @@ pub struct SyntaxError {
 #[derive(Debug)]
 pub enum SyntaxErrorKind {
     Xml(quick_xml::Error),
-    XmlAttribute(quick_xml::events::attributes::AttrError),
     InvalidIri {
         iri: String,
         error: IriParseError,
@@ -119,7 +109,6 @@ impl fmt::Display for SyntaxError {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match &self.inner {
             SyntaxErrorKind::Xml(error) => error.fmt(f),
-            SyntaxErrorKind::XmlAttribute(error) => error.fmt(f),
             SyntaxErrorKind::InvalidIri { iri, error } => {
                 write!(f, "error while parsing IRI '{iri}': {error}")
             }
@@ -136,7 +125,6 @@ impl Error for SyntaxError {
     fn source(&self) -> Option<&(dyn Error + 'static)> {
         match &self.inner {
             SyntaxErrorKind::Xml(error) => Some(error),
-            SyntaxErrorKind::XmlAttribute(error) => Some(error),
             SyntaxErrorKind::InvalidIri { error, .. } => Some(error),
             SyntaxErrorKind::InvalidLanguageTag { error, .. } => Some(error),
             SyntaxErrorKind::Msg { .. } => None,

diff --git a/lib/oxrdfxml/src/parser.rs b/lib/oxrdfxml/src/parser.rs
index 22983350..dcd216a4 100644
--- a/lib/oxrdfxml/src/parser.rs
+++ b/lib/oxrdfxml/src/parser.rs
@@ -8,7 +8,7 @@ use quick_xml::escape::unescape_with;
 use quick_xml::events::attributes::Attribute;
 use quick_xml::events::*;
 use quick_xml::name::{LocalName, QName, ResolveResult};
-use quick_xml::{NsReader, Writer};
+use quick_xml::{Error, NsReader, Writer};
 use std::collections::{HashMap, HashSet};
 use std::io::{BufReader, Read};
 use std::str;
@@ -515,7 +515,7 @@ impl RdfXmlReader {
                     .to_string(),
                 );
                 for attr in event.attributes() {
-                    clean_event.push_attribute(attr?);
+                    clean_event.push_attribute(attr.map_err(Error::InvalidAttr)?);
                 }
                 writer.write_event(Event::Start(clean_event))?;
                 self.in_literal_depth += 1;
@@ -544,7 +544,7 @@ impl RdfXmlReader {
 
         let mut type_attr = None;
         for attribute in event.attributes() {
-            let attribute = attribute?;
+            let attribute = attribute.map_err(Error::InvalidAttr)?;
             if attribute.key.as_ref().starts_with(b"xml") {
                 if attribute.key.as_ref() == b"xml:lang" {
                     let tag = self.convert_attribute(&attribute)?;

diff --git a/lib/oxttl/src/lexer.rs b/lib/oxttl/src/lexer.rs
index f12f3b25..65dba56e 100644
--- a/lib/oxttl/src/lexer.rs
+++ b/lib/oxttl/src/lexer.rs
@@ -266,7 +266,7 @@ impl N3Lexer {
                         ));
                     }
                 }
-                Err(e) => return Some((e.position.end, Err(e))),
+                Err(e) => return Some((e.location.end, Err(e))),
             }
         } else if is_ending {
             while data[..i].ends_with(b".") {
@@ -447,7 +447,7 @@ impl N3Lexer {
                         return Some((i, Ok((buffer, might_be_invalid_iri))));
                     }
                 }
-                Err(e) => return Some((e.position.end, Err(e))),
+                Err(e) => return Some((e.location.end, Err(e))),
             }
         } else if is_ending {
             let buffer = if let Some(mut buffer) = buffer {
@@ -515,7 +515,7 @@ impl N3Lexer {
                     }
                     i += consumed;
                 }
-                Err(e) => return Some((e.position.end, Err(e))),
+                Err(e) => return Some((e.location.end, Err(e))),
             }
         }
     }

diff --git a/lib/oxttl/src/lib.rs b/lib/oxttl/src/lib.rs
index ac96515e..0e04e243 100644
--- a/lib/oxttl/src/lib.rs
+++ b/lib/oxttl/src/lib.rs
@@ -17,7 +17,7 @@ pub mod turtle;
 pub use crate::n3::N3Parser;
 pub use crate::nquads::{NQuadsParser, NQuadsSerializer};
 pub use crate::ntriples::{NTriplesParser, NTriplesSerializer};
-pub use crate::toolkit::{ParseError, SyntaxError};
+pub use crate::toolkit::{ParseError, SyntaxError, TextPosition};
 pub use crate::trig::{TriGParser, TriGSerializer};
 pub use crate::turtle::{TurtleParser, TurtleSerializer};
diff --git a/lib/oxttl/src/line_formats.rs b/lib/oxttl/src/line_formats.rs
index f95e56f3..1b4c31e6 100644
--- a/lib/oxttl/src/line_formats.rs
+++ b/lib/oxttl/src/line_formats.rs
@@ -76,7 +76,7 @@ impl RuleRecognizer for NQuadsRecognizer {
                 }
                 _ => self.error(
                     errors,
-                    format!("The subject of a triple should be an IRI or a blank node, {token:?} found"),
+                    "The subject of a triple should be an IRI or a blank node, TOKEN found",
                 ),
             },
             NQuadsState::ExpectPredicate => match token {
@@ -88,7 +88,7 @@ impl RuleRecognizer for NQuadsRecognizer {
                 }
                 _ => self.error(
                     errors,
-                    format!("The predicate of a triple should be an IRI, {token:?} found"),
+                    "The predicate of a triple should be an IRI, TOKEN found",
                 ),
             },
             NQuadsState::ExpectedObject => match token {
@@ -118,7 +118,7 @@ impl RuleRecognizer for NQuadsRecognizer {
                 }
                 _ => self.error(
                     errors,
-                    format!("The object of a triple should be an IRI, a blank node or a literal, {token:?} found"),
+                    "The object of a triple should be an IRI, a blank node or a literal, TOKEN found",
                 ),
             },
             NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value } => match token {
@@ -159,7 +159,7 @@ impl RuleRecognizer for NQuadsRecognizer {
                         .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                     self
                 }
-                _ => self.error(errors, format!("A literal datatype must be an IRI, found {token:?}")),
+                _ => self.error(errors, "A literal datatype must be an IRI, found TOKEN"),
             },
             NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple => {
                 if self.stack.is_empty() {

diff --git a/lib/oxttl/src/n3.rs b/lib/oxttl/src/n3.rs
index 46274ecf..84e36235 100644
--- a/lib/oxttl/src/n3.rs
+++ b/lib/oxttl/src/n3.rs
@@ -836,7 +836,7 @@ impl RuleRecognizer for N3Recognizer {
                         self.stack.push(N3State::FormulaContent);
                         self
                     }
-                    _ => self.error(errors, format!("This is not a valid RDF value: {token:?}"))
+                    _ => self.error(errors, "TOKEN is not a valid RDF value")
                 }
             }
             N3State::PropertyListMiddle => match token {
@@ -950,7 +950,7 @@ impl RuleRecognizer for N3Recognizer {
                     Err(e) => self.error(errors, e)
                 }
                 _ => {
-                    self.error(errors, format!("Expecting a datatype IRI after '^^, found {token:?}")).recognize_next(token, results, errors)
+                    self.error(errors, "Expecting a datatype IRI after '^^', found TOKEN").recognize_next(token, results, errors)
                 }
             }
         }

diff --git a/lib/oxttl/src/terse.rs b/lib/oxttl/src/terse.rs
index ecd24d4a..f99aaf4d 100644
--- a/lib/oxttl/src/terse.rs
+++ b/lib/oxttl/src/terse.rs
@@ -167,7 +167,7 @@ impl RuleRecognizer for TriGRecognizer {
                     self
                 }
                 _ => {
-                    self.error(errors, format!("The token {token:?} is not a valid subject or graph name"))
+                    self.error(errors, "TOKEN is not a valid subject or graph name")
                 }
             }
             TriGState::WrappedGraphOrPredicateObjectList { term } => {
@@ -317,7 +317,7 @@ impl RuleRecognizer for TriGRecognizer {
                     self
                 }
                 _ => {
-                    self.error(errors, format!("The token {token:?} is not a valid RDF subject"))
+                    self.error(errors, "TOKEN is not a valid RDF subject")
                 }
             },
             TriGState::TriplesBlankNodePropertyListCurrent => if token == N3Token::Punctuation("]") {
@@ -350,7 +350,7 @@ impl RuleRecognizer for TriGRecognizer {
                     self
                 }
                 _ => {
-                    self.error(errors, format!("The token {token:?} is not a valid graph name"))
+                    self.error(errors, "TOKEN is not a valid graph name")
                 }
             }
             TriGState::GraphNameAnonEnd => if token == N3Token::Punctuation("]") {
@@ -456,7 +456,7 @@ impl RuleRecognizer for TriGRecognizer {
                     Err(e) => self.error(errors, e)
                 }
                 _ => {
-                    self.error(errors, format!("The token {token:?} is not a valid predicate"))
+                    self.error(errors, "TOKEN is not a valid predicate")
                 }
             }
             // [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal | quotedTriple
@@ -536,7 +536,7 @@ impl RuleRecognizer for TriGRecognizer {
                     self
                 }
                 _ => {
-                    self.error(errors, format!("This is not a valid RDF object: {token:?}"))
+                    self.error(errors, "TOKEN is not a valid RDF object")
                 }
             }
@@ -637,7 +637,7 @@ impl RuleRecognizer for TriGRecognizer {
                     Err(e) => self.error(errors, e)
                 }
                 _ => {
-                    self.error(errors, format!("Expecting a datatype IRI after '^^, found {token:?}")).recognize_next(token, results, errors)
+                    self.error(errors, "Expecting a datatype IRI after '^^', found TOKEN").recognize_next(token, results, errors)
                 }
             }
         }
@@ -653,7 +653,7 @@ impl RuleRecognizer for TriGRecognizer {
                 if token == N3Token::Punctuation(">>") {
                     self
                 } else {
-                    self.error(errors, format!("Expecting '>>' to close a quoted triple, found {token:?}"))
+                    self.error(errors, "Expecting '>>' to close a quoted triple, found TOKEN")
                 }
             }
             #[cfg(feature = "rdf-star")]
@@ -670,7 +670,7 @@ impl RuleRecognizer for TriGRecognizer {
                 if token == N3Token::Punctuation(">>") {
                     self
                 } else {
-                    self.error(errors, format!("Expecting '>>' to close a quoted triple, found {token:?}"))
+                    self.error(errors, "Expecting '>>' to close a quoted triple, found TOKEN")
                 }
             }
             // [28t] qtSubject ::= iri | BlankNode | quotedTriple
@@ -703,7 +703,7 @@ impl RuleRecognizer for TriGRecognizer {
                     self.stack.push(TriGState::QuotedSubject);
                     self
                 }
-                _ => self.error(errors, format!("This is not a valid RDF quoted triple subject: {token:?}"))
+                _ => self.error(errors, "TOKEN is not a valid RDF quoted triple subject")
             }
             // [29t] qtObject ::= iri | BlankNode | literal | quotedTriple
             #[cfg(feature = "rdf-star")]
@@ -759,7 +759,7 @@ impl RuleRecognizer for TriGRecognizer {
                     self.stack.push(TriGState::QuotedSubject);
                     self
                 }
-                _ => self.error(errors, format!("This is not a valid RDF quoted triple object: {token:?}"))
+                _ => self.error(errors, "TOKEN is not a valid RDF quoted triple object")
             }
             #[cfg(feature = "rdf-star")]
             TriGState::QuotedAnonEnd => if token == N3Token::Punctuation("]") {

diff --git a/lib/oxttl/src/toolkit/error.rs b/lib/oxttl/src/toolkit/error.rs
new file mode 100644
index 00000000..df50b950
--- /dev/null
+++ b/lib/oxttl/src/toolkit/error.rs
@@ -0,0 +1,132 @@
+use std::error::Error;
+use std::ops::Range;
+use std::{fmt, io};
+
+/// A position in a text, i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points), and a global file `offset` starting from 0 (in number of bytes).
+#[derive(Eq, PartialEq, Debug, Clone, Copy)]
+pub struct TextPosition {
+    pub line: u64,
+    pub column: u64,
+    pub offset: u64,
+}
+
+/// An error in the syntax of the parsed file.
+///
+/// It is composed of a message and a position range in the input.
+#[derive(Debug)]
+pub struct SyntaxError {
+    pub(super) location: Range<TextPosition>,
+    pub(super) message: String,
+}
+
+impl SyntaxError {
+    /// The location of the error inside the file.
+    #[inline]
+    pub fn location(&self) -> Range<TextPosition> {
+        self.location.clone()
+    }
+
+    /// The error message.
+    #[inline]
+    pub fn message(&self) -> &str {
+        &self.message
+    }
+}
+
+impl fmt::Display for SyntaxError {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if self.location.start.offset + 1 >= self.location.end.offset {
+            write!(
+                f,
+                "Parser error at line {} column {}: {}",
+                self.location.start.line + 1,
+                self.location.start.column + 1,
+                self.message
+            )
+        } else if self.location.start.line == self.location.end.line {
+            write!(
+                f,
+                "Parser error at line {} between columns {} and {}: {}",
+                self.location.start.line + 1,
+                self.location.start.column + 1,
+                self.location.end.column + 1,
+                self.message
+            )
+        } else {
+            write!(
+                f,
+                "Parser error between line {} column {} and line {} column {}: {}",
+                self.location.start.line + 1,
+                self.location.start.column + 1,
+                self.location.end.line + 1,
+                self.location.end.column + 1,
+                self.message
+            )
+        }
+    }
+}
+
+impl Error for SyntaxError {}
+
+impl From<SyntaxError> for io::Error {
+    #[inline]
+    fn from(error: SyntaxError) -> Self {
+        io::Error::new(io::ErrorKind::InvalidData, error)
+    }
+}
+
+/// A parsing error.
+///
+/// It is the union of [`SyntaxError`] and [`std::io::Error`].
+#[derive(Debug)]
+pub enum ParseError {
+    /// I/O error during parsing (file not found...).
+    Io(io::Error),
+    /// An error in the file syntax.
+    Syntax(SyntaxError),
+}
+
+impl fmt::Display for ParseError {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Io(e) => e.fmt(f),
+            Self::Syntax(e) => e.fmt(f),
+        }
+    }
+}
+
+impl Error for ParseError {
+    #[inline]
+    fn source(&self) -> Option<&(dyn Error + 'static)> {
+        Some(match self {
+            Self::Io(e) => e,
+            Self::Syntax(e) => e,
+        })
+    }
+}
+
+impl From<SyntaxError> for ParseError {
+    #[inline]
+    fn from(error: SyntaxError) -> Self {
+        Self::Syntax(error)
+    }
+}
+
+impl From<io::Error> for ParseError {
+    #[inline]
+    fn from(error: io::Error) -> Self {
+        Self::Io(error)
+    }
+}
+
+impl From<ParseError> for io::Error {
+    #[inline]
+    fn from(error: ParseError) -> Self {
+        match error {
+            ParseError::Syntax(e) => e.into(),
+            ParseError::Io(e) => e,
+        }
+    }
+}
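Columns count code points while offsets count bytes, so the two diverge on any non-ASCII line. A quick standalone illustration of the convention documented above (plain std, not part of the patch):

    fn main() {
        let line = "héllo"; // 'é' is one code point but two bytes in UTF-8
        let byte_offset = line.find('o').unwrap(); // what `offset` advances by
        let column = line[..byte_offset].chars().count(); // what `column` advances by
        assert_eq!(byte_offset, 5);
        assert_eq!(column, 4);
    }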
diff --git a/lib/oxttl/src/toolkit/lexer.rs b/lib/oxttl/src/toolkit/lexer.rs
index 34c1c01e..0f7373c2 100644
--- a/lib/oxttl/src/toolkit/lexer.rs
+++ b/lib/oxttl/src/toolkit/lexer.rs
@@ -1,9 +1,10 @@
-use memchr::memchr2;
+use crate::toolkit::error::{SyntaxError, TextPosition};
+use memchr::{memchr2, memchr2_iter};
+use std::borrow::Cow;
 use std::cmp::min;
-use std::error::Error;
-use std::fmt;
 use std::io::{self, Read};
 use std::ops::{Range, RangeInclusive};
+use std::str;
 #[cfg(feature = "async-tokio")]
 use tokio::io::{AsyncRead, AsyncReadExt};
@@ -22,14 +23,14 @@ pub trait TokenRecognizer {
 }
 
 pub struct TokenRecognizerError {
-    pub position: Range<usize>,
+    pub location: Range<usize>,
     pub message: String,
 }
 
 impl<S: Into<String>> From<(Range<usize>, S)> for TokenRecognizerError {
-    fn from((position, message): (Range<usize>, S)) -> Self {
+    fn from((location, message): (Range<usize>, S)) -> Self {
         Self {
-            position,
+            location,
             message: message.into(),
         }
     }
 }
 
@@ -37,34 +38,37 @@ impl<S: Into<String>> From<(Range<usize>, S)> for TokenRecognizerError {
 #[allow(clippy::range_plus_one)]
 impl<S: Into<String>> From<(RangeInclusive<usize>, S)> for TokenRecognizerError {
-    fn from((position, message): (RangeInclusive<usize>, S)) -> Self {
-        (*position.start()..*position.end() + 1, message).into()
+    fn from((location, message): (RangeInclusive<usize>, S)) -> Self {
+        (*location.start()..*location.end() + 1, message).into()
     }
 }
 
 impl<S: Into<String>> From<(usize, S)> for TokenRecognizerError {
-    fn from((position, message): (usize, S)) -> Self {
-        (position..=position, message).into()
+    fn from((location, message): (usize, S)) -> Self {
+        (location..=location, message).into()
     }
 }
 
-pub struct TokenWithPosition<T> {
-    pub token: T,
-    pub position: Range<usize>,
-}
-
 pub struct Lexer<R: TokenRecognizer> {
     parser: R,
     data: Vec<u8>,
-    start: usize,
+    position: Position,
+    previous_position: Position, // Lexer position before the last emitted token
     is_ending: bool,
-    position: usize,
     min_buffer_size: usize,
     max_buffer_size: usize,
     is_line_jump_whitespace: bool,
     line_comment_start: Option<&'static [u8]>,
 }
 
+#[derive(Clone, Copy)]
+struct Position {
+    line_start_buffer_offset: usize,
+    buffer_offset: usize,
+    global_offset: u64,
+    global_line: u64,
+}
+
 impl<R: TokenRecognizer> Lexer<R> {
     pub fn new(
         parser: R,
@@ -76,9 +80,19 @@ impl<R: TokenRecognizer> Lexer<R> {
         Self {
             parser,
             data: Vec::new(),
-            start: 0,
+            position: Position {
+                line_start_buffer_offset: 0,
+                buffer_offset: 0,
+                global_offset: 0,
+                global_line: 0,
+            },
+            previous_position: Position {
+                line_start_buffer_offset: 0,
+                buffer_offset: 0,
+                global_offset: 0,
+                global_line: 0,
+            },
             is_ending: false,
-            position: 0,
             min_buffer_size,
             max_buffer_size,
             is_line_jump_whitespace,
@@ -148,24 +162,43 @@ impl<R: TokenRecognizer> Lexer<R> {
         Ok(())
     }
 
-    pub fn read_next(
-        &mut self,
-        options: &R::Options,
-    ) -> Option<Result<TokenWithPosition<R::Token<'_>>, LexerError>> {
+    #[allow(clippy::unwrap_in_result)]
+    pub fn read_next(&mut self, options: &R::Options) -> Option<Result<R::Token<'_>, SyntaxError>> {
         self.skip_whitespaces_and_comments()?;
-        let Some((consumed, result)) =
-            self.parser
-                .recognize_next_token(&self.data[self.start..], self.is_ending, options)
-        else {
+        self.previous_position = self.position;
+        let Some((consumed, result)) = self.parser.recognize_next_token(
+            &self.data[self.position.buffer_offset..],
+            self.is_ending,
+            options,
+        ) else {
             return if self.is_ending {
-                if self.start == self.data.len() {
+                if self.position.buffer_offset == self.data.len() {
                     None // We have finished
                 } else {
-                    let error = LexerError {
-                        position: self.position..self.position + (self.data.len() - self.start),
+                    let (new_line_jumps, new_line_start) =
+                        Self::find_number_of_line_jumps_and_start_of_last_line(
+                            &self.data[self.position.buffer_offset..],
+                        );
+                    if new_line_jumps > 0 {
+                        self.position.line_start_buffer_offset =
+                            self.position.buffer_offset + new_line_start;
+                    }
+                    self.position.global_offset +=
+                        u64::try_from(self.data.len() - self.position.buffer_offset).unwrap();
+                    self.position.buffer_offset = self.data.len();
+                    self.position.global_line += new_line_jumps;
+                    let new_position = TextPosition {
+                        line: self.position.global_line,
+                        column: Self::column_from_bytes(
+                            &self.data[self.position.line_start_buffer_offset..],
+                        ),
+                        offset: self.position.global_offset,
+                    };
+                    let error = SyntaxError {
+                        location: new_position..new_position,
                         message: "Unexpected end of file".into(),
                     };
-                    self.start = self.data.len(); // We consume everything
+                    self.position.buffer_offset = self.data.len(); // We consume everything
                     Some(Err(error))
                 }
             } else {
@@ -177,44 +210,119 @@ impl<R: TokenRecognizer> Lexer<R> {
             "The lexer must consume at least one byte each time"
         );
         debug_assert!(
-            self.start + consumed <= self.data.len(),
+            self.position.buffer_offset + consumed <= self.data.len(),
             "The lexer tried to consume {consumed} bytes but only {} bytes are readable",
-            self.data.len() - self.start
+            self.data.len() - self.position.buffer_offset
         );
-        let old_position = self.position;
-        self.start += consumed;
-        self.position += consumed;
-        Some(match result {
-            Ok(token) => Ok(TokenWithPosition {
-                token,
-                position: old_position..self.position,
-            }),
-            Err(e) => Err(LexerError {
-                position: e.position.start + self.position..e.position.end + self.position,
-                message: e.message,
-            }),
-        })
+        let (new_line_jumps, new_line_start) =
+            Self::find_number_of_line_jumps_and_start_of_last_line(
+                &self.data[self.position.buffer_offset..self.position.buffer_offset + consumed],
+            );
+        if new_line_jumps > 0 {
+            self.position.line_start_buffer_offset = self.position.buffer_offset + new_line_start;
+        }
+        self.position.buffer_offset += consumed;
+        self.position.global_offset += u64::try_from(consumed).unwrap();
+        self.position.global_line += new_line_jumps;
+        Some(result.map_err(|e| SyntaxError {
+            location: self.location_from_buffer_offset_range(e.location),
+            message: e.message,
+        }))
+    }
+
+    pub fn location_from_buffer_offset_range(
+        &self,
+        offset_range: Range<usize>,
+    ) -> Range<TextPosition> {
+        let start_offset = self.previous_position.buffer_offset + offset_range.start;
+        let (start_extra_line_jumps, start_line_start) =
+            Self::find_number_of_line_jumps_and_start_of_last_line(
+                &self.data[self.previous_position.buffer_offset..start_offset],
+            );
+        let start_line_start = if start_extra_line_jumps > 0 {
+            start_line_start + self.previous_position.buffer_offset
+        } else {
+            self.previous_position.line_start_buffer_offset
+        };
+        let end_offset = self.previous_position.buffer_offset + offset_range.end;
+        let (end_extra_line_jumps, end_line_start) =
+            Self::find_number_of_line_jumps_and_start_of_last_line(
+                &self.data[self.previous_position.buffer_offset..end_offset],
+            );
+        let end_line_start = if end_extra_line_jumps > 0 {
+            end_line_start + self.previous_position.buffer_offset
+        } else {
+            self.previous_position.line_start_buffer_offset
+        };
+        TextPosition {
+            line: self.previous_position.global_line + start_extra_line_jumps,
+            column: Self::column_from_bytes(&self.data[start_line_start..start_offset]),
+            offset: self.previous_position.global_offset
+                + u64::try_from(offset_range.start).unwrap(),
+        }..TextPosition {
+            line: self.previous_position.global_line + end_extra_line_jumps,
+            column: Self::column_from_bytes(&self.data[end_line_start..end_offset]),
+            offset: self.previous_position.global_offset + u64::try_from(offset_range.end).unwrap(),
+        }
+    }
+
+    pub fn last_token_location(&self) -> Range<TextPosition> {
+        TextPosition {
+            line: self.previous_position.global_line,
+            column: Self::column_from_bytes(
+                &self.data[self.previous_position.line_start_buffer_offset
+                    ..self.previous_position.buffer_offset],
+            ),
+            offset: self.previous_position.global_offset,
+        }..TextPosition {
+            line: self.position.global_line,
+            column: Self::column_from_bytes(
+                &self.data[self.position.line_start_buffer_offset..self.position.buffer_offset],
+            ),
+            offset: self.position.global_offset,
+        }
+    }
+
+    pub fn last_token_source(&self) -> Cow<'_, str> {
+        String::from_utf8_lossy(
+            &self.data[self.previous_position.buffer_offset..self.position.buffer_offset],
+        )
     }
 
     pub fn is_end(&self) -> bool {
-        self.is_ending && self.data.len() == self.start
+        self.is_ending && self.data.len() == self.position.buffer_offset
     }
 
+    #[allow(clippy::unwrap_in_result)]
     fn skip_whitespaces_and_comments(&mut self) -> Option<()> {
         loop {
-            self.skip_whitespaces();
+            self.skip_whitespaces()?;
 
-            let buf = &self.data[self.start..];
+            let buf = &self.data[self.position.buffer_offset..];
             if let Some(line_comment_start) = self.line_comment_start {
                 if buf.starts_with(line_comment_start) {
                     // Comment
                     if let Some(end) = memchr2(b'\r', b'\n', &buf[line_comment_start.len()..]) {
-                        self.start += end + line_comment_start.len();
-                        self.position += end + line_comment_start.len();
+                        let mut end_position = line_comment_start.len() + end;
+                        if buf.get(end_position).copied() == Some(b'\r') {
+                            // We look for \n for Windows line end style
+                            if let Some(c) = buf.get(end_position + 1) {
+                                if *c == b'\n' {
+                                    end_position += 1;
+                                }
+                            } else if !self.is_ending {
+                                return None; // We need to read more
+                            }
+                        }
+                        let comment_size = end_position + 1;
+                        self.position.buffer_offset += comment_size;
+                        self.position.line_start_buffer_offset = self.position.buffer_offset;
+                        self.position.global_offset += u64::try_from(comment_size).unwrap();
+                        self.position.global_line += 1;
                         continue;
                     }
                     if self.is_ending {
-                        self.start = self.data.len(); // EOF
+                        self.position.buffer_offset = self.data.len(); // EOF
                         return Some(());
                    }
                     return None; // We need more data
@@ -224,80 +332,98 @@ impl<R: TokenRecognizer> Lexer<R> {
         }
     }
 
-    fn skip_whitespaces(&mut self) {
+    fn skip_whitespaces(&mut self) -> Option<()> {
         if self.is_line_jump_whitespace {
-            for (i, c) in self.data[self.start..].iter().enumerate() {
-                if !matches!(c, b' ' | b'\t' | b'\r' | b'\n') {
-                    self.start += i;
-                    self.position += i;
-                    return;
+            let mut i = self.position.buffer_offset;
+            while let Some(c) = self.data.get(i) {
+                match c {
+                    b' ' | b'\t' => {
+                        self.position.buffer_offset += 1;
+                        self.position.global_offset += 1;
+                    }
+                    b'\r' => {
+                        // We look for \n for Windows line end style
+                        let mut increment: u8 = 1;
+                        if let Some(c) = self.data.get(i + 1) {
+                            if *c == b'\n' {
+                                increment += 1;
+                                i += 1;
+                            }
+                        } else if !self.is_ending {
+                            return None; // We need to read more
+                        }
+                        self.position.buffer_offset += usize::from(increment);
+                        self.position.line_start_buffer_offset = self.position.buffer_offset;
+                        self.position.global_offset += u64::from(increment);
+                        self.position.global_line += 1;
+                    }
+                    b'\n' => {
+                        self.position.buffer_offset += 1;
+                        self.position.line_start_buffer_offset = self.position.buffer_offset;
+                        self.position.global_offset += 1;
+                        self.position.global_line += 1;
+                    }
+                    _ => return Some(()),
                 }
+                i += 1; //TODO: SIMD
             }
         } else {
-            for (i, c) in self.data[self.start..].iter().enumerate() {
-                if !matches!(c, b' ' | b'\t') {
-                    self.start += i;
-                    self.position += i;
-                    return;
+            for c in &self.data[self.position.buffer_offset..] {
+                if matches!(c, b' ' | b'\t') {
+                    self.position.buffer_offset += 1;
+                    self.position.global_offset += 1;
+                } else {
+                    return Some(());
                 }
+                //TODO: SIMD
             }
         }
-        // We only have whitespaces
-        self.position += self.data.len() - self.start;
-        self.start = self.data.len();
+        Some(())
     }
 
     fn shrink_data(&mut self) {
-        if self.start > 0 {
-            self.data.copy_within(self.start.., 0);
-            self.data.truncate(self.data.len() - self.start);
-            self.start = 0;
+        if self.position.line_start_buffer_offset > 0 {
+            self.data
+                .copy_within(self.position.line_start_buffer_offset.., 0);
+            self.data
+                .truncate(self.data.len() - self.position.line_start_buffer_offset);
+            self.position.buffer_offset -= self.position.line_start_buffer_offset;
+            self.position.line_start_buffer_offset = 0;
+            self.previous_position = self.position;
         }
     }
 
-#[derive(Debug)]
-pub struct LexerError {
-    position: Range<usize>,
-    message: String,
-}
-
-impl LexerError {
-    pub fn position(&self) -> Range<usize> {
-        self.position.clone()
-    }
-
-    pub fn message(&self) -> &str {
-        &self.message
-    }
-
-    pub fn into_message(self) -> String {
-        self.message
-    }
-}
-
-impl fmt::Display for LexerError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        if self.position.start + 1 == self.position.end {
-            write!(
-                f,
-                "Lexer error at byte {}: {}",
-                self.position.start, self.message
-            )
-        } else {
-            write!(
-                f,
-                "Lexer error between bytes {} and {}: {}",
-                self.position.start, self.position.end, self.message
-            )
-        }
-    }
-}
-
-impl Error for LexerError {
-    fn description(&self) -> &str {
-        self.message()
+    fn find_number_of_line_jumps_and_start_of_last_line(bytes: &[u8]) -> (u64, usize) {
+        let mut num_of_jumps = 0;
+        let mut last_jump_pos = 0;
+        let mut previous_cr = usize::MAX;
+        for pos in memchr2_iter(b'\r', b'\n', bytes) {
+            if bytes[pos] == b'\r' {
+                previous_cr = pos;
+                num_of_jumps += 1;
+                last_jump_pos = pos + 1;
+            } else {
+                if pos == 0 || previous_cr != pos - 1 {
+                    // We count \r\n as a single line jump
+                    num_of_jumps += 1;
+                }
+                last_jump_pos = pos + 1;
+            }
+        }
+        (num_of_jumps, last_jump_pos)
+    }
+
+    fn column_from_bytes(bytes: &[u8]) -> u64 {
+        match str::from_utf8(bytes) {
+            Ok(s) => u64::try_from(s.chars().count()).unwrap(),
+            Err(e) => {
+                if e.valid_up_to() == 0 {
+                    0
+                } else {
+                    Self::column_from_bytes(&bytes[..e.valid_up_to()])
+                }
+            }
+        }
     }
 }
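A test-style sketch of the line-jump bookkeeping above. Here `find` abbreviates the private associated function `find_number_of_line_jumps_and_start_of_last_line`, so this would only run as a unit test inside the lexer module:

    // Returned pair: (number of line jumps, byte offset where the last line starts)
    assert_eq!(find(b"abc"), (0, 0)); // no jump: the last line starts at offset 0
    assert_eq!(find(b"a\nb"), (1, 2));
    assert_eq!(find(b"a\r\nb"), (1, 3)); // \r\n counts as a single jump
    assert_eq!(find(b"a\r\rb"), (2, 3)); // two bare \r are two jumps

This is what keeps `global_line` consistent between the Unix and Windows line-ending test fixtures below, which share a single expected error message.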
diff --git a/lib/oxttl/src/toolkit/mod.rs b/lib/oxttl/src/toolkit/mod.rs
index 300b9c2c..cc8e3624 100644
--- a/lib/oxttl/src/toolkit/mod.rs
+++ b/lib/oxttl/src/toolkit/mod.rs
@@ -2,12 +2,12 @@
 //!
 //! Provides the basic code to write plain Rust lexers and parsers able to read files chunk by chunk.
 
+mod error;
 mod lexer;
 mod parser;
 
-pub use self::lexer::{Lexer, LexerError, TokenRecognizer, TokenRecognizerError};
+pub use self::error::{ParseError, SyntaxError, TextPosition};
+pub use self::lexer::{Lexer, TokenRecognizer, TokenRecognizerError};
 #[cfg(feature = "async-tokio")]
 pub use self::parser::FromTokioAsyncReadIterator;
-pub use self::parser::{
-    FromReadIterator, ParseError, Parser, RuleRecognizer, RuleRecognizerError, SyntaxError,
-};
+pub use self::parser::{FromReadIterator, Parser, RuleRecognizer, RuleRecognizerError};

diff --git a/lib/oxttl/src/toolkit/parser.rs b/lib/oxttl/src/toolkit/parser.rs
index 38419477..7a9ba8bf 100644
--- a/lib/oxttl/src/toolkit/parser.rs
+++ b/lib/oxttl/src/toolkit/parser.rs
@@ -1,9 +1,6 @@
-use crate::toolkit::lexer::TokenWithPosition;
-use crate::toolkit::{Lexer, LexerError, TokenRecognizer};
-use std::error::Error;
+use crate::toolkit::error::{ParseError, SyntaxError};
+use crate::toolkit::lexer::{Lexer, TokenRecognizer};
 use std::io::Read;
-use std::ops::Range;
-use std::{fmt, io};
 #[cfg(feature = "async-tokio")]
 use tokio::io::AsyncRead;
@@ -42,7 +39,6 @@ pub struct Parser<RR: RuleRecognizer> {
     state: Option<RR>,
     results: Vec<RR::Output>,
     errors: Vec<RuleRecognizerError>,
-    position: Range<usize>,
     default_lexer_options: <RR::TokenRecognizer as TokenRecognizer>::Options,
 }
 
@@ -53,7 +49,6 @@ impl<RR: RuleRecognizer> Parser<RR> {
             state: Some(recognizer),
             results: vec![],
             errors: vec![],
-            position: 0..0,
             default_lexer_options: <RR::TokenRecognizer as TokenRecognizer>::Options::default(),
         }
     }
@@ -76,8 +71,10 @@ impl<RR: RuleRecognizer> Parser<RR> {
         loop {
            if let Some(error) = self.errors.pop() {
                 return Some(Err(SyntaxError {
-                    position: self.position.clone(),
-                    message: error.message,
+                    location: self.lexer.last_token_location(),
+                    message: error
+                        .message
+                        .replace("TOKEN", &self.lexer.last_token_source()),
                 }));
             }
             if let Some(result) = self.results.pop() {
@@ -89,8 +86,7 @@ impl<RR: RuleRecognizer> Parser<RR> {
                     .map_or(&self.default_lexer_options, |p| p.lexer_options()),
             ) {
                 match result {
-                    Ok(TokenWithPosition { token, position }) => {
-                        self.position = position;
+                    Ok(token) => {
                         self.state = self.state.take().map(|state| {
                             state.recognize_next(token, &mut self.results, &mut self.errors)
                         });
                     }
                     Err(e) => {
                         self.state = self.state.take().map(RR::error_recovery_state);
-                        return Some(Err(e.into()));
+                        return Some(Err(e));
                     }
                 }
             }
@@ -126,128 +122,6 @@ impl<RR: RuleRecognizer> Parser<RR> {
     }
 }
 
-/// An error in the syntax of the parsed file.
-///
-/// It is composed of a message and a byte range in the input.
-#[derive(Debug)]
-pub struct SyntaxError {
-    position: Range<usize>,
-    message: String,
-}
-
-impl SyntaxError {
-    /// The invalid byte range in the input.
-    #[inline]
-    pub fn position(&self) -> Range<usize> {
-        self.position.clone()
-    }
-
-    /// The error message.
-    #[inline]
-    pub fn message(&self) -> &str {
-        &self.message
-    }
-
-    /// Converts this error to an error message.
-    #[inline]
-    pub fn into_message(self) -> String {
-        self.message
-    }
-}
-
-impl fmt::Display for SyntaxError {
-    #[inline]
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        if self.position.start + 1 == self.position.end {
-            write!(
-                f,
-                "Parser error at byte {}: {}",
-                self.position.start, self.message
-            )
-        } else {
-            write!(
-                f,
-                "Parser error between bytes {} and {}: {}",
-                self.position.start, self.position.end, self.message
-            )
-        }
-    }
-}
-
-impl Error for SyntaxError {}
-
-impl From<SyntaxError> for io::Error {
-    #[inline]
-    fn from(error: SyntaxError) -> Self {
-        io::Error::new(io::ErrorKind::InvalidData, error)
-    }
-}
-
-impl From<LexerError> for SyntaxError {
-    #[inline]
-    fn from(e: LexerError) -> Self {
-        Self {
-            position: e.position(),
-            message: e.into_message(),
-        }
-    }
-}
-
-/// A parsing error.
-///
-/// It is the union of [`SyntaxError`] and [`std::io::Error`].
-#[derive(Debug)]
-pub enum ParseError {
-    /// I/O error during parsing (file not found...).
-    Io(io::Error),
-    /// An error in the file syntax.
-    Syntax(SyntaxError),
-}
-
-impl fmt::Display for ParseError {
-    #[inline]
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            Self::Io(e) => e.fmt(f),
-            Self::Syntax(e) => e.fmt(f),
-        }
-    }
-}
-
-impl Error for ParseError {
-    #[inline]
-    fn source(&self) -> Option<&(dyn Error + 'static)> {
-        Some(match self {
-            Self::Io(e) => e,
-            Self::Syntax(e) => e,
-        })
-    }
-}
-
-impl From<SyntaxError> for ParseError {
-    #[inline]
-    fn from(error: SyntaxError) -> Self {
-        Self::Syntax(error)
-    }
-}
-
-impl From<io::Error> for ParseError {
-    #[inline]
-    fn from(error: io::Error) -> Self {
-        Self::Io(error)
-    }
-}
-
-impl From<ParseError> for io::Error {
-    #[inline]
-    fn from(error: ParseError) -> Self {
-        match error {
-            ParseError::Syntax(e) => e.into(),
-            ParseError::Io(e) => e,
-        }
-    }
-}
-
 pub struct FromReadIterator<R: Read, RR: RuleRecognizer> {
     read: R,
     parser: Parser<RR>,
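The `replace("TOKEN", ...)` call above is what expands the placeholder messages introduced in line_formats.rs, n3.rs and terse.rs into concrete diagnostics: the recognizers now push static templates, and the parser fills in the source text of the last token. A sketch of the expansion with a hypothetical offending token:

    // What a recognizer pushes vs. what the user sees, for a literal "p"
    // read where an IRI predicate was expected:
    let template = "TOKEN is not a valid predicate";
    let last_token_source = "\"p\""; // as returned by Lexer::last_token_source()
    assert_eq!(
        template.replace("TOKEN", last_token_source),
        "\"p\" is not a valid predicate"
    );

Pushing static templates instead of `format!` strings also avoids allocating a message for every recovered-from error during parsing.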
diff --git a/testsuite/oxigraph-tests/parser-error/invalid_iri.nt b/testsuite/oxigraph-tests/parser-error/invalid_iri.nt
new file mode 100644
index 00000000..021c7911
--- /dev/null
+++ b/testsuite/oxigraph-tests/parser-error/invalid_iri.nt
@@ -0,0 +1,2 @@
+ .
+ .
diff --git a/testsuite/oxigraph-tests/parser-error/invalid_iri_comment.nt b/testsuite/oxigraph-tests/parser-error/invalid_iri_comment.nt
new file mode 100644
index 00000000..7c8d2120
--- /dev/null
+++ b/testsuite/oxigraph-tests/parser-error/invalid_iri_comment.nt
@@ -0,0 +1,2 @@
+ . # foo
+ .
diff --git a/testsuite/oxigraph-tests/parser-error/invalid_iri_comment_crlf.nt b/testsuite/oxigraph-tests/parser-error/invalid_iri_comment_crlf.nt
new file mode 100644
index 00000000..7c8d2120
--- /dev/null
+++ b/testsuite/oxigraph-tests/parser-error/invalid_iri_comment_crlf.nt
@@ -0,0 +1,2 @@
+ . # foo
+ .
diff --git a/testsuite/oxigraph-tests/parser-error/invalid_iri_crlf.nt b/testsuite/oxigraph-tests/parser-error/invalid_iri_crlf.nt
new file mode 100644
index 00000000..021c7911
--- /dev/null
+++ b/testsuite/oxigraph-tests/parser-error/invalid_iri_crlf.nt
@@ -0,0 +1,2 @@
+ .
+ .
diff --git a/testsuite/oxigraph-tests/parser-error/invalid_iri_error.txt b/testsuite/oxigraph-tests/parser-error/invalid_iri_error.txt
new file mode 100644
index 00000000..26729063
--- /dev/null
+++ b/testsuite/oxigraph-tests/parser-error/invalid_iri_error.txt
@@ -0,0 +1 @@
+Parser error at line 2 between columns 24 and 36: Invalid IRI code point ' '
\ No newline at end of file
diff --git a/testsuite/oxigraph-tests/parser-error/invalid_predicate.nt b/testsuite/oxigraph-tests/parser-error/invalid_predicate.nt
new file mode 100644
index 00000000..63e6fd7a
--- /dev/null
+++ b/testsuite/oxigraph-tests/parser-error/invalid_predicate.nt
@@ -0,0 +1,2 @@
+ .
+ "p" .
diff --git a/testsuite/oxigraph-tests/parser-error/invalid_predicate_error.txt b/testsuite/oxigraph-tests/parser-error/invalid_predicate_error.txt
new file mode 100644
index 00000000..469dd19f
--- /dev/null
+++ b/testsuite/oxigraph-tests/parser-error/invalid_predicate_error.txt
@@ -0,0 +1 @@
+Parser error at line 2 between columns 24 and 27: "p" is not a valid predicate
\ No newline at end of file
diff --git a/testsuite/oxigraph-tests/parser-error/invalid_string_escape.nt b/testsuite/oxigraph-tests/parser-error/invalid_string_escape.nt
new file mode 100644
index 00000000..8a625fce
--- /dev/null
+++ b/testsuite/oxigraph-tests/parser-error/invalid_string_escape.nt
@@ -0,0 +1 @@
+ "fooé \a baré" .
diff --git a/testsuite/oxigraph-tests/parser-error/invalid_string_escape_error.txt b/testsuite/oxigraph-tests/parser-error/invalid_string_escape_error.txt
new file mode 100644
index 00000000..f5e45857
--- /dev/null
+++ b/testsuite/oxigraph-tests/parser-error/invalid_string_escape_error.txt
@@ -0,0 +1 @@
+Parser error at line 1 between columns 53 and 55: Unexpected escape character '\a'
\ No newline at end of file
diff --git a/testsuite/oxigraph-tests/parser-error/manifest.ttl b/testsuite/oxigraph-tests/parser-error/manifest.ttl
new file mode 100644
index 00000000..86159b38
--- /dev/null
+++ b/testsuite/oxigraph-tests/parser-error/manifest.ttl
@@ -0,0 +1,66 @@
+@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix rdft: <http://www.w3.org/ns/rdftest#> .
+
+<>
+    rdf:type mf:Manifest ;
+    rdfs:comment "Oxigraph parser error test cases" ;
+    mf:entries (
+        <#invalid_iri>
+        <#invalid_iri_crlf>
+        <#invalid_iri_comment>
+        <#invalid_iri_comment_crlf>
+        <#invalid_string_escape>
+        <#unexpected_eof>
+        <#unexpected_eof_crlf>
+        <#invalid_predicate>
+    ) .
+
+<#invalid_iri>
+    rdf:type rdft:TestTurtleNegativeSyntax ;
+    mf:name "bad IRI" ;
+    mf:action <invalid_iri.nt> ;
+    mf:result <invalid_iri_error.txt> .
+
+<#invalid_iri_crlf>
+    rdf:type rdft:TestTurtleNegativeSyntax ;
+    mf:name "bad IRI" ;
+    mf:action <invalid_iri_crlf.nt> ;
+    mf:result <invalid_iri_error.txt> .
+
+<#invalid_iri_comment>
+    rdf:type rdft:TestTurtleNegativeSyntax ;
+    mf:name "bad IRI" ;
+    mf:action <invalid_iri_comment.nt> ;
+    mf:result <invalid_iri_error.txt> .
+
+<#invalid_iri_comment_crlf>
+    rdf:type rdft:TestTurtleNegativeSyntax ;
+    mf:name "bad IRI" ;
+    mf:action <invalid_iri_comment_crlf.nt> ;
+    mf:result <invalid_iri_error.txt> .
+
+<#invalid_string_escape>
+    rdf:type rdft:TestTurtleNegativeSyntax ;
+    mf:name "bad string escape" ;
+    mf:action <invalid_string_escape.nt> ;
+    mf:result <invalid_string_escape_error.txt> .
+
+<#unexpected_eof>
+    rdf:type rdft:TestTurtleNegativeSyntax ;
+    mf:name "unexpected end of file" ;
+    mf:action <unexpected_eof.nt> ;
+    mf:result <unexpected_eof_error.txt> .
+
+<#unexpected_eof_crlf>
+    rdf:type rdft:TestTurtleNegativeSyntax ;
+    mf:name "unexpected end of file" ;
+    mf:action <unexpected_eof_crlf.nt> ;
+    mf:result <unexpected_eof_error.txt> .
+
+<#invalid_predicate>
+    rdf:type rdft:TestTurtleNegativeSyntax ;
+    mf:name "invalid predicate" ;
+    mf:action <invalid_predicate.nt> ;
+    mf:result <invalid_predicate_error.txt> .
diff --git a/testsuite/oxigraph-tests/parser-error/unexpected_eof.nt b/testsuite/oxigraph-tests/parser-error/unexpected_eof.nt
new file mode 100644
index 00000000..8c0a4ca2
--- /dev/null
+++ b/testsuite/oxigraph-tests/parser-error/unexpected_eof.nt
@@ -0,0 +1,2 @@
+ Result<()> {
         .action
         .as_deref()
         .ok_or_else(|| anyhow!("No action found"))?;
-    ensure!(
-        load_dataset(action, format, false).is_err(),
-        "File parsed without errors even if it should not"
-    );
+    let Err(error) = load_dataset(action, format, false) else {
+        bail!("File parsed without errors even though it should not");
+    };
+    if let Some(result) = &test.result {
+        let expected = read_file_to_string(result)?;
+        ensure!(
+            expected == error.to_string(),
+            "Unexpected error message:\n{}",
+            format_diff(&expected, &error.to_string(), "message")
+        );
+    }
     Ok(())
 }

diff --git a/testsuite/tests/oxigraph.rs b/testsuite/tests/oxigraph.rs
index b76e5a2a..238b57c7 100644
--- a/testsuite/tests/oxigraph.rs
+++ b/testsuite/tests/oxigraph.rs
@@ -20,6 +20,14 @@ fn oxigraph_parser_recovery_testsuite() -> Result<()> {
     )
 }
 
+#[test]
+fn oxigraph_parser_error_testsuite() -> Result<()> {
+    check_testsuite(
+        "https://github.com/oxigraph/oxigraph/tests/parser-error/manifest.ttl",
+        &[],
+    )
+}
+
 #[test]
 fn oxigraph_sparql_testsuite() -> Result<()> {
     check_testsuite(
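End to end, the new positions are observable from any of the oxttl parsers. A minimal sketch, assuming this crate version's `parse_read` reader API; the printed message text is illustrative:

    use oxttl::{NTriplesParser, ParseError};

    fn main() {
        // A literal where an IRI predicate is expected, mirroring the
        // invalid_predicate fixture above:
        let data = b"<http://example.com/s> \"p\" <http://example.com/o> .\n";
        for result in NTriplesParser::new().parse_read(data.as_slice()) {
            if let Err(ParseError::Syntax(e)) = result {
                // Human-readable position via Display...
                eprintln!("{e}");
                // ...or structured access via the new location() accessor,
                // e.g. for an editor integration:
                let location = e.location();
                eprintln!(
                    "line {}, columns {} to {}",
                    location.start.line + 1,
                    location.start.column + 1,
                    location.end.column + 1
                );
            }
        }
    }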