@@ -6,12 +6,12 @@ use oxrdf::NamedNode;
 use std::borrow::Cow;
 use std::cmp::min;
 use std::collections::HashMap;
-use std::ops::{Range, RangeInclusive};
+use std::ops::Range;
 use std::str;
 
 #[derive(Debug, PartialEq, Eq)]
 pub enum N3Token<'a> {
-    IriRef(Iri<String>),
+    IriRef(String),
     PrefixedName {
         prefix: &'a str,
         local: Cow<'a, str>,
@@ -42,6 +42,7 @@ pub struct N3LexerOptions {
 
 pub struct N3Lexer {
     mode: N3LexerMode,
+    unchecked: bool,
 }
 
 // TODO: there are a lot of 'None' (missing data) returned even if the stream is ending!!!
@@ -61,7 +62,7 @@ impl TokenRecognizer for N3Lexer {
             b'<' => match *data.get(1)? {
                 b'<' => Some((2, Ok(N3Token::Punctuation("<<")))),
                 b'=' if self.mode == N3LexerMode::N3 => {
-                    if let Some((consumed, result)) = Self::recognize_iri(data, options) {
+                    if let Some((consumed, result)) = self.recognize_iri(data, options) {
                         Some(if let Ok(result) = result {
                             (consumed, Ok(result))
                         } else {
@@ -74,7 +75,7 @@ impl TokenRecognizer for N3Lexer {
                     }
                 }
                 b'-' if self.mode == N3LexerMode::N3 => {
-                    if let Some((consumed, result)) = Self::recognize_iri(data, options) {
+                    if let Some((consumed, result)) = self.recognize_iri(data, options) {
                         Some(if let Ok(result) = result {
                             (consumed, Ok(result))
                         } else {
@@ -86,7 +87,7 @@ impl TokenRecognizer for N3Lexer {
                         None
                     }
                 }
-                _ => Self::recognize_iri(data, options),
+                _ => self.recognize_iri(data, options),
             },
             b'>' => {
                 if *data.get(1)? == b'>' {
@@ -119,7 +120,7 @@ impl TokenRecognizer for N3Lexer {
                     Self::recognize_string(data, b'\'')
                 }
             }
-            b'@' => Self::recognize_lang_tag(data),
+            b'@' => self.recognize_lang_tag(data),
             b'.' => match data.get(1) {
                 Some(b'0'..=b'9') => Self::recognize_number(data),
                 Some(_) => Some((1, Ok(N3Token::Punctuation(".")))),
@@ -162,18 +163,19 @@ impl TokenRecognizer for N3Lexer {
                 }
             }
             b'0'..=b'9' | b'+' | b'-' => Self::recognize_number(data),
-            b'?' => Self::recognize_variable(data, is_ending),
-            _ => Self::recognize_pname_or_keyword(data, is_ending),
+            b'?' => self.recognize_variable(data, is_ending),
+            _ => self.recognize_pname_or_keyword(data, is_ending),
         }
     }
 }
 
 impl N3Lexer {
-    pub fn new(mode: N3LexerMode) -> Self {
-        Self { mode }
+    pub fn new(mode: N3LexerMode, unchecked: bool) -> Self {
+        Self { mode, unchecked }
     }
 
     fn recognize_iri(
+        &self,
         data: &[u8],
         options: &N3LexerOptions,
     ) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> {
@@ -186,7 +188,8 @@ impl N3Lexer {
             i += end;
             match data[i] {
                 b'>' => {
-                    return Some((i + 1, Self::parse_iri(string, 0..=i, options)));
+                    #[allow(clippy::range_plus_one)]
+                    return Some((i + 1, self.parse_iri(string, 0..i + 1, options)));
                 }
                 b'\\' => {
                     let (additional, c) = Self::recognize_escape(&data[i..], i, false)?;
@@ -205,29 +208,36 @@ impl N3Lexer {
     }
 
     fn parse_iri(
+        &self,
         iri: Vec<u8>,
-        position: RangeInclusive<usize>,
+        position: Range<usize>,
         options: &N3LexerOptions,
     ) -> Result<N3Token<'static>, TokenRecognizerError> {
-        let iri = String::from_utf8(iri).map_err(|e| {
-            (
-                position.clone(),
-                format!("The IRI contains invalid UTF-8 characters: {e}"),
-            )
-        })?;
-        let iri = if let Some(base_iri) = options.base_iri.as_ref() {
-            base_iri.resolve(&iri)
-        } else {
-            Iri::parse(iri)
-        }
-        .map_err(|e| (position, e.to_string()))?;
-        Ok(N3Token::IriRef(iri))
+        let iri = string_from_utf8(iri, position.clone())?;
+        Ok(N3Token::IriRef(
+            if let Some(base_iri) = options.base_iri.as_ref() {
+                if self.unchecked {
+                    base_iri.resolve_unchecked(&iri)
+                } else {
+                    base_iri.resolve(&iri)
+                }
+                .map_err(|e| (position, e.to_string()))?
+                .into_inner()
+            } else if self.unchecked {
+                iri
+            } else {
+                Iri::parse(iri)
+                    .map_err(|e| (position, e.to_string()))?
+                    .into_inner()
+            },
+        ))
     }
 
-    fn recognize_pname_or_keyword(
-        data: &[u8],
+    fn recognize_pname_or_keyword<'a>(
+        &self,
+        data: &'a [u8],
         is_ending: bool,
-    ) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> {
+    ) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> {
         // [139s] PNAME_NS ::= PN_PREFIX? ':'
         // [140s] PNAME_LN ::= PNAME_NS PN_LOCAL
@@ -303,7 +313,8 @@ impl N3Lexer {
                     ));
                 }
-        let (consumed, pn_local_result) = Self::recognize_optional_pn_local(&data[i..], is_ending)?;
+        let (consumed, pn_local_result) =
+            self.recognize_optional_pn_local(&data[i..], is_ending)?;
         Some((
             consumed + i,
             pn_local_result.map(|(local, might_be_invalid_iri)| N3Token::PrefixedName {
@@ -314,12 +325,13 @@ impl N3Lexer {
         ))
     }
 
-    fn recognize_variable(
-        data: &[u8],
+    fn recognize_variable<'a>(
+        &self,
+        data: &'a [u8],
         is_ending: bool,
-    ) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> {
+    ) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> {
         // [36] QUICK_VAR_NAME ::= "?" PN_LOCAL
-        let (consumed, result) = Self::recognize_optional_pn_local(&data[1..], is_ending)?;
+        let (consumed, result) = self.recognize_optional_pn_local(&data[1..], is_ending)?;
         Some((
             consumed + 1,
             result.and_then(|(name, _)| {
@@ -332,10 +344,11 @@ impl N3Lexer {
         ))
     }
 
-    fn recognize_optional_pn_local(
-        data: &[u8],
+    fn recognize_optional_pn_local<'a>(
+        &self,
+        data: &'a [u8],
         is_ending: bool,
-    ) -> Option<(usize, Result<(Cow<'_, str>, bool), TokenRecognizerError>)> {
+    ) -> Option<(usize, Result<(Cow<'a, str>, bool), TokenRecognizerError>)> {
         // [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
         let mut i = 0;
         let mut buffer = None; // Buffer if there are some escaped characters
@@ -359,23 +372,25 @@ impl N3Lexer {
                         } else if c == '\\' {
                             i += 1;
                             let a = char::from(*data.get(i)?);
-                            if matches!(
-                                a,
-                                '_' | '~'
-                                    | '.'
-                                    | '-'
-                                    | '!'
-                                    | '$'
-                                    | '&'
-                                    | '\''
-                                    | '('
-                                    | ')'
-                                    | '*'
-                                    | '+'
-                                    | ','
-                                    | ';'
-                                    | '='
-                            ) {
+                            if self.unchecked
+                                || matches!(
+                                    a,
+                                    '_' | '~'
+                                        | '.'
+                                        | '-'
+                                        | '!'
+                                        | '$'
+                                        | '&'
+                                        | '\''
+                                        | '('
+                                        | ')'
+                                        | '*'
+                                        | '+'
+                                        | ','
+                                        | ';'
+                                        | '='
+                                )
+                            {
                                 // ok to escape
                             } else if matches!(a, '/' | '?' | '#' | '@' | '%') {
                                 // ok to escape but requires IRI validation
@@ -406,12 +421,18 @@ impl N3Lexer {
                             {
                                 return Some((0, Ok((Cow::Borrowed(""), false))));
                             }
-                            might_be_invalid_iri |=
-                                Self::is_possible_pn_chars_base_but_not_valid_iri(c) || c == ':';
+                            if !self.unchecked {
+                                might_be_invalid_iri |=
+                                    Self::is_possible_pn_chars_base_but_not_valid_iri(c)
+                                        || c == ':';
+                            }
                             i += consumed;
                         } else if Self::is_possible_pn_chars(c) || c == ':' || c == '.' {
-                            might_be_invalid_iri |=
-                                Self::is_possible_pn_chars_base_but_not_valid_iri(c) || c == ':';
+                            if !self.unchecked {
+                                might_be_invalid_iri |=
+                                    Self::is_possible_pn_chars_base_but_not_valid_iri(c)
+                                        || c == ':';
+                            }
                             i += consumed;
                         } else {
                             let buffer = if let Some(mut buffer) = buffer {
@@ -518,9 +539,10 @@ impl N3Lexer {
         }
     }
 
-    fn recognize_lang_tag(
-        data: &[u8],
-    ) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> {
+    fn recognize_lang_tag<'a>(
+        &self,
+        data: &'a [u8],
+    ) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> {
         // [144s] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
         let mut is_last_block_empty = true;
         for (i, c) in data[1..].iter().enumerate() {
@@ -532,25 +554,29 @@ impl N3Lexer {
                     Err((1..2, "A language code should always start with a letter").into()),
                 ));
             } else if is_last_block_empty {
-                return Some((i, Self::parse_lang_tag(&data[1..i], 1..i - 1)));
+                return Some((i, self.parse_lang_tag(&data[1..i], 1..i - 1)));
             } else if *c == b'-' {
                 is_last_block_empty = true;
             } else {
-                return Some((i + 1, Self::parse_lang_tag(&data[1..=i], 1..i)));
+                return Some((i + 1, self.parse_lang_tag(&data[1..=i], 1..i)));
             }
         }
         None
     }
 
-    fn parse_lang_tag(
-        lang_tag: &[u8],
+    fn parse_lang_tag<'a>(
+        &self,
+        lang_tag: &'a [u8],
         position: Range<usize>,
-    ) -> Result<N3Token<'_>, TokenRecognizerError> {
-        Ok(N3Token::LangTag(
-            LanguageTag::parse(str_from_utf8(lang_tag, position.clone())?)
+    ) -> Result<N3Token<'a>, TokenRecognizerError> {
+        let lang_tag = str_from_utf8(lang_tag, position.clone())?;
+        Ok(N3Token::LangTag(if self.unchecked {
+            lang_tag
+        } else {
+            LanguageTag::parse(lang_tag)
                 .map_err(|e| (position.clone(), e.to_string()))?
-                .into_inner(),
-        ))
+                .into_inner()
+        }))
     }
 
     fn recognize_string(
@@ -933,3 +959,14 @@ fn str_from_utf8(data: &[u8], range: Range<usize>) -> Result<&str, TokenRecognizerError> {
             .into()
     })
 }
+
+fn string_from_utf8(data: Vec<u8>, range: Range<usize>) -> Result<String, TokenRecognizerError> {
+    String::from_utf8(data).map_err(|e| {
+        (
+            range.start + e.utf8_error().valid_up_to()
+                ..min(range.end, range.start + e.utf8_error().valid_up_to() + 4),
+            format!("Invalid UTF-8: {e}"),
+        )
+            .into()
+    })
+}
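
The change above is mechanical but consistent: `N3Lexer::new` gains an `unchecked` flag, the token helpers become `&self` methods so they can read it, and each validation site (IRI resolution, language tags, local-name escapes) short-circuits when it is set. The following is a minimal standalone sketch of that pattern, not the oxttl API: `Lexer`, `lang_tag`, and the ad-hoc tag check are illustrative stand-ins.

// Standalone sketch of a constructor-level `unchecked` flag that skips validation.
struct Lexer {
    unchecked: bool, // plays the same role as N3Lexer::unchecked in the patch above
}

impl Lexer {
    fn new(unchecked: bool) -> Self {
        Self { unchecked }
    }

    // Mirrors the shape of parse_lang_tag: skip validation entirely when `unchecked` is set.
    fn lang_tag<'a>(&self, tag: &'a str) -> Result<&'a str, String> {
        if self.unchecked {
            return Ok(tag); // trust the caller, as the patched parse_lang_tag does
        }
        // Cheap well-formedness check standing in for LanguageTag::parse (illustration only).
        let ok = tag
            .split('-')
            .all(|block| !block.is_empty() && block.chars().all(|c| c.is_ascii_alphanumeric()));
        if ok {
            Ok(tag)
        } else {
            Err(format!("invalid language tag: {tag}"))
        }
    }
}

fn main() {
    let strict = Lexer::new(false);
    let lenient = Lexer::new(true);
    assert!(strict.lang_tag("en-US!").is_err());
    assert!(lenient.lang_tag("en-US!").is_ok());
}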