Parsers: adds "unchecked" option for faster parsing

Does not validate IRIs and language tags
pull/714/head
Tpt 9 months ago committed by Thomas Tanon
parent d170b53609
commit a84b898fda
  1. 4
      Cargo.lock
  2. 2
      cli/Cargo.toml
  3. 11
      cli/src/main.rs
  4. 45
      fuzz/fuzz_targets/nquads.rs
  5. 66
      fuzz/fuzz_targets/trig.rs
  6. 2
      lib/Cargo.toml
  7. 10
      lib/benches/store.rs
  8. 2
      lib/oxrdf/Cargo.toml
  9. 72
      lib/oxrdfio/src/parser.rs
  10. 2
      lib/oxrdfxml/Cargo.toml
  11. 130
      lib/oxrdfxml/src/parser.rs
  12. 2
      lib/oxttl/Cargo.toml
  13. 175
      lib/oxttl/src/lexer.rs
  14. 13
      lib/oxttl/src/line_formats.rs
  15. 41
      lib/oxttl/src/n3.rs
  16. 13
      lib/oxttl/src/nquads.rs
  17. 36
      lib/oxttl/src/ntriples.rs
  18. 38
      lib/oxttl/src/terse.rs
  19. 13
      lib/oxttl/src/trig.rs
  20. 13
      lib/oxttl/src/turtle.rs
  21. 2
      lib/spargebra/Cargo.toml
  22. 12
      lib/src/store.rs
  23. 2
      lints/test_debian_compatibility.py

4
Cargo.lock generated

@ -1113,9 +1113,9 @@ checksum = "8d91edf4fbb970279443471345a4e8c491bf05bb283b3e6c88e4e606fd8c181b"
[[package]]
name = "oxiri"
version = "0.2.2"
version = "0.2.3-alpha.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb175ec8981211357b7b379869c2f8d555881c55ea62311428ec0de46d89bd5c"
checksum = "b225dad32cfaa43a960b93f01fa7f87528ac07e794b80f6d9a0153e0222557e2"
[[package]]
name = "oxrdf"

@ -32,7 +32,7 @@ clap = { version = "4.0", features = ["derive"] }
oxigraph = { version = "0.4.0-alpha.1-dev", path = "../lib" }
rand = "0.8"
url = "2.4"
oxiri = "0.2"
oxiri = "0.2.3-alpha.1"
flate2 = "1.0"
rayon-core = "1.11"

@ -121,6 +121,8 @@ enum Command {
destination: PathBuf,
},
/// Load file(s) into the store.
///
/// Feel free to enable the --lenient option if you know your input is valid to get better performance.
Load {
/// Directory in which Oxigraph data are persisted.
#[arg(short, long, value_hint = ValueHint::DirPath)]
@ -143,6 +145,8 @@ enum Command {
#[arg(long, value_hint = ValueHint::Url)]
base: Option<String>,
/// Attempt to keep loading even if the data file is invalid.
///
/// This disables most of the validation of the RDF content.
#[arg(long)]
lenient: bool,
/// Name of the graph to load the data to.
@ -391,6 +395,7 @@ pub fn main() -> anyhow::Result<()> {
format.context("The --format option must be set when loading from stdin")?,
base.as_deref(),
graph,
lenient,
)
} else {
ThreadPoolBuilder::new()
@ -444,6 +449,7 @@ pub fn main() -> anyhow::Result<()> {
}),
base.as_deref(),
graph,
lenient,
)
} else {
bulk_load(
@ -454,6 +460,7 @@ pub fn main() -> anyhow::Result<()> {
}),
base.as_deref(),
graph,
lenient,
)
}
} {
@ -784,6 +791,7 @@ fn bulk_load(
format: RdfFormat,
base_iri: Option<&str>,
to_graph_name: Option<NamedNode>,
lenient: bool,
) -> anyhow::Result<()> {
let mut parser = RdfParser::from_format(format);
if let Some(to_graph_name) = to_graph_name {
@ -794,6 +802,9 @@ fn bulk_load(
.with_base_iri(base_iri)
.with_context(|| format!("Invalid base IRI {base_iri}"))?;
}
if lenient {
parser = parser.unchecked();
}
loader.load_from_read(parser, read)?;
Ok(())
}

@ -4,43 +4,60 @@ use libfuzzer_sys::fuzz_target;
use oxrdf::Quad;
use oxttl::{NQuadsParser, NQuadsSerializer};
fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<String>) {
fn parse<'a>(
chunks: impl IntoIterator<Item = &'a [u8]>,
unchecked: bool,
) -> (Vec<Quad>, Vec<String>) {
let mut quads = Vec::new();
let mut errors = Vec::new();
let mut parser = NQuadsParser::new().with_quoted_triples().parse();
let mut parser = NQuadsParser::new().with_quoted_triples();
if unchecked {
parser = parser.unchecked();
}
let mut reader = parser.parse();
for chunk in chunks {
parser.extend_from_slice(chunk);
while let Some(result) = parser.read_next() {
reader.extend_from_slice(chunk);
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
}
parser.end();
while let Some(result) = parser.read_next() {
reader.end();
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
assert!(parser.is_end());
assert!(reader.is_end());
(quads, errors)
}
fuzz_target!(|data: &[u8]| {
// We parse with splitting
let (quads, errors) = parse(data.split(|c| *c == 0xFF));
let (quads, errors) = parse(data.split(|c| *c == 0xFF), false);
// We parse without splitting
let (quads_without_split, errors_without_split) = parse([data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()]);
let (quads_without_split, errors_without_split) = parse(
[data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()],
false,
);
assert_eq!(quads, quads_without_split);
assert_eq!(errors, errors_without_split);
// We test also unchecked if valid
if errors.is_empty() {
let (quads_unchecked, errors_unchecked) = parse(data.split(|c| *c == 0xFF), true);
assert!(errors_unchecked.is_empty());
assert_eq!(quads, quads_unchecked);
}
// We serialize
let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new());
for quad in &quads {

@ -4,31 +4,37 @@ use libfuzzer_sys::fuzz_target;
use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple};
use oxttl::{TriGParser, TriGSerializer};
fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<String>) {
fn parse<'a>(
chunks: impl IntoIterator<Item = &'a [u8]>,
unchecked: bool,
) -> (Vec<Quad>, Vec<String>) {
let mut quads = Vec::new();
let mut errors = Vec::new();
let mut parser = TriGParser::new()
.with_quoted_triples()
.with_base_iri("http://example.com/")
.unwrap()
.parse();
.unwrap();
if unchecked {
parser = parser.unchecked();
}
let mut reader = parser.parse();
for chunk in chunks {
parser.extend_from_slice(chunk);
while let Some(result) = parser.read_next() {
reader.extend_from_slice(chunk);
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
}
parser.end();
while let Some(result) = parser.read_next() {
reader.end();
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
assert!(parser.is_end());
assert!(reader.is_end());
(quads, errors)
}
@ -66,14 +72,22 @@ fn serialize_quads(quads: &[Quad]) -> Vec<u8> {
fuzz_target!(|data: &[u8]| {
// We parse with splitting
let (quads, errors) = parse(data.split(|c| *c == 0xFF));
let (quads, errors) = parse(data.split(|c| *c == 0xFF), false);
// We parse without splitting
let (quads_without_split, errors_without_split) = parse([data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()]);
let (quads_without_split, errors_without_split) = parse(
[data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()],
false,
);
let (quads_unchecked, errors_unchecked) = parse(data.split(|c| *c == 0xFF), true);
if errors.is_empty() {
assert!(errors_unchecked.is_empty());
}
let bnodes_count = quads.iter().map(count_quad_blank_nodes).sum::<usize>();
if bnodes_count == 0 {
assert_eq!(
@ -83,6 +97,15 @@ fuzz_target!(|data: &[u8]| {
String::from_utf8_lossy(&serialize_quads(&quads)),
String::from_utf8_lossy(&serialize_quads(&quads_without_split))
);
if errors.is_empty() {
assert_eq!(
quads,
quads_unchecked,
"Validating:\n{}\nUnchecked:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads)),
String::from_utf8_lossy(&serialize_quads(&quads_unchecked))
);
}
} else if bnodes_count <= 4 {
let mut dataset_with_split = quads.iter().collect::<Dataset>();
let mut dataset_without_split = quads_without_split.iter().collect::<Dataset>();
@ -95,6 +118,19 @@ fuzz_target!(|data: &[u8]| {
String::from_utf8_lossy(&serialize_quads(&quads)),
String::from_utf8_lossy(&serialize_quads(&quads_without_split))
);
if errors.is_empty() {
    // On valid input, the unchecked parser must produce the same dataset as the validating one.
    let mut dataset_unchecked = quads_unchecked.iter().collect::<Dataset>();
    dataset_unchecked.canonicalize();
    assert_eq!(
        dataset_with_split,
        dataset_unchecked,
        "Validating:\n{}\nUnchecked:\n{}",
        String::from_utf8_lossy(&serialize_quads(&quads)),
        String::from_utf8_lossy(&serialize_quads(&quads_unchecked))
    );
}
}
assert_eq!(errors, errors_without_split);

@ -31,7 +31,7 @@ hex = "0.4"
json-event-parser = "0.2.0-alpha.2"
md-5 = "0.10"
oxilangtag = "0.1"
oxiri = "0.2"
oxiri = "0.2.3-alpha.1"
oxrdf = { version = "0.2.0-alpha.1-dev", path = "oxrdf", features = ["rdf-star", "oxsdatatypes"] }
oxrdfio = { version = "0.1.0-alpha.1-dev", path = "oxrdfio", features = ["rdf-star"] }
oxsdatatypes = { version = "0.2.0-alpha.1-dev", path="oxsdatatypes" }

@ -24,6 +24,16 @@ fn parse_nt(c: &mut Criterion) {
}
})
});
group.bench_function("parse BSBM explore 1000 unchecked", |b| {
b.iter(|| {
for r in RdfParser::from_format(RdfFormat::NTriples)
.unchecked()
.parse_read(data.as_slice())
{
r.unwrap();
}
})
});
}
fn store_load(c: &mut Criterion) {

@ -21,7 +21,7 @@ rdf-star = []
[dependencies]
rand = "0.8"
oxilangtag = "0.1"
oxiri = "0.2"
oxiri = "0.2.3-alpha.1"
oxsdatatypes = { version = "0.2.0-alpha.1-dev", path="../oxsdatatypes", optional = true }
[lints]

@ -158,20 +158,16 @@ impl RdfParser {
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self {
inner: match self.inner {
RdfParserKind::N3(p) => RdfParserKind::N3(p),
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p),
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p),
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?),
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?),
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?),
},
default_graph: self.default_graph,
without_named_graphs: self.without_named_graphs,
rename_blank_nodes: self.rename_blank_nodes,
})
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
self.inner = match self.inner {
RdfParserKind::N3(p) => RdfParserKind::N3(p),
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p),
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p),
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?),
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?),
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?),
};
Ok(self)
}
/// Provides the graph name that should replace the default graph in the returned quads.
@ -190,13 +186,9 @@ impl RdfParser {
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_default_graph(self, default_graph: impl Into<GraphName>) -> Self {
Self {
inner: self.inner,
default_graph: default_graph.into(),
without_named_graphs: self.without_named_graphs,
rename_blank_nodes: self.rename_blank_nodes,
}
pub fn with_default_graph(mut self, default_graph: impl Into<GraphName>) -> Self {
self.default_graph = default_graph.into();
self
}
/// Sets that the parser must fail if parsing a named graph.
@ -212,13 +204,9 @@ impl RdfParser {
/// assert!(parser.parse_read(file.as_bytes()).next().unwrap().is_err());
/// ```
#[inline]
pub fn without_named_graphs(self) -> Self {
Self {
inner: self.inner,
default_graph: self.default_graph,
without_named_graphs: true,
rename_blank_nodes: self.rename_blank_nodes,
}
pub fn without_named_graphs(mut self) -> Self {
self.without_named_graphs = true;
self
}
/// Renames the blank nodes ids from the ones set in the serialization to random ids.
@ -240,13 +228,27 @@ impl RdfParser {
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn rename_blank_nodes(self) -> Self {
Self {
inner: self.inner,
default_graph: self.default_graph,
without_named_graphs: self.without_named_graphs,
rename_blank_nodes: true,
}
pub fn rename_blank_nodes(mut self) -> Self {
self.rename_blank_nodes = true;
self
}
/// Assumes the file is valid to make parsing faster.
///
/// It will skip some validations.
///
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
#[inline]
pub fn unchecked(mut self) -> Self {
self.inner = match self.inner {
RdfParserKind::N3(p) => RdfParserKind::N3(p.unchecked()),
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p.unchecked()),
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p.unchecked()),
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.unchecked()),
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.unchecked()),
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.unchecked()),
};
self
}
/// Parses from a [`Read`] implementation and returns an iterator of quads.

@ -21,7 +21,7 @@ async-tokio = ["dep:tokio", "quick-xml/async-tokio"]
[dependencies]
oxrdf = { version = "0.2.0-alpha.1-dev", path = "../oxrdf" }
oxilangtag = "0.1"
oxiri = "0.2"
oxiri = "0.2.3-alpha.1"
quick-xml = ">=0.29, <0.32"
tokio = { version = "1.29", optional = true, features = ["io-util"] }

@ -52,6 +52,7 @@ use tokio::io::{AsyncRead, BufReader as AsyncBufReader};
#[derive(Default)]
#[must_use]
pub struct RdfXmlParser {
unchecked: bool,
base: Option<Iri<String>>,
}
@ -62,6 +63,17 @@ impl RdfXmlParser {
Self::default()
}
/// Assumes the file is valid to make parsing faster.
///
/// It will skip some validations.
///
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
#[inline]
pub fn unchecked(mut self) -> Self {
self.unchecked = true;
self
}
#[inline]
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
self.base = Some(Iri::parse(base_iri.into())?);
@ -158,6 +170,7 @@ impl RdfXmlParser {
in_literal_depth: 0,
known_rdf_id: HashSet::default(),
is_end: false,
unchecked: self.unchecked,
}
}
}
@ -414,6 +427,7 @@ struct RdfXmlReader<R> {
in_literal_depth: usize,
known_rdf_id: HashSet<String>,
is_end: bool,
unchecked: bool,
}
impl<R> RdfXmlReader<R> {
@ -551,19 +565,28 @@ impl<R> RdfXmlReader<R> {
let attribute = attribute.map_err(Error::InvalidAttr)?;
if attribute.key.as_ref().starts_with(b"xml") {
if attribute.key.as_ref() == b"xml:lang" {
let tag = self.convert_attribute(&attribute)?;
language = Some(
let tag = self.convert_attribute(&attribute)?.to_ascii_lowercase();
language = Some(if self.unchecked {
tag
} else {
LanguageTag::parse(tag.to_ascii_lowercase())
.map_err(|error| SyntaxError {
inner: SyntaxErrorKind::InvalidLanguageTag { tag, error },
})?
.into_inner(),
);
.into_inner()
});
} else if attribute.key.as_ref() == b"xml:base" {
let iri = self.convert_attribute(&attribute)?;
base_iri = Some(Iri::parse(iri.clone()).map_err(|error| SyntaxError {
inner: SyntaxErrorKind::InvalidIri { iri, error },
})?)
base_iri = Some(
if self.unchecked {
Iri::parse_unchecked(iri.clone())
} else {
Iri::parse(iri.clone())
}
.map_err(|error| SyntaxError {
inner: SyntaxErrorKind::InvalidIri { iri, error },
})?,
)
} else {
// We ignore other xml attributes
}
@ -622,12 +645,7 @@ impl<R> RdfXmlReader<R> {
.into());
} else {
property_attrs.push((
NamedNode::new(attribute_url.clone()).map_err(|error| SyntaxError {
inner: SyntaxErrorKind::InvalidIri {
iri: attribute_url,
error,
},
})?,
self.parse_iri(attribute_url)?,
self.convert_attribute(&attribute)?,
));
}
@ -637,7 +655,7 @@ impl<R> RdfXmlReader<R> {
//Parsing with the base URI
let id_attr = match id_attr {
Some(iri) => {
let iri = resolve(&base_iri, iri)?;
let iri = self.resolve_iri(&base_iri, iri)?;
if self.known_rdf_id.contains(iri.as_str()) {
return Err(SyntaxError::msg(format!(
"{} has already been used as rdf:ID value",
@ -701,12 +719,7 @@ impl<R> RdfXmlReader<R> {
.into());
} else {
Self::build_node_elt(
NamedNode::new(tag_name.clone()).map_err(|error| SyntaxError {
inner: SyntaxErrorKind::InvalidIri {
iri: tag_name,
error,
},
})?,
self.parse_iri(tag_name)?,
base_iri,
language,
id_attr,
@ -727,12 +740,7 @@ impl<R> RdfXmlReader<R> {
.into());
}
Self::build_node_elt(
NamedNode::new(tag_name.clone()).map_err(|error| SyntaxError {
inner: SyntaxErrorKind::InvalidIri {
iri: tag_name,
error,
},
})?,
self.parse_iri(tag_name)?,
base_iri,
language,
id_attr,
@ -766,12 +774,7 @@ impl<R> RdfXmlReader<R> {
))
.into());
} else {
NamedNode::new(tag_name.clone()).map_err(|error| SyntaxError {
inner: SyntaxErrorKind::InvalidIri {
iri: tag_name,
error,
},
})?
self.parse_iri(tag_name)?
};
match parse_type {
RdfXmlParseType::Default => {
@ -1156,32 +1159,51 @@ impl<R> RdfXmlReader<R> {
base_iri: &Option<Iri<String>>,
attribute: &Attribute<'_>,
) -> Result<NamedNode, ParseError> {
Ok(resolve(base_iri, self.convert_attribute(attribute)?)?)
Ok(self.resolve_iri(base_iri, self.convert_attribute(attribute)?)?)
}
fn resolve_entity(&self, e: &str) -> Option<&str> {
self.custom_entities.get(e).map(String::as_str)
fn resolve_iri(
&self,
base_iri: &Option<Iri<String>>,
relative_iri: String,
) -> Result<NamedNode, SyntaxError> {
if let Some(base_iri) = base_iri {
Ok(NamedNode::new_unchecked(
if self.unchecked {
base_iri.resolve_unchecked(&relative_iri)
} else {
base_iri.resolve(&relative_iri)
}
.map_err(|error| SyntaxError {
inner: SyntaxErrorKind::InvalidIri {
iri: relative_iri,
error,
},
})?
.into_inner(),
))
} else {
self.parse_iri(relative_iri)
}
}
}
fn resolve(base_iri: &Option<Iri<String>>, relative_iri: String) -> Result<NamedNode, SyntaxError> {
if let Some(base_iri) = base_iri {
Ok(base_iri
.resolve(&relative_iri)
.map_err(|error| SyntaxError {
inner: SyntaxErrorKind::InvalidIri {
iri: relative_iri,
error,
},
})?
.into())
} else {
NamedNode::new(relative_iri.clone()).map_err(|error| SyntaxError {
inner: SyntaxErrorKind::InvalidIri {
iri: relative_iri,
error,
},
})
fn parse_iri(&self, relative_iri: String) -> Result<NamedNode, SyntaxError> {
Ok(NamedNode::new_unchecked(if self.unchecked {
relative_iri
} else {
Iri::parse(relative_iri.clone())
.map_err(|error| SyntaxError {
inner: SyntaxErrorKind::InvalidIri {
iri: relative_iri,
error,
},
})?
.into_inner()
}))
}
fn resolve_entity(&self, e: &str) -> Option<&str> {
self.custom_entities.get(e).map(String::as_str)
}
}

@ -22,7 +22,7 @@ async-tokio = ["dep:tokio"]
[dependencies]
memchr = "2.5"
oxrdf = { version = "0.2.0-alpha.1-dev", path = "../oxrdf" }
oxiri = "0.2"
oxiri = "0.2.3-alpha.1"
oxilangtag = "0.1"
tokio = { version = "1.29", optional = true, features = ["io-util"] }

@ -6,12 +6,12 @@ use oxrdf::NamedNode;
use std::borrow::Cow;
use std::cmp::min;
use std::collections::HashMap;
use std::ops::{Range, RangeInclusive};
use std::ops::Range;
use std::str;
#[derive(Debug, PartialEq, Eq)]
pub enum N3Token<'a> {
IriRef(Iri<String>),
IriRef(String),
PrefixedName {
prefix: &'a str,
local: Cow<'a, str>,
@ -42,6 +42,7 @@ pub struct N3LexerOptions {
pub struct N3Lexer {
mode: N3LexerMode,
unchecked: bool,
}
// TODO: there are a lot of 'None' (missing data) returned even if the stream is ending!!!
@ -61,7 +62,7 @@ impl TokenRecognizer for N3Lexer {
b'<' => match *data.get(1)? {
b'<' => Some((2, Ok(N3Token::Punctuation("<<")))),
b'=' if self.mode == N3LexerMode::N3 => {
if let Some((consumed, result)) = Self::recognize_iri(data, options) {
if let Some((consumed, result)) = self.recognize_iri(data, options) {
Some(if let Ok(result) = result {
(consumed, Ok(result))
} else {
@ -74,7 +75,7 @@ impl TokenRecognizer for N3Lexer {
}
}
b'-' if self.mode == N3LexerMode::N3 => {
if let Some((consumed, result)) = Self::recognize_iri(data, options) {
if let Some((consumed, result)) = self.recognize_iri(data, options) {
Some(if let Ok(result) = result {
(consumed, Ok(result))
} else {
@ -86,7 +87,7 @@ impl TokenRecognizer for N3Lexer {
None
}
}
_ => Self::recognize_iri(data, options),
_ => self.recognize_iri(data, options),
},
b'>' => {
if *data.get(1)? == b'>' {
@ -119,7 +120,7 @@ impl TokenRecognizer for N3Lexer {
Self::recognize_string(data, b'\'')
}
}
b'@' => Self::recognize_lang_tag(data),
b'@' => self.recognize_lang_tag(data),
b'.' => match data.get(1) {
Some(b'0'..=b'9') => Self::recognize_number(data),
Some(_) => Some((1, Ok(N3Token::Punctuation(".")))),
@ -162,18 +163,19 @@ impl TokenRecognizer for N3Lexer {
}
}
b'0'..=b'9' | b'+' | b'-' => Self::recognize_number(data),
b'?' => Self::recognize_variable(data, is_ending),
_ => Self::recognize_pname_or_keyword(data, is_ending),
b'?' => self.recognize_variable(data, is_ending),
_ => self.recognize_pname_or_keyword(data, is_ending),
}
}
}
impl N3Lexer {
pub fn new(mode: N3LexerMode) -> Self {
Self { mode }
pub fn new(mode: N3LexerMode, unchecked: bool) -> Self {
Self { mode, unchecked }
}
fn recognize_iri(
&self,
data: &[u8],
options: &N3LexerOptions,
) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> {
@ -186,7 +188,8 @@ impl N3Lexer {
i += end;
match data[i] {
b'>' => {
return Some((i + 1, Self::parse_iri(string, 0..=i, options)));
#[allow(clippy::range_plus_one)]
return Some((i + 1, self.parse_iri(string, 0..i + 1, options)));
}
b'\\' => {
let (additional, c) = Self::recognize_escape(&data[i..], i, false)?;
@ -205,29 +208,36 @@ impl N3Lexer {
}
fn parse_iri(
&self,
iri: Vec<u8>,
position: RangeInclusive<usize>,
position: Range<usize>,
options: &N3LexerOptions,
) -> Result<N3Token<'static>, TokenRecognizerError> {
let iri = String::from_utf8(iri).map_err(|e| {
(
position.clone(),
format!("The IRI contains invalid UTF-8 characters: {e}"),
)
})?;
let iri = if let Some(base_iri) = options.base_iri.as_ref() {
base_iri.resolve(&iri)
} else {
Iri::parse(iri)
}
.map_err(|e| (position, e.to_string()))?;
Ok(N3Token::IriRef(iri))
let iri = string_from_utf8(iri, position.clone())?;
Ok(N3Token::IriRef(
if let Some(base_iri) = options.base_iri.as_ref() {
if self.unchecked {
base_iri.resolve_unchecked(&iri)
} else {
base_iri.resolve(&iri)
}
.map_err(|e| (position, e.to_string()))?
.into_inner()
} else if self.unchecked {
iri
} else {
Iri::parse(iri)
.map_err(|e| (position, e.to_string()))?
.into_inner()
},
))
}
fn recognize_pname_or_keyword(
data: &[u8],
fn recognize_pname_or_keyword<'a>(
&self,
data: &'a [u8],
is_ending: bool,
) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> {
) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> {
// [139s] PNAME_NS ::= PN_PREFIX? ':'
// [140s] PNAME_LN ::= PNAME_NS PN_LOCAL
@ -303,7 +313,8 @@ impl N3Lexer {
));
}
let (consumed, pn_local_result) = Self::recognize_optional_pn_local(&data[i..], is_ending)?;
let (consumed, pn_local_result) =
self.recognize_optional_pn_local(&data[i..], is_ending)?;
Some((
consumed + i,
pn_local_result.map(|(local, might_be_invalid_iri)| N3Token::PrefixedName {
@ -314,12 +325,13 @@ impl N3Lexer {
))
}
fn recognize_variable(
data: &[u8],
fn recognize_variable<'a>(
&self,
data: &'a [u8],
is_ending: bool,
) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> {
) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> {
// [36] QUICK_VAR_NAME ::= "?" PN_LOCAL
let (consumed, result) = Self::recognize_optional_pn_local(&data[1..], is_ending)?;
let (consumed, result) = self.recognize_optional_pn_local(&data[1..], is_ending)?;
Some((
consumed + 1,
result.and_then(|(name, _)| {
@ -332,10 +344,11 @@ impl N3Lexer {
))
}
fn recognize_optional_pn_local(
data: &[u8],
fn recognize_optional_pn_local<'a>(
&self,
data: &'a [u8],
is_ending: bool,
) -> Option<(usize, Result<(Cow<'_, str>, bool), TokenRecognizerError>)> {
) -> Option<(usize, Result<(Cow<'a, str>, bool), TokenRecognizerError>)> {
// [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
let mut i = 0;
let mut buffer = None; // Buffer if there are some escaped characters
@ -359,23 +372,25 @@ impl N3Lexer {
} else if c == '\\' {
i += 1;
let a = char::from(*data.get(i)?);
if matches!(
a,
'_' | '~'
| '.'
| '-'
| '!'
| '$'
| '&'
| '\''
| '('
| ')'
| '*'
| '+'
| ','
| ';'
| '='
) {
if self.unchecked
|| matches!(
a,
'_' | '~'
| '.'
| '-'
| '!'
| '$'
| '&'
| '\''
| '('
| ')'
| '*'
| '+'
| ','
| ';'
| '='
)
{
// ok to escape
} else if matches!(a, '/' | '?' | '#' | '@' | '%') {
// ok to escape but requires IRI validation
@ -406,12 +421,18 @@ impl N3Lexer {
{
return Some((0, Ok((Cow::Borrowed(""), false))));
}
might_be_invalid_iri |=
Self::is_possible_pn_chars_base_but_not_valid_iri(c) || c == ':';
if !self.unchecked {
might_be_invalid_iri |=
Self::is_possible_pn_chars_base_but_not_valid_iri(c)
|| c == ':';
}
i += consumed;
} else if Self::is_possible_pn_chars(c) || c == ':' || c == '.' {
might_be_invalid_iri |=
Self::is_possible_pn_chars_base_but_not_valid_iri(c) || c == ':';
if !self.unchecked {
might_be_invalid_iri |=
Self::is_possible_pn_chars_base_but_not_valid_iri(c)
|| c == ':';
}
i += consumed;
} else {
let buffer = if let Some(mut buffer) = buffer {
@ -518,9 +539,10 @@ impl N3Lexer {
}
}
fn recognize_lang_tag(
data: &[u8],
) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> {
fn recognize_lang_tag<'a>(
&self,
data: &'a [u8],
) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> {
// [144s] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
let mut is_last_block_empty = true;
for (i, c) in data[1..].iter().enumerate() {
@ -532,25 +554,29 @@ impl N3Lexer {
Err((1..2, "A language code should always start with a letter").into()),
));
} else if is_last_block_empty {
return Some((i, Self::parse_lang_tag(&data[1..i], 1..i - 1)));
return Some((i, self.parse_lang_tag(&data[1..i], 1..i - 1)));
} else if *c == b'-' {
is_last_block_empty = true;
} else {
return Some((i + 1, Self::parse_lang_tag(&data[1..=i], 1..i)));
return Some((i + 1, self.parse_lang_tag(&data[1..=i], 1..i)));
}
}
None
}
fn parse_lang_tag(
lang_tag: &[u8],
fn parse_lang_tag<'a>(
&self,
lang_tag: &'a [u8],
position: Range<usize>,
) -> Result<N3Token<'_>, TokenRecognizerError> {
Ok(N3Token::LangTag(
LanguageTag::parse(str_from_utf8(lang_tag, position.clone())?)
) -> Result<N3Token<'a>, TokenRecognizerError> {
let lang_tag = str_from_utf8(lang_tag, position.clone())?;
Ok(N3Token::LangTag(if self.unchecked {
lang_tag
} else {
LanguageTag::parse(lang_tag)
.map_err(|e| (position.clone(), e.to_string()))?
.into_inner(),
))
.into_inner()
}))
}
fn recognize_string(
@ -933,3 +959,14 @@ fn str_from_utf8(data: &[u8], range: Range<usize>) -> Result<&str, TokenRecogniz
.into()
})
}
/// Converts owned bytes into a `String`, turning a UTF-8 failure into a lexer error.
///
/// `range` is the position of `data` in the input. The reported error range starts at the first
/// byte that is not valid UTF-8 and spans at most 4 bytes, without going past `range.end`.
fn string_from_utf8(data: Vec<u8>, range: Range<usize>) -> Result<String, TokenRecognizerError> {
    match String::from_utf8(data) {
        Ok(text) => Ok(text),
        Err(e) => {
            let error_start = range.start + e.utf8_error().valid_up_to();
            let error_end = min(range.end, error_start + 4);
            Err((error_start..error_end, format!("Invalid UTF-8: {e}")).into())
        }
    }
}

@ -63,7 +63,7 @@ impl RuleRecognizer for NQuadsRecognizer {
NQuadsState::ExpectSubject => match token {
N3Token::IriRef(s) => {
self.subjects
.push(NamedNode::from(s).into());
.push(NamedNode::new_unchecked(s).into());
self.stack.push(NQuadsState::ExpectPredicate);
self
}
@ -86,7 +86,7 @@ impl RuleRecognizer for NQuadsRecognizer {
NQuadsState::ExpectPredicate => match token {
N3Token::IriRef(p) => {
self.predicates
.push(p.into());
.push(NamedNode::new_unchecked(p));
self.stack.push(NQuadsState::ExpectedObject);
self
}
@ -98,7 +98,7 @@ impl RuleRecognizer for NQuadsRecognizer {
NQuadsState::ExpectedObject => match token {
N3Token::IriRef(o) => {
self.objects
.push(NamedNode::from(o).into());
.push(NamedNode::new_unchecked(o).into());
self.stack
.push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
self
@ -155,7 +155,7 @@ impl RuleRecognizer for NQuadsRecognizer {
self.objects.push(
Literal::new_typed_literal(
value,
d
NamedNode::new_unchecked(d)
)
.into(),
);
@ -171,7 +171,7 @@ impl RuleRecognizer for NQuadsRecognizer {
N3Token::IriRef(g) if context.with_graph_name => {
self.emit_quad(
results,
NamedNode::from(g).into(),
NamedNode::new_unchecked(g).into(),
);
self.stack.push(NQuadsState::ExpectDot);
self
@ -264,10 +264,11 @@ impl NQuadsRecognizer {
pub fn new_parser(
with_graph_name: bool,
#[cfg(feature = "rdf-star")] with_quoted_triples: bool,
unchecked: bool,
) -> Parser<Self> {
Parser::new(
Lexer::new(
N3Lexer::new(N3LexerMode::NTriples),
N3Lexer::new(N3LexerMode::NTriples, unchecked),
MIN_BUFFER_SIZE,
MAX_BUFFER_SIZE,
true,

@ -206,6 +206,7 @@ impl From<Quad> for N3Quad {
#[derive(Default)]
#[must_use]
pub struct N3Parser {
unchecked: bool,
base: Option<Iri<String>>,
prefixes: HashMap<String, Iri<String>>,
}
@ -217,6 +218,17 @@ impl N3Parser {
Self::default()
}
/// Assumes the file is valid to make parsing faster.
///
/// It will skip some validations.
///
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
#[inline]
pub fn unchecked(mut self) -> Self {
self.unchecked = true;
self
}
#[inline]
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
self.base = Some(Iri::parse(base_iri.into())?);
@ -345,7 +357,7 @@ impl N3Parser {
/// ```
pub fn parse(self) -> LowLevelN3Reader {
LowLevelN3Reader {
parser: N3Recognizer::new_parser(self.base, self.prefixes),
parser: N3Recognizer::new_parser(self.unchecked, self.base, self.prefixes),
}
}
}
@ -665,8 +677,13 @@ impl RuleRecognizer for N3Recognizer {
}
N3State::BaseExpectIri => return match token {
N3Token::IriRef(iri) => {
context.lexer_options.base_iri = Some(iri);
self
match Iri::parse_unchecked(iri) {
Ok(iri) => {
context.lexer_options.base_iri = Some(iri);
self
}
Err(e) => self.error(errors, format!("Invalid base IRI: {e}"))
}
}
_ => self.error(errors, "The BASE keyword should be followed by an IRI"),
},
@ -681,8 +698,13 @@ impl RuleRecognizer for N3Recognizer {
},
N3State::PrefixExpectIri { name } => return match token {
N3Token::IriRef(iri) => {
context.prefixes.insert(name, iri);
self
match Iri::parse_unchecked(iri) {
Ok(iri) => {
context.prefixes.insert(name, iri);
self
}
Err(e) => self.error(errors, format!("Invalid prefix IRI: {e}"))
}
}
_ => self.error(errors, "The PREFIX declaration should be followed by a prefix and its value as an IRI"),
},
@ -843,7 +865,7 @@ impl RuleRecognizer for N3Recognizer {
N3State::PathItem => {
return match token {
N3Token::IriRef(iri) => {
self.terms.push(NamedNode::from(iri).into());
self.terms.push(NamedNode::new_unchecked(iri).into());
self
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) {
@ -925,7 +947,7 @@ impl RuleRecognizer for N3Recognizer {
}
N3State::IriPropertyList => return match token {
N3Token::IriRef(id) => {
self.terms.push(NamedNode::from(id).into());
self.terms.push(NamedNode::new_unchecked(id).into());
self.stack.push(N3State::PropertyListEnd);
self.stack.push(N3State::PredicateObjectList);
self
@ -999,7 +1021,7 @@ impl RuleRecognizer for N3Recognizer {
N3State::LiteralExpectDatatype { value } => {
match token {
N3Token::IriRef(datatype) => {
self.terms.push(Literal::new_typed_literal(value, datatype).into());
self.terms.push(Literal::new_typed_literal(value, NamedNode::new_unchecked(datatype)).into());
return self;
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) {
@ -1096,12 +1118,13 @@ impl RuleRecognizer for N3Recognizer {
impl N3Recognizer {
pub fn new_parser(
unchecked: bool,
base_iri: Option<Iri<String>>,
prefixes: HashMap<String, Iri<String>>,
) -> Parser<Self> {
Parser::new(
Lexer::new(
N3Lexer::new(N3LexerMode::N3),
N3Lexer::new(N3LexerMode::N3, unchecked),
MIN_BUFFER_SIZE,
MAX_BUFFER_SIZE,
true,

@ -37,6 +37,7 @@ use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
#[derive(Default)]
#[must_use]
pub struct NQuadsParser {
unchecked: bool,
#[cfg(feature = "rdf-star")]
with_quoted_triples: bool,
}
@ -48,6 +49,17 @@ impl NQuadsParser {
Self::default()
}
/// Assumes the file is valid to make parsing faster.
///
/// It will skip some validations.
///
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
#[inline]
pub fn unchecked(mut self) -> Self {
self.unchecked = true;
self
}
/// Enables [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star).
#[cfg(feature = "rdf-star")]
#[inline]
@ -165,6 +177,7 @@ impl NQuadsParser {
true,
#[cfg(feature = "rdf-star")]
self.with_quoted_triples,
self.unchecked,
),
}
}

@ -38,6 +38,7 @@ use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
#[derive(Default)]
#[must_use]
pub struct NTriplesParser {
// `false` by default (derived `Default`); switched to `true` by `NTriplesParser::unchecked`
// and handed over when the underlying parser is built.
unchecked: bool,
// Only compiled in with the `rdf-star` feature.
// NOTE(review): presumably toggled by the N-Triples-star builder method — confirm.
#[cfg(feature = "rdf-star")]
with_quoted_triples: bool,
}
@ -49,6 +50,17 @@ impl NTriplesParser {
Self::default()
}
/// Assumes the file is valid to make parsing faster.
///
/// It will skip some validations.
///
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser. ///
#[inline]
pub fn unchecked(mut self) -> Self {
self.unchecked = true;
self
}
/// Enables [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star).
#[cfg(feature = "rdf-star")]
#[inline]
@ -166,6 +178,7 @@ impl NTriplesParser {
false,
#[cfg(feature = "rdf-star")]
self.with_quoted_triples,
self.unchecked,
),
}
}
@ -542,3 +555,26 @@ impl LowLevelNTriplesWriter {
writeln!(write, "{} .", t.into())
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use oxrdf::{Literal, NamedNode};

    /// With `unchecked()` enabled, the relative IRIs `foo`/`bar` and the
    /// `toolonglangtag` language tag must come out of the parser verbatim.
    #[test]
    fn unchecked_parsing() {
        let input = "<foo> <bar> \"baz\"@toolonglangtag .";

        let parsed = NTriplesParser::new()
            .unchecked()
            .parse_read(input.as_bytes())
            .collect::<Result<Vec<_>, _>>()
            .unwrap();

        // The expected terms are built with the `_unchecked` constructors as well,
        // so nothing on this side validates the IRIs or the language tag either.
        let expected = [Triple::new(
            NamedNode::new_unchecked("foo"),
            NamedNode::new_unchecked("bar"),
            Literal::new_language_tagged_literal_unchecked("baz", "toolonglangtag"),
        )];
        assert_eq!(parsed, expected);
    }
}

@ -107,8 +107,13 @@ impl RuleRecognizer for TriGRecognizer {
},
TriGState::BaseExpectIri => match token {
N3Token::IriRef(iri) => {
context.lexer_options.base_iri = Some(iri);
self
match Iri::parse_unchecked(iri) {
Ok(iri) => {
context.lexer_options.base_iri = Some(iri);
self
}
Err(e) => self.error(errors, format!("Invalid base IRI: {e}"))
}
}
_ => self.error(errors, "The BASE keyword should be followed by an IRI"),
},
@ -123,9 +128,13 @@ impl RuleRecognizer for TriGRecognizer {
},
TriGState::PrefixExpectIri { name } => match token {
N3Token::IriRef(iri) => {
context.prefixes.insert(name, iri);
self
}
match Iri::parse_unchecked(iri) {
Ok(iri) => {
context.prefixes.insert(name, iri);
self
}
Err(e) => self.error(errors, format!("Invalid prefix IRI: {e}"))
} }
_ => self.error(errors, "The PREFIX declaration should be followed by a prefix and its value as an IRI"),
},
// [3g] triplesOrGraph ::= labelOrSubject ( wrappedGraph | predicateObjectList '.' ) | quotedTriple predicateObjectList '.'
@ -133,7 +142,7 @@ impl RuleRecognizer for TriGRecognizer {
TriGState::TriplesOrGraph => match token {
N3Token::IriRef(iri) => {
self.stack.push(TriGState::WrappedGraphOrPredicateObjectList {
term: NamedNode::from(iri).into()
term: NamedNode::new_unchecked(iri).into()
});
self
}
@ -291,7 +300,7 @@ impl RuleRecognizer for TriGRecognizer {
self
}
N3Token::IriRef(iri) => {
self.cur_subject.push(NamedNode::from(iri).into());
self.cur_subject.push(NamedNode::new_unchecked(iri).into());
self.stack.push(TriGState::PredicateObjectList);
self
}
@ -337,7 +346,7 @@ impl RuleRecognizer for TriGRecognizer {
// [7g] labelOrSubject ::= iri | BlankNode
TriGState::GraphName => match token {
N3Token::IriRef(iri) => {
self.cur_graph = NamedNode::from(iri).into();
self.cur_graph = NamedNode::new_unchecked(iri).into();
self
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) {
@ -451,7 +460,7 @@ impl RuleRecognizer for TriGRecognizer {
self
}
N3Token::IriRef(iri) => {
self.cur_predicate.push(NamedNode::from(iri));
self.cur_predicate.push(NamedNode::new_unchecked(iri));
self
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) {
@ -479,7 +488,7 @@ impl RuleRecognizer for TriGRecognizer {
// [137s] BlankNode ::= BLANK_NODE_LABEL | ANON
TriGState::Object => match token {
N3Token::IriRef(iri) => {
self.cur_object.push(NamedNode::from(iri).into());
self.cur_object.push(NamedNode::new_unchecked(iri).into());
self.emit_quad(results);
self
}
@ -626,7 +635,7 @@ impl RuleRecognizer for TriGRecognizer {
TriGState::LiteralExpectDatatype { value, emit } => {
match token {
N3Token::IriRef(datatype) => {
self.cur_object.push(Literal::new_typed_literal(value, datatype).into());
self.cur_object.push(Literal::new_typed_literal(value, NamedNode::new_unchecked(datatype)).into());
if emit {
self.emit_quad(results);
}
@ -688,7 +697,7 @@ impl RuleRecognizer for TriGRecognizer {
self
}
N3Token::IriRef(iri) => {
self.cur_subject.push(NamedNode::from(iri).into());
self.cur_subject.push(NamedNode::new_unchecked(iri).into());
self
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) {
@ -720,7 +729,7 @@ impl RuleRecognizer for TriGRecognizer {
self
}
N3Token::IriRef(iri) => {
self.cur_object.push(NamedNode::from(iri).into());
self.cur_object.push(NamedNode::new_unchecked(iri).into());
self
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) {
@ -823,12 +832,13 @@ impl TriGRecognizer {
pub fn new_parser(
with_graph_name: bool,
#[cfg(feature = "rdf-star")] with_quoted_triples: bool,
unchecked: bool,
base_iri: Option<Iri<String>>,
prefixes: HashMap<String, Iri<String>>,
) -> Parser<Self> {
Parser::new(
Lexer::new(
N3Lexer::new(N3LexerMode::Turtle),
N3Lexer::new(N3LexerMode::Turtle, unchecked),
MIN_BUFFER_SIZE,
MAX_BUFFER_SIZE,
true,

@ -42,6 +42,7 @@ use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
#[derive(Default)]
#[must_use]
pub struct TriGParser {
unchecked: bool,
base: Option<Iri<String>>,
prefixes: HashMap<String, Iri<String>>,
#[cfg(feature = "rdf-star")]
@ -55,6 +56,17 @@ impl TriGParser {
Self::default()
}
/// Assumes the file is valid to make parsing faster.
///
/// It will skip some validations.
///
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
#[inline]
pub fn unchecked(mut self) -> Self {
self.unchecked = true;
self
}
#[inline]
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
self.base = Some(Iri::parse(base_iri.into())?);
@ -192,6 +204,7 @@ impl TriGParser {
true,
#[cfg(feature = "rdf-star")]
self.with_quoted_triples,
self.unchecked,
self.base,
self.prefixes,
),

@ -44,6 +44,7 @@ use tokio::io::{AsyncRead, AsyncWrite};
#[derive(Default)]
#[must_use]
pub struct TurtleParser {
unchecked: bool,
base: Option<Iri<String>>,
prefixes: HashMap<String, Iri<String>>,
#[cfg(feature = "rdf-star")]
@ -57,6 +58,17 @@ impl TurtleParser {
Self::default()
}
/// Assumes the file is valid to make parsing faster.
///
/// It will skip some validations.
///
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
#[inline]
pub fn unchecked(mut self) -> Self {
self.unchecked = true;
self
}
#[inline]
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
self.base = Some(Iri::parse(base_iri.into())?);
@ -194,6 +206,7 @@ impl TurtleParser {
false,
#[cfg(feature = "rdf-star")]
self.with_quoted_triples,
self.unchecked,
self.base,
self.prefixes,
),

@ -23,7 +23,7 @@ sep-0006 = []
[dependencies]
peg = "0.8"
rand = "0.8"
oxiri = "0.2"
oxiri = "0.2.3-alpha.1"
oxilangtag = "0.1"
oxrdf = { version = "0.2.0-alpha.1-dev", path="../oxrdf" }

@ -1053,7 +1053,6 @@ impl<'a> Transaction<'a> {
/// Retrieves quads with a filter on each quad component.
///
/// Usage example:
/// Usage example:
/// ```
/// use oxigraph::store::{StorageError, Store};
/// use oxigraph::model::*;
@ -1601,19 +1600,22 @@ impl BulkLoader {
///
/// <div class="warning">This method is optimized for speed. See [the struct](BulkLoader) documentation for more details.</div>
///
/// Usage example:
/// To get better speed on valid datasets, consider enabling the [`RdfParser::unchecked`] option to skip some validations.
///
/// Usage example:
/// ```
/// use oxigraph::store::Store;
/// use oxigraph::io::RdfFormat;
/// use oxigraph::io::{RdfParser, RdfFormat};
/// use oxigraph::model::*;
/// use oxrdfio::RdfParser;
///
/// let store = Store::new()?;
///
/// // insert a dataset file (former load_dataset method)
/// let file = b"<http://example.com> <http://example.com> <http://example.com> <http://example.com/g> .";
/// store.bulk_loader().load_from_read(RdfFormat::NQuads, file.as_ref())?;
/// store.bulk_loader().load_from_read(
/// RdfParser::from_format(RdfFormat::NQuads).unchecked(), // we inject a custom parser with options
/// file.as_ref()
/// )?;
///
/// // insert a graph file (former load_graph method)
/// let file = b"<> <> <> .";

@ -5,7 +5,7 @@ from urllib.request import urlopen
TARGET_DEBIAN_VERSIONS = ["sid"]
IGNORE_PACKAGES = {"oxigraph-js", "oxigraph-testsuite", "pyoxigraph", "sparql-smith"}
ALLOWED_MISSING_PACKAGES = {"codspeed-criterion-compat", "escargot", "json-event-parser", "oxhttp", "quick-xml"}
ALLOWED_MISSING_PACKAGES = {"codspeed-criterion-compat", "escargot", "json-event-parser", "oxhttp", "oxiri", "quick-xml"}
base_path = Path(__file__).parent.parent

Loading…
Cancel
Save