New N3/Turtle/TriG/N-Triple/N-Quad parsers and serializers

- Compatible with async IO
- Turtle/TriG parser recovery on simple errors
pull/555/head
Tpt 2 years ago committed by Thomas Tanon
parent a1cbfdf67d
commit 71b1768d28
  1. 6
      .clusterfuzzlite/build.sh
  2. 6
      .github/workflows/tests.yml
  3. 4
      .gitmodules
  4. 16
      Cargo.lock
  5. 1
      Cargo.toml
  6. 13
      fuzz/Cargo.toml
  7. 28
      fuzz/fuzz_targets/n3.rs
  8. 49
      fuzz/fuzz_targets/nquads.rs
  9. 53
      fuzz/fuzz_targets/trig.rs
  10. 8
      lib/Cargo.toml
  11. 6
      lib/benches/store.rs
  12. 27
      lib/oxttl/Cargo.toml
  13. 938
      lib/oxttl/src/lexer.rs
  14. 19
      lib/oxttl/src/lib.rs
  15. 305
      lib/oxttl/src/line_formats.rs
  16. 1035
      lib/oxttl/src/n3.rs
  17. 393
      lib/oxttl/src/nquads.rs
  18. 389
      lib/oxttl/src/ntriples.rs
  19. 932
      lib/oxttl/src/terse.rs
  20. 280
      lib/oxttl/src/toolkit/lexer.rs
  21. 11
      lib/oxttl/src/toolkit/mod.rs
  22. 244
      lib/oxttl/src/toolkit/parser.rs
  23. 666
      lib/oxttl/src/trig.rs
  24. 462
      lib/oxttl/src/turtle.rs
  25. 30
      lib/src/io/error.rs
  26. 9
      lib/src/io/mod.rs
  27. 318
      lib/src/io/read.rs
  28. 151
      lib/src/io/write.rs
  29. 2
      lib/src/sparql/update.rs
  30. 16
      lib/src/store.rs
  31. 8
      python/src/io.rs
  32. 2
      python/tests/test_io.py
  33. 3
      python/tests/test_store.py
  34. 2
      server/src/main.rs
  35. 10
      testsuite/Cargo.toml
  36. 1
      testsuite/N3
  37. 194
      testsuite/benches/parser.rs
  38. 2
      testsuite/oxigraph-tests/parser-recovery/invalid_bnode.nt
  39. 2
      testsuite/oxigraph-tests/parser-recovery/invalid_iri.nt
  40. 2
      testsuite/oxigraph-tests/parser-recovery/invalid_string.nt
  41. 2
      testsuite/oxigraph-tests/parser-recovery/iri2_spo.nt
  42. 2
      testsuite/oxigraph-tests/parser-recovery/iri2_string_spo.nt
  43. 1
      testsuite/oxigraph-tests/parser-recovery/iri_spo.nt
  44. 1
      testsuite/oxigraph-tests/parser-recovery/iri_string_spo.nt
  45. 129
      testsuite/oxigraph-tests/parser-recovery/manifest.ttl
  46. 1
      testsuite/oxigraph-tests/parser-recovery/missing_dot_at_end_of_triple_with_iri_end.nt
  47. 2
      testsuite/oxigraph-tests/parser-recovery/missing_dot_at_end_of_triple_with_iri_middle.nt
  48. 1
      testsuite/oxigraph-tests/parser-recovery/missing_dot_at_end_of_triple_with_string_end.nt
  49. 2
      testsuite/oxigraph-tests/parser-recovery/missing_dot_at_end_of_triple_with_string_middle.nt
  50. 2
      testsuite/oxigraph-tests/parser/at_keywords_as_lang_tag.nt
  51. 3
      testsuite/oxigraph-tests/parser/at_keywords_as_lang_tag.ttl
  52. 1
      testsuite/oxigraph-tests/parser/bad_lang.ttl
  53. 2
      testsuite/oxigraph-tests/parser/bad_parentheses.ttl
  54. 2
      testsuite/oxigraph-tests/parser/blank_node_with_linebreak.nt
  55. 6
      testsuite/oxigraph-tests/parser/blank_node_with_linebreak.ttl
  56. 3
      testsuite/oxigraph-tests/parser/keyword_vs_prefix.nq
  57. 2
      testsuite/oxigraph-tests/parser/keyword_vs_prefix.nt
  58. 10
      testsuite/oxigraph-tests/parser/keyword_vs_prefix.trig
  59. 8
      testsuite/oxigraph-tests/parser/keyword_vs_prefix.ttl
  60. 1
      testsuite/oxigraph-tests/parser/language_normalization.nt
  61. 4
      testsuite/oxigraph-tests/parser/language_normalization.rdf
  62. 1
      testsuite/oxigraph-tests/parser/language_normalization.ttl
  63. 1
      testsuite/oxigraph-tests/parser/literal_value_space.nt
  64. 7
      testsuite/oxigraph-tests/parser/literal_value_space.rdf
  65. 90
      testsuite/oxigraph-tests/parser/manifest.ttl
  66. 1
      testsuite/oxigraph-tests/parser/no_end_line_jump.nt
  67. 1
      testsuite/oxigraph-tests/parser/xml_entities.nt
  68. 10
      testsuite/oxigraph-tests/parser/xml_entities.rdf
  69. 1
      testsuite/oxigraph-tests/parser/xml_nested_entities.nt
  70. 15
      testsuite/oxigraph-tests/parser/xml_nested_entities.rdf
  71. 13
      testsuite/serd-tests/LICENSE
  72. 1
      testsuite/serd-tests/README.md
  73. 2
      testsuite/serd-tests/bad/bad-00.ttl
  74. 3
      testsuite/serd-tests/bad/bad-01.ttl
  75. 3
      testsuite/serd-tests/bad/bad-02.ttl
  76. 3
      testsuite/serd-tests/bad/bad-03.ttl
  77. 3
      testsuite/serd-tests/bad/bad-04.ttl
  78. 4
      testsuite/serd-tests/bad/bad-05.ttl
  79. 3
      testsuite/serd-tests/bad/bad-06.ttl
  80. 4
      testsuite/serd-tests/bad/bad-07.ttl
  81. 2
      testsuite/serd-tests/bad/bad-08.ttl
  82. 3
      testsuite/serd-tests/bad/bad-09.ttl
  83. 3
      testsuite/serd-tests/bad/bad-10.ttl
  84. 3
      testsuite/serd-tests/bad/bad-11.ttl
  85. 3
      testsuite/serd-tests/bad/bad-12.ttl
  86. 3
      testsuite/serd-tests/bad/bad-13.ttl
  87. 6
      testsuite/serd-tests/bad/bad-14.ttl
  88. 1
      testsuite/serd-tests/bad/bad-base.ttl
  89. 1
      testsuite/serd-tests/bad/bad-blank-syntax.ttl
  90. 3
      testsuite/serd-tests/bad/bad-blank.ttl
  91. 3
      testsuite/serd-tests/bad/bad-bom.ttl
  92. 3
      testsuite/serd-tests/bad/bad-char-in-local.ttl
  93. 1
      testsuite/serd-tests/bad/bad-char-in-prefix.ttl
  94. 1
      testsuite/serd-tests/bad/bad-char-in-uri.ttl
  95. 1
      testsuite/serd-tests/bad/bad-datatype-syntax.ttl
  96. 1
      testsuite/serd-tests/bad/bad-datatype.ttl
  97. 1
      testsuite/serd-tests/bad/bad-dot-after-subject.ttl
  98. 1
      testsuite/serd-tests/bad/bad-dot-in-collection.ttl
  99. 3
      testsuite/serd-tests/bad/bad-eof-after-quotes.ttl
  100. 3
      testsuite/serd-tests/bad/bad-eof-at-string-start.ttl
  101. Some files were not shown because too many files have changed in this diff Show More

@ -15,10 +15,14 @@ function build_seed_corpus() {
cd "$SRC"/oxigraph
cargo fuzz build -O --debug-assertions
for TARGET in sparql_eval sparql_results_json sparql_results_tsv # sparql_results_xml https://github.com/tafia/quick-xml/issues/608
for TARGET in sparql_eval sparql_results_json sparql_results_tsv n3 nquads trig # sparql_results_xml https://github.com/tafia/quick-xml/issues/608
do
cp fuzz/target/x86_64-unknown-linux-gnu/release/$TARGET "$OUT"/
done
build_seed_corpus sparql_results_json srj
build_seed_corpus sparql_results_tsv tsv
build_seed_corpus sparql_results_xml srx
build_seed_corpus n3 n3
build_seed_corpus nquads nq
build_seed_corpus trig trig

@ -32,6 +32,8 @@ jobs:
working-directory: ./lib/oxsdatatypes
- run: cargo clippy
working-directory: ./lib/oxrdf
- run: cargo clippy
working-directory: ./lib/oxttl
- run: cargo clippy
working-directory: ./lib/sparesults
- run: cargo clippy
@ -74,6 +76,8 @@ jobs:
working-directory: ./lib/oxsdatatypes
- run: cargo clippy -- -D warnings -D clippy::all
working-directory: ./lib/oxrdf
- run: cargo clippy -- -D warnings -D clippy::all
working-directory: ./lib/oxttl
- run: cargo clippy -- -D warnings -D clippy::all
working-directory: ./lib/sparesults
- run: cargo clippy -- -D warnings -D clippy::all
@ -123,7 +127,7 @@ jobs:
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: cargo install cargo-semver-checks || true
- run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph_js --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server --exclude sparopt
- run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph_js --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server --exclude oxttl --exclude sparopt
test_linux:
runs-on: ubuntu-latest

4
.gitmodules vendored

@ -13,3 +13,7 @@
[submodule "oxrocksdb-sys/lz4"]
path = oxrocksdb-sys/lz4
url = https://github.com/lz4/lz4.git
[submodule "testsuite/N3"]
path = testsuite/N3
url = https://github.com/w3c/N3.git
branch = master

16
Cargo.lock generated

@ -948,10 +948,10 @@ dependencies = [
"oxrdf",
"oxrocksdb-sys",
"oxsdatatypes",
"oxttl",
"rand",
"regex",
"rio_api",
"rio_turtle",
"rio_xml",
"sha-1",
"sha2",
@ -998,7 +998,11 @@ version = "0.0.0"
dependencies = [
"anyhow",
"clap",
"criterion",
"oxigraph",
"oxttl",
"rio_api",
"rio_turtle",
"spargebra",
"sparopt",
"text-diff",
@ -1043,6 +1047,16 @@ dependencies = [
"js-sys",
]
[[package]]
name = "oxttl"
version = "0.1.0"
dependencies = [
"memchr",
"oxilangtag",
"oxiri",
"oxrdf",
]
[[package]]
name = "parking_lot"
version = "0.12.1"

@ -4,6 +4,7 @@ members = [
"lib",
"lib/oxrdf",
"lib/oxsdatatypes",
"lib/oxttl",
"lib/spargebra",
"lib/sparesults",
"lib/sparopt",

@ -12,6 +12,7 @@ cargo-fuzz = true
anyhow = "1"
lazy_static = "1"
libfuzzer-sys = "0.4"
oxttl = { path = "../lib/oxttl", features = ["rdf-star"] }
spargebra = { path = "../lib/spargebra", features = ["rdf-star", "sep-0006"] }
sparesults = { path = "../lib/sparesults", features = ["rdf-star"] }
sparql-smith = { path = "../lib/sparql-smith", features = ["sep-0006"] }
@ -23,6 +24,14 @@ debug = true
[workspace]
[[bin]]
name = "nquads"
path = "fuzz_targets/nquads.rs"
[[bin]]
name = "n3"
path = "fuzz_targets/n3.rs"
[[bin]]
name = "sparql_eval"
path = "fuzz_targets/sparql_eval.rs"
@ -46,3 +55,7 @@ path = "fuzz_targets/sparql_results_xml.rs"
[[bin]]
name = "sparql_results_tsv"
path = "fuzz_targets/sparql_results_tsv.rs"
[[bin]]
name = "trig"
path = "fuzz_targets/trig.rs"

@ -0,0 +1,28 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxttl::N3Parser;
fuzz_target!(|data: &[u8]| {
    // The parsed quads are only collected for now: nothing is done with them yet (see the TODO below).
    let mut quads = Vec::new();
    let mut parser = N3Parser::new()
        .with_base_iri("http://example.com/")
        .unwrap()
        .parse();
    // The input is split on 0xFF bytes and fed piece by piece to exercise the incremental API.
    for chunk in data.split(|byte| *byte == 0xFF) {
        parser.extend_from_slice(chunk);
        // Drain everything that is already parseable, keeping only the successfully parsed quads.
        quads.extend(std::iter::from_fn(|| parser.read_next()).flatten());
    }
    parser.end();
    // Drain what could only be decided once the end of the input is known.
    quads.extend(std::iter::from_fn(|| parser.read_next()).flatten());
    assert!(parser.is_end());
    //TODO: serialize
});

@ -0,0 +1,49 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxttl::{NQuadsParser, NQuadsSerializer};
fuzz_target!(|data: &[u8]| {
    // Step 1: incremental parsing. The input is split on 0xFF bytes and fed piece by piece.
    let mut quads = Vec::new();
    let mut parser = NQuadsParser::new().with_quoted_triples().parse();
    for chunk in data.split(|byte| *byte == 0xFF) {
        parser.extend_from_slice(chunk);
        // Keep only the successfully parsed quads, errors are ignored.
        quads.extend(std::iter::from_fn(|| parser.read_next()).flatten());
    }
    parser.end();
    quads.extend(std::iter::from_fn(|| parser.read_next()).flatten());
    assert!(parser.is_end());

    // Step 2: serialization of what has been parsed.
    let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new());
    for quad in &quads {
        writer.write_quad(quad).unwrap();
    }
    let new_serialization = writer.finish();

    // Step 3: the serialization must be parseable without any error.
    let reparsed = NQuadsParser::new()
        .with_quoted_triples()
        .parse_from_read(new_serialization.as_slice())
        .collect::<Result<Vec<_>, _>>();
    let new_quads = reparsed
        .map_err(|e| {
            format!(
                "Error on {:?} from {quads:?} based on {:?}: {e}",
                String::from_utf8_lossy(&new_serialization),
                String::from_utf8_lossy(data)
            )
        })
        .unwrap();

    // Step 4: the roundtrip must not have changed anything.
    assert_eq!(new_quads, quads);
});

@ -0,0 +1,53 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxttl::{TriGParser, TriGSerializer};
fuzz_target!(|data: &[u8]| {
    // Step 1: incremental parsing. The input is split on 0xFF bytes and fed piece by piece.
    let mut quads = Vec::new();
    let mut parser = TriGParser::new()
        .with_quoted_triples()
        .with_base_iri("http://example.com/")
        .unwrap()
        .parse();
    for chunk in data.split(|byte| *byte == 0xFF) {
        parser.extend_from_slice(chunk);
        // Keep only the successfully parsed quads, errors are ignored.
        quads.extend(std::iter::from_fn(|| parser.read_next()).flatten());
    }
    parser.end();
    quads.extend(std::iter::from_fn(|| parser.read_next()).flatten());
    assert!(parser.is_end());

    // Step 2: serialization of what has been parsed.
    let mut writer = TriGSerializer::new().serialize_to_write(Vec::new());
    for quad in &quads {
        writer.write_quad(quad).unwrap();
    }
    let new_serialization = writer.finish().unwrap();

    // Step 3: the serialization must be parseable without any error.
    let reparsed = TriGParser::new()
        .with_quoted_triples()
        .parse_from_read(new_serialization.as_slice())
        .collect::<Result<Vec<_>, _>>();
    let new_quads = reparsed
        .map_err(|e| {
            format!(
                "Error on {:?} from {quads:?} based on {:?}: {e}",
                String::from_utf8_lossy(&new_serialization),
                String::from_utf8_lossy(data)
            )
        })
        .unwrap();

    // Step 4: the roundtrip must not have changed anything.
    assert_eq!(new_quads, quads);
});

@ -32,17 +32,17 @@ regex = "1"
oxilangtag = "0.1"
oxiri = "0.2"
rio_api = "0.8"
rio_turtle = "0.8"
rio_xml = "0.8"
hex = "0.4"
siphasher = "0.3"
lazy_static = "1"
json-event-parser = "0.1"
oxrdf = { version = "0.2.0-alpha.1-dev", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] }
oxrdf = { version = "0.2.0-alpha.1-dev", path = "oxrdf", features = ["rdf-star", "oxsdatatypes"] }
oxsdatatypes = { version = "0.2.0-alpha.1-dev", path="oxsdatatypes" }
spargebra = { version = "0.3.0-alpha.1-dev", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] }
oxttl = { version = "0.1.0" , path = "oxttl", features = ["rdf-star"] }
spargebra = { version = "0.3.0-alpha.1-dev", path = "spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] }
sparopt = { version = "0.1.0-alpha.1-dev", path="sparopt", features = ["rdf-star", "sep-0002", "sep-0006"] }
sparesults = { version = "0.2.0-alpha.1-dev", path="sparesults", features = ["rdf-star"] }
sparesults = { version = "0.2.0-alpha.1-dev", path = "sparesults", features = ["rdf-star"] }
[target.'cfg(not(target_family = "wasm"))'.dependencies]
libc = "0.2"

@ -7,7 +7,7 @@ use oxigraph::store::Store;
use rand::random;
use std::env::temp_dir;
use std::fs::{remove_dir_all, File};
use std::io::{BufRead, BufReader, Cursor, Read};
use std::io::{BufRead, BufReader, Read};
use std::path::{Path, PathBuf};
fn store_load(c: &mut Criterion) {
@ -64,7 +64,7 @@ fn store_load(c: &mut Criterion) {
fn do_load(store: &Store, data: &[u8]) {
store
.load_graph(
Cursor::new(&data),
data,
GraphFormat::NTriples,
GraphNameRef::DefaultGraph,
None,
@ -77,7 +77,7 @@ fn do_bulk_load(store: &Store, data: &[u8]) {
store
.bulk_loader()
.load_graph(
Cursor::new(&data),
data,
GraphFormat::NTriples,
GraphNameRef::DefaultGraph,
None,

@ -0,0 +1,27 @@
[package]
name = "oxttl"
version = "0.1.0"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
keywords = ["SPARQL"]
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxttl"
homepage = "https://oxigraph.org/"
description = """
Parsers and serializers for N3, Turtle, TriG, N-Triples and N-Quads
"""
edition = "2021"
rust-version = "1.65"
[features]
default = []
rdf-star = ["oxrdf/rdf-star"]
[dependencies]
memchr = "2"
oxrdf = { version = "0.2.0-alpha.1-dev", path = "../oxrdf" }
oxiri = "0.2"
oxilangtag = "0.1"
[package.metadata.docs.rs]
all-features = true

@ -0,0 +1,938 @@
use crate::toolkit::{TokenRecognizer, TokenRecognizerError};
use memchr::{memchr, memchr2};
use oxilangtag::LanguageTag;
use oxiri::Iri;
use oxrdf::NamedNode;
use std::borrow::Cow;
use std::collections::HashMap;
use std::ops::{Range, RangeInclusive};
use std::str;
/// A token produced by [`N3Lexer`].
///
/// Variants holding a `&'a str` borrow their text directly from the lexed input.
#[derive(Debug, PartialEq, Eq)]
pub enum N3Token<'a> {
/// An IRI written between `<` and `>`, with its escapes decoded and
/// already parsed (and resolved against the base IRI if the options provide one).
IriRef(Iri<String>),
/// A prefixed name like `prefix:local`.
PrefixedName {
/// The text before the `:` (might be empty).
prefix: &'a str,
/// The text after the `:`, with `\` escapes decoded (owned only if an escape has been found).
local: Cow<'a, str>,
/// Set if the local part contains characters or escapes that require the built IRI to be validated.
might_be_invalid_iri: bool,
},
/// A variable, written `?name`. The name is stored without the `?`.
Variable(Cow<'a, str>),
/// A blank node label, written `_:label`. The label is stored without the `_:`.
BlankNodeLabel(&'a str),
/// The content of a string literal, without the delimiters and with the escapes decoded.
String(String),
/// The lexical form of an integer, sign included.
Integer(&'a str),
/// The lexical form of a decimal, sign included.
Decimal(&'a str),
/// The lexical form of a double, sign and exponent included.
Double(&'a str),
/// A language tag, stored without the leading `@`.
LangTag(&'a str),
/// A punctuation mark like `.`, `;`, `^^` or `<<`.
Punctuation(&'a str),
/// A bare word that is not followed by `:` (e.g. a keyword like `a` or `true`).
PlainKeyword(&'a str),
}
/// The syntax the lexer is tokenizing.
///
/// In this file the mode only changes two things: single quoted and long strings are
/// disabled in `NTriples` mode and the `<=` and `<-` punctuations are only recognized in `N3` mode.
#[derive(Eq, PartialEq)]
pub enum N3LexerMode {
NTriples,
Turtle,
N3,
}
/// Options given to the lexer each time a token is recognized.
#[derive(Default)]
pub struct N3LexerOptions {
/// If set, the IRIs written between `<` and `>` are resolved against it.
pub base_iri: Option<Iri<String>>,
}
/// A lexer for the N-Triples, Turtle and N3 families of syntaxes (see [`N3LexerMode`]).
pub struct N3Lexer {
// The syntax variant, it enables or disables some tokens
mode: N3LexerMode,
}
// TODO: `None` (i.e. "more data is needed") is still returned in a lot of places even if the stream is ending!
// TODO: simplify by not passing is_ending, and fail with an "unexpected EOF" error if `None` is returned when is_ending = true?
impl TokenRecognizer for N3Lexer {
type Token<'a> = N3Token<'a>;
type Options = N3LexerOptions;
/// Recognizes the token starting at the beginning of `data`.
///
/// The first byte (and often the second one) selects the kind of token to read.
///
/// Returns `None` if `data` is empty or if more bytes are needed to take a decision.
/// Otherwise returns the number of bytes to consume and either the token or an error.
/// `is_ending` is only taken into account by some of the branches below.
fn recognize_next_token<'a>(
&mut self,
data: &'a [u8],
is_ending: bool,
options: &N3LexerOptions,
) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> {
match *data.first()? {
b'<' => match *data.get(1)? {
b'<' => Some((2, Ok(N3Token::Punctuation("<<")))),
// In N3 "<=" might be either the punctuation or the beginning of an IRI:
// the IRI is tried first and the punctuation is the fallback
b'=' if self.mode == N3LexerMode::N3 => {
if let Some((consumed, result)) = Self::recognize_iri(data, options) {
Some(if let Ok(result) = result {
(consumed, Ok(result))
} else {
(2, Ok(N3Token::Punctuation("<=")))
})
} else if is_ending {
Some((2, Ok(N3Token::Punctuation("<="))))
} else {
None
}
}
// Same ambiguity for "<-"
b'-' if self.mode == N3LexerMode::N3 => {
if let Some((consumed, result)) = Self::recognize_iri(data, options) {
Some(if let Ok(result) = result {
(consumed, Ok(result))
} else {
(2, Ok(N3Token::Punctuation("<-")))
})
} else if is_ending {
Some((2, Ok(N3Token::Punctuation("<-"))))
} else {
None
}
}
_ => Self::recognize_iri(data, options),
},
b'>' => {
if *data.get(1)? == b'>' {
Some((2, Ok(N3Token::Punctuation(">>"))))
} else {
Some((1, Ok(N3Token::Punctuation(">"))))
}
}
b'_' => match data.get(1)? {
b':' => Self::recognize_blank_node_label(data),
c => Some((
1,
Err((0, format!("Unexpected character '{}'", char::from(*c))).into()),
)),
},
b'"' => {
// Long strings (three delimiters) are not enabled in N-Triples mode
if self.mode != N3LexerMode::NTriples
&& *data.get(1)? == b'"'
&& *data.get(2)? == b'"'
{
Self::recognize_long_string(data, b'"')
} else {
Self::recognize_string(data, b'"')
}
}
// Single quoted strings are not enabled in N-Triples mode
b'\'' if self.mode != N3LexerMode::NTriples => {
if *data.get(1)? == b'\'' && *data.get(2)? == b'\'' {
Self::recognize_long_string(data, b'\'')
} else {
Self::recognize_string(data, b'\'')
}
}
b'@' => Self::recognize_lang_tag(data),
// A dot is either the beginning of a number like ".5" or the punctuation
b'.' => match data.get(1) {
Some(b'0'..=b'9') => Self::recognize_number(data),
Some(_) => Some((1, Ok(N3Token::Punctuation(".")))),
None => is_ending.then_some((1, Ok(N3Token::Punctuation(".")))),
},
b'^' => {
if *data.get(1)? == b'^' {
Some((2, Ok(N3Token::Punctuation("^^"))))
} else {
Some((1, Ok(N3Token::Punctuation("^"))))
}
}
b'(' => Some((1, Ok(N3Token::Punctuation("(")))),
b')' => Some((1, Ok(N3Token::Punctuation(")")))),
b'[' => Some((1, Ok(N3Token::Punctuation("[")))),
b']' => Some((1, Ok(N3Token::Punctuation("]")))),
b'{' => {
if *data.get(1)? == b'|' {
Some((2, Ok(N3Token::Punctuation("{|"))))
} else {
Some((1, Ok(N3Token::Punctuation("{"))))
}
}
b'}' => Some((1, Ok(N3Token::Punctuation("}")))),
b',' => Some((1, Ok(N3Token::Punctuation(",")))),
b';' => Some((1, Ok(N3Token::Punctuation(";")))),
b'!' => Some((1, Ok(N3Token::Punctuation("!")))),
b'|' => {
if *data.get(1)? == b'}' {
Some((2, Ok(N3Token::Punctuation("|}"))))
} else {
Some((1, Ok(N3Token::Punctuation("|"))))
}
}
b'=' => {
if *data.get(1)? == b'>' {
Some((2, Ok(N3Token::Punctuation("=>"))))
} else {
Some((1, Ok(N3Token::Punctuation("="))))
}
}
b'0'..=b'9' | b'+' | b'-' => Self::recognize_number(data),
b'?' => Self::recognize_variable(data, is_ending),
// Everything else is tried as a prefixed name or a plain keyword
_ => Self::recognize_pname_or_keyword(data, is_ending),
}
}
}
impl N3Lexer {
/// Builds a lexer for the given syntax variant.
pub fn new(mode: N3LexerMode) -> Self {
Self { mode }
}
/// Reads an IRI reference, `data[0]` being its opening `<`.
///
/// Returns `None` if the closing `>` or the end of an escape sequence is not in `data` yet.
/// Otherwise returns the number of bytes to consume and either the IRI token or an error.
fn recognize_iri(
    data: &[u8],
    options: &N3LexerOptions,
) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> {
    // [18] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>' /* #x00=NULL #01-#x1F=control codes #x20=space */
    let mut iri_bytes = Vec::new();
    let mut cursor = 1;
    loop {
        // We copy everything up to the next '>' or '\'
        let stop = cursor + memchr2(b'>', b'\\', &data[cursor..])?;
        iri_bytes.extend_from_slice(&data[cursor..stop]);
        cursor = stop;
        if data[cursor] == b'>' {
            return Some((
                cursor + 1,
                Self::parse_iri(iri_bytes, 0..=cursor, options),
            ));
        }
        // memchr2 only stops on '>' or '\' so we are on the beginning of an escape sequence
        let (escape_len, decoded) = Self::recognize_escape(&data[cursor..], cursor, false)?;
        cursor += escape_len + 1;
        match decoded {
            Ok(c) => {
                let mut utf8 = [0; 4];
                iri_bytes.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
            }
            Err(e) => return Some((cursor, Err(e))),
        }
    }
}
/// Converts the unescaped bytes of an IRI reference into an [`N3Token::IriRef`].
///
/// The IRI is resolved against `options.base_iri` if there is one, it is only parsed otherwise.
/// `position` is the span attached to the returned error if the bytes are not valid UTF-8
/// or if the IRI is not valid.
fn parse_iri(
    iri: Vec<u8>,
    position: RangeInclusive<usize>,
    options: &N3LexerOptions,
) -> Result<N3Token<'static>, TokenRecognizerError> {
    let iri = match String::from_utf8(iri) {
        Ok(iri) => iri,
        Err(e) => {
            return Err((
                position,
                format!("The IRI contains invalid UTF-8 characters: {e}"),
            )
                .into())
        }
    };
    let parsed = match options.base_iri.as_ref() {
        Some(base_iri) => base_iri.resolve(&iri),
        None => Iri::parse(iri),
    };
    match parsed {
        Ok(iri) => Ok(N3Token::IriRef(iri)),
        Err(e) => Err((position, e.to_string()).into()),
    }
}
/// Reads either a prefixed name (`prefix:local`) or a plain keyword (a bare word not followed by `:`).
///
/// Characters are read until a `:` is found (prefixed name) or until a character
/// that cannot be part of a prefix is found (plain keyword).
/// Trailing dots are never part of a plain keyword and a prefix is not allowed to end with a dot.
///
/// Returns `None` if more data is needed to take a decision.
fn recognize_pname_or_keyword(
data: &[u8],
is_ending: bool,
) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> {
// [139s] PNAME_NS ::= PN_PREFIX? ':'
// [140s] PNAME_LN ::= PNAME_NS PN_LOCAL
// [167s] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?
let mut i = 0;
loop {
if let Some(r) = Self::recognize_unicode_char(&data[i..], i) {
match r {
Ok((c, consumed)) => {
if c == ':' {
// End of the prefix: `i` is now just after the ':'
i += consumed;
break;
} else if i == 0 {
if !Self::is_possible_pn_chars_base(c) {
return Some((
consumed,
Err((
0..consumed,
format!(
"'{c}' is not allowed at the beginning of a prefix name"
),
)
.into()),
));
}
i += consumed;
} else if Self::is_possible_pn_chars(c) || c == '.' {
i += consumed;
} else {
// No ':' found: this is a plain keyword, without its trailing dots
while data[..i].ends_with(b".") {
i -= 1;
}
return Some((
i,
Ok(N3Token::PlainKeyword(str::from_utf8(&data[..i]).unwrap())),
));
}
}
Err(e) => return Some((e.position.end, Err(e))),
}
} else if is_ending {
// No more data will come: what has been read is a plain keyword
while data[..i].ends_with(b".") {
i -= 1;
}
return Some(if i == 0 {
(
1,
Err((0..1, format!("Unexpected byte {}", data[0])).into()),
)
} else {
(
i,
Ok(N3Token::PlainKeyword(str::from_utf8(&data[..i]).unwrap())),
)
});
} else {
return None;
}
}
// `i - 1` is the position of the ':'
let pn_prefix = str::from_utf8(&data[..i - 1]).unwrap();
if pn_prefix.ends_with('.') {
return Some((
i,
Err((
0..i,
format!(
"'{pn_prefix}' is not a valid prefix: prefixes are not allowed to end with '.'"),
)
.into()),
));
}
// NOTE(review): the spans of the errors returned by the call below look relative to `data[i..]` and are not shifted by `i` here - confirm
let (consumed, pn_local_result) = Self::recognize_optional_pn_local(&data[i..], is_ending)?;
Some((
consumed + i,
pn_local_result.map(|(local, might_be_invalid_iri)| N3Token::PrefixedName {
prefix: pn_prefix,
local,
might_be_invalid_iri,
}),
))
}
/// Reads a variable, `data[0]` being its leading `?`.
///
/// Returns `None` if more data is needed.
/// Otherwise returns the number of bytes to consume (the `?` included) and
/// either the variable token (its name does not include the `?`)
/// or an error if the name is empty or invalid.
fn recognize_variable(
    data: &[u8],
    is_ending: bool,
) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> {
    // [36] QUICK_VAR_NAME ::= "?" PN_LOCAL
    let (consumed, result) = Self::recognize_optional_pn_local(&data[1..], is_ending)?;
    // `consumed` does not count the leading '?'
    let token_len = consumed + 1;
    Some((
        token_len,
        result.and_then(|(name, _)| {
            if name.is_empty() {
                // The span covers the whole token, '?' included, so it is never empty
                Err((0..token_len, "A variable name is not allowed to be empty").into())
            } else {
                Ok(N3Token::Variable(name))
            }
        }),
    ))
}
/// Reads a possibly empty local name (the part of a prefixed name after the `:` or a variable name).
///
/// Returns `None` if more data is needed. Otherwise returns the number of bytes to consume and
/// either an error or the local name with a flag telling if the name contains something
/// that requires the IRI built from it to be validated.
///
/// The name borrows from `data` except if it contains `\` escapes: it is then unescaped in an owned buffer.
/// Trailing dots are never part of the name and are not consumed.
fn recognize_optional_pn_local(
    data: &[u8],
    is_ending: bool,
) -> Option<(usize, Result<(Cow<'_, str>, bool), TokenRecognizerError>)> {
    // [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
    let mut i = 0;
    let mut buffer = None; // Buffer if there are some escaped characters
    let mut position_that_is_already_in_buffer = 0;
    let mut might_be_invalid_iri = false;
    loop {
        if let Some(r) = Self::recognize_unicode_char(&data[i..], i) {
            match r {
                Ok((c, consumed)) => {
                    if c == '%' {
                        i += 1;
                        let a = char::from(*data.get(i)?);
                        i += 1;
                        let b = char::from(*data.get(i)?);
                        if !matches!(a, '0'..='9' | 'A'..='F' | 'a'..='f')
                            || !matches!(b, '0'..='9' | 'A'..='F' | 'a'..='f')
                        {
                            return Some((i + 1, Err((
                                i - 2..=i, format!("escapes in IRIs should be % followed by two hexadecimal characters, found '%{a}{b}'")
                            ).into())));
                        }
                        i += 1;
                    } else if c == '\\' {
                        i += 1;
                        let a = char::from(*data.get(i)?);
                        if matches!(
                            a,
                            '_' | '~'
                                | '.'
                                | '-'
                                | '!'
                                | '$'
                                | '&'
                                | '\''
                                | '('
                                | ')'
                                | '*'
                                | '+'
                                | ','
                                | ';'
                                | '='
                        ) {
                            // ok to escape
                        } else if matches!(a, '/' | '?' | '#' | '@' | '%') {
                            // ok to escape but requires IRI validation
                            might_be_invalid_iri = true;
                        } else {
                            return Some((i + 1, Err((
                                i..=i, format!("The character that are allowed to be escaped in IRIs are _~.-!$&'()*+,;=/?#@%, found '{a}'")
                            ).into())));
                        }
                        let buffer = buffer.get_or_insert_with(String::new);
                        // We add the missing bytes
                        if i - position_that_is_already_in_buffer > 1 {
                            buffer.push_str(
                                str::from_utf8(
                                    &data[position_that_is_already_in_buffer..i - 1],
                                )
                                .unwrap(),
                            );
                        }
                        buffer.push(a);
                        i += 1;
                        position_that_is_already_in_buffer = i;
                    } else if i == 0 {
                        if !(Self::is_possible_pn_chars_u(c) || c == ':' || c.is_ascii_digit())
                        {
                            // Not the beginning of a local name: the name is empty
                            return Some((0, Ok((Cow::Borrowed(""), false))));
                        }
                        might_be_invalid_iri |=
                            Self::is_possible_pn_chars_base_but_not_valid_iri(c) || c == ':';
                        i += consumed;
                    } else if Self::is_possible_pn_chars(c) || c == ':' || c == '.' {
                        might_be_invalid_iri |=
                            Self::is_possible_pn_chars_base_but_not_valid_iri(c) || c == ':';
                        i += consumed;
                    } else {
                        // We have found a character that is not part of the name
                        let (end, local) = Self::finish_pn_local(
                            data,
                            i,
                            buffer,
                            position_that_is_already_in_buffer,
                        );
                        return Some((end, Ok((local, might_be_invalid_iri))));
                    }
                }
                Err(e) => return Some((e.position.end, Err(e))),
            }
        } else if is_ending {
            // No more data will come: the name ends here.
            // The same finalization as above is applied: the tail following the last escape
            // has to be copied in the buffer, if not the end of the name is lost
            let (end, local) =
                Self::finish_pn_local(data, i, buffer, position_that_is_already_in_buffer);
            return Some((end, Ok((local, might_be_invalid_iri))));
        } else {
            return None;
        }
    }
}

/// Builds the local name ending at `data[..end]` (trailing dots excluded) and returns it with its actual end position.
///
/// If `buffer` is set, it already contains the unescaped name up to `position_that_is_already_in_buffer`.
fn finish_pn_local(
    data: &[u8],
    mut end: usize,
    buffer: Option<String>,
    position_that_is_already_in_buffer: usize,
) -> (usize, Cow<'_, str>) {
    if let Some(mut buffer) = buffer {
        buffer.push_str(
            str::from_utf8(&data[position_that_is_already_in_buffer..end]).unwrap(),
        );
        // We do not include the last dot
        while buffer.ends_with('.') {
            buffer.pop();
            end -= 1;
        }
        (end, Cow::Owned(buffer))
    } else {
        let mut local = str::from_utf8(&data[..end]).unwrap();
        // We do not include the last dot
        while let Some(stripped) = local.strip_suffix('.') {
            local = stripped;
            end -= 1;
        }
        (end, Cow::Borrowed(local))
    }
}
/// Reads a blank node label, `data[..2]` being `_:`.
///
/// Returns `None` if more data is needed to find the end of the label.
/// Otherwise returns the number of bytes to consume and either the label token
/// (the label does not include the `_:`) or an error.
/// A trailing dot is never part of the label and is not consumed.
fn recognize_blank_node_label(
    data: &[u8],
) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> {
    // [141s] BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
    let mut i = 2;
    loop {
        match Self::recognize_unicode_char(&data[i..], i)? {
            Ok((c, consumed)) => {
                if (i == 2 && (Self::is_possible_pn_chars_u(c) || c.is_ascii_digit()))
                    || (i > 2 && Self::is_possible_pn_chars(c))
                {
                    // Ok
                } else if i > 2 && c == '.' {
                    if data[i - 1] == b'.' {
                        // Two dots in a row: the label ends before the first one
                        i -= 1;
                        return Some((
                            i,
                            Ok(N3Token::BlankNodeLabel(
                                str::from_utf8(&data[2..i]).unwrap(),
                            )),
                        ));
                    }
                } else if i == 2 {
                    // The label starts just after "_:" i.e. at position 2:
                    // nothing valid has been read so the label is empty
                    return Some((
                        i,
                        Err((0..i, "A blank node ID should not be empty").into()),
                    ));
                } else if data[i - 1] == b'.' {
                    // The label is not allowed to end with a dot
                    i -= 1;
                    return Some((
                        i,
                        Ok(N3Token::BlankNodeLabel(
                            str::from_utf8(&data[2..i]).unwrap(),
                        )),
                    ));
                } else {
                    return Some((
                        i,
                        Ok(N3Token::BlankNodeLabel(
                            str::from_utf8(&data[2..i]).unwrap(),
                        )),
                    ));
                }
                i += consumed;
            }
            Err(e) => return Some((e.position.end, Err(e))),
        }
    }
}
/// Reads a language tag, `data[0]` being its leading `@`.
///
/// The tag is read until the first byte that is neither an ASCII letter nor a `-`.
/// Returns `None` if the end of `data` is reached before finding such a byte.
/// Otherwise returns the number of bytes to consume and either the tag token (without the `@`) or an error.
fn recognize_lang_tag(
data: &[u8],
) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> {
// [144s] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
// NOTE(review): the grammar above allows digits after the first '-' but only ASCII letters extend the tag here - confirm that it is intended
// True if no letter has been read since the '@' or the last '-'
let mut is_last_block_empty = true;
// `i` is the index in `data[1..]` i.e. the current byte is `data[i + 1]`
for (i, c) in data[1..].iter().enumerate() {
if c.is_ascii_alphabetic() {
is_last_block_empty = false;
} else if i == 0 {
return Some((
1,
Err((1..2, "A language code should always start with a letter").into()),
));
} else if is_last_block_empty {
// The previous byte is a '-' not followed by a letter: it is not part of the tag and is not consumed
return Some((i, Self::parse_lang_tag(&data[1..i], 1..i - 1)));
} else if *c == b'-' {
is_last_block_empty = true;
} else {
return Some((i + 1, Self::parse_lang_tag(&data[1..=i], 1..i)));
}
}
None
}
/// Validates a language tag (given without its leading `@`) and wraps it into an [`N3Token::LangTag`].
///
/// `position` is the span attached to the error returned if the tag is not valid.
/// Panics if `lang_tag` is not valid UTF-8.
fn parse_lang_tag(
    lang_tag: &[u8],
    position: Range<usize>,
) -> Result<N3Token<'_>, TokenRecognizerError> {
    let raw_tag = str::from_utf8(lang_tag).unwrap();
    match LanguageTag::parse(raw_tag) {
        Ok(tag) => Ok(N3Token::LangTag(tag.into_inner())),
        Err(e) => Err((position, e.to_string()).into()),
    }
}
/// Reads a single line string, `data[0]` being its opening `delimiter` (`"` or `'`).
///
/// Returns `None` if the end of the string is not in `data` yet.
/// Otherwise returns the number of bytes to consume and either the unescaped string content or an error.
/// On error, the bytes up to the next `delimiter` (included) are consumed.
fn recognize_string(
    data: &[u8],
    delimiter: u8,
) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> {
    // [22] STRING_LITERAL_QUOTE ::= '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"' /* #x22=" #x5C=\ #xA=new line #xD=carriage return */
    // [23] STRING_LITERAL_SINGLE_QUOTE ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */
    let mut string = String::new();
    let mut i = 1;
    loop {
        // `end` is relative to `i`
        let end = memchr2(delimiter, b'\\', &data[i..])?;
        match str::from_utf8(&data[i..i + end]) {
            Ok(a) => string.push_str(a),
            Err(e) => {
                let error = (
                    i..i + end,
                    format!("The string contains invalid UTF-8 characters: {e}"),
                )
                    .into();
                // We read until the end of string char, like for the invalid escapes below.
                // The consumed length has to be counted from the beginning of the token and not from `i`
                let closing = memchr(delimiter, &data[i + end..])?;
                return Some((i + end + closing + 1, Err(error)));
            }
        };
        i += end;
        match data[i] {
            c if c == delimiter => {
                return Some((i + 1, Ok(N3Token::String(string))));
            }
            b'\\' => {
                let (additional, c) = Self::recognize_escape(&data[i..], i, true)?;
                i += additional + 1;
                match c {
                    Ok(c) => {
                        string.push(c);
                    }
                    Err(e) => {
                        // We read until the end of string char
                        let end = memchr(delimiter, &data[i..])?;
                        return Some((i + end + 1, Err(e)));
                    }
                }
            }
            _ => unreachable!(),
        }
    }
}
/// Reads a long string, `data[..3]` being its opening triple `delimiter` (`"""` or `'''`).
///
/// Returns `None` if the end of the string is not in `data` yet.
/// Otherwise returns the number of bytes to consume and either the unescaped string content or an error.
fn recognize_long_string(
    data: &[u8],
    delimiter: u8,
) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> {
    // [24] STRING_LITERAL_LONG_SINGLE_QUOTE ::= "'''" (("'" | "''")? ([^'\] | ECHAR | UCHAR))* "'''"
    // [25] STRING_LITERAL_LONG_QUOTE ::= '"""' (('"' | '""')? ([^"\] | ECHAR | UCHAR))* '"""'
    let mut string = String::new();
    let mut i = 3;
    loop {
        // `end` is relative to `i`
        let end = memchr2(delimiter, b'\\', &data[i..])?;
        match str::from_utf8(&data[i..i + end]) {
            Ok(a) => string.push_str(a),
            Err(e) => {
                // The consumed length has to be counted from the beginning of the token and not from `i`,
                // if not the lexer restarts inside of what has already been read
                return Some((
                    i + end,
                    Err((
                        i..i + end,
                        format!("The string contains invalid UTF-8 characters: {e}"),
                    )
                        .into()),
                ));
            }
        };
        i += end;
        match data[i] {
            c if c == delimiter => {
                if *data.get(i + 1)? == delimiter && *data.get(i + 2)? == delimiter {
                    return Some((i + 3, Ok(N3Token::String(string))));
                }
                // A delimiter that is not part of the closing triple is a regular character
                i += 1;
                string.push(char::from(delimiter));
            }
            b'\\' => {
                let (additional, c) = Self::recognize_escape(&data[i..], i, true)?;
                i += additional + 1;
                match c {
                    Ok(c) => {
                        string.push(c);
                    }
                    Err(e) => return Some((i, Err(e))),
                }
            }
            _ => unreachable!(),
        }
    }
}
/// Reads an integer, a decimal or a double starting at the beginning of `data`.
///
/// The byte following the number is always required to decide where the number ends:
/// `None` is returned if the end of `data` is reached before.
/// Otherwise returns the number of bytes to consume and either the token
/// (holding the lexical form of the number) or an error.
fn recognize_number(data: &[u8]) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> {
// [19] INTEGER ::= [+-]? [0-9]+
// [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+
// [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT)
// [154s] EXPONENT ::= [eE] [+-]? [0-9]+
let mut i = 0;
let c = *data.first()?;
if matches!(c, b'+' | b'-') {
i += 1;
}
// We read the digits before .
let mut count_before: usize = 0;
loop {
let c = *data.get(i)?;
if c.is_ascii_digit() {
i += 1;
count_before += 1;
} else {
break;
}
}
// We read the digits after .
// `count_after` is `None` if there is no '.' and `Some(0)` if the '.' is not followed by a digit
#[allow(clippy::if_then_some_else_none)]
let count_after = if *data.get(i)? == b'.' {
i += 1;
let mut count_after = 0;
loop {
let c = *data.get(i)?;
if c.is_ascii_digit() {
i += 1;
count_after += 1;
} else {
break;
}
}
Some(count_after)
} else {
None
};
// End
let c = *data.get(i)?;
if matches!(c, b'e' | b'E') {
// There is an exponent: the number is a double
i += 1;
let c = *data.get(i)?;
if matches!(c, b'+' | b'-') {
i += 1;
}
// True if the exponent has at least one digit
let mut found = false;
loop {
let c = *data.get(i)?;
if c.is_ascii_digit() {
i += 1;
found = true;
} else {
break;
}
}
Some((
i,
if !found {
Err((0..i, "A double exponent cannot be empty").into())
} else if count_before == 0 && count_after.unwrap_or(0) == 0 {
Err((0..i, "A double should not be empty").into())
} else {
Ok(N3Token::Double(str::from_utf8(&data[..i]).unwrap()))
},
))
} else if let Some(count_after) = count_after {
if count_after == 0 {
// We do not consume the '.' after all
i -= 1;
Some((
i,
if count_before == 0 {
Err((0..i, "An integer should not be empty").into())
} else {
Ok(N3Token::Integer(str::from_utf8(&data[..i]).unwrap()))
},
))
} else {
Some((i, Ok(N3Token::Decimal(str::from_utf8(&data[..i]).unwrap()))))
}
} else {
Some((
i,
if count_before == 0 {
Err((0..i, "An integer should not be empty").into())
} else {
Ok(N3Token::Integer(str::from_utf8(&data[..i]).unwrap()))
},
))
}
}
/// Decodes an escape sequence, `data[0]` being its leading `\`.
///
/// `position` is the position of the `\` in the token, it is only used to build the error spans.
/// `with_echar` enables the single character escapes (`\n`, `\t`...) on top of `\uXXXX` and `\UXXXXXXXX`.
///
/// Returns `None` if more data is needed. Otherwise returns the number of bytes
/// following the `\` that are part of the escape sequence and either the decoded character or an error.
fn recognize_escape(
    data: &[u8],
    position: usize,
    with_echar: bool,
) -> Option<(usize, Result<char, TokenRecognizerError>)> {
    // [26] UCHAR ::= '\u' HEX HEX HEX HEX | '\U' HEX HEX HEX HEX HEX HEX HEX HEX
    // [159s] ECHAR ::= '\' [tbnrf"'\]
    match *data.get(1)? {
        b'u' => match Self::recognize_hex_char(&data[2..], 4, 'u', position) {
            Ok(c) => Some((5, Ok(c?))),
            Err(e) => Some((5, Err(e))),
        },
        // The escape character is only used in the error messages: it has to be the upper case one here
        b'U' => match Self::recognize_hex_char(&data[2..], 8, 'U', position) {
            Ok(c) => Some((9, Ok(c?))),
            Err(e) => Some((9, Err(e))),
        },
        b't' if with_echar => Some((1, Ok('\t'))),
        b'b' if with_echar => Some((1, Ok('\x08'))),
        b'n' if with_echar => Some((1, Ok('\n'))),
        b'r' if with_echar => Some((1, Ok('\r'))),
        b'f' if with_echar => Some((1, Ok('\x0C'))),
        b'"' if with_echar => Some((1, Ok('"'))),
        b'\'' if with_echar => Some((1, Ok('\''))),
        b'\\' if with_echar => Some((1, Ok('\\'))),
        c => Some((
            1,
            Err((
                position..position + 2,
                format!("Unexpected escape character '\\{}'", char::from(c)),
            )
                .into()),
        )), //TODO: read until end of string
    }
}
/// Decodes the `len` hexadecimal digits at the beginning of `data` into the character they encode.
///
/// `escape_char` (`u` or `U`) and `position` (the position of the `\` of the escape sequence in the token)
/// are only used to build the errors.
///
/// Returns `Ok(None)` if `data` is shorter than `len` i.e. if more data is needed,
/// and an error if the digits are not valid or do not encode a valid Unicode character.
fn recognize_hex_char(
    data: &[u8],
    len: usize,
    escape_char: char,
    position: usize,
) -> Result<Option<char>, TokenRecognizerError> {
    if data.len() < len {
        return Ok(None);
    }
    let val = str::from_utf8(&data[..len]).map_err(|e| {
        (
            position..position + len + 2,
            format!("The escape sequence contains invalid UTF-8 characters: {e}"),
        )
    })?;
    // `u32::from_str_radix` also accepts a leading '+' sign that is not allowed by the grammar:
    // we make sure that there are only hexadecimal digits
    if !val.bytes().all(|b| b.is_ascii_hexdigit()) {
        return Err((
            position..position + len + 2,
            format!(
                "The escape sequence '\\{escape_char}{val}' is not a valid hexadecimal string: only hexadecimal digits are allowed"
            ),
        )
            .into());
    }
    let codepoint = u32::from_str_radix(val, 16).map_err(|e| {
        (
            position..position + len + 2,
            format!(
                "The escape sequence '\\{escape_char}{val}' is not a valid hexadecimal string: {e}"
            ),
        )
    })?;
    let c = char::from_u32(codepoint).ok_or_else(|| {
        (
            position..position + len + 2,
            format!(
                "The escape sequence '\\{escape_char}{val}' is encoding {codepoint:X} that is not a valid unicode character",
            ),
        )
    })?;
    Ok(Some(c))
}
/// Decodes the UTF-8 encoded character found at the start of `data`.
///
/// `position` is the offset of `data[0]`, used to build the error ranges.
///
/// Returns `None` if `data` ends before the character is complete,
/// `Some(Ok((character, encoded length in bytes)))` on success and `Some(Err(_))`
/// if the first byte cannot start a character or a following byte is out of its allowed range.
fn recognize_unicode_char(
    data: &[u8],
    position: usize,
) -> Option<Result<(char, usize), TokenRecognizerError>> {
    let first = *data.first()?;
    // For each kind of first byte: number of following bytes, payload bits of the first byte
    // and the range allowed for the second byte (narrower after 0xE0, 0xED, 0xF0 and 0xF4).
    let (following, mut code_point, mut min_next, mut max_next): (usize, u32, u8, u8) =
        match first {
            0x00..=0x7F => return Some(Ok((char::from(first), 1))),
            0xC2..=0xDF => (1, u32::from(first) & 0x1F, 0x80, 0xBF),
            0xE0..=0xEF => (
                2,
                u32::from(first) & 0xF,
                if first == 0xE0 { 0xA0 } else { 0x80 },
                if first == 0xED { 0x9F } else { 0xBF },
            ),
            0xF0..=0xF4 => (
                3,
                u32::from(first) & 0x7,
                if first == 0xF0 { 0x90 } else { 0x80 },
                if first == 0xF4 { 0x8F } else { 0xBF },
            ),
            _ => {
                return Some(Err((
                    position..=position,
                    "Invalid UTF-8 character encoding",
                )
                    .into()))
            }
        };
    for offset in 1..=following {
        let next = *data.get(offset)?;
        if !(min_next..=max_next).contains(&next) {
            return Some(Err((
                position..=position + offset,
                "Invalid UTF-8 character encoding",
            )
                .into()));
        }
        // Only the second byte has a restricted range
        min_next = 0x80;
        max_next = 0xBF;
        code_point = (code_point << 6) | (u32::from(next) & 0x3F);
    }
    Some(match char::from_u32(code_point) {
        Some(c) => Ok((c, following + 1)),
        None => Err((
            position..=position + following,
            format!("The codepoint {code_point:X} is not a valid unicode character"),
        )
            .into()),
    })
}
// [157s] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
/// Returns `true` if `c` is inside one of the `PN_CHARS_BASE` ranges listed above.
fn is_possible_pn_chars_base(c: char) -> bool {
    // [A-Z] | [a-z]
    if c.is_ascii_alphabetic() {
        return true;
    }
    matches!(
        c,
        '\u{00C0}'..='\u{00D6}'
            | '\u{00D8}'..='\u{00F6}'
            | '\u{00F8}'..='\u{02FF}'
            | '\u{0370}'..='\u{037D}'
            | '\u{037F}'..='\u{1FFF}'
            | '\u{200C}'..='\u{200D}'
            | '\u{2070}'..='\u{218F}'
            | '\u{2C00}'..='\u{2FEF}'
            | '\u{3001}'..='\u{D7FF}'
            | '\u{F900}'..='\u{FDCF}'
            | '\u{FDF0}'..='\u{FFFD}'
            | '\u{10000}'..='\u{EFFFF}'
    )
}
// [158s] PN_CHARS_U ::= PN_CHARS_BASE | '_' | ':'
// NOTE: the production quoted above lists ':' but this function does not accept it:
// only '_' is added to the PN_CHARS_BASE characters.
/// Returns `true` if `c` is `'_'` or a `PN_CHARS_BASE` character.
fn is_possible_pn_chars_u(c: char) -> bool {
    c == '_' || Self::is_possible_pn_chars_base(c)
}
// [160s] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
/// Returns `true` if `c` is accepted by `is_possible_pn_chars_u` or is one of the extra
/// characters of the `PN_CHARS` production.
fn is_possible_pn_chars(c: char) -> bool {
    match c {
        '-' | '0'..='9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}' => true,
        _ => Self::is_possible_pn_chars_u(c),
    }
}
/// Returns `true` for the characters that fall inside the `PN_CHARS_BASE` ranges but are
/// not allowed in an IRI: `U+FFF0..=U+FFFD` and the last two code points of each plane
/// (`U+xFFFE` and `U+xFFFF`, e.g. `U+1FFFE` and `U+1FFFF`).
fn is_possible_pn_chars_base_but_not_valid_iri(c: char) -> bool {
    // The two last code points of a plane are the only ones with all of the bits 1..=15 set.
    // A modulo test does not work here: it matches the multiples of 0xFFFE / 0xFFFF
    // (e.g. U+1FFFC or U+2FFFD) and misses code points like U+1FFFF.
    matches!(c, '\u{FFF0}'..='\u{FFFD}') || (u32::from(c) & 0xFFFE) == 0xFFFE
}
}
/// Builds the IRI of the prefixed name `prefix:local` using the declared `prefixes`.
///
/// The concatenated IRI is parsed again only if `might_be_invalid_iri` is set or if the
/// prefix IRI has an empty path; otherwise it is used without further validation.
///
/// Returns an error message if the prefix is not declared or if the validation fails.
pub fn resolve_local_name(
    prefix: &str,
    local: &str,
    might_be_invalid_iri: bool,
    prefixes: &HashMap<String, Iri<String>>,
) -> Result<NamedNode, String> {
    let start = match prefixes.get(prefix) {
        Some(start) => start,
        None => return Err(format!("The prefix {prefix}: has not been declared")),
    };
    let iri = format!("{start}{local}");
    // We always validate if the local part might end up in the IRI authority
    // (i.e. when the prefix IRI has no path yet).
    let must_validate = might_be_invalid_iri || start.path().is_empty();
    if must_validate {
        Iri::parse(iri.as_str()).map_err(|e| {
            format!("The prefixed name {prefix}:{local} builds IRI {iri} that is invalid: {e}")
        })?;
    }
    Ok(NamedNode::new_unchecked(iri))
}

@ -0,0 +1,19 @@
mod lexer;
mod line_formats;
pub mod n3;
pub mod nquads;
pub mod ntriples;
mod terse;
mod toolkit;
pub mod trig;
pub mod turtle;
pub use crate::n3::N3Parser;
pub use crate::nquads::{NQuadsParser, NQuadsSerializer};
pub use crate::ntriples::{NTriplesParser, NTriplesSerializer};
pub use crate::toolkit::{ParseError, ParseOrIoError};
pub use crate::trig::{TriGParser, TriGSerializer};
pub use crate::turtle::{TurtleParser, TurtleSerializer};
/// Lower buffer size bound given to `Lexer::new` when a parser is built
/// (presumably the initial size in bytes of the lexer buffer — TODO confirm in `toolkit::lexer`).
pub(crate) const MIN_BUFFER_SIZE: usize = 4096;
/// Upper buffer size bound given to `Lexer::new` when a parser is built: 4096 * 4096 = 16_777_216
/// (presumably the maximal size in bytes the lexer buffer may grow to — TODO confirm in `toolkit::lexer`).
pub(crate) const MAX_BUFFER_SIZE: usize = 4096 * 4096;

@ -0,0 +1,305 @@
//! Shared parser implementation for N-Triples and N-Quads.
use crate::lexer::{N3Lexer, N3LexerMode, N3LexerOptions, N3Token};
use crate::toolkit::{Lexer, Parser, RuleRecognizer, RuleRecognizerError};
use crate::{MAX_BUFFER_SIZE, MIN_BUFFER_SIZE};
#[cfg(feature = "rdf-star")]
use oxrdf::Triple;
use oxrdf::{BlankNode, GraphName, Literal, NamedNode, Quad, Subject, Term};
/// Token-driven state machine shared by the N-Triples and N-Quads parsers.
pub struct NQuadsRecognizer {
/// Pending states: the top one is popped for each token. It is emptied when an error is reported.
stack: Vec<NQuadsState>,
/// If set, an IRI or a blank node following the object is read as the graph name.
with_graph_name: bool,
/// If set, `<<` is accepted to start a quoted triple in subject or object position.
#[cfg(feature = "rdf-star")]
with_quoted_triples: bool,
/// Options returned by `lexer_options`.
lexer_options: N3LexerOptions,
/// Subjects read so far and not yet consumed by an emitted quad or a quoted triple.
subjects: Vec<Subject>,
/// Predicates read so far and not yet consumed by an emitted quad or a quoted triple.
predicates: Vec<NamedNode>,
/// Objects read so far and not yet consumed by an emitted quad or a quoted triple.
objects: Vec<Term>,
}
/// States of the [`NQuadsRecognizer`] stack.
enum NQuadsState {
/// The next token should be a subject: an IRI, a blank node or `<<` if quoted triples are enabled.
ExpectSubject,
/// The next token should be the predicate IRI.
ExpectPredicate,
/// The next token should be an object: an IRI, a blank node, a string or `<<` if quoted triples are enabled.
ExpectedObject,
/// The object has been read. With no other pending state an optional graph name is read and the quad is emitted,
/// otherwise `>>` is expected.
ExpectPossibleGraphOrEndOfQuotedTriple,
/// The next token should be the final `.`.
ExpectDot,
/// A string with the given `value` has been read: a language tag or `^^` may follow,
/// any other token makes it a simple literal.
ExpectLiteralAnnotationOrGraphNameOrDot {
value: String,
},
/// `^^` has been read after the string `value`: the next token should be the datatype IRI.
ExpectLiteralDatatype {
value: String,
},
/// Pushed below the states of a quoted triple in subject position:
/// when popped, the triple is built from the last terms read and becomes a subject.
#[cfg(feature = "rdf-star")]
AfterQuotedSubject,
/// Pushed below the states of a quoted triple in object position:
/// when popped, the triple is built from the last terms read and becomes an object.
#[cfg(feature = "rdf-star")]
AfterQuotedObject,
}
impl RuleRecognizer for NQuadsRecognizer {
type TokenRecognizer = N3Lexer;
type Output = Quad;
fn error_recovery_state(mut self) -> Self {
self.stack.clear();
self.subjects.clear();
self.predicates.clear();
self.objects.clear();
self
}
/// Consumes one token and moves the state machine forward.
///
/// The state on top of the stack is popped and matched against `token`.
/// Complete quads are pushed to `results` and problems to `errors`.
/// Some states do not consume the token themselves: they update the stack
/// and call `recognize_next` again with the same token.
///
/// If the stack is empty (it is cleared when an error is reported) the tokens are
/// ignored until a `.` is found, then a new subject is expected.
fn recognize_next(
mut self,
token: N3Token,
results: &mut Vec<Quad>,
errors: &mut Vec<RuleRecognizerError>,
) -> Self {
if let Some(state) = self.stack.pop() {
match state {
NQuadsState::ExpectSubject => match token {
N3Token::IriRef(s) => {
self.subjects
.push(NamedNode::new_unchecked(s.into_inner()).into());
self.stack.push(NQuadsState::ExpectPredicate);
self
}
N3Token::BlankNodeLabel(s) => {
self.subjects.push(BlankNode::new_unchecked(s).into());
self.stack.push(NQuadsState::ExpectPredicate);
self
}
#[cfg(feature = "rdf-star")]
N3Token::Punctuation("<<") if self.with_quoted_triples => {
// The inner triple is read first, `AfterQuotedSubject` is popped once it is complete
self.stack.push(NQuadsState::AfterQuotedSubject);
self.stack.push(NQuadsState::ExpectSubject);
self
}
token => self.error(
errors,
format!("The subject of a triple should be an IRI or a blank node, {token:?} found"),
),
},
NQuadsState::ExpectPredicate => match token {
N3Token::IriRef(p) => {
self.predicates
.push(NamedNode::new_unchecked(p.into_inner()));
self.stack.push(NQuadsState::ExpectedObject);
self
}
token => self.error(
errors,
format!("The predicate of a triple should be an IRI, {token:?} found"),
),
},
NQuadsState::ExpectedObject => match token {
N3Token::IriRef(o) => {
self.objects
.push(NamedNode::new_unchecked(o.into_inner()).into());
self.stack
.push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
self
}
N3Token::BlankNodeLabel(o) => {
self.objects.push(BlankNode::new_unchecked(o).into());
self.stack
.push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
self
}
N3Token::String(value) => {
// The literal is only built when we know if a language tag or a datatype follows
self.stack
.push(NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value });
self
}
#[cfg(feature = "rdf-star")]
N3Token::Punctuation("<<") if self.with_quoted_triples => {
// The inner triple is read first, `AfterQuotedObject` is popped once it is complete
self.stack.push(NQuadsState::AfterQuotedObject);
self.stack.push(NQuadsState::ExpectSubject);
self
}
token => self.error(
errors,
format!("The object of a triple should be an IRI, a blank node or a literal, {token:?} found"),
),
},
NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value } => match token {
N3Token::LangTag(lang_tag) => {
// The language tag is stored in ASCII lower case
self.objects.push(
Literal::new_language_tagged_literal_unchecked(
value,
lang_tag.to_ascii_lowercase(),
)
.into(),
);
self.stack
.push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
self
}
N3Token::Punctuation("^^") => {
self.stack
.push(NQuadsState::ExpectLiteralDatatype { value });
self
}
token => {
// No annotation: this is a simple literal and the token is handled by the next state
self.objects.push(Literal::new_simple_literal(value).into());
self.stack
.push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
self.recognize_next(token, results, errors)
}
},
NQuadsState::ExpectLiteralDatatype { value } => match token {
N3Token::IriRef(d) => {
self.objects.push(
Literal::new_typed_literal(
value,
NamedNode::new_unchecked(d.into_inner()),
)
.into(),
);
self.stack
.push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
self
}
token => self.error(errors, format!("A literal datatype must be an IRI, found {token:?}")),
},
NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple => {
// An empty stack means that we are not inside of a quoted triple
if self.stack.is_empty() {
match token {
N3Token::IriRef(g) if self.with_graph_name => {
self.emit_quad(
results,
NamedNode::new_unchecked(g.into_inner()).into(),
);
self.stack.push(NQuadsState::ExpectDot);
self
}
N3Token::BlankNodeLabel(g) if self.with_graph_name => {
self.emit_quad(results, BlankNode::new_unchecked(g).into());
self.stack.push(NQuadsState::ExpectDot);
self
}
token => {
// No graph name: the quad is in the default graph and the token is handled by `ExpectDot`
self.emit_quad(results, GraphName::DefaultGraph);
self.stack.push(NQuadsState::ExpectDot);
self.recognize_next(token, results, errors)
}
}
} else if token == N3Token::Punctuation(">>") {
// The state left on the stack builds the quoted triple when the next token is read
self
} else {
self.error(errors, "Expecting the end of a quoted triple '>>'")
}
}
NQuadsState::ExpectDot => match token {
N3Token::Punctuation(".") => {
self.stack.push(NQuadsState::ExpectSubject);
self
}
token => {
// The error is reported but the already emitted quad is kept
// and the token is read as the start of the next statement
errors.push("Quads should be followed by a dot".into());
self.stack.push(NQuadsState::ExpectSubject);
self.recognize_next(token, results, errors)
}
},
#[cfg(feature = "rdf-star")]
NQuadsState::AfterQuotedSubject => {
// Builds the quoted triple from the last terms read and uses it as a subject
let triple = Triple {
subject: self.subjects.pop().unwrap(),
predicate: self.predicates.pop().unwrap(),
object: self.objects.pop().unwrap(),
};
self.subjects.push(triple.into());
self.stack.push(NQuadsState::ExpectPredicate);
self.recognize_next(token, results, errors)
}
#[cfg(feature = "rdf-star")]
NQuadsState::AfterQuotedObject => {
// Builds the quoted triple from the last terms read and uses it as an object
let triple = Triple {
subject: self.subjects.pop().unwrap(),
predicate: self.predicates.pop().unwrap(),
object: self.objects.pop().unwrap(),
};
self.objects.push(triple.into());
self.stack
.push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
self.recognize_next(token, results, errors)
}
}
} else if token == N3Token::Punctuation(".") {
// Empty stack: a '.' restarts the parsing with a new statement
self.stack.push(NQuadsState::ExpectSubject);
self
} else {
// Empty stack: the token is ignored
self
}
}
/// Called when there are no more tokens: emits the last quad if it only misses
/// its final dot and reports what is wrong with the pending state.
fn recognize_end(mut self, results: &mut Vec<Quad>, errors: &mut Vec<RuleRecognizerError>) {
    const MISSING_DOT: &str = "Triples should be followed by a dot";
    match self.stack.as_slice() {
        // Nothing is pending
        [] | [NQuadsState::ExpectSubject] => {}
        // The quad has already been emitted
        [NQuadsState::ExpectDot] => errors.push(MISSING_DOT.into()),
        [NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple] => {
            self.emit_quad(results, GraphName::DefaultGraph);
            errors.push(MISSING_DOT.into());
        }
        // The pending string is a simple literal
        [NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value }] => {
            let literal = Literal::new_simple_literal(value);
            self.objects.push(literal.into());
            self.emit_quad(results, GraphName::DefaultGraph);
            errors.push(MISSING_DOT.into());
        }
        _ => errors.push("Unexpected end".into()), //TODO
    }
}
/// Returns the lexer options stored in this recognizer.
fn lexer_options(&self) -> &N3LexerOptions {
&self.lexer_options
}
}
impl NQuadsRecognizer {
/// Builds a parser made of an N-Triples mode lexer and of a recognizer expecting a subject.
///
/// * `with_graph_name`: accept a graph name after the object.
/// * `with_quoted_triples`: accept `<<`-quoted triples.
pub fn new_parser(
    with_graph_name: bool,
    #[cfg(feature = "rdf-star")] with_quoted_triples: bool,
) -> Parser<Self> {
    // NOTE(review): the two last arguments are presumably "line jumps are whitespace"
    // and the line comment marker — confirm with `Lexer::new`
    let lexer = Lexer::new(
        N3Lexer::new(N3LexerMode::NTriples),
        MIN_BUFFER_SIZE,
        MAX_BUFFER_SIZE,
        true,
        Some(b"#"),
    );
    let recognizer = NQuadsRecognizer {
        stack: vec![NQuadsState::ExpectSubject],
        with_graph_name,
        #[cfg(feature = "rdf-star")]
        with_quoted_triples,
        lexer_options: N3LexerOptions::default(),
        subjects: Vec::new(),
        predicates: Vec::new(),
        objects: Vec::new(),
    };
    Parser::new(lexer, recognizer)
}
#[must_use]
fn error(
mut self,
errors: &mut Vec<RuleRecognizerError>,
msg: impl Into<RuleRecognizerError>,
) -> Self {
errors.push(msg.into());
self.stack.clear();
self.subjects.clear();
self.predicates.clear();
self.objects.clear();
self
}
/// Pops the last subject, predicate and object and pushes the quad they form
/// with `graph_name` to `results`.
///
/// Panics if one of the three term stacks is empty.
fn emit_quad(&mut self, results: &mut Vec<Quad>, graph_name: GraphName) {
    let subject = self.subjects.pop().unwrap();
    let predicate = self.predicates.pop().unwrap();
    let object = self.objects.pop().unwrap();
    results.push(Quad {
        subject,
        predicate,
        object,
        graph_name,
    });
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,393 @@
//! A [N-Quads](https://www.w3.org/TR/n-quads/) streaming parser implemented by [`NQuadsParser`]
//! and a serializer implemented by [`NQuadsSerializer`].
use crate::line_formats::NQuadsRecognizer;
use crate::toolkit::{FromReadIterator, ParseError, ParseOrIoError, Parser};
use oxrdf::{Quad, QuadRef};
use std::io::{self, Read, Write};
/// A [N-Quads](https://www.w3.org/TR/n-quads/) streaming parser.
///
/// Support for [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star) is available behind the `rdf-star` feature and the [`NQuadsParser::with_quoted_triples`] option.
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{NQuadsParser, ParseError};
///
/// let file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/foo> <http://schema.org/name> \"Foo\" .
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/bar> <http://schema.org/name> \"Bar\" .";
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for quad in NQuadsParser::new().parse_from_read(file.as_ref()) {
/// let quad = quad?;
/// if quad.predicate == rdf_type && quad.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
pub struct NQuadsParser {
/// Whether quoted triples are accepted. Set by [`NQuadsParser::with_quoted_triples`].
#[cfg(feature = "rdf-star")]
with_quoted_triples: bool,
}
impl NQuadsParser {
/// Builds a new [`NQuadsParser`].
#[inline]
pub fn new() -> Self {
Self::default()
}
/// Enables [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star).
#[cfg(feature = "rdf-star")]
#[inline]
#[must_use]
pub fn with_quoted_triples(self) -> Self {
    let mut parser = self;
    parser.with_quoted_triples = true;
    parser
}
/// Parses a N-Quads file from a [`Read`] implementation.
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{NQuadsParser, ParseError};
///
/// let file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/foo> <http://schema.org/name> \"Foo\" .
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/bar> <http://schema.org/name> \"Bar\" .";
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for quad in NQuadsParser::new().parse_from_read(file.as_ref()) {
/// let quad = quad?;
/// if quad.predicate == rdf_type && quad.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn parse_from_read<R: Read>(&self, read: R) -> FromReadNQuadsReader<R> {
    // Builds the low-level reader and hands its parser over to the `Read`-based iterator
    let low_level_reader = self.parse();
    let inner = low_level_reader.parser.parse_from_read(read);
    FromReadNQuadsReader { inner }
}
/// Allows parsing an N-Quads file by using a low-level API.
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{NQuadsParser, ParseError};
///
/// let file: [&[u8]; 4] = [
/// b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
/// b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n",
/// b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
/// b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n"
/// ];
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// let mut parser = NQuadsParser::new().parse();
/// let mut file_chunks = file.iter();
/// while !parser.is_end() {
/// // We feed more data to the parser
/// if let Some(chunk) = file_chunks.next() {
/// parser.extend_from_slice(chunk);
/// } else {
/// parser.end(); // It's finished
/// }
/// // We read as many quads from the parser as possible
/// while let Some(quad) = parser.read_next() {
/// let quad = quad?;
/// if quad.predicate == rdf_type && quad.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[allow(clippy::unused_self)]
pub fn parse(&self) -> LowLevelNQuadsReader {
    // `true`: a graph name is accepted after the object
    let parser = NQuadsRecognizer::new_parser(
        true,
        #[cfg(feature = "rdf-star")]
        self.with_quoted_triples,
    );
    LowLevelNQuadsReader { parser }
}
}
/// Parses a N-Quads file from a [`Read`] implementation. Can be built using [`NQuadsParser::parse_from_read`].
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{NQuadsParser, ParseError};
///
/// let file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/foo> <http://schema.org/name> \"Foo\" .
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/bar> <http://schema.org/name> \"Bar\" .";
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for quad in NQuadsParser::new().parse_from_read(file.as_ref()) {
/// let quad = quad?;
/// if quad.predicate == rdf_type && quad.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct FromReadNQuadsReader<R: Read> {
/// Toolkit iterator that this reader forwards `next` to.
inner: FromReadIterator<R, NQuadsRecognizer>,
}
impl<R: Read> Iterator for FromReadNQuadsReader<R> {
    type Item = Result<Quad, ParseOrIoError>;

    /// Forwards to the wrapped toolkit iterator.
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next()
    }
}
/// Parses a N-Quads file by using a low-level API. Can be built using [`NQuadsParser::parse`].
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{NQuadsParser, ParseError};
///
/// let file: [&[u8]; 4] = [
/// b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
/// b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n",
/// b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
/// b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n"
/// ];
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// let mut parser = NQuadsParser::new().parse();
/// let mut file_chunks = file.iter();
/// while !parser.is_end() {
/// // We feed more data to the parser
/// if let Some(chunk) = file_chunks.next() {
/// parser.extend_from_slice(chunk);
/// } else {
/// parser.end(); // It's finished
/// }
/// // We read as many quads from the parser as possible
/// while let Some(quad) = parser.read_next() {
/// let quad = quad?;
/// if quad.predicate == rdf_type && quad.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct LowLevelNQuadsReader {
/// Toolkit parser that all the methods of this reader forward to.
parser: Parser<NQuadsRecognizer>,
}
impl LowLevelNQuadsReader {
/// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data.
pub fn extend_from_slice(&mut self, other: &[u8]) {
self.parser.extend_from_slice(other)
}
/// Tells the parser that the file is finished.
///
/// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values.
pub fn end(&mut self) {
self.parser.end()
}
/// Returns `true` if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`.
pub fn is_end(&self) -> bool {
self.parser.is_end()
}
/// Attempts to parse a new quad from the already provided data.
///
/// Returns [`None`] if the parsing is finished or more data is required.
/// If more data is required, it should be fed using [`extend_from_slice`](Self::extend_from_slice).
pub fn read_next(&mut self) -> Option<Result<Quad, ParseError>> {
self.parser.read_next()
}
}
/// A [N-Quads](https://www.w3.org/TR/n-quads/) serializer.
///
/// Support for [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star) is available behind the `rdf-star` feature.
///
/// ```
/// use oxrdf::{NamedNodeRef, QuadRef};
/// use oxttl::NQuadsSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = NQuadsSerializer::new().serialize_to_write(buf);
/// writer.write_quad(QuadRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// NamedNodeRef::new("http://example.com")?,
/// ))?;
/// assert_eq!(
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
/// writer.finish().as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
// Unit struct: the serializer has no field, hence no option to configure.
pub struct NQuadsSerializer;
impl NQuadsSerializer {
/// Builds a new [`NQuadsSerializer`].
#[inline]
pub fn new() -> Self {
Self
}
/// Writes a N-Quads file to a [`Write`] implementation.
///
/// ```
/// use oxrdf::{NamedNodeRef, QuadRef};
/// use oxttl::NQuadsSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = NQuadsSerializer::new().serialize_to_write(buf);
/// writer.write_quad(QuadRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// NamedNodeRef::new("http://example.com")?,
/// ))?;
/// assert_eq!(
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
/// writer.finish().as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn serialize_to_write<W: Write>(&self, write: W) -> ToWriteNQuadsWriter<W> {
    // Pairs the output with a low-level writer
    let writer = self.serialize();
    ToWriteNQuadsWriter { write, writer }
}
/// Builds a low-level N-Quads writer.
///
/// ```
/// use oxrdf::{NamedNodeRef, QuadRef};
/// use oxttl::NQuadsSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = NQuadsSerializer::new().serialize();
/// writer.write_quad(QuadRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// NamedNodeRef::new("http://example.com")?,
/// ), &mut buf)?;
/// assert_eq!(
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
/// buf.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[allow(clippy::unused_self)]
pub fn serialize(&self) -> LowLevelNQuadsWriter {
// The low-level writer is a unit struct: there is nothing to copy from `self`
LowLevelNQuadsWriter
}
}
/// Writes a N-Quads file to a [`Write`] implementation. Can be built using [`NQuadsSerializer::serialize_to_write`].
///
/// ```
/// use oxrdf::{NamedNodeRef, QuadRef};
/// use oxttl::NQuadsSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = NQuadsSerializer::new().serialize_to_write(buf);
/// writer.write_quad(QuadRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// NamedNodeRef::new("http://example.com")?,
/// ))?;
/// assert_eq!(
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
/// writer.finish().as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct ToWriteNQuadsWriter<W: Write> {
/// Output the quads are written to. Given back by `finish`.
write: W,
/// Low-level writer doing the actual formatting.
writer: LowLevelNQuadsWriter,
}
impl<W: Write> ToWriteNQuadsWriter<W> {
    /// Writes an extra quad to the underlying [`Write`].
    pub fn write_quad<'a>(&mut self, q: impl Into<QuadRef<'a>>) -> io::Result<()> {
        let Self { write, writer } = self;
        writer.write_quad(q, write)
    }

    /// Ends the write process and returns the underlying [`Write`].
    pub fn finish(self) -> W {
        let Self { write, .. } = self;
        write
    }
}
/// Writes a N-Quads file by using a low-level API. Can be built using [`NQuadsSerializer::serialize`].
///
/// ```
/// use oxrdf::{NamedNodeRef, QuadRef};
/// use oxttl::NQuadsSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = NQuadsSerializer::new().serialize();
/// writer.write_quad(QuadRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// NamedNodeRef::new("http://example.com")?,
/// ), &mut buf)?;
/// assert_eq!(
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
/// buf.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
// Unit struct: the writer keeps no state between two calls to `write_quad`.
pub struct LowLevelNQuadsWriter;
impl LowLevelNQuadsWriter {
    /// Writes an extra quad: its textual form followed by ` .` and a line jump.
    #[allow(clippy::unused_self)]
    pub fn write_quad<'a>(
        &mut self,
        q: impl Into<QuadRef<'a>>,
        mut write: impl Write,
    ) -> io::Result<()> {
        let quad: QuadRef<'a> = q.into();
        writeln!(write, "{quad} .")
    }
}

@ -0,0 +1,389 @@
//! A [N-Triples](https://www.w3.org/TR/n-triples/) streaming parser implemented by [`NTriplesParser`]
//! and a serializer implemented by [`NTriplesSerializer`].
use crate::line_formats::NQuadsRecognizer;
use crate::toolkit::{FromReadIterator, ParseError, ParseOrIoError, Parser};
use oxrdf::{Triple, TripleRef};
use std::io::{self, Read, Write};
/// A [N-Triples](https://www.w3.org/TR/n-triples/) streaming parser.
///
/// Support for [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) is available behind the `rdf-star` feature and the [`NTriplesParser::with_quoted_triples`] option.
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{NTriplesParser, ParseError};
///
/// let file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/foo> <http://schema.org/name> \"Foo\" .
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/bar> <http://schema.org/name> \"Bar\" .";
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for triple in NTriplesParser::new().parse_from_read(file.as_ref()) {
/// let triple = triple?;
/// if triple.predicate == rdf_type && triple.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
pub struct NTriplesParser {
/// Whether quoted triples are accepted. Set by [`NTriplesParser::with_quoted_triples`].
#[cfg(feature = "rdf-star")]
with_quoted_triples: bool,
}
impl NTriplesParser {
/// Builds a new [`NTriplesParser`].
#[inline]
pub fn new() -> Self {
Self::default()
}
/// Enables [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star).
#[cfg(feature = "rdf-star")]
#[inline]
#[must_use]
pub fn with_quoted_triples(self) -> Self {
    let mut parser = self;
    parser.with_quoted_triples = true;
    parser
}
/// Parses a N-Triples file from a [`Read`] implementation.
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{NTriplesParser, ParseError};
///
/// let file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/foo> <http://schema.org/name> \"Foo\" .
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/bar> <http://schema.org/name> \"Bar\" .";
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for triple in NTriplesParser::new().parse_from_read(file.as_ref()) {
/// let triple = triple?;
/// if triple.predicate == rdf_type && triple.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn parse_from_read<R: Read>(&self, read: R) -> FromReadNTriplesReader<R> {
    // Builds the low-level reader and hands its parser over to the `Read`-based iterator
    let low_level_reader = self.parse();
    let inner = low_level_reader.parser.parse_from_read(read);
    FromReadNTriplesReader { inner }
}
/// Allows parsing an N-Triples file by using a low-level API.
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{NTriplesParser, ParseError};
///
/// let file: [&[u8]; 4] = [
/// b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
/// b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n",
/// b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
/// b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n"
/// ];
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// let mut parser = NTriplesParser::new().parse();
/// let mut file_chunks = file.iter();
/// while !parser.is_end() {
/// // We feed more data to the parser
/// if let Some(chunk) = file_chunks.next() {
/// parser.extend_from_slice(chunk);
/// } else {
/// parser.end(); // It's finished
/// }
/// // We read as many triples from the parser as possible
/// while let Some(triple) = parser.read_next() {
/// let triple = triple?;
/// if triple.predicate == rdf_type && triple.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[allow(clippy::unused_self)]
pub fn parse(&self) -> LowLevelNTriplesReader {
    // `false`: no graph name is accepted after the object
    let parser = NQuadsRecognizer::new_parser(
        false,
        #[cfg(feature = "rdf-star")]
        self.with_quoted_triples,
    );
    LowLevelNTriplesReader { parser }
}
}
/// Parses a N-Triples file from a [`Read`] implementation. Can be built using [`NTriplesParser::parse_from_read`].
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{NTriplesParser, ParseError};
///
/// let file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/foo> <http://schema.org/name> \"Foo\" .
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/bar> <http://schema.org/name> \"Bar\" .";
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for triple in NTriplesParser::new().parse_from_read(file.as_ref()) {
/// let triple = triple?;
/// if triple.predicate == rdf_type && triple.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct FromReadNTriplesReader<R: Read> {
/// Toolkit iterator over the shared N-Quads recognizer; its items are converted to triples by `next`.
inner: FromReadIterator<R, NQuadsRecognizer>,
}
impl<R: Read> Iterator for FromReadNTriplesReader<R> {
    type Item = Result<Triple, ParseOrIoError>;

    /// Forwards to the wrapped iterator and converts each parsed quad into a triple.
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next().map(|item| item.map(Into::into))
    }
}
/// Push-based N-Triples reader: the caller feeds byte chunks and pulls triples.
///
/// Instances are created with [`NTriplesParser::parse`].
///
/// Example counting the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{NTriplesParser, ParseError};
///
/// let file: [&[u8]; 4] = [
///     b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
///     b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n",
///     b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
///     b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n"
/// ];
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// let mut parser = NTriplesParser::new().parse();
/// let mut file_chunks = file.iter();
/// while !parser.is_end() {
///     // We feed more data to the parser
///     if let Some(chunk) = file_chunks.next() {
///         parser.extend_from_slice(chunk);
///     } else {
///         parser.end(); // It's finished
///     }
///     // We read as many triples from the parser as possible
///     while let Some(triple) = parser.read_next() {
///         let triple = triple?;
///         if triple.predicate == rdf_type && triple.object == schema_person.into() {
///             count += 1;
///         }
///     }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct LowLevelNTriplesReader {
    // Generic parser driven by the recognizer shared with N-Quads.
    parser: Parser<NQuadsRecognizer>,
}
impl LowLevelNTriplesReader {
    /// Appends more input bytes to the parser buffer.
    ///
    /// Call it when [`read_next`](Self::read_next) returns [`None`] while unread data remains.
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.parser.extend_from_slice(other);
    }

    /// Signals that no more input will be provided.
    ///
    /// The remaining buffered bytes get parsed, so [`read_next`](Self::read_next)
    /// might still return some extra values afterwards.
    pub fn end(&mut self) {
        self.parser.end();
    }

    /// Returns `true` once parsing is finished, i.e. [`end`](Self::end) has been called
    /// and [`read_next`](Self::read_next) is always going to return `None`.
    pub fn is_end(&self) -> bool {
        self.parser.is_end()
    }

    /// Tries to parse one more triple out of the data provided so far.
    ///
    /// Returns [`None`] when parsing is finished or when more data is needed; in the
    /// latter case feed more bytes with [`extend_from_slice`](Self::extend_from_slice).
    pub fn read_next(&mut self) -> Option<Result<Triple, ParseError>> {
        self.parser
            .read_next()
            .map(|parsed| parsed.map(Into::into))
    }
}
/// Serializer for the [N-Triples](https://www.w3.org/TR/n-triples/) format.
///
/// [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star)
/// is supported when the `rdf-star` feature is enabled.
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::NTriplesSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = NTriplesSerializer::new().serialize_to_write(buf);
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
///     NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// assert_eq!(
///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
///     writer.finish().as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
pub struct NTriplesSerializer;
impl NTriplesSerializer {
/// Builds a new [`NTriplesSerializer`].
#[inline]
pub fn new() -> Self {
Self
}
/// Writes a N-Triples file to a [`Write`] implementation.
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::NTriplesSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = NTriplesSerializer::new().serialize_to_write(buf);
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// assert_eq!(
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
/// writer.finish().as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn serialize_to_write<W: Write>(&self, write: W) -> ToWriteNTriplesWriter<W> {
ToWriteNTriplesWriter {
write,
writer: self.serialize(),
}
}
/// Builds a low-level N-Triples writer.
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::NTriplesSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = NTriplesSerializer::new().serialize();
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// ), &mut buf)?;
/// assert_eq!(
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
/// buf.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[allow(clippy::unused_self)]
pub fn serialize(&self) -> LowLevelNTriplesWriter {
LowLevelNTriplesWriter
}
}
/// N-Triples writer bound to a [`Write`] implementation.
///
/// Instances are created with [`NTriplesSerializer::serialize_to_write`].
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::NTriplesSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = NTriplesSerializer::new().serialize_to_write(buf);
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
///     NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// assert_eq!(
///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
///     writer.finish().as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct ToWriteNTriplesWriter<W>
where
    W: Write,
{
    // Sink receiving the serialized bytes; handed back by `finish`.
    write: W,
    // Low-level writer doing the actual formatting.
    writer: LowLevelNTriplesWriter,
}
impl<W: Write> ToWriteNTriplesWriter<W> {
    /// Serializes one more triple to the underlying [`Write`].
    pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let sink = &mut self.write;
        self.writer.write_triple(t, sink)
    }

    /// Terminates the write process and gives back the underlying [`Write`].
    pub fn finish(self) -> W {
        let Self { write, .. } = self;
        write
    }
}
/// Low-level N-Triples writer: the destination is supplied on every write call.
///
/// Instances are created with [`NTriplesSerializer::serialize`].
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::NTriplesSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = NTriplesSerializer::new().serialize();
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
///     NamedNodeRef::new("http://schema.org/Person")?,
/// ), &mut buf)?;
/// assert_eq!(
///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
///     buf.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct LowLevelNTriplesWriter;
impl LowLevelNTriplesWriter {
    /// Serializes one more triple to `write`, as the triple followed by `" ."` and a newline.
    #[allow(clippy::unused_self)]
    pub fn write_triple<'a>(
        &mut self,
        t: impl Into<TripleRef<'a>>,
        mut write: impl Write,
    ) -> io::Result<()> {
        let triple: TripleRef<'a> = t.into();
        writeln!(write, "{triple} .")
    }
}

@ -0,0 +1,932 @@
//! Shared parser implementation for Turtle and TriG.
use crate::lexer::{resolve_local_name, N3Lexer, N3LexerMode, N3LexerOptions, N3Token};
use crate::toolkit::{Lexer, Parser, RuleRecognizer, RuleRecognizerError};
use crate::{MAX_BUFFER_SIZE, MIN_BUFFER_SIZE};
use oxiri::Iri;
#[cfg(feature = "rdf-star")]
use oxrdf::Triple;
use oxrdf::{
vocab::{rdf, xsd},
BlankNode, GraphName, Literal, NamedNode, NamedOrBlankNode, Quad, Subject, Term,
};
use std::collections::HashMap;
/// Stack-driven recognizer shared by the Turtle and TriG parsers.
///
/// It consumes [`N3Token`]s one by one and produces [`Quad`]s.
pub struct TriGRecognizer {
    /// Pending grammar states; the last element is the next one to be processed.
    stack: Vec<TriGState>,
    /// Whether the `GRAPH` keyword and `{ ... }` graph blocks are recognized.
    with_graph_name: bool,
    /// Whether `<<` quoted triples are recognized.
    #[cfg(feature = "rdf-star")]
    with_quoted_triples: bool,
    /// Options handed to the lexer; the base IRI is replaced by base declarations.
    lexer_options: N3LexerOptions,
    /// Prefix name to IRI mapping, filled by prefix declarations.
    prefixes: HashMap<String, Iri<String>>,
    /// Stack of subjects currently being described (nested structures push onto it).
    cur_subject: Vec<Subject>,
    /// Stack of predicates currently in use.
    cur_predicate: Vec<NamedNode>,
    /// Stack of objects currently in use.
    cur_object: Vec<Term>,
    /// Graph that emitted quads are put in.
    cur_graph: GraphName,
}
impl RuleRecognizer for TriGRecognizer {
type TokenRecognizer = N3Lexer;
type Output = Quad;
fn error_recovery_state(mut self) -> Self {
self.stack.clear();
self.cur_subject.clear();
self.cur_predicate.clear();
self.cur_object.clear();
self.cur_graph = GraphName::DefaultGraph;
self
}
fn recognize_next(
mut self,
token: N3Token,
results: &mut Vec<Quad>,
errors: &mut Vec<RuleRecognizerError>,
) -> Self {
if let Some(rule) = self.stack.pop() {
match rule {
// [1g] trigDoc ::= (directive | block)*
// [2g] block ::= triplesOrGraph | wrappedGraph | triples2 | "GRAPH" labelOrSubject wrappedGraph
// [3] directive ::= prefixID | base | sparqlPrefix | sparqlBase
// [4] prefixID ::= '@prefix' PNAME_NS IRIREF '.'
// [5] base ::= '@base' IRIREF '.'
// [5s] sparqlPrefix ::= "PREFIX" PNAME_NS IRIREF
// [6s] sparqlBase ::= "BASE" IRIREF
TriGState::TriGDoc => {
self.cur_graph = GraphName::DefaultGraph;
self.stack.push(TriGState::TriGDoc);
match token {
N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("base") => {
self.stack.push(TriGState::BaseExpectIri);
self
}
N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("prefix") => {
self.stack.push(TriGState::PrefixExpectPrefix);
self
}
N3Token::LangTag("prefix") => {
self.stack.push(TriGState::ExpectDot);
self.stack.push(TriGState::PrefixExpectPrefix);
self
}
N3Token::LangTag("base") => {
self.stack.push(TriGState::ExpectDot);
self.stack.push(TriGState::BaseExpectIri);
self
}
N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("graph") && self.with_graph_name => {
self.stack.push(TriGState::WrappedGraph);
self.stack.push(TriGState::GraphName);
self
}
token @ N3Token::Punctuation("{") if self.with_graph_name => {
self.stack.push(TriGState::WrappedGraph);
self.recognize_next(token, results, errors)
}
token => {
self.stack.push(TriGState::TriplesOrGraph);
self.recognize_next(token, results, errors)
}
}
},
TriGState::ExpectDot => {
self.cur_subject.pop();
if token == N3Token::Punctuation(".") {
self
} else {
errors.push("A dot is expected at the end of statements".into());
self.recognize_next(token, results, errors)
}
},
TriGState::BaseExpectIri => match token {
N3Token::IriRef(iri) => {
self.lexer_options.base_iri = Some(iri);
self
}
_ => self.error(errors, "The BASE keyword should be followed by an IRI"),
},
TriGState::PrefixExpectPrefix => match token {
N3Token::PrefixedName { prefix, local, .. } if local.is_empty() => {
self.stack.push(TriGState::PrefixExpectIri { name: prefix.to_owned() });
self
}
_ => {
self.error(errors, "The PREFIX keyword should be followed by a prefix like 'ex:'")
}
},
TriGState::PrefixExpectIri { name } => match token {
N3Token::IriRef(iri) => {
self.prefixes.insert(name, iri);
self
}
_ => self.error(errors, "The PREFIX declaration should be followed by a prefix and its value as an IRI"),
},
// [3g] triplesOrGraph ::= labelOrSubject ( wrappedGraph | predicateObjectList '.' ) | quotedTriple predicateObjectList '.'
// [4g] triples2 ::= blankNodePropertyList predicateObjectList? '.' | collection predicateObjectList '.'
TriGState::TriplesOrGraph => match token {
N3Token::IriRef(iri) => {
self.stack.push(TriGState::WrappedGraphOrPredicateObjectList {
term: NamedNode::new_unchecked(iri.into_inner()).into()
});
self
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) {
Ok(t) => {
self.stack.push(TriGState::WrappedGraphOrPredicateObjectList {
term: t.into()
});
self
},
Err(e) => self.error(errors, e)
}
N3Token::BlankNodeLabel(label) => {
self.stack.push(TriGState::WrappedGraphOrPredicateObjectList {
term: BlankNode::new_unchecked(label).into()
});
self
}
N3Token::Punctuation("[") => {
self.stack.push(TriGState::WrappedGraphBlankNodePropertyListCurrent);
self
}
N3Token::Punctuation("(") => {
self.stack.push(TriGState::ExpectDot);
self.stack.push(TriGState::PredicateObjectList);
self.stack.push(TriGState::SubjectCollectionBeginning);
self
}
#[cfg(feature = "rdf-star")]
N3Token::Punctuation("<<") if self.with_quoted_triples => {
self.stack.push(TriGState::ExpectDot);
self.stack.push(TriGState::PredicateObjectList);
self.stack.push(TriGState::SubjectQuotedTripleEnd);
self.stack.push(TriGState::QuotedObject);
self.stack.push(TriGState::Verb);
self.stack.push(TriGState::QuotedSubject);
self
}
token => {
self.error(errors, format!("The token {token:?} is not a valid subject or graph name"))
}
}
TriGState::WrappedGraphOrPredicateObjectList { term } => {
if token == N3Token::Punctuation("{") && self.with_graph_name {
self.cur_graph = term.into();
self.stack.push(TriGState::WrappedGraph);
} else {
self.cur_subject.push(term.into());
self.stack.push(TriGState::ExpectDot);
self.stack.push(TriGState::PredicateObjectList);
}
self.recognize_next(token, results, errors)
}
TriGState::WrappedGraphBlankNodePropertyListCurrent => if token == N3Token::Punctuation("]") {
self.stack.push(TriGState::WrappedGraphOrPredicateObjectList {
term: BlankNode::default().into()
});
self
} else {
self.cur_subject.push(BlankNode::default().into());
self.stack.push(TriGState::ExpectDot);
self.stack.push(TriGState::SubjectBlankNodePropertyListEnd);
self.stack.push(TriGState::PredicateObjectList);
self.recognize_next(token, results, errors)
}
TriGState::SubjectBlankNodePropertyListEnd => if token == N3Token::Punctuation("]") {
self.stack.push(TriGState::SubjectBlankNodePropertyListAfter );
self
} else {
errors.push("blank node property lists should end with a ']'".into());
self.stack.push(TriGState::SubjectBlankNodePropertyListAfter );
self.recognize_next(token, results, errors)
}
TriGState::SubjectBlankNodePropertyListAfter => if matches!(token, N3Token::Punctuation("." | "}")) {
self.recognize_next(token, results, errors)
} else {
self.stack.push(TriGState::PredicateObjectList);
self.recognize_next(token, results, errors)
}
TriGState::SubjectCollectionBeginning => {
match token {
N3Token::Punctuation(")") => {
self.cur_subject.push(rdf::NIL.into());
self
}
token => {
let root = BlankNode::default();
self.cur_subject.push(root.clone().into());
self.cur_subject.push(root.into());
self.cur_predicate.push(rdf::FIRST.into());
self.stack.push(TriGState::SubjectCollectionPossibleEnd);
self.stack.push(TriGState::Object);
self.recognize_next(token, results, errors)
}
}
},
TriGState::SubjectCollectionPossibleEnd => {
let old = self.cur_subject.pop().unwrap();
self.cur_object.pop();
match token {
N3Token::Punctuation(")") => {
self.cur_predicate.pop();
results.push(Quad::new(
old,
rdf::REST,
rdf::NIL,
self.cur_graph.clone()
));
self
}
token => {
let new = BlankNode::default();
results.push(Quad::new(
old,
rdf::REST,
new.clone(),
self.cur_graph.clone()
));
self.cur_subject.push(new.into());
self.stack.push(TriGState::ObjectCollectionPossibleEnd);
self.stack.push(TriGState::Object);
self.recognize_next(token, results, errors)
}
}
}
// [5g] wrappedGraph ::= '{' triplesBlock? '}'
// [6g] triplesBlock ::= triples ('.' triplesBlock?)?
TriGState::WrappedGraph => if token == N3Token::Punctuation("{") {
self.stack.push(TriGState::WrappedGraphPossibleEnd);
self.stack.push(TriGState::Triples);
self
} else {
self.error(errors, "The GRAPH keyword should be followed by a graph name and a value in '{'")
},
TriGState::WrappedGraphPossibleEnd => {
self.cur_subject.pop();
match token {
N3Token::Punctuation("}") => {
self
}
N3Token::Punctuation(".") => {
self.stack.push(TriGState::WrappedGraphPossibleEnd);
self.stack.push(TriGState::Triples);
self
}
token => {
errors.push("A '}' or a '.' is expected at the end of a graph block".into());
self.recognize_next(token, results, errors)
}
}
}
// [6] triples ::= subject predicateObjectList | blankNodePropertyList predicateObjectList?
// [10] subject ::= iri | BlankNode | collection | quotedTriple
TriGState::Triples => match token {
N3Token::Punctuation("}") => {
self.recognize_next(token, results, errors) // Early end
},
N3Token::Punctuation("[") => {
self.cur_subject.push(BlankNode::default().into());
self.stack.push(TriGState::TriplesBlankNodePropertyListCurrent);
self
}
N3Token::IriRef(iri) => {
self.cur_subject.push(NamedNode::new_unchecked(iri.into_inner()).into());
self.stack.push(TriGState::PredicateObjectList);
self
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) {
Ok(t) => {
self.cur_subject.push(t.into());
self.stack.push(TriGState::PredicateObjectList);
self
},
Err(e) => self.error(errors, e)
}
N3Token::BlankNodeLabel(label) => {
self.cur_subject.push(BlankNode::new_unchecked(label).into());
self.stack.push(TriGState::PredicateObjectList);
self
}
N3Token::Punctuation("(") => {
self.stack.push(TriGState::PredicateObjectList);
self.stack.push(TriGState::SubjectCollectionBeginning);
self
}
#[cfg(feature = "rdf-star")]
N3Token::Punctuation("<<") if self.with_quoted_triples => {
self.stack.push(TriGState::PredicateObjectList);
self.stack.push(TriGState::SubjectQuotedTripleEnd);
self.stack.push(TriGState::QuotedObject);
self.stack.push(TriGState::Verb);
self.stack.push(TriGState::QuotedSubject);
self
}
token => {
self.error(errors, format!("The token {token:?} is not a valid RDF subject"))
}
},
TriGState::TriplesBlankNodePropertyListCurrent => if token == N3Token::Punctuation("]") {
self.stack.push(TriGState::PredicateObjectList);
self
} else {
self.stack.push(TriGState::SubjectBlankNodePropertyListEnd);
self.stack.push(TriGState::PredicateObjectList);
self.recognize_next(token, results, errors)
}
// [7g] labelOrSubject ::= iri | BlankNode
TriGState::GraphName => match token {
N3Token::IriRef(iri) => {
self.cur_graph = NamedNode::new_unchecked(iri.into_inner()).into();
self
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) {
Ok(t) => {
self.cur_graph = t.into();
self
},
Err(e) => self.error(errors, e)
}
N3Token::BlankNodeLabel(label) => {
self.cur_graph = BlankNode::new_unchecked(label).into();
self
}
N3Token::Punctuation("[") => {
self.stack.push(TriGState::GraphNameAnonEnd);
self
}
token => {
self.error(errors, format!("The token {token:?} is not a valid graph name"))
}
}
TriGState::GraphNameAnonEnd => if token == N3Token::Punctuation("]") {
self.cur_graph = BlankNode::default().into();
self
} else {
self.error(errors, "Anonymous blank node with a property list are not allowed as graph name")
}
// [7] predicateObjectList ::= verb objectList (';' (verb objectList)?)*
TriGState::PredicateObjectList => {
self.stack.push(TriGState::PredicateObjectListEnd);
self.stack.push(TriGState::ObjectsList);
self.stack.push(TriGState::Verb);
self.recognize_next(token, results, errors)
},
TriGState::PredicateObjectListEnd => {
self.cur_predicate.pop();
if token == N3Token::Punctuation(";") {
self.stack.push(TriGState::PredicateObjectListPossibleContinuation);
self
} else {
self.recognize_next(token, results, errors)
}
},
TriGState::PredicateObjectListPossibleContinuation => if token == N3Token::Punctuation(";") {
self.stack.push(TriGState::PredicateObjectListPossibleContinuation);
self
} else if matches!(token, N3Token::Punctuation("." | "}" | "]")) {
self.recognize_next(token, results, errors)
} else {
self.stack.push(TriGState::PredicateObjectListEnd);
self.stack.push(TriGState::ObjectsList);
self.stack.push(TriGState::Verb);
self.recognize_next(token, results, errors)
},
// [8] objectList ::= object annotation? ( ',' object annotation? )*
// [30t] annotation ::= '{|' predicateObjectList '|}'
TriGState::ObjectsList => {
self.stack.push(TriGState::ObjectsListEnd);
self.stack.push(TriGState::Object);
self.recognize_next(token, results, errors)
}
TriGState::ObjectsListEnd => {
match token {
N3Token::Punctuation(",") => {
self.cur_object.pop();
self.stack.push(TriGState::ObjectsListEnd);
self.stack.push(TriGState::Object);
self
},
#[cfg(feature = "rdf-star")]
N3Token::Punctuation("{|") => {
let triple = Triple::new(
self.cur_subject.last().unwrap().clone(),
self.cur_predicate.last().unwrap().clone(),
self.cur_object.pop().unwrap()
);
self.cur_subject.push(triple.into());
self.stack.push(TriGState::AnnotationEnd);
self.stack.push(TriGState::PredicateObjectList);
self
}
token => {
self.cur_object.pop();
self.recognize_next(token, results, errors)
}
}
},
#[cfg(feature = "rdf-star")]
TriGState::AnnotationEnd => {
self.cur_subject.pop();
self.stack.push(TriGState::ObjectsListAfterAnnotation);
if token == N3Token::Punctuation("|}") {
self
} else {
self.error(errors, "Annotations should end with '|}'")
}
},
#[cfg(feature = "rdf-star")]
TriGState::ObjectsListAfterAnnotation => if token == N3Token::Punctuation(",") {
self.stack.push(TriGState::ObjectsListEnd);
self.stack.push(TriGState::Object);
self
} else {
self.recognize_next(token, results, errors)
},
// [9] verb ::= predicate | 'a'
// [11] predicate ::= iri
TriGState::Verb => match token {
N3Token::PlainKeyword("a") => {
self.cur_predicate.push(rdf::TYPE.into());
self
}
N3Token::IriRef(iri) => {
self.cur_predicate.push(NamedNode::new_unchecked(iri.into_inner()));
self
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) {
Ok(t) => {
self.cur_predicate.push(t);
self
},
Err(e) => self.error(errors, e)
}
token => {
self.error(errors, format!("The token {token:?} is not a valid predicate"))
}
}
// [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal | quotedTriple
// [13] literal ::= RDFLiteral | NumericLiteral | BooleanLiteral
// [14] blank ::= BlankNode | collection
// [15] blankNodePropertyList ::= '[' predicateObjectList ']'
// [16] collection ::= '(' object* ')'
// [17] NumericLiteral ::= INTEGER | DECIMAL | DOUBLE
// [128s] RDFLiteral ::= String (LANGTAG | '^^' iri)?
// [133s] BooleanLiteral ::= 'true' | 'false'
// [18] String ::= STRING_LITERAL_QUOTE | STRING_LITERAL_SINGLE_QUOTE | STRING_LITERAL_LONG_SINGLE_QUOTE | STRING_LITERAL_LONG_QUOTE
// [135s] iri ::= IRIREF | PrefixedName
// [136s] PrefixedName ::= PNAME_LN | PNAME_NS
// [137s] BlankNode ::= BLANK_NODE_LABEL | ANON
TriGState::Object => match token {
N3Token::IriRef(iri) => {
self.cur_object.push(NamedNode::new_unchecked(iri.into_inner()).into());
self.emit_quad(results);
self
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) {
Ok(t) => {
self.cur_object.push(t.into());
self.emit_quad(results);
self
},
Err(e) => self.error(errors, e)
}
N3Token::BlankNodeLabel(label) => {
self.cur_object.push(BlankNode::new_unchecked(label).into());
self.emit_quad(results);
self
}
N3Token::Punctuation("[") => {
self.stack.push(TriGState::ObjectBlankNodePropertyListCurrent);
self
}
N3Token::Punctuation("(") => {
self.stack.push(TriGState::ObjectCollectionBeginning);
self
}
N3Token::String(value) => {
self.stack.push(TriGState::LiteralPossibleSuffix { value, emit: true });
self
}
N3Token::Integer(v) => {
self.cur_object.push(Literal::new_typed_literal(v, xsd::INTEGER).into());
self.emit_quad(results);
self
}
N3Token::Decimal(v) => {
self.cur_object.push(Literal::new_typed_literal(v, xsd::DECIMAL).into());
self.emit_quad(results);
self
}
N3Token::Double(v) => {
self.cur_object.push(Literal::new_typed_literal(v, xsd::DOUBLE).into());
self.emit_quad(results);
self
}
N3Token::PlainKeyword("true") => {
self.cur_object.push(Literal::new_typed_literal("true", xsd::BOOLEAN).into());
self.emit_quad(results);
self
}
N3Token::PlainKeyword("false") => {
self.cur_object.push(Literal::new_typed_literal("false", xsd::BOOLEAN).into());
self.emit_quad(results);
self
}
#[cfg(feature = "rdf-star")]
N3Token::Punctuation("<<") if self.with_quoted_triples => {
self.stack.push(TriGState::ObjectQuotedTripleEnd { emit: true });
self.stack.push(TriGState::QuotedObject);
self.stack.push(TriGState::Verb);
self.stack.push(TriGState::QuotedSubject);
self
}
token => {
self.error(errors, format!("This is not a valid RDF object: {token:?}"))
}
}
TriGState::ObjectBlankNodePropertyListCurrent => if token == N3Token::Punctuation("]") {
self.cur_object.push(BlankNode::default().into());
self.emit_quad(results);
self
} else {
self.cur_subject.push(BlankNode::default().into());
self.stack.push(TriGState::ObjectBlankNodePropertyListEnd);
self.stack.push(TriGState::PredicateObjectList);
self.recognize_next(token, results, errors)
}
TriGState::ObjectBlankNodePropertyListEnd => if token == N3Token::Punctuation("]") {
self.cur_object.push(self.cur_subject.pop().unwrap().into());
self.emit_quad(results);
self
} else {
self.error(errors, "blank node property lists should end with a ']'")
}
TriGState::ObjectCollectionBeginning => match token {
N3Token::Punctuation(")") => {
self.cur_object.push(rdf::NIL.into());
self.emit_quad(results);
self
}
token => {
let root = BlankNode::default();
self.cur_object.push(root.clone().into());
self.emit_quad(results);
self.cur_subject.push(root.into());
self.cur_predicate.push(rdf::FIRST.into());
self.stack.push(TriGState::ObjectCollectionPossibleEnd);
self.stack.push(TriGState::Object);
self.recognize_next(token, results, errors)
}
},
TriGState::ObjectCollectionPossibleEnd => {
let old = self.cur_subject.pop().unwrap();
self.cur_object.pop();
match token {
N3Token::Punctuation(")") => {
self.cur_predicate.pop();
results.push(Quad::new(old,
rdf::REST,
rdf::NIL,
self.cur_graph.clone()
));
self
}
token => {
let new = BlankNode::default();
results.push(Quad::new(old,
rdf::REST,
new.clone(),
self.cur_graph.clone()
));
self.cur_subject.push(new.into());
self.stack.push(TriGState::ObjectCollectionPossibleEnd);
self.stack.push(TriGState::Object);
self.recognize_next(token, results, errors)
}
}
}
TriGState::LiteralPossibleSuffix { value, emit } => {
match token {
N3Token::LangTag(lang) => {
self.cur_object.push(Literal::new_language_tagged_literal_unchecked(value, lang.to_ascii_lowercase()).into());
if emit {
self.emit_quad(results);
}
self
},
N3Token::Punctuation("^^") => {
self.stack.push(TriGState::LiteralExpectDatatype { value, emit });
self
}
token => {
self.cur_object.push(Literal::new_simple_literal(value).into());
if emit {
self.emit_quad(results);
}
self.recognize_next(token, results, errors)
}
}
}
TriGState::LiteralExpectDatatype { value, emit } => {
match token {
N3Token::IriRef(datatype) => {
self.cur_object.push(Literal::new_typed_literal(value, NamedNode::new_unchecked(datatype.into_inner())).into());
if emit {
self.emit_quad(results);
}
self
},
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) {
Ok(t) => {
self.cur_object.push(Literal::new_typed_literal(value, t).into());
if emit {
self.emit_quad(results);
}
self
},
Err(e) => self.error(errors, e)
}
token => {
self.error(errors, format!("Expecting a datatype IRI after '^^, found {token:?}")).recognize_next(token, results, errors)
}
}
}
// [27t] quotedTriple ::= '<<' qtSubject verb qtObject '>>'
#[cfg(feature = "rdf-star")]
TriGState::SubjectQuotedTripleEnd => {
let triple = Triple::new(
self.cur_subject.pop().unwrap(),
self.cur_predicate.pop().unwrap(),
self.cur_object.pop().unwrap()
);
self.cur_subject.push(triple.into());
if token == N3Token::Punctuation(">>") {
self
} else {
self.error(errors, format!("Expecting '>>' to close a quoted triple, found {token:?}"))
}
}
#[cfg(feature = "rdf-star")]
TriGState::ObjectQuotedTripleEnd { emit } => {
let triple = Triple::new(
self.cur_subject.pop().unwrap(),
self.cur_predicate.pop().unwrap(),
self.cur_object.pop().unwrap()
);
self.cur_object.push(triple.into());
if emit {
self.emit_quad(results);
}
if token == N3Token::Punctuation(">>") {
self
} else {
self.error(errors, format!("Expecting '>>' to close a quoted triple, found {token:?}"))
}
}
// [28t] qtSubject ::= iri | BlankNode | quotedTriple
#[cfg(feature = "rdf-star")]
TriGState::QuotedSubject => match token {
N3Token::Punctuation("[") => {
self.cur_subject.push(BlankNode::default().into());
self.stack.push(TriGState::QuotedAnonEnd);
self
}
N3Token::IriRef(iri) => {
self.cur_subject.push(NamedNode::new_unchecked(iri.into_inner()).into());
self
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) {
Ok(t) => {
self.cur_subject.push(t.into());
self
},
Err(e) => self.error(errors, e)
}
N3Token::BlankNodeLabel(label) => {
self.cur_subject.push(BlankNode::new_unchecked(label).into());
self
}
N3Token::Punctuation("<<") => {
self.stack.push(TriGState::SubjectQuotedTripleEnd);
self.stack.push(TriGState::QuotedObject);
self.stack.push(TriGState::Verb);
self.stack.push(TriGState::QuotedSubject);
self
}
token => self.error(errors, format!("This is not a valid RDF quoted triple subject: {token:?}"))
}
// [29t] qtObject ::= iri | BlankNode | literal | quotedTriple
#[cfg(feature = "rdf-star")]
TriGState::QuotedObject => match token {
N3Token::Punctuation("[") => {
self.cur_object.push(BlankNode::default().into());
self.stack.push(TriGState::QuotedAnonEnd);
self
}
N3Token::IriRef(iri) => {
self.cur_object.push(NamedNode::new_unchecked(iri.into_inner()).into());
self
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) {
Ok(t) => {
self.cur_object.push(t.into());
self
},
Err(e) => self.error(errors, e)
}
N3Token::BlankNodeLabel(label) => {
self.cur_object.push(BlankNode::new_unchecked(label).into());
self
}
N3Token::String(value) => {
self.stack.push(TriGState::LiteralPossibleSuffix { value, emit: false });
self
}
N3Token::Integer(v) => {
self.cur_object.push(Literal::new_typed_literal(v, xsd::INTEGER).into());
self
}
N3Token::Decimal(v) => {
self.cur_object.push(Literal::new_typed_literal(v, xsd::DECIMAL).into());
self
}
N3Token::Double(v) => {
self.cur_object.push(Literal::new_typed_literal(v, xsd::DOUBLE).into());
self
}
N3Token::PlainKeyword("true") => {
self.cur_object.push(Literal::new_typed_literal("true", xsd::BOOLEAN).into());
self
}
N3Token::PlainKeyword("false") => {
self.cur_object.push(Literal::new_typed_literal("false", xsd::BOOLEAN).into());
self
}
N3Token::Punctuation("<<") => {
self.stack.push(TriGState::ObjectQuotedTripleEnd { emit: false });
self.stack.push(TriGState::QuotedObject);
self.stack.push(TriGState::Verb);
self.stack.push(TriGState::QuotedSubject);
self
}
token => self.error(errors, format!("This is not a valid RDF quoted triple object: {token:?}"))
}
#[cfg(feature = "rdf-star")]
TriGState::QuotedAnonEnd => if token == N3Token::Punctuation("]") {
self
} else {
self.error(errors, "Anonymous blank node with a property list are not allowed in quoted triples")
}
}
} else if token == N3Token::Punctuation(".") || token == N3Token::Punctuation("}") {
//TODO: be smarter depending if we are in '{' or not
self.stack.push(TriGState::TriGDoc);
self
} else {
self
}
}
fn recognize_end(
mut self,
results: &mut Vec<Self::Output>,
errors: &mut Vec<RuleRecognizerError>,
) {
match &*self.stack {
[] | [TriGState::TriGDoc] => {
debug_assert!(self.cur_subject.is_empty());
debug_assert!(self.cur_predicate.is_empty());
debug_assert!(self.cur_object.is_empty());
}
[.., TriGState::LiteralPossibleSuffix { value, emit: true }] => {
self.cur_object
.push(Literal::new_simple_literal(value).into());
self.emit_quad(results);
errors.push("Triples should be followed by a dot".into())
}
_ => errors.push("Unexpected end".into()), //TODO
}
}
fn lexer_options(&self) -> &N3LexerOptions {
&self.lexer_options
}
}
impl TriGRecognizer {
    /// Builds a [`Parser`] that lexes in Turtle mode and recognizes Turtle or TriG.
    ///
    /// `with_graph_name` enables the graph block syntax, `with_quoted_triples` the
    /// quoted triple syntax; `base_iri` and `prefixes` seed the initial parsing context.
    pub fn new_parser(
        with_graph_name: bool,
        #[cfg(feature = "rdf-star")] with_quoted_triples: bool,
        base_iri: Option<Iri<String>>,
        prefixes: HashMap<String, Iri<String>>,
    ) -> Parser<Self> {
        // NOTE(review): the trailing `true` and `Some(b"#")` arguments presumably
        // configure multi-line tokens and the line comment marker — confirm against
        // `Lexer::new`.
        let lexer = Lexer::new(
            N3Lexer::new(N3LexerMode::Turtle),
            MIN_BUFFER_SIZE,
            MAX_BUFFER_SIZE,
            true,
            Some(b"#"),
        );
        let recognizer = Self {
            stack: vec![TriGState::TriGDoc],
            with_graph_name,
            #[cfg(feature = "rdf-star")]
            with_quoted_triples,
            lexer_options: N3LexerOptions { base_iri },
            prefixes,
            cur_subject: Vec::new(),
            cur_predicate: Vec::new(),
            cur_object: Vec::new(),
            cur_graph: GraphName::DefaultGraph,
        };
        Parser::new(lexer, recognizer)
    }

    /// Records `msg` in `errors` and resets the recognizer to its recovery state
    /// (empty stack, no current terms, default graph).
    #[must_use]
    fn error(
        self,
        errors: &mut Vec<RuleRecognizerError>,
        msg: impl Into<RuleRecognizerError>,
    ) -> Self {
        errors.push(msg.into());
        // Same reset as the one exposed through the `RuleRecognizer` trait
        self.error_recovery_state()
    }

    /// Pushes to `results` the quad made of the current subject, predicate, object and graph.
    ///
    /// Panics if one of the subject, predicate or object stacks is empty.
    fn emit_quad(&mut self, results: &mut Vec<Quad>) {
        let subject = self.cur_subject.last().unwrap().clone();
        let predicate = self.cur_predicate.last().unwrap().clone();
        let object = self.cur_object.last().unwrap().clone();
        let graph_name = self.cur_graph.clone();
        results.push(Quad::new(subject, predicate, object, graph_name));
    }
}
/// A state stored in the state stack of [`TriGRecognizer`].
///
/// A new recognizer starts with a stack holding only [`TriGState::TriGDoc`];
/// further states are pushed on the stack while tokens are recognized.
// NOTE(review): the variant names look like they follow the Turtle/TriG grammar productions;
// their exact role is defined by the state machine in `recognize_next` — confirm there.
#[derive(Debug)]
enum TriGState {
/// Initial state; a stack holding only this state is accepted as a clean end of input.
TriGDoc,
ExpectDot,
BaseExpectIri,
PrefixExpectPrefix,
PrefixExpectIri {
// NOTE(review): presumably the prefix name read just before the IRI — confirm in `recognize_next`.
name: String,
},
TriplesOrGraph,
WrappedGraphBlankNodePropertyListCurrent,
SubjectBlankNodePropertyListEnd,
SubjectBlankNodePropertyListAfter,
SubjectCollectionBeginning,
SubjectCollectionPossibleEnd,
WrappedGraphOrPredicateObjectList {
term: NamedOrBlankNode,
},
WrappedGraph,
WrappedGraphPossibleEnd,
GraphName,
GraphNameAnonEnd,
Triples,
TriplesBlankNodePropertyListCurrent,
PredicateObjectList,
PredicateObjectListEnd,
PredicateObjectListPossibleContinuation,
ObjectsList,
ObjectsListEnd,
#[cfg(feature = "rdf-star")]
AnnotationEnd,
#[cfg(feature = "rdf-star")]
ObjectsListAfterAnnotation,
Verb,
Object,
ObjectBlankNodePropertyListCurrent,
ObjectBlankNodePropertyListEnd,
ObjectCollectionBeginning,
ObjectCollectionPossibleEnd,
/// A string has been read and might be followed by a suffix.
///
/// If the input ends in this state with `emit` set, `recognize_end` builds a simple literal from `value` and emits a quad with it.
LiteralPossibleSuffix {
value: String,
emit: bool,
},
LiteralExpectDatatype {
value: String,
emit: bool,
},
#[cfg(feature = "rdf-star")]
SubjectQuotedTripleEnd,
#[cfg(feature = "rdf-star")]
ObjectQuotedTripleEnd {
// Pushed with `emit: false` when a quoted triple is nested as the object of another quoted triple.
emit: bool,
},
#[cfg(feature = "rdf-star")]
QuotedSubject,
#[cfg(feature = "rdf-star")]
QuotedObject,
#[cfg(feature = "rdf-star")]
QuotedAnonEnd,
}

@ -0,0 +1,280 @@
use memchr::memchr2;
use std::error::Error;
use std::fmt;
use std::io::{self, Read};
use std::ops::{Range, RangeInclusive};
/// A recognizer cutting a byte buffer into tokens. It is driven by [`Lexer`].
pub trait TokenRecognizer {
/// The recognized tokens. They are allowed to borrow from the `data` buffer given to [`Self::recognize_next_token`].
type Token<'a>
where
Self: 'a;
/// The options given to [`Lexer::read_next`] and forwarded to each [`Self::recognize_next_token`] call.
type Options: Default;
/// Attempts to recognize a token at the beginning of `data`.
///
/// `is_ending` is the end-of-input flag of the [`Lexer`]: it is set by [`Lexer::end`] or by a read returning no bytes.
///
/// Returns `None` if no token can be produced from `data`: the [`Lexer`] then yields nothing
/// (or an "Unexpected end of file" error if `is_ending` is set and unread bytes remain).
/// Otherwise, returns the number of consumed bytes and the token or an error.
/// The [`Lexer`] asserts in debug builds that this number is not zero and not bigger than `data.len()`.
fn recognize_next_token<'a>(
&mut self,
data: &'a [u8],
is_ending: bool,
config: &Self::Options,
) -> Option<(usize, Result<Self::Token<'a>, TokenRecognizerError>)>;
}
/// An error raised by a `TokenRecognizer`: a message and the byte range it applies to.
pub struct TokenRecognizerError {
    pub position: Range<usize>,
    pub message: String,
}

/// Builds the error from a half-open byte range and a message.
impl<S: Into<String>> From<(Range<usize>, S)> for TokenRecognizerError {
    fn from(value: (Range<usize>, S)) -> Self {
        let (position, message) = value;
        let message = message.into();
        Self { position, message }
    }
}

/// Builds the error from an inclusive byte range and a message.
#[allow(clippy::range_plus_one)]
impl<S: Into<String>> From<(RangeInclusive<usize>, S)> for TokenRecognizerError {
    fn from((position, message): (RangeInclusive<usize>, S)) -> Self {
        // The inclusive end is converted into an exclusive one.
        let start = *position.start();
        let end = *position.end() + 1;
        Self::from((start..end, message))
    }
}

/// Builds the error from the position of a single byte and a message.
impl<S: Into<String>> From<(usize, S)> for TokenRecognizerError {
    fn from((position, message): (usize, S)) -> Self {
        Self::from((position..=position, message))
    }
}
/// A token returned by [`Lexer::read_next`] with the byte range it covers.
pub struct TokenWithPosition<T> {
pub token: T,
// From the lexer position before the token was consumed to the lexer position after it.
pub position: Range<usize>,
}
/// A buffered lexer: it feeds the bytes it stores to a [`TokenRecognizer`] and skips whitespaces and line comments between tokens.
pub struct Lexer<R: TokenRecognizer> {
// The recognizer producing the tokens.
parser: R,
// The buffer. Only `data[start..end]` holds bytes that are still to be read.
data: Vec<u8>,
// Index in `data` of the first unread byte.
start: usize,
// Index in `data` following the last unread byte.
end: usize,
// Set when no more input is expected (`end()` called or a read returned no bytes).
is_ending: bool,
// Counter advanced by the same amount as `start` each time bytes are consumed; used to build token and error positions.
position: usize,
// Minimal free space made available in `data` before each read of `extend_from_read`.
min_buffer_size: usize,
// `extend_from_read` fails if `end + min_buffer_size` gets bigger than this value.
max_buffer_size: usize,
// If true '\r' and '\n' are skipped between tokens like ' ' and '\t'.
is_line_jump_whitespace: bool,
// The bytes starting a comment running until the next '\r' or '\n', if any.
line_comment_start: Option<&'static [u8]>,
}
impl<R: TokenRecognizer> Lexer<R> {
    /// Builds a lexer with an empty buffer.
    ///
    /// * `parser`: the recognizer cutting the buffer into tokens.
    /// * `min_buffer_size`: the minimal free space made available before each read of [`Self::extend_from_read`].
    /// * `max_buffer_size`: the limit above which [`Self::extend_from_read`] refuses to grow the buffer.
    /// * `is_line_jump_whitespace`: if `true`, `\r` and `\n` are skipped between tokens like spaces and tabs.
    /// * `line_comment_start`: the bytes starting a comment running until the end of the line, if any.
    pub fn new(
        parser: R,
        min_buffer_size: usize,
        max_buffer_size: usize,
        is_line_jump_whitespace: bool,
        line_comment_start: Option<&'static [u8]>,
    ) -> Self {
        Self {
            parser,
            data: Vec::new(),
            start: 0,
            end: 0,
            is_ending: false,
            position: 0,
            min_buffer_size,
            max_buffer_size,
            is_line_jump_whitespace,
            line_comment_start,
        }
    }

    /// Appends `other` after the unread bytes of the buffer.
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.shrink_if_useful();
        // The buffer might be longer than the readable part (see `extend_from_read`): the tail is dropped first.
        self.data.truncate(self.end);
        self.data.extend_from_slice(other);
        self.end = self.data.len();
    }

    /// Tells the lexer that no more data is going to be provided.
    pub fn end(&mut self) {
        self.is_ending = true;
    }

    /// Does a single [`Read::read`] call to append bytes after the unread bytes of the buffer.
    ///
    /// A read returning no bytes marks the input as ending.
    ///
    /// Returns an [`io::ErrorKind::OutOfMemory`] error, without reading, if making `min_buffer_size` bytes
    /// available would require a buffer bigger than `max_buffer_size`. Errors of `read` are returned as is.
    pub fn extend_from_read(&mut self, read: &mut impl Read) -> io::Result<()> {
        self.shrink_if_useful();
        let min_end = self.end + self.min_buffer_size;
        if min_end > self.max_buffer_size {
            return Err(io::Error::new(
                io::ErrorKind::OutOfMemory,
                format!(
                    "The buffer maximal size is {} < {min_end}",
                    self.max_buffer_size
                ),
            ));
        }
        if self.data.len() < min_end {
            self.data.resize(min_end, 0);
        }
        if self.data.len() < self.data.capacity() {
            // We keep extending to have as much space as available without reallocation
            self.data.resize(self.data.capacity(), 0);
        }
        let read = read.read(&mut self.data[self.end..])?;
        self.end += read;
        self.is_ending = read == 0;
        Ok(())
    }

    /// Reads the next token from the buffer after having skipped whitespaces and comments.
    ///
    /// Returns `None` if the buffer does not hold a full token: more data is needed or the input is finished.
    /// If the input is ending and the remaining bytes do not form a token, these bytes are dropped
    /// and an "Unexpected end of file" error is returned.
    ///
    /// The positions of the returned tokens and errors are byte offsets computed from the lexer position counter.
    pub fn read_next(
        &mut self,
        options: &R::Options,
    ) -> Option<Result<TokenWithPosition<R::Token<'_>>, LexerError>> {
        self.skip_whitespaces_and_comments()?;
        let (consumed, result) = if let Some(r) = self.parser.recognize_next_token(
            &self.data[self.start..self.end],
            self.is_ending,
            options,
        ) {
            r
        } else {
            return if self.is_ending {
                if self.start == self.end {
                    None // We have finished
                } else {
                    let error = LexerError {
                        position: self.position..self.position + (self.end - self.start),
                        message: "Unexpected end of file".into(),
                    };
                    self.end = self.start; // We consume everything
                    Some(Err(error))
                }
            } else {
                None
            };
        };
        debug_assert!(
            consumed > 0,
            "The lexer must consume at least one byte each time"
        );
        debug_assert!(
            self.start + consumed <= self.end,
            "The lexer tried to consumed {consumed} bytes but only {} bytes are readable",
            self.end - self.start
        );
        let old_position = self.position;
        self.start += consumed;
        self.position += consumed;
        Some(match result {
            Ok(token) => Ok(TokenWithPosition {
                token,
                position: old_position..self.position,
            }),
            Err(e) => Err(LexerError {
                // The recognizer positions are relative to the first byte of the slice it got,
                // i.e. to the position before the consumed bytes have been skipped.
                position: e.position.start + old_position..e.position.end + old_position,
                message: e.message,
            }),
        })
    }

    /// Returns `true` if the input is ending and all the buffered bytes have been consumed.
    pub fn is_end(&self) -> bool {
        self.is_ending && self.end == self.start
    }

    /// Skips the whitespaces and the line comments at the beginning of the unread bytes.
    ///
    /// Returns `None` if a comment is not terminated yet and more data is needed to find its end.
    fn skip_whitespaces_and_comments(&mut self) -> Option<()> {
        loop {
            self.skip_whitespaces();
            let buf = &self.data[self.start..self.end];
            if let Some(line_comment_start) = self.line_comment_start {
                if buf.starts_with(line_comment_start) {
                    // Comment
                    if let Some(end) = memchr2(b'\r', b'\n', &buf[line_comment_start.len()..]) {
                        // We stop just before the line jump: it is handled by the next iteration
                        self.start += end + line_comment_start.len();
                        self.position += end + line_comment_start.len();
                        continue;
                    }
                    if self.is_ending {
                        self.end = self.start; // EOF
                        return Some(());
                    }
                    return None; // We need more data
                }
            }
            return Some(());
        }
    }

    /// Skips the whitespaces at the beginning of the unread bytes.
    ///
    /// Line jumps are only skipped if `is_line_jump_whitespace` is set.
    fn skip_whitespaces(&mut self) {
        if self.is_line_jump_whitespace {
            for (i, c) in self.data[self.start..self.end].iter().enumerate() {
                if !matches!(c, b' ' | b'\t' | b'\r' | b'\n') {
                    self.start += i;
                    self.position += i;
                    return;
                }
                //TODO: SIMD
            }
        } else {
            for (i, c) in self.data[self.start..self.end].iter().enumerate() {
                if !matches!(c, b' ' | b'\t') {
                    self.start += i;
                    self.position += i;
                    return;
                }
                //TODO: SIMD
            }
        }
        // We only have whitespaces
        self.position += self.end - self.start;
        self.end = self.start;
    }

    /// Moves the unread bytes to the beginning of the buffer if more than half of it has already been read.
    fn shrink_if_useful(&mut self) {
        if self.start * 2 > self.data.len() {
            // We have read more than half of the buffer, let's move the data to the beginning
            self.data.copy_within(self.start..self.end, 0);
            self.end -= self.start;
            self.start = 0;
        }
    }
}
/// An error returned by the lexer: a message and the byte range of the input it applies to.
#[derive(Debug)]
pub struct LexerError {
    position: Range<usize>,
    message: String,
}

impl LexerError {
    /// The byte range of the error.
    pub fn position(&self) -> Range<usize> {
        self.position.start..self.position.end
    }

    /// The error message.
    pub fn message(&self) -> &str {
        self.message.as_str()
    }

    /// Converts this error into its message.
    pub fn into_message(self) -> String {
        let Self { message, .. } = self;
        message
    }
}

impl fmt::Display for LexerError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (start, end) = (self.position.start, self.position.end);
        // Ranges covering a single byte get a shorter message
        if start + 1 == end {
            write!(f, "Lexer error at byte {start}: {}", self.message)
        } else {
            write!(
                f,
                "Lexer error between bytes {start} and {end}: {}",
                self.message
            )
        }
    }
}

impl Error for LexerError {
    fn description(&self) -> &str {
        &self.message
    }
}

@ -0,0 +1,11 @@
//! oxttl parsing toolkit.
//!
//! Provides the basic code to write plain Rust lexers and parsers able to read files chunk by chunk.
mod lexer;
mod parser;
pub use self::lexer::{Lexer, LexerError, TokenRecognizer, TokenRecognizerError};
pub use self::parser::{
FromReadIterator, ParseError, ParseOrIoError, Parser, RuleRecognizer, RuleRecognizerError,
};

@ -0,0 +1,244 @@
use crate::toolkit::lexer::TokenWithPosition;
use crate::toolkit::{Lexer, LexerError, TokenRecognizer};
use std::error::Error;
use std::io::Read;
use std::ops::Range;
use std::{fmt, io};
/// A state machine consuming the tokens of a [`TokenRecognizer`] to build outputs. It is driven by [`Parser`].
///
/// The methods consume the recognizer and return its new state.
pub trait RuleRecognizer: Sized {
/// The recognizer of the tokens consumed by these rules.
type TokenRecognizer: TokenRecognizer;
/// The values built from the tokens.
type Output;
/// Called by [`Parser`] when the lexer has returned an error: returns the state to use for the following tokens.
fn error_recovery_state(self) -> Self;
/// Called by [`Parser`] for each token returned by the lexer.
///
/// The outputs and the errors are pushed to `results` and `errors`.
fn recognize_next(
self,
token: <Self::TokenRecognizer as TokenRecognizer>::Token<'_>,
results: &mut Vec<Self::Output>,
errors: &mut Vec<RuleRecognizerError>,
) -> Self;
/// Called by [`Parser`] once the lexer has reached the end of the input. No other method is called after it.
fn recognize_end(self, results: &mut Vec<Self::Output>, errors: &mut Vec<RuleRecognizerError>);
/// The options [`Parser`] gives to the lexer to read the next token.
fn lexer_options(&self) -> &<Self::TokenRecognizer as TokenRecognizer>::Options;
}
/// An error raised by a `RuleRecognizer`. It only carries a message.
pub struct RuleRecognizerError {
    pub message: String,
}

/// Anything convertible into a `String` can be used as an error.
impl<S: Into<String>> From<S> for RuleRecognizerError {
    fn from(message: S) -> Self {
        let message = message.into();
        Self { message }
    }
}
/// A parser made of a [`Lexer`] and of a [`RuleRecognizer`] consuming its tokens.
pub struct Parser<RR: RuleRecognizer> {
// The lexer providing the tokens.
lexer: Lexer<RR::TokenRecognizer>,
// The recognizer. Set to `None` once `recognize_end` has been called on it.
state: Option<RR>,
// The outputs built by the recognizer and not returned yet by `read_next`.
results: Vec<RR::Output>,
// The errors raised by the recognizer and not returned yet by `read_next`.
errors: Vec<RuleRecognizerError>,
// The position of the last token read: it is used as the position of the recognizer errors.
position: Range<usize>,
// The lexer options used when there is no recognizer anymore.
default_lexer_options: <RR::TokenRecognizer as TokenRecognizer>::Options,
}
impl<RR: RuleRecognizer> Parser<RR> {
    /// Builds a parser from a lexer and the initial state of the recognizer.
    pub fn new(lexer: Lexer<RR::TokenRecognizer>, recognizer: RR) -> Self {
        Self {
            lexer,
            state: Some(recognizer),
            results: Vec::new(),
            errors: Vec::new(),
            position: 0..0,
            default_lexer_options: Default::default(),
        }
    }

    /// Gives more bytes to the lexer.
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.lexer.extend_from_slice(other);
    }

    /// Tells the lexer that no more data is going to be provided.
    pub fn end(&mut self) {
        self.lexer.end();
    }

    /// Returns `true` once the recognizer has been finished and all the outputs and errors have been returned.
    pub fn is_end(&self) -> bool {
        self.state.is_none() && self.results.is_empty() && self.errors.is_empty()
    }

    /// Returns the next output or error.
    ///
    /// Pending errors are returned first, then pending outputs.
    /// If there are none, tokens are read and given to the recognizer until it produces something.
    /// Returns `None` if the lexer needs more data or if the parsing is finished.
    pub fn read_next(&mut self) -> Option<Result<RR::Output, ParseError>> {
        loop {
            if let Some(error) = self.errors.pop() {
                let position = self.position.clone();
                return Some(Err(ParseError {
                    position,
                    message: error.message,
                }));
            }
            if let Some(result) = self.results.pop() {
                return Some(Ok(result));
            }
            let options = match &self.state {
                Some(recognizer) => recognizer.lexer_options(),
                None => &self.default_lexer_options,
            };
            if let Some(lexed) = self.lexer.read_next(options) {
                let TokenWithPosition { token, position } = match lexed {
                    Ok(token) => token,
                    Err(e) => {
                        // The lexer failed: the recognizer moves to its recovery state
                        self.state = self.state.take().map(RR::error_recovery_state);
                        return Some(Err(e.into()));
                    }
                };
                self.position = position;
                self.state = self.state.take().map(|state| {
                    state.recognize_next(token, &mut self.results, &mut self.errors)
                });
                continue;
            }
            // No token is available
            if !self.lexer.is_end() {
                return None; // More data is needed
            }
            // The input is finished: the recognizer is finished once, then the loop drains what it has produced
            let state = self.state.take()?;
            state.recognize_end(&mut self.results, &mut self.errors);
        }
    }

    /// Builds an iterator reading the data to parse from `read`.
    pub fn parse_from_read<R: Read>(self, read: R) -> FromReadIterator<R, RR> {
        FromReadIterator { read, parser: self }
    }
}
/// A parsing error.
///
/// It is made of a message and of a byte range in the input.
#[derive(Debug)]
pub struct ParseError {
    position: Range<usize>,
    message: String,
}

impl ParseError {
    /// The byte range of the input the error applies to.
    pub fn position(&self) -> Range<usize> {
        self.position.start..self.position.end
    }

    /// The message of the error.
    pub fn message(&self) -> &str {
        self.message.as_str()
    }

    /// Converts this error into its message.
    pub fn into_message(self) -> String {
        let Self { message, .. } = self;
        message
    }
}

impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (start, end) = (self.position.start, self.position.end);
        // Ranges covering a single byte get a shorter message
        if start + 1 == end {
            write!(f, "Parser error at byte {start}: {}", self.message)
        } else {
            write!(
                f,
                "Parser error between bytes {start} and {end}: {}",
                self.message
            )
        }
    }
}

impl Error for ParseError {}

/// Parsing errors are converted into I/O errors of kind `InvalidData`.
impl From<ParseError> for io::Error {
    fn from(error: ParseError) -> Self {
        Self::new(io::ErrorKind::InvalidData, error)
    }
}
/// Lexer errors are converted into parsing errors with the same position and message.
impl From<LexerError> for ParseError {
    fn from(e: LexerError) -> Self {
        let position = e.position();
        let message = e.into_message();
        Self { position, message }
    }
}
/// Either a [`ParseError`] or a [`std::io::Error`].
#[derive(Debug)]
pub enum ParseOrIoError {
    Parse(ParseError),
    Io(io::Error),
}

/// The message is the message of the wrapped error.
impl fmt::Display for ParseOrIoError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Parse(e) => fmt::Display::fmt(e, f),
            Self::Io(e) => fmt::Display::fmt(e, f),
        }
    }
}

/// The source is the wrapped error.
impl Error for ParseOrIoError {
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match self {
            Self::Parse(e) => Some(e),
            Self::Io(e) => Some(e),
        }
    }
}

impl From<ParseError> for ParseOrIoError {
    fn from(error: ParseError) -> Self {
        ParseOrIoError::Parse(error)
    }
}

impl From<io::Error> for ParseOrIoError {
    fn from(error: io::Error) -> Self {
        ParseOrIoError::Io(error)
    }
}

/// I/O errors are returned as is, parsing errors are converted into I/O errors.
impl From<ParseOrIoError> for io::Error {
    fn from(error: ParseOrIoError) -> Self {
        match error {
            ParseOrIoError::Io(e) => e,
            ParseOrIoError::Parse(e) => Self::from(e),
        }
    }
}
/// An iterator returning the outputs of a [`Parser`] fed from a [`Read`] implementation.
pub struct FromReadIterator<R: Read, RR: RuleRecognizer> {
    read: R,
    parser: Parser<RR>,
}

impl<R: Read, RR: RuleRecognizer> Iterator for FromReadIterator<R, RR> {
    type Item = Result<RR::Output, ParseOrIoError>;

    /// Returns the next output or error, reading more data each time the parser has nothing to return.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if self.parser.is_end() {
                return None;
            }
            if let Some(result) = self.parser.read_next() {
                return Some(result.map_err(ParseOrIoError::Parse));
            }
            // The parser can't go further with what it has: we give it more data
            if let Err(e) = self.parser.lexer.extend_from_read(&mut self.read) {
                return Some(Err(ParseOrIoError::Io(e)));
            }
        }
    }
}

@ -0,0 +1,666 @@
//! A [TriG](https://www.w3.org/TR/trig/) streaming parser implemented by [`TriGParser`].
use crate::terse::TriGRecognizer;
use crate::toolkit::{FromReadIterator, ParseError, ParseOrIoError, Parser};
use oxiri::{Iri, IriParseError};
use oxrdf::{vocab::xsd, GraphName, NamedNode, Quad, QuadRef, Subject, TermRef};
use std::collections::HashMap;
use std::fmt;
use std::io::{self, Read, Write};
/// A [TriG](https://www.w3.org/TR/trig/) streaming parser.
///
/// Support for [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star) is available behind the `rdf-star` feature and the [`TriGParser::with_quoted_triples`] option.
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{TriGParser, ParseError};
///
/// let file = b"@base <http://example.com/> .
/// @prefix schema: <http://schema.org/> .
/// <foo> a schema:Person ;
/// schema:name \"Foo\" .
/// <bar> a schema:Person ;
/// schema:name \"Bar\" .";
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for quad in TriGParser::new().parse_from_read(file.as_ref()) {
/// let quad = quad?;
/// if quad.predicate == rdf_type && quad.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
pub struct TriGParser {
// The base IRI set by `with_base_iri`. `parse` gives a clone of it to the recognizer.
base: Option<Iri<String>>,
// The prefixes set by `with_prefix`. `parse` gives a clone of them to the recognizer.
prefixes: HashMap<String, Iri<String>>,
// Set by `with_quoted_triples` and given to the recognizer by `parse`.
#[cfg(feature = "rdf-star")]
with_quoted_triples: bool,
}
impl TriGParser {
    /// Builds a new [`TriGParser`] with no base IRI and no predefined prefix.
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the base IRI given to the parsers built from this configuration.
    ///
    /// Returns an error if `base_iri` is not accepted by [`Iri::parse`].
    #[inline]
    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
        let base_iri = Iri::parse(base_iri.into())?;
        self.base = Some(base_iri);
        Ok(self)
    }

    /// Registers a prefix given to the parsers built from this configuration.
    ///
    /// A prefix already registered with the same name is replaced.
    /// Returns an error if `prefix_iri` is not accepted by [`Iri::parse`].
    #[inline]
    pub fn with_prefix(
        mut self,
        prefix_name: impl Into<String>,
        prefix_iri: impl Into<String>,
    ) -> Result<Self, IriParseError> {
        let prefix_name = prefix_name.into();
        let prefix_iri = Iri::parse(prefix_iri.into())?;
        self.prefixes.insert(prefix_name, prefix_iri);
        Ok(self)
    }

    /// Enables [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star).
    #[cfg(feature = "rdf-star")]
    #[inline]
    #[must_use]
    pub fn with_quoted_triples(mut self) -> Self {
        self.with_quoted_triples = true;
        self
    }

    /// Parses a TriG file from a [`Read`] implementation.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::NamedNodeRef;
    /// use oxttl::{TriGParser, ParseError};
    ///
    /// let file = b"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    /// schema:name \"Foo\" .
    /// <bar> a schema:Person ;
    /// schema:name \"Bar\" .";
    ///
    /// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
    /// let mut count = 0;
    /// for quad in TriGParser::new().parse_from_read(file.as_ref()) {
    ///     let quad = quad?;
    ///     if quad.predicate == rdf_type && quad.object == schema_person.into() {
    ///         count += 1;
    ///     }
    /// }
    /// assert_eq!(2, count);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn parse_from_read<R: Read>(&self, read: R) -> FromReadTriGReader<R> {
        let LowLevelTriGReader { parser } = self.parse();
        FromReadTriGReader {
            inner: parser.parse_from_read(read),
        }
    }

    /// Allows to parse a TriG file by using a low-level API.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::NamedNodeRef;
    /// use oxttl::{TriGParser, ParseError};
    ///
    /// let file: [&[u8]; 5] = [b"@base <http://example.com/>",
    ///     b". @prefix schema: <http://schema.org/> .",
    ///     b"<foo> a schema:Person",
    ///     b" ; schema:name \"Foo\" . <bar>",
    ///     b" a schema:Person ; schema:name \"Bar\" ."
    /// ];
    ///
    /// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
    /// let mut count = 0;
    /// let mut parser = TriGParser::new().parse();
    /// let mut file_chunks = file.iter();
    /// while !parser.is_end() {
    ///     // We feed more data to the parser
    ///     if let Some(chunk) = file_chunks.next() {
    ///         parser.extend_from_slice(chunk);
    ///     } else {
    ///         parser.end(); // It's finished
    ///     }
    ///     // We read as many quads from the parser as possible
    ///     while let Some(quad) = parser.read_next() {
    ///         let quad = quad?;
    ///         if quad.predicate == rdf_type && quad.object == schema_person.into() {
    ///             count += 1;
    ///         }
    ///     }
    /// }
    /// assert_eq!(2, count);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn parse(&self) -> LowLevelTriGReader {
        // Graph names are enabled: this is what distinguishes this entry point from a triples-only one
        let parser = TriGRecognizer::new_parser(
            true,
            #[cfg(feature = "rdf-star")]
            self.with_quoted_triples,
            self.base.clone(),
            self.prefixes.clone(),
        );
        LowLevelTriGReader { parser }
    }
}
/// Parses a TriG file from a [`Read`] implementation. Can be built using [`TriGParser::parse_from_read`].
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{TriGParser, ParseError};
///
/// let file = b"@base <http://example.com/> .
/// @prefix schema: <http://schema.org/> .
/// <foo> a schema:Person ;
/// schema:name \"Foo\" .
/// <bar> a schema:Person ;
/// schema:name \"Bar\" .";
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for quad in TriGParser::new().parse_from_read(file.as_ref()) {
/// let quad = quad?;
/// if quad.predicate == rdf_type && quad.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct FromReadTriGReader<R: Read> {
// The toolkit iterator built by `TriGParser::parse_from_read`: it does all the work.
inner: FromReadIterator<R, TriGRecognizer>,
}
impl<R: Read> Iterator for FromReadTriGReader<R> {
type Item = Result<Quad, ParseOrIoError>;
// Returns the next quad or error of the wrapped iterator.
fn next(&mut self) -> Option<Result<Quad, ParseOrIoError>> {
self.inner.next()
}
}
/// Parses a TriG file by using a low-level API. Can be built using [`TriGParser::parse`].
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{TriGParser, ParseError};
///
/// let file: [&[u8]; 5] = [b"@base <http://example.com/>",
/// b". @prefix schema: <http://schema.org/> .",
/// b"<foo> a schema:Person",
/// b" ; schema:name \"Foo\" . <bar>",
/// b" a schema:Person ; schema:name \"Bar\" ."
/// ];
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// let mut parser = TriGParser::new().parse();
/// let mut file_chunks = file.iter();
/// while !parser.is_end() {
/// // We feed more data to the parser
/// if let Some(chunk) = file_chunks.next() {
/// parser.extend_from_slice(chunk);
/// } else {
/// parser.end(); // It's finished
/// }
/// // We read as many quads from the parser as possible
/// while let Some(quad) = parser.read_next() {
/// let quad = quad?;
/// if quad.predicate == rdf_type && quad.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct LowLevelTriGReader {
// The toolkit parser built by `TriGParser::parse`: all the methods delegate to it.
parser: Parser<TriGRecognizer>,
}
impl LowLevelTriGReader {
/// Adds some extra bytes to the parser.
///
/// Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data.
pub fn extend_from_slice(&mut self, other: &[u8]) {
self.parser.extend_from_slice(other)
}
/// Tells the parser that the file is finished.
///
/// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values.
pub fn end(&mut self) {
self.parser.end()
}
/// Returns whether the parsing is finished, i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`.
pub fn is_end(&self) -> bool {
self.parser.is_end()
}
/// Attempts to parse a new quad from the already provided data.
///
/// Returns [`None`] if the parsing is finished or if more data is required.
/// In the latter case, more data should be fed using [`extend_from_slice`](Self::extend_from_slice).
pub fn read_next(&mut self) -> Option<Result<Quad, ParseError>> {
self.parser.read_next()
}
}
/// A [TriG](https://www.w3.org/TR/trig/) serializer.
///
/// Support for [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star) is available behind the `rdf-star` feature.
///
/// ```
/// use oxrdf::{NamedNodeRef, QuadRef};
/// use oxttl::TriGSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = TriGSerializer::new().serialize_to_write(buf);
/// writer.write_quad(QuadRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// NamedNodeRef::new("http://example.com")?,
/// ))?;
/// assert_eq!(
/// b"<http://example.com> {\n\t<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n}\n",
/// writer.finish()?.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
pub struct TriGSerializer;

impl TriGSerializer {
    /// Builds a new [`TriGSerializer`].
    #[inline]
    pub fn new() -> Self {
        Self
    }

    /// Writes a TriG file to a [`Write`] implementation.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, QuadRef};
    /// use oxttl::TriGSerializer;
    ///
    /// let mut buf = Vec::new();
    /// let mut writer = TriGSerializer::new().serialize_to_write(buf);
    /// writer.write_quad(QuadRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    ///     NamedNodeRef::new("http://schema.org/Person")?,
    ///     NamedNodeRef::new("http://example.com")?,
    /// ))?;
    /// assert_eq!(
    ///     b"<http://example.com> {\n\t<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n}\n",
    ///     writer.finish()?.as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn serialize_to_write<W: Write>(&self, write: W) -> ToWriteTriGWriter<W> {
        let writer = self.serialize();
        ToWriteTriGWriter { write, writer }
    }

    /// Builds a low-level TriG writer.
    ///
    /// The writer starts in the default graph with no statement in progress.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, QuadRef};
    /// use oxttl::TriGSerializer;
    ///
    /// let mut buf = Vec::new();
    /// let mut writer = TriGSerializer::new().serialize();
    /// writer.write_quad(QuadRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    ///     NamedNodeRef::new("http://schema.org/Person")?,
    ///     NamedNodeRef::new("http://example.com")?,
    /// ), &mut buf)?;
    /// writer.finish(&mut buf)?;
    /// assert_eq!(
    ///     b"<http://example.com> {\n\t<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n}\n",
    ///     buf.as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    #[allow(clippy::unused_self)]
    pub fn serialize(&self) -> LowLevelTriGWriter {
        LowLevelTriGWriter {
            current_subject_predicate: None,
            current_graph_name: GraphName::DefaultGraph,
        }
    }
}
/// Writes a TriG file to a [`Write`] implementation. Can be built using [`TriGSerializer::serialize_to_write`].
///
/// ```
/// use oxrdf::{NamedNodeRef, QuadRef};
/// use oxttl::TriGSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = TriGSerializer::new().serialize_to_write(buf);
/// writer.write_quad(QuadRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// NamedNodeRef::new("http://example.com")?,
/// ))?;
/// assert_eq!(
/// b"<http://example.com> {\n\t<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n}\n",
/// writer.finish()?.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct ToWriteTriGWriter<W: Write> {
    write: W,
    writer: LowLevelTriGWriter,
}

impl<W: Write> ToWriteTriGWriter<W> {
    /// Writes an extra quad to the underlying [`Write`].
    pub fn write_quad<'a>(&mut self, q: impl Into<QuadRef<'a>>) -> io::Result<()> {
        let Self { write, writer } = self;
        writer.write_quad(q, write)
    }

    /// Writes what is needed to end the file and returns the underlying [`Write`].
    pub fn finish(self) -> io::Result<W> {
        let Self {
            mut write,
            mut writer,
        } = self;
        writer.finish(&mut write)?;
        Ok(write)
    }
}
/// Writes a TriG file by using a low-level API. Can be built using [`TriGSerializer::serialize`].
///
/// ```
/// use oxrdf::{NamedNodeRef, QuadRef};
/// use oxttl::TriGSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = TriGSerializer::new().serialize();
/// writer.write_quad(QuadRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// NamedNodeRef::new("http://example.com")?,
/// ), &mut buf)?;
/// writer.finish(&mut buf)?;
/// assert_eq!(
/// b"<http://example.com> {\n\t<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n}\n",
/// buf.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct LowLevelTriGWriter {
// The graph of the last written quad (the default graph at the beginning): used to know when a graph block must be closed and opened.
current_graph_name: GraphName,
// The subject and predicate of the last written quad, `None` if nothing has been written yet: used to share them with the next quad using ';' and ','.
current_subject_predicate: Option<(Subject, NamedNode)>,
}
impl LowLevelTriGWriter {
    /// Writes an extra quad.
    ///
    /// The statement is left open (no final dot): the next quad is appended with `,` if it has the same
    /// graph, subject and predicate, or with `;` if it only has the same graph and subject.
    pub fn write_quad<'a>(
        &mut self,
        q: impl Into<QuadRef<'a>>,
        mut write: impl Write,
    ) -> io::Result<()> {
        let q = q.into();

        if q.graph_name != self.current_graph_name.as_ref() {
            // The graph changes: the pending statement and the pending graph block are closed
            if self.current_subject_predicate.is_some() {
                writeln!(write, " .")?;
            }
            if !self.current_graph_name.is_default_graph() {
                writeln!(write, "}}")?;
            }
            self.current_graph_name = q.graph_name.into_owned();
            self.current_subject_predicate =
                Some((q.subject.into_owned(), q.predicate.into_owned()));
            if !self.current_graph_name.is_default_graph() {
                writeln!(write, "{} {{", q.graph_name)?;
                write!(write, "\t")?;
            }
            return write!(
                write,
                "{} {} {}",
                TurtleTerm(q.subject.into()),
                q.predicate,
                TurtleTerm(q.object)
            );
        }

        match self.current_subject_predicate.take() {
            Some((subject, predicate)) if q.subject == subject.as_ref() => {
                if q.predicate == predicate {
                    // Same subject and predicate: only the object is added
                    self.current_subject_predicate = Some((subject, predicate));
                    write!(write, " , {}", TurtleTerm(q.object))
                } else {
                    // Same subject: the predicate and the object are added
                    self.current_subject_predicate = Some((subject, q.predicate.into_owned()));
                    writeln!(write, " ;")?;
                    if !self.current_graph_name.is_default_graph() {
                        write!(write, "\t")?;
                    }
                    write!(write, "\t{} {}", q.predicate, TurtleTerm(q.object))
                }
            }
            previous => {
                // New subject: the previous statement, if any, is closed and a new one is started
                self.current_subject_predicate =
                    Some((q.subject.into_owned(), q.predicate.into_owned()));
                if previous.is_some() {
                    writeln!(write, " .")?;
                }
                if !self.current_graph_name.is_default_graph() {
                    write!(write, "\t")?;
                }
                write!(
                    write,
                    "{} {} {}",
                    TurtleTerm(q.subject.into()),
                    q.predicate,
                    TurtleTerm(q.object)
                )
            }
        }
    }

    /// Finishes to write the file: closes the pending statement and the pending graph block, if any.
    pub fn finish(&mut self, mut write: impl Write) -> io::Result<()> {
        let has_open_statement = self.current_subject_predicate.is_some();
        if has_open_statement {
            writeln!(write, " .")?;
        }
        let is_in_named_graph = !self.current_graph_name.is_default_graph();
        if is_in_named_graph {
            writeln!(write, "}}")?;
        }
        Ok(())
    }
}
/// Wrapper whose `Display` implementation writes a term using the Turtle shorthand syntax when possible.
struct TurtleTerm<'a>(TermRef<'a>);

impl<'a> fmt::Display for TurtleTerm<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.0 {
            TermRef::NamedNode(node) => write!(f, "{node}"),
            TermRef::BlankNode(node) => write!(f, "{node}"),
            TermRef::Literal(literal) => {
                let lexical = literal.value();
                // Booleans, integers, decimals and doubles are written bare
                // if their value matches the corresponding Turtle production
                let is_shorthand = match literal.datatype() {
                    xsd::BOOLEAN => is_turtle_boolean(lexical),
                    xsd::INTEGER => is_turtle_integer(lexical),
                    xsd::DECIMAL => is_turtle_decimal(lexical),
                    xsd::DOUBLE => is_turtle_double(lexical),
                    _ => false,
                };
                if !is_shorthand {
                    return write!(f, "{literal}");
                }
                f.write_str(lexical)
            }
            #[cfg(feature = "rdf-star")]
            TermRef::Triple(t) => {
                let subject = TurtleTerm(t.subject.as_ref().into());
                let object = TurtleTerm(t.object.as_ref());
                write!(f, "<< {} {} {} >>", subject, t.predicate, object)
            }
        }
    }
}
/// Returns `true` if `value` is exactly `true` or `false`.
fn is_turtle_boolean(value: &str) -> bool {
    value == "true" || value == "false"
}
/// Returns `true` if `value` matches the Turtle `INTEGER` production.
fn is_turtle_integer(value: &str) -> bool {
    // [19] INTEGER ::= [+-]? [0-9]+
    let bytes = value.as_bytes();
    let digits = match bytes {
        [b'+' | b'-', rest @ ..] => rest,
        _ => bytes,
    };
    if digits.is_empty() {
        return false;
    }
    digits.iter().all(u8::is_ascii_digit)
}
/// Returns `true` if `value` matches the Turtle `DECIMAL` production.
fn is_turtle_decimal(value: &str) -> bool {
    // [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+
    let bytes = value.as_bytes();
    let unsigned = match bytes {
        [b'+' | b'-', rest @ ..] => rest,
        _ => bytes,
    };
    // The optional integer part is skipped
    let integer_len = unsigned
        .iter()
        .take_while(|b| b.is_ascii_digit())
        .count();
    match &unsigned[integer_len..] {
        [b'.', fraction @ ..] => {
            !fraction.is_empty() && fraction.iter().all(u8::is_ascii_digit)
        }
        _ => false,
    }
}
/// Returns `true` if `value` matches the Turtle `DOUBLE` production.
fn is_turtle_double(value: &str) -> bool {
    // [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT)
    // [154s] EXPONENT ::= [eE] [+-]? [0-9]+
    let bytes = value.as_bytes();
    let unsigned = match bytes {
        [b'+' | b'-', rest @ ..] => rest,
        _ => bytes,
    };

    // Mantissa: digits, then an optional dot followed by digits
    let integer_len = unsigned
        .iter()
        .take_while(|b| b.is_ascii_digit())
        .count();
    let after_integer = &unsigned[integer_len..];
    let (fraction_len, after_mantissa) = match after_integer {
        [b'.', rest @ ..] => {
            let len = rest.iter().take_while(|b| b.is_ascii_digit()).count();
            (len, &rest[len..])
        }
        _ => (0, after_integer),
    };

    // Exponent: mandatory marker, optional sign, then digits
    let exponent = match after_mantissa {
        [b'e' | b'E', rest @ ..] => rest,
        _ => return false,
    };
    let exponent_digits = match exponent {
        [b'+' | b'-', rest @ ..] => rest,
        _ => exponent,
    };

    // The mantissa must hold at least one digit, before or after the dot
    (integer_len > 0 || fraction_len > 0)
        && !exponent_digits.is_empty()
        && exponent_digits.iter().all(u8::is_ascii_digit)
}
#[cfg(test)]
mod tests {
    use super::*;
    use oxrdf::vocab::xsd;
    use oxrdf::{BlankNodeRef, GraphNameRef, LiteralRef, NamedNodeRef};

    /// Writes quads sharing or not their graph, subject and predicate and checks the grouping done by the serializer.
    #[test]
    fn test_write() -> io::Result<()> {
        let s = NamedNodeRef::new_unchecked("http://example.com/s");
        let p = NamedNodeRef::new_unchecked("http://example.com/p");
        let p2 = NamedNodeRef::new_unchecked("http://example.com/p2");
        let o = NamedNodeRef::new_unchecked("http://example.com/o");
        let g = NamedNodeRef::new_unchecked("http://example.com/g");
        let g2 = NamedNodeRef::new_unchecked("http://example.com/g2");
        let b = BlankNodeRef::new_unchecked("b");
        let b2 = BlankNodeRef::new_unchecked("b2");

        let mut writer = TriGSerializer::new().serialize_to_write(Vec::new());
        // First quad of the graph
        writer.write_quad(QuadRef::new(s, p, o, g))?;
        // Same subject and predicate
        writer.write_quad(QuadRef::new(
            s,
            p,
            LiteralRef::new_simple_literal("foo"),
            g,
        ))?;
        // Same subject, other predicate
        writer.write_quad(QuadRef::new(
            s,
            p2,
            LiteralRef::new_language_tagged_literal_unchecked("foo", "en"),
            g,
        ))?;
        // Other subject
        writer.write_quad(QuadRef::new(b, p2, b2, g))?;
        // Default graph
        writer.write_quad(QuadRef::new(
            b,
            p2,
            LiteralRef::new_typed_literal("true", xsd::BOOLEAN),
            GraphNameRef::DefaultGraph,
        ))?;
        // Other named graph
        writer.write_quad(QuadRef::new(
            b,
            p2,
            LiteralRef::new_typed_literal("false", xsd::BOOLEAN),
            g2,
        ))?;

        let output = String::from_utf8(writer.finish()?).unwrap();
        assert_eq!(output, "<http://example.com/g> {\n\t<http://example.com/s> <http://example.com/p> <http://example.com/o> , \"foo\" ;\n\t\t<http://example.com/p2> \"foo\"@en .\n\t_:b <http://example.com/p2> _:b2 .\n}\n_:b <http://example.com/p2> true .\n<http://example.com/g2> {\n\t_:b <http://example.com/p2> false .\n}\n");
        Ok(())
    }
}

@ -0,0 +1,462 @@
//! A [Turtle](https://www.w3.org/TR/turtle/) streaming parser implemented by [`TurtleParser`].
use crate::terse::TriGRecognizer;
use crate::toolkit::{FromReadIterator, ParseError, ParseOrIoError, Parser};
use crate::trig::{LowLevelTriGWriter, ToWriteTriGWriter};
use crate::TriGSerializer;
use oxiri::{Iri, IriParseError};
use oxrdf::{GraphNameRef, Triple, TripleRef};
use std::collections::HashMap;
use std::io::{self, Read, Write};
/// A [Turtle](https://www.w3.org/TR/turtle/) streaming parser.
///
/// Support for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) is available behind the `rdf-star` feature and the [`TurtleParser::with_quoted_triples`] option.
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{TurtleParser, ParseError};
///
/// let file = b"@base <http://example.com/> .
/// @prefix schema: <http://schema.org/> .
/// <foo> a schema:Person ;
/// schema:name \"Foo\" .
/// <bar> a schema:Person ;
/// schema:name \"Bar\" .";
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for triple in TurtleParser::new().parse_from_read(file.as_ref()) {
/// let triple = triple?;
/// if triple.predicate == rdf_type && triple.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
pub struct TurtleParser {
// Base IRI handed to the recognizer by `parse`; `None` until `with_base_iri` is called.
base: Option<Iri<String>>,
// Prefixes handed to the recognizer by `parse`, keyed by prefix name (filled by `with_prefix`).
prefixes: HashMap<String, Iri<String>>,
// Set by `with_quoted_triples`; the field only exists when the `rdf-star` feature is enabled.
#[cfg(feature = "rdf-star")]
with_quoted_triples: bool,
}
impl TurtleParser {
    /// Builds a new [`TurtleParser`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the base IRI that is handed to the parsers built from this configuration.
    ///
    /// Returns an [`IriParseError`] if `base_iri` is not a valid IRI.
    #[inline]
    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
        let parsed = Iri::parse(base_iri.into())?;
        self.base = Some(parsed);
        Ok(self)
    }

    /// Registers the prefix `prefix_name` bound to `prefix_iri`; registered prefixes are handed
    /// to the parsers built from this configuration.
    ///
    /// Registering the same prefix name again replaces the previous IRI.
    /// Returns an [`IriParseError`] if `prefix_iri` is not a valid IRI.
    #[inline]
    pub fn with_prefix(
        mut self,
        prefix_name: impl Into<String>,
        prefix_iri: impl Into<String>,
    ) -> Result<Self, IriParseError> {
        let name = prefix_name.into();
        let iri = Iri::parse(prefix_iri.into())?;
        self.prefixes.insert(name, iri);
        Ok(self)
    }

    /// Enables [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star).
    #[cfg(feature = "rdf-star")]
    #[inline]
    #[must_use]
    pub fn with_quoted_triples(mut self) -> Self {
        self.with_quoted_triples = true;
        self
    }

    /// Parses a Turtle file from a [`Read`] implementation.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::NamedNodeRef;
    /// use oxttl::{TurtleParser, ParseError};
    ///
    /// let file = b"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    /// schema:name \"Foo\" .
    /// <bar> a schema:Person ;
    /// schema:name \"Bar\" .";
    ///
    /// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
    /// let mut count = 0;
    /// for triple in TurtleParser::new().parse_from_read(file.as_ref()) {
    /// let triple = triple?;
    /// if triple.predicate == rdf_type && triple.object == schema_person.into() {
    /// count += 1;
    /// }
    /// }
    /// assert_eq!(2, count);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn parse_from_read<R: Read>(&self, read: R) -> FromReadTurtleReader<R> {
        // Build the low-level reader first, then hand its inner parser over to the `Read`-driven iterator.
        let LowLevelTurtleReader { parser } = self.parse();
        FromReadTurtleReader {
            inner: parser.parse_from_read(read),
        }
    }

    /// Parses a Turtle file using a low-level API.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::NamedNodeRef;
    /// use oxttl::{TurtleParser, ParseError};
    ///
    /// let file: [&[u8]; 5] = [b"@base <http://example.com/>",
    /// b". @prefix schema: <http://schema.org/> .",
    /// b"<foo> a schema:Person",
    /// b" ; schema:name \"Foo\" . <bar>",
    /// b" a schema:Person ; schema:name \"Bar\" ."
    /// ];
    ///
    /// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
    /// let mut count = 0;
    /// let mut parser = TurtleParser::new().parse();
    /// let mut file_chunks = file.iter();
    /// while !parser.is_end() {
    /// // We feed more data to the parser
    /// if let Some(chunk) = file_chunks.next() {
    /// parser.extend_from_slice(chunk);
    /// } else {
    /// parser.end(); // It's finished
    /// }
    /// // We read as many triples from the parser as possible
    /// while let Some(triple) = parser.read_next() {
    /// let triple = triple?;
    /// if triple.predicate == rdf_type && triple.object == schema_person.into() {
    /// count += 1;
    /// }
    /// }
    /// }
    /// assert_eq!(2, count);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn parse(&self) -> LowLevelTurtleReader {
        // The configuration is cloned so that `self` can be reused to build several parsers.
        let base = self.base.clone();
        let prefixes = self.prefixes.clone();
        // NOTE(review): the leading `false` presumably turns off the TriG-only graph syntax of the
        // shared recognizer; confirm against `TriGRecognizer::new_parser`.
        let parser = TriGRecognizer::new_parser(
            false,
            #[cfg(feature = "rdf-star")]
            self.with_quoted_triples,
            base,
            prefixes,
        );
        LowLevelTurtleReader { parser }
    }
}
/// Parses a Turtle file from a [`Read`] implementation. Can be built using [`TurtleParser::parse_from_read`].
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{TurtleParser, ParseError};
///
/// let file = b"@base <http://example.com/> .
/// @prefix schema: <http://schema.org/> .
/// <foo> a schema:Person ;
/// schema:name \"Foo\" .
/// <bar> a schema:Person ;
/// schema:name \"Bar\" .";
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for triple in TurtleParser::new().parse_from_read(file.as_ref()) {
/// let triple = triple?;
/// if triple.predicate == rdf_type && triple.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct FromReadTurtleReader<R: Read> {
// Underlying iterator driving the shared recognizer; each item it yields is converted into a `Triple` by `next`.
inner: FromReadIterator<R, TriGRecognizer>,
}
impl<R: Read> Iterator for FromReadTurtleReader<R> {
    type Item = Result<Triple, ParseOrIoError>;

    /// Pulls the next result from the underlying iterator and converts a successful value into a [`Triple`].
    ///
    /// Errors are forwarded unchanged; `None` is returned as soon as the underlying iterator returns `None`.
    fn next(&mut self) -> Option<Result<Triple, ParseOrIoError>> {
        let result = self.inner.next()?;
        Some(result.map(Into::into))
    }
}
/// Parses a Turtle file by using a low-level API. Can be built using [`TurtleParser::parse`].
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{TurtleParser, ParseError};
///
/// let file: [&[u8]; 5] = [b"@base <http://example.com/>",
/// b". @prefix schema: <http://schema.org/> .",
/// b"<foo> a schema:Person",
/// b" ; schema:name \"Foo\" . <bar>",
/// b" a schema:Person ; schema:name \"Bar\" ."
/// ];
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// let mut parser = TurtleParser::new().parse();
/// let mut file_chunks = file.iter();
/// while !parser.is_end() {
/// // We feed more data to the parser
/// if let Some(chunk) = file_chunks.next() {
/// parser.extend_from_slice(chunk);
/// } else {
/// parser.end(); // It's finished
/// }
/// // We read as many triples from the parser as possible
/// while let Some(triple) = parser.read_next() {
/// let triple = triple?;
/// if triple.predicate == rdf_type && triple.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct LowLevelTurtleReader {
// Push-style parser built by `TurtleParser::parse`; every method of this type delegates to it.
parser: Parser<TriGRecognizer>,
}
impl LowLevelTurtleReader {
    /// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data.
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.parser.extend_from_slice(other);
    }

    /// Tells the parser that the file is finished.
    ///
    /// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values.
    pub fn end(&mut self) {
        self.parser.end();
    }

    /// Returns whether the parsing is finished, i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`.
    pub fn is_end(&self) -> bool {
        self.parser.is_end()
    }

    /// Attempts to parse a new triple from the already provided data.
    ///
    /// Returns [`None`] if the parsing is finished or more data is required.
    /// In the latter case, more data should be fed using [`extend_from_slice`](Self::extend_from_slice).
    pub fn read_next(&mut self) -> Option<Result<Triple, ParseError>> {
        let result = self.parser.read_next()?;
        Some(result.map(Into::into))
    }
}
/// A [Turtle](https://www.w3.org/TR/turtle/) serializer.
///
/// Support for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) is available behind the `rdf-star` feature.
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::TurtleSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = TurtleSerializer::new().serialize_to_write(buf);
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// assert_eq!(
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
/// writer.finish()?.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
pub struct TurtleSerializer {
// Turtle output is produced by the TriG serializer: the writers built from it
// place every triple in the default graph.
inner: TriGSerializer,
}
impl TurtleSerializer {
    /// Builds a new [`TurtleSerializer`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Writes a Turtle file to a [`Write`] implementation.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, TripleRef};
    /// use oxttl::TurtleSerializer;
    ///
    /// let mut buf = Vec::new();
    /// let mut writer = TurtleSerializer::new().serialize_to_write(buf);
    /// writer.write_triple(TripleRef::new(
    /// NamedNodeRef::new("http://example.com#me")?,
    /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    /// NamedNodeRef::new("http://schema.org/Person")?,
    /// ))?;
    /// assert_eq!(
    /// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
    /// writer.finish()?.as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn serialize_to_write<W: Write>(&self, write: W) -> ToWriteTurtleWriter<W> {
        // The wrapped TriG serializer does the actual work.
        let inner = self.inner.serialize_to_write(write);
        ToWriteTurtleWriter { inner }
    }

    /// Builds a low-level Turtle writer.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, TripleRef};
    /// use oxttl::TurtleSerializer;
    ///
    /// let mut buf = Vec::new();
    /// let mut writer = TurtleSerializer::new().serialize();
    /// writer.write_triple(TripleRef::new(
    /// NamedNodeRef::new("http://example.com#me")?,
    /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    /// NamedNodeRef::new("http://schema.org/Person")?,
    /// ), &mut buf)?;
    /// writer.finish(&mut buf)?;
    /// assert_eq!(
    /// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
    /// buf.as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn serialize(&self) -> LowLevelTurtleWriter {
        let inner = self.inner.serialize();
        LowLevelTurtleWriter { inner }
    }
}
/// Writes a Turtle file to a [`Write`] implementation. Can be built using [`TurtleSerializer::serialize_to_write`].
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::TurtleSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = TurtleSerializer::new().serialize_to_write(buf);
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// assert_eq!(
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
/// writer.finish()?.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct ToWriteTurtleWriter<W: Write> {
// TriG writer that receives every triple as a quad in the default graph (see `write_triple`).
inner: ToWriteTriGWriter<W>,
}
impl<W: Write> ToWriteTurtleWriter<W> {
    /// Writes an extra triple.
    pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        // The triple is handed to the TriG writer as a quad of the default graph.
        let triple: TripleRef<'a> = t.into();
        let quad = triple.in_graph(GraphNameRef::DefaultGraph);
        self.inner.write_quad(quad)
    }

    /// Ends the write process and returns the underlying [`Write`].
    pub fn finish(self) -> io::Result<W> {
        let Self { inner } = self;
        inner.finish()
    }
}
/// Writes a Turtle file by using a low-level API. Can be built using [`TurtleSerializer::serialize`].
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::TurtleSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = TurtleSerializer::new().serialize();
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// ), &mut buf)?;
/// writer.finish(&mut buf)?;
/// assert_eq!(
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
/// buf.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct LowLevelTurtleWriter {
// Low-level TriG writer that receives every triple as a quad in the default graph (see `write_triple`).
inner: LowLevelTriGWriter,
}
impl LowLevelTurtleWriter {
    /// Writes an extra triple to `write`.
    pub fn write_triple<'a>(
        &mut self,
        t: impl Into<TripleRef<'a>>,
        write: impl Write,
    ) -> io::Result<()> {
        // The triple is handed to the TriG writer as a quad of the default graph.
        let triple: TripleRef<'a> = t.into();
        let quad = triple.in_graph(GraphNameRef::DefaultGraph);
        self.inner.write_quad(quad, write)
    }

    /// Finishes writing the file to `write`.
    pub fn finish(&mut self, write: impl Write) -> io::Result<()> {
        self.inner.finish(write)
    }
}
#[cfg(test)]
mod tests {
use super::*;
use oxrdf::{BlankNodeRef, LiteralRef, NamedNodeRef};
// Serializes four triples and compares the complete Turtle output with the expected text.
// The order of the `write_triple` calls matters: the expected string relies on consecutive
// triples sharing their subject and/or predicate.
#[test]
fn test_write() -> io::Result<()> {
let mut writer = TurtleSerializer::new().serialize_to_write(Vec::new());
// First triple: written in full.
writer.write_triple(TripleRef::new(
NamedNodeRef::new_unchecked("http://example.com/s"),
NamedNodeRef::new_unchecked("http://example.com/p"),
NamedNodeRef::new_unchecked("http://example.com/o"),
))?;
// Same subject and predicate: expected after a `,`.
writer.write_triple(TripleRef::new(
NamedNodeRef::new_unchecked("http://example.com/s"),
NamedNodeRef::new_unchecked("http://example.com/p"),
LiteralRef::new_simple_literal("foo"),
))?;
// Same subject, new predicate: expected after a `;`.
writer.write_triple(TripleRef::new(
NamedNodeRef::new_unchecked("http://example.com/s"),
NamedNodeRef::new_unchecked("http://example.com/p2"),
LiteralRef::new_language_tagged_literal_unchecked("foo", "en"),
))?;
// New subject: expected as a new statement.
writer.write_triple(TripleRef::new(
BlankNodeRef::new_unchecked("b"),
NamedNodeRef::new_unchecked("http://example.com/p2"),
BlankNodeRef::new_unchecked("b2"),
))?;
// `finish` returns the underlying `Vec<u8>`, which must hold exactly this document.
assert_eq!(String::from_utf8(writer.finish()?).unwrap(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> , \"foo\" ;\n\t<http://example.com/p2> \"foo\"@en .\n_:b <http://example.com/p2> _:b2 .\n");
Ok(())
}
}

@ -1,5 +1,4 @@
use oxiri::IriParseError;
use rio_turtle::TurtleError;
use rio_xml::RdfXmlError;
use std::error::Error;
use std::{fmt, io};
@ -45,23 +44,26 @@ impl Error for ParseError {
}
}
impl From<TurtleError> for ParseError {
impl From<oxttl::ParseError> for ParseError {
#[inline]
fn from(error: TurtleError) -> Self {
let error = io::Error::from(error);
if error.get_ref().map_or(
false,
<(dyn Error + Send + Sync + 'static)>::is::<TurtleError>,
) {
Self::Syntax(SyntaxError {
inner: SyntaxErrorKind::Turtle(*error.into_inner().unwrap().downcast().unwrap()),
})
} else {
Self::Io(error)
fn from(error: oxttl::ParseError) -> Self {
Self::Syntax(SyntaxError {
inner: SyntaxErrorKind::Turtle(error),
})
}
}
impl From<oxttl::ParseOrIoError> for ParseError {
    /// Converts an `oxttl` parse-or-I/O error by delegating to the conversion of the error it wraps.
    #[inline]
    fn from(error: oxttl::ParseOrIoError) -> Self {
        use oxttl::ParseOrIoError::{Io, Parse};
        match error {
            Io(io_error) => io_error.into(),
            Parse(parse_error) => parse_error.into(),
        }
    }
}
#[allow(clippy::fallible_impl_from)]
impl From<RdfXmlError> for ParseError {
#[inline]
fn from(error: RdfXmlError) -> Self {
@ -111,7 +113,7 @@ pub struct SyntaxError {
#[derive(Debug)]
enum SyntaxErrorKind {
Turtle(TurtleError),
Turtle(oxttl::ParseError),
RdfXml(RdfXmlError),
InvalidBaseIri { iri: String, error: IriParseError },
}

@ -5,9 +5,6 @@ mod format;
pub mod read;
pub mod write;
pub use self::format::DatasetFormat;
pub use self::format::GraphFormat;
pub use self::read::DatasetParser;
pub use self::read::GraphParser;
pub use self::write::DatasetSerializer;
pub use self::write::GraphSerializer;
pub use self::format::{DatasetFormat, GraphFormat};
pub use self::read::{DatasetParser, GraphParser};
pub use self::write::{DatasetSerializer, GraphSerializer};

@ -4,9 +4,12 @@ pub use crate::io::error::{ParseError, SyntaxError};
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxiri::{Iri, IriParseError};
use oxttl::nquads::{FromReadNQuadsReader, NQuadsParser};
use oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser};
use oxttl::trig::{FromReadTriGReader, TriGParser};
use oxttl::turtle::{FromReadTurtleReader, TurtleParser};
use rio_api::model as rio;
use rio_api::parser::{QuadsParser, TriplesParser};
use rio_turtle::{NQuadsParser, NTriplesParser, TriGParser, TurtleParser};
use rio_api::parser::TriplesParser;
use rio_xml::RdfXmlParser;
use std::collections::HashMap;
use std::io::BufRead;
@ -20,20 +23,24 @@ use std::io::BufRead;
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
/// use std::io::Cursor;
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser.read_triples(Cursor::new(file))?.collect::<Result<Vec<_>,_>>()?;
/// let triples = parser.read_triples(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
///assert_eq!(triples.len(), 1);
///assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
pub struct GraphParser {
format: GraphFormat,
base_iri: Option<Iri<String>>,
inner: GraphParserKind,
}
// Per-format parser configuration stored in `GraphParser::inner`.
enum GraphParserKind {
// Preconfigured `oxttl` N-Triples parser.
NTriples(NTriplesParser),
// Preconfigured `oxttl` Turtle parser; it also carries the base IRI set by `with_base_iri`.
Turtle(TurtleParser),
// RDF/XML only keeps the optional base IRI, which is given to `RdfXmlParser::new` by `read_triples`.
RdfXml { base_iri: Option<Iri<String>> },
}
impl GraphParser {
@ -41,8 +48,15 @@ impl GraphParser {
#[inline]
pub fn from_format(format: GraphFormat) -> Self {
Self {
format,
base_iri: None,
inner: match format {
GraphFormat::NTriples => {
GraphParserKind::NTriples(NTriplesParser::new().with_quoted_triples())
}
GraphFormat::Turtle => {
GraphParserKind::Turtle(TurtleParser::new().with_quoted_triples())
}
GraphFormat::RdfXml => GraphParserKind::RdfXml { base_iri: None },
},
}
}
@ -50,39 +64,44 @@ impl GraphParser {
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
/// use std::io::Cursor;
///
/// let file = "</s> </p> </o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::Turtle).with_base_iri("http://example.com")?;
/// let triples = parser.read_triples(Cursor::new(file))?.collect::<Result<Vec<_>,_>>()?;
/// let triples = parser.read_triples(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
///assert_eq!(triples.len(), 1);
///assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
self.base_iri = Some(Iri::parse(base_iri.into())?);
Ok(self)
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self {
inner: match self.inner {
GraphParserKind::NTriples(p) => GraphParserKind::NTriples(p),
GraphParserKind::Turtle(p) => GraphParserKind::Turtle(p.with_base_iri(base_iri)?),
GraphParserKind::RdfXml { .. } => GraphParserKind::RdfXml {
base_iri: Some(Iri::parse(base_iri.into())?),
},
},
})
}
/// Executes the parsing itself on a [`BufRead`] implementation and returns an iterator of triples.
#[allow(clippy::unnecessary_wraps)]
pub fn read_triples<R: BufRead>(&self, reader: R) -> Result<TripleReader<R>, ParseError> {
Ok(TripleReader {
mapper: RioMapper::default(),
parser: match self.format {
GraphFormat::NTriples => TripleReaderKind::NTriples(NTriplesParser::new(reader)),
GraphFormat::Turtle => {
TripleReaderKind::Turtle(TurtleParser::new(reader, self.base_iri.clone()))
pub fn read_triples<R: BufRead>(&self, reader: R) -> TripleReader<R> {
TripleReader {
mapper: BlankNodeMapper::default(),
parser: match &self.inner {
GraphParserKind::NTriples(p) => {
TripleReaderKind::NTriples(p.parse_from_read(reader))
}
GraphFormat::RdfXml => {
TripleReaderKind::RdfXml(RdfXmlParser::new(reader, self.base_iri.clone()))
GraphParserKind::Turtle(p) => TripleReaderKind::Turtle(p.parse_from_read(reader)),
GraphParserKind::RdfXml { base_iri } => {
TripleReaderKind::RdfXml(RdfXmlParser::new(reader, base_iri.clone()))
}
},
buffer: Vec::new(),
})
}
}
}
@ -91,12 +110,11 @@ impl GraphParser {
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
/// use std::io::Cursor;
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser.read_triples(Cursor::new(file))?.collect::<Result<Vec<_>,_>>()?;
/// let triples = parser.read_triples(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
///assert_eq!(triples.len(), 1);
///assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
@ -104,15 +122,15 @@ impl GraphParser {
/// ```
#[must_use]
pub struct TripleReader<R: BufRead> {
mapper: RioMapper,
mapper: BlankNodeMapper,
parser: TripleReaderKind<R>,
buffer: Vec<Triple>,
}
#[allow(clippy::large_enum_variant)]
enum TripleReaderKind<R: BufRead> {
NTriples(NTriplesParser<R>),
Turtle(TurtleParser<R>),
NTriples(FromReadNTriplesReader<R>),
Turtle(FromReadTurtleReader<R>),
RdfXml(RdfXmlParser<R>),
}
@ -125,41 +143,28 @@ impl<R: BufRead> Iterator for TripleReader<R> {
return Some(Ok(r));
}
if let Err(error) = match &mut self.parser {
TripleReaderKind::NTriples(parser) => {
Self::read(parser, &mut self.buffer, &mut self.mapper)
}
TripleReaderKind::Turtle(parser) => {
Self::read(parser, &mut self.buffer, &mut self.mapper)
}
return Some(match &mut self.parser {
TripleReaderKind::NTriples(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.triple(triple)),
Err(e) => Err(e.into()),
},
TripleReaderKind::Turtle(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.triple(triple)),
Err(e) => Err(e.into()),
},
TripleReaderKind::RdfXml(parser) => {
Self::read(parser, &mut self.buffer, &mut self.mapper)
if parser.is_end() {
return None;
} else if let Err(e) = parser.parse_step(&mut |t| {
self.buffer.push(self.mapper.triple(RioMapper::triple(&t)));
Ok(())
}) {
Err(e)
} else {
continue;
}
}
}? {
return Some(Err(error));
}
}
}
}
impl<R: BufRead> TripleReader<R> {
fn read<P: TriplesParser>(
parser: &mut P,
buffer: &mut Vec<Triple>,
mapper: &mut RioMapper,
) -> Option<Result<(), ParseError>>
where
ParseError: From<P::Error>,
{
if parser.is_end() {
None
} else if let Err(e) = parser.parse_step(&mut |t| {
buffer.push(mapper.triple(&t));
Ok(())
}) {
Some(Err(e))
} else {
Some(Ok(()))
});
}
}
}
@ -172,20 +177,23 @@ impl<R: BufRead> TripleReader<R> {
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
/// use std::io::Cursor;
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(Cursor::new(file))?.collect::<Result<Vec<_>,_>>()?;
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
///assert_eq!(quads.len(), 1);
///assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
pub struct DatasetParser {
format: DatasetFormat,
base_iri: Option<Iri<String>>,
inner: DatasetParserKind,
}
// Per-format parser configuration stored in `DatasetParser::inner`.
enum DatasetParserKind {
// Preconfigured `oxttl` N-Quads parser.
NQuads(NQuadsParser),
// Preconfigured `oxttl` TriG parser; it also carries the base IRI set by `with_base_iri`.
TriG(TriGParser),
}
impl DatasetParser {
@ -193,8 +201,14 @@ impl DatasetParser {
#[inline]
pub fn from_format(format: DatasetFormat) -> Self {
Self {
format,
base_iri: None,
inner: match format {
DatasetFormat::NQuads => {
DatasetParserKind::NQuads(NQuadsParser::new().with_quoted_triples())
}
DatasetFormat::TriG => {
DatasetParserKind::TriG(TriGParser::new().with_quoted_triples())
}
},
}
}
@ -202,36 +216,35 @@ impl DatasetParser {
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
/// use std::io::Cursor;
///
/// let file = "<g> { </s> </p> </o> }";
///
/// let parser = DatasetParser::from_format(DatasetFormat::TriG).with_base_iri("http://example.com")?;
/// let triples = parser.read_quads(Cursor::new(file))?.collect::<Result<Vec<_>,_>>()?;
/// let triples = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
///assert_eq!(triples.len(), 1);
///assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
self.base_iri = Some(Iri::parse(base_iri.into())?);
Ok(self)
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self {
inner: match self.inner {
DatasetParserKind::NQuads(p) => DatasetParserKind::NQuads(p),
DatasetParserKind::TriG(p) => DatasetParserKind::TriG(p.with_base_iri(base_iri)?),
},
})
}
/// Executes the parsing itself on a [`BufRead`] implementation and returns an iterator of quads.
#[allow(clippy::unnecessary_wraps)]
pub fn read_quads<R: BufRead>(&self, reader: R) -> Result<QuadReader<R>, ParseError> {
Ok(QuadReader {
mapper: RioMapper::default(),
parser: match self.format {
DatasetFormat::NQuads => QuadReaderKind::NQuads(NQuadsParser::new(reader)),
DatasetFormat::TriG => {
QuadReaderKind::TriG(TriGParser::new(reader, self.base_iri.clone()))
}
pub fn read_quads<R: BufRead>(&self, reader: R) -> QuadReader<R> {
QuadReader {
mapper: BlankNodeMapper::default(),
parser: match &self.inner {
DatasetParserKind::NQuads(p) => QuadReaderKind::NQuads(p.parse_from_read(reader)),
DatasetParserKind::TriG(p) => QuadReaderKind::TriG(p.parse_from_read(reader)),
},
buffer: Vec::new(),
})
}
}
}
@ -240,12 +253,11 @@ impl DatasetParser {
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
/// use std::io::Cursor;
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(Cursor::new(file))?.collect::<Result<Vec<_>,_>>()?;
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
///assert_eq!(quads.len(), 1);
///assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
@ -253,76 +265,41 @@ impl DatasetParser {
/// ```
#[must_use]
pub struct QuadReader<R: BufRead> {
mapper: RioMapper,
mapper: BlankNodeMapper,
parser: QuadReaderKind<R>,
buffer: Vec<Quad>,
}
enum QuadReaderKind<R: BufRead> {
NQuads(NQuadsParser<R>),
TriG(TriGParser<R>),
NQuads(FromReadNQuadsReader<R>),
TriG(FromReadTriGReader<R>),
}
impl<R: BufRead> Iterator for QuadReader<R> {
type Item = Result<Quad, ParseError>;
fn next(&mut self) -> Option<Result<Quad, ParseError>> {
loop {
if let Some(r) = self.buffer.pop() {
return Some(Ok(r));
}
if let Err(error) = match &mut self.parser {
QuadReaderKind::NQuads(parser) => {
Self::read(parser, &mut self.buffer, &mut self.mapper)
}
QuadReaderKind::TriG(parser) => {
Self::read(parser, &mut self.buffer, &mut self.mapper)
}
}? {
return Some(Err(error));
}
}
Some(match &mut self.parser {
QuadReaderKind::NQuads(parser) => match parser.next()? {
Ok(quad) => Ok(self.mapper.quad(quad)),
Err(e) => Err(e.into()),
},
QuadReaderKind::TriG(parser) => match parser.next()? {
Ok(quad) => Ok(self.mapper.quad(quad)),
Err(e) => Err(e.into()),
},
})
}
}
impl<R: BufRead> QuadReader<R> {
fn read<P: QuadsParser>(
parser: &mut P,
buffer: &mut Vec<Quad>,
mapper: &mut RioMapper,
) -> Option<Result<(), ParseError>>
where
ParseError: From<P::Error>,
{
if parser.is_end() {
None
} else if let Err(e) = parser.parse_step(&mut |t| {
buffer.push(mapper.quad(&t));
Ok(())
}) {
Some(Err(e))
} else {
Some(Ok(()))
}
}
}
#[derive(Default)]
struct RioMapper {
bnode_map: HashMap<String, BlankNode>,
}
struct RioMapper;
impl<'a> RioMapper {
fn named_node(node: rio::NamedNode<'a>) -> NamedNode {
NamedNode::new_unchecked(node.iri)
}
fn blank_node(&mut self, node: rio::BlankNode<'a>) -> BlankNode {
self.bnode_map
.entry(node.id.to_owned())
.or_insert_with(BlankNode::default)
.clone()
fn blank_node(node: rio::BlankNode<'a>) -> BlankNode {
BlankNode::new_unchecked(node.id)
}
fn literal(literal: rio::Literal<'a>) -> Literal {
@ -337,43 +314,82 @@ impl<'a> RioMapper {
}
}
fn subject(&mut self, node: rio::Subject<'a>) -> Subject {
fn subject(node: rio::Subject<'a>) -> Subject {
match node {
rio::Subject::NamedNode(node) => Self::named_node(node).into(),
rio::Subject::BlankNode(node) => self.blank_node(node).into(),
rio::Subject::Triple(triple) => self.triple(triple).into(),
rio::Subject::BlankNode(node) => Self::blank_node(node).into(),
rio::Subject::Triple(triple) => Self::triple(triple).into(),
}
}
fn term(&mut self, node: rio::Term<'a>) -> Term {
fn term(node: rio::Term<'a>) -> Term {
match node {
rio::Term::NamedNode(node) => Self::named_node(node).into(),
rio::Term::BlankNode(node) => self.blank_node(node).into(),
rio::Term::BlankNode(node) => Self::blank_node(node).into(),
rio::Term::Literal(literal) => Self::literal(literal).into(),
rio::Term::Triple(triple) => self.triple(triple).into(),
rio::Term::Triple(triple) => Self::triple(triple).into(),
}
}
fn triple(&mut self, triple: &rio::Triple<'a>) -> Triple {
fn triple(triple: &rio::Triple<'a>) -> Triple {
Triple {
subject: self.subject(triple.subject),
subject: Self::subject(triple.subject),
predicate: Self::named_node(triple.predicate),
object: Self::term(triple.object),
}
}
}
// Replaces the blank nodes produced by a parser with freshly generated ones.
#[derive(Default)]
struct BlankNodeMapper {
// Maps each blank node seen so far to the `BlankNode::default()` generated for it,
// so that repeated occurrences of the same input node map to the same output node.
bnode_map: HashMap<BlankNode, BlankNode>,
}
impl BlankNodeMapper {
fn blank_node(&mut self, node: BlankNode) -> BlankNode {
self.bnode_map
.entry(node)
.or_insert_with(BlankNode::default)
.clone()
}
fn subject(&mut self, node: Subject) -> Subject {
match node {
Subject::NamedNode(node) => node.into(),
Subject::BlankNode(node) => self.blank_node(node).into(),
Subject::Triple(triple) => self.triple(*triple).into(),
}
}
/// Applies the blank node mapping to a term.
///
/// Named nodes and literals are returned unchanged, blank nodes go through
/// `blank_node` and quoted triples are rewritten recursively with `triple`.
fn term(&mut self, node: Term) -> Term {
    match node {
        Term::BlankNode(bnode) => Term::from(self.blank_node(bnode)),
        Term::Triple(quoted) => Term::from(self.triple(*quoted)),
        Term::NamedNode(named) => Term::from(named),
        Term::Literal(value) => Term::from(value),
    }
}
/// Applies the blank node mapping to the subject and object of a triple.
///
/// The predicate is moved over untouched. The subject is mapped before the
/// object, matching the field order of `Triple`.
fn triple(&mut self, triple: Triple) -> Triple {
    let Triple {
        subject,
        predicate,
        object,
    } = triple;
    let subject = self.subject(subject);
    let object = self.term(object);
    Triple {
        subject,
        predicate,
        object,
    }
}
fn graph_name(&mut self, graph_name: Option<rio::GraphName<'a>>) -> GraphName {
fn graph_name(&mut self, graph_name: GraphName) -> GraphName {
match graph_name {
Some(rio::GraphName::NamedNode(node)) => Self::named_node(node).into(),
Some(rio::GraphName::BlankNode(node)) => self.blank_node(node).into(),
None => GraphName::DefaultGraph,
GraphName::NamedNode(node) => node.into(),
GraphName::BlankNode(node) => self.blank_node(node).into(),
GraphName::DefaultGraph => GraphName::DefaultGraph,
}
}
fn quad(&mut self, quad: &rio::Quad<'a>) -> Quad {
fn quad(&mut self, quad: Quad) -> Quad {
Quad {
subject: self.subject(quad.subject),
predicate: Self::named_node(quad.predicate),
predicate: quad.predicate,
object: self.term(quad.object),
graph_name: self.graph_name(quad.graph_name),
}

@ -2,6 +2,10 @@
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxttl::nquads::{NQuadsSerializer, ToWriteNQuadsWriter};
use oxttl::ntriples::{NTriplesSerializer, ToWriteNTriplesWriter};
use oxttl::trig::{ToWriteTriGWriter, TriGSerializer};
use oxttl::turtle::{ToWriteTurtleWriter, TurtleSerializer};
use rio_api::formatter::TriplesFormatter;
use rio_api::model as rio;
use rio_xml::RdfXmlFormatter;
@ -45,7 +49,12 @@ impl GraphSerializer {
pub fn triple_writer<W: Write>(&self, writer: W) -> io::Result<TripleWriter<W>> {
Ok(TripleWriter {
formatter: match self.format {
GraphFormat::NTriples | GraphFormat::Turtle => TripleWriterKind::NTriples(writer),
GraphFormat::NTriples => {
TripleWriterKind::NTriples(NTriplesSerializer::new().serialize_to_write(writer))
}
GraphFormat::Turtle => {
TripleWriterKind::Turtle(TurtleSerializer::new().serialize_to_write(writer))
}
GraphFormat::RdfXml => TripleWriterKind::RdfXml(RdfXmlFormatter::new(writer)?),
},
})
@ -79,71 +88,73 @@ pub struct TripleWriter<W: Write> {
}
enum TripleWriterKind<W: Write> {
NTriples(W),
NTriples(ToWriteNTriplesWriter<W>),
Turtle(ToWriteTurtleWriter<W>),
RdfXml(RdfXmlFormatter<W>),
}
impl<W: Write> TripleWriter<W> {
/// Writes a triple
pub fn write<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
let triple = triple.into();
match &mut self.formatter {
TripleWriterKind::NTriples(writer) => {
writeln!(writer, "{triple} .")?;
}
TripleWriterKind::RdfXml(formatter) => formatter.format(&rio::Triple {
subject: match triple.subject {
SubjectRef::NamedNode(node) => rio::NamedNode { iri: node.as_str() }.into(),
SubjectRef::BlankNode(node) => rio::BlankNode { id: node.as_str() }.into(),
SubjectRef::Triple(_) => {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"RDF/XML does not support RDF-star yet",
))
}
},
predicate: rio::NamedNode {
iri: triple.predicate.as_str(),
},
object: match triple.object {
TermRef::NamedNode(node) => rio::NamedNode { iri: node.as_str() }.into(),
TermRef::BlankNode(node) => rio::BlankNode { id: node.as_str() }.into(),
TermRef::Literal(literal) => if literal.is_plain() {
if let Some(language) = literal.language() {
rio::Literal::LanguageTaggedString {
value: literal.value(),
language,
TripleWriterKind::NTriples(writer) => writer.write_triple(triple),
TripleWriterKind::Turtle(writer) => writer.write_triple(triple),
TripleWriterKind::RdfXml(formatter) => {
let triple = triple.into();
formatter.format(&rio::Triple {
subject: match triple.subject {
SubjectRef::NamedNode(node) => rio::NamedNode { iri: node.as_str() }.into(),
SubjectRef::BlankNode(node) => rio::BlankNode { id: node.as_str() }.into(),
SubjectRef::Triple(_) => {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"RDF/XML does not support RDF-star yet",
))
}
},
predicate: rio::NamedNode {
iri: triple.predicate.as_str(),
},
object: match triple.object {
TermRef::NamedNode(node) => rio::NamedNode { iri: node.as_str() }.into(),
TermRef::BlankNode(node) => rio::BlankNode { id: node.as_str() }.into(),
TermRef::Literal(literal) => if literal.is_plain() {
if let Some(language) = literal.language() {
rio::Literal::LanguageTaggedString {
value: literal.value(),
language,
}
} else {
rio::Literal::Simple {
value: literal.value(),
}
}
} else {
rio::Literal::Simple {
rio::Literal::Typed {
value: literal.value(),
datatype: rio::NamedNode {
iri: literal.datatype().as_str(),
},
}
}
} else {
rio::Literal::Typed {
value: literal.value(),
datatype: rio::NamedNode {
iri: literal.datatype().as_str(),
},
.into(),
TermRef::Triple(_) => {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"RDF/XML does not support RDF-star yet",
))
}
}
.into(),
TermRef::Triple(_) => {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"RDF/XML does not support RDF-star yet",
))
}
},
})?,
},
})
}
}
Ok(())
}
/// Writes the last bytes of the file
pub fn finish(self) -> io::Result<()> {
match self.formatter {
TripleWriterKind::NTriples(mut writer) => writer.flush(),
TripleWriterKind::NTriples(writer) => writer.finish().flush(),
TripleWriterKind::Turtle(writer) => writer.finish()?.flush(),
TripleWriterKind::RdfXml(formatter) => formatter.finish()?.flush(), //TODO: remove flush when the next version of Rio is going to be released
}
}
@ -160,7 +171,7 @@ impl<W: Write> TripleWriter<W> {
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer)?;
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
@ -184,14 +195,17 @@ impl DatasetSerializer {
}
/// Returns a [`QuadWriter`] allowing writing triples into the given [`Write`] implementation
#[allow(clippy::unnecessary_wraps)]
pub fn quad_writer<W: Write>(&self, writer: W) -> io::Result<QuadWriter<W>> {
Ok(QuadWriter {
pub fn quad_writer<W: Write>(&self, writer: W) -> QuadWriter<W> {
QuadWriter {
formatter: match self.format {
DatasetFormat::NQuads => QuadWriterKind::NQuads(writer),
DatasetFormat::TriG => QuadWriterKind::TriG(writer),
DatasetFormat::NQuads => {
QuadWriterKind::NQuads(NQuadsSerializer::new().serialize_to_write(writer))
}
DatasetFormat::TriG => {
QuadWriterKind::TriG(TriGSerializer::new().serialize_to_write(writer))
}
},
})
}
}
}
@ -205,7 +219,7 @@ impl DatasetSerializer {
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer)?;
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
@ -223,39 +237,24 @@ pub struct QuadWriter<W: Write> {
}
enum QuadWriterKind<W: Write> {
NQuads(W),
TriG(W),
NQuads(ToWriteNQuadsWriter<W>),
TriG(ToWriteTriGWriter<W>),
}
impl<W: Write> QuadWriter<W> {
/// Writes a quad
pub fn write<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
let quad = quad.into();
match &mut self.formatter {
QuadWriterKind::NQuads(writer) => {
writeln!(writer, "{quad} .")?;
}
QuadWriterKind::TriG(writer) => {
if quad.graph_name.is_default_graph() {
writeln!(writer, "{} .", TripleRef::from(quad))
} else {
writeln!(
writer,
"{} {{ {} }}",
quad.graph_name,
TripleRef::from(quad)
)
}?;
}
QuadWriterKind::NQuads(writer) => writer.write_quad(quad),
QuadWriterKind::TriG(writer) => writer.write_quad(quad),
}
Ok(())
}
/// Writes the last bytes of the file
#[allow(clippy::unused_self, clippy::unnecessary_wraps)]
pub fn finish(self) -> io::Result<()> {
match self.formatter {
QuadWriterKind::NQuads(mut writer) | QuadWriterKind::TriG(mut writer) => writer.flush(),
QuadWriterKind::NQuads(writer) => writer.finish().flush(),
QuadWriterKind::TriG(writer) => writer.finish()?.flush(),
}
}
}

@ -183,7 +183,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
.with_base_iri(base_iri.as_str())
.map_err(|e| ParseError::invalid_base_iri(base_iri, e))?;
}
for t in parser.read_triples(BufReader::new(body))? {
for t in parser.read_triples(BufReader::new(body)) {
self.transaction
.insert(t?.as_ref().in_graph(to_graph_name))?;
}

@ -480,9 +480,7 @@ impl Store {
.with_base_iri(base_iri)
.map_err(|e| ParseError::invalid_base_iri(base_iri, e))?;
}
let quads = parser
.read_triples(reader)?
.collect::<Result<Vec<_>, _>>()?;
let quads = parser.read_triples(reader).collect::<Result<Vec<_>, _>>()?;
let to_graph_name = to_graph_name.into();
self.storage.transaction(move |mut t| {
for quad in &quads {
@ -525,7 +523,7 @@ impl Store {
.with_base_iri(base_iri)
.map_err(|e| ParseError::invalid_base_iri(base_iri, e))?;
}
let quads = parser.read_quads(reader)?.collect::<Result<Vec<_>, _>>()?;
let quads = parser.read_quads(reader).collect::<Result<Vec<_>, _>>()?;
self.storage.transaction(move |mut t| {
for quad in &quads {
t.insert(quad.into())?;
@ -647,7 +645,7 @@ impl Store {
writer: impl Write,
format: DatasetFormat,
) -> Result<(), SerializerError> {
let mut writer = DatasetSerializer::from_format(format).quad_writer(writer)?;
let mut writer = DatasetSerializer::from_format(format).quad_writer(writer);
for quad in self.iter() {
writer.write(&quad?)?;
}
@ -1091,7 +1089,7 @@ impl<'a> Transaction<'a> {
.map_err(|e| ParseError::invalid_base_iri(base_iri, e))?;
}
let to_graph_name = to_graph_name.into();
for triple in parser.read_triples(reader)? {
for triple in parser.read_triples(reader) {
self.writer
.insert(triple?.as_ref().in_graph(to_graph_name))?;
}
@ -1131,7 +1129,7 @@ impl<'a> Transaction<'a> {
.with_base_iri(base_iri)
.map_err(|e| ParseError::invalid_base_iri(base_iri, e))?;
}
for quad in parser.read_quads(reader)? {
for quad in parser.read_quads(reader) {
self.writer.insert(quad?.as_ref())?;
}
Ok(())
@ -1470,7 +1468,7 @@ impl BulkLoader {
.with_base_iri(base_iri)
.map_err(|e| ParseError::invalid_base_iri(base_iri, e))?;
}
self.load_ok_quads(parser.read_quads(reader)?.filter_map(|r| match r {
self.load_ok_quads(parser.read_quads(reader).filter_map(|r| match r {
Ok(q) => Some(Ok(q)),
Err(e) => {
if let Some(callback) = &self.on_parse_error {
@ -1527,7 +1525,7 @@ impl BulkLoader {
.map_err(|e| ParseError::invalid_base_iri(base_iri, e))?;
}
let to_graph_name = to_graph_name.into();
self.load_ok_quads(parser.read_triples(reader)?.filter_map(|r| match r {
self.load_ok_quads(parser.read_triples(reader).filter_map(|r| match r {
Ok(q) => Some(Ok(q.in_graph(to_graph_name.into_owned()))),
Err(e) => {
if let Some(callback) = &self.on_parse_error {

@ -68,7 +68,7 @@ pub fn parse(
.map_err(|e| PyValueError::new_err(e.to_string()))?;
}
Ok(PyTripleReader {
inner: py.allow_threads(|| parser.read_triples(input).map_err(map_parse_error))?,
inner: parser.read_triples(input),
}
.into_py(py))
} else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) {
@ -79,7 +79,7 @@ pub fn parse(
.map_err(|e| PyValueError::new_err(e.to_string()))?;
}
Ok(PyQuadReader {
inner: py.allow_threads(|| parser.read_quads(input).map_err(map_parse_error))?,
inner: parser.read_quads(input),
}
.into_py(py))
} else {
@ -136,9 +136,7 @@ pub fn serialize(input: &PyAny, output: PyObject, mime_type: &str, py: Python<'_
writer.finish().map_err(map_io_err)?;
Ok(())
} else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) {
let mut writer = DatasetSerializer::from_format(dataset_format)
.quad_writer(output)
.map_err(map_io_err)?;
let mut writer = DatasetSerializer::from_format(dataset_format).quad_writer(output);
for i in input.iter()? {
writer
.write(&*i?.extract::<PyRef<PyQuad>>()?)

@ -110,5 +110,5 @@ class TestSerialize(unittest.TestCase):
serialize([EXAMPLE_QUAD], output, "application/trig")
self.assertEqual(
output.getvalue(),
b'<http://example.com/g> { <http://example.com/foo> <http://example.com/p> "1" }\n',
b'<http://example.com/g> {\n\t<http://example.com/foo> <http://example.com/p> "1" .\n}\n',
)

@ -305,7 +305,8 @@ class TestStore(unittest.TestCase):
store.dump(output, "application/trig")
self.assertEqual(
output.getvalue(),
b"<http://foo> <http://bar> <http://baz> .\n<http://graph> { <http://foo> <http://bar> <http://baz> }\n",
b"<http://foo> <http://bar> <http://baz> .\n"
b"<http://graph> {\n\t<http://foo> <http://bar> <http://baz> .\n}\n",
)
def test_dump_file(self) -> None:

@ -946,7 +946,7 @@ fn handle_request(
ReadForWrite::build_response(
move |w| {
Ok((
DatasetSerializer::from_format(format).quad_writer(w)?,
DatasetSerializer::from_format(format).quad_writer(w),
store.iter(),
))
},

@ -16,6 +16,16 @@ anyhow = "1"
clap = { version = "4", features = ["derive"] }
time = { version = "0.3", features = ["formatting"] }
oxigraph = { path = "../lib" }
oxttl = { path= "../lib/oxttl" }
sparopt = { path = "../lib/sparopt" }
spargebra = { path = "../lib/spargebra" }
text-diff = "0.4"
rio_api = "0.8"
rio_turtle = "0.8"
[dev-dependencies]
criterion = "0.5"
[[bench]]
name = "parser"
harness = false

@ -0,0 +1 @@
Subproject commit 5fa35bf602669a467cfd0ab24cc732fe49f2b927

@ -0,0 +1,194 @@
use anyhow::Result;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use oxigraph_testsuite::files::read_file;
use oxigraph_testsuite::manifest::TestManifest;
use rio_api::parser::*;
use rio_turtle::*;
use std::io::Read;
fn test_data_from_testsuite(manifest_uri: String, include_tests_types: &[&str]) -> Result<Vec<u8>> {
let manifest = TestManifest::new([manifest_uri]);
let mut data = Vec::default();
for test in manifest {
let test = test?;
if include_tests_types.contains(&test.kind.as_str()) {
read_file(&test.action.unwrap())?.read_to_end(&mut data)?;
data.push(b'\n');
}
}
Ok(data)
}
/// Collects the positive-syntax files of the W3C N-Triples test suite into a single buffer.
fn ntriples_test_data() -> Result<Vec<u8>> {
    const MANIFEST: &str = "http://w3c.github.io/rdf-tests/ntriples/manifest.ttl";
    const TEST_TYPES: [&str; 1] = ["http://www.w3.org/ns/rdftest#TestNTriplesPositiveSyntax"];
    test_data_from_testsuite(String::from(MANIFEST), &TEST_TYPES)
}
/// Collects the positive-syntax and evaluation files of the W3C Turtle test suite into a single buffer.
fn turtle_test_data() -> Result<Vec<u8>> {
    const MANIFEST: &str = "http://w3c.github.io/rdf-tests/turtle/manifest.ttl";
    const TEST_TYPES: [&str; 2] = [
        "http://www.w3.org/ns/rdftest#TestTurtlePositiveSyntax",
        "http://www.w3.org/ns/rdftest#TestTurtleEval",
    ];
    test_data_from_testsuite(String::from(MANIFEST), &TEST_TYPES)
}
/// Registers one parsing benchmark.
///
/// The benchmark group is named after `parser_name`, the benchmark itself is
/// identified by `data_name`, and throughput is reported in bytes of `data`.
/// `bench` is called with the whole buffer on every iteration.
fn parse_bench(
    c: &mut Criterion,
    parser_name: &str,
    data_name: &str,
    data: Vec<u8>,
    bench: impl Fn(&[u8]),
) {
    let input_size = Throughput::Bytes(data.len() as u64);
    let id = BenchmarkId::from_parameter(data_name);

    let mut group = c.benchmark_group(parser_name);
    group.throughput(input_size);
    group.bench_with_input(id, &data, |bencher, input| bencher.iter(|| bench(input)));
    group.finish();
}
/// Benchmarks the oxttl N-Triples parser on `data` under the "oxttl ntriples" group.
///
/// Each iteration pushes the whole buffer into a fresh parser, marks the end
/// of input and drains every result, panicking on the first parse error.
fn parse_oxttl_ntriples(c: &mut Criterion, name: &str, data: Vec<u8>) {
    parse_bench(c, "oxttl ntriples", name, data, |input| {
        let mut reader = oxttl::NTriplesParser::new().parse();
        reader.extend_from_slice(input);
        reader.end();
        loop {
            match reader.read_next() {
                Some(outcome) => {
                    outcome.unwrap();
                }
                None => break,
            }
        }
    });
}
/// Benchmarks the oxttl Turtle parser on `data` under the "oxttl turtle" group.
///
/// Each iteration pushes the whole buffer into a fresh parser, marks the end
/// of input and drains every result, panicking on the first parse error.
fn parse_oxttl_turtle(c: &mut Criterion, name: &str, data: Vec<u8>) {
    parse_bench(c, "oxttl turtle", name, data, |input| {
        let mut reader = oxttl::TurtleParser::new().parse();
        reader.extend_from_slice(input);
        reader.end();
        loop {
            match reader.read_next() {
                Some(outcome) => {
                    outcome.unwrap();
                }
                None => break,
            }
        }
    });
}
/// Benchmarks the Rio N-Triples parser on `data` under the "rio ntriples" group.
///
/// Each iteration parses the whole buffer, counting the triples handed to the
/// callback, and panics if the parser reports an error.
fn parse_rio_ntriples(c: &mut Criterion, name: &str, data: Vec<u8>) {
    parse_bench(c, "rio ntriples", name, data, |input| {
        let mut seen: u64 = 0;
        let mut parser = NTriplesParser::new(input);
        let outcome = parser.parse_all(&mut |_| {
            seen += 1;
            Ok::<(), TurtleError>(())
        });
        outcome.unwrap();
    });
}
fn parse_rio_turtle(c: &mut Criterion, name: &str, data: Vec<u8>) {
parse_bench(c, "rio turtle", name, data, |data| {
let mut count: u64 = 0;
TurtleParser::new(data, None)
.parse_all(&mut |_| {
count += 1;
Ok(()) as Result<(), TurtleError>
})
.unwrap();
});
}
/// Runs the oxttl N-Triples parser on the N-Triples test data.
///
/// If the test data cannot be built, the error is printed to stderr and the benchmark is skipped.
fn bench_parse_oxttl_ntriples_with_ntriples(c: &mut Criterion) {
    let data = match ntriples_test_data() {
        Err(e) => {
            eprintln!("{e}");
            return;
        }
        Ok(d) => d,
    };
    parse_oxttl_ntriples(c, "ntriples", data)
}
/// Runs the oxttl Turtle parser on the N-Triples test data.
///
/// If the test data cannot be built, the error is printed to stderr and the benchmark is skipped.
fn bench_parse_oxttl_ntriples_with_turtle(c: &mut Criterion) {
    let data = match ntriples_test_data() {
        Err(e) => {
            eprintln!("{e}");
            return;
        }
        Ok(d) => d,
    };
    parse_oxttl_turtle(c, "ntriples", data)
}
/// Runs the oxttl Turtle parser on the Turtle test data.
///
/// If the test data cannot be built, the error is printed to stderr and the benchmark is skipped.
fn bench_parse_oxttl_turtle_with_turtle(c: &mut Criterion) {
    let data = match turtle_test_data() {
        Err(e) => {
            eprintln!("{e}");
            return;
        }
        Ok(d) => d,
    };
    parse_oxttl_turtle(c, "turtle", data)
}
/// Runs the Rio N-Triples parser on the N-Triples test data.
///
/// If the test data cannot be built, the error is printed to stderr and the benchmark is skipped.
fn bench_parse_rio_ntriples_with_ntriples(c: &mut Criterion) {
    let data = match ntriples_test_data() {
        Err(e) => {
            eprintln!("{e}");
            return;
        }
        Ok(d) => d,
    };
    parse_rio_ntriples(c, "ntriples", data)
}
/// Runs the Rio Turtle parser on the N-Triples test data.
///
/// If the test data cannot be built, the error is printed to stderr and the benchmark is skipped.
fn bench_parse_rio_ntriples_with_turtle(c: &mut Criterion) {
    let data = match ntriples_test_data() {
        Err(e) => {
            eprintln!("{e}");
            return;
        }
        Ok(d) => d,
    };
    parse_rio_turtle(c, "ntriples", data)
}
/// Runs the Rio Turtle parser on the Turtle test data.
///
/// If the test data cannot be built, the error is printed to stderr and the benchmark is skipped.
fn bench_parse_rio_turtle_with_turtle(c: &mut Criterion) {
    let data = match turtle_test_data() {
        Err(e) => {
            eprintln!("{e}");
            return;
        }
        Ok(d) => d,
    };
    parse_rio_turtle(c, "turtle", data)
}
// Benchmark group `w3c_testsuite`: the three Rio benchmarks are listed first,
// followed by the three oxttl benchmarks over the same data sets.
criterion_group!(
w3c_testsuite,
bench_parse_rio_ntriples_with_ntriples,
bench_parse_rio_ntriples_with_turtle,
bench_parse_rio_turtle_with_turtle,
bench_parse_oxttl_ntriples_with_ntriples,
bench_parse_oxttl_ntriples_with_turtle,
bench_parse_oxttl_turtle_with_turtle
);
// Entry point of the benchmark binary, built from the `w3c_testsuite` group.
criterion_main!(w3c_testsuite);

@ -0,0 +1,2 @@
_:` <http://example.com/pb> <http://example.com/o> .
<http://example.com/s> <http://example.com/p> <http://example.com/o> .

@ -0,0 +1,2 @@
<http:// /s> <http://example.com/p> <http://example.com/o> .
<http://example.com/s> <http://example.com/p> <http://example.com/o> .

@ -0,0 +1,2 @@
<http://example.com/s> <http://example.com/p> "\a" .
<http://example.com/s> <http://example.com/p> <http://example.com/o> .

@ -0,0 +1,2 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o> .
<http://example.com/s> <http://example.com/p> <http://example.com/o2> .

@ -0,0 +1,2 @@
<http://example.com/s> <http://example.com/p> "o" .
<http://example.com/s> <http://example.com/p> <http://example.com/o> .

@ -0,0 +1 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o> .

@ -0,0 +1 @@
<http://example.com/s> <http://example.com/p> "o" .

@ -0,0 +1,129 @@
@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix ox: <https://github.com/oxigraph/oxigraph/tests#> .
<>
rdf:type mf:Manifest ;
rdfs:comment "Oxigraph parser recovery test cases" ;
mf:entries (
<#invalid_iri_nt>
<#invalid_iri_ttl>
<#invalid_iri_n3>
<#invalid_bnode_nt>
<#invalid_bnode_ttl>
<#invalid_bnode_n3>
<#invalid_string_nt>
<#invalid_string_ttl>
<#invalid_string_n3>
<#missing_dot_at_end_of_triple_with_iri_middle_nt>
<#missing_dot_at_end_of_triple_with_iri_middle_ttl>
<#missing_dot_at_end_of_triple_with_iri_end_nt>
<#missing_dot_at_end_of_triple_with_iri_end_ttl>
<#missing_dot_at_end_of_triple_with_string_middle_nt>
<#missing_dot_at_end_of_triple_with_string_middle_ttl>
<#missing_dot_at_end_of_triple_with_string_end_nt>
<#missing_dot_at_end_of_triple_with_string_end_ttl>
) .
<#invalid_iri_nt>
rdf:type ox:TestNTripleRecovery ;
mf:name "IRI with space" ;
mf:action <invalid_iri.nt> ;
mf:result <iri_spo.nt> .
<#invalid_iri_ttl>
rdf:type ox:TestTurtleRecovery ;
mf:name "IRI with space" ;
mf:action <invalid_iri.nt> ;
mf:result <iri_spo.nt> .
<#invalid_iri_n3>
rdf:type ox:TestN3Recovery ;
mf:name "IRI with space" ;
mf:action <invalid_iri.nt> ;
mf:result <iri_spo.nt> .
<#invalid_bnode_nt>
rdf:type ox:TestNTripleRecovery ;
mf:name "bad character in blank node" ;
mf:action <invalid_bnode.nt> ;
mf:result <iri_spo.nt> .
<#invalid_bnode_ttl>
rdf:type ox:TestTurtleRecovery ;
mf:name "bad character in blank node" ;
mf:action <invalid_bnode.nt> ;
mf:result <iri_spo.nt> .
<#invalid_bnode_n3>
rdf:type ox:TestN3Recovery ;
mf:name "bad character in blank node" ;
mf:action <invalid_bnode.nt> ;
mf:result <iri_spo.nt> .
<#invalid_string_nt>
rdf:type ox:TestNTripleRecovery ;
mf:name "invalid escape sequence in string" ;
mf:action <invalid_string.nt> ;
mf:result <iri_spo.nt> .
<#invalid_string_ttl>
rdf:type ox:TestTurtleRecovery ;
mf:name "invalid escape sequence in string" ;
mf:action <invalid_string.nt> ;
mf:result <iri_spo.nt> .
<#invalid_string_n3>
rdf:type ox:TestN3Recovery ;
mf:name "invalid escape sequence in string" ;
mf:action <invalid_string.nt> ;
mf:result <iri_spo.nt> .
<#missing_dot_at_end_of_triple_with_iri_middle_nt>
rdf:type ox:TestNTripleRecovery ;
mf:name "missing dot at the end of a triple" ;
mf:action <missing_dot_at_end_of_triple_with_iri_middle.nt> ;
mf:result <iri2_spo.nt> .
<#missing_dot_at_end_of_triple_with_iri_middle_ttl>
rdf:type ox:TestTurtleRecovery ;
mf:name "missing dot at the end of a triple" ;
mf:action <missing_dot_at_end_of_triple_with_iri_middle.nt> ;
mf:result <iri2_spo.nt> .
<#missing_dot_at_end_of_triple_with_iri_end_nt>
rdf:type ox:TestNTripleRecovery ;
mf:name "missing dot at the end of a triple" ;
mf:action <missing_dot_at_end_of_triple_with_iri_end.nt> ;
mf:result <iri_spo.nt> .
<#missing_dot_at_end_of_triple_with_iri_end_ttl>
rdf:type ox:TestTurtleRecovery ;
mf:name "missing dot at the end of a triple" ;
mf:action <missing_dot_at_end_of_triple_with_iri_end.nt> ;
mf:result <iri_spo.nt> .
<#missing_dot_at_end_of_triple_with_string_middle_nt>
rdf:type ox:TestNTripleRecovery ;
mf:name "missing dot at the end of a triple" ;
mf:action <missing_dot_at_end_of_triple_with_string_middle.nt> ;
mf:result <iri2_string_spo.nt> .
<#missing_dot_at_end_of_triple_with_string_middle_ttl>
rdf:type ox:TestTurtleRecovery ;
mf:name "missing dot at the end of a triple" ;
mf:action <missing_dot_at_end_of_triple_with_string_middle.nt> ;
mf:result <iri2_string_spo.nt> .
<#missing_dot_at_end_of_triple_with_string_end_nt>
rdf:type ox:TestNTripleRecovery ;
mf:name "missing dot at the end of a triple" ;
mf:action <missing_dot_at_end_of_triple_with_string_end.nt> ;
mf:result <iri_string_spo.nt> .
<#missing_dot_at_end_of_triple_with_string_end_ttl>
rdf:type ox:TestTurtleRecovery ;
mf:name "missing dot at the end of a triple" ;
mf:action <missing_dot_at_end_of_triple_with_string_end.nt> ;
mf:result <iri_string_spo.nt> .

@ -0,0 +1 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o>

@ -0,0 +1,2 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o>
<http://example.com/s> <http://example.com/p> <http://example.com/o2> .

@ -0,0 +1,2 @@
<http://example.com/s> <http://example.com/p> "o"
<http://example.com/s> <http://example.com/p> <http://example.com/o> .

@ -0,0 +1,2 @@
<http://example.com/s> <http://example.com/p> "foo"@base .
<http://example.com/s> <http://example.com/p> "bar"@prefix .

@ -0,0 +1,3 @@
@prefix : <http://example.com/> .
:s :p "foo"@base , "bar"@prefix .

@ -0,0 +1 @@
<http://foo> <http://foo> "foo"@badlanguagetag .

@ -0,0 +1,2 @@
((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((
((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((

@ -0,0 +1,2 @@
<urn:zamaudio:ZaMultiComp#preset001> <http://lv2plug.in/ns/ext/state#state> _:1 .

@ -0,0 +1,6 @@
@prefix state: <http://lv2plug.in/ns/ext/state#> .
<urn:zamaudio:ZaMultiComp#preset001>
state:state [
] .

@ -0,0 +1,3 @@
<http://example.com/prefix/s> <http://example.com/prefix/p> <http://example.com/true/o> .
<http://example.com/base/s> <http://example.com/base/p> <http://example.com/false/o> .
<http://example.com/graph/s> <http://example.com/graph/p> <http://example.com/graph/o> <http://example.com/graph/g> .

@ -0,0 +1,2 @@
<http://example.com/prefix/s> <http://example.com/prefix/p> <http://example.com/true/o> .
<http://example.com/base/s> <http://example.com/base/p> <http://example.com/false/o> .

@ -0,0 +1,10 @@
base <http://example.com/>
prefix prefix: <prefix/>
prefix base: <base/>
prefix graph: <graph/>
prefix true: <true/>
prefix false: <false/>
prefix:s prefix:p true:o .
base:s base:p false:o .
graph:g { graph:s graph:p graph:o . }

@ -0,0 +1,8 @@
base <http://example.com/>
prefix prefix: <prefix/>
prefix base: <base/>
prefix true: <true/>
prefix false: <false/>
prefix:s prefix:p true:o .
base:s base:p false:o .

@ -0,0 +1 @@
<http://foo> <http://foo> "foo"@en-us .

@ -0,0 +1,4 @@
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:foo="http://foo">
<rdf:Description rdf:about="http://foo" xml:lang="en-US" foo:="foo">
</rdf:Description>
</rdf:RDF>

@ -0,0 +1 @@
<http://foo> <http://foo> "foo"@en-US-US .

@ -0,0 +1 @@
<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> " bar\n" .

@ -0,0 +1,7 @@
<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about="http://example.com/foo">
<rdf:value> bar
</rdf:value>
</rdf:Description>
</rdf:RDF>

@ -0,0 +1,90 @@
@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix rdft: <http://www.w3.org/ns/rdftest#> .
<>
rdf:type mf:Manifest ;
rdfs:comment "Oxigraph parsers test case" ;
mf:entries (
<#no_end_line_jump>
<#blank_node_with_linebreak>
<#bad_lang>
<#language_normalization_ttl>
<#language_normalization_xml>
<#xml_entities>
<#xml_nested_entities>
<#literal_value_space>
<#bad_parentheses>
<#keyword_vs_prefix_ttl>
<#keyword_vs_prefix_trig>
<#at_keywords_as_lang_tag>
) .
<#no_end_line_jump>
rdf:type rdft:TestNTriplesPositiveSyntax ;
mf:name "No line jump at the end of the file" ;
mf:action <no_end_line_jump.nt> .
<#blank_node_with_linebreak>
rdf:type rdft:TestTurtleEval ;
mf:name "blank node with linebreak" ;
mf:action <blank_node_with_linebreak.ttl> ;
mf:result <blank_node_with_linebreak.nt> .
<#language_normalization_ttl>
rdf:type rdft:TestTurtleEval ;
mf:name "language case normalization" ;
mf:action <language_normalization.ttl> ;
mf:result <language_normalization.nt> .
<#language_normalization_xml>
rdf:type rdft:TestXMLEval ;
mf:name "language case normalization" ;
mf:action <language_normalization.rdf> ;
mf:result <language_normalization.nt> .
<#bad_lang>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "bad language tag" ;
mf:action <bad_lang.ttl> .
<#xml_entities>
rdf:type rdft:TestXMLEval ;
mf:name "custom XML entities" ;
mf:action <xml_entities.rdf> ;
mf:result <xml_entities.nt> .
<#xml_nested_entities>
rdf:type rdft:TestXMLEval ;
mf:name "custom XML entities with nested definitions" ;
mf:action <xml_nested_entities.rdf> ;
mf:result <xml_nested_entities.nt> .
<#literal_value_space>
rdf:type rdft:TestXMLEval ;
mf:name "spaces in literal values" ;
mf:action <literal_value_space.rdf> ;
mf:result <literal_value_space.nt> .
<#bad_parentheses>
rdf:type rdft:TestTurtleNegativeSyntax ;
mf:name "a lot of parentheses that might generate a stack overflow" ;
mf:action <bad_parentheses.ttl> .
<#keyword_vs_prefix_ttl>
rdf:type rdft:TestTurtleEval ;
mf:name "usage of keywords as prefix" ;
mf:action <keyword_vs_prefix.ttl> ;
mf:result <keyword_vs_prefix.nt> .
<#keyword_vs_prefix_trig>
rdf:type rdft:TestTrigEval ;
mf:name "usage of keywords as prefix" ;
mf:action <keyword_vs_prefix.trig> ;
mf:result <keyword_vs_prefix.nq> .
<#at_keywords_as_lang_tag>
rdf:type rdft:TestTurtleEval ;
mf:name "usage of at keywords as language tags" ;
mf:action <at_keywords_as_lang_tag.ttl> ;
mf:result <at_keywords_as_lang_tag.nt> .

@ -0,0 +1 @@
<http://example.com> <http://example.com> <http://example.com> .

@ -0,0 +1 @@
<http://example.com/foo> <http://example.com/2/test> "bar"^^<http://www.w3.org/2001/XMLSchema#string> .

@ -0,0 +1,10 @@
<?xml version="1.0"?>
<!DOCTYPE rdf:RDF [
<!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" >
<!ENTITY ex "http://example.com/">
]>
<rdf:RDF xmlns:ex2="&ex;2/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about="&ex;foo">
<ex2:test rdf:datatype="&xsd;string">bar</ex2:test>
</rdf:Description>
</rdf:RDF>

@ -0,0 +1 @@
<http://example.com/foo> <http://example.com/2/test> "bar"^^<http://www.w3.org/2001/XMLSchema#string> .

@ -0,0 +1,15 @@
<?xml version="1.0"?>
<!DOCTYPE rdf:RDF [
<!ENTITY ex "http://example.com/">
<!ENTITY w3 "http://www.w3.org">
<!ENTITY rdf "&w3;/1999/02/22-rdf-syntax-ns#">
<!ENTITY rdfs "&w3;/2000/01/rdf-schema#">
<!ENTITY xsd "&w3;/2001/XMLSchema#">
]>
<rdf:RDF xmlns:ex2="&ex;2/" xmlns:rdf="&rdf;">
<rdf:Description rdf:about="&ex;foo">
<ex2:test rdf:datatype="&xsd;string">bar</ex2:test>
</rdf:Description>
</rdf:RDF>

@ -0,0 +1,13 @@
Copyright 2011-2022 David Robillard <d@drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

@ -0,0 +1 @@
Testsuite from [Serd](https://drobilla.net/software/serd) project.

@ -0,0 +1,2 @@
# prefix name must end in a :
@prefix a <#> .

@ -0,0 +1,3 @@
# Forbidden by RDF - predicate cannot be blank
@prefix : <http://example.org/base#> .
:a [ :b :c ] :d .

@ -0,0 +1,3 @@
# Forbidden by RDF - predicate cannot be blank
@prefix : <http://example.org/base#> .
:a [] :b .

@ -0,0 +1,3 @@
# 'a' only allowed as a predicate
@prefix : <http://example.org/base#> .
a :a :b .

@ -0,0 +1,3 @@
# No comma is allowed in collections
@prefix : <http://example.org/stuff/1.0/> .
:a :b ( "apple", "banana" ) .

@ -0,0 +1,4 @@
# N3 {}s are not in Turtle
@prefix : <http://example.org/stuff/1.0/> .
{ :a :b :c . } :d :e .

@ -0,0 +1,3 @@
# is and of are not in turtle
@prefix : <http://example.org/stuff/1.0/> .
:a is :b of :c .

@ -0,0 +1,4 @@
# paths are not in turtle
@prefix : <http://example.org/stuff/1.0/> .
:a.:b.:c .
:a^:b^:c .

@ -0,0 +1,2 @@
@keywords something.
# @keywords is not in turtle

@ -0,0 +1,3 @@
# implies is not in turtle
@prefix : <http://example.org/stuff/1.0/> .
:a => :b .

@ -0,0 +1,3 @@
# equivalence is not in turtle
@prefix : <http://example.org/stuff/1.0/> .
:a = :b .

@ -0,0 +1,3 @@
# @forAll is not in turtle
@prefix : <http://example.org/stuff/1.0/> .
@forAll :x .

@ -0,0 +1,3 @@
# @forSome is not in turtle
@prefix : <http://example.org/stuff/1.0/> .
@forSome :x .

@ -0,0 +1,3 @@
# <= is not in turtle
@prefix : <http://example.org/stuff/1.0/> .
:a <= :b .

@ -0,0 +1,6 @@
# Test long literals with missing end
@prefix : <http://example.org/ex#> .
:a :b """a long
literal
with
newlines

@ -0,0 +1 @@
@base "I'm quite certain this is not a URI" .

@ -0,0 +1 @@
<http://example.org/s> <http://example.org/p> _|invalid .

@ -0,0 +1,3 @@
@prefix eg: <http://example.org/> .
_:.bad a eg:Thing .

@ -0,0 +1,3 @@
# This file starts with the first two bytes of the UTF-8 Byte Order Mark
<http://example.org/thing> a <http://example.org/Thing> .

@ -0,0 +1,3 @@
@prefix eg: <http://example.org/> .
eg:bad <http://example.org/p> <http://example.org/o> .

@ -0,0 +1 @@
bad:s <http://example.org/p> <http://example.org/o> .

@ -0,0 +1 @@
<˙˙˙://a.example/s> <http://a.eoampl†/p> "\u0006!#[]\u007F" .

@ -0,0 +1 @@
<http://example.org/s> <http://example.org/p> "value"^<http://example.org/t> .

@ -0,0 +1 @@
<> <http://example.org/pred> "hello"^^"not-a-uri" .

@ -0,0 +1 @@
<http://example.org/s> . <http://example.org/p> <http://example.org/o> .

@ -0,0 +1,3 @@
@prefix eg: <http://example.org/> .
<> eg:comment ""

@ -0,0 +1,3 @@
@prefix eg: <http://example.org/> .
<> eg:comment "

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save