- Compatible with async IO - Turtle/TriG parser recovery on simple errorspull/555/head
parent
a1cbfdf67d
commit
71b1768d28
@ -0,0 +1,28 @@ |
|||||||
|
#![no_main] |
||||||
|
|
||||||
|
use libfuzzer_sys::fuzz_target; |
||||||
|
use oxttl::N3Parser; |
||||||
|
|
||||||
|
fuzz_target!(|data: &[u8]| { |
||||||
|
let mut quads = Vec::new(); |
||||||
|
let mut parser = N3Parser::new() |
||||||
|
.with_base_iri("http://example.com/") |
||||||
|
.unwrap() |
||||||
|
.parse(); |
||||||
|
for chunk in data.split(|c| *c == 0xFF) { |
||||||
|
parser.extend_from_slice(chunk); |
||||||
|
while let Some(result) = parser.read_next() { |
||||||
|
if let Ok(quad) = result { |
||||||
|
quads.push(quad); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
parser.end(); |
||||||
|
while let Some(result) = parser.read_next() { |
||||||
|
if let Ok(quad) = result { |
||||||
|
quads.push(quad); |
||||||
|
} |
||||||
|
} |
||||||
|
assert!(parser.is_end()); |
||||||
|
//TODO: serialize
|
||||||
|
}); |
@ -0,0 +1,49 @@ |
|||||||
|
#![no_main] |
||||||
|
|
||||||
|
use libfuzzer_sys::fuzz_target; |
||||||
|
use oxttl::{NQuadsParser, NQuadsSerializer}; |
||||||
|
|
||||||
|
fuzz_target!(|data: &[u8]| { |
||||||
|
// We parse
|
||||||
|
let mut quads = Vec::new(); |
||||||
|
let mut parser = NQuadsParser::new().with_quoted_triples().parse(); |
||||||
|
for chunk in data.split(|c| *c == 0xFF) { |
||||||
|
parser.extend_from_slice(chunk); |
||||||
|
while let Some(result) = parser.read_next() { |
||||||
|
if let Ok(quad) = result { |
||||||
|
quads.push(quad); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
parser.end(); |
||||||
|
while let Some(result) = parser.read_next() { |
||||||
|
if let Ok(quad) = result { |
||||||
|
quads.push(quad); |
||||||
|
} |
||||||
|
} |
||||||
|
assert!(parser.is_end()); |
||||||
|
|
||||||
|
// We serialize
|
||||||
|
let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new()); |
||||||
|
for quad in &quads { |
||||||
|
writer.write_quad(quad).unwrap(); |
||||||
|
} |
||||||
|
let new_serialization = writer.finish(); |
||||||
|
|
||||||
|
// We parse the serialization
|
||||||
|
let new_quads = NQuadsParser::new() |
||||||
|
.with_quoted_triples() |
||||||
|
.parse_from_read(new_serialization.as_slice()) |
||||||
|
.collect::<Result<Vec<_>, _>>() |
||||||
|
.map_err(|e| { |
||||||
|
format!( |
||||||
|
"Error on {:?} from {quads:?} based on {:?}: {e}", |
||||||
|
String::from_utf8_lossy(&new_serialization), |
||||||
|
String::from_utf8_lossy(data) |
||||||
|
) |
||||||
|
}) |
||||||
|
.unwrap(); |
||||||
|
|
||||||
|
// We check the roundtrip has not changed anything
|
||||||
|
assert_eq!(new_quads, quads); |
||||||
|
}); |
@ -0,0 +1,53 @@ |
|||||||
|
#![no_main] |
||||||
|
|
||||||
|
use libfuzzer_sys::fuzz_target; |
||||||
|
use oxttl::{TriGParser, TriGSerializer}; |
||||||
|
|
||||||
|
fuzz_target!(|data: &[u8]| { |
||||||
|
// We parse
|
||||||
|
let mut quads = Vec::new(); |
||||||
|
let mut parser = TriGParser::new() |
||||||
|
.with_quoted_triples() |
||||||
|
.with_base_iri("http://example.com/") |
||||||
|
.unwrap() |
||||||
|
.parse(); |
||||||
|
for chunk in data.split(|c| *c == 0xFF) { |
||||||
|
parser.extend_from_slice(chunk); |
||||||
|
while let Some(result) = parser.read_next() { |
||||||
|
if let Ok(quad) = result { |
||||||
|
quads.push(quad); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
parser.end(); |
||||||
|
while let Some(result) = parser.read_next() { |
||||||
|
if let Ok(quad) = result { |
||||||
|
quads.push(quad); |
||||||
|
} |
||||||
|
} |
||||||
|
assert!(parser.is_end()); |
||||||
|
|
||||||
|
// We serialize
|
||||||
|
let mut writer = TriGSerializer::new().serialize_to_write(Vec::new()); |
||||||
|
for quad in &quads { |
||||||
|
writer.write_quad(quad).unwrap(); |
||||||
|
} |
||||||
|
let new_serialization = writer.finish().unwrap(); |
||||||
|
|
||||||
|
// We parse the serialization
|
||||||
|
let new_quads = TriGParser::new() |
||||||
|
.with_quoted_triples() |
||||||
|
.parse_from_read(new_serialization.as_slice()) |
||||||
|
.collect::<Result<Vec<_>, _>>() |
||||||
|
.map_err(|e| { |
||||||
|
format!( |
||||||
|
"Error on {:?} from {quads:?} based on {:?}: {e}", |
||||||
|
String::from_utf8_lossy(&new_serialization), |
||||||
|
String::from_utf8_lossy(data) |
||||||
|
) |
||||||
|
}) |
||||||
|
.unwrap(); |
||||||
|
|
||||||
|
// We check the roundtrip has not changed anything
|
||||||
|
assert_eq!(new_quads, quads); |
||||||
|
}); |
@ -0,0 +1,27 @@ |
|||||||
|
[package] |
||||||
|
name = "oxttl" |
||||||
|
version = "0.1.0" |
||||||
|
authors = ["Tpt <thomas@pellissier-tanon.fr>"] |
||||||
|
license = "MIT OR Apache-2.0" |
||||||
|
readme = "README.md" |
||||||
|
keywords = ["SPARQL"] |
||||||
|
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxttl" |
||||||
|
homepage = "https://oxigraph.org/" |
||||||
|
description = """ |
||||||
|
N-Triples, N-Quads, Turtle, TriG and N3 parsers and serializers
||||||
|
""" |
||||||
|
edition = "2021" |
||||||
|
rust-version = "1.65" |
||||||
|
|
||||||
|
[features] |
||||||
|
default = [] |
||||||
|
rdf-star = ["oxrdf/rdf-star"] |
||||||
|
|
||||||
|
[dependencies] |
||||||
|
memchr = "2" |
||||||
|
oxrdf = { version = "0.2.0-alpha.1-dev", path = "../oxrdf" } |
||||||
|
oxiri = "0.2" |
||||||
|
oxilangtag = "0.1" |
||||||
|
|
||||||
|
[package.metadata.docs.rs] |
||||||
|
all-features = true |
@ -0,0 +1,938 @@ |
|||||||
|
use crate::toolkit::{TokenRecognizer, TokenRecognizerError}; |
||||||
|
use memchr::{memchr, memchr2}; |
||||||
|
use oxilangtag::LanguageTag; |
||||||
|
use oxiri::Iri; |
||||||
|
use oxrdf::NamedNode; |
||||||
|
use std::borrow::Cow; |
||||||
|
use std::collections::HashMap; |
||||||
|
use std::ops::{Range, RangeInclusive}; |
||||||
|
use std::str; |
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq)] |
||||||
|
pub enum N3Token<'a> { |
||||||
|
IriRef(Iri<String>), |
||||||
|
PrefixedName { |
||||||
|
prefix: &'a str, |
||||||
|
local: Cow<'a, str>, |
||||||
|
might_be_invalid_iri: bool, |
||||||
|
}, |
||||||
|
Variable(Cow<'a, str>), |
||||||
|
BlankNodeLabel(&'a str), |
||||||
|
String(String), |
||||||
|
Integer(&'a str), |
||||||
|
Decimal(&'a str), |
||||||
|
Double(&'a str), |
||||||
|
LangTag(&'a str), |
||||||
|
Punctuation(&'a str), |
||||||
|
PlainKeyword(&'a str), |
||||||
|
} |
||||||
|
|
||||||
|
#[derive(Eq, PartialEq)] |
||||||
|
pub enum N3LexerMode { |
||||||
|
NTriples, |
||||||
|
Turtle, |
||||||
|
N3, |
||||||
|
} |
||||||
|
|
||||||
|
#[derive(Default)] |
||||||
|
pub struct N3LexerOptions { |
||||||
|
pub base_iri: Option<Iri<String>>, |
||||||
|
} |
||||||
|
|
||||||
|
pub struct N3Lexer { |
||||||
|
mode: N3LexerMode, |
||||||
|
} |
||||||
|
|
||||||
|
// TODO: there are a lot of 'None' (missing data) returned even if the stream is ending!!!
|
||||||
|
// TODO: simplify by not giving is_end and fail with an "unexpected eof" is none is returned when is_end=true?
|
||||||
|
|
||||||
|
impl TokenRecognizer for N3Lexer { |
||||||
|
type Token<'a> = N3Token<'a>; |
||||||
|
type Options = N3LexerOptions; |
||||||
|
|
||||||
|
fn recognize_next_token<'a>( |
||||||
|
&mut self, |
||||||
|
data: &'a [u8], |
||||||
|
is_ending: bool, |
||||||
|
options: &N3LexerOptions, |
||||||
|
) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> { |
||||||
|
match *data.first()? { |
||||||
|
b'<' => match *data.get(1)? { |
||||||
|
b'<' => Some((2, Ok(N3Token::Punctuation("<<")))), |
||||||
|
b'=' if self.mode == N3LexerMode::N3 => { |
||||||
|
if let Some((consumed, result)) = Self::recognize_iri(data, options) { |
||||||
|
Some(if let Ok(result) = result { |
||||||
|
(consumed, Ok(result)) |
||||||
|
} else { |
||||||
|
(2, Ok(N3Token::Punctuation("<="))) |
||||||
|
}) |
||||||
|
} else if is_ending { |
||||||
|
Some((2, Ok(N3Token::Punctuation("<=")))) |
||||||
|
} else { |
||||||
|
None |
||||||
|
} |
||||||
|
} |
||||||
|
b'-' if self.mode == N3LexerMode::N3 => { |
||||||
|
if let Some((consumed, result)) = Self::recognize_iri(data, options) { |
||||||
|
Some(if let Ok(result) = result { |
||||||
|
(consumed, Ok(result)) |
||||||
|
} else { |
||||||
|
(2, Ok(N3Token::Punctuation("<-"))) |
||||||
|
}) |
||||||
|
} else if is_ending { |
||||||
|
Some((2, Ok(N3Token::Punctuation("<-")))) |
||||||
|
} else { |
||||||
|
None |
||||||
|
} |
||||||
|
} |
||||||
|
_ => Self::recognize_iri(data, options), |
||||||
|
}, |
||||||
|
b'>' => { |
||||||
|
if *data.get(1)? == b'>' { |
||||||
|
Some((2, Ok(N3Token::Punctuation(">>")))) |
||||||
|
} else { |
||||||
|
Some((1, Ok(N3Token::Punctuation(">")))) |
||||||
|
} |
||||||
|
} |
||||||
|
b'_' => match data.get(1)? { |
||||||
|
b':' => Self::recognize_blank_node_label(data), |
||||||
|
c => Some(( |
||||||
|
1, |
||||||
|
Err((0, format!("Unexpected character '{}'", char::from(*c))).into()), |
||||||
|
)), |
||||||
|
}, |
||||||
|
b'"' => { |
||||||
|
if self.mode != N3LexerMode::NTriples |
||||||
|
&& *data.get(1)? == b'"' |
||||||
|
&& *data.get(2)? == b'"' |
||||||
|
{ |
||||||
|
Self::recognize_long_string(data, b'"') |
||||||
|
} else { |
||||||
|
Self::recognize_string(data, b'"') |
||||||
|
} |
||||||
|
} |
||||||
|
b'\'' if self.mode != N3LexerMode::NTriples => { |
||||||
|
if *data.get(1)? == b'\'' && *data.get(2)? == b'\'' { |
||||||
|
Self::recognize_long_string(data, b'\'') |
||||||
|
} else { |
||||||
|
Self::recognize_string(data, b'\'') |
||||||
|
} |
||||||
|
} |
||||||
|
b'@' => Self::recognize_lang_tag(data), |
||||||
|
b'.' => match data.get(1) { |
||||||
|
Some(b'0'..=b'9') => Self::recognize_number(data), |
||||||
|
Some(_) => Some((1, Ok(N3Token::Punctuation(".")))), |
||||||
|
None => is_ending.then_some((1, Ok(N3Token::Punctuation(".")))), |
||||||
|
}, |
||||||
|
b'^' => { |
||||||
|
if *data.get(1)? == b'^' { |
||||||
|
Some((2, Ok(N3Token::Punctuation("^^")))) |
||||||
|
} else { |
||||||
|
Some((1, Ok(N3Token::Punctuation("^")))) |
||||||
|
} |
||||||
|
} |
||||||
|
b'(' => Some((1, Ok(N3Token::Punctuation("(")))), |
||||||
|
b')' => Some((1, Ok(N3Token::Punctuation(")")))), |
||||||
|
b'[' => Some((1, Ok(N3Token::Punctuation("[")))), |
||||||
|
b']' => Some((1, Ok(N3Token::Punctuation("]")))), |
||||||
|
b'{' => { |
||||||
|
if *data.get(1)? == b'|' { |
||||||
|
Some((2, Ok(N3Token::Punctuation("{|")))) |
||||||
|
} else { |
||||||
|
Some((1, Ok(N3Token::Punctuation("{")))) |
||||||
|
} |
||||||
|
} |
||||||
|
b'}' => Some((1, Ok(N3Token::Punctuation("}")))), |
||||||
|
b',' => Some((1, Ok(N3Token::Punctuation(",")))), |
||||||
|
b';' => Some((1, Ok(N3Token::Punctuation(";")))), |
||||||
|
b'!' => Some((1, Ok(N3Token::Punctuation("!")))), |
||||||
|
b'|' => { |
||||||
|
if *data.get(1)? == b'}' { |
||||||
|
Some((2, Ok(N3Token::Punctuation("|}")))) |
||||||
|
} else { |
||||||
|
Some((1, Ok(N3Token::Punctuation("|")))) |
||||||
|
} |
||||||
|
} |
||||||
|
b'=' => { |
||||||
|
if *data.get(1)? == b'>' { |
||||||
|
Some((2, Ok(N3Token::Punctuation("=>")))) |
||||||
|
} else { |
||||||
|
Some((1, Ok(N3Token::Punctuation("=")))) |
||||||
|
} |
||||||
|
} |
||||||
|
b'0'..=b'9' | b'+' | b'-' => Self::recognize_number(data), |
||||||
|
b'?' => Self::recognize_variable(data, is_ending), |
||||||
|
_ => Self::recognize_pname_or_keyword(data, is_ending), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl N3Lexer { |
||||||
|
pub fn new(mode: N3LexerMode) -> Self { |
||||||
|
Self { mode } |
||||||
|
} |
||||||
|
|
||||||
|
fn recognize_iri( |
||||||
|
data: &[u8], |
||||||
|
options: &N3LexerOptions, |
||||||
|
) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> { |
||||||
|
// [18] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>' /* #x00=NULL #01-#x1F=control codes #x20=space */
|
||||||
|
let mut string = Vec::new(); |
||||||
|
let mut i = 1; |
||||||
|
loop { |
||||||
|
let end = memchr2(b'>', b'\\', &data[i..])?; |
||||||
|
string.extend_from_slice(&data[i..i + end]); |
||||||
|
i += end; |
||||||
|
match data[i] { |
||||||
|
b'>' => { |
||||||
|
return Some((i + 1, Self::parse_iri(string, 0..=i, options))); |
||||||
|
} |
||||||
|
b'\\' => { |
||||||
|
let (additional, c) = Self::recognize_escape(&data[i..], i, false)?; |
||||||
|
i += additional + 1; |
||||||
|
match c { |
||||||
|
Ok(c) => { |
||||||
|
let mut buf = [0; 4]; |
||||||
|
string.extend_from_slice(c.encode_utf8(&mut buf).as_bytes()); |
||||||
|
} |
||||||
|
Err(e) => return Some((i, Err(e))), |
||||||
|
} |
||||||
|
} |
||||||
|
_ => unreachable!(), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn parse_iri( |
||||||
|
iri: Vec<u8>, |
||||||
|
position: RangeInclusive<usize>, |
||||||
|
options: &N3LexerOptions, |
||||||
|
) -> Result<N3Token<'static>, TokenRecognizerError> { |
||||||
|
let iri = String::from_utf8(iri).map_err(|e| { |
||||||
|
( |
||||||
|
position.clone(), |
||||||
|
format!("The IRI contains invalid UTF-8 characters: {e}"), |
||||||
|
) |
||||||
|
})?; |
||||||
|
let iri = if let Some(base_iri) = options.base_iri.as_ref() { |
||||||
|
base_iri.resolve(&iri) |
||||||
|
} else { |
||||||
|
Iri::parse(iri) |
||||||
|
} |
||||||
|
.map_err(|e| (position, e.to_string()))?; |
||||||
|
Ok(N3Token::IriRef(iri)) |
||||||
|
} |
||||||
|
|
||||||
|
fn recognize_pname_or_keyword( |
||||||
|
data: &[u8], |
||||||
|
is_ending: bool, |
||||||
|
) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> { |
||||||
|
// [139s] PNAME_NS ::= PN_PREFIX? ':'
|
||||||
|
// [140s] PNAME_LN ::= PNAME_NS PN_LOCAL
|
||||||
|
|
||||||
|
// [167s] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?
|
||||||
|
let mut i = 0; |
||||||
|
loop { |
||||||
|
if let Some(r) = Self::recognize_unicode_char(&data[i..], i) { |
||||||
|
match r { |
||||||
|
Ok((c, consumed)) => { |
||||||
|
if c == ':' { |
||||||
|
i += consumed; |
||||||
|
break; |
||||||
|
} else if i == 0 { |
||||||
|
if !Self::is_possible_pn_chars_base(c) { |
||||||
|
return Some(( |
||||||
|
consumed, |
||||||
|
Err(( |
||||||
|
0..consumed, |
||||||
|
format!( |
||||||
|
"'{c}' is not allowed at the beginning of a prefix name" |
||||||
|
), |
||||||
|
) |
||||||
|
.into()), |
||||||
|
)); |
||||||
|
} |
||||||
|
i += consumed; |
||||||
|
} else if Self::is_possible_pn_chars(c) || c == '.' { |
||||||
|
i += consumed; |
||||||
|
} else { |
||||||
|
while data[..i].ends_with(b".") { |
||||||
|
i -= 1; |
||||||
|
} |
||||||
|
return Some(( |
||||||
|
i, |
||||||
|
Ok(N3Token::PlainKeyword(str::from_utf8(&data[..i]).unwrap())), |
||||||
|
)); |
||||||
|
} |
||||||
|
} |
||||||
|
Err(e) => return Some((e.position.end, Err(e))), |
||||||
|
} |
||||||
|
} else if is_ending { |
||||||
|
while data[..i].ends_with(b".") { |
||||||
|
i -= 1; |
||||||
|
} |
||||||
|
return Some(if i == 0 { |
||||||
|
( |
||||||
|
1, |
||||||
|
Err((0..1, format!("Unexpected byte {}", data[0])).into()), |
||||||
|
) |
||||||
|
} else { |
||||||
|
( |
||||||
|
i, |
||||||
|
Ok(N3Token::PlainKeyword(str::from_utf8(&data[..i]).unwrap())), |
||||||
|
) |
||||||
|
}); |
||||||
|
} else { |
||||||
|
return None; |
||||||
|
} |
||||||
|
} |
||||||
|
let pn_prefix = str::from_utf8(&data[..i - 1]).unwrap(); |
||||||
|
if pn_prefix.ends_with('.') { |
||||||
|
return Some(( |
||||||
|
i, |
||||||
|
Err(( |
||||||
|
0..i, |
||||||
|
format!( |
||||||
|
"'{pn_prefix}' is not a valid prefix: prefixes are not allowed to end with '.'"), |
||||||
|
) |
||||||
|
.into()), |
||||||
|
)); |
||||||
|
} |
||||||
|
|
||||||
|
let (consumed, pn_local_result) = Self::recognize_optional_pn_local(&data[i..], is_ending)?; |
||||||
|
Some(( |
||||||
|
consumed + i, |
||||||
|
pn_local_result.map(|(local, might_be_invalid_iri)| N3Token::PrefixedName { |
||||||
|
prefix: pn_prefix, |
||||||
|
local, |
||||||
|
might_be_invalid_iri, |
||||||
|
}), |
||||||
|
)) |
||||||
|
} |
||||||
|
|
||||||
|
fn recognize_variable( |
||||||
|
data: &[u8], |
||||||
|
is_ending: bool, |
||||||
|
) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> { |
||||||
|
// [36] QUICK_VAR_NAME ::= "?" PN_LOCAL
|
||||||
|
let (consumed, result) = Self::recognize_optional_pn_local(&data[1..], is_ending)?; |
||||||
|
Some(( |
||||||
|
consumed + 1, |
||||||
|
result.and_then(|(name, _)| { |
||||||
|
if name.is_empty() { |
||||||
|
Err((0..consumed, "A variable name is not allowed to be empty").into()) |
||||||
|
} else { |
||||||
|
Ok(N3Token::Variable(name)) |
||||||
|
} |
||||||
|
}), |
||||||
|
)) |
||||||
|
} |
||||||
|
|
||||||
|
fn recognize_optional_pn_local( |
||||||
|
data: &[u8], |
||||||
|
is_ending: bool, |
||||||
|
) -> Option<(usize, Result<(Cow<'_, str>, bool), TokenRecognizerError>)> { |
||||||
|
// [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
|
||||||
|
let mut i = 0; |
||||||
|
let mut buffer = None; // Buffer if there are some escaped characters
|
||||||
|
let mut position_that_is_already_in_buffer = 0; |
||||||
|
let mut might_be_invalid_iri = false; |
||||||
|
loop { |
||||||
|
if let Some(r) = Self::recognize_unicode_char(&data[i..], i) { |
||||||
|
match r { |
||||||
|
Ok((c, consumed)) => { |
||||||
|
if c == '%' { |
||||||
|
i += 1; |
||||||
|
let a = char::from(*data.get(i)?); |
||||||
|
i += 1; |
||||||
|
let b = char::from(*data.get(i)?); |
||||||
|
if !matches!(a, '0'..='9' | 'A'..='F' | 'a'..='f') |
||||||
|
|| !matches!(b, '0'..='9' | 'A'..='F' | 'a'..='f') |
||||||
|
{ |
||||||
|
return Some((i + 1, Err(( |
||||||
|
i - 2..=i, format!("escapes in IRIs should be % followed by two hexadecimal characters, found '%{a}{b}'") |
||||||
|
).into()))); |
||||||
|
} |
||||||
|
i += 1; |
||||||
|
} else if c == '\\' { |
||||||
|
i += 1; |
||||||
|
let a = char::from(*data.get(i)?); |
||||||
|
if matches!( |
||||||
|
a, |
||||||
|
'_' | '~' |
||||||
|
| '.' |
||||||
|
| '-' |
||||||
|
| '!' |
||||||
|
| '$' |
||||||
|
| '&' |
||||||
|
| '\'' |
||||||
|
| '(' |
||||||
|
| ')' |
||||||
|
| '*' |
||||||
|
| '+' |
||||||
|
| ',' |
||||||
|
| ';' |
||||||
|
| '=' |
||||||
|
) { |
||||||
|
// ok to escape
|
||||||
|
} else if matches!(a, '/' | '?' | '#' | '@' | '%') { |
||||||
|
// ok to escape but requires IRI validation
|
||||||
|
might_be_invalid_iri = true; |
||||||
|
} else { |
||||||
|
return Some((i + 1, Err(( |
||||||
|
i..=i, format!("The character that are allowed to be escaped in IRIs are _~.-!$&'()*+,;=/?#@%, found '{a}'") |
||||||
|
).into()))); |
||||||
|
} |
||||||
|
let buffer = buffer.get_or_insert_with(String::new); |
||||||
|
// We add the missing bytes
|
||||||
|
if i - position_that_is_already_in_buffer > 1 { |
||||||
|
buffer.push_str( |
||||||
|
str::from_utf8( |
||||||
|
&data[position_that_is_already_in_buffer..i - 1], |
||||||
|
) |
||||||
|
.unwrap(), |
||||||
|
) |
||||||
|
} |
||||||
|
buffer.push(a); |
||||||
|
i += 1; |
||||||
|
position_that_is_already_in_buffer = i; |
||||||
|
} else if i == 0 { |
||||||
|
if !(Self::is_possible_pn_chars_u(c) || c == ':' || c.is_ascii_digit()) |
||||||
|
{ |
||||||
|
return Some((0, Ok((Cow::Borrowed(""), false)))); |
||||||
|
} |
||||||
|
might_be_invalid_iri |= |
||||||
|
Self::is_possible_pn_chars_base_but_not_valid_iri(c) || c == ':'; |
||||||
|
i += consumed; |
||||||
|
} else if Self::is_possible_pn_chars(c) || c == ':' || c == '.' { |
||||||
|
might_be_invalid_iri |= |
||||||
|
Self::is_possible_pn_chars_base_but_not_valid_iri(c) || c == ':'; |
||||||
|
i += consumed; |
||||||
|
} else { |
||||||
|
let buffer = if let Some(mut buffer) = buffer { |
||||||
|
buffer.push_str( |
||||||
|
str::from_utf8(&data[position_that_is_already_in_buffer..i]) |
||||||
|
.unwrap(), |
||||||
|
); |
||||||
|
// We do not include the last dot
|
||||||
|
while buffer.ends_with('.') { |
||||||
|
buffer.pop(); |
||||||
|
i -= 1; |
||||||
|
} |
||||||
|
Cow::Owned(buffer) |
||||||
|
} else { |
||||||
|
let mut data = str::from_utf8(&data[..i]).unwrap(); |
||||||
|
// We do not include the last dot
|
||||||
|
while let Some(d) = data.strip_suffix('.') { |
||||||
|
data = d; |
||||||
|
i -= 1; |
||||||
|
} |
||||||
|
Cow::Borrowed(data) |
||||||
|
}; |
||||||
|
return Some((i, Ok((buffer, might_be_invalid_iri)))); |
||||||
|
} |
||||||
|
} |
||||||
|
Err(e) => return Some((e.position.end, Err(e))), |
||||||
|
} |
||||||
|
} else if is_ending { |
||||||
|
let buffer = if let Some(mut buffer) = buffer { |
||||||
|
// We do not include the last dot
|
||||||
|
while buffer.ends_with('.') { |
||||||
|
buffer.pop(); |
||||||
|
i -= 1; |
||||||
|
} |
||||||
|
Cow::Owned(buffer) |
||||||
|
} else { |
||||||
|
let mut data = str::from_utf8(&data[..i]).unwrap(); |
||||||
|
// We do not include the last dot
|
||||||
|
while let Some(d) = data.strip_suffix('.') { |
||||||
|
data = d; |
||||||
|
i -= 1; |
||||||
|
} |
||||||
|
Cow::Borrowed(data) |
||||||
|
}; |
||||||
|
return Some((i, Ok((buffer, might_be_invalid_iri)))); |
||||||
|
} else { |
||||||
|
return None; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn recognize_blank_node_label( |
||||||
|
data: &[u8], |
||||||
|
) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> { |
||||||
|
// [141s] BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
|
||||||
|
let mut i = 2; |
||||||
|
loop { |
||||||
|
match Self::recognize_unicode_char(&data[i..], i)? { |
||||||
|
Ok((c, consumed)) => { |
||||||
|
if (i == 2 && (Self::is_possible_pn_chars_u(c) || c.is_ascii_digit())) |
||||||
|
|| (i > 2 && Self::is_possible_pn_chars(c)) |
||||||
|
{ |
||||||
|
// Ok
|
||||||
|
} else if i > 2 && c == '.' { |
||||||
|
if data[i - 1] == b'.' { |
||||||
|
i -= 1; |
||||||
|
return Some(( |
||||||
|
i, |
||||||
|
Ok(N3Token::BlankNodeLabel( |
||||||
|
str::from_utf8(&data[2..i]).unwrap(), |
||||||
|
)), |
||||||
|
)); |
||||||
|
} |
||||||
|
} else if i == 0 { |
||||||
|
return Some(( |
||||||
|
i, |
||||||
|
Err((0..i, "A blank node ID should not be empty").into()), |
||||||
|
)); |
||||||
|
} else if data[i - 1] == b'.' { |
||||||
|
i -= 1; |
||||||
|
return Some(( |
||||||
|
i, |
||||||
|
Ok(N3Token::BlankNodeLabel( |
||||||
|
str::from_utf8(&data[2..i]).unwrap(), |
||||||
|
)), |
||||||
|
)); |
||||||
|
} else { |
||||||
|
return Some(( |
||||||
|
i, |
||||||
|
Ok(N3Token::BlankNodeLabel( |
||||||
|
str::from_utf8(&data[2..i]).unwrap(), |
||||||
|
)), |
||||||
|
)); |
||||||
|
} |
||||||
|
i += consumed; |
||||||
|
} |
||||||
|
Err(e) => return Some((e.position.end, Err(e))), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn recognize_lang_tag( |
||||||
|
data: &[u8], |
||||||
|
) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> { |
||||||
|
// [144s] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
|
||||||
|
let mut is_last_block_empty = true; |
||||||
|
for (i, c) in data[1..].iter().enumerate() { |
||||||
|
if c.is_ascii_alphabetic() { |
||||||
|
is_last_block_empty = false; |
||||||
|
} else if i == 0 { |
||||||
|
return Some(( |
||||||
|
1, |
||||||
|
Err((1..2, "A language code should always start with a letter").into()), |
||||||
|
)); |
||||||
|
} else if is_last_block_empty { |
||||||
|
return Some((i, Self::parse_lang_tag(&data[1..i], 1..i - 1))); |
||||||
|
} else if *c == b'-' { |
||||||
|
is_last_block_empty = true; |
||||||
|
} else { |
||||||
|
return Some((i + 1, Self::parse_lang_tag(&data[1..=i], 1..i))); |
||||||
|
} |
||||||
|
} |
||||||
|
None |
||||||
|
} |
||||||
|
|
||||||
|
fn parse_lang_tag( |
||||||
|
lang_tag: &[u8], |
||||||
|
position: Range<usize>, |
||||||
|
) -> Result<N3Token<'_>, TokenRecognizerError> { |
||||||
|
Ok(N3Token::LangTag( |
||||||
|
LanguageTag::parse(str::from_utf8(lang_tag).unwrap()) |
||||||
|
.map_err(|e| (position.clone(), e.to_string()))? |
||||||
|
.into_inner(), |
||||||
|
)) |
||||||
|
} |
||||||
|
|
||||||
|
fn recognize_string( |
||||||
|
data: &[u8], |
||||||
|
delimiter: u8, |
||||||
|
) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> { |
||||||
|
// [22] STRING_LITERAL_QUOTE ::= '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"' /* #x22=" #x5C=\ #xA=new line #xD=carriage return */
|
||||||
|
// [23] STRING_LITERAL_SINGLE_QUOTE ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */
|
||||||
|
let mut string = String::new(); |
||||||
|
let mut i = 1; |
||||||
|
loop { |
||||||
|
let end = memchr2(delimiter, b'\\', &data[i..])?; |
||||||
|
match str::from_utf8(&data[i..i + end]) { |
||||||
|
Ok(a) => string.push_str(a), |
||||||
|
Err(e) => { |
||||||
|
return Some(( |
||||||
|
end, |
||||||
|
Err(( |
||||||
|
i..i + end, |
||||||
|
format!("The string contains invalid UTF-8 characters: {e}"), |
||||||
|
) |
||||||
|
.into()), |
||||||
|
)) |
||||||
|
} |
||||||
|
}; |
||||||
|
i += end; |
||||||
|
match data[i] { |
||||||
|
c if c == delimiter => { |
||||||
|
return Some((i + 1, Ok(N3Token::String(string)))); |
||||||
|
} |
||||||
|
b'\\' => { |
||||||
|
let (additional, c) = Self::recognize_escape(&data[i..], i, true)?; |
||||||
|
i += additional + 1; |
||||||
|
match c { |
||||||
|
Ok(c) => { |
||||||
|
string.push(c); |
||||||
|
} |
||||||
|
Err(e) => { |
||||||
|
// We read until the end of string char
|
||||||
|
let end = memchr(delimiter, &data[i..])?; |
||||||
|
return Some((i + end + 1, Err(e))); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
_ => unreachable!(), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn recognize_long_string( |
||||||
|
data: &[u8], |
||||||
|
delimiter: u8, |
||||||
|
) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> { |
||||||
|
// [24] STRING_LITERAL_LONG_SINGLE_QUOTE ::= "'''" (("'" | "''")? ([^'\] | ECHAR | UCHAR))* "'''"
|
||||||
|
// [25] STRING_LITERAL_LONG_QUOTE ::= '"""' (('"' | '""')? ([^"\] | ECHAR | UCHAR))* '"""'
|
||||||
|
let mut string = String::new(); |
||||||
|
let mut i = 3; |
||||||
|
loop { |
||||||
|
let end = memchr2(delimiter, b'\\', &data[i..])?; |
||||||
|
match str::from_utf8(&data[i..i + end]) { |
||||||
|
Ok(a) => string.push_str(a), |
||||||
|
Err(e) => { |
||||||
|
return Some(( |
||||||
|
end, |
||||||
|
Err(( |
||||||
|
i..i + end, |
||||||
|
format!("The string contains invalid UTF-8 characters: {e}"), |
||||||
|
) |
||||||
|
.into()), |
||||||
|
)) |
||||||
|
} |
||||||
|
}; |
||||||
|
i += end; |
||||||
|
match data[i] { |
||||||
|
c if c == delimiter => { |
||||||
|
if *data.get(i + 1)? == delimiter && *data.get(i + 2)? == delimiter { |
||||||
|
return Some((i + 3, Ok(N3Token::String(string)))); |
||||||
|
} |
||||||
|
i += 1; |
||||||
|
string.push(char::from(delimiter)); |
||||||
|
} |
||||||
|
b'\\' => { |
||||||
|
let (additional, c) = Self::recognize_escape(&data[i..], i, true)?; |
||||||
|
i += additional + 1; |
||||||
|
match c { |
||||||
|
Ok(c) => { |
||||||
|
string.push(c); |
||||||
|
} |
||||||
|
Err(e) => return Some((i, Err(e))), |
||||||
|
} |
||||||
|
} |
||||||
|
_ => unreachable!(), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn recognize_number(data: &[u8]) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> { |
||||||
|
// [19] INTEGER ::= [+-]? [0-9]+
|
||||||
|
// [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+
|
||||||
|
// [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT)
|
||||||
|
// [154s] EXPONENT ::= [eE] [+-]? [0-9]+
|
||||||
|
let mut i = 0; |
||||||
|
let c = *data.first()?; |
||||||
|
if matches!(c, b'+' | b'-') { |
||||||
|
i += 1; |
||||||
|
} |
||||||
|
// We read the digits before .
|
||||||
|
let mut count_before: usize = 0; |
||||||
|
loop { |
||||||
|
let c = *data.get(i)?; |
||||||
|
if c.is_ascii_digit() { |
||||||
|
i += 1; |
||||||
|
count_before += 1; |
||||||
|
} else { |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// We read the digits after .
|
||||||
|
#[allow(clippy::if_then_some_else_none)] |
||||||
|
let count_after = if *data.get(i)? == b'.' { |
||||||
|
i += 1; |
||||||
|
|
||||||
|
let mut count_after = 0; |
||||||
|
loop { |
||||||
|
let c = *data.get(i)?; |
||||||
|
if c.is_ascii_digit() { |
||||||
|
i += 1; |
||||||
|
count_after += 1; |
||||||
|
} else { |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
Some(count_after) |
||||||
|
} else { |
||||||
|
None |
||||||
|
}; |
||||||
|
|
||||||
|
// End
|
||||||
|
let c = *data.get(i)?; |
||||||
|
if matches!(c, b'e' | b'E') { |
||||||
|
i += 1; |
||||||
|
|
||||||
|
let c = *data.get(i)?; |
||||||
|
if matches!(c, b'+' | b'-') { |
||||||
|
i += 1; |
||||||
|
} |
||||||
|
|
||||||
|
let mut found = false; |
||||||
|
loop { |
||||||
|
let c = *data.get(i)?; |
||||||
|
if c.is_ascii_digit() { |
||||||
|
i += 1; |
||||||
|
found = true; |
||||||
|
} else { |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
Some(( |
||||||
|
i, |
||||||
|
if !found { |
||||||
|
Err((0..i, "A double exponent cannot be empty").into()) |
||||||
|
} else if count_before == 0 && count_after.unwrap_or(0) == 0 { |
||||||
|
Err((0..i, "A double should not be empty").into()) |
||||||
|
} else { |
||||||
|
Ok(N3Token::Double(str::from_utf8(&data[..i]).unwrap())) |
||||||
|
}, |
||||||
|
)) |
||||||
|
} else if let Some(count_after) = count_after { |
||||||
|
if count_after == 0 { |
||||||
|
// We do not consume the '.' after all
|
||||||
|
i -= 1; |
||||||
|
Some(( |
||||||
|
i, |
||||||
|
if count_before == 0 { |
||||||
|
Err((0..i, "An integer should not be empty").into()) |
||||||
|
} else { |
||||||
|
Ok(N3Token::Integer(str::from_utf8(&data[..i]).unwrap())) |
||||||
|
}, |
||||||
|
)) |
||||||
|
} else { |
||||||
|
Some((i, Ok(N3Token::Decimal(str::from_utf8(&data[..i]).unwrap())))) |
||||||
|
} |
||||||
|
} else { |
||||||
|
Some(( |
||||||
|
i, |
||||||
|
if count_before == 0 { |
||||||
|
Err((0..i, "An integer should not be empty").into()) |
||||||
|
} else { |
||||||
|
Ok(N3Token::Integer(str::from_utf8(&data[..i]).unwrap())) |
||||||
|
}, |
||||||
|
)) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn recognize_escape( |
||||||
|
data: &[u8], |
||||||
|
position: usize, |
||||||
|
with_echar: bool, |
||||||
|
) -> Option<(usize, Result<char, TokenRecognizerError>)> { |
||||||
|
// [26] UCHAR ::= '\u' HEX HEX HEX HEX | '\U' HEX HEX HEX HEX HEX HEX HEX HEX
|
||||||
|
// [159s] ECHAR ::= '\' [tbnrf"'\]
|
||||||
|
match *data.get(1)? { |
||||||
|
b'u' => match Self::recognize_hex_char(&data[2..], 4, 'u', position) { |
||||||
|
Ok(c) => Some((5, Ok(c?))), |
||||||
|
Err(e) => Some((5, Err(e))), |
||||||
|
}, |
||||||
|
b'U' => match Self::recognize_hex_char(&data[2..], 8, 'u', position) { |
||||||
|
Ok(c) => Some((9, Ok(c?))), |
||||||
|
Err(e) => Some((9, Err(e))), |
||||||
|
}, |
||||||
|
b't' if with_echar => Some((1, Ok('\t'))), |
||||||
|
b'b' if with_echar => Some((1, Ok('\x08'))), |
||||||
|
b'n' if with_echar => Some((1, Ok('\n'))), |
||||||
|
b'r' if with_echar => Some((1, Ok('\r'))), |
||||||
|
b'f' if with_echar => Some((1, Ok('\x0C'))), |
||||||
|
b'"' if with_echar => Some((1, Ok('"'))), |
||||||
|
b'\'' if with_echar => Some((1, Ok('\''))), |
||||||
|
b'\\' if with_echar => Some((1, Ok('\\'))), |
||||||
|
c => Some(( |
||||||
|
1, |
||||||
|
Err(( |
||||||
|
position..position + 2, |
||||||
|
format!("Unexpected escape character '\\{}'", char::from(c)), |
||||||
|
) |
||||||
|
.into()), |
||||||
|
)), //TODO: read until end of string
|
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn recognize_hex_char( |
||||||
|
data: &[u8], |
||||||
|
len: usize, |
||||||
|
escape_char: char, |
||||||
|
position: usize, |
||||||
|
) -> Result<Option<char>, TokenRecognizerError> { |
||||||
|
if data.len() < len { |
||||||
|
return Ok(None); |
||||||
|
} |
||||||
|
let val = str::from_utf8(&data[..len]).map_err(|e| { |
||||||
|
( |
||||||
|
position..position + len + 2, |
||||||
|
format!("The escape sequence contains invalid UTF-8 characters: {e}"), |
||||||
|
) |
||||||
|
})?; |
||||||
|
let codepoint = u32::from_str_radix(val, 16).map_err(|e| { |
||||||
|
( |
||||||
|
position..position + len + 2, |
||||||
|
format!( |
||||||
|
"The escape sequence '\\{escape_char}{val}' is not a valid hexadecimal string: {e}" |
||||||
|
), |
||||||
|
) |
||||||
|
})?; |
||||||
|
let c = char::from_u32(codepoint).ok_or_else(|| { |
||||||
|
( |
||||||
|
position..position + len +2, |
||||||
|
format!( |
||||||
|
"The escape sequence '\\{escape_char}{val}' is encoding {codepoint:X} that is not a valid unicode character", |
||||||
|
), |
||||||
|
) |
||||||
|
})?; |
||||||
|
Ok(Some(c)) |
||||||
|
} |
||||||
|
|
||||||
|
/// Incrementally decodes one UTF-8 encoded character starting at `data[0]`.
///
/// Returns `None` when more bytes are needed (truncated sequence at end of
/// buffer), `Some(Ok((char, byte_len)))` on success and `Some(Err(..))` on an
/// invalid encoding. The boundary adjustments below appear to follow the
/// WHATWG Encoding Standard UTF-8 decoder, which rejects overlong forms,
/// surrogates and values above U+10FFFF via per-lead-byte continuation
/// ranges — TODO(review): confirm against that algorithm.
fn recognize_unicode_char(
    data: &[u8],
    position: usize,
) -> Option<Result<(char, usize), TokenRecognizerError>> {
    let mut code_point: u32;
    // Number of continuation bytes still expected after the lead byte.
    let bytes_needed: usize;
    // Valid range for the *first* continuation byte; reset to 0x80..=0xBF
    // for the following ones.
    let mut lower_boundary = 0x80;
    let mut upper_boundary = 0xBF;

    let byte = *data.first()?;
    match byte {
        // ASCII fast path: one byte, done.
        0x00..=0x7F => return Some(Ok((char::from(byte), 1))),
        // 2-byte sequence (0xC0/0xC1 would be overlong and are rejected below).
        0xC2..=0xDF => {
            bytes_needed = 1;
            code_point = u32::from(byte) & 0x1F;
        }
        // 3-byte sequence.
        0xE0..=0xEF => {
            if byte == 0xE0 {
                lower_boundary = 0xA0; // Reject overlong encodings (< U+0800).
            }
            if byte == 0xED {
                upper_boundary = 0x9F; // Reject UTF-16 surrogates (U+D800..U+DFFF).
            }
            bytes_needed = 2;
            code_point = u32::from(byte) & 0xF;
        }
        // 4-byte sequence.
        0xF0..=0xF4 => {
            if byte == 0xF0 {
                lower_boundary = 0x90; // Reject overlong encodings (< U+10000).
            }
            if byte == 0xF4 {
                upper_boundary = 0x8F; // Reject code points above U+10FFFF.
            }
            bytes_needed = 3;
            code_point = u32::from(byte) & 0x7;
        }
        // Stray continuation byte or invalid lead byte.
        _ => {
            return Some(Err((
                position..=position,
                "Invalid UTF-8 character encoding",
            )
                .into()))
        }
    }

    // Consume and validate the continuation bytes, accumulating 6 bits each.
    for i in 1..=bytes_needed {
        let byte = *data.get(i)?; // None: sequence truncated, wait for more input.
        if byte < lower_boundary || upper_boundary < byte {
            return Some(Err((
                position..=position + i,
                "Invalid UTF-8 character encoding",
            )
                .into()));
        }
        lower_boundary = 0x80;
        upper_boundary = 0xBF;
        code_point = (code_point << 6) | (u32::from(byte) & 0x3F);
    }

    // With the boundary checks above the value should already be valid;
    // `from_u32` is a final defensive check.
    Some(
        char::from_u32(code_point)
            .map(|c| (c, bytes_needed + 1))
            .ok_or_else(|| {
                (
                    position..=position + bytes_needed,
                    format!("The codepoint {code_point:X} is not a valid unicode character"),
                )
                    .into()
            }),
    )
}
||||||
|
|
||||||
|
// [157s] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
/// Checks whether `c` is in the PN_CHARS_BASE production above,
/// i.e. may start a prefixed-name part.
fn is_possible_pn_chars_base(c: char) -> bool {
    match c {
        'A'..='Z'
        | 'a'..='z'
        | '\u{00C0}'..='\u{00D6}'
        | '\u{00D8}'..='\u{00F6}'
        | '\u{00F8}'..='\u{02FF}'
        | '\u{0370}'..='\u{037D}'
        | '\u{037F}'..='\u{1FFF}'
        | '\u{200C}'..='\u{200D}'
        | '\u{2070}'..='\u{218F}'
        | '\u{2C00}'..='\u{2FEF}'
        | '\u{3001}'..='\u{D7FF}'
        | '\u{F900}'..='\u{FDCF}'
        | '\u{FDF0}'..='\u{FFFD}'
        | '\u{10000}'..='\u{EFFFF}' => true,
        _ => false,
    }
}
||||||
|
|
||||||
|
// [158s] PN_CHARS_U ::= PN_CHARS_BASE | '_' | ':'
|
||||||
|
fn is_possible_pn_chars_u(c: char) -> bool { |
||||||
|
Self::is_possible_pn_chars_base(c) || c == '_' |
||||||
|
} |
||||||
|
|
||||||
|
// [160s] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
|
||||||
|
fn is_possible_pn_chars(c: char) -> bool { |
||||||
|
Self::is_possible_pn_chars_u(c) |
||||||
|
|| matches!(c, |
||||||
|
'-' | '0'..='9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}') |
||||||
|
} |
||||||
|
|
||||||
|
/// Checks whether `c` is allowed by PN_CHARS_BASE but is not a valid IRI
/// character (RFC 3987 `ucschar`), so an IRI built from it needs re-validation.
///
/// That is the case for the Specials block U+FFF0..=U+FFFD and for the
/// noncharacters U+nFFFE / U+nFFFF of every plane (code points whose low
/// 16 bits are 0xFFFE or 0xFFFF).
///
/// The previous implementation tested `u32::from(c) % 0xFFFE == 0 ||
/// u32::from(c) % 0xFFFF == 0`, which both missed noncharacters such as
/// U+2FFFE (skipping the IRI re-validation they require) and wrongly flagged
/// valid characters such as U+2FFFD (= 3 * 0xFFFF).
fn is_possible_pn_chars_base_but_not_valid_iri(c: char) -> bool {
    matches!(c, '\u{FFF0}'..='\u{FFFD}')
        || matches!(u32::from(c) & 0xFFFF, 0xFFFE | 0xFFFF)
}
||||||
|
} |
||||||
|
|
||||||
|
pub fn resolve_local_name( |
||||||
|
prefix: &str, |
||||||
|
local: &str, |
||||||
|
might_be_invalid_iri: bool, |
||||||
|
prefixes: &HashMap<String, Iri<String>>, |
||||||
|
) -> Result<NamedNode, String> { |
||||||
|
if let Some(start) = prefixes.get(prefix) { |
||||||
|
let iri = format!("{start}{local}"); |
||||||
|
if might_be_invalid_iri || start.path().is_empty() { |
||||||
|
// We validate again. We always validate if the local part might be the IRI authority.
|
||||||
|
if let Err(e) = Iri::parse(iri.as_str()) { |
||||||
|
return Err(format!( |
||||||
|
"The prefixed name {prefix}:{local} builds IRI {iri} that is invalid: {e}" |
||||||
|
)); |
||||||
|
} |
||||||
|
} |
||||||
|
Ok(NamedNode::new_unchecked(iri)) |
||||||
|
} else { |
||||||
|
Err(format!("The prefix {prefix}: has not been declared")) |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,19 @@ |
|||||||
|
mod lexer; |
||||||
|
mod line_formats; |
||||||
|
pub mod n3; |
||||||
|
pub mod nquads; |
||||||
|
pub mod ntriples; |
||||||
|
mod terse; |
||||||
|
mod toolkit; |
||||||
|
pub mod trig; |
||||||
|
pub mod turtle; |
||||||
|
|
||||||
|
pub use crate::n3::N3Parser; |
||||||
|
pub use crate::nquads::{NQuadsParser, NQuadsSerializer}; |
||||||
|
pub use crate::ntriples::{NTriplesParser, NTriplesSerializer}; |
||||||
|
pub use crate::toolkit::{ParseError, ParseOrIoError}; |
||||||
|
pub use crate::trig::{TriGParser, TriGSerializer}; |
||||||
|
pub use crate::turtle::{TurtleParser, TurtleSerializer}; |
||||||
|
|
||||||
|
// Initial size of the lexer read buffer (bytes).
pub(crate) const MIN_BUFFER_SIZE: usize = 4096;
// Upper bound on the buffer growth — presumably caps memory use on
// pathological input (a single 16 MiB+ token); TODO confirm in toolkit.
pub(crate) const MAX_BUFFER_SIZE: usize = 4096 * 4096;
@ -0,0 +1,305 @@ |
|||||||
|
//! Shared parser implementation for N-Triples and N-Quads.
|
||||||
|
|
||||||
|
use crate::lexer::{N3Lexer, N3LexerMode, N3LexerOptions, N3Token}; |
||||||
|
use crate::toolkit::{Lexer, Parser, RuleRecognizer, RuleRecognizerError}; |
||||||
|
use crate::{MAX_BUFFER_SIZE, MIN_BUFFER_SIZE}; |
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
use oxrdf::Triple; |
||||||
|
use oxrdf::{BlankNode, GraphName, Literal, NamedNode, Quad, Subject, Term}; |
||||||
|
|
||||||
|
/// Rule recognizer shared by the N-Triples and N-Quads parsers.
pub struct NQuadsRecognizer {
    // Pending parsing states; `recognize_next` pops the current state and
    // pushes follow-up states as tokens arrive.
    stack: Vec<NQuadsState>,
    // When false a trailing graph-name token is not accepted (N-Triples mode).
    with_graph_name: bool,
    // When false `<<`-quoted triples are rejected even with the feature compiled in.
    #[cfg(feature = "rdf-star")]
    with_quoted_triples: bool,
    lexer_options: N3LexerOptions,
    // Term stacks; they hold more than one element only while nested inside
    // quoted triples (rdf-star).
    subjects: Vec<Subject>,
    predicates: Vec<NamedNode>,
    objects: Vec<Term>,
}
||||||
|
|
||||||
|
/// States of the N-Triples/N-Quads push-down recognizer.
enum NQuadsState {
    ExpectSubject,
    ExpectPredicate,
    // NOTE(review): "Expected" while siblings use "Expect" — naming
    // inconsistency, kept because the variant is referenced elsewhere.
    ExpectedObject,
    // After a complete object: either a graph name / `.` (top level) or the
    // closing `>>` of a quoted triple (nested).
    ExpectPossibleGraphOrEndOfQuotedTriple,
    ExpectDot,
    // A plain string literal was read; it may be followed by a language tag,
    // `^^` + datatype, a graph name or `.`.
    ExpectLiteralAnnotationOrGraphNameOrDot {
        value: String,
    },
    // A `^^` was read; the datatype IRI must follow.
    ExpectLiteralDatatype {
        value: String,
    },
    // A nested `<< s p o >>` just closed and becomes the subject.
    #[cfg(feature = "rdf-star")]
    AfterQuotedSubject,
    // A nested `<< s p o >>` just closed and becomes the object.
    #[cfg(feature = "rdf-star")]
    AfterQuotedObject,
}
||||||
|
|
||||||
|
impl RuleRecognizer for NQuadsRecognizer {
    type TokenRecognizer = N3Lexer;
    type Output = Quad;

    /// Resets the recognizer after an error so parsing can resume at the next
    /// statement (simple error recovery).
    fn error_recovery_state(mut self) -> Self {
        self.stack.clear();
        self.subjects.clear();
        self.predicates.clear();
        self.objects.clear();
        self
    }

    /// Consumes one token, emitting complete quads into `results` and
    /// recoverable errors into `errors`. Consumes and returns `self` so the
    /// toolkit can swap in the recovery state on failure.
    fn recognize_next(
        mut self,
        token: N3Token,
        results: &mut Vec<Quad>,
        errors: &mut Vec<RuleRecognizerError>,
    ) -> Self {
        if let Some(state) = self.stack.pop() {
            match state {
                NQuadsState::ExpectSubject => match token {
                    N3Token::IriRef(s) => {
                        self.subjects
                            .push(NamedNode::new_unchecked(s.into_inner()).into());
                        self.stack.push(NQuadsState::ExpectPredicate);
                        self
                    }
                    N3Token::BlankNodeLabel(s) => {
                        self.subjects.push(BlankNode::new_unchecked(s).into());
                        self.stack.push(NQuadsState::ExpectPredicate);
                        self
                    }
                    // `<<` opens a quoted triple used as subject: recurse into
                    // a full triple, then resume via AfterQuotedSubject.
                    #[cfg(feature = "rdf-star")]
                    N3Token::Punctuation("<<") if self.with_quoted_triples => {
                        self.stack.push(NQuadsState::AfterQuotedSubject);
                        self.stack.push(NQuadsState::ExpectSubject);
                        self
                    }
                    token => self.error(
                        errors,
                        format!("The subject of a triple should be an IRI or a blank node, {token:?} found"),
                    ),
                },
                NQuadsState::ExpectPredicate => match token {
                    N3Token::IriRef(p) => {
                        self.predicates
                            .push(NamedNode::new_unchecked(p.into_inner()));
                        self.stack.push(NQuadsState::ExpectedObject);
                        self
                    }
                    token => self.error(
                        errors,
                        format!("The predicate of a triple should be an IRI, {token:?} found"),
                    ),
                },
                NQuadsState::ExpectedObject => match token {
                    N3Token::IriRef(o) => {
                        self.objects
                            .push(NamedNode::new_unchecked(o.into_inner()).into());
                        self.stack
                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                        self
                    }
                    N3Token::BlankNodeLabel(o) => {
                        self.objects.push(BlankNode::new_unchecked(o).into());
                        self.stack
                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                        self
                    }
                    // The literal is kept in the state until we know whether a
                    // language tag or datatype follows.
                    N3Token::String(value) => {
                        self.stack
                            .push(NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value });
                        self
                    }
                    #[cfg(feature = "rdf-star")]
                    N3Token::Punctuation("<<") if self.with_quoted_triples => {
                        self.stack.push(NQuadsState::AfterQuotedObject);
                        self.stack.push(NQuadsState::ExpectSubject);
                        self
                    }
                    token => self.error(
                        errors,
                        format!("The object of a triple should be an IRI, a blank node or a literal, {token:?} found"),
                    ),
                },
                NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value } => match token {
                    N3Token::LangTag(lang_tag) => {
                        self.objects.push(
                            Literal::new_language_tagged_literal_unchecked(
                                value,
                                // Language tags are case-insensitive; normalize.
                                lang_tag.to_ascii_lowercase(),
                            )
                            .into(),
                        );
                        self.stack
                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                        self
                    }
                    N3Token::Punctuation("^^") => {
                        self.stack
                            .push(NQuadsState::ExpectLiteralDatatype { value });
                        self
                    }
                    // Anything else: the literal was plain; re-dispatch the
                    // token against the follow-up state.
                    token => {
                        self.objects.push(Literal::new_simple_literal(value).into());
                        self.stack
                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                        self.recognize_next(token, results, errors)
                    }
                },
                NQuadsState::ExpectLiteralDatatype { value } => match token {
                    N3Token::IriRef(d) => {
                        self.objects.push(
                            Literal::new_typed_literal(
                                value,
                                NamedNode::new_unchecked(d.into_inner()),
                            )
                            .into(),
                        );
                        self.stack
                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                        self
                    }
                    token => self.error(errors, format!("A literal datatype must be an IRI, found {token:?}")),
                },
                NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple => {
                    // An empty stack means we are at the top level (not inside
                    // a quoted triple), so a graph name or `.` may follow.
                    if self.stack.is_empty() {
                        match token {
                            N3Token::IriRef(g) if self.with_graph_name => {
                                self.emit_quad(
                                    results,
                                    NamedNode::new_unchecked(g.into_inner()).into(),
                                );
                                self.stack.push(NQuadsState::ExpectDot);
                                self
                            }
                            N3Token::BlankNodeLabel(g) if self.with_graph_name => {
                                self.emit_quad(results, BlankNode::new_unchecked(g).into());
                                self.stack.push(NQuadsState::ExpectDot);
                                self
                            }
                            // No graph name: emit into the default graph and
                            // re-dispatch the token (it should be the dot).
                            token => {
                                self.emit_quad(results, GraphName::DefaultGraph);
                                self.stack.push(NQuadsState::ExpectDot);
                                self.recognize_next(token, results, errors)
                            }
                        }
                    } else if token == N3Token::Punctuation(">>") {
                        self
                    } else {
                        self.error(errors, "Expecting the end of a quoted triple '>>'")
                    }
                }
                NQuadsState::ExpectDot => match token {
                    N3Token::Punctuation(".") => {
                        self.stack.push(NQuadsState::ExpectSubject);
                        self
                    }
                    // Missing dot: report but keep parsing from the token as
                    // the start of the next statement (error recovery).
                    token => {
                        errors.push("Quads should be followed by a dot".into());
                        self.stack.push(NQuadsState::ExpectSubject);
                        self.recognize_next(token, results, errors)
                    }
                },
                // Fold the completed nested triple back into a subject term.
                #[cfg(feature = "rdf-star")]
                NQuadsState::AfterQuotedSubject => {
                    let triple = Triple {
                        subject: self.subjects.pop().unwrap(),
                        predicate: self.predicates.pop().unwrap(),
                        object: self.objects.pop().unwrap(),
                    };
                    self.subjects.push(triple.into());
                    self.stack.push(NQuadsState::ExpectPredicate);
                    self.recognize_next(token, results, errors)
                }
                // Fold the completed nested triple back into an object term.
                #[cfg(feature = "rdf-star")]
                NQuadsState::AfterQuotedObject => {
                    let triple = Triple {
                        subject: self.subjects.pop().unwrap(),
                        predicate: self.predicates.pop().unwrap(),
                        object: self.objects.pop().unwrap(),
                    };
                    self.objects.push(triple.into());
                    self.stack
                        .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                    self.recognize_next(token, results, errors)
                }
            }
        } else if token == N3Token::Punctuation(".") {
            // Empty stack (e.g. right after recovery): a dot starts a fresh statement.
            self.stack.push(NQuadsState::ExpectSubject);
            self
        } else {
            // Skip tokens until the next dot while recovering.
            self
        }
    }

    /// Called at end of input: flushes an almost-complete final statement and
    /// reports a missing trailing dot where applicable.
    fn recognize_end(mut self, results: &mut Vec<Quad>, errors: &mut Vec<RuleRecognizerError>) {
        match &*self.stack {
            [NQuadsState::ExpectSubject] | [] => (),
            [NQuadsState::ExpectDot] => errors.push("Triples should be followed by a dot".into()),
            [NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple] => {
                self.emit_quad(results, GraphName::DefaultGraph);
                errors.push("Triples should be followed by a dot".into())
            }
            [NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { ref value }] => {
                self.objects.push(Literal::new_simple_literal(value).into());
                self.emit_quad(results, GraphName::DefaultGraph);
                errors.push("Triples should be followed by a dot".into())
            }
            _ => errors.push("Unexpected end".into()), //TODO
        }
    }

    fn lexer_options(&self) -> &N3LexerOptions {
        &self.lexer_options
    }
}
||||||
|
|
||||||
|
impl NQuadsRecognizer { |
||||||
|
pub fn new_parser( |
||||||
|
with_graph_name: bool, |
||||||
|
#[cfg(feature = "rdf-star")] with_quoted_triples: bool, |
||||||
|
) -> Parser<Self> { |
||||||
|
Parser::new( |
||||||
|
Lexer::new( |
||||||
|
N3Lexer::new(N3LexerMode::NTriples), |
||||||
|
MIN_BUFFER_SIZE, |
||||||
|
MAX_BUFFER_SIZE, |
||||||
|
true, |
||||||
|
Some(b"#"), |
||||||
|
), |
||||||
|
NQuadsRecognizer { |
||||||
|
stack: vec![NQuadsState::ExpectSubject], |
||||||
|
with_graph_name, |
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
with_quoted_triples, |
||||||
|
lexer_options: N3LexerOptions::default(), |
||||||
|
subjects: Vec::new(), |
||||||
|
predicates: Vec::new(), |
||||||
|
objects: Vec::new(), |
||||||
|
}, |
||||||
|
) |
||||||
|
} |
||||||
|
|
||||||
|
#[must_use] |
||||||
|
fn error( |
||||||
|
mut self, |
||||||
|
errors: &mut Vec<RuleRecognizerError>, |
||||||
|
msg: impl Into<RuleRecognizerError>, |
||||||
|
) -> Self { |
||||||
|
errors.push(msg.into()); |
||||||
|
self.stack.clear(); |
||||||
|
self.subjects.clear(); |
||||||
|
self.predicates.clear(); |
||||||
|
self.objects.clear(); |
||||||
|
self |
||||||
|
} |
||||||
|
|
||||||
|
fn emit_quad(&mut self, results: &mut Vec<Quad>, graph_name: GraphName) { |
||||||
|
results.push(Quad { |
||||||
|
subject: self.subjects.pop().unwrap(), |
||||||
|
predicate: self.predicates.pop().unwrap(), |
||||||
|
object: self.objects.pop().unwrap(), |
||||||
|
graph_name, |
||||||
|
}) |
||||||
|
} |
||||||
|
} |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,393 @@ |
|||||||
|
//! A [N-Quads](https://www.w3.org/TR/n-quads/) streaming parser implemented by [`NQuadsParser`].
|
||||||
|
|
||||||
|
use crate::line_formats::NQuadsRecognizer; |
||||||
|
use crate::toolkit::{FromReadIterator, ParseError, ParseOrIoError, Parser}; |
||||||
|
use oxrdf::{Quad, QuadRef}; |
||||||
|
use std::io::{self, Read, Write}; |
||||||
|
|
||||||
|
/// A [N-Quads](https://www.w3.org/TR/n-quads/) streaming parser.
|
||||||
|
///
|
||||||
|
/// Support for [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star) is available behind the `rdf-star` feature and the [`NQuadsParser::with_quoted_triples`] option.
|
||||||
|
///
|
||||||
|
/// Count the number of people:
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::NamedNodeRef;
|
||||||
|
/// use oxttl::{NQuadsParser, ParseError};
|
||||||
|
///
|
||||||
|
/// let file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||||
|
/// <http://example.com/foo> <http://schema.org/name> \"Foo\" .
|
||||||
|
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||||
|
/// <http://example.com/bar> <http://schema.org/name> \"Bar\" .";
|
||||||
|
///
|
||||||
|
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
|
||||||
|
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||||
|
/// let mut count = 0;
|
||||||
|
/// for quad in NQuadsParser::new().parse_from_read(file.as_ref()) {
|
||||||
|
/// let quad = quad?;
|
||||||
|
/// if quad.predicate == rdf_type && quad.object == schema_person.into() {
|
||||||
|
/// count += 1;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(2, count);
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
#[derive(Default)]
pub struct NQuadsParser {
    // Enables `<<...>>` quoted triples; off by default, see `with_quoted_triples`.
    #[cfg(feature = "rdf-star")]
    with_quoted_triples: bool,
}
||||||
|
|
||||||
|
impl NQuadsParser {
    /// Builds a new [`NQuadsParser`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Enables [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star).
    #[cfg(feature = "rdf-star")]
    #[inline]
    #[must_use]
    pub fn with_quoted_triples(mut self) -> Self {
        self.with_quoted_triples = true;
        self
    }

    /// Parses a N-Quads file from a [`Read`] implementation.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::NamedNodeRef;
    /// use oxttl::{NQuadsParser, ParseError};
    ///
    /// let file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
    /// <http://example.com/foo> <http://schema.org/name> \"Foo\" .
    /// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
    /// <http://example.com/bar> <http://schema.org/name> \"Bar\" .";
    ///
    /// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
    /// let mut count = 0;
    /// for quad in NQuadsParser::new().parse_from_read(file.as_ref()) {
    ///     let quad = quad?;
    ///     if quad.predicate == rdf_type && quad.object == schema_person.into() {
    ///         count += 1;
    ///     }
    /// }
    /// assert_eq!(2, count);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn parse_from_read<R: Read>(&self, read: R) -> FromReadNQuadsReader<R> {
        FromReadNQuadsReader {
            // Reuse the low-level parser and drive it from the reader.
            inner: self.parse().parser.parse_from_read(read),
        }
    }

    /// Allows to parse a N-Quads file by using a low-level API.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::NamedNodeRef;
    /// use oxttl::{NQuadsParser, ParseError};
    ///
    /// let file: [&[u8]; 4] = [
    ///     b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
    ///     b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n",
    ///     b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
    ///     b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n"
    /// ];
    ///
    /// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
    /// let mut count = 0;
    /// let mut parser = NQuadsParser::new().parse();
    /// let mut file_chunks = file.iter();
    /// while !parser.is_end() {
    ///     // We feed more data to the parser
    ///     if let Some(chunk) = file_chunks.next() {
    ///         parser.extend_from_slice(chunk);
    ///     } else {
    ///         parser.end(); // It's finished
    ///     }
    ///     // We read as many quads from the parser as possible
    ///     while let Some(quad) = parser.read_next() {
    ///         let quad = quad?;
    ///         if quad.predicate == rdf_type && quad.object == schema_person.into() {
    ///             count += 1;
    ///         }
    ///     }
    /// }
    /// assert_eq!(2, count);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    #[allow(clippy::unused_self)]
    pub fn parse(&self) -> LowLevelNQuadsReader {
        LowLevelNQuadsReader {
            parser: NQuadsRecognizer::new_parser(
                // N-Quads always allows an optional graph name.
                true,
                #[cfg(feature = "rdf-star")]
                self.with_quoted_triples,
            ),
        }
    }
}
||||||
|
|
||||||
|
/// Parses a N-Quads file from a [`Read`] implementation. Can be built using [`NQuadsParser::parse_from_read`].
|
||||||
|
///
|
||||||
|
/// Count the number of people:
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::NamedNodeRef;
|
||||||
|
/// use oxttl::{NQuadsParser, ParseError};
|
||||||
|
///
|
||||||
|
/// let file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||||
|
/// <http://example.com/foo> <http://schema.org/name> \"Foo\" .
|
||||||
|
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||||
|
/// <http://example.com/bar> <http://schema.org/name> \"Bar\" .";
|
||||||
|
///
|
||||||
|
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
|
||||||
|
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||||
|
/// let mut count = 0;
|
||||||
|
/// for quad in NQuadsParser::new().parse_from_read(file.as_ref()) {
|
||||||
|
/// let quad = quad?;
|
||||||
|
/// if quad.predicate == rdf_type && quad.object == schema_person.into() {
|
||||||
|
/// count += 1;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(2, count);
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct FromReadNQuadsReader<R: Read> {
    // Iterator driving the recognizer from the underlying reader.
    inner: FromReadIterator<R, NQuadsRecognizer>,
}
||||||
|
|
||||||
|
impl<R: Read> Iterator for FromReadNQuadsReader<R> {
    type Item = Result<Quad, ParseOrIoError>;

    /// Yields the next parsed quad, a parse error, or an I/O error from the
    /// underlying reader. Straight delegation to the toolkit iterator.
    fn next(&mut self) -> Option<Result<Quad, ParseOrIoError>> {
        self.inner.next()
    }
}
||||||
|
|
||||||
|
/// Parses a N-Quads file by using a low-level API. Can be built using [`NQuadsParser::parse`].
|
||||||
|
///
|
||||||
|
/// Count the number of people:
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::NamedNodeRef;
|
||||||
|
/// use oxttl::{NQuadsParser, ParseError};
|
||||||
|
///
|
||||||
|
/// let file: [&[u8]; 4] = [
|
||||||
|
/// b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||||
|
/// b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n",
|
||||||
|
/// b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||||
|
/// b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n"
|
||||||
|
/// ];
|
||||||
|
///
|
||||||
|
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
|
||||||
|
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||||
|
/// let mut count = 0;
|
||||||
|
/// let mut parser = NQuadsParser::new().parse();
|
||||||
|
/// let mut file_chunks = file.iter();
|
||||||
|
/// while !parser.is_end() {
|
||||||
|
/// // We feed more data to the parser
|
||||||
|
/// if let Some(chunk) = file_chunks.next() {
|
||||||
|
/// parser.extend_from_slice(chunk);
|
||||||
|
/// } else {
|
||||||
|
/// parser.end(); // It's finished
|
||||||
|
/// }
|
||||||
|
/// // We read as many quads from the parser as possible
|
||||||
|
/// while let Some(quad) = parser.read_next() {
|
||||||
|
/// let quad = quad?;
|
||||||
|
/// if quad.predicate == rdf_type && quad.object == schema_person.into() {
|
||||||
|
/// count += 1;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(2, count);
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct LowLevelNQuadsReader {
    // Push-based toolkit parser wrapping the N-Quads recognizer.
    parser: Parser<NQuadsRecognizer>,
}
||||||
|
|
||||||
|
impl LowLevelNQuadsReader {
    /// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data.
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.parser.extend_from_slice(other)
    }

    /// Tell the parser that the file is finished.
    ///
    /// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values.
    pub fn end(&mut self) {
        self.parser.end()
    }

    /// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`.
    pub fn is_end(&self) -> bool {
        self.parser.is_end()
    }

    /// Attempt to parse a new quad from the already provided data.
    ///
    /// Returns [`None`] if the parsing is finished or more data is required.
    /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice).
    pub fn read_next(&mut self) -> Option<Result<Quad, ParseError>> {
        self.parser.read_next()
    }
}
||||||
|
|
||||||
|
/// A [N-Quads](https://www.w3.org/TR/n-quads/) serializer.
|
||||||
|
///
|
||||||
|
/// Support for [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star) is available behind the `rdf-star` feature.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::{NamedNodeRef, QuadRef};
|
||||||
|
/// use oxttl::NQuadsSerializer;
|
||||||
|
///
|
||||||
|
/// let mut buf = Vec::new();
|
||||||
|
/// let mut writer = NQuadsSerializer::new().serialize_to_write(buf);
|
||||||
|
/// writer.write_quad(QuadRef::new(
|
||||||
|
/// NamedNodeRef::new("http://example.com#me")?,
|
||||||
|
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||||
|
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||||
|
/// NamedNodeRef::new("http://example.com")?,
|
||||||
|
/// ))?;
|
||||||
|
/// assert_eq!(
|
||||||
|
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
|
||||||
|
/// writer.finish().as_slice()
|
||||||
|
/// );
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
// Zero-sized: N-Quads serialization has no configuration.
#[derive(Default)]
pub struct NQuadsSerializer;
||||||
|
|
||||||
|
impl NQuadsSerializer {
    /// Builds a new [`NQuadsSerializer`].
    #[inline]
    pub fn new() -> Self {
        Self
    }

    /// Writes a N-Quads file to a [`Write`] implementation.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, QuadRef};
    /// use oxttl::NQuadsSerializer;
    ///
    /// let mut buf = Vec::new();
    /// let mut writer = NQuadsSerializer::new().serialize_to_write(buf);
    /// writer.write_quad(QuadRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    ///     NamedNodeRef::new("http://schema.org/Person")?,
    ///     NamedNodeRef::new("http://example.com")?,
    /// ))?;
    /// assert_eq!(
    ///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
    ///     writer.finish().as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn serialize_to_write<W: Write>(&self, write: W) -> ToWriteNQuadsWriter<W> {
        ToWriteNQuadsWriter {
            write,
            writer: self.serialize(),
        }
    }

    /// Builds a low-level N-Quads writer.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, QuadRef};
    /// use oxttl::NQuadsSerializer;
    ///
    /// let mut buf = Vec::new();
    /// let mut writer = NQuadsSerializer::new().serialize();
    /// writer.write_quad(QuadRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    ///     NamedNodeRef::new("http://schema.org/Person")?,
    ///     NamedNodeRef::new("http://example.com")?,
    /// ), &mut buf)?;
    /// assert_eq!(
    ///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
    ///     buf.as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    #[allow(clippy::unused_self)]
    pub fn serialize(&self) -> LowLevelNQuadsWriter {
        LowLevelNQuadsWriter
    }
}
||||||
|
|
||||||
|
/// Writes a N-Quads file to a [`Write`] implementation. Can be built using [`NQuadsSerializer::serialize_to_write`].
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::{NamedNodeRef, QuadRef};
|
||||||
|
/// use oxttl::NQuadsSerializer;
|
||||||
|
///
|
||||||
|
/// let mut buf = Vec::new();
|
||||||
|
/// let mut writer = NQuadsSerializer::new().serialize_to_write(buf);
|
||||||
|
/// writer.write_quad(QuadRef::new(
|
||||||
|
/// NamedNodeRef::new("http://example.com#me")?,
|
||||||
|
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||||
|
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||||
|
/// NamedNodeRef::new("http://example.com")?,
|
||||||
|
/// ))?;
|
||||||
|
/// assert_eq!(
|
||||||
|
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
|
||||||
|
/// writer.finish().as_slice()
|
||||||
|
/// );
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct ToWriteNQuadsWriter<W: Write> { |
||||||
|
write: W, |
||||||
|
writer: LowLevelNQuadsWriter, |
||||||
|
} |
||||||
|
|
||||||
|
impl<W: Write> ToWriteNQuadsWriter<W> { |
||||||
|
/// Writes an extra quad.
|
||||||
|
pub fn write_quad<'a>(&mut self, q: impl Into<QuadRef<'a>>) -> io::Result<()> { |
||||||
|
self.writer.write_quad(q, &mut self.write) |
||||||
|
} |
||||||
|
|
||||||
|
/// Ends the write process and returns the underlying [`Write`].
|
||||||
|
pub fn finish(self) -> W { |
||||||
|
self.write |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Writes a N-Quads file by using a low-level API. Can be built using [`NQuadsSerializer::serialize`].
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::{NamedNodeRef, QuadRef};
|
||||||
|
/// use oxttl::NQuadsSerializer;
|
||||||
|
///
|
||||||
|
/// let mut buf = Vec::new();
|
||||||
|
/// let mut writer = NQuadsSerializer::new().serialize();
|
||||||
|
/// writer.write_quad(QuadRef::new(
|
||||||
|
/// NamedNodeRef::new("http://example.com#me")?,
|
||||||
|
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||||
|
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||||
|
/// NamedNodeRef::new("http://example.com")?,
|
||||||
|
/// ), &mut buf)?;
|
||||||
|
/// assert_eq!(
|
||||||
|
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
|
||||||
|
/// buf.as_slice()
|
||||||
|
/// );
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct LowLevelNQuadsWriter; |
||||||
|
|
||||||
|
impl LowLevelNQuadsWriter { |
||||||
|
/// Writes an extra quad.
|
||||||
|
#[allow(clippy::unused_self)] |
||||||
|
pub fn write_quad<'a>( |
||||||
|
&mut self, |
||||||
|
q: impl Into<QuadRef<'a>>, |
||||||
|
mut write: impl Write, |
||||||
|
) -> io::Result<()> { |
||||||
|
writeln!(write, "{} .", q.into()) |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,389 @@ |
|||||||
|
//! A [N-Triples](https://www.w3.org/TR/n-triples/) streaming parser implemented by [`NTriplesParser`]
|
||||||
|
//! and a serializer implemented by [`NTriplesSerializer`].
|
||||||
|
|
||||||
|
use crate::line_formats::NQuadsRecognizer; |
||||||
|
use crate::toolkit::{FromReadIterator, ParseError, ParseOrIoError, Parser}; |
||||||
|
use oxrdf::{Triple, TripleRef}; |
||||||
|
use std::io::{self, Read, Write}; |
||||||
|
|
||||||
|
/// A [N-Triples](https://www.w3.org/TR/n-triples/) streaming parser.
///
/// Support for [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) is available behind the `rdf-star` feature and the [`NTriplesParser::with_quoted_triples`] option.
///
/// Count the number of people:
/// ```
/// use oxrdf::NamedNodeRef;
/// use oxttl::{NTriplesParser, ParseError};
///
/// let file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/foo> <http://schema.org/name> \"Foo\" .
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/bar> <http://schema.org/name> \"Bar\" .";
///
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for triple in NTriplesParser::new().parse_from_read(file.as_ref()) {
///     let triple = triple?;
///     if triple.predicate == rdf_type && triple.object == schema_person.into() {
///         count += 1;
///     }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
pub struct NTriplesParser {
    // When set, quoted triples (`<< … >>`) are accepted as terms (RDF-star).
    #[cfg(feature = "rdf-star")]
    with_quoted_triples: bool,
}
||||||
|
|
||||||
|
impl NTriplesParser { |
||||||
|
/// Builds a new [`NTriplesParser`].
|
||||||
|
#[inline] |
||||||
|
pub fn new() -> Self { |
||||||
|
Self::default() |
||||||
|
} |
||||||
|
|
||||||
|
/// Enables [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star).
|
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
#[inline] |
||||||
|
#[must_use] |
||||||
|
pub fn with_quoted_triples(mut self) -> Self { |
||||||
|
self.with_quoted_triples = true; |
||||||
|
self |
||||||
|
} |
||||||
|
|
||||||
|
/// Parses a N-Triples file from a [`Read`] implementation.
|
||||||
|
///
|
||||||
|
/// Count the number of people:
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::NamedNodeRef;
|
||||||
|
/// use oxttl::{NTriplesParser, ParseError};
|
||||||
|
///
|
||||||
|
/// let file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||||
|
/// <http://example.com/foo> <http://schema.org/name> \"Foo\" .
|
||||||
|
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||||
|
/// <http://example.com/bar> <http://schema.org/name> \"Bar\" .";
|
||||||
|
///
|
||||||
|
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
|
||||||
|
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||||
|
/// let mut count = 0;
|
||||||
|
/// for triple in NTriplesParser::new().parse_from_read(file.as_ref()) {
|
||||||
|
/// let triple = triple?;
|
||||||
|
/// if triple.predicate == rdf_type && triple.object == schema_person.into() {
|
||||||
|
/// count += 1;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(2, count);
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub fn parse_from_read<R: Read>(&self, read: R) -> FromReadNTriplesReader<R> { |
||||||
|
FromReadNTriplesReader { |
||||||
|
inner: self.parse().parser.parse_from_read(read), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Allows to parse a N-Triples file by using a low-level API.
|
||||||
|
///
|
||||||
|
/// Count the number of people:
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::NamedNodeRef;
|
||||||
|
/// use oxttl::{NTriplesParser, ParseError};
|
||||||
|
///
|
||||||
|
/// let file: [&[u8]; 4] = [
|
||||||
|
/// b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||||
|
/// b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n",
|
||||||
|
/// b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||||
|
/// b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n"
|
||||||
|
/// ];
|
||||||
|
///
|
||||||
|
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
|
||||||
|
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||||
|
/// let mut count = 0;
|
||||||
|
/// let mut parser = NTriplesParser::new().parse();
|
||||||
|
/// let mut file_chunks = file.iter();
|
||||||
|
/// while !parser.is_end() {
|
||||||
|
/// // We feed more data to the parser
|
||||||
|
/// if let Some(chunk) = file_chunks.next() {
|
||||||
|
/// parser.extend_from_slice(chunk);
|
||||||
|
/// } else {
|
||||||
|
/// parser.end(); // It's finished
|
||||||
|
/// }
|
||||||
|
/// // We read as many triples from the parser as possible
|
||||||
|
/// while let Some(triple) = parser.read_next() {
|
||||||
|
/// let triple = triple?;
|
||||||
|
/// if triple.predicate == rdf_type && triple.object == schema_person.into() {
|
||||||
|
/// count += 1;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(2, count);
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
#[allow(clippy::unused_self)] |
||||||
|
pub fn parse(&self) -> LowLevelNTriplesReader { |
||||||
|
LowLevelNTriplesReader { |
||||||
|
parser: NQuadsRecognizer::new_parser( |
||||||
|
false, |
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
self.with_quoted_triples, |
||||||
|
), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Parses a N-Triples file from a [`Read`] implementation. Can be built using [`NTriplesParser::parse_from_read`].
|
||||||
|
///
|
||||||
|
/// Count the number of people:
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::NamedNodeRef;
|
||||||
|
/// use oxttl::{NTriplesParser, ParseError};
|
||||||
|
///
|
||||||
|
/// let file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||||
|
/// <http://example.com/foo> <http://schema.org/name> \"Foo\" .
|
||||||
|
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||||
|
/// <http://example.com/bar> <http://schema.org/name> \"Bar\" .";
|
||||||
|
///
|
||||||
|
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
|
||||||
|
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||||
|
/// let mut count = 0;
|
||||||
|
/// for triple in NTriplesParser::new().parse_from_read(file.as_ref()) {
|
||||||
|
/// let triple = triple?;
|
||||||
|
/// if triple.predicate == rdf_type && triple.object == schema_person.into() {
|
||||||
|
/// count += 1;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(2, count);
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct FromReadNTriplesReader<R: Read> { |
||||||
|
inner: FromReadIterator<R, NQuadsRecognizer>, |
||||||
|
} |
||||||
|
|
||||||
|
impl<R: Read> Iterator for FromReadNTriplesReader<R> { |
||||||
|
type Item = Result<Triple, ParseOrIoError>; |
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Result<Triple, ParseOrIoError>> { |
||||||
|
Some(self.inner.next()?.map(Into::into)) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Parses a N-Triples file by using a low-level API. Can be built using [`NTriplesParser::parse`].
|
||||||
|
///
|
||||||
|
/// Count the number of people:
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::NamedNodeRef;
|
||||||
|
/// use oxttl::{NTriplesParser, ParseError};
|
||||||
|
///
|
||||||
|
/// let file: [&[u8]; 4] = [
|
||||||
|
/// b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||||
|
/// b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n",
|
||||||
|
/// b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||||
|
/// b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n"
|
||||||
|
/// ];
|
||||||
|
///
|
||||||
|
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
|
||||||
|
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||||
|
/// let mut count = 0;
|
||||||
|
/// let mut parser = NTriplesParser::new().parse();
|
||||||
|
/// let mut file_chunks = file.iter();
|
||||||
|
/// while !parser.is_end() {
|
||||||
|
/// // We feed more data to the parser
|
||||||
|
/// if let Some(chunk) = file_chunks.next() {
|
||||||
|
/// parser.extend_from_slice(chunk);
|
||||||
|
/// } else {
|
||||||
|
/// parser.end(); // It's finished
|
||||||
|
/// }
|
||||||
|
/// // We read as many triples from the parser as possible
|
||||||
|
/// while let Some(triple) = parser.read_next() {
|
||||||
|
/// let triple = triple?;
|
||||||
|
/// if triple.predicate == rdf_type && triple.object == schema_person.into() {
|
||||||
|
/// count += 1;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(2, count);
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct LowLevelNTriplesReader { |
||||||
|
parser: Parser<NQuadsRecognizer>, |
||||||
|
} |
||||||
|
|
||||||
|
impl LowLevelNTriplesReader { |
||||||
|
/// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data.
|
||||||
|
pub fn extend_from_slice(&mut self, other: &[u8]) { |
||||||
|
self.parser.extend_from_slice(other) |
||||||
|
} |
||||||
|
|
||||||
|
/// Tell the parser that the file is finished.
|
||||||
|
///
|
||||||
|
/// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values.
|
||||||
|
pub fn end(&mut self) { |
||||||
|
self.parser.end() |
||||||
|
} |
||||||
|
|
||||||
|
/// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`.
|
||||||
|
pub fn is_end(&self) -> bool { |
||||||
|
self.parser.is_end() |
||||||
|
} |
||||||
|
|
||||||
|
/// Attempt to parse a new triple from the already provided data.
|
||||||
|
///
|
||||||
|
/// Returns [`None`] if the parsing is finished or more data is required.
|
||||||
|
/// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice).
|
||||||
|
pub fn read_next(&mut self) -> Option<Result<Triple, ParseError>> { |
||||||
|
Some(self.parser.read_next()?.map(Into::into)) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// A [N-Triples](https://www.w3.org/TR/n-triples/) serializer.
|
||||||
|
///
|
||||||
|
/// Support for [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) is available behind the `rdf-star` feature.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||||
|
/// use oxttl::NTriplesSerializer;
|
||||||
|
///
|
||||||
|
/// let mut buf = Vec::new();
|
||||||
|
/// let mut writer = NTriplesSerializer::new().serialize_to_write(buf);
|
||||||
|
/// writer.write_triple(TripleRef::new(
|
||||||
|
/// NamedNodeRef::new("http://example.com#me")?,
|
||||||
|
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||||
|
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||||
|
/// ))?;
|
||||||
|
/// assert_eq!(
|
||||||
|
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||||
|
/// writer.finish().as_slice()
|
||||||
|
/// );
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
#[derive(Default)] |
||||||
|
pub struct NTriplesSerializer; |
||||||
|
|
||||||
|
impl NTriplesSerializer { |
||||||
|
/// Builds a new [`NTriplesSerializer`].
|
||||||
|
#[inline] |
||||||
|
pub fn new() -> Self { |
||||||
|
Self |
||||||
|
} |
||||||
|
|
||||||
|
/// Writes a N-Triples file to a [`Write`] implementation.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||||
|
/// use oxttl::NTriplesSerializer;
|
||||||
|
///
|
||||||
|
/// let mut buf = Vec::new();
|
||||||
|
/// let mut writer = NTriplesSerializer::new().serialize_to_write(buf);
|
||||||
|
/// writer.write_triple(TripleRef::new(
|
||||||
|
/// NamedNodeRef::new("http://example.com#me")?,
|
||||||
|
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||||
|
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||||
|
/// ))?;
|
||||||
|
/// assert_eq!(
|
||||||
|
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||||
|
/// writer.finish().as_slice()
|
||||||
|
/// );
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub fn serialize_to_write<W: Write>(&self, write: W) -> ToWriteNTriplesWriter<W> { |
||||||
|
ToWriteNTriplesWriter { |
||||||
|
write, |
||||||
|
writer: self.serialize(), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Builds a low-level N-Triples writer.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||||
|
/// use oxttl::NTriplesSerializer;
|
||||||
|
///
|
||||||
|
/// let mut buf = Vec::new();
|
||||||
|
/// let mut writer = NTriplesSerializer::new().serialize();
|
||||||
|
/// writer.write_triple(TripleRef::new(
|
||||||
|
/// NamedNodeRef::new("http://example.com#me")?,
|
||||||
|
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||||
|
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||||
|
/// ), &mut buf)?;
|
||||||
|
/// assert_eq!(
|
||||||
|
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||||
|
/// buf.as_slice()
|
||||||
|
/// );
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
#[allow(clippy::unused_self)] |
||||||
|
pub fn serialize(&self) -> LowLevelNTriplesWriter { |
||||||
|
LowLevelNTriplesWriter |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Writes a N-Triples file to a [`Write`] implementation. Can be built using [`NTriplesSerializer::serialize_to_write`].
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||||
|
/// use oxttl::NTriplesSerializer;
|
||||||
|
///
|
||||||
|
/// let mut buf = Vec::new();
|
||||||
|
/// let mut writer = NTriplesSerializer::new().serialize_to_write(buf);
|
||||||
|
/// writer.write_triple(TripleRef::new(
|
||||||
|
/// NamedNodeRef::new("http://example.com#me")?,
|
||||||
|
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||||
|
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||||
|
/// ))?;
|
||||||
|
/// assert_eq!(
|
||||||
|
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||||
|
/// writer.finish().as_slice()
|
||||||
|
/// );
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct ToWriteNTriplesWriter<W: Write> { |
||||||
|
write: W, |
||||||
|
writer: LowLevelNTriplesWriter, |
||||||
|
} |
||||||
|
|
||||||
|
impl<W: Write> ToWriteNTriplesWriter<W> { |
||||||
|
/// Writes an extra triple.
|
||||||
|
pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> { |
||||||
|
self.writer.write_triple(t, &mut self.write) |
||||||
|
} |
||||||
|
|
||||||
|
/// Ends the write process and returns the underlying [`Write`].
|
||||||
|
pub fn finish(self) -> W { |
||||||
|
self.write |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Writes a N-Triples file by using a low-level API. Can be built using [`NTriplesSerializer::serialize`].
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||||
|
/// use oxttl::NTriplesSerializer;
|
||||||
|
///
|
||||||
|
/// let mut buf = Vec::new();
|
||||||
|
/// let mut writer = NTriplesSerializer::new().serialize();
|
||||||
|
/// writer.write_triple(TripleRef::new(
|
||||||
|
/// NamedNodeRef::new("http://example.com#me")?,
|
||||||
|
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||||
|
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||||
|
/// ), &mut buf)?;
|
||||||
|
/// assert_eq!(
|
||||||
|
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||||
|
/// buf.as_slice()
|
||||||
|
/// );
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct LowLevelNTriplesWriter; |
||||||
|
|
||||||
|
impl LowLevelNTriplesWriter { |
||||||
|
/// Writes an extra triple.
|
||||||
|
#[allow(clippy::unused_self)] |
||||||
|
pub fn write_triple<'a>( |
||||||
|
&mut self, |
||||||
|
t: impl Into<TripleRef<'a>>, |
||||||
|
mut write: impl Write, |
||||||
|
) -> io::Result<()> { |
||||||
|
writeln!(write, "{} .", t.into()) |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,932 @@ |
|||||||
|
//! Shared parser implementation for Turtle and TriG.
|
||||||
|
|
||||||
|
use crate::lexer::{resolve_local_name, N3Lexer, N3LexerMode, N3LexerOptions, N3Token}; |
||||||
|
use crate::toolkit::{Lexer, Parser, RuleRecognizer, RuleRecognizerError}; |
||||||
|
use crate::{MAX_BUFFER_SIZE, MIN_BUFFER_SIZE}; |
||||||
|
use oxiri::Iri; |
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
use oxrdf::Triple; |
||||||
|
use oxrdf::{ |
||||||
|
vocab::{rdf, xsd}, |
||||||
|
BlankNode, GraphName, Literal, NamedNode, NamedOrBlankNode, Quad, Subject, Term, |
||||||
|
}; |
||||||
|
use std::collections::HashMap; |
||||||
|
|
||||||
|
pub struct TriGRecognizer { |
||||||
|
stack: Vec<TriGState>, |
||||||
|
with_graph_name: bool, |
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
with_quoted_triples: bool, |
||||||
|
lexer_options: N3LexerOptions, |
||||||
|
prefixes: HashMap<String, Iri<String>>, |
||||||
|
cur_subject: Vec<Subject>, |
||||||
|
cur_predicate: Vec<NamedNode>, |
||||||
|
cur_object: Vec<Term>, |
||||||
|
cur_graph: GraphName, |
||||||
|
} |
||||||
|
|
||||||
|
impl RuleRecognizer for TriGRecognizer { |
||||||
|
type TokenRecognizer = N3Lexer; |
||||||
|
type Output = Quad; |
||||||
|
|
||||||
|
fn error_recovery_state(mut self) -> Self { |
||||||
|
self.stack.clear(); |
||||||
|
self.cur_subject.clear(); |
||||||
|
self.cur_predicate.clear(); |
||||||
|
self.cur_object.clear(); |
||||||
|
self.cur_graph = GraphName::DefaultGraph; |
||||||
|
self |
||||||
|
} |
||||||
|
|
||||||
|
fn recognize_next( |
||||||
|
mut self, |
||||||
|
token: N3Token, |
||||||
|
results: &mut Vec<Quad>, |
||||||
|
errors: &mut Vec<RuleRecognizerError>, |
||||||
|
) -> Self { |
||||||
|
if let Some(rule) = self.stack.pop() { |
||||||
|
match rule { |
||||||
|
// [1g] trigDoc ::= (directive | block)*
|
||||||
|
// [2g] block ::= triplesOrGraph | wrappedGraph | triples2 | "GRAPH" labelOrSubject wrappedGraph
|
||||||
|
// [3] directive ::= prefixID | base | sparqlPrefix | sparqlBase
|
||||||
|
// [4] prefixID ::= '@prefix' PNAME_NS IRIREF '.'
|
||||||
|
// [5] base ::= '@base' IRIREF '.'
|
||||||
|
// [5s] sparqlPrefix ::= "PREFIX" PNAME_NS IRIREF
|
||||||
|
// [6s] sparqlBase ::= "BASE" IRIREF
|
||||||
|
TriGState::TriGDoc => { |
||||||
|
self.cur_graph = GraphName::DefaultGraph; |
||||||
|
self.stack.push(TriGState::TriGDoc); |
||||||
|
match token { |
||||||
|
N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("base") => { |
||||||
|
self.stack.push(TriGState::BaseExpectIri); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("prefix") => { |
||||||
|
self.stack.push(TriGState::PrefixExpectPrefix); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::LangTag("prefix") => { |
||||||
|
self.stack.push(TriGState::ExpectDot); |
||||||
|
self.stack.push(TriGState::PrefixExpectPrefix); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::LangTag("base") => { |
||||||
|
self.stack.push(TriGState::ExpectDot); |
||||||
|
self.stack.push(TriGState::BaseExpectIri); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("graph") && self.with_graph_name => { |
||||||
|
self.stack.push(TriGState::WrappedGraph); |
||||||
|
self.stack.push(TriGState::GraphName); |
||||||
|
self |
||||||
|
} |
||||||
|
token @ N3Token::Punctuation("{") if self.with_graph_name => { |
||||||
|
self.stack.push(TriGState::WrappedGraph); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
token => { |
||||||
|
self.stack.push(TriGState::TriplesOrGraph); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
} |
||||||
|
}, |
||||||
|
TriGState::ExpectDot => { |
||||||
|
self.cur_subject.pop(); |
||||||
|
if token == N3Token::Punctuation(".") { |
||||||
|
self |
||||||
|
} else { |
||||||
|
errors.push("A dot is expected at the end of statements".into()); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
}, |
||||||
|
TriGState::BaseExpectIri => match token { |
||||||
|
N3Token::IriRef(iri) => { |
||||||
|
self.lexer_options.base_iri = Some(iri); |
||||||
|
self |
||||||
|
} |
||||||
|
_ => self.error(errors, "The BASE keyword should be followed by an IRI"), |
||||||
|
}, |
||||||
|
TriGState::PrefixExpectPrefix => match token { |
||||||
|
N3Token::PrefixedName { prefix, local, .. } if local.is_empty() => { |
||||||
|
self.stack.push(TriGState::PrefixExpectIri { name: prefix.to_owned() }); |
||||||
|
self |
||||||
|
} |
||||||
|
_ => { |
||||||
|
self.error(errors, "The PREFIX keyword should be followed by a prefix like 'ex:'") |
||||||
|
} |
||||||
|
}, |
||||||
|
TriGState::PrefixExpectIri { name } => match token { |
||||||
|
N3Token::IriRef(iri) => { |
||||||
|
self.prefixes.insert(name, iri); |
||||||
|
self |
||||||
|
} |
||||||
|
_ => self.error(errors, "The PREFIX declaration should be followed by a prefix and its value as an IRI"), |
||||||
|
}, |
||||||
|
// [3g] triplesOrGraph ::= labelOrSubject ( wrappedGraph | predicateObjectList '.' ) | quotedTriple predicateObjectList '.'
|
||||||
|
// [4g] triples2 ::= blankNodePropertyList predicateObjectList? '.' | collection predicateObjectList '.'
|
||||||
|
TriGState::TriplesOrGraph => match token { |
||||||
|
N3Token::IriRef(iri) => { |
||||||
|
self.stack.push(TriGState::WrappedGraphOrPredicateObjectList { |
||||||
|
term: NamedNode::new_unchecked(iri.into_inner()).into() |
||||||
|
}); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { |
||||||
|
Ok(t) => { |
||||||
|
self.stack.push(TriGState::WrappedGraphOrPredicateObjectList { |
||||||
|
term: t.into() |
||||||
|
}); |
||||||
|
self |
||||||
|
}, |
||||||
|
Err(e) => self.error(errors, e) |
||||||
|
} |
||||||
|
N3Token::BlankNodeLabel(label) => { |
||||||
|
self.stack.push(TriGState::WrappedGraphOrPredicateObjectList { |
||||||
|
term: BlankNode::new_unchecked(label).into() |
||||||
|
}); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Punctuation("[") => { |
||||||
|
self.stack.push(TriGState::WrappedGraphBlankNodePropertyListCurrent); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Punctuation("(") => { |
||||||
|
self.stack.push(TriGState::ExpectDot); |
||||||
|
self.stack.push(TriGState::PredicateObjectList); |
||||||
|
self.stack.push(TriGState::SubjectCollectionBeginning); |
||||||
|
self |
||||||
|
} |
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
N3Token::Punctuation("<<") if self.with_quoted_triples => { |
||||||
|
self.stack.push(TriGState::ExpectDot); |
||||||
|
self.stack.push(TriGState::PredicateObjectList); |
||||||
|
self.stack.push(TriGState::SubjectQuotedTripleEnd); |
||||||
|
self.stack.push(TriGState::QuotedObject); |
||||||
|
self.stack.push(TriGState::Verb); |
||||||
|
self.stack.push(TriGState::QuotedSubject); |
||||||
|
self |
||||||
|
} |
||||||
|
token => { |
||||||
|
self.error(errors, format!("The token {token:?} is not a valid subject or graph name")) |
||||||
|
} |
||||||
|
} |
||||||
|
TriGState::WrappedGraphOrPredicateObjectList { term } => { |
||||||
|
if token == N3Token::Punctuation("{") && self.with_graph_name { |
||||||
|
self.cur_graph = term.into(); |
||||||
|
self.stack.push(TriGState::WrappedGraph); |
||||||
|
} else { |
||||||
|
self.cur_subject.push(term.into()); |
||||||
|
self.stack.push(TriGState::ExpectDot); |
||||||
|
self.stack.push(TriGState::PredicateObjectList); |
||||||
|
} |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
TriGState::WrappedGraphBlankNodePropertyListCurrent => if token == N3Token::Punctuation("]") { |
||||||
|
self.stack.push(TriGState::WrappedGraphOrPredicateObjectList { |
||||||
|
term: BlankNode::default().into() |
||||||
|
}); |
||||||
|
self |
||||||
|
} else { |
||||||
|
self.cur_subject.push(BlankNode::default().into()); |
||||||
|
self.stack.push(TriGState::ExpectDot); |
||||||
|
self.stack.push(TriGState::SubjectBlankNodePropertyListEnd); |
||||||
|
self.stack.push(TriGState::PredicateObjectList); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
TriGState::SubjectBlankNodePropertyListEnd => if token == N3Token::Punctuation("]") { |
||||||
|
self.stack.push(TriGState::SubjectBlankNodePropertyListAfter ); |
||||||
|
self |
||||||
|
} else { |
||||||
|
errors.push("blank node property lists should end with a ']'".into()); |
||||||
|
self.stack.push(TriGState::SubjectBlankNodePropertyListAfter ); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
TriGState::SubjectBlankNodePropertyListAfter => if matches!(token, N3Token::Punctuation("." | "}")) { |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} else { |
||||||
|
self.stack.push(TriGState::PredicateObjectList); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
TriGState::SubjectCollectionBeginning => { |
||||||
|
match token { |
||||||
|
N3Token::Punctuation(")") => { |
||||||
|
self.cur_subject.push(rdf::NIL.into()); |
||||||
|
self |
||||||
|
} |
||||||
|
token => { |
||||||
|
let root = BlankNode::default(); |
||||||
|
self.cur_subject.push(root.clone().into()); |
||||||
|
self.cur_subject.push(root.into()); |
||||||
|
self.cur_predicate.push(rdf::FIRST.into()); |
||||||
|
self.stack.push(TriGState::SubjectCollectionPossibleEnd); |
||||||
|
self.stack.push(TriGState::Object); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
} |
||||||
|
}, |
||||||
|
TriGState::SubjectCollectionPossibleEnd => { |
||||||
|
let old = self.cur_subject.pop().unwrap(); |
||||||
|
self.cur_object.pop(); |
||||||
|
match token { |
||||||
|
N3Token::Punctuation(")") => { |
||||||
|
self.cur_predicate.pop(); |
||||||
|
results.push(Quad::new( |
||||||
|
old, |
||||||
|
rdf::REST, |
||||||
|
rdf::NIL, |
||||||
|
self.cur_graph.clone() |
||||||
|
)); |
||||||
|
self |
||||||
|
} |
||||||
|
token => { |
||||||
|
let new = BlankNode::default(); |
||||||
|
results.push(Quad::new( |
||||||
|
old, |
||||||
|
rdf::REST, |
||||||
|
new.clone(), |
||||||
|
self.cur_graph.clone() |
||||||
|
)); |
||||||
|
self.cur_subject.push(new.into()); |
||||||
|
self.stack.push(TriGState::ObjectCollectionPossibleEnd); |
||||||
|
self.stack.push(TriGState::Object); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
// [5g] wrappedGraph ::= '{' triplesBlock? '}'
|
||||||
|
// [6g] triplesBlock ::= triples ('.' triplesBlock?)?
|
||||||
|
TriGState::WrappedGraph => if token == N3Token::Punctuation("{") { |
||||||
|
self.stack.push(TriGState::WrappedGraphPossibleEnd); |
||||||
|
self.stack.push(TriGState::Triples); |
||||||
|
self |
||||||
|
} else { |
||||||
|
self.error(errors, "The GRAPH keyword should be followed by a graph name and a value in '{'") |
||||||
|
}, |
||||||
|
TriGState::WrappedGraphPossibleEnd => { |
||||||
|
self.cur_subject.pop(); |
||||||
|
match token { |
||||||
|
N3Token::Punctuation("}") => { |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Punctuation(".") => { |
||||||
|
self.stack.push(TriGState::WrappedGraphPossibleEnd); |
||||||
|
self.stack.push(TriGState::Triples); |
||||||
|
self |
||||||
|
} |
||||||
|
token => { |
||||||
|
errors.push("A '}' or a '.' is expected at the end of a graph block".into()); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
// [6] triples ::= subject predicateObjectList | blankNodePropertyList predicateObjectList?
|
||||||
|
// [10] subject ::= iri | BlankNode | collection | quotedTriple
|
||||||
|
TriGState::Triples => match token { |
||||||
|
N3Token::Punctuation("}") => { |
||||||
|
self.recognize_next(token, results, errors) // Early end
|
||||||
|
}, |
||||||
|
N3Token::Punctuation("[") => { |
||||||
|
self.cur_subject.push(BlankNode::default().into()); |
||||||
|
self.stack.push(TriGState::TriplesBlankNodePropertyListCurrent); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::IriRef(iri) => { |
||||||
|
self.cur_subject.push(NamedNode::new_unchecked(iri.into_inner()).into()); |
||||||
|
self.stack.push(TriGState::PredicateObjectList); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { |
||||||
|
Ok(t) => { |
||||||
|
self.cur_subject.push(t.into()); |
||||||
|
self.stack.push(TriGState::PredicateObjectList); |
||||||
|
self |
||||||
|
}, |
||||||
|
Err(e) => self.error(errors, e) |
||||||
|
} |
||||||
|
N3Token::BlankNodeLabel(label) => { |
||||||
|
self.cur_subject.push(BlankNode::new_unchecked(label).into()); |
||||||
|
self.stack.push(TriGState::PredicateObjectList); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Punctuation("(") => { |
||||||
|
self.stack.push(TriGState::PredicateObjectList); |
||||||
|
self.stack.push(TriGState::SubjectCollectionBeginning); |
||||||
|
self |
||||||
|
} |
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
N3Token::Punctuation("<<") if self.with_quoted_triples => { |
||||||
|
self.stack.push(TriGState::PredicateObjectList); |
||||||
|
self.stack.push(TriGState::SubjectQuotedTripleEnd); |
||||||
|
self.stack.push(TriGState::QuotedObject); |
||||||
|
self.stack.push(TriGState::Verb); |
||||||
|
self.stack.push(TriGState::QuotedSubject); |
||||||
|
self |
||||||
|
} |
||||||
|
token => { |
||||||
|
self.error(errors, format!("The token {token:?} is not a valid RDF subject")) |
||||||
|
} |
||||||
|
}, |
||||||
|
TriGState::TriplesBlankNodePropertyListCurrent => if token == N3Token::Punctuation("]") { |
||||||
|
self.stack.push(TriGState::PredicateObjectList); |
||||||
|
self |
||||||
|
} else { |
||||||
|
self.stack.push(TriGState::SubjectBlankNodePropertyListEnd); |
||||||
|
self.stack.push(TriGState::PredicateObjectList); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
// [7g] labelOrSubject ::= iri | BlankNode
|
||||||
|
TriGState::GraphName => match token { |
||||||
|
N3Token::IriRef(iri) => { |
||||||
|
self.cur_graph = NamedNode::new_unchecked(iri.into_inner()).into(); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { |
||||||
|
Ok(t) => { |
||||||
|
self.cur_graph = t.into(); |
||||||
|
self |
||||||
|
}, |
||||||
|
Err(e) => self.error(errors, e) |
||||||
|
} |
||||||
|
N3Token::BlankNodeLabel(label) => { |
||||||
|
self.cur_graph = BlankNode::new_unchecked(label).into(); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Punctuation("[") => { |
||||||
|
self.stack.push(TriGState::GraphNameAnonEnd); |
||||||
|
self |
||||||
|
} |
||||||
|
token => { |
||||||
|
self.error(errors, format!("The token {token:?} is not a valid graph name")) |
||||||
|
} |
||||||
|
} |
||||||
|
TriGState::GraphNameAnonEnd => if token == N3Token::Punctuation("]") { |
||||||
|
self.cur_graph = BlankNode::default().into(); |
||||||
|
self |
||||||
|
} else { |
||||||
|
self.error(errors, "Anonymous blank node with a property list are not allowed as graph name") |
||||||
|
} |
||||||
|
// [7] predicateObjectList ::= verb objectList (';' (verb objectList)?)*
|
||||||
|
TriGState::PredicateObjectList => { |
||||||
|
self.stack.push(TriGState::PredicateObjectListEnd); |
||||||
|
self.stack.push(TriGState::ObjectsList); |
||||||
|
self.stack.push(TriGState::Verb); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
}, |
||||||
|
TriGState::PredicateObjectListEnd => { |
||||||
|
self.cur_predicate.pop(); |
||||||
|
if token == N3Token::Punctuation(";") { |
||||||
|
self.stack.push(TriGState::PredicateObjectListPossibleContinuation); |
||||||
|
self |
||||||
|
} else { |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
}, |
||||||
|
TriGState::PredicateObjectListPossibleContinuation => if token == N3Token::Punctuation(";") { |
||||||
|
self.stack.push(TriGState::PredicateObjectListPossibleContinuation); |
||||||
|
self |
||||||
|
} else if matches!(token, N3Token::Punctuation("." | "}" | "]")) { |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} else { |
||||||
|
self.stack.push(TriGState::PredicateObjectListEnd); |
||||||
|
self.stack.push(TriGState::ObjectsList); |
||||||
|
self.stack.push(TriGState::Verb); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
}, |
||||||
|
// [8] objectList ::= object annotation? ( ',' object annotation? )*
|
||||||
|
// [30t] annotation ::= '{|' predicateObjectList '|}'
|
||||||
|
TriGState::ObjectsList => { |
||||||
|
self.stack.push(TriGState::ObjectsListEnd); |
||||||
|
self.stack.push(TriGState::Object); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
TriGState::ObjectsListEnd => { |
||||||
|
match token { |
||||||
|
N3Token::Punctuation(",") => { |
||||||
|
self.cur_object.pop(); |
||||||
|
self.stack.push(TriGState::ObjectsListEnd); |
||||||
|
self.stack.push(TriGState::Object); |
||||||
|
self |
||||||
|
}, |
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
N3Token::Punctuation("{|") => { |
||||||
|
let triple = Triple::new( |
||||||
|
self.cur_subject.last().unwrap().clone(), |
||||||
|
self.cur_predicate.last().unwrap().clone(), |
||||||
|
self.cur_object.pop().unwrap() |
||||||
|
); |
||||||
|
self.cur_subject.push(triple.into()); |
||||||
|
self.stack.push(TriGState::AnnotationEnd); |
||||||
|
self.stack.push(TriGState::PredicateObjectList); |
||||||
|
self |
||||||
|
} |
||||||
|
token => { |
||||||
|
self.cur_object.pop(); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
} |
||||||
|
}, |
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
TriGState::AnnotationEnd => { |
||||||
|
self.cur_subject.pop(); |
||||||
|
self.stack.push(TriGState::ObjectsListAfterAnnotation); |
||||||
|
if token == N3Token::Punctuation("|}") { |
||||||
|
self |
||||||
|
} else { |
||||||
|
self.error(errors, "Annotations should end with '|}'") |
||||||
|
} |
||||||
|
}, |
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
TriGState::ObjectsListAfterAnnotation => if token == N3Token::Punctuation(",") { |
||||||
|
self.stack.push(TriGState::ObjectsListEnd); |
||||||
|
self.stack.push(TriGState::Object); |
||||||
|
self |
||||||
|
} else { |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
}, |
||||||
|
// [9] verb ::= predicate | 'a'
|
||||||
|
// [11] predicate ::= iri
|
||||||
|
TriGState::Verb => match token { |
||||||
|
N3Token::PlainKeyword("a") => { |
||||||
|
self.cur_predicate.push(rdf::TYPE.into()); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::IriRef(iri) => { |
||||||
|
self.cur_predicate.push(NamedNode::new_unchecked(iri.into_inner())); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { |
||||||
|
Ok(t) => { |
||||||
|
self.cur_predicate.push(t); |
||||||
|
self |
||||||
|
}, |
||||||
|
Err(e) => self.error(errors, e) |
||||||
|
} |
||||||
|
token => { |
||||||
|
self.error(errors, format!("The token {token:?} is not a valid predicate")) |
||||||
|
} |
||||||
|
} |
||||||
|
// [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal | quotedTriple
|
||||||
|
// [13] literal ::= RDFLiteral | NumericLiteral | BooleanLiteral
|
||||||
|
// [14] blank ::= BlankNode | collection
|
||||||
|
// [15] blankNodePropertyList ::= '[' predicateObjectList ']'
|
||||||
|
// [16] collection ::= '(' object* ')'
|
||||||
|
// [17] NumericLiteral ::= INTEGER | DECIMAL | DOUBLE
|
||||||
|
// [128s] RDFLiteral ::= String (LANGTAG | '^^' iri)?
|
||||||
|
// [133s] BooleanLiteral ::= 'true' | 'false'
|
||||||
|
// [18] String ::= STRING_LITERAL_QUOTE | STRING_LITERAL_SINGLE_QUOTE | STRING_LITERAL_LONG_SINGLE_QUOTE | STRING_LITERAL_LONG_QUOTE
|
||||||
|
// [135s] iri ::= IRIREF | PrefixedName
|
||||||
|
// [136s] PrefixedName ::= PNAME_LN | PNAME_NS
|
||||||
|
// [137s] BlankNode ::= BLANK_NODE_LABEL | ANON
|
||||||
|
TriGState::Object => match token { |
||||||
|
N3Token::IriRef(iri) => { |
||||||
|
self.cur_object.push(NamedNode::new_unchecked(iri.into_inner()).into()); |
||||||
|
self.emit_quad(results); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { |
||||||
|
Ok(t) => { |
||||||
|
self.cur_object.push(t.into()); |
||||||
|
self.emit_quad(results); |
||||||
|
self |
||||||
|
}, |
||||||
|
Err(e) => self.error(errors, e) |
||||||
|
} |
||||||
|
N3Token::BlankNodeLabel(label) => { |
||||||
|
self.cur_object.push(BlankNode::new_unchecked(label).into()); |
||||||
|
self.emit_quad(results); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Punctuation("[") => { |
||||||
|
self.stack.push(TriGState::ObjectBlankNodePropertyListCurrent); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Punctuation("(") => { |
||||||
|
self.stack.push(TriGState::ObjectCollectionBeginning); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::String(value) => { |
||||||
|
self.stack.push(TriGState::LiteralPossibleSuffix { value, emit: true }); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Integer(v) => { |
||||||
|
self.cur_object.push(Literal::new_typed_literal(v, xsd::INTEGER).into()); |
||||||
|
self.emit_quad(results); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Decimal(v) => { |
||||||
|
self.cur_object.push(Literal::new_typed_literal(v, xsd::DECIMAL).into()); |
||||||
|
self.emit_quad(results); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Double(v) => { |
||||||
|
self.cur_object.push(Literal::new_typed_literal(v, xsd::DOUBLE).into()); |
||||||
|
self.emit_quad(results); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::PlainKeyword("true") => { |
||||||
|
self.cur_object.push(Literal::new_typed_literal("true", xsd::BOOLEAN).into()); |
||||||
|
self.emit_quad(results); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::PlainKeyword("false") => { |
||||||
|
self.cur_object.push(Literal::new_typed_literal("false", xsd::BOOLEAN).into()); |
||||||
|
self.emit_quad(results); |
||||||
|
self |
||||||
|
} |
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
N3Token::Punctuation("<<") if self.with_quoted_triples => { |
||||||
|
self.stack.push(TriGState::ObjectQuotedTripleEnd { emit: true }); |
||||||
|
self.stack.push(TriGState::QuotedObject); |
||||||
|
self.stack.push(TriGState::Verb); |
||||||
|
self.stack.push(TriGState::QuotedSubject); |
||||||
|
self |
||||||
|
} |
||||||
|
token => { |
||||||
|
self.error(errors, format!("This is not a valid RDF object: {token:?}")) |
||||||
|
} |
||||||
|
|
||||||
|
} |
||||||
|
TriGState::ObjectBlankNodePropertyListCurrent => if token == N3Token::Punctuation("]") { |
||||||
|
self.cur_object.push(BlankNode::default().into()); |
||||||
|
self.emit_quad(results); |
||||||
|
self |
||||||
|
} else { |
||||||
|
self.cur_subject.push(BlankNode::default().into()); |
||||||
|
self.stack.push(TriGState::ObjectBlankNodePropertyListEnd); |
||||||
|
self.stack.push(TriGState::PredicateObjectList); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
TriGState::ObjectBlankNodePropertyListEnd => if token == N3Token::Punctuation("]") { |
||||||
|
self.cur_object.push(self.cur_subject.pop().unwrap().into()); |
||||||
|
self.emit_quad(results); |
||||||
|
self |
||||||
|
} else { |
||||||
|
self.error(errors, "blank node property lists should end with a ']'") |
||||||
|
} |
||||||
|
TriGState::ObjectCollectionBeginning => match token { |
||||||
|
N3Token::Punctuation(")") => { |
||||||
|
self.cur_object.push(rdf::NIL.into()); |
||||||
|
self.emit_quad(results); |
||||||
|
self |
||||||
|
} |
||||||
|
token => { |
||||||
|
let root = BlankNode::default(); |
||||||
|
self.cur_object.push(root.clone().into()); |
||||||
|
self.emit_quad(results); |
||||||
|
self.cur_subject.push(root.into()); |
||||||
|
self.cur_predicate.push(rdf::FIRST.into()); |
||||||
|
self.stack.push(TriGState::ObjectCollectionPossibleEnd); |
||||||
|
self.stack.push(TriGState::Object); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
}, |
||||||
|
TriGState::ObjectCollectionPossibleEnd => { |
||||||
|
let old = self.cur_subject.pop().unwrap(); |
||||||
|
self.cur_object.pop(); |
||||||
|
match token { |
||||||
|
N3Token::Punctuation(")") => { |
||||||
|
self.cur_predicate.pop(); |
||||||
|
results.push(Quad::new(old, |
||||||
|
rdf::REST, |
||||||
|
rdf::NIL, |
||||||
|
self.cur_graph.clone() |
||||||
|
)); |
||||||
|
self |
||||||
|
} |
||||||
|
token => { |
||||||
|
let new = BlankNode::default(); |
||||||
|
results.push(Quad::new(old, |
||||||
|
rdf::REST, |
||||||
|
new.clone(), |
||||||
|
self.cur_graph.clone() |
||||||
|
)); |
||||||
|
self.cur_subject.push(new.into()); |
||||||
|
self.stack.push(TriGState::ObjectCollectionPossibleEnd); |
||||||
|
self.stack.push(TriGState::Object); |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
TriGState::LiteralPossibleSuffix { value, emit } => { |
||||||
|
match token { |
||||||
|
N3Token::LangTag(lang) => { |
||||||
|
self.cur_object.push(Literal::new_language_tagged_literal_unchecked(value, lang.to_ascii_lowercase()).into()); |
||||||
|
if emit { |
||||||
|
self.emit_quad(results); |
||||||
|
} |
||||||
|
self |
||||||
|
}, |
||||||
|
N3Token::Punctuation("^^") => { |
||||||
|
self.stack.push(TriGState::LiteralExpectDatatype { value, emit }); |
||||||
|
self |
||||||
|
} |
||||||
|
token => { |
||||||
|
self.cur_object.push(Literal::new_simple_literal(value).into()); |
||||||
|
if emit { |
||||||
|
self.emit_quad(results); |
||||||
|
} |
||||||
|
self.recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
TriGState::LiteralExpectDatatype { value, emit } => { |
||||||
|
match token { |
||||||
|
N3Token::IriRef(datatype) => { |
||||||
|
self.cur_object.push(Literal::new_typed_literal(value, NamedNode::new_unchecked(datatype.into_inner())).into()); |
||||||
|
if emit { |
||||||
|
self.emit_quad(results); |
||||||
|
} |
||||||
|
self |
||||||
|
}, |
||||||
|
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { |
||||||
|
Ok(t) => { |
||||||
|
self.cur_object.push(Literal::new_typed_literal(value, t).into()); |
||||||
|
if emit { |
||||||
|
self.emit_quad(results); |
||||||
|
} |
||||||
|
self |
||||||
|
}, |
||||||
|
Err(e) => self.error(errors, e) |
||||||
|
} |
||||||
|
token => { |
||||||
|
self.error(errors, format!("Expecting a datatype IRI after '^^, found {token:?}")).recognize_next(token, results, errors) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
// [27t] quotedTriple ::= '<<' qtSubject verb qtObject '>>'
|
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
TriGState::SubjectQuotedTripleEnd => { |
||||||
|
let triple = Triple::new( |
||||||
|
self.cur_subject.pop().unwrap(), |
||||||
|
self.cur_predicate.pop().unwrap(), |
||||||
|
self.cur_object.pop().unwrap() |
||||||
|
); |
||||||
|
self.cur_subject.push(triple.into()); |
||||||
|
if token == N3Token::Punctuation(">>") { |
||||||
|
self |
||||||
|
} else { |
||||||
|
self.error(errors, format!("Expecting '>>' to close a quoted triple, found {token:?}")) |
||||||
|
} |
||||||
|
} |
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
TriGState::ObjectQuotedTripleEnd { emit } => { |
||||||
|
let triple = Triple::new( |
||||||
|
self.cur_subject.pop().unwrap(), |
||||||
|
self.cur_predicate.pop().unwrap(), |
||||||
|
self.cur_object.pop().unwrap() |
||||||
|
); |
||||||
|
self.cur_object.push(triple.into()); |
||||||
|
if emit { |
||||||
|
self.emit_quad(results); |
||||||
|
} |
||||||
|
if token == N3Token::Punctuation(">>") { |
||||||
|
self |
||||||
|
} else { |
||||||
|
self.error(errors, format!("Expecting '>>' to close a quoted triple, found {token:?}")) |
||||||
|
} |
||||||
|
} |
||||||
|
// [28t] qtSubject ::= iri | BlankNode | quotedTriple
|
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
TriGState::QuotedSubject => match token { |
||||||
|
N3Token::Punctuation("[") => { |
||||||
|
self.cur_subject.push(BlankNode::default().into()); |
||||||
|
self.stack.push(TriGState::QuotedAnonEnd); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::IriRef(iri) => { |
||||||
|
self.cur_subject.push(NamedNode::new_unchecked(iri.into_inner()).into()); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { |
||||||
|
Ok(t) => { |
||||||
|
self.cur_subject.push(t.into()); |
||||||
|
self |
||||||
|
}, |
||||||
|
Err(e) => self.error(errors, e) |
||||||
|
} |
||||||
|
N3Token::BlankNodeLabel(label) => { |
||||||
|
self.cur_subject.push(BlankNode::new_unchecked(label).into()); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Punctuation("<<") => { |
||||||
|
self.stack.push(TriGState::SubjectQuotedTripleEnd); |
||||||
|
self.stack.push(TriGState::QuotedObject); |
||||||
|
self.stack.push(TriGState::Verb); |
||||||
|
self.stack.push(TriGState::QuotedSubject); |
||||||
|
self |
||||||
|
} |
||||||
|
token => self.error(errors, format!("This is not a valid RDF quoted triple subject: {token:?}")) |
||||||
|
} |
||||||
|
// [29t] qtObject ::= iri | BlankNode | literal | quotedTriple
|
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
TriGState::QuotedObject => match token { |
||||||
|
N3Token::Punctuation("[") => { |
||||||
|
self.cur_object.push(BlankNode::default().into()); |
||||||
|
self.stack.push(TriGState::QuotedAnonEnd); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::IriRef(iri) => { |
||||||
|
self.cur_object.push(NamedNode::new_unchecked(iri.into_inner()).into()); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { |
||||||
|
Ok(t) => { |
||||||
|
self.cur_object.push(t.into()); |
||||||
|
self |
||||||
|
}, |
||||||
|
Err(e) => self.error(errors, e) |
||||||
|
} |
||||||
|
N3Token::BlankNodeLabel(label) => { |
||||||
|
self.cur_object.push(BlankNode::new_unchecked(label).into()); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::String(value) => { |
||||||
|
self.stack.push(TriGState::LiteralPossibleSuffix { value, emit: false }); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Integer(v) => { |
||||||
|
self.cur_object.push(Literal::new_typed_literal(v, xsd::INTEGER).into()); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Decimal(v) => { |
||||||
|
self.cur_object.push(Literal::new_typed_literal(v, xsd::DECIMAL).into()); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Double(v) => { |
||||||
|
self.cur_object.push(Literal::new_typed_literal(v, xsd::DOUBLE).into()); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::PlainKeyword("true") => { |
||||||
|
self.cur_object.push(Literal::new_typed_literal("true", xsd::BOOLEAN).into()); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::PlainKeyword("false") => { |
||||||
|
self.cur_object.push(Literal::new_typed_literal("false", xsd::BOOLEAN).into()); |
||||||
|
self |
||||||
|
} |
||||||
|
N3Token::Punctuation("<<") => { |
||||||
|
self.stack.push(TriGState::ObjectQuotedTripleEnd { emit: false }); |
||||||
|
self.stack.push(TriGState::QuotedObject); |
||||||
|
self.stack.push(TriGState::Verb); |
||||||
|
self.stack.push(TriGState::QuotedSubject); |
||||||
|
self |
||||||
|
} |
||||||
|
token => self.error(errors, format!("This is not a valid RDF quoted triple object: {token:?}")) |
||||||
|
} |
||||||
|
#[cfg(feature = "rdf-star")] |
||||||
|
TriGState::QuotedAnonEnd => if token == N3Token::Punctuation("]") { |
||||||
|
self |
||||||
|
} else { |
||||||
|
self.error(errors, "Anonymous blank node with a property list are not allowed in quoted triples") |
||||||
|
} |
||||||
|
} |
||||||
|
} else if token == N3Token::Punctuation(".") || token == N3Token::Punctuation("}") { |
||||||
|
//TODO: be smarter depending if we are in '{' or not
|
||||||
|
self.stack.push(TriGState::TriGDoc); |
||||||
|
self |
||||||
|
} else { |
||||||
|
self |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Called once the token stream is exhausted: checks that the recognizer
/// stopped at a statement boundary and performs last-chance error recovery.
fn recognize_end(
    mut self,
    results: &mut Vec<Self::Output>,
    errors: &mut Vec<RuleRecognizerError>,
) {
    match &*self.stack {
        // Clean end of document: no partially-built triple may remain.
        [] | [TriGState::TriGDoc] => {
            debug_assert!(self.cur_subject.is_empty());
            debug_assert!(self.cur_predicate.is_empty());
            debug_assert!(self.cur_object.is_empty());
        }
        // A literal object was read but the closing '.' never came:
        // emit the quad anyway (recovery) and report the missing dot.
        [.., TriGState::LiteralPossibleSuffix { value, emit: true }] => {
            self.cur_object
                .push(Literal::new_simple_literal(value).into());
            self.emit_quad(results);
            errors.push("Triples should be followed by a dot".into())
        }
        _ => errors.push("Unexpected end".into()), //TODO
    }
}
||||||
|
|
||||||
|
/// Exposes the options (currently only the base IRI) the lexer needs
/// to produce tokens for this recognizer.
fn lexer_options(&self) -> &N3LexerOptions {
    &self.lexer_options
}
||||||
|
} |
||||||
|
|
||||||
|
impl TriGRecognizer {
    /// Builds a `Parser` coupling the Turtle/TriG lexer with a fresh recognizer.
    ///
    /// * `with_graph_name` — allow TriG graph blocks (as opposed to plain Turtle).
    /// * `with_quoted_triples` — enable RDF-star `<< ... >>` syntax (feature-gated).
    /// * `base_iri` — base used by the lexer to resolve relative IRIs.
    /// * `prefixes` — initial prefix declarations.
    pub fn new_parser(
        with_graph_name: bool,
        #[cfg(feature = "rdf-star")] with_quoted_triples: bool,
        base_iri: Option<Iri<String>>,
        prefixes: HashMap<String, Iri<String>>,
    ) -> Parser<Self> {
        Parser::new(
            Lexer::new(
                N3Lexer::new(N3LexerMode::Turtle),
                MIN_BUFFER_SIZE,
                MAX_BUFFER_SIZE,
                true, // in Turtle/TriG, line jumps are ordinary whitespace
                Some(b"#"), // '#' starts a line comment
            ),
            TriGRecognizer {
                stack: vec![TriGState::TriGDoc],
                with_graph_name,
                #[cfg(feature = "rdf-star")]
                with_quoted_triples,
                lexer_options: N3LexerOptions { base_iri },
                prefixes,
                cur_subject: Vec::new(),
                cur_predicate: Vec::new(),
                cur_object: Vec::new(),
                cur_graph: GraphName::DefaultGraph,
            },
        )
    }

    /// Records `msg` as a parse error and resets the recognizer to a clean
    /// state (empty stacks, default graph) so parsing can resume.
    #[must_use]
    fn error(
        mut self,
        errors: &mut Vec<RuleRecognizerError>,
        msg: impl Into<RuleRecognizerError>,
    ) -> Self {
        errors.push(msg.into());
        self.stack.clear();
        self.cur_subject.clear();
        self.cur_predicate.clear();
        self.cur_object.clear();
        self.cur_graph = GraphName::DefaultGraph;
        self
    }

    /// Emits a quad built from the tops of the subject/predicate/object
    /// stacks and the current graph name.
    /// The `unwrap`s require callers to have pushed all three terms first.
    fn emit_quad(&mut self, results: &mut Vec<Quad>) {
        results.push(Quad::new(
            self.cur_subject.last().unwrap().clone(),
            self.cur_predicate.last().unwrap().clone(),
            self.cur_object.last().unwrap().clone(),
            self.cur_graph.clone(),
        ));
    }
}
||||||
|
|
||||||
|
/// States of the TriG/Turtle recognizer automaton.
/// `TriGRecognizer` keeps a stack of these; each token pops the top state
/// and may push follow-up states.
#[derive(Debug)]
enum TriGState {
    // Statement boundary at the top level of a TriG document.
    TriGDoc,
    // A terminating '.' is expected (handled outside this excerpt).
    ExpectDot,
    // After a base declaration: its IRI is expected (handled outside this excerpt).
    BaseExpectIri,
    // After a prefix declaration: the prefix name is expected (handled outside this excerpt).
    PrefixExpectPrefix,
    // After a prefix name: the prefix IRI is expected (handled outside this excerpt).
    PrefixExpectIri {
        name: String,
    },
    // Start of either a triples block or a graph block (handled outside this excerpt).
    TriplesOrGraph,
    // Inside '[' ... ']' opening a wrapped graph (handled outside this excerpt).
    WrappedGraphBlankNodePropertyListCurrent,
    // Expecting ']' closing a blank node property list used as subject.
    SubjectBlankNodePropertyListEnd,
    // After a subject blank node property list (handled outside this excerpt).
    SubjectBlankNodePropertyListAfter,
    // Just after '(' opening a collection in subject position (handled outside this excerpt).
    SubjectCollectionBeginning,
    // Inside a subject collection: either ')' ends it or another element follows.
    SubjectCollectionPossibleEnd,
    // `term` could start either a wrapped graph or a predicate-object list
    // (handled outside this excerpt).
    WrappedGraphOrPredicateObjectList {
        term: NamedOrBlankNode,
    },
    // Expecting '{' opening a graph block after the GRAPH keyword.
    WrappedGraph,
    // Inside '{ ... }': either '}' ends the graph or '.' continues it.
    WrappedGraphPossibleEnd,
    // Expecting a graph name (IRI, blank node or '[]').
    GraphName,
    // Expecting ']' closing an anonymous blank node used as graph name.
    GraphNameAnonEnd,
    // Expecting a triples subject.
    Triples,
    // Inside '[' ... ']' opening the subject of a triples statement.
    TriplesBlankNodePropertyListCurrent,
    // Expecting `verb objectList (';' ...)*`.
    PredicateObjectList,
    // After an object list: ';' continues the predicate-object list.
    PredicateObjectListEnd,
    // After ';': either another verb/object list or the end of the statement.
    PredicateObjectListPossibleContinuation,
    // Expecting `object (',' object)*`.
    ObjectsList,
    // After an object: ',' continues the list, '{|' opens an annotation.
    ObjectsListEnd,
    // Expecting '|}' closing an RDF-star annotation.
    #[cfg(feature = "rdf-star")]
    AnnotationEnd,
    // After an annotation: ',' may continue the object list.
    #[cfg(feature = "rdf-star")]
    ObjectsListAfterAnnotation,
    // Expecting a predicate IRI or the 'a' keyword.
    Verb,
    // Expecting an RDF object (IRI, blank node, literal, collection, ...).
    Object,
    // Inside '[' ... ']' in object position.
    ObjectBlankNodePropertyListCurrent,
    // Expecting ']' closing a blank node property list used as object.
    ObjectBlankNodePropertyListEnd,
    // Just after '(' opening a collection in object position.
    ObjectCollectionBeginning,
    // Inside an object collection: either ')' ends it or another element follows.
    ObjectCollectionPossibleEnd,
    // A string was read; a language tag or '^^' datatype may follow.
    LiteralPossibleSuffix {
        value: String,
        emit: bool, // whether to emit a quad once the literal is complete
    },
    // After '^^': the datatype IRI is expected.
    LiteralExpectDatatype {
        value: String,
        emit: bool, // whether to emit a quad once the literal is complete
    },
    // Expecting '>>' closing a quoted triple in subject position.
    #[cfg(feature = "rdf-star")]
    SubjectQuotedTripleEnd,
    // Expecting '>>' closing a quoted triple in object position.
    #[cfg(feature = "rdf-star")]
    ObjectQuotedTripleEnd {
        emit: bool, // whether to emit a quad for the quoted triple
    },
    // Expecting the subject of a quoted triple.
    #[cfg(feature = "rdf-star")]
    QuotedSubject,
    // Expecting the object of a quoted triple.
    #[cfg(feature = "rdf-star")]
    QuotedObject,
    // Expecting ']' closing an anonymous blank node inside a quoted triple.
    #[cfg(feature = "rdf-star")]
    QuotedAnonEnd,
}
@ -0,0 +1,280 @@ |
|||||||
|
use memchr::memchr2; |
||||||
|
use std::error::Error; |
||||||
|
use std::fmt; |
||||||
|
use std::io::{self, Read}; |
||||||
|
use std::ops::{Range, RangeInclusive}; |
||||||
|
|
||||||
|
/// A resumable tokenizer: recognizes one token at a time from a byte buffer.
pub trait TokenRecognizer {
    /// The token type; may borrow from the input buffer.
    type Token<'a>
    where
        Self: 'a;
    /// Tokenizer configuration (e.g. the base IRI for the N3 lexer).
    type Options: Default;

    /// Attempts to read a single token from the beginning of `data`.
    ///
    /// Returns `None` when more input is required, or `Some((consumed, result))`
    /// where `consumed` is the number of bytes covered by the token (or by the
    /// erroneous span). `is_ending` tells the recognizer that no byte will
    /// ever be appended after `data`.
    fn recognize_next_token<'a>(
        &mut self,
        data: &'a [u8],
        is_ending: bool,
        config: &Self::Options,
    ) -> Option<(usize, Result<Self::Token<'a>, TokenRecognizerError>)>;
}
||||||
|
|
||||||
|
/// An error raised by a `TokenRecognizer`.
pub struct TokenRecognizerError {
    // Byte range of the offending span, relative to the buffer passed to
    // `recognize_next_token`.
    pub position: Range<usize>,
    // Human-readable description of the problem.
    pub message: String,
}
||||||
|
|
||||||
|
impl<S: Into<String>> From<(Range<usize>, S)> for TokenRecognizerError { |
||||||
|
fn from((position, message): (Range<usize>, S)) -> Self { |
||||||
|
Self { |
||||||
|
position, |
||||||
|
message: message.into(), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
#[allow(clippy::range_plus_one)] |
||||||
|
impl<S: Into<String>> From<(RangeInclusive<usize>, S)> for TokenRecognizerError { |
||||||
|
fn from((position, message): (RangeInclusive<usize>, S)) -> Self { |
||||||
|
(*position.start()..*position.end() + 1, message).into() |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl<S: Into<String>> From<(usize, S)> for TokenRecognizerError { |
||||||
|
fn from((position, message): (usize, S)) -> Self { |
||||||
|
(position..=position, message).into() |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// A token together with its absolute byte position in the overall input
/// stream (as tracked by `Lexer::position`).
pub struct TokenWithPosition<T> {
    pub token: T,
    pub position: Range<usize>,
}
||||||
|
|
||||||
|
/// Incremental lexer: buffers input bytes (pushed or pulled), skips
/// whitespace and line comments, and feeds the rest to a `TokenRecognizer`.
pub struct Lexer<R: TokenRecognizer> {
    parser: R, // the token recognizer driven by this lexer
    data: Vec<u8>, // internal buffer; only data[start..end] holds valid input
    start: usize, // offset of the first unconsumed byte in `data`
    end: usize, // offset one past the last valid byte in `data`
    is_ending: bool, // true once the caller signalled end of input
    position: usize, // absolute byte position of `start` in the whole stream
    min_buffer_size: usize, // free space guaranteed per `extend_from_read`
    max_buffer_size: usize, // hard cap on buffer growth
    is_line_jump_whitespace: bool, // whether '\r'/'\n' count as skippable whitespace
    line_comment_start: Option<&'static [u8]>, // e.g. b"#"; None disables line comments
}
||||||
|
|
||||||
|
impl<R: TokenRecognizer> Lexer<R> { |
||||||
|
pub fn new( |
||||||
|
parser: R, |
||||||
|
min_buffer_size: usize, |
||||||
|
max_buffer_size: usize, |
||||||
|
is_line_jump_whitespace: bool, |
||||||
|
line_comment_start: Option<&'static [u8]>, |
||||||
|
) -> Self { |
||||||
|
Self { |
||||||
|
parser, |
||||||
|
data: Vec::new(), |
||||||
|
start: 0, |
||||||
|
end: 0, |
||||||
|
is_ending: false, |
||||||
|
position: 0, |
||||||
|
min_buffer_size, |
||||||
|
max_buffer_size, |
||||||
|
is_line_jump_whitespace, |
||||||
|
line_comment_start, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
pub fn extend_from_slice(&mut self, other: &[u8]) { |
||||||
|
self.shrink_if_useful(); |
||||||
|
self.data.truncate(self.end); |
||||||
|
self.data.extend_from_slice(other); |
||||||
|
self.end = self.data.len(); |
||||||
|
} |
||||||
|
|
||||||
|
pub fn end(&mut self) { |
||||||
|
self.is_ending = true; |
||||||
|
} |
||||||
|
|
||||||
|
pub fn extend_from_read(&mut self, read: &mut impl Read) -> io::Result<()> { |
||||||
|
self.shrink_if_useful(); |
||||||
|
let min_end = self.end + self.min_buffer_size; |
||||||
|
if min_end > self.max_buffer_size { |
||||||
|
return Err(io::Error::new( |
||||||
|
io::ErrorKind::OutOfMemory, |
||||||
|
format!( |
||||||
|
"The buffer maximal size is {} < {min_end}", |
||||||
|
self.max_buffer_size |
||||||
|
), |
||||||
|
)); |
||||||
|
} |
||||||
|
if self.data.len() < min_end { |
||||||
|
self.data.resize(min_end, 0); |
||||||
|
} |
||||||
|
if self.data.len() < self.data.capacity() { |
||||||
|
// We keep extending to have as much space as available without reallocation
|
||||||
|
self.data.resize(self.data.capacity(), 0); |
||||||
|
} |
||||||
|
let read = read.read(&mut self.data[self.end..])?; |
||||||
|
self.end += read; |
||||||
|
self.is_ending = read == 0; |
||||||
|
Ok(()) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn read_next( |
||||||
|
&mut self, |
||||||
|
options: &R::Options, |
||||||
|
) -> Option<Result<TokenWithPosition<R::Token<'_>>, LexerError>> { |
||||||
|
self.skip_whitespaces_and_comments()?; |
||||||
|
let (consumed, result) = if let Some(r) = self.parser.recognize_next_token( |
||||||
|
&self.data[self.start..self.end], |
||||||
|
self.is_ending, |
||||||
|
options, |
||||||
|
) { |
||||||
|
r |
||||||
|
} else { |
||||||
|
return if self.is_ending { |
||||||
|
if self.start == self.end { |
||||||
|
None // We have finished
|
||||||
|
} else { |
||||||
|
let error = LexerError { |
||||||
|
position: self.position..self.position + (self.end - self.start), |
||||||
|
message: "Unexpected end of file".into(), |
||||||
|
}; |
||||||
|
self.end = self.start; // We consume everything
|
||||||
|
Some(Err(error)) |
||||||
|
} |
||||||
|
} else { |
||||||
|
None |
||||||
|
}; |
||||||
|
}; |
||||||
|
debug_assert!( |
||||||
|
consumed > 0, |
||||||
|
"The lexer must consume at least one byte each time" |
||||||
|
); |
||||||
|
debug_assert!( |
||||||
|
self.start + consumed <= self.end, |
||||||
|
"The lexer tried to consumed {consumed} bytes but only {} bytes are readable", |
||||||
|
self.end - self.start |
||||||
|
); |
||||||
|
let old_position = self.position; |
||||||
|
self.start += consumed; |
||||||
|
self.position += consumed; |
||||||
|
Some(match result { |
||||||
|
Ok(token) => Ok(TokenWithPosition { |
||||||
|
token, |
||||||
|
position: old_position..self.position, |
||||||
|
}), |
||||||
|
Err(e) => Err(LexerError { |
||||||
|
position: e.position.start + self.position..e.position.end + self.position, |
||||||
|
message: e.message, |
||||||
|
}), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn is_end(&self) -> bool { |
||||||
|
self.is_ending && self.end == self.start |
||||||
|
} |
||||||
|
|
||||||
|
fn skip_whitespaces_and_comments(&mut self) -> Option<()> { |
||||||
|
loop { |
||||||
|
self.skip_whitespaces(); |
||||||
|
|
||||||
|
let buf = &self.data[self.start..self.end]; |
||||||
|
if let Some(line_comment_start) = self.line_comment_start { |
||||||
|
if buf.starts_with(line_comment_start) { |
||||||
|
// Comment
|
||||||
|
if let Some(end) = memchr2(b'\r', b'\n', &buf[line_comment_start.len()..]) { |
||||||
|
self.start += end + line_comment_start.len(); |
||||||
|
self.position += end + line_comment_start.len(); |
||||||
|
continue; |
||||||
|
} |
||||||
|
if self.is_ending { |
||||||
|
self.end = self.start; // EOF
|
||||||
|
return Some(()); |
||||||
|
} |
||||||
|
return None; // We need more data
|
||||||
|
} |
||||||
|
} |
||||||
|
return Some(()); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn skip_whitespaces(&mut self) { |
||||||
|
if self.is_line_jump_whitespace { |
||||||
|
for (i, c) in self.data[self.start..self.end].iter().enumerate() { |
||||||
|
if !matches!(c, b' ' | b'\t' | b'\r' | b'\n') { |
||||||
|
self.start += i; |
||||||
|
self.position += i; |
||||||
|
return; |
||||||
|
} |
||||||
|
//TODO: SIMD
|
||||||
|
} |
||||||
|
} else { |
||||||
|
for (i, c) in self.data[self.start..self.end].iter().enumerate() { |
||||||
|
if !matches!(c, b' ' | b'\t') { |
||||||
|
self.start += i; |
||||||
|
self.position += i; |
||||||
|
return; |
||||||
|
} |
||||||
|
//TODO: SIMD
|
||||||
|
} |
||||||
|
} |
||||||
|
// We only have whitespaces
|
||||||
|
self.position += self.end - self.start; |
||||||
|
self.end = self.start; |
||||||
|
} |
||||||
|
|
||||||
|
    /// Compacts the buffer by moving unread bytes to the front once the
    /// consumed prefix dominates, amortizing the copy cost.
    fn shrink_if_useful(&mut self) {
        if self.start * 2 > self.data.len() {
            // We have read more than half of the buffer, let's move the data to the beginning
            self.data.copy_within(self.start..self.end, 0);
            self.end -= self.start;
            self.start = 0;
        }
    }
||||||
|
} |
||||||
|
|
||||||
|
/// An error produced by the lexer while tokenizing the input.
///
/// It is composed of a message and a byte range in the input.
#[derive(Debug)]
pub struct LexerError {
    /// The invalid byte range in the input.
    position: Range<usize>,
    /// The human-readable error message.
    message: String,
}
||||||
|
|
||||||
|
impl LexerError {
    /// The invalid byte range in the input.
    pub fn position(&self) -> Range<usize> {
        self.position.clone()
    }

    /// The error message.
    pub fn message(&self) -> &str {
        &self.message
    }

    /// Consumes the error and returns the owned error message.
    pub fn into_message(self) -> String {
        self.message
    }
}
||||||
|
|
||||||
|
impl fmt::Display for LexerError { |
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||||
|
if self.position.start + 1 == self.position.end { |
||||||
|
write!( |
||||||
|
f, |
||||||
|
"Lexer error at byte {}: {}", |
||||||
|
self.position.start, self.message |
||||||
|
) |
||||||
|
} else { |
||||||
|
write!( |
||||||
|
f, |
||||||
|
"Lexer error between bytes {} and {}: {}", |
||||||
|
self.position.start, self.position.end, self.message |
||||||
|
) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl Error for LexerError { |
||||||
|
fn description(&self) -> &str { |
||||||
|
self.message() |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,11 @@ |
|||||||
|
//! oxttl parsing toolkit.
|
||||||
|
//!
|
||||||
|
//! Provides the basic code to write plain Rust lexers and parsers able to read files chunk by chunk.
|
||||||
|
|
||||||
|
mod lexer; |
||||||
|
mod parser; |
||||||
|
|
||||||
|
pub use self::lexer::{Lexer, LexerError, TokenRecognizer, TokenRecognizerError}; |
||||||
|
pub use self::parser::{ |
||||||
|
FromReadIterator, ParseError, ParseOrIoError, Parser, RuleRecognizer, RuleRecognizerError, |
||||||
|
}; |
@ -0,0 +1,244 @@ |
|||||||
|
use crate::toolkit::lexer::TokenWithPosition; |
||||||
|
use crate::toolkit::{Lexer, LexerError, TokenRecognizer}; |
||||||
|
use std::error::Error; |
||||||
|
use std::io::Read; |
||||||
|
use std::ops::Range; |
||||||
|
use std::{fmt, io}; |
||||||
|
|
||||||
|
/// A push-based grammar recognizer driven by tokens produced by a [`TokenRecognizer`].
///
/// The recognizer is a state machine passed by value: each call consumes the
/// current state and returns the next one.
pub trait RuleRecognizer: Sized {
    /// The lexer-side recognizer producing the tokens this grammar consumes.
    type TokenRecognizer: TokenRecognizer;
    /// The values emitted by the grammar (e.g. triples or quads).
    type Output;

    /// Returns the state to resume from after an error, to allow recovery.
    fn error_recovery_state(self) -> Self;

    /// Feeds one token; appends any completed outputs and errors to the given buffers.
    fn recognize_next(
        self,
        token: <Self::TokenRecognizer as TokenRecognizer>::Token<'_>,
        results: &mut Vec<Self::Output>,
        errors: &mut Vec<RuleRecognizerError>,
    ) -> Self;

    /// Signals end of input; flushes any final outputs and errors.
    fn recognize_end(self, results: &mut Vec<Self::Output>, errors: &mut Vec<RuleRecognizerError>);

    /// The lexer options the current state requires.
    fn lexer_options(&self) -> &<Self::TokenRecognizer as TokenRecognizer>::Options;
}
||||||
|
|
||||||
|
/// A grammar-level error reported by a [`RuleRecognizer`], carrying only a message;
/// the byte position is attached later by [`Parser::read_next`].
pub struct RuleRecognizerError {
    pub message: String,
}
||||||
|
|
||||||
|
impl<S: Into<String>> From<S> for RuleRecognizerError { |
||||||
|
fn from(message: S) -> Self { |
||||||
|
Self { |
||||||
|
message: message.into(), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// A pull-based parser combining a [`Lexer`] with a [`RuleRecognizer`].
pub struct Parser<RR: RuleRecognizer> {
    lexer: Lexer<RR::TokenRecognizer>,
    // `None` once `recognize_end` has run (input fully consumed).
    state: Option<RR>,
    // Buffers of not-yet-returned outputs/errors; drained by `read_next`.
    results: Vec<RR::Output>,
    errors: Vec<RuleRecognizerError>,
    // Byte range of the most recently consumed token, used to position errors.
    position: Range<usize>,
    // Fallback lexer options used after `state` has been taken.
    default_lexer_options: <RR::TokenRecognizer as TokenRecognizer>::Options,
}
||||||
|
|
||||||
|
impl<RR: RuleRecognizer> Parser<RR> {
    /// Builds a parser from a lexer and an initial recognizer state.
    pub fn new(lexer: Lexer<RR::TokenRecognizer>, recognizer: RR) -> Self {
        Self {
            lexer,
            state: Some(recognizer),
            results: vec![],
            errors: vec![],
            position: 0..0,
            default_lexer_options: <RR::TokenRecognizer as TokenRecognizer>::Options::default(),
        }
    }

    /// Feeds extra input bytes to the underlying lexer.
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.lexer.extend_from_slice(other)
    }

    /// Signals that no more input will be provided.
    pub fn end(&mut self) {
        self.lexer.end()
    }

    /// Returns whether parsing is finished and all buffered results/errors are drained.
    pub fn is_end(&self) -> bool {
        self.state.is_none() && self.results.is_empty() && self.errors.is_empty()
    }

    /// Attempts to produce the next output or error from the already-fed data.
    ///
    /// Returns `None` when either more input is needed or parsing is finished.
    pub fn read_next(&mut self) -> Option<Result<RR::Output, ParseError>> {
        loop {
            // Drain pending errors first, then pending results, before lexing more.
            if let Some(error) = self.errors.pop() {
                return Some(Err(ParseError {
                    position: self.position.clone(),
                    message: error.message,
                }));
            }
            if let Some(result) = self.results.pop() {
                return Some(Ok(result));
            }
            if let Some(result) = self.lexer.read_next(
                self.state
                    .as_ref()
                    .map_or(&self.default_lexer_options, |p| p.lexer_options()),
            ) {
                match result {
                    Ok(TokenWithPosition { token, position }) => {
                        self.position = position;
                        // Advance the recognizer state machine by one token.
                        self.state = self.state.take().map(|state| {
                            state.recognize_next(token, &mut self.results, &mut self.errors)
                        });
                        continue;
                    }
                    Err(e) => {
                        // Lexer error: move the recognizer into its recovery state.
                        self.state = self.state.take().map(RR::error_recovery_state);
                        return Some(Err(e.into()));
                    }
                }
            }
            if self.lexer.is_end() {
                if let Some(state) = self.state.take() {
                    // Flush final outputs; the next loop iteration drains them.
                    state.recognize_end(&mut self.results, &mut self.errors)
                } else {
                    return None;
                }
            } else {
                return None; // More input needed
            }
        }
    }

    /// Wraps this parser into an iterator that pulls bytes from `read` on demand.
    pub fn parse_from_read<R: Read>(self, read: R) -> FromReadIterator<R, RR> {
        FromReadIterator { read, parser: self }
    }
}
||||||
|
|
||||||
|
/// An error from parsing.
///
/// It is composed of a message and a byte range in the input.
#[derive(Debug)]
pub struct ParseError {
    /// The invalid byte range in the input.
    position: Range<usize>,
    /// The human-readable error message.
    message: String,
}
||||||
|
|
||||||
|
impl ParseError {
    /// The invalid byte range in the input.
    pub fn position(&self) -> Range<usize> {
        self.position.clone()
    }

    /// The error message.
    pub fn message(&self) -> &str {
        &self.message
    }

    /// Converts this error to an error message, consuming the error.
    pub fn into_message(self) -> String {
        self.message
    }
}
||||||
|
|
||||||
|
impl fmt::Display for ParseError { |
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||||
|
if self.position.start + 1 == self.position.end { |
||||||
|
write!( |
||||||
|
f, |
||||||
|
"Parser error at byte {}: {}", |
||||||
|
self.position.start, self.message |
||||||
|
) |
||||||
|
} else { |
||||||
|
write!( |
||||||
|
f, |
||||||
|
"Parser error between bytes {} and {}: {}", |
||||||
|
self.position.start, self.position.end, self.message |
||||||
|
) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// No wrapped source error: `Display` already carries the full context.
impl Error for ParseError {}
||||||
|
|
||||||
|
impl From<ParseError> for io::Error {
    /// Wraps the parse error as an [`io::ErrorKind::InvalidData`] I/O error.
    fn from(error: ParseError) -> Self {
        io::Error::new(io::ErrorKind::InvalidData, error)
    }
}
||||||
|
|
||||||
|
impl From<LexerError> for ParseError {
    /// Promotes a lexer error to a parse error, preserving position and message.
    fn from(e: LexerError) -> Self {
        Self {
            position: e.position(),
            message: e.into_message(),
        }
    }
}
||||||
|
|
||||||
|
/// The union of [`ParseError`] and [`std::io::Error`].
#[derive(Debug)]
pub enum ParseOrIoError {
    /// A syntax error in the parsed content.
    Parse(ParseError),
    /// An I/O error while reading the input.
    Io(io::Error),
}
||||||
|
|
||||||
|
impl fmt::Display for ParseOrIoError { |
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||||
|
match self { |
||||||
|
Self::Parse(e) => e.fmt(f), |
||||||
|
Self::Io(e) => e.fmt(f), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl Error for ParseOrIoError {
    /// Exposes the wrapped error as the source for error-chain inspection.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        Some(match self {
            Self::Parse(e) => e,
            Self::Io(e) => e,
        })
    }
}
||||||
|
|
||||||
|
impl From<ParseError> for ParseOrIoError {
    // Allows `?` on parse results inside functions returning ParseOrIoError.
    fn from(error: ParseError) -> Self {
        Self::Parse(error)
    }
}
||||||
|
|
||||||
|
impl From<io::Error> for ParseOrIoError {
    // Allows `?` on I/O results inside functions returning ParseOrIoError.
    fn from(error: io::Error) -> Self {
        Self::Io(error)
    }
}
||||||
|
|
||||||
|
impl From<ParseOrIoError> for io::Error {
    /// Collapses both variants into an [`io::Error`]; parse errors become
    /// [`io::ErrorKind::InvalidData`] via `From<ParseError>`.
    fn from(error: ParseOrIoError) -> Self {
        match error {
            ParseOrIoError::Parse(e) => e.into(),
            ParseOrIoError::Io(e) => e,
        }
    }
}
||||||
|
|
||||||
|
/// Iterator adapter that feeds a [`Parser`] from a [`Read`] implementation,
/// pulling more bytes whenever the parser runs dry.
pub struct FromReadIterator<R: Read, RR: RuleRecognizer> {
    read: R,
    parser: Parser<RR>,
}
||||||
|
|
||||||
|
impl<R: Read, RR: RuleRecognizer> Iterator for FromReadIterator<R, RR> {
    type Item = Result<RR::Output, ParseOrIoError>;

    fn next(&mut self) -> Option<Self::Item> {
        while !self.parser.is_end() {
            // First drain anything the parser can already produce...
            if let Some(result) = self.parser.read_next() {
                return Some(result.map_err(ParseOrIoError::Parse));
            }
            // ...then pull more bytes from the reader (which also signals EOF).
            if let Err(e) = self.parser.lexer.extend_from_read(&mut self.read) {
                return Some(Err(e.into()));
            }
        }
        None
    }
}
@ -0,0 +1,666 @@ |
|||||||
|
//! A [TriG](https://www.w3.org/TR/trig/) streaming parser implemented by [`TriGParser`].
|
||||||
|
|
||||||
|
use crate::terse::TriGRecognizer; |
||||||
|
use crate::toolkit::{FromReadIterator, ParseError, ParseOrIoError, Parser}; |
||||||
|
use oxiri::{Iri, IriParseError}; |
||||||
|
use oxrdf::{vocab::xsd, GraphName, NamedNode, Quad, QuadRef, Subject, TermRef}; |
||||||
|
use std::collections::HashMap; |
||||||
|
use std::fmt; |
||||||
|
use std::io::{self, Read, Write}; |
||||||
|
|
||||||
|
/// A [TriG](https://www.w3.org/TR/trig/) streaming parser.
|
||||||
|
///
|
||||||
|
/// Support for [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star) is available behind the `rdf-star` feature and the [`TriGParser::with_quoted_triples`] option.
|
||||||
|
///
|
||||||
|
/// Count the number of people:
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::NamedNodeRef;
|
||||||
|
/// use oxttl::{TriGParser, ParseError};
|
||||||
|
///
|
||||||
|
/// let file = b"@base <http://example.com/> .
|
||||||
|
/// @prefix schema: <http://schema.org/> .
|
||||||
|
/// <foo> a schema:Person ;
|
||||||
|
/// schema:name \"Foo\" .
|
||||||
|
/// <bar> a schema:Person ;
|
||||||
|
/// schema:name \"Bar\" .";
|
||||||
|
///
|
||||||
|
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
|
||||||
|
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||||
|
/// let mut count = 0;
|
||||||
|
/// for quad in TriGParser::new().parse_from_read(file.as_ref()) {
|
||||||
|
/// let quad = quad?;
|
||||||
|
/// if quad.predicate == rdf_type && quad.object == schema_person.into() {
|
||||||
|
/// count += 1;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(2, count);
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
#[derive(Default)]
pub struct TriGParser {
    // Base IRI configured via `with_base_iri`, handed to the recognizer.
    base: Option<Iri<String>>,
    // Pre-declared prefixes configured via `with_prefix`.
    prefixes: HashMap<String, Iri<String>>,
    // Whether TriG-star quoted triples are accepted.
    #[cfg(feature = "rdf-star")]
    with_quoted_triples: bool,
}
||||||
|
|
||||||
|
impl TriGParser {
    /// Builds a new [`TriGParser`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the base IRI of the parser.
    ///
    /// Fails if `base_iri` is not a valid IRI.
    #[inline]
    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
        self.base = Some(Iri::parse(base_iri.into())?);
        Ok(self)
    }

    /// Pre-declares a prefix -> IRI binding for the parser.
    ///
    /// Fails if `prefix_iri` is not a valid IRI.
    #[inline]
    pub fn with_prefix(
        mut self,
        prefix_name: impl Into<String>,
        prefix_iri: impl Into<String>,
    ) -> Result<Self, IriParseError> {
        self.prefixes
            .insert(prefix_name.into(), Iri::parse(prefix_iri.into())?);
        Ok(self)
    }

    /// Enables [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star).
    #[cfg(feature = "rdf-star")]
    #[inline]
    #[must_use]
    pub fn with_quoted_triples(mut self) -> Self {
        self.with_quoted_triples = true;
        self
    }

    /// Parses a TriG file from a [`Read`] implementation.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::NamedNodeRef;
    /// use oxttl::{TriGParser, ParseError};
    ///
    /// let file = b"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name \"Foo\" .
    /// <bar> a schema:Person ;
    ///     schema:name \"Bar\" .";
    ///
    /// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
    /// let mut count = 0;
    /// for quad in TriGParser::new().parse_from_read(file.as_ref()) {
    ///     let quad = quad?;
    ///     if quad.predicate == rdf_type && quad.object == schema_person.into() {
    ///         count += 1;
    ///     }
    /// }
    /// assert_eq!(2, count);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn parse_from_read<R: Read>(&self, read: R) -> FromReadTriGReader<R> {
        FromReadTriGReader {
            inner: self.parse().parser.parse_from_read(read),
        }
    }

    /// Allows to parse a TriG file by using a low-level API.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::NamedNodeRef;
    /// use oxttl::{TriGParser, ParseError};
    ///
    /// let file: [&[u8]; 5] = [b"@base <http://example.com/>",
    ///     b". @prefix schema: <http://schema.org/> .",
    ///     b"<foo> a schema:Person",
    ///     b" ; schema:name \"Foo\" . <bar>",
    ///     b" a schema:Person ; schema:name \"Bar\" ."
    /// ];
    ///
    /// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
    /// let mut count = 0;
    /// let mut parser = TriGParser::new().parse();
    /// let mut file_chunks = file.iter();
    /// while !parser.is_end() {
    ///     // We feed more data to the parser
    ///     if let Some(chunk) = file_chunks.next() {
    ///         parser.extend_from_slice(chunk);
    ///     } else {
    ///         parser.end(); // It's finished
    ///     }
    ///     // We read as many quads from the parser as possible
    ///     while let Some(quad) = parser.read_next() {
    ///         let quad = quad?;
    ///         if quad.predicate == rdf_type && quad.object == schema_person.into() {
    ///             count += 1;
    ///         }
    ///     }
    /// }
    /// assert_eq!(2, count);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn parse(&self) -> LowLevelTriGReader {
        LowLevelTriGReader {
            parser: TriGRecognizer::new_parser(
                true,
                #[cfg(feature = "rdf-star")]
                self.with_quoted_triples,
                self.base.clone(),
                self.prefixes.clone(),
            ),
        }
    }
}
||||||
|
|
||||||
|
/// Parses a TriG file from a [`Read`] implementation. Can be built using [`TriGParser::parse_from_read`].
|
||||||
|
///
|
||||||
|
/// Count the number of people:
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::NamedNodeRef;
|
||||||
|
/// use oxttl::{TriGParser, ParseError};
|
||||||
|
///
|
||||||
|
/// let file = b"@base <http://example.com/> .
|
||||||
|
/// @prefix schema: <http://schema.org/> .
|
||||||
|
/// <foo> a schema:Person ;
|
||||||
|
/// schema:name \"Foo\" .
|
||||||
|
/// <bar> a schema:Person ;
|
||||||
|
/// schema:name \"Bar\" .";
|
||||||
|
///
|
||||||
|
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
|
||||||
|
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||||
|
/// let mut count = 0;
|
||||||
|
/// for quad in TriGParser::new().parse_from_read(file.as_ref()) {
|
||||||
|
/// let quad = quad?;
|
||||||
|
/// if quad.predicate == rdf_type && quad.object == schema_person.into() {
|
||||||
|
/// count += 1;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(2, count);
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct FromReadTriGReader<R: Read> {
    // The generic read-driven parsing iterator this reader wraps.
    inner: FromReadIterator<R, TriGRecognizer>,
}
||||||
|
|
||||||
|
impl<R: Read> Iterator for FromReadTriGReader<R> { |
||||||
|
type Item = Result<Quad, ParseOrIoError>; |
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Result<Quad, ParseOrIoError>> { |
||||||
|
self.inner.next() |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Parses a TriG file by using a low-level API. Can be built using [`TriGParser::parse`].
|
||||||
|
///
|
||||||
|
/// Count the number of people:
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::NamedNodeRef;
|
||||||
|
/// use oxttl::{TriGParser, ParseError};
|
||||||
|
///
|
||||||
|
/// let file: [&[u8]; 5] = [b"@base <http://example.com/>",
|
||||||
|
/// b". @prefix schema: <http://schema.org/> .",
|
||||||
|
/// b"<foo> a schema:Person",
|
||||||
|
/// b" ; schema:name \"Foo\" . <bar>",
|
||||||
|
/// b" a schema:Person ; schema:name \"Bar\" ."
|
||||||
|
/// ];
|
||||||
|
///
|
||||||
|
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
|
||||||
|
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||||
|
/// let mut count = 0;
|
||||||
|
/// let mut parser = TriGParser::new().parse();
|
||||||
|
/// let mut file_chunks = file.iter();
|
||||||
|
/// while !parser.is_end() {
|
||||||
|
/// // We feed more data to the parser
|
||||||
|
/// if let Some(chunk) = file_chunks.next() {
|
||||||
|
/// parser.extend_from_slice(chunk);
|
||||||
|
/// } else {
|
||||||
|
/// parser.end(); // It's finished
|
||||||
|
/// }
|
||||||
|
/// // We read as many quads from the parser as possible
|
||||||
|
/// while let Some(quad) = parser.read_next() {
|
||||||
|
/// let quad = quad?;
|
||||||
|
/// if quad.predicate == rdf_type && quad.object == schema_person.into() {
|
||||||
|
/// count += 1;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(2, count);
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct LowLevelTriGReader {
    // The generic push/pull parser specialized for the TriG grammar.
    parser: Parser<TriGRecognizer>,
}
||||||
|
|
||||||
|
impl LowLevelTriGReader {
    /// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data.
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.parser.extend_from_slice(other)
    }

    /// Tells the parser that the file is finished.
    ///
    /// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values.
    pub fn end(&mut self) {
        self.parser.end()
    }

    /// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`.
    pub fn is_end(&self) -> bool {
        self.parser.is_end()
    }

    /// Attempts to parse a new quad from the already provided data.
    ///
    /// Returns [`None`] if the parsing is finished or more data is required.
    /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice).
    pub fn read_next(&mut self) -> Option<Result<Quad, ParseError>> {
        self.parser.read_next()
    }
}
||||||
|
|
||||||
|
/// A [TriG](https://www.w3.org/TR/trig/) serializer.
|
||||||
|
///
|
||||||
|
/// Support for [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star) is available behind the `rdf-star` feature.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::{NamedNodeRef, QuadRef};
|
||||||
|
/// use oxttl::TriGSerializer;
|
||||||
|
///
|
||||||
|
/// let mut buf = Vec::new();
|
||||||
|
/// let mut writer = TriGSerializer::new().serialize_to_write(buf);
|
||||||
|
/// writer.write_quad(QuadRef::new(
|
||||||
|
/// NamedNodeRef::new("http://example.com#me")?,
|
||||||
|
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||||
|
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||||
|
/// NamedNodeRef::new("http://example.com")?,
|
||||||
|
/// ))?;
|
||||||
|
/// assert_eq!(
|
||||||
|
/// b"<http://example.com> {\n\t<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n}\n",
|
||||||
|
/// writer.finish()?.as_slice()
|
||||||
|
/// );
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
// Stateless builder: all serialization state lives in the writer types it creates.
#[derive(Default)]
pub struct TriGSerializer;
||||||
|
|
||||||
|
impl TriGSerializer {
    /// Builds a new [`TriGSerializer`].
    #[inline]
    pub fn new() -> Self {
        Self
    }

    /// Writes a TriG file to a [`Write`] implementation.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, QuadRef};
    /// use oxttl::TriGSerializer;
    ///
    /// let mut buf = Vec::new();
    /// let mut writer = TriGSerializer::new().serialize_to_write(buf);
    /// writer.write_quad(QuadRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    ///     NamedNodeRef::new("http://schema.org/Person")?,
    ///     NamedNodeRef::new("http://example.com")?,
    /// ))?;
    /// assert_eq!(
    ///     b"<http://example.com> {\n\t<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n}\n",
    ///     writer.finish()?.as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn serialize_to_write<W: Write>(&self, write: W) -> ToWriteTriGWriter<W> {
        ToWriteTriGWriter {
            write,
            writer: self.serialize(),
        }
    }

    /// Builds a low-level TriG writer.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, QuadRef};
    /// use oxttl::TriGSerializer;
    ///
    /// let mut buf = Vec::new();
    /// let mut writer = TriGSerializer::new().serialize();
    /// writer.write_quad(QuadRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    ///     NamedNodeRef::new("http://schema.org/Person")?,
    ///     NamedNodeRef::new("http://example.com")?,
    /// ), &mut buf)?;
    /// writer.finish(&mut buf)?;
    /// assert_eq!(
    ///     b"<http://example.com> {\n\t<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n}\n",
    ///     buf.as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    #[allow(clippy::unused_self)]
    pub fn serialize(&self) -> LowLevelTriGWriter {
        LowLevelTriGWriter {
            current_graph_name: GraphName::DefaultGraph,
            current_subject_predicate: None,
        }
    }
}
||||||
|
|
||||||
|
/// Writes a TriG file to a [`Write`] implementation. Can be built using [`TriGSerializer::serialize_to_write`].
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::{NamedNodeRef, QuadRef};
|
||||||
|
/// use oxttl::TriGSerializer;
|
||||||
|
///
|
||||||
|
/// let mut buf = Vec::new();
|
||||||
|
/// let mut writer = TriGSerializer::new().serialize_to_write(buf);
|
||||||
|
/// writer.write_quad(QuadRef::new(
|
||||||
|
/// NamedNodeRef::new("http://example.com#me")?,
|
||||||
|
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||||
|
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||||
|
/// NamedNodeRef::new("http://example.com")?,
|
||||||
|
/// ))?;
|
||||||
|
/// assert_eq!(
|
||||||
|
/// b"<http://example.com> {\n\t<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n}\n",
|
||||||
|
/// writer.finish()?.as_slice()
|
||||||
|
/// );
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct ToWriteTriGWriter<W: Write> {
    // The destination; not flushed by `finish` — callers owning a buffered
    // writer should flush it themselves.
    write: W,
    // The stateful low-level serializer driving the output format.
    writer: LowLevelTriGWriter,
}
||||||
|
|
||||||
|
impl<W: Write> ToWriteTriGWriter<W> {
    /// Writes an extra quad.
    pub fn write_quad<'a>(&mut self, q: impl Into<QuadRef<'a>>) -> io::Result<()> {
        self.writer.write_quad(q, &mut self.write)
    }

    /// Ends the write process (closing any pending statement and graph block)
    /// and returns the underlying [`Write`].
    pub fn finish(mut self) -> io::Result<W> {
        self.writer.finish(&mut self.write)?;
        Ok(self.write)
    }
}
||||||
|
|
||||||
|
/// Writes a TriG file by using a low-level API. Can be built using [`TriGSerializer::serialize`].
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::{NamedNodeRef, QuadRef};
|
||||||
|
/// use oxttl::TriGSerializer;
|
||||||
|
///
|
||||||
|
/// let mut buf = Vec::new();
|
||||||
|
/// let mut writer = TriGSerializer::new().serialize();
|
||||||
|
/// writer.write_quad(QuadRef::new(
|
||||||
|
/// NamedNodeRef::new("http://example.com#me")?,
|
||||||
|
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||||
|
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||||
|
/// NamedNodeRef::new("http://example.com")?,
|
||||||
|
/// ), &mut buf)?;
|
||||||
|
/// writer.finish(&mut buf)?;
|
||||||
|
/// assert_eq!(
|
||||||
|
/// b"<http://example.com> {\n\t<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n}\n",
|
||||||
|
/// buf.as_slice()
|
||||||
|
/// );
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct LowLevelTriGWriter {
    // The graph block currently open in the output (DefaultGraph = none open).
    current_graph_name: GraphName,
    // The subject/predicate of the statement currently left open, enabling
    // `;` and `,` abbreviations; `None` when no statement is open.
    current_subject_predicate: Option<(Subject, NamedNode)>,
}
||||||
|
|
||||||
|
impl LowLevelTriGWriter {
    /// Writes an extra quad.
    ///
    /// Statements are left open between calls so consecutive quads sharing the
    /// graph, subject and/or predicate are abbreviated with `,` and `;`.
    pub fn write_quad<'a>(
        &mut self,
        q: impl Into<QuadRef<'a>>,
        mut write: impl Write,
    ) -> io::Result<()> {
        let q = q.into();
        if q.graph_name == self.current_graph_name.as_ref() {
            if let Some((current_subject, current_predicate)) =
                self.current_subject_predicate.take()
            {
                if q.subject == current_subject.as_ref() {
                    if q.predicate == current_predicate {
                        // Same subject and predicate: object list with `,`.
                        self.current_subject_predicate = Some((current_subject, current_predicate));
                        write!(write, " , {}", TurtleTerm(q.object))
                    } else {
                        // Same subject, new predicate: predicate list with `;`.
                        self.current_subject_predicate =
                            Some((current_subject, q.predicate.into_owned()));
                        writeln!(write, " ;")?;
                        if !self.current_graph_name.is_default_graph() {
                            write!(write, "\t")?; // extra indent inside a graph block
                        }
                        write!(write, "\t{} {}", q.predicate, TurtleTerm(q.object))
                    }
                } else {
                    // New subject in the same graph: close the open statement with `.`.
                    self.current_subject_predicate =
                        Some((q.subject.into_owned(), q.predicate.into_owned()));
                    writeln!(write, " .")?;
                    if !self.current_graph_name.is_default_graph() {
                        write!(write, "\t")?;
                    }
                    write!(
                        write,
                        "{} {} {}",
                        TurtleTerm(q.subject.into()),
                        q.predicate,
                        TurtleTerm(q.object)
                    )
                }
            } else {
                // First statement in this graph block.
                self.current_subject_predicate =
                    Some((q.subject.into_owned(), q.predicate.into_owned()));
                if !self.current_graph_name.is_default_graph() {
                    write!(write, "\t")?;
                }
                write!(
                    write,
                    "{} {} {}",
                    TurtleTerm(q.subject.into()),
                    q.predicate,
                    TurtleTerm(q.object)
                )
            }
        } else {
            // Graph change: close the open statement and graph block, then open
            // a new block unless the new graph is the default graph.
            if self.current_subject_predicate.is_some() {
                writeln!(write, " .")?;
            }
            if !self.current_graph_name.is_default_graph() {
                writeln!(write, "}}")?;
            }
            self.current_graph_name = q.graph_name.into_owned();
            self.current_subject_predicate =
                Some((q.subject.into_owned(), q.predicate.into_owned()));
            if !self.current_graph_name.is_default_graph() {
                writeln!(write, "{} {{", q.graph_name)?;
                write!(write, "\t")?;
            }
            write!(
                write,
                "{} {} {}",
                TurtleTerm(q.subject.into()),
                q.predicate,
                TurtleTerm(q.object)
            )
        }
    }

    /// Finishes to write the file, closing any open statement and graph block.
    pub fn finish(&mut self, mut write: impl Write) -> io::Result<()> {
        if self.current_subject_predicate.is_some() {
            writeln!(write, " .")?;
        }
        if !self.current_graph_name.is_default_graph() {
            writeln!(write, "}}")?;
        }
        Ok(())
    }
}
||||||
|
|
||||||
|
/// Display adapter writing an RDF term, using Turtle's abbreviated literal
/// forms (bare booleans/numbers) when the lexical form allows it.
struct TurtleTerm<'a>(TermRef<'a>);
||||||
|
|
||||||
|
impl<'a> fmt::Display for TurtleTerm<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.0 {
            TermRef::NamedNode(v) => write!(f, "{v}"),
            TermRef::BlankNode(v) => write!(f, "{v}"),
            TermRef::Literal(v) => {
                let value = v.value();
                // A literal can be written bare (no quotes/datatype) only when its
                // lexical form matches the corresponding Turtle token grammar.
                let inline = match v.datatype() {
                    xsd::BOOLEAN => is_turtle_boolean(value),
                    xsd::INTEGER => is_turtle_integer(value),
                    xsd::DECIMAL => is_turtle_decimal(value),
                    xsd::DOUBLE => is_turtle_double(value),
                    _ => false,
                };
                if inline {
                    write!(f, "{value}")
                } else {
                    write!(f, "{v}")
                }
            }
            #[cfg(feature = "rdf-star")]
            TermRef::Triple(t) => {
                // Quoted triple, RDF-star `<< s p o >>` syntax.
                write!(
                    f,
                    "<< {} {} {} >>",
                    TurtleTerm(t.subject.as_ref().into()),
                    t.predicate,
                    TurtleTerm(t.object.as_ref())
                )
            }
        }
    }
}
||||||
|
|
||||||
|
/// Checks that `value` is a valid Turtle boolean lexical form.
fn is_turtle_boolean(value: &str) -> bool {
    // BooleanLiteral ::= 'true' | 'false'
    value == "true" || value == "false"
}
||||||
|
|
||||||
|
/// Checks that `value` matches the Turtle INTEGER production.
fn is_turtle_integer(value: &str) -> bool {
    // [19] INTEGER ::= [+-]? [0-9]+
    let digits = value
        .strip_prefix('+')
        .or_else(|| value.strip_prefix('-'))
        .unwrap_or(value);
    !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit())
}
||||||
|
|
||||||
|
/// Checks that `value` matches the Turtle DECIMAL production.
fn is_turtle_decimal(value: &str) -> bool {
    // [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+
    let unsigned = value
        .strip_prefix('+')
        .or_else(|| value.strip_prefix('-'))
        .unwrap_or(value);
    match unsigned.split_once('.') {
        // Digits are optional before the dot but mandatory after it.
        Some((before, after)) => {
            before.bytes().all(|b| b.is_ascii_digit())
                && !after.is_empty()
                && after.bytes().all(|b| b.is_ascii_digit())
        }
        None => false,
    }
}
||||||
|
|
||||||
|
/// Checks that `value` matches the Turtle DOUBLE production.
fn is_turtle_double(value: &str) -> bool {
    // [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT)
    // [154s] EXPONENT ::= [eE] [+-]? [0-9]+
    let unsigned = value
        .strip_prefix('+')
        .or_else(|| value.strip_prefix('-'))
        .unwrap_or(value);
    let (mantissa, exponent) = match unsigned.split_once(|c| c == 'e' || c == 'E') {
        Some(parts) => parts,
        None => return false, // the exponent is mandatory for DOUBLE
    };
    // The mantissa is digits, optionally followed by '.' and more digits.
    let (before, after) = match mantissa.split_once('.') {
        Some((before, after)) => (before, after),
        None => (mantissa, ""),
    };
    if !before.bytes().all(|b| b.is_ascii_digit()) || !after.bytes().all(|b| b.is_ascii_digit()) {
        return false;
    }
    if before.is_empty() && after.is_empty() {
        return false; // at least one digit is required in the mantissa
    }
    let exponent_digits = exponent
        .strip_prefix('+')
        .or_else(|| exponent.strip_prefix('-'))
        .unwrap_or(exponent);
    !exponent_digits.is_empty() && exponent_digits.bytes().all(|b| b.is_ascii_digit())
}
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use oxrdf::vocab::xsd;
    use oxrdf::{BlankNodeRef, GraphNameRef, LiteralRef, NamedNodeRef};

    // End-to-end check of the TriG serializer: quads sharing a graph/subject/
    // predicate must be grouped with `{ }`, `;` and `,` in the expected output.
    #[test]
    fn test_write() -> io::Result<()> {
        let mut writer = TriGSerializer::new().serialize_to_write(Vec::new());
        // Same graph/subject/predicate as the next quad: expects `,` grouping.
        writer.write_quad(QuadRef::new(
            NamedNodeRef::new_unchecked("http://example.com/s"),
            NamedNodeRef::new_unchecked("http://example.com/p"),
            NamedNodeRef::new_unchecked("http://example.com/o"),
            NamedNodeRef::new_unchecked("http://example.com/g"),
        ))?;
        writer.write_quad(QuadRef::new(
            NamedNodeRef::new_unchecked("http://example.com/s"),
            NamedNodeRef::new_unchecked("http://example.com/p"),
            LiteralRef::new_simple_literal("foo"),
            NamedNodeRef::new_unchecked("http://example.com/g"),
        ))?;
        // Same subject, new predicate: expects `;` grouping.
        writer.write_quad(QuadRef::new(
            NamedNodeRef::new_unchecked("http://example.com/s"),
            NamedNodeRef::new_unchecked("http://example.com/p2"),
            LiteralRef::new_language_tagged_literal_unchecked("foo", "en"),
            NamedNodeRef::new_unchecked("http://example.com/g"),
        ))?;
        writer.write_quad(QuadRef::new(
            BlankNodeRef::new_unchecked("b"),
            NamedNodeRef::new_unchecked("http://example.com/p2"),
            BlankNodeRef::new_unchecked("b2"),
            NamedNodeRef::new_unchecked("http://example.com/g"),
        ))?;
        // Default graph: serialized without a surrounding `{ }` block;
        // the xsd:boolean literal is expected inline as bare `true`.
        writer.write_quad(QuadRef::new(
            BlankNodeRef::new_unchecked("b"),
            NamedNodeRef::new_unchecked("http://example.com/p2"),
            LiteralRef::new_typed_literal("true", xsd::BOOLEAN),
            GraphNameRef::DefaultGraph,
        ))?;
        writer.write_quad(QuadRef::new(
            BlankNodeRef::new_unchecked("b"),
            NamedNodeRef::new_unchecked("http://example.com/p2"),
            LiteralRef::new_typed_literal("false", xsd::BOOLEAN),
            NamedNodeRef::new_unchecked("http://example.com/g2"),
        ))?;
        assert_eq!(String::from_utf8(writer.finish()?).unwrap(), "<http://example.com/g> {\n\t<http://example.com/s> <http://example.com/p> <http://example.com/o> , \"foo\" ;\n\t\t<http://example.com/p2> \"foo\"@en .\n\t_:b <http://example.com/p2> _:b2 .\n}\n_:b <http://example.com/p2> true .\n<http://example.com/g2> {\n\t_:b <http://example.com/p2> false .\n}\n");
        Ok(())
    }
}
@ -0,0 +1,462 @@ |
|||||||
|
//! A [Turtle](https://www.w3.org/TR/turtle/) streaming parser implemented by [`TurtleParser`].
|
||||||
|
|
||||||
|
use crate::terse::TriGRecognizer; |
||||||
|
use crate::toolkit::{FromReadIterator, ParseError, ParseOrIoError, Parser}; |
||||||
|
use crate::trig::{LowLevelTriGWriter, ToWriteTriGWriter}; |
||||||
|
use crate::TriGSerializer; |
||||||
|
use oxiri::{Iri, IriParseError}; |
||||||
|
use oxrdf::{GraphNameRef, Triple, TripleRef}; |
||||||
|
use std::collections::HashMap; |
||||||
|
use std::io::{self, Read, Write}; |
||||||
|
|
||||||
|
/// A [Turtle](https://www.w3.org/TR/turtle/) streaming parser.
|
||||||
|
///
|
||||||
|
/// Support for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) is available behind the `rdf-star` feature and the [`TurtleParser::with_quoted_triples`] option.
|
||||||
|
///
|
||||||
|
/// Count the number of people:
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::NamedNodeRef;
|
||||||
|
/// use oxttl::{TurtleParser, ParseError};
|
||||||
|
///
|
||||||
|
/// let file = b"@base <http://example.com/> .
|
||||||
|
/// @prefix schema: <http://schema.org/> .
|
||||||
|
/// <foo> a schema:Person ;
|
||||||
|
/// schema:name \"Foo\" .
|
||||||
|
/// <bar> a schema:Person ;
|
||||||
|
/// schema:name \"Bar\" .";
|
||||||
|
///
|
||||||
|
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
|
||||||
|
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||||
|
/// let mut count = 0;
|
||||||
|
/// for triple in TurtleParser::new().parse_from_read(file.as_ref()) {
|
||||||
|
/// let triple = triple?;
|
||||||
|
/// if triple.predicate == rdf_type && triple.object == schema_person.into() {
|
||||||
|
/// count += 1;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(2, count);
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
#[derive(Default)]
pub struct TurtleParser {
    // Base IRI configured via `with_base_iri`.
    base: Option<Iri<String>>,
    // Initial prefix mappings configured via `with_prefix`.
    prefixes: HashMap<String, Iri<String>>,
    // Set by `with_quoted_triples`: enables Turtle-star quoted triples.
    #[cfg(feature = "rdf-star")]
    with_quoted_triples: bool,
}
||||||
|
|
||||||
|
impl TurtleParser {
    /// Builds a new [`TurtleParser`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the base IRI of the document to parse.
    ///
    /// Returns an error if `base_iri` is not a valid IRI.
    #[inline]
    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
        self.base = Some(Iri::parse(base_iri.into())?);
        Ok(self)
    }

    /// Adds an initial prefix mapping (prefix name -> IRI) available to the parsed document.
    ///
    /// Returns an error if `prefix_iri` is not a valid IRI.
    #[inline]
    pub fn with_prefix(
        mut self,
        prefix_name: impl Into<String>,
        prefix_iri: impl Into<String>,
    ) -> Result<Self, IriParseError> {
        self.prefixes
            .insert(prefix_name.into(), Iri::parse(prefix_iri.into())?);
        Ok(self)
    }

    /// Enables [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star).
    #[cfg(feature = "rdf-star")]
    #[inline]
    #[must_use]
    pub fn with_quoted_triples(mut self) -> Self {
        self.with_quoted_triples = true;
        self
    }

    /// Parses a Turtle file from a [`Read`] implementation.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::NamedNodeRef;
    /// use oxttl::{TurtleParser, ParseError};
    ///
    /// let file = b"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name \"Foo\" .
    /// <bar> a schema:Person ;
    ///     schema:name \"Bar\" .";
    ///
    /// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
    /// let mut count = 0;
    /// for triple in TurtleParser::new().parse_from_read(file.as_ref()) {
    ///     let triple = triple?;
    ///     if triple.predicate == rdf_type && triple.object == schema_person.into() {
    ///         count += 1;
    ///     }
    /// }
    /// assert_eq!(2, count);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn parse_from_read<R: Read>(&self, read: R) -> FromReadTurtleReader<R> {
        FromReadTurtleReader {
            inner: self.parse().parser.parse_from_read(read),
        }
    }

    /// Allows to parse a Turtle file by using a low-level API.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::NamedNodeRef;
    /// use oxttl::{TurtleParser, ParseError};
    ///
    /// let file: [&[u8]; 5] = [b"@base <http://example.com/>",
    ///     b". @prefix schema: <http://schema.org/> .",
    ///     b"<foo> a schema:Person",
    ///     b" ; schema:name \"Foo\" . <bar>",
    ///     b" a schema:Person ; schema:name \"Bar\" ."
    /// ];
    ///
    /// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
    /// let mut count = 0;
    /// let mut parser = TurtleParser::new().parse();
    /// let mut file_chunks = file.iter();
    /// while !parser.is_end() {
    ///     // We feed more data to the parser
    ///     if let Some(chunk) = file_chunks.next() {
    ///         parser.extend_from_slice(chunk);
    ///     } else {
    ///         parser.end(); // It's finished
    ///     }
    ///     // We read as many triples from the parser as possible
    ///     while let Some(triple) = parser.read_next() {
    ///         let triple = triple?;
    ///         if triple.predicate == rdf_type && triple.object == schema_person.into() {
    ///             count += 1;
    ///         }
    ///     }
    /// }
    /// assert_eq!(2, count);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn parse(&self) -> LowLevelTurtleReader {
        LowLevelTurtleReader {
            parser: TriGRecognizer::new_parser(
                // NOTE(review): this flag presumably selects TriG-specific syntax
                // (GRAPH blocks); `false` restricts the recognizer to plain Turtle —
                // confirm against `TriGRecognizer::new_parser`.
                false,
                #[cfg(feature = "rdf-star")]
                self.with_quoted_triples,
                self.base.clone(),
                self.prefixes.clone(),
            ),
        }
    }
}
||||||
|
|
||||||
|
/// Parses a Turtle file from a [`Read`] implementation. Can be built using [`TurtleParser::parse_from_read`].
|
||||||
|
///
|
||||||
|
/// Count the number of people:
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::NamedNodeRef;
|
||||||
|
/// use oxttl::{TurtleParser, ParseError};
|
||||||
|
///
|
||||||
|
/// let file = b"@base <http://example.com/> .
|
||||||
|
/// @prefix schema: <http://schema.org/> .
|
||||||
|
/// <foo> a schema:Person ;
|
||||||
|
/// schema:name \"Foo\" .
|
||||||
|
/// <bar> a schema:Person ;
|
||||||
|
/// schema:name \"Bar\" .";
|
||||||
|
///
|
||||||
|
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
|
||||||
|
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||||
|
/// let mut count = 0;
|
||||||
|
/// for triple in TurtleParser::new().parse_from_read(file.as_ref()) {
|
||||||
|
/// let triple = triple?;
|
||||||
|
/// if triple.predicate == rdf_type && triple.object == schema_person.into() {
|
||||||
|
/// count += 1;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(2, count);
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct FromReadTurtleReader<R: Read> {
    // Underlying iterator over the recognizer's output; each item is converted
    // to a `Triple` via `Into` in `next`.
    inner: FromReadIterator<R, TriGRecognizer>,
}
||||||
|
|
||||||
|
impl<R: Read> Iterator for FromReadTurtleReader<R> { |
||||||
|
type Item = Result<Triple, ParseOrIoError>; |
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Result<Triple, ParseOrIoError>> { |
||||||
|
Some(self.inner.next()?.map(Into::into)) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Parses a Turtle file by using a low-level API. Can be built using [`TurtleParser::parse`].
|
||||||
|
///
|
||||||
|
/// Count the number of people:
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::NamedNodeRef;
|
||||||
|
/// use oxttl::{TurtleParser, ParseError};
|
||||||
|
///
|
||||||
|
/// let file: [&[u8]; 5] = [b"@base <http://example.com/>",
|
||||||
|
/// b". @prefix schema: <http://schema.org/> .",
|
||||||
|
/// b"<foo> a schema:Person",
|
||||||
|
/// b" ; schema:name \"Foo\" . <bar>",
|
||||||
|
/// b" a schema:Person ; schema:name \"Bar\" ."
|
||||||
|
/// ];
|
||||||
|
///
|
||||||
|
/// let rdf_type = NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
|
||||||
|
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||||
|
/// let mut count = 0;
|
||||||
|
/// let mut parser = TurtleParser::new().parse();
|
||||||
|
/// let mut file_chunks = file.iter();
|
||||||
|
/// while !parser.is_end() {
|
||||||
|
/// // We feed more data to the parser
|
||||||
|
/// if let Some(chunk) = file_chunks.next() {
|
||||||
|
/// parser.extend_from_slice(chunk);
|
||||||
|
/// } else {
|
||||||
|
/// parser.end(); // It's finished
|
||||||
|
/// }
|
||||||
|
/// // We read as many triples from the parser as possible
|
||||||
|
/// while let Some(triple) = parser.read_next() {
|
||||||
|
/// let triple = triple?;
|
||||||
|
/// if triple.predicate == rdf_type && triple.object == schema_person.into() {
|
||||||
|
/// count += 1;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(2, count);
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct LowLevelTurtleReader {
    // Shared incremental parser driven by the TriG/Turtle recognizer.
    parser: Parser<TriGRecognizer>,
}
||||||
|
|
||||||
|
impl LowLevelTurtleReader {
    /// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data.
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.parser.extend_from_slice(other)
    }

    /// Tell the parser that the file is finished.
    ///
    /// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values.
    pub fn end(&mut self) {
        self.parser.end()
    }

    /// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`.
    pub fn is_end(&self) -> bool {
        self.parser.is_end()
    }

    /// Attempt to parse a new triple from the already provided data.
    ///
    /// Returns [`None`] if the parsing is finished or more data is required.
    /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice).
    pub fn read_next(&mut self) -> Option<Result<Triple, ParseError>> {
        // The recognizer's output is converted into a `Triple` here.
        Some(self.parser.read_next()?.map(Into::into))
    }
}
||||||
|
|
||||||
|
/// A [Turtle](https://www.w3.org/TR/turtle/) serializer.
|
||||||
|
///
|
||||||
|
/// Support for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) is available behind the `rdf-star` feature.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||||
|
/// use oxttl::TurtleSerializer;
|
||||||
|
///
|
||||||
|
/// let mut buf = Vec::new();
|
||||||
|
/// let mut writer = TurtleSerializer::new().serialize_to_write(buf);
|
||||||
|
/// writer.write_triple(TripleRef::new(
|
||||||
|
/// NamedNodeRef::new("http://example.com#me")?,
|
||||||
|
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||||
|
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||||
|
/// ))?;
|
||||||
|
/// assert_eq!(
|
||||||
|
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||||
|
/// writer.finish()?.as_slice()
|
||||||
|
/// );
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
#[derive(Default)]
pub struct TurtleSerializer {
    // Turtle serialization is delegated to the TriG serializer (quads are
    // written in the default graph).
    inner: TriGSerializer,
}
||||||
|
|
||||||
|
impl TurtleSerializer {
    /// Builds a new [`TurtleSerializer`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Writes a Turtle file to a [`Write`] implementation.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, TripleRef};
    /// use oxttl::TurtleSerializer;
    ///
    /// let mut buf = Vec::new();
    /// let mut writer = TurtleSerializer::new().serialize_to_write(buf);
    /// writer.write_triple(TripleRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    ///     NamedNodeRef::new("http://schema.org/Person")?,
    /// ))?;
    /// assert_eq!(
    ///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
    ///     writer.finish()?.as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn serialize_to_write<W: Write>(&self, write: W) -> ToWriteTurtleWriter<W> {
        ToWriteTurtleWriter {
            inner: self.inner.serialize_to_write(write),
        }
    }

    /// Builds a low-level Turtle writer.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, TripleRef};
    /// use oxttl::TurtleSerializer;
    ///
    /// let mut buf = Vec::new();
    /// let mut writer = TurtleSerializer::new().serialize();
    /// writer.write_triple(TripleRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    ///     NamedNodeRef::new("http://schema.org/Person")?,
    /// ), &mut buf)?;
    /// writer.finish(&mut buf)?;
    /// assert_eq!(
    ///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
    ///     buf.as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn serialize(&self) -> LowLevelTurtleWriter {
        LowLevelTurtleWriter {
            inner: self.inner.serialize(),
        }
    }
}
||||||
|
|
||||||
|
/// Writes a Turtle file to a [`Write`] implementation. Can be built using [`TurtleSerializer::serialize_to_write`].
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||||
|
/// use oxttl::TurtleSerializer;
|
||||||
|
///
|
||||||
|
/// let mut buf = Vec::new();
|
||||||
|
/// let mut writer = TurtleSerializer::new().serialize_to_write(buf);
|
||||||
|
/// writer.write_triple(TripleRef::new(
|
||||||
|
/// NamedNodeRef::new("http://example.com#me")?,
|
||||||
|
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||||
|
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||||
|
/// ))?;
|
||||||
|
/// assert_eq!(
|
||||||
|
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||||
|
/// writer.finish()?.as_slice()
|
||||||
|
/// );
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct ToWriteTurtleWriter<W: Write> {
    // TriG writer doing the actual serialization; see `write_triple` below.
    inner: ToWriteTriGWriter<W>,
}
||||||
|
|
||||||
|
impl<W: Write> ToWriteTurtleWriter<W> {
    /// Writes an extra triple.
    pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        // Turtle output is produced by the TriG writer with the triple placed
        // in the default graph.
        self.inner
            .write_quad(t.into().in_graph(GraphNameRef::DefaultGraph))
    }

    /// Ends the write process and returns the underlying [`Write`].
    pub fn finish(self) -> io::Result<W> {
        self.inner.finish()
    }
}
||||||
|
|
||||||
|
/// Writes a Turtle file by using a low-level API. Can be built using [`TurtleSerializer::serialize`].
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||||
|
/// use oxttl::TurtleSerializer;
|
||||||
|
///
|
||||||
|
/// let mut buf = Vec::new();
|
||||||
|
/// let mut writer = TurtleSerializer::new().serialize();
|
||||||
|
/// writer.write_triple(TripleRef::new(
|
||||||
|
/// NamedNodeRef::new("http://example.com#me")?,
|
||||||
|
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||||
|
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||||
|
/// ), &mut buf)?;
|
||||||
|
/// writer.finish(&mut buf)?;
|
||||||
|
/// assert_eq!(
|
||||||
|
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||||
|
/// buf.as_slice()
|
||||||
|
/// );
|
||||||
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||||
|
/// ```
|
||||||
|
pub struct LowLevelTurtleWriter {
    // Low-level TriG writer doing the actual serialization.
    inner: LowLevelTriGWriter,
}
||||||
|
|
||||||
|
impl LowLevelTurtleWriter {
    /// Writes an extra triple.
    pub fn write_triple<'a>(
        &mut self,
        t: impl Into<TripleRef<'a>>,
        write: impl Write,
    ) -> io::Result<()> {
        // Delegates to the TriG writer, placing the triple in the default graph.
        self.inner
            .write_quad(t.into().in_graph(GraphNameRef::DefaultGraph), write)
    }

    /// Finishes to write the file.
    pub fn finish(&mut self, write: impl Write) -> io::Result<()> {
        self.inner.finish(write)
    }
}
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use oxrdf::{BlankNodeRef, LiteralRef, NamedNodeRef};

    // End-to-end check of the Turtle serializer: triples sharing subject or
    // subject+predicate must be grouped with `;` and `,` in the expected output.
    #[test]
    fn test_write() -> io::Result<()> {
        let mut writer = TurtleSerializer::new().serialize_to_write(Vec::new());
        // Same subject/predicate as the next triple: expects `,` grouping.
        writer.write_triple(TripleRef::new(
            NamedNodeRef::new_unchecked("http://example.com/s"),
            NamedNodeRef::new_unchecked("http://example.com/p"),
            NamedNodeRef::new_unchecked("http://example.com/o"),
        ))?;
        writer.write_triple(TripleRef::new(
            NamedNodeRef::new_unchecked("http://example.com/s"),
            NamedNodeRef::new_unchecked("http://example.com/p"),
            LiteralRef::new_simple_literal("foo"),
        ))?;
        // Same subject, new predicate: expects `;` grouping.
        writer.write_triple(TripleRef::new(
            NamedNodeRef::new_unchecked("http://example.com/s"),
            NamedNodeRef::new_unchecked("http://example.com/p2"),
            LiteralRef::new_language_tagged_literal_unchecked("foo", "en"),
        ))?;
        writer.write_triple(TripleRef::new(
            BlankNodeRef::new_unchecked("b"),
            NamedNodeRef::new_unchecked("http://example.com/p2"),
            BlankNodeRef::new_unchecked("b2"),
        ))?;
        assert_eq!(String::from_utf8(writer.finish()?).unwrap(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> , \"foo\" ;\n\t<http://example.com/p2> \"foo\"@en .\n_:b <http://example.com/p2> _:b2 .\n");
        Ok(())
    }
}
@ -0,0 +1 @@ |
|||||||
|
Subproject commit 5fa35bf602669a467cfd0ab24cc732fe49f2b927 |
@ -0,0 +1,194 @@ |
|||||||
|
use anyhow::Result; |
||||||
|
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; |
||||||
|
use oxigraph_testsuite::files::read_file; |
||||||
|
use oxigraph_testsuite::manifest::TestManifest; |
||||||
|
use rio_api::parser::*; |
||||||
|
use rio_turtle::*; |
||||||
|
use std::io::Read; |
||||||
|
|
||||||
|
fn test_data_from_testsuite(manifest_uri: String, include_tests_types: &[&str]) -> Result<Vec<u8>> { |
||||||
|
let manifest = TestManifest::new([manifest_uri]); |
||||||
|
let mut data = Vec::default(); |
||||||
|
for test in manifest { |
||||||
|
let test = test?; |
||||||
|
if include_tests_types.contains(&test.kind.as_str()) { |
||||||
|
read_file(&test.action.unwrap())?.read_to_end(&mut data)?; |
||||||
|
data.push(b'\n'); |
||||||
|
} |
||||||
|
} |
||||||
|
Ok(data) |
||||||
|
} |
||||||
|
|
||||||
|
fn ntriples_test_data() -> Result<Vec<u8>> { |
||||||
|
test_data_from_testsuite( |
||||||
|
"http://w3c.github.io/rdf-tests/ntriples/manifest.ttl".to_owned(), |
||||||
|
&["http://www.w3.org/ns/rdftest#TestNTriplesPositiveSyntax"], |
||||||
|
) |
||||||
|
} |
||||||
|
|
||||||
|
fn turtle_test_data() -> Result<Vec<u8>> { |
||||||
|
test_data_from_testsuite( |
||||||
|
"http://w3c.github.io/rdf-tests/turtle/manifest.ttl".to_owned(), |
||||||
|
&[ |
||||||
|
"http://www.w3.org/ns/rdftest#TestTurtlePositiveSyntax", |
||||||
|
"http://www.w3.org/ns/rdftest#TestTurtleEval", |
||||||
|
], |
||||||
|
) |
||||||
|
} |
||||||
|
|
||||||
|
fn parse_bench( |
||||||
|
c: &mut Criterion, |
||||||
|
parser_name: &str, |
||||||
|
data_name: &str, |
||||||
|
data: Vec<u8>, |
||||||
|
bench: impl Fn(&[u8]), |
||||||
|
) { |
||||||
|
let mut group = c.benchmark_group(parser_name); |
||||||
|
group.throughput(Throughput::Bytes(data.len() as u64)); |
||||||
|
group.bench_with_input(BenchmarkId::from_parameter(data_name), &data, |b, data| { |
||||||
|
b.iter(|| bench(data)) |
||||||
|
}); |
||||||
|
group.finish(); |
||||||
|
} |
||||||
|
|
||||||
|
fn parse_oxttl_ntriples(c: &mut Criterion, name: &str, data: Vec<u8>) { |
||||||
|
parse_bench(c, "oxttl ntriples", name, data, |data| { |
||||||
|
let mut parser = oxttl::NTriplesParser::new().parse(); |
||||||
|
parser.extend_from_slice(data); |
||||||
|
parser.end(); |
||||||
|
while let Some(result) = parser.read_next() { |
||||||
|
result.unwrap(); |
||||||
|
} |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
fn parse_oxttl_turtle(c: &mut Criterion, name: &str, data: Vec<u8>) { |
||||||
|
parse_bench(c, "oxttl turtle", name, data, |data| { |
||||||
|
let mut parser = oxttl::TurtleParser::new().parse(); |
||||||
|
parser.extend_from_slice(data); |
||||||
|
parser.end(); |
||||||
|
while let Some(result) = parser.read_next() { |
||||||
|
result.unwrap(); |
||||||
|
} |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
fn parse_rio_ntriples(c: &mut Criterion, name: &str, data: Vec<u8>) { |
||||||
|
parse_bench(c, "rio ntriples", name, data, |data| { |
||||||
|
let mut count: u64 = 0; |
||||||
|
NTriplesParser::new(data) |
||||||
|
.parse_all(&mut |_| { |
||||||
|
count += 1; |
||||||
|
Ok(()) as Result<(), TurtleError> |
||||||
|
}) |
||||||
|
.unwrap(); |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
fn parse_rio_turtle(c: &mut Criterion, name: &str, data: Vec<u8>) { |
||||||
|
parse_bench(c, "rio turtle", name, data, |data| { |
||||||
|
let mut count: u64 = 0; |
||||||
|
TurtleParser::new(data, None) |
||||||
|
.parse_all(&mut |_| { |
||||||
|
count += 1; |
||||||
|
Ok(()) as Result<(), TurtleError> |
||||||
|
}) |
||||||
|
.unwrap(); |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
fn bench_parse_oxttl_ntriples_with_ntriples(c: &mut Criterion) { |
||||||
|
parse_oxttl_ntriples( |
||||||
|
c, |
||||||
|
"ntriples", |
||||||
|
match ntriples_test_data() { |
||||||
|
Ok(d) => d, |
||||||
|
Err(e) => { |
||||||
|
eprintln!("{e}"); |
||||||
|
return; |
||||||
|
} |
||||||
|
}, |
||||||
|
) |
||||||
|
} |
||||||
|
|
||||||
|
fn bench_parse_oxttl_ntriples_with_turtle(c: &mut Criterion) { |
||||||
|
parse_oxttl_turtle( |
||||||
|
c, |
||||||
|
"ntriples", |
||||||
|
match ntriples_test_data() { |
||||||
|
Ok(d) => d, |
||||||
|
Err(e) => { |
||||||
|
eprintln!("{e}"); |
||||||
|
return; |
||||||
|
} |
||||||
|
}, |
||||||
|
) |
||||||
|
} |
||||||
|
|
||||||
|
fn bench_parse_oxttl_turtle_with_turtle(c: &mut Criterion) { |
||||||
|
parse_oxttl_turtle( |
||||||
|
c, |
||||||
|
"turtle", |
||||||
|
match turtle_test_data() { |
||||||
|
Ok(d) => d, |
||||||
|
Err(e) => { |
||||||
|
eprintln!("{e}"); |
||||||
|
return; |
||||||
|
} |
||||||
|
}, |
||||||
|
) |
||||||
|
} |
||||||
|
|
||||||
|
fn bench_parse_rio_ntriples_with_ntriples(c: &mut Criterion) { |
||||||
|
parse_rio_ntriples( |
||||||
|
c, |
||||||
|
"ntriples", |
||||||
|
match ntriples_test_data() { |
||||||
|
Ok(d) => d, |
||||||
|
Err(e) => { |
||||||
|
eprintln!("{e}"); |
||||||
|
return; |
||||||
|
} |
||||||
|
}, |
||||||
|
) |
||||||
|
} |
||||||
|
|
||||||
|
fn bench_parse_rio_ntriples_with_turtle(c: &mut Criterion) { |
||||||
|
parse_rio_turtle( |
||||||
|
c, |
||||||
|
"ntriples", |
||||||
|
match ntriples_test_data() { |
||||||
|
Ok(d) => d, |
||||||
|
Err(e) => { |
||||||
|
eprintln!("{e}"); |
||||||
|
return; |
||||||
|
} |
||||||
|
}, |
||||||
|
) |
||||||
|
} |
||||||
|
|
||||||
|
fn bench_parse_rio_turtle_with_turtle(c: &mut Criterion) { |
||||||
|
parse_rio_turtle( |
||||||
|
c, |
||||||
|
"turtle", |
||||||
|
match turtle_test_data() { |
||||||
|
Ok(d) => d, |
||||||
|
Err(e) => { |
||||||
|
eprintln!("{e}"); |
||||||
|
return; |
||||||
|
} |
||||||
|
}, |
||||||
|
) |
||||||
|
} |
||||||
|
|
||||||
|
// Benchmark suite comparing the rio and oxttl parsers on the W3C testsuite data.
criterion_group!(
    w3c_testsuite,
    bench_parse_rio_ntriples_with_ntriples,
    bench_parse_rio_ntriples_with_turtle,
    bench_parse_rio_turtle_with_turtle,
    bench_parse_oxttl_ntriples_with_ntriples,
    bench_parse_oxttl_ntriples_with_turtle,
    bench_parse_oxttl_turtle_with_turtle
);

criterion_main!(w3c_testsuite);
@ -0,0 +1,2 @@ |
|||||||
|
_:` <http://example.com/pb> <http://example.com/o> . |
||||||
|
<http://example.com/s> <http://example.com/p> <http://example.com/o> . |
@ -0,0 +1,2 @@ |
|||||||
|
<http:// /s> <http://example.com/p> <http://example.com/o> . |
||||||
|
<http://example.com/s> <http://example.com/p> <http://example.com/o> . |
@ -0,0 +1,2 @@ |
|||||||
|
<http://example.com/s> <http://example.com/p> "\a" . |
||||||
|
<http://example.com/s> <http://example.com/p> <http://example.com/o> . |
@ -0,0 +1,2 @@ |
|||||||
|
<http://example.com/s> <http://example.com/p> <http://example.com/o> . |
||||||
|
<http://example.com/s> <http://example.com/p> <http://example.com/o2> . |
@ -0,0 +1,2 @@ |
|||||||
|
<http://example.com/s> <http://example.com/p> "o" . |
||||||
|
<http://example.com/s> <http://example.com/p> <http://example.com/o> . |
@ -0,0 +1 @@ |
|||||||
|
<http://example.com/s> <http://example.com/p> <http://example.com/o> . |
@ -0,0 +1 @@ |
|||||||
|
<http://example.com/s> <http://example.com/p> "o" . |
@ -0,0 +1,129 @@ |
|||||||
|
@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> . |
||||||
|
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . |
||||||
|
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . |
||||||
|
@prefix ox: <https://github.com/oxigraph/oxigraph/tests#> . |
||||||
|
|
||||||
|
<> |
||||||
|
rdf:type mf:Manifest ; |
||||||
|
rdfs:comment "Oxigraph parser recovery test cases" ; |
||||||
|
mf:entries ( |
||||||
|
<#invalid_iri_nt> |
||||||
|
<#invalid_iri_ttl> |
||||||
|
<#invalid_iri_n3> |
||||||
|
<#invalid_bnode_nt> |
||||||
|
<#invalid_bnode_ttl> |
||||||
|
<#invalid_bnode_n3> |
||||||
|
<#invalid_string_nt> |
||||||
|
<#invalid_string_ttl> |
||||||
|
<#invalid_string_n3> |
||||||
|
<#missing_dot_at_end_of_triple_with_iri_middle_nt> |
||||||
|
<#missing_dot_at_end_of_triple_with_iri_middle_ttl> |
||||||
|
<#missing_dot_at_end_of_triple_with_iri_end_nt> |
||||||
|
<#missing_dot_at_end_of_triple_with_iri_end_ttl> |
||||||
|
<#missing_dot_at_end_of_triple_with_string_middle_nt> |
||||||
|
<#missing_dot_at_end_of_triple_with_string_middle_ttl> |
||||||
|
<#missing_dot_at_end_of_triple_with_string_end_nt> |
||||||
|
<#missing_dot_at_end_of_triple_with_string_end_ttl> |
||||||
|
) . |
||||||
|
|
||||||
|
<#invalid_iri_nt> |
||||||
|
rdf:type ox:TestNTripleRecovery ; |
||||||
|
mf:name "IRI with space" ; |
||||||
|
mf:action <invalid_iri.nt> ; |
||||||
|
mf:result <iri_spo.nt> . |
||||||
|
|
||||||
|
<#invalid_iri_ttl> |
||||||
|
rdf:type ox:TestTurtleRecovery ; |
||||||
|
mf:name "IRI with space" ; |
||||||
|
mf:action <invalid_iri.nt> ; |
||||||
|
mf:result <iri_spo.nt> . |
||||||
|
|
||||||
|
<#invalid_iri_n3> |
||||||
|
rdf:type ox:TestN3Recovery ; |
||||||
|
mf:name "IRI with space" ; |
||||||
|
mf:action <invalid_iri.nt> ; |
||||||
|
mf:result <iri_spo.nt> . |
||||||
|
|
||||||
|
<#invalid_bnode_nt> |
||||||
|
rdf:type ox:TestNTripleRecovery ; |
||||||
|
mf:name "bad character in blank node" ; |
||||||
|
mf:action <invalid_bnode.nt> ; |
||||||
|
mf:result <iri_spo.nt> . |
||||||
|
|
||||||
|
<#invalid_bnode_ttl> |
||||||
|
rdf:type ox:TestTurtleRecovery ; |
||||||
|
mf:name "bad character in blank node" ; |
||||||
|
mf:action <invalid_bnode.nt> ; |
||||||
|
mf:result <iri_spo.nt> . |
||||||
|
|
||||||
|
<#invalid_bnode_n3> |
||||||
|
rdf:type ox:TestN3Recovery ; |
||||||
|
mf:name "bad character in blank node" ; |
||||||
|
mf:action <invalid_bnode.nt> ; |
||||||
|
mf:result <iri_spo.nt> . |
||||||
|
|
||||||
|
<#invalid_string_nt> |
||||||
|
rdf:type ox:TestNTripleRecovery ; |
||||||
|
mf:name "invalid escape sequence in string" ; |
||||||
|
mf:action <invalid_string.nt> ; |
||||||
|
mf:result <iri_spo.nt> . |
||||||
|
|
||||||
|
<#invalid_string_ttl> |
||||||
|
rdf:type ox:TestTurtleRecovery ; |
||||||
|
mf:name "invalid escape sequence in string" ; |
||||||
|
mf:action <invalid_string.nt> ; |
||||||
|
mf:result <iri_spo.nt> . |
||||||
|
|
||||||
|
<#invalid_string_n3> |
||||||
|
rdf:type ox:TestN3Recovery ; |
||||||
|
mf:name "invalid escape sequence in string" ; |
||||||
|
mf:action <invalid_string.nt> ; |
||||||
|
mf:result <iri_spo.nt> . |
||||||
|
|
||||||
|
<#missing_dot_at_end_of_triple_with_iri_middle_nt> |
||||||
|
rdf:type ox:TestNTripleRecovery ; |
||||||
|
mf:name "missing dot at the end of a triple" ; |
||||||
|
mf:action <missing_dot_at_end_of_triple_with_iri_middle.nt> ; |
||||||
|
mf:result <iri2_spo.nt> . |
||||||
|
|
||||||
|
<#missing_dot_at_end_of_triple_with_iri_middle_ttl> |
||||||
|
rdf:type ox:TestTurtleRecovery ; |
||||||
|
mf:name "missing dot at the end of a triple" ; |
||||||
|
mf:action <missing_dot_at_end_of_triple_with_iri_middle.nt> ; |
||||||
|
mf:result <iri2_spo.nt> . |
||||||
|
|
||||||
|
<#missing_dot_at_end_of_triple_with_iri_end_nt> |
||||||
|
rdf:type ox:TestNTripleRecovery ; |
||||||
|
mf:name "missing dot at the end of a triple" ; |
||||||
|
mf:action <missing_dot_at_end_of_triple_with_iri_end.nt> ; |
||||||
|
mf:result <iri_spo.nt> . |
||||||
|
|
||||||
|
<#missing_dot_at_end_of_triple_with_iri_end_ttl> |
||||||
|
rdf:type ox:TestTurtleRecovery ; |
||||||
|
mf:name "missing dot at the end of a triple" ; |
||||||
|
mf:action <missing_dot_at_end_of_triple_with_iri_end.nt> ; |
||||||
|
mf:result <iri_spo.nt> . |
||||||
|
|
||||||
|
<#missing_dot_at_end_of_triple_with_string_middle_nt> |
||||||
|
rdf:type ox:TestNTripleRecovery ; |
||||||
|
mf:name "missing dot at the end of a triple" ; |
||||||
|
mf:action <missing_dot_at_end_of_triple_with_string_middle.nt> ; |
||||||
|
mf:result <iri2_string_spo.nt> . |
||||||
|
|
||||||
|
<#missing_dot_at_end_of_triple_with_string_middle_ttl> |
||||||
|
rdf:type ox:TestTurtleRecovery ; |
||||||
|
mf:name "missing dot at the end of a triple" ; |
||||||
|
mf:action <missing_dot_at_end_of_triple_with_string_middle.nt> ; |
||||||
|
mf:result <iri2_string_spo.nt> . |
||||||
|
|
||||||
|
<#missing_dot_at_end_of_triple_with_string_end_nt> |
||||||
|
rdf:type ox:TestNTripleRecovery ; |
||||||
|
mf:name "missing dot at the end of a triple" ; |
||||||
|
mf:action <missing_dot_at_end_of_triple_with_string_end.nt> ; |
||||||
|
mf:result <iri_string_spo.nt> . |
||||||
|
|
||||||
|
<#missing_dot_at_end_of_triple_with_string_end_ttl> |
||||||
|
rdf:type ox:TestTurtleRecovery ; |
||||||
|
mf:name "missing dot at the end of a triple" ; |
||||||
|
mf:action <missing_dot_at_end_of_triple_with_string_end.nt> ; |
||||||
|
mf:result <iri_string_spo.nt> . |
@ -0,0 +1 @@ |
|||||||
|
<http://example.com/s> <http://example.com/p> <http://example.com/o> |
@ -0,0 +1,2 @@ |
|||||||
|
<http://example.com/s> <http://example.com/p> <http://example.com/o> |
||||||
|
<http://example.com/s> <http://example.com/p> <http://example.com/o2> . |
@ -0,0 +1 @@ |
|||||||
|
<http://example.com/s> <http://example.com/p> "o" |
@ -0,0 +1,2 @@ |
|||||||
|
<http://example.com/s> <http://example.com/p> "o" |
||||||
|
<http://example.com/s> <http://example.com/p> <http://example.com/o> . |
@ -0,0 +1,2 @@ |
|||||||
|
<http://example.com/s> <http://example.com/p> "foo"@base . |
||||||
|
<http://example.com/s> <http://example.com/p> "bar"@prefix . |
@ -0,0 +1,3 @@ |
|||||||
|
@prefix : <http://example.com/> . |
||||||
|
|
||||||
|
:s :p "foo"@base , "bar"@prefix . |
@ -0,0 +1 @@ |
|||||||
|
<http://foo> <http://foo> "foo"@badlanguagetag . |
@ -0,0 +1,2 @@ |
|||||||
|
((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((
(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((( |
||||||
|
|
@ -0,0 +1,2 @@ |
|||||||
|
<urn:zamaudio:ZaMultiComp#preset001> <http://lv2plug.in/ns/ext/state#state> _:1 . |
||||||
|
|
@ -0,0 +1,6 @@ |
|||||||
|
@prefix state: <http://lv2plug.in/ns/ext/state#> . |
||||||
|
|
||||||
|
<urn:zamaudio:ZaMultiComp#preset001> |
||||||
|
state:state [ |
||||||
|
] . |
||||||
|
|
@ -0,0 +1,3 @@ |
|||||||
|
<http://example.com/prefix/s> <http://example.com/prefix/p> <http://example.com/true/o> . |
||||||
|
<http://example.com/base/s> <http://example.com/base/p> <http://example.com/false/o> . |
||||||
|
<http://example.com/graph/s> <http://example.com/graph/p> <http://example.com/graph/o> <http://example.com/graph/g> . |
@ -0,0 +1,2 @@ |
|||||||
|
<http://example.com/prefix/s> <http://example.com/prefix/p> <http://example.com/true/o> . |
||||||
|
<http://example.com/base/s> <http://example.com/base/p> <http://example.com/false/o> . |
@ -0,0 +1,10 @@ |
|||||||
|
base <http://example.com/> |
||||||
|
prefix prefix: <prefix/> |
||||||
|
prefix base: <base/> |
||||||
|
prefix graph: <graph/> |
||||||
|
prefix true: <true/> |
||||||
|
prefix false: <false/> |
||||||
|
|
||||||
|
prefix:s prefix:p true:o . |
||||||
|
base:s base:p false:o . |
||||||
|
graph:g { graph:s graph:p graph:o . } |
@ -0,0 +1,8 @@ |
|||||||
|
base <http://example.com/> |
||||||
|
prefix prefix: <prefix/> |
||||||
|
prefix base: <base/> |
||||||
|
prefix true: <true/> |
||||||
|
prefix false: <false/> |
||||||
|
|
||||||
|
prefix:s prefix:p true:o . |
||||||
|
base:s base:p false:o . |
@ -0,0 +1 @@ |
|||||||
|
<http://foo> <http://foo> "foo"@en-us . |
@ -0,0 +1,4 @@ |
|||||||
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:foo="http://foo"> |
||||||
|
<rdf:Description rdf:about="http://foo" xml:lang="en-US" foo:="foo"> |
||||||
|
</rdf:Description> |
||||||
|
</rdf:RDF> |
@ -0,0 +1 @@ |
|||||||
|
<http://foo> <http://foo> "foo"@en-US-US . |
@ -0,0 +1 @@ |
|||||||
|
<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> " bar\n" . |
@ -0,0 +1,7 @@ |
|||||||
|
<?xml version="1.0"?> |
||||||
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> |
||||||
|
<rdf:Description rdf:about="http://example.com/foo"> |
||||||
|
<rdf:value> bar |
||||||
|
</rdf:value> |
||||||
|
</rdf:Description> |
||||||
|
</rdf:RDF> |
@ -0,0 +1,90 @@ |
|||||||
|
@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> . |
||||||
|
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . |
||||||
|
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . |
||||||
|
@prefix rdft: <http://www.w3.org/ns/rdftest#> . |
||||||
|
|
||||||
|
<> |
||||||
|
rdf:type mf:Manifest ; |
||||||
|
rdfs:comment "Oxigraph parsers test case" ; |
||||||
|
mf:entries ( |
||||||
|
<#blank_node_with_linebreak> |
||||||
|
<#bad_lang> |
||||||
|
<#language_normalization_ttl> |
||||||
|
<#language_normalization_xml> |
||||||
|
<#xml_entities> |
||||||
|
<#xml_nested_entities> |
||||||
|
<#literal_value_space> |
||||||
|
<#bad_parentheses> |
||||||
|
<#keyword_vs_prefix_ttl> |
||||||
|
<#keyword_vs_prefix_trig> |
||||||
|
<#at_keywords_as_lang_tag> |
||||||
|
) . |
||||||
|
|
||||||
|
<#no_end_line_jump> |
||||||
|
rdf:type rdft:TestNTriplesPositiveSyntax ; |
||||||
|
mf:name "No line jump at the end of the file" ; |
||||||
|
mf:action <no_end_line_jump.nt> . |
||||||
|
|
||||||
|
<#blank_node_with_linebreak> |
||||||
|
rdf:type rdft:TestTurtleEval ; |
||||||
|
mf:name "blank node with linebreak" ; |
||||||
|
mf:action <blank_node_with_linebreak.ttl> ; |
||||||
|
mf:result <blank_node_with_linebreak.nt> . |
||||||
|
|
||||||
|
<#language_normalization_ttl> |
||||||
|
rdf:type rdft:TestTurtleEval ; |
||||||
|
mf:name "language case normalization" ; |
||||||
|
mf:action <language_normalization.ttl> ; |
||||||
|
mf:result <language_normalization.nt> . |
||||||
|
|
||||||
|
<#language_normalization_xml> |
||||||
|
rdf:type rdft:TestXMLEval ; |
||||||
|
mf:name "language case normalization" ; |
||||||
|
mf:action <language_normalization.rdf> ; |
||||||
|
mf:result <language_normalization.nt> . |
||||||
|
|
||||||
|
<#bad_lang> |
||||||
|
rdf:type rdft:TestTurtleNegativeSyntax ; |
||||||
|
mf:name "bad language tag" ; |
||||||
|
mf:action <bad_lang.ttl> . |
||||||
|
|
||||||
|
<#xml_entities> |
||||||
|
rdf:type rdft:TestXMLEval ; |
||||||
|
mf:name "custom XML entities" ; |
||||||
|
mf:action <xml_entities.rdf> ; |
||||||
|
mf:result <xml_entities.nt> . |
||||||
|
|
||||||
|
<#xml_nested_entities> |
||||||
|
rdf:type rdft:TestXMLEval ; |
||||||
|
mf:name "custom XML entities with nested definitions" ; |
||||||
|
mf:action <xml_nested_entities.rdf> ; |
||||||
|
mf:result <xml_nested_entities.nt> . |
||||||
|
|
||||||
|
<#literal_value_space> |
||||||
|
rdf:type rdft:TestXMLEval ; |
||||||
|
mf:name "spaces in literal values" ; |
||||||
|
mf:action <literal_value_space.rdf> ; |
||||||
|
mf:result <literal_value_space.nt> . |
||||||
|
|
||||||
|
<#bad_parentheses> |
||||||
|
rdf:type rdft:TestTurtleNegativeSyntax ; |
||||||
|
mf:name "a lot of parentheses that might generate a stack overflow" ; |
||||||
|
mf:action <bad_parentheses.ttl> . |
||||||
|
|
||||||
|
<#keyword_vs_prefix_ttl> |
||||||
|
rdf:type rdft:TestTurtleEval ; |
||||||
|
mf:name "usage of keywords as prefix" ; |
||||||
|
mf:action <keyword_vs_prefix.ttl> ; |
||||||
|
mf:result <keyword_vs_prefix.nt> . |
||||||
|
|
||||||
|
<#keyword_vs_prefix_trig> |
||||||
|
rdf:type rdft:TestTrigEval ; |
||||||
|
mf:name "usage of keywords as prefix" ; |
||||||
|
mf:action <keyword_vs_prefix.trig> ; |
||||||
|
mf:result <keyword_vs_prefix.nq> . |
||||||
|
|
||||||
|
<#at_keywords_as_lang_tag> |
||||||
|
rdf:type rdft:TestTurtleEval ; |
||||||
|
mf:name "usage of at keywords as language tags" ; |
||||||
|
mf:action <at_keywords_as_lang_tag.ttl> ; |
||||||
|
mf:result <at_keywords_as_lang_tag.nt> . |
@ -0,0 +1 @@ |
|||||||
|
<http://example.com> <http://example.com> <http://example.com> . |
@ -0,0 +1 @@ |
|||||||
|
<http://example.com/foo> <http://example.com/2/test> "bar"^^<http://www.w3.org/2001/XMLSchema#string> . |
@ -0,0 +1,10 @@ |
|||||||
|
<?xml version="1.0"?> |
||||||
|
<!DOCTYPE rdf:RDF [ |
||||||
|
<!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" > |
||||||
|
<!ENTITY ex "http://example.com/"> |
||||||
|
]> |
||||||
|
<rdf:RDF xmlns:ex2="&ex;2/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> |
||||||
|
<rdf:Description rdf:about="&ex;foo"> |
||||||
|
<ex2:test rdf:datatype="&xsd;string">bar</ex2:test> |
||||||
|
</rdf:Description> |
||||||
|
</rdf:RDF> |
@ -0,0 +1 @@ |
|||||||
|
<http://example.com/foo> <http://example.com/2/test> "bar"^^<http://www.w3.org/2001/XMLSchema#string> . |
@ -0,0 +1,15 @@ |
|||||||
|
<?xml version="1.0"?> |
||||||
|
|
||||||
|
<!DOCTYPE rdf:RDF [ |
||||||
|
<!ENTITY ex "http://example.com/"> |
||||||
|
<!ENTITY w3 "http://www.w3.org"> |
||||||
|
<!ENTITY rdf "&w3;/1999/02/22-rdf-syntax-ns#"> |
||||||
|
<!ENTITY rdfs "&w3;/2000/01/rdf-schema#"> |
||||||
|
<!ENTITY xsd "&w3;/2001/XMLSchema#"> |
||||||
|
]> |
||||||
|
|
||||||
|
<rdf:RDF xmlns:ex2="&ex;2/" xmlns:rdf="&rdf;"> |
||||||
|
<rdf:Description rdf:about="&ex;foo"> |
||||||
|
<ex2:test rdf:datatype="&xsd;string">bar</ex2:test> |
||||||
|
</rdf:Description> |
||||||
|
</rdf:RDF> |
@ -0,0 +1,13 @@ |
|||||||
|
Copyright 2011-2022 David Robillard <d@drobilla.net> |
||||||
|
|
||||||
|
Permission to use, copy, modify, and/or distribute this software for any |
||||||
|
purpose with or without fee is hereby granted, provided that the above |
||||||
|
copyright notice and this permission notice appear in all copies. |
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH |
||||||
|
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND |
||||||
|
FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, |
||||||
|
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM |
||||||
|
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR |
||||||
|
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR |
||||||
|
PERFORMANCE OF THIS SOFTWARE. |
@ -0,0 +1 @@ |
|||||||
|
Testsuite from [Serd](https://drobilla.net/software/serd) project. |
@ -0,0 +1,2 @@ |
|||||||
|
# prefix name must end in a : |
||||||
|
@prefix a <#> . |
@ -0,0 +1,3 @@ |
|||||||
|
# Forbidden by RDF - predicate cannot be blank |
||||||
|
@prefix : <http://example.org/base#> . |
||||||
|
:a [ :b :c ] :d . |
@ -0,0 +1,3 @@ |
|||||||
|
# Forbidden by RDF - predicate cannot be blank |
||||||
|
@prefix : <http://example.org/base#> . |
||||||
|
:a [] :b . |
@ -0,0 +1,3 @@ |
|||||||
|
# 'a' only allowed as a predicate |
||||||
|
@prefix : <http://example.org/base#> . |
||||||
|
a :a :b . |
@ -0,0 +1,3 @@ |
|||||||
|
# No comma is allowed in collections |
||||||
|
@prefix : <http://example.org/stuff/1.0/> . |
||||||
|
:a :b ( "apple", "banana" ) . |
@ -0,0 +1,4 @@ |
|||||||
|
# N3 {}s are not in Turtle |
||||||
|
@prefix : <http://example.org/stuff/1.0/> . |
||||||
|
{ :a :b :c . } :d :e . |
||||||
|
|
@ -0,0 +1,3 @@ |
|||||||
|
# is and of are not in turtle |
||||||
|
@prefix : <http://example.org/stuff/1.0/> . |
||||||
|
:a is :b of :c . |
@ -0,0 +1,4 @@ |
|||||||
|
# paths are not in turtle |
||||||
|
@prefix : <http://example.org/stuff/1.0/> . |
||||||
|
:a.:b.:c . |
||||||
|
:a^:b^:c . |
@ -0,0 +1,2 @@ |
|||||||
|
@keywords something. |
||||||
|
# @keywords is not in turtle |
@ -0,0 +1,3 @@ |
|||||||
|
# implies is not in turtle |
||||||
|
@prefix : <http://example.org/stuff/1.0/> . |
||||||
|
:a => :b . |
@ -0,0 +1,3 @@ |
|||||||
|
# equivalence is not in turtle |
||||||
|
@prefix : <http://example.org/stuff/1.0/> . |
||||||
|
:a = :b . |
@ -0,0 +1,3 @@ |
|||||||
|
# @forAll is not in turtle |
||||||
|
@prefix : <http://example.org/stuff/1.0/> . |
||||||
|
@forAll :x . |
@ -0,0 +1,3 @@ |
|||||||
|
# @forSome is not in turtle |
||||||
|
@prefix : <http://example.org/stuff/1.0/> . |
||||||
|
@forSome :x . |
@ -0,0 +1,3 @@ |
|||||||
|
# <= is not in turtle |
||||||
|
@prefix : <http://example.org/stuff/1.0/> . |
||||||
|
:a <= :b . |
@ -0,0 +1,6 @@ |
|||||||
|
# Test long literals with missing end |
||||||
|
@prefix : <http://example.org/ex#> . |
||||||
|
:a :b """a long |
||||||
|
literal |
||||||
|
with |
||||||
|
newlines |
@ -0,0 +1 @@ |
|||||||
|
@base "I'm quite certain this is not a URI" . |
@ -0,0 +1 @@ |
|||||||
|
<http://example.org/s> <http://example.org/p> _|invalid . |
@ -0,0 +1,3 @@ |
|||||||
|
@prefix eg: <http://example.org/> . |
||||||
|
|
||||||
|
_:.bad a eg:Thing . |
@ -0,0 +1,3 @@ |
|||||||
|
ﻴ# This file starts with the first two bytes of the UTF-8 Byte Order Mark |
||||||
|
|
||||||
|
<http://example.org/thing> a <http://example.org/Thing> . |
@ -0,0 +1,3 @@ |
|||||||
|
@prefix eg: <http://example.org/> . |
||||||
|
|
||||||
|
eg:†bad <http://example.org/p> <http://example.org/o> . |
@ -0,0 +1 @@ |
|||||||
|
bad†:s <http://example.org/p> <http://example.org/o> . |
@ -0,0 +1 @@ |
|||||||
|
<http://example.org/s> <http://example.org/p> "value"^<http://example.org/t> . |
@ -0,0 +1 @@ |
|||||||
|
<> <http://example.org/pred> "hello"^^"not-a-uri" . |
@ -0,0 +1 @@ |
|||||||
|
<http://example.org/s> . <http://example.org/p> <http://example.org/o> . |
@ -0,0 +1 @@ |
|||||||
|
[ <http://example.org/p> (1. |
@ -0,0 +1,3 @@ |
|||||||
|
@prefix eg: <http://example.org/> . |
||||||
|
|
||||||
|
<> eg:comment "" |
@ -0,0 +1,3 @@ |
|||||||
|
@prefix eg: <http://example.org/> . |
||||||
|
|
||||||
|
<> eg:comment " |
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue