parent
10ce4aaeca
commit
74dadf5f21
@ -1,7 +1,5 @@ |
|||||||
use peg; |
use peg; |
||||||
|
|
||||||
fn main() { |
fn main() { |
||||||
peg::cargo_build("src/rio/ntriples/ntriples_grammar.rustpeg"); |
|
||||||
peg::cargo_build("src/rio/turtle/turtle_grammar.rustpeg"); |
|
||||||
peg::cargo_build("src/sparql/sparql_grammar.rustpeg"); |
peg::cargo_build("src/sparql/sparql_grammar.rustpeg"); |
||||||
} |
} |
||||||
|
@ -1,6 +1,7 @@ |
|||||||
//! Implementations of serializers and deserializers for usual RDF syntaxes
|
//! Implementations of serializers and deserializers for usual RDF syntaxes
|
||||||
|
|
||||||
pub mod ntriples; |
pub mod ntriples; |
||||||
|
mod rio; |
||||||
pub mod turtle; |
pub mod turtle; |
||||||
pub(crate) mod utils; |
pub(crate) mod utils; |
||||||
pub mod xml; |
pub mod xml; |
||||||
|
@ -0,0 +1,15 @@ |
|||||||
|
//! Implementation of [N-Triples](https://www.w3.org/TR/n-triples/) RDF syntax
|
||||||
|
|
||||||
|
use crate::model::Triple; |
||||||
|
use crate::rio::rio::convert_triple; |
||||||
|
use crate::Result; |
||||||
|
use rio_api::parser::TripleParser; |
||||||
|
use rio_turtle::NTriplesParser; |
||||||
|
use std::collections::BTreeMap; |
||||||
|
use std::io::BufRead; |
||||||
|
|
||||||
|
/// Reads a [N-Triples](https://www.w3.org/TR/n-triples/) file from a Rust `BufRead` and returns an iterator of the read `Triple`s
|
||||||
|
pub fn read_ntriples<R: BufRead>(reader: R) -> Result<impl Iterator<Item = Result<Triple>>> { |
||||||
|
let mut bnode_map = BTreeMap::default(); |
||||||
|
Ok(NTriplesParser::new(reader)?.into_iter(move |t| convert_triple(t, &mut bnode_map))) |
||||||
|
} |
@ -1,75 +0,0 @@ |
|||||||
//! Implementation of [N-Triples](https://www.w3.org/TR/n-triples/) RDF syntax
|
|
||||||
|
|
||||||
mod grammar { |
|
||||||
#![allow(
|
|
||||||
clippy::suspicious_else_formatting, |
|
||||||
clippy::len_zero, |
|
||||||
clippy::single_match, |
|
||||||
clippy::unit_arg, |
|
||||||
clippy::naive_bytecount |
|
||||||
)] |
|
||||||
|
|
||||||
use crate::rio::utils::unescape_characters; |
|
||||||
use crate::utils::StaticSliceMap; |
|
||||||
use lazy_static::lazy_static; |
|
||||||
use std::borrow::Cow; |
|
||||||
|
|
||||||
const UNESCAPE_CHARACTERS: [u8; 8] = [b't', b'b', b'n', b'r', b'f', b'"', b'\'', b'\\']; |
|
||||||
lazy_static! { |
|
||||||
static ref UNESCAPE_REPLACEMENT: StaticSliceMap<char, char> = StaticSliceMap::new( |
|
||||||
&['t', 'b', 'n', 'r', 'f', '"', '\'', '\\'], |
|
||||||
&[ |
|
||||||
'\u{0009}', '\u{0008}', '\u{000A}', '\u{000D}', '\u{000C}', '\u{0022}', '\u{0027}', |
|
||||||
'\u{005C}' |
|
||||||
] |
|
||||||
); |
|
||||||
} |
|
||||||
|
|
||||||
pub fn unescape_echars(input: &str) -> Cow<'_, str> { |
|
||||||
unescape_characters(input, &UNESCAPE_CHARACTERS, &UNESCAPE_REPLACEMENT) |
|
||||||
} |
|
||||||
|
|
||||||
include!(concat!(env!("OUT_DIR"), "/ntriples_grammar.rs")); |
|
||||||
} |
|
||||||
|
|
||||||
use crate::model::*; |
|
||||||
use crate::Result; |
|
||||||
use std::collections::BTreeMap; |
|
||||||
use std::io::BufRead; |
|
||||||
use std::io::BufReader; |
|
||||||
use std::io::Read; |
|
||||||
|
|
||||||
struct NTriplesIterator<R: Read> { |
|
||||||
buffer: String, |
|
||||||
reader: BufReader<R>, |
|
||||||
bnodes_map: BTreeMap<String, BlankNode>, |
|
||||||
} |
|
||||||
|
|
||||||
impl<R: Read> Iterator for NTriplesIterator<R> { |
|
||||||
type Item = Result<Triple>; |
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Result<Triple>> { |
|
||||||
if let Err(error) = self.reader.read_line(&mut self.buffer) { |
|
||||||
return Some(Err(error.into())); |
|
||||||
} |
|
||||||
if self.buffer.is_empty() { |
|
||||||
return None; //End of file
|
|
||||||
} |
|
||||||
let result = grammar::triple(&self.buffer, &mut self.bnodes_map); |
|
||||||
self.buffer.clear(); |
|
||||||
match result { |
|
||||||
Ok(Some(triple)) => Some(Ok(triple)), |
|
||||||
Ok(None) => self.next(), |
|
||||||
Err(error) => Some(Err(error.into())), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Reads a [N-Triples](https://www.w3.org/TR/n-triples/) file from a Rust `Read` and returns an iterator of the read `Triple`s
|
|
||||||
pub fn read_ntriples<'a, R: Read + 'a>(source: R) -> impl Iterator<Item = Result<Triple>> { |
|
||||||
NTriplesIterator { |
|
||||||
buffer: String::default(), |
|
||||||
reader: BufReader::new(source), |
|
||||||
bnodes_map: BTreeMap::default(), |
|
||||||
} |
|
||||||
} |
|
@ -1,92 +0,0 @@ |
|||||||
//See https://www.w3.org/TR/2014/REC-n-triples-20140225/#n-triples-grammar |
|
||||||
|
|
||||||
use std::char; |
|
||||||
use std::str::FromStr; |
|
||||||
use crate::model::*; |
|
||||||
use std::collections::BTreeMap; |
|
||||||
use crate::rio::utils::unescape_unicode_codepoints; |
|
||||||
|
|
||||||
#![arguments(bnodes_map: &mut BTreeMap<String, BlankNode>)] |
|
||||||
|
|
||||||
//[2] |
|
||||||
pub triple -> Option<Triple> = |
|
||||||
_ s:subject _ p:predicate _ o:object _ "." _ comment? EOL? { Some(Triple::new(s, p, o)) } / |
|
||||||
_ comment? EOL? { None } |
|
||||||
|
|
||||||
//[3] |
|
||||||
subject -> NamedOrBlankNode = |
|
||||||
i: IRIREF { i.into() } / |
|
||||||
b: BLANK_NODE_LABEL { b.into() } |
|
||||||
|
|
||||||
//[4] |
|
||||||
predicate -> NamedNode = i:IRIREF { |
|
||||||
i |
|
||||||
} |
|
||||||
|
|
||||||
//[5] |
|
||||||
object -> Term = |
|
||||||
i: IRIREF { i.into() } / |
|
||||||
b: BLANK_NODE_LABEL { b.into() } / |
|
||||||
l: literal { l.into() } |
|
||||||
|
|
||||||
//[6] |
|
||||||
literal -> Literal = |
|
||||||
v: STRING_LITERAL_QUOTE _ "^^" _ t:IRIREF { Literal::new_typed_literal(v, t) } / |
|
||||||
v: STRING_LITERAL_QUOTE _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } / |
|
||||||
v: STRING_LITERAL_QUOTE { Literal::new_simple_literal(v) } |
|
||||||
|
|
||||||
|
|
||||||
//[144s] |
|
||||||
LANGTAG -> LanguageTag = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {? |
|
||||||
match LanguageTag::parse(&l) { |
|
||||||
Ok(langtag) => Ok(langtag), |
|
||||||
Err(error) => Err("language tag parsing failed") |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
//[7] |
|
||||||
EOL = [\r\n]+ |
|
||||||
|
|
||||||
//[8] |
|
||||||
IRIREF -> NamedNode = "<" _ i:$(([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}] / UCHAR)*) _ ">" {? |
|
||||||
let s = unescape_unicode_codepoints(i); |
|
||||||
match NamedNode::from_str(&s) { |
|
||||||
Ok(named_node) => Ok(named_node), |
|
||||||
Err(error) => Err("IRI parsing failed") |
|
||||||
} |
|
||||||
} |
|
||||||
_IRIREF_simple_char -> char = c:$() { c.chars().next().unwrap() } |
|
||||||
|
|
||||||
//[9] |
|
||||||
STRING_LITERAL_QUOTE -> String = "\"" l:$(([^\u{0022}\u{005c}\u{000a}\u{000d}] / ECHAR / UCHAR)*) "\"" { |
|
||||||
unescape_unicode_codepoints(&unescape_echars(l)).into_owned() |
|
||||||
} |
|
||||||
|
|
||||||
//[141s] |
|
||||||
BLANK_NODE_LABEL -> BlankNode = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) { |
|
||||||
bnodes_map.entry(b.to_string()).or_insert_with(BlankNode::default).clone() |
|
||||||
} |
|
||||||
|
|
||||||
//[10] |
|
||||||
UCHAR -> () = "\\u" HEX HEX HEX HEX / "\\U" HEX HEX HEX HEX HEX HEX HEX HEX |
|
||||||
|
|
||||||
//[153s] |
|
||||||
ECHAR -> () = '\\' [tbnrf"'\\] |
|
||||||
|
|
||||||
//[157s] |
|
||||||
PN_CHARS_BASE -> () = [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}] |
|
||||||
|
|
||||||
//[158s] |
|
||||||
PN_CHARS_U -> () = '_' / ':' / PN_CHARS_BASE |
|
||||||
|
|
||||||
//[160s] |
|
||||||
PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U |
|
||||||
|
|
||||||
//[162s] |
|
||||||
HEX -> () = ([0-9A-Fa-f]) |
|
||||||
|
|
||||||
|
|
||||||
//space |
|
||||||
_ = #quiet<[ \t]*> |
|
||||||
//comment |
|
||||||
comment = #quiet<"#" [^\r\n]*> |
|
@ -0,0 +1,64 @@ |
|||||||
|
//! Wrapper for RIO parsers
|
||||||
|
|
||||||
|
use crate::model::*; |
||||||
|
use crate::Result; |
||||||
|
use rio_api::model as rio; |
||||||
|
use std::collections::BTreeMap; |
||||||
|
use std::str::FromStr; |
||||||
|
|
||||||
|
pub fn convert_triple( |
||||||
|
value: rio::Triple, |
||||||
|
bnodes_map: &mut BTreeMap<String, BlankNode>, |
||||||
|
) -> Result<Triple> { |
||||||
|
let t = Triple::new( |
||||||
|
convert_named_or_blank_node(value.subject, bnodes_map)?, |
||||||
|
convert_named_node(value.predicate)?, |
||||||
|
convert_term(value.object, bnodes_map)?, |
||||||
|
); |
||||||
|
// println!("{}", t);
|
||||||
|
Ok(t) |
||||||
|
} |
||||||
|
|
||||||
|
fn convert_term(value: rio::Term, bnodes_map: &mut BTreeMap<String, BlankNode>) -> Result<Term> { |
||||||
|
Ok(match value { |
||||||
|
rio::Term::NamedNode(v) => convert_named_node(v)?.into(), |
||||||
|
rio::Term::BlankNode(v) => convert_blank_node(v, bnodes_map).into(), |
||||||
|
rio::Term::Literal(v) => convert_literal(v)?.into(), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
fn convert_named_or_blank_node( |
||||||
|
value: rio::NamedOrBlankNode, |
||||||
|
bnodes_map: &mut BTreeMap<String, BlankNode>, |
||||||
|
) -> Result<NamedOrBlankNode> { |
||||||
|
Ok(match value { |
||||||
|
rio::NamedOrBlankNode::NamedNode(v) => convert_named_node(v)?.into(), |
||||||
|
rio::NamedOrBlankNode::BlankNode(v) => convert_blank_node(v, bnodes_map).into(), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
fn convert_named_node(value: rio::NamedNode) -> Result<NamedNode> { |
||||||
|
NamedNode::from_str(value.iri) |
||||||
|
} |
||||||
|
|
||||||
|
fn convert_blank_node( |
||||||
|
value: rio::BlankNode, |
||||||
|
bnodes_map: &mut BTreeMap<String, BlankNode>, |
||||||
|
) -> BlankNode { |
||||||
|
bnodes_map |
||||||
|
.entry(value.id.to_string()) |
||||||
|
.or_insert_with(BlankNode::default) |
||||||
|
.clone() |
||||||
|
} |
||||||
|
|
||||||
|
fn convert_literal(value: rio::Literal) -> Result<Literal> { |
||||||
|
Ok(match value { |
||||||
|
rio::Literal::Simple { value } => Literal::new_simple_literal(value), |
||||||
|
rio::Literal::LanguageTaggedString { value, language } => { |
||||||
|
Literal::new_language_tagged_literal(value, LanguageTag::parse(language)?) |
||||||
|
} |
||||||
|
rio::Literal::Typed { value, datatype } => { |
||||||
|
Literal::new_typed_literal(value, convert_named_node(datatype)?) |
||||||
|
} |
||||||
|
}) |
||||||
|
} |
@ -0,0 +1,22 @@ |
|||||||
|
//! Implementation of [Turtle](https://www.w3.org/TR/turtle/) RDF syntax
|
||||||
|
|
||||||
|
use crate::model::Triple; |
||||||
|
use crate::rio::rio::convert_triple; |
||||||
|
use crate::Result; |
||||||
|
use rio_api::parser::TripleParser; |
||||||
|
use rio_turtle::TurtleParser; |
||||||
|
use std::collections::BTreeMap; |
||||||
|
use std::io::BufRead; |
||||||
|
use url::Url; |
||||||
|
|
||||||
|
/// Reads a [Turtle](https://www.w3.org/TR/turtle/) file from a Rust `BufRead` and returns an iterator of the read `Triple`s
|
||||||
|
pub fn read_turtle<R: BufRead>( |
||||||
|
reader: R, |
||||||
|
base_url: Option<Url>, |
||||||
|
) -> Result<impl Iterator<Item = Result<Triple>>> { |
||||||
|
let mut bnode_map = BTreeMap::default(); |
||||||
|
Ok( |
||||||
|
TurtleParser::new(reader, base_url.as_ref().map_or("", |url| url.as_str()))? |
||||||
|
.into_iter(move |t| convert_triple(t, &mut bnode_map)), |
||||||
|
) |
||||||
|
} |
@ -1,101 +0,0 @@ |
|||||||
//! Implementation of [Turtle](https://www.w3.org/TR/turtle/) RDF syntax
|
|
||||||
|
|
||||||
mod grammar { |
|
||||||
#![allow(
|
|
||||||
clippy::suspicious_else_formatting, |
|
||||||
clippy::len_zero, |
|
||||||
clippy::single_match, |
|
||||||
clippy::unit_arg, |
|
||||||
clippy::naive_bytecount |
|
||||||
)] |
|
||||||
|
|
||||||
use crate::model::*; |
|
||||||
use crate::rio::utils::unescape_characters; |
|
||||||
use crate::utils::StaticSliceMap; |
|
||||||
use lazy_static::lazy_static; |
|
||||||
use std::borrow::Cow; |
|
||||||
use std::collections::BTreeMap; |
|
||||||
use std::collections::HashMap; |
|
||||||
use std::io::BufReader; |
|
||||||
use std::io::Read; |
|
||||||
use url::ParseOptions; |
|
||||||
use url::Url; |
|
||||||
|
|
||||||
include!(concat!(env!("OUT_DIR"), "/turtle_grammar.rs")); |
|
||||||
|
|
||||||
pub struct ParserState { |
|
||||||
base_uri: Option<Url>, |
|
||||||
namespaces: HashMap<String, Url>, |
|
||||||
cur_subject: Vec<NamedOrBlankNode>, |
|
||||||
cur_predicate: Vec<NamedNode>, |
|
||||||
bnodes_map: BTreeMap<String, BlankNode>, |
|
||||||
} |
|
||||||
|
|
||||||
impl ParserState { |
|
||||||
fn url_parser(&self) -> ParseOptions<'_> { |
|
||||||
Url::options().base_url(self.base_uri.as_ref()) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Reads a [Turtle](https://www.w3.org/TR/turtle/) file from a Rust `Read` and returns an iterator on the read `Triple`s
|
|
||||||
///
|
|
||||||
/// Warning: this implementation has not been optimized yet and stores all the found triples in memory.
|
|
||||||
/// This implementation also requires that blank node ids are valid UTF-8
|
|
||||||
pub fn read_turtle<'a, R: Read + 'a>( |
|
||||||
source: R, |
|
||||||
base_uri: impl Into<Option<Url>>, |
|
||||||
) -> super::super::super::Result<impl Iterator<Item = Triple>> { |
|
||||||
let mut state = ParserState { |
|
||||||
base_uri: base_uri.into(), |
|
||||||
namespaces: HashMap::default(), |
|
||||||
cur_subject: Vec::default(), |
|
||||||
cur_predicate: Vec::default(), |
|
||||||
bnodes_map: BTreeMap::default(), |
|
||||||
}; |
|
||||||
let mut triple_buffer = Vec::default(); |
|
||||||
|
|
||||||
let mut string_buffer = String::default(); |
|
||||||
BufReader::new(source).read_to_string(&mut string_buffer)?; |
|
||||||
|
|
||||||
turtleDoc(&string_buffer, &mut state, &mut triple_buffer)?; |
|
||||||
Ok(triple_buffer.into_iter()) |
|
||||||
} |
|
||||||
|
|
||||||
const UNESCAPE_CHARACTERS: [u8; 8] = [b't', b'b', b'n', b'r', b'f', b'"', b'\'', b'\\']; |
|
||||||
lazy_static! { |
|
||||||
static ref UNESCAPE_REPLACEMENT: StaticSliceMap<char, char> = StaticSliceMap::new( |
|
||||||
&['t', 'b', 'n', 'r', 'f', '"', '\'', '\\'], |
|
||||||
&[ |
|
||||||
'\u{0009}', '\u{0008}', '\u{000A}', '\u{000D}', '\u{000C}', '\u{0022}', '\u{0027}', |
|
||||||
'\u{005C}' |
|
||||||
] |
|
||||||
); |
|
||||||
} |
|
||||||
|
|
||||||
fn unescape_echars(input: &str) -> Cow<'_, str> { |
|
||||||
unescape_characters(input, &UNESCAPE_CHARACTERS, &UNESCAPE_REPLACEMENT) |
|
||||||
} |
|
||||||
|
|
||||||
const UNESCAPE_PN_CHARACTERS: [u8; 20] = [ |
|
||||||
b'_', b'~', b'.', b'-', b'!', b'$', b'&', b'\'', b'(', b')', b'*', b'+', b',', b';', b'=', |
|
||||||
b'/', b'?', b'#', b'@', b'%', |
|
||||||
]; |
|
||||||
lazy_static! { |
|
||||||
static ref UNESCAPE_PN_REPLACEMENT: StaticSliceMap<char, char> = StaticSliceMap::new( |
|
||||||
&[ |
|
||||||
'_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/', |
|
||||||
'?', '#', '@', '%' |
|
||||||
], |
|
||||||
&[ |
|
||||||
'_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/', |
|
||||||
'?', '#', '@', '%' |
|
||||||
] |
|
||||||
); |
|
||||||
} |
|
||||||
|
|
||||||
pub fn unescape_pn_local(input: &str) -> Cow<'_, str> { |
|
||||||
unescape_characters(input, &UNESCAPE_PN_CHARACTERS, &UNESCAPE_PN_REPLACEMENT) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
pub use self::grammar::read_turtle; |
|
@ -1,272 +0,0 @@ |
|||||||
//See https://www.w3.org/TR/turtle/#sec-grammar |
|
||||||
|
|
||||||
use std::char; |
|
||||||
use crate::model::vocab::rdf; |
|
||||||
use crate::model::vocab::xsd; |
|
||||||
use std::str::FromStr; |
|
||||||
use crate::rio::utils::unescape_unicode_codepoints; |
|
||||||
|
|
||||||
#![arguments(state: &mut ParserState, buffer: &mut Vec<Triple>)] |
|
||||||
|
|
||||||
//[1] |
|
||||||
pub turtleDoc -> () = _ (statement _)* |
|
||||||
|
|
||||||
//[2] |
|
||||||
statement -> () = directive / triples "." |
|
||||||
|
|
||||||
//[3] |
|
||||||
directive -> () = prefixID / base / sparqlPrefix / sparqlBase |
|
||||||
|
|
||||||
//[4] |
|
||||||
prefixID -> () = "@prefix" _ ns:PNAME_NS _ i:IRIREF _ "." { |
|
||||||
state.namespaces.insert(ns.into(), i); |
|
||||||
} |
|
||||||
|
|
||||||
//[5] |
|
||||||
base -> () = "@base" _ url:IRIREF _ "." { |
|
||||||
state.base_uri = Some(url); |
|
||||||
} |
|
||||||
|
|
||||||
//[5s] |
|
||||||
sparqlBase -> () = "BASE"i _ url:IRIREF { |
|
||||||
state.base_uri = Some(url); |
|
||||||
} |
|
||||||
|
|
||||||
//[6s] |
|
||||||
sparqlPrefix -> () = "PREFIX"i _ ns:PNAME_NS _ i:IRIREF { |
|
||||||
state.namespaces.insert(ns.into(), i); |
|
||||||
} |
|
||||||
|
|
||||||
//[6] |
|
||||||
// Both alternatives start by pushing the statement's subject on `state.cur_subject`
// (`subject_push` / `triples_blankNodePropertyList_push`). An action block only belongs to the
// alternative it closes, so each alternative needs its own pop; otherwise the first one would
// leave its subject on the stack after every statement.
triples -> () =
    subject_push _ predicateObjectList {
        state.cur_subject.pop();
    } /
    triples_blankNodePropertyList_push _ predicateObjectList? {
        state.cur_subject.pop();
    }
|
||||||
subject_push -> () = s:subject { |
|
||||||
state.cur_subject.push(s) |
|
||||||
} |
|
||||||
triples_blankNodePropertyList_push -> () = s: blankNodePropertyList { |
|
||||||
state.cur_subject.push(s) |
|
||||||
} |
|
||||||
|
|
||||||
//[7] |
|
||||||
predicateObjectList -> () = predicateObject (";" _ predicateObject?)* |
|
||||||
predicateObject -> () = predicate_push _ objectList _ { |
|
||||||
state.cur_predicate.pop(); |
|
||||||
} |
|
||||||
predicate_push -> () = v:verb { |
|
||||||
state.cur_predicate.push(v) |
|
||||||
} |
|
||||||
|
|
||||||
//[8] |
|
||||||
objectList -> () = object _ ("," _ object _)* |
|
||||||
|
|
||||||
//[9] |
|
||||||
verb -> NamedNode = predicate / |
|
||||||
"a" { rdf::TYPE.clone() } |
|
||||||
|
|
||||||
// [10] |
|
||||||
subject -> NamedOrBlankNode = |
|
||||||
i:iri { i.into() } / |
|
||||||
b:BlankNode { b.into() } / |
|
||||||
c:collection { c } |
|
||||||
|
|
||||||
//[11] |
|
||||||
predicate -> NamedNode = iri |
|
||||||
|
|
||||||
// [12] |
|
||||||
object -> () = o:object_value {? |
|
||||||
match state.cur_subject.last() { |
|
||||||
Some(s) => match state.cur_predicate.last() { |
|
||||||
Some(p) => { |
|
||||||
buffer.push(Triple::new(s.clone(), p.clone(), o)); |
|
||||||
Ok(()) |
|
||||||
} |
|
||||||
None => Err("Predicate not found") |
|
||||||
}, |
|
||||||
None => Err("Subject not found") |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
object_value -> Term = |
|
||||||
i:iri { i.into() } / |
|
||||||
b:BlankNode { b.into() } / |
|
||||||
c:collection { c.into() } / |
|
||||||
b:blankNodePropertyList { b.into() } / |
|
||||||
l:literal { l.into() } |
|
||||||
|
|
||||||
//[13] |
|
||||||
literal -> Literal = RDFLiteral / NumericLiteral / BooleanLiteral |
|
||||||
|
|
||||||
//[14] |
|
||||||
blankNodePropertyList -> NamedOrBlankNode = blankNodePropertyList_open _ predicateObjectList _ "]" {? |
|
||||||
state.cur_subject.pop().ok_or("No subject found in the stack") |
|
||||||
} |
|
||||||
blankNodePropertyList_open -> () = "[" { |
|
||||||
state.cur_subject.push(BlankNode::default().into()) |
|
||||||
} |
|
||||||
|
|
||||||
//[15] |
|
||||||
collection -> NamedOrBlankNode = '(' _ o:(collection_value*) ')' { |
|
||||||
let mut current_list_node = NamedOrBlankNode::from(rdf::NIL.clone()); |
|
||||||
for obj in o.into_iter().rev() { |
|
||||||
let new_blank_node = NamedOrBlankNode::from(BlankNode::default()); |
|
||||||
buffer.push(Triple::new(new_blank_node.clone(), rdf::FIRST.clone(), obj)); |
|
||||||
buffer.push(Triple::new(new_blank_node.clone(), rdf::REST.clone(), current_list_node)); |
|
||||||
current_list_node = new_blank_node; |
|
||||||
} |
|
||||||
current_list_node |
|
||||||
} |
|
||||||
collection_value -> Term = o:object_value _ { o } |
|
||||||
|
|
||||||
//[16] |
|
||||||
NumericLiteral -> Literal = |
|
||||||
d:$(DOUBLE) {? match f64::from_str(d) { |
|
||||||
Ok(value) => Ok(value.into()), |
|
||||||
Err(_) => Err("Invalid xsd:double") |
|
||||||
} } / |
|
||||||
d:$(DECIMAL) { Literal::new_typed_literal(d, xsd::DECIMAL.clone()) } / |
|
||||||
i:$(INTEGER) { Literal::new_typed_literal(i, xsd::INTEGER.clone()) } |
|
||||||
|
|
||||||
//[128s] |
|
||||||
RDFLiteral -> Literal = |
|
||||||
v:String _ "^^" _ t:iri { Literal::new_typed_literal(v, t) } / |
|
||||||
v:String _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } / |
|
||||||
v:String { Literal::new_simple_literal(v) } |
|
||||||
|
|
||||||
//[133s] |
|
||||||
BooleanLiteral -> Literal = |
|
||||||
"true" { true.into() } / |
|
||||||
"false" { false.into() } |
|
||||||
|
|
||||||
//[17] |
|
||||||
String -> String = STRING_LITERAL_LONG_SINGLE_QUOTE / STRING_LITERAL_LONG_QUOTE / STRING_LITERAL_QUOTE / STRING_LITERAL_SINGLE_QUOTE |
|
||||||
|
|
||||||
//[135s] |
|
||||||
iri -> NamedNode = i:(IRIREF / PrefixedName) { |
|
||||||
i.into() |
|
||||||
} |
|
||||||
|
|
||||||
//[136s] |
|
||||||
PrefixedName -> Url = PNAME_LN / |
|
||||||
ns:PNAME_NS {? state.namespaces.get(ns).cloned().ok_or("Prefix not found") } |
|
||||||
|
|
||||||
//[137s] |
|
||||||
BlankNode -> BlankNode = |
|
||||||
b:BLANK_NODE_LABEL { state.bnodes_map.entry(b.to_string()).or_insert_with(BlankNode::default).clone() } / |
|
||||||
ANON { BlankNode::default() } |
|
||||||
|
|
||||||
//[18] |
|
||||||
IRIREF -> Url = "<" i:$(([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}] / UCHAR)*) ">" {? |
|
||||||
match state.url_parser().parse(&unescape_unicode_codepoints(i)) { |
|
||||||
Ok(url) => Ok(url), |
|
||||||
Err(error) => Err("IRI parsing failed") |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
//[139s] |
|
||||||
PNAME_NS -> &'input str = ns:$(PN_PREFIX? ":") { |
|
||||||
ns |
|
||||||
} |
|
||||||
|
|
||||||
//[140s] |
|
||||||
PNAME_LN -> Url = ns:$(PNAME_NS) local:$(PN_LOCAL) {? |
|
||||||
match state.namespaces.get(ns) { |
|
||||||
Some(ns) => match Url::parse(&(ns.to_string() + &unescape_pn_local(local))) { |
|
||||||
Ok(url) => Ok(url), |
|
||||||
Err(error) => Err("IRI parsing failed") |
|
||||||
}, |
|
||||||
None => Err("Prefix not found") |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
//[141s] |
|
||||||
BLANK_NODE_LABEL -> &'input str = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) { |
|
||||||
b |
|
||||||
} |
|
||||||
|
|
||||||
//[144s] |
|
||||||
LANGTAG -> LanguageTag = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {? |
|
||||||
match LanguageTag::parse(&l) { |
|
||||||
Ok(langtag) => Ok(langtag), |
|
||||||
Err(error) => Err("language tag parsing failed") |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
//[19] |
|
||||||
INTEGER -> () = [+-]? [0-9]+ |
|
||||||
|
|
||||||
//[20] |
|
||||||
DECIMAL -> () = [+-]? [0-9]* "." [0-9]+ |
|
||||||
|
|
||||||
//[21] |
|
||||||
DOUBLE -> () = [+-]? ([0-9]+ "." [0-9]* / "."? [0-9]+) EXPONENT |
|
||||||
|
|
||||||
//[154s] |
|
||||||
EXPONENT -> () = [eE] [+-]? [0-9]+ |
|
||||||
|
|
||||||
//[22] |
|
||||||
STRING_LITERAL_QUOTE -> String = "\"" l: $(([^"\u{005c}\u{000a}\u{000d}] / ECHAR / UCHAR)*) "\"" { |
|
||||||
unescape_unicode_codepoints(&unescape_echars(l)).into_owned() |
|
||||||
} |
|
||||||
|
|
||||||
//[23] |
|
||||||
STRING_LITERAL_SINGLE_QUOTE -> String = "'" l:$(([^'\u{005c}\u{000a}\u{000d}] / ECHAR / UCHAR)*) "'" { |
|
||||||
unescape_unicode_codepoints(&unescape_echars(l)).into_owned() |
|
||||||
} |
|
||||||
|
|
||||||
//[24] |
|
||||||
STRING_LITERAL_LONG_SINGLE_QUOTE -> String = "'''" l:$(STRING_LITERAL_LONG_SINGLE_QUOTE_inner*) "'''" { |
|
||||||
unescape_unicode_codepoints(&unescape_echars(l)).into_owned() |
|
||||||
} |
|
||||||
STRING_LITERAL_LONG_SINGLE_QUOTE_inner -> () = ("''" / "'")? ([^'\u{005c}] / ECHAR / UCHAR) |
|
||||||
|
|
||||||
//[25] |
|
||||||
STRING_LITERAL_LONG_QUOTE -> String = "\"\"\"" l:$(STRING_LITERAL_LONG_QUOTE_inner*) "\"\"\"" { |
|
||||||
unescape_unicode_codepoints(&unescape_echars(l)).into_owned() |
|
||||||
} |
|
||||||
STRING_LITERAL_LONG_QUOTE_inner -> () = ("\"\"" / "\"")? ([^"\u{005c}] / ECHAR / UCHAR) |
|
||||||
|
|
||||||
//[26] |
|
||||||
UCHAR -> () = "\\u" HEX HEX HEX HEX / "\\U" HEX HEX HEX HEX HEX HEX HEX HEX |
|
||||||
|
|
||||||
//[159s] |
|
||||||
ECHAR -> () = "\\" [tbnrf"'\\] |
|
||||||
|
|
||||||
//[161s] |
|
||||||
WS -> () = #quiet<[\u{20}\u{9}\u{D}\u{A}]> |
|
||||||
|
|
||||||
//[162s] |
|
||||||
ANON -> () = "[" WS* "]" |
|
||||||
|
|
||||||
//[163s] |
|
||||||
PN_CHARS_BASE -> () = [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}] |
|
||||||
|
|
||||||
//[164s] |
|
||||||
PN_CHARS_U -> () = "_" / PN_CHARS_BASE |
|
||||||
|
|
||||||
//[166s] |
|
||||||
PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U |
|
||||||
|
|
||||||
//[167s] |
|
||||||
PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("."+ PN_CHARS+)* |
|
||||||
|
|
||||||
//[168s] |
|
||||||
PN_LOCAL -> () = (PN_CHARS_U / ':' / [0-9] / PLX) (PN_CHARS / ':' / PLX)* ('.'+ (PN_CHARS / ':' / PLX)+)? |
|
||||||
|
|
||||||
//[169s] |
|
||||||
PLX -> () = PERCENT / PN_LOCAL_ESC |
|
||||||
|
|
||||||
//[170s] |
|
||||||
PERCENT -> () = "%" HEX HEX |
|
||||||
|
|
||||||
//[171s] |
|
||||||
HEX -> () = ([0-9A-Fa-f]) |
|
||||||
|
|
||||||
//[172s] |
|
||||||
PN_LOCAL_ESC -> () = "\\" [_~\.\-!$&'()*+,;=/?#@%] |
|
||||||
|
|
||||||
|
|
||||||
//space |
|
||||||
_ = #quiet<([ \t\n\r] / comment)*> |
|
||||||
//comment |
|
||||||
comment = #quiet<"#" [^\r\n]*> |
|
Loading…
Reference in new issue