parent
005271c348
commit
4c7fd0168b
@ -1,5 +1,6 @@ |
||||
extern crate peg; |
||||
|
||||
fn main() { |
||||
peg::cargo_build("src/rio/ntriples/grammar.rustpeg"); |
||||
peg::cargo_build("src/rio/ntriples/ntriples_grammar.rustpeg"); |
||||
peg::cargo_build("src/rio/turtle/turtle_grammar.rustpeg"); |
||||
} |
||||
|
@ -0,0 +1,42 @@ |
||||
/// Implements https://www.w3.org/TR/turtle/
|
||||
|
||||
mod grammar { |
||||
include!(concat!(env!("OUT_DIR"), "/turtle_grammar.rs")); |
||||
} |
||||
|
||||
use model::data::*; |
||||
use rio::*; |
||||
use std::collections::HashMap; |
||||
use std::io::BufReader; |
||||
use std::io::Read; |
||||
|
||||
//TODO: make private
|
||||
pub struct ParserState { |
||||
pub base_uri: String, |
||||
pub namespaces: HashMap<String, String>, |
||||
pub cur_subject: Vec<NamedOrBlankNode>, |
||||
pub cur_predicate: Vec<NamedNode>, |
||||
} |
||||
|
||||
pub fn read_turtle<'a, R: Read + 'a>( |
||||
source: R, |
||||
data_factory: &'a DataFactory, |
||||
) -> RioResult<impl Iterator<Item = Triple>> { |
||||
let factory = data_factory.clone(); //TODO: try to avoid clone here
|
||||
let mut state = ParserState { |
||||
base_uri: String::default(), |
||||
namespaces: HashMap::default(), |
||||
cur_subject: Vec::default(), |
||||
cur_predicate: Vec::default(), |
||||
}; |
||||
let mut string_buffer = String::default(); |
||||
let mut triple_buffer = Vec::default(); |
||||
match BufReader::new(source).read_to_string(&mut string_buffer) { |
||||
Ok(_) => match grammar::turtleDoc(&string_buffer, &mut state, &mut triple_buffer, &factory) |
||||
{ |
||||
Ok(_) => Ok(triple_buffer.into_iter()), |
||||
Err(error) => Err(RioError::new(error)), |
||||
}, |
||||
Err(error) => Err(RioError::new(error)), |
||||
} |
||||
} |
@ -0,0 +1,275 @@ |
||||
//See https://www.w3.org/TR/turtle/#sec-grammar |
||||
|
||||
use std::char; |
||||
use std::iter; |
||||
use model::data::*; |
||||
use rio::turtle::ParserState; |
||||
|
||||
#![arguments(state: &mut ParserState, buffer: &mut Vec<Triple>, data_factory: &DataFactory)] |
||||
|
||||
//[1] |
||||
#[pub] |
||||
turtleDoc -> () = _ (statement _)* |
||||
|
||||
//[2] |
||||
statement -> () = directive / triples "." |
||||
|
||||
//[3] |
||||
directive -> () = prefixID / base / sparqlPrefix / sparqlBase |
||||
|
||||
//[4] |
||||
prefixID -> () = "@prefix" _ ns:PNAME_NS _ i:IRIREF _ "." { |
||||
state.namespaces.insert(ns.into(), i.into()); |
||||
} |
||||
|
||||
//[5] |
||||
base -> () = "@base" _ i:IRIREF _ "." { |
||||
state.base_uri = i.into(); |
||||
} |
||||
|
||||
//[5s] |
||||
sparqlBase -> () = "BASE"i _ i:IRIREF { |
||||
state.base_uri = i.into(); |
||||
} |
||||
|
||||
//[6s] |
||||
sparqlPrefix -> () = "PREFIX"i _ ns:PNAME_NS _ i:IRIREF { |
||||
state.namespaces.insert(ns.into(), i.into()); |
||||
} |
||||
|
||||
//[6] |
||||
triples -> () = subject_push _ predicateObjectList / triples_blankNodePropertyList_push _ predicateObjectList? |
||||
subject_push -> () = s:subject { |
||||
state.cur_subject.push(s) |
||||
} |
||||
triples_blankNodePropertyList_push -> () = s: blankNodePropertyList { |
||||
state.cur_subject.push(s) |
||||
} |
||||
|
||||
//[7] |
||||
predicateObjectList -> () = predicate_push _ objectList _ (";" _ (predicate_push _ objectList _)?)* |
||||
predicate_push -> () = v:verb { |
||||
state.cur_predicate.push(v) |
||||
} |
||||
|
||||
//[8] |
||||
objectList -> () = object _ ("," _ object _)* |
||||
|
||||
//[9] |
||||
verb -> NamedNode = predicate / |
||||
"a" { data_factory.named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#type") } |
||||
|
||||
// [10] |
||||
subject -> NamedOrBlankNode = |
||||
i:iri { i.into() } / |
||||
b:BlankNode { b.into() } / |
||||
c:collection { c } |
||||
|
||||
//[11] |
||||
predicate -> NamedNode = iri |
||||
|
||||
// [12] |
||||
object -> () = o:object_value {? |
||||
match state.cur_subject.last() { |
||||
Some(s) => match state.cur_predicate.last() { |
||||
Some(p) => { |
||||
buffer.push(data_factory.triple(s.clone(), p.clone(), o)); |
||||
Ok(()) |
||||
} |
||||
None => Err("Predicate not found") |
||||
}, |
||||
None => Err("Subject not found") |
||||
} |
||||
} |
||||
|
||||
object_value -> Term = |
||||
i:iri { i.into() } / |
||||
b:BlankNode { b.into() } / |
||||
c:collection { c.into() } / |
||||
b:blankNodePropertyList { b.into() } / |
||||
l:literal { l.into() } |
||||
|
||||
//[13] |
||||
literal -> Literal = RDFLiteral / NumericLiteral / BooleanLiteral |
||||
|
||||
//[14] |
||||
blankNodePropertyList -> NamedOrBlankNode = blankNodePropertyList_open _ predicateObjectList _ "]" {? |
||||
state.cur_subject.pop().ok_or("No subject found in the stack") |
||||
} |
||||
blankNodePropertyList_open -> () = "[" { |
||||
state.cur_subject.push(data_factory.new_blank_node().into()) |
||||
} |
||||
|
||||
//[15] |
||||
collection -> NamedOrBlankNode = '(' _ o:(collection_value*) ')' { |
||||
let mut current_list_node = NamedOrBlankNode::from(data_factory.named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")); |
||||
for obj in o.into_iter().rev() { |
||||
let new_blank_node = NamedOrBlankNode::from(data_factory.new_blank_node()); |
||||
buffer.push(data_factory.triple(new_blank_node.clone(), data_factory.named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#first"), obj)); |
||||
buffer.push(data_factory.triple(new_blank_node.clone(), data_factory.named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"), current_list_node)); |
||||
current_list_node = new_blank_node; |
||||
} |
||||
current_list_node |
||||
} |
||||
collection_value -> Term = o:object_value _ { o } |
||||
|
||||
//[16] |
||||
NumericLiteral -> Literal = |
||||
i:$(INTEGER) { data_factory.typed_literal(i, data_factory.named_node("http://www.w3.org/2001/XMLSchema#integer")) } / |
||||
d:$(DECIMAL) { data_factory.typed_literal(d, data_factory.named_node("http://www.w3.org/2001/XMLSchema#decimal")) } / |
||||
d:$(DOUBLE) { data_factory.typed_literal(d, data_factory.named_node("http://www.w3.org/2001/XMLSchema#double")) } |
||||
|
||||
//[128s] |
||||
RDFLiteral -> Literal = |
||||
v:String _ "^^" _ t:iri { data_factory.typed_literal(v, t) } / |
||||
v:String _ l:LANGTAG { data_factory.language_tagged_literal(v, l) } / |
||||
v:String { data_factory.simple_literal(v) } |
||||
|
||||
//[133s] |
||||
BooleanLiteral -> Literal = |
||||
"true" { data_factory.typed_literal("true", data_factory.named_node("http://www.w3.org/2001/XMLSchema#boolean")) } / |
||||
"false" { data_factory.typed_literal("false", data_factory.named_node("http://www.w3.org/2001/XMLSchema#boolean")) } |
||||
|
||||
//[17] |
||||
String -> String = STRING_LITERAL_QUOTE / STRING_LITERAL_SINGLE_QUOTE / STRING_LITERAL_LONG_SINGLE_QUOTE / STRING_LITERAL_LONG_QUOTE |
||||
|
||||
//[135s] |
||||
iri -> NamedNode = i:(IRIREF / PrefixedName) { |
||||
data_factory.named_node(i) |
||||
} |
||||
|
||||
//[136s] |
||||
PrefixedName -> String = PNAME_LN / |
||||
ns:PNAME_NS {? state.namespaces.get(ns).map(|v| v.clone()).ok_or("Prefix not found") } |
||||
|
||||
//[137s] |
||||
BlankNode -> BlankNode = |
||||
b:BLANK_NODE_LABEL { data_factory.blank_node(b) } / |
||||
ANON { data_factory.new_blank_node() } |
||||
|
||||
//[18] |
||||
IRIREF -> String = "<" _ i:((_IRIREF_simple_char / UCHAR)*) _ ">" { |
||||
//TODO: relative URIs resolution |
||||
i.into_iter().collect() |
||||
} |
||||
_IRIREF_simple_char -> char = c:$([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}]) { c.chars().next().unwrap() } |
||||
|
||||
//[139s] |
||||
PNAME_NS -> &'input str = ns:$(PN_PREFIX? ":") { |
||||
ns |
||||
} |
||||
|
||||
//[140s] |
||||
PNAME_LN -> String = ns:$(PNAME_NS) local:$(PN_LOCAL) {? |
||||
state.namespaces.get(ns).map(|v| v.clone() + local).ok_or("Prefix not found") |
||||
} |
||||
|
||||
//[141s] |
||||
BLANK_NODE_LABEL -> &'input str = "_:" b:$((PN_CHARS_U / [0-9]) ((PN_CHARS / ".")* PN_CHARS)?) { |
||||
//TODO unescape |
||||
b |
||||
} |
||||
|
||||
//[144s] |
||||
LANGTAG -> &'input str = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) { |
||||
l |
||||
} |
||||
|
||||
//[19] |
||||
INTEGER -> () = [+-]? [0-9]+ |
||||
|
||||
//[20] |
||||
DECIMAL -> () = [+-]? [0-9]* "." [0-9]+ |
||||
|
||||
//[21] |
||||
DOUBLE -> () = [+-]? ([0-9]+ "." [0-9]* EXPONENT / "." [0-9]+ EXPONENT / [0-9]+ EXPONENT) |
||||
|
||||
//[154s] |
||||
EXPONENT -> () = [eE] [+-]? [0-9]+ |
||||
|
||||
//[22] |
||||
STRING_LITERAL_QUOTE -> String = "\"" l:((STRING_LITERAL_QUOTE_simple_char / ECHAR / UCHAR)*) "\"" { |
||||
l.into_iter().collect() |
||||
} |
||||
STRING_LITERAL_QUOTE_simple_char -> char = c:$([^\u{0022}\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() } |
||||
|
||||
//[23] |
||||
STRING_LITERAL_SINGLE_QUOTE -> String = "'" l:((STRING_LITERAL_SINGLE_QUOTE_simple_char / ECHAR / UCHAR)*) "'" { |
||||
l.into_iter().collect() |
||||
} |
||||
STRING_LITERAL_SINGLE_QUOTE_simple_char -> char = c:$([^\u{0027}\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() } |
||||
|
||||
//[24] |
||||
STRING_LITERAL_LONG_SINGLE_QUOTE -> String = "'''" ("'" / "''")? l:((STRING_LITERAL_LONG_SINGLE_QUOTE_simple_char / ECHAR / UCHAR)*) "'''" { |
||||
l.into_iter().collect() |
||||
} |
||||
STRING_LITERAL_LONG_SINGLE_QUOTE_simple_char -> char = c:$([^\u{0027}\u{005c}]) { c.chars().next().unwrap() } |
||||
|
||||
//[25] |
||||
STRING_LITERAL_LONG_QUOTE -> String = "\"\"\"" ("\"" / "\"\"")? l:((STRING_LITERAL_LONG_QUOTE_simple_char / ECHAR / UCHAR)*) "\"\"\"" { |
||||
l.into_iter().collect() |
||||
} |
||||
STRING_LITERAL_LONG_QUOTE_simple_char -> char = c:$([^\u{0022}\u{005c}]) { c.chars().next().unwrap() } |
||||
|
||||
//[26] |
||||
UCHAR -> char = "\\u" h:$(HEX HEX HEX HEX) { |
||||
u32::from_str_radix(h, 16).ok().and_then(char::from_u32).unwrap() |
||||
} / "\\U" h:$(HEX HEX HEX HEX HEX HEX HEX HEX) { |
||||
u32::from_str_radix(h, 16).ok().and_then(char::from_u32).unwrap() |
||||
} |
||||
|
||||
//[159s] |
||||
ECHAR -> char = "\\" c:$([tbnrf"'\\]) { |
||||
match c { |
||||
"t" => '\u{0009}', |
||||
"b" => '\u{0008}', |
||||
"n" => '\u{000A}', |
||||
"r" => '\u{000D}', |
||||
"f" => '\u{000C}', |
||||
"\"" => '\u{0022}', |
||||
"'" => '\u{0027}', |
||||
"\\" => '\u{005C}', |
||||
_ => panic!("unexpected escaped char") // not possible |
||||
} |
||||
} |
||||
|
||||
//[161s] |
||||
WS -> () = #quiet<[\u{20}\u{9}\u{D}\u{A}]> |
||||
|
||||
//[162s] |
||||
ANON -> () = "[" WS* "]" |
||||
|
||||
//[163s] |
||||
PN_CHARS_BASE -> () = [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}] |
||||
|
||||
//[164s] |
||||
PN_CHARS_U -> () = PN_CHARS_BASE / "_" |
||||
|
||||
//[166s] |
||||
PN_CHARS -> () = PN_CHARS_U / [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] |
||||
|
||||
//[167s] |
||||
PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("." PN_CHARS+)* |
||||
|
||||
//[168s] |
||||
PN_LOCAL -> () = (PN_CHARS_U / ":" / [0-9] / PLX) (PN_CHARS / ":" / PLX)* ("." (PN_CHARS / ":" / PLX)+)* |
||||
|
||||
//[169s] |
||||
PLX -> String = |
||||
p:$(PERCENT) { p.into() } / |
||||
e:PN_LOCAL_ESC { iter::once(e).collect() } |
||||
|
||||
//[170s] |
||||
PERCENT -> () = "%" HEX HEX |
||||
|
||||
//[171s] |
||||
HEX -> () = ([0-9A-Fa-f]) |
||||
|
||||
//[172s] |
||||
PN_LOCAL_ESC -> char = "\\" c:$([_~\.\-!$&'()*+,;=/?#@%]) { c.chars().next().unwrap() } |
||||
|
||||
|
||||
//space |
||||
_ = #quiet<([ \t\n\r] / comment)*> |
||||
//comment |
||||
comment = #quiet<"#" [^\r\n]*> |
Loading…
Reference in new issue