//See https://www.w3.org/TR/turtle/#sec-grammar use std::char; use url::Url; use model::data::*; use std::str::FromStr; use std::iter; use rio::turtle::ParserState; #![arguments(state: &mut ParserState, buffer: &mut Vec, data_factory: &DataFactory)] //[1] #[pub] turtleDoc -> () = _ (statement _)* //[2] statement -> () = directive / triples "." //[3] directive -> () = prefixID / base / sparqlPrefix / sparqlBase //[4] prefixID -> () = "@prefix" _ ns:PNAME_NS _ i:IRIREF _ "." { state.namespaces.insert(ns.into(), i.into()); } //[5] base -> () = "@base" _ i:IRIREF _ "." {? match Url::parse(&i) { Ok(url) => { state.base_uri = Some(url); Ok(()) }, Err(error) => Err("IRI parsing failed") } } //[5s] sparqlBase -> () = "BASE"i _ i:IRIREF {? match Url::parse(&i) { Ok(url) => { state.base_uri = Some(url); Ok(()) }, Err(error) => Err("IRI parsing failed") } } //[6s] sparqlPrefix -> () = "PREFIX"i _ ns:PNAME_NS _ i:IRIREF { state.namespaces.insert(ns.into(), i.into()); } //[6] triples -> () = subject_push _ predicateObjectList / triples_blankNodePropertyList_push _ predicateObjectList? subject_push -> () = s:subject { state.cur_subject.push(s) } triples_blankNodePropertyList_push -> () = s: blankNodePropertyList { state.cur_subject.push(s) } //[7] predicateObjectList -> () = predicate_push _ objectList _ (";" _ (predicate_push _ objectList _)?)* predicate_push -> () = v:verb { state.cur_predicate.push(v) } //[8] objectList -> () = object _ ("," _ object _)* //[9] verb -> NamedNode = predicate / "a" { NamedNode::from_str("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").unwrap() } // [10] subject -> NamedOrBlankNode = i:iri { i.into() } / b:BlankNode { b.into() } / c:collection { c } //[11] predicate -> NamedNode = iri // [12] object -> () = o:object_value {? match state.cur_subject.last() { Some(s) => match state.cur_predicate.last() { Some(p) => { buffer.push(data_factory.triple(s.clone(), p.clone(), o)); Ok(()) } None => Err("Predicate not found") }, None => Err("Subject not found") } } object_value -> Term = i:iri { i.into() } / b:BlankNode { b.into() } / c:collection { c.into() } / b:blankNodePropertyList { b.into() } / l:literal { l.into() } //[13] literal -> Literal = RDFLiteral / NumericLiteral / BooleanLiteral //[14] blankNodePropertyList -> NamedOrBlankNode = blankNodePropertyList_open _ predicateObjectList _ "]" {? state.cur_subject.pop().ok_or("No subject found in the stack") } blankNodePropertyList_open -> () = "[" { state.cur_subject.push(data_factory.new_blank_node().into()) } //[15] collection -> NamedOrBlankNode = '(' _ o:(collection_value*) ')' { let first = NamedNode::from_str("http://www.w3.org/1999/02/22-rdf-syntax-ns#first").unwrap(); let rest = NamedNode::from_str("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest").unwrap(); let mut current_list_node = NamedOrBlankNode::from(NamedNode::from_str("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil").unwrap()); for obj in o.into_iter().rev() { let new_blank_node = NamedOrBlankNode::from(data_factory.new_blank_node()); buffer.push(data_factory.triple(new_blank_node.clone(), first.clone(), obj)); buffer.push(data_factory.triple(new_blank_node.clone(), rest.clone(), current_list_node)); current_list_node = new_blank_node; } current_list_node } collection_value -> Term = o:object_value _ { o } //[16] NumericLiteral -> Literal = d:$(DOUBLE) { data_factory.typed_literal(d, NamedNode::from_str("http://www.w3.org/2001/XMLSchema#double").unwrap()) } / d:$(DECIMAL) { data_factory.typed_literal(d, NamedNode::from_str("http://www.w3.org/2001/XMLSchema#decimal").unwrap()) } / i:$(INTEGER) { data_factory.typed_literal(i, NamedNode::from_str("http://www.w3.org/2001/XMLSchema#integer").unwrap()) } //[128s] RDFLiteral -> Literal = v:String _ "^^" _ t:iri { data_factory.typed_literal(v, t) } / v:String _ l:LANGTAG { data_factory.language_tagged_literal(v, l) } / v:String { v.into() } //[133s] BooleanLiteral -> Literal = "true" { true.into() } / "false" { false.into() } //[17] String -> String = STRING_LITERAL_LONG_SINGLE_QUOTE / STRING_LITERAL_LONG_QUOTE / STRING_LITERAL_QUOTE / STRING_LITERAL_SINGLE_QUOTE //[135s] iri -> NamedNode = i:(IRIREF / PrefixedName) {? match state.url_parser().parse(&i) { Ok(url) => Ok(data_factory.named_node(url)), Err(error) => Err("IRI parsing failed") } } //[136s] PrefixedName -> String = PNAME_LN / ns:PNAME_NS {? state.namespaces.get(ns).map(|v| v.clone()).ok_or("Prefix not found") } //[137s] BlankNode -> BlankNode = b:BLANK_NODE_LABEL { data_factory.blank_node(b) } / ANON { data_factory.new_blank_node() } //[18] IRIREF -> String = "<" i:((_IRIREF_simple_char / UCHAR)*) ">" { i.into_iter().collect() } _IRIREF_simple_char -> char = c:$([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}]) { c.chars().next().unwrap() } //[139s] PNAME_NS -> &'input str = ns:$(PN_PREFIX? ":") { ns } //[140s] PNAME_LN -> String = ns:$(PNAME_NS) local:$(PN_LOCAL) {? state.namespaces.get(ns).map(|v| v.clone() + local).ok_or("Prefix not found") } //[141s] BLANK_NODE_LABEL -> &'input str = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) { //TODO unescape b } //[144s] LANGTAG -> &'input str = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) { l } //[19] INTEGER -> () = [+-]? [0-9]+ //[20] DECIMAL -> () = [+-]? [0-9]* "." [0-9]+ //[21] DOUBLE -> () = [+-]? ([0-9]+ "." [0-9]* / "."? [0-9]+) EXPONENT //[154s] EXPONENT -> () = [eE] [+-]? [0-9]+ //[22] STRING_LITERAL_QUOTE -> String = "\"" l:((STRING_LITERAL_QUOTE_simple_char / ECHAR / UCHAR)*) "\"" { l.into_iter().collect() } STRING_LITERAL_QUOTE_simple_char -> char = c:$([^"\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() } //[23] STRING_LITERAL_SINGLE_QUOTE -> String = "'" l:((STRING_LITERAL_SINGLE_QUOTE_simple_char / ECHAR / UCHAR)*) "'" { l.into_iter().collect() } STRING_LITERAL_SINGLE_QUOTE_simple_char -> char = c:$([^'\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() } //[24] STRING_LITERAL_LONG_SINGLE_QUOTE -> String = "'''" l:(STRING_LITERAL_LONG_SINGLE_QUOTE_inner*) "'''" { l.into_iter().collect() } STRING_LITERAL_LONG_SINGLE_QUOTE_inner -> String = a:$(("''" / "'")?) b:(STRING_LITERAL_LONG_SINGLE_QUOTE_simple_char / ECHAR / UCHAR) { let mut s = a.to_string(); s.push(b); s } STRING_LITERAL_LONG_SINGLE_QUOTE_simple_char -> char = c:$([^'\u{005c}]) { c.chars().next().unwrap() } //[25]abc""def''ghi" STRING_LITERAL_LONG_QUOTE -> String = "\"\"\"" l:(STRING_LITERAL_LONG_QUOTE_inner*) "\"\"\"" { l.into_iter().collect() } STRING_LITERAL_LONG_QUOTE_inner -> String = a:$(("\"\"" / "\"")?) b:(STRING_LITERAL_LONG_QUOTE_simple_char / ECHAR / UCHAR) { let mut s = a.to_string(); s.push(b); s } STRING_LITERAL_LONG_QUOTE_simple_char -> char = c:$([^"\u{005c}]) { c.chars().next().unwrap() } //[26] UCHAR -> char = "\\u" h:$(HEX HEX HEX HEX) { u32::from_str_radix(h, 16).ok().and_then(char::from_u32).unwrap() } / "\\U" h:$(HEX HEX HEX HEX HEX HEX HEX HEX) { u32::from_str_radix(h, 16).ok().and_then(char::from_u32).unwrap() } //[159s] ECHAR -> char = "\\" c:$([tbnrf"'\\]) { match c { "t" => '\u{0009}', "b" => '\u{0008}', "n" => '\u{000A}', "r" => '\u{000D}', "f" => '\u{000C}', "\"" => '\u{0022}', "'" => '\u{0027}', "\\" => '\u{005C}', _ => panic!("unexpected escaped char") // not possible } } //[161s] WS -> () = #quiet<[\u{20}\u{9}\u{D}\u{A}]> //[162s] ANON -> () = "[" WS* "]" //[163s] PN_CHARS_BASE -> () = [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}] //[164s] PN_CHARS_U -> () = "_" / PN_CHARS_BASE //[166s] PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U //[167s] PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("."+ PN_CHARS+)* //[168s] PN_LOCAL -> () = (":" / [0-9] / PN_CHARS_U / PLX) (":" / PN_CHARS / PLX)* ("."+ (":" / PN_CHARS / PLX)+)* //[169s] PLX -> String = p:$(PERCENT) { p.into() } / e:PN_LOCAL_ESC { iter::once(e).collect() } //[170s] PERCENT -> () = "%" HEX HEX //[171s] HEX -> () = ([0-9A-Fa-f]) //[172s] PN_LOCAL_ESC -> char = "\\" c:$([_~\.\-!$&'()*+,;=/?#@%]) { c.chars().next().unwrap() } //space _ = #quiet<([ \t\n\r] / comment)*> //comment comment = #quiet<"#" [^\r\n]*>