// rust-peg grammar for the N-Triples line format.
// See https://www.w3.org/TR/2014/REC-n-triples-20140225/#n-triples-grammar
//
// The bracketed numbers before each rule ("[2]", "[144s]", ...) are the
// production numbers used by the specification linked above.

use std::char;
use std::str::FromStr;
use model::*;
use std::collections::BTreeMap;
use rio::utils::unescape_unicode_codepoints;

// Extra argument threaded through every rule: maps the textual blank node
// labels met so far to the `BlankNode` created for them, so that a label that
// appears several times resolves to the same node (see BLANK_NODE_LABEL).
#![arguments(bnodes_map: &mut BTreeMap<String, BlankNode>)]

//[2]
// Entry point. Yields `Some(triple)` for a "subject predicate object ." statement
// (optionally followed by a comment and line terminators) and `None` for input
// that only holds optional whitespace, a comment and/or line terminators.
pub triple -> Option<Triple> =
    _ s:subject _ p:predicate _ o:object _ "." _ comment? EOL? { Some(Triple::new(s, p, o)) } /
    _ comment? EOL? { None }

//[3]
// A subject is an IRI or a blank node.
subject -> NamedOrBlankNode =
    i: IRIREF { i.into() } /
    b: BLANK_NODE_LABEL { b.into() }

//[4]
// A predicate is always an IRI.
predicate -> NamedNode = i:IRIREF {
    i
}

//[5]
// An object is an IRI, a blank node or a literal.
object -> Term =
    i: IRIREF { i.into() } /
    b: BLANK_NODE_LABEL { b.into() } /
    l: literal { l.into() }

//[6]
// The alternatives are ordered from the most to the least specific suffix
// ("^^<datatype>", then "@lang", then nothing): PEG choice is ordered, so the
// bare-string alternative has to come last.
literal -> Literal =
    v: STRING_LITERAL_QUOTE _ "^^" _ t:IRIREF { Literal::new_typed_literal(v, t) } /
    v: STRING_LITERAL_QUOTE _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } /
    v: STRING_LITERAL_QUOTE { Literal::new_simple_literal(v) }

//[144s]
// Returns the language tag without its leading "@".
LANGTAG -> &'input str = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {
    l
}

//[7]
EOL = [\r\n]+

//[8]
// Captures the raw text between "<" and ">", resolves the \uXXXX / \UXXXXXXXX
// escapes, then builds the `NamedNode` with `NamedNode::from_str`. The excluded
// characters are U+0000..U+0020, '<', '>', '"', '{', '}', '|', '^',
// '`' (U+0060) and '\' (U+005C).
// A `from_str` failure is reported as a parse failure of this rule with the
// message "IRI parsing failed"; the underlying error value is dropped.
IRIREF -> NamedNode = "<" _ i:$(([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}] / UCHAR)*) _ ">" {?
    let s = unescape_unicode_codepoints(i);
    NamedNode::from_str(&s).map_err(|_| "IRI parsing failed")
}

// Matches exactly one unescaped IRIREF character and returns it.
// The capture previously matched the empty string, which made the action panic
// on `unwrap()`; capturing one character of the IRIREF class makes the
// extraction infallible.
// NOTE(review): no rule visible in this grammar references this helper.
_IRIREF_simple_char -> char = c:$([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}]) {
    c.chars().next().expect("the capture holds exactly one character")
}

//[9]
// Captures the text between the double quotes (any character except '"', '\',
// LF and CR, or an ECHAR / UCHAR escape sequence), then unescapes it: first
// through `unescape_echars`, then through `unescape_unicode_codepoints`.
// NOTE(review): `unescape_echars` is not imported by the `use` lines of this
// file; presumably it is in scope where the generated parser is included - confirm.
STRING_LITERAL_QUOTE -> String = "\"" l:$(([^\u{0022}\u{005c}\u{000a}\u{000d}] / ECHAR / UCHAR)*) "\"" {
    unescape_unicode_codepoints(&unescape_echars(l)).into_owned()
}

//[141s]
// Looks the label (text after "_:") up in `bnodes_map`, inserting a
// `BlankNode::default()` the first time the label is seen, and returns a clone
// of the stored node.
BLANK_NODE_LABEL -> BlankNode = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) {
    bnodes_map.entry(b.to_string()).or_insert_with(BlankNode::default).clone()
}

//[10]
// Numeric escape: "\u" + 4 hexadecimal digits or "\U" + 8 hexadecimal digits.
UCHAR -> () = "\\u" HEX HEX HEX HEX / "\\U" HEX HEX HEX HEX HEX HEX HEX HEX

//[153s]
// Character escape: a backslash followed by one of t b n r f " ' \
ECHAR -> () = '\\' [tbnrf"'\\]

//[157s]
// The trailing \u{10000}-\u{EFFFF} range is required by production [157s] of
// the specification; without it labels using supplementary-plane characters
// are rejected.
PN_CHARS_BASE -> () = [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}\u{10000}-\u{EFFFF}]

//[158s]
PN_CHARS_U -> () = '_' / ':' / PN_CHARS_BASE

//[160s]
PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U

//[162s]
HEX -> () = ([0-9A-Fa-f])

//space
// Optional run of spaces and tabs; wrapped in #quiet.
_ = #quiet<[ \t]*>

//comment
// "#" up to, but not including, the end of the line; wrapped in #quiet.
comment = #quiet<"#" [^\r\n]*>