Fork of https://github.com/oxigraph/oxigraph.git for the purpose of NextGraph project
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
oxigraph/lib/src/rio/ntriples/ntriples_grammar.rustpeg

89 lines
2.3 KiB

//See https://www.w3.org/TR/2014/REC-n-triples-20140225/#n-triples-grammar
use std::char;
use std::str::FromStr;
use model::*;
use std::collections::BTreeMap;
use rio::utils::unescape_unicode_codepoints;
#![arguments(bnodes_map: &mut BTreeMap<String, BlankNode>)]
//[2] triple ::= subject predicate object '.'
// Parses a single line of an N-Triples document.
// Yields Some(triple) when the line holds a subject, a predicate, an object and
// the terminating ".", optionally followed by a comment and a line ending.
// Yields None when the line holds nothing but optional blanks, an optional
// comment and an optional line ending.
pub triple -> Option<Triple> =
    _ subj:subject _ pred:predicate _ obj:object _ "." _ comment? EOL? { Some(Triple::new(subj, pred, obj)) } /
    _ comment? EOL? { None }
//[3] subject ::= IRIREF | BLANK_NODE_LABEL
// The matched IRI or blank node is converted into a NamedOrBlankNode.
subject -> NamedOrBlankNode =
    iri:IRIREF { iri.into() } /
    bnode:BLANK_NODE_LABEL { bnode.into() }
//[4] predicate ::= IRIREF
// The predicate is the NamedNode produced by IRIREF, passed through unchanged.
predicate -> NamedNode = iri:IRIREF { iri }
//[5] object ::= IRIREF | BLANK_NODE_LABEL | literal
// Whichever alternative matches is converted into a Term.
object -> Term =
    iri:IRIREF { iri.into() } /
    bnode:BLANK_NODE_LABEL { bnode.into() } /
    lit:literal { lit.into() }
//[6] literal ::= STRING_LITERAL_QUOTE ('^^' IRIREF | LANGTAG)?
// Alternatives are tried in order: typed literal, language-tagged literal,
// then simple literal. Blanks are accepted around "^^" and before the
// language tag.
literal -> Literal =
    value:STRING_LITERAL_QUOTE _ "^^" _ datatype:IRIREF { Literal::new_typed_literal(value, datatype) } /
    value:STRING_LITERAL_QUOTE _ lang:LANGTAG { Literal::new_language_tagged_literal(value, lang) } /
    value:STRING_LITERAL_QUOTE { Literal::new_simple_literal(value) }
//[144s] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
// Returns the tag text as a slice of the input, without the leading "@".
LANGTAG -> &'input str = "@" tag:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) { tag }
//[7] EOL ::= [#xD#xA]+
// One or more carriage return / line feed characters.
EOL = [\r\n]+
//[8] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>'
// Matches an IRI enclosed in angle brackets. Blanks directly after "<" and
// directly before ">" are skipped and are not part of the captured text.
// The captured text is passed through unescape_unicode_codepoints and then
// parsed with NamedNode::from_str; when that parse fails the rule fails with
// the message "IRI parsing failed".
IRIREF -> NamedNode = "<" _ raw:$(([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}] / UCHAR)*) _ ">" {?
    let decoded = unescape_unicode_codepoints(raw);
    NamedNode::from_str(&decoded).map_err(|_| "IRI parsing failed")
}
// Helper rule that captures an empty expression and returns its first char.
// NOTE(review): the capture `$()` is empty, so `c.chars().next()` would be
// None and the unwrap would panic if this rule were ever evaluated. No other
// rule in the visible part of this grammar references it; confirm it is dead
// code before removing or repairing it.
_IRIREF_simple_char -> char = c:$() { c.chars().next().unwrap() }
//[9] STRING_LITERAL_QUOTE ::= '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"'
// Captures the text between the double quotes, runs it through
// unescape_echars and then through unescape_unicode_codepoints, and returns
// the result as an owned String.
// NOTE(review): ECHARs are resolved before UCHARs, so an escaped backslash
// followed by "u0041" may be decoded as a UCHAR by the second pass; confirm
// against the two helpers.
STRING_LITERAL_QUOTE -> String = "\"" raw:$(([^\u{0022}\u{005c}\u{000a}\u{000d}] / ECHAR / UCHAR)*) "\"" {
    let echars_resolved = unescape_echars(raw);
    unescape_unicode_codepoints(&echars_resolved).into_owned()
}
//[141s] BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
// The label text after "_:" is used as a key into bnodes_map: the first time
// a label is seen a default BlankNode is stored for it, and every occurrence
// of the label returns a clone of that stored node.
BLANK_NODE_LABEL -> BlankNode = "_:" label:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) {
    bnodes_map
        .entry(label.to_owned())
        .or_insert_with(BlankNode::default)
        .clone()
}
//[10] UCHAR ::= '\u' HEX HEX HEX HEX | '\U' HEX HEX HEX HEX HEX HEX HEX HEX
// A numeric escape: backslash, lowercase u and four hex digits, or backslash,
// uppercase U and eight hex digits. Produces no value.
UCHAR -> () =
    "\\u" HEX HEX HEX HEX /
    "\\U" HEX HEX HEX HEX HEX HEX HEX HEX
//[153s] ECHAR ::= '\' [tbnrf"'\]
// A backslash followed by one of: t b n r f " ' \. Produces no value.
ECHAR -> () = '\\' [tbnrf"'\\]
//[157s] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
// Base set of characters allowed in blank node labels. Produces no value.
// The last range (U+10000 to U+EFFFF) is required by the N-Triples grammar so
// that labels containing supplementary-plane characters are accepted.
PN_CHARS_BASE -> () = [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}\u{10000}-\u{EFFFF}]
//[158s] PN_CHARS_U ::= PN_CHARS_BASE | '_' | ':'
// A base character, an underscore or a colon. Produces no value.
PN_CHARS_U -> () =
    PN_CHARS_BASE /
    '_' /
    ':'
//[160s] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
// Any PN_CHARS_U character, or a hyphen, an ASCII digit, U+00B7, or a
// character in U+0300..U+036F or U+203F..U+2040. Produces no value.
PN_CHARS -> () =
    PN_CHARS_U /
    [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}]
//[162s] HEX ::= [0-9] | [A-F] | [a-f]
// A single hexadecimal digit, upper or lower case. Produces no value.
HEX -> () = ([0-9A-Fa-f])
// Optional blanks: zero or more space or tab characters.
// Wrapped in #quiet so it is left out of the parser's "expected" error sets.
_ = #quiet<[ \t]*>
// Comment: a "#" followed by every character up to, but not including, the
// next carriage return or line feed.
// Wrapped in #quiet so it is left out of the parser's "expected" error sets.
comment = #quiet<"#" [^\r\n]*>