Fork of https://github.com/oxigraph/oxigraph.git for the purpose of NextGraph project
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
303 lines
8.6 KiB
303 lines
8.6 KiB
//See https://www.w3.org/TR/turtle/#sec-grammar
|
|
|
|
use std::char;
|
|
use url::Url;
|
|
use model::data::*;
|
|
use std::str::FromStr;
|
|
use std::iter;
|
|
use rio::turtle::ParserState;
|
|
|
|
#![arguments(state: &mut ParserState, buffer: &mut Vec<Triple>, data_factory: &DataFactory)]
|
|
|
|
//[1]
|
|
#[pub]
|
|
// Entry point: leading whitespace/comments, then any number of statements,
// each followed by optional whitespace/comments. Produces no value; the
// rules it reaches push triples into `buffer` from their actions.
turtleDoc -> () = _ (statement _)*
|
|
|
|
//[2]
|
|
// A statement is either a directive or a `triples` production followed by
// a terminating dot. Directives are tried first (ordered choice).
statement -> () = directive / triples "."
|
|
|
|
//[3]
|
|
// Any of the four directive forms: the @-prefixed Turtle forms and the
// SPARQL-style keyword forms.
directive -> () = prefixID / base / sparqlPrefix / sparqlBase
|
|
|
|
//[4]
|
|
// `@prefix ns: <iri> .` — registers the namespace in the parser state.
// The key is the PNAME_NS slice as matched, i.e. it includes the trailing
// colon; PNAME_LN and PrefixedName look prefixes up with the same slice.
prefixID -> () = "@prefix" _ prefix:PNAME_NS _ target:IRIREF _ "." {
    state.namespaces.insert(prefix.into(), target.into());
}
|
|
|
|
//[5]
|
|
// `@base <iri> .` — replaces the base IRI stored in the parser state.
//
// The IRI is parsed through `state.url_parser()`, the same parser the `iri`
// rule uses, instead of `Url::parse`, so that a relative `@base` can be
// resolved rather than rejected.
// NOTE(review): this assumes `url_parser()` is configured with the current
// `state.base_uri` — confirm against ParserState.
//
// Fails with "IRI parsing failed" when the IRI cannot be parsed.
base -> () = "@base" _ i:IRIREF _ "." {?
    // Bind the result first so the borrow of `state` taken by the parser
    // has ended before `state.base_uri` is assigned.
    let resolved = state.url_parser().parse(&i);
    match resolved {
        Ok(url) => {
            state.base_uri = Some(url);
            Ok(())
        },
        Err(_) => Err("IRI parsing failed")
    }
}
|
|
|
|
//[5s]
|
|
// `BASE <iri>` (keyword is case-insensitive, no trailing dot) — replaces the
// base IRI stored in the parser state.
//
// The IRI is parsed through `state.url_parser()`, the same parser the `iri`
// rule uses, instead of `Url::parse`, so that a relative base can be
// resolved rather than rejected.
// NOTE(review): this assumes `url_parser()` is configured with the current
// `state.base_uri` — confirm against ParserState.
//
// Fails with "IRI parsing failed" when the IRI cannot be parsed.
sparqlBase -> () = "BASE"i _ i:IRIREF {?
    // Bind the result first so the borrow of `state` taken by the parser
    // has ended before `state.base_uri` is assigned.
    let resolved = state.url_parser().parse(&i);
    match resolved {
        Ok(url) => {
            state.base_uri = Some(url);
            Ok(())
        },
        Err(_) => Err("IRI parsing failed")
    }
}
|
|
|
|
//[6s]
|
|
// `PREFIX ns: <iri>` (keyword is case-insensitive, no trailing dot) —
// registers the namespace in the parser state, keyed by the PNAME_NS slice
// as matched (trailing colon included).
sparqlPrefix -> () = "PREFIX"i _ prefix:PNAME_NS _ target:IRIREF {
    state.namespaces.insert(prefix.into(), target.into());
}
|
|
|
|
//[6]
|
|
// A subject followed by its predicate/object list, or a blank node property
// list optionally followed by one.
//
// Both alternatives start by pushing the subject on `state.cur_subject`
// (see subject_push / triples_blankNodePropertyList_push). Once the
// alternative has matched, the subject is popped again: nothing after the
// statement reads it, and leaving it in place makes the stack grow by one
// entry per statement for the whole document.
triples -> () =
    subject_push _ predicateObjectList {
        state.cur_subject.pop();
    } /
    triples_blankNodePropertyList_push _ predicateObjectList? {
        state.cur_subject.pop();
    }
|
|
// Parses a subject and makes it the current subject by pushing it on the
// subject stack; the `object` rule reads the top of that stack.
subject_push -> () = node:subject { state.cur_subject.push(node) }
|
|
// Parses a blank node property list used in subject position and pushes the
// node it returns on the subject stack, so a following predicateObjectList
// attaches to it.
triples_blankNodePropertyList_push -> () = node:blankNodePropertyList { state.cur_subject.push(node) }
|
|
|
|
//[7]
|
|
// One predicate with its objects, followed by any number of `;`-separated
// further predicate/object groups (a `;` may also be left dangling).
predicateObjectList -> () = predicateObject (";" _ predicateObject?)*

// A single verb with its object list.
//
// The verb is pushed on `state.cur_predicate` for the `object` rule to read
// and popped again once its object list is complete. Without the pop, a
// predicate pushed inside a nested blank node property list stayed on top of
// the stack, so the enclosing object was emitted with the inner predicate
// (e.g. `:s :p1 [ :p2 :o2 ]` produced `:s :p2 _:b`).
predicateObject -> () = predicate_push _ objectList _ {
    state.cur_predicate.pop();
}
|
|
// Parses a verb and makes it the current predicate by pushing it on the
// predicate stack; the `object` rule reads the top of that stack.
predicate_push -> () = pred:verb { state.cur_predicate.push(pred) }
|
|
|
|
//[8]
|
|
// One or more objects separated by commas, with optional whitespace or
// comments around each. Each `object` emits its own triple.
objectList -> () = object _ ("," _ object _)*
|
|
|
|
//[9]
|
|
// A predicate IRI, or the keyword `a` which stands for rdf:type.
// The IRI alternative is tried first, so a prefixed name beginning with the
// letter a is not mistaken for the keyword.
verb -> NamedNode =
    predicate /
    "a" {
        let rdf_type = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
        NamedNode::from_str(rdf_type).unwrap()
    }
|
|
|
|
// [10]
|
|
// A subject is an IRI, a blank node, or a collection; the first two are
// converted into NamedOrBlankNode, a collection already yields one.
subject -> NamedOrBlankNode =
    named:iri { named.into() } /
    blank:BlankNode { blank.into() } /
    list:collection { list }
|
|
|
|
//[11]
|
|
// A predicate is always an IRI.
predicate -> NamedNode = iri
|
|
|
|
// [12]
|
|
// Parses one object value and emits the triple formed with the subject and
// predicate currently on top of their stacks.
//
// Fails with "Subject not found" when the subject stack is empty, otherwise
// with "Predicate not found" when the predicate stack is empty.
object -> () = o:object_value {?
    match (state.cur_subject.last(), state.cur_predicate.last()) {
        (Some(subj), Some(pred)) => {
            buffer.push(data_factory.triple(subj.clone(), pred.clone(), o));
            Ok(())
        },
        // The subject check comes first so its message wins when both
        // stacks are empty.
        (None, _) => Err("Subject not found"),
        (_, None) => Err("Predicate not found")
    }
}
|
|
|
|
// The term in object position: IRI, blank node, collection, blank node
// property list or literal, tried in that order and converted into a Term.
object_value -> Term =
    named:iri { named.into() } /
    blank:BlankNode { blank.into() } /
    list:collection { list.into() } /
    nested:blankNodePropertyList { nested.into() } /
    lit:literal { lit.into() }
|
|
|
|
//[13]
|
|
// A literal is a string-based RDF literal, a numeric literal or a boolean,
// tried in that order.
literal -> Literal = RDFLiteral / NumericLiteral / BooleanLiteral
|
|
|
|
//[14]
|
|
// `[ predicateObjectList ]` — the opening bracket pushes a fresh blank node
// as current subject (see blankNodePropertyList_open); after the closing
// bracket that node is popped off the subject stack and returned.
//
// Fails with "No subject found in the stack" if the stack is empty.
blankNodePropertyList -> NamedOrBlankNode = blankNodePropertyList_open _ predicateObjectList _ "]" {?
    match state.cur_subject.pop() {
        Some(node) => Ok(node),
        None => Err("No subject found in the stack")
    }
}
|
|
// Matches the opening bracket and pushes a newly created blank node on the
// subject stack so the enclosed predicate/object pairs attach to it.
blankNodePropertyList_open -> () = "[" {
    let fresh = data_factory.new_blank_node();
    state.cur_subject.push(fresh.into())
}
|
|
|
|
//[15]
|
|
// `( item* )` — builds the rdf:first / rdf:rest linked list for the items
// and returns its head (rdf:nil for an empty collection).
//
// The list is built back to front: starting from rdf:nil, each item (taken
// in reverse order) gets a new blank node whose rdf:first is the item and
// whose rdf:rest is the node built so far. The triples go straight into
// `buffer`.
collection -> NamedOrBlankNode = '(' _ o:(collection_value*) ')' {
    let first = NamedNode::from_str("http://www.w3.org/1999/02/22-rdf-syntax-ns#first").unwrap();
    let rest = NamedNode::from_str("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest").unwrap();
    let nil = NamedOrBlankNode::from(NamedNode::from_str("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil").unwrap());
    o.into_iter().rev().fold(nil, |tail, item| {
        let cell = NamedOrBlankNode::from(data_factory.new_blank_node());
        buffer.push(data_factory.triple(cell.clone(), first.clone(), item));
        buffer.push(data_factory.triple(cell.clone(), rest.clone(), tail));
        cell
    })
}
|
|
// One collection item: an object value followed by optional whitespace or
// comments. Unlike `object`, it emits no triple and just returns the term.
collection_value -> Term = o:object_value _ { o }
|
|
|
|
//[16]
|
|
// A numeric literal typed as xsd:double, xsd:decimal or xsd:integer.
// DOUBLE is tried before DECIMAL and DECIMAL before INTEGER; the matched
// text is kept verbatim as the lexical form.
NumericLiteral -> Literal =
    lexical:$(DOUBLE) {
        let datatype = NamedNode::from_str("http://www.w3.org/2001/XMLSchema#double").unwrap();
        data_factory.typed_literal(lexical, datatype)
    } /
    lexical:$(DECIMAL) {
        let datatype = NamedNode::from_str("http://www.w3.org/2001/XMLSchema#decimal").unwrap();
        data_factory.typed_literal(lexical, datatype)
    } /
    lexical:$(INTEGER) {
        let datatype = NamedNode::from_str("http://www.w3.org/2001/XMLSchema#integer").unwrap();
        data_factory.typed_literal(lexical, datatype)
    }
|
|
|
|
//[128s]
|
|
// A quoted string, optionally followed by `^^` and a datatype IRI or by a
// language tag. The typed form is tried first, then the language-tagged
// form, then the plain string.
RDFLiteral -> Literal =
    value:String _ "^^" _ datatype:iri { data_factory.typed_literal(value, datatype) } /
    value:String _ lang:LANGTAG { data_factory.language_tagged_literal(value, lang) } /
    value:String { value.into() }
|
|
|
|
//[133s]
|
|
// The keywords `true` and `false`, converted from the matching Rust bool.
BooleanLiteral -> Literal = "true" { true.into() } / "false" { false.into() }
|
|
|
|
//[17]
|
|
// Any of the four quoted string forms. The triple-quoted forms are tried
// first so their opening delimiter is not read as an empty short string.
String -> String = STRING_LITERAL_LONG_SINGLE_QUOTE / STRING_LITERAL_LONG_QUOTE / STRING_LITERAL_QUOTE / STRING_LITERAL_SINGLE_QUOTE
|
|
|
|
//[135s]
|
|
// An IRI written either in angle brackets or as a prefixed name. The
// resulting string is parsed with the parser state URL parser and wrapped in
// a NamedNode.
//
// Fails with "IRI parsing failed" when the string is not a valid IRI.
iri -> NamedNode = i:(IRIREF / PrefixedName) {?
    state.url_parser()
        .parse(&i)
        .map(|url| data_factory.named_node(url))
        .map_err(|_| "IRI parsing failed")
}
|
|
|
|
//[136s]
|
|
// A prefixed name expanded to its full IRI string: either prefix plus local
// part (PNAME_LN) or a bare prefix, which expands to the namespace itself.
//
// Fails with "Prefix not found" when the prefix has not been declared.
PrefixedName -> String =
    PNAME_LN /
    prefix:PNAME_NS {?
        state.namespaces.get(prefix).cloned().ok_or("Prefix not found")
    }
|
|
|
|
//[137s]
|
|
// A labelled blank node (`_:label`), built from its label, or the anonymous
// form `[]`, which gets a newly created blank node.
BlankNode -> BlankNode =
    label:BLANK_NODE_LABEL { data_factory.blank_node(label) } /
    ANON { data_factory.new_blank_node() }
|
|
|
|
//[18]
|
|
// An IRI in angle brackets. Returns the text between the brackets with any
// \u / \U escapes already decoded.
IRIREF -> String = "<" chars:((_IRIREF_simple_char / UCHAR)*) ">" {
    chars.into_iter().collect::<String>()
}
|
|
// One literal character inside an IRIREF: anything except U+0000..U+0020,
// angle brackets, double quote, braces, pipe, caret, backtick and backslash.
// The unwrap cannot fail because the class always matches exactly one char.
_IRIREF_simple_char -> char = c:$([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}]) { c.chars().next().unwrap() }
|
|
|
|
//[139s]
|
|
// A namespace prefix: optional PN_PREFIX followed by a colon. The returned
// slice includes the colon.
PNAME_NS -> &'input str = prefix:$(PN_PREFIX? ":") { prefix }
|
|
|
|
//[140s]
|
|
// A prefixed name with a local part, expanded by appending the local part
// (exactly as written in the input) to the namespace registered for the
// prefix.
//
// Fails with "Prefix not found" when the prefix has not been declared.
PNAME_LN -> String = ns:$(PNAME_NS) local:$(PN_LOCAL) {?
    match state.namespaces.get(ns) {
        Some(namespace) => Ok(format!("{}{}", namespace, local)),
        None => Err("Prefix not found")
    }
}
|
|
|
|
//[141s]
|
|
// A blank node label: `_:` followed by the label. Returns the label text
// without the `_:` marker.
BLANK_NODE_LABEL -> &'input str = "_:" label:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) {
    //TODO unescape
    label
}
|
|
|
|
//[144s]
|
|
// A language tag: `@`, letters, then any number of `-`-separated
// alphanumeric subtags. Returns the tag without the leading `@`.
LANGTAG -> &'input str = "@" tag:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) { tag }
|
|
|
|
//[19]
|
|
// Optional sign followed by one or more digits.
INTEGER -> () = [+-]? [0-9]+
|
|
|
|
//[20]
|
|
// Optional sign, optional integer digits, a dot, then at least one
// fractional digit.
DECIMAL -> () = [+-]? [0-9]* "." [0-9]+
|
|
|
|
//[21]
|
|
// Optional sign, a mantissa, and a mandatory exponent. The mantissa is
// either digits, a dot and optional fraction digits, or an optional dot
// followed by digits.
DOUBLE -> () = [+-]? ([0-9]+ "." [0-9]* / "."? [0-9]+) EXPONENT
|
|
|
|
//[154s]
|
|
// The exponent marker (e or E), an optional sign, and one or more digits.
EXPONENT -> () = [eE] [+-]? [0-9]+
|
|
|
|
//[22]
|
|
// A single-line string in double quotes. Returns its content with ECHAR and
// UCHAR escapes decoded.
STRING_LITERAL_QUOTE -> String = "\"" chars:((STRING_LITERAL_QUOTE_simple_char / ECHAR / UCHAR)*) "\"" {
    chars.into_iter().collect::<String>()
}
|
|
// One unescaped character of a double-quoted string: anything except the
// double quote, backslash, line feed and carriage return.
STRING_LITERAL_QUOTE_simple_char -> char = c:$([^"\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() }
|
|
|
|
//[23]
|
|
// A single-line string in single quotes. Returns its content with ECHAR and
// UCHAR escapes decoded.
STRING_LITERAL_SINGLE_QUOTE -> String = "'" chars:((STRING_LITERAL_SINGLE_QUOTE_simple_char / ECHAR / UCHAR)*) "'" {
    chars.into_iter().collect::<String>()
}
|
|
// One unescaped character of a single-quoted string: anything except the
// single quote, backslash, line feed and carriage return.
STRING_LITERAL_SINGLE_QUOTE_simple_char -> char = c:$([^'\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() }
|
|
|
|
//[24]
|
|
// A multi-line string delimited by three single quotes. The content is the
// concatenation of the pieces produced by the `_inner` rule.
STRING_LITERAL_LONG_SINGLE_QUOTE -> String = "'''" pieces:(STRING_LITERAL_LONG_SINGLE_QUOTE_inner*) "'''" {
    pieces.concat()
}
|
|
// One piece of a triple-single-quoted string: at most two literal single
// quotes followed by one non-quote character or decoded escape. Returns the
// quotes (if any) with that character appended.
STRING_LITERAL_LONG_SINGLE_QUOTE_inner -> String = quotes:$(("''" / "'")?) ch:(STRING_LITERAL_LONG_SINGLE_QUOTE_simple_char / ECHAR / UCHAR) {
    format!("{}{}", quotes, ch)
}
|
|
// One unescaped character of a triple-single-quoted string: anything except
// the single quote and backslash (line breaks are allowed here).
STRING_LITERAL_LONG_SINGLE_QUOTE_simple_char -> char = c:$([^'\u{005c}]) { c.chars().next().unwrap() }
|
|
|
|
//[25]
|
|
// A multi-line string delimited by three double quotes. The content is the
// concatenation of the pieces produced by the `_inner` rule.
STRING_LITERAL_LONG_QUOTE -> String = "\"\"\"" pieces:(STRING_LITERAL_LONG_QUOTE_inner*) "\"\"\"" {
    pieces.concat()
}
|
|
// One piece of a triple-double-quoted string: at most two literal double
// quotes followed by one non-quote character or decoded escape. Returns the
// quotes (if any) with that character appended.
STRING_LITERAL_LONG_QUOTE_inner -> String = quotes:$(("\"\"" / "\"")?) ch:(STRING_LITERAL_LONG_QUOTE_simple_char / ECHAR / UCHAR) {
    format!("{}{}", quotes, ch)
}
|
|
// One unescaped character of a triple-double-quoted string: anything except
// the double quote and backslash (line breaks are allowed here).
STRING_LITERAL_LONG_QUOTE_simple_char -> char = c:$([^"\u{005c}]) { c.chars().next().unwrap() }
|
|
|
|
//[26]
|
|
// A numeric Unicode escape: backslash-u with four hex digits or
// backslash-U with eight, decoded to the character it denotes.
//
// Not every hex value is a valid `char`: surrogates (D800..DFFF) and values
// above 10FFFF make `char::from_u32` return None. Such input is rejected
// with a parse error ("Invalid Unicode code point") instead of panicking,
// since the escape comes straight from the document being parsed.
UCHAR -> char = "\\u" h:$(HEX HEX HEX HEX) {?
    u32::from_str_radix(h, 16).ok().and_then(char::from_u32).ok_or("Invalid Unicode code point")
} / "\\U" h:$(HEX HEX HEX HEX HEX HEX HEX HEX) {?
    u32::from_str_radix(h, 16).ok().and_then(char::from_u32).ok_or("Invalid Unicode code point")
}
|
|
|
|
//[159s]
|
|
// A backslash string escape (t, b, n, r, f, double quote, single quote or
// backslash), decoded to the character it stands for.
ECHAR -> char = "\\" esc:$([tbnrf"'\\]) {
    match esc {
        "t" => '\t',
        "b" => '\u{8}',
        "n" => '\n',
        "r" => '\r',
        "f" => '\u{C}',
        "\"" => '"',
        "'" => '\'',
        "\\" => '\\',
        // The character class above admits nothing else.
        _ => panic!("unexpected escaped char")
    }
}
|
|
|
|
//[161s]
|
|
// A single whitespace character: space, tab, carriage return or line feed.
// Wrapped in #quiet so it is left out of expected-token error reports.
WS -> () = #quiet<[\u{20}\u{9}\u{D}\u{A}]>
|
|
|
|
//[162s]
|
|
// The anonymous blank node: square brackets containing only whitespace.
// Only WS is allowed inside, not comments.
ANON -> () = "[" WS* "]"
|
|
|
|
//[163s]
|
|
// Base character set for names: ASCII letters plus the listed non-ASCII
// ranges, all within the Basic Multilingual Plane.
// NOTE(review): the Turtle grammar also lists U+10000..U+EFFFF for this
// production; confirm whether leaving it out here is intentional.
PN_CHARS_BASE -> () = [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}]
|
|
|
|
//[164s]
|
|
// PN_CHARS_BASE extended with the underscore.
PN_CHARS_U -> () = "_" / PN_CHARS_BASE
|
|
|
|
//[166s]
|
|
// Characters allowed after the first one in a name: hyphen, digits, U+00B7,
// U+0300..U+036F, U+203F..U+2040, or anything in PN_CHARS_U.
PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U
|
|
|
|
//[167s]
|
|
// A prefix name: starts with a base character; dots may appear inside but
// each run of dots must be followed by at least one PN_CHARS, so the name
// never ends with a dot.
PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("."+ PN_CHARS+)*
|
|
|
|
//[168s]
|
|
// The local part of a prefixed name. Colons, percent-encodings and
// backslash escapes (PLX) are allowed throughout; dots may appear inside but
// not at the end. The rule yields no value: callers capture the matched
// text with $(...), so any PLX escape is kept as written, not decoded.
PN_LOCAL -> () = (":" / [0-9] / PN_CHARS_U / PLX) (":" / PN_CHARS / PLX)* ("."+ (":" / PN_CHARS / PLX)+)*
|
|
|
|
//[169s]
|
|
// A percent-encoded byte, returned as the three characters matched, or a
// backslash-escaped local-name character, returned as a one-character
// string holding the character after the backslash.
PLX -> String =
    pct:$(PERCENT) { pct.to_string() } /
    esc:PN_LOCAL_ESC { iter::once(esc).collect::<String>() }
|
|
|
|
//[170s]
|
|
// A percent sign followed by exactly two hexadecimal digits.
PERCENT -> () = "%" HEX HEX
|
|
|
|
//[171s]
|
|
// One hexadecimal digit, upper or lower case.
HEX -> () = ([0-9A-Fa-f])
|
|
|
|
//[172s]
|
|
// A backslash followed by one of the punctuation characters in the class
// below; returns that character without the backslash. The unwrap cannot
// fail because the class always matches exactly one char.
PN_LOCAL_ESC -> char = "\\" c:$([_~\.\-!$&'()*+,;=/?#@%]) { c.chars().next().unwrap() }
|
|
|
|
|
|
//space
|
|
// Skippable filler between tokens: any mix of spaces, tabs, line breaks and
// comments, possibly empty. Wrapped in #quiet so it is left out of
// expected-token error reports.
_ = #quiet<([ \t\n\r] / comment)*>
|
|
//comment
|
|
// A comment: a hash sign and everything up to, but not including, the next
// carriage return or line feed.
comment = #quiet<"#" [^\r\n]*>
|
|
|