Optimizes LN parsing for Turtle and SPARQL queries

pull/10/head
Tpt 6 years ago
parent d84190bd87
commit c67ab460d0
  1. 23
      lib/src/rio/turtle/mod.rs
  2. 22
      lib/src/rio/turtle/turtle_grammar.rustpeg
  3. 21
      lib/src/sparql/parser.rs
  4. 22
      lib/src/sparql/sparql_grammar.rustpeg

@ -73,9 +73,30 @@ mod grammar {
);
}
pub fn unescape_echars(input: &str) -> Cow<str> {
fn unescape_echars(input: &str) -> Cow<str> {
unescape_characters(input, &UNESCAPE_CHARACTERS, &UNESCAPE_REPLACEMENT)
}
// Bytes that `unescape_pn_local` hands to `unescape_characters` as its character table.
// The list contains the same 20 characters that the grammar's PN_LOCAL_ESC rule accepts
// after a backslash.
const UNESCAPE_PN_CHARACTERS: [u8; 20] = [
b'_', b'~', b'.', b'-', b'!', b'$', b'&', b'\'', b'(', b')', b'*', b'+', b',', b';', b'=',
b'/', b'?', b'#', b'@', b'%',
];
// Lazily-initialised lookup table that `unescape_pn_local` passes to `unescape_characters`.
// Both slices are identical, so every listed character is paired with itself.
// NOTE(review): presumably the first slice holds the keys and the second the values —
// confirm against `StaticSliceMap::new`.
lazy_static! {
static ref UNESCAPE_PN_REPLACEMENT: StaticSliceMap<char, char> = StaticSliceMap::new(
&[
'_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/',
'?', '#', '@', '%'
],
&[
'_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/',
'?', '#', '@', '%'
]
);
}
/// Unescapes the local part of a prefixed name by delegating to `unescape_characters`
/// with the `UNESCAPE_PN_CHARACTERS` / `UNESCAPE_PN_REPLACEMENT` tables.
///
/// NOTE(review): presumably this drops the backslash in front of each listed character and
/// borrows `input` when nothing has to change (the return type is `Cow<str>`) — confirm
/// against `unescape_characters`.
pub fn unescape_pn_local(input: &str) -> Cow<str> {
unescape_characters(input, &UNESCAPE_PN_CHARACTERS, &UNESCAPE_PN_REPLACEMENT)
}
}
pub use self::grammar::read_turtle;

@ -3,7 +3,6 @@
use std::char;
use model::vocab::rdf;
use model::vocab::xsd;
use std::iter;
use std::str::FromStr;
use rio::utils::unescape_unicode_codepoints;
@ -170,9 +169,9 @@ PNAME_NS -> &'input str = ns:$(PN_PREFIX? ":") {
}
//[140s]
PNAME_LN -> Url = ns:$(PNAME_NS) local:PN_LOCAL {?
PNAME_LN -> Url = ns:$(PNAME_NS) local:$(PN_LOCAL) {?
match state.namespaces.get(ns) {
Some(ns) => match Url::parse(&(ns.to_string() + &local)) {
Some(ns) => match Url::parse(&(ns.to_string() + &unescape_pn_local(local))) {
Ok(url) => Ok(url),
Err(error) => Err("IRI parsing failed")
},
@ -249,21 +248,10 @@ PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U
PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("."+ PN_CHARS+)*
//[168s]
PN_LOCAL -> String = f:PN_LOCAL_first c:(PN_LOCAL_next*) e:(PN_LOCAL_next_dot*) {
f.to_string() + &c.concat() + &e.concat()
}
PN_LOCAL_first -> String =
c:$(":" / [0-9] / PN_CHARS_U) { c.into() } /
s:PLX { s }
PN_LOCAL_next -> String =
c:$(":" / PN_CHARS) { c.into() } /
s:PLX { s }
PN_LOCAL_next_dot -> String = d:$("."+) f:PN_LOCAL_next { d.to_string() + &f}
// Matches the local part of a prefixed name without building a value; the caller captures
// the matched text with `$(...)` and unescapes it afterwards.
// A local name may contain several dot-separated segments (e.g. `a.b.c`), so the
// "dots followed by name characters" group repeats with `*`, like the one in PN_PREFIX.
// A trailing '.' is never consumed because each group must end with a non-dot character.
PN_LOCAL -> () = (PN_CHARS_U / ':' / [0-9] / PLX) (PN_CHARS / ':' / PLX)* ('.'+ (PN_CHARS / ':' / PLX)+)*
//[169s]
PLX -> String =
p:$(PERCENT) { p.into() } /
e:PN_LOCAL_ESC { iter::once(e).collect() }
PLX -> () = PERCENT / PN_LOCAL_ESC
//[170s]
PERCENT -> () = "%" HEX HEX
@ -272,7 +260,7 @@ PERCENT -> () = "%" HEX HEX
HEX -> () = ([0-9A-Fa-f])
//[172s]
PN_LOCAL_ESC -> char = "\\" c:$([_~\.\-!$&'()*+,;=/?#@%]) { c.chars().next().unwrap() }
PN_LOCAL_ESC -> () = "\\" [_~\.\-!$&'()*+,;=/?#@%]
//space

@ -329,6 +329,27 @@ mod grammar {
unescape_characters(input, &UNESCAPE_CHARACTERS, &UNESCAPE_REPLACEMENT)
}
// Bytes that `unescape_pn_local` hands to `unescape_characters` as its character table.
// The list contains the same 20 characters that the grammar's PN_LOCAL_ESC rule accepts
// after a backslash.
const UNESCAPE_PN_CHARACTERS: [u8; 20] = [
b'_', b'~', b'.', b'-', b'!', b'$', b'&', b'\'', b'(', b')', b'*', b'+', b',', b';', b'=',
b'/', b'?', b'#', b'@', b'%',
];
// Lazily-initialised lookup table that `unescape_pn_local` passes to `unescape_characters`.
// Both slices are identical, so every listed character is paired with itself.
// NOTE(review): presumably the first slice holds the keys and the second the values —
// confirm against `StaticSliceMap::new`.
lazy_static! {
static ref UNESCAPE_PN_REPLACEMENT: StaticSliceMap<char, char> = StaticSliceMap::new(
&[
'_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/',
'?', '#', '@', '%'
],
&[
'_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/',
'?', '#', '@', '%'
]
);
}
/// Unescapes the local part of a prefixed name by delegating to `unescape_characters`
/// with the `UNESCAPE_PN_CHARACTERS` / `UNESCAPE_PN_REPLACEMENT` tables.
///
/// NOTE(review): presumably this drops the backslash in front of each listed character and
/// borrows `input` when nothing has to change (the return type is `Cow<str>`) — confirm
/// against `unescape_characters`.
pub fn unescape_pn_local(input: &str) -> Cow<str> {
unescape_characters(input, &UNESCAPE_PN_CHARACTERS, &UNESCAPE_PN_REPLACEMENT)
}
include!(concat!(env!("OUT_DIR"), "/sparql_grammar.rs"));
pub fn read_sparql_query<'a, R: Read + 'a>(

@ -3,7 +3,6 @@
use std::char;
use model::vocab::rdf;
use model::vocab::xsd;
use std::iter;
use std::str::FromStr;
#![arguments(state: &mut ParserState)]
@ -933,8 +932,8 @@ PNAME_NS -> &'input str = ns:$(PN_PREFIX? ":") {
}
//[141]
PNAME_LN -> String = ns:$(PNAME_NS) local:PN_LOCAL {?
state.namespaces.get(ns).map(|v| v.clone() + &local).ok_or("Prefix not found")
PNAME_LN -> String = ns:$(PNAME_NS) local:$(PN_LOCAL) {?
state.namespaces.get(ns).map(|v| v.clone() + &unescape_pn_local(local)).ok_or("Prefix not found")
}
//[142]
@ -1038,21 +1037,10 @@ PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U
PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("."+ PN_CHARS+)*
//[169]
PN_LOCAL -> String = f:PN_LOCAL_first c:(PN_LOCAL_next*) e:(PN_LOCAL_next_dot*) {
f.to_string() + &c.concat() + &e.concat()
}
PN_LOCAL_first -> String =
c:$(":" / [0-9] / PN_CHARS_U) { c.into() } /
PLX
PN_LOCAL_next -> String =
c:$(":" / PN_CHARS) { c.into() } /
PLX
PN_LOCAL_next_dot -> String = d:$('.'+) f:PN_LOCAL_next* { d.to_string() + &f.concat()}
// Matches the local part of a prefixed name without building a value; the caller captures
// the matched text with `$(...)` and unescapes it afterwards.
// A local name may contain several dot-separated segments (e.g. `a.b.c`), so the
// "dots followed by name characters" group repeats with `*`, like the one in PN_PREFIX.
// A trailing '.' is never consumed because each group must end with a non-dot character.
PN_LOCAL -> () = (PN_CHARS_U / ':' / [0-9] / PLX) (PN_CHARS / ':' / PLX)* ('.'+ (PN_CHARS / ':' / PLX)+)*
//[170]
PLX -> String =
p:$(PERCENT) { p.into() } /
e:PN_LOCAL_ESC { iter::once(e).collect() }
PLX -> () = PERCENT / PN_LOCAL_ESC
//[171]
PERCENT -> () = "%" HEX HEX
@ -1061,7 +1049,7 @@ PERCENT -> () = "%" HEX HEX
HEX -> () = ([0-9A-Fa-f])
//[173]
PN_LOCAL_ESC -> char = "\\" c:$([_~\.\-!$&'()*+,;=/?#@%]) { c.chars().next().unwrap() } //TODO: added '/' to make tests pass but is it valid?
PN_LOCAL_ESC -> () = "\\" [_~\.\-!$&'()*+,;=/?#@%] // '/' is listed in the PN_LOCAL_ESC production of the SPARQL 1.1 grammar, so accepting it here is valid.
//space
_ = #quiet<([ \t\n\r] / comment)*>

Loading…
Cancel
Save