Optimizes LN parsing for Turtle and SPARQL queries

pull/10/head
Tpt 6 years ago
parent d84190bd87
commit c67ab460d0
  1. 23
      lib/src/rio/turtle/mod.rs
  2. 22
      lib/src/rio/turtle/turtle_grammar.rustpeg
  3. 21
      lib/src/sparql/parser.rs
  4. 22
      lib/src/sparql/sparql_grammar.rustpeg

@ -73,9 +73,30 @@ mod grammar {
); );
} }
pub fn unescape_echars(input: &str) -> Cow<str> { fn unescape_echars(input: &str) -> Cow<str> {
unescape_characters(input, &UNESCAPE_CHARACTERS, &UNESCAPE_REPLACEMENT) unescape_characters(input, &UNESCAPE_CHARACTERS, &UNESCAPE_REPLACEMENT)
} }
// Bytes that `unescape_pn_local` passes to `unescape_characters` as the set
// of escapable characters; same 20 entries, same order, written as one
// byte-string literal instead of an element-by-element list.
const UNESCAPE_PN_CHARACTERS: [u8; 20] = *b"_~.-!$&'()*+,;=/?#@%";
// Lazily-built table that `unescape_pn_local` hands to `unescape_characters`.
// Both slices list the same 20 characters in the same order.
// NOTE(review): presumably the first slice holds the keys and the second the
// matching values, so every character maps to itself — confirm against
// `StaticSliceMap::new`.
lazy_static! {
static ref UNESCAPE_PN_REPLACEMENT: StaticSliceMap<char, char> = StaticSliceMap::new(
&[
'_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/',
'?', '#', '@', '%'
],
&[
'_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/',
'?', '#', '@', '%'
]
);
}
/// Unescapes the local part of a prefixed name.
///
/// Delegates to `unescape_characters` with the `UNESCAPE_PN_CHARACTERS` and
/// `UNESCAPE_PN_REPLACEMENT` tables; the grammar's `PNAME_LN` rule calls it on
/// the raw text captured for `PN_LOCAL`.
///
/// NOTE(review): the `Cow` return type suggests the input is borrowed when
/// nothing needs unescaping — confirm against `unescape_characters`.
pub fn unescape_pn_local(input: &str) -> Cow<str> {
unescape_characters(input, &UNESCAPE_PN_CHARACTERS, &UNESCAPE_PN_REPLACEMENT)
}
} }
pub use self::grammar::read_turtle; pub use self::grammar::read_turtle;

@ -3,7 +3,6 @@
use std::char; use std::char;
use model::vocab::rdf; use model::vocab::rdf;
use model::vocab::xsd; use model::vocab::xsd;
use std::iter;
use std::str::FromStr; use std::str::FromStr;
use rio::utils::unescape_unicode_codepoints; use rio::utils::unescape_unicode_codepoints;
@ -170,9 +169,9 @@ PNAME_NS -> &'input str = ns:$(PN_PREFIX? ":") {
} }
//[140s] //[140s]
PNAME_LN -> Url = ns:$(PNAME_NS) local:PN_LOCAL {? PNAME_LN -> Url = ns:$(PNAME_NS) local:$(PN_LOCAL) {?
match state.namespaces.get(ns) { match state.namespaces.get(ns) {
Some(ns) => match Url::parse(&(ns.to_string() + &local)) { Some(ns) => match Url::parse(&(ns.to_string() + &unescape_pn_local(local))) {
Ok(url) => Ok(url), Ok(url) => Ok(url),
Err(error) => Err("IRI parsing failed") Err(error) => Err("IRI parsing failed")
}, },
@ -249,21 +248,10 @@ PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U
PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("."+ PN_CHARS+)* PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("."+ PN_CHARS+)*
//[168s] //[168s]
// The dotted tail repeats with `*` (as PN_PREFIX does) so that local names with several '.'-separated segments, e.g. "a.b.c", are consumed in full; with `?` only the first dotted segment was matched.
PN_LOCAL -> String = f:PN_LOCAL_first c:(PN_LOCAL_next*) e:(PN_LOCAL_next_dot*) { PN_LOCAL -> () = (PN_CHARS_U / ':' / [0-9] / PLX) (PN_CHARS / ':' / PLX)* ('.'+ (PN_CHARS / ':' / PLX)+)*
f.to_string() + &c.concat() + &e.concat()
}
PN_LOCAL_first -> String =
c:$(":" / [0-9] / PN_CHARS_U) { c.into() } /
s:PLX { s }
PN_LOCAL_next -> String =
c:$(":" / PN_CHARS) { c.into() } /
s:PLX { s }
PN_LOCAL_next_dot -> String = d:$("."+) f:PN_LOCAL_next { d.to_string() + &f}
//[169s] //[169s]
PLX -> String = PLX -> () = PERCENT / PN_LOCAL_ESC
p:$(PERCENT) { p.into() } /
e:PN_LOCAL_ESC { iter::once(e).collect() }
//[170s] //[170s]
PERCENT -> () = "%" HEX HEX PERCENT -> () = "%" HEX HEX
@ -272,7 +260,7 @@ PERCENT -> () = "%" HEX HEX
HEX -> () = ([0-9A-Fa-f]) HEX -> () = ([0-9A-Fa-f])
//[172s] //[172s]
PN_LOCAL_ESC -> char = "\\" c:$([_~\.\-!$&'()*+,;=/?#@%]) { c.chars().next().unwrap() } PN_LOCAL_ESC -> () = "\\" [_~\.\-!$&'()*+,;=/?#@%]
//space //space

@ -329,6 +329,27 @@ mod grammar {
unescape_characters(input, &UNESCAPE_CHARACTERS, &UNESCAPE_REPLACEMENT) unescape_characters(input, &UNESCAPE_CHARACTERS, &UNESCAPE_REPLACEMENT)
} }
// Bytes that `unescape_pn_local` passes to `unescape_characters` as the set
// of escapable characters; same 20 entries, same order, written as one
// byte-string literal instead of an element-by-element list.
const UNESCAPE_PN_CHARACTERS: [u8; 20] = *b"_~.-!$&'()*+,;=/?#@%";
// Lazily-built table that `unescape_pn_local` hands to `unescape_characters`.
// Both slices list the same 20 characters in the same order.
// NOTE(review): presumably the first slice holds the keys and the second the
// matching values, so every character maps to itself — confirm against
// `StaticSliceMap::new`.
lazy_static! {
static ref UNESCAPE_PN_REPLACEMENT: StaticSliceMap<char, char> = StaticSliceMap::new(
&[
'_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/',
'?', '#', '@', '%'
],
&[
'_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/',
'?', '#', '@', '%'
]
);
}
/// Unescapes the local part of a prefixed name.
///
/// Delegates to `unescape_characters` with the `UNESCAPE_PN_CHARACTERS` and
/// `UNESCAPE_PN_REPLACEMENT` tables; the grammar's `PNAME_LN` rule calls it on
/// the raw text captured for `PN_LOCAL`.
///
/// NOTE(review): the `Cow` return type suggests the input is borrowed when
/// nothing needs unescaping — confirm against `unescape_characters`.
pub fn unescape_pn_local(input: &str) -> Cow<str> {
unescape_characters(input, &UNESCAPE_PN_CHARACTERS, &UNESCAPE_PN_REPLACEMENT)
}
include!(concat!(env!("OUT_DIR"), "/sparql_grammar.rs")); include!(concat!(env!("OUT_DIR"), "/sparql_grammar.rs"));
pub fn read_sparql_query<'a, R: Read + 'a>( pub fn read_sparql_query<'a, R: Read + 'a>(

@ -3,7 +3,6 @@
use std::char; use std::char;
use model::vocab::rdf; use model::vocab::rdf;
use model::vocab::xsd; use model::vocab::xsd;
use std::iter;
use std::str::FromStr; use std::str::FromStr;
#![arguments(state: &mut ParserState)] #![arguments(state: &mut ParserState)]
@ -933,8 +932,8 @@ PNAME_NS -> &'input str = ns:$(PN_PREFIX? ":") {
} }
//[141] //[141]
PNAME_LN -> String = ns:$(PNAME_NS) local:PN_LOCAL {? PNAME_LN -> String = ns:$(PNAME_NS) local:$(PN_LOCAL) {?
state.namespaces.get(ns).map(|v| v.clone() + &local).ok_or("Prefix not found") state.namespaces.get(ns).map(|v| v.clone() + &unescape_pn_local(local)).ok_or("Prefix not found")
} }
//[142] //[142]
@ -1038,21 +1037,10 @@ PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U
PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("."+ PN_CHARS+)* PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("."+ PN_CHARS+)*
//[169] //[169]
// The dotted tail repeats with `*` (as PN_PREFIX does) so that local names with several '.'-separated segments, e.g. "a.b.c", are consumed in full; with `?` only the first dotted segment was matched.
PN_LOCAL -> String = f:PN_LOCAL_first c:(PN_LOCAL_next*) e:(PN_LOCAL_next_dot*) { PN_LOCAL -> () = (PN_CHARS_U / ':' / [0-9] / PLX) (PN_CHARS / ':' / PLX)* ('.'+ (PN_CHARS / ':' / PLX)+)*
f.to_string() + &c.concat() + &e.concat()
}
PN_LOCAL_first -> String =
c:$(":" / [0-9] / PN_CHARS_U) { c.into() } /
PLX
PN_LOCAL_next -> String =
c:$(":" / PN_CHARS) { c.into() } /
PLX
PN_LOCAL_next_dot -> String = d:$('.'+) f:PN_LOCAL_next* { d.to_string() + &f.concat()}
//[170] //[170]
PLX -> String = PLX -> () = PERCENT / PN_LOCAL_ESC
p:$(PERCENT) { p.into() } /
e:PN_LOCAL_ESC { iter::once(e).collect() }
//[171] //[171]
PERCENT -> () = "%" HEX HEX PERCENT -> () = "%" HEX HEX
@ -1061,7 +1049,7 @@ PERCENT -> () = "%" HEX HEX
HEX -> () = ([0-9A-Fa-f]) HEX -> () = ([0-9A-Fa-f])
//[173] //[173]
PN_LOCAL_ESC -> char = "\\" c:$([_~\.\-!$&'()*+,;=/?#@%]) { c.chars().next().unwrap() } //TODO: added '/' to make tests pass but is it valid? PN_LOCAL_ESC -> () = "\\" [_~\.\-!$&'()*+,;=/?#@%] //TODO: added '/' to make tests pass but is it valid?
//space //space
_ = #quiet<([ \t\n\r] / comment)*> _ = #quiet<([ \t\n\r] / comment)*>

Loading…
Cancel
Save