Uses more efficient echar escaping in SPARQL

pull/10/head
Tpt 6 years ago
parent 74a2d9859a
commit 8b4d71a628
  1. 18
      src/sparql/parser.rs
  2. 50
      src/sparql/sparql_grammar.rustpeg

@ -12,15 +12,18 @@ mod grammar {
)] )]
use model::*; use model::*;
use rio::utils::unescape_characters;
use rio::utils::unescape_unicode_codepoints; use rio::utils::unescape_unicode_codepoints;
use sparql::algebra::*; use sparql::algebra::*;
use sparql::model::*; use sparql::model::*;
use std::borrow::Cow;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::collections::HashMap; use std::collections::HashMap;
use std::io::BufReader; use std::io::BufReader;
use std::io::Read; use std::io::Read;
use url::ParseOptions; use url::ParseOptions;
use url::Url; use url::Url;
use utils::StaticSliceMap;
struct FocusedTriplePattern<F> { struct FocusedTriplePattern<F> {
focus: F, focus: F,
@ -286,6 +289,21 @@ mod grammar {
} }
} }
/// Bytes accepted after the backslash of an `ECHAR` escape (`t b n r f " ' \`).
///
/// The order matches the key list handed to `UNESCAPE_REPLACEMENT`.
const UNESCAPE_CHARACTERS: [u8; 8] = *b"tbnrf\"'\\";
// Table built from two parallel slices: the letter written after the backslash of an
// ECHAR escape, and the character that escape stands for. The key order is the same
// as in UNESCAPE_CHARACTERS.
// NOTE(review): presumably StaticSliceMap pairs keys[i] with values[i] -- confirm
// against utils::StaticSliceMap.
lazy_static! {
    static ref UNESCAPE_REPLACEMENT: StaticSliceMap<char, char> = StaticSliceMap::new(
        &['t', 'b', 'n', 'r', 'f', '"', '\'', '\\'],
        &[
            '\t',       // U+0009
            '\u{0008}', // U+0008 (Rust has no '\b' escape)
            '\n',       // U+000A
            '\r',       // U+000D
            '\u{000C}', // U+000C (Rust has no '\f' escape)
            '"',        // U+0022
            '\'',       // U+0027
            '\\',       // U+005C
        ],
    );
}
/// Runs `rio::utils::unescape_characters` over `input` with the `ECHAR` tables
/// (`UNESCAPE_CHARACTERS` and `UNESCAPE_REPLACEMENT`) and returns its result.
///
/// The string-literal rules of the grammar call this on the raw matched text
/// between the quotes.
///
/// NOTE(review): the `Cow` return type suggests the input is borrowed back when it
/// contains nothing to replace -- confirm against `unescape_characters`.
fn unescape_echars(input: &str) -> Cow<str> {
unescape_characters(input, &UNESCAPE_CHARACTERS, &UNESCAPE_REPLACEMENT)
}
include!(concat!(env!("OUT_DIR"), "/sparql_grammar.rs")); include!(concat!(env!("OUT_DIR"), "/sparql_grammar.rs"));
pub fn read_sparql_query<'a, R: Read + 'a>( pub fn read_sparql_query<'a, R: Read + 'a>(

@ -989,54 +989,34 @@ DOUBLE_NEGATIVE -> () = '-' _ DOUBLE
EXPONENT -> () = [eE] [+-]? [0-9]+ EXPONENT -> () = [eE] [+-]? [0-9]+
//[156] //[156]
STRING_LITERAL1 -> String = "'" l:((STRING_LITERAL1_simple_char / ECHAR)*) "'" { STRING_LITERAL1 -> String = "'" l:$((STRING_LITERAL1_simple_char / ECHAR)*) "'" {
l.into_iter().collect() unescape_echars(l).to_string()
} }
STRING_LITERAL1_simple_char -> char = c:$([^'\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() } STRING_LITERAL1_simple_char -> () = [^'\u{005c}\u{000a}\u{000d}]
//[157] //[157]
STRING_LITERAL2 -> String = "\"" l:((STRING_LITERAL2_simple_char / ECHAR)*) "\"" { STRING_LITERAL2 -> String = "\"" l:$((STRING_LITERAL2_simple_char / ECHAR)*) "\"" {
l.into_iter().collect() unescape_echars(l).to_string()
} }
STRING_LITERAL2_simple_char -> char = c:$([^"\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() } STRING_LITERAL2_simple_char -> () = [^"\u{005c}\u{000a}\u{000d}]
//[158] //[158]
STRING_LITERAL_LONG1 -> String = "'''" l:(STRING_LITERAL_LONG1_inner*) "'''" { STRING_LITERAL_LONG1 -> String = "'''" l:$(STRING_LITERAL_LONG1_inner*) "'''" {
l.into_iter().collect() unescape_echars(l).to_string()
} }
STRING_LITERAL_LONG1_inner -> String = a:$(("''" / "'")?) b:(STRING_LITERAL_LONG1_simple_char / ECHAR) { STRING_LITERAL_LONG1_inner -> () = ("''" / "'")? (STRING_LITERAL_LONG1_simple_char / ECHAR)
let mut s = a.to_string(); STRING_LITERAL_LONG1_simple_char -> () = [^'\u{005c}]
s.push(b);
s
}
STRING_LITERAL_LONG1_simple_char -> char = c:$([^'\u{005c}]) { c.chars().next().unwrap() }
//[159] //[159]
STRING_LITERAL_LONG2 -> String = "\"\"\"" l:(STRING_LITERAL_LONG2_inner*) "\"\"\"" { STRING_LITERAL_LONG2 -> String = "\"\"\"" l:$(STRING_LITERAL_LONG2_inner*) "\"\"\"" {
l.into_iter().collect() unescape_echars(l).to_string()
}
STRING_LITERAL_LONG2_inner -> String = a:$(("\"\"" / "\"")?) b:(STRING_LITERAL_LONG2_simple_char / ECHAR) {
let mut s = a.to_string();
s.push(b);
s
} }
STRING_LITERAL_LONG2_simple_char -> char = c:$([^"\u{005c}]) { c.chars().next().unwrap() } STRING_LITERAL_LONG2_inner -> () = ("\"\"" / "\"")? (STRING_LITERAL_LONG2_simple_char / ECHAR)
STRING_LITERAL_LONG2_simple_char -> () = [^"\u{005c}]
//[160] //[160]
ECHAR -> char = "\\" c:$([tbnrf"'\\]) { ECHAR -> () = "\\" [tbnrf"'\\]
match c {
"t" => '\u{0009}',
"b" => '\u{0008}',
"n" => '\u{000A}',
"r" => '\u{000D}',
"f" => '\u{000C}',
"\"" => '\u{0022}',
"'" => '\u{0027}',
"\\" => '\u{005C}',
_ => panic!("unexpected escaped char") // not possible
}
}
//[161] //[161]
NIL -> () = "(" WS* ")" NIL -> () = "(" WS* ")"

Loading…
Cancel
Save