Fork of https://github.com/oxigraph/oxigraph.git for the purpose of NextGraph project
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
156 lines
4.3 KiB
156 lines
4.3 KiB
use crate::utils::StaticSliceMap;
|
|
use std::borrow::Cow;
|
|
use std::char;
|
|
use std::str::Chars;
|
|
|
|
pub fn unescape_unicode_codepoints(input: &str) -> Cow<'_, str> {
|
|
if needs_unescape_unicode_codepoints(input) {
|
|
UnescapeUnicodeCharIterator::new(input).collect()
|
|
} else {
|
|
input.into()
|
|
}
|
|
}
|
|
|
|
/// Returns `true` when `input` contains a backslash immediately followed by
/// `u` or `U`, i.e. when it may contain a code point escape worth decoding.
///
/// The scan works on raw bytes; inputs shorter than two bytes never match.
fn needs_unescape_unicode_codepoints(input: &str) -> bool {
    input
        .as_bytes()
        .windows(2)
        .any(|pair| pair[0] == b'\\' && matches!(pair[1], b'u' | b'U'))
}
|
|
|
|
/// Iterator over the characters of a string in which `\uXXXX` (four hex
/// digits) and `\UXXXXXXXX` (eight hex digits) escapes are replaced by the
/// character they encode.
///
/// Anything that is not a well-formed escape (too few characters left,
/// non-hexadecimal characters, or a value that is not a valid `char`) is
/// emitted unchanged, backslash included.
struct UnescapeUnicodeCharIterator<'a> {
    /// Characters of the input that have not been consumed yet.
    iter: Chars<'a>,
    /// Characters already pulled from `iter` that must still be emitted
    /// verbatim before reading further input.
    buffer: String,
}

impl<'a> UnescapeUnicodeCharIterator<'a> {
    fn new(string: &'a str) -> Self {
        Self {
            iter: string.chars(),
            // One marker character plus up to eight hex digits.
            buffer: String::with_capacity(9),
        }
    }

    /// Reads the `digits` characters following an escape `marker` (`u` or `U`)
    /// and returns the character to emit now.
    ///
    /// On success the decoded character is returned and the buffer is left
    /// empty. On failure a backslash is returned and the marker plus every
    /// character read so far stay in the buffer, so they are emitted verbatim
    /// by the following calls to `next`.
    fn read_escape(&mut self, marker: char, digits: usize) -> char {
        self.buffer.push(marker);
        for _ in 0..digits {
            match self.iter.next() {
                Some(c) => self.buffer.push(c),
                // Input ended in the middle of the escape.
                None => return '\\',
            }
        }
        // The marker is ASCII, so this offset is always a char boundary.
        let hex = &self.buffer[marker.len_utf8()..];
        // Validate before parsing: the buffer was filled per `char`, so it may
        // hold multi-byte characters, and `from_str_radix` on its own would
        // also accept a leading `+` sign.
        let decoded = if hex.bytes().all(|b| b.is_ascii_hexdigit()) {
            u32::from_str_radix(hex, 16).ok().and_then(char::from_u32)
        } else {
            None
        };
        match decoded {
            Some(c) => {
                self.buffer.clear();
                c
            }
            None => '\\',
        }
    }
}

impl<'a> Iterator for UnescapeUnicodeCharIterator<'a> {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        // Flush characters left over from a sequence that was not decoded.
        if !self.buffer.is_empty() {
            return Some(self.buffer.remove(0));
        }
        match self.iter.next()? {
            '\\' => match self.iter.next() {
                Some('u') => Some(self.read_escape('u', 4)),
                Some('U') => Some(self.read_escape('U', 8)),
                Some(c) => {
                    // Not a code point escape: emit the backslash now and the
                    // following character on the next call. Buffering it also
                    // prevents a second backslash from starting a new escape.
                    self.buffer.push(c);
                    Some('\\')
                }
                // Trailing backslash at the end of the input.
                None => Some('\\'),
            },
            c => Some(c),
        }
    }
}
|
|
|
|
pub fn unescape_characters<'a>(
|
|
input: &'a str,
|
|
characters: &'static [u8],
|
|
replacement: &'static StaticSliceMap<char, char>,
|
|
) -> Cow<'a, str> {
|
|
if needs_unescape_characters(input, characters) {
|
|
UnescapeCharsIterator::new(input, replacement).collect()
|
|
} else {
|
|
input.into()
|
|
}
|
|
}
|
|
|
|
/// Returns `true` when `input` contains a backslash immediately followed by
/// one of the bytes listed in `characters`.
///
/// The scan works on raw bytes; inputs shorter than two bytes never match.
fn needs_unescape_characters(input: &str, characters: &[u8]) -> bool {
    input
        .as_bytes()
        .windows(2)
        .any(|pair| pair[0] == b'\\' && characters.contains(&pair[1]))
}
|
|
|
|
struct UnescapeCharsIterator<'a> {
|
|
iter: Chars<'a>,
|
|
buffer: Option<char>,
|
|
replacement: &'static StaticSliceMap<char, char>,
|
|
}
|
|
|
|
impl<'a> UnescapeCharsIterator<'a> {
|
|
fn new(string: &'a str, replacement: &'static StaticSliceMap<char, char>) -> Self {
|
|
Self {
|
|
iter: string.chars(),
|
|
buffer: None,
|
|
replacement,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> Iterator for UnescapeCharsIterator<'a> {
|
|
type Item = char;
|
|
|
|
fn next(&mut self) -> Option<char> {
|
|
if let Some(ch) = self.buffer {
|
|
self.buffer = None;
|
|
return Some(ch);
|
|
}
|
|
match self.iter.next()? {
|
|
'\\' => match self.iter.next() {
|
|
Some(ch) => match self.replacement.get(ch) {
|
|
Some(replace) => Some(replace),
|
|
None => {
|
|
self.buffer = Some(ch);
|
|
Some('\\')
|
|
}
|
|
},
|
|
None => Some('\\'),
|
|
},
|
|
c => Some(c),
|
|
}
|
|
}
|
|
}
|
|
|