Normalize unicode refs

This just keeps the `\u{...}` escapes a bit more consistent: hex digits are uppercased and zero-padded to two or four digits.

Note that there is a lot of code duplication here, but I do not know whether it is worth consolidating, or whether doing so would have any performance implications.
pull/783/head
Yuri Astrakhan 11 months ago committed by Thomas Tanon
parent a078b12508
commit ea300e9081
  1. 2
      lib/oxigraph/tests/store.rs
  2. 4
      lib/oxrdf/src/literal.rs
  3. 4
      lib/oxrdf/src/parser.rs
  4. 2
      lib/oxrdf/src/variable.rs
  5. 10
      lib/oxrdfxml/src/utils.rs
  6. 6
      lib/spargebra/src/parser.rs

@ -78,7 +78,7 @@ fn quads(graph_name: impl Into<GraphNameRef<'static>>) -> Vec<QuadRef<'static>>
QuadRef::new( QuadRef::new(
paris, paris,
name, name,
LiteralRef::new_language_tagged_literal_unchecked("la ville lumi\u{e8}re", "fr"), LiteralRef::new_language_tagged_literal_unchecked("la ville lumi\u{E8}re", "fr"),
graph_name, graph_name,
), ),
QuadRef::new(paris, country, france, graph_name), QuadRef::new(paris, country, france, graph_name),

@ -622,11 +622,11 @@ pub fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result {
'\u{08}' => f.write_str("\\b"), '\u{08}' => f.write_str("\\b"),
'\t' => f.write_str("\\t"), '\t' => f.write_str("\\t"),
'\n' => f.write_str("\\n"), '\n' => f.write_str("\\n"),
'\u{0c}' => f.write_str("\\f"), '\u{0C}' => f.write_str("\\f"),
'\r' => f.write_str("\\r"), '\r' => f.write_str("\\r"),
'"' => f.write_str("\\\""), '"' => f.write_str("\\\""),
'\\' => f.write_str("\\\\"), '\\' => f.write_str("\\\\"),
'\0'..='\u{1f}' | '\u{7f}' => write!(f, "\\u{:04X}", u32::from(c)), '\0'..='\u{1F}' | '\u{7F}' => write!(f, "\\u{:04X}", u32::from(c)),
_ => f.write_char(c), _ => f.write_char(c),
}?; }?;
} }

@ -256,10 +256,10 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
if let Some(c) = chars.next() { if let Some(c) = chars.next() {
value.push(match c { value.push(match c {
't' => '\t', 't' => '\t',
'b' => '\u{8}', 'b' => '\u{08}',
'n' => '\n', 'n' => '\n',
'r' => '\r', 'r' => '\r',
'f' => '\u{C}', 'f' => '\u{0C}',
'"' => '"', '"' => '"',
'\'' => '\'', '\'' => '\'',
'\\' => '\\', '\\' => '\\',

@ -187,7 +187,7 @@ fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError>
match c { match c {
'0'..='9' '0'..='9'
| '\u{00B7}' | '\u{00B7}'
| '\u{00300}'..='\u{036F}' | '\u{0300}'..='\u{036F}'
| '\u{203F}'..='\u{2040}' | '\u{203F}'..='\u{2040}'
| '_' | '_'
| 'A'..='Z' | 'A'..='Z'

@ -5,11 +5,11 @@ pub fn is_name_start_char(c: char) -> bool {
| 'A'..='Z' | 'A'..='Z'
| '_' | '_'
| 'a'..='z' | 'a'..='z'
| '\u{C0}'..='\u{D6}' | '\u{00C0}'..='\u{00D6}'
| '\u{D8}'..='\u{F6}' | '\u{00D8}'..='\u{00F6}'
| '\u{F8}'..='\u{2FF}' | '\u{00F8}'..='\u{02FF}'
| '\u{370}'..='\u{37D}' | '\u{0370}'..='\u{037D}'
| '\u{37F}'..='\u{1FFF}' | '\u{037F}'..='\u{1FFF}'
| '\u{200C}'..='\u{200D}' | '\u{200C}'..='\u{200D}'
| '\u{2070}'..='\u{218F}' | '\u{2070}'..='\u{218F}'
| '\u{2C00}'..='\u{2FEF}' | '\u{2C00}'..='\u{2FEF}'

@ -2019,13 +2019,13 @@ parser! {
rule STRING_LITERAL1() -> String = "'" l:$((STRING_LITERAL1_simple_char() / ECHAR() / UCHAR())*) "'" {? rule STRING_LITERAL1() -> String = "'" l:$((STRING_LITERAL1_simple_char() / ECHAR() / UCHAR())*) "'" {?
unescape_string(l) unescape_string(l)
} }
rule STRING_LITERAL1_simple_char() = !['\u{27}' | '\u{5C}' | '\u{A}' | '\u{D}'] [_] rule STRING_LITERAL1_simple_char() = !['\u{27}' | '\u{5C}' | '\u{0A}' | '\u{0D}'] [_]
rule STRING_LITERAL2() -> String = "\"" l:$((STRING_LITERAL2_simple_char() / ECHAR() / UCHAR())*) "\"" {? rule STRING_LITERAL2() -> String = "\"" l:$((STRING_LITERAL2_simple_char() / ECHAR() / UCHAR())*) "\"" {?
unescape_string(l) unescape_string(l)
} }
rule STRING_LITERAL2_simple_char() = !['\u{22}' | '\u{5C}' | '\u{A}' | '\u{D}'] [_] rule STRING_LITERAL2_simple_char() = !['\u{22}' | '\u{5C}' | '\u{0A}' | '\u{0D}'] [_]
rule STRING_LITERAL_LONG1() -> String = "'''" l:$(STRING_LITERAL_LONG1_inner()*) "'''" {? rule STRING_LITERAL_LONG1() -> String = "'''" l:$(STRING_LITERAL_LONG1_inner()*) "'''" {?
unescape_string(l) unescape_string(l)
@ -2045,7 +2045,7 @@ parser! {
rule NIL() = "(" WS()* ")" rule NIL() = "(" WS()* ")"
rule WS() = quiet! { ['\u{20}' | '\u{9}' | '\u{D}' | '\u{A}'] } rule WS() = quiet! { ['\u{20}' | '\u{09}' | '\u{0D}' | '\u{0A}'] }
rule ANON() = "[" WS()* "]" rule ANON() = "[" WS()* "]"

Loading…
Cancel
Save