From ea300e9081cada562e714d1d2a218b88ce9d32a9 Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Sat, 10 Feb 2024 12:44:20 -0500 Subject: [PATCH] Normalize unicode refs Just to keep them a bit more consistent. Note that there are a lot of code duplications here - but I do not know if they are worth consolidating, and/or any perf implications. --- lib/oxigraph/tests/store.rs | 2 +- lib/oxrdf/src/literal.rs | 4 ++-- lib/oxrdf/src/parser.rs | 4 ++-- lib/oxrdf/src/variable.rs | 2 +- lib/oxrdfxml/src/utils.rs | 10 +++++----- lib/spargebra/src/parser.rs | 6 +++--- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/oxigraph/tests/store.rs b/lib/oxigraph/tests/store.rs index 48ff00f1..a3c5fe03 100644 --- a/lib/oxigraph/tests/store.rs +++ b/lib/oxigraph/tests/store.rs @@ -78,7 +78,7 @@ fn quads(graph_name: impl Into>) -> Vec> QuadRef::new( paris, name, - LiteralRef::new_language_tagged_literal_unchecked("la ville lumi\u{e8}re", "fr"), + LiteralRef::new_language_tagged_literal_unchecked("la ville lumi\u{E8}re", "fr"), graph_name, ), QuadRef::new(paris, country, france, graph_name), diff --git a/lib/oxrdf/src/literal.rs b/lib/oxrdf/src/literal.rs index 0872fab5..13ab07fe 100644 --- a/lib/oxrdf/src/literal.rs +++ b/lib/oxrdf/src/literal.rs @@ -622,11 +622,11 @@ pub fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result { '\u{08}' => f.write_str("\\b"), '\t' => f.write_str("\\t"), '\n' => f.write_str("\\n"), - '\u{0c}' => f.write_str("\\f"), + '\u{0C}' => f.write_str("\\f"), '\r' => f.write_str("\\r"), '"' => f.write_str("\\\""), '\\' => f.write_str("\\\\"), - '\0'..='\u{1f}' | '\u{7f}' => write!(f, "\\u{:04X}", u32::from(c)), + '\0'..='\u{1F}' | '\u{7F}' => write!(f, "\\u{:04X}", u32::from(c)), _ => f.write_char(c), }?; } diff --git a/lib/oxrdf/src/parser.rs b/lib/oxrdf/src/parser.rs index a531a86e..bcadd1ad 100644 --- a/lib/oxrdf/src/parser.rs +++ b/lib/oxrdf/src/parser.rs @@ -256,10 +256,10 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> { if let Some(c) = chars.next() { value.push(match c { 't' => '\t', - 'b' => '\u{8}', + 'b' => '\u{08}', 'n' => '\n', 'r' => '\r', - 'f' => '\u{C}', + 'f' => '\u{0C}', '"' => '"', '\'' => '\'', '\\' => '\\', diff --git a/lib/oxrdf/src/variable.rs b/lib/oxrdf/src/variable.rs index 36dcb3e4..c2d89ca9 100644 --- a/lib/oxrdf/src/variable.rs +++ b/lib/oxrdf/src/variable.rs @@ -187,7 +187,7 @@ fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError> match c { '0'..='9' | '\u{00B7}' - | '\u{00300}'..='\u{036F}' + | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}' | '_' | 'A'..='Z' diff --git a/lib/oxrdfxml/src/utils.rs b/lib/oxrdfxml/src/utils.rs index b8fb2447..0483488d 100644 --- a/lib/oxrdfxml/src/utils.rs +++ b/lib/oxrdfxml/src/utils.rs @@ -5,11 +5,11 @@ pub fn is_name_start_char(c: char) -> bool { | 'A'..='Z' | '_' | 'a'..='z' - | '\u{C0}'..='\u{D6}' - | '\u{D8}'..='\u{F6}' - | '\u{F8}'..='\u{2FF}' - | '\u{370}'..='\u{37D}' - | '\u{37F}'..='\u{1FFF}' + | '\u{00C0}'..='\u{00D6}' + | '\u{00D8}'..='\u{00F6}' + | '\u{00F8}'..='\u{02FF}' + | '\u{0370}'..='\u{037D}' + | '\u{037F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' diff --git a/lib/spargebra/src/parser.rs b/lib/spargebra/src/parser.rs index 8e5d7445..38496387 100644 --- a/lib/spargebra/src/parser.rs +++ b/lib/spargebra/src/parser.rs @@ -2019,13 +2019,13 @@ parser! { rule STRING_LITERAL1() -> String = "'" l:$((STRING_LITERAL1_simple_char() / ECHAR() / UCHAR())*) "'" {? unescape_string(l) } - rule STRING_LITERAL1_simple_char() = !['\u{27}' | '\u{5C}' | '\u{A}' | '\u{D}'] [_] + rule STRING_LITERAL1_simple_char() = !['\u{27}' | '\u{5C}' | '\u{0A}' | '\u{0D}'] [_] rule STRING_LITERAL2() -> String = "\"" l:$((STRING_LITERAL2_simple_char() / ECHAR() / UCHAR())*) "\"" {? unescape_string(l) } - rule STRING_LITERAL2_simple_char() = !['\u{22}' | '\u{5C}' | '\u{A}' | '\u{D}'] [_] + rule STRING_LITERAL2_simple_char() = !['\u{22}' | '\u{5C}' | '\u{0A}' | '\u{0D}'] [_] rule STRING_LITERAL_LONG1() -> String = "'''" l:$(STRING_LITERAL_LONG1_inner()*) "'''" {? unescape_string(l) @@ -2045,7 +2045,7 @@ parser! { rule NIL() = "(" WS()* ")" - rule WS() = quiet! { ['\u{20}' | '\u{9}' | '\u{D}' | '\u{A}'] } + rule WS() = quiet! { ['\u{20}' | '\u{09}' | '\u{0D}' | '\u{0A}'] } rule ANON() = "[" WS()* "]"