Turtle: fixes parsing bug with escaped dot at the end of a local name

2 years ago · df040400c5
parent b08c201074
commit df040400c5
14 changed files with 52 additions and 27 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1033,7 +1033,7 @@ dependencies = [
 [[package]]
 name = "oxigraph"
-version = "0.4.0-alpha.2"
+version = "0.4.0-alpha.3-dev"
 dependencies = [
 "codspeed-criterion-compat",
 "digest",
@ -1063,7 +1063,7 @@ dependencies = [
 [[package]]
 name = "oxigraph-cli"
-version = "0.4.0-alpha.2"
+version = "0.4.0-alpha.3-dev"
 dependencies = [
 "anyhow",
 "assert_cmd",
@ -1082,7 +1082,7 @@ dependencies = [
 [[package]]
 name = "oxigraph-js"
-version = "0.4.0-alpha.2"
+version = "0.4.0-alpha.3-dev"
 dependencies = [
 "console_error_panic_hook",
 "js-sys",
@ -1129,7 +1129,7 @@ dependencies = [
 [[package]]
 name = "oxrdfio"
-version = "0.1.0-alpha.1"
+version = "0.1.0-alpha.2-dev"
 dependencies = [
 "oxrdf",
 "oxrdfxml",
@ -1150,7 +1150,7 @@ dependencies = [
 [[package]]
 name = "oxrocksdb-sys"
-version = "0.4.0-alpha.2"
+version = "0.4.0-alpha.3-dev"
 dependencies = [
 "bindgen",
 "cc",
@ -1167,7 +1167,7 @@ dependencies = [
 [[package]]
 name = "oxttl"
-version = "0.1.0-alpha.1"
+version = "0.1.0-alpha.2-dev"
 dependencies = [
 "memchr",
 "oxilangtag",
@ -1403,7 +1403,7 @@ dependencies = [
 [[package]]
 name = "pyoxigraph"
-version = "0.4.0-alpha.2"
+version = "0.4.0-alpha.3-dev"
 dependencies = [
 "oxigraph",
 "pyo3",
--- a/cli/Cargo.toml
+++ b/cli/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "oxigraph-cli"
-version = "0.4.0-alpha.2"
+version = "0.4.0-alpha.3-dev"
 authors = ["Tpt <thomas@pellissier-tanon.fr>"]
 license = "MIT OR Apache-2.0"
 readme = "README.md"
@ -29,7 +29,7 @@ rustls-webpki = ["oxigraph/http-client-rustls-webpki"]
 anyhow = "1.0.72"
 oxhttp = { version = "0.2.0-alpha.3", features = ["flate2"] }
 clap = { version = "4.0", features = ["derive"] }
-oxigraph = { version = "0.4.0-alpha.2", path = "../lib" }
+oxigraph = { version = "0.4.0-alpha.3-dev", path = "../lib" }
 rand = "0.8"
 url = "2.4"
 oxiri = "0.2.3-alpha.1"
--- a/js/Cargo.toml
+++ b/js/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "oxigraph-js"
-version = "0.4.0-alpha.2"
+version = "0.4.0-alpha.3-dev"
 authors = ["Tpt <thomas@pellissier-tanon.fr>"]
 license = "MIT OR Apache-2.0"
 readme = "README.md"
--- a/lib/Cargo.toml
+++ b/lib/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "oxigraph"
-version = "0.4.0-alpha.2"
+version = "0.4.0-alpha.3-dev"
 authors = ["Tpt <thomas@pellissier-tanon.fr>"]
 license = "MIT OR Apache-2.0"
 readme = "README.md"
@ -33,7 +33,7 @@ md-5 = "0.10"
 oxilangtag = "0.1"
 oxiri = "0.2.3-alpha.1"
 oxrdf = { version = "0.2.0-alpha.1", path = "oxrdf", features = ["rdf-star", "oxsdatatypes"] }
-oxrdfio = { version = "0.1.0-alpha.1", path = "oxrdfio", features = ["rdf-star"] }
+oxrdfio = { version = "0.1.0-alpha.2-dev", path = "oxrdfio", features = ["rdf-star"] }
 oxsdatatypes = { version = "0.2.0-alpha.1", path = "oxsdatatypes" }
 rand = "0.8"
 regex = "1.7"
@ -46,7 +46,7 @@ sparopt = { version = "0.1.0-alpha.1", path = "sparopt", features = ["rdf-star",
 [target.'cfg(not(target_family = "wasm"))'.dependencies]
 libc = "0.2.147"
-oxrocksdb-sys = { version = "0.4.0-alpha.2", path = "../oxrocksdb-sys" }
+oxrocksdb-sys = { version = "0.4.0-alpha.3-dev", path = "../oxrocksdb-sys" }
 oxhttp = { version = "0.2.0-alpha.3", optional = true }
 [target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]
--- a/lib/oxrdfio/Cargo.toml
+++ b/lib/oxrdfio/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "oxrdfio"
-version = "0.1.0-alpha.1"
+version = "0.1.0-alpha.2-dev"
 authors = ["Tpt <thomas@pellissier-tanon.fr>"]
 license = "MIT OR Apache-2.0"
 readme = "README.md"
@ -21,7 +21,7 @@ rdf-star = ["oxrdf/rdf-star", "oxttl/rdf-star"]
 [dependencies]
 oxrdf = { version = "0.2.0-alpha.1", path = "../oxrdf" }
 oxrdfxml = { version = "0.1.0-alpha.1", path = "../oxrdfxml" }
-oxttl = { version = "0.1.0-alpha.1", path = "../oxttl" }
+oxttl = { version = "0.1.0-alpha.2-dev", path = "../oxttl" }
 tokio = { version = "1.29", optional = true, features = ["io-util"] }
 [dev-dependencies]
--- a/lib/oxttl/Cargo.toml
+++ b/lib/oxttl/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "oxttl"
-version = "0.1.0-alpha.1"
+version = "0.1.0-alpha.2-dev"
 authors = ["Tpt <thomas@pellissier-tanon.fr>"]
 license = "MIT OR Apache-2.0"
 readme = "README.md"
--- a/lib/oxttl/src/lexer.rs
+++ b/lib/oxttl/src/lexer.rs
@ -354,6 +354,7 @@ impl N3Lexer {
        let mut buffer = None; // Buffer if there are some escaped characters
        let mut position_that_is_already_in_buffer = 0;
        let mut might_be_invalid_iri = false;
+        let mut ends_with_unescaped_dot = 0;
        loop {
            if let Some(r) = Self::recognize_unicode_char(&data[i..], i) {
                match r {
@ -369,6 +370,7 @@ impl N3Lexer {
                                ).into())));
                            }
                            i += 1;
+                            ends_with_unescaped_dot = 0;
                        } else if c == '\\' {
                            i += 1;
                            let a = char::from(*data.get(i)?);
@ -416,6 +418,7 @@ impl N3Lexer {
                            buffer.push(a);
                            i += 1;
                            position_that_is_already_in_buffer = i;
+                            ends_with_unescaped_dot = 0;
                        } else if i == 0 {
                            if !(Self::is_possible_pn_chars_u(c) || c == ':' || c.is_ascii_digit())
                            {
@ -427,13 +430,17 @@ impl N3Lexer {
                                        || c == ':';
                            }
                            i += consumed;
-                        } else if Self::is_possible_pn_chars(c) || c == ':' || c == '.' {
+                        } else if Self::is_possible_pn_chars(c) || c == ':' {
                            if !self.unchecked {
                                might_be_invalid_iri |=
                                    Self::is_possible_pn_chars_base_but_not_valid_iri(c)
                                        || c == ':';
                            }
                            i += consumed;
+                            ends_with_unescaped_dot = 0;
+                        } else if c == '.' {
+                            i += consumed;
+                            ends_with_unescaped_dot += 1;
                        } else {
                            let buffer = if let Some(mut buffer) = buffer {
                                buffer.push_str(
@ -445,22 +452,20 @@ impl N3Lexer {
                                        Err(e) => return Some((i, Err(e))),
                                    },
                                );
-                                // We do not include the last dot
-                                while buffer.ends_with('.') {
+                                // We do not include the last dots
+                                for _ in 0..ends_with_unescaped_dot {
                                    buffer.pop();
-                                    i -= 1;
                                }
+                                i -= ends_with_unescaped_dot;
                                Cow::Owned(buffer)
                            } else {
                                let mut data = match str_from_utf8(&data[..i], 0..i) {
                                    Ok(data) => data,
                                    Err(e) => return Some((i, Err(e))),
                                };
-                                // We do not include the last dot
-                                while let Some(d) = data.strip_suffix('.') {
-                                    data = d;
-                                    i -= 1;
-                                }
+                                // We do not include the last dots
+                                data = &data[..data.len() - ends_with_unescaped_dot];
+                                i -= ends_with_unescaped_dot;
                                Cow::Borrowed(data)
                            };
                            return Some((i, Ok((buffer, might_be_invalid_iri))));
--- a/oxrocksdb-sys/Cargo.toml
+++ b/oxrocksdb-sys/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "oxrocksdb-sys"
-version = "0.4.0-alpha.2"
+version = "0.4.0-alpha.3-dev"
 authors = ["Tpt <thomas@pellissier-tanon.fr>"]
 license = "GPL-2.0 OR Apache-2.0"
 repository = "https://github.com/oxigraph/oxigraph/tree/main/oxrocksdb-sys"
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "pyoxigraph"
-version = "0.4.0-alpha.2"
+version = "0.4.0-alpha.3-dev"
 authors = ["Tpt <thomas@pellissier-tanon.fr>"]
 license = "MIT OR Apache-2.0"
 readme = "README.md"
--- a/testsuite/oxigraph-tests/parser/escaped_trailing_dot.nq
+++ b/testsuite/oxigraph-tests/parser/escaped_trailing_dot.nq
@ -0,0 +1 @@
 <http://example.com/s> <http://example.com/p> <http://example.com/o.> .
--- a/testsuite/oxigraph-tests/parser/escaped_trailing_dot.nt
+++ b/testsuite/oxigraph-tests/parser/escaped_trailing_dot.nt
@ -0,0 +1 @@
 <http://example.com/s> <http://example.com/p> <http://example.com/o.> .
--- a/testsuite/oxigraph-tests/parser/escaped_trailing_dot.trig
+++ b/testsuite/oxigraph-tests/parser/escaped_trailing_dot.trig
@ -0,0 +1,2 @@
@prefix ex: <http://example.com/> .
 ex:s ex:p ex:o\. .
--- a/testsuite/oxigraph-tests/parser/escaped_trailing_dot.ttl
+++ b/testsuite/oxigraph-tests/parser/escaped_trailing_dot.ttl
@ -0,0 +1,2 @@
@prefix ex: <http://example.com/> .
 ex:s ex:p ex:o\. .
--- a/testsuite/oxigraph-tests/parser/manifest.ttl
+++ b/testsuite/oxigraph-tests/parser/manifest.ttl
@ -18,6 +18,8 @@
 		<#keyword_vs_prefix_ttl>
        <#keyword_vs_prefix_trig>
        <#at_keywords_as_lang_tag>
        <#escaped_trailing_dot_ttl>
        <#escaped_trailing_dot_trig>
 	) .
 <#no_end_line_jump>
@ -88,3 +90,15 @@
 	mf:name "usage of at keywords as language tags" ;
 	mf:action <at_keywords_as_lang_tag.ttl> ;
 	mf:result <at_keywords_as_lang_tag.nt> .
 <#escaped_trailing_dot_ttl>
 	rdf:type rdft:TestTurtleEval ;
 	mf:name "escaped dot at the end of a local name" ;
 	mf:action <escaped_trailing_dot.ttl> ;
 	mf:result <escaped_trailing_dot.nt> .
 <#escaped_trailing_dot_trig>
 	rdf:type rdft:TestTrigEval ;
 	mf:name "escaped dot at the end of a local name" ;
 	mf:action <escaped_trailing_dot.trig> ;
 	mf:result <escaped_trailing_dot.nq> .
		`@ -0,0 +1 @@`
							`<http://example.com/s> <http://example.com/p> <http://example.com/o.> .`
		`@ -0,0 +1,2 @@`
							`@prefix ex: <http://example.com/> .`
							`ex:s ex:p ex:o\. .`