Turtle: fixes a parsing bug with an escaped dot at the end of a local name

pull/736/head
Tpt 11 months ago committed by Thomas Tanon
parent b08c201074
commit df040400c5
  1. 14
      Cargo.lock
  2. 4
      cli/Cargo.toml
  3. 2
      js/Cargo.toml
  4. 6
      lib/Cargo.toml
  5. 4
      lib/oxrdfio/Cargo.toml
  6. 2
      lib/oxttl/Cargo.toml
  7. 23
      lib/oxttl/src/lexer.rs
  8. 2
      oxrocksdb-sys/Cargo.toml
  9. 2
      python/Cargo.toml
  10. 1
      testsuite/oxigraph-tests/parser/escaped_trailing_dot.nq
  11. 1
      testsuite/oxigraph-tests/parser/escaped_trailing_dot.nt
  12. 2
      testsuite/oxigraph-tests/parser/escaped_trailing_dot.trig
  13. 2
      testsuite/oxigraph-tests/parser/escaped_trailing_dot.ttl
  14. 14
      testsuite/oxigraph-tests/parser/manifest.ttl

14
Cargo.lock generated

@ -1033,7 +1033,7 @@ dependencies = [
[[package]] [[package]]
name = "oxigraph" name = "oxigraph"
version = "0.4.0-alpha.2" version = "0.4.0-alpha.3-dev"
dependencies = [ dependencies = [
"codspeed-criterion-compat", "codspeed-criterion-compat",
"digest", "digest",
@ -1063,7 +1063,7 @@ dependencies = [
[[package]] [[package]]
name = "oxigraph-cli" name = "oxigraph-cli"
version = "0.4.0-alpha.2" version = "0.4.0-alpha.3-dev"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"assert_cmd", "assert_cmd",
@ -1082,7 +1082,7 @@ dependencies = [
[[package]] [[package]]
name = "oxigraph-js" name = "oxigraph-js"
version = "0.4.0-alpha.2" version = "0.4.0-alpha.3-dev"
dependencies = [ dependencies = [
"console_error_panic_hook", "console_error_panic_hook",
"js-sys", "js-sys",
@ -1129,7 +1129,7 @@ dependencies = [
[[package]] [[package]]
name = "oxrdfio" name = "oxrdfio"
version = "0.1.0-alpha.1" version = "0.1.0-alpha.2-dev"
dependencies = [ dependencies = [
"oxrdf", "oxrdf",
"oxrdfxml", "oxrdfxml",
@ -1150,7 +1150,7 @@ dependencies = [
[[package]] [[package]]
name = "oxrocksdb-sys" name = "oxrocksdb-sys"
version = "0.4.0-alpha.2" version = "0.4.0-alpha.3-dev"
dependencies = [ dependencies = [
"bindgen", "bindgen",
"cc", "cc",
@ -1167,7 +1167,7 @@ dependencies = [
[[package]] [[package]]
name = "oxttl" name = "oxttl"
version = "0.1.0-alpha.1" version = "0.1.0-alpha.2-dev"
dependencies = [ dependencies = [
"memchr", "memchr",
"oxilangtag", "oxilangtag",
@ -1403,7 +1403,7 @@ dependencies = [
[[package]] [[package]]
name = "pyoxigraph" name = "pyoxigraph"
version = "0.4.0-alpha.2" version = "0.4.0-alpha.3-dev"
dependencies = [ dependencies = [
"oxigraph", "oxigraph",
"pyo3", "pyo3",

@ -1,6 +1,6 @@
[package] [package]
name = "oxigraph-cli" name = "oxigraph-cli"
version = "0.4.0-alpha.2" version = "0.4.0-alpha.3-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"] authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0" license = "MIT OR Apache-2.0"
readme = "README.md" readme = "README.md"
@ -29,7 +29,7 @@ rustls-webpki = ["oxigraph/http-client-rustls-webpki"]
anyhow = "1.0.72" anyhow = "1.0.72"
oxhttp = { version = "0.2.0-alpha.3", features = ["flate2"] } oxhttp = { version = "0.2.0-alpha.3", features = ["flate2"] }
clap = { version = "4.0", features = ["derive"] } clap = { version = "4.0", features = ["derive"] }
oxigraph = { version = "0.4.0-alpha.2", path = "../lib" } oxigraph = { version = "0.4.0-alpha.3-dev", path = "../lib" }
rand = "0.8" rand = "0.8"
url = "2.4" url = "2.4"
oxiri = "0.2.3-alpha.1" oxiri = "0.2.3-alpha.1"

@ -1,6 +1,6 @@
[package] [package]
name = "oxigraph-js" name = "oxigraph-js"
version = "0.4.0-alpha.2" version = "0.4.0-alpha.3-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"] authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0" license = "MIT OR Apache-2.0"
readme = "README.md" readme = "README.md"

@ -1,6 +1,6 @@
[package] [package]
name = "oxigraph" name = "oxigraph"
version = "0.4.0-alpha.2" version = "0.4.0-alpha.3-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"] authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0" license = "MIT OR Apache-2.0"
readme = "README.md" readme = "README.md"
@ -33,7 +33,7 @@ md-5 = "0.10"
oxilangtag = "0.1" oxilangtag = "0.1"
oxiri = "0.2.3-alpha.1" oxiri = "0.2.3-alpha.1"
oxrdf = { version = "0.2.0-alpha.1", path = "oxrdf", features = ["rdf-star", "oxsdatatypes"] } oxrdf = { version = "0.2.0-alpha.1", path = "oxrdf", features = ["rdf-star", "oxsdatatypes"] }
oxrdfio = { version = "0.1.0-alpha.1", path = "oxrdfio", features = ["rdf-star"] } oxrdfio = { version = "0.1.0-alpha.2-dev", path = "oxrdfio", features = ["rdf-star"] }
oxsdatatypes = { version = "0.2.0-alpha.1", path = "oxsdatatypes" } oxsdatatypes = { version = "0.2.0-alpha.1", path = "oxsdatatypes" }
rand = "0.8" rand = "0.8"
regex = "1.7" regex = "1.7"
@ -46,7 +46,7 @@ sparopt = { version = "0.1.0-alpha.1", path = "sparopt", features = ["rdf-star",
[target.'cfg(not(target_family = "wasm"))'.dependencies] [target.'cfg(not(target_family = "wasm"))'.dependencies]
libc = "0.2.147" libc = "0.2.147"
oxrocksdb-sys = { version = "0.4.0-alpha.2", path = "../oxrocksdb-sys" } oxrocksdb-sys = { version = "0.4.0-alpha.3-dev", path = "../oxrocksdb-sys" }
oxhttp = { version = "0.2.0-alpha.3", optional = true } oxhttp = { version = "0.2.0-alpha.3", optional = true }
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] [target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]

@ -1,6 +1,6 @@
[package] [package]
name = "oxrdfio" name = "oxrdfio"
version = "0.1.0-alpha.1" version = "0.1.0-alpha.2-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"] authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0" license = "MIT OR Apache-2.0"
readme = "README.md" readme = "README.md"
@ -21,7 +21,7 @@ rdf-star = ["oxrdf/rdf-star", "oxttl/rdf-star"]
[dependencies] [dependencies]
oxrdf = { version = "0.2.0-alpha.1", path = "../oxrdf" } oxrdf = { version = "0.2.0-alpha.1", path = "../oxrdf" }
oxrdfxml = { version = "0.1.0-alpha.1", path = "../oxrdfxml" } oxrdfxml = { version = "0.1.0-alpha.1", path = "../oxrdfxml" }
oxttl = { version = "0.1.0-alpha.1", path = "../oxttl" } oxttl = { version = "0.1.0-alpha.2-dev", path = "../oxttl" }
tokio = { version = "1.29", optional = true, features = ["io-util"] } tokio = { version = "1.29", optional = true, features = ["io-util"] }
[dev-dependencies] [dev-dependencies]

@ -1,6 +1,6 @@
[package] [package]
name = "oxttl" name = "oxttl"
version = "0.1.0-alpha.1" version = "0.1.0-alpha.2-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"] authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0" license = "MIT OR Apache-2.0"
readme = "README.md" readme = "README.md"

@ -354,6 +354,7 @@ impl N3Lexer {
let mut buffer = None; // Buffer if there are some escaped characters let mut buffer = None; // Buffer if there are some escaped characters
let mut position_that_is_already_in_buffer = 0; let mut position_that_is_already_in_buffer = 0;
let mut might_be_invalid_iri = false; let mut might_be_invalid_iri = false;
let mut ends_with_unescaped_dot = 0;
loop { loop {
if let Some(r) = Self::recognize_unicode_char(&data[i..], i) { if let Some(r) = Self::recognize_unicode_char(&data[i..], i) {
match r { match r {
@ -369,6 +370,7 @@ impl N3Lexer {
).into()))); ).into())));
} }
i += 1; i += 1;
ends_with_unescaped_dot = 0;
} else if c == '\\' { } else if c == '\\' {
i += 1; i += 1;
let a = char::from(*data.get(i)?); let a = char::from(*data.get(i)?);
@ -416,6 +418,7 @@ impl N3Lexer {
buffer.push(a); buffer.push(a);
i += 1; i += 1;
position_that_is_already_in_buffer = i; position_that_is_already_in_buffer = i;
ends_with_unescaped_dot = 0;
} else if i == 0 { } else if i == 0 {
if !(Self::is_possible_pn_chars_u(c) || c == ':' || c.is_ascii_digit()) if !(Self::is_possible_pn_chars_u(c) || c == ':' || c.is_ascii_digit())
{ {
@ -427,13 +430,17 @@ impl N3Lexer {
|| c == ':'; || c == ':';
} }
i += consumed; i += consumed;
} else if Self::is_possible_pn_chars(c) || c == ':' || c == '.' { } else if Self::is_possible_pn_chars(c) || c == ':' {
if !self.unchecked { if !self.unchecked {
might_be_invalid_iri |= might_be_invalid_iri |=
Self::is_possible_pn_chars_base_but_not_valid_iri(c) Self::is_possible_pn_chars_base_but_not_valid_iri(c)
|| c == ':'; || c == ':';
} }
i += consumed; i += consumed;
ends_with_unescaped_dot = 0;
} else if c == '.' {
i += consumed;
ends_with_unescaped_dot += 1;
} else { } else {
let buffer = if let Some(mut buffer) = buffer { let buffer = if let Some(mut buffer) = buffer {
buffer.push_str( buffer.push_str(
@ -445,22 +452,20 @@ impl N3Lexer {
Err(e) => return Some((i, Err(e))), Err(e) => return Some((i, Err(e))),
}, },
); );
// We do not include the last dot // We do not include the last dots
while buffer.ends_with('.') { for _ in 0..ends_with_unescaped_dot {
buffer.pop(); buffer.pop();
i -= 1;
} }
i -= ends_with_unescaped_dot;
Cow::Owned(buffer) Cow::Owned(buffer)
} else { } else {
let mut data = match str_from_utf8(&data[..i], 0..i) { let mut data = match str_from_utf8(&data[..i], 0..i) {
Ok(data) => data, Ok(data) => data,
Err(e) => return Some((i, Err(e))), Err(e) => return Some((i, Err(e))),
}; };
// We do not include the last dot // We do not include the last dots
while let Some(d) = data.strip_suffix('.') { data = &data[..data.len() - ends_with_unescaped_dot];
data = d; i -= ends_with_unescaped_dot;
i -= 1;
}
Cow::Borrowed(data) Cow::Borrowed(data)
}; };
return Some((i, Ok((buffer, might_be_invalid_iri)))); return Some((i, Ok((buffer, might_be_invalid_iri))));

@ -1,6 +1,6 @@
[package] [package]
name = "oxrocksdb-sys" name = "oxrocksdb-sys"
version = "0.4.0-alpha.2" version = "0.4.0-alpha.3-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"] authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "GPL-2.0 OR Apache-2.0" license = "GPL-2.0 OR Apache-2.0"
repository = "https://github.com/oxigraph/oxigraph/tree/main/oxrocksdb-sys" repository = "https://github.com/oxigraph/oxigraph/tree/main/oxrocksdb-sys"

@ -1,6 +1,6 @@
[package] [package]
name = "pyoxigraph" name = "pyoxigraph"
version = "0.4.0-alpha.2" version = "0.4.0-alpha.3-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"] authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0" license = "MIT OR Apache-2.0"
readme = "README.md" readme = "README.md"

@ -0,0 +1 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o.> .

@ -0,0 +1 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o.> .

@ -0,0 +1,2 @@
@prefix ex: <http://example.com/> .
ex:s ex:p ex:o\. .

@ -0,0 +1,2 @@
@prefix ex: <http://example.com/> .
ex:s ex:p ex:o\. .

@ -18,6 +18,8 @@
<#keyword_vs_prefix_ttl> <#keyword_vs_prefix_ttl>
<#keyword_vs_prefix_trig> <#keyword_vs_prefix_trig>
<#at_keywords_as_lang_tag> <#at_keywords_as_lang_tag>
<#escaped_trailing_dot_ttl>
<#escaped_trailing_dot_trig>
) . ) .
<#no_end_line_jump> <#no_end_line_jump>
@ -88,3 +90,15 @@
mf:name "usage of at keywords as language tags" ; mf:name "usage of at keywords as language tags" ;
mf:action <at_keywords_as_lang_tag.ttl> ; mf:action <at_keywords_as_lang_tag.ttl> ;
mf:result <at_keywords_as_lang_tag.nt> . mf:result <at_keywords_as_lang_tag.nt> .
<#escaped_trailing_dot_ttl>
rdf:type rdft:TestTurtleEval ;
mf:name "escaped dot at the end of a local name" ;
mf:action <escaped_trailing_dot.ttl> ;
mf:result <escaped_trailing_dot.nt> .
<#escaped_trailing_dot_trig>
rdf:type rdft:TestTrigEval ;
mf:name "escaped dot at the end of a local name" ;
mf:action <escaped_trailing_dot.trig> ;
mf:result <escaped_trailing_dot.nq> .

Loading…
Cancel
Save