Turtle: fixes parsing bug with escaped dot at the end of a local name

pull/736/head
Tpt 8 months ago committed by Thomas Tanon
parent b08c201074
commit df040400c5
  1. 14
      Cargo.lock
  2. 4
      cli/Cargo.toml
  3. 2
      js/Cargo.toml
  4. 6
      lib/Cargo.toml
  5. 4
      lib/oxrdfio/Cargo.toml
  6. 2
      lib/oxttl/Cargo.toml
  7. 23
      lib/oxttl/src/lexer.rs
  8. 2
      oxrocksdb-sys/Cargo.toml
  9. 2
      python/Cargo.toml
  10. 1
      testsuite/oxigraph-tests/parser/escaped_trailing_dot.nq
  11. 1
      testsuite/oxigraph-tests/parser/escaped_trailing_dot.nt
  12. 2
      testsuite/oxigraph-tests/parser/escaped_trailing_dot.trig
  13. 2
      testsuite/oxigraph-tests/parser/escaped_trailing_dot.ttl
  14. 14
      testsuite/oxigraph-tests/parser/manifest.ttl

14
Cargo.lock generated

@ -1033,7 +1033,7 @@ dependencies = [
[[package]]
name = "oxigraph"
version = "0.4.0-alpha.2"
version = "0.4.0-alpha.3-dev"
dependencies = [
"codspeed-criterion-compat",
"digest",
@ -1063,7 +1063,7 @@ dependencies = [
[[package]]
name = "oxigraph-cli"
version = "0.4.0-alpha.2"
version = "0.4.0-alpha.3-dev"
dependencies = [
"anyhow",
"assert_cmd",
@ -1082,7 +1082,7 @@ dependencies = [
[[package]]
name = "oxigraph-js"
version = "0.4.0-alpha.2"
version = "0.4.0-alpha.3-dev"
dependencies = [
"console_error_panic_hook",
"js-sys",
@ -1129,7 +1129,7 @@ dependencies = [
[[package]]
name = "oxrdfio"
version = "0.1.0-alpha.1"
version = "0.1.0-alpha.2-dev"
dependencies = [
"oxrdf",
"oxrdfxml",
@ -1150,7 +1150,7 @@ dependencies = [
[[package]]
name = "oxrocksdb-sys"
version = "0.4.0-alpha.2"
version = "0.4.0-alpha.3-dev"
dependencies = [
"bindgen",
"cc",
@ -1167,7 +1167,7 @@ dependencies = [
[[package]]
name = "oxttl"
version = "0.1.0-alpha.1"
version = "0.1.0-alpha.2-dev"
dependencies = [
"memchr",
"oxilangtag",
@ -1403,7 +1403,7 @@ dependencies = [
[[package]]
name = "pyoxigraph"
version = "0.4.0-alpha.2"
version = "0.4.0-alpha.3-dev"
dependencies = [
"oxigraph",
"pyo3",

@ -1,6 +1,6 @@
[package]
name = "oxigraph-cli"
version = "0.4.0-alpha.2"
version = "0.4.0-alpha.3-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
@ -29,7 +29,7 @@ rustls-webpki = ["oxigraph/http-client-rustls-webpki"]
anyhow = "1.0.72"
oxhttp = { version = "0.2.0-alpha.3", features = ["flate2"] }
clap = { version = "4.0", features = ["derive"] }
oxigraph = { version = "0.4.0-alpha.2", path = "../lib" }
oxigraph = { version = "0.4.0-alpha.3-dev", path = "../lib" }
rand = "0.8"
url = "2.4"
oxiri = "0.2.3-alpha.1"

@ -1,6 +1,6 @@
[package]
name = "oxigraph-js"
version = "0.4.0-alpha.2"
version = "0.4.0-alpha.3-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"

@ -1,6 +1,6 @@
[package]
name = "oxigraph"
version = "0.4.0-alpha.2"
version = "0.4.0-alpha.3-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
@ -33,7 +33,7 @@ md-5 = "0.10"
oxilangtag = "0.1"
oxiri = "0.2.3-alpha.1"
oxrdf = { version = "0.2.0-alpha.1", path = "oxrdf", features = ["rdf-star", "oxsdatatypes"] }
oxrdfio = { version = "0.1.0-alpha.1", path = "oxrdfio", features = ["rdf-star"] }
oxrdfio = { version = "0.1.0-alpha.2-dev", path = "oxrdfio", features = ["rdf-star"] }
oxsdatatypes = { version = "0.2.0-alpha.1", path = "oxsdatatypes" }
rand = "0.8"
regex = "1.7"
@ -46,7 +46,7 @@ sparopt = { version = "0.1.0-alpha.1", path = "sparopt", features = ["rdf-star",
[target.'cfg(not(target_family = "wasm"))'.dependencies]
libc = "0.2.147"
oxrocksdb-sys = { version = "0.4.0-alpha.2", path = "../oxrocksdb-sys" }
oxrocksdb-sys = { version = "0.4.0-alpha.3-dev", path = "../oxrocksdb-sys" }
oxhttp = { version = "0.2.0-alpha.3", optional = true }
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]

@ -1,6 +1,6 @@
[package]
name = "oxrdfio"
version = "0.1.0-alpha.1"
version = "0.1.0-alpha.2-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
@ -21,7 +21,7 @@ rdf-star = ["oxrdf/rdf-star", "oxttl/rdf-star"]
[dependencies]
oxrdf = { version = "0.2.0-alpha.1", path = "../oxrdf" }
oxrdfxml = { version = "0.1.0-alpha.1", path = "../oxrdfxml" }
oxttl = { version = "0.1.0-alpha.1", path = "../oxttl" }
oxttl = { version = "0.1.0-alpha.2-dev", path = "../oxttl" }
tokio = { version = "1.29", optional = true, features = ["io-util"] }
[dev-dependencies]

@ -1,6 +1,6 @@
[package]
name = "oxttl"
version = "0.1.0-alpha.1"
version = "0.1.0-alpha.2-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"

@ -354,6 +354,7 @@ impl N3Lexer {
let mut buffer = None; // Buffer if there are some escaped characters
let mut position_that_is_already_in_buffer = 0;
let mut might_be_invalid_iri = false;
let mut ends_with_unescaped_dot = 0;
loop {
if let Some(r) = Self::recognize_unicode_char(&data[i..], i) {
match r {
@ -369,6 +370,7 @@ impl N3Lexer {
).into())));
}
i += 1;
ends_with_unescaped_dot = 0;
} else if c == '\\' {
i += 1;
let a = char::from(*data.get(i)?);
@ -416,6 +418,7 @@ impl N3Lexer {
buffer.push(a);
i += 1;
position_that_is_already_in_buffer = i;
ends_with_unescaped_dot = 0;
} else if i == 0 {
if !(Self::is_possible_pn_chars_u(c) || c == ':' || c.is_ascii_digit())
{
@ -427,13 +430,17 @@ impl N3Lexer {
|| c == ':';
}
i += consumed;
} else if Self::is_possible_pn_chars(c) || c == ':' || c == '.' {
} else if Self::is_possible_pn_chars(c) || c == ':' {
if !self.unchecked {
might_be_invalid_iri |=
Self::is_possible_pn_chars_base_but_not_valid_iri(c)
|| c == ':';
}
i += consumed;
ends_with_unescaped_dot = 0;
} else if c == '.' {
i += consumed;
ends_with_unescaped_dot += 1;
} else {
let buffer = if let Some(mut buffer) = buffer {
buffer.push_str(
@ -445,22 +452,20 @@ impl N3Lexer {
Err(e) => return Some((i, Err(e))),
},
);
// We do not include the last dot
while buffer.ends_with('.') {
// We do not include the last dots
for _ in 0..ends_with_unescaped_dot {
buffer.pop();
i -= 1;
}
i -= ends_with_unescaped_dot;
Cow::Owned(buffer)
} else {
let mut data = match str_from_utf8(&data[..i], 0..i) {
Ok(data) => data,
Err(e) => return Some((i, Err(e))),
};
// We do not include the last dot
while let Some(d) = data.strip_suffix('.') {
data = d;
i -= 1;
}
// We do not include the last dots
data = &data[..data.len() - ends_with_unescaped_dot];
i -= ends_with_unescaped_dot;
Cow::Borrowed(data)
};
return Some((i, Ok((buffer, might_be_invalid_iri))));

@ -1,6 +1,6 @@
[package]
name = "oxrocksdb-sys"
version = "0.4.0-alpha.2"
version = "0.4.0-alpha.3-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "GPL-2.0 OR Apache-2.0"
repository = "https://github.com/oxigraph/oxigraph/tree/main/oxrocksdb-sys"

@ -1,6 +1,6 @@
[package]
name = "pyoxigraph"
version = "0.4.0-alpha.2"
version = "0.4.0-alpha.3-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"

@ -0,0 +1 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o.> .

@ -0,0 +1 @@
<http://example.com/s> <http://example.com/p> <http://example.com/o.> .

@ -0,0 +1,2 @@
@prefix ex: <http://example.com/> .
ex:s ex:p ex:o\. .

@ -0,0 +1,2 @@
@prefix ex: <http://example.com/> .
ex:s ex:p ex:o\. .

@ -18,6 +18,8 @@
<#keyword_vs_prefix_ttl>
<#keyword_vs_prefix_trig>
<#at_keywords_as_lang_tag>
<#escaped_trailing_dot_ttl>
<#escaped_trailing_dot_trig>
) .
<#no_end_line_jump>
@ -88,3 +90,15 @@
mf:name "usage of at keywords as language tags" ;
mf:action <at_keywords_as_lang_tag.ttl> ;
mf:result <at_keywords_as_lang_tag.nt> .
<#escaped_trailing_dot_ttl>
rdf:type rdft:TestTurtleEval ;
mf:name "escaped dot at the end of a local name" ;
mf:action <escaped_trailing_dot.ttl> ;
mf:result <escaped_trailing_dot.nt> .
<#escaped_trailing_dot_trig>
rdf:type rdft:TestTrigEval ;
mf:name "escaped dot at the end of a local name" ;
mf:action <escaped_trailing_dot.trig> ;
mf:result <escaped_trailing_dot.nq> .

Loading…
Cancel
Save