From 7d6c5190c1be3705eadc3ce85b7b894355a51ae6 Mon Sep 17 00:00:00 2001
From: Niko PLP <niko@nextgraph.org>
Date: Mon, 20 May 2024 09:01:46 +0300
Subject: [PATCH] remove all dependencies on oxigraph crates, and add dep to ng-rocksdb

---
 Cargo.lock | 126 +-
 ng-oxigraph/Cargo.toml | 25 +-
 ng-oxigraph/src/lib.rs | 22 +-
 ng-oxigraph/src/{ => oxigraph}/io/format.rs | 2 +-
 ng-oxigraph/src/{ => oxigraph}/io/mod.rs | 2 +-
 ng-oxigraph/src/{ => oxigraph}/io/read.rs | 6 +-
 ng-oxigraph/src/{ => oxigraph}/io/write.rs | 6 +-
 ng-oxigraph/src/oxigraph/mod.rs | 5 +
 ng-oxigraph/src/{ => oxigraph}/model.rs | 4 +-
 .../src/{ => oxigraph}/sparql/algebra.rs | 9 +-
 .../src/{ => oxigraph}/sparql/dataset.rs | 12 +-
 .../src/{ => oxigraph}/sparql/error.rs | 10 +-
 ng-oxigraph/src/{ => oxigraph}/sparql/eval.rs | 43 +-
 .../src/{ => oxigraph}/sparql/http/dummy.rs | 0
 .../src/{ => oxigraph}/sparql/http/mod.rs | 0
 .../src/{ => oxigraph}/sparql/http/simple.rs | 0
 ng-oxigraph/src/{ => oxigraph}/sparql/mod.rs | 31 +-
 .../src/{ => oxigraph}/sparql/model.rs | 11 +-
 .../src/{ => oxigraph}/sparql/results.rs | 2 +-
 .../src/{ => oxigraph}/sparql/service.rs | 12 +-
 .../src/{ => oxigraph}/sparql/update.rs | 29 +-
 .../storage/backend/fallback.rs | 4 +-
 .../src/{ => oxigraph}/storage/backend/mod.rs | 0
 .../storage/backend/oxi_rocksdb.rs | 4 +-
 .../{ => oxigraph}/storage/binary_encoder.rs | 16 +-
 .../src/{ => oxigraph}/storage/error.rs | 10 +-
 ng-oxigraph/src/{ => oxigraph}/storage/mod.rs | 20 +-
 .../{ => oxigraph}/storage/numeric_encoder.rs | 8 +-
 .../{ => oxigraph}/storage/small_string.rs | 0
 ng-oxigraph/src/{ => oxigraph}/store.rs | 18 +-
 ng-oxigraph/src/oxrdf/README.md | 51 +
 ng-oxigraph/src/oxrdf/blank_node.rs | 403 +++
 ng-oxigraph/src/oxrdf/dataset.rs | 1641 +++++++++
 ng-oxigraph/src/oxrdf/graph.rs | 284 ++
 ng-oxigraph/src/oxrdf/interning.rs | 535 +++
 ng-oxigraph/src/oxrdf/literal.rs | 669 ++++
 ng-oxigraph/src/oxrdf/mod.rs | 24 +
 ng-oxigraph/src/oxrdf/named_node.rs | 237 ++
 ng-oxigraph/src/oxrdf/parser.rs | 469 +++
 ng-oxigraph/src/oxrdf/triple.rs | 1368 +++++++
 ng-oxigraph/src/oxrdf/variable.rs | 216 ++
 ng-oxigraph/src/oxrdf/vocab.rs | 242 ++
 ng-oxigraph/src/oxrdfio/README.md | 67 +
 ng-oxigraph/src/oxrdfio/error.rs | 124 +
 ng-oxigraph/src/oxrdfio/format.rs | 216 ++
 ng-oxigraph/src/oxrdfio/mod.rs | 9 +
 ng-oxigraph/src/oxrdfio/parser.rs | 795 ++++
 ng-oxigraph/src/oxrdfio/serializer.rs | 412 +++
 ng-oxigraph/src/oxrdfxml/README.md | 56 +
 ng-oxigraph/src/oxrdfxml/error.rs | 89 +
 ng-oxigraph/src/oxrdfxml/mod.rs | 8 +
 ng-oxigraph/src/oxrdfxml/parser.rs | 1237 +++++++
 ng-oxigraph/src/oxrdfxml/serializer.rs | 461 +++
 ng-oxigraph/src/oxrdfxml/utils.rs | 26 +
 ng-oxigraph/src/oxsdatatypes/README.md | 65 +
 ng-oxigraph/src/oxsdatatypes/boolean.rs | 134 +
 ng-oxigraph/src/oxsdatatypes/date_time.rs | 3187 +++++++++++++++++
 ng-oxigraph/src/oxsdatatypes/decimal.rs | 1099 ++++++
 ng-oxigraph/src/oxsdatatypes/double.rs | 326 ++
 ng-oxigraph/src/oxsdatatypes/duration.rs | 1249 +++++++
 ng-oxigraph/src/oxsdatatypes/float.rs | 310 ++
 ng-oxigraph/src/oxsdatatypes/integer.rs | 400 +++
 ng-oxigraph/src/oxsdatatypes/mod.rs | 21 +
 ng-oxigraph/src/oxttl/README.md | 54 +
 ng-oxigraph/src/oxttl/lexer.rs | 977 +++++
 ng-oxigraph/src/oxttl/line_formats.rs | 314 ++
 ng-oxigraph/src/oxttl/mod.rs | 19 +
 ng-oxigraph/src/oxttl/n3.rs | 1326 +++++++
 ng-oxigraph/src/oxttl/nquads.rs | 564 +++
 ng-oxigraph/src/oxttl/ntriples.rs | 580 +++
 ng-oxigraph/src/oxttl/terse.rs | 1072 ++++++
 ng-oxigraph/src/oxttl/toolkit/error.rs | 97 +
 ng-oxigraph/src/oxttl/toolkit/lexer.rs | 432 +++
 ng-oxigraph/src/oxttl/toolkit/mod.rs | 13 +
 ng-oxigraph/src/oxttl/toolkit/parser.rs | 183 +
 ng-oxigraph/src/oxttl/trig.rs | 1252 +++++++
 ng-oxigraph/src/oxttl/turtle.rs | 878 +++++
 ng-oxigraph/src/sparesults/README.md | 72 +
 ng-oxigraph/src/sparesults/csv.rs | 948 +++++
 ng-oxigraph/src/sparesults/error.rs | 157 +
 ng-oxigraph/src/sparesults/format.rs | 176 +
 ng-oxigraph/src/sparesults/json.rs | 1101 ++++++
 ng-oxigraph/src/sparesults/mod.rs | 16 +
 ng-oxigraph/src/sparesults/parser.rs | 460 +++
 ng-oxigraph/src/sparesults/serializer.rs | 427 +++
 ng-oxigraph/src/sparesults/solution.rs | 340 ++
 ng-oxigraph/src/sparesults/xml.rs | 833 +++++
 ng-oxigraph/src/spargebra/README.md | 46 +
 ng-oxigraph/src/spargebra/algebra.rs | 1419 ++++++++
 ng-oxigraph/src/spargebra/mod.rs | 9 +
 ng-oxigraph/src/spargebra/parser.rs | 2086 +++++++++++
 ng-oxigraph/src/spargebra/query.rs | 300 ++
 ng-oxigraph/src/spargebra/term.rs | 1012 ++++++
 ng-oxigraph/src/spargebra/update.rs | 344 ++
 ng-oxigraph/src/sparopt/README.md | 33 +
 ng-oxigraph/src/sparopt/algebra.rs | 1662 +++++++++
 ng-oxigraph/src/sparopt/mod.rs | 5 +
 ng-oxigraph/src/sparopt/optimizer.rs | 1082 ++++++
 ng-oxigraph/src/sparopt/type_inference.rs | 462 +++
 ng-oxigraph/tests/store.rs | 8 +-
 ng-storage-rocksdb/Cargo.toml | 2 +-
 ng-storage-rocksdb/src/block_storage.rs | 2 +-
 ng-storage-rocksdb/src/kcv_storage.rs | 18 +-
 ng-verifier/Cargo.toml | 2 +-
 ng-verifier/src/request_processor.rs | 2 +-
 ng-verifier/src/verifier.rs | 17 +-
 ngaccount/web/src/routes/Create.svelte | 2 +-
 107 files changed, 37403 insertions(+), 268 deletions(-)
 rename ng-oxigraph/src/{ => oxigraph}/io/format.rs (99%)
 rename ng-oxigraph/src/{ => oxigraph}/io/mod.rs (98%)
 rename ng-oxigraph/src/{ => oxigraph}/io/read.rs (97%)
 rename ng-oxigraph/src/{ => oxigraph}/io/write.rs (97%)
 create mode 100644 ng-oxigraph/src/oxigraph/mod.rs
 rename ng-oxigraph/src/{ => oxigraph}/model.rs (88%)
 rename ng-oxigraph/src/{ => oxigraph}/sparql/algebra.rs (98%)
 rename ng-oxigraph/src/{ => oxigraph}/sparql/dataset.rs (95%)
 rename ng-oxigraph/src/{ => oxigraph}/sparql/error.rs (92%)
 rename ng-oxigraph/src/{ => oxigraph}/sparql/eval.rs (99%)
 rename ng-oxigraph/src/{ => oxigraph}/sparql/http/dummy.rs (100%)
 rename ng-oxigraph/src/{ => oxigraph}/sparql/http/mod.rs (100%)
 rename ng-oxigraph/src/{ => oxigraph}/sparql/http/simple.rs (100%)
 rename ng-oxigraph/src/{ => oxigraph}/sparql/mod.rs (93%)
 rename ng-oxigraph/src/{ => oxigraph}/sparql/model.rs (98%)
 rename ng-oxigraph/src/{ => oxigraph}/sparql/results.rs (98%)
 rename ng-oxigraph/src/{ => oxigraph}/sparql/service.rs (92%)
 rename ng-oxigraph/src/{ => oxigraph}/sparql/update.rs (96%)
 rename ng-oxigraph/src/{ => oxigraph}/storage/backend/fallback.rs (99%)
 rename ng-oxigraph/src/{ => oxigraph}/storage/backend/mod.rs (100%)
 rename ng-oxigraph/src/{ => oxigraph}/storage/backend/oxi_rocksdb.rs (99%)
 rename ng-oxigraph/src/{ => oxigraph}/storage/binary_encoder.rs (98%)
 rename ng-oxigraph/src/{ => oxigraph}/storage/error.rs (95%)
 rename ng-oxigraph/src/{ => oxigraph}/storage/mod.rs (98%)
 rename ng-oxigraph/src/{ => oxigraph}/storage/numeric_encoder.rs (99%)
 rename ng-oxigraph/src/{ => oxigraph}/storage/small_string.rs (100%)
 rename ng-oxigraph/src/{ => oxigraph}/store.rs (99%)
 create mode 100644 ng-oxigraph/src/oxrdf/README.md
 create mode 100644 ng-oxigraph/src/oxrdf/blank_node.rs
 create mode 100644 ng-oxigraph/src/oxrdf/dataset.rs
 create mode 100644 ng-oxigraph/src/oxrdf/graph.rs
 create mode 100644 ng-oxigraph/src/oxrdf/interning.rs
 create mode 100644 ng-oxigraph/src/oxrdf/literal.rs
 create mode 100644 ng-oxigraph/src/oxrdf/mod.rs
 create mode 100644 ng-oxigraph/src/oxrdf/named_node.rs
 create mode 100644 ng-oxigraph/src/oxrdf/parser.rs
 create mode 100644 ng-oxigraph/src/oxrdf/triple.rs
 create mode 100644 ng-oxigraph/src/oxrdf/variable.rs
 create mode 100644 ng-oxigraph/src/oxrdf/vocab.rs
 create mode 100644 ng-oxigraph/src/oxrdfio/README.md
 create mode 100644 ng-oxigraph/src/oxrdfio/error.rs
 create mode 100644 ng-oxigraph/src/oxrdfio/format.rs
 create mode 100644 ng-oxigraph/src/oxrdfio/mod.rs
 create mode 100644 ng-oxigraph/src/oxrdfio/parser.rs
 create mode 100644 ng-oxigraph/src/oxrdfio/serializer.rs
 create mode 100644 ng-oxigraph/src/oxrdfxml/README.md
 create mode 100644 ng-oxigraph/src/oxrdfxml/error.rs
 create mode 100644 ng-oxigraph/src/oxrdfxml/mod.rs
 create mode 100644 ng-oxigraph/src/oxrdfxml/parser.rs
 create mode 100644 ng-oxigraph/src/oxrdfxml/serializer.rs
 create mode 100644 ng-oxigraph/src/oxrdfxml/utils.rs
 create mode 100644 ng-oxigraph/src/oxsdatatypes/README.md
 create mode 100644 ng-oxigraph/src/oxsdatatypes/boolean.rs
 create mode 100644 ng-oxigraph/src/oxsdatatypes/date_time.rs
 create mode 100644 ng-oxigraph/src/oxsdatatypes/decimal.rs
 create mode 100644 ng-oxigraph/src/oxsdatatypes/double.rs
 create mode 100644 ng-oxigraph/src/oxsdatatypes/duration.rs
 create mode 100644 ng-oxigraph/src/oxsdatatypes/float.rs
 create mode 100644 ng-oxigraph/src/oxsdatatypes/integer.rs
 create mode 100644 ng-oxigraph/src/oxsdatatypes/mod.rs
 create mode 100644 ng-oxigraph/src/oxttl/README.md
 create mode 100644 ng-oxigraph/src/oxttl/lexer.rs
 create mode 100644 ng-oxigraph/src/oxttl/line_formats.rs
 create mode 100644 ng-oxigraph/src/oxttl/mod.rs
 create mode 100644 ng-oxigraph/src/oxttl/n3.rs
 create mode 100644 ng-oxigraph/src/oxttl/nquads.rs
 create mode 100644 ng-oxigraph/src/oxttl/ntriples.rs
 create mode 100644 ng-oxigraph/src/oxttl/terse.rs
 create mode 100644 ng-oxigraph/src/oxttl/toolkit/error.rs
 create mode 100644 ng-oxigraph/src/oxttl/toolkit/lexer.rs
 create mode 100644 ng-oxigraph/src/oxttl/toolkit/mod.rs
 create mode 100644 ng-oxigraph/src/oxttl/toolkit/parser.rs
 create mode 100644 ng-oxigraph/src/oxttl/trig.rs
 create mode 100644 ng-oxigraph/src/oxttl/turtle.rs
 create mode 100644 ng-oxigraph/src/sparesults/README.md
 create mode 100644 ng-oxigraph/src/sparesults/csv.rs
 create mode 100644 ng-oxigraph/src/sparesults/error.rs
 create mode 100644 ng-oxigraph/src/sparesults/format.rs
 create mode 100644 ng-oxigraph/src/sparesults/json.rs
 create mode 100644 ng-oxigraph/src/sparesults/mod.rs
 create mode 100644 ng-oxigraph/src/sparesults/parser.rs
 create mode 100644 ng-oxigraph/src/sparesults/serializer.rs
 create mode 100644 ng-oxigraph/src/sparesults/solution.rs
 create mode 100644 ng-oxigraph/src/sparesults/xml.rs
 create mode 100644 ng-oxigraph/src/spargebra/README.md
 create mode 100644 ng-oxigraph/src/spargebra/algebra.rs
 create mode 100644 ng-oxigraph/src/spargebra/mod.rs
 create mode 100644 ng-oxigraph/src/spargebra/parser.rs
 create mode 100644 ng-oxigraph/src/spargebra/query.rs
 create mode 100644 ng-oxigraph/src/spargebra/term.rs
 create mode 100644 ng-oxigraph/src/spargebra/update.rs
 create mode 100644 ng-oxigraph/src/sparopt/README.md
 create mode 100644 ng-oxigraph/src/sparopt/algebra.rs
 create mode 100644 ng-oxigraph/src/sparopt/mod.rs
 create mode 100644 ng-oxigraph/src/sparopt/optimizer.rs
 create mode 100644 ng-oxigraph/src/sparopt/type_inference.rs

diff --git a/Cargo.lock b/Cargo.lock
index cf7e678..2973f0e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2911,21 +2911,6 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fc7aa29613bd6a620df431842069224d8bc9011086b1db4c0e0cd47fa03ec9a" -[[package]] -name = "librocksdb-sys" -version = "0.11.0+8.3.2" -source = "git+https://git.nextgraph.org/NextGraph/rust-rocksdb.git?branch=master#13b3c2022202abff8cfe921ee926d6ca567e66e8" -dependencies = [ - "bindgen", - "bzip2-sys", - "cc", - "glob", - "libc", - "libz-sys", - "openssl", - "pkg-config", -] - [[package]] name = "libz-sys" version = "1.1.12" @@ -3376,7 +3361,7 @@ dependencies = [ [[package]] name = "ng-oxigraph" -version = "0.4.0-alpha.8-ng" +version = "0.4.0-alpha.9-ng" dependencies = [ "codspeed-criterion-compat", "digest 0.10.7", @@ -3386,20 +3371,18 @@ dependencies = [ "json-event-parser", "libc", "md-5", + "memchr", + "ng-rocksdb", "oxilangtag", "oxiri", - "oxrdf", - "oxrdfio", - "oxsdatatypes", + "peg", + "quick-xml 0.31.0", "rand 0.8.5", "regex", - "rocksdb", + "serde", "sha1", "sha2 0.10.7", "siphasher 0.3.10", - "sparesults", - "spargebra", - "sparopt", "thiserror", "zstd", ] @@ -3437,6 +3420,21 @@ dependencies = [ "zeroize", ] +[[package]] +name = "ng-rocksdb" +version = "0.21.0" +source = "git+https://git.nextgraph.org/NextGraph/rust-rocksdb.git?branch=master#95ec9536b1a4088cfa75aae2851df468e64aa451" +dependencies = [ + "bindgen", + "bzip2-sys", + "cc", + "glob", + "libc", + "libz-sys", + "openssl", + "pkg-config", +] + [[package]] name = "ng-sdk-js" version = "0.1.0" @@ -3468,7 +3466,7 @@ name = "ng-storage-rocksdb" version = "0.1.0" dependencies = [ "ng-repo", - "rocksdb", + "ng-rocksdb", "serde_bare", ] @@ -3896,51 +3894,15 @@ dependencies = [ "thiserror", ] -[[package]] -name = "oxrdfio" -version = "0.1.0-alpha.5" -source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5" -dependencies = [ - "oxrdf", - "oxrdfxml", - "oxttl", - "thiserror", -] - -[[package]] -name = "oxrdfxml" -version = "0.1.0-alpha.5" -source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5" -dependencies = [ - "oxilangtag", - "oxiri", - "oxrdf", - "quick-xml 0.31.0", - "thiserror", -] - [[package]] name = "oxsdatatypes" version = "0.2.0-alpha.1" source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5" dependencies = [ - "js-sys", "serde", "thiserror", ] -[[package]] -name = "oxttl" -version = "0.1.0-alpha.5" -source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5" -dependencies = [ - "memchr", - "oxilangtag", - "oxiri", - "oxrdf", - "thiserror", -] - [[package]] name = "packed_simd_2" version = "0.3.8" @@ -4679,15 +4641,6 @@ dependencies = [ "winreg 0.10.1", ] -[[package]] -name = "rocksdb" -version = "0.21.0" -source = "git+https://git.nextgraph.org/NextGraph/rust-rocksdb.git?branch=master#13b3c2022202abff8cfe921ee926d6ca567e66e8" -dependencies = [ - "libc", - "librocksdb-sys", -] - [[package]] name = "rust-embed" version = "6.7.0" @@ -5239,41 +5192,6 @@ dependencies = [ "system-deps", ] -[[package]] -name = "sparesults" -version = "0.2.0-alpha.4" -source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5" -dependencies = [ - "json-event-parser", - "memchr", - "oxrdf", - "quick-xml 0.31.0", - "thiserror", -] - -[[package]] -name = "spargebra" -version = "0.3.0-alpha.4" -source = 
"git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5" -dependencies = [ - "oxilangtag", - "oxiri", - "oxrdf", - "peg", - "rand 0.8.5", - "thiserror", -] - -[[package]] -name = "sparopt" -version = "0.1.0-alpha.5-dev" -source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5" -dependencies = [ - "oxrdf", - "rand 0.8.5", - "spargebra", -] - [[package]] name = "spin" version = "0.9.8" diff --git a/ng-oxigraph/Cargo.toml b/ng-oxigraph/Cargo.toml index 1158b43..96cce65 100644 --- a/ng-oxigraph/Cargo.toml +++ b/ng-oxigraph/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ng-oxigraph" -version = "0.4.0-alpha.8-ng" +version = "0.4.0-alpha.9-ng" authors = ["Tpt <thomas@pellissier-tanon.fr>", "Niko PLP <niko@nextgraph.org>"] license = "MIT OR Apache-2.0" readme = "README.md" @@ -16,8 +16,15 @@ edition = "2021" rust-version = "1.70" [features] -js = ["getrandom/js", "oxsdatatypes/js", "js-sys"] - +default = ["rdf-star","sep-0002","sep-0006", "oxsdatatypes"] +js = ["getrandom/js", "js-sys"] +rdf-star = [] +custom-now = [] +xml = [] +ttl = [] +sep-0002 = [] +sep-0006 = [] +oxsdatatypes = [] [dependencies] digest = "0.10" @@ -26,22 +33,20 @@ json-event-parser = "0.2.0-alpha.2" md-5 = "0.10" oxilangtag = "0.1" oxiri = "0.2.3" -oxrdf = { version = "0.2.0-alpha.4", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = ["rdf-star", "oxsdatatypes"] } -oxrdfio = { version = "0.1.0-alpha.5", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = ["rdf-star"] } -oxsdatatypes = { version = "0.2.0-alpha.1", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main" } rand = "0.8" regex = "1.7" +serde = { version = "1.0.142", features = ["derive"] } sha1 = "0.10" sha2 = "0.10" siphasher = ">=0.3, <2.0" -sparesults = { version = "0.2.0-alpha.4", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = ["rdf-star"] } -spargebra = { version = "0.3.0-alpha.4", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = ["rdf-star", "sep-0002", "sep-0006"] } -sparopt = { version = "0.1.0-alpha.4", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = ["rdf-star", "sep-0002", "sep-0006"] } thiserror = "1.0.50" +quick-xml = ">=0.29, <0.32" +memchr = "2.5" +peg = "0.8" [target.'cfg(not(target_family = "wasm"))'.dependencies] libc = "0.2" -rocksdb = { version = "0.21.0", git = "https://git.nextgraph.org/NextGraph/rust-rocksdb.git", branch = "master", features = [ ] } +ng-rocksdb = { version = "0.21.0", git = "https://git.nextgraph.org/NextGraph/rust-rocksdb.git", branch = "master", features = [ ] } [target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] getrandom = "0.2.8" diff --git a/ng-oxigraph/src/lib.rs b/ng-oxigraph/src/lib.rs index b36c4d6..df9b74b 100644 --- a/ng-oxigraph/src/lib.rs +++ b/ng-oxigraph/src/lib.rs @@ -5,8 +5,20 @@ #![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] #![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] -pub mod io; -pub mod model; -pub mod sparql; -mod storage; -pub mod store; +pub mod oxigraph; + +pub mod oxrdf; + +pub mod oxrdfio; + +pub mod oxsdatatypes; + +pub mod oxttl; + +pub mod oxrdfxml; + +pub mod sparesults; + +pub mod spargebra; + +pub mod sparopt; diff --git a/ng-oxigraph/src/io/format.rs 
b/ng-oxigraph/src/oxigraph/io/format.rs similarity index 99% rename from ng-oxigraph/src/io/format.rs rename to ng-oxigraph/src/oxigraph/io/format.rs index 08b61d8..8268247 100644 --- a/ng-oxigraph/src/io/format.rs +++ b/ng-oxigraph/src/oxigraph/io/format.rs @@ -1,6 +1,6 @@ #![allow(deprecated)] -use oxrdfio::{RdfFormat, RdfParser, RdfSerializer}; +use crate::oxrdfio::{RdfFormat, RdfParser, RdfSerializer}; /// [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) serialization formats. /// diff --git a/ng-oxigraph/src/io/mod.rs b/ng-oxigraph/src/oxigraph/io/mod.rs similarity index 98% rename from ng-oxigraph/src/io/mod.rs rename to ng-oxigraph/src/oxigraph/io/mod.rs index 1b15bc8..6c76116 100644 --- a/ng-oxigraph/src/io/mod.rs +++ b/ng-oxigraph/src/oxigraph/io/mod.rs @@ -36,4 +36,4 @@ pub use self::format::{DatasetFormat, GraphFormat}; pub use self::read::{DatasetParser, GraphParser}; #[allow(deprecated)] pub use self::write::{DatasetSerializer, GraphSerializer}; -pub use oxrdfio::*; +pub use crate::oxrdfio::*; diff --git a/ng-oxigraph/src/io/read.rs b/ng-oxigraph/src/oxigraph/io/read.rs similarity index 97% rename from ng-oxigraph/src/io/read.rs rename to ng-oxigraph/src/oxigraph/io/read.rs index 6d01f6f..ca2c62f 100644 --- a/ng-oxigraph/src/io/read.rs +++ b/ng-oxigraph/src/oxigraph/io/read.rs @@ -2,9 +2,9 @@ //! Utilities to read RDF graphs and datasets. -use crate::io::{DatasetFormat, GraphFormat}; -use crate::model::*; -use oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser}; +use crate::oxigraph::io::{DatasetFormat, GraphFormat}; +use crate::oxigraph::model::*; +use crate::oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser}; use std::io::Read; /// Parsers for RDF graph serialization formats. diff --git a/ng-oxigraph/src/io/write.rs b/ng-oxigraph/src/oxigraph/io/write.rs similarity index 97% rename from ng-oxigraph/src/io/write.rs rename to ng-oxigraph/src/oxigraph/io/write.rs index 7f27cd9..e487720 100644 --- a/ng-oxigraph/src/io/write.rs +++ b/ng-oxigraph/src/oxigraph/io/write.rs @@ -2,9 +2,9 @@ //! Utilities to write RDF graphs and datasets. -use crate::io::{DatasetFormat, GraphFormat}; -use crate::model::*; -use oxrdfio::{RdfSerializer, ToWriteQuadWriter}; +use crate::oxigraph::io::{DatasetFormat, GraphFormat}; +use crate::oxigraph::model::*; +use crate::oxrdfio::{RdfSerializer, ToWriteQuadWriter}; use std::io::{self, Write}; /// A serializer for RDF graph serialization formats. diff --git a/ng-oxigraph/src/oxigraph/mod.rs b/ng-oxigraph/src/oxigraph/mod.rs new file mode 100644 index 0000000..57a6bd0 --- /dev/null +++ b/ng-oxigraph/src/oxigraph/mod.rs @@ -0,0 +1,5 @@ +pub mod io; +pub mod model; +pub mod sparql; +mod storage; +pub mod store; diff --git a/ng-oxigraph/src/model.rs b/ng-oxigraph/src/oxigraph/model.rs similarity index 88% rename from ng-oxigraph/src/model.rs rename to ng-oxigraph/src/oxigraph/model.rs index dbca934..a173d8a 100644 --- a/ng-oxigraph/src/model.rs +++ b/ng-oxigraph/src/oxigraph/model.rs @@ -17,6 +17,6 @@ //! assert_eq!(vec![triple], results); //! ``` -pub use oxrdf::*; +pub use crate::oxrdf::*; -pub use spargebra::term::GroundQuad; +pub use crate::spargebra::term::GroundQuad; diff --git a/ng-oxigraph/src/sparql/algebra.rs b/ng-oxigraph/src/oxigraph/sparql/algebra.rs similarity index 98% rename from ng-oxigraph/src/sparql/algebra.rs rename to ng-oxigraph/src/oxigraph/sparql/algebra.rs index 8b3f385..52af785 100644 --- a/ng-oxigraph/src/sparql/algebra.rs +++ b/ng-oxigraph/src/oxigraph/sparql/algebra.rs @@ -2,10 +2,11 @@ //! //! 
The root type for SPARQL queries is [`Query`] and the root type for updates is [`Update`]. -use crate::model::*; -use crate::sparql::eval::Timer; -use oxsdatatypes::DayTimeDuration; -use spargebra::GraphUpdateOperation; +use crate::oxigraph::model::*; +use crate::oxigraph::sparql::eval::Timer; +use crate::oxsdatatypes::DayTimeDuration; +use crate::spargebra; +use crate::spargebra::GraphUpdateOperation; use std::fmt; use std::str::FromStr; diff --git a/ng-oxigraph/src/sparql/dataset.rs b/ng-oxigraph/src/oxigraph/sparql/dataset.rs similarity index 95% rename from ng-oxigraph/src/sparql/dataset.rs rename to ng-oxigraph/src/oxigraph/sparql/dataset.rs index 3253be1..ddd8816 100644 --- a/ng-oxigraph/src/sparql/dataset.rs +++ b/ng-oxigraph/src/oxigraph/sparql/dataset.rs @@ -1,8 +1,10 @@ -use crate::model::TermRef; -use crate::sparql::algebra::QueryDataset; -use crate::sparql::EvaluationError; -use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup}; -use crate::storage::{StorageError, StorageReader}; +use crate::oxigraph::model::TermRef; +use crate::oxigraph::sparql::algebra::QueryDataset; +use crate::oxigraph::sparql::EvaluationError; +use crate::oxigraph::storage::numeric_encoder::{ + insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup, +}; +use crate::oxigraph::storage::{StorageError, StorageReader}; use std::cell::RefCell; use std::collections::hash_map::Entry; use std::collections::HashMap; diff --git a/ng-oxigraph/src/sparql/error.rs b/ng-oxigraph/src/oxigraph/sparql/error.rs similarity index 92% rename from ng-oxigraph/src/sparql/error.rs rename to ng-oxigraph/src/oxigraph/sparql/error.rs index 38731de..39ba505 100644 --- a/ng-oxigraph/src/sparql/error.rs +++ b/ng-oxigraph/src/oxigraph/sparql/error.rs @@ -1,8 +1,8 @@ -use crate::io::RdfParseError; -use crate::model::NamedNode; -use crate::sparql::results::QueryResultsParseError as ResultsParseError; -use crate::sparql::SparqlSyntaxError; -use crate::storage::StorageError; +use crate::oxigraph::io::RdfParseError; +use crate::oxigraph::model::NamedNode; +use crate::oxigraph::sparql::results::QueryResultsParseError as ResultsParseError; +use crate::oxigraph::sparql::SparqlSyntaxError; +use crate::oxigraph::storage::StorageError; use std::convert::Infallible; use std::error::Error; use std::io; diff --git a/ng-oxigraph/src/sparql/eval.rs b/ng-oxigraph/src/oxigraph/sparql/eval.rs similarity index 99% rename from ng-oxigraph/src/sparql/eval.rs rename to ng-oxigraph/src/oxigraph/sparql/eval.rs index 5065f52..d6bd322 100644 --- a/ng-oxigraph/src/sparql/eval.rs +++ b/ng-oxigraph/src/oxigraph/sparql/eval.rs @@ -1,33 +1,34 @@ -use crate::model::vocab::{rdf, xsd}; -use crate::model::{BlankNode, LiteralRef, NamedNodeRef, Term, Triple}; -use crate::sparql::algebra::{Query, QueryDataset}; -use crate::sparql::dataset::DatasetView; -use crate::sparql::error::EvaluationError; -use crate::sparql::model::*; -use crate::sparql::service::ServiceHandler; -use crate::sparql::CustomFunctionRegistry; -use crate::storage::numeric_encoder::*; -use crate::storage::small_string::SmallString; +use crate::oxigraph::model::vocab::{rdf, xsd}; +use crate::oxigraph::model::{BlankNode, LiteralRef, NamedNodeRef, Term, Triple}; +use crate::oxigraph::sparql::algebra::{Query, QueryDataset}; +use crate::oxigraph::sparql::dataset::DatasetView; +use crate::oxigraph::sparql::error::EvaluationError; +use crate::oxigraph::sparql::model::*; +use crate::oxigraph::sparql::service::ServiceHandler; +use 
crate::oxigraph::sparql::CustomFunctionRegistry; +use crate::oxigraph::storage::numeric_encoder::*; +use crate::oxigraph::storage::small_string::SmallString; +use crate::oxrdf::{TermRef, Variable}; +use crate::oxsdatatypes::*; +use crate::spargebra; +use crate::spargebra::algebra::{AggregateFunction, Function, PropertyPathExpression}; +use crate::spargebra::term::{ + GroundSubject, GroundTerm, GroundTermPattern, GroundTriple, NamedNodePattern, TermPattern, + TriplePattern, +}; +use crate::sparopt::algebra::{ + AggregateExpression, Expression, GraphPattern, JoinAlgorithm, LeftJoinAlgorithm, + MinusAlgorithm, OrderExpression, +}; use digest::Digest; use json_event_parser::{JsonEvent, ToWriteJsonWriter}; use md5::Md5; use oxilangtag::LanguageTag; use oxiri::Iri; -use oxrdf::{TermRef, Variable}; -use oxsdatatypes::*; use rand::random; use regex::{Regex, RegexBuilder}; use sha1::Sha1; use sha2::{Sha256, Sha384, Sha512}; -use spargebra::algebra::{AggregateFunction, Function, PropertyPathExpression}; -use spargebra::term::{ - GroundSubject, GroundTerm, GroundTermPattern, GroundTriple, NamedNodePattern, TermPattern, - TriplePattern, -}; -use sparopt::algebra::{ - AggregateExpression, Expression, GraphPattern, JoinAlgorithm, LeftJoinAlgorithm, - MinusAlgorithm, OrderExpression, -}; use std::cell::Cell; use std::cmp::Ordering; use std::collections::hash_map::DefaultHasher; diff --git a/ng-oxigraph/src/sparql/http/dummy.rs b/ng-oxigraph/src/oxigraph/sparql/http/dummy.rs similarity index 100% rename from ng-oxigraph/src/sparql/http/dummy.rs rename to ng-oxigraph/src/oxigraph/sparql/http/dummy.rs diff --git a/ng-oxigraph/src/sparql/http/mod.rs b/ng-oxigraph/src/oxigraph/sparql/http/mod.rs similarity index 100% rename from ng-oxigraph/src/sparql/http/mod.rs rename to ng-oxigraph/src/oxigraph/sparql/http/mod.rs diff --git a/ng-oxigraph/src/sparql/http/simple.rs b/ng-oxigraph/src/oxigraph/sparql/http/simple.rs similarity index 100% rename from ng-oxigraph/src/sparql/http/simple.rs rename to ng-oxigraph/src/oxigraph/sparql/http/simple.rs diff --git a/ng-oxigraph/src/sparql/mod.rs b/ng-oxigraph/src/oxigraph/sparql/mod.rs similarity index 93% rename from ng-oxigraph/src/sparql/mod.rs rename to ng-oxigraph/src/oxigraph/sparql/mod.rs index 089f84b..2aeb2e2 100644 --- a/ng-oxigraph/src/sparql/mod.rs +++ b/ng-oxigraph/src/oxigraph/sparql/mod.rs @@ -12,22 +12,23 @@ pub mod results; mod service; mod update; -use crate::model::{NamedNode, Term}; -pub use crate::sparql::algebra::{Query, QueryDataset, Update}; -use crate::sparql::dataset::DatasetView; -pub use crate::sparql::error::EvaluationError; -use crate::sparql::eval::{EvalNodeWithStats, SimpleEvaluator, Timer}; -pub use crate::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter}; -pub use crate::sparql::service::ServiceHandler; -use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler}; -pub(crate) use crate::sparql::update::evaluate_update; -use crate::storage::StorageReader; +use super::model::{NamedNode, Term}; +pub use super::sparql::algebra::{Query, QueryDataset, Update}; +use super::sparql::dataset::DatasetView; +pub use super::sparql::error::EvaluationError; +use super::sparql::eval::{EvalNodeWithStats, SimpleEvaluator, Timer}; +pub use super::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter}; +pub use super::sparql::service::ServiceHandler; +use super::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler}; +pub(super) use 
super::sparql::update::evaluate_update; +use super::storage::StorageReader; +pub use crate::oxrdf::{Variable, VariableNameParseError}; +use crate::oxsdatatypes::{DayTimeDuration, Float}; +use crate::spargebra; +pub use crate::spargebra::SparqlSyntaxError; +use crate::sparopt::algebra::GraphPattern; +use crate::sparopt::Optimizer; use json_event_parser::{JsonEvent, ToWriteJsonWriter}; -pub use oxrdf::{Variable, VariableNameParseError}; -use oxsdatatypes::{DayTimeDuration, Float}; -pub use spargebra::SparqlSyntaxError; -use sparopt::algebra::GraphPattern; -use sparopt::Optimizer; use std::collections::HashMap; use std::rc::Rc; use std::sync::Arc; diff --git a/ng-oxigraph/src/sparql/model.rs b/ng-oxigraph/src/oxigraph/sparql/model.rs similarity index 98% rename from ng-oxigraph/src/sparql/model.rs rename to ng-oxigraph/src/oxigraph/sparql/model.rs index 0fca83e..7352cf9 100644 --- a/ng-oxigraph/src/sparql/model.rs +++ b/ng-oxigraph/src/oxigraph/sparql/model.rs @@ -1,11 +1,11 @@ -use crate::io::{RdfFormat, RdfSerializer}; -use crate::model::*; -use crate::sparql::error::EvaluationError; -use crate::sparql::results::{ +use crate::oxigraph::io::{RdfFormat, RdfSerializer}; +use crate::oxigraph::model::*; +use crate::oxigraph::sparql::error::EvaluationError; +use crate::oxigraph::sparql::results::{ FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsFormat, QueryResultsParseError, QueryResultsParser, QueryResultsSerializer, }; -pub use sparesults::QuerySolution; +pub use crate::sparesults::QuerySolution; use std::io::{Read, Write}; use std::sync::Arc; @@ -276,6 +276,7 @@ impl Iterator for QueryTripleIter { } } +#[cfg(feature = "rdf-star")] #[cfg(test)] #[allow(clippy::panic_in_result_fn)] mod tests { diff --git a/ng-oxigraph/src/sparql/results.rs b/ng-oxigraph/src/oxigraph/sparql/results.rs similarity index 98% rename from ng-oxigraph/src/sparql/results.rs rename to ng-oxigraph/src/oxigraph/sparql/results.rs index 00f8cc3..6dea288 100644 --- a/ng-oxigraph/src/sparql/results.rs +++ b/ng-oxigraph/src/oxigraph/sparql/results.rs @@ -41,4 +41,4 @@ //! ); //! 
``` -pub use sparesults::*; +pub use crate::sparesults::*; diff --git a/ng-oxigraph/src/sparql/service.rs b/ng-oxigraph/src/oxigraph/sparql/service.rs similarity index 92% rename from ng-oxigraph/src/sparql/service.rs rename to ng-oxigraph/src/oxigraph/sparql/service.rs index e3dd560..40e9aad 100644 --- a/ng-oxigraph/src/sparql/service.rs +++ b/ng-oxigraph/src/oxigraph/sparql/service.rs @@ -1,9 +1,9 @@ -use crate::model::NamedNode; -use crate::sparql::algebra::Query; -use crate::sparql::error::EvaluationError; -use crate::sparql::http::Client; -use crate::sparql::model::QueryResults; -use crate::sparql::results::QueryResultsFormat; +use crate::oxigraph::model::NamedNode; +use crate::oxigraph::sparql::algebra::Query; +use crate::oxigraph::sparql::error::EvaluationError; +use crate::oxigraph::sparql::http::Client; +use crate::oxigraph::sparql::model::QueryResults; +use crate::oxigraph::sparql::results::QueryResultsFormat; use std::error::Error; use std::time::Duration; diff --git a/ng-oxigraph/src/sparql/update.rs b/ng-oxigraph/src/oxigraph/sparql/update.rs similarity index 96% rename from ng-oxigraph/src/sparql/update.rs rename to ng-oxigraph/src/oxigraph/sparql/update.rs index 967de82..3ee9c8a 100644 --- a/ng-oxigraph/src/sparql/update.rs +++ b/ng-oxigraph/src/oxigraph/sparql/update.rs @@ -1,21 +1,22 @@ -use crate::io::{RdfFormat, RdfParser}; -use crate::model::{GraphName as OxGraphName, GraphNameRef, Quad as OxQuad}; -use crate::sparql::algebra::QueryDataset; -use crate::sparql::dataset::DatasetView; -use crate::sparql::eval::{EncodedTuple, SimpleEvaluator}; -use crate::sparql::http::Client; -use crate::sparql::{EvaluationError, Update, UpdateOptions}; -use crate::storage::numeric_encoder::{Decoder, EncodedTerm}; -use crate::storage::StorageWriter; -use oxiri::Iri; -use spargebra::algebra::{GraphPattern, GraphTarget}; -use spargebra::term::{ +use crate::oxigraph::io::{RdfFormat, RdfParser}; +use crate::oxigraph::model::{GraphName as OxGraphName, GraphNameRef, Quad as OxQuad}; +use crate::oxigraph::sparql::algebra::QueryDataset; +use crate::oxigraph::sparql::dataset::DatasetView; +use crate::oxigraph::sparql::eval::{EncodedTuple, SimpleEvaluator}; +use crate::oxigraph::sparql::http::Client; +use crate::oxigraph::sparql::{EvaluationError, Update, UpdateOptions}; +use crate::oxigraph::storage::numeric_encoder::{Decoder, EncodedTerm}; +use crate::oxigraph::storage::StorageWriter; +use crate::spargebra::algebra::{GraphPattern, GraphTarget}; +use crate::spargebra::term::{ BlankNode, GraphName, GraphNamePattern, GroundQuad, GroundQuadPattern, GroundSubject, GroundTerm, GroundTermPattern, GroundTriple, GroundTriplePattern, NamedNode, NamedNodePattern, Quad, QuadPattern, Subject, Term, TermPattern, Triple, TriplePattern, Variable, }; -use spargebra::GraphUpdateOperation; -use sparopt::Optimizer; +use crate::spargebra::GraphUpdateOperation; +use crate::sparopt; +use crate::sparopt::Optimizer; +use oxiri::Iri; use std::collections::HashMap; use std::io; use std::rc::Rc; diff --git a/ng-oxigraph/src/storage/backend/fallback.rs b/ng-oxigraph/src/oxigraph/storage/backend/fallback.rs similarity index 99% rename from ng-oxigraph/src/storage/backend/fallback.rs rename to ng-oxigraph/src/oxigraph/storage/backend/fallback.rs index 7214851..e52ff0a 100644 --- a/ng-oxigraph/src/storage/backend/fallback.rs +++ b/ng-oxigraph/src/oxigraph/storage/backend/fallback.rs @@ -1,7 +1,7 @@ //! TODO: This storage is dramatically naive. 
-use crate::storage::StorageError; -use crate::store::CorruptionError; +use crate::oxigraph::storage::StorageError; +use crate::oxigraph::store::CorruptionError; use std::cell::RefCell; use std::collections::{BTreeMap, HashMap}; use std::error::Error; diff --git a/ng-oxigraph/src/storage/backend/mod.rs b/ng-oxigraph/src/oxigraph/storage/backend/mod.rs similarity index 100% rename from ng-oxigraph/src/storage/backend/mod.rs rename to ng-oxigraph/src/oxigraph/storage/backend/mod.rs diff --git a/ng-oxigraph/src/storage/backend/oxi_rocksdb.rs b/ng-oxigraph/src/oxigraph/storage/backend/oxi_rocksdb.rs similarity index 99% rename from ng-oxigraph/src/storage/backend/oxi_rocksdb.rs rename to ng-oxigraph/src/oxigraph/storage/backend/oxi_rocksdb.rs index 37e18ee..9dbe7e3 100644 --- a/ng-oxigraph/src/storage/backend/oxi_rocksdb.rs +++ b/ng-oxigraph/src/oxigraph/storage/backend/oxi_rocksdb.rs @@ -8,10 +8,10 @@ clippy::unwrap_in_result )] -use crate::storage::error::{CorruptionError, StorageError}; +use crate::oxigraph::storage::error::{CorruptionError, StorageError}; use libc::{c_char, c_void}; +use ng_rocksdb::ffi::*; use rand::random; -use rocksdb::ffi::*; use std::borrow::Borrow; #[cfg(unix)] use std::cmp::min; diff --git a/ng-oxigraph/src/storage/binary_encoder.rs b/ng-oxigraph/src/oxigraph/storage/binary_encoder.rs similarity index 98% rename from ng-oxigraph/src/storage/binary_encoder.rs rename to ng-oxigraph/src/oxigraph/storage/binary_encoder.rs index 1e789b7..d1cf1ac 100644 --- a/ng-oxigraph/src/storage/binary_encoder.rs +++ b/ng-oxigraph/src/oxigraph/storage/binary_encoder.rs @@ -1,7 +1,7 @@ -use crate::storage::error::{CorruptionError, StorageError}; -use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, EncodedTriple, StrHash}; -use crate::storage::small_string::SmallString; -use oxsdatatypes::*; +use crate::oxigraph::storage::error::{CorruptionError, StorageError}; +use crate::oxigraph::storage::numeric_encoder::{EncodedQuad, EncodedTerm, EncodedTriple, StrHash}; +use crate::oxigraph::storage::small_string::SmallString; +use crate::oxsdatatypes::*; use std::io::Read; use std::mem::size_of; @@ -635,8 +635,8 @@ pub fn write_term(sink: &mut Vec<u8>, term: &EncodedTerm) { #[allow(clippy::panic_in_result_fn)] mod tests { use super::*; - use crate::model::TermRef; - use crate::storage::numeric_encoder::*; + use crate::oxigraph::model::TermRef; + use crate::oxigraph::storage::numeric_encoder::*; use std::cell::RefCell; use std::collections::HashMap; @@ -670,8 +670,8 @@ mod tests { #[test] fn test_encoding() { - use crate::model::vocab::xsd; - use crate::model::*; + use crate::oxigraph::model::vocab::xsd; + use crate::oxigraph::model::*; let store = MemoryStrStore::default(); let terms: Vec<Term> = vec![…] diff --git a/ng-oxigraph/src/storage/error.rs b/ng-oxigraph/src/oxigraph/storage/error.rs similarity index 95% rename from ng-oxigraph/src/storage/error.rs rename to ng-oxigraph/src/oxigraph/storage/error.rs --- a/ng-oxigraph/src/storage/error.rs +++ b/ng-oxigraph/src/oxigraph/storage/error.rs -/// An error raised while loading a file into a [`Store`](crate::store::Store). +/// An error raised while loading a file into a [`Store`](crate::oxigraph::store::Store). #[derive(Debug, thiserror::Error)] pub enum LoaderError { /// An error raised while reading the file. @@ -111,7 +111,7 @@ impl From<LoaderError> for io::Error { } } -/// An error raised while writing a file from a [`Store`](crate::store::Store). +/// An error raised while writing a file from a [`Store`](crate::oxigraph::store::Store). #[derive(Debug, thiserror::Error)] pub enum SerializerError { /// An error raised while writing the content.
diff --git a/ng-oxigraph/src/storage/mod.rs b/ng-oxigraph/src/oxigraph/storage/mod.rs similarity index 98% rename from ng-oxigraph/src/storage/mod.rs rename to ng-oxigraph/src/oxigraph/storage/mod.rs index 8dc332e..a20740e 100644 --- a/ng-oxigraph/src/storage/mod.rs +++ b/ng-oxigraph/src/oxigraph/storage/mod.rs @@ -1,20 +1,24 @@ #![allow(clippy::same_name_method)] #[cfg(all(not(target_family = "wasm")))] -use crate::model::Quad; -use crate::model::{GraphNameRef, NamedOrBlankNodeRef, QuadRef, TermRef}; -use crate::storage::backend::{Reader, Transaction}; +use crate::oxigraph::model::Quad; +use crate::oxigraph::model::{GraphNameRef, NamedOrBlankNodeRef, QuadRef, TermRef}; +use crate::oxigraph::storage::backend::{Reader, Transaction}; #[cfg(all(not(target_family = "wasm")))] -use crate::storage::binary_encoder::LATEST_STORAGE_VERSION; -use crate::storage::binary_encoder::{ +use crate::oxigraph::storage::binary_encoder::LATEST_STORAGE_VERSION; +use crate::oxigraph::storage::binary_encoder::{ decode_term, encode_term, encode_term_pair, encode_term_quad, encode_term_triple, write_gosp_quad, write_gpos_quad, write_gspo_quad, write_osp_quad, write_ospg_quad, write_pos_quad, write_posg_quad, write_spo_quad, write_spog_quad, write_term, QuadEncoding, WRITTEN_TERM_MAX_SIZE, }; -pub use crate::storage::error::{CorruptionError, LoaderError, SerializerError, StorageError}; +pub use crate::oxigraph::storage::error::{ + CorruptionError, LoaderError, SerializerError, StorageError, +}; #[cfg(all(not(target_family = "wasm")))] -use crate::storage::numeric_encoder::Decoder; -use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup}; +use crate::oxigraph::storage::numeric_encoder::Decoder; +use crate::oxigraph::storage::numeric_encoder::{ + insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup, +}; use backend::{ColumnFamily, ColumnFamilyDefinition, Db, Iter}; #[cfg(all(not(target_family = "wasm")))] use std::collections::VecDeque; diff --git a/ng-oxigraph/src/storage/numeric_encoder.rs b/ng-oxigraph/src/oxigraph/storage/numeric_encoder.rs similarity index 99% rename from ng-oxigraph/src/storage/numeric_encoder.rs rename to ng-oxigraph/src/oxigraph/storage/numeric_encoder.rs index bf4b070..81fa52c 100644 --- a/ng-oxigraph/src/storage/numeric_encoder.rs +++ b/ng-oxigraph/src/oxigraph/storage/numeric_encoder.rs @@ -1,9 +1,9 @@ #![allow(clippy::unreadable_literal)] -use crate::model::*; -use crate::storage::error::{CorruptionError, StorageError}; -use crate::storage::small_string::SmallString; -use oxsdatatypes::*; +use crate::oxigraph::model::*; +use crate::oxigraph::storage::error::{CorruptionError, StorageError}; +use crate::oxigraph::storage::small_string::SmallString; +use crate::oxsdatatypes::*; use siphasher::sip128::{Hasher128, SipHasher24}; use std::fmt::Debug; use std::hash::{Hash, Hasher}; diff --git a/ng-oxigraph/src/storage/small_string.rs b/ng-oxigraph/src/oxigraph/storage/small_string.rs similarity index 100% rename from ng-oxigraph/src/storage/small_string.rs rename to ng-oxigraph/src/oxigraph/storage/small_string.rs diff --git a/ng-oxigraph/src/store.rs b/ng-oxigraph/src/oxigraph/store.rs similarity index 99% rename from ng-oxigraph/src/store.rs rename to ng-oxigraph/src/oxigraph/store.rs index dc2447b..d5f105c 100644 --- a/ng-oxigraph/src/store.rs +++ b/ng-oxigraph/src/oxigraph/store.rs @@ -26,20 +26,20 @@ //! # Result::<_, Box<dyn std::error::Error>>::Ok(()) //! 
``` #[cfg(all(not(target_family = "wasm")))] -use crate::io::RdfParseError; -use crate::io::{RdfFormat, RdfParser, RdfSerializer}; -use crate::model::*; -use crate::sparql::{ +use super::io::RdfParseError; +use super::io::{RdfFormat, RdfParser, RdfSerializer}; +use super::model::*; +use super::sparql::{ evaluate_query, evaluate_update, EvaluationError, Query, QueryExplanation, QueryOptions, QueryResults, Update, UpdateOptions, }; -use crate::storage::numeric_encoder::{Decoder, EncodedQuad, EncodedTerm}; +use super::storage::numeric_encoder::{Decoder, EncodedQuad, EncodedTerm}; #[cfg(all(not(target_family = "wasm")))] -use crate::storage::StorageBulkLoader; -use crate::storage::{ +use super::storage::StorageBulkLoader; +use super::storage::{ ChainedDecodingQuadIterator, DecodingGraphIterator, Storage, StorageReader, StorageWriter, }; -pub use crate::storage::{CorruptionError, LoaderError, SerializerError, StorageError}; +pub use super::storage::{CorruptionError, LoaderError, SerializerError, StorageError}; use std::error::Error; use std::io::{Read, Write}; #[cfg(all(not(target_family = "wasm")))] @@ -1930,7 +1930,7 @@ mod tests { #[test] fn store() -> Result<(), StorageError> { - use crate::model::*; + use super::super::model::*; let main_s = Subject::from(BlankNode::default()); let main_p = NamedNode::new("http://example.com").unwrap(); diff --git a/ng-oxigraph/src/oxrdf/README.md b/ng-oxigraph/src/oxrdf/README.md new file mode 100644 index 0000000..88ffa62 --- /dev/null +++ b/ng-oxigraph/src/oxrdf/README.md @@ -0,0 +1,51 @@ +OxRDF +===== + +[](https://crates.io/crates/oxrdf) +[](https://docs.rs/oxrdf) +[](https://crates.io/crates/oxrdf) +[](https://github.com/oxigraph/oxigraph/actions) +[](https://gitter.im/oxigraph/community) + +OxRDF is a simple library providing datastructures encoding [RDF 1.1 concepts](https://www.w3.org/TR/rdf11-concepts/). + +This crate is intended to be a basic building block of other crates like [Oxigraph](https://crates.io/crates/oxigraph) or [Spargebra](https://crates.io/crates/spargebra). + +Support for [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is available behind the `rdf-star` feature. + +OxRDF is inspired by [RDF/JS](https://rdf.js.org/data-model-spec/) and [Apache Commons RDF](http://commons.apache.org/proper/commons-rdf/). + +Use [`oxrdfio`](https://crates.io/crates/oxrdfio) if you need to read or write RDF files. + +Usage example: + +```rust +use oxrdf::*; + +let mut graph = Graph::default(); + +// insertion +let ex = NamedNodeRef::new("http://example.com").unwrap(); +let triple = TripleRef::new(ex, ex, ex); +graph.insert(triple); + +// simple filter +let results: Vec<_> = graph.triples_for_subject(ex).collect(); +assert_eq!(vec![triple], results); +``` + +## License + +This project is licensed under either of + +* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or + `<http://www.apache.org/licenses/LICENSE-2.0>`) +* MIT license ([LICENSE-MIT](../LICENSE-MIT) or + `<http://opensource.org/licenses/MIT>`) + +at your option. + + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. 
diff --git a/ng-oxigraph/src/oxrdf/blank_node.rs b/ng-oxigraph/src/oxrdf/blank_node.rs new file mode 100644 index 0000000..2fe02c2 --- /dev/null +++ b/ng-oxigraph/src/oxrdf/blank_node.rs @@ -0,0 +1,403 @@ +use rand::random; +use serde::{Deserialize, Serialize}; +use std::io::Write; +use std::{fmt, str}; + +/// An owned RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). +/// +/// The common way to create a new blank node is to use the [`BlankNode::default()`] function. +/// +/// It is also possible to create a blank node from a blank node identifier using the [`BlankNode::new()`] function. +/// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars. +/// +/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation: +/// ``` +/// use oxrdf::BlankNode; +/// +/// assert_eq!("_:a122", BlankNode::new("a122")?.to_string()); +/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] +pub struct BlankNode(BlankNodeContent); + +#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)] +enum BlankNodeContent { + Named(String), + Anonymous { id: u128, str: IdStr }, +} + +impl BlankNode { + /// Creates a blank node from a unique identifier. + /// + /// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars. + /// + /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`] + /// that creates a random ID that could be easily inlined by Oxigraph stores. + pub fn new(id: impl Into<String>) -> Result<Self, BlankNodeIdParseError> { + let id = id.into(); + validate_blank_node_identifier(&id)?; + Ok(Self::new_unchecked(id)) + } + + /// Creates a blank node from a unique identifier without validation. + /// + /// It is the caller's responsibility to ensure that `id` is a valid blank node identifier + /// according to N-Triples, Turtle, and SPARQL grammars. + /// + /// [`BlankNode::new()`] is a safe version of this constructor and should be used for untrusted data. + #[inline] + pub fn new_unchecked(id: impl Into<String>) -> Self { + let id = id.into(); + if let Some(numerical_id) = to_integer_id(&id) { + Self::new_from_unique_id(numerical_id) + } else { + Self(BlankNodeContent::Named(id)) + } + } + + /// Creates a blank node from a unique numerical id. + /// + /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`]. + #[inline] + pub fn new_from_unique_id(id: u128) -> Self { + Self(BlankNodeContent::Anonymous { + id, + str: IdStr::new(id), + }) + } + + /// Returns the underlying ID of this blank node. + #[inline] + pub fn as_str(&self) -> &str { + match &self.0 { + BlankNodeContent::Named(id) => id, + BlankNodeContent::Anonymous { str, .. } => str.as_str(), + } + } + + /// Returns the underlying ID of this blank node. + #[inline] + pub fn into_string(self) -> String { + match self.0 { + BlankNodeContent::Named(id) => id, + BlankNodeContent::Anonymous { str, .. 
} => str.as_str().to_owned(), + } + } + + #[inline] + pub fn as_ref(&self) -> BlankNodeRef<'_> { + BlankNodeRef(match &self.0 { + BlankNodeContent::Named(id) => BlankNodeRefContent::Named(id.as_str()), + BlankNodeContent::Anonymous { id, str } => BlankNodeRefContent::Anonymous { + id: *id, + str: str.as_str(), + }, + }) + } +} + +impl fmt::Display for BlankNode { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_ref().fmt(f) + } +} + +impl Default for BlankNode { + /// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id. + #[inline] + fn default() -> Self { + // We ensure the ID does not start with a number to be also valid with RDF/XML + loop { + let id = random(); + let str = IdStr::new(id); + if matches!(str.as_str().as_bytes().first(), Some(b'a'..=b'f')) { + return Self(BlankNodeContent::Anonymous { id, str }); + } + } + } +} + +/// A borrowed RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). +/// +/// The common way to create a new blank node is to use the [`BlankNode::default`] trait method. +/// +/// It is also possible to create a blank node from a blank node identifier using the [`BlankNodeRef::new()`] function. +/// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars. +/// +/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation: +/// ``` +/// use oxrdf::BlankNodeRef; +/// +/// assert_eq!("_:a122", BlankNodeRef::new("a122")?.to_string()); +/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +pub struct BlankNodeRef<'a>(BlankNodeRefContent<'a>); + +#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)] +enum BlankNodeRefContent<'a> { + Named(&'a str), + Anonymous { id: u128, str: &'a str }, +} + +impl<'a> BlankNodeRef<'a> { + /// Creates a blank node from a unique identifier. + /// + /// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars. + /// + /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`] + /// that creates a random ID that could be easily inlined by Oxigraph stores. + pub fn new(id: &'a str) -> Result<Self, BlankNodeIdParseError> { + validate_blank_node_identifier(id)?; + Ok(Self::new_unchecked(id)) + } + + /// Creates a blank node from a unique identifier without validation. + /// + /// It is the caller's responsibility to ensure that `id` is a valid blank node identifier + /// according to N-Triples, Turtle, and SPARQL grammars. + /// + /// [`BlankNodeRef::new()`] is a safe version of this constructor and should be used for untrusted data. + #[inline] + pub fn new_unchecked(id: &'a str) -> Self { + if let Some(numerical_id) = to_integer_id(id) { + Self(BlankNodeRefContent::Anonymous { + id: numerical_id, + str: id, + }) + } else { + Self(BlankNodeRefContent::Named(id)) + } + } + + /// Returns the underlying ID of this blank node. + #[inline] + pub const fn as_str(self) -> &'a str { + match self.0 { + BlankNodeRefContent::Named(id) => id, + BlankNodeRefContent::Anonymous { str, .. } => str, + } + } + + /// Returns the internal numerical ID of this blank node if it has been created using [`BlankNode::new_from_unique_id`].
+ /// + /// ``` + /// use oxrdf::BlankNode; + /// + /// assert_eq!( + /// BlankNode::new_from_unique_id(128).as_ref().unique_id(), + /// Some(128) + /// ); + /// assert_eq!(BlankNode::new("foo")?.as_ref().unique_id(), None); + /// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(()) + /// ``` + #[inline] + pub const fn unique_id(&self) -> Option<u128> { + match self.0 { + BlankNodeRefContent::Named(_) => None, + BlankNodeRefContent::Anonymous { id, .. } => Some(id), + } + } + + #[inline] + pub fn into_owned(self) -> BlankNode { + BlankNode(match self.0 { + BlankNodeRefContent::Named(id) => BlankNodeContent::Named(id.to_owned()), + BlankNodeRefContent::Anonymous { id, .. } => BlankNodeContent::Anonymous { + id, + str: IdStr::new(id), + }, + }) + } +} + +impl fmt::Display for BlankNodeRef<'_> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "_:{}", self.as_str()) + } +} + +impl<'a> From<&'a BlankNode> for BlankNodeRef<'a> { + #[inline] + fn from(node: &'a BlankNode) -> Self { + node.as_ref() + } +} + +impl<'a> From<BlankNodeRef<'a>> for BlankNode { + #[inline] + fn from(node: BlankNodeRef<'a>) -> Self { + node.into_owned() + } +} + +impl PartialEq<BlankNode> for BlankNodeRef<'_> { + #[inline] + fn eq(&self, other: &BlankNode) -> bool { + *self == other.as_ref() + } +} + +impl PartialEq<BlankNodeRef<'_>> for BlankNode { + #[inline] + fn eq(&self, other: &BlankNodeRef<'_>) -> bool { + self.as_ref() == *other + } +} + +#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)] +struct IdStr([u8; 32]); + +impl IdStr { + #[inline] + fn new(id: u128) -> Self { + let mut str = [0; 32]; + write!(&mut str[..], "{id:x}").unwrap(); + Self(str) + } + + #[inline] + fn as_str(&self) -> &str { + let len = self.0.iter().position(|x| x == &0).unwrap_or(32); + str::from_utf8(&self.0[..len]).unwrap() + } +} + +fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError> { + let mut chars = id.chars(); + let front = chars.next().ok_or(BlankNodeIdParseError)?; + match front { + '0'..='9' + | '_' + | ':' + | 'A'..='Z' + | 'a'..='z' + | '\u{00C0}'..='\u{00D6}' + | '\u{00D8}'..='\u{00F6}' + | '\u{00F8}'..='\u{02FF}' + | '\u{0370}'..='\u{037D}' + | '\u{037F}'..='\u{1FFF}' + | '\u{200C}'..='\u{200D}' + | '\u{2070}'..='\u{218F}' + | '\u{2C00}'..='\u{2FEF}' + | '\u{3001}'..='\u{D7FF}' + | '\u{F900}'..='\u{FDCF}' + | '\u{FDF0}'..='\u{FFFD}' + | '\u{10000}'..='\u{EFFFF}' => (), + _ => return Err(BlankNodeIdParseError), + } + for c in chars { + match c { + '.' 
// validated later + | '-' + | '0'..='9' + | '\u{00B7}' + | '\u{0300}'..='\u{036F}' + | '\u{203F}'..='\u{2040}' + | '_' + | ':' + | 'A'..='Z' + | 'a'..='z' + | '\u{00C0}'..='\u{00D6}' + | '\u{00D8}'..='\u{00F6}' + | '\u{00F8}'..='\u{02FF}' + | '\u{0370}'..='\u{037D}' + | '\u{037F}'..='\u{1FFF}' + | '\u{200C}'..='\u{200D}' + | '\u{2070}'..='\u{218F}' + | '\u{2C00}'..='\u{2FEF}' + | '\u{3001}'..='\u{D7FF}' + | '\u{F900}'..='\u{FDCF}' + | '\u{FDF0}'..='\u{FFFD}' + | '\u{10000}'..='\u{EFFFF}' => (), + _ => return Err(BlankNodeIdParseError), + } + } + + // Could not end with a dot + if id.ends_with('.') { + Err(BlankNodeIdParseError) + } else { + Ok(()) + } +} + +#[inline] +fn to_integer_id(id: &str) -> Option<u128> { + let digits = id.as_bytes(); + let mut value: u128 = 0; + if let None | Some(b'0') = digits.first() { + return None; // No empty string or leading zeros + } + for digit in digits { + value = value.checked_mul(16)?.checked_add( + match *digit { + b'0'..=b'9' => digit - b'0', + b'a'..=b'f' => digit - b'a' + 10, + _ => return None, + } + .into(), + )?; + } + Some(value) +} + +/// An error raised during [`BlankNode`] IDs validation. +#[derive(Debug, thiserror::Error)] +#[error("The blank node identifier is invalid")] +pub struct BlankNodeIdParseError; + +#[cfg(test)] +#[allow(clippy::panic_in_result_fn)] +mod tests { + use super::*; + + #[test] + fn as_str_partial() { + let b = BlankNode::new_from_unique_id(0x42); + assert_eq!(b.as_str(), "42"); + } + + #[test] + fn as_str_full() { + let b = BlankNode::new_from_unique_id(0x7777_6666_5555_4444_3333_2222_1111_0000); + assert_eq!(b.as_str(), "77776666555544443333222211110000"); + } + + #[test] + fn new_validation() { + BlankNode::new("").unwrap_err(); + BlankNode::new("a").unwrap(); + BlankNode::new("-").unwrap_err(); + BlankNode::new("a-").unwrap(); + BlankNode::new(".").unwrap_err(); + BlankNode::new("a.").unwrap_err(); + BlankNode::new("a.a").unwrap(); + } + + #[test] + fn new_numerical() { + assert_eq!( + BlankNode::new("100a").unwrap(), + BlankNode::new_from_unique_id(0x100a), + ); + assert_ne!( + BlankNode::new("100A").unwrap(), + BlankNode::new_from_unique_id(0x100a) + ); + } + + #[test] + fn test_equals() { + assert_eq!( + BlankNode::new("100a").unwrap(), + BlankNodeRef::new("100a").unwrap() + ); + assert_eq!( + BlankNode::new("zzz").unwrap(), + BlankNodeRef::new("zzz").unwrap() + ); + } +} diff --git a/ng-oxigraph/src/oxrdf/dataset.rs b/ng-oxigraph/src/oxrdf/dataset.rs new file mode 100644 index 0000000..169bdf7 --- /dev/null +++ b/ng-oxigraph/src/oxrdf/dataset.rs @@ -0,0 +1,1641 @@ +//! [In-memory implementation](Dataset) of [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). +//! +//! Usage example: +//! ``` +//! use oxrdf::*; +//! +//! let mut dataset = Dataset::default(); +//! +//! // insertion +//! let ex = NamedNodeRef::new("http://example.com")?; +//! let quad = QuadRef::new(ex, ex, ex, ex); +//! dataset.insert(quad); +//! +//! // simple filter +//! let results: Vec<_> = dataset.quads_for_subject(ex).collect(); +//! assert_eq!(vec![quad], results); +//! +//! // direct access to a dataset graph +//! let results: Vec<_> = dataset.graph(ex).iter().collect(); +//! assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); +//! +//! // Print +//! assert_eq!( +//! dataset.to_string(), +//! "<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n" +//! ); +//! # Result::<_,Box<dyn std::error::Error>>::Ok(()) +//! ``` +//! +//! 
See also [`Graph`] if you only care about plain triples. + +use crate::oxrdf::interning::*; +use crate::oxrdf::*; +use std::collections::hash_map::DefaultHasher; +use std::collections::{BTreeSet, HashMap, HashSet}; +use std::fmt; +use std::hash::{Hash, Hasher}; + +/// An in-memory [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). +/// +/// It can accommodate a fairly large number of quads (in the few millions). +/// +/// <div class="warning">It interns the strings and does not do any garbage collection yet: +/// if you insert and remove a lot of different terms, memory will grow without any reduction.</div> +/// +/// Usage example: +/// ``` +/// use oxrdf::*; +/// +/// let mut dataset = Dataset::default(); +/// +/// // insertion +/// let ex = NamedNodeRef::new("http://example.com")?; +/// let quad = QuadRef::new(ex, ex, ex, ex); +/// dataset.insert(quad); +/// +/// // simple filter +/// let results: Vec<_> = dataset.quads_for_subject(ex).collect(); +/// assert_eq!(vec![quad], results); +/// +/// // direct access to a dataset graph +/// let results: Vec<_> = dataset.graph(ex).iter().collect(); +/// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[derive(Debug, Default, Clone)] +pub struct Dataset { + interner: Interner, + gspo: BTreeSet<( + InternedGraphName, + InternedSubject, + InternedNamedNode, + InternedTerm, + )>, + gpos: BTreeSet<( + InternedGraphName, + InternedNamedNode, + InternedTerm, + InternedSubject, + )>, + gosp: BTreeSet<( + InternedGraphName, + InternedTerm, + InternedSubject, + InternedNamedNode, + )>, + spog: BTreeSet<( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + )>, + posg: BTreeSet<( + InternedNamedNode, + InternedTerm, + InternedSubject, + InternedGraphName, + )>, + ospg: BTreeSet<( + InternedTerm, + InternedSubject, + InternedNamedNode, + InternedGraphName, + )>, +} + +impl Dataset { + /// Creates a new dataset + pub fn new() -> Self { + Self::default() + } + + /// Provides a read-only view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) contained in this dataset. + /// + /// ``` + /// use oxrdf::*; + /// + /// let mut dataset = Dataset::default(); + /// let ex = NamedNodeRef::new("http://example.com")?; + /// dataset.insert(QuadRef::new(ex, ex, ex, ex)); + /// + /// let results: Vec<_> = dataset.graph(ex).iter().collect(); + /// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn graph<'a, 'b>(&'a self, graph_name: impl Into<GraphNameRef<'b>>) -> GraphView<'a> { + let graph_name = self + .encoded_graph_name(graph_name) + .unwrap_or_else(InternedGraphName::impossible); + GraphView { + dataset: self, + graph_name, + } + } + + /// Provides a read/write view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) contained in this dataset. 
+ /// + /// ``` + /// use oxrdf::*; + /// + /// let mut dataset = Dataset::default(); + /// let ex = NamedNodeRef::new("http://example.com")?; + /// + /// // We edit and query the dataset http://example.com graph + /// { + /// let mut graph = dataset.graph_mut(ex); + /// graph.insert(TripleRef::new(ex, ex, ex)); + /// let results: Vec<_> = graph.iter().collect(); + /// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); + /// } + /// + /// // We have also changes the dataset itself + /// let results: Vec<_> = dataset.iter().collect(); + /// assert_eq!(vec![QuadRef::new(ex, ex, ex, ex)], results); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn graph_mut<'a, 'b>( + &'a mut self, + graph_name: impl Into<GraphNameRef<'b>>, + ) -> GraphViewMut<'a> { + let graph_name = InternedGraphName::encoded_into(graph_name.into(), &mut self.interner); + GraphViewMut { + dataset: self, + graph_name, + } + } + + /// Returns all the quads contained by the dataset. + pub fn iter(&self) -> Iter<'_> { + let iter = self.spog.iter(); + Iter { + dataset: self, + inner: iter, + } + } + + pub fn quads_for_subject<'a, 'b>( + &'a self, + subject: impl Into<SubjectRef<'b>>, + ) -> impl Iterator<Item = QuadRef<'a>> + 'a { + let subject = self + .encoded_subject(subject) + .unwrap_or_else(InternedSubject::impossible); + self.interned_quads_for_subject(&subject) + .map(move |q| self.decode_spog(q)) + } + + #[allow(clippy::map_identity)] + fn interned_quads_for_subject( + &self, + subject: &InternedSubject, + ) -> impl Iterator< + Item = ( + &InternedSubject, + &InternedNamedNode, + &InternedTerm, + &InternedGraphName, + ), + > + '_ { + self.spog + .range( + &( + subject.clone(), + InternedNamedNode::first(), + InternedTerm::first(), + InternedGraphName::first(), + ) + ..&( + subject.next(), + InternedNamedNode::first(), + InternedTerm::first(), + InternedGraphName::first(), + ), + ) + .map(|(s, p, o, g)| (s, p, o, g)) + } + + pub fn quads_for_predicate<'a, 'b>( + &'a self, + predicate: impl Into<NamedNodeRef<'b>>, + ) -> impl Iterator<Item = QuadRef<'a>> + 'a { + let predicate = self + .encoded_named_node(predicate) + .unwrap_or_else(InternedNamedNode::impossible); + self.interned_quads_for_predicate(predicate) + .map(move |q| self.decode_spog(q)) + } + + fn interned_quads_for_predicate( + &self, + predicate: InternedNamedNode, + ) -> impl Iterator< + Item = ( + &InternedSubject, + &InternedNamedNode, + &InternedTerm, + &InternedGraphName, + ), + > + '_ { + self.posg + .range( + &( + predicate, + InternedTerm::first(), + InternedSubject::first(), + InternedGraphName::first(), + ) + ..&( + predicate.next(), + InternedTerm::first(), + InternedSubject::first(), + InternedGraphName::first(), + ), + ) + .map(|(p, o, s, g)| (s, p, o, g)) + } + + pub fn quads_for_object<'a, 'b>( + &'a self, + object: impl Into<TermRef<'b>>, + ) -> impl Iterator<Item = QuadRef<'a>> + 'a { + let object = self + .encoded_term(object) + .unwrap_or_else(InternedTerm::impossible); + + self.interned_quads_for_object(&object) + .map(move |q| self.decode_spog(q)) + } + + fn interned_quads_for_object( + &self, + object: &InternedTerm, + ) -> impl Iterator< + Item = ( + &InternedSubject, + &InternedNamedNode, + &InternedTerm, + &InternedGraphName, + ), + > + '_ { + self.ospg + .range( + &( + object.clone(), + InternedSubject::first(), + InternedNamedNode::first(), + InternedGraphName::first(), + ) + ..&( + object.next(), + InternedSubject::first(), + InternedNamedNode::first(), + InternedGraphName::first(), + ), + ) + .map(|(o, 
s, p, g)| (s, p, o, g)) + } + + pub fn quads_for_graph_name<'a, 'b>( + &'a self, + graph_name: impl Into<GraphNameRef<'b>>, + ) -> impl Iterator<Item = QuadRef<'a>> + 'a { + let graph_name = self + .encoded_graph_name(graph_name) + .unwrap_or_else(InternedGraphName::impossible); + + self.interned_quads_for_graph_name(&graph_name) + .map(move |q| self.decode_spog(q)) + } + + fn interned_quads_for_graph_name( + &self, + graph_name: &InternedGraphName, + ) -> impl Iterator< + Item = ( + &InternedSubject, + &InternedNamedNode, + &InternedTerm, + &InternedGraphName, + ), + > + '_ { + self.gspo + .range( + &( + graph_name.clone(), + InternedSubject::first(), + InternedNamedNode::first(), + InternedTerm::first(), + ) + ..&( + graph_name.next(), + InternedSubject::first(), + InternedNamedNode::first(), + InternedTerm::first(), + ), + ) + .map(|(g, s, p, o)| (s, p, o, g)) + } + + /// Checks if the dataset contains the given quad + pub fn contains<'a>(&self, quad: impl Into<QuadRef<'a>>) -> bool { + if let Some(q) = self.encoded_quad(quad.into()) { + self.spog.contains(&q) + } else { + false + } + } + + /// Returns the number of quads in this dataset. + pub fn len(&self) -> usize { + self.gspo.len() + } + + /// Checks if this dataset contains a quad. + pub fn is_empty(&self) -> bool { + self.gspo.is_empty() + } + + /// Adds a quad to the dataset. + pub fn insert<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> bool { + let quad = self.encode_quad(quad.into()); + self.insert_encoded(quad) + } + + fn insert_encoded( + &mut self, + quad: ( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + ) -> bool { + let (s, p, o, g) = quad; + self.gspo.insert((g.clone(), s.clone(), p, o.clone())); + self.gpos.insert((g.clone(), p, o.clone(), s.clone())); + self.gosp.insert((g.clone(), o.clone(), s.clone(), p)); + self.spog.insert((s.clone(), p, o.clone(), g.clone())); + self.posg.insert((p, o.clone(), s.clone(), g.clone())); + self.ospg.insert((o, s, p, g)) + } + + /// Removes a concrete quad from the dataset. + pub fn remove<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> bool { + if let Some(quad) = self.encoded_quad(quad.into()) { + self.remove_encoded(quad) + } else { + false + } + } + + fn remove_encoded( + &mut self, + quad: ( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + ) -> bool { + let (s, p, o, g) = quad; + self.gspo.remove(&(g.clone(), s.clone(), p, o.clone())); + self.gpos.remove(&(g.clone(), p, o.clone(), s.clone())); + self.gosp.remove(&(g.clone(), o.clone(), s.clone(), p)); + self.spog.remove(&(s.clone(), p, o.clone(), g.clone())); + self.posg.remove(&(p, o.clone(), s.clone(), g.clone())); + self.ospg.remove(&(o, s, p, g)) + } + + /// Clears the dataset. 
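Because `insert` and `remove` must keep all six orderings in sync, both return whether the dataset actually changed (the result of the final index update). A short usage sketch, assuming the public API exercised by the doctests above (`use oxrdf::*;`):

```rust
use oxrdf::*;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut dataset = Dataset::new();
    let ex = NamedNodeRef::new("http://example.com")?;
    let quad = QuadRef::new(ex, ex, ex, ex);

    assert!(dataset.insert(quad)); // newly added to all six indexes
    assert!(!dataset.insert(quad)); // already present, nothing changed
    assert_eq!(dataset.len(), 1);

    assert!(dataset.remove(quad)); // removed from all six indexes
    assert!(!dataset.remove(quad)); // was no longer there
    assert!(dataset.is_empty());
    Ok(())
}
```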
+ pub fn clear(&mut self) { + self.gspo.clear(); + self.gpos.clear(); + self.gosp.clear(); + self.spog.clear(); + self.posg.clear(); + self.ospg.clear(); + } + + fn encode_quad( + &mut self, + quad: QuadRef<'_>, + ) -> ( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ) { + ( + InternedSubject::encoded_into(quad.subject, &mut self.interner), + InternedNamedNode::encoded_into(quad.predicate, &mut self.interner), + InternedTerm::encoded_into(quad.object, &mut self.interner), + InternedGraphName::encoded_into(quad.graph_name, &mut self.interner), + ) + } + + fn encoded_quad( + &self, + quad: QuadRef<'_>, + ) -> Option<( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + )> { + Some(( + self.encoded_subject(quad.subject)?, + self.encoded_named_node(quad.predicate)?, + self.encoded_term(quad.object)?, + self.encoded_graph_name(quad.graph_name)?, + )) + } + + pub(super) fn encoded_named_node<'a>( + &self, + node: impl Into<NamedNodeRef<'a>>, + ) -> Option<InternedNamedNode> { + InternedNamedNode::encoded_from(node.into(), &self.interner) + } + + pub(super) fn encoded_subject<'a>( + &self, + node: impl Into<SubjectRef<'a>>, + ) -> Option<InternedSubject> { + InternedSubject::encoded_from(node.into(), &self.interner) + } + + pub(super) fn encoded_term<'a>(&self, term: impl Into<TermRef<'a>>) -> Option<InternedTerm> { + InternedTerm::encoded_from(term.into(), &self.interner) + } + + pub(super) fn encoded_graph_name<'a>( + &self, + graph_name: impl Into<GraphNameRef<'a>>, + ) -> Option<InternedGraphName> { + InternedGraphName::encoded_from(graph_name.into(), &self.interner) + } + + fn decode_spog( + &self, + quad: ( + &InternedSubject, + &InternedNamedNode, + &InternedTerm, + &InternedGraphName, + ), + ) -> QuadRef<'_> { + QuadRef { + subject: quad.0.decode_from(&self.interner), + predicate: quad.1.decode_from(&self.interner), + object: quad.2.decode_from(&self.interner), + graph_name: quad.3.decode_from(&self.interner), + } + } + + fn decode_spo( + &self, + triple: (&InternedSubject, &InternedNamedNode, &InternedTerm), + ) -> TripleRef<'_> { + TripleRef { + subject: triple.0.decode_from(&self.interner), + predicate: triple.1.decode_from(&self.interner), + object: triple.2.decode_from(&self.interner), + } + } + + /// Canonicalizes the dataset by renaming blank nodes. + /// + /// Usage example ([Dataset isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-dataset-isomorphism)): + /// ``` + /// use oxrdf::dataset::CanonicalizationAlgorithm; + /// use oxrdf::*; + /// + /// let iri = NamedNodeRef::new("http://example.com")?; + /// + /// let mut graph1 = Graph::new(); + /// let bnode1 = BlankNode::default(); + /// let g1 = BlankNode::default(); + /// graph1.insert(QuadRef::new(iri, iri, &bnode1, &g1)); + /// graph1.insert(QuadRef::new(&bnode1, iri, iri, &g1)); + /// + /// let mut graph2 = Graph::new(); + /// let bnode2 = BlankNode::default(); + /// let g2 = BlankNode::default(); + /// graph2.insert(QuadRef::new(iri, iri, &bnode2, &g2)); + /// graph2.insert(QuadRef::new(&bnode2, iri, iri, &g2)); + /// + /// assert_ne!(graph1, graph2); + /// graph1.canonicalize(CanonicalizationAlgorithm::Unstable); + /// graph2.canonicalize(CanonicalizationAlgorithm::Unstable); + /// assert_eq!(graph1, graph2); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + /// + /// <div class="warning">Blank node ids depends on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes. 
+ /// Hence, this canonization might not be suitable for diffs.</div> + /// + /// <div class="warning">This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset.</div> + pub fn canonicalize(&mut self, algorithm: CanonicalizationAlgorithm) { + let bnode_mapping = self.canonicalize_interned_blank_nodes(algorithm); + let new_quads = self.map_blank_nodes(&bnode_mapping); + self.clear(); + for quad in new_quads { + self.insert_encoded(quad); + } + } + + /// Returns a map between the current dataset blank node and the canonicalized blank node + /// to create a canonical dataset. + /// + /// See also [`canonicalize`](Self::canonicalize). + pub fn canonicalize_blank_nodes( + &self, + algorithm: CanonicalizationAlgorithm, + ) -> HashMap<BlankNodeRef<'_>, BlankNode> { + self.canonicalize_interned_blank_nodes(algorithm) + .into_iter() + .map(|(from, to)| (from.decode_from(&self.interner), to)) + .collect() + } + + fn canonicalize_interned_blank_nodes( + &self, + algorithm: CanonicalizationAlgorithm, + ) -> HashMap<InternedBlankNode, BlankNode> { + match algorithm { + CanonicalizationAlgorithm::Unstable => { + let bnodes = self.blank_nodes(); + let quads_per_blank_node = self.quads_per_blank_nodes(); + let (hash, partition) = self.hash_bnodes( + bnodes.into_iter().map(|bnode| (bnode, 0)).collect(), + &quads_per_blank_node, + ); + self.distinguish(hash, &partition, &quads_per_blank_node) + .into_iter() + .map(|(from, to)| (from, BlankNode::new_from_unique_id(to.into()))) + .collect() + } + } + } + + fn blank_nodes(&self) -> HashSet<InternedBlankNode> { + let mut bnodes = HashSet::new(); + for (g, s, _, o) in &self.gspo { + if let InternedSubject::BlankNode(bnode) = s { + bnodes.insert(*bnode); + } + #[cfg(feature = "rdf-star")] + if let InternedSubject::Triple(triple) = s { + Self::triple_blank_nodes(triple, &mut bnodes); + } + if let InternedTerm::BlankNode(bnode) = o { + bnodes.insert(*bnode); + } + #[cfg(feature = "rdf-star")] + if let InternedTerm::Triple(triple) = o { + Self::triple_blank_nodes(triple, &mut bnodes); + } + if let InternedGraphName::BlankNode(bnode) = g { + bnodes.insert(*bnode); + } + } + bnodes + } + + #[cfg(feature = "rdf-star")] + fn triple_blank_nodes(triple: &InternedTriple, bnodes: &mut HashSet<InternedBlankNode>) { + if let InternedSubject::BlankNode(bnode) = &triple.subject { + bnodes.insert(*bnode); + } else if let InternedSubject::Triple(t) = &triple.subject { + Self::triple_blank_nodes(t, bnodes); + } + if let InternedTerm::BlankNode(bnode) = &triple.object { + bnodes.insert(*bnode); + } else if let InternedTerm::Triple(t) = &triple.object { + Self::triple_blank_nodes(t, bnodes); + } + } + + fn quads_per_blank_nodes(&self) -> QuadsPerBlankNode { + let mut map: HashMap<_, Vec<_>> = HashMap::new(); + for quad in &self.spog { + if let InternedSubject::BlankNode(bnode) = &quad.0 { + map.entry(*bnode).or_default().push(quad.clone()); + } + #[cfg(feature = "rdf-star")] + if let InternedSubject::Triple(t) = &quad.0 { + Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, &mut map); + } + if let InternedTerm::BlankNode(bnode) = &quad.2 { + map.entry(*bnode).or_default().push(quad.clone()); + } + #[cfg(feature = "rdf-star")] + if let InternedTerm::Triple(t) = &quad.2 { + Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, &mut map); + } + if let InternedGraphName::BlankNode(bnode) = &quad.3 { + map.entry(*bnode).or_default().push(quad.clone()); + } + } + map + } + + #[cfg(feature = 
"rdf-star")] + fn add_quad_with_quoted_triple_to_quad_per_blank_nodes_map( + quad: &( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + triple: &InternedTriple, + map: &mut QuadsPerBlankNode, + ) { + if let InternedSubject::BlankNode(bnode) = &triple.subject { + map.entry(*bnode).or_default().push(quad.clone()); + } + if let InternedSubject::Triple(t) = &triple.subject { + Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, map); + } + if let InternedTerm::BlankNode(bnode) = &triple.object { + map.entry(*bnode).or_default().push(quad.clone()); + } + if let InternedTerm::Triple(t) = &triple.object { + Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, map); + } + } + + fn hash_bnodes( + &self, + mut hashes: HashMap<InternedBlankNode, u64>, + quads_per_blank_node: &QuadsPerBlankNode, + ) -> ( + HashMap<InternedBlankNode, u64>, + Vec<(u64, Vec<InternedBlankNode>)>, + ) { + let mut to_hash = Vec::new(); + let mut to_do = hashes + .keys() + .map(|bnode| (*bnode, true)) + .collect::<HashMap<_, _>>(); + let mut partition = HashMap::<_, Vec<_>>::with_capacity(hashes.len()); + let mut old_partition_count = usize::MAX; + while old_partition_count != partition.len() { + old_partition_count = partition.len(); + partition.clear(); + let mut new_hashes = hashes.clone(); + for bnode in hashes.keys() { + let hash = if to_do.contains_key(bnode) { + for (s, p, o, g) in &quads_per_blank_node[bnode] { + to_hash.push(( + self.hash_subject(s, *bnode, &hashes), + self.hash_named_node(*p), + self.hash_term(o, *bnode, &hashes), + self.hash_graph_name(g, *bnode, &hashes), + )); + } + to_hash.sort_unstable(); + let hash = Self::hash_tuple((&to_hash, hashes[bnode])); + to_hash.clear(); + if hash == hashes[bnode] { + to_do.insert(*bnode, false); + } else { + new_hashes.insert(*bnode, hash); + } + hash + } else { + hashes[bnode] + }; + partition.entry(hash).or_default().push(*bnode); + } + hashes = new_hashes; + } + let mut partition: Vec<_> = partition.into_iter().collect(); + partition.sort_unstable_by(|(h1, b1), (h2, b2)| (b1.len(), h1).cmp(&(b2.len(), h2))); + (hashes, partition) + } + + fn hash_named_node(&self, node: InternedNamedNode) -> u64 { + Self::hash_tuple(node.decode_from(&self.interner)) + } + + fn hash_blank_node( + node: InternedBlankNode, + current_blank_node: InternedBlankNode, + bnodes_hash: &HashMap<InternedBlankNode, u64>, + ) -> u64 { + if node == current_blank_node { + u64::MAX + } else { + bnodes_hash[&node] + } + } + + fn hash_subject( + &self, + node: &InternedSubject, + current_blank_node: InternedBlankNode, + bnodes_hash: &HashMap<InternedBlankNode, u64>, + ) -> u64 { + match node { + InternedSubject::NamedNode(node) => Self::hash_tuple(node.decode_from(&self.interner)), + InternedSubject::BlankNode(bnode) => { + Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash) + } + #[cfg(feature = "rdf-star")] + InternedSubject::Triple(triple) => { + self.hash_triple(triple, current_blank_node, bnodes_hash) + } + } + } + + fn hash_term( + &self, + term: &InternedTerm, + current_blank_node: InternedBlankNode, + bnodes_hash: &HashMap<InternedBlankNode, u64>, + ) -> u64 { + match term { + InternedTerm::NamedNode(node) => Self::hash_tuple(node.decode_from(&self.interner)), + InternedTerm::BlankNode(bnode) => { + Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash) + } + InternedTerm::Literal(literal) => Self::hash_tuple(literal.decode_from(&self.interner)), + #[cfg(feature = "rdf-star")] + 
InternedTerm::Triple(triple) => {
+                self.hash_triple(triple, current_blank_node, bnodes_hash)
+            }
+        }
+    }
+
+    fn hash_graph_name(
+        &self,
+        graph_name: &InternedGraphName,
+        current_blank_node: InternedBlankNode,
+        bnodes_hash: &HashMap<InternedBlankNode, u64>,
+    ) -> u64 {
+        match graph_name {
+            InternedGraphName::NamedNode(node) => {
+                Self::hash_tuple(node.decode_from(&self.interner))
+            }
+            InternedGraphName::BlankNode(bnode) => {
+                Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
+            }
+            InternedGraphName::DefaultGraph => 0,
+        }
+    }
+
+    #[cfg(feature = "rdf-star")]
+    fn hash_triple(
+        &self,
+        triple: &InternedTriple,
+        current_blank_node: InternedBlankNode,
+        bnodes_hash: &HashMap<InternedBlankNode, u64>,
+    ) -> u64 {
+        Self::hash_tuple((
+            self.hash_subject(&triple.subject, current_blank_node, bnodes_hash),
+            self.hash_named_node(triple.predicate),
+            self.hash_term(&triple.object, current_blank_node, bnodes_hash),
+        ))
+    }
+
+    fn hash_tuple(v: impl Hash) -> u64 {
+        let mut hasher = DefaultHasher::new();
+        v.hash(&mut hasher);
+        hasher.finish()
+    }
+
+    fn distinguish(
+        &self,
+        hash: HashMap<InternedBlankNode, u64>,
+        partition: &[(u64, Vec<InternedBlankNode>)],
+        quads_per_blank_node: &QuadsPerBlankNode,
+    ) -> HashMap<InternedBlankNode, u64> {
+        let b_prime = partition.iter().map(|(_, b)| b).find(|b| b.len() > 1);
+        if let Some(b_prime) = b_prime {
+            b_prime
+                .iter()
+                .map(|b| {
+                    let mut hash_prime = hash.clone();
+                    hash_prime.insert(*b, Self::hash_tuple((hash_prime[b], 22)));
+                    let (hash_prime_prime, partition_prime) =
+                        self.hash_bnodes(hash_prime, quads_per_blank_node);
+                    self.distinguish(hash_prime_prime, &partition_prime, quads_per_blank_node)
+                })
+                .reduce(|a, b| {
+                    // Keep the candidate whose sorted hash multiset is smallest.
+                    let mut a_hashes = a.values().collect::<Vec<_>>();
+                    a_hashes.sort();
+                    let mut b_hashes = b.values().collect::<Vec<_>>();
+                    b_hashes.sort();
+                    if a_hashes <= b_hashes {
+                        a
+                    } else {
+                        b
+                    }
+                })
+                .unwrap_or_default()
+        } else {
+            hash
+        }
+    }
+
+    #[allow(clippy::needless_collect)]
+    fn map_blank_nodes(
+        &mut self,
+        bnode_mapping: &HashMap<InternedBlankNode, BlankNode>,
+    ) -> Vec<(
+        InternedSubject,
+        InternedNamedNode,
+        InternedTerm,
+        InternedGraphName,
+    )> {
+        let old_quads: Vec<_> = self.spog.iter().cloned().collect();
+        old_quads
+            .into_iter()
+            .map(|(s, p, o, g)| {
+                (
+                    match s {
+                        InternedSubject::NamedNode(_) => s,
+                        InternedSubject::BlankNode(bnode) => {
+                            InternedSubject::BlankNode(InternedBlankNode::encoded_into(
+                                bnode_mapping[&bnode].as_ref(),
+                                &mut self.interner,
+                            ))
+                        }
+                        #[cfg(feature = "rdf-star")]
+                        InternedSubject::Triple(triple) => {
+                            InternedSubject::Triple(Box::new(InternedTriple::encoded_into(
+                                self.map_triple_blank_nodes(&triple, bnode_mapping).as_ref(),
+                                &mut self.interner,
+                            )))
+                        }
+                    },
+                    p,
+                    match o {
+                        InternedTerm::NamedNode(_) | InternedTerm::Literal(_) => o,
+                        InternedTerm::BlankNode(bnode) => {
+                            InternedTerm::BlankNode(InternedBlankNode::encoded_into(
+                                bnode_mapping[&bnode].as_ref(),
+                                &mut self.interner,
+                            ))
+                        }
+                        #[cfg(feature = "rdf-star")]
+                        InternedTerm::Triple(triple) => {
+                            InternedTerm::Triple(Box::new(InternedTriple::encoded_into(
+                                self.map_triple_blank_nodes(&triple, bnode_mapping).as_ref(),
+                                &mut self.interner,
+                            )))
+                        }
+                    },
+                    match g {
+                        InternedGraphName::NamedNode(_) | InternedGraphName::DefaultGraph => g,
+                        InternedGraphName::BlankNode(bnode) => {
+                            InternedGraphName::BlankNode(InternedBlankNode::encoded_into(
+                                bnode_mapping[&bnode].as_ref(),
+                                &mut self.interner,
+                            ))
+                        }
+                    },
+                )
+            })
+            .collect()
+    }
+
+    #[cfg(feature = 
"rdf-star")] + fn map_triple_blank_nodes( + &mut self, + triple: &InternedTriple, + bnode_mapping: &HashMap<InternedBlankNode, BlankNode>, + ) -> Triple { + Triple { + subject: if let InternedSubject::BlankNode(bnode) = &triple.subject { + bnode_mapping[bnode].clone().into() + } else if let InternedSubject::Triple(t) = &triple.subject { + self.map_triple_blank_nodes(t, bnode_mapping).into() + } else { + triple.subject.decode_from(&self.interner).into_owned() + }, + predicate: triple.predicate.decode_from(&self.interner).into_owned(), + object: if let InternedTerm::BlankNode(bnode) = &triple.object { + bnode_mapping[bnode].clone().into() + } else if let InternedTerm::Triple(t) = &triple.object { + self.map_triple_blank_nodes(t, bnode_mapping).into() + } else { + triple.object.decode_from(&self.interner).into_owned() + }, + } + } +} + +impl PartialEq for Dataset { + fn eq(&self, other: &Self) -> bool { + if self.len() != other.len() { + return false; + } + for q in self { + if !other.contains(q) { + return false; + } + } + true + } +} + +impl Eq for Dataset {} + +impl<'a> IntoIterator for &'a Dataset { + type Item = QuadRef<'a>; + type IntoIter = Iter<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl FromIterator<Quad> for Dataset { + fn from_iter<I: IntoIterator<Item = Quad>>(iter: I) -> Self { + let mut g = Self::new(); + g.extend(iter); + g + } +} + +impl<'a, T: Into<QuadRef<'a>>> FromIterator<T> for Dataset { + fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self { + let mut g = Self::new(); + g.extend(iter); + g + } +} + +impl Extend<Quad> for Dataset { + fn extend<I: IntoIterator<Item = Quad>>(&mut self, iter: I) { + for t in iter { + self.insert(&t); + } + } +} + +impl<'a, T: Into<QuadRef<'a>>> Extend<T> for Dataset { + fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) { + for t in iter { + self.insert(t); + } + } +} + +impl fmt::Display for Dataset { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for t in self { + writeln!(f, "{t} .")?; + } + Ok(()) + } +} + +/// A read-only view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) contained in a [`Dataset`]. +/// +/// It is built using the [`Dataset::graph`] method. +/// +/// Usage example: +/// ``` +/// use oxrdf::*; +/// +/// let mut dataset = Dataset::default(); +/// let ex = NamedNodeRef::new("http://example.com")?; +/// dataset.insert(QuadRef::new(ex, ex, ex, ex)); +/// +/// let results: Vec<_> = dataset.graph(ex).iter().collect(); +/// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[derive(Clone, Debug)] +pub struct GraphView<'a> { + dataset: &'a Dataset, + graph_name: InternedGraphName, +} + +impl<'a> GraphView<'a> { + /// Returns all the triples contained by the graph. 
+ pub fn iter(&self) -> GraphViewIter<'a> { + let iter = self.dataset.gspo.range( + &( + self.graph_name.clone(), + InternedSubject::first(), + InternedNamedNode::first(), + InternedTerm::first(), + ) + ..&( + self.graph_name.next(), + InternedSubject::first(), + InternedNamedNode::first(), + InternedTerm::first(), + ), + ); + GraphViewIter { + dataset: self.dataset, + inner: iter, + } + } + + pub fn triples_for_subject<'b>( + &self, + subject: impl Into<SubjectRef<'b>>, + ) -> impl Iterator<Item = TripleRef<'a>> + 'a { + self.triples_for_interned_subject(self.dataset.encoded_subject(subject)) + } + + pub(super) fn triples_for_interned_subject( + &self, + subject: Option<InternedSubject>, + ) -> impl Iterator<Item = TripleRef<'a>> + 'a { + let subject = subject.unwrap_or_else(InternedSubject::impossible); + let ds = self.dataset; + self.dataset + .gspo + .range( + &( + self.graph_name.clone(), + subject.clone(), + InternedNamedNode::first(), + InternedTerm::first(), + ) + ..&( + self.graph_name.clone(), + subject.next(), + InternedNamedNode::first(), + InternedTerm::first(), + ), + ) + .map(move |q| { + let (_, s, p, o) = q; + ds.decode_spo((s, p, o)) + }) + } + + pub fn objects_for_subject_predicate<'b>( + &self, + subject: impl Into<SubjectRef<'b>>, + predicate: impl Into<NamedNodeRef<'b>>, + ) -> impl Iterator<Item = TermRef<'a>> + 'a { + self.objects_for_interned_subject_predicate( + self.dataset.encoded_subject(subject), + self.dataset.encoded_named_node(predicate), + ) + } + + pub(super) fn objects_for_interned_subject_predicate( + &self, + subject: Option<InternedSubject>, + predicate: Option<InternedNamedNode>, + ) -> impl Iterator<Item = TermRef<'a>> + 'a { + let subject = subject.unwrap_or_else(InternedSubject::impossible); + let predicate = predicate.unwrap_or_else(InternedNamedNode::impossible); + let ds = self.dataset; + self.dataset + .gspo + .range( + &( + self.graph_name.clone(), + subject.clone(), + predicate, + InternedTerm::first(), + ) + ..&( + self.graph_name.clone(), + subject, + predicate.next(), + InternedTerm::first(), + ), + ) + .map(move |q| q.3.decode_from(&ds.interner)) + } + + pub fn object_for_subject_predicate<'b>( + &self, + subject: impl Into<SubjectRef<'b>>, + predicate: impl Into<NamedNodeRef<'b>>, + ) -> Option<TermRef<'a>> { + self.objects_for_subject_predicate(subject, predicate) + .next() + } + + pub fn predicates_for_subject_object<'b>( + &self, + subject: impl Into<SubjectRef<'b>>, + object: impl Into<TermRef<'b>>, + ) -> impl Iterator<Item = NamedNodeRef<'a>> + 'a { + self.predicates_for_interned_subject_object( + self.dataset.encoded_subject(subject), + self.dataset.encoded_term(object), + ) + } + + pub(super) fn predicates_for_interned_subject_object( + &self, + subject: Option<InternedSubject>, + object: Option<InternedTerm>, + ) -> impl Iterator<Item = NamedNodeRef<'a>> + 'a { + let subject = subject.unwrap_or_else(InternedSubject::impossible); + let object = object.unwrap_or_else(InternedTerm::impossible); + let ds = self.dataset; + self.dataset + .gosp + .range( + &( + self.graph_name.clone(), + object.clone(), + subject.clone(), + InternedNamedNode::first(), + ) + ..&( + self.graph_name.clone(), + object, + subject.next(), + InternedNamedNode::first(), + ), + ) + .map(move |q| q.3.decode_from(&ds.interner)) + } + + pub fn triples_for_predicate<'b>( + &self, + predicate: impl Into<NamedNodeRef<'b>>, + ) -> impl Iterator<Item = TripleRef<'a>> + 'a { + self.triples_for_interned_predicate(self.dataset.encoded_named_node(predicate)) + } + + 
pub(super) fn triples_for_interned_predicate( + &self, + predicate: Option<InternedNamedNode>, + ) -> impl Iterator<Item = TripleRef<'a>> + 'a { + let predicate = predicate.unwrap_or_else(InternedNamedNode::impossible); + let ds = self.dataset; + self.dataset + .gpos + .range( + &( + self.graph_name.clone(), + predicate, + InternedTerm::first(), + InternedSubject::first(), + ) + ..&( + self.graph_name.clone(), + predicate.next(), + InternedTerm::first(), + InternedSubject::first(), + ), + ) + .map(move |(_, p, o, s)| ds.decode_spo((s, p, o))) + } + + pub fn subjects_for_predicate_object<'b>( + &self, + predicate: impl Into<NamedNodeRef<'b>>, + object: impl Into<TermRef<'b>>, + ) -> impl Iterator<Item = SubjectRef<'a>> + 'a { + self.subjects_for_interned_predicate_object( + self.dataset.encoded_named_node(predicate), + self.dataset.encoded_term(object), + ) + } + + pub(super) fn subjects_for_interned_predicate_object( + &self, + predicate: Option<InternedNamedNode>, + object: Option<InternedTerm>, + ) -> impl Iterator<Item = SubjectRef<'a>> + 'a { + let predicate = predicate.unwrap_or_else(InternedNamedNode::impossible); + let object = object.unwrap_or_else(InternedTerm::impossible); + let ds = self.dataset; + self.dataset + .gpos + .range( + &( + self.graph_name.clone(), + predicate, + object.clone(), + InternedSubject::first(), + ) + ..&( + self.graph_name.clone(), + predicate, + object.next(), + InternedSubject::first(), + ), + ) + .map(move |q| q.3.decode_from(&ds.interner)) + } + + pub fn subject_for_predicate_object<'b>( + &self, + predicate: impl Into<NamedNodeRef<'b>>, + object: impl Into<TermRef<'b>>, + ) -> Option<SubjectRef<'a>> { + self.subjects_for_predicate_object(predicate, object).next() + } + + pub fn triples_for_object<'b>( + &self, + object: impl Into<TermRef<'b>>, + ) -> impl Iterator<Item = TripleRef<'a>> + 'a { + self.triples_for_interned_object(self.dataset.encoded_term(object)) + } + + pub(super) fn triples_for_interned_object( + &self, + object: Option<InternedTerm>, + ) -> impl Iterator<Item = TripleRef<'a>> + 'a { + let object = object.unwrap_or_else(InternedTerm::impossible); + let ds = self.dataset; + self.dataset + .gosp + .range( + &( + self.graph_name.clone(), + object.clone(), + InternedSubject::first(), + InternedNamedNode::first(), + ) + ..&( + self.graph_name.clone(), + object.next(), + InternedSubject::first(), + InternedNamedNode::first(), + ), + ) + .map(move |(_, o, s, p)| ds.decode_spo((s, p, o))) + } + + /// Checks if the graph contains the given triple. + pub fn contains<'b>(&self, triple: impl Into<TripleRef<'b>>) -> bool { + if let Some(triple) = self.encoded_triple(triple.into()) { + self.dataset.gspo.contains(&( + self.graph_name.clone(), + triple.subject, + triple.predicate, + triple.object, + )) + } else { + false + } + } + + /// Returns the number of triples in this graph. + pub fn len(&self) -> usize { + self.iter().count() + } + + /// Checks if this graph contains a triple. 
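Note that `GraphView::len` is computed by scanning the view's whole range (`iter().count()`), unlike `Dataset::len`, which is a constant-time set size. A short lookup sketch, assuming the public API exercised by the doctests (`use oxrdf::*;`):

```rust
use oxrdf::*;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut dataset = Dataset::new();
    let g = NamedNodeRef::new("http://example.com/g")?;
    let s = NamedNodeRef::new("http://example.com/s")?;
    let p = NamedNodeRef::new("http://example.com/p")?;
    let o = NamedNodeRef::new("http://example.com/o")?;
    dataset.insert(QuadRef::new(s, p, o, g));

    let graph = dataset.graph(g);
    assert_eq!(graph.len(), 1); // O(n): counts the range behind the view
    assert_eq!(
        graph.subject_for_predicate_object(p, o),
        Some(s.into()) // resolved through the GPOS ordering
    );
    Ok(())
}
```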
+ pub fn is_empty(&self) -> bool { + self.iter().next().is_none() + } + + fn encoded_triple(&self, triple: TripleRef<'_>) -> Option<InternedTriple> { + Some(InternedTriple { + subject: self.dataset.encoded_subject(triple.subject)?, + predicate: self.dataset.encoded_named_node(triple.predicate)?, + object: self.dataset.encoded_term(triple.object)?, + }) + } +} + +impl<'a> IntoIterator for GraphView<'a> { + type Item = TripleRef<'a>; + type IntoIter = GraphViewIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a, 'b> IntoIterator for &'b GraphView<'a> { + type Item = TripleRef<'a>; + type IntoIter = GraphViewIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a> fmt::Display for GraphView<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for t in self { + writeln!(f, "{t} .")?; + } + Ok(()) + } +} + +/// A read/write view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) contained in a [`Dataset`]. +/// +/// It is built using the [`Dataset::graph_mut`] method. +/// +/// Usage example: +/// ``` +/// use oxrdf::*; +/// +/// let mut dataset = Dataset::default(); +/// let ex = NamedNodeRef::new("http://example.com")?; +/// +/// // We edit and query the dataset http://example.com graph +/// { +/// let mut graph = dataset.graph_mut(ex); +/// graph.insert(TripleRef::new(ex, ex, ex)); +/// let results: Vec<_> = graph.iter().collect(); +/// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); +/// } +/// +/// // We have also changes the dataset itself +/// let results: Vec<_> = dataset.iter().collect(); +/// assert_eq!(vec![QuadRef::new(ex, ex, ex, ex)], results); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[derive(Debug)] +pub struct GraphViewMut<'a> { + dataset: &'a mut Dataset, + graph_name: InternedGraphName, +} + +impl<'a> GraphViewMut<'a> { + fn read(&self) -> GraphView<'_> { + GraphView { + dataset: self.dataset, + graph_name: self.graph_name.clone(), + } + } + + /// Adds a triple to the graph. + pub fn insert<'b>(&mut self, triple: impl Into<TripleRef<'b>>) -> bool { + let triple = self.encode_triple(triple.into()); + self.dataset.insert_encoded(( + triple.subject, + triple.predicate, + triple.object, + self.graph_name.clone(), + )) + } + + /// Removes a concrete triple from the graph. 
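Since `GraphViewMut` also implements `Extend` (see the impls further below), a batch of triples can be written through the view in one call, each insertion recorded under the view's graph name. A minimal sketch, assuming the public API exercised by the doctests (`use oxrdf::*;`):

```rust
use oxrdf::*;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut dataset = Dataset::new();
    let g = NamedNodeRef::new("http://example.com/g")?;
    let ex = NamedNodeRef::new("http://example.com")?;

    let mut graph = dataset.graph_mut(g);
    graph.extend([TripleRef::new(ex, ex, ex)]);
    drop(graph); // release the mutable borrow before reading

    // The triple landed in the dataset under graph g.
    assert!(dataset.contains(QuadRef::new(ex, ex, ex, g)));
    Ok(())
}
```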
+ pub fn remove<'b>(&mut self, triple: impl Into<TripleRef<'b>>) -> bool { + if let Some(triple) = self.read().encoded_triple(triple.into()) { + self.dataset.remove_encoded(( + triple.subject, + triple.predicate, + triple.object, + self.graph_name.clone(), + )) + } else { + false + } + } + + fn encode_triple(&mut self, triple: TripleRef<'_>) -> InternedTriple { + InternedTriple { + subject: InternedSubject::encoded_into(triple.subject, &mut self.dataset.interner), + predicate: InternedNamedNode::encoded_into( + triple.predicate, + &mut self.dataset.interner, + ), + object: InternedTerm::encoded_into(triple.object, &mut self.dataset.interner), + } + } + + /// Returns all the triples contained by the graph + pub fn iter(&'a self) -> GraphViewIter<'a> { + self.read().iter() + } + + pub fn triples_for_subject<'b>( + &'a self, + subject: impl Into<SubjectRef<'b>>, + ) -> impl Iterator<Item = TripleRef<'a>> + 'a { + self.read() + .triples_for_interned_subject(self.dataset.encoded_subject(subject)) + } + + pub fn objects_for_subject_predicate<'b>( + &'a self, + subject: impl Into<SubjectRef<'b>>, + predicate: impl Into<NamedNodeRef<'b>>, + ) -> impl Iterator<Item = TermRef<'a>> + 'a { + self.read().objects_for_interned_subject_predicate( + self.dataset.encoded_subject(subject), + self.dataset.encoded_named_node(predicate), + ) + } + + pub fn object_for_subject_predicate<'b>( + &'a self, + subject: impl Into<SubjectRef<'b>>, + predicate: impl Into<NamedNodeRef<'b>>, + ) -> Option<TermRef<'a>> { + self.read().object_for_subject_predicate(subject, predicate) + } + + pub fn predicates_for_subject_object<'b>( + &'a self, + subject: impl Into<SubjectRef<'b>>, + object: impl Into<TermRef<'b>>, + ) -> impl Iterator<Item = NamedNodeRef<'a>> + 'a { + self.read().predicates_for_interned_subject_object( + self.dataset.encoded_subject(subject), + self.dataset.encoded_term(object), + ) + } + + pub fn triples_for_predicate<'b>( + &'a self, + predicate: impl Into<NamedNodeRef<'b>>, + ) -> impl Iterator<Item = TripleRef<'a>> + 'a { + self.read() + .triples_for_interned_predicate(self.dataset.encoded_named_node(predicate)) + } + + pub fn subjects_for_predicate_object<'b>( + &'a self, + predicate: impl Into<NamedNodeRef<'b>>, + object: impl Into<TermRef<'b>>, + ) -> impl Iterator<Item = SubjectRef<'a>> + 'a { + self.read().subjects_for_interned_predicate_object( + self.dataset.encoded_named_node(predicate), + self.dataset.encoded_term(object), + ) + } + + pub fn subject_for_predicate_object<'b>( + &'a self, + predicate: impl Into<NamedNodeRef<'b>>, + object: impl Into<TermRef<'b>>, + ) -> Option<SubjectRef<'a>> { + self.read().subject_for_predicate_object(predicate, object) + } + + pub fn triples_for_object<'b>( + &'a self, + object: TermRef<'b>, + ) -> impl Iterator<Item = TripleRef<'a>> + 'a { + self.read() + .triples_for_interned_object(self.dataset.encoded_term(object)) + } + + /// Checks if the graph contains the given triple. + pub fn contains<'b>(&self, triple: impl Into<TripleRef<'b>>) -> bool { + self.read().contains(triple) + } + + /// Returns the number of triples in this graph. + pub fn len(&self) -> usize { + self.read().len() + } + + /// Checks if this graph contains a triple. 
+    pub fn is_empty(&self) -> bool {
+        self.read().is_empty()
+    }
+}
+
+impl<'a> Extend<Triple> for GraphViewMut<'a> {
+    fn extend<I: IntoIterator<Item = Triple>>(&mut self, iter: I) {
+        for t in iter {
+            self.insert(&t);
+        }
+    }
+}
+
+impl<'a, 'b, T: Into<TripleRef<'b>>> Extend<T> for GraphViewMut<'a> {
+    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
+        for t in iter {
+            self.insert(t);
+        }
+    }
+}
+
+impl<'a> IntoIterator for &'a GraphViewMut<'a> {
+    type Item = TripleRef<'a>;
+    type IntoIter = GraphViewIter<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl<'a> fmt::Display for GraphViewMut<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        for t in self {
+            writeln!(f, "{t} .")?;
+        }
+        Ok(())
+    }
+}
+
+/// Iterator returned by [`Dataset::iter`].
+pub struct Iter<'a> {
+    dataset: &'a Dataset,
+    inner: std::collections::btree_set::Iter<
+        'a,
+        (
+            InternedSubject,
+            InternedNamedNode,
+            InternedTerm,
+            InternedGraphName,
+        ),
+    >,
+}
+
+impl<'a> Iterator for Iter<'a> {
+    type Item = QuadRef<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner
+            .next()
+            .map(|(s, p, o, g)| self.dataset.decode_spog((s, p, o, g)))
+    }
+}
+
+/// Iterator returned by [`GraphView::iter`].
+pub struct GraphViewIter<'a> {
+    dataset: &'a Dataset,
+    inner: std::collections::btree_set::Range<
+        'a,
+        (
+            InternedGraphName,
+            InternedSubject,
+            InternedNamedNode,
+            InternedTerm,
+        ),
+    >,
+}
+
+impl<'a> Iterator for GraphViewIter<'a> {
+    type Item = TripleRef<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner
+            .next()
+            .map(|(_, s, p, o)| self.dataset.decode_spo((s, p, o)))
+    }
+}
+
+type QuadsPerBlankNode = HashMap<
+    InternedBlankNode,
+    Vec<(
+        InternedSubject,
+        InternedNamedNode,
+        InternedTerm,
+        InternedGraphName,
+    )>,
+>;
+
+/// An algorithm used to canonicalize graphs and datasets.
+///
+/// See [`Graph::canonicalize`] and [`Dataset::canonicalize`].
+#[derive(Default, Debug, Clone, Copy, Eq, PartialEq, Hash)]
+#[non_exhaustive]
+pub enum CanonicalizationAlgorithm {
+    /// The algorithm preferred by OxRDF.
+    ///
+    /// <div class="warning">The canonicalization algorithm is not stable and canonical blank node ids might change between Oxigraph versions.</div>
+    #[default]
+    Unstable,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_canon() {
+        let mut dataset = Dataset::new();
+        dataset.insert(QuadRef::new(
+            BlankNode::default().as_ref(),
+            NamedNodeRef::new_unchecked("http://ex"),
+            BlankNode::default().as_ref(),
+            GraphNameRef::DefaultGraph,
+        ));
+        dataset.insert(QuadRef::new(
+            BlankNode::default().as_ref(),
+            NamedNodeRef::new_unchecked("http://ex"),
+            BlankNode::default().as_ref(),
+            GraphNameRef::DefaultGraph,
+        ));
+        dataset.canonicalize(CanonicalizationAlgorithm::Unstable);
+        let mut dataset2 = Dataset::new();
+        dataset2.insert(QuadRef::new(
+            BlankNode::default().as_ref(),
+            NamedNodeRef::new_unchecked("http://ex"),
+            BlankNode::default().as_ref(),
+            GraphNameRef::DefaultGraph,
+        ));
+        dataset2.insert(QuadRef::new(
+            BlankNode::default().as_ref(),
+            NamedNodeRef::new_unchecked("http://ex"),
+            BlankNode::default().as_ref(),
+            GraphNameRef::DefaultGraph,
+        ));
+        dataset2.canonicalize(CanonicalizationAlgorithm::Unstable);
+        assert_eq!(dataset, dataset2);
+    }
+}
diff --git a/ng-oxigraph/src/oxrdf/graph.rs b/ng-oxigraph/src/oxrdf/graph.rs
new file mode 100644
index 0000000..8273590
--- /dev/null
+++ b/ng-oxigraph/src/oxrdf/graph.rs
@@ -0,0 +1,284 @@
+//! 
[In-memory implementation](Graph) of [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph). +//! +//! Usage example: +//! ``` +//! use oxrdf::*; +//! +//! let mut graph = Graph::default(); +//! +//! // insertion +//! let ex = NamedNodeRef::new("http://example.com")?; +//! let triple = TripleRef::new(ex, ex, ex); +//! graph.insert(triple); +//! +//! // simple filter +//! let results: Vec<_> = graph.triples_for_subject(ex).collect(); +//! assert_eq!(vec![triple], results); +//! +//! // Print +//! assert_eq!( +//! graph.to_string(), +//! "<http://example.com> <http://example.com> <http://example.com> .\n" +//! ); +//! # Result::<_,Box<dyn std::error::Error>>::Ok(()) +//! ``` +//! +//! See also [`Dataset`] if you want to get support of multiple RDF graphs at the same time. + +pub use crate::oxrdf::dataset::CanonicalizationAlgorithm; +use crate::oxrdf::dataset::*; +use crate::oxrdf::*; +use std::fmt; + +/// An in-memory [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph). +/// +/// It can accommodate a fairly large number of triples (in the few millions). +/// +/// <div class="warning">It interns the string and does not do any garbage collection yet: +/// if you insert and remove a lot of different terms, memory will grow without any reduction.</div> +/// +/// Usage example: +/// ``` +/// use oxrdf::*; +/// +/// let mut graph = Graph::default(); +/// +/// // insertion +/// let ex = NamedNodeRef::new("http://example.com")?; +/// let triple = TripleRef::new(ex, ex, ex); +/// graph.insert(triple); +/// +/// // simple filter +/// let results: Vec<_> = graph.triples_for_subject(ex).collect(); +/// assert_eq!(vec![triple], results); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[derive(Debug, Default, Clone)] +pub struct Graph { + dataset: Dataset, +} + +impl Graph { + /// Creates a new graph. + pub fn new() -> Self { + Self::default() + } + + fn graph(&self) -> GraphView<'_> { + self.dataset.graph(GraphNameRef::DefaultGraph) + } + + fn graph_mut(&mut self) -> GraphViewMut<'_> { + self.dataset.graph_mut(GraphNameRef::DefaultGraph) + } + + /// Returns all the triples contained by the graph. 
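`Graph` above is a thin wrapper that stores its triples as default-graph quads in an internal `Dataset` and delegates every operation through the `graph()`/`graph_mut()` views. A minimal sketch of that equivalence, assuming the public API exercised by the doctests (`use oxrdf::*;`):

```rust
use oxrdf::*;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let ex = NamedNodeRef::new("http://example.com")?;

    let mut graph = Graph::new();
    graph.insert(TripleRef::new(ex, ex, ex));

    // The same data modeled directly as a default-graph quad in a Dataset.
    let mut dataset = Dataset::new();
    dataset.insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph));

    assert_eq!(graph.len(), dataset.len());
    assert!(graph.contains(TripleRef::new(ex, ex, ex)));
    Ok(())
}
```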
+ pub fn iter(&self) -> Iter<'_> { + Iter { + inner: self.graph().iter(), + } + } + + pub fn triples_for_subject<'a, 'b>( + &'a self, + subject: impl Into<SubjectRef<'b>>, + ) -> impl Iterator<Item = TripleRef<'a>> + 'a { + self.graph() + .triples_for_interned_subject(self.dataset.encoded_subject(subject)) + } + + pub fn objects_for_subject_predicate<'a, 'b>( + &'a self, + subject: impl Into<SubjectRef<'b>>, + predicate: impl Into<NamedNodeRef<'b>>, + ) -> impl Iterator<Item = TermRef<'a>> + 'a { + self.graph().objects_for_interned_subject_predicate( + self.dataset.encoded_subject(subject), + self.dataset.encoded_named_node(predicate), + ) + } + + pub fn object_for_subject_predicate<'a, 'b>( + &'a self, + subject: impl Into<SubjectRef<'b>>, + predicate: impl Into<NamedNodeRef<'b>>, + ) -> Option<TermRef<'a>> { + self.graph() + .objects_for_subject_predicate(subject, predicate) + .next() + } + + pub fn predicates_for_subject_object<'a, 'b>( + &'a self, + subject: impl Into<SubjectRef<'b>>, + object: impl Into<TermRef<'b>>, + ) -> impl Iterator<Item = NamedNodeRef<'a>> + 'a { + self.graph().predicates_for_interned_subject_object( + self.dataset.encoded_subject(subject), + self.dataset.encoded_term(object), + ) + } + + pub fn triples_for_predicate<'a, 'b>( + &'a self, + predicate: impl Into<NamedNodeRef<'b>>, + ) -> impl Iterator<Item = TripleRef<'a>> + 'a { + self.graph() + .triples_for_interned_predicate(self.dataset.encoded_named_node(predicate)) + } + + pub fn subjects_for_predicate_object<'a, 'b>( + &'a self, + predicate: impl Into<NamedNodeRef<'b>>, + object: impl Into<TermRef<'b>>, + ) -> impl Iterator<Item = SubjectRef<'a>> + 'a { + self.graph().subjects_for_interned_predicate_object( + self.dataset.encoded_named_node(predicate), + self.dataset.encoded_term(object), + ) + } + + pub fn subject_for_predicate_object<'a, 'b>( + &'a self, + predicate: impl Into<NamedNodeRef<'b>>, + object: impl Into<TermRef<'b>>, + ) -> Option<SubjectRef<'a>> { + self.graph().subject_for_predicate_object(predicate, object) + } + + pub fn triples_for_object<'a, 'b>( + &'a self, + object: impl Into<TermRef<'b>>, + ) -> impl Iterator<Item = TripleRef<'a>> + 'a { + self.graph() + .triples_for_interned_object(self.dataset.encoded_term(object)) + } + + /// Checks if the graph contains the given triple. + pub fn contains<'a>(&self, triple: impl Into<TripleRef<'a>>) -> bool { + self.graph().contains(triple) + } + + /// Returns the number of triples in this graph. + pub fn len(&self) -> usize { + self.dataset.len() + } + + /// Checks if this graph contains a triple. + pub fn is_empty(&self) -> bool { + self.dataset.is_empty() + } + + /// Adds a triple to the graph. + pub fn insert<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> bool { + self.graph_mut().insert(triple) + } + + /// Removes a concrete triple from the graph. + pub fn remove<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> bool { + self.graph_mut().remove(triple) + } + + /// Clears the graph. + pub fn clear(&mut self) { + self.dataset.clear() + } + + /// Canonicalizes the dataset by renaming blank nodes. 
+ /// + /// Usage example ([Graph isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism)): + /// ``` + /// use oxrdf::graph::CanonicalizationAlgorithm; + /// use oxrdf::*; + /// + /// let iri = NamedNodeRef::new("http://example.com")?; + /// + /// let mut graph1 = Graph::new(); + /// let bnode1 = BlankNode::default(); + /// graph1.insert(TripleRef::new(iri, iri, &bnode1)); + /// graph1.insert(TripleRef::new(&bnode1, iri, iri)); + /// + /// let mut graph2 = Graph::new(); + /// let bnode2 = BlankNode::default(); + /// graph2.insert(TripleRef::new(iri, iri, &bnode2)); + /// graph2.insert(TripleRef::new(&bnode2, iri, iri)); + /// + /// assert_ne!(graph1, graph2); + /// graph1.canonicalize(CanonicalizationAlgorithm::Unstable); + /// graph2.canonicalize(CanonicalizationAlgorithm::Unstable); + /// assert_eq!(graph1, graph2); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + /// + /// <div class="warning">Blank node ids depends on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes. + /// Hence, this canonization might not be suitable for diffs.</div> + /// + /// <div class="warning">This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset.</div> + pub fn canonicalize(&mut self, algorithm: CanonicalizationAlgorithm) { + self.dataset.canonicalize(algorithm) + } +} + +impl PartialEq for Graph { + fn eq(&self, other: &Self) -> bool { + self.dataset == other.dataset + } +} + +impl Eq for Graph {} + +impl<'a> IntoIterator for &'a Graph { + type Item = TripleRef<'a>; + type IntoIter = Iter<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl FromIterator<Triple> for Graph { + fn from_iter<I: IntoIterator<Item = Triple>>(iter: I) -> Self { + let mut g = Self::new(); + g.extend(iter); + g + } +} + +impl<'a, T: Into<TripleRef<'a>>> FromIterator<T> for Graph { + fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self { + let mut g = Self::new(); + g.extend(iter); + g + } +} + +impl Extend<Triple> for Graph { + fn extend<I: IntoIterator<Item = Triple>>(&mut self, iter: I) { + self.graph_mut().extend(iter) + } +} + +impl<'a, T: Into<TripleRef<'a>>> Extend<T> for Graph { + fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) { + self.graph_mut().extend(iter) + } +} + +impl fmt::Display for Graph { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.graph().fmt(f) + } +} + +/// Iterator returned by [`Graph::iter`]. +pub struct Iter<'a> { + inner: GraphViewIter<'a>, +} + +impl<'a> Iterator for Iter<'a> { + type Item = TripleRef<'a>; + + fn next(&mut self) -> Option<Self::Item> { + self.inner.next() + } +} diff --git a/ng-oxigraph/src/oxrdf/interning.rs b/ng-oxigraph/src/oxrdf/interning.rs new file mode 100644 index 0000000..7eca09d --- /dev/null +++ b/ng-oxigraph/src/oxrdf/interning.rs @@ -0,0 +1,535 @@ +//! 
Interning of RDF elements using Rodeo
+
+use crate::oxrdf::*;
+use std::collections::hash_map::{Entry, HashMap, RandomState};
+use std::hash::{BuildHasher, Hasher};
+
+#[derive(Debug, Default, Clone)]
+pub struct Interner {
+    hasher: RandomState,
+    string_for_hash: HashMap<u64, String, IdentityHasherBuilder>,
+    string_for_blank_node_id: HashMap<u128, String>,
+    #[cfg(feature = "rdf-star")]
+    triples: HashMap<InternedTriple, Triple>,
+}
+
+impl Interner {
+    fn get_or_intern(&mut self, value: &str) -> Key {
+        let mut hash = self.hash(value);
+        loop {
+            match self.string_for_hash.entry(hash) {
+                Entry::Vacant(e) => {
+                    e.insert(value.into());
+                    return Key(hash);
+                }
+                // On a hash collision with a different string, probe the next
+                // slot and let the outer loop look it up again (mirrors `get`).
+                Entry::Occupied(e) => {
+                    if e.get() == value {
+                        return Key(hash);
+                    } else if hash == u64::MAX - 1 {
+                        hash = 0;
+                    } else {
+                        hash += 1;
+                    }
+                }
+            }
+        }
+    }
+
+    fn get(&self, value: &str) -> Option<Key> {
+        let mut hash = self.hash(value);
+        loop {
+            let v = self.string_for_hash.get(&hash)?;
+            if v == value {
+                return Some(Key(hash));
+            } else if hash == u64::MAX - 1 {
+                hash = 0;
+            } else {
+                hash += 1;
+            }
+        }
+    }
+
+    fn hash(&self, value: &str) -> u64 {
+        let mut hasher = self.hasher.build_hasher();
+        hasher.write(value.as_bytes());
+        let hash = hasher.finish();
+        if hash == u64::MAX {
+            0
+        } else {
+            hash
+        }
+    }
+
+    fn resolve(&self, key: Key) -> &str {
+        &self.string_for_hash[&key.0]
+    }
+}
+
+#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
+pub struct Key(u64);
+
+impl Key {
+    fn first() -> Self {
+        Self(0)
+    }
+
+    fn next(self) -> Self {
+        Self(self.0.saturating_add(1))
+    }
+
+    fn impossible() -> Self {
+        Self(u64::MAX)
+    }
+}
+
+#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
+pub struct InternedNamedNode {
+    id: Key,
+}
+
+impl InternedNamedNode {
+    pub fn encoded_into(named_node: NamedNodeRef<'_>, interner: &mut Interner) -> Self {
+        Self {
+            id: interner.get_or_intern(named_node.as_str()),
+        }
+    }
+
+    pub fn encoded_from(named_node: NamedNodeRef<'_>, interner: &Interner) -> Option<Self> {
+        Some(Self {
+            id: interner.get(named_node.as_str())?,
+        })
+    }
+
+    pub fn decode_from(self, interner: &Interner) -> NamedNodeRef<'_> {
+        NamedNodeRef::new_unchecked(interner.resolve(self.id))
+    }
+
+    pub fn first() -> Self {
+        Self { id: Key::first() }
+    }
+
+    pub fn next(self) -> Self {
+        Self { id: self.id.next() }
+    }
+
+    pub fn impossible() -> Self {
+        Self {
+            id: Key::impossible(),
+        }
+    }
+}
+
+#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
+pub enum InternedBlankNode {
+    Number { id: u128 },
+    Other { id: Key },
+}
+
+impl InternedBlankNode {
+    pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Interner) -> Self {
+        if let Some(id) = blank_node.unique_id() {
+            interner
+                .string_for_blank_node_id
+                .entry(id)
+                .or_insert_with(|| blank_node.as_str().into());
+            Self::Number { id }
+        } else {
+            Self::Other {
+                id: interner.get_or_intern(blank_node.as_str()),
+            }
+        }
+    }
+
+    pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Interner) -> Option<Self> {
+        if let Some(id) = blank_node.unique_id() {
+            interner
+                .string_for_blank_node_id
+                .contains_key(&id)
+                .then_some(Self::Number { id })
+        } else {
+            Some(Self::Other {
+                id: interner.get(blank_node.as_str())?,
+            })
+        }
+    }
+
+    pub fn decode_from(self, interner: &Interner) -> BlankNodeRef<'_> {
+        BlankNodeRef::new_unchecked(match self {
+            Self::Number { id } => &interner.string_for_blank_node_id[&id],
+            Self::Other { id } => interner.resolve(id),
+        })
+    
} + + pub fn next(self) -> Self { + match self { + Self::Number { id } => Self::Number { + id: id.saturating_add(1), + }, + Self::Other { id } => Self::Other { id: id.next() }, + } + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] +pub enum InternedLiteral { + String { + value_id: Key, + }, + LanguageTaggedString { + value_id: Key, + language_id: Key, + }, + TypedLiteral { + value_id: Key, + datatype: InternedNamedNode, + }, +} + +impl InternedLiteral { + pub fn encoded_into(literal: LiteralRef<'_>, interner: &mut Interner) -> Self { + let value_id = interner.get_or_intern(literal.value()); + if literal.is_plain() { + if let Some(language) = literal.language() { + Self::LanguageTaggedString { + value_id, + language_id: interner.get_or_intern(language), + } + } else { + Self::String { value_id } + } + } else { + Self::TypedLiteral { + value_id, + datatype: InternedNamedNode::encoded_into(literal.datatype(), interner), + } + } + } + + pub fn encoded_from(literal: LiteralRef<'_>, interner: &Interner) -> Option<Self> { + let value_id = interner.get(literal.value())?; + Some(if literal.is_plain() { + if let Some(language) = literal.language() { + Self::LanguageTaggedString { + value_id, + language_id: interner.get(language)?, + } + } else { + Self::String { value_id } + } + } else { + Self::TypedLiteral { + value_id, + datatype: InternedNamedNode::encoded_from(literal.datatype(), interner)?, + } + }) + } + + pub fn decode_from<'a>(&self, interner: &'a Interner) -> LiteralRef<'a> { + match self { + Self::String { value_id } => { + LiteralRef::new_simple_literal(interner.resolve(*value_id)) + } + Self::LanguageTaggedString { + value_id, + language_id, + } => LiteralRef::new_language_tagged_literal_unchecked( + interner.resolve(*value_id), + interner.resolve(*language_id), + ), + Self::TypedLiteral { value_id, datatype } => LiteralRef::new_typed_literal( + interner.resolve(*value_id), + datatype.decode_from(interner), + ), + } + } + + pub fn next(&self) -> Self { + match self { + Self::String { value_id } => Self::String { + value_id: value_id.next(), + }, + Self::LanguageTaggedString { + value_id, + language_id, + } => Self::LanguageTaggedString { + value_id: *value_id, + language_id: language_id.next(), + }, + Self::TypedLiteral { value_id, datatype } => Self::TypedLiteral { + value_id: *value_id, + datatype: datatype.next(), + }, + } + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub enum InternedSubject { + NamedNode(InternedNamedNode), + BlankNode(InternedBlankNode), + #[cfg(feature = "rdf-star")] + Triple(Box<InternedTriple>), +} + +impl InternedSubject { + pub fn encoded_into(node: SubjectRef<'_>, interner: &mut Interner) -> Self { + match node { + SubjectRef::NamedNode(node) => { + Self::NamedNode(InternedNamedNode::encoded_into(node, interner)) + } + SubjectRef::BlankNode(node) => { + Self::BlankNode(InternedBlankNode::encoded_into(node, interner)) + } + #[cfg(feature = "rdf-star")] + SubjectRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_into( + triple.as_ref(), + interner, + ))), + } + } + + pub fn encoded_from(node: SubjectRef<'_>, interner: &Interner) -> Option<Self> { + Some(match node { + SubjectRef::NamedNode(node) => { + Self::NamedNode(InternedNamedNode::encoded_from(node, interner)?) + } + SubjectRef::BlankNode(node) => { + Self::BlankNode(InternedBlankNode::encoded_from(node, interner)?) 
+ } + #[cfg(feature = "rdf-star")] + SubjectRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_from( + triple.as_ref(), + interner, + )?)), + }) + } + + pub fn decode_from<'a>(&self, interner: &'a Interner) -> SubjectRef<'a> { + match self { + Self::NamedNode(node) => SubjectRef::NamedNode(node.decode_from(interner)), + Self::BlankNode(node) => SubjectRef::BlankNode(node.decode_from(interner)), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => SubjectRef::Triple(&interner.triples[triple.as_ref()]), + } + } + + pub fn first() -> Self { + Self::NamedNode(InternedNamedNode::first()) + } + + pub fn next(&self) -> Self { + match self { + Self::NamedNode(node) => Self::NamedNode(node.next()), + Self::BlankNode(node) => Self::BlankNode(node.next()), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => Self::Triple(Box::new(triple.next())), + } + } + + pub fn impossible() -> Self { + Self::NamedNode(InternedNamedNode::impossible()) + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub enum InternedGraphName { + DefaultGraph, + NamedNode(InternedNamedNode), + BlankNode(InternedBlankNode), +} + +impl InternedGraphName { + pub fn encoded_into(node: GraphNameRef<'_>, interner: &mut Interner) -> Self { + match node { + GraphNameRef::DefaultGraph => Self::DefaultGraph, + GraphNameRef::NamedNode(node) => { + Self::NamedNode(InternedNamedNode::encoded_into(node, interner)) + } + GraphNameRef::BlankNode(node) => { + Self::BlankNode(InternedBlankNode::encoded_into(node, interner)) + } + } + } + + pub fn encoded_from(node: GraphNameRef<'_>, interner: &Interner) -> Option<Self> { + Some(match node { + GraphNameRef::DefaultGraph => Self::DefaultGraph, + GraphNameRef::NamedNode(node) => { + Self::NamedNode(InternedNamedNode::encoded_from(node, interner)?) + } + GraphNameRef::BlankNode(node) => { + Self::BlankNode(InternedBlankNode::encoded_from(node, interner)?) 
+ } + }) + } + + pub fn decode_from<'a>(&self, interner: &'a Interner) -> GraphNameRef<'a> { + match self { + Self::DefaultGraph => GraphNameRef::DefaultGraph, + Self::NamedNode(node) => GraphNameRef::NamedNode(node.decode_from(interner)), + Self::BlankNode(node) => GraphNameRef::BlankNode(node.decode_from(interner)), + } + } + + pub fn first() -> Self { + Self::DefaultGraph + } + + pub fn next(&self) -> Self { + match self { + Self::DefaultGraph => Self::NamedNode(InternedNamedNode::first()), + Self::NamedNode(node) => Self::NamedNode(node.next()), + Self::BlankNode(node) => Self::BlankNode(node.next()), + } + } + + pub fn impossible() -> Self { + Self::NamedNode(InternedNamedNode::impossible()) + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub enum InternedTerm { + NamedNode(InternedNamedNode), + BlankNode(InternedBlankNode), + Literal(InternedLiteral), + #[cfg(feature = "rdf-star")] + Triple(Box<InternedTriple>), +} + +impl InternedTerm { + pub fn encoded_into(term: TermRef<'_>, interner: &mut Interner) -> Self { + match term { + TermRef::NamedNode(term) => { + Self::NamedNode(InternedNamedNode::encoded_into(term, interner)) + } + TermRef::BlankNode(term) => { + Self::BlankNode(InternedBlankNode::encoded_into(term, interner)) + } + TermRef::Literal(term) => Self::Literal(InternedLiteral::encoded_into(term, interner)), + #[cfg(feature = "rdf-star")] + TermRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_into( + triple.as_ref(), + interner, + ))), + } + } + + pub fn encoded_from(term: TermRef<'_>, interner: &Interner) -> Option<Self> { + Some(match term { + TermRef::NamedNode(term) => { + Self::NamedNode(InternedNamedNode::encoded_from(term, interner)?) + } + TermRef::BlankNode(term) => { + Self::BlankNode(InternedBlankNode::encoded_from(term, interner)?) 
+ } + TermRef::Literal(term) => Self::Literal(InternedLiteral::encoded_from(term, interner)?), + #[cfg(feature = "rdf-star")] + TermRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_from( + triple.as_ref(), + interner, + )?)), + }) + } + + pub fn decode_from<'a>(&self, interner: &'a Interner) -> TermRef<'a> { + match self { + Self::NamedNode(term) => TermRef::NamedNode(term.decode_from(interner)), + Self::BlankNode(term) => TermRef::BlankNode(term.decode_from(interner)), + Self::Literal(term) => TermRef::Literal(term.decode_from(interner)), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => TermRef::Triple(&interner.triples[triple.as_ref()]), + } + } + + pub fn first() -> Self { + Self::NamedNode(InternedNamedNode::first()) + } + + pub fn next(&self) -> Self { + match self { + Self::NamedNode(node) => Self::NamedNode(node.next()), + Self::BlankNode(node) => Self::BlankNode(node.next()), + Self::Literal(node) => Self::Literal(node.next()), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => Self::Triple(Box::new(triple.next())), + } + } + + pub fn impossible() -> Self { + Self::NamedNode(InternedNamedNode::impossible()) + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub struct InternedTriple { + pub subject: InternedSubject, + pub predicate: InternedNamedNode, + pub object: InternedTerm, +} + +#[cfg(feature = "rdf-star")] +impl InternedTriple { + pub fn encoded_into(triple: TripleRef<'_>, interner: &mut Interner) -> Self { + let interned_triple = Self { + subject: InternedSubject::encoded_into(triple.subject, interner), + predicate: InternedNamedNode::encoded_into(triple.predicate, interner), + object: InternedTerm::encoded_into(triple.object, interner), + }; + interner + .triples + .insert(interned_triple.clone(), triple.into_owned()); + interned_triple + } + + pub fn encoded_from(triple: TripleRef<'_>, interner: &Interner) -> Option<Self> { + let interned_triple = Self { + subject: InternedSubject::encoded_from(triple.subject, interner)?, + predicate: InternedNamedNode::encoded_from(triple.predicate, interner)?, + object: InternedTerm::encoded_from(triple.object, interner)?, + }; + interner + .triples + .contains_key(&interned_triple) + .then_some(interned_triple) + } + + pub fn next(&self) -> Self { + Self { + subject: self.subject.clone(), + predicate: self.predicate, + object: self.object.next(), + } + } +} + +#[derive(Default, Clone)] +struct IdentityHasherBuilder; + +impl BuildHasher for IdentityHasherBuilder { + type Hasher = IdentityHasher; + + fn build_hasher(&self) -> Self::Hasher { + Self::Hasher::default() + } +} + +#[derive(Default)] +struct IdentityHasher { + value: u64, +} + +impl Hasher for IdentityHasher { + fn finish(&self) -> u64 { + self.value + } + + fn write(&mut self, _bytes: &[u8]) { + unreachable!("Should only be used on u64 values") + } + + fn write_u64(&mut self, i: u64) { + self.value = i + } +} diff --git a/ng-oxigraph/src/oxrdf/literal.rs b/ng-oxigraph/src/oxrdf/literal.rs new file mode 100644 index 0000000..b9647c9 --- /dev/null +++ b/ng-oxigraph/src/oxrdf/literal.rs @@ -0,0 +1,669 @@ +use crate::oxrdf::named_node::{NamedNode, NamedNodeRef}; +use crate::oxrdf::vocab::{rdf, xsd}; +#[cfg(feature = "oxsdatatypes")] +use crate::oxsdatatypes::*; +use oxilangtag::{LanguageTag, LanguageTagParseError}; +use serde::{Deserialize, Serialize}; +use std::borrow::Cow; +use std::fmt; +use std::fmt::Write; + +/// An owned RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal). 
+/// +/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation: +/// ``` +/// # use oxilangtag::LanguageTagParseError; +/// use oxrdf::vocab::xsd; +/// use oxrdf::Literal; +/// +/// assert_eq!( +/// "\"foo\\nbar\"", +/// Literal::new_simple_literal("foo\nbar").to_string() +/// ); +/// +/// assert_eq!( +/// r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#, +/// Literal::new_typed_literal("1999-01-01", xsd::DATE).to_string() +/// ); +/// +/// assert_eq!( +/// r#""foo"@en"#, +/// Literal::new_language_tagged_literal("foo", "en")?.to_string() +/// ); +/// # Result::<(), LanguageTagParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] +pub struct Literal(LiteralContent); + +#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)] +enum LiteralContent { + String(String), + LanguageTaggedString { value: String, language: String }, + TypedLiteral { value: String, datatype: NamedNode }, +} + +impl Literal { + /// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal). + #[inline] + pub fn new_simple_literal(value: impl Into<String>) -> Self { + Self(LiteralContent::String(value.into())) + } + + /// Builds an RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri). + #[inline] + pub fn new_typed_literal(value: impl Into<String>, datatype: impl Into<NamedNode>) -> Self { + let value = value.into(); + let datatype = datatype.into(); + Self(if datatype == xsd::STRING { + LiteralContent::String(value) + } else { + LiteralContent::TypedLiteral { value, datatype } + }) + } + + /// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). + #[inline] + pub fn new_language_tagged_literal( + value: impl Into<String>, + language: impl Into<String>, + ) -> Result<Self, LanguageTagParseError> { + let mut language = language.into(); + language.make_ascii_lowercase(); + Ok(Self::new_language_tagged_literal_unchecked( + value, + LanguageTag::parse(language)?.into_inner(), + )) + } + + /// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). + /// + /// It is the responsibility of the caller to check that `language` + /// is valid [BCP47](https://tools.ietf.org/html/bcp47) language tag, + /// and is lowercase. + /// + /// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data. + #[inline] + pub fn new_language_tagged_literal_unchecked( + value: impl Into<String>, + language: impl Into<String>, + ) -> Self { + Self(LiteralContent::LanguageTaggedString { + value: value.into(), + language: language.into(), + }) + } + + /// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form). + #[inline] + pub fn value(&self) -> &str { + self.as_ref().value() + } + + /// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). + /// + /// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47). + /// They are normalized to lowercase by this implementation. + #[inline] + pub fn language(&self) -> Option<&str> { + self.as_ref().language() + } + + /// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri). 
+ /// + /// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). + /// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string). + #[inline] + pub fn datatype(&self) -> NamedNodeRef<'_> { + self.as_ref().datatype() + } + + /// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/2004/REC-rdf-concepts-20040210/#dfn-plain-literal). + /// + /// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) + /// or has the datatype [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string). + #[inline] + pub fn is_plain(&self) -> bool { + self.as_ref().is_plain() + } + + #[inline] + pub fn as_ref(&self) -> LiteralRef<'_> { + LiteralRef(match &self.0 { + LiteralContent::String(value) => LiteralRefContent::String(value), + LiteralContent::LanguageTaggedString { value, language } => { + LiteralRefContent::LanguageTaggedString { value, language } + } + LiteralContent::TypedLiteral { value, datatype } => LiteralRefContent::TypedLiteral { + value, + datatype: datatype.as_ref(), + }, + }) + } + + /// Extract components from this literal (value, datatype and language tag). + #[inline] + pub fn destruct(self) -> (String, Option<NamedNode>, Option<String>) { + match self.0 { + LiteralContent::String(s) => (s, None, None), + LiteralContent::LanguageTaggedString { value, language } => { + (value, None, Some(language)) + } + LiteralContent::TypedLiteral { value, datatype } => (value, Some(datatype), None), + } + } +} + +impl fmt::Display for Literal { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_ref().fmt(f) + } +} + +impl<'a> From<&'a str> for Literal { + #[inline] + fn from(value: &'a str) -> Self { + Self(LiteralContent::String(value.into())) + } +} + +impl From<String> for Literal { + #[inline] + fn from(value: String) -> Self { + Self(LiteralContent::String(value)) + } +} + +impl<'a> From<Cow<'a, str>> for Literal { + #[inline] + fn from(value: Cow<'a, str>) -> Self { + Self(LiteralContent::String(value.into())) + } +} + +impl From<bool> for Literal { + #[inline] + fn from(value: bool) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::BOOLEAN.into(), + }) + } +} + +impl From<i128> for Literal { + #[inline] + fn from(value: i128) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.into(), + }) + } +} + +impl From<i64> for Literal { + #[inline] + fn from(value: i64) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.into(), + }) + } +} + +impl From<i32> for Literal { + #[inline] + fn from(value: i32) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.into(), + }) + } +} + +impl From<i16> for Literal { + #[inline] + fn from(value: i16) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.into(), + }) + } +} + +impl From<u64> for Literal { + #[inline] + fn from(value: u64) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.into(), + }) + } +} + +impl From<u32> for Literal { + #[inline] + fn from(value: u32) -> Self { + 
Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.into(), + }) + } +} + +impl From<u16> for Literal { + #[inline] + fn from(value: u16) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.into(), + }) + } +} + +impl From<f32> for Literal { + #[inline] + fn from(value: f32) -> Self { + Self(LiteralContent::TypedLiteral { + value: if value == f32::INFINITY { + "INF".to_owned() + } else if value == f32::NEG_INFINITY { + "-INF".to_owned() + } else { + value.to_string() + }, + datatype: xsd::FLOAT.into(), + }) + } +} + +impl From<f64> for Literal { + #[inline] + fn from(value: f64) -> Self { + Self(LiteralContent::TypedLiteral { + value: if value == f64::INFINITY { + "INF".to_owned() + } else if value == f64::NEG_INFINITY { + "-INF".to_owned() + } else { + value.to_string() + }, + datatype: xsd::DOUBLE.into(), + }) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<Boolean> for Literal { + #[inline] + fn from(value: Boolean) -> Self { + Self::new_typed_literal(value.to_string(), xsd::BOOLEAN) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<Float> for Literal { + #[inline] + fn from(value: Float) -> Self { + Self::new_typed_literal(value.to_string(), xsd::FLOAT) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<Double> for Literal { + #[inline] + fn from(value: Double) -> Self { + Self::new_typed_literal(value.to_string(), xsd::DOUBLE) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<Integer> for Literal { + #[inline] + fn from(value: Integer) -> Self { + Self::new_typed_literal(value.to_string(), xsd::INTEGER) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<Decimal> for Literal { + #[inline] + fn from(value: Decimal) -> Self { + Self::new_typed_literal(value.to_string(), xsd::DECIMAL) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<DateTime> for Literal { + #[inline] + fn from(value: DateTime) -> Self { + Self::new_typed_literal(value.to_string(), xsd::DATE_TIME) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<Time> for Literal { + #[inline] + fn from(value: Time) -> Self { + Self::new_typed_literal(value.to_string(), xsd::TIME) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<Date> for Literal { + #[inline] + fn from(value: Date) -> Self { + Self::new_typed_literal(value.to_string(), xsd::DATE) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<GYearMonth> for Literal { + #[inline] + fn from(value: GYearMonth) -> Self { + Self::new_typed_literal(value.to_string(), xsd::G_YEAR_MONTH) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<GYear> for Literal { + #[inline] + fn from(value: GYear) -> Self { + Self::new_typed_literal(value.to_string(), xsd::G_YEAR) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<GMonthDay> for Literal { + #[inline] + fn from(value: GMonthDay) -> Self { + Self::new_typed_literal(value.to_string(), xsd::G_MONTH_DAY) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<GMonth> for Literal { + #[inline] + fn from(value: GMonth) -> Self { + Self::new_typed_literal(value.to_string(), xsd::G_MONTH) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<GDay> for Literal { + #[inline] + fn from(value: GDay) -> Self { + Self::new_typed_literal(value.to_string(), xsd::G_DAY) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<Duration> for Literal { + #[inline] + fn from(value: Duration) -> Self { + Self::new_typed_literal(value.to_string(), xsd::DURATION) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<YearMonthDuration> for Literal 
{ + #[inline] + fn from(value: YearMonthDuration) -> Self { + Self::new_typed_literal(value.to_string(), xsd::YEAR_MONTH_DURATION) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From<DayTimeDuration> for Literal { + #[inline] + fn from(value: DayTimeDuration) -> Self { + Self::new_typed_literal(value.to_string(), xsd::DAY_TIME_DURATION) + } +} + +/// A borrowed RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal). +/// +/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation: +/// ``` +/// use oxrdf::vocab::xsd; +/// use oxrdf::LiteralRef; +/// +/// assert_eq!( +/// "\"foo\\nbar\"", +/// LiteralRef::new_simple_literal("foo\nbar").to_string() +/// ); +/// +/// assert_eq!( +/// r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#, +/// LiteralRef::new_typed_literal("1999-01-01", xsd::DATE).to_string() +/// ); +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +pub struct LiteralRef<'a>(LiteralRefContent<'a>); + +#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)] +enum LiteralRefContent<'a> { + String(&'a str), + LanguageTaggedString { + value: &'a str, + language: &'a str, + }, + TypedLiteral { + value: &'a str, + datatype: NamedNodeRef<'a>, + }, +} + +impl<'a> LiteralRef<'a> { + /// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal). + #[inline] + pub const fn new_simple_literal(value: &'a str) -> Self { + LiteralRef(LiteralRefContent::String(value)) + } + + /// Builds an RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri). + #[inline] + pub fn new_typed_literal(value: &'a str, datatype: impl Into<NamedNodeRef<'a>>) -> Self { + let datatype = datatype.into(); + LiteralRef(if datatype == xsd::STRING { + LiteralRefContent::String(value) + } else { + LiteralRefContent::TypedLiteral { value, datatype } + }) + } + + /// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). + /// + /// It is the responsibility of the caller to check that `language` + /// is valid [BCP47](https://tools.ietf.org/html/bcp47) language tag, + /// and is lowercase. + /// + /// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data. + #[inline] + pub const fn new_language_tagged_literal_unchecked(value: &'a str, language: &'a str) -> Self { + LiteralRef(LiteralRefContent::LanguageTaggedString { value, language }) + } + + /// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form) + #[inline] + pub const fn value(self) -> &'a str { + match self.0 { + LiteralRefContent::String(value) + | LiteralRefContent::LanguageTaggedString { value, .. } + | LiteralRefContent::TypedLiteral { value, .. } => value, + } + } + + /// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). + /// + /// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47). + /// They are normalized to lowercase by this implementation. + #[inline] + pub const fn language(self) -> Option<&'a str> { + match self.0 { + LiteralRefContent::LanguageTaggedString { language, .. } => Some(language), + _ => None, + } + } + + /// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri). 
+ /// + /// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). + /// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string). + #[inline] + pub const fn datatype(self) -> NamedNodeRef<'a> { + match self.0 { + LiteralRefContent::String(_) => xsd::STRING, + LiteralRefContent::LanguageTaggedString { .. } => rdf::LANG_STRING, + LiteralRefContent::TypedLiteral { datatype, .. } => datatype, + } + } + + /// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/2004/REC-rdf-concepts-20040210/#dfn-plain-literal). + /// + /// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) + /// or has the datatype [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string). + #[inline] + pub const fn is_plain(self) -> bool { + matches!( + self.0, + LiteralRefContent::String(_) | LiteralRefContent::LanguageTaggedString { .. } + ) + } + + #[inline] + pub fn into_owned(self) -> Literal { + Literal(match self.0 { + LiteralRefContent::String(value) => LiteralContent::String(value.to_owned()), + LiteralRefContent::LanguageTaggedString { value, language } => { + LiteralContent::LanguageTaggedString { + value: value.to_owned(), + language: language.to_owned(), + } + } + LiteralRefContent::TypedLiteral { value, datatype } => LiteralContent::TypedLiteral { + value: value.to_owned(), + datatype: datatype.into_owned(), + }, + }) + } + + /// Extract components from this literal + #[inline] + pub const fn destruct(self) -> (&'a str, Option<NamedNodeRef<'a>>, Option<&'a str>) { + match self.0 { + LiteralRefContent::String(s) => (s, None, None), + LiteralRefContent::LanguageTaggedString { value, language } => { + (value, None, Some(language)) + } + LiteralRefContent::TypedLiteral { value, datatype } => (value, Some(datatype), None), + } + } +} + +impl fmt::Display for LiteralRef<'_> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.0 { + LiteralRefContent::String(value) => print_quoted_str(value, f), + LiteralRefContent::LanguageTaggedString { value, language } => { + print_quoted_str(value, f)?; + write!(f, "@{language}") + } + LiteralRefContent::TypedLiteral { value, datatype } => { + print_quoted_str(value, f)?; + write!(f, "^^{datatype}") + } + } + } +} + +impl<'a> From<&'a Literal> for LiteralRef<'a> { + #[inline] + fn from(node: &'a Literal) -> Self { + node.as_ref() + } +} + +impl<'a> From<LiteralRef<'a>> for Literal { + #[inline] + fn from(node: LiteralRef<'a>) -> Self { + node.into_owned() + } +} + +impl<'a> From<&'a str> for LiteralRef<'a> { + #[inline] + fn from(value: &'a str) -> Self { + LiteralRef(LiteralRefContent::String(value)) + } +} + +impl PartialEq<Literal> for LiteralRef<'_> { + #[inline] + fn eq(&self, other: &Literal) -> bool { + *self == other.as_ref() + } +} + +impl PartialEq<LiteralRef<'_>> for Literal { + #[inline] + fn eq(&self, other: &LiteralRef<'_>) -> bool { + self.as_ref() == *other + } +} + +#[inline] +pub fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result { + f.write_char('"')?; + for c in string.chars() { + match c { + '\u{08}' => f.write_str("\\b"), + '\t' => f.write_str("\\t"), + '\n' => f.write_str("\\n"), + '\u{0C}' => f.write_str("\\f"), + '\r' => f.write_str("\\r"), + '"' => 
f.write_str("\\\""), + '\\' => f.write_str("\\\\"), + '\0'..='\u{1F}' | '\u{7F}' => write!(f, "\\u{:04X}", u32::from(c)), + _ => f.write_char(c), + }?; + } + f.write_char('"') +} + +#[cfg(test)] +#[allow(clippy::panic_in_result_fn)] +mod tests { + use super::*; + + #[test] + fn test_simple_literal_equality() { + assert_eq!( + Literal::new_simple_literal("foo"), + Literal::new_typed_literal("foo", xsd::STRING) + ); + assert_eq!( + Literal::new_simple_literal("foo"), + LiteralRef::new_typed_literal("foo", xsd::STRING) + ); + assert_eq!( + LiteralRef::new_simple_literal("foo"), + Literal::new_typed_literal("foo", xsd::STRING) + ); + assert_eq!( + LiteralRef::new_simple_literal("foo"), + LiteralRef::new_typed_literal("foo", xsd::STRING) + ); + } + + #[test] + fn test_float_format() { + assert_eq!("INF", Literal::from(f32::INFINITY).value()); + assert_eq!("INF", Literal::from(f64::INFINITY).value()); + assert_eq!("-INF", Literal::from(f32::NEG_INFINITY).value()); + assert_eq!("-INF", Literal::from(f64::NEG_INFINITY).value()); + assert_eq!("NaN", Literal::from(f32::NAN).value()); + assert_eq!("NaN", Literal::from(f64::NAN).value()); + } +} diff --git a/ng-oxigraph/src/oxrdf/mod.rs b/ng-oxigraph/src/oxrdf/mod.rs new file mode 100644 index 0000000..1c94e87 --- /dev/null +++ b/ng-oxigraph/src/oxrdf/mod.rs @@ -0,0 +1,24 @@ +mod blank_node; +pub mod dataset; +pub mod graph; +mod interning; +mod literal; +mod named_node; +mod parser; +mod triple; +mod variable; +pub mod vocab; + +pub use crate::oxrdf::blank_node::{BlankNode, BlankNodeIdParseError, BlankNodeRef}; +pub use crate::oxrdf::dataset::Dataset; +pub use crate::oxrdf::graph::Graph; +pub use crate::oxrdf::literal::{Literal, LiteralRef}; +pub use crate::oxrdf::named_node::{NamedNode, NamedNodeRef}; +pub use crate::oxrdf::parser::TermParseError; +pub use crate::oxrdf::triple::{ + GraphName, GraphNameRef, NamedOrBlankNode, NamedOrBlankNodeRef, Quad, QuadRef, Subject, + SubjectRef, Term, TermRef, Triple, TripleRef, TryFromTermError, +}; +pub use crate::oxrdf::variable::{Variable, VariableNameParseError, VariableRef}; +pub use oxilangtag::LanguageTagParseError; +pub use oxiri::IriParseError; diff --git a/ng-oxigraph/src/oxrdf/named_node.rs b/ng-oxigraph/src/oxrdf/named_node.rs new file mode 100644 index 0000000..41a5516 --- /dev/null +++ b/ng-oxigraph/src/oxrdf/named_node.rs @@ -0,0 +1,237 @@ +use oxiri::{Iri, IriParseError}; +use serde::{Deserialize, Serialize}; +use std::cmp::Ordering; +use std::fmt; + +/// An owned RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri). +/// +/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation: +/// ``` +/// use oxrdf::NamedNode; +/// +/// assert_eq!( +/// "<http://example.com/foo>", +/// NamedNode::new("http://example.com/foo")?.to_string() +/// ); +/// # Result::<_,oxrdf::IriParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash, Serialize, Deserialize)] +pub struct NamedNode { + iri: String, +} + +impl NamedNode { + /// Builds and validate an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri). + pub fn new(iri: impl Into<String>) -> Result<Self, IriParseError> { + Ok(Self::new_from_iri(Iri::parse(iri.into())?)) + } + + #[inline] + pub(crate) fn new_from_iri(iri: Iri<String>) -> Self { + Self::new_unchecked(iri.into_inner()) + } + + /// Builds an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) from a string. + /// + /// It is the caller's responsibility to ensure that `iri` is a valid IRI. 
+ /// + /// [`NamedNode::new()`] is a safe version of this constructor and should be used for untrusted data. + #[inline] + pub fn new_unchecked(iri: impl Into<String>) -> Self { + Self { iri: iri.into() } + } + + #[inline] + pub fn as_str(&self) -> &str { + self.iri.as_str() + } + + #[inline] + pub fn into_string(self) -> String { + self.iri + } + + #[inline] + pub fn as_ref(&self) -> NamedNodeRef<'_> { + NamedNodeRef::new_unchecked(&self.iri) + } +} + +impl fmt::Display for NamedNode { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_ref().fmt(f) + } +} + +impl PartialEq<str> for NamedNode { + #[inline] + fn eq(&self, other: &str) -> bool { + self.as_str() == other + } +} + +impl PartialEq<NamedNode> for str { + #[inline] + fn eq(&self, other: &NamedNode) -> bool { + self == other.as_str() + } +} + +impl PartialEq<&str> for NamedNode { + #[inline] + fn eq(&self, other: &&str) -> bool { + self == *other + } +} + +impl PartialEq<NamedNode> for &str { + #[inline] + fn eq(&self, other: &NamedNode) -> bool { + *self == other + } +} + +/// A borrowed RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri). +/// +/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation: +/// ``` +/// use oxrdf::NamedNodeRef; +/// +/// assert_eq!( +/// "<http://example.com/foo>", +/// NamedNodeRef::new("http://example.com/foo")?.to_string() +/// ); +/// # Result::<_,oxrdf::IriParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] +pub struct NamedNodeRef<'a> { + iri: &'a str, +} + +impl<'a> NamedNodeRef<'a> { + /// Builds and validate an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) + pub fn new(iri: &'a str) -> Result<Self, IriParseError> { + Ok(Self::new_from_iri(Iri::parse(iri)?)) + } + + #[inline] + pub(crate) fn new_from_iri(iri: Iri<&'a str>) -> Self { + Self::new_unchecked(iri.into_inner()) + } + + /// Builds an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) from a string. + /// + /// It is the caller's responsibility to ensure that `iri` is a valid IRI. + /// + /// [`NamedNode::new()`] is a safe version of this constructor and should be used for untrusted data. 
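A usage sketch for the owned/borrowed pair above, assuming the vendored module is exposed as `ng_oxigraph::oxrdf` (per the `lib.rs` and `mod.rs` changes elsewhere in this patch): `new` validates, `new_unchecked` skips validation, and the two forms compare equal through the cross-type `PartialEq` impls that follow.

```rust
use ng_oxigraph::oxrdf::{IriParseError, NamedNode, NamedNodeRef};

fn main() -> Result<(), IriParseError> {
    // Validated construction rejects strings that are not IRIs.
    assert!(NamedNode::new("not an iri").is_err());

    let owned = NamedNode::new("http://example.com/foo")?;
    let borrowed: NamedNodeRef<'_> = owned.as_ref();

    // Owned and borrowed forms compare equal and print the same
    // N-Triples / Turtle / SPARQL representation.
    assert_eq!(owned, borrowed);
    assert_eq!(borrowed.to_string(), "<http://example.com/foo>");
    Ok(())
}
```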
+ #[inline] + pub const fn new_unchecked(iri: &'a str) -> Self { + Self { iri } + } + + #[inline] + pub const fn as_str(self) -> &'a str { + self.iri + } + + #[inline] + pub fn into_owned(self) -> NamedNode { + NamedNode::new_unchecked(self.iri) + } +} + +impl fmt::Display for NamedNodeRef<'_> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "<{}>", self.as_str()) + } +} + +impl From<NamedNodeRef<'_>> for NamedNode { + #[inline] + fn from(node: NamedNodeRef<'_>) -> Self { + node.into_owned() + } +} + +impl<'a> From<&'a NamedNode> for NamedNodeRef<'a> { + #[inline] + fn from(node: &'a NamedNode) -> Self { + node.as_ref() + } +} + +impl PartialEq<NamedNode> for NamedNodeRef<'_> { + #[inline] + fn eq(&self, other: &NamedNode) -> bool { + self.as_str() == other.as_str() + } +} + +impl PartialEq<NamedNodeRef<'_>> for NamedNode { + #[inline] + fn eq(&self, other: &NamedNodeRef<'_>) -> bool { + self.as_str() == other.as_str() + } +} + +impl PartialEq<str> for NamedNodeRef<'_> { + #[inline] + fn eq(&self, other: &str) -> bool { + self.as_str() == other + } +} + +impl PartialEq<NamedNodeRef<'_>> for str { + #[inline] + fn eq(&self, other: &NamedNodeRef<'_>) -> bool { + self == other.as_str() + } +} + +impl PartialEq<&str> for NamedNodeRef<'_> { + #[inline] + fn eq(&self, other: &&str) -> bool { + self == *other + } +} + +impl PartialEq<NamedNodeRef<'_>> for &str { + #[inline] + fn eq(&self, other: &NamedNodeRef<'_>) -> bool { + *self == other + } +} + +impl PartialOrd<NamedNode> for NamedNodeRef<'_> { + #[inline] + fn partial_cmp(&self, other: &NamedNode) -> Option<Ordering> { + self.partial_cmp(&other.as_ref()) + } +} + +impl PartialOrd<NamedNodeRef<'_>> for NamedNode { + #[inline] + fn partial_cmp(&self, other: &NamedNodeRef<'_>) -> Option<Ordering> { + self.as_ref().partial_cmp(other) + } +} + +impl From<Iri<String>> for NamedNode { + #[inline] + fn from(iri: Iri<String>) -> Self { + Self { + iri: iri.into_inner(), + } + } +} + +impl<'a> From<Iri<&'a str>> for NamedNodeRef<'a> { + #[inline] + fn from(iri: Iri<&'a str>) -> Self { + Self { + iri: iri.into_inner(), + } + } +} diff --git a/ng-oxigraph/src/oxrdf/parser.rs b/ng-oxigraph/src/oxrdf/parser.rs new file mode 100644 index 0000000..d0386bb --- /dev/null +++ b/ng-oxigraph/src/oxrdf/parser.rs @@ -0,0 +1,469 @@ +use crate::oxrdf::vocab::xsd; +use crate::oxrdf::{ + BlankNode, BlankNodeIdParseError, IriParseError, LanguageTagParseError, Literal, NamedNode, + Term, Variable, VariableNameParseError, +}; +#[cfg(feature = "rdf-star")] +use crate::oxrdf::{Subject, Triple}; +use std::char; +use std::str::{Chars, FromStr}; + +/// This limit is set in order to avoid stack overflow error when parsing nested triples due to too many recursive calls. +/// The actual limit value is a wet finger compromise between not failing to parse valid files and avoiding to trigger stack overflow errors. 
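The recursion guard described just above can be exercised directly: over-nested RDF-star input should fail with a parse error rather than a stack overflow. A sketch, assuming the `ng_oxigraph::oxrdf` path as before; it passes whether or not the `rdf-star` feature is enabled, since both configurations reject the input.

```rust
use ng_oxigraph::oxrdf::Term;
use std::str::FromStr;

fn main() {
    // Wrap a triple in << ... >> far past the 128-level limit.
    let mut s =
        "<http://example.com/s> <http://example.com/p> <http://example.com/o>".to_owned();
    for _ in 0..200 {
        s = format!("<< {s} >> <http://example.com/p> <http://example.com/o>");
    }
    // With rdf-star enabled this hits the nesting limit; without it,
    // quoted triples are rejected outright. Either way: an Err, no overflow.
    assert!(Term::from_str(&format!("<< {s} >>")).is_err());
}
```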
+const MAX_NUMBER_OF_NESTED_TRIPLES: usize = 128; + +impl FromStr for NamedNode { + type Err = TermParseError; + + /// Parses a named node from its NTriples and Turtle serialization + /// + /// ``` + /// use oxrdf::NamedNode; + /// use std::str::FromStr; + /// + /// assert_eq!( + /// NamedNode::from_str("<http://example.com>").unwrap(), + /// NamedNode::new("http://example.com").unwrap() + /// ) + /// ``` + fn from_str(s: &str) -> Result<Self, Self::Err> { + let (term, left) = read_named_node(s)?; + if !left.is_empty() { + return Err(Self::Err::msg( + "Named node serialization should end with a >", + )); + } + Ok(term) + } +} + +impl FromStr for BlankNode { + type Err = TermParseError; + + /// Parses a blank node from its NTriples and Turtle serialization + /// + /// ``` + /// use oxrdf::BlankNode; + /// use std::str::FromStr; + /// + /// assert_eq!( + /// BlankNode::from_str("_:ex").unwrap(), + /// BlankNode::new("ex").unwrap() + /// ) + /// ``` + fn from_str(s: &str) -> Result<Self, Self::Err> { + let (term, left) = read_blank_node(s)?; + if !left.is_empty() { + return Err(Self::Err::msg( + "Blank node serialization should not contain whitespaces", + )); + } + Ok(term) + } +} + +impl FromStr for Literal { + type Err = TermParseError; + + /// Parses a literal from its NTriples or Turtle serialization + /// + /// ``` + /// use oxrdf::vocab::xsd; + /// use oxrdf::{Literal, NamedNode}; + /// use std::str::FromStr; + /// + /// assert_eq!( + /// Literal::from_str("\"ex\\n\"").unwrap(), + /// Literal::new_simple_literal("ex\n") + /// ); + /// assert_eq!( + /// Literal::from_str("\"ex\"@en").unwrap(), + /// Literal::new_language_tagged_literal("ex", "en").unwrap() + /// ); + /// assert_eq!( + /// Literal::from_str("\"2020\"^^<http://www.w3.org/2001/XMLSchema#gYear>").unwrap(), + /// Literal::new_typed_literal( + /// "2020", + /// NamedNode::new("http://www.w3.org/2001/XMLSchema#gYear").unwrap() + /// ) + /// ); + /// assert_eq!( + /// Literal::from_str("true").unwrap(), + /// Literal::new_typed_literal("true", xsd::BOOLEAN) + /// ); + /// assert_eq!( + /// Literal::from_str("+122").unwrap(), + /// Literal::new_typed_literal("+122", xsd::INTEGER) + /// ); + /// assert_eq!( + /// Literal::from_str("-122.23").unwrap(), + /// Literal::new_typed_literal("-122.23", xsd::DECIMAL) + /// ); + /// assert_eq!( + /// Literal::from_str("-122e+1").unwrap(), + /// Literal::new_typed_literal("-122e+1", xsd::DOUBLE) + /// ); + /// ``` + fn from_str(s: &str) -> Result<Self, Self::Err> { + let (term, left) = read_literal(s)?; + if !left.is_empty() { + return Err(Self::Err::msg("Invalid literal serialization")); + } + Ok(term) + } +} + +impl FromStr for Term { + type Err = TermParseError; + + /// Parses a term from its NTriples or Turtle serialization + /// + /// ``` + /// use oxrdf::*; + /// use std::str::FromStr; + /// + /// assert_eq!( + /// Term::from_str("\"ex\"").unwrap(), + /// Literal::new_simple_literal("ex").into() + /// ); + /// ``` + fn from_str(s: &str) -> Result<Self, Self::Err> { + let (term, left) = read_term(s, 0)?; + if !left.is_empty() { + return Err(Self::Err::msg("Invalid term serialization")); + } + Ok(term) + } +} + +impl FromStr for Variable { + type Err = TermParseError; + + /// Parses a variable from its SPARQL serialization + /// + /// ``` + /// use oxrdf::Variable; + /// use std::str::FromStr; + /// + /// assert_eq!( + /// Variable::from_str("$foo").unwrap(), + /// Variable::new("foo").unwrap() + /// ) + /// ``` + fn from_str(s: &str) -> Result<Self, Self::Err> { + if !s.starts_with('?') 
&& !s.starts_with('$') { + return Err(Self::Err::msg( + "Variable serialization should start with ? or $", + )); + } + Self::new(&s[1..]).map_err(|error| { + TermParseError(TermParseErrorKind::Variable { + value: s.to_owned(), + error, + }) + }) + } +} + +fn read_named_node(s: &str) -> Result<(NamedNode, &str), TermParseError> { + let s = s.trim(); + if let Some(remain) = s.strip_prefix('<') { + let end = remain + .find('>') + .ok_or_else(|| TermParseError::msg("Named node serialization should end with a >"))?; + let (value, remain) = remain.split_at(end); + let remain = &remain[1..]; + let term = NamedNode::new(value).map_err(|error| { + TermParseError(TermParseErrorKind::Iri { + value: value.to_owned(), + error, + }) + })?; + Ok((term, remain)) + } else { + Err(TermParseError::msg( + "Named node serialization should start with a <", + )) + } +} + +fn read_blank_node(s: &str) -> Result<(BlankNode, &str), TermParseError> { + let s = s.trim(); + if let Some(remain) = s.strip_prefix("_:") { + let end = remain + .find(|v: char| { + v.is_whitespace() + || matches!(v, '<' | '_' | '?' | '$' | '"' | '\'' | '>' | '@' | '^') + }) + .unwrap_or(remain.len()); + let (value, remain) = remain.split_at(end); + let term = BlankNode::new(value).map_err(|error| { + TermParseError(TermParseErrorKind::BlankNode { + value: value.to_owned(), + error, + }) + })?; + Ok((term, remain)) + } else { + Err(TermParseError::msg( + "Blank node serialization should start with '_:'", + )) + } +} + +fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> { + let s = s.trim(); + if let Some(s) = s.strip_prefix('"') { + let mut value = String::with_capacity(s.len()); + let mut chars = s.chars(); + while let Some(c) = chars.next() { + match c { + '"' => { + let remain = chars.as_str(); + return if let Some(remain) = remain.strip_prefix('@') { + let end = remain + .find(|v| !matches!(v, 'a'..='z' | 'A'..='Z' | '-')) + .unwrap_or(remain.len()); + let (language, remain) = remain.split_at(end); + Ok(( + Literal::new_language_tagged_literal(value, language).map_err( + |error| { + TermParseError(TermParseErrorKind::LanguageTag { + value: language.to_owned(), + error, + }) + }, + )?, + remain, + )) + } else if let Some(remain) = remain.strip_prefix("^^") { + let (datatype, remain) = read_named_node(remain)?; + Ok((Literal::new_typed_literal(value, datatype), remain)) + } else { + Ok((Literal::new_simple_literal(value), remain)) + }; + } + '\\' => { + if let Some(c) = chars.next() { + value.push(match c { + 't' => '\t', + 'b' => '\u{08}', + 'n' => '\n', + 'r' => '\r', + 'f' => '\u{0C}', + '"' => '"', + '\'' => '\'', + '\\' => '\\', + 'u' => read_hexa_char(&mut chars, 4)?, + 'U' => read_hexa_char(&mut chars, 8)?, + _ => return Err(TermParseError::msg("Unexpected escaped char")), + }) + } else { + return Err(TermParseError::msg("Unexpected literal end")); + } + } + _ => value.push(c), + } + } + Err(TermParseError::msg("Unexpected literal end")) + } else if let Some(remain) = s.strip_prefix("true") { + Ok((Literal::new_typed_literal("true", xsd::BOOLEAN), remain)) + } else if let Some(remain) = s.strip_prefix("false") { + Ok((Literal::new_typed_literal("false", xsd::BOOLEAN), remain)) + } else { + let input = s.as_bytes(); + if input.is_empty() { + return Err(TermParseError::msg("Empty term serialization")); + } + + let mut cursor = match input.first() { + Some(b'+' | b'-') => 1, + _ => 0, + }; + let mut with_dot = false; + + let mut count_before: usize = 0; + while cursor < input.len() && b'0' <= input[cursor] && 
input[cursor] <= b'9' { + count_before += 1; + cursor += 1; + } + + let mut count_after: usize = 0; + if cursor < input.len() && input[cursor] == b'.' { + with_dot = true; + cursor += 1; + while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' { + count_after += 1; + cursor += 1; + } + } + + if cursor < input.len() && (input[cursor] == b'e' || input[cursor] == b'E') { + cursor += 1; + cursor += match input.get(cursor) { + Some(b'+' | b'-') => 1, + _ => 0, + }; + let mut count_exponent = 0; + while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' { + count_exponent += 1; + cursor += 1; + } + if count_exponent > 0 { + Ok((Literal::new_typed_literal(s, xsd::DOUBLE), &s[cursor..])) + } else { + Err(TermParseError::msg( + "Double serialization with an invalid exponent", + )) + } + } else if with_dot { + if count_after > 0 { + Ok((Literal::new_typed_literal(s, xsd::DECIMAL), &s[cursor..])) + } else { + Err(TermParseError::msg( + "Decimal serialization without floating part", + )) + } + } else if count_before > 0 { + Ok((Literal::new_typed_literal(s, xsd::INTEGER), &s[cursor..])) + } else { + Err(TermParseError::msg("Empty integer serialization")) + } + } +} + +fn read_term(s: &str, number_of_recursive_calls: usize) -> Result<(Term, &str), TermParseError> { + if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES { + return Err(TermParseError::msg( + "Too many nested triples. The parser fails here to avoid a stack overflow.", + )); + } + let s = s.trim(); + #[allow(unused_variables)] + if let Some(remain) = s.strip_prefix("<<") { + #[cfg(feature = "rdf-star")] + { + let (subject, remain) = read_term(remain, number_of_recursive_calls + 1)?; + let (predicate, remain) = read_named_node(remain)?; + let (object, remain) = read_term(remain, number_of_recursive_calls + 1)?; + let remain = remain.trim_start(); + if let Some(remain) = remain.strip_prefix(">>") { + Ok(( + Triple { + subject: match subject { + Term::NamedNode(s) => s.into(), + Term::BlankNode(s) => s.into(), + Term::Literal(_) => { + return Err(TermParseError::msg( + "Literals are not allowed in subject position", + )); + } + Term::Triple(s) => Subject::Triple(s), + }, + predicate, + object, + } + .into(), + remain, + )) + } else { + Err(TermParseError::msg( + "Nested triple serialization should be enclosed between << and >>", + )) + } + } + #[cfg(not(feature = "rdf-star"))] + { + Err(TermParseError::msg("RDF-star is not supported")) + } + } else if s.starts_with('<') { + let (term, remain) = read_named_node(s)?; + Ok((term.into(), remain)) + } else if s.starts_with('_') { + let (term, remain) = read_blank_node(s)?; + Ok((term.into(), remain)) + } else { + let (term, remain) = read_literal(s)?; + Ok((term.into(), remain)) + } +} + +fn read_hexa_char(input: &mut Chars<'_>, len: usize) -> Result<char, TermParseError> { + let mut value = 0; + for _ in 0..len { + if let Some(c) = input.next() { + value = value * 16 + + match c { + '0'..='9' => u32::from(c) - u32::from('0'), + 'a'..='f' => u32::from(c) - u32::from('a') + 10, + 'A'..='F' => u32::from(c) - u32::from('A') + 10, + _ => { + return Err(TermParseError::msg( + "Unexpected character in a unicode escape", + )); + } + } + } else { + return Err(TermParseError::msg("Unexpected literal string end")); + } + } + char::from_u32(value).ok_or_else(|| TermParseError::msg("Invalid encoded unicode code point")) +} + +/// An error raised during term serialization parsing using the [`FromStr`] trait. 
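A quick check of the untyped-number sniffing implemented in `read_literal` above: bare digits parse as `xsd:integer`, a decimal point yields `xsd:decimal`, an exponent yields `xsd:double`, and a trailing dot is rejected (module path assumed as before):

```rust
use ng_oxigraph::oxrdf::vocab::xsd;
use ng_oxigraph::oxrdf::{Literal, TermParseError};
use std::str::FromStr;

fn main() -> Result<(), TermParseError> {
    assert_eq!(Literal::from_str("42")?.datatype(), xsd::INTEGER);
    assert_eq!(Literal::from_str("-3.14")?.datatype(), xsd::DECIMAL);
    assert_eq!(Literal::from_str("6.02e23")?.datatype(), xsd::DOUBLE);
    // Rejected: "Decimal serialization without floating part".
    assert!(Literal::from_str("1.").is_err());
    Ok(())
}
```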
+#[derive(Debug, thiserror::Error)] +#[error(transparent)] +pub struct TermParseError(#[from] TermParseErrorKind); + +/// An internal error raised during term serialization parsing using the [`FromStr`] trait. +#[derive(Debug, thiserror::Error)] +enum TermParseErrorKind { + #[error("Error while parsing the named node '{value}': {error}")] + Iri { error: IriParseError, value: String }, + #[error("Error while parsing the blank node '{value}': {error}")] + BlankNode { + error: BlankNodeIdParseError, + value: String, + }, + #[error("Error while parsing the language tag '{value}': {error}")] + LanguageTag { + error: LanguageTagParseError, + value: String, + }, + #[error("Error while parsing the variable '{value}': {error}")] + Variable { + error: VariableNameParseError, + value: String, + }, + #[error("{0}")] + Msg(&'static str), +} + +impl TermParseError { + pub(crate) fn msg(msg: &'static str) -> Self { + Self(TermParseErrorKind::Msg(msg)) + } +} + +#[cfg(test)] +#[cfg(feature = "rdf-star")] +mod tests { + use super::*; + + #[test] + fn triple_term_parsing() { + assert_eq!( + Term::from_str("\"ex\"").unwrap(), + Literal::new_simple_literal("ex").into() + ); + assert_eq!( + Term::from_str("<< _:s <http://example.com/p> \"o\" >>").unwrap(), + Triple::new( + BlankNode::new("s").unwrap(), + NamedNode::new("http://example.com/p").unwrap(), + Literal::new_simple_literal("o"), + ) + .into() + ); + } +} diff --git a/ng-oxigraph/src/oxrdf/triple.rs b/ng-oxigraph/src/oxrdf/triple.rs new file mode 100644 index 0000000..3b0f8a9 --- /dev/null +++ b/ng-oxigraph/src/oxrdf/triple.rs @@ -0,0 +1,1368 @@ +use crate::oxrdf::blank_node::BlankNode; +use crate::oxrdf::literal::Literal; +use crate::oxrdf::named_node::NamedNode; +use crate::oxrdf::{BlankNodeRef, LiteralRef, NamedNodeRef}; +use serde::{Deserialize, Serialize}; +use std::fmt; + +/// The owned union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). +#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] +pub enum NamedOrBlankNode { + NamedNode(NamedNode), + BlankNode(BlankNode), +} + +impl NamedOrBlankNode { + #[inline] + pub fn is_named_node(&self) -> bool { + self.as_ref().is_named_node() + } + + #[inline] + pub fn is_blank_node(&self) -> bool { + self.as_ref().is_blank_node() + } + + #[inline] + pub fn as_ref(&self) -> NamedOrBlankNodeRef<'_> { + match self { + Self::NamedNode(node) => NamedOrBlankNodeRef::NamedNode(node.as_ref()), + Self::BlankNode(node) => NamedOrBlankNodeRef::BlankNode(node.as_ref()), + } + } +} + +impl fmt::Display for NamedOrBlankNode { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_ref().fmt(f) + } +} + +impl From<NamedNode> for NamedOrBlankNode { + #[inline] + fn from(node: NamedNode) -> Self { + Self::NamedNode(node) + } +} + +impl From<NamedNodeRef<'_>> for NamedOrBlankNode { + #[inline] + fn from(node: NamedNodeRef<'_>) -> Self { + node.into_owned().into() + } +} + +impl From<BlankNode> for NamedOrBlankNode { + #[inline] + fn from(node: BlankNode) -> Self { + Self::BlankNode(node) + } +} + +impl From<BlankNodeRef<'_>> for NamedOrBlankNode { + #[inline] + fn from(node: BlankNodeRef<'_>) -> Self { + node.into_owned().into() + } +} + +/// The borrowed union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). 
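A small sketch of the `From` lattice above: both node kinds fold into `NamedOrBlankNode` (paths assumed as before):

```rust
use ng_oxigraph::oxrdf::{BlankNode, NamedNode, NamedOrBlankNode};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let named: NamedOrBlankNode = NamedNode::new("http://example.com/a")?.into();
    let blank: NamedOrBlankNode = BlankNode::new("b0")?.into();
    assert!(named.is_named_node());
    assert!(blank.is_blank_node());
    Ok(())
}
```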
+#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +pub enum NamedOrBlankNodeRef<'a> { + NamedNode(NamedNodeRef<'a>), + BlankNode(BlankNodeRef<'a>), +} + +impl<'a> NamedOrBlankNodeRef<'a> { + #[inline] + pub fn is_named_node(&self) -> bool { + match self { + Self::NamedNode(_) => true, + Self::BlankNode(_) => false, + } + } + + #[inline] + pub fn is_blank_node(&self) -> bool { + match self { + Self::NamedNode(_) => false, + Self::BlankNode(_) => true, + } + } + + #[inline] + pub fn into_owned(self) -> NamedOrBlankNode { + match self { + Self::NamedNode(node) => NamedOrBlankNode::NamedNode(node.into_owned()), + Self::BlankNode(node) => NamedOrBlankNode::BlankNode(node.into_owned()), + } + } +} + +impl fmt::Display for NamedOrBlankNodeRef<'_> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(node) => node.fmt(f), + Self::BlankNode(node) => node.fmt(f), + } + } +} + +impl<'a> From<NamedNodeRef<'a>> for NamedOrBlankNodeRef<'a> { + #[inline] + fn from(node: NamedNodeRef<'a>) -> Self { + Self::NamedNode(node) + } +} + +impl<'a> From<&'a NamedNode> for NamedOrBlankNodeRef<'a> { + #[inline] + fn from(node: &'a NamedNode) -> Self { + node.as_ref().into() + } +} + +impl<'a> From<BlankNodeRef<'a>> for NamedOrBlankNodeRef<'a> { + #[inline] + fn from(node: BlankNodeRef<'a>) -> Self { + Self::BlankNode(node) + } +} + +impl<'a> From<&'a BlankNode> for NamedOrBlankNodeRef<'a> { + #[inline] + fn from(node: &'a BlankNode) -> Self { + node.as_ref().into() + } +} + +impl<'a> From<&'a NamedOrBlankNode> for NamedOrBlankNodeRef<'a> { + #[inline] + fn from(node: &'a NamedOrBlankNode) -> Self { + node.as_ref() + } +} + +impl<'a> From<NamedOrBlankNodeRef<'a>> for NamedOrBlankNode { + #[inline] + fn from(node: NamedOrBlankNodeRef<'a>) -> Self { + node.into_owned() + } +} + +/// The owned union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) (if the `rdf-star` feature is enabled). 
+#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] +pub enum Subject { + NamedNode(NamedNode), + BlankNode(BlankNode), + #[cfg(feature = "rdf-star")] + Triple(Box<Triple>), +} + +impl Subject { + #[inline] + pub fn is_named_node(&self) -> bool { + self.as_ref().is_named_node() + } + + #[inline] + pub fn is_blank_node(&self) -> bool { + self.as_ref().is_blank_node() + } + + #[cfg(feature = "rdf-star")] + #[inline] + pub fn is_triple(&self) -> bool { + self.as_ref().is_triple() + } + + #[inline] + pub fn as_ref(&self) -> SubjectRef<'_> { + match self { + Self::NamedNode(node) => SubjectRef::NamedNode(node.as_ref()), + Self::BlankNode(node) => SubjectRef::BlankNode(node.as_ref()), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => SubjectRef::Triple(triple), + } + } +} + +impl fmt::Display for Subject { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_ref().fmt(f) + } +} + +impl From<NamedNode> for Subject { + #[inline] + fn from(node: NamedNode) -> Self { + Self::NamedNode(node) + } +} + +impl From<NamedNodeRef<'_>> for Subject { + #[inline] + fn from(node: NamedNodeRef<'_>) -> Self { + node.into_owned().into() + } +} + +impl From<BlankNode> for Subject { + #[inline] + fn from(node: BlankNode) -> Self { + Self::BlankNode(node) + } +} + +impl From<BlankNodeRef<'_>> for Subject { + #[inline] + fn from(node: BlankNodeRef<'_>) -> Self { + node.into_owned().into() + } +} + +#[cfg(feature = "rdf-star")] +impl From<Triple> for Subject { + #[inline] + fn from(node: Triple) -> Self { + Self::Triple(Box::new(node)) + } +} + +#[cfg(feature = "rdf-star")] +impl From<Box<Triple>> for Subject { + #[inline] + fn from(node: Box<Triple>) -> Self { + Self::Triple(node) + } +} + +#[cfg(feature = "rdf-star")] +impl From<TripleRef<'_>> for Subject { + #[inline] + fn from(node: TripleRef<'_>) -> Self { + node.into_owned().into() + } +} + +impl From<NamedOrBlankNode> for Subject { + #[inline] + fn from(node: NamedOrBlankNode) -> Self { + match node { + NamedOrBlankNode::NamedNode(node) => node.into(), + NamedOrBlankNode::BlankNode(node) => node.into(), + } + } +} + +impl From<NamedOrBlankNodeRef<'_>> for Subject { + #[inline] + fn from(node: NamedOrBlankNodeRef<'_>) -> Self { + node.into_owned().into() + } +} + +/// The borrowed union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) (if the `rdf-star` feature is enabled). 
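A sketch of building subjects from the conversions above, including a quoted-triple subject; the `Triple` arm compiles only with the `rdf-star` feature enabled (paths assumed as before):

```rust
use ng_oxigraph::oxrdf::{IriParseError, NamedNode, Subject, Triple};

fn main() -> Result<(), IriParseError> {
    let t = Triple::new(
        NamedNode::new("http://example.com/s")?,
        NamedNode::new("http://example.com/p")?,
        NamedNode::new("http://example.com/o")?,
    );
    // From<Triple> for Subject boxes the quoted triple.
    let quoted: Subject = t.into();
    assert!(quoted.is_triple());
    // Display wraps the embedded triple in << ... >>.
    assert_eq!(
        quoted.to_string(),
        "<<<http://example.com/s> <http://example.com/p> <http://example.com/o>>>"
    );
    Ok(())
}
```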
+#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +pub enum SubjectRef<'a> { + NamedNode(NamedNodeRef<'a>), + BlankNode(BlankNodeRef<'a>), + #[cfg(feature = "rdf-star")] + Triple(&'a Triple), +} + +impl<'a> SubjectRef<'a> { + #[inline] + pub fn is_named_node(&self) -> bool { + matches!(self, Self::NamedNode(_)) + } + + #[inline] + pub fn is_blank_node(&self) -> bool { + matches!(self, Self::BlankNode(_)) + } + + #[cfg(feature = "rdf-star")] + #[inline] + pub fn is_triple(&self) -> bool { + matches!(self, Self::Triple(_)) + } + + #[inline] + pub fn into_owned(self) -> Subject { + match self { + Self::NamedNode(node) => Subject::NamedNode(node.into_owned()), + Self::BlankNode(node) => Subject::BlankNode(node.into_owned()), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => Subject::Triple(Box::new(triple.clone())), + } + } +} + +impl fmt::Display for SubjectRef<'_> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(node) => node.fmt(f), + Self::BlankNode(node) => node.fmt(f), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => write!(f, "<<{triple}>>"), + } + } +} + +impl<'a> From<NamedNodeRef<'a>> for SubjectRef<'a> { + #[inline] + fn from(node: NamedNodeRef<'a>) -> Self { + Self::NamedNode(node) + } +} + +impl<'a> From<&'a NamedNode> for SubjectRef<'a> { + #[inline] + fn from(node: &'a NamedNode) -> Self { + node.as_ref().into() + } +} + +impl<'a> From<BlankNodeRef<'a>> for SubjectRef<'a> { + #[inline] + fn from(node: BlankNodeRef<'a>) -> Self { + Self::BlankNode(node) + } +} + +impl<'a> From<&'a BlankNode> for SubjectRef<'a> { + #[inline] + fn from(node: &'a BlankNode) -> Self { + node.as_ref().into() + } +} + +#[cfg(feature = "rdf-star")] +impl<'a> From<&'a Triple> for SubjectRef<'a> { + #[inline] + fn from(node: &'a Triple) -> Self { + Self::Triple(node) + } +} + +impl<'a> From<&'a Subject> for SubjectRef<'a> { + #[inline] + fn from(node: &'a Subject) -> Self { + node.as_ref() + } +} + +impl<'a> From<SubjectRef<'a>> for Subject { + #[inline] + fn from(node: SubjectRef<'a>) -> Self { + node.into_owned() + } +} + +impl<'a> From<NamedOrBlankNodeRef<'a>> for SubjectRef<'a> { + #[inline] + fn from(node: NamedOrBlankNodeRef<'a>) -> Self { + match node { + NamedOrBlankNodeRef::NamedNode(node) => node.into(), + NamedOrBlankNodeRef::BlankNode(node) => node.into(), + } + } +} + +impl<'a> From<&'a NamedOrBlankNode> for SubjectRef<'a> { + #[inline] + fn from(node: &'a NamedOrBlankNode) -> Self { + node.as_ref().into() + } +} + +/// An owned RDF [term](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term) +/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) (if the `rdf-star` feature is enabled). 
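The owned/borrowed symmetry above round-trips losslessly; a short check (paths assumed as before):

```rust
use ng_oxigraph::oxrdf::{IriParseError, NamedNode, Subject, SubjectRef};

fn main() -> Result<(), IriParseError> {
    let owned: Subject = NamedNode::new("http://example.com/s")?.into();
    let borrowed: SubjectRef<'_> = owned.as_ref();
    // into_owned (here via From) reconstructs an equal owned subject.
    assert_eq!(Subject::from(borrowed), owned);
    Ok(())
}
```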
+#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] +pub enum Term { + NamedNode(NamedNode), + BlankNode(BlankNode), + Literal(Literal), + #[cfg(feature = "rdf-star")] + Triple(Box<Triple>), +} + +impl Term { + #[inline] + pub fn is_named_node(&self) -> bool { + self.as_ref().is_named_node() + } + + #[inline] + pub fn is_blank_node(&self) -> bool { + self.as_ref().is_blank_node() + } + + #[inline] + pub fn is_literal(&self) -> bool { + self.as_ref().is_literal() + } + + #[cfg(feature = "rdf-star")] + #[inline] + pub fn is_triple(&self) -> bool { + self.as_ref().is_triple() + } + + #[inline] + pub fn as_ref(&self) -> TermRef<'_> { + match self { + Self::NamedNode(node) => TermRef::NamedNode(node.as_ref()), + Self::BlankNode(node) => TermRef::BlankNode(node.as_ref()), + Self::Literal(literal) => TermRef::Literal(literal.as_ref()), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => TermRef::Triple(triple), + } + } +} + +impl fmt::Display for Term { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_ref().fmt(f) + } +} + +impl From<NamedNode> for Term { + #[inline] + fn from(node: NamedNode) -> Self { + Self::NamedNode(node) + } +} + +impl From<NamedNodeRef<'_>> for Term { + #[inline] + fn from(node: NamedNodeRef<'_>) -> Self { + node.into_owned().into() + } +} + +impl From<BlankNode> for Term { + #[inline] + fn from(node: BlankNode) -> Self { + Self::BlankNode(node) + } +} + +impl From<BlankNodeRef<'_>> for Term { + #[inline] + fn from(node: BlankNodeRef<'_>) -> Self { + node.into_owned().into() + } +} + +impl From<Literal> for Term { + #[inline] + fn from(literal: Literal) -> Self { + Self::Literal(literal) + } +} + +impl From<LiteralRef<'_>> for Term { + #[inline] + fn from(literal: LiteralRef<'_>) -> Self { + literal.into_owned().into() + } +} + +#[cfg(feature = "rdf-star")] +impl From<Triple> for Term { + #[inline] + fn from(triple: Triple) -> Self { + Self::Triple(Box::new(triple)) + } +} + +#[cfg(feature = "rdf-star")] +impl From<Box<Triple>> for Term { + #[inline] + fn from(node: Box<Triple>) -> Self { + Self::Triple(node) + } +} + +#[cfg(feature = "rdf-star")] +impl From<TripleRef<'_>> for Term { + #[inline] + fn from(triple: TripleRef<'_>) -> Self { + triple.into_owned().into() + } +} + +impl From<NamedOrBlankNode> for Term { + #[inline] + fn from(node: NamedOrBlankNode) -> Self { + match node { + NamedOrBlankNode::NamedNode(node) => node.into(), + NamedOrBlankNode::BlankNode(node) => node.into(), + } + } +} + +impl From<NamedOrBlankNodeRef<'_>> for Term { + #[inline] + fn from(node: NamedOrBlankNodeRef<'_>) -> Self { + node.into_owned().into() + } +} + +impl From<Subject> for Term { + #[inline] + fn from(node: Subject) -> Self { + match node { + Subject::NamedNode(node) => node.into(), + Subject::BlankNode(node) => node.into(), + #[cfg(feature = "rdf-star")] + Subject::Triple(triple) => Self::Triple(triple), + } + } +} + +impl From<SubjectRef<'_>> for Term { + #[inline] + fn from(node: SubjectRef<'_>) -> Self { + node.into_owned().into() + } +} + +impl TryFrom<Term> for NamedNode { + type Error = TryFromTermError; + + #[inline] + fn try_from(term: Term) -> Result<Self, Self::Error> { + if let Term::NamedNode(node) = term { + Ok(node) + } else { + Err(TryFromTermError { + term, + target: "NamedNode", + }) + } + } +} + +impl TryFrom<Term> for BlankNode { + type Error = TryFromTermError; + + #[inline] + fn try_from(term: Term) -> Result<Self, Self::Error> { + if let Term::BlankNode(node) = term { + Ok(node) + } else { + 
Err(TryFromTermError { + term, + target: "BlankNode", + }) + } + } +} + +impl TryFrom<Term> for Literal { + type Error = TryFromTermError; + + #[inline] + fn try_from(term: Term) -> Result<Self, Self::Error> { + if let Term::Literal(node) = term { + Ok(node) + } else { + Err(TryFromTermError { + term, + target: "Literal", + }) + } + } +} + +impl TryFrom<Term> for Subject { + type Error = TryFromTermError; + + #[inline] + fn try_from(term: Term) -> Result<Self, Self::Error> { + match term { + Term::NamedNode(term) => Ok(Self::NamedNode(term)), + Term::BlankNode(term) => Ok(Self::BlankNode(term)), + #[cfg(feature = "rdf-star")] + Term::Triple(term) => Ok(Self::Triple(term)), + Term::Literal(_) => Err(TryFromTermError { + term, + target: "Subject", + }), + } + } +} + +/// A borrowed RDF [term](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term) +/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) (if the `rdf-star` feature is enabled). +#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +pub enum TermRef<'a> { + NamedNode(NamedNodeRef<'a>), + BlankNode(BlankNodeRef<'a>), + Literal(LiteralRef<'a>), + #[cfg(feature = "rdf-star")] + Triple(&'a Triple), +} + +impl<'a> TermRef<'a> { + #[inline] + pub fn is_named_node(&self) -> bool { + matches!(self, Self::NamedNode(_)) + } + + #[inline] + pub fn is_blank_node(&self) -> bool { + matches!(self, Self::BlankNode(_)) + } + + #[inline] + pub fn is_literal(&self) -> bool { + matches!(self, Self::Literal(_)) + } + + #[cfg(feature = "rdf-star")] + #[inline] + pub fn is_triple(&self) -> bool { + matches!(self, Self::Triple(_)) + } + + #[inline] + pub fn into_owned(self) -> Term { + match self { + Self::NamedNode(node) => Term::NamedNode(node.into_owned()), + Self::BlankNode(node) => Term::BlankNode(node.into_owned()), + Self::Literal(literal) => Term::Literal(literal.into_owned()), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => Term::Triple(Box::new(triple.clone())), + } + } +} + +impl fmt::Display for TermRef<'_> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(node) => node.fmt(f), + Self::BlankNode(node) => node.fmt(f), + Self::Literal(literal) => literal.fmt(f), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => { + write!(f, "<<{triple}>>") + } + } + } +} + +impl<'a> From<NamedNodeRef<'a>> for TermRef<'a> { + #[inline] + fn from(node: NamedNodeRef<'a>) -> Self { + Self::NamedNode(node) + } +} + +impl<'a> From<&'a NamedNode> for TermRef<'a> { + #[inline] + fn from(node: &'a NamedNode) -> Self { + node.as_ref().into() + } +} + +impl<'a> From<BlankNodeRef<'a>> for TermRef<'a> { + #[inline] + fn from(node: BlankNodeRef<'a>) -> Self { + Self::BlankNode(node) + } +} + +impl<'a> From<&'a BlankNode> for TermRef<'a> { + #[inline] + fn from(node: &'a BlankNode) -> Self { + node.as_ref().into() + } +} + +impl<'a> From<LiteralRef<'a>> for TermRef<'a> { + #[inline] + fn from(literal: LiteralRef<'a>) -> Self { + Self::Literal(literal) + } +} + +impl<'a> From<&'a Literal> for TermRef<'a> { + #[inline] + fn from(literal: &'a Literal) -> Self { + literal.as_ref().into() + } +} + +#[cfg(feature = "rdf-star")] +impl<'a> From<&'a Triple> for TermRef<'a> { + #[inline] + fn from(node: &'a Triple) -> Self { + Self::Triple(node) + } +} + +impl<'a> From<NamedOrBlankNodeRef<'a>> for 
TermRef<'a> { + #[inline] + fn from(node: NamedOrBlankNodeRef<'a>) -> Self { + match node { + NamedOrBlankNodeRef::NamedNode(node) => node.into(), + NamedOrBlankNodeRef::BlankNode(node) => node.into(), + } + } +} + +impl<'a> From<&'a NamedOrBlankNode> for TermRef<'a> { + #[inline] + fn from(node: &'a NamedOrBlankNode) -> Self { + node.as_ref().into() + } +} + +impl<'a> From<SubjectRef<'a>> for TermRef<'a> { + #[inline] + fn from(node: SubjectRef<'a>) -> Self { + match node { + SubjectRef::NamedNode(node) => node.into(), + SubjectRef::BlankNode(node) => node.into(), + #[cfg(feature = "rdf-star")] + SubjectRef::Triple(triple) => triple.into(), + } + } +} + +impl<'a> From<&'a Subject> for TermRef<'a> { + #[inline] + fn from(node: &'a Subject) -> Self { + node.as_ref().into() + } +} + +impl<'a> From<&'a Term> for TermRef<'a> { + #[inline] + fn from(node: &'a Term) -> Self { + node.as_ref() + } +} + +impl<'a> From<TermRef<'a>> for Term { + #[inline] + fn from(node: TermRef<'a>) -> Self { + node.into_owned() + } +} + +/// An owned [RDF triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple). +/// +/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation: +/// ``` +/// use oxrdf::{NamedNode, Triple}; +/// +/// assert_eq!( +/// "<http://example.com/s> <http://example.com/p> <http://example.com/o>", +/// Triple { +/// subject: NamedNode::new("http://example.com/s")?.into(), +/// predicate: NamedNode::new("http://example.com/p")?, +/// object: NamedNode::new("http://example.com/o")?.into(), +/// } +/// .to_string() +/// ); +/// # Result::<_,oxrdf::IriParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] +pub struct Triple { + /// The [subject](https://www.w3.org/TR/rdf11-concepts/#dfn-subject) of this triple. + pub subject: Subject, + + /// The [predicate](https://www.w3.org/TR/rdf11-concepts/#dfn-predicate) of this triple. + pub predicate: NamedNode, + + /// The [object](https://www.w3.org/TR/rdf11-concepts/#dfn-object) of this triple. + pub object: Term, +} + +impl Triple { + /// Builds an RDF [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple). + #[inline] + pub fn new( + subject: impl Into<Subject>, + predicate: impl Into<NamedNode>, + object: impl Into<Term>, + ) -> Self { + Self { + subject: subject.into(), + predicate: predicate.into(), + object: object.into(), + } + } + + /// Builds an RDF [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) from [`Term`]s. + /// + /// Returns a [`TryFromTermError`] error if the generated triple would be ill-formed. + #[inline] + pub fn from_terms( + subject: impl Into<Term>, + predicate: impl Into<Term>, + object: impl Into<Term>, + ) -> Result<Self, TryFromTermError> { + Ok(Self { + subject: subject.into().try_into()?, + predicate: predicate.into().try_into()?, + object: object.into(), + }) + } + + /// Encodes that this triple is in an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). 
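+    ///
+    /// Example (the IRIs are placeholders, in the style of the other doc examples; it relies only on the constructors defined in this module):
+    /// ```
+    /// use oxrdf::{NamedNode, Triple};
+    ///
+    /// let quad = Triple::new(
+    ///     NamedNode::new("http://example.com/s")?,
+    ///     NamedNode::new("http://example.com/p")?,
+    ///     NamedNode::new("http://example.com/o")?,
+    /// )
+    /// .in_graph(NamedNode::new("http://example.com/g")?);
+    /// assert_eq!(quad.graph_name.to_string(), "<http://example.com/g>");
+    /// # Result::<_,oxrdf::IriParseError>::Ok(())
+    /// ```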
+ #[inline] + pub fn in_graph(self, graph_name: impl Into<GraphName>) -> Quad { + Quad { + subject: self.subject, + predicate: self.predicate, + object: self.object, + graph_name: graph_name.into(), + } + } + + #[inline] + pub fn as_ref(&self) -> TripleRef<'_> { + TripleRef { + subject: self.subject.as_ref(), + predicate: self.predicate.as_ref(), + object: self.object.as_ref(), + } + } +} + +impl fmt::Display for Triple { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_ref().fmt(f) + } +} + +/// A borrowed [RDF triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple). +/// +/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation: +/// ``` +/// use oxrdf::{NamedNodeRef, TripleRef}; +/// +/// assert_eq!( +/// "<http://example.com/s> <http://example.com/p> <http://example.com/o>", +/// TripleRef { +/// subject: NamedNodeRef::new("http://example.com/s")?.into(), +/// predicate: NamedNodeRef::new("http://example.com/p")?, +/// object: NamedNodeRef::new("http://example.com/o")?.into(), +/// } +/// .to_string() +/// ); +/// # Result::<_,oxrdf::IriParseError>::Ok(()) +/// ``` + +#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +pub struct TripleRef<'a> { + /// The [subject](https://www.w3.org/TR/rdf11-concepts/#dfn-subject) of this triple. + pub subject: SubjectRef<'a>, + + /// The [predicate](https://www.w3.org/TR/rdf11-concepts/#dfn-predicate) of this triple. + pub predicate: NamedNodeRef<'a>, + + /// The [object](https://www.w3.org/TR/rdf11-concepts/#dfn-object) of this triple. + pub object: TermRef<'a>, +} + +impl<'a> TripleRef<'a> { + /// Builds an RDF [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple). + #[inline] + pub fn new( + subject: impl Into<SubjectRef<'a>>, + predicate: impl Into<NamedNodeRef<'a>>, + object: impl Into<TermRef<'a>>, + ) -> Self { + Self { + subject: subject.into(), + predicate: predicate.into(), + object: object.into(), + } + } + + /// Encodes that this triple is in an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). + #[inline] + pub fn in_graph(self, graph_name: impl Into<GraphNameRef<'a>>) -> QuadRef<'a> { + QuadRef { + subject: self.subject, + predicate: self.predicate, + object: self.object, + graph_name: graph_name.into(), + } + } + + #[inline] + pub fn into_owned(self) -> Triple { + Triple { + subject: self.subject.into_owned(), + predicate: self.predicate.into_owned(), + object: self.object.into_owned(), + } + } +} + +impl fmt::Display for TripleRef<'_> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {} {}", self.subject, self.predicate, self.object) + } +} + +impl<'a> From<&'a Triple> for TripleRef<'a> { + #[inline] + fn from(triple: &'a Triple) -> Self { + triple.as_ref() + } +} + +impl<'a> From<TripleRef<'a>> for Triple { + #[inline] + fn from(triple: TripleRef<'a>) -> Self { + triple.into_owned() + } +} + +/// A possible owned graph name. +/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph). 
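+///
+/// Example (a sketch relying on the `From` conversions and `Display` implementation defined below):
+/// ```
+/// use oxrdf::{GraphName, NamedNode};
+///
+/// assert_eq!(
+///     "<http://example.com/g>",
+///     GraphName::from(NamedNode::new("http://example.com/g")?).to_string()
+/// );
+/// assert_eq!("DEFAULT", GraphName::DefaultGraph.to_string());
+/// # Result::<_,oxrdf::IriParseError>::Ok(())
+/// ```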
+#[derive(Eq, PartialEq, Debug, Clone, Hash, Default, Serialize, Deserialize)] +pub enum GraphName { + NamedNode(NamedNode), + BlankNode(BlankNode), + #[default] + DefaultGraph, +} + +impl GraphName { + #[inline] + pub fn is_named_node(&self) -> bool { + self.as_ref().is_named_node() + } + + #[inline] + pub fn is_blank_node(&self) -> bool { + self.as_ref().is_blank_node() + } + + #[inline] + pub fn is_default_graph(&self) -> bool { + self.as_ref().is_default_graph() + } + + #[inline] + pub fn as_ref(&self) -> GraphNameRef<'_> { + match self { + Self::NamedNode(node) => GraphNameRef::NamedNode(node.as_ref()), + Self::BlankNode(node) => GraphNameRef::BlankNode(node.as_ref()), + Self::DefaultGraph => GraphNameRef::DefaultGraph, + } + } +} + +impl fmt::Display for GraphName { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_ref().fmt(f) + } +} + +impl From<NamedNode> for GraphName { + #[inline] + fn from(node: NamedNode) -> Self { + Self::NamedNode(node) + } +} + +impl From<NamedNodeRef<'_>> for GraphName { + #[inline] + fn from(node: NamedNodeRef<'_>) -> Self { + node.into_owned().into() + } +} + +impl From<BlankNode> for GraphName { + #[inline] + fn from(node: BlankNode) -> Self { + Self::BlankNode(node) + } +} + +impl From<BlankNodeRef<'_>> for GraphName { + #[inline] + fn from(node: BlankNodeRef<'_>) -> Self { + node.into_owned().into() + } +} + +impl From<NamedOrBlankNode> for GraphName { + #[inline] + fn from(node: NamedOrBlankNode) -> Self { + match node { + NamedOrBlankNode::NamedNode(node) => node.into(), + NamedOrBlankNode::BlankNode(node) => node.into(), + } + } +} + +impl From<NamedOrBlankNodeRef<'_>> for GraphName { + #[inline] + fn from(node: NamedOrBlankNodeRef<'_>) -> Self { + node.into_owned().into() + } +} + +/// A possible borrowed graph name. +/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph). 
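+///
+/// Example (the borrowed counterpart of the [`GraphName`] sketch above):
+/// ```
+/// use oxrdf::{GraphNameRef, NamedNodeRef};
+///
+/// assert_eq!(
+///     "<http://example.com/g>",
+///     GraphNameRef::from(NamedNodeRef::new("http://example.com/g")?).to_string()
+/// );
+/// # Result::<_,oxrdf::IriParseError>::Ok(())
+/// ```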
+#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash, Default)] +pub enum GraphNameRef<'a> { + NamedNode(NamedNodeRef<'a>), + BlankNode(BlankNodeRef<'a>), + #[default] + DefaultGraph, +} + +impl<'a> GraphNameRef<'a> { + #[inline] + pub fn is_named_node(&self) -> bool { + matches!(self, Self::NamedNode(_)) + } + + #[inline] + pub fn is_blank_node(&self) -> bool { + matches!(self, Self::BlankNode(_)) + } + + #[inline] + pub fn is_default_graph(&self) -> bool { + matches!(self, Self::DefaultGraph) + } + + #[inline] + pub fn into_owned(self) -> GraphName { + match self { + Self::NamedNode(node) => GraphName::NamedNode(node.into_owned()), + Self::BlankNode(node) => GraphName::BlankNode(node.into_owned()), + Self::DefaultGraph => GraphName::DefaultGraph, + } + } +} + +impl fmt::Display for GraphNameRef<'_> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(node) => node.fmt(f), + Self::BlankNode(node) => node.fmt(f), + Self::DefaultGraph => f.write_str("DEFAULT"), + } + } +} + +impl<'a> From<NamedNodeRef<'a>> for GraphNameRef<'a> { + #[inline] + fn from(node: NamedNodeRef<'a>) -> Self { + Self::NamedNode(node) + } +} + +impl<'a> From<&'a NamedNode> for GraphNameRef<'a> { + #[inline] + fn from(node: &'a NamedNode) -> Self { + node.as_ref().into() + } +} + +impl<'a> From<BlankNodeRef<'a>> for GraphNameRef<'a> { + #[inline] + fn from(node: BlankNodeRef<'a>) -> Self { + Self::BlankNode(node) + } +} + +impl<'a> From<&'a BlankNode> for GraphNameRef<'a> { + #[inline] + fn from(node: &'a BlankNode) -> Self { + node.as_ref().into() + } +} + +impl<'a> From<NamedOrBlankNodeRef<'a>> for GraphNameRef<'a> { + #[inline] + fn from(node: NamedOrBlankNodeRef<'a>) -> Self { + match node { + NamedOrBlankNodeRef::NamedNode(node) => node.into(), + NamedOrBlankNodeRef::BlankNode(node) => node.into(), + } + } +} + +impl<'a> From<&'a NamedOrBlankNode> for GraphNameRef<'a> { + #[inline] + fn from(node: &'a NamedOrBlankNode) -> Self { + node.as_ref().into() + } +} + +impl<'a> From<&'a GraphName> for GraphNameRef<'a> { + #[inline] + fn from(node: &'a GraphName) -> Self { + node.as_ref() + } +} + +impl<'a> From<GraphNameRef<'a>> for GraphName { + #[inline] + fn from(node: GraphNameRef<'a>) -> Self { + node.into_owned() + } +} + +/// An owned [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) in an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). +/// +/// The default string formatter is returning an N-Quads compatible representation: +/// ``` +/// use oxrdf::{Quad, NamedNode}; +/// +/// assert_eq!( +/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g>", +/// Quad { +/// subject: NamedNode::new("http://example.com/s")?.into(), +/// predicate: NamedNode::new("http://example.com/p")?, +/// object: NamedNode::new("http://example.com/o")?.into(), +/// graph_name: NamedNode::new("http://example.com/g")?.into(), +/// }.to_string() +/// ); +/// # Result::<_,oxrdf::IriParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] +pub struct Quad { + /// The [subject](https://www.w3.org/TR/rdf11-concepts/#dfn-subject) of this triple. + pub subject: Subject, + + /// The [predicate](https://www.w3.org/TR/rdf11-concepts/#dfn-predicate) of this triple. + pub predicate: NamedNode, + + /// The [object](https://www.w3.org/TR/rdf11-concepts/#dfn-object) of this triple. 
+ pub object: Term, + + /// The name of the RDF [graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) in which the triple is. + pub graph_name: GraphName, +} + +impl Quad { + /// Builds an RDF [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) in an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). + #[inline] + pub fn new( + subject: impl Into<Subject>, + predicate: impl Into<NamedNode>, + object: impl Into<Term>, + graph_name: impl Into<GraphName>, + ) -> Self { + Self { + subject: subject.into(), + predicate: predicate.into(), + object: object.into(), + graph_name: graph_name.into(), + } + } + + #[inline] + pub fn as_ref(&self) -> QuadRef<'_> { + QuadRef { + subject: self.subject.as_ref(), + predicate: self.predicate.as_ref(), + object: self.object.as_ref(), + graph_name: self.graph_name.as_ref(), + } + } +} + +impl fmt::Display for Quad { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_ref().fmt(f) + } +} + +impl From<Quad> for Triple { + #[inline] + fn from(quad: Quad) -> Self { + Self { + subject: quad.subject, + predicate: quad.predicate, + object: quad.object, + } + } +} + +/// A borrowed [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) in an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). +/// +/// The default string formatter is returning an N-Quads compatible representation: +/// ``` +/// use oxrdf::{QuadRef, NamedNodeRef}; +/// +/// assert_eq!( +/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g>", +/// QuadRef { +/// subject: NamedNodeRef::new("http://example.com/s")?.into(), +/// predicate: NamedNodeRef::new("http://example.com/p")?, +/// object: NamedNodeRef::new("http://example.com/o")?.into(), +/// graph_name: NamedNodeRef::new("http://example.com/g")?.into(), +/// }.to_string() +/// ); +/// # Result::<_,oxrdf::IriParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +pub struct QuadRef<'a> { + /// The [subject](https://www.w3.org/TR/rdf11-concepts/#dfn-subject) of this triple. + pub subject: SubjectRef<'a>, + + /// The [predicate](https://www.w3.org/TR/rdf11-concepts/#dfn-predicate) of this triple. + pub predicate: NamedNodeRef<'a>, + + /// The [object](https://www.w3.org/TR/rdf11-concepts/#dfn-object) of this triple. + pub object: TermRef<'a>, + + /// The name of the RDF [graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) in which the triple is. + pub graph_name: GraphNameRef<'a>, +} + +impl<'a> QuadRef<'a> { + /// Builds an RDF [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) in an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). 
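+    ///
+    /// Example (a sketch mirroring the owned [`Quad::new`] above, and assuming the `NamedNodeRef` to `SubjectRef` conversion provided elsewhere in this module):
+    /// ```
+    /// use oxrdf::{NamedNodeRef, QuadRef};
+    ///
+    /// let quad = QuadRef::new(
+    ///     NamedNodeRef::new("http://example.com/s")?,
+    ///     NamedNodeRef::new("http://example.com/p")?,
+    ///     NamedNodeRef::new("http://example.com/o")?,
+    ///     NamedNodeRef::new("http://example.com/g")?,
+    /// );
+    /// assert_eq!(quad.graph_name.to_string(), "<http://example.com/g>");
+    /// # Result::<_,oxrdf::IriParseError>::Ok(())
+    /// ```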
+    #[inline]
+    pub fn new(
+        subject: impl Into<SubjectRef<'a>>,
+        predicate: impl Into<NamedNodeRef<'a>>,
+        object: impl Into<TermRef<'a>>,
+        graph_name: impl Into<GraphNameRef<'a>>,
+    ) -> Self {
+        Self {
+            subject: subject.into(),
+            predicate: predicate.into(),
+            object: object.into(),
+            graph_name: graph_name.into(),
+        }
+    }
+
+    #[inline]
+    pub fn into_owned(self) -> Quad {
+        Quad {
+            subject: self.subject.into_owned(),
+            predicate: self.predicate.into_owned(),
+            object: self.object.into_owned(),
+            graph_name: self.graph_name.into_owned(),
+        }
+    }
+}
+
+impl fmt::Display for QuadRef<'_> {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if self.graph_name.is_default_graph() {
+            write!(f, "{} {} {}", self.subject, self.predicate, self.object)
+        } else {
+            write!(
+                f,
+                "{} {} {} {}",
+                self.subject, self.predicate, self.object, self.graph_name
+            )
+        }
+    }
+}
+
+impl<'a> From<QuadRef<'a>> for TripleRef<'a> {
+    #[inline]
+    fn from(quad: QuadRef<'a>) -> Self {
+        Self {
+            subject: quad.subject,
+            predicate: quad.predicate,
+            object: quad.object,
+        }
+    }
+}
+
+impl<'a> From<&'a Quad> for QuadRef<'a> {
+    #[inline]
+    fn from(quad: &'a Quad) -> Self {
+        quad.as_ref()
+    }
+}
+
+impl<'a> From<QuadRef<'a>> for Quad {
+    #[inline]
+    fn from(quad: QuadRef<'a>) -> Self {
+        quad.into_owned()
+    }
+}
+
+/// An error returned by some [`TryFrom<Term>`](TryFrom) implementations.
+#[derive(Debug, Clone, thiserror::Error)]
+#[error("{term} can not be converted to a {target}")]
+pub struct TryFromTermError {
+    pub(crate) term: Term,
+    pub(crate) target: &'static str,
+}
+
+impl TryFromTermError {
+    /// The term that can't be converted.
+    #[inline]
+    pub fn into_term(self) -> Term {
+        self.term
+    }
+}
+
+#[cfg(test)]
+#[allow(clippy::panic_in_result_fn)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn triple_from_terms() -> Result<(), TryFromTermError> {
+        assert_eq!(
+            Triple::from_terms(
+                NamedNode::new_unchecked("http://example.com/s"),
+                NamedNode::new_unchecked("http://example.com/p"),
+                NamedNode::new_unchecked("http://example.com/o"),
+            )?,
+            Triple::new(
+                NamedNode::new_unchecked("http://example.com/s"),
+                NamedNode::new_unchecked("http://example.com/p"),
+                NamedNode::new_unchecked("http://example.com/o"),
+            )
+        );
+        assert_eq!(
+            Triple::from_terms(
+                Literal::new_simple_literal("foo"),
+                NamedNode::new_unchecked("http://example.com/p"),
+                NamedNode::new_unchecked("http://example.com/o"),
+            )
+            .unwrap_err()
+            .into_term(),
+            Term::from(Literal::new_simple_literal("foo"))
+        );
+        assert_eq!(
+            Triple::from_terms(
+                NamedNode::new_unchecked("http://example.com/s"),
+                Literal::new_simple_literal("foo"),
+                NamedNode::new_unchecked("http://example.com/o"),
+            )
+            .unwrap_err()
+            .into_term(),
+            Term::from(Literal::new_simple_literal("foo"))
+        );
+        Ok(())
+    }
+}
diff --git a/ng-oxigraph/src/oxrdf/variable.rs b/ng-oxigraph/src/oxrdf/variable.rs
new file mode 100644
index 0000000..c2d89ca
--- /dev/null
+++ b/ng-oxigraph/src/oxrdf/variable.rs
@@ -0,0 +1,216 @@
+use std::cmp::Ordering;
+use std::fmt;
+
+/// A [SPARQL query](https://www.w3.org/TR/sparql11-query/) owned variable.
+///
+/// The default string formatter is returning a SPARQL compatible representation:
+/// ```
+/// use oxrdf::{Variable, VariableNameParseError};
+///
+/// assert_eq!("?foo", Variable::new("foo")?.to_string());
+/// # Result::<_,VariableNameParseError>::Ok(())
+/// ```
+#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
+pub struct Variable {
+    name: String,
+}
+
+impl Variable {
+    /// Creates a variable name from a unique identifier.
+    ///
+    /// The variable identifier must be valid according to the SPARQL grammar.
+    pub fn new(name: impl Into<String>) -> Result<Self, VariableNameParseError> {
+        let name = name.into();
+        validate_variable_identifier(&name)?;
+        Ok(Self::new_unchecked(name))
+    }
+
+    /// Creates a variable name from a unique identifier without validation.
+    ///
+    /// It is the caller's responsibility to ensure that `name` is a valid variable name
+    /// according to the SPARQL grammar.
+    ///
+    /// [`Variable::new()`] is a safe version of this constructor and should be used for untrusted data.
+    #[inline]
+    pub fn new_unchecked(name: impl Into<String>) -> Self {
+        Self { name: name.into() }
+    }
+
+    #[inline]
+    pub fn as_str(&self) -> &str {
+        &self.name
+    }
+
+    #[inline]
+    pub fn into_string(self) -> String {
+        self.name
+    }
+
+    #[inline]
+    pub fn as_ref(&self) -> VariableRef<'_> {
+        VariableRef { name: &self.name }
+    }
+}
+
+impl fmt::Display for Variable {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.as_ref().fmt(f)
+    }
+}
+
+/// A [SPARQL query](https://www.w3.org/TR/sparql11-query/) borrowed variable.
+///
+/// The default string formatter is returning a SPARQL compatible representation:
+/// ```
+/// use oxrdf::{VariableNameParseError, VariableRef};
+///
+/// assert_eq!("?foo", VariableRef::new("foo")?.to_string());
+/// # Result::<_,VariableNameParseError>::Ok(())
+/// ```
+#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
+pub struct VariableRef<'a> {
+    name: &'a str,
+}
+
+impl<'a> VariableRef<'a> {
+    /// Creates a variable name from a unique identifier.
+    ///
+    /// The variable identifier must be valid according to the SPARQL grammar.
+    pub fn new(name: &'a str) -> Result<Self, VariableNameParseError> {
+        validate_variable_identifier(name)?;
+        Ok(Self::new_unchecked(name))
+    }
+
+    /// Creates a variable name from a unique identifier without validation.
+    ///
+    /// It is the caller's responsibility to ensure that `name` is a valid variable name
+    /// according to the SPARQL grammar.
+    ///
+    /// [`Variable::new()`] is a safe version of this constructor and should be used for untrusted data.
+ #[inline] + pub const fn new_unchecked(name: &'a str) -> Self { + Self { name } + } + + #[inline] + pub const fn as_str(self) -> &'a str { + self.name + } + + #[inline] + pub fn into_string(self) -> String { + self.name.to_owned() + } + + #[inline] + pub fn into_owned(self) -> Variable { + Variable { + name: self.name.to_owned(), + } + } +} + +impl fmt::Display for VariableRef<'_> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "?{}", self.name) + } +} + +impl<'a> From<&'a Variable> for VariableRef<'a> { + #[inline] + fn from(variable: &'a Variable) -> Self { + variable.as_ref() + } +} + +impl<'a> From<VariableRef<'a>> for Variable { + #[inline] + fn from(variable: VariableRef<'a>) -> Self { + variable.into_owned() + } +} + +impl PartialEq<Variable> for VariableRef<'_> { + #[inline] + fn eq(&self, other: &Variable) -> bool { + *self == other.as_ref() + } +} + +impl PartialEq<VariableRef<'_>> for Variable { + #[inline] + fn eq(&self, other: &VariableRef<'_>) -> bool { + self.as_ref() == *other + } +} + +impl PartialOrd<Variable> for VariableRef<'_> { + #[inline] + fn partial_cmp(&self, other: &Variable) -> Option<Ordering> { + self.partial_cmp(&other.as_ref()) + } +} + +impl PartialOrd<VariableRef<'_>> for Variable { + #[inline] + fn partial_cmp(&self, other: &VariableRef<'_>) -> Option<Ordering> { + self.as_ref().partial_cmp(other) + } +} + +fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError> { + let mut chars = id.chars(); + let front = chars.next().ok_or(VariableNameParseError)?; + match front { + '0'..='9' + | '_' + | ':' + | 'A'..='Z' + | 'a'..='z' + | '\u{00C0}'..='\u{00D6}' + | '\u{00D8}'..='\u{00F6}' + | '\u{00F8}'..='\u{02FF}' + | '\u{0370}'..='\u{037D}' + | '\u{037F}'..='\u{1FFF}' + | '\u{200C}'..='\u{200D}' + | '\u{2070}'..='\u{218F}' + | '\u{2C00}'..='\u{2FEF}' + | '\u{3001}'..='\u{D7FF}' + | '\u{F900}'..='\u{FDCF}' + | '\u{FDF0}'..='\u{FFFD}' + | '\u{10000}'..='\u{EFFFF}' => (), + _ => return Err(VariableNameParseError), + } + for c in chars { + match c { + '0'..='9' + | '\u{00B7}' + | '\u{0300}'..='\u{036F}' + | '\u{203F}'..='\u{2040}' + | '_' + | 'A'..='Z' + | 'a'..='z' + | '\u{00C0}'..='\u{00D6}' + | '\u{00D8}'..='\u{00F6}' + | '\u{00F8}'..='\u{02FF}' + | '\u{0370}'..='\u{037D}' + | '\u{037F}'..='\u{1FFF}' + | '\u{200C}'..='\u{200D}' + | '\u{2070}'..='\u{218F}' + | '\u{2C00}'..='\u{2FEF}' + | '\u{3001}'..='\u{D7FF}' + | '\u{F900}'..='\u{FDCF}' + | '\u{FDF0}'..='\u{FFFD}' + | '\u{10000}'..='\u{EFFFF}' => (), + _ => return Err(VariableNameParseError), + } + } + Ok(()) +} + +/// An error raised during [`Variable`] name validation. +#[derive(Debug, thiserror::Error)] +#[error("The variable name is invalid")] +pub struct VariableNameParseError; diff --git a/ng-oxigraph/src/oxrdf/vocab.rs b/ng-oxigraph/src/oxrdf/vocab.rs new file mode 100644 index 0000000..81c338b --- /dev/null +++ b/ng-oxigraph/src/oxrdf/vocab.rs @@ -0,0 +1,242 @@ +//! Provides ready to use [`NamedNodeRef`](super::NamedNodeRef)s for basic RDF vocabularies. + +pub mod rdf { + //! [RDF](https://www.w3.org/TR/rdf11-concepts/) vocabulary. + use crate::oxrdf::named_node::NamedNodeRef; + + /// The class of containers of alternatives. + pub const ALT: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Alt"); + /// The class of unordered containers. + pub const BAG: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Bag"); + /// The first item in the subject RDF list. 
+ pub const FIRST: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#first"); + /// The class of HTML literal values. + pub const HTML: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML"); + /// The class of language-tagged string literal values. + pub const LANG_STRING: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"); + /// The class of RDF lists. + pub const LIST: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#List"); + /// The empty list. + pub const NIL: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"); + /// The object of the subject RDF statement. + pub const OBJECT: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#object"); + /// The predicate of the subject RDF statement. + pub const PREDICATE: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate"); + /// The class of RDF properties. + pub const PROPERTY: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Property"); + /// The rest of the subject RDF list after the first item. + pub const REST: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"); + /// The class of ordered containers. + pub const SEQ: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Seq"); + /// The class of RDF statements. + pub const STATEMENT: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement"); + /// The subject of the subject RDF statement. + pub const SUBJECT: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#subject"); + /// The subject is an instance of a class. + pub const TYPE: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); + /// Idiomatic property used for structured values. + pub const VALUE: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#value"); + /// The class of XML literal values. + pub const XML_LITERAL: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"); +} + +pub mod rdfs { + //! [RDFS](https://www.w3.org/TR/rdf-schema/) vocabulary. + use crate::oxrdf::named_node::NamedNodeRef; + + /// The class of classes. + pub const CLASS: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Class"); + /// A description of the subject resource. + pub const COMMENT: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#comment"); + /// The class of RDF containers. + pub const CONTAINER: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Container"); + /// The class of container membership properties, `rdf:_1`, `rdf:_2`, ..., all of which are sub-properties of `member`. + pub const CONTAINER_MEMBERSHIP_PROPERTY: NamedNodeRef<'_> = NamedNodeRef::new_unchecked( + "http://www.w3.org/2000/01/rdf-schema#ContainerMembershipProperty", + ); + /// The class of RDF datatypes. + pub const DATATYPE: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Datatype"); + /// A domain of the subject property. 
+    pub const DOMAIN: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#domain");
+    /// The definition of the subject resource.
+    pub const IS_DEFINED_BY: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#isDefinedBy");
+    /// A human-readable name for the subject.
+    pub const LABEL: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#label");
+    /// The class of literal values, e.g. textual strings and integers.
+    pub const LITERAL: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Literal");
+    /// A member of the subject resource.
+    pub const MEMBER: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#member");
+    /// A range of the subject property.
+    pub const RANGE: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#range");
+    /// The class resource, everything.
+    pub const RESOURCE: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Resource");
+    /// Further information about the subject resource.
+    pub const SEE_ALSO: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#seeAlso");
+    /// The subject is a subclass of a class.
+    pub const SUB_CLASS_OF: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#subClassOf");
+    /// The subject is a subproperty of a property.
+    pub const SUB_PROPERTY_OF: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#subPropertyOf");
+}
+
+pub mod xsd {
+    //! [RDF compatible XSD datatypes](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-compatible-xsd-types).
+    use crate::oxrdf::named_node::NamedNodeRef;
+
+    /// Absolute or relative URIs and IRIs.
+    pub const ANY_URI: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#anyURI");
+    /// Base64-encoded binary data.
+    pub const BASE_64_BINARY: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#base64Binary");
+    /// true, false.
+    pub const BOOLEAN: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#boolean");
+    /// -128…+127 (8 bit).
+    pub const BYTE: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#byte");
+    /// Dates (yyyy-mm-dd) with or without timezone.
+    pub const DATE: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#date");
+    /// Duration of time (days, hours, minutes, seconds only).
+    pub const DAY_TIME_DURATION: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#dayTimeDuration");
+    /// Date and time with or without timezone.
+    pub const DATE_TIME: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#dateTime");
+    /// Date and time with required timezone.
+    pub const DATE_TIME_STAMP: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#dateTimeStamp");
+    /// Arbitrary-precision decimal numbers.
+    pub const DECIMAL: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#decimal");
+    /// 64-bit floating point numbers incl. ±Inf, ±0, NaN.
+    pub const DOUBLE: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#double");
+    /// Duration of time.
+ pub const DURATION: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#duration"); + /// 32-bit floating point numbers incl. ±Inf, ±0, NaN. + pub const FLOAT: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#float"); + /// Gregorian calendar day of the month. + pub const G_DAY: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gDay"); + /// Gregorian calendar month. + pub const G_MONTH: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gMonth"); + /// Gregorian calendar month and day. + pub const G_MONTH_DAY: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gMonthDay"); + /// Gregorian calendar year. + pub const G_YEAR: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gYear"); + /// Gregorian calendar year and month. + pub const G_YEAR_MONTH: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gYearMonth"); + /// Hex-encoded binary data. + pub const HEX_BINARY: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#hexBinary"); + /// -2147483648…+2147483647 (32 bit). + pub const INT: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#int"); + /// Arbitrary-size integer numbers. + pub const INTEGER: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#integer"); + /// Language tags per [BCP47](http://tools.ietf.org/html/bcp47). + pub const LANGUAGE: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#language"); + /// -9223372036854775808…+9223372036854775807 (64 bit). + pub const LONG: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#long"); + /// XML Names. + pub const NAME: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#Name"); + /// XML NCName. + pub const NC_NAME: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#NCName"); + /// Integer numbers <0. + pub const NEGATIVE_INTEGER: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#negativeInteger"); + /// XML NMTOKENs. + pub const NMTOKEN: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#NMTOKEN"); + /// Integer numbers ≥0. + pub const NON_NEGATIVE_INTEGER: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#nonNegativeInteger"); + /// Integer numbers ≤0. + pub const NON_POSITIVE_INTEGER: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#nonPositiveInteger"); + /// Whitespace-normalized strings. + pub const NORMALIZED_STRING: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#normalizedString"); + /// Integer numbers >0. + pub const POSITIVE_INTEGER: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#positiveInteger"); + /// Times (hh:mm:ss.sss…) with or without timezone. + pub const TIME: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#time"); + /// -32768…+32767 (16 bit). + pub const SHORT: NamedNodeRef<'_> = + NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#short"); + /// Character strings (but not all Unicode character strings). 
+    pub const STRING: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#string");
+    /// Tokenized strings.
+    pub const TOKEN: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#token");
+    /// 0…255 (8 bit).
+    pub const UNSIGNED_BYTE: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#unsignedByte");
+    /// 0…4294967295 (32 bit).
+    pub const UNSIGNED_INT: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#unsignedInt");
+    /// 0…18446744073709551615 (64 bit).
+    pub const UNSIGNED_LONG: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#unsignedLong");
+    /// 0…65535 (16 bit).
+    pub const UNSIGNED_SHORT: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#unsignedShort");
+    /// Duration of time (months and years only).
+    pub const YEAR_MONTH_DURATION: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#yearMonthDuration");
+}
+
+pub mod geosparql {
+    //! [GeoSpatial](https://opengeospatial.github.io/ogc-geosparql/) vocabulary.
+    use crate::oxrdf::named_node::NamedNodeRef;
+
+    /// Geospatial datatype like `"Point({longitude} {latitude})"^^geo:wktLiteral`.
+    pub const WKT_LITERAL: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.opengis.net/ont/geosparql#wktLiteral");
+}
diff --git a/ng-oxigraph/src/oxrdfio/README.md b/ng-oxigraph/src/oxrdfio/README.md
new file mode 100644
index 0000000..72238f8
--- /dev/null
+++ b/ng-oxigraph/src/oxrdfio/README.md
@@ -0,0 +1,67 @@
+OxRDF I/O
+=========
+
+OxRDF I/O is a set of parsers and serializers for RDF.
+
+It supports:
+* [N3](https://w3c.github.io/N3/spec/) using [`oxttl`](https://crates.io/crates/oxttl)
+* [N-Quads](https://www.w3.org/TR/n-quads/) using [`oxttl`](https://crates.io/crates/oxttl)
+* [N-Triples](https://www.w3.org/TR/n-triples/) using [`oxttl`](https://crates.io/crates/oxttl)
+* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) using [`oxrdfxml`](https://crates.io/crates/oxrdfxml)
+* [TriG](https://www.w3.org/TR/trig/) using [`oxttl`](https://crates.io/crates/oxttl)
+* [Turtle](https://www.w3.org/TR/turtle/) using [`oxttl`](https://crates.io/crates/oxttl)
+
+Support for [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is also available behind the `rdf-star` feature for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star), [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star), [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) and [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star).
+
+It is designed as a low-level parser compatible with both synchronous and asynchronous I/O (behind the `async-tokio` feature).
+
+The entry points of this library are the two [`RdfParser`] and [`RdfSerializer`] structs.
+
+Usage example converting a Turtle file to an N-Triples file:
+```rust
+use oxrdfio::{RdfFormat, RdfParser, RdfSerializer};
+
+let turtle_file = b"@base <http://example.com/> .
+@prefix schema: <http://schema.org/> .
+<foo> a schema:Person ;
+    schema:name \"Foo\" .
+<bar> a schema:Person ;
+    schema:name \"Bar\" .";
+
+let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
+<http://example.com/foo> <http://schema.org/name> \"Foo\" .
+<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
+<http://example.com/bar> <http://schema.org/name> \"Bar\" .
+";
+
+let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
+for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) {
+    writer.write_quad(&quad.unwrap()).unwrap();
+}
+assert_eq!(writer.finish().unwrap(), ntriples_file);
+```
+
+Parsers for other RDF formats exist in Rust, such as [graph-rdfa-processor](https://github.com/nbittich/graph-rdfa-processor) for RDFa and [json-ld](https://github.com/timothee-haudebourg/json-ld) for JSON-LD.
+
+
+## License
+
+This project is licensed under either of
+
+* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
+  <http://www.apache.org/licenses/LICENSE-2.0>)
+* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
+  <http://opensource.org/licenses/MIT>)
+
+at your option.
+
+
+### Contribution
+
+Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
diff --git a/ng-oxigraph/src/oxrdfio/error.rs b/ng-oxigraph/src/oxrdfio/error.rs
new file mode 100644
index 0000000..af7f6c6
--- /dev/null
+++ b/ng-oxigraph/src/oxrdfio/error.rs
@@ -0,0 +1,124 @@
+use crate::oxrdfxml;
+use crate::oxttl;
+use std::io;
+use std::ops::Range;
+
+/// Error returned during RDF format parsing.
+#[derive(Debug, thiserror::Error)]
+pub enum RdfParseError {
+    /// I/O error during parsing (file not found...).
+    #[error(transparent)]
+    Io(#[from] io::Error),
+    /// An error in the file syntax.
+    #[error(transparent)]
+    Syntax(#[from] RdfSyntaxError),
+}
+
+impl RdfParseError {
+    pub(crate) fn msg(msg: &'static str) -> Self {
+        Self::Syntax(RdfSyntaxError(SyntaxErrorKind::Msg(msg)))
+    }
+}
+
+impl From<oxttl::TurtleSyntaxError> for RdfSyntaxError {
+    #[inline]
+    fn from(error: oxttl::TurtleSyntaxError) -> Self {
+        Self(SyntaxErrorKind::Turtle(error))
+    }
+}
+
+impl From<oxttl::TurtleParseError> for RdfParseError {
+    #[inline]
+    fn from(error: oxttl::TurtleParseError) -> Self {
+        match error {
+            oxttl::TurtleParseError::Syntax(e) => Self::Syntax(e.into()),
+            oxttl::TurtleParseError::Io(e) => Self::Io(e),
+        }
+    }
+}
+
+impl From<oxrdfxml::RdfXmlSyntaxError> for RdfSyntaxError {
+    #[inline]
+    fn from(error: oxrdfxml::RdfXmlSyntaxError) -> Self {
+        Self(SyntaxErrorKind::RdfXml(error))
+    }
+}
+
+impl From<oxrdfxml::RdfXmlParseError> for RdfParseError {
+    #[inline]
+    fn from(error: oxrdfxml::RdfXmlParseError) -> Self {
+        match error {
+            oxrdfxml::RdfXmlParseError::Syntax(e) => Self::Syntax(e.into()),
+            oxrdfxml::RdfXmlParseError::Io(e) => Self::Io(e),
+        }
+    }
+}
+
+impl From<RdfParseError> for io::Error {
+    #[inline]
+    fn from(error: RdfParseError) -> Self {
+        match error {
+            RdfParseError::Io(error) => error,
+            RdfParseError::Syntax(error) => error.into(),
+        }
+    }
+}
+
+/// An error in the syntax of the parsed file.
+#[derive(Debug, thiserror::Error)]
+#[error(transparent)]
+pub struct RdfSyntaxError(#[from] SyntaxErrorKind);
+
+/// An error in the syntax of the parsed file.
+#[derive(Debug, thiserror::Error)]
+enum SyntaxErrorKind {
+    #[error(transparent)]
+    Turtle(#[from] oxttl::TurtleSyntaxError),
+    #[error(transparent)]
+    RdfXml(#[from] oxrdfxml::RdfXmlSyntaxError),
+    #[error("{0}")]
+    Msg(&'static str),
+}
+
+impl RdfSyntaxError {
+    /// The location of the error inside the file.
+    #[inline]
+    pub fn location(&self) -> Option<Range<TextPosition>> {
+        match &self.0 {
+            SyntaxErrorKind::Turtle(e) => {
+                let location = e.location();
+                Some(
+                    TextPosition {
+                        line: location.start.line,
+                        column: location.start.column,
+                        offset: location.start.offset,
+                    }..TextPosition {
+                        line: location.end.line,
+                        column: location.end.column,
+                        offset: location.end.offset,
+                    },
+                )
+            }
+            SyntaxErrorKind::RdfXml(_) | SyntaxErrorKind::Msg(_) => None,
+        }
+    }
+}
+
+impl From<RdfSyntaxError> for io::Error {
+    #[inline]
+    fn from(error: RdfSyntaxError) -> Self {
+        match error.0 {
+            SyntaxErrorKind::Turtle(error) => error.into(),
+            SyntaxErrorKind::RdfXml(error) => error.into(),
+            SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg),
+        }
+    }
+}
+
+/// A position in a text, i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points), and a global file `offset` starting from 0 (in number of bytes).
+#[derive(Eq, PartialEq, Debug, Clone, Copy)]
+pub struct TextPosition {
+    pub line: u64,
+    pub column: u64,
+    pub offset: u64,
+}
diff --git a/ng-oxigraph/src/oxrdfio/format.rs b/ng-oxigraph/src/oxrdfio/format.rs
new file mode 100644
index 0000000..1cc6aa1
--- /dev/null
+++ b/ng-oxigraph/src/oxrdfio/format.rs
@@ -0,0 +1,216 @@
+use std::fmt;
+
+/// RDF serialization formats.
+///
+/// This enumeration is non-exhaustive. New formats like JSON-LD might be added in the future.
+#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
+#[non_exhaustive]
+pub enum RdfFormat {
+    /// [N3](https://w3c.github.io/N3/spec/)
+    N3,
+    /// [N-Quads](https://www.w3.org/TR/n-quads/)
+    NQuads,
+    /// [N-Triples](https://www.w3.org/TR/n-triples/)
+    NTriples,
+    /// [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/)
+    RdfXml,
+    /// [TriG](https://www.w3.org/TR/trig/)
+    TriG,
+    /// [Turtle](https://www.w3.org/TR/turtle/)
+    Turtle,
+}
+
+impl RdfFormat {
+    /// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/).
+    ///
+    /// ```
+    /// use oxrdfio::RdfFormat;
+    ///
+    /// assert_eq!(
+    ///     RdfFormat::NTriples.iri(),
+    ///     "http://www.w3.org/ns/formats/N-Triples"
+    /// )
+    /// ```
+    #[inline]
+    pub const fn iri(self) -> &'static str {
+        match self {
+            Self::N3 => "http://www.w3.org/ns/formats/N3",
+            Self::NQuads => "http://www.w3.org/ns/formats/N-Quads",
+            Self::NTriples => "http://www.w3.org/ns/formats/N-Triples",
+            Self::RdfXml => "http://www.w3.org/ns/formats/RDF_XML",
+            Self::TriG => "http://www.w3.org/ns/formats/TriG",
+            Self::Turtle => "http://www.w3.org/ns/formats/Turtle",
+        }
+    }
+
+    /// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
+    ///
+    /// ```
+    /// use oxrdfio::RdfFormat;
+    ///
+    /// assert_eq!(RdfFormat::NTriples.media_type(), "application/n-triples")
+    /// ```
+    #[inline]
+    pub const fn media_type(self) -> &'static str {
+        match self {
+            Self::N3 => "text/n3",
+            Self::NQuads => "application/n-quads",
+            Self::NTriples => "application/n-triples",
+            Self::RdfXml => "application/rdf+xml",
+            Self::TriG => "application/trig",
+            Self::Turtle => "text/turtle",
+        }
+    }
+
+    /// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension.
+    ///
+    /// ```
+    /// use oxrdfio::RdfFormat;
+    ///
+    /// assert_eq!(RdfFormat::NTriples.file_extension(), "nt")
+    /// ```
+    #[inline]
+    pub const fn file_extension(self) -> &'static str {
+        match self {
+            Self::N3 => "n3",
+            Self::NQuads => "nq",
+            Self::NTriples => "nt",
+            Self::RdfXml => "rdf",
+            Self::TriG => "trig",
+            Self::Turtle => "ttl",
+        }
+    }
+
+    /// The format name.
+    ///
+    /// ```
+    /// use oxrdfio::RdfFormat;
+    ///
+    /// assert_eq!(RdfFormat::NTriples.name(), "N-Triples")
+    /// ```
+    #[inline]
+    pub const fn name(self) -> &'static str {
+        match self {
+            Self::N3 => "N3",
+            Self::NQuads => "N-Quads",
+            Self::NTriples => "N-Triples",
+            Self::RdfXml => "RDF/XML",
+            Self::TriG => "TriG",
+            Self::Turtle => "Turtle",
+        }
+    }
+
+    /// Checks if the format supports [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) and not only [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
+    ///
+    /// ```
+    /// use oxrdfio::RdfFormat;
+    ///
+    /// assert_eq!(RdfFormat::NTriples.supports_datasets(), false);
+    /// assert_eq!(RdfFormat::NQuads.supports_datasets(), true);
+    /// ```
+    #[inline]
+    pub const fn supports_datasets(self) -> bool {
+        matches!(self, Self::NQuads | Self::TriG)
+    }
+
+    /// Checks if the format supports [RDF-star quoted triples](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#dfn-quoted).
+    ///
+    /// ```
+    /// use oxrdfio::RdfFormat;
+    ///
+    /// assert_eq!(RdfFormat::NTriples.supports_rdf_star(), true);
+    /// assert_eq!(RdfFormat::RdfXml.supports_rdf_star(), false);
+    /// ```
+    #[inline]
+    #[cfg(feature = "rdf-star")]
+    pub const fn supports_rdf_star(self) -> bool {
+        matches!(
+            self,
+            Self::NTriples | Self::NQuads | Self::Turtle | Self::TriG
+        )
+    }
+
+    /// Looks for a known format from a media type.
+    ///
+    /// It supports some media type aliases.
+    /// For example, "application/xml" will return `RdfFormat::RdfXml` even if it is not its canonical media type.
+    ///
+    /// Example:
+    /// ```
+    /// use oxrdfio::RdfFormat;
+    ///
+    /// assert_eq!(
+    ///     RdfFormat::from_media_type("text/turtle; charset=utf-8"),
+    ///     Some(RdfFormat::Turtle)
+    /// )
+    /// ```
+    #[inline]
+    pub fn from_media_type(media_type: &str) -> Option<Self> {
+        const MEDIA_SUBTYPES: [(&str, RdfFormat); 10] = [
+            ("n-quads", RdfFormat::NQuads),
+            ("n-triples", RdfFormat::NTriples),
+            ("n3", RdfFormat::N3),
+            ("nquads", RdfFormat::NQuads),
+            ("ntriples", RdfFormat::NTriples),
+            ("plain", RdfFormat::NTriples),
+            ("rdf+xml", RdfFormat::RdfXml),
+            ("trig", RdfFormat::TriG),
+            ("turtle", RdfFormat::Turtle),
+            ("xml", RdfFormat::RdfXml),
+        ];
+
+        let (r#type, subtype) = media_type
+            .split_once(';')
+            .unwrap_or((media_type, ""))
+            .0
+            .split_once('/')?;
+        let r#type = r#type.trim();
+        if !r#type.eq_ignore_ascii_case("application") && !r#type.eq_ignore_ascii_case("text") {
+            return None;
+        }
+        let subtype = subtype.trim();
+        let subtype = subtype.strip_prefix("x-").unwrap_or(subtype);
+        for (candidate_subtype, candidate_id) in MEDIA_SUBTYPES {
+            if candidate_subtype.eq_ignore_ascii_case(subtype) {
+                return Some(candidate_id);
+            }
+        }
+        None
+    }
+
+    /// Looks for a known format from an extension.
+    ///
+    /// It supports some aliases.
+ /// + /// Example: + /// ``` + /// use oxrdfio::RdfFormat; + /// + /// assert_eq!(RdfFormat::from_extension("nt"), Some(RdfFormat::NTriples)) + /// ``` + #[inline] + pub fn from_extension(extension: &str) -> Option<Self> { + const MEDIA_TYPES: [(&str, RdfFormat); 8] = [ + ("n3", RdfFormat::N3), + ("nq", RdfFormat::NQuads), + ("nt", RdfFormat::NTriples), + ("rdf", RdfFormat::RdfXml), + ("trig", RdfFormat::TriG), + ("ttl", RdfFormat::Turtle), + ("txt", RdfFormat::NTriples), + ("xml", RdfFormat::RdfXml), + ]; + for (candidate_extension, candidate_id) in MEDIA_TYPES { + if candidate_extension.eq_ignore_ascii_case(extension) { + return Some(candidate_id); + } + } + None + } +} + +impl fmt::Display for RdfFormat { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.name()) + } +} diff --git a/ng-oxigraph/src/oxrdfio/mod.rs b/ng-oxigraph/src/oxrdfio/mod.rs new file mode 100644 index 0000000..5b84cd0 --- /dev/null +++ b/ng-oxigraph/src/oxrdfio/mod.rs @@ -0,0 +1,9 @@ +mod error; +mod format; +mod parser; +mod serializer; + +pub use error::{RdfParseError, RdfSyntaxError, TextPosition}; +pub use format::RdfFormat; +pub use parser::{FromReadQuadReader, RdfParser}; +pub use serializer::{RdfSerializer, ToWriteQuadWriter}; diff --git a/ng-oxigraph/src/oxrdfio/parser.rs b/ng-oxigraph/src/oxrdfio/parser.rs new file mode 100644 index 0000000..7de95dd --- /dev/null +++ b/ng-oxigraph/src/oxrdfio/parser.rs @@ -0,0 +1,795 @@ +//! Utilities to read RDF graphs and datasets. + +use crate::oxrdf::{BlankNode, GraphName, IriParseError, Quad, Subject, Term, Triple}; +pub use crate::oxrdfio::error::RdfParseError; +use crate::oxrdfio::format::RdfFormat; +use crate::oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser}; +use crate::oxttl::n3::{FromReadN3Reader, N3Parser, N3PrefixesIter, N3Quad, N3Term}; +use crate::oxttl::nquads::{FromReadNQuadsReader, NQuadsParser}; +use crate::oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser}; +use crate::oxttl::trig::{FromReadTriGReader, TriGParser, TriGPrefixesIter}; +use crate::oxttl::turtle::{FromReadTurtleReader, TurtleParser, TurtlePrefixesIter}; +use std::collections::HashMap; +use std::io::Read; +#[cfg(feature = "async-tokio")] +use tokio::io::AsyncRead; + +/// Parsers for RDF serialization formats. +/// +/// It currently supports the following formats: +/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`]) +/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`]) +/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`]) +/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`]) +/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`]) +/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`]) +/// +/// Note the useful options: +/// - [`with_base_iri`](Self::with_base_iri) to resolve the relative IRIs. +/// - [`rename_blank_nodes`](Self::rename_blank_nodes) to rename the blank nodes to auto-generated numbers to avoid conflicts when merging RDF graphs together. +/// - [`without_named_graphs`](Self::without_named_graphs) to parse a single graph. +/// - [`unchecked`](Self::unchecked) to skip some validations if the file is already known to be valid. 
+/// +/// ``` +/// use oxrdfio::{RdfFormat, RdfParser}; +/// +/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> ."; +/// +/// let parser = RdfParser::from_format(RdfFormat::NTriples); +/// let quads = parser +/// .parse_read(file.as_bytes()) +/// .collect::<Result<Vec<_>, _>>()?; +/// +/// assert_eq!(quads.len(), 1); +/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>"); +/// # std::io::Result::Ok(()) +/// ``` +#[must_use] +pub struct RdfParser { + inner: RdfParserKind, + default_graph: GraphName, + without_named_graphs: bool, + rename_blank_nodes: bool, +} + +enum RdfParserKind { + N3(N3Parser), + NQuads(NQuadsParser), + NTriples(NTriplesParser), + RdfXml(RdfXmlParser), + TriG(TriGParser), + Turtle(TurtleParser), +} + +impl RdfParser { + /// Builds a parser for the given format. + #[inline] + pub fn from_format(format: RdfFormat) -> Self { + Self { + inner: match format { + RdfFormat::N3 => RdfParserKind::N3(N3Parser::new()), + RdfFormat::NQuads => RdfParserKind::NQuads({ + #[cfg(feature = "rdf-star")] + { + NQuadsParser::new().with_quoted_triples() + } + #[cfg(not(feature = "rdf-star"))] + { + NQuadsParser::new() + } + }), + RdfFormat::NTriples => RdfParserKind::NTriples({ + #[cfg(feature = "rdf-star")] + { + NTriplesParser::new().with_quoted_triples() + } + #[cfg(not(feature = "rdf-star"))] + { + NTriplesParser::new() + } + }), + RdfFormat::RdfXml => RdfParserKind::RdfXml(RdfXmlParser::new()), + RdfFormat::TriG => RdfParserKind::TriG({ + #[cfg(feature = "rdf-star")] + { + TriGParser::new().with_quoted_triples() + } + #[cfg(not(feature = "rdf-star"))] + { + TriGParser::new() + } + }), + RdfFormat::Turtle => RdfParserKind::Turtle({ + #[cfg(feature = "rdf-star")] + { + TurtleParser::new().with_quoted_triples() + } + #[cfg(not(feature = "rdf-star"))] + { + TurtleParser::new() + } + }), + }, + default_graph: GraphName::DefaultGraph, + without_named_graphs: false, + rename_blank_nodes: false, + } + } + + /// The format the parser uses. + /// + /// ``` + /// use oxrdfio::{RdfFormat, RdfParser}; + /// + /// assert_eq!( + /// RdfParser::from_format(RdfFormat::Turtle).format(), + /// RdfFormat::Turtle + /// ); + /// ``` + pub fn format(&self) -> RdfFormat { + match &self.inner { + RdfParserKind::N3(_) => RdfFormat::N3, + RdfParserKind::NQuads(_) => RdfFormat::NQuads, + RdfParserKind::NTriples(_) => RdfFormat::NTriples, + RdfParserKind::RdfXml(_) => RdfFormat::RdfXml, + RdfParserKind::TriG(_) => RdfFormat::TriG, + RdfParserKind::Turtle(_) => RdfFormat::Turtle, + } + } + + /// Provides an IRI that could be used to resolve the file relative IRIs. 
+    ///
+    /// ```
+    /// use oxrdfio::{RdfFormat, RdfParser};
+    ///
+    /// let file = "</s> </p> </o> .";
+    ///
+    /// let parser = RdfParser::from_format(RdfFormat::Turtle).with_base_iri("http://example.com")?;
+    /// let quads = parser
+    ///     .parse_read(file.as_bytes())
+    ///     .collect::<Result<Vec<_>, _>>()?;
+    ///
+    /// assert_eq!(quads.len(), 1);
+    /// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
+    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+    /// ```
+    #[inline]
+    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
+        self.inner = match self.inner {
+            RdfParserKind::N3(p) => RdfParserKind::N3(p),
+            RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p),
+            RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p),
+            RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?),
+            RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?),
+            RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?),
+        };
+        Ok(self)
+    }
+
+    /// Provides the graph name that should replace the default graph in the returned quads.
+    ///
+    /// ```
+    /// use oxrdf::NamedNode;
+    /// use oxrdfio::{RdfFormat, RdfParser};
+    ///
+    /// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
+    ///
+    /// let parser = RdfParser::from_format(RdfFormat::Turtle)
+    ///     .with_default_graph(NamedNode::new("http://example.com/g")?);
+    /// let quads = parser
+    ///     .parse_read(file.as_bytes())
+    ///     .collect::<Result<Vec<_>, _>>()?;
+    ///
+    /// assert_eq!(quads.len(), 1);
+    /// assert_eq!(quads[0].graph_name.to_string(), "<http://example.com/g>");
+    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+    /// ```
+    #[inline]
+    pub fn with_default_graph(mut self, default_graph: impl Into<GraphName>) -> Self {
+        self.default_graph = default_graph.into();
+        self
+    }
+
+    /// Makes the parser fail if it encounters a named graph.
+    ///
+    /// This function restricts the parser to only parse a single [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) and not an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
+    ///
+    /// ```
+    /// use oxrdfio::{RdfFormat, RdfParser};
+    ///
+    /// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
+    ///
+    /// let parser = RdfParser::from_format(RdfFormat::NQuads).without_named_graphs();
+    /// assert!(parser.parse_read(file.as_bytes()).next().unwrap().is_err());
+    /// ```
+    #[inline]
+    pub fn without_named_graphs(mut self) -> Self {
+        self.without_named_graphs = true;
+        self
+    }
+
+    /// Renames the blank node ids from the ones set in the serialization to random ids.
+    ///
+    /// This avoids id conflicts when merging graphs together.
+    ///
+    /// ```
+    /// use oxrdfio::{RdfFormat, RdfParser};
+    ///
+    /// let file = "_:a <http://example.com/p> <http://example.com/o> .";
+    ///
+    /// let result1 = RdfParser::from_format(RdfFormat::NQuads)
+    ///     .rename_blank_nodes()
+    ///     .parse_read(file.as_bytes())
+    ///     .collect::<Result<Vec<_>, _>>()?;
+    /// let result2 = RdfParser::from_format(RdfFormat::NQuads)
+    ///     .rename_blank_nodes()
+    ///     .parse_read(file.as_bytes())
+    ///     .collect::<Result<Vec<_>, _>>()?;
+    /// assert_ne!(result1, result2);
+    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+    /// ```
+    #[inline]
+    pub fn rename_blank_nodes(mut self) -> Self {
+        self.rename_blank_nodes = true;
+        self
+    }
+
+    /// Assumes the file is valid to make parsing faster.
+ /// + /// It will skip some validations. + /// + /// Note that if the file is actually not valid, then broken RDF might be emitted by the parser. + #[inline] + pub fn unchecked(mut self) -> Self { + self.inner = match self.inner { + RdfParserKind::N3(p) => RdfParserKind::N3(p.unchecked()), + RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p.unchecked()), + RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p.unchecked()), + RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.unchecked()), + RdfParserKind::TriG(p) => RdfParserKind::TriG(p.unchecked()), + RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.unchecked()), + }; + self + } + + /// Parses from a [`Read`] implementation and returns an iterator of quads. + /// + /// Reads are buffered. + /// + /// ``` + /// use oxrdfio::{RdfFormat, RdfParser}; + /// + /// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> ."; + /// + /// let parser = RdfParser::from_format(RdfFormat::NTriples); + /// let quads = parser + /// .parse_read(file.as_bytes()) + /// .collect::<Result<Vec<_>, _>>()?; + /// + /// assert_eq!(quads.len(), 1); + /// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>"); + /// # std::io::Result::Ok(()) + /// ``` + pub fn parse_read<R: Read>(self, reader: R) -> FromReadQuadReader<R> { + FromReadQuadReader { + parser: match self.inner { + RdfParserKind::N3(p) => FromReadQuadReaderKind::N3(p.parse_read(reader)), + RdfParserKind::NQuads(p) => FromReadQuadReaderKind::NQuads(p.parse_read(reader)), + RdfParserKind::NTriples(p) => { + FromReadQuadReaderKind::NTriples(p.parse_read(reader)) + } + RdfParserKind::RdfXml(p) => FromReadQuadReaderKind::RdfXml(p.parse_read(reader)), + RdfParserKind::TriG(p) => FromReadQuadReaderKind::TriG(p.parse_read(reader)), + RdfParserKind::Turtle(p) => FromReadQuadReaderKind::Turtle(p.parse_read(reader)), + }, + mapper: QuadMapper { + default_graph: self.default_graph.clone(), + without_named_graphs: self.without_named_graphs, + blank_node_map: self.rename_blank_nodes.then(HashMap::new), + }, + } + } + + /// Parses from a Tokio [`AsyncRead`] implementation and returns an async iterator of quads. + /// + /// Reads are buffered. 
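All of these options return `Self` and can be chained before the input is read. A minimal sketch of a fully configured parser, assuming the same public `oxrdfio` API as the doc-tests in this file:

```rust
use oxrdf::NamedNode;
use oxrdfio::{RdfFormat, RdfParser};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let file = "_:a <http://example.com/p> <http://example.com/o> .";

    // Each builder method consumes and returns the parser, so options chain.
    let parser = RdfParser::from_format(RdfFormat::Turtle)
        .with_default_graph(NamedNode::new("http://example.com/g")?)
        .without_named_graphs() // fail if the input contains a named graph
        .rename_blank_nodes() // replace _:a by a random blank node id
        .unchecked(); // skip validations for speed
    for quad in parser.parse_read(file.as_bytes()) {
        let quad = quad?;
        assert_eq!(quad.graph_name.to_string(), "<http://example.com/g>");
    }
    Ok(())
}
```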
+ ///
+ /// ```
+ /// use oxrdfio::{RdfFormat, RdfParser};
+ ///
+ /// # #[tokio::main(flavor = "current_thread")]
+ /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
+ /// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
+ ///
+ /// let parser = RdfParser::from_format(RdfFormat::NTriples);
+ /// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
+ /// if let Some(quad) = reader.next().await {
+ /// assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
+ /// }
+ /// # Ok(())
+ /// # }
+ /// ```
+ #[cfg(feature = "async-tokio")]
+ pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
+ self,
+ reader: R,
+ ) -> FromTokioAsyncReadQuadReader<R> {
+ FromTokioAsyncReadQuadReader {
+ parser: match self.inner {
+ RdfParserKind::N3(p) => {
+ FromTokioAsyncReadQuadReaderKind::N3(p.parse_tokio_async_read(reader))
+ }
+ RdfParserKind::NQuads(p) => {
+ FromTokioAsyncReadQuadReaderKind::NQuads(p.parse_tokio_async_read(reader))
+ }
+ RdfParserKind::NTriples(p) => {
+ FromTokioAsyncReadQuadReaderKind::NTriples(p.parse_tokio_async_read(reader))
+ }
+ RdfParserKind::RdfXml(p) => {
+ FromTokioAsyncReadQuadReaderKind::RdfXml(p.parse_tokio_async_read(reader))
+ }
+ RdfParserKind::TriG(p) => {
+ FromTokioAsyncReadQuadReaderKind::TriG(p.parse_tokio_async_read(reader))
+ }
+ RdfParserKind::Turtle(p) => {
+ FromTokioAsyncReadQuadReaderKind::Turtle(p.parse_tokio_async_read(reader))
+ }
+ },
+ mapper: QuadMapper {
+ default_graph: self.default_graph.clone(),
+ without_named_graphs: self.without_named_graphs,
+ blank_node_map: self.rename_blank_nodes.then(HashMap::new),
+ },
+ }
+ }
+}
+
+impl From<RdfFormat> for RdfParser {
+ fn from(format: RdfFormat) -> Self {
+ Self::from_format(format)
+ }
+}
+
+/// Parses an RDF file from a [`Read`] implementation. Can be built using [`RdfParser::parse_read`].
+///
+/// Reads are buffered.
+///
+/// ```
+/// use oxrdfio::{RdfFormat, RdfParser};
+///
+/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
+///
+/// let parser = RdfParser::from_format(RdfFormat::NTriples);
+/// let quads = parser
+/// .parse_read(file.as_bytes())
+/// .collect::<Result<Vec<_>, _>>()?;
+///
+/// assert_eq!(quads.len(), 1);
+/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
+/// # std::io::Result::Ok(())
+/// ```
+#[must_use]
+pub struct FromReadQuadReader<R: Read> {
+ parser: FromReadQuadReaderKind<R>,
+ mapper: QuadMapper,
+}
+
+enum FromReadQuadReaderKind<R: Read> {
+ N3(FromReadN3Reader<R>),
+ NQuads(FromReadNQuadsReader<R>),
+ NTriples(FromReadNTriplesReader<R>),
+ RdfXml(FromReadRdfXmlReader<R>),
+ TriG(FromReadTriGReader<R>),
+ Turtle(FromReadTurtleReader<R>),
+}
+
+impl<R: Read> Iterator for FromReadQuadReader<R> {
+ type Item = Result<Quad, RdfParseError>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ Some(match &mut self.parser {
+ FromReadQuadReaderKind::N3(parser) => match parser.next()? {
+ Ok(quad) => self.mapper.map_n3_quad(quad),
+ Err(e) => Err(e.into()),
+ },
+ FromReadQuadReaderKind::NQuads(parser) => match parser.next()? {
+ Ok(quad) => self.mapper.map_quad(quad),
+ Err(e) => Err(e.into()),
+ },
+ FromReadQuadReaderKind::NTriples(parser) => match parser.next()? {
+ Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
+ Err(e) => Err(e.into()),
+ },
+ FromReadQuadReaderKind::RdfXml(parser) => match parser.next()? {
+ Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
+ Err(e) => Err(e.into()),
+ },
+ FromReadQuadReaderKind::TriG(parser) => match parser.next()? {
+ Ok(quad) => self.mapper.map_quad(quad),
+ Err(e) => Err(e.into()),
+ },
+ FromReadQuadReaderKind::Turtle(parser) => match parser.next()? {
+ Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
+ Err(e) => Err(e.into()),
+ },
+ })
+ }
+}
+
+impl<R: Read> FromReadQuadReader<R> {
+ /// The list of IRI prefixes considered at the current step of the parsing.
+ ///
+ /// This method returns (prefix name, prefix value) tuples.
+ /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
+ /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
+ ///
+ /// An empty iterator is returned if the format does not support prefixes.
+ ///
+ /// ```
+ /// use oxrdfio::{RdfFormat, RdfParser};
+ ///
+ /// let file = br#"@base <http://example.com/> .
+ /// @prefix schema: <http://schema.org/> .
+ /// <foo> a schema:Person ;
+ /// schema:name "Foo" ."#;
+ ///
+ /// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
+ /// assert!(reader.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
+ ///
+ /// reader.next().unwrap()?; // We read the first triple
+ /// assert_eq!(
+ /// reader.prefixes().collect::<Vec<_>>(),
+ /// [("schema", "http://schema.org/")]
+ /// ); // There are now prefixes
+ /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+ /// ```
+ pub fn prefixes(&self) -> PrefixesIter<'_> {
+ PrefixesIter {
+ inner: match &self.parser {
+ FromReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()),
+ FromReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()),
+ FromReadQuadReaderKind::Turtle(p) => PrefixesIterKind::Turtle(p.prefixes()),
+ FromReadQuadReaderKind::NQuads(_)
+ | FromReadQuadReaderKind::NTriples(_)
+ | FromReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */
+ },
+ }
+ }
+
+ /// The base IRI considered at the current step of the parsing.
+ ///
+ /// `None` is returned if no base IRI is set or the format does not support base IRIs.
+ ///
+ /// ```
+ /// use oxrdfio::{RdfFormat, RdfParser};
+ ///
+ /// let file = br#"@base <http://example.com/> .
+ /// @prefix schema: <http://schema.org/> .
+ /// <foo> a schema:Person ;
+ /// schema:name "Foo" ."#;
+ ///
+ /// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
+ /// assert!(reader.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
+ ///
+ /// reader.next().unwrap()?; // We read the first triple
+ /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI.
+ /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+ /// ```
+ pub fn base_iri(&self) -> Option<&str> {
+ match &self.parser {
+ FromReadQuadReaderKind::N3(p) => p.base_iri(),
+ FromReadQuadReaderKind::TriG(p) => p.base_iri(),
+ FromReadQuadReaderKind::Turtle(p) => p.base_iri(),
+ FromReadQuadReaderKind::NQuads(_)
+ | FromReadQuadReaderKind::NTriples(_)
+ | FromReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
+ }
+ }
+}
+
+/// Parses an RDF file from a Tokio [`AsyncRead`] implementation. Can be built using [`RdfParser::parse_tokio_async_read`].
+///
+/// Reads are buffered.
+///
+/// ```
+/// use oxrdfio::{RdfFormat, RdfParser};
+///
+/// # #[tokio::main(flavor = "current_thread")]
+/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
+/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
+///
+/// let parser = RdfParser::from_format(RdfFormat::NTriples);
+/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
+/// if let Some(quad) = reader.next().await {
+/// assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
+/// }
+/// # Ok(())
+/// # }
+/// ```
+#[must_use]
+#[cfg(feature = "async-tokio")]
+pub struct FromTokioAsyncReadQuadReader<R: AsyncRead + Unpin> {
+ parser: FromTokioAsyncReadQuadReaderKind<R>,
+ mapper: QuadMapper,
+}
+
+#[cfg(feature = "async-tokio")]
+enum FromTokioAsyncReadQuadReaderKind<R: AsyncRead + Unpin> {
+ N3(FromTokioAsyncReadN3Reader<R>),
+ NQuads(FromTokioAsyncReadNQuadsReader<R>),
+ NTriples(FromTokioAsyncReadNTriplesReader<R>),
+ RdfXml(FromTokioAsyncReadRdfXmlReader<R>),
+ TriG(FromTokioAsyncReadTriGReader<R>),
+ Turtle(FromTokioAsyncReadTurtleReader<R>),
+}
+
+#[cfg(feature = "async-tokio")]
+impl<R: AsyncRead + Unpin> FromTokioAsyncReadQuadReader<R> {
+ pub async fn next(&mut self) -> Option<Result<Quad, RdfParseError>> {
+ Some(match &mut self.parser {
+ FromTokioAsyncReadQuadReaderKind::N3(parser) => match parser.next().await? {
+ Ok(quad) => self.mapper.map_n3_quad(quad),
+ Err(e) => Err(e.into()),
+ },
+ FromTokioAsyncReadQuadReaderKind::NQuads(parser) => match parser.next().await? {
+ Ok(quad) => self.mapper.map_quad(quad),
+ Err(e) => Err(e.into()),
+ },
+ FromTokioAsyncReadQuadReaderKind::NTriples(parser) => match parser.next().await? {
+ Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
+ Err(e) => Err(e.into()),
+ },
+ FromTokioAsyncReadQuadReaderKind::RdfXml(parser) => match parser.next().await? {
+ Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
+ Err(e) => Err(e.into()),
+ },
+ FromTokioAsyncReadQuadReaderKind::TriG(parser) => match parser.next().await? {
+ Ok(quad) => self.mapper.map_quad(quad),
+ Err(e) => Err(e.into()),
+ },
+ FromTokioAsyncReadQuadReaderKind::Turtle(parser) => match parser.next().await? {
+ Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
+ Err(e) => Err(e.into()),
+ },
+ })
+ }
+
+ /// The list of IRI prefixes considered at the current step of the parsing.
+ ///
+ /// This method returns (prefix name, prefix value) tuples.
+ /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
+ /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
+ ///
+ /// An empty iterator is returned if the format does not support prefixes.
+ ///
+ /// ```
+ /// use oxrdfio::{RdfFormat, RdfParser};
+ ///
+ /// # #[tokio::main(flavor = "current_thread")]
+ /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
+ /// let file = br#"@base <http://example.com/> .
+ /// @prefix schema: <http://schema.org/> .
+ /// <foo> a schema:Person ;
+ /// schema:name "Foo" ."#;
+ ///
+ /// let mut reader =
+ /// RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
+ /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
+ ///
+ /// reader.next().await.unwrap()?; // We read the first triple
+ /// assert_eq!(
+ /// reader.prefixes().collect::<Vec<_>>(),
+ /// [("schema", "http://schema.org/")]
+ /// ); // There are now prefixes
+ /// # Ok(())
+ /// # }
+ /// ```
+ pub fn prefixes(&self) -> PrefixesIter<'_> {
+ PrefixesIter {
+ inner: match &self.parser {
+ FromTokioAsyncReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()),
+ FromTokioAsyncReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()),
+ FromTokioAsyncReadQuadReaderKind::Turtle(p) => {
+ PrefixesIterKind::Turtle(p.prefixes())
+ }
+ FromTokioAsyncReadQuadReaderKind::NQuads(_)
+ | FromTokioAsyncReadQuadReaderKind::NTriples(_)
+ | FromTokioAsyncReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */
+ },
+ }
+ }
+
+ /// The base IRI considered at the current step of the parsing.
+ ///
+ /// `None` is returned if no base IRI is set or the format does not support base IRIs.
+ ///
+ /// ```
+ /// use oxrdfio::{RdfFormat, RdfParser};
+ ///
+ /// # #[tokio::main(flavor = "current_thread")]
+ /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
+ /// let file = br#"@base <http://example.com/> .
+ /// @prefix schema: <http://schema.org/> .
+ /// <foo> a schema:Person ;
+ /// schema:name "Foo" ."#;
+ ///
+ /// let mut reader =
+ /// RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
+ /// assert!(reader.base_iri().is_none()); // No base IRI at the beginning
+ ///
+ /// reader.next().await.unwrap()?; // We read the first triple
+ /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI
+ /// # Ok(())
+ /// # }
+ /// ```
+ pub fn base_iri(&self) -> Option<&str> {
+ match &self.parser {
+ FromTokioAsyncReadQuadReaderKind::N3(p) => p.base_iri(),
+ FromTokioAsyncReadQuadReaderKind::TriG(p) => p.base_iri(),
+ FromTokioAsyncReadQuadReaderKind::Turtle(p) => p.base_iri(),
+ FromTokioAsyncReadQuadReaderKind::NQuads(_)
+ | FromTokioAsyncReadQuadReaderKind::NTriples(_)
+ | FromTokioAsyncReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
+ }
+ }
+}
+
+/// Iterator on the file prefixes.
+///
+/// See [`FromReadQuadReader::prefixes`].
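Since `prefixes()` yields `(prefix name, prefix value)` tuples and later declarations override earlier ones, collecting into a map is a natural way to keep the final prefix table. A minimal sketch, assuming the synchronous Turtle reader from the examples above:

```rust
use oxrdfio::{RdfFormat, RdfParser};
use std::collections::HashMap;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let file = b"@prefix schema: <http://schema.org/> .\n<http://example.com/s> a schema:Person .";

    let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
    reader.next().unwrap()?; // read the first quad so the prefix is registered

    // A map keeps only the latest value for each prefix name.
    let prefixes: HashMap<&str, &str> = reader.prefixes().collect();
    assert_eq!(prefixes.get("schema"), Some(&"http://schema.org/"));
    Ok(())
}
```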
+pub struct PrefixesIter<'a> { + inner: PrefixesIterKind<'a>, +} + +enum PrefixesIterKind<'a> { + Turtle(TurtlePrefixesIter<'a>), + TriG(TriGPrefixesIter<'a>), + N3(N3PrefixesIter<'a>), + None, +} + +impl<'a> Iterator for PrefixesIter<'a> { + type Item = (&'a str, &'a str); + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + match &mut self.inner { + PrefixesIterKind::Turtle(iter) => iter.next(), + PrefixesIterKind::TriG(iter) => iter.next(), + PrefixesIterKind::N3(iter) => iter.next(), + PrefixesIterKind::None => None, + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + match &self.inner { + PrefixesIterKind::Turtle(iter) => iter.size_hint(), + PrefixesIterKind::TriG(iter) => iter.size_hint(), + PrefixesIterKind::N3(iter) => iter.size_hint(), + PrefixesIterKind::None => (0, Some(0)), + } + } +} + +struct QuadMapper { + default_graph: GraphName, + without_named_graphs: bool, + blank_node_map: Option<HashMap<BlankNode, BlankNode>>, +} + +impl QuadMapper { + fn map_blank_node(&mut self, node: BlankNode) -> BlankNode { + if let Some(blank_node_map) = &mut self.blank_node_map { + blank_node_map + .entry(node) + .or_insert_with(BlankNode::default) + .clone() + } else { + node + } + } + + fn map_subject(&mut self, node: Subject) -> Subject { + match node { + Subject::NamedNode(node) => node.into(), + Subject::BlankNode(node) => self.map_blank_node(node).into(), + #[cfg(feature = "rdf-star")] + Subject::Triple(triple) => self.map_triple(*triple).into(), + } + } + + fn map_term(&mut self, node: Term) -> Term { + match node { + Term::NamedNode(node) => node.into(), + Term::BlankNode(node) => self.map_blank_node(node).into(), + Term::Literal(literal) => literal.into(), + #[cfg(feature = "rdf-star")] + Term::Triple(triple) => self.map_triple(*triple).into(), + } + } + + fn map_triple(&mut self, triple: Triple) -> Triple { + Triple { + subject: self.map_subject(triple.subject), + predicate: triple.predicate, + object: self.map_term(triple.object), + } + } + + fn map_graph_name(&mut self, graph_name: GraphName) -> Result<GraphName, RdfParseError> { + match graph_name { + GraphName::NamedNode(node) => { + if self.without_named_graphs { + Err(RdfParseError::msg("Named graphs are not allowed")) + } else { + Ok(node.into()) + } + } + GraphName::BlankNode(node) => { + if self.without_named_graphs { + Err(RdfParseError::msg("Named graphs are not allowed")) + } else { + Ok(self.map_blank_node(node).into()) + } + } + GraphName::DefaultGraph => Ok(self.default_graph.clone()), + } + } + + fn map_quad(&mut self, quad: Quad) -> Result<Quad, RdfParseError> { + Ok(Quad { + subject: self.map_subject(quad.subject), + predicate: quad.predicate, + object: self.map_term(quad.object), + graph_name: self.map_graph_name(quad.graph_name)?, + }) + } + + fn map_triple_to_quad(&mut self, triple: Triple) -> Quad { + self.map_triple(triple).in_graph(self.default_graph.clone()) + } + + fn map_n3_quad(&mut self, quad: N3Quad) -> Result<Quad, RdfParseError> { + Ok(Quad { + subject: match quad.subject { + N3Term::NamedNode(s) => Ok(s.into()), + N3Term::BlankNode(s) => Ok(self.map_blank_node(s).into()), + N3Term::Literal(_) => Err(RdfParseError::msg( + "literals are not allowed in regular RDF subjects", + )), + #[cfg(feature = "rdf-star")] + N3Term::Triple(s) => Ok(self.map_triple(*s).into()), + N3Term::Variable(_) => Err(RdfParseError::msg( + "variables are not allowed in regular RDF subjects", + )), + }?, + predicate: match quad.predicate { + N3Term::NamedNode(p) => Ok(p), + N3Term::BlankNode(_) => 
Err(RdfParseError::msg( + "blank nodes are not allowed in regular RDF predicates", + )), + N3Term::Literal(_) => Err(RdfParseError::msg( + "literals are not allowed in regular RDF predicates", + )), + #[cfg(feature = "rdf-star")] + N3Term::Triple(_) => Err(RdfParseError::msg( + "quoted triples are not allowed in regular RDF predicates", + )), + N3Term::Variable(_) => Err(RdfParseError::msg( + "variables are not allowed in regular RDF predicates", + )), + }?, + object: match quad.object { + N3Term::NamedNode(o) => Ok(o.into()), + N3Term::BlankNode(o) => Ok(self.map_blank_node(o).into()), + N3Term::Literal(o) => Ok(o.into()), + #[cfg(feature = "rdf-star")] + N3Term::Triple(o) => Ok(self.map_triple(*o).into()), + N3Term::Variable(_) => Err(RdfParseError::msg( + "variables are not allowed in regular RDF objects", + )), + }?, + graph_name: self.map_graph_name(quad.graph_name)?, + }) + } +} diff --git a/ng-oxigraph/src/oxrdfio/serializer.rs b/ng-oxigraph/src/oxrdfio/serializer.rs new file mode 100644 index 0000000..6e3d439 --- /dev/null +++ b/ng-oxigraph/src/oxrdfio/serializer.rs @@ -0,0 +1,412 @@ +//! Utilities to write RDF graphs and datasets. + +use crate::oxrdf::{GraphNameRef, IriParseError, QuadRef, TripleRef}; +use crate::oxrdfio::format::RdfFormat; + +#[cfg(feature = "async-tokio")] +use crate::oxrdfxml::ToTokioAsyncWriteRdfXmlWriter; +use crate::oxrdfxml::{RdfXmlSerializer, ToWriteRdfXmlWriter}; + +#[cfg(feature = "async-tokio")] +use crate::oxttl::nquads::ToTokioAsyncWriteNQuadsWriter; +use crate::oxttl::nquads::{NQuadsSerializer, ToWriteNQuadsWriter}; +#[cfg(feature = "async-tokio")] +use crate::oxttl::ntriples::ToTokioAsyncWriteNTriplesWriter; +use crate::oxttl::ntriples::{NTriplesSerializer, ToWriteNTriplesWriter}; +#[cfg(feature = "async-tokio")] +use crate::oxttl::trig::ToTokioAsyncWriteTriGWriter; +use crate::oxttl::trig::{ToWriteTriGWriter, TriGSerializer}; +#[cfg(feature = "async-tokio")] +use crate::oxttl::turtle::ToTokioAsyncWriteTurtleWriter; +use crate::oxttl::turtle::{ToWriteTurtleWriter, TurtleSerializer}; +use std::io::{self, Write}; +#[cfg(feature = "async-tokio")] +use tokio::io::AsyncWrite; + +/// A serializer for RDF serialization formats. 
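The `map_n3_quad` conversion above only lets through N3 terms that are also valid in plain RDF, so N3-only terms such as variables surface as parse errors rather than quads. A short sketch of that behavior, assuming the `RdfParser` N3 entry point:

```rust
use oxrdfio::{RdfFormat, RdfParser};

fn main() {
    // A plain triple maps cleanly to a quad in the default graph...
    let ok = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
    assert!(RdfParser::from_format(RdfFormat::N3)
        .parse_read(ok.as_bytes())
        .all(|quad| quad.is_ok()));

    // ...but a variable subject is rejected by the RDF mapping step.
    let with_variable = "?s <http://example.com/p> <http://example.com/o> .";
    assert!(RdfParser::from_format(RdfFormat::N3)
        .parse_read(with_variable.as_bytes())
        .any(|quad| quad.is_err()));
}
```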
+/// +/// It currently supports the following formats: +/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`]) +/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`]) +/// * [canonical](https://www.w3.org/TR/n-triples/#canonical-ntriples) [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`]) +/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`]) +/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`]) +/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`]) +/// +/// ``` +/// use oxrdfio::{RdfFormat, RdfSerializer}; +/// use oxrdf::{Quad, NamedNode}; +/// +/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new()); +/// writer.write_quad(&Quad { +/// subject: NamedNode::new("http://example.com/s")?.into(), +/// predicate: NamedNode::new("http://example.com/p")?, +/// object: NamedNode::new("http://example.com/o")?.into(), +/// graph_name: NamedNode::new("http://example.com/g")?.into() +/// })?; +/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n"); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[must_use] +pub struct RdfSerializer { + inner: RdfSerializerKind, +} + +enum RdfSerializerKind { + NQuads(NQuadsSerializer), + NTriples(NTriplesSerializer), + RdfXml(RdfXmlSerializer), + TriG(TriGSerializer), + Turtle(TurtleSerializer), +} + +impl RdfSerializer { + /// Builds a serializer for the given format + #[inline] + pub fn from_format(format: RdfFormat) -> Self { + Self { + inner: match format { + RdfFormat::NQuads => RdfSerializerKind::NQuads(NQuadsSerializer::new()), + RdfFormat::NTriples => RdfSerializerKind::NTriples(NTriplesSerializer::new()), + RdfFormat::RdfXml => RdfSerializerKind::RdfXml(RdfXmlSerializer::new()), + RdfFormat::TriG => RdfSerializerKind::TriG(TriGSerializer::new()), + RdfFormat::Turtle | RdfFormat::N3 => { + RdfSerializerKind::Turtle(TurtleSerializer::new()) + } + }, + } + } + + /// The format the serializer serializes to. + /// + /// ``` + /// use oxrdfio::{RdfFormat, RdfSerializer}; + /// + /// assert_eq!( + /// RdfSerializer::from_format(RdfFormat::Turtle).format(), + /// RdfFormat::Turtle + /// ); + /// ``` + pub fn format(&self) -> RdfFormat { + match &self.inner { + RdfSerializerKind::NQuads(_) => RdfFormat::NQuads, + RdfSerializerKind::NTriples(_) => RdfFormat::NTriples, + RdfSerializerKind::RdfXml(_) => RdfFormat::RdfXml, + RdfSerializerKind::TriG(_) => RdfFormat::TriG, + RdfSerializerKind::Turtle(_) => RdfFormat::Turtle, + } + } + + /// If the format supports it, sets a prefix. + /// + /// ``` + /// use oxrdf::vocab::rdf; + /// use oxrdf::{NamedNodeRef, TripleRef}; + /// use oxrdfio::{RdfFormat, RdfSerializer}; + /// + /// let mut writer = RdfSerializer::from_format(RdfFormat::Turtle) + /// .with_prefix("schema", "http://schema.org/")? 
+ /// .serialize_to_write(Vec::new()); + /// writer.write_triple(TripleRef { + /// subject: NamedNodeRef::new("http://example.com/s")?.into(), + /// predicate: rdf::TYPE.into(), + /// object: NamedNodeRef::new("http://schema.org/Person")?.into(), + /// })?; + /// assert_eq!( + /// writer.finish()?, + /// b"@prefix schema: <http://schema.org/> .\n<http://example.com/s> a schema:Person .\n" + /// ); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + #[inline] + pub fn with_prefix( + mut self, + prefix_name: impl Into<String>, + prefix_iri: impl Into<String>, + ) -> Result<Self, IriParseError> { + self.inner = match self.inner { + RdfSerializerKind::NQuads(s) => RdfSerializerKind::NQuads(s), + RdfSerializerKind::NTriples(s) => RdfSerializerKind::NTriples(s), + RdfSerializerKind::RdfXml(s) => { + RdfSerializerKind::RdfXml(s.with_prefix(prefix_name, prefix_iri)?) + } + RdfSerializerKind::TriG(s) => { + RdfSerializerKind::TriG(s.with_prefix(prefix_name, prefix_iri)?) + } + RdfSerializerKind::Turtle(s) => { + RdfSerializerKind::Turtle(s.with_prefix(prefix_name, prefix_iri)?) + } + }; + Ok(self) + } + + /// Writes to a [`Write`] implementation. + /// + /// <div class="warning"> + /// + /// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div> + /// + /// <div class="warning"> + /// + /// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div> + /// + /// ``` + /// use oxrdfio::{RdfFormat, RdfSerializer}; + /// use oxrdf::{Quad, NamedNode}; + /// + /// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new()); + /// writer.write_quad(&Quad { + /// subject: NamedNode::new("http://example.com/s")?.into(), + /// predicate: NamedNode::new("http://example.com/p")?, + /// object: NamedNode::new("http://example.com/o")?.into(), + /// graph_name: NamedNode::new("http://example.com/g")?.into() + /// })?; + /// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n"); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteQuadWriter<W> { + ToWriteQuadWriter { + formatter: match self.inner { + RdfSerializerKind::NQuads(s) => { + ToWriteQuadWriterKind::NQuads(s.serialize_to_write(write)) + } + RdfSerializerKind::NTriples(s) => { + ToWriteQuadWriterKind::NTriples(s.serialize_to_write(write)) + } + RdfSerializerKind::RdfXml(s) => { + ToWriteQuadWriterKind::RdfXml(s.serialize_to_write(write)) + } + RdfSerializerKind::TriG(s) => { + ToWriteQuadWriterKind::TriG(s.serialize_to_write(write)) + } + RdfSerializerKind::Turtle(s) => { + ToWriteQuadWriterKind::Turtle(s.serialize_to_write(write)) + } + }, + } + } + + /// Writes to a Tokio [`AsyncWrite`] implementation. + /// + /// <div class="warning"> + /// + /// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div> + /// + /// <div class="warning"> + /// + /// This writer does unbuffered writes. 
You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
+ ///
+ /// ```
+ /// use oxrdfio::{RdfFormat, RdfSerializer};
+ /// use oxrdf::{Quad, NamedNode};
+ ///
+ /// # #[tokio::main(flavor = "current_thread")]
+ /// # async fn main() -> std::io::Result<()> {
+ /// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
+ /// writer.write_quad(&Quad {
+ /// subject: NamedNode::new_unchecked("http://example.com/s").into(),
+ /// predicate: NamedNode::new_unchecked("http://example.com/p"),
+ /// object: NamedNode::new_unchecked("http://example.com/o").into(),
+ /// graph_name: NamedNode::new_unchecked("http://example.com/g").into()
+ /// }).await?;
+ /// assert_eq!(writer.finish().await?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
+ /// # Ok(())
+ /// # }
+ /// ```
+ #[cfg(feature = "async-tokio")]
+ pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>(
+ self,
+ write: W,
+ ) -> ToTokioAsyncWriteQuadWriter<W> {
+ ToTokioAsyncWriteQuadWriter {
+ formatter: match self.inner {
+ RdfSerializerKind::NQuads(s) => {
+ ToTokioAsyncWriteQuadWriterKind::NQuads(s.serialize_to_tokio_async_write(write))
+ }
+ RdfSerializerKind::NTriples(s) => ToTokioAsyncWriteQuadWriterKind::NTriples(
+ s.serialize_to_tokio_async_write(write),
+ ),
+ RdfSerializerKind::RdfXml(s) => {
+ ToTokioAsyncWriteQuadWriterKind::RdfXml(s.serialize_to_tokio_async_write(write))
+ }
+ RdfSerializerKind::TriG(s) => {
+ ToTokioAsyncWriteQuadWriterKind::TriG(s.serialize_to_tokio_async_write(write))
+ }
+ RdfSerializerKind::Turtle(s) => {
+ ToTokioAsyncWriteQuadWriterKind::Turtle(s.serialize_to_tokio_async_write(write))
+ }
+ },
+ }
+ }
+}
+
+impl From<RdfFormat> for RdfSerializer {
+ fn from(format: RdfFormat) -> Self {
+ Self::from_format(format)
+ }
+}
+
+/// Writes quads or triples to a [`Write`] implementation.
+///
+/// Can be built using [`RdfSerializer::serialize_to_write`].
+///
+/// <div class="warning">
+///
+/// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
+///
+/// <div class="warning">
+///
+/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
+///
+/// ```
+/// use oxrdfio::{RdfFormat, RdfSerializer};
+/// use oxrdf::{Quad, NamedNode};
+///
+/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
+/// writer.write_quad(&Quad {
+/// subject: NamedNode::new("http://example.com/s")?.into(),
+/// predicate: NamedNode::new("http://example.com/p")?,
+/// object: NamedNode::new("http://example.com/o")?.into(),
+/// graph_name: NamedNode::new("http://example.com/g")?.into(),
+/// })?;
+/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
+/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+/// ```
+#[must_use]
+pub struct ToWriteQuadWriter<W: Write> {
+ formatter: ToWriteQuadWriterKind<W>,
+}
+
+enum ToWriteQuadWriterKind<W: Write> {
+ NQuads(ToWriteNQuadsWriter<W>),
+ NTriples(ToWriteNTriplesWriter<W>),
+ RdfXml(ToWriteRdfXmlWriter<W>),
+ TriG(ToWriteTriGWriter<W>),
+ Turtle(ToWriteTurtleWriter<W>),
+}
+
+impl<W: Write> ToWriteQuadWriter<W> {
+ /// Writes a [`QuadRef`]
+ pub fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
+ match &mut self.formatter {
+ ToWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad),
+ ToWriteQuadWriterKind::NTriples(writer) => writer.write_triple(to_triple(quad)?),
+ ToWriteQuadWriterKind::RdfXml(writer) => writer.write_triple(to_triple(quad)?),
+ ToWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad),
+ ToWriteQuadWriterKind::Turtle(writer) => writer.write_triple(to_triple(quad)?),
+ }
+ }
+
+ /// Writes a [`TripleRef`]
+ pub fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
+ self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph))
+ }
+
+ /// Writes the last bytes of the file
+ ///
+ /// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](io::BufWriter).
+ pub fn finish(self) -> io::Result<W> {
+ Ok(match self.formatter {
+ ToWriteQuadWriterKind::NQuads(writer) => writer.finish(),
+ ToWriteQuadWriterKind::NTriples(writer) => writer.finish(),
+ ToWriteQuadWriterKind::RdfXml(writer) => writer.finish()?,
+ ToWriteQuadWriterKind::TriG(writer) => writer.finish()?,
+ ToWriteQuadWriterKind::Turtle(writer) => writer.finish()?,
+ })
+ }
+}
+
+/// Writes quads or triples to a Tokio [`AsyncWrite`] implementation.
+///
+/// Can be built using [`RdfSerializer::serialize_to_tokio_async_write`].
+///
+/// <div class="warning">
+///
+/// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
+///
+/// <div class="warning">
+///
+/// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
+///
+/// ```
+/// use oxrdfio::{RdfFormat, RdfSerializer};
+/// use oxrdf::{Quad, NamedNode};
+///
+/// # #[tokio::main(flavor = "current_thread")]
+/// # async fn main() -> std::io::Result<()> {
+/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
+/// writer.write_quad(&Quad {
+/// subject: NamedNode::new_unchecked("http://example.com/s").into(),
+/// predicate: NamedNode::new_unchecked("http://example.com/p"),
+/// object: NamedNode::new_unchecked("http://example.com/o").into(),
+/// graph_name: NamedNode::new_unchecked("http://example.com/g").into()
+/// }).await?;
+/// assert_eq!(writer.finish().await?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
+/// # Ok(())
+/// # }
+/// ```
+#[must_use]
+#[cfg(feature = "async-tokio")]
+pub struct ToTokioAsyncWriteQuadWriter<W: AsyncWrite + Unpin> {
+ formatter: ToTokioAsyncWriteQuadWriterKind<W>,
+}
+
+#[cfg(feature = "async-tokio")]
+enum ToTokioAsyncWriteQuadWriterKind<W: AsyncWrite + Unpin> {
+ NQuads(ToTokioAsyncWriteNQuadsWriter<W>),
+ NTriples(ToTokioAsyncWriteNTriplesWriter<W>),
+ RdfXml(ToTokioAsyncWriteRdfXmlWriter<W>),
+ TriG(ToTokioAsyncWriteTriGWriter<W>),
+ Turtle(ToTokioAsyncWriteTurtleWriter<W>),
+}
+
+#[cfg(feature = "async-tokio")]
+impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteQuadWriter<W> {
+ /// Writes a [`QuadRef`]
+ pub async fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
+ match &mut self.formatter {
+ ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad).await,
+ ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => {
+ writer.write_triple(to_triple(quad)?).await
+ }
+ ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => {
+ writer.write_triple(to_triple(quad)?).await
+ }
+ ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad).await,
+ ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => {
+ writer.write_triple(to_triple(quad)?).await
+ }
+ }
+ }
+
+ /// Writes a [`TripleRef`]
+ pub async fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
+ self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph))
+ .await
+ }
+
+ /// Writes the last bytes of the file
+ ///
+ /// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](tokio::io::BufWriter).
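Because `finish` hands back the inner writer without flushing it, a buffered writer must be flushed by the caller, as the note above says. A minimal sketch, using `std::io::sink()` as a stand-in for a real output file:

```rust
use oxrdf::{NamedNodeRef, TripleRef};
use oxrdfio::{RdfFormat, RdfSerializer};
use std::io::{sink, BufWriter, Write};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut writer = RdfSerializer::from_format(RdfFormat::NTriples)
        .serialize_to_write(BufWriter::new(sink()));
    writer.write_triple(TripleRef {
        subject: NamedNodeRef::new("http://example.com/s")?.into(),
        predicate: NamedNodeRef::new("http://example.com/p")?,
        object: NamedNodeRef::new("http://example.com/o")?.into(),
    })?;
    // finish() returns the BufWriter; flush it to push out the last bytes.
    writer.finish()?.flush()?;
    Ok(())
}
```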
+ pub async fn finish(self) -> io::Result<W> {
+ Ok(match self.formatter {
+ ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.finish(),
+ ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => writer.finish(),
+ ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => writer.finish().await?,
+ ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.finish().await?,
+ ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => writer.finish().await?,
+ })
+ }
+}
+
+fn to_triple<'a>(quad: impl Into<QuadRef<'a>>) -> io::Result<TripleRef<'a>> {
+ let quad = quad.into();
+ if quad.graph_name.is_default_graph() {
+ Ok(quad.into())
+ } else {
+ Err(io::Error::new(
+ io::ErrorKind::InvalidInput,
+ "Only quads in the default graph can be serialized to an RDF graph format",
+ ))
+ }
+}
diff --git a/ng-oxigraph/src/oxrdfxml/README.md b/ng-oxigraph/src/oxrdfxml/README.md
new file mode 100644
index 0000000..29ebb4c
--- /dev/null
+++ b/ng-oxigraph/src/oxrdfxml/README.md
@@ -0,0 +1,56 @@
+OxRDF/XML
+=========
+
+[](https://crates.io/crates/oxrdfxml)
+[](https://docs.rs/oxrdfxml)
+[](https://crates.io/crates/oxrdfxml)
+[](https://github.com/oxigraph/oxigraph/actions)
+[](https://gitter.im/oxigraph/community)
+
+OxRdfXml is a parser and serializer for [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/).
+
+The entry points of this library are the two [`RdfXmlParser`] and [`RdfXmlSerializer`] structs.
+
+Usage example counting the number of people in an RDF/XML file:
+
+```rust
+use oxrdf::{NamedNodeRef, vocab::rdf};
+use oxrdfxml::RdfXmlParser;
+
+fn main() {
+ let file = br#"<?xml version="1.0"?>
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
+ <rdf:Description rdf:about="http://example.com/foo">
+ <rdf:type rdf:resource="http://schema.org/Person" />
+ <schema:name>Foo</schema:name>
+ </rdf:Description>
+ <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
+</rdf:RDF>"#;
+
+ let schema_person = NamedNodeRef::new("http://schema.org/Person").unwrap();
+ let mut count = 0;
+ for triple in RdfXmlParser::new().parse_read(file.as_ref()) {
+ let triple = triple.unwrap();
+ if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+ count += 1;
+ }
+ }
+ assert_eq!(2, count);
+}
+```
+
+## License
+
+This project is licensed under either of
+
+* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
+ `<http://www.apache.org/licenses/LICENSE-2.0>`)
+* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
+ `<http://opensource.org/licenses/MIT>`)
+
+at your option.
+
+
+### Contribution
+
+Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
diff --git a/ng-oxigraph/src/oxrdfxml/error.rs b/ng-oxigraph/src/oxrdfxml/error.rs
new file mode 100644
index 0000000..9a59a76
--- /dev/null
+++ b/ng-oxigraph/src/oxrdfxml/error.rs
@@ -0,0 +1,89 @@
+use oxilangtag::LanguageTagParseError;
+use oxiri::IriParseError;
+use std::io;
+use std::sync::Arc;
+
+/// Error returned during RDF/XML parsing.
+#[derive(Debug, thiserror::Error)]
+pub enum RdfXmlParseError {
+ /// I/O error during parsing (file not found...).
+ #[error(transparent)]
+ Io(#[from] io::Error),
+ /// An error in the file syntax.
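The `to_triple` guard above is what makes triple-only formats reject quads outside the default graph. A short sketch contrasting Turtle with the dataset-aware N-Quads, assuming the `RdfSerializer` API from this file:

```rust
use oxrdf::{NamedNode, Quad};
use oxrdfio::{RdfFormat, RdfSerializer};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let quad = Quad {
        subject: NamedNode::new("http://example.com/s")?.into(),
        predicate: NamedNode::new("http://example.com/p")?,
        object: NamedNode::new("http://example.com/o")?.into(),
        graph_name: NamedNode::new("http://example.com/g")?.into(),
    };

    // Turtle stores a single graph: the named-graph quad is rejected...
    let mut turtle = RdfSerializer::from_format(RdfFormat::Turtle).serialize_to_write(Vec::new());
    assert!(turtle.write_quad(&quad).is_err());

    // ...while N-Quads stores a full dataset and accepts it.
    let mut nquads = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
    assert!(nquads.write_quad(&quad).is_ok());
    Ok(())
}
```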
+ #[error(transparent)]
+ Syntax(#[from] RdfXmlSyntaxError),
+}
+
+impl From<RdfXmlParseError> for io::Error {
+ #[inline]
+ fn from(error: RdfXmlParseError) -> Self {
+ match error {
+ RdfXmlParseError::Io(error) => error,
+ RdfXmlParseError::Syntax(error) => error.into(),
+ }
+ }
+}
+
+impl From<quick_xml::Error> for RdfXmlParseError {
+ #[inline]
+ fn from(error: quick_xml::Error) -> Self {
+ match error {
+ quick_xml::Error::Io(error) => {
+ Self::Io(Arc::try_unwrap(error).unwrap_or_else(|e| io::Error::new(e.kind(), e)))
+ }
+ _ => Self::Syntax(RdfXmlSyntaxError(SyntaxErrorKind::Xml(error))),
+ }
+ }
+}
+
+/// An error in the syntax of the parsed file.
+#[derive(Debug, thiserror::Error)]
+#[error(transparent)]
+pub struct RdfXmlSyntaxError(#[from] pub(crate) SyntaxErrorKind);
+
+#[derive(Debug, thiserror::Error)]
+pub enum SyntaxErrorKind {
+ #[error(transparent)]
+ Xml(#[from] quick_xml::Error),
+ #[error("error while parsing IRI '{iri}': {error}")]
+ InvalidIri {
+ iri: String,
+ #[source]
+ error: IriParseError,
+ },
+ #[error("error while parsing language tag '{tag}': {error}")]
+ InvalidLanguageTag {
+ tag: String,
+ #[source]
+ error: LanguageTagParseError,
+ },
+ #[error("{0}")]
+ Msg(String),
+}
+
+impl RdfXmlSyntaxError {
+ /// Builds an error from a printable error message.
+ #[inline]
+ pub(crate) fn msg(msg: impl Into<String>) -> Self {
+ Self(SyntaxErrorKind::Msg(msg.into()))
+ }
+}
+
+impl From<RdfXmlSyntaxError> for io::Error {
+ #[inline]
+ fn from(error: RdfXmlSyntaxError) -> Self {
+ match error.0 {
+ SyntaxErrorKind::Xml(error) => match error {
+ quick_xml::Error::Io(error) => {
+ Arc::try_unwrap(error).unwrap_or_else(|e| Self::new(e.kind(), e))
+ }
+ quick_xml::Error::UnexpectedEof(error) => {
+ Self::new(io::ErrorKind::UnexpectedEof, error)
+ }
+ _ => Self::new(io::ErrorKind::InvalidData, error),
+ },
+ SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg),
+ _ => Self::new(io::ErrorKind::InvalidData, error),
+ }
+ }
+}
diff --git a/ng-oxigraph/src/oxrdfxml/mod.rs b/ng-oxigraph/src/oxrdfxml/mod.rs
new file mode 100644
index 0000000..bbd0f21
--- /dev/null
+++ b/ng-oxigraph/src/oxrdfxml/mod.rs
@@ -0,0 +1,8 @@
+mod error;
+mod parser;
+mod serializer;
+mod utils;
+
+pub use error::{RdfXmlParseError, RdfXmlSyntaxError};
+pub use parser::{FromReadRdfXmlReader, RdfXmlParser};
+pub use serializer::{RdfXmlSerializer, ToWriteRdfXmlWriter};
diff --git a/ng-oxigraph/src/oxrdfxml/parser.rs b/ng-oxigraph/src/oxrdfxml/parser.rs
new file mode 100644
index 0000000..6bdf76d
--- /dev/null
+++ b/ng-oxigraph/src/oxrdfxml/parser.rs
@@ -0,0 +1,1237 @@
+use crate::oxrdf::vocab::rdf;
+use crate::oxrdf::{BlankNode, Literal, NamedNode, Subject, Term, Triple};
+use crate::oxrdfxml::error::{RdfXmlParseError, RdfXmlSyntaxError, SyntaxErrorKind};
+use crate::oxrdfxml::utils::*;
+use oxilangtag::LanguageTag;
+use oxiri::{Iri, IriParseError};
+use quick_xml::escape::unescape_with;
+use quick_xml::events::attributes::Attribute;
+use quick_xml::events::*;
+use quick_xml::name::{LocalName, QName, ResolveResult};
+use quick_xml::{Error, NsReader, Writer};
+use std::collections::{HashMap, HashSet};
+use std::io::{BufReader, Read};
+use std::str;
+#[cfg(feature = "async-tokio")]
+use tokio::io::{AsyncRead, BufReader as AsyncBufReader};
+
+/// An [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) streaming parser.
+///
+/// It reads the file in a streaming fashion.
+/// It does not keep data in memory except a stack for handling nested XML tags, and a set of all
+/// seen `rdf:ID`s to detect duplicate ids and fail according to the specification.
+///
+/// Its performance is not optimized yet and could hopefully be significantly improved by reducing the
+/// number of allocations and copies done by the parser.
+///
+/// Count the number of people:
+/// ```
+/// use oxrdf::vocab::rdf;
+/// use oxrdf::NamedNodeRef;
+/// use oxrdfxml::RdfXmlParser;
+///
+/// let file = br#"<?xml version="1.0"?>
+/// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
+/// <rdf:Description rdf:about="http://example.com/foo">
+/// <rdf:type rdf:resource="http://schema.org/Person" />
+/// <schema:name>Foo</schema:name>
+/// </rdf:Description>
+/// <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
+/// </rdf:RDF>"#;
+///
+/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
+/// let mut count = 0;
+/// for triple in RdfXmlParser::new().parse_read(file.as_ref()) {
+/// let triple = triple?;
+/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+/// count += 1;
+/// }
+/// }
+/// assert_eq!(2, count);
+/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+/// ```
+#[derive(Default)]
+#[must_use]
+pub struct RdfXmlParser {
+ unchecked: bool,
+ base: Option<Iri<String>>,
+}
+
+impl RdfXmlParser {
+ /// Builds a new [`RdfXmlParser`].
+ #[inline]
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Assumes the file is valid to make parsing faster.
+ ///
+ /// It will skip some validations.
+ ///
+ /// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
+ #[inline]
+ pub fn unchecked(mut self) -> Self {
+ self.unchecked = true;
+ self
+ }
+
+ /// Provides an IRI that could be used to resolve the file relative IRIs.
+ #[inline]
+ pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
+ self.base = Some(Iri::parse(base_iri.into())?);
+ Ok(self)
+ }
+
+ /// Parses an RDF/XML file from a [`Read`] implementation.
+ ///
+ /// Count the number of people:
+ /// ```
+ /// use oxrdf::vocab::rdf;
+ /// use oxrdf::NamedNodeRef;
+ /// use oxrdfxml::RdfXmlParser;
+ ///
+ /// let file = br#"<?xml version="1.0"?>
+ /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
+ /// <rdf:Description rdf:about="http://example.com/foo">
+ /// <rdf:type rdf:resource="http://schema.org/Person" />
+ /// <schema:name>Foo</schema:name>
+ /// </rdf:Description>
+ /// <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
+ /// </rdf:RDF>"#;
+ ///
+ /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
+ /// let mut count = 0;
+ /// for triple in RdfXmlParser::new().parse_read(file.as_ref()) {
+ /// let triple = triple?;
+ /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+ /// count += 1;
+ /// }
+ /// }
+ /// assert_eq!(2, count);
+ /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+ /// ```
+ pub fn parse_read<R: Read>(self, read: R) -> FromReadRdfXmlReader<R> {
+ FromReadRdfXmlReader {
+ results: Vec::new(),
+ reader: self.parse(BufReader::new(read)),
+ reader_buffer: Vec::default(),
+ }
+ }
+
+ /// Parses an RDF/XML file from an [`AsyncRead`] implementation.
+ ///
+ /// Count the number of people:
+ /// ```
+ /// use oxrdf::vocab::rdf;
+ /// use oxrdf::NamedNodeRef;
+ /// use oxrdfxml::RdfXmlParser;
+ ///
+ /// # #[tokio::main(flavor = "current_thread")]
+ /// # async fn main() -> Result<(), oxrdfxml::RdfXmlParseError> {
+ /// let file = br#"<?xml version="1.0"?>
+ /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
+ /// <rdf:Description rdf:about="http://example.com/foo">
+ /// <rdf:type rdf:resource="http://schema.org/Person" />
+ /// <schema:name>Foo</schema:name>
+ /// </rdf:Description>
+ /// <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
+ /// </rdf:RDF>"#;
+ ///
+ /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
+ /// let mut count = 0;
+ /// let mut parser = RdfXmlParser::new().parse_tokio_async_read(file.as_ref());
+ /// while let Some(triple) = parser.next().await {
+ /// let triple = triple?;
+ /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+ /// count += 1;
+ /// }
+ /// }
+ /// assert_eq!(2, count);
+ /// # Ok(())
+ /// # }
+ /// ```
+ #[cfg(feature = "async-tokio")]
+ pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
+ self,
+ read: R,
+ ) -> FromTokioAsyncReadRdfXmlReader<R> {
+ FromTokioAsyncReadRdfXmlReader {
+ results: Vec::new(),
+ reader: self.parse(AsyncBufReader::new(read)),
+ reader_buffer: Vec::default(),
+ }
+ }
+
+ fn parse<T>(&self, reader: T) -> RdfXmlReader<T> {
+ let mut reader = NsReader::from_reader(reader);
+ reader.expand_empty_elements(true);
+ RdfXmlReader {
+ reader,
+ state: vec![RdfXmlState::Doc {
+ base_iri: self.base.clone(),
+ }],
+ custom_entities: HashMap::default(),
+ in_literal_depth: 0,
+ known_rdf_id: HashSet::default(),
+ is_end: false,
+ unchecked: self.unchecked,
+ }
+ }
+}
+
+/// Parses an RDF/XML file from a [`Read`] implementation. Can be built using [`RdfXmlParser::parse_read`].
+///
+/// Count the number of people:
+/// ```
+/// use oxrdf::vocab::rdf;
+/// use oxrdf::NamedNodeRef;
+/// use oxrdfxml::RdfXmlParser;
+///
+/// let file = br#"<?xml version="1.0"?>
+/// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
+/// <rdf:Description rdf:about="http://example.com/foo">
+/// <rdf:type rdf:resource="http://schema.org/Person" />
+/// <schema:name>Foo</schema:name>
+/// </rdf:Description>
+/// <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
+/// </rdf:RDF>"#;
+///
+/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
+/// let mut count = 0;
+/// for triple in RdfXmlParser::new().parse_read(file.as_ref()) {
+/// let triple = triple?;
+/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+/// count += 1;
+/// }
+/// }
+/// assert_eq!(2, count);
+/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+/// ```
+#[must_use]
+pub struct FromReadRdfXmlReader<R: Read> {
+ results: Vec<Triple>,
+ reader: RdfXmlReader<BufReader<R>>,
+ reader_buffer: Vec<u8>,
+}
+
+impl<R: Read> Iterator for FromReadRdfXmlReader<R> {
+ type Item = Result<Triple, RdfXmlParseError>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ loop {
+ if let Some(triple) = self.results.pop() {
+ return Some(Ok(triple));
+ } else if self.reader.is_end {
+ return None;
+ }
+ if let Err(e) = self.parse_step() {
+ return Some(Err(e));
+ }
+ }
+ }
+}
+
+impl<R: Read> FromReadRdfXmlReader<R> {
+ /// The current byte position in the input data.
+ pub fn buffer_position(&self) -> usize {
+ self.reader.reader.buffer_position()
+ }
+
+ fn parse_step(&mut self) -> Result<(), RdfXmlParseError> {
+ self.reader_buffer.clear();
+ let event = self
+ .reader
+ .reader
+ .read_event_into(&mut self.reader_buffer)?;
+ self.reader.parse_event(event, &mut self.results)
+ }
+}
+
+/// Parses an RDF/XML file from an [`AsyncRead`] implementation. Can be built using [`RdfXmlParser::parse_tokio_async_read`].
+///
+/// Count the number of people:
+/// ```
+/// use oxrdf::vocab::rdf;
+/// use oxrdf::NamedNodeRef;
+/// use oxrdfxml::RdfXmlParser;
+///
+/// # #[tokio::main(flavor = "current_thread")]
+/// # async fn main() -> Result<(), oxrdfxml::RdfXmlParseError> {
+/// let file = br#"<?xml version="1.0"?>
+/// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
+/// <rdf:Description rdf:about="http://example.com/foo">
+/// <rdf:type rdf:resource="http://schema.org/Person" />
+/// <schema:name>Foo</schema:name>
+/// </rdf:Description>
+/// <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
+/// </rdf:RDF>"#;
+///
+/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
+/// let mut count = 0;
+/// let mut parser = RdfXmlParser::new().parse_tokio_async_read(file.as_ref());
+/// while let Some(triple) = parser.next().await {
+/// let triple = triple?;
+/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+/// count += 1;
+/// }
+/// }
+/// assert_eq!(2, count);
+/// # Ok(())
+/// # }
+/// ```
+#[cfg(feature = "async-tokio")]
+#[must_use]
+pub struct FromTokioAsyncReadRdfXmlReader<R: AsyncRead + Unpin> {
+ results: Vec<Triple>,
+ reader: RdfXmlReader<AsyncBufReader<R>>,
+ reader_buffer: Vec<u8>,
+}
+
+#[cfg(feature = "async-tokio")]
+impl<R: AsyncRead + Unpin> FromTokioAsyncReadRdfXmlReader<R> {
+ /// Reads the next triple or returns `None` if the file is finished.
+ pub async fn next(&mut self) -> Option<Result<Triple, RdfXmlParseError>> {
+ loop {
+ if let Some(triple) = self.results.pop() {
+ return Some(Ok(triple));
+ } else if self.reader.is_end {
+ return None;
+ }
+ if let Err(e) = self.parse_step().await {
+ return Some(Err(e));
+ }
+ }
+ }
+
+ /// The current byte position in the input data.
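`buffer_position` is mostly useful for error reporting: when the reader yields an `Err`, the position points at the bytes consumed so far. A sketch, assuming an input the parser must reject (`rdf:li` is reserved and not allowed as a node element name):

```rust
use oxrdfxml::RdfXmlParser;

fn main() {
    let file = br#"<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <rdf:li />
</rdf:RDF>"#;

    let mut reader = RdfXmlParser::new().parse_read(file.as_ref());
    while let Some(result) = reader.next() {
        if let Err(e) = result {
            // Report the error together with the byte offset reached so far.
            eprintln!("error at byte {}: {e}", reader.buffer_position());
            break;
        }
    }
}
```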
+ pub fn buffer_position(&self) -> usize { + self.reader.reader.buffer_position() + } + + async fn parse_step(&mut self) -> Result<(), RdfXmlParseError> { + self.reader_buffer.clear(); + let event = self + .reader + .reader + .read_event_into_async(&mut self.reader_buffer) + .await?; + self.reader.parse_event(event, &mut self.results) + } +} + +const RDF_ABOUT: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#about"; +const RDF_ABOUT_EACH: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"; +const RDF_ABOUT_EACH_PREFIX: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"; +const RDF_BAG_ID: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID"; +const RDF_DATATYPE: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#datatype"; +const RDF_DESCRIPTION: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#Description"; +const RDF_ID: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#ID"; +const RDF_LI: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#li"; +const RDF_NODE_ID: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID"; +const RDF_PARSE_TYPE: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#parseType"; +const RDF_RDF: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#RDF"; +const RDF_RESOURCE: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#resource"; + +const RESERVED_RDF_ELEMENTS: [&str; 11] = [ + RDF_ABOUT, + RDF_ABOUT_EACH, + RDF_ABOUT_EACH_PREFIX, + RDF_BAG_ID, + RDF_DATATYPE, + RDF_ID, + RDF_LI, + RDF_NODE_ID, + RDF_PARSE_TYPE, + RDF_RDF, + RDF_RESOURCE, +]; +const RESERVED_RDF_ATTRIBUTES: [&str; 5] = [ + RDF_ABOUT_EACH, + RDF_ABOUT_EACH_PREFIX, + RDF_LI, + RDF_RDF, + RDF_RESOURCE, +]; + +#[derive(Clone, Debug)] +enum NodeOrText { + Node(Subject), + Text(String), +} + +enum RdfXmlState { + Doc { + base_iri: Option<Iri<String>>, + }, + Rdf { + base_iri: Option<Iri<String>>, + language: Option<String>, + }, + NodeElt { + base_iri: Option<Iri<String>>, + language: Option<String>, + subject: Subject, + li_counter: u64, + }, + PropertyElt { + // Resource, Literal or Empty property element + iri: NamedNode, + base_iri: Option<Iri<String>>, + language: Option<String>, + subject: Subject, + object: Option<NodeOrText>, + id_attr: Option<NamedNode>, + datatype_attr: Option<NamedNode>, + }, + ParseTypeCollectionPropertyElt { + iri: NamedNode, + base_iri: Option<Iri<String>>, + language: Option<String>, + subject: Subject, + objects: Vec<Subject>, + id_attr: Option<NamedNode>, + }, + ParseTypeLiteralPropertyElt { + iri: NamedNode, + base_iri: Option<Iri<String>>, + language: Option<String>, + subject: Subject, + writer: Writer<Vec<u8>>, + id_attr: Option<NamedNode>, + emit: bool, // false for parseTypeOtherPropertyElt support + }, +} + +impl RdfXmlState { + fn base_iri(&self) -> Option<&Iri<String>> { + match self { + Self::Doc { base_iri, .. } + | Self::Rdf { base_iri, .. } + | Self::NodeElt { base_iri, .. } + | Self::PropertyElt { base_iri, .. } + | Self::ParseTypeCollectionPropertyElt { base_iri, .. } + | Self::ParseTypeLiteralPropertyElt { base_iri, .. } => base_iri.as_ref(), + } + } + + fn language(&self) -> Option<&String> { + match self { + Self::Doc { .. } => None, + Self::Rdf { language, .. } + | Self::NodeElt { language, .. } + | Self::PropertyElt { language, .. } + | Self::ParseTypeCollectionPropertyElt { language, .. } + | Self::ParseTypeLiteralPropertyElt { language, .. 
} => language.as_ref(), + } + } +} + +struct RdfXmlReader<R> { + reader: NsReader<R>, + state: Vec<RdfXmlState>, + custom_entities: HashMap<String, String>, + in_literal_depth: usize, + known_rdf_id: HashSet<String>, + is_end: bool, + unchecked: bool, +} + +impl<R> RdfXmlReader<R> { + fn parse_event( + &mut self, + event: Event<'_>, + results: &mut Vec<Triple>, + ) -> Result<(), RdfXmlParseError> { + match event { + Event::Start(event) => self.parse_start_event(&event, results), + Event::End(event) => self.parse_end_event(&event, results), + Event::Empty(_) => Err(RdfXmlSyntaxError::msg( + "The expand_empty_elements option must be enabled", + ) + .into()), + Event::Text(event) => self.parse_text_event(&event), + Event::CData(event) => self.parse_text_event(&event.escape()?), + Event::Comment(_) | Event::PI(_) => Ok(()), + Event::Decl(decl) => { + if let Some(encoding) = decl.encoding() { + if !is_utf8(&encoding?) { + return Err(RdfXmlSyntaxError::msg( + "Only UTF-8 is supported by the RDF/XML parser", + ) + .into()); + } + } + Ok(()) + } + Event::DocType(dt) => self.parse_doctype(&dt), + Event::Eof => { + self.is_end = true; + Ok(()) + } + } + } + + fn parse_doctype(&mut self, dt: &BytesText<'_>) -> Result<(), RdfXmlParseError> { + // we extract entities + for input in self + .reader + .decoder() + .decode(dt.as_ref())? + .split('<') + .skip(1) + { + if let Some(input) = input.strip_prefix("!ENTITY") { + let input = input.trim_start().strip_prefix('%').unwrap_or(input); + let (entity_name, input) = input.trim_start().split_once(|c: char| c.is_ascii_whitespace()).ok_or_else(|| { + RdfXmlSyntaxError::msg( + "<!ENTITY declarations should contain both an entity name and an entity value", + ) + })?; + let input = input.trim_start().strip_prefix('\"').ok_or_else(|| { + RdfXmlSyntaxError::msg("<!ENTITY values should be enclosed in double quotes") + })?; + let (entity_value, input) = input.split_once('"').ok_or_else(|| { + RdfXmlSyntaxError::msg( + "<!ENTITY declarations values should be enclosed in double quotes", + ) + })?; + input.trim_start().strip_prefix('>').ok_or_else(|| { + RdfXmlSyntaxError::msg("<!ENTITY declarations values should end with >") + })?; + + // Resolves custom entities within the current entity definition. + let entity_value = + unescape_with(entity_value, |e| self.resolve_entity(e)).map_err(Error::from)?; + self.custom_entities + .insert(entity_name.to_owned(), entity_value.to_string()); + } + } + Ok(()) + } + + fn parse_start_event( + &mut self, + event: &BytesStart<'_>, + results: &mut Vec<Triple>, + ) -> Result<(), RdfXmlParseError> { + #[derive(PartialEq, Eq)] + enum RdfXmlParseType { + Default, + Collection, + Literal, + Resource, + Other, + } + + #[derive(PartialEq, Eq)] + enum RdfXmlNextProduction { + Rdf, + NodeElt, + PropertyElt { subject: Subject }, + } + + // Literal case + if let Some(RdfXmlState::ParseTypeLiteralPropertyElt { writer, .. }) = self.state.last_mut() + { + let mut clean_event = BytesStart::new( + self.reader + .decoder() + .decode(event.name().as_ref())? 
+ .to_string(), + ); + for attr in event.attributes() { + clean_event.push_attribute(attr.map_err(Error::InvalidAttr)?); + } + writer.write_event(Event::Start(clean_event))?; + self.in_literal_depth += 1; + return Ok(()); + } + + let tag_name = self.resolve_tag_name(event.name())?; + + // We read attributes + let (mut language, mut base_iri) = if let Some(current_state) = self.state.last() { + ( + current_state.language().cloned(), + current_state.base_iri().cloned(), + ) + } else { + (None, None) + }; + + let mut id_attr = None; + let mut node_id_attr = None; + let mut about_attr = None; + let mut property_attrs = Vec::default(); + let mut resource_attr = None; + let mut datatype_attr = None; + let mut parse_type = RdfXmlParseType::Default; + let mut type_attr = None; + + for attribute in event.attributes() { + let attribute = attribute.map_err(Error::InvalidAttr)?; + if attribute.key.as_ref().starts_with(b"xml") { + if attribute.key.as_ref() == b"xml:lang" { + let tag = self.convert_attribute(&attribute)?.to_ascii_lowercase(); + language = Some(if self.unchecked { + tag + } else { + LanguageTag::parse(tag.to_ascii_lowercase()) + .map_err(|error| { + RdfXmlSyntaxError(SyntaxErrorKind::InvalidLanguageTag { + tag, + error, + }) + })? + .into_inner() + }); + } else if attribute.key.as_ref() == b"xml:base" { + let iri = self.convert_attribute(&attribute)?; + base_iri = Some(if self.unchecked { + Iri::parse_unchecked(iri.clone()) + } else { + Iri::parse(iri.clone()).map_err(|error| { + RdfXmlSyntaxError(SyntaxErrorKind::InvalidIri { iri, error }) + })? + }) + } else { + // We ignore other xml attributes + } + } else { + let attribute_url = self.resolve_attribute_name(attribute.key)?; + if *attribute_url == *RDF_ID { + let mut id = self.convert_attribute(&attribute)?; + if !is_nc_name(&id) { + return Err(RdfXmlSyntaxError::msg(format!( + "{id} is not a valid rdf:ID value" + )) + .into()); + } + id.insert(0, '#'); + id_attr = Some(id); + } else if *attribute_url == *RDF_BAG_ID { + let bag_id = self.convert_attribute(&attribute)?; + if !is_nc_name(&bag_id) { + return Err(RdfXmlSyntaxError::msg(format!( + "{bag_id} is not a valid rdf:bagID value" + )) + .into()); + } + } else if *attribute_url == *RDF_NODE_ID { + let id = self.convert_attribute(&attribute)?; + if !is_nc_name(&id) { + return Err(RdfXmlSyntaxError::msg(format!( + "{id} is not a valid rdf:nodeID value" + )) + .into()); + } + node_id_attr = Some(BlankNode::new_unchecked(id)); + } else if *attribute_url == *RDF_ABOUT { + about_attr = Some(attribute); + } else if *attribute_url == *RDF_RESOURCE { + resource_attr = Some(attribute); + } else if *attribute_url == *RDF_DATATYPE { + datatype_attr = Some(attribute); + } else if *attribute_url == *RDF_PARSE_TYPE { + parse_type = match attribute.value.as_ref() { + b"Collection" => RdfXmlParseType::Collection, + b"Literal" => RdfXmlParseType::Literal, + b"Resource" => RdfXmlParseType::Resource, + _ => RdfXmlParseType::Other, + }; + } else if attribute_url == rdf::TYPE.as_str() { + type_attr = Some(attribute); + } else if RESERVED_RDF_ATTRIBUTES.contains(&&*attribute_url) { + return Err(RdfXmlSyntaxError::msg(format!( + "{attribute_url} is not a valid attribute" + )) + .into()); + } else { + property_attrs.push(( + self.parse_iri(attribute_url)?, + self.convert_attribute(&attribute)?, + )); + } + } + } + + // Parsing with the base URI + let id_attr = match id_attr { + Some(iri) => { + let iri = self.resolve_iri(&base_iri, iri)?; + if self.known_rdf_id.contains(iri.as_str()) { + return 
Err(RdfXmlSyntaxError::msg(format!( + "{iri} has already been used as rdf:ID value" + )) + .into()); + } + self.known_rdf_id.insert(iri.as_str().into()); + Some(iri) + } + None => None, + }; + let about_attr = match about_attr { + Some(attr) => Some(self.convert_iri_attribute(&base_iri, &attr)?), + None => None, + }; + let resource_attr = match resource_attr { + Some(attr) => Some(self.convert_iri_attribute(&base_iri, &attr)?), + None => None, + }; + let datatype_attr = match datatype_attr { + Some(attr) => Some(self.convert_iri_attribute(&base_iri, &attr)?), + None => None, + }; + let type_attr = match type_attr { + Some(attr) => Some(self.convert_iri_attribute(&base_iri, &attr)?), + None => None, + }; + + let expected_production = match self.state.last() { + Some(RdfXmlState::Doc { .. }) => RdfXmlNextProduction::Rdf, + Some( + RdfXmlState::Rdf { .. } + | RdfXmlState::PropertyElt { .. } + | RdfXmlState::ParseTypeCollectionPropertyElt { .. }, + ) => RdfXmlNextProduction::NodeElt, + Some(RdfXmlState::NodeElt { subject, .. }) => RdfXmlNextProduction::PropertyElt { + subject: subject.clone(), + }, + Some(RdfXmlState::ParseTypeLiteralPropertyElt { .. }) => { + return Err( + RdfXmlSyntaxError::msg("ParseTypeLiteralPropertyElt production children should never be considered as a RDF/XML content").into() + ); + } + None => { + return Err(RdfXmlSyntaxError::msg( + "No state in the stack: the XML is not balanced", + ) + .into()); + } + }; + + let new_state = match expected_production { + RdfXmlNextProduction::Rdf => { + if *tag_name == *RDF_RDF { + RdfXmlState::Rdf { base_iri, language } + } else if RESERVED_RDF_ELEMENTS.contains(&&*tag_name) { + return Err(RdfXmlSyntaxError::msg(format!( + "Invalid node element tag name: {tag_name}" + )) + .into()); + } else { + Self::build_node_elt( + self.parse_iri(tag_name)?, + base_iri, + language, + id_attr, + node_id_attr, + about_attr, + type_attr, + property_attrs, + results, + )? + } + } + RdfXmlNextProduction::NodeElt => { + if RESERVED_RDF_ELEMENTS.contains(&&*tag_name) { + return Err(RdfXmlSyntaxError::msg(format!( + "Invalid property element tag name: {tag_name}" + )) + .into()); + } + Self::build_node_elt( + self.parse_iri(tag_name)?, + base_iri, + language, + id_attr, + node_id_attr, + about_attr, + type_attr, + property_attrs, + results, + )? + } + RdfXmlNextProduction::PropertyElt { subject } => { + let iri = if *tag_name == *RDF_LI { + let Some(RdfXmlState::NodeElt { li_counter, .. }) = self.state.last_mut() + else { + return Err(RdfXmlSyntaxError::msg(format!( + "Invalid property element tag name: {tag_name}" + )) + .into()); + }; + *li_counter += 1; + NamedNode::new_unchecked(format!( + "http://www.w3.org/1999/02/22-rdf-syntax-ns#_{li_counter}" + )) + } else if RESERVED_RDF_ELEMENTS.contains(&&*tag_name) + || *tag_name == *RDF_DESCRIPTION + { + return Err(RdfXmlSyntaxError::msg(format!( + "Invalid property element tag name: {tag_name}" + )) + .into()); + } else { + self.parse_iri(tag_name)? 
+ }; + match parse_type { + RdfXmlParseType::Default => { + if resource_attr.is_some() + || node_id_attr.is_some() + || !property_attrs.is_empty() + { + let object = match (resource_attr, node_id_attr) + { + (Some(resource_attr), None) => Subject::from(resource_attr), + (None, Some(node_id_attr)) => node_id_attr.into(), + (None, None) => BlankNode::default().into(), + (Some(_), Some(_)) => return Err(RdfXmlSyntaxError::msg("Not both rdf:resource and rdf:nodeID could be set at the same time").into()) + }; + Self::emit_property_attrs(&object, property_attrs, &language, results); + if let Some(type_attr) = type_attr { + results.push(Triple::new(object.clone(), rdf::TYPE, type_attr)); + } + RdfXmlState::PropertyElt { + iri, + base_iri, + language, + subject, + object: Some(NodeOrText::Node(object)), + id_attr, + datatype_attr, + } + } else { + RdfXmlState::PropertyElt { + iri, + base_iri, + language, + subject, + object: None, + id_attr, + datatype_attr, + } + } + } + RdfXmlParseType::Literal => RdfXmlState::ParseTypeLiteralPropertyElt { + iri, + base_iri, + language, + subject, + writer: Writer::new(Vec::default()), + id_attr, + emit: true, + }, + RdfXmlParseType::Resource => Self::build_parse_type_resource_property_elt( + iri, base_iri, language, subject, id_attr, results, + ), + RdfXmlParseType::Collection => RdfXmlState::ParseTypeCollectionPropertyElt { + iri, + base_iri, + language, + subject, + objects: Vec::default(), + id_attr, + }, + RdfXmlParseType::Other => RdfXmlState::ParseTypeLiteralPropertyElt { + iri, + base_iri, + language, + subject, + writer: Writer::new(Vec::default()), + id_attr, + emit: false, + }, + } + } + }; + self.state.push(new_state); + Ok(()) + } + + fn parse_end_event( + &mut self, + event: &BytesEnd<'_>, + results: &mut Vec<Triple>, + ) -> Result<(), RdfXmlParseError> { + // Literal case + if self.in_literal_depth > 0 { + if let Some(RdfXmlState::ParseTypeLiteralPropertyElt { writer, .. }) = + self.state.last_mut() + { + writer.write_event(Event::End(BytesEnd::new( + self.reader.decoder().decode(event.name().as_ref())?, + )))?; + self.in_literal_depth -= 1; + return Ok(()); + } + } + + if let Some(current_state) = self.state.pop() { + self.end_state(current_state, results)?; + } + Ok(()) + } + + fn parse_text_event(&mut self, event: &BytesText<'_>) -> Result<(), RdfXmlParseError> { + let text = event.unescape_with(|e| self.resolve_entity(e))?.to_string(); + match self.state.last_mut() { + Some(RdfXmlState::PropertyElt { object, .. }) => { + if !event.iter().copied().all(is_whitespace) { + *object = Some(NodeOrText::Text(text)); + } + Ok(()) + } + Some(RdfXmlState::ParseTypeLiteralPropertyElt { writer, .. 
}) => { + writer.write_event(Event::Text(BytesText::new(&text)))?; + Ok(()) + } + _ => { + if event.iter().copied().all(is_whitespace) { + Ok(()) + } else { + Err(RdfXmlSyntaxError::msg(format!("Unexpected text event: '{text}'")).into()) + } + } + } + } + + fn resolve_tag_name(&self, qname: QName<'_>) -> Result<String, RdfXmlParseError> { + let (namespace, local_name) = self.reader.resolve_element(qname); + self.resolve_ns_name(namespace, local_name) + } + + fn resolve_attribute_name(&self, qname: QName<'_>) -> Result<String, RdfXmlParseError> { + let (namespace, local_name) = self.reader.resolve_attribute(qname); + self.resolve_ns_name(namespace, local_name) + } + + fn resolve_ns_name( + &self, + namespace: ResolveResult<'_>, + local_name: LocalName<'_>, + ) -> Result<String, RdfXmlParseError> { + match namespace { + ResolveResult::Bound(ns) => { + let mut value = Vec::with_capacity(ns.as_ref().len() + local_name.as_ref().len()); + value.extend_from_slice(ns.as_ref()); + value.extend_from_slice(local_name.as_ref()); + Ok(unescape_with(&self.reader.decoder().decode(&value)?, |e| { + self.resolve_entity(e) + }) + .map_err(Error::from)? + .to_string()) + } + ResolveResult::Unbound => { + Err(RdfXmlSyntaxError::msg("XML namespaces are required in RDF/XML").into()) + } + ResolveResult::Unknown(v) => Err(RdfXmlSyntaxError::msg(format!( + "Unknown prefix {}:", + self.reader.decoder().decode(&v)? + )) + .into()), + } + } + + #[allow(clippy::too_many_arguments)] + fn build_node_elt( + iri: NamedNode, + base_iri: Option<Iri<String>>, + language: Option<String>, + id_attr: Option<NamedNode>, + node_id_attr: Option<BlankNode>, + about_attr: Option<NamedNode>, + type_attr: Option<NamedNode>, + property_attrs: Vec<(NamedNode, String)>, + results: &mut Vec<Triple>, + ) -> Result<RdfXmlState, RdfXmlSyntaxError> { + let subject = match (id_attr, node_id_attr, about_attr) { + (Some(id_attr), None, None) => Subject::from(id_attr), + (None, Some(node_id_attr), None) => node_id_attr.into(), + (None, None, Some(about_attr)) => about_attr.into(), + (None, None, None) => BlankNode::default().into(), + (Some(_), Some(_), _) => { + return Err(RdfXmlSyntaxError::msg( + "Not both rdf:ID and rdf:nodeID could be set at the same time", + )) + } + (_, Some(_), Some(_)) => { + return Err(RdfXmlSyntaxError::msg( + "Not both rdf:nodeID and rdf:resource could be set at the same time", + )) + } + (Some(_), _, Some(_)) => { + return Err(RdfXmlSyntaxError::msg( + "Not both rdf:ID and rdf:resource could be set at the same time", + )) + } + }; + + Self::emit_property_attrs(&subject, property_attrs, &language, results); + + if let Some(type_attr) = type_attr { + results.push(Triple::new(subject.clone(), rdf::TYPE, type_attr)); + } + + if iri != *RDF_DESCRIPTION { + results.push(Triple::new(subject.clone(), rdf::TYPE, iri)); + } + Ok(RdfXmlState::NodeElt { + base_iri, + language, + subject, + li_counter: 0, + }) + } + + fn build_parse_type_resource_property_elt( + iri: NamedNode, + base_iri: Option<Iri<String>>, + language: Option<String>, + subject: Subject, + id_attr: Option<NamedNode>, + results: &mut Vec<Triple>, + ) -> RdfXmlState { + let object = BlankNode::default(); + let triple = Triple::new(subject, iri, object.clone()); + if let Some(id_attr) = id_attr { + Self::reify(triple.clone(), id_attr, results); + } + results.push(triple); + RdfXmlState::NodeElt { + base_iri, + language, + subject: object.into(), + li_counter: 0, + } + } + + fn end_state( + &mut self, + state: RdfXmlState, + results: &mut Vec<Triple>, + ) -> 
Result<(), RdfXmlSyntaxError> { + match state { + RdfXmlState::PropertyElt { + iri, + language, + subject, + id_attr, + datatype_attr, + object, + .. + } => { + let object = match object { + Some(NodeOrText::Node(node)) => Term::from(node), + Some(NodeOrText::Text(text)) => { + Self::new_literal(text, language, datatype_attr).into() + } + None => Self::new_literal(String::new(), language, datatype_attr).into(), + }; + let triple = Triple::new(subject, iri, object); + if let Some(id_attr) = id_attr { + Self::reify(triple.clone(), id_attr, results); + } + results.push(triple); + } + RdfXmlState::ParseTypeCollectionPropertyElt { + iri, + subject, + id_attr, + objects, + .. + } => { + let mut current_node = Subject::from(rdf::NIL); + for object in objects.into_iter().rev() { + let subject = Subject::from(BlankNode::default()); + results.push(Triple::new(subject.clone(), rdf::FIRST, object)); + results.push(Triple::new(subject.clone(), rdf::REST, current_node)); + current_node = subject; + } + let triple = Triple::new(subject, iri, current_node); + if let Some(id_attr) = id_attr { + Self::reify(triple.clone(), id_attr, results); + } + results.push(triple); + } + RdfXmlState::ParseTypeLiteralPropertyElt { + iri, + subject, + id_attr, + writer, + emit, + .. + } => { + if emit { + let object = writer.into_inner(); + if object.is_empty() { + return Err(RdfXmlSyntaxError::msg(format!( + "No value found for rdf:XMLLiteral value of property {iri}" + ))); + } + let triple = Triple::new( + subject, + iri, + Literal::new_typed_literal( + str::from_utf8(&object).map_err(|_| { + RdfXmlSyntaxError::msg( + "The XML literal is not in valid UTF-8".to_owned(), + ) + })?, + rdf::XML_LITERAL, + ), + ); + if let Some(id_attr) = id_attr { + Self::reify(triple.clone(), id_attr, results); + } + results.push(triple); + } + } + RdfXmlState::NodeElt { subject, .. } => match self.state.last_mut() { + Some(RdfXmlState::PropertyElt { object, .. }) => { + *object = Some(NodeOrText::Node(subject)) + } + Some(RdfXmlState::ParseTypeCollectionPropertyElt { objects, .. 
}) => { + objects.push(subject) + } + _ => (), + }, + _ => (), + } + Ok(()) + } + + fn new_literal( + value: String, + language: Option<String>, + datatype: Option<NamedNode>, + ) -> Literal { + if let Some(datatype) = datatype { + Literal::new_typed_literal(value, datatype) + } else if let Some(language) = language { + Literal::new_language_tagged_literal_unchecked(value, language) + } else { + Literal::new_simple_literal(value) + } + } + + fn reify(triple: Triple, statement_id: NamedNode, results: &mut Vec<Triple>) { + results.push(Triple::new(statement_id.clone(), rdf::TYPE, rdf::STATEMENT)); + results.push(Triple::new( + statement_id.clone(), + rdf::SUBJECT, + triple.subject, + )); + results.push(Triple::new( + statement_id.clone(), + rdf::PREDICATE, + triple.predicate, + )); + results.push(Triple::new(statement_id, rdf::OBJECT, triple.object)); + } + + fn emit_property_attrs( + subject: &Subject, + literal_attributes: Vec<(NamedNode, String)>, + language: &Option<String>, + results: &mut Vec<Triple>, + ) { + for (literal_predicate, literal_value) in literal_attributes { + results.push(Triple::new( + subject.clone(), + literal_predicate, + if let Some(language) = language.clone() { + Literal::new_language_tagged_literal_unchecked(literal_value, language) + } else { + Literal::new_simple_literal(literal_value) + }, + )); + } + } + + fn convert_attribute(&self, attribute: &Attribute<'_>) -> Result<String, RdfXmlParseError> { + Ok(attribute + .decode_and_unescape_value_with(&self.reader, |e| self.resolve_entity(e))? + .into_owned()) + } + + fn convert_iri_attribute( + &self, + base_iri: &Option<Iri<String>>, + attribute: &Attribute<'_>, + ) -> Result<NamedNode, RdfXmlParseError> { + Ok(self.resolve_iri(base_iri, self.convert_attribute(attribute)?)?) + } + + fn resolve_iri( + &self, + base_iri: &Option<Iri<String>>, + relative_iri: String, + ) -> Result<NamedNode, RdfXmlSyntaxError> { + if let Some(base_iri) = base_iri { + Ok(NamedNode::new_unchecked( + if self.unchecked { + base_iri.resolve_unchecked(&relative_iri) + } else { + base_iri.resolve(&relative_iri).map_err(|error| { + RdfXmlSyntaxError(SyntaxErrorKind::InvalidIri { + iri: relative_iri, + error, + }) + })? + } + .into_inner(), + )) + } else { + self.parse_iri(relative_iri) + } + } + + fn parse_iri(&self, relative_iri: String) -> Result<NamedNode, RdfXmlSyntaxError> { + Ok(NamedNode::new_unchecked(if self.unchecked { + relative_iri + } else { + Iri::parse(relative_iri.clone()) + .map_err(|error| { + RdfXmlSyntaxError(SyntaxErrorKind::InvalidIri { + iri: relative_iri, + error, + }) + })? 
+ .into_inner() + })) + } + + fn resolve_entity(&self, e: &str) -> Option<&str> { + self.custom_entities.get(e).map(String::as_str) + } +} + +fn is_nc_name(name: &str) -> bool { + // Name - (Char* ':' Char*) + is_name(name) && name.chars().all(|c| c != ':') +} + +fn is_name(name: &str) -> bool { + // NameStartChar (NameChar)* + let mut c = name.chars(); + if !c.next().map_or(false, is_name_start_char) { + return false; + } + c.all(is_name_char) +} + +fn is_whitespace(c: u8) -> bool { + matches!(c, b' ' | b'\t' | b'\n' | b'\r') +} + +fn is_utf8(encoding: &[u8]) -> bool { + matches!( + encoding.to_ascii_lowercase().as_slice(), + b"unicode-1-1-utf-8" + | b"unicode11utf8" + | b"unicode20utf8" + | b"utf-8" + | b"utf8" + | b"x-unicode20utf8" + ) +} diff --git a/ng-oxigraph/src/oxrdfxml/serializer.rs b/ng-oxigraph/src/oxrdfxml/serializer.rs new file mode 100644 index 0000000..f23e4f3 --- /dev/null +++ b/ng-oxigraph/src/oxrdfxml/serializer.rs @@ -0,0 +1,461 @@ +use crate::oxrdf::vocab::rdf; +use crate::oxrdf::{NamedNodeRef, Subject, SubjectRef, TermRef, TripleRef}; +use crate::oxrdfxml::utils::*; +use oxiri::{Iri, IriParseError}; +use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; +use quick_xml::Writer; +use std::borrow::Cow; +use std::collections::BTreeMap; +use std::io; +use std::io::Write; +use std::sync::Arc; +#[cfg(feature = "async-tokio")] +use tokio::io::AsyncWrite; + +/// An [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) serializer. +/// +/// ``` +/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef}; +/// use oxrdfxml::RdfXmlSerializer; +/// +/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new()); +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// ))?; +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://schema.org/name")?, +/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"), +/// ))?; +/// assert_eq!( +/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>", +/// writer.finish()?.as_slice() +/// ); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[derive(Default)] +#[must_use] +pub struct RdfXmlSerializer { + prefixes: BTreeMap<String, String>, +} + +impl RdfXmlSerializer { + /// Builds a new [`RdfXmlSerializer`]. + #[inline] + pub fn new() -> Self { + Self { + prefixes: BTreeMap::new(), + } + } + + #[inline] + pub fn with_prefix( + mut self, + prefix_name: impl Into<String>, + prefix_iri: impl Into<String>, + ) -> Result<Self, IriParseError> { + self.prefixes.insert( + Iri::parse(prefix_iri.into())?.into_inner(), + prefix_name.into(), + ); + Ok(self) + } + + /// Writes an RDF/XML file to a [`Write`] implementation. + /// + /// This writer does unbuffered writes.
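+ /// You might want to use a [`BufWriter`](io::BufWriter) to avoid that.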
+ /// + /// ``` + /// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef}; + /// use oxrdfxml::RdfXmlSerializer; + /// + /// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new()); + /// writer.write_triple(TripleRef::new( + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, + /// NamedNodeRef::new("http://schema.org/Person")?, + /// ))?; + /// writer.write_triple(TripleRef::new( + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://schema.org/name")?, + /// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"), + /// ))?; + /// assert_eq!( + /// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>", + /// writer.finish()?.as_slice() + /// ); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + #[allow(clippy::unused_self)] + pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteRdfXmlWriter<W> { + ToWriteRdfXmlWriter { + writer: Writer::new_with_indent(write, b'\t', 1), + inner: self.inner_writer(), + } + } + + /// Writes an RDF/XML file to an [`AsyncWrite`] implementation. + /// + /// This writer does unbuffered writes. + /// + /// ``` + /// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef}; + /// use oxrdfxml::RdfXmlSerializer; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), Box<dyn std::error::Error>> { + /// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new()); + /// writer.write_triple(TripleRef::new( + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, + /// NamedNodeRef::new("http://schema.org/Person")?, + /// )).await?; + /// writer.write_triple(TripleRef::new( + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://schema.org/name")?, + /// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"), + /// )).await?; + /// assert_eq!( + /// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>", + /// writer.finish().await?.as_slice() + /// ); + /// # Ok(()) + /// # } + /// ``` + #[allow(clippy::unused_self)] + #[cfg(feature = "async-tokio")] + pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>( + self, + write: W, + ) -> ToTokioAsyncWriteRdfXmlWriter<W> { + ToTokioAsyncWriteRdfXmlWriter { + writer: Writer::new_with_indent(write, b'\t', 1), + inner: self.inner_writer(), + } + } + + fn inner_writer(mut self) -> InnerRdfXmlWriter { + self.prefixes.insert( + "http://www.w3.org/1999/02/22-rdf-syntax-ns#".into(), + "rdf".into(), + ); + InnerRdfXmlWriter { + current_subject: None, + current_resource_tag: None, + prefixes: self.prefixes, + } + } +} + +/// Writes an RDF/XML file to a [`Write`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_write`].
+/// +/// ``` +/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef}; +/// use oxrdfxml::RdfXmlSerializer; +/// +/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new()); +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// ))?; +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://schema.org/name")?, +/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"), +/// ))?; +/// assert_eq!( +/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>", +/// writer.finish()?.as_slice() +/// ); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[must_use] +pub struct ToWriteRdfXmlWriter<W: Write> { + writer: Writer<W>, + inner: InnerRdfXmlWriter, +} + +impl<W: Write> ToWriteRdfXmlWriter<W> { + /// Writes an extra triple. + #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)] + pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> { + let mut buffer = Vec::new(); + self.inner.write_triple(t, &mut buffer)?; + self.flush_buffer(&mut buffer) + } + + /// Ends the write process and returns the underlying [`Write`]. + pub fn finish(mut self) -> io::Result<W> { + let mut buffer = Vec::new(); + self.inner.finish(&mut buffer); + self.flush_buffer(&mut buffer)?; + Ok(self.writer.into_inner()) + } + + fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> { + for event in buffer.drain(0..) { + self.writer.write_event(event).map_err(map_err)?; + } + Ok(()) + } +} + +/// Writes an RDF/XML file to an [`AsyncWrite`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_tokio_async_write`].
+/// +/// ``` +/// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef}; +/// use oxrdfxml::RdfXmlSerializer; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> Result<(), Box<dyn std::error::Error>> { +/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new()); +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// )).await?; +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://schema.org/name")?, +/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"), +/// )).await?; +/// assert_eq!( +/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>", +/// writer.finish().await?.as_slice() +/// ); +/// # Ok(()) +/// # } +/// ``` +#[cfg(feature = "async-tokio")] +#[must_use] +pub struct ToTokioAsyncWriteRdfXmlWriter<W: AsyncWrite + Unpin> { + writer: Writer<W>, + inner: InnerRdfXmlWriter, +} + +#[cfg(feature = "async-tokio")] +impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteRdfXmlWriter<W> { + /// Writes an extra triple. + #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)] + pub async fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> { + let mut buffer = Vec::new(); + self.inner.write_triple(t, &mut buffer)?; + self.flush_buffer(&mut buffer).await + } + + /// Ends the write process and returns the underlying [`AsyncWrite`]. + pub async fn finish(mut self) -> io::Result<W> { + let mut buffer = Vec::new(); + self.inner.finish(&mut buffer); + self.flush_buffer(&mut buffer).await?; + Ok(self.writer.into_inner()) + } + + async fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> { + for event in buffer.drain(0..)
{ + self.writer + .write_event_async(event) + .await + .map_err(map_err)?; + } + Ok(()) + } +} + +pub struct InnerRdfXmlWriter { + current_subject: Option<Subject>, + current_resource_tag: Option<String>, + prefixes: BTreeMap<String, String>, +} + +impl InnerRdfXmlWriter { + #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)] + fn write_triple<'a>( + &mut self, + t: impl Into<TripleRef<'a>>, + output: &mut Vec<Event<'a>>, + ) -> io::Result<()> { + if self.current_subject.is_none() { + self.write_start(output); + } + + let triple = t.into(); + // We open a new rdf:Description if useful + if self.current_subject.as_ref().map(Subject::as_ref) != Some(triple.subject) { + if self.current_subject.is_some() { + output.push(Event::End( + self.current_resource_tag + .take() + .map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new), + )); + } + self.current_subject = Some(triple.subject.into_owned()); + + let (mut description_open, with_type_tag) = if triple.predicate == rdf::TYPE { + if let TermRef::NamedNode(t) = triple.object { + let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(t); + let mut description_open = BytesStart::new(prop_qname.clone()); + if let Some(prop_xmlns) = prop_xmlns { + description_open.push_attribute(prop_xmlns); + } + self.current_resource_tag = Some(prop_qname.into_owned()); + (description_open, true) + } else { + (BytesStart::new("rdf:Description"), false) + } + } else { + (BytesStart::new("rdf:Description"), false) + }; + match triple.subject { + SubjectRef::NamedNode(node) => { + description_open.push_attribute(("rdf:about", node.as_str())) + } + SubjectRef::BlankNode(node) => { + description_open.push_attribute(("rdf:nodeID", node.as_str())) + } + _ => { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "RDF/XML only supports named or blank subject", + )) + } + } + output.push(Event::Start(description_open)); + if with_type_tag { + return Ok(()); // No need for a value + } + } + + let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(triple.predicate); + let mut property_open = BytesStart::new(prop_qname.clone()); + if let Some(prop_xmlns) = prop_xmlns { + property_open.push_attribute(prop_xmlns); + } + let content = match triple.object { + TermRef::NamedNode(node) => { + property_open.push_attribute(("rdf:resource", node.as_str())); + None + } + TermRef::BlankNode(node) => { + property_open.push_attribute(("rdf:nodeID", node.as_str())); + None + } + TermRef::Literal(literal) => { + if let Some(language) = literal.language() { + property_open.push_attribute(("xml:lang", language)); + } else if !literal.is_plain() { + property_open.push_attribute(("rdf:datatype", literal.datatype().as_str())); + } + Some(literal.value()) + } + _ => { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "RDF/XML only supports named, blank or literal object", + )) + } + }; + if let Some(content) = content { + output.push(Event::Start(property_open)); + output.push(Event::Text(BytesText::new(content))); + output.push(Event::End(BytesEnd::new(prop_qname))); + } else { + output.push(Event::Empty(property_open)); + } + Ok(()) + } + + fn write_start(&self, output: &mut Vec<Event<'_>>) { + output.push(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None))); + let mut rdf_open = BytesStart::new("rdf:RDF"); + for (prefix_value, prefix_name) in &self.prefixes { + rdf_open.push_attribute(( + format!("xmlns:{prefix_name}").as_str(), + prefix_value.as_str(), + )); + } + output.push(Event::Start(rdf_open)) + } + + fn finish(&mut self, 
output: &mut Vec<Event<'static>>) { + if self.current_subject.is_some() { + output.push(Event::End( + self.current_resource_tag + .take() + .map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new), + )); + } else { + self.write_start(output); + } + output.push(Event::End(BytesEnd::new("rdf:RDF"))); + } + + fn uri_to_qname_and_xmlns<'a>( + &self, + uri: NamedNodeRef<'a>, + ) -> (Cow<'a, str>, Option<(&'a str, &'a str)>) { + let (prop_prefix, prop_value) = split_iri(uri.as_str()); + if let Some(prop_prefix) = self.prefixes.get(prop_prefix) { + ( + if prop_prefix.is_empty() { + Cow::Borrowed(prop_value) + } else { + Cow::Owned(format!("{prop_prefix}:{prop_value}")) + }, + None, + ) + } else if prop_prefix == "http://www.w3.org/2000/xmlns/" { + (Cow::Owned(format!("xmlns:{prop_value}")), None) + } else if prop_value.is_empty() { + (Cow::Borrowed("p:"), Some(("xmlns:p", prop_prefix))) + } else { + (Cow::Borrowed(prop_value), Some(("xmlns", prop_prefix))) + } + } +} + +fn map_err(error: quick_xml::Error) -> io::Error { + if let quick_xml::Error::Io(error) = error { + Arc::try_unwrap(error).unwrap_or_else(|error| io::Error::new(error.kind(), error)) + } else { + io::Error::new(io::ErrorKind::Other, error) + } +} + +fn split_iri(iri: &str) -> (&str, &str) { + if let Some(position_base) = iri.rfind(|c| !is_name_char(c) || c == ':') { + if let Some(position_add) = iri[position_base..].find(|c| is_name_start_char(c) && c != ':') + { + ( + &iri[..position_base + position_add], + &iri[position_base + position_add..], + ) + } else { + (iri, "") + } + } else { + (iri, "") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_split_iri() { + assert_eq!( + split_iri("http://schema.org/Person"), + ("http://schema.org/", "Person") + ); + assert_eq!(split_iri("http://schema.org/"), ("http://schema.org/", "")); + assert_eq!( + split_iri("http://schema.org#foo"), + ("http://schema.org#", "foo") + ); + assert_eq!(split_iri("urn:isbn:foo"), ("urn:isbn:", "foo")); + } +} diff --git a/ng-oxigraph/src/oxrdfxml/utils.rs b/ng-oxigraph/src/oxrdfxml/utils.rs new file mode 100644 index 0000000..0483488 --- /dev/null +++ b/ng-oxigraph/src/oxrdfxml/utils.rs @@ -0,0 +1,26 @@ +pub fn is_name_start_char(c: char) -> bool { + // ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] + matches!(c, + ':' + | 'A'..='Z' + | '_' + | 'a'..='z' + | '\u{00C0}'..='\u{00D6}' + | '\u{00D8}'..='\u{00F6}' + | '\u{00F8}'..='\u{02FF}' + | '\u{0370}'..='\u{037D}' + | '\u{037F}'..='\u{1FFF}' + | '\u{200C}'..='\u{200D}' + | '\u{2070}'..='\u{218F}' + | '\u{2C00}'..='\u{2FEF}' + | '\u{3001}'..='\u{D7FF}' + | '\u{F900}'..='\u{FDCF}' + | '\u{FDF0}'..='\u{FFFD}' + | '\u{10000}'..='\u{EFFFF}') +} + +pub fn is_name_char(c: char) -> bool { + // NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] + is_name_start_char(c) + || matches!(c, '-' | '.' 
| '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}') +} diff --git a/ng-oxigraph/src/oxsdatatypes/README.md b/ng-oxigraph/src/oxsdatatypes/README.md new file mode 100644 index 0000000..1c3b2c3 --- /dev/null +++ b/ng-oxigraph/src/oxsdatatypes/README.md @@ -0,0 +1,65 @@ +oxsdatatypes +============ + +[![Latest Version](https://img.shields.io/crates/v/oxsdatatypes.svg)](https://crates.io/crates/oxsdatatypes) +[![Released API docs](https://docs.rs/oxsdatatypes/badge.svg)](https://docs.rs/oxsdatatypes) +[![Crates.io downloads](https://img.shields.io/crates/d/oxsdatatypes)](https://crates.io/crates/oxsdatatypes) +[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) +[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) + +oxsdatatypes is an implementation of some [XML Schema Definition Language Datatypes](https://www.w3.org/TR/xmlschema11-2/). +Its main aim is to ease the implementation of SPARQL and XPath. + +Usage example: + +```rust +use std::str::FromStr; +use oxsdatatypes::Decimal; + +assert!(Decimal::from_str("22.2").unwrap() > Decimal::from_str("21").unwrap()); +``` + +Each datatype is represented by a Rust struct. + +Each datatype provides: +* `FromStr` implementation to parse a datatype string serialization following its [lexical mapping](https://www.w3.org/TR/xmlschema11-2/#dt-lexical-mapping). +* `Display` implementation to serialize a datatype following its [canonical mapping](https://www.w3.org/TR/xmlschema11-2/#dt-canonical-mapping). +* `is_identical_with` method following its [identity relation](https://www.w3.org/TR/xmlschema11-2/#identity). +* `PartialEq`, and `Eq` if possible, implementations following its [equality relation](https://www.w3.org/TR/xmlschema11-2/#equality). +* `PartialOrd`, and `Ord` if possible, implementations following its [order relation](https://www.w3.org/TR/xmlschema11-2/#order). +* `From` and `TryFrom` implementations to implement [XPath casting](https://www.w3.org/TR/xpath-functions-31/#casting). +* Various methods implementing [XPath functions](https://www.w3.org/TR/xpath-functions-31/). +* `from_be_bytes` and `to_be_bytes` methods for serialization. + + +### `DateTime::now` behavior + +The `DateTime::now()` function needs special OS support. +Currently: +- If the `custom-now` feature is enabled, a function computing `now` must be set: + ```rust + use oxsdatatypes::Duration; + + #[no_mangle] + fn custom_ox_now() -> Duration { + unimplemented!("now implementation") + } + ``` +- For `wasm32-unknown-unknown`, if the `js` feature is enabled, the `Date.now()` ECMAScript API is used. +- For all other targets, `SystemTime::now()` is used. + +## License + +This project is licensed under either of + +* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or + <http://www.apache.org/licenses/LICENSE-2.0>) +* MIT license ([LICENSE-MIT](../LICENSE-MIT) or + <http://opensource.org/licenses/MIT>) + +at your option. + + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. diff --git a/ng-oxigraph/src/oxsdatatypes/boolean.rs b/ng-oxigraph/src/oxsdatatypes/boolean.rs new file mode 100644 index 0000000..94510d4 --- /dev/null +++ b/ng-oxigraph/src/oxsdatatypes/boolean.rs @@ -0,0 +1,134 @@ +use crate::oxsdatatypes::{Decimal, Double, Float, Integer}; +use serde::{Deserialize, Serialize}; +use std::fmt; +use std::str::{FromStr, ParseBoolError}; + +/// [XML Schema `boolean` datatype](https://www.w3.org/TR/xmlschema11-2/#boolean) +/// +/// It uses a [`bool`] internally.
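+/// +/// A minimal sketch of the lexical mapping (hypothetical doctest; the import path assumes the standalone `oxsdatatypes` crate layout, adjust it to wherever this module is exposed): +/// ``` +/// use std::str::FromStr; +/// use oxsdatatypes::Boolean; +/// +/// // Both the canonical and the numeric lexical forms are accepted. +/// assert_eq!(Boolean::from_str("true")?, Boolean::from(true)); +/// assert_eq!(Boolean::from_str("0")?, Boolean::from(false)); +/// # Ok::<_, std::str::ParseBoolError>(()) +/// ```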
+#[derive( + Debug, Clone, Copy, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize, +)] +#[repr(transparent)] +pub struct Boolean { + value: bool, +} + +impl Boolean { + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). + #[inline] + #[must_use] + pub fn is_identical_with(self, other: Self) -> bool { + self == other + } +} + +impl From<bool> for Boolean { + #[inline] + fn from(value: bool) -> Self { + Self { value } + } +} + +impl From<Integer> for Boolean { + #[inline] + fn from(value: Integer) -> Self { + (value != Integer::from(0)).into() + } +} + +impl From<Decimal> for Boolean { + #[inline] + fn from(value: Decimal) -> Self { + (value != Decimal::from(0)).into() + } +} + +impl From<Float> for Boolean { + #[inline] + fn from(value: Float) -> Self { + (value != Float::from(0.) && !value.is_nan()).into() + } +} + +impl From<Double> for Boolean { + #[inline] + fn from(value: Double) -> Self { + (value != Double::from(0.) && !value.is_nan()).into() + } +} + +impl From<Boolean> for bool { + #[inline] + fn from(value: Boolean) -> Self { + value.value + } +} + +impl FromStr for Boolean { + type Err = ParseBoolError; + + #[inline] + fn from_str(input: &str) -> Result<Self, Self::Err> { + Ok(match input { + "true" | "1" => true, + "false" | "0" => false, + _ => bool::from_str(input)?, + } + .into()) + } +} + +impl fmt::Display for Boolean { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.value.fmt(f) + } +} + +#[cfg(test)] +#[allow(clippy::panic_in_result_fn)] +mod tests { + use super::*; + + #[test] + fn from_str() -> Result<(), ParseBoolError> { + assert_eq!(Boolean::from_str("true")?.to_string(), "true"); + assert_eq!(Boolean::from_str("1")?.to_string(), "true"); + assert_eq!(Boolean::from_str("false")?.to_string(), "false"); + assert_eq!(Boolean::from_str("0")?.to_string(), "false"); + Ok(()) + } + + #[test] + fn from_integer() { + assert_eq!(Boolean::from(false), Integer::from(0).into()); + assert_eq!(Boolean::from(true), Integer::from(1).into()); + assert_eq!(Boolean::from(true), Integer::from(2).into()); + } + + #[test] + fn from_decimal() { + assert_eq!(Boolean::from(false), Decimal::from(0).into()); + assert_eq!(Boolean::from(true), Decimal::from(1).into()); + assert_eq!(Boolean::from(true), Decimal::from(2).into()); + } + + #[test] + fn from_float() { + assert_eq!(Boolean::from(false), Float::from(0.).into()); + assert_eq!(Boolean::from(true), Float::from(1.).into()); + assert_eq!(Boolean::from(true), Float::from(2.).into()); + assert_eq!(Boolean::from(false), Float::from(f32::NAN).into()); + assert_eq!(Boolean::from(true), Float::from(f32::INFINITY).into()); + } + + #[test] + fn from_double() { + assert_eq!(Boolean::from(false), Double::from(0.).into()); + assert_eq!(Boolean::from(true), Double::from(1.).into()); + assert_eq!(Boolean::from(true), Double::from(2.).into()); + assert_eq!(Boolean::from(false), Double::from(f64::NAN).into()); + assert_eq!(Boolean::from(true), Double::from(f64::INFINITY).into()); + } +} diff --git a/ng-oxigraph/src/oxsdatatypes/date_time.rs b/ng-oxigraph/src/oxsdatatypes/date_time.rs new file mode 100644 index 0000000..95aad7a --- /dev/null +++ b/ng-oxigraph/src/oxsdatatypes/date_time.rs @@ -0,0 +1,3187 @@ +#![allow(clippy::expect_used)] + +use crate::oxsdatatypes::{DayTimeDuration, Decimal, Duration, YearMonthDuration}; +use serde::{Deserialize, Serialize}; +use std::cmp::{min, Ordering}; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::str::FromStr; + 
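+// Implementation note: every calendar type below (`DateTime`, `Time`, `Date`, +// `GYearMonth`, ...) wraps the same `Timestamp` representation, built from the +// XML Schema "seven property model" (year, month, day, hour, minute, second and +// timezone offset); the properties a given type does not carry are simply `None`. +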
+/// [XML Schema `dateTime` datatype](https://www.w3.org/TR/xmlschema11-2/#dateTime) +/// +/// It encodes the value using a number of seconds from the Gregorian calendar era using a [`Decimal`] +/// and an optional timezone offset in minutes. +#[derive(Eq, PartialEq, PartialOrd, Debug, Clone, Copy, Hash, Serialize, Deserialize)] +pub struct DateTime { + timestamp: Timestamp, +} + +impl DateTime { + pub const MAX: Self = Self { + timestamp: Timestamp::MAX, + }; + pub const MIN: Self = Self { + timestamp: Timestamp::MIN, + }; + + #[inline] + pub(super) fn new( + year: i64, + month: u8, + day: u8, + hour: u8, + minute: u8, + second: Decimal, + timezone_offset: Option<TimezoneOffset>, + ) -> Result<Self, DateTimeOverflowError> { + Ok(Self { + timestamp: Timestamp::new(&DateTimeSevenPropertyModel { + year: Some(year), + month: Some(month), + day: Some(day), + hour: Some(hour), + minute: Some(minute), + second: Some(second), + timezone_offset, + })?, + }) + } + + /// [fn:current-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-current-dateTime) + #[inline] + pub fn now() -> Self { + Self { + timestamp: Timestamp::now(), + } + } + + #[inline] + #[must_use] + pub fn from_be_bytes(bytes: [u8; 18]) -> Self { + Self { + timestamp: Timestamp::from_be_bytes(bytes), + } + } + + /// [fn:year-from-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-year-from-dateTime) + #[inline] + #[must_use] + pub fn year(self) -> i64 { + self.timestamp.year() + } + + /// [fn:month-from-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-month-from-dateTime) + #[inline] + #[must_use] + pub fn month(self) -> u8 { + self.timestamp.month() + } + + /// [fn:day-from-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-day-from-dateTime) + #[inline] + #[must_use] + pub fn day(self) -> u8 { + self.timestamp.day() + } + + /// [fn:hour-from-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-hours-from-dateTime) + #[inline] + #[must_use] + pub fn hour(self) -> u8 { + self.timestamp.hour() + } + + /// [fn:minute-from-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-minutes-from-dateTime) + #[inline] + #[must_use] + pub fn minute(self) -> u8 { + self.timestamp.minute() + } + + /// [fn:second-from-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-seconds-from-dateTime) + #[inline] + #[must_use] + pub fn second(self) -> Decimal { + self.timestamp.second() + } + + /// [fn:timezone-from-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-timezone-from-dateTime) + #[inline] + #[must_use] + pub fn timezone(self) -> Option<DayTimeDuration> { + Some(self.timezone_offset()?.into()) + } + + #[inline] + #[must_use] + pub fn timezone_offset(self) -> Option<TimezoneOffset> { + self.timestamp.timezone_offset() + } + + #[inline] + fn properties(self) -> DateTimeSevenPropertyModel { + DateTimeSevenPropertyModel { + year: Some(self.year()), + month: Some(self.month()), + day: Some(self.day()), + hour: Some(self.hour()), + minute: Some(self.minute()), + second: Some(self.second()), + timezone_offset: self.timezone_offset(), + } + } + + #[inline] + #[must_use] + pub fn to_be_bytes(self) -> [u8; 18] { + self.timestamp.to_be_bytes() + } + + /// [op:subtract-dateTimes](https://www.w3.org/TR/xpath-functions-31/#func-subtract-dateTimes) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). 
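+ /// + /// A quick sketch of the semantics (hypothetical doctest written against the standalone `oxsdatatypes` crate paths): + /// ``` + /// use std::str::FromStr; + /// use oxsdatatypes::{DateTime, DayTimeDuration}; + /// + /// let a = DateTime::from_str("2024-01-02T00:00:00Z")?; + /// let b = DateTime::from_str("2024-01-01T00:00:00Z")?; + /// // One calendar day apart: the difference is the dayTimeDuration P1D. + /// assert_eq!(a.checked_sub(b), DayTimeDuration::from_str("P1D").ok()); + /// # Ok::<_, oxsdatatypes::ParseDateTimeError>(()) + /// ```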
+ #[inline] + #[must_use] + pub fn checked_sub(self, rhs: impl Into<Self>) -> Option<DayTimeDuration> { + self.timestamp.checked_sub(rhs.into().timestamp) + } + + /// [op:add-yearMonthDuration-to-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-add-yearMonthDuration-to-dateTime) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). + #[inline] + #[must_use] + pub fn checked_add_year_month_duration( + self, + rhs: impl Into<YearMonthDuration>, + ) -> Option<Self> { + self.checked_add_duration(Duration::from(rhs.into())) + } + + /// [op:add-dayTimeDuration-to-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-add-dayTimeDuration-to-dateTime) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). + #[inline] + #[must_use] + pub fn checked_add_day_time_duration(self, rhs: impl Into<Duration>) -> Option<Self> { + let rhs = rhs.into(); + Some(Self { + timestamp: self.timestamp.checked_add_seconds(rhs.all_seconds())?, + }) + } + + /// [op:add-yearMonthDuration-to-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-add-yearMonthDuration-to-dateTime) and [op:add-dayTimeDuration-to-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-add-dayTimeDuration-to-dateTime) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). + #[inline] + #[must_use] + pub fn checked_add_duration(self, rhs: impl Into<Duration>) -> Option<Self> { + let rhs = rhs.into(); + if let Ok(rhs) = DayTimeDuration::try_from(rhs) { + self.checked_add_day_time_duration(rhs) + } else { + Some(Self { + timestamp: Timestamp::new(&date_time_plus_duration(rhs, &self.properties())?) + .ok()?, + }) + } + } + + /// [op:subtract-yearMonthDuration-from-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-subtract-yearMonthDuration-from-dateTime) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). + #[inline] + #[must_use] + pub fn checked_sub_year_month_duration( + self, + rhs: impl Into<YearMonthDuration>, + ) -> Option<Self> { + self.checked_sub_duration(Duration::from(rhs.into())) + } + + /// [op:subtract-dayTimeDuration-from-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-subtract-dayTimeDuration-from-dateTime) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). + #[inline] + #[must_use] + pub fn checked_sub_day_time_duration(self, rhs: impl Into<DayTimeDuration>) -> Option<Self> { + let rhs = rhs.into(); + Some(Self { + timestamp: self.timestamp.checked_sub_seconds(rhs.as_seconds())?, + }) + } + + /// [op:subtract-yearMonthDuration-from-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-subtract-yearMonthDuration-from-dateTime) and [op:subtract-dayTimeDuration-from-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-subtract-dayTimeDuration-from-dateTime) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). + #[inline] + #[must_use] + pub fn checked_sub_duration(self, rhs: impl Into<Duration>) -> Option<Self> { + let rhs = rhs.into(); + if let Ok(rhs) = DayTimeDuration::try_from(rhs) { + self.checked_sub_day_time_duration(rhs) + } else { + Some(Self { + timestamp: Timestamp::new(&date_time_plus_duration( + rhs.checked_neg()?, + &self.properties(), + )?) 
+ .ok()?, + }) + } + } + + /// [fn:adjust-dateTime-to-timezone](https://www.w3.org/TR/xpath-functions-31/#func-adjust-dateTime-to-timezone) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). + #[inline] + #[must_use] + pub fn adjust(self, timezone_offset: Option<TimezoneOffset>) -> Option<Self> { + Some(Self { + timestamp: self.timestamp.adjust(timezone_offset)?, + }) + } + + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). + #[inline] + #[must_use] + pub fn is_identical_with(self, other: Self) -> bool { + self.timestamp.is_identical_with(other.timestamp) + } +} + +/// Conversion according to [XPath cast rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-datetimes). +impl TryFrom<Date> for DateTime { + type Error = DateTimeOverflowError; + + #[inline] + fn try_from(date: Date) -> Result<Self, Self::Error> { + Self::new( + date.year(), + date.month(), + date.day(), + 0, + 0, + Decimal::default(), + date.timezone_offset(), + ) + } +} + +impl FromStr for DateTime { + type Err = ParseDateTimeError; + + fn from_str(input: &str) -> Result<Self, Self::Err> { + ensure_complete(input, date_time_lexical_rep) + } +} + +impl fmt::Display for DateTime { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let year = self.year(); + if year < 0 { + f.write_str("-")?; + } + let second = self.second(); + write!( + f, + "{:04}-{:02}-{:02}T{:02}:{:02}:{}{}", + year.abs(), + self.month(), + self.day(), + self.hour(), + self.minute(), + if Decimal::from(-10) < second && second < Decimal::from(10) { + "0" + } else { + "" + }, + second + )?; + if let Some(timezone_offset) = self.timezone_offset() { + write!(f, "{timezone_offset}")?; + } + Ok(()) + } +} + +/// [XML Schema `time` datatype](https://www.w3.org/TR/xmlschema11-2/#time) +/// +/// It encodes the value using a number of seconds from the Gregorian calendar era using a [`Decimal`], +/// when combined with the date 1972-12-31, and an optional timezone offset in minutes. 
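+/// +/// A usage sketch (hypothetical doctest, assuming the standalone `oxsdatatypes` crate paths): +/// ``` +/// use std::str::FromStr; +/// use oxsdatatypes::Time; +/// +/// let time = Time::from_str("09:01:46Z")?; +/// assert_eq!((time.hour(), time.minute()), (9, 1)); +/// # Ok::<_, oxsdatatypes::ParseDateTimeError>(()) +/// ```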
+#[derive(Eq, PartialEq, PartialOrd, Debug, Clone, Copy, Hash)] +pub struct Time { + timestamp: Timestamp, +} + +impl Time { + #[cfg(test)] + const MAX: Self = Self { + timestamp: Timestamp { + value: Decimal::new_from_i128_unchecked(62_230_255_200), + timezone_offset: Some(TimezoneOffset::MIN), + }, + }; + #[cfg(test)] + const MIN: Self = Self { + timestamp: Timestamp { + value: Decimal::new_from_i128_unchecked(62_230_154_400), + timezone_offset: Some(TimezoneOffset::MAX), + }, + }; + + #[inline] + fn new( + mut hour: u8, + minute: u8, + second: Decimal, + timezone_offset: Option<TimezoneOffset>, + ) -> Result<Self, DateTimeOverflowError> { + if hour == 24 && minute == 0 && second == Decimal::default() { + hour = 0; + } + Ok(Self { + timestamp: Timestamp::new(&DateTimeSevenPropertyModel { + year: None, + month: None, + day: None, + hour: Some(hour), + minute: Some(minute), + second: Some(second), + timezone_offset, + })?, + }) + } + + #[inline] + #[must_use] + pub fn from_be_bytes(bytes: [u8; 18]) -> Self { + Self { + timestamp: Timestamp::from_be_bytes(bytes), + } + } + + /// [fn:current-time](https://www.w3.org/TR/xpath-functions-31/#func-current-time) + #[inline] + pub fn now() -> Self { + Self { + timestamp: Timestamp::now(), + } + } + + /// [fn:hour-from-time](https://www.w3.org/TR/xpath-functions-31/#func-hours-from-time) + #[inline] + #[must_use] + pub fn hour(self) -> u8 { + self.timestamp.hour() + } + + /// [fn:minute-from-time](https://www.w3.org/TR/xpath-functions-31/#func-minutes-from-time) + #[inline] + #[must_use] + pub fn minute(self) -> u8 { + self.timestamp.minute() + } + + /// [fn:second-from-time](https://www.w3.org/TR/xpath-functions-31/#func-seconds-from-time) + #[inline] + #[must_use] + pub fn second(self) -> Decimal { + self.timestamp.second() + } + + /// [fn:timezone-from-time](https://www.w3.org/TR/xpath-functions-31/#func-timezone-from-time) + #[inline] + #[must_use] + pub fn timezone(self) -> Option<DayTimeDuration> { + Some(self.timezone_offset()?.into()) + } + + #[inline] + #[must_use] + pub fn timezone_offset(self) -> Option<TimezoneOffset> { + self.timestamp.timezone_offset() + } + + #[inline] + #[must_use] + pub fn to_be_bytes(self) -> [u8; 18] { + self.timestamp.to_be_bytes() + } + + /// [op:subtract-times](https://www.w3.org/TR/xpath-functions-31/#func-subtract-times) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). + #[inline] + #[must_use] + pub fn checked_sub(self, rhs: impl Into<Self>) -> Option<DayTimeDuration> { + self.timestamp.checked_sub(rhs.into().timestamp) + } + + /// [op:add-dayTimeDuration-to-time](https://www.w3.org/TR/xpath-functions-31/#func-add-dayTimeDuration-to-time) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). + #[inline] + #[must_use] + pub fn checked_add_day_time_duration(self, rhs: impl Into<DayTimeDuration>) -> Option<Self> { + self.checked_add_duration(Duration::from(rhs.into())) + } + + /// [op:add-dayTimeDuration-to-time](https://www.w3.org/TR/xpath-functions-31/#func-add-dayTimeDuration-to-time) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). + #[inline] + #[must_use] + pub fn checked_add_duration(self, rhs: impl Into<Duration>) -> Option<Self> { + Some( + DateTime::new( + 1972, + 12, + 31, + self.hour(), + self.minute(), + self.second(), + self.timezone_offset(), + ) + .ok()? + .checked_add_duration(rhs)? 
+ .into(), + ) + } + + /// [op:subtract-dayTimeDuration-from-time](https://www.w3.org/TR/xpath-functions-31/#func-subtract-dayTimeDuration-from-time) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). + #[inline] + #[must_use] + pub fn checked_sub_day_time_duration(self, rhs: impl Into<DayTimeDuration>) -> Option<Self> { + self.checked_sub_duration(Duration::from(rhs.into())) + } + + /// [op:subtract-dayTimeDuration-from-time](https://www.w3.org/TR/xpath-functions-31/#func-subtract-dayTimeDuration-from-time) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). + #[inline] + #[must_use] + pub fn checked_sub_duration(self, rhs: impl Into<Duration>) -> Option<Self> { + Some( + DateTime::new( + 1972, + 12, + 31, + self.hour(), + self.minute(), + self.second(), + self.timezone_offset(), + ) + .ok()? + .checked_sub_duration(rhs)? + .into(), + ) + } + + // [fn:adjust-time-to-timezone](https://www.w3.org/TR/xpath-functions-31/#func-adjust-time-to-timezone) + #[inline] + #[must_use] + pub fn adjust(self, timezone_offset: Option<TimezoneOffset>) -> Option<Self> { + Some( + DateTime::new( + 1972, + 12, + 31, + self.hour(), + self.minute(), + self.second(), + self.timezone_offset(), + ) + .ok()? + .adjust(timezone_offset)? + .into(), + ) + } + + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). + #[inline] + #[must_use] + pub fn is_identical_with(self, other: Self) -> bool { + self.timestamp.is_identical_with(other.timestamp) + } +} + +/// Conversion according to [XPath cast rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-datetimes). +impl From<DateTime> for Time { + #[inline] + fn from(date_time: DateTime) -> Self { + Self::new( + date_time.hour(), + date_time.minute(), + date_time.second(), + date_time.timezone_offset(), + ) + .expect("Casting from xsd:dateTime to xsd:time can't fail") + } +} + +impl FromStr for Time { + type Err = ParseDateTimeError; + + fn from_str(input: &str) -> Result<Self, Self::Err> { + ensure_complete(input, time_lexical_rep) + } +} + +impl fmt::Display for Time { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let second = self.second(); + write!( + f, + "{:02}:{:02}:{}{}", + self.hour(), + self.minute(), + if Decimal::from(-10) < second && second < Decimal::from(10) { + "0" + } else { + "" + }, + second + )?; + if let Some(timezone_offset) = self.timezone_offset() { + write!(f, "{timezone_offset}")?; + } + Ok(()) + } +} + +/// [XML Schema `date` datatype](https://www.w3.org/TR/xmlschema11-2/#date) +/// +/// It encodes the value using a number of seconds from the Gregorian calendar era using a [`Decimal`], +/// when combined with the time 00:00:00, and an optional timezone offset in minutes.
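+/// +/// A usage sketch (hypothetical doctest, assuming the standalone `oxsdatatypes` crate paths): +/// ``` +/// use std::str::FromStr; +/// use oxsdatatypes::Date; +/// +/// let date = Date::from_str("2024-05-20")?; +/// assert_eq!((date.year(), date.month(), date.day()), (2024, 5, 20)); +/// # Ok::<_, oxsdatatypes::ParseDateTimeError>(()) +/// ```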
+#[derive(Eq, PartialEq, PartialOrd, Debug, Clone, Copy, Hash)] +pub struct Date { + timestamp: Timestamp, +} + +impl Date { + pub const MAX: Self = Self { + timestamp: Timestamp { + value: Decimal::new_from_i128_unchecked(170_141_183_460_469_216_800), + timezone_offset: Some(TimezoneOffset::MAX), + }, + }; + pub const MIN: Self = Self { + timestamp: Timestamp { + value: Decimal::new_from_i128_unchecked(-170_141_183_460_469_216_800), + timezone_offset: Some(TimezoneOffset::MIN), + }, + }; + + #[inline] + fn new( + year: i64, + month: u8, + day: u8, + timezone_offset: Option<TimezoneOffset>, + ) -> Result<Self, DateTimeOverflowError> { + Ok(Self { + timestamp: Timestamp::new(&DateTimeSevenPropertyModel { + year: Some(year), + month: Some(month), + day: Some(day), + hour: None, + minute: None, + second: None, + timezone_offset, + })?, + }) + } + + #[inline] + #[must_use] + pub fn from_be_bytes(bytes: [u8; 18]) -> Self { + Self { + timestamp: Timestamp::from_be_bytes(bytes), + } + } + + /// [fn:current-date](https://www.w3.org/TR/xpath-functions-31/#func-current-date) + #[inline] + pub fn now() -> Self { + DateTime::now() + .try_into() + .expect("The current time seems way in the future, it's strange") + } + + /// [fn:year-from-date](https://www.w3.org/TR/xpath-functions-31/#func-year-from-date) + #[inline] + #[must_use] + pub fn year(self) -> i64 { + self.timestamp.year() + } + + /// [fn:month-from-date](https://www.w3.org/TR/xpath-functions-31/#func-month-from-date) + #[inline] + #[must_use] + pub fn month(self) -> u8 { + self.timestamp.month() + } + + /// [fn:day-from-date](https://www.w3.org/TR/xpath-functions-31/#func-day-from-date) + #[inline] + #[must_use] + pub fn day(self) -> u8 { + self.timestamp.day() + } + + /// [fn:timezone-from-date](https://www.w3.org/TR/xpath-functions-31/#func-timezone-from-date) + #[inline] + #[must_use] + pub fn timezone(self) -> Option<DayTimeDuration> { + Some(self.timezone_offset()?.into()) + } + + #[inline] + #[must_use] + pub fn timezone_offset(self) -> Option<TimezoneOffset> { + self.timestamp.timezone_offset() + } + + #[inline] + #[must_use] + pub fn to_be_bytes(self) -> [u8; 18] { + self.timestamp.to_be_bytes() + } + + /// [op:subtract-dates](https://www.w3.org/TR/xpath-functions-31/#func-subtract-dates) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). + #[inline] + #[must_use] + pub fn checked_sub(self, rhs: impl Into<Self>) -> Option<DayTimeDuration> { + self.timestamp.checked_sub(rhs.into().timestamp) + } + + /// [op:add-yearMonthDuration-to-date](https://www.w3.org/TR/xpath-functions-31/#func-add-yearMonthDuration-to-date) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). + #[inline] + #[must_use] + pub fn checked_add_year_month_duration( + self, + rhs: impl Into<YearMonthDuration>, + ) -> Option<Self> { + self.checked_add_duration(Duration::from(rhs.into())) + } + + /// [op:add-dayTimeDuration-to-dateTime](https://www.w3.org/TR/xpath-functions-31/#func-add-dayTimeDuration-to-date) + /// + /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)). 
+    #[inline]
+    #[must_use]
+    pub fn checked_add_day_time_duration(self, rhs: impl Into<DayTimeDuration>) -> Option<Self> {
+        self.checked_add_duration(Duration::from(rhs.into()))
+    }
+
+    /// [op:add-yearMonthDuration-to-date](https://www.w3.org/TR/xpath-functions-31/#func-add-yearMonthDuration-to-date) and [op:add-dayTimeDuration-to-date](https://www.w3.org/TR/xpath-functions-31/#func-add-dayTimeDuration-to-date)
+    ///
+    /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)).
+    #[inline]
+    #[must_use]
+    pub fn checked_add_duration(self, rhs: impl Into<Duration>) -> Option<Self> {
+        DateTime::try_from(self)
+            .ok()?
+            .checked_add_duration(rhs)?
+            .try_into()
+            .ok()
+    }
+
+    /// [op:subtract-yearMonthDuration-from-date](https://www.w3.org/TR/xpath-functions-31/#func-subtract-yearMonthDuration-from-date)
+    ///
+    /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)).
+    #[inline]
+    #[must_use]
+    pub fn checked_sub_year_month_duration(
+        self,
+        rhs: impl Into<YearMonthDuration>,
+    ) -> Option<Self> {
+        self.checked_sub_duration(Duration::from(rhs.into()))
+    }
+
+    /// [op:subtract-dayTimeDuration-from-date](https://www.w3.org/TR/xpath-functions-31/#func-subtract-dayTimeDuration-from-date)
+    ///
+    /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)).
+    #[inline]
+    #[must_use]
+    pub fn checked_sub_day_time_duration(self, rhs: impl Into<DayTimeDuration>) -> Option<Self> {
+        self.checked_sub_duration(Duration::from(rhs.into()))
+    }
+
+    /// [op:subtract-yearMonthDuration-from-date](https://www.w3.org/TR/xpath-functions-31/#func-subtract-yearMonthDuration-from-date) and [op:subtract-dayTimeDuration-from-date](https://www.w3.org/TR/xpath-functions-31/#func-subtract-dayTimeDuration-from-date)
+    ///
+    /// Returns `None` in case of overflow ([`FODT0001`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001)).
+    #[inline]
+    #[must_use]
+    pub fn checked_sub_duration(self, rhs: impl Into<Duration>) -> Option<Self> {
+        DateTime::try_from(self)
+            .ok()?
+            .checked_sub_duration(rhs)?
+            .try_into()
+            .ok()
+    }
+
+    /// [fn:adjust-date-to-timezone](https://www.w3.org/TR/xpath-functions-31/#func-adjust-date-to-timezone)
+    #[inline]
+    #[must_use]
+    pub fn adjust(self, timezone_offset: Option<TimezoneOffset>) -> Option<Self> {
+        DateTime::new(
+            self.year(),
+            self.month(),
+            self.day(),
+            0,
+            0,
+            Decimal::default(),
+            self.timezone_offset(),
+        )
+        .ok()?
+        .adjust(timezone_offset)?
+        .try_into()
+        .ok()
+    }
+
+    /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
+    #[inline]
+    #[must_use]
+    pub fn is_identical_with(self, other: Self) -> bool {
+        self.timestamp.is_identical_with(other.timestamp)
+    }
+}
+
+/// Conversion according to [XPath cast rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-datetimes).
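+///
+/// For instance (an editorial example, not part of the upstream documentation), casting the
+/// dateTime `2011-01-10T14:45:13` is expected to keep only its date part:
+/// ```ignore
+/// // imports of the value types elided
+/// use std::str::FromStr;
+///
+/// let date_time = DateTime::from_str("2011-01-10T14:45:13").unwrap();
+/// assert_eq!(
+///     Date::try_from(date_time).unwrap(),
+///     Date::from_str("2011-01-10").unwrap()
+/// );
+/// ```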
+impl TryFrom<DateTime> for Date { + type Error = DateTimeOverflowError; + + #[inline] + fn try_from(date_time: DateTime) -> Result<Self, Self::Error> { + Self::new( + date_time.year(), + date_time.month(), + date_time.day(), + date_time.timezone_offset(), + ) + } +} + +impl FromStr for Date { + type Err = ParseDateTimeError; + + fn from_str(input: &str) -> Result<Self, Self::Err> { + ensure_complete(input, date_lexical_rep) + } +} + +impl fmt::Display for Date { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let year = self.year(); + if year < 0 { + f.write_str("-")?; + } + write!(f, "{:04}-{:02}-{:02}", year.abs(), self.month(), self.day())?; + if let Some(timezone_offset) = self.timezone_offset() { + write!(f, "{timezone_offset}")?; + } + Ok(()) + } +} + +/// [XML Schema `gYearMonth` datatype](https://www.w3.org/TR/xmlschema11-2/#gYearMonth) +/// +/// It encodes the value using a number of seconds from the Gregorian calendar era using a [`Decimal`], +/// when combined with the day-time 31T00:00:00, and an optional timezone offset in minutes. +#[derive(Eq, PartialEq, PartialOrd, Debug, Clone, Copy, Hash)] +pub struct GYearMonth { + timestamp: Timestamp, +} + +impl GYearMonth { + pub const MAX: Self = Self { + timestamp: Timestamp { + value: Decimal::new_from_i128_unchecked(170_141_183_460_469_216_800), + timezone_offset: Some(TimezoneOffset::MAX), + }, + }; + pub const MIN: Self = Self { + timestamp: Timestamp { + value: Decimal::new_from_i128_unchecked(-170_141_183_460_466_970_400), + timezone_offset: Some(TimezoneOffset::MIN), + }, + }; + + #[inline] + fn new( + year: i64, + month: u8, + timezone_offset: Option<TimezoneOffset>, + ) -> Result<Self, DateTimeOverflowError> { + Ok(Self { + timestamp: Timestamp::new(&DateTimeSevenPropertyModel { + year: Some(year), + month: Some(month), + day: None, + hour: None, + minute: None, + second: None, + timezone_offset, + })?, + }) + } + + #[inline] + #[must_use] + pub fn from_be_bytes(bytes: [u8; 18]) -> Self { + Self { + timestamp: Timestamp::from_be_bytes(bytes), + } + } + + #[inline] + #[must_use] + pub fn year(self) -> i64 { + self.timestamp.year() + } + + #[inline] + #[must_use] + pub fn month(self) -> u8 { + self.timestamp.month() + } + + #[inline] + #[must_use] + pub fn timezone(self) -> Option<DayTimeDuration> { + Some(self.timezone_offset()?.into()) + } + + #[inline] + #[must_use] + pub fn timezone_offset(self) -> Option<TimezoneOffset> { + self.timestamp.timezone_offset() + } + + #[inline] + #[must_use] + pub fn adjust(self, timezone_offset: Option<TimezoneOffset>) -> Option<Self> { + Some(Self { + timestamp: self.timestamp.adjust(timezone_offset)?, + }) + } + + #[inline] + #[must_use] + pub fn to_be_bytes(self) -> [u8; 18] { + self.timestamp.to_be_bytes() + } + + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). + #[inline] + #[must_use] + pub fn is_identical_with(self, other: Self) -> bool { + self.timestamp.is_identical_with(other.timestamp) + } +} + +/// Conversion according to [XPath cast rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-datetimes). +impl TryFrom<DateTime> for GYearMonth { + type Error = DateTimeOverflowError; + + #[inline] + fn try_from(date_time: DateTime) -> Result<Self, Self::Error> { + Self::new( + date_time.year(), + date_time.month(), + date_time.timezone_offset(), + ) + } +} + +/// Conversion according to [XPath cast rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-datetimes). 
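+///
+/// For instance (an editorial example, not part of the upstream documentation), the date
+/// `2011-01-10` is expected to cast to the gYearMonth `2011-01`:
+/// ```ignore
+/// // imports of the value types elided
+/// use std::str::FromStr;
+///
+/// let date = Date::from_str("2011-01-10").unwrap();
+/// assert_eq!(GYearMonth::from(date), GYearMonth::from_str("2011-01").unwrap());
+/// ```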
+impl From<Date> for GYearMonth { + #[inline] + fn from(date: Date) -> Self { + Self::new(date.year(), date.month(), date.timezone_offset()) + .expect("Casting from xsd:date to xsd:gYearMonth can't fail") + } +} + +impl FromStr for GYearMonth { + type Err = ParseDateTimeError; + + fn from_str(input: &str) -> Result<Self, Self::Err> { + ensure_complete(input, g_year_month_lexical_rep) + } +} + +impl fmt::Display for GYearMonth { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let year = self.year(); + if year < 0 { + f.write_str("-")?; + } + write!(f, "{:04}-{:02}", year.abs(), self.month())?; + if let Some(timezone_offset) = self.timezone_offset() { + write!(f, "{timezone_offset}")?; + } + Ok(()) + } +} + +/// [XML Schema `gYear` datatype](https://www.w3.org/TR/xmlschema11-2/#gYear) +/// +/// It encodes the value using a number of seconds from the Gregorian calendar era using a [`Decimal`], +/// when combined with the month-day-time 12-31T00:00:00, and an optional timezone offset in minutes. +#[derive(Eq, PartialEq, PartialOrd, Debug, Clone, Copy, Hash)] +pub struct GYear { + timestamp: Timestamp, +} + +impl GYear { + pub const MAX: Self = Self { + timestamp: Timestamp { + value: Decimal::new_from_i128_unchecked(170_141_183_460_461_440_800), + timezone_offset: Some(TimezoneOffset::MAX), + }, + }; + pub const MIN: Self = Self { + timestamp: Timestamp { + value: Decimal::new_from_i128_unchecked(-170_141_183_460_461_700_000), + timezone_offset: Some(TimezoneOffset::MIN), + }, + }; + + #[inline] + fn new( + year: i64, + timezone_offset: Option<TimezoneOffset>, + ) -> Result<Self, DateTimeOverflowError> { + Ok(Self { + timestamp: Timestamp::new(&DateTimeSevenPropertyModel { + year: Some(year), + month: None, + day: None, + hour: None, + minute: None, + second: None, + timezone_offset, + })?, + }) + } + + #[inline] + #[must_use] + pub fn from_be_bytes(bytes: [u8; 18]) -> Self { + Self { + timestamp: Timestamp::from_be_bytes(bytes), + } + } + + #[inline] + #[must_use] + pub fn year(self) -> i64 { + self.timestamp.year() + } + + #[inline] + #[must_use] + pub fn timezone(self) -> Option<DayTimeDuration> { + Some(self.timezone_offset()?.into()) + } + + #[inline] + #[must_use] + pub fn timezone_offset(self) -> Option<TimezoneOffset> { + self.timestamp.timezone_offset() + } + + #[inline] + #[must_use] + pub fn adjust(self, timezone_offset: Option<TimezoneOffset>) -> Option<Self> { + Some(Self { + timestamp: self.timestamp.adjust(timezone_offset)?, + }) + } + + #[inline] + #[must_use] + pub fn to_be_bytes(self) -> [u8; 18] { + self.timestamp.to_be_bytes() + } + + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). + #[inline] + #[must_use] + pub fn is_identical_with(self, other: Self) -> bool { + self.timestamp.is_identical_with(other.timestamp) + } +} + +/// Conversion according to [XPath cast rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-datetimes). +impl TryFrom<DateTime> for GYear { + type Error = DateTimeOverflowError; + + #[inline] + fn try_from(date_time: DateTime) -> Result<Self, Self::Error> { + Self::new(date_time.year(), date_time.timezone_offset()) + } +} + +/// Conversion according to [XPath cast rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-datetimes). 
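+///
+/// This cast is fallible: re-encoding the year can overflow the underlying [`Decimal`]
+/// timeline for values near [`Date::MIN`] or [`Date::MAX`] (hence [`DateTimeOverflowError`]).
+/// A sketch of the usual case (an editorial example, not part of the upstream documentation):
+/// ```ignore
+/// // imports of the value types elided
+/// use std::str::FromStr;
+///
+/// let date = Date::from_str("2011-01-10").unwrap();
+/// assert_eq!(GYear::try_from(date).unwrap(), GYear::from_str("2011").unwrap());
+/// ```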
+impl TryFrom<Date> for GYear {
+    type Error = DateTimeOverflowError;
+
+    #[inline]
+    fn try_from(date: Date) -> Result<Self, Self::Error> {
+        Self::new(date.year(), date.timezone_offset())
+    }
+}
+
+impl TryFrom<GYearMonth> for GYear {
+    type Error = DateTimeOverflowError;
+
+    #[inline]
+    fn try_from(year_month: GYearMonth) -> Result<Self, Self::Error> {
+        Self::new(year_month.year(), year_month.timezone_offset())
+    }
+}
+
+impl FromStr for GYear {
+    type Err = ParseDateTimeError;
+
+    fn from_str(input: &str) -> Result<Self, Self::Err> {
+        ensure_complete(input, g_year_lexical_rep)
+    }
+}
+
+impl fmt::Display for GYear {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let year = self.year();
+        if year < 0 {
+            f.write_str("-")?;
+        }
+        write!(f, "{:04}", year.abs())?;
+        if let Some(timezone_offset) = self.timezone_offset() {
+            write!(f, "{timezone_offset}")?;
+        }
+        Ok(())
+    }
+}
+
+/// [XML Schema `gMonthDay` datatype](https://www.w3.org/TR/xmlschema11-2/#gMonthDay)
+///
+/// It encodes the value using a number of seconds from the Gregorian calendar era using a [`Decimal`],
+/// when combined with the year 1972 and the time 00:00:00, and an optional timezone offset in minutes.
+#[derive(Eq, PartialEq, PartialOrd, Debug, Clone, Copy, Hash)]
+pub struct GMonthDay {
+    timestamp: Timestamp,
+}
+
+impl GMonthDay {
+    #[inline]
+    fn new(
+        month: u8,
+        day: u8,
+        timezone_offset: Option<TimezoneOffset>,
+    ) -> Result<Self, DateTimeOverflowError> {
+        Ok(Self {
+            timestamp: Timestamp::new(&DateTimeSevenPropertyModel {
+                year: None,
+                month: Some(month),
+                day: Some(day),
+                hour: None,
+                minute: None,
+                second: None,
+                timezone_offset,
+            })?,
+        })
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn from_be_bytes(bytes: [u8; 18]) -> Self {
+        Self {
+            timestamp: Timestamp::from_be_bytes(bytes),
+        }
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn month(&self) -> u8 {
+        self.timestamp.month()
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn day(&self) -> u8 {
+        self.timestamp.day()
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn timezone(&self) -> Option<DayTimeDuration> {
+        Some(self.timezone_offset()?.into())
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn timezone_offset(&self) -> Option<TimezoneOffset> {
+        self.timestamp.timezone_offset()
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn adjust(&self, timezone_offset: Option<TimezoneOffset>) -> Option<Self> {
+        Some(Self {
+            timestamp: self.timestamp.adjust(timezone_offset)?,
+        })
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn to_be_bytes(self) -> [u8; 18] {
+        self.timestamp.to_be_bytes()
+    }
+
+    /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
+    #[inline]
+    #[must_use]
+    pub fn is_identical_with(self, other: Self) -> bool {
+        self.timestamp.is_identical_with(other.timestamp)
+    }
+}
+
+/// Conversion according to [XPath cast rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-datetimes).
+impl From<DateTime> for GMonthDay {
+    #[inline]
+    fn from(date_time: DateTime) -> Self {
+        Self::new(
+            date_time.month(),
+            date_time.day(),
+            date_time.timezone_offset(),
+        )
+        .expect("Casting from xsd:dateTime to xsd:gMonthDay can't fail")
+    }
+}
+
+/// Conversion according to [XPath cast rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-datetimes).
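+///
+/// For instance (an editorial example, not part of the upstream documentation), the date
+/// `2011-01-10` is expected to cast to the gMonthDay `--01-10`:
+/// ```ignore
+/// // imports of the value types elided
+/// use std::str::FromStr;
+///
+/// let date = Date::from_str("2011-01-10").unwrap();
+/// assert_eq!(GMonthDay::from(date), GMonthDay::from_str("--01-10").unwrap());
+/// ```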
+impl From<Date> for GMonthDay { + #[inline] + fn from(date: Date) -> Self { + Self::new(date.month(), date.day(), date.timezone_offset()) + .expect("Casting from xsd:date to xsd:gMonthDay can't fail") + } +} + +impl FromStr for GMonthDay { + type Err = ParseDateTimeError; + + fn from_str(input: &str) -> Result<Self, Self::Err> { + ensure_complete(input, g_month_day_lexical_rep) + } +} + +impl fmt::Display for GMonthDay { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "--{:02}-{:02}", self.month(), self.day())?; + if let Some(timezone_offset) = self.timezone_offset() { + write!(f, "{timezone_offset}")?; + } + Ok(()) + } +} + +/// [XML Schema `gMonth` datatype](https://www.w3.org/TR/xmlschema11-2/#gMonth) +/// +/// It encodes the value using a number of seconds from the Gregorian calendar era using a [`Decimal`], +/// when combined with the year 1972 and the day-time 31T00:00:00, and an optional timezone offset in minutes. +#[derive(Eq, PartialEq, PartialOrd, Debug, Clone, Copy, Hash)] +pub struct GMonth { + timestamp: Timestamp, +} + +impl GMonth { + #[inline] + fn new( + month: u8, + timezone_offset: Option<TimezoneOffset>, + ) -> Result<Self, DateTimeOverflowError> { + Ok(Self { + timestamp: Timestamp::new(&DateTimeSevenPropertyModel { + year: None, + month: Some(month), + day: None, + hour: None, + minute: None, + second: None, + timezone_offset, + })?, + }) + } + + #[inline] + #[must_use] + pub fn from_be_bytes(bytes: [u8; 18]) -> Self { + Self { + timestamp: Timestamp::from_be_bytes(bytes), + } + } + + #[inline] + #[must_use] + pub fn month(&self) -> u8 { + self.timestamp.month() + } + + #[inline] + #[must_use] + pub fn timezone(&self) -> Option<DayTimeDuration> { + Some(self.timezone_offset()?.into()) + } + + #[inline] + #[must_use] + pub fn timezone_offset(&self) -> Option<TimezoneOffset> { + self.timestamp.timezone_offset() + } + + #[inline] + #[must_use] + pub fn adjust(&self, timezone_offset: Option<TimezoneOffset>) -> Option<Self> { + Some(Self { + timestamp: self.timestamp.adjust(timezone_offset)?, + }) + } + + #[inline] + #[must_use] + pub fn to_be_bytes(self) -> [u8; 18] { + self.timestamp.to_be_bytes() + } + + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). + #[inline] + #[must_use] + pub fn is_identical_with(self, other: Self) -> bool { + self.timestamp.is_identical_with(other.timestamp) + } +} + +/// Conversion according to [XPath cast rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-datetimes). +impl From<DateTime> for GMonth { + #[inline] + fn from(date_time: DateTime) -> Self { + Self::new(date_time.month(), date_time.timezone_offset()) + .expect("Casting from xsd:dateTime to xsd:gMonth can't fail") + } +} + +/// Conversion according to [XPath cast rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-datetimes). 
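+///
+/// For instance (an editorial example, not part of the upstream documentation), the date
+/// `2011-01-10` is expected to cast to the gMonth `--01`:
+/// ```ignore
+/// // imports of the value types elided
+/// use std::str::FromStr;
+///
+/// let date = Date::from_str("2011-01-10").unwrap();
+/// assert_eq!(GMonth::from(date), GMonth::from_str("--01").unwrap());
+/// ```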
+impl From<Date> for GMonth {
+    #[inline]
+    fn from(date: Date) -> Self {
+        Self::new(date.month(), date.timezone_offset())
+            .expect("Casting from xsd:date to xsd:gMonth can't fail")
+    }
+}
+
+impl From<GYearMonth> for GMonth {
+    #[inline]
+    fn from(year_month: GYearMonth) -> Self {
+        Self::new(year_month.month(), year_month.timezone_offset())
+            .expect("Casting from xsd:gYearMonth to xsd:gMonth can't fail")
+    }
+}
+
+impl From<GMonthDay> for GMonth {
+    #[inline]
+    fn from(month_day: GMonthDay) -> Self {
+        Self::new(month_day.month(), month_day.timezone_offset())
+            .expect("Casting from xsd:gMonthDay to xsd:gMonth can't fail")
+    }
+}
+
+impl FromStr for GMonth {
+    type Err = ParseDateTimeError;
+
+    fn from_str(input: &str) -> Result<Self, Self::Err> {
+        ensure_complete(input, g_month_lexical_rep)
+    }
+}
+
+impl fmt::Display for GMonth {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "--{:02}", self.month())?;
+        if let Some(timezone_offset) = self.timezone_offset() {
+            write!(f, "{timezone_offset}")?;
+        }
+        Ok(())
+    }
+}
+
+/// [XML Schema `gDay` datatype](https://www.w3.org/TR/xmlschema11-2/#gDay)
+///
+/// It encodes the value using a number of seconds from the Gregorian calendar era using a [`Decimal`],
+/// when combined with the year-month 1972-12 and the time 00:00:00, and an optional timezone offset in minutes.
+#[derive(Eq, PartialEq, PartialOrd, Debug, Clone, Copy, Hash)]
+pub struct GDay {
+    timestamp: Timestamp,
+}
+
+impl GDay {
+    #[inline]
+    fn new(
+        day: u8,
+        timezone_offset: Option<TimezoneOffset>,
+    ) -> Result<Self, DateTimeOverflowError> {
+        Ok(Self {
+            timestamp: Timestamp::new(&DateTimeSevenPropertyModel {
+                year: None,
+                month: None,
+                day: Some(day),
+                hour: None,
+                minute: None,
+                second: None,
+                timezone_offset,
+            })?,
+        })
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn from_be_bytes(bytes: [u8; 18]) -> Self {
+        Self {
+            timestamp: Timestamp::from_be_bytes(bytes),
+        }
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn day(&self) -> u8 {
+        self.timestamp.day()
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn timezone(&self) -> Option<DayTimeDuration> {
+        Some(self.timezone_offset()?.into())
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn timezone_offset(&self) -> Option<TimezoneOffset> {
+        self.timestamp.timezone_offset()
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn adjust(&self, timezone_offset: Option<TimezoneOffset>) -> Option<Self> {
+        Some(Self {
+            timestamp: self.timestamp.adjust(timezone_offset)?,
+        })
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn to_be_bytes(self) -> [u8; 18] {
+        self.timestamp.to_be_bytes()
+    }
+
+    /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
+    #[inline]
+    #[must_use]
+    pub fn is_identical_with(self, other: Self) -> bool {
+        self.timestamp.is_identical_with(other.timestamp)
+    }
+}
+
+/// Conversion according to [XPath cast rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-datetimes).
+impl From<DateTime> for GDay {
+    #[inline]
+    fn from(date_time: DateTime) -> Self {
+        Self::new(date_time.day(), date_time.timezone_offset())
+            .expect("Casting from xsd:dateTime to xsd:gDay can't fail")
+    }
+}
+
+/// Conversion according to [XPath cast rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-datetimes).
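+///
+/// For instance (an editorial example, not part of the upstream documentation), the date
+/// `2011-01-10` is expected to cast to the gDay `---10`:
+/// ```ignore
+/// // imports of the value types elided
+/// use std::str::FromStr;
+///
+/// let date = Date::from_str("2011-01-10").unwrap();
+/// assert_eq!(GDay::from(date), GDay::from_str("---10").unwrap());
+/// ```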
+impl From<Date> for GDay {
+    #[inline]
+    fn from(date: Date) -> Self {
+        Self::new(date.day(), date.timezone_offset())
+            .expect("Casting from xsd:date to xsd:gDay can't fail")
+    }
+}
+
+impl From<GMonthDay> for GDay {
+    #[inline]
+    fn from(month_day: GMonthDay) -> Self {
+        Self::new(month_day.day(), month_day.timezone_offset())
+            .expect("Casting from xsd:gMonthDay to xsd:gDay can't fail")
+    }
+}
+
+impl FromStr for GDay {
+    type Err = ParseDateTimeError;
+
+    fn from_str(input: &str) -> Result<Self, Self::Err> {
+        ensure_complete(input, g_day_lexical_rep)
+    }
+}
+
+impl fmt::Display for GDay {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "---{:02}", self.day())?;
+        if let Some(timezone_offset) = self.timezone_offset() {
+            write!(f, "{timezone_offset}")?;
+        }
+        Ok(())
+    }
+}
+
+/// A timezone offset with respect to UTC.
+///
+/// It is encoded as a number of minutes between -PT14H and PT14H.
+#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash, Serialize, Deserialize)]
+pub struct TimezoneOffset {
+    offset: i16, // in minutes with respect to UTC
+}
+
+impl TimezoneOffset {
+    pub const MAX: Self = Self { offset: 14 * 60 };
+    pub const MIN: Self = Self { offset: -14 * 60 };
+    pub const UTC: Self = Self { offset: 0 };
+
+    /// From an offset in minutes with respect to UTC
+    #[inline]
+    pub fn new(offset_in_minutes: i16) -> Result<Self, InvalidTimezoneError> {
+        let value = Self {
+            offset: offset_in_minutes,
+        };
+        if Self::MIN <= value && value <= Self::MAX {
+            Ok(value)
+        } else {
+            Err(InvalidTimezoneError {
+                offset_in_minutes: offset_in_minutes.into(),
+            })
+        }
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn from_be_bytes(bytes: [u8; 2]) -> Self {
+        Self {
+            offset: i16::from_be_bytes(bytes),
+        }
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn to_be_bytes(self) -> [u8; 2] {
+        self.offset.to_be_bytes()
+    }
+}
+
+impl TryFrom<DayTimeDuration> for TimezoneOffset {
+    type Error = InvalidTimezoneError;
+
+    #[inline]
+    fn try_from(value: DayTimeDuration) -> Result<Self, Self::Error> {
+        let offset_in_minutes = value.minutes() + value.hours() * 60;
+        let result = Self::new(
+            offset_in_minutes
+                .try_into()
+                .map_err(|_| Self::Error { offset_in_minutes })?,
+        )?;
+        if DayTimeDuration::from(result) == value {
+            Ok(result)
+        } else {
+            // The duration is not an integral number of minutes, or it overflows
+            Err(Self::Error { offset_in_minutes })
+        }
+    }
+}
+
+impl TryFrom<Duration> for TimezoneOffset {
+    type Error = InvalidTimezoneError;
+
+    #[inline]
+    fn try_from(value: Duration) -> Result<Self, Self::Error> {
+        DayTimeDuration::try_from(value)
+            .map_err(|_| Self::Error {
+                offset_in_minutes: 0,
+            })?
+ .try_into() + } +} + +impl From<TimezoneOffset> for DayTimeDuration { + #[inline] + fn from(value: TimezoneOffset) -> Self { + Self::new(i64::from(value.offset) * 60) + } +} + +impl From<TimezoneOffset> for Duration { + #[inline] + fn from(value: TimezoneOffset) -> Self { + DayTimeDuration::from(value).into() + } +} + +impl fmt::Display for TimezoneOffset { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.offset { + 0 => f.write_str("Z"), + offset if offset < 0 => write!(f, "-{:02}:{:02}", -offset / 60, -offset % 60), + offset => write!(f, "+{:02}:{:02}", offset / 60, offset % 60), + } + } +} + +/// [The Date/time Seven-property model](https://www.w3.org/TR/xmlschema11-2/#dt-dt-7PropMod) +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +struct DateTimeSevenPropertyModel { + year: Option<i64>, + month: Option<u8>, + day: Option<u8>, + hour: Option<u8>, + minute: Option<u8>, + second: Option<Decimal>, + timezone_offset: Option<TimezoneOffset>, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +struct Timestamp { + value: Decimal, + timezone_offset: Option<TimezoneOffset>, +} + +impl PartialEq for Timestamp { + #[inline] + fn eq(&self, other: &Self) -> bool { + match (self.timezone_offset, other.timezone_offset) { + (Some(_), Some(_)) | (None, None) => self.value.eq(&other.value), + _ => false, // TODO: implicit timezone + } + } +} + +impl Eq for Timestamp {} + +impl PartialOrd for Timestamp { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + match (self.timezone_offset, other.timezone_offset) { + (Some(_), Some(_)) | (None, None) => self.value.partial_cmp(&other.value), + (Some(_), None) => { + let plus_result = self + .value + .partial_cmp(&(other.value.checked_add(14 * 3600)?)); + let minus_result = self + .value + .partial_cmp(&(other.value.checked_sub(14 * 3600)?)); + if plus_result == minus_result { + plus_result + } else { + None + } + } + (None, Some(_)) => { + let plus_result = self.value.checked_add(14 * 3600)?.partial_cmp(&other.value); + let minus_result = self.value.checked_sub(14 * 3600)?.partial_cmp(&other.value); + if plus_result == minus_result { + plus_result + } else { + None + } + } + } + } +} + +impl Hash for Timestamp { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + self.value.hash(state) + } +} + +impl Timestamp { + pub const MAX: Self = Self { + value: Decimal::MAX, + timezone_offset: Some(TimezoneOffset::MAX), + }; + pub const MIN: Self = Self { + value: Decimal::MIN, + timezone_offset: Some(TimezoneOffset::MIN), + }; + + #[inline] + fn new(props: &DateTimeSevenPropertyModel) -> Result<Self, DateTimeOverflowError> { + Ok(Self { + timezone_offset: props.timezone_offset, + value: time_on_timeline(props).ok_or(DateTimeOverflowError)?, + }) + } + + #[inline] + fn now() -> Self { + Self::new( + &date_time_plus_duration( + since_unix_epoch(), + &DateTimeSevenPropertyModel { + year: Some(1970), + month: Some(1), + day: Some(1), + hour: Some(0), + minute: Some(0), + second: Some(Decimal::default()), + timezone_offset: Some(TimezoneOffset::UTC), + }, + ) + .expect("The current time seems way in the future, it's strange"), + ) + .expect("The current time seems way in the future, it's strange") + } + + #[inline] + fn from_be_bytes(bytes: [u8; 18]) -> Self { + Self { + value: Decimal::from_be_bytes(bytes[0..16].try_into().unwrap()), + timezone_offset: if bytes[16..18] == [u8::MAX; 2] { + None + } else { + Some(TimezoneOffset::from_be_bytes( + bytes[16..18].try_into().unwrap(), + )) + }, + } + } + + 
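+    // Decodes the timestamp into a (year, month, day) triple by working on whole
+    // days since the era. The magic numbers below come from the Gregorian cycle
+    // structure: a 400-year cycle spans exactly 146_097 days, a century 36_524
+    // days and a 4-year cycle 1_461 days. Negative timestamps are first shifted
+    // by whole 400-year cycles so that the divisions operate on non-negative
+    // day counts.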
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] + #[inline] + #[must_use] + fn year_month_day(&self) -> (i64, u8, u8) { + let mut days = (self.value.as_i128() + + i128::from(self.timezone_offset.unwrap_or(TimezoneOffset::UTC).offset) * 60) + .div_euclid(86400) + + 366; + + // Make days positive + let shift = if days < 0 { + let shift = days / 146_097 - 1; + days -= shift * 146_097; + shift * 400 + } else { + 0 + }; + + let year_mul_400 = days / 146_097; + days -= year_mul_400 * 146_097; + + days -= 1; + let year_mul_100 = days / 36524; + days -= year_mul_100 * 36524; + + days += 1; + let year_mul_4 = days / 1461; + days -= year_mul_4 * 1461; + + days -= 1; + let year_mod_4 = days / 365; + days -= year_mod_4 * 365; + + let year = + (400 * year_mul_400 + 100 * year_mul_100 + 4 * year_mul_4 + year_mod_4 + shift) as i64; + + let is_leap_year = (year_mul_100 == 0 || year_mul_4 != 0) && year_mod_4 == 0; + days += i128::from(is_leap_year); + + let mut month = 0; + for month_i in 1..=12 { + let days_in_month = i128::from(days_in_month(Some(year), month_i)); + if days_in_month > days { + month = month_i; + break; + } + days -= days_in_month + } + let day = days as u8 + 1; + + (year, month, day) + } + + #[inline] + #[must_use] + fn year(&self) -> i64 { + let (year, _, _) = self.year_month_day(); + year + } + + #[inline] + #[must_use] + fn month(&self) -> u8 { + let (_, month, _) = self.year_month_day(); + month + } + + #[inline] + #[must_use] + fn day(&self) -> u8 { + let (_, _, day) = self.year_month_day(); + day + } + + #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] + #[inline] + #[must_use] + fn hour(&self) -> u8 { + (((self.value.as_i128() + + i128::from(self.timezone_offset.unwrap_or(TimezoneOffset::UTC).offset) * 60) + .rem_euclid(86400)) + / 3600) as u8 + } + + #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] + #[inline] + #[must_use] + fn minute(&self) -> u8 { + (((self.value.as_i128() + + i128::from(self.timezone_offset.unwrap_or(TimezoneOffset::UTC).offset) * 60) + .rem_euclid(3600)) + / 60) as u8 + } + + #[inline] + #[must_use] + fn second(&self) -> Decimal { + self.value + .checked_rem_euclid(60) + .unwrap() + .checked_abs() + .unwrap() + } + + #[inline] + #[must_use] + const fn timezone_offset(&self) -> Option<TimezoneOffset> { + self.timezone_offset + } + + #[inline] + #[must_use] + fn checked_add_seconds(&self, seconds: impl Into<Decimal>) -> Option<Self> { + Some(Self { + value: self.value.checked_add(seconds.into())?, + timezone_offset: self.timezone_offset, + }) + } + + #[inline] + #[must_use] + fn checked_sub(&self, rhs: Self) -> Option<DayTimeDuration> { + match (self.timezone_offset, rhs.timezone_offset) { + (Some(_), Some(_)) | (None, None) => { + Some(DayTimeDuration::new(self.value.checked_sub(rhs.value)?)) + } + _ => None, // TODO: implicit timezone + } + } + + #[inline] + #[must_use] + fn checked_sub_seconds(&self, seconds: Decimal) -> Option<Self> { + Some(Self { + value: self.value.checked_sub(seconds)?, + timezone_offset: self.timezone_offset, + }) + } + + #[inline] + #[must_use] + fn adjust(&self, timezone_offset: Option<TimezoneOffset>) -> Option<Self> { + Some(if let Some(from_timezone) = self.timezone_offset { + if let Some(to_timezone) = timezone_offset { + Self { + value: self.value, // We keep the timestamp + timezone_offset: Some(to_timezone), + } + } else { + Self { + value: self + .value + .checked_add(i64::from(from_timezone.offset) * 60)?, /* We keep the literal value */ + timezone_offset: None, + } + } 
+ } else if let Some(to_timezone) = timezone_offset { + Self { + value: self.value.checked_sub(i64::from(to_timezone.offset) * 60)?, /* We keep the literal value */ + timezone_offset: Some(to_timezone), + } + } else { + Self { + value: self.value, + timezone_offset: None, + } + }) + } + + #[inline] + #[must_use] + fn to_be_bytes(self) -> [u8; 18] { + let mut bytes = [0; 18]; + bytes[0..16].copy_from_slice(&self.value.to_be_bytes()); + bytes[16..18].copy_from_slice(&match &self.timezone_offset { + Some(timezone_offset) => timezone_offset.to_be_bytes(), + None => [u8::MAX; 2], + }); + bytes + } + + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). + #[inline] + #[must_use] + pub fn is_identical_with(self, other: Self) -> bool { + self.value == other.value && self.timezone_offset == other.timezone_offset + } +} + +#[cfg(feature = "custom-now")] +#[allow(unsafe_code)] +pub fn since_unix_epoch() -> Duration { + extern "Rust" { + fn custom_ox_now() -> Duration; + } + + // SAFETY: Must be defined, if not compilation fails + unsafe { custom_ox_now() } +} + +#[cfg(all( + feature = "js", + not(feature = "custom-now"), + target_family = "wasm", + target_os = "unknown" +))] +fn since_unix_epoch() -> Duration { + DayTimeDuration::new( + Decimal::try_from(crate::oxsdatatypes::Double::from( + js_sys::Date::now() / 1000., + )) + .expect("The current time seems way in the future, it's strange"), + ) + .into() +} + +#[cfg(not(any( + feature = "custom-now", + all(feature = "js", target_family = "wasm", target_os = "unknown") +)))] +fn since_unix_epoch() -> Duration { + use std::time::SystemTime; + + SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .expect("System time before UNIX epoch") + .try_into() + .expect("The current time seems way in the future, it's strange") +} + +/// The [normalizeMonth](https://www.w3.org/TR/xmlschema11-2/#f-dt-normMo) function +fn normalize_month(yr: i64, mo: i64) -> Option<(i64, u8)> { + if mo >= 0 { + let yr = yr.checked_add(mo.checked_sub(1)?.checked_div(12)?)?; + let mo = u8::try_from(mo.checked_sub(1)?.checked_rem(12)?.abs().checked_add(1)?).ok()?; + Some((yr, mo)) + } else { + // Needed to make it work with negative durations + let yr = yr.checked_add(mo.checked_sub(1)?.checked_div(12)?.checked_sub(1)?)?; + let mo = u8::try_from( + 12_i64 + .checked_add(mo.checked_sub(1)?.checked_rem(12)?)? 
+ .checked_add(1)?, + ) + .ok()?; + Some((yr, mo)) + } +} + +/// The [normalizeDa](https://www.w3.org/TR/xmlschema11-2/#f-dt-normDa) function +fn normalize_day(yr: i64, mo: i64, mut da: i64) -> Option<(i64, u8, u8)> { + let (mut yr, mut mo) = normalize_month(yr, mo)?; + loop { + if da <= 0 { + let (yr2, mo2) = normalize_month(yr, i64::from(mo).checked_sub(1)?)?; + yr = yr2; + mo = mo2; + da = da.checked_add(days_in_month(Some(yr), mo).into())?; + } else if da > days_in_month(Some(yr), mo).into() { + da = da.checked_sub(days_in_month(Some(yr), mo).into())?; + let (yr2, mo2) = normalize_month(yr, i64::from(mo).checked_add(1)?)?; + yr = yr2; + mo = mo2; + } else { + return Some((yr, mo, u8::try_from(da).ok()?)); + }; + } +} + +/// The [normalizeMinute](https://www.w3.org/TR/xmlschema11-2/#f-dt-normMi) function +fn normalize_minute(yr: i64, mo: i64, da: i64, hr: i64, mi: i64) -> Option<(i64, u8, u8, u8, u8)> { + let hr = hr.checked_add(mi.checked_div(60)?)?; + let mi = mi.checked_rem(60)?; + let da = da.checked_add(hr.checked_div(24)?)?; + let hr = hr.checked_rem(24)?; + let (yr, mo, da) = normalize_day(yr, mo, da)?; + Some((yr, mo, da, u8::try_from(hr).ok()?, u8::try_from(mi).ok()?)) +} + +/// The [normalizeSecond](https://www.w3.org/TR/xmlschema11-2/#f-dt-normSe) function +fn normalize_second( + yr: i64, + mo: i64, + da: i64, + hr: i64, + mi: i64, + se: Decimal, +) -> Option<(i64, u8, u8, u8, u8, Decimal)> { + let mi = mi.checked_add(i64::try_from(se.as_i128().checked_div(60)?).ok()?)?; // TODO: good idea? + let se = se.checked_rem(60)?; + let (yr, mo, da, hr, mi) = normalize_minute(yr, mo, da, hr, mi)?; + Some((yr, mo, da, hr, mi, se)) +} + +/// The [daysInMonth](https://www.w3.org/TR/xmlschema11-2/#f-daysInMonth) function +fn days_in_month(y: Option<i64>, m: u8) -> u8 { + match m { + 2 => { + if let Some(y) = y { + if y % 4 != 0 || (y % 100 == 0 && y % 400 != 0) { + 28 + } else { + 29 + } + } else { + 28 + } + } + 4 | 6 | 9 | 11 => 30, + _ => 31, + } +} + +/// The [dateTimePlusDuration](https://www.w3.org/TR/xmlschema11-2/#vp-dt-dateTimePlusDuration) function +fn date_time_plus_duration( + du: Duration, + dt: &DateTimeSevenPropertyModel, +) -> Option<DateTimeSevenPropertyModel> { + let yr = dt.year.unwrap_or(1); + let mo = dt.month.unwrap_or(1); + let da = dt.day.unwrap_or(1); + let hr = dt.hour.unwrap_or(0); + let mi = dt.minute.unwrap_or(0); + let se = dt.second.unwrap_or_default(); + let mo = i64::from(mo).checked_add(du.all_months())?; + let (yr, mo) = normalize_month(yr, mo)?; + let da = min(da, days_in_month(Some(yr), mo)); + let se = se.checked_add(du.all_seconds())?; + let (yr, mo, da, hr, mi, se) = + normalize_second(yr, mo.into(), da.into(), hr.into(), mi.into(), se)?; + + Some(DateTimeSevenPropertyModel { + year: dt.year.map(|_| yr), + month: dt.month.map(|_| mo), + day: dt.day.map(|_| da), + hour: dt.hour.map(|_| hr), + minute: dt.minute.map(|_| mi), + second: dt.second.map(|_| se), + timezone_offset: dt.timezone_offset, + }) +} + +/// The [timeOnTimeline](https://www.w3.org/TR/xmlschema11-2/#vp-dt-timeOnTimeline) function +fn time_on_timeline(props: &DateTimeSevenPropertyModel) -> Option<Decimal> { + let yr = props.year.map_or(1971, |y| y - 1); + let mo = props.month.unwrap_or(12); + let da = props + .day + .map_or_else(|| days_in_month(Some(yr + 1), mo) - 1, |d| d - 1); + let hr = props.hour.unwrap_or(0); + let mi = i128::from(props.minute.unwrap_or(0)) + - i128::from(props.timezone_offset.unwrap_or(TimezoneOffset::UTC).offset); + let se = props.second.unwrap_or_default(); + 
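+    // Sum everything up as seconds since the era: 31_536_000 seconds per
+    // 365-day year, plus one extra day per Gregorian leap year (every 4th year,
+    // minus centuries, plus every 400th), plus the days of the months already
+    // elapsed in the current year, then the day, hour and minute fragments.
+    // The timezone offset has already been folded into `mi` above, and the
+    // seconds are added back as an exact `Decimal` at the end.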
+    Decimal::try_from(
+        31_536_000 * i128::from(yr)
+            + 86400 * i128::from(yr.div_euclid(400) - yr.div_euclid(100) + yr.div_euclid(4))
+            + 86400
+                * (1..mo)
+                    .map(|m| i128::from(days_in_month(Some(yr + 1), m)))
+                    .sum::<i128>()
+            + 86400 * i128::from(da)
+            + 3600 * i128::from(hr)
+            + 60 * mi,
+    )
+    .ok()?
+    .checked_add(se)
+}
+
+/// A parsing error
+#[derive(Debug, thiserror::Error)]
+#[error(transparent)]
+pub struct ParseDateTimeError(#[from] ParseDateTimeErrorKind);
+
+#[derive(Debug, Clone, thiserror::Error)]
+enum ParseDateTimeErrorKind {
+    #[error("{day} is not a valid day of {month}")]
+    InvalidDayOfMonth { day: u8, month: u8 },
+    #[error(transparent)]
+    Overflow(#[from] DateTimeOverflowError),
+    #[error(transparent)]
+    InvalidTimezone(InvalidTimezoneError),
+    #[error("{0}")]
+    Message(&'static str),
+}
+
+impl ParseDateTimeError {
+    const fn msg(message: &'static str) -> Self {
+        Self(ParseDateTimeErrorKind::Message(message))
+    }
+}
+
+// [16] dateTimeLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag 'T' ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag?
+fn date_time_lexical_rep(input: &str) -> Result<(DateTime, &str), ParseDateTimeError> {
+    let (year, input) = year_frag(input)?;
+    let input = expect_char(input, '-', "The year and month must be separated by '-'")?;
+    let (month, input) = month_frag(input)?;
+    let input = expect_char(input, '-', "The month and day must be separated by '-'")?;
+    let (day, input) = day_frag(input)?;
+    let input = expect_char(input, 'T', "The date and time must be separated by 'T'")?;
+    let (hour, input) = hour_frag(input)?;
+    let input = expect_char(input, ':', "The hours and minutes must be separated by ':'")?;
+    let (minute, input) = minute_frag(input)?;
+    let input = expect_char(
+        input,
+        ':',
+        "The minutes and seconds must be separated by ':'",
+    )?;
+    let (second, input) = second_frag(input)?;
+    // We validate that 24:00:00 is the only allowed hour-24 time
+    if hour == 24 && (minute != 0 || second != Decimal::from(0)) {
+        return Err(ParseDateTimeError::msg(
+            "Times are not allowed to be after 24:00:00",
+        ));
+    }
+    let (timezone_offset, input) = optional_end(input, timezone_frag)?;
+    validate_day_of_month(Some(year), month, day)?;
+    Ok((
+        DateTime::new(year, month, day, hour, minute, second, timezone_offset)?,
+        input,
+    ))
+}
+
+// [17] timeLexicalRep ::= ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag?
+fn time_lexical_rep(input: &str) -> Result<(Time, &str), ParseDateTimeError> {
+    let (hour, input) = hour_frag(input)?;
+    let input = expect_char(input, ':', "The hours and minutes must be separated by ':'")?;
+    let (minute, input) = minute_frag(input)?;
+    let input = expect_char(
+        input,
+        ':',
+        "The minutes and seconds must be separated by ':'",
+    )?;
+    let (second, input) = second_frag(input)?;
+    // We validate that 24:00:00 is the only allowed hour-24 time
+    if hour == 24 && (minute != 0 || second != Decimal::from(0)) {
+        return Err(ParseDateTimeError::msg(
+            "Times are not allowed to be after 24:00:00",
+        ));
+    }
+    let (timezone_offset, input) = optional_end(input, timezone_frag)?;
+    Ok((Time::new(hour, minute, second, timezone_offset)?, input))
+}
+
+// [18] dateLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag timezoneFrag? Constraint: Day-of-month Representations
+fn date_lexical_rep(input: &str) -> Result<(Date, &str), ParseDateTimeError> {
+    let (year, input) = year_frag(input)?;
+    let input = expect_char(input, '-', "The year and month must be separated by '-'")?;
+    let (month, input) = month_frag(input)?;
+    let input = expect_char(input, '-', "The month and day must be separated by '-'")?;
+    let (day, input) = day_frag(input)?;
+    let (timezone_offset, input) = optional_end(input, timezone_frag)?;
+    validate_day_of_month(Some(year), month, day)?;
+    Ok((Date::new(year, month, day, timezone_offset)?, input))
+}
+
+// [19] gYearMonthLexicalRep ::= yearFrag '-' monthFrag timezoneFrag?
+fn g_year_month_lexical_rep(input: &str) -> Result<(GYearMonth, &str), ParseDateTimeError> {
+    let (year, input) = year_frag(input)?;
+    let input = expect_char(input, '-', "The year and month must be separated by '-'")?;
+    let (month, input) = month_frag(input)?;
+    let (timezone_offset, input) = optional_end(input, timezone_frag)?;
+    Ok((GYearMonth::new(year, month, timezone_offset)?, input))
+}
+
+// [20] gYearLexicalRep ::= yearFrag timezoneFrag?
+fn g_year_lexical_rep(input: &str) -> Result<(GYear, &str), ParseDateTimeError> {
+    let (year, input) = year_frag(input)?;
+    let (timezone_offset, input) = optional_end(input, timezone_frag)?;
+    Ok((GYear::new(year, timezone_offset)?, input))
+}
+
+// [21] gMonthDayLexicalRep ::= '--' monthFrag '-' dayFrag timezoneFrag? Constraint: Day-of-month Representations
+fn g_month_day_lexical_rep(input: &str) -> Result<(GMonthDay, &str), ParseDateTimeError> {
+    let input = expect_char(input, '-', "gMonthDay values must start with '--'")?;
+    let input = expect_char(input, '-', "gMonthDay values must start with '--'")?;
+    let (month, input) = month_frag(input)?;
+    let input = expect_char(input, '-', "The month and day must be separated by '-'")?;
+    let (day, input) = day_frag(input)?;
+    let (timezone_offset, input) = optional_end(input, timezone_frag)?;
+    validate_day_of_month(None, month, day)?;
+    Ok((GMonthDay::new(month, day, timezone_offset)?, input))
+}
+
+// [22] gDayLexicalRep ::= '---' dayFrag timezoneFrag?
+fn g_day_lexical_rep(input: &str) -> Result<(GDay, &str), ParseDateTimeError> {
+    let input = expect_char(input, '-', "gDay values must start with '---'")?;
+    let input = expect_char(input, '-', "gDay values must start with '---'")?;
+    let input = expect_char(input, '-', "gDay values must start with '---'")?;
+    let (day, input) = day_frag(input)?;
+    let (timezone_offset, input) = optional_end(input, timezone_frag)?;
+    Ok((GDay::new(day, timezone_offset)?, input))
+}
+
+// [23] gMonthLexicalRep ::= '--' monthFrag timezoneFrag?
+fn g_month_lexical_rep(input: &str) -> Result<(GMonth, &str), ParseDateTimeError> {
+    let input = expect_char(input, '-', "gMonth values must start with '--'")?;
+    let input = expect_char(input, '-', "gMonth values must start with '--'")?;
+    let (month, input) = month_frag(input)?;
+    let (timezone_offset, input) = optional_end(input, timezone_frag)?;
+    Ok((GMonth::new(month, timezone_offset)?, input))
+}
+
+// [56] yearFrag ::= '-'? (([1-9] digit digit digit+) | ('0' digit digit digit))
+fn year_frag(input: &str) -> Result<(i64, &str), ParseDateTimeError> {
+    let (sign, input) = if let Some(left) = input.strip_prefix('-') {
+        (-1, left)
+    } else {
+        (1, input)
+    };
+    let (number_str, input) = integer_prefix(input);
+    if number_str.len() < 4 {
+        return Err(ParseDateTimeError::msg(
+            "The year must be encoded with at least 4 digits",
+        ));
+    }
+    if number_str.len() > 4 && number_str.starts_with('0') {
+        return Err(ParseDateTimeError::msg(
+            "The year must not start with 0 if it is encoded with more than 4 digits",
+        ));
+    }
+    let number = i64::from_str(number_str).expect("valid integer");
+    Ok((sign * number, input))
+}
+
+// [57] monthFrag ::= ('0' [1-9]) | ('1' [0-2])
+fn month_frag(input: &str) -> Result<(u8, &str), ParseDateTimeError> {
+    let (number_str, input) = integer_prefix(input);
+    if number_str.len() != 2 {
+        return Err(ParseDateTimeError::msg(
+            "Month must be encoded with two digits",
+        ));
+    }
+    let number = u8::from_str(number_str).expect("valid integer");
+    if !(1..=12).contains(&number) {
+        return Err(ParseDateTimeError::msg("Month must be between 01 and 12"));
+    }
+    Ok((number, input))
+}
+
+// [58] dayFrag ::= ('0' [1-9]) | ([12] digit) | ('3' [01])
+fn day_frag(input: &str) -> Result<(u8, &str), ParseDateTimeError> {
+    let (number_str, input) = integer_prefix(input);
+    if number_str.len() != 2 {
+        return Err(ParseDateTimeError::msg(
+            "Day must be encoded with two digits",
+        ));
+    }
+    let number = u8::from_str(number_str).expect("valid integer");
+    if !(1..=31).contains(&number) {
+        return Err(ParseDateTimeError::msg("Day must be between 01 and 31"));
+    }
+    Ok((number, input))
+}
+
+// [59] hourFrag ::= ([01] digit) | ('2' [0-3])
+// We also allow 24 for ease of parsing
+fn hour_frag(input: &str) -> Result<(u8, &str), ParseDateTimeError> {
+    let (number_str, input) = integer_prefix(input);
+    if number_str.len() != 2 {
+        return Err(ParseDateTimeError::msg(
+            "Hours must be encoded with two digits",
+        ));
+    }
+    let number = u8::from_str(number_str).expect("valid integer");
+    if !(0..=24).contains(&number) {
+        return Err(ParseDateTimeError::msg("Hours must be between 00 and 24"));
+    }
+    Ok((number, input))
+}
+
+// [60] minuteFrag ::= [0-5] digit
+fn minute_frag(input: &str) -> Result<(u8, &str), ParseDateTimeError> {
+    let (number_str, input) = integer_prefix(input);
+    if number_str.len() != 2 {
+        return Err(ParseDateTimeError::msg(
+            "Minutes must be encoded with two digits",
+        ));
+    }
+    let number = u8::from_str(number_str).expect("valid integer");
+    if !(0..=59).contains(&number) {
+        return Err(ParseDateTimeError::msg("Minutes must be between 00 and 59"));
+    }
+    Ok((number, input))
+}
+
+// [61] secondFrag ::= ([0-5] digit) ('.' digit+)?
+fn second_frag(input: &str) -> Result<(Decimal, &str), ParseDateTimeError> {
+    let (number_str, input) = decimal_prefix(input);
+    let (before_dot_str, _) = number_str.split_once('.').unwrap_or((number_str, ""));
+    if before_dot_str.len() != 2 {
+        return Err(ParseDateTimeError::msg(
+            "Seconds must be encoded with two digits",
+        ));
+    }
+    let number = Decimal::from_str(number_str)
+        .map_err(|_| ParseDateTimeError::msg("The second precision is too large"))?;
+    if number < Decimal::from(0) || number >= Decimal::from(60) {
+        return Err(ParseDateTimeError::msg(
+            "Seconds must be at least 00 and less than 60",
+        ));
+    }
+    if number_str.ends_with('.') {
+        return Err(ParseDateTimeError::msg(
+            "Seconds are not allowed to end with a dot",
+        ));
+    }
+    Ok((number, input))
+}
+
+// [63] timezoneFrag ::= 'Z' | ('+' | '-') (('0' digit | '1' [0-3]) ':' minuteFrag | '14:00')
+fn timezone_frag(input: &str) -> Result<(TimezoneOffset, &str), ParseDateTimeError> {
+    if let Some(left) = input.strip_prefix('Z') {
+        return Ok((TimezoneOffset::UTC, left));
+    }
+    let (sign, input) = if let Some(left) = input.strip_prefix('-') {
+        (-1, left)
+    } else if let Some(left) = input.strip_prefix('+') {
+        (1, left)
+    } else {
+        (1, input)
+    };
+
+    let (hour_str, input) = integer_prefix(input);
+    if hour_str.len() != 2 {
+        return Err(ParseDateTimeError::msg(
+            "The timezone hours must be encoded with two digits",
+        ));
+    }
+    let hours = i16::from_str(hour_str).expect("valid integer");
+
+    let input = expect_char(
+        input,
+        ':',
+        "The timezone hours and minutes must be separated by ':'",
+    )?;
+    let (minutes, input) = minute_frag(input)?;
+
+    if hours > 13 && !(hours == 14 && minutes == 0) {
+        return Err(ParseDateTimeError::msg(
+            "The timezone hours must be between 00 and 13, unless the timezone is exactly 14:00",
+        ));
+    }
+
+    Ok((
+        TimezoneOffset::new(sign * (hours * 60 + i16::from(minutes)))
+            .map_err(|e| ParseDateTimeError(ParseDateTimeErrorKind::InvalidTimezone(e)))?,
+        input,
+    ))
+}
+
+fn ensure_complete<T>(
+    input: &str,
+    parse: impl FnOnce(&str) -> Result<(T, &str), ParseDateTimeError>,
+) -> Result<T, ParseDateTimeError> {
+    let (result, left) = parse(input)?;
+    if !left.is_empty() {
+        return Err(ParseDateTimeError::msg("Unrecognized value suffix"));
+    }
+    Ok(result)
+}
+
+fn expect_char<'a>(
+    input: &'a str,
+    constant: char,
+    error_message: &'static str,
+) -> Result<&'a str, ParseDateTimeError> {
+    if let Some(left) = input.strip_prefix(constant) {
+        Ok(left)
+    } else {
+        Err(ParseDateTimeError::msg(error_message))
+    }
+}
+
+fn integer_prefix(input: &str) -> (&str, &str) {
+    let mut end = input.len();
+    for (i, c) in input.char_indices() {
+        if !c.is_ascii_digit() {
+            end = i;
+            break;
+        }
+    }
+    input.split_at(end)
+}
+
+fn decimal_prefix(input: &str) -> (&str, &str) {
+    let mut end = input.len();
+    let mut dot_seen = false;
+    for (i, c) in input.char_indices() {
+        if c.is_ascii_digit() {
+            // Ok
+        } else if c == '.'
&& !dot_seen { + dot_seen = true; + } else { + end = i; + break; + } + } + input.split_at(end) +} + +fn optional_end<T>( + input: &str, + parse: impl FnOnce(&str) -> Result<(T, &str), ParseDateTimeError>, +) -> Result<(Option<T>, &str), ParseDateTimeError> { + Ok(if input.is_empty() { + (None, input) + } else { + let (result, input) = parse(input)?; + (Some(result), input) + }) +} + +fn validate_day_of_month(year: Option<i64>, month: u8, day: u8) -> Result<(), ParseDateTimeError> { + // Constraint: Day-of-month Values + if day > days_in_month(year, month) { + return Err(ParseDateTimeError( + ParseDateTimeErrorKind::InvalidDayOfMonth { day, month }, + )); + } + Ok(()) +} + +/// An overflow during [`DateTime`]-related operations. +/// +/// Matches XPath [`FODT0001` error](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0001). +#[derive(Debug, Clone, Copy, thiserror::Error)] +#[error("overflow during xsd:dateTime computation")] +pub struct DateTimeOverflowError; + +impl From<DateTimeOverflowError> for ParseDateTimeError { + fn from(error: DateTimeOverflowError) -> Self { + Self(ParseDateTimeErrorKind::Overflow(error)) + } +} + +/// The value provided as timezone is not valid. +/// +/// Matches XPath [`FODT0003` error](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0003). +#[derive(Debug, Clone, Copy, thiserror::Error)] +#[error("invalid timezone offset {}:{}", + self.offset_in_minutes / 60, + self.offset_in_minutes.abs() % 60)] +pub struct InvalidTimezoneError { + offset_in_minutes: i64, +} + +#[cfg(test)] +#[allow(clippy::panic_in_result_fn)] +mod tests { + use super::*; + use std::error::Error; + + #[test] + fn from_str() -> Result<(), ParseDateTimeError> { + assert_eq!(Time::from_str("00:00:00Z")?.to_string(), "00:00:00Z"); + assert_eq!(Time::from_str("00:00:00+00:00")?.to_string(), "00:00:00Z"); + assert_eq!(Time::from_str("00:00:00-00:00")?.to_string(), "00:00:00Z"); + assert_eq!(Time::from_str("00:00:00")?.to_string(), "00:00:00"); + assert_eq!( + Time::from_str("00:00:00+02:00")?.to_string(), + "00:00:00+02:00" + ); + assert_eq!( + Time::from_str("00:00:00+14:00")?.to_string(), + "00:00:00+14:00" + ); + assert_eq!(Time::from_str("24:00:00")?.to_string(), "00:00:00"); + assert_eq!(Time::from_str("24:00:00.00")?.to_string(), "00:00:00"); + assert_eq!( + Time::from_str("23:59:59.9999999999")?.to_string(), + "23:59:59.9999999999" + ); + + assert_eq!(Date::from_str("0001-01-01Z")?.to_string(), "0001-01-01Z"); + assert_eq!(Date::from_str("0001-01-01")?.to_string(), "0001-01-01"); + assert_eq!( + DateTime::from_str("0001-01-01T00:00:00Z")?.to_string(), + "0001-01-01T00:00:00Z" + ); + assert_eq!( + DateTime::from_str("0001-01-01T00:00:00")?.to_string(), + "0001-01-01T00:00:00" + ); + assert_eq!( + DateTime::from_str("1000000000-01-01T00:00:00")?.to_string(), + "1000000000-01-01T00:00:00" + ); + assert_eq!( + DateTime::from_str("2001-12-31T23:59:59")?.to_string(), + "2001-12-31T23:59:59" + ); + assert_eq!( + DateTime::from_str("2004-12-31T23:59:59")?.to_string(), + "2004-12-31T23:59:59" + ); + assert_eq!( + DateTime::from_str("1900-12-31T23:59:59")?.to_string(), + "1900-12-31T23:59:59" + ); + assert_eq!( + DateTime::from_str("2000-12-31T23:59:59")?.to_string(), + "2000-12-31T23:59:59", + ); + assert_eq!( + DateTime::from_str("1899-12-31T23:59:59")?.to_string(), + "1899-12-31T23:59:59" + ); + + assert_eq!( + DateTime::from_str("2001-02-28T23:59:59")?.to_string(), + "2001-02-28T23:59:59" + ); + assert_eq!( + DateTime::from_str("2004-02-29T23:59:59")?.to_string(), + "2004-02-29T23:59:59" + ); + 
assert_eq!( + DateTime::from_str("1900-02-28T23:59:59")?.to_string(), + "1900-02-28T23:59:59" + ); + assert_eq!( + DateTime::from_str("2000-02-29T23:59:59")?.to_string(), + "2000-02-29T23:59:59", + ); + assert_eq!( + DateTime::from_str("1899-02-28T23:59:59")?.to_string(), + "1899-02-28T23:59:59" + ); + assert_eq!( + DateTime::from_str("2001-03-01T00:00:00")?.to_string(), + "2001-03-01T00:00:00" + ); + assert_eq!( + DateTime::from_str("2004-03-01T00:00:00")?.to_string(), + "2004-03-01T00:00:00" + ); + assert_eq!( + DateTime::from_str("1900-03-01T00:00:00")?.to_string(), + "1900-03-01T00:00:00" + ); + assert_eq!( + DateTime::from_str("2000-03-01T00:00:00")?.to_string(), + "2000-03-01T00:00:00", + ); + assert_eq!( + DateTime::from_str("1899-03-01T00:00:00")?.to_string(), + "1899-03-01T00:00:00" + ); + assert_eq!( + DateTime::from_str("-0899-03-01T00:00:00")?.to_string(), + "-0899-03-01T00:00:00" + ); + assert_eq!( + DateTime::from_str("2000-01-01T00:00:00.1234567")?.to_string(), + "2000-01-01T00:00:00.1234567" + ); + assert_eq!( + DateTime::from_str("2000-01-01T00:00:12.1234567")?.to_string(), + "2000-01-01T00:00:12.1234567" + ); + assert_eq!( + Time::from_str("01:02:03.1234567")?.to_string(), + "01:02:03.1234567" + ); + assert_eq!( + Time::from_str("01:02:13.1234567")?.to_string(), + "01:02:13.1234567" + ); + + assert_eq!( + DateTime::from_str("-1000000000-01-01T00:00:00")?.to_string(), + "-1000000000-01-01T00:00:00" + ); + assert_eq!( + DateTime::from_str("-2001-12-31T23:59:59")?.to_string(), + "-2001-12-31T23:59:59" + ); + assert_eq!( + DateTime::from_str("-2004-12-31T23:59:59")?.to_string(), + "-2004-12-31T23:59:59" + ); + assert_eq!( + DateTime::from_str("-1900-12-31T23:59:59")?.to_string(), + "-1900-12-31T23:59:59" + ); + assert_eq!( + DateTime::from_str("-2000-12-31T23:59:59")?.to_string(), + "-2000-12-31T23:59:59" + ); + assert_eq!( + DateTime::from_str("-1899-12-31T23:59:59")?.to_string(), + "-1899-12-31T23:59:59" + ); + + assert_eq!( + GYearMonth::from_str("-1899-12+01:00")?.to_string(), + "-1899-12+01:00" + ); + assert_eq!(GYearMonth::from_str("-1899-12")?.to_string(), "-1899-12"); + assert_eq!(GYear::from_str("-1899+01:00")?.to_string(), "-1899+01:00"); + assert_eq!(GYear::from_str("-1899")?.to_string(), "-1899"); + assert_eq!( + GMonthDay::from_str("--01-01+01:00")?.to_string(), + "--01-01+01:00" + ); + assert_eq!(GMonthDay::from_str("--01-01")?.to_string(), "--01-01"); + assert_eq!(GDay::from_str("---01+01:00")?.to_string(), "---01+01:00"); + assert_eq!(GDay::from_str("---01")?.to_string(), "---01"); + assert_eq!(GMonth::from_str("--01+01:00")?.to_string(), "--01+01:00"); + assert_eq!(GMonth::from_str("--01")?.to_string(), "--01"); + + GYear::from_str("02020").unwrap_err(); + GYear::from_str("+2020").unwrap_err(); + GYear::from_str("33").unwrap_err(); + + assert_eq!(Time::from_str("00:00:00+14:00")?, Time::MIN); + assert_eq!(Time::from_str("24:00:00-14:00")?, Time::MAX); + Ok(()) + } + + #[test] + fn to_be_bytes() -> Result<(), ParseDateTimeError> { + assert_eq!( + DateTime::from_be_bytes(DateTime::MIN.to_be_bytes()), + DateTime::MIN + ); + assert_eq!( + DateTime::from_be_bytes(DateTime::MAX.to_be_bytes()), + DateTime::MAX + ); + assert_eq!( + DateTime::from_be_bytes(DateTime::from_str("2022-01-03T01:02:03")?.to_be_bytes()), + DateTime::from_str("2022-01-03T01:02:03")? 
+ ); + assert_eq!(Date::from_be_bytes(Date::MIN.to_be_bytes()), Date::MIN); + assert_eq!(Date::from_be_bytes(Date::MAX.to_be_bytes()), Date::MAX); + assert_eq!( + Date::from_be_bytes(Date::from_str("2022-01-03")?.to_be_bytes()), + Date::from_str("2022-01-03")? + ); + assert_eq!(Time::from_be_bytes(Time::MIN.to_be_bytes()), Time::MIN); + assert_eq!(Time::from_be_bytes(Time::MAX.to_be_bytes()), Time::MAX); + assert_eq!( + Time::from_be_bytes(Time::from_str("01:02:03")?.to_be_bytes()), + Time::from_str("01:02:03")? + ); + assert_eq!( + Time::from_be_bytes(Time::from_str("01:02:03")?.to_be_bytes()), + Time::from_str("01:02:03")? + ); + assert_eq!( + GYearMonth::from_be_bytes(GYearMonth::MIN.to_be_bytes()), + GYearMonth::MIN + ); + assert_eq!( + GYearMonth::from_be_bytes(GYearMonth::MAX.to_be_bytes()), + GYearMonth::MAX + ); + assert_eq!(GYear::from_be_bytes(GYear::MIN.to_be_bytes()), GYear::MIN); + assert_eq!(GYear::from_be_bytes(GYear::MAX.to_be_bytes()), GYear::MAX); + Ok(()) + } + + #[test] + fn equals() -> Result<(), ParseDateTimeError> { + assert_eq!( + DateTime::from_str("2002-04-02T12:00:00-01:00")?, + DateTime::from_str("2002-04-02T17:00:00+04:00")? + ); + assert_eq!( + DateTime::from_str("2002-04-02T12:00:00-05:00")?, + DateTime::from_str("2002-04-02T23:00:00+06:00")? + ); + assert_ne!( + DateTime::from_str("2002-04-02T12:00:00-05:00")?, + DateTime::from_str("2002-04-02T17:00:00-05:00")? + ); + assert_eq!( + DateTime::from_str("2002-04-02T12:00:00-05:00")?, + DateTime::from_str("2002-04-02T12:00:00-05:00")? + ); + assert_eq!( + DateTime::from_str("2002-04-02T23:00:00-04:00")?, + DateTime::from_str("2002-04-03T02:00:00-01:00")? + ); + assert_eq!( + DateTime::from_str("1999-12-31T24:00:00-05:00")?, + DateTime::from_str("2000-01-01T00:00:00-05:00")? + ); + assert_ne!( + DateTime::from_str("2005-04-04T24:00:00-05:00")?, + DateTime::from_str("2005-04-04T00:00:00-05:00")? + ); + + assert_ne!( + Date::from_str("2004-12-25Z")?, + Date::from_str("2004-12-25+07:00")? + ); + assert_eq!( + Date::from_str("2004-12-25-12:00")?, + Date::from_str("2004-12-26+12:00")? + ); + + assert_ne!( + Time::from_str("08:00:00+09:00")?, + Time::from_str("17:00:00-06:00")? + ); + assert_eq!( + Time::from_str("21:30:00+10:30")?, + Time::from_str("06:00:00-05:00")? + ); + assert_eq!( + Time::from_str("24:00:00+01:00")?, + Time::from_str("00:00:00+01:00")? + ); + + assert_eq!( + Time::from_str("05:00:00-03:00")?, + Time::from_str("10:00:00+02:00")? + ); + assert_ne!( + Time::from_str("23:00:00-03:00")?, + Time::from_str("02:00:00Z")? + ); + + assert_ne!( + GYearMonth::from_str("1986-02")?, + GYearMonth::from_str("1986-03")? + ); + assert_ne!( + GYearMonth::from_str("1978-03")?, + GYearMonth::from_str("1978-03Z")? + ); + + assert_ne!( + GYear::from_str("2005-12:00")?, + GYear::from_str("2005+12:00")? + ); + assert_ne!(GYear::from_str("1976-05:00")?, GYear::from_str("1976")?); + + assert_eq!( + GMonthDay::from_str("--12-25-14:00")?, + GMonthDay::from_str("--12-26+10:00")? + ); + assert_ne!( + GMonthDay::from_str("--12-25")?, + GMonthDay::from_str("--12-26Z")? + ); + + assert_ne!( + GMonth::from_str("--12-14:00")?, + GMonth::from_str("--12+10:00")? + ); + assert_ne!(GMonth::from_str("--12")?, GMonth::from_str("--12Z")?); + + assert_ne!( + GDay::from_str("---25-14:00")?, + GDay::from_str("---25+10:00")? 
+ ); + assert_ne!(GDay::from_str("---12")?, GDay::from_str("---12Z")?); + Ok(()) + } + + #[test] + #[allow(clippy::neg_cmp_op_on_partial_ord)] + fn cmp() -> Result<(), ParseDateTimeError> { + assert!(Date::from_str("2004-12-25Z")? < Date::from_str("2004-12-25-05:00")?); + assert!(!(Date::from_str("2004-12-25-12:00")? < Date::from_str("2004-12-26+12:00")?)); + + assert!(Date::from_str("2004-12-25Z")? > Date::from_str("2004-12-25+07:00")?); + assert!(!(Date::from_str("2004-12-25-12:00")? > Date::from_str("2004-12-26+12:00")?)); + + assert!(!(Time::from_str("12:00:00")? < Time::from_str("23:00:00+06:00")?)); + assert!(Time::from_str("11:00:00-05:00")? < Time::from_str("17:00:00Z")?); + assert!(!(Time::from_str("23:59:59")? < Time::from_str("24:00:00")?)); + + assert!(!(Time::from_str("08:00:00+09:00")? > Time::from_str("17:00:00-06:00")?)); + + assert!(GMonthDay::from_str("--12-12+13:00")? < GMonthDay::from_str("--12-12+11:00")?); + assert!(GDay::from_str("---15")? < GDay::from_str("---16")?); + assert!(GDay::from_str("---15-13:00")? > GDay::from_str("---16+13:00")?); + assert_eq!( + GDay::from_str("---15-11:00")?, + GDay::from_str("---16+13:00")? + ); + assert!(GDay::from_str("---15-13:00")? + .partial_cmp(&GDay::from_str("---16")?) + .is_none()); + Ok(()) + } + + #[test] + fn year() -> Result<(), ParseDateTimeError> { + assert_eq!( + DateTime::from_str("1999-05-31T13:20:00-05:00")?.year(), + 1999 + ); + assert_eq!( + DateTime::from_str("1999-05-31T21:30:00-05:00")?.year(), + 1999 + ); + assert_eq!(DateTime::from_str("1999-12-31T19:20:00")?.year(), 1999); + assert_eq!(DateTime::from_str("1999-12-31T24:00:00")?.year(), 2000); + assert_eq!(DateTime::from_str("-0002-06-06T00:00:00")?.year(), -2); + + assert_eq!(Date::from_str("1999-05-31")?.year(), 1999); + assert_eq!(Date::from_str("2000-01-01+05:00")?.year(), 2000); + assert_eq!(Date::from_str("-0002-06-01")?.year(), -2); + + assert_eq!(GYear::from_str("-0002")?.year(), -2); + assert_eq!(GYearMonth::from_str("-0002-02")?.year(), -2); + Ok(()) + } + + #[test] + fn month() -> Result<(), ParseDateTimeError> { + assert_eq!(DateTime::from_str("1999-05-31T13:20:00-05:00")?.month(), 5); + assert_eq!(DateTime::from_str("1999-12-31T19:20:00-05:00")?.month(), 12); + + assert_eq!(Date::from_str("1999-05-31-05:00")?.month(), 5); + assert_eq!(Date::from_str("2000-01-01+05:00")?.month(), 1); + + assert_eq!(GMonth::from_str("--02")?.month(), 2); + assert_eq!(GYearMonth::from_str("-0002-02")?.month(), 2); + assert_eq!(GMonthDay::from_str("--02-03")?.month(), 2); + Ok(()) + } + + #[test] + fn day() -> Result<(), ParseDateTimeError> { + assert_eq!(DateTime::from_str("1999-05-31T13:20:00-05:00")?.day(), 31); + assert_eq!(DateTime::from_str("1999-12-31T20:00:00-05:00")?.day(), 31); + + assert_eq!(Date::from_str("1999-05-31-05:00")?.day(), 31); + assert_eq!(Date::from_str("2000-01-01+05:00")?.day(), 1); + + assert_eq!(GDay::from_str("---03")?.day(), 3); + assert_eq!(GMonthDay::from_str("--02-03")?.day(), 3); + Ok(()) + } + + #[test] + fn hour() -> Result<(), ParseDateTimeError> { + assert_eq!(DateTime::from_str("1999-05-31T08:20:00-05:00")?.hour(), 8); + assert_eq!(DateTime::from_str("1999-12-31T21:20:00-05:00")?.hour(), 21); + assert_eq!(DateTime::from_str("1999-12-31T12:00:00")?.hour(), 12); + assert_eq!(DateTime::from_str("1999-12-31T24:00:00")?.hour(), 0); + + assert_eq!(Time::from_str("11:23:00-05:00")?.hour(), 11); + assert_eq!(Time::from_str("21:23:00-05:00")?.hour(), 21); + assert_eq!(Time::from_str("01:23:00+05:00")?.hour(), 1); + 
assert_eq!(Time::from_str("24:00:00")?.hour(), 0); + Ok(()) + } + + #[test] + fn minute() -> Result<(), ParseDateTimeError> { + assert_eq!( + DateTime::from_str("1999-05-31T13:20:00-05:00")?.minute(), + 20 + ); + assert_eq!( + DateTime::from_str("1999-05-31T13:30:00+05:30")?.minute(), + 30 + ); + + assert_eq!(Time::from_str("13:00:00Z")?.minute(), 0); + Ok(()) + } + + #[test] + fn second() -> Result<(), Box<dyn Error>> { + assert_eq!( + DateTime::from_str("1999-05-31T13:20:00-05:00")?.second(), + Decimal::from(0) + ); + + assert_eq!( + Time::from_str("13:20:10.5")?.second(), + Decimal::from_str("10.5")? + ); + Ok(()) + } + + #[test] + fn timezone() -> Result<(), Box<dyn Error>> { + assert_eq!( + DateTime::from_str("1999-05-31T13:20:00-05:00")?.timezone(), + Some(DayTimeDuration::from_str("-PT5H")?) + ); + assert_eq!( + DateTime::from_str("2000-06-12T13:20:00Z")?.timezone(), + Some(DayTimeDuration::from_str("PT0S")?) + ); + assert_eq!(DateTime::from_str("2004-08-27T00:00:00")?.timezone(), None); + + assert_eq!( + Date::from_str("1999-05-31-05:00")?.timezone(), + Some(DayTimeDuration::from_str("-PT5H")?) + ); + assert_eq!( + Date::from_str("2000-06-12Z")?.timezone(), + Some(DayTimeDuration::from_str("PT0S")?) + ); + + assert_eq!( + Time::from_str("13:20:00-05:00")?.timezone(), + Some(DayTimeDuration::from_str("-PT5H")?) + ); + assert_eq!(Time::from_str("13:20:00")?.timezone(), None); + Ok(()) + } + + #[test] + fn sub() -> Result<(), Box<dyn Error>> { + assert_eq!( + DateTime::from_str("2000-10-30T06:12:00-05:00")? + .checked_sub(DateTime::from_str("1999-11-28T09:00:00Z")?), + Some(DayTimeDuration::from_str("P337DT2H12M")?) + ); + + assert_eq!( + Date::from_str("2000-10-30")?.checked_sub(Date::from_str("1999-11-28")?), + Some(DayTimeDuration::from_str("P337D")?) + ); + assert_eq!( + Date::from_str("2000-10-30+05:00")?.checked_sub(Date::from_str("1999-11-28Z")?), + Some(DayTimeDuration::from_str("P336DT19H")?) + ); + assert_eq!( + Date::from_str("2000-10-15-05:00")?.checked_sub(Date::from_str("2000-10-10+02:00")?), + Some(DayTimeDuration::from_str("P5DT7H")?) + ); + + assert_eq!( + Time::from_str("11:12:00Z")?.checked_sub(Time::from_str("04:00:00-05:00")?), + Some(DayTimeDuration::from_str("PT2H12M")?) + ); + assert_eq!( + Time::from_str("11:00:00-05:00")?.checked_sub(Time::from_str("21:30:00+05:30")?), + Some(DayTimeDuration::from_str("PT0S")?) + ); + assert_eq!( + Time::from_str("17:00:00-06:00")?.checked_sub(Time::from_str("08:00:00+09:00")?), + Some(DayTimeDuration::from_str("P1D")?) + ); + assert_eq!( + Time::from_str("24:00:00")?.checked_sub(Time::from_str("23:59:59")?), + Some(DayTimeDuration::from_str("-PT23H59M59S")?) + ); + Ok(()) + } + + #[test] + fn add_duration() -> Result<(), Box<dyn Error>> { + assert_eq!( + DateTime::from_str("2000-01-12T12:13:14Z")? + .checked_add_duration(Duration::from_str("P1Y3M5DT7H10M3.3S")?), + Some(DateTime::from_str("2001-04-17T19:23:17.3Z")?) + ); + assert_eq!( + Date::from_str("2000-01-01")?.checked_add_duration(Duration::from_str("-P3M")?), + Some(Date::from_str("1999-10-01")?) + ); + assert_eq!( + Date::from_str("2000-01-12")?.checked_add_duration(Duration::from_str("PT33H")?), + Some(Date::from_str("2000-01-13")?) + ); + assert_eq!( + Date::from_str("2000-03-30")?.checked_add_duration(Duration::from_str("P1D")?), + Some(Date::from_str("2000-03-31")?) + ); + assert_eq!( + Date::from_str("2000-03-31")?.checked_add_duration(Duration::from_str("P1M")?), + Some(Date::from_str("2000-04-30")?) 
+ ); + assert_eq!( + Date::from_str("2000-03-30")?.checked_add_duration(Duration::from_str("P1M")?), + Some(Date::from_str("2000-04-30")?) + ); + assert_eq!( + Date::from_str("2000-04-30")?.checked_add_duration(Duration::from_str("P1D")?), + Some(Date::from_str("2000-05-01")?) + ); + + assert_eq!( + DateTime::from_str("2000-10-30T11:12:00")? + .checked_add_duration(Duration::from_str("P1Y2M")?), + Some(DateTime::from_str("2001-12-30T11:12:00")?) + ); + assert_eq!( + DateTime::from_str("2000-10-30T11:12:00")? + .checked_add_duration(Duration::from_str("P3DT1H15M")?), + Some(DateTime::from_str("2000-11-02T12:27:00")?) + ); + + assert_eq!( + Date::from_str("2000-10-30")?.checked_add_duration(Duration::from_str("P1Y2M")?), + Some(Date::from_str("2001-12-30")?) + ); + assert_eq!( + Date::from_str("2004-10-30Z")?.checked_add_duration(Duration::from_str("P2DT2H30M0S")?), + Some(Date::from_str("2004-11-01Z")?) + ); + + assert_eq!( + Time::from_str("11:12:00")?.checked_add_duration(Duration::from_str("P3DT1H15M")?), + Some(Time::from_str("12:27:00")?) + ); + assert_eq!( + Time::from_str("23:12:00+03:00")? + .checked_add_duration(Duration::from_str("P1DT3H15M")?), + Some(Time::from_str("02:27:00+03:00")?) + ); + Ok(()) + } + + #[test] + fn sub_duration() -> Result<(), Box<dyn Error>> { + assert_eq!( + DateTime::from_str("2000-10-30T11:12:00")? + .checked_sub_duration(Duration::from_str("P1Y2M")?), + Some(DateTime::from_str("1999-08-30T11:12:00")?) + ); + assert_eq!( + DateTime::from_str("2000-10-30T11:12:00")? + .checked_sub_duration(Duration::from_str("P3DT1H15M")?), + Some(DateTime::from_str("2000-10-27T09:57:00")?) + ); + + assert_eq!( + Date::from_str("2000-10-30")?.checked_sub_duration(Duration::from_str("P1Y2M")?), + Some(Date::from_str("1999-08-30")?) + ); + assert_eq!( + Date::from_str("2000-02-29Z")?.checked_sub_duration(Duration::from_str("P1Y")?), + Some(Date::from_str("1999-02-28Z")?) + ); + assert_eq!( + Date::from_str("2000-10-31-05:00")?.checked_sub_duration(Duration::from_str("P1Y1M")?), + Some(Date::from_str("1999-09-30-05:00")?) + ); + assert_eq!( + Date::from_str("2000-10-30")?.checked_sub_duration(Duration::from_str("P3DT1H15M")?), + Some(Date::from_str("2000-10-26")?) + ); + + assert_eq!( + Time::from_str("11:12:00")?.checked_sub_duration(Duration::from_str("P3DT1H15M")?), + Some(Time::from_str("09:57:00")?) + ); + assert_eq!( + Time::from_str("08:20:00-05:00")? + .checked_sub_duration(Duration::from_str("P23DT10H10M")?), + Some(Time::from_str("22:10:00-05:00")?) + ); + Ok(()) + } + + #[test] + fn adjust() -> Result<(), Box<dyn Error>> { + assert_eq!( + DateTime::from_str("2002-03-07T10:00:00-07:00")? + .adjust(Some(DayTimeDuration::from_str("PT10H")?.try_into()?)), + Some(DateTime::from_str("2002-03-08T03:00:00+10:00")?) + ); + assert_eq!( + DateTime::from_str("2002-03-07T00:00:00+01:00")? + .adjust(Some(DayTimeDuration::from_str("-PT8H")?.try_into()?)), + Some(DateTime::from_str("2002-03-06T15:00:00-08:00")?) + ); + assert_eq!( + DateTime::from_str("2002-03-07T10:00:00")?.adjust(None), + Some(DateTime::from_str("2002-03-07T10:00:00")?) + ); + assert_eq!( + DateTime::from_str("2002-03-07T10:00:00-07:00")?.adjust(None), + Some(DateTime::from_str("2002-03-07T10:00:00")?) + ); + + assert_eq!( + Date::from_str("2002-03-07")? + .adjust(Some(DayTimeDuration::from_str("-PT10H")?.try_into()?)), + Some(Date::from_str("2002-03-07-10:00")?) + ); + assert_eq!( + Date::from_str("2002-03-07-07:00")? 
+ .adjust(Some(DayTimeDuration::from_str("-PT10H")?.try_into()?)), + Some(Date::from_str("2002-03-06-10:00")?) + ); + assert_eq!( + Date::from_str("2002-03-07")?.adjust(None), + Some(Date::from_str("2002-03-07")?) + ); + assert_eq!( + Date::from_str("2002-03-07-07:00")?.adjust(None), + Some(Date::from_str("2002-03-07")?) + ); + + assert_eq!( + Time::from_str("10:00:00")? + .adjust(Some(DayTimeDuration::from_str("-PT10H")?.try_into()?)), + Some(Time::from_str("10:00:00-10:00")?) + ); + assert_eq!( + Time::from_str("10:00:00-07:00")? + .adjust(Some(DayTimeDuration::from_str("-PT10H")?.try_into()?)), + Some(Time::from_str("07:00:00-10:00")?) + ); + assert_eq!( + Time::from_str("10:00:00")?.adjust(None), + Some(Time::from_str("10:00:00")?) + ); + assert_eq!( + Time::from_str("10:00:00-07:00")?.adjust(None), + Some(Time::from_str("10:00:00")?) + ); + assert_eq!( + Time::from_str("10:00:00-07:00")? + .adjust(Some(DayTimeDuration::from_str("PT10H")?.try_into()?)), + Some(Time::from_str("03:00:00+10:00")?) + ); + Ok(()) + } + + #[test] + fn time_from_datetime() -> Result<(), ParseDateTimeError> { + assert_eq!( + Time::from(DateTime::MIN), + Time::from_str("19:51:08.312696284115894272-14:00")? + ); + assert_eq!( + Time::from(DateTime::MAX), + Time::from_str("04:08:51.687303715884105727+14:00")? + ); + Ok(()) + } + + #[test] + fn date_from_datetime() -> Result<(), Box<dyn Error>> { + assert_eq!( + Date::try_from( + DateTime::MIN + .checked_add_day_time_duration(DayTimeDuration::from_str("P1D")?) + .unwrap() + )?, + Date::MIN + ); + assert_eq!(Date::try_from(DateTime::MAX)?, Date::MAX); + Ok(()) + } + + #[test] + fn g_year_month_from_date() { + assert_eq!(GYearMonth::from(Date::MIN), GYearMonth::MIN); + assert_eq!(GYearMonth::from(Date::MAX), GYearMonth::MAX); + } + + #[test] + fn g_year_from_g_year_month() -> Result<(), ParseDateTimeError> { + assert_eq!(GYear::try_from(GYearMonth::MIN)?, GYear::MIN); + assert_eq!( + GYear::try_from(GYearMonth::from_str("5391559471918-12+14:00")?)?, + GYear::MAX + ); + Ok(()) + } + + #[cfg(feature = "custom-now")] + #[test] + fn custom_now() { + #[allow(unsafe_code)] + #[no_mangle] + fn custom_ox_now() -> Duration { + Duration::default() + } + DateTime::now(); + } + + #[cfg(not(feature = "custom-now"))] + #[test] + fn now() -> Result<(), ParseDateTimeError> { + let now = DateTime::now(); + assert!(DateTime::from_str("2022-01-01T00:00:00Z")? < now); + assert!(now < DateTime::from_str("2100-01-01T00:00:00Z")?); + Ok(()) + } + + #[test] + fn minimally_conformant() -> Result<(), ParseDateTimeError> { + // All minimally conforming processors must support nonnegative year values less than 10000 + // (i.e., those expressible with four digits) in all datatypes which + // use the seven-property model defined in The Seven-property Model (§D.2.1) + // and have a non-absent value for year (i.e. dateTime, dateTimeStamp, date, gYearMonth, and gYear). + assert_eq!(GYear::from_str("9999")?.to_string(), "9999"); + assert_eq!( + DateTime::from_str("9999-12-31T23:59:59Z")?.to_string(), + "9999-12-31T23:59:59Z" + ); + + // All minimally conforming processors must support second values to milliseconds + // (i.e. those expressible with three fraction digits) in all datatypes + // which use the seven-property model defined in The Seven-property Model (§D.2.1) + // and have a non-absent value for second (i.e. dateTime, dateTimeStamp, and time). 
+        assert_eq!(
+            Time::from_str("00:00:00.678Z")?.to_string(),
+            "00:00:00.678Z"
+        );
+        assert_eq!(
+            DateTime::from_str("2000-01-01T00:00:00.678Z")?.to_string(),
+            "2000-01-01T00:00:00.678Z"
+        );
+        Ok(())
+    }
+}
diff --git a/ng-oxigraph/src/oxsdatatypes/decimal.rs b/ng-oxigraph/src/oxsdatatypes/decimal.rs
new file mode 100644
index 0000000..1516fc6
--- /dev/null
+++ b/ng-oxigraph/src/oxsdatatypes/decimal.rs
@@ -0,0 +1,1099 @@
+use crate::oxsdatatypes::{Boolean, Double, Float, Integer, TooLargeForIntegerError};
+use serde::{Deserialize, Serialize};
+use std::fmt;
+use std::fmt::Write;
+use std::str::FromStr;
+
+const DECIMAL_PART_DIGITS: u32 = 18;
+const DECIMAL_PART_POW: i128 = 1_000_000_000_000_000_000;
+const DECIMAL_PART_POW_MINUS_ONE: i128 = 100_000_000_000_000_000;
+
+/// [XML Schema `decimal` datatype](https://www.w3.org/TR/xmlschema11-2/#decimal)
+///
+/// It stores the decimal in a fixed-point encoding allowing nearly 18 digits before and 18 digits after ".".
+///
+/// It stores the value in an [`i128`] integer after multiplying it by 10¹⁸.
+#[derive(
+    Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash, Default, Serialize, Deserialize,
+)]
+pub struct Decimal {
+    value: i128, // value * 10^18
+}
+
+impl Decimal {
+    pub const MAX: Self = Self { value: i128::MAX };
+    pub const MIN: Self = Self { value: i128::MIN };
+    #[cfg(test)]
+    pub const STEP: Self = Self { value: 1 };
+
+    /// Constructs the decimal i / 10^n
+    #[inline]
+    pub const fn new(i: i128, n: u32) -> Result<Self, TooLargeForDecimalError> {
+        let Some(shift) = DECIMAL_PART_DIGITS.checked_sub(n) else {
+            return Err(TooLargeForDecimalError);
+        };
+        let Some(value) = i.checked_mul(10_i128.pow(shift)) else {
+            return Err(TooLargeForDecimalError);
+        };
+        Ok(Self { value })
+    }
+
+    pub(crate) const fn new_from_i128_unchecked(value: i128) -> Self {
+        Self {
+            value: value * DECIMAL_PART_POW,
+        }
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn from_be_bytes(bytes: [u8; 16]) -> Self {
+        Self {
+            value: i128::from_be_bytes(bytes),
+        }
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn to_be_bytes(self) -> [u8; 16] {
+        self.value.to_be_bytes()
+    }
+
+    /// [op:numeric-add](https://www.w3.org/TR/xpath-functions-31/#func-numeric-add)
+    ///
+    /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
+    #[inline]
+    #[must_use]
+    pub fn checked_add(self, rhs: impl Into<Self>) -> Option<Self> {
+        Some(Self {
+            value: self.value.checked_add(rhs.into().value)?,
+        })
+    }
+
+    /// [op:numeric-subtract](https://www.w3.org/TR/xpath-functions-31/#func-numeric-subtract)
+    ///
+    /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
+    #[inline]
+    #[must_use]
+    pub fn checked_sub(self, rhs: impl Into<Self>) -> Option<Self> {
+        Some(Self {
+            value: self.value.checked_sub(rhs.into().value)?,
+        })
+    }
+
+    /// [op:numeric-multiply](https://www.w3.org/TR/xpath-functions-31/#func-numeric-multiply)
+    ///
+    /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
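+    ///
+    /// A minimal illustrative sketch (assuming the `ng_oxigraph::oxsdatatypes` re-export introduced by this patch):
+    /// ```
+    /// use ng_oxigraph::oxsdatatypes::Decimal;
+    ///
+    /// assert_eq!(Decimal::from(2).checked_mul(Decimal::from(3)), Some(Decimal::from(6)));
+    /// assert_eq!(Decimal::MAX.checked_mul(2), None); // overflow is reported as None, not a panic
+    /// ```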
+ #[inline] + #[must_use] + pub fn checked_mul(self, rhs: impl Into<Self>) -> Option<Self> { + // Idea: we shift right as much as possible to keep as much precision as possible + // Do the multiplication and do the required left shift + let mut left = self.value; + let mut shift_left = 0_u32; + if left != 0 { + while left % 10 == 0 { + left /= 10; + shift_left += 1; + } + } + + let mut right = rhs.into().value; + let mut shift_right = 0_u32; + if right != 0 { + while right % 10 == 0 { + right /= 10; + shift_right += 1; + } + } + + // We do multiplication + shift + let shift = (shift_left + shift_right).checked_sub(DECIMAL_PART_DIGITS)?; + Some(Self { + value: left + .checked_mul(right)? + .checked_mul(10_i128.checked_pow(shift)?)?, + }) + } + + /// [op:numeric-divide](https://www.w3.org/TR/xpath-functions-31/#func-numeric-divide) + /// + /// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). + #[inline] + #[must_use] + pub fn checked_div(self, rhs: impl Into<Self>) -> Option<Self> { + // Idea: we shift the dividend left as much as possible to keep as much precision as possible + // And we shift right the divisor as much as possible + // Do the multiplication and do the required shift + let mut left = self.value; + let mut shift_left = 0_u32; + if left != 0 { + while let Some(r) = left.checked_mul(10) { + left = r; + shift_left += 1; + } + } + let mut right = rhs.into().value; + let mut shift_right = 0_u32; + if right != 0 { + while right % 10 == 0 { + right /= 10; + shift_right += 1; + } + } + + // We do division + shift + let shift = (shift_left + shift_right).checked_sub(DECIMAL_PART_DIGITS)?; + Some(Self { + value: left + .checked_div(right)? + .checked_div(10_i128.checked_pow(shift)?)?, + }) + } + + /// [op:numeric-mod](https://www.w3.org/TR/xpath-functions-31/#func-numeric-mod) + /// + /// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). + #[inline] + #[must_use] + pub fn checked_rem(self, rhs: impl Into<Self>) -> Option<Self> { + Some(Self { + value: self.value.checked_rem(rhs.into().value)?, + }) + } + + /// Euclidean remainder + /// + /// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). + #[inline] + #[must_use] + pub fn checked_rem_euclid(self, rhs: impl Into<Self>) -> Option<Self> { + Some(Self { + value: self.value.checked_rem_euclid(rhs.into().value)?, + }) + } + + /// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions-31/#func-numeric-unary-minus) + /// + /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). + #[inline] + #[must_use] + pub fn checked_neg(self) -> Option<Self> { + Some(Self { + value: self.value.checked_neg()?, + }) + } + + /// [fn:abs](https://www.w3.org/TR/xpath-functions-31/#func-abs) + /// + /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). 
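+    ///
+    /// Sketch of the edge case (assuming the same `ng_oxigraph::oxsdatatypes` re-export as above):
+    /// ```
+    /// use ng_oxigraph::oxsdatatypes::Decimal;
+    ///
+    /// assert_eq!(Decimal::from(-3).checked_abs(), Some(Decimal::from(3)));
+    /// assert_eq!(Decimal::MIN.checked_abs(), None); // |MIN| does not fit in the i128 representation
+    /// ```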
+ #[inline] + #[must_use] + pub fn checked_abs(self) -> Option<Self> { + Some(Self { + value: self.value.checked_abs()?, + }) + } + + /// [fn:round](https://www.w3.org/TR/xpath-functions-31/#func-round) + /// + /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). + #[inline] + #[must_use] + pub fn checked_round(self) -> Option<Self> { + let value = self.value / DECIMAL_PART_POW_MINUS_ONE; + Some(Self { + value: if value >= 0 { + value / 10 + i128::from(value % 10 >= 5) + } else { + value / 10 - i128::from(-value % 10 > 5) + } + .checked_mul(DECIMAL_PART_POW)?, + }) + } + + /// [fn:ceiling](https://www.w3.org/TR/xpath-functions-31/#func-ceiling) + /// + /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). + #[inline] + #[must_use] + pub fn checked_ceil(self) -> Option<Self> { + Some(Self { + value: if self.value > 0 && self.value % DECIMAL_PART_POW != 0 { + self.value / DECIMAL_PART_POW + 1 + } else { + self.value / DECIMAL_PART_POW + } + .checked_mul(DECIMAL_PART_POW)?, + }) + } + + /// [fn:floor](https://www.w3.org/TR/xpath-functions-31/#func-floor) + /// + /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). + #[inline] + #[must_use] + pub fn checked_floor(self) -> Option<Self> { + Some(Self { + value: if self.value >= 0 || self.value % DECIMAL_PART_POW == 0 { + self.value / DECIMAL_PART_POW + } else { + self.value / DECIMAL_PART_POW - 1 + } + .checked_mul(DECIMAL_PART_POW)?, + }) + } + + #[inline] + #[must_use] + pub const fn is_negative(self) -> bool { + self.value < 0 + } + + #[inline] + #[must_use] + pub const fn is_positive(self) -> bool { + self.value > 0 + } + + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). 
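+    ///
+    /// The fixed-point representation is canonical, so identity coincides with `==` here (illustrative, assuming the `ng_oxigraph::oxsdatatypes` re-export):
+    /// ```
+    /// use ng_oxigraph::oxsdatatypes::Decimal;
+    /// use std::str::FromStr;
+    ///
+    /// assert!(Decimal::from_str("1.0")?.is_identical_with(Decimal::from_str("1")?));
+    /// # Ok::<_, ng_oxigraph::oxsdatatypes::ParseDecimalError>(())
+    /// ```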
+ #[inline] + #[must_use] + pub fn is_identical_with(self, other: Self) -> bool { + self == other + } + + #[inline] + #[must_use] + pub(super) const fn as_i128(self) -> i128 { + self.value / DECIMAL_PART_POW + } +} + +impl From<bool> for Decimal { + #[inline] + fn from(value: bool) -> Self { + Self { + value: i128::from(value) * DECIMAL_PART_POW, + } + } +} + +impl From<i8> for Decimal { + #[inline] + fn from(value: i8) -> Self { + Self { + value: i128::from(value) * DECIMAL_PART_POW, + } + } +} + +impl From<i16> for Decimal { + #[inline] + fn from(value: i16) -> Self { + Self { + value: i128::from(value) * DECIMAL_PART_POW, + } + } +} + +impl From<i32> for Decimal { + #[inline] + fn from(value: i32) -> Self { + Self { + value: i128::from(value) * DECIMAL_PART_POW, + } + } +} + +impl From<i64> for Decimal { + #[inline] + fn from(value: i64) -> Self { + Self { + value: i128::from(value) * DECIMAL_PART_POW, + } + } +} + +impl From<u8> for Decimal { + #[inline] + fn from(value: u8) -> Self { + Self { + value: i128::from(value) * DECIMAL_PART_POW, + } + } +} + +impl From<u16> for Decimal { + #[inline] + fn from(value: u16) -> Self { + Self { + value: i128::from(value) * DECIMAL_PART_POW, + } + } +} + +impl From<u32> for Decimal { + #[inline] + fn from(value: u32) -> Self { + Self { + value: i128::from(value) * DECIMAL_PART_POW, + } + } +} + +impl From<u64> for Decimal { + #[inline] + fn from(value: u64) -> Self { + Self { + value: i128::from(value) * DECIMAL_PART_POW, + } + } +} + +impl From<Integer> for Decimal { + #[inline] + fn from(value: Integer) -> Self { + i64::from(value).into() + } +} + +impl TryFrom<i128> for Decimal { + type Error = TooLargeForDecimalError; + + #[inline] + fn try_from(value: i128) -> Result<Self, Self::Error> { + Ok(Self { + value: value + .checked_mul(DECIMAL_PART_POW) + .ok_or(TooLargeForDecimalError)?, + }) + } +} + +impl TryFrom<u128> for Decimal { + type Error = TooLargeForDecimalError; + + #[inline] + fn try_from(value: u128) -> Result<Self, Self::Error> { + Ok(Self { + value: i128::try_from(value) + .map_err(|_| TooLargeForDecimalError)? 
+ .checked_mul(DECIMAL_PART_POW) + .ok_or(TooLargeForDecimalError)?, + }) + } +} + +impl From<Boolean> for Decimal { + #[inline] + fn from(value: Boolean) -> Self { + bool::from(value).into() + } +} + +impl TryFrom<Float> for Decimal { + type Error = TooLargeForDecimalError; + + #[inline] + fn try_from(value: Float) -> Result<Self, Self::Error> { + Double::from(value).try_into() + } +} + +impl TryFrom<Double> for Decimal { + type Error = TooLargeForDecimalError; + + #[inline] + #[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)] + fn try_from(value: Double) -> Result<Self, Self::Error> { + let shifted = f64::from(value) * (DECIMAL_PART_POW as f64); + if (i128::MIN as f64) <= shifted && shifted <= (i128::MAX as f64) { + Ok(Self { + value: shifted as i128, + }) + } else { + Err(TooLargeForDecimalError) + } + } +} + +impl From<Decimal> for Float { + #[inline] + #[allow(clippy::cast_precision_loss)] + fn from(value: Decimal) -> Self { + Double::from(value).into() + } +} + +impl From<Decimal> for Double { + #[inline] + #[allow(clippy::cast_precision_loss)] + fn from(value: Decimal) -> Self { + let mut value = value.value; + let mut shift = DECIMAL_PART_POW; + + // Hack to improve precision + if value != 0 { + while shift != 1 && value % 10 == 0 { + value /= 10; + shift /= 10; + } + } + + ((value as f64) / (shift as f64)).into() + } +} + +impl TryFrom<Decimal> for Integer { + type Error = TooLargeForIntegerError; + + #[inline] + fn try_from(value: Decimal) -> Result<Self, Self::Error> { + Ok(i64::try_from( + value + .value + .checked_div(DECIMAL_PART_POW) + .ok_or(TooLargeForIntegerError)?, + ) + .map_err(|_| TooLargeForIntegerError)? + .into()) + } +} + +impl FromStr for Decimal { + type Err = ParseDecimalError; + + /// Parses decimals lexical mapping + fn from_str(input: &str) -> Result<Self, Self::Err> { + // (\+|-)?([0-9]+(\.[0-9]*)?|\.[0-9]+) + let input = input.as_bytes(); + if input.is_empty() { + return Err(PARSE_UNEXPECTED_END); + } + + let (sign, mut input) = match input.first() { + Some(b'+') => (1_i128, &input[1..]), + Some(b'-') => (-1_i128, &input[1..]), + _ => (1, input), + }; + + let mut value = 0_i128; + let with_before_dot = input.first().map_or(false, u8::is_ascii_digit); + while let Some(c) = input.first() { + if c.is_ascii_digit() { + value = value + .checked_mul(10) + .ok_or(PARSE_OVERFLOW)? + .checked_add(sign * i128::from(*c - b'0')) + .ok_or(PARSE_OVERFLOW)?; + input = &input[1..]; + } else { + break; + } + } + + let mut exp = DECIMAL_PART_POW; + if let Some(c) = input.first() { + if *c != b'.' { + return Err(PARSE_UNEXPECTED_CHAR); + } + input = &input[1..]; + if input.is_empty() && !with_before_dot { + // We only have a dot + return Err(PARSE_UNEXPECTED_END); + } + while input.last() == Some(&b'0') { + // Hack to avoid underflows + input = &input[..input.len() - 1]; + } + while let Some(c) = input.first() { + if c.is_ascii_digit() { + exp /= 10; + value = value + .checked_mul(10) + .ok_or(PARSE_OVERFLOW)? + .checked_add(sign * i128::from(*c - b'0')) + .ok_or(PARSE_OVERFLOW)?; + input = &input[1..]; + } else { + return Err(PARSE_UNEXPECTED_CHAR); + } + } + if exp == 0 { + // Underflow + return Err(PARSE_UNDERFLOW); + } + } else if !with_before_dot { + // It's empty + return Err(PARSE_UNEXPECTED_END); + } + + Ok(Self { + value: value.checked_mul(exp).ok_or(PARSE_OVERFLOW)?, + }) + } +} + +impl fmt::Display for Decimal { + /// Formats the decimal following its canonical representation. 
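+    ///
+    /// In particular, trailing zeros are not reproduced (illustrative, assuming the `ng_oxigraph::oxsdatatypes` re-export):
+    /// ```
+    /// use ng_oxigraph::oxsdatatypes::Decimal;
+    /// use std::str::FromStr;
+    ///
+    /// assert_eq!(Decimal::from_str("1.500")?.to_string(), "1.5");
+    /// assert_eq!(Decimal::from_str("-0")?.to_string(), "0");
+    /// # Ok::<_, ng_oxigraph::oxsdatatypes::ParseDecimalError>(())
+    /// ```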
+ #[allow(clippy::cast_possible_truncation)] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.value == 0 { + return if let Some(width) = f.width() { + for _ in 0..width { + f.write_char('0')?; + } + Ok(()) + } else { + f.write_char('0') + }; + } + + let mut value = self.value; + if self.value.is_negative() { + f.write_char('-')?; + } + + let mut digits = [b'0'; 40]; + let mut i = 0; + while value != 0 { + digits[i] = b'0' + ((value % 10).unsigned_abs() as u8); + value /= 10; + i += 1; + } + + let last_non_zero = i - 1; + let first_non_zero = digits + .iter() + .copied() + .enumerate() + .find_map(|(i, v)| if v == b'0' { None } else { Some(i) }) + .unwrap_or(40); + + let decimal_part_digits = usize::try_from(DECIMAL_PART_DIGITS).map_err(|_| fmt::Error)?; + if last_non_zero >= decimal_part_digits { + let end = if let Some(mut width) = f.width() { + if self.value.is_negative() { + width -= 1; + } + if last_non_zero - decimal_part_digits + 1 < width { + decimal_part_digits + width + } else { + last_non_zero + 1 + } + } else { + last_non_zero + 1 + }; + for c in digits[decimal_part_digits..end].iter().rev() { + f.write_char(char::from(*c))?; + } + } else { + f.write_char('0')? + } + if decimal_part_digits > first_non_zero { + f.write_char('.')?; + let start = if let Some(precision) = f.precision() { + if decimal_part_digits - first_non_zero > precision { + decimal_part_digits - precision + } else { + first_non_zero + } + } else { + first_non_zero + }; + for c in digits[start..decimal_part_digits].iter().rev() { + f.write_char(char::from(*c))?; + } + } + + Ok(()) + } +} + +/// An error when parsing a [`Decimal`]. +#[derive(Debug, thiserror::Error)] +#[error(transparent)] +pub struct ParseDecimalError(#[from] DecimalParseErrorKind); + +#[derive(Debug, Clone, thiserror::Error)] +enum DecimalParseErrorKind { + #[error("Value overflow")] + Overflow, + #[error("Value underflow")] + Underflow, + #[error("Unexpected character")] + UnexpectedChar, + #[error("Unexpected end of string")] + UnexpectedEnd, +} + +const PARSE_OVERFLOW: ParseDecimalError = ParseDecimalError(DecimalParseErrorKind::Overflow); +const PARSE_UNDERFLOW: ParseDecimalError = ParseDecimalError(DecimalParseErrorKind::Underflow); +const PARSE_UNEXPECTED_CHAR: ParseDecimalError = + ParseDecimalError(DecimalParseErrorKind::UnexpectedChar); +const PARSE_UNEXPECTED_END: ParseDecimalError = + ParseDecimalError(DecimalParseErrorKind::UnexpectedEnd); + +impl From<TooLargeForDecimalError> for ParseDecimalError { + fn from(_: TooLargeForDecimalError) -> Self { + Self(DecimalParseErrorKind::Overflow) + } +} + +/// The input is too large to fit into a [`Decimal`]. +/// +/// Matches XPath [`FOCA0001` error](https://www.w3.org/TR/xpath-functions-31/#ERRFOCA0001). 
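+///
+/// For instance, any integer whose absolute value exceeds roughly 1.7 × 10²⁰ cannot be represented (illustrative sketch, assuming the `ng_oxigraph::oxsdatatypes` re-export):
+/// ```
+/// use ng_oxigraph::oxsdatatypes::Decimal;
+///
+/// Decimal::try_from(i128::MAX).unwrap_err();
+/// ```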
+#[derive(Debug, Clone, Copy, thiserror::Error)] +#[error("Value too large for xsd:decimal internal representation")] +pub struct TooLargeForDecimalError; + +#[cfg(test)] +#[allow(clippy::panic_in_result_fn)] +mod tests { + use super::*; + + #[test] + fn new() -> Result<(), ParseDecimalError> { + assert_eq!(Decimal::new(1, 0)?.to_string(), "1"); + assert_eq!(Decimal::new(1, 1)?.to_string(), "0.1"); + assert_eq!(Decimal::new(10, 0)?.to_string(), "10"); + assert_eq!(Decimal::new(10, 1)?.to_string(), "1"); + assert_eq!(Decimal::new(10, 2)?.to_string(), "0.1"); + Ok(()) + } + + #[test] + fn from_str() -> Result<(), ParseDecimalError> { + Decimal::from_str("").unwrap_err(); + Decimal::from_str("+").unwrap_err(); + Decimal::from_str("-").unwrap_err(); + Decimal::from_str(".").unwrap_err(); + Decimal::from_str("+.").unwrap_err(); + Decimal::from_str("-.").unwrap_err(); + Decimal::from_str("a").unwrap_err(); + Decimal::from_str(".a").unwrap_err(); + assert_eq!(Decimal::from_str("210")?.to_string(), "210"); + assert_eq!(Decimal::from_str("1000")?.to_string(), "1000"); + assert_eq!(Decimal::from_str("-1.23")?.to_string(), "-1.23"); + assert_eq!( + Decimal::from_str("12678967.543233")?.to_string(), + "12678967.543233" + ); + assert_eq!(Decimal::from_str("+100000.00")?.to_string(), "100000"); + assert_eq!(Decimal::from_str("0.1220")?.to_string(), "0.122"); + assert_eq!(Decimal::from_str(".12200")?.to_string(), "0.122"); + assert_eq!(Decimal::from_str("1.")?.to_string(), "1"); + assert_eq!(Decimal::from_str("1.0")?.to_string(), "1"); + assert_eq!(Decimal::from_str("01.0")?.to_string(), "1"); + assert_eq!(Decimal::from_str("0")?.to_string(), "0"); + assert_eq!(Decimal::from_str("-0")?.to_string(), "0"); + assert_eq!(Decimal::from_str(&Decimal::MAX.to_string())?, Decimal::MAX); + assert_eq!(Decimal::from_str(&Decimal::MIN.to_string())?, Decimal::MIN); + Decimal::from_str("0.0000000000000000001").unwrap_err(); + Decimal::from_str("1000000000000000000000").unwrap_err(); + assert_eq!( + Decimal::from_str("0.100000000000000000000000000").unwrap(), + Decimal::from_str("0.1").unwrap() + ); + Ok(()) + } + + #[test] + fn format() { + assert_eq!(format!("{}", Decimal::from(0)), "0"); + assert_eq!(format!("{}", Decimal::from(1)), "1"); + assert_eq!(format!("{}", Decimal::from(10)), "10"); + assert_eq!(format!("{}", Decimal::from(100)), "100"); + assert_eq!(format!("{}", Decimal::from(-1)), "-1"); + assert_eq!(format!("{}", Decimal::from(-10)), "-10"); + + assert_eq!(format!("{:02}", Decimal::from(0)), "00"); + assert_eq!(format!("{:02}", Decimal::from(1)), "01"); + assert_eq!(format!("{:02}", Decimal::from(10)), "10"); + assert_eq!(format!("{:02}", Decimal::from(100)), "100"); + assert_eq!(format!("{:02}", Decimal::from(-1)), "-1"); + assert_eq!(format!("{:02}", Decimal::from(-10)), "-10"); + } + + #[test] + fn add() { + assert!(Decimal::MIN.checked_add(Decimal::STEP).is_some()); + assert!(Decimal::MAX.checked_add(Decimal::STEP).is_none()); + assert_eq!( + Decimal::MAX.checked_add(Decimal::MIN), + Decimal::STEP.checked_neg() + ); + } + + #[test] + fn sub() { + assert!(Decimal::MIN.checked_sub(Decimal::STEP).is_none()); + assert!(Decimal::MAX.checked_sub(Decimal::STEP).is_some()); + } + + #[test] + fn mul() -> Result<(), ParseDecimalError> { + assert_eq!(Decimal::from(1).checked_mul(-1), Some(Decimal::from(-1))); + assert_eq!( + Decimal::from(1000).checked_mul(1000), + Some(Decimal::from(1_000_000)) + ); + assert_eq!( + Decimal::from_str("0.1")?.checked_mul(Decimal::from_str("0.01")?), + 
Some(Decimal::from_str("0.001")?) + ); + assert_eq!(Decimal::from(0).checked_mul(1), Some(Decimal::from(0))); + assert_eq!(Decimal::from(1).checked_mul(0), Some(Decimal::from(0))); + assert_eq!(Decimal::MAX.checked_mul(1), Some(Decimal::MAX)); + assert_eq!(Decimal::MIN.checked_mul(1), Some(Decimal::MIN)); + assert_eq!( + Decimal::from(1).checked_mul(Decimal::MAX), + Some(Decimal::MAX) + ); + assert_eq!( + Decimal::from(1).checked_mul(Decimal::MIN), + Some(Decimal::MIN) + ); + assert_eq!( + Decimal::MAX.checked_mul(-1), + Some(Decimal::MIN.checked_add(Decimal::STEP).unwrap()) + ); + assert_eq!(Decimal::MIN.checked_mul(-1), None); + assert_eq!( + Decimal::MIN + .checked_add(Decimal::STEP) + .unwrap() + .checked_mul(-1), + Some(Decimal::MAX) + ); + Ok(()) + } + + #[test] + fn div() -> Result<(), ParseDecimalError> { + assert_eq!(Decimal::from(1).checked_div(1), Some(Decimal::from(1))); + assert_eq!(Decimal::from(100).checked_div(10), Some(Decimal::from(10))); + assert_eq!( + Decimal::from(10).checked_div(100), + Some(Decimal::from_str("0.1")?) + ); + assert_eq!(Decimal::from(1).checked_div(0), None); + assert_eq!(Decimal::from(0).checked_div(1), Some(Decimal::from(0))); + assert_eq!(Decimal::MAX.checked_div(1), Some(Decimal::MAX)); + assert_eq!(Decimal::MIN.checked_div(1), Some(Decimal::MIN)); + assert_eq!( + Decimal::MAX.checked_div(-1), + Some(Decimal::MIN.checked_add(Decimal::STEP).unwrap()) + ); + assert_eq!(Decimal::MIN.checked_div(-1), None); + assert_eq!( + Decimal::MIN + .checked_add(Decimal::STEP) + .unwrap() + .checked_div(-1), + Some(Decimal::MAX) + ); + Ok(()) + } + + #[test] + fn rem() -> Result<(), ParseDecimalError> { + assert_eq!(Decimal::from(10).checked_rem(3), Some(Decimal::from(1))); + assert_eq!(Decimal::from(6).checked_rem(-2), Some(Decimal::from(0))); + assert_eq!( + Decimal::from_str("4.5")?.checked_rem(Decimal::from_str("1.2")?), + Some(Decimal::from_str("0.9")?) + ); + assert_eq!(Decimal::from(1).checked_rem(0), None); + assert_eq!( + Decimal::MAX.checked_rem(1), + Some(Decimal::from_str("0.687303715884105727")?) + ); + assert_eq!( + Decimal::MIN.checked_rem(1), + Some(Decimal::from_str("-0.687303715884105728")?) + ); + assert_eq!( + Decimal::MAX.checked_rem(Decimal::STEP), + Some(Decimal::default()) + ); + assert_eq!( + Decimal::MIN.checked_rem(Decimal::STEP), + Some(Decimal::default()) + ); + assert_eq!( + Decimal::MAX.checked_rem(Decimal::MAX), + Some(Decimal::default()) + ); + assert_eq!( + Decimal::MIN.checked_rem(Decimal::MIN), + Some(Decimal::default()) + ); + Ok(()) + } + + #[test] + fn round() -> Result<(), ParseDecimalError> { + assert_eq!(Decimal::from(10).checked_round(), Some(Decimal::from(10))); + assert_eq!(Decimal::from(-10).checked_round(), Some(Decimal::from(-10))); + assert_eq!( + Decimal::from(i64::MIN).checked_round(), + Some(Decimal::from(i64::MIN)) + ); + assert_eq!( + Decimal::from(i64::MAX).checked_round(), + Some(Decimal::from(i64::MAX)) + ); + assert_eq!( + Decimal::from_str("2.5")?.checked_round(), + Some(Decimal::from(3)) + ); + assert_eq!( + Decimal::from_str("2.4999")?.checked_round(), + Some(Decimal::from(2)) + ); + assert_eq!( + Decimal::from_str("-2.5")?.checked_round(), + Some(Decimal::from(-2)) + ); + assert_eq!(Decimal::MAX.checked_round(), None); + assert_eq!( + Decimal::MAX + .checked_sub(Decimal::from_str("0.5")?) + .unwrap() + .checked_round(), + Some(Decimal::from_str("170141183460469231731")?) + ); + assert_eq!(Decimal::MIN.checked_round(), None); + assert_eq!( + Decimal::MIN + .checked_add(Decimal::from_str("0.5")?) 
+ .unwrap() + .checked_round(), + Some(Decimal::from_str("-170141183460469231731")?) + ); + Ok(()) + } + + #[test] + fn ceil() -> Result<(), ParseDecimalError> { + assert_eq!(Decimal::from(10).checked_ceil(), Some(Decimal::from(10))); + assert_eq!(Decimal::from(-10).checked_ceil(), Some(Decimal::from(-10))); + assert_eq!( + Decimal::from_str("10.5")?.checked_ceil(), + Some(Decimal::from(11)) + ); + assert_eq!( + Decimal::from_str("-10.5")?.checked_ceil(), + Some(Decimal::from(-10)) + ); + assert_eq!( + Decimal::from(i64::MIN).checked_ceil(), + Some(Decimal::from(i64::MIN)) + ); + assert_eq!( + Decimal::from(i64::MAX).checked_ceil(), + Some(Decimal::from(i64::MAX)) + ); + assert_eq!(Decimal::MAX.checked_ceil(), None); + assert_eq!( + Decimal::MAX + .checked_sub(Decimal::from(1)) + .unwrap() + .checked_ceil(), + Some(Decimal::from_str("170141183460469231731")?) + ); + assert_eq!( + Decimal::MIN.checked_ceil(), + Some(Decimal::from_str("-170141183460469231731")?) + ); + Ok(()) + } + + #[test] + fn floor() -> Result<(), ParseDecimalError> { + assert_eq!(Decimal::from(10).checked_floor(), Some(Decimal::from(10))); + assert_eq!(Decimal::from(-10).checked_floor(), Some(Decimal::from(-10))); + assert_eq!( + Decimal::from_str("10.5")?.checked_floor(), + Some(Decimal::from(10)) + ); + assert_eq!( + Decimal::from_str("-10.5")?.checked_floor(), + Some(Decimal::from(-11)) + ); + assert_eq!( + Decimal::from(i64::MIN).checked_floor(), + Some(Decimal::from(i64::MIN)) + ); + assert_eq!( + Decimal::from(i64::MAX).checked_floor(), + Some(Decimal::from(i64::MAX)) + ); + assert_eq!( + Decimal::MAX.checked_floor(), + Some(Decimal::from_str("170141183460469231731")?) + ); + assert_eq!(Decimal::MIN.checked_floor(), None); + assert_eq!( + Decimal::MIN + .checked_add(Decimal::from_str("1")?) + .unwrap() + .checked_floor(), + Some(Decimal::from_str("-170141183460469231731")?) + ); + Ok(()) + } + + #[test] + fn to_be_bytes() -> Result<(), ParseDecimalError> { + assert_eq!( + Decimal::from_be_bytes(Decimal::MIN.to_be_bytes()), + Decimal::MIN + ); + assert_eq!( + Decimal::from_be_bytes(Decimal::MAX.to_be_bytes()), + Decimal::MAX + ); + assert_eq!( + Decimal::from_be_bytes(Decimal::from(i64::MIN).to_be_bytes()), + Decimal::from(i64::MIN) + ); + assert_eq!( + Decimal::from_be_bytes(Decimal::from(i64::MAX).to_be_bytes()), + Decimal::from(i64::MAX) + ); + assert_eq!( + Decimal::from_be_bytes(Decimal::from(0).to_be_bytes()), + Decimal::from(0) + ); + assert_eq!( + Decimal::from_be_bytes(Decimal::from(0).to_be_bytes()), + Decimal::from(0) + ); + assert_eq!( + Decimal::from_be_bytes(Decimal::from_str("0.01")?.to_be_bytes()), + Decimal::from_str("0.01")? + ); + Ok(()) + } + + #[test] + fn from_bool() { + assert_eq!(Decimal::from(false), Decimal::from(0_u8)); + assert_eq!(Decimal::from(true), Decimal::from(1_u8)); + } + + #[test] + fn from_float() -> Result<(), ParseDecimalError> { + assert_eq!( + Decimal::try_from(Float::from(0.)).ok(), + Some(Decimal::from(0)) + ); + assert_eq!( + Decimal::try_from(Float::from(-0.)).ok(), + Some(Decimal::from(0)) + ); + assert_eq!( + Decimal::try_from(Float::from(-123.5)).ok(), + Some(Decimal::from_str("-123.5")?) 
+ ); + Decimal::try_from(Float::from(f32::NAN)).unwrap_err(); + Decimal::try_from(Float::from(f32::INFINITY)).unwrap_err(); + Decimal::try_from(Float::from(f32::NEG_INFINITY)).unwrap_err(); + Decimal::try_from(Float::from(f32::MIN)).unwrap_err(); + Decimal::try_from(Float::from(f32::MAX)).unwrap_err(); + assert!( + Decimal::try_from(Float::from(1_672_507_300_000.)) + .unwrap() + .checked_sub(Decimal::from(1_672_507_293_696_i64)) + .unwrap() + .checked_abs() + .unwrap() + < Decimal::from(1) + ); + Ok(()) + } + + #[test] + fn from_double() -> Result<(), ParseDecimalError> { + assert_eq!( + Decimal::try_from(Double::from(0.)).ok(), + Some(Decimal::from(0)) + ); + assert_eq!( + Decimal::try_from(Double::from(-0.)).ok(), + Some(Decimal::from(0)) + ); + assert_eq!( + Decimal::try_from(Double::from(-123.1)).ok(), + Some(Decimal::from_str("-123.1")?) + ); + assert!( + Decimal::try_from(Double::from(1_672_507_302_466.)) + .unwrap() + .checked_sub(Decimal::from(1_672_507_302_466_i64)) + .unwrap() + .checked_abs() + .unwrap() + < Decimal::from(1) + ); + Decimal::try_from(Double::from(f64::NAN)).unwrap_err(); + Decimal::try_from(Double::from(f64::INFINITY)).unwrap_err(); + Decimal::try_from(Double::from(f64::NEG_INFINITY)).unwrap_err(); + Decimal::try_from(Double::from(f64::MIN)).unwrap_err(); + Decimal::try_from(Double::from(f64::MAX)).unwrap_err(); + Ok(()) + } + + #[test] + fn to_float() -> Result<(), ParseDecimalError> { + assert_eq!(Float::from(Decimal::from(0)), Float::from(0.)); + assert_eq!(Float::from(Decimal::from(1)), Float::from(1.)); + assert_eq!(Float::from(Decimal::from(10)), Float::from(10.)); + assert_eq!(Float::from(Decimal::from_str("0.1")?), Float::from(0.1)); + assert!((Float::from(Decimal::MAX) - Float::from(1.701_412e20)).abs() < Float::from(1.)); + assert!((Float::from(Decimal::MIN) - Float::from(-1.701_412e20)).abs() < Float::from(1.)); + Ok(()) + } + + #[test] + fn to_double() -> Result<(), ParseDecimalError> { + assert_eq!(Double::from(Decimal::from(0)), Double::from(0.)); + assert_eq!(Double::from(Decimal::from(1)), Double::from(1.)); + assert_eq!(Double::from(Decimal::from(10)), Double::from(10.)); + assert_eq!(Double::from(Decimal::from_str("0.1")?), Double::from(0.1)); + assert!( + (Double::from(Decimal::MAX) - Double::from(1.701_411_834_604_692_4e20)).abs() + < Double::from(1.) + ); + assert!( + (Double::from(Decimal::MIN) - Double::from(-1.701_411_834_604_692_4e20)).abs() + < Double::from(1.) + ); + Ok(()) + } + + #[test] + fn minimally_conformant() -> Result<(), ParseDecimalError> { + // All minimally conforming processors must support decimal values whose absolute value can be expressed as i / 10^k, + // where i and k are nonnegative integers such that i < 10^16 and k ≤ 16 (i.e., those expressible with sixteen total digits). 
+ assert_eq!( + Decimal::from_str("1234567890123456")?.to_string(), + "1234567890123456" + ); + assert_eq!( + Decimal::from_str("-1234567890123456")?.to_string(), + "-1234567890123456" + ); + assert_eq!( + Decimal::from_str("0.1234567890123456")?.to_string(), + "0.1234567890123456" + ); + assert_eq!( + Decimal::from_str("-0.1234567890123456")?.to_string(), + "-0.1234567890123456" + ); + Ok(()) + } +} diff --git a/ng-oxigraph/src/oxsdatatypes/double.rs b/ng-oxigraph/src/oxsdatatypes/double.rs new file mode 100644 index 0000000..48e0022 --- /dev/null +++ b/ng-oxigraph/src/oxsdatatypes/double.rs @@ -0,0 +1,326 @@ +use crate::oxsdatatypes::{Boolean, Float, Integer}; +use serde::{Deserialize, Serialize}; +use std::cmp::Ordering; +use std::fmt; +use std::num::ParseFloatError; +use std::ops::{Add, Div, Mul, Neg, Sub}; +use std::str::FromStr; + +/// [XML Schema `double` datatype](https://www.w3.org/TR/xmlschema11-2/#double) +/// +/// Uses internally a [`f64`]. +/// +/// <div class="warning">Serialization does not follow the canonical mapping.</div> +#[derive(Debug, Clone, Copy, Default, PartialEq, Serialize, Deserialize)] +#[repr(transparent)] +pub struct Double { + value: f64, +} + +impl Double { + pub const INFINITY: Self = Self { + value: f64::INFINITY, + }; + pub const MAX: Self = Self { value: f64::MAX }; + pub const MIN: Self = Self { value: f64::MIN }; + pub const NAN: Self = Self { value: f64::NAN }; + pub const NEG_INFINITY: Self = Self { + value: f64::NEG_INFINITY, + }; + + #[inline] + #[must_use] + pub fn from_be_bytes(bytes: [u8; 8]) -> Self { + Self { + value: f64::from_be_bytes(bytes), + } + } + + #[inline] + #[must_use] + pub fn to_be_bytes(self) -> [u8; 8] { + self.value.to_be_bytes() + } + + /// [fn:abs](https://www.w3.org/TR/xpath-functions-31/#func-abs) + #[inline] + #[must_use] + pub fn abs(self) -> Self { + self.value.abs().into() + } + + /// [fn:ceiling](https://www.w3.org/TR/xpath-functions-31/#func-ceiling) + #[inline] + #[must_use] + pub fn ceil(self) -> Self { + self.value.ceil().into() + } + + /// [fn:floor](https://www.w3.org/TR/xpath-functions-31/#func-floor) + #[inline] + #[must_use] + pub fn floor(self) -> Self { + self.value.floor().into() + } + + /// [fn:round](https://www.w3.org/TR/xpath-functions-31/#func-round) + #[inline] + #[must_use] + pub fn round(self) -> Self { + self.value.round().into() + } + + #[inline] + #[must_use] + pub fn is_nan(self) -> bool { + self.value.is_nan() + } + + #[inline] + #[must_use] + pub fn is_finite(self) -> bool { + self.value.is_finite() + } + + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). 
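+    ///
+    /// Unlike `==`, identity is bitwise: `NaN` is identical to itself and `-0.0` is not identical to `0.0` (illustrative, assuming the `ng_oxigraph::oxsdatatypes` re-export):
+    /// ```
+    /// use ng_oxigraph::oxsdatatypes::Double;
+    ///
+    /// assert!(Double::NAN.is_identical_with(Double::NAN)); // whereas Double::NAN != Double::NAN
+    /// assert!(!Double::from(-0.).is_identical_with(Double::from(0.)));
+    /// ```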
+ #[inline] + #[must_use] + pub fn is_identical_with(self, other: Self) -> bool { + self.value.to_bits() == other.value.to_bits() + } +} + +impl From<Double> for f64 { + #[inline] + fn from(value: Double) -> Self { + value.value + } +} + +impl From<f64> for Double { + #[inline] + fn from(value: f64) -> Self { + Self { value } + } +} + +impl From<i8> for Double { + #[inline] + fn from(value: i8) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<i16> for Double { + #[inline] + fn from(value: i16) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<i32> for Double { + #[inline] + fn from(value: i32) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<u8> for Double { + #[inline] + fn from(value: u8) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<u16> for Double { + #[inline] + fn from(value: u16) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<u32> for Double { + #[inline] + fn from(value: u32) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<Float> for Double { + #[inline] + fn from(value: Float) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<Boolean> for Double { + #[inline] + fn from(value: Boolean) -> Self { + f64::from(bool::from(value)).into() + } +} + +impl From<Integer> for Double { + #[inline] + #[allow(clippy::cast_precision_loss)] + fn from(value: Integer) -> Self { + (i64::from(value) as f64).into() + } +} + +impl FromStr for Double { + type Err = ParseFloatError; + + #[inline] + fn from_str(input: &str) -> Result<Self, Self::Err> { + Ok(f64::from_str(input)?.into()) + } +} + +impl fmt::Display for Double { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.value == f64::INFINITY { + f.write_str("INF") + } else if self.value == f64::NEG_INFINITY { + f.write_str("-INF") + } else { + self.value.fmt(f) + } + } +} + +impl PartialOrd for Double { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + self.value.partial_cmp(&other.value) + } +} + +impl Neg for Double { + type Output = Self; + + #[inline] + fn neg(self) -> Self { + (-self.value).into() + } +} + +impl Add for Double { + type Output = Self; + + #[inline] + fn add(self, rhs: Self) -> Self { + (self.value + rhs.value).into() + } +} + +impl Sub for Double { + type Output = Self; + + #[inline] + fn sub(self, rhs: Self) -> Self { + (self.value - rhs.value).into() + } +} + +impl Mul for Double { + type Output = Self; + + #[inline] + fn mul(self, rhs: Self) -> Self { + (self.value * rhs.value).into() + } +} + +impl Div for Double { + type Output = Self; + + #[inline] + fn div(self, rhs: Self) -> Self { + (self.value / rhs.value).into() + } +} + +#[cfg(test)] +#[allow(clippy::panic_in_result_fn)] +mod tests { + use super::*; + + #[test] + fn eq() { + assert_eq!(Double::from(0_f64), Double::from(0_f64)); + assert_ne!(Double::NAN, Double::NAN); + assert_eq!(Double::from(-0.), Double::from(0.)); + } + + #[test] + fn cmp() { + assert_eq!( + Double::from(0.).partial_cmp(&Double::from(0.)), + Some(Ordering::Equal) + ); + assert_eq!( + Double::INFINITY.partial_cmp(&Double::MAX), + Some(Ordering::Greater) + ); + assert_eq!( + Double::NEG_INFINITY.partial_cmp(&Double::MIN), + Some(Ordering::Less) + ); + assert_eq!(Double::NAN.partial_cmp(&Double::from(0.)), None); + assert_eq!(Double::NAN.partial_cmp(&Double::NAN), None); + assert_eq!( + Double::from(0.).partial_cmp(&Double::from(-0.)), + Some(Ordering::Equal) + ); + } + + #[test] + fn is_identical_with() { + 
assert!(Double::from(0.).is_identical_with(Double::from(0.))); + assert!(Double::NAN.is_identical_with(Double::NAN)); + assert!(!Double::from(-0.).is_identical_with(Double::from(0.))); + } + + #[test] + fn from_str() -> Result<(), ParseFloatError> { + assert_eq!(Double::from_str("NaN")?.to_string(), "NaN"); + assert_eq!(Double::from_str("INF")?.to_string(), "INF"); + assert_eq!(Double::from_str("+INF")?.to_string(), "INF"); + assert_eq!(Double::from_str("-INF")?.to_string(), "-INF"); + assert_eq!(Double::from_str("0.0E0")?.to_string(), "0"); + assert_eq!(Double::from_str("-0.0E0")?.to_string(), "-0"); + assert_eq!(Double::from_str("0.1e1")?.to_string(), "1"); + assert_eq!(Double::from_str("-0.1e1")?.to_string(), "-1"); + assert_eq!(Double::from_str("1.e1")?.to_string(), "10"); + assert_eq!(Double::from_str("-1.e1")?.to_string(), "-10"); + assert_eq!(Double::from_str("1")?.to_string(), "1"); + assert_eq!(Double::from_str("-1")?.to_string(), "-1"); + assert_eq!(Double::from_str("1.")?.to_string(), "1"); + assert_eq!(Double::from_str("-1.")?.to_string(), "-1"); + assert_eq!( + Double::from_str(&f64::MIN.to_string()).unwrap(), + Double::MIN + ); + assert_eq!( + Double::from_str(&f64::MAX.to_string()).unwrap(), + Double::MAX + ); + Ok(()) + } +} diff --git a/ng-oxigraph/src/oxsdatatypes/duration.rs b/ng-oxigraph/src/oxsdatatypes/duration.rs new file mode 100644 index 0000000..d8f5eb0 --- /dev/null +++ b/ng-oxigraph/src/oxsdatatypes/duration.rs @@ -0,0 +1,1249 @@ +use crate::oxsdatatypes::{DateTime, Decimal}; +use serde::{Deserialize, Serialize}; +use std::cmp::Ordering; +use std::fmt; +use std::str::FromStr; +use std::time::Duration as StdDuration; + +/// [XML Schema `duration` datatype](https://www.w3.org/TR/xmlschema11-2/#duration) +/// +/// It stores the duration using a pair of a [`YearMonthDuration`] and a [`DayTimeDuration`]. 
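+///
+/// A quick sketch of parsing and component access (illustrative, assuming the `ng_oxigraph::oxsdatatypes` re-export):
+/// ```
+/// use ng_oxigraph::oxsdatatypes::Duration;
+/// use std::str::FromStr;
+///
+/// let d = Duration::from_str("P1Y2M3DT4H")?;
+/// assert_eq!((d.years(), d.months(), d.days(), d.hours()), (1, 2, 3, 4));
+/// # Ok::<_, ng_oxigraph::oxsdatatypes::ParseDurationError>(())
+/// ```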
+#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash, Default, Serialize, Deserialize)] +pub struct Duration { + year_month: YearMonthDuration, + day_time: DayTimeDuration, +} + +impl Duration { + pub const MAX: Self = Self { + year_month: YearMonthDuration::MAX, + day_time: DayTimeDuration::MAX, + }; + pub const MIN: Self = Self { + year_month: YearMonthDuration::MIN, + day_time: DayTimeDuration::MIN, + }; + + #[inline] + pub fn new( + months: impl Into<i64>, + seconds: impl Into<Decimal>, + ) -> Result<Self, OppositeSignInDurationComponentsError> { + Self::construct( + YearMonthDuration::new(months), + DayTimeDuration::new(seconds), + ) + } + + #[inline] + fn construct( + year_month: YearMonthDuration, + day_time: DayTimeDuration, + ) -> Result<Self, OppositeSignInDurationComponentsError> { + if (year_month > YearMonthDuration::default() && day_time < DayTimeDuration::default()) + || (year_month < YearMonthDuration::default() && day_time > DayTimeDuration::default()) + { + return Err(OppositeSignInDurationComponentsError); + } + Ok(Self { + year_month, + day_time, + }) + } + + #[inline] + #[must_use] + pub fn from_be_bytes(bytes: [u8; 24]) -> Self { + Self { + year_month: YearMonthDuration::from_be_bytes(bytes[0..8].try_into().unwrap()), + day_time: DayTimeDuration::from_be_bytes(bytes[8..24].try_into().unwrap()), + } + } + + /// [fn:years-from-duration](https://www.w3.org/TR/xpath-functions-31/#func-years-from-duration) + #[inline] + #[must_use] + pub fn years(self) -> i64 { + self.year_month.years() + } + + /// [fn:months-from-duration](https://www.w3.org/TR/xpath-functions-31/#func-months-from-duration) + #[inline] + #[must_use] + pub fn months(self) -> i64 { + self.year_month.months() + } + + /// [fn:days-from-duration](https://www.w3.org/TR/xpath-functions-31/#func-days-from-duration) + #[inline] + #[must_use] + pub fn days(self) -> i64 { + self.day_time.days() + } + + /// [fn:hours-from-duration](https://www.w3.org/TR/xpath-functions-31/#func-hours-from-duration) + #[inline] + #[must_use] + pub fn hours(self) -> i64 { + self.day_time.hours() + } + + /// [fn:minutes-from-duration](https://www.w3.org/TR/xpath-functions-31/#func-minutes-from-duration) + #[inline] + #[must_use] + pub fn minutes(self) -> i64 { + self.day_time.minutes() + } + + /// [fn:seconds-from-duration](https://www.w3.org/TR/xpath-functions-31/#func-seconds-from-duration) + #[inline] + #[must_use] + pub fn seconds(self) -> Decimal { + self.day_time.seconds() + } + + #[inline] + #[must_use] + pub(crate) const fn all_months(self) -> i64 { + self.year_month.all_months() + } + + #[inline] + #[must_use] + pub(crate) const fn all_seconds(self) -> Decimal { + self.day_time.as_seconds() + } + + #[inline] + #[must_use] + pub fn to_be_bytes(self) -> [u8; 24] { + let mut bytes = [0; 24]; + bytes[0..8].copy_from_slice(&self.year_month.to_be_bytes()); + bytes[8..24].copy_from_slice(&self.day_time.to_be_bytes()); + bytes + } + + /// [op:add-yearMonthDurations](https://www.w3.org/TR/xpath-functions-31/#func-add-yearMonthDurations) and [op:add-dayTimeDurations](https://www.w3.org/TR/xpath-functions-31/#func-add-dayTimeDurations) + /// + /// Returns `None` in case of overflow ([`FODT0002`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0002)). 
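+    ///
+    /// Note that, through the `construct` check above, `None` is also returned when the sum would pair a positive year-month part with a negative day-time part, or vice versa. A sketch (assuming the `ng_oxigraph::oxsdatatypes` re-export):
+    /// ```
+    /// use ng_oxigraph::oxsdatatypes::Duration;
+    /// use std::str::FromStr;
+    ///
+    /// let sum = Duration::from_str("P1Y")?.checked_add(Duration::from_str("P6M")?);
+    /// assert_eq!(sum, Some(Duration::from_str("P1Y6M")?));
+    /// assert_eq!(Duration::from_str("P1M")?.checked_add(Duration::from_str("-PT1S")?), None);
+    /// # Ok::<_, ng_oxigraph::oxsdatatypes::ParseDurationError>(())
+    /// ```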
+ #[inline] + #[must_use] + pub fn checked_add(self, rhs: impl Into<Self>) -> Option<Self> { + let rhs = rhs.into(); + Self::construct( + self.year_month.checked_add(rhs.year_month)?, + self.day_time.checked_add(rhs.day_time)?, + ) + .ok() + } + + /// [op:subtract-yearMonthDurations](https://www.w3.org/TR/xpath-functions-31/#func-subtract-yearMonthDurations) and [op:subtract-dayTimeDurations](https://www.w3.org/TR/xpath-functions-31/#func-subtract-dayTimeDurations) + /// + /// Returns `None` in case of overflow ([`FODT0002`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0002)). + #[inline] + #[must_use] + pub fn checked_sub(self, rhs: impl Into<Self>) -> Option<Self> { + let rhs = rhs.into(); + Self::construct( + self.year_month.checked_sub(rhs.year_month)?, + self.day_time.checked_sub(rhs.day_time)?, + ) + .ok() + } + + /// Unary negation. + /// + /// Returns `None` in case of overflow ([`FODT0002`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0002)). + #[inline] + #[must_use] + pub fn checked_neg(self) -> Option<Self> { + Some(Self { + year_month: self.year_month.checked_neg()?, + day_time: self.day_time.checked_neg()?, + }) + } + + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). + #[inline] + #[must_use] + pub fn is_identical_with(self, other: Self) -> bool { + self == other + } +} + +impl TryFrom<StdDuration> for Duration { + type Error = DurationOverflowError; + + #[inline] + fn try_from(value: StdDuration) -> Result<Self, Self::Error> { + Ok(DayTimeDuration::try_from(value)?.into()) + } +} + +impl FromStr for Duration { + type Err = ParseDurationError; + + fn from_str(input: &str) -> Result<Self, Self::Err> { + let parts = ensure_complete(input, duration_parts)?; + if parts.year_month.is_none() && parts.day_time.is_none() { + return Err(Self::Err::msg("Empty duration")); + } + Ok(Self::new( + parts.year_month.unwrap_or(0), + parts.day_time.unwrap_or_default(), + )?) + } +} + +impl fmt::Display for Duration { + #[allow(clippy::many_single_char_names)] + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let ym = self.year_month.months; + let ss = self.day_time.seconds; + + if (ym < 0 && ss > 0.into()) || (ym > 0 && ss < 0.into()) { + return Err(fmt::Error); // Not able to format with only a part of the duration that is negative + } + if ym < 0 || ss < 0.into() { + f.write_str("-")?; + } + f.write_str("P")?; + + if ym == 0 && ss == 0.into() { + return f.write_str("T0S"); + } + + { + let y = ym / 12; + let m = ym % 12; + + if y != 0 { + if m == 0 { + write!(f, "{}Y", y.abs())?; + } else { + write!(f, "{}Y{}M", y.abs(), m.abs())?; + } + } else if m != 0 || ss == 0.into() { + write!(f, "{}M", m.abs())?; + } + } + + { + let s_int = ss.as_i128(); + let d = s_int / 86400; + let h = (s_int % 86400) / 3600; + let m = (s_int % 3600) / 60; + let s = ss + .checked_sub( + Decimal::try_from(d * 86400 + h * 3600 + m * 60).map_err(|_| fmt::Error)?, + ) + .ok_or(fmt::Error)?; + + if d != 0 { + write!(f, "{}D", d.abs())?; + } + + if h != 0 || m != 0 || s != 0.into() { + f.write_str("T")?; + if h != 0 { + write!(f, "{}H", h.abs())?; + } + if m != 0 { + write!(f, "{}M", m.abs())?; + } + if s != 0.into() { + write!(f, "{}S", s.checked_abs().ok_or(fmt::Error)?)?; + } + } + } + Ok(()) + } +} + +impl PartialOrd for Duration { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + let first = DateTime::new(1969, 9, 1, 0, 0, 0.into(), None).ok()?; + let first_result = first + .checked_add_duration(*self)? 
+ .partial_cmp(&first.checked_add_duration(*other)?); + let second = DateTime::new(1697, 2, 1, 0, 0, 0.into(), None).ok()?; + let second_result = second + .checked_add_duration(*self)? + .partial_cmp(&second.checked_add_duration(*other)?); + let third = DateTime::new(1903, 3, 1, 0, 0, 0.into(), None).ok()?; + let third_result = third + .checked_add_duration(*self)? + .partial_cmp(&third.checked_add_duration(*other)?); + let fourth = DateTime::new(1903, 7, 1, 0, 0, 0.into(), None).ok()?; + let fourth_result = fourth + .checked_add_duration(*self)? + .partial_cmp(&fourth.checked_add_duration(*other)?); + if first_result == second_result + && second_result == third_result + && third_result == fourth_result + { + first_result + } else { + None + } + } +} + +/// [XML Schema `yearMonthDuration` datatype](https://www.w3.org/TR/xmlschema11-2/#yearMonthDuration) +/// +/// It stores the duration as a number of months encoded using a [`i64`]. +#[derive( + Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash, Default, Serialize, Deserialize, +)] +pub struct YearMonthDuration { + months: i64, +} + +impl YearMonthDuration { + pub const MAX: Self = Self { months: i64::MAX }; + pub const MIN: Self = Self { months: i64::MIN }; + + #[inline] + pub fn new(months: impl Into<i64>) -> Self { + Self { + months: months.into(), + } + } + + #[inline] + pub fn from_be_bytes(bytes: [u8; 8]) -> Self { + Self { + months: i64::from_be_bytes(bytes), + } + } + + /// [fn:years-from-duration](https://www.w3.org/TR/xpath-functions-31/#func-years-from-duration) + #[inline] + pub fn years(self) -> i64 { + self.months / 12 + } + + /// [fn:months-from-duration](https://www.w3.org/TR/xpath-functions-31/#func-months-from-duration) + #[inline] + pub fn months(self) -> i64 { + self.months % 12 + } + + #[inline] + pub(crate) const fn all_months(self) -> i64 { + self.months + } + + #[inline] + pub fn to_be_bytes(self) -> [u8; 8] { + self.months.to_be_bytes() + } + + /// [op:add-yearMonthDurations](https://www.w3.org/TR/xpath-functions-31/#func-add-yearMonthDurations) + /// + /// Returns `None` in case of overflow ([`FODT0002`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0002)). + #[inline] + pub fn checked_add(self, rhs: impl Into<Self>) -> Option<Self> { + let rhs = rhs.into(); + Some(Self { + months: self.months.checked_add(rhs.months)?, + }) + } + + /// [op:subtract-yearMonthDurations](https://www.w3.org/TR/xpath-functions-31/#func-subtract-yearMonthDurations) + /// + /// Returns `None` in case of overflow ([`FODT0002`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0002)). + #[inline] + pub fn checked_sub(self, rhs: impl Into<Self>) -> Option<Self> { + let rhs = rhs.into(); + Some(Self { + months: self.months.checked_sub(rhs.months)?, + }) + } + + /// Unary negation. + /// + /// Returns `None` in case of overflow ([`FODT0002`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0002)). + #[inline] + pub fn checked_neg(self) -> Option<Self> { + Some(Self { + months: self.months.checked_neg()?, + }) + } + + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). 
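+ ///
+ /// Editor's note (an illustration, not upstream documentation): for this type,
+ /// identity coincides with equality on the stored number of months, so two
+ /// lexically different literals can denote the identical value.
+ ///
+ /// ```ignore
+ /// use ng_oxigraph::oxsdatatypes::YearMonthDuration;
+ /// use std::str::FromStr;
+ ///
+ /// // Both literals are stored as 12 months.
+ /// let a = YearMonthDuration::from_str("P1Y").unwrap();
+ /// let b = YearMonthDuration::from_str("P12M").unwrap();
+ /// assert!(a.is_identical_with(b));
+ /// ```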
+ #[inline] + pub fn is_identical_with(self, other: Self) -> bool { + self == other + } +} + +impl From<YearMonthDuration> for Duration { + #[inline] + fn from(value: YearMonthDuration) -> Self { + Self { + year_month: value, + day_time: DayTimeDuration::default(), + } + } +} + +impl TryFrom<Duration> for YearMonthDuration { + type Error = DurationOverflowError; + + #[inline] + fn try_from(value: Duration) -> Result<Self, Self::Error> { + if value.day_time == DayTimeDuration::default() { + Ok(value.year_month) + } else { + Err(DurationOverflowError) + } + } +} + +impl FromStr for YearMonthDuration { + type Err = ParseDurationError; + + fn from_str(input: &str) -> Result<Self, Self::Err> { + let parts = ensure_complete(input, duration_parts)?; + if parts.day_time.is_some() { + return Err(Self::Err::msg( + "There must not be any day or time component in a yearMonthDuration", + )); + } + Ok(Self::new( + parts + .year_month + .ok_or(Self::Err::msg("No year and month values found"))?, + )) + } +} + +impl fmt::Display for YearMonthDuration { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.months == 0 { + f.write_str("P0M") + } else { + Duration::from(*self).fmt(f) + } + } +} + +impl PartialEq<Duration> for YearMonthDuration { + #[inline] + fn eq(&self, other: &Duration) -> bool { + Duration::from(*self).eq(other) + } +} + +impl PartialEq<YearMonthDuration> for Duration { + #[inline] + fn eq(&self, other: &YearMonthDuration) -> bool { + self.eq(&Self::from(*other)) + } +} + +impl PartialOrd<Duration> for YearMonthDuration { + #[inline] + fn partial_cmp(&self, other: &Duration) -> Option<Ordering> { + Duration::from(*self).partial_cmp(other) + } +} + +impl PartialOrd<YearMonthDuration> for Duration { + #[inline] + fn partial_cmp(&self, other: &YearMonthDuration) -> Option<Ordering> { + self.partial_cmp(&Self::from(*other)) + } +} + +/// [XML Schema `dayTimeDuration` datatype](https://www.w3.org/TR/xmlschema11-2/#dayTimeDuration) +/// +/// It stores the duration as a number of seconds encoded using a [`Decimal`]. 
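+///
+/// A minimal editor-added sketch of the seconds-based representation (an
+/// illustration assuming this module is exposed as `ng_oxigraph::oxsdatatypes`,
+/// not upstream documentation):
+///
+/// ```ignore
+/// use ng_oxigraph::oxsdatatypes::DayTimeDuration;
+/// use std::str::FromStr;
+///
+/// // "P1DT1H" is stored as 25 * 3600 = 90000 seconds.
+/// let d = DayTimeDuration::from_str("P1DT1H").unwrap();
+/// assert_eq!(d.days(), 1);
+/// assert_eq!(d.hours(), 1);
+/// assert_eq!(d.as_seconds(), 90_000.into());
+/// ```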
+#[derive( + Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash, Default, Serialize, Deserialize, +)] +pub struct DayTimeDuration { + seconds: Decimal, +} + +impl DayTimeDuration { + pub const MAX: Self = Self { + seconds: Decimal::MAX, + }; + pub const MIN: Self = Self { + seconds: Decimal::MIN, + }; + + #[inline] + pub fn new(seconds: impl Into<Decimal>) -> Self { + Self { + seconds: seconds.into(), + } + } + + #[inline] + pub fn from_be_bytes(bytes: [u8; 16]) -> Self { + Self { + seconds: Decimal::from_be_bytes(bytes), + } + } + + /// [fn:days-from-duration](https://www.w3.org/TR/xpath-functions-31/#func-days-from-duration) + #[allow(clippy::cast_possible_truncation)] + #[inline] + pub fn days(self) -> i64 { + (self.seconds.as_i128() / 86400) as i64 + } + + /// [fn:hours-from-duration](https://www.w3.org/TR/xpath-functions-31/#func-hours-from-duration) + #[allow(clippy::cast_possible_truncation)] + #[inline] + pub fn hours(self) -> i64 { + ((self.seconds.as_i128() % 86400) / 3600) as i64 + } + + /// [fn:minutes-from-duration](https://www.w3.org/TR/xpath-functions-31/#func-minutes-from-duration) + #[allow(clippy::cast_possible_truncation)] + #[inline] + pub fn minutes(self) -> i64 { + ((self.seconds.as_i128() % 3600) / 60) as i64 + } + + /// [fn:seconds-from-duration](https://www.w3.org/TR/xpath-functions-31/#func-seconds-from-duration) + #[inline] + pub fn seconds(self) -> Decimal { + self.seconds.checked_rem(60).unwrap() + } + + /// The duration in seconds. + #[inline] + pub const fn as_seconds(self) -> Decimal { + self.seconds + } + + #[inline] + pub fn to_be_bytes(self) -> [u8; 16] { + self.seconds.to_be_bytes() + } + + /// [op:add-dayTimeDurations](https://www.w3.org/TR/xpath-functions-31/#func-add-dayTimeDurations) + /// + /// Returns `None` in case of overflow ([`FODT0002`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0002)). + #[inline] + pub fn checked_add(self, rhs: impl Into<Self>) -> Option<Self> { + let rhs = rhs.into(); + Some(Self { + seconds: self.seconds.checked_add(rhs.seconds)?, + }) + } + + /// [op:subtract-dayTimeDurations](https://www.w3.org/TR/xpath-functions-31/#func-subtract-dayTimeDurations) + /// + /// Returns `None` in case of overflow ([`FODT0002`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0002)). + #[inline] + pub fn checked_sub(self, rhs: impl Into<Self>) -> Option<Self> { + let rhs = rhs.into(); + Some(Self { + seconds: self.seconds.checked_sub(rhs.seconds)?, + }) + } + + /// Unary negation. + /// + /// Returns `None` in case of overflow ([`FODT0002`](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0002)). + #[inline] + pub fn checked_neg(self) -> Option<Self> { + Some(Self { + seconds: self.seconds.checked_neg()?, + }) + } + + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). 
+ #[inline] + pub fn is_identical_with(self, other: Self) -> bool { + self == other + } +} + +impl From<DayTimeDuration> for Duration { + #[inline] + fn from(value: DayTimeDuration) -> Self { + Self { + year_month: YearMonthDuration::default(), + day_time: value, + } + } +} + +impl TryFrom<Duration> for DayTimeDuration { + type Error = DurationOverflowError; + + #[inline] + fn try_from(value: Duration) -> Result<Self, Self::Error> { + if value.year_month == YearMonthDuration::default() { + Ok(value.day_time) + } else { + Err(DurationOverflowError) + } + } +} + +impl TryFrom<StdDuration> for DayTimeDuration { + type Error = DurationOverflowError; + + #[inline] + fn try_from(value: StdDuration) -> Result<Self, Self::Error> { + Ok(Self { + seconds: Decimal::new( + i128::try_from(value.as_nanos()).map_err(|_| DurationOverflowError)?, + 9, + ) + .map_err(|_| DurationOverflowError)?, + }) + } +} + +impl TryFrom<DayTimeDuration> for StdDuration { + type Error = DurationOverflowError; + + #[inline] + fn try_from(value: DayTimeDuration) -> Result<Self, Self::Error> { + if value.seconds.is_negative() { + return Err(DurationOverflowError); + } + let secs = value.seconds.checked_floor().ok_or(DurationOverflowError)?; + let nanos = value + .seconds + .checked_sub(secs) + .ok_or(DurationOverflowError)? + .checked_mul(1_000_000_000) + .ok_or(DurationOverflowError)? + .checked_floor() + .ok_or(DurationOverflowError)?; + Ok(Self::new( + secs.as_i128() + .try_into() + .map_err(|_| DurationOverflowError)?, + nanos + .as_i128() + .try_into() + .map_err(|_| DurationOverflowError)?, + )) + } +} + +impl FromStr for DayTimeDuration { + type Err = ParseDurationError; + + fn from_str(input: &str) -> Result<Self, Self::Err> { + let parts = ensure_complete(input, duration_parts)?; + if parts.year_month.is_some() { + return Err(Self::Err::msg( + "There must not be any year or month component in a dayTimeDuration", + )); + } + Ok(Self::new( + parts + .day_time + .ok_or(Self::Err::msg("No day or time values found"))?, + )) + } +} + +impl fmt::Display for DayTimeDuration { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + Duration::from(*self).fmt(f) + } +} + +impl PartialEq<Duration> for DayTimeDuration { + #[inline] + fn eq(&self, other: &Duration) -> bool { + Duration::from(*self).eq(other) + } +} + +impl PartialEq<DayTimeDuration> for Duration { + #[inline] + fn eq(&self, other: &DayTimeDuration) -> bool { + self.eq(&Self::from(*other)) + } +} + +impl PartialEq<YearMonthDuration> for DayTimeDuration { + #[inline] + fn eq(&self, other: &YearMonthDuration) -> bool { + Duration::from(*self).eq(&Duration::from(*other)) + } +} + +impl PartialEq<DayTimeDuration> for YearMonthDuration { + #[inline] + fn eq(&self, other: &DayTimeDuration) -> bool { + Duration::from(*self).eq(&Duration::from(*other)) + } +} + +impl PartialOrd<Duration> for DayTimeDuration { + #[inline] + fn partial_cmp(&self, other: &Duration) -> Option<Ordering> { + Duration::from(*self).partial_cmp(other) + } +} + +impl PartialOrd<DayTimeDuration> for Duration { + #[inline] + fn partial_cmp(&self, other: &DayTimeDuration) -> Option<Ordering> { + self.partial_cmp(&Self::from(*other)) + } +} + +impl PartialOrd<YearMonthDuration> for DayTimeDuration { + #[inline] + fn partial_cmp(&self, other: &YearMonthDuration) -> Option<Ordering> { + Duration::from(*self).partial_cmp(&Duration::from(*other)) + } +} + +impl PartialOrd<DayTimeDuration> for YearMonthDuration { + #[inline] + fn partial_cmp(&self, other: &DayTimeDuration) -> 
Option<Ordering> { + Duration::from(*self).partial_cmp(&Duration::from(*other)) + } +} + +// [6] duYearFrag ::= unsignedNoDecimalPtNumeral 'Y' +// [7] duMonthFrag ::= unsignedNoDecimalPtNumeral 'M' +// [8] duDayFrag ::= unsignedNoDecimalPtNumeral 'D' +// [9] duHourFrag ::= unsignedNoDecimalPtNumeral 'H' +// [10] duMinuteFrag ::= unsignedNoDecimalPtNumeral 'M' +// [11] duSecondFrag ::= (unsignedNoDecimalPtNumeral | unsignedDecimalPtNumeral) 'S' +// [12] duYearMonthFrag ::= (duYearFrag duMonthFrag?) | duMonthFrag +// [13] duTimeFrag ::= 'T' ((duHourFrag duMinuteFrag? duSecondFrag?) | (duMinuteFrag duSecondFrag?) | duSecondFrag) +// [14] duDayTimeFrag ::= (duDayFrag duTimeFrag?) | duTimeFrag +// [15] durationLexicalRep ::= '-'? 'P' ((duYearMonthFrag duDayTimeFrag?) | duDayTimeFrag) +struct DurationParts { + year_month: Option<i64>, + day_time: Option<Decimal>, +} + +fn duration_parts(input: &str) -> Result<(DurationParts, &str), ParseDurationError> { + // States + const START: u32 = 0; + const AFTER_YEAR: u32 = 1; + const AFTER_MONTH: u32 = 2; + const AFTER_DAY: u32 = 3; + const AFTER_T: u32 = 4; + const AFTER_HOUR: u32 = 5; + const AFTER_MINUTE: u32 = 6; + const AFTER_SECOND: u32 = 7; + + let (is_negative, input) = if let Some(left) = input.strip_prefix('-') { + (true, left) + } else { + (false, input) + }; + let mut input = expect_char(input, 'P', "Durations must start with 'P'")?; + let mut state = START; + let mut year_month: Option<i64> = None; + let mut day_time: Option<Decimal> = None; + while !input.is_empty() { + if let Some(left) = input.strip_prefix('T') { + if state >= AFTER_T { + return Err(ParseDurationError::msg("Duplicated time separator 'T'")); + } + state = AFTER_T; + input = left; + } else { + let (number_str, left) = decimal_prefix(input); + match left.chars().next() { + Some('Y') if state < AFTER_YEAR => { + year_month = Some( + year_month + .unwrap_or_default() + .checked_add( + apply_i64_neg( + i64::from_str(number_str).map_err(|_| OVERFLOW_ERROR)?, + is_negative, + )? + .checked_mul(12) + .ok_or(OVERFLOW_ERROR)?, + ) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_YEAR; + } + Some('M') if state < AFTER_MONTH => { + year_month = Some( + year_month + .unwrap_or_default() + .checked_add(apply_i64_neg( + i64::from_str(number_str).map_err(|_| OVERFLOW_ERROR)?, + is_negative, + )?) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_MONTH; + } + Some('D') if state < AFTER_DAY => { + if number_str.contains('.') { + return Err(ParseDurationError::msg( + "Decimal numbers are not allowed for days", + )); + } + day_time = Some( + day_time + .unwrap_or_default() + .checked_add( + apply_decimal_neg( + Decimal::from_str(number_str).map_err(|_| OVERFLOW_ERROR)?, + is_negative, + )? + .checked_mul(86400) + .ok_or(OVERFLOW_ERROR)?, + ) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_DAY; + } + Some('H') if state == AFTER_T => { + if number_str.contains('.') { + return Err(ParseDurationError::msg( + "Decimal numbers are not allowed for hours", + )); + } + day_time = Some( + day_time + .unwrap_or_default() + .checked_add( + apply_decimal_neg( + Decimal::from_str(number_str).map_err(|_| OVERFLOW_ERROR)?, + is_negative, + )? 
+ .checked_mul(3600) + .ok_or(OVERFLOW_ERROR)?, + ) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_HOUR; + } + Some('M') if (AFTER_T..AFTER_MINUTE).contains(&state) => { + if number_str.contains('.') { + return Err(ParseDurationError::msg( + "Decimal numbers are not allowed for minutes", + )); + } + day_time = Some( + day_time + .unwrap_or_default() + .checked_add( + apply_decimal_neg( + Decimal::from_str(number_str).map_err(|_| OVERFLOW_ERROR)?, + is_negative, + )? + .checked_mul(60) + .ok_or(OVERFLOW_ERROR)?, + ) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_MINUTE; + } + Some('S') if (AFTER_T..AFTER_SECOND).contains(&state) => { + day_time = Some( + day_time + .unwrap_or_default() + .checked_add(apply_decimal_neg( + Decimal::from_str(number_str).map_err(|_| OVERFLOW_ERROR)?, + is_negative, + )?) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_SECOND; + } + Some(_) => return Err(ParseDurationError::msg("Unexpected type character")), + None => { + return Err(ParseDurationError::msg( + "Numbers in durations must be followed by a type character", + )) + } + } + input = &left[1..]; + } + } + + Ok(( + DurationParts { + year_month, + day_time, + }, + input, + )) +} + +fn apply_i64_neg(value: i64, is_negative: bool) -> Result<i64, ParseDurationError> { + if is_negative { + value.checked_neg().ok_or(OVERFLOW_ERROR) + } else { + Ok(value) + } +} + +fn apply_decimal_neg(value: Decimal, is_negative: bool) -> Result<Decimal, ParseDurationError> { + if is_negative { + value.checked_neg().ok_or(OVERFLOW_ERROR) + } else { + Ok(value) + } +} + +fn ensure_complete<T>( + input: &str, + parse: impl FnOnce(&str) -> Result<(T, &str), ParseDurationError>, +) -> Result<T, ParseDurationError> { + let (result, left) = parse(input)?; + if !left.is_empty() { + return Err(ParseDurationError::msg("Unrecognized value suffix")); + } + Ok(result) +} + +fn expect_char<'a>( + input: &'a str, + constant: char, + error_message: &'static str, +) -> Result<&'a str, ParseDurationError> { + if let Some(left) = input.strip_prefix(constant) { + Ok(left) + } else { + Err(ParseDurationError::msg(error_message)) + } +} + +fn decimal_prefix(input: &str) -> (&str, &str) { + let mut end = input.len(); + let mut dot_seen = false; + for (i, c) in input.char_indices() { + if c.is_ascii_digit() { + // Ok + } else if c == '.' && !dot_seen { + dot_seen = true; + } else { + end = i; + break; + } + } + input.split_at(end) +} + +/// A parsing error +#[derive(Debug, Clone, thiserror::Error)] +#[error("{msg}")] +pub struct ParseDurationError { + msg: &'static str, +} + +const OVERFLOW_ERROR: ParseDurationError = ParseDurationError { + msg: "Overflow error", +}; + +impl ParseDurationError { + const fn msg(msg: &'static str) -> Self { + Self { msg } + } +} + +/// An overflow during [`Duration`]-related operations. +/// +/// Matches XPath [`FODT0002` error](https://www.w3.org/TR/xpath-functions-31/#ERRFODT0002). +#[derive(Debug, Clone, Copy, thiserror::Error)] +#[error("overflow during xsd:duration computation")] +pub struct DurationOverflowError; + +/// The year-month and the day-time components of a [`Duration`] have an opposite sign. 
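+///
+/// Editor's illustration of when this error is raised (a hedged sketch, not
+/// upstream documentation):
+///
+/// ```ignore
+/// use ng_oxigraph::oxsdatatypes::{Decimal, Duration};
+///
+/// // +1 month combined with -1 second mixes signs and is rejected.
+/// assert!(Duration::new(1, Decimal::from(-1)).is_err());
+/// // Components sharing a sign are accepted.
+/// assert!(Duration::new(1, Decimal::from(1)).is_ok());
+/// ```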
+#[derive(Debug, Clone, Copy, thiserror::Error)] +#[error("The xsd:yearMonthDuration and xsd:dayTimeDuration components of a xsd:duration can't have opposite sign")] +pub struct OppositeSignInDurationComponentsError; + +impl From<OppositeSignInDurationComponentsError> for ParseDurationError { + #[inline] + fn from(_: OppositeSignInDurationComponentsError) -> Self { + Self { + msg: "The xsd:yearMonthDuration and xsd:dayTimeDuration components of a xsd:duration can't have opposite sign" + } + } +} + +#[cfg(test)] +#[allow(clippy::panic_in_result_fn)] +mod tests { + use super::*; + use std::error::Error; + + #[test] + fn from_str() -> Result<(), ParseDurationError> { + let min = Duration::new(i64::MIN, Decimal::MIN)?; + let max = Duration::new(i64::MAX, Decimal::MAX)?; + + assert_eq!(YearMonthDuration::from_str("P1Y")?.to_string(), "P1Y"); + assert_eq!(Duration::from_str("P1Y")?.to_string(), "P1Y"); + assert_eq!(YearMonthDuration::from_str("P1M")?.to_string(), "P1M"); + assert_eq!(Duration::from_str("P1M")?.to_string(), "P1M"); + assert_eq!(DayTimeDuration::from_str("P1D")?.to_string(), "P1D"); + assert_eq!(Duration::from_str("P1D")?.to_string(), "P1D"); + assert_eq!(DayTimeDuration::from_str("PT1H")?.to_string(), "PT1H"); + assert_eq!(Duration::from_str("PT1H")?.to_string(), "PT1H"); + assert_eq!(DayTimeDuration::from_str("PT1M")?.to_string(), "PT1M"); + assert_eq!(Duration::from_str("PT1M")?.to_string(), "PT1M"); + assert_eq!(DayTimeDuration::from_str("PT1.1S")?.to_string(), "PT1.1S"); + assert_eq!(Duration::from_str("PT1.1S")?.to_string(), "PT1.1S"); + assert_eq!(YearMonthDuration::from_str("-P1Y")?.to_string(), "-P1Y"); + assert_eq!(Duration::from_str("-P1Y")?.to_string(), "-P1Y"); + assert_eq!(YearMonthDuration::from_str("-P1M")?.to_string(), "-P1M"); + assert_eq!(Duration::from_str("-P1M")?.to_string(), "-P1M"); + assert_eq!(DayTimeDuration::from_str("-P1D")?.to_string(), "-P1D"); + assert_eq!(Duration::from_str("-P1D")?.to_string(), "-P1D"); + assert_eq!(DayTimeDuration::from_str("-PT1H")?.to_string(), "-PT1H"); + assert_eq!(Duration::from_str("-PT1H")?.to_string(), "-PT1H"); + assert_eq!(DayTimeDuration::from_str("-PT1M")?.to_string(), "-PT1M"); + assert_eq!(Duration::from_str("-PT1M")?.to_string(), "-PT1M"); + assert_eq!(DayTimeDuration::from_str("-PT1S")?.to_string(), "-PT1S"); + assert_eq!(Duration::from_str("-PT1S")?.to_string(), "-PT1S"); + assert_eq!(DayTimeDuration::from_str("-PT1.1S")?.to_string(), "-PT1.1S"); + assert_eq!(Duration::from_str("-PT1.1S")?.to_string(), "-PT1.1S"); + assert_eq!(Duration::from_str(&max.to_string())?, max); + assert_eq!(Duration::from_str(&min.to_string())?, min); + assert_eq!(Duration::from_str("PT0H")?.to_string(), "PT0S"); + assert_eq!(Duration::from_str("-PT0H")?.to_string(), "PT0S"); + assert_eq!(YearMonthDuration::from_str("P0Y")?.to_string(), "P0M"); + assert_eq!(DayTimeDuration::from_str("PT0H")?.to_string(), "PT0S"); + Ok(()) + } + + #[test] + fn from_std() -> Result<(), DurationOverflowError> { + assert_eq!( + Duration::try_from(StdDuration::new(10, 10))?.to_string(), + "PT10.00000001S" + ); + Ok(()) + } + + #[test] + fn to_std() -> Result<(), Box<dyn Error>> { + let duration = StdDuration::try_from(DayTimeDuration::from_str("PT10.00000001S")?)?; + assert_eq!(duration.as_secs(), 10); + assert_eq!(duration.subsec_nanos(), 10); + Ok(()) + } + + #[test] + fn to_be_bytes() { + assert_eq!( + Duration::from_be_bytes(Duration::MIN.to_be_bytes()), + Duration::MIN + ); + assert_eq!( + Duration::from_be_bytes(Duration::MAX.to_be_bytes()), + 
Duration::MAX + ); + assert_eq!( + YearMonthDuration::from_be_bytes(YearMonthDuration::MIN.to_be_bytes()), + YearMonthDuration::MIN + ); + assert_eq!( + YearMonthDuration::from_be_bytes(YearMonthDuration::MAX.to_be_bytes()), + YearMonthDuration::MAX + ); + assert_eq!( + DayTimeDuration::from_be_bytes(DayTimeDuration::MIN.to_be_bytes()), + DayTimeDuration::MIN + ); + assert_eq!( + DayTimeDuration::from_be_bytes(DayTimeDuration::MAX.to_be_bytes()), + DayTimeDuration::MAX + ); + } + + #[test] + fn equals() -> Result<(), ParseDurationError> { + assert_eq!( + YearMonthDuration::from_str("P1Y")?, + YearMonthDuration::from_str("P12M")? + ); + assert_eq!( + YearMonthDuration::from_str("P1Y")?, + Duration::from_str("P12M")? + ); + assert_eq!( + Duration::from_str("P1Y")?, + YearMonthDuration::from_str("P12M")? + ); + assert_eq!(Duration::from_str("P1Y")?, Duration::from_str("P12M")?); + assert_eq!( + DayTimeDuration::from_str("PT24H")?, + DayTimeDuration::from_str("P1D")? + ); + assert_eq!( + DayTimeDuration::from_str("PT24H")?, + Duration::from_str("P1D")? + ); + assert_eq!( + Duration::from_str("PT24H")?, + DayTimeDuration::from_str("P1D")? + ); + assert_eq!(Duration::from_str("PT24H")?, Duration::from_str("P1D")?); + assert_ne!(Duration::from_str("P1Y")?, Duration::from_str("P365D")?); + assert_eq!(Duration::from_str("P0Y")?, Duration::from_str("P0D")?); + assert_ne!(Duration::from_str("P1Y")?, Duration::from_str("P365D")?); + assert_eq!(Duration::from_str("P2Y")?, Duration::from_str("P24M")?); + assert_eq!(Duration::from_str("P10D")?, Duration::from_str("PT240H")?); + assert_eq!( + Duration::from_str("P2Y0M0DT0H0M0S")?, + Duration::from_str("P24M")? + ); + assert_eq!( + Duration::from_str("P0Y0M10D")?, + Duration::from_str("PT240H")? + ); + assert_ne!(Duration::from_str("P1M")?, Duration::from_str("P30D")?); + Ok(()) + } + + #[test] + #[allow(clippy::neg_cmp_op_on_partial_ord)] + fn cmp() -> Result<(), ParseDurationError> { + assert!(Duration::from_str("P1Y1D")? < Duration::from_str("P13MT25H")?); + assert!(YearMonthDuration::from_str("P1Y")? < YearMonthDuration::from_str("P13M")?); + assert!(Duration::from_str("P1Y")? < YearMonthDuration::from_str("P13M")?); + assert!(YearMonthDuration::from_str("P1Y")? < Duration::from_str("P13M")?); + assert!(DayTimeDuration::from_str("P1D")? < DayTimeDuration::from_str("PT25H")?); + assert!(DayTimeDuration::from_str("PT1H")? < DayTimeDuration::from_str("PT61M")?); + assert!(DayTimeDuration::from_str("PT1M")? < DayTimeDuration::from_str("PT61S")?); + assert!(Duration::from_str("PT1H")? < DayTimeDuration::from_str("PT61M")?); + assert!(DayTimeDuration::from_str("PT1H")? < Duration::from_str("PT61M")?); + assert!(YearMonthDuration::from_str("P1M")? < DayTimeDuration::from_str("P40D")?); + assert!(DayTimeDuration::from_str("P25D")? 
< YearMonthDuration::from_str("P1M")?); + Ok(()) + } + + #[test] + fn years() -> Result<(), ParseDurationError> { + assert_eq!(Duration::from_str("P20Y15M")?.years(), 21); + assert_eq!(Duration::from_str("-P15M")?.years(), -1); + assert_eq!(Duration::from_str("-P2DT15H")?.years(), 0); + Ok(()) + } + + #[test] + fn months() -> Result<(), ParseDurationError> { + assert_eq!(Duration::from_str("P20Y15M")?.months(), 3); + assert_eq!(Duration::from_str("-P20Y18M")?.months(), -6); + assert_eq!(Duration::from_str("-P2DT15H0M0S")?.months(), 0); + Ok(()) + } + + #[test] + fn days() -> Result<(), ParseDurationError> { + assert_eq!(Duration::from_str("P3DT10H")?.days(), 3); + assert_eq!(Duration::from_str("P3DT55H")?.days(), 5); + assert_eq!(Duration::from_str("P3Y5M")?.days(), 0); + Ok(()) + } + + #[test] + fn hours() -> Result<(), ParseDurationError> { + assert_eq!(Duration::from_str("P3DT10H")?.hours(), 10); + assert_eq!(Duration::from_str("P3DT12H32M12S")?.hours(), 12); + assert_eq!(Duration::from_str("PT123H")?.hours(), 3); + assert_eq!(Duration::from_str("-P3DT10H")?.hours(), -10); + Ok(()) + } + + #[test] + fn minutes() -> Result<(), ParseDurationError> { + assert_eq!(Duration::from_str("P3DT10H")?.minutes(), 0); + assert_eq!(Duration::from_str("-P5DT12H30M")?.minutes(), -30); + Ok(()) + } + + #[test] + fn seconds() -> Result<(), Box<dyn Error>> { + assert_eq!( + Duration::from_str("P3DT10H12.5S")?.seconds(), + Decimal::from_str("12.5")? + ); + assert_eq!( + Duration::from_str("-PT256S")?.seconds(), + Decimal::from_str("-16.0")? + ); + Ok(()) + } + + #[test] + fn add() -> Result<(), ParseDurationError> { + assert_eq!( + Duration::from_str("P2Y11M")?.checked_add(Duration::from_str("P3Y3M")?), + Some(Duration::from_str("P6Y2M")?) + ); + assert_eq!( + Duration::from_str("P2DT12H5M")?.checked_add(Duration::from_str("P5DT12H")?), + Some(Duration::from_str("P8DT5M")?) + ); + assert_eq!( + Duration::from_str("P1M2D")?.checked_add(Duration::from_str("-P3D")?), + None + ); + assert_eq!( + Duration::from_str("P1M2D")?.checked_add(Duration::from_str("-P2M")?), + None + ); + Ok(()) + } + + #[test] + fn sub() -> Result<(), ParseDurationError> { + assert_eq!( + Duration::from_str("P2Y11M")?.checked_sub(Duration::from_str("P3Y3M")?), + Some(Duration::from_str("-P4M")?) + ); + assert_eq!( + Duration::from_str("P2DT12H")?.checked_sub(Duration::from_str("P1DT10H30M")?), + Some(Duration::from_str("P1DT1H30M")?) + ); + assert_eq!( + Duration::from_str("P1M2D")?.checked_sub(Duration::from_str("P3D")?), + None + ); + assert_eq!( + Duration::from_str("P1M2D")?.checked_sub(Duration::from_str("P2M")?), + None + ); + Ok(()) + } + + #[test] + fn minimally_conformant() -> Result<(), ParseDurationError> { + // All minimally conforming processors must support fractional-second duration values + // to milliseconds (i.e. those expressible with three fraction digits). + assert_eq!(Duration::from_str("PT0.001S")?.to_string(), "PT0.001S"); + assert_eq!(Duration::from_str("-PT0.001S")?.to_string(), "-PT0.001S"); + + // All minimally conforming processors must support duration values with months values + // in the range −119999 to 119999 months (9999 years and 11 months) + // and seconds values in the range −31622400 to 31622400 seconds (one leap-year). 
+ assert_eq!( + Duration::from_str("P119999MT31622400S")?.to_string(), + "P9999Y11M366D" + ); + assert_eq!( + Duration::from_str("-P119999MT31622400S")?.to_string(), + "-P9999Y11M366D" + ); + Ok(()) + } +} diff --git a/ng-oxigraph/src/oxsdatatypes/float.rs b/ng-oxigraph/src/oxsdatatypes/float.rs new file mode 100644 index 0000000..45c3f64 --- /dev/null +++ b/ng-oxigraph/src/oxsdatatypes/float.rs @@ -0,0 +1,310 @@ +use crate::oxsdatatypes::{Boolean, Double, Integer}; +use serde::{Deserialize, Serialize}; +use std::cmp::Ordering; +use std::fmt; +use std::num::ParseFloatError; +use std::ops::{Add, Div, Mul, Neg, Sub}; +use std::str::FromStr; + +/// [XML Schema `float` datatype](https://www.w3.org/TR/xmlschema11-2/#float) +/// +/// Uses internally a [`f32`]. +/// +/// <div class="warning">Serialization does not follow the canonical mapping.</div> +#[derive(Debug, Clone, Copy, Default, PartialEq, Serialize, Deserialize)] +#[repr(transparent)] +pub struct Float { + value: f32, +} + +impl Float { + pub const INFINITY: Self = Self { + value: f32::INFINITY, + }; + pub const MAX: Self = Self { value: f32::MAX }; + pub const MIN: Self = Self { value: f32::MIN }; + pub const NAN: Self = Self { value: f32::NAN }; + pub const NEG_INFINITY: Self = Self { + value: f32::NEG_INFINITY, + }; + + #[inline] + #[must_use] + pub fn from_be_bytes(bytes: [u8; 4]) -> Self { + Self { + value: f32::from_be_bytes(bytes), + } + } + + #[inline] + #[must_use] + pub fn to_be_bytes(self) -> [u8; 4] { + self.value.to_be_bytes() + } + + /// [fn:abs](https://www.w3.org/TR/xpath-functions-31/#func-abs) + #[inline] + #[must_use] + pub fn abs(self) -> Self { + self.value.abs().into() + } + + /// [fn:ceiling](https://www.w3.org/TR/xpath-functions-31/#func-ceiling) + #[inline] + #[must_use] + pub fn ceil(self) -> Self { + self.value.ceil().into() + } + + /// [fn:floor](https://www.w3.org/TR/xpath-functions-31/#func-floor) + #[inline] + #[must_use] + pub fn floor(self) -> Self { + self.value.floor().into() + } + + /// [fn:round](https://www.w3.org/TR/xpath-functions-31/#func-round) + #[inline] + #[must_use] + pub fn round(self) -> Self { + self.value.round().into() + } + + #[inline] + #[must_use] + pub fn is_nan(self) -> bool { + self.value.is_nan() + } + + #[inline] + #[must_use] + pub fn is_finite(self) -> bool { + self.value.is_finite() + } + + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). 
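+ ///
+ /// Editor's sketch (not upstream documentation): identity is a bitwise
+ /// comparison, so it disagrees with `==` on NaN and on signed zeros.
+ ///
+ /// ```ignore
+ /// use ng_oxigraph::oxsdatatypes::Float;
+ ///
+ /// assert!(Float::NAN.is_identical_with(Float::NAN)); // identical...
+ /// assert_ne!(Float::NAN, Float::NAN); // ...but not equal
+ /// assert_eq!(Float::from(-0.), Float::from(0.)); // equal...
+ /// assert!(!Float::from(-0.).is_identical_with(Float::from(0.))); // ...but not identical
+ /// ```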
+ #[inline] + #[must_use] + pub fn is_identical_with(self, other: Self) -> bool { + self.value.to_bits() == other.value.to_bits() + } +} + +impl From<Float> for f32 { + #[inline] + fn from(value: Float) -> Self { + value.value + } +} + +impl From<Float> for f64 { + #[inline] + fn from(value: Float) -> Self { + value.value.into() + } +} + +impl From<f32> for Float { + #[inline] + fn from(value: f32) -> Self { + Self { value } + } +} + +impl From<i8> for Float { + #[inline] + fn from(value: i8) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<i16> for Float { + #[inline] + fn from(value: i16) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<u8> for Float { + #[inline] + fn from(value: u8) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<u16> for Float { + #[inline] + fn from(value: u16) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<Boolean> for Float { + #[inline] + fn from(value: Boolean) -> Self { + f32::from(bool::from(value)).into() + } +} + +impl From<Integer> for Float { + #[inline] + #[allow(clippy::cast_precision_loss)] + fn from(value: Integer) -> Self { + (i64::from(value) as f32).into() + } +} + +impl From<Double> for Float { + #[inline] + #[allow(clippy::cast_possible_truncation)] + fn from(value: Double) -> Self { + Self { + value: f64::from(value) as f32, + } + } +} + +impl FromStr for Float { + type Err = ParseFloatError; + + #[inline] + fn from_str(input: &str) -> Result<Self, Self::Err> { + Ok(f32::from_str(input)?.into()) + } +} + +impl fmt::Display for Float { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.value == f32::INFINITY { + f.write_str("INF") + } else if self.value == f32::NEG_INFINITY { + f.write_str("-INF") + } else { + self.value.fmt(f) + } + } +} + +impl PartialOrd for Float { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + self.value.partial_cmp(&other.value) + } +} + +impl Neg for Float { + type Output = Self; + + #[inline] + fn neg(self) -> Self { + (-self.value).into() + } +} + +impl Add for Float { + type Output = Self; + + #[inline] + fn add(self, rhs: Self) -> Self { + (self.value + rhs.value).into() + } +} + +impl Sub for Float { + type Output = Self; + + #[inline] + fn sub(self, rhs: Self) -> Self { + (self.value - rhs.value).into() + } +} + +impl Mul for Float { + type Output = Self; + + #[inline] + fn mul(self, rhs: Self) -> Self { + (self.value * rhs.value).into() + } +} + +impl Div for Float { + type Output = Self; + + #[inline] + fn div(self, rhs: Self) -> Self { + (self.value / rhs.value).into() + } +} + +#[cfg(test)] +#[allow(clippy::panic_in_result_fn)] +mod tests { + use super::*; + + #[test] + fn eq() { + assert_eq!(Float::from(0.), Float::from(0.)); + assert_ne!(Float::NAN, Float::NAN); + assert_eq!(Float::from(-0.), Float::from(0.)); + } + + #[test] + fn cmp() { + assert_eq!( + Float::from(0.).partial_cmp(&Float::from(0.)), + Some(Ordering::Equal) + ); + assert_eq!( + Float::INFINITY.partial_cmp(&Float::MAX), + Some(Ordering::Greater) + ); + assert_eq!( + Float::NEG_INFINITY.partial_cmp(&Float::MIN), + Some(Ordering::Less) + ); + assert_eq!(Float::NAN.partial_cmp(&Float::from(0.)), None); + assert_eq!(Float::NAN.partial_cmp(&Float::NAN), None); + assert_eq!( + Float::from(0.).partial_cmp(&Float::from(-0.)), + Some(Ordering::Equal) + ); + } + + #[test] + fn is_identical_with() { + assert!(Float::from(0.).is_identical_with(Float::from(0.))); + assert!(Float::NAN.is_identical_with(Float::NAN)); + 
assert!(!Float::from(-0.).is_identical_with(Float::from(0.))); + } + + #[test] + fn from_str() -> Result<(), ParseFloatError> { + assert_eq!(Float::from_str("NaN")?.to_string(), "NaN"); + assert_eq!(Float::from_str("INF")?.to_string(), "INF"); + assert_eq!(Float::from_str("+INF")?.to_string(), "INF"); + assert_eq!(Float::from_str("-INF")?.to_string(), "-INF"); + assert_eq!(Float::from_str("0.0E0")?.to_string(), "0"); + assert_eq!(Float::from_str("-0.0E0")?.to_string(), "-0"); + assert_eq!(Float::from_str("0.1e1")?.to_string(), "1"); + assert_eq!(Float::from_str("-0.1e1")?.to_string(), "-1"); + assert_eq!(Float::from_str("1.e1")?.to_string(), "10"); + assert_eq!(Float::from_str("-1.e1")?.to_string(), "-10"); + assert_eq!(Float::from_str("1")?.to_string(), "1"); + assert_eq!(Float::from_str("-1")?.to_string(), "-1"); + assert_eq!(Float::from_str("1.")?.to_string(), "1"); + assert_eq!(Float::from_str("-1.")?.to_string(), "-1"); + assert_eq!(Float::from_str(&f32::MIN.to_string())?, Float::MIN); + assert_eq!(Float::from_str(&f32::MAX.to_string())?, Float::MAX); + Ok(()) + } +} diff --git a/ng-oxigraph/src/oxsdatatypes/integer.rs b/ng-oxigraph/src/oxsdatatypes/integer.rs new file mode 100644 index 0000000..c23f9ed --- /dev/null +++ b/ng-oxigraph/src/oxsdatatypes/integer.rs @@ -0,0 +1,400 @@ +use crate::oxsdatatypes::{Boolean, Decimal, Double, Float}; +use serde::{Deserialize, Serialize}; +use std::fmt; +use std::num::ParseIntError; +use std::str::FromStr; + +/// [XML Schema `integer` datatype](https://www.w3.org/TR/xmlschema11-2/#integer) +/// +/// Uses internally a [`i64`]. +#[derive( + Debug, Clone, Copy, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize, +)] +#[repr(transparent)] +pub struct Integer { + value: i64, +} + +impl Integer { + pub const MAX: Self = Self { value: i64::MAX }; + pub const MIN: Self = Self { value: i64::MIN }; + + #[inline] + #[must_use] + pub fn from_be_bytes(bytes: [u8; 8]) -> Self { + Self { + value: i64::from_be_bytes(bytes), + } + } + + #[inline] + #[must_use] + pub fn to_be_bytes(self) -> [u8; 8] { + self.value.to_be_bytes() + } + + /// [op:numeric-add](https://www.w3.org/TR/xpath-functions-31/#func-numeric-add) + /// + /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). + #[inline] + #[must_use] + pub fn checked_add(self, rhs: impl Into<Self>) -> Option<Self> { + Some(Self { + value: self.value.checked_add(rhs.into().value)?, + }) + } + + /// [op:numeric-subtract](https://www.w3.org/TR/xpath-functions-31/#func-numeric-subtract) + /// + /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). + #[inline] + #[must_use] + pub fn checked_sub(self, rhs: impl Into<Self>) -> Option<Self> { + Some(Self { + value: self.value.checked_sub(rhs.into().value)?, + }) + } + + /// [op:numeric-multiply](https://www.w3.org/TR/xpath-functions-31/#func-numeric-multiply) + /// + /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). + #[inline] + #[must_use] + pub fn checked_mul(self, rhs: impl Into<Self>) -> Option<Self> { + Some(Self { + value: self.value.checked_mul(rhs.into().value)?, + }) + } + + /// [op:numeric-integer-divide](https://www.w3.org/TR/xpath-functions-31/#func-numeric-integer-divide) + /// + /// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). 
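+ ///
+ /// A brief editor-added sketch (not upstream documentation): the division
+ /// truncates towards zero, and `None` covers both division by zero and the
+ /// `i64::MIN / -1` overflow case.
+ ///
+ /// ```ignore
+ /// use ng_oxigraph::oxsdatatypes::Integer;
+ ///
+ /// assert_eq!(Integer::from(7).checked_div(2), Some(Integer::from(3)));
+ /// assert_eq!(Integer::from(-7).checked_div(2), Some(Integer::from(-3)));
+ /// assert_eq!(Integer::from(1).checked_div(0), None);
+ /// assert_eq!(Integer::MIN.checked_div(-1), None);
+ /// ```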
+ #[inline] + #[must_use] + pub fn checked_div(self, rhs: impl Into<Self>) -> Option<Self> { + Some(Self { + value: self.value.checked_div(rhs.into().value)?, + }) + } + + /// [op:numeric-mod](https://www.w3.org/TR/xpath-functions-31/#func-numeric-mod) + /// + /// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). + #[inline] + #[must_use] + pub fn checked_rem(self, rhs: impl Into<Self>) -> Option<Self> { + Some(Self { + value: self.value.checked_rem(rhs.into().value)?, + }) + } + + /// Euclidean remainder + /// + /// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). + #[inline] + #[must_use] + pub fn checked_rem_euclid(self, rhs: impl Into<Self>) -> Option<Self> { + Some(Self { + value: self.value.checked_rem_euclid(rhs.into().value)?, + }) + } + + /// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions-31/#func-numeric-unary-minus) + /// + /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). + #[inline] + #[must_use] + pub fn checked_neg(self) -> Option<Self> { + Some(Self { + value: self.value.checked_neg()?, + }) + } + + /// [fn:abs](https://www.w3.org/TR/xpath-functions-31/#func-abs) + /// + /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)). + #[inline] + #[must_use] + pub fn checked_abs(self) -> Option<Self> { + Some(Self { + value: self.value.checked_abs()?, + }) + } + + #[inline] + #[must_use] + pub const fn is_negative(self) -> bool { + self.value < 0 + } + + #[inline] + #[must_use] + pub const fn is_positive(self) -> bool { + self.value > 0 + } + + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). 
+ #[inline] + #[must_use] + pub fn is_identical_with(self, other: Self) -> bool { + self == other + } +} + +impl From<bool> for Integer { + #[inline] + fn from(value: bool) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<i8> for Integer { + #[inline] + fn from(value: i8) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<i16> for Integer { + #[inline] + fn from(value: i16) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<i32> for Integer { + #[inline] + fn from(value: i32) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<i64> for Integer { + #[inline] + fn from(value: i64) -> Self { + Self { value } + } +} + +impl From<u8> for Integer { + #[inline] + fn from(value: u8) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<u16> for Integer { + #[inline] + fn from(value: u16) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<u32> for Integer { + #[inline] + fn from(value: u32) -> Self { + Self { + value: value.into(), + } + } +} + +impl From<Boolean> for Integer { + #[inline] + fn from(value: Boolean) -> Self { + bool::from(value).into() + } +} + +impl From<Integer> for i64 { + #[inline] + fn from(value: Integer) -> Self { + value.value + } +} + +impl FromStr for Integer { + type Err = ParseIntError; + + #[inline] + fn from_str(input: &str) -> Result<Self, Self::Err> { + Ok(i64::from_str(input)?.into()) + } +} + +impl fmt::Display for Integer { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.value.fmt(f) + } +} + +impl TryFrom<Float> for Integer { + type Error = TooLargeForIntegerError; + + #[inline] + fn try_from(value: Float) -> Result<Self, Self::Error> { + Decimal::try_from(value) + .map_err(|_| TooLargeForIntegerError)? + .try_into() + } +} + +impl TryFrom<Double> for Integer { + type Error = TooLargeForIntegerError; + + #[inline] + fn try_from(value: Double) -> Result<Self, Self::Error> { + Decimal::try_from(value) + .map_err(|_| TooLargeForIntegerError)? + .try_into() + } +} + +/// The input is too large to fit into an [`Integer`]. +/// +/// Matches XPath [`FOCA0003` error](https://www.w3.org/TR/xpath-functions-31/#ERRFOCA0003). +#[derive(Debug, Clone, Copy, thiserror::Error)] +#[error("Value too large for xsd:integer internal representation")] +pub struct TooLargeForIntegerError; + +#[cfg(test)] +#[allow(clippy::panic_in_result_fn)] +mod tests { + use super::*; + + #[test] + fn from_str() -> Result<(), ParseIntError> { + assert_eq!(Integer::from_str("0")?.to_string(), "0"); + assert_eq!(Integer::from_str("-0")?.to_string(), "0"); + assert_eq!(Integer::from_str("123")?.to_string(), "123"); + assert_eq!(Integer::from_str("-123")?.to_string(), "-123"); + Integer::from_str("123456789123456789123456789123456789123456789").unwrap_err(); + Ok(()) + } + + #[test] + fn from_float() -> Result<(), ParseIntError> { + assert_eq!( + Integer::try_from(Float::from(0.)).ok(), + Some(Integer::from_str("0")?) + ); + assert_eq!( + Integer::try_from(Float::from(-0.)).ok(), + Some(Integer::from_str("0")?) + ); + assert_eq!( + Integer::try_from(Float::from(-123.1)).ok(), + Some(Integer::from_str("-123")?) 
+ ); + Integer::try_from(Float::from(f32::NAN)).unwrap_err(); + Integer::try_from(Float::from(f32::INFINITY)).unwrap_err(); + Integer::try_from(Float::from(f32::NEG_INFINITY)).unwrap_err(); + Integer::try_from(Float::from(f32::MIN)).unwrap_err(); + Integer::try_from(Float::from(f32::MAX)).unwrap_err(); + assert!( + Integer::try_from(Float::from(1_672_507_300_000.)) + .unwrap() + .checked_sub(Integer::from_str("1672507300000")?) + .unwrap() + .checked_abs() + .unwrap() + < Integer::from(1_000_000) + ); + Ok(()) + } + + #[test] + fn from_double() -> Result<(), ParseIntError> { + assert_eq!( + Integer::try_from(Double::from(0.0)).ok(), + Some(Integer::from_str("0")?) + ); + assert_eq!( + Integer::try_from(Double::from(-0.0)).ok(), + Some(Integer::from_str("0")?) + ); + assert_eq!( + Integer::try_from(Double::from(-123.1)).ok(), + Some(Integer::from_str("-123")?) + ); + assert!( + Integer::try_from(Double::from(1_672_507_300_000.)) + .unwrap() + .checked_sub(Integer::from_str("1672507300000").unwrap()) + .unwrap() + .checked_abs() + .unwrap() + < Integer::from(10) + ); + Integer::try_from(Double::from(f64::NAN)).unwrap_err(); + Integer::try_from(Double::from(f64::INFINITY)).unwrap_err(); + Integer::try_from(Double::from(f64::NEG_INFINITY)).unwrap_err(); + Integer::try_from(Double::from(f64::MIN)).unwrap_err(); + Integer::try_from(Double::from(f64::MAX)).unwrap_err(); + Ok(()) + } + + #[test] + fn from_decimal() -> Result<(), ParseIntError> { + assert_eq!( + Integer::try_from(Decimal::from(0)).ok(), + Some(Integer::from_str("0")?) + ); + assert_eq!( + Integer::try_from(Decimal::from_str("-123.1").unwrap()).ok(), + Some(Integer::from_str("-123")?) + ); + Integer::try_from(Decimal::MIN).unwrap_err(); + Integer::try_from(Decimal::MAX).unwrap_err(); + Ok(()) + } + + #[test] + fn add() { + assert_eq!( + Integer::MIN.checked_add(1), + Some(Integer::from(i64::MIN + 1)) + ); + assert_eq!(Integer::MAX.checked_add(1), None); + } + + #[test] + fn sub() { + assert_eq!(Integer::MIN.checked_sub(1), None); + assert_eq!( + Integer::MAX.checked_sub(1), + Some(Integer::from(i64::MAX - 1)) + ); + } + + #[test] + fn mul() { + assert_eq!(Integer::MIN.checked_mul(2), None); + assert_eq!(Integer::MAX.checked_mul(2), None); + } + + #[test] + fn div() { + assert_eq!(Integer::from(1).checked_div(0), None); + } + + #[test] + fn rem() { + assert_eq!(Integer::from(10).checked_rem(3), Some(Integer::from(1))); + assert_eq!(Integer::from(6).checked_rem(-2), Some(Integer::from(0))); + assert_eq!(Integer::from(1).checked_rem(0), None); + } +} diff --git a/ng-oxigraph/src/oxsdatatypes/mod.rs b/ng-oxigraph/src/oxsdatatypes/mod.rs new file mode 100644 index 0000000..00e0aa4 --- /dev/null +++ b/ng-oxigraph/src/oxsdatatypes/mod.rs @@ -0,0 +1,21 @@ +mod boolean; +mod date_time; +mod decimal; +mod double; +mod duration; +mod float; +mod integer; + +pub use self::boolean::Boolean; +pub use self::date_time::{ + Date, DateTime, DateTimeOverflowError, GDay, GMonth, GMonthDay, GYear, GYearMonth, + InvalidTimezoneError, ParseDateTimeError, Time, TimezoneOffset, +}; +pub use self::decimal::{Decimal, ParseDecimalError, TooLargeForDecimalError}; +pub use self::double::Double; +pub use self::duration::{ + DayTimeDuration, Duration, DurationOverflowError, OppositeSignInDurationComponentsError, + ParseDurationError, YearMonthDuration, +}; +pub use self::float::Float; +pub use self::integer::{Integer, TooLargeForIntegerError}; diff --git a/ng-oxigraph/src/oxttl/README.md b/ng-oxigraph/src/oxttl/README.md new file mode 100644 index 0000000..47ec03e 
--- /dev/null
+++ b/ng-oxigraph/src/oxttl/README.md
@@ -0,0 +1,54 @@
+OxTTL
+=====
+
+[Latest Version](https://crates.io/crates/oxttl)
+[Released API docs](https://docs.rs/oxttl)
+[Crates.io downloads](https://crates.io/crates/oxttl)
+[Actions status](https://github.com/oxigraph/oxigraph/actions)
+[Gitter](https://gitter.im/oxigraph/community)
+
+Oxttl is a set of parsers and serializers for [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/) and [N3](https://w3c.github.io/N3/spec/).
+
+Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is also available behind the `rdf-star` feature for all languages but N3 ([Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star), [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star), [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) and [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star)).
+
+It is designed as a low-level parser compatible with both synchronous and asynchronous I/O.
+
+Usage example counting the number of people in a Turtle file:
+```rust
+use oxrdf::{NamedNodeRef, vocab::rdf};
+use oxttl::TurtleParser;
+
+let file = b"@base <http://example.com/> .
+@prefix schema: <http://schema.org/> .
+<foo> a schema:Person ;
+ schema:name \"Foo\" .
+<bar> a schema:Person ;
+ schema:name \"Bar\" .";
+
+let schema_person = NamedNodeRef::new("http://schema.org/Person").unwrap();
+let mut count = 0;
+for triple in TurtleParser::new().parse_read(file.as_ref()) {
+ let triple = triple.unwrap();
+ if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+ count += 1;
+ }
+}
+assert_eq!(2, count);
+```
+
+
+## License
+
+This project is licensed under either of
+
+* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
+ <http://www.apache.org/licenses/LICENSE-2.0>)
+* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
+ <http://opensource.org/licenses/MIT>)
+
+at your option.
+
+
+### Contribution
+
+Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
diff --git a/ng-oxigraph/src/oxttl/lexer.rs b/ng-oxigraph/src/oxttl/lexer.rs
new file mode 100644
index 0000000..dba6fc7
--- /dev/null
+++ b/ng-oxigraph/src/oxttl/lexer.rs
@@ -0,0 +1,977 @@
+use crate::oxrdf::NamedNode;
+use crate::oxttl::toolkit::{TokenRecognizer, TokenRecognizerError};
+use memchr::{memchr, memchr2};
+use oxilangtag::LanguageTag;
+use oxiri::Iri;
+use std::borrow::Cow;
+use std::cmp::min;
+use std::collections::HashMap;
+use std::ops::Range;
+use std::str;
+
+#[derive(Debug, PartialEq, Eq)]
+pub enum N3Token<'a> {
+ IriRef(String),
+ PrefixedName {
+ prefix: &'a str,
+ local: Cow<'a, str>,
+ might_be_invalid_iri: bool,
+ },
+ Variable(Cow<'a, str>),
+ BlankNodeLabel(&'a str),
+ String(String),
+ Integer(&'a str),
+ Decimal(&'a str),
+ Double(&'a str),
+ LangTag(&'a str),
+ Punctuation(&'a str),
+ PlainKeyword(&'a str),
+}
+
+#[derive(Eq, PartialEq)]
+pub enum N3LexerMode {
+ NTriples,
+ Turtle,
+ N3,
+}
+
+#[derive(Default)]
+pub struct N3LexerOptions {
+ pub base_iri: Option<Iri<String>>,
+}
+
+pub struct N3Lexer {
+ mode: N3LexerMode,
+ unchecked: bool,
+}
+
+// TODO: there are a lot of 'None' (missing data) returned even if the stream is ending!!!
+// TODO: simplify by not giving is_end and fail with an "unexpected eof" is none is returned when is_end=true? + +impl TokenRecognizer for N3Lexer { + type Token<'a> = N3Token<'a>; + type Options = N3LexerOptions; + + fn recognize_next_token<'a>( + &mut self, + data: &'a [u8], + is_ending: bool, + options: &N3LexerOptions, + ) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> { + match *data.first()? { + b'<' => match *data.get(1)? { + b'<' => Some((2, Ok(N3Token::Punctuation("<<")))), + b'=' if self.mode == N3LexerMode::N3 => { + if let Some((consumed, result)) = self.recognize_iri(data, options) { + Some(if let Ok(result) = result { + (consumed, Ok(result)) + } else { + (2, Ok(N3Token::Punctuation("<="))) + }) + } else if is_ending { + Some((2, Ok(N3Token::Punctuation("<=")))) + } else { + None + } + } + b'-' if self.mode == N3LexerMode::N3 => { + if let Some((consumed, result)) = self.recognize_iri(data, options) { + Some(if let Ok(result) = result { + (consumed, Ok(result)) + } else { + (2, Ok(N3Token::Punctuation("<-"))) + }) + } else if is_ending { + Some((2, Ok(N3Token::Punctuation("<-")))) + } else { + None + } + } + _ => self.recognize_iri(data, options), + }, + b'>' => { + if *data.get(1)? == b'>' { + Some((2, Ok(N3Token::Punctuation(">>")))) + } else { + Some((1, Ok(N3Token::Punctuation(">")))) + } + } + b'_' => match data.get(1)? { + b':' => Self::recognize_blank_node_label(data), + c => Some(( + 1, + Err((0, format!("Unexpected character '{}'", char::from(*c))).into()), + )), + }, + b'"' => { + if self.mode != N3LexerMode::NTriples + && *data.get(1)? == b'"' + && *data.get(2)? == b'"' + { + Self::recognize_long_string(data, b'"') + } else { + Self::recognize_string(data, b'"') + } + } + b'\'' if self.mode != N3LexerMode::NTriples => { + if *data.get(1)? == b'\'' && *data.get(2)? == b'\'' { + Self::recognize_long_string(data, b'\'') + } else { + Self::recognize_string(data, b'\'') + } + } + b'@' => self.recognize_lang_tag(data), + b'.' => match data.get(1) { + Some(b'0'..=b'9') => Self::recognize_number(data), + Some(_) => Some((1, Ok(N3Token::Punctuation(".")))), + None => is_ending.then_some((1, Ok(N3Token::Punctuation(".")))), + }, + b'^' => { + if *data.get(1)? == b'^' { + Some((2, Ok(N3Token::Punctuation("^^")))) + } else { + Some((1, Ok(N3Token::Punctuation("^")))) + } + } + b'(' => Some((1, Ok(N3Token::Punctuation("(")))), + b')' => Some((1, Ok(N3Token::Punctuation(")")))), + b'[' => Some((1, Ok(N3Token::Punctuation("[")))), + b']' => Some((1, Ok(N3Token::Punctuation("]")))), + b'{' => { + if *data.get(1)? == b'|' { + Some((2, Ok(N3Token::Punctuation("{|")))) + } else { + Some((1, Ok(N3Token::Punctuation("{")))) + } + } + b'}' => Some((1, Ok(N3Token::Punctuation("}")))), + b',' => Some((1, Ok(N3Token::Punctuation(",")))), + b';' => Some((1, Ok(N3Token::Punctuation(";")))), + b'!' => Some((1, Ok(N3Token::Punctuation("!")))), + b'|' => { + if *data.get(1)? == b'}' { + Some((2, Ok(N3Token::Punctuation("|}")))) + } else { + Some((1, Ok(N3Token::Punctuation("|")))) + } + } + b'=' => { + if *data.get(1)? == b'>' { + Some((2, Ok(N3Token::Punctuation("=>")))) + } else { + Some((1, Ok(N3Token::Punctuation("=")))) + } + } + b'0'..=b'9' | b'+' | b'-' => Self::recognize_number(data), + b'?' 
=> self.recognize_variable(data, is_ending), + _ => self.recognize_pname_or_keyword(data, is_ending), + } + } +} + +impl N3Lexer { + pub fn new(mode: N3LexerMode, unchecked: bool) -> Self { + Self { mode, unchecked } + } + + fn recognize_iri( + &self, + data: &[u8], + options: &N3LexerOptions, + ) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> { + // [18] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>' /* #x00=NULL #01-#x1F=control codes #x20=space */ + let mut string = Vec::new(); + let mut i = 1; + loop { + let end = memchr2(b'>', b'\\', &data[i..])?; + string.extend_from_slice(&data[i..i + end]); + i += end; + match data[i] { + b'>' => { + #[allow(clippy::range_plus_one)] + return Some((i + 1, self.parse_iri(string, 0..i + 1, options))); + } + b'\\' => { + let (additional, c) = Self::recognize_escape(&data[i..], i, false)?; + i += additional + 1; + match c { + Ok(c) => { + let mut buf = [0; 4]; + string.extend_from_slice(c.encode_utf8(&mut buf).as_bytes()); + } + Err(e) => return Some((i, Err(e))), + } + } + _ => unreachable!(), + } + } + } + + fn parse_iri( + &self, + iri: Vec<u8>, + position: Range<usize>, + options: &N3LexerOptions, + ) -> Result<N3Token<'static>, TokenRecognizerError> { + let iri = string_from_utf8(iri, position.clone())?; + Ok(N3Token::IriRef( + if let Some(base_iri) = options.base_iri.as_ref() { + if self.unchecked { + base_iri.resolve_unchecked(&iri) + } else { + base_iri + .resolve(&iri) + .map_err(|e| (position, e.to_string()))? + } + .into_inner() + } else if self.unchecked { + iri + } else { + Iri::parse(iri) + .map_err(|e| (position, e.to_string()))? + .into_inner() + }, + )) + } + + fn recognize_pname_or_keyword<'a>( + &self, + data: &'a [u8], + is_ending: bool, + ) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> { + // [139s] PNAME_NS ::= PN_PREFIX? ':' + // [140s] PNAME_LN ::= PNAME_NS PN_LOCAL + // [167s] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)? + let mut i = 0; + loop { + if let Some(r) = Self::recognize_unicode_char(&data[i..], i) { + match r { + Ok((c, consumed)) => { + if c == ':' { + i += consumed; + break; + } else if i == 0 { + if !Self::is_possible_pn_chars_base(c) { + return Some(( + consumed, + Err(( + 0..consumed, + format!( + "'{c}' is not allowed at the beginning of a prefix name" + ), + ) + .into()), + )); + } + i += consumed; + } else if Self::is_possible_pn_chars(c) || c == '.' 
{ + i += consumed; + } else { + while data[..i].ends_with(b".") { + i -= 1; + } + return Some(( + i, + str_from_utf8(&data[..i], 0..i).map(N3Token::PlainKeyword), + )); + } + } + Err(e) => return Some((e.location.end, Err(e))), + } + } else if is_ending { + while data[..i].ends_with(b".") { + i -= 1; + } + return Some(if i == 0 { + ( + 1, + Err((0..1, format!("Unexpected byte {}", data[0])).into()), + ) + } else { + ( + i, + str_from_utf8(&data[..i], 0..i).map(N3Token::PlainKeyword), + ) + }); + } else { + return None; + } + } + let pn_prefix = match str_from_utf8(&data[..i - 1], 0..i - 1) { + Ok(pn_prefix) => pn_prefix, + Err(e) => return Some((i, Err(e))), + }; + if pn_prefix.ends_with('.') { + return Some(( + i, + Err(( + 0..i, + format!( + "'{pn_prefix}' is not a valid prefix: prefixes are not allowed to end with '.'"), + ) + .into()), + )); + } + + let (consumed, pn_local_result) = + self.recognize_optional_pn_local(&data[i..], is_ending)?; + Some(( + consumed + i, + pn_local_result.map(|(local, might_be_invalid_iri)| N3Token::PrefixedName { + prefix: pn_prefix, + local, + might_be_invalid_iri, + }), + )) + } + + fn recognize_variable<'a>( + &self, + data: &'a [u8], + is_ending: bool, + ) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> { + // [36] QUICK_VAR_NAME ::= "?" PN_LOCAL + let (consumed, result) = self.recognize_optional_pn_local(&data[1..], is_ending)?; + Some(( + consumed + 1, + result.and_then(|(name, _)| { + if name.is_empty() { + Err((0..consumed, "A variable name is not allowed to be empty").into()) + } else { + Ok(N3Token::Variable(name)) + } + }), + )) + } + + fn recognize_optional_pn_local<'a>( + &self, + data: &'a [u8], + is_ending: bool, + ) -> Option<(usize, Result<(Cow<'a, str>, bool), TokenRecognizerError>)> { + // [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? + let mut i = 0; + let mut buffer = None; // Buffer if there are some escaped characters + let mut position_that_is_already_in_buffer = 0; + let mut might_be_invalid_iri = false; + let mut ends_with_unescaped_dot = 0; + loop { + if let Some(r) = Self::recognize_unicode_char(&data[i..], i) { + match r { + Ok((c, consumed)) => { + if c == '%' { + i += 1; + let a = char::from(*data.get(i)?); + i += 1; + let b = char::from(*data.get(i)?); + if !a.is_ascii_hexdigit() || !b.is_ascii_hexdigit() { + return Some((i + 1, Err(( + i - 2..=i, format!("escapes in IRIs should be % followed by two hexadecimal characters, found '%{a}{b}'") + ).into()))); + } + i += 1; + ends_with_unescaped_dot = 0; + } else if c == '\\' { + i += 1; + let a = char::from(*data.get(i)?); + if self.unchecked + || matches!( + a, + '_' | '~' + | '.' + | '-' + | '!' + | '$' + | '&' + | '\'' + | '(' + | ')' + | '*' + | '+' + | ',' + | ';' + | '=' + ) + { + // ok to escape + } else if matches!(a, '/' | '?' 
| '#' | '@' | '%') { + // ok to escape but requires IRI validation + might_be_invalid_iri = true; + } else { + return Some((i + 1, Err(( + i..=i, format!("The character that are allowed to be escaped in IRIs are _~.-!$&'()*+,;=/?#@%, found '{a}'") + ).into()))); + } + let buffer = buffer.get_or_insert_with(String::new); + // We add the missing bytes + if i - position_that_is_already_in_buffer > 1 { + buffer.push_str( + match str_from_utf8( + &data[position_that_is_already_in_buffer..i - 1], + position_that_is_already_in_buffer..i - 1, + ) { + Ok(data) => data, + Err(e) => return Some((i, Err(e))), + }, + ) + } + buffer.push(a); + i += 1; + position_that_is_already_in_buffer = i; + ends_with_unescaped_dot = 0; + } else if i == 0 { + if !(Self::is_possible_pn_chars_u(c) || c == ':' || c.is_ascii_digit()) + { + return Some((0, Ok((Cow::Borrowed(""), false)))); + } + if !self.unchecked { + might_be_invalid_iri |= + Self::is_possible_pn_chars_base_but_not_valid_iri(c) + || c == ':'; + } + i += consumed; + } else if Self::is_possible_pn_chars(c) || c == ':' { + if !self.unchecked { + might_be_invalid_iri |= + Self::is_possible_pn_chars_base_but_not_valid_iri(c) + || c == ':'; + } + i += consumed; + ends_with_unescaped_dot = 0; + } else if c == '.' { + i += consumed; + ends_with_unescaped_dot += 1; + } else { + let buffer = if let Some(mut buffer) = buffer { + buffer.push_str( + match str_from_utf8( + &data[position_that_is_already_in_buffer..i], + position_that_is_already_in_buffer..i, + ) { + Ok(data) => data, + Err(e) => return Some((i, Err(e))), + }, + ); + // We do not include the last dots + for _ in 0..ends_with_unescaped_dot { + buffer.pop(); + } + i -= ends_with_unescaped_dot; + Cow::Owned(buffer) + } else { + let mut data = match str_from_utf8(&data[..i], 0..i) { + Ok(data) => data, + Err(e) => return Some((i, Err(e))), + }; + // We do not include the last dots + data = &data[..data.len() - ends_with_unescaped_dot]; + i -= ends_with_unescaped_dot; + Cow::Borrowed(data) + }; + return Some((i, Ok((buffer, might_be_invalid_iri)))); + } + } + Err(e) => return Some((e.location.end, Err(e))), + } + } else if is_ending { + let buffer = if let Some(mut buffer) = buffer { + // We do not include the last dot + while buffer.ends_with('.') { + buffer.pop(); + i -= 1; + } + Cow::Owned(buffer) + } else { + let mut data = match str_from_utf8(&data[..i], 0..i) { + Ok(data) => data, + Err(e) => return Some((i, Err(e))), + }; + // We do not include the last dot + while let Some(d) = data.strip_suffix('.') { + data = d; + i -= 1; + } + Cow::Borrowed(data) + }; + return Some((i, Ok((buffer, might_be_invalid_iri)))); + } else { + return None; + } + } + } + + fn recognize_blank_node_label( + data: &[u8], + ) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> { + // [141s] BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)? + let mut i = 2; + loop { + match Self::recognize_unicode_char(&data[i..], i)? { + Ok((c, consumed)) => { + if (i == 2 && (Self::is_possible_pn_chars_u(c) || c.is_ascii_digit())) + || (i > 2 && Self::is_possible_pn_chars(c)) + { + // Ok + } else if i > 2 && c == '.' { + if data[i - 1] == b'.' { + i -= 1; + return Some(( + i, + str_from_utf8(&data[2..i], 2..i).map(N3Token::BlankNodeLabel), + )); + } + } else if i == 0 { + return Some(( + i, + Err((0..i, "A blank node ID should not be empty").into()), + )); + } else if data[i - 1] == b'.' 
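+                    // Per BLANK_NODE_LABEL above, '.' may appear inside a label but
+                    // not at its end: back up over the trailing dot before emitting.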
{ + i -= 1; + return Some(( + i, + str_from_utf8(&data[2..i], 2..i).map(N3Token::BlankNodeLabel), + )); + } else { + return Some(( + i, + str_from_utf8(&data[2..i], 2..i).map(N3Token::BlankNodeLabel), + )); + } + i += consumed; + } + Err(e) => return Some((e.location.end, Err(e))), + } + } + } + + fn recognize_lang_tag<'a>( + &self, + data: &'a [u8], + ) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> { + // [144s] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* + let mut is_last_block_empty = true; + for (i, c) in data[1..].iter().enumerate() { + if c.is_ascii_alphabetic() { + is_last_block_empty = false; + } else if i == 0 { + return Some(( + 1, + Err((1..2, "A language code should always start with a letter").into()), + )); + } else if is_last_block_empty { + return Some((i, self.parse_lang_tag(&data[1..i], 1..i - 1))); + } else if *c == b'-' { + is_last_block_empty = true; + } else { + return Some((i + 1, self.parse_lang_tag(&data[1..=i], 1..i))); + } + } + None + } + + fn parse_lang_tag<'a>( + &self, + lang_tag: &'a [u8], + position: Range<usize>, + ) -> Result<N3Token<'a>, TokenRecognizerError> { + let lang_tag = str_from_utf8(lang_tag, position.clone())?; + Ok(N3Token::LangTag(if self.unchecked { + lang_tag + } else { + LanguageTag::parse(lang_tag) + .map_err(|e| (position.clone(), e.to_string()))? + .into_inner() + })) + } + + fn recognize_string( + data: &[u8], + delimiter: u8, + ) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> { + // [22] STRING_LITERAL_QUOTE ::= '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"' /* #x22=" #x5C=\ #xA=new line #xD=carriage return */ + // [23] STRING_LITERAL_SINGLE_QUOTE ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */ + let mut string = String::new(); + let mut i = 1; + loop { + let end = memchr2(delimiter, b'\\', &data[i..])?; + match str_from_utf8(&data[i..i + end], i..i + end) { + Ok(s) => string.push_str(s), + Err(e) => return Some((end, Err(e))), + }; + i += end; + match data[i] { + c if c == delimiter => { + return Some((i + 1, Ok(N3Token::String(string)))); + } + b'\\' => { + let (additional, c) = Self::recognize_escape(&data[i..], i, true)?; + i += additional + 1; + match c { + Ok(c) => { + string.push(c); + } + Err(e) => { + // We read until the end of string char + let end = memchr(delimiter, &data[i..])?; + return Some((i + end + 1, Err(e))); + } + } + } + _ => unreachable!(), + } + } + } + + fn recognize_long_string( + data: &[u8], + delimiter: u8, + ) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> { + // [24] STRING_LITERAL_LONG_SINGLE_QUOTE ::= "'''" (("'" | "''")? ([^'\] | ECHAR | UCHAR))* "'''" + // [25] STRING_LITERAL_LONG_QUOTE ::= '"""' (('"' | '""')? ([^"\] | ECHAR | UCHAR))* '"""' + let mut string = String::new(); + let mut i = 3; + loop { + let end = memchr2(delimiter, b'\\', &data[i..])?; + match str_from_utf8(&data[i..i + end], i..i + end) { + Ok(s) => string.push_str(s), + Err(e) => return Some((end, Err(e))), + }; + i += end; + match data[i] { + c if c == delimiter => { + if *data.get(i + 1)? == delimiter && *data.get(i + 2)? 
== delimiter { + return Some((i + 3, Ok(N3Token::String(string)))); + } + i += 1; + string.push(char::from(delimiter)); + } + b'\\' => { + let (additional, c) = Self::recognize_escape(&data[i..], i, true)?; + i += additional + 1; + match c { + Ok(c) => { + string.push(c); + } + Err(e) => return Some((i, Err(e))), + } + } + _ => unreachable!(), + } + } + } + + fn recognize_number(data: &[u8]) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> { + // [19] INTEGER ::= [+-]? [0-9]+ + // [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+ + // [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT) + // [154s] EXPONENT ::= [eE] [+-]? [0-9]+ + let mut i = 0; + let c = *data.first()?; + if matches!(c, b'+' | b'-') { + i += 1; + } + // We read the digits before . + let mut count_before: usize = 0; + loop { + let c = *data.get(i)?; + if c.is_ascii_digit() { + i += 1; + count_before += 1; + } else { + break; + } + } + + // We read the digits after . + #[allow(clippy::if_then_some_else_none)] + let count_after = if *data.get(i)? == b'.' { + i += 1; + + let mut count_after = 0; + loop { + let c = *data.get(i)?; + if c.is_ascii_digit() { + i += 1; + count_after += 1; + } else { + break; + } + } + Some(count_after) + } else { + None + }; + + // End + let c = *data.get(i)?; + if matches!(c, b'e' | b'E') { + i += 1; + + let c = *data.get(i)?; + if matches!(c, b'+' | b'-') { + i += 1; + } + + let mut found = false; + loop { + let c = *data.get(i)?; + if c.is_ascii_digit() { + i += 1; + found = true; + } else { + break; + } + } + Some(( + i, + if !found { + Err((0..i, "A double exponent cannot be empty").into()) + } else if count_before == 0 && count_after.unwrap_or(0) == 0 { + Err((0..i, "A double should not be empty").into()) + } else { + str_from_utf8(&data[..i], 0..i).map(N3Token::Double) + }, + )) + } else if let Some(count_after) = count_after { + if count_after == 0 { + // We do not consume the '.' after all + i -= 1; + Some(( + i, + if count_before == 0 { + Err((0..i, "An integer should not be empty").into()) + } else { + str_from_utf8(&data[..i], 0..i).map(N3Token::Integer) + }, + )) + } else { + Some((i, str_from_utf8(&data[..i], 0..i).map(N3Token::Decimal))) + } + } else { + Some(( + i, + if count_before == 0 { + Err((0..i, "An integer should not be empty").into()) + } else { + str_from_utf8(&data[..i], 0..i).map(N3Token::Integer) + }, + )) + } + } + + fn recognize_escape( + data: &[u8], + position: usize, + with_echar: bool, + ) -> Option<(usize, Result<char, TokenRecognizerError>)> { + // [26] UCHAR ::= '\u' HEX HEX HEX HEX | '\U' HEX HEX HEX HEX HEX HEX HEX HEX + // [159s] ECHAR ::= '\' [tbnrf"'\] + match *data.get(1)? 
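+        // Dispatch on the escape introducer: '\u' / '\U' (UCHAR) are always
+        // recognized, while the single-character ECHAR escapes such as '\n'
+        // are only allowed when `with_echar` is set, i.e. inside string
+        // literals but not inside IRIs.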
{
+            b'u' => match Self::recognize_hex_char(&data[2..], 4, 'u', position) {
+                Ok(c) => Some((5, Ok(c?))),
+                Err(e) => Some((5, Err(e))),
+            },
+            b'U' => match Self::recognize_hex_char(&data[2..], 8, 'U', position) {
+                Ok(c) => Some((9, Ok(c?))),
+                Err(e) => Some((9, Err(e))),
+            },
+            b't' if with_echar => Some((1, Ok('\t'))),
+            b'b' if with_echar => Some((1, Ok('\x08'))),
+            b'n' if with_echar => Some((1, Ok('\n'))),
+            b'r' if with_echar => Some((1, Ok('\r'))),
+            b'f' if with_echar => Some((1, Ok('\x0C'))),
+            b'"' if with_echar => Some((1, Ok('"'))),
+            b'\'' if with_echar => Some((1, Ok('\''))),
+            b'\\' if with_echar => Some((1, Ok('\\'))),
+            c => Some((
+                1,
+                Err((
+                    position..position + 2,
+                    format!("Unexpected escape character '\\{}'", char::from(c)),
+                )
+                    .into()),
+            )), // TODO: read until end of string
+        }
+    }
+
+    fn recognize_hex_char(
+        data: &[u8],
+        len: usize,
+        escape_char: char,
+        position: usize,
+    ) -> Result<Option<char>, TokenRecognizerError> {
+        if data.len() < len {
+            return Ok(None);
+        }
+        let val = str_from_utf8(&data[..len], position..position + len + 2)?;
+        let codepoint = u32::from_str_radix(val, 16).map_err(|e| {
+            (
+                position..position + len + 2,
+                format!(
+                    "The escape sequence '\\{escape_char}{val}' is not a valid hexadecimal string: {e}"
+                ),
+            )
+        })?;
+        let c = char::from_u32(codepoint).ok_or_else(|| {
+            (
+                position..position + len + 2,
+                format!(
+                    "The escape sequence '\\{escape_char}{val}' is encoding {codepoint:X} that is not a valid unicode character",
+                ),
+            )
+        })?;
+        Ok(Some(c))
+    }
+
+    fn recognize_unicode_char(
+        data: &[u8],
+        position: usize,
+    ) -> Option<Result<(char, usize), TokenRecognizerError>> {
+        let mut code_point: u32;
+        let bytes_needed: usize;
+        let mut lower_boundary = 0x80;
+        let mut upper_boundary = 0xBF;
+
+        let byte = *data.first()?;
+        match byte {
+            0x00..=0x7F => return Some(Ok((char::from(byte), 1))),
+            0xC2..=0xDF => {
+                bytes_needed = 1;
+                code_point = u32::from(byte) & 0x1F;
+            }
+            0xE0..=0xEF => {
+                if byte == 0xE0 {
+                    lower_boundary = 0xA0;
+                }
+                if byte == 0xED {
+                    upper_boundary = 0x9F;
+                }
+                bytes_needed = 2;
+                code_point = u32::from(byte) & 0xF;
+            }
+            0xF0..=0xF4 => {
+                if byte == 0xF0 {
+                    lower_boundary = 0x90;
+                }
+                if byte == 0xF4 {
+                    upper_boundary = 0x8F;
+                }
+                bytes_needed = 3;
+                code_point = u32::from(byte) & 0x7;
+            }
+            _ => {
+                return Some(Err((
+                    position..=position,
+                    "Invalid UTF-8 character encoding",
+                )
+                    .into()))
+            }
+        }
+
+        for i in 1..=bytes_needed {
+            let byte = *data.get(i)?;
+            if byte < lower_boundary || upper_boundary < byte {
+                return Some(Err((
+                    position..=position + i,
+                    "Invalid UTF-8 character encoding",
+                )
+                    .into()));
+            }
+            lower_boundary = 0x80;
+            upper_boundary = 0xBF;
+            code_point = (code_point << 6) | (u32::from(byte) & 0x3F);
+        }
+
+        Some(
+            char::from_u32(code_point)
+                .map(|c| (c, bytes_needed + 1))
+                .ok_or_else(|| {
+                    (
+                        position..=position + bytes_needed,
+                        format!("The codepoint {code_point:X} is not a valid unicode character"),
+                    )
+                        .into()
+                }),
+        )
+    }
+
+    // [157s] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
+    fn is_possible_pn_chars_base(c: char) -> bool {
+        matches!(c,
+            'A'..='Z'
+                | 'a'..='z'
+                | '\u{00C0}'..='\u{00D6}'
+                | '\u{00D8}'..='\u{00F6}'
+                | '\u{00F8}'..='\u{02FF}'
+                | '\u{0370}'..='\u{037D}'
+                | '\u{037F}'..='\u{1FFF}'
+                | '\u{200C}'..='\u{200D}'
+                |
'\u{2070}'..='\u{218F}' + | '\u{2C00}'..='\u{2FEF}' + | '\u{3001}'..='\u{D7FF}' + | '\u{F900}'..='\u{FDCF}' + | '\u{FDF0}'..='\u{FFFD}' + | '\u{10000}'..='\u{EFFFF}') + } + + // [158s] PN_CHARS_U ::= PN_CHARS_BASE | '_' | ':' + pub(super) fn is_possible_pn_chars_u(c: char) -> bool { + Self::is_possible_pn_chars_base(c) || c == '_' + } + + // [160s] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] + pub(crate) fn is_possible_pn_chars(c: char) -> bool { + Self::is_possible_pn_chars_u(c) + || matches!(c, + '-' | '0'..='9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}') + } + + fn is_possible_pn_chars_base_but_not_valid_iri(c: char) -> bool { + matches!(c, '\u{FFF0}'..='\u{FFFD}') + || u32::from(c) % u32::from('\u{FFFE}') == 0 + || u32::from(c) % u32::from('\u{FFFF}') == 0 + } +} + +pub fn resolve_local_name( + prefix: &str, + local: &str, + might_be_invalid_iri: bool, + prefixes: &HashMap<String, Iri<String>>, +) -> Result<NamedNode, String> { + if let Some(start) = prefixes.get(prefix) { + let iri = format!("{start}{local}"); + if might_be_invalid_iri || start.path().is_empty() { + // We validate again. We always validate if the local part might be the IRI authority. + if let Err(e) = Iri::parse(iri.as_str()) { + return Err(format!( + "The prefixed name {prefix}:{local} builds IRI {iri} that is invalid: {e}" + )); + } + } + Ok(NamedNode::new_unchecked(iri)) + } else { + Err(format!("The prefix {prefix}: has not been declared")) + } +} + +fn str_from_utf8(data: &[u8], range: Range<usize>) -> Result<&str, TokenRecognizerError> { + str::from_utf8(data).map_err(|e| { + ( + range.start + e.valid_up_to()..min(range.end, range.start + e.valid_up_to() + 4), + format!("Invalid UTF-8: {e}"), + ) + .into() + }) +} + +fn string_from_utf8(data: Vec<u8>, range: Range<usize>) -> Result<String, TokenRecognizerError> { + String::from_utf8(data).map_err(|e| { + ( + range.start + e.utf8_error().valid_up_to() + ..min(range.end, range.start + e.utf8_error().valid_up_to() + 4), + format!("Invalid UTF-8: {e}"), + ) + .into() + }) +} diff --git a/ng-oxigraph/src/oxttl/line_formats.rs b/ng-oxigraph/src/oxttl/line_formats.rs new file mode 100644 index 0000000..ead06b7 --- /dev/null +++ b/ng-oxigraph/src/oxttl/line_formats.rs @@ -0,0 +1,314 @@ +//! Shared parser implementation for N-Triples and N-Quads. 
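+//!
+//! A minimal usage sketch (illustrative only: it goes through the public
+//! `NQuadsParser` wrapper from `nquads.rs`, which drives the `NQuadsRecognizer`
+//! defined below):
+//!
+//! ```
+//! use oxttl::NQuadsParser;
+//!
+//! let file = br#"<http://example.com/s> <http://example.com/p> "o" <http://example.com/g> ."#;
+//! let mut count = 0;
+//! for quad in NQuadsParser::new().parse_read(file.as_ref()) {
+//!     quad?;
+//!     count += 1;
+//! }
+//! assert_eq!(count, 1);
+//! # Result::<_, Box<dyn std::error::Error>>::Ok(())
+//! ```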
+ +#[cfg(feature = "rdf-star")] +use crate::oxrdf::Triple; +use crate::oxrdf::{BlankNode, GraphName, Literal, NamedNode, Quad, Subject, Term}; +use crate::oxttl::lexer::{N3Lexer, N3LexerMode, N3LexerOptions, N3Token}; +use crate::oxttl::toolkit::{Lexer, Parser, RuleRecognizer, RuleRecognizerError}; +use crate::oxttl::{MAX_BUFFER_SIZE, MIN_BUFFER_SIZE}; + +pub struct NQuadsRecognizer { + stack: Vec<NQuadsState>, + subjects: Vec<Subject>, + predicates: Vec<NamedNode>, + objects: Vec<Term>, +} +pub struct NQuadsRecognizerContext { + with_graph_name: bool, + #[cfg(feature = "rdf-star")] + with_quoted_triples: bool, + lexer_options: N3LexerOptions, +} + +enum NQuadsState { + ExpectSubject, + ExpectPredicate, + ExpectedObject, + ExpectPossibleGraphOrEndOfQuotedTriple, + ExpectDot, + ExpectLiteralAnnotationOrGraphNameOrDot { + value: String, + }, + ExpectLiteralDatatype { + value: String, + }, + #[cfg(feature = "rdf-star")] + AfterQuotedSubject, + #[cfg(feature = "rdf-star")] + AfterQuotedObject, +} + +impl RuleRecognizer for NQuadsRecognizer { + type TokenRecognizer = N3Lexer; + type Output = Quad; + type Context = NQuadsRecognizerContext; + + fn error_recovery_state(mut self) -> Self { + self.stack.clear(); + self.subjects.clear(); + self.predicates.clear(); + self.objects.clear(); + self + } + + fn recognize_next( + mut self, + token: N3Token<'_>, + context: &mut NQuadsRecognizerContext, + results: &mut Vec<Quad>, + errors: &mut Vec<RuleRecognizerError>, + ) -> Self { + if let Some(state) = self.stack.pop() { + match state { + NQuadsState::ExpectSubject => match token { + N3Token::IriRef(s) => { + self.subjects + .push(NamedNode::new_unchecked(s).into()); + self.stack.push(NQuadsState::ExpectPredicate); + self + } + N3Token::BlankNodeLabel(s) => { + self.subjects.push(BlankNode::new_unchecked(s).into()); + self.stack.push(NQuadsState::ExpectPredicate); + self + } + #[cfg(feature = "rdf-star")] + N3Token::Punctuation("<<") if context.with_quoted_triples => { + self.stack.push(NQuadsState::AfterQuotedSubject); + self.stack.push(NQuadsState::ExpectSubject); + self + } + _ => self.error( + errors, + "The subject of a triple should be an IRI or a blank node, TOKEN found", + ), + }, + NQuadsState::ExpectPredicate => match token { + N3Token::IriRef(p) => { + self.predicates + .push(NamedNode::new_unchecked(p)); + self.stack.push(NQuadsState::ExpectedObject); + self + } + _ => self.error( + errors, + "The predicate of a triple should be an IRI, TOKEN found", + ), + }, + NQuadsState::ExpectedObject => match token { + N3Token::IriRef(o) => { + self.objects + .push(NamedNode::new_unchecked(o).into()); + self.stack + .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); + self + } + N3Token::BlankNodeLabel(o) => { + self.objects.push(BlankNode::new_unchecked(o).into()); + self.stack + .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); + self + } + N3Token::String(value) => { + self.stack + .push(NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value }); + self + } + #[cfg(feature = "rdf-star")] + N3Token::Punctuation("<<") if context.with_quoted_triples => { + self.stack.push(NQuadsState::AfterQuotedObject); + self.stack.push(NQuadsState::ExpectSubject); + self + } + _ => self.error( + errors, + "The object of a triple should be an IRI, a blank node or a literal, TOKEN found", + ), + }, + NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value } => match token { + N3Token::LangTag(lang_tag) => { + self.objects.push( + Literal::new_language_tagged_literal_unchecked( + value, + 
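+                            // Language tags are case-insensitive; they are
+                            // normalized to lowercase when building the literal.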
lang_tag.to_ascii_lowercase(), + ) + .into(), + ); + self.stack + .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); + self + } + N3Token::Punctuation("^^") => { + self.stack + .push(NQuadsState::ExpectLiteralDatatype { value }); + self + } + _ => { + self.objects.push(Literal::new_simple_literal(value).into()); + self.stack + .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); + self.recognize_next(token, context, results, errors) + } + }, + NQuadsState::ExpectLiteralDatatype { value } => match token { + N3Token::IriRef(d) => { + self.objects.push( + Literal::new_typed_literal( + value, + NamedNode::new_unchecked(d) + ) + .into(), + ); + self.stack + .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); + self + } + _ => self.error(errors, "A literal datatype must be an IRI, found TOKEN"), + }, + NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple => { + if self.stack.is_empty() { + match token { + N3Token::IriRef(g) if context.with_graph_name => { + self.emit_quad( + results, + NamedNode::new_unchecked(g).into(), + ); + self.stack.push(NQuadsState::ExpectDot); + self + } + N3Token::BlankNodeLabel(g) if context.with_graph_name => { + self.emit_quad(results, BlankNode::new_unchecked(g).into()); + self.stack.push(NQuadsState::ExpectDot); + self + } + _ => { + self.emit_quad(results, GraphName::DefaultGraph); + self.stack.push(NQuadsState::ExpectDot); + self.recognize_next(token, context, results, errors) + } + } + } else if token == N3Token::Punctuation(">>") { + self + } else { + self.error(errors, "Expecting the end of a quoted triple '>>'") + } + } + NQuadsState::ExpectDot => if let N3Token::Punctuation(".") = token { + self.stack.push(NQuadsState::ExpectSubject); + self + } else { + errors.push("Quads should be followed by a dot".into()); + self.stack.push(NQuadsState::ExpectSubject); + self.recognize_next(token, context, results, errors) + }, + #[cfg(feature = "rdf-star")] + NQuadsState::AfterQuotedSubject => { + let triple = Triple { + subject: self.subjects.pop().unwrap(), + predicate: self.predicates.pop().unwrap(), + object: self.objects.pop().unwrap(), + }; + self.subjects.push(triple.into()); + self.stack.push(NQuadsState::ExpectPredicate); + self.recognize_next(token,context, results, errors) + } + #[cfg(feature = "rdf-star")] + NQuadsState::AfterQuotedObject => { + let triple = Triple { + subject: self.subjects.pop().unwrap(), + predicate: self.predicates.pop().unwrap(), + object: self.objects.pop().unwrap(), + }; + self.objects.push(triple.into()); + self.stack + .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); + self.recognize_next(token, context, results, errors) + } + } + } else if token == N3Token::Punctuation(".") { + self.stack.push(NQuadsState::ExpectSubject); + self + } else { + self + } + } + + fn recognize_end( + mut self, + _context: &mut NQuadsRecognizerContext, + results: &mut Vec<Quad>, + errors: &mut Vec<RuleRecognizerError>, + ) { + match &*self.stack { + [NQuadsState::ExpectSubject] | [] => (), + [NQuadsState::ExpectDot] => errors.push("Triples should be followed by a dot".into()), + [NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple] => { + self.emit_quad(results, GraphName::DefaultGraph); + errors.push("Triples should be followed by a dot".into()) + } + [NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value }] => { + self.objects.push(Literal::new_simple_literal(value).into()); + self.emit_quad(results, GraphName::DefaultGraph); + errors.push("Triples should be followed by a dot".into()) + } + _ => 
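+            // Any other residual stack state at end of input means a statement
+            // was truncated, e.g. inside a quoted triple or after a predicate.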
errors.push("Unexpected end".into()), // TODO + } + } + + fn lexer_options(context: &NQuadsRecognizerContext) -> &N3LexerOptions { + &context.lexer_options + } +} + +impl NQuadsRecognizer { + pub fn new_parser( + with_graph_name: bool, + #[cfg(feature = "rdf-star")] with_quoted_triples: bool, + unchecked: bool, + ) -> Parser<Self> { + Parser::new( + Lexer::new( + N3Lexer::new(N3LexerMode::NTriples, unchecked), + MIN_BUFFER_SIZE, + MAX_BUFFER_SIZE, + true, + Some(b"#"), + ), + Self { + stack: vec![NQuadsState::ExpectSubject], + subjects: Vec::new(), + predicates: Vec::new(), + objects: Vec::new(), + }, + NQuadsRecognizerContext { + with_graph_name, + #[cfg(feature = "rdf-star")] + with_quoted_triples, + lexer_options: N3LexerOptions::default(), + }, + ) + } + + #[must_use] + fn error( + mut self, + errors: &mut Vec<RuleRecognizerError>, + msg: impl Into<RuleRecognizerError>, + ) -> Self { + errors.push(msg.into()); + self.stack.clear(); + self.subjects.clear(); + self.predicates.clear(); + self.objects.clear(); + self + } + + fn emit_quad(&mut self, results: &mut Vec<Quad>, graph_name: GraphName) { + results.push(Quad { + subject: self.subjects.pop().unwrap(), + predicate: self.predicates.pop().unwrap(), + object: self.objects.pop().unwrap(), + graph_name, + }) + } +} diff --git a/ng-oxigraph/src/oxttl/mod.rs b/ng-oxigraph/src/oxttl/mod.rs new file mode 100644 index 0000000..5ab210d --- /dev/null +++ b/ng-oxigraph/src/oxttl/mod.rs @@ -0,0 +1,19 @@ +mod lexer; +mod line_formats; +pub mod n3; +pub mod nquads; +pub mod ntriples; +mod terse; +mod toolkit; +pub mod trig; +pub mod turtle; + +pub use crate::oxttl::n3::N3Parser; +pub use crate::oxttl::nquads::{NQuadsParser, NQuadsSerializer}; +pub use crate::oxttl::ntriples::{NTriplesParser, NTriplesSerializer}; +pub use crate::oxttl::toolkit::{TextPosition, TurtleParseError, TurtleSyntaxError}; +pub use crate::oxttl::trig::{TriGParser, TriGSerializer}; +pub use crate::oxttl::turtle::{TurtleParser, TurtleSerializer}; + +pub(crate) const MIN_BUFFER_SIZE: usize = 4096; +pub(crate) const MAX_BUFFER_SIZE: usize = 4096 * 4096; diff --git a/ng-oxigraph/src/oxttl/n3.rs b/ng-oxigraph/src/oxttl/n3.rs new file mode 100644 index 0000000..2e16a78 --- /dev/null +++ b/ng-oxigraph/src/oxttl/n3.rs @@ -0,0 +1,1326 @@ +//! A [N3](https://w3c.github.io/N3/spec/) streaming parser implemented by [`N3Parser`]. + +use crate::oxrdf::vocab::{rdf, xsd}; +#[cfg(feature = "rdf-star")] +use crate::oxrdf::Triple; +use crate::oxrdf::{ + BlankNode, GraphName, Literal, NamedNode, NamedNodeRef, NamedOrBlankNode, Quad, Subject, Term, + Variable, +}; +use crate::oxttl::lexer::{resolve_local_name, N3Lexer, N3LexerMode, N3LexerOptions, N3Token}; +#[cfg(feature = "async-tokio")] +use crate::oxttl::toolkit::FromTokioAsyncReadIterator; +use crate::oxttl::toolkit::{ + FromReadIterator, Lexer, Parser, RuleRecognizer, RuleRecognizerError, TurtleSyntaxError, +}; +use crate::oxttl::{TurtleParseError, MAX_BUFFER_SIZE, MIN_BUFFER_SIZE}; +use oxiri::{Iri, IriParseError}; +use std::collections::hash_map::Iter; +use std::collections::HashMap; +use std::fmt; +use std::io::Read; +#[cfg(feature = "async-tokio")] +use tokio::io::AsyncRead; + +/// A N3 term i.e. a RDF `Term` or a `Variable`. 
+#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum N3Term { + NamedNode(NamedNode), + BlankNode(BlankNode), + Literal(Literal), + #[cfg(feature = "rdf-star")] + Triple(Box<Triple>), + Variable(Variable), +} + +impl fmt::Display for N3Term { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(term) => term.fmt(f), + Self::BlankNode(term) => term.fmt(f), + Self::Literal(term) => term.fmt(f), + #[cfg(feature = "rdf-star")] + Self::Triple(term) => term.fmt(f), + Self::Variable(term) => term.fmt(f), + } + } +} + +impl From<NamedNode> for N3Term { + #[inline] + fn from(node: NamedNode) -> Self { + Self::NamedNode(node) + } +} + +impl From<NamedNodeRef<'_>> for N3Term { + #[inline] + fn from(node: NamedNodeRef<'_>) -> Self { + Self::NamedNode(node.into_owned()) + } +} + +impl From<BlankNode> for N3Term { + #[inline] + fn from(node: BlankNode) -> Self { + Self::BlankNode(node) + } +} + +impl From<Literal> for N3Term { + #[inline] + fn from(literal: Literal) -> Self { + Self::Literal(literal) + } +} + +#[cfg(feature = "rdf-star")] +impl From<Triple> for N3Term { + #[inline] + fn from(triple: Triple) -> Self { + Self::Triple(Box::new(triple)) + } +} + +#[cfg(feature = "rdf-star")] +impl From<Box<Triple>> for N3Term { + #[inline] + fn from(node: Box<Triple>) -> Self { + Self::Triple(node) + } +} + +impl From<NamedOrBlankNode> for N3Term { + #[inline] + fn from(node: NamedOrBlankNode) -> Self { + match node { + NamedOrBlankNode::NamedNode(node) => node.into(), + NamedOrBlankNode::BlankNode(node) => node.into(), + } + } +} + +impl From<Subject> for N3Term { + #[inline] + fn from(node: Subject) -> Self { + match node { + Subject::NamedNode(node) => node.into(), + Subject::BlankNode(node) => node.into(), + #[cfg(feature = "rdf-star")] + Subject::Triple(triple) => Self::Triple(triple), + } + } +} + +impl From<Term> for N3Term { + #[inline] + fn from(node: Term) -> Self { + match node { + Term::NamedNode(node) => node.into(), + Term::BlankNode(node) => node.into(), + Term::Literal(node) => node.into(), + #[cfg(feature = "rdf-star")] + Term::Triple(triple) => Self::Triple(triple), + } + } +} + +impl From<Variable> for N3Term { + #[inline] + fn from(variable: Variable) -> Self { + Self::Variable(variable) + } +} + +/// A N3 quad i.e. a quad composed of [`N3Term`]. +/// +/// The `graph_name` is used to encode the formula where the triple is in. +/// In this case the formula is encoded by a blank node. +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct N3Quad { + /// The [subject](https://www.w3.org/TR/rdf11-concepts/#dfn-subject) of this triple. + pub subject: N3Term, + + /// The [predicate](https://www.w3.org/TR/rdf11-concepts/#dfn-predicate) of this triple. + pub predicate: N3Term, + + /// The [object](https://www.w3.org/TR/rdf11-concepts/#dfn-object) of this triple. + pub object: N3Term, + + /// The name of the RDF [graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) in which the triple is. 
+ pub graph_name: GraphName, +} + +impl fmt::Display for N3Quad { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.graph_name == GraphName::DefaultGraph { + write!(f, "{} {} {}", self.subject, self.predicate, self.object) + } else { + write!( + f, + "{} {} {} {}", + self.subject, self.predicate, self.object, self.graph_name + ) + } + } +} + +impl From<Quad> for N3Quad { + fn from(quad: Quad) -> Self { + Self { + subject: quad.subject.into(), + predicate: quad.predicate.into(), + object: quad.object.into(), + graph_name: quad.graph_name, + } + } +} + +/// A [N3](https://w3c.github.io/N3/spec/) streaming parser. +/// +/// Count the number of people: +/// ``` +/// use oxrdf::vocab::rdf; +/// use oxrdf::NamedNode; +/// use oxttl::n3::{N3Parser, N3Term}; +/// +/// let file = br#"@base <http://example.com/> . +/// @prefix schema: <http://schema.org/> . +/// <foo> a schema:Person ; +/// schema:name "Foo" . +/// <bar> a schema:Person ; +/// schema:name "Bar" ."#; +/// +/// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned()); +/// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?); +/// let mut count = 0; +/// for triple in N3Parser::new().parse_read(file.as_ref()) { +/// let triple = triple?; +/// if triple.predicate == rdf_type && triple.object == schema_person { +/// count += 1; +/// } +/// } +/// assert_eq!(2, count); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[derive(Default)] +#[must_use] +pub struct N3Parser { + unchecked: bool, + base: Option<Iri<String>>, + prefixes: HashMap<String, Iri<String>>, +} + +impl N3Parser { + /// Builds a new [`N3Parser`]. + #[inline] + pub fn new() -> Self { + Self::default() + } + + /// Assumes the file is valid to make parsing faster. + /// + /// It will skip some validations. + /// + /// Note that if the file is actually not valid, then broken RDF might be emitted by the parser. + #[inline] + pub fn unchecked(mut self) -> Self { + self.unchecked = true; + self + } + + #[inline] + pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { + self.base = Some(Iri::parse(base_iri.into())?); + Ok(self) + } + + #[inline] + pub fn with_prefix( + mut self, + prefix_name: impl Into<String>, + prefix_iri: impl Into<String>, + ) -> Result<Self, IriParseError> { + self.prefixes + .insert(prefix_name.into(), Iri::parse(prefix_iri.into())?); + Ok(self) + } + + /// Parses a N3 file from a [`Read`] implementation. + /// + /// Count the number of people: + /// ``` + /// use oxrdf::NamedNode; + /// use oxttl::n3::{N3Parser, N3Term}; + /// + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . + /// <foo> a schema:Person ; + /// schema:name "Foo" . 
+ /// <bar> a schema:Person ; + /// schema:name "Bar" ."#; + /// + /// let rdf_type = N3Term::NamedNode(NamedNode::new( + /// "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", + /// )?); + /// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?); + /// let mut count = 0; + /// for triple in N3Parser::new().parse_read(file.as_ref()) { + /// let triple = triple?; + /// if triple.predicate == rdf_type && triple.object == schema_person { + /// count += 1; + /// } + /// } + /// assert_eq!(2, count); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn parse_read<R: Read>(self, read: R) -> FromReadN3Reader<R> { + FromReadN3Reader { + inner: self.parse().parser.parse_read(read), + } + } + + /// Parses a N3 file from a [`AsyncRead`] implementation. + /// + /// Count the number of people: + /// ``` + /// use oxrdf::vocab::rdf; + /// use oxrdf::NamedNode; + /// use oxttl::n3::{N3Parser, N3Term}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), oxttl::TurtleParseError> { + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . + /// <foo> a schema:Person ; + /// schema:name "Foo" . + /// <bar> a schema:Person ; + /// schema:name "Bar" ."#; + /// + /// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned()); + /// let schema_person = N3Term::NamedNode(NamedNode::new_unchecked("http://schema.org/Person")); + /// let mut count = 0; + /// let mut parser = N3Parser::new().parse_tokio_async_read(file.as_ref()); + /// while let Some(triple) = parser.next().await { + /// let triple = triple?; + /// if triple.predicate == rdf_type && triple.object == schema_person { + /// count += 1; + /// } + /// } + /// assert_eq!(2, count); + /// # Ok(()) + /// # } + /// ``` + #[cfg(feature = "async-tokio")] + pub fn parse_tokio_async_read<R: AsyncRead + Unpin>( + self, + read: R, + ) -> FromTokioAsyncReadN3Reader<R> { + FromTokioAsyncReadN3Reader { + inner: self.parse().parser.parse_tokio_async_read(read), + } + } + + /// Allows to parse a N3 file by using a low-level API. + /// + /// Count the number of people: + /// ``` + /// use oxrdf::vocab::rdf; + /// use oxrdf::NamedNode; + /// use oxttl::n3::{N3Parser, N3Term}; + /// + /// let file: [&[u8]; 5] = [ + /// b"@base <http://example.com/>", + /// b". @prefix schema: <http://schema.org/> .", + /// b"<foo> a schema:Person", + /// b" ; schema:name \"Foo\" . 
<bar>", + /// b" a schema:Person ; schema:name \"Bar\" .", + /// ]; + /// + /// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned()); + /// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?); + /// let mut count = 0; + /// let mut parser = N3Parser::new().parse(); + /// let mut file_chunks = file.iter(); + /// while !parser.is_end() { + /// // We feed more data to the parser + /// if let Some(chunk) = file_chunks.next() { + /// parser.extend_from_slice(chunk); + /// } else { + /// parser.end(); // It's finished + /// } + /// // We read as many triples from the parser as possible + /// while let Some(triple) = parser.read_next() { + /// let triple = triple?; + /// if triple.predicate == rdf_type && triple.object == schema_person { + /// count += 1; + /// } + /// } + /// } + /// assert_eq!(2, count); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn parse(self) -> LowLevelN3Reader { + LowLevelN3Reader { + parser: N3Recognizer::new_parser(self.unchecked, self.base, self.prefixes), + } + } +} + +/// Parses a N3 file from a [`Read`] implementation. Can be built using [`N3Parser::parse_read`]. +/// +/// Count the number of people: +/// ``` +/// use oxrdf::vocab::rdf; +/// use oxrdf::NamedNode; +/// use oxttl::n3::{N3Parser, N3Term}; +/// +/// let file = br#"@base <http://example.com/> . +/// @prefix schema: <http://schema.org/> . +/// <foo> a schema:Person ; +/// schema:name "Foo" . +/// <bar> a schema:Person ; +/// schema:name "Bar" ."#; +/// +/// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned()); +/// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?); +/// let mut count = 0; +/// for triple in N3Parser::new().parse_read(file.as_ref()) { +/// let triple = triple?; +/// if triple.predicate == rdf_type && triple.object == schema_person { +/// count += 1; +/// } +/// } +/// assert_eq!(2, count); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[must_use] +pub struct FromReadN3Reader<R: Read> { + inner: FromReadIterator<R, N3Recognizer>, +} + +impl<R: Read> FromReadN3Reader<R> { + /// The list of IRI prefixes considered at the current step of the parsing. + /// + /// This method returns (prefix name, prefix value) tuples. + /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered. + /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned). + /// + /// ``` + /// use oxttl::N3Parser; + /// + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . + /// <foo> a schema:Person ; + /// schema:name "Foo" ."#; + /// + /// let mut reader = N3Parser::new().parse_read(file.as_ref()); + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning + /// + /// reader.next().unwrap()?; // We read the first triple + /// assert_eq!( + /// reader.prefixes().collect::<Vec<_>>(), + /// [("schema", "http://schema.org/")] + /// ); // There are now prefixes + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn prefixes(&self) -> N3PrefixesIter<'_> { + N3PrefixesIter { + inner: self.inner.parser.context.prefixes.iter(), + } + } + + /// The base IRI considered at the current step of the parsing. + /// + /// ``` + /// use oxttl::N3Parser; + /// + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . 
+ /// <foo> a schema:Person ; + /// schema:name "Foo" ."#; + /// + /// let mut reader = N3Parser::new().parse_read(file.as_ref()); + /// assert!(reader.base_iri().is_none()); // No base at the beginning because none has been given to the parser. + /// + /// reader.next().unwrap()?; // We read the first triple + /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI. + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn base_iri(&self) -> Option<&str> { + self.inner + .parser + .context + .lexer_options + .base_iri + .as_ref() + .map(Iri::as_str) + } +} + +impl<R: Read> Iterator for FromReadN3Reader<R> { + type Item = Result<N3Quad, TurtleParseError>; + + fn next(&mut self) -> Option<Self::Item> { + self.inner.next() + } +} + +/// Parses a N3 file from a [`AsyncRead`] implementation. Can be built using [`N3Parser::parse_tokio_async_read`]. +/// +/// Count the number of people: +/// ``` +/// use oxrdf::vocab::rdf; +/// use oxrdf::NamedNode; +/// use oxttl::n3::{N3Parser, N3Term}; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> Result<(), oxttl::TurtleParseError> { +/// let file = br#"@base <http://example.com/> . +/// @prefix schema: <http://schema.org/> . +/// <foo> a schema:Person ; +/// schema:name "Foo" . +/// <bar> a schema:Person ; +/// schema:name "Bar" ."#; +/// +/// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned()); +/// let schema_person = N3Term::NamedNode(NamedNode::new_unchecked("http://schema.org/Person")); +/// let mut count = 0; +/// let mut parser = N3Parser::new().parse_tokio_async_read(file.as_ref()); +/// while let Some(triple) = parser.next().await { +/// let triple = triple?; +/// if triple.predicate == rdf_type && triple.object == schema_person { +/// count += 1; +/// } +/// } +/// assert_eq!(2, count); +/// # Ok(()) +/// # } +/// ``` +#[cfg(feature = "async-tokio")] +#[must_use] +pub struct FromTokioAsyncReadN3Reader<R: AsyncRead + Unpin> { + inner: FromTokioAsyncReadIterator<R, N3Recognizer>, +} + +#[cfg(feature = "async-tokio")] +impl<R: AsyncRead + Unpin> FromTokioAsyncReadN3Reader<R> { + /// Reads the next triple or returns `None` if the file is finished. + pub async fn next(&mut self) -> Option<Result<N3Quad, TurtleParseError>> { + Some(self.inner.next().await?.map(Into::into)) + } + + /// The list of IRI prefixes considered at the current step of the parsing. + /// + /// This method returns (prefix name, prefix value) tuples. + /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered. + /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned). + /// + /// ``` + /// use oxttl::N3Parser; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), oxttl::TurtleParseError> { + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . 
+ /// <foo> a schema:Person ; + /// schema:name "Foo" ."#; + /// + /// let mut reader = N3Parser::new().parse_tokio_async_read(file.as_ref()); + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning + /// + /// reader.next().await.unwrap()?; // We read the first triple + /// assert_eq!( + /// reader.prefixes().collect::<Vec<_>>(), + /// [("schema", "http://schema.org/")] + /// ); // There are now prefixes + /// # Ok(()) + /// # } + /// ``` + pub fn prefixes(&self) -> N3PrefixesIter<'_> { + N3PrefixesIter { + inner: self.inner.parser.context.prefixes.iter(), + } + } + + /// The base IRI considered at the current step of the parsing. + /// + /// ``` + /// use oxttl::N3Parser; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), oxttl::TurtleParseError> { + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . + /// <foo> a schema:Person ; + /// schema:name "Foo" ."#; + /// + /// let mut reader = N3Parser::new().parse_tokio_async_read(file.as_ref()); + /// assert!(reader.base_iri().is_none()); // No base IRI at the beginning + /// + /// reader.next().await.unwrap()?; // We read the first triple + /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI + /// # Ok(()) + /// # } + /// ``` + pub fn base_iri(&self) -> Option<&str> { + self.inner + .parser + .context + .lexer_options + .base_iri + .as_ref() + .map(Iri::as_str) + } +} + +/// Parses a N3 file by using a low-level API. Can be built using [`N3Parser::parse`]. +/// +/// Count the number of people: +/// ``` +/// use oxrdf::vocab::rdf; +/// use oxrdf::NamedNode; +/// use oxttl::n3::{N3Parser, N3Term}; +/// +/// let file: [&[u8]; 5] = [ +/// b"@base <http://example.com/>", +/// b". @prefix schema: <http://schema.org/> .", +/// b"<foo> a schema:Person", +/// b" ; schema:name \"Foo\" . <bar>", +/// b" a schema:Person ; schema:name \"Bar\" .", +/// ]; +/// +/// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned()); +/// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?); +/// let mut count = 0; +/// let mut parser = N3Parser::new().parse(); +/// let mut file_chunks = file.iter(); +/// while !parser.is_end() { +/// // We feed more data to the parser +/// if let Some(chunk) = file_chunks.next() { +/// parser.extend_from_slice(chunk); +/// } else { +/// parser.end(); // It's finished +/// } +/// // We read as many triples from the parser as possible +/// while let Some(triple) = parser.read_next() { +/// let triple = triple?; +/// if triple.predicate == rdf_type && triple.object == schema_person { +/// count += 1; +/// } +/// } +/// } +/// assert_eq!(2, count); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +pub struct LowLevelN3Reader { + parser: Parser<N3Recognizer>, +} + +impl LowLevelN3Reader { + /// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data. + pub fn extend_from_slice(&mut self, other: &[u8]) { + self.parser.extend_from_slice(other) + } + + /// Tell the parser that the file is finished. + /// + /// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values. + pub fn end(&mut self) { + self.parser.end() + } + + /// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`. 
+ pub fn is_end(&self) -> bool { + self.parser.is_end() + } + + /// Attempt to parse a new quad from the already provided data. + /// + /// Returns [`None`] if the parsing is finished or more data is required. + /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice). + pub fn read_next(&mut self) -> Option<Result<N3Quad, TurtleSyntaxError>> { + self.parser.read_next() + } + + /// The list of IRI prefixes considered at the current step of the parsing. + /// + /// This method returns (prefix name, prefix value) tuples. + /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered. + /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned). + /// + /// ``` + /// use oxttl::N3Parser; + /// + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . + /// <foo> a schema:Person ; + /// schema:name "Foo" ."#; + /// + /// let mut reader = N3Parser::new().parse(); + /// reader.extend_from_slice(file); + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning + /// + /// reader.read_next().unwrap()?; // We read the first triple + /// assert_eq!( + /// reader.prefixes().collect::<Vec<_>>(), + /// [("schema", "http://schema.org/")] + /// ); // There are now prefixes + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn prefixes(&self) -> N3PrefixesIter<'_> { + N3PrefixesIter { + inner: self.parser.context.prefixes.iter(), + } + } + + /// The base IRI considered at the current step of the parsing. + /// + /// ``` + /// use oxttl::N3Parser; + /// + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . 
+ /// <foo> a schema:Person ; + /// schema:name "Foo" ."#; + /// + /// let mut reader = N3Parser::new().parse(); + /// reader.extend_from_slice(file); + /// assert!(reader.base_iri().is_none()); // No base IRI at the beginning + /// + /// reader.read_next().unwrap()?; // We read the first triple + /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn base_iri(&self) -> Option<&str> { + self.parser + .context + .lexer_options + .base_iri + .as_ref() + .map(Iri::as_str) + } +} + +#[derive(Clone)] +enum Predicate { + Regular(N3Term), + Inverted(N3Term), +} + +struct N3Recognizer { + stack: Vec<N3State>, + terms: Vec<N3Term>, + predicates: Vec<Predicate>, + contexts: Vec<BlankNode>, +} + +struct N3RecognizerContext { + lexer_options: N3LexerOptions, + prefixes: HashMap<String, Iri<String>>, +} + +impl RuleRecognizer for N3Recognizer { + type TokenRecognizer = N3Lexer; + type Output = N3Quad; + type Context = N3RecognizerContext; + + fn error_recovery_state(mut self) -> Self { + self.stack.clear(); + self.terms.clear(); + self.predicates.clear(); + self.contexts.clear(); + self + } + + fn recognize_next( + mut self, + token: N3Token<'_>, + context: &mut N3RecognizerContext, + results: &mut Vec<N3Quad>, + errors: &mut Vec<RuleRecognizerError>, + ) -> Self { + while let Some(rule) = self.stack.pop() { + match rule { + // [1] n3Doc ::= ( ( n3Statement ".") | sparqlDirective) * + // [2] n3Statement ::= n3Directive | triples + // [3] n3Directive ::= prefixID | base + // [4] sparqlDirective ::= sparqlBase | sparqlPrefix + // [5] sparqlBase ::= BASE IRIREF + // [6] sparqlPrefix ::= PREFIX PNAME_NS IRIREF + // [7] prefixID ::= "@prefix" PNAME_NS IRIREF + // [8] base ::= "@base" IRIREF + N3State::N3Doc => { + self.stack.push(N3State::N3Doc); + match token { + N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("base") => { + self.stack.push(N3State::BaseExpectIri); + return self; + } + N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("prefix") => { + self.stack.push(N3State::PrefixExpectPrefix); + return self; + } + N3Token::LangTag("prefix") => { + self.stack.push(N3State::N3DocExpectDot); + self.stack.push(N3State::PrefixExpectPrefix); + return self; + } + N3Token::LangTag("base") => { + self.stack.push(N3State::N3DocExpectDot); + self.stack.push(N3State::BaseExpectIri); + return self; + } + _ => { + self.stack.push(N3State::N3DocExpectDot); + self.stack.push(N3State::Triples); + } + } + } + N3State::N3DocExpectDot => { + if token == N3Token::Punctuation(".") { + return self; + } + errors.push("A dot is expected at the end of N3 statements".into()); + } + N3State::BaseExpectIri => return if let N3Token::IriRef(iri) = token { + context.lexer_options.base_iri = Some(Iri::parse_unchecked(iri)); + self + } else { + self.error(errors, "The BASE keyword should be followed by an IRI") + }, + N3State::PrefixExpectPrefix => return match token { + N3Token::PrefixedName { prefix, local, .. 
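+                        // A declaration such as `@prefix ex: <...>` lexes `ex:` as a
+                        // PrefixedName token whose local part is empty (PNAME_NS).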
} if local.is_empty() => { + self.stack.push(N3State::PrefixExpectIri { name: prefix.to_owned() }); + self + } + _ => { + self.error(errors, "The PREFIX keyword should be followed by a prefix like 'ex:'") + } + }, + N3State::PrefixExpectIri { name } => return if let N3Token::IriRef(iri) = token { + context.prefixes.insert(name, Iri::parse_unchecked(iri)); + self + } else { self.error(errors, "The PREFIX declaration should be followed by a prefix and its value as an IRI") + }, + // [9] triples ::= subject predicateObjectList? + N3State::Triples => { + self.stack.push(N3State::TriplesMiddle); + self.stack.push(N3State::Path); + } + N3State::TriplesMiddle => if matches!(token, N3Token::Punctuation("." | "]" | "}" | ")")) {} else { + self.stack.push(N3State::TriplesEnd); + self.stack.push(N3State::PredicateObjectList); + }, + N3State::TriplesEnd => { + self.terms.pop(); + } + // [10] predicateObjectList ::= verb objectList ( ";" ( verb objectList) ? ) * + N3State::PredicateObjectList => { + self.stack.push(N3State::PredicateObjectListEnd); + self.stack.push(N3State::ObjectsList); + self.stack.push(N3State::Verb); + } + N3State::PredicateObjectListEnd => { + self.predicates.pop(); + if token == N3Token::Punctuation(";") { + self.stack.push(N3State::PredicateObjectListPossibleContinuation); + return self; + } + } + N3State::PredicateObjectListPossibleContinuation => if token == N3Token::Punctuation(";") { + self.stack.push(N3State::PredicateObjectListPossibleContinuation); + return self; + } else if matches!(token, N3Token::Punctuation(";" | "." | "}" | "]" | ")")) {} else { + self.stack.push(N3State::PredicateObjectListEnd); + self.stack.push(N3State::ObjectsList); + self.stack.push(N3State::Verb); + }, + // [11] objectList ::= object ( "," object) * + N3State::ObjectsList => { + self.stack.push(N3State::ObjectsListEnd); + self.stack.push(N3State::Path); + } + N3State::ObjectsListEnd => { + let object = self.terms.pop().unwrap(); + let subject = self.terms.last().unwrap().clone(); + results.push(match self.predicates.last().unwrap().clone() { + Predicate::Regular(predicate) => self.quad( + subject, + predicate, + object, + ), + Predicate::Inverted(predicate) => self.quad( + object, + predicate, + subject, + ) + }); + if token == N3Token::Punctuation(",") { + self.stack.push(N3State::ObjectsListEnd); + self.stack.push(N3State::Path); + return self; + } + } + // [12] verb ::= predicate | "a" | ( "has" expression) | ( "is" expression "of") | "=" | "<=" | "=>" + // [14] predicate ::= expression | ( "<-" expression) + N3State::Verb => match token { + N3Token::PlainKeyword("a") => { + self.predicates.push(Predicate::Regular(rdf::TYPE.into())); + return self; + } + N3Token::PlainKeyword("has") => { + self.stack.push(N3State::AfterRegularVerb); + self.stack.push(N3State::Path); + return self; + } + N3Token::PlainKeyword("is") => { + self.stack.push(N3State::AfterVerbIs); + self.stack.push(N3State::Path); + return self; + } + N3Token::Punctuation("=") => { + self.predicates.push(Predicate::Regular(NamedNode::new_unchecked("http://www.w3.org/2002/07/owl#sameAs").into())); + return self; + } + N3Token::Punctuation("=>") => { + self.predicates.push(Predicate::Regular(NamedNode::new_unchecked("http://www.w3.org/2000/10/swap/log#implies").into())); + return self; + } + N3Token::Punctuation("<=") => { + self.predicates.push(Predicate::Inverted(NamedNode::new_unchecked("http://www.w3.org/2000/10/swap/log#implies").into())); + return self; + } + N3Token::Punctuation("<-") => { + 
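+                        // "<-" introduces an inverted predicate: for `s <- p o`,
+                        // the emitted statement is `o p s` (see the
+                        // Predicate::Inverted handling in ObjectsListEnd).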
self.stack.push(N3State::AfterInvertedVerb); + self.stack.push(N3State::Path); + return self; + } + _ => { + self.stack.push(N3State::AfterRegularVerb); + self.stack.push(N3State::Path); + } + } + N3State::AfterRegularVerb => { + self.predicates.push(Predicate::Regular(self.terms.pop().unwrap())); + } + N3State::AfterInvertedVerb => { + self.predicates.push(Predicate::Inverted(self.terms.pop().unwrap())); + } + N3State::AfterVerbIs => return match token { + N3Token::PlainKeyword("of") => { + self.predicates.push(Predicate::Inverted(self.terms.pop().unwrap())); + self + } + _ => { + self.error(errors, "The keyword 'is' should be followed by a predicate then by the keyword 'of'") + } + }, + // [13] subject ::= expression + // [15] object ::= expression + // [16] expression ::= path + // [17] path ::= pathItem ( ( "!" path) | ( "^" path) ) ? + N3State::Path => { + self.stack.push(N3State::PathFollowUp); + self.stack.push(N3State::PathItem); + } + N3State::PathFollowUp => match token { + N3Token::Punctuation("!") => { + self.stack.push(N3State::PathAfterIndicator { is_inverse: false }); + self.stack.push(N3State::PathItem); + return self; + } + N3Token::Punctuation("^") => { + self.stack.push(N3State::PathAfterIndicator { is_inverse: true }); + self.stack.push(N3State::PathItem); + return self; + } + _ => () + }, + N3State::PathAfterIndicator { is_inverse } => { + let predicate = self.terms.pop().unwrap(); + let previous = self.terms.pop().unwrap(); + let current = BlankNode::default(); + results.push(if is_inverse { self.quad(current.clone(), predicate, previous) } else { self.quad(previous, predicate, current.clone()) }); + self.terms.push(current.into()); + self.stack.push(N3State::PathFollowUp); + } + // [18] pathItem ::= iri | blankNode | quickVar | collection | blankNodePropertyList | iriPropertyList | literal | formula + // [19] literal ::= rdfLiteral | numericLiteral | BOOLEAN_LITERAL + // [20] blankNodePropertyList ::= "[" predicateObjectList "]" + // [21] iriPropertyList ::= IPLSTART iri predicateObjectList "]" + // [22] collection ::= "(" object* ")" + // [23] formula ::= "{" formulaContent? "}" + // [25] numericLiteral ::= DOUBLE | DECIMAL | INTEGER + // [26] rdfLiteral ::= STRING ( LANGTAG | ( "^^" iri) ) ? 
+ // [27] iri ::= IRIREF | prefixedName + // [28] prefixedName ::= PNAME_LN | PNAME_NS + // [29] blankNode ::= BLANK_NODE_LABEL | ANON + // [30] quickVar ::= QUICK_VAR_NAME + N3State::PathItem => { + return match token { + N3Token::IriRef(iri) => { + self.terms.push(NamedNode::new_unchecked(iri).into()); + self + } + N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) { + Ok(t) => { + self.terms.push(t.into()); + self + } + Err(e) => self.error(errors, e) + } + N3Token::BlankNodeLabel(bnode) => { + self.terms.push(BlankNode::new_unchecked(bnode).into()); + self + } + N3Token::Variable(name) => { + self.terms.push(Variable::new_unchecked(name).into()); + self + } + N3Token::Punctuation("[") => { + self.stack.push(N3State::PropertyListMiddle); + self + } + N3Token::Punctuation("(") => { + self.stack.push(N3State::CollectionBeginning); + self + } + N3Token::String(value) => { + self.stack.push(N3State::LiteralPossibleSuffix { value }); + self + } + N3Token::Integer(v) => { + self.terms.push(Literal::new_typed_literal(v, xsd::INTEGER).into()); + self + } + N3Token::Decimal(v) => { + self.terms.push(Literal::new_typed_literal(v, xsd::DECIMAL).into()); + self + } + N3Token::Double(v) => { + self.terms.push(Literal::new_typed_literal(v, xsd::DOUBLE).into()); + self + } + N3Token::PlainKeyword("true") => { + self.terms.push(Literal::new_typed_literal("true", xsd::BOOLEAN).into()); + self + } + N3Token::PlainKeyword("false") => { + self.terms.push(Literal::new_typed_literal("false", xsd::BOOLEAN).into()); + self + } + N3Token::Punctuation("{") => { + self.contexts.push(BlankNode::default()); + self.stack.push(N3State::FormulaContent); + self + } + _ => + self.error(errors, "TOKEN is not a valid RDF value") + + } + } + N3State::PropertyListMiddle => match token { + N3Token::Punctuation("]") => { + self.terms.push(BlankNode::default().into()); + return self; + } + N3Token::PlainKeyword("id") => { + self.stack.push(N3State::IriPropertyList); + return self; + } + _ => { + self.terms.push(BlankNode::default().into()); + self.stack.push(N3State::PropertyListEnd); + self.stack.push(N3State::PredicateObjectList); + } + } + N3State::PropertyListEnd => if token == N3Token::Punctuation("]") { + return self; + } else { + errors.push("blank node property lists should end with a ']'".into()); + } + N3State::IriPropertyList => return match token { + N3Token::IriRef(id) => { + self.terms.push(NamedNode::new_unchecked(id).into()); + self.stack.push(N3State::PropertyListEnd); + self.stack.push(N3State::PredicateObjectList); + self + } + N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) { + Ok(t) => { + self.terms.push(t.into()); + self.stack.push(N3State::PropertyListEnd); + self.stack.push(N3State::PredicateObjectList); + self + } + Err(e) => { + self.error(errors, e) + } + } + _ => { + self.error(errors, "The '[ id' construction should be followed by an IRI") + } + }, + N3State::CollectionBeginning => if let N3Token::Punctuation(")") = token { + self.terms.push(rdf::NIL.into()); + return self; + } else { + let root = BlankNode::default(); + self.terms.push(root.clone().into()); + self.terms.push(root.into()); + self.stack.push(N3State::CollectionPossibleEnd); + self.stack.push(N3State::Path); + }, + N3State::CollectionPossibleEnd => { + let value = self.terms.pop().unwrap(); + let old = self.terms.pop().unwrap(); + 
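+                    // Emit one cons cell of the RDF collection: `old rdf:first value`,
+                    // then either close the list with `rdf:rest rdf:nil` on ')' or
+                    // chain a fresh blank node via `rdf:rest` and keep reading items.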
results.push(self.quad(
+                        old.clone(),
+                        rdf::FIRST,
+                        value,
+                    ));
+                    if let N3Token::Punctuation(")") = token {
+                        results.push(self.quad(
+                            old,
+                            rdf::REST,
+                            rdf::NIL,
+                        ));
+                        return self;
+                    }
+                    let new = BlankNode::default();
+                    results.push(self.quad(
+                        old,
+                        rdf::REST,
+                        new.clone(),
+                    ));
+                    self.terms.push(new.into());
+                    self.stack.push(N3State::CollectionPossibleEnd);
+                    self.stack.push(N3State::Path);
+                }
+                N3State::LiteralPossibleSuffix { value } => {
+                    match token {
+                        N3Token::LangTag(lang) => {
+                            self.terms.push(Literal::new_language_tagged_literal_unchecked(value, lang.to_ascii_lowercase()).into());
+                            return self;
+                        }
+                        N3Token::Punctuation("^^") => {
+                            self.stack.push(N3State::LiteralExpectDatatype { value });
+                            return self;
+                        }
+                        _ => {
+                            self.terms.push(Literal::new_simple_literal(value).into());
+                        }
+                    }
+                }
+                N3State::LiteralExpectDatatype { value } => {
+                    match token {
+                        N3Token::IriRef(datatype) => {
+                            self.terms.push(Literal::new_typed_literal(value, NamedNode::new_unchecked(datatype)).into());
+                            return self;
+                        }
+                        N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) {
+                            Ok(datatype) => {
+                                self.terms.push(Literal::new_typed_literal(value, datatype).into());
+                                return self;
+                            }
+                            Err(e) => {
+                                return self.error(errors, e);
+                            }
+                        }
+                        _ => {
+                            errors.push("Expecting a datatype IRI after '^^', found TOKEN".into());
+                            self.stack.clear();
+                        }
+                    }
+                }
+                // [24] formulaContent ::= ( n3Statement ( "." formulaContent? ) ? ) | ( sparqlDirective formulaContent? )
+                N3State::FormulaContent => {
+                    match token {
+                        N3Token::Punctuation("}") => {
+                            self.terms.push(self.contexts.pop().unwrap().into());
+                            return self;
+                        }
+                        N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("base") => {
+                            self.stack.push(N3State::FormulaContent);
+                            self.stack.push(N3State::BaseExpectIri);
+                            return self;
+                        }
+                        N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("prefix") => {
+                            self.stack.push(N3State::FormulaContent);
+                            self.stack.push(N3State::PrefixExpectPrefix);
+                            return self;
+                        }
+                        N3Token::LangTag("prefix") => {
+                            self.stack.push(N3State::FormulaContentExpectDot);
+                            self.stack.push(N3State::PrefixExpectPrefix);
+                            return self;
+                        }
+                        N3Token::LangTag("base") => {
+                            self.stack.push(N3State::FormulaContentExpectDot);
+                            self.stack.push(N3State::BaseExpectIri);
+                            return self;
+                        }
+                        _ => {
+                            self.stack.push(N3State::FormulaContentExpectDot);
+                            self.stack.push(N3State::Triples);
+                        }
+                    }
+                }
+                N3State::FormulaContentExpectDot => {
+                    match token {
+                        N3Token::Punctuation("}") => {
+                            self.terms.push(self.contexts.pop().unwrap().into());
+                            return self;
+                        }
+                        N3Token::Punctuation(".") => {
+                            self.stack.push(N3State::FormulaContent);
+                            return self;
+                        }
+                        _ => {
+                            errors.push("A dot is expected at the end of N3 statements".into());
+                            self.stack.push(N3State::FormulaContent);
+                        }
+                    }
+                }
+            }
+        }
+        // Empty stack
+        if token == N3Token::Punctuation(".") {
+            self.stack.push(N3State::N3Doc);
+            self
+        } else {
+            self
+        }
+    }
+
+    fn recognize_end(
+        self,
+        _state: &mut N3RecognizerContext,
+        _results: &mut Vec<Self::Output>,
+        errors: &mut Vec<RuleRecognizerError>,
+    ) {
+        match &*self.stack {
+            [] | [N3State::N3Doc] => (),
+            _ => errors.push("Unexpected end".into()), // TODO
+        }
+    }
+
+    fn lexer_options(context: &N3RecognizerContext) -> &N3LexerOptions {
+        &context.lexer_options
+    }
+}
+
+impl N3Recognizer {
+    pub fn new_parser(
+        unchecked: bool,
+        base_iri: Option<Iri<String>>,
+        prefixes: HashMap<String, Iri<String>>,
+    ) ->
Parser<Self> { + Parser::new( + Lexer::new( + N3Lexer::new(N3LexerMode::N3, unchecked), + MIN_BUFFER_SIZE, + MAX_BUFFER_SIZE, + true, + Some(b"#"), + ), + Self { + stack: vec![N3State::N3Doc], + terms: Vec::new(), + predicates: Vec::new(), + contexts: Vec::new(), + }, + N3RecognizerContext { + lexer_options: N3LexerOptions { base_iri }, + prefixes, + }, + ) + } + + #[must_use] + fn error( + mut self, + errors: &mut Vec<RuleRecognizerError>, + msg: impl Into<RuleRecognizerError>, + ) -> Self { + errors.push(msg.into()); + self.stack.clear(); + self + } + + fn quad( + &self, + subject: impl Into<N3Term>, + predicate: impl Into<N3Term>, + object: impl Into<N3Term>, + ) -> N3Quad { + N3Quad { + subject: subject.into(), + predicate: predicate.into(), + object: object.into(), + graph_name: self + .contexts + .last() + .map_or(GraphName::DefaultGraph, |g| g.clone().into()), + } + } +} + +#[derive(Debug)] +enum N3State { + N3Doc, + N3DocExpectDot, + BaseExpectIri, + PrefixExpectPrefix, + PrefixExpectIri { name: String }, + Triples, + TriplesMiddle, + TriplesEnd, + PredicateObjectList, + PredicateObjectListEnd, + PredicateObjectListPossibleContinuation, + ObjectsList, + ObjectsListEnd, + Verb, + AfterRegularVerb, + AfterInvertedVerb, + AfterVerbIs, + Path, + PathFollowUp, + PathAfterIndicator { is_inverse: bool }, + PathItem, + PropertyListMiddle, + PropertyListEnd, + IriPropertyList, + CollectionBeginning, + CollectionPossibleEnd, + LiteralPossibleSuffix { value: String }, + LiteralExpectDatatype { value: String }, + FormulaContent, + FormulaContentExpectDot, +} + +/// Iterator on the file prefixes. +/// +/// See [`LowLevelN3Reader::prefixes`]. +pub struct N3PrefixesIter<'a> { + inner: Iter<'a, String, Iri<String>>, +} + +impl<'a> Iterator for N3PrefixesIter<'a> { + type Item = (&'a str, &'a str); + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + let (key, value) = self.inner.next()?; + Some((key.as_str(), value.as_str())) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.inner.size_hint() + } +} diff --git a/ng-oxigraph/src/oxttl/nquads.rs b/ng-oxigraph/src/oxttl/nquads.rs new file mode 100644 index 0000000..8b62367 --- /dev/null +++ b/ng-oxigraph/src/oxttl/nquads.rs @@ -0,0 +1,564 @@ +//! A [N-Quads](https://www.w3.org/TR/n-quads/) streaming parser implemented by [`NQuadsParser`] +//! and a serializer implemented by [`NQuadsSerializer`]. + +use crate::oxrdf::{Quad, QuadRef}; +use crate::oxttl::line_formats::NQuadsRecognizer; +#[cfg(feature = "async-tokio")] +use crate::oxttl::toolkit::FromTokioAsyncReadIterator; +use crate::oxttl::toolkit::{FromReadIterator, Parser, TurtleParseError, TurtleSyntaxError}; +use std::io::{self, Read, Write}; +#[cfg(feature = "async-tokio")] +use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; + +/// A [N-Quads](https://www.w3.org/TR/n-quads/) streaming parser. +/// +/// Support for [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star) is available behind the `rdf-star` feature and the [`NQuadsParser::with_quoted_triples`] option. +/// +/// Count the number of people: +/// ``` +/// use oxrdf::{NamedNodeRef, vocab::rdf}; +/// use oxttl::NQuadsParser; +/// +/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . +/// <http://example.com/foo> <http://schema.org/name> "Foo" . +/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . 
+/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#; +/// +/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; +/// let mut count = 0; +/// for quad in NQuadsParser::new().parse_read(file.as_ref()) { +/// let quad = quad?; +/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() { +/// count += 1; +/// } +/// } +/// assert_eq!(2, count); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[derive(Default)] +#[must_use] +pub struct NQuadsParser { + unchecked: bool, + #[cfg(feature = "rdf-star")] + with_quoted_triples: bool, +} + +impl NQuadsParser { + /// Builds a new [`NQuadsParser`]. + #[inline] + pub fn new() -> Self { + Self::default() + } + + /// Assumes the file is valid to make parsing faster. + /// + /// It will skip some validations. + /// + /// Note that if the file is actually not valid, then broken RDF might be emitted by the parser. + #[inline] + pub fn unchecked(mut self) -> Self { + self.unchecked = true; + self + } + + /// Enables [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star). + #[cfg(feature = "rdf-star")] + #[inline] + pub fn with_quoted_triples(mut self) -> Self { + self.with_quoted_triples = true; + self + } + + /// Parses a N-Quads file from a [`Read`] implementation. + /// + /// Count the number of people: + /// ``` + /// use oxrdf::{NamedNodeRef, vocab::rdf}; + /// use oxttl::NQuadsParser; + /// + /// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . + /// <http://example.com/foo> <http://schema.org/name> "Foo" . + /// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . + /// <http://example.com/bar> <http://schema.org/name> "Bar" ."#; + /// + /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; + /// let mut count = 0; + /// for quad in NQuadsParser::new().parse_read(file.as_ref()) { + /// let quad = quad?; + /// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() { + /// count += 1; + /// } + /// } + /// assert_eq!(2, count); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn parse_read<R: Read>(self, read: R) -> FromReadNQuadsReader<R> { + FromReadNQuadsReader { + inner: self.parse().parser.parse_read(read), + } + } + + /// Parses a N-Quads file from a [`AsyncRead`] implementation. + /// + /// Count the number of people: + /// ``` + /// use oxrdf::{NamedNodeRef, vocab::rdf}; + /// use oxttl::NQuadsParser; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), oxttl::TurtleParseError> { + /// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . + /// <http://example.com/foo> <http://schema.org/name> "Foo" . + /// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . 
+    /// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
+    ///
+    /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
+    /// let mut count = 0;
+    /// let mut parser = NQuadsParser::new().parse_tokio_async_read(file.as_ref());
+    /// while let Some(quad) = parser.next().await {
+    ///     let quad = quad?;
+    ///     if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
+    ///         count += 1;
+    ///     }
+    /// }
+    /// assert_eq!(2, count);
+    /// # Ok(())
+    /// # }
+    /// ```
+    #[cfg(feature = "async-tokio")]
+    pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
+        self,
+        read: R,
+    ) -> FromTokioAsyncReadNQuadsReader<R> {
+        FromTokioAsyncReadNQuadsReader {
+            inner: self.parse().parser.parse_tokio_async_read(read),
+        }
+    }
+
+    /// Allows to parse a N-Quads file by using a low-level API.
+    ///
+    /// Count the number of people:
+    /// ```
+    /// use oxrdf::{NamedNodeRef, vocab::rdf};
+    /// use oxttl::NQuadsParser;
+    ///
+    /// let file: [&[u8]; 4] = [
+    ///     b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
+    ///     b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n",
+    ///     b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
+    ///     b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n"
+    /// ];
+    ///
+    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
+    /// let mut count = 0;
+    /// let mut parser = NQuadsParser::new().parse();
+    /// let mut file_chunks = file.iter();
+    /// while !parser.is_end() {
+    ///     // We feed more data to the parser
+    ///     if let Some(chunk) = file_chunks.next() {
+    ///         parser.extend_from_slice(chunk);
+    ///     } else {
+    ///         parser.end(); // It's finished
+    ///     }
+    ///     // We read as many quads from the parser as possible
+    ///     while let Some(quad) = parser.read_next() {
+    ///         let quad = quad?;
+    ///         if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
+    ///             count += 1;
+    ///         }
+    ///     }
+    /// }
+    /// assert_eq!(2, count);
+    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+    /// ```
+    #[allow(clippy::unused_self)]
+    pub fn parse(self) -> LowLevelNQuadsReader {
+        LowLevelNQuadsReader {
+            parser: NQuadsRecognizer::new_parser(
+                true,
+                #[cfg(feature = "rdf-star")]
+                self.with_quoted_triples,
+                self.unchecked,
+            ),
+        }
+    }
+}
+
+/// Parses a N-Quads file from a [`Read`] implementation. Can be built using [`NQuadsParser::parse_read`].
+///
+/// Count the number of people:
+/// ```
+/// use oxrdf::{NamedNodeRef, vocab::rdf};
+/// use oxttl::NQuadsParser;
+///
+/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
+/// <http://example.com/foo> <http://schema.org/name> "Foo" .
+/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
+/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
+///
+/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
+/// let mut count = 0;
+/// for quad in NQuadsParser::new().parse_read(file.as_ref()) {
+///     let quad = quad?;
+///     if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
+///         count += 1;
+///     }
+/// }
+/// assert_eq!(2, count);
+/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+/// ```
+#[must_use]
+pub struct FromReadNQuadsReader<R: Read> {
+    inner: FromReadIterator<R, NQuadsRecognizer>,
+}
+
+impl<R: Read> Iterator for FromReadNQuadsReader<R> {
+    type Item = Result<Quad, TurtleParseError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next()
+    }
+}
+
+/// Parses a N-Quads file from a [`AsyncRead`] implementation. Can be built using [`NQuadsParser::parse_tokio_async_read`].
+///
+/// Count the number of people:
+/// ```
+/// use oxrdf::{NamedNodeRef, vocab::rdf};
+/// use oxttl::NQuadsParser;
+///
+/// # #[tokio::main(flavor = "current_thread")]
+/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
+/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
+/// <http://example.com/foo> <http://schema.org/name> "Foo" .
+/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
+/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
+///
+/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
+/// let mut count = 0;
+/// let mut parser = NQuadsParser::new().parse_tokio_async_read(file.as_ref());
+/// while let Some(quad) = parser.next().await {
+///     let quad = quad?;
+///     if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
+///         count += 1;
+///     }
+/// }
+/// assert_eq!(2, count);
+/// # Ok(())
+/// # }
+/// ```
+#[cfg(feature = "async-tokio")]
+#[must_use]
+pub struct FromTokioAsyncReadNQuadsReader<R: AsyncRead + Unpin> {
+    inner: FromTokioAsyncReadIterator<R, NQuadsRecognizer>,
+}
+
+#[cfg(feature = "async-tokio")]
+impl<R: AsyncRead + Unpin> FromTokioAsyncReadNQuadsReader<R> {
+    /// Reads the next quad or returns `None` if the file is finished.
+    pub async fn next(&mut self) -> Option<Result<Quad, TurtleParseError>> {
+        Some(self.inner.next().await?.map(Into::into))
+    }
+}
+
+/// Parses a N-Quads file by using a low-level API. Can be built using [`NQuadsParser::parse`].
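+///
+/// This is the same machinery that [`NQuadsParser::parse_read`] wraps; reach for it when you
+/// manage the input buffers yourself.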
+/// +/// Count the number of people: +/// ``` +/// use oxrdf::{NamedNodeRef, vocab::rdf}; +/// use oxttl::NQuadsParser; +/// +/// let file: [&[u8]; 4] = [ +/// b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n", +/// b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n", +/// b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n", +/// b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n" +/// ]; +/// +/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; +/// let mut count = 0; +/// let mut parser = NQuadsParser::new().parse(); +/// let mut file_chunks = file.iter(); +/// while !parser.is_end() { +/// // We feed more data to the parser +/// if let Some(chunk) = file_chunks.next() { +/// parser.extend_from_slice(chunk); +/// } else { +/// parser.end(); // It's finished +/// } +/// // We read as many quads from the parser as possible +/// while let Some(quad) = parser.read_next() { +/// let quad = quad?; +/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() { +/// count += 1; +/// } +/// } +/// } +/// assert_eq!(2, count); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +pub struct LowLevelNQuadsReader { + parser: Parser<NQuadsRecognizer>, +} + +impl LowLevelNQuadsReader { + /// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data. + pub fn extend_from_slice(&mut self, other: &[u8]) { + self.parser.extend_from_slice(other) + } + + /// Tell the parser that the file is finished. + /// + /// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values. + pub fn end(&mut self) { + self.parser.end() + } + + /// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`. + pub fn is_end(&self) -> bool { + self.parser.is_end() + } + + /// Attempt to parse a new quad from the already provided data. + /// + /// Returns [`None`] if the parsing is finished or more data is required. + /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice). + pub fn read_next(&mut self) -> Option<Result<Quad, TurtleSyntaxError>> { + self.parser.read_next() + } +} + +/// A [N-Quads](https://www.w3.org/TR/n-quads/) serializer. +/// +/// Support for [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star) is available behind the `rdf-star` feature. +/// +/// ``` +/// use oxrdf::{NamedNodeRef, QuadRef}; +/// use oxttl::NQuadsSerializer; +/// +/// let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new()); +/// writer.write_quad(QuadRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// NamedNodeRef::new("http://example.com")?, +/// ))?; +/// assert_eq!( +/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n", +/// writer.finish().as_slice() +/// ); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[derive(Default)] +#[must_use] +pub struct NQuadsSerializer; + +impl NQuadsSerializer { + /// Builds a new [`NQuadsSerializer`]. 
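+    ///
+    /// A minimal round-trip sketch (assuming the same `oxttl` crate paths as the other doc
+    /// examples in this file):
+    /// ```
+    /// use oxttl::{NQuadsParser, NQuadsSerializer};
+    ///
+    /// let input = b"<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n";
+    /// let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new());
+    /// for quad in NQuadsParser::new().parse_read(input.as_ref()) {
+    ///     // Re-serialize each parsed quad; the output matches the canonical input line
+    ///     let quad = quad?;
+    ///     writer.write_quad(&quad)?;
+    /// }
+    /// assert_eq!(input.as_slice(), writer.finish().as_slice());
+    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+    /// ```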
+ #[inline] + pub fn new() -> Self { + Self + } + + /// Writes a N-Quads file to a [`Write`] implementation. + /// + /// ``` + /// use oxrdf::{NamedNodeRef, QuadRef}; + /// use oxttl::NQuadsSerializer; + /// + /// let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new()); + /// writer.write_quad(QuadRef::new( + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, + /// NamedNodeRef::new("http://schema.org/Person")?, + /// NamedNodeRef::new("http://example.com")?, + /// ))?; + /// assert_eq!( + /// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n", + /// writer.finish().as_slice() + /// ); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteNQuadsWriter<W> { + ToWriteNQuadsWriter { + write, + writer: self.serialize(), + } + } + + /// Writes a N-Quads file to a [`AsyncWrite`] implementation. + /// + /// ``` + /// use oxrdf::{NamedNodeRef, QuadRef}; + /// use oxttl::NQuadsSerializer; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> std::io::Result<()> { + /// let mut writer = NQuadsSerializer::new().serialize_to_tokio_async_write(Vec::new()); + /// writer.write_quad(QuadRef::new( + /// NamedNodeRef::new_unchecked("http://example.com#me"), + /// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + /// NamedNodeRef::new_unchecked("http://schema.org/Person"), + /// NamedNodeRef::new_unchecked("http://example.com"), + /// )).await?; + /// assert_eq!( + /// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n", + /// writer.finish().as_slice() + /// ); + /// # Ok(()) + /// # } + /// ``` + #[cfg(feature = "async-tokio")] + pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>( + self, + write: W, + ) -> ToTokioAsyncWriteNQuadsWriter<W> { + ToTokioAsyncWriteNQuadsWriter { + write, + writer: self.serialize(), + buffer: Vec::new(), + } + } + + /// Builds a low-level N-Quads writer. + /// + /// ``` + /// use oxrdf::{NamedNodeRef, QuadRef}; + /// use oxttl::NQuadsSerializer; + /// + /// let mut buf = Vec::new(); + /// let mut writer = NQuadsSerializer::new().serialize(); + /// writer.write_quad(QuadRef::new( + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, + /// NamedNodeRef::new("http://schema.org/Person")?, + /// NamedNodeRef::new("http://example.com")?, + /// ), &mut buf)?; + /// assert_eq!( + /// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n", + /// buf.as_slice() + /// ); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + #[allow(clippy::unused_self)] + pub fn serialize(self) -> LowLevelNQuadsWriter { + LowLevelNQuadsWriter + } +} + +/// Writes a N-Quads file to a [`Write`] implementation. Can be built using [`NQuadsSerializer::serialize_to_write`]. 
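+///
+/// Each [`write_quad`](ToWriteNQuadsWriter::write_quad) call writes one line straight to the
+/// underlying [`Write`] with no intermediate buffering, so wrapping the target in a
+/// [`BufWriter`](std::io::BufWriter) may be worthwhile when writing many small quads.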
+/// +/// ``` +/// use oxrdf::{NamedNodeRef, QuadRef}; +/// use oxttl::NQuadsSerializer; +/// +/// let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new()); +/// writer.write_quad(QuadRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// NamedNodeRef::new("http://example.com")?, +/// ))?; +/// assert_eq!( +/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n", +/// writer.finish().as_slice() +/// ); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[must_use] +pub struct ToWriteNQuadsWriter<W: Write> { + write: W, + writer: LowLevelNQuadsWriter, +} + +impl<W: Write> ToWriteNQuadsWriter<W> { + /// Writes an extra quad. + pub fn write_quad<'a>(&mut self, q: impl Into<QuadRef<'a>>) -> io::Result<()> { + self.writer.write_quad(q, &mut self.write) + } + + /// Ends the write process and returns the underlying [`Write`]. + pub fn finish(self) -> W { + self.write + } +} + +/// Writes a N-Quads file to a [`AsyncWrite`] implementation. Can be built using [`NQuadsSerializer::serialize_to_tokio_async_write`]. +/// +/// ``` +/// use oxrdf::{NamedNodeRef, QuadRef}; +/// use oxttl::NQuadsSerializer; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> std::io::Result<()> { +/// let mut writer = NQuadsSerializer::new().serialize_to_tokio_async_write(Vec::new()); +/// writer.write_quad(QuadRef::new( +/// NamedNodeRef::new_unchecked("http://example.com#me"), +/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), +/// NamedNodeRef::new_unchecked("http://schema.org/Person"), +/// NamedNodeRef::new_unchecked("http://example.com"), +/// )).await?; +/// assert_eq!( +/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n", +/// writer.finish().as_slice() +/// ); +/// # Ok(()) +/// # } +/// ``` +#[cfg(feature = "async-tokio")] +#[must_use] +pub struct ToTokioAsyncWriteNQuadsWriter<W: AsyncWrite + Unpin> { + write: W, + writer: LowLevelNQuadsWriter, + buffer: Vec<u8>, +} + +#[cfg(feature = "async-tokio")] +impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteNQuadsWriter<W> { + /// Writes an extra quad. + pub async fn write_quad<'a>(&mut self, q: impl Into<QuadRef<'a>>) -> io::Result<()> { + self.writer.write_quad(q, &mut self.buffer)?; + self.write.write_all(&self.buffer).await?; + self.buffer.clear(); + Ok(()) + } + + /// Ends the write process and returns the underlying [`Write`]. + pub fn finish(self) -> W { + self.write + } +} + +/// Writes a N-Quads file by using a low-level API. Can be built using [`NQuadsSerializer::serialize`]. 
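+///
+/// Unlike the higher-level writers, the target [`Write`] is passed to every
+/// [`write_quad`](LowLevelNQuadsWriter::write_quad) call, so successive quads can go to
+/// different outputs.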
+/// +/// ``` +/// use oxrdf::{NamedNodeRef, QuadRef}; +/// use oxttl::NQuadsSerializer; +/// +/// let mut buf = Vec::new(); +/// let mut writer = NQuadsSerializer::new().serialize(); +/// writer.write_quad(QuadRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// NamedNodeRef::new("http://example.com")?, +/// ), &mut buf)?; +/// assert_eq!( +/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n", +/// buf.as_slice() +/// ); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +pub struct LowLevelNQuadsWriter; + +impl LowLevelNQuadsWriter { + /// Writes an extra quad. + #[allow(clippy::unused_self)] + pub fn write_quad<'a>( + &mut self, + q: impl Into<QuadRef<'a>>, + mut write: impl Write, + ) -> io::Result<()> { + writeln!(write, "{} .", q.into()) + } +} diff --git a/ng-oxigraph/src/oxttl/ntriples.rs b/ng-oxigraph/src/oxttl/ntriples.rs new file mode 100644 index 0000000..271b920 --- /dev/null +++ b/ng-oxigraph/src/oxttl/ntriples.rs @@ -0,0 +1,580 @@ +//! A [N-Triples](https://www.w3.org/TR/n-triples/) streaming parser implemented by [`NTriplesParser`] +//! and a serializer implemented by [`NTriplesSerializer`]. + +use crate::oxrdf::{Triple, TripleRef}; +use crate::oxttl::line_formats::NQuadsRecognizer; +#[cfg(feature = "async-tokio")] +use crate::oxttl::toolkit::FromTokioAsyncReadIterator; +use crate::oxttl::toolkit::{FromReadIterator, Parser, TurtleParseError, TurtleSyntaxError}; +use std::io::{self, Read, Write}; +#[cfg(feature = "async-tokio")] +use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; + +/// A [N-Triples](https://www.w3.org/TR/n-triples/) streaming parser. +/// +/// Support for [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) is available behind the `rdf-star` feature and the [`NTriplesParser::with_quoted_triples`] option. +/// +/// Count the number of people: +/// ``` +/// use oxrdf::{NamedNodeRef, vocab::rdf}; +/// use oxttl::NTriplesParser; +/// +/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . +/// <http://example.com/foo> <http://schema.org/name> "Foo" . +/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . +/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#; +/// +/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; +/// let mut count = 0; +/// for triple in NTriplesParser::new().parse_read(file.as_ref()) { +/// let triple = triple?; +/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { +/// count += 1; +/// } +/// } +/// assert_eq!(2, count); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[derive(Default)] +#[must_use] +pub struct NTriplesParser { + unchecked: bool, + #[cfg(feature = "rdf-star")] + with_quoted_triples: bool, +} + +impl NTriplesParser { + /// Builds a new [`NTriplesParser`]. + #[inline] + pub fn new() -> Self { + Self::default() + } + + /// Assumes the file is valid to make parsing faster. + /// + /// It will skip some validations. + /// + /// Note that if the file is actually not valid, then broken RDF might be emitted by the parser. 
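+    ///
+    /// For example, mirroring the `unchecked_parsing` test at the bottom of this file, an
+    /// over-long language tag is accepted rather than rejected:
+    /// ```
+    /// use oxttl::NTriplesParser;
+    ///
+    /// let file = br#"<http://example.com/s> <http://example.com/p> "baz"@toolonglangtag ."#;
+    /// assert!(NTriplesParser::new()
+    ///     .unchecked()
+    ///     .parse_read(file.as_ref())
+    ///     .all(|t| t.is_ok()));
+    /// ```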
/// + #[inline] + pub fn unchecked(mut self) -> Self { + self.unchecked = true; + self + } + + /// Enables [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star). + #[cfg(feature = "rdf-star")] + #[inline] + pub fn with_quoted_triples(mut self) -> Self { + self.with_quoted_triples = true; + self + } + + /// Parses a N-Triples file from a [`Read`] implementation. + /// + /// Count the number of people: + /// ``` + /// use oxrdf::{NamedNodeRef, vocab::rdf}; + /// use oxttl::NTriplesParser; + /// + /// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . + /// <http://example.com/foo> <http://schema.org/name> "Foo" . + /// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . + /// <http://example.com/bar> <http://schema.org/name> "Bar" ."#; + /// + /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; + /// let mut count = 0; + /// for triple in NTriplesParser::new().parse_read(file.as_ref()) { + /// let triple = triple?; + /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { + /// count += 1; + /// } + /// } + /// assert_eq!(2, count); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn parse_read<R: Read>(self, read: R) -> FromReadNTriplesReader<R> { + FromReadNTriplesReader { + inner: self.parse().parser.parse_read(read), + } + } + + /// Parses a N-Triples file from a [`AsyncRead`] implementation. + /// + /// Count the number of people: + /// ``` + /// use oxrdf::{NamedNodeRef, vocab::rdf}; + /// use oxttl::NTriplesParser; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), oxttl::TurtleParseError> { + /// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . + /// <http://example.com/foo> <http://schema.org/name> "Foo" . + /// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . + /// <http://example.com/bar> <http://schema.org/name> "Bar" ."#; + /// + /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); + /// let mut count = 0; + /// let mut parser = NTriplesParser::new().parse_tokio_async_read(file.as_ref()); + /// while let Some(triple) = parser.next().await { + /// let triple = triple?; + /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { + /// count += 1; + /// } + /// } + /// assert_eq!(2, count); + /// # Ok(()) + /// # } + /// ``` + #[cfg(feature = "async-tokio")] + pub fn parse_tokio_async_read<R: AsyncRead + Unpin>( + self, + read: R, + ) -> FromTokioAsyncReadNTriplesReader<R> { + FromTokioAsyncReadNTriplesReader { + inner: self.parse().parser.parse_tokio_async_read(read), + } + } + + /// Allows to parse a N-Triples file by using a low-level API. 
+ /// + /// Count the number of people: + /// ``` + /// use oxrdf::{NamedNodeRef, vocab::rdf}; + /// use oxttl::NTriplesParser; + /// + /// let file: [&[u8]; 4] = [ + /// b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n", + /// b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n", + /// b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n", + /// b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n" + /// ]; + /// + /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; + /// let mut count = 0; + /// let mut parser = NTriplesParser::new().parse(); + /// let mut file_chunks = file.iter(); + /// while !parser.is_end() { + /// // We feed more data to the parser + /// if let Some(chunk) = file_chunks.next() { + /// parser.extend_from_slice(chunk); + /// } else { + /// parser.end(); // It's finished + /// } + /// // We read as many triples from the parser as possible + /// while let Some(triple) = parser.read_next() { + /// let triple = triple?; + /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { + /// count += 1; + /// } + /// } + /// } + /// assert_eq!(2, count); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + #[allow(clippy::unused_self)] + pub fn parse(self) -> LowLevelNTriplesReader { + LowLevelNTriplesReader { + parser: NQuadsRecognizer::new_parser( + false, + #[cfg(feature = "rdf-star")] + self.with_quoted_triples, + self.unchecked, + ), + } + } +} + +/// Parses a N-Triples file from a [`Read`] implementation. Can be built using [`NTriplesParser::parse_read`]. +/// +/// Count the number of people: +/// ``` +/// use oxrdf::{NamedNodeRef, vocab::rdf}; +/// use oxttl::NTriplesParser; +/// +/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . +/// <http://example.com/foo> <http://schema.org/name> "Foo" . +/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . +/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#; +/// +/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; +/// let mut count = 0; +/// for triple in NTriplesParser::new().parse_read(file.as_ref()) { +/// let triple = triple?; +/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { +/// count += 1; +/// } +/// } +/// assert_eq!(2, count); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[must_use] +pub struct FromReadNTriplesReader<R: Read> { + inner: FromReadIterator<R, NQuadsRecognizer>, +} + +impl<R: Read> Iterator for FromReadNTriplesReader<R> { + type Item = Result<Triple, TurtleParseError>; + + fn next(&mut self) -> Option<Self::Item> { + Some(self.inner.next()?.map(Into::into)) + } +} + +/// Parses a N-Triples file from a [`AsyncRead`] implementation. Can be built using [`NTriplesParser::parse_tokio_async_read`]. +/// +/// Count the number of people: +/// ``` +/// use oxrdf::{NamedNodeRef, vocab::rdf}; +/// use oxttl::NTriplesParser; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> Result<(), oxttl::TurtleParseError> { +/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . +/// <http://example.com/foo> <http://schema.org/name> "Foo" . 
+/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . +/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#; +/// +/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); +/// let mut count = 0; +/// let mut parser = NTriplesParser::new().parse_tokio_async_read(file.as_ref()); +/// while let Some(triple) = parser.next().await { +/// let triple = triple?; +/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { +/// count += 1; +/// } +/// } +/// assert_eq!(2, count); +/// # Ok(()) +/// # } +/// ``` +#[cfg(feature = "async-tokio")] +#[must_use] +pub struct FromTokioAsyncReadNTriplesReader<R: AsyncRead + Unpin> { + inner: FromTokioAsyncReadIterator<R, NQuadsRecognizer>, +} + +#[cfg(feature = "async-tokio")] +impl<R: AsyncRead + Unpin> FromTokioAsyncReadNTriplesReader<R> { + /// Reads the next triple or returns `None` if the file is finished. + pub async fn next(&mut self) -> Option<Result<Triple, TurtleParseError>> { + Some(self.inner.next().await?.map(Into::into)) + } +} + +/// Parses a N-Triples file by using a low-level API. Can be built using [`NTriplesParser::parse`]. +/// +/// Count the number of people: +/// ``` +/// use oxrdf::{NamedNodeRef, vocab::rdf}; +/// use oxttl::NTriplesParser; +/// +/// let file: [&[u8]; 4] = [ +/// b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n", +/// b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n", +/// b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n", +/// b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n" +/// ]; +/// +/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; +/// let mut count = 0; +/// let mut parser = NTriplesParser::new().parse(); +/// let mut file_chunks = file.iter(); +/// while !parser.is_end() { +/// // We feed more data to the parser +/// if let Some(chunk) = file_chunks.next() { +/// parser.extend_from_slice(chunk); +/// } else { +/// parser.end(); // It's finished +/// } +/// // We read as many triples from the parser as possible +/// while let Some(triple) = parser.read_next() { +/// let triple = triple?; +/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { +/// count += 1; +/// } +/// } +/// } +/// assert_eq!(2, count); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +pub struct LowLevelNTriplesReader { + parser: Parser<NQuadsRecognizer>, +} + +impl LowLevelNTriplesReader { + /// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data. + pub fn extend_from_slice(&mut self, other: &[u8]) { + self.parser.extend_from_slice(other) + } + + /// Tell the parser that the file is finished. + /// + /// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values. + pub fn end(&mut self) { + self.parser.end() + } + + /// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`. + pub fn is_end(&self) -> bool { + self.parser.is_end() + } + + /// Attempt to parse a new triple from the already provided data. + /// + /// Returns [`None`] if the parsing is finished or more data is required. + /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice). 
+ pub fn read_next(&mut self) -> Option<Result<Triple, TurtleSyntaxError>> { + Some(self.parser.read_next()?.map(Into::into)) + } +} + +/// A [canonical](https://www.w3.org/TR/n-triples/#canonical-ntriples) [N-Triples](https://www.w3.org/TR/n-triples/) serializer. +/// +/// Support for [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) is available behind the `rdf-star` feature. +/// +/// ``` +/// use oxrdf::{NamedNodeRef, TripleRef}; +/// use oxttl::NTriplesSerializer; +/// +/// let mut writer = NTriplesSerializer::new().serialize_to_write(Vec::new()); +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// ))?; +/// assert_eq!( +/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n", +/// writer.finish().as_slice() +/// ); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[derive(Default)] +#[must_use] +pub struct NTriplesSerializer; + +impl NTriplesSerializer { + /// Builds a new [`NTriplesSerializer`]. + #[inline] + pub fn new() -> Self { + Self + } + + /// Writes a N-Triples file to a [`Write`] implementation. + /// + /// ``` + /// use oxrdf::{NamedNodeRef, TripleRef}; + /// use oxttl::NTriplesSerializer; + /// + /// let mut writer = NTriplesSerializer::new().serialize_to_write(Vec::new()); + /// writer.write_triple(TripleRef::new( + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, + /// NamedNodeRef::new("http://schema.org/Person")?, + /// ))?; + /// assert_eq!( + /// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n", + /// writer.finish().as_slice() + /// ); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteNTriplesWriter<W> { + ToWriteNTriplesWriter { + write, + writer: self.serialize(), + } + } + + /// Writes a N-Triples file to a [`AsyncWrite`] implementation. + /// + /// ``` + /// use oxrdf::{NamedNodeRef, TripleRef}; + /// use oxttl::NTriplesSerializer; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> std::io::Result<()> { + /// let mut writer = NTriplesSerializer::new().serialize_to_tokio_async_write(Vec::new()); + /// writer.write_triple(TripleRef::new( + /// NamedNodeRef::new_unchecked("http://example.com#me"), + /// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + /// NamedNodeRef::new_unchecked("http://schema.org/Person"), + /// )).await?; + /// assert_eq!( + /// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n", + /// writer.finish().as_slice() + /// ); + /// # Ok(()) + /// # } + /// ``` + #[cfg(feature = "async-tokio")] + pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>( + self, + write: W, + ) -> ToTokioAsyncWriteNTriplesWriter<W> { + ToTokioAsyncWriteNTriplesWriter { + write, + writer: self.serialize(), + buffer: Vec::new(), + } + } + + /// Builds a low-level N-Triples writer. 
+ /// + /// ``` + /// use oxrdf::{NamedNodeRef, TripleRef}; + /// use oxttl::NTriplesSerializer; + /// + /// let mut buf = Vec::new(); + /// let mut writer = NTriplesSerializer::new().serialize(); + /// writer.write_triple(TripleRef::new( + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, + /// NamedNodeRef::new("http://schema.org/Person")?, + /// ), &mut buf)?; + /// assert_eq!( + /// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n", + /// buf.as_slice() + /// ); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + #[allow(clippy::unused_self)] + pub fn serialize(self) -> LowLevelNTriplesWriter { + LowLevelNTriplesWriter + } +} + +/// Writes a N-Triples file to a [`Write`] implementation. Can be built using [`NTriplesSerializer::serialize_to_write`]. +/// +/// ``` +/// use oxrdf::{NamedNodeRef, TripleRef}; +/// use oxttl::NTriplesSerializer; +/// +/// let mut writer = NTriplesSerializer::new().serialize_to_write(Vec::new()); +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// ))?; +/// assert_eq!( +/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n", +/// writer.finish().as_slice() +/// ); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[must_use] +pub struct ToWriteNTriplesWriter<W: Write> { + write: W, + writer: LowLevelNTriplesWriter, +} + +impl<W: Write> ToWriteNTriplesWriter<W> { + /// Writes an extra triple. + pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> { + self.writer.write_triple(t, &mut self.write) + } + + /// Ends the write process and returns the underlying [`Write`]. + pub fn finish(self) -> W { + self.write + } +} + +/// Writes a N-Triples file to a [`AsyncWrite`] implementation. Can be built using [`NTriplesSerializer::serialize_to_tokio_async_write`]. +/// +/// ``` +/// use oxrdf::{NamedNodeRef, TripleRef}; +/// use oxttl::NTriplesSerializer; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> std::io::Result<()> { +/// let mut writer = NTriplesSerializer::new().serialize_to_tokio_async_write(Vec::new()); +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new_unchecked("http://example.com#me"), +/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), +/// NamedNodeRef::new_unchecked("http://schema.org/Person") +/// )).await?; +/// assert_eq!( +/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n", +/// writer.finish().as_slice() +/// ); +/// # Ok(()) +/// # } +/// ``` +#[cfg(feature = "async-tokio")] +#[must_use] +pub struct ToTokioAsyncWriteNTriplesWriter<W: AsyncWrite + Unpin> { + write: W, + writer: LowLevelNTriplesWriter, + buffer: Vec<u8>, +} + +#[cfg(feature = "async-tokio")] +impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteNTriplesWriter<W> { + /// Writes an extra triple. + pub async fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> { + self.writer.write_triple(t, &mut self.buffer)?; + self.write.write_all(&self.buffer).await?; + self.buffer.clear(); + Ok(()) + } + + /// Ends the write process and returns the underlying [`Write`]. 
+ pub fn finish(self) -> W { + self.write + } +} + +/// Writes a N-Triples file by using a low-level API. Can be built using [`NTriplesSerializer::serialize`]. +/// +/// ``` +/// use oxrdf::{NamedNodeRef, TripleRef}; +/// use oxttl::NTriplesSerializer; +/// +/// let mut buf = Vec::new(); +/// let mut writer = NTriplesSerializer::new().serialize(); +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// ), &mut buf)?; +/// assert_eq!( +/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n", +/// buf.as_slice() +/// ); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +pub struct LowLevelNTriplesWriter; + +impl LowLevelNTriplesWriter { + /// Writes an extra triple. + #[allow(clippy::unused_self)] + pub fn write_triple<'a>( + &mut self, + t: impl Into<TripleRef<'a>>, + mut write: impl Write, + ) -> io::Result<()> { + writeln!(write, "{} .", t.into()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::oxrdf::{Literal, NamedNode}; + + #[test] + fn unchecked_parsing() { + let triples = NTriplesParser::new() + .unchecked() + .parse_read(r#"<foo> <bar> "baz"@toolonglangtag ."#.as_bytes()) + .collect::<Result<Vec<_>, _>>() + .unwrap(); + assert_eq!( + triples, + [Triple::new( + NamedNode::new_unchecked("foo"), + NamedNode::new_unchecked("bar"), + Literal::new_language_tagged_literal_unchecked("baz", "toolonglangtag"), + )] + ) + } +} diff --git a/ng-oxigraph/src/oxttl/terse.rs b/ng-oxigraph/src/oxttl/terse.rs new file mode 100644 index 0000000..205f348 --- /dev/null +++ b/ng-oxigraph/src/oxttl/terse.rs @@ -0,0 +1,1072 @@ +//! Shared parser implementation for Turtle and TriG. 
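+//!
+//! Both parsers are driven by the [`TriGRecognizer`] state machine below: the grammar position
+//! lives on an explicit `Vec<TriGState>` stack, and the triple under construction is kept in
+//! the `cur_subject`/`cur_predicate`/`cur_object` stacks so that nested property lists and
+//! collections unwind without recursion.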
+ +use crate::oxrdf::vocab::{rdf, xsd}; +#[cfg(feature = "rdf-star")] +use crate::oxrdf::Triple; +use crate::oxrdf::{ + BlankNode, GraphName, Literal, NamedNode, NamedOrBlankNode, Quad, Subject, Term, +}; +use crate::oxttl::lexer::{resolve_local_name, N3Lexer, N3LexerMode, N3LexerOptions, N3Token}; +use crate::oxttl::toolkit::{Lexer, Parser, RuleRecognizer, RuleRecognizerError}; +use crate::oxttl::{MAX_BUFFER_SIZE, MIN_BUFFER_SIZE}; +use oxiri::Iri; +use std::collections::hash_map::Iter; +use std::collections::HashMap; + +pub struct TriGRecognizer { + stack: Vec<TriGState>, + cur_subject: Vec<Subject>, + cur_predicate: Vec<NamedNode>, + cur_object: Vec<Term>, + cur_graph: GraphName, +} + +#[allow(clippy::partial_pub_fields)] +pub struct TriGRecognizerContext { + pub lexer_options: N3LexerOptions, + pub with_graph_name: bool, + #[cfg(feature = "rdf-star")] + pub with_quoted_triples: bool, + prefixes: HashMap<String, Iri<String>>, +} + +impl TriGRecognizerContext { + pub fn prefixes(&self) -> Iter<'_, String, Iri<String>> { + self.prefixes.iter() + } +} + +impl RuleRecognizer for TriGRecognizer { + type TokenRecognizer = N3Lexer; + type Output = Quad; + type Context = TriGRecognizerContext; + + fn error_recovery_state(mut self) -> Self { + self.stack.clear(); + self.cur_subject.clear(); + self.cur_predicate.clear(); + self.cur_object.clear(); + self.cur_graph = GraphName::DefaultGraph; + self + } + + fn recognize_next( + mut self, + token: N3Token<'_>, + context: &mut TriGRecognizerContext, + results: &mut Vec<Quad>, + errors: &mut Vec<RuleRecognizerError>, + ) -> Self { + if let Some(rule) = self.stack.pop() { + match rule { + // [1g] trigDoc ::= (directive | block)* + // [2g] block ::= triplesOrGraph | wrappedGraph | triples2 | "GRAPH" labelOrSubject wrappedGraph + // [3] directive ::= prefixID | base | sparqlPrefix | sparqlBase + // [4] prefixID ::= '@prefix' PNAME_NS IRIREF '.' + // [5] base ::= '@base' IRIREF '.' 
+ // [5s] sparqlPrefix ::= "PREFIX" PNAME_NS IRIREF + // [6s] sparqlBase ::= "BASE" IRIREF + TriGState::TriGDoc => { + self.cur_graph = GraphName::DefaultGraph; + self.stack.push(TriGState::TriGDoc); + match token { + N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("base") => { + self.stack.push(TriGState::BaseExpectIri); + self + } + N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("prefix") => { + self.stack.push(TriGState::PrefixExpectPrefix); + self + } + N3Token::LangTag("prefix") => { + self.stack.push(TriGState::ExpectDot); + self.stack.push(TriGState::PrefixExpectPrefix); + self + } + N3Token::LangTag("base") => { + self.stack.push(TriGState::ExpectDot); + self.stack.push(TriGState::BaseExpectIri); + self + } + N3Token::PlainKeyword(k) + if k.eq_ignore_ascii_case("graph") && context.with_graph_name => + { + self.stack.push(TriGState::WrappedGraph); + self.stack.push(TriGState::GraphName); + self + } + N3Token::Punctuation("{") if context.with_graph_name => { + self.stack.push(TriGState::WrappedGraph); + self.recognize_next(token, context, results, errors) + } + _ => { + self.stack.push(TriGState::TriplesOrGraph); + self.recognize_next(token, context, results, errors) + } + } + } + TriGState::ExpectDot => { + self.cur_subject.pop(); + if token == N3Token::Punctuation(".") { + self + } else { + errors.push("A dot is expected at the end of statements".into()); + self.recognize_next(token, context, results, errors) + } + } + TriGState::BaseExpectIri => { + if let N3Token::IriRef(iri) = token { + context.lexer_options.base_iri = Some(Iri::parse_unchecked(iri)); + self + } else { + self.error(errors, "The BASE keyword should be followed by an IRI") + } + } + TriGState::PrefixExpectPrefix => match token { + N3Token::PrefixedName { prefix, local, .. } if local.is_empty() => { + self.stack.push(TriGState::PrefixExpectIri { + name: prefix.to_owned(), + }); + self + } + _ => self.error( + errors, + "The PREFIX keyword should be followed by a prefix like 'ex:'", + ), + }, + TriGState::PrefixExpectIri { name } => { + if let N3Token::IriRef(iri) = token { + context.prefixes.insert(name, Iri::parse_unchecked(iri)); + self + } else { + self.error(errors, "The PREFIX declaration should be followed by a prefix and its value as an IRI") + } + } + // [3g] triplesOrGraph ::= labelOrSubject ( wrappedGraph | predicateObjectList '.' ) | quotedTriple predicateObjectList '.' + // [4g] triples2 ::= blankNodePropertyList predicateObjectList? '.' | collection predicateObjectList '.' 
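+                // A top-level term is ambiguous until the next token arrives: `<g> { ... }` names
+                // a graph while `<s> <p> <o> .` starts a triple, so the term is parsed first and
+                // `WrappedGraphOrPredicateObjectList` below decides once it sees whether '{' follows.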
+ TriGState::TriplesOrGraph => match token { + N3Token::IriRef(iri) => { + self.stack + .push(TriGState::WrappedGraphOrPredicateObjectList { + term: NamedNode::new_unchecked(iri).into(), + }); + self + } + N3Token::PrefixedName { + prefix, + local, + might_be_invalid_iri, + } => match resolve_local_name( + prefix, + &local, + might_be_invalid_iri, + &context.prefixes, + ) { + Ok(t) => { + self.stack + .push(TriGState::WrappedGraphOrPredicateObjectList { + term: t.into(), + }); + self + } + Err(e) => self.error(errors, e), + }, + N3Token::BlankNodeLabel(label) => { + self.stack + .push(TriGState::WrappedGraphOrPredicateObjectList { + term: BlankNode::new_unchecked(label).into(), + }); + self + } + N3Token::Punctuation("[") => { + self.stack + .push(TriGState::WrappedGraphBlankNodePropertyListCurrent); + self + } + N3Token::Punctuation("(") => { + self.stack.push(TriGState::ExpectDot); + self.stack.push(TriGState::PredicateObjectList); + self.stack.push(TriGState::SubjectCollectionBeginning); + self + } + #[cfg(feature = "rdf-star")] + N3Token::Punctuation("<<") if context.with_quoted_triples => { + self.stack.push(TriGState::ExpectDot); + self.stack.push(TriGState::PredicateObjectList); + self.stack.push(TriGState::SubjectQuotedTripleEnd); + self.stack.push(TriGState::QuotedObject); + self.stack.push(TriGState::Verb); + self.stack.push(TriGState::QuotedSubject); + self + } + _ => self.error(errors, "TOKEN is not a valid subject or graph name"), + }, + TriGState::WrappedGraphOrPredicateObjectList { term } => { + if token == N3Token::Punctuation("{") && context.with_graph_name { + self.cur_graph = term.into(); + self.stack.push(TriGState::WrappedGraph); + } else { + self.cur_subject.push(term.into()); + self.stack.push(TriGState::ExpectDot); + self.stack.push(TriGState::PredicateObjectList); + } + self.recognize_next(token, context, results, errors) + } + TriGState::WrappedGraphBlankNodePropertyListCurrent => { + if token == N3Token::Punctuation("]") { + self.stack + .push(TriGState::WrappedGraphOrPredicateObjectList { + term: BlankNode::default().into(), + }); + self + } else { + self.cur_subject.push(BlankNode::default().into()); + self.stack.push(TriGState::ExpectDot); + self.stack.push(TriGState::SubjectBlankNodePropertyListEnd); + self.stack.push(TriGState::PredicateObjectList); + self.recognize_next(token, context, results, errors) + } + } + TriGState::SubjectBlankNodePropertyListEnd => { + if token == N3Token::Punctuation("]") { + self.stack + .push(TriGState::SubjectBlankNodePropertyListAfter); + self + } else { + errors.push("blank node property lists should end with a ']'".into()); + self.stack + .push(TriGState::SubjectBlankNodePropertyListAfter); + self.recognize_next(token, context, results, errors) + } + } + TriGState::SubjectBlankNodePropertyListAfter => { + if matches!(token, N3Token::Punctuation("." 
| "}")) { + self.recognize_next(token, context, results, errors) + } else { + self.stack.push(TriGState::PredicateObjectList); + self.recognize_next(token, context, results, errors) + } + } + TriGState::SubjectCollectionBeginning => { + if let N3Token::Punctuation(")") = token { + self.cur_subject.push(rdf::NIL.into()); + self + } else { + let root = BlankNode::default(); + self.cur_subject.push(root.clone().into()); + self.cur_subject.push(root.into()); + self.cur_predicate.push(rdf::FIRST.into()); + self.stack.push(TriGState::SubjectCollectionPossibleEnd); + self.stack.push(TriGState::Object); + self.recognize_next(token, context, results, errors) + } + } + TriGState::SubjectCollectionPossibleEnd => { + let old = self.cur_subject.pop().unwrap(); + self.cur_object.pop(); + if let N3Token::Punctuation(")") = token { + self.cur_predicate.pop(); + results.push(Quad::new(old, rdf::REST, rdf::NIL, self.cur_graph.clone())); + self + } else { + let new = BlankNode::default(); + results.push(Quad::new( + old, + rdf::REST, + new.clone(), + self.cur_graph.clone(), + )); + self.cur_subject.push(new.into()); + self.stack.push(TriGState::ObjectCollectionPossibleEnd); + self.stack.push(TriGState::Object); + self.recognize_next(token, context, results, errors) + } + } + // [5g] wrappedGraph ::= '{' triplesBlock? '}' + // [6g] triplesBlock ::= triples ('.' triplesBlock?)? + TriGState::WrappedGraph => { + if token == N3Token::Punctuation("{") { + self.stack.push(TriGState::WrappedGraphPossibleEnd); + self.stack.push(TriGState::Triples); + self + } else { + self.error(errors, "The GRAPH keyword should be followed by a graph name and a value in '{'") + } + } + TriGState::WrappedGraphPossibleEnd => { + self.cur_subject.pop(); + match token { + N3Token::Punctuation("}") => self, + N3Token::Punctuation(".") => { + self.stack.push(TriGState::WrappedGraphPossibleEnd); + self.stack.push(TriGState::Triples); + self + } + _ => { + errors.push( + "A '}' or a '.' is expected at the end of a graph block".into(), + ); + self.recognize_next(token, context, results, errors) + } + } + } + // [6] triples ::= subject predicateObjectList | blankNodePropertyList predicateObjectList? 
+ // [10] subject ::= iri | BlankNode | collection | quotedTriple + TriGState::Triples => match token { + N3Token::Punctuation("}") => { + self.recognize_next(token, context, results, errors) // Early end + } + N3Token::Punctuation("[") => { + self.cur_subject.push(BlankNode::default().into()); + self.stack + .push(TriGState::TriplesBlankNodePropertyListCurrent); + self + } + N3Token::IriRef(iri) => { + self.cur_subject.push(NamedNode::new_unchecked(iri).into()); + self.stack.push(TriGState::PredicateObjectList); + self + } + N3Token::PrefixedName { + prefix, + local, + might_be_invalid_iri, + } => match resolve_local_name( + prefix, + &local, + might_be_invalid_iri, + &context.prefixes, + ) { + Ok(t) => { + self.cur_subject.push(t.into()); + self.stack.push(TriGState::PredicateObjectList); + self + } + Err(e) => self.error(errors, e), + }, + N3Token::BlankNodeLabel(label) => { + self.cur_subject + .push(BlankNode::new_unchecked(label).into()); + self.stack.push(TriGState::PredicateObjectList); + self + } + N3Token::Punctuation("(") => { + self.stack.push(TriGState::PredicateObjectList); + self.stack.push(TriGState::SubjectCollectionBeginning); + self + } + #[cfg(feature = "rdf-star")] + N3Token::Punctuation("<<") if context.with_quoted_triples => { + self.stack.push(TriGState::PredicateObjectList); + self.stack.push(TriGState::SubjectQuotedTripleEnd); + self.stack.push(TriGState::QuotedObject); + self.stack.push(TriGState::Verb); + self.stack.push(TriGState::QuotedSubject); + self + } + _ => self.error(errors, "TOKEN is not a valid RDF subject"), + }, + TriGState::TriplesBlankNodePropertyListCurrent => { + if token == N3Token::Punctuation("]") { + self.stack.push(TriGState::PredicateObjectList); + self + } else { + self.stack.push(TriGState::SubjectBlankNodePropertyListEnd); + self.stack.push(TriGState::PredicateObjectList); + self.recognize_next(token, context, results, errors) + } + } + // [7g] labelOrSubject ::= iri | BlankNode + TriGState::GraphName => match token { + N3Token::IriRef(iri) => { + self.cur_graph = NamedNode::new_unchecked(iri).into(); + self + } + N3Token::PrefixedName { + prefix, + local, + might_be_invalid_iri, + } => match resolve_local_name( + prefix, + &local, + might_be_invalid_iri, + &context.prefixes, + ) { + Ok(t) => { + self.cur_graph = t.into(); + self + } + Err(e) => self.error(errors, e), + }, + N3Token::BlankNodeLabel(label) => { + self.cur_graph = BlankNode::new_unchecked(label).into(); + self + } + N3Token::Punctuation("[") => { + self.stack.push(TriGState::GraphNameAnonEnd); + self + } + _ => self.error(errors, "TOKEN is not a valid graph name"), + }, + TriGState::GraphNameAnonEnd => { + if token == N3Token::Punctuation("]") { + self.cur_graph = BlankNode::default().into(); + self + } else { + self.error(errors, "Anonymous blank node with a property list are not allowed as graph name") + } + } + // [7] predicateObjectList ::= verb objectList (';' (verb objectList)?)* + TriGState::PredicateObjectList => { + self.stack.push(TriGState::PredicateObjectListEnd); + self.stack.push(TriGState::ObjectsList); + self.stack.push(TriGState::Verb); + self.recognize_next(token, context, results, errors) + } + TriGState::PredicateObjectListEnd => { + self.cur_predicate.pop(); + if token == N3Token::Punctuation(";") { + self.stack + .push(TriGState::PredicateObjectListPossibleContinuation); + self + } else { + self.recognize_next(token, context, results, errors) + } + } + TriGState::PredicateObjectListPossibleContinuation => { + if token == N3Token::Punctuation(";") { + 
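+                        // Per [7] above, ';' may repeat with nothing in between
+                        // ("<s> <p> <o> ;; ."), so an empty slot just loops back to this state.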
self.stack + .push(TriGState::PredicateObjectListPossibleContinuation); + self + } else if matches!(token, N3Token::Punctuation("." | "}" | "]")) { + self.recognize_next(token, context, results, errors) + } else { + self.stack.push(TriGState::PredicateObjectListEnd); + self.stack.push(TriGState::ObjectsList); + self.stack.push(TriGState::Verb); + self.recognize_next(token, context, results, errors) + } + } + // [8] objectList ::= object annotation? ( ',' object annotation? )* + // [30t] annotation ::= '{|' predicateObjectList '|}' + TriGState::ObjectsList => { + self.stack.push(TriGState::ObjectsListEnd); + self.stack.push(TriGState::Object); + self.recognize_next(token, context, results, errors) + } + TriGState::ObjectsListEnd => match token { + N3Token::Punctuation(",") => { + self.cur_object.pop(); + self.stack.push(TriGState::ObjectsListEnd); + self.stack.push(TriGState::Object); + self + } + #[cfg(feature = "rdf-star")] + N3Token::Punctuation("{|") => { + let triple = Triple::new( + self.cur_subject.last().unwrap().clone(), + self.cur_predicate.last().unwrap().clone(), + self.cur_object.pop().unwrap(), + ); + self.cur_subject.push(triple.into()); + self.stack.push(TriGState::AnnotationEnd); + self.stack.push(TriGState::PredicateObjectList); + self + } + _ => { + self.cur_object.pop(); + self.recognize_next(token, context, results, errors) + } + }, + #[cfg(feature = "rdf-star")] + TriGState::AnnotationEnd => { + self.cur_subject.pop(); + self.stack.push(TriGState::ObjectsListAfterAnnotation); + if token == N3Token::Punctuation("|}") { + self + } else { + self.error(errors, "Annotations should end with '|}'") + } + } + #[cfg(feature = "rdf-star")] + TriGState::ObjectsListAfterAnnotation => { + if token == N3Token::Punctuation(",") { + self.stack.push(TriGState::ObjectsListEnd); + self.stack.push(TriGState::Object); + self + } else { + self.recognize_next(token, context, results, errors) + } + } + // [9] verb ::= predicate | 'a' + // [11] predicate ::= iri + TriGState::Verb => match token { + N3Token::PlainKeyword("a") => { + self.cur_predicate.push(rdf::TYPE.into()); + self + } + N3Token::IriRef(iri) => { + self.cur_predicate.push(NamedNode::new_unchecked(iri)); + self + } + N3Token::PrefixedName { + prefix, + local, + might_be_invalid_iri, + } => match resolve_local_name( + prefix, + &local, + might_be_invalid_iri, + &context.prefixes, + ) { + Ok(t) => { + self.cur_predicate.push(t); + self + } + Err(e) => self.error(errors, e), + }, + _ => self.error(errors, "TOKEN is not a valid predicate"), + }, + // [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal | quotedTriple + // [13] literal ::= RDFLiteral | NumericLiteral | BooleanLiteral + // [14] blank ::= BlankNode | collection + // [15] blankNodePropertyList ::= '[' predicateObjectList ']' + // [16] collection ::= '(' object* ')' + // [17] NumericLiteral ::= INTEGER | DECIMAL | DOUBLE + // [128s] RDFLiteral ::= String (LANGTAG | '^^' iri)? 
+ // [133s] BooleanLiteral ::= 'true' | 'false' + // [18] String ::= STRING_LITERAL_QUOTE | STRING_LITERAL_SINGLE_QUOTE | STRING_LITERAL_LONG_SINGLE_QUOTE | STRING_LITERAL_LONG_QUOTE + // [135s] iri ::= IRIREF | PrefixedName + // [136s] PrefixedName ::= PNAME_LN | PNAME_NS + // [137s] BlankNode ::= BLANK_NODE_LABEL | ANON + TriGState::Object => match token { + N3Token::IriRef(iri) => { + self.cur_object.push(NamedNode::new_unchecked(iri).into()); + self.emit_quad(results); + self + } + N3Token::PrefixedName { + prefix, + local, + might_be_invalid_iri, + } => match resolve_local_name( + prefix, + &local, + might_be_invalid_iri, + &context.prefixes, + ) { + Ok(t) => { + self.cur_object.push(t.into()); + self.emit_quad(results); + self + } + Err(e) => self.error(errors, e), + }, + N3Token::BlankNodeLabel(label) => { + self.cur_object.push(BlankNode::new_unchecked(label).into()); + self.emit_quad(results); + self + } + N3Token::Punctuation("[") => { + self.stack + .push(TriGState::ObjectBlankNodePropertyListCurrent); + self + } + N3Token::Punctuation("(") => { + self.stack.push(TriGState::ObjectCollectionBeginning); + self + } + N3Token::String(value) => { + self.stack + .push(TriGState::LiteralPossibleSuffix { value, emit: true }); + self + } + N3Token::Integer(v) => { + self.cur_object + .push(Literal::new_typed_literal(v, xsd::INTEGER).into()); + self.emit_quad(results); + self + } + N3Token::Decimal(v) => { + self.cur_object + .push(Literal::new_typed_literal(v, xsd::DECIMAL).into()); + self.emit_quad(results); + self + } + N3Token::Double(v) => { + self.cur_object + .push(Literal::new_typed_literal(v, xsd::DOUBLE).into()); + self.emit_quad(results); + self + } + N3Token::PlainKeyword("true") => { + self.cur_object + .push(Literal::new_typed_literal("true", xsd::BOOLEAN).into()); + self.emit_quad(results); + self + } + N3Token::PlainKeyword("false") => { + self.cur_object + .push(Literal::new_typed_literal("false", xsd::BOOLEAN).into()); + self.emit_quad(results); + self + } + #[cfg(feature = "rdf-star")] + N3Token::Punctuation("<<") if context.with_quoted_triples => { + self.stack + .push(TriGState::ObjectQuotedTripleEnd { emit: true }); + self.stack.push(TriGState::QuotedObject); + self.stack.push(TriGState::Verb); + self.stack.push(TriGState::QuotedSubject); + self + } + _ => self.error(errors, "TOKEN is not a valid RDF object"), + }, + TriGState::ObjectBlankNodePropertyListCurrent => { + if token == N3Token::Punctuation("]") { + self.cur_object.push(BlankNode::default().into()); + self.emit_quad(results); + self + } else { + self.cur_subject.push(BlankNode::default().into()); + self.stack.push(TriGState::ObjectBlankNodePropertyListEnd); + self.stack.push(TriGState::PredicateObjectList); + self.recognize_next(token, context, results, errors) + } + } + TriGState::ObjectBlankNodePropertyListEnd => { + if token == N3Token::Punctuation("]") { + self.cur_object.push(self.cur_subject.pop().unwrap().into()); + self.emit_quad(results); + self + } else { + self.error(errors, "blank node property lists should end with a ']'") + } + } + TriGState::ObjectCollectionBeginning => { + if let N3Token::Punctuation(")") = token { + self.cur_object.push(rdf::NIL.into()); + self.emit_quad(results); + self + } else { + let root = BlankNode::default(); + self.cur_object.push(root.clone().into()); + self.emit_quad(results); + self.cur_subject.push(root.into()); + self.cur_predicate.push(rdf::FIRST.into()); + self.stack.push(TriGState::ObjectCollectionPossibleEnd); + self.stack.push(TriGState::Object); + 
self.recognize_next(token, context, results, errors) + } + } + TriGState::ObjectCollectionPossibleEnd => { + let old = self.cur_subject.pop().unwrap(); + self.cur_object.pop(); + if let N3Token::Punctuation(")") = token { + self.cur_predicate.pop(); + results.push(Quad::new(old, rdf::REST, rdf::NIL, self.cur_graph.clone())); + self + } else { + let new = BlankNode::default(); + results.push(Quad::new( + old, + rdf::REST, + new.clone(), + self.cur_graph.clone(), + )); + self.cur_subject.push(new.into()); + self.stack.push(TriGState::ObjectCollectionPossibleEnd); + self.stack.push(TriGState::Object); + self.recognize_next(token, context, results, errors) + } + } + TriGState::LiteralPossibleSuffix { value, emit } => match token { + N3Token::LangTag(lang) => { + self.cur_object.push( + Literal::new_language_tagged_literal_unchecked( + value, + lang.to_ascii_lowercase(), + ) + .into(), + ); + if emit { + self.emit_quad(results); + } + self + } + N3Token::Punctuation("^^") => { + self.stack + .push(TriGState::LiteralExpectDatatype { value, emit }); + self + } + _ => { + self.cur_object + .push(Literal::new_simple_literal(value).into()); + if emit { + self.emit_quad(results); + } + self.recognize_next(token, context, results, errors) + } + }, + TriGState::LiteralExpectDatatype { value, emit } => match token { + N3Token::IriRef(datatype) => { + self.cur_object.push( + Literal::new_typed_literal(value, NamedNode::new_unchecked(datatype)) + .into(), + ); + if emit { + self.emit_quad(results); + } + self + } + N3Token::PrefixedName { + prefix, + local, + might_be_invalid_iri, + } => match resolve_local_name( + prefix, + &local, + might_be_invalid_iri, + &context.prefixes, + ) { + Ok(t) => { + self.cur_object + .push(Literal::new_typed_literal(value, t).into()); + if emit { + self.emit_quad(results); + } + self + } + Err(e) => self.error(errors, e), + }, + _ => self + .error(errors, "Expecting a datatype IRI after ^^, found TOKEN") + .recognize_next(token, context, results, errors), + }, + // [27t] quotedTriple ::= '<<' qtSubject verb qtObject '>>' + #[cfg(feature = "rdf-star")] + TriGState::SubjectQuotedTripleEnd => { + let triple = Triple::new( + self.cur_subject.pop().unwrap(), + self.cur_predicate.pop().unwrap(), + self.cur_object.pop().unwrap(), + ); + self.cur_subject.push(triple.into()); + if token == N3Token::Punctuation(">>") { + self + } else { + self.error( + errors, + "Expecting '>>' to close a quoted triple, found TOKEN", + ) + } + } + #[cfg(feature = "rdf-star")] + TriGState::ObjectQuotedTripleEnd { emit } => { + let triple = Triple::new( + self.cur_subject.pop().unwrap(), + self.cur_predicate.pop().unwrap(), + self.cur_object.pop().unwrap(), + ); + self.cur_object.push(triple.into()); + if emit { + self.emit_quad(results); + } + if token == N3Token::Punctuation(">>") { + self + } else { + self.error( + errors, + "Expecting '>>' to close a quoted triple, found TOKEN", + ) + } + } + // [28t] qtSubject ::= iri | BlankNode | quotedTriple + #[cfg(feature = "rdf-star")] + TriGState::QuotedSubject => match token { + N3Token::Punctuation("[") => { + self.cur_subject.push(BlankNode::default().into()); + self.stack.push(TriGState::QuotedAnonEnd); + self + } + N3Token::IriRef(iri) => { + self.cur_subject.push(NamedNode::new_unchecked(iri).into()); + self + } + N3Token::PrefixedName { + prefix, + local, + might_be_invalid_iri, + } => match resolve_local_name( + prefix, + &local, + might_be_invalid_iri, + &context.prefixes, + ) { + Ok(t) => { + self.cur_subject.push(t.into()); + self + } + Err(e) => 
self.error(errors, e),
+                },
+                N3Token::BlankNodeLabel(label) => {
+                    self.cur_subject
+                        .push(BlankNode::new_unchecked(label).into());
+                    self
+                }
+                N3Token::Punctuation("<<") => {
+                    self.stack.push(TriGState::SubjectQuotedTripleEnd);
+                    self.stack.push(TriGState::QuotedObject);
+                    self.stack.push(TriGState::Verb);
+                    self.stack.push(TriGState::QuotedSubject);
+                    self
+                }
+                _ => self.error(errors, "TOKEN is not a valid RDF quoted triple subject"),
+            },
+            // [29t] qtObject ::= iri | BlankNode | literal | quotedTriple
+            #[cfg(feature = "rdf-star")]
+            TriGState::QuotedObject => match token {
+                N3Token::Punctuation("[") => {
+                    self.cur_object.push(BlankNode::default().into());
+                    self.stack.push(TriGState::QuotedAnonEnd);
+                    self
+                }
+                N3Token::IriRef(iri) => {
+                    self.cur_object.push(NamedNode::new_unchecked(iri).into());
+                    self
+                }
+                N3Token::PrefixedName {
+                    prefix,
+                    local,
+                    might_be_invalid_iri,
+                } => match resolve_local_name(
+                    prefix,
+                    &local,
+                    might_be_invalid_iri,
+                    &context.prefixes,
+                ) {
+                    Ok(t) => {
+                        self.cur_object.push(t.into());
+                        self
+                    }
+                    Err(e) => self.error(errors, e),
+                },
+                N3Token::BlankNodeLabel(label) => {
+                    self.cur_object.push(BlankNode::new_unchecked(label).into());
+                    self
+                }
+                N3Token::String(value) => {
+                    self.stack
+                        .push(TriGState::LiteralPossibleSuffix { value, emit: false });
+                    self
+                }
+                N3Token::Integer(v) => {
+                    self.cur_object
+                        .push(Literal::new_typed_literal(v, xsd::INTEGER).into());
+                    self
+                }
+                N3Token::Decimal(v) => {
+                    self.cur_object
+                        .push(Literal::new_typed_literal(v, xsd::DECIMAL).into());
+                    self
+                }
+                N3Token::Double(v) => {
+                    self.cur_object
+                        .push(Literal::new_typed_literal(v, xsd::DOUBLE).into());
+                    self
+                }
+                N3Token::PlainKeyword("true") => {
+                    self.cur_object
+                        .push(Literal::new_typed_literal("true", xsd::BOOLEAN).into());
+                    self
+                }
+                N3Token::PlainKeyword("false") => {
+                    self.cur_object
+                        .push(Literal::new_typed_literal("false", xsd::BOOLEAN).into());
+                    self
+                }
+                N3Token::Punctuation("<<") => {
+                    self.stack
+                        .push(TriGState::ObjectQuotedTripleEnd { emit: false });
+                    self.stack.push(TriGState::QuotedObject);
+                    self.stack.push(TriGState::Verb);
+                    self.stack.push(TriGState::QuotedSubject);
+                    self
+                }
+                _ => self.error(errors, "TOKEN is not a valid RDF quoted triple object"),
+            },
+            #[cfg(feature = "rdf-star")]
+            TriGState::QuotedAnonEnd => {
+                if token == N3Token::Punctuation("]") {
+                    self
+                } else {
+                    self.error(errors, "Anonymous blank nodes with property lists are not allowed in quoted triples")
+                }
+            }
+            }
+        } else if token == N3Token::Punctuation(".") || token == N3Token::Punctuation("}") {
+            // TODO: be smarter depending on whether we are in '{' or not
+            self.stack.push(TriGState::TriGDoc);
+            self
+        } else {
+            self
+        }
+    }
+
+    fn recognize_end(
+        mut self,
+        _context: &mut TriGRecognizerContext,
+        results: &mut Vec<Self::Output>,
+        errors: &mut Vec<RuleRecognizerError>,
+    ) {
+        match &*self.stack {
+            [] | [TriGState::TriGDoc] => {
+                debug_assert!(
+                    self.cur_subject.is_empty(),
+                    "The cur_subject stack must be empty if the state stack is empty"
+                );
+                debug_assert!(
+                    self.cur_predicate.is_empty(),
+                    "The cur_predicate stack must be empty if the state stack is empty"
+                );
+                debug_assert!(
+                    self.cur_object.is_empty(),
+                    "The cur_object stack must be empty if the state stack is empty"
+                );
+            }
+            [.., TriGState::LiteralPossibleSuffix { value, emit: true }] => {
+                self.cur_object
+                    .push(Literal::new_simple_literal(value).into());
+                self.emit_quad(results);
+                errors.push("Triples should be followed by a dot".into())
+            }
+            _
=> errors.push("Unexpected end".into()), // TODO + } + } + + fn lexer_options(context: &TriGRecognizerContext) -> &N3LexerOptions { + &context.lexer_options + } +} + +impl TriGRecognizer { + pub fn new_parser( + with_graph_name: bool, + #[cfg(feature = "rdf-star")] with_quoted_triples: bool, + unchecked: bool, + base_iri: Option<Iri<String>>, + prefixes: HashMap<String, Iri<String>>, + ) -> Parser<Self> { + Parser::new( + Lexer::new( + N3Lexer::new(N3LexerMode::Turtle, unchecked), + MIN_BUFFER_SIZE, + MAX_BUFFER_SIZE, + true, + Some(b"#"), + ), + Self { + stack: vec![TriGState::TriGDoc], + cur_subject: Vec::new(), + cur_predicate: Vec::new(), + cur_object: Vec::new(), + cur_graph: GraphName::DefaultGraph, + }, + TriGRecognizerContext { + with_graph_name, + #[cfg(feature = "rdf-star")] + with_quoted_triples, + prefixes, + lexer_options: N3LexerOptions { base_iri }, + }, + ) + } + + #[must_use] + fn error( + mut self, + errors: &mut Vec<RuleRecognizerError>, + msg: impl Into<RuleRecognizerError>, + ) -> Self { + errors.push(msg.into()); + self.stack.clear(); + self.cur_subject.clear(); + self.cur_predicate.clear(); + self.cur_object.clear(); + self.cur_graph = GraphName::DefaultGraph; + self + } + + fn emit_quad(&mut self, results: &mut Vec<Quad>) { + results.push(Quad::new( + self.cur_subject.last().unwrap().clone(), + self.cur_predicate.last().unwrap().clone(), + self.cur_object.last().unwrap().clone(), + self.cur_graph.clone(), + )); + } +} + +#[derive(Debug)] +enum TriGState { + TriGDoc, + ExpectDot, + BaseExpectIri, + PrefixExpectPrefix, + PrefixExpectIri { + name: String, + }, + TriplesOrGraph, + WrappedGraphBlankNodePropertyListCurrent, + SubjectBlankNodePropertyListEnd, + SubjectBlankNodePropertyListAfter, + SubjectCollectionBeginning, + SubjectCollectionPossibleEnd, + WrappedGraphOrPredicateObjectList { + term: NamedOrBlankNode, + }, + WrappedGraph, + WrappedGraphPossibleEnd, + GraphName, + GraphNameAnonEnd, + Triples, + TriplesBlankNodePropertyListCurrent, + PredicateObjectList, + PredicateObjectListEnd, + PredicateObjectListPossibleContinuation, + ObjectsList, + ObjectsListEnd, + #[cfg(feature = "rdf-star")] + AnnotationEnd, + #[cfg(feature = "rdf-star")] + ObjectsListAfterAnnotation, + Verb, + Object, + ObjectBlankNodePropertyListCurrent, + ObjectBlankNodePropertyListEnd, + ObjectCollectionBeginning, + ObjectCollectionPossibleEnd, + LiteralPossibleSuffix { + value: String, + emit: bool, + }, + LiteralExpectDatatype { + value: String, + emit: bool, + }, + #[cfg(feature = "rdf-star")] + SubjectQuotedTripleEnd, + #[cfg(feature = "rdf-star")] + ObjectQuotedTripleEnd { + emit: bool, + }, + #[cfg(feature = "rdf-star")] + QuotedSubject, + #[cfg(feature = "rdf-star")] + QuotedObject, + #[cfg(feature = "rdf-star")] + QuotedAnonEnd, +} diff --git a/ng-oxigraph/src/oxttl/toolkit/error.rs b/ng-oxigraph/src/oxttl/toolkit/error.rs new file mode 100644 index 0000000..083adef --- /dev/null +++ b/ng-oxigraph/src/oxttl/toolkit/error.rs @@ -0,0 +1,97 @@ +use std::ops::Range; +use std::{fmt, io}; + +/// A position in a text i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points) and a global file `offset` starting from 0 (in number of bytes). +#[derive(Eq, PartialEq, Debug, Clone, Copy)] +pub struct TextPosition { + pub line: u64, + pub column: u64, + pub offset: u64, +} + +/// An error in the syntax of the parsed file. +/// +/// It is composed of a message and a byte range in the input. 
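The error type defined next pairs a message with a `Range<TextPosition>`. As a hedged sketch of how these positions surface to a caller (not part of this patch; it assumes the public `oxttl` crate paths that the doc-tests elsewhere in this diff already use), one might write:

```rust
use oxttl::{TriGParser, TurtleParseError};

fn main() {
    // The second statement is missing its object, so the parser reports a syntax error.
    let file = b"<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n\
                 <http://example.com/s> <http://example.com/p> .";
    for quad in TriGParser::new().parse_read(file.as_ref()) {
        match quad {
            Ok(quad) => println!("parsed: {quad}"),
            Err(TurtleParseError::Syntax(e)) => {
                // `location()` is a `Range<TextPosition>`; line and column start
                // from 0, hence the `+ 1` for a human-readable report.
                let start = e.location().start;
                eprintln!(
                    "syntax error at line {} column {}: {}",
                    start.line + 1,
                    start.column + 1,
                    e.message()
                );
            }
            Err(TurtleParseError::Io(e)) => eprintln!("I/O error: {e}"),
        }
    }
}
```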
+#[derive(Debug, thiserror::Error)]
+pub struct TurtleSyntaxError {
+    pub(super) location: Range<TextPosition>,
+    pub(super) message: String,
+}
+
+impl TurtleSyntaxError {
+    /// The location of the error inside the file.
+    #[inline]
+    pub fn location(&self) -> Range<TextPosition> {
+        self.location.clone()
+    }
+
+    /// The error message.
+    #[inline]
+    pub fn message(&self) -> &str {
+        &self.message
+    }
+}
+
+impl fmt::Display for TurtleSyntaxError {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if self.location.start.offset + 1 >= self.location.end.offset {
+            write!(
+                f,
+                "Parser error at line {} column {}: {}",
+                self.location.start.line + 1,
+                self.location.start.column + 1,
+                self.message
+            )
+        } else if self.location.start.line == self.location.end.line {
+            write!(
+                f,
+                "Parser error at line {} between columns {} and {}: {}",
+                self.location.start.line + 1,
+                self.location.start.column + 1,
+                self.location.end.column + 1,
+                self.message
+            )
+        } else {
+            write!(
+                f,
+                "Parser error between line {} column {} and line {} column {}: {}",
+                self.location.start.line + 1,
+                self.location.start.column + 1,
+                self.location.end.line + 1,
+                self.location.end.column + 1,
+                self.message
+            )
+        }
+    }
+}
+
+impl From<TurtleSyntaxError> for io::Error {
+    #[inline]
+    fn from(error: TurtleSyntaxError) -> Self {
+        Self::new(io::ErrorKind::InvalidData, error)
+    }
+}
+
+/// A parsing error.
+///
+/// It is the union of [`TurtleSyntaxError`] and [`io::Error`].
+#[derive(Debug, thiserror::Error)]
+pub enum TurtleParseError {
+    /// I/O error during parsing (file not found...).
+    #[error(transparent)]
+    Io(#[from] io::Error),
+    /// An error in the file syntax.
+    #[error(transparent)]
+    Syntax(#[from] TurtleSyntaxError),
+}
+
+impl From<TurtleParseError> for io::Error {
+    #[inline]
+    fn from(error: TurtleParseError) -> Self {
+        match error {
+            TurtleParseError::Syntax(e) => e.into(),
+            TurtleParseError::Io(e) => e,
+        }
+    }
+}
diff --git a/ng-oxigraph/src/oxttl/toolkit/lexer.rs b/ng-oxigraph/src/oxttl/toolkit/lexer.rs
new file mode 100644
index 0000000..b1835e9
--- /dev/null
+++ b/ng-oxigraph/src/oxttl/toolkit/lexer.rs
@@ -0,0 +1,432 @@
+use crate::oxttl::toolkit::error::{TextPosition, TurtleSyntaxError};
+use memchr::{memchr2, memchr2_iter};
+use std::borrow::Cow;
+use std::cmp::min;
+use std::io::{self, Read};
+use std::ops::{Range, RangeInclusive};
+use std::str;
+#[cfg(feature = "async-tokio")]
+use tokio::io::{AsyncRead, AsyncReadExt};
+
+pub trait TokenRecognizer {
+    type Token<'a>
+    where
+        Self: 'a;
+    type Options: Default;
+
+    fn recognize_next_token<'a>(
+        &mut self,
+        data: &'a [u8],
+        is_ending: bool,
+        config: &Self::Options,
+    ) -> Option<(usize, Result<Self::Token<'a>, TokenRecognizerError>)>;
+}
+
+pub struct TokenRecognizerError {
+    pub location: Range<usize>,
+    pub message: String,
+}
+
+impl<S: Into<String>> From<(Range<usize>, S)> for TokenRecognizerError {
+    fn from((location, message): (Range<usize>, S)) -> Self {
+        Self {
+            location,
+            message: message.into(),
+        }
+    }
+}
+
+#[allow(clippy::range_plus_one)]
+impl<S: Into<String>> From<(RangeInclusive<usize>, S)> for TokenRecognizerError {
+    fn from((location, message): (RangeInclusive<usize>, S)) -> Self {
+        (*location.start()..*location.end() + 1, message).into()
+    }
+}
+
+impl<S: Into<String>> From<(usize, S)> for TokenRecognizerError {
+    fn from((location, message): (usize, S)) -> Self {
+        (location..=location, message).into()
+    }
+}
+
+pub struct Lexer<R: TokenRecognizer> {
+    parser: R,
+ data: Vec<u8>, + position: Position, + previous_position: Position, // Lexer position before the last emitted token + is_ending: bool, + min_buffer_size: usize, + max_buffer_size: usize, + is_line_jump_whitespace: bool, + line_comment_start: Option<&'static [u8]>, +} + +#[derive(Clone, Copy)] +struct Position { + line_start_buffer_offset: usize, + buffer_offset: usize, + global_offset: u64, + global_line: u64, +} + +impl<R: TokenRecognizer> Lexer<R> { + pub fn new( + parser: R, + min_buffer_size: usize, + max_buffer_size: usize, + is_line_jump_whitespace: bool, + line_comment_start: Option<&'static [u8]>, + ) -> Self { + Self { + parser, + data: Vec::new(), + position: Position { + line_start_buffer_offset: 0, + buffer_offset: 0, + global_offset: 0, + global_line: 0, + }, + previous_position: Position { + line_start_buffer_offset: 0, + buffer_offset: 0, + global_offset: 0, + global_line: 0, + }, + is_ending: false, + min_buffer_size, + max_buffer_size, + is_line_jump_whitespace, + line_comment_start, + } + } + + pub fn extend_from_slice(&mut self, other: &[u8]) { + self.shrink_data(); + self.data.extend_from_slice(other); + } + + #[inline] + pub fn end(&mut self) { + self.is_ending = true; + } + + pub fn extend_from_read(&mut self, read: &mut impl Read) -> io::Result<()> { + self.shrink_data(); + if self.data.len() == self.max_buffer_size { + return Err(io::Error::new( + io::ErrorKind::OutOfMemory, + format!( + "Reached the buffer maximal size of {}", + self.max_buffer_size + ), + )); + } + let min_end = min(self.data.len() + self.min_buffer_size, self.max_buffer_size); + let new_start = self.data.len(); + self.data.resize(min_end, 0); + if self.data.len() < self.data.capacity() { + // We keep extending to have as much space as available without reallocation + self.data.resize(self.data.capacity(), 0); + } + let read = read.read(&mut self.data[new_start..])?; + self.data.truncate(new_start + read); + self.is_ending = read == 0; + Ok(()) + } + + #[cfg(feature = "async-tokio")] + pub async fn extend_from_tokio_async_read( + &mut self, + read: &mut (impl AsyncRead + Unpin), + ) -> io::Result<()> { + self.shrink_data(); + if self.data.len() == self.max_buffer_size { + return Err(io::Error::new( + io::ErrorKind::OutOfMemory, + format!( + "Reached the buffer maximal size of {}", + self.max_buffer_size + ), + )); + } + let min_end = min(self.data.len() + self.min_buffer_size, self.max_buffer_size); + let new_start = self.data.len(); + self.data.resize(min_end, 0); + if self.data.len() < self.data.capacity() { + // We keep extending to have as much space as available without reallocation + self.data.resize(self.data.capacity(), 0); + } + let read = read.read(&mut self.data[new_start..]).await?; + self.data.truncate(new_start + read); + self.is_ending = read == 0; + Ok(()) + } + + #[allow(clippy::unwrap_in_result)] + pub fn read_next( + &mut self, + options: &R::Options, + ) -> Option<Result<R::Token<'_>, TurtleSyntaxError>> { + self.skip_whitespaces_and_comments()?; + self.previous_position = self.position; + let Some((consumed, result)) = self.parser.recognize_next_token( + &self.data[self.position.buffer_offset..], + self.is_ending, + options, + ) else { + return if self.is_ending { + if self.position.buffer_offset == self.data.len() { + None // We have finished + } else { + let (new_line_jumps, new_line_start) = + Self::find_number_of_line_jumps_and_start_of_last_line( + &self.data[self.position.buffer_offset..], + ); + if new_line_jumps > 0 { + self.position.line_start_buffer_offset = + 
self.position.buffer_offset + new_line_start;
+                    }
+                    self.position.global_offset +=
+                        u64::try_from(self.data.len() - self.position.buffer_offset).unwrap();
+                    self.position.buffer_offset = self.data.len();
+                    self.position.global_line += new_line_jumps;
+                    let new_position = TextPosition {
+                        line: self.position.global_line,
+                        column: Self::column_from_bytes(
+                            &self.data[self.position.line_start_buffer_offset..],
+                        ),
+                        offset: self.position.global_offset,
+                    };
+                    let error = TurtleSyntaxError {
+                        location: new_position..new_position,
+                        message: "Unexpected end of file".into(),
+                    };
+                    self.position.buffer_offset = self.data.len(); // We consume everything
+                    Some(Err(error))
+                }
+            } else {
+                None
+            };
+        };
+        debug_assert!(
+            consumed > 0,
+            "The lexer must consume at least one byte each time"
+        );
+        debug_assert!(
+            self.position.buffer_offset + consumed <= self.data.len(),
+            "The lexer tried to consume {consumed} bytes but only {} bytes are readable",
+            self.data.len() - self.position.buffer_offset
+        );
+        let (new_line_jumps, new_line_start) =
+            Self::find_number_of_line_jumps_and_start_of_last_line(
+                &self.data[self.position.buffer_offset..self.position.buffer_offset + consumed],
+            );
+        if new_line_jumps > 0 {
+            self.position.line_start_buffer_offset = self.position.buffer_offset + new_line_start;
+        }
+        self.position.buffer_offset += consumed;
+        self.position.global_offset += u64::try_from(consumed).unwrap();
+        self.position.global_line += new_line_jumps;
+        Some(result.map_err(|e| TurtleSyntaxError {
+            location: self.location_from_buffer_offset_range(e.location),
+            message: e.message,
+        }))
+    }
+
+    pub fn location_from_buffer_offset_range(
+        &self,
+        offset_range: Range<usize>,
+    ) -> Range<TextPosition> {
+        let start_offset = self.previous_position.buffer_offset + offset_range.start;
+        let (start_extra_line_jumps, start_line_start) =
+            Self::find_number_of_line_jumps_and_start_of_last_line(
+                &self.data[self.previous_position.buffer_offset..start_offset],
+            );
+        let start_line_start = if start_extra_line_jumps > 0 {
+            start_line_start + self.previous_position.buffer_offset
+        } else {
+            self.previous_position.line_start_buffer_offset
+        };
+        let end_offset = self.previous_position.buffer_offset + offset_range.end;
+        let (end_extra_line_jumps, end_line_start) =
+            Self::find_number_of_line_jumps_and_start_of_last_line(
+                &self.data[self.previous_position.buffer_offset..end_offset],
+            );
+        let end_line_start = if end_extra_line_jumps > 0 {
+            end_line_start + self.previous_position.buffer_offset
+        } else {
+            self.previous_position.line_start_buffer_offset
+        };
+        TextPosition {
+            line: self.previous_position.global_line + start_extra_line_jumps,
+            column: Self::column_from_bytes(&self.data[start_line_start..start_offset]),
+            offset: self.previous_position.global_offset
+                + u64::try_from(offset_range.start).unwrap(),
+        }..TextPosition {
+            line: self.previous_position.global_line + end_extra_line_jumps,
+            column: Self::column_from_bytes(&self.data[end_line_start..end_offset]),
+            offset: self.previous_position.global_offset + u64::try_from(offset_range.end).unwrap(),
+        }
+    }
+
+    pub fn last_token_location(&self) -> Range<TextPosition> {
+        TextPosition {
+            line: self.previous_position.global_line,
+            column: Self::column_from_bytes(
+                &self.data[self.previous_position.line_start_buffer_offset
+                    ..self.previous_position.buffer_offset],
+            ),
+            offset: self.previous_position.global_offset,
+        }..TextPosition {
+            line: self.position.global_line,
+            column: Self::column_from_bytes(
&self.data[self.position.line_start_buffer_offset..self.position.buffer_offset], + ), + offset: self.position.global_offset, + } + } + + pub fn last_token_source(&self) -> Cow<'_, str> { + String::from_utf8_lossy( + &self.data[self.previous_position.buffer_offset..self.position.buffer_offset], + ) + } + + pub fn is_end(&self) -> bool { + self.is_ending && self.data.len() == self.position.buffer_offset + } + + #[allow(clippy::unwrap_in_result)] + fn skip_whitespaces_and_comments(&mut self) -> Option<()> { + loop { + self.skip_whitespaces()?; + + let buf = &self.data[self.position.buffer_offset..]; + if let Some(line_comment_start) = self.line_comment_start { + if buf.starts_with(line_comment_start) { + // Comment + if let Some(end) = memchr2(b'\r', b'\n', &buf[line_comment_start.len()..]) { + let mut end_position = line_comment_start.len() + end; + if buf.get(end_position).copied() == Some(b'\r') { + // We look for \n for Windows line end style + if let Some(c) = buf.get(end_position + 1) { + if *c == b'\n' { + end_position += 1; + } + } else if !self.is_ending { + return None; // We need to read more + } + } + let comment_size = end_position + 1; + self.position.buffer_offset += comment_size; + self.position.line_start_buffer_offset = self.position.buffer_offset; + self.position.global_offset += u64::try_from(comment_size).unwrap(); + self.position.global_line += 1; + continue; + } + if self.is_ending { + self.position.buffer_offset = self.data.len(); // EOF + return Some(()); + } + return None; // We need more data + } + } + return Some(()); + } + } + + fn skip_whitespaces(&mut self) -> Option<()> { + if self.is_line_jump_whitespace { + let mut i = self.position.buffer_offset; + while let Some(c) = self.data.get(i) { + match c { + b' ' | b'\t' => { + self.position.buffer_offset += 1; + self.position.global_offset += 1; + } + b'\r' => { + // We look for \n for Windows line end style + let mut increment: u8 = 1; + if let Some(c) = self.data.get(i + 1) { + if *c == b'\n' { + increment += 1; + i += 1; + } + } else if !self.is_ending { + return None; // We need to read more + } + self.position.buffer_offset += usize::from(increment); + self.position.line_start_buffer_offset = self.position.buffer_offset; + self.position.global_offset += u64::from(increment); + self.position.global_line += 1; + } + b'\n' => { + self.position.buffer_offset += 1; + self.position.line_start_buffer_offset = self.position.buffer_offset; + self.position.global_offset += 1; + self.position.global_line += 1; + } + _ => return Some(()), + } + i += 1; + // TODO: SIMD + } + } else { + for c in &self.data[self.position.buffer_offset..] 
{ + if matches!(c, b' ' | b'\t') { + self.position.buffer_offset += 1; + self.position.global_offset += 1; + } else { + return Some(()); + } + // TODO: SIMD + } + } + Some(()) + } + + fn shrink_data(&mut self) { + if self.position.line_start_buffer_offset > 0 { + self.data + .copy_within(self.position.line_start_buffer_offset.., 0); + self.data + .truncate(self.data.len() - self.position.line_start_buffer_offset); + self.position.buffer_offset -= self.position.line_start_buffer_offset; + self.position.line_start_buffer_offset = 0; + self.previous_position = self.position; + } + } + + fn find_number_of_line_jumps_and_start_of_last_line(bytes: &[u8]) -> (u64, usize) { + let mut num_of_jumps = 0; + let mut last_jump_pos = 0; + let mut previous_cr = 0; + for pos in memchr2_iter(b'\r', b'\n', bytes) { + if bytes[pos] == b'\r' { + previous_cr = pos; + num_of_jumps += 1; + last_jump_pos = pos + 1; + } else { + if previous_cr < pos - 1 { + // We count \r\n as a single line jump + num_of_jumps += 1; + } + last_jump_pos = pos + 1; + } + } + (num_of_jumps, last_jump_pos) + } + + fn column_from_bytes(bytes: &[u8]) -> u64 { + match str::from_utf8(bytes) { + Ok(s) => u64::try_from(s.chars().count()).unwrap(), + Err(e) => { + if e.valid_up_to() == 0 { + 0 + } else { + Self::column_from_bytes(&bytes[..e.valid_up_to()]) + } + } + } + } +} diff --git a/ng-oxigraph/src/oxttl/toolkit/mod.rs b/ng-oxigraph/src/oxttl/toolkit/mod.rs new file mode 100644 index 0000000..10c4216 --- /dev/null +++ b/ng-oxigraph/src/oxttl/toolkit/mod.rs @@ -0,0 +1,13 @@ +//! oxttl parsing toolkit. +//! +//! Provides the basic code to write plain Rust lexers and parsers able to read files chunk by chunk. + +mod error; +mod lexer; +mod parser; + +pub use self::error::{TextPosition, TurtleParseError, TurtleSyntaxError}; +pub use self::lexer::{Lexer, TokenRecognizer, TokenRecognizerError}; +#[cfg(feature = "async-tokio")] +pub use self::parser::FromTokioAsyncReadIterator; +pub use self::parser::{FromReadIterator, Parser, RuleRecognizer, RuleRecognizerError}; diff --git a/ng-oxigraph/src/oxttl/toolkit/parser.rs b/ng-oxigraph/src/oxttl/toolkit/parser.rs new file mode 100644 index 0000000..e406096 --- /dev/null +++ b/ng-oxigraph/src/oxttl/toolkit/parser.rs @@ -0,0 +1,183 @@ +use crate::oxttl::toolkit::error::{TurtleParseError, TurtleSyntaxError}; +use crate::oxttl::toolkit::lexer::{Lexer, TokenRecognizer}; +use std::io::Read; +#[cfg(feature = "async-tokio")] +use tokio::io::AsyncRead; + +pub trait RuleRecognizer: Sized { + type TokenRecognizer: TokenRecognizer; + type Output; + type Context; + + fn error_recovery_state(self) -> Self; + + fn recognize_next( + self, + token: <Self::TokenRecognizer as TokenRecognizer>::Token<'_>, + context: &mut Self::Context, + results: &mut Vec<Self::Output>, + errors: &mut Vec<RuleRecognizerError>, + ) -> Self; + + fn recognize_end( + self, + context: &mut Self::Context, + results: &mut Vec<Self::Output>, + errors: &mut Vec<RuleRecognizerError>, + ); + + fn lexer_options( + context: &Self::Context, + ) -> &<Self::TokenRecognizer as TokenRecognizer>::Options; +} + +pub struct RuleRecognizerError { + pub message: String, +} + +impl<S: Into<String>> From<S> for RuleRecognizerError { + fn from(message: S) -> Self { + Self { + message: message.into(), + } + } +} + +#[allow(clippy::partial_pub_fields)] +pub struct Parser<RR: RuleRecognizer> { + lexer: Lexer<RR::TokenRecognizer>, + state: Option<RR>, + pub context: RR::Context, + results: Vec<RR::Output>, + errors: Vec<RuleRecognizerError>, +} + +impl<RR: 
RuleRecognizer> Parser<RR> { + pub fn new(lexer: Lexer<RR::TokenRecognizer>, recognizer: RR, context: RR::Context) -> Self { + Self { + lexer, + state: Some(recognizer), + context, + results: vec![], + errors: vec![], + } + } + + pub fn extend_from_slice(&mut self, other: &[u8]) { + self.lexer.extend_from_slice(other) + } + + #[inline] + pub fn end(&mut self) { + self.lexer.end() + } + + #[inline] + pub fn is_end(&self) -> bool { + self.state.is_none() && self.results.is_empty() && self.errors.is_empty() + } + + pub fn read_next(&mut self) -> Option<Result<RR::Output, TurtleSyntaxError>> { + loop { + if let Some(error) = self.errors.pop() { + return Some(Err(TurtleSyntaxError { + location: self.lexer.last_token_location(), + message: error + .message + .replace("TOKEN", &self.lexer.last_token_source()), + })); + } + if let Some(result) = self.results.pop() { + return Some(Ok(result)); + } + if let Some(result) = self.lexer.read_next(RR::lexer_options(&self.context)) { + match result { + Ok(token) => { + self.state = self.state.take().map(|state| { + state.recognize_next( + token, + &mut self.context, + &mut self.results, + &mut self.errors, + ) + }); + continue; + } + Err(e) => { + self.state = self.state.take().map(RR::error_recovery_state); + return Some(Err(e)); + } + } + } + if self.lexer.is_end() { + self.state.take()?.recognize_end( + &mut self.context, + &mut self.results, + &mut self.errors, + ) + } else { + return None; + } + } + } + + pub fn parse_read<R: Read>(self, read: R) -> FromReadIterator<R, RR> { + FromReadIterator { read, parser: self } + } + + #[cfg(feature = "async-tokio")] + pub fn parse_tokio_async_read<R: AsyncRead + Unpin>( + self, + read: R, + ) -> FromTokioAsyncReadIterator<R, RR> { + FromTokioAsyncReadIterator { read, parser: self } + } +} + +#[allow(clippy::partial_pub_fields)] +pub struct FromReadIterator<R: Read, RR: RuleRecognizer> { + read: R, + pub parser: Parser<RR>, +} + +impl<R: Read, RR: RuleRecognizer> Iterator for FromReadIterator<R, RR> { + type Item = Result<RR::Output, TurtleParseError>; + + fn next(&mut self) -> Option<Self::Item> { + while !self.parser.is_end() { + if let Some(result) = self.parser.read_next() { + return Some(result.map_err(TurtleParseError::Syntax)); + } + if let Err(e) = self.parser.lexer.extend_from_read(&mut self.read) { + return Some(Err(e.into())); + } + } + None + } +} + +#[cfg(feature = "async-tokio")] +pub struct FromTokioAsyncReadIterator<R: AsyncRead + Unpin, RR: RuleRecognizer> { + pub read: R, + pub parser: Parser<RR>, +} + +#[cfg(feature = "async-tokio")] +impl<R: AsyncRead + Unpin, RR: RuleRecognizer> FromTokioAsyncReadIterator<R, RR> { + pub async fn next(&mut self) -> Option<Result<RR::Output, TurtleParseError>> { + while !self.parser.is_end() { + if let Some(result) = self.parser.read_next() { + return Some(result.map_err(TurtleParseError::Syntax)); + } + if let Err(e) = self + .parser + .lexer + .extend_from_tokio_async_read(&mut self.read) + .await + { + return Some(Err(e.into())); + } + } + None + } +} diff --git a/ng-oxigraph/src/oxttl/trig.rs b/ng-oxigraph/src/oxttl/trig.rs new file mode 100644 index 0000000..7a51396 --- /dev/null +++ b/ng-oxigraph/src/oxttl/trig.rs @@ -0,0 +1,1252 @@ +//! A [TriG](https://www.w3.org/TR/trig/) streaming parser implemented by [`TriGParser`] +//! and a serializer implemented by [`TriGSerializer`]. 
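The toolkit above couples a pull-based `Lexer` with this push-based `Parser`: callers alternate between feeding bytes (`extend_from_slice`, then `end` at EOF) and draining whatever the recognizer produced (`read_next`). A minimal crate-internal driver sketch may make the loop concrete; the `drive` helper is hypothetical and not part of this diff, and it assumes the `crate::oxttl::toolkit` paths introduced above:

```rust
use crate::oxttl::toolkit::{Parser, RuleRecognizer, TurtleSyntaxError};

/// Drives any toolkit parser over in-memory chunks, separating outputs
/// from syntax errors. `chunks` stands in for whatever feeds the parser.
fn drive<RR: RuleRecognizer>(
    mut parser: Parser<RR>,
    chunks: &[&[u8]],
) -> (Vec<RR::Output>, Vec<TurtleSyntaxError>) {
    let (mut outputs, mut errors) = (Vec::new(), Vec::new());
    let mut chunks = chunks.iter();
    while !parser.is_end() {
        // Feed one more chunk, or signal end-of-input once exhausted.
        match chunks.next() {
            Some(chunk) => parser.extend_from_slice(chunk),
            None => parser.end(),
        }
        // Drain everything recognizable from the buffered data.
        while let Some(result) = parser.read_next() {
            match result {
                Ok(output) => outputs.push(output),
                Err(e) => errors.push(e),
            }
        }
    }
    (outputs, errors)
}
```

This is the same loop that the `LowLevelTriGReader` doc-test below spells out for TriG specifically.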
+ +use crate::oxrdf::vocab::{rdf, xsd}; +use crate::oxrdf::{ + GraphName, GraphNameRef, LiteralRef, NamedNode, NamedNodeRef, Quad, QuadRef, Subject, TermRef, +}; +use crate::oxttl::lexer::N3Lexer; +use crate::oxttl::terse::TriGRecognizer; +#[cfg(feature = "async-tokio")] +use crate::oxttl::toolkit::FromTokioAsyncReadIterator; +use crate::oxttl::toolkit::{FromReadIterator, Parser, TurtleParseError, TurtleSyntaxError}; +use oxiri::{Iri, IriParseError}; +use std::collections::hash_map::Iter; +use std::collections::{BTreeMap, HashMap}; +use std::fmt; +use std::io::{self, Read, Write}; +#[cfg(feature = "async-tokio")] +use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; + +/// A [TriG](https://www.w3.org/TR/trig/) streaming parser. +/// +/// Support for [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star) is available behind the `rdf-star` feature and the [`TriGParser::with_quoted_triples`] option. +/// +/// Count the number of people: +/// ``` +/// use oxrdf::vocab::rdf; +/// use oxrdf::NamedNodeRef; +/// use oxttl::TriGParser; +/// +/// let file = br#"@base <http://example.com/> . +/// @prefix schema: <http://schema.org/> . +/// <foo> a schema:Person ; +/// schema:name "Foo" . +/// <bar> a schema:Person ; +/// schema:name "Bar" ."#; +/// +/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; +/// let mut count = 0; +/// for quad in TriGParser::new().parse_read(file.as_ref()) { +/// let quad = quad?; +/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() { +/// count += 1; +/// } +/// } +/// assert_eq!(2, count); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[derive(Default)] +#[must_use] +pub struct TriGParser { + unchecked: bool, + base: Option<Iri<String>>, + prefixes: HashMap<String, Iri<String>>, + #[cfg(feature = "rdf-star")] + with_quoted_triples: bool, +} + +impl TriGParser { + /// Builds a new [`TriGParser`]. + #[inline] + pub fn new() -> Self { + Self::default() + } + + /// Assumes the file is valid to make parsing faster. + /// + /// It will skip some validations. + /// + /// Note that if the file is actually not valid, then broken RDF might be emitted by the parser. + #[inline] + pub fn unchecked(mut self) -> Self { + self.unchecked = true; + self + } + + #[inline] + pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { + self.base = Some(Iri::parse(base_iri.into())?); + Ok(self) + } + + #[inline] + pub fn with_prefix( + mut self, + prefix_name: impl Into<String>, + prefix_iri: impl Into<String>, + ) -> Result<Self, IriParseError> { + self.prefixes + .insert(prefix_name.into(), Iri::parse(prefix_iri.into())?); + Ok(self) + } + + /// Enables [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star). + #[cfg(feature = "rdf-star")] + #[inline] + pub fn with_quoted_triples(mut self) -> Self { + self.with_quoted_triples = true; + self + } + + /// Parses a TriG file from a [`Read`] implementation. + /// + /// Count the number of people: + /// ``` + /// use oxrdf::vocab::rdf; + /// use oxrdf::NamedNodeRef; + /// use oxttl::TriGParser; + /// + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . + /// <foo> a schema:Person ; + /// schema:name "Foo" . 
+    /// <bar> a schema:Person ;
+    ///     schema:name "Bar" ."#;
+    ///
+    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
+    /// let mut count = 0;
+    /// for quad in TriGParser::new().parse_read(file.as_ref()) {
+    ///     let quad = quad?;
+    ///     if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
+    ///         count += 1;
+    ///     }
+    /// }
+    /// assert_eq!(2, count);
+    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+    /// ```
+    pub fn parse_read<R: Read>(self, read: R) -> FromReadTriGReader<R> {
+        FromReadTriGReader {
+            inner: self.parse().parser.parse_read(read),
+        }
+    }
+
+    /// Parses a TriG file from an [`AsyncRead`] implementation.
+    ///
+    /// Count the number of people:
+    /// ```
+    /// use oxrdf::vocab::rdf;
+    /// use oxrdf::NamedNodeRef;
+    /// use oxttl::TriGParser;
+    ///
+    /// # #[tokio::main(flavor = "current_thread")]
+    /// # async fn main() -> Result<(), oxttl::TurtleParseError> {
+    /// let file = br#"@base <http://example.com/> .
+    /// @prefix schema: <http://schema.org/> .
+    /// <foo> a schema:Person ;
+    ///     schema:name "Foo" .
+    /// <bar> a schema:Person ;
+    ///     schema:name "Bar" ."#;
+    ///
+    /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
+    /// let mut count = 0;
+    /// let mut parser = TriGParser::new().parse_tokio_async_read(file.as_ref());
+    /// while let Some(triple) = parser.next().await {
+    ///     let triple = triple?;
+    ///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+    ///         count += 1;
+    ///     }
+    /// }
+    /// assert_eq!(2, count);
+    /// # Ok(())
+    /// # }
+    /// ```
+    #[cfg(feature = "async-tokio")]
+    pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
+        self,
+        read: R,
+    ) -> FromTokioAsyncReadTriGReader<R> {
+        FromTokioAsyncReadTriGReader {
+            inner: self.parse().parser.parse_tokio_async_read(read),
+        }
+    }
+
+    /// Parses a TriG file using a low-level API.
+    ///
+    /// Count the number of people:
+    /// ```
+    /// use oxrdf::vocab::rdf;
+    /// use oxrdf::NamedNodeRef;
+    /// use oxttl::TriGParser;
+    ///
+    /// let file: [&[u8]; 5] = [
+    ///     b"@base <http://example.com/>",
+    ///     b". @prefix schema: <http://schema.org/> .",
+    ///     b"<foo> a schema:Person",
+    ///     b" ; schema:name \"Foo\" . <bar>",
+    ///     b" a schema:Person ; schema:name \"Bar\" .",
+    /// ];
+    ///
+    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
+    /// let mut count = 0;
+    /// let mut parser = TriGParser::new().parse();
+    /// let mut file_chunks = file.iter();
+    /// while !parser.is_end() {
+    ///     // We feed more data to the parser
+    ///     if let Some(chunk) = file_chunks.next() {
+    ///         parser.extend_from_slice(chunk);
+    ///     } else {
+    ///         parser.end(); // It's finished
+    ///     }
+    ///     // We read as many quads from the parser as possible
+    ///     while let Some(quad) = parser.read_next() {
+    ///         let quad = quad?;
+    ///         if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
+    ///             count += 1;
+    ///         }
+    ///     }
+    /// }
+    /// assert_eq!(2, count);
+    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+    /// ```
+    pub fn parse(self) -> LowLevelTriGReader {
+        LowLevelTriGReader {
+            parser: TriGRecognizer::new_parser(
+                true,
+                #[cfg(feature = "rdf-star")]
+                self.with_quoted_triples,
+                self.unchecked,
+                self.base,
+                self.prefixes,
+            ),
+        }
+    }
+}
+
+/// Parses a TriG file from a [`Read`] implementation. Can be built using [`TriGParser::parse_read`].
+/// +/// Count the number of people: +/// ``` +/// use oxrdf::vocab::rdf; +/// use oxrdf::NamedNodeRef; +/// use oxttl::TriGParser; +/// +/// let file = br#"@base <http://example.com/> . +/// @prefix schema: <http://schema.org/> . +/// <foo> a schema:Person ; +/// schema:name "Foo" . +/// <bar> a schema:Person ; +/// schema:name "Bar" ."#; +/// +/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; +/// let mut count = 0; +/// for quad in TriGParser::new().parse_read(file.as_ref()) { +/// let quad = quad?; +/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() { +/// count += 1; +/// } +/// } +/// assert_eq!(2, count); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[must_use] +pub struct FromReadTriGReader<R: Read> { + inner: FromReadIterator<R, TriGRecognizer>, +} + +impl<R: Read> FromReadTriGReader<R> { + /// The list of IRI prefixes considered at the current step of the parsing. + /// + /// This method returns (prefix name, prefix value) tuples. + /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered. + /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned). + /// + /// ``` + /// use oxttl::TriGParser; + /// + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . + /// <foo> a schema:Person ; + /// schema:name "Foo" ."#; + /// + /// let mut reader = TriGParser::new().parse_read(file.as_ref()); + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning + /// + /// reader.next().unwrap()?; // We read the first triple + /// assert_eq!( + /// reader.prefixes().collect::<Vec<_>>(), + /// [("schema", "http://schema.org/")] + /// ); // There are now prefixes + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn prefixes(&self) -> TriGPrefixesIter<'_> { + TriGPrefixesIter { + inner: self.inner.parser.context.prefixes(), + } + } + + /// The base IRI considered at the current step of the parsing. + /// + /// ``` + /// use oxttl::TriGParser; + /// + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . + /// <foo> a schema:Person ; + /// schema:name "Foo" ."#; + /// + /// let mut reader = TriGParser::new().parse_read(file.as_ref()); + /// assert!(reader.base_iri().is_none()); // No base at the beginning because none has been given to the parser. + /// + /// reader.next().unwrap()?; // We read the first triple + /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI. + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn base_iri(&self) -> Option<&str> { + self.inner + .parser + .context + .lexer_options + .base_iri + .as_ref() + .map(Iri::as_str) + } +} + +impl<R: Read> Iterator for FromReadTriGReader<R> { + type Item = Result<Quad, TurtleParseError>; + + fn next(&mut self) -> Option<Self::Item> { + self.inner.next() + } +} + +/// Parses a TriG file from a [`AsyncRead`] implementation. Can be built using [`TriGParser::parse_tokio_async_read`]. +/// +/// Count the number of people: +/// ``` +/// use oxrdf::vocab::rdf; +/// use oxrdf::NamedNodeRef; +/// use oxttl::TriGParser; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> Result<(), oxttl::TurtleParseError> { +/// let file = br#"@base <http://example.com/> . +/// @prefix schema: <http://schema.org/> . +/// <foo> a schema:Person ; +/// schema:name "Foo" . 
+/// <bar> a schema:Person ;
+///     schema:name "Bar" ."#;
+///
+/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
+/// let mut count = 0;
+/// let mut parser = TriGParser::new().parse_tokio_async_read(file.as_ref());
+/// while let Some(triple) = parser.next().await {
+///     let triple = triple?;
+///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+///         count += 1;
+///     }
+/// }
+/// assert_eq!(2, count);
+/// # Ok(())
+/// # }
+/// ```
+#[cfg(feature = "async-tokio")]
+#[must_use]
+pub struct FromTokioAsyncReadTriGReader<R: AsyncRead + Unpin> {
+    inner: FromTokioAsyncReadIterator<R, TriGRecognizer>,
+}
+
+#[cfg(feature = "async-tokio")]
+impl<R: AsyncRead + Unpin> FromTokioAsyncReadTriGReader<R> {
+    /// Reads the next quad or returns `None` if the file is finished.
+    pub async fn next(&mut self) -> Option<Result<Quad, TurtleParseError>> {
+        Some(self.inner.next().await?.map(Into::into))
+    }
+
+    /// The list of IRI prefixes considered at the current step of the parsing.
+    ///
+    /// This method returns (prefix name, prefix value) tuples.
+    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
+    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
+    ///
+    /// ```
+    /// use oxttl::TriGParser;
+    ///
+    /// # #[tokio::main(flavor = "current_thread")]
+    /// # async fn main() -> Result<(), oxttl::TurtleParseError> {
+    /// let file = br#"@base <http://example.com/> .
+    /// @prefix schema: <http://schema.org/> .
+    /// <foo> a schema:Person ;
+    ///     schema:name "Foo" ."#;
+    ///
+    /// let mut reader = TriGParser::new().parse_tokio_async_read(file.as_ref());
+    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
+    ///
+    /// reader.next().await.unwrap()?; // We read the first triple
+    /// assert_eq!(
+    ///     reader.prefixes().collect::<Vec<_>>(),
+    ///     [("schema", "http://schema.org/")]
+    /// ); // There are now prefixes
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn prefixes(&self) -> TriGPrefixesIter<'_> {
+        TriGPrefixesIter {
+            inner: self.inner.parser.context.prefixes(),
+        }
+    }
+
+    /// The base IRI considered at the current step of the parsing.
+    ///
+    /// ```
+    /// use oxttl::TriGParser;
+    ///
+    /// # #[tokio::main(flavor = "current_thread")]
+    /// # async fn main() -> Result<(), oxttl::TurtleParseError> {
+    /// let file = br#"@base <http://example.com/> .
+    /// @prefix schema: <http://schema.org/> .
+    /// <foo> a schema:Person ;
+    ///     schema:name "Foo" ."#;
+    ///
+    /// let mut reader = TriGParser::new().parse_tokio_async_read(file.as_ref());
+    /// assert!(reader.base_iri().is_none()); // No base IRI at the beginning
+    ///
+    /// reader.next().await.unwrap()?; // We read the first triple
+    /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn base_iri(&self) -> Option<&str> {
+        self.inner
+            .parser
+            .context
+            .lexer_options
+            .base_iri
+            .as_ref()
+            .map(Iri::as_str)
+    }
+}
+
+/// Parses a TriG file using a low-level API. Can be built using [`TriGParser::parse`].
+///
+/// Count the number of people:
+/// ```
+/// use oxrdf::vocab::rdf;
+/// use oxrdf::NamedNodeRef;
+/// use oxttl::TriGParser;
+///
+/// let file: [&[u8]; 5] = [
+///     b"@base <http://example.com/>",
+///     b". @prefix schema: <http://schema.org/> .",
+///     b"<foo> a schema:Person",
+///     b" ; schema:name \"Foo\" . <bar>",
+///     b" a schema:Person ; schema:name \"Bar\" .",
+/// ];
+///
+/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
+/// let mut count = 0;
+/// let mut parser = TriGParser::new().parse();
+/// let mut file_chunks = file.iter();
+/// while !parser.is_end() {
+///     // We feed more data to the parser
+///     if let Some(chunk) = file_chunks.next() {
+///         parser.extend_from_slice(chunk);
+///     } else {
+///         parser.end(); // It's finished
+///     }
+///     // We read as many quads from the parser as possible
+///     while let Some(quad) = parser.read_next() {
+///         let quad = quad?;
+///         if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
+///             count += 1;
+///         }
+///     }
+/// }
+/// assert_eq!(2, count);
+/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+/// ```
+pub struct LowLevelTriGReader {
+    parser: Parser<TriGRecognizer>,
+}
+
+impl LowLevelTriGReader {
+    /// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data.
+    pub fn extend_from_slice(&mut self, other: &[u8]) {
+        self.parser.extend_from_slice(other)
+    }
+
+    /// Tells the parser that the file is finished.
+    ///
+    /// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values.
+    pub fn end(&mut self) {
+        self.parser.end()
+    }
+
+    /// Returns whether the parsing is finished, i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`.
+    pub fn is_end(&self) -> bool {
+        self.parser.is_end()
+    }
+
+    /// Attempts to parse a new quad from the already provided data.
+    ///
+    /// Returns [`None`] if the parsing is finished or more data is required.
+    /// If more data is required, it should be fed using [`extend_from_slice`](Self::extend_from_slice).
+    pub fn read_next(&mut self) -> Option<Result<Quad, TurtleSyntaxError>> {
+        self.parser.read_next()
+    }
+
+    /// The list of IRI prefixes considered at the current step of the parsing.
+    ///
+    /// This method returns (prefix name, prefix value) tuples.
+    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
+    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
+    ///
+    /// ```
+    /// use oxttl::TriGParser;
+    ///
+    /// let file = br#"@base <http://example.com/> .
+    /// @prefix schema: <http://schema.org/> .
+    /// <foo> a schema:Person ;
+    ///     schema:name "Foo" ."#;
+    ///
+    /// let mut reader = TriGParser::new().parse();
+    /// reader.extend_from_slice(file);
+    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
+    ///
+    /// reader.read_next().unwrap()?; // We read the first triple
+    /// assert_eq!(
+    ///     reader.prefixes().collect::<Vec<_>>(),
+    ///     [("schema", "http://schema.org/")]
+    /// ); // There are now prefixes
+    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+    /// ```
+    pub fn prefixes(&self) -> TriGPrefixesIter<'_> {
+        TriGPrefixesIter {
+            inner: self.parser.context.prefixes(),
+        }
+    }
+
+    /// The base IRI considered at the current step of the parsing.
+    ///
+    /// ```
+    /// use oxttl::TriGParser;
+    ///
+    /// let file = br#"@base <http://example.com/> .
+    /// @prefix schema: <http://schema.org/> .
+ /// <foo> a schema:Person ; + /// schema:name "Foo" ."#; + /// + /// let mut reader = TriGParser::new().parse(); + /// reader.extend_from_slice(file); + /// assert!(reader.base_iri().is_none()); // No base IRI at the beginning + /// + /// reader.read_next().unwrap()?; // We read the first triple + /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn base_iri(&self) -> Option<&str> { + self.parser + .context + .lexer_options + .base_iri + .as_ref() + .map(Iri::as_str) + } +} + +/// Iterator on the file prefixes. +/// +/// See [`LowLevelTriGReader::prefixes`]. +pub struct TriGPrefixesIter<'a> { + inner: Iter<'a, String, Iri<String>>, +} + +impl<'a> Iterator for TriGPrefixesIter<'a> { + type Item = (&'a str, &'a str); + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + let (key, value) = self.inner.next()?; + Some((key.as_str(), value.as_str())) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.inner.size_hint() + } +} + +/// A [TriG](https://www.w3.org/TR/trig/) serializer. +/// +/// Support for [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star) is available behind the `rdf-star` feature. +/// +/// ``` +/// use oxrdf::{NamedNodeRef, QuadRef}; +/// use oxttl::TriGSerializer; +/// +/// let mut writer = TriGSerializer::new() +/// .with_prefix("schema", "http://schema.org/")? +/// .serialize_to_write(Vec::new()); +/// writer.write_quad(QuadRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// NamedNodeRef::new("http://example.com")?, +/// ))?; +/// assert_eq!( +/// b"@prefix schema: <http://schema.org/> .\n<http://example.com> {\n\t<http://example.com#me> a schema:Person .\n}\n", +/// writer.finish()?.as_slice() +/// ); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[derive(Default)] +#[must_use] +pub struct TriGSerializer { + prefixes: BTreeMap<String, String>, +} + +impl TriGSerializer { + /// Builds a new [`TriGSerializer`]. + #[inline] + pub fn new() -> Self { + Self { + prefixes: BTreeMap::new(), + } + } + + #[inline] + pub fn with_prefix( + mut self, + prefix_name: impl Into<String>, + prefix_iri: impl Into<String>, + ) -> Result<Self, IriParseError> { + self.prefixes.insert( + Iri::parse(prefix_iri.into())?.into_inner(), + prefix_name.into(), + ); + Ok(self) + } + + /// Writes a TriG file to a [`Write`] implementation. + /// + /// ``` + /// use oxrdf::{NamedNodeRef, QuadRef}; + /// use oxttl::TriGSerializer; + /// + /// let mut writer = TriGSerializer::new() + /// .with_prefix("schema", "http://schema.org/")? 
+ /// .serialize_to_write(Vec::new()); + /// writer.write_quad(QuadRef::new( + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, + /// NamedNodeRef::new("http://schema.org/Person")?, + /// NamedNodeRef::new("http://example.com")?, + /// ))?; + /// assert_eq!( + /// b"@prefix schema: <http://schema.org/> .\n<http://example.com> {\n\t<http://example.com#me> a schema:Person .\n}\n", + /// writer.finish()?.as_slice() + /// ); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteTriGWriter<W> { + ToWriteTriGWriter { + write, + writer: self.serialize(), + } + } + + /// Writes a TriG file to a [`AsyncWrite`] implementation. + /// + /// ``` + /// use oxrdf::{NamedNodeRef, QuadRef}; + /// use oxttl::TriGSerializer; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), Box<dyn std::error::Error>> { + /// let mut writer = TriGSerializer::new() + /// .with_prefix("schema", "http://schema.org/")? + /// .serialize_to_tokio_async_write(Vec::new()); + /// writer + /// .write_quad(QuadRef::new( + /// NamedNodeRef::new_unchecked("http://example.com#me"), + /// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + /// NamedNodeRef::new_unchecked("http://schema.org/Person"), + /// NamedNodeRef::new_unchecked("http://example.com"), + /// )) + /// .await?; + /// assert_eq!( + /// b"@prefix schema: <http://schema.org/> .\n<http://example.com> {\n\t<http://example.com#me> a schema:Person .\n}\n", + /// writer.finish().await?.as_slice() + /// ); + /// # Ok(()) + /// # } + /// ``` + #[cfg(feature = "async-tokio")] + pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>( + self, + write: W, + ) -> ToTokioAsyncWriteTriGWriter<W> { + ToTokioAsyncWriteTriGWriter { + write, + writer: self.serialize(), + buffer: Vec::new(), + } + } + + /// Builds a low-level TriG writer. + /// + /// ``` + /// use oxrdf::{NamedNodeRef, QuadRef}; + /// use oxttl::TriGSerializer; + /// + /// let mut buf = Vec::new(); + /// let mut writer = TriGSerializer::new() + /// .with_prefix("schema", "http://schema.org/")? + /// .serialize(); + /// writer.write_quad( + /// QuadRef::new( + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, + /// NamedNodeRef::new("http://schema.org/Person")?, + /// NamedNodeRef::new("http://example.com")?, + /// ), + /// &mut buf, + /// )?; + /// writer.finish(&mut buf)?; + /// assert_eq!( + /// b"@prefix schema: <http://schema.org/> .\n<http://example.com> {\n\t<http://example.com#me> a schema:Person .\n}\n", + /// buf.as_slice() + /// ); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn serialize(self) -> LowLevelTriGWriter { + LowLevelTriGWriter { + prefixes: self.prefixes, + prelude_written: false, + current_graph_name: GraphName::DefaultGraph, + current_subject_predicate: None, + } + } +} + +/// Writes a TriG file to a [`Write`] implementation. Can be built using [`TriGSerializer::serialize_to_write`]. +/// +/// ``` +/// use oxrdf::{NamedNodeRef, QuadRef}; +/// use oxttl::TriGSerializer; +/// +/// let mut writer = TriGSerializer::new() +/// .with_prefix("schema", "http://schema.org/")? 
+/// .serialize_to_write(Vec::new()); +/// writer.write_quad(QuadRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// NamedNodeRef::new("http://example.com")?, +/// ))?; +/// assert_eq!( +/// b"@prefix schema: <http://schema.org/> .\n<http://example.com> {\n\t<http://example.com#me> a schema:Person .\n}\n", +/// writer.finish()?.as_slice() +/// ); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[must_use] +pub struct ToWriteTriGWriter<W: Write> { + write: W, + writer: LowLevelTriGWriter, +} + +impl<W: Write> ToWriteTriGWriter<W> { + /// Writes an extra quad. + pub fn write_quad<'a>(&mut self, q: impl Into<QuadRef<'a>>) -> io::Result<()> { + self.writer.write_quad(q, &mut self.write) + } + + /// Ends the write process and returns the underlying [`Write`]. + pub fn finish(mut self) -> io::Result<W> { + self.writer.finish(&mut self.write)?; + Ok(self.write) + } +} + +/// Writes a TriG file to a [`AsyncWrite`] implementation. Can be built using [`TriGSerializer::serialize_to_tokio_async_write`]. +/// +/// ``` +/// use oxrdf::{NamedNodeRef, QuadRef}; +/// use oxttl::TriGSerializer; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> Result<(), Box<dyn std::error::Error>> { +/// let mut writer = TriGSerializer::new() +/// .with_prefix("schema", "http://schema.org/")? +/// .serialize_to_tokio_async_write(Vec::new()); +/// writer +/// .write_quad(QuadRef::new( +/// NamedNodeRef::new_unchecked("http://example.com#me"), +/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), +/// NamedNodeRef::new_unchecked("http://schema.org/Person"), +/// NamedNodeRef::new_unchecked("http://example.com"), +/// )) +/// .await?; +/// assert_eq!( +/// b"@prefix schema: <http://schema.org/> .\n<http://example.com> {\n\t<http://example.com#me> a schema:Person .\n}\n", +/// writer.finish().await?.as_slice() +/// ); +/// # Ok(()) +/// # } +/// ``` +#[cfg(feature = "async-tokio")] +#[must_use] +pub struct ToTokioAsyncWriteTriGWriter<W: AsyncWrite + Unpin> { + write: W, + writer: LowLevelTriGWriter, + buffer: Vec<u8>, +} + +#[cfg(feature = "async-tokio")] +impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteTriGWriter<W> { + /// Writes an extra quad. + pub async fn write_quad<'a>(&mut self, q: impl Into<QuadRef<'a>>) -> io::Result<()> { + self.writer.write_quad(q, &mut self.buffer)?; + self.write.write_all(&self.buffer).await?; + self.buffer.clear(); + Ok(()) + } + + /// Ends the write process and returns the underlying [`Write`]. + pub async fn finish(mut self) -> io::Result<W> { + self.writer.finish(&mut self.buffer)?; + self.write.write_all(&self.buffer).await?; + self.buffer.clear(); + Ok(self.write) + } +} + +/// Writes a TriG file by using a low-level API. Can be built using [`TriGSerializer::serialize`]. +/// +/// ``` +/// use oxrdf::{NamedNodeRef, QuadRef}; +/// use oxttl::TriGSerializer; +/// +/// let mut buf = Vec::new(); +/// let mut writer = TriGSerializer::new() +/// .with_prefix("schema", "http://schema.org/")? 
+/// .serialize(); +/// writer.write_quad( +/// QuadRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// NamedNodeRef::new("http://example.com")?, +/// ), +/// &mut buf, +/// )?; +/// writer.finish(&mut buf)?; +/// assert_eq!( +/// b"@prefix schema: <http://schema.org/> .\n<http://example.com> {\n\t<http://example.com#me> a schema:Person .\n}\n", +/// buf.as_slice() +/// ); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +pub struct LowLevelTriGWriter { + prefixes: BTreeMap<String, String>, + prelude_written: bool, + current_graph_name: GraphName, + current_subject_predicate: Option<(Subject, NamedNode)>, +} + +impl LowLevelTriGWriter { + /// Writes an extra quad. + pub fn write_quad<'a>( + &mut self, + q: impl Into<QuadRef<'a>>, + mut write: impl Write, + ) -> io::Result<()> { + if !self.prelude_written { + self.prelude_written = true; + for (prefix_iri, prefix_name) in &self.prefixes { + writeln!(write, "@prefix {prefix_name}: <{prefix_iri}> .")?; + } + } + let q = q.into(); + if q.graph_name == self.current_graph_name.as_ref() { + if let Some((current_subject, current_predicate)) = + self.current_subject_predicate.take() + { + if q.subject == current_subject.as_ref() { + if q.predicate == current_predicate { + self.current_subject_predicate = Some((current_subject, current_predicate)); + write!(write, " , {}", self.term(q.object)) + } else { + self.current_subject_predicate = + Some((current_subject, q.predicate.into_owned())); + writeln!(write, " ;")?; + if !self.current_graph_name.is_default_graph() { + write!(write, "\t")?; + } + write!( + write, + "\t{} {}", + self.predicate(q.predicate), + self.term(q.object) + ) + } + } else { + self.current_subject_predicate = + Some((q.subject.into_owned(), q.predicate.into_owned())); + writeln!(write, " .")?; + if !self.current_graph_name.is_default_graph() { + write!(write, "\t")?; + } + write!( + write, + "{} {} {}", + self.term(q.subject), + self.predicate(q.predicate), + self.term(q.object) + ) + } + } else { + self.current_subject_predicate = + Some((q.subject.into_owned(), q.predicate.into_owned())); + if !self.current_graph_name.is_default_graph() { + write!(write, "\t")?; + } + write!( + write, + "{} {} {}", + self.term(q.subject), + self.predicate(q.predicate), + self.term(q.object) + ) + } + } else { + if self.current_subject_predicate.is_some() { + writeln!(write, " .")?; + } + if !self.current_graph_name.is_default_graph() { + writeln!(write, "}}")?; + } + self.current_graph_name = q.graph_name.into_owned(); + self.current_subject_predicate = + Some((q.subject.into_owned(), q.predicate.into_owned())); + match self.current_graph_name.as_ref() { + GraphNameRef::NamedNode(g) => { + writeln!(write, "{} {{", self.term(g))?; + write!(write, "\t")?; + } + GraphNameRef::BlankNode(g) => { + writeln!(write, "{} {{", self.term(g))?; + write!(write, "\t")?; + } + GraphNameRef::DefaultGraph => (), + } + + write!( + write, + "{} {} {}", + self.term(q.subject), + self.predicate(q.predicate), + self.term(q.object) + ) + } + } + + fn predicate<'a>(&'a self, named_node: impl Into<NamedNodeRef<'a>>) -> TurtlePredicate<'a> { + TurtlePredicate { + named_node: named_node.into(), + prefixes: &self.prefixes, + } + } + + fn term<'a>(&'a self, term: impl Into<TermRef<'a>>) -> TurtleTerm<'a> { + TurtleTerm { + term: term.into(), + prefixes: &self.prefixes, + } + } + + /// Finishes to write the file. 
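+    ///
+    /// This writes the terminating ` .` of the last statement and, if a graph
+    /// block is still open, its closing `}`. It does not flush the underlying
+    /// [`Write`].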
+ pub fn finish(&mut self, mut write: impl Write) -> io::Result<()> { + if self.current_subject_predicate.is_some() { + writeln!(write, " .")?; + } + if !self.current_graph_name.is_default_graph() { + writeln!(write, "}}")?; + } + Ok(()) + } +} + +struct TurtlePredicate<'a> { + named_node: NamedNodeRef<'a>, + prefixes: &'a BTreeMap<String, String>, +} + +impl<'a> fmt::Display for TurtlePredicate<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.named_node == rdf::TYPE { + f.write_str("a") + } else { + TurtleTerm { + term: self.named_node.into(), + prefixes: self.prefixes, + } + .fmt(f) + } + } +} + +struct TurtleTerm<'a> { + term: TermRef<'a>, + prefixes: &'a BTreeMap<String, String>, +} + +impl<'a> fmt::Display for TurtleTerm<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.term { + TermRef::NamedNode(v) => { + for (prefix_iri, prefix_name) in self.prefixes { + if let Some(local_name) = v.as_str().strip_prefix(prefix_iri) { + if let Some(escaped_local_name) = escape_local_name(local_name) { + return write!(f, "{prefix_name}:{escaped_local_name}"); + } + } + } + write!(f, "{v}") + } + TermRef::BlankNode(v) => write!(f, "{v}"), + TermRef::Literal(v) => { + let value = v.value(); + let inline = match v.datatype() { + xsd::BOOLEAN => is_turtle_boolean(value), + xsd::INTEGER => is_turtle_integer(value), + xsd::DECIMAL => is_turtle_decimal(value), + xsd::DOUBLE => is_turtle_double(value), + _ => false, + }; + if inline { + f.write_str(value) + } else if v.is_plain() { + write!(f, "{v}") + } else { + write!( + f, + "{}^^{}", + LiteralRef::new_simple_literal(v.value()), + TurtleTerm { + term: v.datatype().into(), + prefixes: self.prefixes + } + ) + } + } + #[cfg(feature = "rdf-star")] + TermRef::Triple(t) => { + write!( + f, + "<< {} {} {} >>", + TurtleTerm { + term: t.subject.as_ref().into(), + prefixes: self.prefixes + }, + TurtleTerm { + term: t.predicate.as_ref().into(), + prefixes: self.prefixes + }, + TurtleTerm { + term: t.object.as_ref(), + prefixes: self.prefixes + } + ) + } + } + } +} + +fn is_turtle_boolean(value: &str) -> bool { + matches!(value, "true" | "false") +} + +fn is_turtle_integer(value: &str) -> bool { + // [19] INTEGER ::= [+-]? [0-9]+ + let mut value = value.as_bytes(); + if let Some(v) = value.strip_prefix(b"+") { + value = v; + } else if let Some(v) = value.strip_prefix(b"-") { + value = v; + } + !value.is_empty() && value.iter().all(u8::is_ascii_digit) +} + +fn is_turtle_decimal(value: &str) -> bool { + // [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+ + let mut value = value.as_bytes(); + if let Some(v) = value.strip_prefix(b"+") { + value = v; + } else if let Some(v) = value.strip_prefix(b"-") { + value = v; + } + while value.first().map_or(false, u8::is_ascii_digit) { + value = &value[1..]; + } + let Some(value) = value.strip_prefix(b".") else { + return false; + }; + !value.is_empty() && value.iter().all(u8::is_ascii_digit) +} + +fn is_turtle_double(value: &str) -> bool { + // [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT) + // [154s] EXPONENT ::= [eE] [+-]? 
[0-9]+ + let mut value = value.as_bytes(); + if let Some(v) = value.strip_prefix(b"+") { + value = v; + } else if let Some(v) = value.strip_prefix(b"-") { + value = v; + } + let mut with_before = false; + while value.first().map_or(false, u8::is_ascii_digit) { + value = &value[1..]; + with_before = true; + } + let mut with_after = false; + if let Some(v) = value.strip_prefix(b".") { + value = v; + while value.first().map_or(false, u8::is_ascii_digit) { + value = &value[1..]; + with_after = true; + } + } + if let Some(v) = value.strip_prefix(b"e") { + value = v; + } else if let Some(v) = value.strip_prefix(b"E") { + value = v; + } else { + return false; + } + if let Some(v) = value.strip_prefix(b"+") { + value = v; + } else if let Some(v) = value.strip_prefix(b"-") { + value = v; + } + (with_before || with_after) && !value.is_empty() && value.iter().all(u8::is_ascii_digit) +} + +fn escape_local_name(value: &str) -> Option<String> { + // TODO: PLX + // [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? + let mut output = String::with_capacity(value.len()); + let mut chars = value.chars(); + let first = chars.next()?; + if N3Lexer::is_possible_pn_chars_u(first) || first == ':' || first.is_ascii_digit() { + output.push(first); + } else if can_be_escaped_in_local_name(first) { + output.push('\\'); + output.push(first); + } else { + return None; + } + + while let Some(c) = chars.next() { + if N3Lexer::is_possible_pn_chars(c) || c == ':' || (c == '.' && !chars.as_str().is_empty()) + { + output.push(c); + } else if can_be_escaped_in_local_name(c) { + output.push('\\'); + output.push(c); + } else { + return None; + } + } + + Some(output) +} + +fn can_be_escaped_in_local_name(c: char) -> bool { + matches!( + c, + '_' | '~' + | '.' + | '-' + | '!' + | '$' + | '&' + | '\'' + | '(' + | ')' + | '*' + | '+' + | ',' + | ';' + | '=' + | '/' + | '?' 
+ | '#' + | '@' + | '%' + ) +} + +#[cfg(test)] +#[allow(clippy::panic_in_result_fn)] +mod tests { + use super::*; + use crate::oxrdf::BlankNodeRef; + + #[test] + fn test_write() -> io::Result<()> { + let mut writer = TriGSerializer::new() + .with_prefix("ex", "http://example.com/") + .unwrap() + .serialize_to_write(Vec::new()); + writer.write_quad(QuadRef::new( + NamedNodeRef::new_unchecked("http://example.com/s"), + NamedNodeRef::new_unchecked("http://example.com/p"), + NamedNodeRef::new_unchecked("http://example.com/o."), + NamedNodeRef::new_unchecked("http://example.com/g"), + ))?; + writer.write_quad(QuadRef::new( + NamedNodeRef::new_unchecked("http://example.com/s"), + NamedNodeRef::new_unchecked("http://example.com/p"), + NamedNodeRef::new_unchecked("http://example.com/o{o}"), + NamedNodeRef::new_unchecked("http://example.com/g"), + ))?; + writer.write_quad(QuadRef::new( + NamedNodeRef::new_unchecked("http://example.com/s"), + NamedNodeRef::new_unchecked("http://example.com/p"), + LiteralRef::new_simple_literal("foo"), + NamedNodeRef::new_unchecked("http://example.com/g"), + ))?; + writer.write_quad(QuadRef::new( + NamedNodeRef::new_unchecked("http://example.com/s"), + NamedNodeRef::new_unchecked("http://example.com/p2"), + LiteralRef::new_language_tagged_literal_unchecked("foo", "en"), + NamedNodeRef::new_unchecked("http://example.com/g"), + ))?; + writer.write_quad(QuadRef::new( + BlankNodeRef::new_unchecked("b"), + NamedNodeRef::new_unchecked("http://example.com/p2"), + BlankNodeRef::new_unchecked("b2"), + NamedNodeRef::new_unchecked("http://example.com/g"), + ))?; + writer.write_quad(QuadRef::new( + BlankNodeRef::new_unchecked("b"), + NamedNodeRef::new_unchecked("http://example.com/p2"), + LiteralRef::new_typed_literal("true", xsd::BOOLEAN), + GraphNameRef::DefaultGraph, + ))?; + writer.write_quad(QuadRef::new( + BlankNodeRef::new_unchecked("b"), + NamedNodeRef::new_unchecked("http://example.org/p2"), + LiteralRef::new_typed_literal("false", xsd::BOOLEAN), + NamedNodeRef::new_unchecked("http://example.com/g2"), + ))?; + assert_eq!( + String::from_utf8(writer.finish()?).unwrap(), + "@prefix ex: <http://example.com/> .\nex:g {\n\tex:s ex:p ex:o\\. , <http://example.com/o{o}> , \"foo\" ;\n\t\tex:p2 \"foo\"@en .\n\t_:b ex:p2 _:b2 .\n}\n_:b ex:p2 true .\nex:g2 {\n\t_:b <http://example.org/p2> false .\n}\n" + ); + Ok(()) + } +} diff --git a/ng-oxigraph/src/oxttl/turtle.rs b/ng-oxigraph/src/oxttl/turtle.rs new file mode 100644 index 0000000..a4420a1 --- /dev/null +++ b/ng-oxigraph/src/oxttl/turtle.rs @@ -0,0 +1,878 @@ +//! A [Turtle](https://www.w3.org/TR/turtle/) streaming parser implemented by [`TurtleParser`] +//! and a serializer implemented by [`TurtleSerializer`]. + +use crate::oxrdf::{GraphNameRef, Triple, TripleRef}; +use crate::oxttl::terse::TriGRecognizer; +#[cfg(feature = "async-tokio")] +use crate::oxttl::toolkit::FromTokioAsyncReadIterator; +use crate::oxttl::toolkit::{FromReadIterator, Parser, TurtleParseError, TurtleSyntaxError}; +#[cfg(feature = "async-tokio")] +use crate::oxttl::trig::ToTokioAsyncWriteTriGWriter; +use crate::oxttl::trig::{LowLevelTriGWriter, ToWriteTriGWriter, TriGSerializer}; +use oxiri::{Iri, IriParseError}; +use std::collections::hash_map::Iter; +use std::collections::HashMap; +use std::io::{self, Read, Write}; +#[cfg(feature = "async-tokio")] +use tokio::io::{AsyncRead, AsyncWrite}; + +/// A [Turtle](https://www.w3.org/TR/turtle/) streaming parser. 
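+///
+/// The parser is streaming: it emits each triple as soon as it has been fully
+/// parsed, without first loading the whole file into memory.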
+///
+/// Support for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) is available behind the `rdf-star` feature and the [`TurtleParser::with_quoted_triples`] option.
+///
+/// Count the number of people:
+/// ```
+/// use oxrdf::vocab::rdf;
+/// use oxrdf::NamedNodeRef;
+/// use oxttl::TurtleParser;
+///
+/// let file = br#"@base <http://example.com/> .
+/// @prefix schema: <http://schema.org/> .
+/// <foo> a schema:Person ;
+///     schema:name "Foo" .
+/// <bar> a schema:Person ;
+///     schema:name "Bar" ."#;
+///
+/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
+/// let mut count = 0;
+/// for triple in TurtleParser::new().parse_read(file.as_ref()) {
+///     let triple = triple?;
+///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+///         count += 1;
+///     }
+/// }
+/// assert_eq!(2, count);
+/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+/// ```
+#[derive(Default)]
+#[must_use]
+pub struct TurtleParser {
+    unchecked: bool,
+    base: Option<Iri<String>>,
+    prefixes: HashMap<String, Iri<String>>,
+    #[cfg(feature = "rdf-star")]
+    with_quoted_triples: bool,
+}
+
+impl TurtleParser {
+    /// Builds a new [`TurtleParser`].
+    #[inline]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Assumes the file is valid, to make parsing faster.
+    ///
+    /// It will skip some validations.
+    ///
+    /// Note that if the file is actually not valid, the parser might emit broken RDF.
+    #[inline]
+    pub fn unchecked(mut self) -> Self {
+        self.unchecked = true;
+        self
+    }
+
+    /// Provides the base IRI against which the relative IRIs in the file are resolved.
+    #[inline]
+    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
+        self.base = Some(Iri::parse(base_iri.into())?);
+        Ok(self)
+    }
+
+    /// Declares a prefix as if it were declared at the beginning of the file.
+    #[inline]
+    pub fn with_prefix(
+        mut self,
+        prefix_name: impl Into<String>,
+        prefix_iri: impl Into<String>,
+    ) -> Result<Self, IriParseError> {
+        self.prefixes
+            .insert(prefix_name.into(), Iri::parse(prefix_iri.into())?);
+        Ok(self)
+    }
+
+    /// Enables [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star).
+    #[cfg(feature = "rdf-star")]
+    #[inline]
+    pub fn with_quoted_triples(mut self) -> Self {
+        self.with_quoted_triples = true;
+        self
+    }
+
+    /// Parses a Turtle file from a [`Read`] implementation.
+    ///
+    /// Count the number of people:
+    /// ```
+    /// use oxrdf::vocab::rdf;
+    /// use oxrdf::NamedNodeRef;
+    /// use oxttl::TurtleParser;
+    ///
+    /// let file = br#"@base <http://example.com/> .
+    /// @prefix schema: <http://schema.org/> .
+    /// <foo> a schema:Person ;
+    ///     schema:name "Foo" .
+    /// <bar> a schema:Person ;
+    ///     schema:name "Bar" ."#;
+    ///
+    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
+    /// let mut count = 0;
+    /// for triple in TurtleParser::new().parse_read(file.as_ref()) {
+    ///     let triple = triple?;
+    ///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+    ///         count += 1;
+    ///     }
+    /// }
+    /// assert_eq!(2, count);
+    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+    /// ```
+    pub fn parse_read<R: Read>(self, read: R) -> FromReadTurtleReader<R> {
+        FromReadTurtleReader {
+            inner: self.parse().parser.parse_read(read),
+        }
+    }
+
+    /// Parses a Turtle file from an [`AsyncRead`] implementation.
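+    ///
+    /// Only available behind the `async-tokio` feature.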
+ /// + /// Count the number of people: + /// ``` + /// use oxrdf::vocab::rdf; + /// use oxrdf::NamedNodeRef; + /// use oxttl::TurtleParser; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), oxttl::TurtleParseError> { + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . + /// <foo> a schema:Person ; + /// schema:name "Foo" . + /// <bar> a schema:Person ; + /// schema:name "Bar" ."#; + /// + /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); + /// let mut count = 0; + /// let mut parser = TurtleParser::new().parse_tokio_async_read(file.as_ref()); + /// while let Some(triple) = parser.next().await { + /// let triple = triple?; + /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { + /// count += 1; + /// } + /// } + /// assert_eq!(2, count); + /// # Ok(()) + /// # } + /// ``` + #[cfg(feature = "async-tokio")] + pub fn parse_tokio_async_read<R: AsyncRead + Unpin>( + self, + read: R, + ) -> FromTokioAsyncReadTurtleReader<R> { + FromTokioAsyncReadTurtleReader { + inner: self.parse().parser.parse_tokio_async_read(read), + } + } + + /// Allows to parse a Turtle file by using a low-level API. + /// + /// Count the number of people: + /// ``` + /// use oxrdf::vocab::rdf; + /// use oxrdf::NamedNodeRef; + /// use oxttl::TurtleParser; + /// + /// let file: [&[u8]; 5] = [ + /// b"@base <http://example.com/>", + /// b". @prefix schema: <http://schema.org/> .", + /// b"<foo> a schema:Person", + /// b" ; schema:name \"Foo\" . <bar>", + /// b" a schema:Person ; schema:name \"Bar\" .", + /// ]; + /// + /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; + /// let mut count = 0; + /// let mut parser = TurtleParser::new().parse(); + /// let mut file_chunks = file.iter(); + /// while !parser.is_end() { + /// // We feed more data to the parser + /// if let Some(chunk) = file_chunks.next() { + /// parser.extend_from_slice(chunk); + /// } else { + /// parser.end(); // It's finished + /// } + /// // We read as many triples from the parser as possible + /// while let Some(triple) = parser.read_next() { + /// let triple = triple?; + /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { + /// count += 1; + /// } + /// } + /// } + /// assert_eq!(2, count); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn parse(self) -> LowLevelTurtleReader { + LowLevelTurtleReader { + parser: TriGRecognizer::new_parser( + false, + #[cfg(feature = "rdf-star")] + self.with_quoted_triples, + self.unchecked, + self.base, + self.prefixes, + ), + } + } +} + +/// Parses a Turtle file from a [`Read`] implementation. Can be built using [`TurtleParser::parse_read`]. +/// +/// Count the number of people: +/// ``` +/// use oxrdf::vocab::rdf; +/// use oxrdf::NamedNodeRef; +/// use oxttl::TurtleParser; +/// +/// let file = br#"@base <http://example.com/> . +/// @prefix schema: <http://schema.org/> . +/// <foo> a schema:Person ; +/// schema:name "Foo" . 
+/// <bar> a schema:Person ; +/// schema:name "Bar" ."#; +/// +/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; +/// let mut count = 0; +/// for triple in TurtleParser::new().parse_read(file.as_ref()) { +/// let triple = triple?; +/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { +/// count += 1; +/// } +/// } +/// assert_eq!(2, count); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[must_use] +pub struct FromReadTurtleReader<R: Read> { + inner: FromReadIterator<R, TriGRecognizer>, +} + +impl<R: Read> FromReadTurtleReader<R> { + /// The list of IRI prefixes considered at the current step of the parsing. + /// + /// This method returns (prefix name, prefix value) tuples. + /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered. + /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned). + /// + /// ``` + /// use oxttl::TurtleParser; + /// + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . + /// <foo> a schema:Person ; + /// schema:name "Foo" ."#; + /// + /// let mut reader = TurtleParser::new().parse_read(file.as_ref()); + /// assert!(reader.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning + /// + /// reader.next().unwrap()?; // We read the first triple + /// assert_eq!( + /// reader.prefixes().collect::<Vec<_>>(), + /// [("schema", "http://schema.org/")] + /// ); // There are now prefixes + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn prefixes(&self) -> TurtlePrefixesIter<'_> { + TurtlePrefixesIter { + inner: self.inner.parser.context.prefixes(), + } + } + + /// The base IRI considered at the current step of the parsing. + /// + /// ``` + /// use oxttl::TurtleParser; + /// + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . + /// <foo> a schema:Person ; + /// schema:name "Foo" ."#; + /// + /// let mut reader = TurtleParser::new().parse_read(file.as_ref()); + /// assert!(reader.base_iri().is_none()); // No base at the beginning because none has been given to the parser. + /// + /// reader.next().unwrap()?; // We read the first triple + /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI. + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn base_iri(&self) -> Option<&str> { + self.inner + .parser + .context + .lexer_options + .base_iri + .as_ref() + .map(Iri::as_str) + } +} + +impl<R: Read> Iterator for FromReadTurtleReader<R> { + type Item = Result<Triple, TurtleParseError>; + + fn next(&mut self) -> Option<Self::Item> { + Some(self.inner.next()?.map(Into::into)) + } +} + +/// Parses a Turtle file from a [`AsyncRead`] implementation. Can be built using [`TurtleParser::parse_tokio_async_read`]. +/// +/// Count the number of people: +/// ``` +/// use oxrdf::vocab::rdf; +/// use oxrdf::NamedNodeRef; +/// use oxttl::TurtleParser; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> Result<(), oxttl::TurtleParseError> { +/// let file = br#"@base <http://example.com/> . +/// @prefix schema: <http://schema.org/> . +/// <foo> a schema:Person ; +/// schema:name "Foo" . 
+/// <bar> a schema:Person ; +/// schema:name "Bar" ."#; +/// +/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); +/// let mut count = 0; +/// let mut parser = TurtleParser::new().parse_tokio_async_read(file.as_ref()); +/// while let Some(triple) = parser.next().await { +/// let triple = triple?; +/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { +/// count += 1; +/// } +/// } +/// assert_eq!(2, count); +/// # Ok(()) +/// # } +/// ``` +#[cfg(feature = "async-tokio")] +#[must_use] +pub struct FromTokioAsyncReadTurtleReader<R: AsyncRead + Unpin> { + inner: FromTokioAsyncReadIterator<R, TriGRecognizer>, +} + +#[cfg(feature = "async-tokio")] +impl<R: AsyncRead + Unpin> FromTokioAsyncReadTurtleReader<R> { + /// Reads the next triple or returns `None` if the file is finished. + pub async fn next(&mut self) -> Option<Result<Triple, TurtleParseError>> { + Some(self.inner.next().await?.map(Into::into)) + } + + /// The list of IRI prefixes considered at the current step of the parsing. + /// + /// This method returns (prefix name, prefix value) tuples. + /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered. + /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned). + /// + /// ``` + /// use oxttl::TurtleParser; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), oxttl::TurtleParseError> { + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . + /// <foo> a schema:Person ; + /// schema:name "Foo" ."#; + /// + /// let mut reader = TurtleParser::new().parse_tokio_async_read(file.as_ref()); + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning + /// + /// reader.next().await.unwrap()?; // We read the first triple + /// assert_eq!( + /// reader.prefixes().collect::<Vec<_>>(), + /// [("schema", "http://schema.org/")] + /// ); // There are now prefixes + /// # Ok(()) + /// # } + /// ``` + pub fn prefixes(&self) -> TurtlePrefixesIter<'_> { + TurtlePrefixesIter { + inner: self.inner.parser.context.prefixes(), + } + } + + /// The base IRI considered at the current step of the parsing. + /// + /// ``` + /// use oxttl::TurtleParser; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), oxttl::TurtleParseError> { + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . + /// <foo> a schema:Person ; + /// schema:name "Foo" ."#; + /// + /// let mut reader = TurtleParser::new().parse_tokio_async_read(file.as_ref()); + /// assert!(reader.base_iri().is_none()); // No base IRI at the beginning + /// + /// reader.next().await.unwrap()?; // We read the first triple + /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI + /// # Ok(()) + /// # } + /// ``` + pub fn base_iri(&self) -> Option<&str> { + self.inner + .parser + .context + .lexer_options + .base_iri + .as_ref() + .map(Iri::as_str) + } +} + +/// Parses a Turtle file by using a low-level API. Can be built using [`TurtleParser::parse`]. +/// +/// Count the number of people: +/// ``` +/// use oxrdf::vocab::rdf; +/// use oxrdf::NamedNodeRef; +/// use oxttl::TurtleParser; +/// +/// let file: [&[u8]; 5] = [ +/// b"@base <http://example.com/>", +/// b". @prefix schema: <http://schema.org/> .", +/// b"<foo> a schema:Person", +/// b" ; schema:name \"Foo\" . 
<bar>", +/// b" a schema:Person ; schema:name \"Bar\" .", +/// ]; +/// +/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; +/// let mut count = 0; +/// let mut parser = TurtleParser::new().parse(); +/// let mut file_chunks = file.iter(); +/// while !parser.is_end() { +/// // We feed more data to the parser +/// if let Some(chunk) = file_chunks.next() { +/// parser.extend_from_slice(chunk); +/// } else { +/// parser.end(); // It's finished +/// } +/// // We read as many triples from the parser as possible +/// while let Some(triple) = parser.read_next() { +/// let triple = triple?; +/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { +/// count += 1; +/// } +/// } +/// } +/// assert_eq!(2, count); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +pub struct LowLevelTurtleReader { + parser: Parser<TriGRecognizer>, +} + +impl LowLevelTurtleReader { + /// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data. + pub fn extend_from_slice(&mut self, other: &[u8]) { + self.parser.extend_from_slice(other) + } + + /// Tell the parser that the file is finished. + /// + /// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values. + pub fn end(&mut self) { + self.parser.end() + } + + /// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`. + pub fn is_end(&self) -> bool { + self.parser.is_end() + } + + /// Attempt to parse a new triple from the already provided data. + /// + /// Returns [`None`] if the parsing is finished or more data is required. + /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice). + pub fn read_next(&mut self) -> Option<Result<Triple, TurtleSyntaxError>> { + Some(self.parser.read_next()?.map(Into::into)) + } + + /// The list of IRI prefixes considered at the current step of the parsing. + /// + /// This method returns (prefix name, prefix value) tuples. + /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered. + /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned). + /// + /// ``` + /// use oxttl::TurtleParser; + /// + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . + /// <foo> a schema:Person ; + /// schema:name "Foo" ."#; + /// + /// let mut reader = TurtleParser::new().parse(); + /// reader.extend_from_slice(file); + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning + /// + /// reader.read_next().unwrap()?; // We read the first triple + /// assert_eq!( + /// reader.prefixes().collect::<Vec<_>>(), + /// [("schema", "http://schema.org/")] + /// ); // There are now prefixes + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn prefixes(&self) -> TurtlePrefixesIter<'_> { + TurtlePrefixesIter { + inner: self.parser.context.prefixes(), + } + } + + /// The base IRI considered at the current step of the parsing. + /// + /// ``` + /// use oxttl::TurtleParser; + /// + /// let file = br#"@base <http://example.com/> . + /// @prefix schema: <http://schema.org/> . 
+    /// <foo> a schema:Person ;
+    ///     schema:name "Foo" ."#;
+    ///
+    /// let mut reader = TurtleParser::new().parse();
+    /// reader.extend_from_slice(file);
+    /// assert!(reader.base_iri().is_none()); // No base IRI at the beginning
+    ///
+    /// reader.read_next().unwrap()?; // We read the first triple
+    /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI
+    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+    /// ```
+    pub fn base_iri(&self) -> Option<&str> {
+        self.parser
+            .context
+            .lexer_options
+            .base_iri
+            .as_ref()
+            .map(Iri::as_str)
+    }
+}
+
+/// Iterator over the file prefixes.
+///
+/// See [`LowLevelTurtleReader::prefixes`].
+pub struct TurtlePrefixesIter<'a> {
+    inner: Iter<'a, String, Iri<String>>,
+}
+
+impl<'a> Iterator for TurtlePrefixesIter<'a> {
+    type Item = (&'a str, &'a str);
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        let (key, value) = self.inner.next()?;
+        Some((key.as_str(), value.as_str()))
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+}
+
+/// A [Turtle](https://www.w3.org/TR/turtle/) serializer.
+///
+/// Support for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) is available behind the `rdf-star` feature.
+///
+/// ```
+/// use oxrdf::{NamedNodeRef, TripleRef};
+/// use oxttl::TurtleSerializer;
+///
+/// let mut writer = TurtleSerializer::new()
+///     .with_prefix("schema", "http://schema.org/")?
+///     .serialize_to_write(Vec::new());
+/// writer.write_triple(TripleRef::new(
+///     NamedNodeRef::new("http://example.com#me")?,
+///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
+///     NamedNodeRef::new("http://schema.org/Person")?,
+/// ))?;
+/// assert_eq!(
+///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
+///     writer.finish()?.as_slice()
+/// );
+/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+/// ```
+#[derive(Default)]
+#[must_use]
+pub struct TurtleSerializer {
+    inner: TriGSerializer,
+}
+
+impl TurtleSerializer {
+    /// Builds a new [`TurtleSerializer`].
+    #[inline]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Declares a prefix to use when serializing: IRIs starting with the prefix IRI are written as prefixed names.
+    #[inline]
+    pub fn with_prefix(
+        mut self,
+        prefix_name: impl Into<String>,
+        prefix_iri: impl Into<String>,
+    ) -> Result<Self, IriParseError> {
+        self.inner = self.inner.with_prefix(prefix_name, prefix_iri)?;
+        Ok(self)
+    }
+
+    /// Writes a Turtle file to a [`Write`] implementation.
+    ///
+    /// ```
+    /// use oxrdf::{NamedNodeRef, TripleRef};
+    /// use oxttl::TurtleSerializer;
+    ///
+    /// let mut writer = TurtleSerializer::new()
+    ///     .with_prefix("schema", "http://schema.org/")?
+    ///     .serialize_to_write(Vec::new());
+    /// writer.write_triple(TripleRef::new(
+    ///     NamedNodeRef::new("http://example.com#me")?,
+    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
+    ///     NamedNodeRef::new("http://schema.org/Person")?,
+    /// ))?;
+    /// assert_eq!(
+    ///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
+    ///     writer.finish()?.as_slice()
+    /// );
+    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
+    /// ```
+    pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteTurtleWriter<W> {
+        ToWriteTurtleWriter {
+            inner: self.inner.serialize_to_write(write),
+        }
+    }
+
+    /// Writes a Turtle file to an [`AsyncWrite`] implementation.
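+    ///
+    /// Only available behind the `async-tokio` feature.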
+ /// + /// ``` + /// use oxrdf::{NamedNodeRef, TripleRef}; + /// use oxttl::TurtleSerializer; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(),Box<dyn std::error::Error>> { + /// let mut writer = TurtleSerializer::new() + /// .with_prefix("schema", "http://schema.org/")? + /// .serialize_to_tokio_async_write(Vec::new()); + /// writer + /// .write_triple(TripleRef::new( + /// NamedNodeRef::new_unchecked("http://example.com#me"), + /// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + /// NamedNodeRef::new_unchecked("http://schema.org/Person"), + /// )) + /// .await?; + /// assert_eq!( + /// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n", + /// writer.finish().await?.as_slice() + /// ); + /// # Ok(()) + /// # } + /// ``` + #[cfg(feature = "async-tokio")] + pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>( + self, + write: W, + ) -> ToTokioAsyncWriteTurtleWriter<W> { + ToTokioAsyncWriteTurtleWriter { + inner: self.inner.serialize_to_tokio_async_write(write), + } + } + + /// Builds a low-level Turtle writer. + /// + /// ``` + /// use oxrdf::{NamedNodeRef, TripleRef}; + /// use oxttl::TurtleSerializer; + /// + /// let mut buf = Vec::new(); + /// let mut writer = TurtleSerializer::new() + /// .with_prefix("schema", "http://schema.org/")? + /// .serialize(); + /// writer.write_triple( + /// TripleRef::new( + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, + /// NamedNodeRef::new("http://schema.org/Person")?, + /// ), + /// &mut buf, + /// )?; + /// writer.finish(&mut buf)?; + /// assert_eq!( + /// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n", + /// buf.as_slice() + /// ); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn serialize(self) -> LowLevelTurtleWriter { + LowLevelTurtleWriter { + inner: self.inner.serialize(), + } + } +} + +/// Writes a Turtle file to a [`Write`] implementation. Can be built using [`TurtleSerializer::serialize_to_write`]. +/// +/// ``` +/// use oxrdf::{NamedNodeRef, TripleRef}; +/// use oxttl::TurtleSerializer; +/// +/// let mut writer = TurtleSerializer::new() +/// .with_prefix("schema", "http://schema.org/")? +/// .serialize_to_write(Vec::new()); +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// ))?; +/// assert_eq!( +/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n", +/// writer.finish()?.as_slice() +/// ); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[must_use] +pub struct ToWriteTurtleWriter<W: Write> { + inner: ToWriteTriGWriter<W>, +} + +impl<W: Write> ToWriteTurtleWriter<W> { + /// Writes an extra triple. + pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> { + self.inner + .write_quad(t.into().in_graph(GraphNameRef::DefaultGraph)) + } + + /// Ends the write process and returns the underlying [`Write`]. + pub fn finish(self) -> io::Result<W> { + self.inner.finish() + } +} + +/// Writes a Turtle file to a [`AsyncWrite`] implementation. Can be built using [`TurtleSerializer::serialize_to_tokio_async_write`]. 
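+///
+/// Only available behind the `async-tokio` feature.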
+/// +/// ``` +/// use oxrdf::{NamedNodeRef, TripleRef}; +/// use oxttl::TurtleSerializer; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> Result<(), Box<dyn std::error::Error>> { +/// let mut writer = TurtleSerializer::new() +/// .with_prefix("schema", "http://schema.org/")? +/// .serialize_to_tokio_async_write(Vec::new()); +/// writer +/// .write_triple(TripleRef::new( +/// NamedNodeRef::new_unchecked("http://example.com#me"), +/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), +/// NamedNodeRef::new_unchecked("http://schema.org/Person"), +/// )) +/// .await?; +/// assert_eq!( +/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n", +/// writer.finish().await?.as_slice() +/// ); +/// # Ok(()) +/// # } +/// ``` +#[cfg(feature = "async-tokio")] +#[must_use] +pub struct ToTokioAsyncWriteTurtleWriter<W: AsyncWrite + Unpin> { + inner: ToTokioAsyncWriteTriGWriter<W>, +} + +#[cfg(feature = "async-tokio")] +impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteTurtleWriter<W> { + /// Writes an extra triple. + pub async fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> { + self.inner + .write_quad(t.into().in_graph(GraphNameRef::DefaultGraph)) + .await + } + + /// Ends the write process and returns the underlying [`Write`]. + pub async fn finish(self) -> io::Result<W> { + self.inner.finish().await + } +} + +/// Writes a Turtle file by using a low-level API. Can be built using [`TurtleSerializer::serialize`]. +/// +/// ``` +/// use oxrdf::{NamedNodeRef, TripleRef}; +/// use oxttl::TurtleSerializer; +/// +/// let mut buf = Vec::new(); +/// let mut writer = TurtleSerializer::new() +/// .with_prefix("schema", "http://schema.org/")? +/// .serialize(); +/// writer.write_triple( +/// TripleRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// ), +/// &mut buf, +/// )?; +/// writer.finish(&mut buf)?; +/// assert_eq!( +/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n", +/// buf.as_slice() +/// ); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +pub struct LowLevelTurtleWriter { + inner: LowLevelTriGWriter, +} + +impl LowLevelTurtleWriter { + /// Writes an extra triple. + pub fn write_triple<'a>( + &mut self, + t: impl Into<TripleRef<'a>>, + write: impl Write, + ) -> io::Result<()> { + self.inner + .write_quad(t.into().in_graph(GraphNameRef::DefaultGraph), write) + } + + /// Finishes to write the file. 
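+    ///
+    /// This delegates to [`LowLevelTriGWriter::finish`] to terminate the last
+    /// statement. It does not flush the underlying [`Write`].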
+ pub fn finish(&mut self, write: impl Write) -> io::Result<()> { + self.inner.finish(write) + } +} + +#[cfg(test)] +#[allow(clippy::panic_in_result_fn)] +mod tests { + use super::*; + use crate::oxrdf::{BlankNodeRef, LiteralRef, NamedNodeRef}; + + #[test] + fn test_write() -> io::Result<()> { + let mut writer = TurtleSerializer::new().serialize_to_write(Vec::new()); + writer.write_triple(TripleRef::new( + NamedNodeRef::new_unchecked("http://example.com/s"), + NamedNodeRef::new_unchecked("http://example.com/p"), + NamedNodeRef::new_unchecked("http://example.com/o"), + ))?; + writer.write_triple(TripleRef::new( + NamedNodeRef::new_unchecked("http://example.com/s"), + NamedNodeRef::new_unchecked("http://example.com/p"), + LiteralRef::new_simple_literal("foo"), + ))?; + writer.write_triple(TripleRef::new( + NamedNodeRef::new_unchecked("http://example.com/s"), + NamedNodeRef::new_unchecked("http://example.com/p2"), + LiteralRef::new_language_tagged_literal_unchecked("foo", "en"), + ))?; + writer.write_triple(TripleRef::new( + BlankNodeRef::new_unchecked("b"), + NamedNodeRef::new_unchecked("http://example.com/p2"), + BlankNodeRef::new_unchecked("b2"), + ))?; + assert_eq!(String::from_utf8(writer.finish()?).unwrap(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> , \"foo\" ;\n\t<http://example.com/p2> \"foo\"@en .\n_:b <http://example.com/p2> _:b2 .\n"); + Ok(()) + } +} diff --git a/ng-oxigraph/src/sparesults/README.md b/ng-oxigraph/src/sparesults/README.md new file mode 100644 index 0000000..df5a0fb --- /dev/null +++ b/ng-oxigraph/src/sparesults/README.md @@ -0,0 +1,72 @@ +Sparesults +========== + +[](https://crates.io/crates/sparesults) +[](https://docs.rs/sparesults) +[](https://crates.io/crates/sparesults) +[](https://github.com/oxigraph/oxigraph/actions) +[](https://gitter.im/oxigraph/community) + +Sparesults is a set of parsers and serializers for [SPARQL](https://www.w3.org/TR/sparql11-overview/) query results formats. + +It supports [SPARQL Query Results XML Format (Second Edition)](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/). + +Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#query-result-formats) is also available behind the `rdf-star` feature. + +This crate is intended to be a building piece for SPARQL client and server implementations in Rust like [Oxigraph](https://oxigraph.org). + +The entry points of this library are the two [`QueryResultsParser`] and [`QueryResultsSerializer`] structs. + +Usage example converting a JSON result file into a TSV result file: +```rust +use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader, QueryResultsSerializer}; +use std::io::Result; + +fn convert_json_to_tsv(json_file: &[u8]) -> Result<Vec<u8>> { + let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json); + let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv); + // We start to read the JSON file and see which kind of results it is + match json_parser.parse_read(json_file)? 
{ + FromReadQueryResultsReader::Boolean(value) => { + // it's a boolean result, we copy it in TSV to the output buffer + tsv_serializer.serialize_boolean_to_write(Vec::new(), value) + }, + FromReadQueryResultsReader::Solutions(solutions_reader) => { + // it's a set of solutions, we create a writer and we write to it while reading in streaming from the JSON file + let mut serialize_solutions_to_write = tsv_serializer.serialize_solutions_to_write(Vec::new(), solutions_reader.variables().to_vec())?; + for solution in solutions_reader { + serialize_solutions_to_write.write(&solution?)?; + } + serialize_solutions_to_write.finish() + } + } +} + +// Let's test with a boolean +assert_eq!( + convert_json_to_tsv(b"{\"boolean\":true}".as_slice()).unwrap(), + b"true" +); + +// And with a set of solutions +assert_eq!( + convert_json_to_tsv(b"{\"head\":{\"vars\":[\"foo\",\"bar\"]},\"results\":{\"bindings\":[{\"foo\":{\"type\":\"literal\",\"value\":\"test\"}}]}}".as_slice()).unwrap(), + b"?foo\t?bar\n\"test\"\t\n" +); +``` + +## License + +This project is licensed under either of + +* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or + `<http://www.apache.org/licenses/LICENSE-2.0>`) +* MIT license ([LICENSE-MIT](../LICENSE-MIT) or + `<http://opensource.org/licenses/MIT>`) + +at your option. + + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. diff --git a/ng-oxigraph/src/sparesults/csv.rs b/ng-oxigraph/src/sparesults/csv.rs new file mode 100644 index 0000000..11138f1 --- /dev/null +++ b/ng-oxigraph/src/sparesults/csv.rs @@ -0,0 +1,948 @@ +//! Implementation of [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/) + +use crate::oxrdf::vocab::xsd; +use crate::oxrdf::*; +use crate::sparesults::error::{ + QueryResultsParseError, QueryResultsSyntaxError, SyntaxErrorKind, TextPosition, +}; +use memchr::memchr; +use std::io::{self, Read, Write}; +use std::str::{self, FromStr}; +#[cfg(feature = "async-tokio")] +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; + +const MAX_BUFFER_SIZE: usize = 4096 * 4096; + +pub fn write_boolean_csv_result<W: Write>(mut write: W, value: bool) -> io::Result<W> { + write.write_all(if value { b"true" } else { b"false" })?; + Ok(write) +} + +#[cfg(feature = "async-tokio")] +pub async fn tokio_async_write_boolean_csv_result<W: AsyncWrite + Unpin>( + mut write: W, + value: bool, +) -> io::Result<W> { + write + .write_all(if value { b"true" } else { b"false" }) + .await?; + Ok(write) +} + +pub struct ToWriteCsvSolutionsWriter<W: Write> { + inner: InnerCsvSolutionsWriter, + write: W, + buffer: String, +} + +impl<W: Write> ToWriteCsvSolutionsWriter<W> { + pub fn start(mut write: W, variables: Vec<Variable>) -> io::Result<Self> { + let mut buffer = String::new(); + let inner = InnerCsvSolutionsWriter::start(&mut buffer, variables); + write.write_all(buffer.as_bytes())?; + buffer.clear(); + Ok(Self { + inner, + write, + buffer, + }) + } + + pub fn write<'a>( + &mut self, + solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>, + ) -> io::Result<()> { + self.inner.write(&mut self.buffer, solution); + self.write.write_all(self.buffer.as_bytes())?; + self.buffer.clear(); + Ok(()) + } + + pub fn finish(self) -> W { + self.write + } +} + +#[cfg(feature = "async-tokio")] +pub struct 
ToTokioAsyncWriteCsvSolutionsWriter<W: AsyncWrite + Unpin> { + inner: InnerCsvSolutionsWriter, + write: W, + buffer: String, +} + +#[cfg(feature = "async-tokio")] +impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteCsvSolutionsWriter<W> { + pub async fn start(mut write: W, variables: Vec<Variable>) -> io::Result<Self> { + let mut buffer = String::new(); + let inner = InnerCsvSolutionsWriter::start(&mut buffer, variables); + write.write_all(buffer.as_bytes()).await?; + buffer.clear(); + Ok(Self { + inner, + write, + buffer, + }) + } + + pub async fn write<'a>( + &mut self, + solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>, + ) -> io::Result<()> { + self.inner.write(&mut self.buffer, solution); + self.write.write_all(self.buffer.as_bytes()).await?; + self.buffer.clear(); + Ok(()) + } + + pub fn finish(self) -> W { + self.write + } +} + +struct InnerCsvSolutionsWriter { + variables: Vec<Variable>, +} + +impl InnerCsvSolutionsWriter { + fn start(output: &mut String, variables: Vec<Variable>) -> Self { + let mut start_vars = true; + for variable in &variables { + if start_vars { + start_vars = false; + } else { + output.push(','); + } + output.push_str(variable.as_str()); + } + output.push_str("\r\n"); + Self { variables } + } + + fn write<'a>( + &self, + output: &mut String, + solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>, + ) { + let mut values = vec![None; self.variables.len()]; + for (variable, value) in solution { + if let Some(position) = self.variables.iter().position(|v| *v == variable) { + values[position] = Some(value); + } + } + let mut start_binding = true; + for value in values { + if start_binding { + start_binding = false; + } else { + output.push(','); + } + if let Some(value) = value { + write_csv_term(output, value); + } + } + output.push_str("\r\n"); + } +} + +fn write_csv_term<'a>(output: &mut String, term: impl Into<TermRef<'a>>) { + match term.into() { + TermRef::NamedNode(uri) => output.push_str(uri.as_str()), + TermRef::BlankNode(bnode) => { + output.push_str("_:"); + output.push_str(bnode.as_str()) + } + TermRef::Literal(literal) => write_escaped_csv_string(output, literal.value()), + #[cfg(feature = "rdf-star")] + TermRef::Triple(triple) => { + write_csv_term(output, &triple.subject); + output.push(' '); + write_csv_term(output, &triple.predicate); + output.push(' '); + write_csv_term(output, &triple.object) + } + } +} + +fn write_escaped_csv_string(output: &mut String, s: &str) { + if s.bytes().any(|c| matches!(c, b'"' | b',' | b'\n' | b'\r')) { + output.push('"'); + for c in s.chars() { + if c == '"' { + output.push('"'); + output.push('"'); + } else { + output.push(c) + }; + } + output.push('"'); + } else { + output.push_str(s) + } +} + +pub struct ToWriteTsvSolutionsWriter<W: Write> { + inner: InnerTsvSolutionsWriter, + write: W, + buffer: String, +} + +impl<W: Write> ToWriteTsvSolutionsWriter<W> { + pub fn start(mut write: W, variables: Vec<Variable>) -> io::Result<Self> { + let mut buffer = String::new(); + let inner = InnerTsvSolutionsWriter::start(&mut buffer, variables); + write.write_all(buffer.as_bytes())?; + buffer.clear(); + Ok(Self { + inner, + write, + buffer, + }) + } + + pub fn write<'a>( + &mut self, + solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>, + ) -> io::Result<()> { + self.inner.write(&mut self.buffer, solution); + self.write.write_all(self.buffer.as_bytes())?; + self.buffer.clear(); + Ok(()) + } + + pub fn finish(self) -> W { + self.write + } +} + +#[cfg(feature = "async-tokio")] +pub 
struct ToTokioAsyncWriteTsvSolutionsWriter<W: AsyncWrite + Unpin> { + inner: InnerTsvSolutionsWriter, + write: W, + buffer: String, +} + +#[cfg(feature = "async-tokio")] +impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteTsvSolutionsWriter<W> { + pub async fn start(mut write: W, variables: Vec<Variable>) -> io::Result<Self> { + let mut buffer = String::new(); + let inner = InnerTsvSolutionsWriter::start(&mut buffer, variables); + write.write_all(buffer.as_bytes()).await?; + buffer.clear(); + Ok(Self { + inner, + write, + buffer, + }) + } + + pub async fn write<'a>( + &mut self, + solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>, + ) -> io::Result<()> { + self.inner.write(&mut self.buffer, solution); + self.write.write_all(self.buffer.as_bytes()).await?; + self.buffer.clear(); + Ok(()) + } + + pub fn finish(self) -> W { + self.write + } +} + +struct InnerTsvSolutionsWriter { + variables: Vec<Variable>, +} + +impl InnerTsvSolutionsWriter { + fn start(output: &mut String, variables: Vec<Variable>) -> Self { + let mut start_vars = true; + for variable in &variables { + if start_vars { + start_vars = false; + } else { + output.push('\t'); + } + output.push('?'); + output.push_str(variable.as_str()); + } + output.push('\n'); + Self { variables } + } + + fn write<'a>( + &self, + output: &mut String, + solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>, + ) { + let mut values = vec![None; self.variables.len()]; + for (variable, value) in solution { + if let Some(position) = self.variables.iter().position(|v| *v == variable) { + values[position] = Some(value); + } + } + let mut start_binding = true; + for value in values { + if start_binding { + start_binding = false; + } else { + output.push('\t'); + } + if let Some(value) = value { + write_tsv_term(output, value); + } + } + output.push('\n'); + } +} + +fn write_tsv_term<'a>(output: &mut String, term: impl Into<TermRef<'a>>) { + match term.into() { + TermRef::NamedNode(node) => { + output.push('<'); + output.push_str(node.as_str()); + output.push('>'); + } + TermRef::BlankNode(node) => { + output.push_str("_:"); + output.push_str(node.as_str()); + } + TermRef::Literal(literal) => { + let value = literal.value(); + if let Some(language) = literal.language() { + write_tsv_quoted_str(output, value); + output.push('@'); + output.push_str(language); + } else { + match literal.datatype() { + xsd::BOOLEAN if is_turtle_boolean(value) => output.push_str(value), + xsd::INTEGER if is_turtle_integer(value) => output.push_str(value), + xsd::DECIMAL if is_turtle_decimal(value) => output.push_str(value), + xsd::DOUBLE if is_turtle_double(value) => output.push_str(value), + xsd::STRING => write_tsv_quoted_str(output, value), + datatype => { + write_tsv_quoted_str(output, value); + output.push_str("^^"); + write_tsv_term(output, datatype); + } + } + } + } + #[cfg(feature = "rdf-star")] + TermRef::Triple(triple) => { + output.push_str("<< "); + write_tsv_term(output, &triple.subject); + output.push(' '); + write_tsv_term(output, &triple.predicate); + output.push(' '); + write_tsv_term(output, &triple.object); + output.push_str(" >>"); + } + } +} + +fn write_tsv_quoted_str(output: &mut String, string: &str) { + output.push('"'); + for c in string.chars() { + match c { + '\t' => output.push_str("\\t"), + '\n' => output.push_str("\\n"), + '\r' => output.push_str("\\r"), + '"' => output.push_str("\\\""), + '\\' => output.push_str("\\\\"), + _ => output.push(c), + }; + } + output.push('"'); +} + +fn is_turtle_boolean(value: &str) -> bool { + 
matches!(value, "true" | "false") +} + +fn is_turtle_integer(value: &str) -> bool { + // [19] INTEGER ::= [+-]? [0-9]+ + let mut value = value.as_bytes(); + if let Some(v) = value.strip_prefix(b"+") { + value = v; + } else if let Some(v) = value.strip_prefix(b"-") { + value = v; + } + !value.is_empty() && value.iter().all(u8::is_ascii_digit) +} + +fn is_turtle_decimal(value: &str) -> bool { + // [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+ + let mut value = value.as_bytes(); + if let Some(v) = value.strip_prefix(b"+") { + value = v; + } else if let Some(v) = value.strip_prefix(b"-") { + value = v; + } + while value.first().map_or(false, u8::is_ascii_digit) { + value = &value[1..]; + } + let Some(value) = value.strip_prefix(b".") else { + return false; + }; + !value.is_empty() && value.iter().all(u8::is_ascii_digit) +} + +fn is_turtle_double(value: &str) -> bool { + // [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT) + // [154s] EXPONENT ::= [eE] [+-]? [0-9]+ + let mut value = value.as_bytes(); + if let Some(v) = value.strip_prefix(b"+") { + value = v; + } else if let Some(v) = value.strip_prefix(b"-") { + value = v; + } + let mut with_before = false; + while value.first().map_or(false, u8::is_ascii_digit) { + value = &value[1..]; + with_before = true; + } + let mut with_after = false; + if let Some(v) = value.strip_prefix(b".") { + value = v; + while value.first().map_or(false, u8::is_ascii_digit) { + value = &value[1..]; + with_after = true; + } + } + if let Some(v) = value.strip_prefix(b"e") { + value = v; + } else if let Some(v) = value.strip_prefix(b"E") { + value = v; + } else { + return false; + } + if let Some(v) = value.strip_prefix(b"+") { + value = v; + } else if let Some(v) = value.strip_prefix(b"-") { + value = v; + } + (with_before || with_after) && !value.is_empty() && value.iter().all(u8::is_ascii_digit) +} + +pub enum FromReadTsvQueryResultsReader<R: Read> { + Solutions { + variables: Vec<Variable>, + solutions: FromReadTsvSolutionsReader<R>, + }, + Boolean(bool), +} + +impl<R: Read> FromReadTsvQueryResultsReader<R> { + pub fn read(mut read: R) -> Result<Self, QueryResultsParseError> { + let mut reader = LineReader::new(); + let mut buffer = Vec::new(); + let line = reader.next_line(&mut buffer, &mut read)?; + Ok(match inner_read_first_line(reader, line)? 
{ + TsvInnerQueryResults::Solutions { + variables, + solutions, + } => Self::Solutions { + variables, + solutions: FromReadTsvSolutionsReader { + read, + inner: solutions, + buffer, + }, + }, + TsvInnerQueryResults::Boolean(value) => Self::Boolean(value), + }) + } +} + +pub struct FromReadTsvSolutionsReader<R: Read> { + read: R, + inner: TsvInnerSolutionsReader, + buffer: Vec<u8>, +} + +impl<R: Read> FromReadTsvSolutionsReader<R> { + pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> { + let line = self + .inner + .reader + .next_line(&mut self.buffer, &mut self.read)?; + self.inner.read_next(line) + } +} + +#[cfg(feature = "async-tokio")] +pub enum FromTokioAsyncReadTsvQueryResultsReader<R: AsyncRead + Unpin> { + Solutions { + variables: Vec<Variable>, + solutions: FromTokioAsyncReadTsvSolutionsReader<R>, + }, + Boolean(bool), +} + +#[cfg(feature = "async-tokio")] +impl<R: AsyncRead + Unpin> FromTokioAsyncReadTsvQueryResultsReader<R> { + pub async fn read(mut read: R) -> Result<Self, QueryResultsParseError> { + let mut reader = LineReader::new(); + let mut buffer = Vec::new(); + let line = reader.next_line_tokio_async(&mut buffer, &mut read).await?; + Ok(match inner_read_first_line(reader, line)? { + TsvInnerQueryResults::Solutions { + variables, + solutions, + } => Self::Solutions { + variables, + solutions: FromTokioAsyncReadTsvSolutionsReader { + read, + inner: solutions, + buffer, + }, + }, + TsvInnerQueryResults::Boolean(value) => Self::Boolean(value), + }) + } +} + +#[cfg(feature = "async-tokio")] +pub struct FromTokioAsyncReadTsvSolutionsReader<R: AsyncRead + Unpin> { + read: R, + inner: TsvInnerSolutionsReader, + buffer: Vec<u8>, +} + +#[cfg(feature = "async-tokio")] +impl<R: AsyncRead + Unpin> FromTokioAsyncReadTsvSolutionsReader<R> { + pub async fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> { + let line = self + .inner + .reader + .next_line_tokio_async(&mut self.buffer, &mut self.read) + .await?; + self.inner.read_next(line) + } +} + +enum TsvInnerQueryResults { + Solutions { + variables: Vec<Variable>, + solutions: TsvInnerSolutionsReader, + }, + Boolean(bool), +} + +fn inner_read_first_line( + reader: LineReader, + line: &str, +) -> Result<TsvInnerQueryResults, QueryResultsParseError> { + let line = line.trim_matches(|c| matches!(c, ' ' | '\r' | '\n')); + if line.eq_ignore_ascii_case("true") { + return Ok(TsvInnerQueryResults::Boolean(true)); + } + if line.eq_ignore_ascii_case("false") { + return Ok(TsvInnerQueryResults::Boolean(false)); + } + let mut variables = Vec::new(); + if !line.is_empty() { + for v in line.split('\t') { + let v = v.trim(); + if v.is_empty() { + return Err(QueryResultsSyntaxError::msg("Empty column on the first row. 
The first row should be a list of variables like ?foo or $bar").into()); + } + let variable = Variable::from_str(v).map_err(|e| { + QueryResultsSyntaxError::msg(format!("Invalid variable declaration '{v}': {e}")) + })?; + if variables.contains(&variable) { + return Err(QueryResultsSyntaxError::msg(format!( + "The variable {variable} is declared twice" + )) + .into()); + } + variables.push(variable); + } + } + let column_len = variables.len(); + Ok(TsvInnerQueryResults::Solutions { + variables, + solutions: TsvInnerSolutionsReader { reader, column_len }, + }) +} + +struct TsvInnerSolutionsReader { + reader: LineReader, + column_len: usize, +} + +impl TsvInnerSolutionsReader { + #[allow(clippy::unwrap_in_result)] + pub fn read_next( + &self, + line: &str, + ) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> { + if line.is_empty() { + return Ok(None); // EOF + } + let elements = line + .split('\t') + .enumerate() + .map(|(i, v)| { + let v = v.trim(); + if v.is_empty() { + Ok(None) + } else { + Ok(Some(Term::from_str(v).map_err(|e| { + let start_position_char = line + .split('\t') + .take(i) + .map(|c| c.chars().count() + 1) + .sum::<usize>(); + let start_position_bytes = + line.split('\t').take(i).map(|c| c.len() + 1).sum::<usize>(); + QueryResultsSyntaxError(SyntaxErrorKind::Term { + error: e, + term: v.into(), + location: TextPosition { + line: self.reader.line_count - 1, + column: start_position_char.try_into().unwrap(), + offset: self.reader.last_line_start + + u64::try_from(start_position_bytes).unwrap(), + }..TextPosition { + line: self.reader.line_count - 1, + column: (start_position_char + v.chars().count()) + .try_into() + .unwrap(), + offset: self.reader.last_line_start + + u64::try_from(start_position_bytes + v.len()).unwrap(), + }, + }) + })?)) + } + }) + .collect::<Result<Vec<_>, QueryResultsParseError>>()?; + if elements.len() == self.column_len { + Ok(Some(elements)) + } else if self.column_len == 0 && elements == [None] { + Ok(Some(Vec::new())) // Zero columns case + } else { + Err(QueryResultsSyntaxError::located_message( + format!( + "This TSV files has {} columns but we found a row on line {} with {} columns: {}", + self.column_len, + self.reader.line_count - 1, + elements.len(), + line + ), + TextPosition { + line: self.reader.line_count - 1, + column: 0, + offset: self.reader.last_line_start, + }..TextPosition { + line: self.reader.line_count - 1, + column: line.chars().count().try_into().unwrap(), + offset: self.reader.last_line_end, + }, + ) + .into()) + } + } +} + +struct LineReader { + buffer_start: usize, + buffer_end: usize, + line_count: u64, + last_line_start: u64, + last_line_end: u64, +} + +impl LineReader { + fn new() -> Self { + Self { + buffer_start: 0, + buffer_end: 0, + line_count: 0, + last_line_start: 0, + last_line_end: 0, + } + } + + #[allow(clippy::unwrap_in_result)] + fn next_line<'a>( + &mut self, + buffer: &'a mut Vec<u8>, + read: &mut impl Read, + ) -> io::Result<&'a str> { + let line_end = loop { + if let Some(eol) = memchr(b'\n', &buffer[self.buffer_start..self.buffer_end]) { + break self.buffer_start + eol + 1; + } + if self.buffer_start > 0 { + buffer.copy_within(self.buffer_start..self.buffer_end, 0); + self.buffer_end -= self.buffer_start; + self.buffer_start = 0; + } + if self.buffer_end + 1024 > buffer.len() { + if self.buffer_end + 1024 > MAX_BUFFER_SIZE { + return Err(io::Error::new( + io::ErrorKind::OutOfMemory, + format!("Reached the buffer maximal size of {MAX_BUFFER_SIZE}"), + )); + } + buffer.resize(self.buffer_end + 
1024, b'\0'); + } + let read = read.read(&mut buffer[self.buffer_end..])?; + if read == 0 { + break self.buffer_end; + } + self.buffer_end += read; + }; + let result = str::from_utf8(&buffer[self.buffer_start..line_end]).map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("Invalid UTF-8 in the TSV file: {e}"), + ) + }); + self.line_count += 1; + self.last_line_start = self.last_line_end; + self.last_line_end += u64::try_from(line_end - self.buffer_start).unwrap(); + self.buffer_start = line_end; + result + } + + #[cfg(feature = "async-tokio")] + #[allow(clippy::unwrap_in_result)] + async fn next_line_tokio_async<'a>( + &mut self, + buffer: &'a mut Vec<u8>, + read: &mut (impl AsyncRead + Unpin), + ) -> io::Result<&'a str> { + let line_end = loop { + if let Some(eol) = memchr(b'\n', &buffer[self.buffer_start..self.buffer_end]) { + break self.buffer_start + eol + 1; + } + if self.buffer_start > 0 { + buffer.copy_within(self.buffer_start..self.buffer_end, 0); + self.buffer_end -= self.buffer_start; + self.buffer_start = 0; + } + if self.buffer_end + 1024 > buffer.len() { + if self.buffer_end + 1024 > MAX_BUFFER_SIZE { + return Err(io::Error::new( + io::ErrorKind::OutOfMemory, + format!("Reached the buffer maximal size of {MAX_BUFFER_SIZE}"), + )); + } + buffer.resize(self.buffer_end + 1024, b'\0'); + } + let read = read.read(&mut buffer[self.buffer_end..]).await?; + if read == 0 { + break self.buffer_end; + } + self.buffer_end += read; + }; + let result = str::from_utf8(&buffer[self.buffer_start..line_end]).map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("Invalid UTF-8 in the TSV file: {e}"), + ) + }); + self.line_count += 1; + self.last_line_start = self.last_line_end; + self.last_line_end += u64::try_from(line_end - self.buffer_start).unwrap(); + self.buffer_start = line_end; + result + } +} + +#[cfg(test)] +#[allow(clippy::panic_in_result_fn)] +mod tests { + use super::*; + use std::error::Error; + + fn build_example() -> (Vec<Variable>, Vec<Vec<Option<Term>>>) { + ( + vec![ + Variable::new_unchecked("x"), + Variable::new_unchecked("literal"), + ], + vec![ + vec![ + Some(NamedNode::new_unchecked("http://example/x").into()), + Some(Literal::new_simple_literal("String").into()), + ], + vec![ + Some(NamedNode::new_unchecked("http://example/x").into()), + Some(Literal::new_simple_literal("String-with-dquote\"").into()), + ], + vec![ + Some(BlankNode::new_unchecked("b0").into()), + Some(Literal::new_simple_literal("Blank node").into()), + ], + vec![ + None, + Some(Literal::new_simple_literal("Missing 'x'").into()), + ], + vec![None, None], + vec![ + Some(NamedNode::new_unchecked("http://example/x").into()), + None, + ], + vec![ + Some(BlankNode::new_unchecked("b1").into()), + Some( + Literal::new_language_tagged_literal_unchecked("String-with-lang", "en") + .into(), + ), + ], + vec![ + Some(BlankNode::new_unchecked("b1").into()), + Some(Literal::new_typed_literal("123", xsd::INTEGER).into()), + ], + vec![ + None, + Some(Literal::new_simple_literal("escape,\t\r\n").into()), + ], + ], + ) + } + + #[test] + fn test_csv_serialization() { + let (variables, solutions) = build_example(); + let mut buffer = String::new(); + let writer = InnerCsvSolutionsWriter::start(&mut buffer, variables.clone()); + for solution in solutions { + writer.write( + &mut buffer, + variables + .iter() + .zip(&solution) + .filter_map(|(v, s)| s.as_ref().map(|s| (v.as_ref(), s.as_ref()))), + ); + } + assert_eq!(buffer, 
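+            // Expected CSV: a header then one CRLF-terminated record per row;
+            // fields containing ',', '"', CR or LF are quoted with inner
+            // quotes doubled, and unbound variables become empty fields.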
"x,literal\r\nhttp://example/x,String\r\nhttp://example/x,\"String-with-dquote\"\"\"\r\n_:b0,Blank node\r\n,Missing 'x'\r\n,\r\nhttp://example/x,\r\n_:b1,String-with-lang\r\n_:b1,123\r\n,\"escape,\t\r\n\"\r\n"); + } + + #[test] + fn test_tsv_roundtrip() -> Result<(), Box<dyn Error>> { + let (variables, solutions) = build_example(); + + // Write + let mut buffer = String::new(); + let writer = InnerTsvSolutionsWriter::start(&mut buffer, variables.clone()); + for solution in &solutions { + writer.write( + &mut buffer, + variables + .iter() + .zip(solution) + .filter_map(|(v, s)| s.as_ref().map(|s| (v.as_ref(), s.as_ref()))), + ); + } + assert_eq!(buffer, "?x\t?literal\n<http://example/x>\t\"String\"\n<http://example/x>\t\"String-with-dquote\\\"\"\n_:b0\t\"Blank node\"\n\t\"Missing 'x'\"\n\t\n<http://example/x>\t\n_:b1\t\"String-with-lang\"@en\n_:b1\t123\n\t\"escape,\\t\\r\\n\"\n"); + + // Read + if let FromReadTsvQueryResultsReader::Solutions { + solutions: mut solutions_iter, + variables: actual_variables, + } = FromReadTsvQueryResultsReader::read(buffer.as_bytes())? + { + assert_eq!(actual_variables.as_slice(), variables.as_slice()); + let mut rows = Vec::new(); + while let Some(row) = solutions_iter.read_next()? { + rows.push(row); + } + assert_eq!(rows, solutions); + } else { + unreachable!() + } + + Ok(()) + } + + #[test] + fn test_bad_tsv() { + let mut bad_tsvs = vec![ + "?", + "?p", + "?p?o", + "?p\n<", + "?p\n_", + "?p\n_:", + "?p\n\"", + "?p\n<<", + "?p\n1\t2\n", + "?p\n\n", + ]; + let a_lot_of_strings = format!("?p\n{}\n", "<".repeat(100_000)); + bad_tsvs.push(&a_lot_of_strings); + for bad_tsv in bad_tsvs { + if let Ok(FromReadTsvQueryResultsReader::Solutions { mut solutions, .. }) = + FromReadTsvQueryResultsReader::read(bad_tsv.as_bytes()) + { + while let Ok(Some(_)) = solutions.read_next() {} + } + } + } + + #[test] + fn test_no_columns_csv_serialization() { + let mut buffer = String::new(); + let writer = InnerCsvSolutionsWriter::start(&mut buffer, Vec::new()); + writer.write(&mut buffer, []); + assert_eq!(buffer, "\r\n\r\n"); + } + + #[test] + fn test_no_columns_tsv_serialization() { + let mut buffer = String::new(); + let writer = InnerTsvSolutionsWriter::start(&mut buffer, Vec::new()); + writer.write(&mut buffer, []); + assert_eq!(buffer, "\n\n"); + } + + #[test] + fn test_no_columns_tsv_parsing() -> io::Result<()> { + if let FromReadTsvQueryResultsReader::Solutions { + mut solutions, + variables, + } = FromReadTsvQueryResultsReader::read(b"\n\n".as_slice())? + { + assert_eq!(variables, Vec::<Variable>::new()); + assert_eq!(solutions.read_next()?, Some(Vec::new())); + assert_eq!(solutions.read_next()?, None); + } else { + unreachable!() + } + Ok(()) + } + + #[test] + fn test_no_results_csv_serialization() { + let mut buffer = String::new(); + InnerCsvSolutionsWriter::start(&mut buffer, vec![Variable::new_unchecked("a")]); + assert_eq!(buffer, "a\r\n"); + } + + #[test] + fn test_no_results_tsv_serialization() { + let mut buffer = String::new(); + InnerTsvSolutionsWriter::start(&mut buffer, vec![Variable::new_unchecked("a")]); + assert_eq!(buffer, "?a\n"); + } + + #[test] + fn test_no_results_tsv_parsing() -> io::Result<()> { + if let FromReadTsvQueryResultsReader::Solutions { + mut solutions, + variables, + } = FromReadTsvQueryResultsReader::read(b"?a\n".as_slice())? 
+ { + assert_eq!(variables, vec![Variable::new_unchecked("a")]); + assert_eq!(solutions.read_next()?, None); + } else { + unreachable!() + } + Ok(()) + } +} diff --git a/ng-oxigraph/src/sparesults/error.rs b/ng-oxigraph/src/sparesults/error.rs new file mode 100644 index 0000000..d015f71 --- /dev/null +++ b/ng-oxigraph/src/sparesults/error.rs @@ -0,0 +1,157 @@ +use crate::oxrdf::TermParseError; +use std::io; +use std::ops::Range; +use std::sync::Arc; + +/// Error returned during SPARQL result formats format parsing. +#[derive(Debug, thiserror::Error)] +pub enum QueryResultsParseError { + /// I/O error during parsing (file not found...). + #[error(transparent)] + Io(#[from] io::Error), + /// An error in the file syntax. + #[error(transparent)] + Syntax(#[from] QueryResultsSyntaxError), +} + +impl From<QueryResultsParseError> for io::Error { + #[inline] + fn from(error: QueryResultsParseError) -> Self { + match error { + QueryResultsParseError::Io(error) => error, + QueryResultsParseError::Syntax(error) => error.into(), + } + } +} + +impl From<json_event_parser::ParseError> for QueryResultsParseError { + fn from(error: json_event_parser::ParseError) -> Self { + match error { + json_event_parser::ParseError::Syntax(error) => { + QueryResultsSyntaxError::from(error).into() + } + json_event_parser::ParseError::Io(error) => error.into(), + } + } +} + +impl From<quick_xml::Error> for QueryResultsParseError { + #[inline] + fn from(error: quick_xml::Error) -> Self { + match error { + quick_xml::Error::Io(error) => { + Self::Io(Arc::try_unwrap(error).unwrap_or_else(|e| io::Error::new(e.kind(), e))) + } + _ => Self::Syntax(QueryResultsSyntaxError(SyntaxErrorKind::Xml(error))), + } + } +} + +impl From<quick_xml::escape::EscapeError> for QueryResultsParseError { + #[inline] + fn from(error: quick_xml::escape::EscapeError) -> Self { + quick_xml::Error::from(error).into() + } +} +/// An error in the syntax of the parsed file. +#[derive(Debug, thiserror::Error)] +#[error(transparent)] +pub struct QueryResultsSyntaxError(#[from] pub(crate) SyntaxErrorKind); + +#[derive(Debug, thiserror::Error)] +pub(crate) enum SyntaxErrorKind { + #[error(transparent)] + Json(#[from] json_event_parser::SyntaxError), + #[error(transparent)] + Xml(#[from] quick_xml::Error), + #[error("Error {error} on '{term}' in line {}", location.start.line + 1)] + Term { + #[source] + error: TermParseError, + term: String, + location: Range<TextPosition>, + }, + #[error("{msg}")] + Msg { + msg: String, + location: Option<Range<TextPosition>>, + }, +} + +impl QueryResultsSyntaxError { + /// Builds an error from a printable error message. + #[inline] + pub(crate) fn msg(msg: impl Into<String>) -> Self { + Self(SyntaxErrorKind::Msg { + msg: msg.into(), + location: None, + }) + } + + /// Builds an error from a printable error message and a location + #[inline] + pub(crate) fn located_message(msg: impl Into<String>, location: Range<TextPosition>) -> Self { + Self(SyntaxErrorKind::Msg { + msg: msg.into(), + location: Some(location), + }) + } + + /// The location of the error inside of the file. + #[inline] + pub fn location(&self) -> Option<Range<TextPosition>> { + match &self.0 { + SyntaxErrorKind::Json(e) => { + let location = e.location(); + Some( + TextPosition { + line: location.start.line, + column: location.start.column, + offset: location.start.offset, + }..TextPosition { + line: location.end.line, + column: location.end.column, + offset: location.end.offset, + }, + ) + } + SyntaxErrorKind::Term { location, .. 
} => Some(location.clone()), + SyntaxErrorKind::Msg { location, .. } => location.clone(), + SyntaxErrorKind::Xml(_) => None, + } + } +} + +impl From<QueryResultsSyntaxError> for io::Error { + #[inline] + fn from(error: QueryResultsSyntaxError) -> Self { + match error.0 { + SyntaxErrorKind::Json(error) => Self::new(io::ErrorKind::InvalidData, error), + SyntaxErrorKind::Xml(error) => match error { + quick_xml::Error::Io(error) => { + Arc::try_unwrap(error).unwrap_or_else(|e| Self::new(e.kind(), e)) + } + quick_xml::Error::UnexpectedEof(error) => { + Self::new(io::ErrorKind::UnexpectedEof, error) + } + _ => Self::new(io::ErrorKind::InvalidData, error), + }, + SyntaxErrorKind::Term { .. } => Self::new(io::ErrorKind::InvalidData, error), + SyntaxErrorKind::Msg { msg, .. } => Self::new(io::ErrorKind::InvalidData, msg), + } + } +} + +impl From<json_event_parser::SyntaxError> for QueryResultsSyntaxError { + fn from(error: json_event_parser::SyntaxError) -> Self { + Self(SyntaxErrorKind::Json(error)) + } +} + +/// A position in a text i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points) and a global file `offset` starting from 0 (in number of bytes). +#[derive(Eq, PartialEq, Debug, Clone, Copy)] +pub struct TextPosition { + pub line: u64, + pub column: u64, + pub offset: u64, +} diff --git a/ng-oxigraph/src/sparesults/format.rs b/ng-oxigraph/src/sparesults/format.rs new file mode 100644 index 0000000..982ff11 --- /dev/null +++ b/ng-oxigraph/src/sparesults/format.rs @@ -0,0 +1,176 @@ +use std::fmt; + +/// [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats. +#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +#[non_exhaustive] +pub enum QueryResultsFormat { + /// [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/) + Xml, + /// [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) + Json, + /// [SPARQL Query Results CSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) + Csv, + /// [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) + Tsv, +} + +impl QueryResultsFormat { + /// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/). + /// + /// ``` + /// use sparesults::QueryResultsFormat; + /// + /// assert_eq!( + /// QueryResultsFormat::Json.iri(), + /// "http://www.w3.org/ns/formats/SPARQL_Results_JSON" + /// ) + /// ``` + #[inline] + pub fn iri(self) -> &'static str { + match self { + Self::Xml => "http://www.w3.org/ns/formats/SPARQL_Results_XML", + Self::Json => "http://www.w3.org/ns/formats/SPARQL_Results_JSON", + Self::Csv => "http://www.w3.org/ns/formats/SPARQL_Results_CSV", + Self::Tsv => "http://www.w3.org/ns/formats/SPARQL_Results_TSV", + } + } + + /// The format [IANA media type](https://tools.ietf.org/html/rfc2046). + /// + /// ``` + /// use sparesults::QueryResultsFormat; + /// + /// assert_eq!( + /// QueryResultsFormat::Json.media_type(), + /// "application/sparql-results+json" + /// ) + /// ``` + #[inline] + pub fn media_type(self) -> &'static str { + match self { + Self::Xml => "application/sparql-results+xml", + Self::Json => "application/sparql-results+json", + Self::Csv => "text/csv; charset=utf-8", + Self::Tsv => "text/tab-separated-values; charset=utf-8", + } + } + + /// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension. 
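+    ///
+    /// The same extensions (plus aliases such as `json`, `xml` and `txt`)
+    /// are recognized by [`Self::from_extension`].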
+ /// + /// ``` + /// use sparesults::QueryResultsFormat; + /// + /// assert_eq!(QueryResultsFormat::Json.file_extension(), "srj") + /// ``` + #[inline] + pub fn file_extension(self) -> &'static str { + match self { + Self::Xml => "srx", + Self::Json => "srj", + Self::Csv => "csv", + Self::Tsv => "tsv", + } + } + + /// The format name. + /// + /// ``` + /// use sparesults::QueryResultsFormat; + /// + /// assert_eq!(QueryResultsFormat::Json.name(), "SPARQL Results in JSON") + /// ``` + #[inline] + pub const fn name(self) -> &'static str { + match self { + Self::Xml => "SPARQL Results in XML", + Self::Json => "SPARQL Results in JSON", + Self::Csv => "SPARQL Results in CSV", + Self::Tsv => "SPARQL Results in TSV", + } + } + + /// Looks for a known format from a media type. + /// + /// It supports some media type aliases. + /// For example, "application/xml" is going to return `Xml` even if it is not its canonical media type. + /// + /// Example: + /// ``` + /// use sparesults::QueryResultsFormat; + /// + /// assert_eq!( + /// QueryResultsFormat::from_media_type("application/sparql-results+json; charset=utf-8"), + /// Some(QueryResultsFormat::Json) + /// ) + /// ``` + #[inline] + pub fn from_media_type(media_type: &str) -> Option<Self> { + const MEDIA_SUBTYPES: [(&str, QueryResultsFormat); 8] = [ + ("csv", QueryResultsFormat::Csv), + ("json", QueryResultsFormat::Json), + ("plain", QueryResultsFormat::Csv), + ("sparql-results+json", QueryResultsFormat::Json), + ("sparql-results+xml", QueryResultsFormat::Xml), + ("tab-separated-values", QueryResultsFormat::Tsv), + ("tsv", QueryResultsFormat::Tsv), + ("xml", QueryResultsFormat::Xml), + ]; + + let (r#type, subtype) = media_type + .split_once(';') + .unwrap_or((media_type, "")) + .0 + .trim() + .split_once('/')?; + let r#type = r#type.trim(); + if !r#type.eq_ignore_ascii_case("application") && !r#type.eq_ignore_ascii_case("text") { + return None; + } + let subtype = subtype.trim(); + let subtype = subtype.strip_prefix("x-").unwrap_or(subtype); + for (candidate_subtype, candidate_id) in MEDIA_SUBTYPES { + if candidate_subtype.eq_ignore_ascii_case(subtype) { + return Some(candidate_id); + } + } + None + } + + /// Looks for a known format from an extension. + /// + /// It supports some aliases. + /// + /// Example: + /// ``` + /// use sparesults::QueryResultsFormat; + /// + /// assert_eq!( + /// QueryResultsFormat::from_extension("json"), + /// Some(QueryResultsFormat::Json) + /// ) + /// ``` + #[inline] + pub fn from_extension(extension: &str) -> Option<Self> { + const MEDIA_TYPES: [(&str, QueryResultsFormat); 7] = [ + ("csv", QueryResultsFormat::Csv), + ("json", QueryResultsFormat::Json), + ("srj", QueryResultsFormat::Json), + ("srx", QueryResultsFormat::Xml), + ("tsv", QueryResultsFormat::Tsv), + ("txt", QueryResultsFormat::Csv), + ("xml", QueryResultsFormat::Xml), + ]; + for (candidate_extension, candidate_id) in MEDIA_TYPES { + if candidate_extension.eq_ignore_ascii_case(extension) { + return Some(candidate_id); + } + } + None + } +} + +impl fmt::Display for QueryResultsFormat { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.name()) + } +} diff --git a/ng-oxigraph/src/sparesults/json.rs b/ng-oxigraph/src/sparesults/json.rs new file mode 100644 index 0000000..8ebefb7 --- /dev/null +++ b/ng-oxigraph/src/sparesults/json.rs @@ -0,0 +1,1101 @@ +//! 
Implementation of [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) + +use crate::oxrdf::vocab::rdf; +use crate::oxrdf::*; +use crate::sparesults::error::{QueryResultsParseError, QueryResultsSyntaxError}; +use json_event_parser::{FromReadJsonReader, JsonEvent, ToWriteJsonWriter}; +#[cfg(feature = "async-tokio")] +use json_event_parser::{FromTokioAsyncReadJsonReader, ToTokioAsyncWriteJsonWriter}; +use std::collections::BTreeMap; +use std::io::{self, Read, Write}; +use std::mem::take; +#[cfg(feature = "async-tokio")] +use tokio::io::{AsyncRead, AsyncWrite}; + +pub fn write_boolean_json_result<W: Write>(write: W, value: bool) -> io::Result<W> { + let mut writer = ToWriteJsonWriter::new(write); + for event in inner_write_boolean_json_result(value) { + writer.write_event(event)?; + } + writer.finish() +} + +#[cfg(feature = "async-tokio")] +pub async fn tokio_async_write_boolean_json_result<W: AsyncWrite + Unpin>( + write: W, + value: bool, +) -> io::Result<W> { + let mut writer = ToTokioAsyncWriteJsonWriter::new(write); + for event in inner_write_boolean_json_result(value) { + writer.write_event(event).await?; + } + writer.finish() +} + +fn inner_write_boolean_json_result(value: bool) -> [JsonEvent<'static>; 7] { + [ + JsonEvent::StartObject, + JsonEvent::ObjectKey("head".into()), + JsonEvent::StartObject, + JsonEvent::EndObject, + JsonEvent::ObjectKey("boolean".into()), + JsonEvent::Boolean(value), + JsonEvent::EndObject, + ] +} + +pub struct ToWriteJsonSolutionsWriter<W: Write> { + inner: InnerJsonSolutionsWriter, + writer: ToWriteJsonWriter<W>, +} + +impl<W: Write> ToWriteJsonSolutionsWriter<W> { + pub fn start(write: W, variables: &[Variable]) -> io::Result<Self> { + let mut writer = ToWriteJsonWriter::new(write); + let mut buffer = Vec::with_capacity(48); + let inner = InnerJsonSolutionsWriter::start(&mut buffer, variables); + Self::do_write(&mut writer, buffer)?; + Ok(Self { inner, writer }) + } + + pub fn write<'a>( + &mut self, + solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>, + ) -> io::Result<()> { + let mut buffer = Vec::with_capacity(48); + self.inner.write(&mut buffer, solution); + Self::do_write(&mut self.writer, buffer) + } + + pub fn finish(mut self) -> io::Result<W> { + let mut buffer = Vec::with_capacity(4); + self.inner.finish(&mut buffer); + Self::do_write(&mut self.writer, buffer)?; + self.writer.finish() + } + + fn do_write(writer: &mut ToWriteJsonWriter<W>, output: Vec<JsonEvent<'_>>) -> io::Result<()> { + for event in output { + writer.write_event(event)?; + } + Ok(()) + } +} + +#[cfg(feature = "async-tokio")] +pub struct ToTokioAsyncWriteJsonSolutionsWriter<W: AsyncWrite + Unpin> { + inner: InnerJsonSolutionsWriter, + writer: ToTokioAsyncWriteJsonWriter<W>, +} + +#[cfg(feature = "async-tokio")] +impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteJsonSolutionsWriter<W> { + pub async fn start(write: W, variables: &[Variable]) -> io::Result<Self> { + let mut writer = ToTokioAsyncWriteJsonWriter::new(write); + let mut buffer = Vec::with_capacity(48); + let inner = InnerJsonSolutionsWriter::start(&mut buffer, variables); + Self::do_write(&mut writer, buffer).await?; + Ok(Self { inner, writer }) + } + + pub async fn write<'a>( + &mut self, + solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>, + ) -> io::Result<()> { + let mut buffer = Vec::with_capacity(48); + self.inner.write(&mut buffer, solution); + Self::do_write(&mut self.writer, buffer).await + } + + pub async fn finish(mut self) -> io::Result<W> { + let mut 
buffer = Vec::with_capacity(4); + self.inner.finish(&mut buffer); + Self::do_write(&mut self.writer, buffer).await?; + self.writer.finish() + } + + async fn do_write( + writer: &mut ToTokioAsyncWriteJsonWriter<W>, + output: Vec<JsonEvent<'_>>, + ) -> io::Result<()> { + for event in output { + writer.write_event(event).await?; + } + Ok(()) + } +} + +struct InnerJsonSolutionsWriter; + +impl InnerJsonSolutionsWriter { + fn start<'a>(output: &mut Vec<JsonEvent<'a>>, variables: &'a [Variable]) -> Self { + output.push(JsonEvent::StartObject); + output.push(JsonEvent::ObjectKey("head".into())); + output.push(JsonEvent::StartObject); + output.push(JsonEvent::ObjectKey("vars".into())); + output.push(JsonEvent::StartArray); + for variable in variables { + output.push(JsonEvent::String(variable.as_str().into())); + } + output.push(JsonEvent::EndArray); + output.push(JsonEvent::EndObject); + output.push(JsonEvent::ObjectKey("results".into())); + output.push(JsonEvent::StartObject); + output.push(JsonEvent::ObjectKey("bindings".into())); + output.push(JsonEvent::StartArray); + Self {} + } + + #[allow(clippy::unused_self)] + fn write<'a>( + &self, + output: &mut Vec<JsonEvent<'a>>, + solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>, + ) { + output.push(JsonEvent::StartObject); + for (variable, value) in solution { + output.push(JsonEvent::ObjectKey(variable.as_str().into())); + write_json_term(output, value); + } + output.push(JsonEvent::EndObject); + } + + #[allow(clippy::unused_self)] + fn finish(self, output: &mut Vec<JsonEvent<'_>>) { + output.push(JsonEvent::EndArray); + output.push(JsonEvent::EndObject); + output.push(JsonEvent::EndObject); + } +} + +fn write_json_term<'a>(output: &mut Vec<JsonEvent<'a>>, term: TermRef<'a>) { + match term { + TermRef::NamedNode(uri) => { + output.push(JsonEvent::StartObject); + output.push(JsonEvent::ObjectKey("type".into())); + output.push(JsonEvent::String("uri".into())); + output.push(JsonEvent::ObjectKey("value".into())); + output.push(JsonEvent::String(uri.as_str().into())); + output.push(JsonEvent::EndObject); + } + TermRef::BlankNode(bnode) => { + output.push(JsonEvent::StartObject); + output.push(JsonEvent::ObjectKey("type".into())); + output.push(JsonEvent::String("bnode".into())); + output.push(JsonEvent::ObjectKey("value".into())); + output.push(JsonEvent::String(bnode.as_str().into())); + output.push(JsonEvent::EndObject); + } + TermRef::Literal(literal) => { + output.push(JsonEvent::StartObject); + output.push(JsonEvent::ObjectKey("type".into())); + output.push(JsonEvent::String("literal".into())); + output.push(JsonEvent::ObjectKey("value".into())); + output.push(JsonEvent::String(literal.value().into())); + if let Some(language) = literal.language() { + output.push(JsonEvent::ObjectKey("xml:lang".into())); + output.push(JsonEvent::String(language.into())); + } else if !literal.is_plain() { + output.push(JsonEvent::ObjectKey("datatype".into())); + output.push(JsonEvent::String(literal.datatype().as_str().into())); + } + output.push(JsonEvent::EndObject); + } + #[cfg(feature = "rdf-star")] + TermRef::Triple(triple) => { + output.push(JsonEvent::StartObject); + output.push(JsonEvent::ObjectKey("type".into())); + output.push(JsonEvent::String("triple".into())); + output.push(JsonEvent::ObjectKey("value".into())); + output.push(JsonEvent::StartObject); + output.push(JsonEvent::ObjectKey("subject".into())); + write_json_term(output, triple.subject.as_ref().into()); + output.push(JsonEvent::ObjectKey("predicate".into())); + 
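+            // Quoted triples (RDF-star) nest recursively: the "value" object
+            // holds "subject", "predicate" and "object" keys, each encoded
+            // with the same term representation as a top-level binding.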
write_json_term(output, triple.predicate.as_ref().into()); + output.push(JsonEvent::ObjectKey("object".into())); + write_json_term(output, triple.object.as_ref()); + output.push(JsonEvent::EndObject); + output.push(JsonEvent::EndObject); + } + } +} + +pub enum FromReadJsonQueryResultsReader<R: Read> { + Solutions { + variables: Vec<Variable>, + solutions: FromReadJsonSolutionsReader<R>, + }, + Boolean(bool), +} + +impl<R: Read> FromReadJsonQueryResultsReader<R> { + pub fn read(read: R) -> Result<Self, QueryResultsParseError> { + let mut reader = FromReadJsonReader::new(read); + let mut inner = JsonInnerReader::new(); + loop { + if let Some(result) = inner.read_event(reader.read_next_event()?)? { + return match result { + JsonInnerQueryResults::Solutions { + variables, + solutions, + } => Ok(Self::Solutions { + variables, + solutions: FromReadJsonSolutionsReader { + inner: solutions, + reader, + }, + }), + JsonInnerQueryResults::Boolean(value) => Ok(Self::Boolean(value)), + }; + } + } + } +} + +pub struct FromReadJsonSolutionsReader<R: Read> { + inner: JsonInnerSolutions, + reader: FromReadJsonReader<R>, +} + +impl<R: Read> FromReadJsonSolutionsReader<R> { + pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> { + match &mut self.inner { + JsonInnerSolutions::Reader(reader) => loop { + let event = self.reader.read_next_event()?; + if event == JsonEvent::Eof { + return Ok(None); + } + if let Some(result) = reader.read_event(event)? { + return Ok(Some(result)); + } + }, + JsonInnerSolutions::Iterator(iter) => iter.next(), + } + } +} + +#[cfg(feature = "async-tokio")] +pub enum FromTokioAsyncReadJsonQueryResultsReader<R: AsyncRead + Unpin> { + Solutions { + variables: Vec<Variable>, + solutions: FromTokioAsyncReadJsonSolutionsReader<R>, + }, + Boolean(bool), +} + +#[cfg(feature = "async-tokio")] +impl<R: AsyncRead + Unpin> FromTokioAsyncReadJsonQueryResultsReader<R> { + pub async fn read(read: R) -> Result<Self, QueryResultsParseError> { + let mut reader = FromTokioAsyncReadJsonReader::new(read); + let mut inner = JsonInnerReader::new(); + loop { + if let Some(result) = inner.read_event(reader.read_next_event().await?)? { + return match result { + JsonInnerQueryResults::Solutions { + variables, + solutions, + } => Ok(Self::Solutions { + variables, + solutions: FromTokioAsyncReadJsonSolutionsReader { + inner: solutions, + reader, + }, + }), + JsonInnerQueryResults::Boolean(value) => Ok(Self::Boolean(value)), + }; + } + } + } +} + +#[cfg(feature = "async-tokio")] +pub struct FromTokioAsyncReadJsonSolutionsReader<R: AsyncRead + Unpin> { + inner: JsonInnerSolutions, + reader: FromTokioAsyncReadJsonReader<R>, +} + +#[cfg(feature = "async-tokio")] +impl<R: AsyncRead + Unpin> FromTokioAsyncReadJsonSolutionsReader<R> { + pub async fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> { + match &mut self.inner { + JsonInnerSolutions::Reader(reader) => loop { + let event = self.reader.read_next_event().await?; + if event == JsonEvent::Eof { + return Ok(None); + } + if let Some(result) = reader.read_event(event)? 
{ + return Ok(Some(result)); + } + }, + JsonInnerSolutions::Iterator(iter) => iter.next(), + } + } +} + +enum JsonInnerQueryResults { + Solutions { + variables: Vec<Variable>, + solutions: JsonInnerSolutions, + }, + Boolean(bool), +} + +enum JsonInnerSolutions { + Reader(JsonInnerSolutionsReader), + Iterator(JsonBufferedSolutionsIterator), +} + +struct JsonInnerReader { + state: JsonInnerReaderState, + variables: Vec<Variable>, + current_solution_variables: Vec<String>, + current_solution_values: Vec<Term>, + solutions: Vec<(Vec<String>, Vec<Term>)>, + vars_read: bool, + solutions_read: bool, +} + +enum JsonInnerReaderState { + Start, + InRootObject, + BeforeHead, + InHead, + BeforeVars, + InVars, + BeforeLinks, + InLinks, + BeforeResults, + InResults, + BeforeBindings, + BeforeSolution, + BetweenSolutionTerms, + Term { + reader: JsonInnerTermReader, + variable: String, + }, + AfterBindings, + BeforeBoolean, + Ignore { + level: usize, + after: JsonInnerReaderStateAfterIgnore, + }, +} + +#[allow(clippy::enum_variant_names)] +#[derive(Clone, Copy)] +enum JsonInnerReaderStateAfterIgnore { + InRootObject, + InHead, + InResults, + AfterBindings, +} + +impl JsonInnerReader { + fn new() -> Self { + Self { + state: JsonInnerReaderState::Start, + variables: Vec::new(), + current_solution_variables: Vec::new(), + current_solution_values: Vec::new(), + solutions: Vec::new(), + vars_read: false, + solutions_read: false, + } + } + + fn read_event( + &mut self, + event: JsonEvent<'_>, + ) -> Result<Option<JsonInnerQueryResults>, QueryResultsSyntaxError> { + match &mut self.state { + JsonInnerReaderState::Start => { + if event == JsonEvent::StartObject { + self.state = JsonInnerReaderState::InRootObject; + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg( + "SPARQL JSON results must be an object", + )) + } + } + JsonInnerReaderState::InRootObject => match event { + JsonEvent::ObjectKey(key) => match key.as_ref() { + "head" => { + self.state = JsonInnerReaderState::BeforeHead; + Ok(None) + } + "results" => { + self.state = JsonInnerReaderState::BeforeResults; + Ok(None) + } + "boolean" => { + self.state = JsonInnerReaderState::BeforeBoolean; + Ok(None) + } + _ => { + self.state = JsonInnerReaderState::Ignore { + level: 0, + after: JsonInnerReaderStateAfterIgnore::InRootObject, + }; + Ok(None) + } + }, + JsonEvent::EndObject => Err(QueryResultsSyntaxError::msg( + "SPARQL JSON results must contain a 'boolean' or a 'results' key", + )), + _ => unreachable!(), + }, + JsonInnerReaderState::BeforeHead => { + if event == JsonEvent::StartObject { + self.state = JsonInnerReaderState::InHead; + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg( + "SPARQL JSON results head must be an object", + )) + } + } + JsonInnerReaderState::InHead => match event { + JsonEvent::ObjectKey(key) => match key.as_ref() { + "vars" => { + self.state = JsonInnerReaderState::BeforeVars; + self.vars_read = true; + Ok(None) + } + "links" => { + self.state = JsonInnerReaderState::BeforeLinks; + Ok(None) + } + _ => { + self.state = JsonInnerReaderState::Ignore { + level: 0, + after: JsonInnerReaderStateAfterIgnore::InHead, + }; + Ok(None) + } + }, + JsonEvent::EndObject => { + self.state = JsonInnerReaderState::InRootObject; + Ok(None) + } + _ => unreachable!(), + }, + JsonInnerReaderState::BeforeVars => { + if event == JsonEvent::StartArray { + self.state = JsonInnerReaderState::InVars; + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg( + "SPARQL JSON results vars must be an array", + )) + } + } + 
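+            // "head" and "results" may arrive in either order in the JSON
+            // object. When the bindings were read first (solutions_read),
+            // they have been buffered and are replayed through
+            // JsonBufferedSolutionsIterator once the variable list is known;
+            // otherwise parsing stays fully streaming.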
JsonInnerReaderState::InVars => match event { + JsonEvent::String(variable) => match Variable::new(variable.clone()) { + Ok(var) => { + if self.variables.contains(&var) { + return Err(QueryResultsSyntaxError::msg(format!( + "The variable {var} is declared twice" + ))); + } + self.variables.push(var); + Ok(None) + } + Err(e) => Err(QueryResultsSyntaxError::msg(format!( + "Invalid variable name '{variable}': {e}" + ))), + }, + JsonEvent::EndArray => { + if self.solutions_read { + let mut mapping = BTreeMap::default(); + for (i, var) in self.variables.iter().enumerate() { + mapping.insert(var.as_str().to_owned(), i); + } + Ok(Some(JsonInnerQueryResults::Solutions { + variables: take(&mut self.variables), + solutions: JsonInnerSolutions::Iterator( + JsonBufferedSolutionsIterator { + mapping, + bindings: take(&mut self.solutions).into_iter(), + }, + ), + })) + } else { + self.state = JsonInnerReaderState::InHead; + Ok(None) + } + } + _ => Err(QueryResultsSyntaxError::msg( + "Variables name in the vars array must be strings", + )), + }, + JsonInnerReaderState::BeforeLinks => { + if event == JsonEvent::StartArray { + self.state = JsonInnerReaderState::InLinks; + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg( + "SPARQL JSON results links must be an array", + )) + } + } + JsonInnerReaderState::InLinks => match event { + JsonEvent::String(_) => Ok(None), + JsonEvent::EndArray => { + self.state = JsonInnerReaderState::InHead; + Ok(None) + } + _ => Err(QueryResultsSyntaxError::msg( + "Links in the links array must be strings", + )), + }, + JsonInnerReaderState::BeforeResults => { + if event == JsonEvent::StartObject { + self.state = JsonInnerReaderState::InResults; + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg( + "SPARQL JSON results result must be an object", + )) + } + } + JsonInnerReaderState::InResults => match event { + JsonEvent::ObjectKey(key) => { + if key == "bindings" { + self.state = JsonInnerReaderState::BeforeBindings; + Ok(None) + } else { + self.state = JsonInnerReaderState::Ignore { + level: 0, + after: JsonInnerReaderStateAfterIgnore::InResults, + }; + Ok(None) + } + } + JsonEvent::EndObject => Err(QueryResultsSyntaxError::msg( + "The results object must contains a 'bindings' key", + )), + _ => unreachable!(), + }, + JsonInnerReaderState::BeforeBindings => { + if event == JsonEvent::StartArray { + self.solutions_read = true; + if self.vars_read { + let mut mapping = BTreeMap::default(); + for (i, var) in self.variables.iter().enumerate() { + mapping.insert(var.as_str().to_owned(), i); + } + Ok(Some(JsonInnerQueryResults::Solutions { + variables: take(&mut self.variables), + solutions: JsonInnerSolutions::Reader(JsonInnerSolutionsReader { + state: JsonInnerSolutionsReaderState::BeforeSolution, + mapping, + new_bindings: Vec::new(), + }), + })) + } else { + self.state = JsonInnerReaderState::BeforeSolution; + Ok(None) + } + } else { + Err(QueryResultsSyntaxError::msg( + "SPARQL JSON results bindings must be an array", + )) + } + } + JsonInnerReaderState::BeforeSolution => match event { + JsonEvent::StartObject => { + self.state = JsonInnerReaderState::BetweenSolutionTerms; + Ok(None) + } + JsonEvent::EndArray => { + self.state = JsonInnerReaderState::AfterBindings; + Ok(None) + } + _ => Err(QueryResultsSyntaxError::msg( + "Expecting a new solution object", + )), + }, + JsonInnerReaderState::BetweenSolutionTerms => match event { + JsonEvent::ObjectKey(key) => { + self.state = JsonInnerReaderState::Term { + reader: JsonInnerTermReader::default(), + variable: 
key.into(), + }; + Ok(None) + } + JsonEvent::EndObject => { + self.state = JsonInnerReaderState::BeforeSolution; + self.solutions.push(( + take(&mut self.current_solution_variables), + take(&mut self.current_solution_values), + )); + Ok(None) + } + _ => unreachable!(), + }, + JsonInnerReaderState::Term { + ref mut reader, + variable, + } => { + let result = reader.read_event(event); + if let Some(term) = result? { + self.current_solution_variables.push(take(variable)); + self.current_solution_values.push(term); + self.state = JsonInnerReaderState::BetweenSolutionTerms; + } + Ok(None) + } + JsonInnerReaderState::AfterBindings => { + if event == JsonEvent::EndObject { + self.state = JsonInnerReaderState::InRootObject; + } else { + self.state = JsonInnerReaderState::Ignore { + level: 0, + after: JsonInnerReaderStateAfterIgnore::AfterBindings, + } + } + Ok(None) + } + JsonInnerReaderState::BeforeBoolean => { + if let JsonEvent::Boolean(v) = event { + Ok(Some(JsonInnerQueryResults::Boolean(v))) + } else { + Err(QueryResultsSyntaxError::msg("Unexpected boolean value")) + } + } + #[allow(clippy::ref_patterns)] + JsonInnerReaderState::Ignore { level, ref after } => { + let level = match event { + JsonEvent::StartArray | JsonEvent::StartObject => *level + 1, + JsonEvent::EndArray | JsonEvent::EndObject => *level - 1, + JsonEvent::String(_) + | JsonEvent::Number(_) + | JsonEvent::Boolean(_) + | JsonEvent::Null + | JsonEvent::ObjectKey(_) + | JsonEvent::Eof => *level, + }; + self.state = if level == 0 { + match after { + JsonInnerReaderStateAfterIgnore::InRootObject => { + JsonInnerReaderState::InRootObject + } + JsonInnerReaderStateAfterIgnore::InHead => JsonInnerReaderState::InHead, + JsonInnerReaderStateAfterIgnore::InResults => { + JsonInnerReaderState::InResults + } + JsonInnerReaderStateAfterIgnore::AfterBindings => { + JsonInnerReaderState::AfterBindings + } + } + } else { + JsonInnerReaderState::Ignore { + level, + after: *after, + } + }; + Ok(None) + } + } + } +} + +struct JsonInnerSolutionsReader { + state: JsonInnerSolutionsReaderState, + mapping: BTreeMap<String, usize>, + new_bindings: Vec<Option<Term>>, +} + +enum JsonInnerSolutionsReaderState { + BeforeSolution, + BetweenSolutionTerms, + Term { + reader: JsonInnerTermReader, + key: usize, + }, + AfterEnd, +} + +impl JsonInnerSolutionsReader { + fn read_event( + &mut self, + event: JsonEvent<'_>, + ) -> Result<Option<Vec<Option<Term>>>, QueryResultsSyntaxError> { + match &mut self.state { + JsonInnerSolutionsReaderState::BeforeSolution => match event { + JsonEvent::StartObject => { + self.state = JsonInnerSolutionsReaderState::BetweenSolutionTerms; + self.new_bindings = vec![None; self.mapping.len()]; + Ok(None) + } + JsonEvent::EndArray => { + self.state = JsonInnerSolutionsReaderState::AfterEnd; + Ok(None) + } + _ => Err(QueryResultsSyntaxError::msg( + "Expecting a new solution object", + )), + }, + JsonInnerSolutionsReaderState::BetweenSolutionTerms => match event { + JsonEvent::ObjectKey(key) => { + let key = *self.mapping.get(key.as_ref()).ok_or_else(|| { + QueryResultsSyntaxError::msg(format!( + "The variable {key} has not been defined in the header" + )) + })?; + self.state = JsonInnerSolutionsReaderState::Term { + reader: JsonInnerTermReader::default(), + key, + }; + Ok(None) + } + JsonEvent::EndObject => { + self.state = JsonInnerSolutionsReaderState::BeforeSolution; + Ok(Some(take(&mut self.new_bindings))) + } + _ => unreachable!(), + }, + JsonInnerSolutionsReaderState::Term { + ref mut reader, + key, + } => { + let result = 
reader.read_event(event); + if let Some(term) = result? { + self.new_bindings[*key] = Some(term); + self.state = JsonInnerSolutionsReaderState::BetweenSolutionTerms; + } + Ok(None) + } + JsonInnerSolutionsReaderState::AfterEnd => { + if event == JsonEvent::EndObject { + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg( + "Unexpected JSON after the end of the bindings array", + )) + } + } + } + } +} + +#[derive(Default)] +struct JsonInnerTermReader { + state: JsonInnerTermReaderState, + term_type: Option<TermType>, + value: Option<String>, + lang: Option<String>, + datatype: Option<NamedNode>, + #[cfg(feature = "rdf-star")] + subject: Option<Term>, + #[cfg(feature = "rdf-star")] + predicate: Option<Term>, + #[cfg(feature = "rdf-star")] + object: Option<Term>, +} + +#[derive(Default)] +enum JsonInnerTermReaderState { + #[default] + Start, + Middle, + TermType, + Value, + Lang, + Datatype, + #[cfg(feature = "rdf-star")] + InValue, + #[cfg(feature = "rdf-star")] + Subject(Box<JsonInnerTermReader>), + #[cfg(feature = "rdf-star")] + Predicate(Box<JsonInnerTermReader>), + #[cfg(feature = "rdf-star")] + Object(Box<JsonInnerTermReader>), +} + +enum TermType { + Uri, + BNode, + Literal, + #[cfg(feature = "rdf-star")] + Triple, +} + +impl JsonInnerTermReader { + fn read_event( + &mut self, + event: JsonEvent<'_>, + ) -> Result<Option<Term>, QueryResultsSyntaxError> { + match &mut self.state { + JsonInnerTermReaderState::Start => { + if event == JsonEvent::StartObject { + self.state = JsonInnerTermReaderState::Middle; + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg( + "RDF terms must be encoded using objects", + )) + } + } + JsonInnerTermReaderState::Middle => match event { + JsonEvent::ObjectKey(object_key) => { + self.state = match object_key.as_ref() { + "type" => JsonInnerTermReaderState::TermType, + "value" => JsonInnerTermReaderState::Value, + "datatype" => JsonInnerTermReaderState::Datatype, + "xml:lang" => JsonInnerTermReaderState::Lang, + _ => { + return Err(QueryResultsSyntaxError::msg(format!( + "Unsupported term key: {object_key}" + ))); + } + }; + Ok(None) + } + JsonEvent::EndObject => { + self.state = JsonInnerTermReaderState::Start; + match self.term_type.take() { + None => Err(QueryResultsSyntaxError::msg( + "Term serialization should have a 'type' key", + )), + Some(TermType::Uri) => Ok(Some( + NamedNode::new(self.value.take().ok_or_else(|| { + QueryResultsSyntaxError::msg( + "uri serialization should have a 'value' key", + ) + })?) + .map_err(|e| { + QueryResultsSyntaxError::msg(format!("Invalid uri value: {e}")) + })? + .into(), + )), + Some(TermType::BNode) => Ok(Some( + BlankNode::new(self.value.take().ok_or_else(|| { + QueryResultsSyntaxError::msg( + "bnode serialization should have a 'value' key", + ) + })?) + .map_err(|e| { + QueryResultsSyntaxError::msg(format!("Invalid bnode value: {e}")) + })? + .into(), + )), + Some(TermType::Literal) => { + let value = self.value.take().ok_or_else(|| { + QueryResultsSyntaxError::msg( + "literal serialization should have a 'value' key", + ) + })?; + Ok(Some(match self.lang.take() { + Some(lang) => { + if let Some(datatype) = &self.datatype { + if datatype.as_ref() != rdf::LANG_STRING { + return Err(QueryResultsSyntaxError::msg(format!( + "xml:lang value '{lang}' provided with the datatype {datatype}" + ))); + } + } + Literal::new_language_tagged_literal(value, &*lang) + .map_err(|e| { + QueryResultsSyntaxError::msg(format!( + "Invalid xml:lang value '{lang}': {e}" + )) + })? 
+ } + None => { + if let Some(datatype) = self.datatype.take() { + Literal::new_typed_literal(value, datatype) + } else { + Literal::new_simple_literal(value) + } + } + }.into())) + } + #[cfg(feature = "rdf-star")] + Some(TermType::Triple) => Ok(Some( + Triple::new( + match self.subject.take().ok_or_else(|| { + QueryResultsSyntaxError::msg( + "triple serialization should have a 'subject' key", + ) + })? { + Term::NamedNode(subject) => subject.into(), + Term::BlankNode(subject) => subject.into(), + Term::Triple(subject) => Subject::Triple(subject), + Term::Literal(_) => { + return Err(QueryResultsSyntaxError::msg( + "The 'subject' value should not be a literal", + )); + } + }, + match self.predicate.take().ok_or_else(|| { + QueryResultsSyntaxError::msg( + "triple serialization should have a 'predicate' key", + ) + })? { + Term::NamedNode(predicate) => predicate, + _ => { + return Err(QueryResultsSyntaxError::msg( + "The 'predicate' value should be a uri", + )); + } + }, + self.object.take().ok_or_else(|| { + QueryResultsSyntaxError::msg( + "triple serialization should have a 'object' key", + ) + })?, + ) + .into(), + )), + } + } + _ => unreachable!(), + }, + JsonInnerTermReaderState::TermType => { + self.state = JsonInnerTermReaderState::Middle; + if let JsonEvent::String(value) = event { + match value.as_ref() { + "uri" => { + self.term_type = Some(TermType::Uri); + Ok(None) + } + "bnode" => { + self.term_type = Some(TermType::BNode); + Ok(None) + } + "literal" | "typed-literal" => { + self.term_type = Some(TermType::Literal); + Ok(None) + } + #[cfg(feature = "rdf-star")] + "triple" => { + self.term_type = Some(TermType::Triple); + Ok(None) + } + _ => Err(QueryResultsSyntaxError::msg(format!( + "Unexpected term type: '{value}'" + ))), + } + } else { + Err(QueryResultsSyntaxError::msg("Term type must be a string")) + } + } + JsonInnerTermReaderState::Value => match event { + JsonEvent::String(value) => { + self.value = Some(value.into_owned()); + self.state = JsonInnerTermReaderState::Middle; + Ok(None) + } + #[cfg(feature = "rdf-star")] + JsonEvent::StartObject => { + self.state = JsonInnerTermReaderState::InValue; + Ok(None) + } + _ => { + self.state = JsonInnerTermReaderState::Middle; + + Err(QueryResultsSyntaxError::msg("Term value must be a string")) + } + }, + JsonInnerTermReaderState::Lang => { + let result = if let JsonEvent::String(value) = event { + self.lang = Some(value.into_owned()); + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg("Term lang must be strings")) + }; + self.state = JsonInnerTermReaderState::Middle; + + result + } + JsonInnerTermReaderState::Datatype => { + let result = if let JsonEvent::String(value) = event { + match NamedNode::new(value) { + Ok(datatype) => { + self.datatype = Some(datatype); + Ok(None) + } + Err(e) => Err(QueryResultsSyntaxError::msg(format!( + "Invalid datatype: {e}" + ))), + } + } else { + Err(QueryResultsSyntaxError::msg("Term lang must be strings")) + }; + self.state = JsonInnerTermReaderState::Middle; + + result + } + #[cfg(feature = "rdf-star")] + JsonInnerTermReaderState::InValue => match event { + JsonEvent::ObjectKey(object_key) => { + self.state = match object_key.as_ref() { + "subject" => JsonInnerTermReaderState::Subject(Box::default()), + "predicate" => JsonInnerTermReaderState::Predicate(Box::default()), + "object" => JsonInnerTermReaderState::Object(Box::default()), + _ => { + return Err(QueryResultsSyntaxError::msg(format!( + "Unsupported value key: {object_key}" + ))); + } + }; + Ok(None) + } + JsonEvent::EndObject => 
{ + self.state = JsonInnerTermReaderState::Middle; + Ok(None) + } + _ => unreachable!(), + }, + #[cfg(feature = "rdf-star")] + JsonInnerTermReaderState::Subject(ref mut inner_state) => { + if let Some(term) = inner_state.read_event(event)? { + self.state = JsonInnerTermReaderState::InValue; + self.subject = Some(term); + } + Ok(None) + } + #[cfg(feature = "rdf-star")] + JsonInnerTermReaderState::Predicate(ref mut inner_state) => { + if let Some(term) = inner_state.read_event(event)? { + self.state = JsonInnerTermReaderState::InValue; + self.predicate = Some(term); + } + Ok(None) + } + #[cfg(feature = "rdf-star")] + JsonInnerTermReaderState::Object(ref mut inner_state) => { + if let Some(term) = inner_state.read_event(event)? { + self.state = JsonInnerTermReaderState::InValue; + self.object = Some(term); + } + Ok(None) + } + } + } +} + +pub struct JsonBufferedSolutionsIterator { + mapping: BTreeMap<String, usize>, + bindings: std::vec::IntoIter<(Vec<String>, Vec<Term>)>, +} + +impl JsonBufferedSolutionsIterator { + fn next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> { + let Some((variables, values)) = self.bindings.next() else { + return Ok(None); + }; + let mut new_bindings = vec![None; self.mapping.len()]; + for (variable, value) in variables.into_iter().zip(values) { + let k = *self.mapping.get(&variable).ok_or_else(|| { + QueryResultsSyntaxError::msg(format!( + "The variable {variable} has not been defined in the header" + )) + })?; + new_bindings[k] = Some(value); + } + Ok(Some(new_bindings)) + } +} diff --git a/ng-oxigraph/src/sparesults/mod.rs b/ng-oxigraph/src/sparesults/mod.rs new file mode 100644 index 0000000..4f88baa --- /dev/null +++ b/ng-oxigraph/src/sparesults/mod.rs @@ -0,0 +1,16 @@ +mod csv; +mod error; +mod format; +mod json; +mod parser; +mod serializer; +pub mod solution; +mod xml; + +pub use crate::sparesults::error::{QueryResultsParseError, QueryResultsSyntaxError, TextPosition}; +pub use crate::sparesults::format::QueryResultsFormat; +pub use crate::sparesults::parser::{ + FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsParser, +}; +pub use crate::sparesults::serializer::{QueryResultsSerializer, ToWriteSolutionsWriter}; +pub use crate::sparesults::solution::QuerySolution; diff --git a/ng-oxigraph/src/sparesults/parser.rs b/ng-oxigraph/src/sparesults/parser.rs new file mode 100644 index 0000000..9bac0ad --- /dev/null +++ b/ng-oxigraph/src/sparesults/parser.rs @@ -0,0 +1,460 @@ +use crate::oxrdf::Variable; +use crate::sparesults::csv::{FromReadTsvQueryResultsReader, FromReadTsvSolutionsReader}; +#[cfg(feature = "async-tokio")] +use crate::sparesults::csv::{ + FromTokioAsyncReadTsvQueryResultsReader, FromTokioAsyncReadTsvSolutionsReader, +}; +use crate::sparesults::error::{QueryResultsParseError, QueryResultsSyntaxError}; +use crate::sparesults::format::QueryResultsFormat; +use crate::sparesults::json::{FromReadJsonQueryResultsReader, FromReadJsonSolutionsReader}; +#[cfg(feature = "async-tokio")] +use crate::sparesults::json::{ + FromTokioAsyncReadJsonQueryResultsReader, FromTokioAsyncReadJsonSolutionsReader, +}; +use crate::sparesults::solution::QuerySolution; +use crate::sparesults::xml::{FromReadXmlQueryResultsReader, FromReadXmlSolutionsReader}; +#[cfg(feature = "async-tokio")] +use crate::sparesults::xml::{ + FromTokioAsyncReadXmlQueryResultsReader, FromTokioAsyncReadXmlSolutionsReader, +}; +use std::io::Read; +use std::sync::Arc; +#[cfg(feature = "async-tokio")] +use tokio::io::AsyncRead; + +/// Parsers for [SPARQL 
query](https://www.w3.org/TR/sparql11-query/) results serialization formats. +/// +/// It currently supports the following formats: +/// * [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/) ([`QueryResultsFormat::Xml`](QueryResultsFormat::Xml)). +/// * [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) ([`QueryResultsFormat::Json`](QueryResultsFormat::Json)). +/// * [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Tsv`](QueryResultsFormat::Tsv)). +/// +/// Example in JSON (the API is the same for XML and TSV): +/// ``` +/// use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader}; +/// use oxrdf::{Literal, Variable}; +/// +/// let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json); +/// // boolean +/// if let FromReadQueryResultsReader::Boolean(v) = json_parser.parse_read(br#"{"boolean":true}"#.as_slice())? { +/// assert_eq!(v, true); +/// } +/// // solutions +/// if let FromReadQueryResultsReader::Solutions(solutions) = json_parser.parse_read(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice())? { +/// assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]); +/// for solution in solutions { +/// assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]); +/// } +/// } +/// # Result::<(),sparesults::QueryResultsParseError>::Ok(()) +/// ``` +pub struct QueryResultsParser { + format: QueryResultsFormat, +} + +impl QueryResultsParser { + /// Builds a parser for the given format. + #[inline] + pub fn from_format(format: QueryResultsFormat) -> Self { + Self { format } + } + + /// Reads a result file from a [`Read`] implementation. + /// + /// Reads are automatically buffered. + /// + /// Example in XML (the API is the same for JSON and TSV): + /// ``` + /// use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader}; + /// use oxrdf::{Literal, Variable}; + /// + /// let xml_parser = QueryResultsParser::from_format(QueryResultsFormat::Xml); + /// + /// // boolean + /// if let FromReadQueryResultsReader::Boolean(v) = xml_parser.parse_read(br#"<sparql xmlns="http://www.w3.org/2005/sparql-results#"><head/><boolean>true</boolean></sparql>"#.as_slice())? { + /// assert_eq!(v, true); + /// } + /// + /// // solutions + /// if let FromReadQueryResultsReader::Solutions(solutions) = xml_parser.parse_read(br#"<sparql xmlns="http://www.w3.org/2005/sparql-results#"><head><variable name="foo"/><variable name="bar"/></head><results><result><binding name="foo"><literal>test</literal></binding></result></results></sparql>"#.as_slice())? { + /// assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]); + /// for solution in solutions { + /// assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]); + /// } + /// } + /// # Result::<(),sparesults::QueryResultsParseError>::Ok(()) + /// ``` + pub fn parse_read<R: Read>( + &self, + reader: R, + ) -> Result<FromReadQueryResultsReader<R>, QueryResultsParseError> { + Ok(match self.format { + QueryResultsFormat::Xml => match FromReadXmlQueryResultsReader::read(reader)? 
{ + FromReadXmlQueryResultsReader::Boolean(r) => FromReadQueryResultsReader::Boolean(r), + FromReadXmlQueryResultsReader::Solutions { + solutions, + variables, + } => FromReadQueryResultsReader::Solutions(FromReadSolutionsReader { + variables: variables.into(), + solutions: FromReadSolutionsReaderKind::Xml(solutions), + }), + }, + QueryResultsFormat::Json => match FromReadJsonQueryResultsReader::read(reader)? { + FromReadJsonQueryResultsReader::Boolean(r) => FromReadQueryResultsReader::Boolean(r), + FromReadJsonQueryResultsReader::Solutions { + solutions, + variables, + } => FromReadQueryResultsReader::Solutions(FromReadSolutionsReader { + variables: variables.into(), + solutions: FromReadSolutionsReaderKind::Json(solutions), + }), + }, + QueryResultsFormat::Csv => return Err(QueryResultsSyntaxError::msg("CSV SPARQL results syntax is lossy and can't be parsed to a proper RDF representation").into()), + QueryResultsFormat::Tsv => match FromReadTsvQueryResultsReader::read(reader)? { + FromReadTsvQueryResultsReader::Boolean(r) => FromReadQueryResultsReader::Boolean(r), + FromReadTsvQueryResultsReader::Solutions { + solutions, + variables, + } => FromReadQueryResultsReader::Solutions(FromReadSolutionsReader { + variables: variables.into(), + solutions: FromReadSolutionsReaderKind::Tsv(solutions), + }), + }, + }) + } + + #[deprecated(note = "use parse_read", since = "0.4.0")] + pub fn read_results<R: Read>( + &self, + reader: R, + ) -> Result<FromReadQueryResultsReader<R>, QueryResultsParseError> { + self.parse_read(reader) + } + + /// Reads a result file from a Tokio [`AsyncRead`] implementation. + /// + /// Reads are automatically buffered. + /// + /// Example in XML (the API is the same for JSON and TSV): + /// ``` + /// use sparesults::{QueryResultsFormat, QueryResultsParser, FromTokioAsyncReadQueryResultsReader}; + /// use oxrdf::{Literal, Variable}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), sparesults::QueryResultsParseError> { + /// let xml_parser = QueryResultsParser::from_format(QueryResultsFormat::Xml); + /// + /// // boolean + /// if let FromTokioAsyncReadQueryResultsReader::Boolean(v) = xml_parser.parse_tokio_async_read(br#"<sparql xmlns="http://www.w3.org/2005/sparql-results#"><head/><boolean>true</boolean></sparql>"#.as_slice()).await? { + /// assert_eq!(v, true); + /// } + /// + /// // solutions + /// if let FromTokioAsyncReadQueryResultsReader::Solutions(mut solutions) = xml_parser.parse_tokio_async_read(br#"<sparql xmlns="http://www.w3.org/2005/sparql-results#"><head><variable name="foo"/><variable name="bar"/></head><results><result><binding name="foo"><literal>test</literal></binding></result></results></sparql>"#.as_slice()).await? { + /// assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]); + /// while let Some(solution) = solutions.next().await { + /// assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]); + /// } + /// } + /// # Ok(()) + /// # } + /// ``` + #[cfg(feature = "async-tokio")] + pub async fn parse_tokio_async_read<R: AsyncRead + Unpin>( + &self, + reader: R, + ) -> Result<FromTokioAsyncReadQueryResultsReader<R>, QueryResultsParseError> { + Ok(match self.format { + QueryResultsFormat::Xml => match FromTokioAsyncReadXmlQueryResultsReader::read(reader).await? 
{ + FromTokioAsyncReadXmlQueryResultsReader::Boolean(r) => FromTokioAsyncReadQueryResultsReader::Boolean(r), + FromTokioAsyncReadXmlQueryResultsReader::Solutions { + solutions, + variables, + } => FromTokioAsyncReadQueryResultsReader::Solutions(FromTokioAsyncReadSolutionsReader { + variables: variables.into(), + solutions: FromTokioAsyncReadSolutionsReaderKind::Xml(solutions), + }), + }, + QueryResultsFormat::Json => match FromTokioAsyncReadJsonQueryResultsReader::read(reader).await? { + FromTokioAsyncReadJsonQueryResultsReader::Boolean(r) => FromTokioAsyncReadQueryResultsReader::Boolean(r), + FromTokioAsyncReadJsonQueryResultsReader::Solutions { + solutions, + variables, + } => FromTokioAsyncReadQueryResultsReader::Solutions(FromTokioAsyncReadSolutionsReader { + variables: variables.into(), + solutions: FromTokioAsyncReadSolutionsReaderKind::Json(solutions), + }), + }, + QueryResultsFormat::Csv => return Err(QueryResultsSyntaxError::msg("CSV SPARQL results syntax is lossy and can't be parsed to a proper RDF representation").into()), + QueryResultsFormat::Tsv => match FromTokioAsyncReadTsvQueryResultsReader::read(reader).await? { + FromTokioAsyncReadTsvQueryResultsReader::Boolean(r) => FromTokioAsyncReadQueryResultsReader::Boolean(r), + FromTokioAsyncReadTsvQueryResultsReader::Solutions { + solutions, + variables, + } => FromTokioAsyncReadQueryResultsReader::Solutions(FromTokioAsyncReadSolutionsReader { + variables: variables.into(), + solutions: FromTokioAsyncReadSolutionsReaderKind::Tsv(solutions), + }), + }, + }) + } +} + +impl From<QueryResultsFormat> for QueryResultsParser { + fn from(format: QueryResultsFormat) -> Self { + Self::from_format(format) + } +} + +/// The reader for a given read of a results file. +/// +/// It is either a read boolean ([`bool`]) or a streaming reader of a set of solutions ([`FromReadSolutionsReader`]). +/// +/// Example in TSV (the API is the same for JSON and XML): +/// ``` +/// use oxrdf::{Literal, Variable}; +/// use sparesults::{FromReadQueryResultsReader, QueryResultsFormat, QueryResultsParser}; +/// +/// let tsv_parser = QueryResultsParser::from_format(QueryResultsFormat::Tsv); +/// +/// // boolean +/// if let FromReadQueryResultsReader::Boolean(v) = tsv_parser.parse_read(b"true".as_slice())? { +/// assert_eq!(v, true); +/// } +/// +/// // solutions +/// if let FromReadQueryResultsReader::Solutions(solutions) = +/// tsv_parser.parse_read(b"?foo\t?bar\n\"test\"\t".as_slice())? +/// { +/// assert_eq!( +/// solutions.variables(), +/// &[ +/// Variable::new_unchecked("foo"), +/// Variable::new_unchecked("bar") +/// ] +/// ); +/// for solution in solutions { +/// assert_eq!( +/// solution?.iter().collect::<Vec<_>>(), +/// vec![( +/// &Variable::new_unchecked("foo"), +/// &Literal::from("test").into() +/// )] +/// ); +/// } +/// } +/// # Result::<(),sparesults::QueryResultsParseError>::Ok(()) +/// ``` +pub enum FromReadQueryResultsReader<R: Read> { + Solutions(FromReadSolutionsReader<R>), + Boolean(bool), +} + +/// A streaming reader of a set of [`QuerySolution`] solutions. +/// +/// It implements the [`Iterator`] API to iterate over the solutions. 
+/// +/// Example in JSON (the API is the same for XML and TSV): +/// ``` +/// use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader}; +/// use oxrdf::{Literal, Variable}; +/// +/// let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json); +/// if let FromReadQueryResultsReader::Solutions(solutions) = json_parser.parse_read(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice())? { +/// assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]); +/// for solution in solutions { +/// assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]); +/// } +/// } +/// # Result::<(),sparesults::QueryResultsParseError>::Ok(()) +/// ``` +pub struct FromReadSolutionsReader<R: Read> { + variables: Arc<[Variable]>, + solutions: FromReadSolutionsReaderKind<R>, +} + +enum FromReadSolutionsReaderKind<R: Read> { + Xml(FromReadXmlSolutionsReader<R>), + Json(FromReadJsonSolutionsReader<R>), + Tsv(FromReadTsvSolutionsReader<R>), +} + +impl<R: Read> FromReadSolutionsReader<R> { + /// Ordered list of the declared variables at the beginning of the results. + /// + /// Example in TSV (the API is the same for JSON and XML): + /// ``` + /// use oxrdf::Variable; + /// use sparesults::{FromReadQueryResultsReader, QueryResultsFormat, QueryResultsParser}; + /// + /// let tsv_parser = QueryResultsParser::from_format(QueryResultsFormat::Tsv); + /// if let FromReadQueryResultsReader::Solutions(solutions) = + /// tsv_parser.parse_read(b"?foo\t?bar\n\"ex1\"\t\"ex2\"".as_slice())? + /// { + /// assert_eq!( + /// solutions.variables(), + /// &[ + /// Variable::new_unchecked("foo"), + /// Variable::new_unchecked("bar") + /// ] + /// ); + /// } + /// # Result::<(),sparesults::QueryResultsParseError>::Ok(()) + /// ``` + #[inline] + pub fn variables(&self) -> &[Variable] { + &self.variables + } +} + +impl<R: Read> Iterator for FromReadSolutionsReader<R> { + type Item = Result<QuerySolution, QueryResultsParseError>; + + fn next(&mut self) -> Option<Self::Item> { + Some( + match &mut self.solutions { + FromReadSolutionsReaderKind::Xml(reader) => reader.read_next(), + FromReadSolutionsReaderKind::Json(reader) => reader.read_next(), + FromReadSolutionsReaderKind::Tsv(reader) => reader.read_next(), + } + .transpose()? + .map(|values| (Arc::clone(&self.variables), values).into()), + ) + } +} + +/// The reader for a given read of a results file. +/// +/// It is either a read boolean ([`bool`]) or a streaming reader of a set of solutions ([`FromTokioAsyncReadSolutionsReader`]). +/// +/// Example in TSV (the API is the same for JSON and XML): +/// ``` +/// use oxrdf::{Literal, Variable}; +/// use sparesults::{ +/// FromTokioAsyncReadQueryResultsReader, QueryResultsFormat, QueryResultsParser, +/// }; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> Result<(), sparesults::QueryResultsParseError> { +/// let tsv_parser = QueryResultsParser::from_format(QueryResultsFormat::Tsv); +/// +/// // boolean +/// if let FromTokioAsyncReadQueryResultsReader::Boolean(v) = tsv_parser +/// .parse_tokio_async_read(b"true".as_slice()) +/// .await? +/// { +/// assert_eq!(v, true); +/// } +/// +/// // solutions +/// if let FromTokioAsyncReadQueryResultsReader::Solutions(mut solutions) = tsv_parser +/// .parse_tokio_async_read(b"?foo\t?bar\n\"test\"\t".as_slice()) +/// .await?
+/// { +/// assert_eq!( +/// solutions.variables(), +/// &[ +/// Variable::new_unchecked("foo"), +/// Variable::new_unchecked("bar") +/// ] +/// ); +/// while let Some(solution) = solutions.next().await { +/// assert_eq!( +/// solution?.iter().collect::<Vec<_>>(), +/// vec![( +/// &Variable::new_unchecked("foo"), +/// &Literal::from("test").into() +/// )] +/// ); +/// } +/// } +/// # Ok(()) +/// # } +/// ``` +#[cfg(feature = "async-tokio")] +pub enum FromTokioAsyncReadQueryResultsReader<R: AsyncRead + Unpin> { + Solutions(FromTokioAsyncReadSolutionsReader<R>), + Boolean(bool), +} + +/// A streaming reader of a set of [`QuerySolution`] solutions. +/// +/// Use the asynchronous [`next`](Self::next) method to iterate over the solutions. +/// +/// Example in JSON (the API is the same for XML and TSV): +/// ``` +/// use sparesults::{QueryResultsFormat, QueryResultsParser, FromTokioAsyncReadQueryResultsReader}; +/// use oxrdf::{Literal, Variable}; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> Result<(), sparesults::QueryResultsParseError> { +/// let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json); +/// if let FromTokioAsyncReadQueryResultsReader::Solutions(mut solutions) = json_parser.parse_tokio_async_read(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice()).await? { +/// assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]); +/// while let Some(solution) = solutions.next().await { +/// assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]); +/// } +/// } +/// # Ok(()) +/// # } +/// ``` +#[cfg(feature = "async-tokio")] +pub struct FromTokioAsyncReadSolutionsReader<R: AsyncRead + Unpin> { + variables: Arc<[Variable]>, + solutions: FromTokioAsyncReadSolutionsReaderKind<R>, +} + +#[cfg(feature = "async-tokio")] +enum FromTokioAsyncReadSolutionsReaderKind<R: AsyncRead + Unpin> { + Json(FromTokioAsyncReadJsonSolutionsReader<R>), + Xml(FromTokioAsyncReadXmlSolutionsReader<R>), + Tsv(FromTokioAsyncReadTsvSolutionsReader<R>), +} + +#[cfg(feature = "async-tokio")] +impl<R: AsyncRead + Unpin> FromTokioAsyncReadSolutionsReader<R> { + /// Ordered list of the declared variables at the beginning of the results. + /// + /// Example in TSV (the API is the same for JSON and XML): + /// ``` + /// use oxrdf::Variable; + /// use sparesults::{ + /// FromTokioAsyncReadQueryResultsReader, QueryResultsFormat, QueryResultsParser, + /// }; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), sparesults::QueryResultsParseError> { + /// let tsv_parser = QueryResultsParser::from_format(QueryResultsFormat::Tsv); + /// if let FromTokioAsyncReadQueryResultsReader::Solutions(solutions) = tsv_parser + /// .parse_tokio_async_read(b"?foo\t?bar\n\"ex1\"\t\"ex2\"".as_slice()) + /// .await? + /// { + /// assert_eq!( + /// solutions.variables(), + /// &[ + /// Variable::new_unchecked("foo"), + /// Variable::new_unchecked("bar") + /// ] + /// ); + /// } + /// # Ok(()) + /// # } + /// ``` + #[inline] + pub fn variables(&self) -> &[Variable] { + &self.variables + } + + /// Reads the next solution or returns `None` if the file is finished.
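+ /// + /// Example in TSV (a minimal sketch mirroring the examples above; the API is the same for JSON and XML): + /// ``` + /// use sparesults::{FromTokioAsyncReadQueryResultsReader, QueryResultsFormat, QueryResultsParser}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), sparesults::QueryResultsParseError> { + /// let tsv_parser = QueryResultsParser::from_format(QueryResultsFormat::Tsv); + /// if let FromTokioAsyncReadQueryResultsReader::Solutions(mut solutions) = tsv_parser + /// .parse_tokio_async_read(b"?foo\n\"test\"".as_slice()) + /// .await? + /// { + /// // each call reads one solution; `None` marks the end of the results + /// while let Some(solution) = solutions.next().await { + /// assert!(solution?.get("foo").is_some()); + /// } + /// } + /// # Ok(()) + /// # } + /// ```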
+ pub async fn next(&mut self) -> Option<Result<QuerySolution, QueryResultsParseError>> { + Some( + match &mut self.solutions { + FromTokioAsyncReadSolutionsReaderKind::Json(reader) => reader.read_next().await, + FromTokioAsyncReadSolutionsReaderKind::Xml(reader) => reader.read_next().await, + FromTokioAsyncReadSolutionsReaderKind::Tsv(reader) => reader.read_next().await, + } + .transpose()? + .map(|values| (Arc::clone(&self.variables), values).into()), + ) + } +} diff --git a/ng-oxigraph/src/sparesults/serializer.rs b/ng-oxigraph/src/sparesults/serializer.rs new file mode 100644 index 0000000..dee026f --- /dev/null +++ b/ng-oxigraph/src/sparesults/serializer.rs @@ -0,0 +1,427 @@ +use crate::oxrdf::{TermRef, Variable, VariableRef}; +#[cfg(feature = "async-tokio")] +use crate::sparesults::csv::{ + tokio_async_write_boolean_csv_result, ToTokioAsyncWriteCsvSolutionsWriter, + ToTokioAsyncWriteTsvSolutionsWriter, +}; +use crate::sparesults::csv::{ + write_boolean_csv_result, ToWriteCsvSolutionsWriter, ToWriteTsvSolutionsWriter, +}; +use crate::sparesults::format::QueryResultsFormat; +#[cfg(feature = "async-tokio")] +use crate::sparesults::json::{ + tokio_async_write_boolean_json_result, ToTokioAsyncWriteJsonSolutionsWriter, +}; +use crate::sparesults::json::{write_boolean_json_result, ToWriteJsonSolutionsWriter}; +#[cfg(feature = "async-tokio")] +use crate::sparesults::xml::{ + tokio_async_write_boolean_xml_result, ToTokioAsyncWriteXmlSolutionsWriter, +}; +use crate::sparesults::xml::{write_boolean_xml_result, ToWriteXmlSolutionsWriter}; +use std::io::{self, Write}; +#[cfg(feature = "async-tokio")] +use tokio::io::AsyncWrite; + +/// A serializer for [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats. +/// +/// It currently supports the following formats: +/// * [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/) ([`QueryResultsFormat::Xml`](QueryResultsFormat::Xml)) +/// * [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) ([`QueryResultsFormat::Json`](QueryResultsFormat::Json)) +/// * [SPARQL Query Results CSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Csv`](QueryResultsFormat::Csv)) +/// * [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Tsv`](QueryResultsFormat::Tsv)) +/// +/// Example in JSON (the API is the same for XML, CSV and TSV): +/// ``` +/// use sparesults::{QueryResultsFormat, QueryResultsSerializer}; +/// use oxrdf::{LiteralRef, Variable, VariableRef}; +/// use std::iter::once; +/// +/// let json_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Json); +/// +/// // boolean +/// let mut buffer = Vec::new(); +/// json_serializer.serialize_boolean_to_write(&mut buffer, true)?; +/// assert_eq!(buffer, br#"{"head":{},"boolean":true}"#); +/// +/// // solutions +/// let mut buffer = Vec::new(); +/// let mut writer = json_serializer.serialize_solutions_to_write(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")])?; +/// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test"))))?; +/// writer.finish()?; +/// assert_eq!(buffer, br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#); +/// # std::io::Result::Ok(()) +/// ``` +pub struct QueryResultsSerializer { + format: QueryResultsFormat, +} + +impl QueryResultsSerializer { + /// Builds a serializer for the given format. 
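+ /// + /// Example (a minimal sketch): + /// ``` + /// use sparesults::{QueryResultsFormat, QueryResultsSerializer}; + /// + /// // build a serializer producing the SPARQL JSON results format + /// let _json_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Json); + /// ```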
+ #[inline] + pub fn from_format(format: QueryResultsFormat) -> Self { + Self { format } + } + + /// Write a boolean query result (from an `ASK` query) into the given [`Write`] implementation. + /// + /// Example in XML (the API is the same for JSON, CSV and TSV): + /// ``` + /// use sparesults::{QueryResultsFormat, QueryResultsSerializer}; + /// + /// let xml_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Xml); + /// let mut buffer = Vec::new(); + /// xml_serializer.serialize_boolean_to_write(&mut buffer, true)?; + /// assert_eq!(buffer, br#"<?xml version="1.0"?><sparql xmlns="http://www.w3.org/2005/sparql-results#"><head></head><boolean>true</boolean></sparql>"#); + /// # std::io::Result::Ok(()) + /// ``` + pub fn serialize_boolean_to_write<W: Write>(&self, write: W, value: bool) -> io::Result<W> { + match self.format { + QueryResultsFormat::Xml => write_boolean_xml_result(write, value), + QueryResultsFormat::Json => write_boolean_json_result(write, value), + QueryResultsFormat::Csv | QueryResultsFormat::Tsv => { + write_boolean_csv_result(write, value) + } + } + } + + /// Write a boolean query result (from an `ASK` query) into the given [`AsyncWrite`] implementation. + /// + /// Example in JSON (the API is the same for XML, CSV and TSV): + /// ``` + /// use sparesults::{QueryResultsFormat, QueryResultsSerializer}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> std::io::Result<()> { + /// let json_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Json); + /// let mut buffer = Vec::new(); + /// json_serializer + /// .serialize_boolean_to_tokio_async_write(&mut buffer, false) + /// .await?; + /// assert_eq!(buffer, br#"{"head":{},"boolean":false}"#); + /// # Ok(()) + /// # } + /// ``` + #[cfg(feature = "async-tokio")] + pub async fn serialize_boolean_to_tokio_async_write<W: AsyncWrite + Unpin>( + &self, + write: W, + value: bool, + ) -> io::Result<W> { + match self.format { + QueryResultsFormat::Xml => tokio_async_write_boolean_xml_result(write, value).await, + QueryResultsFormat::Json => tokio_async_write_boolean_json_result(write, value).await, + QueryResultsFormat::Csv | QueryResultsFormat::Tsv => { + tokio_async_write_boolean_csv_result(write, value).await + } + } + } + + #[deprecated(note = "use serialize_boolean_to_write", since = "0.4.0")] + pub fn write_boolean_result<W: Write>(&self, writer: W, value: bool) -> io::Result<W> { + self.serialize_boolean_to_write(writer, value) + } + + /// Returns a `SolutionsWriter` allowing writing query solutions into the given [`Write`] implementation. + /// + /// <div class="warning"> + /// + /// Do not forget to run the [`finish`](ToWriteSolutionsWriter::finish()) method to properly write the last bytes of the file.</div> + /// + /// <div class="warning"> + /// + /// This writer does unbuffered writes. 
You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div> + /// + /// Example in XML (the API is the same for JSON, CSV and TSV): + /// ``` + /// use sparesults::{QueryResultsFormat, QueryResultsSerializer}; + /// use oxrdf::{LiteralRef, Variable, VariableRef}; + /// use std::iter::once; + /// + /// let xml_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Xml); + /// let mut buffer = Vec::new(); + /// let mut writer = xml_serializer.serialize_solutions_to_write(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")])?; + /// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test"))))?; + /// writer.finish()?; + /// assert_eq!(buffer, br#"<?xml version="1.0"?><sparql xmlns="http://www.w3.org/2005/sparql-results#"><head><variable name="foo"/><variable name="bar"/></head><results><result><binding name="foo"><literal>test</literal></binding></result></results></sparql>"#); + /// # std::io::Result::Ok(()) + /// ``` + pub fn serialize_solutions_to_write<W: Write>( + &self, + write: W, + variables: Vec<Variable>, + ) -> io::Result<ToWriteSolutionsWriter<W>> { + Ok(ToWriteSolutionsWriter { + formatter: match self.format { + QueryResultsFormat::Xml => ToWriteSolutionsWriterKind::Xml( + ToWriteXmlSolutionsWriter::start(write, &variables)?, + ), + QueryResultsFormat::Json => ToWriteSolutionsWriterKind::Json( + ToWriteJsonSolutionsWriter::start(write, &variables)?, + ), + QueryResultsFormat::Csv => ToWriteSolutionsWriterKind::Csv( + ToWriteCsvSolutionsWriter::start(write, variables)?, + ), + QueryResultsFormat::Tsv => ToWriteSolutionsWriterKind::Tsv( + ToWriteTsvSolutionsWriter::start(write, variables)?, + ), + }, + }) + } + + /// Returns a `SolutionsWriter` allowing writing query solutions into the given [`AsyncWrite`] implementation. + /// + /// <div class="warning"> + /// + /// Do not forget to run the [`finish`](ToTokioAsyncWriteSolutionsWriter::finish()) method to properly write the last bytes of the file.</div> + /// + /// <div class="warning"> + /// + /// This writer does unbuffered writes.
You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div> + /// + /// Example in JSON (the API is the same for XML, CSV and TSV): + /// ``` + /// use sparesults::{QueryResultsFormat, QueryResultsSerializer}; + /// use oxrdf::{LiteralRef, Variable, VariableRef}; + /// use std::iter::once; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> std::io::Result<()> { + /// let json_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Json); + /// let mut buffer = Vec::new(); + /// let mut writer = json_serializer.serialize_solutions_to_tokio_async_write(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]).await?; + /// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test")))).await?; + /// writer.finish().await?; + /// assert_eq!(buffer, br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#); + /// # Ok(()) + /// # } + /// ``` + #[cfg(feature = "async-tokio")] + pub async fn serialize_solutions_to_tokio_async_write<W: AsyncWrite + Unpin>( + &self, + write: W, + variables: Vec<Variable>, + ) -> io::Result<ToTokioAsyncWriteSolutionsWriter<W>> { + Ok(ToTokioAsyncWriteSolutionsWriter { + formatter: match self.format { + QueryResultsFormat::Xml => ToTokioAsyncWriteSolutionsWriterKind::Xml( + ToTokioAsyncWriteXmlSolutionsWriter::start(write, &variables).await?, + ), + QueryResultsFormat::Json => ToTokioAsyncWriteSolutionsWriterKind::Json( + ToTokioAsyncWriteJsonSolutionsWriter::start(write, &variables).await?, + ), + QueryResultsFormat::Csv => ToTokioAsyncWriteSolutionsWriterKind::Csv( + ToTokioAsyncWriteCsvSolutionsWriter::start(write, variables).await?, + ), + QueryResultsFormat::Tsv => ToTokioAsyncWriteSolutionsWriterKind::Tsv( + ToTokioAsyncWriteTsvSolutionsWriter::start(write, variables).await?, + ), + }, + }) + } + + #[deprecated(note = "use serialize_solutions_to_write", since = "0.4.0")] + pub fn solutions_writer<W: Write>( + &self, + writer: W, + variables: Vec<Variable>, + ) -> io::Result<ToWriteSolutionsWriter<W>> { + self.serialize_solutions_to_write(writer, variables) + } +} + +impl From<QueryResultsFormat> for QueryResultsSerializer { + fn from(format: QueryResultsFormat) -> Self { + Self::from_format(format) + } +} + +/// Allows writing query results into a [`Write`] implementation. +/// +/// Could be built using a [`QueryResultsSerializer`]. +/// +/// <div class="warning"> +/// +/// Do not forget to run the [`finish`](ToWriteSolutionsWriter::finish()) method to properly write the last bytes of the file.</div> +/// +/// <div class="warning"> +/// +/// This writer does unbuffered writes.
You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div> +/// +/// Example in TSV (the API is the same for JSON, XML and CSV): +/// ``` +/// use oxrdf::{LiteralRef, Variable, VariableRef}; +/// use sparesults::{QueryResultsFormat, QueryResultsSerializer}; +/// use std::iter::once; +/// +/// let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv); +/// let mut buffer = Vec::new(); +/// let mut writer = tsv_serializer.serialize_solutions_to_write( +/// &mut buffer, +/// vec![ +/// Variable::new_unchecked("foo"), +/// Variable::new_unchecked("bar"), +/// ], +/// )?; +/// writer.write(once(( +/// VariableRef::new_unchecked("foo"), +/// LiteralRef::from("test"), +/// )))?; +/// writer.finish()?; +/// assert_eq!(buffer, b"?foo\t?bar\n\"test\"\t\n"); +/// # std::io::Result::Ok(()) +/// ``` +#[must_use] +pub struct ToWriteSolutionsWriter<W: Write> { + formatter: ToWriteSolutionsWriterKind<W>, +} + +enum ToWriteSolutionsWriterKind<W: Write> { + Xml(ToWriteXmlSolutionsWriter<W>), + Json(ToWriteJsonSolutionsWriter<W>), + Csv(ToWriteCsvSolutionsWriter<W>), + Tsv(ToWriteTsvSolutionsWriter<W>), +} + +impl<W: Write> ToWriteSolutionsWriter<W> { + /// Writes a solution. + /// + /// Example in JSON (the API is the same for XML, CSV and TSV): + /// ``` + /// use sparesults::{QueryResultsFormat, QueryResultsSerializer, QuerySolution}; + /// use oxrdf::{Literal, LiteralRef, Variable, VariableRef}; + /// use std::iter::once; + /// + /// let json_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Json); + /// let mut buffer = Vec::new(); + /// let mut writer = json_serializer.serialize_solutions_to_write(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")])?; + /// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test"))))?; + /// writer.write(&QuerySolution::from((vec![Variable::new_unchecked("bar")], vec![Some(Literal::from("test").into())])))?; + /// writer.finish()?; + /// assert_eq!(buffer, br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}},{"bar":{"type":"literal","value":"test"}}]}}"#); + /// # std::io::Result::Ok(()) + /// ``` + pub fn write<'a>( + &mut self, + solution: impl IntoIterator<Item = (impl Into<VariableRef<'a>>, impl Into<TermRef<'a>>)>, + ) -> io::Result<()> { + let solution = solution.into_iter().map(|(v, s)| (v.into(), s.into())); + match &mut self.formatter { + ToWriteSolutionsWriterKind::Xml(writer) => writer.write(solution), + ToWriteSolutionsWriterKind::Json(writer) => writer.write(solution), + ToWriteSolutionsWriterKind::Csv(writer) => writer.write(solution), + ToWriteSolutionsWriterKind::Tsv(writer) => writer.write(solution), + } + } + + /// Writes the last bytes of the file. + pub fn finish(self) -> io::Result<W> { + match self.formatter { + ToWriteSolutionsWriterKind::Xml(write) => write.finish(), + ToWriteSolutionsWriterKind::Json(write) => write.finish(), + ToWriteSolutionsWriterKind::Csv(write) => Ok(write.finish()), + ToWriteSolutionsWriterKind::Tsv(write) => Ok(write.finish()), + } + } +} + +/// Allows writing query results into an [`AsyncWrite`] implementation. +/// +/// Could be built using a [`QueryResultsSerializer`]. +/// +/// <div class="warning"> +/// +/// Do not forget to run the [`finish`](ToTokioAsyncWriteSolutionsWriter::finish()) method to properly write the last bytes of the file.</div> +/// +/// <div class="warning"> +/// +/// This writer does unbuffered writes.
You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div> +/// +/// Example in TSV (the API is the same for JSON, CSV and XML): +/// ``` +/// use oxrdf::{LiteralRef, Variable, VariableRef}; +/// use sparesults::{QueryResultsFormat, QueryResultsSerializer}; +/// use std::iter::once; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> std::io::Result<()> { +/// let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv); +/// let mut buffer = Vec::new(); +/// let mut writer = tsv_serializer +/// .serialize_solutions_to_tokio_async_write( +/// &mut buffer, +/// vec![ +/// Variable::new_unchecked("foo"), +/// Variable::new_unchecked("bar"), +/// ], +/// ) +/// .await?; +/// writer +/// .write(once(( +/// VariableRef::new_unchecked("foo"), +/// LiteralRef::from("test"), +/// ))) +/// .await?; +/// writer.finish().await?; +/// assert_eq!(buffer, b"?foo\t?bar\n\"test\"\t\n"); +/// # Ok(()) +/// # } +/// ``` +#[cfg(feature = "async-tokio")] +#[must_use] +pub struct ToTokioAsyncWriteSolutionsWriter<W: AsyncWrite + Unpin> { + formatter: ToTokioAsyncWriteSolutionsWriterKind<W>, +} + +#[cfg(feature = "async-tokio")] +enum ToTokioAsyncWriteSolutionsWriterKind<W: AsyncWrite + Unpin> { + Xml(ToTokioAsyncWriteXmlSolutionsWriter<W>), + Json(ToTokioAsyncWriteJsonSolutionsWriter<W>), + Csv(ToTokioAsyncWriteCsvSolutionsWriter<W>), + Tsv(ToTokioAsyncWriteTsvSolutionsWriter<W>), +} + +#[cfg(feature = "async-tokio")] +impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteSolutionsWriter<W> { + /// Writes a solution. + /// + /// Example in JSON (the API is the same for XML, CSV and TSV): + /// ``` + /// use sparesults::{QueryResultsFormat, QueryResultsSerializer, QuerySolution}; + /// use oxrdf::{Literal, LiteralRef, Variable, VariableRef}; + /// use std::iter::once; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> std::io::Result<()> { + /// let json_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Json); + /// let mut buffer = Vec::new(); + /// let mut writer = json_serializer.serialize_solutions_to_tokio_async_write(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]).await?; + /// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test")))).await?; + /// writer.write(&QuerySolution::from((vec![Variable::new_unchecked("bar")], vec![Some(Literal::from("test").into())]))).await?; + /// writer.finish().await?; + /// assert_eq!(buffer, br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}},{"bar":{"type":"literal","value":"test"}}]}}"#); + /// # Ok(()) + /// # } + /// ``` + pub async fn write<'a>( + &mut self, + solution: impl IntoIterator<Item = (impl Into<VariableRef<'a>>, impl Into<TermRef<'a>>)>, + ) -> io::Result<()> { + let solution = solution.into_iter().map(|(v, s)| (v.into(), s.into())); + match &mut self.formatter { + ToTokioAsyncWriteSolutionsWriterKind::Xml(writer) => writer.write(solution).await, + ToTokioAsyncWriteSolutionsWriterKind::Json(writer) => writer.write(solution).await, + ToTokioAsyncWriteSolutionsWriterKind::Csv(writer) => writer.write(solution).await, + ToTokioAsyncWriteSolutionsWriterKind::Tsv(writer) => writer.write(solution).await, + } + } + + /// Writes the last bytes of the file. 
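+ /// + /// A minimal sketch in TSV (`finish` hands back the underlying write, so a buffer can be inspected afterwards): + /// ``` + /// use oxrdf::Variable; + /// use sparesults::{QueryResultsFormat, QueryResultsSerializer}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> std::io::Result<()> { + /// let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv); + /// let mut buffer = Vec::new(); + /// let writer = tsv_serializer + /// .serialize_solutions_to_tokio_async_write(&mut buffer, vec![Variable::new_unchecked("foo")]) + /// .await?; + /// // no solutions written: finish() still emits the header and trailer + /// writer.finish().await?; + /// assert!(buffer.starts_with(b"?foo")); + /// # Ok(()) + /// # } + /// ```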
+ pub async fn finish(self) -> io::Result<W> { + match self.formatter { + ToTokioAsyncWriteSolutionsWriterKind::Xml(write) => write.finish().await, + ToTokioAsyncWriteSolutionsWriterKind::Json(write) => write.finish().await, + ToTokioAsyncWriteSolutionsWriterKind::Csv(write) => Ok(write.finish()), + ToTokioAsyncWriteSolutionsWriterKind::Tsv(write) => Ok(write.finish()), + } + } +} diff --git a/ng-oxigraph/src/sparesults/solution.rs b/ng-oxigraph/src/sparesults/solution.rs new file mode 100644 index 0000000..1f18589 --- /dev/null +++ b/ng-oxigraph/src/sparesults/solution.rs @@ -0,0 +1,340 @@ +//! Definition of [`QuerySolution`] structure and associated utility constructions. + +use crate::oxrdf::{Term, Variable, VariableRef}; +use std::fmt; +use std::iter::Zip; +use std::ops::Index; +use std::sync::Arc; + +/// Tuple associating variables and terms that are the result of a SPARQL query. +/// +/// It is the equivalent of a row in SQL. +/// +/// ``` +/// use sparesults::QuerySolution; +/// use oxrdf::{Variable, Literal}; +/// +/// let solution = QuerySolution::from((vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")], vec![Some(Literal::from(1).into()), None])); +/// assert_eq!(solution.get("foo"), Some(&Literal::from(1).into())); // Get the value of the variable ?foo if it exists (here yes). +/// assert_eq!(solution.get(1), None); // Get the value of the second column if it exists (here no). +/// ``` +pub struct QuerySolution { + variables: Arc<[Variable]>, + values: Vec<Option<Term>>, +} + +impl QuerySolution { + /// Returns a value for a given position in the tuple ([`usize`](std::usize)) or a given variable name ([`&str`](std::str), [`Variable`] or [`VariableRef`]). + /// + /// ``` + /// use sparesults::QuerySolution; + /// use oxrdf::{Variable, Literal}; + /// + /// let solution = QuerySolution::from((vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")], vec![Some(Literal::from(1).into()), None])); + /// assert_eq!(solution.get("foo"), Some(&Literal::from(1).into())); // Get the value of the variable ?foo if it exists (here yes). + /// assert_eq!(solution.get(1), None); // Get the value of the second column if it exists (here no). + /// ``` + #[inline] + pub fn get(&self, index: impl VariableSolutionIndex) -> Option<&Term> { + self.values.get(index.index(self)?).and_then(Option::as_ref) + } + + /// The number of variables which could be bound. + /// + /// It is also the number of columns in the solutions table. + /// + /// ``` + /// use oxrdf::{Literal, Variable}; + /// use sparesults::QuerySolution; + /// + /// let solution = QuerySolution::from(( + /// vec![ + /// Variable::new_unchecked("foo"), + /// Variable::new_unchecked("bar"), + /// ], + /// vec![Some(Literal::from(1).into()), None], + /// )); + /// assert_eq!(solution.len(), 2); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.values.len() + } + + /// Returns `true` if no variable is bound in this solution.
+ /// + /// ``` + /// use oxrdf::{Literal, Variable}; + /// use sparesults::QuerySolution; + /// + /// let solution = QuerySolution::from(( + /// vec![ + /// Variable::new_unchecked("foo"), + /// Variable::new_unchecked("bar"), + /// ], + /// vec![Some(Literal::from(1).into()), None], + /// )); + /// assert!(!solution.is_empty()); + /// + /// let empty_solution = QuerySolution::from(( + /// vec![ + /// Variable::new_unchecked("foo"), + /// Variable::new_unchecked("bar"), + /// ], + /// vec![None, None], + /// )); + /// assert!(empty_solution.is_empty()); + /// ``` + #[inline] + pub fn is_empty(&self) -> bool { + self.values.iter().all(Option::is_none) + } + + /// Returns an iterator over bound variables. + /// + /// ``` + /// use oxrdf::{Literal, Variable}; + /// use sparesults::QuerySolution; + /// + /// let solution = QuerySolution::from(( + /// vec![ + /// Variable::new_unchecked("foo"), + /// Variable::new_unchecked("bar"), + /// ], + /// vec![Some(Literal::from(1).into()), None], + /// )); + /// assert_eq!( + /// solution.iter().collect::<Vec<_>>(), + /// vec![(&Variable::new_unchecked("foo"), &Literal::from(1).into())] + /// ); + /// ``` + #[inline] + pub fn iter(&self) -> impl Iterator<Item = (&Variable, &Term)> { + self.into_iter() + } + + /// Returns the ordered slice of variable values. + /// + /// ``` + /// use oxrdf::{Literal, Variable}; + /// use sparesults::QuerySolution; + /// + /// let solution = QuerySolution::from(( + /// vec![ + /// Variable::new_unchecked("foo"), + /// Variable::new_unchecked("bar"), + /// ], + /// vec![Some(Literal::from(1).into()), None], + /// )); + /// assert_eq!(solution.values(), &[Some(Literal::from(1).into()), None]); + /// ``` + #[inline] + pub fn values(&self) -> &[Option<Term>] { + &self.values + } + + /// Returns the ordered slice of the solution variables, bound or not. 
+ /// + /// ``` + /// use oxrdf::{Literal, Variable}; + /// use sparesults::QuerySolution; + /// + /// let solution = QuerySolution::from(( + /// vec![ + /// Variable::new_unchecked("foo"), + /// Variable::new_unchecked("bar"), + /// ], + /// vec![Some(Literal::from(1).into()), None], + /// )); + /// assert_eq!( + /// solution.variables(), + /// &[ + /// Variable::new_unchecked("foo"), + /// Variable::new_unchecked("bar") + /// ] + /// ); + /// ``` + #[inline] + pub fn variables(&self) -> &[Variable] { + &self.variables + } +} + +impl<V: Into<Arc<[Variable]>>, S: Into<Vec<Option<Term>>>> From<(V, S)> for QuerySolution { + #[inline] + fn from((v, s): (V, S)) -> Self { + Self { + variables: v.into(), + values: s.into(), + } + } +} + +impl<'a> IntoIterator for &'a QuerySolution { + type Item = (&'a Variable, &'a Term); + type IntoIter = Iter<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + Iter { + inner: self.variables.iter().zip(&self.values), + } + } +} + +impl Index<usize> for QuerySolution { + type Output = Term; + + #[allow(clippy::panic)] + #[inline] + fn index(&self, index: usize) -> &Self::Output { + self.get(index) + .unwrap_or_else(|| panic!("The column {index} is not set in this solution")) + } +} + +impl Index<&str> for QuerySolution { + type Output = Term; + + #[allow(clippy::panic)] + #[inline] + fn index(&self, index: &str) -> &Self::Output { + self.get(index) + .unwrap_or_else(|| panic!("The variable ?{index} is not set in this solution")) + } +} + +impl Index<VariableRef<'_>> for QuerySolution { + type Output = Term; + + #[allow(clippy::panic)] + #[inline] + fn index(&self, index: VariableRef<'_>) -> &Self::Output { + self.get(index) + .unwrap_or_else(|| panic!("The variable {index} is not set in this solution")) + } +} +impl Index<Variable> for QuerySolution { + type Output = Term; + + #[inline] + fn index(&self, index: Variable) -> &Self::Output { + self.index(index.as_ref()) + } +} + +impl Index<&Variable> for QuerySolution { + type Output = Term; + + #[inline] + fn index(&self, index: &Variable) -> &Self::Output { + self.index(index.as_ref()) + } +} + +impl PartialEq for QuerySolution { + fn eq(&self, other: &Self) -> bool { + for (k, v) in self.iter() { + if other.get(k) != Some(v) { + return false; + } + } + for (k, v) in other.iter() { + if self.get(k) != Some(v) { + return false; + } + } + true + } +} + +impl Eq for QuerySolution {} + +impl fmt::Debug for QuerySolution { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_map().entries(self.iter()).finish() + } +} + +/// An iterator over [`QuerySolution`] bound variables. 
+/// +/// ``` +/// use oxrdf::{Literal, Variable}; +/// use sparesults::QuerySolution; +/// +/// let solution = QuerySolution::from(( +/// vec![ +/// Variable::new_unchecked("foo"), +/// Variable::new_unchecked("bar"), +/// ], +/// vec![Some(Literal::from(1).into()), None], +/// )); +/// assert_eq!( +/// solution.iter().collect::<Vec<_>>(), +/// vec![(&Variable::new_unchecked("foo"), &Literal::from(1).into())] +/// ); +/// ``` +pub struct Iter<'a> { + inner: Zip<std::slice::Iter<'a, Variable>, std::slice::Iter<'a, Option<Term>>>, +} + +impl<'a> Iterator for Iter<'a> { + type Item = (&'a Variable, &'a Term); + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + for (variable, value) in &mut self.inner { + if let Some(value) = value { + return Some((variable, value)); + } + } + None + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + (0, self.inner.size_hint().1) + } +} + +/// A utility trait to get values for a given variable or tuple position. +/// +/// See [`QuerySolution::get`]. +pub trait VariableSolutionIndex { + fn index(self, solution: &QuerySolution) -> Option<usize>; +} + +impl VariableSolutionIndex for usize { + #[inline] + fn index(self, _: &QuerySolution) -> Option<usize> { + Some(self) + } +} + +impl VariableSolutionIndex for &str { + #[inline] + fn index(self, solution: &QuerySolution) -> Option<usize> { + solution.variables.iter().position(|v| v.as_str() == self) + } +} + +impl VariableSolutionIndex for VariableRef<'_> { + #[inline] + fn index(self, solution: &QuerySolution) -> Option<usize> { + solution.variables.iter().position(|v| *v == self) + } +} + +impl VariableSolutionIndex for &Variable { + #[inline] + fn index(self, solution: &QuerySolution) -> Option<usize> { + self.as_ref().index(solution) + } +} + +impl VariableSolutionIndex for Variable { + #[inline] + fn index(self, solution: &QuerySolution) -> Option<usize> { + self.as_ref().index(solution) + } +} diff --git a/ng-oxigraph/src/sparesults/xml.rs b/ng-oxigraph/src/sparesults/xml.rs new file mode 100644 index 0000000..0c90f4f --- /dev/null +++ b/ng-oxigraph/src/sparesults/xml.rs @@ -0,0 +1,833 @@ +//! 
Implementation of [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/) + +use crate::oxrdf::vocab::rdf; +use crate::oxrdf::*; +use crate::sparesults::error::{QueryResultsParseError, QueryResultsSyntaxError}; +use quick_xml::escape::unescape; +use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; +use quick_xml::{Decoder, Error, Reader, Writer}; +use std::collections::BTreeMap; +use std::io::{self, BufReader, Read, Write}; +use std::mem::take; +use std::sync::Arc; +#[cfg(feature = "async-tokio")] +use tokio::io::{AsyncRead, AsyncWrite, BufReader as AsyncBufReader}; + +pub fn write_boolean_xml_result<W: Write>(write: W, value: bool) -> io::Result<W> { + let mut writer = Writer::new(write); + for event in inner_write_boolean_xml_result(value) { + writer.write_event(event).map_err(map_xml_error)?; + } + Ok(writer.into_inner()) +} + +#[cfg(feature = "async-tokio")] +pub async fn tokio_async_write_boolean_xml_result<W: AsyncWrite + Unpin>( + write: W, + value: bool, +) -> io::Result<W> { + let mut writer = Writer::new(write); + for event in inner_write_boolean_xml_result(value) { + writer + .write_event_async(event) + .await + .map_err(map_xml_error)?; + } + Ok(writer.into_inner()) +} + +fn inner_write_boolean_xml_result(value: bool) -> [Event<'static>; 8] { + [ + Event::Decl(BytesDecl::new("1.0", None, None)), + Event::Start( + BytesStart::new("sparql") + .with_attributes([("xmlns", "http://www.w3.org/2005/sparql-results#")]), + ), + Event::Start(BytesStart::new("head")), + Event::End(BytesEnd::new("head")), + Event::Start(BytesStart::new("boolean")), + Event::Text(BytesText::new(if value { "true" } else { "false" })), + Event::End(BytesEnd::new("boolean")), + Event::End(BytesEnd::new("sparql")), + ] +} + +pub struct ToWriteXmlSolutionsWriter<W: Write> { + inner: InnerXmlSolutionsWriter, + writer: Writer<W>, +} + +impl<W: Write> ToWriteXmlSolutionsWriter<W> { + pub fn start(write: W, variables: &[Variable]) -> io::Result<Self> { + let mut writer = Writer::new(write); + let mut buffer = Vec::with_capacity(48); + let inner = InnerXmlSolutionsWriter::start(&mut buffer, variables); + Self::do_write(&mut writer, buffer)?; + Ok(Self { inner, writer }) + } + + pub fn write<'a>( + &mut self, + solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>, + ) -> io::Result<()> { + let mut buffer = Vec::with_capacity(48); + self.inner.write(&mut buffer, solution); + Self::do_write(&mut self.writer, buffer) + } + + pub fn finish(mut self) -> io::Result<W> { + let mut buffer = Vec::with_capacity(4); + self.inner.finish(&mut buffer); + Self::do_write(&mut self.writer, buffer)?; + Ok(self.writer.into_inner()) + } + + fn do_write(writer: &mut Writer<W>, output: Vec<Event<'_>>) -> io::Result<()> { + for event in output { + writer.write_event(event).map_err(map_xml_error)?; + } + Ok(()) + } +} + +#[cfg(feature = "async-tokio")] +pub struct ToTokioAsyncWriteXmlSolutionsWriter<W: AsyncWrite + Unpin> { + inner: InnerXmlSolutionsWriter, + writer: Writer<W>, +} + +#[cfg(feature = "async-tokio")] +impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteXmlSolutionsWriter<W> { + pub async fn start(write: W, variables: &[Variable]) -> io::Result<Self> { + let mut writer = Writer::new(write); + let mut buffer = Vec::with_capacity(48); + let inner = InnerXmlSolutionsWriter::start(&mut buffer, variables); + Self::do_write(&mut writer, buffer).await?; + Ok(Self { inner, writer }) + } + + pub async fn write<'a>( + &mut self, + solution: impl IntoIterator<Item = (VariableRef<'a>, 
TermRef<'a>)>, + ) -> io::Result<()> { + let mut buffer = Vec::with_capacity(48); + self.inner.write(&mut buffer, solution); + Self::do_write(&mut self.writer, buffer).await + } + + pub async fn finish(mut self) -> io::Result<W> { + let mut buffer = Vec::with_capacity(4); + self.inner.finish(&mut buffer); + Self::do_write(&mut self.writer, buffer).await?; + Ok(self.writer.into_inner()) + } + + async fn do_write(writer: &mut Writer<W>, output: Vec<Event<'_>>) -> io::Result<()> { + for event in output { + writer + .write_event_async(event) + .await + .map_err(map_xml_error)?; + } + Ok(()) + } +} + +struct InnerXmlSolutionsWriter; + +impl InnerXmlSolutionsWriter { + fn start<'a>(output: &mut Vec<Event<'a>>, variables: &'a [Variable]) -> Self { + output.push(Event::Decl(BytesDecl::new("1.0", None, None))); + output.push(Event::Start(BytesStart::new("sparql").with_attributes([( + "xmlns", + "http://www.w3.org/2005/sparql-results#", + )]))); + output.push(Event::Start(BytesStart::new("head"))); + for variable in variables { + output.push(Event::Empty( + BytesStart::new("variable").with_attributes([("name", variable.as_str())]), + )); + } + output.push(Event::End(BytesEnd::new("head"))); + output.push(Event::Start(BytesStart::new("results"))); + Self {} + } + + #[allow(clippy::unused_self)] + fn write<'a>( + &self, + output: &mut Vec<Event<'a>>, + solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>, + ) { + output.push(Event::Start(BytesStart::new("result"))); + for (variable, value) in solution { + output.push(Event::Start( + BytesStart::new("binding").with_attributes([("name", variable.as_str())]), + )); + write_xml_term(output, value); + output.push(Event::End(BytesEnd::new("binding"))); + } + output.push(Event::End(BytesEnd::new("result"))); + } + + #[allow(clippy::unused_self)] + fn finish(self, output: &mut Vec<Event<'_>>) { + output.push(Event::End(BytesEnd::new("results"))); + output.push(Event::End(BytesEnd::new("sparql"))); + } +} + +fn write_xml_term<'a>(output: &mut Vec<Event<'a>>, term: TermRef<'a>) { + match term { + TermRef::NamedNode(uri) => { + output.push(Event::Start(BytesStart::new("uri"))); + output.push(Event::Text(BytesText::new(uri.as_str()))); + output.push(Event::End(BytesEnd::new("uri"))); + } + TermRef::BlankNode(bnode) => { + output.push(Event::Start(BytesStart::new("bnode"))); + output.push(Event::Text(BytesText::new(bnode.as_str()))); + output.push(Event::End(BytesEnd::new("bnode"))); + } + TermRef::Literal(literal) => { + let mut start = BytesStart::new("literal"); + if let Some(language) = literal.language() { + start.push_attribute(("xml:lang", language)); + } else if !literal.is_plain() { + start.push_attribute(("datatype", literal.datatype().as_str())) + } + output.push(Event::Start(start)); + output.push(Event::Text(BytesText::new(literal.value()))); + output.push(Event::End(BytesEnd::new("literal"))); + } + #[cfg(feature = "rdf-star")] + TermRef::Triple(triple) => { + output.push(Event::Start(BytesStart::new("triple"))); + output.push(Event::Start(BytesStart::new("subject"))); + write_xml_term(output, triple.subject.as_ref().into()); + output.push(Event::End(BytesEnd::new("subject"))); + output.push(Event::Start(BytesStart::new("predicate"))); + write_xml_term(output, triple.predicate.as_ref().into()); + output.push(Event::End(BytesEnd::new("predicate"))); + output.push(Event::Start(BytesStart::new("object"))); + write_xml_term(output, triple.object.as_ref()); + output.push(Event::End(BytesEnd::new("object"))); + 
output.push(Event::End(BytesEnd::new("triple"))); + } + } +} + +pub enum FromReadXmlQueryResultsReader<R: Read> { + Solutions { + variables: Vec<Variable>, + solutions: FromReadXmlSolutionsReader<R>, + }, + Boolean(bool), +} + +impl<R: Read> FromReadXmlQueryResultsReader<R> { + pub fn read(read: R) -> Result<Self, QueryResultsParseError> { + let mut reader = Reader::from_reader(BufReader::new(read)); + reader.trim_text(true); + reader.expand_empty_elements(true); + let mut reader_buffer = Vec::new(); + let mut inner = XmlInnerQueryResultsReader { + state: ResultsState::Start, + variables: Vec::new(), + decoder: reader.decoder(), + }; + loop { + reader_buffer.clear(); + let event = reader.read_event_into(&mut reader_buffer)?; + if let Some(result) = inner.read_event(event)? { + return Ok(match result { + XmlInnerQueryResults::Solutions { + variables, + solutions, + } => Self::Solutions { + variables, + solutions: FromReadXmlSolutionsReader { + reader, + inner: solutions, + reader_buffer, + }, + }, + XmlInnerQueryResults::Boolean(value) => Self::Boolean(value), + }); + } + } + } +} + +pub struct FromReadXmlSolutionsReader<R: Read> { + reader: Reader<BufReader<R>>, + inner: XmlInnerSolutionsReader, + reader_buffer: Vec<u8>, +} + +impl<R: Read> FromReadXmlSolutionsReader<R> { + pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> { + loop { + self.reader_buffer.clear(); + let event = self.reader.read_event_into(&mut self.reader_buffer)?; + if event == Event::Eof { + return Ok(None); + } + if let Some(solution) = self.inner.read_event(event)? { + return Ok(Some(solution)); + } + } + } +} + +#[cfg(feature = "async-tokio")] +pub enum FromTokioAsyncReadXmlQueryResultsReader<R: AsyncRead + Unpin> { + Solutions { + variables: Vec<Variable>, + solutions: FromTokioAsyncReadXmlSolutionsReader<R>, + }, + Boolean(bool), +} + +#[cfg(feature = "async-tokio")] +impl<R: AsyncRead + Unpin> FromTokioAsyncReadXmlQueryResultsReader<R> { + pub async fn read(read: R) -> Result<Self, QueryResultsParseError> { + let mut reader = Reader::from_reader(AsyncBufReader::new(read)); + reader.trim_text(true); + reader.expand_empty_elements(true); + let mut reader_buffer = Vec::new(); + let mut inner = XmlInnerQueryResultsReader { + state: ResultsState::Start, + variables: Vec::new(), + decoder: reader.decoder(), + }; + loop { + reader_buffer.clear(); + let event = reader.read_event_into_async(&mut reader_buffer).await?; + if let Some(result) = inner.read_event(event)? { + return Ok(match result { + XmlInnerQueryResults::Solutions { + variables, + solutions, + } => Self::Solutions { + variables, + solutions: FromTokioAsyncReadXmlSolutionsReader { + reader, + inner: solutions, + reader_buffer, + }, + }, + XmlInnerQueryResults::Boolean(value) => Self::Boolean(value), + }); + } + } + } +} + +#[cfg(feature = "async-tokio")] +pub struct FromTokioAsyncReadXmlSolutionsReader<R: AsyncRead + Unpin> { + reader: Reader<AsyncBufReader<R>>, + inner: XmlInnerSolutionsReader, + reader_buffer: Vec<u8>, +} + +#[cfg(feature = "async-tokio")] +impl<R: AsyncRead + Unpin> FromTokioAsyncReadXmlSolutionsReader<R> { + pub async fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> { + loop { + self.reader_buffer.clear(); + let event = self + .reader + .read_event_into_async(&mut self.reader_buffer) + .await?; + if event == Event::Eof { + return Ok(None); + } + if let Some(solution) = self.inner.read_event(event)? 
{ + return Ok(Some(solution)); + } + } + } +} + +enum XmlInnerQueryResults { + Solutions { + variables: Vec<Variable>, + solutions: XmlInnerSolutionsReader, + }, + Boolean(bool), +} + +#[derive(Clone, Copy)] +enum ResultsState { + Start, + Sparql, + Head, + AfterHead, + Boolean, +} + +struct XmlInnerQueryResultsReader { + state: ResultsState, + variables: Vec<Variable>, + decoder: Decoder, +} + +impl XmlInnerQueryResultsReader { + pub fn read_event( + &mut self, + event: Event<'_>, + ) -> Result<Option<XmlInnerQueryResults>, QueryResultsParseError> { + match event { + Event::Start(event) => match self.state { + ResultsState::Start => { + if event.local_name().as_ref() == b"sparql" { + self.state = ResultsState::Sparql; + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg(format!("Expecting <sparql> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into()) + } + } + ResultsState::Sparql => { + if event.local_name().as_ref() == b"head" { + self.state = ResultsState::Head; + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg(format!("Expecting <head> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into()) + } + } + ResultsState::Head => { + if event.local_name().as_ref() == b"variable" { + let name = event.attributes() + .filter_map(Result::ok) + .find(|attr| attr.key.local_name().as_ref() == b"name") + .ok_or_else(|| QueryResultsSyntaxError::msg("No name attribute found for the <variable> tag"))?; + let name = unescape(&self.decoder.decode(&name.value)?)?.into_owned(); + let variable = Variable::new(name).map_err(|e| QueryResultsSyntaxError::msg(format!("Invalid variable name: {e}")))?; + if self.variables.contains(&variable) { + return Err(QueryResultsSyntaxError::msg(format!( + "The variable {variable} is declared twice" + )) + .into()); + } + self.variables.push(variable); + Ok(None) + } else if event.local_name().as_ref() == b"link" { + // no op + Ok(None) + } else { + Err(QueryResultsSyntaxError::msg(format!("Expecting <variable> or <link> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into()) + } + } + ResultsState::AfterHead => { + if event.local_name().as_ref() == b"boolean" { + self.state = ResultsState::Boolean; + Ok(None) + } else if event.local_name().as_ref() == b"results" { + let mut mapping = BTreeMap::default(); + for (i, var) in self.variables.iter().enumerate() { + mapping.insert(var.clone().into_string(), i); + } + Ok(Some(XmlInnerQueryResults::Solutions { + variables: take(&mut self.variables), + solutions: XmlInnerSolutionsReader { + decoder: self.decoder, + mapping, + state_stack: vec![State::Start, State::Start], + new_bindings: Vec::new(), + current_var: None, + term: None, + lang: None, + datatype: None, + subject_stack: Vec::new(), + predicate_stack: Vec::new(), + object_stack: Vec::new(), + }, + })) + } else if event.local_name().as_ref() != b"link" { + Err(QueryResultsSyntaxError::msg(format!("Expecting <results>, <boolean> or <link> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into()) + } else { + Ok(None) + } + } + ResultsState::Boolean => Err(QueryResultsSyntaxError::msg(format!("Unexpected tag inside the <boolean> tag: <{}>", self.decoder.decode(event.name().as_ref())?)).into()) + }, + Event::Text(event) => { + let value = event.unescape()?; + match self.state { + ResultsState::Boolean => { + if value == "true" { + Ok(Some(XmlInnerQueryResults::Boolean(true))) + } else if value == "false" { +
+                            Ok(Some(XmlInnerQueryResults::Boolean(false)))
+                        } else {
+                            Err(QueryResultsSyntaxError::msg(format!("Unexpected boolean value. Found '{value}'")).into())
+                        }
+                    }
+                    _ => Err(QueryResultsSyntaxError::msg(format!("Unexpected textual value found: '{value}'")).into())
+                }
+            }
+            Event::End(event) => {
+                if let ResultsState::Head = self.state {
+                    if event.local_name().as_ref() == b"head" {
+                        self.state = ResultsState::AfterHead
+                    }
+                    Ok(None)
+                } else {
+                    Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results files should have a <head> and a <results> or <boolean> tag").into())
+                }
+            }
+            Event::Eof => Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results files should have a <head> and a <results> or <boolean> tag").into()),
+            Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => {
+                Ok(None)
+            }
+            Event::Empty(_) => unreachable!("Empty events are expanded"),
+            Event::CData(_) => {
+                Err(QueryResultsSyntaxError::msg(
+                    "<![CDATA[...]]> are not supported in SPARQL XML results",
+                )
+                .into())
+            }
+        }
+    }
+}
+
+enum State {
+    Start,
+    Result,
+    Binding,
+    Uri,
+    BNode,
+    Literal,
+    Triple,
+    Subject,
+    Predicate,
+    Object,
+}
+
+struct XmlInnerSolutionsReader {
+    decoder: Decoder,
+    mapping: BTreeMap<String, usize>,
+    state_stack: Vec<State>,
+    new_bindings: Vec<Option<Term>>,
+    current_var: Option<String>,
+    term: Option<Term>,
+    lang: Option<String>,
+    datatype: Option<NamedNode>,
+    subject_stack: Vec<Term>,
+    predicate_stack: Vec<Term>,
+    object_stack: Vec<Term>,
+}
+
+impl XmlInnerSolutionsReader {
+    #[allow(clippy::unwrap_in_result)]
+    pub fn read_event(
+        &mut self,
+        event: Event<'_>,
+    ) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
+        match event {
+            Event::Start(event) => match self.state_stack.last().unwrap() {
+                State::Start => {
+                    if event.local_name().as_ref() == b"result" {
+                        self.new_bindings = vec![None; self.mapping.len()];
+                        self.state_stack.push(State::Result);
+                        Ok(None)
+                    } else {
+                        Err(QueryResultsSyntaxError::msg(format!(
+                            "Expecting <result>, found <{}>",
+                            self.decoder.decode(event.name().as_ref())?
+                        ))
+                        .into())
+                    }
+                }
+                State::Result => {
+                    if event.local_name().as_ref() == b"binding" {
+                        let Some(attr) = event
+                            .attributes()
+                            .filter_map(Result::ok)
+                            .find(|attr| attr.key.local_name().as_ref() == b"name")
+                        else {
+                            return Err(QueryResultsSyntaxError::msg(
+                                "No name attribute found for the <binding> tag",
+                            )
+                            .into());
+                        };
+                        self.current_var =
+                            Some(unescape(&self.decoder.decode(&attr.value)?)?.into_owned());
+                        self.state_stack.push(State::Binding);
+                        Ok(None)
+                    } else {
+                        Err(QueryResultsSyntaxError::msg(format!(
+                            "Expecting <binding>, found <{}>",
+                            self.decoder.decode(event.name().as_ref())?
+                        ))
+                        .into())
+                    }
+                }
+                State::Binding | State::Subject | State::Predicate | State::Object => {
+                    if self.term.is_some() {
+                        return Err(QueryResultsSyntaxError::msg(
+                            "There is already a value for the current binding",
+                        )
+                        .into());
+                    }
+                    if event.local_name().as_ref() == b"uri" {
+                        self.state_stack.push(State::Uri);
+                        Ok(None)
+                    } else if event.local_name().as_ref() == b"bnode" {
+                        self.state_stack.push(State::BNode);
+                        Ok(None)
+                    } else if event.local_name().as_ref() == b"literal" {
+                        for attr in event.attributes() {
+                            let attr = attr.map_err(Error::from)?;
+                            if attr.key.as_ref() == b"xml:lang" {
+                                self.lang = Some(
+                                    unescape(&self.decoder.decode(&attr.value)?)?.into_owned(),
+                                );
+                            } else if attr.key.local_name().as_ref() == b"datatype" {
+                                let iri = self.decoder.decode(&attr.value)?;
+                                let iri = unescape(&iri)?;
+                                self.datatype =
+                                    Some(NamedNode::new(iri.as_ref()).map_err(|e| {
+                                        QueryResultsSyntaxError::msg(format!(
+                                            "Invalid datatype IRI '{iri}': {e}"
+                                        ))
+                                    })?);
+                            }
+                        }
+                        self.state_stack.push(State::Literal);
+                        Ok(None)
+                    } else if event.local_name().as_ref() == b"triple" {
+                        self.state_stack.push(State::Triple);
+                        Ok(None)
+                    } else {
+                        Err(QueryResultsSyntaxError::msg(format!(
+                            "Expecting <uri>, <bnode> or <literal>, found <{}>",
+                            self.decoder.decode(event.name().as_ref())?
+                        ))
+                        .into())
+                    }
+                }
+                State::Triple => {
+                    if event.local_name().as_ref() == b"subject" {
+                        self.state_stack.push(State::Subject);
+                        Ok(None)
+                    } else if event.local_name().as_ref() == b"predicate" {
+                        self.state_stack.push(State::Predicate);
+                        Ok(None)
+                    } else if event.local_name().as_ref() == b"object" {
+                        self.state_stack.push(State::Object);
+                        Ok(None)
+                    } else {
+                        Err(QueryResultsSyntaxError::msg(format!(
+                            "Expecting <subject>, <predicate> or <object>, found <{}>",
+                            self.decoder.decode(event.name().as_ref())?
+                        ))
+                        .into())
+                    }
+                }
+                State::Uri => Err(QueryResultsSyntaxError::msg(format!(
+                    "<uri> must only contain a string, found <{}>",
+                    self.decoder.decode(event.name().as_ref())?
+                ))
+                .into()),
+                State::BNode => Err(QueryResultsSyntaxError::msg(format!(
+                    "<bnode> must only contain a string, found <{}>",
+                    self.decoder.decode(event.name().as_ref())?
+                ))
+                .into()),
+                State::Literal => Err(QueryResultsSyntaxError::msg(format!(
+                    "<literal> must only contain a string, found <{}>",
+                    self.decoder.decode(event.name().as_ref())?
+                ))
+                .into()),
+            },
+            Event::Text(event) => {
+                let data = event.unescape()?;
+                match self.state_stack.last().unwrap() {
+                    State::Uri => {
+                        self.term = Some(
+                            NamedNode::new(data.to_string())
+                                .map_err(|e| {
+                                    QueryResultsSyntaxError::msg(format!(
+                                        "Invalid IRI value '{data}': {e}"
+                                    ))
+                                })?
+                                .into(),
+                        );
+                        Ok(None)
+                    }
+                    State::BNode => {
+                        self.term = Some(
+                            BlankNode::new(data.to_string())
+                                .map_err(|e| {
+                                    QueryResultsSyntaxError::msg(format!(
+                                        "Invalid blank node value '{data}': {e}"
+                                    ))
+                                })?
+                                .into(),
+                        );
+                        Ok(None)
+                    }
+                    State::Literal => {
+                        self.term = Some(
+                            build_literal(data, self.lang.take(), self.datatype.take())?.into(),
+                        );
+                        Ok(None)
+                    }
+                    _ => Err(QueryResultsSyntaxError::msg(format!(
+                        "Unexpected textual value found: {data}"
+                    ))
+                    .into()),
+                }
+            }
+            Event::End(_) => match self.state_stack.pop().unwrap() {
+                State::Start | State::Uri => Ok(None),
+                State::Result => Ok(Some(take(&mut self.new_bindings))),
+                State::Binding => {
+                    if let Some(var) = &self.current_var {
+                        if let Some(var) = self.mapping.get(var) {
+                            self.new_bindings[*var] = self.term.take()
+                        } else {
+                            return Err(
+                                QueryResultsSyntaxError::msg(format!("The variable '{var}' is used in a binding but not declared in the variables list")).into()
+                            );
+                        }
+                    } else {
+                        return Err(QueryResultsSyntaxError::msg(
+                            "No name found for <binding> tag",
+                        )
+                        .into());
+                    }
+                    Ok(None)
+                }
+                State::Subject => {
+                    if let Some(subject) = self.term.take() {
+                        self.subject_stack.push(subject)
+                    }
+                    Ok(None)
+                }
+                State::Predicate => {
+                    if let Some(predicate) = self.term.take() {
+                        self.predicate_stack.push(predicate)
+                    }
+                    Ok(None)
+                }
+                State::Object => {
+                    if let Some(object) = self.term.take() {
+                        self.object_stack.push(object)
+                    }
+                    Ok(None)
+                }
+                State::BNode => {
+                    if self.term.is_none() {
+                        // We default to a random bnode
+                        self.term = Some(BlankNode::default().into())
+                    }
+                    Ok(None)
+                }
+                State::Literal => {
+                    if self.term.is_none() {
+                        // We default to the empty literal
+                        self.term =
+                            Some(build_literal("", self.lang.take(), self.datatype.take())?.into())
+                    }
+                    Ok(None)
+                }
+                State::Triple => {
+                    #[cfg(feature = "rdf-star")]
+                    if let (Some(subject), Some(predicate), Some(object)) = (
+                        self.subject_stack.pop(),
+                        self.predicate_stack.pop(),
+                        self.object_stack.pop(),
+                    ) {
+                        self.term = Some(
+                            Triple::new(
+                                match subject {
+                                    Term::NamedNode(subject) => subject.into(),
+                                    Term::BlankNode(subject) => subject.into(),
+                                    Term::Triple(subject) => Subject::Triple(subject),
+                                    Term::Literal(_) => {
+                                        return Err(QueryResultsSyntaxError::msg(
+                                            "The <subject> value should not be a <literal>",
+                                        )
+                                        .into());
+                                    }
+                                },
+                                match predicate {
+                                    Term::NamedNode(predicate) => predicate,
+                                    _ => {
+                                        return Err(QueryResultsSyntaxError::msg(
+                                            "The <predicate> value should be a <uri>",
+                                        )
+                                        .into());
+                                    }
+                                },
+                                object,
+                            )
+                            .into(),
+                        );
+                        Ok(None)
+                    } else {
+                        Err(QueryResultsSyntaxError::msg(
+                            "A <triple> should contain a <subject>, a <predicate> and an <object>",
+                        )
+                        .into())
+                    }
+                    #[cfg(not(feature = "rdf-star"))]
+                    {
+                        Err(QueryResultsSyntaxError::msg(
+                            "The <triple> tag is only supported with RDF-star",
+                        )
+                        .into())
+                    }
+                }
+            },
+            Event::Eof | Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => {
+                Ok(None)
+            }
+            Event::Empty(_) => unreachable!("Empty events are expanded"),
+            Event::CData(_) => Err(QueryResultsSyntaxError::msg(
+                "<![CDATA[...]]> are not supported in SPARQL XML results",
+            )
+            .into()),
+        }
+    }
+}
+
+fn build_literal(
+    value: impl Into<String>,
+    lang: Option<String>,
+    datatype: Option<NamedNode>,
+) -> Result<Literal, QueryResultsParseError> {
+    match lang {
+        Some(lang) => {
+            if let Some(datatype) = datatype {
+                if datatype.as_ref() != rdf::LANG_STRING {
+                    return Err(QueryResultsSyntaxError::msg(format!(
+                        "xml:lang value '{lang}' provided with the datatype {datatype}"
+                    ))
+                    .into());
+                }
+            }
+            Literal::new_language_tagged_literal(value, &lang).map_err(|e| {
+                QueryResultsSyntaxError::msg(format!("Invalid xml:lang value '{lang}': {e}")).into()
+            })
+        }
+        None => Ok(if let Some(datatype) = datatype {
+            Literal::new_typed_literal(value, datatype)
+        } else {
+            Literal::new_simple_literal(value)
+        }),
+    }
+}
+
+fn map_xml_error(error: Error) -> io::Error {
+    match error {
+        Error::Io(error) => {
+            Arc::try_unwrap(error).unwrap_or_else(|error| io::Error::new(error.kind(), error))
+        }
+        Error::UnexpectedEof(_) => io::Error::new(io::ErrorKind::UnexpectedEof, error),
+        _ => io::Error::new(io::ErrorKind::InvalidData, error),
+    }
+}
diff --git a/ng-oxigraph/src/spargebra/README.md b/ng-oxigraph/src/spargebra/README.md
new file mode 100644
index 0000000..313d875
--- /dev/null
+++ b/ng-oxigraph/src/spargebra/README.md
@@ -0,0 +1,46 @@
+Spargebra
+=========
+
+[Latest version](https://crates.io/crates/spargebra)
+[Documentation](https://docs.rs/spargebra)
+[Downloads](https://crates.io/crates/spargebra)
+[CI](https://github.com/oxigraph/oxigraph/actions)
+[Chat](https://gitter.im/oxigraph/community)
+
+Spargebra is a [SPARQL](https://www.w3.org/TR/sparql11-overview/) parser.
+
+It supports both [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/) and [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/).
+
+The emitted tree is based on [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery) objects.
+
+The API entry point for SPARQL queries is the [`Query`] struct and the API entry point for SPARQL updates is the [`Update`] struct.
+
+Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#sparql-star) is also available behind the `rdf-star` feature.
+
+This crate is intended to be a building piece for SPARQL implementations in Rust like [Oxigraph](https://oxigraph.org).
+
+Usage example:
+
+```rust
+use spargebra::Query;
+
+let query_str = "SELECT ?s ?p ?o WHERE { ?s ?p ?o . }";
+let query = Query::parse(query_str, None).unwrap();
+assert_eq!(query.to_string(), query_str);
+```
+
+## License
+
+This project is licensed under either of
+
+* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
+  <http://www.apache.org/licenses/LICENSE-2.0>)
+* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
+  <http://opensource.org/licenses/MIT>)
+
+at your option.
+
+
+### Contribution
+
+Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
diff --git a/ng-oxigraph/src/spargebra/algebra.rs b/ng-oxigraph/src/spargebra/algebra.rs
new file mode 100644
index 0000000..7255a91
--- /dev/null
+++ b/ng-oxigraph/src/spargebra/algebra.rs
@@ -0,0 +1,1419 @@
+//! [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery) representation.
+
+use crate::oxrdf::LiteralRef;
+use crate::spargebra::term::*;
+use std::fmt;
+
+/// A [property path expression](https://www.w3.org/TR/sparql11-query/#defn_PropertyPathExpr).
+#[derive(Eq, PartialEq, Debug, Clone, Hash)]
+pub enum PropertyPathExpression {
+    NamedNode(NamedNode),
+    Reverse(Box<Self>),
+    Sequence(Box<Self>, Box<Self>),
+    Alternative(Box<Self>, Box<Self>),
+    ZeroOrMore(Box<Self>),
+    OneOrMore(Box<Self>),
+    ZeroOrOne(Box<Self>),
+    NegatedPropertySet(Vec<NamedNode>),
+}
+
+impl PropertyPathExpression {
+    /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html).
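+    /// For illustration (derived from the match arms below): the path `^ex:p`
+    /// renders as `(reverse <http://example.com/p>)` and `ex:p/ex:q` as
+    /// `(seq <http://example.com/p> <http://example.com/q>)`, where `ex:`
+    /// stands for the hypothetical prefix `<http://example.com/>`.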
+    pub(crate) fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result {
+        match self {
+            Self::NamedNode(p) => write!(f, "{p}"),
+            Self::Reverse(p) => {
+                f.write_str("(reverse ")?;
+                p.fmt_sse(f)?;
+                f.write_str(")")
+            }
+            Self::Alternative(a, b) => {
+                f.write_str("(alt ")?;
+                a.fmt_sse(f)?;
+                f.write_str(" ")?;
+                b.fmt_sse(f)?;
+                f.write_str(")")
+            }
+            Self::Sequence(a, b) => {
+                f.write_str("(seq ")?;
+                a.fmt_sse(f)?;
+                f.write_str(" ")?;
+                b.fmt_sse(f)?;
+                f.write_str(")")
+            }
+            Self::ZeroOrMore(p) => {
+                f.write_str("(path* ")?;
+                p.fmt_sse(f)?;
+                f.write_str(")")
+            }
+            Self::OneOrMore(p) => {
+                f.write_str("(path+ ")?;
+                p.fmt_sse(f)?;
+                f.write_str(")")
+            }
+            Self::ZeroOrOne(p) => {
+                f.write_str("(path? ")?;
+                p.fmt_sse(f)?;
+                f.write_str(")")
+            }
+            Self::NegatedPropertySet(p) => {
+                f.write_str("(notoneof")?;
+                for p in p {
+                    write!(f, " {p}")?;
+                }
+                f.write_str(")")
+            }
+        }
+    }
+}
+
+impl fmt::Display for PropertyPathExpression {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::NamedNode(p) => p.fmt(f),
+            Self::Reverse(p) => write!(f, "^({p})"),
+            Self::Sequence(a, b) => write!(f, "({a} / {b})"),
+            Self::Alternative(a, b) => write!(f, "({a} | {b})"),
+            Self::ZeroOrMore(p) => write!(f, "({p})*"),
+            Self::OneOrMore(p) => write!(f, "({p})+"),
+            Self::ZeroOrOne(p) => write!(f, "({p})?"),
+            Self::NegatedPropertySet(p) => {
+                f.write_str("!(")?;
+                for (i, c) in p.iter().enumerate() {
+                    if i > 0 {
+                        f.write_str(" | ")?;
+                    }
+                    write!(f, "{c}")?;
+                }
+                f.write_str(")")
+            }
+        }
+    }
+}
+
+impl From<NamedNode> for PropertyPathExpression {
+    fn from(p: NamedNode) -> Self {
+        Self::NamedNode(p)
+    }
+}
+
+/// An [expression](https://www.w3.org/TR/sparql11-query/#expressions).
+#[derive(Eq, PartialEq, Debug, Clone, Hash)]
+pub enum Expression {
+    NamedNode(NamedNode),
+    Literal(Literal),
+    Variable(Variable),
+    /// [Logical-or](https://www.w3.org/TR/sparql11-query/#func-logical-or).
+    Or(Box<Self>, Box<Self>),
+    /// [Logical-and](https://www.w3.org/TR/sparql11-query/#func-logical-and).
+    And(Box<Self>, Box<Self>),
+    /// [RDFterm-equal](https://www.w3.org/TR/sparql11-query/#func-RDFterm-equal) and all the XSD equalities.
+    Equal(Box<Self>, Box<Self>),
+    /// [sameTerm](https://www.w3.org/TR/sparql11-query/#func-sameTerm).
+    SameTerm(Box<Self>, Box<Self>),
+    /// [op:numeric-greater-than](https://www.w3.org/TR/xpath-functions-31/#func-numeric-greater-than) and other XSD greater than operators.
+    Greater(Box<Self>, Box<Self>),
+    GreaterOrEqual(Box<Self>, Box<Self>),
+    /// [op:numeric-less-than](https://www.w3.org/TR/xpath-functions-31/#func-numeric-less-than) and other XSD less than operators.
+    Less(Box<Self>, Box<Self>),
+    LessOrEqual(Box<Self>, Box<Self>),
+    /// [IN](https://www.w3.org/TR/sparql11-query/#func-in)
+    In(Box<Self>, Vec<Self>),
+    /// [op:numeric-add](https://www.w3.org/TR/xpath-functions-31/#func-numeric-add) and other XSD additions.
+    Add(Box<Self>, Box<Self>),
+    /// [op:numeric-subtract](https://www.w3.org/TR/xpath-functions-31/#func-numeric-subtract) and other XSD subtractions.
+    Subtract(Box<Self>, Box<Self>),
+    /// [op:numeric-multiply](https://www.w3.org/TR/xpath-functions-31/#func-numeric-multiply) and other XSD multiplications.
+    Multiply(Box<Self>, Box<Self>),
+    /// [op:numeric-divide](https://www.w3.org/TR/xpath-functions-31/#func-numeric-divide) and other XSD divides.
+    Divide(Box<Self>, Box<Self>),
+    /// [op:numeric-unary-plus](https://www.w3.org/TR/xpath-functions-31/#func-numeric-unary-plus) and other XSD unary plus.
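+    /// Displayed in SPARQL syntax as e.g. `+?x` (see the `fmt::Display` impl below).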
+ UnaryPlus(Box<Self>), + /// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions-31/#func-numeric-unary-minus) and other XSD unary minus. + UnaryMinus(Box<Self>), + /// [fn:not](https://www.w3.org/TR/xpath-functions-31/#func-not). + Not(Box<Self>), + /// [EXISTS](https://www.w3.org/TR/sparql11-query/#func-filter-exists). + Exists(Box<GraphPattern>), + /// [BOUND](https://www.w3.org/TR/sparql11-query/#func-bound). + Bound(Variable), + /// [IF](https://www.w3.org/TR/sparql11-query/#func-if). + If(Box<Self>, Box<Self>, Box<Self>), + /// [COALESCE](https://www.w3.org/TR/sparql11-query/#func-coalesce). + Coalesce(Vec<Self>), + /// A regular function call. + FunctionCall(Function, Vec<Self>), +} + +impl Expression { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). + pub(crate) fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { + match self { + Self::NamedNode(node) => write!(f, "{node}"), + Self::Literal(l) => write!(f, "{l}"), + Self::Variable(var) => write!(f, "{var}"), + Self::Or(a, b) => fmt_sse_binary_expression(f, "||", a, b), + Self::And(a, b) => fmt_sse_binary_expression(f, "&&", a, b), + Self::Equal(a, b) => fmt_sse_binary_expression(f, "=", a, b), + Self::SameTerm(a, b) => fmt_sse_binary_expression(f, "sameTerm", a, b), + Self::Greater(a, b) => fmt_sse_binary_expression(f, ">", a, b), + Self::GreaterOrEqual(a, b) => fmt_sse_binary_expression(f, ">=", a, b), + Self::Less(a, b) => fmt_sse_binary_expression(f, "<", a, b), + Self::LessOrEqual(a, b) => fmt_sse_binary_expression(f, "<=", a, b), + Self::In(a, b) => { + f.write_str("(in ")?; + a.fmt_sse(f)?; + for p in b { + f.write_str(" ")?; + p.fmt_sse(f)?; + } + f.write_str(")") + } + Self::Add(a, b) => fmt_sse_binary_expression(f, "+", a, b), + Self::Subtract(a, b) => fmt_sse_binary_expression(f, "-", a, b), + Self::Multiply(a, b) => fmt_sse_binary_expression(f, "*", a, b), + Self::Divide(a, b) => fmt_sse_binary_expression(f, "/", a, b), + Self::UnaryPlus(e) => fmt_sse_unary_expression(f, "+", e), + Self::UnaryMinus(e) => fmt_sse_unary_expression(f, "-", e), + Self::Not(e) => fmt_sse_unary_expression(f, "!", e), + Self::FunctionCall(function, parameters) => { + f.write_str("( ")?; + function.fmt_sse(f)?; + for p in parameters { + f.write_str(" ")?; + p.fmt_sse(f)?; + } + f.write_str(")") + } + Self::Exists(p) => { + f.write_str("(exists ")?; + p.fmt_sse(f)?; + f.write_str(")") + } + Self::Bound(v) => { + write!(f, "(bound {v})") + } + Self::If(a, b, c) => { + f.write_str("(if ")?; + a.fmt_sse(f)?; + f.write_str(" ")?; + b.fmt_sse(f)?; + f.write_str(" ")?; + c.fmt_sse(f)?; + f.write_str(")") + } + Self::Coalesce(parameters) => { + f.write_str("(coalesce")?; + for p in parameters { + f.write_str(" ")?; + p.fmt_sse(f)?; + } + f.write_str(")") + } + } + } +} + +impl fmt::Display for Expression { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(node) => node.fmt(f), + Self::Literal(l) => l.fmt(f), + Self::Variable(var) => var.fmt(f), + Self::Or(a, b) => write!(f, "({a} || {b})"), + Self::And(a, b) => write!(f, "({a} && {b})"), + Self::Equal(a, b) => { + write!(f, "({a} = {b})") + } + Self::SameTerm(a, b) => { + write!(f, "sameTerm({a}, {b})") + } + Self::Greater(a, b) => { + write!(f, "({a} > {b})") + } + Self::GreaterOrEqual(a, b) => write!(f, "({a} >= {b})"), + Self::Less(a, b) => { + write!(f, "({a} < {b})") + } + Self::LessOrEqual(a, b) => write!(f, "({a} <= {b})"), + Self::In(a, b) => { + write!(f, "({a} IN ")?; + 
write_arg_list(b, f)?; + f.write_str(")") + } + Self::Add(a, b) => { + write!(f, "{a} + {b}") + } + Self::Subtract(a, b) => { + write!(f, "{a} - {b}") + } + Self::Multiply(a, b) => { + write!(f, "{a} * {b}") + } + Self::Divide(a, b) => { + write!(f, "{a} / {b}") + } + Self::UnaryPlus(e) => write!(f, "+{e}"), + Self::UnaryMinus(e) => write!(f, "-{e}"), + Self::Not(e) => match e.as_ref() { + Self::Exists(p) => write!(f, "NOT EXISTS {{ {p} }}"), + e => write!(f, "!{e}"), + }, + Self::FunctionCall(function, parameters) => { + write!(f, "{function}")?; + write_arg_list(parameters, f) + } + Self::Bound(v) => write!(f, "BOUND({v})"), + Self::Exists(p) => write!(f, "EXISTS {{ {p} }}"), + Self::If(a, b, c) => write!(f, "IF({a}, {b}, {c})"), + Self::Coalesce(parameters) => { + f.write_str("COALESCE")?; + write_arg_list(parameters, f) + } + } + } +} + +impl From<NamedNode> for Expression { + fn from(p: NamedNode) -> Self { + Self::NamedNode(p) + } +} + +impl From<Literal> for Expression { + fn from(p: Literal) -> Self { + Self::Literal(p) + } +} + +impl From<Variable> for Expression { + fn from(v: Variable) -> Self { + Self::Variable(v) + } +} + +impl From<NamedNodePattern> for Expression { + fn from(p: NamedNodePattern) -> Self { + match p { + NamedNodePattern::NamedNode(p) => p.into(), + NamedNodePattern::Variable(p) => p.into(), + } + } +} + +fn write_arg_list( + params: impl IntoIterator<Item = impl fmt::Display>, + f: &mut fmt::Formatter<'_>, +) -> fmt::Result { + f.write_str("(")?; + let mut cont = false; + for p in params { + if cont { + f.write_str(", ")?; + } + p.fmt(f)?; + cont = true; + } + f.write_str(")") +} + +/// A function name. +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum Function { + Str, + Lang, + LangMatches, + Datatype, + Iri, + BNode, + Rand, + Abs, + Ceil, + Floor, + Round, + Concat, + SubStr, + StrLen, + Replace, + UCase, + LCase, + EncodeForUri, + Contains, + StrStarts, + StrEnds, + StrBefore, + StrAfter, + Year, + Month, + Day, + Hours, + Minutes, + Seconds, + Timezone, + Tz, + Now, + Uuid, + StrUuid, + Md5, + Sha1, + Sha256, + Sha384, + Sha512, + StrLang, + StrDt, + IsIri, + IsBlank, + IsLiteral, + IsNumeric, + Regex, + #[cfg(feature = "rdf-star")] + Triple, + #[cfg(feature = "rdf-star")] + Subject, + #[cfg(feature = "rdf-star")] + Predicate, + #[cfg(feature = "rdf-star")] + Object, + #[cfg(feature = "rdf-star")] + IsTriple, + #[cfg(feature = "sep-0002")] + Adjust, + Custom(NamedNode), +} + +impl Function { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). 
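+    /// For example, `Function::Str` renders as `str` and a `Function::Custom`
+    /// function as its IRI, following the match arms below.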
+ pub(crate) fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { + match self { + Self::Str => f.write_str("str"), + Self::Lang => f.write_str("lang"), + Self::LangMatches => f.write_str("langmatches"), + Self::Datatype => f.write_str("datatype"), + Self::Iri => f.write_str("iri"), + Self::BNode => f.write_str("bnode"), + Self::Rand => f.write_str("rand"), + Self::Abs => f.write_str("abs"), + Self::Ceil => f.write_str("ceil"), + Self::Floor => f.write_str("floor"), + Self::Round => f.write_str("round"), + Self::Concat => f.write_str("concat"), + Self::SubStr => f.write_str("substr"), + Self::StrLen => f.write_str("strlen"), + Self::Replace => f.write_str("replace"), + Self::UCase => f.write_str("ucase"), + Self::LCase => f.write_str("lcase"), + Self::EncodeForUri => f.write_str("encode_for_uri"), + Self::Contains => f.write_str("contains"), + Self::StrStarts => f.write_str("strstarts"), + Self::StrEnds => f.write_str("strends"), + Self::StrBefore => f.write_str("strbefore"), + Self::StrAfter => f.write_str("strafter"), + Self::Year => f.write_str("year"), + Self::Month => f.write_str("month"), + Self::Day => f.write_str("day"), + Self::Hours => f.write_str("hours"), + Self::Minutes => f.write_str("minutes"), + Self::Seconds => f.write_str("seconds"), + Self::Timezone => f.write_str("timezone"), + Self::Tz => f.write_str("tz"), + Self::Now => f.write_str("now"), + Self::Uuid => f.write_str("uuid"), + Self::StrUuid => f.write_str("struuid"), + Self::Md5 => f.write_str("md5"), + Self::Sha1 => f.write_str("sha1"), + Self::Sha256 => f.write_str("sha256"), + Self::Sha384 => f.write_str("sha384"), + Self::Sha512 => f.write_str("sha512"), + Self::StrLang => f.write_str("strlang"), + Self::StrDt => f.write_str("strdt"), + Self::IsIri => f.write_str("isiri"), + Self::IsBlank => f.write_str("isblank"), + Self::IsLiteral => f.write_str("isliteral"), + Self::IsNumeric => f.write_str("isnumeric"), + Self::Regex => f.write_str("regex"), + #[cfg(feature = "rdf-star")] + Self::Triple => f.write_str("triple"), + #[cfg(feature = "rdf-star")] + Self::Subject => f.write_str("subject"), + #[cfg(feature = "rdf-star")] + Self::Predicate => f.write_str("predicate"), + #[cfg(feature = "rdf-star")] + Self::Object => f.write_str("object"), + #[cfg(feature = "rdf-star")] + Self::IsTriple => f.write_str("istriple"), + #[cfg(feature = "sep-0002")] + Self::Adjust => f.write_str("adjust"), + Self::Custom(iri) => write!(f, "{iri}"), + } + } +} + +impl fmt::Display for Function { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Str => f.write_str("STR"), + Self::Lang => f.write_str("LANG"), + Self::LangMatches => f.write_str("LANGMATCHES"), + Self::Datatype => f.write_str("DATATYPE"), + Self::Iri => f.write_str("IRI"), + Self::BNode => f.write_str("BNODE"), + Self::Rand => f.write_str("RAND"), + Self::Abs => f.write_str("ABS"), + Self::Ceil => f.write_str("CEIL"), + Self::Floor => f.write_str("FLOOR"), + Self::Round => f.write_str("ROUND"), + Self::Concat => f.write_str("CONCAT"), + Self::SubStr => f.write_str("SUBSTR"), + Self::StrLen => f.write_str("STRLEN"), + Self::Replace => f.write_str("REPLACE"), + Self::UCase => f.write_str("UCASE"), + Self::LCase => f.write_str("LCASE"), + Self::EncodeForUri => f.write_str("ENCODE_FOR_URI"), + Self::Contains => f.write_str("CONTAINS"), + Self::StrStarts => f.write_str("STRSTARTS"), + Self::StrEnds => f.write_str("STRENDS"), + Self::StrBefore => f.write_str("STRBEFORE"), + Self::StrAfter => f.write_str("STRAFTER"), + Self::Year => 
f.write_str("YEAR"), + Self::Month => f.write_str("MONTH"), + Self::Day => f.write_str("DAY"), + Self::Hours => f.write_str("HOURS"), + Self::Minutes => f.write_str("MINUTES"), + Self::Seconds => f.write_str("SECONDS"), + Self::Timezone => f.write_str("TIMEZONE"), + Self::Tz => f.write_str("TZ"), + Self::Now => f.write_str("NOW"), + Self::Uuid => f.write_str("UUID"), + Self::StrUuid => f.write_str("STRUUID"), + Self::Md5 => f.write_str("MD5"), + Self::Sha1 => f.write_str("SHA1"), + Self::Sha256 => f.write_str("SHA256"), + Self::Sha384 => f.write_str("SHA384"), + Self::Sha512 => f.write_str("SHA512"), + Self::StrLang => f.write_str("STRLANG"), + Self::StrDt => f.write_str("STRDT"), + Self::IsIri => f.write_str("isIRI"), + Self::IsBlank => f.write_str("isBLANK"), + Self::IsLiteral => f.write_str("isLITERAL"), + Self::IsNumeric => f.write_str("isNUMERIC"), + Self::Regex => f.write_str("REGEX"), + #[cfg(feature = "rdf-star")] + Self::Triple => f.write_str("TRIPLE"), + #[cfg(feature = "rdf-star")] + Self::Subject => f.write_str("SUBJECT"), + #[cfg(feature = "rdf-star")] + Self::Predicate => f.write_str("PREDICATE"), + #[cfg(feature = "rdf-star")] + Self::Object => f.write_str("OBJECT"), + #[cfg(feature = "rdf-star")] + Self::IsTriple => f.write_str("isTRIPLE"), + #[cfg(feature = "sep-0002")] + Self::Adjust => f.write_str("ADJUST"), + Self::Custom(iri) => iri.fmt(f), + } + } +} + +/// A SPARQL query [graph pattern](https://www.w3.org/TR/sparql11-query/#sparqlQuery). +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum GraphPattern { + /// A [basic graph pattern](https://www.w3.org/TR/sparql11-query/#defn_BasicGraphPattern). + Bgp { patterns: Vec<TriplePattern> }, + /// A [property path pattern](https://www.w3.org/TR/sparql11-query/#defn_evalPP_predicate). + Path { + subject: TermPattern, + path: PropertyPathExpression, + object: TermPattern, + }, + /// [Join](https://www.w3.org/TR/sparql11-query/#defn_algJoin). + Join { left: Box<Self>, right: Box<Self> }, + /// [LeftJoin](https://www.w3.org/TR/sparql11-query/#defn_algLeftJoin). + LeftJoin { + left: Box<Self>, + right: Box<Self>, + expression: Option<Expression>, + }, + /// Lateral join i.e. evaluate right for all result row of left + #[cfg(feature = "sep-0006")] + Lateral { left: Box<Self>, right: Box<Self> }, + /// [Filter](https://www.w3.org/TR/sparql11-query/#defn_algFilter). + Filter { expr: Expression, inner: Box<Self> }, + /// [Union](https://www.w3.org/TR/sparql11-query/#defn_algUnion). + Union { left: Box<Self>, right: Box<Self> }, + Graph { + name: NamedNodePattern, + inner: Box<Self>, + }, + /// [Extend](https://www.w3.org/TR/sparql11-query/#defn_extend). + Extend { + inner: Box<Self>, + variable: Variable, + expression: Expression, + }, + /// [Minus](https://www.w3.org/TR/sparql11-query/#defn_algMinus). + Minus { left: Box<Self>, right: Box<Self> }, + /// A table used to provide inline values + Values { + variables: Vec<Variable>, + bindings: Vec<Vec<Option<GroundTerm>>>, + }, + /// [OrderBy](https://www.w3.org/TR/sparql11-query/#defn_algOrdered). + OrderBy { + inner: Box<Self>, + expression: Vec<OrderExpression>, + }, + /// [Project](https://www.w3.org/TR/sparql11-query/#defn_algProjection). + Project { + inner: Box<Self>, + variables: Vec<Variable>, + }, + /// [Distinct](https://www.w3.org/TR/sparql11-query/#defn_algDistinct). + Distinct { inner: Box<Self> }, + /// [Reduced](https://www.w3.org/TR/sparql11-query/#defn_algReduced). + Reduced { inner: Box<Self> }, + /// [Slice](https://www.w3.org/TR/sparql11-query/#defn_algSlice). 
+ Slice { + inner: Box<Self>, + start: usize, + length: Option<usize>, + }, + /// [Group](https://www.w3.org/TR/sparql11-query/#aggregateAlgebra). + Group { + inner: Box<Self>, + variables: Vec<Variable>, + aggregates: Vec<(Variable, AggregateExpression)>, + }, + /// [Service](https://www.w3.org/TR/sparql11-federated-query/#defn_evalService). + Service { + name: NamedNodePattern, + inner: Box<Self>, + silent: bool, + }, +} + +impl fmt::Display for GraphPattern { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Bgp { patterns } => { + for pattern in patterns { + write!(f, "{pattern} .")? + } + Ok(()) + } + Self::Path { + subject, + path, + object, + } => write!(f, "{subject} {path} {object} ."), + Self::Join { left, right } => { + #[allow(clippy::match_same_arms)] + match right.as_ref() { + Self::LeftJoin { .. } + | Self::Minus { .. } + | Self::Extend { .. } + | Self::Filter { .. } => { + // The second block might be considered as a modification of the first one. + write!(f, "{left} {{ {right} }}") + } + #[cfg(feature = "sep-0006")] + Self::Lateral { .. } => { + write!(f, "{left} {{ {right} }}") + } + _ => write!(f, "{left} {right}"), + } + } + Self::LeftJoin { + left, + right, + expression, + } => { + if let Some(expr) = expression { + write!(f, "{left} OPTIONAL {{ {right} FILTER({expr}) }}") + } else { + write!(f, "{left} OPTIONAL {{ {right} }}") + } + } + #[cfg(feature = "sep-0006")] + Self::Lateral { left, right } => { + write!(f, "{left} LATERAL {{ {right} }}") + } + Self::Filter { expr, inner } => { + write!(f, "{inner} FILTER({expr})") + } + Self::Union { left, right } => write!(f, "{{ {left} }} UNION {{ {right} }}"), + Self::Graph { name, inner } => { + write!(f, "GRAPH {name} {{ {inner} }}") + } + Self::Extend { + inner, + variable, + expression, + } => write!(f, "{inner} BIND({expression} AS {variable})"), + Self::Minus { left, right } => write!(f, "{left} MINUS {{ {right} }}"), + Self::Service { + name, + inner, + silent, + } => { + if *silent { + write!(f, "SERVICE SILENT {name} {{ {inner} }}") + } else { + write!(f, "SERVICE {name} {{ {inner} }}") + } + } + Self::Values { + variables, + bindings, + } => { + f.write_str("VALUES ( ")?; + for var in variables { + write!(f, "{var} ")?; + } + f.write_str(") { ")?; + for row in bindings { + f.write_str("( ")?; + for val in row { + match val { + Some(val) => write!(f, "{val} "), + None => f.write_str("UNDEF "), + }?; + } + f.write_str(") ")?; + } + f.write_str(" }") + } + Self::Group { + inner, + variables, + aggregates, + } => { + f.write_str("{SELECT")?; + for (a, v) in aggregates { + write!(f, " ({v} AS {a})")?; + } + for b in variables { + write!(f, " {b}")?; + } + write!(f, " WHERE {{ {inner} }}")?; + if !variables.is_empty() { + f.write_str(" GROUP BY")?; + for v in variables { + write!(f, " {v}")?; + } + } + f.write_str("}") + } + p => write!( + f, + "{{ {} }}", + SparqlGraphRootPattern { + pattern: p, + dataset: None + } + ), + } + } +} + +impl Default for GraphPattern { + fn default() -> Self { + Self::Bgp { + patterns: Vec::default(), + } + } +} + +impl GraphPattern { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). 
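+    /// For example, a basic graph pattern renders as `(bgp ...)` and a join of
+    /// two patterns as `(join left right)`, following the match arms below.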
+ pub(crate) fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { + match self { + Self::Bgp { patterns } => { + f.write_str("(bgp")?; + for pattern in patterns { + f.write_str(" ")?; + pattern.fmt_sse(f)?; + } + f.write_str(")") + } + Self::Path { + subject, + path, + object, + } => { + f.write_str("(path ")?; + subject.fmt_sse(f)?; + f.write_str(" ")?; + path.fmt_sse(f)?; + f.write_str(" ")?; + object.fmt_sse(f)?; + f.write_str(")") + } + Self::Join { left, right } => { + f.write_str("(join ")?; + left.fmt_sse(f)?; + f.write_str(" ")?; + right.fmt_sse(f)?; + f.write_str(")") + } + Self::LeftJoin { + left, + right, + expression, + } => { + f.write_str("(leftjoin ")?; + left.fmt_sse(f)?; + f.write_str(" ")?; + right.fmt_sse(f)?; + if let Some(expr) = expression { + f.write_str(" ")?; + expr.fmt_sse(f)?; + } + f.write_str(")") + } + #[cfg(feature = "sep-0006")] + Self::Lateral { left, right } => { + f.write_str("(lateral ")?; + left.fmt_sse(f)?; + f.write_str(" ")?; + right.fmt_sse(f)?; + f.write_str(")") + } + Self::Filter { expr, inner } => { + f.write_str("(filter ")?; + expr.fmt_sse(f)?; + f.write_str(" ")?; + inner.fmt_sse(f)?; + f.write_str(")") + } + Self::Union { left, right } => { + f.write_str("(union ")?; + left.fmt_sse(f)?; + f.write_str(" ")?; + right.fmt_sse(f)?; + f.write_str(")") + } + Self::Graph { name, inner } => { + f.write_str("(graph ")?; + name.fmt_sse(f)?; + f.write_str(" ")?; + inner.fmt_sse(f)?; + f.write_str(")") + } + Self::Extend { + inner, + variable, + expression, + } => { + write!(f, "(extend (({variable} ")?; + expression.fmt_sse(f)?; + f.write_str(")) ")?; + inner.fmt_sse(f)?; + f.write_str(")") + } + Self::Minus { left, right } => { + f.write_str("(minus ")?; + left.fmt_sse(f)?; + f.write_str(" ")?; + right.fmt_sse(f)?; + f.write_str(")") + } + Self::Service { + name, + inner, + silent, + } => { + f.write_str("(service ")?; + if *silent { + f.write_str("silent ")?; + } + name.fmt_sse(f)?; + f.write_str(" ")?; + inner.fmt_sse(f)?; + f.write_str(")") + } + Self::Group { + inner, + variables, + aggregates, + } => { + f.write_str("(group (")?; + for (i, v) in variables.iter().enumerate() { + if i > 0 { + f.write_str(" ")?; + } + write!(f, "{v}")?; + } + f.write_str(") (")?; + for (i, (v, a)) in aggregates.iter().enumerate() { + if i > 0 { + f.write_str(" ")?; + } + f.write_str("(")?; + a.fmt_sse(f)?; + write!(f, " {v})")?; + } + f.write_str(") ")?; + inner.fmt_sse(f)?; + f.write_str(")") + } + Self::Values { + variables, + bindings, + } => { + f.write_str("(table (vars")?; + for var in variables { + write!(f, " {var}")?; + } + f.write_str(")")?; + for row in bindings { + f.write_str(" (row")?; + for (value, var) in row.iter().zip(variables) { + if let Some(value) = value { + write!(f, " ({var} {value})")?; + } + } + f.write_str(")")?; + } + f.write_str(")") + } + Self::OrderBy { inner, expression } => { + f.write_str("(order (")?; + for (i, c) in expression.iter().enumerate() { + if i > 0 { + f.write_str(" ")?; + } + c.fmt_sse(f)?; + } + f.write_str(") ")?; + inner.fmt_sse(f)?; + f.write_str(")") + } + Self::Project { inner, variables } => { + f.write_str("(project (")?; + for (i, v) in variables.iter().enumerate() { + if i > 0 { + f.write_str(" ")?; + } + write!(f, "{v}")?; + } + f.write_str(") ")?; + inner.fmt_sse(f)?; + f.write_str(")") + } + Self::Distinct { inner } => { + f.write_str("(distinct ")?; + inner.fmt_sse(f)?; + f.write_str(")") + } + Self::Reduced { inner } => { + f.write_str("(reduced ")?; + inner.fmt_sse(f)?; + f.write_str(")") + } + 
Self::Slice { + inner, + start, + length, + } => { + if let Some(length) = length { + write!(f, "(slice {start} {length} ")?; + } else { + write!(f, "(slice {start} _ ")?; + } + inner.fmt_sse(f)?; + f.write_str(")") + } + } + } + + /// Calls `callback` on each [in-scope variable](https://www.w3.org/TR/sparql11-query/#variableScope) occurrence. + pub fn on_in_scope_variable<'a>(&'a self, mut callback: impl FnMut(&'a Variable)) { + self.lookup_in_scope_variables(&mut callback) + } + + fn lookup_in_scope_variables<'a>(&'a self, callback: &mut impl FnMut(&'a Variable)) { + #[allow(clippy::match_same_arms)] + match self { + Self::Bgp { patterns } => { + for pattern in patterns { + lookup_triple_pattern_variables(pattern, callback) + } + } + Self::Path { + subject, object, .. + } => { + if let TermPattern::Variable(s) = subject { + callback(s); + } + #[cfg(feature = "rdf-star")] + if let TermPattern::Triple(s) = subject { + lookup_triple_pattern_variables(s, callback) + } + if let TermPattern::Variable(o) = object { + callback(o); + } + #[cfg(feature = "rdf-star")] + if let TermPattern::Triple(o) = object { + lookup_triple_pattern_variables(o, callback) + } + } + Self::Join { left, right } + | Self::LeftJoin { left, right, .. } + | Self::Union { left, right } => { + left.lookup_in_scope_variables(callback); + right.lookup_in_scope_variables(callback); + } + #[cfg(feature = "sep-0006")] + Self::Lateral { left, right } => { + left.lookup_in_scope_variables(callback); + right.lookup_in_scope_variables(callback); + } + Self::Graph { name, inner } => { + if let NamedNodePattern::Variable(g) = &name { + callback(g); + } + inner.lookup_in_scope_variables(callback); + } + Self::Extend { + inner, variable, .. + } => { + callback(variable); + inner.lookup_in_scope_variables(callback); + } + Self::Minus { left, .. } => left.lookup_in_scope_variables(callback), + Self::Group { + variables, + aggregates, + .. + } => { + for v in variables { + callback(v); + } + for (v, _) in aggregates { + callback(v); + } + } + Self::Values { variables, .. } | Self::Project { variables, .. } => { + for v in variables { + callback(v); + } + } + Self::Service { inner, .. } + | Self::Filter { inner, .. } + | Self::OrderBy { inner, .. } + | Self::Distinct { inner } + | Self::Reduced { inner } + | Self::Slice { inner, .. 
} => inner.lookup_in_scope_variables(callback), + } + } +} + +fn lookup_triple_pattern_variables<'a>( + pattern: &'a TriplePattern, + callback: &mut impl FnMut(&'a Variable), +) { + if let TermPattern::Variable(s) = &pattern.subject { + callback(s); + } + #[cfg(feature = "rdf-star")] + if let TermPattern::Triple(s) = &pattern.subject { + lookup_triple_pattern_variables(s, callback) + } + if let NamedNodePattern::Variable(p) = &pattern.predicate { + callback(p); + } + if let TermPattern::Variable(o) = &pattern.object { + callback(o); + } + #[cfg(feature = "rdf-star")] + if let TermPattern::Triple(o) = &pattern.object { + lookup_triple_pattern_variables(o, callback) + } +} + +pub(crate) struct SparqlGraphRootPattern<'a> { + pub(crate) pattern: &'a GraphPattern, + pub(crate) dataset: Option<&'a QueryDataset>, +} + +impl<'a> fmt::Display for SparqlGraphRootPattern<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut distinct = false; + let mut reduced = false; + let mut order = None; + let mut start = 0; + let mut length = None; + let mut project: &[Variable] = &[]; + + let mut child = self.pattern; + loop { + match child { + GraphPattern::OrderBy { inner, expression } => { + order = Some(expression); + child = inner; + } + GraphPattern::Project { inner, variables } if project.is_empty() => { + project = variables; + child = inner; + } + GraphPattern::Distinct { inner } => { + distinct = true; + child = inner; + } + GraphPattern::Reduced { inner } => { + reduced = true; + child = inner; + } + GraphPattern::Slice { + inner, + start: s, + length: l, + } => { + start = *s; + length = *l; + child = inner; + } + p => { + f.write_str("SELECT")?; + if distinct { + f.write_str(" DISTINCT")?; + } + if reduced { + f.write_str(" REDUCED")?; + } + if project.is_empty() { + f.write_str(" *")?; + } else { + for v in project { + write!(f, " {v}")?; + } + } + if let Some(dataset) = self.dataset { + write!(f, " {dataset}")?; + } + write!(f, " WHERE {{ {p} }}")?; + if let Some(order) = order { + f.write_str(" ORDER BY")?; + for c in order { + write!(f, " {c}")?; + } + } + if start > 0 { + write!(f, " OFFSET {start}")?; + } + if let Some(length) = length { + write!(f, " LIMIT {length}")?; + } + return Ok(()); + } + } + } + } +} + +/// A set function used in aggregates (c.f. [`GraphPattern::Group`]). +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum AggregateExpression { + /// [Count](https://www.w3.org/TR/sparql11-query/#defn_aggCount) with *. + CountSolutions { distinct: bool }, + FunctionCall { + name: AggregateFunction, + expr: Expression, + distinct: bool, + }, +} + +impl AggregateExpression { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). 
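+    /// For example, `COUNT(DISTINCT *)` renders as `(count distinct)` and
+    /// `SUM(?x)` as `(sum ?x)`, following the match arms below.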
+ pub(crate) fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { + match self { + Self::CountSolutions { distinct } => { + f.write_str("(count")?; + if *distinct { + f.write_str(" distinct")?; + } + f.write_str(")") + } + Self::FunctionCall { + name: + AggregateFunction::GroupConcat { + separator: Some(separator), + }, + expr, + distinct, + } => { + f.write_str("(group_concat ")?; + if *distinct { + f.write_str("distinct ")?; + } + expr.fmt_sse(f)?; + write!(f, " {})", LiteralRef::new_simple_literal(separator)) + } + Self::FunctionCall { + name, + expr, + distinct, + } => { + f.write_str("(")?; + name.fmt_sse(f)?; + f.write_str(" ")?; + if *distinct { + f.write_str("distinct ")?; + } + expr.fmt_sse(f)?; + f.write_str(")") + } + } + } +} + +impl fmt::Display for AggregateExpression { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::CountSolutions { distinct } => { + if *distinct { + f.write_str("COUNT(DISTINCT *)") + } else { + f.write_str("COUNT(*)") + } + } + Self::FunctionCall { + name: + AggregateFunction::GroupConcat { + separator: Some(separator), + }, + expr, + distinct, + } => { + if *distinct { + write!( + f, + "GROUP_CONCAT(DISTINCT {}; SEPARATOR = {})", + expr, + LiteralRef::new_simple_literal(separator) + ) + } else { + write!( + f, + "GROUP_CONCAT({}; SEPARATOR = {})", + expr, + LiteralRef::new_simple_literal(separator) + ) + } + } + Self::FunctionCall { + name, + expr, + distinct, + } => { + if *distinct { + write!(f, "{name}(DISTINCT {expr})") + } else { + write!(f, "{name}({expr})") + } + } + } + } +} + +/// An aggregate function name. +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum AggregateFunction { + /// [Count](https://www.w3.org/TR/sparql11-query/#defn_aggCount) with *. + Count, + /// [Sum](https://www.w3.org/TR/sparql11-query/#defn_aggSum). + Sum, + /// [Avg](https://www.w3.org/TR/sparql11-query/#defn_aggAvg). + Avg, + /// [Min](https://www.w3.org/TR/sparql11-query/#defn_aggMin). + Min, + /// [Max](https://www.w3.org/TR/sparql11-query/#defn_aggMax). + Max, + /// [GroupConcat](https://www.w3.org/TR/sparql11-query/#defn_aggGroupConcat). + GroupConcat { + separator: Option<String>, + }, + /// [Sample](https://www.w3.org/TR/sparql11-query/#defn_aggSample). + Sample, + Custom(NamedNode), +} + +impl AggregateFunction { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). + pub(crate) fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { + match self { + Self::Count => f.write_str("count"), + Self::Sum => f.write_str("sum"), + Self::Avg => f.write_str("avg"), + Self::Min => f.write_str("min"), + Self::Max => f.write_str("max"), + Self::GroupConcat { .. } => f.write_str("group_concat"), + Self::Sample => f.write_str("sample"), + Self::Custom(iri) => write!(f, "{iri}"), + } + } +} + +impl fmt::Display for AggregateFunction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Count => f.write_str("COUNT"), + Self::Sum => f.write_str("SUM"), + Self::Avg => f.write_str("AVG"), + Self::Min => f.write_str("MIN"), + Self::Max => f.write_str("MAX"), + Self::GroupConcat { .. } => f.write_str("GROUP_CONCAT"), + Self::Sample => f.write_str("SAMPLE"), + Self::Custom(iri) => iri.fmt(f), + } + } +} + +/// An ordering comparator used by [`GraphPattern::OrderBy`]. 
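+/// `Asc(e)` prints as `ASC(e)` and `Desc(e)` as `DESC(e)` in SPARQL syntax,
+/// and as `(asc e)`/`(desc e)` in the S-Expression syntax (see the impls below).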
+#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum OrderExpression { + /// Ascending order + Asc(Expression), + /// Descending order + Desc(Expression), +} + +impl OrderExpression { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). + pub(crate) fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { + match self { + Self::Asc(e) => { + f.write_str("(asc ")?; + e.fmt_sse(f)?; + f.write_str(")") + } + Self::Desc(e) => { + f.write_str("(desc ")?; + e.fmt_sse(f)?; + f.write_str(")") + } + } + } +} + +impl fmt::Display for OrderExpression { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Asc(e) => write!(f, "ASC({e})"), + Self::Desc(e) => write!(f, "DESC({e})"), + } + } +} + +/// A SPARQL query [dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset). +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct QueryDataset { + pub default: Vec<NamedNode>, + pub named: Option<Vec<NamedNode>>, +} + +impl QueryDataset { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). + pub(crate) fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { + f.write_str("(")?; + for (i, graph_name) in self.default.iter().enumerate() { + if i > 0 { + f.write_str(" ")?; + } + write!(f, "{graph_name}")?; + } + if let Some(named) = &self.named { + for (i, graph_name) in named.iter().enumerate() { + if !self.default.is_empty() || i > 0 { + f.write_str(" ")?; + } + write!(f, "(named {graph_name})")?; + } + } + f.write_str(")") + } +} + +impl fmt::Display for QueryDataset { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for g in &self.default { + write!(f, " FROM {g}")?; + } + if let Some(named) = &self.named { + for g in named { + write!(f, " FROM NAMED {g}")?; + } + } + Ok(()) + } +} + +/// A target RDF graph for update operations. +/// +/// Could be a specific graph, all named graphs or the complete dataset. +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum GraphTarget { + NamedNode(NamedNode), + DefaultGraph, + NamedGraphs, + AllGraphs, +} + +impl GraphTarget { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). 
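+    /// For example, `GraphTarget::DefaultGraph` renders as `default` and a named graph as its IRI.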
+ pub(crate) fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { + match self { + Self::NamedNode(node) => write!(f, "{node}"), + Self::DefaultGraph => f.write_str("default"), + Self::NamedGraphs => f.write_str("named"), + Self::AllGraphs => f.write_str("all"), + } + } +} + +impl fmt::Display for GraphTarget { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(node) => write!(f, "GRAPH {node}"), + Self::DefaultGraph => f.write_str("DEFAULT"), + Self::NamedGraphs => f.write_str("NAMED"), + Self::AllGraphs => f.write_str("ALL"), + } + } +} + +impl From<NamedNode> for GraphTarget { + fn from(node: NamedNode) -> Self { + Self::NamedNode(node) + } +} + +impl From<GraphName> for GraphTarget { + fn from(graph_name: GraphName) -> Self { + match graph_name { + GraphName::NamedNode(node) => Self::NamedNode(node), + GraphName::DefaultGraph => Self::DefaultGraph, + } + } +} + +#[inline] +fn fmt_sse_unary_expression(f: &mut impl fmt::Write, name: &str, e: &Expression) -> fmt::Result { + write!(f, "({name} ")?; + e.fmt_sse(f)?; + f.write_str(")") +} + +#[inline] +fn fmt_sse_binary_expression( + f: &mut impl fmt::Write, + name: &str, + a: &Expression, + b: &Expression, +) -> fmt::Result { + write!(f, "({name} ")?; + a.fmt_sse(f)?; + f.write_str(" ")?; + b.fmt_sse(f)?; + f.write_str(")") +} diff --git a/ng-oxigraph/src/spargebra/mod.rs b/ng-oxigraph/src/spargebra/mod.rs new file mode 100644 index 0000000..f23b35b --- /dev/null +++ b/ng-oxigraph/src/spargebra/mod.rs @@ -0,0 +1,9 @@ +pub mod algebra; +mod parser; +mod query; +pub mod term; +mod update; + +pub use parser::SparqlSyntaxError; +pub use query::*; +pub use update::*; diff --git a/ng-oxigraph/src/spargebra/parser.rs b/ng-oxigraph/src/spargebra/parser.rs new file mode 100644 index 0000000..67718fe --- /dev/null +++ b/ng-oxigraph/src/spargebra/parser.rs @@ -0,0 +1,2086 @@ +#![allow(clippy::ignored_unit_patterns)] +use crate::oxrdf::vocab::{rdf, xsd}; +use crate::spargebra::algebra::*; +use crate::spargebra::query::*; +use crate::spargebra::term::*; +use crate::spargebra::update::*; +use oxilangtag::LanguageTag; +use oxiri::{Iri, IriParseError}; +use peg::parser; +use peg::str::LineCol; +use rand::random; +use std::char; +use std::collections::{HashMap, HashSet}; +use std::mem::take; +use std::str::FromStr; + +/// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query. +pub fn parse_query(query: &str, base_iri: Option<&str>) -> Result<Query, SparqlSyntaxError> { + let mut state = ParserState::from_base_iri(base_iri)?; + parser::QueryUnit(query, &mut state).map_err(|e| SparqlSyntaxError(ParseErrorKind::Syntax(e))) +} + +/// Parses a SPARQL update with an optional base IRI to resolve relative IRIs in the query. +pub fn parse_update(update: &str, base_iri: Option<&str>) -> Result<Update, SparqlSyntaxError> { + let mut state = ParserState::from_base_iri(base_iri)?; + let operations = parser::UpdateInit(update, &mut state) + .map_err(|e| SparqlSyntaxError(ParseErrorKind::Syntax(e)))?; + Ok(Update { + operations, + base_iri: state.base_iri, + }) +} + +/// Error returned during SPARQL parsing. 
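+/// Returned by [`parse_query`] and [`parse_update`] above, either for an
+/// invalid base IRI or for a syntax error reported by the `peg` parser.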
+#[derive(Debug, thiserror::Error)] +#[error(transparent)] +pub struct SparqlSyntaxError(#[from] ParseErrorKind); + +#[derive(Debug, thiserror::Error)] +enum ParseErrorKind { + #[error("Invalid SPARQL base IRI provided: {0}")] + InvalidBaseIri(#[from] IriParseError), + #[error(transparent)] + Syntax(#[from] peg::error::ParseError<LineCol>), +} + +struct AnnotatedTerm { + term: TermPattern, + annotations: Vec<(NamedNodePattern, Vec<AnnotatedTerm>)>, +} + +#[derive(Default)] +struct FocusedTriplePattern<F> { + focus: F, + patterns: Vec<TriplePattern>, +} + +impl<F> FocusedTriplePattern<F> { + fn new(focus: F) -> Self { + Self { + focus, + patterns: Vec::new(), + } + } +} + +impl<F> From<FocusedTriplePattern<F>> for FocusedTriplePattern<Vec<F>> { + fn from(input: FocusedTriplePattern<F>) -> Self { + Self { + focus: vec![input.focus], + patterns: input.patterns, + } + } +} + +#[derive(Clone, Debug)] +enum VariableOrPropertyPath { + Variable(Variable), + PropertyPath(PropertyPathExpression), +} + +impl From<Variable> for VariableOrPropertyPath { + fn from(var: Variable) -> Self { + Self::Variable(var) + } +} + +impl From<NamedNodePattern> for VariableOrPropertyPath { + fn from(pattern: NamedNodePattern) -> Self { + match pattern { + NamedNodePattern::NamedNode(node) => PropertyPathExpression::from(node).into(), + NamedNodePattern::Variable(v) => v.into(), + } + } +} + +impl From<PropertyPathExpression> for VariableOrPropertyPath { + fn from(path: PropertyPathExpression) -> Self { + Self::PropertyPath(path) + } +} + +fn add_to_triple_patterns( + subject: TermPattern, + predicate: NamedNodePattern, + object: AnnotatedTerm, + patterns: &mut Vec<TriplePattern>, +) -> Result<(), &'static str> { + let triple = TriplePattern::new(subject, predicate, object.term); + #[cfg(feature = "rdf-star")] + for (p, os) in object.annotations { + for o in os { + add_to_triple_patterns(triple.clone().into(), p.clone(), o, patterns)? 
+ } + } + #[cfg(not(feature = "rdf-star"))] + if !object.annotations.is_empty() { + return Err("Embedded triples are only available in SPARQL-star"); + } + patterns.push(triple); + Ok(()) +} + +fn add_to_triple_or_path_patterns( + subject: TermPattern, + predicate: impl Into<VariableOrPropertyPath>, + object: AnnotatedTermPath, + patterns: &mut Vec<TripleOrPathPattern>, +) -> Result<(), &'static str> { + match predicate.into() { + VariableOrPropertyPath::Variable(p) => { + add_triple_to_triple_or_path_patterns(subject, p, object, patterns)?; + } + VariableOrPropertyPath::PropertyPath(p) => match p { + PropertyPathExpression::NamedNode(p) => { + add_triple_to_triple_or_path_patterns(subject, p, object, patterns)?; + } + PropertyPathExpression::Reverse(p) => add_to_triple_or_path_patterns( + object.term, + *p, + AnnotatedTermPath { + term: subject, + annotations: object.annotations, + }, + patterns, + )?, + PropertyPathExpression::Sequence(a, b) => { + if !object.annotations.is_empty() { + return Err("Annotations are not allowed on property paths"); + } + let middle = BlankNode::default(); + add_to_triple_or_path_patterns( + subject, + *a, + AnnotatedTermPath { + term: middle.clone().into(), + annotations: Vec::new(), + }, + patterns, + )?; + add_to_triple_or_path_patterns( + middle.into(), + *b, + AnnotatedTermPath { + term: object.term, + annotations: Vec::new(), + }, + patterns, + )?; + } + path => { + if !object.annotations.is_empty() { + return Err("Annotations are not allowed on property paths"); + } + patterns.push(TripleOrPathPattern::Path { + subject, + path, + object: object.term, + }) + } + }, + } + Ok(()) +} + +fn add_triple_to_triple_or_path_patterns( + subject: TermPattern, + predicate: impl Into<NamedNodePattern>, + object: AnnotatedTermPath, + patterns: &mut Vec<TripleOrPathPattern>, +) -> Result<(), &'static str> { + let triple = TriplePattern::new(subject, predicate, object.term); + #[cfg(feature = "rdf-star")] + for (p, os) in object.annotations { + for o in os { + add_to_triple_or_path_patterns(triple.clone().into(), p.clone(), o, patterns)? 
+ } + } + #[cfg(not(feature = "rdf-star"))] + if !object.annotations.is_empty() { + return Err("Embedded triples are only available in SPARQL-star"); + } + patterns.push(triple.into()); + Ok(()) +} + +fn build_bgp(patterns: Vec<TripleOrPathPattern>) -> GraphPattern { + let mut bgp = Vec::new(); + let mut elements = Vec::with_capacity(patterns.len()); + for pattern in patterns { + match pattern { + TripleOrPathPattern::Triple(t) => bgp.push(t), + TripleOrPathPattern::Path { + subject, + path, + object, + } => { + if !bgp.is_empty() { + elements.push(GraphPattern::Bgp { + patterns: take(&mut bgp), + }); + } + elements.push(GraphPattern::Path { + subject, + path, + object, + }) + } + } + } + if !bgp.is_empty() { + elements.push(GraphPattern::Bgp { patterns: bgp }); + } + elements.into_iter().reduce(new_join).unwrap_or_default() +} + +#[derive(Debug)] +enum TripleOrPathPattern { + Triple(TriplePattern), + Path { + subject: TermPattern, + path: PropertyPathExpression, + object: TermPattern, + }, +} + +impl From<TriplePattern> for TripleOrPathPattern { + fn from(tp: TriplePattern) -> Self { + Self::Triple(tp) + } +} + +#[derive(Debug)] +struct AnnotatedTermPath { + term: TermPattern, + annotations: Vec<(VariableOrPropertyPath, Vec<AnnotatedTermPath>)>, +} + +impl From<AnnotatedTerm> for AnnotatedTermPath { + fn from(term: AnnotatedTerm) -> Self { + Self { + term: term.term, + annotations: term + .annotations + .into_iter() + .map(|(p, o)| (p.into(), o.into_iter().map(Self::from).collect())) + .collect(), + } + } +} + +#[derive(Debug, Default)] +struct FocusedTripleOrPathPattern<F> { + focus: F, + patterns: Vec<TripleOrPathPattern>, +} + +impl<F> FocusedTripleOrPathPattern<F> { + fn new(focus: F) -> Self { + Self { + focus, + patterns: Vec::new(), + } + } +} + +impl<F> From<FocusedTripleOrPathPattern<F>> for FocusedTripleOrPathPattern<Vec<F>> { + fn from(input: FocusedTripleOrPathPattern<F>) -> Self { + Self { + focus: vec![input.focus], + patterns: input.patterns, + } + } +} + +impl<F, T: From<F>> From<FocusedTriplePattern<F>> for FocusedTripleOrPathPattern<T> { + fn from(input: FocusedTriplePattern<F>) -> Self { + Self { + focus: input.focus.into(), + patterns: input.patterns.into_iter().map(Into::into).collect(), + } + } +} + +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +enum PartialGraphPattern { + Optional(GraphPattern, Option<Expression>), + #[cfg(feature = "sep-0006")] + Lateral(GraphPattern), + Minus(GraphPattern), + Bind(Expression, Variable), + Filter(Expression), + Other(GraphPattern), +} + +fn new_join(l: GraphPattern, r: GraphPattern) -> GraphPattern { + // Avoid to output empty BGPs + if let GraphPattern::Bgp { patterns: pl } = &l { + if pl.is_empty() { + return r; + } + } + if let GraphPattern::Bgp { patterns: pr } = &r { + if pr.is_empty() { + return l; + } + } + + match (l, r) { + (GraphPattern::Bgp { patterns: mut pl }, GraphPattern::Bgp { patterns: pr }) => { + pl.extend(pr); + GraphPattern::Bgp { patterns: pl } + } + (GraphPattern::Bgp { patterns }, other) | (other, GraphPattern::Bgp { patterns }) + if patterns.is_empty() => + { + other + } + (l, r) => GraphPattern::Join { + left: Box::new(l), + right: Box::new(r), + }, + } +} + +fn not_empty_fold<T>( + iter: impl Iterator<Item = T>, + combine: impl Fn(T, T) -> T, +) -> Result<T, &'static str> { + iter.fold(None, |a, b| match a { + Some(av) => Some(combine(av, b)), + None => Some(b), + }) + .ok_or("The iterator should not be empty") +} + +enum SelectionOption { + Distinct, + Reduced, + Default, +} + +enum SelectionMember { + 
Variable(Variable), + Expression(Expression, Variable), +} + +enum SelectionVariables { + Explicit(Vec<SelectionMember>), + Star, + Everything, +} + +struct Selection { + pub option: SelectionOption, + pub variables: SelectionVariables, +} + +impl Selection { + fn no_op() -> Self { + Self { + option: SelectionOption::Default, + variables: SelectionVariables::Everything, + } + } +} + +fn build_select( + select: Selection, + r#where: GraphPattern, + mut group: Option<(Vec<Variable>, Vec<(Expression, Variable)>)>, + having: Option<Expression>, + order_by: Option<Vec<OrderExpression>>, + offset_limit: Option<(usize, Option<usize>)>, + values: Option<GraphPattern>, + state: &mut ParserState, +) -> Result<GraphPattern, &'static str> { + let mut p = r#where; + let mut with_aggregate = false; + + // GROUP BY + let aggregates = state.aggregates.pop().unwrap_or_default(); + if group.is_none() && !aggregates.is_empty() { + group = Some((vec![], vec![])); + } + + if let Some((clauses, binds)) = group { + for (expression, variable) in binds { + p = GraphPattern::Extend { + inner: Box::new(p), + variable, + expression, + }; + } + p = GraphPattern::Group { + inner: Box::new(p), + variables: clauses, + aggregates, + }; + with_aggregate = true; + } + + // HAVING + if let Some(expr) = having { + p = GraphPattern::Filter { + expr, + inner: Box::new(p), + }; + } + + // VALUES + if let Some(data) = values { + p = new_join(p, data); + } + + // SELECT + let mut pv = Vec::new(); + let with_project = match select.variables { + SelectionVariables::Explicit(sel_items) => { + let mut visible = HashSet::default(); + p.on_in_scope_variable(|v| { + visible.insert(v.clone()); + }); + for sel_item in sel_items { + let v = match sel_item { + SelectionMember::Variable(v) => { + if with_aggregate && !visible.contains(&v) { + // We validate projection variables if there is an aggregate + return Err("The SELECT contains a variable that is unbound"); + } + v + } + SelectionMember::Expression(expression, variable) => { + if visible.contains(&variable) { + // We disallow overriding an existing variable with an expression + return Err( + "The SELECT overrides an existing variable using an expression", + ); + } + if with_aggregate && !are_variables_bound(&expression, &visible) { + // We validate projection variables if there is an aggregate + return Err( + "The SELECT contains an expression with a variable that is unbound", + ); + } + p = GraphPattern::Extend { + inner: Box::new(p), + variable: variable.clone(), + expression, + }; + variable + } + }; + if pv.contains(&v) { + return Err("Duplicated variable name in SELECT"); + } + pv.push(v) + } + true + } + SelectionVariables::Star => { + if with_aggregate { + return Err("SELECT * is not authorized with GROUP BY"); + } + // TODO: is it really useful to do a projection?
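+ // SELECT *: every variable in scope in the WHERE pattern is projected; the list is sorted so the projection order is deterministic.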
+ p.on_in_scope_variable(|v| { + if !pv.contains(v) { + pv.push(v.clone()); + } + }); + pv.sort(); + true + } + SelectionVariables::Everything => false, + }; + + let mut m = p; + + // ORDER BY + if let Some(expression) = order_by { + m = GraphPattern::OrderBy { + inner: Box::new(m), + expression, + }; + } + + // PROJECT + if with_project { + m = GraphPattern::Project { + inner: Box::new(m), + variables: pv, + }; + } + match select.option { + SelectionOption::Distinct => m = GraphPattern::Distinct { inner: Box::new(m) }, + SelectionOption::Reduced => m = GraphPattern::Reduced { inner: Box::new(m) }, + SelectionOption::Default => (), + } + + // OFFSET LIMIT + if let Some((start, length)) = offset_limit { + m = GraphPattern::Slice { + inner: Box::new(m), + start, + length, + } + } + Ok(m) +} + +fn are_variables_bound(expression: &Expression, variables: &HashSet<Variable>) -> bool { + match expression { + Expression::NamedNode(_) + | Expression::Literal(_) + | Expression::Bound(_) + | Expression::Coalesce(_) + | Expression::Exists(_) => true, + Expression::Variable(var) => variables.contains(var), + Expression::UnaryPlus(e) | Expression::UnaryMinus(e) | Expression::Not(e) => { + are_variables_bound(e, variables) + } + Expression::Or(a, b) + | Expression::And(a, b) + | Expression::Equal(a, b) + | Expression::SameTerm(a, b) + | Expression::Greater(a, b) + | Expression::GreaterOrEqual(a, b) + | Expression::Less(a, b) + | Expression::LessOrEqual(a, b) + | Expression::Add(a, b) + | Expression::Subtract(a, b) + | Expression::Multiply(a, b) + | Expression::Divide(a, b) => { + are_variables_bound(a, variables) && are_variables_bound(b, variables) + } + Expression::In(a, b) => { + are_variables_bound(a, variables) && b.iter().all(|b| are_variables_bound(b, variables)) + } + Expression::FunctionCall(_, parameters) => { + parameters.iter().all(|p| are_variables_bound(p, variables)) + } + Expression::If(a, b, c) => { + are_variables_bound(a, variables) + && are_variables_bound(b, variables) + && are_variables_bound(c, variables) + } + } +} + +/// Collects every variable defined using "AS" or "VALUES" +#[cfg(feature = "sep-0006")] +fn add_defined_variables<'a>(pattern: &'a GraphPattern, set: &mut HashSet<&'a Variable>) { + match pattern { + GraphPattern::Bgp { .. } | GraphPattern::Path { .. } => {} + GraphPattern::Join { left, right } + | GraphPattern::LeftJoin { left, right, .. } + | GraphPattern::Lateral { left, right } + | GraphPattern::Union { left, right } + | GraphPattern::Minus { left, right } => { + add_defined_variables(left, set); + add_defined_variables(right, set); + } + GraphPattern::Graph { inner, .. } => { + add_defined_variables(inner, set); + } + GraphPattern::Extend { + inner, variable, .. + } => { + set.insert(variable); + add_defined_variables(inner, set); + } + GraphPattern::Group { + variables, + aggregates, + inner, + } => { + for (v, _) in aggregates { + set.insert(v); + } + let mut inner_variables = HashSet::new(); + add_defined_variables(inner, &mut inner_variables); + for v in inner_variables { + if variables.contains(v) { + set.insert(v); + } + } + } + GraphPattern::Values { variables, .. } => { + for v in variables { + set.insert(v); + } + } + GraphPattern::Project { variables, inner } => { + let mut inner_variables = HashSet::new(); + add_defined_variables(inner, &mut inner_variables); + for v in inner_variables { + if variables.contains(v) { + set.insert(v); + } + } + } + GraphPattern::Service { inner, .. } + | GraphPattern::Filter { inner, ..
} + | GraphPattern::OrderBy { inner, .. } + | GraphPattern::Distinct { inner } + | GraphPattern::Reduced { inner } + | GraphPattern::Slice { inner, .. } => add_defined_variables(inner, set), + } +} + +fn copy_graph(from: impl Into<GraphName>, to: impl Into<GraphNamePattern>) -> GraphUpdateOperation { + let bgp = GraphPattern::Bgp { + patterns: vec![TriplePattern::new( + Variable::new_unchecked("s"), + Variable::new_unchecked("p"), + Variable::new_unchecked("o"), + )], + }; + GraphUpdateOperation::DeleteInsert { + delete: Vec::new(), + insert: vec![QuadPattern::new( + Variable::new_unchecked("s"), + Variable::new_unchecked("p"), + Variable::new_unchecked("o"), + to, + )], + using: None, + pattern: Box::new(match from.into() { + GraphName::NamedNode(from) => GraphPattern::Graph { + name: from.into(), + inner: Box::new(bgp), + }, + GraphName::DefaultGraph => bgp, + }), + } +} + +enum Either<L, R> { + Left(L), + Right(R), +} + +pub struct ParserState { + base_iri: Option<Iri<String>>, + namespaces: HashMap<String, String>, + used_bnodes: HashSet<BlankNode>, + currently_used_bnodes: HashSet<BlankNode>, + aggregates: Vec<Vec<(Variable, AggregateExpression)>>, +} + +impl ParserState { + pub(crate) fn from_base_iri(base_iri: Option<&str>) -> Result<Self, SparqlSyntaxError> { + Ok(Self { + base_iri: if let Some(base_iri) = base_iri { + Some( + Iri::parse(base_iri.to_owned()) + .map_err(|e| SparqlSyntaxError(ParseErrorKind::InvalidBaseIri(e)))?, + ) + } else { + None + }, + namespaces: HashMap::default(), + used_bnodes: HashSet::default(), + currently_used_bnodes: HashSet::default(), + aggregates: Vec::new(), + }) + } + + fn parse_iri(&self, iri: String) -> Result<Iri<String>, IriParseError> { + if let Some(base_iri) = &self.base_iri { + base_iri.resolve(&iri) + } else { + Iri::parse(iri) + } + } + + fn new_aggregation(&mut self, agg: AggregateExpression) -> Result<Variable, &'static str> { + let aggregates = self.aggregates.last_mut().ok_or("Unexpected aggregate")?; + Ok(aggregates + .iter() + .find_map(|(v, a)| (a == &agg).then_some(v)) + .cloned() + .unwrap_or_else(|| { + let new_var = variable(); + aggregates.push((new_var.clone(), agg)); + new_var + })) + } +} + +fn unescape_iriref(mut input: &str) -> Result<String, &'static str> { + let mut output = String::with_capacity(input.len()); + while let Some((before, after)) = input.split_once('\\') { + output.push_str(before); + let mut after = after.chars(); + let (escape, after) = match after.next() { + Some('u') => read_hex_char::<4>(after.as_str())?, + Some('U') => read_hex_char::<8>(after.as_str())?, + Some(_) => { + return Err( + "IRIs are only allowed to contain escape sequences \\uXXXX and \\UXXXXXXXX", + ) + } + None => return Err("IRIs are not allowed to end with a '\\'"), + }; + output.push(escape); + input = after; + } + output.push_str(input); + Ok(output) +} + +fn unescape_string(mut input: &str) -> Result<String, &'static str> { + let mut output = String::with_capacity(input.len()); + while let Some((before, after)) = input.split_once('\\') { + output.push_str(before); + let mut after = after.chars(); + let (escape, after) = match after.next() { + Some('t') => ('\u{0009}', after.as_str()), + Some('b') => ('\u{0008}', after.as_str()), + Some('n') => ('\u{000A}', after.as_str()), + Some('r') => ('\u{000D}', after.as_str()), + Some('f') => ('\u{000C}', after.as_str()), + Some('"') => ('\u{0022}', after.as_str()), + Some('\'') => ('\u{0027}', after.as_str()), + Some('\\') => ('\u{005C}', after.as_str()), + Some('u') => read_hex_char::<4>(after.as_str())?, + Some('U') => read_hex_char::<8>(after.as_str())?, + Some(_) => return Err("The characters that can be escaped in strings are tbnrf\"'\\"), + None => return Err("Strings are not allowed to end with a '\\'"), + }; + output.push(escape); + input = after; + } + output.push_str(input); + Ok(output) +} + +fn read_hex_char<const SIZE: usize>(input: &str) -> Result<(char, &str), &'static str> { + if let Some(escape) = input.get(..SIZE) { + if let Some(char) = u32::from_str_radix(escape, 16) + .ok() + .and_then(char::from_u32) + { + Ok((char, &input[SIZE..])) + } else { + Err("\\u escape sequence should be followed by hexadecimal digits") + } + } else { + Err("\\u escape sequence should be followed by hexadecimal digits") + } +} + +fn variable() -> Variable { + Variable::new_unchecked(format!("{:x}", random::<u128>())) +} + +parser! { + //See https://www.w3.org/TR/turtle/#sec-grammar + grammar parser(state: &mut ParserState) for str { + pub rule QueryUnit() -> Query = Query() + + rule Query() -> Query = _ Prologue() _ q:(SelectQuery() / ConstructQuery() / DescribeQuery() / AskQuery()) _ { + q + } + + pub rule UpdateInit() -> Vec<GraphUpdateOperation> = Update() + + rule Prologue() = (BaseDecl() _ / PrefixDecl() _)* {} + + rule BaseDecl() = i("BASE") _ i:IRIREF() { + state.base_iri = Some(i) + } + + rule PrefixDecl() = i("PREFIX") _ ns:PNAME_NS() _ i:IRIREF() { + state.namespaces.insert(ns.into(), i.into_inner()); + } + + rule SelectQuery() -> Query = s:SelectClause() _ d:DatasetClauses() _ w:WhereClause() _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? + Ok(Query::Select { + dataset: d, + pattern: build_select(s, w, g, h, o, l, v, state)?, + base_iri: state.base_iri.clone() + }) + } + + rule SubSelect() -> GraphPattern = s:SelectClause() _ w:WhereClause() _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? + build_select(s, w, g, h, o, l, v, state) + } + + rule SelectClause() -> Selection = i("SELECT") _ Selection_init() o:SelectClause_option() _ v:SelectClause_variables() { + Selection { + option: o, + variables: v + } + } + rule Selection_init() = { + state.aggregates.push(Vec::new()) + } + rule SelectClause_option() -> SelectionOption = + i("DISTINCT") { SelectionOption::Distinct } / + i("REDUCED") { SelectionOption::Reduced } / + { SelectionOption::Default } + rule SelectClause_variables() -> SelectionVariables = + "*" { SelectionVariables::Star } / + p:SelectClause_member()+ { SelectionVariables::Explicit(p) } + rule SelectClause_member() -> SelectionMember = + v:Var() _ { SelectionMember::Variable(v) } / + "(" _ e:Expression() _ i("AS") _ v:Var() _ ")" _ { SelectionMember::Expression(e, v) } + + rule ConstructQuery() -> Query = + i("CONSTRUCT") _ c:ConstructTemplate() _ d:DatasetClauses() _ w:WhereClause() _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? + Ok(Query::Construct { + template: c, + dataset: d, + pattern: build_select(Selection::no_op(), w, g, h, o, l, v, state)?, + base_iri: state.base_iri.clone() + }) + } / + i("CONSTRUCT") _ d:DatasetClauses() _ i("WHERE") _ "{" _ c:ConstructQuery_optional_triple_template() _ "}" _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {?
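+ // CONSTRUCT WHERE shortcut form: the triple template doubles as the WHERE pattern, hence the clone of c below (one copy becomes the template, the other the BGP).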
+ Ok(Query::Construct { + template: c.clone(), + dataset: d, + pattern: build_select( + Selection::no_op(), + GraphPattern::Bgp { patterns: c }, + g, h, o, l, v, state + )?, + base_iri: state.base_iri.clone() + }) + } + + rule ConstructQuery_optional_triple_template() -> Vec<TriplePattern> = TriplesTemplate() / { Vec::new() } + + rule DescribeQuery() -> Query = + i("DESCRIBE") _ "*" _ d:DatasetClauses() w:WhereClause()? _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? + Ok(Query::Describe { + dataset: d, + pattern: build_select(Selection::no_op(), w.unwrap_or_default(), g, h, o, l, v, state)?, + base_iri: state.base_iri.clone() + }) + } / + i("DESCRIBE") _ p:DescribeQuery_item()+ _ d:DatasetClauses() w:WhereClause()? _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? + Ok(Query::Describe { + dataset: d, + pattern: build_select(Selection { + option: SelectionOption::Default, + variables: SelectionVariables::Explicit(p.into_iter().map(|var_or_iri| match var_or_iri { + NamedNodePattern::NamedNode(n) => SelectionMember::Expression(n.into(), variable()), + NamedNodePattern::Variable(v) => SelectionMember::Variable(v) + }).collect()) + }, w.unwrap_or_default(), g, h, o, l, v, state)?, + base_iri: state.base_iri.clone() + }) + } + rule DescribeQuery_item() -> NamedNodePattern = i:VarOrIri() _ { i } + + rule AskQuery() -> Query = i("ASK") _ d:DatasetClauses() _ w:WhereClause() _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? + Ok(Query::Ask { + dataset: d, + pattern: build_select(Selection::no_op(), w, g, h, o, l, v, state)?, + base_iri: state.base_iri.clone() + }) + } + + rule DatasetClause() -> (Option<NamedNode>, Option<NamedNode>) = i("FROM") _ d:(DefaultGraphClause() / NamedGraphClause()) { d } + rule DatasetClauses() -> Option<QueryDataset> = d:DatasetClause() ** (_) { + if d.is_empty() { + return None; + } + let mut default = Vec::new(); + let mut named = Vec::new(); + for (d, n) in d { + if let Some(d) = d { + default.push(d); + } + if let Some(n) = n { + named.push(n); + } + } + Some(QueryDataset { + default, named: Some(named) + }) + } + + rule DefaultGraphClause() -> (Option<NamedNode>, Option<NamedNode>) = s:SourceSelector() { + (Some(s), None) + } + + rule NamedGraphClause() -> (Option<NamedNode>, Option<NamedNode>) = i("NAMED") _ s:SourceSelector() { + (None, Some(s)) + } + + rule SourceSelector() -> NamedNode = iri() + + rule WhereClause() -> GraphPattern = i("WHERE")? _ p:GroupGraphPattern() { + p + } + + rule GroupClause() -> (Vec<Variable>, Vec<(Expression,Variable)>) = i("GROUP") _ i("BY") _ c:GroupCondition_item()+ { + let mut projections: Vec<(Expression,Variable)> = Vec::new(); + let clauses = c.into_iter().map(|(e, vo)| { + if let Expression::Variable(v) = e { + v + } else { + let v = vo.unwrap_or_else(variable); + projections.push((e, v.clone())); + v + } + }).collect(); + (clauses, projections) + } + rule GroupCondition_item() -> (Expression, Option<Variable>) = c:GroupCondition() _ { c } + + rule GroupCondition() -> (Expression, Option<Variable>) = + e:BuiltInCall() { (e, None) } / + e:FunctionCall() { (e, None) } / + "(" _ e:Expression() _ v:GroupCondition_as()? ")" { (e, v) } / + e:Var() { (e.into(), None) } + rule GroupCondition_as() -> Variable = i("AS") _ v:Var() _ { v } + + rule HavingClause() -> Expression = i("HAVING") _ e:HavingCondition()+ {? 
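+ // Several HAVING constraints are folded into one conjunction, e.g. (illustrative) HAVING (?x > 1) (?y < 2) becomes And(Greater(?x, 1), Less(?y, 2)).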
+ not_empty_fold(e.into_iter(), |a, b| Expression::And(Box::new(a), Box::new(b))) + } + + rule HavingCondition() -> Expression = Constraint() + + rule OrderClause() -> Vec<OrderExpression> = i("ORDER") _ i("BY") _ c:OrderClause_item()+ { c } + rule OrderClause_item() -> OrderExpression = c:OrderCondition() _ { c } + + rule OrderCondition() -> OrderExpression = + i("ASC") _ e: BrackettedExpression() { OrderExpression::Asc(e) } / + i("DESC") _ e: BrackettedExpression() { OrderExpression::Desc(e) } / + e: Constraint() { OrderExpression::Asc(e) } / + v: Var() { OrderExpression::Asc(Expression::from(v)) } + + rule LimitOffsetClauses() -> (usize, Option<usize>) = + l:LimitClause() _ o:OffsetClause()? { (o.unwrap_or(0), Some(l)) } / + o:OffsetClause() _ l:LimitClause()? { (o, l) } + + rule LimitClause() -> usize = i("LIMIT") _ l:$(INTEGER()) {? + usize::from_str(l).map_err(|_| "The query limit should be a non-negative integer") + } + + rule OffsetClause() -> usize = i("OFFSET") _ o:$(INTEGER()) {? + usize::from_str(o).map_err(|_| "The query offset should be a non-negative integer") + } + + rule ValuesClause() -> Option<GraphPattern> = + i("VALUES") _ p:DataBlock() { Some(p) } / + { None } + + rule Update() -> Vec<GraphUpdateOperation> = _ Prologue() _ u:(Update1() ** (_ ";" _)) _ ( ";" _)? { u.into_iter().flatten().collect() } + + rule Update1() -> Vec<GraphUpdateOperation> = Load() / Clear() / Drop() / Add() / Move() / Copy() / Create() / InsertData() / DeleteData() / DeleteWhere() / Modify() + rule Update1_silent() -> bool = i("SILENT") { true } / { false } + + rule Load() -> Vec<GraphUpdateOperation> = i("LOAD") _ silent:Update1_silent() _ source:iri() _ destination:Load_to()? { + vec![GraphUpdateOperation::Load { silent, source, destination: destination.map_or(GraphName::DefaultGraph, GraphName::NamedNode) }] + } + rule Load_to() -> NamedNode = i("INTO") _ g: GraphRef() { g } + + rule Clear() -> Vec<GraphUpdateOperation> = i("CLEAR") _ silent:Update1_silent() _ graph:GraphRefAll() { + vec![GraphUpdateOperation::Clear { silent, graph }] + } + + rule Drop() -> Vec<GraphUpdateOperation> = i("DROP") _ silent:Update1_silent() _ graph:GraphRefAll() { + vec![GraphUpdateOperation::Drop { silent, graph }] + } + + rule Create() -> Vec<GraphUpdateOperation> = i("CREATE") _ silent:Update1_silent() _ graph:GraphRef() { + vec![GraphUpdateOperation::Create { silent, graph }] + } + + rule Add() -> Vec<GraphUpdateOperation> = i("ADD") _ silent:Update1_silent() _ from:GraphOrDefault() _ i("TO") _ to:GraphOrDefault() { + // Rewriting defined by https://www.w3.org/TR/sparql11-update/#add + if from == to { + Vec::new() // identity case + } else { + vec![copy_graph(from, to)] + } + } + + rule Move() -> Vec<GraphUpdateOperation> = i("MOVE") _ silent:Update1_silent() _ from:GraphOrDefault() _ i("TO") _ to:GraphOrDefault() { + // Rewriting defined by https://www.w3.org/TR/sparql11-update/#move + if from == to { + Vec::new() // identity case + } else { + vec![GraphUpdateOperation::Drop { silent: true, graph: to.clone().into() }, copy_graph(from.clone(), to), GraphUpdateOperation::Drop { silent, graph: from.into() }] + } + } + + rule Copy() -> Vec<GraphUpdateOperation> = i("COPY") _ silent:Update1_silent() _ from:GraphOrDefault() _ i("TO") _ to:GraphOrDefault() { + // Rewriting defined by https://www.w3.org/TR/sparql11-update/#copy + if from == to { + Vec::new() // identity case + } else { + vec![GraphUpdateOperation::Drop { silent: true, graph: to.clone().into() }, copy_graph(from, to)] + } + } + + rule InsertData() -> Vec<GraphUpdateOperation> = i("INSERT") _ i("DATA") _ data:QuadData() { + vec![GraphUpdateOperation::InsertData { data }] + } + + rule DeleteData() -> Vec<GraphUpdateOperation> = i("DELETE") _ i("DATA") _ data:GroundQuadData() { + vec![GraphUpdateOperation::DeleteData { data }] + } + + rule DeleteWhere() -> Vec<GraphUpdateOperation> = i("DELETE") _ i("WHERE") _ d:QuadPattern() {? + let pattern = d.iter().map(|q| { + let bgp = GraphPattern::Bgp { patterns: vec![TriplePattern::new(q.subject.clone(), q.predicate.clone(), q.object.clone())] }; + match &q.graph_name { + GraphNamePattern::NamedNode(graph_name) => GraphPattern::Graph { name: graph_name.clone().into(), inner: Box::new(bgp) }, + GraphNamePattern::DefaultGraph => bgp, + GraphNamePattern::Variable(graph_name) => GraphPattern::Graph { name: graph_name.clone().into(), inner: Box::new(bgp) }, + } + }).reduce(new_join).unwrap_or_default(); + let delete = d.into_iter().map(GroundQuadPattern::try_from).collect::<Result<Vec<_>,_>>().map_err(|()| "Blank nodes are not allowed in DELETE WHERE")?; + Ok(vec![GraphUpdateOperation::DeleteInsert { + delete, + insert: Vec::new(), + using: None, + pattern: Box::new(pattern) + }]) + } + + rule Modify() -> Vec<GraphUpdateOperation> = with:Modify_with()? _ Modify_clear() c:Modify_clauses() _ u:(UsingClause() ** (_)) _ i("WHERE") _ pattern:GroupGraphPattern() { + let (delete, insert) = c; + let mut delete = delete.unwrap_or_default(); + let mut insert = insert.unwrap_or_default(); + #[allow(clippy::shadow_same)] + let mut pattern = pattern; + + let mut using = if u.is_empty() { + None + } else { + let mut default = Vec::new(); + let mut named = Vec::new(); + for (d, n) in u { + if let Some(d) = d { + default.push(d) + } + if let Some(n) = n { + named.push(n) + } + } + Some(QueryDataset { default, named: Some(named) }) + }; + + if let Some(with) = with { + // We inject WITH everywhere + delete = delete.into_iter().map(|q| if q.graph_name == GraphNamePattern::DefaultGraph { + GroundQuadPattern { + subject: q.subject, + predicate: q.predicate, + object: q.object, + graph_name: with.clone().into() + } + } else { + q + }).collect(); + insert = insert.into_iter().map(|q| if q.graph_name == GraphNamePattern::DefaultGraph { + QuadPattern { + subject: q.subject, + predicate: q.predicate, + object: q.object, + graph_name: with.clone().into() + } + } else { + q + }).collect(); + if using.is_none() { + using = Some(QueryDataset { default: vec![with], named: None }); + } + } + + vec![GraphUpdateOperation::DeleteInsert { + delete, + insert, + using, + pattern: Box::new(pattern) + }] + } + rule Modify_with() -> NamedNode = i("WITH") _ i:iri() _ { i } + rule Modify_clauses() -> (Option<Vec<GroundQuadPattern>>, Option<Vec<QuadPattern>>) = d:DeleteClause() _ i:InsertClause()? { + (Some(d), i) + } / i:InsertClause() { + (None, Some(i)) + } + rule Modify_clear() = { + state.used_bnodes.clear(); + state.currently_used_bnodes.clear(); + } + + rule DeleteClause() -> Vec<GroundQuadPattern> = i("DELETE") _ q:QuadPattern() {?
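+ // The DELETE template must be ground with respect to blank nodes: the QuadPattern -> GroundQuadPattern conversion below fails if any blank node occurs.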
+ q.into_iter().map(GroundQuadPattern::try_from).collect::<Result<Vec<_>,_>>().map_err(|()| "Blank nodes are not allowed in DELETE WHERE") + } + + rule InsertClause() -> Vec<QuadPattern> = i("INSERT") _ q:QuadPattern() { q } + + rule UsingClause() -> (Option<NamedNode>, Option<NamedNode>) = i("USING") _ d:(UsingClause_default() / UsingClause_named()) { d } + rule UsingClause_default() -> (Option<NamedNode>, Option<NamedNode>) = i:iri() { + (Some(i), None) + } + rule UsingClause_named() -> (Option<NamedNode>, Option<NamedNode>) = i("NAMED") _ i:iri() { + (None, Some(i)) + } + + rule GraphOrDefault() -> GraphName = i("DEFAULT") { + GraphName::DefaultGraph + } / (i("GRAPH") _)? g:iri() { + GraphName::NamedNode(g) + } + + rule GraphRef() -> NamedNode = i("GRAPH") _ g:iri() { g } + + rule GraphRefAll() -> GraphTarget = i: GraphRef() { i.into() } + / i("DEFAULT") { GraphTarget::DefaultGraph } + / i("NAMED") { GraphTarget::NamedGraphs } + / i("ALL") { GraphTarget::AllGraphs } + + rule QuadPattern() -> Vec<QuadPattern> = "{" _ q:Quads() _ "}" { q } + + rule QuadData() -> Vec<Quad> = "{" _ q:Quads() _ "}" {? + q.into_iter().map(Quad::try_from).collect::<Result<Vec<_>, ()>>().map_err(|()| "Variables are not allowed in INSERT DATA") + } + rule GroundQuadData() -> Vec<GroundQuad> = "{" _ q:Quads() _ "}" {? + q.into_iter().map(|q| GroundQuad::try_from(Quad::try_from(q)?)).collect::<Result<Vec<_>, ()>>().map_err(|()| "Variables and blank nodes are not allowed in DELETE DATA") + } + + rule Quads() -> Vec<QuadPattern> = q:(Quads_TriplesTemplate() / Quads_QuadsNotTriples()) ** (_) { + q.into_iter().flatten().collect() + } + rule Quads_TriplesTemplate() -> Vec<QuadPattern> = t:TriplesTemplate() { + t.into_iter().map(|t| QuadPattern::new(t.subject, t.predicate, t.object, GraphNamePattern::DefaultGraph)).collect() + } //TODO: return iter? + rule Quads_QuadsNotTriples() -> Vec<QuadPattern> = q:QuadsNotTriples() _ "."? { q } + + rule QuadsNotTriples() -> Vec<QuadPattern> = i("GRAPH") _ g:VarOrIri() _ "{" _ t:TriplesTemplate()? _ "}" { + t.unwrap_or_default().into_iter().map(|t| QuadPattern::new(t.subject, t.predicate, t.object, g.clone())).collect() + } + + rule TriplesTemplate() -> Vec<TriplePattern> = ts:TriplesTemplate_inner() ++ (".") ("." _)? { + ts.into_iter().flatten().collect() + } + rule TriplesTemplate_inner() -> Vec<TriplePattern> = _ t:TriplesSameSubject() _ { t } + + rule GroupGraphPattern() -> GraphPattern = + "{" _ GroupGraphPattern_clear() p:GroupGraphPatternSub() GroupGraphPattern_clear() _ "}" { p } / + "{" _ GroupGraphPattern_clear() p:SubSelect() GroupGraphPattern_clear() _ "}" { p } + rule GroupGraphPattern_clear() = { + // We deal with blank nodes aliases rule + state.used_bnodes.extend(state.currently_used_bnodes.iter().cloned()); + state.currently_used_bnodes.clear(); + } + + rule GroupGraphPatternSub() -> GraphPattern = a:TriplesBlock()? _ b:GroupGraphPatternSub_item()* {? 
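+ // Folds the optional leading triples block and each following element (OPTIONAL, LATERAL, MINUS, BIND, FILTER, VALUES, ...) onto the running pattern g; FILTER expressions are accumulated and applied once around the whole group, matching their group-level scope.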
+ let mut filter: Option<Expression> = None; + let mut g = a.map_or_else(GraphPattern::default, build_bgp); + for e in b.into_iter().flatten() { + match e { + PartialGraphPattern::Optional(p, f) => { + g = GraphPattern::LeftJoin { left: Box::new(g), right: Box::new(p), expression: f } + } + #[cfg(feature = "sep-0006")] + PartialGraphPattern::Lateral(p) => { + let mut defined_variables = HashSet::default(); + add_defined_variables(&p, &mut defined_variables); + let mut contains = false; + g.on_in_scope_variable(|v| { + if defined_variables.contains(v) { + contains = true; + } + }); + if contains { + return Err("An existing variable is overridden in the right side of LATERAL"); + } + g = GraphPattern::Lateral { left: Box::new(g), right: Box::new(p) } + } + PartialGraphPattern::Minus(p) => { + g = GraphPattern::Minus { left: Box::new(g), right: Box::new(p) } + } + PartialGraphPattern::Bind(expression, variable) => { + let mut contains = false; + g.on_in_scope_variable(|v| { + if *v == variable { + contains = true; + } + }); + if contains { + return Err("BIND is overriding an existing variable") + } + g = GraphPattern::Extend { inner: Box::new(g), variable, expression } + } + PartialGraphPattern::Filter(expr) => filter = Some(if let Some(f) = filter { + Expression::And(Box::new(f), Box::new(expr)) + } else { + expr + }), + PartialGraphPattern::Other(e) => g = new_join(g, e), + } + } + + Ok(if let Some(expr) = filter { + GraphPattern::Filter { expr, inner: Box::new(g) } + } else { + g + }) + } + rule GroupGraphPatternSub_item() -> Vec<PartialGraphPattern> = a:GraphPatternNotTriples() _ ("." _)? b:TriplesBlock()? _ { + let mut result = vec![a]; + if let Some(v) = b { + result.push(PartialGraphPattern::Other(build_bgp(v))); + } + result + } + + rule TriplesBlock() -> Vec<TripleOrPathPattern> = hs:TriplesBlock_inner() ++ (".") ("." _)? { + hs.into_iter().flatten().collect() + } + rule TriplesBlock_inner() -> Vec<TripleOrPathPattern> = _ h:TriplesSameSubjectPath() _ { h } + + rule GraphPatternNotTriples() -> PartialGraphPattern = GroupOrUnionGraphPattern() / OptionalGraphPattern() / LateralGraphPattern() / MinusGraphPattern() / GraphGraphPattern() / ServiceGraphPattern() / Filter() / Bind() / InlineData() + + rule OptionalGraphPattern() -> PartialGraphPattern = i("OPTIONAL") _ p:GroupGraphPattern() { + if let GraphPattern::Filter { expr, inner } = p { + PartialGraphPattern::Optional(*inner, Some(expr)) + } else { + PartialGraphPattern::Optional(p, None) + } + } + + rule LateralGraphPattern() -> PartialGraphPattern = i("LATERAL") _ p:GroupGraphPattern() {? 
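+ // LATERAL (SPARQL extension proposal SEP-0006) is only accepted when the "sep-0006" feature is enabled.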
+ #[cfg(feature = "sep-0006")]{Ok(PartialGraphPattern::Lateral(p))} + #[cfg(not(feature = "sep-0006"))]{Err("The LATERAL modifier is not supported")} + } + + rule GraphGraphPattern() -> PartialGraphPattern = i("GRAPH") _ name:VarOrIri() _ p:GroupGraphPattern() { + PartialGraphPattern::Other(GraphPattern::Graph { name, inner: Box::new(p) }) + } + + rule ServiceGraphPattern() -> PartialGraphPattern = + i("SERVICE") _ i("SILENT") _ name:VarOrIri() _ p:GroupGraphPattern() { PartialGraphPattern::Other(GraphPattern::Service { name, inner: Box::new(p), silent: true }) } / + i("SERVICE") _ name:VarOrIri() _ p:GroupGraphPattern() { PartialGraphPattern::Other(GraphPattern::Service{ name, inner: Box::new(p), silent: false }) } + + rule Bind() -> PartialGraphPattern = i("BIND") _ "(" _ e:Expression() _ i("AS") _ v:Var() _ ")" { + PartialGraphPattern::Bind(e, v) + } + + rule InlineData() -> PartialGraphPattern = i("VALUES") _ p:DataBlock() { PartialGraphPattern::Other(p) } + + rule DataBlock() -> GraphPattern = l:(InlineDataOneVar() / InlineDataFull()) { + GraphPattern::Values { variables: l.0, bindings: l.1 } + } + + rule InlineDataOneVar() -> (Vec<Variable>, Vec<Vec<Option<GroundTerm>>>) = var:Var() _ "{" _ d:InlineDataOneVar_value()* "}" { + (vec![var], d) + } + rule InlineDataOneVar_value() -> Vec<Option<GroundTerm>> = t:DataBlockValue() _ { vec![t] } + + rule InlineDataFull() -> (Vec<Variable>, Vec<Vec<Option<GroundTerm>>>) = "(" _ vars:InlineDataFull_var()* _ ")" _ "{" _ vals:InlineDataFull_values()* "}" {? + if vals.iter().all(|vs| vs.len() == vars.len()) { + Ok((vars, vals)) + } else { + Err("The VALUES clause rows should have exactly the same number of values as there are variables. To set a value to undefined use UNDEF.") + } + } + rule InlineDataFull_var() -> Variable = v:Var() _ { v } + rule InlineDataFull_values() -> Vec<Option<GroundTerm>> = "(" _ v:InlineDataFull_value()* _ ")" _ { v } + rule InlineDataFull_value() -> Option<GroundTerm> = v:DataBlockValue() _ { v } + + rule DataBlockValue() -> Option<GroundTerm> = + t:QuotedTripleData() {? + #[cfg(feature = "rdf-star")]{Ok(Some(t.into()))} + #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} + } / + i:iri() { Some(i.into()) } / + l:RDFLiteral() { Some(l.into()) } / + l:NumericLiteral() { Some(l.into()) } / + l:BooleanLiteral() { Some(l.into()) } / + i("UNDEF") { None } + + rule MinusGraphPattern() -> PartialGraphPattern = i("MINUS") _ p: GroupGraphPattern() { + PartialGraphPattern::Minus(p) + } + + rule GroupOrUnionGraphPattern() -> PartialGraphPattern = p:GroupOrUnionGraphPattern_item() **<1,> (i("UNION") _) {? 
+ not_empty_fold(p.into_iter(), |a, b| { + GraphPattern::Union { left: Box::new(a), right: Box::new(b) } + }).map(PartialGraphPattern::Other) + } + rule GroupOrUnionGraphPattern_item() -> GraphPattern = p:GroupGraphPattern() _ { p } + + rule Filter() -> PartialGraphPattern = i("FILTER") _ c:Constraint() { + PartialGraphPattern::Filter(c) + } + + rule Constraint() -> Expression = BrackettedExpression() / FunctionCall() / BuiltInCall() + + rule FunctionCall() -> Expression = f: iri() _ a: ArgList() { + Expression::FunctionCall(Function::Custom(f), a) + } + + rule ArgList() -> Vec<Expression> = + "(" _ e:ArgList_item() **<1,> ("," _) _ ")" { e } / + NIL() { Vec::new() } + rule ArgList_item() -> Expression = e:Expression() _ { e } + + rule ExpressionList() -> Vec<Expression> = + "(" _ e:ExpressionList_item() **<1,> ("," _) ")" { e } / + NIL() { Vec::new() } + rule ExpressionList_item() -> Expression = e:Expression() _ { e } + + rule ConstructTemplate() -> Vec<TriplePattern> = "{" _ t:ConstructTriples() _ "}" { t } + + rule ConstructTriples() -> Vec<TriplePattern> = p:ConstructTriples_item() ** ("." _) "."? { + p.into_iter().flatten().collect() + } + rule ConstructTriples_item() -> Vec<TriplePattern> = t:TriplesSameSubject() _ { t } + + rule TriplesSameSubject() -> Vec<TriplePattern> = + s:VarOrTerm() _ po:PropertyListNotEmpty() {? + let mut patterns = po.patterns; + for (p, os) in po.focus { + for o in os { + add_to_triple_patterns(s.clone(), p.clone(), o, &mut patterns)? + } + } + Ok(patterns) + } / + s:TriplesNode() _ po:PropertyList() {? + let mut patterns = s.patterns; + patterns.extend(po.patterns); + for (p, os) in po.focus { + for o in os { + add_to_triple_patterns(s.focus.clone(), p.clone(), o, &mut patterns)? + } + } + Ok(patterns) + } + + rule PropertyList() -> FocusedTriplePattern<Vec<(NamedNodePattern,Vec<AnnotatedTerm>)>> = + PropertyListNotEmpty() / + { FocusedTriplePattern::default() } + + rule PropertyListNotEmpty() -> FocusedTriplePattern<Vec<(NamedNodePattern,Vec<AnnotatedTerm>)>> = l:PropertyListNotEmpty_item() **<1,> (";" _) { + l.into_iter().fold(FocusedTriplePattern::<Vec<(NamedNodePattern,Vec<AnnotatedTerm>)>>::default(), |mut a, b| { + a.focus.push(b.focus); + a.patterns.extend(b.patterns); + a + }) + } + rule PropertyListNotEmpty_item() -> FocusedTriplePattern<(NamedNodePattern,Vec<AnnotatedTerm>)> = p:Verb() _ o:ObjectList() _ { + FocusedTriplePattern { + focus: (p, o.focus), + patterns: o.patterns + } + } + + rule Verb() -> NamedNodePattern = VarOrIri() / "a" { rdf::TYPE.into_owned().into() } + + rule ObjectList() -> FocusedTriplePattern<Vec<AnnotatedTerm>> = o:ObjectList_item() **<1,> ("," _) { + o.into_iter().fold(FocusedTriplePattern::<Vec<AnnotatedTerm>>::default(), |mut a, b| { + a.focus.push(b.focus); + a.patterns.extend_from_slice(&b.patterns); + a + }) + } + rule ObjectList_item() -> FocusedTriplePattern<AnnotatedTerm> = o:Object() _ { o } + + rule Object() -> FocusedTriplePattern<AnnotatedTerm> = g:GraphNode() _ a:Annotation()? { + if let Some(a) = a { + let mut patterns = g.patterns; + patterns.extend(a.patterns); + FocusedTriplePattern { + focus: AnnotatedTerm { + term: g.focus, + annotations: a.focus + }, + patterns + } + } else { + FocusedTriplePattern { + focus: AnnotatedTerm { + term: g.focus, + annotations: Vec::new() + }, + patterns: g.patterns + } + } + } + + rule TriplesSameSubjectPath() -> Vec<TripleOrPathPattern> = + s:VarOrTerm() _ po:PropertyListPathNotEmpty() {? 
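+ // The shared subject is distributed over each (path, objects) pair of the property list, e.g. (illustrative) ?s :p ?a, ?b expands to one pattern per object.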
+ let mut patterns = po.patterns; + for (p, os) in po.focus { + for o in os { + add_to_triple_or_path_patterns(s.clone(), p.clone(), o, &mut patterns)?; + } + } + Ok(patterns) + } / + s:TriplesNodePath() _ po:PropertyListPath() {? + let mut patterns = s.patterns; + patterns.extend(po.patterns); + for (p, os) in po.focus { + for o in os { + add_to_triple_or_path_patterns(s.focus.clone(), p.clone(), o, &mut patterns)?; + } + } + Ok(patterns) + } + + rule PropertyListPath() -> FocusedTripleOrPathPattern<Vec<(VariableOrPropertyPath,Vec<AnnotatedTermPath>)>> = + PropertyListPathNotEmpty() / + { FocusedTripleOrPathPattern::default() } + + rule PropertyListPathNotEmpty() -> FocusedTripleOrPathPattern<Vec<(VariableOrPropertyPath,Vec<AnnotatedTermPath>)>> = hp:(VerbPath() / VerbSimple()) _ ho:ObjectListPath() _ t:PropertyListPathNotEmpty_item()* { + t.into_iter().flatten().fold(FocusedTripleOrPathPattern { + focus: vec![(hp, ho.focus)], + patterns: ho.patterns + }, |mut a, b| { + a.focus.push(b.focus); + a.patterns.extend(b.patterns); + a + }) + } + rule PropertyListPathNotEmpty_item() -> Option<FocusedTripleOrPathPattern<(VariableOrPropertyPath,Vec<AnnotatedTermPath>)>> = ";" _ c:PropertyListPathNotEmpty_item_content()? { + c + } + rule PropertyListPathNotEmpty_item_content() -> FocusedTripleOrPathPattern<(VariableOrPropertyPath,Vec<AnnotatedTermPath>)> = p:(VerbPath() / VerbSimple()) _ o:ObjectListPath() _ { + FocusedTripleOrPathPattern { + focus: (p, o.focus.into_iter().map(AnnotatedTermPath::from).collect()), + patterns: o.patterns + } + } + + rule VerbPath() -> VariableOrPropertyPath = p:Path() { + p.into() + } + + rule VerbSimple() -> VariableOrPropertyPath = v:Var() { + v.into() + } + + rule ObjectListPath() -> FocusedTripleOrPathPattern<Vec<AnnotatedTermPath>> = o:ObjectListPath_item() **<1,> ("," _) { + o.into_iter().fold(FocusedTripleOrPathPattern::<Vec<AnnotatedTermPath>>::default(), |mut a, b| { + a.focus.push(b.focus); + a.patterns.extend(b.patterns); + a + }) + } + rule ObjectListPath_item() -> FocusedTripleOrPathPattern<AnnotatedTermPath> = o:ObjectPath() _ { o } + + rule ObjectPath() -> FocusedTripleOrPathPattern<AnnotatedTermPath> = g:GraphNodePath() _ a:AnnotationPath()? { + if let Some(a) = a { + let mut patterns = g.patterns; + patterns.extend(a.patterns); + FocusedTripleOrPathPattern { + focus: AnnotatedTermPath { + term: g.focus, + annotations: a.focus + }, + patterns + } + } else { + FocusedTripleOrPathPattern { + focus: AnnotatedTermPath { + term: g.focus, + annotations: Vec::new() + }, + patterns: g.patterns + } + } + } + + rule Path() -> PropertyPathExpression = PathAlternative() + + rule PathAlternative() -> PropertyPathExpression = p:PathAlternative_item() **<1,> ("|" _) {? + not_empty_fold(p.into_iter(), |a, b| { + PropertyPathExpression::Alternative(Box::new(a), Box::new(b)) + }) + } + rule PathAlternative_item() -> PropertyPathExpression = p:PathSequence() _ { p } + + rule PathSequence() -> PropertyPathExpression = p:PathSequence_item() **<1,> ("/" _) {? + not_empty_fold(p.into_iter(), |a, b| { + PropertyPathExpression::Sequence(Box::new(a), Box::new(b)) + }) + } + rule PathSequence_item() -> PropertyPathExpression = p:PathEltOrInverse() _ { p } + + rule PathElt() -> PropertyPathExpression = p:PathPrimary() _ o:PathElt_op()? 
{ + match o { + Some('?') => PropertyPathExpression::ZeroOrOne(Box::new(p)), + Some('*') => PropertyPathExpression::ZeroOrMore(Box::new(p)), + Some('+') => PropertyPathExpression::OneOrMore(Box::new(p)), + Some(_) => unreachable!(), + None => p + } + } + rule PathElt_op() -> char = + "*" { '*' } / + "+" { '+' } / + "?" !(['0'..='9'] / PN_CHARS_U()) { '?' } // We mandate that this is not a variable + + rule PathEltOrInverse() -> PropertyPathExpression = + "^" _ p:PathElt() { PropertyPathExpression::Reverse(Box::new(p)) } / + PathElt() + + rule PathPrimary() -> PropertyPathExpression = + v:iri() { v.into() } / + "a" { rdf::TYPE.into_owned().into() } / + "!" _ p:PathNegatedPropertySet() { p } / + "(" _ p:Path() _ ")" { p } + + rule PathNegatedPropertySet() -> PropertyPathExpression = + "(" _ p:PathNegatedPropertySet_item() **<1,> ("|" _) ")" { + let mut direct = Vec::new(); + let mut inverse = Vec::new(); + for e in p { + match e { + Either::Left(a) => direct.push(a), + Either::Right(b) => inverse.push(b) + } + } + if inverse.is_empty() { + PropertyPathExpression::NegatedPropertySet(direct) + } else if direct.is_empty() { + PropertyPathExpression::Reverse(Box::new(PropertyPathExpression::NegatedPropertySet(inverse))) + } else { + PropertyPathExpression::Alternative( + Box::new(PropertyPathExpression::NegatedPropertySet(direct)), + Box::new(PropertyPathExpression::Reverse(Box::new(PropertyPathExpression::NegatedPropertySet(inverse)))) + ) + } + } / + p:PathOneInPropertySet() { + match p { + Either::Left(a) => PropertyPathExpression::NegatedPropertySet(vec![a]), + Either::Right(b) => PropertyPathExpression::Reverse(Box::new(PropertyPathExpression::NegatedPropertySet(vec![b]))), + } + } + rule PathNegatedPropertySet_item() -> Either<NamedNode,NamedNode> = p:PathOneInPropertySet() _ { p } + + rule PathOneInPropertySet() -> Either<NamedNode,NamedNode> = + "^" _ v:iri() { Either::Right(v) } / + "^" _ "a" { Either::Right(rdf::TYPE.into()) } / + v:iri() { Either::Left(v) } / + "a" { Either::Left(rdf::TYPE.into()) } + + rule TriplesNode() -> FocusedTriplePattern<TermPattern> = Collection() / BlankNodePropertyList() + + rule BlankNodePropertyList() -> FocusedTriplePattern<TermPattern> = "[" _ po:PropertyListNotEmpty() _ "]" {? + let mut patterns = po.patterns; + let mut bnode = TermPattern::from(BlankNode::default()); + for (p, os) in po.focus { + for o in os { + add_to_triple_patterns(bnode.clone(), p.clone(), o, &mut patterns)?; + } + } + Ok(FocusedTriplePattern { + focus: bnode, + patterns + }) + } + + rule TriplesNodePath() -> FocusedTripleOrPathPattern<TermPattern> = CollectionPath() / BlankNodePropertyListPath() + + rule BlankNodePropertyListPath() -> FocusedTripleOrPathPattern<TermPattern> = "[" _ po:PropertyListPathNotEmpty() _ "]" {? 
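+ // A fresh blank node stands for the whole [ ... ] term: each (path, object) pair becomes a pattern about that node, and the node itself is returned as the focus.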
+ let mut patterns = po.patterns; + let mut bnode = TermPattern::from(BlankNode::default()); + for (p, os) in po.focus { + for o in os { + add_to_triple_or_path_patterns(bnode.clone(), p.clone(), o, &mut patterns)?; + } + } + Ok(FocusedTripleOrPathPattern { + focus: bnode, + patterns + }) + } + + rule Collection() -> FocusedTriplePattern<TermPattern> = "(" _ o:Collection_item()+ ")" { + let mut patterns: Vec<TriplePattern> = Vec::new(); + let mut current_list_node = TermPattern::from(rdf::NIL.into_owned()); + for objWithPatterns in o.into_iter().rev() { + let new_blank_node = TermPattern::from(BlankNode::default()); + patterns.push(TriplePattern::new(new_blank_node.clone(), rdf::FIRST.into_owned(), objWithPatterns.focus.clone())); + patterns.push(TriplePattern::new(new_blank_node.clone(), rdf::REST.into_owned(), current_list_node)); + current_list_node = new_blank_node; + patterns.extend_from_slice(&objWithPatterns.patterns); + } + FocusedTriplePattern { + focus: current_list_node, + patterns + } + } + rule Collection_item() -> FocusedTriplePattern<TermPattern> = o:GraphNode() _ { o } + + rule CollectionPath() -> FocusedTripleOrPathPattern<TermPattern> = "(" _ o:CollectionPath_item()+ _ ")" { + let mut patterns: Vec<TripleOrPathPattern> = Vec::new(); + let mut current_list_node = TermPattern::from(rdf::NIL.into_owned()); + for objWithPatterns in o.into_iter().rev() { + let new_blank_node = TermPattern::from(BlankNode::default()); + patterns.push(TriplePattern::new(new_blank_node.clone(), rdf::FIRST.into_owned(), objWithPatterns.focus.clone()).into()); + patterns.push(TriplePattern::new(new_blank_node.clone(), rdf::REST.into_owned(), current_list_node).into()); + current_list_node = new_blank_node; + patterns.extend(objWithPatterns.patterns); + } + FocusedTripleOrPathPattern { + focus: current_list_node, + patterns + } + } + rule CollectionPath_item() -> FocusedTripleOrPathPattern<TermPattern> = p:GraphNodePath() _ { p } + + + rule Annotation() -> FocusedTriplePattern<Vec<(NamedNodePattern,Vec<AnnotatedTerm>)>> = "{|" _ a:PropertyListNotEmpty() _ "|}" { a } + + rule AnnotationPath() -> FocusedTripleOrPathPattern<Vec<(VariableOrPropertyPath,Vec<AnnotatedTermPath>)>> = "{|" _ a: PropertyListPathNotEmpty() _ "|}" { a } + + rule GraphNode() -> FocusedTriplePattern<TermPattern> = + t:VarOrTerm() { FocusedTriplePattern::new(t) } / + TriplesNode() + + rule GraphNodePath() -> FocusedTripleOrPathPattern<TermPattern> = + t:VarOrTerm() { FocusedTripleOrPathPattern::new(t) } / + TriplesNodePath() + + rule VarOrTerm() -> TermPattern = + v:Var() { v.into() } / + t:QuotedTriple() {? + #[cfg(feature = "rdf-star")]{Ok(t.into())} + #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} + } / + t:GraphTerm() { t.into() } + + rule QuotedTriple() -> TriplePattern = "<<" _ s:VarOrTerm() _ p:Verb() _ o:VarOrTerm() _ ">>" {? + Ok(TriplePattern { + subject: s, + predicate: p, + object: o + }) + } + + rule QuotedTripleData() -> GroundTriple = "<<" _ s:DataValueTerm() _ p:QuotedTripleData_p() _ o:DataValueTerm() _ ">>" {? + Ok(GroundTriple { + subject: s.try_into().map_err(|()| "Literals are not allowed in subject position of nested patterns")?, + predicate: p, + object: o + }) + } + rule QuotedTripleData_p() -> NamedNode = i: iri() { i } / "a" { rdf::TYPE.into() } + + rule DataValueTerm() -> GroundTerm = i:iri() { i.into() } / + l:RDFLiteral() { l.into() } / + l:NumericLiteral() { l.into() } / + l:BooleanLiteral() { l.into() } / + t:QuotedTripleData() {? 
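+ // Nested quoted triples in data blocks are only accepted when the "rdf-star" feature is enabled.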
+ #[cfg(feature = "rdf-star")]{Ok(t.into())} + #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} + } + + rule VarOrIri() -> NamedNodePattern = + v:Var() { v.into() } / + i:iri() { i.into() } + + rule Var() -> Variable = name:(VAR1() / VAR2()) { Variable::new_unchecked(name) } + + rule GraphTerm() -> Term = + i:iri() { i.into() } / + l:RDFLiteral() { l.into() } / + l:NumericLiteral() { l.into() } / + l:BooleanLiteral() { l.into() } / + b:BlankNode() { b.into() } / + NIL() { rdf::NIL.into_owned().into() } + + rule Expression() -> Expression = e:ConditionalOrExpression() {e} + + rule ConditionalOrExpression() -> Expression = e:ConditionalOrExpression_item() **<1,> ("||" _) {? + not_empty_fold(e.into_iter(), |a, b| Expression::Or(Box::new(a), Box::new(b))) + } + rule ConditionalOrExpression_item() -> Expression = e:ConditionalAndExpression() _ { e } + + rule ConditionalAndExpression() -> Expression = e:ConditionalAndExpression_item() **<1,> ("&&" _) {? + not_empty_fold(e.into_iter(), |a, b| Expression::And(Box::new(a), Box::new(b))) + } + rule ConditionalAndExpression_item() -> Expression = e:ValueLogical() _ { e } + + rule ValueLogical() -> Expression = RelationalExpression() + + rule RelationalExpression() -> Expression = a:NumericExpression() _ o: RelationalExpression_inner()? { match o { + Some(("=", Some(b), None)) => Expression::Equal(Box::new(a), Box::new(b)), + Some(("!=", Some(b), None)) => Expression::Not(Box::new(Expression::Equal(Box::new(a), Box::new(b)))), + Some((">", Some(b), None)) => Expression::Greater(Box::new(a), Box::new(b)), + Some((">=", Some(b), None)) => Expression::GreaterOrEqual(Box::new(a), Box::new(b)), + Some(("<", Some(b), None)) => Expression::Less(Box::new(a), Box::new(b)), + Some(("<=", Some(b), None)) => Expression::LessOrEqual(Box::new(a), Box::new(b)), + Some(("IN", None, Some(l))) => Expression::In(Box::new(a), l), + Some(("NOT IN", None, Some(l))) => Expression::Not(Box::new(Expression::In(Box::new(a), l))), + Some(_) => unreachable!(), + None => a + } } + rule RelationalExpression_inner() -> (&'input str, Option<Expression>, Option<Vec<Expression>>) = + s: $("=" / "!=" / ">=" / ">" / "<=" / "<") _ e:NumericExpression() { (s, Some(e), None) } / + i("IN") _ l:ExpressionList() { ("IN", None, Some(l)) } / + i("NOT") _ i("IN") _ l:ExpressionList() { ("NOT IN", None, Some(l)) } + + rule NumericExpression() -> Expression = AdditiveExpression() + + rule AdditiveExpression() -> Expression = a:MultiplicativeExpression() _ o:AdditiveExpression_inner()? { match o { + Some(("+", b)) => Expression::Add(Box::new(a), Box::new(b)), + Some(("-", b)) => Expression::Subtract(Box::new(a), Box::new(b)), + Some(_) => unreachable!(), + None => a, + } } + rule AdditiveExpression_inner() -> (&'input str, Expression) = s: $("+" / "-") _ e:AdditiveExpression() { + (s, e) + } + + rule MultiplicativeExpression() -> Expression = a:UnaryExpression() _ o: MultiplicativeExpression_inner()? { match o { + Some(("*", b)) => Expression::Multiply(Box::new(a), Box::new(b)), + Some(("/", b)) => Expression::Divide(Box::new(a), Box::new(b)), + Some(_) => unreachable!(), + None => a + } } + rule MultiplicativeExpression_inner() -> (&'input str, Expression) = s: $("*" / "/") _ e:MultiplicativeExpression() { + (s, e) + } + + rule UnaryExpression() -> Expression = s: $("!" / "+" / "-")? 
_ e:PrimaryExpression() { match s { + Some("!") => Expression::Not(Box::new(e)), + Some("+") => Expression::UnaryPlus(Box::new(e)), + Some("-") => Expression::UnaryMinus(Box::new(e)), + Some(_) => unreachable!(), + None => e, + } } + + rule PrimaryExpression() -> Expression = + BrackettedExpression() / + ExprQuotedTriple() / + iriOrFunction() / + v:Var() { v.into() } / + l:RDFLiteral() { l.into() } / + l:NumericLiteral() { l.into() } / + l:BooleanLiteral() { l.into() } / + BuiltInCall() + + rule ExprVarOrTerm() -> Expression = + ExprQuotedTriple() / + i:iri() { i.into() } / + l:RDFLiteral() { l.into() } / + l:NumericLiteral() { l.into() } / + l:BooleanLiteral() { l.into() } / + v:Var() { v.into() } + + rule ExprQuotedTriple() -> Expression = "<<" _ s:ExprVarOrTerm() _ p:Verb() _ o:ExprVarOrTerm() _ ">>" {? + #[cfg(feature = "rdf-star")]{Ok(Expression::FunctionCall(Function::Triple, vec![s, p.into(), o]))} + #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} + } + + rule BrackettedExpression() -> Expression = "(" _ e:Expression() _ ")" { e } + + rule BuiltInCall() -> Expression = + a:Aggregate() {? state.new_aggregation(a).map(Into::into) } / + i("STR") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Str, vec![e]) } / + i("LANG") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Lang, vec![e]) } / + i("LANGMATCHES") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::LangMatches, vec![a, b]) } / + i("DATATYPE") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Datatype, vec![e]) } / + i("BOUND") _ "(" _ v:Var() _ ")" { Expression::Bound(v) } / + (i("IRI") / i("URI")) _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Iri, vec![e]) } / + i("BNODE") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::BNode, vec![e]) } / + i("BNODE") NIL() { Expression::FunctionCall(Function::BNode, vec![]) } / + i("RAND") _ NIL() { Expression::FunctionCall(Function::Rand, vec![]) } / + i("ABS") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Abs, vec![e]) } / + i("CEIL") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Ceil, vec![e]) } / + i("FLOOR") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Floor, vec![e]) } / + i("ROUND") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Round, vec![e]) } / + i("CONCAT") e:ExpressionList() { Expression::FunctionCall(Function::Concat, e) } / + SubstringExpression() / + i("STRLEN") _ "(" _ e: Expression() _ ")" { Expression::FunctionCall(Function::StrLen, vec![e]) } / + StrReplaceExpression() / + i("UCASE") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::UCase, vec![e]) } / + i("LCASE") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::LCase, vec![e]) } / + i("ENCODE_FOR_URI") "(" _ e: Expression() _ ")" { Expression::FunctionCall(Function::EncodeForUri, vec![e]) } / + i("CONTAINS") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::Contains, vec![a, b]) } / + i("STRSTARTS") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::StrStarts, vec![a, b]) } / + i("STRENDS") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::StrEnds, vec![a, b]) } / + i("STRBEFORE") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::StrBefore, vec![a, b]) } / + i("STRAFTER") _ "(" _ 
a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::StrAfter, vec![a, b]) } / + i("YEAR") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Year, vec![e]) } / + i("MONTH") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Month, vec![e]) } / + i("DAY") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Day, vec![e]) } / + i("HOURS") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Hours, vec![e]) } / + i("MINUTES") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Minutes, vec![e]) } / + i("SECONDS") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Seconds, vec![e]) } / + i("TIMEZONE") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Timezone, vec![e]) } / + i("TZ") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Tz, vec![e]) } / + i("NOW") _ NIL() { Expression::FunctionCall(Function::Now, vec![]) } / + i("UUID") _ NIL() { Expression::FunctionCall(Function::Uuid, vec![]) }/ + i("STRUUID") _ NIL() { Expression::FunctionCall(Function::StrUuid, vec![]) } / + i("MD5") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Md5, vec![e]) } / + i("SHA1") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Sha1, vec![e]) } / + i("SHA256") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Sha256, vec![e]) } / + i("SHA384") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Sha384, vec![e]) } / + i("SHA512") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Sha512, vec![e]) } / + i("COALESCE") e:ExpressionList() { Expression::Coalesce(e) } / + i("IF") _ "(" _ a:Expression() _ "," _ b:Expression() _ "," _ c:Expression() _ ")" { Expression::If(Box::new(a), Box::new(b), Box::new(c)) } / + i("STRLANG") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::StrLang, vec![a, b]) } / + i("STRDT") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::StrDt, vec![a, b]) } / + i("sameTerm") "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::SameTerm(Box::new(a), Box::new(b)) } / + (i("isIRI") / i("isURI")) _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::IsIri, vec![e]) } / + i("isBLANK") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::IsBlank, vec![e]) } / + i("isLITERAL") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::IsLiteral, vec![e]) } / + i("isNUMERIC") "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::IsNumeric, vec![e]) } / + RegexExpression() / + ExistsFunc() / + NotExistsFunc() / + i("TRIPLE") "(" _ s:Expression() _ "," _ p:Expression() "," _ o:Expression() ")" {? + #[cfg(feature = "rdf-star")]{Ok(Expression::FunctionCall(Function::Triple, vec![s, p, o]))} + #[cfg(not(feature = "rdf-star"))]{Err("The TRIPLE function is only available in SPARQL-star")} + } / + i("SUBJECT") "(" _ e:Expression() _ ")" {? + #[cfg(feature = "rdf-star")]{Ok(Expression::FunctionCall(Function::Subject, vec![e]))} + #[cfg(not(feature = "rdf-star"))]{Err("The SUBJECT function is only available in SPARQL-star")} + } / + i("PREDICATE") "(" _ e:Expression() _ ")" {? + #[cfg(feature = "rdf-star")]{Ok(Expression::FunctionCall(Function::Predicate, vec![e]))} + #[cfg(not(feature = "rdf-star"))]{Err("The PREDICATE function is only available in SPARQL-star")} + } / + i("OBJECT") "(" _ e:Expression() _ ")" {? 
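+ // OBJECT, like TRIPLE, SUBJECT and PREDICATE above, is an RDF-star accessor and is gated on the "rdf-star" feature.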
+ #[cfg(feature = "rdf-star")]{Ok(Expression::FunctionCall(Function::Object, vec![e]))} + #[cfg(not(feature = "rdf-star"))]{Err("The OBJECT function is only available in SPARQL-star")} + } / + i("isTriple") "(" _ e:Expression() _ ")" {? + #[cfg(feature = "rdf-star")]{Ok(Expression::FunctionCall(Function::IsTriple, vec![e]))} + #[cfg(not(feature = "rdf-star"))]{Err("The isTriple function is only available in SPARQL-star")} + } / + i("ADJUST") "(" _ a:Expression() _ "," _ b:Expression() _ ")" {? + #[cfg(feature = "sep-0002")]{Ok(Expression::FunctionCall(Function::Adjust, vec![a, b]))} + #[cfg(not(feature = "sep-0002"))]{Err("The ADJUST function is only available in SPARQL 1.2 SEP 0002")} + } + + rule RegexExpression() -> Expression = + i("REGEX") _ "(" _ a:Expression() _ "," _ b:Expression() _ "," _ c:Expression() _ ")" { Expression::FunctionCall(Function::Regex, vec![a, b, c]) } / + i("REGEX") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::Regex, vec![a, b]) } + + + rule SubstringExpression() -> Expression = + i("SUBSTR") _ "(" _ a:Expression() _ "," _ b:Expression() _ "," _ c:Expression() _ ")" { Expression::FunctionCall(Function::SubStr, vec![a, b, c]) } / + i("SUBSTR") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::SubStr, vec![a, b]) } + + + rule StrReplaceExpression() -> Expression = + i("REPLACE") _ "(" _ a:Expression() _ "," _ b:Expression() _ "," _ c:Expression() _ "," _ d:Expression() _ ")" { Expression::FunctionCall(Function::Replace, vec![a, b, c, d]) } / + i("REPLACE") _ "(" _ a:Expression() _ "," _ b:Expression() _ "," _ c:Expression() _ ")" { Expression::FunctionCall(Function::Replace, vec![a, b, c]) } + + rule ExistsFunc() -> Expression = i("EXISTS") _ p:GroupGraphPattern() { Expression::Exists(Box::new(p)) } + + rule NotExistsFunc() -> Expression = i("NOT") _ i("EXISTS") _ p:GroupGraphPattern() { Expression::Not(Box::new(Expression::Exists(Box::new(p)))) } + + rule Aggregate() -> AggregateExpression = + i("COUNT") _ "(" _ i("DISTINCT") _ "*" _ ")" { AggregateExpression::CountSolutions { distinct: true } } / + i("COUNT") _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Count, expr, distinct: true } } / + i("COUNT") _ "(" _ "*" _ ")" { AggregateExpression::CountSolutions { distinct: false } } / + i("COUNT") _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Count, expr, distinct: false } } / + i("SUM") _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Sum, expr, distinct: true } } / + i("SUM") _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Sum, expr, distinct: false } } / + i("MIN") _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Min, expr, distinct: true } } / + i("MIN") _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Min, expr, distinct: false } } / + i("MAX") _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Max, expr, distinct: true } } / + i("MAX") _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Max, expr, distinct: false } } / + i("AVG") _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Avg, expr, 
distinct: true } } / + i("AVG") _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Avg, expr, distinct: false } } / + i("SAMPLE") _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Sample, expr, distinct: true } } / + i("SAMPLE") _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Sample, expr, distinct: false } } / + i("GROUP_CONCAT") _ "(" _ i("DISTINCT") _ expr:Expression() _ ";" _ i("SEPARATOR") _ "=" _ s:String() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::GroupConcat { separator: Some(s) }, expr, distinct: true } } / + i("GROUP_CONCAT") _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::GroupConcat { separator: None }, expr, distinct: true } } / + i("GROUP_CONCAT") _ "(" _ expr:Expression() _ ";" _ i("SEPARATOR") _ "=" _ s:String() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::GroupConcat { separator: Some(s) }, expr, distinct: false } } / + i("GROUP_CONCAT") _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::GroupConcat { separator: None }, expr, distinct: false } } / + name:iri() _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Custom(name), expr, distinct: true } } / + name:iri() _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Custom(name), expr, distinct: false } } + + rule iriOrFunction() -> Expression = i: iri() _ a: ArgList()? { + match a { + Some(a) => Expression::FunctionCall(Function::Custom(i), a), + None => i.into() + } + } + + rule RDFLiteral() -> Literal = + value:String() _ "^^" _ datatype:iri() { Literal::new_typed_literal(value, datatype) } / + value:String() _ language:LANGTAG() { Literal::new_language_tagged_literal_unchecked(value, language.into_inner()) } / + value:String() { Literal::new_simple_literal(value) } + + rule NumericLiteral() -> Literal = NumericLiteralUnsigned() / NumericLiteralPositive() / NumericLiteralNegative() + + rule NumericLiteralUnsigned() -> Literal = + d:$(DOUBLE()) { Literal::new_typed_literal(d, xsd::DOUBLE) } / + d:$(DECIMAL()) { Literal::new_typed_literal(d, xsd::DECIMAL) } / + i:$(INTEGER()) { Literal::new_typed_literal(i, xsd::INTEGER) } + + rule NumericLiteralPositive() -> Literal = + d:$(DOUBLE_POSITIVE()) { Literal::new_typed_literal(d, xsd::DOUBLE) } / + d:$(DECIMAL_POSITIVE()) { Literal::new_typed_literal(d, xsd::DECIMAL) } / + i:$(INTEGER_POSITIVE()) { Literal::new_typed_literal(i, xsd::INTEGER) } + + + rule NumericLiteralNegative() -> Literal = + d:$(DOUBLE_NEGATIVE()) { Literal::new_typed_literal(d, xsd::DOUBLE) } / + d:$(DECIMAL_NEGATIVE()) { Literal::new_typed_literal(d, xsd::DECIMAL) } / + i:$(INTEGER_NEGATIVE()) { Literal::new_typed_literal(i, xsd::INTEGER) } + + rule BooleanLiteral() -> Literal = + "true" { Literal::new_typed_literal("true", xsd::BOOLEAN) } / + "false" { Literal::new_typed_literal("false", xsd::BOOLEAN) } + + rule String() -> String = STRING_LITERAL_LONG1() / STRING_LITERAL_LONG2() / STRING_LITERAL1() / STRING_LITERAL2() + + rule iri() -> NamedNode = i:(IRIREF() / PrefixedName()) { + NamedNode::from(i) + } + + rule PrefixedName() -> Iri<String> = PNAME_LN() / + ns:PNAME_NS() {?
if let Some(iri) = state.namespaces.get(ns).cloned() { + Iri::parse(iri).map_err(|_| "IRI parsing failed") + } else { + Err("Prefix not found") + } } + + rule BlankNode() -> BlankNode = id:BLANK_NODE_LABEL() {? + let node = BlankNode::new_unchecked(id); + if state.used_bnodes.contains(&node) { + Err("Already used blank node id") + } else { + state.currently_used_bnodes.insert(node.clone()); + Ok(node) + } + } / ANON() { BlankNode::default() } + + rule IRIREF() -> Iri<String> = "<" i:$((!['>'] [_])*) ">" {? + state.parse_iri(unescape_iriref(i)?).map_err(|_| "IRI parsing failed") + } + + rule PNAME_NS() -> &'input str = ns:$(PN_PREFIX()?) ":" { + ns + } + + rule PNAME_LN() -> Iri<String> = ns:PNAME_NS() local:$(PN_LOCAL()) {? + if let Some(base) = state.namespaces.get(ns) { + let mut iri = String::with_capacity(base.len() + local.len()); + iri.push_str(base); + for chunk in local.split('\\') { // We remove \ + iri.push_str(chunk); + } + Iri::parse(iri).map_err(|_| "IRI parsing failed") + } else { + Err("Prefix not found") + } + } + + rule BLANK_NODE_LABEL() -> &'input str = "_:" b:$((['0'..='9'] / PN_CHARS_U()) PN_CHARS()* ("."+ PN_CHARS()+)*) { + b + } + + rule VAR1() -> &'input str = "?" v:$(VARNAME()) { v } + + rule VAR2() -> &'input str = "$" v:$(VARNAME()) { v } + + rule LANGTAG() -> LanguageTag<String> = "@" l:$(['a' ..= 'z' | 'A' ..= 'Z']+ ("-" ['a' ..= 'z' | 'A' ..= 'Z' | '0' ..= '9']+)*) {? + LanguageTag::parse(l.to_ascii_lowercase()).map_err(|_| "language tag parsing failed") + } + + rule INTEGER() = ['0'..='9']+ + + rule DECIMAL() = ['0'..='9']* "." ['0'..='9']+ + + rule DOUBLE() = (['0'..='9']+ "." ['0'..='9']* / "." ['0'..='9']+ / ['0'..='9']+) EXPONENT() + + rule INTEGER_POSITIVE() = "+" _ INTEGER() + + rule DECIMAL_POSITIVE() = "+" _ DECIMAL() + + rule DOUBLE_POSITIVE() = "+" _ DOUBLE() + + rule INTEGER_NEGATIVE() = "-" _ INTEGER() + + rule DECIMAL_NEGATIVE() = "-" _ DECIMAL() + + rule DOUBLE_NEGATIVE() = "-" _ DOUBLE() + + rule EXPONENT() = ['e' | 'E'] ['+' | '-']? ['0'..='9']+ + + rule STRING_LITERAL1() -> String = "'" l:$((STRING_LITERAL1_simple_char() / ECHAR() / UCHAR())*) "'" {? + unescape_string(l) + } + rule STRING_LITERAL1_simple_char() = !['\u{27}' | '\u{5C}' | '\u{0A}' | '\u{0D}'] [_] + + + rule STRING_LITERAL2() -> String = "\"" l:$((STRING_LITERAL2_simple_char() / ECHAR() / UCHAR())*) "\"" {? + unescape_string(l) + } + rule STRING_LITERAL2_simple_char() = !['\u{22}' | '\u{5C}' | '\u{0A}' | '\u{0D}'] [_] + + rule STRING_LITERAL_LONG1() -> String = "'''" l:$(STRING_LITERAL_LONG1_inner()*) "'''" {? + unescape_string(l) + } + rule STRING_LITERAL_LONG1_inner() = ("''" / "'")? (STRING_LITERAL_LONG1_simple_char() / ECHAR() / UCHAR()) + rule STRING_LITERAL_LONG1_simple_char() = !['\'' | '\\'] [_] + + rule STRING_LITERAL_LONG2() -> String = "\"\"\"" l:$(STRING_LITERAL_LONG2_inner()*) "\"\"\"" {? + unescape_string(l) + } + rule STRING_LITERAL_LONG2_inner() = ("\"\"" / "\"")? (STRING_LITERAL_LONG2_simple_char() / ECHAR() / UCHAR()) + rule STRING_LITERAL_LONG2_simple_char() = !['"' | '\\'] [_] + + rule UCHAR() = "\\u" HEX() HEX() HEX() HEX() / "\\U" HEX() HEX() HEX() HEX() HEX() HEX() HEX() HEX() + + rule ECHAR() = "\\" ['t' | 'b' | 'n' | 'r' | 'f' | '"' |'\'' | '\\'] + + rule NIL() = "(" WS()* ")" + + rule WS() = quiet! 
{ ['\u{20}' | '\u{09}' | '\u{0D}' | '\u{0A}'] } + + rule ANON() = "[" WS()* "]" + + rule PN_CHARS_BASE() = ['A' ..= 'Z' | 'a' ..= 'z' | '\u{00C0}'..='\u{00D6}' | '\u{00D8}'..='\u{00F6}' | '\u{00F8}'..='\u{02FF}' | '\u{0370}'..='\u{037D}' | '\u{037F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}'] + + rule PN_CHARS_U() = ['_'] / PN_CHARS_BASE() + + rule VARNAME() = (['0'..='9'] / PN_CHARS_U()) (['0' ..= '9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}'] / PN_CHARS_U())* + + rule PN_CHARS() = ['-' | '0' ..= '9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}'] / PN_CHARS_U() + + rule PN_PREFIX() = PN_CHARS_BASE() PN_CHARS()* ("."+ PN_CHARS()+)* + + rule PN_LOCAL() = (PN_CHARS_U() / [':' | '0'..='9'] / PLX()) (PN_CHARS() / [':'] / PLX())* (['.']+ (PN_CHARS() / [':'] / PLX())+)? + + rule PLX() = PERCENT() / PN_LOCAL_ESC() + + rule PERCENT() = ['%'] HEX() HEX() + + rule HEX() = ['0' ..= '9' | 'A' ..= 'F' | 'a' ..= 'f'] + + rule PN_LOCAL_ESC() = ['\\'] ['_' | '~' | '.' | '-' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%'] //TODO: added '/' to make tests pass but is it valid? + + //space + rule _() = quiet! { ([' ' | '\t' | '\n' | '\r'] / comment())* } + + //comment + rule comment() = quiet! { ['#'] (!['\r' | '\n'] [_])* } + + rule i(literal: &'static str) = input: $([_]*<{literal.len()}>) {? + if input.eq_ignore_ascii_case(literal) { + Ok(()) + } else { + Err(literal) + } + } + } +} diff --git a/ng-oxigraph/src/spargebra/query.rs b/ng-oxigraph/src/spargebra/query.rs new file mode 100644 index 0000000..0e9d1c6 --- /dev/null +++ b/ng-oxigraph/src/spargebra/query.rs @@ -0,0 +1,300 @@ +use crate::spargebra::algebra::*; +use crate::spargebra::parser::{parse_query, SparqlSyntaxError}; +use crate::spargebra::term::*; +use oxiri::Iri; +use std::fmt; +use std::str::FromStr; + +/// A parsed [SPARQL query](https://www.w3.org/TR/sparql11-query/). +/// +/// ``` +/// use spargebra::Query; +/// +/// let query_str = "SELECT ?s ?p ?o WHERE { ?s ?p ?o . }"; +/// let query = Query::parse(query_str, None)?; +/// assert_eq!(query.to_string(), query_str); +/// assert_eq!( +/// query.to_sse(), +/// "(project (?s ?p ?o) (bgp (triple ?s ?p ?o)))" +/// ); +/// # Ok::<_, spargebra::SparqlSyntaxError>(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum Query { + /// [SELECT](https://www.w3.org/TR/sparql11-query/#select). + Select { + /// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset). + dataset: Option<QueryDataset>, + /// The query selection graph pattern. + pattern: GraphPattern, + /// The query base IRI. + base_iri: Option<Iri<String>>, + }, + /// [CONSTRUCT](https://www.w3.org/TR/sparql11-query/#construct). + Construct { + /// The query construction template. + template: Vec<TriplePattern>, + /// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset). + dataset: Option<QueryDataset>, + /// The query selection graph pattern. + pattern: GraphPattern, + /// The query base IRI. + base_iri: Option<Iri<String>>, + }, + /// [DESCRIBE](https://www.w3.org/TR/sparql11-query/#describe). + Describe { + /// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset). + dataset: Option<QueryDataset>, + /// The query selection graph pattern. + pattern: GraphPattern, + /// The query base IRI. 
+ base_iri: Option<Iri<String>>, + }, + /// [ASK](https://www.w3.org/TR/sparql11-query/#ask). + Ask { + /// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset). + dataset: Option<QueryDataset>, + /// The query selection graph pattern. + pattern: GraphPattern, + /// The query base IRI. + base_iri: Option<Iri<String>>, + }, +} + +impl Query { + /// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query. + pub fn parse(query: &str, base_iri: Option<&str>) -> Result<Self, SparqlSyntaxError> { + parse_query(query, base_iri) + } + + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). + pub fn to_sse(&self) -> String { + let mut buffer = String::new(); + self.fmt_sse(&mut buffer).unwrap(); + buffer + } + + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). + fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { + match self { + Self::Select { + dataset, + pattern, + base_iri, + } => { + if let Some(base_iri) = base_iri { + write!(f, "(base <{base_iri}> ")?; + } + if let Some(dataset) = dataset { + f.write_str("(dataset ")?; + dataset.fmt_sse(f)?; + f.write_str(" ")?; + } + pattern.fmt_sse(f)?; + if dataset.is_some() { + f.write_str(")")?; + } + if base_iri.is_some() { + f.write_str(")")?; + } + Ok(()) + } + Self::Construct { + template, + dataset, + pattern, + base_iri, + } => { + if let Some(base_iri) = base_iri { + write!(f, "(base <{base_iri}> ")?; + } + f.write_str("(construct (")?; + for (i, t) in template.iter().enumerate() { + if i > 0 { + f.write_str(" ")?; + } + t.fmt_sse(f)?; + } + f.write_str(") ")?; + if let Some(dataset) = dataset { + f.write_str("(dataset ")?; + dataset.fmt_sse(f)?; + f.write_str(" ")?; + } + pattern.fmt_sse(f)?; + if dataset.is_some() { + f.write_str(")")?; + } + f.write_str(")")?; + if base_iri.is_some() { + f.write_str(")")?; + } + Ok(()) + } + Self::Describe { + dataset, + pattern, + base_iri, + } => { + if let Some(base_iri) = base_iri { + write!(f, "(base <{base_iri}> ")?; + } + f.write_str("(describe ")?; + if let Some(dataset) = dataset { + f.write_str("(dataset ")?; + dataset.fmt_sse(f)?; + f.write_str(" ")?; + } + pattern.fmt_sse(f)?; + if dataset.is_some() { + f.write_str(")")?; + } + f.write_str(")")?; + if base_iri.is_some() { + f.write_str(")")?; + } + Ok(()) + } + Self::Ask { + dataset, + pattern, + base_iri, + } => { + if let Some(base_iri) = base_iri { + write!(f, "(base <{base_iri}> ")?; + } + f.write_str("(ask ")?; + if let Some(dataset) = dataset { + f.write_str("(dataset ")?; + dataset.fmt_sse(f)?; + f.write_str(" ")?; + } + pattern.fmt_sse(f)?; + if dataset.is_some() { + f.write_str(")")?; + } + f.write_str(")")?; + if base_iri.is_some() { + f.write_str(")")?; + } + Ok(()) + } + } + } +} + +impl fmt::Display for Query { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Select { + dataset, + pattern, + base_iri, + } => { + if let Some(base_iri) = base_iri { + writeln!(f, "BASE <{base_iri}>")?; + } + write!( + f, + "{}", + SparqlGraphRootPattern { + pattern, + dataset: dataset.as_ref() + } + ) + } + Self::Construct { + template, + dataset, + pattern, + base_iri, + } => { + if let Some(base_iri) = base_iri { + writeln!(f, "BASE <{base_iri}>")?; + } + f.write_str("CONSTRUCT { ")?; + for triple in template { + write!(f, "{triple} . 
")?; + } + f.write_str("}")?; + if let Some(dataset) = dataset { + dataset.fmt(f)?; + } + write!( + f, + " WHERE {{ {} }}", + SparqlGraphRootPattern { + pattern, + dataset: None + } + ) + } + Self::Describe { + dataset, + pattern, + base_iri, + } => { + if let Some(base_iri) = base_iri { + writeln!(f, "BASE <{}>", base_iri.as_str())?; + } + f.write_str("DESCRIBE *")?; + if let Some(dataset) = dataset { + dataset.fmt(f)?; + } + write!( + f, + " WHERE {{ {} }}", + SparqlGraphRootPattern { + pattern, + dataset: None + } + ) + } + Self::Ask { + dataset, + pattern, + base_iri, + } => { + if let Some(base_iri) = base_iri { + writeln!(f, "BASE <{base_iri}>")?; + } + f.write_str("ASK")?; + if let Some(dataset) = dataset { + dataset.fmt(f)?; + } + write!( + f, + " WHERE {{ {} }}", + SparqlGraphRootPattern { + pattern, + dataset: None + } + ) + } + } + } +} + +impl FromStr for Query { + type Err = SparqlSyntaxError; + + fn from_str(query: &str) -> Result<Self, Self::Err> { + Self::parse(query, None) + } +} + +impl<'a> TryFrom<&'a str> for Query { + type Error = SparqlSyntaxError; + + fn try_from(query: &str) -> Result<Self, Self::Error> { + Self::from_str(query) + } +} + +impl<'a> TryFrom<&'a String> for Query { + type Error = SparqlSyntaxError; + + fn try_from(query: &String) -> Result<Self, Self::Error> { + Self::from_str(query) + } +} diff --git a/ng-oxigraph/src/spargebra/term.rs b/ng-oxigraph/src/spargebra/term.rs new file mode 100644 index 0000000..b4ec630 --- /dev/null +++ b/ng-oxigraph/src/spargebra/term.rs @@ -0,0 +1,1012 @@ +//! Data structures for [RDF 1.1 Concepts](https://www.w3.org/TR/rdf11-concepts/) like IRI, literal or triples. + +pub use crate::oxrdf::{BlankNode, Literal, NamedNode, Subject, Term, Triple, Variable}; +use std::fmt; +use std::fmt::Write; + +/// The union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple). +/// +/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation. 
+#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum GroundSubject { + NamedNode(NamedNode), + #[cfg(feature = "rdf-star")] + Triple(Box<GroundTriple>), +} + +impl fmt::Display for GroundSubject { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(node) => node.fmt(f), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => write!( + f, + "<<{} {} {}>>", + triple.subject, triple.predicate, triple.object + ), + } + } +} + +impl From<NamedNode> for GroundSubject { + #[inline] + fn from(node: NamedNode) -> Self { + Self::NamedNode(node) + } +} + +#[cfg(feature = "rdf-star")] +impl From<GroundTriple> for GroundSubject { + #[inline] + fn from(triple: GroundTriple) -> Self { + Self::Triple(Box::new(triple)) + } +} + +impl TryFrom<Subject> for GroundSubject { + type Error = (); + + #[inline] + fn try_from(subject: Subject) -> Result<Self, Self::Error> { + match subject { + Subject::NamedNode(t) => Ok(t.into()), + Subject::BlankNode(_) => Err(()), + #[cfg(feature = "rdf-star")] + Subject::Triple(t) => Ok(GroundTriple::try_from(*t)?.into()), + } + } +} + +impl TryFrom<GroundTerm> for GroundSubject { + type Error = (); + + #[inline] + fn try_from(term: GroundTerm) -> Result<Self, Self::Error> { + match term { + GroundTerm::NamedNode(t) => Ok(t.into()), + GroundTerm::Literal(_) => Err(()), + #[cfg(feature = "rdf-star")] + GroundTerm::Triple(t) => Ok((*t).into()), + } + } +} + +/// The union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple). +/// +/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation. +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum GroundTerm { + NamedNode(NamedNode), + Literal(Literal), + #[cfg(feature = "rdf-star")] + Triple(Box<GroundTriple>), +} + +impl fmt::Display for GroundTerm { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(node) => node.fmt(f), + Self::Literal(literal) => literal.fmt(f), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => write!( + f, + "<<{} {} {}>>", + triple.subject, triple.predicate, triple.object + ), + } + } +} + +impl From<NamedNode> for GroundTerm { + #[inline] + fn from(node: NamedNode) -> Self { + Self::NamedNode(node) + } +} + +impl From<Literal> for GroundTerm { + #[inline] + fn from(literal: Literal) -> Self { + Self::Literal(literal) + } +} + +#[cfg(feature = "rdf-star")] +impl From<GroundTriple> for GroundTerm { + #[inline] + fn from(triple: GroundTriple) -> Self { + Self::Triple(Box::new(triple)) + } +} + +impl TryFrom<Term> for GroundTerm { + type Error = (); + + #[inline] + fn try_from(term: Term) -> Result<Self, Self::Error> { + match term { + Term::NamedNode(t) => Ok(t.into()), + Term::BlankNode(_) => Err(()), + Term::Literal(t) => Ok(t.into()), + #[cfg(feature = "rdf-star")] + Term::Triple(t) => Ok(GroundTriple::try_from(*t)?.into()), + } + } +} + +/// A [RDF triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) without blank nodes. +/// +/// The default string formatter is returning a N-Quads representation. 
+/// +/// ``` +/// use spargebra::term::{GroundTriple, NamedNode}; +/// +/// assert_eq!( +/// "<http://example.com/s> <http://example.com/p> <http://example.com/o>", +/// GroundTriple { +/// subject: NamedNode::new("http://example.com/s")?.into(), +/// predicate: NamedNode::new("http://example.com/p")?, +/// object: NamedNode::new("http://example.com/o")?.into(), +/// } +/// .to_string() +/// ); +/// # Result::<_,oxrdf::IriParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct GroundTriple { + pub subject: GroundSubject, + pub predicate: NamedNode, + pub object: GroundTerm, +} + +impl fmt::Display for GroundTriple { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {} {}", self.subject, self.predicate, self.object) + } +} + +impl TryFrom<Triple> for GroundTriple { + type Error = (); + + #[inline] + fn try_from(triple: Triple) -> Result<Self, Self::Error> { + Ok(Self { + subject: triple.subject.try_into()?, + predicate: triple.predicate, + object: triple.object.try_into()?, + }) + } +} + +/// A possible graph name. +/// +/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph). +#[derive(Eq, PartialEq, Debug, Clone, Hash, Default)] +pub enum GraphName { + NamedNode(NamedNode), + #[default] + DefaultGraph, +} + +impl GraphName { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). + pub(crate) fn fmt_sse(&self, f: &mut impl Write) -> fmt::Result { + match self { + Self::NamedNode(node) => write!(f, "{node}"), + Self::DefaultGraph => f.write_str("default"), + } + } +} + +impl fmt::Display for GraphName { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(node) => node.fmt(f), + Self::DefaultGraph => f.write_str("DEFAULT"), + } + } +} + +impl From<NamedNode> for GraphName { + #[inline] + fn from(node: NamedNode) -> Self { + Self::NamedNode(node) + } +} + +impl TryFrom<GraphNamePattern> for GraphName { + type Error = (); + + #[inline] + fn try_from(pattern: GraphNamePattern) -> Result<Self, Self::Error> { + match pattern { + GraphNamePattern::NamedNode(t) => Ok(t.into()), + GraphNamePattern::DefaultGraph => Ok(Self::DefaultGraph), + GraphNamePattern::Variable(_) => Err(()), + } + } +} + +/// A [RDF triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) in an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). +/// +/// The default string formatter is returning a N-Quads representation. +/// +/// ``` +/// use spargebra::term::{NamedNode, Quad}; +/// +/// assert_eq!( +/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g>", +/// Quad { +/// subject: NamedNode::new("http://example.com/s")?.into(), +/// predicate: NamedNode::new("http://example.com/p")?, +/// object: NamedNode::new("http://example.com/o")?.into(), +/// graph_name: NamedNode::new("http://example.com/g")?.into(), +/// }.to_string() +/// ); +/// # Result::<_,oxrdf::IriParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct Quad { + pub subject: Subject, + pub predicate: NamedNode, + pub object: Term, + pub graph_name: GraphName, +} + +impl Quad { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). 
+ pub(crate) fn fmt_sse(&self, f: &mut impl Write) -> fmt::Result { + if self.graph_name != GraphName::DefaultGraph { + f.write_str("(graph ")?; + self.graph_name.fmt_sse(f)?; + f.write_str(" (")?; + } + write!( + f, + "(triple {} {} {})", + self.subject, self.predicate, self.object + )?; + if self.graph_name != GraphName::DefaultGraph { + f.write_str("))")?; + } + Ok(()) + } +} + +impl fmt::Display for Quad { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.graph_name == GraphName::DefaultGraph { + write!(f, "{} {} {}", self.subject, self.predicate, self.object) + } else { + write!( + f, + "{} {} {} {}", + self.subject, self.predicate, self.object, self.graph_name + ) + } + } +} + +impl TryFrom<QuadPattern> for Quad { + type Error = (); + + #[inline] + fn try_from(quad: QuadPattern) -> Result<Self, Self::Error> { + Ok(Self { + subject: quad.subject.try_into()?, + predicate: quad.predicate.try_into()?, + object: quad.object.try_into()?, + graph_name: quad.graph_name.try_into()?, + }) + } +} + +/// A [RDF triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) in an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) without blank nodes. +/// +/// The default string formatter is returning a N-Quads representation. +/// +/// ``` +/// use spargebra::term::{NamedNode, GroundQuad}; +/// +/// assert_eq!( +/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g>", +/// GroundQuad { +/// subject: NamedNode::new("http://example.com/s")?.into(), +/// predicate: NamedNode::new("http://example.com/p")?, +/// object: NamedNode::new("http://example.com/o")?.into(), +/// graph_name: NamedNode::new("http://example.com/g")?.into(), +/// }.to_string() +/// ); +/// # Result::<_,oxrdf::IriParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct GroundQuad { + pub subject: GroundSubject, + pub predicate: NamedNode, + pub object: GroundTerm, + pub graph_name: GraphName, +} + +impl GroundQuad { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). + pub(crate) fn fmt_sse(&self, f: &mut impl Write) -> fmt::Result { + if self.graph_name != GraphName::DefaultGraph { + f.write_str("(graph ")?; + self.graph_name.fmt_sse(f)?; + f.write_str(" (")?; + } + write!( + f, + "(triple {} {} {})", + self.subject, self.predicate, self.object + )?; + if self.graph_name != GraphName::DefaultGraph { + f.write_str("))")?; + } + Ok(()) + } +} + +impl fmt::Display for GroundQuad { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.graph_name == GraphName::DefaultGraph { + write!(f, "{} {} {}", self.subject, self.predicate, self.object) + } else { + write!( + f, + "{} {} {} {}", + self.subject, self.predicate, self.object, self.graph_name + ) + } + } +} + +impl TryFrom<Quad> for GroundQuad { + type Error = (); + + #[inline] + fn try_from(quad: Quad) -> Result<Self, Self::Error> { + Ok(Self { + subject: quad.subject.try_into()?, + predicate: quad.predicate, + object: quad.object.try_into()?, + graph_name: quad.graph_name, + }) + } +} + +/// The union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and [variables](https://www.w3.org/TR/sparql11-query/#sparqlQueryVariables). 
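The pattern types below are terms extended with variables. A short sketch of how they compose, using `TriplePattern` (defined further down in this file) and the unchecked constructors from the oxrdf API re-exported at the top of term.rs:

```rust
use spargebra::term::{NamedNode, TriplePattern, Variable};

fn main() {
    // `?s <http://example.com/p> ?o`: variables where a query may bind,
    // a concrete IRI where it may not
    let pattern = TriplePattern {
        subject: Variable::new_unchecked("s").into(),
        predicate: NamedNode::new_unchecked("http://example.com/p").into(),
        object: Variable::new_unchecked("o").into(),
    };
    // Display renders the pattern in SPARQL syntax
    assert_eq!(pattern.to_string(), "?s <http://example.com/p> ?o");
}
```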
+#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum NamedNodePattern { + NamedNode(NamedNode), + Variable(Variable), +} + +impl NamedNodePattern { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). + pub(crate) fn fmt_sse(&self, f: &mut impl Write) -> fmt::Result { + match self { + Self::NamedNode(node) => write!(f, "{node}"), + Self::Variable(var) => write!(f, "{var}"), + } + } +} + +impl fmt::Display for NamedNodePattern { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(node) => node.fmt(f), + Self::Variable(var) => var.fmt(f), + } + } +} + +impl From<NamedNode> for NamedNodePattern { + #[inline] + fn from(node: NamedNode) -> Self { + Self::NamedNode(node) + } +} + +impl From<Variable> for NamedNodePattern { + #[inline] + fn from(var: Variable) -> Self { + Self::Variable(var) + } +} + +impl TryFrom<NamedNodePattern> for NamedNode { + type Error = (); + + #[inline] + fn try_from(pattern: NamedNodePattern) -> Result<Self, Self::Error> { + match pattern { + NamedNodePattern::NamedNode(t) => Ok(t), + NamedNodePattern::Variable(_) => Err(()), + } + } +} + +/// The union of [terms](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term) and [variables](https://www.w3.org/TR/sparql11-query/#sparqlQueryVariables). +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum TermPattern { + NamedNode(NamedNode), + BlankNode(BlankNode), + Literal(Literal), + #[cfg(feature = "rdf-star")] + Triple(Box<TriplePattern>), + Variable(Variable), +} + +impl TermPattern { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). + pub(crate) fn fmt_sse(&self, f: &mut impl Write) -> fmt::Result { + match self { + Self::NamedNode(term) => write!(f, "{term}"), + Self::BlankNode(term) => write!(f, "{term}"), + Self::Literal(term) => write!(f, "{term}"), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => triple.fmt_sse(f), + Self::Variable(var) => write!(f, "{var}"), + } + } +} + +impl fmt::Display for TermPattern { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(term) => term.fmt(f), + Self::BlankNode(term) => term.fmt(f), + Self::Literal(term) => term.fmt(f), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => write!(f, "<<{triple}>>"), + Self::Variable(var) => var.fmt(f), + } + } +} + +impl From<NamedNode> for TermPattern { + #[inline] + fn from(node: NamedNode) -> Self { + Self::NamedNode(node) + } +} + +impl From<BlankNode> for TermPattern { + #[inline] + fn from(node: BlankNode) -> Self { + Self::BlankNode(node) + } +} + +impl From<Literal> for TermPattern { + #[inline] + fn from(literal: Literal) -> Self { + Self::Literal(literal) + } +} + +#[cfg(feature = "rdf-star")] +impl From<TriplePattern> for TermPattern { + #[inline] + fn from(triple: TriplePattern) -> Self { + Self::Triple(Box::new(triple)) + } +} + +impl From<Variable> for TermPattern { + fn from(var: Variable) -> Self { + Self::Variable(var) + } +} + +impl From<Subject> for TermPattern { + #[inline] + fn from(subject: Subject) -> Self { + match subject { + Subject::NamedNode(node) => node.into(), + Subject::BlankNode(node) => node.into(), + #[cfg(feature = "rdf-star")] + Subject::Triple(t) => TriplePattern::from(*t).into(), + } + } +} + +impl From<Term> for TermPattern { + #[inline] + fn from(term: Term) -> Self { + match term { + Term::NamedNode(node) => node.into(), + Term::BlankNode(node) => node.into(), + 
Term::Literal(literal) => literal.into(), + #[cfg(feature = "rdf-star")] + Term::Triple(t) => TriplePattern::from(*t).into(), + } + } +} + +impl From<NamedNodePattern> for TermPattern { + #[inline] + fn from(element: NamedNodePattern) -> Self { + match element { + NamedNodePattern::NamedNode(node) => node.into(), + NamedNodePattern::Variable(var) => var.into(), + } + } +} + +impl From<GroundTermPattern> for TermPattern { + #[inline] + fn from(element: GroundTermPattern) -> Self { + match element { + GroundTermPattern::NamedNode(node) => node.into(), + GroundTermPattern::Literal(literal) => literal.into(), + #[cfg(feature = "rdf-star")] + GroundTermPattern::Triple(t) => TriplePattern::from(*t).into(), + GroundTermPattern::Variable(variable) => variable.into(), + } + } +} + +impl TryFrom<TermPattern> for Subject { + type Error = (); + + #[inline] + fn try_from(term: TermPattern) -> Result<Self, Self::Error> { + match term { + TermPattern::NamedNode(t) => Ok(t.into()), + TermPattern::BlankNode(t) => Ok(t.into()), + #[cfg(feature = "rdf-star")] + TermPattern::Triple(t) => Ok(Triple::try_from(*t)?.into()), + TermPattern::Literal(_) | TermPattern::Variable(_) => Err(()), + } + } +} + +impl TryFrom<TermPattern> for Term { + type Error = (); + + #[inline] + fn try_from(pattern: TermPattern) -> Result<Self, Self::Error> { + match pattern { + TermPattern::NamedNode(t) => Ok(t.into()), + TermPattern::BlankNode(t) => Ok(t.into()), + TermPattern::Literal(t) => Ok(t.into()), + #[cfg(feature = "rdf-star")] + TermPattern::Triple(t) => Ok(Triple::try_from(*t)?.into()), + TermPattern::Variable(_) => Err(()), + } + } +} +/// The union of [terms](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term) and [variables](https://www.w3.org/TR/sparql11-query/#sparqlQueryVariables) without blank nodes. +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum GroundTermPattern { + NamedNode(NamedNode), + Literal(Literal), + Variable(Variable), + #[cfg(feature = "rdf-star")] + Triple(Box<GroundTriplePattern>), +} + +impl GroundTermPattern { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). 
+ pub(crate) fn fmt_sse(&self, f: &mut impl Write) -> fmt::Result { + match self { + Self::NamedNode(term) => write!(f, "{term}"), + Self::Literal(term) => write!(f, "{term}"), + Self::Variable(var) => write!(f, "{var}"), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => triple.fmt_sse(f), + } + } +} + +impl fmt::Display for GroundTermPattern { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(term) => term.fmt(f), + Self::Literal(term) => term.fmt(f), + Self::Variable(var) => var.fmt(f), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => write!(f, "<<{triple}>>"), + } + } +} + +impl From<NamedNode> for GroundTermPattern { + #[inline] + fn from(node: NamedNode) -> Self { + Self::NamedNode(node) + } +} + +impl From<Literal> for GroundTermPattern { + #[inline] + fn from(literal: Literal) -> Self { + Self::Literal(literal) + } +} + +#[cfg(feature = "rdf-star")] +impl From<GroundTriplePattern> for GroundTermPattern { + #[inline] + fn from(triple: GroundTriplePattern) -> Self { + Self::Triple(Box::new(triple)) + } +} + +impl From<Variable> for GroundTermPattern { + #[inline] + fn from(var: Variable) -> Self { + Self::Variable(var) + } +} + +impl From<GroundSubject> for GroundTermPattern { + #[inline] + fn from(term: GroundSubject) -> Self { + match term { + GroundSubject::NamedNode(node) => node.into(), + #[cfg(feature = "rdf-star")] + GroundSubject::Triple(triple) => GroundTriplePattern::from(*triple).into(), + } + } +} +impl From<GroundTerm> for GroundTermPattern { + #[inline] + fn from(term: GroundTerm) -> Self { + match term { + GroundTerm::NamedNode(node) => node.into(), + GroundTerm::Literal(literal) => literal.into(), + #[cfg(feature = "rdf-star")] + GroundTerm::Triple(triple) => GroundTriplePattern::from(*triple).into(), + } + } +} + +impl From<NamedNodePattern> for GroundTermPattern { + #[inline] + fn from(element: NamedNodePattern) -> Self { + match element { + NamedNodePattern::NamedNode(node) => node.into(), + NamedNodePattern::Variable(var) => var.into(), + } + } +} + +impl TryFrom<TermPattern> for GroundTermPattern { + type Error = (); + + #[inline] + fn try_from(pattern: TermPattern) -> Result<Self, Self::Error> { + Ok(match pattern { + TermPattern::NamedNode(named_node) => named_node.into(), + TermPattern::BlankNode(_) => return Err(()), + TermPattern::Literal(literal) => literal.into(), + #[cfg(feature = "rdf-star")] + TermPattern::Triple(triple) => GroundTriplePattern::try_from(*triple)?.into(), + TermPattern::Variable(variable) => variable.into(), + }) + } +} + +/// The union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph) and [variables](https://www.w3.org/TR/sparql11-query/#sparqlQueryVariables). +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum GraphNamePattern { + NamedNode(NamedNode), + DefaultGraph, + Variable(Variable), +} + +impl GraphNamePattern { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). 
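The `TryFrom<TermPattern>` impl just above is what makes the "ground" types refuse blank nodes at the type level (blank nodes are illegal in DELETE templates, for instance). A minimal sketch, assuming the `spargebra` naming of this patch:

```rust
use spargebra::term::{BlankNode, GroundTermPattern, TermPattern, Variable};

fn main() {
    // Blank nodes cannot be grounded: the conversion fails with Err(())
    let bnode: TermPattern = BlankNode::default().into();
    assert!(GroundTermPattern::try_from(bnode).is_err());

    // Variables survive the conversion: "ground" only excludes blank nodes
    let var: TermPattern = Variable::new_unchecked("x").into();
    assert!(GroundTermPattern::try_from(var).is_ok());
}
```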
+ pub(crate) fn fmt_sse(&self, f: &mut impl Write) -> fmt::Result { + match self { + Self::NamedNode(node) => write!(f, "{node}"), + Self::DefaultGraph => f.write_str("default"), + Self::Variable(var) => write!(f, "{var}"), + } + } +} + +impl fmt::Display for GraphNamePattern { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(node) => node.fmt(f), + Self::DefaultGraph => f.write_str("DEFAULT"), + Self::Variable(var) => var.fmt(f), + } + } +} + +impl From<NamedNode> for GraphNamePattern { + #[inline] + fn from(node: NamedNode) -> Self { + Self::NamedNode(node) + } +} + +impl From<Variable> for GraphNamePattern { + #[inline] + fn from(var: Variable) -> Self { + Self::Variable(var) + } +} + +impl From<GraphName> for GraphNamePattern { + #[inline] + fn from(graph_name: GraphName) -> Self { + match graph_name { + GraphName::NamedNode(node) => node.into(), + GraphName::DefaultGraph => Self::DefaultGraph, + } + } +} + +impl From<NamedNodePattern> for GraphNamePattern { + #[inline] + fn from(graph_name: NamedNodePattern) -> Self { + match graph_name { + NamedNodePattern::NamedNode(node) => node.into(), + NamedNodePattern::Variable(var) => var.into(), + } + } +} + +/// A [triple pattern](https://www.w3.org/TR/sparql11-query/#defn_TriplePattern) +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct TriplePattern { + pub subject: TermPattern, + pub predicate: NamedNodePattern, + pub object: TermPattern, +} + +impl TriplePattern { + pub(crate) fn new( + subject: impl Into<TermPattern>, + predicate: impl Into<NamedNodePattern>, + object: impl Into<TermPattern>, + ) -> Self { + Self { + subject: subject.into(), + predicate: predicate.into(), + object: object.into(), + } + } + + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). + pub(crate) fn fmt_sse(&self, f: &mut impl Write) -> fmt::Result { + f.write_str("(triple ")?; + self.subject.fmt_sse(f)?; + f.write_str(" ")?; + self.predicate.fmt_sse(f)?; + f.write_str(" ")?; + self.object.fmt_sse(f)?; + f.write_str(")") + } +} + +impl fmt::Display for TriplePattern { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {} {}", self.subject, self.predicate, self.object) + } +} + +impl From<Triple> for TriplePattern { + #[inline] + fn from(triple: Triple) -> Self { + Self { + subject: triple.subject.into(), + predicate: triple.predicate.into(), + object: triple.object.into(), + } + } +} + +impl From<GroundTriplePattern> for TriplePattern { + #[inline] + fn from(triple: GroundTriplePattern) -> Self { + Self { + subject: triple.subject.into(), + predicate: triple.predicate, + object: triple.object.into(), + } + } +} + +impl TryFrom<TriplePattern> for Triple { + type Error = (); + + #[inline] + fn try_from(triple: TriplePattern) -> Result<Self, Self::Error> { + Ok(Self { + subject: triple.subject.try_into()?, + predicate: triple.predicate.try_into()?, + object: triple.object.try_into()?, + }) + } +} + +/// A [triple pattern](https://www.w3.org/TR/sparql11-query/#defn_TriplePattern) without blank nodes. +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct GroundTriplePattern { + pub subject: GroundTermPattern, + pub predicate: NamedNodePattern, + pub object: GroundTermPattern, +} + +impl GroundTriplePattern { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). 
+ #[allow(dead_code)] + pub(crate) fn fmt_sse(&self, f: &mut impl Write) -> fmt::Result { + f.write_str("(triple ")?; + self.subject.fmt_sse(f)?; + f.write_str(" ")?; + self.predicate.fmt_sse(f)?; + f.write_str(" ")?; + self.object.fmt_sse(f)?; + f.write_str(")") + } +} + +impl fmt::Display for GroundTriplePattern { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {} {}", self.subject, self.predicate, self.object) + } +} + +impl From<GroundTriple> for GroundTriplePattern { + #[inline] + fn from(triple: GroundTriple) -> Self { + Self { + subject: triple.subject.into(), + predicate: triple.predicate.into(), + object: triple.object.into(), + } + } +} + +impl TryFrom<TriplePattern> for GroundTriplePattern { + type Error = (); + + #[inline] + fn try_from(triple: TriplePattern) -> Result<Self, Self::Error> { + Ok(Self { + subject: triple.subject.try_into()?, + predicate: triple.predicate, + object: triple.object.try_into()?, + }) + } +} + +/// A [triple pattern](https://www.w3.org/TR/sparql11-query/#defn_TriplePattern) in a specific graph +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct QuadPattern { + pub subject: TermPattern, + pub predicate: NamedNodePattern, + pub object: TermPattern, + pub graph_name: GraphNamePattern, +} + +impl QuadPattern { + pub(crate) fn new( + subject: impl Into<TermPattern>, + predicate: impl Into<NamedNodePattern>, + object: impl Into<TermPattern>, + graph_name: impl Into<GraphNamePattern>, + ) -> Self { + Self { + subject: subject.into(), + predicate: predicate.into(), + object: object.into(), + graph_name: graph_name.into(), + } + } + + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). + pub(crate) fn fmt_sse(&self, f: &mut impl Write) -> fmt::Result { + if self.graph_name != GraphNamePattern::DefaultGraph { + f.write_str("(graph ")?; + self.graph_name.fmt_sse(f)?; + f.write_str(" (")?; + } + f.write_str("(triple ")?; + self.subject.fmt_sse(f)?; + f.write_str(" ")?; + self.predicate.fmt_sse(f)?; + f.write_str(" ")?; + self.object.fmt_sse(f)?; + f.write_str(")")?; + if self.graph_name != GraphNamePattern::DefaultGraph { + f.write_str("))")?; + } + Ok(()) + } +} + +impl fmt::Display for QuadPattern { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.graph_name == GraphNamePattern::DefaultGraph { + write!(f, "{} {} {}", self.subject, self.predicate, self.object) + } else { + write!( + f, + "GRAPH {} {{ {} {} {} }}", + self.graph_name, self.subject, self.predicate, self.object + ) + } + } +} + +/// A [triple pattern](https://www.w3.org/TR/sparql11-query/#defn_TriplePattern) in a specific graph without blank nodes. +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct GroundQuadPattern { + pub subject: GroundTermPattern, + pub predicate: NamedNodePattern, + pub object: GroundTermPattern, + pub graph_name: GraphNamePattern, +} + +impl GroundQuadPattern { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). 
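`QuadPattern`'s Display impl above only emits the `GRAPH { ... }` wrapper for non-default graphs. A short sketch (public fields as declared above; unchecked constructors assumed from the oxrdf API re-exported by term.rs):

```rust
use spargebra::term::{NamedNode, QuadPattern, Variable};

fn main() {
    let quad = QuadPattern {
        subject: Variable::new_unchecked("s").into(),
        predicate: NamedNode::new_unchecked("http://example.com/p").into(),
        object: Variable::new_unchecked("o").into(),
        // A named graph, so Display wraps the pattern in GRAPH { ... }
        graph_name: NamedNode::new_unchecked("http://example.com/g").into(),
    };
    assert_eq!(
        quad.to_string(),
        "GRAPH <http://example.com/g> { ?s <http://example.com/p> ?o }"
    );
}
```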
+ pub(crate) fn fmt_sse(&self, f: &mut impl Write) -> fmt::Result { + if self.graph_name != GraphNamePattern::DefaultGraph { + f.write_str("(graph ")?; + self.graph_name.fmt_sse(f)?; + f.write_str(" (")?; + } + f.write_str("(triple ")?; + self.subject.fmt_sse(f)?; + f.write_str(" ")?; + self.predicate.fmt_sse(f)?; + f.write_str(" ")?; + self.object.fmt_sse(f)?; + f.write_str(")")?; + if self.graph_name != GraphNamePattern::DefaultGraph { + f.write_str("))")?; + } + Ok(()) + } +} + +impl fmt::Display for GroundQuadPattern { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.graph_name == GraphNamePattern::DefaultGraph { + write!(f, "{} {} {}", self.subject, self.predicate, self.object) + } else { + write!( + f, + "GRAPH {} {{ {} {} {} }}", + self.graph_name, self.subject, self.predicate, self.object + ) + } + } +} + +impl TryFrom<QuadPattern> for GroundQuadPattern { + type Error = (); + + #[inline] + fn try_from(pattern: QuadPattern) -> Result<Self, Self::Error> { + Ok(Self { + subject: pattern.subject.try_into()?, + predicate: pattern.predicate, + object: pattern.object.try_into()?, + graph_name: pattern.graph_name, + }) + } +} diff --git a/ng-oxigraph/src/spargebra/update.rs b/ng-oxigraph/src/spargebra/update.rs new file mode 100644 index 0000000..c13887d --- /dev/null +++ b/ng-oxigraph/src/spargebra/update.rs @@ -0,0 +1,344 @@ +use crate::spargebra::algebra::*; +use crate::spargebra::parser::{parse_update, SparqlSyntaxError}; +use crate::spargebra::term::*; +use oxiri::Iri; +use std::fmt; +use std::str::FromStr; + +/// A parsed [SPARQL update](https://www.w3.org/TR/sparql11-update/). +/// +/// ``` +/// use spargebra::Update; +/// +/// let update_str = "CLEAR ALL ;"; +/// let update = Update::parse(update_str, None)?; +/// assert_eq!(update.to_string().trim(), update_str); +/// assert_eq!(update.to_sse(), "(update (clear all))"); +/// # Ok::<_, spargebra::SparqlSyntaxError>(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct Update { + /// The update base IRI. + pub base_iri: Option<Iri<String>>, + /// The [update operations](https://www.w3.org/TR/sparql11-update/#formalModelGraphUpdate). + pub operations: Vec<GraphUpdateOperation>, +} + +impl Update { + /// Parses a SPARQL update with an optional base IRI to resolve relative IRIs in the query. + pub fn parse(update: &str, base_iri: Option<&str>) -> Result<Self, SparqlSyntaxError> { + parse_update(update, base_iri) + } + + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). + pub fn to_sse(&self) -> String { + let mut buffer = String::new(); + self.fmt_sse(&mut buffer).unwrap(); + buffer + } + + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). 
+ fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { + if let Some(base_iri) = &self.base_iri { + write!(f, "(base <{base_iri}> ")?; + } + f.write_str("(update")?; + for op in &self.operations { + f.write_str(" ")?; + op.fmt_sse(f)?; + } + f.write_str(")")?; + if self.base_iri.is_some() { + f.write_str(")")?; + } + Ok(()) + } +} + +impl fmt::Display for Update { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(base_iri) = &self.base_iri { + writeln!(f, "BASE <{base_iri}>")?; + } + for update in &self.operations { + writeln!(f, "{update} ;")?; + } + Ok(()) + } +} + +impl FromStr for Update { + type Err = SparqlSyntaxError; + + fn from_str(update: &str) -> Result<Self, Self::Err> { + Self::parse(update, None) + } +} + +impl<'a> TryFrom<&'a str> for Update { + type Error = SparqlSyntaxError; + + fn try_from(update: &str) -> Result<Self, Self::Error> { + Self::from_str(update) + } +} + +impl<'a> TryFrom<&'a String> for Update { + type Error = SparqlSyntaxError; + + fn try_from(update: &String) -> Result<Self, Self::Error> { + Self::from_str(update) + } +} + +/// The [graph update operations](https://www.w3.org/TR/sparql11-update/#formalModelGraphUpdate). +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum GraphUpdateOperation { + /// [insert data](https://www.w3.org/TR/sparql11-update/#defn_insertDataOperation). + InsertData { data: Vec<Quad> }, + /// [delete data](https://www.w3.org/TR/sparql11-update/#defn_deleteDataOperation). + DeleteData { data: Vec<GroundQuad> }, + /// [delete insert](https://www.w3.org/TR/sparql11-update/#defn_deleteInsertOperation). + DeleteInsert { + delete: Vec<GroundQuadPattern>, + insert: Vec<QuadPattern>, + using: Option<QueryDataset>, + pattern: Box<GraphPattern>, + }, + /// [load](https://www.w3.org/TR/sparql11-update/#defn_loadOperation). + Load { + silent: bool, + source: NamedNode, + destination: GraphName, + }, + /// [clear](https://www.w3.org/TR/sparql11-update/#defn_clearOperation). + Clear { silent: bool, graph: GraphTarget }, + /// [create](https://www.w3.org/TR/sparql11-update/#defn_createOperation). + Create { silent: bool, graph: NamedNode }, + /// [drop](https://www.w3.org/TR/sparql11-update/#defn_dropOperation). + Drop { silent: bool, graph: GraphTarget }, +} + +impl GraphUpdateOperation { + /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). 
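A sketch of parsing an update and inspecting the operations enum defined above, assuming `GraphUpdateOperation` is re-exported at the crate root alongside `Update` (as the `spargebra`-style doc-tests in this patch suggest):

```rust
use spargebra::{GraphUpdateOperation, Update};

fn main() -> Result<(), spargebra::SparqlSyntaxError> {
    let update = Update::parse(
        "INSERT DATA { <http://example.com/s> <http://example.com/p> <http://example.com/o> }",
        None,
    )?;
    // A single INSERT DATA operation carrying one ground quad
    assert!(matches!(
        update.operations.as_slice(),
        [GraphUpdateOperation::InsertData { data }] if data.len() == 1
    ));
    Ok(())
}
```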
+ fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { + match self { + Self::InsertData { data } => { + f.write_str("(insertData (")?; + for (i, t) in data.iter().enumerate() { + if i > 0 { + f.write_str(" ")?; + } + t.fmt_sse(f)?; + } + f.write_str("))") + } + Self::DeleteData { data } => { + f.write_str("(deleteData (")?; + for (i, t) in data.iter().enumerate() { + if i > 0 { + f.write_str(" ")?; + } + t.fmt_sse(f)?; + } + f.write_str("))") + } + Self::DeleteInsert { + delete, + insert, + using, + pattern, + } => { + f.write_str("(modify ")?; + if let Some(using) = using { + f.write_str(" (using ")?; + using.fmt_sse(f)?; + f.write_str(" ")?; + pattern.fmt_sse(f)?; + f.write_str(")")?; + } else { + pattern.fmt_sse(f)?; + } + if !delete.is_empty() { + f.write_str(" (delete (")?; + for (i, t) in delete.iter().enumerate() { + if i > 0 { + f.write_str(" ")?; + } + t.fmt_sse(f)?; + } + f.write_str("))")?; + } + if !insert.is_empty() { + f.write_str(" (insert (")?; + for (i, t) in insert.iter().enumerate() { + if i > 0 { + f.write_str(" ")?; + } + t.fmt_sse(f)?; + } + f.write_str("))")?; + } + f.write_str(")") + } + Self::Load { + silent, + source, + destination, + } => { + f.write_str("(load ")?; + if *silent { + f.write_str("silent ")?; + } + write!(f, "{source} ")?; + destination.fmt_sse(f)?; + f.write_str(")") + } + Self::Clear { silent, graph } => { + f.write_str("(clear ")?; + if *silent { + f.write_str("silent ")?; + } + graph.fmt_sse(f)?; + f.write_str(")") + } + Self::Create { silent, graph } => { + f.write_str("(create ")?; + if *silent { + f.write_str("silent ")?; + } + write!(f, "{graph})") + } + Self::Drop { silent, graph } => { + f.write_str("(drop ")?; + if *silent { + f.write_str("silent ")?; + } + graph.fmt_sse(f)?; + f.write_str(")") + } + } + } +} + +impl fmt::Display for GraphUpdateOperation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::InsertData { data } => { + writeln!(f, "INSERT DATA {{")?; + write_quads(data, f)?; + f.write_str("}") + } + Self::DeleteData { data } => { + writeln!(f, "DELETE DATA {{")?; + write_ground_quads(data, f)?; + f.write_str("}") + } + Self::DeleteInsert { + delete, + insert, + using, + pattern, + } => { + if !delete.is_empty() { + writeln!(f, "DELETE {{")?; + for quad in delete { + writeln!(f, "\t{quad} .")?; + } + writeln!(f, "}}")?; + } + if !insert.is_empty() { + writeln!(f, "INSERT {{")?; + for quad in insert { + writeln!(f, "\t{quad} .")?; + } + writeln!(f, "}}")?; + } + if let Some(using) = using { + for g in &using.default { + writeln!(f, "USING {g}")?; + } + if let Some(named) = &using.named { + for g in named { + writeln!(f, "USING NAMED {g}")?; + } + } + } + write!( + f, + "WHERE {{ {} }}", + SparqlGraphRootPattern { + pattern, + dataset: None + } + ) + } + Self::Load { + silent, + source, + destination, + } => { + f.write_str("LOAD ")?; + if *silent { + f.write_str("SILENT ")?; + } + write!(f, "{source}")?; + if destination != &GraphName::DefaultGraph { + write!(f, " INTO GRAPH {destination}")?; + } + Ok(()) + } + Self::Clear { silent, graph } => { + f.write_str("CLEAR ")?; + if *silent { + f.write_str("SILENT ")?; + } + write!(f, "{graph}") + } + Self::Create { silent, graph } => { + f.write_str("CREATE ")?; + if *silent { + f.write_str("SILENT ")?; + } + write!(f, "GRAPH {graph}") + } + Self::Drop { silent, graph } => { + f.write_str("DROP ")?; + if *silent { + f.write_str("SILENT ")?; + } + write!(f, "{graph}") + } + } + } +} + +fn write_quads(quads: &[Quad], f: &mut 
fmt::Formatter<'_>) -> fmt::Result { + for quad in quads { + if quad.graph_name == GraphName::DefaultGraph { + writeln!(f, "\t{} {} {} .", quad.subject, quad.predicate, quad.object)?; + } else { + writeln!( + f, + "\tGRAPH {} {{ {} {} {} }}", + quad.graph_name, quad.subject, quad.predicate, quad.object + )?; + } + } + Ok(()) +} + +fn write_ground_quads(quads: &[GroundQuad], f: &mut fmt::Formatter<'_>) -> fmt::Result { + for quad in quads { + if quad.graph_name == GraphName::DefaultGraph { + writeln!(f, "\t{} {} {} .", quad.subject, quad.predicate, quad.object)?; + } else { + writeln!( + f, + "\tGRAPH {} {{ {} {} {} }}", + quad.graph_name, quad.subject, quad.predicate, quad.object + )?; + } + } + Ok(()) +} diff --git a/ng-oxigraph/src/sparopt/README.md b/ng-oxigraph/src/sparopt/README.md new file mode 100644 index 0000000..1a6e1c6 --- /dev/null +++ b/ng-oxigraph/src/sparopt/README.md @@ -0,0 +1,33 @@ +sparopt +======= + +sparopt is a work-in-progress [SPARQL Query](https://www.w3.org/TR/sparql11-query/) optimizer. + +It relies on the output of [spargebra](https://crates.io/crates/spargebra). + +Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#sparql-star) is also available behind the `rdf-star` feature. + +This crate is intended to be a building block for SPARQL implementations in Rust such as [Oxigraph](https://oxigraph.org). + + +## License + +This project is licensed under either of + +* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or + <http://www.apache.org/licenses/LICENSE-2.0>) +* MIT license ([LICENSE-MIT](../LICENSE-MIT) or + <http://opensource.org/licenses/MIT>) + +at your option. + + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. diff --git a/ng-oxigraph/src/sparopt/algebra.rs b/ng-oxigraph/src/sparopt/algebra.rs new file mode 100644 index 0000000..e35b892 --- /dev/null +++ b/ng-oxigraph/src/sparopt/algebra.rs @@ -0,0 +1,1662 @@ +//! [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery) representation. + +use crate::oxrdf::vocab::xsd; +use crate::spargebra::algebra::{ + AggregateExpression as AlAggregateExpression, AggregateFunction, Expression as AlExpression, + GraphPattern as AlGraphPattern, OrderExpression as AlOrderExpression, +}; +pub use crate::spargebra::algebra::{Function, PropertyPathExpression}; +use crate::spargebra::term::{BlankNode, GroundSubject, TermPattern, TriplePattern}; +pub use crate::spargebra::term::{ + GroundTerm, GroundTermPattern, Literal, NamedNode, NamedNodePattern, Variable, +}; +#[cfg(feature = "rdf-star")] +use crate::spargebra::term::{GroundTriple, GroundTriplePattern}; +use rand::random; +use std::collections::hash_map::DefaultHasher; +use std::collections::{HashMap, HashSet}; +use std::hash::{Hash, Hasher}; +use std::ops::{Add, BitAnd, BitOr, Div, Mul, Neg, Not, Sub}; + +/// An [expression](https://www.w3.org/TR/sparql11-query/#expressions). +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum Expression { + NamedNode(NamedNode), + Literal(Literal), + Variable(Variable), + /// [Logical-or](https://www.w3.org/TR/sparql11-query/#func-logical-or).
+ Or(Vec<Self>), + /// [Logical-and](https://www.w3.org/TR/sparql11-query/#func-logical-and). + And(Vec<Self>), + /// [RDFterm-equal](https://www.w3.org/TR/sparql11-query/#func-RDFterm-equal) and all the XSD equalities. + Equal(Box<Self>, Box<Self>), + /// [sameTerm](https://www.w3.org/TR/sparql11-query/#func-sameTerm). + SameTerm(Box<Self>, Box<Self>), + /// [op:numeric-greater-than](https://www.w3.org/TR/xpath-functions-31/#func-numeric-greater-than) and other XSD greater than operators. + Greater(Box<Self>, Box<Self>), + GreaterOrEqual(Box<Self>, Box<Self>), + /// [op:numeric-less-than](https://www.w3.org/TR/xpath-functions-31/#func-numeric-less-than) and other XSD less than operators. + Less(Box<Self>, Box<Self>), + LessOrEqual(Box<Self>, Box<Self>), + /// [op:numeric-add](https://www.w3.org/TR/xpath-functions-31/#func-numeric-add) and other XSD additions. + Add(Box<Self>, Box<Self>), + /// [op:numeric-subtract](https://www.w3.org/TR/xpath-functions-31/#func-numeric-subtract) and other XSD subtractions. + Subtract(Box<Self>, Box<Self>), + /// [op:numeric-multiply](https://www.w3.org/TR/xpath-functions-31/#func-numeric-multiply) and other XSD multiplications. + Multiply(Box<Self>, Box<Self>), + /// [op:numeric-divide](https://www.w3.org/TR/xpath-functions-31/#func-numeric-divide) and other XSD divides. + Divide(Box<Self>, Box<Self>), + /// [op:numeric-unary-plus](https://www.w3.org/TR/xpath-functions-31/#func-numeric-unary-plus) and other XSD unary plus. + UnaryPlus(Box<Self>), + /// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions-31/#func-numeric-unary-minus) and other XSD unary minus. + UnaryMinus(Box<Self>), + /// [fn:not](https://www.w3.org/TR/xpath-functions-31/#func-not). + Not(Box<Self>), + /// [EXISTS](https://www.w3.org/TR/sparql11-query/#func-filter-exists). + Exists(Box<GraphPattern>), + /// [BOUND](https://www.w3.org/TR/sparql11-query/#func-bound). + Bound(Variable), + /// [IF](https://www.w3.org/TR/sparql11-query/#func-if). + If(Box<Self>, Box<Self>, Box<Self>), + /// [COALESCE](https://www.w3.org/TR/sparql11-query/#func-coalesce). + Coalesce(Vec<Self>), + /// A regular function call.
+ FunctionCall(Function, Vec<Self>), +} + +impl Expression { + pub fn or_all(args: impl IntoIterator<Item = Self>) -> Self { + let args = args.into_iter(); + let mut all = Vec::with_capacity(args.size_hint().0); + for arg in args { + if let Some(ebv) = arg.effective_boolean_value() { + if ebv { + return true.into(); + } + // We ignore false values + } else if let Self::Or(args) = arg { + all.extend(args); + } else { + all.push(arg); + } + } + match all.len() { + 0 => false.into(), + 1 => { + let result = all.pop().unwrap(); + if result.returns_boolean() { + result // It's already casted to boolean + } else { + Self::And(vec![result]) + } + } + _ => Self::Or(order_vec(all)), + } + } + + pub fn and_all(args: impl IntoIterator<Item = Self>) -> Self { + let args = args.into_iter(); + let mut all = Vec::with_capacity(args.size_hint().0); + for arg in args { + if let Some(ebv) = arg.effective_boolean_value() { + if !ebv { + return false.into(); + } + // We ignore true values + } else if let Self::And(args) = arg { + all.extend(args); + } else { + all.push(arg); + } + } + match all.len() { + 0 => true.into(), + 1 => { + let result = all.pop().unwrap(); + if result.returns_boolean() { + result + } else { + Self::And(vec![result]) + } + } + _ => Self::And(order_vec(all)), + } + } + + pub fn equal(left: Self, right: Self) -> Self { + match (left, right) { + (Self::NamedNode(left), Self::NamedNode(right)) => (left == right).into(), + (Self::Literal(left), Self::Literal(right)) if left == right => true.into(), + (left, right) => { + let (left, right) = order_pair(left, right); + Self::Equal(Box::new(left), Box::new(right)) + } + } + } + + pub fn same_term(left: Self, right: Self) -> Self { + match (left, right) { + (Self::NamedNode(left), Self::NamedNode(right)) => (left == right).into(), + (Self::Literal(left), Self::Literal(right)) if left == right => true.into(), + (left, right) => { + let (left, right) = order_pair(left, right); + Self::SameTerm(Box::new(left), Box::new(right)) + } + } + } + + pub fn greater(left: Self, right: Self) -> Self { + Self::Greater(Box::new(left), Box::new(right)) + } + + pub fn greater_or_equal(left: Self, right: Self) -> Self { + Self::GreaterOrEqual(Box::new(left), Box::new(right)) + } + + pub fn less(left: Self, right: Self) -> Self { + Self::Less(Box::new(left), Box::new(right)) + } + + pub fn less_or_equal(left: Self, right: Self) -> Self { + Self::LessOrEqual(Box::new(left), Box::new(right)) + } + + pub fn unary_plus(inner: Self) -> Self { + Self::UnaryPlus(Box::new(inner)) + } + + pub fn exists(inner: GraphPattern) -> Self { + if inner.is_empty() { + return false.into(); + } + if inner.is_empty_singleton() { + return true.into(); + } + Self::Exists(Box::new(inner)) + } + + pub fn if_cond(cond: Self, then: Self, els: Self) -> Self { + match cond.effective_boolean_value() { + Some(true) => then, + Some(false) => els, + None => Self::If(Box::new(cond), Box::new(then), Box::new(els)), + } + } + + pub fn coalesce(args: Vec<Self>) -> Self { + Self::Coalesce(args) + } + + pub fn call(name: Function, args: Vec<Self>) -> Self { + Self::FunctionCall(name, args) + } + + pub fn effective_boolean_value(&self) -> Option<bool> { + if let Self::Literal(literal) = self { + match literal.datatype() { + xsd::BOOLEAN => match literal.value() { + "true" | "1" => Some(true), + "false" | "0" => Some(false), + _ => None, // TODO + }, + xsd::STRING => Some(!literal.value().is_empty()), + _ => None, // TODO + } + } else { + None + } + } + + pub fn used_variables(&self) -> 
HashSet<&Variable> { + let mut variables = HashSet::new(); + self.lookup_used_variables(&mut |v| { + variables.insert(v); + }); + variables + } + + pub fn lookup_used_variables<'a>(&'a self, callback: &mut impl FnMut(&'a Variable)) { + match self { + Self::NamedNode(_) | Self::Literal(_) => {} + Self::Variable(v) | Self::Bound(v) => callback(v), + Self::Or(inner) + | Self::And(inner) + | Self::Coalesce(inner) + | Self::FunctionCall(_, inner) => { + for i in inner { + i.lookup_used_variables(callback); + } + } + Self::Equal(a, b) + | Self::SameTerm(a, b) + | Self::Greater(a, b) + | Self::GreaterOrEqual(a, b) + | Self::Less(a, b) + | Self::LessOrEqual(a, b) + | Self::Add(a, b) + | Self::Subtract(a, b) + | Self::Multiply(a, b) + | Self::Divide(a, b) => { + a.lookup_used_variables(callback); + b.lookup_used_variables(callback); + } + Self::UnaryPlus(i) | Self::UnaryMinus(i) | Self::Not(i) => { + i.lookup_used_variables(callback) + } + Self::Exists(e) => e.lookup_used_variables(callback), + Self::If(a, b, c) => { + a.lookup_used_variables(callback); + b.lookup_used_variables(callback); + c.lookup_used_variables(callback); + } + } + } + + fn from_sparql_algebra( + expression: &AlExpression, + graph_name: Option<&NamedNodePattern>, + ) -> Self { + match expression { + AlExpression::NamedNode(node) => Self::NamedNode(node.clone()), + AlExpression::Literal(literal) => Self::Literal(literal.clone()), + AlExpression::Variable(variable) => Self::Variable(variable.clone()), + AlExpression::Or(left, right) => Self::Or(vec![ + Self::from_sparql_algebra(left, graph_name), + Self::from_sparql_algebra(right, graph_name), + ]), + AlExpression::And(left, right) => Self::And(vec![ + Self::from_sparql_algebra(left, graph_name), + Self::from_sparql_algebra(right, graph_name), + ]), + AlExpression::Equal(left, right) => Self::Equal( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::SameTerm(left, right) => Self::SameTerm( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::Greater(left, right) => Self::Greater( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::GreaterOrEqual(left, right) => Self::GreaterOrEqual( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::Less(left, right) => Self::Less( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::LessOrEqual(left, right) => Self::LessOrEqual( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::In(left, right) => { + let left = Self::from_sparql_algebra(left, graph_name); + match right.len() { + 0 => Self::if_cond(left, false.into(), false.into()), + 1 => Self::Equal( + Box::new(left), + Box::new(Self::from_sparql_algebra(&right[0], graph_name)), + ), + _ => Self::Or( + right + .iter() + .map(|e| { + Self::Equal( + Box::new(left.clone()), + Box::new(Self::from_sparql_algebra(e, graph_name)), + ) + }) + .collect(), + ), + } + } + AlExpression::Add(left, right) => Self::Add( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::Subtract(left, right) => Self::Subtract( + 
Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::Multiply(left, right) => Self::Multiply( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::Divide(left, right) => Self::Divide( + Box::new(Self::from_sparql_algebra(left, graph_name)), + Box::new(Self::from_sparql_algebra(right, graph_name)), + ), + AlExpression::UnaryPlus(inner) => { + Self::UnaryPlus(Box::new(Self::from_sparql_algebra(inner, graph_name))) + } + AlExpression::UnaryMinus(inner) => { + Self::UnaryMinus(Box::new(Self::from_sparql_algebra(inner, graph_name))) + } + AlExpression::Not(inner) => { + Self::Not(Box::new(Self::from_sparql_algebra(inner, graph_name))) + } + AlExpression::Exists(inner) => Self::Exists(Box::new( + GraphPattern::from_sparql_algebra(inner, graph_name, &mut HashMap::new()), + )), + AlExpression::Bound(variable) => Self::Bound(variable.clone()), + AlExpression::If(cond, yes, no) => Self::If( + Box::new(Self::from_sparql_algebra(cond, graph_name)), + Box::new(Self::from_sparql_algebra(yes, graph_name)), + Box::new(Self::from_sparql_algebra(no, graph_name)), + ), + AlExpression::Coalesce(inner) => Self::Coalesce( + inner + .iter() + .map(|e| Self::from_sparql_algebra(e, graph_name)) + .collect(), + ), + AlExpression::FunctionCall(name, args) => Self::FunctionCall( + name.clone(), + args.iter() + .map(|e| Self::from_sparql_algebra(e, graph_name)) + .collect(), + ), + } + } + + fn returns_boolean(&self) -> bool { + match self { + Self::Or(_) + | Self::And(_) + | Self::Equal(_, _) + | Self::SameTerm(_, _) + | Self::Greater(_, _) + | Self::GreaterOrEqual(_, _) + | Self::Less(_, _) + | Self::LessOrEqual(_, _) + | Self::Not(_) + | Self::Exists(_) + | Self::Bound(_) + | Self::FunctionCall( + Function::IsBlank | Function::IsIri | Function::IsLiteral | Function::IsNumeric, + _, + ) => true, + #[cfg(feature = "rdf-star")] + Self::FunctionCall(Function::IsTriple, _) => true, + Self::Literal(literal) => literal.datatype() == xsd::BOOLEAN, + Self::If(_, a, b) => a.returns_boolean() && b.returns_boolean(), + _ => false, + } + } +} + +impl From<NamedNode> for Expression { + fn from(value: NamedNode) -> Self { + Self::NamedNode(value) + } +} + +impl From<Literal> for Expression { + fn from(value: Literal) -> Self { + Self::Literal(value) + } +} + +impl From<GroundSubject> for Expression { + fn from(value: GroundSubject) -> Self { + match value { + GroundSubject::NamedNode(value) => value.into(), + #[cfg(feature = "rdf-star")] + GroundSubject::Triple(value) => (*value).into(), + } + } +} + +impl From<GroundTerm> for Expression { + fn from(value: GroundTerm) -> Self { + match value { + GroundTerm::NamedNode(value) => value.into(), + GroundTerm::Literal(value) => value.into(), + #[cfg(feature = "rdf-star")] + GroundTerm::Triple(value) => (*value).into(), + } + } +} + +impl From<NamedNodePattern> for Expression { + fn from(value: NamedNodePattern) -> Self { + match value { + NamedNodePattern::NamedNode(value) => value.into(), + NamedNodePattern::Variable(variable) => variable.into(), + } + } +} + +impl From<GroundTermPattern> for Expression { + fn from(value: GroundTermPattern) -> Self { + match value { + GroundTermPattern::NamedNode(value) => value.into(), + GroundTermPattern::Literal(value) => value.into(), + #[cfg(feature = "rdf-star")] + GroundTermPattern::Triple(value) => (*value).into(), + GroundTermPattern::Variable(variable) => variable.into(), + } + } 
+} + +#[cfg(feature = "rdf-star")] +impl From<GroundTriple> for Expression { + fn from(value: GroundTriple) -> Self { + Self::FunctionCall( + Function::Triple, + vec![ + value.subject.into(), + value.predicate.into(), + value.object.into(), + ], + ) + } +} + +#[cfg(feature = "rdf-star")] +impl From<GroundTriplePattern> for Expression { + fn from(value: GroundTriplePattern) -> Self { + Self::FunctionCall( + Function::Triple, + vec![ + value.subject.into(), + value.predicate.into(), + value.object.into(), + ], + ) + } +} + +impl From<Variable> for Expression { + fn from(value: Variable) -> Self { + Self::Variable(value) + } +} + +impl From<bool> for Expression { + fn from(value: bool) -> Self { + Literal::from(value).into() + } +} + +impl From<&Expression> for AlExpression { + fn from(expression: &Expression) -> Self { + match expression { + Expression::NamedNode(node) => Self::NamedNode(node.clone()), + Expression::Literal(literal) => Self::Literal(literal.clone()), + Expression::Variable(variable) => Self::Variable(variable.clone()), + Expression::Or(inner) => inner + .iter() + .map(Into::into) + .reduce(|a, b| Self::Or(Box::new(a), Box::new(b))) + .unwrap_or_else(|| Literal::from(false).into()), + Expression::And(inner) => inner + .iter() + .map(Into::into) + .reduce(|a, b| Self::And(Box::new(a), Box::new(b))) + .unwrap_or_else(|| Literal::from(true).into()), + Expression::Equal(left, right) => Self::Equal( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::SameTerm(left, right) => Self::SameTerm( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::Greater(left, right) => Self::Greater( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::GreaterOrEqual(left, right) => Self::GreaterOrEqual( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::Less(left, right) => Self::Less( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::LessOrEqual(left, right) => Self::LessOrEqual( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::Add(left, right) => Self::Add( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::Subtract(left, right) => Self::Subtract( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::Multiply(left, right) => Self::Multiply( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::Divide(left, right) => Self::Divide( + Box::new(left.as_ref().into()), + Box::new(right.as_ref().into()), + ), + Expression::UnaryPlus(inner) => Self::UnaryPlus(Box::new(inner.as_ref().into())), + Expression::UnaryMinus(inner) => Self::UnaryMinus(Box::new(inner.as_ref().into())), + Expression::Not(inner) => Self::Not(Box::new(inner.as_ref().into())), + Expression::Exists(inner) => Self::Exists(Box::new(inner.as_ref().into())), + Expression::Bound(variable) => Self::Bound(variable.clone()), + Expression::If(cond, yes, no) => Self::If( + Box::new(cond.as_ref().into()), + Box::new(yes.as_ref().into()), + Box::new(no.as_ref().into()), + ), + Expression::Coalesce(inner) => Self::Coalesce(inner.iter().map(Into::into).collect()), + Expression::FunctionCall(name, args) => { + Self::FunctionCall(name.clone(), args.iter().map(Into::into).collect()) + } + } + } +} + +impl BitAnd for Expression { + type Output = Self; + + fn bitand(self, rhs: Self) -> Self::Output { + Self::and_all([self, rhs]) + } +} 
+
+impl BitOr for Expression {
+    type Output = Self;
+
+    fn bitor(self, rhs: Self) -> Self {
+        Self::or_all([self, rhs])
+    }
+}
+
+impl Not for Expression {
+    type Output = Self;
+
+    fn not(self) -> Self {
+        if let Some(v) = self.effective_boolean_value() {
+            (!v).into()
+        } else if let Self::Not(v) = self {
+            if v.returns_boolean() {
+                *v
+            } else {
+                Self::And(vec![*v])
+            }
+        } else {
+            Self::Not(Box::new(self))
+        }
+    }
+}
+
+impl Add for Expression {
+    type Output = Self;
+
+    fn add(self, rhs: Self) -> Self {
+        let (left, right) = order_pair(self, rhs);
+        Self::Add(Box::new(left), Box::new(right))
+    }
+}
+
+impl Sub for Expression {
+    type Output = Self;
+
+    fn sub(self, rhs: Self) -> Self {
+        Self::Subtract(Box::new(self), Box::new(rhs))
+    }
+}
+
+impl Mul for Expression {
+    type Output = Self;
+
+    fn mul(self, rhs: Self) -> Self {
+        let (left, right) = order_pair(self, rhs);
+        Self::Multiply(Box::new(left), Box::new(right))
+    }
+}
+
+impl Div for Expression {
+    type Output = Self;
+
+    fn div(self, rhs: Self) -> Self {
+        Self::Divide(Box::new(self), Box::new(rhs))
+    }
+}
+
+impl Neg for Expression {
+    type Output = Self;
+
+    fn neg(self) -> Self {
+        Self::UnaryMinus(Box::new(self))
+    }
+}
+
+/// A SPARQL query [graph pattern](https://www.w3.org/TR/sparql11-query/#sparqlQuery).
+#[derive(Eq, PartialEq, Debug, Clone, Hash)]
+pub enum GraphPattern {
+    /// A [basic graph pattern](https://www.w3.org/TR/sparql11-query/#defn_BasicGraphPattern).
+    QuadPattern {
+        subject: GroundTermPattern,
+        predicate: NamedNodePattern,
+        object: GroundTermPattern,
+        graph_name: Option<NamedNodePattern>,
+    },
+    /// A [property path pattern](https://www.w3.org/TR/sparql11-query/#defn_evalPP_predicate).
+    Path {
+        subject: GroundTermPattern,
+        path: PropertyPathExpression,
+        object: GroundTermPattern,
+        graph_name: Option<NamedNodePattern>,
+    },
+    /// [Join](https://www.w3.org/TR/sparql11-query/#defn_algJoin).
+    Join {
+        left: Box<Self>,
+        right: Box<Self>,
+        algorithm: JoinAlgorithm,
+    },
+    /// [LeftJoin](https://www.w3.org/TR/sparql11-query/#defn_algLeftJoin).
+    LeftJoin {
+        left: Box<Self>,
+        right: Box<Self>,
+        expression: Expression,
+        algorithm: LeftJoinAlgorithm,
+    },
+    /// Lateral join, i.e. evaluate `right` for each result row of `left`.
+    #[cfg(feature = "sep-0006")]
+    Lateral { left: Box<Self>, right: Box<Self> },
+    /// [Filter](https://www.w3.org/TR/sparql11-query/#defn_algFilter).
+    Filter {
+        expression: Expression,
+        inner: Box<Self>,
+    },
+    /// [Union](https://www.w3.org/TR/sparql11-query/#defn_algUnion).
+    Union { inner: Vec<Self> },
+    /// [Extend](https://www.w3.org/TR/sparql11-query/#defn_extend).
+    Extend {
+        inner: Box<Self>,
+        variable: Variable,
+        expression: Expression,
+    },
+    /// [Minus](https://www.w3.org/TR/sparql11-query/#defn_algMinus).
+    Minus {
+        left: Box<Self>,
+        right: Box<Self>,
+        algorithm: MinusAlgorithm,
+    },
+    /// A table used to provide inline values.
+    Values {
+        variables: Vec<Variable>,
+        bindings: Vec<Vec<Option<GroundTerm>>>,
+    },
+    /// [OrderBy](https://www.w3.org/TR/sparql11-query/#defn_algOrdered).
+    OrderBy {
+        inner: Box<Self>,
+        expression: Vec<OrderExpression>,
+    },
+    /// [Project](https://www.w3.org/TR/sparql11-query/#defn_algProjection).
+    Project {
+        inner: Box<Self>,
+        variables: Vec<Variable>,
+    },
+    /// [Distinct](https://www.w3.org/TR/sparql11-query/#defn_algDistinct).
+    Distinct { inner: Box<Self> },
+    /// [Reduced](https://www.w3.org/TR/sparql11-query/#defn_algReduced).
+ Reduced { inner: Box<Self> }, + /// [Slice](https://www.w3.org/TR/sparql11-query/#defn_algSlice). + Slice { + inner: Box<Self>, + start: usize, + length: Option<usize>, + }, + /// [Group](https://www.w3.org/TR/sparql11-query/#aggregateAlgebra). + Group { + inner: Box<Self>, + variables: Vec<Variable>, + aggregates: Vec<(Variable, AggregateExpression)>, + }, + /// [Service](https://www.w3.org/TR/sparql11-federated-query/#defn_evalService). + Service { + name: NamedNodePattern, + inner: Box<Self>, + silent: bool, + }, +} + +impl GraphPattern { + pub fn empty() -> Self { + Self::Values { + variables: Vec::new(), + bindings: Vec::new(), + } + } + + /// Check if the pattern is the empty table + fn is_empty(&self) -> bool { + if let Self::Values { bindings, .. } = self { + bindings.is_empty() + } else { + false + } + } + + pub fn empty_singleton() -> Self { + Self::Values { + variables: Vec::new(), + bindings: vec![Vec::new()], + } + } + + pub fn is_empty_singleton(&self) -> bool { + if let Self::Values { bindings, .. } = self { + bindings.len() == 1 && bindings.iter().all(|b| b.iter().all(Option::is_none)) + } else { + false + } + } + + pub fn join(left: Self, right: Self, algorithm: JoinAlgorithm) -> Self { + if left.is_empty() || right.is_empty() { + return Self::empty(); + } + if left.is_empty_singleton() { + return right; + } + if right.is_empty_singleton() { + return left; + } + Self::Join { + left: Box::new(left), + right: Box::new(right), + algorithm, + } + } + + #[cfg(feature = "sep-0006")] + pub fn lateral(left: Self, right: Self) -> Self { + if left.is_empty() || right.is_empty() { + return Self::empty(); + } + if left.is_empty_singleton() { + return right; + } + if right.is_empty_singleton() { + return left; + } + Self::Lateral { + left: Box::new(left), + right: Box::new(right), + } + } + + pub fn left_join( + left: Self, + right: Self, + expression: Expression, + algorithm: LeftJoinAlgorithm, + ) -> Self { + let expression_ebv = expression.effective_boolean_value(); + if left.is_empty() + || right.is_empty() + || right.is_empty_singleton() + || expression_ebv == Some(false) + { + return left; + } + Self::LeftJoin { + left: Box::new(left), + right: Box::new(right), + expression: if expression_ebv == Some(true) { + true.into() + } else { + expression + }, + algorithm, + } + } + + pub fn minus(left: Self, right: Self, algorithm: MinusAlgorithm) -> Self { + if left.is_empty() { + return Self::empty(); + } + if right.is_empty() { + return left; + } + Self::Minus { + left: Box::new(left), + right: Box::new(right), + algorithm, + } + } + + pub fn union(left: Self, right: Self) -> Self { + Self::union_all([left, right]) + } + + pub fn union_all(args: impl IntoIterator<Item = Self>) -> Self { + let args = args.into_iter(); + let mut all = Vec::with_capacity(args.size_hint().0); + for arg in args { + if arg.is_empty() { + continue; + } + if let Self::Union { inner } = arg { + all.extend(inner); + } else { + all.push(arg); + } + } + if all.is_empty() { + Self::empty() + } else { + Self::Union { + inner: order_vec(all), + } + } + } + + pub fn filter(inner: Self, expression: Expression) -> Self { + if inner.is_empty() { + return Self::empty(); + } + // We unwrap singleton And + let expression = match expression { + Expression::And(mut l) if l.len() == 1 => l.pop().unwrap(), + e => e, + }; + match expression.effective_boolean_value() { + Some(true) => inner, + Some(false) => Self::empty(), + None => match inner { + Self::Filter { + inner, + expression: e2, + } => Self::Filter { + inner, + 
expression: expression & e2, + }, + _ => Self::Filter { + inner: Box::new(inner), + expression, + }, + }, + } + } + + pub fn extend(inner: Self, variable: Variable, expression: Expression) -> Self { + if inner.is_empty() { + return Self::empty(); + } + Self::Extend { + inner: Box::new(inner), + variable, + expression, + } + } + + pub fn values( + mut variables: Vec<Variable>, + mut bindings: Vec<Vec<Option<GroundTerm>>>, + ) -> Self { + let empty_rows = (0..variables.len()) + .filter(|row| !bindings.iter().any(|binding| binding.get(*row).is_some())) + .collect::<Vec<_>>(); + if !empty_rows.is_empty() { + // We remove empty rows + variables = variables + .into_iter() + .enumerate() + .filter_map(|(i, v)| { + if empty_rows.contains(&i) { + None + } else { + Some(v) + } + }) + .collect(); + bindings = bindings + .into_iter() + .map(|binding| { + binding + .into_iter() + .enumerate() + .filter_map(|(i, v)| { + if empty_rows.contains(&i) { + None + } else { + Some(v) + } + }) + .collect() + }) + .collect(); + } + Self::Values { + variables, + bindings, + } + } + + pub fn order_by(inner: Self, expression: Vec<OrderExpression>) -> Self { + if inner.is_empty() { + return Self::empty(); + } + if expression.is_empty() { + return inner; + } + Self::OrderBy { + inner: Box::new(inner), + expression, + } + } + + pub fn project(inner: Self, variables: Vec<Variable>) -> Self { + Self::Project { + inner: Box::new(inner), + variables, + } + } + + pub fn distinct(inner: Self) -> Self { + if inner.is_empty() { + return Self::empty(); + } + Self::Distinct { + inner: Box::new(inner), + } + } + + pub fn reduced(inner: Self) -> Self { + if inner.is_empty() { + return Self::empty(); + } + Self::Reduced { + inner: Box::new(inner), + } + } + + pub fn slice(inner: Self, start: usize, length: Option<usize>) -> Self { + if inner.is_empty() { + return Self::empty(); + } + if start == 0 && length.is_none() { + return inner; + } + Self::Slice { + inner: Box::new(inner), + start, + length, + } + } + + pub fn group( + inner: Self, + variables: Vec<Variable>, + aggregates: Vec<(Variable, AggregateExpression)>, + ) -> Self { + if inner.is_empty() { + return Self::empty(); + } + Self::Group { + inner: Box::new(inner), + variables, + aggregates, + } + } + + pub fn service(inner: Self, name: NamedNodePattern, silent: bool) -> Self { + if inner.is_empty() { + return Self::empty(); + } + Self::Service { + inner: Box::new(inner), + name, + silent, + } + } + + pub fn lookup_used_variables<'a>(&'a self, callback: &mut impl FnMut(&'a Variable)) { + match self { + Self::Values { variables, .. } | Self::Project { variables, .. } => { + for v in variables { + callback(v); + } + } + Self::QuadPattern { + subject, + predicate, + object, + graph_name, + } => { + lookup_term_pattern_variables(subject, callback); + if let NamedNodePattern::Variable(v) = predicate { + callback(v); + } + lookup_term_pattern_variables(object, callback); + if let Some(NamedNodePattern::Variable(v)) = graph_name { + callback(v); + } + } + Self::Path { + subject, + object, + graph_name, + .. + } => { + lookup_term_pattern_variables(subject, callback); + lookup_term_pattern_variables(object, callback); + if let Some(NamedNodePattern::Variable(v)) = graph_name { + callback(v); + } + } + Self::Filter { inner, expression } => { + expression.lookup_used_variables(callback); + inner.lookup_used_variables(callback); + } + Self::Union { inner } => { + for child in inner { + child.lookup_used_variables(callback); + } + } + Self::Join { left, right, .. 
} | Self::Minus { left, right, .. } => { + left.lookup_used_variables(callback); + right.lookup_used_variables(callback); + } + #[cfg(feature = "sep-0006")] + Self::Lateral { left, right } => { + left.lookup_used_variables(callback); + right.lookup_used_variables(callback); + } + Self::LeftJoin { + left, + right, + expression, + .. + } => { + expression.lookup_used_variables(callback); + left.lookup_used_variables(callback); + right.lookup_used_variables(callback); + } + Self::Extend { + inner, + variable, + expression, + } => { + callback(variable); + expression.lookup_used_variables(callback); + inner.lookup_used_variables(callback); + } + Self::OrderBy { inner, .. } + | Self::Distinct { inner } + | Self::Reduced { inner } + | Self::Slice { inner, .. } => inner.lookup_used_variables(callback), + Self::Service { inner, name, .. } => { + if let NamedNodePattern::Variable(v) = name { + callback(v); + } + inner.lookup_used_variables(callback); + } + Self::Group { + variables, + aggregates, + .. + } => { + for v in variables { + callback(v); + } + for (v, _) in aggregates { + callback(v); + } + } + } + } + + fn from_sparql_algebra( + pattern: &AlGraphPattern, + graph_name: Option<&NamedNodePattern>, + blank_nodes: &mut HashMap<BlankNode, Variable>, + ) -> Self { + match pattern { + AlGraphPattern::Bgp { patterns } => patterns + .iter() + .map(|p| { + let (subject, predicate, object) = + Self::triple_pattern_from_algebra(p, blank_nodes); + Self::QuadPattern { + subject, + predicate, + object, + graph_name: graph_name.cloned(), + } + }) + .reduce(|a, b| Self::Join { + left: Box::new(a), + right: Box::new(b), + algorithm: JoinAlgorithm::default(), + }) + .unwrap_or_else(Self::empty_singleton), + AlGraphPattern::Path { + subject, + path, + object, + } => Self::Path { + subject: Self::term_pattern_from_algebra(subject, blank_nodes), + path: path.clone(), + object: Self::term_pattern_from_algebra(object, blank_nodes), + graph_name: graph_name.cloned(), + }, + AlGraphPattern::Join { left, right } => Self::Join { + left: Box::new(Self::from_sparql_algebra(left, graph_name, blank_nodes)), + right: Box::new(Self::from_sparql_algebra(right, graph_name, blank_nodes)), + algorithm: JoinAlgorithm::default(), + }, + AlGraphPattern::LeftJoin { + left, + right, + expression, + } => Self::LeftJoin { + left: Box::new(Self::from_sparql_algebra(left, graph_name, blank_nodes)), + right: Box::new(Self::from_sparql_algebra(right, graph_name, blank_nodes)), + expression: expression.as_ref().map_or_else( + || true.into(), + |e| Expression::from_sparql_algebra(e, graph_name), + ), + algorithm: LeftJoinAlgorithm::default(), + }, + #[cfg(feature = "sep-0006")] + AlGraphPattern::Lateral { left, right } => Self::Lateral { + left: Box::new(Self::from_sparql_algebra(left, graph_name, blank_nodes)), + right: Box::new(Self::from_sparql_algebra(right, graph_name, blank_nodes)), + }, + AlGraphPattern::Filter { inner, expr } => Self::Filter { + inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)), + expression: Expression::from_sparql_algebra(expr, graph_name), + }, + AlGraphPattern::Union { left, right } => Self::Union { + inner: vec![ + Self::from_sparql_algebra(left, graph_name, blank_nodes), + Self::from_sparql_algebra(right, graph_name, blank_nodes), + ], + }, + AlGraphPattern::Graph { inner, name } => { + Self::from_sparql_algebra(inner, Some(name), blank_nodes) + } + AlGraphPattern::Extend { + inner, + expression, + variable, + } => Self::Extend { + inner: Box::new(Self::from_sparql_algebra(inner, 
graph_name, blank_nodes)),
+                expression: Expression::from_sparql_algebra(expression, graph_name),
+                variable: variable.clone(),
+            },
+            AlGraphPattern::Minus { left, right } => Self::Minus {
+                left: Box::new(Self::from_sparql_algebra(left, graph_name, blank_nodes)),
+                right: Box::new(Self::from_sparql_algebra(right, graph_name, blank_nodes)),
+                algorithm: MinusAlgorithm::default(),
+            },
+            AlGraphPattern::Values {
+                variables,
+                bindings,
+            } => Self::Values {
+                variables: variables.clone(),
+                bindings: bindings.clone(),
+            },
+            AlGraphPattern::OrderBy { inner, expression } => Self::OrderBy {
+                inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)),
+                expression: expression
+                    .iter()
+                    .map(|e| OrderExpression::from_sparql_algebra(e, graph_name))
+                    .collect(),
+            },
+            AlGraphPattern::Project { inner, variables } => {
+                let graph_name = if let Some(NamedNodePattern::Variable(graph_name)) = graph_name {
+                    Some(NamedNodePattern::Variable(
+                        if variables.contains(graph_name) {
+                            graph_name.clone()
+                        } else {
+                            new_var()
+                        },
+                    ))
+                } else {
+                    graph_name.cloned()
+                };
+                Self::Project {
+                    inner: Box::new(Self::from_sparql_algebra(
+                        inner,
+                        graph_name.as_ref(),
+                        &mut HashMap::new(),
+                    )),
+                    variables: variables.clone(),
+                }
+            }
+            AlGraphPattern::Distinct { inner } => Self::Distinct {
+                inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)),
+            },
+            AlGraphPattern::Reduced { inner } => Self::Reduced {
+                inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)),
+            },
+            AlGraphPattern::Slice {
+                inner,
+                start,
+                length,
+            } => Self::Slice {
+                inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)),
+                start: *start,
+                length: *length,
+            },
+            AlGraphPattern::Group {
+                inner,
+                variables,
+                aggregates,
+            } => Self::Group {
+                inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)),
+                variables: variables.clone(),
+                aggregates: aggregates
+                    .iter()
+                    .map(|(var, expr)| {
+                        (
+                            var.clone(),
+                            AggregateExpression::from_sparql_algebra(expr, graph_name),
+                        )
+                    })
+                    .collect(),
+            },
+            AlGraphPattern::Service {
+                inner,
+                name,
+                silent,
+            } => Self::Service {
+                inner: Box::new(Self::from_sparql_algebra(inner, graph_name, blank_nodes)),
+                name: name.clone(),
+                silent: *silent,
+            },
+        }
+    }
+
+    fn triple_pattern_from_algebra(
+        pattern: &TriplePattern,
+        blank_nodes: &mut HashMap<BlankNode, Variable>,
+    ) -> (GroundTermPattern, NamedNodePattern, GroundTermPattern) {
+        (
+            Self::term_pattern_from_algebra(&pattern.subject, blank_nodes),
+            pattern.predicate.clone(),
+            Self::term_pattern_from_algebra(&pattern.object, blank_nodes),
+        )
+    }
+
+    fn term_pattern_from_algebra(
+        pattern: &TermPattern,
+        blank_nodes: &mut HashMap<BlankNode, Variable>,
+    ) -> GroundTermPattern {
+        match pattern {
+            TermPattern::NamedNode(node) => node.clone().into(),
+            TermPattern::BlankNode(node) => blank_nodes
+                .entry(node.clone())
+                .or_insert_with(new_var)
+                .clone()
+                .into(),
+            TermPattern::Literal(literal) => literal.clone().into(),
+            #[cfg(feature = "rdf-star")]
+            TermPattern::Triple(pattern) => {
+                let (subject, predicate, object) =
+                    Self::triple_pattern_from_algebra(pattern, blank_nodes);
+                GroundTriplePattern {
+                    subject,
+                    predicate,
+                    object,
+                }
+                .into()
+            }
+            TermPattern::Variable(variable) => variable.clone().into(),
+        }
+    }
+}
+
+impl From<&AlGraphPattern> for GraphPattern {
+    fn from(pattern: &AlGraphPattern) -> Self {
+        Self::from_sparql_algebra(pattern, None, &mut HashMap::new())
+    }
+}
+
+impl From<&GraphPattern> for
AlGraphPattern { + fn from(pattern: &GraphPattern) -> Self { + match pattern { + GraphPattern::QuadPattern { + subject, + predicate, + object, + graph_name, + } => { + let pattern = Self::Bgp { + patterns: vec![TriplePattern { + subject: subject.clone().into(), + predicate: predicate.clone(), + object: object.clone().into(), + }], + }; + if let Some(graph_name) = graph_name { + Self::Graph { + inner: Box::new(pattern), + name: graph_name.clone(), + } + } else { + pattern + } + } + GraphPattern::Path { + subject, + path, + object, + graph_name, + } => { + let pattern = Self::Path { + subject: subject.clone().into(), + path: path.clone(), + object: object.clone().into(), + }; + if let Some(graph_name) = graph_name { + Self::Graph { + inner: Box::new(pattern), + name: graph_name.clone(), + } + } else { + pattern + } + } + GraphPattern::Join { left, right, .. } => { + match (left.as_ref().into(), right.as_ref().into()) { + (Self::Bgp { patterns: mut left }, Self::Bgp { patterns: right }) => { + left.extend(right); + Self::Bgp { patterns: left } + } + (left, right) => Self::Join { + left: Box::new(left), + right: Box::new(right), + }, + } + } + GraphPattern::LeftJoin { + left, + right, + expression, + .. + } => { + let empty_expr = if let Expression::Literal(l) = expression { + l.datatype() == xsd::BOOLEAN && l.value() == "true" + } else { + false + }; + Self::LeftJoin { + left: Box::new(left.as_ref().into()), + right: Box::new(right.as_ref().into()), + expression: if empty_expr { + None + } else { + Some(expression.into()) + }, + } + } + #[cfg(feature = "sep-0006")] + GraphPattern::Lateral { left, right } => { + match (left.as_ref().into(), right.as_ref().into()) { + (Self::Bgp { patterns: mut left }, Self::Bgp { patterns: right }) => { + left.extend(right); + Self::Bgp { patterns: left } + } + (left, right) => Self::Lateral { + left: Box::new(left), + right: Box::new(right), + }, + } + } + GraphPattern::Filter { inner, expression } => Self::Filter { + inner: Box::new(inner.as_ref().into()), + expr: expression.into(), + }, + GraphPattern::Union { inner } => inner + .iter() + .map(Into::into) + .reduce(|a, b| Self::Union { + left: Box::new(a), + right: Box::new(b), + }) + .unwrap_or_else(|| Self::Values { + variables: Vec::new(), + bindings: Vec::new(), + }), + GraphPattern::Extend { + inner, + expression, + variable, + } => Self::Extend { + inner: Box::new(inner.as_ref().into()), + expression: expression.into(), + variable: variable.clone(), + }, + GraphPattern::Minus { left, right, .. 
} => Self::Minus {
+                left: Box::new(left.as_ref().into()),
+                right: Box::new(right.as_ref().into()),
+            },
+            GraphPattern::Values {
+                variables,
+                bindings,
+            } => Self::Values {
+                variables: variables.clone(),
+                bindings: bindings.clone(),
+            },
+            GraphPattern::OrderBy { inner, expression } => Self::OrderBy {
+                inner: Box::new(inner.as_ref().into()),
+                expression: expression.iter().map(Into::into).collect(),
+            },
+            GraphPattern::Project { inner, variables } => Self::Project {
+                inner: Box::new(inner.as_ref().into()),
+                variables: variables.clone(),
+            },
+            GraphPattern::Distinct { inner } => Self::Distinct {
+                inner: Box::new(inner.as_ref().into()),
+            },
+            GraphPattern::Reduced { inner } => Self::Reduced {
+                inner: Box::new(inner.as_ref().into()),
+            },
+            GraphPattern::Slice {
+                inner,
+                start,
+                length,
+            } => Self::Slice {
+                inner: Box::new(inner.as_ref().into()),
+                start: *start,
+                length: *length,
+            },
+            GraphPattern::Group {
+                inner,
+                variables,
+                aggregates,
+            } => Self::Group {
+                inner: Box::new(inner.as_ref().into()),
+                variables: variables.clone(),
+                aggregates: aggregates
+                    .iter()
+                    .map(|(var, expr)| (var.clone(), expr.into()))
+                    .collect(),
+            },
+            GraphPattern::Service {
+                inner,
+                name,
+                silent,
+            } => Self::Service {
+                inner: Box::new(inner.as_ref().into()),
+                name: name.clone(),
+                silent: *silent,
+            },
+        }
+    }
+}
+
+/// The join algorithm used (cf. [`GraphPattern::Join`]).
+#[derive(Eq, PartialEq, Debug, Clone, Hash)]
+pub enum JoinAlgorithm {
+    HashBuildLeftProbeRight { keys: Vec<Variable> },
+}
+
+impl Default for JoinAlgorithm {
+    fn default() -> Self {
+        Self::HashBuildLeftProbeRight {
+            keys: Vec::default(),
+        }
+    }
+}
+
+/// The left join algorithm used (cf. [`GraphPattern::LeftJoin`]).
+#[derive(Eq, PartialEq, Debug, Clone, Hash)]
+pub enum LeftJoinAlgorithm {
+    HashBuildRightProbeLeft { keys: Vec<Variable> },
+}
+
+impl Default for LeftJoinAlgorithm {
+    fn default() -> Self {
+        Self::HashBuildRightProbeLeft {
+            keys: Vec::default(),
+        }
+    }
+}
+
+/// The minus algorithm used (cf. [`GraphPattern::Minus`]).
+#[derive(Eq, PartialEq, Debug, Clone, Hash)]
+pub enum MinusAlgorithm {
+    HashBuildRightProbeLeft { keys: Vec<Variable> },
+}
+
+impl Default for MinusAlgorithm {
+    fn default() -> Self {
+        Self::HashBuildRightProbeLeft {
+            keys: Vec::default(),
+        }
+    }
+}
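+
+// (Editor's note) Illustrative sketch only, not part of the vendored sources:
+// the general build/probe shape that the hash-join algorithm enums above name.
+// `Row` and `key_of` are hypothetical stand-ins for the evaluator's solution
+// representation; the actual implementation lives in the SPARQL evaluator.
+//
+//     use std::collections::HashMap;
+//     use std::hash::Hash;
+//
+//     fn hash_join<K: Hash + Eq, Row: Clone>(
+//         left: &[Row],
+//         right: &[Row],
+//         key_of: impl Fn(&Row) -> K,
+//     ) -> Vec<(Row, Row)> {
+//         // Build phase: hash the left input on the join key variables.
+//         let mut table: HashMap<K, Vec<Row>> = HashMap::new();
+//         for l in left {
+//             table.entry(key_of(l)).or_default().push(l.clone());
+//         }
+//         // Probe phase: stream the right input and emit matching pairs.
+//         let mut out = Vec::new();
+//         for r in right {
+//             if let Some(matches) = table.get(&key_of(r)) {
+//                 for l in matches {
+//                     out.push((l.clone(), r.clone()));
+//                 }
+//             }
+//         }
+//         out
+//     }
+
+/// A set function used in aggregates (cf. [`GraphPattern::Group`]).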
+#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum AggregateExpression { + CountSolutions { + distinct: bool, + }, + FunctionCall { + name: AggregateFunction, + expr: Expression, + distinct: bool, + }, +} + +impl AggregateExpression { + fn from_sparql_algebra( + expression: &AlAggregateExpression, + graph_name: Option<&NamedNodePattern>, + ) -> Self { + match expression { + AlAggregateExpression::CountSolutions { distinct } => Self::CountSolutions { + distinct: *distinct, + }, + AlAggregateExpression::FunctionCall { + name, + expr, + distinct, + } => Self::FunctionCall { + name: name.clone(), + expr: Expression::from_sparql_algebra(expr, graph_name), + distinct: *distinct, + }, + } + } +} + +impl From<&AggregateExpression> for AlAggregateExpression { + fn from(expression: &AggregateExpression) -> Self { + match expression { + AggregateExpression::CountSolutions { distinct } => Self::CountSolutions { + distinct: *distinct, + }, + AggregateExpression::FunctionCall { + name, + expr, + distinct, + } => Self::FunctionCall { + name: name.clone(), + expr: expr.into(), + distinct: *distinct, + }, + } + } +} + +/// An ordering comparator used by [`GraphPattern::OrderBy`]. +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum OrderExpression { + /// Ascending order + Asc(Expression), + /// Descending order + Desc(Expression), +} + +impl OrderExpression { + fn from_sparql_algebra( + expression: &AlOrderExpression, + graph_name: Option<&NamedNodePattern>, + ) -> Self { + match expression { + AlOrderExpression::Asc(e) => Self::Asc(Expression::from_sparql_algebra(e, graph_name)), + AlOrderExpression::Desc(e) => { + Self::Desc(Expression::from_sparql_algebra(e, graph_name)) + } + } + } +} + +impl From<&OrderExpression> for AlOrderExpression { + fn from(expression: &OrderExpression) -> Self { + match expression { + OrderExpression::Asc(e) => Self::Asc(e.into()), + OrderExpression::Desc(e) => Self::Desc(e.into()), + } + } +} + +fn new_var() -> Variable { + Variable::new_unchecked(format!("{:x}", random::<u128>())) +} + +fn order_pair<T: Hash>(a: T, b: T) -> (T, T) { + if hash(&a) <= hash(&b) { + (a, b) + } else { + (b, a) + } +} + +fn order_vec<T: Hash>(mut vec: Vec<T>) -> Vec<T> { + vec.sort_unstable_by_key(|a| hash(a)); + vec +} + +fn hash(v: impl Hash) -> u64 { + let mut hasher = DefaultHasher::new(); + v.hash(&mut hasher); + hasher.finish() +} + +fn lookup_term_pattern_variables<'a>( + pattern: &'a GroundTermPattern, + callback: &mut impl FnMut(&'a Variable), +) { + if let GroundTermPattern::Variable(v) = pattern { + callback(v); + } + #[cfg(feature = "rdf-star")] + if let GroundTermPattern::Triple(t) = pattern { + lookup_term_pattern_variables(&t.subject, callback); + if let NamedNodePattern::Variable(v) = &t.predicate { + callback(v); + } + lookup_term_pattern_variables(&t.object, callback); + } +} diff --git a/ng-oxigraph/src/sparopt/mod.rs b/ng-oxigraph/src/sparopt/mod.rs new file mode 100644 index 0000000..3628eca --- /dev/null +++ b/ng-oxigraph/src/sparopt/mod.rs @@ -0,0 +1,5 @@ +pub use crate::sparopt::optimizer::Optimizer; + +pub mod algebra; +mod optimizer; +mod type_inference; diff --git a/ng-oxigraph/src/sparopt/optimizer.rs b/ng-oxigraph/src/sparopt/optimizer.rs new file mode 100644 index 0000000..7e1a254 --- /dev/null +++ b/ng-oxigraph/src/sparopt/optimizer.rs @@ -0,0 +1,1082 @@ +use crate::oxrdf::Variable; +use crate::spargebra::algebra::PropertyPathExpression; +use crate::spargebra::term::{GroundTermPattern, NamedNodePattern}; +use crate::sparopt::algebra::{ + Expression, 
GraphPattern, JoinAlgorithm, LeftJoinAlgorithm, MinusAlgorithm, OrderExpression, +}; +use crate::sparopt::type_inference::{ + infer_expression_type, infer_graph_pattern_types, VariableType, VariableTypes, +}; +use std::cmp::{max, min}; + +pub struct Optimizer; + +impl Optimizer { + pub fn optimize_graph_pattern(pattern: GraphPattern) -> GraphPattern { + let pattern = Self::normalize_pattern(pattern, &VariableTypes::default()); + let pattern = Self::reorder_joins(pattern, &VariableTypes::default()); + Self::push_filters(pattern, Vec::new(), &VariableTypes::default()) + } + + /// Normalize the pattern, discarding any join ordering information + fn normalize_pattern(pattern: GraphPattern, input_types: &VariableTypes) -> GraphPattern { + match pattern { + GraphPattern::QuadPattern { + subject, + predicate, + object, + graph_name, + } => GraphPattern::QuadPattern { + subject, + predicate, + object, + graph_name, + }, + GraphPattern::Path { + subject, + path, + object, + graph_name, + } => GraphPattern::Path { + subject, + path, + object, + graph_name, + }, + GraphPattern::Join { + left, + right, + algorithm, + } => GraphPattern::join( + Self::normalize_pattern(*left, input_types), + Self::normalize_pattern(*right, input_types), + algorithm, + ), + GraphPattern::LeftJoin { + left, + right, + expression, + algorithm, + } => { + let left = Self::normalize_pattern(*left, input_types); + let right = Self::normalize_pattern(*right, input_types); + let mut inner_types = infer_graph_pattern_types(&left, input_types.clone()); + inner_types.intersect_with(infer_graph_pattern_types(&right, input_types.clone())); + GraphPattern::left_join( + left, + right, + Self::normalize_expression(expression, &inner_types), + algorithm, + ) + } + #[cfg(feature = "sep-0006")] + GraphPattern::Lateral { left, right } => { + let left = Self::normalize_pattern(*left, input_types); + let left_types = infer_graph_pattern_types(&left, input_types.clone()); + let right = Self::normalize_pattern(*right, &left_types); + GraphPattern::lateral(left, right) + } + GraphPattern::Filter { inner, expression } => { + let inner = Self::normalize_pattern(*inner, input_types); + let inner_types = infer_graph_pattern_types(&inner, input_types.clone()); + let expression = Self::normalize_expression(expression, &inner_types); + let expression_type = infer_expression_type(&expression, &inner_types); + if expression_type == VariableType::UNDEF { + GraphPattern::empty() + } else { + GraphPattern::filter(inner, expression) + } + } + GraphPattern::Union { inner } => GraphPattern::union_all( + inner + .into_iter() + .map(|e| Self::normalize_pattern(e, input_types)), + ), + GraphPattern::Extend { + inner, + variable, + expression, + } => { + let inner = Self::normalize_pattern(*inner, input_types); + let inner_types = infer_graph_pattern_types(&inner, input_types.clone()); + let expression = Self::normalize_expression(expression, &inner_types); + let expression_type = infer_expression_type(&expression, &inner_types); + if expression_type == VariableType::UNDEF { + // TODO: valid? 
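+                    // (Editor's note) This looks valid: an UNDEF-typed expression can
+                    // only evaluate to an error, so the extended variable would be
+                    // unbound in every solution and dropping the Extend node leaves
+                    // the results unchanged.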
+ inner + } else { + GraphPattern::extend(inner, variable, expression) + } + } + GraphPattern::Minus { + left, + right, + algorithm, + } => GraphPattern::minus( + Self::normalize_pattern(*left, input_types), + Self::normalize_pattern(*right, input_types), + algorithm, + ), + GraphPattern::Values { + variables, + bindings, + } => GraphPattern::values(variables, bindings), + GraphPattern::OrderBy { inner, expression } => { + let inner = Self::normalize_pattern(*inner, input_types); + let inner_types = infer_graph_pattern_types(&inner, input_types.clone()); + GraphPattern::order_by( + inner, + expression + .into_iter() + .map(|e| match e { + OrderExpression::Asc(e) => { + OrderExpression::Asc(Self::normalize_expression(e, &inner_types)) + } + OrderExpression::Desc(e) => { + OrderExpression::Desc(Self::normalize_expression(e, &inner_types)) + } + }) + .collect(), + ) + } + GraphPattern::Project { inner, variables } => { + GraphPattern::project(Self::normalize_pattern(*inner, input_types), variables) + } + GraphPattern::Distinct { inner } => { + GraphPattern::distinct(Self::normalize_pattern(*inner, input_types)) + } + GraphPattern::Reduced { inner } => { + GraphPattern::reduced(Self::normalize_pattern(*inner, input_types)) + } + GraphPattern::Slice { + inner, + start, + length, + } => GraphPattern::slice(Self::normalize_pattern(*inner, input_types), start, length), + GraphPattern::Group { + inner, + variables, + aggregates, + } => { + // TODO: min, max and sample don't care about DISTINCT + GraphPattern::group( + Self::normalize_pattern(*inner, input_types), + variables, + aggregates, + ) + } + GraphPattern::Service { + name, + inner, + silent, + } => GraphPattern::service(Self::normalize_pattern(*inner, input_types), name, silent), + } + } + + fn normalize_expression(expression: Expression, types: &VariableTypes) -> Expression { + match expression { + Expression::NamedNode(node) => node.into(), + Expression::Literal(literal) => literal.into(), + Expression::Variable(variable) => variable.into(), + Expression::Or(inner) => Expression::or_all( + inner + .into_iter() + .map(|e| Self::normalize_expression(e, types)), + ), + Expression::And(inner) => Expression::and_all( + inner + .into_iter() + .map(|e| Self::normalize_expression(e, types)), + ), + Expression::Equal(left, right) => { + let left = Self::normalize_expression(*left, types); + let left_types = infer_expression_type(&left, types); + let right = Self::normalize_expression(*right, types); + let right_types = infer_expression_type(&right, types); + #[allow(unused_mut)] + let mut must_use_equal = left_types.literal && right_types.literal; + #[cfg(feature = "rdf-star")] + { + must_use_equal = must_use_equal || left_types.triple && right_types.triple; + } + if must_use_equal { + Expression::equal(left, right) + } else { + Expression::same_term(left, right) + } + } + Expression::SameTerm(left, right) => Expression::same_term( + Self::normalize_expression(*left, types), + Self::normalize_expression(*right, types), + ), + Expression::Greater(left, right) => Expression::greater( + Self::normalize_expression(*left, types), + Self::normalize_expression(*right, types), + ), + Expression::GreaterOrEqual(left, right) => Expression::greater_or_equal( + Self::normalize_expression(*left, types), + Self::normalize_expression(*right, types), + ), + Expression::Less(left, right) => Expression::less( + Self::normalize_expression(*left, types), + Self::normalize_expression(*right, types), + ), + Expression::LessOrEqual(left, right) => 
Expression::less_or_equal( + Self::normalize_expression(*left, types), + Self::normalize_expression(*right, types), + ), + Expression::Add(left, right) => { + Self::normalize_expression(*left, types) + Self::normalize_expression(*right, types) + } + Expression::Subtract(left, right) => { + Self::normalize_expression(*left, types) - Self::normalize_expression(*right, types) + } + Expression::Multiply(left, right) => { + Self::normalize_expression(*left, types) * Self::normalize_expression(*right, types) + } + Expression::Divide(left, right) => { + Self::normalize_expression(*left, types) / Self::normalize_expression(*right, types) + } + Expression::UnaryPlus(inner) => { + Expression::unary_plus(Self::normalize_expression(*inner, types)) + } + Expression::UnaryMinus(inner) => -Self::normalize_expression(*inner, types), + Expression::Not(inner) => !Self::normalize_expression(*inner, types), + Expression::Exists(inner) => Expression::exists(Self::normalize_pattern(*inner, types)), + Expression::Bound(variable) => { + let t = types.get(&variable); + if !t.undef { + true.into() + } else if t == VariableType::UNDEF { + false.into() + } else { + Expression::Bound(variable) + } + } + Expression::If(cond, then, els) => Expression::if_cond( + Self::normalize_expression(*cond, types), + Self::normalize_expression(*then, types), + Self::normalize_expression(*els, types), + ), + Expression::Coalesce(inners) => Expression::coalesce( + inners + .into_iter() + .map(|e| Self::normalize_expression(e, types)) + .collect(), + ), + Expression::FunctionCall(name, args) => Expression::call( + name, + args.into_iter() + .map(|e| Self::normalize_expression(e, types)) + .collect(), + ), + } + } + + fn push_filters( + pattern: GraphPattern, + mut filters: Vec<Expression>, + input_types: &VariableTypes, + ) -> GraphPattern { + match pattern { + GraphPattern::QuadPattern { .. } + | GraphPattern::Path { .. } + | GraphPattern::Values { .. 
} => { + GraphPattern::filter(pattern, Expression::and_all(filters)) + } + GraphPattern::Join { + left, + right, + algorithm, + } => { + let left_types = infer_graph_pattern_types(&left, input_types.clone()); + let right_types = infer_graph_pattern_types(&right, input_types.clone()); + let mut left_filters = Vec::new(); + let mut right_filters = Vec::new(); + let mut final_filters = Vec::new(); + for filter in filters { + let push_left = are_all_expression_variables_bound(&filter, &left_types); + let push_right = are_all_expression_variables_bound(&filter, &right_types); + if push_left { + if push_right { + left_filters.push(filter.clone()); + right_filters.push(filter); + } else { + left_filters.push(filter); + } + } else if push_right { + right_filters.push(filter); + } else { + final_filters.push(filter); + } + } + GraphPattern::filter( + GraphPattern::join( + Self::push_filters(*left, left_filters, input_types), + Self::push_filters(*right, right_filters, input_types), + algorithm, + ), + Expression::and_all(final_filters), + ) + } + #[cfg(feature = "sep-0006")] + GraphPattern::Lateral { left, right } => { + let left_types = infer_graph_pattern_types(&left, input_types.clone()); + let mut left_filters = Vec::new(); + let mut right_filters = Vec::new(); + for filter in filters { + let push_left = are_all_expression_variables_bound(&filter, &left_types); + if push_left { + left_filters.push(filter); + } else { + right_filters.push(filter); + } + } + let left = Self::push_filters(*left, left_filters, input_types); + let right = Self::push_filters(*right, right_filters, &left_types); + if let GraphPattern::Filter { + inner: right, + expression, + } = right + { + // We prefer to have filter out of the lateral rather than inside the right part + GraphPattern::filter(GraphPattern::lateral(left, *right), expression) + } else { + GraphPattern::lateral(left, right) + } + } + GraphPattern::LeftJoin { + left, + right, + expression, + algorithm, + } => { + let left_types = infer_graph_pattern_types(&left, input_types.clone()); + let right_types = infer_graph_pattern_types(&right, input_types.clone()); + let mut left_filters = Vec::new(); + let mut right_filters = Vec::new(); + let mut final_filters = Vec::new(); + for filter in filters { + let push_left = are_all_expression_variables_bound(&filter, &left_types); + if push_left { + left_filters.push(filter); + } else { + final_filters.push(filter); + } + } + let expression = if expression.effective_boolean_value().is_none() + && (are_all_expression_variables_bound(&expression, &right_types) + || are_no_expression_variables_bound(&expression, &left_types)) + { + right_filters.push(expression); + true.into() + } else { + expression + }; + GraphPattern::filter( + GraphPattern::left_join( + Self::push_filters(*left, left_filters, input_types), + Self::push_filters(*right, right_filters, input_types), + expression, + algorithm, + ), + Expression::and_all(final_filters), + ) + } + GraphPattern::Minus { + left, + right, + algorithm, + } => GraphPattern::minus( + Self::push_filters(*left, filters, input_types), + Self::push_filters(*right, Vec::new(), input_types), + algorithm, + ), + GraphPattern::Extend { + inner, + expression, + variable, + } => { + // TODO: handle the case where the filter overrides an expression variable (should not happen in SPARQL but allowed in the algebra) + let mut inner_filters = Vec::new(); + let mut final_filters = Vec::new(); + for filter in filters { + let extend_variable_used = + filter.used_variables().into_iter().any(|v| 
*v == variable); + if extend_variable_used { + final_filters.push(filter); + } else { + inner_filters.push(filter); + } + } + GraphPattern::filter( + GraphPattern::extend( + Self::push_filters(*inner, inner_filters, input_types), + variable, + expression, + ), + Expression::and_all(final_filters), + ) + } + GraphPattern::Filter { inner, expression } => { + if let Expression::And(expressions) = expression { + filters.extend(expressions) + } else { + filters.push(expression) + }; + Self::push_filters(*inner, filters, input_types) + } + GraphPattern::Union { inner } => GraphPattern::union_all( + inner + .into_iter() + .map(|c| Self::push_filters(c, filters.clone(), input_types)), + ), + GraphPattern::Slice { + inner, + start, + length, + } => GraphPattern::filter( + GraphPattern::slice( + Self::push_filters(*inner, Vec::new(), input_types), + start, + length, + ), + Expression::and_all(filters), + ), + GraphPattern::Distinct { inner } => { + GraphPattern::distinct(Self::push_filters(*inner, filters, input_types)) + } + GraphPattern::Reduced { inner } => { + GraphPattern::reduced(Self::push_filters(*inner, filters, input_types)) + } + GraphPattern::Project { inner, variables } => { + GraphPattern::project(Self::push_filters(*inner, filters, input_types), variables) + } + GraphPattern::OrderBy { inner, expression } => { + GraphPattern::order_by(Self::push_filters(*inner, filters, input_types), expression) + } + GraphPattern::Service { + inner, + name, + silent, + } => GraphPattern::service( + Self::push_filters(*inner, filters, input_types), + name, + silent, + ), + GraphPattern::Group { + inner, + variables, + aggregates, + } => GraphPattern::filter( + GraphPattern::group( + Self::push_filters(*inner, Vec::new(), input_types), + variables, + aggregates, + ), + Expression::and_all(filters), + ), + } + } + + fn reorder_joins(pattern: GraphPattern, input_types: &VariableTypes) -> GraphPattern { + match pattern { + GraphPattern::QuadPattern { .. } + | GraphPattern::Path { .. } + | GraphPattern::Values { .. } => pattern, + GraphPattern::Join { left, right, .. } => { + // We flatten the join operation + let mut to_reorder = Vec::new(); + let mut todo = vec![*right, *left]; + while let Some(e) = todo.pop() { + if let GraphPattern::Join { left, right, .. 
} = e {
+                        todo.push(*right);
+                        todo.push(*left);
+                    } else {
+                        to_reorder.push(e);
+                    }
+                }
+
+                // We first do type inference on each child
+                let to_reorder_types = to_reorder
+                    .iter()
+                    .map(|p| infer_graph_pattern_types(p, input_types.clone()))
+                    .collect::<Vec<_>>();
+
+                // We do greedy join reordering
+                let mut output_cartesian_product_joins = Vec::new();
+                let mut not_yet_reordered_ids = vec![true; to_reorder.len()];
+                // We look for the next connected component to reorder and pick the smallest element
+                while let Some(next_entry_id) = not_yet_reordered_ids
+                    .iter()
+                    .enumerate()
+                    .filter(|(_, v)| **v)
+                    .map(|(i, _)| i)
+                    .min_by_key(|i| estimate_graph_pattern_size(&to_reorder[*i], input_types))
+                {
+                    not_yet_reordered_ids[next_entry_id] = false; // It's now done
+                    let mut output = to_reorder[next_entry_id].clone();
+                    let mut output_types = to_reorder_types[next_entry_id].clone();
+                    // We look for another child to join with that does not blow up the join cost
+                    while let Some(next_id) = not_yet_reordered_ids
+                        .iter()
+                        .enumerate()
+                        .filter(|(_, v)| **v)
+                        .map(|(i, _)| i)
+                        .filter(|i| {
+                            has_common_variables(&output_types, &to_reorder_types[*i], input_types)
+                        })
+                        .min_by_key(|i| {
+                            // Estimation of the join cost
+                            if cfg!(feature = "sep-0006")
+                                && is_fit_for_for_loop_join(
+                                    &to_reorder[*i],
+                                    input_types,
+                                    &output_types,
+                                )
+                            {
+                                estimate_lateral_cost(
+                                    &output,
+                                    &output_types,
+                                    &to_reorder[*i],
+                                    input_types,
+                                )
+                            } else {
+                                estimate_join_cost(
+                                    &output,
+                                    &to_reorder[*i],
+                                    &JoinAlgorithm::HashBuildLeftProbeRight {
+                                        keys: join_key_variables(
+                                            &output_types,
+                                            &to_reorder_types[*i],
+                                            input_types,
+                                        ),
+                                    },
+                                    input_types,
+                                )
+                            }
+                        })
+                    {
+                        not_yet_reordered_ids[next_id] = false; // It's now done
+                        let next = to_reorder[next_id].clone();
+                        #[cfg(feature = "sep-0006")]
+                        {
+                            output = if is_fit_for_for_loop_join(&next, input_types, &output_types)
+                            {
+                                GraphPattern::lateral(output, next)
+                            } else {
+                                GraphPattern::join(
+                                    output,
+                                    next,
+                                    JoinAlgorithm::HashBuildLeftProbeRight {
+                                        keys: join_key_variables(
+                                            &output_types,
+                                            &to_reorder_types[next_id],
+                                            input_types,
+                                        ),
+                                    },
+                                )
+                            };
+                        }
+                        #[cfg(not(feature = "sep-0006"))]
+                        {
+                            output = GraphPattern::join(
+                                output,
+                                next,
+                                JoinAlgorithm::HashBuildLeftProbeRight {
+                                    keys: join_key_variables(
+                                        &output_types,
+                                        &to_reorder_types[next_id],
+                                        input_types,
+                                    ),
+                                },
+                            );
+                        }
+                        output_types.intersect_with(to_reorder_types[next_id].clone());
+                    }
+                    output_cartesian_product_joins.push(output);
+                }
+                output_cartesian_product_joins
+                    .into_iter()
+                    .reduce(|left, right| {
+                        let keys = join_key_variables(
+                            &infer_graph_pattern_types(&left, input_types.clone()),
+                            &infer_graph_pattern_types(&right, input_types.clone()),
+                            input_types,
+                        );
+                        if estimate_graph_pattern_size(&left, input_types)
+                            <= estimate_graph_pattern_size(&right, input_types)
+                        {
+                            GraphPattern::join(
+                                left,
+                                right,
+                                JoinAlgorithm::HashBuildLeftProbeRight { keys },
+                            )
+                        } else {
+                            GraphPattern::join(
+                                right,
+                                left,
+                                JoinAlgorithm::HashBuildLeftProbeRight { keys },
+                            )
+                        }
+                    })
+                    .unwrap()
+            }
+            #[cfg(feature = "sep-0006")]
+            GraphPattern::Lateral { left, right } => {
+                let left_types = infer_graph_pattern_types(&left, input_types.clone());
+                GraphPattern::lateral(
+                    Self::reorder_joins(*left, input_types),
+                    Self::reorder_joins(*right, &left_types),
+                )
+            }
+            GraphPattern::LeftJoin {
+                left,
+                right,
+                expression,
+                ..
+ } => { + let left = Self::reorder_joins(*left, input_types); + let left_types = infer_graph_pattern_types(&left, input_types.clone()); + let right = Self::reorder_joins(*right, input_types); + let right_types = infer_graph_pattern_types(&right, input_types.clone()); + #[cfg(feature = "sep-0006")] + { + if is_fit_for_for_loop_join(&right, input_types, &left_types) + && has_common_variables(&left_types, &right_types, input_types) + { + return GraphPattern::lateral( + left, + GraphPattern::left_join( + GraphPattern::empty_singleton(), + right, + expression, + LeftJoinAlgorithm::HashBuildRightProbeLeft { keys: Vec::new() }, + ), + ); + } + } + GraphPattern::left_join( + left, + right, + expression, + LeftJoinAlgorithm::HashBuildRightProbeLeft { + keys: join_key_variables(&left_types, &right_types, input_types), + }, + ) + } + GraphPattern::Minus { left, right, .. } => { + let left = Self::reorder_joins(*left, input_types); + let left_types = infer_graph_pattern_types(&left, input_types.clone()); + let right = Self::reorder_joins(*right, input_types); + let right_types = infer_graph_pattern_types(&right, input_types.clone()); + GraphPattern::minus( + left, + right, + MinusAlgorithm::HashBuildRightProbeLeft { + keys: join_key_variables(&left_types, &right_types, input_types), + }, + ) + } + GraphPattern::Extend { + inner, + expression, + variable, + } => GraphPattern::extend( + Self::reorder_joins(*inner, input_types), + variable, + expression, + ), + GraphPattern::Filter { inner, expression } => { + GraphPattern::filter(Self::reorder_joins(*inner, input_types), expression) + } + GraphPattern::Union { inner } => GraphPattern::union_all( + inner + .into_iter() + .map(|c| Self::reorder_joins(c, input_types)), + ), + GraphPattern::Slice { + inner, + start, + length, + } => GraphPattern::slice(Self::reorder_joins(*inner, input_types), start, length), + GraphPattern::Distinct { inner } => { + GraphPattern::distinct(Self::reorder_joins(*inner, input_types)) + } + GraphPattern::Reduced { inner } => { + GraphPattern::reduced(Self::reorder_joins(*inner, input_types)) + } + GraphPattern::Project { inner, variables } => { + GraphPattern::project(Self::reorder_joins(*inner, input_types), variables) + } + GraphPattern::OrderBy { inner, expression } => { + GraphPattern::order_by(Self::reorder_joins(*inner, input_types), expression) + } + service @ GraphPattern::Service { .. } => { + // We don't do join reordering inside of SERVICE calls, we don't know about cardinalities + service + } + GraphPattern::Group { + inner, + variables, + aggregates, + } => GraphPattern::group( + Self::reorder_joins(*inner, input_types), + variables, + aggregates, + ), + } + } +} + +fn is_fit_for_for_loop_join( + pattern: &GraphPattern, + global_input_types: &VariableTypes, + entry_types: &VariableTypes, +) -> bool { + // TODO: think more about it + match pattern { + GraphPattern::Values { .. } + | GraphPattern::QuadPattern { .. } + | GraphPattern::Path { .. } => true, + #[cfg(feature = "sep-0006")] + GraphPattern::Lateral { left, right } => { + is_fit_for_for_loop_join(left, global_input_types, entry_types) + && is_fit_for_for_loop_join(right, global_input_types, entry_types) + } + GraphPattern::LeftJoin { + left, + right, + expression, + .. 
+        } => {
+            if !is_fit_for_for_loop_join(left, global_input_types, entry_types) {
+                return false;
+            }
+
+            // It is not ok to transform into for loop join if right binds a variable also bound by the entry part of the for loop join
+            let mut left_types = infer_graph_pattern_types(left, global_input_types.clone());
+            let right_types = infer_graph_pattern_types(right, global_input_types.clone());
+            if right_types.iter().any(|(variable, t)| {
+                *t != VariableType::UNDEF
+                    && left_types.get(variable).undef
+                    && entry_types.get(variable) != VariableType::UNDEF
+            }) {
+                return false;
+            }
+
+            // We don't forget the final expression
+            left_types.intersect_with(right_types);
+            is_expression_fit_for_for_loop_join(expression, &left_types, entry_types)
+        }
+        GraphPattern::Union { inner } => inner
+            .iter()
+            .all(|i| is_fit_for_for_loop_join(i, global_input_types, entry_types)),
+        GraphPattern::Filter { inner, expression } => {
+            is_fit_for_for_loop_join(inner, global_input_types, entry_types)
+                && is_expression_fit_for_for_loop_join(
+                    expression,
+                    &infer_graph_pattern_types(inner, global_input_types.clone()),
+                    entry_types,
+                )
+        }
+        GraphPattern::Extend {
+            inner,
+            expression,
+            variable,
+        } => {
+            is_fit_for_for_loop_join(inner, global_input_types, entry_types)
+                && entry_types.get(variable) == VariableType::UNDEF
+                && is_expression_fit_for_for_loop_join(
+                    expression,
+                    &infer_graph_pattern_types(inner, global_input_types.clone()),
+                    entry_types,
+                )
+        }
+        GraphPattern::Join { .. }
+        | GraphPattern::Minus { .. }
+        | GraphPattern::Service { .. }
+        | GraphPattern::OrderBy { .. }
+        | GraphPattern::Distinct { .. }
+        | GraphPattern::Reduced { .. }
+        | GraphPattern::Slice { .. }
+        | GraphPattern::Project { .. }
+        | GraphPattern::Group { .. } => false,
+    }
+}
+
+fn are_all_expression_variables_bound(
+    expression: &Expression,
+    variable_types: &VariableTypes,
+) -> bool {
+    expression
+        .used_variables()
+        .into_iter()
+        .all(|v| !variable_types.get(v).undef)
+}
+
+fn are_no_expression_variables_bound(
+    expression: &Expression,
+    variable_types: &VariableTypes,
+) -> bool {
+    expression
+        .used_variables()
+        .into_iter()
+        .all(|v| variable_types.get(v) == VariableType::UNDEF)
+}
+
+fn is_expression_fit_for_for_loop_join(
+    expression: &Expression,
+    input_types: &VariableTypes,
+    entry_types: &VariableTypes,
+) -> bool {
+    match expression {
+        Expression::NamedNode(_) | Expression::Literal(_) => true,
+        Expression::Variable(v) | Expression::Bound(v) => {
+            !input_types.get(v).undef || entry_types.get(v) == VariableType::UNDEF
+        }
+        Expression::Or(inner)
+        | Expression::And(inner)
+        | Expression::Coalesce(inner)
+        | Expression::FunctionCall(_, inner) => inner
+            .iter()
+            .all(|e| is_expression_fit_for_for_loop_join(e, input_types, entry_types)),
+        Expression::Equal(a, b)
+        | Expression::SameTerm(a, b)
+        | Expression::Greater(a, b)
+        | Expression::GreaterOrEqual(a, b)
+        | Expression::Less(a, b)
+        | Expression::LessOrEqual(a, b)
+        | Expression::Add(a, b)
+        | Expression::Subtract(a, b)
+        | Expression::Multiply(a, b)
+        | Expression::Divide(a, b) => {
+            is_expression_fit_for_for_loop_join(a, input_types, entry_types)
+                && is_expression_fit_for_for_loop_join(b, input_types, entry_types)
+        }
+        Expression::UnaryPlus(e) | Expression::UnaryMinus(e) | Expression::Not(e) => {
+            is_expression_fit_for_for_loop_join(e, input_types, entry_types)
+        }
+        Expression::If(a, b, c) => {
+            is_expression_fit_for_for_loop_join(a, input_types, entry_types)
+                && is_expression_fit_for_for_loop_join(b, input_types, entry_types)
+                && is_expression_fit_for_for_loop_join(c, input_types, entry_types)
+        }
+        Expression::Exists(inner) => is_fit_for_for_loop_join(inner, input_types, entry_types),
+    }
+}
+
+fn has_common_variables(
+    left: &VariableTypes,
+    right: &VariableTypes,
+    input_types: &VariableTypes,
+) -> bool {
+    // TODO: we should be smart and count as shared variables FILTER(?a = ?b)
+    left.iter().any(|(variable, left_type)| {
+        !left_type.undef && !right.get(variable).undef && input_types.get(variable).undef
+    })
+}
+
+fn join_key_variables(
+    left: &VariableTypes,
+    right: &VariableTypes,
+    input_types: &VariableTypes,
+) -> Vec<Variable> {
+    left.iter()
+        .filter(|(variable, left_type)| {
+            !left_type.undef && !right.get(variable).undef && input_types.get(variable).undef
+        })
+        .map(|(variable, _)| variable.clone())
+        .collect()
+}
+
+fn estimate_graph_pattern_size(pattern: &GraphPattern, input_types: &VariableTypes) -> usize {
+    match pattern {
+        GraphPattern::Values { bindings, .. } => bindings.len(),
+        GraphPattern::QuadPattern {
+            subject,
+            predicate,
+            object,
+            ..
+        } => estimate_triple_pattern_size(
+            is_term_pattern_bound(subject, input_types),
+            is_named_node_pattern_bound(predicate, input_types),
+            is_term_pattern_bound(object, input_types),
+        ),
+        GraphPattern::Path {
+            subject,
+            path,
+            object,
+            ..
+        } => estimate_path_size(
+            is_term_pattern_bound(subject, input_types),
+            path,
+            is_term_pattern_bound(object, input_types),
+        ),
+        GraphPattern::Join {
+            left,
+            right,
+            algorithm,
+        } => estimate_join_cost(left, right, algorithm, input_types),
+        GraphPattern::LeftJoin {
+            left,
+            right,
+            algorithm,
+            ..
+        } => match algorithm {
+            LeftJoinAlgorithm::HashBuildRightProbeLeft { keys } => {
+                let left_size = estimate_graph_pattern_size(left, input_types);
+                max(
+                    left_size,
+                    left_size
+                        .saturating_mul(estimate_graph_pattern_size(
+                            right,
+                            &infer_graph_pattern_types(right, input_types.clone()),
+                        ))
+                        .saturating_div(1_000_usize.saturating_pow(keys.len().try_into().unwrap())),
+                )
+            }
+        },
+        #[cfg(feature = "sep-0006")]
+        GraphPattern::Lateral { left, right } => estimate_lateral_cost(
+            left,
+            &infer_graph_pattern_types(left, input_types.clone()),
+            right,
+            input_types,
+        ),
+        GraphPattern::Union { inner } => inner
+            .iter()
+            .map(|inner| estimate_graph_pattern_size(inner, input_types))
+            .fold(0, usize::saturating_add),
+        GraphPattern::Minus { left, .. } => estimate_graph_pattern_size(left, input_types),
+        GraphPattern::Filter { inner, .. }
+        | GraphPattern::Extend { inner, .. }
+        | GraphPattern::OrderBy { inner, .. }
+        | GraphPattern::Project { inner, .. }
+        | GraphPattern::Distinct { inner, .. }
+        | GraphPattern::Reduced { inner, .. }
+        | GraphPattern::Group { inner, .. }
+        | GraphPattern::Service { inner, .. } => estimate_graph_pattern_size(inner, input_types),
+        GraphPattern::Slice {
+            inner,
+            start,
+            length,
+        } => {
+            let inner = estimate_graph_pattern_size(inner, input_types);
+            if let Some(length) = length {
+                min(inner.saturating_sub(*start), *length)
+            } else {
+                inner
+            }
+        }
+    }
+}
+
+fn estimate_join_cost(
+    left: &GraphPattern,
+    right: &GraphPattern,
+    algorithm: &JoinAlgorithm,
+    input_types: &VariableTypes,
+) -> usize {
+    match algorithm {
+        JoinAlgorithm::HashBuildLeftProbeRight { keys } => {
+            estimate_graph_pattern_size(left, input_types)
+                .saturating_mul(estimate_graph_pattern_size(right, input_types))
+                .saturating_div(1_000_usize.saturating_pow(keys.len().try_into().unwrap()))
+        }
+    }
+}
+
+fn estimate_lateral_cost(
+    left: &GraphPattern,
+    left_types: &VariableTypes,
+    right: &GraphPattern,
+    input_types: &VariableTypes,
+) -> usize {
+    estimate_graph_pattern_size(left, input_types)
+        .saturating_mul(estimate_graph_pattern_size(right, left_types))
+}
+
+fn estimate_triple_pattern_size(
+    subject_bound: bool,
+    predicate_bound: bool,
+    object_bound: bool,
+) -> usize {
+    match (subject_bound, predicate_bound, object_bound) {
+        (true, true, true) => 1,
+        (true, true, false) => 10,
+        (true, false, true) => 2,
+        (true, false, false) => 100,
+        (false, true, true) => 10_000,
+        (false, true, false) => 1_000_000,
+        (false, false, true) => 100_000,
+        (false, false, false) => 1_000_000_000,
+    }
+}
+
+fn estimate_path_size(start_bound: bool, path: &PropertyPathExpression, end_bound: bool) -> usize {
+    match path {
+        PropertyPathExpression::NamedNode(_) => {
+            estimate_triple_pattern_size(start_bound, true, end_bound)
+        }
+        PropertyPathExpression::Reverse(p) => estimate_path_size(end_bound, p, start_bound),
+        PropertyPathExpression::Sequence(a, b) => {
+            // We do a for loop join in the best direction
+            min(
+                estimate_path_size(start_bound, a, false)
+                    .saturating_mul(estimate_path_size(true, b, end_bound)),
+                estimate_path_size(start_bound, a, true)
+                    .saturating_mul(estimate_path_size(false, b, end_bound)),
+            )
+        }
+        PropertyPathExpression::Alternative(a, b) => estimate_path_size(start_bound, a, end_bound)
+            .saturating_add(estimate_path_size(start_bound, b, end_bound)),
+        PropertyPathExpression::ZeroOrMore(p) => {
+            if start_bound && end_bound {
+                1
+            } else if start_bound || end_bound {
+                estimate_path_size(start_bound, p, end_bound).saturating_mul(1000)
+            } else {
+                1_000_000_000
+            }
+        }
+        PropertyPathExpression::OneOrMore(p) => {
+            if start_bound && end_bound {
+                1
+            } else {
+                estimate_path_size(start_bound, p, end_bound).saturating_mul(1000)
+            }
+        }
+        PropertyPathExpression::ZeroOrOne(p) => {
+            if start_bound && end_bound {
+                1
+            } else if start_bound || end_bound {
+                estimate_path_size(start_bound, p, end_bound)
+            } else {
+                1_000_000_000
+            }
+        }
+        PropertyPathExpression::NegatedPropertySet(_) => {
+            estimate_triple_pattern_size(start_bound, false, end_bound)
+        }
+    }
+}
+
+fn is_term_pattern_bound(pattern: &GroundTermPattern, input_types: &VariableTypes) -> bool {
+    match pattern {
+        GroundTermPattern::NamedNode(_) | GroundTermPattern::Literal(_) => true,
+        GroundTermPattern::Variable(v) => !input_types.get(v).undef,
+        #[cfg(feature = "rdf-star")]
+        GroundTermPattern::Triple(t) => {
+            is_term_pattern_bound(&t.subject, input_types)
+                && is_named_node_pattern_bound(&t.predicate, input_types)
+                && is_term_pattern_bound(&t.object, input_types)
+        }
+    }
+}
+
+fn is_named_node_pattern_bound(pattern: &NamedNodePattern, input_types: &VariableTypes) -> bool {
+    match pattern {
+        NamedNodePattern::NamedNode(_) => true,
+        NamedNodePattern::Variable(v) => !input_types.get(v).undef,
+    }
+}
diff --git a/ng-oxigraph/src/sparopt/type_inference.rs b/ng-oxigraph/src/sparopt/type_inference.rs
new file mode 100644
index 0000000..7af0c68
--- /dev/null
+++ b/ng-oxigraph/src/sparopt/type_inference.rs
@@ -0,0 +1,462 @@
+use crate::oxrdf::Variable;
+use crate::spargebra::algebra::Function;
+use crate::spargebra::term::{GroundTerm, GroundTermPattern, NamedNodePattern};
+use crate::sparopt::algebra::{Expression, GraphPattern};
+use std::collections::HashMap;
+use std::ops::{BitAnd, BitOr};
+
+pub fn infer_graph_pattern_types(
+    pattern: &GraphPattern,
+    mut types: VariableTypes,
+) -> VariableTypes {
+    match pattern {
+        GraphPattern::QuadPattern {
+            subject,
+            predicate,
+            object,
+            graph_name,
+        } => {
+            add_ground_term_pattern_types(subject, &mut types, false);
+            if let NamedNodePattern::Variable(v) = predicate {
+                types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE)
+            }
+            add_ground_term_pattern_types(object, &mut types, true);
+            if let Some(NamedNodePattern::Variable(v)) = graph_name {
+                types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE)
+            }
+            types
+        }
+        GraphPattern::Path {
+            subject,
+            object,
+            graph_name,
+            ..
+        } => {
+            add_ground_term_pattern_types(subject, &mut types, false);
+            add_ground_term_pattern_types(object, &mut types, true);
+            if let Some(NamedNodePattern::Variable(v)) = graph_name {
+                types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE)
+            }
+            types
+        }
+        GraphPattern::Join { left, right, .. } => {
+            let mut output_types = infer_graph_pattern_types(left, types.clone());
+            output_types.intersect_with(infer_graph_pattern_types(right, types));
+            output_types
+        }
+        #[cfg(feature = "sep-0006")]
+        GraphPattern::Lateral { left, right } => {
+            infer_graph_pattern_types(right, infer_graph_pattern_types(left, types))
+        }
+        GraphPattern::LeftJoin { left, right, .. } => {
+            let mut right_types = infer_graph_pattern_types(right, types.clone()); // TODO: expression
+            for t in right_types.inner.values_mut() {
+                t.undef = true; // Right might be unset
+            }
+            let mut output_types = infer_graph_pattern_types(left, types);
+            output_types.intersect_with(right_types);
+            output_types
+        }
+        GraphPattern::Minus { left, .. } => infer_graph_pattern_types(left, types),
+        GraphPattern::Union { inner } => inner
+            .iter()
+            .map(|inner| infer_graph_pattern_types(inner, types.clone()))
+            .reduce(|mut a, b| {
+                a.union_with(b);
+                a
+            })
+            .unwrap_or_default(),
+        GraphPattern::Extend {
+            inner,
+            variable,
+            expression,
+        } => {
+            let mut types = infer_graph_pattern_types(inner, types);
+            types.intersect_variable_with(
+                variable.clone(),
+                infer_expression_type(expression, &types),
+            );
+            types
+        }
+        GraphPattern::Filter { inner, .. } => infer_graph_pattern_types(inner, types),
+        GraphPattern::Project { inner, variables } => VariableTypes {
+            inner: infer_graph_pattern_types(inner, types)
+                .inner
+                .into_iter()
+                .filter(|(v, _)| variables.contains(v))
+                .collect(),
+        },
+        GraphPattern::Distinct { inner }
+        | GraphPattern::Reduced { inner }
+        | GraphPattern::OrderBy { inner, .. }
+        | GraphPattern::Slice { inner, .. } => infer_graph_pattern_types(inner, types),
+        GraphPattern::Group {
+            inner,
+            variables,
+            aggregates,
+        } => {
+            let types = infer_graph_pattern_types(inner, types);
+            VariableTypes {
+                inner: types
+                    .inner
+                    .into_iter()
+                    .filter(|(v, _)| variables.contains(v))
+                    .chain(
+                        aggregates
+                            .iter()
+                            .map(|(v, _)| (v.clone(), VariableType::ANY)),
+                    ) // TODO: guess from aggregate
+                    .collect(),
+            }
+        }
+        GraphPattern::Values {
+            variables,
+            bindings,
+        } => {
+            for (i, v) in variables.iter().enumerate() {
+                let mut t = VariableType::default();
+                for binding in bindings {
+                    match binding[i] {
+                        Some(GroundTerm::NamedNode(_)) => t.named_node = true,
+                        Some(GroundTerm::Literal(_)) => t.literal = true,
+                        #[cfg(feature = "rdf-star")]
+                        Some(GroundTerm::Triple(_)) => t.triple = true,
+                        None => t.undef = true,
+                    }
+                }
+                types.intersect_variable_with(v.clone(), t)
+            }
+            types
+        }
+        GraphPattern::Service {
+            name,
+            inner,
+            silent,
+        } => {
+            let parent_types = types.clone();
+            let mut types = infer_graph_pattern_types(inner, types);
+            if let NamedNodePattern::Variable(v) = name {
+                types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE)
+            }
+            if *silent {
+                // On failure, single empty solution
+                types.union_with(parent_types);
+            }
+            types
+        }
+    }
+}
+
+fn add_ground_term_pattern_types(
+    pattern: &GroundTermPattern,
+    types: &mut VariableTypes,
+    is_object: bool,
+) {
+    if let GroundTermPattern::Variable(v) = pattern {
+        types.intersect_variable_with(
+            v.clone(),
+            if is_object {
+                VariableType::TERM
+            } else {
+                VariableType::SUBJECT
+            },
+        )
+    }
+    #[cfg(feature = "rdf-star")]
+    if let GroundTermPattern::Triple(t) = pattern {
+        add_ground_term_pattern_types(&t.subject, types, false);
+        if let NamedNodePattern::Variable(v) = &t.predicate {
+            types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE)
+        }
+        add_ground_term_pattern_types(&t.object, types, true);
+    }
+}
+
+pub fn infer_expression_type(expression: &Expression, types: &VariableTypes) -> VariableType {
+    match expression {
+        Expression::NamedNode(_) => VariableType::NAMED_NODE,
+        Expression::Literal(_) | Expression::Exists(_) | Expression::Bound(_) => {
+            VariableType::LITERAL
+        }
+        Expression::Variable(v) => types.get(v),
+        Expression::FunctionCall(Function::Datatype | Function::Iri, _) => {
+            VariableType::NAMED_NODE | VariableType::UNDEF
+        }
+        #[cfg(feature = "rdf-star")]
+        Expression::FunctionCall(Function::Predicate, _) => {
+            VariableType::NAMED_NODE | VariableType::UNDEF
+        }
+        Expression::FunctionCall(Function::BNode, args) => {
+            if args.is_empty() {
+                VariableType::BLANK_NODE
+            } else {
+                VariableType::BLANK_NODE | VariableType::UNDEF
+            }
+        }
+        Expression::FunctionCall(
+            Function::Rand | Function::Now | Function::Uuid | Function::StrUuid,
+            _,
+        ) => VariableType::LITERAL,
+        Expression::Or(_)
+        | Expression::And(_)
+        | Expression::Equal(_, _)
+        | Expression::Greater(_, _)
+        | Expression::GreaterOrEqual(_, _)
+        | Expression::Less(_, _)
+        | Expression::LessOrEqual(_, _)
+        | Expression::Add(_, _)
+        | Expression::Subtract(_, _)
+        | Expression::Multiply(_, _)
+        | Expression::Divide(_, _)
+        | Expression::UnaryPlus(_)
+        | Expression::UnaryMinus(_)
+        | Expression::Not(_)
+        | Expression::FunctionCall(
+            Function::Str
+            | Function::Lang
+            | Function::LangMatches
+            | Function::Abs
+            | Function::Ceil
+            | Function::Floor
+            | Function::Round
+            | Function::Concat
+            | Function::SubStr
+            | Function::StrLen
+            | Function::Replace
+            | Function::UCase
+            | Function::LCase
+            | Function::EncodeForUri
+            | Function::Contains
+            | Function::StrStarts
+            | Function::StrEnds
+            | Function::StrBefore
+            | Function::StrAfter
+            | Function::Year
+            | Function::Month
+            | Function::Day
+            | Function::Hours
+            | Function::Minutes
+            | Function::Seconds
+            | Function::Timezone
+            | Function::Tz
+            | Function::Md5
+            | Function::Sha1
+            | Function::Sha256
+            | Function::Sha384
+            | Function::Sha512
+            | Function::StrLang
+            | Function::StrDt
+            | Function::IsIri
+            | Function::IsBlank
+            | Function::IsLiteral
+            | Function::IsNumeric
+            | Function::Regex,
+            _,
+        ) => VariableType::LITERAL | VariableType::UNDEF,
+        #[cfg(feature = "sep-0002")]
+        Expression::FunctionCall(Function::Adjust, _) => {
+            VariableType::LITERAL | VariableType::UNDEF
+        }
+        #[cfg(feature = "rdf-star")]
+        Expression::FunctionCall(Function::IsTriple, _) => {
+            VariableType::LITERAL | VariableType::UNDEF
+        }
+        Expression::SameTerm(left, right) => {
+            if infer_expression_type(left, types).undef || infer_expression_type(right, types).undef
+            {
+                VariableType::LITERAL | VariableType::UNDEF
+            } else {
+                VariableType::LITERAL
+            }
+        }
+        Expression::If(_, then, els) => {
+            infer_expression_type(then, types) | infer_expression_type(els, types)
+        }
+        Expression::Coalesce(inner) => {
+            let mut t = VariableType::UNDEF;
+            for e in inner {
+                let new = infer_expression_type(e, types);
+                t = t | new;
+                if !new.undef {
+                    t.undef = false;
+                    return t;
+                }
+            }
+            t
+        }
+        #[cfg(feature = "rdf-star")]
+        Expression::FunctionCall(Function::Triple, _) => VariableType::TRIPLE | VariableType::UNDEF,
+        #[cfg(feature = "rdf-star")]
+        Expression::FunctionCall(Function::Subject, _) => {
+            VariableType::SUBJECT | VariableType::UNDEF
+        }
+        #[cfg(feature = "rdf-star")]
+        Expression::FunctionCall(Function::Object, _) => VariableType::TERM | VariableType::UNDEF,
+        Expression::FunctionCall(Function::Custom(_), _) => VariableType::ANY,
+    }
+}
+
+#[derive(Default, Clone, Debug)]
+pub struct VariableTypes {
+    inner: HashMap<Variable, VariableType>,
+}
+
+impl VariableTypes {
+    pub fn get(&self, variable: &Variable) -> VariableType {
+        self.inner
+            .get(variable)
+            .copied()
+            .unwrap_or(VariableType::UNDEF)
+    }
+
+    pub fn iter(&self) -> impl Iterator<Item = (&Variable, &VariableType)> {
+        self.inner.iter()
+    }
+
+    pub fn intersect_with(&mut self, other: Self) {
+        for (v, t) in other.inner {
+            self.intersect_variable_with(v, t);
+        }
+    }
+
+    pub fn union_with(&mut self, other: Self) {
+        for (v, t) in &mut self.inner {
+            if other.get(v).undef {
+                t.undef = true; // Might be undefined
+            }
+        }
+        for (v, mut t) in other.inner {
+            self.inner
+                .entry(v)
+                .and_modify(|ex| *ex = *ex | t)
+                .or_insert({
+                    t.undef = true;
+                    t
+                });
+        }
+    }
+
+    fn intersect_variable_with(&mut self, variable: Variable, t: VariableType) {
+        let t = self.get(&variable) & t;
+        if t != VariableType::UNDEF {
+            self.inner.insert(variable, t);
+        }
+    }
+}
+
+#[allow(clippy::struct_excessive_bools)]
+#[derive(Clone, Copy, Eq, PartialEq, Debug, Default)]
+pub struct VariableType {
+    pub undef: bool,
+    pub named_node: bool,
+    pub blank_node: bool,
+    pub literal: bool,
+    #[cfg(feature = "rdf-star")]
+    pub triple: bool,
+}
+
+impl VariableType {
+    const ANY: Self = Self {
+        undef: true,
+        named_node: true,
+        blank_node: true,
+        literal: true,
+        #[cfg(feature = "rdf-star")]
+        triple: true,
+    };
+    const BLANK_NODE: Self = Self {
+        undef: false,
+        named_node: false,
+        blank_node: true,
+        literal: false,
+        #[cfg(feature = "rdf-star")]
+        triple: false,
+    };
+    const LITERAL: Self = Self {
+        undef: false,
+        named_node: false,
+        blank_node: false,
+        literal: true,
+        #[cfg(feature = "rdf-star")]
+        triple: false,
+    };
+    const NAMED_NODE: Self = Self {
+        undef: false,
+        named_node: true,
+        blank_node: false,
+        literal: false,
+        #[cfg(feature = "rdf-star")]
+        triple: false,
+    };
+    const SUBJECT: Self = Self {
+        undef: false,
+        named_node: true,
+        blank_node: true,
+        literal: false,
+        #[cfg(feature = "rdf-star")]
+        triple: true,
+    };
+    const TERM: Self = Self {
+        undef: false,
+        named_node: true,
+        blank_node: true,
+        literal: true,
+        #[cfg(feature = "rdf-star")]
+        triple: true,
+    };
+    #[cfg(feature = "rdf-star")]
+    const TRIPLE: Self = Self {
+        undef: false,
+        named_node: false,
+        blank_node: false,
+        literal: false,
+        triple: true,
+    };
+    pub const UNDEF: Self = Self {
+        undef: true,
+        named_node: false,
+        blank_node: false,
+        literal: false,
+        #[cfg(feature = "rdf-star")]
+        triple: false,
+    };
+}
+
+impl BitOr for VariableType {
+    type Output = Self;
+
+    fn bitor(self, other: Self) -> Self {
+        Self {
+            undef: self.undef || other.undef,
+            named_node: self.named_node || other.named_node,
+            blank_node: self.blank_node || other.blank_node,
+            literal: self.literal || other.literal,
+            #[cfg(feature = "rdf-star")]
+            triple: self.triple || other.triple,
+        }
+    }
+}
+
+impl BitAnd for VariableType {
+    type Output = Self;
+
+    #[allow(clippy::nonminimal_bool)]
+    fn bitand(self, other: Self) -> Self {
+        Self {
+            undef: self.undef && other.undef,
+            named_node: self.named_node && other.named_node
+                || (self.undef && other.named_node)
+                || (self.named_node && other.undef),
+            blank_node: self.blank_node && other.blank_node
+                || (self.undef && other.blank_node)
+                || (self.blank_node && other.undef),
+            literal: self.literal && other.literal
+                || (self.undef && other.literal)
+                || (self.literal && other.undef),
+            #[cfg(feature = "rdf-star")]
+            triple: self.triple && other.triple
+                || (self.undef && other.triple)
+                || (self.triple && other.undef),
+        }
+    }
+}
diff --git a/ng-oxigraph/tests/store.rs b/ng-oxigraph/tests/store.rs
index 198e5f7..1477b6e 100644
--- a/ng-oxigraph/tests/store.rs
+++ b/ng-oxigraph/tests/store.rs
@@ -1,10 +1,10 @@
 #![cfg(test)]
 #![allow(clippy::panic_in_result_fn)]
 
-use ng_oxigraph::io::RdfFormat;
-use ng_oxigraph::model::vocab::{rdf, xsd};
-use ng_oxigraph::model::*;
-use ng_oxigraph::store::Store;
+use ng_oxigraph::oxigraph::io::RdfFormat;
+use ng_oxigraph::oxigraph::model::vocab::{rdf, xsd};
+use ng_oxigraph::oxigraph::model::*;
+use ng_oxigraph::oxigraph::store::Store;
 #[cfg(all(not(target_family = "wasm")))]
 use rand::random;
 #[cfg(all(not(target_family = "wasm")))]
diff --git a/ng-storage-rocksdb/Cargo.toml b/ng-storage-rocksdb/Cargo.toml
index 0ffd87a..415181b 100644
--- a/ng-storage-rocksdb/Cargo.toml
+++ b/ng-storage-rocksdb/Cargo.toml
@@ -16,7 +16,7 @@ rust-version.workspace = true
 serde_bare = "0.5.0"
 ng-repo = { path = "../ng-repo", version = "0.1.0" }
 
-[target.'cfg(not(target_arch = "wasm32"))'.dependencies.rocksdb]
+[target.'cfg(not(target_arch = "wasm32"))'.dependencies.ng-rocksdb]
 git = "https://git.nextgraph.org/NextGraph/rust-rocksdb.git"
 branch = "master"
 features = [ ]
diff --git a/ng-storage-rocksdb/src/block_storage.rs b/ng-storage-rocksdb/src/block_storage.rs
index e6795de..33d394e 100644
--- a/ng-storage-rocksdb/src/block_storage.rs
+++ b/ng-storage-rocksdb/src/block_storage.rs
@@ -11,7 +11,7 @@ use std::path::Path;
 use std::thread::available_parallelism;
 
 #[allow(unused_imports)]
-use rocksdb::{
+use ng_rocksdb::{
     BlockBasedOptions,
     ColumnFamily,
     ColumnFamilyDescriptor,
     DBCompressionType,
     Direction,
     Env,
     ErrorKind,
     IteratorMode,
     Options,
     TransactionDB,
     TransactionDBOptions,
 };
diff --git a/ng-storage-rocksdb/src/kcv_storage.rs b/ng-storage-rocksdb/src/kcv_storage.rs
index 0b42e9d..7d9a253 100644
--- a/ng-storage-rocksdb/src/kcv_storage.rs
+++ b/ng-storage-rocksdb/src/kcv_storage.rs
@@ -12,37 +12,37 @@ use std::path::Path;
 use std::path::PathBuf;
 use std::thread::available_parallelism;
 
-use rocksdb::BlockBasedOptions;
-use rocksdb::Cache;
-use rocksdb::DBIteratorWithThreadMode;
+use ng_rocksdb::BlockBasedOptions;
+use ng_rocksdb::Cache;
+use ng_rocksdb::DBIteratorWithThreadMode;
 
 use ng_repo::errors::*;
 use ng_repo::kcv_storage::*;
 use ng_repo::log::*;
 
 #[allow(unused_imports)]
-use rocksdb::{
+use ng_rocksdb::{
     ColumnFamily,
     ColumnFamilyDescriptor,
     Direction,
     Env,
     ErrorKind,
     IteratorMode,
     Options,
     TransactionDB,
     TransactionDBOptions,
 };
 
 pub struct RocksdbTransaction<'a> {
     store: &'a RocksDbKCVStorage,
-    tx: Option<rocksdb::Transaction<'a, TransactionDB>>,
+    tx: Option<ng_rocksdb::Transaction<'a, TransactionDB>>,
 }
 
 impl<'a> RocksdbTransaction<'a> {
     fn commit(&mut self) {
         self.tx.take().unwrap().commit().unwrap();
     }
-    fn tx(&self) -> &rocksdb::Transaction<'a, TransactionDB> {
+    fn tx(&self) -> &ng_rocksdb::Transaction<'a, TransactionDB> {
         self.tx.as_ref().unwrap()
     }
     fn get_iterator(
         &self,
         property_start: &[u8],
         family: &Option<String>,
-    ) -> Result<DBIteratorWithThreadMode<impl rocksdb::DBAccess + 'a>, StorageError> {
+    ) -> Result<DBIteratorWithThreadMode<impl ng_rocksdb::DBAccess + 'a>, StorageError> {
         Ok(match family {
             Some(cf) => self.tx().iterator_cf(
                 self.store
@@ -542,7 +542,7 @@ impl RocksDbKCVStorage {
         key_size: usize,
         key_prefix: Vec<u8>,
         suffix: Option<u8>,
-        mut iter: DBIteratorWithThreadMode<'_, impl rocksdb::DBAccess>,
+        mut iter: DBIteratorWithThreadMode<'_, impl ng_rocksdb::DBAccess>,
     ) -> Result<Vec<(Vec<u8>, Vec<u8>)>, StorageError> {
         if key_prefix.len() > key_size {
             return Err(StorageError::InvalidValue);
@@ -633,7 +633,7 @@ impl RocksDbKCVStorage {
         &self,
         property_start: &[u8],
         family: &Option<String>,
-    ) -> Result<DBIteratorWithThreadMode<'_, impl rocksdb::DBAccess>, StorageError> {
+    ) -> Result<DBIteratorWithThreadMode<'_, impl ng_rocksdb::DBAccess>, StorageError> {
         Ok(match family {
             Some(cf) => self.db.iterator_cf(
                 self.db
diff --git a/ng-verifier/Cargo.toml b/ng-verifier/Cargo.toml
index 6fd0246..c326797 100644
--- a/ng-verifier/Cargo.toml
+++ b/ng-verifier/Cargo.toml
@@ -32,7 +32,7 @@ yrs = "0.18.2"
 bloomfilter = { version = "1.0.13", features = ["random","serde"] }
 ng-repo = { path = "../ng-repo", version = "0.1.0" }
 ng-net = { path = "../ng-net", version = "0.1.0" }
-ng-oxigraph = { path = "../ng-oxigraph", version = "0.4.0-alpha.8-ng" }
+ng-oxigraph = { path = "../ng-oxigraph", version = "0.4.0-alpha.9-ng" }
 
 [target.'cfg(not(target_arch = "wasm32"))'.dependencies]
 ng-storage-rocksdb = { path = "../ng-storage-rocksdb", version = "0.1.0" }
diff --git a/ng-verifier/src/request_processor.rs b/ng-verifier/src/request_processor.rs
index cc9ae33..7c186d2 100644
--- a/ng-verifier/src/request_processor.rs
+++ b/ng-verifier/src/request_processor.rs
@@ -14,7 +14,7 @@ use std::sync::Arc;
 use futures::channel::mpsc;
 use futures::SinkExt;
 use futures::StreamExt;
-use ng_oxigraph::sparql::{results::*, Query, QueryResults};
+use ng_oxigraph::oxigraph::sparql::{results::*, Query, QueryResults};
 
 use ng_repo::errors::*;
 use ng_repo::file::{RandomAccessFile, ReadFile};
diff --git a/ng-verifier/src/verifier.rs b/ng-verifier/src/verifier.rs
index 127a4a0..19b5677 100644
--- a/ng-verifier/src/verifier.rs
+++ b/ng-verifier/src/verifier.rs
@@ -28,9 +28,9 @@ use ng_repo::object::Object;
 use serde::{Deserialize, Serialize};
 use web_time::SystemTime;
 
-//use ng_oxigraph::io::{RdfFormat, RdfParser, RdfSerializer};
-//use ng_oxigraph::store::Store;
-//use ng_oxigraph::model::GroundQuad;
+//use ng_oxigraph::oxigraph::io::{RdfFormat, RdfParser, RdfSerializer};
+//use ng_oxigraph::oxigraph::store::Store;
+//use ng_oxigraph::oxigraph::model::GroundQuad;
 //use yrs::{StateVector, Update};
 
 use ng_repo::file::ReadFile;
@@ -78,7 +78,7 @@ use crate::user_storage::UserStorage;
 pub struct Verifier {
     pub(crate) config: VerifierConfig,
     pub connected_broker: BrokerPeerId,
-    pub(crate) graph_dataset: Option<ng_oxigraph::store::Store>,
+    pub(crate) graph_dataset: Option<ng_oxigraph::oxigraph::store::Store>,
     pub(crate) user_storage: Option<Arc<Box<dyn UserStorage>>>,
     block_storage: Option<Arc<std::sync::RwLock<dyn BlockStorage + Send + Sync>>>,
     last_seq_num: u64,
@@ -2019,7 +2019,7 @@ impl Verifier {
     ) -> Result<Self, NgError> {
         let (graph, user, block) = match &config.config_type {
             VerifierConfigType::Memory | VerifierConfigType::JsSaveSession(_) => (
-                Some(ng_oxigraph::store::Store::new().unwrap()),
+                Some(ng_oxigraph::oxigraph::store::Store::new().unwrap()),
                 Some(Box::new(InMemoryUserStorage::new()) as Box<dyn UserStorage>),
                 Some(block_storage),
             ),
@@ -2036,8 +2036,11 @@ impl Verifier {
             // this is very temporary, until we remove the code in oxi_rocksdb of oxigraph,
             // and have oxigraph use directly the UserStorage
             Some(
-                ng_oxigraph::store::Store::open_with_key(path_oxi, config.user_master_key)
-                    .map_err(|e| NgError::OxiGraphError(e.to_string()))?,
+                ng_oxigraph::oxigraph::store::Store::open_with_key(
+                    path_oxi,
+                    config.user_master_key,
+                )
+                .map_err(|e| NgError::OxiGraphError(e.to_string()))?,
             ),
             Some(Box::new(RocksDbUserStorage::open(
                 &path_user,
diff --git a/ngaccount/web/src/routes/Create.svelte b/ngaccount/web/src/routes/Create.svelte
index e365e3c..cf7ffe6 100644
--- a/ngaccount/web/src/routes/Create.svelte
+++ b/ngaccount/web/src/routes/Create.svelte
@@ -210,7 +210,7 @@
           />
         </svg>
         <span
-          >Our servers are located in Germany, and we comply with the
+          >Our servers are located in France, and we comply with the
          GDPR regulation.</span
        >
      </li>
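
For readers tracing the vendored sparopt code above: the join-reordering pass is driven entirely by the fixed selectivity constants of `estimate_triple_pattern_size` and the `1000^keys` divisor of `estimate_join_cost`, while `estimate_lateral_cost` re-estimates the right side with the left side's variables bound. A self-contained sketch of how these pieces trade off; the constants mirror the patch, but the function names, `main`, and the example query are illustrative only:

```rust
// Standalone sketch of the cost model used by the optimizer above;
// constants copied from estimate_triple_pattern_size(), rest illustrative.
fn triple_pattern_size(s_bound: bool, p_bound: bool, o_bound: bool) -> usize {
    match (s_bound, p_bound, o_bound) {
        (true, true, true) => 1,
        (true, true, false) => 10,
        (true, false, true) => 2,
        (true, false, false) => 100,
        (false, true, true) => 10_000,
        (false, true, false) => 1_000_000,
        (false, false, true) => 100_000,
        (false, false, false) => 1_000_000_000,
    }
}

// Hash join: build on the left, probe with the right; each shared key is
// assumed to cut the cross product by a factor of 1000.
fn hash_join_cost(left: usize, right: usize, shared_keys: u32) -> usize {
    left.saturating_mul(right)
        .saturating_div(1_000_usize.saturating_pow(shared_keys))
}

// Lateral ("for loop") join: the right side is re-evaluated once per left
// binding, so its size is estimated with the left variables already bound.
fn lateral_cost(left: usize, right_with_left_bound: usize) -> usize {
    left.saturating_mul(right_with_left_bound)
}

fn main() {
    // { ?s :p ?o } joined with { ?o :q ?v } on the shared variable ?o.
    let left = triple_pattern_size(false, true, false); // 1_000_000
    let right_unbound = triple_pattern_size(false, true, false); // 1_000_000
    let right_bound = triple_pattern_size(true, true, false); // 10, ?o bound per row
    println!("hash join:    {}", hash_join_cost(left, right_unbound, 1)); // 1_000_000_000
    println!("lateral join: {}", lateral_cost(left, right_bound)); // 10_000_000
}
```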
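The two nested `while let` loops in `reorder_joins` implement a greedy ordering: each connected component starts from the smallest pattern, then repeatedly absorbs the connected neighbour that is cheapest to join, and disconnected components are only cross-joined at the very end by the final `reduce`. A toy version of that strategy (invented sizes; raw pattern size stands in for the real join-cost estimate):

```rust
// Toy illustration of the greedy reordering; not the crate's API.
use std::collections::HashSet;

fn main() {
    // (pattern, estimated size, variables)
    let patterns: Vec<(&str, usize, HashSet<&str>)> = vec![
        ("?film :director ?d", 50_000, ["film", "d"].into()),
        ("?d :name \"Lynch\"", 2, ["d"].into()),
        ("?film :year ?y", 100_000, ["film", "y"].into()),
    ];
    let mut placed = vec![false; patterns.len()];
    let mut order = Vec::new();
    // Start each connected component from the smallest unplaced pattern.
    while let Some(start) = (0..patterns.len())
        .filter(|i| !placed[*i])
        .min_by_key(|i| patterns[*i].1)
    {
        placed[start] = true;
        order.push(patterns[start].0);
        let mut vars = patterns[start].2.clone();
        // Absorb the cheapest neighbour sharing a variable until none is left.
        while let Some(next) = (0..patterns.len())
            .filter(|i| !placed[*i] && !vars.is_disjoint(&patterns[*i].2))
            .min_by_key(|i| patterns[*i].1)
        {
            placed[next] = true;
            order.push(patterns[next].0);
            vars.extend(patterns[next].2.iter().copied());
        }
    }
    // Expected order: the selective name lookup first, then director, then year.
    println!("{order:?}");
}
```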
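The `VariableType` flags in type_inference.rs form a small lattice: `|` unions possibilities (UNION branches, IF/COALESCE), while `&` intersects them across a join, with the twist that `undef` on one side keeps the other side's possibilities alive, since an unbound variable joins with anything. A reduced replica with three of the five flags, just to make that twist concrete (the `BitAnd` clauses are copied from the patch, everything else is illustrative):

```rust
// Reduced replica of the VariableType lattice; not the crate's API.
use std::ops::{BitAnd, BitOr};

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Ty {
    undef: bool,
    named_node: bool,
    literal: bool,
}

const UNDEF: Ty = Ty { undef: true, named_node: false, literal: false };
const NAMED_NODE: Ty = Ty { undef: false, named_node: true, literal: false };
const LITERAL: Ty = Ty { undef: false, named_node: false, literal: true };

impl BitOr for Ty {
    type Output = Self;
    // Union of possibilities, e.g. across UNION branches.
    fn bitor(self, o: Self) -> Self {
        Ty {
            undef: self.undef || o.undef,
            named_node: self.named_node || o.named_node,
            literal: self.literal || o.literal,
        }
    }
}

impl BitAnd for Ty {
    type Output = Self;
    // Intersection across a join: a possibly-undef side lets the other
    // side's possibilities survive.
    fn bitand(self, o: Self) -> Self {
        Ty {
            undef: self.undef && o.undef,
            named_node: self.named_node && o.named_node
                || (self.undef && o.named_node)
                || (self.named_node && o.undef),
            literal: self.literal && o.literal
                || (self.undef && o.literal)
                || (self.literal && o.undef),
        }
    }
}

fn main() {
    // Joining a maybe-unbound variable with one known to be an IRI
    // narrows it to an IRI instead of discarding the binding.
    assert_eq!(UNDEF & NAMED_NODE, NAMED_NODE);
    // Joining incompatible types leaves no possibility at all.
    assert_eq!(
        NAMED_NODE & LITERAL,
        Ty { undef: false, named_node: false, literal: false }
    );
    // OPTIONAL-style merging keeps both possibilities plus undef.
    assert_eq!(
        UNDEF | LITERAL,
        Ty { undef: true, named_node: false, literal: true }
    );
    println!("lattice checks passed");
}
```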
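Finally, the test and verifier hunks show the only change downstream callers need: the former crate-root modules of ng-oxigraph now live under its `oxigraph` module. A hypothetical caller, assuming the crate otherwise keeps upstream Oxigraph's public store API:

```rust
// Hypothetical downstream usage after this patch; the `oxigraph` module
// prefix is the only change relative to the old import paths.
use ng_oxigraph::oxigraph::model::{GraphName, NamedNode, Quad};
use ng_oxigraph::oxigraph::sparql::QueryResults;
use ng_oxigraph::oxigraph::store::Store;

fn demo() -> Result<(), Box<dyn std::error::Error>> {
    // In-memory store, as used by VerifierConfigType::Memory above.
    let store = Store::new()?;
    store.insert(&Quad::new(
        NamedNode::new("http://example.com/s")?,
        NamedNode::new("http://example.com/p")?,
        NamedNode::new("http://example.com/o")?,
        GraphName::DefaultGraph,
    ))?;
    if let QueryResults::Solutions(solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }")? {
        for solution in solutions {
            println!("{}", solution?.get("s").unwrap());
        }
    }
    Ok(())
}

fn main() {
    demo().unwrap();
}
```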