diff --git a/.cargo/config.toml b/.cargo/config.toml index be86f8ae..e248c473 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -2,94 +2,124 @@ rustflags = [ "-Wtrivial-casts", "-Wtrivial-numeric-casts", - "-Wunsafe_code", + "-Wunsafe-code", "-Wunused-lifetimes", "-Wunused-qualifications", + # TODO: 1.63+ "-Wclippy::as-underscore", + # TODO: 1.65+ "-Wclippy::bool-to-int-with-if", + "-Wclippy::borrow-as-ptr", + "-Wclippy::case-sensitive-file-extension-comparisons", "-Wclippy::cast-lossless", "-Wclippy::cast-possible-truncation", "-Wclippy::cast-possible-wrap", "-Wclippy::cast-precision-loss", + "-Wclippy::cast-ptr-alignment", "-Wclippy::cast-sign-loss", "-Wclippy::checked-conversions", + "-Wclippy::clone-on-ref-ptr", "-Wclippy::cloned-instead-of-copied", "-Wclippy::copy-iterator", "-Wclippy::dbg-macro", - "-Wclippy::debug-assert-with-mut-call", "-Wclippy::decimal-literal-representation", - "-Wclippy::empty-line-after-outer-attr", + "-Wclippy::default-trait-access", + "-Wclippy::default-union-representation", + # TODO: 1.61+ "-Wclippy::deref-by-slicing", + # TODO: 1.63+ "-Wclippy::doc-link-with-quotes", + # TODO: 1.62+ "-Wclippy::empty-drop", "-Wclippy::empty-enum", + # TODO: on major version "-Wclippy::empty-structs-with-brackets", "-Wclippy::enum-glob-use", + "-Wclippy::exit", "-Wclippy::expect-used", "-Wclippy::expl-impl-clone-on-copy", "-Wclippy::explicit-deref-methods", "-Wclippy::explicit-into-iter-loop", "-Wclippy::explicit-iter-loop", - "-Wclippy::fallible-impl-from", "-Wclippy::filter-map-next", "-Wclippy::flat-map-option", + "-Wclippy::fn-to-numeric-cast-any", + # TODO: 1.62+ "-Wclippy::format-push-string", "-Wclippy::from-iter-instead-of-collect", "-Wclippy::get-unwrap", "-Wclippy::if-not-else", + "-Wclippy::if-then-some-else-none", "-Wclippy::implicit-clone", - "-Wclippy::implicit-saturating-sub", - "-Wclippy::imprecise-flops", "-Wclippy::inconsistent-struct-constructor", + "-Wclippy::index-refutable-slice", "-Wclippy::inefficient-to-string",
"-Wclippy::inline-always", + "-Wclippy::inline-asm-x86-att-syntax", + "-Wclippy::inline-asm-x86-intel-syntax", "-Wclippy::invalid-upcast-comparisons", "-Wclippy::items-after-statements", "-Wclippy::large-digit-groups", + # TODO: 1.68+ "-Wclippy::large-futures", "-Wclippy::large-stack-arrays", "-Wclippy::large-types-passed-by-value", "-Wclippy::let-underscore-must-use", "-Wclippy::let-unit-value", "-Wclippy::linkedlist", + "-Wclippy::lossy-float-literal", "-Wclippy::macro-use-imports", + "-Wclippy::manual-assert", + # TODO: 1.65+ "-Wclippy::manual-instant-elapsed", + # TODO: 1.67+ "-Wclippy::manual-let-else", "-Wclippy::manual-ok-or", - "-Wclippy::map-flatten", + # TODO: 1.65+ "-Wclippy::manual-string-new", + "-Wclippy::many-single-char-names", "-Wclippy::map-unwrap-or", "-Wclippy::match-bool", "-Wclippy::match-same-arms", "-Wclippy::match-wildcard-for-single-variants", "-Wclippy::maybe-infinite-iter", "-Wclippy::mem-forget", + # TODO: 1.63+ "-Wclippy::mismatching-type-param-order", "-Wclippy::multiple-inherent-impl", "-Wclippy::mut-mut", - "-Wclippy::mutex-integer", + "-Wclippy::mutex-atomic", "-Wclippy::naive-bytecount", "-Wclippy::needless-bitwise-bool", "-Wclippy::needless-continue", "-Wclippy::needless-pass-by-value", + "-Wclippy::no-effect-underscore-binding", + # TODO: 1.69+ "-Wclippy::no-mangle-with-rust-abi", "-Wclippy::non-ascii-literal", - "-Wclippy::nonstandard-macro-braces", - "-Wclippy::path-buf-push-overwrite", "-Wclippy::print-stderr", "-Wclippy::print-stdout", + "-Wclippy::ptr-as-ptr", "-Wclippy::range-minus-one", "-Wclippy::range-plus-one", + "-Wclippy::rc-buffer", "-Wclippy::rc-mutex", - "-Wclippy::enum-variant-names", + "-Wclippy::redundant-closure-for-method-calls", "-Wclippy::redundant-else", - "-Wclippy::redundant-pub-crate", + "-Wclippy::redundant-feature-names", "-Wclippy::ref-binding-to-reference", "-Wclippy::ref-option-ref", "-Wclippy::rest-pat-in-fully-bound-structs", + "-Wclippy::return-self-not-must-use", 
"-Wclippy::same-functions-in-if-condition", + # TODO: strange failure on 1.60 "-Wclippy::same-name-method", + # TODO: 1.68+ "-Wclippy::semicolon-outside-block", + "-Wclippy::single-match-else", + "-Wclippy::stable-sort-primitive", "-Wclippy::str-to-string", "-Wclippy::string-add", "-Wclippy::string-add-assign", "-Wclippy::string-lit-as-bytes", "-Wclippy::string-to-string", - "-Wclippy::suboptimal-flops", - "-Wclippy::suspicious-operation-groupings", + # TODO: 1.67+ "-Wclippy::suspicious-xor-used-as-pow", "-Wclippy::todo", - "-Wclippy::trait-duplication-in-bounds", "-Wclippy::transmute-ptr-to-ptr", - "-Wclippy::trivial-regex", "-Wclippy::trivially-copy-pass-by-ref", - "-Wclippy::type-repetition-in-bounds", + "-Wclippy::try-err", "-Wclippy::unicode-not-nfc", "-Wclippy::unimplemented", + # TODO: 1.66+ "-Wclippy::uninlined-format-args", + # TODO: 1.70+ "-Wclippy::unnecessary-box-returns", + # TODO: 1.61+ "-Wclippy::unnecessary-join", + # TODO: 1.67+ "-Wclippy::unnecessary-safety-comment", + # TODO: 1.67+ "-Wclippy::unnecessary-safety-doc", "-Wclippy::unnecessary-self-imports", "-Wclippy::unnecessary-wraps", "-Wclippy::unneeded-field-pattern", @@ -99,13 +129,9 @@ rustflags = [ "-Wclippy::unused-async", "-Wclippy::unused-self", "-Wclippy::use-debug", - "-Wclippy::use-self", "-Wclippy::used-underscore-binding", - "-Wclippy::useless-let-if-seq", - "-Wclippy::useless-transmute", "-Wclippy::verbose-bit-mask", "-Wclippy::verbose-file-reads", "-Wclippy::wildcard-dependencies", "-Wclippy::zero-sized-map-values", - "-Wclippy::wrong-self-convention", ] \ No newline at end of file diff --git a/.clusterfuzzlite/build.sh b/.clusterfuzzlite/build.sh old mode 100644 new mode 100755 index 87af18df..ff3d6712 --- a/.clusterfuzzlite/build.sh +++ b/.clusterfuzzlite/build.sh @@ -5,7 +5,7 @@ function build_seed_corpus() { mkdir "/tmp/oxigraph_$1" for file in **/*."$2" do - hash=($(sha256sum "$file")) + hash=$(sha256sum "$file" | awk '{print $1;}') cp "$file" "/tmp/oxigraph_$1/$hash" done zip 
"$1_seed_corpus.zip" /tmp/"oxigraph_$1"/* @@ -15,9 +15,10 @@ function build_seed_corpus() { cd "$SRC"/oxigraph cargo fuzz build -O --debug-assertions -for TARGET in sparql_eval # sparql_results_json sparql_results_tsv +for TARGET in sparql_eval sparql_results_json sparql_results_tsv sparql_results_xml do cp fuzz/target/x86_64-unknown-linux-gnu/release/$TARGET "$OUT"/ done -# build_seed_corpus sparql_results_json json -# build_seed_corpus sparql_results_tsv tsv +build_seed_corpus sparql_results_json srj +build_seed_corpus sparql_results_tsv tsv +build_seed_corpus sparql_results_xml srx diff --git a/.github/dependabot.yml b/.github/dependabot.yml index a02a15df..507639d0 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -9,3 +9,8 @@ updates: versioning-strategy: increase-if-necessary schedule: interval: weekly + - package-ecosystem: "npm" + directory: "/js/" + versioning-strategy: increase-if-necessary + schedule: + interval: weekly diff --git a/.github/workflows/artifacts.yml b/.github/workflows/artifacts.yml index 256a5084..430a1c6e 100644 --- a/.github/workflows/artifacts.yml +++ b/.github/workflows/artifacts.yml @@ -21,8 +21,8 @@ jobs: submodules: true - run: rustup update && rustup target add aarch64-unknown-linux-gnu - run: | - sudo apt install -y g++-aarch64-linux-gnu - echo -e "[target.aarch64-unknown-linux-gnu]\nlinker = \"aarch64-linux-gnu-gcc\"" >> .cargo/config.toml + sudo apt update && sudo apt install -y g++-aarch64-linux-gnu + echo -e "\n\n[target.aarch64-unknown-linux-gnu]\nlinker = \"aarch64-linux-gnu-gcc\"" >> .cargo/config.toml - uses: Swatinem/rust-cache@v2 - run: cargo build --release working-directory: ./server diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 097c191a..be981cc7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -281,7 +281,7 @@ jobs: python-version: "3.10" cache: pip
cache-dependency-path: '**/requirements.dev.txt' - - run: pip install "maturin~=0.14.0" + - run: pip install "maturin~=0.15.0" - run: maturin build -m python/Cargo.toml - run: pip install --no-index --find-links=target/wheels/ pyoxigraph - run: rm -r target/wheels @@ -347,29 +347,18 @@ jobs: minimize-crashes: true parallel-fuzzing: true storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git - - fuzz_prune: - if: github.event_name != 'pull_request' - needs: fuzz_repo - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - submodules: true - - uses: google/clusterfuzzlite/actions/build_fuzzers@v1 - with: - language: rust - github-token: ${{ secrets.GITHUB_TOKEN }} + continue-on-error: true - uses: google/clusterfuzzlite/actions/run_fuzzers@v1 with: github-token: ${{ secrets.GITHUB_TOKEN }} - fuzz-seconds: 14400 + fuzz-seconds: 3600 mode: prune storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git + continue-on-error: true fuzz_coverage: if: github.event_name != 'pull_request' - needs: fuzz_prune + needs: fuzz_repo runs-on: ubuntu-latest steps: - uses: google/clusterfuzzlite/actions/build_fuzzers@v1 @@ -379,7 +368,14 @@ jobs: - uses: google/clusterfuzzlite/actions/run_fuzzers@v1 with: github-token: ${{ secrets.GITHUB_TOKEN }} - fuzz-seconds: 600 + fuzz-seconds: 3600 mode: coverage sanitizer: coverage storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git + + shellcheck: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - run: sudo apt install -y shellcheck + - run: git grep -l '^#\( *shellcheck \|!\(/bin/\|/usr/bin/env \)\(sh\|bash\|dash\|ksh\)\)' | xargs shellcheck diff --git a/.mailmap b/.mailmap new file mode 100644 index 00000000..7c85fb7b --- /dev/null +++ b/.mailmap @@ -0,0 +1,3 @@ +Thomas Tanon +Thomas Tanon +Thomas Tanon diff --git a/CHANGELOG.md b/CHANGELOG.md index 
b26dc674..58ff2b6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +## [0.3.16] - 2023-04-29 + +### Changed +- Fixes flush and compaction on the GSPO index. It might improve Oxigraph performances and storage space. +- SPARQL: fixes some optimizations in presence quoted triples with nested variables. +- SPARQL profiler: adds EXISTS operation to the explanation and profiling tree. +- Upgrades RocksDB to 8.1.1. + + ## [0.3.15] - 2023-04-18 ### Added diff --git a/Cargo.lock b/Cargo.lock index 52f50962..07f2bc46 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "aho-corasick" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" +dependencies = [ + "memchr", +] + [[package]] name = "anes" version = "0.1.6" @@ -141,9 +150,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.12.0" +version = "3.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" +checksum = "9b1ce199063694f33ffb7dd4e0ee620741495c32833cde5aa08f02a0bf96f0c8" [[package]] name = "cast" @@ -215,9 +224,9 @@ dependencies = [ [[package]] name = "clap" -version = "3.2.23" +version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" dependencies = [ "bitflags", "clap_lex 0.2.4", @@ -299,9 +308,9 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" +checksum = 
"3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58" dependencies = [ "libc", ] @@ -325,7 +334,7 @@ dependencies = [ "atty", "cast", "ciborium", - "clap 3.2.23", + "clap 3.2.25", "criterion-plot", "itertools", "lazy_static", @@ -381,7 +390,7 @@ dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", - "memoffset", + "memoffset 0.8.0", "scopeguard", ] @@ -487,9 +496,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.25" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841" +checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" dependencies = [ "crc32fast", "miniz_oxide", @@ -563,7 +572,7 @@ version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "029d74589adefde59de1a0c4f4732695c32805624aec7b68d91503d4dba79afc" dependencies = [ - "aho-corasick", + "aho-corasick 0.7.20", "bstr", "fnv", "log", @@ -774,9 +783,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.141" +version = "0.2.142" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" +checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" [[package]] name = "libloading" @@ -790,9 +799,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.3.3" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b085a4f2cde5781fc4b1717f2e86c62f5cda49de7ba99a7c2eae02b61c9064c" +checksum = "b64f40e5e03e0d54f03845c8197d0291253cdbedfb1cb46b13c2c117554a9f4c" [[package]] name = "lock_api" @@ -837,6 +846,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -845,9 +863,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.6.2" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" dependencies = [ "adler", ] @@ -927,7 +945,7 @@ dependencies = [ [[package]] name = "oxigraph" -version = "0.3.16-dev" +version = "0.3.17-dev" dependencies = [ "criterion", "digest", @@ -959,7 +977,7 @@ dependencies = [ [[package]] name = "oxigraph_js" -version = "0.3.16-dev" +version = "0.3.17-dev" dependencies = [ "console_error_panic_hook", "js-sys", @@ -969,7 +987,7 @@ dependencies = [ [[package]] name = "oxigraph_server" -version = "0.3.16-dev" +version = "0.3.17-dev" dependencies = [ "anyhow", "assert_cmd", @@ -1017,7 +1035,7 @@ checksum = "bb175ec8981211357b7b379869c2f8d555881c55ea62311428ec0de46d89bd5c" [[package]] name = "oxrdf" -version = "0.1.5" +version = "0.1.6-dev" dependencies = [ "oxilangtag", "oxiri", @@ -1027,7 +1045,7 @@ dependencies = [ [[package]] name = "oxrocksdb-sys" -version = "0.3.16-dev" +version = "0.3.17-dev" dependencies = [ "bindgen", "cc", @@ -1036,10 +1054,9 @@ dependencies = [ [[package]] name = "oxsdatatypes" -version = "0.1.1" +version = "0.1.2-dev" dependencies = [ "js-sys", - "nom", ] [[package]] @@ -1219,14 +1236,14 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3b1ac5b3731ba34fdaa9785f8d74d17448cd18f30cf19e0c7e7b1fdb5272109" +checksum = "cffef52f74ec3b1a1baf295d9b8fcc3070327aefc39a6d00656b13c1d0b8885c" dependencies = [ "cfg-if", "indoc", "libc", - "memoffset", + 
"memoffset 0.9.0", "parking_lot", "pyo3-build-config", "pyo3-ffi", @@ -1236,9 +1253,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cb946f5ac61bb61a5014924910d936ebd2b23b705f7a4a3c40b05c720b079a3" +checksum = "713eccf888fb05f1a96eb78c0dbc51907fee42b3377272dc902eb38985f418d5" dependencies = [ "once_cell", "target-lexicon", @@ -1246,9 +1263,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd4d7c5337821916ea2a1d21d1092e8443cf34879e53a0ac653fbb98f44ff65c" +checksum = "5b2ecbdcfb01cbbf56e179ce969a048fd7305a66d4cdf3303e0da09d69afe4c3" dependencies = [ "libc", "pyo3-build-config", @@ -1256,9 +1273,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d39c55dab3fc5a4b25bbd1ac10a2da452c4aca13bb450f22818a002e29648d" +checksum = "b78fdc0899f2ea781c463679b20cb08af9247febc8d052de941951024cd8aea0" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -1268,9 +1285,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97daff08a4c48320587b5224cc98d609e3c27b6d437315bd40b605c98eeb5918" +checksum = "60da7b84f1227c3e2fe7593505de274dcf4c8928b4e0a1c23d551a14e4e80a0f" dependencies = [ "proc-macro2", "quote", @@ -1279,7 +1296,7 @@ dependencies = [ [[package]] name = "pyoxigraph" -version = "0.3.16-dev" +version = "0.3.17-dev" dependencies = [ "oxigraph", "pyo3", @@ -1375,11 +1392,11 @@ dependencies = [ [[package]] name = "regex" -version = "1.7.3" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" +checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370" dependencies = [ - "aho-corasick", + "aho-corasick 1.0.1", "memchr", "regex-syntax", ] @@ -1392,9 +1409,9 @@ checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" [[package]] name = "regex-syntax" -version = "0.6.29" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" [[package]] name = "ring" @@ -1448,9 +1465,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.37.13" +version = "0.37.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f79bef90eb6d984c72722595b5b1348ab39275a5e5123faca6863bf07d75a4e0" +checksum = "a0661814f891c57c930a610266415528da53c4933e6dea5fb350cbfe048a9ece" dependencies = [ "bitflags", "errno", @@ -1629,7 +1646,7 @@ checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" [[package]] name = "sparesults" -version = "0.1.7" +version = "0.1.8-dev" dependencies = [ "json-event-parser", "oxrdf", @@ -1638,7 +1655,7 @@ dependencies = [ [[package]] name = "spargebra" -version = "0.2.7" +version = "0.2.8-dev" dependencies = [ "oxilangtag", "oxiri", @@ -1649,7 +1666,7 @@ dependencies = [ [[package]] name = "sparql-smith" -version = "0.1.0-alpha.3" +version = "0.1.0-alpha.4-dev" dependencies = [ "arbitrary", ] @@ -1690,9 +1707,9 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.6" +version = "0.12.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae9980cab1db3fceee2f6c6f643d5d8de2997c58ee8d25fb0cc8a9e9e7348e5" +checksum = "fd1ba337640d60c3e96bc6f0638a939b9c9a7f2c316a1598c279828b3d1dc8c5" [[package]] name = "tempfile" diff 
--git a/Cargo.toml b/Cargo.toml index 648ca11d..75a171d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ "server", "testsuite" ] +resolver = "2" [profile.release] lto = true diff --git a/bench/bsbm_blazegraph.sh b/bench/bsbm_blazegraph.sh index 559334ff..41f2dd0c 100755 --- a/bench/bsbm_blazegraph.sh +++ b/bench/bsbm_blazegraph.sh @@ -2,6 +2,8 @@ DATASET_SIZE=100000 PARALLELISM=16 + +set -eu wget -nc https://github.com/blazegraph/database/releases/download/BLAZEGRAPH_RELEASE_2_1_5/blazegraph.jar cd bsbm-tools || exit ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" @@ -12,7 +14,7 @@ curl -f -X POST -H 'Content-Type:text/turtle' -T "explore-${DATASET_SIZE}.nt" ht ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.blazegraph.2.1.5.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:9999/blazegraph/sparql -u http://localhost:9999/blazegraph/sparql -udataset "explore-update-${DATASET_SIZE}.nt" #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.blazegraph.2.1.5.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:9999/blazegraph/sparql kill $! 
-rm blazegraph.jnl -rm "explore-${DATASET_SIZE}.nt" -rm "explore-update-${DATASET_SIZE}.nt" -rm -r td_data +rm -f blazegraph.jnl +rm -f "explore-${DATASET_SIZE}.nt" +rm -f "explore-update-${DATASET_SIZE}.nt" +rm -rf td_data diff --git a/bench/bsbm_graphdb.sh b/bench/bsbm_graphdb.sh index 21c83d7c..1b865fc8 100755 --- a/bench/bsbm_graphdb.sh +++ b/bench/bsbm_graphdb.sh @@ -3,7 +3,8 @@ DATASET_SIZE=100000 PARALLELISM=16 VERSION="9.3.3" -JAVA_HOME=/usr/lib/jvm/java-11-openjdk + +set -eu cd bsbm-tools ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" ../graphdb-free-9.3.3/bin/graphdb -s -Dgraphdb.logger.root.level=WARN & @@ -17,7 +18,7 @@ curl -f -X PUT -H 'Content-Type:application/n-triples' -T "explore-${DATASET_SIZ #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.graphdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:7200/repositories/bsbm kill $! sleep 5 -rm -r ../graphdb-free-9.3.3/data -rm "explore-${DATASET_SIZE}.nt" -rm "explore-update-${DATASET_SIZE}.nt" -rm -r td_data +rm -rf ../graphdb-free-9.3.3/data +rm -f "explore-${DATASET_SIZE}.nt" +rm -f "explore-update-${DATASET_SIZE}.nt" +rm -rf td_data diff --git a/bench/bsbm_jena.sh b/bench/bsbm_jena.sh index 107fd47a..caa56d92 100755 --- a/bench/bsbm_jena.sh +++ b/bench/bsbm_jena.sh @@ -3,6 +3,8 @@ DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product. 
PARALLELISM=16 VERSION="4.3.2" + +set -eu wget -nc https://downloads.apache.org/jena/binaries/apache-jena-${VERSION}.zip cd bsbm-tools || exit ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" @@ -18,9 +20,9 @@ sleep 10 ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.jena.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:3030/bsbm/query -u http://localhost:3030/bsbm/update -udataset "explore-update-${DATASET_SIZE}.nt" #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.jena.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:3030/bsbm/query kill $! -rm "explore-${DATASET_SIZE}.nt" -rm "explore-update-${DATASET_SIZE}.nt" -rm -r td_data -rm -r run -rm -r apache-jena-${VERSION} -rm -r apache-jena-fuseki-${VERSION} +rm -f "explore-${DATASET_SIZE}.nt" +rm -f "explore-update-${DATASET_SIZE}.nt" +rm -rf td_data +rm -rf run +rm -rf apache-jena-${VERSION} +rm -rf apache-jena-fuseki-${VERSION} diff --git a/bench/bsbm_oxigraph.sh b/bench/bsbm_oxigraph.sh index 844b662a..7ee691cb 100755 --- a/bench/bsbm_oxigraph.sh +++ b/bench/bsbm_oxigraph.sh @@ -2,6 +2,8 @@ DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product. 
PARALLELISM=16 + +set -eu cd bsbm-tools ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" cargo build --release --manifest-path="../../server/Cargo.toml" @@ -13,7 +15,7 @@ sleep 1 ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.oxigraph.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://127.0.0.1:7878/query -u http://127.0.0.1:7878/update -udataset "explore-update-${DATASET_SIZE}.nt" #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" "http://127.0.0.1:7878/query" kill $! -rm -r oxigraph_data -rm "explore-${DATASET_SIZE}.nt" -rm "explore-update-${DATASET_SIZE}.nt" -rm -r td_data +rm -rf oxigraph_data +rm -f "explore-${DATASET_SIZE}.nt" +rm -f "explore-update-${DATASET_SIZE}.nt" +rm -rf td_data diff --git a/bench/bsbm_rdf4j.sh b/bench/bsbm_rdf4j.sh index a88e785e..6592b65f 100755 --- a/bench/bsbm_rdf4j.sh +++ b/bench/bsbm_rdf4j.sh @@ -4,6 +4,8 @@ DATASET_SIZE=100000 PARALLELISM=16 VERSION="4.2.2" TOMCAT_VERSION="9.0.71" + +set -eu wget -nc -O "rdf4j-${VERSION}.zip" "https://www.eclipse.org/downloads/download.php?file=/rdf4j/eclipse-rdf4j-${VERSION}-sdk.zip&mirror_id=1" wget -nc -O "tomcat-${TOMCAT_VERSION}.zip" "https://dlcdn.apache.org/tomcat/tomcat-9/v${TOMCAT_VERSION}/bin/apache-tomcat-${TOMCAT_VERSION}.zip" cd bsbm-tools || exit @@ -40,8 +42,8 @@ curl -f -X PUT -H 'Content-Type:application/n-triples' -T "explore-${DATASET_SIZ ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.rdf4j-lmdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:8080/rdf4j-server/repositories/bsbm -u http://localhost:8080/rdf4j-server/repositories/bsbm/statements -udataset "explore-update-${DATASET_SIZE}.nt" #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o 
"../bsbm.businessIntelligence.rdf4j-lmdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:8080/rdf4j-server/repositories/bsbm "${CATALINA_HOME}"/bin/shutdown.sh -rm "explore-${DATASET_SIZE}.nt" -rm "explore-update-${DATASET_SIZE}.nt" -rm -r td_data -rm -r "eclipse-rdf4j-${VERSION}" -rm -r "apache-tomcat-${TOMCAT_VERSION}" +rm -f "explore-${DATASET_SIZE}.nt" +rm -f "explore-update-${DATASET_SIZE}.nt" +rm -rf td_data +rm -rf "eclipse-rdf4j-${VERSION}" +rm -rf "apache-tomcat-${TOMCAT_VERSION}" diff --git a/bench/bsbm_virtuoso.sh b/bench/bsbm_virtuoso.sh index 60ef533e..2ff9b405 100755 --- a/bench/bsbm_virtuoso.sh +++ b/bench/bsbm_virtuoso.sh @@ -3,6 +3,8 @@ DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product. PARALLELISM=16 VERSION="7.2.5" + +set -eu cd bsbm-tools ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" cp ../virtuoso-opensource/database/virtuoso.ini.sample virtuoso.ini @@ -18,7 +20,7 @@ EOF # ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.virtuoso.${DATASET_SIZE}.${PARALLELISM}.${PARALLELISM}.${VERSION}.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test' -u 'http://dba:dba@localhost:8890/sparql-auth?graph-uri=urn:graph:test' -udataset "explore-update-${DATASET_SIZE}.nt" # ./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.virtuoso.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test' kill $! 
-rm -r ../database -rm "explore-${DATASET_SIZE}.nt" -rm "explore-update-${DATASET_SIZE}.nt" -rm -r td_data +rm -rf ../database +rm -f "explore-${DATASET_SIZE}.nt" +rm -f "explore-update-${DATASET_SIZE}.nt" +rm -rf td_data diff --git a/js/Cargo.toml b/js/Cargo.toml index fa008e7e..897e37b3 100644 --- a/js/Cargo.toml +++ b/js/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxigraph_js" -version = "0.3.16-dev" +version = "0.3.17-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -14,7 +14,7 @@ crate-type = ["cdylib"] name = "oxigraph" [dependencies] -oxigraph = { version = "0.3.16-dev", path="../lib" } +oxigraph = { version = "0.3.17-dev", path="../lib" } wasm-bindgen = "0.2" js-sys = "0.3" console_error_panic_hook = "0.1" diff --git a/js/package.json b/js/package.json index e549c162..e8744994 100644 --- a/js/package.json +++ b/js/package.json @@ -5,10 +5,10 @@ "devDependencies": { "@rdfjs/data-model": "^2.0.1", "mocha": "^10.0.0", - "rome": "^11.0.0" + "rome": "^12.0.0" }, "scripts": { - "fmt": "rome format . --write && rome check . --apply-suggested", + "fmt": "rome format . --write && rome check . --apply-unsafe", "test": "rome ci . 
&& wasm-pack build --debug --target nodejs && mocha", "build": "rm -rf pkg && wasm-pack build --release --target web --out-name web && mv pkg pkg-web && wasm-pack build --release --target nodejs --out-name node && mv pkg pkg-node && node build_package.js && rm -r pkg-web && rm -r pkg-node", "release": "npm run build && npm publish ./pkg", diff --git a/js/rome.json b/js/rome.json index d92e4ab6..272422a5 100644 --- a/js/rome.json +++ b/js/rome.json @@ -1,10 +1,13 @@ { - "formatter": { - "indentStyle": "space", - "indentSize": 4, - "lineWidth": 100 - }, - "linter": { - "ignore": ["pkg"] - } -} \ No newline at end of file + "formatter": { + "indentStyle": "space", + "indentSize": 4, + "lineWidth": 100 + }, + "linter": { + "ignore": ["pkg"] + }, + "organizeImports": { + "enabled": true + } +} diff --git a/js/src/model.rs b/js/src/model.rs index 179dafce..4929068f 100644 --- a/js/src/model.rs +++ b/js/src/model.rs @@ -19,7 +19,7 @@ thread_local! { #[wasm_bindgen(js_name = namedNode)] pub fn named_node(value: String) -> Result { NamedNode::new(value) - .map(|v| v.into()) + .map(Into::into) .map_err(|v| UriError::new(&v.to_string()).into()) } diff --git a/js/src/store.rs b/js/src/store.rs index 13b3f6b3..adee6eef 100644 --- a/js/src/store.rs +++ b/js/src/store.rs @@ -76,28 +76,28 @@ impl JsStore { None } .as_ref() - .map(|t: &NamedOrBlankNode| t.into()), + .map(<&Subject>::into), if let Some(predicate) = FROM_JS.with(|c| c.to_optional_term(predicate))? { Some(NamedNode::try_from(predicate)?) } else { None } .as_ref() - .map(|t: &NamedNode| t.into()), + .map(<&NamedNode>::into), if let Some(object) = FROM_JS.with(|c| c.to_optional_term(object))? { Some(object.try_into()?) } else { None } .as_ref() - .map(|t: &Term| t.into()), + .map(<&Term>::into), if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(graph_name))? { Some(graph_name.try_into()?) 
} else { None } .as_ref() - .map(|t: &GraphName| t.into()), + .map(<&GraphName>::into), ) .map(|v| v.map(|v| JsQuad::from(v).into())) .collect::, _>>() diff --git a/js/test/model.mjs b/js/test/model.mjs index e001843b..37f83199 100644 --- a/js/test/model.mjs +++ b/js/test/model.mjs @@ -1,8 +1,8 @@ /* global describe, it */ +import runTests from "../node_modules/@rdfjs/data-model/test/index.js"; import oxigraph from "../pkg/oxigraph.js"; import assert from "assert"; -import runTests from "../node_modules/@rdfjs/data-model/test/index.js"; runTests({ factory: oxigraph }); diff --git a/js/test/store.mjs b/js/test/store.mjs index 55a53a66..2317c022 100644 --- a/js/test/store.mjs +++ b/js/test/store.mjs @@ -1,8 +1,8 @@ /* global describe, it */ import { Store } from "../pkg/oxigraph.js"; -import assert from "assert"; import dataModel from "@rdfjs/data-model"; +import assert from "assert"; const ex = dataModel.namedNode("http://example.com"); const triple = dataModel.quad( diff --git a/lib/Cargo.toml b/lib/Cargo.toml index c425dd67..8c7ecfc9 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxigraph" -version = "0.3.16-dev" +version = "0.3.17-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -38,14 +38,14 @@ hex = "0.4" siphasher = "0.3" lazy_static = "1" json-event-parser = "0.1" -oxrdf = { version = "0.1.5", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] } -oxsdatatypes = { version = "0.1.1", path="oxsdatatypes" } -spargebra = { version = "0.2.7", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] } -sparesults = { version = "0.1.7", path="sparesults", features = ["rdf-star"] } +oxrdf = { version = "0.1.6-dev", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] } +oxsdatatypes = { version = "0.1.2-dev", path="oxsdatatypes" } +spargebra = { version = "0.2.8-dev", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] } +sparesults = { version = "0.1.8-dev", path="sparesults", 
features = ["rdf-star"] } [target.'cfg(not(target_family = "wasm"))'.dependencies] libc = "0.2" -oxrocksdb-sys = { version = "0.3.16-dev", path="../oxrocksdb-sys" } +oxrocksdb-sys = { version = "0.3.17-dev", path="../oxrocksdb-sys" } oxhttp = { version = "0.1", optional = true } [target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] diff --git a/lib/oxrdf/Cargo.toml b/lib/oxrdf/Cargo.toml index 9bb88fe3..df554630 100644 --- a/lib/oxrdf/Cargo.toml +++ b/lib/oxrdf/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxrdf" -version = "0.1.5" +version = "0.1.6-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -21,7 +21,7 @@ rdf-star = [] rand = "0.8" oxilangtag = "0.1" oxiri = "0.2" -oxsdatatypes = { version = "0.1.1", path="../oxsdatatypes", optional = true } +oxsdatatypes = { version = "0.1.2-dev", path="../oxsdatatypes", optional = true } [package.metadata.docs.rs] all-features = true diff --git a/lib/oxrdf/src/dataset.rs b/lib/oxrdf/src/dataset.rs index 12d07880..9925f7ca 100644 --- a/lib/oxrdf/src/dataset.rs +++ b/lib/oxrdf/src/dataset.rs @@ -705,9 +705,7 @@ impl Dataset { InternedTerm, InternedGraphName, )> { - let b_prime = partition - .iter() - .find_map(|(_, b)| if b.len() > 1 { Some(b) } else { None }); + let b_prime = partition.iter().find_map(|(_, b)| (b.len() > 1).then(|| b)); if let Some(b_prime) = b_prime { b_prime .iter() diff --git a/lib/oxrdf/src/interning.rs b/lib/oxrdf/src/interning.rs index 41725dca..54c8acde 100644 --- a/lib/oxrdf/src/interning.rs +++ b/lib/oxrdf/src/interning.rs @@ -463,11 +463,10 @@ impl InternedTriple { predicate: InternedNamedNode::encoded_from(triple.predicate, interner)?, object: InternedTerm::encoded_from(triple.object, interner)?, }; - if interner.triples.contains_key(&interned_triple) { - Some(interned_triple) - } else { - None - } + interner + .triples + .contains_key(&interned_triple) + .then(|| interned_triple) } pub fn next(&self) -> Self { diff --git 
a/lib/oxrdf/src/lib.rs b/lib/oxrdf/src/lib.rs index 9d40ead4..aa6f712b 100644 --- a/lib/oxrdf/src/lib.rs +++ b/lib/oxrdf/src/lib.rs @@ -1,5 +1,4 @@ #![doc = include_str!("../README.md")] -#![deny(unsafe_code)] #![doc(test(attr(deny(warnings))))] #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] diff --git a/lib/oxsdatatypes/Cargo.toml b/lib/oxsdatatypes/Cargo.toml index 54841f88..e8488b1d 100644 --- a/lib/oxsdatatypes/Cargo.toml +++ b/lib/oxsdatatypes/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxsdatatypes" -version = "0.1.1" +version = "0.1.2-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -13,9 +13,6 @@ An implementation of some XSD datatypes for SPARQL implementations edition = "2021" rust-version = "1.60" -[dependencies] -nom = "7" - [target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] js-sys = "0.3" diff --git a/lib/oxsdatatypes/src/boolean.rs b/lib/oxsdatatypes/src/boolean.rs index 9544cac9..fd213a90 100644 --- a/lib/oxsdatatypes/src/boolean.rs +++ b/lib/oxsdatatypes/src/boolean.rs @@ -43,14 +43,14 @@ impl From for Boolean { impl From for Boolean { #[inline] fn from(value: Float) -> Self { - (value != Float::from(0.) && !value.is_naan()).into() + (value != Float::from(0.) && !value.is_nan()).into() } } impl From for Boolean { #[inline] fn from(value: Double) -> Self { - (value != Double::from(0.) && !value.is_naan()).into() + (value != Double::from(0.) 
&& !value.is_nan()).into() } } diff --git a/lib/oxsdatatypes/src/date_time.rs b/lib/oxsdatatypes/src/date_time.rs index c7fe48e8..734ebab5 100644 --- a/lib/oxsdatatypes/src/date_time.rs +++ b/lib/oxsdatatypes/src/date_time.rs @@ -1,8 +1,7 @@ -use super::parser::{date_lexical_rep, date_time_lexical_rep, parse_value, time_lexical_rep}; use super::{DayTimeDuration, Decimal, Duration, XsdParseError, YearMonthDuration}; use crate::parser::{ - g_day_lexical_rep, g_month_day_lexical_rep, g_month_lexical_rep, g_year_lexical_rep, - g_year_month_lexical_rep, + parse_date, parse_date_time, parse_g_day, parse_g_month, parse_g_month_day, parse_g_year, + parse_g_year_month, parse_time, }; use std::cmp::{min, Ordering}; use std::error::Error; @@ -44,6 +43,7 @@ impl DateTime { }) } + /// [fn:current-dateTime](https://www.w3.org/TR/xpath-functions/#func-current-dateTime) #[inline] pub fn now() -> Result { Ok(Self { @@ -187,8 +187,11 @@ impl DateTime { self.checked_sub_day_time_duration(rhs) } else { Some(Self { - timestamp: Timestamp::new(&date_time_plus_duration(-rhs, &self.properties())?) - .ok()?, + timestamp: Timestamp::new(&date_time_plus_duration( + rhs.checked_neg()?, + &self.properties(), + )?) 
+ .ok()?, }) } } @@ -230,7 +233,7 @@ impl FromStr for DateTime { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(date_time_lexical_rep, input) + parse_date_time(input) } } @@ -303,6 +306,12 @@ impl Time { } } + /// [fn:current-time](https://www.w3.org/TR/xpath-functions/#func-current-time) + #[inline] + pub fn now() -> Result { + DateTime::now()?.try_into() + } + /// [fn:hour-from-time](https://www.w3.org/TR/xpath-functions/#func-hour-from-time) #[inline] pub fn hour(&self) -> u8 { @@ -435,7 +444,7 @@ impl FromStr for Time { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(time_lexical_rep, input) + parse_time(input) } } @@ -498,6 +507,12 @@ impl Date { } } + /// [fn:current-date](https://www.w3.org/TR/xpath-functions/#func-current-date) + #[inline] + pub fn now() -> Result { + DateTime::now()?.try_into() + } + /// [fn:year-from-date](https://www.w3.org/TR/xpath-functions/#func-year-from-date) #[inline] pub fn year(&self) -> i64 { @@ -632,7 +647,7 @@ impl FromStr for Date { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(date_lexical_rep, input) + parse_date(input) } } @@ -754,7 +769,7 @@ impl FromStr for GYearMonth { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(g_year_month_lexical_rep, input) + parse_g_year_month(input) } } @@ -875,7 +890,7 @@ impl FromStr for GYear { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(g_year_lexical_rep, input) + parse_g_year(input) } } @@ -997,7 +1012,7 @@ impl FromStr for GMonthDay { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(g_month_day_lexical_rep, input) + parse_g_month_day(input) } } @@ -1123,7 +1138,7 @@ impl FromStr for GMonth { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(g_month_lexical_rep, input) + parse_g_month(input) } } @@ -1240,7 +1255,7 @@ impl FromStr for GDay { type Err = XsdParseError; fn from_str(input: 
&str) -> Result { - parse_value(g_day_lexical_rep, input) + parse_g_day(input) } } diff --git a/lib/oxsdatatypes/src/decimal.rs b/lib/oxsdatatypes/src/decimal.rs index 3b49e229..11065901 100644 --- a/lib/oxsdatatypes/src/decimal.rs +++ b/lib/oxsdatatypes/src/decimal.rs @@ -5,10 +5,9 @@ use std::fmt::Write; use std::ops::Neg; use std::str::FromStr; -const DECIMAL_PART_DIGITS: usize = 18; +const DECIMAL_PART_DIGITS: u32 = 18; const DECIMAL_PART_POW: i128 = 1_000_000_000_000_000_000; const DECIMAL_PART_POW_MINUS_ONE: i128 = 100_000_000_000_000_000; -const DECIMAL_PART_HALF_POW: i128 = 1_000_000_000; /// [XML Schema `decimal` datatype](https://www.w3.org/TR/xmlschema11-2/#decimal) /// @@ -22,10 +21,9 @@ pub struct Decimal { impl Decimal { /// Constructs the decimal i / 10^n - #[allow(clippy::cast_possible_truncation)] #[inline] pub fn new(i: i128, n: u32) -> Result { - let shift = (DECIMAL_PART_DIGITS as u32) + let shift = DECIMAL_PART_DIGITS .checked_sub(n) .ok_or(DecimalOverflowError)?; Ok(Self { @@ -66,29 +64,69 @@ impl Decimal { /// [op:numeric-multiply](https://www.w3.org/TR/xpath-functions/#func-numeric-multiply) #[inline] pub fn checked_mul(&self, rhs: impl Into) -> Option { - //TODO: better algorithm to keep precision + // Idea: we shift right as much as possible to keep as much precision as possible + // Do the multiplication and do the required left shift + let mut left = self.value; + let mut shift_left = 0_u32; + if left != 0 { + while left % 10 == 0 { + left /= 10; + shift_left += 1; + } + } + + let mut right = rhs.into().value; + let mut shift_right = 0_u32; + if right != 0 { + while right % 10 == 0 { + right /= 10; + shift_right += 1; + } + } + + // We do multiplication + shift + let shift = (shift_left + shift_right).checked_sub(DECIMAL_PART_DIGITS)?; Some(Self { - value: self - .value - .checked_div(DECIMAL_PART_HALF_POW)? - .checked_mul(rhs.into().value.checked_div(DECIMAL_PART_HALF_POW)?)?, + value: left + .checked_mul(right)? 
+ .checked_mul(10_i128.checked_pow(shift)?)?, }) } /// [op:numeric-divide](https://www.w3.org/TR/xpath-functions/#func-numeric-divide) #[inline] pub fn checked_div(&self, rhs: impl Into) -> Option { - //TODO: better algorithm to keep precision + // Idea: we shift the dividend left as much as possible to keep as much precision as possible + // And we shift right the divisor as much as possible + // Do the multiplication and do the required shift + let mut left = self.value; + let mut shift_left = 0_u32; + if left != 0 { + while let Some(r) = left.checked_mul(10) { + assert_eq!(r / 10, left); + left = r; + shift_left += 1; + } + } + let mut right = rhs.into().value; + let mut shift_right = 0_u32; + if right != 0 { + while right % 10 == 0 { + right /= 10; + shift_right += 1; + } + } + + // We do division + shift + let shift = (shift_left + shift_right).checked_sub(DECIMAL_PART_DIGITS)?; Some(Self { - value: self - .value - .checked_mul(DECIMAL_PART_HALF_POW)? - .checked_div(rhs.into().value)? - .checked_mul(DECIMAL_PART_HALF_POW)?, + value: left + .checked_div(right)? + .checked_div(10_i128.checked_pow(shift)?)?, }) } - /// TODO: XSD? 
is well defined for not integer + /// [op:numeric-mod](https://www.w3.org/TR/xpath-functions/#func-numeric-mod) #[inline] pub fn checked_rem(&self, rhs: impl Into) -> Option { Some(Self { @@ -103,6 +141,14 @@ impl Decimal { }) } + /// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions/#func-numeric-unary-minus) + #[inline] + pub fn checked_neg(&self) -> Option { + Some(Self { + value: self.value.checked_neg()?, + }) + } + /// [fn:abs](https://www.w3.org/TR/xpath-functions/#func-abs) #[inline] pub const fn abs(&self) -> Self { @@ -174,9 +220,7 @@ impl Decimal { pub const MAX: Self = Self { value: i128::MAX }; #[cfg(test)] - pub(super) const fn step() -> Self { - Self { value: 1 } - } + pub const STEP: Self = Self { value: 1 }; } impl From for Decimal { @@ -316,13 +360,10 @@ impl TryFrom for Decimal { #[inline] #[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)] fn try_from(value: Double) -> Result { - let shifted = value * Double::from(DECIMAL_PART_POW as f64); - if shifted.is_finite() - && Double::from(i128::MIN as f64) <= shifted - && shifted <= Double::from(i128::MAX as f64) - { + let shifted = f64::from(value) * (DECIMAL_PART_POW as f64); + if shifted.is_finite() && (i128::MIN as f64) <= shifted && shifted <= (i128::MAX as f64) { Ok(Self { - value: f64::from(shifted) as i128, + value: shifted as i128, }) } else { Err(DecimalOverflowError) @@ -334,7 +375,7 @@ impl From for Float { #[inline] #[allow(clippy::cast_precision_loss)] fn from(value: Decimal) -> Self { - ((value.value as f32) / (DECIMAL_PART_POW as f32)).into() + Double::from(value).into() } } @@ -342,7 +383,18 @@ impl From for Double { #[inline] #[allow(clippy::cast_precision_loss)] fn from(value: Decimal) -> Self { - ((value.value as f64) / (DECIMAL_PART_POW as f64)).into() + let mut value = value.value; + let mut shift = DECIMAL_PART_POW; + + // Hack to improve precision + if value != 0 { + while shift != 1 && value % 10 == 0 { + value /= 10; + shift /= 10; + } + } + + 
((value as f64) / (shift as f64)).into() } } @@ -374,19 +426,19 @@ impl FromStr for Decimal { } let (sign, mut input) = match input.first() { - Some(b'+') => (1, &input[1..]), - Some(b'-') => (-1, &input[1..]), + Some(b'+') => (1_i128, &input[1..]), + Some(b'-') => (-1_i128, &input[1..]), _ => (1, input), }; let mut value = 0_i128; - let with_before_dot = input.first().map_or(false, |c| c.is_ascii_digit()); + let with_before_dot = input.first().map_or(false, u8::is_ascii_digit); while let Some(c) = input.first() { if c.is_ascii_digit() { value = value .checked_mul(10) .ok_or(PARSE_OVERFLOW)? - .checked_add((*c - b'0').into()) + .checked_add(sign * i128::from(*c - b'0')) .ok_or(PARSE_OVERFLOW)?; input = &input[1..]; } else { @@ -414,7 +466,7 @@ impl FromStr for Decimal { value = value .checked_mul(10) .ok_or(PARSE_OVERFLOW)? - .checked_add((*c - b'0').into()) + .checked_add(sign * i128::from(*c - b'0')) .ok_or(PARSE_OVERFLOW)?; input = &input[1..]; } else { @@ -431,11 +483,7 @@ impl FromStr for Decimal { } Ok(Self { - value: value - .checked_mul(sign) - .ok_or(PARSE_OVERFLOW)? - .checked_mul(exp) - .ok_or(PARSE_OVERFLOW)?, + value: value.checked_mul(exp).ok_or(PARSE_OVERFLOW)?, }) } } @@ -476,37 +524,38 @@ impl fmt::Display for Decimal { .find_map(|(i, v)| if v == b'0' { None } else { Some(i) }) .unwrap_or(40); - if last_non_zero >= DECIMAL_PART_DIGITS { + let decimal_part_digits = usize::try_from(DECIMAL_PART_DIGITS).unwrap(); + if last_non_zero >= decimal_part_digits { let end = if let Some(mut width) = f.width() { if self.value.is_negative() { width -= 1; } - if last_non_zero - DECIMAL_PART_DIGITS + 1 < width { - DECIMAL_PART_DIGITS + width + if last_non_zero - decimal_part_digits + 1 < width { + decimal_part_digits + width } else { last_non_zero + 1 } } else { last_non_zero + 1 }; - for c in digits[DECIMAL_PART_DIGITS..end].iter().rev() { + for c in digits[decimal_part_digits..end].iter().rev() { f.write_char(char::from(*c))?; } } else { f.write_char('0')? 
} - if DECIMAL_PART_DIGITS > first_non_zero { + if decimal_part_digits > first_non_zero { f.write_char('.')?; let start = if let Some(precision) = f.precision() { - if DECIMAL_PART_DIGITS - first_non_zero > precision { - DECIMAL_PART_DIGITS - precision + if decimal_part_digits - first_non_zero > precision { + decimal_part_digits - precision } else { first_non_zero } } else { first_non_zero }; - for c in digits[start..DECIMAL_PART_DIGITS].iter().rev() { + for c in digits[start..decimal_part_digits].iter().rev() { f.write_char(char::from(*c))?; } } @@ -626,15 +675,7 @@ mod tests { assert_eq!(Decimal::from_str("0")?.to_string(), "0"); assert_eq!(Decimal::from_str("-0")?.to_string(), "0"); assert_eq!(Decimal::from_str(&Decimal::MAX.to_string())?, Decimal::MAX); - assert_eq!( - Decimal::from_str( - &Decimal::MIN - .checked_add(Decimal::step()) - .unwrap() - .to_string() - )?, - Decimal::MIN.checked_add(Decimal::step()).unwrap() - ); + assert_eq!(Decimal::from_str(&Decimal::MIN.to_string())?, Decimal::MIN); assert!(Decimal::from_str("0.0000000000000000001").is_err()); assert!(Decimal::from_str("1000000000000000000000").is_err()); assert_eq!( @@ -663,58 +704,101 @@ mod tests { #[test] fn add() { - assert!(Decimal::MIN.checked_add(Decimal::step()).is_some()); - assert!(Decimal::MAX.checked_add(Decimal::step()).is_none()); + assert!(Decimal::MIN.checked_add(Decimal::STEP).is_some()); + assert!(Decimal::MAX.checked_add(Decimal::STEP).is_none()); assert_eq!( Decimal::MAX.checked_add(Decimal::MIN), - Some(-Decimal::step()) + Decimal::STEP.checked_neg() ); } #[test] fn sub() { - assert!(Decimal::MIN.checked_sub(Decimal::step()).is_none()); - assert!(Decimal::MAX.checked_sub(Decimal::step()).is_some()); + assert!(Decimal::MIN.checked_sub(Decimal::STEP).is_none()); + assert!(Decimal::MAX.checked_sub(Decimal::STEP).is_some()); } #[test] fn mul() -> Result<(), ParseDecimalError> { + assert_eq!(Decimal::from(1).checked_mul(-1), Some(Decimal::from(-1))); assert_eq!( - 
Decimal::from_str("1")?.checked_mul(Decimal::from_str("-1")?), - Some(Decimal::from_str("-1")?) - ); - assert_eq!( - Decimal::from_str("1000")?.checked_mul(Decimal::from_str("1000")?), - Some(Decimal::from_str("1000000")?) + Decimal::from(1000).checked_mul(1000), + Some(Decimal::from(1_000_000)) ); assert_eq!( Decimal::from_str("0.1")?.checked_mul(Decimal::from_str("0.01")?), Some(Decimal::from_str("0.001")?) ); + assert_eq!(Decimal::from(0).checked_mul(1), Some(Decimal::from(0))); + assert_eq!(Decimal::from(1).checked_mul(0), Some(Decimal::from(0))); + assert_eq!(Decimal::MAX.checked_mul(1), Some(Decimal::MAX)); + assert_eq!(Decimal::MIN.checked_mul(1), Some(Decimal::MIN)); + assert_eq!( + Decimal::from(1).checked_mul(Decimal::MAX), + Some(Decimal::MAX) + ); + assert_eq!( + Decimal::from(1).checked_mul(Decimal::MIN), + Some(Decimal::MIN) + ); + assert_eq!( + Decimal::MAX.checked_mul(-1), + Some(Decimal::MIN.checked_add(Decimal::STEP).unwrap()) + ); + assert_eq!(Decimal::MIN.checked_mul(-1), None); + assert_eq!( + Decimal::MIN + .checked_add(Decimal::STEP) + .unwrap() + .checked_mul(-1), + Some(Decimal::MAX) + ); Ok(()) } #[test] fn div() -> Result<(), ParseDecimalError> { + assert_eq!(Decimal::from(1).checked_div(1), Some(Decimal::from(1))); + assert_eq!(Decimal::from(100).checked_div(10), Some(Decimal::from(10))); assert_eq!( - Decimal::from_str("1")?.checked_div(Decimal::from_str("1")?), - Some(Decimal::from_str("1")?) + Decimal::from(10).checked_div(100), + Some(Decimal::from_str("0.1")?) ); + assert_eq!(Decimal::from(1).checked_div(0), None); + assert_eq!(Decimal::from(0).checked_div(1), Some(Decimal::from(0))); + assert_eq!(Decimal::MAX.checked_div(1), Some(Decimal::MAX)); + assert_eq!(Decimal::MIN.checked_div(1), Some(Decimal::MIN)); assert_eq!( - Decimal::from_str("100")?.checked_div(Decimal::from_str("10")?), - Some(Decimal::from_str("10")?) 
+ Decimal::MAX.checked_div(-1), + Some(Decimal::MIN.checked_add(Decimal::STEP).unwrap()) ); + assert_eq!(Decimal::MIN.checked_div(-1), None); assert_eq!( - Decimal::from_str("10")?.checked_div(Decimal::from_str("100")?), - Some(Decimal::from_str("0.1")?) + Decimal::MIN + .checked_add(Decimal::STEP) + .unwrap() + .checked_div(-1), + Some(Decimal::MAX) ); Ok(()) } + #[test] + fn rem() -> Result<(), ParseDecimalError> { + assert_eq!(Decimal::from(10).checked_rem(3), Some(Decimal::from(1))); + assert_eq!(Decimal::from(6).checked_rem(-2), Some(Decimal::from(0))); + assert_eq!( + Decimal::from_str("4.5")?.checked_rem(Decimal::from_str("1.2")?), + Some(Decimal::from_str("0.9")?) + ); + assert_eq!(Decimal::from(1).checked_rem(0), None); + Ok(()) + } + #[test] fn round() -> Result<(), ParseDecimalError> { - assert_eq!(Decimal::from_str("10")?.round(), Decimal::from(10)); - assert_eq!(Decimal::from_str("-10")?.round(), Decimal::from(-10)); + assert_eq!(Decimal::from(10).round(), Decimal::from(10)); + assert_eq!(Decimal::from(-10).round(), Decimal::from(-10)); assert_eq!(Decimal::from_str("2.5")?.round(), Decimal::from(3)); assert_eq!(Decimal::from_str("2.4999")?.round(), Decimal::from(2)); assert_eq!(Decimal::from_str("-2.5")?.round(), Decimal::from(-2)); @@ -725,8 +809,8 @@ mod tests { #[test] fn ceil() -> Result<(), ParseDecimalError> { - assert_eq!(Decimal::from_str("10")?.ceil(), Decimal::from(10)); - assert_eq!(Decimal::from_str("-10")?.ceil(), Decimal::from(-10)); + assert_eq!(Decimal::from(10).ceil(), Decimal::from(10)); + assert_eq!(Decimal::from(-10).ceil(), Decimal::from(-10)); assert_eq!(Decimal::from_str("10.5")?.ceil(), Decimal::from(11)); assert_eq!(Decimal::from_str("-10.5")?.ceil(), Decimal::from(-10)); assert_eq!(Decimal::from(i64::MIN).ceil(), Decimal::from(i64::MIN)); @@ -736,8 +820,8 @@ mod tests { #[test] fn floor() -> Result<(), ParseDecimalError> { - assert_eq!(Decimal::from_str("10")?.ceil(), Decimal::from(10)); - 
assert_eq!(Decimal::from_str("-10")?.ceil(), Decimal::from(-10)); + assert_eq!(Decimal::from(10).ceil(), Decimal::from(10)); + assert_eq!(Decimal::from(-10).ceil(), Decimal::from(-10)); assert_eq!(Decimal::from_str("10.5")?.floor(), Decimal::from(10)); assert_eq!(Decimal::from_str("-10.5")?.floor(), Decimal::from(-11)); assert_eq!(Decimal::from(i64::MIN).floor(), Decimal::from(i64::MIN)); @@ -780,11 +864,11 @@ mod tests { fn from_float() -> Result<(), ParseDecimalError> { assert_eq!( Decimal::try_from(Float::from(0.)).ok(), - Some(Decimal::from_str("0")?) + Some(Decimal::from(0)) ); assert_eq!( Decimal::try_from(Float::from(-0.)).ok(), - Some(Decimal::from_str("0.")?) + Some(Decimal::from(0)) ); assert_eq!( Decimal::try_from(Float::from(-123.5)).ok(), @@ -796,12 +880,12 @@ mod tests { assert!(Decimal::try_from(Float::from(f32::MIN)).is_err()); assert!(Decimal::try_from(Float::from(f32::MAX)).is_err()); assert!( - Decimal::try_from(Float::from(1_672_507_302_466.)) + Decimal::try_from(Float::from(1_672_507_300_000.)) .unwrap() - .checked_sub(Decimal::from_str("1672507302466")?) + .checked_sub(Decimal::from(1_672_507_293_696_i64)) .unwrap() .abs() - < Decimal::from(1_000_000) + < Decimal::from(1) ); Ok(()) } @@ -810,11 +894,11 @@ mod tests { fn from_double() -> Result<(), ParseDecimalError> { assert_eq!( Decimal::try_from(Double::from(0.)).ok(), - Some(Decimal::from_str("0")?) + Some(Decimal::from(0)) ); assert_eq!( Decimal::try_from(Double::from(-0.)).ok(), - Some(Decimal::from_str("0")?) + Some(Decimal::from(0)) ); assert_eq!( Decimal::try_from(Double::from(-123.1)).ok(), @@ -823,7 +907,7 @@ mod tests { assert!( Decimal::try_from(Double::from(1_672_507_302_466.)) .unwrap() - .checked_sub(Decimal::from_str("1672507302466")?) 
+ .checked_sub(Decimal::from(1_672_507_302_466_i64)) .unwrap() .abs() < Decimal::from(1) @@ -836,6 +920,34 @@ mod tests { Ok(()) } + #[test] + fn to_float() -> Result<(), ParseDecimalError> { + assert_eq!(Float::from(Decimal::from(0)), Float::from(0.)); + assert_eq!(Float::from(Decimal::from(1)), Float::from(1.)); + assert_eq!(Float::from(Decimal::from(10)), Float::from(10.)); + assert_eq!(Float::from(Decimal::from_str("0.1")?), Float::from(0.1)); + assert!((Float::from(Decimal::MAX) - Float::from(1.701_412e20)).abs() < Float::from(1.)); + assert!((Float::from(Decimal::MIN) - Float::from(-1.701_412e20)).abs() < Float::from(1.)); + Ok(()) + } + + #[test] + fn to_double() -> Result<(), ParseDecimalError> { + assert_eq!(Double::from(Decimal::from(0)), Double::from(0.)); + assert_eq!(Double::from(Decimal::from(1)), Double::from(1.)); + assert_eq!(Double::from(Decimal::from(10)), Double::from(10.)); + assert_eq!(Double::from(Decimal::from_str("0.1")?), Double::from(0.1)); + assert!( + (Double::from(Decimal::MAX) - Double::from(1.701_411_834_604_692_4e20)).abs() + < Double::from(1.) + ); + assert!( + (Double::from(Decimal::MIN) - Double::from(-1.701_411_834_604_692_4e20)).abs() + < Double::from(1.) 
+ ); + Ok(()) + } + #[test] fn minimally_conformant() -> Result<(), ParseDecimalError> { // All minimally conforming processors must support decimal values whose absolute value can be expressed as i / 10^k, diff --git a/lib/oxsdatatypes/src/double.rs b/lib/oxsdatatypes/src/double.rs index de3b78c4..e9b26ba5 100644 --- a/lib/oxsdatatypes/src/double.rs +++ b/lib/oxsdatatypes/src/double.rs @@ -53,6 +53,12 @@ impl Double { self.value.round().into() } + #[inline] + pub fn is_nan(self) -> bool { + self.value.is_nan() + } + + #[deprecated(note = "Use .is_nan()")] #[inline] pub fn is_naan(self) -> bool { self.value.is_nan() @@ -68,6 +74,20 @@ impl Double { pub fn is_identical_with(&self, other: &Self) -> bool { self.value.to_ne_bytes() == other.value.to_ne_bytes() } + + pub const MIN: Self = Self { value: f64::MIN }; + + pub const MAX: Self = Self { value: f64::MAX }; + + pub const INFINITY: Self = Self { + value: f64::INFINITY, + }; + + pub const NEG_INFINITY: Self = Self { + value: f64::NEG_INFINITY, + }; + + pub const NAN: Self = Self { value: f64::NAN }; } impl From for f64 { @@ -243,7 +263,7 @@ mod tests { #[test] fn eq() { assert_eq!(Double::from(0_f64), Double::from(0_f64)); - assert_ne!(Double::from(f64::NAN), Double::from(f64::NAN)); + assert_ne!(Double::NAN, Double::NAN); assert_eq!(Double::from(-0.), Double::from(0.)); } @@ -254,18 +274,15 @@ mod tests { Some(Ordering::Equal) ); assert_eq!( - Double::from(f64::INFINITY).partial_cmp(&Double::from(f64::MAX)), + Double::INFINITY.partial_cmp(&Double::MAX), Some(Ordering::Greater) ); assert_eq!( - Double::from(f64::NEG_INFINITY).partial_cmp(&Double::from(f64::MIN)), + Double::NEG_INFINITY.partial_cmp(&Double::MIN), Some(Ordering::Less) ); - assert_eq!(Double::from(f64::NAN).partial_cmp(&Double::from(0.)), None); - assert_eq!( - Double::from(f64::NAN).partial_cmp(&Double::from(f64::NAN)), - None - ); + assert_eq!(Double::NAN.partial_cmp(&Double::from(0.)), None); + assert_eq!(Double::NAN.partial_cmp(&Double::NAN), 
None); assert_eq!( Double::from(0.).partial_cmp(&Double::from(-0.)), Some(Ordering::Equal) @@ -275,7 +292,7 @@ mod tests { #[test] fn is_identical_with() { assert!(Double::from(0.).is_identical_with(&Double::from(0.))); - assert!(Double::from(f64::NAN).is_identical_with(&Double::from(f64::NAN))); + assert!(Double::NAN.is_identical_with(&Double::NAN)); assert!(!Double::from(-0.).is_identical_with(&Double::from(0.))); } @@ -297,11 +314,11 @@ mod tests { assert_eq!(Double::from_str("-1.")?.to_string(), "-1"); assert_eq!( Double::from_str(&f64::MIN.to_string()).unwrap(), - Double::from(f64::MIN) + Double::MIN ); assert_eq!( Double::from_str(&f64::MAX.to_string()).unwrap(), - Double::from(f64::MAX) + Double::MAX ); Ok(()) } diff --git a/lib/oxsdatatypes/src/duration.rs b/lib/oxsdatatypes/src/duration.rs index 27f255eb..a2d6ac47 100644 --- a/lib/oxsdatatypes/src/duration.rs +++ b/lib/oxsdatatypes/src/duration.rs @@ -107,6 +107,14 @@ impl Duration { }) } + #[inline] + pub fn checked_neg(&self) -> Option { + Some(Self { + year_month: self.year_month.checked_neg()?, + day_time: self.day_time.checked_neg()?, + }) + } + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). 
#[inline] pub fn is_identical_with(&self, other: &Self) -> bool { @@ -127,7 +135,7 @@ impl FromStr for Duration { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(duration_lexical_rep, input) + parse_duration(input) } } @@ -170,8 +178,10 @@ impl fmt::Display for Duration { let h = (s_int % 86400) / 3600; let m = (s_int % 3600) / 60; let s = ss - .checked_sub(Decimal::try_from(d * 86400 + h * 3600 + m * 60).unwrap()) - .unwrap(); //could not fail + .checked_sub( + Decimal::try_from(d * 86400 + h * 3600 + m * 60).map_err(|_| fmt::Error)?, + ) + .ok_or(fmt::Error)?; if d != 0 { write!(f, "{d}D")?; @@ -299,6 +309,13 @@ impl YearMonthDuration { }) } + #[inline] + pub fn checked_neg(&self) -> Option { + Some(Self { + months: self.months.checked_neg()?, + }) + } + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). #[inline] pub fn is_identical_with(&self, other: &Self) -> bool { @@ -333,7 +350,7 @@ impl FromStr for YearMonthDuration { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(year_month_duration_lexical_rep, input) + parse_year_month_duration(input) } } @@ -465,6 +482,13 @@ impl DayTimeDuration { }) } + #[inline] + pub fn checked_neg(&self) -> Option { + Some(Self { + seconds: self.seconds.checked_neg()?, + }) + } + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). 
#[inline] pub fn is_identical_with(&self, other: &Self) -> bool { @@ -513,7 +537,7 @@ impl FromStr for DayTimeDuration { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(day_time_duration_lexical_rep, input) + parse_day_time_duration(input) } } @@ -599,7 +623,7 @@ mod tests { fn from_str() -> Result<(), XsdParseError> { let min = Duration::new( i64::MIN + 1, - Decimal::MIN.checked_add(Decimal::step()).unwrap(), + Decimal::MIN.checked_add(Decimal::STEP).unwrap(), ); let max = Duration::new(i64::MAX, Decimal::MAX); diff --git a/lib/oxsdatatypes/src/float.rs b/lib/oxsdatatypes/src/float.rs index 001b5006..29ebde30 100644 --- a/lib/oxsdatatypes/src/float.rs +++ b/lib/oxsdatatypes/src/float.rs @@ -53,11 +53,17 @@ impl Float { self.value.round().into() } + #[deprecated(note = "Use .is_nan()")] #[inline] pub fn is_naan(self) -> bool { self.value.is_nan() } + #[inline] + pub fn is_nan(self) -> bool { + self.value.is_nan() + } + #[inline] pub fn is_finite(self) -> bool { self.value.is_finite() @@ -68,6 +74,20 @@ impl Float { pub fn is_identical_with(&self, other: &Self) -> bool { self.value.to_ne_bytes() == other.value.to_ne_bytes() } + + pub const MIN: Self = Self { value: f32::MIN }; + + pub const MAX: Self = Self { value: f32::MAX }; + + pub const INFINITY: Self = Self { + value: f32::INFINITY, + }; + + pub const NEG_INFINITY: Self = Self { + value: f32::NEG_INFINITY, + }; + + pub const NAN: Self = Self { value: f32::NAN }; } impl From for f32 { @@ -233,7 +253,7 @@ mod tests { #[test] fn eq() { assert_eq!(Float::from(0.), Float::from(0.)); - assert_ne!(Float::from(f32::NAN), Float::from(f32::NAN)); + assert_ne!(Float::NAN, Float::NAN); assert_eq!(Float::from(-0.), Float::from(0.)); } @@ -244,18 +264,15 @@ mod tests { Some(Ordering::Equal) ); assert_eq!( - Float::from(f32::INFINITY).partial_cmp(&Float::from(f32::MAX)), + Float::INFINITY.partial_cmp(&Float::MAX), Some(Ordering::Greater) ); assert_eq!( - 
Float::from(f32::NEG_INFINITY).partial_cmp(&Float::from(f32::MIN)), + Float::NEG_INFINITY.partial_cmp(&Float::MIN), Some(Ordering::Less) ); - assert_eq!(Float::from(f32::NAN).partial_cmp(&Float::from(0.)), None); - assert_eq!( - Float::from(f32::NAN).partial_cmp(&Float::from(f32::NAN)), - None - ); + assert_eq!(Float::NAN.partial_cmp(&Float::from(0.)), None); + assert_eq!(Float::NAN.partial_cmp(&Float::NAN), None); assert_eq!( Float::from(0.).partial_cmp(&Float::from(-0.)), Some(Ordering::Equal) @@ -265,7 +282,7 @@ mod tests { #[test] fn is_identical_with() { assert!(Float::from(0.).is_identical_with(&Float::from(0.))); - assert!(Float::from(f32::NAN).is_identical_with(&Float::from(f32::NAN))); + assert!(Float::NAN.is_identical_with(&Float::NAN)); assert!(!Float::from(-0.).is_identical_with(&Float::from(0.))); } @@ -285,14 +302,8 @@ mod tests { assert_eq!(Float::from_str("-1")?.to_string(), "-1"); assert_eq!(Float::from_str("1.")?.to_string(), "1"); assert_eq!(Float::from_str("-1.")?.to_string(), "-1"); - assert_eq!( - Float::from_str(&f32::MIN.to_string())?, - Float::from(f32::MIN) - ); - assert_eq!( - Float::from_str(&f32::MAX.to_string())?, - Float::from(f32::MAX) - ); + assert_eq!(Float::from_str(&f32::MIN.to_string())?, Float::MIN); + assert_eq!(Float::from_str(&f32::MAX.to_string())?, Float::MAX); Ok(()) } } diff --git a/lib/oxsdatatypes/src/integer.rs b/lib/oxsdatatypes/src/integer.rs index 50f2d002..0c9d90e5 100644 --- a/lib/oxsdatatypes/src/integer.rs +++ b/lib/oxsdatatypes/src/integer.rs @@ -58,6 +58,7 @@ impl Integer { }) } + /// [op:numeric-mod](https://www.w3.org/TR/xpath-functions/#func-numeric-mod) #[inline] pub fn checked_rem(&self, rhs: impl Into) -> Option { Some(Self { @@ -72,6 +73,14 @@ impl Integer { }) } + /// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions/#func-numeric-unary-minus) + #[inline] + pub fn checked_neg(&self) -> Option { + Some(Self { + value: self.value.checked_neg()?, + }) + } + /// 
[fn:abs](https://www.w3.org/TR/xpath-functions/#func-abs) #[inline] pub const fn abs(&self) -> Self { @@ -95,6 +104,10 @@ impl Integer { pub fn is_identical_with(&self, other: &Self) -> bool { self == other } + + pub const MIN: Self = Self { value: i64::MIN }; + + pub const MAX: Self = Self { value: i64::MAX }; } impl From for Integer { @@ -258,9 +271,9 @@ mod tests { assert!(Integer::try_from(Float::from(f32::MIN)).is_err()); assert!(Integer::try_from(Float::from(f32::MAX)).is_err()); assert!( - Integer::try_from(Float::from(1_672_507_302_466.)) + Integer::try_from(Float::from(1_672_507_300_000.)) .unwrap() - .checked_sub(Integer::from_str("1672507302466")?) + .checked_sub(Integer::from_str("1672507300000")?) .unwrap() .abs() < Integer::from(1_000_000) @@ -283,12 +296,12 @@ mod tests { Some(Integer::from_str("-123")?) ); assert!( - Integer::try_from(Double::from(1_672_507_302_466.)) + Integer::try_from(Double::from(1_672_507_300_000.)) .unwrap() - .checked_sub(Integer::from_str("1672507302466").unwrap()) + .checked_sub(Integer::from_str("1672507300000").unwrap()) .unwrap() .abs() - < Integer::from(1) + < Integer::from(10) ); assert!(Integer::try_from(Double::from(f64::NAN)).is_err()); assert!(Integer::try_from(Double::from(f64::INFINITY)).is_err()); @@ -312,4 +325,40 @@ mod tests { assert!(Integer::try_from(Decimal::MAX).is_err()); Ok(()) } + + #[test] + fn add() { + assert_eq!( + Integer::MIN.checked_add(1), + Some(Integer::from(i64::MIN + 1)) + ); + assert_eq!(Integer::MAX.checked_add(1), None); + } + + #[test] + fn sub() { + assert_eq!(Integer::MIN.checked_sub(1), None); + assert_eq!( + Integer::MAX.checked_sub(1), + Some(Integer::from(i64::MAX - 1)) + ); + } + + #[test] + fn mul() { + assert_eq!(Integer::MIN.checked_mul(2), None); + assert_eq!(Integer::MAX.checked_mul(2), None); + } + + #[test] + fn div() { + assert_eq!(Integer::from(1).checked_div(0), None); + } + + #[test] + fn rem() { + assert_eq!(Integer::from(10).checked_rem(3), Some(Integer::from(1))); + 
assert_eq!(Integer::from(6).checked_rem(-2), Some(Integer::from(0))); + assert_eq!(Integer::from(1).checked_rem(0), None); + } } diff --git a/lib/oxsdatatypes/src/lib.rs b/lib/oxsdatatypes/src/lib.rs index 7c06ca9e..67737b13 100644 --- a/lib/oxsdatatypes/src/lib.rs +++ b/lib/oxsdatatypes/src/lib.rs @@ -1,9 +1,9 @@ #![doc = include_str!("../README.md")] -#![deny(unsafe_code)] #![doc(test(attr(deny(warnings))))] #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] #![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] +#![allow(clippy::return_self_not_must_use)] mod boolean; mod date_time; diff --git a/lib/oxsdatatypes/src/parser.rs b/lib/oxsdatatypes/src/parser.rs index 22d652ea..66fb260d 100644 --- a/lib/oxsdatatypes/src/parser.rs +++ b/lib/oxsdatatypes/src/parser.rs @@ -2,15 +2,6 @@ use super::date_time::{DateTimeError, GDay, GMonth, GMonthDay, GYear, GYearMonth use super::decimal::ParseDecimalError; use super::duration::{DayTimeDuration, YearMonthDuration}; use super::*; -use nom::branch::alt; -use nom::bytes::complete::{tag, take_while, take_while_m_n}; -use nom::character::complete::{char, digit0, digit1}; -use nom::combinator::{map, opt, recognize}; -use nom::error::{ErrorKind, ParseError}; -use nom::multi::many1; -use nom::sequence::{preceded, terminated, tuple}; -use nom::Err; -use nom::{IResult, Needed}; use std::error::Error; use std::fmt; use std::num::ParseIntError; @@ -24,46 +15,35 @@ pub struct XsdParseError { #[derive(Debug, Clone)] enum XsdParseErrorKind { - NomKind(ErrorKind), - NomChar(char), - MissingData(Needed), - TooMuchData { count: usize }, - Overflow, ParseInt(ParseIntError), ParseDecimal(ParseDecimalError), - OutOfIntegerRange { value: u8, min: u8, max: u8 }, DateTime(DateTimeError), + Message(&'static str), } +const OVERFLOW_ERROR: XsdParseError = XsdParseError { + kind: XsdParseErrorKind::Message("Overflow error"), 
+}; + impl fmt::Display for XsdParseError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match &self.kind { - XsdParseErrorKind::NomKind(kind) => { - write!(f, "Invalid XML Schema value: {}", kind.description()) - } - XsdParseErrorKind::NomChar(c) => { - write!(f, "Unexpected character in XML Schema value: '{c}'") - } - XsdParseErrorKind::MissingData(Needed::Unknown) => { - write!(f, "Too small XML Schema value") - } - XsdParseErrorKind::MissingData(Needed::Size(size)) => { - write!(f, "Too small XML Schema value: missing {size} chars") - } - XsdParseErrorKind::TooMuchData { count } => { - write!(f, "Too long XML Schema value: {count} extra chars") - } - XsdParseErrorKind::Overflow => write!(f, "Computation overflow or underflow"), XsdParseErrorKind::ParseInt(error) => { write!(f, "Error while parsing integer: {error}") } XsdParseErrorKind::ParseDecimal(error) => { write!(f, "Error while parsing decimal: {error}") } - XsdParseErrorKind::OutOfIntegerRange { value, min, max } => { - write!(f, "The integer {value} is not between {min} and {max}") - } XsdParseErrorKind::DateTime(error) => error.fmt(f), + XsdParseErrorKind::Message(msg) => write!(f, "{msg}"), + } + } +} + +impl XsdParseError { + const fn msg(message: &'static str) -> Self { + Self { + kind: XsdParseErrorKind::Message(message), } } } @@ -74,33 +54,11 @@ impl Error for XsdParseError { XsdParseErrorKind::ParseInt(error) => Some(error), XsdParseErrorKind::ParseDecimal(error) => Some(error), XsdParseErrorKind::DateTime(error) => Some(error), - _ => None, + XsdParseErrorKind::Message(_) => None, } } } -impl ParseError<&str> for XsdParseError { - fn from_error_kind(_input: &str, kind: ErrorKind) -> Self { - Self { - kind: XsdParseErrorKind::NomKind(kind), - } - } - - fn append(_input: &str, _kind: ErrorKind, other: Self) -> Self { - other - } - - fn from_char(_input: &str, c: char) -> Self { - Self { - kind: XsdParseErrorKind::NomChar(c), - } - } - - fn or(self, other: Self) -> Self { - other - } 
-} - impl From for XsdParseError { fn from(error: ParseIntError) -> Self { Self { @@ -125,412 +83,538 @@ impl From for XsdParseError { } } -impl From> for XsdParseError { - fn from(err: Err) -> Self { - match err { - Err::Incomplete(needed) => Self { - kind: XsdParseErrorKind::MissingData(needed), - }, - Err::Error(e) | Err::Failure(e) => e, - } - } -} - -type XsdResult<'a, T> = IResult<&'a str, T, XsdParseError>; - -const OVERFLOW_ERROR: XsdParseError = XsdParseError { - kind: XsdParseErrorKind::Overflow, -}; - -pub fn parse_value<'a, T>( - mut f: impl FnMut(&'a str) -> XsdResult<'a, T>, - input: &'a str, -) -> Result { - let (left, result) = f(input)?; - if left.is_empty() { - Ok(result) +// [6] duYearFrag ::= unsignedNoDecimalPtNumeral 'Y' +// [7] duMonthFrag ::= unsignedNoDecimalPtNumeral 'M' +// [8] duDayFrag ::= unsignedNoDecimalPtNumeral 'D' +// [9] duHourFrag ::= unsignedNoDecimalPtNumeral 'H' +// [10] duMinuteFrag ::= unsignedNoDecimalPtNumeral 'M' +// [11] duSecondFrag ::= (unsignedNoDecimalPtNumeral | unsignedDecimalPtNumeral) 'S' +// [12] duYearMonthFrag ::= (duYearFrag duMonthFrag?) | duMonthFrag +// [13] duTimeFrag ::= 'T' ((duHourFrag duMinuteFrag? duSecondFrag?) | (duMinuteFrag duSecondFrag?) | duSecondFrag) +// [14] duDayTimeFrag ::= (duDayFrag duTimeFrag?) | duTimeFrag +// [15] durationLexicalRep ::= '-'? 'P' ((duYearMonthFrag duDayTimeFrag?) 
| duDayTimeFrag) +struct DurationParts { + year_month: Option, + day_time: Option, +} + +fn duration_parts(input: &str) -> Result<(DurationParts, &str), XsdParseError> { + // States + const START: u32 = 0; + const AFTER_YEAR: u32 = 1; + const AFTER_MONTH: u32 = 2; + const AFTER_DAY: u32 = 3; + const AFTER_T: u32 = 4; + const AFTER_HOUR: u32 = 5; + const AFTER_MINUTE: u32 = 6; + const AFTER_SECOND: u32 = 7; + + let (negative, input) = if let Some(left) = input.strip_prefix('-') { + (true, left) } else { - Err(XsdParseError { - kind: XsdParseErrorKind::TooMuchData { count: left.len() }, - }) + (false, input) + }; + let mut input = expect_char(input, 'P', "Durations must start with 'P'")?; + let mut state = START; + let mut year_month: Option = None; + let mut day_time: Option = None; + while !input.is_empty() { + if let Some(left) = input.strip_prefix('T') { + if state >= AFTER_T { + return Err(XsdParseError::msg("Duplicated time separator 'T'")); + } + state = AFTER_T; + input = left; + } else { + let (number_str, left) = decimal_prefix(input); + match left.chars().next() { + Some('Y') if state < AFTER_YEAR => { + year_month = Some( + year_month + .unwrap_or_default() + .checked_add( + i64::from_str(number_str)? + .checked_mul(12) + .ok_or(OVERFLOW_ERROR)?, + ) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_YEAR; + } + Some('M') if state < AFTER_MONTH => { + year_month = Some( + year_month + .unwrap_or_default() + .checked_add(i64::from_str(number_str)?) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_MONTH; + } + Some('D') if state < AFTER_DAY => { + if number_str.contains('.') { + return Err(XsdParseError::msg( + "Decimal numbers are not allowed for days", + )); + } + day_time = Some( + day_time + .unwrap_or_default() + .checked_add( + Decimal::from_str(number_str)? 
+ .checked_mul(86400) + .ok_or(OVERFLOW_ERROR)?, + ) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_DAY; + } + Some('H') if state == AFTER_T => { + if number_str.contains('.') { + return Err(XsdParseError::msg( + "Decimal numbers are not allowed for hours", + )); + } + day_time = Some( + day_time + .unwrap_or_default() + .checked_add( + Decimal::from_str(number_str)? + .checked_mul(3600) + .ok_or(OVERFLOW_ERROR)?, + ) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_HOUR; + } + Some('M') if (AFTER_T..AFTER_MINUTE).contains(&state) => { + if number_str.contains('.') { + return Err(XsdParseError::msg( + "Decimal numbers are not allowed for minutes", + )); + } + day_time = Some( + day_time + .unwrap_or_default() + .checked_add( + Decimal::from_str(number_str)? + .checked_mul(60) + .ok_or(OVERFLOW_ERROR)?, + ) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_MINUTE; + } + Some('S') if (AFTER_T..AFTER_SECOND).contains(&state) => { + day_time = Some( + day_time + .unwrap_or_default() + .checked_add(Decimal::from_str(number_str)?) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_SECOND; + } + Some(_) => return Err(XsdParseError::msg("Unexpected type character")), + None => { + return Err(XsdParseError::msg( + "Numbers in durations must be followed by a type character", + )) + } + } + input = &left[1..]; + } } -} - -//TODO: check every computation - -// [6] duYearFrag ::= unsignedNoDecimalPtNumeral 'Y' -fn du_year_frag(input: &str) -> XsdResult<'_, i64> { - terminated(unsigned_no_decimal_pt_numeral, char('Y'))(input) -} -// [7] duMonthFrag ::= unsignedNoDecimalPtNumeral 'M' -fn du_month_frag(input: &str) -> XsdResult<'_, i64> { - terminated(unsigned_no_decimal_pt_numeral, char('M'))(input) + Ok(( + DurationParts { + year_month: if let Some(v) = year_month { + Some(if negative { + v.checked_neg().ok_or(OVERFLOW_ERROR)? + } else { + v + }) + } else { + None + }, + day_time: if let Some(v) = day_time { + Some(if negative { + v.checked_neg().ok_or(OVERFLOW_ERROR)? 
+ } else { + v + }) + } else { + None + }, + }, + input, + )) } -// [8] duDayFrag ::= unsignedNoDecimalPtNumeral 'D' -fn du_day_frag(input: &str) -> XsdResult<'_, i64> { - terminated(unsigned_no_decimal_pt_numeral, char('D'))(input) +pub fn parse_duration(input: &str) -> Result { + let parts = ensure_complete(input, duration_parts)?; + if parts.year_month.is_none() && parts.day_time.is_none() { + return Err(XsdParseError::msg("Empty duration")); + } + Ok(Duration::new( + parts.year_month.unwrap_or(0), + parts.day_time.unwrap_or_default(), + )) +} + +pub fn parse_year_month_duration(input: &str) -> Result { + let parts = ensure_complete(input, duration_parts)?; + if parts.day_time.is_some() { + return Err(XsdParseError::msg( + "There must not be any day or time component in a yearMonthDuration", + )); + } + Ok(YearMonthDuration::new(parts.year_month.ok_or( + XsdParseError::msg("No year and month values found"), + )?)) } -// [9] duHourFrag ::= unsignedNoDecimalPtNumeral 'H' -fn du_hour_frag(input: &str) -> XsdResult<'_, i64> { - terminated(unsigned_no_decimal_pt_numeral, char('H'))(input) +pub fn parse_day_time_duration(input: &str) -> Result { + let parts = ensure_complete(input, duration_parts)?; + if parts.year_month.is_some() { + return Err(XsdParseError::msg( + "There must not be any year or month component in a dayTimeDuration", + )); + } + Ok(DayTimeDuration::new(parts.day_time.ok_or( + XsdParseError::msg("No day or time values found"), + )?)) } -// [10] duMinuteFrag ::= unsignedNoDecimalPtNumeral 'M' -fn du_minute_frag(input: &str) -> XsdResult<'_, i64> { - terminated(unsigned_no_decimal_pt_numeral, char('M'))(input) +// [16] dateTimeLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag 'T' ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag? 
+fn date_time_lexical_rep(input: &str) -> Result<(DateTime, &str), XsdParseError> { + let (year, input) = year_frag(input)?; + let input = expect_char(input, '-', "The year and month must be separated by '-'")?; + let (month, input) = month_frag(input)?; + let input = expect_char(input, '-', "The month and day must be separated by '-'")?; + let (day, input) = day_frag(input)?; + let input = expect_char(input, 'T', "The date and time must be separated by 'T'")?; + let (hour, input) = hour_frag(input)?; + let input = expect_char(input, ':', "The hours and minutes must be separated by ':'")?; + let (minute, input) = minute_frag(input)?; + let input = expect_char( + input, + ':', + "The minutes and seconds must be separated by ':'", + )?; + let (second, input) = second_frag(input)?; + // We validate 24:00:00 + if hour == 24 && minute != 0 && second != Decimal::from(0) { + return Err(XsdParseError::msg( + "Times are not allowed to be after 24:00:00", + )); + } + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok(( + DateTime::new(year, month, day, hour, minute, second, timezone_offset)?, + input, + )) } -// [11] duSecondFrag ::= (unsignedNoDecimalPtNumeral | unsignedDecimalPtNumeral) 'S' -fn du_second_frag(input: &str) -> XsdResult<'_, Decimal> { - terminated( - map_res( - recognize(tuple((digit0, opt(preceded(char('.'), digit0))))), - Decimal::from_str, - ), - char('S'), - )(input) +pub fn parse_date_time(input: &str) -> Result { + ensure_complete(input, date_time_lexical_rep) } -// [12] duYearMonthFrag ::= (duYearFrag duMonthFrag?) | duMonthFrag -fn du_year_month_frag(input: &str) -> XsdResult<'_, i64> { - alt(( - map(tuple((du_year_frag, opt(du_month_frag))), |(y, m)| { - 12 * y + m.unwrap_or(0) - }), - du_month_frag, - ))(input) +// [17] timeLexicalRep ::= ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag? 
+fn time_lexical_rep(input: &str) -> Result<(Time, &str), XsdParseError> { + let (hour, input) = hour_frag(input)?; + let input = expect_char(input, ':', "The hours and minutes must be separated by ':'")?; + let (minute, input) = minute_frag(input)?; + let input = expect_char( + input, + ':', + "The minutes and seconds must be separated by ':'", + )?; + let (second, input) = second_frag(input)?; + // We validate 24:00:00 + if hour == 24 && minute != 0 && second != Decimal::from(0) { + return Err(XsdParseError::msg( + "Times are not allowed to be after 24:00:00", + )); + } + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok((Time::new(hour, minute, second, timezone_offset)?, input)) } -// [13] duTimeFrag ::= 'T' ((duHourFrag duMinuteFrag? duSecondFrag?) | (duMinuteFrag duSecondFrag?) | duSecondFrag) -fn du_time_frag(input: &str) -> XsdResult<'_, Decimal> { - preceded( - char('T'), - alt(( - map_res( - tuple((du_hour_frag, opt(du_minute_frag), opt(du_second_frag))), - |(h, m, s)| { - Decimal::from(3600 * h + 60 * m.unwrap_or(0)) - .checked_add(s.unwrap_or_default()) - .ok_or(OVERFLOW_ERROR) - }, - ), - map_res(tuple((du_minute_frag, opt(du_second_frag))), |(m, s)| { - Decimal::from(m * 60) - .checked_add(s.unwrap_or_default()) - .ok_or(OVERFLOW_ERROR) - }), - du_second_frag, - )), - )(input) +pub fn parse_time(input: &str) -> Result { + ensure_complete(input, time_lexical_rep) } -// [14] duDayTimeFrag ::= (duDayFrag duTimeFrag?) | duTimeFrag -fn du_day_time_frag(input: &str) -> XsdResult<'_, Decimal> { - alt(( - map_res(tuple((du_day_frag, opt(du_time_frag))), |(d, t)| { - Decimal::from(d) - .checked_mul(Decimal::from(86400)) - .ok_or(OVERFLOW_ERROR)? - .checked_add(t.unwrap_or_default()) - .ok_or(OVERFLOW_ERROR) - }), - du_time_frag, - ))(input) +// [18] dateLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag timezoneFrag? 
Constraint: Day-of-month Representations +fn date_lexical_rep(input: &str) -> Result<(Date, &str), XsdParseError> { + let (year, input) = year_frag(input)?; + let input = expect_char(input, '-', "The year and month must be separated by '-'")?; + let (month, input) = month_frag(input)?; + let input = expect_char(input, '-', "The month and day must be separated by '-'")?; + let (day, input) = day_frag(input)?; + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok((Date::new(year, month, day, timezone_offset)?, input)) } -// [15] durationLexicalRep ::= '-'? 'P' ((duYearMonthFrag duDayTimeFrag?) | duDayTimeFrag) -pub fn duration_lexical_rep(input: &str) -> XsdResult<'_, Duration> { - map( - tuple(( - opt(char('-')), - preceded( - char('P'), - alt(( - map( - tuple((du_year_month_frag, opt(du_day_time_frag))), - |(y, d)| Duration::new(y, d.unwrap_or_default()), - ), - map(du_day_time_frag, |d| Duration::new(0, d)), - )), - ), - )), - |(sign, duration)| { - if sign == Some('-') { - -duration - } else { - duration - } - }, - )(input) +pub fn parse_date(input: &str) -> Result { + ensure_complete(input, date_lexical_rep) } -// [16] dateTimeLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag 'T' ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag? -pub fn date_time_lexical_rep(input: &str) -> XsdResult<'_, DateTime> { - map_res( - tuple(( - year_frag, - char('-'), - month_frag, - char('-'), - day_frag, - char('T'), - alt(( - map( - tuple((hour_frag, char(':'), minute_frag, char(':'), second_frag)), - |(h, _, m, _, s)| (h, m, s), - ), - end_of_day_frag, - )), - opt(timezone_frag), - )), - |(year, _, month, _, day, _, (hours, minutes, seconds), timezone)| { - DateTime::new(year, month, day, hours, minutes, seconds, timezone) - }, - )(input) +// [19] gYearMonthLexicalRep ::= yearFrag '-' monthFrag timezoneFrag? 
+fn g_year_month_lexical_rep(input: &str) -> Result<(GYearMonth, &str), XsdParseError> { + let (year, input) = year_frag(input)?; + let input = expect_char(input, '-', "The year and month must be separated by '-'")?; + let (month, input) = month_frag(input)?; + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok((GYearMonth::new(year, month, timezone_offset)?, input)) } -// [17] timeLexicalRep ::= ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag? -pub fn time_lexical_rep(input: &str) -> XsdResult<'_, Time> { - map_res( - tuple(( - alt(( - map( - tuple((hour_frag, char(':'), minute_frag, char(':'), second_frag)), - |(h, _, m, _, s)| (h, m, s), - ), - end_of_day_frag, - )), - opt(timezone_frag), - )), - |((hours, minutes, seconds), timezone)| Time::new(hours, minutes, seconds, timezone), - )(input) +pub fn parse_g_year_month(input: &str) -> Result { + ensure_complete(input, g_year_month_lexical_rep) } -// [18] dateLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag timezoneFrag? Constraint: Day-of-month Representations -pub fn date_lexical_rep(input: &str) -> XsdResult<'_, Date> { - map_res( - tuple(( - year_frag, - char('-'), - month_frag, - char('-'), - day_frag, - opt(timezone_frag), - )), - |(year, _, month, _, day, timezone)| Date::new(year, month, day, timezone), - )(input) +// [20] gYearLexicalRep ::= yearFrag timezoneFrag? +fn g_year_lexical_rep(input: &str) -> Result<(GYear, &str), XsdParseError> { + let (year, input) = year_frag(input)?; + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok((GYear::new(year, timezone_offset)?, input)) } -// [19] gYearMonthLexicalRep ::= yearFrag '-' monthFrag timezoneFrag? 
-pub fn g_year_month_lexical_rep(input: &str) -> XsdResult<'_, GYearMonth> { - map_res( - tuple((year_frag, char('-'), month_frag, opt(timezone_frag))), - |(year, _, month, timezone)| GYearMonth::new(year, month, timezone), - )(input) +pub fn parse_g_year(input: &str) -> Result { + ensure_complete(input, g_year_lexical_rep) } -// [20] gYearLexicalRep ::= yearFrag timezoneFrag? -pub fn g_year_lexical_rep(input: &str) -> XsdResult<'_, GYear> { - map_res( - tuple((year_frag, opt(timezone_frag))), - |(year, timezone)| GYear::new(year, timezone), - )(input) +// [21] gMonthDayLexicalRep ::= '--' monthFrag '-' dayFrag timezoneFrag? Constraint: Day-of-month Representations +fn g_month_day_lexical_rep(input: &str) -> Result<(GMonthDay, &str), XsdParseError> { + let input = expect_char(input, '-', "gMonthDay values must start with '--'")?; + let input = expect_char(input, '-', "gMonthDay values must start with '--'")?; + let (month, input) = month_frag(input)?; + let input = expect_char(input, '-', "The month and day must be separated by '-'")?; + let (day, input) = day_frag(input)?; + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok((GMonthDay::new(month, day, timezone_offset)?, input)) } -// [21] gMonthDayLexicalRep ::= '--' monthFrag '-' dayFrag timezoneFrag? Constraint: Day-of-month Representations -pub fn g_month_day_lexical_rep(input: &str) -> XsdResult<'_, GMonthDay> { - map_res( - tuple(( - char('-'), - char('-'), - month_frag, - char('-'), - day_frag, - opt(timezone_frag), - )), - |(_, _, month, _, day, timezone)| GMonthDay::new(month, day, timezone), - )(input) +pub fn parse_g_month_day(input: &str) -> Result { + ensure_complete(input, g_month_day_lexical_rep) } // [22] gDayLexicalRep ::= '---' dayFrag timezoneFrag? 
-pub fn g_day_lexical_rep(input: &str) -> XsdResult<'_, GDay> { - map_res( - tuple(( - char('-'), - char('-'), - char('-'), - day_frag, - opt(timezone_frag), - )), - |(_, _, _, day, timezone)| GDay::new(day, timezone), - )(input) +fn g_day_lexical_rep(input: &str) -> Result<(GDay, &str), XsdParseError> { + let input = expect_char(input, '-', "gDay values must start with '---'")?; + let input = expect_char(input, '-', "gDay values must start with '---'")?; + let input = expect_char(input, '-', "gDay values must start with '---'")?; + let (day, input) = day_frag(input)?; + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok((GDay::new(day, timezone_offset)?, input)) } -// [23] gMonthLexicalRep ::= '--' monthFrag timezoneFrag? -pub fn g_month_lexical_rep(input: &str) -> XsdResult<'_, GMonth> { - map_res( - tuple((char('-'), char('-'), month_frag, opt(timezone_frag))), - |(_, _, month, timezone)| GMonth::new(month, timezone), - )(input) -} - -// [42] yearMonthDurationLexicalRep ::= '-'? 'P' duYearMonthFrag -pub fn year_month_duration_lexical_rep(input: &str) -> XsdResult<'_, YearMonthDuration> { - map( - tuple((opt(char('-')), preceded(char('P'), du_year_month_frag))), - |(sign, duration)| { - YearMonthDuration::new(if sign == Some('-') { - -duration - } else { - duration - }) - }, - )(input) +pub fn parse_g_day(input: &str) -> Result { + ensure_complete(input, g_day_lexical_rep) } -// [43] dayTimeDurationLexicalRep ::= '-'? 'P' duDayTimeFrag -pub fn day_time_duration_lexical_rep(input: &str) -> XsdResult<'_, DayTimeDuration> { - map( - tuple((opt(char('-')), preceded(char('P'), du_day_time_frag))), - |(sign, duration)| { - DayTimeDuration::new(if sign == Some('-') { - -duration - } else { - duration - }) - }, - )(input) +// [23] gMonthLexicalRep ::= '--' monthFrag timezoneFrag? 
+fn g_month_lexical_rep(input: &str) -> Result<(GMonth, &str), XsdParseError> { + let input = expect_char(input, '-', "gMonth values must start with '--'")?; + let input = expect_char(input, '-', "gMonth values must start with '--'")?; + let (month, input) = month_frag(input)?; + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok((GMonth::new(month, timezone_offset)?, input)) } -// [46] unsignedNoDecimalPtNumeral ::= digit+ -fn unsigned_no_decimal_pt_numeral(input: &str) -> XsdResult<'_, i64> { - map_res(digit1, i64::from_str)(input) +pub fn parse_g_month(input: &str) -> Result { + ensure_complete(input, g_month_lexical_rep) } // [56] yearFrag ::= '-'? (([1-9] digit digit digit+)) | ('0' digit digit digit)) -fn year_frag(input: &str) -> XsdResult<'_, i64> { - map_res( - recognize(tuple(( - opt(char('-')), - take_while_m_n(4, usize::MAX, |c: char| c.is_ascii_digit()), - ))), - i64::from_str, - )(input) +fn year_frag(input: &str) -> Result<(i64, &str), XsdParseError> { + let (sign, input) = if let Some(left) = input.strip_prefix('-') { + (-1, left) + } else { + (1, input) + }; + let (number_str, input) = integer_prefix(input); + let number = i64::from_str(number_str)?; + if number < 1000 && number_str.len() != 4 { + return Err(XsdParseError::msg( + "The years below 1000 must be encoded on exactly 4 digits", + )); + } + Ok((sign * number, input)) } // [57] monthFrag ::= ('0' [1-9]) | ('1' [0-2]) -fn month_frag(input: &str) -> XsdResult<'_, u8> { - map_res(take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), |v| { - parsed_u8_range(v, 1, 12) - })(input) +fn month_frag(input: &str) -> Result<(u8, &str), XsdParseError> { + let (number_str, input) = integer_prefix(input); + if number_str.len() != 2 { + return Err(XsdParseError::msg("Month must be encoded with two digits")); + } + let number = u8::from_str(number_str)?; + if !(1..=12).contains(&number) { + return Err(XsdParseError::msg("Month must be between 01 and 12")); + } + Ok((number, input)) } // 
[58] dayFrag ::= ('0' [1-9]) | ([12] digit) | ('3' [01]) -fn day_frag(input: &str) -> XsdResult<'_, u8> { - map_res(take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), |v| { - parsed_u8_range(v, 1, 31) - })(input) +fn day_frag(input: &str) -> Result<(u8, &str), XsdParseError> { + let (number_str, input) = integer_prefix(input); + if number_str.len() != 2 { + return Err(XsdParseError::msg("Day must be encoded with two digits")); + } + let number = u8::from_str(number_str)?; + if !(1..=31).contains(&number) { + return Err(XsdParseError::msg("Day must be between 01 and 31")); + } + Ok((number, input)) } // [59] hourFrag ::= ([01] digit) | ('2' [0-3]) -fn hour_frag(input: &str) -> XsdResult<'_, u8> { - map_res(take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), |v| { - parsed_u8_range(v, 0, 23) - })(input) +// We also allow 24 for ease of parsing +fn hour_frag(input: &str) -> Result<(u8, &str), XsdParseError> { + let (number_str, input) = integer_prefix(input); + if number_str.len() != 2 { + return Err(XsdParseError::msg("Hours must be encoded with two digits")); + } + let number = u8::from_str(number_str)?; + if !(0..=24).contains(&number) { + return Err(XsdParseError::msg("Hours must be between 00 and 24")); + } + Ok((number, input)) } // [60] minuteFrag ::= [0-5] digit -fn minute_frag(input: &str) -> XsdResult<'_, u8> { - map_res(take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), |v| { - parsed_u8_range(v, 0, 59) - })(input) +fn minute_frag(input: &str) -> Result<(u8, &str), XsdParseError> { + let (number_str, input) = integer_prefix(input); + if number_str.len() != 2 { + return Err(XsdParseError::msg( + "Minutes must be encoded with two digits", + )); + } + let number = u8::from_str(number_str)?; + if !(0..=59).contains(&number) { + return Err(XsdParseError::msg("Minutes must be between 00 and 59")); + } + Ok((number, input)) } // [61] secondFrag ::= ([0-5] digit) ('.' digit+)? 
-#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] -fn second_frag(input: &str) -> XsdResult<'_, Decimal> { - map_res( - recognize(tuple(( - take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), - opt(preceded( - char('.'), - take_while(|c: char| c.is_ascii_digit()), - )), - ))), - |v| { - let value = Decimal::from_str(v)?; - if Decimal::from(0) <= value && value < Decimal::from(60) { - Ok(value) - } else { - Err(XsdParseError { - kind: XsdParseErrorKind::OutOfIntegerRange { - value: value.as_i128() as u8, - min: 0, - max: 60, - }, - }) - } - }, - )(input) +fn second_frag(input: &str) -> Result<(Decimal, &str), XsdParseError> { + let (number_str, input) = decimal_prefix(input); + let (before_dot_str, _) = number_str.split_once('.').unwrap_or((number_str, "")); + if before_dot_str.len() != 2 { + return Err(XsdParseError::msg( + "Seconds must be encoded with two digits", + )); + } + let number = Decimal::from_str(number_str)?; + if number < Decimal::from(0) || number >= Decimal::from(60) { + return Err(XsdParseError::msg("Seconds must be between 00 and 60")); + } + if number_str.ends_with('.') { + return Err(XsdParseError::msg( + "Seconds are not allowed to end with a dot", + )); + } + Ok((number, input)) } -// [62] endOfDayFrag ::= '24:00:00' ('.' '0'+)? 
-fn end_of_day_frag(input: &str) -> XsdResult<'_, (u8, u8, Decimal)> { - map( - recognize(tuple(( - tag("24:00:00"), - opt(preceded(char('.'), many1(char('0')))), - ))), - |_| (24, 0, 0.into()), - )(input) +// [63] timezoneFrag ::= 'Z' | ('+' | '-') (('0' digit | '1' [0-3]) ':' minuteFrag | '14:00') +fn timezone_frag(input: &str) -> Result<(TimezoneOffset, &str), XsdParseError> { + if let Some(left) = input.strip_prefix('Z') { + return Ok((TimezoneOffset::UTC, left)); + } + let (sign, input) = if let Some(left) = input.strip_prefix('-') { + (-1, left) + } else if let Some(left) = input.strip_prefix('+') { + (1, left) + } else { + (1, input) + }; + + let (hour_str, input) = integer_prefix(input); + if hour_str.len() != 2 { + return Err(XsdParseError::msg( + "The timezone hours must be encoded with two digits", + )); + } + let hours = i16::from_str(hour_str)?; + + let input = expect_char( + input, + ':', + "The timezone hours and minutes must be separated by ':'", + )?; + let (minutes, input) = minute_frag(input)?; + + if hours > 13 && !(hours == 14 && minutes == 0) { + return Err(XsdParseError::msg( + "The timezone hours must be between 00 and 13", + )); + } + + Ok(( + TimezoneOffset::new(sign * (hours * 60 + i16::from(minutes)))?, + input, + )) } -// [63] timezoneFrag ::= 'Z' | ('+' | '-') (('0' digit | '1' [0-3]) ':' minuteFrag | '14:00') -fn timezone_frag(input: &str) -> XsdResult<'_, TimezoneOffset> { - alt(( - map(char('Z'), |_| TimezoneOffset::UTC), - map_res( - tuple(( - alt((map(char('+'), |_| 1), map(char('-'), |_| -1))), - alt(( - map( - tuple(( - map_res(take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), |v| { - parsed_u8_range(v, 0, 13) - }), - char(':'), - minute_frag, - )), - |(hours, _, minutes)| i16::from(hours) * 60 + i16::from(minutes), - ), - map(tag("14:00"), |_| 14 * 60), - )), - )), - |(sign, value)| TimezoneOffset::new(sign * value), - ), - ))(input) -} - -fn parsed_u8_range(input: &str, min: u8, max: u8) -> Result { - let value = 
u8::from_str(input)?; - if min <= value && value <= max { - Ok(value) +fn ensure_complete( + input: &str, + parse: impl FnOnce(&str) -> Result<(T, &str), XsdParseError>, +) -> Result { + let (result, left) = parse(input)?; + if !left.is_empty() { + return Err(XsdParseError::msg("Unrecognized value suffix")); + } + Ok(result) +} + +fn expect_char<'a>( + input: &'a str, + constant: char, + error_message: &'static str, +) -> Result<&'a str, XsdParseError> { + if let Some(left) = input.strip_prefix(constant) { + Ok(left) } else { - Err(XsdParseError { - kind: XsdParseErrorKind::OutOfIntegerRange { value, min, max }, - }) + Err(XsdParseError::msg(error_message)) } } -fn map_res<'a, O1, O2, E2: Into>( - mut first: impl FnMut(&'a str) -> XsdResult<'a, O1>, - mut second: impl FnMut(O1) -> Result, -) -> impl FnMut(&'a str) -> XsdResult<'a, O2> { - move |input| { - let (input, o1) = first(input)?; - Ok((input, second(o1).map_err(|e| Err::Error(e.into()))?)) +fn integer_prefix(input: &str) -> (&str, &str) { + let mut end = input.len(); + for (i, c) in input.char_indices() { + if !c.is_ascii_digit() { + end = i; + break; + } + } + input.split_at(end) +} + +fn decimal_prefix(input: &str) -> (&str, &str) { + let mut end = input.len(); + let mut dot_seen = false; + for (i, c) in input.char_indices() { + if c.is_ascii_digit() { + // Ok + } else if c == '.' 
&& !dot_seen { + dot_seen = true; + } else { + end = i; + break; + } } + input.split_at(end) +} + +fn optional_end( + input: &str, + parse: impl FnOnce(&str) -> Result<(T, &str), XsdParseError>, +) -> Result<(Option, &str), XsdParseError> { + Ok(if input.is_empty() { + (None, input) + } else { + let (result, input) = parse(input)?; + (Some(result), input) + }) } diff --git a/lib/sparesults/Cargo.toml b/lib/sparesults/Cargo.toml index eff7da14..007004eb 100644 --- a/lib/sparesults/Cargo.toml +++ b/lib/sparesults/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sparesults" -version = "0.1.7" +version = "0.1.8-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -19,7 +19,7 @@ rdf-star = ["oxrdf/rdf-star"] [dependencies] json-event-parser = "0.1" -oxrdf = { version = "0.1.5", path="../oxrdf" } +oxrdf = { version = "0.1.6-dev", path="../oxrdf" } quick-xml = "0.28" [package.metadata.docs.rs] diff --git a/lib/sparesults/src/csv.rs b/lib/sparesults/src/csv.rs index 14991fe6..b365c4ac 100644 --- a/lib/sparesults/src/csv.rs +++ b/lib/sparesults/src/csv.rs @@ -160,7 +160,7 @@ fn write_tsv_term<'a>(term: impl Into>, sink: &mut impl Write) -> io let value = literal.value(); if let Some(language) = literal.language() { write_tsv_quoted_str(value, sink)?; - write!(sink, "@{}", language) + write!(sink, "@{language}") } else { match literal.datatype() { xsd::BOOLEAN if is_turtle_boolean(value) => sink.write_all(value.as_bytes()), @@ -216,7 +216,7 @@ fn is_turtle_integer(value: &str) -> bool { } else if let Some(v) = value.strip_prefix(b"-") { value = v; } - !value.is_empty() && value.iter().all(|c| c.is_ascii_digit()) + !value.is_empty() && value.iter().all(u8::is_ascii_digit) } fn is_turtle_decimal(value: &str) -> bool { @@ -227,7 +227,7 @@ fn is_turtle_decimal(value: &str) -> bool { } else if let Some(v) = value.strip_prefix(b"-") { value = v; } - while value.first().map_or(false, |c| c.is_ascii_digit()) { + while value.first().map_or(false, 
u8::is_ascii_digit) { value = &value[1..]; } if let Some(v) = value.strip_prefix(b".") { @@ -235,7 +235,7 @@ fn is_turtle_decimal(value: &str) -> bool { } else { return false; } - !value.is_empty() && value.iter().all(|c| c.is_ascii_digit()) + !value.is_empty() && value.iter().all(u8::is_ascii_digit) } fn is_turtle_double(value: &str) -> bool { @@ -248,14 +248,14 @@ fn is_turtle_double(value: &str) -> bool { value = v; } let mut with_before = false; - while value.first().map_or(false, |c| c.is_ascii_digit()) { + while value.first().map_or(false, u8::is_ascii_digit) { value = &value[1..]; with_before = true; } let mut with_after = false; if let Some(v) = value.strip_prefix(b".") { value = v; - while value.first().map_or(false, |c| c.is_ascii_digit()) { + while value.first().map_or(false, u8::is_ascii_digit) { value = &value[1..]; with_after = true; } @@ -272,7 +272,7 @@ fn is_turtle_double(value: &str) -> bool { } else if let Some(v) = value.strip_prefix(b"-") { value = v; } - (with_before || with_after) && !value.is_empty() && value.iter().all(|c| c.is_ascii_digit()) + (with_before || with_after) && !value.is_empty() && value.iter().all(u8::is_ascii_digit) } pub enum TsvQueryResultsReader { diff --git a/lib/sparesults/src/lib.rs b/lib/sparesults/src/lib.rs index dc9a3075..b30c17f0 100644 --- a/lib/sparesults/src/lib.rs +++ b/lib/sparesults/src/lib.rs @@ -1,5 +1,4 @@ #![doc = include_str!("../README.md")] -#![deny(unsafe_code)] #![doc(test(attr(deny(warnings))))] #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] @@ -276,6 +275,7 @@ pub enum QueryResultsReader { /// } /// # Result::<(),sparesults::ParseError>::Ok(()) /// ``` +#[allow(clippy::rc_buffer)] pub struct SolutionsReader { variables: Rc>, solutions: SolutionsReaderKind, @@ -318,7 +318,7 @@ impl Iterator for SolutionsReader { SolutionsReaderKind::Tsv(reader) => reader.read_next(), } .transpose()? 
- .map(|values| (self.variables.clone(), values).into()), + .map(|values| (Rc::clone(&self.variables), values).into()), ) } } diff --git a/lib/sparesults/src/solution.rs b/lib/sparesults/src/solution.rs index b1be7c7d..a8059204 100644 --- a/lib/sparesults/src/solution.rs +++ b/lib/sparesults/src/solution.rs @@ -18,6 +18,7 @@ use std::rc::Rc; /// assert_eq!(solution.get("foo"), Some(&Literal::from(1).into())); // Get the value of the variable ?foo if it exists (here yes). /// assert_eq!(solution.get(1), None); // Get the value of the second column if it exists (here no). /// ``` +#[allow(clippy::rc_buffer)] pub struct QuerySolution { variables: Rc>, values: Vec>, @@ -69,7 +70,7 @@ impl QuerySolution { /// ``` #[inline] pub fn is_empty(&self) -> bool { - self.values.iter().all(|v| v.is_none()) + self.values.iter().all(Option::is_none) } /// Returns an iterator over bound variables. diff --git a/lib/sparesults/src/xml.rs b/lib/sparesults/src/xml.rs index fd0ed3b4..d4973fdf 100644 --- a/lib/sparesults/src/xml.rs +++ b/lib/sparesults/src/xml.rs @@ -186,6 +186,7 @@ impl XmlQueryResultsReader { //Read header loop { + buffer.clear(); let event = reader.read_event_into(&mut buffer)?; match event { Event::Start(event) => match state { @@ -275,7 +276,6 @@ impl XmlQueryResultsReader { Event::Eof => return Err(SyntaxError::msg("Unexpected early file end. All results file should have a and a or tag").into()), _ => (), } - buffer.clear(); } } } @@ -315,6 +315,7 @@ impl XmlSolutionsReader { let mut lang = None; let mut datatype = None; loop { + self.buffer.clear(); let event = self.reader.read_event_into(&mut self.buffer)?; match event { Event::Start(event) => match state { @@ -482,20 +483,31 @@ impl XmlSolutionsReader { } state = State::Triple; } - State::Uri => state = self.stack.pop().unwrap(), + State::Uri => { + state = self + .stack + .pop() + .ok_or_else(|| SyntaxError::msg("Empty stack"))? 
+ } State::BNode => { if term.is_none() { //We default to a random bnode term = Some(BlankNode::default().into()) } - state = self.stack.pop().unwrap() + state = self + .stack + .pop() + .ok_or_else(|| SyntaxError::msg("Empty stack"))? } State::Literal => { if term.is_none() { //We default to the empty literal term = Some(build_literal("", lang.take(), datatype.take())?.into()) } - state = self.stack.pop().unwrap(); + state = self + .stack + .pop() + .ok_or_else(|| SyntaxError::msg("Empty stack"))?; } State::Triple => { #[cfg(feature = "rdf-star")] @@ -530,7 +542,10 @@ impl XmlSolutionsReader { ) .into(), ); - state = self.stack.pop().unwrap(); + state = self + .stack + .pop() + .ok_or_else(|| SyntaxError::msg("Empty stack"))?; } else { return Err( SyntaxError::msg("A should contain a , a and an ").into() @@ -549,7 +564,6 @@ impl XmlSolutionsReader { Event::Eof => return Ok(None), _ => (), } - self.buffer.clear(); } } } diff --git a/lib/spargebra/Cargo.toml b/lib/spargebra/Cargo.toml index 101ef8a7..88efe608 100644 --- a/lib/spargebra/Cargo.toml +++ b/lib/spargebra/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spargebra" -version = "0.2.7" +version = "0.2.8-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -24,7 +24,7 @@ peg = "0.8" rand = "0.8" oxiri = "0.2" oxilangtag = "0.1" -oxrdf = { version = "0.1.5", path="../oxrdf" } +oxrdf = { version = "0.1.6-dev", path="../oxrdf" } [package.metadata.docs.rs] all-features = true diff --git a/lib/spargebra/src/lib.rs b/lib/spargebra/src/lib.rs index dc0e2aa7..e2d093f3 100644 --- a/lib/spargebra/src/lib.rs +++ b/lib/spargebra/src/lib.rs @@ -1,5 +1,4 @@ #![doc = include_str!("../README.md")] -#![deny(unsafe_code)] #![doc(test(attr(deny(warnings))))] #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] diff --git a/lib/spargebra/src/parser.rs b/lib/spargebra/src/parser.rs index 689b95ce..7779e31c 100644 --- 
a/lib/spargebra/src/parser.rs +++ b/lib/spargebra/src/parser.rs @@ -352,7 +352,7 @@ impl> From> for FocusedTripleOrPathPattern fn from(input: FocusedTriplePattern) -> Self { Self { focus: input.focus.into(), - patterns: input.patterns.into_iter().map(|p| p.into()).collect(), + patterns: input.patterns.into_iter().map(Into::into).collect(), } } } @@ -736,7 +736,7 @@ impl ParserState { let aggregates = self.aggregates.last_mut().ok_or("Unexpected aggregate")?; Ok(aggregates .iter() - .find_map(|(v, a)| if a == &agg { Some(v) } else { None }) + .find_map(|(v, a)| (a == &agg).then(|| v)) .cloned() .unwrap_or_else(|| { let new_var = variable(); @@ -884,13 +884,14 @@ impl<'a> Iterator for UnescapeCharsIterator<'a> { } match self.iter.next()? { '\\' => match self.iter.next() { - Some(ch) => match self.replacement.get(ch) { - Some(replace) => Some(replace), - None => { + Some(ch) => { + if let Some(replace) = self.replacement.get(ch) { + Some(replace) + } else { self.buffer = Some(ch); Some('\\') } - }, + } None => Some('\\'), }, c => Some(c), @@ -957,31 +958,24 @@ fn variable() -> Variable { parser! { //See https://www.w3.org/TR/turtle/#sec-grammar grammar parser(state: &mut ParserState) for str { - //[1] pub rule QueryUnit() -> Query = Query() - //[2] rule Query() -> Query = _ Prologue() _ q:(SelectQuery() / ConstructQuery() / DescribeQuery() / AskQuery()) _ { q } - //[3] pub rule UpdateInit() -> Vec = Update() - //[4] rule Prologue() = (BaseDecl() _ / PrefixDecl() _)* {} - //[5] rule BaseDecl() = i("BASE") _ i:IRIREF() { state.base_iri = Some(i) } - //[6] rule PrefixDecl() = i("PREFIX") _ ns:PNAME_NS() _ i:IRIREF() { state.namespaces.insert(ns.into(), i.into_inner()); } - //[7] rule SelectQuery() -> Query = s:SelectClause() _ d:DatasetClauses() _ w:WhereClause() _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? Ok(Query::Select { dataset: d, @@ -990,12 +984,10 @@ parser! 
{ }) } - //[8] rule SubSelect() -> GraphPattern = s:SelectClause() _ w:WhereClause() _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? build_select(s, w, g, h, o, l, v, state) } - //[9] rule SelectClause() -> Selection = i("SELECT") _ Selection_init() o:SelectClause_option() _ v:SelectClause_variables() { Selection { option: o, @@ -1016,7 +1008,6 @@ parser! { v:Var() _ { SelectionMember::Variable(v) } / "(" _ e:Expression() _ i("AS") _ v:Var() _ ")" _ { SelectionMember::Expression(e, v) } - //[10] rule ConstructQuery() -> Query = i("CONSTRUCT") _ c:ConstructTemplate() _ d:DatasetClauses() _ w:WhereClause() _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? Ok(Query::Construct { @@ -1041,7 +1032,6 @@ parser! { rule ConstructQuery_optional_triple_template() -> Vec = TriplesTemplate() / { Vec::new() } - //[11] rule DescribeQuery() -> Query = i("DESCRIBE") _ "*" _ d:DatasetClauses() w:WhereClause()? _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? Ok(Query::Describe { @@ -1065,7 +1055,6 @@ parser! { } rule DescribeQuery_item() -> NamedNodePattern = i:VarOrIri() _ { i } - //[12] rule AskQuery() -> Query = i("ASK") _ d:DatasetClauses() w:WhereClause() _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? Ok(Query::Ask { dataset: d, @@ -1074,7 +1063,6 @@ parser! { }) } - //[13] rule DatasetClause() -> (Option, Option) = i("FROM") _ d:(DefaultGraphClause() / NamedGraphClause()) { d } rule DatasetClauses() -> Option = d:DatasetClause() ** (_) { if d.is_empty() { @@ -1095,25 +1083,20 @@ parser! 
{ }) } - //[14] rule DefaultGraphClause() -> (Option, Option) = s:SourceSelector() { (Some(s), None) } - //[15] rule NamedGraphClause() -> (Option, Option) = i("NAMED") _ s:SourceSelector() { (None, Some(s)) } - //[16] rule SourceSelector() -> NamedNode = iri() - //[17] rule WhereClause() -> GraphPattern = i("WHERE")? _ p:GroupGraphPattern() { p } - //[19] rule GroupClause() -> (Vec, Vec<(Expression,Variable)>) = i("GROUP") _ i("BY") _ c:GroupCondition_item()+ { let mut projections: Vec<(Expression,Variable)> = Vec::new(); let clauses = c.into_iter().map(|(e, vo)| { @@ -1129,7 +1112,6 @@ parser! { } rule GroupCondition_item() -> (Expression, Option) = c:GroupCondition() _ { c } - //[20] rule GroupCondition() -> (Expression, Option) = e:BuiltInCall() { (e, None) } / e:FunctionCall() { (e, None) } / @@ -1137,75 +1119,59 @@ parser! { e:Var() { (e.into(), None) } rule GroupCondition_as() -> Variable = i("AS") _ v:Var() _ { v } - //[21] rule HavingClause() -> Expression = i("HAVING") _ e:HavingCondition()+ {? not_empty_fold(e.into_iter(), |a, b| Expression::And(Box::new(a), Box::new(b))) } - //[22] rule HavingCondition() -> Expression = Constraint() - //[23] rule OrderClause() -> Vec = i("ORDER") _ i("BY") _ c:OrderClause_item()+ { c } rule OrderClause_item() -> OrderExpression = c:OrderCondition() _ { c } - //[24] rule OrderCondition() -> OrderExpression = i("ASC") _ e: BrackettedExpression() { OrderExpression::Asc(e) } / i("DESC") _ e: BrackettedExpression() { OrderExpression::Desc(e) } / e: Constraint() { OrderExpression::Asc(e) } / v: Var() { OrderExpression::Asc(Expression::from(v)) } - //[25] rule LimitOffsetClauses() -> (usize, Option) = l:LimitClause() _ o:OffsetClause()? { (o.unwrap_or(0), Some(l)) } / o:OffsetClause() _ l:LimitClause()? { (o, l) } - //[26] rule LimitClause() -> usize = i("LIMIT") _ l:$(INTEGER()) {? 
usize::from_str(l).map_err(|_| "The query limit should be a non negative integer") } - //[27] rule OffsetClause() -> usize = i("OFFSET") _ o:$(INTEGER()) {? usize::from_str(o).map_err(|_| "The query offset should be a non negative integer") } - //[28] rule ValuesClause() -> Option = i("VALUES") _ p:DataBlock() { Some(p) } / { None } - - //[29] rule Update() -> Vec = _ Prologue() _ u:(Update1() ** (_ ";" _)) _ ( ";" _)? { u.into_iter().flatten().collect() } - //[30] rule Update1() -> Vec = Load() / Clear() / Drop() / Add() / Move() / Copy() / Create() / InsertData() / DeleteData() / DeleteWhere() / Modify() rule Update1_silent() -> bool = i("SILENT") { true } / { false } - //[31] rule Load() -> Vec = i("LOAD") _ silent:Update1_silent() _ source:iri() _ destination:Load_to()? { vec![GraphUpdateOperation::Load { silent, source, destination: destination.map_or(GraphName::DefaultGraph, GraphName::NamedNode) }] } rule Load_to() -> NamedNode = i("INTO") _ g: GraphRef() { g } - //[32] rule Clear() -> Vec = i("CLEAR") _ silent:Update1_silent() _ graph:GraphRefAll() { vec![GraphUpdateOperation::Clear { silent, graph }] } - //[33] rule Drop() -> Vec = i("DROP") _ silent:Update1_silent() _ graph:GraphRefAll() { vec![GraphUpdateOperation::Drop { silent, graph }] } - //[34] rule Create() -> Vec = i("CREATE") _ silent:Update1_silent() _ graph:GraphRef() { vec![GraphUpdateOperation::Create { silent, graph }] } - //[35] rule Add() -> Vec = i("ADD") _ silent:Update1_silent() _ from:GraphOrDefault() _ i("TO") _ to:GraphOrDefault() { // Rewriting defined by https://www.w3.org/TR/sparql11-update/#add if from == to { @@ -1216,7 +1182,6 @@ parser! { } } - //[36] rule Move() -> Vec = i("MOVE") _ silent:Update1_silent() _ from:GraphOrDefault() _ i("TO") _ to:GraphOrDefault() { // Rewriting defined by https://www.w3.org/TR/sparql11-update/#move if from == to { @@ -1227,7 +1192,6 @@ parser! 
{ } } - //[37] rule Copy() -> Vec = i("COPY") _ silent:Update1_silent() _ from:GraphOrDefault() _ i("TO") _ to:GraphOrDefault() { // Rewriting defined by https://www.w3.org/TR/sparql11-update/#copy if from == to { @@ -1238,17 +1202,14 @@ parser! { } } - //[38] rule InsertData() -> Vec = i("INSERT") _ i("DATA") _ data:QuadData() { vec![GraphUpdateOperation::InsertData { data }] } - //[39] rule DeleteData() -> Vec = i("DELETE") _ i("DATA") _ data:GroundQuadData() { vec![GraphUpdateOperation::DeleteData { data }] } - //[40] rule DeleteWhere() -> Vec = i("DELETE") _ i("WHERE") _ d:QuadPattern() {? let pattern = d.iter().map(|q| { let bgp = GraphPattern::Bgp { patterns: vec![TriplePattern::new(q.subject.clone(), q.predicate.clone(), q.object.clone())] }; @@ -1267,7 +1228,6 @@ parser! { }]) } - //[41] rule Modify() -> Vec = with:Modify_with()? _ Modify_clear() c:Modify_clauses() _ u:(UsingClause() ** (_)) _ i("WHERE") _ pattern:GroupGraphPattern() { let (delete, insert) = c; let mut delete = delete.unwrap_or_default(); @@ -1335,15 +1295,12 @@ parser! { state.currently_used_bnodes.clear(); } - //[42] rule DeleteClause() -> Vec = i("DELETE") _ q:QuadPattern() {? q.into_iter().map(GroundQuadPattern::try_from).collect::,_>>().map_err(|_| "Blank nodes are not allowed in DELETE WHERE") } - //[43] rule InsertClause() -> Vec = i("INSERT") _ q:QuadPattern() { q } - //[44] rule UsingClause() -> (Option, Option) = i("USING") _ d:(UsingClause_default() / UsingClause_named()) { d } rule UsingClause_default() -> (Option, Option) = i:iri() { (Some(i), None) @@ -1352,26 +1309,21 @@ parser! { (None, Some(i)) } - //[45] rule GraphOrDefault() -> GraphName = i("DEFAULT") { GraphName::DefaultGraph } / (i("GRAPH") _)? 
g:iri() { GraphName::NamedNode(g) } - //[46] rule GraphRef() -> NamedNode = i("GRAPH") _ g:iri() { g } - //[47] rule GraphRefAll() -> GraphTarget = i: GraphRef() { i.into() } / i("DEFAULT") { GraphTarget::DefaultGraph } / i("NAMED") { GraphTarget::NamedGraphs } / i("ALL") { GraphTarget::AllGraphs } - //[48] rule QuadPattern() -> Vec = "{" _ q:Quads() _ "}" { q } - //[49] rule QuadData() -> Vec = "{" _ q:Quads() _ "}" {? q.into_iter().map(Quad::try_from).collect::, ()>>().map_err(|_| "Variables are not allowed in INSERT DATA") } @@ -1379,7 +1331,6 @@ parser! { q.into_iter().map(|q| GroundQuad::try_from(Quad::try_from(q)?)).collect::, ()>>().map_err(|_| "Variables and blank nodes are not allowed in DELETE DATA") } - //[50] rule Quads() -> Vec = q:(Quads_TriplesTemplate() / Quads_QuadsNotTriples()) ** (_) { q.into_iter().flatten().collect() } @@ -1388,18 +1339,15 @@ parser! { } //TODO: return iter? rule Quads_QuadsNotTriples() -> Vec = q:QuadsNotTriples() _ "."? { q } - //[51] rule QuadsNotTriples() -> Vec = i("GRAPH") _ g:VarOrIri() _ "{" _ t:TriplesTemplate()? _ "}" { t.unwrap_or_default().into_iter().map(|t| QuadPattern::new(t.subject, t.predicate, t.object, g.clone())).collect() } - //[52] rule TriplesTemplate() -> Vec = ts:TriplesTemplate_inner() ++ (".") ("." _)? { ts.into_iter().flatten().collect() } rule TriplesTemplate_inner() -> Vec = _ t:TriplesSameSubject() _ { t } - //[53] rule GroupGraphPattern() -> GraphPattern = "{" _ GroupGraphPattern_clear() p:GroupGraphPatternSub() GroupGraphPattern_clear() _ "}" { p } / "{" _ GroupGraphPattern_clear() p:SubSelect() GroupGraphPattern_clear() _ "}" { p } @@ -1409,7 +1357,6 @@ parser! { state.currently_used_bnodes.clear(); } - //[54] rule GroupGraphPatternSub() -> GraphPattern = a:TriplesBlock()? _ b:GroupGraphPatternSub_item()* {? let mut filter: Option = None; let mut g = a.map_or_else(GraphPattern::default, build_bgp); @@ -1471,16 +1418,13 @@ parser! 
{ result } - //[55] rule TriplesBlock() -> Vec = hs:TriplesBlock_inner() ++ (".") ("." _)? { hs.into_iter().flatten().collect() } rule TriplesBlock_inner() -> Vec = _ h:TriplesSameSubjectPath() _ { h } - //[56] rule GraphPatternNotTriples() -> PartialGraphPattern = GroupOrUnionGraphPattern() / OptionalGraphPattern() / LateralGraphPattern() / MinusGraphPattern() / GraphGraphPattern() / ServiceGraphPattern() / Filter() / Bind() / InlineData() - //[57] rule OptionalGraphPattern() -> PartialGraphPattern = i("OPTIONAL") _ p:GroupGraphPattern() { if let GraphPattern::Filter { expr, inner } = p { PartialGraphPattern::Optional(*inner, Some(expr)) @@ -1494,36 +1438,29 @@ parser! { #[cfg(not(feature = "sep-0006"))]{Err("The LATERAL modifier is not supported")} } - //[58] rule GraphGraphPattern() -> PartialGraphPattern = i("GRAPH") _ name:VarOrIri() _ p:GroupGraphPattern() { PartialGraphPattern::Other(GraphPattern::Graph { name, inner: Box::new(p) }) } - //[59] rule ServiceGraphPattern() -> PartialGraphPattern = i("SERVICE") _ i("SILENT") _ name:VarOrIri() _ p:GroupGraphPattern() { PartialGraphPattern::Other(GraphPattern::Service { name, inner: Box::new(p), silent: true }) } / i("SERVICE") _ name:VarOrIri() _ p:GroupGraphPattern() { PartialGraphPattern::Other(GraphPattern::Service{ name, inner: Box::new(p), silent: false }) } - //[60] rule Bind() -> PartialGraphPattern = i("BIND") _ "(" _ e:Expression() _ i("AS") _ v:Var() _ ")" { PartialGraphPattern::Bind(e, v) } - //[61] rule InlineData() -> PartialGraphPattern = i("VALUES") _ p:DataBlock() { PartialGraphPattern::Other(p) } - //[62] rule DataBlock() -> GraphPattern = l:(InlineDataOneVar() / InlineDataFull()) { GraphPattern::Values { variables: l.0, bindings: l.1 } } - //[63] rule InlineDataOneVar() -> (Vec, Vec>>) = var:Var() _ "{" _ d:InlineDataOneVar_value()* "}" { (vec![var], d) } rule InlineDataOneVar_value() -> Vec> = t:DataBlockValue() _ { vec![t] } - //[64] rule InlineDataFull() -> (Vec, Vec>>) = "(" _ 
vars:InlineDataFull_var()* _ ")" _ "{" _ vals:InlineDataFull_values()* "}" {? if vals.iter().all(|vs| vs.len() == vars.len()) { Ok((vars, vals)) @@ -1535,9 +1472,8 @@ parser! { rule InlineDataFull_values() -> Vec> = "(" _ v:InlineDataFull_value()* _ ")" _ { v } rule InlineDataFull_value() -> Option = v:DataBlockValue() _ { v } - //[65] rule DataBlockValue() -> Option = - t:EmbTriple() {? + t:QuotedTripleData() {? #[cfg(feature = "rdf-star")]{Ok(Some(t.into()))} #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} } / @@ -1547,12 +1483,10 @@ parser! { l:BooleanLiteral() { Some(l.into()) } / i("UNDEF") { None } - //[66] rule MinusGraphPattern() -> PartialGraphPattern = i("MINUS") _ p: GroupGraphPattern() { PartialGraphPattern::Minus(p) } - //[67] rule GroupOrUnionGraphPattern() -> PartialGraphPattern = p:GroupOrUnionGraphPattern_item() **<1,> (i("UNION") _) {? not_empty_fold(p.into_iter(), |a, b| { GraphPattern::Union { left: Box::new(a), right: Box::new(b) } @@ -1560,43 +1494,35 @@ parser! { } rule GroupOrUnionGraphPattern_item() -> GraphPattern = p:GroupGraphPattern() _ { p } - //[68] rule Filter() -> PartialGraphPattern = i("FILTER") _ c:Constraint() { PartialGraphPattern::Filter(c) } - //[69] rule Constraint() -> Expression = BrackettedExpression() / FunctionCall() / BuiltInCall() - //[70] rule FunctionCall() -> Expression = f: iri() _ a: ArgList() { Expression::FunctionCall(Function::Custom(f), a) } - //[71] rule ArgList() -> Vec = "(" _ e:ArgList_item() **<1,> ("," _) _ ")" { e } / NIL() { Vec::new() } rule ArgList_item() -> Expression = e:Expression() _ { e } - //[72] rule ExpressionList() -> Vec = "(" _ e:ExpressionList_item() **<1,> ("," _) ")" { e } / NIL() { Vec::new() } rule ExpressionList_item() -> Expression = e:Expression() _ { e } - //[73] rule ConstructTemplate() -> Vec = "{" _ t:ConstructTriples() _ "}" { t } - //[74] rule ConstructTriples() -> Vec = p:ConstructTriples_item() ** ("." _) "."? 
{ - p.into_iter().flat_map(|c| c.into_iter()).collect() + p.into_iter().flatten().collect() } rule ConstructTriples_item() -> Vec = t:TriplesSameSubject() _ { t } - //[75] rule TriplesSameSubject() -> Vec = - s:VarOrTermOrEmbTP() _ po:PropertyListNotEmpty() {? + s:VarOrTerm() _ po:PropertyListNotEmpty() {? let mut patterns = po.patterns; for (p, os) in po.focus { for o in os { @@ -1616,12 +1542,10 @@ parser! { Ok(patterns) } - //[76] rule PropertyList() -> FocusedTriplePattern)>> = PropertyListNotEmpty() / { FocusedTriplePattern::default() } - //[77] rule PropertyListNotEmpty() -> FocusedTriplePattern)>> = l:PropertyListNotEmpty_item() **<1,> (";" _) { l.into_iter().fold(FocusedTriplePattern::)>>::default(), |mut a, b| { a.focus.push(b.focus); @@ -1636,10 +1560,8 @@ parser! { } } - //[78] rule Verb() -> NamedNodePattern = VarOrIri() / "a" { rdf::TYPE.into_owned().into() } - //[79] rule ObjectList() -> FocusedTriplePattern> = o:ObjectList_item() **<1,> ("," _) { o.into_iter().fold(FocusedTriplePattern::>::default(), |mut a, b| { a.focus.push(b.focus); @@ -1649,8 +1571,7 @@ parser! { } rule ObjectList_item() -> FocusedTriplePattern = o:Object() _ { o } - //[80] - rule Object() -> FocusedTriplePattern = g:GraphNode() _ a:AnnotationPattern()? { + rule Object() -> FocusedTriplePattern = g:GraphNode() _ a:Annotation()? { if let Some(a) = a { let mut patterns = g.patterns; patterns.extend(a.patterns); @@ -1672,9 +1593,8 @@ parser! { } } - //[81] rule TriplesSameSubjectPath() -> Vec = - s:VarOrTermOrEmbTP() _ po:PropertyListPathNotEmpty() {? + s:VarOrTerm() _ po:PropertyListPathNotEmpty() {? let mut patterns = po.patterns; for (p, os) in po.focus { for o in os { @@ -1694,14 +1614,12 @@ parser! 
{ Ok(patterns) } - //[82] rule PropertyListPath() -> FocusedTripleOrPathPattern)>> = PropertyListPathNotEmpty() / { FocusedTripleOrPathPattern::default() } - //[83] rule PropertyListPathNotEmpty() -> FocusedTripleOrPathPattern)>> = hp:(VerbPath() / VerbSimple()) _ ho:ObjectListPath() _ t:PropertyListPathNotEmpty_item()* { - t.into_iter().flat_map(|e| e.into_iter()).fold(FocusedTripleOrPathPattern { + t.into_iter().flatten().fold(FocusedTripleOrPathPattern { focus: vec![(hp, ho.focus)], patterns: ho.patterns }, |mut a, b| { @@ -1720,17 +1638,14 @@ parser! { } } - //[84] rule VerbPath() -> VariableOrPropertyPath = p:Path() { p.into() } - //[85] rule VerbSimple() -> VariableOrPropertyPath = v:Var() { v.into() } - //[86] rule ObjectListPath() -> FocusedTripleOrPathPattern> = o:ObjectListPath_item() **<1,> ("," _) { o.into_iter().fold(FocusedTripleOrPathPattern::>::default(), |mut a, b| { a.focus.push(b.focus); @@ -1740,8 +1655,7 @@ parser! { } rule ObjectListPath_item() -> FocusedTripleOrPathPattern = o:ObjectPath() _ { o } - //[87] - rule ObjectPath() -> FocusedTripleOrPathPattern = g:GraphNodePath() _ a:AnnotationPatternPath()? { + rule ObjectPath() -> FocusedTripleOrPathPattern = g:GraphNodePath() _ a:AnnotationPath()? { if let Some(a) = a { let mut patterns = g.patterns; patterns.extend(a.patterns); @@ -1763,10 +1677,8 @@ parser! { } } - //[88] rule Path() -> PropertyPathExpression = PathAlternative() - //[89] rule PathAlternative() -> PropertyPathExpression = p:PathAlternative_item() **<1,> ("|" _) {? not_empty_fold(p.into_iter(), |a, b| { PropertyPathExpression::Alternative(Box::new(a), Box::new(b)) @@ -1774,7 +1686,6 @@ parser! { } rule PathAlternative_item() -> PropertyPathExpression = p:PathSequence() _ { p } - //[90] rule PathSequence() -> PropertyPathExpression = p:PathSequence_item() **<1,> ("/" _) {? not_empty_fold(p.into_iter(), |a, b| { PropertyPathExpression::Sequence(Box::new(a), Box::new(b)) @@ -1782,7 +1693,6 @@ parser! 
{ } rule PathSequence_item() -> PropertyPathExpression = p:PathEltOrInverse() _ { p } - //[91] rule PathElt() -> PropertyPathExpression = p:PathPrimary() _ o:PathElt_op()? { match o { Some('?') => PropertyPathExpression::ZeroOrOne(Box::new(p)), @@ -1797,19 +1707,16 @@ parser! { "+" { '+' } / "?" !(['0'..='9'] / PN_CHARS_U()) { '?' } // We mandate that this is not a variable - //[92] rule PathEltOrInverse() -> PropertyPathExpression = "^" _ p:PathElt() { PropertyPathExpression::Reverse(Box::new(p)) } / PathElt() - //[94] rule PathPrimary() -> PropertyPathExpression = v:iri() { v.into() } / "a" { rdf::TYPE.into_owned().into() } / "!" _ p:PathNegatedPropertySet() { p } / "(" _ p:Path() _ ")" { p } - //[95] rule PathNegatedPropertySet() -> PropertyPathExpression = "(" _ p:PathNegatedPropertySet_item() **<1,> ("|" _) ")" { let mut direct = Vec::new(); @@ -1839,17 +1746,14 @@ parser! { } rule PathNegatedPropertySet_item() -> Either = p:PathOneInPropertySet() _ { p } - //[96] rule PathOneInPropertySet() -> Either = "^" _ v:iri() { Either::Right(v) } / "^" _ "a" { Either::Right(rdf::TYPE.into()) } / v:iri() { Either::Left(v) } / "a" { Either::Left(rdf::TYPE.into()) } - //[98] rule TriplesNode() -> FocusedTriplePattern = Collection() / BlankNodePropertyList() - //[99] rule BlankNodePropertyList() -> FocusedTriplePattern = "[" _ po:PropertyListNotEmpty() _ "]" {? let mut patterns = po.patterns; let mut bnode = TermPattern::from(BlankNode::default()); @@ -1864,10 +1768,8 @@ parser! { }) } - //[100] rule TriplesNodePath() -> FocusedTripleOrPathPattern = CollectionPath() / BlankNodePropertyListPath() - //[101] rule BlankNodePropertyListPath() -> FocusedTripleOrPathPattern = "[" _ po:PropertyListPathNotEmpty() _ "]" {? let mut patterns = po.patterns; let mut bnode = TermPattern::from(BlankNode::default()); @@ -1882,7 +1784,6 @@ parser! 
{ }) } - //[102] rule Collection() -> FocusedTriplePattern = "(" _ o:Collection_item()+ ")" { let mut patterns: Vec = Vec::new(); let mut current_list_node = TermPattern::from(rdf::NIL.into_owned()); @@ -1900,7 +1801,6 @@ parser! { } rule Collection_item() -> FocusedTriplePattern = o:GraphNode() _ { o } - //[103] rule CollectionPath() -> FocusedTripleOrPathPattern = "(" _ o:CollectionPath_item()+ _ ")" { let mut patterns: Vec = Vec::new(); let mut current_list_node = TermPattern::from(rdf::NIL.into_owned()); @@ -1918,30 +1818,59 @@ parser! { } rule CollectionPath_item() -> FocusedTripleOrPathPattern = p:GraphNodePath() _ { p } - //[104] + + rule Annotation() -> FocusedTriplePattern)>> = "{|" _ a:PropertyListNotEmpty() _ "|}" { a } + + rule AnnotationPath() -> FocusedTripleOrPathPattern)>> = "{|" _ a: PropertyListPathNotEmpty() _ "|}" { a } + rule GraphNode() -> FocusedTriplePattern = - t:VarOrTermOrEmbTP() { FocusedTriplePattern::new(t) } / + t:VarOrTerm() { FocusedTriplePattern::new(t) } / TriplesNode() - //[105] rule GraphNodePath() -> FocusedTripleOrPathPattern = - t:VarOrTermOrEmbTP() { FocusedTripleOrPathPattern::new(t) } / + t:VarOrTerm() { FocusedTripleOrPathPattern::new(t) } / TriplesNodePath() - //[106] rule VarOrTerm() -> TermPattern = v:Var() { v.into() } / + t:QuotedTriple() {? + #[cfg(feature = "rdf-star")]{Ok(t.into())} + #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} + } / t:GraphTerm() { t.into() } - //[107] + rule QuotedTriple() -> TriplePattern = "<<" _ s:VarOrTerm() _ p:Verb() _ o:VarOrTerm() _ ">>" {? + Ok(TriplePattern { + subject: s, + predicate: p, + object: o + }) + } + + rule QuotedTripleData() -> GroundTriple = "<<" _ s:DataValueTerm() _ p:QuotedTripleData_p() _ o:DataValueTerm() _ ">>" {? 
+ Ok(GroundTriple { + subject: s.try_into().map_err(|_| "Literals are not allowed in subject position of nested patterns")?, + predicate: p, + object: o + }) + } + rule QuotedTripleData_p() -> NamedNode = i: iri() { i } / "a" { rdf::TYPE.into() } + + rule DataValueTerm() -> GroundTerm = i:iri() { i.into() } / + l:RDFLiteral() { l.into() } / + l:NumericLiteral() { l.into() } / + l:BooleanLiteral() { l.into() } / + t:QuotedTripleData() {? + #[cfg(feature = "rdf-star")]{Ok(t.into())} + #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} + } + rule VarOrIri() -> NamedNodePattern = v:Var() { v.into() } / i:iri() { i.into() } - //[108] rule Var() -> Variable = name:(VAR1() / VAR2()) { Variable::new_unchecked(name) } - //[109] rule GraphTerm() -> Term = i:iri() { i.into() } / l:RDFLiteral() { l.into() } / @@ -1950,25 +1879,20 @@ parser! { b:BlankNode() { b.into() } / NIL() { rdf::NIL.into_owned().into() } - //[110] rule Expression() -> Expression = e:ConditionalOrExpression() {e} - //[111] rule ConditionalOrExpression() -> Expression = e:ConditionalOrExpression_item() **<1,> ("||" _) {? not_empty_fold(e.into_iter(), |a, b| Expression::Or(Box::new(a), Box::new(b))) } rule ConditionalOrExpression_item() -> Expression = e:ConditionalAndExpression() _ { e } - //[112] rule ConditionalAndExpression() -> Expression = e:ConditionalAndExpression_item() **<1,> ("&&" _) {? not_empty_fold(e.into_iter(), |a, b| Expression::And(Box::new(a), Box::new(b))) } rule ConditionalAndExpression_item() -> Expression = e:ValueLogical() _ { e } - //[113] rule ValueLogical() -> Expression = RelationalExpression() - //[114] rule RelationalExpression() -> Expression = a:NumericExpression() _ o: RelationalExpression_inner()? { match o { Some(("=", Some(b), None)) => Expression::Equal(Box::new(a), Box::new(b)), Some(("!=", Some(b), None)) => Expression::Not(Box::new(Expression::Equal(Box::new(a), Box::new(b)))), @@ -1986,10 +1910,8 @@ parser! 
{ i("IN") _ l:ExpressionList() { ("IN", None, Some(l)) } / i("NOT") _ i("IN") _ l:ExpressionList() { ("NOT IN", None, Some(l)) } - //[115] rule NumericExpression() -> Expression = AdditiveExpression() - //[116] rule AdditiveExpression() -> Expression = a:MultiplicativeExpression() _ o:AdditiveExpression_inner()? { match o { Some(("+", b)) => Expression::Add(Box::new(a), Box::new(b)), Some(("-", b)) => Expression::Subtract(Box::new(a), Box::new(b)), @@ -2000,7 +1922,6 @@ parser! { (s, e) } - //[117] rule MultiplicativeExpression() -> Expression = a:UnaryExpression() _ o: MultiplicativeExpression_inner()? { match o { Some(("*", b)) => Expression::Multiply(Box::new(a), Box::new(b)), Some(("/", b)) => Expression::Divide(Box::new(a), Box::new(b)), @@ -2011,7 +1932,6 @@ parser! { (s, e) } - //[118] rule UnaryExpression() -> Expression = s: $("!" / "+" / "-")? _ e:PrimaryExpression() { match s { Some("!") => Expression::Not(Box::new(e)), Some("+") => Expression::UnaryPlus(Box::new(e)), @@ -2020,10 +1940,9 @@ parser! { None => e, } } - //[119] rule PrimaryExpression() -> Expression = BrackettedExpression() / - ExprEmbTP() / + ExprQuotedTriple() / iriOrFunction() / v:Var() { v.into() } / l:RDFLiteral() { l.into() } / @@ -2031,12 +1950,23 @@ parser! { l:BooleanLiteral() { l.into() } / BuiltInCall() - //[120] + rule ExprVarOrTerm() -> Expression = + ExprQuotedTriple() / + i:iri() { i.into() } / + l:RDFLiteral() { l.into() } / + l:NumericLiteral() { l.into() } / + l:BooleanLiteral() { l.into() } / + v:Var() { v.into() } + + rule ExprQuotedTriple() -> Expression = "<<" _ s:ExprVarOrTerm() _ p:Verb() _ o:ExprVarOrTerm() _ ">>" {? + #[cfg(feature = "rdf-star")]{Ok(Expression::FunctionCall(Function::Triple, vec![s, p.into(), o]))} + #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} + } + rule BrackettedExpression() -> Expression = "(" _ e:Expression() _ ")" { e } - //[121] rule BuiltInCall() -> Expression = - a:Aggregate() {? 
state.new_aggregation(a).map(|v| v.into()) } / + a:Aggregate() {? state.new_aggregation(a).map(Into::into) } / i("STR") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Str, vec![e]) } / i("LANG") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Lang, vec![e]) } / i("LANGMATCHES") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::LangMatches, vec![a, b]) } / @@ -2115,7 +2045,6 @@ parser! { #[cfg(not(feature = "sep-0002"))]{Err("The ADJUST function is only available in SPARQL 1.2 SEP 0002")} } - //[122] rule RegexExpression() -> Expression = i("REGEX") _ "(" _ a:Expression() _ "," _ b:Expression() _ "," _ c:Expression() _ ")" { Expression::FunctionCall(Function::Regex, vec![a, b, c]) } / i("REGEX") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::Regex, vec![a, b]) } @@ -2126,18 +2055,14 @@ parser! { i("SUBSTR") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::SubStr, vec![a, b]) } - //[124] rule StrReplaceExpression() -> Expression = i("REPLACE") _ "(" _ a:Expression() _ "," _ b:Expression() _ "," _ c:Expression() _ "," _ d:Expression() _ ")" { Expression::FunctionCall(Function::Replace, vec![a, b, c, d]) } / i("REPLACE") _ "(" _ a:Expression() _ "," _ b:Expression() _ "," _ c:Expression() _ ")" { Expression::FunctionCall(Function::Replace, vec![a, b, c]) } - //[125] rule ExistsFunc() -> Expression = i("EXISTS") _ p:GroupGraphPattern() { Expression::Exists(Box::new(p)) } - //[126] rule NotExistsFunc() -> Expression = i("NOT") _ i("EXISTS") _ p:GroupGraphPattern() { Expression::Not(Box::new(Expression::Exists(Box::new(p)))) } - //[127] rule Aggregate() -> AggregateExpression = i("COUNT") _ "(" _ i("DISTINCT") _ "*" _ ")" { AggregateExpression::Count { expr: None, distinct: true } } / i("COUNT") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregateExpression::Count { expr: Some(Box::new(e)), distinct: true } } / 
@@ -2160,7 +2085,6 @@ parser! { name:iri() _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregateExpression::Custom { name, expr: Box::new(e), distinct: true } } / name:iri() _ "(" _ e:Expression() _ ")" { AggregateExpression::Custom { name, expr: Box::new(e), distinct: false } } - //[128] rule iriOrFunction() -> Expression = i: iri() _ a: ArgList()? { match a { Some(a) => Expression::FunctionCall(Function::Custom(i), a), @@ -2168,48 +2092,39 @@ parser! { } } - //[129] rule RDFLiteral() -> Literal = value:String() _ "^^" _ datatype:iri() { Literal::new_typed_literal(value, datatype) } / value:String() _ language:LANGTAG() { Literal::new_language_tagged_literal_unchecked(value, language.into_inner()) } / value:String() { Literal::new_simple_literal(value) } - //[130] rule NumericLiteral() -> Literal = NumericLiteralUnsigned() / NumericLiteralPositive() / NumericLiteralNegative() - //[131] rule NumericLiteralUnsigned() -> Literal = d:$(DOUBLE()) { Literal::new_typed_literal(d, xsd::DOUBLE) } / d:$(DECIMAL()) { Literal::new_typed_literal(d, xsd::DECIMAL) } / i:$(INTEGER()) { Literal::new_typed_literal(i, xsd::INTEGER) } - //[132] rule NumericLiteralPositive() -> Literal = d:$(DOUBLE_POSITIVE()) { Literal::new_typed_literal(d, xsd::DOUBLE) } / d:$(DECIMAL_POSITIVE()) { Literal::new_typed_literal(d, xsd::DECIMAL) } / i:$(INTEGER_POSITIVE()) { Literal::new_typed_literal(i, xsd::INTEGER) } - //[133] rule NumericLiteralNegative() -> Literal = d:$(DOUBLE_NEGATIVE()) { Literal::new_typed_literal(d, xsd::DOUBLE) } / d:$(DECIMAL_NEGATIVE()) { Literal::new_typed_literal(d, xsd::DECIMAL) } / i:$(INTEGER_NEGATIVE()) { Literal::new_typed_literal(i, xsd::INTEGER) } - //[134] rule BooleanLiteral() -> Literal = "true" { Literal::new_typed_literal("true", xsd::BOOLEAN) } / "false" { Literal::new_typed_literal("false", xsd::BOOLEAN) } - //[135] rule String() -> String = STRING_LITERAL_LONG1() / STRING_LITERAL_LONG2() / STRING_LITERAL1() / STRING_LITERAL2() - //[136] rule iri() -> 
NamedNode = i:(IRIREF() / PrefixedName()) { NamedNode::new_unchecked(i.into_inner()) } - //[137] rule PrefixedName() -> Iri = PNAME_LN() / ns:PNAME_NS() {? if let Some(iri) = state.namespaces.get(ns).cloned() { Iri::parse(iri).map_err(|_| "IRI parsing failed") @@ -2217,7 +2132,6 @@ parser! { Err("Prefix not found") } } - //[138] rule BlankNode() -> BlankNode = id:BLANK_NODE_LABEL() {? let node = BlankNode::new_unchecked(id); if state.used_bnodes.contains(&node) { @@ -2228,17 +2142,14 @@ parser! { } } / ANON() { BlankNode::default() } - //[139] rule IRIREF() -> Iri = "<" i:$((!['>'] [_])*) ">" {? state.parse_iri(i).map_err(|_| "IRI parsing failed") } - //[140] rule PNAME_NS() -> &'input str = ns:$(PN_PREFIX()?) ":" { ns } - //[141] rule PNAME_LN() -> Iri = ns:PNAME_NS() local:$(PN_LOCAL()) {? if let Some(base) = state.namespaces.get(ns) { let mut iri = base.clone(); @@ -2249,189 +2160,89 @@ parser! { } } - //[142] rule BLANK_NODE_LABEL() -> &'input str = "_:" b:$((['0'..='9'] / PN_CHARS_U()) PN_CHARS()* ("."+ PN_CHARS()+)*) { b } - //[143] rule VAR1() -> &'input str = "?" v:$(VARNAME()) { v } - //[144] rule VAR2() -> &'input str = "$" v:$(VARNAME()) { v } - //[145] rule LANGTAG() -> LanguageTag = "@" l:$(['a' ..= 'z' | 'A' ..= 'Z']+ ("-" ['a' ..= 'z' | 'A' ..= 'Z' | '0' ..= '9']+)*) {? LanguageTag::parse(l.to_ascii_lowercase()).map_err(|_| "language tag parsing failed") } - //[146] rule INTEGER() = ['0'..='9']+ - //[147] rule DECIMAL() = ['0'..='9']+ "." ['0'..='9']* / ['0'..='9']* "." ['0'..='9']+ - //[148] rule DOUBLE() = (['0'..='9']+ "." ['0'..='9']* / "." ['0'..='9']+ / ['0'..='9']+) EXPONENT() - //[149] rule INTEGER_POSITIVE() = "+" _ INTEGER() - //[150] rule DECIMAL_POSITIVE() = "+" _ DECIMAL() - //[151] rule DOUBLE_POSITIVE() = "+" _ DOUBLE() - //[152] rule INTEGER_NEGATIVE() = "-" _ INTEGER() - //[153] rule DECIMAL_NEGATIVE() = "-" _ DECIMAL() - //[154] rule DOUBLE_NEGATIVE() = "-" _ DOUBLE() - //[155] rule EXPONENT() = ['e' | 'E'] ['+' | '-']? 
['0'..='9']+ - //[156] rule STRING_LITERAL1() -> String = "'" l:$((STRING_LITERAL1_simple_char() / ECHAR())*) "'" { unescape_echars(l).to_string() } rule STRING_LITERAL1_simple_char() = !['\u{27}' | '\u{5C}' | '\u{A}' | '\u{D}'] [_] - //[157] rule STRING_LITERAL2() -> String = "\"" l:$((STRING_LITERAL2_simple_char() / ECHAR())*) "\"" { unescape_echars(l).to_string() } rule STRING_LITERAL2_simple_char() = !['\u{22}' | '\u{5C}' | '\u{A}' | '\u{D}'] [_] - //[158] rule STRING_LITERAL_LONG1() -> String = "'''" l:$(STRING_LITERAL_LONG1_inner()*) "'''" { unescape_echars(l).to_string() } rule STRING_LITERAL_LONG1_inner() = ("''" / "'")? (STRING_LITERAL_LONG1_simple_char() / ECHAR()) rule STRING_LITERAL_LONG1_simple_char() = !['\'' | '\\'] [_] - //[159] rule STRING_LITERAL_LONG2() -> String = "\"\"\"" l:$(STRING_LITERAL_LONG2_inner()*) "\"\"\"" { unescape_echars(l).to_string() } rule STRING_LITERAL_LONG2_inner() = ("\"\"" / "\"")? (STRING_LITERAL_LONG2_simple_char() / ECHAR()) rule STRING_LITERAL_LONG2_simple_char() = !['"' | '\\'] [_] - //[160] rule ECHAR() = "\\" ['t' | 'b' | 'n' | 'r' | 'f' | '"' |'\'' | '\\'] - //[161] rule NIL() = "(" WS()* ")" - //[162] rule WS() = quiet! 
{ ['\u{20}' | '\u{9}' | '\u{D}' | '\u{A}'] } - //[163] rule ANON() = "[" WS()* "]" - //[164] rule PN_CHARS_BASE() = ['A' ..= 'Z' | 'a' ..= 'z' | '\u{00C0}' ..='\u{00D6}' | '\u{00D8}'..='\u{00F6}' | '\u{00F8}'..='\u{02FF}' | '\u{0370}'..='\u{037D}' | '\u{037F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}'] - //[165] rule PN_CHARS_U() = ['_'] / PN_CHARS_BASE() - //[166] rule VARNAME() = (['0'..='9'] / PN_CHARS_U()) (['0' ..= '9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}'] / PN_CHARS_U())* - //[167] rule PN_CHARS() = ['-' | '0' ..= '9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}'] / PN_CHARS_U() - //[168] rule PN_PREFIX() = PN_CHARS_BASE() PN_CHARS()* ("."+ PN_CHARS()+)* - //[169] rule PN_LOCAL() = (PN_CHARS_U() / [':' | '0'..='9'] / PLX()) (PN_CHARS() / [':'] / PLX())* (['.']+ (PN_CHARS() / [':'] / PLX())+)? - //[170] rule PLX() = PERCENT() / PN_LOCAL_ESC() - //[171] rule PERCENT() = ['%'] HEX() HEX() - //[172] rule HEX() = ['0' ..= '9' | 'A' ..= 'F' | 'a' ..= 'f'] - //[173] rule PN_LOCAL_ESC() = ['\\'] ['_' | '~' | '.' | '-' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%'] //TODO: added '/' to make tests pass but is it valid? - //[174] - rule EmbTP() -> TriplePattern = "<<" _ s:EmbSubjectOrObject() _ p:Verb() _ o:EmbSubjectOrObject() _ ">>" { - TriplePattern { subject: s, predicate: p, object: o } - } - - //[175] - rule EmbTriple() -> GroundTriple = "<<" _ s:DataValueTerm() _ p:EmbTriple_p() _ o:DataValueTerm() _ ">>" {? - Ok(GroundTriple { - subject: s.try_into().map_err(|_| "Literals are not allowed in subject position of nested patterns")?, - predicate: p, - object: o - }) - } - rule EmbTriple_p() -> NamedNode = i: iri() { i } / "a" { rdf::TYPE.into() } - - //[176] - rule EmbSubjectOrObject() -> TermPattern = - t:EmbTP() {? 
- #[cfg(feature = "rdf-star")]{Ok(t.into())} - #[cfg(not(feature = "rdf-star"))]{Err("Embedded triple patterns are only available in SPARQL-star")} - } / - v:Var() { v.into() } / - b:BlankNode() { b.into() } / - i:iri() { i.into() } / - l:RDFLiteral() { l.into() } / - l:NumericLiteral() { l.into() } / - l:BooleanLiteral() { l.into() } - - //[177] - rule DataValueTerm() -> GroundTerm = i:iri() { i.into() } / - l:RDFLiteral() { l.into() } / - l:NumericLiteral() { l.into() } / - l:BooleanLiteral() { l.into() } / - t:EmbTriple() {? - #[cfg(feature = "rdf-star")]{Ok(t.into())} - #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} - } - - //[178] - rule VarOrTermOrEmbTP() -> TermPattern = - t:EmbTP() {? - #[cfg(feature = "rdf-star")]{Ok(t.into())} - #[cfg(not(feature = "rdf-star"))]{Err("Embedded triple patterns are only available in SPARQL-star")} - } / - v:Var() { v.into() } / - t:GraphTerm() { t.into() } - - //[179] - rule AnnotationPattern() -> FocusedTriplePattern)>> = "{|" _ a:PropertyListNotEmpty() _ "|}" { a } - - //[180] - rule AnnotationPatternPath() -> FocusedTripleOrPathPattern)>> = "{|" _ a: PropertyListPathNotEmpty() _ "|}" { a } - - //[181] - rule ExprEmbTP() -> Expression = "<<" _ s:ExprVarOrTerm() _ p:Verb() _ o:ExprVarOrTerm() _ ">>" {? - #[cfg(feature = "rdf-star")]{Ok(Expression::FunctionCall(Function::Triple, vec![s, p.into(), o]))} - #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} - } - - //[182] - rule ExprVarOrTerm() -> Expression = - ExprEmbTP() / - i:iri() { i.into() } / - l:RDFLiteral() { l.into() } / - l:NumericLiteral() { l.into() } / - l:BooleanLiteral() { l.into() } / - v:Var() { v.into() } - //space rule _() = quiet! 
{ ([' ' | '\t' | '\n' | '\r'] / comment())* } diff --git a/lib/spargebra/src/term.rs b/lib/spargebra/src/term.rs index e2d29dc8..3ef91f68 100644 --- a/lib/spargebra/src/term.rs +++ b/lib/spargebra/src/term.rs @@ -577,6 +577,7 @@ pub enum GroundTermPattern { NamedNode(NamedNode), Literal(Literal), Variable(Variable), + #[cfg(feature = "rdf-star")] Triple(Box), } @@ -587,6 +588,7 @@ impl GroundTermPattern { Self::NamedNode(term) => write!(f, "{term}"), Self::Literal(term) => write!(f, "{term}"), Self::Variable(var) => write!(f, "{var}"), + #[cfg(feature = "rdf-star")] Self::Triple(triple) => triple.fmt_sse(f), } } @@ -599,6 +601,7 @@ impl fmt::Display for GroundTermPattern { Self::NamedNode(term) => term.fmt(f), Self::Literal(term) => term.fmt(f), Self::Variable(var) => var.fmt(f), + #[cfg(feature = "rdf-star")] Self::Triple(triple) => write!(f, "<<{triple}>>"), } } @@ -618,6 +621,7 @@ impl From for GroundTermPattern { } } +#[cfg(feature = "rdf-star")] impl From for GroundTermPattern { #[inline] fn from(triple: GroundTriplePattern) -> Self { @@ -818,6 +822,7 @@ pub struct GroundTriplePattern { impl GroundTriplePattern { /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). 
+ #[allow(dead_code)] pub(crate) fn fmt_sse(&self, f: &mut impl Write) -> fmt::Result { write!(f, "(triple ")?; self.subject.fmt_sse(f)?; diff --git a/lib/sparql-smith/Cargo.toml b/lib/sparql-smith/Cargo.toml index 64bd375b..2755b225 100644 --- a/lib/sparql-smith/Cargo.toml +++ b/lib/sparql-smith/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sparql-smith" -version = "0.1.0-alpha.3" +version = "0.1.0-alpha.4-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -14,6 +14,8 @@ edition = "2021" [features] default = [] +limit-offset = ["order"] +order = [] sep-0006 = [] [dependencies] diff --git a/lib/sparql-smith/src/lib.rs b/lib/sparql-smith/src/lib.rs index 6b554ec7..01ca45e5 100644 --- a/lib/sparql-smith/src/lib.rs +++ b/lib/sparql-smith/src/lib.rs @@ -1,6 +1,5 @@ use arbitrary::{Arbitrary, Result, Unstructured}; use std::fmt; -use std::fmt::Debug; use std::iter::once; use std::ops::ControlFlow; @@ -30,8 +29,12 @@ const LITERALS: [&str; 11] = [ "1e0", ]; -#[derive(Arbitrary)] pub struct Query { + inner: QueryContent, +} + +#[derive(Arbitrary)] +struct QueryContent { // [1] QueryUnit ::= Query // [2] Query ::= Prologue ( SelectQuery | ConstructQuery | DescribeQuery | AskQuery ) ValuesClause variant: QueryVariant, @@ -44,16 +47,34 @@ enum QueryVariant { //TODO: Other variants! 
} +impl<'a> Arbitrary<'a> for Query { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Ok(Self { + inner: QueryContent::arbitrary(u)?, + }) + } + + fn arbitrary_take_rest(u: Unstructured<'a>) -> Result { + Ok(Self { + inner: QueryContent::arbitrary_take_rest(u)?, + }) + } + + fn size_hint(_depth: usize) -> (usize, Option) { + (20, None) + } +} + impl fmt::Display for Query { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self.variant { + match &self.inner.variant { QueryVariant::Select(s) => write!(f, "{s}"), }?; - write!(f, "{}", self.values_clause) + write!(f, "{}", self.inner.values_clause) } } -impl Debug for Query { +impl fmt::Debug for Query { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self, f) } @@ -169,7 +190,9 @@ struct SolutionModifier { // [18] SolutionModifier ::= GroupClause? HavingClause? OrderClause? LimitOffsetClauses? group: Option, having: Option, + #[cfg(feature = "order")] order: Option, + #[cfg(feature = "limit-offset")] limit_offset: Option, } @@ -181,9 +204,11 @@ impl fmt::Display for SolutionModifier { if let Some(having) = &self.having { write!(f, " {having}")?; } + #[cfg(feature = "order")] if let Some(order) = &self.order { write!(f, " {order}")?; } + #[cfg(feature = "limit-offset")] if let Some(limit_offset) = &self.limit_offset { write!(f, " {limit_offset}")?; } @@ -254,6 +279,7 @@ impl fmt::Display for HavingClause { // [22] HavingCondition ::= Constraint type HavingCondition = Constraint; +#[cfg(feature = "order")] #[derive(Arbitrary)] struct OrderClause { // [23] OrderClause ::= 'ORDER' 'BY' OrderCondition+ @@ -261,6 +287,7 @@ struct OrderClause { others: Vec, } +#[cfg(feature = "order")] impl fmt::Display for OrderClause { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "ORDER BY {}", self.start)?; @@ -271,6 +298,7 @@ impl fmt::Display for OrderClause { } } +#[cfg(feature = "order")] #[derive(Arbitrary)] enum OrderCondition { // [24] OrderCondition ::= 
( ( 'ASC' | 'DESC' ) BrackettedExpression ) | ( Constraint | Var ) @@ -282,6 +310,7 @@ enum OrderCondition { Var(Var), } +#[cfg(feature = "order")] impl fmt::Display for OrderCondition { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { @@ -298,6 +327,7 @@ impl fmt::Display for OrderCondition { } } +#[cfg(feature = "limit-offset")] #[derive(Arbitrary)] enum LimitOffsetClauses { // [25] LimitOffsetClauses ::= LimitClause OffsetClause? | OffsetClause LimitClause? @@ -305,6 +335,7 @@ enum LimitOffsetClauses { OffsetLimit(OffsetClause, Option), } +#[cfg(feature = "limit-offset")] impl fmt::Display for LimitOffsetClauses { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { @@ -316,24 +347,28 @@ impl fmt::Display for LimitOffsetClauses { } } +#[cfg(feature = "limit-offset")] #[derive(Arbitrary)] struct LimitClause { // [26] LimitClause ::= 'LIMIT' INTEGER value: u8, } +#[cfg(feature = "limit-offset")] impl fmt::Display for LimitClause { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "LIMIT {}", self.value) } } +#[cfg(feature = "limit-offset")] #[derive(Arbitrary)] struct OffsetClause { // [27] OffsetClause ::= 'OFFSET' INTEGER value: u8, } +#[cfg(feature = "limit-offset")] impl fmt::Display for OffsetClause { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "OFFSET {}", self.value) diff --git a/lib/src/io/error.rs b/lib/src/io/error.rs index 5584169b..6a90404b 100644 --- a/lib/src/io/error.rs +++ b/lib/src/io/error.rs @@ -45,12 +45,14 @@ impl Error for ParseError { } } -#[allow(clippy::fallible_impl_from)] impl From for ParseError { #[inline] fn from(error: TurtleError) -> Self { let error = io::Error::from(error); - if error.get_ref().map_or(false, |e| e.is::()) { + if error.get_ref().map_or( + false, + <(dyn Error + Send + Sync + 'static)>::is::, + ) { Self::Syntax(SyntaxError { inner: SyntaxErrorKind::Turtle(*error.into_inner().unwrap().downcast().unwrap()), }) @@ -60,12 +62,14 @@ 
impl From for ParseError { } } -#[allow(clippy::fallible_impl_from)] impl From for ParseError { #[inline] fn from(error: RdfXmlError) -> Self { let error = io::Error::from(error); - if error.get_ref().map_or(false, |e| e.is::()) { + if error.get_ref().map_or( + false, + <(dyn Error + Send + Sync + 'static)>::is::, + ) { Self::Syntax(SyntaxError { inner: SyntaxErrorKind::RdfXml(*error.into_inner().unwrap().downcast().unwrap()), }) diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 2b67f0c9..29ef24ae 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -3,7 +3,7 @@ #![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![doc(test(attr(deny(warnings))))] -#![deny(unsafe_code)] +#![allow(clippy::return_self_not_must_use)] pub mod io; pub mod sparql; diff --git a/lib/src/sparql/dataset.rs b/lib/src/sparql/dataset.rs index 75191258..bf7e6195 100644 --- a/lib/src/sparql/dataset.rs +++ b/lib/src/sparql/dataset.rs @@ -40,7 +40,7 @@ impl DatasetView { ) -> impl Iterator> + 'static { self.reader .quads_for_pattern(subject, predicate, object, graph_name) - .map(|t| t.map_err(|e| e.into())) + .map(|t| t.map_err(Into::into)) } #[allow(clippy::needless_collect)] diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 7cce17d6..c3a9b033 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -65,6 +65,7 @@ impl SimpleEvaluator { } } + #[allow(clippy::rc_buffer)] pub fn evaluate_select_plan( &self, plan: Rc, @@ -73,7 +74,7 @@ impl SimpleEvaluator { let (eval, stats) = self.plan_evaluator(plan); ( QueryResults::Solutions(decode_bindings( - self.dataset.clone(), + Rc::clone(&self.dataset), eval(EncodedTuple::with_capacity(variables.len())), variables, )), @@ -164,16 +165,16 @@ impl SimpleEvaluator { graph_pattern, .. 
} => { - let variables = variables.clone(); + let variables = Rc::clone(variables); let silent = *silent; let service_name = service_name.clone(); - let graph_pattern = graph_pattern.clone(); + let graph_pattern = Rc::clone(graph_pattern); let eval = self.clone(); Rc::new(move |from| { match eval.evaluate_service( &service_name, &graph_pattern, - variables.clone(), + Rc::clone(&variables), &from, ) { Ok(result) => Box::new(result.filter_map(move |binding| { @@ -201,7 +202,7 @@ impl SimpleEvaluator { let predicate = TupleSelector::from(predicate); let object = TupleSelector::from(object); let graph_name = TupleSelector::from(graph_name); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |from| { let iter = dataset.encoded_quads_for_pattern( get_pattern_value(&subject, &from).as_ref(), @@ -233,16 +234,16 @@ impl SimpleEvaluator { graph_name, } => { let subject = TupleSelector::from(subject); - let path = path.clone(); + let path = Rc::clone(path); let object = TupleSelector::from(object); let graph_name = TupleSelector::from(graph_name); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |from| { let input_subject = get_pattern_value(&subject, &from); let input_object = get_pattern_value(&object, &from); let input_graph_name = get_pattern_value(&graph_name, &from); let path_eval = PathEvaluator { - dataset: dataset.clone(), + dataset: Rc::clone(&dataset), }; match (input_subject, input_object, input_graph_name) { (Some(input_subject), Some(input_object), Some(input_graph_name)) => { @@ -384,9 +385,9 @@ impl SimpleEvaluator { .intersection(&right.always_bound_variables()) .copied() .collect(); - let (left, left_stats) = self.plan_evaluator(left.clone()); + let (left, left_stats) = self.plan_evaluator(Rc::clone(left)); stat_children.push(left_stats); - let (right, right_stats) = self.plan_evaluator(right.clone()); + let (right, right_stats) = self.plan_evaluator(Rc::clone(right)); 
stat_children.push(right_stats); if join_keys.is_empty() { // Cartesian product @@ -430,12 +431,12 @@ impl SimpleEvaluator { } } PlanNode::ForLoopJoin { left, right } => { - let (left, left_stats) = self.plan_evaluator(left.clone()); + let (left, left_stats) = self.plan_evaluator(Rc::clone(left)); stat_children.push(left_stats); - let (right, right_stats) = self.plan_evaluator(right.clone()); + let (right, right_stats) = self.plan_evaluator(Rc::clone(right)); stat_children.push(right_stats); Rc::new(move |from| { - let right = right.clone(); + let right = Rc::clone(&right); Box::new(left(from).flat_map(move |t| match t { Ok(t) => right(t), Err(e) => Box::new(once(Err(e))), @@ -448,9 +449,9 @@ impl SimpleEvaluator { .intersection(&right.always_bound_variables()) .copied() .collect(); - let (left, left_stats) = self.plan_evaluator(left.clone()); + let (left, left_stats) = self.plan_evaluator(Rc::clone(left)); stat_children.push(left_stats); - let (right, right_stats) = self.plan_evaluator(right.clone()); + let (right, right_stats) = self.plan_evaluator(Rc::clone(right)); stat_children.push(right_stats); if join_keys.is_empty() { Rc::new(move |from| { @@ -491,11 +492,11 @@ impl SimpleEvaluator { .intersection(&right.always_bound_variables()) .copied() .collect(); - let (left, left_stats) = self.plan_evaluator(left.clone()); + let (left, left_stats) = self.plan_evaluator(Rc::clone(left)); stat_children.push(left_stats); - let (right, right_stats) = self.plan_evaluator(right.clone()); + let (right, right_stats) = self.plan_evaluator(Rc::clone(right)); stat_children.push(right_stats); - let expression = self.expression_evaluator(expression); + let expression = self.expression_evaluator(expression, &mut stat_children); // Real hash join Rc::new(move |from| { let mut errors = Vec::default(); @@ -511,7 +512,7 @@ impl SimpleEvaluator { left_iter: left(from), right: right_values, buffered_results: errors, - expression: expression.clone(), + expression: Rc::clone(&expression), 
}) }) } @@ -520,36 +521,36 @@ impl SimpleEvaluator { right, possible_problem_vars, } => { - let (left, left_stats) = self.plan_evaluator(left.clone()); + let (left, left_stats) = self.plan_evaluator(Rc::clone(left)); stat_children.push(left_stats); - let (right, right_stats) = self.plan_evaluator(right.clone()); + let (right, right_stats) = self.plan_evaluator(Rc::clone(right)); stat_children.push(right_stats); - let possible_problem_vars = possible_problem_vars.clone(); + let possible_problem_vars = Rc::clone(possible_problem_vars); Rc::new(move |from| { if possible_problem_vars.is_empty() { Box::new(ForLoopLeftJoinIterator { - right_evaluator: right.clone(), + right_evaluator: Rc::clone(&right), left_iter: left(from), current_right: Box::new(empty()), }) } else { Box::new(BadForLoopLeftJoinIterator { from_tuple: from.clone(), - right_evaluator: right.clone(), + right_evaluator: Rc::clone(&right), left_iter: left(from), - current_left: None, + current_left: EncodedTuple::with_capacity(0), current_right: Box::new(empty()), - problem_vars: possible_problem_vars.clone(), + problem_vars: Rc::clone(&possible_problem_vars), }) } }) } PlanNode::Filter { child, expression } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); - let expression = self.expression_evaluator(expression); + let expression = self.expression_evaluator(expression, &mut stat_children); Rc::new(move |from| { - let expression = expression.clone(); + let expression = Rc::clone(&expression); Box::new(child(from).filter(move |tuple| { match tuple { Ok(tuple) => expression(tuple) @@ -564,7 +565,7 @@ impl SimpleEvaluator { let children: Vec<_> = children .iter() .map(|child| { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); child }) @@ -583,12 +584,12 @@ impl 
SimpleEvaluator { variable, expression, } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); let position = variable.encoded; - let expression = self.expression_evaluator(expression); + let expression = self.expression_evaluator(expression, &mut stat_children); Rc::new(move |from| { - let expression = expression.clone(); + let expression = Rc::clone(&expression); Box::new(child(from).map(move |tuple| { let mut tuple = tuple?; if let Some(value) = expression(&tuple) { @@ -599,20 +600,20 @@ impl SimpleEvaluator { }) } PlanNode::Sort { child, by } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); let by: Vec<_> = by .iter() .map(|comp| match comp { - Comparator::Asc(expression) => { - ComparatorFunction::Asc(self.expression_evaluator(expression)) - } - Comparator::Desc(expression) => { - ComparatorFunction::Desc(self.expression_evaluator(expression)) - } + Comparator::Asc(expression) => ComparatorFunction::Asc( + self.expression_evaluator(expression, &mut stat_children), + ), + Comparator::Desc(expression) => ComparatorFunction::Desc( + self.expression_evaluator(expression, &mut stat_children), + ), }) .collect(); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |from| { let mut errors = Vec::default(); let mut values = child(from) @@ -657,12 +658,12 @@ impl SimpleEvaluator { }) } PlanNode::HashDeduplicate { child } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); Rc::new(move |from| Box::new(hash_deduplicate(child(from)))) } PlanNode::Reduced { child } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = 
self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); Rc::new(move |from| { Box::new(ConsecutiveDeduplication { @@ -672,23 +673,23 @@ impl SimpleEvaluator { }) } PlanNode::Skip { child, count } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); let count = *count; Rc::new(move |from| Box::new(child(from).skip(count))) } PlanNode::Limit { child, count } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); let count = *count; Rc::new(move |from| Box::new(child(from).take(count))) } PlanNode::Project { child, mapping } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); - let mapping = mapping.clone(); + let mapping = Rc::clone(mapping); Rc::new(move |from| { - let mapping = mapping.clone(); + let mapping = Rc::clone(&mapping); let mut input_tuple = EncodedTuple::with_capacity(mapping.len()); for (input_key, output_key) in mapping.iter() { if let Some(value) = from.get(output_key.encoded) { @@ -724,16 +725,16 @@ impl SimpleEvaluator { key_variables, aggregates, } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); - let key_variables = key_variables.clone(); + let key_variables = Rc::clone(key_variables); let aggregate_input_expressions: Vec<_> = aggregates .iter() .map(|(aggregate, _)| { aggregate .parameter .as_ref() - .map(|p| self.expression_evaluator(p)) + .map(|p| self.expression_evaluator(p, &mut stat_children)) }) .collect(); let accumulator_builders: Vec<_> = aggregates @@ -750,7 +751,7 @@ impl SimpleEvaluator { aggregates.iter().map(|(_, var)| var.encoded).collect(); 
Rc::new(move |from| { let tuple_size = from.capacity(); - let key_variables = key_variables.clone(); + let key_variables = Rc::clone(&key_variables); let mut errors = Vec::default(); let mut accumulators_for_group = HashMap::>, Vec>>::default(); @@ -825,7 +826,7 @@ impl SimpleEvaluator { exec_duration: Cell::new(std::time::Duration::from_secs(0)), }); if self.run_stats { - let stats = stats.clone(); + let stats = Rc::clone(&stats); evaluator = Rc::new(move |tuple| { let start = Timer::now(); let inner = evaluator(tuple); @@ -834,7 +835,7 @@ impl SimpleEvaluator { .set(stats.exec_duration.get() + start.elapsed()); Box::new(StatsIterator { inner, - stats: stats.clone(), + stats: Rc::clone(&stats), }) }) } @@ -845,7 +846,7 @@ impl SimpleEvaluator { &self, service_name: &PatternValue, graph_pattern: &GraphPattern, - variables: Rc>, + variables: Rc<[Variable]>, from: &EncodedTuple, ) -> Result { let service_name = get_pattern_value(&service_name.into(), from) @@ -862,7 +863,7 @@ impl SimpleEvaluator { parsing_duration: None, }, )? 
{ - Ok(encode_bindings(self.dataset.clone(), variables, iter)) + Ok(encode_bindings(Rc::clone(&self.dataset), variables, iter)) } else { Err(EvaluationError::msg( "The service call has not returned a set of solutions", @@ -892,12 +893,12 @@ impl SimpleEvaluator { } } PlanAggregationFunction::Min => { - let dataset = dataset.clone(); - Box::new(move || Box::new(MinAccumulator::new(dataset.clone()))) + let dataset = Rc::clone(dataset); + Box::new(move || Box::new(MinAccumulator::new(Rc::clone(&dataset)))) } // DISTINCT does not make sense with min PlanAggregationFunction::Max => { - let dataset = dataset.clone(); - Box::new(move || Box::new(MaxAccumulator::new(dataset.clone()))) + let dataset = Rc::clone(dataset); + Box::new(move || Box::new(MaxAccumulator::new(Rc::clone(&dataset)))) } // DISTINCT does not make sense with max PlanAggregationFunction::Avg => { if distinct { @@ -908,20 +909,20 @@ impl SimpleEvaluator { } PlanAggregationFunction::Sample => Box::new(|| Box::::default()), // DISTINCT does not make sense with sample PlanAggregationFunction::GroupConcat { separator } => { - let dataset = dataset.clone(); - let separator = separator.clone(); + let dataset = Rc::clone(dataset); + let separator = Rc::clone(separator); if distinct { Box::new(move || { Box::new(DistinctAccumulator::new(GroupConcatAccumulator::new( - dataset.clone(), - separator.clone(), + Rc::clone(&dataset), + Rc::clone(&separator), ))) }) } else { Box::new(move || { Box::new(GroupConcatAccumulator::new( - dataset.clone(), - separator.clone(), + Rc::clone(&dataset), + Rc::clone(&separator), )) }) } @@ -929,10 +930,10 @@ impl SimpleEvaluator { } } - #[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)] fn expression_evaluator( &self, expression: &PlanExpression, + stat_children: &mut Vec>, ) -> Rc Option> { match expression { PlanExpression::NamedNode(t) => { @@ -948,48 +949,61 @@ impl SimpleEvaluator { Rc::new(move |tuple| tuple.get(v).cloned()) } PlanExpression::Exists(plan) 
=> { - let (eval, _) = self.plan_evaluator(plan.clone()); //TODO: stats + let (eval, stats) = self.plan_evaluator(Rc::clone(plan)); + stat_children.push(stats); Rc::new(move |tuple| Some(eval(tuple.clone()).next().is_some().into())) } - PlanExpression::Or(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); - Rc::new(move |tuple| match a(tuple).and_then(|v| to_bool(&v)) { - Some(true) => Some(true.into()), - Some(false) => b(tuple), - None => { - if Some(true) == a(tuple).and_then(|v| to_bool(&v)) { - Some(true.into()) - } else { - None + PlanExpression::Or(inner) => { + let children = inner + .iter() + .map(|i| self.expression_evaluator(i, stat_children)) + .collect::>(); + Rc::new(move |tuple| { + let mut error = true; + for child in children.iter() { + match child(tuple).and_then(|v| to_bool(&v)) { + Some(true) => return Some(true.into()), + Some(false) => continue, + None => error = true, } } + if error { + None + } else { + Some(false.into()) + } }) } - PlanExpression::And(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); - Rc::new(move |tuple| match a(tuple).and_then(|v| to_bool(&v)) { - Some(true) => b(tuple), - Some(false) => Some(false.into()), - None => { - if Some(false) == b(tuple).and_then(|v| to_bool(&v)) { - Some(false.into()) - } else { - None + PlanExpression::And(inner) => { + let children = inner + .iter() + .map(|i| self.expression_evaluator(i, stat_children)) + .collect::>(); + Rc::new(move |tuple| { + let mut error = false; + for child in children.iter() { + match child(tuple).and_then(|v| to_bool(&v)) { + Some(true) => continue, + Some(false) => return Some(false.into()), + None => error = true, } } + if error { + None + } else { + Some(true.into()) + } }) } PlanExpression::Equal(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); - Rc::new(move |tuple| equals(&a(tuple)?, &b(tuple)?).map(|v| v.into())) + let a = 
self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); + Rc::new(move |tuple| equals(&a(tuple)?, &b(tuple)?).map(Into::into)) } PlanExpression::Greater(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); - let dataset = self.dataset.clone(); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some( (partial_cmp(&dataset, &a(tuple)?, &b(tuple)?)? == Ordering::Greater) @@ -998,9 +1012,9 @@ impl SimpleEvaluator { }) } PlanExpression::GreaterOrEqual(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); - let dataset = self.dataset.clone(); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some( match partial_cmp(&dataset, &a(tuple)?, &b(tuple)?)? { @@ -1012,17 +1026,17 @@ impl SimpleEvaluator { }) } PlanExpression::Less(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); - let dataset = self.dataset.clone(); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some((partial_cmp(&dataset, &a(tuple)?, &b(tuple)?)? == Ordering::Less).into()) }) } PlanExpression::LessOrEqual(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); - let dataset = self.dataset.clone(); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some( match partial_cmp(&dataset, &a(tuple)?, &b(tuple)?)? 
{ @@ -1034,8 +1048,8 @@ impl SimpleEvaluator { }) } PlanExpression::Add(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); Rc::new( move |tuple| match NumericBinaryOperands::new(a(tuple)?, b(tuple)?)? { NumericBinaryOperands::Float(v1, v2) => Some((v1 + v2).into()), @@ -1073,13 +1087,15 @@ impl SimpleEvaluator { NumericBinaryOperands::TimeDayTimeDuration(v1, v2) => { Some(v1.checked_add_day_time_duration(v2)?.into()) } - _ => None, + NumericBinaryOperands::DateTime(_, _) + | NumericBinaryOperands::Time(_, _) + | NumericBinaryOperands::Date(_, _) => None, }, ) } PlanExpression::Subtract(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); Rc::new(move |tuple| { Some(match NumericBinaryOperands::new(a(tuple)?, b(tuple)?)? { NumericBinaryOperands::Float(v1, v2) => (v1 - v2).into(), @@ -1130,8 +1146,8 @@ impl SimpleEvaluator { }) } PlanExpression::Multiply(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); Rc::new( move |tuple| match NumericBinaryOperands::new(a(tuple)?, b(tuple)?)? { NumericBinaryOperands::Float(v1, v2) => Some((v1 * v2).into()), @@ -1143,8 +1159,8 @@ impl SimpleEvaluator { ) } PlanExpression::Divide(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); Rc::new( move |tuple| match NumericBinaryOperands::new(a(tuple)?, b(tuple)?)? 
{ NumericBinaryOperands::Float(v1, v2) => Some((v1 / v2).into()), @@ -1158,7 +1174,7 @@ impl SimpleEvaluator { ) } PlanExpression::UnaryPlus(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::FloatLiteral(value) => Some(value.into()), EncodedTerm::DoubleLiteral(value) => Some(value.into()), @@ -1171,25 +1187,27 @@ impl SimpleEvaluator { }) } PlanExpression::UnaryMinus(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::FloatLiteral(value) => Some((-value).into()), EncodedTerm::DoubleLiteral(value) => Some((-value).into()), - EncodedTerm::IntegerLiteral(value) => Some((-value).into()), - EncodedTerm::DecimalLiteral(value) => Some((-value).into()), - EncodedTerm::DurationLiteral(value) => Some((-value).into()), - EncodedTerm::YearMonthDurationLiteral(value) => Some((-value).into()), - EncodedTerm::DayTimeDurationLiteral(value) => Some((-value).into()), + EncodedTerm::IntegerLiteral(value) => Some(value.checked_neg()?.into()), + EncodedTerm::DecimalLiteral(value) => Some(value.checked_neg()?.into()), + EncodedTerm::DurationLiteral(value) => Some(value.checked_neg()?.into()), + EncodedTerm::YearMonthDurationLiteral(value) => { + Some(value.checked_neg()?.into()) + } + EncodedTerm::DayTimeDurationLiteral(value) => Some(value.checked_neg()?.into()), _ => None, }) } PlanExpression::Not(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| to_bool(&e(tuple)?).map(|v| (!v).into())) } PlanExpression::Str(e) | PlanExpression::StringCast(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some(build_string_literal_from_id(to_string_id( &dataset, @@ 
-1198,8 +1216,8 @@ impl SimpleEvaluator { }) } PlanExpression::Lang(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::SmallSmallLangStringLiteral { language, .. } | EncodedTerm::BigSmallLangStringLiteral { language, .. } => { @@ -1214,9 +1232,9 @@ impl SimpleEvaluator { }) } PlanExpression::LangMatches(language_tag, language_range) => { - let language_tag = self.expression_evaluator(language_tag); - let language_range = self.expression_evaluator(language_range); - let dataset = self.dataset.clone(); + let language_tag = self.expression_evaluator(language_tag, stat_children); + let language_range = self.expression_evaluator(language_range, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let mut language_tag = to_simple_string(&dataset, &language_tag(tuple)?)?; language_tag.make_ascii_lowercase(); @@ -1240,8 +1258,8 @@ impl SimpleEvaluator { }) } PlanExpression::Datatype(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| datatype(&dataset, &e(tuple)?)) } PlanExpression::Bound(v) => { @@ -1249,8 +1267,8 @@ impl SimpleEvaluator { Rc::new(move |tuple| Some(tuple.contains(v).into())) } PlanExpression::Iri(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); let base_iri = self.base_iri.clone(); Rc::new(move |tuple| { let e = e(tuple)?; @@ -1273,8 +1291,8 @@ impl SimpleEvaluator { } PlanExpression::BNode(id) => match id { Some(id) => { - let id = self.expression_evaluator(id); - let dataset = self.dataset.clone(); + let id = self.expression_evaluator(id, stat_children); + let 
dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some( dataset.encode_term( @@ -1293,7 +1311,7 @@ impl SimpleEvaluator { }, PlanExpression::Rand => Rc::new(|_| Some(random::().into())), PlanExpression::Abs(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::IntegerLiteral(value) => Some(value.abs().into()), EncodedTerm::DecimalLiteral(value) => Some(value.abs().into()), @@ -1303,7 +1321,7 @@ impl SimpleEvaluator { }) } PlanExpression::Ceil(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::IntegerLiteral(value) => Some(value.into()), EncodedTerm::DecimalLiteral(value) => Some(value.ceil().into()), @@ -1313,7 +1331,7 @@ impl SimpleEvaluator { }) } PlanExpression::Floor(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::IntegerLiteral(value) => Some(value.into()), EncodedTerm::DecimalLiteral(value) => Some(value.floor().into()), @@ -1323,7 +1341,7 @@ impl SimpleEvaluator { }) } PlanExpression::Round(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? 
{ EncodedTerm::IntegerLiteral(value) => Some(value.into()), EncodedTerm::DecimalLiteral(value) => Some(value.round().into()), @@ -1333,8 +1351,11 @@ impl SimpleEvaluator { }) } PlanExpression::Concat(l) => { - let l: Vec<_> = l.iter().map(|e| self.expression_evaluator(e)).collect(); - let dataset = self.dataset.clone(); + let l: Vec<_> = l + .iter() + .map(|e| self.expression_evaluator(e, stat_children)) + .collect(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let mut result = String::default(); let mut language = None; @@ -1357,10 +1378,12 @@ impl SimpleEvaluator { }) } PlanExpression::SubStr(source, starting_loc, length) => { - let source = self.expression_evaluator(source); - let starting_loc = self.expression_evaluator(starting_loc); - let length = length.as_ref().map(|l| self.expression_evaluator(l)); - let dataset = self.dataset.clone(); + let source = self.expression_evaluator(source, stat_children); + let starting_loc = self.expression_evaluator(starting_loc, stat_children); + let length = length + .as_ref() + .map(|l| self.expression_evaluator(l, stat_children)); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (source, language) = to_string_and_language(&dataset, &source(tuple)?)?; @@ -1403,17 +1426,21 @@ impl SimpleEvaluator { }) } PlanExpression::StrLen(arg) => { - let arg = self.expression_evaluator(arg); - let dataset = self.dataset.clone(); + let arg = self.expression_evaluator(arg, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { - Some((to_string(&dataset, &arg(tuple)?)?.chars().count() as i64).into()) + Some( + i64::try_from(to_string(&dataset, &arg(tuple)?)?.chars().count()) + .ok()? 
+ .into(), + ) }) } PlanExpression::StaticReplace(arg, regex, replacement) => { - let arg = self.expression_evaluator(arg); + let arg = self.expression_evaluator(arg, stat_children); let regex = regex.clone(); - let replacement = self.expression_evaluator(replacement); - let dataset = self.dataset.clone(); + let replacement = self.expression_evaluator(replacement, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (text, language) = to_string_and_language(&dataset, &arg(tuple)?)?; let replacement = to_simple_string(&dataset, &replacement(tuple)?)?; @@ -1425,11 +1452,13 @@ impl SimpleEvaluator { }) } PlanExpression::DynamicReplace(arg, pattern, replacement, flags) => { - let arg = self.expression_evaluator(arg); - let pattern = self.expression_evaluator(pattern); - let replacement = self.expression_evaluator(replacement); - let flags = flags.as_ref().map(|flags| self.expression_evaluator(flags)); - let dataset = self.dataset.clone(); + let arg = self.expression_evaluator(arg, stat_children); + let pattern = self.expression_evaluator(pattern, stat_children); + let replacement = self.expression_evaluator(replacement, stat_children); + let flags = flags + .as_ref() + .map(|flags| self.expression_evaluator(flags, stat_children)); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let pattern = to_simple_string(&dataset, &pattern(tuple)?)?; let options = if let Some(flags) = &flags { @@ -1448,8 +1477,8 @@ impl SimpleEvaluator { }) } PlanExpression::UCase(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (value, language) = to_string_and_language(&dataset, &e(tuple)?)?; Some(build_plain_literal( @@ -1460,8 +1489,8 @@ impl SimpleEvaluator { }) } PlanExpression::LCase(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = 
self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (value, language) = to_string_and_language(&dataset, &e(tuple)?)?; Some(build_plain_literal( @@ -1472,9 +1501,9 @@ impl SimpleEvaluator { }) } PlanExpression::StrStarts(arg1, arg2) => { - let arg1 = self.expression_evaluator(arg1); - let arg2 = self.expression_evaluator(arg2); - let dataset = self.dataset.clone(); + let arg1 = self.expression_evaluator(arg1, stat_children); + let arg2 = self.expression_evaluator(arg2, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (arg1, arg2, _) = to_argument_compatible_strings(&dataset, &arg1(tuple)?, &arg2(tuple)?)?; @@ -1482,8 +1511,8 @@ impl SimpleEvaluator { }) } PlanExpression::EncodeForUri(ltrl) => { - let ltrl = self.expression_evaluator(ltrl); - let dataset = self.dataset.clone(); + let ltrl = self.expression_evaluator(ltrl, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let ltlr = to_string(&dataset, <rl(tuple)?)?; let mut result = Vec::with_capacity(ltlr.len()); @@ -1516,9 +1545,9 @@ impl SimpleEvaluator { }) } PlanExpression::StrEnds(arg1, arg2) => { - let arg1 = self.expression_evaluator(arg1); - let arg2 = self.expression_evaluator(arg2); - let dataset = self.dataset.clone(); + let arg1 = self.expression_evaluator(arg1, stat_children); + let arg2 = self.expression_evaluator(arg2, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (arg1, arg2, _) = to_argument_compatible_strings(&dataset, &arg1(tuple)?, &arg2(tuple)?)?; @@ -1526,9 +1555,9 @@ impl SimpleEvaluator { }) } PlanExpression::Contains(arg1, arg2) => { - let arg1 = self.expression_evaluator(arg1); - let arg2 = self.expression_evaluator(arg2); - let dataset = self.dataset.clone(); + let arg1 = self.expression_evaluator(arg1, stat_children); + let arg2 = self.expression_evaluator(arg2, stat_children); + let dataset = Rc::clone(&self.dataset); 
Rc::new(move |tuple| { let (arg1, arg2, _) = to_argument_compatible_strings(&dataset, &arg1(tuple)?, &arg2(tuple)?)?; @@ -1536,9 +1565,9 @@ impl SimpleEvaluator { }) } PlanExpression::StrBefore(arg1, arg2) => { - let arg1 = self.expression_evaluator(arg1); - let arg2 = self.expression_evaluator(arg2); - let dataset = self.dataset.clone(); + let arg1 = self.expression_evaluator(arg1, stat_children); + let arg2 = self.expression_evaluator(arg2, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (arg1, arg2, language) = to_argument_compatible_strings(&dataset, &arg1(tuple)?, &arg2(tuple)?)?; @@ -1550,9 +1579,9 @@ impl SimpleEvaluator { }) } PlanExpression::StrAfter(arg1, arg2) => { - let arg1 = self.expression_evaluator(arg1); - let arg2 = self.expression_evaluator(arg2); - let dataset = self.dataset.clone(); + let arg1 = self.expression_evaluator(arg1, stat_children); + let arg2 = self.expression_evaluator(arg2, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (arg1, arg2, language) = to_argument_compatible_strings(&dataset, &arg1(tuple)?, &arg2(tuple)?)?; @@ -1564,7 +1593,7 @@ impl SimpleEvaluator { }) } PlanExpression::Year(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.year().into()), EncodedTerm::DateLiteral(date) => Some(date.year().into()), @@ -1574,7 +1603,7 @@ impl SimpleEvaluator { }) } PlanExpression::Month(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? 
{ EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.month().into()), EncodedTerm::DateLiteral(date) => Some(date.month().into()), @@ -1585,7 +1614,7 @@ impl SimpleEvaluator { }) } PlanExpression::Day(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.day().into()), EncodedTerm::DateLiteral(date) => Some(date.day().into()), @@ -1595,7 +1624,7 @@ impl SimpleEvaluator { }) } PlanExpression::Hours(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.hour().into()), EncodedTerm::TimeLiteral(time) => Some(time.hour().into()), @@ -1603,7 +1632,7 @@ impl SimpleEvaluator { }) } PlanExpression::Minutes(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.minute().into()), EncodedTerm::TimeLiteral(time) => Some(time.minute().into()), @@ -1611,7 +1640,7 @@ impl SimpleEvaluator { }) } PlanExpression::Seconds(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.second().into()), EncodedTerm::TimeLiteral(time) => Some(time.second().into()), @@ -1619,7 +1648,7 @@ impl SimpleEvaluator { }) } PlanExpression::Timezone(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| { Some( match e(tuple)? 
{ @@ -1638,8 +1667,8 @@ impl SimpleEvaluator { }) } PlanExpression::Tz(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let timezone_offset = match e(tuple)? { EncodedTerm::DateTimeLiteral(date_time) => date_time.timezone_offset(), @@ -1662,8 +1691,8 @@ impl SimpleEvaluator { } PlanExpression::Adjust(dt, tz) => { - let dt = self.expression_evaluator(dt); - let tz = self.expression_evaluator(tz); + let dt = self.expression_evaluator(dt, stat_children); + let tz = self.expression_evaluator(tz, stat_children); Rc::new(move |tuple| { let timezone_offset = Some( match tz(tuple)? { @@ -1697,7 +1726,7 @@ impl SimpleEvaluator { Rc::new(move |_| Some(now.into())) } PlanExpression::Uuid => { - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |_| { let mut buffer = String::with_capacity(44); buffer.push_str("urn:uuid:"); @@ -1706,20 +1735,23 @@ impl SimpleEvaluator { }) } PlanExpression::StrUuid => { - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |_| { let mut buffer = String::with_capacity(36); generate_uuid(&mut buffer); Some(build_string_literal(&dataset, &buffer)) }) } - PlanExpression::Md5(arg) => self.hash::(arg), - PlanExpression::Sha1(arg) => self.hash::(arg), - PlanExpression::Sha256(arg) => self.hash::(arg), - PlanExpression::Sha384(arg) => self.hash::(arg), - PlanExpression::Sha512(arg) => self.hash::(arg), + PlanExpression::Md5(arg) => self.hash::(arg, stat_children), + PlanExpression::Sha1(arg) => self.hash::(arg, stat_children), + PlanExpression::Sha256(arg) => self.hash::(arg, stat_children), + PlanExpression::Sha384(arg) => self.hash::(arg, stat_children), + PlanExpression::Sha512(arg) => self.hash::(arg, stat_children), PlanExpression::Coalesce(l) => { - let l: Vec<_> = l.iter().map(|e| 
self.expression_evaluator(e)).collect(); + let l: Vec<_> = l + .iter() + .map(|e| self.expression_evaluator(e, stat_children)) + .collect(); Rc::new(move |tuple| { for e in &l { if let Some(result) = e(tuple) { @@ -1730,9 +1762,9 @@ impl SimpleEvaluator { }) } PlanExpression::If(a, b, c) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); - let c = self.expression_evaluator(c); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); + let c = self.expression_evaluator(c, stat_children); Rc::new(move |tuple| { if to_bool(&a(tuple)?)? { b(tuple) @@ -1742,9 +1774,9 @@ impl SimpleEvaluator { }) } PlanExpression::StrLang(lexical_form, lang_tag) => { - let lexical_form = self.expression_evaluator(lexical_form); - let lang_tag = self.expression_evaluator(lang_tag); - let dataset = self.dataset.clone(); + let lexical_form = self.expression_evaluator(lexical_form, stat_children); + let lang_tag = self.expression_evaluator(lang_tag, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some(build_lang_string_literal_from_id( to_simple_string_id(&lexical_form(tuple)?)?, @@ -1753,9 +1785,9 @@ impl SimpleEvaluator { }) } PlanExpression::StrDt(lexical_form, datatype) => { - let lexical_form = self.expression_evaluator(lexical_form); - let datatype = self.expression_evaluator(datatype); - let dataset = self.dataset.clone(); + let lexical_form = self.expression_evaluator(lexical_form, stat_children); + let datatype = self.expression_evaluator(datatype, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let value = to_simple_string(&dataset, &lexical_form(tuple)?)?; let datatype = if let EncodedTerm::NamedNode { iri_id } = datatype(tuple)? 
{ @@ -1770,24 +1802,24 @@ impl SimpleEvaluator { }) } PlanExpression::SameTerm(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); Rc::new(move |tuple| Some((a(tuple)? == b(tuple)?).into())) } PlanExpression::IsIri(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| Some(e(tuple)?.is_named_node().into())) } PlanExpression::IsBlank(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| Some(e(tuple)?.is_blank_node().into())) } PlanExpression::IsLiteral(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| Some(e(tuple)?.is_literal().into())) } PlanExpression::IsNumeric(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| { Some( matches!( @@ -1802,8 +1834,8 @@ impl SimpleEvaluator { }) } PlanExpression::StaticRegex(text, regex) => { - let text = self.expression_evaluator(text); - let dataset = self.dataset.clone(); + let text = self.expression_evaluator(text, stat_children); + let dataset = Rc::clone(&self.dataset); let regex = regex.clone(); Rc::new(move |tuple| { let text = to_string(&dataset, &text(tuple)?)?; @@ -1811,10 +1843,12 @@ impl SimpleEvaluator { }) } PlanExpression::DynamicRegex(text, pattern, flags) => { - let text = self.expression_evaluator(text); - let pattern = self.expression_evaluator(pattern); - let flags = flags.as_ref().map(|flags| self.expression_evaluator(flags)); - let dataset = self.dataset.clone(); + let text = self.expression_evaluator(text, stat_children); + let pattern = self.expression_evaluator(pattern, stat_children); + let flags = flags + .as_ref() + .map(|flags| self.expression_evaluator(flags, 
stat_children)); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let pattern = to_simple_string(&dataset, &pattern(tuple)?)?; let options = if let Some(flags) = &flags { @@ -1828,26 +1862,22 @@ impl SimpleEvaluator { }) } PlanExpression::Triple(s, p, o) => { - let s = self.expression_evaluator(s); - let p = self.expression_evaluator(p); - let o = self.expression_evaluator(o); + let s = self.expression_evaluator(s, stat_children); + let p = self.expression_evaluator(p, stat_children); + let o = self.expression_evaluator(o, stat_children); Rc::new(move |tuple| { let s = s(tuple)?; let p = p(tuple)?; let o = o(tuple)?; - if !s.is_literal() + (!s.is_literal() && !s.is_default_graph() && p.is_named_node() - && !o.is_default_graph() - { - Some(EncodedTriple::new(s, p, o).into()) - } else { - None - } + && !o.is_default_graph()) + .then(|| EncodedTriple::new(s, p, o).into()) }) } PlanExpression::Subject(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| { if let EncodedTerm::Triple(t) = e(tuple)? { Some(t.subject.clone()) @@ -1857,7 +1887,7 @@ impl SimpleEvaluator { }) } PlanExpression::Predicate(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| { if let EncodedTerm::Triple(t) = e(tuple)? { Some(t.predicate.clone()) @@ -1867,7 +1897,7 @@ impl SimpleEvaluator { }) } PlanExpression::Object(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| { if let EncodedTerm::Triple(t) = e(tuple)? 
{ Some(t.object.clone()) @@ -1877,11 +1907,11 @@ impl SimpleEvaluator { }) } PlanExpression::IsTriple(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| Some(e(tuple)?.is_triple().into())) } PlanExpression::BooleanCast(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::BooleanLiteral(value) => Some(value.into()), EncodedTerm::FloatLiteral(value) => Some(Boolean::from(value).into()), @@ -1893,8 +1923,8 @@ impl SimpleEvaluator { }) } PlanExpression::DoubleCast(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::FloatLiteral(value) => Some(Double::from(value).into()), EncodedTerm::DoubleLiteral(value) => Some(value.into()), @@ -1909,8 +1939,8 @@ impl SimpleEvaluator { }) } PlanExpression::FloatCast(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::FloatLiteral(value) => Some(value.into()), EncodedTerm::DoubleLiteral(value) => Some(Float::from(value).into()), @@ -1925,8 +1955,8 @@ impl SimpleEvaluator { }) } PlanExpression::IntegerCast(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? 
{ EncodedTerm::FloatLiteral(value) => Some(Integer::try_from(value).ok()?.into()), EncodedTerm::DoubleLiteral(value) => { @@ -1945,8 +1975,8 @@ impl SimpleEvaluator { }) } PlanExpression::DecimalCast(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::FloatLiteral(value) => Some(Decimal::try_from(value).ok()?.into()), EncodedTerm::DoubleLiteral(value) => { @@ -1965,8 +1995,8 @@ impl SimpleEvaluator { }) } PlanExpression::DateCast(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DateLiteral(value) => Some(value.into()), EncodedTerm::DateTimeLiteral(value) => Some(Date::try_from(value).ok()?.into()), @@ -1978,8 +2008,8 @@ impl SimpleEvaluator { }) } PlanExpression::TimeCast(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::TimeLiteral(value) => Some(value.into()), EncodedTerm::DateTimeLiteral(value) => Some(Time::try_from(value).ok()?.into()), @@ -1991,8 +2021,8 @@ impl SimpleEvaluator { }) } PlanExpression::DateTimeCast(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? 
{ EncodedTerm::DateTimeLiteral(value) => Some(value.into()), EncodedTerm::DateLiteral(value) => Some(DateTime::try_from(value).ok()?.into()), @@ -2004,8 +2034,8 @@ impl SimpleEvaluator { }) } PlanExpression::DurationCast(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DurationLiteral(value) => Some(value.into()), EncodedTerm::YearMonthDurationLiteral(value) => { @@ -2022,8 +2052,8 @@ impl SimpleEvaluator { }) } PlanExpression::YearMonthDurationCast(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DurationLiteral(value) => { Some(YearMonthDuration::try_from(value).ok()?.into()) @@ -2037,8 +2067,8 @@ impl SimpleEvaluator { }) } PlanExpression::DayTimeDurationCast(e) => { - let e = self.expression_evaluator(e); - let dataset = self.dataset.clone(); + let e = self.expression_evaluator(e, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? 
{ EncodedTerm::DurationLiteral(value) => { Some(DayTimeDuration::try_from(value).ok()?.into()) @@ -2055,9 +2085,9 @@ impl SimpleEvaluator { if let Some(function) = self.custom_functions.get(function_name).cloned() { let args = args .iter() - .map(|e| self.expression_evaluator(e)) + .map(|e| self.expression_evaluator(e, stat_children)) .collect::>(); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let args = args .iter() @@ -2075,9 +2105,10 @@ impl SimpleEvaluator { fn hash( &self, arg: &PlanExpression, + stat_children: &mut Vec>, ) -> Rc Option> { - let arg = self.expression_evaluator(arg); - let dataset = self.dataset.clone(); + let arg = self.expression_evaluator(arg, stat_children); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let input = to_simple_string(&dataset, &arg(tuple)?)?; let hash = hex::encode(H::new().chain_update(input.as_str()).finalize()); @@ -2290,11 +2321,7 @@ fn to_argument_compatible_strings( ) -> Option<(String, String, Option)> { let (value1, language1) = to_string_and_language(dataset, arg1)?; let (value2, language2) = to_string_and_language(dataset, arg2)?; - if language2.is_none() || language1 == language2 { - Some((value1, value2, language1)) - } else { - None - } + (language2.is_none() || language1 == language2).then(|| (value1, value2, language1)) } pub(super) fn compile_pattern(pattern: &str, flags: Option<&str>) -> Option { @@ -2322,6 +2349,7 @@ pub(super) fn compile_pattern(pattern: &str, flags: Option<&str>) -> Option, iter: EncodedTuplesIterator, @@ -2345,7 +2373,7 @@ fn decode_bindings( // this is used to encode results from a BindingIterator into an EncodedTuplesIterator. 
This happens when SERVICE clauses are evaluated fn encode_bindings( dataset: Rc, - variables: Rc>, + variables: Rc<[Variable]>, iter: QuerySolutionIter, ) -> EncodedTuplesIterator { Box::new(iter.map(move |solution| { @@ -2362,11 +2390,6 @@ fn encode_bindings( })) } -#[allow( - clippy::float_cmp, - clippy::cast_possible_truncation, - clippy::cast_precision_loss -)] fn equals(a: &EncodedTerm, b: &EncodedTerm) -> Option { match a { EncodedTerm::DefaultGraph @@ -2634,7 +2657,6 @@ fn partial_cmp(dataset: &DatasetView, a: &EncodedTerm, b: &EncodedTerm) -> Optio } } -#[allow(clippy::cast_precision_loss)] fn partial_cmp_literals( dataset: &DatasetView, a: &EncodedTerm, @@ -2892,7 +2914,6 @@ enum NumericBinaryOperands { } impl NumericBinaryOperands { - #[allow(clippy::cast_precision_loss)] fn new(a: EncodedTerm, b: EncodedTerm) -> Option { match (a, b) { (EncodedTerm::FloatLiteral(v1), EncodedTerm::FloatLiteral(v2)) => { @@ -3059,20 +3080,10 @@ fn put_pattern_value( tuple: &mut EncodedTuple, ) -> Option<()> { match selector { - TupleSelector::Constant(c) => { - if *c == value { - Some(()) - } else { - None - } - } + TupleSelector::Constant(c) => (*c == value).then(|| ()), TupleSelector::Variable(v) => { if let Some(old) = tuple.get(*v) { - if value == *old { - Some(()) - } else { - None - } + (value == *old).then(|| ()) } else { tuple.set(*v, value); Some(()) @@ -3148,11 +3159,9 @@ impl PathEvaluator { .find_map(|middle| { middle .and_then(|middle| { - Ok(if self.eval_closed_in_graph(b, &middle, end, graph_name)? { - Some(()) - } else { - None - }) + Ok(self + .eval_closed_in_graph(b, &middle, end, graph_name)? 
+ .then(|| ())) }) .transpose() }) @@ -3218,12 +3227,12 @@ impl PathEvaluator { PlanPropertyPath::Reverse(p) => self.eval_closed_in_unknown_graph(p, end, start), PlanPropertyPath::Sequence(a, b) => { let eval = self.clone(); - let b = b.clone(); + let b = Rc::clone(b); let end = end.clone(); Box::new(self.eval_from_in_unknown_graph(a, start).flat_map_ok( move |(middle, graph_name)| { eval.eval_closed_in_graph(&b, &middle, &end, &graph_name) - .map(|is_found| if is_found { Some(graph_name) } else { None }) + .map(|is_found| is_found.then(|| graph_name)) .transpose() }, )) @@ -3236,21 +3245,21 @@ impl PathEvaluator { let eval = self.clone(); let start2 = start.clone(); let end = end.clone(); - let p = p.clone(); + let p = Rc::clone(p); self.run_if_term_is_a_dataset_node(start, move |graph_name| { look_in_transitive_closure( Some(Ok(start2.clone())), |e| eval.eval_from_in_graph(&p, &e, &graph_name), &end, ) - .map(|is_found| if is_found { Some(graph_name) } else { None }) + .map(|is_found| is_found.then(|| graph_name)) .transpose() }) } PlanPropertyPath::OneOrMore(p) => { let eval = self.clone(); let end = end.clone(); - let p = p.clone(); + let p = Rc::clone(p); Box::new( self.eval_from_in_unknown_graph(&p, start) .filter_map(move |r| { @@ -3260,13 +3269,7 @@ impl PathEvaluator { |e| eval.eval_from_in_graph(&p, &e, &graph_name), &end, ) - .map(|is_found| { - if is_found { - Some(graph_name) - } else { - None - } - }) + .map(|is_found| is_found.then(|| graph_name)) }) .transpose() }), @@ -3279,16 +3282,16 @@ impl PathEvaluator { let eval = self.clone(); let start2 = start.clone(); let end = end.clone(); - let p = p.clone(); + let p = Rc::clone(p); self.run_if_term_is_a_dataset_node(start, move |graph_name| { eval.eval_closed_in_graph(&p, &start2, &end, &graph_name) - .map(|is_found| if is_found { Some(graph_name) } else { None }) + .map(|is_found| is_found.then(|| graph_name)) .transpose() }) } } PlanPropertyPath::NegatedPropertySet(ps) => { - let ps = ps.clone(); + 
let ps = Rc::clone(ps); Box::new( self.dataset .encoded_quads_for_pattern(Some(start), None, Some(end), None) @@ -3327,7 +3330,7 @@ impl PathEvaluator { PlanPropertyPath::Reverse(p) => self.eval_to_in_graph(p, start, graph_name), PlanPropertyPath::Sequence(a, b) => { let eval = self.clone(); - let b = b.clone(); + let b = Rc::clone(b); let graph_name2 = graph_name.clone(); Box::new( self.eval_from_in_graph(a, start, graph_name) @@ -3343,7 +3346,7 @@ impl PathEvaluator { PlanPropertyPath::ZeroOrMore(p) => { self.run_if_term_is_a_graph_node(start, graph_name, || { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); let graph_name2 = graph_name.clone(); transitive_closure(Some(Ok(start.clone())), move |e| { eval.eval_from_in_graph(&p, &e, &graph_name2) @@ -3352,7 +3355,7 @@ impl PathEvaluator { } PlanPropertyPath::OneOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); let graph_name2 = graph_name.clone(); Box::new(transitive_closure( self.eval_from_in_graph(&p, start, graph_name), @@ -3368,7 +3371,7 @@ impl PathEvaluator { }) } PlanPropertyPath::NegatedPropertySet(ps) => { - let ps = ps.clone(); + let ps = Rc::clone(ps); Box::new( self.dataset .encoded_quads_for_pattern(Some(start), None, None, Some(graph_name)) @@ -3404,7 +3407,7 @@ impl PathEvaluator { PlanPropertyPath::Reverse(p) => self.eval_to_in_unknown_graph(p, start), PlanPropertyPath::Sequence(a, b) => { let eval = self.clone(); - let b = b.clone(); + let b = Rc::clone(b); Box::new(self.eval_from_in_unknown_graph(a, start).flat_map_ok( move |(middle, graph_name)| { eval.eval_from_in_graph(&b, &middle, &graph_name) @@ -3419,10 +3422,10 @@ impl PathEvaluator { PlanPropertyPath::ZeroOrMore(p) => { let start2 = start.clone(); let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); self.run_if_term_is_a_dataset_node(start, move |graph_name| { let eval = eval.clone(); - let p = p.clone(); + let p = Rc::clone(&p); let graph_name2 = graph_name.clone(); 
transitive_closure(Some(Ok(start2.clone())), move |e| { eval.eval_from_in_graph(&p, &e, &graph_name2) @@ -3432,7 +3435,7 @@ impl PathEvaluator { } PlanPropertyPath::OneOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); Box::new(transitive_closure( self.eval_from_in_unknown_graph(&p, start), move |(e, graph_name)| { @@ -3444,7 +3447,7 @@ impl PathEvaluator { PlanPropertyPath::ZeroOrOne(p) => { let eval = self.clone(); let start2 = start.clone(); - let p = p.clone(); + let p = Rc::clone(p); self.run_if_term_is_a_dataset_node(start, move |graph_name| { hash_deduplicate(once(Ok(start2.clone())).chain(eval.eval_from_in_graph( &p, @@ -3455,7 +3458,7 @@ impl PathEvaluator { }) } PlanPropertyPath::NegatedPropertySet(ps) => { - let ps = ps.clone(); + let ps = Rc::clone(ps); Box::new( self.dataset .encoded_quads_for_pattern(Some(start), None, None, None) @@ -3489,7 +3492,7 @@ impl PathEvaluator { PlanPropertyPath::Reverse(p) => self.eval_from_in_graph(p, end, graph_name), PlanPropertyPath::Sequence(a, b) => { let eval = self.clone(); - let a = a.clone(); + let a = Rc::clone(a); let graph_name2 = graph_name.clone(); Box::new( self.eval_to_in_graph(b, end, graph_name) @@ -3505,7 +3508,7 @@ impl PathEvaluator { PlanPropertyPath::ZeroOrMore(p) => { self.run_if_term_is_a_graph_node(end, graph_name, || { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); let graph_name2 = graph_name.clone(); transitive_closure(Some(Ok(end.clone())), move |e| { eval.eval_to_in_graph(&p, &e, &graph_name2) @@ -3514,7 +3517,7 @@ impl PathEvaluator { } PlanPropertyPath::OneOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); let graph_name2 = graph_name.clone(); Box::new(transitive_closure( self.eval_to_in_graph(&p, end, graph_name), @@ -3529,7 +3532,7 @@ impl PathEvaluator { }) } PlanPropertyPath::NegatedPropertySet(ps) => { - let ps = ps.clone(); + let ps = Rc::clone(ps); Box::new( self.dataset 
.encoded_quads_for_pattern(None, None, Some(end), Some(graph_name)) @@ -3564,7 +3567,7 @@ impl PathEvaluator { PlanPropertyPath::Reverse(p) => self.eval_from_in_unknown_graph(p, end), PlanPropertyPath::Sequence(a, b) => { let eval = self.clone(); - let a = a.clone(); + let a = Rc::clone(a); Box::new(self.eval_to_in_unknown_graph(b, end).flat_map_ok( move |(middle, graph_name)| { eval.eval_from_in_graph(&a, &middle, &graph_name) @@ -3579,10 +3582,10 @@ impl PathEvaluator { PlanPropertyPath::ZeroOrMore(p) => { let end2 = end.clone(); let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); self.run_if_term_is_a_dataset_node(end, move |graph_name| { let eval = eval.clone(); - let p = p.clone(); + let p = Rc::clone(&p); let graph_name2 = graph_name.clone(); transitive_closure(Some(Ok(end2.clone())), move |e| { eval.eval_to_in_graph(&p, &e, &graph_name2) @@ -3592,7 +3595,7 @@ impl PathEvaluator { } PlanPropertyPath::OneOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); Box::new(transitive_closure( self.eval_to_in_unknown_graph(&p, end), move |(e, graph_name)| { @@ -3604,7 +3607,7 @@ impl PathEvaluator { PlanPropertyPath::ZeroOrOne(p) => { let eval = self.clone(); let end2 = end.clone(); - let p = p.clone(); + let p = Rc::clone(p); self.run_if_term_is_a_dataset_node(end, move |graph_name| { hash_deduplicate(once(Ok(end2.clone())).chain(eval.eval_to_in_graph( &p, @@ -3615,7 +3618,7 @@ impl PathEvaluator { }) } PlanPropertyPath::NegatedPropertySet(ps) => { - let ps = ps.clone(); + let ps = Rc::clone(ps); Box::new( self.dataset .encoded_quads_for_pattern(Some(end), None, None, None) @@ -3651,7 +3654,7 @@ impl PathEvaluator { ), PlanPropertyPath::Sequence(a, b) => { let eval = self.clone(); - let b = b.clone(); + let b = Rc::clone(b); let graph_name2 = graph_name.clone(); Box::new(self.eval_open_in_graph(a, graph_name).flat_map_ok( move |(start, middle)| { @@ -3666,7 +3669,7 @@ impl PathEvaluator { )), 
PlanPropertyPath::ZeroOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); let graph_name2 = graph_name.clone(); Box::new(transitive_closure( self.get_subject_or_object_identity_pairs_in_graph(graph_name), @@ -3678,7 +3681,7 @@ impl PathEvaluator { } PlanPropertyPath::OneOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); let graph_name2 = graph_name.clone(); Box::new(transitive_closure( self.eval_open_in_graph(&p, graph_name), @@ -3693,7 +3696,7 @@ impl PathEvaluator { .chain(self.eval_open_in_graph(p, graph_name)), )), PlanPropertyPath::NegatedPropertySet(ps) => { - let ps = ps.clone(); + let ps = Rc::clone(ps); Box::new( self.dataset .encoded_quads_for_pattern(None, None, None, Some(graph_name)) @@ -3729,7 +3732,7 @@ impl PathEvaluator { ), PlanPropertyPath::Sequence(a, b) => { let eval = self.clone(); - let b = b.clone(); + let b = Rc::clone(b); Box::new(self.eval_open_in_unknown_graph(a).flat_map_ok( move |(start, middle, graph_name)| { eval.eval_from_in_graph(&b, &middle, &graph_name) @@ -3743,7 +3746,7 @@ impl PathEvaluator { )), PlanPropertyPath::ZeroOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); Box::new(transitive_closure( self.get_subject_or_object_identity_pairs_in_dataset(), move |(start, middle, graph_name)| { @@ -3754,7 +3757,7 @@ impl PathEvaluator { } PlanPropertyPath::OneOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); Box::new(transitive_closure( self.eval_open_in_unknown_graph(&p), move |(start, middle, graph_name)| { @@ -3768,7 +3771,7 @@ impl PathEvaluator { .chain(self.eval_open_in_unknown_graph(p)), )), PlanPropertyPath::NegatedPropertySet(ps) => { - let ps = ps.clone(); + let ps = Rc::clone(ps); Box::new( self.dataset .encoded_quads_for_pattern(None, None, None, None) @@ -4035,9 +4038,9 @@ struct BadForLoopLeftJoinIterator { from_tuple: EncodedTuple, right_evaluator: Rc EncodedTuplesIterator>, left_iter: 
EncodedTuplesIterator, - current_left: Option, + current_left: EncodedTuple, current_right: EncodedTuplesIterator, - problem_vars: Rc>, + problem_vars: Rc<[usize]>, } impl Iterator for BadForLoopLeftJoinIterator { @@ -4047,9 +4050,7 @@ impl Iterator for BadForLoopLeftJoinIterator { for right_tuple in &mut self.current_right { match right_tuple { Ok(right_tuple) => { - if let Some(combined) = - right_tuple.combine_with(self.current_left.as_ref().unwrap()) - { + if let Some(combined) = right_tuple.combine_with(&self.current_left) { return Some(Ok(combined)); } } @@ -4071,7 +4072,7 @@ impl Iterator for BadForLoopLeftJoinIterator { match right_tuple { Ok(right_tuple) => { if let Some(combined) = right_tuple.combine_with(&left_tuple) { - self.current_left = Some(left_tuple); + self.current_left = left_tuple; return Some(Ok(combined)); } } @@ -4255,8 +4256,8 @@ impl Iterator for DescribeIterator { .eval .dataset .decode_quad(&quad) - .map(|q| q.into()) - .map_err(|e| e.into()), + .map(Into::into) + .map_err(Into::into), Err(error) => Err(error), }); } @@ -4507,9 +4508,15 @@ impl Accumulator for SumAccumulator { self.sum = match operands { NumericBinaryOperands::Float(v1, v2) => Some((v1 + v2).into()), NumericBinaryOperands::Double(v1, v2) => Some((v1 + v2).into()), - NumericBinaryOperands::Integer(v1, v2) => v1.checked_add(v2).map(|v| v.into()), - NumericBinaryOperands::Decimal(v1, v2) => v1.checked_add(v2).map(|v| v.into()), - NumericBinaryOperands::Duration(v1, v2) => v1.checked_add(v2).map(|v| v.into()), + NumericBinaryOperands::Integer(v1, v2) => v1.checked_add(v2).map(Into::into), + NumericBinaryOperands::Decimal(v1, v2) => v1.checked_add(v2).map(Into::into), + NumericBinaryOperands::Duration(v1, v2) => v1.checked_add(v2).map(Into::into), + NumericBinaryOperands::YearMonthDuration(v1, v2) => { + v1.checked_add(v2).map(Into::into) + } + NumericBinaryOperands::DayTimeDuration(v1, v2) => { + v1.checked_add(v2).map(Into::into) + } _ => None, }; } else { @@ -4547,9 
+4554,9 @@ impl Accumulator for AvgAccumulator { NumericBinaryOperands::Float(v1, v2) => Some((v1 / v2).into()), NumericBinaryOperands::Double(v1, v2) => Some((v1 / v2).into()), NumericBinaryOperands::Integer(v1, v2) => { - Decimal::from(v1).checked_div(v2).map(|v| v.into()) + Decimal::from(v1).checked_div(v2).map(Into::into) } - NumericBinaryOperands::Decimal(v1, v2) => v1.checked_div(v2).map(|v| v.into()), + NumericBinaryOperands::Decimal(v1, v2) => v1.checked_div(v2).map(Into::into), _ => None, } } @@ -4634,14 +4641,14 @@ struct GroupConcatAccumulator { dataset: Rc, concat: Option, language: Option>, - separator: Rc, + separator: Rc, } impl GroupConcatAccumulator { - fn new(dataset: Rc, separator: Rc) -> Self { + fn new(dataset: Rc, separator: Rc) -> Self { Self { dataset, - concat: Some("".to_owned()), + concat: Some(String::new()), language: None, separator, } diff --git a/lib/src/sparql/model.rs b/lib/src/sparql/model.rs index 70cad955..d7c69ca7 100644 --- a/lib/src/sparql/model.rs +++ b/lib/src/sparql/model.rs @@ -160,6 +160,7 @@ impl From> for QueryResults { /// } /// # Result::<_,Box>::Ok(()) /// ``` +#[allow(clippy::rc_buffer)] pub struct QuerySolutionIter { variables: Rc>, iter: Box>>, @@ -171,8 +172,10 @@ impl QuerySolutionIter { iter: impl Iterator>, EvaluationError>> + 'static, ) -> Self { Self { - variables: variables.clone(), - iter: Box::new(iter.map(move |t| t.map(|values| (variables.clone(), values).into()))), + variables: Rc::clone(&variables), + iter: Box::new( + iter.map(move |t| t.map(|values| (Rc::clone(&variables), values).into())), + ), } } diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index bdd4b9e2..88469433 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -13,7 +13,7 @@ use std::rc::Rc; use std::time::Duration; use std::{fmt, io}; -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum PlanNode { StaticBindings { encoded_tuples: Vec, @@ -22,7 +22,7 @@ pub enum PlanNode { }, Service { service_name: 
PatternValue, - variables: Rc>, + variables: Rc<[Variable]>, child: Rc, graph_pattern: Rc, silent: bool, @@ -71,7 +71,7 @@ pub enum PlanNode { ForLoopLeftJoin { left: Rc, right: Rc, - possible_problem_vars: Rc>, //Variables that should not be part of the entry of the left join + possible_problem_vars: Rc<[usize]>, //Variables that should not be part of the entry of the left join }, Extend { child: Rc, @@ -99,13 +99,13 @@ pub enum PlanNode { }, Project { child: Rc, - mapping: Rc>, // pairs of (variable key in child, variable key in output) + mapping: Rc<[(PlanVariable, PlanVariable)]>, // pairs of (variable key in child, variable key in output) }, Aggregate { // By definition the group by key are the range 0..key_mapping.len() child: Rc, - key_variables: Rc>, - aggregates: Rc>, + key_variables: Rc<[PlanVariable]>, + aggregates: Rc<[(PlanAggregation, PlanVariable)]>, }, } @@ -236,7 +236,10 @@ impl PlanNode { match self { Self::StaticBindings { encoded_tuples, .. } => { let mut variables = BTreeMap::default(); // value true iff always bound - let max_tuple_length = encoded_tuples.iter().map(|t| t.capacity()).fold(0, max); + let max_tuple_length = encoded_tuples + .iter() + .map(EncodedTuple::capacity) + .fold(0, max); for tuple in encoded_tuples { for key in 0..max_tuple_length { match variables.entry(key) { @@ -444,8 +447,8 @@ pub enum PlanExpression { Literal(PlanTerm), Variable(PlanVariable), Exists(Rc), - Or(Box, Box), - And(Box, Box), + Or(Vec), + And(Vec), Equal(Box, Box), Greater(Box, Box), GreaterOrEqual(Box, Box), @@ -594,9 +597,7 @@ impl PlanExpression { | Self::YearMonthDurationCast(e) | Self::DayTimeDurationCast(e) | Self::StringCast(e) => e.lookup_used_variables(callback), - Self::Or(a, b) - | Self::And(a, b) - | Self::Equal(a, b) + Self::Equal(a, b) | Self::Greater(a, b) | Self::GreaterOrEqual(a, b) | Self::Less(a, b) @@ -636,7 +637,11 @@ impl PlanExpression { c.lookup_used_variables(callback); d.lookup_used_variables(callback); } - Self::Concat(es) | 
Self::Coalesce(es) | Self::CustomFunction(_, es) => { + Self::Or(es) + | Self::And(es) + | Self::Concat(es) + | Self::Coalesce(es) + | Self::CustomFunction(_, es) => { for e in es { e.lookup_used_variables(callback); } @@ -649,6 +654,7 @@ impl PlanExpression { } impl fmt::Display for PlanExpression { + #[allow(clippy::many_single_char_names)] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Variable(v) => { @@ -719,8 +725,26 @@ impl fmt::Display for PlanExpression { Self::YearMonthDurationCast(e) => write!(f, "YearMonthDurationCast({e})"), Self::DayTimeDurationCast(e) => write!(f, "DayTimeDurationCast({e})"), Self::StringCast(e) => write!(f, "StringCast({e})"), - Self::Or(a, b) => write!(f, "Or({a}, {b})"), - Self::And(a, b) => write!(f, "And({a}, {b})"), + Self::Or(es) => { + write!(f, "Or(")?; + for (i, e) in es.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{e}")?; + } + write!(f, ")") + } + Self::And(es) => { + write!(f, "And(")?; + for (i, e) in es.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{e}")?; + } + write!(f, ")") + } Self::Equal(a, b) => write!(f, "Equal({a}, {b})"), Self::Greater(a, b) => write!(f, "Greater({a}, {b})"), Self::GreaterOrEqual(a, b) => write!(f, "GreaterOrEqual({a}, {b})"), @@ -838,7 +862,7 @@ pub enum PlanAggregationFunction { Max, Avg, Sample, - GroupConcat { separator: Rc }, + GroupConcat { separator: Rc }, } #[derive(Debug, Clone)] @@ -850,7 +874,7 @@ pub enum PlanPropertyPath { ZeroOrMore(Rc), OneOrMore(Rc), ZeroOrOne(Rc), - NegatedPropertySet(Rc>>), + NegatedPropertySet(Rc<[PlanTerm]>), } impl fmt::Display for PlanPropertyPath { @@ -1046,7 +1070,7 @@ impl PlanNodeWithStats { "Aggregate({})", key_variables .iter() - .map(|c| c.to_string()) + .map(ToString::to_string) .chain(aggregates.iter().map(|(agg, v)| format!("{agg} -> {v}"))) .collect::>() .join(", ") @@ -1107,7 +1131,7 @@ impl PlanNodeWithStats { format!( "Sort({})", by.iter() - .map(|c| 
c.to_string()) + .map(ToString::to_string) .collect::>() .join(", ") ) @@ -1117,7 +1141,7 @@ impl PlanNodeWithStats { "StaticBindings({})", variables .iter() - .map(|v| v.to_string()) + .map(ToString::to_string) .collect::>() .join(", ") ) diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index 0b152db1..5e7370c1 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -105,10 +105,9 @@ impl<'a> PlanBuilder<'a> { let left = self.build_for_graph_pattern(left, variables, graph_name)?; let right = self.build_for_graph_pattern(right, variables, graph_name)?; - let mut possible_problem_vars = BTreeSet::new(); - Self::add_left_join_problematic_variables(&right, &mut possible_problem_vars); - if self.with_optimizations { - // TODO: don't use if SERVICE is inside of for loop + if self.with_optimizations && Self::can_use_for_loop_left_join(&right) { + let mut possible_problem_vars = BTreeSet::new(); + Self::add_left_join_problematic_variables(&right, &mut possible_problem_vars); //We add the extra filter if needed let right = if let Some(expr) = expression { @@ -122,7 +121,7 @@ impl<'a> PlanBuilder<'a> { PlanNode::ForLoopLeftJoin { left: Rc::new(left), right: Rc::new(right), - possible_problem_vars: Rc::new(possible_problem_vars.into_iter().collect()), + possible_problem_vars: possible_problem_vars.into_iter().collect(), } } else { PlanNode::HashLeftJoin { @@ -191,7 +190,7 @@ impl<'a> PlanBuilder<'a> { let service_name = self.pattern_value_from_named_node_or_variable(name, variables); PlanNode::Service { service_name, - variables: Rc::new(variables.clone()), + variables: Rc::from(variables.as_slice()), child: Rc::new(child), graph_pattern: Rc::new(inner.as_ref().clone()), silent: *silent, @@ -203,22 +202,19 @@ impl<'a> PlanBuilder<'a> { aggregates, } => PlanNode::Aggregate { child: Rc::new(self.build_for_graph_pattern(inner, variables, graph_name)?), - key_variables: Rc::new( - by.iter() - .map(|k| 
build_plan_variable(variables, k)) - .collect(), - ), - aggregates: Rc::new( - aggregates - .iter() - .map(|(v, a)| { - Ok(( - self.build_for_aggregate(a, variables, graph_name)?, - build_plan_variable(variables, v), - )) - }) - .collect::, EvaluationError>>()?, - ), + key_variables: by + .iter() + .map(|k| build_plan_variable(variables, k)) + .collect(), + aggregates: aggregates + .iter() + .map(|(v, a)| { + Ok(( + self.build_for_aggregate(a, variables, graph_name)?, + build_plan_variable(variables, v), + )) + }) + .collect::>()?, }, GraphPattern::Values { variables: table_variables, @@ -283,21 +279,19 @@ impl<'a> PlanBuilder<'a> { &mut inner_variables, &inner_graph_name, )?), - mapping: Rc::new( - projection - .iter() - .enumerate() - .map(|(new_variable, variable)| { - ( - PlanVariable { - encoded: new_variable, - plain: variable.clone(), - }, - build_plan_variable(variables, variable), - ) - }) - .collect(), - ), + mapping: projection + .iter() + .enumerate() + .map(|(new_variable, variable)| { + ( + PlanVariable { + encoded: new_variable, + plain: variable.clone(), + }, + build_plan_variable(variables, variable), + ) + }) + .collect(), } } GraphPattern::Distinct { inner } => PlanNode::HashDeduplicate { @@ -378,16 +372,14 @@ impl<'a> PlanBuilder<'a> { PropertyPathExpression::ZeroOrOne(p) => { PlanPropertyPath::ZeroOrOne(Rc::new(self.build_for_path(p))) } - PropertyPathExpression::NegatedPropertySet(p) => { - PlanPropertyPath::NegatedPropertySet(Rc::new( - p.iter() - .map(|p| PlanTerm { - encoded: self.build_term(p), - plain: p.clone(), - }) - .collect(), - )) - } + PropertyPathExpression::NegatedPropertySet(p) => PlanPropertyPath::NegatedPropertySet( + p.iter() + .map(|p| PlanTerm { + encoded: self.build_term(p), + plain: p.clone(), + }) + .collect(), + ), } } @@ -407,14 +399,14 @@ impl<'a> PlanBuilder<'a> { plain: l.clone(), }), Expression::Variable(v) => PlanExpression::Variable(build_plan_variable(variables, v)), - Expression::Or(a, b) => PlanExpression::Or( 
- Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), - ), - Expression::And(a, b) => PlanExpression::And( - Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), - ), + Expression::Or(a, b) => PlanExpression::Or(vec![ + self.build_for_expression(a, variables, graph_name)?, + self.build_for_expression(b, variables, graph_name)?, + ]), + Expression::And(a, b) => PlanExpression::And(vec![ + self.build_for_expression(a, variables, graph_name)?, + self.build_for_expression(b, variables, graph_name)?, + ]), Expression::Equal(a, b) => PlanExpression::Equal( Box::new(self.build_for_expression(a, variables, graph_name)?), Box::new(self.build_for_expression(b, variables, graph_name)?), @@ -441,22 +433,31 @@ impl<'a> PlanBuilder<'a> { ), Expression::In(e, l) => { let e = self.build_for_expression(e, variables, graph_name)?; - l.iter() - .map(|v| { - Ok(PlanExpression::Equal( - Box::new(e.clone()), - Box::new(self.build_for_expression(v, variables, graph_name)?), - )) - }) - .reduce(|a: Result<_, EvaluationError>, b| { - Ok(PlanExpression::Or(Box::new(a?), Box::new(b?))) - }) - .unwrap_or_else(|| { - Ok(PlanExpression::Literal(PlanTerm { + if l.is_empty() { + // False except on error + PlanExpression::If( + Box::new(e), + Box::new(PlanExpression::Literal(PlanTerm { encoded: false.into(), plain: false.into(), - })) - })? 
+ })), + Box::new(PlanExpression::Literal(PlanTerm { + encoded: false.into(), + plain: false.into(), + })), + ) + } else { + PlanExpression::Or( + l.iter() + .map(|v| { + Ok(PlanExpression::Equal( + Box::new(e.clone()), + Box::new(self.build_for_expression(v, variables, graph_name)?), + )) + }) + .collect::>()?, + ) + } } Expression::Add(a, b) => PlanExpression::Add( Box::new(self.build_for_expression(a, variables, graph_name)?), @@ -1084,7 +1085,7 @@ impl<'a> PlanBuilder<'a> { separator, } => Ok(PlanAggregation { function: PlanAggregationFunction::GroupConcat { - separator: Rc::new(separator.clone().unwrap_or_else(|| " ".to_owned())), + separator: Rc::from(separator.as_deref().unwrap_or(" ")), }, parameter: Some(self.build_for_expression(expr, variables, graph_name)?), distinct: *distinct, @@ -1219,13 +1220,11 @@ impl<'a> PlanBuilder<'a> { } fn convert_plan_variable(from_variable: &PlanVariable, to: &mut Vec) -> PlanVariable { - let encoded = if let Some(to_id) = to.iter().enumerate().find_map(|(to_id, var)| { - if *var == from_variable.plain { - Some(to_id) - } else { - None - } - }) { + let encoded = if let Some(to_id) = to + .iter() + .enumerate() + .find_map(|(to_id, var)| (*var == from_variable.plain).then(|| to_id)) + { to_id } else { to.push(Variable::new_unchecked(format!("{:x}", random::()))); @@ -1237,6 +1236,35 @@ impl<'a> PlanBuilder<'a> { } } + fn can_use_for_loop_left_join(node: &PlanNode) -> bool { + // We forbid MINUS, SERVICE and everything that affects cardinality in for loop left joins + match node { + PlanNode::StaticBindings { .. } + | PlanNode::QuadPattern { .. } + | PlanNode::PathPattern { .. } => true, + PlanNode::Filter { child, .. } + | PlanNode::Extend { child, .. } + | PlanNode::Sort { child, .. } + | PlanNode::Project { child, .. } + | PlanNode::Aggregate { child, .. 
} => Self::can_use_for_loop_left_join(child), + PlanNode::Union { children } => { + children.iter().all(|c| Self::can_use_for_loop_left_join(c)) + } + PlanNode::HashJoin { left, right } + | PlanNode::ForLoopJoin { left, right } + | PlanNode::ForLoopLeftJoin { left, right, .. } + | PlanNode::HashLeftJoin { left, right, .. } => { + Self::can_use_for_loop_left_join(left) && Self::can_use_for_loop_left_join(right) + } + PlanNode::AntiJoin { .. } + | PlanNode::Service { .. } + | PlanNode::HashDeduplicate { .. } + | PlanNode::Reduced { .. } + | PlanNode::Skip { .. } + | PlanNode::Limit { .. } => false, + } + } + fn add_left_join_problematic_variables(node: &PlanNode, set: &mut BTreeSet) { match node { PlanNode::StaticBindings { .. } @@ -1299,7 +1327,8 @@ impl<'a> PlanBuilder<'a> { } PlanNode::Sort { child, .. } | PlanNode::HashDeduplicate { child } - | PlanNode::Reduced { child } => { + | PlanNode::Reduced { child } + | PlanNode::Project { child, .. } => { Self::add_left_join_problematic_variables(child, set); } PlanNode::Skip { child, .. } | PlanNode::Limit { child, .. } => { @@ -1317,15 +1346,6 @@ impl<'a> PlanBuilder<'a> { Self::add_left_join_problematic_variables(child, set) } } - PlanNode::Project { mapping, child } => { - let mut child_bound = BTreeSet::new(); - Self::add_left_join_problematic_variables(child, &mut child_bound); - for (child_i, output_i) in mapping.iter() { - if child_bound.contains(&child_i.encoded) { - set.insert(output_i.encoded); - } - } - } PlanNode::Aggregate { key_variables, aggregates, @@ -1341,15 +1361,15 @@ impl<'a> PlanBuilder<'a> { } fn new_join(&self, mut left: PlanNode, mut right: PlanNode) -> PlanNode { + // We first use VALUES to filter the following patterns evaluation + if matches!(right, PlanNode::StaticBindings { .. 
}) { + swap(&mut left, &mut right); + } + if self.with_optimizations - && Self::is_fit_for_for_loop_join(&left) && Self::is_fit_for_for_loop_join(&right) && Self::has_some_common_variables(&left, &right) { - // We first use VALUES to filter the following patterns evaluation - if matches!(right, PlanNode::StaticBindings { .. }) { - swap(&mut left, &mut right); - } PlanNode::ForLoopJoin { left: Rc::new(left), right: Rc::new(right), @@ -1379,9 +1399,8 @@ impl<'a> PlanBuilder<'a> { match node { PlanNode::StaticBindings { .. } | PlanNode::QuadPattern { .. } - | PlanNode::PathPattern { .. } - | PlanNode::ForLoopJoin { .. } => true, - PlanNode::HashJoin { left, right } => { + | PlanNode::PathPattern { .. } => true, + PlanNode::ForLoopJoin { left, right } | PlanNode::HashJoin { left, right } => { Self::is_fit_for_for_loop_join(left) && Self::is_fit_for_for_loop_join(right) } PlanNode::Filter { child, .. } | PlanNode::Extend { child, .. } => { @@ -1411,8 +1430,12 @@ impl<'a> PlanBuilder<'a> { expression: filter, }; } - if let PlanExpression::And(f1, f2) = *filter { - return self.push_filter(Rc::new(self.push_filter(node, f1)), f2); + if let PlanExpression::And(filters) = *filter { + return filters + .into_iter() + .fold((*node.as_ref()).clone(), |acc, f| { + self.push_filter(Rc::new(acc), Box::new(f)) + }); } let mut filter_variables = BTreeSet::new(); filter.lookup_used_variables(&mut |v| { @@ -1423,25 +1446,25 @@ impl<'a> PlanBuilder<'a> { if filter_variables.iter().all(|v| left.is_variable_bound(*v)) { if filter_variables.iter().all(|v| right.is_variable_bound(*v)) { PlanNode::HashJoin { - left: Rc::new(self.push_filter(left.clone(), filter.clone())), - right: Rc::new(self.push_filter(right.clone(), filter)), + left: Rc::new(self.push_filter(Rc::clone(left), filter.clone())), + right: Rc::new(self.push_filter(Rc::clone(right), filter)), } } else { PlanNode::HashJoin { - left: Rc::new(self.push_filter(left.clone(), filter)), - right: right.clone(), + left: 
Rc::new(self.push_filter(Rc::clone(left), filter)), + right: Rc::clone(right), } } } else if filter_variables.iter().all(|v| right.is_variable_bound(*v)) { PlanNode::HashJoin { - left: left.clone(), - right: Rc::new(self.push_filter(right.clone(), filter)), + left: Rc::clone(left), + right: Rc::new(self.push_filter(Rc::clone(right), filter)), } } else { PlanNode::Filter { child: Rc::new(PlanNode::HashJoin { - left: left.clone(), - right: right.clone(), + left: Rc::clone(left), + right: Rc::clone(right), }), expression: filter, } @@ -1450,20 +1473,20 @@ impl<'a> PlanBuilder<'a> { PlanNode::ForLoopJoin { left, right } => { if filter_variables.iter().all(|v| left.is_variable_bound(*v)) { PlanNode::ForLoopJoin { - left: Rc::new(self.push_filter(left.clone(), filter)), - right: right.clone(), + left: Rc::new(self.push_filter(Rc::clone(left), filter)), + right: Rc::clone(right), } } else if filter_variables.iter().all(|v| right.is_variable_bound(*v)) { PlanNode::ForLoopJoin { //TODO: should we do that always? 
- left: left.clone(), - right: Rc::new(self.push_filter(right.clone(), filter)), + left: Rc::clone(left), + right: Rc::new(self.push_filter(Rc::clone(right), filter)), } } else { PlanNode::Filter { child: Rc::new(PlanNode::HashJoin { - left: left.clone(), - right: right.clone(), + left: Rc::clone(left), + right: Rc::clone(right), }), expression: filter, } @@ -1477,14 +1500,14 @@ impl<'a> PlanBuilder<'a> { //TODO: handle the case where the filter generates an expression variable if filter_variables.iter().all(|v| child.is_variable_bound(*v)) { PlanNode::Extend { - child: Rc::new(self.push_filter(child.clone(), filter)), + child: Rc::new(self.push_filter(Rc::clone(child), filter)), expression: expression.clone(), variable: variable.clone(), } } else { PlanNode::Filter { child: Rc::new(PlanNode::Extend { - child: child.clone(), + child: Rc::clone(child), expression: expression.clone(), variable: variable.clone(), }), @@ -1495,20 +1518,23 @@ impl<'a> PlanBuilder<'a> { PlanNode::Filter { child, expression } => { if filter_variables.iter().all(|v| child.is_variable_bound(*v)) { PlanNode::Filter { - child: Rc::new(self.push_filter(child.clone(), filter)), + child: Rc::new(self.push_filter(Rc::clone(child), filter)), expression: expression.clone(), } } else { PlanNode::Filter { - child: child.clone(), - expression: Box::new(PlanExpression::And(expression.clone(), filter)), + child: Rc::clone(child), + expression: Box::new(PlanExpression::And(vec![ + *expression.clone(), + *filter, + ])), } } } PlanNode::Union { children } => PlanNode::Union { children: children .iter() - .map(|c| Rc::new(self.push_filter(c.clone(), filter.clone()))) + .map(|c| Rc::new(self.push_filter(Rc::clone(c), filter.clone()))) .collect(), }, _ => PlanNode::Filter { @@ -1541,12 +1567,11 @@ impl<'a> PlanBuilder<'a> { } fn build_plan_variable(variables: &mut Vec, variable: &Variable) -> PlanVariable { - let encoded = match slice_key(variables, variable) { - Some(key) => key, - None => { - 
variables.push(variable.clone()); - variables.len() - 1 - } + let encoded = if let Some(key) = slice_key(variables, variable) { + key + } else { + variables.push(variable.clone()); + variables.len() - 1 }; PlanVariable { plain: variable.clone(), @@ -1555,12 +1580,11 @@ fn build_plan_variable(variables: &mut Vec, variable: &Variable) -> Pl } fn bnode_key(blank_nodes: &mut Vec, blank_node: &BlankNode) -> usize { - match slice_key(blank_nodes, blank_node) { - Some(key) => key, - None => { - blank_nodes.push(blank_node.clone()); - blank_nodes.len() - 1 - } + if let Some(key) = slice_key(blank_nodes, blank_node) { + key + } else { + blank_nodes.push(blank_node.clone()); + blank_nodes.len() - 1 } } @@ -1673,21 +1697,13 @@ fn compile_static_pattern_if_exists( options: Option<&Expression>, ) -> Option { let static_pattern = if let Expression::Literal(pattern) = pattern { - if pattern.datatype() == xsd::STRING { - Some(pattern.value()) - } else { - None - } + (pattern.datatype() == xsd::STRING).then(|| pattern.value()) } else { None }; let static_options = if let Some(options) = options { if let Expression::Literal(options) = options { - if options.datatype() == xsd::STRING { - Some(Some(options.value())) - } else { - None - } + (options.datatype() == xsd::STRING).then(|| Some(options.value())) } else { None } diff --git a/lib/src/sparql/update.rs b/lib/src/sparql/update.rs index dff7d02c..2da5d08c 100644 --- a/lib/src/sparql/update.rs +++ b/lib/src/sparql/update.rs @@ -71,7 +71,14 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> { insert, pattern, .. 
- } => self.eval_delete_insert(delete, insert, using_dataset.as_ref().unwrap(), pattern), + } => self.eval_delete_insert( + delete, + insert, + using_dataset + .as_ref() + .ok_or_else(|| EvaluationError::msg("No dataset"))?, + pattern, + ), GraphUpdateOperation::Load { silent, source, @@ -119,14 +126,14 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> { ) -> Result<(), EvaluationError> { let dataset = Rc::new(DatasetView::new(self.transaction.reader(), using)); let (plan, variables) = PlanBuilder::build( - dataset.as_ref(), + &dataset, algebra, false, &self.options.query_options.custom_functions, !self.options.query_options.without_optimizations, )?; let evaluator = SimpleEvaluator::new( - dataset.clone(), + Rc::clone(&dataset), self.base_iri.clone(), self.options.query_options.service_handler(), Rc::new(self.options.query_options.custom_functions.clone()), @@ -374,7 +381,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> { TermPattern::Literal(term) => Some(term.clone().into()), TermPattern::Triple(triple) => { Self::convert_triple_pattern(triple, variables, values, dataset, bnodes)? - .map(|t| t.into()) + .map(Into::into) } TermPattern::Variable(v) => Self::lookup_variable(v, variables, values) .map(|node| dataset.decode_term(&node)) @@ -507,7 +514,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> { GroundTermPattern::Literal(term) => Some(term.clone().into()), GroundTermPattern::Triple(triple) => { Self::convert_ground_triple_pattern(triple, variables, values, dataset)? 
- .map(|t| t.into()) + .map(Into::into) } GroundTermPattern::Variable(v) => Self::lookup_variable(v, variables, values) .map(|node| dataset.decode_term(&node)) diff --git a/lib/src/storage/backend/fallback.rs b/lib/src/storage/backend/fallback.rs index 4c4a1ded..6000863b 100644 --- a/lib/src/storage/backend/fallback.rs +++ b/lib/src/storage/backend/fallback.rs @@ -29,20 +29,18 @@ impl Db { Ok(Self(Arc::new(RwLock::new(trees)))) } + #[allow(clippy::unwrap_in_result)] pub fn column_family(&self, name: &'static str) -> Option { let name = ColumnFamily(name); - if self.0.read().unwrap().contains_key(&name) { - Some(name) - } else { - None - } + (self.0.read().unwrap().contains_key(&name)).then(|| name) } #[must_use] pub fn snapshot(&self) -> Reader { - Reader(InnerReader::Simple(self.0.clone())) + Reader(InnerReader::Simple(Arc::clone(&self.0))) } + #[allow(clippy::unwrap_in_result)] pub fn transaction<'a, 'b: 'a, T, E: Error + 'static + From>( &'b self, f: impl Fn(Transaction<'a>) -> Result, @@ -64,6 +62,7 @@ enum InnerReader { } impl Reader { + #[allow(clippy::unwrap_in_result)] pub fn get( &self, column_family: &ColumnFamily, @@ -90,6 +89,7 @@ impl Reader { } } + #[allow(clippy::unwrap_in_result)] pub fn contains_key( &self, column_family: &ColumnFamily, @@ -120,6 +120,7 @@ impl Reader { self.scan_prefix(column_family, &[]) } + #[allow(clippy::unwrap_in_result)] pub fn scan_prefix( &self, column_family: &ColumnFamily, @@ -176,19 +177,20 @@ impl Reader { Ok(Iter { iter, current }) } + #[allow(clippy::unwrap_in_result)] pub fn len(&self, column_family: &ColumnFamily) -> Result { match &self.0 { InnerReader::Simple(reader) => Ok(reader .read() .unwrap() .get(column_family) - .map_or(0, |tree| tree.len())), + .map_or(0, BTreeMap::len)), InnerReader::Transaction(reader) => { if let Some(reader) = reader.upgrade() { Ok((*reader) .borrow() .get(column_family) - .map_or(0, |tree| tree.len())) + .map_or(0, BTreeMap::len)) } else { Err(StorageError::Other( "The transaction is 
already ended".into(), @@ -198,19 +200,20 @@ impl Reader { } } + #[allow(clippy::unwrap_in_result)] pub fn is_empty(&self, column_family: &ColumnFamily) -> Result { match &self.0 { InnerReader::Simple(reader) => Ok(reader .read() .unwrap() .get(column_family) - .map_or(true, |tree| tree.is_empty())), + .map_or(true, BTreeMap::is_empty)), InnerReader::Transaction(reader) => { if let Some(reader) = reader.upgrade() { Ok((*reader) .borrow() .get(column_family) - .map_or(true, |tree| tree.is_empty())) + .map_or(true, BTreeMap::is_empty)) } else { Err(StorageError::Other( "The transaction is already ended".into(), @@ -246,7 +249,7 @@ impl Transaction<'_> { .map_or(false, |cf| cf.contains_key(key))) } - #[allow(clippy::unnecessary_wraps)] + #[allow(clippy::unnecessary_wraps, clippy::unwrap_in_result)] pub fn insert( &mut self, column_family: &ColumnFamily, @@ -269,7 +272,7 @@ impl Transaction<'_> { self.insert(column_family, key, &[]) } - #[allow(clippy::unnecessary_wraps)] + #[allow(clippy::unnecessary_wraps, clippy::unwrap_in_result)] pub fn remove(&mut self, column_family: &ColumnFamily, key: &[u8]) -> Result<(), StorageError> { self.0 .borrow_mut() diff --git a/lib/src/storage/backend/rocksdb.rs b/lib/src/storage/backend/rocksdb.rs index 6da4be07..fc8f4da2 100644 --- a/lib/src/storage/backend/rocksdb.rs +++ b/lib/src/storage/backend/rocksdb.rs @@ -4,7 +4,7 @@ use crate::storage::error::{CorruptionError, StorageError}; use lazy_static::lazy_static; -use libc::{self, c_char, c_void, free}; +use libc::{self, c_void, free}; use oxrocksdb_sys::*; use rand::random; use std::borrow::Borrow; @@ -241,7 +241,7 @@ impl Db { .map(|cf| cf.as_ptr()) .collect::>() .as_ptr(), - cf_options.as_ptr() as *const *const rocksdb_options_t, + cf_options.as_ptr().cast(), cf_handles.as_mut_ptr(), )) .map_err(|e| { @@ -359,7 +359,7 @@ impl Db { .map(|cf| cf.as_ptr()) .collect::>() .as_ptr(), - cf_options.as_ptr() as *const *const rocksdb_options_t, + cf_options.as_ptr().cast(), 
cf_handles.as_mut_ptr(), )) .map_err(|e| { @@ -393,11 +393,7 @@ impl Db { cf_handles, cf_options, is_secondary: true, - path_to_remove: if in_memory { - Some(secondary_path) - } else { - None - }, + path_to_remove: in_memory.then(|| secondary_path), })), }) } @@ -424,7 +420,7 @@ impl Db { .map(|cf| cf.as_ptr()) .collect::>() .as_ptr(), - cf_options.as_ptr() as *const *const rocksdb_options_t, + cf_options.as_ptr().cast(), cf_handles.as_mut_ptr(), 0, // false )) @@ -580,7 +576,7 @@ impl Db { } let options = rocksdb_readoptions_create_copy(db.read_options); Reader { - inner: InnerReader::PlainDb(db.clone()), + inner: InnerReader::PlainDb(Arc::clone(db)), options, } } @@ -594,7 +590,7 @@ impl Db { rocksdb_readoptions_set_snapshot(options, snapshot); Reader { inner: InnerReader::TransactionalSnapshot(Rc::new(TransactionalSnapshot { - db: db.clone(), + db: Arc::clone(db), snapshot, })), options, @@ -632,7 +628,7 @@ impl Db { let result = f(Transaction { transaction: Rc::new(transaction), read_options, - _lifetime: PhantomData::default(), + _lifetime: PhantomData, }); match result { Ok(result) => { @@ -698,7 +694,7 @@ impl Db { db.db, db.read_options, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len(), )) } @@ -707,7 +703,7 @@ impl Db { db.db, db.read_options, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len() )) } @@ -740,9 +736,9 @@ impl Db { db.db, db.write_options, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len(), - value.as_ptr() as *const c_char, + value.as_ptr().cast(), value.len(), )) }?; @@ -940,7 +936,7 @@ impl Reader { inner.db.db, self.options, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len() )) } @@ -950,7 +946,7 @@ impl Reader { *inner, self.options, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len() )) } else { @@ -964,7 +960,7 @@ impl Reader { inner.db, self.options, column_family.0, - 
key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len() )) } @@ -1005,11 +1001,7 @@ impl Reader { break; } } - if found { - Some(bound) - } else { - None - } + found.then(|| bound) }; unsafe { @@ -1021,7 +1013,7 @@ impl Reader { if let Some(upper_bound) = &upper_bound { rocksdb_readoptions_set_iterate_upper_bound( options, - upper_bound.as_ptr() as *const c_char, + upper_bound.as_ptr().cast(), upper_bound.len(), ); } @@ -1046,7 +1038,7 @@ impl Reader { if prefix.is_empty() { rocksdb_iter_seek_to_first(iter); } else { - rocksdb_iter_seek(iter, prefix.as_ptr() as *const c_char, prefix.len()); + rocksdb_iter_seek(iter, prefix.as_ptr().cast(), prefix.len()); } let is_currently_valid = rocksdb_iter_valid(iter) != 0; Ok(Iter { @@ -1101,7 +1093,7 @@ impl Transaction<'_> { *self.transaction, self.read_options, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len() ))?; Ok(if slice.is_null() { @@ -1130,9 +1122,9 @@ impl Transaction<'_> { ffi_result!(rocksdb_transaction_put_cf_with_status( *self.transaction, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len(), - value.as_ptr() as *const c_char, + value.as_ptr().cast(), value.len(), ))?; } @@ -1152,7 +1144,7 @@ impl Transaction<'_> { ffi_result!(rocksdb_transaction_delete_cf_with_status( *self.transaction, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len(), ))?; } @@ -1177,7 +1169,7 @@ impl Deref for PinnableSlice { unsafe { let mut len = 0; let val = rocksdb_pinnableslice_value(self.0, &mut len); - slice::from_raw_parts(val as *const u8, len) + slice::from_raw_parts(val.cast(), len) } } } @@ -1208,7 +1200,7 @@ pub struct Buffer { impl Drop for Buffer { fn drop(&mut self) { unsafe { - free(self.base as *mut c_void); + free(self.base.cast()); } } } @@ -1285,7 +1277,7 @@ impl Iter { unsafe { let mut len = 0; let val = rocksdb_iter_key(self.iter, &mut len); - Some(slice::from_raw_parts(val as *const u8, len)) + 
Some(slice::from_raw_parts(val.cast(), len)) } } else { None @@ -1311,9 +1303,9 @@ impl SstFileWriter { unsafe { ffi_result!(rocksdb_sstfilewriter_put_with_status( self.writer, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len(), - value.as_ptr() as *const c_char, + value.as_ptr().cast(), value.len(), ))?; } diff --git a/lib/src/storage/mod.rs b/lib/src/storage/mod.rs index 1dbebcfa..8a92e77f 100644 --- a/lib/src/storage/mod.rs +++ b/lib/src/storage/mod.rs @@ -1,3 +1,4 @@ +#![allow(clippy::same_name_method)] #[cfg(not(target_family = "wasm"))] use crate::model::Quad; use crate::model::{GraphNameRef, NamedOrBlankNodeRef, QuadRef, TermRef}; @@ -181,7 +182,7 @@ impl Storage { ] } - #[allow(clippy::unnecessary_wraps)] + #[allow(clippy::unnecessary_wraps, clippy::unwrap_in_result)] fn setup(db: Db) -> Result { let this = Self { #[cfg(not(target_family = "wasm"))] @@ -1305,7 +1306,7 @@ impl StorageBulkLoader { let mut buffer_to_load = Vec::with_capacity(batch_size); swap(buffer, &mut buffer_to_load); let storage = self.storage.clone(); - let done_counter_clone = done_counter.clone(); + let done_counter_clone = Arc::clone(done_counter); threads.push_back(spawn(move || { FileBulkLoader::new(storage, batch_size).load(buffer_to_load, &done_counter_clone) })); diff --git a/lib/src/store.rs b/lib/src/store.rs index 95234498..3d854a4a 100644 --- a/lib/src/store.rs +++ b/lib/src/store.rs @@ -565,7 +565,7 @@ impl Store { &self, quads: impl IntoIterator>, ) -> Result<(), StorageError> { - let quads = quads.into_iter().map(|q| q.into()).collect::>(); + let quads = quads.into_iter().map(Into::into).collect::>(); self.transaction(move |mut t| t.extend(&quads)) } @@ -1569,7 +1569,7 @@ impl BulkLoader { quads: impl IntoIterator, EI>>, ) -> Result<(), EO> { self.storage - .load(quads.into_iter().map(|q| q.map(|q| q.into()))) + .load(quads.into_iter().map(|q| q.map(Into::into))) } } diff --git a/lib/tests/store.rs b/lib/tests/store.rs index 66d2fae0..5f8a6809 100644 --- 
a/lib/tests/store.rs +++ b/lib/tests/store.rs @@ -175,7 +175,7 @@ fn test_load_dataset() -> Result<(), Box> { #[test] #[cfg(not(target_family = "wasm"))] fn test_bulk_load_dataset() -> Result<(), Box> { - let store = Store::new().unwrap(); + let store = Store::new()?; store .bulk_loader() .load_dataset(Cursor::new(GRAPH_DATA), DatasetFormat::TriG, None)?; diff --git a/oxrocksdb-sys/Cargo.toml b/oxrocksdb-sys/Cargo.toml index 9ba2d405..fea41139 100644 --- a/oxrocksdb-sys/Cargo.toml +++ b/oxrocksdb-sys/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxrocksdb-sys" -version = "0.3.16-dev" +version = "0.3.17-dev" authors = ["Tpt "] license = "GPL-2.0 OR Apache-2.0" repository = "https://github.com/oxigraph/oxigraph/tree/main/oxrocksdb-sys" diff --git a/oxrocksdb-sys/build.rs b/oxrocksdb-sys/build.rs index f1884dbf..ff8a633b 100644 --- a/oxrocksdb-sys/build.rs +++ b/oxrocksdb-sys/build.rs @@ -1,7 +1,7 @@ // Code from https://github.com/rust-rocksdb/rust-rocksdb/blob/eb2d302682418b361a80ad8f4dcf335ade60dcf5/librocksdb-sys/build.rs // License: https://github.com/rust-rocksdb/rust-rocksdb/blob/master/LICENSE -use std::env::{set_var, var}; +use std::env::{remove_var, set_var, var}; use std::path::PathBuf; fn link(name: &str, bundled: bool) { @@ -88,8 +88,6 @@ fn build_rocksdb() { config.define("HAVE_LZCNT", Some("1")); config.flag_if_supported("-mlzcnt"); } - } else if target.contains("aarch64") { - lib_sources.push("util/crc32c_arm64.cc") } if target.contains("apple-ios") { @@ -100,11 +98,13 @@ fn build_rocksdb() { config.define("NPERF_CONTEXT", None); config.define("ROCKSDB_PLATFORM_POSIX", None); config.define("ROCKSDB_LIB_IO_POSIX", None); + remove_var("SDKROOT"); // We override SDKROOT for cross-compilation set_var("IPHONEOS_DEPLOYMENT_TARGET", "11.0"); } else if target.contains("darwin") { config.define("OS_MACOSX", None); config.define("ROCKSDB_PLATFORM_POSIX", None); config.define("ROCKSDB_LIB_IO_POSIX", None); + remove_var("SDKROOT"); // We override SDKROOT for 
cross-compilation } else if target.contains("android") { config.define("OS_ANDROID", None); config.define("ROCKSDB_PLATFORM_POSIX", None); @@ -179,6 +179,7 @@ fn build_rocksdb() { } config.file(&format!("rocksdb/{file}")); } + config.compile("rocksdb"); } diff --git a/oxrocksdb-sys/rocksdb b/oxrocksdb-sys/rocksdb index 740854a7..443333d8 160000 --- a/oxrocksdb-sys/rocksdb +++ b/oxrocksdb-sys/rocksdb @@ -1 +1 @@ -Subproject commit 740854a7b0c09450e67e5e29d9979e743485aebf +Subproject commit 443333d8c059c87db408ec2d11685db00031b30a diff --git a/python/Cargo.toml b/python/Cargo.toml index 414dc63d..32bf13f4 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pyoxigraph" -version = "0.3.16-dev" +version = "0.3.17-dev" authors = ["Tpt"] license = "MIT OR Apache-2.0" readme = "README.md" @@ -19,5 +19,5 @@ doctest = false abi3 = ["pyo3/abi3-py37"] [dependencies] -oxigraph = { version = "0.3.16-dev", path="../lib", features = ["http_client"] } -pyo3 = { version = "0.18", features = ["extension-module"] } +oxigraph = { version = "0.3.17-dev", path="../lib", features = ["http_client"] } +pyo3 = { version = "0.19", features = ["extension-module"] } diff --git a/python/pyproject.toml b/python/pyproject.toml index 8b7cdb91..c272d3d2 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["maturin~=0.14.0"] +requires = ["maturin~=0.15.0"] build-backend = "maturin" [project] diff --git a/python/requirements.dev.txt b/python/requirements.dev.txt index 6bc286d4..bebb6736 100644 --- a/python/requirements.dev.txt +++ b/python/requirements.dev.txt @@ -1,6 +1,6 @@ black~=23.1 furo -maturin~=0.14.0 +maturin~=0.15.1 mypy~=1.0 ruff~=0.0.255 sphinx~=5.3 diff --git a/python/src/io.rs b/python/src/io.rs index b4fbf3c5..53e53af4 100644 --- a/python/src/io.rs +++ b/python/src/io.rs @@ -8,7 +8,9 @@ use oxigraph::io::{ use pyo3::exceptions::{PyIOError, PySyntaxError, PyValueError}; use pyo3::prelude::*; use 
pyo3::types::PyBytes; -use pyo3::wrap_pyfunction; +use pyo3::{intern, wrap_pyfunction}; +use std::cmp::max; +use std::error::Error; use std::fs::File; use std::io::{self, BufRead, BufReader, BufWriter, Cursor, Read, Write}; @@ -46,7 +48,7 @@ pub fn add_to_module(module: &PyModule) -> PyResult<()> { /// >>> list(parse(input, "text/turtle", base_iri="http://example.com/")) /// [ predicate= object=>>] #[pyfunction] -#[pyo3(text_signature = "(input, mime_type, *, base_iri = None)")] +#[pyo3(signature = (input, mime_type, *, base_iri = None))] pub fn parse( input: PyObject, mime_type: &str, @@ -281,21 +283,22 @@ impl Write for PyWritable { pub struct PyIo(PyObject); impl Read for PyIo { - fn read(&mut self, mut buf: &mut [u8]) -> io::Result { + fn read(&mut self, buf: &mut [u8]) -> io::Result { Python::with_gil(|py| { + if buf.is_empty() { + return Ok(0); + } + let to_read = max(1, buf.len() / 4); // We divide by 4 because TextIO works with number of characters and not with number of bytes let read = self .0 - .call_method(py, "read", (buf.len(),), None) + .as_ref(py) + .call_method1(intern!(py, "read"), (to_read,)) .map_err(to_io_err)?; let bytes = read - .extract::<&[u8]>(py) - .or_else(|e| { - read.extract::<&str>(py) - .map(|s| s.as_bytes()) - .map_err(|_| e) - }) + .extract::<&[u8]>() + .or_else(|e| read.extract::<&str>().map(str::as_bytes).map_err(|_| e)) .map_err(to_io_err)?; - buf.write_all(bytes)?; + buf[..bytes.len()].copy_from_slice(bytes); Ok(bytes.len()) }) } @@ -305,16 +308,17 @@ impl Write for PyIo { fn write(&mut self, buf: &[u8]) -> io::Result { Python::with_gil(|py| { self.0 - .call_method(py, "write", (PyBytes::new(py, buf),), None) + .as_ref(py) + .call_method1(intern!(py, "write"), (PyBytes::new(py, buf),)) .map_err(to_io_err)? 
- .extract::(py) + .extract::() .map_err(to_io_err) }) } fn flush(&mut self) -> io::Result<()> { Python::with_gil(|py| { - self.0.call_method(py, "flush", (), None)?; + self.0.as_ref(py).call_method0(intern!(py, "flush"))?; Ok(()) }) } @@ -325,7 +329,10 @@ fn to_io_err(error: impl Into) -> io::Error { } pub fn map_io_err(error: io::Error) -> PyErr { - if error.get_ref().map_or(false, |s| s.is::()) { + if error + .get_ref() + .map_or(false, <(dyn Error + Send + Sync + 'static)>::is::) + { *error.into_inner().unwrap().downcast().unwrap() } else { PyIOError::new_err(error.to_string()) diff --git a/python/src/lib.rs b/python/src/lib.rs index d20f80be..170d78b8 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -1,9 +1,3 @@ -#![allow( - clippy::redundant_pub_crate, - clippy::used_underscore_binding, - clippy::unused_self, - clippy::trivially_copy_pass_by_ref -)] mod io; mod model; mod sparql; diff --git a/python/src/model.rs b/python/src/model.rs index 8fbeecbe..26791351 100644 --- a/python/src/model.rs +++ b/python/src/model.rs @@ -20,8 +20,7 @@ use std::vec::IntoIter; /// /// >>> str(NamedNode('http://example.com')) /// '' -#[pyclass(name = "NamedNode", module = "pyoxigraph")] -#[pyo3(text_signature = "(value)")] +#[pyclass(frozen, name = "NamedNode", module = "pyoxigraph")] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub struct PyNamedNode { inner: NamedNode, @@ -144,8 +143,7 @@ impl PyNamedNode { /// /// >>> str(BlankNode('ex')) /// '_:ex' -#[pyclass(name = "BlankNode", module = "pyoxigraph")] -#[pyo3(text_signature = "(value = None)")] +#[pyclass(frozen, name = "BlankNode", module = "pyoxigraph")] #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PyBlankNode { inner: BlankNode, @@ -280,8 +278,7 @@ impl PyBlankNode { /// '"example"@en' /// >>> str(Literal('11', datatype=NamedNode('http://www.w3.org/2001/XMLSchema#integer'))) /// '"11"^^' -#[pyclass(name = "Literal", module = "pyoxigraph")] -#[pyo3(text_signature = "(value, *, datatype 
= None, language = None)")] +#[pyclass(frozen, name = "Literal", module = "pyoxigraph")] #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PyLiteral { inner: Literal, @@ -427,8 +424,7 @@ impl PyLiteral { } /// The RDF `default graph name `_. -#[pyclass(name = "DefaultGraph", module = "pyoxigraph")] -#[pyo3(text_signature = "()")] +#[pyclass(frozen, name = "DefaultGraph", module = "pyoxigraph")] #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] pub struct PyDefaultGraph {} @@ -625,9 +621,8 @@ impl IntoPy for PyTerm { /// A triple could also be easily destructed into its components: /// /// >>> (s, p, o) = Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')) -#[pyclass(name = "Triple", module = "pyoxigraph")] +#[pyclass(frozen, name = "Triple", module = "pyoxigraph")] #[derive(Eq, PartialEq, Debug, Clone, Hash)] -#[pyo3(text_signature = "(subject, predicate, object)")] pub struct PyTriple { inner: Triple, } @@ -824,8 +819,7 @@ impl IntoPy for PyGraphName { /// A quad could also be easily destructed into its components: /// /// >>> (s, p, o, g) = Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')) -#[pyclass(name = "Quad", module = "pyoxigraph")] -#[pyo3(text_signature = "(subject, predicate, object, graph_name = None)")] +#[pyclass(frozen, name = "Quad", module = "pyoxigraph")] #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PyQuad { inner: Quad, @@ -1012,8 +1006,7 @@ impl PyQuad { /// /// >>> str(Variable('foo')) /// '?foo' -#[pyclass(name = "Variable", module = "pyoxigraph")] -#[pyo3(text_signature = "(value)")] +#[pyclass(frozen, name = "Variable", module = "pyoxigraph")] #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PyVariable { inner: Variable, diff --git a/python/src/sparql.rs b/python/src/sparql.rs index 0fe9ceff..01298fa6 100644 --- a/python/src/sparql.rs +++ b/python/src/sparql.rs @@ -88,7 +88,7 @@ pub fn 
query_results_to_python(py: Python<'_>, results: QueryResults) -> PyObjec /// >>> s, p, o = solution /// >>> s /// -#[pyclass(unsendable, name = "QuerySolution", module = "pyoxigraph")] +#[pyclass(frozen, unsendable, name = "QuerySolution", module = "pyoxigraph")] pub struct PyQuerySolution { inner: QuerySolution, } @@ -225,7 +225,7 @@ impl PyQueryTriples { Ok(allow_threads_unsafe(|| self.inner.next()) .transpose() .map_err(map_evaluation_error)? - .map(|t| t.into())) + .map(Into::into)) } } diff --git a/python/src/store.rs b/python/src/store.rs index e0790aca..9410aeed 100644 --- a/python/src/store.rs +++ b/python/src/store.rs @@ -35,8 +35,7 @@ use pyo3::prelude::*; /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) /// >>> str(store) /// ' "1" .\n' -#[pyclass(name = "Store", module = "pyoxigraph")] -#[pyo3(text_signature = "(path = None)")] +#[pyclass(frozen, name = "Store", module = "pyoxigraph")] #[derive(Clone)] pub struct PyStore { inner: Store, @@ -94,7 +93,7 @@ impl PyStore { /// :rtype: Store /// :raises IOError: if the target directories contain invalid data or could not be accessed. 
#[staticmethod] - #[pyo3(signature = (primary_path, secondary_path = None), text_signature = "(primary_path, secondary_path = None)")] + #[pyo3(signature = (primary_path, secondary_path = None))] fn secondary( primary_path: &str, secondary_path: Option<&str>, @@ -216,7 +215,7 @@ impl PyStore { /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) /// >>> list(store.quads_for_pattern(NamedNode('http://example.com'), None, None, None)) /// [ predicate= object=> graph_name=>] - #[pyo3(signature = (subject, predicate, object, graph_name = None), text_signature = "($self, subject, predicate, object, graph_name = None)")] + #[pyo3(signature = (subject, predicate, object, graph_name = None))] fn quads_for_pattern( &self, subject: &PyAny, @@ -228,10 +227,10 @@ impl PyStore { extract_quads_pattern(subject, predicate, object, graph_name)?; Ok(QuadIter { inner: self.inner.quads_for_pattern( - subject.as_ref().map(|p| p.into()), - predicate.as_ref().map(|p| p.into()), - object.as_ref().map(|p| p.into()), - graph_name.as_ref().map(|p| p.into()), + subject.as_ref().map(Into::into), + predicate.as_ref().map(Into::into), + object.as_ref().map(Into::into), + graph_name.as_ref().map(Into::into), ), }) } @@ -273,10 +272,7 @@ impl PyStore { /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) /// >>> store.query('ASK { ?s ?p ?o }') /// True - #[pyo3( - signature = (query, *, base_iri = None, use_default_graph_as_union = false, default_graph = None, named_graphs = None), - text_signature = "($self, query, *, base_iri = None, use_default_graph_as_union = False, default_graph = None, named_graphs = None)" - )] + #[pyo3(signature = (query, *, base_iri = None, use_default_graph_as_union = false, default_graph = None, named_graphs = None))] fn query( &self, query: &str, @@ -332,7 +328,7 @@ impl PyStore { /// >>> store.update('DELETE WHERE { ?p ?o }') 
/// >>> list(store) /// [] - #[pyo3(signature = (update, *, base_iri = None), text_signature = "($self, update, *, base_iri = None)")] + #[pyo3(signature = (update, *, base_iri = None))] fn update(&self, update: &str, base_iri: Option<&str>, py: Python<'_>) -> PyResult<()> { py.allow_threads(|| { let update = @@ -377,7 +373,7 @@ impl PyStore { /// >>> store.load(io.BytesIO(b'

"1" .'), "text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) /// >>> list(store) /// [ predicate= object=> graph_name=>] - #[pyo3(signature = (input, mime_type, *, base_iri = None, to_graph = None), text_signature = "($self, input, mime_type, *, base_iri = None, to_graph = None)")] + #[pyo3(signature = (input, mime_type, *, base_iri = None, to_graph = None))] fn load( &self, input: PyObject, @@ -459,7 +455,7 @@ impl PyStore { /// >>> store.bulk_load(io.BytesIO(b'

"1" .'), "text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) /// >>> list(store) /// [ predicate= object=> graph_name=>] - #[pyo3(signature = (input, mime_type, *, base_iri = None, to_graph = None), text_signature = "($self, input, mime_type, *, base_iri = None, to_graph = None)")] + #[pyo3(signature = (input, mime_type, *, base_iri = None, to_graph = None))] fn bulk_load( &self, input: PyObject, @@ -537,7 +533,7 @@ impl PyStore { /// >>> store.dump(output, "text/turtle", from_graph=NamedNode("http://example.com/g")) /// >>> output.getvalue() /// b' "1" .\n' - #[pyo3(signature = (output, mime_type, *, from_graph = None), text_signature = "($self, output, mime_type, *, from_graph = None)")] + #[pyo3(signature = (output, mime_type, *, from_graph = None))] fn dump( &self, output: PyObject, @@ -597,10 +593,31 @@ impl PyStore { } } + /// Returns if the store contains the given named graph. + /// + /// :param graph_name: the name of the named graph. + /// :type graph_name: NamedNode or BlankNode or DefaultGraph + /// :rtype: None + /// :raises IOError: if an I/O error happens during the named graph lookup. + /// + /// >>> store = Store() + /// >>> store.add_graph(NamedNode('http://example.com/g')) + /// >>> store.contains_named_graph(NamedNode('http://example.com/g')) + /// True + fn contains_named_graph(&self, graph_name: &PyAny) -> PyResult { + let graph_name = GraphName::from(&PyGraphNameRef::try_from(graph_name)?); + match graph_name { + GraphName::DefaultGraph => Ok(true), + GraphName::NamedNode(graph_name) => self.inner.contains_named_graph(&graph_name), + GraphName::BlankNode(graph_name) => self.inner.contains_named_graph(&graph_name), + } + .map_err(map_storage_error) + } + /// Adds a named graph to the store. /// /// :param graph_name: the name of the name graph to add. 
- /// :type graph_name: NamedNode or BlankNode + /// :type graph_name: NamedNode or BlankNode or DefaultGraph /// :rtype: None /// :raises IOError: if an I/O error happens during the named graph insertion. /// diff --git a/python/tests/test_io.py b/python/tests/test_io.py index 5dda57ca..e7519f5d 100644 --- a/python/tests/test_io.py +++ b/python/tests/test_io.py @@ -5,7 +5,9 @@ from tempfile import NamedTemporaryFile, TemporaryFile from pyoxigraph import Literal, NamedNode, Quad, Triple, parse, serialize EXAMPLE_TRIPLE = Triple( - NamedNode("http://example.com/foo"), NamedNode("http://example.com/p"), Literal("1") + NamedNode("http://example.com/foo"), + NamedNode("http://example.com/p"), + Literal("éù"), ) EXAMPLE_QUAD = Quad( NamedNode("http://example.com/foo"), @@ -18,7 +20,7 @@ EXAMPLE_QUAD = Quad( class TestParse(unittest.TestCase): def test_parse_file(self) -> None: with NamedTemporaryFile() as fp: - fp.write(b'

"1" .') + fp.write('

"éù" .'.encode()) fp.flush() self.assertEqual( list(parse(fp.name, "text/turtle", base_iri="http://example.com/")), @@ -33,7 +35,7 @@ class TestParse(unittest.TestCase): self.assertEqual( list( parse( - StringIO('

"1" .'), + StringIO('

"éù" .'), "text/turtle", base_iri="http://example.com/", ) @@ -41,11 +43,23 @@ class TestParse(unittest.TestCase): [EXAMPLE_TRIPLE], ) + def test_parse_long_str_io(self) -> None: + self.assertEqual( + list( + parse( + StringIO('

"éù" .\n' * 1024), + "text/turtle", + base_iri="http://example.com/", + ) + ), + [EXAMPLE_TRIPLE] * 1024, + ) + def test_parse_bytes_io(self) -> None: self.assertEqual( list( parse( - BytesIO(b'

"1" .'), + BytesIO('

"éù" .'.encode()), "text/turtle", base_iri="http://example.com/", ) @@ -75,15 +89,16 @@ class TestSerialize(unittest.TestCase): output = BytesIO() serialize([EXAMPLE_TRIPLE], output, "text/turtle") self.assertEqual( - output.getvalue(), - b' "1" .\n', + output.getvalue().decode(), + ' "éù" .\n', ) def test_serialize_to_file(self) -> None: with NamedTemporaryFile() as fp: serialize([EXAMPLE_TRIPLE], fp.name, "text/turtle") self.assertEqual( - fp.read(), b' "1" .\n' + fp.read().decode(), + ' "éù" .\n', ) def test_serialize_io_error(self) -> None: diff --git a/server/Cargo.toml b/server/Cargo.toml index 0dc7c5ec..82ba6ab6 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxigraph_server" -version = "0.3.16-dev" +version = "0.3.17-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -17,8 +17,8 @@ anyhow = "1" oxhttp = { version = "0.1", features = ["rayon"] } clap = { version = "=4.0", features = ["derive"] } clap_lex = "=0.3.0" -oxigraph = { version = "0.3.16-dev", path = "../lib", features = ["http_client"] } -sparesults = { version = "0.1.7", path = "../lib/sparesults", features = ["rdf-star"] } +oxigraph = { version = "0.3.17-dev", path = "../lib", features = ["http_client"] } +sparesults = { version = "0.1.8-dev", path = "../lib/sparesults", features = ["rdf-star"] } rand = "0.8" url = "2" oxiri = "0.2" diff --git a/server/README.md b/server/README.md index 4d9eb2cc..6bcf92b4 100644 --- a/server/README.md +++ b/server/README.md @@ -220,6 +220,25 @@ brew install oxigraph It installs the `oxigraph_server` binary. [See the usage documentation to know how to use it](#usage). +## Systemd + +It is possible to run Oxigraph in the background using systemd. 
+ +For that, you can use the following `oxigraph_server.service` file (it might be inserted into `/etc/systemd/system/` or `$HOME/.config/systemd/user`): +```ini +[Unit] +Description=Oxigraph database server +After=network-online.target +Wants=network-online.target + +[Service] +Type=notify +ExecStart=/PATH/TO/oxigraph_server serve --location /PATH/TO/OXIGRAPH/DATA + +[Install] +WantedBy=multi-user.target +``` + ## Migration guide ### From 0.2 to 0.3 diff --git a/server/src/main.rs b/server/src/main.rs index e095d495..f57ada51 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -17,9 +17,13 @@ use sparesults::{QueryResultsFormat, QueryResultsSerializer}; use std::borrow::Cow; use std::cell::RefCell; use std::cmp::{max, min}; +#[cfg(target_os = "linux")] +use std::env; use std::ffi::OsStr; use std::fs::File; use std::io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; +#[cfg(target_os = "linux")] +use std::os::unix::net::UnixDatagram; use std::path::{Path, PathBuf}; use std::rc::Rc; use std::str::FromStr; @@ -591,7 +595,7 @@ pub fn main() -> anyhow::Result<()> { let mut file = BufWriter::new(File::create(&explain_file)?); match explain_file .extension() - .and_then(|e| e.to_str()) { + .and_then(OsStr::to_str) { Some("json") => { explanation.write_in_json(file)?; }, @@ -734,7 +738,7 @@ fn format_from_path( path: &Path, from_extension: impl FnOnce(&str) -> anyhow::Result, ) -> anyhow::Result { - if let Some(ext) = path.extension().and_then(|ext| ext.to_str()) { + if let Some(ext) = path.extension().and_then(OsStr::to_str) { from_extension(ext).map_err(|e| { e.context(format!( "Not able to guess the file format from file name extension '{ext}'" @@ -776,6 +780,8 @@ fn serve(store: Store, bind: String, read_only: bool, cors: bool) -> anyhow::Res }; server.set_global_timeout(HTTP_TIMEOUT); server.set_server_name(concat!("Oxigraph/", env!("CARGO_PKG_VERSION")))?; + #[cfg(target_os = "linux")] + systemd_notify_ready()?; eprintln!("Listening for 
requests at http://{}", &bind); server.listen(bind)?; Ok(()) @@ -1531,19 +1537,24 @@ fn web_load_graph( format: GraphFormat, to_graph_name: GraphNameRef<'_>, ) -> Result<(), HttpError> { + let base_iri = if let GraphNameRef::NamedNode(graph_name) = to_graph_name { + Some(graph_name.as_str()) + } else { + None + }; if url_query_parameter(request, "no_transaction").is_some() { web_bulk_loader(store, request).load_graph( BufReader::new(request.body_mut()), format, to_graph_name, - None, + base_iri, ) } else { store.load_graph( BufReader::new(request.body_mut()), format, to_graph_name, - None, + base_iri, ) } .map_err(loader_to_http_error) @@ -1636,7 +1647,7 @@ impl io::Result>) + 'static> ReadForWrite Result { let buffer = Rc::new(RefCell::new(Vec::new())); let state = initial_state_builder(ReadForWriteWriter { - buffer: buffer.clone(), + buffer: Rc::clone(&buffer), }) .map_err(internal_server_error)?; Ok(Response::builder(Status::OK) @@ -1698,6 +1709,14 @@ impl Write for ReadForWriteWriter { } } +#[cfg(target_os = "linux")] +fn systemd_notify_ready() -> io::Result<()> { + if let Some(path) = env::var_os("NOTIFY_SOCKET") { + UnixDatagram::unbound()?.send_to(b"READY=1", path)?; + } + Ok(()) +} + #[cfg(test)] mod tests { use super::*; @@ -2367,6 +2386,53 @@ mod tests { ) } + #[test] + fn graph_store_base_url() -> Result<()> { + let server = ServerTest::new()?; + + // POST + let request = Request::builder( + Method::POST, + "http://localhost/store?graph=http://example.com".parse()?, + ) + .with_header(HeaderName::CONTENT_TYPE, "text/turtle")? + .with_body("<> ."); + server.test_status(request, Status::NO_CONTENT)?; + + // GET + let request = Request::builder( + Method::GET, + "http://localhost/store?graph=http://example.com".parse()?, + ) + .with_header(HeaderName::ACCEPT, "application/n-triples")? 
+ .build(); + server.test_body( + request, + " .\n", + )?; + + // PUT + let request = Request::builder( + Method::PUT, + "http://localhost/store?graph=http://example.com".parse()?, + ) + .with_header(HeaderName::CONTENT_TYPE, "text/turtle")? + .with_body("<> ."); + server.test_status(request, Status::NO_CONTENT)?; + + // GET + let request = Request::builder( + Method::GET, + "http://localhost/store?graph=http://example.com".parse()?, + ) + .with_header(HeaderName::ACCEPT, "application/n-triples")? + .build(); + server.test_body( + request, + " .\n", + ) + } + #[test] fn graph_store_protocol() -> Result<()> { // Tests from https://www.w3.org/2009/sparql/docs/tests/data-sparql11/http-rdf-update/ diff --git a/testsuite/oxigraph-tests/sparql/halloween_problem.ru b/testsuite/oxigraph-tests/sparql/halloween_problem.ru deleted file mode 100644 index d62147d2..00000000 --- a/testsuite/oxigraph-tests/sparql/halloween_problem.ru +++ /dev/null @@ -1,3 +0,0 @@ -PREFIX ex: -INSERT DATA { ex:s ex:salary 1200 . ex:s2 ex:salary 1250 . ex:s3 ex:salary 1280 . ex:boss ex:salary 1600 . }; -DELETE { ?s ex:salary ?o } INSERT { ?s ex:salary ?v } WHERE { ?s ex:salary ?o FILTER(?o < 1500) BIND(?o + 100 AS ?v) } diff --git a/testsuite/oxigraph-tests/sparql/halloween_problem_result.ttl b/testsuite/oxigraph-tests/sparql/halloween_problem_result.ttl deleted file mode 100644 index 5e394780..00000000 --- a/testsuite/oxigraph-tests/sparql/halloween_problem_result.ttl +++ /dev/null @@ -1,6 +0,0 @@ -@prefix ex: . - -ex:s ex:salary 1300 . -ex:s2 ex:salary 1350 . -ex:s3 ex:salary 1380 . -ex:boss ex:salary 1600 . 
diff --git a/testsuite/oxigraph-tests/sparql/in_empty_error.rq b/testsuite/oxigraph-tests/sparql/in_empty_error.rq new file mode 100644 index 00000000..96060b9f --- /dev/null +++ b/testsuite/oxigraph-tests/sparql/in_empty_error.rq @@ -0,0 +1 @@ +SELECT ?r WHERE { BIND((?foo IN ()) AS ?r) } \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/values_property_path_all.srx b/testsuite/oxigraph-tests/sparql/in_empty_error.srx similarity index 67% rename from testsuite/oxigraph-tests/sparql/values_property_path_all.srx rename to testsuite/oxigraph-tests/sparql/in_empty_error.srx index 0632c2aa..762d6c88 100644 --- a/testsuite/oxigraph-tests/sparql/values_property_path_all.srx +++ b/testsuite/oxigraph-tests/sparql/in_empty_error.srx @@ -1,8 +1,9 @@ - + + \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/manifest.ttl b/testsuite/oxigraph-tests/sparql/manifest.ttl index 98701fcb..21ef9e76 100644 --- a/testsuite/oxigraph-tests/sparql/manifest.ttl +++ b/testsuite/oxigraph-tests/sparql/manifest.ttl @@ -23,17 +23,14 @@ :values_in_filter_not_exists :subquery_in_filter_not_exists :cmp_langString - :halloween_problem :nested_path :nested_expression :order_terms :nested_anonymous :unbound_variable_in_subquery - :values_too_many - :values_too_few - :values_property_path_all :one_or_more_shared :one_or_more_star + :in_empty_error ) . :small_unicode_escape_with_multibytes_char rdf:type mf:NegativeSyntaxTest ; @@ -105,11 +102,6 @@ mf:action [ qt:query ] ; mf:result . -:halloween_problem rdf:type mf:UpdateEvaluationTest ; - mf:name "Halloween Problem: An update operation should not be able to read its own writes" ; - mf:action [ ut:request ] ; - mf:result [ ut:data ] . - :nested_path rdf:type mf:PositiveSyntaxTest11 ; mf:name "A very nested property path" ; mf:action . @@ -136,20 +128,6 @@ [ qt:query ] ; mf:result . 
-:values_too_many rdf:type mf:NegativeSyntaxTest11 ; - mf:name "Too many values in a VALUE clause compared to the number of variable" ; - mf:action . - -:values_too_few rdf:type mf:NegativeSyntaxTest11 ; - mf:name "Too few values in a VALUE clause compared to the number of variable" ; - mf:action . - -:values_property_path_all rdf:type mf:QueryEvaluationTest ; - mf:name "ZeroOrX property paths should only return terms in the graph and not also terms defined in the query" ; - mf:action - [ qt:query ] ; - mf:result . - :one_or_more_shared rdf:type mf:QueryEvaluationTest ; mf:name "SPARQL one or more with shared variable" ; mf:action @@ -163,3 +141,8 @@ [ qt:query ; qt:data ] ; mf:result . + +:in_empty_error rdf:type mf:QueryEvaluationTest ; + mf:name "IN should propagate errors on the left side, even on the empty input" ; + mf:action [ qt:query ] ; + mf:result . diff --git a/testsuite/oxigraph-tests/sparql/values_property_path_all.rq b/testsuite/oxigraph-tests/sparql/values_property_path_all.rq deleted file mode 100644 index ef4d6c8d..00000000 --- a/testsuite/oxigraph-tests/sparql/values_property_path_all.rq +++ /dev/null @@ -1,4 +0,0 @@ -SELECT * WHERE { - VALUES ?v { 1 } - ?v ? 
?v -} \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/values_too_few.rq b/testsuite/oxigraph-tests/sparql/values_too_few.rq deleted file mode 100644 index ceac3c3c..00000000 --- a/testsuite/oxigraph-tests/sparql/values_too_few.rq +++ /dev/null @@ -1 +0,0 @@ -SELECT * WHERE { VALUES (?a ?b) { (1) } } \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/values_too_many.rq b/testsuite/oxigraph-tests/sparql/values_too_many.rq deleted file mode 100644 index e4e3c7ab..00000000 --- a/testsuite/oxigraph-tests/sparql/values_too_many.rq +++ /dev/null @@ -1 +0,0 @@ -SELECT * WHERE { VALUES (?a ?b) { (1 2 3) } } \ No newline at end of file diff --git a/testsuite/rdf-tests b/testsuite/rdf-tests index 52be3f1b..9d70ac92 160000 --- a/testsuite/rdf-tests +++ b/testsuite/rdf-tests @@ -1 +1 @@ -Subproject commit 52be3f1b99a7890ec1266bac7b52be19a85a720c +Subproject commit 9d70ac9298f494bfc3a2becabc8fa8bc3d169685 diff --git a/testsuite/src/manifest.rs b/testsuite/src/manifest.rs index fcf79b36..1b93f244 100644 --- a/testsuite/src/manifest.rs +++ b/testsuite/src/manifest.rs @@ -347,7 +347,7 @@ impl<'a> Iterator for RdfListIterator<'a> { let result = self .graph .object_for_subject_predicate(current, rdf::FIRST) - .map(|v| v.into_owned()); + .map(TermRef::into_owned); self.current_node = match self.graph.object_for_subject_predicate(current, rdf::REST) { Some(TermRef::NamedNode(n)) if n == rdf::NIL => None, diff --git a/testsuite/src/sparql_evaluator.rs b/testsuite/src/sparql_evaluator.rs index 44153c68..4790d175 100644 --- a/testsuite/src/sparql_evaluator.rs +++ b/testsuite/src/sparql_evaluator.rs @@ -287,21 +287,11 @@ fn evaluate_update_evaluation_test(test: &Test) -> Result<()> { } fn load_sparql_query_result(url: &str) -> Result { - if url.ends_with(".srx") { - StaticQueryResults::from_query_results( - QueryResults::read(read_file(url)?, QueryResultsFormat::Xml)?, - false, - ) - } else if url.ends_with(".srj") { - 
StaticQueryResults::from_query_results( - QueryResults::read(read_file(url)?, QueryResultsFormat::Json)?, - false, - ) - } else if url.ends_with(".tsv") { - StaticQueryResults::from_query_results( - QueryResults::read(read_file(url)?, QueryResultsFormat::Tsv)?, - false, - ) + if let Some(format) = url + .rsplit_once('.') + .and_then(|(_, extension)| QueryResultsFormat::from_extension(extension)) + { + StaticQueryResults::from_query_results(QueryResults::read(read_file(url)?, format)?, false) } else { StaticQueryResults::from_graph(&load_graph(url, guess_graph_format(url)?)?) } @@ -505,7 +495,7 @@ impl StaticQueryResults { fn from_graph(graph: &Graph) -> Result { // Hack to normalize literals - let store = Store::new().unwrap(); + let store = Store::new()?; for t in graph.iter() { store .insert(t.in_graph(GraphNameRef::DefaultGraph)) @@ -617,12 +607,12 @@ fn results_diff(expected: StaticQueryResults, actual: StaticQueryResults) -> Str format_diff( &expected_variables .iter() - .map(|v| v.to_string()) + .map(ToString::to_string) .collect::>() .join("\n"), &actual_variables .iter() - .map(|v| v.to_string()) + .map(ToString::to_string) .collect::>() .join("\n"), "variables",