Merge branch 'main' of github.com:heringerp/oxigraph

pull/825/head
Peter Heringer 1 year ago
commit 8aa63ee91a
Files changed (100 shown; the number in parentheses is the count of changed lines):

  1. .cargo/config.toml (137)
  2. .clusterfuzzlite/build.sh (6)
  3. .devcontainer/Dockerfile (21)
  4. .devcontainer/devcontainer.json (69)
  5. .github/actions/setup-rust/action.yml (27)
  6. .github/workflows/install_rocksdb.sh (11)
  7. .github/workflows/tests.yml (169)
  8. .gitmodules (7)
  9. .readthedocs.yaml (2)
  10. CHANGELOG.md (96)
  11. Cargo.lock (1357)
  12. Cargo.toml (260)
  13. README.md (69)
  14. bench/bsbm_oxigraph.sh (8)
  15. cli/Cargo.toml (47)
  16. cli/Dockerfile (16)
  17. cli/README.md (54)
  18. cli/logo.svg (0)
  19. cli/src/main.rs (1082)
  20. cli/templates/query.html (0)
  21. clippy.toml (2)
  22. docs/arch-diagram.svg (120)
  23. docs/arch-diagram.txt (35)
  24. fuzz/Cargo.toml (27)
  25. fuzz/fuzz_targets/n3.rs (28)
  26. fuzz/fuzz_targets/nquads.rs (84)
  27. fuzz/fuzz_targets/rdf_xml.rs (37)
  28. fuzz/fuzz_targets/sparql_eval.rs (25)
  29. fuzz/fuzz_targets/sparql_query.rs (2)
  30. fuzz/fuzz_targets/sparql_results_json.rs (2)
  31. fuzz/fuzz_targets/sparql_results_tsv.rs (2)
  32. fuzz/fuzz_targets/sparql_results_xml.rs (2)
  33. fuzz/fuzz_targets/sparql_update.rs (2)
  34. fuzz/fuzz_targets/trig.rs (167)
  35. fuzz/src/result_format.rs (24)
  36. js/Cargo.toml (26)
  37. js/README.md (34)
  38. js/biome.json (3)
  39. js/build_package.js (10)
  40. js/package-lock.json (1079)
  41. js/package.json (10)
  42. js/src/lib.rs (2)
  43. js/src/model.rs (32)
  44. js/src/store.rs (94)
  45. js/test/model.mjs (18)
  46. js/test/store.mjs (70)
  47. lib/Cargo.toml (65)
  48. lib/README.md (85)
  49. lib/oxigraph/Cargo.toml (72)
  50. lib/oxigraph/README.md (77)
  51. lib/oxigraph/benches/store.rs (200)
  52. lib/oxigraph/src/io/format.rs (91)
  53. lib/oxigraph/src/io/mod.rs (39)
  54. lib/oxigraph/src/io/read.rs (200)
  55. lib/oxigraph/src/io/write.rs (185)
  56. lib/oxigraph/src/lib.rs (12)
  57. lib/oxigraph/src/model.rs (20)
  58. lib/oxigraph/src/sparql/algebra.rs (119)
  59. lib/oxigraph/src/sparql/dataset.rs (4)
  60. lib/oxigraph/src/sparql/error.rs (84)
  61. lib/oxigraph/src/sparql/eval.rs (4093)
  62. lib/oxigraph/src/sparql/http/dummy.rs (14)
  63. lib/oxigraph/src/sparql/http/mod.rs (9)
  64. lib/oxigraph/src/sparql/http/simple.rs (17)
  65. lib/oxigraph/src/sparql/mod.rs (177)
  66. lib/oxigraph/src/sparql/model.rs (373)
  67. lib/oxigraph/src/sparql/results.rs (44)
  68. lib/oxigraph/src/sparql/service.rs (70)
  69. lib/oxigraph/src/sparql/update.rs (86)
  70. lib/oxigraph/src/storage/backend/fallback.rs (53)
  71. lib/oxigraph/src/storage/backend/mod.rs (4)
  72. lib/oxigraph/src/storage/backend/rocksdb.rs (168)
  73. lib/oxigraph/src/storage/binary_encoder.rs (35)
  74. lib/oxigraph/src/storage/error.rs (139)
  75. lib/oxigraph/src/storage/mod.rs (161)
  76. lib/oxigraph/src/storage/numeric_encoder.rs (55)
  77. lib/oxigraph/src/storage/small_string.rs (51)
  78. lib/oxigraph/src/storage/storage_generator.rs (6)
  79. lib/oxigraph/src/storage/vg_vocab.rs (0)
  80. lib/oxigraph/src/store.rs (1168)
  81. lib/oxigraph/tests/rocksdb_bc_data/000003.log (0)
  82. lib/oxigraph/tests/rocksdb_bc_data/CURRENT (0)
  83. lib/oxigraph/tests/rocksdb_bc_data/IDENTITY (0)
  84. lib/oxigraph/tests/rocksdb_bc_data/LOCK (0)
  85. lib/oxigraph/tests/rocksdb_bc_data/MANIFEST-000004 (0)
  86. lib/oxigraph/tests/rocksdb_bc_data/OPTIONS-000026 (0)
  87. lib/oxigraph/tests/store.rs (93)
  88. lib/oxrdf/Cargo.toml (24)
  89. lib/oxrdf/README.md (4)
  90. lib/oxrdf/src/blank_node.rs (72)
  91. lib/oxrdf/src/dataset.rs (386)
  92. lib/oxrdf/src/graph.rs (16)
  93. lib/oxrdf/src/interning.rs (59)
  94. lib/oxrdf/src/literal.rs (35)
  95. lib/oxrdf/src/named_node.rs (18)
  96. lib/oxrdf/src/parser.rs (178)
  97. lib/oxrdf/src/triple.rs (18)
  98. lib/oxrdf/src/variable.rs (39)
  99. lib/oxrdf/src/vocab.rs (9)
  100. lib/oxrdfio/Cargo.toml (36)

Some files were not shown because too many files have changed in this diff.

.cargo/config.toml (deleted)

@@ -1,137 +0,0 @@
[build]
rustflags = [
"-Wtrivial-casts",
"-Wtrivial-numeric-casts",
"-Wunsafe-code",
"-Wunused-lifetimes",
"-Wunused-qualifications",
# TODO: 1.63+ "-Wclippy::as-underscore",
# TODO: 1.65+ ""-Wclippy::bool-to-int-with-if",
"-Wclippy::borrow-as-ptr",
"-Wclippy::case-sensitive-file-extension-comparisons",
"-Wclippy::cast-lossless",
"-Wclippy::cast-possible-truncation",
"-Wclippy::cast-possible-wrap",
"-Wclippy::cast-precision-loss",
"-Wclippy::cast-ptr-alignment",
"-Wclippy::cast-sign-loss",
"-Wclippy::checked-conversions",
"-Wclippy::clone-on-ref-ptr",
"-Wclippy::cloned-instead-of-copied",
"-Wclippy::copy-iterator",
"-Wclippy::dbg-macro",
"-Wclippy::decimal-literal-representation",
"-Wclippy::default-trait-access",
"-Wclippy::default-union-representation",
# TODO: 1.61+ "-Wclippy::deref-by-slicing",
# TODO: 1.63+ "-Wclippy::doc-link-with-quotes",
# TODO: 1.62+ "-Wclippy::empty-drop",
"-Wclippy::empty-enum",
# TODO: on major version "-Wclippy::empty-structs-with-brackets",
"-Wclippy::enum-glob-use",
"-Wclippy::exit",
"-Wclippy::expect-used",
"-Wclippy::expl-impl-clone-on-copy",
"-Wclippy::explicit-deref-methods",
"-Wclippy::explicit-into-iter-loop",
"-Wclippy::explicit-iter-loop",
"-Wclippy::filter-map-next",
"-Wclippy::flat-map-option",
"-Wclippy::fn-to-numeric-cast-any",
# TODO: 1.62+ "-Wclippy::format-push-string",
"-Wclippy::from-iter-instead-of-collect",
"-Wclippy::get-unwrap",
"-Wclippy::if-not-else",
"-Wclippy::if-then-some-else-none",
"-Wclippy::implicit-clone",
"-Wclippy::inconsistent-struct-constructor",
"-Wclippy::index-refutable-slice",
"-Wclippy::inefficient-to-string",
"-Wclippy::inline-always",
"-Wclippy::inline-asm-x86-att-syntax",
"-Wclippy::inline-asm-x86-intel-syntax",
"-Wclippy::invalid-upcast-comparisons",
"-Wclippy::items-after-statements",
"-Wclippy::large-digit-groups",
# TODO: 1.68+ "-Wclippy::large-futures",
"-Wclippy::large-stack-arrays",
"-Wclippy::large-types-passed-by-value",
"-Wclippy::let-underscore-must-use",
"-Wclippy::let-unit-value",
"-Wclippy::linkedlist",
"-Wclippy::lossy-float-literal",
"-Wclippy::macro-use-imports",
"-Wclippy::manual-assert",
# TODO: 1.65+ "-Wclippy::manual-instant-elapsed",
# TODO: 1.67+ "-Wclippy::manual-let-else",
"-Wclippy::manual-ok-or",
# TODO: 1.65+ "-Wclippy::manual-string-new",
"-Wclippy::many-single-char-names",
"-Wclippy::map-unwrap-or",
"-Wclippy::match-bool",
"-Wclippy::match-same-arms",
"-Wclippy::match-wildcard-for-single-variants",
"-Wclippy::maybe-infinite-iter",
"-Wclippy::mem-forget",
# TODO: 1.63+ "-Wclippy::mismatching-type-param-order",
"-Wclippy::multiple-inherent-impl",
"-Wclippy::mut-mut",
"-Wclippy::mutex-atomic",
"-Wclippy::naive-bytecount",
"-Wclippy::needless-bitwise-bool",
"-Wclippy::needless-continue",
"-Wclippy::needless-pass-by-value",
"-Wclippy::no-effect-underscore-binding",
# TODO: 1.69+ "-Wclippy::no-mangle-with-rust-abi",
"-Wclippy::non-ascii-literal",
"-Wclippy::print-stderr",
"-Wclippy::print-stdout",
"-Wclippy::ptr-as-ptr",
"-Wclippy::range-minus-one",
"-Wclippy::range-plus-one",
"-Wclippy::rc-buffer",
"-Wclippy::rc-mutex",
"-Wclippy::redundant-closure-for-method-calls",
"-Wclippy::redundant-else",
"-Wclippy::redundant-feature-names",
"-Wclippy::ref-binding-to-reference",
"-Wclippy::ref-option-ref",
"-Wclippy::rest-pat-in-fully-bound-structs",
"-Wclippy::return-self-not-must-use",
"-Wclippy::same-functions-in-if-condition",
# TODO: strange failure on 1.60 "-Wclippy::same-name-method",
# TODO: 1.68+ "-Wclippy::semicolon-outside-block",
"-Wclippy::single-match-else",
"-Wclippy::stable-sort-primitive",
"-Wclippy::str-to-string",
"-Wclippy::string-add",
"-Wclippy::string-add-assign",
"-Wclippy::string-lit-as-bytes",
"-Wclippy::string-to-string",
# TODO: 1.67+ "-Wclippy::suspicious-xor-used-as-pow",
"-Wclippy::todo",
"-Wclippy::transmute-ptr-to-ptr",
"-Wclippy::trivially-copy-pass-by-ref",
"-Wclippy::try-err",
"-Wclippy::unicode-not-nfc",
"-Wclippy::unimplemented",
# TODO: 1.66+ "-Wclippy::uninlined-format-args",
# TODO: 1.70+ "-Wclippy::unnecessary-box-returns",
# TODO: 1.61+ "-Wclippy::unnecessary-join",
# TODO: 1.67+ "-Wclippy::unnecessary-safety-comment",
# TODO: 1.67+ "-Wclippy::unnecessary-safety-doc",
"-Wclippy::unnecessary-self-imports",
"-Wclippy::unnecessary-wraps",
"-Wclippy::unneeded-field-pattern",
"-Wclippy::unnested-or-patterns",
"-Wclippy::unreadable-literal",
"-Wclippy::unseparated-literal-suffix",
"-Wclippy::unused-async",
"-Wclippy::unused-self",
"-Wclippy::use-debug",
"-Wclippy::used-underscore-binding",
"-Wclippy::verbose-bit-mask",
"-Wclippy::verbose-file-reads",
"-Wclippy::wildcard-dependencies",
"-Wclippy::zero-sized-map-values",
]
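This per-crate `rustflags` list is superseded by the `[workspace.lints]` tables added to the workspace `Cargo.toml` later in this diff. To illustrate what the lint set enforces, here is a small hedged sketch (hypothetical code, not from this repository) together with the compliant alternatives:

```rust
// Hypothetical examples of code these lints flag, with compliant fixes.

fn checked_narrowing(len: u64) -> Option<u32> {
    // `len as u32` would trip clippy::cast-possible-truncation;
    // a checked conversion also satisfies clippy::checked-conversions.
    u32::try_from(len).ok()
}

fn parse_count(input: &str) -> Result<i64, std::num::ParseIntError> {
    // `input.parse().expect("count")` would trip clippy::expect-used;
    // propagating the error keeps the lint quiet.
    input.parse()
}

fn main() {
    assert_eq!(checked_narrowing(7), Some(7));
    assert_eq!(parse_count("42"), Ok(42));
}
```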

.clusterfuzzlite/build.sh

@@ -15,10 +15,14 @@ function build_seed_corpus() {
 cd "$SRC"/oxigraph
 cargo fuzz build -O --debug-assertions
-for TARGET in sparql_eval sparql_results_json sparql_results_tsv # sparql_results_xml https://github.com/tafia/quick-xml/issues/608
+for TARGET in sparql_eval sparql_results_json sparql_results_tsv sparql_results_xml n3 nquads trig rdf_xml
 do
   cp fuzz/target/x86_64-unknown-linux-gnu/release/$TARGET "$OUT"/
 done
 build_seed_corpus sparql_results_json srj
 build_seed_corpus sparql_results_tsv tsv
 build_seed_corpus sparql_results_xml srx
+build_seed_corpus n3 n3
+build_seed_corpus nquads nq
+build_seed_corpus trig trig
+build_seed_corpus rdf_xml rdf

.devcontainer/Dockerfile (deleted)

@@ -1,21 +0,0 @@
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust/.devcontainer/base.Dockerfile
# [Choice] Debian OS version (use bullseye on local arm64/Apple Silicon): buster, bullseye
ARG VARIANT="bullseye"
FROM mcr.microsoft.com/vscode/devcontainers/rust:0-${VARIANT}
# [Optional] Uncomment this section to install additional packages.
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
&& apt-get -y install --no-install-recommends \
python3 \
python3-venv \
python-is-python3 \
libclang-dev
ENV VIRTUAL_ENV=/opt/venv
RUN python -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN pip install --no-cache-dir -r python/requirements.dev.txt
# Change owner to the devcontainer user
RUN chown -R 1000:1000 $VIRTUAL_ENV

.devcontainer/devcontainer.json (deleted)

@@ -1,69 +0,0 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
// https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust
{
"name": "Rust",
"build": {
"dockerfile": "Dockerfile",
"args": {
// Use the VARIANT arg to pick a Debian OS version: buster, bullseye
// Use bullseye when on local on arm64/Apple Silicon.
"VARIANT": "bullseye"
}
},
"runArgs": ["--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined"],
// Configure tool-specific properties.
"customizations": {
// Configure properties specific to VS Code.
"vscode": {
// Set *default* container specific settings.json values on container create.
"settings": {
"lldb.executable": "/usr/bin/lldb",
// VS Code don't watch files under ./target
"files.watcherExclude": {
"**/target/**": true
},
"rust-analyzer.checkOnSave.command": "clippy",
"python.defaultInterpreterPath": "/opt/venv/bin/python",
"python.linting.enabled": true,
"python.linting.pylintEnabled": true,
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
"python.linting.pylintPath": "/opt/venv/bin/pylint",
"python.testing.pytestPath": "/opt/venv/bin/pytest"
},
// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"vadimcn.vscode-lldb",
"mutantdino.resourcemonitor",
"rust-lang.rust-analyzer",
"tamasfe.even-better-toml",
"serayuzgur.crates",
"ms-python.python",
"ms-python.vscode-pylance",
"esbenp.prettier-vscode",
"stardog-union.stardog-rdf-grammars"
]
}
},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "git submodule update --init && cargo build",
// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
"remoteUser": "vscode",
"features": {
"python": "3.10"
}
}

.github/actions/setup-rust/action.yml (new file)

@@ -0,0 +1,27 @@
name: 'Setup Rust'
description: 'Setup Rust using Rustup'
inputs:
  version:
    description: 'Rust version to use. By default latest stable version'
    required: false
    default: 'stable'
  component:
    description: 'Rust extra component to install like clippy'
    required: false
  target:
    description: 'Rust extra target to install like wasm32-unknown-unknown'
    required: false
runs:
  using: "composite"
  steps:
    - run: rustup update
      shell: bash
    - run: rustup default ${{ inputs.version }}
      shell: bash
    - run: rustup component add ${{ inputs.component }}
      shell: bash
      if: ${{ inputs.component }}
    - run: rustup target add ${{ inputs.target }}
      shell: bash
      if: ${{ inputs.target }}
    - uses: Swatinem/rust-cache@v2

.github/workflows/install_rocksdb.sh (new file)

@@ -0,0 +1,11 @@
if [ -f "rocksdb" ]
then
  cd rocksdb || exit
else
  git clone https://github.com/facebook/rocksdb.git
  cd rocksdb || exit
  git checkout v8.0.0
  make shared_lib
fi
sudo make install-shared
sudo ldconfig /usr/local/lib

.github/workflows/tests.yml

@@ -4,6 +4,7 @@ on:
   pull_request:
     branches:
       - main
+      - next
   schedule:
     - cron: "12 3 * * *"

@@ -16,7 +17,9 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - run: rustup update && rustup component add rustfmt
+      - uses: ./.github/actions/setup-rust
+        with:
+          component: rustfmt
       - run: cargo fmt -- --check

   clippy:
@@ -25,17 +28,38 @@ jobs:
       - uses: actions/checkout@v3
         with:
           submodules: true
-      - run: rustup update && rustup component add clippy
-      - uses: Swatinem/rust-cache@v2
-      - run: cargo clippy
+      - uses: ./.github/actions/setup-rust
+        with:
+          version: 1.74.1
+          component: clippy
+      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
         working-directory: ./lib/oxsdatatypes
-      - run: cargo clippy
+      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
         working-directory: ./lib/oxrdf
-      - run: cargo clippy
+      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
+        working-directory: ./lib/oxrdfxml
+      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
+        working-directory: ./lib/oxttl
+      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
+        working-directory: ./lib/oxrdfio
+      - run: cargo clippy --all-targets --features async-tokio -- -D warnings -D clippy::all
+        working-directory: ./lib/oxrdfio
+      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
         working-directory: ./lib/sparesults
-      - run: cargo clippy
+      - run: cargo clippy --all-targets --features async-tokio -- -D warnings -D clippy::all
+        working-directory: ./lib/sparesults
+      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
         working-directory: ./lib/spargebra
-      - run: cargo clippy --all-targets --all-features
+      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
+        working-directory: ./lib/sparopt
+      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
+        working-directory: ./lib/oxigraph
+      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
+        working-directory: ./python
+      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
+        working-directory: ./cli
+      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
+        working-directory: ./testsuite

   clippy_msv:
     runs-on: ubuntu-latest

@@ -60,9 +84,8 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - run: rustup update
-      - uses: Swatinem/rust-cache@v2
-      - run: cargo install cargo-deny || true
+      - uses: taiki-e/install-action@v2
+        with: { tool: cargo-deny }
       - run: cargo deny check

   semver_checks:
@@ -71,71 +94,109 @@ jobs:
       - uses: actions/checkout@v3
         with:
           submodules: true
-      - run: rustup update
-      - uses: Swatinem/rust-cache@v2
-      - run: cargo install cargo-semver-checks || true
-      - run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph_js --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server
+      - uses: ./.github/actions/setup-rust
+      - uses: taiki-e/install-action@v2
+        with: { tool: cargo-semver-checks }
+      - uses: actions/cache@v3
+        with:
+          path: rocksdb
+          key: ${{ runner.os }}-rocksdb-8.0.0
+      - run: bash .github/workflows/install_rocksdb.sh
+      - run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph-js --exclude pyoxigraph --exclude oxigraph-testsuite --exclude oxigraph-cli

-  test_linux:
+  test_linux_x86_64:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
         with:
           submodules: true
-      - run: rustup update
-      - uses: Swatinem/rust-cache@v2
-      - run: cargo test --all-features
-        env:
-          RUST_BACKTRACE: 1
+      - uses: ./.github/actions/setup-rust
+      - run: cargo test

-  address_sanitizer:
+  test_linux_i686:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
         with:
           submodules: true
-      - run: rustup update && rustup override set nightly
-      - run: sudo apt install -y llvm
-      - uses: Swatinem/rust-cache@v2
-      - run: cargo test --tests --target x86_64-unknown-linux-gnu --workspace --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server
-        env:
-          RUST_BACKTRACE: 1
-          RUSTFLAGS: -Z sanitizer=address
+      - uses: ./.github/actions/setup-rust
+        with:
+          target: i686-unknown-linux-gnu
+      - run: sudo apt-get update && sudo apt-get install -y g++-multilib
+      - run: cargo test --target i686-unknown-linux-gnu --no-default-features --features http-client-rustls-native
+        working-directory: ./lib/oxigraph

-  test_windows:
-    runs-on: windows-latest
+  test_linux_msv:
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
         with:
           submodules: true
-      - run: rustup update
-      - uses: Swatinem/rust-cache@v2
-      - run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
-      - run: cargo test --all-features
+      - uses: ./.github/actions/setup-rust
+        with:
+          version: 1.70.0
+      - run: rustup toolchain install nightly
+      - run: rm Cargo.lock && cargo +nightly update -Z direct-minimal-versions
+      - run: cargo test
+
+  test_linux_latest:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: true
+      - uses: ./.github/actions/setup-rust
+      - run: rm Cargo.lock && cargo update
+      - run: cargo test
+
+  test_linux_address_sanitizer:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: true
+      - uses: ./.github/actions/setup-rust
+        with:
+          version: nightly
+      - run: sudo apt-get update && sudo apt-get install -y llvm
+      - run: cargo test --tests --target x86_64-unknown-linux-gnu --workspace --exclude pyoxigraph --exclude oxigraph-testsuite --exclude oxigraph-cli
+        env:
+          RUSTFLAGS: -Z sanitizer=address
+
+  test_linux_dynamic_linking:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: true
+      - uses: ./.github/actions/setup-rust
+      - uses: actions/cache@v3
+        with:
+          path: rocksdb
+          key: ${{ runner.os }}-rocksdb-8.0.0
+      - run: bash .github/workflows/install_rocksdb.sh
+      - run: cargo test --tests --features oxrocksdb-sys/pkg-config
+
+  test_windows:
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: true
+      - uses: ./.github/actions/setup-rust
+      - run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
+      - run: cargo test

   rustdoc:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
         with:
           submodules: true
-      - run: rustup update
-      - uses: Swatinem/rust-cache@v2
-      - run: cargo doc --all-features
-        working-directory: ./lib
-        env:
-          RUSTDOCFLAGS: -D warnings
-
-  rustdoc_msrv:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          submodules: true
-      - run: rustup update && rustup override set 1.60.0
-      - uses: Swatinem/rust-cache@v2
-      - run: cargo doc --all-features
-        working-directory: ./lib
+      - uses: ./.github/actions/setup-rust
+        with:
+          version: 1.74.1
+      - run: cargo doc
         env:
           RUSTDOCFLAGS: -D warnings

@@ -143,24 +204,21 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - uses: Swatinem/rust-cache@v2
-      - run: cargo install typos-cli || true
+      - uses: taiki-e/install-action@v2
+        with: { tool: typos-cli }
       - run: typos

   clang_fmt:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - run: sudo apt install -y clang-format
+      - run: sudo apt-get update && sudo apt-get install -y clang-format
       - run: clang-format --Werror --dry-run oxrocksdb-sys/api/*

   fuzz_changes:
     if: github.event_name == 'pull_request'
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-        with:
-          submodules: true
       - uses: google/clusterfuzzlite/actions/build_fuzzers@v1
         with:
           language: rust

@@ -170,7 +228,7 @@ jobs:
       - uses: google/clusterfuzzlite/actions/run_fuzzers@v1
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
-          fuzz-seconds: 600
+          fuzz-seconds: 900
           mode: code-change
           sanitizer: address
           minimize-crashes: true

@@ -181,9 +239,6 @@ jobs:
     if: github.event_name != 'pull_request'
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-        with:
-          submodules: true
       - uses: google/clusterfuzzlite/actions/build_fuzzers@v1
         with:
           language: rust

@@ -192,7 +247,7 @@ jobs:
       - uses: google/clusterfuzzlite/actions/run_fuzzers@v1
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
-          fuzz-seconds: 3600
+          fuzz-seconds: 7200
           mode: batch
           sanitizer: address
           minimize-crashes: true

.gitmodules (vendored, 7 changes)

@@ -13,3 +13,10 @@
 [submodule "oxrocksdb-sys/lz4"]
 	path = oxrocksdb-sys/lz4
 	url = https://github.com/lz4/lz4.git
+[submodule "testsuite/N3"]
+	path = testsuite/N3
+	url = https://github.com/w3c/N3.git
+	branch = master
+[submodule "testsuite/rdf-canon"]
+	path = testsuite/rdf-canon
+	url = https://github.com/w3c/rdf-canon.git

.readthedocs.yaml

@@ -7,7 +7,7 @@ build:
   os: "ubuntu-22.04"
   tools:
     python: "3"
-    rust: "1.64"
+    rust: "1.70"
   apt_packages:
     - clang

CHANGELOG.md

@@ -1,3 +1,99 @@
## [0.4.0-alpha.3] - 2024-01-25
### Added
- `oxttl`: expose base IRIs.
- `oxttl`: allows injecting prefixes for serialization.
- `oxrdf`: `vocab::geosparql::WKT_LITERAL`.
### Changed
- Turtle: fixes a parsing bug with an escaped dot at the end of a local name.
- `oxttl`: changes the `prefixes` getter return type.
- JS: simplify build.
- Python: uses rustls by default on all platforms that are not Windows/macOS/iOS/WASM.
- Strips debug info of the Rust std library in release builds.
## [0.4.0-alpha.2] - 2024-01-08
### Added
- i686 Linux support
### Changed
- Docker: fixes Docker image Glibc version error.
- Docker: tags now use semver, e.g. `0.3.22` and not `v0.3.22`. Prereleases are also not tagged `latest` anymore.
- Python: `QuerySolution` is now thread safe.
## [0.4.0-alpha.1] - 2024-01-03
### Added
- `sparopt` crate: a new, still quite naive, query optimizer.
- `oxttl` crate: an N-Triples/N-Quads/Turtle/TriG/N3 parser and serializer compatible with Tokio.
- `oxrdfxml` crate: an RDF/XML parser and serializer compatible with Tokio.
- `oxrdfio` crate: a stand-alone crate with Oxigraph I/O related APIs.
- Rust: SPARQL results I/O is now exposed in the `oxigraph` crate (`oxigraph::sparql::results` module).
- Rust: it is now possible to dynamically link RocksDB with the `rocksdb-pkg-config` feature.
- Python: error location is now included in some `SyntaxError` exceptions.
- Python: the file type can be guessed from the file path extension during parsing and serialization.
- Python: the serialization method returns a `bytes` value if no output-related argument is given.
- Python: SPARQL query results I/O is now exposed (`parse_query_results` function and `.serialize` method).
- Python: `RdfFormat` and `QueryResultsFormat` enums to encode supported formats.
- CLI: a `convert` command to convert RDF files between different formats.
### Removed
- Rust: automated flush at the end of serialization. This should be done explicitly now (see the sketch after this list).
- oxsdatatypes: deprecated methods.
- Python: 3.7 and Musl Linux 1.1 support.
- Python: `GraphName.value`.
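As the first entry of this list notes, serializers no longer flush automatically. A minimal sketch of the explicit finish, assuming the `RdfSerializer` API introduced by the `oxrdfio` crate above (method names assumed from the 0.4 alpha, not verified against this exact commit):

```rust
use oxigraph::io::{RdfFormat, RdfSerializer};
use oxigraph::model::{GraphNameRef, NamedNodeRef, QuadRef};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut writer =
        RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
    let node = NamedNodeRef::new("http://example.com/s")?;
    writer.write_quad(QuadRef::new(node, node, node, GraphNameRef::DefaultGraph))?;
    // The automated flush was removed: without this call, buffered
    // output could be lost when the writer is dropped.
    let buffer = writer.finish()?;
    assert!(!buffer.is_empty());
    Ok(())
}
```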
### Changed
- SPARQL: a digit after `.` is now required for `xsd:decimal`.
- SPARQL: calendar subtraction returns `xsd:dayTimeDuration` and not `xsd:duration`.
- SPARQL: Unicode escapes (`\u` and `\U`) are now only supported in IRIs and strings and not everywhere.
- Literal serialization now produces canonical N-Triples according to the RDF 1.2 and RDF Dataset Canonicalization drafts.
- Rust: MSRV is now 1.70.
- Rust: makes `GraphName` implement `Default`.
- Rust: `wasm32-unknown-unknown` does not assume a JS platform by default. Enable the `js` feature for that.
- Rust: parsers take `Read` and not `BufRead` for input.
- Rust: `GraphFormat` and `DatasetFormat` have been merged into `RdfFormat`.
- Rust: `GraphParser` and `DatasetParser` have been merged into `RdfParser`.
- Rust: `GraphSerializer` and `DatasetSerializer` have been merged into `RdfSerializer`.
- Rust: query results are now `Send` and `Sync`.
- Rust: `Store.load_graph` and `Store.load_dataset` have been merged into a `load_from_read` method (see the sketch after this list).
- Rust: `Store.dump_graph` and `Store.dump_dataset` have been renamed to `dump_graph_to_write` and `dump_to_write`.
- Rust: `BulkLoader.set_*` methods have been renamed to `BulkLoader.with_*`.
- oxsdatatypes: pass by-value instead of by-reference parameters when relevant.
- oxsdatatypes: error types have been redesigned.
- oxsdatatypes: return an error when building a non-serializable duration (year-month and day-time of opposite signs).
- sparesults: renames some methods to move closer to the new oxrdfio crate.
- Python: raise `OSError` instead of `IOError` on OS errors.
- Python: the `mime_type` parameter has been renamed to `format`.
- Python: boolean SPARQL results are now encoded with a `QueryBoolean` class and not a simple `bool`.
- Python: a `path` parameter has been added to all I/O methods to read from a file.
  The existing `input` parameter now considers `str` values to be a serialization to parse.
- JS: the `mime_type` parameter has been renamed to `format`.
- CLI: the `oxigraph_server` binary has been renamed to `oxigraph`.
- CLI: the `--location` argument is now part of the sub-commands where it is relevant.
  `oxigraph_server --location foo serve` is not possible anymore.
  One needs to write `oxigraph serve --location foo`.
- CLI: it is now possible to upload gzip-encoded files to the HTTP API with the `Content-Encoding: gzip` header.
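To make the `Store` migration in the list above concrete, here is a minimal sketch assuming the 0.4.0-alpha API named in these entries (`RdfFormat` and the merged `load_from_read`); the snippet is illustrative and not taken from this commit:

```rust
use oxigraph::io::RdfFormat;
use oxigraph::store::Store;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let data = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
    let store = Store::new()?;
    // 0.3 had separate Store::load_graph/Store::load_dataset entry points;
    // 0.4 funnels every RDF format through one method and one format enum:
    store.load_from_read(RdfFormat::NQuads, data.as_bytes())?;
    assert_eq!(store.len()?, 1);
    Ok(())
}
```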
## [0.3.22] - 2023-11-29
### Changed
- Allows compiling with more recent `bindgen` and `cc`.
- Fixes compatibility with the `spin_no_std` feature of `lazy_static`.
## [0.3.21] - 2023-11-29
### Changed
- Bulk loader: do not fail when loading empty files.
- Python: fixes source distribution.
- Upgrades RocksDB to 7.8.1.
## [0.3.20] - 2023-10-23
### Changed

Cargo.lock (generated, 1357 changes): file diff suppressed because it is too large.

Cargo.toml

@@ -1,22 +1,274 @@
 [workspace]
 members = [
+    "cli",
     "js",
-    "lib",
+    "lib/oxigraph",
     "lib/oxrdf",
+    "lib/oxrdfio",
+    "lib/oxrdfxml",
     "lib/oxsdatatypes",
-    "lib/spargebra",
+    "lib/oxttl",
     "lib/sparesults",
+    "lib/spargebra",
+    "lib/sparopt",
     "lib/sparql-smith",
     "oxrocksdb-sys",
     "python",
-    "server",
     "testsuite"
 ]
 resolver = "2"
[workspace.package]
version = "0.4.0-alpha.3-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
edition = "2021"
rust-version = "1.70"
[workspace.dependencies]
anyhow = "1.0.72"
arbitrary = "1.3"
assert_cmd = "2.0"
assert_fs = "1.0"
bindgen = ">=0.60, <0.70"
cc = "1.0.73"
clap = "4.0"
codspeed-criterion-compat = "2.3.3"
console_error_panic_hook = "0.1.7"
digest = "0.10"
escargot = "0.5"
flate2 = "1.0"
getrandom = "0.2.8"
hex = "0.4"
js-sys = "0.3.60"
json-event-parser = "0.2.0-alpha.2"
libc = "0.2.147"
md-5 = "0.10"
memchr = "2.5"
oxhttp = "0.2.0-alpha.3"
oxilangtag = "0.1"
oxiri = "0.2.3-alpha.1"
peg = "0.8"
pkg-config = "0.3.25"
predicates = ">=2.0, <4.0"
pyo3 = "0.20.1"
quick-xml = ">=0.29, <0.32"
rand = "0.8"
rayon-core = "1.11"
regex = "1.7"
sha1 = "0.10"
sha2 = "0.10"
siphasher = ">=0.3, <2.0"
text-diff = "0.4"
thiserror = "1.0.50"
time = "0.3"
tokio = "1.29"
url = "2.4"
wasm-bindgen = "0.2.83"
zstd = ">=0.12, <0.14"
# Internal dependencies
oxigraph = { version = "0.4.0-alpha.3-dev", path = "lib/oxigraph" }
oxrdf = { version = "0.2.0-alpha.2", path = "lib/oxrdf" }
oxrdfio = { version = "0.1.0-alpha.3-dev", path = "lib/oxrdfio" }
oxrdfxml = { version = "0.1.0-alpha.3-dev", path = "lib/oxrdfxml" }
oxrocksdb-sys = { version = "0.4.0-alpha.3-dev", path = "./oxrocksdb-sys" }
oxsdatatypes = { version = "0.2.0-alpha.1", path = "lib/oxsdatatypes" }
oxttl = { version = "0.1.0-alpha.3-dev", path = "lib/oxttl" }
sparesults = { version = "0.2.0-alpha.2", path = "lib/sparesults" }
spargebra = { version = "0.3.0-alpha.2", path = "lib/spargebra" }
sparopt = { version = "0.1.0-alpha.2", path = "lib/sparopt" }
sparql-smith = { version = "0.1.0-alpha.5", path = "lib/sparql-smith" }
[workspace.lints.rust]
absolute_paths_not_starting_with_crate = "warn"
elided_lifetimes_in_paths = "warn"
explicit_outlives_requirements = "warn"
let_underscore_drop = "warn"
macro_use_extern_crate = "warn"
# TODO missing_docs = "warn"
trivial_casts = "warn"
trivial_numeric_casts = "warn"
unsafe_code = "warn"
unused_import_braces = "warn"
unused_lifetimes = "warn"
unused_macro_rules = "warn"
unused_qualifications = "warn"
[workspace.lints.clippy]
allow_attributes = "warn"
allow_attributes_without_reason = "warn"
as_underscore = "warn"
assertions_on_result_states = "warn"
bool_to_int_with_if = "warn"
borrow_as_ptr = "warn"
case_sensitive_file_extension_comparisons = "warn"
cast_lossless = "warn"
cast_possible_truncation = "warn"
cast_possible_wrap = "warn"
cast_precision_loss = "warn"
cast_ptr_alignment = "warn"
cast_sign_loss = "warn"
checked_conversions = "warn"
clone_on_ref_ptr = "warn"
cloned_instead_of_copied = "warn"
copy_iterator = "warn"
create_dir = "warn"
dbg_macro = "warn"
decimal_literal_representation = "warn"
default_trait_access = "warn"
default_union_representation = "warn"
deref_by_slicing = "warn"
disallowed_script_idents = "warn"
doc_link_with_quotes = "warn"
empty_drop = "warn"
empty_enum = "warn"
empty_structs_with_brackets = "warn"
enum_glob_use = "warn"
error_impl_error = "warn"
exit = "warn"
expect_used = "warn"
expl_impl_clone_on_copy = "warn"
explicit_deref_methods = "warn"
explicit_into_iter_loop = "warn"
explicit_iter_loop = "warn"
filetype_is_file = "warn"
filter_map_next = "warn"
flat_map_option = "warn"
fn_params_excessive_bools = "warn"
fn_to_numeric_cast_any = "warn"
format_push_string = "warn"
from_iter_instead_of_collect = "warn"
get_unwrap = "warn"
host_endian_bytes = "warn"
if_not_else = "warn"
if_then_some_else_none = "warn"
ignored_unit_patterns = "warn"
implicit_clone = "warn"
implicit_hasher = "warn"
inconsistent_struct_constructor = "warn"
index_refutable_slice = "warn"
inefficient_to_string = "warn"
inline_always = "warn"
inline_asm_x86_att_syntax = "warn"
inline_asm_x86_intel_syntax = "warn"
invalid_upcast_comparisons = "warn"
items_after_statements = "warn"
iter_not_returning_iterator = "warn"
large_digit_groups = "warn"
large_futures = "warn"
large_include_file = "warn"
large_stack_arrays = "warn"
large_types_passed_by_value = "warn"
let_underscore_must_use = "warn"
let_underscore_untyped = "warn"
linkedlist = "warn"
lossy_float_literal = "warn"
macro_use_imports = "warn"
manual_assert = "warn"
manual_instant_elapsed = "warn"
manual_let_else = "warn"
manual_ok_or = "warn"
manual_string_new = "warn"
many_single_char_names = "warn"
map_unwrap_or = "warn"
match_bool = "warn"
match_on_vec_items = "warn"
match_same_arms = "warn"
match_wild_err_arm = "warn"
match_wildcard_for_single_variants = "warn"
maybe_infinite_iter = "warn"
mem_forget = "warn"
mismatching_type_param_order = "warn"
missing_assert_message = "warn"
missing_asserts_for_indexing = "warn"
missing_enforced_import_renames = "warn"
missing_fields_in_debug = "warn"
multiple_inherent_impl = "warn"
mut_mut = "warn"
mutex_atomic = "warn"
naive_bytecount = "warn"
needless_bitwise_bool = "warn"
needless_continue = "warn"
needless_for_each = "warn"
needless_pass_by_value = "warn"
needless_raw_strings = "warn"
negative_feature_names = "warn"
no_effect_underscore_binding = "warn"
no_mangle_with_rust_abi = "warn"
non_ascii_literal = "warn"
panic = "warn"
panic_in_result_fn = "warn"
partial_pub_fields = "warn"
print_stderr = "warn"
print_stdout = "warn"
ptr_as_ptr = "warn"
ptr_cast_constness = "warn"
pub_without_shorthand = "warn"
range_minus_one = "warn"
range_plus_one = "warn"
rc_buffer = "warn"
rc_mutex = "warn"
redundant_closure_for_method_calls = "warn"
redundant_else = "warn"
redundant_feature_names = "warn"
redundant_type_annotations = "warn"
ref_binding_to_reference = "warn"
ref_option_ref = "warn"
ref_patterns = "warn"
rest_pat_in_fully_bound_structs = "warn"
return_self_not_must_use = "warn"
same_functions_in_if_condition = "warn"
same_name_method = "warn"
semicolon_inside_block = "warn"
shadow_same = "warn"
should_panic_without_expect = "warn"
single_match_else = "warn"
stable_sort_primitive = "warn"
str_to_string = "warn"
string_add = "warn"
string_add_assign = "warn"
string_lit_chars_any = "warn"
string_to_string = "warn"
struct_excessive_bools = "warn"
suspicious_xor_used_as_pow = "warn"
tests_outside_test_module = "warn"
todo = "warn"
transmute_ptr_to_ptr = "warn"
trivially_copy_pass_by_ref = "warn"
try_err = "warn"
unchecked_duration_subtraction = "warn"
undocumented_unsafe_blocks = "warn"
unicode_not_nfc = "warn"
unimplemented = "warn"
uninlined_format_args = "warn"
unnecessary_box_returns = "warn"
unnecessary_join = "warn"
unnecessary_safety_comment = "warn"
unnecessary_safety_doc = "warn"
unnecessary_self_imports = "warn"
unnecessary_wraps = "warn"
unneeded_field_pattern = "warn"
unnested_or_patterns = "warn"
unreadable_literal = "warn"
unsafe_derive_deserialize = "warn"
unseparated_literal_suffix = "warn"
unused_async = "warn"
unused_self = "warn"
unwrap_in_result = "warn"
use_debug = "warn"
used_underscore_binding = "warn"
verbose_bit_mask = "warn"
verbose_file_reads = "warn"
wildcard_dependencies = "warn"
zero_sized_map_values = "warn"
 [profile.release]
 lto = true
 codegen-units = 1
+strip = "debuginfo"

-[profile.release.package.oxigraph_js]
+[profile.release.package.oxigraph-js]
+codegen-units = 1
 opt-level = "z"
+strip = "debuginfo"

@ -1,44 +1,22 @@
# Oxigraph # Oxigraph-handlegraph
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph) [Oxigraph](https://github.com/oxigraph/oxigraph) is a graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph)
[![PyPI](https://img.shields.io/pypi/v/pyoxigraph)](https://pypi.org/project/pyoxigraph/)
[![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![dependency status](https://deps.rs/repo/github/oxigraph/oxigraph/status.svg)](https://deps.rs/repo/github/oxigraph/oxigraph)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
[![Twitter URL](https://img.shields.io/twitter/url?style=social&url=https%3A%2F%2Ftwitter.com%2Foxigraph)](https://twitter.com/oxigraph)
Oxigraph is a graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. This is a fork of oxigraph that includes [rs-handlegraph](https://github.com/chfi/rs-handlegraph).
The purpose is to make pangenomic GFA-files accessible with SPARQL queries.
Its goal is to provide a compliant, safe, and fast graph database based on the [RocksDB](https://rocksdb.org/) key-value store. Also, some parts of Oxigraph are available as standalone Rust crates:
It is written in Rust. * [`oxrdf`](https://crates.io/crates/oxrdf), datastructures encoding RDF basic concepts (the [`oxigraph::model`](crate::model) module).
It also provides a set of utility functions for reading, writing, and processing RDF files. * [`oxrdfio`](https://crates.io/crates/oxrdfio), a unified parser and serializer API for RDF formats (the [`oxigraph::io`](crate::io) module). It itself relies on:
* [`oxttl`](https://crates.io/crates/oxttl), N-Triple, N-Quad, Turtle, TriG and N3 parsing and serialization.
* [`oxrdfxml`](https://crates.io/crates/oxrdfxml), RDF/XML parsing and serialization.
* [`spargebra`](https://crates.io/crates/spargebra), a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults), parsers and serializers for SPARQL result formats.
* [`sparopt`](https://crates.io/crates/sparesults), a SPARQL optimizer.
* [`oxsdatatypes`](https://crates.io/crates/oxsdatatypes), an implementation of some XML Schema datatypes.
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet. The library layers in Oxigraph. The elements above depend on the elements below:
The development roadmap is using [GitHub milestones](https://github.com/oxigraph/oxigraph/milestones?direction=desc&sort=completeness&state=open). ![Oxigraph libraries architecture diagram](./docs/arch-diagram.svg)
Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
The future 0.4 release is currently in development in the [next branch](https://github.com/oxigraph/oxigraph/tree/next).
It is split into multiple parts:
- [The database written as a Rust library](https://crates.io/crates/oxigraph). Its source code is in the `lib` directory.
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph)
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph)
- [`pyoxigraph` that exposes Oxigraph to the Python world](https://pyoxigraph.readthedocs.io/). Its source code is in the `python` directory. [![PyPI](https://img.shields.io/pypi/v/pyoxigraph)](https://pypi.org/project/pyoxigraph/)
- [JavaScript bindings for Oxigraph](https://www.npmjs.com/package/oxigraph). WebAssembly is used to package Oxigraph into a NodeJS compatible NPM package. Its source code is in the `js` directory.
[![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph)
- [Oxigraph server](https://crates.io/crates/oxigraph_server) that provides a standalone binary of a web server implementing the [SPARQL 1.1 Protocol](https://www.w3.org/TR/sparql11-protocol/) and the [SPARQL 1.1 Graph Store Protocol](https://www.w3.org/TR/sparql11-http-rdf-update/). Its source code is in the `server` directory.
[![Latest Version](https://img.shields.io/crates/v/oxigraph_server.svg)](https://crates.io/crates/oxigraph_server)
Oxigraph implements the following specifications:
- [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
- [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio).
- [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
A preliminary benchmark [is provided](bench/README.md). There is also [a document describing Oxigraph technical architecture](https://github.com/oxigraph/oxigraph/wiki/Architecture).
When cloning this codebase, don't forget to clone the submodules using When cloning this codebase, don't forget to clone the submodules using
`git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository including submodules or `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository including submodules or
@ -47,10 +25,8 @@ When cloning this codebase, don't forget to clone the submodules using
## Help ## Help
Feel free to use [GitHub discussions](https://github.com/oxigraph/oxigraph/discussions) or [the Gitter chat](https://gitter.im/oxigraph/community) to ask questions or talk about Oxigraph. Feel free to ask [heringerp](https://github.com/heringerp) for help.
[Bug reports](https://github.com/oxigraph/oxigraph/issues) are also very welcome. [Bug reports](https://github.com/heringerp/oxigraph/issues) are also very welcome.
If you need advanced support or are willing to pay to get some extra features, feel free to reach out to [Tpt](https://github.com/Tpt/).
## License ## License
@ -67,14 +43,3 @@ at your option.
### Contribution ### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
## Sponsors
* [RelationLabs](https://relationlabs.ai/) that is building [Relation-Graph](https://github.com/relationlabs/Relation-Graph), a SPARQL database module for the [Substrate blockchain platform](https://substrate.io/) based on Oxigraph.
* [Field 33](https://field33.com) that is building [an ontology management plateform](https://plow.pm/).
* [Magnus Bakken](https://github.com/magbak) who is building [chrontext](https://github.com/magbak/chrontext), providing a SPARQL query endpoint on top of joint RDF and time series databases.
* [ACE IoT Solutions](https://aceiotsolutions.com/), a building IOT platform.
* [Albin Larsson](https://byabbe.se/) who is building [GovDirectory](https://www.govdirectory.org/), a directory of public agencies based on Wikidata.
And [others](https://github.com/sponsors/Tpt). Many thanks to them!
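Given the fork's stated goal of querying pangenomic GFA data through SPARQL, here is a minimal hedged sketch of what such a query looks like with the Rust API. The `vg#` predicate follows the vg RDF vocabulary this fork targets via `lib/oxigraph/src/storage/vg_vocab.rs`; the triple and IRIs are made up for the example, and the snippet assumes the standard `Store` API rather than the fork's GFA-backed storage:

```rust
use oxigraph::io::RdfFormat;
use oxigraph::sparql::QueryResults;
use oxigraph::store::Store;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = Store::new()?;
    // Illustrative link between two pangenome graph nodes.
    store.load_from_read(
        RdfFormat::NTriples,
        "<http://example.com/node/1> <http://biohackathon.org/resource/vg#linksForwardToForward> <http://example.com/node/2> ."
            .as_bytes(),
    )?;
    if let QueryResults::Solutions(solutions) = store.query(
        "SELECT ?from ?to WHERE { ?from <http://biohackathon.org/resource/vg#linksForwardToForward> ?to }",
    )? {
        for solution in solutions {
            let solution = solution?;
            println!("{} -> {}", solution["from"], solution["to"]);
        }
    }
    Ok(())
}
```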

bench/bsbm_oxigraph.sh

@@ -6,10 +6,10 @@ PARALLELISM=16
 set -eu
 cd bsbm-tools
 ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
-cargo build --release --manifest-path="../../server/Cargo.toml"
-VERSION=$(./../../target/release/oxigraph_server --version | sed 's/oxigraph_server //g')
-./../../target/release/oxigraph_server --location oxigraph_data load --file "explore-${DATASET_SIZE}.nt"
-./../../target/release/oxigraph_server --location oxigraph_data serve --bind 127.0.0.1:7878 &
+cargo build --release --manifest-path="../../cli/Cargo.toml"
+VERSION=$(./../../target/release/oxigraph --version | sed 's/oxigraph //g')
+./../../target/release/oxigraph --location oxigraph_data load --file "explore-${DATASET_SIZE}.nt"
+./../../target/release/oxigraph --location oxigraph_data serve --bind 127.0.0.1:7878 &
 sleep 1
 ./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.oxigraph.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://127.0.0.1:7878/query
 ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.oxigraph.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://127.0.0.1:7878/query -u http://127.0.0.1:7878/update -udataset "explore-update-${DATASET_SIZE}.nt"

cli/Cargo.toml (new file)

@@ -0,0 +1,47 @@
[package]
name = "oxigraph-cli"
version.workspace = true
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF", "SPARQL", "graph-database", "database"]
categories = ["command-line-utilities", "database"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/cli"
homepage = "https://oxigraph.org/cli/"
description = """
Oxigraph command line toolkit and SPARQL HTTP server
"""
edition.workspace = true
rust-version.workspace = true
[[bin]]
name = "oxigraph"
path = "src/main.rs"
doc = false
[features]
default = ["native-tls"]
native-tls = ["oxigraph/http-client-native-tls"]
rocksdb-pkg-config = ["oxigraph/rocksdb-pkg-config"]
rustls-native = ["oxigraph/http-client-rustls-native"]
rustls-webpki = ["oxigraph/http-client-rustls-webpki"]
[dependencies]
anyhow.workspace = true
clap = { workspace = true, features = ["derive"] }
flate2.workspace = true
oxhttp = { workspace = true, features = ["flate2"] }
oxigraph.workspace = true
oxiri.workspace = true
rand.workspace = true
rayon-core.workspace = true
url.workspace = true
[dev-dependencies]
assert_cmd.workspace = true
assert_fs.workspace = true
escargot.workspace = true
predicates.workspace = true
[lints]
workspace = true

cli/Dockerfile

@@ -1,4 +1,4 @@
-FROM --platform=$BUILDPLATFORM rust:1-bullseye as builder
+FROM --platform=$BUILDPLATFORM rust:1-bookworm as builder
 ARG BUILDARCH TARGETARCH
 RUN apt-get update && \
     apt-get install -y libclang-dev clang && \
@@ -8,18 +8,18 @@ RUN apt-get update && \
         rustup target add aarch64-unknown-linux-gnu ; \
     fi
 COPY . /oxigraph
-WORKDIR /oxigraph/server
+WORKDIR /oxigraph/cli
 RUN if [ "$BUILDARCH" != "$TARGETARCH" ] && [ "$TARGETARCH" = "arm64" ] ; \
     then \
         export CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc && \
         export BINDGEN_EXTRA_CLANG_ARGS="--sysroot /usr/aarch64-linux-gnu" && \
-        cargo build --release --target aarch64-unknown-linux-gnu && \
-        mv /oxigraph/target/aarch64-unknown-linux-gnu/release/oxigraph_server /oxigraph/target/release/oxigraph_server ; \
+        cargo build --release --target aarch64-unknown-linux-gnu --no-default-features --features rustls-webpki && \
+        mv /oxigraph/target/aarch64-unknown-linux-gnu/release/oxigraph /oxigraph/target/release/oxigraph ; \
     else \
-        cargo build --release ; \
+        cargo build --release --no-default-features --features rustls-webpki ; \
     fi

-FROM --platform=$TARGETPLATFORM gcr.io/distroless/cc-debian11
-COPY --from=builder /oxigraph/target/release/oxigraph_server /usr/local/bin/oxigraph_server
-ENTRYPOINT [ "/usr/local/bin/oxigraph_server" ]
+FROM --platform=$TARGETPLATFORM gcr.io/distroless/cc-debian12
+COPY --from=builder /oxigraph/target/release/oxigraph /usr/local/bin/oxigraph
+ENTRYPOINT [ "/usr/local/bin/oxigraph" ]
 CMD [ "serve", "--location", "/data", "--bind", "0.0.0.0:7878" ]

@ -1,21 +1,19 @@
Oxigraph Server Oxigraph CLI
=============== ============
[![Latest Version](https://img.shields.io/crates/v/oxigraph_server.svg)](https://crates.io/crates/oxigraph_server) [![Latest Version](https://img.shields.io/crates/v/oxigraph-cli.svg)](https://crates.io/crates/oxigraph-cli)
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph_server)](https://crates.io/crates/oxigraph_server) [![Crates.io downloads](https://img.shields.io/crates/d/oxigraph-cli)](https://crates.io/crates/oxigraph-cli)
[![Conda](https://img.shields.io/conda/vn/conda-forge/oxigraph-server)](https://anaconda.org/conda-forge/oxigraph-server) [![Conda](https://img.shields.io/conda/vn/conda-forge/oxigraph-server)](https://anaconda.org/conda-forge/oxigraph-server)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) [![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) [![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
Oxigraph Server is a standalone HTTP server providing a graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. Oxigraph CLI is a graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
It is packaged as a command line tool allowing to manipulate an RDF files, query them using SPARQL...
Its goal is to provide a compliant, safe, and fast graph database based on the [RocksDB](https://rocksdb.org/) key-value store. It also allows to spawn a HTTP server on top of the database.
It is written in Rust.
It also provides a set of utility functions for reading, writing, and processing RDF files.
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet. Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet.
Oxigraph provides different installation methods for Oxigraph server: Oxigraph provides different installation methods for Oxigraph CLI:
* [`cargo install`](#installation) (multiplatform) * [`cargo install`](#installation) (multiplatform)
* [A Docker image](#using-a-docker-image) * [A Docker image](#using-a-docker-image)
* [A Homebrew formula](#homebrew) * [A Homebrew formula](#homebrew)
@ -32,19 +30,29 @@ Oxigraph implements the following specifications:
A preliminary benchmark [is provided](../bench/README.md). A preliminary benchmark [is provided](../bench/README.md).
Note that Oxigraph CLI was previously named Oxigraph Server before version 0.4. Older versions are available under [this name](https://crates.io/crates/oxigraph_server).
## Installation ## Installation
You need to have [a recent stable version of Rust and Cargo installed](https://www.rust-lang.org/tools/install). You need to have [a recent stable version of Rust and Cargo installed](https://www.rust-lang.org/tools/install).
To download, build and install the latest released version run `cargo install oxigraph_server`. To download, build and install the latest released version run `cargo install oxigraph-cli`.
There is no need to clone the git repository. There is no need to clone the git repository.
To compile the server from source, clone this git repository including its submodules (`git clone --recursive https://github.com/oxigraph/oxigraph.git`), and execute `cargo build --release` in the `server` directory to compile the full server after having downloaded its dependencies. To compile the command line tool from source, clone this git repository including its submodules (`git clone --recursive https://github.com/oxigraph/oxigraph.git`), and execute `cargo build --release` in the `cli` directory to compile the full binary after having downloaded its dependencies.
It will create a fat binary in `target/release/oxigraph_server`. It will create a fat binary in `target/release/oxigraph`.
Some build options (cargo features) are available:
- `rocksdb-pkg-config`: links against an already compiled rocksdb shared library found using [pkg-config](https://crates.io/crates/pkg-config).
- `native-tls`: Enables Oxigraph HTTP client for query federation using the host OS TLS stack (enabled by default).
- `rustls-native` Enables Oxigraph HTTP client for query federation using [Rustls](https://crates.io/crates/rustls) and the native certificates.
- `rustls-webpki` Enables Oxigraph HTTP client for query federation using [Rustls](https://crates.io/crates/rustls) and the [Common CA Database](https://www.ccadb.org/) certificates.
## Usage ## Usage
Run `oxigraph_server --location my_data_storage_directory serve` to start the server where `my_data_storage_directory` is the directory where you want Oxigraph data to be stored. It listens by default on `localhost:7878`. Run `oxigraph serve --location my_data_storage_directory` to start the server where `my_data_storage_directory` is the directory where you want Oxigraph data to be stored. It listens by default on `localhost:7878`.
The server provides an HTML UI, based on [YASGUI](https://yasgui.triply.cc), with a form to execute SPARQL requests. The server provides an HTML UI, based on [YASGUI](https://yasgui.triply.cc), with a form to execute SPARQL requests.
@ -78,10 +86,10 @@ It provides the following REST actions:
``` ```
will add the N-Quads file `MY_FILE.nq` to the server dataset. will add the N-Quads file `MY_FILE.nq` to the server dataset.
Use `oxigraph_server --help` to see the possible options when starting the server. Use `oxigraph --help` to see the possible options when starting the server.
It is also possible to load RDF data offline using bulk loading: It is also possible to load RDF data offline using bulk loading:
`oxigraph_server --location my_data_storage_directory load --file my_file.nq` `oxigraph load --location my_data_storage_directory --file my_file.nq`
## Using a Docker image ## Using a Docker image
@ -93,7 +101,7 @@ docker run --rm ghcr.io/oxigraph/oxigraph --help
### Run the Webserver ### Run the Webserver
Expose the server on port `7878` of the host machine, and save data on the local `./data` folder Expose the server on port `7878` of the host machine, and save data on the local `./data` folder
```sh ```sh
docker run --rm -v $PWD/data:/data -p 7878:7878 ghcr.io/oxigraph/oxigraph --location /data serve --bind 0.0.0.0:7878 docker run --rm -v $PWD/data:/data -p 7878:7878 ghcr.io/oxigraph/oxigraph serve --location /data --bind 0.0.0.0:7878
``` ```
You can then access it from your machine on port `7878`: You can then access it from your machine on port `7878`:
@ -217,14 +225,14 @@ To install Oxigraph server using Homebrew do:
brew tap oxigraph/oxigraph brew tap oxigraph/oxigraph
brew install oxigraph brew install oxigraph
``` ```
It installs the `oxigraph_server` binary. [See the usage documentation to know how to use it](#usage). It installs the `oxigraph` binary. [See the usage documentation to know how to use it](#usage).
## Systemd
It is possible to run Oxigraph in the background using systemd.
For that, you can use the following `oxigraph.service` file (it can be placed in `/etc/systemd/system/` or `$HOME/.config/systemd/user`):
```ini
[Unit]
Description=Oxigraph database server
@@ -233,7 +241,7 @@ Wants=network-online.target
[Service]
Type=notify
ExecStart=/PATH/TO/oxigraph serve --location /PATH/TO/OXIGRAPH/DATA
[Install]
WantedBy=multi-user.target
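Once the unit file is in place, it can be enabled and started like any other service (a sketch; use `systemctl --user` instead for a unit installed under `$HOME/.config/systemd/user`):
```sh
systemctl daemon-reload
systemctl enable --now oxigraph.service
```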
@@ -242,8 +250,8 @@ WantedBy=multi-user.target
## Migration guide
### From 0.2 to 0.3
* The CLI API has been completely rewritten. To start the server, run `oxigraph serve --location MY_STORAGE` instead of `oxigraph --file MY_STORAGE`.
* Fast data bulk loading is now supported using `oxigraph load --location MY_STORAGE --file MY_FILE`. The file format is guessed from the extension (`.nt`, `.ttl`, `.nq`...).
* [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is now implemented.
* All operations are now transactional using the "repeatable read" isolation level:
the store only exposes changes that have been "committed" (i.e. no partial writes).


@@ -1,4 +1,4 @@
avoid-breaking-exported-api = false
cognitive-complexity-threshold = 50
too-many-arguments-threshold = 10
type-complexity-threshold = 500

@@ -0,0 +1,120 @@
<svg xmlns="http://www.w3.org/2000/svg" width="624" height="384" class="svgbob">
<style>.svgbob line, .svgbob path, .svgbob circle, .svgbob rect, .svgbob polygon {
stroke: black;
stroke-width: 2;
stroke-opacity: 1;
fill-opacity: 1;
stroke-linecap: round;
stroke-linejoin: miter;
}
.svgbob text {
white-space: pre;
fill: black;
font-family: Iosevka Fixed, monospace;
font-size: 14px;
}
.svgbob rect.backdrop {
stroke: none;
fill: white;
}
.svgbob .broken {
stroke-dasharray: 8;
}
.svgbob .filled {
fill: black;
}
.svgbob .bg_filled {
fill: white;
stroke-width: 1;
}
.svgbob .nofill {
fill: white;
}
.svgbob .end_marked_arrow {
marker-end: url(#arrow);
}
.svgbob .start_marked_arrow {
marker-start: url(#arrow);
}
.svgbob .end_marked_diamond {
marker-end: url(#diamond);
}
.svgbob .start_marked_diamond {
marker-start: url(#diamond);
}
.svgbob .end_marked_circle {
marker-end: url(#circle);
}
.svgbob .start_marked_circle {
marker-start: url(#circle);
}
.svgbob .end_marked_open_circle {
marker-end: url(#open_circle);
}
.svgbob .start_marked_open_circle {
marker-start: url(#open_circle);
}
.svgbob .end_marked_big_open_circle {
marker-end: url(#big_open_circle);
}
.svgbob .start_marked_big_open_circle {
marker-start: url(#big_open_circle);
}<!--separator-->.svgbob .r{
fill: papayawhip;
}
.svgbob .p{
fill: lightyellow;
}
.svgbob .j{
fill: lightgreen;
}
</style>
<defs>
<marker id="arrow" viewBox="-2 -2 8 8" refX="4" refY="2" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<polygon points="0,0 0,4 4,2 0,0"></polygon>
</marker>
<marker id="diamond" viewBox="-2 -2 8 8" refX="4" refY="2" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<polygon points="0,2 2,0 4,2 2,4 0,2"></polygon>
</marker>
<marker id="circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="2" class="filled"></circle>
</marker>
<marker id="open_circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="2" class="bg_filled"></circle>
</marker>
<marker id="big_open_circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="3" class="bg_filled"></circle>
</marker>
</defs>
<rect class="backdrop" x="0" y="0" width="624" height="384"></rect>
<rect x="4" y="8" width="152" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="28">oxigraph CLI</text>
<rect x="244" y="8" width="136" height="32" class="solid nofill p" rx="0"></rect>
<text x="258" y="28">pyoxigraph</text>
<rect x="468" y="8" width="144" height="32" class="solid nofill j" rx="0"></rect>
<text x="482" y="28">oxigraph</text>
<text x="554" y="28">JS</text>
<rect x="4" y="72" width="608" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="92">oxigraph</text>
<rect x="68" y="136" width="232" height="32" class="solid nofill r" rx="0"></rect>
<text x="82" y="156">oxrdfio</text>
<rect x="348" y="136" width="112" height="32" class="solid nofill r" rx="0"></rect>
<text x="362" y="156">sparopt</text>
<rect x="68" y="200" width="96" height="32" class="solid nofill r" rx="0"></rect>
<text x="82" y="220">oxttl</text>
<rect x="180" y="200" width="120" height="32" class="solid nofill r" rx="0"></rect>
<text x="194" y="220">oxrdfxml</text>
<rect x="316" y="200" width="144" height="32" class="solid nofill r" rx="0"></rect>
<text x="330" y="220">spargebra</text>
<rect x="476" y="200" width="136" height="32" class="solid nofill r" rx="0"></rect>
<text x="490" y="220">sparesults</text>
<rect x="36" y="264" width="576" height="32" class="solid nofill r" rx="0"></rect>
<text x="50" y="284">oxrdf</text>
<rect x="4" y="328" width="152" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="348">oxsdatatypes</text>
</svg>


@@ -0,0 +1,35 @@
+------------------+ +----------------+ +-----------------+
| oxigraph CLI {r} | | pyoxigraph {p} | | oxigraph JS {j} |
+------------------+ +----------------+ +-----------------+

+---------------------------------------------------------------------------+
|                            oxigraph (Rust) {r}                            |
+---------------------------------------------------------------------------+

+----------------------------+ +-------------+
|        oxrdfio {r}         | | sparopt {r} |
+----------------------------+ +-------------+

+-----------+ +--------------+ +-----------------+ +----------------+
| oxttl {r} | | oxrdfxml {r} | |  spargebra {r}  | | sparesults {r} |
+-----------+ +--------------+ +-----------------+ +----------------+

+-----------------------------------------------------------------------+
|                               oxrdf {r}                               |
+-----------------------------------------------------------------------+

+------------------+
| oxsdatatypes {r} |
+------------------+

# Legend:
r = {
  fill: papayawhip;
}
p = {
  fill: lightyellow;
}
j = {
  fill: lightgreen;
}

@@ -1,7 +1,6 @@
[package]
name = "oxigraph-fuzz"
version = "0.0.0"
publish = false
edition = "2021"
@@ -9,13 +8,15 @@ edition = "2021"
cargo-fuzz = true

[dependencies]
anyhow = "1.0.72"
libfuzzer-sys = "0.4"
oxigraph = { path = "../lib/oxigraph" }
oxrdf = { path = "../lib/oxrdf", features = ["rdf-star"] }
oxrdfxml = { path = "../lib/oxrdfxml" }
oxttl = { path = "../lib/oxttl", features = ["rdf-star"] }
sparesults = { path = "../lib/sparesults", features = ["rdf-star"] }
spargebra = { path = "../lib/spargebra", features = ["rdf-star", "sep-0006"] }
sparql-smith = { path = "../lib/sparql-smith", features = ["sep-0006"] }
[profile.release]
codegen-units = 1
@@ -23,6 +24,18 @@ debug = true
[workspace]
[[bin]]
name = "nquads"
path = "fuzz_targets/nquads.rs"
[[bin]]
name = "n3"
path = "fuzz_targets/n3.rs"
[[bin]]
name = "rdf_xml"
path = "fuzz_targets/rdf_xml.rs"
[[bin]]
name = "sparql_eval"
path = "fuzz_targets/sparql_eval.rs"
@@ -46,3 +59,7 @@ path = "fuzz_targets/sparql_results_xml.rs"
[[bin]]
name = "sparql_results_tsv"
path = "fuzz_targets/sparql_results_tsv.rs"
[[bin]]
name = "trig"
path = "fuzz_targets/trig.rs"

@@ -0,0 +1,28 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxttl::N3Parser;
fuzz_target!(|data: &[u8]| {
let mut quads = Vec::new();
let mut parser = N3Parser::new()
.with_base_iri("http://example.com/")
.unwrap()
.parse();
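    // 0xFF cannot appear in well-formed UTF-8, so it is used below as a separator to split the input into chunks and exercise incremental parsing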
for chunk in data.split(|c| *c == 0xFF) {
parser.extend_from_slice(chunk);
while let Some(result) = parser.read_next() {
if let Ok(quad) = result {
quads.push(quad);
}
}
}
parser.end();
while let Some(result) = parser.read_next() {
if let Ok(quad) = result {
quads.push(quad);
}
}
assert!(parser.is_end());
//TODO: serialize
});

@@ -0,0 +1,84 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdf::Quad;
use oxttl::{NQuadsParser, NQuadsSerializer};
fn parse<'a>(
chunks: impl IntoIterator<Item = &'a [u8]>,
unchecked: bool,
) -> (Vec<Quad>, Vec<String>) {
let mut quads = Vec::new();
let mut errors = Vec::new();
let mut parser = NQuadsParser::new().with_quoted_triples();
if unchecked {
parser = parser.unchecked();
}
let mut reader = parser.parse();
for chunk in chunks {
reader.extend_from_slice(chunk);
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
}
reader.end();
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
assert!(reader.is_end());
(quads, errors)
}
fuzz_target!(|data: &[u8]| {
// We parse with splitting
let (quads, errors) = parse(data.split(|c| *c == 0xFF), false);
// We parse without splitting
let (quads_without_split, errors_without_split) = parse(
[data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()],
false,
);
assert_eq!(quads, quads_without_split);
assert_eq!(errors, errors_without_split);
// We test also unchecked if valid
if errors.is_empty() {
let (quads_unchecked, errors_unchecked) = parse(data.split(|c| *c == 0xFF), true);
assert!(errors_unchecked.is_empty());
assert_eq!(quads, quads_unchecked);
}
// We serialize
let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new());
for quad in &quads {
writer.write_quad(quad).unwrap();
}
let new_serialization = writer.finish();
// We parse the serialization
let new_quads = NQuadsParser::new()
.with_quoted_triples()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {quads:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_quads, quads);
});

@@ -0,0 +1,37 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdfxml::{RdfXmlParser, RdfXmlSerializer};
fuzz_target!(|data: &[u8]| {
// We parse
let mut triples = Vec::new();
for triple in RdfXmlParser::new().parse_read(data) {
if let Ok(triple) = triple {
triples.push(triple);
}
}
// We serialize
let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new());
for triple in &triples {
writer.write_triple(triple).unwrap();
}
let new_serialization = writer.finish().unwrap();
// We parse the serialization
let new_triples = RdfXmlParser::new()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {triples:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_triples, triples);
});

@@ -1,31 +1,26 @@
#![no_main]

use libfuzzer_sys::fuzz_target;
use oxigraph::io::RdfFormat;
use oxigraph::sparql::{Query, QueryOptions, QueryResults, QuerySolutionIter};
use oxigraph::store::Store;
use std::sync::OnceLock;

fuzz_target!(|data: sparql_smith::Query| {
    static STORE: OnceLock<Store> = OnceLock::new();
    let store = STORE.get_or_init(|| {
        let store = Store::new().unwrap();
        store
            .load_dataset(sparql_smith::DATA_TRIG.as_bytes(), RdfFormat::TriG, None)
            .unwrap();
        store
    });

    let query_str = data.to_string();
    if let Ok(query) = Query::parse(&query_str, None) {
        let options = QueryOptions::default();
        let with_opt = store.query_opt(query.clone(), options.clone()).unwrap();
        let without_opt = store
            .query_opt(query, options.without_optimizations())
            .unwrap();
        match (with_opt, without_opt) {
@@ -52,7 +47,7 @@ fn query_solutions_key(iter: QuerySolutionIter, is_reduced: bool) -> String {
            let mut b = t
                .unwrap()
                .iter()
                .map(|(var, val)| format!("{var}: {val}"))
                .collect::<Vec<_>>();
            b.sort_unstable();
            b.join(" ")

@@ -3,5 +3,5 @@ use libfuzzer_sys::fuzz_target;
use spargebra::Query;

fuzz_target!(|data: &str| {
    let _ = Query::parse(data, None);
});

@@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format;
use sparesults::QueryResultsFormat;

fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Json, data));

@@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format;
use sparesults::QueryResultsFormat;

fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Tsv, data));

@@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format;
use sparesults::QueryResultsFormat;

fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Xml, data));

@@ -4,5 +4,5 @@ use spargebra::Update;
use std::str;

fuzz_target!(|data: &str| {
    let _ = Update::parse(data, None);
});

@@ -0,0 +1,167 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple};
use oxttl::{TriGParser, TriGSerializer};
fn parse<'a>(
chunks: impl IntoIterator<Item = &'a [u8]>,
unchecked: bool,
) -> (Vec<Quad>, Vec<String>, Vec<(String, String)>) {
let mut quads = Vec::new();
let mut errors = Vec::new();
let mut parser = TriGParser::new()
.with_quoted_triples()
.with_base_iri("http://example.com/")
.unwrap();
if unchecked {
parser = parser.unchecked();
}
let mut reader = parser.parse();
for chunk in chunks {
reader.extend_from_slice(chunk);
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
}
reader.end();
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
assert!(reader.is_end());
(
quads,
errors,
reader
.prefixes()
.map(|(k, v)| (k.to_owned(), v.to_owned()))
.collect(),
)
}
fn count_triple_blank_nodes(triple: &Triple) -> usize {
(match &triple.subject {
Subject::BlankNode(_) => 1,
Subject::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
}) + (match &triple.object {
Term::BlankNode(_) => 1,
Term::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
})
}
fn count_quad_blank_nodes(quad: &Quad) -> usize {
(match &quad.subject {
Subject::BlankNode(_) => 1,
Subject::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
}) + (match &quad.object {
Term::BlankNode(_) => 1,
Term::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
}) + usize::from(matches!(quad.graph_name, GraphName::BlankNode(_)))
}
fn serialize_quads(quads: &[Quad], prefixes: Vec<(String, String)>) -> Vec<u8> {
let mut serializer = TriGSerializer::new();
for (prefix_name, prefix_iri) in prefixes {
serializer = serializer.with_prefix(prefix_name, prefix_iri).unwrap();
}
let mut writer = serializer.serialize_to_write(Vec::new());
for quad in quads {
writer.write_quad(quad).unwrap();
}
writer.finish().unwrap()
}
fuzz_target!(|data: &[u8]| {
// We parse with splitting
let (quads, errors, prefixes) = parse(data.split(|c| *c == 0xFF), false);
// We parse without splitting
let (quads_without_split, errors_without_split, _) = parse(
[data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()],
false,
);
let (quads_unchecked, errors_unchecked, _) = parse(data.split(|c| *c == 0xFF), true);
if errors.is_empty() {
assert!(errors_unchecked.is_empty());
}
let bnodes_count = quads.iter().map(count_quad_blank_nodes).sum::<usize>();
if bnodes_count == 0 {
assert_eq!(
quads,
quads_without_split,
"With split:\n{}\nWithout split:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new()))
);
if errors.is_empty() {
assert_eq!(
quads,
quads_unchecked,
"Validating:\n{}\nUnchecked:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new()))
);
}
} else if bnodes_count <= 4 {
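        // Blank node labels are not stable across parses, so the datasets are compared after canonicalization; this is restricted to a few blank nodes because canonicalization cost grows quickly with their number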
let mut dataset_with_split = quads.iter().collect::<Dataset>();
let mut dataset_without_split = quads_without_split.iter().collect::<Dataset>();
dataset_with_split.canonicalize();
dataset_without_split.canonicalize();
assert_eq!(
dataset_with_split,
dataset_without_split,
"With split:\n{}\nWithout split:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new()))
);
        if errors.is_empty() {
            let mut dataset_unchecked = quads_unchecked.iter().collect::<Dataset>();
            dataset_unchecked.canonicalize();
            assert_eq!(
                dataset_with_split,
                dataset_unchecked,
                "Validating:\n{}\nUnchecked:\n{}",
                String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
                String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new()))
            );
        }
}
assert_eq!(errors, errors_without_split);
// We serialize
let new_serialization = serialize_quads(&quads, prefixes);
// We parse the serialization
let new_quads = TriGParser::new()
.with_quoted_triples()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {quads:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_quads, quads);
});

@@ -1,24 +1,24 @@
use anyhow::Context;
use sparesults::{
    FromReadQueryResultsReader, QueryResultsFormat, QueryResultsParser, QueryResultsSerializer,
};

pub fn fuzz_result_format(format: QueryResultsFormat, data: &[u8]) {
    let parser = QueryResultsParser::from_format(format);
    let serializer = QueryResultsSerializer::from_format(format);

    let Ok(reader) = parser.parse_read(data) else {
        return;
    };
    match reader {
        FromReadQueryResultsReader::Solutions(solutions) => {
            let Ok(solutions) = solutions.collect::<Result<Vec<_>, _>>() else {
                return;
            };

            // We try to write again
            let mut writer = serializer
                .serialize_solutions_to_write(
                    Vec::new(),
                    solutions
                        .get(0)
@@ -31,30 +31,30 @@ pub fn fuzz_result_format(format: QueryResultsFormat, data: &[u8]) {
            let serialized = String::from_utf8(writer.finish().unwrap()).unwrap();

            // And to parse again
            if let FromReadQueryResultsReader::Solutions(roundtrip_solutions) = parser
                .parse_read(serialized.as_bytes())
                .with_context(|| format!("Parsing {serialized:?}"))
                .unwrap()
            {
                assert_eq!(
                    roundtrip_solutions
                        .collect::<Result<Vec<_>, _>>()
                        .with_context(|| format!("Parsing {serialized:?}"))
                        .unwrap(),
                    solutions
                )
            }
        }
        FromReadQueryResultsReader::Boolean(value) => {
            // We try to write again
            let mut serialized = Vec::new();
            serializer
                .serialize_boolean_to_write(&mut serialized, value)
                .unwrap();

            // And to parse again
            if let FromReadQueryResultsReader::Boolean(roundtrip_value) =
                parser.parse_read(serialized.as_slice()).unwrap()
            {
                assert_eq!(roundtrip_value, value)
            }

@@ -1,20 +1,26 @@
[package]
name = "oxigraph-js"
version.workspace = true
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF", "N-Triples", "Turtle", "XML", "SPARQL"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/js"
description = "JavaScript bindings of Oxigraph"
edition.workspace = true
rust-version.workspace = true
publish = false

[lib]
crate-type = ["cdylib"]
name = "oxigraph"
doc = false

[dependencies]
console_error_panic_hook.workspace = true
js-sys.workspace = true
oxigraph = { workspace = true, features = ["js"] }
wasm-bindgen.workspace = true

[lints]
workspace = true

@@ -3,7 +3,7 @@ Oxigraph for JavaScript
[![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)

This package provides a JavaScript API on top of [Oxigraph](https://crates.io/crates/oxigraph), compiled with WebAssembly.
@@ -197,40 +197,42 @@ Example of update:
store.update("DELETE WHERE { <http://example.com/s> ?p ?o }")
```

#### `Store.prototype.load(String data, String format, NamedNode|String? baseIRI, NamedNode|BlankNode|DefaultGraph? toNamedGraph)`

Loads serialized RDF triples or quads into the store.
The method arguments are:
1. `data`: the serialized RDF triples or quads.
2. `format`: the format of the serialization. See below for the supported formats.
3. `baseIRI`: the base IRI to use to resolve the relative IRIs in the serialization.
4. `toNamedGraph`: for triple serialization formats, the name of the named graph the triples should be loaded into.

The available formats are:
* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle` or `ttl`
* [TriG](https://www.w3.org/TR/trig/): `application/trig` or `trig`
* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` or `nt`
* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` or `nq`
* [N3](https://w3c.github.io/N3/spec/): `text/n3` or `n3`
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` or `rdf`

Example of loading a Turtle file into the named graph `<http://example.com/graph>` with the base IRI `http://example.com`:
```js
store.load("<http://example.com> <http://example.com> <> .", "text/turtle", "http://example.com", oxigraph.namedNode("http://example.com/graph"));
```
#### `Store.prototype.dump(String format, NamedNode|BlankNode|DefaultGraph? fromNamedGraph)`

Returns serialized RDF triples or quads from the store.
The method arguments are:
1. `format`: the format of the serialization. See below for the supported formats.
2. `fromNamedGraph`: for triple serialization formats, the name of the named graph the triples should be read from.

The available formats are:
* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle` or `ttl`
* [TriG](https://www.w3.org/TR/trig/): `application/trig` or `trig`
* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` or `nt`
* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` or `nq`
* [N3](https://w3c.github.io/N3/spec/): `text/n3` or `n3`
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` or `rdf`
Example of building a Turtle file from the named graph `<http://example.com/graph>`:
```js
@@ -260,7 +262,7 @@ To setup a dev environment:
Testing and linting:
- Rust code is formatted with [rustfmt](https://github.com/rust-lang/rustfmt) and linted with [clippy](https://github.com/rust-lang/rust-clippy).
  You can execute them with `cargo fmt` and `cargo clippy`.
- JS code is formatted and linted with [Biome](https://biomejs.dev/). `npm run fmt` to auto-format and `npm test` to lint and test.
- Tests are written in JavaScript using [Mocha](https://mochajs.org/) in the `test` directory. `npm test` to run them.

@@ -1,7 +1,8 @@
{
    "$schema": "https://biomejs.dev/schemas/1.0.0/schema.json",
    "formatter": {
        "indentStyle": "space",
        "indentWidth": 4,
        "lineWidth": 100
    },
    "linter": {

@@ -1,16 +1,6 @@
#! /usr/bin/env node

const fs = require("fs");

const pkg = JSON.parse(fs.readFileSync("./pkg/package.json"));
pkg.name = "oxigraph";
pkg.main = "node.js";


@@ -3,14 +3,14 @@
    "description": "Oxigraph JS build and tests",
    "private": true,
    "devDependencies": {
        "@biomejs/biome": "^1.0.0",
        "@rdfjs/data-model": "^2.0.1",
        "mocha": "^10.0.0"
    },
    "scripts": {
        "fmt": "biome format . --write && biome check . --apply-unsafe && biome format . --write",
        "test": "biome ci . && wasm-pack build --debug --target nodejs && mocha",
        "build": "wasm-pack build --release --target web --out-name web && wasm-pack build --release --target nodejs --out-name node && node build_package.js",
        "release": "npm run build && npm publish ./pkg",
        "pack": "npm run build && npm pack ./pkg"
    },

@@ -1,4 +1,4 @@
#![allow(clippy::mem_forget)]
use wasm_bindgen::prelude::*;

mod model;

@@ -1,9 +1,4 @@
#![allow(dead_code, clippy::inherent_to_string, clippy::unused_self)]

use crate::format_err;
use crate::utils::to_err;
@@ -56,7 +51,7 @@ pub fn literal(
#[wasm_bindgen(js_name = defaultGraph)]
pub fn default_graph() -> JsDefaultGraph {
    JsDefaultGraph
}

#[wasm_bindgen(js_name = variable)]
@@ -302,7 +297,7 @@ impl From<JsLiteral> for Term {
#[wasm_bindgen(js_name = DefaultGraph)]
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct JsDefaultGraph;

#[wasm_bindgen(js_class = DefaultGraph)]
impl JsDefaultGraph {
@@ -313,7 +308,7 @@ impl JsDefaultGraph {
    #[wasm_bindgen(getter)]
    pub fn value(&self) -> String {
        String::new()
    }

    #[wasm_bindgen(js_name = toString)]
@@ -393,7 +388,7 @@ impl JsQuad {
    #[wasm_bindgen(getter)]
    pub fn value(&self) -> String {
        String::new()
    }

    #[wasm_bindgen(getter = subject)]
@@ -532,7 +527,7 @@ impl From<GraphName> for JsTerm {
        match name {
            GraphName::NamedNode(node) => node.into(),
            GraphName::BlankNode(node) => node.into(),
            GraphName::DefaultGraph => Self::DefaultGraph(JsDefaultGraph),
        }
    }
}
@@ -564,7 +559,7 @@ impl From<Quad> for JsTerm {
impl TryFrom<JsTerm> for NamedNode {
    type Error = JsValue;

    fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
        match value {
            JsTerm::NamedNode(node) => Ok(node.into()),
            JsTerm::BlankNode(node) => Err(format_err!(
@@ -588,7 +583,7 @@ impl TryFrom<JsTerm> for NamedNode {
impl TryFrom<JsTerm> for NamedOrBlankNode {
    type Error = JsValue;

    fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
        match value {
            JsTerm::NamedNode(node) => Ok(node.into()),
            JsTerm::BlankNode(node) => Ok(node.into()),
@@ -614,7 +609,7 @@ impl TryFrom<JsTerm> for NamedOrBlankNode {
impl TryFrom<JsTerm> for Subject {
    type Error = JsValue;

    fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
        match value {
            JsTerm::NamedNode(node) => Ok(node.into()),
            JsTerm::BlankNode(node) => Ok(node.into()),
@@ -637,7 +632,7 @@ impl TryFrom<JsTerm> for Subject {
impl TryFrom<JsTerm> for Term {
    type Error = JsValue;

    fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
        match value {
            JsTerm::NamedNode(node) => Ok(node.into()),
            JsTerm::BlankNode(node) => Ok(node.into()),
@@ -657,7 +652,7 @@ impl TryFrom<JsTerm> for Term {
impl TryFrom<JsTerm> for GraphName {
    type Error = JsValue;

    fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
        match value {
            JsTerm::NamedNode(node) => Ok(node.into()),
            JsTerm::BlankNode(node) => Ok(node.into()),
@@ -744,7 +739,7 @@ impl FromJsConverter {
                    ))
                }
            }
            "DefaultGraph" => Ok(JsTerm::DefaultGraph(JsDefaultGraph)),
            "Variable" => Ok(Variable::new(
                Reflect::get(value, &self.value)?
                    .as_string()
@@ -754,8 +749,7 @@ impl FromJsConverter {
                .into()),
            "Quad" => Ok(self.to_quad(value)?.into()),
            _ => Err(format_err!(
                "The termType {term_type} is not supported by Oxigraph"
            )),
        }
    } else if term_type.is_undefined() {

@@ -1,14 +1,11 @@
use crate::format_err;
use crate::model::*;
use crate::utils::to_err;
use js_sys::{Array, Map};
use oxigraph::io::{RdfFormat, RdfParser};
use oxigraph::model::*;
use oxigraph::sparql::QueryResults;
use oxigraph::store::Store;
use wasm_bindgen::prelude::*;

#[wasm_bindgen(js_name = Store)]
@@ -27,7 +24,7 @@ impl JsStore {
            store: Store::new().map_err(to_err)?,
        };
        if let Some(quads) = quads {
            for quad in &*quads {
                store.add(quad)?;
            }
        }
@@ -145,10 +142,11 @@ impl JsStore {
    pub fn load(
        &self,
        data: &str,
        format: &str,
        base_iri: &JsValue,
        to_graph_name: &JsValue,
    ) -> Result<(), JsValue> {
        let format = rdf_format(format)?;
        let base_iri = if base_iri.is_null() || base_iri.is_undefined() {
            None
        } else if base_iri.is_string() {
@@ -161,65 +159,41 @@ impl JsStore {
            ));
        };

        let mut parser = RdfParser::from_format(format);
        if let Some(to_graph_name) = FROM_JS.with(|c| c.to_optional_term(to_graph_name))? {
            parser = parser.with_default_graph(GraphName::try_from(to_graph_name)?);
        }
        if let Some(base_iri) = base_iri {
            parser = parser.with_base_iri(base_iri).map_err(to_err)?;
        }
        self.store
            .load_from_read(parser, data.as_bytes())
            .map_err(to_err)
    }
    pub fn dump(&self, format: &str, from_graph_name: &JsValue) -> Result<String, JsValue> {
        let format = rdf_format(format)?;
        let buffer =
            if let Some(from_graph_name) = FROM_JS.with(|c| c.to_optional_term(from_graph_name))? {
                self.store.dump_graph_to_write(
                    &GraphName::try_from(from_graph_name)?,
                    format,
                    Vec::new(),
                )
            } else {
                self.store.dump_to_write(format, Vec::new())
            }
            .map_err(to_err)?;
        String::from_utf8(buffer).map_err(to_err)
    }
}
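// Accepts either a media type (e.g. "text/turtle") or a file extension (e.g. "ttl")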
fn rdf_format(format: &str) -> Result<RdfFormat, JsValue> {
if format.contains('/') {
RdfFormat::from_media_type(format)
.ok_or_else(|| format_err!("Not supported RDF format media type: {format}"))
} else {
RdfFormat::from_extension(format)
.ok_or_else(|| format_err!("Not supported RDF format extension: {format}"))
}
}

@@ -1,37 +1,37 @@
/* global describe, it */

import assert from "assert";
import runTests from "../node_modules/@rdfjs/data-model/test/index.js";
import oxigraph from "../pkg/oxigraph.js";

runTests({ factory: oxigraph });

describe("DataModel", () => {
    describe("#toString()", () => {
        it("namedNode().toString() should return SPARQL compatible syntax", () => {
            assert.strictEqual(
                "<http://example.com>",
                oxigraph.namedNode("http://example.com").toString(),
            );
        });

        it("blankNode().toString() should return SPARQL compatible syntax", () => {
            assert.strictEqual("_:a", oxigraph.blankNode("a").toString());
        });

        it("literal().toString() should return SPARQL compatible syntax", () => {
            assert.strictEqual('"a\\"b"@en', oxigraph.literal('a"b', "en").toString());
        });

        it("defaultGraph().toString() should return SPARQL compatible syntax", () => {
            assert.strictEqual("DEFAULT", oxigraph.defaultGraph().toString());
        });

        it("variable().toString() should return SPARQL compatible syntax", () => {
            assert.strictEqual("?a", oxigraph.variable("a").toString());
        });

        it("quad().toString() should return SPARQL compatible syntax", () => {
            assert.strictEqual(
                "<http://example.com/s> <http://example.com/p> <<<http://example.com/s1> <http://example.com/p1> <http://example.com/o1>>> <http://example.com/g>",
                oxigraph

@@ -1,8 +1,8 @@
/* global describe, it */

import assert from "assert";
import dataModel from "@rdfjs/data-model";
import { Store } from "../pkg/oxigraph.js";

const ex = dataModel.namedNode("http://example.com");

const triple = dataModel.quad(
@@ -11,17 +11,17 @@ const triple = dataModel.quad(
    dataModel.literal("o"),
);

describe("Store", () => {
    describe("#add()", () => {
        it("an added quad should be in the store", () => {
            const store = new Store();
            store.add(dataModel.quad(ex, ex, triple));
            assert(store.has(dataModel.quad(ex, ex, triple)));
        });
    });

    describe("#delete()", () => {
        it("a removed quad should not be in the store anymore", () => {
            const store = new Store([dataModel.quad(triple, ex, ex)]);
            assert(store.has(dataModel.quad(triple, ex, ex)));
            store.delete(dataModel.quad(triple, ex, ex));
@@ -29,22 +29,22 @@ describe("Store", function () {
        });
    });

    describe("#has()", () => {
        it("an added quad should be in the store", () => {
            const store = new Store([dataModel.quad(ex, ex, ex)]);
            assert(store.has(dataModel.quad(ex, ex, ex)));
        });
    });

    describe("#size()", () => {
        it("A store with one quad should have 1 for size", () => {
            const store = new Store([dataModel.quad(ex, ex, ex)]);
            assert.strictEqual(1, store.size);
        });
    });

    describe("#match_quads()", () => {
        it("blank pattern should return all quads", () => {
            const store = new Store([dataModel.quad(ex, ex, ex)]);
            const results = store.match();
            assert.strictEqual(1, results.length);
@@ -52,32 +52,32 @@ describe("Store", function () {
        });
    });

    describe("#query()", () => {
        it("ASK true", () => {
            const store = new Store([dataModel.quad(ex, ex, ex)]);
            assert.strictEqual(true, store.query("ASK { ?s ?s ?s }"));
        });

        it("ASK false", () => {
            const store = new Store();
            assert.strictEqual(false, store.query("ASK { FILTER(false)}"));
        });

        it("CONSTRUCT", () => {
            const store = new Store([dataModel.quad(ex, ex, ex)]);
            const results = store.query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }");
            assert.strictEqual(1, results.length);
            assert(dataModel.quad(ex, ex, ex).equals(results[0]));
        });

        it("SELECT", () => {
            const store = new Store([dataModel.quad(ex, ex, ex)]);
            const results = store.query("SELECT ?s WHERE { ?s ?p ?o }");
            assert.strictEqual(1, results.length);
            assert(ex.equals(results[0].get("s")));
        });

        it("SELECT with NOW()", () => {
            const store = new Store([dataModel.quad(ex, ex, ex)]);
            const results = store.query(
                "SELECT * WHERE { FILTER(2022 <= YEAR(NOW()) && YEAR(NOW()) <= 2100) }",
@@ -85,15 +85,15 @@ describe("Store", function () {
            assert.strictEqual(1, results.length);
        });

        it("SELECT with RAND()", () => {
            const store = new Store([dataModel.quad(ex, ex, ex)]);
            const results = store.query("SELECT (RAND() AS ?y) WHERE {}");
            assert.strictEqual(1, results.length);
        });
    });

    describe("#update()", () => {
        it("INSERT DATA", () => {
            const store = new Store();
            store.update(
                "INSERT DATA { <http://example.com> <http://example.com> <http://example.com> }",
@@ -101,7 +101,7 @@ describe("Store", function () {
            assert.strictEqual(1, store.size);
        });

        it("DELETE DATA", () => {
            const store = new Store([dataModel.quad(ex, ex, ex)]);
            store.update(
                "DELETE DATA { <http://example.com> <http://example.com> <http://example.com> }",
@@ -109,15 +109,15 @@ describe("Store", function () {
            assert.strictEqual(0, store.size);
        });

        it("DELETE WHERE", () => {
            const store = new Store([dataModel.quad(ex, ex, ex)]);
            store.update("DELETE WHERE { ?v ?v ?v }");
            assert.strictEqual(0, store.size);
        });
    });

    describe("#load()", () => {
        it("load NTriples in the default graph", () => {
            const store = new Store();
            store.load(
                "<http://example.com> <http://example.com> <http://example.com> .",
@@ -126,7 +126,7 @@ describe("Store", function () {
            assert(store.has(dataModel.quad(ex, ex, ex)));
        });

        it("load NTriples in another graph", () => {
            const store = new Store();
            store.load(
                "<http://example.com> <http://example.com> <http://example.com> .",
@@ -137,7 +137,7 @@ describe("Store", function () {
            assert(store.has(dataModel.quad(ex, ex, ex, ex)));
        });

        it("load Turtle with a base IRI", () => {
            const store = new Store();
            store.load(
                "<http://example.com> <http://example.com> <> .",
@@ -147,7 +147,7 @@ describe("Store", function () {
            assert(store.has(dataModel.quad(ex, ex, ex)));
        });

        it("load NQuads", () => {
            const store = new Store();
            store.load(
                "<http://example.com> <http://example.com> <http://example.com> <http://example.com> .",
@@ -156,7 +156,7 @@ describe("Store", function () {
            assert(store.has(dataModel.quad(ex, ex, ex, ex)));
        });

        it("load TriG with a base IRI", () => {
            const store = new Store();
            store.load(
                "GRAPH <> { <http://example.com> <http://example.com> <> }",
@@ -167,8 +167,8 @@ describe("Store", function () {
        });
    });

    describe("#dump()", () => {
        it("dump dataset content", () => {
            const store = new Store([dataModel.quad(ex, ex, ex, ex)]);
            assert.strictEqual(
                "<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n",
@@ -176,7 +176,7 @@ describe("Store", function () {
            );
        });

        it("dump named graph content", () => {
            const store = new Store([dataModel.quad(ex, ex, ex, ex)]);
            assert.strictEqual(
                "<http://example.com> <http://example.com> <http://example.com> .\n",
@@ -184,9 +184,9 @@ describe("Store", function () {
            );
        });

        it("dump default graph content", () => {
            const store = new Store([dataModel.quad(ex, ex, ex, ex)]);
            assert.strictEqual("", store.dump("application/n-triples", dataModel.defaultGraph()));
        });
    });
});

@@ -1,65 +0,0 @@
[package]
name = "oxigraph"
version = "0.3.20"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
keywords = ["RDF", "SPARQL", "graph-database", "database"]
categories = ["database-implementations"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib"
homepage = "https://oxigraph.org/"
documentation = "https://docs.rs/oxigraph"
description = """
a SPARQL database and RDF toolkit
"""
edition = "2021"
rust-version = "1.60"
[package.metadata.docs.rs]
all-features = true
[features]
default = []
http_client = ["oxhttp", "oxhttp/rustls"]
rocksdb_debug = []
[dependencies]
rand = "0.8"
md-5 = "0.10"
sha-1 = "0.10"
sha2 = "0.10"
digest = "0.10"
regex = "1"
oxilangtag = "0.1"
oxiri = "0.2"
rio_api = "0.8"
rio_turtle = "0.8"
rio_xml = "0.8"
hex = "0.4"
siphasher = "0.3"
lazy_static = "1"
json-event-parser = "0.1"
oxrdf = { version = "0.1.7", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] }
oxsdatatypes = { version = "0.1.3", path="oxsdatatypes" }
spargebra = { version = "0.2.8", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] }
sparesults = { version = "0.1.8", path="sparesults", features = ["rdf-star"] }
gfa = "0.10.1"
handlegraph = { git = "https://github.com/chfi/rs-handlegraph", branch = "master" }
[target.'cfg(not(target_family = "wasm"))'.dependencies]
libc = "0.2"
oxrocksdb-sys = { version = "0.3.20", path="../oxrocksdb-sys" }
oxhttp = { version = "0.1", optional = true }
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]
getrandom = { version = "0.2", features = ["js"] }
js-sys = "0.3"
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
criterion = "0.4"
oxhttp = "0.1"
zstd = "0.12"
[[bench]]
name = "store"
harness = false

@ -1,72 +1,13 @@
Oxigraph Oxigraph Rust crates
======== ====================
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph) Oxigraph is implemented in Rust.
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph) It is composed of a main library, [`oxigraph`](./oxigraph), and a set of smaller crates used by the `oxigraph` crate:
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph) * [`oxrdf`](./oxrdf), data structures encoding RDF basic concepts (the `model` module of the `oxigraph` crate).
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) * [`oxrdfio`](./oxrdfio), a unified parser and serializer API for RDF formats (the `io` module of the `oxigraph` crate). It itself relies on:
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) * [`oxttl`](./oxttl), N-Triples, N-Quads, Turtle, TriG and N3 parsing and serialization.
* [`oxrdfxml`](./oxrdfxml), RDF/XML parsing and serialization.
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. * [`spargebra`](./spargebra), a SPARQL parser.
* [`sparesults`](./sparesults), parsers and serializers for SPARQL result formats (the `sparql::results` module of the `oxigraph` crate).
Its goal is to provide a compliant, safe and fast on-disk graph database. * [`sparopt`](./sparopt), a SPARQL optimizer.
It also provides a set of utility functions for reading, writing, and processing RDF files. * [`oxsdatatypes`](./oxsdatatypes), an implementation of some XML Schema datatypes.
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet.
Oxigraph also provides [a standalone HTTP server](https://crates.io/crates/oxigraph_server) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library.
Oxigraph implements the following specifications:
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio).
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
The main entry point of Oxigraph is the [`Store`](store::Store) struct:
```rust
use oxigraph::store::Store;
use oxigraph::model::*;
use oxigraph::sparql::QueryResults;
let store = Store::new().unwrap();
// insertion
let ex = NamedNode::new("http://example.com").unwrap();
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph);
store.insert(&quad).unwrap();
// quad filter
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap();
assert_eq!(vec![quad], results);
// SPARQL query
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() {
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into()));
}
```
Some parts of this library are available as standalone crates:
* [`oxrdf`](https://crates.io/crates/oxrdf) provides data structures encoding RDF basic concepts (the `oxigraph::model` module).
* [`spargebra`](https://crates.io/crates/spargebra) provides a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults) provides parsers and serializers for SPARQL result formats.
To build the library, don't forget the submodules: use `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository with its submodules, or `git submodule update --init` to fetch them into an already cloned repository.
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -0,0 +1,72 @@
[package]
name = "oxigraph"
version.workspace = true
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF", "SPARQL", "graph-database", "database"]
categories = ["database-implementations"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxigraph"
homepage = "https://oxigraph.org/"
documentation = "https://docs.rs/oxigraph"
description = """
a SPARQL database and RDF toolkit
"""
edition.workspace = true
rust-version.workspace = true
[features]
default = []
js = ["getrandom/js", "oxsdatatypes/js", "js-sys"]
http-client = ["oxhttp"]
http-client-native-tls = ["http-client", "oxhttp/native-tls"]
http-client-rustls-webpki = ["http-client", "oxhttp/rustls-webpki"]
http-client-rustls-native = ["http-client", "oxhttp/rustls-native"]
rocksdb-pkg-config = ["oxrocksdb-sys/pkg-config"]
rocksdb-debug = []
[dependencies]
digest.workspace = true
hex.workspace = true
json-event-parser.workspace = true
md-5.workspace = true
oxilangtag.workspace = true
oxiri.workspace = true
oxrdf = { workspace = true, features = ["rdf-star", "oxsdatatypes"] }
oxrdfio = { workspace = true, features = ["rdf-star"] }
oxsdatatypes.workspace = true
rand.workspace = true
regex.workspace = true
sha1.workspace = true
sha2.workspace = true
siphasher.workspace = true
sparesults = { workspace = true, features = ["rdf-star"] }
spargebra = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] }
sparopt = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] }
thiserror.workspace = true
gfa = "0.10.1"
handlegraph = { git = "https://github.com/chfi/rs-handlegraph", branch = "master" }
[target.'cfg(not(target_family = "wasm"))'.dependencies]
libc.workspace = true
oxhttp = { workspace = true, optional = true }
oxrocksdb-sys.workspace = true
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]
getrandom.workspace = true
js-sys = { workspace = true, optional = true }
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
codspeed-criterion-compat.workspace = true
oxhttp.workspace = true
zstd.workspace = true
[lints]
workspace = true
[package.metadata.docs.rs]
rustdoc-args = ["--cfg", "docsrs"]
[[bench]]
name = "store"
harness = false

@ -0,0 +1,77 @@
Oxigraph
========
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph)
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph)
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
Its goal is to provide a compliant, safe and fast on-disk graph database.
It also provides a set of utility functions for reading, writing, and processing RDF files.
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet.
Oxigraph also provides [a CLI tool](https://crates.io/crates/oxigraph-cli) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library.
Oxigraph implements the following specifications:
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio).
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
The main entry point of Oxigraph is the [`Store`](store::Store) struct:
```rust
use oxigraph::store::Store;
use oxigraph::model::*;
use oxigraph::sparql::QueryResults;
let store = Store::new().unwrap();
// insertion
let ex = NamedNode::new("http://example.com").unwrap();
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph);
store.insert(&quad).unwrap();
// quad filter
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap();
assert_eq!(vec![quad], results);
// SPARQL query
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() {
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into()));
}
```
It is based on these crates, which can also be used separately:
* [`oxrdf`](https://crates.io/crates/oxrdf), data structures encoding RDF basic concepts (the [`oxigraph::model`](crate::model) module).
* [`oxrdfio`](https://crates.io/crates/oxrdfio), a unified parser and serializer API for RDF formats (the [`oxigraph::io`](crate::io) module). It itself relies on:
* [`oxttl`](https://crates.io/crates/oxttl), N-Triples, N-Quads, Turtle, TriG and N3 parsing and serialization.
* [`oxrdfxml`](https://crates.io/crates/oxrdfxml), RDF/XML parsing and serialization.
* [`spargebra`](https://crates.io/crates/spargebra), a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults), parsers and serializers for SPARQL result formats (the [`oxigraph::sparql::results`](crate::sparql::results) module).
* [`sparopt`](https://crates.io/crates/sparopt), a SPARQL optimizer.
* [`oxsdatatypes`](https://crates.io/crates/oxsdatatypes), an implementation of some XML Schema datatypes.
To build the library locally, don't forget the submodules: use `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository with its submodules, or `git submodule update --init` to fetch them into an already cloned repository.
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -1,109 +1,88 @@
use criterion::{criterion_group, criterion_main, Criterion, Throughput}; #![allow(clippy::panic)]
use codspeed_criterion_compat::{criterion_group, criterion_main, Criterion, Throughput};
use oxhttp::model::{Method, Request, Status}; use oxhttp::model::{Method, Request, Status};
use oxigraph::io::GraphFormat; use oxigraph::io::{RdfFormat, RdfParser};
use oxigraph::model::GraphNameRef;
use oxigraph::sparql::{Query, QueryResults, Update}; use oxigraph::sparql::{Query, QueryResults, Update};
use oxigraph::store::Store; use oxigraph::store::Store;
use rand::random; use rand::random;
use std::env::temp_dir; use std::env::temp_dir;
use std::fs::{remove_dir_all, File}; use std::fs::{remove_dir_all, File};
use std::io::{BufRead, BufReader, Cursor, Read}; use std::io::Read;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::str;
fn parse_nt(c: &mut Criterion) {
let data = read_data("explore-1000.nt.zst");
let mut group = c.benchmark_group("parse");
group.throughput(Throughput::Bytes(data.len() as u64));
group.sample_size(50);
group.bench_function("parse BSBM explore 1000", |b| {
b.iter(|| {
for r in RdfParser::from_format(RdfFormat::NTriples).parse_read(data.as_slice()) {
r.unwrap();
}
})
});
group.bench_function("parse BSBM explore 1000 unchecked", |b| {
b.iter(|| {
for r in RdfParser::from_format(RdfFormat::NTriples)
.unchecked()
.parse_read(data.as_slice())
{
r.unwrap();
}
})
});
}
fn store_load(c: &mut Criterion) { fn store_load(c: &mut Criterion) {
{ let data = read_data("explore-1000.nt.zst");
let mut data = Vec::new(); let mut group = c.benchmark_group("store load");
read_data("explore-1000.nt.zst") group.throughput(Throughput::Bytes(data.len() as u64));
.read_to_end(&mut data) group.sample_size(10);
.unwrap(); group.bench_function("load BSBM explore 1000 in memory", |b| {
b.iter(|| {
let mut group = c.benchmark_group("store load"); let store = Store::new().unwrap();
group.throughput(Throughput::Bytes(data.len() as u64)); do_load(&store, &data);
group.sample_size(10); })
group.bench_function("load BSBM explore 1000 in memory", |b| { });
b.iter(|| { group.bench_function("load BSBM explore 1000 in on disk", |b| {
let store = Store::new().unwrap(); b.iter(|| {
do_load(&store, &data); let path = TempDir::default();
}) let store = Store::open(&path).unwrap();
}); do_load(&store, &data);
group.bench_function("load BSBM explore 1000 in on disk", |b| { })
b.iter(|| { });
let path = TempDir::default(); group.bench_function("load BSBM explore 1000 in on disk with bulk load", |b| {
let store = Store::open(&path).unwrap(); b.iter(|| {
do_load(&store, &data); let path = TempDir::default();
}) let store = Store::open(&path).unwrap();
}); do_bulk_load(&store, &data);
group.bench_function("load BSBM explore 1000 in on disk with bulk load", |b| { })
b.iter(|| { });
let path = TempDir::default();
let store = Store::open(&path).unwrap();
do_bulk_load(&store, &data);
})
});
}
{
let mut data = Vec::new();
read_data("explore-10000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let mut group = c.benchmark_group("store load large");
group.throughput(Throughput::Bytes(data.len() as u64));
group.sample_size(10);
group.bench_function("load BSBM explore 10000 in on disk with bulk load", |b| {
b.iter(|| {
let path = TempDir::default();
let store = Store::open(&path).unwrap();
do_bulk_load(&store, &data);
})
});
}
} }
fn do_load(store: &Store, data: &[u8]) { fn do_load(store: &Store, data: &[u8]) {
store store.load_from_read(RdfFormat::NTriples, data).unwrap();
.load_graph(
Cursor::new(&data),
GraphFormat::NTriples,
GraphNameRef::DefaultGraph,
None,
)
.unwrap();
store.optimize().unwrap(); store.optimize().unwrap();
} }
fn do_bulk_load(store: &Store, data: &[u8]) { fn do_bulk_load(store: &Store, data: &[u8]) {
store store
.bulk_loader() .bulk_loader()
.load_graph( .load_from_read(RdfFormat::NTriples, data)
Cursor::new(&data),
GraphFormat::NTriples,
GraphNameRef::DefaultGraph,
None,
)
.unwrap(); .unwrap();
store.optimize().unwrap(); store.optimize().unwrap();
} }
fn store_query_and_update(c: &mut Criterion) { fn store_query_and_update(c: &mut Criterion) {
let mut data = Vec::new(); let data = read_data("explore-1000.nt.zst");
read_data("explore-1000.nt.zst") let operations = bsbm_sparql_operation()
.read_to_end(&mut data) .into_iter()
.unwrap(); .map(|op| match op {
RawOperation::Query(q) => Operation::Query(Query::parse(&q, None).unwrap()),
let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst") RawOperation::Update(q) => Operation::Update(Update::parse(&q, None).unwrap()),
.lines()
.map(|l| {
let l = l.unwrap();
let mut parts = l.trim().split('\t');
let kind = parts.next().unwrap();
let operation = parts.next().unwrap();
match kind {
"query" => Operation::Query(Query::parse(operation, None).unwrap()),
"update" => Operation::Update(Update::parse(operation, None).unwrap()),
_ => panic!("Unexpected operation kind {kind}"),
}
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let query_operations = operations let query_operations = operations
@ -162,26 +141,7 @@ fn run_operation(store: &Store, operations: &[Operation]) {
} }
fn sparql_parsing(c: &mut Criterion) { fn sparql_parsing(c: &mut Criterion) {
let mut data = Vec::new(); let operations = bsbm_sparql_operation();
read_data("explore-1000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst")
.lines()
.map(|l| {
let l = l.unwrap();
let mut parts = l.trim().split('\t');
let kind = parts.next().unwrap();
let operation = parts.next().unwrap();
match kind {
"query" => RawOperation::Query(operation.to_owned()),
"update" => RawOperation::Update(operation.to_owned()),
_ => panic!("Unexpected operation kind {kind}"),
}
})
.collect::<Vec<_>>();
let mut group = c.benchmark_group("sparql parsing"); let mut group = c.benchmark_group("sparql parsing");
group.sample_size(10); group.sample_size(10);
group.throughput(Throughput::Bytes( group.throughput(Throughput::Bytes(
@ -209,14 +169,14 @@ fn sparql_parsing(c: &mut Criterion) {
}); });
} }
criterion_group!(parse, parse_nt);
criterion_group!(store, sparql_parsing, store_query_and_update, store_load); criterion_group!(store, sparql_parsing, store_query_and_update, store_load);
criterion_main!(store); criterion_main!(parse, store);
fn read_data(file: &str) -> impl BufRead { fn read_data(file: &str) -> Vec<u8> {
if !Path::new(file).exists() { if !Path::new(file).exists() {
let mut client = oxhttp::Client::new(); let client = oxhttp::Client::new().with_redirection_limit(5);
client.set_redirection_limit(5);
let url = format!("https://github.com/Tpt/bsbm-tools/releases/download/v0.2/{file}"); let url = format!("https://github.com/Tpt/bsbm-tools/releases/download/v0.2/{file}");
let request = Request::builder(Method::GET, url.parse().unwrap()).build(); let request = Request::builder(Method::GET, url.parse().unwrap()).build();
let response = client.request(request).unwrap(); let response = client.request(request).unwrap();
@ -228,7 +188,31 @@ fn read_data(file: &str) -> impl BufRead {
); );
std::io::copy(&mut response.into_body(), &mut File::create(file).unwrap()).unwrap(); std::io::copy(&mut response.into_body(), &mut File::create(file).unwrap()).unwrap();
} }
BufReader::new(zstd::Decoder::new(File::open(file).unwrap()).unwrap()) let mut buf = Vec::new();
zstd::Decoder::new(File::open(file).unwrap())
.unwrap()
.read_to_end(&mut buf)
.unwrap();
buf
}
fn bsbm_sparql_operation() -> Vec<RawOperation> {
String::from_utf8(read_data("mix-exploreAndUpdate-1000.tsv.zst"))
.unwrap()
.lines()
.rev()
.take(300) // We take only 10 groups
.map(|l| {
let mut parts = l.trim().split('\t');
let kind = parts.next().unwrap();
let operation = parts.next().unwrap();
match kind {
"query" => RawOperation::Query(operation.into()),
"update" => RawOperation::Update(operation.into()),
_ => panic!("Unexpected operation kind {kind}"),
}
})
.collect()
} }
#[derive(Clone)] #[derive(Clone)]

@ -1,8 +1,13 @@
#![allow(deprecated)]
use oxrdfio::{RdfFormat, RdfParser, RdfSerializer};
/// [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) serialization formats. /// [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) serialization formats.
/// ///
/// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future. /// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive] #[non_exhaustive]
#[deprecated(note = "use RdfFormat instead", since = "0.4.0")]
pub enum GraphFormat { pub enum GraphFormat {
/// [N-Triples](https://www.w3.org/TR/n-triples/) /// [N-Triples](https://www.w3.org/TR/n-triples/)
NTriples, NTriples,
@ -18,7 +23,10 @@ impl GraphFormat {
/// ``` /// ```
/// use oxigraph::io::GraphFormat; /// use oxigraph::io::GraphFormat;
/// ///
/// assert_eq!(GraphFormat::NTriples.iri(), "http://www.w3.org/ns/formats/N-Triples") /// assert_eq!(
/// GraphFormat::NTriples.iri(),
/// "http://www.w3.org/ns/formats/N-Triples"
/// )
/// ``` /// ```
#[inline] #[inline]
pub fn iri(self) -> &'static str { pub fn iri(self) -> &'static str {
@ -60,6 +68,7 @@ impl GraphFormat {
Self::RdfXml => "rdf", Self::RdfXml => "rdf",
} }
} }
/// Looks for a known format from a media type. /// Looks for a known format from a media type.
/// ///
/// It supports some media type aliases. /// It supports some media type aliases.
@ -69,7 +78,10 @@ impl GraphFormat {
/// ``` /// ```
/// use oxigraph::io::GraphFormat; /// use oxigraph::io::GraphFormat;
/// ///
/// assert_eq!(GraphFormat::from_media_type("text/turtle; charset=utf-8"), Some(GraphFormat::Turtle)) /// assert_eq!(
/// GraphFormat::from_media_type("text/turtle; charset=utf-8"),
/// Some(GraphFormat::Turtle)
/// )
/// ``` /// ```
#[inline] #[inline]
pub fn from_media_type(media_type: &str) -> Option<Self> { pub fn from_media_type(media_type: &str) -> Option<Self> {
@ -89,7 +101,10 @@ impl GraphFormat {
/// ``` /// ```
/// use oxigraph::io::GraphFormat; /// use oxigraph::io::GraphFormat;
/// ///
/// assert_eq!(GraphFormat::from_extension("nt"), Some(GraphFormat::NTriples)) /// assert_eq!(
/// GraphFormat::from_extension("nt"),
/// Some(GraphFormat::NTriples)
/// )
/// ``` /// ```
#[inline] #[inline]
pub fn from_extension(extension: &str) -> Option<Self> { pub fn from_extension(extension: &str) -> Option<Self> {
@ -102,11 +117,37 @@ impl GraphFormat {
} }
} }
impl From<GraphFormat> for RdfFormat {
#[inline]
fn from(format: GraphFormat) -> Self {
match format {
GraphFormat::NTriples => Self::NTriples,
GraphFormat::Turtle => Self::Turtle,
GraphFormat::RdfXml => Self::RdfXml,
}
}
}
impl From<GraphFormat> for RdfParser {
#[inline]
fn from(format: GraphFormat) -> Self {
RdfFormat::from(format).into()
}
}
impl From<GraphFormat> for RdfSerializer {
#[inline]
fn from(format: GraphFormat) -> Self {
RdfFormat::from(format).into()
}
}
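The `From` impls above give deprecated call sites a one-line bridge into the new API. A minimal migration sketch (not part of the diff; it only exercises the conversions defined here):
```rust
#[allow(deprecated)]
fn migrate_format() {
    use oxigraph::io::{GraphFormat, RdfFormat, RdfSerializer};

    // The deprecated enum maps losslessly onto the new one...
    let format: RdfFormat = GraphFormat::Turtle.into();
    assert_eq!(format, RdfFormat::Turtle);

    // ...and converts directly into a serializer for the same format.
    let _serializer: RdfSerializer = GraphFormat::NTriples.into();
}
```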
/// [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) serialization formats. /// [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) serialization formats.
/// ///
/// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future. /// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive] #[non_exhaustive]
#[deprecated(note = "use RdfFormat instead", since = "0.4.0")]
pub enum DatasetFormat { pub enum DatasetFormat {
/// [N-Quads](https://www.w3.org/TR/n-quads/) /// [N-Quads](https://www.w3.org/TR/n-quads/)
NQuads, NQuads,
@ -120,7 +161,10 @@ impl DatasetFormat {
/// ``` /// ```
/// use oxigraph::io::DatasetFormat; /// use oxigraph::io::DatasetFormat;
/// ///
/// assert_eq!(DatasetFormat::NQuads.iri(), "http://www.w3.org/ns/formats/N-Quads") /// assert_eq!(
/// DatasetFormat::NQuads.iri(),
/// "http://www.w3.org/ns/formats/N-Quads"
/// )
/// ``` /// ```
#[inline] #[inline]
pub fn iri(self) -> &'static str { pub fn iri(self) -> &'static str {
@ -159,6 +203,7 @@ impl DatasetFormat {
Self::TriG => "trig", Self::TriG => "trig",
} }
} }
/// Looks for a known format from a media type. /// Looks for a known format from a media type.
/// ///
/// It supports some media type aliases. /// It supports some media type aliases.
@ -167,7 +212,10 @@ impl DatasetFormat {
/// ``` /// ```
/// use oxigraph::io::DatasetFormat; /// use oxigraph::io::DatasetFormat;
/// ///
/// assert_eq!(DatasetFormat::from_media_type("application/n-quads; charset=utf-8"), Some(DatasetFormat::NQuads)) /// assert_eq!(
/// DatasetFormat::from_media_type("application/n-quads; charset=utf-8"),
/// Some(DatasetFormat::NQuads)
/// )
/// ``` /// ```
#[inline] #[inline]
pub fn from_media_type(media_type: &str) -> Option<Self> { pub fn from_media_type(media_type: &str) -> Option<Self> {
@ -186,7 +234,10 @@ impl DatasetFormat {
/// ``` /// ```
/// use oxigraph::io::DatasetFormat; /// use oxigraph::io::DatasetFormat;
/// ///
/// assert_eq!(DatasetFormat::from_extension("nq"), Some(DatasetFormat::NQuads)) /// assert_eq!(
/// DatasetFormat::from_extension("nq"),
/// Some(DatasetFormat::NQuads)
/// )
/// ``` /// ```
#[inline] #[inline]
pub fn from_extension(extension: &str) -> Option<Self> { pub fn from_extension(extension: &str) -> Option<Self> {
@ -198,12 +249,36 @@ impl DatasetFormat {
} }
} }
impl From<DatasetFormat> for RdfFormat {
#[inline]
fn from(format: DatasetFormat) -> Self {
match format {
DatasetFormat::NQuads => Self::NQuads,
DatasetFormat::TriG => Self::TriG,
}
}
}
impl From<DatasetFormat> for RdfParser {
#[inline]
fn from(format: DatasetFormat) -> Self {
RdfFormat::from(format).into()
}
}
impl From<DatasetFormat> for RdfSerializer {
#[inline]
fn from(format: DatasetFormat) -> Self {
RdfFormat::from(format).into()
}
}
impl TryFrom<DatasetFormat> for GraphFormat { impl TryFrom<DatasetFormat> for GraphFormat {
type Error = (); type Error = ();
/// Attempts to find a graph format that is a subset of this [`DatasetFormat`]. /// Attempts to find a graph format that is a subset of this [`DatasetFormat`].
#[inline] #[inline]
fn try_from(value: DatasetFormat) -> Result<Self, ()> { fn try_from(value: DatasetFormat) -> Result<Self, Self::Error> {
match value { match value {
DatasetFormat::NQuads => Ok(Self::NTriples), DatasetFormat::NQuads => Ok(Self::NTriples),
DatasetFormat::TriG => Ok(Self::Turtle), DatasetFormat::TriG => Ok(Self::Turtle),
@ -216,7 +291,7 @@ impl TryFrom<GraphFormat> for DatasetFormat {
/// Attempts to find a dataset format that is a superset of this [`GraphFormat`]. /// Attempts to find a dataset format that is a superset of this [`GraphFormat`].
#[inline] #[inline]
fn try_from(value: GraphFormat) -> Result<Self, ()> { fn try_from(value: GraphFormat) -> Result<Self, Self::Error> {
match value { match value {
GraphFormat::NTriples => Ok(Self::NQuads), GraphFormat::NTriples => Ok(Self::NQuads),
GraphFormat::Turtle => Ok(Self::TriG), GraphFormat::Turtle => Ok(Self::TriG),
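These `TryFrom` impls encode the subset/superset relation between the two deprecated enums. A short sketch of the expected behavior (the failing RDF/XML arm is an assumption based on the unit error type, since the hunk is truncated here):
```rust
#[allow(deprecated)]
fn format_relation() {
    use oxigraph::io::{DatasetFormat, GraphFormat};

    // TriG is a superset of Turtle, so both directions succeed.
    assert_eq!(GraphFormat::try_from(DatasetFormat::TriG), Ok(GraphFormat::Turtle));
    assert_eq!(DatasetFormat::try_from(GraphFormat::Turtle), Ok(DatasetFormat::TriG));

    // Assumption: RDF/XML has no dataset counterpart, so this errors.
    assert!(DatasetFormat::try_from(GraphFormat::RdfXml).is_err());
}
```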

@ -0,0 +1,39 @@
//! Utilities to read and write RDF graphs and datasets using [OxRDF I/O](https://crates.io/crates/oxrdfio).
//!
//! The entry points of this module are the two [`RdfParser`] and [`RdfSerializer`] structs.
//!
//! Usage example converting a Turtle file to an N-Triples file:
//! ```
//! use oxigraph::io::{RdfFormat, RdfParser, RdfSerializer};
//!
//! let turtle_file = b"@base <http://example.com/> .
//! @prefix schema: <http://schema.org/> .
//! <foo> a schema:Person ;
//! schema:name \"Foo\" .
//! <bar> a schema:Person ;
//! schema:name \"Bar\" .";
//!
//! let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
//! <http://example.com/foo> <http://schema.org/name> \"Foo\" .
//! <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
//! <http://example.com/bar> <http://schema.org/name> \"Bar\" .
//! ";
//!
//! let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
//! for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) {
//! writer.write_quad(&quad.unwrap()).unwrap();
//! }
//! assert_eq!(writer.finish().unwrap(), ntriples_file);
//! ```
mod format;
pub mod read;
pub mod write;
#[allow(deprecated)]
pub use self::format::{DatasetFormat, GraphFormat};
#[allow(deprecated)]
pub use self::read::{DatasetParser, GraphParser};
#[allow(deprecated)]
pub use self::write::{DatasetSerializer, GraphSerializer};
pub use oxrdfio::*;

@ -0,0 +1,200 @@
#![allow(deprecated)]
//! Utilities to read RDF graphs and datasets.
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxiri::IriParseError;
use oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser};
use std::io::Read;
/// A parser for RDF graph serialization formats.
///
/// It currently supports the following formats:
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[deprecated(note = "use RdfParser instead", since = "0.4.0")]
pub struct GraphParser {
inner: RdfParser,
}
impl GraphParser {
/// Builds a parser for the given format.
#[inline]
pub fn from_format(format: GraphFormat) -> Self {
Self {
inner: RdfParser::from_format(format.into())
.without_named_graphs()
.rename_blank_nodes(),
}
}
/// Provides an IRI that could be used to resolve the file's relative IRIs.
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "</s> </p> </o> .";
///
/// let parser =
/// GraphParser::from_format(GraphFormat::Turtle).with_base_iri("http://example.com")?;
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self {
inner: self.inner.with_base_iri(base_iri)?,
})
}
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of triples.
pub fn read_triples<R: Read>(self, reader: R) -> TripleReader<R> {
TripleReader {
parser: self.inner.parse_read(reader),
}
}
}
/// An iterator yielding read triples.
/// Could be built using a [`GraphParser`].
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct TripleReader<R: Read> {
parser: FromReadQuadReader<R>,
}
impl<R: Read> Iterator for TripleReader<R> {
type Item = Result<Triple, RdfParseError>;
fn next(&mut self) -> Option<Self::Item> {
Some(self.parser.next()?.map(Into::into).map_err(Into::into))
}
}
/// A parser for RDF dataset serialization formats.
///
/// It currently supports the following formats:
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[deprecated(note = "use RdfParser instead", since = "0.4.0")]
pub struct DatasetParser {
inner: RdfParser,
}
impl DatasetParser {
/// Builds a parser for the given format.
#[inline]
pub fn from_format(format: DatasetFormat) -> Self {
Self {
inner: RdfParser::from_format(format.into()).rename_blank_nodes(),
}
}
/// Provides an IRI that could be used to resolve the file's relative IRIs.
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<g> { </s> </p> </o> }";
///
/// let parser =
/// DatasetParser::from_format(DatasetFormat::TriG).with_base_iri("http://example.com")?;
/// let quads = parser
/// .read_quads(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self {
inner: self.inner.with_base_iri(base_iri)?,
})
}
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of quads.
pub fn read_quads<R: Read>(self, reader: R) -> QuadReader<R> {
QuadReader {
parser: self.inner.parse_read(reader),
}
}
}
/// An iterator yielding read quads.
/// Could be built using a [`DatasetParser`].
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct QuadReader<R: Read> {
parser: FromReadQuadReader<R>,
}
impl<R: Read> Iterator for QuadReader<R> {
type Item = Result<Quad, RdfParseError>;
fn next(&mut self) -> Option<Self::Item> {
Some(self.parser.next()?.map_err(Into::into))
}
}
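For reference, the non-deprecated path does the same job with `RdfParser` directly, using the exact configuration `GraphParser::from_format` builds above:
```rust
use oxigraph::io::{RdfFormat, RdfParser};

fn parse_ntriples() -> Result<(), Box<dyn std::error::Error>> {
    let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";

    // Same setup GraphParser applies internally: graph-only input, fresh blank nodes.
    let quads = RdfParser::from_format(RdfFormat::NTriples)
        .without_named_graphs()
        .rename_blank_nodes()
        .parse_read(file.as_bytes())
        .collect::<Result<Vec<_>, _>>()?;

    assert_eq!(quads.len(), 1);
    assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
    Ok(())
}
```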

@ -0,0 +1,185 @@
#![allow(deprecated)]
//! Utilities to write RDF graphs and datasets.
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxrdfio::{RdfSerializer, ToWriteQuadWriter};
use std::io::{self, Write};
/// A serializer for RDF graph serialization formats.
///
/// It currently supports the following formats:
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(
/// buffer.as_slice(),
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")]
pub struct GraphSerializer {
inner: RdfSerializer,
}
impl GraphSerializer {
/// Builds a serializer for the given format
#[inline]
pub fn from_format(format: GraphFormat) -> Self {
Self {
inner: RdfSerializer::from_format(format.into()),
}
}
/// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`] implementation
pub fn triple_writer<W: Write>(self, write: W) -> TripleWriter<W> {
TripleWriter {
writer: self.inner.serialize_to_write(write),
}
}
}
/// Allows writing triples.
/// Could be built using a [`GraphSerializer`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](TripleWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(
/// buffer.as_slice(),
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct TripleWriter<W: Write> {
writer: ToWriteQuadWriter<W>,
}
impl<W: Write> TripleWriter<W> {
/// Writes a triple
pub fn write<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
self.writer.write_triple(triple)
}
/// Writes the last bytes of the file
pub fn finish(self) -> io::Result<()> {
self.writer.finish()?.flush()
}
}
/// A serializer for RDF dataset serialization formats.
///
/// It currently supports the following formats:
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")]
pub struct DatasetSerializer {
inner: RdfSerializer,
}
impl DatasetSerializer {
/// Builds a serializer for the given format
#[inline]
pub fn from_format(format: DatasetFormat) -> Self {
Self {
inner: RdfSerializer::from_format(format.into()),
}
}
/// Returns a [`QuadWriter`] allowing writing quads into the given [`Write`] implementation
pub fn quad_writer<W: Write>(self, write: W) -> QuadWriter<W> {
QuadWriter {
writer: self.inner.serialize_to_write(write),
}
}
}
/// Allows writing quads.
/// Could be built using a [`DatasetSerializer`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](QuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct QuadWriter<W: Write> {
writer: ToWriteQuadWriter<W>,
}
impl<W: Write> QuadWriter<W> {
/// Writes a quad
pub fn write<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
self.writer.write_quad(quad)
}
/// Writes the last bytes of the file
pub fn finish(self) -> io::Result<()> {
self.writer.finish()?.flush()
}
}
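The matching `RdfSerializer` path replaces both deprecated writer types; this sketch reuses only the calls shown in the `io` module example above:
```rust
use oxigraph::io::{RdfFormat, RdfSerializer};
use oxigraph::model::{NamedNode, Quad};

fn serialize_nquads() -> Result<(), Box<dyn std::error::Error>> {
    let quad = Quad::new(
        NamedNode::new("http://example.com/s")?,
        NamedNode::new("http://example.com/p")?,
        NamedNode::new("http://example.com/o")?,
        NamedNode::new("http://example.com/g")?,
    );

    let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
    writer.write_quad(&quad)?;
    let buffer = writer.finish()?;

    assert_eq!(
        buffer.as_slice(),
        "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes()
    );
    Ok(())
}
```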

@ -0,0 +1,12 @@
#![doc = include_str!("../README.md")]
#![doc(test(attr(deny(warnings))))]
#![doc(test(attr(allow(deprecated))))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
pub mod io;
pub mod model;
pub mod sparql;
mod storage;
pub mod store;

@ -0,0 +1,20 @@
//! Implements data structures for [RDF 1.1 Concepts](https://www.w3.org/TR/rdf11-concepts/) using [OxRDF](https://crates.io/crates/oxrdf).
//!
//! Usage example:
//!
//! ```
//! use oxigraph::model::*;
//!
//! let mut graph = Graph::default();
//!
//! // insertion
//! let ex = NamedNodeRef::new("http://example.com").unwrap();
//! let triple = TripleRef::new(ex, ex, ex);
//! graph.insert(triple);
//!
//! // simple filter
//! let results: Vec<_> = graph.triples_for_subject(ex).collect();
//! assert_eq!(vec![triple], results);
//! ```
pub use oxrdf::*;

@ -1,15 +1,13 @@
//! [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery) //! [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery)
//! //!
//! The root type for SPARQL queries is [`Query`] and the root type for updates is [`Update`]. //! The root type for SPARQL queries is [`Query`] and the root type for updates is [`Update`].
//!
//! Warning: this implementation is an unstable work in progress
use crate::model::*; use crate::model::*;
use crate::sparql::eval::Timer; use crate::sparql::eval::Timer;
use oxsdatatypes::DayTimeDuration;
use spargebra::GraphUpdateOperation; use spargebra::GraphUpdateOperation;
use std::fmt; use std::fmt;
use std::str::FromStr; use std::str::FromStr;
use std::time::Duration;
/// A parsed [SPARQL query](https://www.w3.org/TR/sparql11-query/). /// A parsed [SPARQL query](https://www.w3.org/TR/sparql11-query/).
/// ///
@ -25,25 +23,31 @@ use std::time::Duration;
/// // We edit the query dataset specification /// // We edit the query dataset specification
/// let default = vec![NamedNode::new("http://example.com")?.into()]; /// let default = vec![NamedNode::new("http://example.com")?.into()];
/// query.dataset_mut().set_default_graph(default.clone()); /// query.dataset_mut().set_default_graph(default.clone());
/// assert_eq!(query.dataset().default_graph_graphs(), Some(default.as_slice())); /// assert_eq!(
/// query.dataset().default_graph_graphs(),
/// Some(default.as_slice())
/// );
/// # Ok::<_, Box<dyn std::error::Error>>(()) /// # Ok::<_, Box<dyn std::error::Error>>(())
/// ``` /// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Query { pub struct Query {
pub(super) inner: spargebra::Query, pub(super) inner: spargebra::Query,
pub(super) dataset: QueryDataset, pub(super) dataset: QueryDataset,
pub(super) parsing_duration: Option<Duration>, pub(super) parsing_duration: Option<DayTimeDuration>,
} }
impl Query { impl Query {
/// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query. /// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query.
pub fn parse(query: &str, base_iri: Option<&str>) -> Result<Self, spargebra::ParseError> { pub fn parse(
query: &str,
base_iri: Option<&str>,
) -> Result<Self, spargebra::SparqlSyntaxError> {
let start = Timer::now(); let start = Timer::now();
let query = Self::from(spargebra::Query::parse(query, base_iri)?); let query = Self::from(spargebra::Query::parse(query, base_iri)?);
Ok(Self { Ok(Self {
dataset: query.dataset, dataset: query.dataset,
inner: query.inner, inner: query.inner,
parsing_duration: Some(start.elapsed()), parsing_duration: start.elapsed(),
}) })
} }
@ -60,30 +64,30 @@ impl Query {
impl fmt::Display for Query { impl fmt::Display for Query {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.inner.fmt(f) //TODO: override self.inner.fmt(f) // TODO: override
} }
} }
impl FromStr for Query { impl FromStr for Query {
type Err = spargebra::ParseError; type Err = spargebra::SparqlSyntaxError;
fn from_str(query: &str) -> Result<Self, spargebra::ParseError> { fn from_str(query: &str) -> Result<Self, Self::Err> {
Self::parse(query, None) Self::parse(query, None)
} }
} }
impl<'a> TryFrom<&'a str> for Query { impl TryFrom<&str> for Query {
type Error = spargebra::ParseError; type Error = spargebra::SparqlSyntaxError;
fn try_from(query: &str) -> Result<Self, spargebra::ParseError> { fn try_from(query: &str) -> Result<Self, Self::Error> {
Self::from_str(query) Self::from_str(query)
} }
} }
impl<'a> TryFrom<&'a String> for Query { impl TryFrom<&String> for Query {
type Error = spargebra::ParseError; type Error = spargebra::SparqlSyntaxError;
fn try_from(query: &String) -> Result<Self, spargebra::ParseError> { fn try_from(query: &String) -> Result<Self, Self::Error> {
Self::from_str(query) Self::from_str(query)
} }
} }
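With `FromStr` and the tightened `TryFrom` impls, query strings convert directly into `Query`; a minimal sketch using the re-exported `SparqlSyntaxError`:
```rust
use oxigraph::sparql::{Query, SparqlSyntaxError};
use std::str::FromStr;

fn parse_queries() -> Result<(), SparqlSyntaxError> {
    // Both paths delegate to Query::parse with no base IRI.
    let _select = Query::from_str("SELECT * WHERE { ?s ?p ?o }")?;
    let _ask: Query = "ASK { ?s ?p ?o }".try_into()?;
    Ok(())
}
```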
@ -112,7 +116,7 @@ impl From<spargebra::Query> for Query {
/// let update = Update::parse(update_str, None)?; /// let update = Update::parse(update_str, None)?;
/// ///
/// assert_eq!(update.to_string().trim(), update_str); /// assert_eq!(update.to_string().trim(), update_str);
/// # Ok::<_, oxigraph::sparql::ParseError>(()) /// # Ok::<_, oxigraph::sparql::SparqlSyntaxError>(())
/// ``` /// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Update { pub struct Update {
@ -122,22 +126,11 @@ pub struct Update {
impl Update { impl Update {
/// Parses a SPARQL update with an optional base IRI to resolve relative IRIs in the query. /// Parses a SPARQL update with an optional base IRI to resolve relative IRIs in the query.
pub fn parse(update: &str, base_iri: Option<&str>) -> Result<Self, spargebra::ParseError> { pub fn parse(
let update = spargebra::Update::parse(update, base_iri)?; update: &str,
Ok(Self { base_iri: Option<&str>,
using_datasets: update ) -> Result<Self, spargebra::SparqlSyntaxError> {
.operations Ok(spargebra::Update::parse(update, base_iri)?.into())
.iter()
.map(|operation| {
if let GraphUpdateOperation::DeleteInsert { using, .. } = operation {
Some(QueryDataset::from_algebra(using))
} else {
None
}
})
.collect(),
inner: update,
})
} }
/// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) in [DELETE/INSERT operations](https://www.w3.org/TR/sparql11-update/#deleteInsert). /// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) in [DELETE/INSERT operations](https://www.w3.org/TR/sparql11-update/#deleteInsert).
@ -158,29 +151,48 @@ impl fmt::Display for Update {
} }
impl FromStr for Update { impl FromStr for Update {
type Err = spargebra::ParseError; type Err = spargebra::SparqlSyntaxError;
fn from_str(update: &str) -> Result<Self, spargebra::ParseError> { fn from_str(update: &str) -> Result<Self, Self::Err> {
Self::parse(update, None) Self::parse(update, None)
} }
} }
impl<'a> TryFrom<&'a str> for Update { impl TryFrom<&str> for Update {
type Error = spargebra::ParseError; type Error = spargebra::SparqlSyntaxError;
fn try_from(update: &str) -> Result<Self, spargebra::ParseError> { fn try_from(update: &str) -> Result<Self, Self::Error> {
Self::from_str(update) Self::from_str(update)
} }
} }
impl<'a> TryFrom<&'a String> for Update { impl TryFrom<&String> for Update {
type Error = spargebra::ParseError; type Error = spargebra::SparqlSyntaxError;
fn try_from(update: &String) -> Result<Self, spargebra::ParseError> { fn try_from(update: &String) -> Result<Self, Self::Error> {
Self::from_str(update) Self::from_str(update)
} }
} }
impl From<spargebra::Update> for Update {
fn from(update: spargebra::Update) -> Self {
Self {
using_datasets: update
.operations
.iter()
.map(|operation| {
if let GraphUpdateOperation::DeleteInsert { using, .. } = operation {
Some(QueryDataset::from_algebra(using))
} else {
None
}
})
.collect(),
inner: update,
}
}
}
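The new `From<spargebra::Update>` impl centralizes the `USING` dataset extraction that `parse` previously did inline; parsing itself is unchanged. A small sketch reusing the round-trip pattern from the doc comment above (the exact serialized form is an assumption):
```rust
use oxigraph::sparql::{SparqlSyntaxError, Update};

fn parse_update() -> Result<(), SparqlSyntaxError> {
    let update = Update::parse("CLEAR ALL", None)?;
    // Assumption: the parsed update serializes back to the same string.
    assert_eq!(update.to_string().trim(), "CLEAR ALL");
    Ok(())
}
```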
/// A SPARQL query [dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) /// A SPARQL query [dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset)
#[derive(Eq, PartialEq, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct QueryDataset { pub struct QueryDataset {
@ -219,8 +231,15 @@ impl QueryDataset {
/// ``` /// ```
/// use oxigraph::sparql::Query; /// use oxigraph::sparql::Query;
/// ///
/// assert!(Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?.dataset().is_default_dataset()); /// assert!(Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?
/// assert!(!Query::parse("SELECT ?s ?p ?o FROM <http://example.com> WHERE { ?s ?p ?o . }", None)?.dataset().is_default_dataset()); /// .dataset()
/// .is_default_dataset());
/// assert!(!Query::parse(
/// "SELECT ?s ?p ?o FROM <http://example.com> WHERE { ?s ?p ?o . }",
/// None
/// )?
/// .dataset()
/// .is_default_dataset());
/// ///
/// # Ok::<_, Box<dyn std::error::Error>>(()) /// # Ok::<_, Box<dyn std::error::Error>>(())
/// ``` /// ```
@ -252,7 +271,10 @@ impl QueryDataset {
/// let mut query = Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?; /// let mut query = Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?;
/// let default = vec![NamedNode::new("http://example.com")?.into()]; /// let default = vec![NamedNode::new("http://example.com")?.into()];
/// query.dataset_mut().set_default_graph(default.clone()); /// query.dataset_mut().set_default_graph(default.clone());
/// assert_eq!(query.dataset().default_graph_graphs(), Some(default.as_slice())); /// assert_eq!(
/// query.dataset().default_graph_graphs(),
/// Some(default.as_slice())
/// );
/// ///
/// # Ok::<_, Box<dyn std::error::Error>>(()) /// # Ok::<_, Box<dyn std::error::Error>>(())
/// ``` /// ```
@ -273,8 +295,13 @@ impl QueryDataset {
/// ///
/// let mut query = Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?; /// let mut query = Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?;
/// let named = vec![NamedNode::new("http://example.com")?.into()]; /// let named = vec![NamedNode::new("http://example.com")?.into()];
/// query.dataset_mut().set_available_named_graphs(named.clone()); /// query
/// assert_eq!(query.dataset().available_named_graphs(), Some(named.as_slice())); /// .dataset_mut()
/// .set_available_named_graphs(named.clone());
/// assert_eq!(
/// query.dataset().available_named_graphs(),
/// Some(named.as_slice())
/// );
/// ///
/// # Ok::<_, Box<dyn std::error::Error>>(()) /// # Ok::<_, Box<dyn std::error::Error>>(())
/// ``` /// ```

@ -179,10 +179,6 @@ impl StrLookup for DatasetView {
self.reader.get_str(key)? self.reader.get_str(key)?
}) })
} }
fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> {
Ok(self.extra.borrow().contains_key(key) || self.reader.contains_str(key)?)
}
} }
struct EncodedDatasetSpec { struct EncodedDatasetSpec {

@ -0,0 +1,84 @@
use crate::io::RdfParseError;
use crate::model::NamedNode;
use crate::sparql::results::QueryResultsParseError as ResultsParseError;
use crate::sparql::SparqlSyntaxError;
use crate::storage::StorageError;
use std::convert::Infallible;
use std::error::Error;
use std::io;
/// A SPARQL evaluation error.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum EvaluationError {
/// An error in SPARQL parsing.
#[error(transparent)]
Parsing(#[from] SparqlSyntaxError),
/// An error from the storage.
#[error(transparent)]
Storage(#[from] StorageError),
/// An error while parsing an external RDF file.
#[error(transparent)]
GraphParsing(#[from] RdfParseError),
/// An error while parsing an external result file (likely from a federated query).
#[error(transparent)]
ResultsParsing(#[from] ResultsParseError),
/// An error returned during results serialization.
#[error(transparent)]
ResultsSerialization(#[from] io::Error),
/// Error during `SERVICE` evaluation
#[error("{0}")]
Service(#[source] Box<dyn Error + Send + Sync + 'static>),
/// Error when `CREATE` tries to create an already existing graph
#[error("The graph {0} already exists")]
GraphAlreadyExists(NamedNode),
/// Error when `DROP` or `CLEAR` tries to remove a graph that does not exist
#[error("The graph {0} does not exist")]
GraphDoesNotExist(NamedNode),
/// The variable storing the `SERVICE` name is unbound
#[error("The variable encoding the service name is unbound")]
UnboundService,
/// The given `SERVICE` is not supported
#[error("The service {0} is not supported")]
UnsupportedService(NamedNode),
/// The given content media type returned from an HTTP response is not supported (`SERVICE` and `LOAD`)
#[error("The content media type {0} is not supported")]
UnsupportedContentType(String),
/// The `SERVICE` call does not return solutions
#[error("The service is not returning solutions but a boolean or a graph")]
ServiceDoesNotReturnSolutions,
/// The results are not an RDF graph
#[error("The query results are not an RDF graph")]
NotAGraph,
}
impl From<Infallible> for EvaluationError {
#[inline]
fn from(error: Infallible) -> Self {
match error {}
}
}
impl From<EvaluationError> for io::Error {
#[inline]
fn from(error: EvaluationError) -> Self {
match error {
EvaluationError::Parsing(error) => Self::new(io::ErrorKind::InvalidData, error),
EvaluationError::GraphParsing(error) => error.into(),
EvaluationError::ResultsParsing(error) => error.into(),
EvaluationError::ResultsSerialization(error) => error,
EvaluationError::Storage(error) => error.into(),
EvaluationError::Service(error) => match error.downcast() {
Ok(error) => *error,
Err(error) => Self::new(io::ErrorKind::Other, error),
},
EvaluationError::GraphAlreadyExists(_)
| EvaluationError::GraphDoesNotExist(_)
| EvaluationError::UnboundService
| EvaluationError::UnsupportedService(_)
| EvaluationError::UnsupportedContentType(_)
| EvaluationError::ServiceDoesNotReturnSolutions
| EvaluationError::NotAGraph => Self::new(io::ErrorKind::InvalidInput, error),
}
}
}
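A sketch of how calling code might branch on the reworked error enum (assuming `Store::query` returns `Result<_, EvaluationError>`, consistent with the crate's other examples):
```rust
use oxigraph::sparql::EvaluationError;
use oxigraph::store::Store;

fn query_or_report(store: &Store, query: &str) -> Result<(), EvaluationError> {
    match store.query(query) {
        Ok(_results) => Ok(()),
        // A syntax error is a user mistake: report it and keep going.
        Err(EvaluationError::Parsing(e)) => {
            eprintln!("invalid SPARQL: {e}");
            Ok(())
        }
        // Storage, federation and serialization errors bubble up.
        Err(e) => Err(e),
    }
}
```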

File diff suppressed because it is too large

@ -3,18 +3,18 @@
use std::io::{Empty, Error, ErrorKind, Result}; use std::io::{Empty, Error, ErrorKind, Result};
use std::time::Duration; use std::time::Duration;
pub struct Client {} pub struct Client;
impl Client { impl Client {
pub fn new(_timeout: Option<Duration>, _redirection_limit: usize) -> Self { pub fn new(_timeout: Option<Duration>, _redirection_limit: usize) -> Self {
Self {} Self
} }
#[allow(clippy::unused_self)] #[allow(clippy::unused_self)]
pub fn get(&self, _url: &str, _accept: &str) -> Result<(String, Empty)> { pub fn get(&self, _url: &str, _accept: &'static str) -> Result<(String, Empty)> {
Err(Error::new( Err(Error::new(
ErrorKind::Unsupported, ErrorKind::Unsupported,
"HTTP client is not available. Enable the feature 'http_client'", "HTTP client is not available. Enable the feature 'http-client'",
)) ))
} }
@ -23,12 +23,12 @@ impl Client {
&self, &self,
_url: &str, _url: &str,
_payload: Vec<u8>, _payload: Vec<u8>,
_content_type: &str, _content_type: &'static str,
_accept: &str, _accept: &'static str,
) -> Result<(String, Empty)> { ) -> Result<(String, Empty)> {
Err(Error::new( Err(Error::new(
ErrorKind::Unsupported, ErrorKind::Unsupported,
"HTTP client is not available. Enable the feature 'http_client'", "HTTP client is not available. Enable the feature 'http-client'",
)) ))
} }
} }

@ -0,0 +1,9 @@
#[cfg(not(feature = "http-client"))]
mod dummy;
#[cfg(feature = "http-client")]
mod simple;
#[cfg(not(feature = "http-client"))]
pub use dummy::Client;
#[cfg(feature = "http-client")]
pub use simple::Client;

@@ -8,18 +8,17 @@ pub struct Client {
 impl Client {
     pub fn new(timeout: Option<Duration>, redirection_limit: usize) -> Self {
-        let mut client = oxhttp::Client::new();
+        let mut client = oxhttp::Client::new()
+            .with_redirection_limit(redirection_limit)
+            .with_user_agent(concat!("Oxigraph/", env!("CARGO_PKG_VERSION")))
+            .unwrap();
         if let Some(timeout) = timeout {
-            client.set_global_timeout(timeout);
+            client = client.with_global_timeout(timeout);
         }
-        client.set_redirection_limit(redirection_limit);
-        client
-            .set_user_agent(concat!("Oxigraph/", env!("CARGO_PKG_VERSION")))
-            .unwrap();
         Self { client }
     }
 
-    pub fn get(&self, url: &str, accept: &str) -> Result<(String, Body)> {
+    pub fn get(&self, url: &str, accept: &'static str) -> Result<(String, Body)> {
         let request = Request::builder(Method::GET, url.parse().map_err(invalid_input_error)?)
             .with_header(HeaderName::ACCEPT, accept)
             .map_err(invalid_input_error)?
@@ -50,8 +49,8 @@ impl Client {
         &self,
         url: &str,
         payload: Vec<u8>,
-        content_type: &str,
-        accept: &str,
+        content_type: &'static str,
+        accept: &'static str,
     ) -> Result<(String, Body)> {
         let request = Request::builder(Method::POST, url.parse().map_err(invalid_input_error)?)
             .with_header(HeaderName::ACCEPT, accept)

@@ -8,29 +8,29 @@ mod error;
 mod eval;
 mod http;
 mod model;
-mod plan;
-mod plan_builder;
+pub mod results;
 mod service;
 mod update;
 
 use crate::model::{NamedNode, Term};
 pub use crate::sparql::algebra::{Query, QueryDataset, Update};
 use crate::sparql::dataset::DatasetView;
-pub use crate::sparql::error::{EvaluationError, QueryError};
-use crate::sparql::eval::{SimpleEvaluator, Timer};
+pub use crate::sparql::error::EvaluationError;
+use crate::sparql::eval::{EvalNodeWithStats, SimpleEvaluator, Timer};
 pub use crate::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter};
-use crate::sparql::plan::PlanNodeWithStats;
-use crate::sparql::plan_builder::PlanBuilder;
 pub use crate::sparql::service::ServiceHandler;
 use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler};
 pub(crate) use crate::sparql::update::evaluate_update;
 use crate::storage::StorageReader;
-use json_event_parser::{JsonEvent, JsonWriter};
+use json_event_parser::{JsonEvent, ToWriteJsonWriter};
 pub use oxrdf::{Variable, VariableNameParseError};
-pub use sparesults::QueryResultsFormat;
-pub use spargebra::ParseError;
+use oxsdatatypes::{DayTimeDuration, Float};
+pub use spargebra::SparqlSyntaxError;
+use sparopt::algebra::GraphPattern;
+use sparopt::Optimizer;
 use std::collections::HashMap;
 use std::rc::Rc;
+use std::sync::Arc;
 use std::time::Duration;
 use std::{fmt, io};
@@ -48,43 +48,39 @@ pub(crate) fn evaluate_query(
         spargebra::Query::Select {
             pattern, base_iri, ..
         } => {
-            let (plan, variables) = PlanBuilder::build(
-                &dataset,
-                &pattern,
-                true,
-                &options.custom_functions,
-                options.without_optimizations,
-            )?;
+            let mut pattern = GraphPattern::from(&pattern);
+            if !options.without_optimizations {
+                pattern = Optimizer::optimize_graph_pattern(pattern);
+            }
             let planning_duration = start_planning.elapsed();
             let (results, explanation) = SimpleEvaluator::new(
                 Rc::new(dataset),
                 base_iri.map(Rc::new),
                 options.service_handler(),
-                Rc::new(options.custom_functions),
+                Arc::new(options.custom_functions),
                 run_stats,
             )
-            .evaluate_select_plan(Rc::new(plan), Rc::new(variables));
+            .evaluate_select(&pattern);
             (Ok(results), explanation, planning_duration)
         }
         spargebra::Query::Ask {
             pattern, base_iri, ..
         } => {
-            let (plan, _) = PlanBuilder::build(
-                &dataset,
-                &pattern,
-                false,
-                &options.custom_functions,
-                options.without_optimizations,
-            )?;
+            let mut pattern = GraphPattern::from(&pattern);
+            if !options.without_optimizations {
+                pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
+                    inner: Box::new(pattern),
+                });
+            }
             let planning_duration = start_planning.elapsed();
             let (results, explanation) = SimpleEvaluator::new(
                 Rc::new(dataset),
                 base_iri.map(Rc::new),
                 options.service_handler(),
-                Rc::new(options.custom_functions),
+                Arc::new(options.custom_functions),
                 run_stats,
             )
-            .evaluate_ask_plan(Rc::new(plan));
+            .evaluate_ask(&pattern);
             (results, explanation, planning_duration)
         }
         spargebra::Query::Construct {
@@ -93,50 +89,41 @@ pub(crate) fn evaluate_query(
             base_iri,
             ..
         } => {
-            let (plan, variables) = PlanBuilder::build(
-                &dataset,
-                &pattern,
-                false,
-                &options.custom_functions,
-                options.without_optimizations,
-            )?;
-            let construct = PlanBuilder::build_graph_template(
-                &dataset,
-                &template,
-                variables,
-                &options.custom_functions,
-                options.without_optimizations,
-            );
+            let mut pattern = GraphPattern::from(&pattern);
+            if !options.without_optimizations {
+                pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
+                    inner: Box::new(pattern),
+                });
+            }
             let planning_duration = start_planning.elapsed();
             let (results, explanation) = SimpleEvaluator::new(
                 Rc::new(dataset),
                 base_iri.map(Rc::new),
                 options.service_handler(),
-                Rc::new(options.custom_functions),
+                Arc::new(options.custom_functions),
                 run_stats,
             )
-            .evaluate_construct_plan(Rc::new(plan), construct);
+            .evaluate_construct(&pattern, &template);
             (Ok(results), explanation, planning_duration)
         }
         spargebra::Query::Describe {
             pattern, base_iri, ..
         } => {
-            let (plan, _) = PlanBuilder::build(
-                &dataset,
-                &pattern,
-                false,
-                &options.custom_functions,
-                options.without_optimizations,
-            )?;
+            let mut pattern = GraphPattern::from(&pattern);
+            if !options.without_optimizations {
+                pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
+                    inner: Box::new(pattern),
+                });
+            }
             let planning_duration = start_planning.elapsed();
             let (results, explanation) = SimpleEvaluator::new(
                 Rc::new(dataset),
                 base_iri.map(Rc::new),
                 options.service_handler(),
-                Rc::new(options.custom_functions),
+                Arc::new(options.custom_functions),
                 run_stats,
             )
-            .evaluate_describe_plan(Rc::new(plan));
+            .evaluate_describe(&pattern);
             (Ok(results), explanation, planning_duration)
         }
     };
@@ -152,36 +139,39 @@ pub(crate) fn evaluate_query(
 /// Options for SPARQL query evaluation.
 ///
 ///
-/// If the `"http_client"` optional feature is enabled,
+/// If the `"http-client"` optional feature is enabled,
 /// a simple HTTP 1.1 client is used to execute [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE calls.
 ///
 /// Usage example disabling the federated query support:
 /// ```
-/// use oxigraph::store::Store;
 /// use oxigraph::sparql::QueryOptions;
+/// use oxigraph::store::Store;
 ///
 /// let store = Store::new()?;
 /// store.query_opt(
 ///     "SELECT * WHERE { SERVICE <https://query.wikidata.org/sparql> {} }",
-///     QueryOptions::default().without_service_handler()
+///     QueryOptions::default().without_service_handler(),
 /// )?;
 /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
 /// ```
 #[derive(Clone, Default)]
 pub struct QueryOptions {
-    service_handler: Option<Rc<dyn ServiceHandler<Error = EvaluationError>>>,
-    custom_functions: HashMap<NamedNode, Rc<dyn Fn(&[Term]) -> Option<Term>>>,
+    service_handler: Option<Arc<dyn ServiceHandler<Error = EvaluationError>>>,
+    custom_functions: CustomFunctionRegistry,
     http_timeout: Option<Duration>,
     http_redirection_limit: usize,
     without_optimizations: bool,
 }
 
+pub(crate) type CustomFunctionRegistry =
+    HashMap<NamedNode, Arc<dyn (Fn(&[Term]) -> Option<Term>) + Send + Sync>>;
+
 impl QueryOptions {
     /// Use a given [`ServiceHandler`] to execute [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE calls.
     #[inline]
     #[must_use]
     pub fn with_service_handler(mut self, service_handler: impl ServiceHandler + 'static) -> Self {
-        self.service_handler = Some(Rc::new(ErrorConversionServiceHandler::wrap(
+        self.service_handler = Some(Arc::new(ErrorConversionServiceHandler::wrap(
             service_handler,
         )));
         self
@@ -191,12 +181,12 @@ impl QueryOptions {
     #[inline]
     #[must_use]
     pub fn without_service_handler(mut self) -> Self {
-        self.service_handler = Some(Rc::new(EmptyServiceHandler));
+        self.service_handler = Some(Arc::new(EmptyServiceHandler));
         self
     }
 
     /// Sets a timeout for HTTP requests done during SPARQL evaluation.
-    #[cfg(feature = "http_client")]
+    #[cfg(feature = "http-client")]
     #[inline]
     #[must_use]
     pub fn with_http_timeout(mut self, timeout: Duration) -> Self {
@@ -207,7 +197,7 @@ impl QueryOptions {
     /// Sets an upper bound of the number of HTTP redirection followed per HTTP request done during SPARQL evaluation.
     ///
     /// By default this value is `0`.
-    #[cfg(feature = "http_client")]
+    #[cfg(feature = "http-client")]
     #[inline]
     #[must_use]
     pub fn with_http_redirection_limit(mut self, redirection_limit: usize) -> Self {
@@ -219,9 +209,9 @@ impl QueryOptions {
     ///
     /// Example with a function serializing terms to N-Triples:
     /// ```
-    /// use oxigraph::store::Store;
     /// use oxigraph::model::*;
     /// use oxigraph::sparql::{QueryOptions, QueryResults};
+    /// use oxigraph::store::Store;
     ///
     /// let store = Store::new()?;
     ///
@@ -229,10 +219,13 @@ impl QueryOptions {
     ///     "SELECT (<http://www.w3.org/ns/formats/N-Triples>(1) AS ?nt) WHERE {}",
     ///     QueryOptions::default().with_custom_function(
     ///         NamedNode::new("http://www.w3.org/ns/formats/N-Triples")?,
-    ///         |args| args.get(0).map(|t| Literal::from(t.to_string()).into())
-    ///     )
+    ///         |args| args.get(0).map(|t| Literal::from(t.to_string()).into()),
+    ///     ),
     /// )? {
-    ///     assert_eq!(solutions.next().unwrap()?.get("nt"), Some(&Literal::from("\"1\"^^<http://www.w3.org/2001/XMLSchema#integer>").into()));
+    ///     assert_eq!(
+    ///         solutions.next().unwrap()?.get("nt"),
+    ///         Some(&Literal::from("\"1\"^^<http://www.w3.org/2001/XMLSchema#integer>").into())
+    ///     );
     /// }
     /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
     /// ```
@@ -241,21 +234,21 @@ impl QueryOptions {
     pub fn with_custom_function(
         mut self,
         name: NamedNode,
-        evaluator: impl Fn(&[Term]) -> Option<Term> + 'static,
+        evaluator: impl Fn(&[Term]) -> Option<Term> + Send + Sync + 'static,
     ) -> Self {
-        self.custom_functions.insert(name, Rc::new(evaluator));
+        self.custom_functions.insert(name, Arc::new(evaluator));
         self
     }
 
-    fn service_handler(&self) -> Rc<dyn ServiceHandler<Error = EvaluationError>> {
+    fn service_handler(&self) -> Arc<dyn ServiceHandler<Error = EvaluationError>> {
         self.service_handler.clone().unwrap_or_else(|| {
-            if cfg!(feature = "http_client") {
-                Rc::new(service::SimpleServiceHandler::new(
+            if cfg!(feature = "http-client") {
+                Arc::new(service::SimpleServiceHandler::new(
                     self.http_timeout,
                     self.http_redirection_limit,
                 ))
             } else {
-                Rc::new(EmptyServiceHandler)
+                Arc::new(EmptyServiceHandler)
             }
         })
     }
@@ -285,28 +278,30 @@ impl From<QueryOptions> for UpdateOptions {
 /// The explanation of a query.
 #[derive(Clone)]
 pub struct QueryExplanation {
-    inner: Rc<PlanNodeWithStats>,
+    inner: Rc<EvalNodeWithStats>,
     with_stats: bool,
-    parsing_duration: Option<Duration>,
-    planning_duration: Duration,
+    parsing_duration: Option<DayTimeDuration>,
+    planning_duration: Option<DayTimeDuration>,
 }
 
 impl QueryExplanation {
     /// Writes the explanation as JSON.
-    pub fn write_in_json(&self, output: impl io::Write) -> io::Result<()> {
-        let mut writer = JsonWriter::from_writer(output);
+    pub fn write_in_json(&self, write: impl io::Write) -> io::Result<()> {
+        let mut writer = ToWriteJsonWriter::new(write);
         writer.write_event(JsonEvent::StartObject)?;
         if let Some(parsing_duration) = self.parsing_duration {
-            writer.write_event(JsonEvent::ObjectKey("parsing duration in seconds"))?;
+            writer.write_event(JsonEvent::ObjectKey("parsing duration in seconds".into()))?;
             writer.write_event(JsonEvent::Number(
-                &parsing_duration.as_secs_f32().to_string(),
+                parsing_duration.as_seconds().to_string().into(),
             ))?;
         }
-        writer.write_event(JsonEvent::ObjectKey("planning duration in seconds"))?;
-        writer.write_event(JsonEvent::Number(
-            &self.planning_duration.as_secs_f32().to_string(),
-        ))?;
-        writer.write_event(JsonEvent::ObjectKey("plan"))?;
+        if let Some(planning_duration) = self.planning_duration {
+            writer.write_event(JsonEvent::ObjectKey("planning duration in seconds".into()))?;
+            writer.write_event(JsonEvent::Number(
+                planning_duration.as_seconds().to_string().into(),
+            ))?;
+        }
+        writer.write_event(JsonEvent::ObjectKey("plan".into()))?;
        self.inner.json_node(&mut writer, self.with_stats)?;
        writer.write_event(JsonEvent::EndObject)
    }
@@ -314,6 +309,20 @@ impl QueryExplanation {
 impl fmt::Debug for QueryExplanation {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "{:?}", self.inner)
+        let mut obj = f.debug_struct("QueryExplanation");
+        if let Some(parsing_duration) = self.parsing_duration {
+            obj.field(
+                "parsing duration in seconds",
+                &f32::from(Float::from(parsing_duration.as_seconds())),
+            );
+        }
+        if let Some(planning_duration) = self.planning_duration {
+            obj.field(
+                "planning duration in seconds",
+                &f32::from(Float::from(planning_duration.as_seconds())),
+            );
+        }
+        obj.field("tree", &self.inner);
+        obj.finish_non_exhaustive()
     }
 }

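The `QueryOptions` builder above chains: a service handler, custom functions, and (behind the `http-client` feature) HTTP tuning. A small usage sketch combining these, assuming the `oxigraph` crate with the `http-client` feature enabled:

use oxigraph::sparql::QueryOptions;
use oxigraph::store::Store;
use std::time::Duration;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = Store::new()?;
    // Each builder call consumes and returns the options value.
    let options = QueryOptions::default()
        .with_http_timeout(Duration::from_secs(10))
        .with_http_redirection_limit(2);
    store.query_opt("ASK { ?s ?p ?o }", options)?;
    Ok(())
}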
@@ -0,0 +1,373 @@
use crate::io::{RdfFormat, RdfSerializer};
use crate::model::*;
use crate::sparql::error::EvaluationError;
use crate::sparql::results::{
FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsFormat,
QueryResultsParseError, QueryResultsParser, QueryResultsSerializer,
};
use oxrdf::{Variable, VariableRef};
pub use sparesults::QuerySolution;
use std::io::{Read, Write};
use std::sync::Arc;
/// Results of a [SPARQL query](https://www.w3.org/TR/sparql11-query/).
pub enum QueryResults {
/// Results of a [SELECT](https://www.w3.org/TR/sparql11-query/#select) query.
Solutions(QuerySolutionIter),
/// Result of an [ASK](https://www.w3.org/TR/sparql11-query/#ask) query.
Boolean(bool),
/// Results of a [CONSTRUCT](https://www.w3.org/TR/sparql11-query/#construct) or [DESCRIBE](https://www.w3.org/TR/sparql11-query/#describe) query.
Graph(QueryTripleIter),
}
impl QueryResults {
/// Reads a SPARQL query results serialization.
pub fn read(
read: impl Read + 'static,
format: QueryResultsFormat,
) -> Result<Self, QueryResultsParseError> {
Ok(QueryResultsParser::from_format(format)
.parse_read(read)?
.into())
}
/// Writes the query results (solutions or boolean).
///
/// This method fails if it is called on the `Graph` results.
///
/// ```
/// use oxigraph::store::Store;
/// use oxigraph::model::*;
/// use oxigraph::sparql::results::QueryResultsFormat;
///
/// let store = Store::new()?;
/// let ex = NamedNodeRef::new("http://example.com")?;
/// store.insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
///
/// let results = store.query("SELECT ?s WHERE { ?s ?p ?o }")?;
/// assert_eq!(
/// results.write(Vec::new(), QueryResultsFormat::Json)?,
/// r#"{"head":{"vars":["s"]},"results":{"bindings":[{"s":{"type":"uri","value":"http://example.com"}}]}}"#.as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn write<W: Write>(
self,
write: W,
format: QueryResultsFormat,
) -> Result<W, EvaluationError> {
let serializer = QueryResultsSerializer::from_format(format);
match self {
Self::Boolean(value) => serializer.serialize_boolean_to_write(write, value),
Self::Solutions(solutions) => {
let mut writer = serializer
.serialize_solutions_to_write(write, solutions.variables().to_vec())
.map_err(EvaluationError::ResultsSerialization)?;
for solution in solutions {
writer
.write(&solution?)
.map_err(EvaluationError::ResultsSerialization)?;
}
writer.finish()
}
Self::Graph(triples) => {
let s = VariableRef::new_unchecked("subject");
let p = VariableRef::new_unchecked("predicate");
let o = VariableRef::new_unchecked("object");
let mut writer = serializer
.serialize_solutions_to_write(
write,
vec![s.into_owned(), p.into_owned(), o.into_owned()],
)
.map_err(EvaluationError::ResultsSerialization)?;
for triple in triples {
let triple = triple?;
writer
.write([
(s, &triple.subject.into()),
(p, &triple.predicate.into()),
(o, &triple.object),
])
.map_err(EvaluationError::ResultsSerialization)?;
}
writer.finish()
}
}
.map_err(EvaluationError::ResultsSerialization)
}
/// Writes the graph query results.
///
/// This method fails if it is called on the `Solution` or `Boolean` results.
///
/// ```
/// use oxigraph::io::RdfFormat;
/// use oxigraph::model::*;
/// use oxigraph::store::Store;
///
/// let graph = "<http://example.com> <http://example.com> <http://example.com> .\n";
///
/// let store = Store::new()?;
/// store.load_graph(
/// graph.as_bytes(),
/// RdfFormat::NTriples,
/// GraphName::DefaultGraph,
/// None,
/// )?;
///
/// let results = store.query("CONSTRUCT WHERE { ?s ?p ?o }")?;
/// assert_eq!(
/// results.write_graph(Vec::new(), RdfFormat::NTriples)?,
/// graph.as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn write_graph<W: Write>(
self,
write: W,
format: impl Into<RdfFormat>,
) -> Result<W, EvaluationError> {
if let Self::Graph(triples) = self {
let mut writer = RdfSerializer::from_format(format.into()).serialize_to_write(write);
for triple in triples {
writer
.write_triple(&triple?)
.map_err(EvaluationError::ResultsSerialization)?;
}
writer
.finish()
.map_err(EvaluationError::ResultsSerialization)
} else {
Err(EvaluationError::NotAGraph)
}
}
}
impl From<QuerySolutionIter> for QueryResults {
#[inline]
fn from(value: QuerySolutionIter) -> Self {
Self::Solutions(value)
}
}
impl<R: Read + 'static> From<FromReadQueryResultsReader<R>> for QueryResults {
fn from(reader: FromReadQueryResultsReader<R>) -> Self {
match reader {
FromReadQueryResultsReader::Solutions(s) => Self::Solutions(s.into()),
FromReadQueryResultsReader::Boolean(v) => Self::Boolean(v),
}
}
}
/// An iterator over [`QuerySolution`]s.
///
/// ```
/// use oxigraph::sparql::QueryResults;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }")? {
/// for solution in solutions {
/// println!("{:?}", solution?.get("s"));
/// }
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct QuerySolutionIter {
variables: Arc<[Variable]>,
iter: Box<dyn Iterator<Item = Result<QuerySolution, EvaluationError>>>,
}
impl QuerySolutionIter {
/// Constructs a new iterator of solutions from an ordered list of solution variables and an iterator of solution tuples
/// (each tuple using the same ordering as the variable list, such that tuple element 0 is the value of variable 0...)
pub fn new(
variables: Arc<[Variable]>,
iter: impl Iterator<Item = Result<Vec<Option<Term>>, EvaluationError>> + 'static,
) -> Self {
Self {
variables: Arc::clone(&variables),
iter: Box::new(
iter.map(move |t| t.map(|values| (Arc::clone(&variables), values).into())),
),
}
}
/// The variables used in the solutions.
///
/// ```
/// use oxigraph::sparql::{QueryResults, Variable};
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s ?o WHERE { ?s ?p ?o }")? {
/// assert_eq!(
/// solutions.variables(),
/// &[Variable::new("s")?, Variable::new("o")?]
/// );
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn variables(&self) -> &[Variable] {
&self.variables
}
}
impl<R: Read + 'static> From<FromReadSolutionsReader<R>> for QuerySolutionIter {
fn from(reader: FromReadSolutionsReader<R>) -> Self {
Self {
variables: reader.variables().into(),
iter: Box::new(reader.map(|t| t.map_err(EvaluationError::from))),
}
}
}
impl Iterator for QuerySolutionIter {
type Item = Result<QuerySolution, EvaluationError>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}
/// An iterator over the triples that compose a graph solution.
///
/// ```
/// use oxigraph::sparql::QueryResults;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Graph(triples) = store.query("CONSTRUCT WHERE { ?s ?p ?o }")? {
/// for triple in triples {
/// println!("{}", triple?);
/// }
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct QueryTripleIter {
pub(crate) iter: Box<dyn Iterator<Item = Result<Triple, EvaluationError>>>,
}
impl Iterator for QueryTripleIter {
type Item = Result<Triple, EvaluationError>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
#[inline]
fn fold<Acc, G>(self, init: Acc, g: G) -> Acc
where
G: FnMut(Acc, Self::Item) -> Acc,
{
self.iter.fold(init, g)
}
}
#[cfg(test)]
mod tests {
#![allow(clippy::panic_in_result_fn)]
use super::*;
use std::io::Cursor;
#[test]
fn test_serialization_roundtrip() -> Result<(), EvaluationError> {
use std::str;
for format in [
QueryResultsFormat::Json,
QueryResultsFormat::Xml,
QueryResultsFormat::Tsv,
] {
let results = vec![
QueryResults::Boolean(true),
QueryResults::Boolean(false),
QueryResults::Solutions(QuerySolutionIter::new(
[
Variable::new_unchecked("foo"),
Variable::new_unchecked("bar"),
]
.as_ref()
.into(),
Box::new(
vec![
Ok(vec![None, None]),
Ok(vec![
Some(NamedNode::new_unchecked("http://example.com").into()),
None,
]),
Ok(vec![
None,
Some(NamedNode::new_unchecked("http://example.com").into()),
]),
Ok(vec![
Some(BlankNode::new_unchecked("foo").into()),
Some(BlankNode::new_unchecked("bar").into()),
]),
Ok(vec![Some(Literal::new_simple_literal("foo").into()), None]),
Ok(vec![
Some(
Literal::new_language_tagged_literal_unchecked("foo", "fr")
.into(),
),
None,
]),
Ok(vec![
Some(Literal::from(1).into()),
Some(Literal::from(true).into()),
]),
Ok(vec![
Some(Literal::from(1.33).into()),
Some(Literal::from(false).into()),
]),
Ok(vec![
Some(
Triple::new(
NamedNode::new_unchecked("http://example.com/s"),
NamedNode::new_unchecked("http://example.com/p"),
Triple::new(
NamedNode::new_unchecked("http://example.com/os"),
NamedNode::new_unchecked("http://example.com/op"),
NamedNode::new_unchecked("http://example.com/oo"),
),
)
.into(),
),
None,
]),
]
.into_iter(),
),
)),
];
for ex in results {
let mut buffer = Vec::new();
ex.write(&mut buffer, format)?;
let ex2 = QueryResults::read(Cursor::new(buffer.clone()), format)?;
let mut buffer2 = Vec::new();
ex2.write(&mut buffer2, format)?;
assert_eq!(
str::from_utf8(&buffer).unwrap(),
str::from_utf8(&buffer2).unwrap()
);
}
}
Ok(())
}
}

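The `QuerySolutionIter::new` constructor above also lets callers build solution streams by hand. A minimal sketch, assuming the `oxigraph` crate as a dependency, with a single solution binding ?s to an IRI:

use oxigraph::model::{NamedNode, Term};
use oxigraph::sparql::{EvaluationError, QuerySolutionIter, Variable};
use std::sync::Arc;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let variables: Arc<[Variable]> = Arc::new([Variable::new("s")?]);
    // One tuple, ordered like `variables`: element 0 is the value of ?s.
    let solutions = vec![Ok::<_, EvaluationError>(vec![Some(Term::from(
        NamedNode::new("http://example.com")?,
    ))])];
    let iter = QuerySolutionIter::new(Arc::clone(&variables), solutions.into_iter());
    for solution in iter {
        println!("{:?}", solution?.get("s"));
    }
    Ok(())
}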
@@ -0,0 +1,44 @@
//! Utilities to read and write RDF results formats using [sparesults](https://crates.io/crates/sparesults).
//!
//! It supports [SPARQL Query Results XML Format (Second Edition)](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
//!
//! Usage example converting a JSON result file into a TSV result file:
//!
//! ```
//! use oxigraph::sparql::results::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader, QueryResultsSerializer};
//! use std::io::Result;
//!
//! fn convert_json_to_tsv(json_file: &[u8]) -> Result<Vec<u8>> {
//! let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
//! let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv);
//! // We start reading the JSON file to find out which kind of results it contains
//! match json_parser.parse_read(json_file)? {
//! FromReadQueryResultsReader::Boolean(value) => {
//! // it's a boolean result; we serialize it as TSV into the output buffer
//! tsv_serializer.serialize_boolean_to_write(Vec::new(), value)
//! }
//! FromReadQueryResultsReader::Solutions(solutions_reader) => {
//! // it's a set of solutions; we create a writer and stream the solutions from the JSON file into it
//! let mut serialize_solutions_to_write = tsv_serializer.serialize_solutions_to_write(Vec::new(), solutions_reader.variables().to_vec())?;
//! for solution in solutions_reader {
//! serialize_solutions_to_write.write(&solution?)?;
//! }
//! serialize_solutions_to_write.finish()
//! }
//! }
//! }
//!
//! // Let's test with a boolean
//! assert_eq!(
//! convert_json_to_tsv(br#"{"boolean":true}"#.as_slice()).unwrap(),
//! b"true"
//! );
//!
//! // And with a set of solutions
//! assert_eq!(
//! convert_json_to_tsv(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice()).unwrap(),
//! b"?foo\t?bar\n\"test\"\t\n"
//! );
//! ```
pub use sparesults::*;

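Beyond the streaming parser/serializer pair shown in the module documentation, the higher-level `QueryResults` wrapper can do the same conversion in two calls. A minimal sketch using only the `read` and `write` methods documented above:

use oxigraph::sparql::results::QueryResultsFormat;
use oxigraph::sparql::QueryResults;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Parse a JSON boolean result and re-serialize it as XML.
    let json = br#"{"boolean":true}"#;
    let results = QueryResults::read(json.as_slice(), QueryResultsFormat::Json)?;
    let xml = results.write(Vec::new(), QueryResultsFormat::Xml)?;
    println!("{}", String::from_utf8(xml)?);
    Ok(())
}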
@@ -3,9 +3,8 @@ use crate::sparql::algebra::Query;
 use crate::sparql::error::EvaluationError;
 use crate::sparql::http::Client;
 use crate::sparql::model::QueryResults;
-use crate::sparql::QueryResultsFormat;
+use crate::sparql::results::QueryResultsFormat;
 use std::error::Error;
-use std::io::BufReader;
 use std::time::Duration;
 
 /// Handler for [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE.
@@ -14,18 +13,22 @@ use std::time::Duration;
 /// before evaluating a SPARQL query that uses SERVICE calls.
 ///
 /// ```
-/// use oxigraph::store::Store;
 /// use oxigraph::model::*;
-/// use oxigraph::sparql::{QueryOptions, QueryResults, ServiceHandler, Query, EvaluationError};
+/// use oxigraph::sparql::{EvaluationError, Query, QueryOptions, QueryResults, ServiceHandler};
+/// use oxigraph::store::Store;
 ///
 /// struct TestServiceHandler {
-///     store: Store
+///     store: Store,
 /// }
 ///
 /// impl ServiceHandler for TestServiceHandler {
 ///     type Error = EvaluationError;
 ///
-///     fn handle(&self,service_name: NamedNode, query: Query) -> Result<QueryResults,EvaluationError> {
+///     fn handle(
+///         &self,
+///         service_name: NamedNode,
+///         query: Query,
+///     ) -> Result<QueryResults, Self::Error> {
 ///         if service_name == "http://example.com/service" {
 ///             self.store.query(query)
 ///         } else {
@@ -36,20 +39,23 @@ use std::time::Duration;
 ///
 /// let store = Store::new()?;
 /// let service = TestServiceHandler {
-///     store: Store::new()?
+///     store: Store::new()?,
 /// };
 /// let ex = NamedNodeRef::new("http://example.com")?;
-/// service.store.insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
+/// service
+///     .store
+///     .insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
 ///
 /// if let QueryResults::Solutions(mut solutions) = store.query_opt(
 ///     "SELECT ?s WHERE { SERVICE <http://example.com/service> { ?s ?p ?o } }",
-///     QueryOptions::default().with_service_handler(service)
+///     QueryOptions::default().with_service_handler(service),
 /// )? {
 ///     assert_eq!(solutions.next().unwrap()?.get("s"), Some(&ex.into()));
 /// }
 /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
 /// ```
-pub trait ServiceHandler {
+pub trait ServiceHandler: Send + Sync {
+    /// The service evaluation error.
     type Error: Error + Send + Sync + 'static;
 
     /// Evaluates a [`Query`] against a given service identified by a [`NamedNode`].
@@ -61,10 +67,8 @@ pub struct EmptyServiceHandler;
 impl ServiceHandler for EmptyServiceHandler {
     type Error = EvaluationError;
 
-    fn handle(&self, _: NamedNode, _: Query) -> Result<QueryResults, EvaluationError> {
-        Err(EvaluationError::msg(
-            "The SERVICE feature is not implemented",
-        ))
+    fn handle(&self, name: NamedNode, _: Query) -> Result<QueryResults, Self::Error> {
+        Err(EvaluationError::UnsupportedService(name))
     }
 }
@@ -81,14 +85,10 @@ impl<S: ServiceHandler> ErrorConversionServiceHandler<S> {
 impl<S: ServiceHandler> ServiceHandler for ErrorConversionServiceHandler<S> {
     type Error = EvaluationError;
 
-    fn handle(
-        &self,
-        service_name: NamedNode,
-        query: Query,
-    ) -> Result<QueryResults, EvaluationError> {
+    fn handle(&self, service_name: NamedNode, query: Query) -> Result<QueryResults, Self::Error> {
         self.handler
             .handle(service_name, query)
-            .map_err(EvaluationError::wrap)
+            .map_err(|e| EvaluationError::Service(Box::new(e)))
     }
 }
@@ -107,22 +107,18 @@ impl SimpleServiceHandler {
 impl ServiceHandler for SimpleServiceHandler {
     type Error = EvaluationError;
 
-    fn handle(
-        &self,
-        service_name: NamedNode,
-        query: Query,
-    ) -> Result<QueryResults, EvaluationError> {
-        let (content_type, body) = self.client.post(
-            service_name.as_str(),
-            query.to_string().into_bytes(),
-            "application/sparql-query",
-            "application/sparql-results+json, application/sparql-results+xml",
-        )?;
-        let format = QueryResultsFormat::from_media_type(&content_type).ok_or_else(|| {
-            EvaluationError::msg(format!(
-                "Unsupported Content-Type returned by {service_name}: {content_type}"
-            ))
-        })?;
-        Ok(QueryResults::read(BufReader::new(body), format)?)
+    fn handle(&self, service_name: NamedNode, query: Query) -> Result<QueryResults, Self::Error> {
+        let (content_type, body) = self
+            .client
+            .post(
+                service_name.as_str(),
+                query.to_string().into_bytes(),
+                "application/sparql-query",
+                "application/sparql-results+json, application/sparql-results+xml",
+            )
+            .map_err(|e| EvaluationError::Service(Box::new(e)))?;
+        let format = QueryResultsFormat::from_media_type(&content_type)
+            .ok_or_else(|| EvaluationError::UnsupportedContentType(content_type))?;
+        Ok(QueryResults::read(body, format)?)
     }
 }

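With the default handler replaced by `EmptyServiceHandler`, a SERVICE call now fails with the typed `UnsupportedService` variant rather than a free-form message. A sketch of catching it, assuming the `oxigraph` crate (depending on evaluation order, the error may instead be returned by `query_opt` itself):

use oxigraph::sparql::{EvaluationError, QueryOptions, QueryResults};
use oxigraph::store::Store;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = Store::new()?;
    if let QueryResults::Solutions(mut solutions) = store.query_opt(
        "SELECT * WHERE { SERVICE <http://example.com/sparql> { ?s ?p ?o } }",
        QueryOptions::default().without_service_handler(),
    )? {
        // The SERVICE call is typically performed lazily, while iterating.
        if let Some(Err(EvaluationError::UnsupportedService(name))) = solutions.next() {
            eprintln!("no handler registered for {name}");
        }
    }
    Ok(())
}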
@@ -1,12 +1,9 @@
-use crate::io::read::ParseError;
-use crate::io::{GraphFormat, GraphParser};
+use crate::io::{RdfFormat, RdfParser};
 use crate::model::{GraphName as OxGraphName, GraphNameRef, Quad as OxQuad};
 use crate::sparql::algebra::QueryDataset;
 use crate::sparql::dataset::DatasetView;
-use crate::sparql::eval::SimpleEvaluator;
+use crate::sparql::eval::{EncodedTuple, SimpleEvaluator};
 use crate::sparql::http::Client;
-use crate::sparql::plan::EncodedTuple;
-use crate::sparql::plan_builder::PlanBuilder;
 use crate::sparql::{EvaluationError, Update, UpdateOptions};
 use crate::storage::numeric_encoder::{Decoder, EncodedTerm};
 use crate::storage::StorageWriter;
@@ -18,9 +15,11 @@ use spargebra::term::{
     Quad, QuadPattern, Subject, Term, TermPattern, Triple, TriplePattern, Variable,
 };
 use spargebra::GraphUpdateOperation;
+use sparopt::Optimizer;
 use std::collections::HashMap;
-use std::io::BufReader;
+use std::io;
 use std::rc::Rc;
+use std::sync::Arc;
 
 pub fn evaluate_update<'a, 'b: 'a>(
     transaction: &'a mut StorageWriter<'b>,
@@ -74,9 +73,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
             } => self.eval_delete_insert(
                 delete,
                 insert,
-                using_dataset
-                    .as_ref()
-                    .ok_or_else(|| EvaluationError::msg("No dataset"))?,
+                using_dataset.as_ref().unwrap_or(&QueryDataset::new()),
                 pattern,
             ),
             GraphUpdateOperation::Load {
@@ -125,24 +122,24 @@
         algebra: &GraphPattern,
     ) -> Result<(), EvaluationError> {
         let dataset = Rc::new(DatasetView::new(self.transaction.reader(), using));
-        let (plan, variables) = PlanBuilder::build(
-            &dataset,
-            algebra,
-            false,
-            &self.options.query_options.custom_functions,
-            !self.options.query_options.without_optimizations,
-        )?;
+        let mut pattern = sparopt::algebra::GraphPattern::from(algebra);
+        if !self.options.query_options.without_optimizations {
+            pattern = Optimizer::optimize_graph_pattern(sparopt::algebra::GraphPattern::Reduced {
+                inner: Box::new(pattern),
+            });
+        }
         let evaluator = SimpleEvaluator::new(
             Rc::clone(&dataset),
             self.base_iri.clone(),
             self.options.query_options.service_handler(),
-            Rc::new(self.options.query_options.custom_functions.clone()),
+            Arc::new(self.options.query_options.custom_functions.clone()),
             false,
         );
+        let mut variables = Vec::new();
         let mut bnodes = HashMap::new();
-        let (eval, _) = evaluator.plan_evaluator(Rc::new(plan));
+        let (eval, _) = evaluator.graph_pattern_evaluator(&pattern, &mut variables);
         let tuples =
-            eval(EncodedTuple::with_capacity(variables.len())).collect::<Result<Vec<_>, _>>()?; //TODO: would be much better to stream
+            eval(EncodedTuple::with_capacity(variables.len())).collect::<Result<Vec<_>, _>>()?; // TODO: would be much better to stream
         for tuple in tuples {
             for quad in delete {
                 if let Some(quad) =
@@ -164,28 +161,31 @@
     }
 
     fn eval_load(&mut self, from: &NamedNode, to: &GraphName) -> Result<(), EvaluationError> {
-        let (content_type, body) = self.client.get(
-            from.as_str(),
-            "application/n-triples, text/turtle, application/rdf+xml",
-        )?;
-        let format = GraphFormat::from_media_type(&content_type).ok_or_else(|| {
-            EvaluationError::msg(format!(
-                "Unsupported Content-Type returned by {from}: {content_type}"
-            ))
-        })?;
+        let (content_type, body) = self
+            .client
+            .get(
+                from.as_str(),
+                "application/n-triples, text/turtle, application/rdf+xml",
+            )
+            .map_err(|e| EvaluationError::Service(Box::new(e)))?;
+        let format = RdfFormat::from_media_type(&content_type)
+            .ok_or_else(|| EvaluationError::UnsupportedContentType(content_type))?;
         let to_graph_name = match to {
             GraphName::NamedNode(graph_name) => graph_name.into(),
             GraphName::DefaultGraph => GraphNameRef::DefaultGraph,
         };
-        let mut parser = GraphParser::from_format(format);
-        if let Some(base_iri) = &self.base_iri {
-            parser = parser
-                .with_base_iri(base_iri.as_str())
-                .map_err(|e| ParseError::invalid_base_iri(base_iri, e))?;
-        }
-        for t in parser.read_triples(BufReader::new(body))? {
-            self.transaction
-                .insert(t?.as_ref().in_graph(to_graph_name))?;
+        let mut parser = RdfParser::from_format(format)
+            .rename_blank_nodes()
+            .without_named_graphs()
+            .with_default_graph(to_graph_name);
+        parser = parser.with_base_iri(from.as_str()).map_err(|e| {
+            EvaluationError::Service(Box::new(io::Error::new(
+                io::ErrorKind::InvalidInput,
+                format!("Invalid URL: {from}: {e}"),
+            )))
+        })?;
+        for q in parser.parse_read(body) {
+            self.transaction.insert(q?.as_ref())?;
         }
         Ok(())
     }
@@ -194,9 +194,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
         if self.transaction.insert_named_graph(graph_name.into())? || silent {
             Ok(())
         } else {
-            Err(EvaluationError::msg(format!(
-                "The graph {graph_name} already exists"
-            )))
+            Err(EvaluationError::GraphAlreadyExists(graph_name.clone()))
         }
     }
@@ -212,9 +210,7 @@
                 } else if silent {
                     Ok(())
                 } else {
-                    Err(EvaluationError::msg(format!(
-                        "The graph {graph} does not exists"
-                    )))
+                    Err(EvaluationError::GraphDoesNotExist(graph_name.clone()))
                 }
             }
             GraphTarget::DefaultGraph => {
@@ -232,9 +228,7 @@
                 if self.transaction.remove_named_graph(graph_name.into())? || silent {
                     Ok(())
                 } else {
-                    Err(EvaluationError::msg(format!(
-                        "The graph {graph_name} does not exists"
-                    )))
+                    Err(EvaluationError::GraphDoesNotExist(graph_name.clone()))
                 }
             }
             GraphTarget::DefaultGraph => {

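The update evaluator now reports graph-management failures through typed variants instead of `EvaluationError::msg`. A sketch of the resulting caller-side behavior, assuming the `oxigraph` crate:

use oxigraph::sparql::EvaluationError;
use oxigraph::store::Store;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = Store::new()?;
    store.update("CREATE GRAPH <http://example.com/g>")?;
    // A second CREATE on the same graph yields the typed error...
    match store.update("CREATE GRAPH <http://example.com/g>") {
        Err(EvaluationError::GraphAlreadyExists(g)) => eprintln!("already there: {g}"),
        other => other?,
    }
    // ...unless SILENT turns the duplicate creation into a no-op.
    store.update("CREATE SILENT GRAPH <http://example.com/g>")?;
    Ok(())
}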
@@ -1,6 +1,7 @@
 //! TODO: This storage is dramatically naive.
 
 use crate::storage::StorageError;
+use crate::store::CorruptionError;
 use std::cell::RefCell;
 use std::collections::{BTreeMap, HashMap};
 use std::error::Error;
@@ -30,9 +31,13 @@ impl Db {
     }
 
     #[allow(clippy::unwrap_in_result)]
-    pub fn column_family(&self, name: &'static str) -> Option<ColumnFamily> {
-        let name = ColumnFamily(name);
-        (self.0.read().unwrap().contains_key(&name)).then(|| name)
+    pub fn column_family(&self, name: &'static str) -> Result<ColumnFamily, StorageError> {
+        let column_family = ColumnFamily(name);
+        if self.0.read().unwrap().contains_key(&column_family) {
+            Ok(column_family)
+        } else {
+            Err(CorruptionError::from_missing_column_family_name(name).into())
+        }
     }
 
     #[must_use]
@@ -116,6 +121,7 @@ impl Reader {
         }
     }
 
+    #[allow(clippy::iter_not_returning_iterator)]
     pub fn iter(&self, column_family: &ColumnFamily) -> Result<Iter, StorageError> {
         self.scan_prefix(column_family, &[])
     }
@@ -129,9 +135,7 @@ impl Reader {
         let data: Vec<_> = match &self.0 {
             InnerReader::Simple(reader) => {
                 let trees = reader.read().unwrap();
-                let tree = if let Some(tree) = trees.get(column_family) {
-                    tree
-                } else {
+                let Some(tree) = trees.get(column_family) else {
                     return Ok(Iter {
                         iter: Vec::new().into_iter(),
                         current: None,
@@ -147,28 +151,25 @@ impl Reader {
                 }
             }
             InnerReader::Transaction(reader) => {
-                if let Some(reader) = reader.upgrade() {
-                    let trees = (*reader).borrow();
-                    let tree = if let Some(tree) = trees.get(column_family) {
-                        tree
-                    } else {
-                        return Ok(Iter {
-                            iter: Vec::new().into_iter(),
-                            current: None,
-                        });
-                    };
-                    if prefix.is_empty() {
-                        tree.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
-                    } else {
-                        tree.range(prefix.to_vec()..)
-                            .take_while(|(k, _)| k.starts_with(prefix))
-                            .map(|(k, v)| (k.clone(), v.clone()))
-                            .collect()
-                    }
-                } else {
+                let Some(reader) = reader.upgrade() else {
                     return Err(StorageError::Other(
                         "The transaction is already ended".into(),
                     ));
+                };
+                let trees = (*reader).borrow();
+                let Some(tree) = trees.get(column_family) else {
+                    return Ok(Iter {
+                        iter: Vec::new().into_iter(),
+                        current: None,
+                    });
+                };
+                if prefix.is_empty() {
+                    tree.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
+                } else {
+                    tree.range(prefix.to_vec()..)
+                        .take_while(|(k, _)| k.starts_with(prefix))
+                        .map(|(k, v)| (k.clone(), v.clone()))
+                        .collect()
                 }
             }
         };
@@ -231,7 +232,7 @@ pub struct Transaction<'a>(
 impl Transaction<'_> {
     #[allow(unsafe_code, clippy::useless_transmute)]
     pub fn reader(&self) -> Reader {
-        // This transmute is safe because we take a weak reference and the only Rc reference used is guarded by the lifetime.
+        // SAFETY: This transmute is safe because we take a weak reference and the only Rc reference used is guarded by the lifetime.
         Reader(InnerReader::Transaction(Rc::downgrade(unsafe {
             transmute(&self.0)
         })))

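Several hunks above replace nested `if let ... else` chains with `let ... else`, which hoists the failure case out and flattens the happy path. A stand-alone illustration of the same refactor (all names hypothetical):

fn first_even(values: &[i32]) -> Option<i32> {
    // The refutable pattern binds on success; the else block must diverge.
    let Some(&first) = values.first() else {
        return None;
    };
    // `bool::then_some` replaces the older `then(|| ...)` closure form.
    (first % 2 == 0).then_some(first)
}

fn main() {
    assert_eq!(first_even(&[2, 3]), Some(2));
    assert_eq!(first_even(&[]), None);
}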
@@ -4,9 +4,7 @@
 #[cfg(target_family = "wasm")]
 pub use fallback::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction};
 #[cfg(not(target_family = "wasm"))]
-pub use rocksdb::{
-    ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, SstFileWriter, Transaction,
-};
+pub use rocksdb::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction};
 
 #[cfg(target_family = "wasm")]
 mod fallback;

@ -1,48 +1,42 @@
//! Code inspired by [Rust RocksDB](https://github.com/rust-rocksdb/rust-rocksdb) under Apache License 2.0. //! Code inspired by [Rust RocksDB](https://github.com/rust-rocksdb/rust-rocksdb) under Apache License 2.0.
#![allow(unsafe_code, trivial_casts)] #![allow(
unsafe_code,
trivial_casts,
clippy::undocumented_unsafe_blocks,
clippy::panic_in_result_fn,
clippy::unwrap_in_result
)]
use crate::storage::error::{CorruptionError, StorageError}; use crate::storage::error::{CorruptionError, StorageError};
use lazy_static::lazy_static; use libc::{self, c_void};
use libc::{self, c_void, free};
use oxrocksdb_sys::*; use oxrocksdb_sys::*;
use rand::random; use rand::random;
use std::borrow::Borrow; use std::borrow::Borrow;
#[cfg(unix)]
use std::cmp::min; use std::cmp::min;
use std::collections::HashMap; use std::collections::HashMap;
use std::env::temp_dir; use std::env::temp_dir;
use std::error::Error; use std::error::Error;
use std::ffi::{CStr, CString}; use std::ffi::{CStr, CString};
use std::fmt;
use std::fs::remove_dir_all; use std::fs::remove_dir_all;
use std::io;
use std::marker::PhantomData; use std::marker::PhantomData;
use std::ops::Deref; use std::ops::Deref;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::rc::{Rc, Weak}; use std::rc::{Rc, Weak};
use std::sync::Arc; use std::sync::{Arc, OnceLock};
use std::thread::{available_parallelism, yield_now}; use std::thread::{available_parallelism, yield_now};
use std::{ptr, slice}; use std::{fmt, io, ptr, slice};
macro_rules! ffi_result { macro_rules! ffi_result {
( $($function:ident)::*() ) => { ( $($function:ident)::*( $arg1:expr $(, $arg:expr)* $(,)? ) ) => {{
ffi_result_impl!($($function)::*())
};
( $($function:ident)::*( $arg1:expr $(, $arg:expr)* $(,)? ) ) => {
ffi_result_impl!($($function)::*($arg1 $(, $arg)* ,))
};
}
macro_rules! ffi_result_impl {
( $($function:ident)::*( $($arg:expr,)*) ) => {{
let mut status = rocksdb_status_t { let mut status = rocksdb_status_t {
code: rocksdb_status_code_t_rocksdb_status_code_ok, code: rocksdb_status_code_t_rocksdb_status_code_ok,
subcode: rocksdb_status_subcode_t_rocksdb_status_subcode_none, subcode: rocksdb_status_subcode_t_rocksdb_status_subcode_none,
severity: rocksdb_status_severity_t_rocksdb_status_severity_none, severity: rocksdb_status_severity_t_rocksdb_status_severity_none,
string: ptr::null() string: ptr::null()
}; };
let result = $($function)::*($($arg,)* &mut status); let result = $($function)::*($arg1 $(, $arg)* , &mut status);
if status.code == rocksdb_status_code_t_rocksdb_status_code_ok { if status.code == rocksdb_status_code_t_rocksdb_status_code_ok {
Ok(result) Ok(result)
} else { } else {
@ -51,23 +45,6 @@ macro_rules! ffi_result_impl {
}} }}
} }
lazy_static! {
static ref ROCKSDB_ENV: UnsafeEnv = {
unsafe {
let env = rocksdb_create_default_env();
assert!(!env.is_null(), "rocksdb_create_default_env returned null");
UnsafeEnv(env)
}
};
static ref ROCKSDB_MEM_ENV: UnsafeEnv = {
unsafe {
let env = rocksdb_create_mem_env();
assert!(!env.is_null(), "rocksdb_create_mem_env returned null");
UnsafeEnv(env)
}
};
}
pub struct ColumnFamilyDefinition { pub struct ColumnFamilyDefinition {
pub name: &'static str, pub name: &'static str,
pub use_iter: bool, pub use_iter: bool,
@ -131,8 +108,7 @@ impl Drop for RwDbHandler {
rocksdb_block_based_options_destroy(self.block_based_table_options); rocksdb_block_based_options_destroy(self.block_based_table_options);
} }
if self.in_memory { if self.in_memory {
#[allow(clippy::let_underscore_must_use)] drop(remove_dir_all(&self.path));
let _ = remove_dir_all(&self.path);
} }
} }
} }
@ -166,8 +142,7 @@ impl Drop for RoDbHandler {
rocksdb_options_destroy(self.options); rocksdb_options_destroy(self.options);
} }
if let Some(path) = &self.path_to_remove { if let Some(path) = &self.path_to_remove {
#[allow(clippy::let_underscore_must_use)] drop(remove_dir_all(path));
let _ = remove_dir_all(path);
} }
} }
} }
@ -212,7 +187,7 @@ impl Db {
16, 16,
); );
rocksdb_options_set_block_based_table_factory(options, block_based_table_options); rocksdb_options_set_block_based_table_factory(options, block_based_table_options);
#[cfg(feature = "rocksdb_debug")] #[cfg(feature = "rocksdb-debug")]
{ {
rocksdb_options_set_info_log_level(options, 0); rocksdb_options_set_info_log_level(options, 0);
rocksdb_options_enable_statistics(options); rocksdb_options_enable_statistics(options);
@ -393,7 +368,7 @@ impl Db {
cf_handles, cf_handles,
cf_options, cf_options,
is_secondary: true, is_secondary: true,
path_to_remove: in_memory.then(|| secondary_path), path_to_remove: in_memory.then_some(secondary_path),
})), })),
}) })
} }
@ -466,6 +441,9 @@ impl Db {
limit_max_open_files: bool, limit_max_open_files: bool,
in_memory: bool, in_memory: bool,
) -> Result<*mut rocksdb_options_t, StorageError> { ) -> Result<*mut rocksdb_options_t, StorageError> {
static ROCKSDB_ENV: OnceLock<UnsafeEnv> = OnceLock::new();
static ROCKSDB_MEM_ENV: OnceLock<UnsafeEnv> = OnceLock::new();
unsafe { unsafe {
let options = rocksdb_options_create(); let options = rocksdb_options_create();
assert!(!options.is_null(), "rocksdb_options_create returned null"); assert!(!options.is_null(), "rocksdb_options_create returned null");
@ -502,10 +480,19 @@ impl Db {
rocksdb_options_set_env( rocksdb_options_set_env(
options, options,
if in_memory { if in_memory {
ROCKSDB_MEM_ENV.0 ROCKSDB_MEM_ENV.get_or_init(|| {
let env = rocksdb_create_mem_env();
assert!(!env.is_null(), "rocksdb_create_mem_env returned null");
UnsafeEnv(env)
})
} else { } else {
ROCKSDB_ENV.0 ROCKSDB_ENV.get_or_init(|| {
}, let env = rocksdb_create_default_env();
assert!(!env.is_null(), "rocksdb_create_default_env returned null");
UnsafeEnv(env)
})
}
.0,
); );
Ok(options) Ok(options)
} }
@ -551,17 +538,17 @@ impl Db {
(column_family_names, c_column_family_names, cf_options) (column_family_names, c_column_family_names, cf_options)
} }
pub fn column_family(&self, name: &'static str) -> Option<ColumnFamily> { pub fn column_family(&self, name: &'static str) -> Result<ColumnFamily, StorageError> {
let (column_family_names, cf_handles) = match &self.inner { let (column_family_names, cf_handles) = match &self.inner {
DbKind::ReadOnly(db) => (&db.column_family_names, &db.cf_handles), DbKind::ReadOnly(db) => (&db.column_family_names, &db.cf_handles),
DbKind::ReadWrite(db) => (&db.column_family_names, &db.cf_handles), DbKind::ReadWrite(db) => (&db.column_family_names, &db.cf_handles),
}; };
for (cf, cf_handle) in column_family_names.iter().zip(cf_handles) { for (cf, cf_handle) in column_family_names.iter().zip(cf_handles) {
if *cf == name { if *cf == name {
return Some(ColumnFamily(*cf_handle)); return Ok(ColumnFamily(*cf_handle));
} }
} }
None Err(CorruptionError::from_missing_column_family_name(name).into())
} }
#[must_use] #[must_use]
@ -571,8 +558,9 @@ impl Db {
DbKind::ReadOnly(db) => { DbKind::ReadOnly(db) => {
if db.is_secondary { if db.is_secondary {
// We try to refresh (and ignore the errors) // We try to refresh (and ignore the errors)
#[allow(clippy::let_underscore_must_use)] drop(ffi_result!(rocksdb_try_catch_up_with_primary_with_status(
let _ = ffi_result!(rocksdb_try_catch_up_with_primary_with_status(db.db)); db.db
)));
} }
let options = rocksdb_readoptions_create_copy(db.read_options); let options = rocksdb_readoptions_create_copy(db.read_options);
Reader { Reader {
@ -637,7 +625,7 @@ impl Db {
ffi_result!(rocksdb_transaction_commit_with_status(transaction)); ffi_result!(rocksdb_transaction_commit_with_status(transaction));
rocksdb_transaction_destroy(transaction); rocksdb_transaction_destroy(transaction);
rocksdb_readoptions_destroy(read_options); rocksdb_readoptions_destroy(read_options);
free(snapshot as *mut c_void); rocksdb_free(snapshot as *mut c_void);
r.map_err(StorageError::from)?; // We make sure to also run destructors if the commit fails r.map_err(StorageError::from)?; // We make sure to also run destructors if the commit fails
} }
return Ok(result); return Ok(result);
@ -648,7 +636,7 @@ impl Db {
ffi_result!(rocksdb_transaction_rollback_with_status(transaction)); ffi_result!(rocksdb_transaction_rollback_with_status(transaction));
rocksdb_transaction_destroy(transaction); rocksdb_transaction_destroy(transaction);
rocksdb_readoptions_destroy(read_options); rocksdb_readoptions_destroy(read_options);
free(snapshot as *mut c_void); rocksdb_free(snapshot as *mut c_void);
r.map_err(StorageError::from)?; // We make sure to also run destructors if the commit fails r.map_err(StorageError::from)?; // We make sure to also run destructors if the commit fails
} }
// We look for the root error // We look for the root error
@ -721,7 +709,7 @@ impl Db {
column_family: &ColumnFamily, column_family: &ColumnFamily,
key: &[u8], key: &[u8],
) -> Result<bool, StorageError> { ) -> Result<bool, StorageError> {
Ok(self.get(column_family, key)?.is_some()) //TODO: optimize Ok(self.get(column_family, key)?.is_some()) // TODO: optimize
} }
pub fn insert( pub fn insert(
@ -750,13 +738,14 @@ impl Db {
} }
} }
pub fn flush(&self, column_family: &ColumnFamily) -> Result<(), StorageError> { pub fn flush(&self) -> Result<(), StorageError> {
if let DbKind::ReadWrite(db) = &self.inner { if let DbKind::ReadWrite(db) = &self.inner {
unsafe { unsafe {
ffi_result!(rocksdb_transactiondb_flush_cf_with_status( ffi_result!(rocksdb_transactiondb_flush_cfs_with_status(
db.db, db.db,
db.flush_options, db.flush_options,
column_family.0, db.cf_handles.as_ptr().cast_mut(),
db.cf_handles.len().try_into().unwrap()
)) ))
}?; }?;
Ok(()) Ok(())
@ -814,6 +803,9 @@ impl Db {
&self, &self,
ssts_for_cf: &[(&ColumnFamily, PathBuf)], ssts_for_cf: &[(&ColumnFamily, PathBuf)],
) -> Result<(), StorageError> { ) -> Result<(), StorageError> {
if ssts_for_cf.is_empty() {
return Ok(()); // Rocksdb does not support empty lists
}
if let DbKind::ReadWrite(db) = &self.inner { if let DbKind::ReadWrite(db) = &self.inner {
let mut paths_by_cf = HashMap::<_, Vec<_>>::new(); let mut paths_by_cf = HashMap::<_, Vec<_>>::new();
for (cf, path) in ssts_for_cf { for (cf, path) in ssts_for_cf {
@ -941,19 +933,18 @@ impl Reader {
)) ))
} }
InnerReader::Transaction(inner) => { InnerReader::Transaction(inner) => {
if let Some(inner) = inner.upgrade() { let Some(inner) = inner.upgrade() else {
ffi_result!(rocksdb_transaction_get_pinned_cf_with_status(
*inner,
self.options,
column_family.0,
key.as_ptr().cast(),
key.len()
))
} else {
return Err(StorageError::Other( return Err(StorageError::Other(
"The transaction is already ended".into(), "The transaction is already ended".into(),
)); ));
} };
ffi_result!(rocksdb_transaction_get_pinned_cf_with_status(
*inner,
self.options,
column_family.0,
key.as_ptr().cast(),
key.len()
))
} }
InnerReader::PlainDb(inner) => { InnerReader::PlainDb(inner) => {
ffi_result!(rocksdb_get_pinned_cf_with_status( ffi_result!(rocksdb_get_pinned_cf_with_status(
@ -978,9 +969,10 @@ impl Reader {
column_family: &ColumnFamily, column_family: &ColumnFamily,
key: &[u8], key: &[u8],
) -> Result<bool, StorageError> { ) -> Result<bool, StorageError> {
Ok(self.get(column_family, key)?.is_some()) //TODO: optimize Ok(self.get(column_family, key)?.is_some()) // TODO: optimize
} }
#[allow(clippy::iter_not_returning_iterator)]
pub fn iter(&self, column_family: &ColumnFamily) -> Result<Iter, StorageError> { pub fn iter(&self, column_family: &ColumnFamily) -> Result<Iter, StorageError> {
self.scan_prefix(column_family, &[]) self.scan_prefix(column_family, &[])
} }
@ -990,7 +982,7 @@ impl Reader {
column_family: &ColumnFamily, column_family: &ColumnFamily,
prefix: &[u8], prefix: &[u8],
) -> Result<Iter, StorageError> { ) -> Result<Iter, StorageError> {
//We generate the upper bound // We generate the upper bound
let upper_bound = { let upper_bound = {
let mut bound = prefix.to_vec(); let mut bound = prefix.to_vec();
let mut found = false; let mut found = false;
@ -1001,7 +993,7 @@ impl Reader {
break; break;
} }
} }
found.then(|| bound) found.then_some(bound)
}; };
unsafe { unsafe {
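The upper bound built above is the usual prefix-scan trick: walk the prefix from the end, bump the first byte below `0xFF`, and use the result as an exclusive end key; an all-`0xFF` prefix has no finite bound, hence the `found.then_some(bound)`. A self-contained sketch of the same computation (the helper name is illustrative, not part of the crate; the in-tree version skips the truncation, which is equally valid):

```rust
/// Smallest byte string strictly greater than every key starting with
/// `prefix`, or `None` if `prefix` is all 0xFF bytes.
fn prefix_upper_bound(prefix: &[u8]) -> Option<Vec<u8>> {
    let mut bound = prefix.to_vec();
    for i in (0..bound.len()).rev() {
        if bound[i] < u8::MAX {
            bound[i] += 1;
            bound.truncate(i + 1); // bytes after the bumped one no longer matter
            return Some(bound);
        }
    }
    None // every byte is 0xFF: no finite upper bound exists
}

fn main() {
    assert_eq!(prefix_upper_bound(b"ab"), Some(b"ac".to_vec()));
    assert_eq!(prefix_upper_bound(&[0x61, 0xFF]), Some(vec![0x62]));
    assert_eq!(prefix_upper_bound(&[0xFF, 0xFF]), None);
}
```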
@ -1022,13 +1014,12 @@ impl Reader {
rocksdb_transactiondb_create_iterator_cf(inner.db.db, options, column_family.0) rocksdb_transactiondb_create_iterator_cf(inner.db.db, options, column_family.0)
} }
InnerReader::Transaction(inner) => { InnerReader::Transaction(inner) => {
if let Some(inner) = inner.upgrade() { let Some(inner) = inner.upgrade() else {
rocksdb_transaction_create_iterator_cf(*inner, options, column_family.0)
} else {
return Err(StorageError::Other( return Err(StorageError::Other(
"The transaction is already ended".into(), "The transaction is already ended".into(),
)); ));
} };
rocksdb_transaction_create_iterator_cf(*inner, options, column_family.0)
} }
InnerReader::PlainDb(inner) => { InnerReader::PlainDb(inner) => {
rocksdb_create_iterator_cf(inner.db, options, column_family.0) rocksdb_create_iterator_cf(inner.db, options, column_family.0)
@ -1109,7 +1100,7 @@ impl Transaction<'_> {
column_family: &ColumnFamily, column_family: &ColumnFamily,
key: &[u8], key: &[u8],
) -> Result<bool, StorageError> { ) -> Result<bool, StorageError> {
Ok(self.get_for_update(column_family, key)?.is_some()) //TODO: optimize Ok(self.get_for_update(column_family, key)?.is_some()) // TODO: optimize
} }
pub fn insert( pub fn insert(
@ -1165,7 +1156,7 @@ impl Drop for PinnableSlice {
impl Deref for PinnableSlice { impl Deref for PinnableSlice {
type Target = [u8]; type Target = [u8];
fn deref(&self) -> &[u8] { fn deref(&self) -> &Self::Target {
unsafe { unsafe {
let mut len = 0; let mut len = 0;
let val = rocksdb_pinnableslice_value(self.0, &mut len); let val = rocksdb_pinnableslice_value(self.0, &mut len);
@ -1200,7 +1191,7 @@ pub struct Buffer {
impl Drop for Buffer { impl Drop for Buffer {
fn drop(&mut self) { fn drop(&mut self) {
unsafe { unsafe {
free(self.base.cast()); rocksdb_free(self.base.cast());
} }
} }
} }
@ -1208,7 +1199,7 @@ impl Drop for Buffer {
impl Deref for Buffer { impl Deref for Buffer {
type Target = [u8]; type Target = [u8];
fn deref(&self) -> &[u8] { fn deref(&self) -> &Self::Target {
unsafe { slice::from_raw_parts(self.base, self.len) } unsafe { slice::from_raw_parts(self.base, self.len) }
} }
} }
@ -1236,7 +1227,7 @@ pub struct Iter {
is_currently_valid: bool, is_currently_valid: bool,
_upper_bound: Option<Vec<u8>>, _upper_bound: Option<Vec<u8>>,
_reader: Reader, // needed to ensure that DB still lives while iter is used _reader: Reader, // needed to ensure that DB still lives while iter is used
options: *mut rocksdb_readoptions_t, // needed to ensure that options still lives while iter is used options: *mut rocksdb_readoptions_t, /* needed to ensure that options still lives while iter is used */
} }
impl Drop for Iter { impl Drop for Iter {
@ -1324,6 +1315,8 @@ impl SstFileWriter {
} }
} }
#[derive(thiserror::Error)]
#[error("{}", self.message())]
struct ErrorStatus(rocksdb_status_t); struct ErrorStatus(rocksdb_status_t);
unsafe impl Send for ErrorStatus {} unsafe impl Send for ErrorStatus {}
@ -1333,7 +1326,7 @@ impl Drop for ErrorStatus {
fn drop(&mut self) { fn drop(&mut self) {
if !self.0.string.is_null() { if !self.0.string.is_null() {
unsafe { unsafe {
free(self.0.string as *mut c_void); rocksdb_free(self.0.string as *mut c_void);
} }
} }
} }
@ -1362,14 +1355,6 @@ impl fmt::Debug for ErrorStatus {
} }
} }
impl fmt::Display for ErrorStatus {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.message())
}
}
impl Error for ErrorStatus {}
impl From<ErrorStatus> for StorageError { impl From<ErrorStatus> for StorageError {
fn from(status: ErrorStatus) -> Self { fn from(status: ErrorStatus) -> Self {
if status.0.code == rocksdb_status_code_t_rocksdb_status_code_io_error { if status.0.code == rocksdb_status_code_t_rocksdb_status_code_io_error {
@ -1394,7 +1379,8 @@ impl From<ErrorStatus> for StorageError {
struct UnsafeEnv(*mut rocksdb_env_t); struct UnsafeEnv(*mut rocksdb_env_t);
// Hack for lazy_static. OK because only written in lazy static and used in a thread-safe way by RocksDB // Hack for OnceCell. OK because only written in OnceCell and used in a thread-safe way by RocksDB
unsafe impl Send for UnsafeEnv {}
unsafe impl Sync for UnsafeEnv {} unsafe impl Sync for UnsafeEnv {}
fn path_to_cstring(path: &Path) -> Result<CString, StorageError> { fn path_to_cstring(path: &Path) -> Result<CString, StorageError> {
@ -1413,7 +1399,7 @@ fn path_to_cstring(path: &Path) -> Result<CString, StorageError> {
} }
#[cfg(unix)] #[cfg(unix)]
fn available_file_descriptors() -> io::Result<Option<u64>> { fn available_file_descriptors() -> io::Result<Option<libc::rlim_t>> {
let mut rlimit = libc::rlimit { let mut rlimit = libc::rlimit {
rlim_cur: 0, rlim_cur: 0,
rlim_max: 0, rlim_max: 0,
@ -1426,12 +1412,12 @@ fn available_file_descriptors() -> io::Result<Option<u64>> {
} }
#[cfg(windows)] #[cfg(windows)]
fn available_file_descriptors() -> io::Result<Option<u64>> { fn available_file_descriptors() -> io::Result<Option<libc::c_int>> {
Ok(Some(512)) // https://docs.microsoft.com/en-us/cpp/c-runtime-library/file-handling Ok(Some(512)) // https://docs.microsoft.com/en-us/cpp/c-runtime-library/file-handling
} }
#[cfg(not(any(unix, windows)))] #[cfg(not(any(unix, windows)))]
fn available_file_descriptors() -> io::Result<Option<u64>> { fn available_file_descriptors() -> io::Result<Option<libc::c_int>> {
Ok(None) Ok(None)
} }

@ -2,7 +2,7 @@ use crate::storage::error::{CorruptionError, StorageError};
use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, EncodedTriple, StrHash}; use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, EncodedTriple, StrHash};
use crate::storage::small_string::SmallString; use crate::storage::small_string::SmallString;
use oxsdatatypes::*; use oxsdatatypes::*;
use std::io::{Cursor, Read}; use std::io::Read;
use std::mem::size_of; use std::mem::size_of;
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
@ -62,24 +62,23 @@ pub enum QuadEncoding {
} }
impl QuadEncoding { impl QuadEncoding {
pub fn decode(self, buffer: &[u8]) -> Result<EncodedQuad, StorageError> { pub fn decode(self, mut buffer: &[u8]) -> Result<EncodedQuad, StorageError> {
let mut cursor = Cursor::new(&buffer);
match self { match self {
Self::Spog => cursor.read_spog_quad(), Self::Spog => buffer.read_spog_quad(),
Self::Posg => cursor.read_posg_quad(), Self::Posg => buffer.read_posg_quad(),
Self::Ospg => cursor.read_ospg_quad(), Self::Ospg => buffer.read_ospg_quad(),
Self::Gspo => cursor.read_gspo_quad(), Self::Gspo => buffer.read_gspo_quad(),
Self::Gpos => cursor.read_gpos_quad(), Self::Gpos => buffer.read_gpos_quad(),
Self::Gosp => cursor.read_gosp_quad(), Self::Gosp => buffer.read_gosp_quad(),
Self::Dspo => cursor.read_dspo_quad(), Self::Dspo => buffer.read_dspo_quad(),
Self::Dpos => cursor.read_dpos_quad(), Self::Dpos => buffer.read_dpos_quad(),
Self::Dosp => cursor.read_dosp_quad(), Self::Dosp => buffer.read_dosp_quad(),
} }
} }
} }
pub fn decode_term(buffer: &[u8]) -> Result<EncodedTerm, StorageError> { pub fn decode_term(mut buffer: &[u8]) -> Result<EncodedTerm, StorageError> {
Cursor::new(&buffer).read_term() buffer.read_term()
} }
pub trait TermReader { pub trait TermReader {
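Dropping `Cursor` works because `&[u8]` implements `std::io::Read` directly: each read consumes bytes from the front of the slice, so a `mut buffer: &[u8]` argument tracks the read position by itself. A quick illustration:

```rust
use std::io::Read;

fn main() -> std::io::Result<()> {
    let mut buffer: &[u8] = &[1, 2, 3, 4];
    let mut first = [0; 2];
    buffer.read_exact(&mut first)?; // advances `buffer` itself
    assert_eq!(first, [1, 2]);
    assert_eq!(buffer, [3, 4]); // only the unread bytes remain
    Ok(())
}
```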
@ -636,6 +635,8 @@ pub fn write_term(sink: &mut Vec<u8>, term: &EncodedTerm) {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
#![allow(clippy::panic_in_result_fn)]
use super::*; use super::*;
use crate::model::TermRef; use crate::model::TermRef;
use crate::storage::numeric_encoder::*; use crate::storage::numeric_encoder::*;
@ -651,10 +652,6 @@ mod tests {
fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> { fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> {
Ok(self.id2str.borrow().get(key).cloned()) Ok(self.id2str.borrow().get(key).cloned())
} }
fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> {
Ok(self.id2str.borrow().contains_key(key))
}
} }
impl MemoryStrStore { impl MemoryStrStore {
@ -742,7 +739,7 @@ mod tests {
let mut buffer = Vec::new(); let mut buffer = Vec::new();
write_term(&mut buffer, &encoded); write_term(&mut buffer, &encoded);
assert_eq!(encoded, Cursor::new(&buffer).read_term().unwrap()); assert_eq!(encoded, buffer.as_slice().read_term().unwrap());
} }
} }
} }

@ -0,0 +1,139 @@
use crate::io::{RdfFormat, RdfParseError};
use crate::storage::numeric_encoder::EncodedTerm;
use oxiri::IriParseError;
use oxrdf::TermRef;
use std::error::Error;
use std::io;
/// An error related to storage operations (reads, writes...).
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum StorageError {
/// Error from the OS I/O layer.
#[error(transparent)]
Io(#[from] io::Error),
/// Error related to data corruption.
#[error(transparent)]
Corruption(#[from] CorruptionError),
#[doc(hidden)]
#[error("{0}")]
Other(#[source] Box<dyn Error + Send + Sync + 'static>),
}
impl From<StorageError> for io::Error {
#[inline]
fn from(error: StorageError) -> Self {
match error {
StorageError::Io(error) => error,
StorageError::Corruption(error) => error.into(),
StorageError::Other(error) => Self::new(io::ErrorKind::Other, error),
}
}
}
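Thanks to this `From` impl, a `?` on any storage call propagates straight into an `io::Result` context. A minimal sketch assuming it sits in this module (the `storage_call` stub is hypothetical):

```rust
use std::io;

// Hypothetical stand-in for any operation returning StorageError.
fn storage_call() -> Result<(), StorageError> {
    Err(StorageError::Other("example failure".into()))
}

// `?` converts implicitly through `From<StorageError> for io::Error`.
fn io_facing_api() -> io::Result<()> {
    storage_call()?;
    Ok(())
}
```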
/// An error returned if some content in the database is corrupted.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct CorruptionError(#[from] CorruptionErrorKind);
/// An error returned if some content in the database is corrupted.
#[derive(Debug, thiserror::Error)]
enum CorruptionErrorKind {
#[error("{0}")]
Msg(String),
#[error("{0}")]
Other(#[source] Box<dyn Error + Send + Sync + 'static>),
}
impl CorruptionError {
/// Builds an error from another error.
#[inline]
pub(crate) fn new(error: impl Into<Box<dyn Error + Send + Sync + 'static>>) -> Self {
Self(CorruptionErrorKind::Other(error.into()))
}
#[inline]
pub(crate) fn from_encoded_term(encoded: &EncodedTerm, term: &TermRef<'_>) -> Self {
// TODO: eventually use a dedicated error enum value
Self::msg(format!("Invalid term encoding {encoded:?} for {term}"))
}
#[inline]
pub(crate) fn from_missing_column_family_name(name: &'static str) -> Self {
// TODO: eventually use a dedicated error enum value
Self::msg(format!("Column family {name} does not exist"))
}
/// Builds an error from a printable error message.
#[inline]
pub(crate) fn msg(msg: impl Into<String>) -> Self {
Self(CorruptionErrorKind::Msg(msg.into()))
}
}
impl From<CorruptionError> for io::Error {
#[inline]
fn from(error: CorruptionError) -> Self {
Self::new(io::ErrorKind::InvalidData, error)
}
}
/// An error raised while loading a file into a [`Store`](crate::store::Store).
#[derive(Debug, thiserror::Error)]
pub enum LoaderError {
/// An error raised while reading the file.
#[error(transparent)]
Parsing(#[from] RdfParseError),
/// An error raised during the insertion in the store.
#[error(transparent)]
Storage(#[from] StorageError),
/// The base IRI is invalid.
#[error("Invalid base IRI '{iri}': {error}")]
InvalidBaseIri {
/// The IRI itself.
iri: String,
/// The parsing error.
#[source]
error: IriParseError,
},
}
impl From<LoaderError> for io::Error {
#[inline]
fn from(error: LoaderError) -> Self {
match error {
LoaderError::Storage(error) => error.into(),
LoaderError::Parsing(error) => error.into(),
LoaderError::InvalidBaseIri { .. } => {
Self::new(io::ErrorKind::InvalidInput, error.to_string())
}
}
}
}
/// An error raised while writing a file from a [`Store`](crate::store::Store).
#[derive(Debug, thiserror::Error)]
pub enum SerializerError {
/// An error raised while writing the content.
#[error(transparent)]
Io(#[from] io::Error),
/// An error raised during the lookup in the store.
#[error(transparent)]
Storage(#[from] StorageError),
/// A format compatible with [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) is required.
#[error("A RDF format supporting datasets was expected, {0} found")]
DatasetFormatExpected(RdfFormat),
}
impl From<SerializerError> for io::Error {
#[inline]
fn from(error: SerializerError) -> Self {
match error {
SerializerError::Storage(error) => error.into(),
SerializerError::Io(error) => error,
SerializerError::DatasetFormatExpected(_) => {
Self::new(io::ErrorKind::InvalidInput, error.to_string())
}
}
}
}

@ -20,13 +20,9 @@ use std::mem::swap;
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
use std::path::Path; use std::path::Path;
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Mutex;
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
use std::sync::Arc; use std::{io, thread};
#[cfg(not(target_family = "wasm"))]
use std::thread::spawn;
#[cfg(not(target_family = "wasm"))]
use std::thread::JoinHandle;
use self::storage_generator::StorageGenerator; use self::storage_generator::StorageGenerator;
@ -239,6 +235,13 @@ impl StorageReader {
pub fn validate(&self) -> Result<(), StorageError> { pub fn validate(&self) -> Result<(), StorageError> {
Ok(()) Ok(())
} }
/// Validates that all the storage invariants hold in the data
#[cfg(target_family = "wasm")]
#[allow(clippy::unused_self, clippy::unnecessary_wraps)]
pub fn validate(&self) -> Result<(), StorageError> {
Ok(()) // TODO
}
} }
pub struct ChainedDecodingQuadIterator { pub struct ChainedDecodingQuadIterator {
@ -265,7 +268,7 @@ impl ChainedDecodingQuadIterator {
impl Iterator for ChainedDecodingQuadIterator { impl Iterator for ChainedDecodingQuadIterator {
type Item = Result<EncodedQuad, StorageError>; type Item = Result<EncodedQuad, StorageError>;
fn next(&mut self) -> Option<Result<EncodedQuad, StorageError>> { fn next(&mut self) -> Option<Self::Item> {
if let Some(result) = self.first.next() { if let Some(result) = self.first.next() {
Some(result) Some(result)
} else if let Some(second) = self.second.as_mut() { } else if let Some(second) = self.second.as_mut() {
@ -315,10 +318,6 @@ impl StrLookup for StorageReader {
fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> { fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> {
self.get_str(key) self.get_str(key)
} }
fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> {
self.contains_str(key)
}
} }
pub struct StorageWriter<'a> { pub struct StorageWriter<'a> {
@ -645,6 +644,7 @@ impl<'a> StorageWriter<'a> {
} }
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
#[must_use]
pub struct StorageBulkLoader { pub struct StorageBulkLoader {
storage: Storage, storage: Storage,
hooks: Vec<Box<dyn Fn(u64)>>, hooks: Vec<Box<dyn Fn(u64)>>,
@ -663,12 +663,12 @@ impl StorageBulkLoader {
} }
} }
pub fn set_num_threads(mut self, num_threads: usize) -> Self { pub fn with_num_threads(mut self, num_threads: usize) -> Self {
self.num_threads = Some(num_threads); self.num_threads = Some(num_threads);
self self
} }
pub fn set_max_memory_size_in_megabytes(mut self, max_memory_size: usize) -> Self { pub fn with_max_memory_size_in_megabytes(mut self, max_memory_size: usize) -> Self {
self.max_memory_size = Some(max_memory_size); self.max_memory_size = Some(max_memory_size);
self self
} }
@ -700,81 +700,92 @@ impl StorageBulkLoader {
) )
.into()); .into());
} }
let mut threads = VecDeque::with_capacity(num_threads - 1); let done_counter = Mutex::new(0);
let mut buffer = Vec::with_capacity(batch_size);
let done_counter = Arc::new(AtomicU64::new(0));
let mut done_and_displayed_counter = 0; let mut done_and_displayed_counter = 0;
for quad in quads { thread::scope(|thread_scope| {
let quad = quad?; let mut threads = VecDeque::with_capacity(num_threads - 1);
buffer.push(quad); let mut buffer = Vec::with_capacity(batch_size);
if buffer.len() >= batch_size { for quad in quads {
self.spawn_load_thread( let quad = quad?;
&mut buffer, buffer.push(quad);
&mut threads, if buffer.len() >= batch_size {
&done_counter, self.spawn_load_thread(
&mut done_and_displayed_counter, &mut buffer,
num_threads, &mut threads,
batch_size, thread_scope,
)?; &done_counter,
&mut done_and_displayed_counter,
num_threads,
batch_size,
)?;
}
} }
} self.spawn_load_thread(
self.spawn_load_thread( &mut buffer,
&mut buffer, &mut threads,
&mut threads, thread_scope,
&done_counter, &done_counter,
&mut done_and_displayed_counter, &mut done_and_displayed_counter,
num_threads, num_threads,
batch_size, batch_size,
)?; )?;
for thread in threads { for thread in threads {
thread.join().unwrap()?; map_thread_result(thread.join()).map_err(StorageError::Io)??;
self.on_possible_progress(&done_counter, &mut done_and_displayed_counter); self.on_possible_progress(&done_counter, &mut done_and_displayed_counter)?;
} }
Ok(()) Ok(())
})
} }
fn spawn_load_thread( fn spawn_load_thread<'scope>(
&self, &'scope self,
buffer: &mut Vec<Quad>, buffer: &mut Vec<Quad>,
threads: &mut VecDeque<JoinHandle<Result<(), StorageError>>>, threads: &mut VecDeque<thread::ScopedJoinHandle<'scope, Result<(), StorageError>>>,
done_counter: &Arc<AtomicU64>, thread_scope: &'scope thread::Scope<'scope, '_>,
done_counter: &'scope Mutex<u64>,
done_and_displayed_counter: &mut u64, done_and_displayed_counter: &mut u64,
num_threads: usize, num_threads: usize,
batch_size: usize, batch_size: usize,
) -> Result<(), StorageError> { ) -> Result<(), StorageError> {
self.on_possible_progress(done_counter, done_and_displayed_counter); self.on_possible_progress(done_counter, done_and_displayed_counter)?;
// We avoid having too many threads // We avoid having too many threads
if threads.len() >= num_threads { if threads.len() >= num_threads {
if let Some(thread) = threads.pop_front() { if let Some(thread) = threads.pop_front() {
thread.join().unwrap()?; map_thread_result(thread.join()).map_err(StorageError::Io)??;
self.on_possible_progress(done_counter, done_and_displayed_counter); self.on_possible_progress(done_counter, done_and_displayed_counter)?;
} }
} }
let mut buffer_to_load = Vec::with_capacity(batch_size); let mut buffer_to_load = Vec::with_capacity(batch_size);
swap(buffer, &mut buffer_to_load); swap(buffer, &mut buffer_to_load);
let storage = self.storage.clone(); let storage = &self.storage;
let done_counter_clone = Arc::clone(done_counter); threads.push_back(thread_scope.spawn(move || {
threads.push_back(spawn(move || { FileBulkLoader::new(storage, batch_size).load(buffer_to_load, done_counter)
FileBulkLoader::new(storage, batch_size).load(buffer_to_load, &done_counter_clone)
})); }));
Ok(()) Ok(())
} }
fn on_possible_progress(&self, done: &AtomicU64, done_and_displayed: &mut u64) { fn on_possible_progress(
let new_counter = done.load(Ordering::Relaxed); &self,
let display_step = u64::try_from(DEFAULT_BULK_LOAD_BATCH_SIZE).unwrap(); done: &Mutex<u64>,
done_and_displayed: &mut u64,
) -> Result<(), StorageError> {
let new_counter = *done
.lock()
.map_err(|_| io::Error::new(io::ErrorKind::Other, "Mutex poisoned"))?;
let display_step = DEFAULT_BULK_LOAD_BATCH_SIZE as u64;
if new_counter / display_step > *done_and_displayed / display_step { if new_counter / display_step > *done_and_displayed / display_step {
for hook in &self.hooks { for hook in &self.hooks {
hook(new_counter); hook(new_counter);
} }
} }
*done_and_displayed = new_counter; *done_and_displayed = new_counter;
Ok(())
} }
} }
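The move from `spawn` plus `Arc<AtomicU64>` to `std::thread::scope` is what lets the loader borrow `self.storage` and a plain `Mutex<u64>` counter: scoped threads are guaranteed to be joined before the scope returns, so borrows of stack data are sound. The pattern reduced to a minimal, self-contained sketch:

```rust
use std::sync::Mutex;
use std::thread;

fn main() {
    let counter = Mutex::new(0_u64); // shared by reference: no Arc needed
    let batches: Vec<Vec<u32>> = vec![vec![1, 2], vec![3], vec![4, 5, 6]];
    thread::scope(|scope| {
        let counter = &counter; // move a shared reference into each closure
        let mut handles = Vec::new();
        for batch in &batches {
            handles.push(scope.spawn(move || {
                // Scoped threads may borrow stack data: the scope joins
                // every thread before `counter` and `batches` can drop.
                *counter.lock().unwrap() += batch.len() as u64;
            }));
        }
        for handle in handles {
            handle.join().unwrap();
        }
    });
    assert_eq!(*counter.lock().unwrap(), 6);
}
```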
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
struct FileBulkLoader { struct FileBulkLoader<'a> {
storage: Storage, storage: &'a Storage,
id2str: HashMap<StrHash, Box<str>>, id2str: HashMap<StrHash, Box<str>>,
quads: HashSet<EncodedQuad>, quads: HashSet<EncodedQuad>,
triples: HashSet<EncodedQuad>, triples: HashSet<EncodedQuad>,
@ -782,8 +793,8 @@ struct FileBulkLoader {
} }
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
impl FileBulkLoader { impl<'a> FileBulkLoader<'a> {
fn new(storage: Storage, batch_size: usize) -> Self { fn new(storage: &'a Storage, batch_size: usize) -> Self {
Self { Self {
storage, storage,
id2str: HashMap::with_capacity(3 * batch_size), id2str: HashMap::with_capacity(3 * batch_size),
@ -793,11 +804,14 @@ impl FileBulkLoader {
} }
} }
fn load(&mut self, quads: Vec<Quad>, counter: &AtomicU64) -> Result<(), StorageError> { fn load(&mut self, quads: Vec<Quad>, counter: &Mutex<u64>) -> Result<(), StorageError> {
self.encode(quads)?; self.encode(quads)?;
let size = self.triples.len() + self.quads.len(); let size = self.triples.len() + self.quads.len();
// self.save()?; //self.save()?;
counter.fetch_add(size.try_into().unwrap(), Ordering::Relaxed); *counter
.lock()
.map_err(|_| io::Error::new(io::ErrorKind::Other, "Mutex poisoned"))? +=
size.try_into().unwrap_or(u64::MAX);
Ok(()) Ok(())
} }
@ -820,7 +834,12 @@ impl FileBulkLoader {
match quad.graph_name.as_ref() { match quad.graph_name.as_ref() {
GraphNameRef::NamedNode(n) => n.into(), GraphNameRef::NamedNode(n) => n.into(),
GraphNameRef::BlankNode(n) => n.into(), GraphNameRef::BlankNode(n) => n.into(),
GraphNameRef::DefaultGraph => unreachable!(), GraphNameRef::DefaultGraph => {
return Err(CorruptionError::new(
"Default graph this not the default graph",
)
.into())
}
}, },
&encoded.graph_name, &encoded.graph_name,
)?; )?;
@ -978,3 +997,17 @@ impl FileBulkLoader {
// sst.finish() // sst.finish()
// } // }
} }
#[cfg(not(target_family = "wasm"))]
fn map_thread_result<R>(result: thread::Result<R>) -> io::Result<R> {
result.map_err(|e| {
io::Error::new(
io::ErrorKind::Other,
if let Ok(e) = e.downcast::<&dyn std::fmt::Display>() {
format!("A loader processed crashed with {e}")
} else {
"A loader processed crashed with and unknown error".into()
},
)
})
}

@ -6,10 +6,9 @@ use crate::storage::small_string::SmallString;
use oxsdatatypes::*; use oxsdatatypes::*;
use siphasher::sip128::{Hasher128, SipHasher24}; use siphasher::sip128::{Hasher128, SipHasher24};
use std::fmt::Debug; use std::fmt::Debug;
use std::hash::Hash; use std::hash::{Hash, Hasher};
use std::hash::Hasher;
use std::rc::Rc;
use std::str; use std::str;
use std::sync::Arc;
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[repr(transparent)] #[repr(transparent)]
@ -98,7 +97,7 @@ pub enum EncodedTerm {
DurationLiteral(Duration), DurationLiteral(Duration),
YearMonthDurationLiteral(YearMonthDuration), YearMonthDurationLiteral(YearMonthDuration),
DayTimeDurationLiteral(DayTimeDuration), DayTimeDurationLiteral(DayTimeDuration),
Triple(Rc<EncodedTriple>), Triple(Arc<EncodedTriple>),
} }
impl PartialEq for EncodedTerm { impl PartialEq for EncodedTerm {
@ -194,24 +193,24 @@ impl PartialEq for EncodedTerm {
}, },
) => value_id_a == value_id_b && datatype_id_a == datatype_id_b, ) => value_id_a == value_id_b && datatype_id_a == datatype_id_b,
(Self::BooleanLiteral(a), Self::BooleanLiteral(b)) => a == b, (Self::BooleanLiteral(a), Self::BooleanLiteral(b)) => a == b,
(Self::FloatLiteral(a), Self::FloatLiteral(b)) => a.is_identical_with(b), (Self::FloatLiteral(a), Self::FloatLiteral(b)) => a.is_identical_with(*b),
(Self::DoubleLiteral(a), Self::DoubleLiteral(b)) => a.is_identical_with(b), (Self::DoubleLiteral(a), Self::DoubleLiteral(b)) => a.is_identical_with(*b),
(Self::IntegerLiteral(a), Self::IntegerLiteral(b)) => a.is_identical_with(b), (Self::IntegerLiteral(a), Self::IntegerLiteral(b)) => a.is_identical_with(*b),
(Self::DecimalLiteral(a), Self::DecimalLiteral(b)) => a.is_identical_with(b), (Self::DecimalLiteral(a), Self::DecimalLiteral(b)) => a.is_identical_with(*b),
(Self::DateTimeLiteral(a), Self::DateTimeLiteral(b)) => a.is_identical_with(b), (Self::DateTimeLiteral(a), Self::DateTimeLiteral(b)) => a.is_identical_with(*b),
(Self::TimeLiteral(a), Self::TimeLiteral(b)) => a.is_identical_with(b), (Self::TimeLiteral(a), Self::TimeLiteral(b)) => a.is_identical_with(*b),
(Self::DateLiteral(a), Self::DateLiteral(b)) => a.is_identical_with(b), (Self::DateLiteral(a), Self::DateLiteral(b)) => a.is_identical_with(*b),
(Self::GYearMonthLiteral(a), Self::GYearMonthLiteral(b)) => a.is_identical_with(b), (Self::GYearMonthLiteral(a), Self::GYearMonthLiteral(b)) => a.is_identical_with(*b),
(Self::GYearLiteral(a), Self::GYearLiteral(b)) => a.is_identical_with(b), (Self::GYearLiteral(a), Self::GYearLiteral(b)) => a.is_identical_with(*b),
(Self::GMonthDayLiteral(a), Self::GMonthDayLiteral(b)) => a.is_identical_with(b), (Self::GMonthDayLiteral(a), Self::GMonthDayLiteral(b)) => a.is_identical_with(*b),
(Self::GMonthLiteral(a), Self::GMonthLiteral(b)) => a.is_identical_with(b), (Self::GMonthLiteral(a), Self::GMonthLiteral(b)) => a.is_identical_with(*b),
(Self::GDayLiteral(a), Self::GDayLiteral(b)) => a.is_identical_with(b), (Self::GDayLiteral(a), Self::GDayLiteral(b)) => a.is_identical_with(*b),
(Self::DurationLiteral(a), Self::DurationLiteral(b)) => a.is_identical_with(b), (Self::DurationLiteral(a), Self::DurationLiteral(b)) => a.is_identical_with(*b),
(Self::YearMonthDurationLiteral(a), Self::YearMonthDurationLiteral(b)) => { (Self::YearMonthDurationLiteral(a), Self::YearMonthDurationLiteral(b)) => {
a.is_identical_with(b) a.is_identical_with(*b)
} }
(Self::DayTimeDurationLiteral(a), Self::DayTimeDurationLiteral(b)) => { (Self::DayTimeDurationLiteral(a), Self::DayTimeDurationLiteral(b)) => {
a.is_identical_with(b) a.is_identical_with(*b)
} }
(Self::Triple(a), Self::Triple(b)) => a == b, (Self::Triple(a), Self::Triple(b)) => a == b,
(_, _) => false, (_, _) => false,
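Note why these comparisons use `is_identical_with` rather than `==` in the first place: this `PartialEq` must stay consistent with `Hash`, and XSD floating-point equality is not (NaN is never equal to itself, and `0.0 == -0.0` despite distinct encodings). A simplified stand-in using plain `f64` bit identity; the oxsdatatypes methods are the real implementations:

```rust
fn is_identical_with(a: f64, b: f64) -> bool {
    // Bit-level identity: total and reflexive, so safe for hash-based storage.
    a.to_bits() == b.to_bits()
}

fn main() {
    let nan = f64::NAN;
    assert!(nan != nan); // IEEE 754 equality is not reflexive...
    assert!(is_identical_with(nan, nan)); // ...but identity is
    // 0.0 and -0.0 compare equal yet are distinct values:
    assert!(0.0_f64 == -0.0_f64);
    assert!(!is_identical_with(0.0, -0.0));
}
```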
@ -489,7 +488,7 @@ impl From<DayTimeDuration> for EncodedTerm {
impl From<EncodedTriple> for EncodedTerm { impl From<EncodedTriple> for EncodedTerm {
fn from(value: EncodedTriple) -> Self { fn from(value: EncodedTriple) -> Self {
Self::Triple(Rc::new(value)) Self::Triple(Arc::new(value))
} }
} }
@ -654,7 +653,7 @@ impl From<GraphNameRef<'_>> for EncodedTerm {
impl From<TripleRef<'_>> for EncodedTerm { impl From<TripleRef<'_>> for EncodedTerm {
fn from(triple: TripleRef<'_>) -> Self { fn from(triple: TripleRef<'_>) -> Self {
Self::Triple(Rc::new(triple.into())) Self::Triple(Arc::new(triple.into()))
} }
} }
@ -722,8 +721,6 @@ impl From<QuadRef<'_>> for EncodedQuad {
pub trait StrLookup { pub trait StrLookup {
fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError>; fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError>;
fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError>;
} }
pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>( pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
@ -736,13 +733,13 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
if let EncodedTerm::NamedNode { iri_id, value } = encoded { if let EncodedTerm::NamedNode { iri_id, value } = encoded {
insert_str(iri_id, node.as_str()) insert_str(iri_id, node.as_str())
} else { } else {
unreachable!("Invalid term encoding {:?} for {}", encoded, term) Err(CorruptionError::from_encoded_term(encoded, &term).into())
} }
} }
TermRef::BlankNode(node) => match encoded { TermRef::BlankNode(node) => match encoded {
EncodedTerm::BigBlankNode { id_id } => insert_str(id_id, node.as_str()), EncodedTerm::BigBlankNode { id_id } => insert_str(id_id, node.as_str()),
EncodedTerm::SmallBlankNode(..) | EncodedTerm::NumericalBlankNode { .. } => Ok(()), EncodedTerm::SmallBlankNode(..) | EncodedTerm::NumericalBlankNode { .. } => Ok(()),
_ => unreachable!("Invalid term encoding {:?} for {}", encoded, term), _ => Err(CorruptionError::from_encoded_term(encoded, &term).into()),
}, },
TermRef::Literal(literal) => match encoded { TermRef::Literal(literal) => match encoded {
EncodedTerm::BigStringLiteral { value_id, .. } EncodedTerm::BigStringLiteral { value_id, .. }
@ -753,7 +750,7 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
if let Some(language) = literal.language() { if let Some(language) = literal.language() {
insert_str(language_id, language) insert_str(language_id, language)
} else { } else {
unreachable!("Invalid term encoding {:?} for {}", encoded, term) Err(CorruptionError::from_encoded_term(encoded, &term).into())
} }
} }
EncodedTerm::BigBigLangStringLiteral { EncodedTerm::BigBigLangStringLiteral {
@ -764,7 +761,7 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
if let Some(language) = literal.language() { if let Some(language) = literal.language() {
insert_str(language_id, language) insert_str(language_id, language)
} else { } else {
unreachable!("Invalid term encoding {:?} for {}", encoded, term) Err(CorruptionError::from_encoded_term(encoded, &term).into())
} }
} }
EncodedTerm::SmallTypedLiteral { datatype_id, .. } => { EncodedTerm::SmallTypedLiteral { datatype_id, .. } => {
@ -795,7 +792,7 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
| EncodedTerm::DurationLiteral(..) | EncodedTerm::DurationLiteral(..)
| EncodedTerm::YearMonthDurationLiteral(..) | EncodedTerm::YearMonthDurationLiteral(..)
| EncodedTerm::DayTimeDurationLiteral(..) => Ok(()), | EncodedTerm::DayTimeDurationLiteral(..) => Ok(()),
_ => unreachable!("Invalid term encoding {:?} for {}", encoded, term), _ => Err(CorruptionError::from_encoded_term(encoded, &term).into()),
}, },
TermRef::Triple(triple) => { TermRef::Triple(triple) => {
if let EncodedTerm::Triple(encoded) = encoded { if let EncodedTerm::Triple(encoded) = encoded {
@ -807,7 +804,7 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
)?; )?;
insert_term(triple.object.as_ref(), &encoded.object, insert_str) insert_term(triple.object.as_ref(), &encoded.object, insert_str)
} else { } else {
unreachable!("Invalid term encoding {:?} for {}", encoded, term) Err(CorruptionError::from_encoded_term(encoded, &term).into())
} }
} }
} }

@ -1,11 +1,9 @@
use std::borrow::Borrow; use std::borrow::Borrow;
use std::cmp::Ordering; use std::cmp::Ordering;
use std::error::Error;
use std::fmt;
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
use std::ops::Deref; use std::ops::Deref;
use std::str;
use std::str::{FromStr, Utf8Error}; use std::str::{FromStr, Utf8Error};
use std::{fmt, str};
/// A small inline string /// A small inline string
#[derive(Clone, Copy, Default)] #[derive(Clone, Copy, Default)]
@ -46,10 +44,8 @@ impl SmallString {
#[inline] #[inline]
#[allow(unsafe_code)] #[allow(unsafe_code)]
pub fn as_str(&self) -> &str { pub fn as_str(&self) -> &str {
unsafe { // SAFETY: safe because we ensured it in constructors
// safe because we ensured it in constructors unsafe { str::from_utf8_unchecked(self.as_bytes()) }
str::from_utf8_unchecked(self.as_bytes())
}
} }
#[inline] #[inline]
@ -67,7 +63,7 @@ impl Deref for SmallString {
type Target = str; type Target = str;
#[inline] #[inline]
fn deref(&self) -> &str { fn deref(&self) -> &Self::Target {
self.as_str() self.as_str()
} }
} }
@ -103,7 +99,7 @@ impl fmt::Display for SmallString {
impl PartialEq for SmallString { impl PartialEq for SmallString {
#[inline] #[inline]
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
self.as_str().eq(&**other) self.as_str() == other.as_str()
} }
} }
@ -148,17 +144,17 @@ impl FromStr for SmallString {
type Err = BadSmallStringError; type Err = BadSmallStringError;
#[inline] #[inline]
fn from_str(value: &str) -> Result<Self, BadSmallStringError> { fn from_str(value: &str) -> Result<Self, Self::Err> {
if value.len() <= 15 { if value.len() <= 15 {
let mut inner = [0; 16]; let mut inner = [0; 16];
inner[..value.len()].copy_from_slice(value.as_bytes()); inner[..value.len()].copy_from_slice(value.as_bytes());
inner[15] = value inner[15] = value
.len() .len()
.try_into() .try_into()
.map_err(|_| BadSmallStringError::TooLong(value.len()))?; .map_err(|_| Self::Err::TooLong(value.len()))?;
Ok(Self { inner }) Ok(Self { inner })
} else { } else {
Err(BadSmallStringError::TooLong(value.len())) Err(Self::Err::TooLong(value.len()))
} }
} }
} }
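The `value.len() <= 15` guard follows from the layout: a fixed 16-byte array whose last byte holds the length, leaving 15 bytes of UTF-8 payload. An illustrative round trip with the same encoding (standalone, not the crate's API):

```rust
fn encode(value: &str) -> Option<[u8; 16]> {
    if value.len() > 15 {
        return None; // the 16th byte is reserved for the length
    }
    let mut inner = [0; 16];
    inner[..value.len()].copy_from_slice(value.as_bytes());
    inner[15] = value.len() as u8;
    Some(inner)
}

fn decode(inner: &[u8; 16]) -> &str {
    let len = inner[15] as usize;
    std::str::from_utf8(&inner[..len]).expect("encode only stores valid UTF-8")
}

fn main() {
    let encoded = encode("oxigraph").expect("8 bytes fit inline");
    assert_eq!(decode(&encoded), "oxigraph");
    assert!(encode("a string longer than 15 bytes").is_none());
}
```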
@ -167,36 +163,15 @@ impl<'a> TryFrom<&'a str> for SmallString {
type Error = BadSmallStringError; type Error = BadSmallStringError;
#[inline] #[inline]
fn try_from(value: &'a str) -> Result<Self, BadSmallStringError> { fn try_from(value: &'a str) -> Result<Self, Self::Error> {
Self::from_str(value) Self::from_str(value)
} }
} }
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy, thiserror::Error)]
pub enum BadSmallStringError { pub enum BadSmallStringError {
#[error("small strings could only contain at most 15 characters, found {0}")]
TooLong(usize), TooLong(usize),
BadUtf8(Utf8Error), #[error(transparent)]
} BadUtf8(#[from] Utf8Error),
impl fmt::Display for BadSmallStringError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::TooLong(v) => write!(
f,
"small strings could only contain at most 15 characters, found {v}"
),
Self::BadUtf8(e) => e.fmt(f),
}
}
}
impl Error for BadSmallStringError {
#[inline]
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::TooLong(_) => None,
Self::BadUtf8(e) => Some(e),
}
}
} }

@ -957,9 +957,9 @@ impl StrLookup for StorageGenerator {
self.get_str(key) self.get_str(key)
} }
fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> { //fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> {
self.contains_str(key) // self.contains_str(key)
} //}
} }
// FIX: Change usize to u64 // FIX: Change usize to u64

File diff suppressed because it is too large

@ -1,4 +1,7 @@
use oxigraph::io::{DatasetFormat, GraphFormat}; #![cfg(test)]
#![allow(clippy::panic_in_result_fn)]
use oxigraph::io::RdfFormat;
use oxigraph::model::vocab::{rdf, xsd}; use oxigraph::model::vocab::{rdf, xsd};
use oxigraph::model::*; use oxigraph::model::*;
use oxigraph::store::Store; use oxigraph::store::Store;
@ -8,10 +11,11 @@ use rand::random;
use std::env::temp_dir; use std::env::temp_dir;
use std::error::Error; use std::error::Error;
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
use std::fs::{create_dir, remove_dir_all, File}; use std::fs::{create_dir_all, remove_dir_all, File};
use std::io::Cursor;
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
use std::io::Write; use std::io::Write;
#[cfg(not(target_family = "wasm"))]
use std::iter::empty;
#[cfg(target_os = "linux")] #[cfg(target_os = "linux")]
use std::iter::once; use std::iter::once;
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
@ -74,7 +78,7 @@ fn quads(graph_name: impl Into<GraphNameRef<'static>>) -> Vec<QuadRef<'static>>
QuadRef::new( QuadRef::new(
paris, paris,
name, name,
LiteralRef::new_language_tagged_literal_unchecked("la ville lumi\u{e8}re", "fr"), LiteralRef::new_language_tagged_literal_unchecked("la ville lumi\u{E8}re", "fr"),
graph_name, graph_name,
), ),
QuadRef::new(paris, country, france, graph_name), QuadRef::new(paris, country, france, graph_name),
@ -108,12 +112,7 @@ fn quads(graph_name: impl Into<GraphNameRef<'static>>) -> Vec<QuadRef<'static>>
#[test] #[test]
fn test_load_graph() -> Result<(), Box<dyn Error>> { fn test_load_graph() -> Result<(), Box<dyn Error>> {
let store = Store::new()?; let store = Store::new()?;
store.load_graph( store.load_from_read(RdfFormat::Turtle, DATA.as_bytes())?;
Cursor::new(DATA),
GraphFormat::Turtle,
GraphNameRef::DefaultGraph,
None,
)?;
for q in quads(GraphNameRef::DefaultGraph) { for q in quads(GraphNameRef::DefaultGraph) {
assert!(store.contains(q)?); assert!(store.contains(q)?);
} }
@ -125,12 +124,9 @@ fn test_load_graph() -> Result<(), Box<dyn Error>> {
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
fn test_bulk_load_graph() -> Result<(), Box<dyn Error>> { fn test_bulk_load_graph() -> Result<(), Box<dyn Error>> {
let store = Store::new()?; let store = Store::new()?;
store.bulk_loader().load_graph( store
Cursor::new(DATA), .bulk_loader()
GraphFormat::Turtle, .load_from_read(RdfFormat::Turtle, DATA.as_bytes())?;
GraphNameRef::DefaultGraph,
None,
)?;
for q in quads(GraphNameRef::DefaultGraph) { for q in quads(GraphNameRef::DefaultGraph) {
assert!(store.contains(q)?); assert!(store.contains(q)?);
} }
@ -143,11 +139,9 @@ fn test_bulk_load_graph() -> Result<(), Box<dyn Error>> {
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
fn test_bulk_load_graph_lenient() -> Result<(), Box<dyn Error>> { fn test_bulk_load_graph_lenient() -> Result<(), Box<dyn Error>> {
let store = Store::new()?; let store = Store::new()?;
store.bulk_loader().on_parse_error(|_| Ok(())).load_graph( store.bulk_loader().on_parse_error(|_| Ok(())).load_from_read(
Cursor::new(b"<http://example.com> <http://example.com> <http://example.com##> .\n<http://example.com> <http://example.com> <http://example.com> ."), RdfFormat::NTriples,
GraphFormat::NTriples, b"<http://example.com> <http://example.com> <http://example.com##> .\n<http://example.com> <http://example.com> <http://example.com> .".as_slice(),
GraphNameRef::DefaultGraph,
None,
)?; )?;
assert_eq!(store.len()?, 1); assert_eq!(store.len()?, 1);
assert!(store.contains(QuadRef::new( assert!(store.contains(QuadRef::new(
@ -160,10 +154,20 @@ fn test_bulk_load_graph_lenient() -> Result<(), Box<dyn Error>> {
Ok(()) Ok(())
} }
#[test]
#[cfg(not(target_family = "wasm"))]
fn test_bulk_load_empty() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
store.bulk_loader().load_quads(empty::<Quad>())?;
assert!(store.is_empty()?);
store.validate()?;
Ok(())
}
#[test] #[test]
fn test_load_dataset() -> Result<(), Box<dyn Error>> { fn test_load_dataset() -> Result<(), Box<dyn Error>> {
let store = Store::new()?; let store = Store::new()?;
store.load_dataset(Cursor::new(GRAPH_DATA), DatasetFormat::TriG, None)?; store.load_from_read(RdfFormat::TriG, GRAPH_DATA.as_bytes())?;
for q in quads(NamedNodeRef::new_unchecked( for q in quads(NamedNodeRef::new_unchecked(
"http://www.wikidata.org/wiki/Special:EntityData/Q90", "http://www.wikidata.org/wiki/Special:EntityData/Q90",
)) { )) {
@ -179,7 +183,7 @@ fn test_bulk_load_dataset() -> Result<(), Box<dyn Error>> {
let store = Store::new()?; let store = Store::new()?;
store store
.bulk_loader() .bulk_loader()
.load_dataset(Cursor::new(GRAPH_DATA), DatasetFormat::TriG, None)?; .load_from_read(RdfFormat::TriG, GRAPH_DATA.as_bytes())?;
let graph_name = let graph_name =
NamedNodeRef::new_unchecked("http://www.wikidata.org/wiki/Special:EntityData/Q90"); NamedNodeRef::new_unchecked("http://www.wikidata.org/wiki/Special:EntityData/Q90");
for q in quads(graph_name) { for q in quads(graph_name) {
@ -195,11 +199,9 @@ fn test_bulk_load_dataset() -> Result<(), Box<dyn Error>> {
fn test_load_graph_generates_new_blank_nodes() -> Result<(), Box<dyn Error>> { fn test_load_graph_generates_new_blank_nodes() -> Result<(), Box<dyn Error>> {
let store = Store::new()?; let store = Store::new()?;
for _ in 0..2 { for _ in 0..2 {
store.load_graph( store.load_from_read(
Cursor::new("_:a <http://example.com/p> <http://example.com/p> ."), RdfFormat::NTriples,
GraphFormat::NTriples, "_:a <http://example.com/p> <http://example.com/p> .".as_bytes(),
GraphNameRef::DefaultGraph,
None,
)?; )?;
} }
assert_eq!(store.len()?, 2); assert_eq!(store.len()?, 2);
@ -215,11 +217,7 @@ fn test_dump_graph() -> Result<(), Box<dyn Error>> {
} }
let mut buffer = Vec::new(); let mut buffer = Vec::new();
store.dump_graph( store.dump_graph_to_write(GraphNameRef::DefaultGraph, RdfFormat::NTriples, &mut buffer)?;
&mut buffer,
GraphFormat::NTriples,
GraphNameRef::DefaultGraph,
)?;
assert_eq!( assert_eq!(
buffer.into_iter().filter(|c| *c == b'\n').count(), buffer.into_iter().filter(|c| *c == b'\n').count(),
NUMBER_OF_TRIPLES NUMBER_OF_TRIPLES
@ -235,8 +233,7 @@ fn test_dump_dataset() -> Result<(), Box<dyn Error>> {
store.insert(q)?; store.insert(q)?;
} }
let mut buffer = Vec::new(); let buffer = store.dump_to_write(RdfFormat::NQuads, Vec::new())?;
store.dump_dataset(&mut buffer, DatasetFormat::NQuads)?;
assert_eq!( assert_eq!(
buffer.into_iter().filter(|c| *c == b'\n').count(), buffer.into_iter().filter(|c| *c == b'\n').count(),
NUMBER_OF_TRIPLES NUMBER_OF_TRIPLES
@ -248,10 +245,10 @@ fn test_dump_dataset() -> Result<(), Box<dyn Error>> {
#[test] #[test]
fn test_snapshot_isolation_iterator() -> Result<(), Box<dyn Error>> { fn test_snapshot_isolation_iterator() -> Result<(), Box<dyn Error>> {
let quad = QuadRef::new( let quad = QuadRef::new(
NamedNodeRef::new_unchecked("http://example.com/s"), NamedNodeRef::new("http://example.com/s")?,
NamedNodeRef::new_unchecked("http://example.com/p"), NamedNodeRef::new("http://example.com/p")?,
NamedNodeRef::new_unchecked("http://example.com/o"), NamedNodeRef::new("http://example.com/o")?,
NamedNodeRef::new_unchecked("http://www.wikidata.org/wiki/Special:EntityData/Q90"), NamedNodeRef::new("http://www.wikidata.org/wiki/Special:EntityData/Q90")?,
); );
let store = Store::new()?; let store = Store::new()?;
store.insert(quad)?; store.insert(quad)?;
@ -286,7 +283,7 @@ fn test_bulk_load_on_existing_delete_overrides_the_delete() -> Result<(), Box<dy
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
fn test_open_bad_dir() -> Result<(), Box<dyn Error>> { fn test_open_bad_dir() -> Result<(), Box<dyn Error>> {
let dir = TempDir::default(); let dir = TempDir::default();
create_dir(&dir.0)?; create_dir_all(&dir.0)?;
{ {
File::create(dir.0.join("CURRENT"))?.write_all(b"foo")?; File::create(dir.0.join("CURRENT"))?.write_all(b"foo")?;
} }
@ -301,15 +298,15 @@ fn test_bad_stt_open() -> Result<(), Box<dyn Error>> {
let dir = TempDir::default(); let dir = TempDir::default();
let store = Store::open(&dir.0)?; let store = Store::open(&dir.0)?;
remove_dir_all(&dir.0)?; remove_dir_all(&dir.0)?;
assert!(store store
.bulk_loader() .bulk_loader()
.load_quads(once(Quad::new( .load_quads(once(Quad::new(
NamedNode::new_unchecked("http://example.com/s"), NamedNode::new_unchecked("http://example.com/s"),
NamedNode::new_unchecked("http://example.com/p"), NamedNode::new_unchecked("http://example.com/p"),
NamedNode::new_unchecked("http://example.com/o"), NamedNode::new_unchecked("http://example.com/o"),
GraphName::DefaultGraph GraphName::DefaultGraph,
))) )))
.is_err()); .unwrap_err();
Ok(()) Ok(())
} }
@ -361,8 +358,8 @@ fn test_bad_backup() -> Result<(), Box<dyn Error>> {
let store_dir = TempDir::default(); let store_dir = TempDir::default();
let backup_dir = TempDir::default(); let backup_dir = TempDir::default();
create_dir(&backup_dir.0)?; create_dir_all(&backup_dir.0)?;
assert!(Store::open(&store_dir)?.backup(&backup_dir.0).is_err()); Store::open(&store_dir)?.backup(&backup_dir.0).unwrap_err();
Ok(()) Ok(())
} }
@ -371,7 +368,7 @@ fn test_bad_backup() -> Result<(), Box<dyn Error>> {
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
fn test_backup_on_in_memory() -> Result<(), Box<dyn Error>> { fn test_backup_on_in_memory() -> Result<(), Box<dyn Error>> {
let backup_dir = TempDir::default(); let backup_dir = TempDir::default();
assert!(Store::new()?.backup(&backup_dir).is_err()); Store::new()?.backup(&backup_dir).unwrap_err();
Ok(()) Ok(())
} }
@ -448,7 +445,7 @@ fn test_secondary() -> Result<(), Box<dyn Error>> {
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
fn test_open_secondary_bad_dir() -> Result<(), Box<dyn Error>> { fn test_open_secondary_bad_dir() -> Result<(), Box<dyn Error>> {
let primary_dir = TempDir::default(); let primary_dir = TempDir::default();
create_dir(&primary_dir.0)?; create_dir_all(&primary_dir.0)?;
{ {
File::create(primary_dir.0.join("CURRENT"))?.write_all(b"foo")?; File::create(primary_dir.0.join("CURRENT"))?.write_all(b"foo")?;
} }
@ -510,7 +507,7 @@ fn test_read_only() -> Result<(), Box<dyn Error>> {
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
fn test_open_read_only_bad_dir() -> Result<(), Box<dyn Error>> { fn test_open_read_only_bad_dir() -> Result<(), Box<dyn Error>> {
let dir = TempDir::default(); let dir = TempDir::default();
create_dir(&dir.0)?; create_dir_all(&dir.0)?;
{ {
File::create(dir.0.join("CURRENT"))?.write_all(b"foo")?; File::create(dir.0.join("CURRENT"))?.write_all(b"foo")?;
} }

@ -1,28 +1,32 @@
[package] [package]
name = "oxrdf" name = "oxrdf"
version = "0.1.7" version = "0.2.0-alpha.2"
authors = ["Tpt <thomas@pellissier-tanon.fr>"] authors.workspace = true
license = "MIT OR Apache-2.0" license.workspace = true
readme = "README.md" readme = "README.md"
keywords = ["RDF"] keywords = ["RDF"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxrdf" repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxrdf"
homepage = "https://oxigraph.org/"
description = """ description = """
A library providing basic data structures related to RDF A library providing basic data structures related to RDF
""" """
documentation = "https://docs.rs/oxrdf" documentation = "https://docs.rs/oxrdf"
edition = "2021" edition.workspace = true
rust-version = "1.60" rust-version.workspace = true
[features] [features]
default = [] default = []
rdf-star = [] rdf-star = []
[dependencies] [dependencies]
rand = "0.8" oxilangtag.workspace = true
oxilangtag = "0.1" oxiri.workspace = true
oxiri = "0.2" oxsdatatypes = { workspace = true, optional = true }
oxsdatatypes = { version = "0.1.3", path="../oxsdatatypes", optional = true } rand.workspace = true
thiserror.workspace = true
[lints]
workspace = true
[package.metadata.docs.rs] [package.metadata.docs.rs]
all-features = true all-features = true
rustdoc-args = ["--cfg", "docsrs"]

@ -5,7 +5,7 @@ OxRDF
[![Released API docs](https://docs.rs/oxrdf/badge.svg)](https://docs.rs/oxrdf) [![Released API docs](https://docs.rs/oxrdf/badge.svg)](https://docs.rs/oxrdf)
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdf)](https://crates.io/crates/oxrdf) [![Crates.io downloads](https://img.shields.io/crates/d/oxrdf)](https://crates.io/crates/oxrdf)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) [![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) [![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
OxRDF is a simple library providing datastructures encoding [RDF 1.1 concepts](https://www.w3.org/TR/rdf11-concepts/). OxRDF is a simple library providing datastructures encoding [RDF 1.1 concepts](https://www.w3.org/TR/rdf11-concepts/).
@ -15,6 +15,8 @@ Support for [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) i
OxRDF is inspired by [RDF/JS](https://rdf.js.org/data-model-spec/) and [Apache Commons RDF](http://commons.apache.org/proper/commons-rdf/). OxRDF is inspired by [RDF/JS](https://rdf.js.org/data-model-spec/) and [Apache Commons RDF](http://commons.apache.org/proper/commons-rdf/).
Use [`oxrdfio`](https://crates.io/crates/oxrdfio) if you need to read or write RDF files.
Usage example: Usage example:
```rust ```rust

@ -1,8 +1,6 @@
use rand::random; use rand::random;
use std::error::Error;
use std::fmt;
use std::io::Write; use std::io::Write;
use std::str; use std::{fmt, str};
/// An owned RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). /// An owned RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
/// ///
@ -15,10 +13,7 @@ use std::str;
/// ``` /// ```
/// use oxrdf::BlankNode; /// use oxrdf::BlankNode;
/// ///
/// assert_eq!( /// assert_eq!("_:a122", BlankNode::new("a122")?.to_string());
/// "_:a122",
/// BlankNode::new("a122")?.to_string()
/// );
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(()) /// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
/// ``` /// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
@ -36,7 +31,7 @@ impl BlankNode {
/// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars. /// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars.
/// ///
/// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`] /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`]
///that creates a random ID that could be easily inlined by Oxigraph stores. /// that creates a random ID that could be easily inlined by Oxigraph stores.
pub fn new(id: impl Into<String>) -> Result<Self, BlankNodeIdParseError> { pub fn new(id: impl Into<String>) -> Result<Self, BlankNodeIdParseError> {
let id = id.into(); let id = id.into();
validate_blank_node_identifier(&id)?; validate_blank_node_identifier(&id)?;
@ -111,7 +106,14 @@ impl Default for BlankNode {
/// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id. /// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id.
#[inline] #[inline]
fn default() -> Self { fn default() -> Self {
Self::new_from_unique_id(random::<u128>()) // We ensure the ID does not start with a number to be also valid with RDF/XML
loop {
let id = random();
let str = IdStr::new(id);
if matches!(str.as_str().as_bytes().first(), Some(b'a'..=b'f')) {
return Self(BlankNodeContent::Anonymous { id, str });
}
}
} }
} }
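The retry loop rejects IDs whose rendering starts with a digit because an XML `NCName`, and therefore an RDF/XML blank node label, must not begin with one. Assuming `IdStr` renders the `u128` as lowercase hex (as the `b'a'..=b'f'` test suggests), a rough stand-in for the check:

```rust
fn rdf_xml_safe(id: u128) -> bool {
    // Lowercase hex rendering, mirroring what IdStr is assumed to do.
    let hex = format!("{id:x}");
    matches!(hex.as_bytes().first(), Some(b'a'..=b'f'))
}

fn main() {
    assert!(rdf_xml_safe(0xabc)); // "abc" starts with a letter
    assert!(!rdf_xml_safe(0x1bc)); // "1bc" starts with a digit
    // Roughly 6 in 16 random values pass, so the retry loop stays short.
}
```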
@ -126,10 +128,7 @@ impl Default for BlankNode {
/// ``` /// ```
/// use oxrdf::BlankNodeRef; /// use oxrdf::BlankNodeRef;
/// ///
/// assert_eq!( /// assert_eq!("_:a122", BlankNodeRef::new("a122")?.to_string());
/// "_:a122",
/// BlankNodeRef::new("a122")?.to_string()
/// );
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(()) /// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
/// ``` /// ```
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
@ -173,7 +172,7 @@ impl<'a> BlankNodeRef<'a> {
/// Returns the underlying ID of this blank node. /// Returns the underlying ID of this blank node.
#[inline] #[inline]
pub fn as_str(self) -> &'a str { pub const fn as_str(self) -> &'a str {
match self.0 { match self.0 {
BlankNodeRefContent::Named(id) => id, BlankNodeRefContent::Named(id) => id,
BlankNodeRefContent::Anonymous { str, .. } => str, BlankNodeRefContent::Anonymous { str, .. } => str,
@ -185,12 +184,15 @@ impl<'a> BlankNodeRef<'a> {
/// ``` /// ```
/// use oxrdf::BlankNode; /// use oxrdf::BlankNode;
/// ///
/// assert_eq!(BlankNode::new_from_unique_id(128).as_ref().unique_id(), Some(128)); /// assert_eq!(
/// BlankNode::new_from_unique_id(128).as_ref().unique_id(),
/// Some(128)
/// );
/// assert_eq!(BlankNode::new("foo")?.as_ref().unique_id(), None); /// assert_eq!(BlankNode::new("foo")?.as_ref().unique_id(), None);
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(()) /// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
/// ``` /// ```
#[inline] #[inline]
pub fn unique_id(&self) -> Option<u128> { pub const fn unique_id(&self) -> Option<u128> {
match self.0 { match self.0 {
BlankNodeRefContent::Named(_) => None, BlankNodeRefContent::Named(_) => None,
BlankNodeRefContent::Anonymous { id, .. } => Some(id), BlankNodeRefContent::Anonymous { id, .. } => Some(id),
@ -264,7 +266,7 @@ impl IdStr {
fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError> { fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError> {
let mut chars = id.chars(); let mut chars = id.chars();
let front = chars.next().ok_or(BlankNodeIdParseError {})?; let front = chars.next().ok_or(BlankNodeIdParseError)?;
match front { match front {
'0'..='9' '0'..='9'
| '_' | '_'
@ -283,7 +285,7 @@ fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError>
| '\u{F900}'..='\u{FDCF}' | '\u{F900}'..='\u{FDCF}'
| '\u{FDF0}'..='\u{FFFD}' | '\u{FDF0}'..='\u{FFFD}'
| '\u{10000}'..='\u{EFFFF}' => (), | '\u{10000}'..='\u{EFFFF}' => (),
_ => return Err(BlankNodeIdParseError {}), _ => return Err(BlankNodeIdParseError),
} }
for c in chars { for c in chars {
match c { match c {
@ -309,13 +311,13 @@ fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError>
| '\u{F900}'..='\u{FDCF}' | '\u{F900}'..='\u{FDCF}'
| '\u{FDF0}'..='\u{FFFD}' | '\u{FDF0}'..='\u{FFFD}'
| '\u{10000}'..='\u{EFFFF}' => (), | '\u{10000}'..='\u{EFFFF}' => (),
_ => return Err(BlankNodeIdParseError {}), _ => return Err(BlankNodeIdParseError),
} }
} }
// Must not end with a dot // Must not end with a dot
if id.ends_with('.') { if id.ends_with('.') {
Err(BlankNodeIdParseError {}) Err(BlankNodeIdParseError)
} else { } else {
Ok(()) Ok(())
} }
@ -342,20 +344,14 @@ fn to_integer_id(id: &str) -> Option<u128> {
} }
/// An error raised during [`BlankNode`] IDs validation. /// An error raised during [`BlankNode`] IDs validation.
#[derive(Debug)] #[derive(Debug, thiserror::Error)]
pub struct BlankNodeIdParseError {} #[error("The blank node identifier is invalid")]
pub struct BlankNodeIdParseError;
impl fmt::Display for BlankNodeIdParseError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "The blank node identifier is invalid")
}
}
impl Error for BlankNodeIdParseError {}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
#![allow(clippy::panic_in_result_fn)]
use super::*; use super::*;
#[test] #[test]
@ -372,13 +368,13 @@ mod tests {
#[test] #[test]
fn new_validation() { fn new_validation() {
assert!(BlankNode::new("").is_err()); BlankNode::new("").unwrap_err();
assert!(BlankNode::new("a").is_ok()); BlankNode::new("a").unwrap();
assert!(BlankNode::new("-").is_err()); BlankNode::new("-").unwrap_err();
assert!(BlankNode::new("a-").is_ok()); BlankNode::new("a-").unwrap();
assert!(BlankNode::new(".").is_err()); BlankNode::new(".").unwrap_err();
assert!(BlankNode::new("a.").is_err()); BlankNode::new("a.").unwrap_err();
assert!(BlankNode::new("a.a").is_ok()); BlankNode::new("a.a").unwrap();
} }
#[test] #[test]

@ -20,26 +20,29 @@
//! assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); //! assert_eq!(vec![TripleRef::new(ex, ex, ex)], results);
//! //!
//! // Print //! // Print
//! assert_eq!(dataset.to_string(), "<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n"); //! assert_eq!(
//! dataset.to_string(),
//! "<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n"
//! );
//! # Result::<_,Box<dyn std::error::Error>>::Ok(()) //! # Result::<_,Box<dyn std::error::Error>>::Ok(())
//! ``` //! ```
//! //!
//! See also [`Graph`] if you only care about plain triples. //! See also [`Graph`] if you only care about plain triples.
use crate::interning::*; use crate::interning::*;
use crate::SubjectRef;
use crate::*; use crate::*;
use std::cmp::min;
use std::collections::hash_map::DefaultHasher; use std::collections::hash_map::DefaultHasher;
use std::collections::BTreeSet; use std::collections::{BTreeSet, HashMap, HashSet};
use std::collections::{HashMap, HashSet};
use std::fmt; use std::fmt;
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
/// An in-memory [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). /// An in-memory [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
/// ///
/// It can accommodate a fairly large number of quads (in the few millions). /// It can accommodate a fairly large number of quads (in the few millions).
/// Beware: it interns the string and does not do any garbage collection yet: ///
/// if you insert and remove a lot of different terms, memory will grow without any reduction. /// <div class="warning">It interns the strings and does not do any garbage collection yet:
/// if you insert and remove a lot of different terms, memory will grow without any reduction.</div>
/// ///
/// Usage example: /// Usage example:
/// ``` /// ```
@ -61,7 +64,7 @@ use std::hash::{Hash, Hasher};
/// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); /// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ``` /// ```
#[derive(Debug, Default)] #[derive(Debug, Default, Clone)]
pub struct Dataset { pub struct Dataset {
interner: Interner, interner: Interner,
gspo: BTreeSet<( gspo: BTreeSet<(
@ -183,6 +186,7 @@ impl Dataset {
.map(move |q| self.decode_spog(q)) .map(move |q| self.decode_spog(q))
} }
#[allow(clippy::map_identity)]
fn interned_quads_for_subject( fn interned_quads_for_subject(
&self, &self,
subject: &InternedSubject, subject: &InternedSubject,
@@ -293,6 +297,18 @@ impl Dataset {
             .map(|(o, s, p, g)| (s, p, o, g))
     }
 
+    pub fn quads_for_graph_name<'a, 'b>(
+        &'a self,
+        graph_name: impl Into<GraphNameRef<'b>>,
+    ) -> impl Iterator<Item = QuadRef<'a>> + 'a {
+        let graph_name = self
+            .encoded_graph_name(graph_name)
+            .unwrap_or_else(InternedGraphName::impossible);
+
+        self.interned_quads_for_graph_name(&graph_name)
+            .map(move |q| self.decode_spog(q))
+    }
+
     fn interned_quads_for_graph_name(
         &self,
         graph_name: &InternedGraphName,
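The new `quads_for_graph_name` accessor added above mirrors the existing per-subject and per-object lookups: a graph name that was never interned is mapped to the `impossible` sentinel, so the lookup yields an empty iterator rather than an error. A hedged usage sketch (the IRIs are made up):

    use oxrdf::{Dataset, NamedNodeRef, QuadRef};

    fn main() {
        let mut dataset = Dataset::new();
        let ex = NamedNodeRef::new_unchecked("http://example.com");
        let g = NamedNodeRef::new_unchecked("http://example.com/g");
        dataset.insert(QuadRef::new(ex, ex, ex, g));
        assert_eq!(dataset.quads_for_graph_name(g).count(), 1);
        // An unknown graph name yields an empty iterator, not an error.
        let other = NamedNodeRef::new_unchecked("http://example.com/other");
        assert_eq!(dataset.quads_for_graph_name(other).count(), 0);
    }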
@@ -525,9 +541,12 @@ impl Dataset {
     /// Warning 3: This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset.
     pub fn canonicalize(&mut self) {
         let bnodes = self.blank_nodes();
-        let (hash, partition) =
-            self.hash_bnodes(bnodes.into_iter().map(|bnode| (bnode, 0)).collect());
-        let new_quads = self.distinguish(&hash, &partition);
+        let quads_per_blank_node = self.quads_per_blank_nodes();
+        let (hash, partition) = self.hash_bnodes(
+            bnodes.into_iter().map(|bnode| (bnode, 0)).collect(),
+            &quads_per_blank_node,
+        );
+        let new_quads = self.distinguish(&hash, &partition, &quads_per_blank_node);
         self.clear();
         for quad in new_quads {
             self.insert_encoded(quad);
@@ -572,107 +591,172 @@ impl Dataset {
         }
     }
 
+    fn quads_per_blank_nodes(&self) -> QuadsPerBlankNode {
+        let mut map: HashMap<_, Vec<_>> = HashMap::new();
+        for quad in &self.spog {
+            if let InternedSubject::BlankNode(bnode) = &quad.0 {
+                map.entry(*bnode).or_default().push(quad.clone());
+            }
+            #[cfg(feature = "rdf-star")]
+            if let InternedSubject::Triple(t) = &quad.0 {
+                Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, &mut map);
+            }
+            if let InternedTerm::BlankNode(bnode) = &quad.2 {
+                map.entry(*bnode).or_default().push(quad.clone());
+            }
+            #[cfg(feature = "rdf-star")]
+            if let InternedTerm::Triple(t) = &quad.2 {
+                Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, &mut map);
+            }
+            if let InternedGraphName::BlankNode(bnode) = &quad.3 {
+                map.entry(*bnode).or_default().push(quad.clone());
+            }
+        }
+        map
+    }
+
+    #[cfg(feature = "rdf-star")]
+    fn add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(
+        quad: &(
+            InternedSubject,
+            InternedNamedNode,
+            InternedTerm,
+            InternedGraphName,
+        ),
+        triple: &InternedTriple,
+        map: &mut QuadsPerBlankNode,
+    ) {
+        if let InternedSubject::BlankNode(bnode) = &triple.subject {
+            map.entry(*bnode).or_default().push(quad.clone());
+        }
+        if let InternedSubject::Triple(t) = &triple.subject {
+            Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, map);
+        }
+        if let InternedTerm::BlankNode(bnode) = &triple.object {
+            map.entry(*bnode).or_default().push(quad.clone());
+        }
+        if let InternedTerm::Triple(t) = &triple.object {
+            Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, map);
+        }
+    }
+
     fn hash_bnodes(
         &self,
         mut hashes: HashMap<InternedBlankNode, u64>,
+        quads_per_blank_node: &QuadsPerBlankNode,
     ) -> (
         HashMap<InternedBlankNode, u64>,
         Vec<(u64, Vec<InternedBlankNode>)>,
     ) {
         let mut to_hash = Vec::new();
-        let mut partition: HashMap<u64, Vec<InternedBlankNode>> = HashMap::new();
-        let mut partition_len = 0;
-        loop {
-            //TODO: improve termination
-            let mut new_hashes = HashMap::new();
-            for (bnode, old_hash) in &hashes {
-                for (_, p, o, g) in
-                    self.interned_quads_for_subject(&InternedSubject::BlankNode(*bnode))
-                {
-                    to_hash.push((
-                        self.hash_named_node(*p),
-                        self.hash_term(o, &hashes),
-                        self.hash_graph_name(g, &hashes),
-                        0,
-                    ));
-                }
-                for (s, p, _, g) in self.interned_quads_for_object(&InternedTerm::BlankNode(*bnode))
-                {
-                    to_hash.push((
-                        self.hash_subject(s, &hashes),
-                        self.hash_named_node(*p),
-                        self.hash_graph_name(g, &hashes),
-                        1,
-                    ));
-                }
-                for (s, p, o, _) in
-                    self.interned_quads_for_graph_name(&InternedGraphName::BlankNode(*bnode))
-                {
-                    to_hash.push((
-                        self.hash_subject(s, &hashes),
-                        self.hash_named_node(*p),
-                        self.hash_term(o, &hashes),
-                        2,
-                    ));
-                }
-                to_hash.sort_unstable();
-                let hash = Self::hash_tuple((old_hash, &to_hash));
-                to_hash.clear();
-                new_hashes.insert(*bnode, hash);
+        let mut to_do = hashes
+            .keys()
+            .map(|bnode| (*bnode, true))
+            .collect::<HashMap<_, _>>();
+        let mut partition = HashMap::<_, Vec<_>>::with_capacity(hashes.len());
+        let mut old_partition_count = usize::MAX;
+        while old_partition_count != partition.len() {
+            old_partition_count = partition.len();
+            partition.clear();
+            let mut new_hashes = hashes.clone();
+            for bnode in hashes.keys() {
+                let hash = if to_do.contains_key(bnode) {
+                    for (s, p, o, g) in &quads_per_blank_node[bnode] {
+                        to_hash.push((
+                            self.hash_subject(s, *bnode, &hashes),
+                            self.hash_named_node(*p),
+                            self.hash_term(o, *bnode, &hashes),
+                            self.hash_graph_name(g, *bnode, &hashes),
+                        ));
+                    }
+                    to_hash.sort_unstable();
+                    let hash = Self::hash_tuple((&to_hash, hashes[bnode]));
+                    to_hash.clear();
+                    if hash == hashes[bnode] {
+                        to_do.insert(*bnode, false);
+                    } else {
+                        new_hashes.insert(*bnode, hash);
+                    }
+                    hash
+                } else {
+                    hashes[bnode]
+                };
                 partition.entry(hash).or_default().push(*bnode);
             }
-            if partition.len() == partition_len {
-                let mut partition: Vec<_> = partition.into_iter().collect();
-                partition.sort_by(|(h1, b1), (h2, b2)| (b1.len(), h1).cmp(&(b2.len(), h2)));
-                return (hashes, partition);
-            }
             hashes = new_hashes;
-            partition_len = partition.len();
-            partition.clear();
         }
+        let mut partition: Vec<_> = partition.into_iter().collect();
+        partition.sort_unstable_by(|(h1, b1), (h2, b2)| (b1.len(), h1).cmp(&(b2.len(), h2)));
+        (hashes, partition)
     }
     fn hash_named_node(&self, node: InternedNamedNode) -> u64 {
         Self::hash_tuple(node.decode_from(&self.interner))
     }
 
+    fn hash_blank_node(
+        node: InternedBlankNode,
+        current_blank_node: InternedBlankNode,
+        bnodes_hash: &HashMap<InternedBlankNode, u64>,
+    ) -> u64 {
+        if node == current_blank_node {
+            u64::MAX
+        } else {
+            bnodes_hash[&node]
+        }
+    }
+
     fn hash_subject(
         &self,
         node: &InternedSubject,
+        current_blank_node: InternedBlankNode,
         bnodes_hash: &HashMap<InternedBlankNode, u64>,
     ) -> u64 {
-        #[cfg(feature = "rdf-star")]
-        if let InternedSubject::Triple(triple) = node {
-            return self.hash_triple(triple, bnodes_hash);
-        }
-        if let InternedSubject::BlankNode(bnode) = node {
-            bnodes_hash[bnode]
-        } else {
-            Self::hash_tuple(node.decode_from(&self.interner))
+        match node {
+            InternedSubject::NamedNode(node) => Self::hash_tuple(node.decode_from(&self.interner)),
+            InternedSubject::BlankNode(bnode) => {
+                Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
+            }
+            #[cfg(feature = "rdf-star")]
+            InternedSubject::Triple(triple) => {
+                self.hash_triple(triple, current_blank_node, bnodes_hash)
+            }
         }
     }
-    fn hash_term(&self, term: &InternedTerm, bnodes_hash: &HashMap<InternedBlankNode, u64>) -> u64 {
-        #[cfg(feature = "rdf-star")]
-        if let InternedTerm::Triple(triple) = term {
-            return self.hash_triple(triple, bnodes_hash);
-        }
-        if let InternedTerm::BlankNode(bnode) = term {
-            bnodes_hash[bnode]
-        } else {
-            Self::hash_tuple(term.decode_from(&self.interner))
+    fn hash_term(
+        &self,
+        term: &InternedTerm,
+        current_blank_node: InternedBlankNode,
+        bnodes_hash: &HashMap<InternedBlankNode, u64>,
+    ) -> u64 {
+        match term {
+            InternedTerm::NamedNode(node) => Self::hash_tuple(node.decode_from(&self.interner)),
+            InternedTerm::BlankNode(bnode) => {
+                Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
+            }
+            InternedTerm::Literal(literal) => Self::hash_tuple(literal.decode_from(&self.interner)),
+            #[cfg(feature = "rdf-star")]
+            InternedTerm::Triple(triple) => {
+                self.hash_triple(triple, current_blank_node, bnodes_hash)
+            }
         }
     }
     fn hash_graph_name(
         &self,
         graph_name: &InternedGraphName,
+        current_blank_node: InternedBlankNode,
         bnodes_hash: &HashMap<InternedBlankNode, u64>,
     ) -> u64 {
-        if let InternedGraphName::BlankNode(bnode) = graph_name {
-            bnodes_hash[bnode]
-        } else {
-            Self::hash_tuple(graph_name.decode_from(&self.interner))
+        match graph_name {
+            InternedGraphName::NamedNode(node) => {
+                Self::hash_tuple(node.decode_from(&self.interner))
+            }
+            InternedGraphName::BlankNode(bnode) => {
+                Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
+            }
+            InternedGraphName::DefaultGraph => 0,
         }
     }
@@ -680,12 +764,13 @@ impl Dataset {
     fn hash_triple(
         &self,
         triple: &InternedTriple,
+        current_blank_node: InternedBlankNode,
         bnodes_hash: &HashMap<InternedBlankNode, u64>,
     ) -> u64 {
         Self::hash_tuple((
-            self.hash_subject(&triple.subject, bnodes_hash),
+            self.hash_subject(&triple.subject, current_blank_node, bnodes_hash),
             self.hash_named_node(triple.predicate),
-            self.hash_term(&triple.object, bnodes_hash),
+            self.hash_term(&triple.object, current_blank_node, bnodes_hash),
         ))
     }
@@ -699,33 +784,25 @@ impl Dataset {
         &mut self,
         hash: &HashMap<InternedBlankNode, u64>,
         partition: &[(u64, Vec<InternedBlankNode>)],
+        quads_per_blank_node: &QuadsPerBlankNode,
     ) -> Vec<(
         InternedSubject,
         InternedNamedNode,
         InternedTerm,
         InternedGraphName,
     )> {
-        let b_prime = partition.iter().find_map(|(_, b)| (b.len() > 1).then(|| b));
+        let b_prime = partition.iter().map(|(_, b)| b).find(|b| b.len() > 1);
         if let Some(b_prime) = b_prime {
             b_prime
                 .iter()
                 .map(|b| {
                     let mut hash_prime = hash.clone();
                     hash_prime.insert(*b, Self::hash_tuple((hash_prime[b], 22)));
-                    let (hash_prime_prime, partition_prime) = self.hash_bnodes(hash_prime);
-                    self.distinguish(&hash_prime_prime, &partition_prime)
-                })
-                .fold(None, |a, b| {
-                    Some(if let Some(a) = a {
-                        if a <= b {
-                            a
-                        } else {
-                            b
-                        }
-                    } else {
-                        b
-                    })
+                    let (hash_prime_prime, partition_prime) =
+                        self.hash_bnodes(hash_prime, quads_per_blank_node);
+                    self.distinguish(&hash_prime_prime, &partition_prime, quads_per_blank_node)
                 })
+                .reduce(min)
                 .unwrap_or_default()
         } else {
             self.label(hash)
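Swapping the hand-rolled `fold(None, ...)` for `Iterator::reduce(min)` is behaviour-preserving: both keep the smallest candidate quad list, i.e. the lexicographically smallest canonical labelling among all choices of distinguished blank node. A quick illustration with plain vectors (the values are made up):

    use std::cmp::min;

    fn main() {
        let candidates = vec![vec![3, 1], vec![2, 9], vec![2, 5]];
        // `reduce(min)` keeps the lexicographically smallest element,
        // exactly what the removed `fold(None, ...)` computed by hand.
        assert_eq!(candidates.into_iter().reduce(min), Some(vec![2, 5]));
    }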
@@ -747,54 +824,43 @@ impl Dataset {
             .into_iter()
             .map(|(s, p, o, g)| {
                 (
-                    if let InternedSubject::BlankNode(bnode) = s {
-                        InternedSubject::BlankNode(self.map_bnode(bnode, hashes))
-                    } else {
-                        #[cfg(feature = "rdf-star")]
-                        {
-                            if let InternedSubject::Triple(triple) = s {
-                                InternedSubject::Triple(Box::new(InternedTriple::encoded_into(
-                                    self.label_triple(&triple, hashes).as_ref(),
-                                    &mut self.interner,
-                                )))
-                            } else {
-                                s
-                            }
-                        }
-                        #[cfg(not(feature = "rdf-star"))]
-                        {
-                            s
+                    match s {
+                        InternedSubject::NamedNode(_) => s,
+                        InternedSubject::BlankNode(bnode) => {
+                            InternedSubject::BlankNode(self.map_bnode(bnode, hashes))
+                        }
+                        #[cfg(feature = "rdf-star")]
+                        InternedSubject::Triple(triple) => {
+                            InternedSubject::Triple(Box::new(InternedTriple::encoded_into(
+                                self.label_triple(&triple, hashes).as_ref(),
+                                &mut self.interner,
+                            )))
                         }
                     },
                     p,
-                    if let InternedTerm::BlankNode(bnode) = o {
-                        InternedTerm::BlankNode(self.map_bnode(bnode, hashes))
-                    } else {
-                        #[cfg(feature = "rdf-star")]
-                        {
-                            if let InternedTerm::Triple(triple) = o {
-                                InternedTerm::Triple(Box::new(InternedTriple::encoded_into(
-                                    self.label_triple(&triple, hashes).as_ref(),
-                                    &mut self.interner,
-                                )))
-                            } else {
-                                o
-                            }
-                        }
-                        #[cfg(not(feature = "rdf-star"))]
-                        {
-                            o
+                    match o {
+                        InternedTerm::NamedNode(_) | InternedTerm::Literal(_) => o,
+                        InternedTerm::BlankNode(bnode) => {
+                            InternedTerm::BlankNode(self.map_bnode(bnode, hashes))
+                        }
+                        #[cfg(feature = "rdf-star")]
+                        InternedTerm::Triple(triple) => {
+                            InternedTerm::Triple(Box::new(InternedTriple::encoded_into(
+                                self.label_triple(&triple, hashes).as_ref(),
+                                &mut self.interner,
+                            )))
                         }
                     },
-                    if let InternedGraphName::BlankNode(bnode) = g {
-                        InternedGraphName::BlankNode(self.map_bnode(bnode, hashes))
-                    } else {
-                        g
+                    match g {
+                        InternedGraphName::NamedNode(_) | InternedGraphName::DefaultGraph => g,
+                        InternedGraphName::BlankNode(bnode) => {
+                            InternedGraphName::BlankNode(self.map_bnode(bnode, hashes))
+                        }
                     },
                 )
             })
             .collect();
-        quads.sort();
+        quads.sort_unstable();
         quads
     }
@@ -862,7 +928,7 @@ impl<'a> IntoIterator for &'a Dataset {
     type Item = QuadRef<'a>;
     type IntoIter = Iter<'a>;
 
-    fn into_iter(self) -> Iter<'a> {
+    fn into_iter(self) -> Self::IntoIter {
         self.iter()
     }
 }
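Spelling the return type as `Self::IntoIter` is purely cosmetic; the impl is what lets a `&Dataset` be consumed directly by a `for` loop. A hedged sketch (the IRI is made up):

    use oxrdf::{Dataset, GraphNameRef, NamedNodeRef, QuadRef};

    fn main() {
        let mut dataset = Dataset::new();
        let ex = NamedNodeRef::new_unchecked("http://example.com");
        dataset.insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph));
        // Uses `impl IntoIterator for &Dataset`.
        for quad in &dataset {
            println!("{quad}");
        }
    }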
@@ -1220,7 +1286,7 @@ impl<'a> IntoIterator for GraphView<'a> {
     type Item = TripleRef<'a>;
     type IntoIter = GraphViewIter<'a>;
 
-    fn into_iter(self) -> GraphViewIter<'a> {
+    fn into_iter(self) -> Self::IntoIter {
         self.iter()
     }
 }
@@ -1229,7 +1295,7 @@ impl<'a, 'b> IntoIterator for &'b GraphView<'a> {
     type Item = TripleRef<'a>;
     type IntoIter = GraphViewIter<'a>;
 
-    fn into_iter(self) -> GraphViewIter<'a> {
+    fn into_iter(self) -> Self::IntoIter {
         self.iter()
     }
 }
@@ -1431,7 +1497,7 @@ impl<'a> IntoIterator for &'a GraphViewMut<'a> {
     type Item = TripleRef<'a>;
     type IntoIter = GraphViewIter<'a>;
 
-    fn into_iter(self) -> GraphViewIter<'a> {
+    fn into_iter(self) -> Self::IntoIter {
         self.iter()
     }
 }
@@ -1462,7 +1528,7 @@ pub struct Iter<'a> {
 impl<'a> Iterator for Iter<'a> {
     type Item = QuadRef<'a>;
 
-    fn next(&mut self) -> Option<QuadRef<'a>> {
+    fn next(&mut self) -> Option<Self::Item> {
         self.inner
             .next()
             .map(|(s, p, o, g)| self.dataset.decode_spog((s, p, o, g)))
@@ -1486,9 +1552,57 @@ pub struct GraphViewIter<'a> {
 impl<'a> Iterator for GraphViewIter<'a> {
     type Item = TripleRef<'a>;
 
-    fn next(&mut self) -> Option<TripleRef<'a>> {
+    fn next(&mut self) -> Option<Self::Item> {
         self.inner
             .next()
             .map(|(_, s, p, o)| self.dataset.decode_spo((s, p, o)))
     }
 }
+
+type QuadsPerBlankNode = HashMap<
+    InternedBlankNode,
+    Vec<(
+        InternedSubject,
+        InternedNamedNode,
+        InternedTerm,
+        InternedGraphName,
+    )>,
+>;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_canon() {
+        let mut dataset = Dataset::new();
+        dataset.insert(QuadRef::new(
+            BlankNode::default().as_ref(),
+            NamedNodeRef::new_unchecked("http://ex"),
+            BlankNode::default().as_ref(),
+            GraphNameRef::DefaultGraph,
+        ));
+        dataset.insert(QuadRef::new(
+            BlankNode::default().as_ref(),
+            NamedNodeRef::new_unchecked("http://ex"),
+            BlankNode::default().as_ref(),
+            GraphNameRef::DefaultGraph,
+        ));
+        dataset.canonicalize();
+        let mut dataset2 = Dataset::new();
+        dataset2.insert(QuadRef::new(
+            BlankNode::default().as_ref(),
+            NamedNodeRef::new_unchecked("http://ex"),
+            BlankNode::default().as_ref(),
+            GraphNameRef::DefaultGraph,
+        ));
+        dataset2.insert(QuadRef::new(
+            BlankNode::default().as_ref(),
+            NamedNodeRef::new_unchecked("http://ex"),
+            BlankNode::default().as_ref(),
+            GraphNameRef::DefaultGraph,
+        ));
+        dataset2.canonicalize();
+        assert_eq!(dataset, dataset2);
+    }
+}
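`test_canon` exercises canonicalization with freshly generated blank nodes; the same property holds for hand-labelled ones. A hedged sketch (the graph shape and labels are made up):

    use oxrdf::{BlankNode, Dataset, GraphNameRef, NamedNodeRef, QuadRef};

    fn main() {
        let ex = NamedNodeRef::new_unchecked("http://example.com/p");
        let (a, b) = (BlankNode::new_unchecked("s1"), BlankNode::new_unchecked("s2"));
        let mut d1 = Dataset::new();
        d1.insert(QuadRef::new(&a, ex, &b, GraphNameRef::DefaultGraph));
        // Same shape, different blank node labels.
        let (x, y) = (BlankNode::new_unchecked("t1"), BlankNode::new_unchecked("t2"));
        let mut d2 = Dataset::new();
        d2.insert(QuadRef::new(&x, ex, &y, GraphNameRef::DefaultGraph));
        d1.canonicalize();
        d2.canonicalize();
        assert_eq!(d1, d2); // isomorphic datasets compare equal once canonicalized
    }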

@@ -16,7 +16,10 @@
 //! assert_eq!(vec![triple], results);
 //!
 //! // Print
-//! assert_eq!(graph.to_string(), "<http://example.com> <http://example.com> <http://example.com> .\n");
+//! assert_eq!(
+//!     graph.to_string(),
+//!     "<http://example.com> <http://example.com> <http://example.com> .\n"
+//! );
 //! # Result::<_,Box<dyn std::error::Error>>::Ok(())
 //! ```
 //!
@@ -29,8 +32,9 @@ use std::fmt;
 /// An in-memory [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
 ///
 /// It can accommodate a fairly large number of triples (in the few millions).
-/// Beware: it interns the string and does not do any garbage collection yet:
-/// if you insert and remove a lot of different terms, memory will grow without any reduction.
+///
+/// <div class="warning">It interns the string and does not do any garbage collection yet:
+/// if you insert and remove a lot of different terms, memory will grow without any reduction.</div>
 ///
 /// Usage example:
 /// ```
@@ -48,7 +52,7 @@ use std::fmt;
 /// assert_eq!(vec![triple], results);
 /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
 /// ```
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone)]
 pub struct Graph {
     dataset: Dataset,
 }
@@ -228,7 +232,7 @@ impl<'a> IntoIterator for &'a Graph {
     type Item = TripleRef<'a>;
     type IntoIter = Iter<'a>;
 
-    fn into_iter(self) -> Iter<'a> {
+    fn into_iter(self) -> Self::IntoIter {
         self.iter()
     }
 }
@@ -275,7 +279,7 @@ pub struct Iter<'a> {
 impl<'a> Iterator for Iter<'a> {
     type Item = TripleRef<'a>;
 
-    fn next(&mut self) -> Option<TripleRef<'a>> {
+    fn next(&mut self) -> Option<Self::Item> {
         self.inner.next()
     }
 }

@@ -4,10 +4,11 @@ use crate::*;
 use std::collections::hash_map::{Entry, HashMap, RandomState};
 use std::hash::{BuildHasher, Hasher};
 
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone)]
 pub struct Interner {
     hasher: RandomState,
     string_for_hash: HashMap<u64, String, IdentityHasherBuilder>,
+    string_for_blank_node_id: HashMap<u128, String>,
     #[cfg(feature = "rdf-star")]
     triples: HashMap<InternedTriple, Triple>,
 }
@@ -100,7 +101,7 @@ impl InternedNamedNode {
         })
     }
 
-    pub fn decode_from(self, interner: &Interner) -> NamedNodeRef {
+    pub fn decode_from(self, interner: &Interner) -> NamedNodeRef<'_> {
         NamedNodeRef::new_unchecked(interner.resolve(self.id))
     }
 
@@ -120,29 +121,53 @@ impl InternedNamedNode {
 }
 
 #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
-pub struct InternedBlankNode {
-    id: Key,
+pub enum InternedBlankNode {
+    Number { id: u128 },
+    Other { id: Key },
 }
 
 impl InternedBlankNode {
     pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Interner) -> Self {
-        Self {
-            id: interner.get_or_intern(blank_node.as_str()),
+        if let Some(id) = blank_node.unique_id() {
+            interner
+                .string_for_blank_node_id
+                .entry(id)
+                .or_insert_with(|| blank_node.as_str().into());
+            Self::Number { id }
+        } else {
+            Self::Other {
+                id: interner.get_or_intern(blank_node.as_str()),
+            }
         }
     }
 
     pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Interner) -> Option<Self> {
-        Some(Self {
-            id: interner.get(blank_node.as_str())?,
-        })
+        if let Some(id) = blank_node.unique_id() {
+            interner
+                .string_for_blank_node_id
+                .contains_key(&id)
+                .then_some(Self::Number { id })
+        } else {
+            Some(Self::Other {
+                id: interner.get(blank_node.as_str())?,
+            })
+        }
     }
 
-    pub fn decode_from(self, interner: &Interner) -> BlankNodeRef {
-        BlankNodeRef::new_unchecked(interner.resolve(self.id))
+    pub fn decode_from(self, interner: &Interner) -> BlankNodeRef<'_> {
+        BlankNodeRef::new_unchecked(match self {
+            Self::Number { id } => &interner.string_for_blank_node_id[&id],
+            Self::Other { id } => interner.resolve(id),
+        })
     }
 
     pub fn next(self) -> Self {
-        Self { id: self.id.next() }
+        match self {
+            Self::Number { id } => Self::Number {
+                id: id.saturating_add(1),
+            },
+            Self::Other { id } => Self::Other { id: id.next() },
+        }
     }
 }
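The new `InternedBlankNode::Number` variant exploits the fact that blank nodes created with `BlankNode::default()` (or `new_from_unique_id`) carry a 128-bit numeric ID, so the interner can key them directly by that integer instead of hashing their string form. A hedged probe of the public accessor this split relies on (assuming `unique_id` is exposed as in current oxrdf):

    use oxrdf::BlankNode;

    fn main() {
        // Generated blank nodes expose their numeric ID...
        let generated = BlankNode::default();
        assert!(generated.as_ref().unique_id().is_some());
        // ...while a label with non-hex characters falls back to string interning.
        let named = BlankNode::new("example").unwrap();
        assert_eq!(named.as_ref().unique_id(), None);
    }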
@@ -467,7 +492,7 @@ impl InternedTriple {
         interner
             .triples
             .contains_key(&interned_triple)
-            .then(|| interned_triple)
+            .then_some(interned_triple)
     }
 
     pub fn next(&self) -> Self {
@@ -479,14 +504,14 @@ impl InternedTriple {
     }
 }
 
-#[derive(Default)]
-struct IdentityHasherBuilder {}
+#[derive(Default, Clone)]
+struct IdentityHasherBuilder;
 
 impl BuildHasher for IdentityHasherBuilder {
     type Hasher = IdentityHasher;
 
-    fn build_hasher(&self) -> IdentityHasher {
-        IdentityHasher::default()
+    fn build_hasher(&self) -> Self::Hasher {
+        Self::Hasher::default()
     }
 }
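`IdentityHasherBuilder` exists because `string_for_hash` is keyed by values that are already hashes; re-hashing a `u64` would be wasted work. A generic, self-contained sketch of the pattern (not the exact `IdentityHasher` from this file):

    use std::collections::HashMap;
    use std::hash::{BuildHasher, Hasher};

    #[derive(Default, Clone)]
    struct IdentityHasherBuilder;

    #[derive(Default)]
    struct IdentityHasher(u64);

    impl Hasher for IdentityHasher {
        fn finish(&self) -> u64 {
            self.0
        }
        fn write(&mut self, _bytes: &[u8]) {
            unimplemented!("only u64 keys are supported")
        }
        fn write_u64(&mut self, i: u64) {
            self.0 = i; // the key is already a hash: pass it through unchanged
        }
    }

    impl BuildHasher for IdentityHasherBuilder {
        type Hasher = IdentityHasher;
        fn build_hasher(&self) -> Self::Hasher {
            IdentityHasher::default()
        }
    }

    fn main() {
        let mut map: HashMap<u64, &str, IdentityHasherBuilder> = HashMap::default();
        map.insert(0xDEAD_BEEF, "already hashed");
        assert_eq!(map.get(&0xDEAD_BEEF), Some(&"already hashed"));
    }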

@@ -1,6 +1,5 @@
 use crate::named_node::NamedNode;
-use crate::vocab::rdf;
-use crate::vocab::xsd;
+use crate::vocab::{rdf, xsd};
 use crate::NamedNodeRef;
 use oxilangtag::{LanguageTag, LanguageTagParseError};
 #[cfg(feature = "oxsdatatypes")]
@@ -15,8 +14,8 @@ use std::option::Option;
 /// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
 /// ```
 /// # use oxilangtag::LanguageTagParseError;
-/// use oxrdf::Literal;
 /// use oxrdf::vocab::xsd;
+/// use oxrdf::Literal;
 ///
 /// assert_eq!(
 ///     "\"foo\\nbar\"",
@@ -24,12 +23,12 @@ use std::option::Option;
 /// );
 ///
 /// assert_eq!(
-///     "\"1999-01-01\"^^<http://www.w3.org/2001/XMLSchema#date>",
+///     r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#,
 ///     Literal::new_typed_literal("1999-01-01", xsd::DATE).to_string()
 /// );
 ///
 /// assert_eq!(
-///     "\"foo\"@en",
+///     r#""foo"@en"#,
 ///     Literal::new_language_tagged_literal("foo", "en")?.to_string()
 /// );
 /// # Result::<(), LanguageTagParseError>::Ok(())
@@ -427,8 +426,8 @@ impl From<DayTimeDuration> for Literal {
 ///
 /// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
 /// ```
-/// use oxrdf::LiteralRef;
 /// use oxrdf::vocab::xsd;
+/// use oxrdf::LiteralRef;
 ///
 /// assert_eq!(
 ///     "\"foo\\nbar\"",
@@ -436,7 +435,7 @@ impl From<DayTimeDuration> for Literal {
 /// );
 ///
 /// assert_eq!(
-///     "\"1999-01-01\"^^<http://www.w3.org/2001/XMLSchema#date>",
+///     r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#,
 ///     LiteralRef::new_typed_literal("1999-01-01", xsd::DATE).to_string()
 /// );
 /// ```
@@ -459,7 +458,7 @@ enum LiteralRefContent<'a> {
 impl<'a> LiteralRef<'a> {
     /// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal).
     #[inline]
-    pub fn new_simple_literal(value: &'a str) -> Self {
+    pub const fn new_simple_literal(value: &'a str) -> Self {
         LiteralRef(LiteralRefContent::String(value))
     }
 
@@ -482,13 +481,13 @@ impl<'a> LiteralRef<'a> {
     ///
     /// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data.
     #[inline]
-    pub fn new_language_tagged_literal_unchecked(value: &'a str, language: &'a str) -> Self {
+    pub const fn new_language_tagged_literal_unchecked(value: &'a str, language: &'a str) -> Self {
         LiteralRef(LiteralRefContent::LanguageTaggedString { value, language })
     }
 
     /// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form)
     #[inline]
-    pub fn value(self) -> &'a str {
+    pub const fn value(self) -> &'a str {
         match self.0 {
             LiteralRefContent::String(value)
             | LiteralRefContent::LanguageTaggedString { value, .. }
@@ -501,7 +500,7 @@ impl<'a> LiteralRef<'a> {
     /// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47).
     /// They are normalized to lowercase by this implementation.
     #[inline]
-    pub fn language(self) -> Option<&'a str> {
+    pub const fn language(self) -> Option<&'a str> {
         match self.0 {
             LiteralRefContent::LanguageTaggedString { language, .. } => Some(language),
             _ => None,
@@ -513,7 +512,7 @@ impl<'a> LiteralRef<'a> {
     /// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
     /// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
     #[inline]
-    pub fn datatype(self) -> NamedNodeRef<'a> {
+    pub const fn datatype(self) -> NamedNodeRef<'a> {
         match self.0 {
             LiteralRefContent::String(_) => xsd::STRING,
             LiteralRefContent::LanguageTaggedString { .. } => rdf::LANG_STRING,
@@ -526,7 +525,7 @@ impl<'a> LiteralRef<'a> {
     /// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
     /// or has the datatype [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
     #[inline]
-    pub fn is_plain(self) -> bool {
+    pub const fn is_plain(self) -> bool {
         matches!(
             self.0,
             LiteralRefContent::String(_) | LiteralRefContent::LanguageTaggedString { .. }
@@ -552,7 +551,7 @@ impl<'a> LiteralRef<'a> {
     /// Extract components from this literal
     #[inline]
-    pub fn destruct(self) -> (&'a str, Option<NamedNodeRef<'a>>, Option<&'a str>) {
+    pub const fn destruct(self) -> (&'a str, Option<NamedNodeRef<'a>>, Option<&'a str>) {
         match self.0 {
             LiteralRefContent::String(s) => (s, None, None),
             LiteralRefContent::LanguageTaggedString { value, language } => {
@@ -620,11 +619,15 @@ pub fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result {
     f.write_char('"')?;
     for c in string.chars() {
         match c {
+            '\u{08}' => f.write_str("\\b"),
+            '\t' => f.write_str("\\t"),
             '\n' => f.write_str("\\n"),
+            '\u{0C}' => f.write_str("\\f"),
             '\r' => f.write_str("\\r"),
             '"' => f.write_str("\\\""),
             '\\' => f.write_str("\\\\"),
-            c => f.write_char(c),
+            '\0'..='\u{1F}' | '\u{7F}' => write!(f, "\\u{:04X}", u32::from(c)),
+            _ => f.write_char(c),
         }?;
     }
     f.write_char('"')
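With the extended escaping rules, every C0 control character and DEL that has no short escape is emitted as `\uXXXX`, which keeps the output valid N-Triples. A hedged round-trip illustration through `Literal`'s `Display` impl, which funnels through `print_quoted_str`:

    use oxrdf::Literal;

    fn main() {
        // Backspace now uses the short `\b` escape...
        assert_eq!(Literal::new_simple_literal("a\u{08}b").to_string(), r#""a\bb""#);
        // ...and other control characters fall back to `\uXXXX`.
        assert_eq!(Literal::new_simple_literal("a\u{01}b").to_string(), r#""a\u0001b""#);
    }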
@@ -632,6 +635,8 @@ pub fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result {
 
 #[cfg(test)]
 mod tests {
+    #![allow(clippy::panic_in_result_fn)]
+
     use super::*;
 
     #[test]

@@ -216,3 +216,21 @@ impl PartialOrd<NamedNodeRef<'_>> for NamedNode {
         self.as_ref().partial_cmp(other)
     }
 }
+
+impl From<Iri<String>> for NamedNode {
+    #[inline]
+    fn from(iri: Iri<String>) -> Self {
+        Self {
+            iri: iri.into_inner(),
+        }
+    }
+}
+
+impl<'a> From<Iri<&'a str>> for NamedNodeRef<'a> {
+    #[inline]
+    fn from(iri: Iri<&'a str>) -> Self {
+        Self {
+            iri: iri.into_inner(),
+        }
+    }
+}
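These `From<Iri<...>>` impls let IRIs already validated by `oxiri` become named nodes without a second parse. A hedged sketch (the IRI is made up):

    use oxiri::Iri;
    use oxrdf::NamedNode;

    fn main() -> Result<(), oxiri::IriParseError> {
        let iri = Iri::parse("http://example.com/foo".to_owned())?;
        // No re-validation pass: the parsed IRI is trusted as-is.
        let node = NamedNode::from(iri);
        assert_eq!(node.as_str(), "http://example.com/foo");
        Ok(())
    }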

@@ -6,8 +6,6 @@ use crate::{
 #[cfg(feature = "rdf-star")]
 use crate::{Subject, Triple};
 use std::char;
-use std::error::Error;
-use std::fmt;
 use std::str::{Chars, FromStr};
 
 /// This limit is set in order to avoid stack overflow error when parsing nested triples due to too many recursive calls.
@@ -23,12 +21,15 @@ impl FromStr for NamedNode {
     /// use oxrdf::NamedNode;
     /// use std::str::FromStr;
     ///
-    /// assert_eq!(NamedNode::from_str("<http://example.com>").unwrap(), NamedNode::new("http://example.com").unwrap())
+    /// assert_eq!(
+    ///     NamedNode::from_str("<http://example.com>").unwrap(),
+    ///     NamedNode::new("http://example.com").unwrap()
+    /// )
     /// ```
-    fn from_str(s: &str) -> Result<Self, TermParseError> {
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
         let (term, left) = read_named_node(s)?;
         if !left.is_empty() {
-            return Err(TermParseError::msg(
+            return Err(Self::Err::msg(
                 "Named node serialization should end with a >",
             ));
         }
@@ -45,12 +46,15 @@ impl FromStr for BlankNode {
     /// use oxrdf::BlankNode;
     /// use std::str::FromStr;
     ///
-    /// assert_eq!(BlankNode::from_str("_:ex").unwrap(), BlankNode::new("ex").unwrap())
+    /// assert_eq!(
+    ///     BlankNode::from_str("_:ex").unwrap(),
+    ///     BlankNode::new("ex").unwrap()
+    /// )
     /// ```
-    fn from_str(s: &str) -> Result<Self, TermParseError> {
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
         let (term, left) = read_blank_node(s)?;
         if !left.is_empty() {
-            return Err(TermParseError::msg(
+            return Err(Self::Err::msg(
                 "Blank node serialization should not contain whitespaces",
             ));
         }
@@ -64,21 +68,46 @@ impl FromStr for Literal {
     /// Parses a literal from its NTriples or Turtle serialization
     ///
     /// ```
-    /// use oxrdf::{Literal, NamedNode, vocab::xsd};
+    /// use oxrdf::vocab::xsd;
+    /// use oxrdf::{Literal, NamedNode};
     /// use std::str::FromStr;
     ///
-    /// assert_eq!(Literal::from_str("\"ex\\n\"").unwrap(), Literal::new_simple_literal("ex\n"));
-    /// assert_eq!(Literal::from_str("\"ex\"@en").unwrap(), Literal::new_language_tagged_literal("ex", "en").unwrap());
-    /// assert_eq!(Literal::from_str("\"2020\"^^<http://www.w3.org/2001/XMLSchema#gYear>").unwrap(), Literal::new_typed_literal("2020", NamedNode::new("http://www.w3.org/2001/XMLSchema#gYear").unwrap()));
-    /// assert_eq!(Literal::from_str("true").unwrap(), Literal::new_typed_literal("true", xsd::BOOLEAN));
-    /// assert_eq!(Literal::from_str("+122").unwrap(), Literal::new_typed_literal("+122", xsd::INTEGER));
-    /// assert_eq!(Literal::from_str("-122.23").unwrap(), Literal::new_typed_literal("-122.23", xsd::DECIMAL));
-    /// assert_eq!(Literal::from_str("-122e+1").unwrap(), Literal::new_typed_literal("-122e+1", xsd::DOUBLE));
+    /// assert_eq!(
+    ///     Literal::from_str("\"ex\\n\"").unwrap(),
+    ///     Literal::new_simple_literal("ex\n")
+    /// );
+    /// assert_eq!(
+    ///     Literal::from_str("\"ex\"@en").unwrap(),
+    ///     Literal::new_language_tagged_literal("ex", "en").unwrap()
+    /// );
+    /// assert_eq!(
+    ///     Literal::from_str("\"2020\"^^<http://www.w3.org/2001/XMLSchema#gYear>").unwrap(),
+    ///     Literal::new_typed_literal(
+    ///         "2020",
+    ///         NamedNode::new("http://www.w3.org/2001/XMLSchema#gYear").unwrap()
+    ///     )
+    /// );
+    /// assert_eq!(
+    ///     Literal::from_str("true").unwrap(),
+    ///     Literal::new_typed_literal("true", xsd::BOOLEAN)
+    /// );
+    /// assert_eq!(
+    ///     Literal::from_str("+122").unwrap(),
+    ///     Literal::new_typed_literal("+122", xsd::INTEGER)
+    /// );
+    /// assert_eq!(
+    ///     Literal::from_str("-122.23").unwrap(),
+    ///     Literal::new_typed_literal("-122.23", xsd::DECIMAL)
+    /// );
+    /// assert_eq!(
+    ///     Literal::from_str("-122e+1").unwrap(),
+    ///     Literal::new_typed_literal("-122e+1", xsd::DOUBLE)
+    /// );
    /// ```
-    fn from_str(s: &str) -> Result<Self, TermParseError> {
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
         let (term, left) = read_literal(s)?;
         if !left.is_empty() {
-            return Err(TermParseError::msg("Invalid literal serialization"));
+            return Err(Self::Err::msg("Invalid literal serialization"));
         }
         Ok(term)
     }
@@ -93,17 +122,24 @@ impl FromStr for Term {
     /// use oxrdf::*;
     /// use std::str::FromStr;
     ///
-    /// assert_eq!(Term::from_str("\"ex\"").unwrap(), Literal::new_simple_literal("ex").into());
-    /// assert_eq!(Term::from_str("<< _:s <http://example.com/p> \"o\" >>").unwrap(), Triple::new(
-    ///     BlankNode::new("s").unwrap(),
-    ///     NamedNode::new("http://example.com/p").unwrap(),
-    ///     Literal::new_simple_literal("o")
-    /// ).into());
+    /// assert_eq!(
+    ///     Term::from_str("\"ex\"").unwrap(),
+    ///     Literal::new_simple_literal("ex").into()
+    /// );
+    /// assert_eq!(
+    ///     Term::from_str("<< _:s <http://example.com/p> \"o\" >>").unwrap(),
+    ///     Triple::new(
+    ///         BlankNode::new("s").unwrap(),
+    ///         NamedNode::new("http://example.com/p").unwrap(),
+    ///         Literal::new_simple_literal("o")
+    ///     )
+    ///     .into()
+    /// );
     /// ```
-    fn from_str(s: &str) -> Result<Self, TermParseError> {
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
         let (term, left) = read_term(s, 0)?;
         if !left.is_empty() {
-            return Err(TermParseError::msg("Invalid term serialization"));
+            return Err(Self::Err::msg("Invalid term serialization"));
         }
         Ok(term)
     }
@@ -118,19 +154,22 @@ impl FromStr for Variable {
     /// use oxrdf::Variable;
     /// use std::str::FromStr;
     ///
-    /// assert_eq!(Variable::from_str("$foo").unwrap(), Variable::new("foo").unwrap())
+    /// assert_eq!(
+    ///     Variable::from_str("$foo").unwrap(),
+    ///     Variable::new("foo").unwrap()
+    /// )
     /// ```
-    fn from_str(s: &str) -> Result<Self, TermParseError> {
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
         if !s.starts_with('?') && !s.starts_with('$') {
-            return Err(TermParseError::msg(
+            return Err(Self::Err::msg(
                 "Variable serialization should start with ? or $",
             ));
         }
-        Self::new(&s[1..]).map_err(|error| TermParseError {
-            kind: TermParseErrorKind::Variable {
+        Self::new(&s[1..]).map_err(|error| {
+            TermParseError(TermParseErrorKind::Variable {
                 value: s.to_owned(),
                 error,
-            },
+            })
         })
     }
 }
@@ -143,11 +182,11 @@ fn read_named_node(s: &str) -> Result<(NamedNode, &str), TermParseError> {
             .ok_or_else(|| TermParseError::msg("Named node serialization should end with a >"))?;
         let (value, remain) = remain.split_at(end);
         let remain = &remain[1..];
-        let term = NamedNode::new(value).map_err(|error| TermParseError {
-            kind: TermParseErrorKind::Iri {
+        let term = NamedNode::new(value).map_err(|error| {
+            TermParseError(TermParseErrorKind::Iri {
                 value: value.to_owned(),
                 error,
-            },
+            })
         })?;
         Ok((term, remain))
     } else {
@@ -167,11 +206,11 @@ fn read_blank_node(s: &str) -> Result<(BlankNode, &str), TermParseError> {
             })
             .unwrap_or(remain.len());
         let (value, remain) = remain.split_at(end);
-        let term = BlankNode::new(value).map_err(|error| TermParseError {
-            kind: TermParseErrorKind::BlankNode {
+        let term = BlankNode::new(value).map_err(|error| {
+            TermParseError(TermParseErrorKind::BlankNode {
                 value: value.to_owned(),
                 error,
-            },
+            })
         })?;
         Ok((term, remain))
     } else {
@@ -197,11 +236,11 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
                     let (language, remain) = remain.split_at(end);
                     Ok((
                         Literal::new_language_tagged_literal(value, language).map_err(
-                            |error| TermParseError {
-                                kind: TermParseErrorKind::LanguageTag {
+                            |error| {
+                                TermParseError(TermParseErrorKind::LanguageTag {
                                     value: language.to_owned(),
                                     error,
-                                },
+                                })
                             },
                         )?,
                         remain,
@@ -217,10 +256,10 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
                 if let Some(c) = chars.next() {
                     value.push(match c {
                         't' => '\t',
-                        'b' => '\u{8}',
+                        'b' => '\u{08}',
                         'n' => '\n',
                         'r' => '\r',
-                        'f' => '\u{C}',
+                        'f' => '\u{0C}',
                         '"' => '"',
                         '\'' => '\'',
                         '\\' => '\\',
@@ -232,7 +271,7 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
                         return Err(TermParseError::msg("Unexpected literal end"));
                     }
                 }
-                c => value.push(c),
+                _ => value.push(c),
             }
         }
         Err(TermParseError::msg("Unexpected literal end"))
@@ -381,61 +420,36 @@ fn read_hexa_char(input: &mut Chars<'_>, len: usize) -> Result<char, TermParseError> {
 }
 
 /// An error raised during term serialization parsing using the [`FromStr`] trait.
-#[derive(Debug)]
-pub struct TermParseError {
-    kind: TermParseErrorKind,
-}
+#[derive(Debug, thiserror::Error)]
+#[error(transparent)]
+pub struct TermParseError(#[from] TermParseErrorKind);
 
-#[derive(Debug)]
+/// An internal error raised during term serialization parsing using the [`FromStr`] trait.
+#[derive(Debug, thiserror::Error)]
 enum TermParseErrorKind {
-    Iri {
-        error: IriParseError,
-        value: String,
-    },
+    #[error("Error while parsing the named node '{value}': {error}")]
+    Iri { error: IriParseError, value: String },
+    #[error("Error while parsing the blank node '{value}': {error}")]
     BlankNode {
         error: BlankNodeIdParseError,
         value: String,
     },
+    #[error("Error while parsing the language tag '{value}': {error}")]
     LanguageTag {
         error: LanguageTagParseError,
         value: String,
     },
+    #[error("Error while parsing the variable '{value}': {error}")]
     Variable {
         error: VariableNameParseError,
         value: String,
     },
-    Msg {
-        msg: &'static str,
-    },
+    #[error("{0}")]
+    Msg(&'static str),
 }
 
-impl fmt::Display for TermParseError {
-    #[inline]
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match &self.kind {
-            TermParseErrorKind::Iri { error, value } => {
-                write!(f, "Error while parsing the named node '{value}': {error}")
-            }
-            TermParseErrorKind::BlankNode { error, value } => {
-                write!(f, "Error while parsing the blank node '{value}': {error}")
-            }
-            TermParseErrorKind::LanguageTag { error, value } => {
-                write!(f, "Error while parsing the language tag '{value}': {error}")
-            }
-            TermParseErrorKind::Variable { error, value } => {
-                write!(f, "Error while parsing the variable '{value}': {error}")
-            }
-            TermParseErrorKind::Msg { msg } => f.write_str(msg),
-        }
-    }
-}
-
-impl Error for TermParseError {}
-
 impl TermParseError {
     pub(crate) fn msg(msg: &'static str) -> Self {
-        Self {
-            kind: TermParseErrorKind::Msg { msg },
-        }
+        Self(TermParseErrorKind::Msg(msg))
     }
 }
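The `#[error(transparent)]` wrapper plus `#[from]` keeps `TermParseErrorKind` private while `thiserror` derives the `Display` and `Error` impls that were previously written by hand, so the caller-visible messages stay identical. A generic, self-contained sketch of the opaque-wrapper pattern (`PublicError`/`InnerKind` are illustrative names, not oxrdf API):

    // Requires the `thiserror` crate as a dependency.
    #[derive(Debug, thiserror::Error)]
    #[error(transparent)]
    pub struct PublicError(#[from] InnerKind);

    #[derive(Debug, thiserror::Error)]
    enum InnerKind {
        #[error("invalid value '{0}'")]
        Invalid(String),
        #[error("{0}")]
        Msg(&'static str),
    }

    fn main() {
        let err = PublicError(InnerKind::Invalid("x".into()));
        // Display is delegated transparently to the inner kind.
        assert_eq!(err.to_string(), "invalid value 'x'");
    }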

@@ -698,7 +698,7 @@ impl<'a> From<TermRef<'a>> for Term {
 ///
 /// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
 /// ```
-/// use oxrdf::{Triple, NamedNode};
+/// use oxrdf::{NamedNode, Triple};
 ///
 /// assert_eq!(
 ///     "<http://example.com/s> <http://example.com/p> <http://example.com/o>",
@@ -706,7 +706,8 @@ impl<'a> From<TermRef<'a>> for Term {
 ///         subject: NamedNode::new("http://example.com/s")?.into(),
 ///         predicate: NamedNode::new("http://example.com/p")?,
 ///         object: NamedNode::new("http://example.com/o")?.into(),
-///     }.to_string()
+///     }
+///     .to_string()
 /// );
 /// # Result::<_,oxrdf::IriParseError>::Ok(())
 /// ```
@@ -769,7 +770,7 @@ impl fmt::Display for Triple {
 ///
 /// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
 /// ```
-/// use oxrdf::{TripleRef, NamedNodeRef};
+/// use oxrdf::{NamedNodeRef, TripleRef};
 ///
 /// assert_eq!(
 ///     "<http://example.com/s> <http://example.com/p> <http://example.com/o>",
@@ -777,7 +778,8 @@ impl fmt::Display for Triple {
 ///         subject: NamedNodeRef::new("http://example.com/s")?.into(),
 ///         predicate: NamedNodeRef::new("http://example.com/p")?,
 ///         object: NamedNodeRef::new("http://example.com/o")?.into(),
-///     }.to_string()
+///     }
+///     .to_string()
 /// );
 /// # Result::<_,oxrdf::IriParseError>::Ok(())
 /// ```
@@ -853,10 +855,11 @@ impl<'a> From<TripleRef<'a>> for Triple {
 /// A possible owned graph name.
 /// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph).
-#[derive(Eq, PartialEq, Debug, Clone, Hash)]
+#[derive(Eq, PartialEq, Debug, Clone, Hash, Default)]
 pub enum GraphName {
     NamedNode(NamedNode),
     BlankNode(BlankNode),
+    #[default]
     DefaultGraph,
 }
@@ -940,10 +943,11 @@ impl From<NamedOrBlankNodeRef<'_>> for GraphName {
 /// A possible borrowed graph name.
 /// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph).
-#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
+#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash, Default)]
 pub enum GraphNameRef<'a> {
     NamedNode(NamedNodeRef<'a>),
     BlankNode(BlankNodeRef<'a>),
+    #[default]
     DefaultGraph,
 }
@@ -979,7 +983,7 @@ impl fmt::Display for GraphNameRef<'_> {
         match self {
             Self::NamedNode(node) => node.fmt(f),
             Self::BlankNode(node) => node.fmt(f),
-            Self::DefaultGraph => write!(f, "DEFAULT"),
+            Self::DefaultGraph => f.write_str("DEFAULT"),
         }
     }
 }

@@ -1,5 +1,4 @@
 use std::cmp::Ordering;
-use std::error::Error;
 use std::fmt;
 
 /// A [SPARQL query](https://www.w3.org/TR/sparql11-query/) owned variable.
@@ -8,10 +7,7 @@ use std::fmt;
 /// ```
 /// use oxrdf::{Variable, VariableNameParseError};
 ///
-/// assert_eq!(
-///     "?foo",
-///     Variable::new("foo")?.to_string()
-/// );
+/// assert_eq!("?foo", Variable::new("foo")?.to_string());
 /// # Result::<_,VariableNameParseError>::Ok(())
 /// ```
 #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
@@ -67,12 +63,9 @@ impl fmt::Display for Variable {
 ///
 /// The default string formatter is returning a SPARQL compatible representation:
 /// ```
-/// use oxrdf::{VariableRef, VariableNameParseError};
+/// use oxrdf::{VariableNameParseError, VariableRef};
 ///
-/// assert_eq!(
-///     "?foo",
-///     VariableRef::new("foo")?.to_string()
-/// );
+/// assert_eq!("?foo", VariableRef::new("foo")?.to_string());
 /// # Result::<_,VariableNameParseError>::Ok(())
 /// ```
 #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
@@ -96,12 +89,12 @@ impl<'a> VariableRef<'a> {
     ///
     /// [`Variable::new()`] is a safe version of this constructor and should be used for untrusted data.
     #[inline]
-    pub fn new_unchecked(name: &'a str) -> Self {
+    pub const fn new_unchecked(name: &'a str) -> Self {
         Self { name }
     }
 
     #[inline]
-    pub fn as_str(&self) -> &str {
+    pub const fn as_str(self) -> &'a str {
         self.name
     }
 
@@ -169,7 +162,7 @@ impl PartialOrd<VariableRef<'_>> for Variable {
 fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError> {
     let mut chars = id.chars();
-    let front = chars.next().ok_or(VariableNameParseError {})?;
+    let front = chars.next().ok_or(VariableNameParseError)?;
     match front {
         '0'..='9'
         | '_'
@@ -188,13 +181,13 @@ fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError>
         | '\u{F900}'..='\u{FDCF}'
         | '\u{FDF0}'..='\u{FFFD}'
         | '\u{10000}'..='\u{EFFFF}' => (),
-        _ => return Err(VariableNameParseError {}),
+        _ => return Err(VariableNameParseError),
     }
     for c in chars {
         match c {
             '0'..='9'
             | '\u{00B7}'
-            | '\u{00300}'..='\u{036F}'
+            | '\u{0300}'..='\u{036F}'
             | '\u{203F}'..='\u{2040}'
             | '_'
             | 'A'..='Z'
@@ -211,21 +204,13 @@ fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError>
             | '\u{F900}'..='\u{FDCF}'
             | '\u{FDF0}'..='\u{FFFD}'
             | '\u{10000}'..='\u{EFFFF}' => (),
-            _ => return Err(VariableNameParseError {}),
+            _ => return Err(VariableNameParseError),
         }
     }
     Ok(())
 }
 
 /// An error raised during [`Variable`] name validation.
-#[derive(Debug)]
-pub struct VariableNameParseError {}
-
-impl fmt::Display for VariableNameParseError {
-    #[inline]
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "The variable name is invalid")
-    }
-}
-
-impl Error for VariableNameParseError {}
+#[derive(Debug, thiserror::Error)]
+#[error("The variable name is invalid")]
+pub struct VariableNameParseError;
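The unit-struct change (`VariableNameParseError {}` to `VariableNameParseError`) is purely syntactic; validation behaviour is unchanged. For reference, a hedged probe of the public API:

    use oxrdf::Variable;

    fn main() {
        assert!(Variable::new("foo").is_ok());
        assert!(Variable::new("foo bar").is_err()); // a space is not a SPARQL VARNAME character
        assert_eq!(Variable::new("foo").unwrap().to_string(), "?foo");
    }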

@@ -231,3 +231,12 @@ pub mod xsd {
     pub const YEAR_MONTH_DURATION: NamedNodeRef<'_> =
         NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#yearMonthDuration");
 }
+
+pub mod geosparql {
+    //! [GeoSpatial](https://opengeospatial.github.io/ogc-geosparql/) vocabulary.
+
+    use crate::named_node::NamedNodeRef;
+
+    /// Geospatial datatype like `"Point({longitude} {latitude})"^^geo:wktLiteral`
+    pub const WKT_LITERAL: NamedNodeRef<'_> =
+        NamedNodeRef::new_unchecked("http://www.opengis.net/ont/geosparql#wktLiteral");
+}
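A hedged example of tagging a WKT point with the new vocabulary constant (the coordinates are made up):

    use oxrdf::vocab::geosparql;
    use oxrdf::Literal;

    fn main() {
        let point = Literal::new_typed_literal("POINT(2.2945 48.8584)", geosparql::WKT_LITERAL);
        assert_eq!(point.datatype(), geosparql::WKT_LITERAL);
    }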

@@ -0,0 +1,36 @@
[package]
name = "oxrdfio"
version = "0.1.0-alpha.3-dev"
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF"]
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfio"
documentation = "https://docs.rs/oxrdfio"
description = """
Parser and serializer for various RDF formats
"""
edition.workspace = true
rust-version.workspace = true
[features]
default = []
async-tokio = ["dep:tokio", "oxrdfxml/async-tokio", "oxttl/async-tokio"]
rdf-star = ["oxrdf/rdf-star", "oxttl/rdf-star"]
[dependencies]
oxrdf.workspace = true
oxrdfxml.workspace = true
oxttl.workspace = true
thiserror.workspace = true
tokio = { workspace = true, optional = true, features = ["io-util"] }
[dev-dependencies]
tokio = { workspace = true, features = ["rt", "macros"] }
[lints]
workspace = true
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]
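If the published crate keeps these feature names, a downstream Cargo.toml would opt into the Tokio support roughly like this (the version and feature list are assumptions based on the manifest above):

    [dependencies]
    oxrdfio = { version = "0.1.0-alpha.3", features = ["async-tokio", "rdf-star"] }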
