Merge branch 'main' of github.com:oxigraph/oxigraph

pull/825/head^2
Peter Heringer 2 years ago
commit b7519dbe22
  1. 137
      .cargo/config.toml
  2. 6
      .clusterfuzzlite/build.sh
  3. 21
      .devcontainer/Dockerfile
  4. 69
      .devcontainer/devcontainer.json
  5. 27
      .github/actions/setup-rust/action.yml
  6. 11
      .github/workflows/install_rocksdb.sh
  7. 169
      .github/workflows/tests.yml
  8. 7
      .gitmodules
  9. 2
      .readthedocs.yaml
  10. 96
      CHANGELOG.md
  11. 1357
      Cargo.lock
  12. 260
      Cargo.toml
  13. 17
      README.md
  14. 8
      bench/bsbm_oxigraph.sh
  15. 47
      cli/Cargo.toml
  16. 16
      cli/Dockerfile
  17. 54
      cli/README.md
  18. 0
      cli/logo.svg
  19. 1018
      cli/src/main.rs
  20. 0
      cli/templates/query.html
  21. 2
      clippy.toml
  22. 120
      docs/arch-diagram.svg
  23. 35
      docs/arch-diagram.txt
  24. 27
      fuzz/Cargo.toml
  25. 28
      fuzz/fuzz_targets/n3.rs
  26. 84
      fuzz/fuzz_targets/nquads.rs
  27. 37
      fuzz/fuzz_targets/rdf_xml.rs
  28. 25
      fuzz/fuzz_targets/sparql_eval.rs
  29. 2
      fuzz/fuzz_targets/sparql_query.rs
  30. 2
      fuzz/fuzz_targets/sparql_results_json.rs
  31. 2
      fuzz/fuzz_targets/sparql_results_tsv.rs
  32. 2
      fuzz/fuzz_targets/sparql_results_xml.rs
  33. 2
      fuzz/fuzz_targets/sparql_update.rs
  34. 167
      fuzz/fuzz_targets/trig.rs
  35. 24
      fuzz/src/result_format.rs
  36. 26
      js/Cargo.toml
  37. 34
      js/README.md
  38. 3
      js/biome.json
  39. 10
      js/build_package.js
  40. 1079
      js/package-lock.json
  41. 10
      js/package.json
  42. 2
      js/src/lib.rs
  43. 32
      js/src/model.rs
  44. 88
      js/src/store.rs
  45. 18
      js/test/model.mjs
  46. 70
      js/test/store.mjs
  47. 65
      lib/Cargo.toml
  48. 85
      lib/README.md
  49. 72
      lib/oxigraph/Cargo.toml
  50. 77
      lib/oxigraph/README.md
  51. 152
      lib/oxigraph/benches/store.rs
  52. 91
      lib/oxigraph/src/io/format.rs
  53. 39
      lib/oxigraph/src/io/mod.rs
  54. 200
      lib/oxigraph/src/io/read.rs
  55. 185
      lib/oxigraph/src/io/write.rs
  56. 12
      lib/oxigraph/src/lib.rs
  57. 20
      lib/oxigraph/src/model.rs
  58. 117
      lib/oxigraph/src/sparql/algebra.rs
  59. 4
      lib/oxigraph/src/sparql/dataset.rs
  60. 84
      lib/oxigraph/src/sparql/error.rs
  61. 2789
      lib/oxigraph/src/sparql/eval.rs
  62. 14
      lib/oxigraph/src/sparql/http/dummy.rs
  63. 9
      lib/oxigraph/src/sparql/http/mod.rs
  64. 17
      lib/oxigraph/src/sparql/http/simple.rs
  65. 173
      lib/oxigraph/src/sparql/mod.rs
  66. 373
      lib/oxigraph/src/sparql/model.rs
  67. 44
      lib/oxigraph/src/sparql/results.rs
  68. 62
      lib/oxigraph/src/sparql/service.rs
  69. 80
      lib/oxigraph/src/sparql/update.rs
  70. 33
      lib/oxigraph/src/storage/backend/fallback.rs
  71. 4
      lib/oxigraph/src/storage/backend/mod.rs
  72. 152
      lib/oxigraph/src/storage/backend/rocksdb.rs
  73. 35
      lib/oxigraph/src/storage/binary_encoder.rs
  74. 139
      lib/oxigraph/src/storage/error.rs
  75. 109
      lib/oxigraph/src/storage/mod.rs
  76. 55
      lib/oxigraph/src/storage/numeric_encoder.rs
  77. 51
      lib/oxigraph/src/storage/small_string.rs
  78. 6
      lib/oxigraph/src/storage/storage_generator.rs
  79. 0
      lib/oxigraph/src/storage/vg_vocab.rs
  80. 744
      lib/oxigraph/src/store.rs
  81. 0
      lib/oxigraph/tests/rocksdb_bc_data/000003.log
  82. 0
      lib/oxigraph/tests/rocksdb_bc_data/CURRENT
  83. 0
      lib/oxigraph/tests/rocksdb_bc_data/IDENTITY
  84. 0
      lib/oxigraph/tests/rocksdb_bc_data/LOCK
  85. 0
      lib/oxigraph/tests/rocksdb_bc_data/MANIFEST-000004
  86. 0
      lib/oxigraph/tests/rocksdb_bc_data/OPTIONS-000026
  87. 93
      lib/oxigraph/tests/store.rs
  88. 24
      lib/oxrdf/Cargo.toml
  89. 4
      lib/oxrdf/README.md
  90. 70
      lib/oxrdf/src/blank_node.rs
  91. 342
      lib/oxrdf/src/dataset.rs
  92. 16
      lib/oxrdf/src/graph.rs
  93. 53
      lib/oxrdf/src/interning.rs
  94. 35
      lib/oxrdf/src/literal.rs
  95. 18
      lib/oxrdf/src/named_node.rs
  96. 172
      lib/oxrdf/src/parser.rs
  97. 18
      lib/oxrdf/src/triple.rs
  98. 39
      lib/oxrdf/src/variable.rs
  99. 9
      lib/oxrdf/src/vocab.rs
  100. 36
      lib/oxrdfio/Cargo.toml
  101. Some files were not shown because too many files have changed in this diff Show More

@ -1,137 +0,0 @@
[build]
rustflags = [
"-Wtrivial-casts",
"-Wtrivial-numeric-casts",
"-Wunsafe-code",
"-Wunused-lifetimes",
"-Wunused-qualifications",
# TODO: 1.63+ "-Wclippy::as-underscore",
# TODO: 1.65+ ""-Wclippy::bool-to-int-with-if",
"-Wclippy::borrow-as-ptr",
"-Wclippy::case-sensitive-file-extension-comparisons",
"-Wclippy::cast-lossless",
"-Wclippy::cast-possible-truncation",
"-Wclippy::cast-possible-wrap",
"-Wclippy::cast-precision-loss",
"-Wclippy::cast-ptr-alignment",
"-Wclippy::cast-sign-loss",
"-Wclippy::checked-conversions",
"-Wclippy::clone-on-ref-ptr",
"-Wclippy::cloned-instead-of-copied",
"-Wclippy::copy-iterator",
"-Wclippy::dbg-macro",
"-Wclippy::decimal-literal-representation",
"-Wclippy::default-trait-access",
"-Wclippy::default-union-representation",
# TODO: 1.61+ "-Wclippy::deref-by-slicing",
# TODO: 1.63+ "-Wclippy::doc-link-with-quotes",
# TODO: 1.62+ "-Wclippy::empty-drop",
"-Wclippy::empty-enum",
# TODO: on major version "-Wclippy::empty-structs-with-brackets",
"-Wclippy::enum-glob-use",
"-Wclippy::exit",
"-Wclippy::expect-used",
"-Wclippy::expl-impl-clone-on-copy",
"-Wclippy::explicit-deref-methods",
"-Wclippy::explicit-into-iter-loop",
"-Wclippy::explicit-iter-loop",
"-Wclippy::filter-map-next",
"-Wclippy::flat-map-option",
"-Wclippy::fn-to-numeric-cast-any",
# TODO: 1.62+ "-Wclippy::format-push-string",
"-Wclippy::from-iter-instead-of-collect",
"-Wclippy::get-unwrap",
"-Wclippy::if-not-else",
"-Wclippy::if-then-some-else-none",
"-Wclippy::implicit-clone",
"-Wclippy::inconsistent-struct-constructor",
"-Wclippy::index-refutable-slice",
"-Wclippy::inefficient-to-string",
"-Wclippy::inline-always",
"-Wclippy::inline-asm-x86-att-syntax",
"-Wclippy::inline-asm-x86-intel-syntax",
"-Wclippy::invalid-upcast-comparisons",
"-Wclippy::items-after-statements",
"-Wclippy::large-digit-groups",
# TODO: 1.68+ "-Wclippy::large-futures",
"-Wclippy::large-stack-arrays",
"-Wclippy::large-types-passed-by-value",
"-Wclippy::let-underscore-must-use",
"-Wclippy::let-unit-value",
"-Wclippy::linkedlist",
"-Wclippy::lossy-float-literal",
"-Wclippy::macro-use-imports",
"-Wclippy::manual-assert",
# TODO: 1.65+ "-Wclippy::manual-instant-elapsed",
# TODO: 1.67+ "-Wclippy::manual-let-else",
"-Wclippy::manual-ok-or",
# TODO: 1.65+ "-Wclippy::manual-string-new",
"-Wclippy::many-single-char-names",
"-Wclippy::map-unwrap-or",
"-Wclippy::match-bool",
"-Wclippy::match-same-arms",
"-Wclippy::match-wildcard-for-single-variants",
"-Wclippy::maybe-infinite-iter",
"-Wclippy::mem-forget",
# TODO: 1.63+ "-Wclippy::mismatching-type-param-order",
"-Wclippy::multiple-inherent-impl",
"-Wclippy::mut-mut",
"-Wclippy::mutex-atomic",
"-Wclippy::naive-bytecount",
"-Wclippy::needless-bitwise-bool",
"-Wclippy::needless-continue",
"-Wclippy::needless-pass-by-value",
"-Wclippy::no-effect-underscore-binding",
# TODO: 1.69+ "-Wclippy::no-mangle-with-rust-abi",
"-Wclippy::non-ascii-literal",
"-Wclippy::print-stderr",
"-Wclippy::print-stdout",
"-Wclippy::ptr-as-ptr",
"-Wclippy::range-minus-one",
"-Wclippy::range-plus-one",
"-Wclippy::rc-buffer",
"-Wclippy::rc-mutex",
"-Wclippy::redundant-closure-for-method-calls",
"-Wclippy::redundant-else",
"-Wclippy::redundant-feature-names",
"-Wclippy::ref-binding-to-reference",
"-Wclippy::ref-option-ref",
"-Wclippy::rest-pat-in-fully-bound-structs",
"-Wclippy::return-self-not-must-use",
"-Wclippy::same-functions-in-if-condition",
# TODO: strange failure on 1.60 "-Wclippy::same-name-method",
# TODO: 1.68+ "-Wclippy::semicolon-outside-block",
"-Wclippy::single-match-else",
"-Wclippy::stable-sort-primitive",
"-Wclippy::str-to-string",
"-Wclippy::string-add",
"-Wclippy::string-add-assign",
"-Wclippy::string-lit-as-bytes",
"-Wclippy::string-to-string",
# TODO: 1.67+ "-Wclippy::suspicious-xor-used-as-pow",
"-Wclippy::todo",
"-Wclippy::transmute-ptr-to-ptr",
"-Wclippy::trivially-copy-pass-by-ref",
"-Wclippy::try-err",
"-Wclippy::unicode-not-nfc",
"-Wclippy::unimplemented",
# TODO: 1.66+ "-Wclippy::uninlined-format-args",
# TODO: 1.70+ "-Wclippy::unnecessary-box-returns",
# TODO: 1.61+ "-Wclippy::unnecessary-join",
# TODO: 1.67+ "-Wclippy::unnecessary-safety-comment",
# TODO: 1.67+ "-Wclippy::unnecessary-safety-doc",
"-Wclippy::unnecessary-self-imports",
"-Wclippy::unnecessary-wraps",
"-Wclippy::unneeded-field-pattern",
"-Wclippy::unnested-or-patterns",
"-Wclippy::unreadable-literal",
"-Wclippy::unseparated-literal-suffix",
"-Wclippy::unused-async",
"-Wclippy::unused-self",
"-Wclippy::use-debug",
"-Wclippy::used-underscore-binding",
"-Wclippy::verbose-bit-mask",
"-Wclippy::verbose-file-reads",
"-Wclippy::wildcard-dependencies",
"-Wclippy::zero-sized-map-values",
]

@ -15,10 +15,14 @@ function build_seed_corpus() {
cd "$SRC"/oxigraph
cargo fuzz build -O --debug-assertions
for TARGET in sparql_eval sparql_results_json sparql_results_tsv # sparql_results_xml https://github.com/tafia/quick-xml/issues/608
for TARGET in sparql_eval sparql_results_json sparql_results_tsv sparql_results_xml n3 nquads trig rdf_xml
do
cp fuzz/target/x86_64-unknown-linux-gnu/release/$TARGET "$OUT"/
done
build_seed_corpus sparql_results_json srj
build_seed_corpus sparql_results_tsv tsv
build_seed_corpus sparql_results_xml srx
build_seed_corpus n3 n3
build_seed_corpus nquads nq
build_seed_corpus trig trig
build_seed_corpus rdf_xml rdf

@ -1,21 +0,0 @@
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust/.devcontainer/base.Dockerfile
# [Choice] Debian OS version (use bullseye on local arm64/Apple Silicon): buster, bullseye
ARG VARIANT="bullseye"
FROM mcr.microsoft.com/vscode/devcontainers/rust:0-${VARIANT}
# [Optional] Uncomment this section to install additional packages.
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
&& apt-get -y install --no-install-recommends \
python3 \
python3-venv \
python-is-python3 \
libclang-dev
ENV VIRTUAL_ENV=/opt/venv
RUN python -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN pip install --no-cache-dir -r python/requirements.dev.txt
# Change owner to the devcontainer user
RUN chown -R 1000:1000 $VIRTUAL_ENV

@ -1,69 +0,0 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
// https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust
{
"name": "Rust",
"build": {
"dockerfile": "Dockerfile",
"args": {
// Use the VARIANT arg to pick a Debian OS version: buster, bullseye
// Use bullseye when on local on arm64/Apple Silicon.
"VARIANT": "bullseye"
}
},
"runArgs": ["--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined"],
// Configure tool-specific properties.
"customizations": {
// Configure properties specific to VS Code.
"vscode": {
// Set *default* container specific settings.json values on container create.
"settings": {
"lldb.executable": "/usr/bin/lldb",
// VS Code don't watch files under ./target
"files.watcherExclude": {
"**/target/**": true
},
"rust-analyzer.checkOnSave.command": "clippy",
"python.defaultInterpreterPath": "/opt/venv/bin/python",
"python.linting.enabled": true,
"python.linting.pylintEnabled": true,
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
"python.linting.pylintPath": "/opt/venv/bin/pylint",
"python.testing.pytestPath": "/opt/venv/bin/pytest"
},
// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"vadimcn.vscode-lldb",
"mutantdino.resourcemonitor",
"rust-lang.rust-analyzer",
"tamasfe.even-better-toml",
"serayuzgur.crates",
"ms-python.python",
"ms-python.vscode-pylance",
"esbenp.prettier-vscode",
"stardog-union.stardog-rdf-grammars"
]
}
},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "git submodule update --init && cargo build",
// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
"remoteUser": "vscode",
"features": {
"python": "3.10"
}
}

@ -0,0 +1,27 @@
name: 'Setup Rust'
description: 'Setup Rust using Rustup'
inputs:
version:
description: 'Rust version to use. By default latest stable version'
required: false
default: 'stable'
component:
description: 'Rust extra component to install like clippy'
required: false
target:
description: 'Rust extra target to install like wasm32-unknown-unknown'
required: false
runs:
using: "composite"
steps:
- run: rustup update
shell: bash
- run: rustup default ${{ inputs.version }}
shell: bash
- run: rustup component add ${{ inputs.component }}
shell: bash
if: ${{ inputs.component }}
- run: rustup target add ${{ inputs.target }}
shell: bash
if: ${{ inputs.target }}
- uses: Swatinem/rust-cache@v2

@ -0,0 +1,11 @@
if [ -f "rocksdb" ]
then
cd rocksdb || exit
else
git clone https://github.com/facebook/rocksdb.git
cd rocksdb || exit
git checkout v8.0.0
make shared_lib
fi
sudo make install-shared
sudo ldconfig /usr/local/lib

@ -4,6 +4,7 @@ on:
pull_request:
branches:
- main
- next
schedule:
- cron: "12 3 * * *"
@ -16,7 +17,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- run: rustup update && rustup component add rustfmt
- uses: ./.github/actions/setup-rust
with:
component: rustfmt
- run: cargo fmt -- --check
clippy:
@ -25,17 +28,38 @@ jobs:
- uses: actions/checkout@v3
with:
submodules: true
- run: rustup update && rustup component add clippy
- uses: Swatinem/rust-cache@v2
- run: cargo clippy
- uses: ./.github/actions/setup-rust
with:
version: 1.74.1
component: clippy
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxsdatatypes
- run: cargo clippy
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxrdf
- run: cargo clippy
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxrdfxml
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxttl
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxrdfio
- run: cargo clippy --all-targets --features async-tokio -- -D warnings -D clippy::all
working-directory: ./lib/oxrdfio
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/sparesults
- run: cargo clippy
- run: cargo clippy --all-targets --features async-tokio -- -D warnings -D clippy::all
working-directory: ./lib/sparesults
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/spargebra
- run: cargo clippy --all-targets --all-features
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/sparopt
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxigraph
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./python
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./cli
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./testsuite
clippy_msv:
runs-on: ubuntu-latest
@ -60,9 +84,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: cargo install cargo-deny || true
- uses: taiki-e/install-action@v2
with: { tool: cargo-deny }
- run: cargo deny check
semver_checks:
@ -71,71 +94,109 @@ jobs:
- uses: actions/checkout@v3
with:
submodules: true
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: cargo install cargo-semver-checks || true
- run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph_js --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server
- uses: ./.github/actions/setup-rust
- uses: taiki-e/install-action@v2
with: { tool: cargo-semver-checks }
- uses: actions/cache@v3
with:
path: rocksdb
key: ${{ runner.os }}-rocksdb-8.0.0
- run: bash .github/workflows/install_rocksdb.sh
- run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph-js --exclude pyoxigraph --exclude oxigraph-testsuite --exclude oxigraph-cli
test_linux:
test_linux_x86_64:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: cargo test --all-features
env:
RUST_BACKTRACE: 1
- uses: ./.github/actions/setup-rust
- run: cargo test
address_sanitizer:
test_linux_i686:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- run: rustup update && rustup override set nightly
- run: sudo apt install -y llvm
- uses: Swatinem/rust-cache@v2
- run: cargo test --tests --target x86_64-unknown-linux-gnu --workspace --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server
env:
RUST_BACKTRACE: 1
RUSTFLAGS: -Z sanitizer=address
- uses: ./.github/actions/setup-rust
with:
target: i686-unknown-linux-gnu
- run: sudo apt-get update && sudo apt-get install -y g++-multilib
- run: cargo test --target i686-unknown-linux-gnu --no-default-features --features http-client-rustls-native
working-directory: ./lib/oxigraph
test_windows:
runs-on: windows-latest
test_linux_msv:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
- run: cargo test --all-features
- uses: ./.github/actions/setup-rust
with:
version: 1.70.0
- run: rustup toolchain install nightly
- run: rm Cargo.lock && cargo +nightly update -Z direct-minimal-versions
- run: cargo test
test_linux_latest:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
- run: rm Cargo.lock && cargo update
- run: cargo test
test_linux_address_sanitizer:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
version: nightly
- run: sudo apt-get update && sudo apt-get install -y llvm
- run: cargo test --tests --target x86_64-unknown-linux-gnu --workspace --exclude pyoxigraph --exclude oxigraph-testsuite --exclude oxigraph-cli
env:
RUST_BACKTRACE: 1
RUSTFLAGS: -Z sanitizer=address
rustdoc:
test_linux_dynamic_linking:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: cargo doc --all-features
working-directory: ./lib
- uses: ./.github/actions/setup-rust
- uses: actions/cache@v3
with:
path: rocksdb
key: ${{ runner.os }}-rocksdb-8.0.0
- run: bash .github/workflows/install_rocksdb.sh
- run: cargo test --tests --features oxrocksdb-sys/pkg-config
rustdoc_msrv:
test_windows:
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
- run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
- run: cargo test
rustdoc:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- run: rustup update && rustup override set 1.60.0
- uses: Swatinem/rust-cache@v2
- run: cargo doc --all-features
working-directory: ./lib
- uses: ./.github/actions/setup-rust
with:
version: 1.74.1
- run: cargo doc
env:
RUSTDOCFLAGS: -D warnings
@ -143,24 +204,21 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: Swatinem/rust-cache@v2
- run: cargo install typos-cli || true
- uses: taiki-e/install-action@v2
with: { tool: typos-cli }
- run: typos
clang_fmt:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- run: sudo apt install -y clang-format
- run: sudo apt-get update && sudo apt-get install -y clang-format
- run: clang-format --Werror --dry-run oxrocksdb-sys/api/*
fuzz_changes:
if: github.event_name == 'pull_request'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- uses: google/clusterfuzzlite/actions/build_fuzzers@v1
with:
language: rust
@ -170,7 +228,7 @@ jobs:
- uses: google/clusterfuzzlite/actions/run_fuzzers@v1
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
fuzz-seconds: 600
fuzz-seconds: 900
mode: code-change
sanitizer: address
minimize-crashes: true
@ -181,9 +239,6 @@ jobs:
if: github.event_name != 'pull_request'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- uses: google/clusterfuzzlite/actions/build_fuzzers@v1
with:
language: rust
@ -192,7 +247,7 @@ jobs:
- uses: google/clusterfuzzlite/actions/run_fuzzers@v1
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
fuzz-seconds: 3600
fuzz-seconds: 7200
mode: batch
sanitizer: address
minimize-crashes: true

7
.gitmodules vendored

@ -13,3 +13,10 @@
[submodule "oxrocksdb-sys/lz4"]
path = oxrocksdb-sys/lz4
url = https://github.com/lz4/lz4.git
[submodule "testsuite/N3"]
path = testsuite/N3
url = https://github.com/w3c/N3.git
branch = master
[submodule "testsuite/rdf-canon"]
path = testsuite/rdf-canon
url = https://github.com/w3c/rdf-canon.git

@ -7,7 +7,7 @@ build:
os: "ubuntu-22.04"
tools:
python: "3"
rust: "1.64"
rust: "1.70"
apt_packages:
- clang

@ -1,3 +1,99 @@
## [0.4.0-alpha.3] - 2024-01-25
### Added
- `oxttl`: expose base IRIs.
- `oxttl`: allows to inject prefixes for serialization.
- `oxrdf`: `vocab::geosparql::WKT_LITERAL`.
### Changed
- Turtle: Fixes parsing bug with escaped dot at the end of local name.
- `oxttl`: Changes `prefixes` getter return type.
- JS: simplify build.
- Python: uses rustls by default all platforms that are not Windows/macOS/iOS/WASM.
- Strips debug info of the Rust std library in release build.
## [0.4.0-alpha.2] - 2024-01-08
### Added
- i686 linux support
### Changed
- Docker: fixes Docker image Glib version error.
- Docker: tags now use semver e.g. `0.3.22` and not `v0.3.22`. Preversions are also not tagged `latest` anymore.
- Python: `QuerySolution` is now thread safe.
## [0.4.0-alpha.1] - 2024-01-03
### Added
- `sparopt` crate: A new still quite naive query optimizer.
- `oxttl` crate: A N-Triples/N-Quads/Turtle/TriG/N3 parser and serializer compatible with Tokio.
- `oxrdfxml` crate: A RDF/XML parser and serializer compatible with Tokio.
- `oxrdfio` crate: A stand-alone crate with oxigraph I/O related APIs.
- Rust: SPARQL results I/O is now exposed in the `oxigraph` crate (`oxigraph::sparql::results` module).
- Rust: It is now possible to dynamically link rocksdb with the `rocksdb-pkg-config` feature.
- Python: error location is now included in some `SyntaxError` exceptions.
- Python: the file type can be guessed from the file path extension during parsing and serialization.
- Python: the serialization method returns a `bytes` value if no output-related argument is given.
- Python: SPARQL query results I/O is now exposed (`parse_query_results` function and `.serialize` method).
- Python: `RdfFormat` and `QueryResultsFormat` enum to encode supported formats.
- CLI: a `convert` command to convert RDF file between different formats.
### Removed
- Rust: automated flush at the end of serialization. This should be done explicitly now.
- oxsdatatypes: Deprecated methods.
- Python: 3.7 and Musl linux 1.1 support.
- Python: `GraphName.value`.
### Changed
- SPARQL: a digit after `.` is now required for `xsd:decimal`.
- SPARQL: calendar subtraction returns `xsd:dayTimeDuration` and not `xsd:duration`.
- SPARQL: Unicode escapes (`\u` and `\U`) are now only supported in IRIs and strings and not everywhere.
- Literal serialization now produces canonical N-Triples according to the RDF 1.2 and RDF Dataset Canonicalization drafts
- Rust: MSRV is now 1.70.
- Rust Makes `GraphName` implement `Default`.
- Rust: `wasm32-unknown-unknown` does not assumes JS platform by default. Enable the `js` feature for that.
- Rust: Parsers take `Read` and not `BufRead` for input.
- Rust: `GraphFormat` and `DatasetFormat` have been merged into `RdfFormat`.
- Rust: `GraphParser` and `DatasetParser` have been merged into `RdfParser`.
- Rust: `GraphSerializer` and `DatasetSerializer` have been merged into `RdfSerializer`.
- Rust: query results are now `Send` and `Sync`.
- Rust: `Store.load_graph` and `Store.load_dataset` have been merged into a `load_from_read` method.
- Rust: `Store.dump_graph` and `Store.dump_dataset` have been renamed to `dump_graph_to_write` and `dump_to_write`.
- Rust: `BulkLoader.set_*` methods have been renamed to `BulkLoader.with_*`.
- oxsdatatypes: pass by-values instead of by-reference parameters when relevant.
- oxsdatatypes: error types have been redesigned.
- oxsdatatypes: return an error when building not serializable duration (year-month and day-time of opposite signs).
- sparesults: renames some methods to move closer to the new oxrdfio crate.
- Python: raise `OSError` instead of `IOError` on OS errors.
- Python: the `mime_type` parameter have been renamed to `format`.
- Python: boolean SPARQL results are now encoded with a `QueryBoolean` class and not a simple `bool`.
- Python: a `path` parameter has been added to all I/O method to read from a file.
The existing `input` parameter now consider `str` values to be a serialization to parse.
- JS: the `mime_type` parameter have been renamed to `format`.
- CLI: the `oxigraph_server` binary has been renamed to `oxigraph`.
- CLI: the `--location` argument is now part of sub-commands where it is relevant.
`oxigraph_server --location foo serve` is not possible anymore.
One need to write `oxigraph serve --location foo`.
- CLI: is is now possible to upload gzip encoded files to the HTTP API with the `Content-Encoding: gzip` header.
## [0.3.22] - 2023-11-29
### Changed
- Allows to compile with more recent `bindgen` and `cc`
- Fixes compatibility with `spin_no_std` feature of `lazy_static`
## [0.3.21] - 2023-11-29
### Changed
- Bulk loader: do not fail when loading empty files.
- Python: fixes source distribution.
- Upgrades RocksDB to 7.8.1.
## [0.3.20] - 2023-10-23
### Changed

1357
Cargo.lock generated

File diff suppressed because it is too large Load Diff

@ -1,22 +1,274 @@
[workspace]
members = [
"cli",
"js",
"lib",
"lib/oxigraph",
"lib/oxrdf",
"lib/oxrdfio",
"lib/oxrdfxml",
"lib/oxsdatatypes",
"lib/spargebra",
"lib/oxttl",
"lib/sparesults",
"lib/spargebra",
"lib/sparopt",
"lib/sparql-smith",
"oxrocksdb-sys",
"python",
"server",
"testsuite"
]
resolver = "2"
[workspace.package]
version = "0.4.0-alpha.3-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
edition = "2021"
rust-version = "1.70"
[workspace.dependencies]
anyhow = "1.0.72"
arbitrary = "1.3"
assert_cmd = "2.0"
assert_fs = "1.0"
bindgen = ">=0.60, <0.70"
cc = "1.0.73"
clap = "4.0"
codspeed-criterion-compat = "2.3.3"
console_error_panic_hook = "0.1.7"
digest = "0.10"
escargot = "0.5"
flate2 = "1.0"
getrandom = "0.2.8"
hex = "0.4"
js-sys = "0.3.60"
json-event-parser = "0.2.0-alpha.2"
libc = "0.2.147"
md-5 = "0.10"
memchr = "2.5"
oxhttp = "0.2.0-alpha.3"
oxilangtag = "0.1"
oxiri = "0.2.3-alpha.1"
peg = "0.8"
pkg-config = "0.3.25"
predicates = ">=2.0, <4.0"
pyo3 = "0.20.1"
quick-xml = ">=0.29, <0.32"
rand = "0.8"
rayon-core = "1.11"
regex = "1.7"
sha1 = "0.10"
sha2 = "0.10"
siphasher = ">=0.3, <2.0"
text-diff = "0.4"
thiserror = "1.0.50"
time = "0.3"
tokio = "1.29"
url = "2.4"
wasm-bindgen = "0.2.83"
zstd = ">=0.12, <0.14"
# Internal dependencies
oxigraph = { version = "0.4.0-alpha.3-dev", path = "lib/oxigraph" }
oxrdf = { version = "0.2.0-alpha.2", path = "lib/oxrdf" }
oxrdfio = { version = "0.1.0-alpha.3-dev", path = "lib/oxrdfio" }
oxrdfxml = { version = "0.1.0-alpha.3-dev", path = "lib/oxrdfxml" }
oxrocksdb-sys = { version = "0.4.0-alpha.3-dev", path = "./oxrocksdb-sys" }
oxsdatatypes = { version = "0.2.0-alpha.1", path = "lib/oxsdatatypes" }
oxttl = { version = "0.1.0-alpha.3-dev", path = "lib/oxttl" }
sparesults = { version = "0.2.0-alpha.2", path = "lib/sparesults" }
spargebra = { version = "0.3.0-alpha.2", path = "lib/spargebra" }
sparopt = { version = "0.1.0-alpha.2", path = "lib/sparopt" }
sparql-smith = { version = "0.1.0-alpha.5", path = "lib/sparql-smith" }
[workspace.lints.rust]
absolute_paths_not_starting_with_crate = "warn"
elided_lifetimes_in_paths = "warn"
explicit_outlives_requirements = "warn"
let_underscore_drop = "warn"
macro_use_extern_crate = "warn"
# TODO missing_docs = "warn"
trivial_casts = "warn"
trivial_numeric_casts = "warn"
unsafe_code = "warn"
unused_import_braces = "warn"
unused_lifetimes = "warn"
unused_macro_rules = "warn"
unused_qualifications = "warn"
[workspace.lints.clippy]
allow_attributes = "warn"
allow_attributes_without_reason = "warn"
as_underscore = "warn"
assertions_on_result_states = "warn"
bool_to_int_with_if = "warn"
borrow_as_ptr = "warn"
case_sensitive_file_extension_comparisons = "warn"
cast_lossless = "warn"
cast_possible_truncation = "warn"
cast_possible_wrap = "warn"
cast_precision_loss = "warn"
cast_ptr_alignment = "warn"
cast_sign_loss = "warn"
checked_conversions = "warn"
clone_on_ref_ptr = "warn"
cloned_instead_of_copied = "warn"
copy_iterator = "warn"
create_dir = "warn"
dbg_macro = "warn"
decimal_literal_representation = "warn"
default_trait_access = "warn"
default_union_representation = "warn"
deref_by_slicing = "warn"
disallowed_script_idents = "warn"
doc_link_with_quotes = "warn"
empty_drop = "warn"
empty_enum = "warn"
empty_structs_with_brackets = "warn"
enum_glob_use = "warn"
error_impl_error = "warn"
exit = "warn"
expect_used = "warn"
expl_impl_clone_on_copy = "warn"
explicit_deref_methods = "warn"
explicit_into_iter_loop = "warn"
explicit_iter_loop = "warn"
filetype_is_file = "warn"
filter_map_next = "warn"
flat_map_option = "warn"
fn_params_excessive_bools = "warn"
fn_to_numeric_cast_any = "warn"
format_push_string = "warn"
from_iter_instead_of_collect = "warn"
get_unwrap = "warn"
host_endian_bytes = "warn"
if_not_else = "warn"
if_then_some_else_none = "warn"
ignored_unit_patterns = "warn"
implicit_clone = "warn"
implicit_hasher = "warn"
inconsistent_struct_constructor = "warn"
index_refutable_slice = "warn"
inefficient_to_string = "warn"
inline_always = "warn"
inline_asm_x86_att_syntax = "warn"
inline_asm_x86_intel_syntax = "warn"
invalid_upcast_comparisons = "warn"
items_after_statements = "warn"
iter_not_returning_iterator = "warn"
large_digit_groups = "warn"
large_futures = "warn"
large_include_file = "warn"
large_stack_arrays = "warn"
large_types_passed_by_value = "warn"
let_underscore_must_use = "warn"
let_underscore_untyped = "warn"
linkedlist = "warn"
lossy_float_literal = "warn"
macro_use_imports = "warn"
manual_assert = "warn"
manual_instant_elapsed = "warn"
manual_let_else = "warn"
manual_ok_or = "warn"
manual_string_new = "warn"
many_single_char_names = "warn"
map_unwrap_or = "warn"
match_bool = "warn"
match_on_vec_items = "warn"
match_same_arms = "warn"
match_wild_err_arm = "warn"
match_wildcard_for_single_variants = "warn"
maybe_infinite_iter = "warn"
mem_forget = "warn"
mismatching_type_param_order = "warn"
missing_assert_message = "warn"
missing_asserts_for_indexing = "warn"
missing_enforced_import_renames = "warn"
missing_fields_in_debug = "warn"
multiple_inherent_impl = "warn"
mut_mut = "warn"
mutex_atomic = "warn"
naive_bytecount = "warn"
needless_bitwise_bool = "warn"
needless_continue = "warn"
needless_for_each = "warn"
needless_pass_by_value = "warn"
needless_raw_strings = "warn"
negative_feature_names = "warn"
no_effect_underscore_binding = "warn"
no_mangle_with_rust_abi = "warn"
non_ascii_literal = "warn"
panic = "warn"
panic_in_result_fn = "warn"
partial_pub_fields = "warn"
print_stderr = "warn"
print_stdout = "warn"
ptr_as_ptr = "warn"
ptr_cast_constness = "warn"
pub_without_shorthand = "warn"
range_minus_one = "warn"
range_plus_one = "warn"
rc_buffer = "warn"
rc_mutex = "warn"
redundant_closure_for_method_calls = "warn"
redundant_else = "warn"
redundant_feature_names = "warn"
redundant_type_annotations = "warn"
ref_binding_to_reference = "warn"
ref_option_ref = "warn"
ref_patterns = "warn"
rest_pat_in_fully_bound_structs = "warn"
return_self_not_must_use = "warn"
same_functions_in_if_condition = "warn"
same_name_method = "warn"
semicolon_inside_block = "warn"
shadow_same = "warn"
should_panic_without_expect = "warn"
single_match_else = "warn"
stable_sort_primitive = "warn"
str_to_string = "warn"
string_add = "warn"
string_add_assign = "warn"
string_lit_chars_any = "warn"
string_to_string = "warn"
struct_excessive_bools = "warn"
suspicious_xor_used_as_pow = "warn"
tests_outside_test_module = "warn"
todo = "warn"
transmute_ptr_to_ptr = "warn"
trivially_copy_pass_by_ref = "warn"
try_err = "warn"
unchecked_duration_subtraction = "warn"
undocumented_unsafe_blocks = "warn"
unicode_not_nfc = "warn"
unimplemented = "warn"
uninlined_format_args = "warn"
unnecessary_box_returns = "warn"
unnecessary_join = "warn"
unnecessary_safety_comment = "warn"
unnecessary_safety_doc = "warn"
unnecessary_self_imports = "warn"
unnecessary_wraps = "warn"
unneeded_field_pattern = "warn"
unnested_or_patterns = "warn"
unreadable_literal = "warn"
unsafe_derive_deserialize = "warn"
unseparated_literal_suffix = "warn"
unused_async = "warn"
unused_self = "warn"
unwrap_in_result = "warn"
use_debug = "warn"
used_underscore_binding = "warn"
verbose_bit_mask = "warn"
verbose_file_reads = "warn"
wildcard_dependencies = "warn"
zero_sized_map_values = "warn"
[profile.release]
lto = true
codegen-units = 1
strip = "debuginfo"
[profile.release.package.oxigraph_js]
[profile.release.package.oxigraph-js]
codegen-units = 1
opt-level = "z"
strip = "debuginfo"

@ -5,11 +5,18 @@
This is a fork of oxigraph that includes [rs-handlegraph](https://github.com/chfi/rs-handlegraph).
The purpose is to make pangenomic GFA-files accessible with SPARQL queries.
Oxigraph implements the following specifications:
- [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
- [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio).
- [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
Also, some parts of Oxigraph are available as standalone Rust crates:
* [`oxrdf`](https://crates.io/crates/oxrdf), datastructures encoding RDF basic concepts (the [`oxigraph::model`](crate::model) module).
* [`oxrdfio`](https://crates.io/crates/oxrdfio), a unified parser and serializer API for RDF formats (the [`oxigraph::io`](crate::io) module). It itself relies on:
* [`oxttl`](https://crates.io/crates/oxttl), N-Triple, N-Quad, Turtle, TriG and N3 parsing and serialization.
* [`oxrdfxml`](https://crates.io/crates/oxrdfxml), RDF/XML parsing and serialization.
* [`spargebra`](https://crates.io/crates/spargebra), a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults), parsers and serializers for SPARQL result formats.
* [`sparopt`](https://crates.io/crates/sparesults), a SPARQL optimizer.
* [`oxsdatatypes`](https://crates.io/crates/oxsdatatypes), an implementation of some XML Schema datatypes.
The library layers in Oxigraph. The elements above depend on the elements below:
![Oxigraph libraries architecture diagram](./docs/arch-diagram.svg)
When cloning this codebase, don't forget to clone the submodules using
`git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository including submodules or

@ -6,10 +6,10 @@ PARALLELISM=16
set -eu
cd bsbm-tools
./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
cargo build --release --manifest-path="../../server/Cargo.toml"
VERSION=$(./../../target/release/oxigraph_server --version | sed 's/oxigraph_server //g')
./../../target/release/oxigraph_server --location oxigraph_data load --file "explore-${DATASET_SIZE}.nt"
./../../target/release/oxigraph_server --location oxigraph_data serve --bind 127.0.0.1:7878 &
cargo build --release --manifest-path="../../cli/Cargo.toml"
VERSION=$(./../../target/release/oxigraph --version | sed 's/oxigraph //g')
./../../target/release/oxigraph --location oxigraph_data load --file "explore-${DATASET_SIZE}.nt"
./../../target/release/oxigraph --location oxigraph_data serve --bind 127.0.0.1:7878 &
sleep 1
./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.oxigraph.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://127.0.0.1:7878/query
./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.oxigraph.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://127.0.0.1:7878/query -u http://127.0.0.1:7878/update -udataset "explore-update-${DATASET_SIZE}.nt"

@ -0,0 +1,47 @@
[package]
name = "oxigraph-cli"
version.workspace = true
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF", "SPARQL", "graph-database", "database"]
categories = ["command-line-utilities", "database"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/cli"
homepage = "https://oxigraph.org/cli/"
description = """
Oxigraph command line toolkit and SPARQL HTTP server
"""
edition.workspace = true
rust-version.workspace = true
[[bin]]
name = "oxigraph"
path = "src/main.rs"
doc = false
[features]
default = ["native-tls"]
native-tls = ["oxigraph/http-client-native-tls"]
rocksdb-pkg-config = ["oxigraph/rocksdb-pkg-config"]
rustls-native = ["oxigraph/http-client-rustls-native"]
rustls-webpki = ["oxigraph/http-client-rustls-webpki"]
[dependencies]
anyhow.workspace = true
clap = { workspace = true, features = ["derive"] }
flate2.workspace = true
oxhttp = { workspace = true, features = ["flate2"] }
oxigraph.workspace = true
oxiri.workspace = true
rand.workspace = true
rayon-core.workspace = true
url.workspace = true
[dev-dependencies]
assert_cmd.workspace = true
assert_fs.workspace = true
escargot.workspace = true
predicates.workspace = true
[lints]
workspace = true

@ -1,4 +1,4 @@
FROM --platform=$BUILDPLATFORM rust:1-bullseye as builder
FROM --platform=$BUILDPLATFORM rust:1-bookworm as builder
ARG BUILDARCH TARGETARCH
RUN apt-get update && \
apt-get install -y libclang-dev clang && \
@ -8,18 +8,18 @@ RUN apt-get update && \
rustup target add aarch64-unknown-linux-gnu ; \
fi
COPY . /oxigraph
WORKDIR /oxigraph/server
WORKDIR /oxigraph/cli
RUN if [ "$BUILDARCH" != "$TARGETARCH" ] && [ "$TARGETARCH" = "arm64" ] ; \
then \
export CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc && \
export BINDGEN_EXTRA_CLANG_ARGS="--sysroot /usr/aarch64-linux-gnu" && \
cargo build --release --target aarch64-unknown-linux-gnu && \
mv /oxigraph/target/aarch64-unknown-linux-gnu/release/oxigraph_server /oxigraph/target/release/oxigraph_server ; \
cargo build --release --target aarch64-unknown-linux-gnu --no-default-features --features rustls-webpki && \
mv /oxigraph/target/aarch64-unknown-linux-gnu/release/oxigraph /oxigraph/target/release/oxigraph ; \
else \
cargo build --release ; \
cargo build --release --no-default-features --features rustls-webpki ; \
fi
FROM --platform=$TARGETPLATFORM gcr.io/distroless/cc-debian11
COPY --from=builder /oxigraph/target/release/oxigraph_server /usr/local/bin/oxigraph_server
ENTRYPOINT [ "/usr/local/bin/oxigraph_server" ]
FROM --platform=$TARGETPLATFORM gcr.io/distroless/cc-debian12
COPY --from=builder /oxigraph/target/release/oxigraph /usr/local/bin/oxigraph
ENTRYPOINT [ "/usr/local/bin/oxigraph" ]
CMD [ "serve", "--location", "/data", "--bind", "0.0.0.0:7878" ]

@ -1,21 +1,19 @@
Oxigraph Server
===============
Oxigraph CLI
============
[![Latest Version](https://img.shields.io/crates/v/oxigraph_server.svg)](https://crates.io/crates/oxigraph_server)
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph_server)](https://crates.io/crates/oxigraph_server)
[![Latest Version](https://img.shields.io/crates/v/oxigraph-cli.svg)](https://crates.io/crates/oxigraph-cli)
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph-cli)](https://crates.io/crates/oxigraph-cli)
[![Conda](https://img.shields.io/conda/vn/conda-forge/oxigraph-server)](https://anaconda.org/conda-forge/oxigraph-server)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
Oxigraph Server is a standalone HTTP server providing a graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
Its goal is to provide a compliant, safe, and fast graph database based on the [RocksDB](https://rocksdb.org/) key-value store.
It is written in Rust.
It also provides a set of utility functions for reading, writing, and processing RDF files.
Oxigraph CLI is a graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
It is packaged as a command line tool allowing to manipulate an RDF files, query them using SPARQL...
It also allows to spawn a HTTP server on top of the database.
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet.
Oxigraph provides different installation methods for Oxigraph server:
Oxigraph provides different installation methods for Oxigraph CLI:
* [`cargo install`](#installation) (multiplatform)
* [A Docker image](#using-a-docker-image)
* [A Homebrew formula](#homebrew)
@ -32,19 +30,29 @@ Oxigraph implements the following specifications:
A preliminary benchmark [is provided](../bench/README.md).
Note that Oxigraph CLI was previously named Oxigraph Server before version 0.4. Older versions are available under [this name](https://crates.io/crates/oxigraph_server).
## Installation
You need to have [a recent stable version of Rust and Cargo installed](https://www.rust-lang.org/tools/install).
To download, build and install the latest released version run `cargo install oxigraph_server`.
To download, build and install the latest released version run `cargo install oxigraph-cli`.
There is no need to clone the git repository.
To compile the server from source, clone this git repository including its submodules (`git clone --recursive https://github.com/oxigraph/oxigraph.git`), and execute `cargo build --release` in the `server` directory to compile the full server after having downloaded its dependencies.
It will create a fat binary in `target/release/oxigraph_server`.
To compile the command line tool from source, clone this git repository including its submodules (`git clone --recursive https://github.com/oxigraph/oxigraph.git`), and execute `cargo build --release` in the `cli` directory to compile the full binary after having downloaded its dependencies.
It will create a fat binary in `target/release/oxigraph`.
Some build options (cargo features) are available:
- `rocksdb-pkg-config`: links against an already compiled rocksdb shared library found using [pkg-config](https://crates.io/crates/pkg-config).
- `native-tls`: Enables Oxigraph HTTP client for query federation using the host OS TLS stack (enabled by default).
- `rustls-native` Enables Oxigraph HTTP client for query federation using [Rustls](https://crates.io/crates/rustls) and the native certificates.
- `rustls-webpki` Enables Oxigraph HTTP client for query federation using [Rustls](https://crates.io/crates/rustls) and the [Common CA Database](https://www.ccadb.org/) certificates.
## Usage
Run `oxigraph_server --location my_data_storage_directory serve` to start the server where `my_data_storage_directory` is the directory where you want Oxigraph data to be stored. It listens by default on `localhost:7878`.
Run `oxigraph serve --location my_data_storage_directory` to start the server where `my_data_storage_directory` is the directory where you want Oxigraph data to be stored. It listens by default on `localhost:7878`.
The server provides an HTML UI, based on [YASGUI](https://yasgui.triply.cc), with a form to execute SPARQL requests.
@ -78,10 +86,10 @@ It provides the following REST actions:
```
will add the N-Quads file `MY_FILE.nq` to the server dataset.
Use `oxigraph_server --help` to see the possible options when starting the server.
Use `oxigraph --help` to see the possible options when starting the server.
It is also possible to load RDF data offline using bulk loading:
`oxigraph_server --location my_data_storage_directory load --file my_file.nq`
`oxigraph load --location my_data_storage_directory --file my_file.nq`
## Using a Docker image
@ -93,7 +101,7 @@ docker run --rm ghcr.io/oxigraph/oxigraph --help
### Run the Webserver
Expose the server on port `7878` of the host machine, and save data on the local `./data` folder
```sh
docker run --rm -v $PWD/data:/data -p 7878:7878 ghcr.io/oxigraph/oxigraph --location /data serve --bind 0.0.0.0:7878
docker run --rm -v $PWD/data:/data -p 7878:7878 ghcr.io/oxigraph/oxigraph serve --location /data --bind 0.0.0.0:7878
```
You can then access it from your machine on port `7878`:
@ -217,14 +225,14 @@ To install Oxigraph server using Homebrew do:
brew tap oxigraph/oxigraph
brew install oxigraph
```
It installs the `oxigraph_server` binary. [See the usage documentation to know how to use it](#usage).
It installs the `oxigraph` binary. [See the usage documentation to know how to use it](#usage).
## Systemd
It is possible to run Oxigraph in the background using systemd.
For that, you can use the following `oxigraph_server.service` file (it might be inserted into `/etc/systemd/system/` or `$HOME/.config/systemd/user`):
For that, you can use the following `oxigraph.service` file (it might be inserted into `/etc/systemd/system/` or `$HOME/.config/systemd/user`):
```ini
[Unit]
Description=Oxigraph database server
@ -233,7 +241,7 @@ Wants=network-online.target
[Service]
Type=notify
ExecStart=/PATH/TO/oxigraph_server serve --location /PATH/TO/OXIGRAPH/DATA
ExecStart=/PATH/TO/oxigraph serve --location /PATH/TO/OXIGRAPH/DATA
[Install]
WantedBy=multi-user.target
@ -242,8 +250,8 @@ WantedBy=multi-user.target
## Migration guide
### From 0.2 to 0.3
* The cli API has been completely rewritten. To start the server run `oxigraph_server serve --location MY_STORAGE` instead of `oxigraph_server --file MY_STORAGE`.
* Fast data bulk loading is not supported using `oxigraph_server load --location MY_STORAGE --file MY_FILE`. The file format is guessed from the extension (`.nt`, `.ttl`, `.nq`...).
* The cli API has been completely rewritten. To start the server run `oxigraph serve --location MY_STORAGE` instead of `oxigraph --file MY_STORAGE`.
* Fast data bulk loading is not supported using `oxigraph load --location MY_STORAGE --file MY_FILE`. The file format is guessed from the extension (`.nt`, `.ttl`, `.nq`...).
* [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is now implemented.
* All operations are now transactional using the "repeatable read" isolation level:
the store only exposes changes that have been "committed" (i.e. no partial writes)

Before

Width:  |  Height:  |  Size: 4.5 KiB

After

Width:  |  Height:  |  Size: 4.5 KiB

File diff suppressed because it is too large Load Diff

@ -1,4 +1,4 @@
avoid-breaking-exported-api = true
avoid-breaking-exported-api = false
cognitive-complexity-threshold = 50
too-many-arguments-threshold = 10
type-complexity-threshold = 500

@ -0,0 +1,120 @@
<svg xmlns="http://www.w3.org/2000/svg" width="624" height="384" class="svgbob">
<style>.svgbob line, .svgbob path, .svgbob circle, .svgbob rect, .svgbob polygon {
stroke: black;
stroke-width: 2;
stroke-opacity: 1;
fill-opacity: 1;
stroke-linecap: round;
stroke-linejoin: miter;
}
.svgbob text {
white-space: pre;
fill: black;
font-family: Iosevka Fixed, monospace;
font-size: 14px;
}
.svgbob rect.backdrop {
stroke: none;
fill: white;
}
.svgbob .broken {
stroke-dasharray: 8;
}
.svgbob .filled {
fill: black;
}
.svgbob .bg_filled {
fill: white;
stroke-width: 1;
}
.svgbob .nofill {
fill: white;
}
.svgbob .end_marked_arrow {
marker-end: url(#arrow);
}
.svgbob .start_marked_arrow {
marker-start: url(#arrow);
}
.svgbob .end_marked_diamond {
marker-end: url(#diamond);
}
.svgbob .start_marked_diamond {
marker-start: url(#diamond);
}
.svgbob .end_marked_circle {
marker-end: url(#circle);
}
.svgbob .start_marked_circle {
marker-start: url(#circle);
}
.svgbob .end_marked_open_circle {
marker-end: url(#open_circle);
}
.svgbob .start_marked_open_circle {
marker-start: url(#open_circle);
}
.svgbob .end_marked_big_open_circle {
marker-end: url(#big_open_circle);
}
.svgbob .start_marked_big_open_circle {
marker-start: url(#big_open_circle);
}<!--separator-->.svgbob .r{
fill: papayawhip;
}
.svgbob .p{
fill: lightyellow;
}
.svgbob .j{
fill: lightgreen;
}
</style>
<defs>
<marker id="arrow" viewBox="-2 -2 8 8" refX="4" refY="2" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<polygon points="0,0 0,4 4,2 0,0"></polygon>
</marker>
<marker id="diamond" viewBox="-2 -2 8 8" refX="4" refY="2" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<polygon points="0,2 2,0 4,2 2,4 0,2"></polygon>
</marker>
<marker id="circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="2" class="filled"></circle>
</marker>
<marker id="open_circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="2" class="bg_filled"></circle>
</marker>
<marker id="big_open_circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="3" class="bg_filled"></circle>
</marker>
</defs>
<rect class="backdrop" x="0" y="0" width="624" height="384"></rect>
<rect x="4" y="8" width="152" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="28">oxigraph CLI</text>
<rect x="244" y="8" width="136" height="32" class="solid nofill p" rx="0"></rect>
<text x="258" y="28">pyoxigraph</text>
<rect x="468" y="8" width="144" height="32" class="solid nofill j" rx="0"></rect>
<text x="482" y="28">oxigraph</text>
<text x="554" y="28">JS</text>
<rect x="4" y="72" width="608" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="92">oxigraph</text>
<rect x="68" y="136" width="232" height="32" class="solid nofill r" rx="0"></rect>
<text x="82" y="156">oxrdfio</text>
<rect x="348" y="136" width="112" height="32" class="solid nofill r" rx="0"></rect>
<text x="362" y="156">sparopt</text>
<rect x="68" y="200" width="96" height="32" class="solid nofill r" rx="0"></rect>
<text x="82" y="220">oxttl</text>
<rect x="180" y="200" width="120" height="32" class="solid nofill r" rx="0"></rect>
<text x="194" y="220">oxrdfxml</text>
<rect x="316" y="200" width="144" height="32" class="solid nofill r" rx="0"></rect>
<text x="330" y="220">spargebra</text>
<rect x="476" y="200" width="136" height="32" class="solid nofill r" rx="0"></rect>
<text x="490" y="220">sparesults</text>
<rect x="36" y="264" width="576" height="32" class="solid nofill r" rx="0"></rect>
<text x="50" y="284">oxrdf</text>
<rect x="4" y="328" width="152" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="348">oxsdatatypes</text>
</svg>

After

Width:  |  Height:  |  Size: 4.6 KiB

@ -0,0 +1,35 @@
+------------------+ +----------------+ +-----------------+
+ oxigraph CLI {r} + + pyoxigraph {p} + + oxigraph JS {j} +
+------------------+ +----------------+ +-----------------+
+---------------------------------------------------------------------------+
+ oxigraph (Rust) {r} +
+---------------------------------------------------------------------------+
+----------------------------+ +-------------+
+ oxrdfio {r} + + sparopt {r} +
+----------------------------+ +-------------+
+-----------+ +--------------+ +-----------------+ +----------------+
+ oxttl {r} + + oxrdfxml {r} + + spargebra {r} + + sparesults {r} +
+-----------+ +--------------+ +-----------------+ +----------------+
+-----------------------------------------------------------------------+
+ oxrdf {r} +
+-----------------------------------------------------------------------+
+------------------+
+ oxsdatatypes {r} +
+------------------+
# Legend:
r = {
fill: papayawhip;
}
p = {
fill: lightyellow;
}
j = {
fill: lightgreen;
}

@ -1,7 +1,6 @@
[package]
name = "oxigraph-fuzz"
version = "0.0.0"
authors = ["Automatically generated"]
publish = false
edition = "2021"
@ -9,13 +8,15 @@ edition = "2021"
cargo-fuzz = true
[dependencies]
anyhow = "1"
lazy_static = "1"
anyhow = "1.0.72"
libfuzzer-sys = "0.4"
spargebra = { path = "../lib/spargebra", features = ["rdf-star", "sep-0006"] }
oxigraph = { path = "../lib/oxigraph" }
oxrdf = { path = "../lib/oxrdf", features = ["rdf-star"] }
oxrdfxml = { path = "../lib/oxrdfxml" }
oxttl = { path = "../lib/oxttl", features = ["rdf-star"] }
sparesults = { path = "../lib/sparesults", features = ["rdf-star"] }
spargebra = { path = "../lib/spargebra", features = ["rdf-star", "sep-0006"] }
sparql-smith = { path = "../lib/sparql-smith", features = ["sep-0006"] }
oxigraph = { path = "../lib" }
[profile.release]
codegen-units = 1
@ -23,6 +24,18 @@ debug = true
[workspace]
[[bin]]
name = "nquads"
path = "fuzz_targets/nquads.rs"
[[bin]]
name = "n3"
path = "fuzz_targets/n3.rs"
[[bin]]
name = "rdf_xml"
path = "fuzz_targets/rdf_xml.rs"
[[bin]]
name = "sparql_eval"
path = "fuzz_targets/sparql_eval.rs"
@ -46,3 +59,7 @@ path = "fuzz_targets/sparql_results_xml.rs"
[[bin]]
name = "sparql_results_tsv"
path = "fuzz_targets/sparql_results_tsv.rs"
[[bin]]
name = "trig"
path = "fuzz_targets/trig.rs"

@ -0,0 +1,28 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxttl::N3Parser;
fuzz_target!(|data: &[u8]| {
let mut quads = Vec::new();
let mut parser = N3Parser::new()
.with_base_iri("http://example.com/")
.unwrap()
.parse();
for chunk in data.split(|c| *c == 0xFF) {
parser.extend_from_slice(chunk);
while let Some(result) = parser.read_next() {
if let Ok(quad) = result {
quads.push(quad);
}
}
}
parser.end();
while let Some(result) = parser.read_next() {
if let Ok(quad) = result {
quads.push(quad);
}
}
assert!(parser.is_end());
//TODO: serialize
});

@ -0,0 +1,84 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdf::Quad;
use oxttl::{NQuadsParser, NQuadsSerializer};
fn parse<'a>(
chunks: impl IntoIterator<Item = &'a [u8]>,
unchecked: bool,
) -> (Vec<Quad>, Vec<String>) {
let mut quads = Vec::new();
let mut errors = Vec::new();
let mut parser = NQuadsParser::new().with_quoted_triples();
if unchecked {
parser = parser.unchecked();
}
let mut reader = parser.parse();
for chunk in chunks {
reader.extend_from_slice(chunk);
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
}
reader.end();
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
assert!(reader.is_end());
(quads, errors)
}
fuzz_target!(|data: &[u8]| {
// We parse with splitting
let (quads, errors) = parse(data.split(|c| *c == 0xFF), false);
// We parse without splitting
let (quads_without_split, errors_without_split) = parse(
[data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()],
false,
);
assert_eq!(quads, quads_without_split);
assert_eq!(errors, errors_without_split);
// We test also unchecked if valid
if errors.is_empty() {
let (quads_unchecked, errors_unchecked) = parse(data.split(|c| *c == 0xFF), true);
assert!(errors_unchecked.is_empty());
assert_eq!(quads, quads_unchecked);
}
// We serialize
let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new());
for quad in &quads {
writer.write_quad(quad).unwrap();
}
let new_serialization = writer.finish();
// We parse the serialization
let new_quads = NQuadsParser::new()
.with_quoted_triples()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {quads:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_quads, quads);
});

@ -0,0 +1,37 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdfxml::{RdfXmlParser, RdfXmlSerializer};
fuzz_target!(|data: &[u8]| {
// We parse
let mut triples = Vec::new();
for triple in RdfXmlParser::new().parse_read(data) {
if let Ok(triple) = triple {
triples.push(triple);
}
}
// We serialize
let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new());
for triple in &triples {
writer.write_triple(triple).unwrap();
}
let new_serialization = writer.finish().unwrap();
// We parse the serialization
let new_triples = RdfXmlParser::new()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {triples:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_triples, triples);
});

@ -1,31 +1,26 @@
#![no_main]
use lazy_static::lazy_static;
use libfuzzer_sys::fuzz_target;
use oxigraph::io::DatasetFormat;
use oxigraph::io::RdfFormat;
use oxigraph::sparql::{Query, QueryOptions, QueryResults, QuerySolutionIter};
use oxigraph::store::Store;
use std::sync::OnceLock;
lazy_static! {
static ref STORE: Store = {
fuzz_target!(|data: sparql_smith::Query| {
static STORE: OnceLock<Store> = OnceLock::new();
let store = STORE.get_or_init(|| {
let store = Store::new().unwrap();
store
.load_dataset(
sparql_smith::DATA_TRIG.as_bytes(),
DatasetFormat::TriG,
None,
)
.load_dataset(sparql_smith::DATA_TRIG.as_bytes(), RdfFormat::TriG, None)
.unwrap();
store
};
}
});
fuzz_target!(|data: sparql_smith::Query| {
let query_str = data.to_string();
if let Ok(query) = Query::parse(&query_str, None) {
let options = QueryOptions::default();
let with_opt = STORE.query_opt(query.clone(), options.clone()).unwrap();
let without_opt = STORE
let with_opt = store.query_opt(query.clone(), options.clone()).unwrap();
let without_opt = store
.query_opt(query, options.without_optimizations())
.unwrap();
match (with_opt, without_opt) {
@ -52,7 +47,7 @@ fn query_solutions_key(iter: QuerySolutionIter, is_reduced: bool) -> String {
let mut b = t
.unwrap()
.iter()
.map(|(var, val)| format!("{}: {}", var, val))
.map(|(var, val)| format!("{var}: {val}"))
.collect::<Vec<_>>();
b.sort_unstable();
b.join(" ")

@ -3,5 +3,5 @@ use libfuzzer_sys::fuzz_target;
use spargebra::Query;
fuzz_target!(|data: &str| {
Query::parse(data, None);
let _ = Query::parse(data, None);
});

@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format;
use sparesults::QueryResultsFormat;
fuzz_target!(|data: &[u8]| { fuzz_result_format(QueryResultsFormat::Json, data) });
fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Json, data));

@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format;
use sparesults::QueryResultsFormat;
fuzz_target!(|data: &[u8]| { fuzz_result_format(QueryResultsFormat::Tsv, data) });
fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Tsv, data));

@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format;
use sparesults::QueryResultsFormat;
fuzz_target!(|data: &[u8]| { fuzz_result_format(QueryResultsFormat::Xml, data) });
fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Xml, data));

@ -4,5 +4,5 @@ use spargebra::Update;
use std::str;
fuzz_target!(|data: &str| {
Update::parse(data, None);
let _ = Update::parse(data, None);
});

@ -0,0 +1,167 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple};
use oxttl::{TriGParser, TriGSerializer};
fn parse<'a>(
chunks: impl IntoIterator<Item = &'a [u8]>,
unchecked: bool,
) -> (Vec<Quad>, Vec<String>, Vec<(String, String)>) {
let mut quads = Vec::new();
let mut errors = Vec::new();
let mut parser = TriGParser::new()
.with_quoted_triples()
.with_base_iri("http://example.com/")
.unwrap();
if unchecked {
parser = parser.unchecked();
}
let mut reader = parser.parse();
for chunk in chunks {
reader.extend_from_slice(chunk);
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
}
reader.end();
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
assert!(reader.is_end());
(
quads,
errors,
reader
.prefixes()
.map(|(k, v)| (k.to_owned(), v.to_owned()))
.collect(),
)
}
fn count_triple_blank_nodes(triple: &Triple) -> usize {
(match &triple.subject {
Subject::BlankNode(_) => 1,
Subject::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
}) + (match &triple.object {
Term::BlankNode(_) => 1,
Term::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
})
}
fn count_quad_blank_nodes(quad: &Quad) -> usize {
(match &quad.subject {
Subject::BlankNode(_) => 1,
Subject::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
}) + (match &quad.object {
Term::BlankNode(_) => 1,
Term::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
}) + usize::from(matches!(quad.graph_name, GraphName::BlankNode(_)))
}
fn serialize_quads(quads: &[Quad], prefixes: Vec<(String, String)>) -> Vec<u8> {
let mut serializer = TriGSerializer::new();
for (prefix_name, prefix_iri) in prefixes {
serializer = serializer.with_prefix(prefix_name, prefix_iri).unwrap();
}
let mut writer = serializer.serialize_to_write(Vec::new());
for quad in quads {
writer.write_quad(quad).unwrap();
}
writer.finish().unwrap()
}
fuzz_target!(|data: &[u8]| {
// We parse with splitting
let (quads, errors, prefixes) = parse(data.split(|c| *c == 0xFF), false);
// We parse without splitting
let (quads_without_split, errors_without_split, _) = parse(
[data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()],
false,
);
let (quads_unchecked, errors_unchecked, _) = parse(data.split(|c| *c == 0xFF), true);
if errors.is_empty() {
assert!(errors_unchecked.is_empty());
}
let bnodes_count = quads.iter().map(count_quad_blank_nodes).sum::<usize>();
if bnodes_count == 0 {
assert_eq!(
quads,
quads_without_split,
"With split:\n{}\nWithout split:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new()))
);
if errors.is_empty() {
assert_eq!(
quads,
quads_unchecked,
"Validating:\n{}\nUnchecked:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new()))
);
}
} else if bnodes_count <= 4 {
let mut dataset_with_split = quads.iter().collect::<Dataset>();
let mut dataset_without_split = quads_without_split.iter().collect::<Dataset>();
dataset_with_split.canonicalize();
dataset_without_split.canonicalize();
assert_eq!(
dataset_with_split,
dataset_without_split,
"With split:\n{}\nWithout split:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new()))
);
if errors.is_empty() {
if errors.is_empty() {
let mut dataset_unchecked = quads_unchecked.iter().collect::<Dataset>();
dataset_unchecked.canonicalize();
assert_eq!(
dataset_with_split,
dataset_unchecked,
"Validating:\n{}\nUnchecked:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new()))
);
}
}
}
assert_eq!(errors, errors_without_split);
// We serialize
let new_serialization = serialize_quads(&quads, prefixes);
// We parse the serialization
let new_quads = TriGParser::new()
.with_quoted_triples()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {quads:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_quads, quads);
});

@ -1,24 +1,24 @@
use anyhow::Context;
use sparesults::{
QueryResultsFormat, QueryResultsParser, QueryResultsReader, QueryResultsSerializer,
FromReadQueryResultsReader, QueryResultsFormat, QueryResultsParser, QueryResultsSerializer,
};
pub fn fuzz_result_format(format: QueryResultsFormat, data: &[u8]) {
let parser = QueryResultsParser::from_format(format);
let serializer = QueryResultsSerializer::from_format(format);
let Ok(reader) = parser.read_results(data) else {
let Ok(reader) = parser.parse_read(data) else {
return;
};
match reader {
QueryResultsReader::Solutions(solutions) => {
FromReadQueryResultsReader::Solutions(solutions) => {
let Ok(solutions) = solutions.collect::<Result<Vec<_>, _>>() else {
return;
};
// We try to write again
let mut writer = serializer
.solutions_writer(
.serialize_solutions_to_write(
Vec::new(),
solutions
.get(0)
@ -31,30 +31,30 @@ pub fn fuzz_result_format(format: QueryResultsFormat, data: &[u8]) {
let serialized = String::from_utf8(writer.finish().unwrap()).unwrap();
// And to parse again
if let QueryResultsReader::Solutions(roundtrip_solutions) = parser
.read_results(serialized.as_bytes())
.with_context(|| format!("Parsing {:?}", &serialized))
if let FromReadQueryResultsReader::Solutions(roundtrip_solutions) = parser
.parse_read(serialized.as_bytes())
.with_context(|| format!("Parsing {serialized:?}"))
.unwrap()
{
assert_eq!(
roundtrip_solutions
.collect::<Result<Vec<_>, _>>()
.with_context(|| format!("Parsing {:?}", &serialized))
.with_context(|| format!("Parsing {serialized:?}"))
.unwrap(),
solutions
)
}
}
QueryResultsReader::Boolean(value) => {
FromReadQueryResultsReader::Boolean(value) => {
// We try to write again
let mut serialized = Vec::new();
serializer
.write_boolean_result(&mut serialized, value)
.serialize_boolean_to_write(&mut serialized, value)
.unwrap();
// And to parse again
if let QueryResultsReader::Boolean(roundtrip_value) =
parser.read_results(serialized.as_slice()).unwrap()
if let FromReadQueryResultsReader::Boolean(roundtrip_value) =
parser.parse_read(serialized.as_slice()).unwrap()
{
assert_eq!(roundtrip_value, value)
}

@ -1,20 +1,26 @@
[package]
name = "oxigraph_js"
version = "0.3.20"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
name = "oxigraph-js"
version.workspace = true
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF", "N-Triples", "Turtle", "RDF/XML", "SPARQL"]
keywords = ["RDF", "N-Triples", "Turtle", "XML", "SPARQL"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/js"
description = "JavaScript bindings of Oxigraph"
edition = "2021"
edition.workspace = true
rust-version.workspace = true
publish = false
[lib]
crate-type = ["cdylib"]
name = "oxigraph"
doc = false
[dependencies]
oxigraph = { version = "0.3.20", path="../lib" }
wasm-bindgen = "0.2"
js-sys = "0.3"
console_error_panic_hook = "0.1"
console_error_panic_hook.workspace = true
js-sys.workspace = true
oxigraph = { workspace = true, features = ["js"] }
wasm-bindgen.workspace = true
[lints]
workspace = true

@ -3,7 +3,7 @@ Oxigraph for JavaScript
[![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
This package provides a JavaScript API on top of [Oxigraph](https://crates.io/crates/oxigraph), compiled with WebAssembly.
@ -197,40 +197,42 @@ Example of update:
store.update("DELETE WHERE { <http://example.com/s> ?p ?o }")
```
#### `Store.prototype.load(String data, String mimeType, NamedNode|String? baseIRI, NamedNode|BlankNode|DefaultGraph? toNamedGraph)`
#### `Store.prototype.load(String data, String format, NamedNode|String? baseIRI, NamedNode|BlankNode|DefaultGraph? toNamedGraph)`
Loads serialized RDF triples or quad into the store.
The method arguments are:
1. `data`: the serialized RDF triples or quads.
2. `mimeType`: the MIME type of the serialization. See below for the supported mime types.
2. `format`: the format of the serialization. See below for the supported formats.
3. `baseIRI`: the base IRI to use to resolve the relative IRIs in the serialization.
4. `toNamedGraph`: for triple serialization formats, the name of the named graph the triple should be loaded to.
The available formats are:
* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle`
* [TriG](https://www.w3.org/TR/trig/): `application/trig`
* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples`
* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads`
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml`
* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle` or `ttl`
* [TriG](https://www.w3.org/TR/trig/): `application/trig` or `trig`
* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` or `nt`
* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` or `nq`
* [N3](https://w3c.github.io/N3/spec/): `text/n3` or `n3`
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` or `rdf`
Example of loading a Turtle file into the named graph `<http://example.com/graph>` with the base IRI `http://example.com`:
```js
store.load("<http://example.com> <http://example.com> <> .", "text/turtle", "http://example.com", oxigraph.namedNode("http://example.com/graph"));
```
#### `Store.prototype.dump(String mimeType, NamedNode|BlankNode|DefaultGraph? fromNamedGraph)`
#### `Store.prototype.dump(String format, NamedNode|BlankNode|DefaultGraph? fromNamedGraph)`
Returns serialized RDF triples or quad from the store.
The method arguments are:
1. `mimeType`: the MIME type of the serialization. See below for the supported mime types.
1. `format`: the format type of the serialization. See below for the supported types.
2. `fromNamedGraph`: for triple serialization formats, the name of the named graph the triple should be loaded from.
The available formats are:
* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle`
* [TriG](https://www.w3.org/TR/trig/): `application/trig`
* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples`
* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads`
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml`
* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle` or `ttl`
* [TriG](https://www.w3.org/TR/trig/): `application/trig` or `trig`
* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` or `nt`
* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` or `nq`
* [N3](https://w3c.github.io/N3/spec/): `text/n3` or `n3`
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` or `rdf`
Example of building a Turtle file from the named graph `<http://example.com/graph>`:
```js
@ -260,7 +262,7 @@ To setup a dev environment:
Testing and linting:
- Rust code is formatted with [rustfmt](https://github.com/rust-lang/rustfmt) and linted with [clippy](https://github.com/rust-lang/rust-clippy).
You can execute them with `cargo fmt` and `cargo clippy`.
- JS code is formatted and linted with [Rome](https://rome.tools/). `npm run fmt` to auto-format and `npm test` to lint and test.
- JS code is formatted and linted with [Biome](https://biomejs.dev/). `npm run fmt` to auto-format and `npm test` to lint and test.
- Tests are written in JavaScript using [Mocha](https://mochajs.org/) in the `test` directory. `npm test` to run them.

@ -1,7 +1,8 @@
{
"$schema": "https://biomejs.dev/schemas/1.0.0/schema.json",
"formatter": {
"indentStyle": "space",
"indentSize": 4,
"indentWidth": 4,
"lineWidth": 100
},
"linter": {

@ -1,16 +1,6 @@
#! /usr/bin/env node
const fs = require("fs");
// We copy file to the new directory
fs.mkdirSync("pkg");
for (const file of fs.readdirSync("./pkg-web")) {
fs.copyFileSync(`./pkg-web/${file}`, `./pkg/${file}`);
}
for (const file of fs.readdirSync("./pkg-node")) {
fs.copyFileSync(`./pkg-node/${file}`, `./pkg/${file}`);
}
const pkg = JSON.parse(fs.readFileSync("./pkg/package.json"));
pkg.name = "oxigraph";
pkg.main = "node.js";

1079
js/package-lock.json generated

File diff suppressed because it is too large Load Diff

@ -3,14 +3,14 @@
"description": "Oxigraph JS build and tests",
"private": true,
"devDependencies": {
"@biomejs/biome": "^1.0.0",
"@rdfjs/data-model": "^2.0.1",
"mocha": "^10.0.0",
"rome": "^12.0.0"
"mocha": "^10.0.0"
},
"scripts": {
"fmt": "rome format . --write && rome check . --apply-unsafe",
"test": "rome ci . && wasm-pack build --debug --target nodejs && mocha",
"build": "rm -rf pkg && wasm-pack build --release --target web --out-name web && mv pkg pkg-web && wasm-pack build --release --target nodejs --out-name node && mv pkg pkg-node && node build_package.js && rm -r pkg-web && rm -r pkg-node",
"fmt": "biome format . --write && biome check . --apply-unsafe && biome format . --write",
"test": "biome ci . && wasm-pack build --debug --target nodejs && mocha",
"build": "wasm-pack build --release --target web --out-name web && wasm-pack build --release --target nodejs --out-name node && node build_package.js",
"release": "npm run build && npm publish ./pkg",
"pack": "npm run build && npm pack ./pkg"
},

@ -1,4 +1,4 @@
#![allow(clippy::unused_unit)]
#![allow(clippy::mem_forget)]
use wasm_bindgen::prelude::*;
mod model;

@ -1,9 +1,4 @@
#![allow(
dead_code,
clippy::inherent_to_string,
clippy::unused_self,
clippy::use_self
)]
#![allow(dead_code, clippy::inherent_to_string, clippy::unused_self)]
use crate::format_err;
use crate::utils::to_err;
@ -56,7 +51,7 @@ pub fn literal(
#[wasm_bindgen(js_name = defaultGraph)]
pub fn default_graph() -> JsDefaultGraph {
JsDefaultGraph {}
JsDefaultGraph
}
#[wasm_bindgen(js_name = variable)]
@ -302,7 +297,7 @@ impl From<JsLiteral> for Term {
#[wasm_bindgen(js_name = DefaultGraph)]
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct JsDefaultGraph {}
pub struct JsDefaultGraph;
#[wasm_bindgen(js_class = DefaultGraph)]
impl JsDefaultGraph {
@ -313,7 +308,7 @@ impl JsDefaultGraph {
#[wasm_bindgen(getter)]
pub fn value(&self) -> String {
"".to_owned()
String::new()
}
#[wasm_bindgen(js_name = toString)]
@ -393,7 +388,7 @@ impl JsQuad {
#[wasm_bindgen(getter)]
pub fn value(&self) -> String {
"".to_owned()
String::new()
}
#[wasm_bindgen(getter = subject)]
@ -532,7 +527,7 @@ impl From<GraphName> for JsTerm {
match name {
GraphName::NamedNode(node) => node.into(),
GraphName::BlankNode(node) => node.into(),
GraphName::DefaultGraph => Self::DefaultGraph(JsDefaultGraph {}),
GraphName::DefaultGraph => Self::DefaultGraph(JsDefaultGraph),
}
}
}
@ -564,7 +559,7 @@ impl From<Quad> for JsTerm {
impl TryFrom<JsTerm> for NamedNode {
type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, JsValue> {
fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
match value {
JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Err(format_err!(
@ -588,7 +583,7 @@ impl TryFrom<JsTerm> for NamedNode {
impl TryFrom<JsTerm> for NamedOrBlankNode {
type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, JsValue> {
fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
match value {
JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Ok(node.into()),
@ -614,7 +609,7 @@ impl TryFrom<JsTerm> for NamedOrBlankNode {
impl TryFrom<JsTerm> for Subject {
type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, JsValue> {
fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
match value {
JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Ok(node.into()),
@ -637,7 +632,7 @@ impl TryFrom<JsTerm> for Subject {
impl TryFrom<JsTerm> for Term {
type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, JsValue> {
fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
match value {
JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Ok(node.into()),
@ -657,7 +652,7 @@ impl TryFrom<JsTerm> for Term {
impl TryFrom<JsTerm> for GraphName {
type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, JsValue> {
fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
match value {
JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Ok(node.into()),
@ -744,7 +739,7 @@ impl FromJsConverter {
))
}
}
"DefaultGraph" => Ok(JsTerm::DefaultGraph(JsDefaultGraph {})),
"DefaultGraph" => Ok(JsTerm::DefaultGraph(JsDefaultGraph)),
"Variable" => Ok(Variable::new(
Reflect::get(value, &self.value)?
.as_string()
@ -754,8 +749,7 @@ impl FromJsConverter {
.into()),
"Quad" => Ok(self.to_quad(value)?.into()),
_ => Err(format_err!(
"The termType {} is not supported by Oxigraph",
term_type
"The termType {term_type} is not supported by Oxigraph"
)),
}
} else if term_type.is_undefined() {

@ -1,14 +1,11 @@
#![allow(clippy::use_self)]
use crate::format_err;
use crate::model::*;
use crate::utils::to_err;
use js_sys::{Array, Map};
use oxigraph::io::{DatasetFormat, GraphFormat};
use oxigraph::io::{RdfFormat, RdfParser};
use oxigraph::model::*;
use oxigraph::sparql::QueryResults;
use oxigraph::store::Store;
use std::io::Cursor;
use wasm_bindgen::prelude::*;
#[wasm_bindgen(js_name = Store)]
@ -27,7 +24,7 @@ impl JsStore {
store: Store::new().map_err(to_err)?,
};
if let Some(quads) = quads {
for quad in quads.iter() {
for quad in &*quads {
store.add(quad)?;
}
}
@ -145,10 +142,11 @@ impl JsStore {
pub fn load(
&self,
data: &str,
mime_type: &str,
format: &str,
base_iri: &JsValue,
to_graph_name: &JsValue,
) -> Result<(), JsValue> {
let format = rdf_format(format)?;
let base_iri = if base_iri.is_null() || base_iri.is_undefined() {
None
} else if base_iri.is_string() {
@ -161,65 +159,41 @@ impl JsStore {
));
};
let to_graph_name =
if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(to_graph_name))? {
Some(graph_name.try_into()?)
} else {
None
};
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
self.store
.load_graph(
Cursor::new(data),
graph_format,
&to_graph_name.unwrap_or(GraphName::DefaultGraph),
base_iri.as_deref(),
)
.map_err(to_err)
} else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) {
if to_graph_name.is_some() {
return Err(format_err!(
"The target graph name parameter is not available for dataset formats"
));
let mut parser = RdfParser::from_format(format);
if let Some(to_graph_name) = FROM_JS.with(|c| c.to_optional_term(to_graph_name))? {
parser = parser.with_default_graph(GraphName::try_from(to_graph_name)?);
}
if let Some(base_iri) = base_iri {
parser = parser.with_base_iri(base_iri).map_err(to_err)?;
}
self.store
.load_dataset(Cursor::new(data), dataset_format, base_iri.as_deref())
.load_from_read(parser, data.as_bytes())
.map_err(to_err)
} else {
Err(format_err!("Not supported MIME type: {}", mime_type))
}
}
pub fn dump(&self, mime_type: &str, from_graph_name: &JsValue) -> Result<String, JsValue> {
let from_graph_name =
if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(from_graph_name))? {
Some(graph_name.try_into()?)
} else {
None
};
let mut buffer = Vec::new();
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
self.store
.dump_graph(
&mut buffer,
graph_format,
&from_graph_name.unwrap_or(GraphName::DefaultGraph),
pub fn dump(&self, format: &str, from_graph_name: &JsValue) -> Result<String, JsValue> {
let format = rdf_format(format)?;
let buffer =
if let Some(from_graph_name) = FROM_JS.with(|c| c.to_optional_term(from_graph_name))? {
self.store.dump_graph_to_write(
&GraphName::try_from(from_graph_name)?,
format,
Vec::new(),
)
.map_err(to_err)?;
} else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) {
if from_graph_name.is_some() {
return Err(format_err!(
"The target graph name parameter is not available for dataset formats"
));
}
self.store
.dump_dataset(&mut buffer, dataset_format)
.map_err(to_err)?;
} else {
return Err(format_err!("Not supported MIME type: {}", mime_type));
self.store.dump_to_write(format, Vec::new())
}
.map_err(to_err)?;
String::from_utf8(buffer).map_err(to_err)
}
}
fn rdf_format(format: &str) -> Result<RdfFormat, JsValue> {
if format.contains('/') {
RdfFormat::from_media_type(format)
.ok_or_else(|| format_err!("Not supported RDF format media type: {format}"))
} else {
RdfFormat::from_extension(format)
.ok_or_else(|| format_err!("Not supported RDF format extension: {format}"))
}
}

@ -1,37 +1,37 @@
/* global describe, it */
import assert from "assert";
import runTests from "../node_modules/@rdfjs/data-model/test/index.js";
import oxigraph from "../pkg/oxigraph.js";
import assert from "assert";
runTests({ factory: oxigraph });
describe("DataModel", function () {
describe("#toString()", function () {
it("namedNode().toString() should return SPARQL compatible syntax", function () {
describe("DataModel", () => {
describe("#toString()", () => {
it("namedNode().toString() should return SPARQL compatible syntax", () => {
assert.strictEqual(
"<http://example.com>",
oxigraph.namedNode("http://example.com").toString(),
);
});
it("blankNode().toString() should return SPARQL compatible syntax", function () {
it("blankNode().toString() should return SPARQL compatible syntax", () => {
assert.strictEqual("_:a", oxigraph.blankNode("a").toString());
});
it("literal().toString() should return SPARQL compatible syntax", function () {
it("literal().toString() should return SPARQL compatible syntax", () => {
assert.strictEqual('"a\\"b"@en', oxigraph.literal('a"b', "en").toString());
});
it("defaultGraph().toString() should return SPARQL compatible syntax", function () {
it("defaultGraph().toString() should return SPARQL compatible syntax", () => {
assert.strictEqual("DEFAULT", oxigraph.defaultGraph().toString());
});
it("variable().toString() should return SPARQL compatible syntax", function () {
it("variable().toString() should return SPARQL compatible syntax", () => {
assert.strictEqual("?a", oxigraph.variable("a").toString());
});
it("quad().toString() should return SPARQL compatible syntax", function () {
it("quad().toString() should return SPARQL compatible syntax", () => {
assert.strictEqual(
"<http://example.com/s> <http://example.com/p> <<<http://example.com/s1> <http://example.com/p1> <http://example.com/o1>>> <http://example.com/g>",
oxigraph

@ -1,8 +1,8 @@
/* global describe, it */
import { Store } from "../pkg/oxigraph.js";
import dataModel from "@rdfjs/data-model";
import assert from "assert";
import dataModel from "@rdfjs/data-model";
import { Store } from "../pkg/oxigraph.js";
const ex = dataModel.namedNode("http://example.com");
const triple = dataModel.quad(
@ -11,17 +11,17 @@ const triple = dataModel.quad(
dataModel.literal("o"),
);
describe("Store", function () {
describe("#add()", function () {
it("an added quad should be in the store", function () {
describe("Store", () => {
describe("#add()", () => {
it("an added quad should be in the store", () => {
const store = new Store();
store.add(dataModel.quad(ex, ex, triple));
assert(store.has(dataModel.quad(ex, ex, triple)));
});
});
describe("#delete()", function () {
it("an removed quad should not be in the store anymore", function () {
describe("#delete()", () => {
it("an removed quad should not be in the store anymore", () => {
const store = new Store([dataModel.quad(triple, ex, ex)]);
assert(store.has(dataModel.quad(triple, ex, ex)));
store.delete(dataModel.quad(triple, ex, ex));
@ -29,22 +29,22 @@ describe("Store", function () {
});
});
describe("#has()", function () {
it("an added quad should be in the store", function () {
describe("#has()", () => {
it("an added quad should be in the store", () => {
const store = new Store([dataModel.quad(ex, ex, ex)]);
assert(store.has(dataModel.quad(ex, ex, ex)));
});
});
describe("#size()", function () {
it("A store with one quad should have 1 for size", function () {
describe("#size()", () => {
it("A store with one quad should have 1 for size", () => {
const store = new Store([dataModel.quad(ex, ex, ex)]);
assert.strictEqual(1, store.size);
});
});
describe("#match_quads()", function () {
it("blank pattern should return all quads", function () {
describe("#match_quads()", () => {
it("blank pattern should return all quads", () => {
const store = new Store([dataModel.quad(ex, ex, ex)]);
const results = store.match();
assert.strictEqual(1, results.length);
@ -52,32 +52,32 @@ describe("Store", function () {
});
});
describe("#query()", function () {
it("ASK true", function () {
describe("#query()", () => {
it("ASK true", () => {
const store = new Store([dataModel.quad(ex, ex, ex)]);
assert.strictEqual(true, store.query("ASK { ?s ?s ?s }"));
});
it("ASK false", function () {
it("ASK false", () => {
const store = new Store();
assert.strictEqual(false, store.query("ASK { FILTER(false)}"));
});
it("CONSTRUCT", function () {
it("CONSTRUCT", () => {
const store = new Store([dataModel.quad(ex, ex, ex)]);
const results = store.query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }");
assert.strictEqual(1, results.length);
assert(dataModel.quad(ex, ex, ex).equals(results[0]));
});
it("SELECT", function () {
it("SELECT", () => {
const store = new Store([dataModel.quad(ex, ex, ex)]);
const results = store.query("SELECT ?s WHERE { ?s ?p ?o }");
assert.strictEqual(1, results.length);
assert(ex.equals(results[0].get("s")));
});
it("SELECT with NOW()", function () {
it("SELECT with NOW()", () => {
const store = new Store([dataModel.quad(ex, ex, ex)]);
const results = store.query(
"SELECT * WHERE { FILTER(2022 <= YEAR(NOW()) && YEAR(NOW()) <= 2100) }",
@ -85,15 +85,15 @@ describe("Store", function () {
assert.strictEqual(1, results.length);
});
it("SELECT with RAND()", function () {
it("SELECT with RAND()", () => {
const store = new Store([dataModel.quad(ex, ex, ex)]);
const results = store.query("SELECT (RAND() AS ?y) WHERE {}");
assert.strictEqual(1, results.length);
});
});
describe("#update()", function () {
it("INSERT DATA", function () {
describe("#update()", () => {
it("INSERT DATA", () => {
const store = new Store();
store.update(
"INSERT DATA { <http://example.com> <http://example.com> <http://example.com> }",
@ -101,7 +101,7 @@ describe("Store", function () {
assert.strictEqual(1, store.size);
});
it("DELETE DATA", function () {
it("DELETE DATA", () => {
const store = new Store([dataModel.quad(ex, ex, ex)]);
store.update(
"DELETE DATA { <http://example.com> <http://example.com> <http://example.com> }",
@ -109,15 +109,15 @@ describe("Store", function () {
assert.strictEqual(0, store.size);
});
it("DELETE WHERE", function () {
it("DELETE WHERE", () => {
const store = new Store([dataModel.quad(ex, ex, ex)]);
store.update("DELETE WHERE { ?v ?v ?v }");
assert.strictEqual(0, store.size);
});
});
describe("#load()", function () {
it("load NTriples in the default graph", function () {
describe("#load()", () => {
it("load NTriples in the default graph", () => {
const store = new Store();
store.load(
"<http://example.com> <http://example.com> <http://example.com> .",
@ -126,7 +126,7 @@ describe("Store", function () {
assert(store.has(dataModel.quad(ex, ex, ex)));
});
it("load NTriples in an other graph", function () {
it("load NTriples in an other graph", () => {
const store = new Store();
store.load(
"<http://example.com> <http://example.com> <http://example.com> .",
@ -137,7 +137,7 @@ describe("Store", function () {
assert(store.has(dataModel.quad(ex, ex, ex, ex)));
});
it("load Turtle with a base IRI", function () {
it("load Turtle with a base IRI", () => {
const store = new Store();
store.load(
"<http://example.com> <http://example.com> <> .",
@ -147,7 +147,7 @@ describe("Store", function () {
assert(store.has(dataModel.quad(ex, ex, ex)));
});
it("load NQuads", function () {
it("load NQuads", () => {
const store = new Store();
store.load(
"<http://example.com> <http://example.com> <http://example.com> <http://example.com> .",
@ -156,7 +156,7 @@ describe("Store", function () {
assert(store.has(dataModel.quad(ex, ex, ex, ex)));
});
it("load TriG with a base IRI", function () {
it("load TriG with a base IRI", () => {
const store = new Store();
store.load(
"GRAPH <> { <http://example.com> <http://example.com> <> }",
@ -167,8 +167,8 @@ describe("Store", function () {
});
});
describe("#dump()", function () {
it("dump dataset content", function () {
describe("#dump()", () => {
it("dump dataset content", () => {
const store = new Store([dataModel.quad(ex, ex, ex, ex)]);
assert.strictEqual(
"<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n",
@ -176,7 +176,7 @@ describe("Store", function () {
);
});
it("dump named graph content", function () {
it("dump named graph content", () => {
const store = new Store([dataModel.quad(ex, ex, ex, ex)]);
assert.strictEqual(
"<http://example.com> <http://example.com> <http://example.com> .\n",
@ -184,9 +184,9 @@ describe("Store", function () {
);
});
it("dump default graph content", function () {
it("dump default graph content", () => {
const store = new Store([dataModel.quad(ex, ex, ex, ex)]);
assert.strictEqual("", store.dump("application/n-triples"));
assert.strictEqual("", store.dump("application/n-triples", dataModel.defaultGraph()));
});
});
});

@ -1,65 +0,0 @@
[package]
name = "oxigraph"
version = "0.3.20"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
keywords = ["RDF", "SPARQL", "graph-database", "database"]
categories = ["database-implementations"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib"
homepage = "https://oxigraph.org/"
documentation = "https://docs.rs/oxigraph"
description = """
a SPARQL database and RDF toolkit
"""
edition = "2021"
rust-version = "1.60"
[package.metadata.docs.rs]
all-features = true
[features]
default = []
http_client = ["oxhttp", "oxhttp/rustls"]
rocksdb_debug = []
[dependencies]
rand = "0.8"
md-5 = "0.10"
sha-1 = "0.10"
sha2 = "0.10"
digest = "0.10"
regex = "1"
oxilangtag = "0.1"
oxiri = "0.2"
rio_api = "0.8"
rio_turtle = "0.8"
rio_xml = "0.8"
hex = "0.4"
siphasher = "0.3"
lazy_static = "1"
json-event-parser = "0.1"
oxrdf = { version = "0.1.7", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] }
oxsdatatypes = { version = "0.1.3", path="oxsdatatypes" }
spargebra = { version = "0.2.8", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] }
sparesults = { version = "0.1.8", path="sparesults", features = ["rdf-star"] }
gfa = "0.10.1"
handlegraph = { git = "https://github.com/chfi/rs-handlegraph", branch = "master" }
[target.'cfg(not(target_family = "wasm"))'.dependencies]
libc = "0.2"
oxrocksdb-sys = { version = "0.3.20", path="../oxrocksdb-sys" }
oxhttp = { version = "0.1", optional = true }
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]
getrandom = { version = "0.2", features = ["js"] }
js-sys = "0.3"
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
criterion = "0.4"
oxhttp = "0.1"
zstd = "0.12"
[[bench]]
name = "store"
harness = false

@ -1,72 +1,13 @@
Oxigraph
========
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph)
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph)
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
Its goal is to provide a compliant, safe and fast on-disk graph database.
It also provides a set of utility functions for reading, writing, and processing RDF files.
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet.
Oxigraph also provides [a standalone HTTP server](https://crates.io/crates/oxigraph_server) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library.
Oxigraph implements the following specifications:
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio).
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
The main entry point of Oxigraph is the [`Store`](store::Store) struct:
```rust
use oxigraph::store::Store;
use oxigraph::model::*;
use oxigraph::sparql::QueryResults;
let store = Store::new().unwrap();
// insertion
let ex = NamedNode::new("http://example.com").unwrap();
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph);
store.insert(&quad).unwrap();
// quad filter
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap();
assert_eq!(vec![quad], results);
// SPARQL query
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() {
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into()));
}
```
Some parts of this library are available as standalone crates:
* [`oxrdf`](https://crates.io/crates/oxrdf) provides datastructures encoding RDF basic concepts (the `oxigraph::model` module).
* [`spargebra`](https://crates.io/crates/spargebra) provides a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults) provides parsers and serializers for SPARQL result formats.
To build the library, don't forget to clone the submodules using `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository including submodules or `git submodule update --init` to add submodules to the already cloned repository.
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
Oxigraph Rust crates
====================
Oxigraph is implemented in Rust.
It is composed on a main library, [`oxigraph`](./oxigraph) and a set of smaller crates used by the `oxigraph` crate:
* [`oxrdf`](./oxrdf), datastructures encoding RDF basic concepts (the `model` module of the `oxigraph` crate).
* [`oxrdfio`](./oxrdfio), a unified parser and serializer API for RDF formats (the `io` module of the `oxigraph` crate). It itself relies on:
* [`oxttl`](./oxttl), N-Triple, N-Quad, Turtle, TriG and N3 parsing and serialization.
* [`oxrdfxml`](./oxrdfxml), RDF/XML parsing and serialization.
* [`spargebra`](./spargebra), a SPARQL parser.
* [`sparesults`](./sparesults), parsers and serializers for SPARQL result formats (the `sparql::results` module of the `oxigraph` crate).
* [`sparopt`](./sparesults), a SPARQL optimizer.
* [`oxsdatatypes`](./oxsdatatypes), an implementation of some XML Schema datatypes.

@ -0,0 +1,72 @@
[package]
name = "oxigraph"
version.workspace = true
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF", "SPARQL", "graph-database", "database"]
categories = ["database-implementations"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxigraph"
homepage = "https://oxigraph.org/"
documentation = "https://docs.rs/oxigraph"
description = """
a SPARQL database and RDF toolkit
"""
edition.workspace = true
rust-version.workspace = true
[features]
default = []
js = ["getrandom/js", "oxsdatatypes/js", "js-sys"]
http-client = ["oxhttp"]
http-client-native-tls = ["http-client", "oxhttp/native-tls"]
http-client-rustls-webpki = ["http-client", "oxhttp/rustls-webpki"]
http-client-rustls-native = ["http-client", "oxhttp/rustls-native"]
rocksdb-pkg-config = ["oxrocksdb-sys/pkg-config"]
rocksdb-debug = []
[dependencies]
digest.workspace = true
hex.workspace = true
json-event-parser.workspace = true
md-5.workspace = true
oxilangtag.workspace = true
oxiri.workspace = true
oxrdf = { workspace = true, features = ["rdf-star", "oxsdatatypes"] }
oxrdfio = { workspace = true, features = ["rdf-star"] }
oxsdatatypes.workspace = true
rand.workspace = true
regex.workspace = true
sha1.workspace = true
sha2.workspace = true
siphasher.workspace = true
sparesults = { workspace = true, features = ["rdf-star"] }
spargebra = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] }
sparopt = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] }
thiserror.workspace = true
gfa = "0.10.1"
handlegraph = { git = "https://github.com/chfi/rs-handlegraph", branch = "master" }
[target.'cfg(not(target_family = "wasm"))'.dependencies]
libc.workspace = true
oxhttp = { workspace = true, optional = true }
oxrocksdb-sys.workspace = true
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]
getrandom.workspace = true
js-sys = { workspace = true, optional = true }
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
codspeed-criterion-compat.workspace = true
oxhttp.workspace = true
zstd.workspace = true
[lints]
workspace = true
[package.metadata.docs.rs]
rustdoc-args = ["--cfg", "docsrs"]
[[bench]]
name = "store"
harness = false

@ -0,0 +1,77 @@
Oxigraph
========
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph)
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph)
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
Its goal is to provide a compliant, safe and fast on-disk graph database.
It also provides a set of utility functions for reading, writing, and processing RDF files.
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet.
Oxigraph also provides [a CLI tool](https://crates.io/crates/oxigraph-cli) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library.
Oxigraph implements the following specifications:
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio).
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
The main entry point of Oxigraph is the [`Store`](store::Store) struct:
```rust
use oxigraph::store::Store;
use oxigraph::model::*;
use oxigraph::sparql::QueryResults;
let store = Store::new().unwrap();
// insertion
let ex = NamedNode::new("http://example.com").unwrap();
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph);
store.insert(&quad).unwrap();
// quad filter
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap();
assert_eq!(vec![quad], results);
// SPARQL query
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() {
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into()));
}
```
It is based on these crates that can be used separately:
* [`oxrdf`](https://crates.io/crates/oxrdf), datastructures encoding RDF basic concepts (the [`oxigraph::model`](crate::model) module).
* [`oxrdfio`](https://crates.io/crates/oxrdfio), a unified parser and serializer API for RDF formats (the [`oxigraph::io`](crate::io) module). It itself relies on:
* [`oxttl`](https://crates.io/crates/oxttl), N-Triple, N-Quad, Turtle, TriG and N3 parsing and serialization.
* [`oxrdfxml`](https://crates.io/crates/oxrdfxml), RDF/XML parsing and serialization.
* [`spargebra`](https://crates.io/crates/spargebra), a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults), parsers and serializers for SPARQL result formats (the [`oxigraph::sparql::results`](crate::sparql::results) module).
* [`sparopt`](https://crates.io/crates/sparesults), a SPARQL optimizer.
* [`oxsdatatypes`](https://crates.io/crates/oxsdatatypes), an implementation of some XML Schema datatypes.
To build the library locally, don't forget to clone the submodules using `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository including submodules or `git submodule update --init` to add submodules to the already cloned repository.
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -1,22 +1,43 @@
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
#![allow(clippy::panic)]
use codspeed_criterion_compat::{criterion_group, criterion_main, Criterion, Throughput};
use oxhttp::model::{Method, Request, Status};
use oxigraph::io::GraphFormat;
use oxigraph::model::GraphNameRef;
use oxigraph::io::{RdfFormat, RdfParser};
use oxigraph::sparql::{Query, QueryResults, Update};
use oxigraph::store::Store;
use rand::random;
use std::env::temp_dir;
use std::fs::{remove_dir_all, File};
use std::io::{BufRead, BufReader, Cursor, Read};
use std::io::Read;
use std::path::{Path, PathBuf};
use std::str;
fn store_load(c: &mut Criterion) {
fn parse_nt(c: &mut Criterion) {
let data = read_data("explore-1000.nt.zst");
let mut group = c.benchmark_group("parse");
group.throughput(Throughput::Bytes(data.len() as u64));
group.sample_size(50);
group.bench_function("parse BSBM explore 1000", |b| {
b.iter(|| {
for r in RdfParser::from_format(RdfFormat::NTriples).parse_read(data.as_slice()) {
r.unwrap();
}
})
});
group.bench_function("parse BSBM explore 1000 unchecked", |b| {
b.iter(|| {
for r in RdfParser::from_format(RdfFormat::NTriples)
.unchecked()
.parse_read(data.as_slice())
{
let mut data = Vec::new();
read_data("explore-1000.nt.zst")
.read_to_end(&mut data)
.unwrap();
r.unwrap();
}
})
});
}
fn store_load(c: &mut Criterion) {
let data = read_data("explore-1000.nt.zst");
let mut group = c.benchmark_group("store load");
group.throughput(Throughput::Bytes(data.len() as u64));
group.sample_size(10);
@ -42,68 +63,26 @@ fn store_load(c: &mut Criterion) {
});
}
{
let mut data = Vec::new();
read_data("explore-10000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let mut group = c.benchmark_group("store load large");
group.throughput(Throughput::Bytes(data.len() as u64));
group.sample_size(10);
group.bench_function("load BSBM explore 10000 in on disk with bulk load", |b| {
b.iter(|| {
let path = TempDir::default();
let store = Store::open(&path).unwrap();
do_bulk_load(&store, &data);
})
});
}
}
fn do_load(store: &Store, data: &[u8]) {
store
.load_graph(
Cursor::new(&data),
GraphFormat::NTriples,
GraphNameRef::DefaultGraph,
None,
)
.unwrap();
store.load_from_read(RdfFormat::NTriples, data).unwrap();
store.optimize().unwrap();
}
fn do_bulk_load(store: &Store, data: &[u8]) {
store
.bulk_loader()
.load_graph(
Cursor::new(&data),
GraphFormat::NTriples,
GraphNameRef::DefaultGraph,
None,
)
.load_from_read(RdfFormat::NTriples, data)
.unwrap();
store.optimize().unwrap();
}
fn store_query_and_update(c: &mut Criterion) {
let mut data = Vec::new();
read_data("explore-1000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst")
.lines()
.map(|l| {
let l = l.unwrap();
let mut parts = l.trim().split('\t');
let kind = parts.next().unwrap();
let operation = parts.next().unwrap();
match kind {
"query" => Operation::Query(Query::parse(operation, None).unwrap()),
"update" => Operation::Update(Update::parse(operation, None).unwrap()),
_ => panic!("Unexpected operation kind {kind}"),
}
let data = read_data("explore-1000.nt.zst");
let operations = bsbm_sparql_operation()
.into_iter()
.map(|op| match op {
RawOperation::Query(q) => Operation::Query(Query::parse(&q, None).unwrap()),
RawOperation::Update(q) => Operation::Update(Update::parse(&q, None).unwrap()),
})
.collect::<Vec<_>>();
let query_operations = operations
@ -162,26 +141,7 @@ fn run_operation(store: &Store, operations: &[Operation]) {
}
fn sparql_parsing(c: &mut Criterion) {
let mut data = Vec::new();
read_data("explore-1000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst")
.lines()
.map(|l| {
let l = l.unwrap();
let mut parts = l.trim().split('\t');
let kind = parts.next().unwrap();
let operation = parts.next().unwrap();
match kind {
"query" => RawOperation::Query(operation.to_owned()),
"update" => RawOperation::Update(operation.to_owned()),
_ => panic!("Unexpected operation kind {kind}"),
}
})
.collect::<Vec<_>>();
let operations = bsbm_sparql_operation();
let mut group = c.benchmark_group("sparql parsing");
group.sample_size(10);
group.throughput(Throughput::Bytes(
@ -209,14 +169,14 @@ fn sparql_parsing(c: &mut Criterion) {
});
}
criterion_group!(parse, parse_nt);
criterion_group!(store, sparql_parsing, store_query_and_update, store_load);
criterion_main!(store);
criterion_main!(parse, store);
fn read_data(file: &str) -> impl BufRead {
fn read_data(file: &str) -> Vec<u8> {
if !Path::new(file).exists() {
let mut client = oxhttp::Client::new();
client.set_redirection_limit(5);
let client = oxhttp::Client::new().with_redirection_limit(5);
let url = format!("https://github.com/Tpt/bsbm-tools/releases/download/v0.2/{file}");
let request = Request::builder(Method::GET, url.parse().unwrap()).build();
let response = client.request(request).unwrap();
@ -228,7 +188,31 @@ fn read_data(file: &str) -> impl BufRead {
);
std::io::copy(&mut response.into_body(), &mut File::create(file).unwrap()).unwrap();
}
BufReader::new(zstd::Decoder::new(File::open(file).unwrap()).unwrap())
let mut buf = Vec::new();
zstd::Decoder::new(File::open(file).unwrap())
.unwrap()
.read_to_end(&mut buf)
.unwrap();
buf
}
fn bsbm_sparql_operation() -> Vec<RawOperation> {
String::from_utf8(read_data("mix-exploreAndUpdate-1000.tsv.zst"))
.unwrap()
.lines()
.rev()
.take(300) // We take only 10 groups
.map(|l| {
let mut parts = l.trim().split('\t');
let kind = parts.next().unwrap();
let operation = parts.next().unwrap();
match kind {
"query" => RawOperation::Query(operation.into()),
"update" => RawOperation::Update(operation.into()),
_ => panic!("Unexpected operation kind {kind}"),
}
})
.collect()
}
#[derive(Clone)]

@ -1,8 +1,13 @@
#![allow(deprecated)]
use oxrdfio::{RdfFormat, RdfParser, RdfSerializer};
/// [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) serialization formats.
///
/// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
#[deprecated(note = "use RdfFormat instead", since = "0.4.0")]
pub enum GraphFormat {
/// [N-Triples](https://www.w3.org/TR/n-triples/)
NTriples,
@ -18,7 +23,10 @@ impl GraphFormat {
/// ```
/// use oxigraph::io::GraphFormat;
///
/// assert_eq!(GraphFormat::NTriples.iri(), "http://www.w3.org/ns/formats/N-Triples")
/// assert_eq!(
/// GraphFormat::NTriples.iri(),
/// "http://www.w3.org/ns/formats/N-Triples"
/// )
/// ```
#[inline]
pub fn iri(self) -> &'static str {
@ -60,6 +68,7 @@ impl GraphFormat {
Self::RdfXml => "rdf",
}
}
/// Looks for a known format from a media type.
///
/// It supports some media type aliases.
@ -69,7 +78,10 @@ impl GraphFormat {
/// ```
/// use oxigraph::io::GraphFormat;
///
/// assert_eq!(GraphFormat::from_media_type("text/turtle; charset=utf-8"), Some(GraphFormat::Turtle))
/// assert_eq!(
/// GraphFormat::from_media_type("text/turtle; charset=utf-8"),
/// Some(GraphFormat::Turtle)
/// )
/// ```
#[inline]
pub fn from_media_type(media_type: &str) -> Option<Self> {
@ -89,7 +101,10 @@ impl GraphFormat {
/// ```
/// use oxigraph::io::GraphFormat;
///
/// assert_eq!(GraphFormat::from_extension("nt"), Some(GraphFormat::NTriples))
/// assert_eq!(
/// GraphFormat::from_extension("nt"),
/// Some(GraphFormat::NTriples)
/// )
/// ```
#[inline]
pub fn from_extension(extension: &str) -> Option<Self> {
@ -102,11 +117,37 @@ impl GraphFormat {
}
}
impl From<GraphFormat> for RdfFormat {
#[inline]
fn from(format: GraphFormat) -> Self {
match format {
GraphFormat::NTriples => Self::NTriples,
GraphFormat::Turtle => Self::Turtle,
GraphFormat::RdfXml => Self::RdfXml,
}
}
}
impl From<GraphFormat> for RdfParser {
#[inline]
fn from(format: GraphFormat) -> Self {
RdfFormat::from(format).into()
}
}
impl From<GraphFormat> for RdfSerializer {
#[inline]
fn from(format: GraphFormat) -> Self {
RdfFormat::from(format).into()
}
}
/// [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) serialization formats.
///
/// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
#[deprecated(note = "use RdfFormat instead", since = "0.4.0")]
pub enum DatasetFormat {
/// [N-Quads](https://www.w3.org/TR/n-quads/)
NQuads,
@ -120,7 +161,10 @@ impl DatasetFormat {
/// ```
/// use oxigraph::io::DatasetFormat;
///
/// assert_eq!(DatasetFormat::NQuads.iri(), "http://www.w3.org/ns/formats/N-Quads")
/// assert_eq!(
/// DatasetFormat::NQuads.iri(),
/// "http://www.w3.org/ns/formats/N-Quads"
/// )
/// ```
#[inline]
pub fn iri(self) -> &'static str {
@ -159,6 +203,7 @@ impl DatasetFormat {
Self::TriG => "trig",
}
}
/// Looks for a known format from a media type.
///
/// It supports some media type aliases.
@ -167,7 +212,10 @@ impl DatasetFormat {
/// ```
/// use oxigraph::io::DatasetFormat;
///
/// assert_eq!(DatasetFormat::from_media_type("application/n-quads; charset=utf-8"), Some(DatasetFormat::NQuads))
/// assert_eq!(
/// DatasetFormat::from_media_type("application/n-quads; charset=utf-8"),
/// Some(DatasetFormat::NQuads)
/// )
/// ```
#[inline]
pub fn from_media_type(media_type: &str) -> Option<Self> {
@ -186,7 +234,10 @@ impl DatasetFormat {
/// ```
/// use oxigraph::io::DatasetFormat;
///
/// assert_eq!(DatasetFormat::from_extension("nq"), Some(DatasetFormat::NQuads))
/// assert_eq!(
/// DatasetFormat::from_extension("nq"),
/// Some(DatasetFormat::NQuads)
/// )
/// ```
#[inline]
pub fn from_extension(extension: &str) -> Option<Self> {
@ -198,12 +249,36 @@ impl DatasetFormat {
}
}
impl From<DatasetFormat> for RdfFormat {
#[inline]
fn from(format: DatasetFormat) -> Self {
match format {
DatasetFormat::NQuads => Self::NQuads,
DatasetFormat::TriG => Self::TriG,
}
}
}
impl From<DatasetFormat> for RdfParser {
#[inline]
fn from(format: DatasetFormat) -> Self {
RdfFormat::from(format).into()
}
}
impl From<DatasetFormat> for RdfSerializer {
#[inline]
fn from(format: DatasetFormat) -> Self {
RdfFormat::from(format).into()
}
}
impl TryFrom<DatasetFormat> for GraphFormat {
type Error = ();
/// Attempts to find a graph format that is a subset of this [`DatasetFormat`].
#[inline]
fn try_from(value: DatasetFormat) -> Result<Self, ()> {
fn try_from(value: DatasetFormat) -> Result<Self, Self::Error> {
match value {
DatasetFormat::NQuads => Ok(Self::NTriples),
DatasetFormat::TriG => Ok(Self::Turtle),
@ -216,7 +291,7 @@ impl TryFrom<GraphFormat> for DatasetFormat {
/// Attempts to find a dataset format that is a superset of this [`GraphFormat`].
#[inline]
fn try_from(value: GraphFormat) -> Result<Self, ()> {
fn try_from(value: GraphFormat) -> Result<Self, Self::Error> {
match value {
GraphFormat::NTriples => Ok(Self::NQuads),
GraphFormat::Turtle => Ok(Self::TriG),

@ -0,0 +1,39 @@
//! Utilities to read and write RDF graphs and datasets using [OxRDF I/O](https://crates.io/crates/oxrdfio).
//!
//! The entry points of this module are the two [`RdfParser`] and [`RdfSerializer`] structs.
//!
//! Usage example converting a Turtle file to a N-Triples file:
//! ```
//! use oxigraph::io::{RdfFormat, RdfParser, RdfSerializer};
//!
//! let turtle_file = b"@base <http://example.com/> .
//! @prefix schema: <http://schema.org/> .
//! <foo> a schema:Person ;
//! schema:name \"Foo\" .
//! <bar> a schema:Person ;
//! schema:name \"Bar\" .";
//!
//! let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
//! <http://example.com/foo> <http://schema.org/name> \"Foo\" .
//! <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
//! <http://example.com/bar> <http://schema.org/name> \"Bar\" .
//! ";
//!
//! let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
//! for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) {
//! writer.write_quad(&quad.unwrap()).unwrap();
//! }
//! assert_eq!(writer.finish().unwrap(), ntriples_file);
//! ```
mod format;
pub mod read;
pub mod write;
#[allow(deprecated)]
pub use self::format::{DatasetFormat, GraphFormat};
#[allow(deprecated)]
pub use self::read::{DatasetParser, GraphParser};
#[allow(deprecated)]
pub use self::write::{DatasetSerializer, GraphSerializer};
pub use oxrdfio::*;

@ -0,0 +1,200 @@
#![allow(deprecated)]
//! Utilities to read RDF graphs and datasets.
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxiri::IriParseError;
use oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser};
use std::io::Read;
/// Parsers for RDF graph serialization formats.
///
/// It currently supports the following formats:
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[deprecated(note = "use RdfParser instead", since = "0.4.0")]
pub struct GraphParser {
inner: RdfParser,
}
impl GraphParser {
/// Builds a parser for the given format.
#[inline]
pub fn from_format(format: GraphFormat) -> Self {
Self {
inner: RdfParser::from_format(format.into())
.without_named_graphs()
.rename_blank_nodes(),
}
}
/// Provides an IRI that could be used to resolve the file relative IRIs.
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "</s> </p> </o> .";
///
/// let parser =
/// GraphParser::from_format(GraphFormat::Turtle).with_base_iri("http://example.com")?;
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self {
inner: self.inner.with_base_iri(base_iri)?,
})
}
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of triples.
pub fn read_triples<R: Read>(self, reader: R) -> TripleReader<R> {
TripleReader {
parser: self.inner.parse_read(reader),
}
}
}
/// An iterator yielding read triples.
/// Could be built using a [`GraphParser`].
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct TripleReader<R: Read> {
parser: FromReadQuadReader<R>,
}
impl<R: Read> Iterator for TripleReader<R> {
type Item = Result<Triple, RdfParseError>;
fn next(&mut self) -> Option<Self::Item> {
Some(self.parser.next()?.map(Into::into).map_err(Into::into))
}
}
/// A parser for RDF dataset serialization formats.
///
/// It currently supports the following formats:
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[deprecated(note = "use RdfParser instead", since = "0.4.0")]
pub struct DatasetParser {
inner: RdfParser,
}
impl DatasetParser {
/// Builds a parser for the given format.
#[inline]
pub fn from_format(format: DatasetFormat) -> Self {
Self {
inner: RdfParser::from_format(format.into()).rename_blank_nodes(),
}
}
/// Provides an IRI that could be used to resolve the file relative IRIs.
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<g> { </s> </p> </o> }";
///
/// let parser =
/// DatasetParser::from_format(DatasetFormat::TriG).with_base_iri("http://example.com")?;
/// let triples = parser
/// .read_quads(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self {
inner: self.inner.with_base_iri(base_iri)?,
})
}
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of quads.
pub fn read_quads<R: Read>(self, reader: R) -> QuadReader<R> {
QuadReader {
parser: self.inner.parse_read(reader),
}
}
}
/// An iterator yielding read quads.
/// Could be built using a [`DatasetParser`].
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct QuadReader<R: Read> {
parser: FromReadQuadReader<R>,
}
impl<R: Read> Iterator for QuadReader<R> {
type Item = Result<Quad, RdfParseError>;
fn next(&mut self) -> Option<Self::Item> {
Some(self.parser.next()?.map_err(Into::into))
}
}

@ -0,0 +1,185 @@
#![allow(deprecated)]
//! Utilities to write RDF graphs and datasets.
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxrdfio::{RdfSerializer, ToWriteQuadWriter};
use std::io::{self, Write};
/// A serializer for RDF graph serialization formats.
///
/// It currently supports the following formats:
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(
/// buffer.as_slice(),
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")]
pub struct GraphSerializer {
inner: RdfSerializer,
}
impl GraphSerializer {
/// Builds a serializer for the given format
#[inline]
pub fn from_format(format: GraphFormat) -> Self {
Self {
inner: RdfSerializer::from_format(format.into()),
}
}
/// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`] implementation
pub fn triple_writer<W: Write>(self, write: W) -> TripleWriter<W> {
TripleWriter {
writer: self.inner.serialize_to_write(write),
}
}
}
/// Allows writing triples.
/// Could be built using a [`GraphSerializer`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](TripleWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(
/// buffer.as_slice(),
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct TripleWriter<W: Write> {
writer: ToWriteQuadWriter<W>,
}
impl<W: Write> TripleWriter<W> {
/// Writes a triple
pub fn write<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
self.writer.write_triple(triple)
}
/// Writes the last bytes of the file
pub fn finish(self) -> io::Result<()> {
self.writer.finish()?.flush()
}
}
/// A serializer for RDF graph serialization formats.
///
/// It currently supports the following formats:
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")]
pub struct DatasetSerializer {
inner: RdfSerializer,
}
impl DatasetSerializer {
/// Builds a serializer for the given format
#[inline]
pub fn from_format(format: DatasetFormat) -> Self {
Self {
inner: RdfSerializer::from_format(format.into()),
}
}
/// Returns a [`QuadWriter`] allowing writing triples into the given [`Write`] implementation
pub fn quad_writer<W: Write>(self, write: W) -> QuadWriter<W> {
QuadWriter {
writer: self.inner.serialize_to_write(write),
}
}
}
/// Allows writing triples.
/// Could be built using a [`DatasetSerializer`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](QuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct QuadWriter<W: Write> {
writer: ToWriteQuadWriter<W>,
}
impl<W: Write> QuadWriter<W> {
/// Writes a quad
pub fn write<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
self.writer.write_quad(quad)
}
/// Writes the last bytes of the file
pub fn finish(self) -> io::Result<()> {
self.writer.finish()?.flush()
}
}

@ -0,0 +1,12 @@
#![doc = include_str!("../README.md")]
#![doc(test(attr(deny(warnings))))]
#![doc(test(attr(allow(deprecated))))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
pub mod io;
pub mod model;
pub mod sparql;
mod storage;
pub mod store;

@ -0,0 +1,20 @@
//! Implements data structures for [RDF 1.1 Concepts](https://www.w3.org/TR/rdf11-concepts/) using [OxRDF](https://crates.io/crates/oxrdf).
//!
//! Usage example:
//!
//! ```
//! use oxigraph::model::*;
//!
//! let mut graph = Graph::default();
//!
//! // insertion
//! let ex = NamedNodeRef::new("http://example.com").unwrap();
//! let triple = TripleRef::new(ex, ex, ex);
//! graph.insert(triple);
//!
//! // simple filter
//! let results: Vec<_> = graph.triples_for_subject(ex).collect();
//! assert_eq!(vec![triple], results);
//! ```
pub use oxrdf::*;

@ -1,15 +1,13 @@
//! [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery)
//!
//! The root type for SPARQL queries is [`Query`] and the root type for updates is [`Update`].
//!
//! Warning: this implementation is an unstable work in progress
use crate::model::*;
use crate::sparql::eval::Timer;
use oxsdatatypes::DayTimeDuration;
use spargebra::GraphUpdateOperation;
use std::fmt;
use std::str::FromStr;
use std::time::Duration;
/// A parsed [SPARQL query](https://www.w3.org/TR/sparql11-query/).
///
@ -25,25 +23,31 @@ use std::time::Duration;
/// // We edit the query dataset specification
/// let default = vec![NamedNode::new("http://example.com")?.into()];
/// query.dataset_mut().set_default_graph(default.clone());
/// assert_eq!(query.dataset().default_graph_graphs(), Some(default.as_slice()));
/// assert_eq!(
/// query.dataset().default_graph_graphs(),
/// Some(default.as_slice())
/// );
/// # Ok::<_, Box<dyn std::error::Error>>(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Query {
pub(super) inner: spargebra::Query,
pub(super) dataset: QueryDataset,
pub(super) parsing_duration: Option<Duration>,
pub(super) parsing_duration: Option<DayTimeDuration>,
}
impl Query {
/// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query.
pub fn parse(query: &str, base_iri: Option<&str>) -> Result<Self, spargebra::ParseError> {
pub fn parse(
query: &str,
base_iri: Option<&str>,
) -> Result<Self, spargebra::SparqlSyntaxError> {
let start = Timer::now();
let query = Self::from(spargebra::Query::parse(query, base_iri)?);
Ok(Self {
dataset: query.dataset,
inner: query.inner,
parsing_duration: Some(start.elapsed()),
parsing_duration: start.elapsed(),
})
}
@ -65,25 +69,25 @@ impl fmt::Display for Query {
}
impl FromStr for Query {
type Err = spargebra::ParseError;
type Err = spargebra::SparqlSyntaxError;
fn from_str(query: &str) -> Result<Self, spargebra::ParseError> {
fn from_str(query: &str) -> Result<Self, Self::Err> {
Self::parse(query, None)
}
}
impl<'a> TryFrom<&'a str> for Query {
type Error = spargebra::ParseError;
impl TryFrom<&str> for Query {
type Error = spargebra::SparqlSyntaxError;
fn try_from(query: &str) -> Result<Self, spargebra::ParseError> {
fn try_from(query: &str) -> Result<Self, Self::Error> {
Self::from_str(query)
}
}
impl<'a> TryFrom<&'a String> for Query {
type Error = spargebra::ParseError;
impl TryFrom<&String> for Query {
type Error = spargebra::SparqlSyntaxError;
fn try_from(query: &String) -> Result<Self, spargebra::ParseError> {
fn try_from(query: &String) -> Result<Self, Self::Error> {
Self::from_str(query)
}
}
@ -112,7 +116,7 @@ impl From<spargebra::Query> for Query {
/// let update = Update::parse(update_str, None)?;
///
/// assert_eq!(update.to_string().trim(), update_str);
/// # Ok::<_, oxigraph::sparql::ParseError>(())
/// # Ok::<_, oxigraph::sparql::SparqlSyntaxError>(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Update {
@ -122,22 +126,11 @@ pub struct Update {
impl Update {
/// Parses a SPARQL update with an optional base IRI to resolve relative IRIs in the query.
pub fn parse(update: &str, base_iri: Option<&str>) -> Result<Self, spargebra::ParseError> {
let update = spargebra::Update::parse(update, base_iri)?;
Ok(Self {
using_datasets: update
.operations
.iter()
.map(|operation| {
if let GraphUpdateOperation::DeleteInsert { using, .. } = operation {
Some(QueryDataset::from_algebra(using))
} else {
None
}
})
.collect(),
inner: update,
})
pub fn parse(
update: &str,
base_iri: Option<&str>,
) -> Result<Self, spargebra::SparqlSyntaxError> {
Ok(spargebra::Update::parse(update, base_iri)?.into())
}
/// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) in [DELETE/INSERT operations](https://www.w3.org/TR/sparql11-update/#deleteInsert).
@ -158,29 +151,48 @@ impl fmt::Display for Update {
}
impl FromStr for Update {
type Err = spargebra::ParseError;
type Err = spargebra::SparqlSyntaxError;
fn from_str(update: &str) -> Result<Self, spargebra::ParseError> {
fn from_str(update: &str) -> Result<Self, Self::Err> {
Self::parse(update, None)
}
}
impl<'a> TryFrom<&'a str> for Update {
type Error = spargebra::ParseError;
impl TryFrom<&str> for Update {
type Error = spargebra::SparqlSyntaxError;
fn try_from(update: &str) -> Result<Self, spargebra::ParseError> {
fn try_from(update: &str) -> Result<Self, Self::Error> {
Self::from_str(update)
}
}
impl<'a> TryFrom<&'a String> for Update {
type Error = spargebra::ParseError;
impl TryFrom<&String> for Update {
type Error = spargebra::SparqlSyntaxError;
fn try_from(update: &String) -> Result<Self, spargebra::ParseError> {
fn try_from(update: &String) -> Result<Self, Self::Error> {
Self::from_str(update)
}
}
impl From<spargebra::Update> for Update {
fn from(update: spargebra::Update) -> Self {
Self {
using_datasets: update
.operations
.iter()
.map(|operation| {
if let GraphUpdateOperation::DeleteInsert { using, .. } = operation {
Some(QueryDataset::from_algebra(using))
} else {
None
}
})
.collect(),
inner: update,
}
}
}
/// A SPARQL query [dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset)
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct QueryDataset {
@ -219,8 +231,15 @@ impl QueryDataset {
/// ```
/// use oxigraph::sparql::Query;
///
/// assert!(Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?.dataset().is_default_dataset());
/// assert!(!Query::parse("SELECT ?s ?p ?o FROM <http://example.com> WHERE { ?s ?p ?o . }", None)?.dataset().is_default_dataset());
/// assert!(Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?
/// .dataset()
/// .is_default_dataset());
/// assert!(!Query::parse(
/// "SELECT ?s ?p ?o FROM <http://example.com> WHERE { ?s ?p ?o . }",
/// None
/// )?
/// .dataset()
/// .is_default_dataset());
///
/// # Ok::<_, Box<dyn std::error::Error>>(())
/// ```
@ -252,7 +271,10 @@ impl QueryDataset {
/// let mut query = Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?;
/// let default = vec![NamedNode::new("http://example.com")?.into()];
/// query.dataset_mut().set_default_graph(default.clone());
/// assert_eq!(query.dataset().default_graph_graphs(), Some(default.as_slice()));
/// assert_eq!(
/// query.dataset().default_graph_graphs(),
/// Some(default.as_slice())
/// );
///
/// # Ok::<_, Box<dyn std::error::Error>>(())
/// ```
@ -273,8 +295,13 @@ impl QueryDataset {
///
/// let mut query = Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?;
/// let named = vec![NamedNode::new("http://example.com")?.into()];
/// query.dataset_mut().set_available_named_graphs(named.clone());
/// assert_eq!(query.dataset().available_named_graphs(), Some(named.as_slice()));
/// query
/// .dataset_mut()
/// .set_available_named_graphs(named.clone());
/// assert_eq!(
/// query.dataset().available_named_graphs(),
/// Some(named.as_slice())
/// );
///
/// # Ok::<_, Box<dyn std::error::Error>>(())
/// ```

@ -179,10 +179,6 @@ impl StrLookup for DatasetView {
self.reader.get_str(key)?
})
}
fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> {
Ok(self.extra.borrow().contains_key(key) || self.reader.contains_str(key)?)
}
}
struct EncodedDatasetSpec {

@ -0,0 +1,84 @@
use crate::io::RdfParseError;
use crate::model::NamedNode;
use crate::sparql::results::QueryResultsParseError as ResultsParseError;
use crate::sparql::SparqlSyntaxError;
use crate::storage::StorageError;
use std::convert::Infallible;
use std::error::Error;
use std::io;
/// A SPARQL evaluation error.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum EvaluationError {
/// An error in SPARQL parsing.
#[error(transparent)]
Parsing(#[from] SparqlSyntaxError),
/// An error from the storage.
#[error(transparent)]
Storage(#[from] StorageError),
/// An error while parsing an external RDF file.
#[error(transparent)]
GraphParsing(#[from] RdfParseError),
/// An error while parsing an external result file (likely from a federated query).
#[error(transparent)]
ResultsParsing(#[from] ResultsParseError),
/// An error returned during results serialization.
#[error(transparent)]
ResultsSerialization(#[from] io::Error),
/// Error during `SERVICE` evaluation
#[error("{0}")]
Service(#[source] Box<dyn Error + Send + Sync + 'static>),
/// Error when `CREATE` tries to create an already existing graph
#[error("The graph {0} already exists")]
GraphAlreadyExists(NamedNode),
/// Error when `DROP` or `CLEAR` tries to remove a not existing graph
#[error("The graph {0} does not exist")]
GraphDoesNotExist(NamedNode),
/// The variable storing the `SERVICE` name is unbound
#[error("The variable encoding the service name is unbound")]
UnboundService,
/// The given `SERVICE` is not supported
#[error("The service {0} is not supported")]
UnsupportedService(NamedNode),
/// The given content media type returned from an HTTP response is not supported (`SERVICE` and `LOAD`)
#[error("The content media type {0} is not supported")]
UnsupportedContentType(String),
/// The `SERVICE` call has not returns solutions
#[error("The service is not returning solutions but a boolean or a graph")]
ServiceDoesNotReturnSolutions,
/// The results are not a RDF graph
#[error("The query results are not a RDF graph")]
NotAGraph,
}
impl From<Infallible> for EvaluationError {
#[inline]
fn from(error: Infallible) -> Self {
match error {}
}
}
impl From<EvaluationError> for io::Error {
#[inline]
fn from(error: EvaluationError) -> Self {
match error {
EvaluationError::Parsing(error) => Self::new(io::ErrorKind::InvalidData, error),
EvaluationError::GraphParsing(error) => error.into(),
EvaluationError::ResultsParsing(error) => error.into(),
EvaluationError::ResultsSerialization(error) => error,
EvaluationError::Storage(error) => error.into(),
EvaluationError::Service(error) => match error.downcast() {
Ok(error) => *error,
Err(error) => Self::new(io::ErrorKind::Other, error),
},
EvaluationError::GraphAlreadyExists(_)
| EvaluationError::GraphDoesNotExist(_)
| EvaluationError::UnboundService
| EvaluationError::UnsupportedService(_)
| EvaluationError::UnsupportedContentType(_)
| EvaluationError::ServiceDoesNotReturnSolutions
| EvaluationError::NotAGraph => Self::new(io::ErrorKind::InvalidInput, error),
}
}
}

File diff suppressed because it is too large Load Diff

@ -3,18 +3,18 @@
use std::io::{Empty, Error, ErrorKind, Result};
use std::time::Duration;
pub struct Client {}
pub struct Client;
impl Client {
pub fn new(_timeout: Option<Duration>, _redirection_limit: usize) -> Self {
Self {}
Self
}
#[allow(clippy::unused_self)]
pub fn get(&self, _url: &str, _accept: &str) -> Result<(String, Empty)> {
pub fn get(&self, _url: &str, _accept: &'static str) -> Result<(String, Empty)> {
Err(Error::new(
ErrorKind::Unsupported,
"HTTP client is not available. Enable the feature 'http_client'",
"HTTP client is not available. Enable the feature 'http-client'",
))
}
@ -23,12 +23,12 @@ impl Client {
&self,
_url: &str,
_payload: Vec<u8>,
_content_type: &str,
_accept: &str,
_content_type: &'static str,
_accept: &'static str,
) -> Result<(String, Empty)> {
Err(Error::new(
ErrorKind::Unsupported,
"HTTP client is not available. Enable the feature 'http_client'",
"HTTP client is not available. Enable the feature 'http-client'",
))
}
}

@ -0,0 +1,9 @@
#[cfg(not(feature = "http-client"))]
mod dummy;
#[cfg(feature = "http-client")]
mod simple;
#[cfg(not(feature = "http-client"))]
pub use dummy::Client;
#[cfg(feature = "http-client")]
pub use simple::Client;

@ -8,18 +8,17 @@ pub struct Client {
impl Client {
pub fn new(timeout: Option<Duration>, redirection_limit: usize) -> Self {
let mut client = oxhttp::Client::new();
let mut client = oxhttp::Client::new()
.with_redirection_limit(redirection_limit)
.with_user_agent(concat!("Oxigraph/", env!("CARGO_PKG_VERSION")))
.unwrap();
if let Some(timeout) = timeout {
client.set_global_timeout(timeout);
client = client.with_global_timeout(timeout);
}
client.set_redirection_limit(redirection_limit);
client
.set_user_agent(concat!("Oxigraph/", env!("CARGO_PKG_VERSION")))
.unwrap();
Self { client }
}
pub fn get(&self, url: &str, accept: &str) -> Result<(String, Body)> {
pub fn get(&self, url: &str, accept: &'static str) -> Result<(String, Body)> {
let request = Request::builder(Method::GET, url.parse().map_err(invalid_input_error)?)
.with_header(HeaderName::ACCEPT, accept)
.map_err(invalid_input_error)?
@ -50,8 +49,8 @@ impl Client {
&self,
url: &str,
payload: Vec<u8>,
content_type: &str,
accept: &str,
content_type: &'static str,
accept: &'static str,
) -> Result<(String, Body)> {
let request = Request::builder(Method::POST, url.parse().map_err(invalid_input_error)?)
.with_header(HeaderName::ACCEPT, accept)

@ -8,29 +8,29 @@ mod error;
mod eval;
mod http;
mod model;
mod plan;
mod plan_builder;
pub mod results;
mod service;
mod update;
use crate::model::{NamedNode, Term};
pub use crate::sparql::algebra::{Query, QueryDataset, Update};
use crate::sparql::dataset::DatasetView;
pub use crate::sparql::error::{EvaluationError, QueryError};
use crate::sparql::eval::{SimpleEvaluator, Timer};
pub use crate::sparql::error::EvaluationError;
use crate::sparql::eval::{EvalNodeWithStats, SimpleEvaluator, Timer};
pub use crate::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter};
use crate::sparql::plan::PlanNodeWithStats;
use crate::sparql::plan_builder::PlanBuilder;
pub use crate::sparql::service::ServiceHandler;
use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler};
pub(crate) use crate::sparql::update::evaluate_update;
use crate::storage::StorageReader;
use json_event_parser::{JsonEvent, JsonWriter};
use json_event_parser::{JsonEvent, ToWriteJsonWriter};
pub use oxrdf::{Variable, VariableNameParseError};
pub use sparesults::QueryResultsFormat;
pub use spargebra::ParseError;
use oxsdatatypes::{DayTimeDuration, Float};
pub use spargebra::SparqlSyntaxError;
use sparopt::algebra::GraphPattern;
use sparopt::Optimizer;
use std::collections::HashMap;
use std::rc::Rc;
use std::sync::Arc;
use std::time::Duration;
use std::{fmt, io};
@ -48,43 +48,39 @@ pub(crate) fn evaluate_query(
spargebra::Query::Select {
pattern, base_iri, ..
} => {
let (plan, variables) = PlanBuilder::build(
&dataset,
&pattern,
true,
&options.custom_functions,
options.without_optimizations,
)?;
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(pattern);
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Rc::new(options.custom_functions),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_select_plan(Rc::new(plan), Rc::new(variables));
.evaluate_select(&pattern);
(Ok(results), explanation, planning_duration)
}
spargebra::Query::Ask {
pattern, base_iri, ..
} => {
let (plan, _) = PlanBuilder::build(
&dataset,
&pattern,
false,
&options.custom_functions,
options.without_optimizations,
)?;
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
inner: Box::new(pattern),
});
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Rc::new(options.custom_functions),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_ask_plan(Rc::new(plan));
.evaluate_ask(&pattern);
(results, explanation, planning_duration)
}
spargebra::Query::Construct {
@ -93,50 +89,41 @@ pub(crate) fn evaluate_query(
base_iri,
..
} => {
let (plan, variables) = PlanBuilder::build(
&dataset,
&pattern,
false,
&options.custom_functions,
options.without_optimizations,
)?;
let construct = PlanBuilder::build_graph_template(
&dataset,
&template,
variables,
&options.custom_functions,
options.without_optimizations,
);
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
inner: Box::new(pattern),
});
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Rc::new(options.custom_functions),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_construct_plan(Rc::new(plan), construct);
.evaluate_construct(&pattern, &template);
(Ok(results), explanation, planning_duration)
}
spargebra::Query::Describe {
pattern, base_iri, ..
} => {
let (plan, _) = PlanBuilder::build(
&dataset,
&pattern,
false,
&options.custom_functions,
options.without_optimizations,
)?;
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
inner: Box::new(pattern),
});
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Rc::new(options.custom_functions),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_describe_plan(Rc::new(plan));
.evaluate_describe(&pattern);
(Ok(results), explanation, planning_duration)
}
};
@ -152,36 +139,39 @@ pub(crate) fn evaluate_query(
/// Options for SPARQL query evaluation.
///
///
/// If the `"http_client"` optional feature is enabled,
/// If the `"http-client"` optional feature is enabled,
/// a simple HTTP 1.1 client is used to execute [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE calls.
///
/// Usage example disabling the federated query support:
/// ```
/// use oxigraph::store::Store;
/// use oxigraph::sparql::QueryOptions;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// store.query_opt(
/// "SELECT * WHERE { SERVICE <https://query.wikidata.org/sparql> {} }",
/// QueryOptions::default().without_service_handler()
/// QueryOptions::default().without_service_handler(),
/// )?;
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Clone, Default)]
pub struct QueryOptions {
service_handler: Option<Rc<dyn ServiceHandler<Error = EvaluationError>>>,
custom_functions: HashMap<NamedNode, Rc<dyn Fn(&[Term]) -> Option<Term>>>,
service_handler: Option<Arc<dyn ServiceHandler<Error = EvaluationError>>>,
custom_functions: CustomFunctionRegistry,
http_timeout: Option<Duration>,
http_redirection_limit: usize,
without_optimizations: bool,
}
pub(crate) type CustomFunctionRegistry =
HashMap<NamedNode, Arc<dyn (Fn(&[Term]) -> Option<Term>) + Send + Sync>>;
impl QueryOptions {
/// Use a given [`ServiceHandler`] to execute [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE calls.
#[inline]
#[must_use]
pub fn with_service_handler(mut self, service_handler: impl ServiceHandler + 'static) -> Self {
self.service_handler = Some(Rc::new(ErrorConversionServiceHandler::wrap(
self.service_handler = Some(Arc::new(ErrorConversionServiceHandler::wrap(
service_handler,
)));
self
@ -191,12 +181,12 @@ impl QueryOptions {
#[inline]
#[must_use]
pub fn without_service_handler(mut self) -> Self {
self.service_handler = Some(Rc::new(EmptyServiceHandler));
self.service_handler = Some(Arc::new(EmptyServiceHandler));
self
}
/// Sets a timeout for HTTP requests done during SPARQL evaluation.
#[cfg(feature = "http_client")]
#[cfg(feature = "http-client")]
#[inline]
#[must_use]
pub fn with_http_timeout(mut self, timeout: Duration) -> Self {
@ -207,7 +197,7 @@ impl QueryOptions {
/// Sets an upper bound of the number of HTTP redirection followed per HTTP request done during SPARQL evaluation.
///
/// By default this value is `0`.
#[cfg(feature = "http_client")]
#[cfg(feature = "http-client")]
#[inline]
#[must_use]
pub fn with_http_redirection_limit(mut self, redirection_limit: usize) -> Self {
@ -219,9 +209,9 @@ impl QueryOptions {
///
/// Example with a function serializing terms to N-Triples:
/// ```
/// use oxigraph::store::Store;
/// use oxigraph::model::*;
/// use oxigraph::sparql::{QueryOptions, QueryResults};
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
///
@ -229,10 +219,13 @@ impl QueryOptions {
/// "SELECT (<http://www.w3.org/ns/formats/N-Triples>(1) AS ?nt) WHERE {}",
/// QueryOptions::default().with_custom_function(
/// NamedNode::new("http://www.w3.org/ns/formats/N-Triples")?,
/// |args| args.get(0).map(|t| Literal::from(t.to_string()).into())
/// )
/// |args| args.get(0).map(|t| Literal::from(t.to_string()).into()),
/// ),
/// )? {
/// assert_eq!(solutions.next().unwrap()?.get("nt"), Some(&Literal::from("\"1\"^^<http://www.w3.org/2001/XMLSchema#integer>").into()));
/// assert_eq!(
/// solutions.next().unwrap()?.get("nt"),
/// Some(&Literal::from("\"1\"^^<http://www.w3.org/2001/XMLSchema#integer>").into())
/// );
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
@ -241,21 +234,21 @@ impl QueryOptions {
pub fn with_custom_function(
mut self,
name: NamedNode,
evaluator: impl Fn(&[Term]) -> Option<Term> + 'static,
evaluator: impl Fn(&[Term]) -> Option<Term> + Send + Sync + 'static,
) -> Self {
self.custom_functions.insert(name, Rc::new(evaluator));
self.custom_functions.insert(name, Arc::new(evaluator));
self
}
fn service_handler(&self) -> Rc<dyn ServiceHandler<Error = EvaluationError>> {
fn service_handler(&self) -> Arc<dyn ServiceHandler<Error = EvaluationError>> {
self.service_handler.clone().unwrap_or_else(|| {
if cfg!(feature = "http_client") {
Rc::new(service::SimpleServiceHandler::new(
if cfg!(feature = "http-client") {
Arc::new(service::SimpleServiceHandler::new(
self.http_timeout,
self.http_redirection_limit,
))
} else {
Rc::new(EmptyServiceHandler)
Arc::new(EmptyServiceHandler)
}
})
}
@ -285,28 +278,30 @@ impl From<QueryOptions> for UpdateOptions {
/// The explanation of a query.
#[derive(Clone)]
pub struct QueryExplanation {
inner: Rc<PlanNodeWithStats>,
inner: Rc<EvalNodeWithStats>,
with_stats: bool,
parsing_duration: Option<Duration>,
planning_duration: Duration,
parsing_duration: Option<DayTimeDuration>,
planning_duration: Option<DayTimeDuration>,
}
impl QueryExplanation {
/// Writes the explanation as JSON.
pub fn write_in_json(&self, output: impl io::Write) -> io::Result<()> {
let mut writer = JsonWriter::from_writer(output);
pub fn write_in_json(&self, write: impl io::Write) -> io::Result<()> {
let mut writer = ToWriteJsonWriter::new(write);
writer.write_event(JsonEvent::StartObject)?;
if let Some(parsing_duration) = self.parsing_duration {
writer.write_event(JsonEvent::ObjectKey("parsing duration in seconds"))?;
writer.write_event(JsonEvent::ObjectKey("parsing duration in seconds".into()))?;
writer.write_event(JsonEvent::Number(
&parsing_duration.as_secs_f32().to_string(),
parsing_duration.as_seconds().to_string().into(),
))?;
}
writer.write_event(JsonEvent::ObjectKey("planning duration in seconds"))?;
if let Some(planning_duration) = self.planning_duration {
writer.write_event(JsonEvent::ObjectKey("planning duration in seconds".into()))?;
writer.write_event(JsonEvent::Number(
&self.planning_duration.as_secs_f32().to_string(),
planning_duration.as_seconds().to_string().into(),
))?;
writer.write_event(JsonEvent::ObjectKey("plan"))?;
}
writer.write_event(JsonEvent::ObjectKey("plan".into()))?;
self.inner.json_node(&mut writer, self.with_stats)?;
writer.write_event(JsonEvent::EndObject)
}
@ -314,6 +309,20 @@ impl QueryExplanation {
impl fmt::Debug for QueryExplanation {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{:?}", self.inner)
let mut obj = f.debug_struct("QueryExplanation");
if let Some(parsing_duration) = self.parsing_duration {
obj.field(
"parsing duration in seconds",
&f32::from(Float::from(parsing_duration.as_seconds())),
);
}
if let Some(planning_duration) = self.planning_duration {
obj.field(
"planning duration in seconds",
&f32::from(Float::from(planning_duration.as_seconds())),
);
}
obj.field("tree", &self.inner);
obj.finish_non_exhaustive()
}
}

@ -0,0 +1,373 @@
use crate::io::{RdfFormat, RdfSerializer};
use crate::model::*;
use crate::sparql::error::EvaluationError;
use crate::sparql::results::{
FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsFormat,
QueryResultsParseError, QueryResultsParser, QueryResultsSerializer,
};
use oxrdf::{Variable, VariableRef};
pub use sparesults::QuerySolution;
use std::io::{Read, Write};
use std::sync::Arc;
/// Results of a [SPARQL query](https://www.w3.org/TR/sparql11-query/).
pub enum QueryResults {
/// Results of a [SELECT](https://www.w3.org/TR/sparql11-query/#select) query.
Solutions(QuerySolutionIter),
/// Result of a [ASK](https://www.w3.org/TR/sparql11-query/#ask) query.
Boolean(bool),
/// Results of a [CONSTRUCT](https://www.w3.org/TR/sparql11-query/#construct) or [DESCRIBE](https://www.w3.org/TR/sparql11-query/#describe) query.
Graph(QueryTripleIter),
}
impl QueryResults {
/// Reads a SPARQL query results serialization.
pub fn read(
read: impl Read + 'static,
format: QueryResultsFormat,
) -> Result<Self, QueryResultsParseError> {
Ok(QueryResultsParser::from_format(format)
.parse_read(read)?
.into())
}
/// Writes the query results (solutions or boolean).
///
/// This method fails if it is called on the `Graph` results.
///
/// ```
/// use oxigraph::store::Store;
/// use oxigraph::model::*;
/// use oxigraph::sparql::results::QueryResultsFormat;
///
/// let store = Store::new()?;
/// let ex = NamedNodeRef::new("http://example.com")?;
/// store.insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
///
/// let results = store.query("SELECT ?s WHERE { ?s ?p ?o }")?;
/// assert_eq!(
/// results.write(Vec::new(), QueryResultsFormat::Json)?,
/// r#"{"head":{"vars":["s"]},"results":{"bindings":[{"s":{"type":"uri","value":"http://example.com"}}]}}"#.as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn write<W: Write>(
self,
write: W,
format: QueryResultsFormat,
) -> Result<W, EvaluationError> {
let serializer = QueryResultsSerializer::from_format(format);
match self {
Self::Boolean(value) => serializer.serialize_boolean_to_write(write, value),
Self::Solutions(solutions) => {
let mut writer = serializer
.serialize_solutions_to_write(write, solutions.variables().to_vec())
.map_err(EvaluationError::ResultsSerialization)?;
for solution in solutions {
writer
.write(&solution?)
.map_err(EvaluationError::ResultsSerialization)?;
}
writer.finish()
}
Self::Graph(triples) => {
let s = VariableRef::new_unchecked("subject");
let p = VariableRef::new_unchecked("predicate");
let o = VariableRef::new_unchecked("object");
let mut writer = serializer
.serialize_solutions_to_write(
write,
vec![s.into_owned(), p.into_owned(), o.into_owned()],
)
.map_err(EvaluationError::ResultsSerialization)?;
for triple in triples {
let triple = triple?;
writer
.write([
(s, &triple.subject.into()),
(p, &triple.predicate.into()),
(o, &triple.object),
])
.map_err(EvaluationError::ResultsSerialization)?;
}
writer.finish()
}
}
.map_err(EvaluationError::ResultsSerialization)
}
/// Writes the graph query results.
///
/// This method fails if it is called on the `Solution` or `Boolean` results.
///
/// ```
/// use oxigraph::io::RdfFormat;
/// use oxigraph::model::*;
/// use oxigraph::store::Store;
///
/// let graph = "<http://example.com> <http://example.com> <http://example.com> .\n";
///
/// let store = Store::new()?;
/// store.load_graph(
/// graph.as_bytes(),
/// RdfFormat::NTriples,
/// GraphName::DefaultGraph,
/// None,
/// )?;
///
/// let results = store.query("CONSTRUCT WHERE { ?s ?p ?o }")?;
/// assert_eq!(
/// results.write_graph(Vec::new(), RdfFormat::NTriples)?,
/// graph.as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn write_graph<W: Write>(
self,
write: W,
format: impl Into<RdfFormat>,
) -> Result<W, EvaluationError> {
if let Self::Graph(triples) = self {
let mut writer = RdfSerializer::from_format(format.into()).serialize_to_write(write);
for triple in triples {
writer
.write_triple(&triple?)
.map_err(EvaluationError::ResultsSerialization)?;
}
writer
.finish()
.map_err(EvaluationError::ResultsSerialization)
} else {
Err(EvaluationError::NotAGraph)
}
}
}
impl From<QuerySolutionIter> for QueryResults {
#[inline]
fn from(value: QuerySolutionIter) -> Self {
Self::Solutions(value)
}
}
impl<R: Read + 'static> From<FromReadQueryResultsReader<R>> for QueryResults {
fn from(reader: FromReadQueryResultsReader<R>) -> Self {
match reader {
FromReadQueryResultsReader::Solutions(s) => Self::Solutions(s.into()),
FromReadQueryResultsReader::Boolean(v) => Self::Boolean(v),
}
}
}
/// An iterator over [`QuerySolution`]s.
///
/// ```
/// use oxigraph::sparql::QueryResults;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }")? {
/// for solution in solutions {
/// println!("{:?}", solution?.get("s"));
/// }
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct QuerySolutionIter {
variables: Arc<[Variable]>,
iter: Box<dyn Iterator<Item = Result<QuerySolution, EvaluationError>>>,
}
impl QuerySolutionIter {
/// Construct a new iterator of solution from an ordered list of solution variables and an iterator of solution tuples
/// (each tuple using the same ordering as the variable list such that tuple element 0 is the value for the variable 0...)
pub fn new(
variables: Arc<[Variable]>,
iter: impl Iterator<Item = Result<Vec<Option<Term>>, EvaluationError>> + 'static,
) -> Self {
Self {
variables: Arc::clone(&variables),
iter: Box::new(
iter.map(move |t| t.map(|values| (Arc::clone(&variables), values).into())),
),
}
}
/// The variables used in the solutions.
///
/// ```
/// use oxigraph::sparql::{QueryResults, Variable};
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s ?o WHERE { ?s ?p ?o }")? {
/// assert_eq!(
/// solutions.variables(),
/// &[Variable::new("s")?, Variable::new("o")?]
/// );
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn variables(&self) -> &[Variable] {
&self.variables
}
}
impl<R: Read + 'static> From<FromReadSolutionsReader<R>> for QuerySolutionIter {
fn from(reader: FromReadSolutionsReader<R>) -> Self {
Self {
variables: reader.variables().into(),
iter: Box::new(reader.map(|t| t.map_err(EvaluationError::from))),
}
}
}
impl Iterator for QuerySolutionIter {
type Item = Result<QuerySolution, EvaluationError>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}
/// An iterator over the triples that compose a graph solution.
///
/// ```
/// use oxigraph::sparql::QueryResults;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Graph(triples) = store.query("CONSTRUCT WHERE { ?s ?p ?o }")? {
/// for triple in triples {
/// println!("{}", triple?);
/// }
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct QueryTripleIter {
pub(crate) iter: Box<dyn Iterator<Item = Result<Triple, EvaluationError>>>,
}
impl Iterator for QueryTripleIter {
type Item = Result<Triple, EvaluationError>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
#[inline]
fn fold<Acc, G>(self, init: Acc, g: G) -> Acc
where
G: FnMut(Acc, Self::Item) -> Acc,
{
self.iter.fold(init, g)
}
}
#[cfg(test)]
mod tests {
#![allow(clippy::panic_in_result_fn)]
use super::*;
use std::io::Cursor;
#[test]
fn test_serialization_roundtrip() -> Result<(), EvaluationError> {
use std::str;
for format in [
QueryResultsFormat::Json,
QueryResultsFormat::Xml,
QueryResultsFormat::Tsv,
] {
let results = vec![
QueryResults::Boolean(true),
QueryResults::Boolean(false),
QueryResults::Solutions(QuerySolutionIter::new(
[
Variable::new_unchecked("foo"),
Variable::new_unchecked("bar"),
]
.as_ref()
.into(),
Box::new(
vec![
Ok(vec![None, None]),
Ok(vec![
Some(NamedNode::new_unchecked("http://example.com").into()),
None,
]),
Ok(vec![
None,
Some(NamedNode::new_unchecked("http://example.com").into()),
]),
Ok(vec![
Some(BlankNode::new_unchecked("foo").into()),
Some(BlankNode::new_unchecked("bar").into()),
]),
Ok(vec![Some(Literal::new_simple_literal("foo").into()), None]),
Ok(vec![
Some(
Literal::new_language_tagged_literal_unchecked("foo", "fr")
.into(),
),
None,
]),
Ok(vec![
Some(Literal::from(1).into()),
Some(Literal::from(true).into()),
]),
Ok(vec![
Some(Literal::from(1.33).into()),
Some(Literal::from(false).into()),
]),
Ok(vec![
Some(
Triple::new(
NamedNode::new_unchecked("http://example.com/s"),
NamedNode::new_unchecked("http://example.com/p"),
Triple::new(
NamedNode::new_unchecked("http://example.com/os"),
NamedNode::new_unchecked("http://example.com/op"),
NamedNode::new_unchecked("http://example.com/oo"),
),
)
.into(),
),
None,
]),
]
.into_iter(),
),
)),
];
for ex in results {
let mut buffer = Vec::new();
ex.write(&mut buffer, format)?;
let ex2 = QueryResults::read(Cursor::new(buffer.clone()), format)?;
let mut buffer2 = Vec::new();
ex2.write(&mut buffer2, format)?;
assert_eq!(
str::from_utf8(&buffer).unwrap(),
str::from_utf8(&buffer2).unwrap()
);
}
}
Ok(())
}
}

@ -0,0 +1,44 @@
//! Utilities to read and write RDF results formats using [sparesults](https://crates.io/crates/sparesults).
//!
//! It supports [SPARQL Query Results XML Format (Second Edition)](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
//!
//! Usage example converting a JSON result file into a TSV result file:
//!
//! ```
//! use oxigraph::sparql::results::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader, QueryResultsSerializer};
//! use std::io::Result;
//!
//! fn convert_json_to_tsv(json_file: &[u8]) -> Result<Vec<u8>> {
//! let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
//! let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv);
//! // We start to read the JSON file and see which kind of results it is
//! match json_parser.parse_read(json_file)? {
//! FromReadQueryResultsReader::Boolean(value) => {
//! // it's a boolean result, we copy it in TSV to the output buffer
//! tsv_serializer.serialize_boolean_to_write(Vec::new(), value)
//! }
//! FromReadQueryResultsReader::Solutions(solutions_reader) => {
//! // it's a set of solutions, we create a writer and we write to it while reading in streaming from the JSON file
//! let mut serialize_solutions_to_write = tsv_serializer.serialize_solutions_to_write(Vec::new(), solutions_reader.variables().to_vec())?;
//! for solution in solutions_reader {
//! serialize_solutions_to_write.write(&solution?)?;
//! }
//! serialize_solutions_to_write.finish()
//! }
//! }
//! }
//!
//! // Let's test with a boolean
//! assert_eq!(
//! convert_json_to_tsv(br#"{"boolean":true}"#.as_slice()).unwrap(),
//! b"true"
//! );
//!
//! // And with a set of solutions
//! assert_eq!(
//! convert_json_to_tsv(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice()).unwrap(),
//! b"?foo\t?bar\n\"test\"\t\n"
//! );
//! ```
pub use sparesults::*;

@ -3,9 +3,8 @@ use crate::sparql::algebra::Query;
use crate::sparql::error::EvaluationError;
use crate::sparql::http::Client;
use crate::sparql::model::QueryResults;
use crate::sparql::QueryResultsFormat;
use crate::sparql::results::QueryResultsFormat;
use std::error::Error;
use std::io::BufReader;
use std::time::Duration;
/// Handler for [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE.
@ -14,18 +13,22 @@ use std::time::Duration;
/// before evaluating a SPARQL query that uses SERVICE calls.
///
/// ```
/// use oxigraph::store::Store;
/// use oxigraph::model::*;
/// use oxigraph::sparql::{QueryOptions, QueryResults, ServiceHandler, Query, EvaluationError};
/// use oxigraph::sparql::{EvaluationError, Query, QueryOptions, QueryResults, ServiceHandler};
/// use oxigraph::store::Store;
///
/// struct TestServiceHandler {
/// store: Store
/// store: Store,
/// }
///
/// impl ServiceHandler for TestServiceHandler {
/// type Error = EvaluationError;
///
/// fn handle(&self,service_name: NamedNode, query: Query) -> Result<QueryResults,EvaluationError> {
/// fn handle(
/// &self,
/// service_name: NamedNode,
/// query: Query,
/// ) -> Result<QueryResults, Self::Error> {
/// if service_name == "http://example.com/service" {
/// self.store.query(query)
/// } else {
@ -36,20 +39,23 @@ use std::time::Duration;
///
/// let store = Store::new()?;
/// let service = TestServiceHandler {
/// store: Store::new()?
/// store: Store::new()?,
/// };
/// let ex = NamedNodeRef::new("http://example.com")?;
/// service.store.insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
/// service
/// .store
/// .insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
///
/// if let QueryResults::Solutions(mut solutions) = store.query_opt(
/// "SELECT ?s WHERE { SERVICE <http://example.com/service> { ?s ?p ?o } }",
/// QueryOptions::default().with_service_handler(service)
/// QueryOptions::default().with_service_handler(service),
/// )? {
/// assert_eq!(solutions.next().unwrap()?.get("s"), Some(&ex.into()));
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub trait ServiceHandler {
pub trait ServiceHandler: Send + Sync {
/// The service evaluation error.
type Error: Error + Send + Sync + 'static;
/// Evaluates a [`Query`] against a given service identified by a [`NamedNode`].
@ -61,10 +67,8 @@ pub struct EmptyServiceHandler;
impl ServiceHandler for EmptyServiceHandler {
type Error = EvaluationError;
fn handle(&self, _: NamedNode, _: Query) -> Result<QueryResults, EvaluationError> {
Err(EvaluationError::msg(
"The SERVICE feature is not implemented",
))
fn handle(&self, name: NamedNode, _: Query) -> Result<QueryResults, Self::Error> {
Err(EvaluationError::UnsupportedService(name))
}
}
@ -81,14 +85,10 @@ impl<S: ServiceHandler> ErrorConversionServiceHandler<S> {
impl<S: ServiceHandler> ServiceHandler for ErrorConversionServiceHandler<S> {
type Error = EvaluationError;
fn handle(
&self,
service_name: NamedNode,
query: Query,
) -> Result<QueryResults, EvaluationError> {
fn handle(&self, service_name: NamedNode, query: Query) -> Result<QueryResults, Self::Error> {
self.handler
.handle(service_name, query)
.map_err(EvaluationError::wrap)
.map_err(|e| EvaluationError::Service(Box::new(e)))
}
}
@ -107,22 +107,18 @@ impl SimpleServiceHandler {
impl ServiceHandler for SimpleServiceHandler {
type Error = EvaluationError;
fn handle(
&self,
service_name: NamedNode,
query: Query,
) -> Result<QueryResults, EvaluationError> {
let (content_type, body) = self.client.post(
fn handle(&self, service_name: NamedNode, query: Query) -> Result<QueryResults, Self::Error> {
let (content_type, body) = self
.client
.post(
service_name.as_str(),
query.to_string().into_bytes(),
"application/sparql-query",
"application/sparql-results+json, application/sparql-results+xml",
)?;
let format = QueryResultsFormat::from_media_type(&content_type).ok_or_else(|| {
EvaluationError::msg(format!(
"Unsupported Content-Type returned by {service_name}: {content_type}"
))
})?;
Ok(QueryResults::read(BufReader::new(body), format)?)
)
.map_err(|e| EvaluationError::Service(Box::new(e)))?;
let format = QueryResultsFormat::from_media_type(&content_type)
.ok_or_else(|| EvaluationError::UnsupportedContentType(content_type))?;
Ok(QueryResults::read(body, format)?)
}
}

@ -1,12 +1,9 @@
use crate::io::read::ParseError;
use crate::io::{GraphFormat, GraphParser};
use crate::io::{RdfFormat, RdfParser};
use crate::model::{GraphName as OxGraphName, GraphNameRef, Quad as OxQuad};
use crate::sparql::algebra::QueryDataset;
use crate::sparql::dataset::DatasetView;
use crate::sparql::eval::SimpleEvaluator;
use crate::sparql::eval::{EncodedTuple, SimpleEvaluator};
use crate::sparql::http::Client;
use crate::sparql::plan::EncodedTuple;
use crate::sparql::plan_builder::PlanBuilder;
use crate::sparql::{EvaluationError, Update, UpdateOptions};
use crate::storage::numeric_encoder::{Decoder, EncodedTerm};
use crate::storage::StorageWriter;
@ -18,9 +15,11 @@ use spargebra::term::{
Quad, QuadPattern, Subject, Term, TermPattern, Triple, TriplePattern, Variable,
};
use spargebra::GraphUpdateOperation;
use sparopt::Optimizer;
use std::collections::HashMap;
use std::io::BufReader;
use std::io;
use std::rc::Rc;
use std::sync::Arc;
pub fn evaluate_update<'a, 'b: 'a>(
transaction: &'a mut StorageWriter<'b>,
@ -74,9 +73,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
} => self.eval_delete_insert(
delete,
insert,
using_dataset
.as_ref()
.ok_or_else(|| EvaluationError::msg("No dataset"))?,
using_dataset.as_ref().unwrap_or(&QueryDataset::new()),
pattern,
),
GraphUpdateOperation::Load {
@ -125,22 +122,22 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
algebra: &GraphPattern,
) -> Result<(), EvaluationError> {
let dataset = Rc::new(DatasetView::new(self.transaction.reader(), using));
let (plan, variables) = PlanBuilder::build(
&dataset,
algebra,
false,
&self.options.query_options.custom_functions,
!self.options.query_options.without_optimizations,
)?;
let mut pattern = sparopt::algebra::GraphPattern::from(algebra);
if !self.options.query_options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(sparopt::algebra::GraphPattern::Reduced {
inner: Box::new(pattern),
});
}
let evaluator = SimpleEvaluator::new(
Rc::clone(&dataset),
self.base_iri.clone(),
self.options.query_options.service_handler(),
Rc::new(self.options.query_options.custom_functions.clone()),
Arc::new(self.options.query_options.custom_functions.clone()),
false,
);
let mut variables = Vec::new();
let mut bnodes = HashMap::new();
let (eval, _) = evaluator.plan_evaluator(Rc::new(plan));
let (eval, _) = evaluator.graph_pattern_evaluator(&pattern, &mut variables);
let tuples =
eval(EncodedTuple::with_capacity(variables.len())).collect::<Result<Vec<_>, _>>()?; // TODO: would be much better to stream
for tuple in tuples {
@ -164,28 +161,31 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
}
fn eval_load(&mut self, from: &NamedNode, to: &GraphName) -> Result<(), EvaluationError> {
let (content_type, body) = self.client.get(
let (content_type, body) = self
.client
.get(
from.as_str(),
"application/n-triples, text/turtle, application/rdf+xml",
)?;
let format = GraphFormat::from_media_type(&content_type).ok_or_else(|| {
EvaluationError::msg(format!(
"Unsupported Content-Type returned by {from}: {content_type}"
))
})?;
)
.map_err(|e| EvaluationError::Service(Box::new(e)))?;
let format = RdfFormat::from_media_type(&content_type)
.ok_or_else(|| EvaluationError::UnsupportedContentType(content_type))?;
let to_graph_name = match to {
GraphName::NamedNode(graph_name) => graph_name.into(),
GraphName::DefaultGraph => GraphNameRef::DefaultGraph,
};
let mut parser = GraphParser::from_format(format);
if let Some(base_iri) = &self.base_iri {
parser = parser
.with_base_iri(base_iri.as_str())
.map_err(|e| ParseError::invalid_base_iri(base_iri, e))?;
}
for t in parser.read_triples(BufReader::new(body))? {
self.transaction
.insert(t?.as_ref().in_graph(to_graph_name))?;
let mut parser = RdfParser::from_format(format)
.rename_blank_nodes()
.without_named_graphs()
.with_default_graph(to_graph_name);
parser = parser.with_base_iri(from.as_str()).map_err(|e| {
EvaluationError::Service(Box::new(io::Error::new(
io::ErrorKind::InvalidInput,
format!("Invalid URL: {from}: {e}"),
)))
})?;
for q in parser.parse_read(body) {
self.transaction.insert(q?.as_ref())?;
}
Ok(())
}
@ -194,9 +194,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
if self.transaction.insert_named_graph(graph_name.into())? || silent {
Ok(())
} else {
Err(EvaluationError::msg(format!(
"The graph {graph_name} already exists"
)))
Err(EvaluationError::GraphAlreadyExists(graph_name.clone()))
}
}
@ -212,9 +210,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
} else if silent {
Ok(())
} else {
Err(EvaluationError::msg(format!(
"The graph {graph} does not exists"
)))
Err(EvaluationError::GraphDoesNotExist(graph_name.clone()))
}
}
GraphTarget::DefaultGraph => {
@ -232,9 +228,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
if self.transaction.remove_named_graph(graph_name.into())? || silent {
Ok(())
} else {
Err(EvaluationError::msg(format!(
"The graph {graph_name} does not exists"
)))
Err(EvaluationError::GraphDoesNotExist(graph_name.clone()))
}
}
GraphTarget::DefaultGraph => {

@ -1,6 +1,7 @@
//! TODO: This storage is dramatically naive.
use crate::storage::StorageError;
use crate::store::CorruptionError;
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap};
use std::error::Error;
@ -30,9 +31,13 @@ impl Db {
}
#[allow(clippy::unwrap_in_result)]
pub fn column_family(&self, name: &'static str) -> Option<ColumnFamily> {
let name = ColumnFamily(name);
(self.0.read().unwrap().contains_key(&name)).then(|| name)
pub fn column_family(&self, name: &'static str) -> Result<ColumnFamily, StorageError> {
let column_family = ColumnFamily(name);
if self.0.read().unwrap().contains_key(&column_family) {
Ok(column_family)
} else {
Err(CorruptionError::from_missing_column_family_name(name).into())
}
}
#[must_use]
@ -116,6 +121,7 @@ impl Reader {
}
}
#[allow(clippy::iter_not_returning_iterator)]
pub fn iter(&self, column_family: &ColumnFamily) -> Result<Iter, StorageError> {
self.scan_prefix(column_family, &[])
}
@ -129,9 +135,7 @@ impl Reader {
let data: Vec<_> = match &self.0 {
InnerReader::Simple(reader) => {
let trees = reader.read().unwrap();
let tree = if let Some(tree) = trees.get(column_family) {
tree
} else {
let Some(tree) = trees.get(column_family) else {
return Ok(Iter {
iter: Vec::new().into_iter(),
current: None,
@ -147,11 +151,13 @@ impl Reader {
}
}
InnerReader::Transaction(reader) => {
if let Some(reader) = reader.upgrade() {
let Some(reader) = reader.upgrade() else {
return Err(StorageError::Other(
"The transaction is already ended".into(),
));
};
let trees = (*reader).borrow();
let tree = if let Some(tree) = trees.get(column_family) {
tree
} else {
let Some(tree) = trees.get(column_family) else {
return Ok(Iter {
iter: Vec::new().into_iter(),
current: None,
@ -165,11 +171,6 @@ impl Reader {
.map(|(k, v)| (k.clone(), v.clone()))
.collect()
}
} else {
return Err(StorageError::Other(
"The transaction is already ended".into(),
));
}
}
};
let mut iter = data.into_iter();
@ -231,7 +232,7 @@ pub struct Transaction<'a>(
impl Transaction<'_> {
#[allow(unsafe_code, clippy::useless_transmute)]
pub fn reader(&self) -> Reader {
// This transmute is safe because we take a weak reference and the only Rc reference used is guarded by the lifetime.
// SAFETY: This transmute is safe because we take a weak reference and the only Rc reference used is guarded by the lifetime.
Reader(InnerReader::Transaction(Rc::downgrade(unsafe {
transmute(&self.0)
})))

@ -4,9 +4,7 @@
#[cfg(target_family = "wasm")]
pub use fallback::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction};
#[cfg(not(target_family = "wasm"))]
pub use rocksdb::{
ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, SstFileWriter, Transaction,
};
pub use rocksdb::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction};
#[cfg(target_family = "wasm")]
mod fallback;

@ -1,48 +1,42 @@
//! Code inspired by [Rust RocksDB](https://github.com/rust-rocksdb/rust-rocksdb) under Apache License 2.0.
#![allow(unsafe_code, trivial_casts)]
#![allow(
unsafe_code,
trivial_casts,
clippy::undocumented_unsafe_blocks,
clippy::panic_in_result_fn,
clippy::unwrap_in_result
)]
use crate::storage::error::{CorruptionError, StorageError};
use lazy_static::lazy_static;
use libc::{self, c_void, free};
use libc::{self, c_void};
use oxrocksdb_sys::*;
use rand::random;
use std::borrow::Borrow;
#[cfg(unix)]
use std::cmp::min;
use std::collections::HashMap;
use std::env::temp_dir;
use std::error::Error;
use std::ffi::{CStr, CString};
use std::fmt;
use std::fs::remove_dir_all;
use std::io;
use std::marker::PhantomData;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::rc::{Rc, Weak};
use std::sync::Arc;
use std::sync::{Arc, OnceLock};
use std::thread::{available_parallelism, yield_now};
use std::{ptr, slice};
use std::{fmt, io, ptr, slice};
macro_rules! ffi_result {
( $($function:ident)::*() ) => {
ffi_result_impl!($($function)::*())
};
( $($function:ident)::*( $arg1:expr $(, $arg:expr)* $(,)? ) ) => {
ffi_result_impl!($($function)::*($arg1 $(, $arg)* ,))
};
}
macro_rules! ffi_result_impl {
( $($function:ident)::*( $($arg:expr,)*) ) => {{
( $($function:ident)::*( $arg1:expr $(, $arg:expr)* $(,)? ) ) => {{
let mut status = rocksdb_status_t {
code: rocksdb_status_code_t_rocksdb_status_code_ok,
subcode: rocksdb_status_subcode_t_rocksdb_status_subcode_none,
severity: rocksdb_status_severity_t_rocksdb_status_severity_none,
string: ptr::null()
};
let result = $($function)::*($($arg,)* &mut status);
let result = $($function)::*($arg1 $(, $arg)* , &mut status);
if status.code == rocksdb_status_code_t_rocksdb_status_code_ok {
Ok(result)
} else {
@ -51,23 +45,6 @@ macro_rules! ffi_result_impl {
}}
}
lazy_static! {
static ref ROCKSDB_ENV: UnsafeEnv = {
unsafe {
let env = rocksdb_create_default_env();
assert!(!env.is_null(), "rocksdb_create_default_env returned null");
UnsafeEnv(env)
}
};
static ref ROCKSDB_MEM_ENV: UnsafeEnv = {
unsafe {
let env = rocksdb_create_mem_env();
assert!(!env.is_null(), "rocksdb_create_mem_env returned null");
UnsafeEnv(env)
}
};
}
pub struct ColumnFamilyDefinition {
pub name: &'static str,
pub use_iter: bool,
@ -131,8 +108,7 @@ impl Drop for RwDbHandler {
rocksdb_block_based_options_destroy(self.block_based_table_options);
}
if self.in_memory {
#[allow(clippy::let_underscore_must_use)]
let _ = remove_dir_all(&self.path);
drop(remove_dir_all(&self.path));
}
}
}
@ -166,8 +142,7 @@ impl Drop for RoDbHandler {
rocksdb_options_destroy(self.options);
}
if let Some(path) = &self.path_to_remove {
#[allow(clippy::let_underscore_must_use)]
let _ = remove_dir_all(path);
drop(remove_dir_all(path));
}
}
}
@ -212,7 +187,7 @@ impl Db {
16,
);
rocksdb_options_set_block_based_table_factory(options, block_based_table_options);
#[cfg(feature = "rocksdb_debug")]
#[cfg(feature = "rocksdb-debug")]
{
rocksdb_options_set_info_log_level(options, 0);
rocksdb_options_enable_statistics(options);
@ -393,7 +368,7 @@ impl Db {
cf_handles,
cf_options,
is_secondary: true,
path_to_remove: in_memory.then(|| secondary_path),
path_to_remove: in_memory.then_some(secondary_path),
})),
})
}
@ -466,6 +441,9 @@ impl Db {
limit_max_open_files: bool,
in_memory: bool,
) -> Result<*mut rocksdb_options_t, StorageError> {
static ROCKSDB_ENV: OnceLock<UnsafeEnv> = OnceLock::new();
static ROCKSDB_MEM_ENV: OnceLock<UnsafeEnv> = OnceLock::new();
unsafe {
let options = rocksdb_options_create();
assert!(!options.is_null(), "rocksdb_options_create returned null");
@ -502,10 +480,19 @@ impl Db {
rocksdb_options_set_env(
options,
if in_memory {
ROCKSDB_MEM_ENV.0
ROCKSDB_MEM_ENV.get_or_init(|| {
let env = rocksdb_create_mem_env();
assert!(!env.is_null(), "rocksdb_create_mem_env returned null");
UnsafeEnv(env)
})
} else {
ROCKSDB_ENV.0
},
ROCKSDB_ENV.get_or_init(|| {
let env = rocksdb_create_default_env();
assert!(!env.is_null(), "rocksdb_create_default_env returned null");
UnsafeEnv(env)
})
}
.0,
);
Ok(options)
}
@ -551,17 +538,17 @@ impl Db {
(column_family_names, c_column_family_names, cf_options)
}
pub fn column_family(&self, name: &'static str) -> Option<ColumnFamily> {
pub fn column_family(&self, name: &'static str) -> Result<ColumnFamily, StorageError> {
let (column_family_names, cf_handles) = match &self.inner {
DbKind::ReadOnly(db) => (&db.column_family_names, &db.cf_handles),
DbKind::ReadWrite(db) => (&db.column_family_names, &db.cf_handles),
};
for (cf, cf_handle) in column_family_names.iter().zip(cf_handles) {
if *cf == name {
return Some(ColumnFamily(*cf_handle));
return Ok(ColumnFamily(*cf_handle));
}
}
None
Err(CorruptionError::from_missing_column_family_name(name).into())
}
#[must_use]
@ -571,8 +558,9 @@ impl Db {
DbKind::ReadOnly(db) => {
if db.is_secondary {
// We try to refresh (and ignore the errors)
#[allow(clippy::let_underscore_must_use)]
let _ = ffi_result!(rocksdb_try_catch_up_with_primary_with_status(db.db));
drop(ffi_result!(rocksdb_try_catch_up_with_primary_with_status(
db.db
)));
}
let options = rocksdb_readoptions_create_copy(db.read_options);
Reader {
@ -637,7 +625,7 @@ impl Db {
ffi_result!(rocksdb_transaction_commit_with_status(transaction));
rocksdb_transaction_destroy(transaction);
rocksdb_readoptions_destroy(read_options);
free(snapshot as *mut c_void);
rocksdb_free(snapshot as *mut c_void);
r.map_err(StorageError::from)?; // We make sure to also run destructors if the commit fails
}
return Ok(result);
@ -648,7 +636,7 @@ impl Db {
ffi_result!(rocksdb_transaction_rollback_with_status(transaction));
rocksdb_transaction_destroy(transaction);
rocksdb_readoptions_destroy(read_options);
free(snapshot as *mut c_void);
rocksdb_free(snapshot as *mut c_void);
r.map_err(StorageError::from)?; // We make sure to also run destructors if the commit fails
}
// We look for the root error
@ -750,13 +738,14 @@ impl Db {
}
}
pub fn flush(&self, column_family: &ColumnFamily) -> Result<(), StorageError> {
pub fn flush(&self) -> Result<(), StorageError> {
if let DbKind::ReadWrite(db) = &self.inner {
unsafe {
ffi_result!(rocksdb_transactiondb_flush_cf_with_status(
ffi_result!(rocksdb_transactiondb_flush_cfs_with_status(
db.db,
db.flush_options,
column_family.0,
db.cf_handles.as_ptr().cast_mut(),
db.cf_handles.len().try_into().unwrap()
))
}?;
Ok(())
@ -814,6 +803,9 @@ impl Db {
&self,
ssts_for_cf: &[(&ColumnFamily, PathBuf)],
) -> Result<(), StorageError> {
if ssts_for_cf.is_empty() {
return Ok(()); // Rocksdb does not support empty lists
}
if let DbKind::ReadWrite(db) = &self.inner {
let mut paths_by_cf = HashMap::<_, Vec<_>>::new();
for (cf, path) in ssts_for_cf {
@ -941,7 +933,11 @@ impl Reader {
))
}
InnerReader::Transaction(inner) => {
if let Some(inner) = inner.upgrade() {
let Some(inner) = inner.upgrade() else {
return Err(StorageError::Other(
"The transaction is already ended".into(),
));
};
ffi_result!(rocksdb_transaction_get_pinned_cf_with_status(
*inner,
self.options,
@ -949,11 +945,6 @@ impl Reader {
key.as_ptr().cast(),
key.len()
))
} else {
return Err(StorageError::Other(
"The transaction is already ended".into(),
));
}
}
InnerReader::PlainDb(inner) => {
ffi_result!(rocksdb_get_pinned_cf_with_status(
@ -981,6 +972,7 @@ impl Reader {
Ok(self.get(column_family, key)?.is_some()) // TODO: optimize
}
#[allow(clippy::iter_not_returning_iterator)]
pub fn iter(&self, column_family: &ColumnFamily) -> Result<Iter, StorageError> {
self.scan_prefix(column_family, &[])
}
@ -1001,7 +993,7 @@ impl Reader {
break;
}
}
found.then(|| bound)
found.then_some(bound)
};
unsafe {
@ -1022,13 +1014,12 @@ impl Reader {
rocksdb_transactiondb_create_iterator_cf(inner.db.db, options, column_family.0)
}
InnerReader::Transaction(inner) => {
if let Some(inner) = inner.upgrade() {
rocksdb_transaction_create_iterator_cf(*inner, options, column_family.0)
} else {
let Some(inner) = inner.upgrade() else {
return Err(StorageError::Other(
"The transaction is already ended".into(),
));
}
};
rocksdb_transaction_create_iterator_cf(*inner, options, column_family.0)
}
InnerReader::PlainDb(inner) => {
rocksdb_create_iterator_cf(inner.db, options, column_family.0)
@ -1165,7 +1156,7 @@ impl Drop for PinnableSlice {
impl Deref for PinnableSlice {
type Target = [u8];
fn deref(&self) -> &[u8] {
fn deref(&self) -> &Self::Target {
unsafe {
let mut len = 0;
let val = rocksdb_pinnableslice_value(self.0, &mut len);
@ -1200,7 +1191,7 @@ pub struct Buffer {
impl Drop for Buffer {
fn drop(&mut self) {
unsafe {
free(self.base.cast());
rocksdb_free(self.base.cast());
}
}
}
@ -1208,7 +1199,7 @@ impl Drop for Buffer {
impl Deref for Buffer {
type Target = [u8];
fn deref(&self) -> &[u8] {
fn deref(&self) -> &Self::Target {
unsafe { slice::from_raw_parts(self.base, self.len) }
}
}
@ -1236,7 +1227,7 @@ pub struct Iter {
is_currently_valid: bool,
_upper_bound: Option<Vec<u8>>,
_reader: Reader, // needed to ensure that DB still lives while iter is used
options: *mut rocksdb_readoptions_t, // needed to ensure that options still lives while iter is used
options: *mut rocksdb_readoptions_t, /* needed to ensure that options still lives while iter is used */
}
impl Drop for Iter {
@ -1324,6 +1315,8 @@ impl SstFileWriter {
}
}
#[derive(thiserror::Error)]
#[error("{}", self.message())]
struct ErrorStatus(rocksdb_status_t);
unsafe impl Send for ErrorStatus {}
@ -1333,7 +1326,7 @@ impl Drop for ErrorStatus {
fn drop(&mut self) {
if !self.0.string.is_null() {
unsafe {
free(self.0.string as *mut c_void);
rocksdb_free(self.0.string as *mut c_void);
}
}
}
@ -1362,14 +1355,6 @@ impl fmt::Debug for ErrorStatus {
}
}
impl fmt::Display for ErrorStatus {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.message())
}
}
impl Error for ErrorStatus {}
impl From<ErrorStatus> for StorageError {
fn from(status: ErrorStatus) -> Self {
if status.0.code == rocksdb_status_code_t_rocksdb_status_code_io_error {
@ -1394,7 +1379,8 @@ impl From<ErrorStatus> for StorageError {
struct UnsafeEnv(*mut rocksdb_env_t);
// Hack for lazy_static. OK because only written in lazy static and used in a thread-safe way by RocksDB
// Hack for OnceCell. OK because only written in OnceCell and used in a thread-safe way by RocksDB
unsafe impl Send for UnsafeEnv {}
unsafe impl Sync for UnsafeEnv {}
fn path_to_cstring(path: &Path) -> Result<CString, StorageError> {
@ -1413,7 +1399,7 @@ fn path_to_cstring(path: &Path) -> Result<CString, StorageError> {
}
#[cfg(unix)]
fn available_file_descriptors() -> io::Result<Option<u64>> {
fn available_file_descriptors() -> io::Result<Option<libc::rlim_t>> {
let mut rlimit = libc::rlimit {
rlim_cur: 0,
rlim_max: 0,
@ -1426,12 +1412,12 @@ fn available_file_descriptors() -> io::Result<Option<u64>> {
}
#[cfg(windows)]
fn available_file_descriptors() -> io::Result<Option<u64>> {
fn available_file_descriptors() -> io::Result<Option<libc::c_int>> {
Ok(Some(512)) // https://docs.microsoft.com/en-us/cpp/c-runtime-library/file-handling
}
#[cfg(not(any(unix, windows)))]
fn available_file_descriptors() -> io::Result<Option<u64>> {
fn available_file_descriptors() -> io::Result<Option<libc::c_int>> {
Ok(None)
}

@ -2,7 +2,7 @@ use crate::storage::error::{CorruptionError, StorageError};
use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, EncodedTriple, StrHash};
use crate::storage::small_string::SmallString;
use oxsdatatypes::*;
use std::io::{Cursor, Read};
use std::io::Read;
use std::mem::size_of;
#[cfg(not(target_family = "wasm"))]
@ -62,24 +62,23 @@ pub enum QuadEncoding {
}
impl QuadEncoding {
pub fn decode(self, buffer: &[u8]) -> Result<EncodedQuad, StorageError> {
let mut cursor = Cursor::new(&buffer);
pub fn decode(self, mut buffer: &[u8]) -> Result<EncodedQuad, StorageError> {
match self {
Self::Spog => cursor.read_spog_quad(),
Self::Posg => cursor.read_posg_quad(),
Self::Ospg => cursor.read_ospg_quad(),
Self::Gspo => cursor.read_gspo_quad(),
Self::Gpos => cursor.read_gpos_quad(),
Self::Gosp => cursor.read_gosp_quad(),
Self::Dspo => cursor.read_dspo_quad(),
Self::Dpos => cursor.read_dpos_quad(),
Self::Dosp => cursor.read_dosp_quad(),
Self::Spog => buffer.read_spog_quad(),
Self::Posg => buffer.read_posg_quad(),
Self::Ospg => buffer.read_ospg_quad(),
Self::Gspo => buffer.read_gspo_quad(),
Self::Gpos => buffer.read_gpos_quad(),
Self::Gosp => buffer.read_gosp_quad(),
Self::Dspo => buffer.read_dspo_quad(),
Self::Dpos => buffer.read_dpos_quad(),
Self::Dosp => buffer.read_dosp_quad(),
}
}
}
pub fn decode_term(buffer: &[u8]) -> Result<EncodedTerm, StorageError> {
Cursor::new(&buffer).read_term()
pub fn decode_term(mut buffer: &[u8]) -> Result<EncodedTerm, StorageError> {
buffer.read_term()
}
pub trait TermReader {
@ -635,6 +634,8 @@ pub fn write_term(sink: &mut Vec<u8>, term: &EncodedTerm) {
#[cfg(test)]
mod tests {
#![allow(clippy::panic_in_result_fn)]
use super::*;
use crate::model::TermRef;
use crate::storage::numeric_encoder::*;
@ -650,10 +651,6 @@ mod tests {
fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> {
Ok(self.id2str.borrow().get(key).cloned())
}
fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> {
Ok(self.id2str.borrow().contains_key(key))
}
}
impl MemoryStrStore {
@ -741,7 +738,7 @@ mod tests {
let mut buffer = Vec::new();
write_term(&mut buffer, &encoded);
assert_eq!(encoded, Cursor::new(&buffer).read_term().unwrap());
assert_eq!(encoded, buffer.as_slice().read_term().unwrap());
}
}
}

@ -0,0 +1,139 @@
use crate::io::{RdfFormat, RdfParseError};
use crate::storage::numeric_encoder::EncodedTerm;
use oxiri::IriParseError;
use oxrdf::TermRef;
use std::error::Error;
use std::io;
/// An error related to storage operations (reads, writes...).
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum StorageError {
/// Error from the OS I/O layer.
#[error(transparent)]
Io(#[from] io::Error),
/// Error related to data corruption.
#[error(transparent)]
Corruption(#[from] CorruptionError),
#[doc(hidden)]
#[error("{0}")]
Other(#[source] Box<dyn Error + Send + Sync + 'static>),
}
impl From<StorageError> for io::Error {
#[inline]
fn from(error: StorageError) -> Self {
match error {
StorageError::Io(error) => error,
StorageError::Corruption(error) => error.into(),
StorageError::Other(error) => Self::new(io::ErrorKind::Other, error),
}
}
}
/// An error return if some content in the database is corrupted.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct CorruptionError(#[from] CorruptionErrorKind);
/// An error return if some content in the database is corrupted.
#[derive(Debug, thiserror::Error)]
enum CorruptionErrorKind {
#[error("{0}")]
Msg(String),
#[error("{0}")]
Other(#[source] Box<dyn Error + Send + Sync + 'static>),
}
impl CorruptionError {
/// Builds an error from a printable error message.
#[inline]
pub(crate) fn new(error: impl Into<Box<dyn Error + Send + Sync + 'static>>) -> Self {
Self(CorruptionErrorKind::Other(error.into()))
}
#[inline]
pub(crate) fn from_encoded_term(encoded: &EncodedTerm, term: &TermRef<'_>) -> Self {
// TODO: eventually use a dedicated error enum value
Self::msg(format!("Invalid term encoding {encoded:?} for {term}"))
}
#[inline]
pub(crate) fn from_missing_column_family_name(name: &'static str) -> Self {
// TODO: eventually use a dedicated error enum value
Self::msg(format!("Column family {name} does not exist"))
}
/// Builds an error from a printable error message.
#[inline]
pub(crate) fn msg(msg: impl Into<String>) -> Self {
Self(CorruptionErrorKind::Msg(msg.into()))
}
}
impl From<CorruptionError> for io::Error {
#[inline]
fn from(error: CorruptionError) -> Self {
Self::new(io::ErrorKind::InvalidData, error)
}
}
/// An error raised while loading a file into a [`Store`](crate::store::Store).
#[derive(Debug, thiserror::Error)]
pub enum LoaderError {
/// An error raised while reading the file.
#[error(transparent)]
Parsing(#[from] RdfParseError),
/// An error raised during the insertion in the store.
#[error(transparent)]
Storage(#[from] StorageError),
/// The base IRI is invalid.
#[error("Invalid base IRI '{iri}': {error}")]
InvalidBaseIri {
/// The IRI itself.
iri: String,
/// The parsing error.
#[source]
error: IriParseError,
},
}
impl From<LoaderError> for io::Error {
#[inline]
fn from(error: LoaderError) -> Self {
match error {
LoaderError::Storage(error) => error.into(),
LoaderError::Parsing(error) => error.into(),
LoaderError::InvalidBaseIri { .. } => {
Self::new(io::ErrorKind::InvalidInput, error.to_string())
}
}
}
}
/// An error raised while writing a file from a [`Store`](crate::store::Store).
#[derive(Debug, thiserror::Error)]
pub enum SerializerError {
/// An error raised while writing the content.
#[error(transparent)]
Io(#[from] io::Error),
/// An error raised during the lookup in the store.
#[error(transparent)]
Storage(#[from] StorageError),
/// A format compatible with [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) is required.
#[error("A RDF format supporting datasets was expected, {0} found")]
DatasetFormatExpected(RdfFormat),
}
impl From<SerializerError> for io::Error {
#[inline]
fn from(error: SerializerError) -> Self {
match error {
SerializerError::Storage(error) => error.into(),
SerializerError::Io(error) => error,
SerializerError::DatasetFormatExpected(_) => {
Self::new(io::ErrorKind::InvalidInput, error.to_string())
}
}
}
}

@ -22,13 +22,9 @@ use std::mem::swap;
#[cfg(not(target_family = "wasm"))]
use std::path::Path;
#[cfg(not(target_family = "wasm"))]
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Mutex;
#[cfg(not(target_family = "wasm"))]
use std::sync::Arc;
#[cfg(not(target_family = "wasm"))]
use std::thread::spawn;
#[cfg(not(target_family = "wasm"))]
use std::thread::JoinHandle;
use std::{io, thread};
use self::storage_generator::StorageGenerator;
@ -240,6 +236,13 @@ impl StorageReader {
pub fn validate(&self) -> Result<(), StorageError> {
Ok(())
}
/// Validates that all the storage invariants held in the data
#[cfg(target_family = "wasm")]
#[allow(clippy::unused_self, clippy::unnecessary_wraps)]
pub fn validate(&self) -> Result<(), StorageError> {
Ok(()) // TODO
}
}
pub struct ChainedDecodingQuadIterator {
@ -266,7 +269,7 @@ impl ChainedDecodingQuadIterator {
impl Iterator for ChainedDecodingQuadIterator {
type Item = Result<EncodedQuad, StorageError>;
fn next(&mut self) -> Option<Result<EncodedQuad, StorageError>> {
fn next(&mut self) -> Option<Self::Item> {
if let Some(result) = self.first.next() {
Some(result)
} else if let Some(second) = self.second.as_mut() {
@ -316,10 +319,6 @@ impl StrLookup for StorageReader {
fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> {
self.get_str(key)
}
fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> {
self.contains_str(key)
}
}
pub struct StorageWriter<'a> {
@ -646,6 +645,7 @@ impl<'a> StorageWriter<'a> {
}
#[cfg(not(target_family = "wasm"))]
#[must_use]
pub struct StorageBulkLoader {
storage: Storage,
hooks: Vec<Box<dyn Fn(u64)>>,
@ -664,12 +664,12 @@ impl StorageBulkLoader {
}
}
pub fn set_num_threads(mut self, num_threads: usize) -> Self {
pub fn with_num_threads(mut self, num_threads: usize) -> Self {
self.num_threads = Some(num_threads);
self
}
pub fn set_max_memory_size_in_megabytes(mut self, max_memory_size: usize) -> Self {
pub fn with_max_memory_size_in_megabytes(mut self, max_memory_size: usize) -> Self {
self.max_memory_size = Some(max_memory_size);
self
}
@ -701,10 +701,11 @@ impl StorageBulkLoader {
)
.into());
}
let done_counter = Mutex::new(0);
let mut done_and_displayed_counter = 0;
thread::scope(|thread_scope| {
let mut threads = VecDeque::with_capacity(num_threads - 1);
let mut buffer = Vec::with_capacity(batch_size);
let done_counter = Arc::new(AtomicU64::new(0));
let mut done_and_displayed_counter = 0;
for quad in quads {
let quad = quad?;
buffer.push(quad);
@ -712,6 +713,7 @@ impl StorageBulkLoader {
self.spawn_load_thread(
&mut buffer,
&mut threads,
thread_scope,
&done_counter,
&mut done_and_displayed_counter,
num_threads,
@ -722,60 +724,69 @@ impl StorageBulkLoader {
self.spawn_load_thread(
&mut buffer,
&mut threads,
thread_scope,
&done_counter,
&mut done_and_displayed_counter,
num_threads,
batch_size,
)?;
for thread in threads {
thread.join().unwrap()?;
self.on_possible_progress(&done_counter, &mut done_and_displayed_counter);
map_thread_result(thread.join()).map_err(StorageError::Io)??;
self.on_possible_progress(&done_counter, &mut done_and_displayed_counter)?;
}
Ok(())
})
}
fn spawn_load_thread(
&self,
fn spawn_load_thread<'scope>(
&'scope self,
buffer: &mut Vec<Quad>,
threads: &mut VecDeque<JoinHandle<Result<(), StorageError>>>,
done_counter: &Arc<AtomicU64>,
threads: &mut VecDeque<thread::ScopedJoinHandle<'scope, Result<(), StorageError>>>,
thread_scope: &'scope thread::Scope<'scope, '_>,
done_counter: &'scope Mutex<u64>,
done_and_displayed_counter: &mut u64,
num_threads: usize,
batch_size: usize,
) -> Result<(), StorageError> {
self.on_possible_progress(done_counter, done_and_displayed_counter);
self.on_possible_progress(done_counter, done_and_displayed_counter)?;
// We avoid to have too many threads
if threads.len() >= num_threads {
if let Some(thread) = threads.pop_front() {
thread.join().unwrap()?;
self.on_possible_progress(done_counter, done_and_displayed_counter);
map_thread_result(thread.join()).map_err(StorageError::Io)??;
self.on_possible_progress(done_counter, done_and_displayed_counter)?;
}
}
let mut buffer_to_load = Vec::with_capacity(batch_size);
swap(buffer, &mut buffer_to_load);
let storage = self.storage.clone();
let done_counter_clone = Arc::clone(done_counter);
threads.push_back(spawn(move || {
FileBulkLoader::new(storage, batch_size).load(buffer_to_load, &done_counter_clone)
let storage = &self.storage;
threads.push_back(thread_scope.spawn(move || {
FileBulkLoader::new(storage, batch_size).load(buffer_to_load, done_counter)
}));
Ok(())
}
fn on_possible_progress(&self, done: &AtomicU64, done_and_displayed: &mut u64) {
let new_counter = done.load(Ordering::Relaxed);
let display_step = u64::try_from(DEFAULT_BULK_LOAD_BATCH_SIZE).unwrap();
fn on_possible_progress(
&self,
done: &Mutex<u64>,
done_and_displayed: &mut u64,
) -> Result<(), StorageError> {
let new_counter = *done
.lock()
.map_err(|_| io::Error::new(io::ErrorKind::Other, "Mutex poisoned"))?;
let display_step = DEFAULT_BULK_LOAD_BATCH_SIZE as u64;
if new_counter / display_step > *done_and_displayed / display_step {
for hook in &self.hooks {
hook(new_counter);
}
}
*done_and_displayed = new_counter;
Ok(())
}
}
#[cfg(not(target_family = "wasm"))]
struct FileBulkLoader {
storage: Storage,
struct FileBulkLoader<'a> {
storage: &'a Storage,
id2str: HashMap<StrHash, Box<str>>,
quads: HashSet<EncodedQuad>,
triples: HashSet<EncodedQuad>,
@ -783,8 +794,8 @@ struct FileBulkLoader {
}
#[cfg(not(target_family = "wasm"))]
impl FileBulkLoader {
fn new(storage: Storage, batch_size: usize) -> Self {
impl<'a> FileBulkLoader<'a> {
fn new(storage: &'a Storage, batch_size: usize) -> Self {
Self {
storage,
id2str: HashMap::with_capacity(3 * batch_size),
@ -794,11 +805,14 @@ impl FileBulkLoader {
}
}
fn load(&mut self, quads: Vec<Quad>, counter: &AtomicU64) -> Result<(), StorageError> {
fn load(&mut self, quads: Vec<Quad>, counter: &Mutex<u64>) -> Result<(), StorageError> {
self.encode(quads)?;
let size = self.triples.len() + self.quads.len();
//self.save()?;
counter.fetch_add(size.try_into().unwrap(), Ordering::Relaxed);
*counter
.lock()
.map_err(|_| io::Error::new(io::ErrorKind::Other, "Mutex poisoned"))? +=
size.try_into().unwrap_or(u64::MAX);
Ok(())
}
@ -821,7 +835,12 @@ impl FileBulkLoader {
match quad.graph_name.as_ref() {
GraphNameRef::NamedNode(n) => n.into(),
GraphNameRef::BlankNode(n) => n.into(),
GraphNameRef::DefaultGraph => unreachable!(),
GraphNameRef::DefaultGraph => {
return Err(CorruptionError::new(
"Default graph this not the default graph",
)
.into())
}
},
&encoded.graph_name,
)?;
@ -979,3 +998,17 @@ impl FileBulkLoader {
// sst.finish()
// }
}
#[cfg(not(target_family = "wasm"))]
fn map_thread_result<R>(result: thread::Result<R>) -> io::Result<R> {
result.map_err(|e| {
io::Error::new(
io::ErrorKind::Other,
if let Ok(e) = e.downcast::<&dyn std::fmt::Display>() {
format!("A loader processed crashed with {e}")
} else {
"A loader processed crashed with and unknown error".into()
},
)
})
}

@ -6,10 +6,9 @@ use crate::storage::small_string::SmallString;
use oxsdatatypes::*;
use siphasher::sip128::{Hasher128, SipHasher24};
use std::fmt::Debug;
use std::hash::Hash;
use std::hash::Hasher;
use std::rc::Rc;
use std::hash::{Hash, Hasher};
use std::str;
use std::sync::Arc;
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[repr(transparent)]
@ -97,7 +96,7 @@ pub enum EncodedTerm {
DurationLiteral(Duration),
YearMonthDurationLiteral(YearMonthDuration),
DayTimeDurationLiteral(DayTimeDuration),
Triple(Rc<EncodedTriple>),
Triple(Arc<EncodedTriple>),
}
impl PartialEq for EncodedTerm {
@ -191,24 +190,24 @@ impl PartialEq for EncodedTerm {
},
) => value_id_a == value_id_b && datatype_id_a == datatype_id_b,
(Self::BooleanLiteral(a), Self::BooleanLiteral(b)) => a == b,
(Self::FloatLiteral(a), Self::FloatLiteral(b)) => a.is_identical_with(b),
(Self::DoubleLiteral(a), Self::DoubleLiteral(b)) => a.is_identical_with(b),
(Self::IntegerLiteral(a), Self::IntegerLiteral(b)) => a.is_identical_with(b),
(Self::DecimalLiteral(a), Self::DecimalLiteral(b)) => a.is_identical_with(b),
(Self::DateTimeLiteral(a), Self::DateTimeLiteral(b)) => a.is_identical_with(b),
(Self::TimeLiteral(a), Self::TimeLiteral(b)) => a.is_identical_with(b),
(Self::DateLiteral(a), Self::DateLiteral(b)) => a.is_identical_with(b),
(Self::GYearMonthLiteral(a), Self::GYearMonthLiteral(b)) => a.is_identical_with(b),
(Self::GYearLiteral(a), Self::GYearLiteral(b)) => a.is_identical_with(b),
(Self::GMonthDayLiteral(a), Self::GMonthDayLiteral(b)) => a.is_identical_with(b),
(Self::GMonthLiteral(a), Self::GMonthLiteral(b)) => a.is_identical_with(b),
(Self::GDayLiteral(a), Self::GDayLiteral(b)) => a.is_identical_with(b),
(Self::DurationLiteral(a), Self::DurationLiteral(b)) => a.is_identical_with(b),
(Self::FloatLiteral(a), Self::FloatLiteral(b)) => a.is_identical_with(*b),
(Self::DoubleLiteral(a), Self::DoubleLiteral(b)) => a.is_identical_with(*b),
(Self::IntegerLiteral(a), Self::IntegerLiteral(b)) => a.is_identical_with(*b),
(Self::DecimalLiteral(a), Self::DecimalLiteral(b)) => a.is_identical_with(*b),
(Self::DateTimeLiteral(a), Self::DateTimeLiteral(b)) => a.is_identical_with(*b),
(Self::TimeLiteral(a), Self::TimeLiteral(b)) => a.is_identical_with(*b),
(Self::DateLiteral(a), Self::DateLiteral(b)) => a.is_identical_with(*b),
(Self::GYearMonthLiteral(a), Self::GYearMonthLiteral(b)) => a.is_identical_with(*b),
(Self::GYearLiteral(a), Self::GYearLiteral(b)) => a.is_identical_with(*b),
(Self::GMonthDayLiteral(a), Self::GMonthDayLiteral(b)) => a.is_identical_with(*b),
(Self::GMonthLiteral(a), Self::GMonthLiteral(b)) => a.is_identical_with(*b),
(Self::GDayLiteral(a), Self::GDayLiteral(b)) => a.is_identical_with(*b),
(Self::DurationLiteral(a), Self::DurationLiteral(b)) => a.is_identical_with(*b),
(Self::YearMonthDurationLiteral(a), Self::YearMonthDurationLiteral(b)) => {
a.is_identical_with(b)
a.is_identical_with(*b)
}
(Self::DayTimeDurationLiteral(a), Self::DayTimeDurationLiteral(b)) => {
a.is_identical_with(b)
a.is_identical_with(*b)
}
(Self::Triple(a), Self::Triple(b)) => a == b,
(_, _) => false,
@ -486,7 +485,7 @@ impl From<DayTimeDuration> for EncodedTerm {
impl From<EncodedTriple> for EncodedTerm {
fn from(value: EncodedTriple) -> Self {
Self::Triple(Rc::new(value))
Self::Triple(Arc::new(value))
}
}
@ -650,7 +649,7 @@ impl From<GraphNameRef<'_>> for EncodedTerm {
impl From<TripleRef<'_>> for EncodedTerm {
fn from(triple: TripleRef<'_>) -> Self {
Self::Triple(Rc::new(triple.into()))
Self::Triple(Arc::new(triple.into()))
}
}
@ -718,8 +717,6 @@ impl From<QuadRef<'_>> for EncodedQuad {
pub trait StrLookup {
fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError>;
fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError>;
}
pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
@ -732,13 +729,13 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
if let EncodedTerm::NamedNode { iri_id, value } = encoded {
insert_str(iri_id, node.as_str())
} else {
unreachable!("Invalid term encoding {:?} for {}", encoded, term)
Err(CorruptionError::from_encoded_term(encoded, &term).into())
}
}
TermRef::BlankNode(node) => match encoded {
EncodedTerm::BigBlankNode { id_id } => insert_str(id_id, node.as_str()),
EncodedTerm::SmallBlankNode(..) | EncodedTerm::NumericalBlankNode { .. } => Ok(()),
_ => unreachable!("Invalid term encoding {:?} for {}", encoded, term),
_ => Err(CorruptionError::from_encoded_term(encoded, &term).into()),
},
TermRef::Literal(literal) => match encoded {
EncodedTerm::BigStringLiteral { value_id }
@ -749,7 +746,7 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
if let Some(language) = literal.language() {
insert_str(language_id, language)
} else {
unreachable!("Invalid term encoding {:?} for {}", encoded, term)
Err(CorruptionError::from_encoded_term(encoded, &term).into())
}
}
EncodedTerm::BigBigLangStringLiteral {
@ -760,7 +757,7 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
if let Some(language) = literal.language() {
insert_str(language_id, language)
} else {
unreachable!("Invalid term encoding {:?} for {}", encoded, term)
Err(CorruptionError::from_encoded_term(encoded, &term).into())
}
}
EncodedTerm::SmallTypedLiteral { datatype_id, .. } => {
@ -791,7 +788,7 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
| EncodedTerm::DurationLiteral(..)
| EncodedTerm::YearMonthDurationLiteral(..)
| EncodedTerm::DayTimeDurationLiteral(..) => Ok(()),
_ => unreachable!("Invalid term encoding {:?} for {}", encoded, term),
_ => Err(CorruptionError::from_encoded_term(encoded, &term).into()),
},
TermRef::Triple(triple) => {
if let EncodedTerm::Triple(encoded) = encoded {
@ -803,7 +800,7 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
)?;
insert_term(triple.object.as_ref(), &encoded.object, insert_str)
} else {
unreachable!("Invalid term encoding {:?} for {}", encoded, term)
Err(CorruptionError::from_encoded_term(encoded, &term).into())
}
}
}

@ -1,11 +1,9 @@
use std::borrow::Borrow;
use std::cmp::Ordering;
use std::error::Error;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::str;
use std::str::{FromStr, Utf8Error};
use std::{fmt, str};
/// A small inline string
#[derive(Clone, Copy, Default)]
@ -46,10 +44,8 @@ impl SmallString {
#[inline]
#[allow(unsafe_code)]
pub fn as_str(&self) -> &str {
unsafe {
// safe because we ensured it in constructors
str::from_utf8_unchecked(self.as_bytes())
}
// SAFETY: safe because we ensured it in constructors
unsafe { str::from_utf8_unchecked(self.as_bytes()) }
}
#[inline]
@ -67,7 +63,7 @@ impl Deref for SmallString {
type Target = str;
#[inline]
fn deref(&self) -> &str {
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
@ -103,7 +99,7 @@ impl fmt::Display for SmallString {
impl PartialEq for SmallString {
#[inline]
fn eq(&self, other: &Self) -> bool {
self.as_str().eq(&**other)
self.as_str() == other.as_str()
}
}
@ -148,17 +144,17 @@ impl FromStr for SmallString {
type Err = BadSmallStringError;
#[inline]
fn from_str(value: &str) -> Result<Self, BadSmallStringError> {
fn from_str(value: &str) -> Result<Self, Self::Err> {
if value.len() <= 15 {
let mut inner = [0; 16];
inner[..value.len()].copy_from_slice(value.as_bytes());
inner[15] = value
.len()
.try_into()
.map_err(|_| BadSmallStringError::TooLong(value.len()))?;
.map_err(|_| Self::Err::TooLong(value.len()))?;
Ok(Self { inner })
} else {
Err(BadSmallStringError::TooLong(value.len()))
Err(Self::Err::TooLong(value.len()))
}
}
}
@ -167,36 +163,15 @@ impl<'a> TryFrom<&'a str> for SmallString {
type Error = BadSmallStringError;
#[inline]
fn try_from(value: &'a str) -> Result<Self, BadSmallStringError> {
fn try_from(value: &'a str) -> Result<Self, Self::Error> {
Self::from_str(value)
}
}
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, thiserror::Error)]
pub enum BadSmallStringError {
#[error("small strings could only contain at most 15 characters, found {0}")]
TooLong(usize),
BadUtf8(Utf8Error),
}
impl fmt::Display for BadSmallStringError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::TooLong(v) => write!(
f,
"small strings could only contain at most 15 characters, found {v}"
),
Self::BadUtf8(e) => e.fmt(f),
}
}
}
impl Error for BadSmallStringError {
#[inline]
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::TooLong(_) => None,
Self::BadUtf8(e) => Some(e),
}
}
#[error(transparent)]
BadUtf8(#[from] Utf8Error),
}

@ -668,9 +668,9 @@ impl StrLookup for StorageGenerator {
self.get_str(key)
}
fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> {
self.contains_str(key)
}
//fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> {
// self.contains_str(key)
//}
}
#[cfg(test)]

File diff suppressed because it is too large Load Diff

@ -1,4 +1,7 @@
use oxigraph::io::{DatasetFormat, GraphFormat};
#![cfg(test)]
#![allow(clippy::panic_in_result_fn)]
use oxigraph::io::RdfFormat;
use oxigraph::model::vocab::{rdf, xsd};
use oxigraph::model::*;
use oxigraph::store::Store;
@ -8,10 +11,11 @@ use rand::random;
use std::env::temp_dir;
use std::error::Error;
#[cfg(not(target_family = "wasm"))]
use std::fs::{create_dir, remove_dir_all, File};
use std::io::Cursor;
use std::fs::{create_dir_all, remove_dir_all, File};
#[cfg(not(target_family = "wasm"))]
use std::io::Write;
#[cfg(not(target_family = "wasm"))]
use std::iter::empty;
#[cfg(target_os = "linux")]
use std::iter::once;
#[cfg(not(target_family = "wasm"))]
@ -74,7 +78,7 @@ fn quads(graph_name: impl Into<GraphNameRef<'static>>) -> Vec<QuadRef<'static>>
QuadRef::new(
paris,
name,
LiteralRef::new_language_tagged_literal_unchecked("la ville lumi\u{e8}re", "fr"),
LiteralRef::new_language_tagged_literal_unchecked("la ville lumi\u{E8}re", "fr"),
graph_name,
),
QuadRef::new(paris, country, france, graph_name),
@ -108,12 +112,7 @@ fn quads(graph_name: impl Into<GraphNameRef<'static>>) -> Vec<QuadRef<'static>>
#[test]
fn test_load_graph() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
store.load_graph(
Cursor::new(DATA),
GraphFormat::Turtle,
GraphNameRef::DefaultGraph,
None,
)?;
store.load_from_read(RdfFormat::Turtle, DATA.as_bytes())?;
for q in quads(GraphNameRef::DefaultGraph) {
assert!(store.contains(q)?);
}
@ -125,12 +124,9 @@ fn test_load_graph() -> Result<(), Box<dyn Error>> {
#[cfg(not(target_family = "wasm"))]
fn test_bulk_load_graph() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
store.bulk_loader().load_graph(
Cursor::new(DATA),
GraphFormat::Turtle,
GraphNameRef::DefaultGraph,
None,
)?;
store
.bulk_loader()
.load_from_read(RdfFormat::Turtle, DATA.as_bytes())?;
for q in quads(GraphNameRef::DefaultGraph) {
assert!(store.contains(q)?);
}
@ -143,11 +139,9 @@ fn test_bulk_load_graph() -> Result<(), Box<dyn Error>> {
#[cfg(not(target_family = "wasm"))]
fn test_bulk_load_graph_lenient() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
store.bulk_loader().on_parse_error(|_| Ok(())).load_graph(
Cursor::new(b"<http://example.com> <http://example.com> <http://example.com##> .\n<http://example.com> <http://example.com> <http://example.com> ."),
GraphFormat::NTriples,
GraphNameRef::DefaultGraph,
None,
store.bulk_loader().on_parse_error(|_| Ok(())).load_from_read(
RdfFormat::NTriples,
b"<http://example.com> <http://example.com> <http://example.com##> .\n<http://example.com> <http://example.com> <http://example.com> .".as_slice(),
)?;
assert_eq!(store.len()?, 1);
assert!(store.contains(QuadRef::new(
@ -160,10 +154,20 @@ fn test_bulk_load_graph_lenient() -> Result<(), Box<dyn Error>> {
Ok(())
}
#[test]
#[cfg(not(target_family = "wasm"))]
fn test_bulk_load_empty() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
store.bulk_loader().load_quads(empty::<Quad>())?;
assert!(store.is_empty()?);
store.validate()?;
Ok(())
}
#[test]
fn test_load_dataset() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
store.load_dataset(Cursor::new(GRAPH_DATA), DatasetFormat::TriG, None)?;
store.load_from_read(RdfFormat::TriG, GRAPH_DATA.as_bytes())?;
for q in quads(NamedNodeRef::new_unchecked(
"http://www.wikidata.org/wiki/Special:EntityData/Q90",
)) {
@ -179,7 +183,7 @@ fn test_bulk_load_dataset() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
store
.bulk_loader()
.load_dataset(Cursor::new(GRAPH_DATA), DatasetFormat::TriG, None)?;
.load_from_read(RdfFormat::TriG, GRAPH_DATA.as_bytes())?;
let graph_name =
NamedNodeRef::new_unchecked("http://www.wikidata.org/wiki/Special:EntityData/Q90");
for q in quads(graph_name) {
@ -195,11 +199,9 @@ fn test_bulk_load_dataset() -> Result<(), Box<dyn Error>> {
fn test_load_graph_generates_new_blank_nodes() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
for _ in 0..2 {
store.load_graph(
Cursor::new("_:a <http://example.com/p> <http://example.com/p> ."),
GraphFormat::NTriples,
GraphNameRef::DefaultGraph,
None,
store.load_from_read(
RdfFormat::NTriples,
"_:a <http://example.com/p> <http://example.com/p> .".as_bytes(),
)?;
}
assert_eq!(store.len()?, 2);
@ -215,11 +217,7 @@ fn test_dump_graph() -> Result<(), Box<dyn Error>> {
}
let mut buffer = Vec::new();
store.dump_graph(
&mut buffer,
GraphFormat::NTriples,
GraphNameRef::DefaultGraph,
)?;
store.dump_graph_to_write(GraphNameRef::DefaultGraph, RdfFormat::NTriples, &mut buffer)?;
assert_eq!(
buffer.into_iter().filter(|c| *c == b'\n').count(),
NUMBER_OF_TRIPLES
@ -235,8 +233,7 @@ fn test_dump_dataset() -> Result<(), Box<dyn Error>> {
store.insert(q)?;
}
let mut buffer = Vec::new();
store.dump_dataset(&mut buffer, DatasetFormat::NQuads)?;
let buffer = store.dump_to_write(RdfFormat::NQuads, Vec::new())?;
assert_eq!(
buffer.into_iter().filter(|c| *c == b'\n').count(),
NUMBER_OF_TRIPLES
@ -248,10 +245,10 @@ fn test_dump_dataset() -> Result<(), Box<dyn Error>> {
#[test]
fn test_snapshot_isolation_iterator() -> Result<(), Box<dyn Error>> {
let quad = QuadRef::new(
NamedNodeRef::new_unchecked("http://example.com/s"),
NamedNodeRef::new_unchecked("http://example.com/p"),
NamedNodeRef::new_unchecked("http://example.com/o"),
NamedNodeRef::new_unchecked("http://www.wikidata.org/wiki/Special:EntityData/Q90"),
NamedNodeRef::new("http://example.com/s")?,
NamedNodeRef::new("http://example.com/p")?,
NamedNodeRef::new("http://example.com/o")?,
NamedNodeRef::new("http://www.wikidata.org/wiki/Special:EntityData/Q90")?,
);
let store = Store::new()?;
store.insert(quad)?;
@ -286,7 +283,7 @@ fn test_bulk_load_on_existing_delete_overrides_the_delete() -> Result<(), Box<dy
#[cfg(not(target_family = "wasm"))]
fn test_open_bad_dir() -> Result<(), Box<dyn Error>> {
let dir = TempDir::default();
create_dir(&dir.0)?;
create_dir_all(&dir.0)?;
{
File::create(dir.0.join("CURRENT"))?.write_all(b"foo")?;
}
@ -301,15 +298,15 @@ fn test_bad_stt_open() -> Result<(), Box<dyn Error>> {
let dir = TempDir::default();
let store = Store::open(&dir.0)?;
remove_dir_all(&dir.0)?;
assert!(store
store
.bulk_loader()
.load_quads(once(Quad::new(
NamedNode::new_unchecked("http://example.com/s"),
NamedNode::new_unchecked("http://example.com/p"),
NamedNode::new_unchecked("http://example.com/o"),
GraphName::DefaultGraph
GraphName::DefaultGraph,
)))
.is_err());
.unwrap_err();
Ok(())
}
@ -361,8 +358,8 @@ fn test_bad_backup() -> Result<(), Box<dyn Error>> {
let store_dir = TempDir::default();
let backup_dir = TempDir::default();
create_dir(&backup_dir.0)?;
assert!(Store::open(&store_dir)?.backup(&backup_dir.0).is_err());
create_dir_all(&backup_dir.0)?;
Store::open(&store_dir)?.backup(&backup_dir.0).unwrap_err();
Ok(())
}
@ -371,7 +368,7 @@ fn test_bad_backup() -> Result<(), Box<dyn Error>> {
#[cfg(not(target_family = "wasm"))]
fn test_backup_on_in_memory() -> Result<(), Box<dyn Error>> {
let backup_dir = TempDir::default();
assert!(Store::new()?.backup(&backup_dir).is_err());
Store::new()?.backup(&backup_dir).unwrap_err();
Ok(())
}
@ -448,7 +445,7 @@ fn test_secondary() -> Result<(), Box<dyn Error>> {
#[cfg(not(target_family = "wasm"))]
fn test_open_secondary_bad_dir() -> Result<(), Box<dyn Error>> {
let primary_dir = TempDir::default();
create_dir(&primary_dir.0)?;
create_dir_all(&primary_dir.0)?;
{
File::create(primary_dir.0.join("CURRENT"))?.write_all(b"foo")?;
}
@ -510,7 +507,7 @@ fn test_read_only() -> Result<(), Box<dyn Error>> {
#[cfg(not(target_family = "wasm"))]
fn test_open_read_only_bad_dir() -> Result<(), Box<dyn Error>> {
let dir = TempDir::default();
create_dir(&dir.0)?;
create_dir_all(&dir.0)?;
{
File::create(dir.0.join("CURRENT"))?.write_all(b"foo")?;
}

@ -1,28 +1,32 @@
[package]
name = "oxrdf"
version = "0.1.7"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
version = "0.2.0-alpha.2"
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxrdf"
homepage = "https://oxigraph.org/"
description = """
A library providing basic data structures related to RDF
"""
documentation = "https://docs.rs/oxrdf"
edition = "2021"
rust-version = "1.60"
edition.workspace = true
rust-version.workspace = true
[features]
default = []
rdf-star = []
[dependencies]
rand = "0.8"
oxilangtag = "0.1"
oxiri = "0.2"
oxsdatatypes = { version = "0.1.3", path="../oxsdatatypes", optional = true }
oxilangtag.workspace = true
oxiri.workspace = true
oxsdatatypes = { workspace = true, optional = true }
rand.workspace = true
thiserror.workspace = true
[lints]
workspace = true
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]

@ -5,7 +5,7 @@ OxRDF
[![Released API docs](https://docs.rs/oxrdf/badge.svg)](https://docs.rs/oxrdf)
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdf)](https://crates.io/crates/oxrdf)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
OxRDF is a simple library providing datastructures encoding [RDF 1.1 concepts](https://www.w3.org/TR/rdf11-concepts/).
@ -15,6 +15,8 @@ Support for [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) i
OxRDF is inspired by [RDF/JS](https://rdf.js.org/data-model-spec/) and [Apache Commons RDF](http://commons.apache.org/proper/commons-rdf/).
Use [`oxrdfio`](https://crates.io/crates/oxrdfio) if you need to read or write RDF files.
Usage example:
```rust

@ -1,8 +1,6 @@
use rand::random;
use std::error::Error;
use std::fmt;
use std::io::Write;
use std::str;
use std::{fmt, str};
/// An owned RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
///
@ -15,10 +13,7 @@ use std::str;
/// ```
/// use oxrdf::BlankNode;
///
/// assert_eq!(
/// "_:a122",
/// BlankNode::new("a122")?.to_string()
/// );
/// assert_eq!("_:a122", BlankNode::new("a122")?.to_string());
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
@ -111,7 +106,14 @@ impl Default for BlankNode {
/// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id.
#[inline]
fn default() -> Self {
Self::new_from_unique_id(random::<u128>())
// We ensure the ID does not start with a number to be also valid with RDF/XML
loop {
let id = random();
let str = IdStr::new(id);
if matches!(str.as_str().as_bytes().first(), Some(b'a'..=b'f')) {
return Self(BlankNodeContent::Anonymous { id, str });
}
}
}
}
@ -126,10 +128,7 @@ impl Default for BlankNode {
/// ```
/// use oxrdf::BlankNodeRef;
///
/// assert_eq!(
/// "_:a122",
/// BlankNodeRef::new("a122")?.to_string()
/// );
/// assert_eq!("_:a122", BlankNodeRef::new("a122")?.to_string());
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
@ -173,7 +172,7 @@ impl<'a> BlankNodeRef<'a> {
/// Returns the underlying ID of this blank node.
#[inline]
pub fn as_str(self) -> &'a str {
pub const fn as_str(self) -> &'a str {
match self.0 {
BlankNodeRefContent::Named(id) => id,
BlankNodeRefContent::Anonymous { str, .. } => str,
@ -185,12 +184,15 @@ impl<'a> BlankNodeRef<'a> {
/// ```
/// use oxrdf::BlankNode;
///
/// assert_eq!(BlankNode::new_from_unique_id(128).as_ref().unique_id(), Some(128));
/// assert_eq!(
/// BlankNode::new_from_unique_id(128).as_ref().unique_id(),
/// Some(128)
/// );
/// assert_eq!(BlankNode::new("foo")?.as_ref().unique_id(), None);
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
/// ```
#[inline]
pub fn unique_id(&self) -> Option<u128> {
pub const fn unique_id(&self) -> Option<u128> {
match self.0 {
BlankNodeRefContent::Named(_) => None,
BlankNodeRefContent::Anonymous { id, .. } => Some(id),
@ -264,7 +266,7 @@ impl IdStr {
fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError> {
let mut chars = id.chars();
let front = chars.next().ok_or(BlankNodeIdParseError {})?;
let front = chars.next().ok_or(BlankNodeIdParseError)?;
match front {
'0'..='9'
| '_'
@ -283,7 +285,7 @@ fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError>
| '\u{F900}'..='\u{FDCF}'
| '\u{FDF0}'..='\u{FFFD}'
| '\u{10000}'..='\u{EFFFF}' => (),
_ => return Err(BlankNodeIdParseError {}),
_ => return Err(BlankNodeIdParseError),
}
for c in chars {
match c {
@ -309,13 +311,13 @@ fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError>
| '\u{F900}'..='\u{FDCF}'
| '\u{FDF0}'..='\u{FFFD}'
| '\u{10000}'..='\u{EFFFF}' => (),
_ => return Err(BlankNodeIdParseError {}),
_ => return Err(BlankNodeIdParseError),
}
}
// Could not end with a dot
if id.ends_with('.') {
Err(BlankNodeIdParseError {})
Err(BlankNodeIdParseError)
} else {
Ok(())
}
@ -342,20 +344,14 @@ fn to_integer_id(id: &str) -> Option<u128> {
}
/// An error raised during [`BlankNode`] IDs validation.
#[derive(Debug)]
pub struct BlankNodeIdParseError {}
impl fmt::Display for BlankNodeIdParseError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "The blank node identifier is invalid")
}
}
impl Error for BlankNodeIdParseError {}
#[derive(Debug, thiserror::Error)]
#[error("The blank node identifier is invalid")]
pub struct BlankNodeIdParseError;
#[cfg(test)]
mod tests {
#![allow(clippy::panic_in_result_fn)]
use super::*;
#[test]
@ -372,13 +368,13 @@ mod tests {
#[test]
fn new_validation() {
assert!(BlankNode::new("").is_err());
assert!(BlankNode::new("a").is_ok());
assert!(BlankNode::new("-").is_err());
assert!(BlankNode::new("a-").is_ok());
assert!(BlankNode::new(".").is_err());
assert!(BlankNode::new("a.").is_err());
assert!(BlankNode::new("a.a").is_ok());
BlankNode::new("").unwrap_err();
BlankNode::new("a").unwrap();
BlankNode::new("-").unwrap_err();
BlankNode::new("a-").unwrap();
BlankNode::new(".").unwrap_err();
BlankNode::new("a.").unwrap_err();
BlankNode::new("a.a").unwrap();
}
#[test]

@ -20,26 +20,29 @@
//! assert_eq!(vec![TripleRef::new(ex, ex, ex)], results);
//!
//! // Print
//! assert_eq!(dataset.to_string(), "<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n");
//! assert_eq!(
//! dataset.to_string(),
//! "<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n"
//! );
//! # Result::<_,Box<dyn std::error::Error>>::Ok(())
//! ```
//!
//! See also [`Graph`] if you only care about plain triples.
use crate::interning::*;
use crate::SubjectRef;
use crate::*;
use std::cmp::min;
use std::collections::hash_map::DefaultHasher;
use std::collections::BTreeSet;
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeSet, HashMap, HashSet};
use std::fmt;
use std::hash::{Hash, Hasher};
/// An in-memory [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
///
/// It can accommodate a fairly large number of quads (in the few millions).
/// Beware: it interns the string and does not do any garbage collection yet:
/// if you insert and remove a lot of different terms, memory will grow without any reduction.
///
/// <div class="warning">It interns the strings and does not do any garbage collection yet:
/// if you insert and remove a lot of different terms, memory will grow without any reduction.</div>
///
/// Usage example:
/// ```
@ -61,7 +64,7 @@ use std::hash::{Hash, Hasher};
/// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Debug, Default)]
#[derive(Debug, Default, Clone)]
pub struct Dataset {
interner: Interner,
gspo: BTreeSet<(
@ -183,6 +186,7 @@ impl Dataset {
.map(move |q| self.decode_spog(q))
}
#[allow(clippy::map_identity)]
fn interned_quads_for_subject(
&self,
subject: &InternedSubject,
@ -293,6 +297,18 @@ impl Dataset {
.map(|(o, s, p, g)| (s, p, o, g))
}
pub fn quads_for_graph_name<'a, 'b>(
&'a self,
graph_name: impl Into<GraphNameRef<'b>>,
) -> impl Iterator<Item = QuadRef<'a>> + 'a {
let graph_name = self
.encoded_graph_name(graph_name)
.unwrap_or_else(InternedGraphName::impossible);
self.interned_quads_for_graph_name(&graph_name)
.map(move |q| self.decode_spog(q))
}
fn interned_quads_for_graph_name(
&self,
graph_name: &InternedGraphName,
@ -525,9 +541,12 @@ impl Dataset {
/// Warning 3: This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset.
pub fn canonicalize(&mut self) {
let bnodes = self.blank_nodes();
let (hash, partition) =
self.hash_bnodes(bnodes.into_iter().map(|bnode| (bnode, 0)).collect());
let new_quads = self.distinguish(&hash, &partition);
let quads_per_blank_node = self.quads_per_blank_nodes();
let (hash, partition) = self.hash_bnodes(
bnodes.into_iter().map(|bnode| (bnode, 0)).collect(),
&quads_per_blank_node,
);
let new_quads = self.distinguish(&hash, &partition, &quads_per_blank_node);
self.clear();
for quad in new_quads {
self.insert_encoded(quad);
@ -572,107 +591,172 @@ impl Dataset {
}
}
fn quads_per_blank_nodes(&self) -> QuadsPerBlankNode {
let mut map: HashMap<_, Vec<_>> = HashMap::new();
for quad in &self.spog {
if let InternedSubject::BlankNode(bnode) = &quad.0 {
map.entry(*bnode).or_default().push(quad.clone());
}
#[cfg(feature = "rdf-star")]
if let InternedSubject::Triple(t) = &quad.0 {
Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, &mut map);
}
if let InternedTerm::BlankNode(bnode) = &quad.2 {
map.entry(*bnode).or_default().push(quad.clone());
}
#[cfg(feature = "rdf-star")]
if let InternedTerm::Triple(t) = &quad.2 {
Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, &mut map);
}
if let InternedGraphName::BlankNode(bnode) = &quad.3 {
map.entry(*bnode).or_default().push(quad.clone());
}
}
map
}
#[cfg(feature = "rdf-star")]
fn add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(
quad: &(
InternedSubject,
InternedNamedNode,
InternedTerm,
InternedGraphName,
),
triple: &InternedTriple,
map: &mut QuadsPerBlankNode,
) {
if let InternedSubject::BlankNode(bnode) = &triple.subject {
map.entry(*bnode).or_default().push(quad.clone());
}
if let InternedSubject::Triple(t) = &triple.subject {
Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, map);
}
if let InternedTerm::BlankNode(bnode) = &triple.object {
map.entry(*bnode).or_default().push(quad.clone());
}
if let InternedTerm::Triple(t) = &triple.object {
Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, map);
}
}
fn hash_bnodes(
&self,
mut hashes: HashMap<InternedBlankNode, u64>,
quads_per_blank_node: &QuadsPerBlankNode,
) -> (
HashMap<InternedBlankNode, u64>,
Vec<(u64, Vec<InternedBlankNode>)>,
) {
let mut to_hash = Vec::new();
let mut partition: HashMap<u64, Vec<InternedBlankNode>> = HashMap::new();
let mut partition_len = 0;
loop {
//TODO: improve termination
let mut new_hashes = HashMap::new();
for (bnode, old_hash) in &hashes {
for (_, p, o, g) in
self.interned_quads_for_subject(&InternedSubject::BlankNode(*bnode))
{
to_hash.push((
self.hash_named_node(*p),
self.hash_term(o, &hashes),
self.hash_graph_name(g, &hashes),
0,
));
}
for (s, p, _, g) in self.interned_quads_for_object(&InternedTerm::BlankNode(*bnode))
{
to_hash.push((
self.hash_subject(s, &hashes),
self.hash_named_node(*p),
self.hash_graph_name(g, &hashes),
1,
));
}
for (s, p, o, _) in
self.interned_quads_for_graph_name(&InternedGraphName::BlankNode(*bnode))
{
let mut to_do = hashes
.keys()
.map(|bnode| (*bnode, true))
.collect::<HashMap<_, _>>();
let mut partition = HashMap::<_, Vec<_>>::with_capacity(hashes.len());
let mut old_partition_count = usize::MAX;
while old_partition_count != partition.len() {
old_partition_count = partition.len();
partition.clear();
let mut new_hashes = hashes.clone();
for bnode in hashes.keys() {
let hash = if to_do.contains_key(bnode) {
for (s, p, o, g) in &quads_per_blank_node[bnode] {
to_hash.push((
self.hash_subject(s, &hashes),
self.hash_subject(s, *bnode, &hashes),
self.hash_named_node(*p),
self.hash_term(o, &hashes),
2,
self.hash_term(o, *bnode, &hashes),
self.hash_graph_name(g, *bnode, &hashes),
));
}
to_hash.sort_unstable();
let hash = Self::hash_tuple((old_hash, &to_hash));
let hash = Self::hash_tuple((&to_hash, hashes[bnode]));
to_hash.clear();
if hash == hashes[bnode] {
to_do.insert(*bnode, false);
} else {
new_hashes.insert(*bnode, hash);
partition.entry(hash).or_default().push(*bnode);
}
if partition.len() == partition_len {
let mut partition: Vec<_> = partition.into_iter().collect();
partition.sort_by(|(h1, b1), (h2, b2)| (b1.len(), h1).cmp(&(b2.len(), h2)));
return (hashes, partition);
hash
} else {
hashes[bnode]
};
partition.entry(hash).or_default().push(*bnode);
}
hashes = new_hashes;
partition_len = partition.len();
partition.clear();
}
let mut partition: Vec<_> = partition.into_iter().collect();
partition.sort_unstable_by(|(h1, b1), (h2, b2)| (b1.len(), h1).cmp(&(b2.len(), h2)));
(hashes, partition)
}
fn hash_named_node(&self, node: InternedNamedNode) -> u64 {
Self::hash_tuple(node.decode_from(&self.interner))
}
fn hash_blank_node(
node: InternedBlankNode,
current_blank_node: InternedBlankNode,
bnodes_hash: &HashMap<InternedBlankNode, u64>,
) -> u64 {
if node == current_blank_node {
u64::MAX
} else {
bnodes_hash[&node]
}
}
fn hash_subject(
&self,
node: &InternedSubject,
current_blank_node: InternedBlankNode,
bnodes_hash: &HashMap<InternedBlankNode, u64>,
) -> u64 {
match node {
InternedSubject::NamedNode(node) => Self::hash_tuple(node.decode_from(&self.interner)),
InternedSubject::BlankNode(bnode) => {
Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
}
#[cfg(feature = "rdf-star")]
if let InternedSubject::Triple(triple) = node {
return self.hash_triple(triple, bnodes_hash);
InternedSubject::Triple(triple) => {
self.hash_triple(triple, current_blank_node, bnodes_hash)
}
if let InternedSubject::BlankNode(bnode) = node {
bnodes_hash[bnode]
} else {
Self::hash_tuple(node.decode_from(&self.interner))
}
}
fn hash_term(&self, term: &InternedTerm, bnodes_hash: &HashMap<InternedBlankNode, u64>) -> u64 {
fn hash_term(
&self,
term: &InternedTerm,
current_blank_node: InternedBlankNode,
bnodes_hash: &HashMap<InternedBlankNode, u64>,
) -> u64 {
match term {
InternedTerm::NamedNode(node) => Self::hash_tuple(node.decode_from(&self.interner)),
InternedTerm::BlankNode(bnode) => {
Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
}
InternedTerm::Literal(literal) => Self::hash_tuple(literal.decode_from(&self.interner)),
#[cfg(feature = "rdf-star")]
if let InternedTerm::Triple(triple) = term {
return self.hash_triple(triple, bnodes_hash);
InternedTerm::Triple(triple) => {
self.hash_triple(triple, current_blank_node, bnodes_hash)
}
if let InternedTerm::BlankNode(bnode) = term {
bnodes_hash[bnode]
} else {
Self::hash_tuple(term.decode_from(&self.interner))
}
}
fn hash_graph_name(
&self,
graph_name: &InternedGraphName,
current_blank_node: InternedBlankNode,
bnodes_hash: &HashMap<InternedBlankNode, u64>,
) -> u64 {
if let InternedGraphName::BlankNode(bnode) = graph_name {
bnodes_hash[bnode]
} else {
Self::hash_tuple(graph_name.decode_from(&self.interner))
match graph_name {
InternedGraphName::NamedNode(node) => {
Self::hash_tuple(node.decode_from(&self.interner))
}
InternedGraphName::BlankNode(bnode) => {
Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
}
InternedGraphName::DefaultGraph => 0,
}
}
@ -680,12 +764,13 @@ impl Dataset {
fn hash_triple(
&self,
triple: &InternedTriple,
current_blank_node: InternedBlankNode,
bnodes_hash: &HashMap<InternedBlankNode, u64>,
) -> u64 {
Self::hash_tuple((
self.hash_subject(&triple.subject, bnodes_hash),
self.hash_subject(&triple.subject, current_blank_node, bnodes_hash),
self.hash_named_node(triple.predicate),
self.hash_term(&triple.object, bnodes_hash),
self.hash_term(&triple.object, current_blank_node, bnodes_hash),
))
}
@ -699,33 +784,25 @@ impl Dataset {
&mut self,
hash: &HashMap<InternedBlankNode, u64>,
partition: &[(u64, Vec<InternedBlankNode>)],
quads_per_blank_node: &QuadsPerBlankNode,
) -> Vec<(
InternedSubject,
InternedNamedNode,
InternedTerm,
InternedGraphName,
)> {
let b_prime = partition.iter().find_map(|(_, b)| (b.len() > 1).then(|| b));
let b_prime = partition.iter().map(|(_, b)| b).find(|b| b.len() > 1);
if let Some(b_prime) = b_prime {
b_prime
.iter()
.map(|b| {
let mut hash_prime = hash.clone();
hash_prime.insert(*b, Self::hash_tuple((hash_prime[b], 22)));
let (hash_prime_prime, partition_prime) = self.hash_bnodes(hash_prime);
self.distinguish(&hash_prime_prime, &partition_prime)
})
.fold(None, |a, b| {
Some(if let Some(a) = a {
if a <= b {
a
} else {
b
}
} else {
b
})
let (hash_prime_prime, partition_prime) =
self.hash_bnodes(hash_prime, quads_per_blank_node);
self.distinguish(&hash_prime_prime, &partition_prime, quads_per_blank_node)
})
.reduce(min)
.unwrap_or_default()
} else {
self.label(hash)
@ -747,54 +824,43 @@ impl Dataset {
.into_iter()
.map(|(s, p, o, g)| {
(
if let InternedSubject::BlankNode(bnode) = s {
match s {
InternedSubject::NamedNode(_) => s,
InternedSubject::BlankNode(bnode) => {
InternedSubject::BlankNode(self.map_bnode(bnode, hashes))
} else {
}
#[cfg(feature = "rdf-star")]
{
if let InternedSubject::Triple(triple) = s {
InternedSubject::Triple(triple) => {
InternedSubject::Triple(Box::new(InternedTriple::encoded_into(
self.label_triple(&triple, hashes).as_ref(),
&mut self.interner,
)))
} else {
s
}
}
#[cfg(not(feature = "rdf-star"))]
{
s
}
},
p,
if let InternedTerm::BlankNode(bnode) = o {
match o {
InternedTerm::NamedNode(_) | InternedTerm::Literal(_) => o,
InternedTerm::BlankNode(bnode) => {
InternedTerm::BlankNode(self.map_bnode(bnode, hashes))
} else {
}
#[cfg(feature = "rdf-star")]
{
if let InternedTerm::Triple(triple) = o {
InternedTerm::Triple(triple) => {
InternedTerm::Triple(Box::new(InternedTriple::encoded_into(
self.label_triple(&triple, hashes).as_ref(),
&mut self.interner,
)))
} else {
o
}
}
#[cfg(not(feature = "rdf-star"))]
{
o
}
},
if let InternedGraphName::BlankNode(bnode) = g {
match g {
InternedGraphName::NamedNode(_) | InternedGraphName::DefaultGraph => g,
InternedGraphName::BlankNode(bnode) => {
InternedGraphName::BlankNode(self.map_bnode(bnode, hashes))
} else {
g
}
},
)
})
.collect();
quads.sort();
quads.sort_unstable();
quads
}
@ -862,7 +928,7 @@ impl<'a> IntoIterator for &'a Dataset {
type Item = QuadRef<'a>;
type IntoIter = Iter<'a>;
fn into_iter(self) -> Iter<'a> {
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
@ -1220,7 +1286,7 @@ impl<'a> IntoIterator for GraphView<'a> {
type Item = TripleRef<'a>;
type IntoIter = GraphViewIter<'a>;
fn into_iter(self) -> GraphViewIter<'a> {
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
@ -1229,7 +1295,7 @@ impl<'a, 'b> IntoIterator for &'b GraphView<'a> {
type Item = TripleRef<'a>;
type IntoIter = GraphViewIter<'a>;
fn into_iter(self) -> GraphViewIter<'a> {
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
@ -1431,7 +1497,7 @@ impl<'a> IntoIterator for &'a GraphViewMut<'a> {
type Item = TripleRef<'a>;
type IntoIter = GraphViewIter<'a>;
fn into_iter(self) -> GraphViewIter<'a> {
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
@ -1462,7 +1528,7 @@ pub struct Iter<'a> {
impl<'a> Iterator for Iter<'a> {
type Item = QuadRef<'a>;
fn next(&mut self) -> Option<QuadRef<'a>> {
fn next(&mut self) -> Option<Self::Item> {
self.inner
.next()
.map(|(s, p, o, g)| self.dataset.decode_spog((s, p, o, g)))
@ -1486,9 +1552,57 @@ pub struct GraphViewIter<'a> {
impl<'a> Iterator for GraphViewIter<'a> {
type Item = TripleRef<'a>;
fn next(&mut self) -> Option<TripleRef<'a>> {
fn next(&mut self) -> Option<Self::Item> {
self.inner
.next()
.map(|(_, s, p, o)| self.dataset.decode_spo((s, p, o)))
}
}
type QuadsPerBlankNode = HashMap<
InternedBlankNode,
Vec<(
InternedSubject,
InternedNamedNode,
InternedTerm,
InternedGraphName,
)>,
>;
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_canon() {
let mut dataset = Dataset::new();
dataset.insert(QuadRef::new(
BlankNode::default().as_ref(),
NamedNodeRef::new_unchecked("http://ex"),
BlankNode::default().as_ref(),
GraphNameRef::DefaultGraph,
));
dataset.insert(QuadRef::new(
BlankNode::default().as_ref(),
NamedNodeRef::new_unchecked("http://ex"),
BlankNode::default().as_ref(),
GraphNameRef::DefaultGraph,
));
dataset.canonicalize();
let mut dataset2 = Dataset::new();
dataset2.insert(QuadRef::new(
BlankNode::default().as_ref(),
NamedNodeRef::new_unchecked("http://ex"),
BlankNode::default().as_ref(),
GraphNameRef::DefaultGraph,
));
dataset2.insert(QuadRef::new(
BlankNode::default().as_ref(),
NamedNodeRef::new_unchecked("http://ex"),
BlankNode::default().as_ref(),
GraphNameRef::DefaultGraph,
));
dataset2.canonicalize();
assert_eq!(dataset, dataset2);
}
}

@ -16,7 +16,10 @@
//! assert_eq!(vec![triple], results);
//!
//! // Print
//! assert_eq!(graph.to_string(), "<http://example.com> <http://example.com> <http://example.com> .\n");
//! assert_eq!(
//! graph.to_string(),
//! "<http://example.com> <http://example.com> <http://example.com> .\n"
//! );
//! # Result::<_,Box<dyn std::error::Error>>::Ok(())
//! ```
//!
@ -29,8 +32,9 @@ use std::fmt;
/// An in-memory [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
///
/// It can accommodate a fairly large number of triples (in the few millions).
/// Beware: it interns the string and does not do any garbage collection yet:
/// if you insert and remove a lot of different terms, memory will grow without any reduction.
///
/// <div class="warning">It interns the string and does not do any garbage collection yet:
/// if you insert and remove a lot of different terms, memory will grow without any reduction.</div>
///
/// Usage example:
/// ```
@ -48,7 +52,7 @@ use std::fmt;
/// assert_eq!(vec![triple], results);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Debug, Default)]
#[derive(Debug, Default, Clone)]
pub struct Graph {
dataset: Dataset,
}
@ -228,7 +232,7 @@ impl<'a> IntoIterator for &'a Graph {
type Item = TripleRef<'a>;
type IntoIter = Iter<'a>;
fn into_iter(self) -> Iter<'a> {
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
@ -275,7 +279,7 @@ pub struct Iter<'a> {
impl<'a> Iterator for Iter<'a> {
type Item = TripleRef<'a>;
fn next(&mut self) -> Option<TripleRef<'a>> {
fn next(&mut self) -> Option<Self::Item> {
self.inner.next()
}
}

@ -4,10 +4,11 @@ use crate::*;
use std::collections::hash_map::{Entry, HashMap, RandomState};
use std::hash::{BuildHasher, Hasher};
#[derive(Debug, Default)]
#[derive(Debug, Default, Clone)]
pub struct Interner {
hasher: RandomState,
string_for_hash: HashMap<u64, String, IdentityHasherBuilder>,
string_for_blank_node_id: HashMap<u128, String>,
#[cfg(feature = "rdf-star")]
triples: HashMap<InternedTriple, Triple>,
}
@ -100,7 +101,7 @@ impl InternedNamedNode {
})
}
pub fn decode_from(self, interner: &Interner) -> NamedNodeRef {
pub fn decode_from(self, interner: &Interner) -> NamedNodeRef<'_> {
NamedNodeRef::new_unchecked(interner.resolve(self.id))
}
@ -120,29 +121,53 @@ impl InternedNamedNode {
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct InternedBlankNode {
id: Key,
pub enum InternedBlankNode {
Number { id: u128 },
Other { id: Key },
}
impl InternedBlankNode {
pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Interner) -> Self {
Self {
if let Some(id) = blank_node.unique_id() {
interner
.string_for_blank_node_id
.entry(id)
.or_insert_with(|| blank_node.as_str().into());
Self::Number { id }
} else {
Self::Other {
id: interner.get_or_intern(blank_node.as_str()),
}
}
}
pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Interner) -> Option<Self> {
Some(Self {
if let Some(id) = blank_node.unique_id() {
interner
.string_for_blank_node_id
.contains_key(&id)
.then_some(Self::Number { id })
} else {
Some(Self::Other {
id: interner.get(blank_node.as_str())?,
})
}
}
pub fn decode_from(self, interner: &Interner) -> BlankNodeRef {
BlankNodeRef::new_unchecked(interner.resolve(self.id))
pub fn decode_from(self, interner: &Interner) -> BlankNodeRef<'_> {
BlankNodeRef::new_unchecked(match self {
Self::Number { id } => &interner.string_for_blank_node_id[&id],
Self::Other { id } => interner.resolve(id),
})
}
pub fn next(self) -> Self {
Self { id: self.id.next() }
match self {
Self::Number { id } => Self::Number {
id: id.saturating_add(1),
},
Self::Other { id } => Self::Other { id: id.next() },
}
}
}
@ -467,7 +492,7 @@ impl InternedTriple {
interner
.triples
.contains_key(&interned_triple)
.then(|| interned_triple)
.then_some(interned_triple)
}
pub fn next(&self) -> Self {
@ -479,14 +504,14 @@ impl InternedTriple {
}
}
#[derive(Default)]
struct IdentityHasherBuilder {}
#[derive(Default, Clone)]
struct IdentityHasherBuilder;
impl BuildHasher for IdentityHasherBuilder {
type Hasher = IdentityHasher;
fn build_hasher(&self) -> IdentityHasher {
IdentityHasher::default()
fn build_hasher(&self) -> Self::Hasher {
Self::Hasher::default()
}
}

@ -1,6 +1,5 @@
use crate::named_node::NamedNode;
use crate::vocab::rdf;
use crate::vocab::xsd;
use crate::vocab::{rdf, xsd};
use crate::NamedNodeRef;
use oxilangtag::{LanguageTag, LanguageTagParseError};
#[cfg(feature = "oxsdatatypes")]
@ -15,8 +14,8 @@ use std::option::Option;
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// # use oxilangtag::LanguageTagParseError;
/// use oxrdf::Literal;
/// use oxrdf::vocab::xsd;
/// use oxrdf::Literal;
///
/// assert_eq!(
/// "\"foo\\nbar\"",
@ -24,12 +23,12 @@ use std::option::Option;
/// );
///
/// assert_eq!(
/// "\"1999-01-01\"^^<http://www.w3.org/2001/XMLSchema#date>",
/// r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#,
/// Literal::new_typed_literal("1999-01-01", xsd::DATE).to_string()
/// );
///
/// assert_eq!(
/// "\"foo\"@en",
/// r#""foo"@en"#,
/// Literal::new_language_tagged_literal("foo", "en")?.to_string()
/// );
/// # Result::<(), LanguageTagParseError>::Ok(())
@ -427,8 +426,8 @@ impl From<DayTimeDuration> for Literal {
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// use oxrdf::LiteralRef;
/// use oxrdf::vocab::xsd;
/// use oxrdf::LiteralRef;
///
/// assert_eq!(
/// "\"foo\\nbar\"",
@ -436,7 +435,7 @@ impl From<DayTimeDuration> for Literal {
/// );
///
/// assert_eq!(
/// "\"1999-01-01\"^^<http://www.w3.org/2001/XMLSchema#date>",
/// r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#,
/// LiteralRef::new_typed_literal("1999-01-01", xsd::DATE).to_string()
/// );
/// ```
@ -459,7 +458,7 @@ enum LiteralRefContent<'a> {
impl<'a> LiteralRef<'a> {
/// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal).
#[inline]
pub fn new_simple_literal(value: &'a str) -> Self {
pub const fn new_simple_literal(value: &'a str) -> Self {
LiteralRef(LiteralRefContent::String(value))
}
@ -482,13 +481,13 @@ impl<'a> LiteralRef<'a> {
///
/// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data.
#[inline]
pub fn new_language_tagged_literal_unchecked(value: &'a str, language: &'a str) -> Self {
pub const fn new_language_tagged_literal_unchecked(value: &'a str, language: &'a str) -> Self {
LiteralRef(LiteralRefContent::LanguageTaggedString { value, language })
}
/// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form)
#[inline]
pub fn value(self) -> &'a str {
pub const fn value(self) -> &'a str {
match self.0 {
LiteralRefContent::String(value)
| LiteralRefContent::LanguageTaggedString { value, .. }
@ -501,7 +500,7 @@ impl<'a> LiteralRef<'a> {
/// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47).
/// They are normalized to lowercase by this implementation.
#[inline]
pub fn language(self) -> Option<&'a str> {
pub const fn language(self) -> Option<&'a str> {
match self.0 {
LiteralRefContent::LanguageTaggedString { language, .. } => Some(language),
_ => None,
@ -513,7 +512,7 @@ impl<'a> LiteralRef<'a> {
/// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
/// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
#[inline]
pub fn datatype(self) -> NamedNodeRef<'a> {
pub const fn datatype(self) -> NamedNodeRef<'a> {
match self.0 {
LiteralRefContent::String(_) => xsd::STRING,
LiteralRefContent::LanguageTaggedString { .. } => rdf::LANG_STRING,
@ -526,7 +525,7 @@ impl<'a> LiteralRef<'a> {
/// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
/// or has the datatype [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
#[inline]
pub fn is_plain(self) -> bool {
pub const fn is_plain(self) -> bool {
matches!(
self.0,
LiteralRefContent::String(_) | LiteralRefContent::LanguageTaggedString { .. }
@ -552,7 +551,7 @@ impl<'a> LiteralRef<'a> {
/// Extract components from this literal
#[inline]
pub fn destruct(self) -> (&'a str, Option<NamedNodeRef<'a>>, Option<&'a str>) {
pub const fn destruct(self) -> (&'a str, Option<NamedNodeRef<'a>>, Option<&'a str>) {
match self.0 {
LiteralRefContent::String(s) => (s, None, None),
LiteralRefContent::LanguageTaggedString { value, language } => {
@ -620,11 +619,15 @@ pub fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result {
f.write_char('"')?;
for c in string.chars() {
match c {
'\u{08}' => f.write_str("\\b"),
'\t' => f.write_str("\\t"),
'\n' => f.write_str("\\n"),
'\u{0C}' => f.write_str("\\f"),
'\r' => f.write_str("\\r"),
'"' => f.write_str("\\\""),
'\\' => f.write_str("\\\\"),
c => f.write_char(c),
'\0'..='\u{1F}' | '\u{7F}' => write!(f, "\\u{:04X}", u32::from(c)),
_ => f.write_char(c),
}?;
}
f.write_char('"')
@ -632,6 +635,8 @@ pub fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result {
#[cfg(test)]
mod tests {
#![allow(clippy::panic_in_result_fn)]
use super::*;
#[test]

@ -216,3 +216,21 @@ impl PartialOrd<NamedNodeRef<'_>> for NamedNode {
self.as_ref().partial_cmp(other)
}
}
impl From<Iri<String>> for NamedNode {
#[inline]
fn from(iri: Iri<String>) -> Self {
Self {
iri: iri.into_inner(),
}
}
}
impl<'a> From<Iri<&'a str>> for NamedNodeRef<'a> {
#[inline]
fn from(iri: Iri<&'a str>) -> Self {
Self {
iri: iri.into_inner(),
}
}
}

@ -6,8 +6,6 @@ use crate::{
#[cfg(feature = "rdf-star")]
use crate::{Subject, Triple};
use std::char;
use std::error::Error;
use std::fmt;
use std::str::{Chars, FromStr};
/// This limit is set in order to avoid stack overflow error when parsing nested triples due to too many recursive calls.
@ -23,12 +21,15 @@ impl FromStr for NamedNode {
/// use oxrdf::NamedNode;
/// use std::str::FromStr;
///
/// assert_eq!(NamedNode::from_str("<http://example.com>").unwrap(), NamedNode::new("http://example.com").unwrap())
/// assert_eq!(
/// NamedNode::from_str("<http://example.com>").unwrap(),
/// NamedNode::new("http://example.com").unwrap()
/// )
/// ```
fn from_str(s: &str) -> Result<Self, TermParseError> {
fn from_str(s: &str) -> Result<Self, Self::Err> {
let (term, left) = read_named_node(s)?;
if !left.is_empty() {
return Err(TermParseError::msg(
return Err(Self::Err::msg(
"Named node serialization should end with a >",
));
}
@ -45,12 +46,15 @@ impl FromStr for BlankNode {
/// use oxrdf::BlankNode;
/// use std::str::FromStr;
///
/// assert_eq!(BlankNode::from_str("_:ex").unwrap(), BlankNode::new("ex").unwrap())
/// assert_eq!(
/// BlankNode::from_str("_:ex").unwrap(),
/// BlankNode::new("ex").unwrap()
/// )
/// ```
fn from_str(s: &str) -> Result<Self, TermParseError> {
fn from_str(s: &str) -> Result<Self, Self::Err> {
let (term, left) = read_blank_node(s)?;
if !left.is_empty() {
return Err(TermParseError::msg(
return Err(Self::Err::msg(
"Blank node serialization should not contain whitespaces",
));
}
@ -64,21 +68,46 @@ impl FromStr for Literal {
/// Parses a literal from its NTriples or Turtle serialization
///
/// ```
/// use oxrdf::{Literal, NamedNode, vocab::xsd};
/// use oxrdf::vocab::xsd;
/// use oxrdf::{Literal, NamedNode};
/// use std::str::FromStr;
///
/// assert_eq!(Literal::from_str("\"ex\\n\"").unwrap(), Literal::new_simple_literal("ex\n"));
/// assert_eq!(Literal::from_str("\"ex\"@en").unwrap(), Literal::new_language_tagged_literal("ex", "en").unwrap());
/// assert_eq!(Literal::from_str("\"2020\"^^<http://www.w3.org/2001/XMLSchema#gYear>").unwrap(), Literal::new_typed_literal("2020", NamedNode::new("http://www.w3.org/2001/XMLSchema#gYear").unwrap()));
/// assert_eq!(Literal::from_str("true").unwrap(), Literal::new_typed_literal("true", xsd::BOOLEAN));
/// assert_eq!(Literal::from_str("+122").unwrap(), Literal::new_typed_literal("+122", xsd::INTEGER));
/// assert_eq!(Literal::from_str("-122.23").unwrap(), Literal::new_typed_literal("-122.23", xsd::DECIMAL));
/// assert_eq!(Literal::from_str("-122e+1").unwrap(), Literal::new_typed_literal("-122e+1", xsd::DOUBLE));
/// assert_eq!(
/// Literal::from_str("\"ex\\n\"").unwrap(),
/// Literal::new_simple_literal("ex\n")
/// );
/// assert_eq!(
/// Literal::from_str("\"ex\"@en").unwrap(),
/// Literal::new_language_tagged_literal("ex", "en").unwrap()
/// );
/// assert_eq!(
/// Literal::from_str("\"2020\"^^<http://www.w3.org/2001/XMLSchema#gYear>").unwrap(),
/// Literal::new_typed_literal(
/// "2020",
/// NamedNode::new("http://www.w3.org/2001/XMLSchema#gYear").unwrap()
/// )
/// );
/// assert_eq!(
/// Literal::from_str("true").unwrap(),
/// Literal::new_typed_literal("true", xsd::BOOLEAN)
/// );
/// assert_eq!(
/// Literal::from_str("+122").unwrap(),
/// Literal::new_typed_literal("+122", xsd::INTEGER)
/// );
/// assert_eq!(
/// Literal::from_str("-122.23").unwrap(),
/// Literal::new_typed_literal("-122.23", xsd::DECIMAL)
/// );
/// assert_eq!(
/// Literal::from_str("-122e+1").unwrap(),
/// Literal::new_typed_literal("-122e+1", xsd::DOUBLE)
/// );
/// ```
fn from_str(s: &str) -> Result<Self, TermParseError> {
fn from_str(s: &str) -> Result<Self, Self::Err> {
let (term, left) = read_literal(s)?;
if !left.is_empty() {
return Err(TermParseError::msg("Invalid literal serialization"));
return Err(Self::Err::msg("Invalid literal serialization"));
}
Ok(term)
}
@ -93,17 +122,24 @@ impl FromStr for Term {
/// use oxrdf::*;
/// use std::str::FromStr;
///
/// assert_eq!(Term::from_str("\"ex\"").unwrap(), Literal::new_simple_literal("ex").into());
/// assert_eq!(Term::from_str("<< _:s <http://example.com/p> \"o\" >>").unwrap(), Triple::new(
/// assert_eq!(
/// Term::from_str("\"ex\"").unwrap(),
/// Literal::new_simple_literal("ex").into()
/// );
/// assert_eq!(
/// Term::from_str("<< _:s <http://example.com/p> \"o\" >>").unwrap(),
/// Triple::new(
/// BlankNode::new("s").unwrap(),
/// NamedNode::new("http://example.com/p").unwrap(),
/// Literal::new_simple_literal("o")
/// ).into());
/// )
/// .into()
/// );
/// ```
fn from_str(s: &str) -> Result<Self, TermParseError> {
fn from_str(s: &str) -> Result<Self, Self::Err> {
let (term, left) = read_term(s, 0)?;
if !left.is_empty() {
return Err(TermParseError::msg("Invalid term serialization"));
return Err(Self::Err::msg("Invalid term serialization"));
}
Ok(term)
}
@ -118,19 +154,22 @@ impl FromStr for Variable {
/// use oxrdf::Variable;
/// use std::str::FromStr;
///
/// assert_eq!(Variable::from_str("$foo").unwrap(), Variable::new("foo").unwrap())
/// assert_eq!(
/// Variable::from_str("$foo").unwrap(),
/// Variable::new("foo").unwrap()
/// )
/// ```
fn from_str(s: &str) -> Result<Self, TermParseError> {
fn from_str(s: &str) -> Result<Self, Self::Err> {
if !s.starts_with('?') && !s.starts_with('$') {
return Err(TermParseError::msg(
return Err(Self::Err::msg(
"Variable serialization should start with ? or $",
));
}
Self::new(&s[1..]).map_err(|error| TermParseError {
kind: TermParseErrorKind::Variable {
Self::new(&s[1..]).map_err(|error| {
TermParseError(TermParseErrorKind::Variable {
value: s.to_owned(),
error,
},
})
})
}
}
@ -143,11 +182,11 @@ fn read_named_node(s: &str) -> Result<(NamedNode, &str), TermParseError> {
.ok_or_else(|| TermParseError::msg("Named node serialization should end with a >"))?;
let (value, remain) = remain.split_at(end);
let remain = &remain[1..];
let term = NamedNode::new(value).map_err(|error| TermParseError {
kind: TermParseErrorKind::Iri {
let term = NamedNode::new(value).map_err(|error| {
TermParseError(TermParseErrorKind::Iri {
value: value.to_owned(),
error,
},
})
})?;
Ok((term, remain))
} else {
@ -167,11 +206,11 @@ fn read_blank_node(s: &str) -> Result<(BlankNode, &str), TermParseError> {
})
.unwrap_or(remain.len());
let (value, remain) = remain.split_at(end);
let term = BlankNode::new(value).map_err(|error| TermParseError {
kind: TermParseErrorKind::BlankNode {
let term = BlankNode::new(value).map_err(|error| {
TermParseError(TermParseErrorKind::BlankNode {
value: value.to_owned(),
error,
},
})
})?;
Ok((term, remain))
} else {
@ -197,11 +236,11 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
let (language, remain) = remain.split_at(end);
Ok((
Literal::new_language_tagged_literal(value, language).map_err(
|error| TermParseError {
kind: TermParseErrorKind::LanguageTag {
|error| {
TermParseError(TermParseErrorKind::LanguageTag {
value: language.to_owned(),
error,
},
})
},
)?,
remain,
@ -217,10 +256,10 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
if let Some(c) = chars.next() {
value.push(match c {
't' => '\t',
'b' => '\u{8}',
'b' => '\u{08}',
'n' => '\n',
'r' => '\r',
'f' => '\u{C}',
'f' => '\u{0C}',
'"' => '"',
'\'' => '\'',
'\\' => '\\',
@ -232,7 +271,7 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
return Err(TermParseError::msg("Unexpected literal end"));
}
}
c => value.push(c),
_ => value.push(c),
}
}
Err(TermParseError::msg("Unexpected literal end"))
@ -381,61 +420,36 @@ fn read_hexa_char(input: &mut Chars<'_>, len: usize) -> Result<char, TermParseEr
}
/// An error raised during term serialization parsing using the [`FromStr`] trait.
#[derive(Debug)]
pub struct TermParseError {
kind: TermParseErrorKind,
}
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct TermParseError(#[from] TermParseErrorKind);
#[derive(Debug)]
/// An internal error raised during term serialization parsing using the [`FromStr`] trait.
#[derive(Debug, thiserror::Error)]
enum TermParseErrorKind {
Iri {
error: IriParseError,
value: String,
},
#[error("Error while parsing the named node '{value}': {error}")]
Iri { error: IriParseError, value: String },
#[error("Error while parsing the blank node '{value}': {error}")]
BlankNode {
error: BlankNodeIdParseError,
value: String,
},
#[error("Error while parsing the language tag '{value}': {error}")]
LanguageTag {
error: LanguageTagParseError,
value: String,
},
#[error("Error while parsing the variable '{value}': {error}")]
Variable {
error: VariableNameParseError,
value: String,
},
Msg {
msg: &'static str,
},
}
impl fmt::Display for TermParseError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self.kind {
TermParseErrorKind::Iri { error, value } => {
write!(f, "Error while parsing the named node '{value}': {error}")
}
TermParseErrorKind::BlankNode { error, value } => {
write!(f, "Error while parsing the blank node '{value}': {error}")
}
TermParseErrorKind::LanguageTag { error, value } => {
write!(f, "Error while parsing the language tag '{value}': {error}")
#[error("{0}")]
Msg(&'static str),
}
TermParseErrorKind::Variable { error, value } => {
write!(f, "Error while parsing the variable '{value}': {error}")
}
TermParseErrorKind::Msg { msg } => f.write_str(msg),
}
}
}
impl Error for TermParseError {}
impl TermParseError {
pub(crate) fn msg(msg: &'static str) -> Self {
Self {
kind: TermParseErrorKind::Msg { msg },
}
Self(TermParseErrorKind::Msg(msg))
}
}

@ -698,7 +698,7 @@ impl<'a> From<TermRef<'a>> for Term {
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// use oxrdf::{Triple, NamedNode};
/// use oxrdf::{NamedNode, Triple};
///
/// assert_eq!(
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o>",
@ -706,7 +706,8 @@ impl<'a> From<TermRef<'a>> for Term {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// }.to_string()
/// }
/// .to_string()
/// );
/// # Result::<_,oxrdf::IriParseError>::Ok(())
/// ```
@ -769,7 +770,7 @@ impl fmt::Display for Triple {
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// use oxrdf::{TripleRef, NamedNodeRef};
/// use oxrdf::{NamedNodeRef, TripleRef};
///
/// assert_eq!(
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o>",
@ -777,7 +778,8 @@ impl fmt::Display for Triple {
/// subject: NamedNodeRef::new("http://example.com/s")?.into(),
/// predicate: NamedNodeRef::new("http://example.com/p")?,
/// object: NamedNodeRef::new("http://example.com/o")?.into(),
/// }.to_string()
/// }
/// .to_string()
/// );
/// # Result::<_,oxrdf::IriParseError>::Ok(())
/// ```
@ -853,10 +855,11 @@ impl<'a> From<TripleRef<'a>> for Triple {
/// A possible owned graph name.
/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph).
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
#[derive(Eq, PartialEq, Debug, Clone, Hash, Default)]
pub enum GraphName {
NamedNode(NamedNode),
BlankNode(BlankNode),
#[default]
DefaultGraph,
}
@ -940,10 +943,11 @@ impl From<NamedOrBlankNodeRef<'_>> for GraphName {
/// A possible borrowed graph name.
/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph).
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash, Default)]
pub enum GraphNameRef<'a> {
NamedNode(NamedNodeRef<'a>),
BlankNode(BlankNodeRef<'a>),
#[default]
DefaultGraph,
}
@ -979,7 +983,7 @@ impl fmt::Display for GraphNameRef<'_> {
match self {
Self::NamedNode(node) => node.fmt(f),
Self::BlankNode(node) => node.fmt(f),
Self::DefaultGraph => write!(f, "DEFAULT"),
Self::DefaultGraph => f.write_str("DEFAULT"),
}
}
}

@ -1,5 +1,4 @@
use std::cmp::Ordering;
use std::error::Error;
use std::fmt;
/// A [SPARQL query](https://www.w3.org/TR/sparql11-query/) owned variable.
@ -8,10 +7,7 @@ use std::fmt;
/// ```
/// use oxrdf::{Variable, VariableNameParseError};
///
/// assert_eq!(
/// "?foo",
/// Variable::new("foo")?.to_string()
/// );
/// assert_eq!("?foo", Variable::new("foo")?.to_string());
/// # Result::<_,VariableNameParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
@ -67,12 +63,9 @@ impl fmt::Display for Variable {
///
/// The default string formatter is returning a SPARQL compatible representation:
/// ```
/// use oxrdf::{VariableRef, VariableNameParseError};
/// use oxrdf::{VariableNameParseError, VariableRef};
///
/// assert_eq!(
/// "?foo",
/// VariableRef::new("foo")?.to_string()
/// );
/// assert_eq!("?foo", VariableRef::new("foo")?.to_string());
/// # Result::<_,VariableNameParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
@ -96,12 +89,12 @@ impl<'a> VariableRef<'a> {
///
/// [`Variable::new()`] is a safe version of this constructor and should be used for untrusted data.
#[inline]
pub fn new_unchecked(name: &'a str) -> Self {
pub const fn new_unchecked(name: &'a str) -> Self {
Self { name }
}
#[inline]
pub fn as_str(&self) -> &str {
pub const fn as_str(self) -> &'a str {
self.name
}
@ -169,7 +162,7 @@ impl PartialOrd<VariableRef<'_>> for Variable {
fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError> {
let mut chars = id.chars();
let front = chars.next().ok_or(VariableNameParseError {})?;
let front = chars.next().ok_or(VariableNameParseError)?;
match front {
'0'..='9'
| '_'
@ -188,13 +181,13 @@ fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError>
| '\u{F900}'..='\u{FDCF}'
| '\u{FDF0}'..='\u{FFFD}'
| '\u{10000}'..='\u{EFFFF}' => (),
_ => return Err(VariableNameParseError {}),
_ => return Err(VariableNameParseError),
}
for c in chars {
match c {
'0'..='9'
| '\u{00B7}'
| '\u{00300}'..='\u{036F}'
| '\u{0300}'..='\u{036F}'
| '\u{203F}'..='\u{2040}'
| '_'
| 'A'..='Z'
@ -211,21 +204,13 @@ fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError>
| '\u{F900}'..='\u{FDCF}'
| '\u{FDF0}'..='\u{FFFD}'
| '\u{10000}'..='\u{EFFFF}' => (),
_ => return Err(VariableNameParseError {}),
_ => return Err(VariableNameParseError),
}
}
Ok(())
}
/// An error raised during [`Variable`] name validation.
#[derive(Debug)]
pub struct VariableNameParseError {}
impl fmt::Display for VariableNameParseError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "The variable name is invalid")
}
}
impl Error for VariableNameParseError {}
#[derive(Debug, thiserror::Error)]
#[error("The variable name is invalid")]
pub struct VariableNameParseError;

@ -231,3 +231,12 @@ pub mod xsd {
pub const YEAR_MONTH_DURATION: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#yearMonthDuration");
}
pub mod geosparql {
//! [GeoSpatial](https://opengeospatial.github.io/ogc-geosparql/) vocabulary.
use crate::named_node::NamedNodeRef;
/// Geospatial datatype like `"Point({longitude} {latitude})"^^geo:wktLiteral`
pub const WKT_LITERAL: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://www.opengis.net/ont/geosparql#wktLiteral");
}

@ -0,0 +1,36 @@
[package]
name = "oxrdfio"
version = "0.1.0-alpha.3-dev"
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF"]
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfxml"
documentation = "https://docs.rs/oxrdfio"
description = """
Parser and serializer for various RDF formats
"""
edition.workspace = true
rust-version.workspace = true
[features]
default = []
async-tokio = ["dep:tokio", "oxrdfxml/async-tokio", "oxttl/async-tokio"]
rdf-star = ["oxrdf/rdf-star", "oxttl/rdf-star"]
[dependencies]
oxrdf.workspace = true
oxrdfxml.workspace = true
oxttl.workspace = true
thiserror.workspace = true
tokio = { workspace = true, optional = true, features = ["io-util"] }
[dev-dependencies]
tokio = { workspace = true, features = ["rt", "macros"] }
[lints]
workspace = true
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save