Compare commits


5 Commits

Author SHA1 Message Date
Tpt add1dff458 Adds OX_LATERAL OPTIONAL and OX_LATERAL GRAPH 2 years ago
Tpt e922d3293b Fixes LATERAL inside of OPTIONAL behavior 2 years ago
Tpt 2d7eac932f Renames tests 2 years ago
Tpt 15819907af Adds experimental OX_LATERAL operation 2 years ago
Tpt 6262e02edf Initialise v0.4 dev branch 2 years ago
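The commits above center on an experimental `OX_LATERAL` operation for SPARQL (a lateral pattern re-evaluates its right-hand side once per binding of the left-hand side, similar to SQL's LATERAL join). As a rough sketch of how such a query might be run through the Oxigraph Rust API — the keyword placement and this snippet are assumptions drawn only from the commit messages above, not from the diff itself:

use oxigraph::sparql::QueryResults;
use oxigraph::store::Store;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = Store::new()?;
    // Hypothetical syntax: OX_LATERAL OPTIONAL, per the first commit message.
    let query = "
        SELECT ?person ?name WHERE {
            ?person a <http://schema.org/Person> .
            OX_LATERAL OPTIONAL { ?person <http://schema.org/name> ?name }
        }";
    if let QueryResults::Solutions(solutions) = store.query(query)? {
        for solution in solutions {
            println!("{:?}", solution?.get("person"));
        }
    }
    Ok(())
}
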
  1. .clusterfuzzlite/Dockerfile (4 lines changed)
  2. .clusterfuzzlite/build.sh (30 lines changed)
  3. .clusterfuzzlite/project.yaml (1 line changed)
  4. .devcontainer/Dockerfile (21 lines changed)
  5. .devcontainer/devcontainer.json (69 lines changed)
  6. .github/DEPENDABOT.yml (16 lines changed)
  7. .github/ISSUE_TEMPLATE/bug_report.md (16 lines changed)
  8. .github/ISSUE_TEMPLATE/feature-request.md (20 lines changed)
  9. .github/ISSUE_TEMPLATE/question.md (10 lines changed)
  10. .github/actions/setup-rust/action.yml (27 lines changed)
  11. .github/workflows/artifacts.yml (372 lines changed)
  12. .github/workflows/install_rocksdb.sh (11 lines changed)
  13. .github/workflows/manylinux_build.sh (23 lines changed)
  14. .github/workflows/musllinux_build.sh (19 lines changed)
  15. .github/workflows/release.yml (276 lines changed)
  16. .github/workflows/tests.yml (508 lines changed)
  17. .gitmodules (10 lines changed)
  18. .mailmap (3 lines changed)
  19. .readthedocs.yaml (2 lines changed)
  20. CHANGELOG.md (328 lines changed)
  21. CITATION.cff (5 lines changed)
  22. Cargo.lock (1490 lines changed)
  23. Cargo.toml (266 lines changed)
  24. README.md (49 lines changed)
  25. bench/bsbm_blazegraph.sh (17 lines changed)
  26. bench/bsbm_graphdb.sh (21 lines changed)
  27. bench/bsbm_jena.sh (21 lines changed)
  28. bench/bsbm_oxigraph.sh (18 lines changed)
  29. bench/bsbm_rdf4j.sh (49 lines changed)
  30. bench/bsbm_virtuoso.sh (10 lines changed)
  31. bench/explanation_to_flamegraph.py (63 lines changed)
  32. bench/explanation_to_trace.py (52 lines changed)
  33. clippy.toml (2 lines changed)
  34. deny.toml (16 lines changed)
  35. docs/arch-diagram.svg (120 lines changed)
  36. docs/arch-diagram.txt (35 lines changed)
  37. fuzz/Cargo.toml (33 lines changed)
  38. fuzz/fuzz_targets/n3.rs (28 lines changed)
  39. fuzz/fuzz_targets/nquads.rs (84 lines changed)
  40. fuzz/fuzz_targets/rdf_xml.rs (35 lines changed)
  41. fuzz/fuzz_targets/sparql_eval.rs (61 lines changed)
  42. fuzz/fuzz_targets/sparql_query.rs (7 lines changed)
  43. fuzz/fuzz_targets/sparql_results_json.rs (15 lines changed)
  44. fuzz/fuzz_targets/sparql_results_tsv.rs (10 lines changed)
  45. fuzz/fuzz_targets/sparql_results_xml.rs (10 lines changed)
  46. fuzz/fuzz_targets/sparql_update.rs (6 lines changed)
  47. fuzz/fuzz_targets/trig.rs (166 lines changed)
  48. fuzz/src/lib.rs (1 line changed)
  49. fuzz/src/result_format.rs (63 lines changed)
  50. js/Cargo.toml (27 lines changed)
  51. js/README.md (69 lines changed)
  52. js/biome.json (14 lines changed)
  53. js/build_package.js (42 lines changed)
  54. js/package-lock.json (1027 lines changed)
  55. js/package.json (37 lines changed)
  56. js/src/lib.rs (2 lines changed)
  57. js/src/model.rs (50 lines changed)
  58. js/src/store.rs (130 lines changed)
  59. js/src/utils.rs (1 line changed)
  60. js/test/model.mjs (86 lines changed)
  61. js/test/store.mjs (361 lines changed)
  62. lib/Cargo.toml (63 lines changed)
  63. lib/README.md (85 lines changed)
  64. lib/benches/store.rs (208 lines changed)
  65. lib/oxigraph/Cargo.toml (59 lines changed)
  66. lib/oxigraph/README.md (82 lines changed)
  67. lib/oxigraph/src/io/mod.rs (39 lines changed)
  68. lib/oxigraph/src/io/read.rs (199 lines changed)
  69. lib/oxigraph/src/io/write.rs (185 lines changed)
  70. lib/oxigraph/src/lib.rs (12 lines changed)
  71. lib/oxigraph/src/model.rs (22 lines changed)
  72. lib/oxigraph/src/sparql/error.rs (84 lines changed)
  73. lib/oxigraph/src/sparql/eval.rs (5870 lines changed)
  74. lib/oxigraph/src/sparql/http/mod.rs (9 lines changed)
  75. lib/oxigraph/src/sparql/mod.rs (328 lines changed)
  76. lib/oxigraph/src/sparql/model.rs (371 lines changed)
  77. lib/oxigraph/src/sparql/results.rs (44 lines changed)
  78. lib/oxigraph/src/storage/backend/mod.rs (12 lines changed)
  79. lib/oxigraph/src/storage/backend/oxi_rocksdb.rs (1445 lines changed)
  80. lib/oxigraph/src/storage/error.rs (139 lines changed)
  81. lib/oxrdf/Cargo.toml (25 lines changed)
  82. lib/oxrdf/README.md (6 lines changed)
  83. lib/oxrdf/src/blank_node.rs (80 lines changed)
  84. lib/oxrdf/src/dataset.rs (532 lines changed)
  85. lib/oxrdf/src/graph.rs (44 lines changed)
  86. lib/oxrdf/src/interning.rs (216 lines changed)
  87. lib/oxrdf/src/lib.rs (4 lines changed)
  88. lib/oxrdf/src/literal.rs (203 lines changed)
  89. lib/oxrdf/src/named_node.rs (21 lines changed)
  90. lib/oxrdf/src/parser.rs (203 lines changed)
  91. lib/oxrdf/src/triple.rs (210 lines changed)
  92. lib/oxrdf/src/variable.rs (39 lines changed)
  93. lib/oxrdf/src/vocab.rs (9 lines changed)
  94. lib/oxrdfio/Cargo.toml (36 lines changed)
  95. lib/oxrdfio/README.md (67 lines changed)
  96. lib/oxrdfio/src/error.rs (122 lines changed)
  97. lib/oxrdfio/src/format.rs (216 lines changed)
  98. lib/oxrdfio/src/lib.rs (19 lines changed)
  99. lib/oxrdfio/src/parser.rs (807 lines changed)
  100. lib/oxrdfio/src/serializer.rs (410 lines changed)
  101. Some files were not shown because too many files have changed in this diff.

@@ -1,4 +0,0 @@
FROM gcr.io/oss-fuzz-base/base-builder-rust:v1
COPY . $SRC/oxigraph
WORKDIR oxigraph
COPY .clusterfuzzlite/build.sh $SRC/

@@ -1,30 +0,0 @@
#!/bin/bash -eu
shopt -s globstar
function build_seed_corpus() {
  mkdir "/tmp/oxigraph_$1"
  for file in **/*."$2"
  do
    hash=$(sha256sum "$file" | awk '{print $1;}')
    cp "$file" "/tmp/oxigraph_$1/$hash"
  done
  zip "$1_seed_corpus.zip" /tmp/"oxigraph_$1"/*
  rm -r "/tmp/oxigraph_$1"
}
cd "$SRC"/oxigraph
git submodule init
git submodule update
cargo fuzz build -O --debug-assertions
for TARGET in sparql_eval sparql_results_json sparql_results_tsv sparql_results_xml n3 nquads trig rdf_xml
do
  cp fuzz/target/x86_64-unknown-linux-gnu/release/$TARGET "$OUT"/
done
build_seed_corpus sparql_results_json srj
build_seed_corpus sparql_results_tsv tsv
build_seed_corpus sparql_results_xml srx
build_seed_corpus n3 n3
build_seed_corpus nquads nq
build_seed_corpus trig trig
build_seed_corpus rdf_xml rdf
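Each TARGET copied by the loop above is a libFuzzer harness under fuzz/fuzz_targets/. A minimal sketch of what such a Rust target looks like — illustrative only, assuming oxttl's reader API, not the repository's exact harness:

#![no_main]
use libfuzzer_sys::fuzz_target;
use oxttl::NQuadsParser;

fuzz_target!(|data: &[u8]| {
    // Feed arbitrary bytes to the N-Quads parser: parse errors are expected
    // and discarded; only panics or memory issues count as findings.
    for quad in NQuadsParser::new().for_reader(data) {
        let _ = quad;
    }
});
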

@@ -0,0 +1,21 @@
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust/.devcontainer/base.Dockerfile
# [Choice] Debian OS version (use bullseye on local arm64/Apple Silicon): buster, bullseye
ARG VARIANT="bullseye"
FROM mcr.microsoft.com/vscode/devcontainers/rust:0-${VARIANT}
# [Optional] Uncomment this section to install additional packages.
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
&& apt-get -y install --no-install-recommends \
python3 \
python3-venv \
python-is-python3 \
libclang-dev
ENV VIRTUAL_ENV=/opt/venv
RUN python -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN pip install --no-cache-dir -r python/requirements.dev.txt
# Change owner to the devcontainer user
RUN chown -R 1000:1000 $VIRTUAL_ENV

@@ -0,0 +1,69 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
// https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust
{
  "name": "Rust",
  "build": {
    "dockerfile": "Dockerfile",
    "args": {
      // Use the VARIANT arg to pick a Debian OS version: buster, bullseye
      // Use bullseye when on local on arm64/Apple Silicon.
      "VARIANT": "bullseye"
    }
  },
  "runArgs": ["--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined"],
  // Configure tool-specific properties.
  "customizations": {
    // Configure properties specific to VS Code.
    "vscode": {
      // Set *default* container specific settings.json values on container create.
      "settings": {
        "lldb.executable": "/usr/bin/lldb",
        // VS Code don't watch files under ./target
        "files.watcherExclude": {
          "**/target/**": true
        },
        "rust-analyzer.checkOnSave.command": "clippy",
        "python.defaultInterpreterPath": "/opt/venv/bin/python",
        "python.linting.enabled": true,
        "python.linting.pylintEnabled": true,
        "python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
        "python.formatting.blackPath": "/usr/local/py-utils/bin/black",
        "python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
        "python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
        "python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
        "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
        "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
        "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
        "python.linting.pylintPath": "/opt/venv/bin/pylint",
        "python.testing.pytestPath": "/opt/venv/bin/pytest"
      },
      // Add the IDs of extensions you want installed when the container is created.
      "extensions": [
        "vadimcn.vscode-lldb",
        "mutantdino.resourcemonitor",
        "rust-lang.rust-analyzer",
        "tamasfe.even-better-toml",
        "serayuzgur.crates",
        "ms-python.python",
        "ms-python.vscode-pylance",
        "esbenp.prettier-vscode",
        "stardog-union.stardog-rdf-grammars"
      ]
    }
  },
  // Use 'forwardPorts' to make a list of ports inside the container available locally.
  // "forwardPorts": [],
  // Use 'postCreateCommand' to run commands after the container is created.
  "postCreateCommand": "git submodule update --init && cargo build",
  // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
  "remoteUser": "vscode",
  "features": {
    "python": "3.10"
  }
}

@@ -1,16 +0,0 @@
version: 2
updates:
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: weekly
  - package-ecosystem: "pip"
    directory: "/python/"
    versioning-strategy: increase-if-necessary
    schedule:
      interval: weekly
  - package-ecosystem: "npm"
    directory: "/js/"
    versioning-strategy: increase-if-necessary
    schedule:
      interval: weekly

@@ -1,16 +0,0 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: bug
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Which version of Oxigraph are you using? On which platform?
2. A command-line or a code snippet that triggers the bug.

@@ -1,20 +0,0 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: enhancement
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Please link to other systems implementing the feature, specification of it if it exists and/or existing documentation about this feature.

@@ -1,10 +0,0 @@
---
name: Question
about: Please don't use issues but the Q&A section of the "discussions" space
title: ''
labels: question
assignees: ''
---

@@ -1,27 +0,0 @@
name: 'Setup Rust'
description: 'Setup Rust using Rustup'
inputs:
  version:
    description: 'Rust version to use. By default latest stable version'
    required: false
    default: 'stable'
  component:
    description: 'Rust extra component to install like clippy'
    required: false
  target:
    description: 'Rust extra target to install like wasm32-unknown-unknown'
    required: false
runs:
  using: "composite"
  steps:
    - run: rustup update
      shell: bash
    - run: rustup default ${{ inputs.version }}
      shell: bash
    - run: rustup component add ${{ inputs.component }}
      shell: bash
      if: ${{ inputs.component }}
    - run: rustup target add ${{ inputs.target }}
      shell: bash
      if: ${{ inputs.target }}
    - uses: Swatinem/rust-cache@v2

@@ -1,13 +1,9 @@
-name: Artifacts
+name: Nightly artifacts
 on:
   push:
     branches:
       - main
-      - next
-  release:
-    types:
-      - published
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -15,42 +11,17 @@ concurrency:
 jobs:
   binary_linux:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
         with:
           submodules: true
-      - uses: ./.github/actions/setup-rust
-        with:
-          target: aarch64-unknown-linux-gnu
-      - run: |
-          sudo apt-get update && sudo apt-get install -y g++-aarch64-linux-gnu
-          mkdir .cargo
-          echo -e "[target.aarch64-unknown-linux-gnu]\nlinker = \"aarch64-linux-gnu-gcc\"" >> .cargo/config.toml
-      - run: cargo build --release --no-default-features --features rustls-native
-        working-directory: ./cli
-      - run: cargo build --release --target aarch64-unknown-linux-gnu --no-default-features --features rustls-native
-        working-directory: ./cli
-        env:
-          BINDGEN_EXTRA_CLANG_ARGS: --sysroot /usr/aarch64-linux-gnu
-      - uses: actions/upload-artifact@v4
-        with:
-          name: oxigraph_x86_64_linux_gnu
-          path: target/release/oxigraph
-      - uses: actions/upload-artifact@v4
-        with:
-          name: oxigraph_aarch64_linux_gnu
-          path: target/aarch64-unknown-linux-gnu/release/oxigraph
-      - run: mv target/release/oxigraph oxigraph_${{ github.event.release.tag_name }}_x86_64_linux_gnu
-        if: github.event_name == 'release'
-      - run: mv target/aarch64-unknown-linux-gnu/release/oxigraph oxigraph_${{ github.event.release.tag_name }}_aarch64_linux_gnu
-        if: github.event_name == 'release'
-      - uses: softprops/action-gh-release@v1
-        with:
-          files: |
-            oxigraph_${{ github.event.release.tag_name }}_x86_64_linux_gnu
-            oxigraph_${{ github.event.release.tag_name }}_aarch64_linux_gnu
-        if: github.event_name == 'release'
+      - run: cargo build --release
+        working-directory: ./server
+      - uses: actions/upload-artifact@v3
+        with:
+          name: oxigraph_server_x86_64_linux_gnu
+          path: target/release/oxigraph_server
   binary_mac:
     runs-on: macos-latest
@@ -59,124 +30,43 @@
       SDKROOT: '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk'
       MACOSX_DEPLOYMENT_TARGET: '10.14'
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
         with:
           submodules: true
-      - uses: ./.github/actions/setup-rust
-        with:
-          target: aarch64-apple-darwin
+      - run: rustup update
       - run: cargo build --release
-        working-directory: ./cli
+        working-directory: ./server
-      - run: cargo build --release --target aarch64-apple-darwin
-        working-directory: ./cli
-      - uses: actions/upload-artifact@v4
-        with:
-          name: oxigraph_x86_64_apple
-          path: target/release/oxigraph
-      - uses: actions/upload-artifact@v4
-        with:
-          name: oxigraph_aarch64_apple
-          path: target/aarch64-apple-darwin/release/oxigraph
-      - run: mv target/release/oxigraph oxigraph_${{ github.event.release.tag_name }}_x86_64_apple
-        if: github.event_name == 'release'
-      - run: mv target/aarch64-apple-darwin/release/oxigraph oxigraph_${{ github.event.release.tag_name }}_aarch64_apple
-        if: github.event_name == 'release'
-      - uses: softprops/action-gh-release@v1
-        with:
-          files: |
-            oxigraph_${{ github.event.release.tag_name }}_x86_64_apple
-            oxigraph_${{ github.event.release.tag_name }}_aarch64_apple
-        if: github.event_name == 'release'
+      - uses: actions/upload-artifact@v3
+        with:
+          name: oxigraph_server_x86_64_apple
+          path: target/release/oxigraph_server
   binary_windows:
     runs-on: windows-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
         with:
           submodules: true
-      - uses: ./.github/actions/setup-rust
+      - run: rustup update
       - run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
       - run: cargo build --release
-        working-directory: ./cli
+        working-directory: ./server
-      - uses: actions/upload-artifact@v4
-        with:
-          name: oxigraph_x86_64_windows_msvc
-          path: target/release/oxigraph.exe
-      - run: mv target/release/oxigraph.exe oxigraph_${{ github.event.release.tag_name }}_x86_64_windows_msvc.exe
-        if: github.event_name == 'release'
-      - uses: softprops/action-gh-release@v1
-        with:
-          files: oxigraph_${{ github.event.release.tag_name }}_x86_64_windows_msvc.exe
-        if: github.event_name == 'release'
+      - uses: actions/upload-artifact@v3
+        with:
+          name: oxigraph_server_x86_64_windows_msvc
+          path: target/release/oxigraph_server.exe
-  python_sdist:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.12"
-          cache: pip
-          cache-dependency-path: '**/requirements.dev.txt'
-      - run: pip install -r python/requirements.dev.txt
-      - run: maturin build -m python/Cargo.toml
-      - run: pip install --no-index --find-links=target/wheels/ pyoxigraph
-      - run: rm -r target/wheels
-      - run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
-        working-directory: ./python
-      - run: maturin sdist -m python/Cargo.toml
-      - uses: actions/upload-artifact@v4
-        with:
-          name: pyoxigraph_sdist
-          path: target/wheels/*.tar.gz
   wheel_linux:
     runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        architecture: [ "x86_64", "aarch64" ]
-    continue-on-error: true
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: docker/setup-qemu-action@v2
-        with:
-          platforms: linux/${{ matrix.architecture }}
-        if: github.event_name == 'release' && matrix.architecture != 'x86_64'
-      - uses: ./.github/actions/setup-rust
-      - run: sed 's/%arch%/${{ matrix.architecture }}/g' .github/workflows/manylinux_build.sh | sed 's/%for_each_version%/${{ github.event_name == 'release' || '' }}/g' > .github/workflows/manylinux_build_script.sh
-      - run: docker run -v "$(pwd)":/workdir --platform linux/${{ matrix.architecture }} quay.io/pypa/manylinux2014_${{ matrix.architecture }} /bin/bash /workdir/.github/workflows/manylinux_build_script.sh
-        if: github.event_name == 'release' || matrix.architecture == 'x86_64'
-      - uses: actions/upload-artifact@v4
-        with:
-          name: pyoxigraph_${{ matrix.architecture }}_linux_gnu
-          path: target/wheels/*.whl
-  wheel_linux_musl:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        architecture: [ "x86_64", "aarch64" ]
-    continue-on-error: true
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
         with:
           submodules: true
-      - uses: docker/setup-qemu-action@v2
-        with:
-          platforms: linux/${{ matrix.architecture }}
-        if: github.event_name == 'release' && matrix.architecture != 'x86_64'
-      - uses: ./.github/actions/setup-rust
-      - run: sed 's/%arch%/${{ matrix.architecture }}/g' .github/workflows/musllinux_build.sh | sed 's/%for_each_version%/${{ github.event_name == 'release' || '' }}/g' > .github/workflows/musllinux_build_script.sh
-      - run: docker run -v "$(pwd)":/workdir --platform linux/${{ matrix.architecture }} quay.io/pypa/musllinux_1_2_${{ matrix.architecture }} /bin/bash /workdir/.github/workflows/musllinux_build_script.sh
-        if: github.event_name == 'release' || matrix.architecture == 'x86_64'
-      - uses: actions/upload-artifact@v4
-        with:
-          name: pyoxigraph_${{ matrix.architecture }}_linux_musl
-          path: target/wheels/*.whl
+      - run: sed 's/%arch%/x86_64/g' .github/workflows/manylinux_build.sh > .github/workflows/manylinux_build_script.sh
+      - run: docker run -v "$(pwd)":/workdir --platform linux/x86_64 quay.io/pypa/manylinux2014_x86_64 /bin/bash /workdir/.github/workflows/manylinux_build_script.sh
+      - uses: actions/upload-artifact@v3
+        with:
+          name: pyoxigraph_x86_64_linux
+          path: target/wheels/*.whl
   wheel_mac:
@@ -186,237 +76,85 @@
       SDKROOT: '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk'
       MACOSX_DEPLOYMENT_TARGET: '10.14'
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
         with:
           submodules: true
-      - uses: ./.github/actions/setup-rust
-        with:
-          target: aarch64-apple-darwin
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.12"
-          cache: pip
-          cache-dependency-path: '**/requirements.dev.txt'
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - run: rustup update && rustup target add aarch64-apple-darwin
       - run: pip install -r python/requirements.dev.txt
-      - run: maturin build --release --features abi3
-        working-directory: ./python
+      - run: maturin build --release -m python/Cargo.toml
       - run: pip install --no-index --find-links=target/wheels/ pyoxigraph
       - run: rm -r target/wheels
-      - run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
+      - run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
         working-directory: ./python
-      - run: maturin build --release --target universal2-apple-darwin --features abi3
-        working-directory: ./python
-      - run: maturin build --release --features abi3
-        working-directory: ./python
-        if: github.event_name == 'release'
-      - run: maturin build --release --target aarch64-apple-darwin --features abi3
-        working-directory: ./python
-        if: github.event_name == 'release'
-      - uses: actions/upload-artifact@v4
+      - run: maturin build --release -m python/Cargo.toml --universal2
+      - uses: actions/upload-artifact@v3
         with:
-          name: pyoxigraph_macos
+          name: pyoxigraph_wheel_universal2_mac
           path: target/wheels/*.whl
   wheel_windows:
     runs-on: windows-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
         with:
           submodules: true
-      - uses: ./.github/actions/setup-rust
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@v4
         with:
-          python-version: "3.12"
-          cache: pip
-          cache-dependency-path: '**/requirements.dev.txt'
+          python-version: "3.10"
+      - run: rustup update
       - run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
       - run: pip install -r python/requirements.dev.txt
-      - run: maturin build --release --features abi3
-        working-directory: ./python
+      - run: maturin build --release -m python/Cargo.toml
       - run: pip install --no-index --find-links=target/wheels/ pyoxigraph
      - run: rm -r target/wheels
-      - run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
+      - run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
         working-directory: ./python
-      - run: maturin build --release -m python/Cargo.toml --features abi3
+      - run: maturin build --release -m python/Cargo.toml
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@v3
         with:
-          name: pyoxigraph_windows
+          name: pyoxigraph_wheel_x86_64_windows
           path: target/wheels/*.whl
-  publish_pypi:
-    if: github.event_name == 'release'
-    runs-on: ubuntu-latest
-    environment:
-      name: pypi
-      url: https://pypi.org/p/pyoxigraph
-    permissions:
-      id-token: write
-    needs:
-      - python_sdist
-      - wheel_windows
-      - wheel_mac
-      - wheel_linux
-      - wheel_linux_musl
-    steps:
-      - uses: actions/download-artifact@v4
-        with:
-          pattern: pyoxigraph_*
-          path: dist
-          merge-multiple: true
-      - uses: pypa/gh-action-pypi-publish@release/v1
-        with:
-          packages-dir: dist
-          skip-existing: true
   npm_tarball:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-      - uses: taiki-e/install-action@v2
-        with: { tool: wasm-pack }
-      - uses: actions/setup-node@v4
-        with:
-          node-version: 16
-          cache: npm
-          cache-dependency-path: "js/package.json"
-          registry-url: https://registry.npmjs.org
+      - run: rustup update
+      - run: cargo install wasm-pack
       - run: npm run pack
         working-directory: ./js
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@v3
         with:
           name: oxigraph_wasm_npm
           path: js/*.tgz
-      - run: npm run release
-        working-directory: ./js
-        env:
-          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
-        if: github.event_name == 'release'
   docker:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
         with:
-          submodules: true
+          submodules: 'recursive'
-      - uses: docker/setup-buildx-action@v3
+      - uses: docker/setup-buildx-action@v2
-      - uses: docker/login-action@v3
+      - uses: docker/login-action@v2
         with:
           registry: ghcr.io
           username: ${{github.actor}}
           password: ${{secrets.GITHUB_TOKEN}}
-      - uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_PASSWORD }}
-        if: github.event_name == 'release'
-      - uses: docker/metadata-action@v5
+      - uses: docker/metadata-action@v4
         id: docker_meta
         with:
-          images: |
-            ${{ github.repository }},enable=${{ github.event_name == 'release' }}
-            ghcr.io/${{ github.repository }}
-          tags: |
-            type=ref,event=branch
-            type=ref,event=pr
-            type=semver,pattern={{version}}
-            type=semver,pattern={{major}}.{{minor}}
+          images: ghcr.io/${{ github.repository }}
+          tags: nightly
-      - uses: docker/build-push-action@v5
+      - uses: docker/build-push-action@v3
         with:
           context: .
           file: server/Dockerfile
-          platforms: linux/amd64,linux/arm64
           pull: true
           push: true
           tags: ${{ steps.docker_meta.outputs.tags }}
           labels: ${{ steps.docker_meta.outputs.labels }}
           cache-from: type=gha
           cache-to: type=gha,mode=max
-  publish_crates:
-    if: github.event_name == 'release'
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - run: rustup update
-      - run: cargo login $CRATES_IO_TOKEN
-        env:
-          CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
-      - run: cargo publish
-        working-directory: ./oxrocksdb-sys
-        continue-on-error: true
-      - run: cargo publish
-        working-directory: ./lib/oxsdatatypes
-        continue-on-error: true
-      - run: cargo publish
-        working-directory: ./lib/oxrdf
-        continue-on-error: true
-      - run: cargo publish
-        working-directory: ./lib/oxrdfxml
-        continue-on-error: true
-      - run: cargo publish
-        working-directory: ./lib/oxttl
-        continue-on-error: true
-      - run: cargo publish
-        working-directory: ./lib/oxrdfio
-        continue-on-error: true
-      - run: cargo publish
-        working-directory: ./lib/sparesults
-        continue-on-error: true
-      - run: cargo publish
-        working-directory: ./lib/spargebra
-        continue-on-error: true
-      - run: cargo publish
-        working-directory: ./lib/sparopt
-        continue-on-error: true
-      - run: cargo publish
-        working-directory: ./lib/sparql-smith
-        continue-on-error: true
-      - run: cargo publish
-        working-directory: ./lib/oxigraph
-        continue-on-error: true
-      - run: cargo publish
-        working-directory: ./cli
-  homebrew:
-    if: "github.event_name == 'release' && !contains('-', github.event.release.tag_name)"
-    runs-on: ubuntu-latest
-    needs: full_archive
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          repository: oxigraph/homebrew-oxigraph
-          token: ${{ secrets.FULL_ACCESS_TOKEN }}
-      - run: |
-          wget "https://github.com/oxigraph/oxigraph/releases/download/${{ github.event.release.tag_name }}/oxigraph_${{ github.event.release.tag_name }}.tar.gz"
-          SHA=`shasum -a 256 "oxigraph_${{ github.event.release.tag_name }}.tar.gz" | awk '{ print $1 }'`
-          rm "oxigraph_${{ github.event.release.tag_name }}.tar.gz"
-          sed -i "s/download\/.*\.tar/download\/${{ github.event.release.tag_name }}\/oxigraph_${{ github.event.release.tag_name }}.tar/g" Formula/oxigraph.rb
-          sed -i "s/sha256 \".*\"/sha256 \"$SHA\"/g" Formula/oxigraph.rb
-          git config user.name github-actions
-          git config user.email github-actions@github.com
-          git add .
-          git diff-index --quiet HEAD || git commit -m "Upgrades to ${{ github.event.release.tag_name }}"
-          git push
-  full_archive:
-    if: github.event_name == 'release'
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - run: |
-          rm -rf .git bench fuzz
-          zip -r oxigraph_${{ github.event.release.tag_name }}.zip .
-          tar -czf /tmp/oxigraph_${{ github.event.release.tag_name }}.tar.gz .
-          mv /tmp/oxigraph_${{ github.event.release.tag_name }}.tar.gz .
-      - uses: softprops/action-gh-release@v1
-        with:
-          files: |
-            oxigraph_${{ github.event.release.tag_name }}.zip
-            oxigraph_${{ github.event.release.tag_name }}.tar.gz

@@ -1,11 +0,0 @@
if [ -f "rocksdb" ]
then
  cd rocksdb || exit
else
  git clone https://github.com/facebook/rocksdb.git
  cd rocksdb || exit
  git checkout v8.0.0
  make shared_lib
fi
sudo make install-shared
sudo ldconfig /usr/local/lib

@@ -2,23 +2,12 @@ cd /workdir
 yum -y install centos-release-scl-rh
 yum -y install llvm-toolset-7.0
 source scl_source enable llvm-toolset-7.0
-curl https://static.rust-lang.org/rustup/dist/%arch%-unknown-linux-gnu/rustup-init --output rustup-init
-chmod +x rustup-init
-./rustup-init -y --profile minimal
-source "$HOME/.cargo/env"
-export PATH="${PATH}:/opt/python/cp37-cp37m/bin:/opt/python/cp38-cp38/bin:/opt/python/cp39-cp39/bin:/opt/python/cp310-cp310/bin:/opt/python/cp311-cp311/bin"
+curl https://sh.rustup.rs -sSf | sh -s -- -y --profile minimal
+export PATH="${PATH}:/root/.cargo/bin:/opt/python/cp37-cp37m/bin:/opt/python/cp38-cp38/bin:/opt/python/cp39-cp39/bin:/opt/python/cp310-cp310/bin:/opt/python/cp311-cp311/bin"
 cd python
-python3.12 -m venv venv
+python3.10 -m venv venv
 source venv/bin/activate
 pip install -r requirements.dev.txt
-maturin develop --release
+maturin develop --release -m Cargo.toml
-python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
+python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
-maturin build --release --features abi3 --compatibility manylinux2014
+maturin build --release -m Cargo.toml
-if [ %for_each_version% ]; then
-  for VERSION in 8 9 10 11 12; do
-    maturin build --release --interpreter "python3.$VERSION" --compatibility manylinux2014
-  done
-  for VERSION in 9 10; do
-    maturin build --release --interpreter "pypy3.$VERSION" --compatibility manylinux2014
-  done
-fi

@@ -1,19 +0,0 @@
cd /workdir
apk add clang-dev
curl https://static.rust-lang.org/rustup/dist/%arch%-unknown-linux-musl/rustup-init --output rustup-init
chmod +x rustup-init
./rustup-init -y --profile minimal
source "$HOME/.cargo/env"
export PATH="${PATH}:/opt/python/cp37-cp37m/bin:/opt/python/cp38-cp38/bin:/opt/python/cp39-cp39/bin:/opt/python/cp310-cp310/bin:/opt/python/cp311-cp311/bin"
cd python
python3.12 -m venv venv
source venv/bin/activate
pip install -r requirements.dev.txt
maturin develop --release
python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
maturin build --release --features abi3 --compatibility musllinux_1_2
if [ %for_each_version% ]; then
  for VERSION in 8 9 10 11 12; do
    maturin build --release --interpreter "python3.$VERSION" --compatibility musllinux_1_2
  done
fi

@@ -0,0 +1,276 @@
name: Release artifacts
on:
  release:
    types: [ published ]
jobs:
  push_server_to_docker_registry:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - uses: docker/setup-buildx-action@v2
      - uses: docker/metadata-action@v4
        id: docker_meta
        with:
          images: |
            ${{ github.repository }}
            ghcr.io/${{ github.repository }}
      - uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
      - uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{github.actor}}
          password: ${{secrets.GITHUB_TOKEN}}
      - uses: docker/build-push-action@v3
        with:
          context: .
          file: server/Dockerfile
          pull: true
          push: true
          tags: ${{ steps.docker_meta.outputs.tags }}
          labels: ${{ steps.docker_meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
  publish_crates:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - run: rustup update
      - run: cargo login $CRATES_IO_TOKEN
        env:
          CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
      - run: cargo publish
        working-directory: ./oxrocksdb-sys
        continue-on-error: true
      - run: cargo publish
        working-directory: ./lib/oxrdf
        continue-on-error: true
      - run: sleep 60
      - run: cargo publish
        working-directory: ./lib/sparesults
        continue-on-error: true
      - run: cargo publish
        working-directory: ./lib/spargebra
        continue-on-error: true
      - run: sleep 60
      - run: cargo publish
        working-directory: ./lib
        continue-on-error: true
      - run: sleep 60
      - run: cargo publish
        working-directory: ./server
  publish_pypi_linux:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        architecture: [ "x86_64", "aarch64" ]
    continue-on-error: true
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - uses: docker/setup-qemu-action@v2
        with:
          platforms: linux/${{ matrix.architecture }}
        if: matrix.architecture != 'x86_64'
      - run: sed 's/%arch%/${{ matrix.architecture }}/g' .github/workflows/manylinux_build.sh > .github/workflows/manylinux_build_script.sh
      - run: docker run -v "$(pwd)":/workdir --platform linux/${{ matrix.architecture }} quay.io/pypa/manylinux2014_${{ matrix.architecture }} /bin/bash /workdir/.github/workflows/manylinux_build_script.sh
      - uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.PYPI_PASSWORD }}
          packages_dir: target/wheels
      - uses: softprops/action-gh-release@v1
        with:
          files: target/wheels/*.whl
  publish_pypi_mac:
    runs-on: macos-latest
    env:
      DEVELOPER_DIR: '/Applications/Xcode.app/Contents/Developer'
      SDKROOT: '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk'
      MACOSX_DEPLOYMENT_TARGET: '10.14'
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - run: rustup update && rustup target add aarch64-apple-darwin
      - run: pip install -r python/requirements.dev.txt
      - run: maturin build --release -m python/Cargo.toml
      - run: pip install --no-index --find-links=target/wheels/ pyoxigraph
      - run: rm -r target/wheels
      - run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
        working-directory: ./python
      - run: maturin publish --no-sdist --universal2 -m python/Cargo.toml -u __token__ -p ${{ secrets.PYPI_PASSWORD }}
      - run: maturin publish --no-sdist -m python/Cargo.toml -u __token__ -p ${{ secrets.PYPI_PASSWORD }}
      - uses: softprops/action-gh-release@v1
        with:
          files: target/wheels/*.whl
  publish_pypi_windows:
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - run: rustup update
      - run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
      - run: pip install -r python/requirements.dev.txt
      - run: maturin build --release -m python/Cargo.toml
      - run: pip install --no-index --find-links=target/wheels/ pyoxigraph
      - run: rm -r target/wheels
      - run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
        working-directory: ./python
      - run: maturin publish --no-sdist -m python/Cargo.toml -u __token__ -p ${{ secrets.PYPI_PASSWORD }}
      - uses: softprops/action-gh-release@v1
        with:
          files: target/wheels/*.whl
  publish_pypi_stdist:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - run: rustup update
      - run: pip install -r python/requirements.dev.txt
      - run: maturin build -m python/Cargo.toml
      - run: pip install --no-index --find-links=target/wheels/ pyoxigraph
      - run: rm -r target/wheels
      - run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
        working-directory: ./python
      - run: maturin sdist -m python/Cargo.toml
      - uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.PYPI_PASSWORD }}
          packages_dir: target/wheels
      - uses: softprops/action-gh-release@v1
        with:
          files: target/wheels/*.tar.gz
  publish_npm:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - uses: actions/setup-node@v3
        with:
          node-version: 16
          registry-url: https://registry.npmjs.org
      - run: rustup update
      - run: cargo install wasm-pack
      - run: npm install
        working-directory: ./js
      - run: npm run release
        working-directory: ./js
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
      - run: npm run pack
        working-directory: ./js
      - uses: softprops/action-gh-release@v1
        with:
          files: js/*.tgz
  publish_full_archive:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - run: |
          zip -r oxigraph_${{ github.event.release.tag_name }}.zip .
          tar -czf /tmp/oxigraph_${{ github.event.release.tag_name }}.tar.gz .
          mv /tmp/oxigraph_${{ github.event.release.tag_name }}.tar.gz .
      - uses: softprops/action-gh-release@v1
        with:
          files: |
            oxigraph_${{ github.event.release.tag_name }}.zip
            oxigraph_${{ github.event.release.tag_name }}.tar.gz
  publish_homebrew:
    if: "!contains('-', github.event.release.tag_name)"
    runs-on: ubuntu-latest
    needs: publish_full_archive
    steps:
      - uses: actions/checkout@v3
        with:
          repository: oxigraph/homebrew-oxigraph
          token: ${{ secrets.FULL_ACCESS_TOKEN }}
      - run: |
          wget "https://github.com/oxigraph/oxigraph/releases/download/${{ github.event.release.tag_name }}/oxigraph_${{ github.event.release.tag_name }}.tar.gz"
          SHA=`shasum -a 256 "oxigraph_${{ github.event.release.tag_name }}.tar.gz" | awk '{ print $1 }'`
          rm "oxigraph_${{ github.event.release.tag_name }}.tar.gz"
          sed -i "s/download\/.*\.tar/download\/${{ github.event.release.tag_name }}\/oxigraph_${{ github.event.release.tag_name }}.tar/g" Formula/oxigraph.rb
          sed -i "s/sha256 \".*\"/sha256 \"$SHA\"/g" Formula/oxigraph.rb
          git config user.name github-actions
          git config user.email github-actions@github.com
          git add .
          git diff-index --quiet HEAD || git commit -m "Upgrades to ${{ github.event.release.tag_name }}"
          git push
  publish_binary_linux:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - run: cargo build --release
        working-directory: ./server
      - run: mv target/release/oxigraph_server oxigraph_server_${{ github.event.release.tag_name }}_x86_64_linux_gnu
      - uses: softprops/action-gh-release@v1
        with:
          files: oxigraph_server_${{ github.event.release.tag_name }}_x86_64_linux_gnu
  publish_binary_mac:
    runs-on: macos-latest
    env:
      DEVELOPER_DIR: '/Applications/Xcode.app/Contents/Developer'
      SDKROOT: '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk'
      MACOSX_DEPLOYMENT_TARGET: '10.14'
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - run: rustup update
      - run: cargo build --release
        working-directory: ./server
      - run: mv target/release/oxigraph_server oxigraph_server_${{ github.event.release.tag_name }}_x86_64_apple
      - uses: softprops/action-gh-release@v1
        with:
          files: oxigraph_server_${{ github.event.release.tag_name }}_x86_64_apple
  publish_binary_windows:
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - run: rustup update
      - run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
      - run: cargo build --release
        working-directory: ./server
      - run: mv target/release/oxigraph_server.exe oxigraph_server_${{ github.event.release.tag_name }}_x86_64_windows_msvc.exe
      - uses: softprops/action-gh-release@v1
        with:
          files: oxigraph_server_${{ github.event.release.tag_name }}_x86_64_windows_msvc.exe

@@ -1,12 +1,16 @@
 name: Change tests
 on:
+  push:
+    branches:
+      - main
+      - dev
   pull_request:
     branches:
       - main
-      - next
+      - dev
   schedule:
-    - cron: "12 3 * * *"
+    - cron: "0 0 * * 0"
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -16,261 +20,72 @@ jobs:
   fmt:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
-      - uses: ./.github/actions/setup-rust
-        with:
-          component: rustfmt
+      - run: rustup update && rustup component add rustfmt
       - run: cargo fmt -- --check
   clippy:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
         with:
           submodules: true
-      - uses: ./.github/actions/setup-rust
-        with:
-          version: 1.76.0
-          component: clippy
-      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
-        working-directory: ./lib/oxsdatatypes
-      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
+      - run: rustup update && rustup component add clippy
+      - run: cargo clippy
         working-directory: ./lib/oxrdf
-      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
-        working-directory: ./lib/oxrdfxml
-      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
-        working-directory: ./lib/oxttl
-      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
-        working-directory: ./lib/oxrdfio
-      - run: cargo clippy --all-targets --features async-tokio -- -D warnings -D clippy::all
-        working-directory: ./lib/oxrdfio
-      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
-        working-directory: ./lib/sparesults
-      - run: cargo clippy --all-targets --features async-tokio -- -D warnings -D clippy::all
+      - run: cargo clippy
         working-directory: ./lib/sparesults
-      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
+      - run: cargo clippy
         working-directory: ./lib/spargebra
-      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
-        working-directory: ./lib/sparopt
-      - run: cargo clippy --all-targets --no-default-features -- -D warnings -D clippy::all
-        working-directory: ./lib/oxigraph
-      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
-        working-directory: ./lib/oxigraph
-      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
-        working-directory: ./python
-      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
-        working-directory: ./cli
-      - run: cargo clippy --all-targets -- -D warnings -D clippy::all
-        working-directory: ./testsuite
+      - run: cargo clippy --all-targets --all-features
-  clippy_wasm_js:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-        with:
-          version: 1.76.0
-          target: wasm32-unknown-unknown
-          component: clippy
-      - run: cargo clippy --lib --tests --target wasm32-unknown-unknown -- -D warnings -D clippy::all
-        working-directory: ./js
-  clippy_wasi:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-        with:
-          version: 1.76.0
-          target: wasm32-wasi
-          component: clippy
-      - run: cargo clippy --lib --tests --target wasm32-wasi -- -D warnings -D clippy::all
-        working-directory: ./lib/oxigraph
-      - run: cargo clippy --target wasm32-wasi --features abi3 --no-default-features -- -D warnings -D clippy::all
-        working-directory: ./python
-  clippy_wasm_emscripten:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-        with:
-          version: 1.76.0
-          target: wasm32-unknown-emscripten
-          component: clippy
-      - run: cargo clippy --lib --tests --target wasm32-unknown-emscripten -- -D warnings -D clippy::all
-        working-directory: ./lib/oxigraph
-      - run: cargo clippy --target wasm32-unknown-emscripten --features abi3 -- -D warnings -D clippy::all
-        working-directory: ./python
-  clippy_wasm_unknown:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-        with:
-          version: 1.76.0
-          target: wasm32-unknown-unknown
-          component: clippy
-      - run: cargo clippy --lib --tests --target wasm32-unknown-unknown --features getrandom/custom --features oxsdatatypes/custom-now -- -D warnings -D clippy::all
-        working-directory: ./lib/oxigraph
   deny:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
-      - uses: taiki-e/install-action@v2
-        with: { tool: cargo-deny }
-      - run: cargo deny check
+      - uses: EmbarkStudios/cargo-deny-action@v1
-  semver_checks:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-      - uses: taiki-e/install-action@v2
-        with: { tool: cargo-semver-checks }
-      - uses: actions/cache@v4
-        with:
-          path: rocksdb
-          key: ${{ runner.os }}-rocksdb-8.0.0
-      - run: bash .github/workflows/install_rocksdb.sh
-      - run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph-js --exclude pyoxigraph --exclude oxigraph-testsuite --exclude oxigraph-cli
-  test_linux_x86_64:
+  test_linux:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
         with:
           submodules: true
-      - uses: ./.github/actions/setup-rust
+      - run: rustup update
-      - run: cargo test
+      - run: cargo test --all-features
+        env:
+          RUST_BACKTRACE: 1
-  test_linux_i686:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-        with:
-          target: i686-unknown-linux-gnu
-      - run: sudo apt-get update && sudo apt-get install -y g++-multilib
-      - run: cargo test --target i686-unknown-linux-gnu --no-default-features --features http-client-rustls-native
-        working-directory: ./lib/oxigraph
-  test_linux_msv:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Read MSRV from Cargo.toml
-        id: metadata
-        run: echo "rust-version=$(sed -ne 's/rust-version *= *\"\(.*\)\"/\1/p' Cargo.toml)" >> $GITHUB_OUTPUT
-      - uses: ./.github/actions/setup-rust
-        with:
-          version: ${{ steps.metadata.outputs.rust-version }}
-      - run: rustup toolchain install nightly
-      - run: rm Cargo.lock && cargo +nightly update -Z direct-minimal-versions && cargo update -p bumpalo --precise 3.14.0
-      - run: cargo test
-  test_linux_latest:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-      - run: rm Cargo.lock && cargo update
-      - run: cargo test
-  test_linux_address_sanitizer:
+  address_sanitizer:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
         with:
           submodules: true
-      - uses: ./.github/actions/setup-rust
-        with:
-          version: nightly
-      - run: sudo apt-get update && sudo apt-get install -y llvm
-      - run: cargo test --tests --target x86_64-unknown-linux-gnu --workspace --exclude pyoxigraph --exclude oxigraph-testsuite --exclude oxigraph-cli
+      - run: rustup update && rustup toolchain install nightly
+      - run: cargo +nightly test --tests --target x86_64-unknown-linux-gnu --workspace --exclude pyoxigraph --exclude oxigraph_testsuite
         env:
+          RUST_BACKTRACE: 1
           RUSTFLAGS: -Z sanitizer=address
-  test_linux_dynamic_linking:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-      - uses: actions/cache@v4
-        with:
-          path: rocksdb
-          key: ${{ runner.os }}-rocksdb-8.0.0
-      - run: bash .github/workflows/install_rocksdb.sh
-      - run: cargo test --tests --features oxrocksdb-sys/pkg-config
   test_windows:
     runs-on: windows-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
         with:
           submodules: true
-      - uses: ./.github/actions/setup-rust
+      - run: rustup update
       - run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
-      - run: cargo test
+      - run: cargo test --all-features
-  test_wasi:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-        with:
-          target: wasm32-wasi
-      - uses: taiki-e/install-action@v2
-        with: { tool: "wasmtime,cargo-wasi" }
-      - run: cargo wasi test --workspace --exclude oxigraph-js --exclude oxigraph-cli --exclude oxigraph-testsuite --exclude oxrocksdb-sys --exclude pyoxigraph
-  rustdoc:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-        with:
-          version: 1.76.0
-      - run: cargo doc
         env:
-          RUSTDOCFLAGS: -D warnings
+          RUST_BACKTRACE: 1
   js:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
-      - uses: ./.github/actions/setup-rust
+      - run: rustup update
-      - uses: taiki-e/install-action@v2
-        with: { tool: wasm-pack }
+      - run: cargo install wasm-pack
-      - uses: actions/setup-node@v4
-        with:
-          node-version: 18
-          cache: npm
-          cache-dependency-path: "js/package.json"
       - run: npm install
         working-directory: ./js
       - run: npm test
@@ -279,258 +94,27 @@
   python:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
         with:
           submodules: true
-      - uses: ./.github/actions/setup-rust
+      - run: rustup update
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@v4
         with:
-          python-version: "3.12"
-          cache: pip
-          cache-dependency-path: '**/requirements.dev.txt'
+          python-version: "3.10"
       - run: pip install -r python/requirements.dev.txt
-      - run: maturin build -m python/Cargo.toml
-      - run: pip install --no-index --find-links=target/wheels/ pyoxigraph
-      - run: rm -r target/wheels
+      - run: python -m black --check --diff --color .
+        working-directory: ./python
+      - run: maturin sdist -m python/Cargo.toml
+      - run: pip install target/wheels/*.tar.gz
       - run: python -m unittest
         working-directory: ./python/tests
-      - run: sphinx-build -M doctest . build
-        working-directory: ./python/docs
       - run: sphinx-build -M html . build
         working-directory: ./python/docs
-      - run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
+      - run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
         working-directory: ./python
       - run: python -m mypy.stubtest pyoxigraph --allowlist=mypy_allowlist.txt
         working-directory: ./python
-      - run: python -m mypy generate_stubs.py tests --strict
+      - run: python -m mypy generate_stubs.py tests
         working-directory: ./python
-      - run: python -m ruff format --check .
-        working-directory: ./python
-      - run: python -m ruff check --output-format=github .
-        working-directory: ./python
-      - run: sphinx-lint docs
-        working-directory: ./python
-  python_msv:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Read MSRV from Cargo.toml
-        id: metadata
-        run: echo "rust-version=$(sed -ne 's/rust-version *= *\"\(.*\)\"/\1/p' Cargo.toml)" >> $GITHUB_OUTPUT
-      - uses: ./.github/actions/setup-rust
-        with:
-          version: ${{ steps.metadata.outputs.rust-version }}
-      - run: rustup toolchain install nightly
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.8"
-          cache: pip
-          cache-dependency-path: '**/requirements.dev.txt'
-      - run: pip install -r python/requirements.dev.txt
-      - run: rm Cargo.lock && cargo +nightly update -Z direct-minimal-versions
-      - run: maturin build -m python/Cargo.toml
-      - run: pip install --no-index --find-links=target/wheels/ pyoxigraph
-      - run: rm -r target/wheels
-      - run: python -m unittest
-        working-directory: ./python/tests
-  python_pypy:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "pypy3.10"
-          cache: pip
-          cache-dependency-path: '**/requirements.dev.txt'
-      - run: pip install -r python/requirements.dev.txt
-      - run: maturin build -m python/Cargo.toml
-      - run: pip install --no-index --find-links=target/wheels/ pyoxigraph
-      - run: rm -r target/wheels
-      - run: python -m unittest
-        working-directory: ./python/tests
-  python_windows:
-    runs-on: windows-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.12"
-          cache: pip
-          cache-dependency-path: '**/requirements.dev.txt'
-      - run: pip install "maturin~=1.0"
-      - run: maturin build -m python/Cargo.toml
-      - run: pip install --no-index --find-links=target/wheels/ pyoxigraph
-      - run: rm -r target/wheels
-      - run: python -m unittest
-        working-directory: ./python/tests
-  python_pyodide:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-          cache: pip
-          cache-dependency-path: '**/requirements.dev.txt'
-      - uses: ./.github/actions/setup-rust
-        with:
-          version: nightly
-          target: wasm32-unknown-emscripten
-      - run: |
-          pip install pyodide-build
-          echo EMSCRIPTEN_VERSION=$(pyodide config get emscripten_version) >> $GITHUB_ENV
-      - uses: mymindstorm/setup-emsdk@v13
-        with:
-          version: ${{ env.EMSCRIPTEN_VERSION }}
-      - run: pyodide build
-        working-directory: ./python
-      - run: |
-          pyodide venv venv
-          source venv/bin/activate
-          pip install --no-index --find-links=../dist/ pyoxigraph
-          python -m unittest
-        working-directory: ./python/tests
-  typos:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: taiki-e/install-action@v2
-        with: { tool: typos-cli }
-      - run: typos
-  clang_fmt:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - run: sudo apt-get update && sudo apt-get install -y clang-format
-      - run: clang-format --Werror --dry-run oxrocksdb-sys/api/*
-  fuzz_changes:
-    if: github.event_name == 'pull_request'
-    runs-on: ubuntu-latest
-    steps:
-      - uses: google/clusterfuzzlite/actions/build_fuzzers@v1
-        with:
-          language: rust
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          sanitizer: address
-          storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git
-      - uses: google/clusterfuzzlite/actions/run_fuzzers@v1
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          fuzz-seconds: 900
-          mode: code-change
-          sanitizer: address
-          minimize-crashes: true
-          parallel-fuzzing: true
-          storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git
-  fuzz_repo:
-    if: github.event_name != 'pull_request'
-    runs-on: ubuntu-latest
-    steps:
-      - uses: google/clusterfuzzlite/actions/build_fuzzers@v1
-        with:
-          language: rust
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          sanitizer: address
-      - uses: google/clusterfuzzlite/actions/run_fuzzers@v1
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          fuzz-seconds: 7200
-          mode: batch
-          sanitizer: address
-          minimize-crashes: true
-          parallel-fuzzing: true
-          storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git
-        continue-on-error: true
-      - uses: google/clusterfuzzlite/actions/run_fuzzers@v1
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          fuzz-seconds: 3600
-          mode: prune
-          storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git
-        continue-on-error: true
-  fuzz_coverage:
-    if: github.event_name != 'pull_request'
-    needs: fuzz_repo
-    runs-on: ubuntu-latest
-    steps:
-      - uses: google/clusterfuzzlite/actions/build_fuzzers@v1
-        with:
-          language: rust
-          sanitizer: coverage
-      - uses: google/clusterfuzzlite/actions/run_fuzzers@v1
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          fuzz-seconds: 3600
-          mode: coverage
-          sanitizer: coverage
-          storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git
-  shellcheck:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - run: sudo apt-get update && sudo apt-get install -y shellcheck
-      - run: git grep -l '^#\( *shellcheck \|!\(/bin/\|/usr/bin/env \)\(sh\|bash\|dash\|ksh\)\)' | xargs shellcheck
-  spec_links:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - run: python lints/test_spec_links.py
-  debian_compatibility:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: ./.github/actions/setup-rust
-      - run: python lints/test_debian_compatibility.py
-  codspeed:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-      - uses: taiki-e/install-action@v2
-        with: { tool: cargo-codspeed }
-      - run: cargo codspeed build -p oxigraph --features http-client-native-tls
-      - run: cargo codspeed build -p oxigraph-testsuite
-      - uses: CodSpeedHQ/action@v2
-        with:
-          run: cargo codspeed run
-          token: ${{ secrets.CODSPEED_TOKEN }}
-  codecov:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: ./.github/actions/setup-rust
-      - uses: taiki-e/install-action@v2
-        with: { tool: cargo-llvm-cov }
-      - run: cargo llvm-cov --codecov --output-path codecov.json
-      - uses: codecov/codecov-action@v4
-        with:
-          files: codecov.json
-          flags: rust
-          fail_ci_if_error: true
-          token: ${{ secrets.CODECOV_TOKEN }}

.gitmodules (vendored, 10 lines changed)

@@ -7,13 +7,9 @@
 [submodule "bench/bsbm-tools"]
   path = bench/bsbm-tools
   url = https://github.com/Tpt/bsbm-tools.git
+[submodule "oxrocksdb-sys/rocksdb"]
+  path = oxrocksdb-sys/rocksdb
+  url = https://github.com/facebook/rocksdb.git
 [submodule "oxrocksdb-sys/lz4"]
   path = oxrocksdb-sys/lz4
   url = https://github.com/lz4/lz4.git
-[submodule "testsuite/N3"]
-  path = testsuite/N3
-  url = https://github.com/w3c/N3.git
-  branch = master
-[submodule "testsuite/rdf-canon"]
-  path = testsuite/rdf-canon
-  url = https://github.com/w3c/rdf-canon.git

@@ -1,3 +0,0 @@
Thomas Tanon <thomas@pellissier-tanon.fr> <thomaspt@hotmail.fr> <Tpt@users.noreply.github.com>
Thomas Tanon <thomas@pellissier-tanon.fr>
Thomas Tanon <thomas.pellissier-tanon@helsing.ai>

@@ -7,7 +7,7 @@ build:
   os: "ubuntu-22.04"
   tools:
     python: "3"
-    rust: "1.70"
+    rust: "1.61"
   apt_packages:
     - clang

@@ -1,317 +1,3 @@
## [0.4.0-alpha.6] - 2024-03-25
### Changed
- Fixes compatibility with OxIRI 0.2.3.
## [0.4.0-alpha.5] - 2024-03-23
### Added
- Python: `Dataset` class
### Changed
- Rust: `Dataset::canonicalize` and `Graph::canonicalize` takes for input a `CanonicalizationAlgorithm` to set which algorithm to use.
- Upgrades RocksDB to 9.0.0
- JS: Drops support for NodeJS 12-16 and for older web browsers (e.g. Edge before Chromium).
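A minimal sketch of the new canonicalization call, assuming the algorithm enum lives at `oxrdf::dataset::CanonicalizationAlgorithm` and exposes an `Unstable` variant:

```rust
use oxrdf::dataset::CanonicalizationAlgorithm;
use oxrdf::{Dataset, GraphNameRef, NamedNodeRef, QuadRef};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut dataset = Dataset::new();
    dataset.insert(QuadRef::new(
        NamedNodeRef::new("http://example.com/s")?,
        NamedNodeRef::new("http://example.com/p")?,
        NamedNodeRef::new("http://example.com/o")?,
        GraphNameRef::DefaultGraph,
    ));
    // The algorithm to use is now an explicit argument of `canonicalize`.
    dataset.canonicalize(CanonicalizationAlgorithm::Unstable);
    Ok(())
}
```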
## [0.4.0-alpha.4] - 2024-03-07
### Added
- Rust: `From<spargebra::Update>` for `Update`.
- `sparesults`: Tokio Async readers for SPARQL query results (XML, JSON and TSV).
- `oxrdf`: `Term::from_terms` constructor.
- JS: options to set query and update base IRI and query union default graph.
### Changed
- Uses RocksDB atomic multi-columns flush.
- Rust: RocksDB is optional but enabled by default.
- Upgrades RocksDB to 8.11.3.
- Rust: `oxigraph` crate has been moved to `lib/oxigraph`.
- Rust: `QueryResults::write` returns the `Write` impl and not `()` (see the sketch after this list).
- Rust: use `thiserror` for errors.
- Rust: rename `oxrdfio::{Parse,Syntax}Error` to `oxrdfio::Rdf{Parse,Syntax}Error`,
`oxrdfxml::{Parse,Syntax}Error` to `oxrdfxml::RdfXml{Parse,Syntax}Error`,
`oxttl::{Parse,Syntax}Error` to `oxttl::Turtle{Parse,Syntax}Error`,
`sparesults::{Parse,Syntax}Error` to `sparesults::QueryResults{Parse,Syntax}Error` and
`spargebra::SyntaxError` to `spargebra::SparqlSyntaxError`.
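A sketch of the new `QueryResults::write` contract, assuming the format enum is re-exported at `oxigraph::sparql::results`:

```rust
use oxigraph::sparql::results::QueryResultsFormat;
use oxigraph::store::Store;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = Store::new()?;
    // `write` now hands back the `Write` impl instead of returning `()`.
    let json: Vec<u8> = store
        .query("SELECT * WHERE { ?s ?p ?o }")?
        .write(Vec::new(), QueryResultsFormat::Json)?;
    println!("{}", String::from_utf8(json)?);
    Ok(())
}
```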
## [0.4.0-alpha.3] - 2024-01-25
### Added
- `oxttl`: expose base IRIs.
- `oxttl`: allows injecting prefixes for serialization.
- `oxrdf`: `vocab::geosparql::WKT_LITERAL`.
### Changed
- Turtle: Fixes a parsing bug with an escaped dot at the end of a local name.
- `oxttl`: Changes `prefixes` getter return type.
- JS: simplify build.
- Python: uses rustls by default on all platforms that are not Windows/macOS/iOS/WASM.
- Strips debug info of the Rust std library in release builds.
## [0.4.0-alpha.2] - 2024-01-08
### Added
- i686 linux support
### Changed
- Docker: fixes Docker image Glib version error.
- Docker: tags now use semver, e.g. `0.3.22` and not `v0.3.22`. Pre-releases are also not tagged `latest` anymore.
- Python: `QuerySolution` is now thread safe.
## [0.4.0-alpha.1] - 2024-01-03
### Added
- `sparopt` crate: A new still quite naive query optimizer.
- `oxttl` crate: An N-Triples/N-Quads/Turtle/TriG/N3 parser and serializer compatible with Tokio.
- `oxrdfxml` crate: An RDF/XML parser and serializer compatible with Tokio.
- `oxrdfio` crate: A stand-alone crate with Oxigraph's I/O-related APIs (see the sketch after this list).
- Rust: SPARQL results I/O is now exposed in the `oxigraph` crate (`oxigraph::sparql::results` module).
- Rust: It is now possible to dynamically link rocksdb with the `rocksdb-pkg-config` feature.
- Python: error location is now included in some `SyntaxError` exceptions.
- Python: the file type can be guessed from the file path extension during parsing and serialization.
- Python: the serialization method returns a `bytes` value if no output-related argument is given.
- Python: SPARQL query results I/O is now exposed (`parse_query_results` function and `.serialize` method).
- Python: `RdfFormat` and `QueryResultsFormat` enum to encode supported formats.
- CLI: a `convert` command to convert RDF files between different formats.
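As an illustration of the unified I/O crate, a sketch assuming `RdfParser::from_format` and `parse_read` as the entry points of this alpha API:

```rust
use oxrdfio::{RdfFormat, RdfParser};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let turtle = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
    // One parser front-end for every supported RDF format.
    for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle.as_bytes()) {
        println!("{}", quad?);
    }
    Ok(())
}
```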
### Removed
- Rust: automated flush at the end of serialization. This should be done explicitly now.
- oxsdatatypes: Deprecated methods.
- Python: 3.7 and musl Linux 1.1 support.
- Python: `GraphName.value`.
### Changed
- SPARQL: a digit after `.` is now required for `xsd:decimal`.
- SPARQL: calendar subtraction returns `xsd:dayTimeDuration` and not `xsd:duration`.
- SPARQL: Unicode escapes (`\u` and `\U`) are now only supported in IRIs and strings and not everywhere.
- Literal serialization now produces canonical N-Triples according to the RDF 1.2 and RDF Dataset Canonicalization drafts.
- Rust: MSRV is now 1.70.
- Rust: makes `GraphName` implement `Default`.
- Rust: `wasm32-unknown-unknown` no longer assumes a JS platform by default. Enable the `js` feature for that.
- Rust: Parsers take `Read` and not `BufRead` for input.
- Rust: `GraphFormat` and `DatasetFormat` have been merged into `RdfFormat`.
- Rust: `GraphParser` and `DatasetParser` have been merged into `RdfParser`.
- Rust: `GraphSerializer` and `DatasetSerializer` have been merged into `RdfSerializer`.
- Rust: query results are now `Send` and `Sync`.
- Rust: `Store.load_graph` and `Store.load_dataset` have been merged into a single `load_from_read` method.
- Rust: `Store.dump_graph` and `Store.dump_dataset` have been renamed to `dump_graph_to_write` and `dump_to_write` (see the sketch after this list).
- Rust: `BulkLoader.set_*` methods have been renamed to `BulkLoader.with_*`.
- oxsdatatypes: pass by-values instead of by-reference parameters when relevant.
- oxsdatatypes: error types have been redesigned.
- oxsdatatypes: return an error when building a non-serializable duration (year-month and day-time of opposite signs).
- sparesults: renames some methods to move closer to the new oxrdfio crate.
- Python: raise `OSError` instead of `IOError` on OS errors.
- Python: the `mime_type` parameter has been renamed to `format`.
- Python: boolean SPARQL results are now encoded with a `QueryBoolean` class and not a simple `bool`.
- Python: a `path` parameter has been added to all I/O methods to read from a file.
The existing `input` parameter now considers `str` values to be a serialization to parse.
- JS: the `mime_type` parameter has been renamed to `format`.
- CLI: the `oxigraph_server` binary has been renamed to `oxigraph`.
- CLI: the `--location` argument is now part of sub-commands where it is relevant.
`oxigraph_server --location foo serve` is not possible anymore.
One now needs to write `oxigraph serve --location foo`.
- CLI: it is now possible to upload gzip-encoded files to the HTTP API with the `Content-Encoding: gzip` header.
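A sketch of the merged loading and dumping methods, assuming `RdfFormat` converts into the parser and serializer configurations:

```rust
use oxigraph::io::RdfFormat;
use oxigraph::store::Store;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = Store::new()?;
    // A single method now loads both graph and dataset formats.
    store.load_from_read(
        RdfFormat::Turtle,
        "<http://example.com/s> <http://example.com/p> <http://example.com/o> .".as_bytes(),
    )?;
    // Dumping returns the `Write` impl it was given.
    let nquads = store.dump_to_write(RdfFormat::NQuads, Vec::new())?;
    print!("{}", String::from_utf8(nquads)?);
    Ok(())
}
```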
## [0.3.22] - 2023-11-29
### Changed
- Allows compiling with more recent `bindgen` and `cc`.
- Fixes compatibility with the `spin_no_std` feature of `lazy_static`.
## [0.3.21] - 2023-11-29
### Changed
- Bulk loader: do not fail when loading empty files.
- Python: fixes source distribution.
- Upgrades RocksDB to 7.8.1.
## [0.3.20] - 2023-10-23
### Changed
- SPARQL: fixes `STR` evaluation on small IRIs (less than 16 bytes) that was broken.
- SPARQL update: fixes `WHERE` clause evaluation that was generating too many solutions in some cases.
- Upgrades RocksDB to 8.7.1.
## [0.3.19] - 2023-08-18
### Added
- Python: allows giving a `pathlib.Path` argument when a path is expected.
- Cargo.toml: adds a documentation field to link to the docs.rs documentation.
### Changed
- Upgrades RocksDB to 8.3.2.
## [0.3.18] - 2023-06-13
### Changed
- SPARQL: fixes evaluation of `||` when all alternatives are `false`. The result must be `false` and not an error.
- SPARQL: `xsd:duration`: it is now possible to properly parse and serialize all values that can be internally represented by the encoding used by Oxigraph.
- Python: fixes `Store.contains_named_graph` return type annotation.
## [0.3.17] - 2023-06-11
### Added
- SPARQL: support of `xsd:duration` in the SPARQL `SUM` aggregate function.
- Server: support for systemd "notify" service option.
- Server: uses the target graph name as base URI when `POST`ing and `PUT`ing data.
- Python: `Store.contains_named_graph` method.
### Changed
- SPARQL: `xsd:decimal` multiplication and division have been improved to avoid raising overflow errors caused by internal operations in some cases.
- SPARQL: `xsd:decimal` parser is now able to properly parse all numbers that can be internally represented by the encoding used by Oxigraph.
- SPARQL: `xsd:decimal` avoids bad overflow on unary `-` operator.
- SPARQL: reduces loss of precision when casting `xsd:decimal` to `xsd:float` or `xsd:double`.
- SPARQL: slightly improves the AST in the presence of many `&&`, `||` or `IN` operators.
- SPARQL: fixes some optimization bugs around `MINUS` inside `OPTIONAL`.
- SPARQL: makes the choice of a for-loop join instead of a hash join a bit more aggressive.
- Avoids random crashes when building RocksDB for aarch64.
- Python: fixes support of text I/O with non-ASCII characters.
- Python: upgrades PyO3 to 0.19.
- `spargebra`: `GroundTermPattern::Triple` is now properly gated behind the `"rdf-star"` feature.
- `oxsdatatypes`: Deprecates `is_naan` that is renamed to `is_nan` (we like bread but prefer numbers).
- `oxsdatatypes`: Adds `checked_neg` methods on numerical types.
- `oxsdatatypes`: Drops `nom` dependency and uses hand-written parsers instead.
## [0.3.16] - 2023-04-29
### Changed
- Fixes flush and compaction on the GSPO index. It might improve Oxigraph's performance and storage space usage.
- SPARQL: fixes some optimizations in the presence of quoted triples with nested variables.
- SPARQL profiler: adds the EXISTS operation to the explanation and profiling tree.
- Upgrades RocksDB to 8.1.1.
## [0.3.15] - 2023-04-18
### Added
- Rust: adds the `Store.explain_query_opt` method that returns an explanation of the evaluation with profiling statistics.
- Server: adds explanation and profiling to the `query` action (`--explain`, `--explain-file` and `--stats` options).
- Python: implements the `__match_args__` attribute on terms (literals, triples, quads...) to allow `match` operator usage.
- Server: adds the `--cors` option to the `serve` actions to allow cross-origin resource sharing.
### Changed
- SPARQL: fixes evaluation of empty aggregations without GROUP BY: aggregators now return their default value (0 for COUNT...) and not an empty row (see the sketch after this list).
- SPARQL: fixes parsing of decimal with more than 19 digits in the fractional part.
- Server docker image: switches to the smaller distroless base images.
- Bulk loader: by default only uses 2 concurrent threads and around 2GB of RAM.
- Server load: progress is now printed to stderr much more regularly.
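A sketch of the fixed aggregate behavior on an empty store (the query is a hypothetical illustration):

```rust
use oxigraph::sparql::QueryResults;
use oxigraph::store::Store;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = Store::new()?;
    // Aggregating over zero rows without GROUP BY now yields one row with
    // the aggregate's default value (0 for COUNT) instead of no row at all.
    if let QueryResults::Solutions(mut solutions) =
        store.query("SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }")?
    {
        if let Some(solution) = solutions.next() {
            println!("{:?}", solution?.get("count")); // expected: 0
        }
    }
    Ok(())
}
```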
## [0.3.14] - 2023-03-19
### Added
- Read-only and secondary RocksDB storage. Allows opening the data as read-only or following a primary instance.
- Server: adds multiple commands:
- `serve-secondary` and `serve-read-only` to run the HTTP server in secondary and read-only modes.
- `dump` to dump the database content to a file.
- `query` and `update` to execute SPARQL queries and updates.
- `backup` to do a database backup.
- `optimize` to optimize the database storage.
- Server: adds `format` and `graph` options to the `load` command in order to select the loaded data format and the target graph.
- Python: `Store.extend` and `Store.bulk_extend` methods.
- Python: allows pickling basic data model classes.
### Changed
- Upgrades RocksDB to 8.0.0.
- Server: listening on localhost now properly listens on both IPv4 and IPv6.
- RDF/XML and XML results parsers: avoid an ever-growing buffer.
- JSON SPARQL results: allows the "head" key to be at the end of the document.
- TSV SPARQL results: properly quote `\t` and print trailing line breaks.
- `Term::from_str`: fixes parsing of blank nodes in object position of quoted triples.
- `QuerySolution`: implements `Eq` and `Debug`.
- JS: Reduces WASM build size.
- OxRDF: fixes `Graph` and `Dataset` serialization to output proper N-Triples and N-Quads.
## [0.3.13] - 2023-02-23
### Changed
- Fixes serialization of times with a decimal number of seconds lower than 10 (now `01:01:01.01` and not `01:01:1.01`).
- Turtle and TriG: fixes parsing for prefixes named after keywords (`prefix`, `base`, `graph`, `true` and `false`).
## [0.3.12] - 2023-02-18
### Added
- `From<NamedOrBlankNode>` for `GraphName` (and similarly for `*Ref`).
- Prebuilt Python wheels for Linux with [musl libc](https://www.musl-libc.org/).
### Changed
- Fixes TriG serialization.
- `QueryDataset` is now properly exposed in the public Rust API.
## [0.3.11] - 2023-01-18
### Added
- SPARQL: Implementation of the `LATERAL` operator following [SPARQL SEP-0006](https://github.com/w3c/sparql-12/blob/main/SEP/SEP-0006/sep-0006.md).
Support is behind the `sep-0006` feature in `spargebra` and enabled by default in Oxigraph (see the sketch after this list).
- SPARQL: Implementation of the `ADJUST` function following [SPARQL SEP-0002](https://github.com/w3c/sparql-12/blob/main/SEP/SEP-0002/sep-0002.md).
Support is behind the `sep-0002` feature in `spargebra` and enabled by default in Oxigraph.
- Rust: There is a new stand-alone crate `oxsdatatypes` implementing Rust structs for the common XML schema datatypes.
It was part of the Oxigraph crate and it might be useful for other SPARQL or XPath implementations.
- Rust: The `oxigraph` crate can now be compiled for `wasm32-wasi` with the basic in-memory backend but without RocksDB.
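A sketch of `LATERAL` through the Rust API; the query is a hypothetical illustration of SEP-0006 scoping, not taken from the test suite:

```rust
use oxigraph::sparql::QueryResults;
use oxigraph::store::Store;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = Store::new()?;
    // LATERAL re-evaluates its right side once per left binding, so ?person
    // stays bound inside the sub-SELECT and its LIMIT applies per person.
    let query = "SELECT * WHERE {
        ?person a <http://example.com/Person> .
        LATERAL { SELECT ?email WHERE { ?person <http://example.com/email> ?email } LIMIT 1 }
    }";
    if let QueryResults::Solutions(solutions) = store.query(query)? {
        for solution in solutions {
            println!("{:?}", solution?);
        }
    }
    Ok(())
}
```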
### Changed
- SPARQL: The property path evaluator sometimes emitted duplicates when evaluating disjunctive patterns (`a|b`). It is now fixed.
- SPARQL: If written explicitly in the SPARQL query, regular expressions are now compiled once and not for each row.
- SPARQL: Property path evaluation with both start and end variables bound has been optimized.
- SPARQL: Casts to `xsd:decimal` from `xsd:float` and `xsd:double` now properly fails on overflow instead of overflowing the internal 128-bits representation.
This follows [XPath casting rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-decimal).
- Rust: The minimum supported Rust version is set to 1.60 and enforced using the CI.
- Python: Local builds will now target the specific Python version instead of [abi3](https://docs.python.org/3/c-api/stable.html).
abi3 wheels are still released on PyPI alongside new Python-version-specific builds for Linux+GNU.
- SPARQL: Fixes a panic when the estimated upper bound of the results size was overflowing a `usize`.
- Python: Uses `typing.IO` in Python stubs instead of narrower interfaces.
- Upgrades RocksDB to 7.9.2, `quick-xml` to 0.27 and `pyo3` to 0.18.
## [0.3.10] - 2022-12-21
### Added
- SPARQL: Property paths with an unbound graph are now evaluated.
For example, `SELECT * WHERE { GRAPH ?g { ?s ex:p+ ?o } }` now works.
- SPARQL: The query optimizer is now fuzzed for better testing.
### Changed
- SPARQL: Evaluation of zero-length paths with both start and end unbounded now returns only terms defined in the current graph, not terms only defined in the query.
For example, in `SELECT * WHERE { VALUES ?s { 1 } { ?s ex:p? ?o } }` the literal `1` won't be returned anymore if not in the queried graph.
- Python: type stubs are now compatible with Python 3.7 and Mypy strict mode.
- RDF/XML: allows entities declared using other entities.
- Upgrades `quick-xml` to 0.26.
## [0.3.9] - 2022-12-07
### Added
- Server: The `/store` endpoint now has a `no_transaction` HTTP option for `POST` and `PUT` requests to get better performance at the cost of transactional guarantees.
- Server: The `/store` endpoint now has a `lenient` HTTP option for `POST` and `PUT` requests to ignore syntax errors (requires the `no_transaction` option).
- Server: allows paths that are not valid UTF-8 in file path CLI arguments.
- Rust: `From<spargebra::Query>` to `oxigraph::Query` (thanks to @hobofan).
### Changed
- SPARQL: the `NOW()` function properly returns the current time and not 1970-01-01.
- SPARQL: fixes serialization of SPARQL queries (property path and STRSTARTS function).
- SPARQL: slightly optimize aggregates by avoiding an unneeded projection.
- SPARQL: the parser now cleanly fails if invalid `VALUES` clauses are provided.
- SPARQL: In DELETE/INSERT UPDATE the currently written values can't be read anymore ("Halloween problem").
- `oxrdf`: makes Clippy run without warnings when `rdf-star` is disabled.
- Python: makes type annotations compatible with Python 3.7.
- Python: makes sure the parameter default value is always included in the type annotation.
## [0.3.8] - 2022-10-22 ## [0.3.8] - 2022-10-22
### Changed ### Changed
@ -440,8 +126,8 @@
## [0.3.0-beta.1] - 2022-01-29 ## [0.3.0-beta.1] - 2022-01-29
### Added ### Added
- [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) support. `Triple` is now a possible `Term`. Serialization formats and SPARQL support have been updated to match the [latest version of the specification draft](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html). - [RDF-star](https://w3c.github.io/rdf-star/cg-spec) support. `Triple` is now a possible `Term`. Serialization formats and SPARQL support have been updated to match the [latest version of the specification draft](https://w3c.github.io/rdf-star/cg-spec/2021-07-01.html).
- Fast data bulk load with the `Store` `bulk_load_dataset` and `bulk_load_graph` methods and a special command-line option of the server. - Fast data bulk load with the `Store` `bulk_load_dataset` and `bulk_load_graph` methods and a special command line option of the server.
- It is now possible to quickly backup the database using the `backup` method. - It is now possible to quickly backup the database using the `backup` method.
- Rust: `*Syntax::from_extension` to easily guess a graph/dataset/sparql result format from a file extension. - Rust: `*Syntax::from_extension` to easily guess a graph/dataset/sparql result format from a file extension.
- Rust: Custom SPARQL functions are now supported using `QueryOptions::with_custom_function`. - Rust: Custom SPARQL functions are now supported using `QueryOptions::with_custom_function`.
@ -450,14 +136,14 @@
- `Store` operations are now transactional using the "repeatable read" isolation level: - `Store` operations are now transactional using the "repeatable read" isolation level:
the store only exposes changes that have been "committed" (i.e. no partial writes) and the exposed state does not change for the complete duration of a read operation (e.g. a SPARQL query) or a read/write operation (e.g. a SPARQL update). the store only exposes changes that have been "committed" (i.e. no partial writes) and the exposed state does not change for the complete duration of a read operation (e.g. a SPARQL query) or a read/write operation (e.g. a SPARQL update).
the `Store` `transaction` method now allows to do read/write transactions. the `Store` `transaction` method now allows to do read/write transactions.
-`RDF-star <https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html>`_ is now supported (including serialization formats and SPARQL-star). :py:class:`.Triple` can now be used in :py:attr:`.Triple.subject`, :py:attr:`.Triple.object`, :py:attr:`.Quad.subject` and :py:attr:`.Quad.object`. -`RDF-star <https://w3c.github.io/rdf-star/cg-spec>`_ is now supported (including serialization formats and SPARQL-star). :py:class:`.Triple` can now be used in :py:attr:`.Triple.subject`, :py:attr:`.Triple.object`, :py:attr:`.Quad.subject` and :py:attr:`.Quad.object`.
### Changed ### Changed
- SPARQL: It is now possible to compare `rdf:langString` literals with the same language tag. - SPARQL: It is now possible to compare `rdf:langString` literals with the same language tag.
- SPARQL: The parser now validates more carefully the inputs following the SPARQL specification and test suite. - SPARQL: The parser now validates more carefully the inputs following the SPARQL specification and test suite.
- SPARQL: Variable scoping was buggy with "FILTER EXISTS". It is now fixed. - SPARQL: Variable scoping was buggy with "FILTER EXISTS". It is now fixed.
- Rust: RDF model, SPARQL parser and SPARQL result parsers have been moved to stand-alone reusable libraries. - Rust: RDF model, SPARQL parser and SPARQL result parsers have been moved to stand-alone reusable libraries.
- Rust: HTTPS is not supported by default with the `http_client` option. You need to enable the `native-tls` or the `rustls` feature of the `oxhttp` crate to enable a TLS layer. - Rust: HTTPS is not supported by default with the `http_client` option. You need to enable the `native-tls` or the `rustls` feature of the `oxhttp` crate to enable a TSL layer.
- Rust: The error types have been cleaned. - Rust: The error types have been cleaned.
Most of the `Store` methods now return a `StorageError` that is more descriptive than the previous `std::io::Error`. Most of the `Store` methods now return a `StorageError` that is more descriptive than the previous `std::io::Error`.
The new error types all implement `Into<std::io::Error>` for easy conversion. The new error types all implement `Into<std::io::Error>` for easy conversion.
@ -465,12 +151,12 @@
It is the type used for the `subject` field of the `Triple` and `Quad` structs. It is the type used for the `subject` field of the `Triple` and `Quad` structs.
- Rust: The SPARQL algebra is not anymore publicly exposed in the `oxigraph` crate. The new `oxalgebra` crate exposes it. - Rust: The SPARQL algebra is not anymore publicly exposed in the `oxigraph` crate. The new `oxalgebra` crate exposes it.
- Rust: `UpdateOptions` API have been rewritten. It can now be built using `From<QueryOptions>` or `Default`. - Rust: `UpdateOptions` API have been rewritten. It can now be built using `From<QueryOptions>` or `Default`.
- Server: The command-line API has been redesigned. See the [server README](server/README.md) for more information. - Server: The command line API has been redesigned. See the [server README](server/README.md) for more information.
- Server: The HTTP implementation is now provided by [`oxhttp`](https://github.com/oxigraph/oxhttp). - Server: The HTTP implementation is now provided by [`oxhttp`](https://github.com/oxigraph/oxhttp).
- Server: The HTTP response bodies are now generated on the fly instead of being buffered. - Server: The HTTP response bodies are now generated on the fly instead of being buffered.
- Python: The `SledStore` and `MemoryStore` classes have been removed in favor of the `Store` class. - Python: The `SledStore` and `MemoryStore` classes have been removed in favor of the `Store` class.
- JS: The `MemoryStore` class has been renamed to `Store`. - JS: The `MemoryStore` class has been renamed to `Store`.
- JS: The [RDF/JS `DataFactory` interface](http://rdf.js.org/data-model-spec/#datafactory-interface) is now implemented by the `oxigraph` module itself and the `MemoryStore.dataFactory` property has been removed. - JS: The [RDF/JS `DataFactory` interface](http://rdf.js.org/data-model-spec/#datafactory-interface) is now implemented by the `oxigraph` module itself and the `MemoryStore.dataFactory` propery has been removed.
- The implementation of SPARQL evaluation has been improved for better performances (especially joins). - The implementation of SPARQL evaluation has been improved for better performances (especially joins).
- The TLS implementation used in SPARQL HTTP calls is now [rustls](https://github.com/rustls/rustls) and not [native-tls](https://github.com/sfackler/rust-native-tls). The host system certificate registry is still used. - The TLS implementation used in SPARQL HTTP calls is now [rustls](https://github.com/rustls/rustls) and not [native-tls](https://github.com/sfackler/rust-native-tls). The host system certificate registry is still used.
- Spargebra: The basic RDF terms are now the ones of the `oxrdf` crate. - Spargebra: The basic RDF terms are now the ones of the `oxrdf` crate.
@ -486,7 +172,7 @@ Many thanks to [Thad Guidry](https://github.com/thadguidry), [James Overton](htt
## [0.2.5] - 2021-07-11 ## [0.2.5] - 2021-07-11
### Added ### Added
- [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) parser. - [SPARQL 1.1 Query Results JSON Format](http://www.w3.org/TR/sparql11-results-json/) parser.
- Python wheels for macOS are now universal2 binaries. - Python wheels for macOS are now universal2 binaries.
### Changed ### Changed

@ -5,10 +5,7 @@ authors:
family-names: Pellissier Tanon family-names: Pellissier Tanon
email: thomas@pellissier-tanon.fr email: thomas@pellissier-tanon.fr
orcid: "https://orcid.org/0000-0002-0620-6486" orcid: "https://orcid.org/0000-0002-0620-6486"
doi: 10.5281/zenodo.7408022 license: "MIT OR Apache-2.0"
license:
- Apache-2.0
- MIT
message: "If you use this software, please cite it as below." message: "If you use this software, please cite it as below."
repository-code: "https://github.com/oxigraph/oxigraph" repository-code: "https://github.com/oxigraph/oxigraph"
title: Oxigraph title: Oxigraph

1490
Cargo.lock generated

File diff suppressed because it is too large

@ -1,272 +1,16 @@
[workspace] [workspace]
members = [ members = [
"js", "js",
"lib/oxigraph", "lib",
"lib/oxrdf", "lib/oxrdf",
"lib/oxrdfio",
"lib/oxrdfxml",
"lib/oxsdatatypes",
"lib/oxttl",
"lib/sparesults",
"lib/spargebra", "lib/spargebra",
"lib/sparopt", "lib/sparesults",
"lib/sparql-smith", "python",
"oxrocksdb-sys",
"server",
"testsuite" "testsuite"
] ]
resolver = "2"
[workspace.package]
version = "0.4.0-alpha.7-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
edition = "2021"
rust-version = "1.70"
[workspace.dependencies]
rocksdb = {git = "https://git.nextgraph.org/NextGraph/rust-rocksdb.git", branch = "master", features = [ ] }
serde = { version = "1.0.142", features = ["derive"] }
anyhow = "1.0.72"
arbitrary = "1.3"
assert_cmd = "2.0"
assert_fs = "1.0"
bindgen = ">=0.60, <0.70"
cc = "1.0.73"
clap = "4.0"
codspeed-criterion-compat = "2.3.3"
console_error_panic_hook = "0.1.7"
digest = "0.10"
flate2 = "1.0"
getrandom = "0.2.8"
hex = "0.4"
js-sys = "0.3.60"
json-event-parser = "0.2.0-alpha.2"
md-5 = "0.10"
memchr = "2.5"
oxilangtag = "0.1"
oxiri = "0.2.3"
peg = "0.8"
pkg-config = "0.3.25"
predicates = ">=2.0, <4.0"
pyo3 = "0.21.0"
quick-xml = ">=0.29, <0.32"
rand = "0.8"
rayon-core = "1.11"
regex = "1.7"
sha1 = "0.10"
sha2 = "0.10"
siphasher = ">=0.3, <2.0"
text-diff = "0.4"
thiserror = "1.0.50"
time = "0.3"
tokio = "1.29"
url = "2.4"
wasm-bindgen = "0.2.83"
zstd = ">=0.12, <0.14"
# Internal dependencies
oxigraph = { version = "=0.4.0-alpha.7-dev", path = "lib/oxigraph" }
oxrdf = { version = "=0.2.0-alpha.4", path = "lib/oxrdf" }
oxrdfio = { version = "=0.1.0-alpha.5", path = "lib/oxrdfio" }
oxrdfxml = { version = "=0.1.0-alpha.5", path = "lib/oxrdfxml" }
oxsdatatypes = { version = "=0.2.0-alpha.1", path = "lib/oxsdatatypes" }
oxttl = { version = "=0.1.0-alpha.5", path = "lib/oxttl" }
sparesults = { version = "=0.2.0-alpha.4", path = "lib/sparesults" }
spargebra = { version = "=0.3.0-alpha.4", path = "lib/spargebra" }
sparopt = { version = "=0.1.0-alpha.5-dev", path = "lib/sparopt" }
[workspace.lints.rust]
absolute_paths_not_starting_with_crate = "warn"
elided_lifetimes_in_paths = "warn"
explicit_outlives_requirements = "warn"
let_underscore_drop = "warn"
macro_use_extern_crate = "warn"
# TODO missing_docs = "warn"
trivial_casts = "warn"
trivial_numeric_casts = "warn"
unsafe_code = "warn"
unused_import_braces = "warn"
unused_lifetimes = "warn"
unused_macro_rules = "warn"
unused_qualifications = "warn"
[workspace.lints.clippy]
allow_attributes = "warn"
allow_attributes_without_reason = "warn"
as_underscore = "warn"
assertions_on_result_states = "warn"
bool_to_int_with_if = "warn"
borrow_as_ptr = "warn"
case_sensitive_file_extension_comparisons = "warn"
cast_lossless = "warn"
cast_possible_truncation = "warn"
cast_possible_wrap = "warn"
cast_precision_loss = "warn"
cast_ptr_alignment = "warn"
cast_sign_loss = "warn"
checked_conversions = "warn"
clone_on_ref_ptr = "warn"
cloned_instead_of_copied = "warn"
copy_iterator = "warn"
create_dir = "warn"
dbg_macro = "warn"
decimal_literal_representation = "warn"
default_trait_access = "warn"
default_union_representation = "warn"
deref_by_slicing = "warn"
disallowed_script_idents = "warn"
doc_link_with_quotes = "warn"
empty_drop = "warn"
empty_enum = "warn"
empty_structs_with_brackets = "warn"
enum_glob_use = "warn"
error_impl_error = "warn"
exit = "warn"
expect_used = "warn"
expl_impl_clone_on_copy = "warn"
explicit_deref_methods = "warn"
explicit_into_iter_loop = "warn"
explicit_iter_loop = "warn"
filetype_is_file = "warn"
filter_map_next = "warn"
flat_map_option = "warn"
fn_params_excessive_bools = "warn"
fn_to_numeric_cast_any = "warn"
format_push_string = "warn"
from_iter_instead_of_collect = "warn"
get_unwrap = "warn"
host_endian_bytes = "warn"
if_not_else = "warn"
if_then_some_else_none = "warn"
ignored_unit_patterns = "warn"
implicit_clone = "warn"
implicit_hasher = "warn"
inconsistent_struct_constructor = "warn"
index_refutable_slice = "warn"
inefficient_to_string = "warn"
infinite_loop = "warn"
inline_always = "warn"
inline_asm_x86_att_syntax = "warn"
inline_asm_x86_intel_syntax = "warn"
into_iter_without_iter = "warn"
invalid_upcast_comparisons = "warn"
items_after_statements = "warn"
iter_not_returning_iterator = "warn"
iter_without_into_iter = "warn"
large_digit_groups = "warn"
large_futures = "warn"
large_include_file = "warn"
large_stack_arrays = "warn"
large_types_passed_by_value = "warn"
let_underscore_must_use = "warn"
let_underscore_untyped = "warn"
linkedlist = "warn"
lossy_float_literal = "warn"
macro_use_imports = "warn"
manual_assert = "warn"
manual_instant_elapsed = "warn"
manual_let_else = "warn"
manual_ok_or = "warn"
manual_string_new = "warn"
many_single_char_names = "warn"
map_unwrap_or = "warn"
match_bool = "warn"
match_on_vec_items = "warn"
match_same_arms = "warn"
match_wild_err_arm = "warn"
match_wildcard_for_single_variants = "warn"
maybe_infinite_iter = "warn"
mem_forget = "warn"
mismatching_type_param_order = "warn"
missing_assert_message = "warn"
missing_asserts_for_indexing = "warn"
missing_fields_in_debug = "warn"
multiple_inherent_impl = "warn"
mut_mut = "warn"
mutex_atomic = "warn"
naive_bytecount = "warn"
needless_bitwise_bool = "warn"
needless_continue = "warn"
needless_for_each = "warn"
needless_pass_by_value = "warn"
needless_raw_string_hashes = "warn"
needless_raw_strings = "warn"
negative_feature_names = "warn"
no_effect_underscore_binding = "warn"
no_mangle_with_rust_abi = "warn"
non_ascii_literal = "warn"
panic = "warn"
panic_in_result_fn = "warn"
partial_pub_fields = "warn"
print_stderr = "warn"
print_stdout = "warn"
ptr_as_ptr = "warn"
ptr_cast_constness = "warn"
pub_without_shorthand = "warn"
range_minus_one = "warn"
range_plus_one = "warn"
rc_buffer = "warn"
rc_mutex = "warn"
redundant_closure_for_method_calls = "warn"
redundant_else = "warn"
redundant_feature_names = "warn"
redundant_type_annotations = "warn"
ref_binding_to_reference = "warn"
ref_option_ref = "warn"
ref_patterns = "warn"
rest_pat_in_fully_bound_structs = "warn"
return_self_not_must_use = "warn"
same_functions_in_if_condition = "warn"
same_name_method = "warn"
semicolon_inside_block = "warn"
shadow_same = "warn"
should_panic_without_expect = "warn"
single_match_else = "warn"
stable_sort_primitive = "warn"
str_to_string = "warn"
string_add = "warn"
string_add_assign = "warn"
string_lit_chars_any = "warn"
string_to_string = "warn"
struct_excessive_bools = "warn"
struct_field_names = "warn"
suspicious_xor_used_as_pow = "warn"
tests_outside_test_module = "warn"
todo = "warn"
transmute_ptr_to_ptr = "warn"
trivially_copy_pass_by_ref = "warn"
try_err = "warn"
unchecked_duration_subtraction = "warn"
undocumented_unsafe_blocks = "warn"
unicode_not_nfc = "warn"
unimplemented = "warn"
uninlined_format_args = "warn"
unnecessary_box_returns = "warn"
unnecessary_join = "warn"
unnecessary_safety_comment = "warn"
unnecessary_safety_doc = "warn"
unnecessary_self_imports = "warn"
unnecessary_wraps = "warn"
unneeded_field_pattern = "warn"
unnested_or_patterns = "warn"
unreadable_literal = "warn"
unsafe_derive_deserialize = "warn"
unseparated_literal_suffix = "warn"
unused_async = "warn"
unused_self = "warn"
unwrap_in_result = "warn"
use_debug = "warn"
used_underscore_binding = "warn"
verbose_bit_mask = "warn"
verbose_file_reads = "warn"
wildcard_dependencies = "warn"
zero_sized_map_values = "warn"
[profile.release] [profile.release]
lto = true lto = true
codegen-units = 1 codegen-units = 1
strip = "debuginfo"
[profile.release.package.oxigraph-js]
codegen-units = 1
opt-level = "z"
strip = "debuginfo"

@ -4,10 +4,9 @@
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph) [![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph)
[![PyPI](https://img.shields.io/pypi/v/pyoxigraph)](https://pypi.org/project/pyoxigraph/) [![PyPI](https://img.shields.io/pypi/v/pyoxigraph)](https://pypi.org/project/pyoxigraph/)
[![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph) [![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph)
[![tests status](https://github.com/oxigraph/oxigraph/actions/workflows/tests.yml/badge.svg)](https://github.com/oxigraph/oxigraph/actions) [![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![artifacts status](https://github.com/oxigraph/oxigraph/actions/workflows/artifacts.yml/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![dependency status](https://deps.rs/repo/github/oxigraph/oxigraph/status.svg)](https://deps.rs/repo/github/oxigraph/oxigraph) [![dependency status](https://deps.rs/repo/github/oxigraph/oxigraph/status.svg)](https://deps.rs/repo/github/oxigraph/oxigraph)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) [![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
[![Twitter URL](https://img.shields.io/twitter/url?style=social&url=https%3A%2F%2Ftwitter.com%2Foxigraph)](https://twitter.com/oxigraph) [![Twitter URL](https://img.shields.io/twitter/url?style=social&url=https%3A%2F%2Ftwitter.com%2Foxigraph)](https://twitter.com/oxigraph)
Oxigraph is a graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. Oxigraph is a graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
@ -20,12 +19,6 @@ Oxigraph is in heavy development and SPARQL query evaluation has not been optimi
The development roadmap is using [GitHub milestones](https://github.com/oxigraph/oxigraph/milestones?direction=desc&sort=completeness&state=open). The development roadmap is using [GitHub milestones](https://github.com/oxigraph/oxigraph/milestones?direction=desc&sort=completeness&state=open).
Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
Oxigraph implements the following specifications:
- [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
- [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval.
- [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
It is split into multiple parts: It is split into multiple parts:
- [The database written as a Rust library](https://crates.io/crates/oxigraph). Its source code is in the `lib` directory. - [The database written as a Rust library](https://crates.io/crates/oxigraph). Its source code is in the `lib` directory.
@ -34,22 +27,15 @@ It is split into multiple parts:
- [`pyoxigraph` that exposes Oxigraph to the Python world](https://pyoxigraph.readthedocs.io/). Its source code is in the `python` directory. [![PyPI](https://img.shields.io/pypi/v/pyoxigraph)](https://pypi.org/project/pyoxigraph/) - [`pyoxigraph` that exposes Oxigraph to the Python world](https://pyoxigraph.readthedocs.io/). Its source code is in the `python` directory. [![PyPI](https://img.shields.io/pypi/v/pyoxigraph)](https://pypi.org/project/pyoxigraph/)
- [JavaScript bindings for Oxigraph](https://www.npmjs.com/package/oxigraph). WebAssembly is used to package Oxigraph into a NodeJS compatible NPM package. Its source code is in the `js` directory. - [JavaScript bindings for Oxigraph](https://www.npmjs.com/package/oxigraph). WebAssembly is used to package Oxigraph into a NodeJS compatible NPM package. Its source code is in the `js` directory.
[![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph) [![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph)
- [Oxigraph binary](https://crates.io/crates/oxigraph-cli) that provides a standalone command-line tool for manipulating RDF data and spawning a web server implementing the [SPARQL 1.1 Protocol](https://www.w3.org/TR/sparql11-protocol/) and the [SPARQL 1.1 Graph Store Protocol](https://www.w3.org/TR/sparql11-http-rdf-update/). Its source code is in the `cli` directory. - [Oxigraph server](https://crates.io/crates/oxigraph_server) that provides a standalone binary of a web server implementing the [SPARQL 1.1 Protocol](https://www.w3.org/TR/sparql11-protocol/) and the [SPARQL 1.1 Graph Store Protocol](https://www.w3.org/TR/sparql11-http-rdf-update/). Its source code is in the `server` directory.
Note that it was previously named [Oxigraph server](https://crates.io/crates/oxigraph-server). [![Latest Version](https://img.shields.io/crates/v/oxigraph_server.svg)](https://crates.io/crates/oxigraph_server)
[![Latest Version](https://img.shields.io/crates/v/oxigraph-cli.svg)](https://crates.io/crates/oxigraph-cli) [![Docker Image Version (latest semver)](https://img.shields.io/docker/v/oxigraph/oxigraph?sort=semver)](https://hub.docker.com/r/oxigraph/oxigraph)
Also, some parts of Oxigraph are available as standalone Rust crates: Oxigraph implements the following specifications:
* [`oxrdf`](https://crates.io/crates/oxrdf), datastructures encoding RDF basic concepts (the [`oxigraph::model`](crate::model) module).
* [`oxrdfio`](https://crates.io/crates/oxrdfio), a unified parser and serializer API for RDF formats (the [`oxigraph::io`](crate::io) module). It itself relies on: - [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
* [`oxttl`](https://crates.io/crates/oxttl), N-Triples, N-Quads, Turtle, TriG and N3 parsing and serialization. - [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio).
* [`oxrdfxml`](https://crates.io/crates/oxrdfxml), RDF/XML parsing and serialization. - [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
* [`spargebra`](https://crates.io/crates/spargebra), a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults), parsers and serializers for SPARQL result formats.
* [`sparopt`](https://crates.io/crates/sparopt), a SPARQL optimizer.
* [`oxsdatatypes`](https://crates.io/crates/oxsdatatypes), an implementation of some XML Schema datatypes.
The library layers in Oxigraph. The elements above depend on the elements below:
![Oxigraph libraries architecture diagram](./docs/arch-diagram.svg)
A preliminary benchmark [is provided](bench/README.md). There is also [a document describing Oxigraph technical architecture](https://github.com/oxigraph/oxigraph/wiki/Architecture). A preliminary benchmark [is provided](bench/README.md). There is also [a document describing Oxigraph technical architecture](https://github.com/oxigraph/oxigraph/wiki/Architecture).
@ -65,7 +51,6 @@ Feel free to use [GitHub discussions](https://github.com/oxigraph/oxigraph/discu
If you need advanced support or are willing to pay to get some extra features, feel free to reach out to [Tpt](https://github.com/Tpt/). If you need advanced support or are willing to pay to get some extra features, feel free to reach out to [Tpt](https://github.com/Tpt/).
## License ## License
This project is licensed under either of This project is licensed under either of
@ -80,15 +65,3 @@ at your option.
### Contribution ### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
## Sponsors
* [RelationLabs](https://relationlabs.ai/) that is building [Relation-Graph](https://github.com/relationlabs/Relation-Graph), a SPARQL database module for the [Substrate blockchain platform](https://substrate.io/) based on Oxigraph.
* [Field 33](https://field33.com) that was building [an ontology management platform](https://plow.pm/).
* [Magnus Bakken](https://github.com/magbak) who is building [Data Treehouse](https://www.data-treehouse.com/), a time-series + RDF datalake platform, and [chrontext](https://github.com/magbak/chrontext), a SPARQL query endpoint on top of joint RDF and time series databases.
* [DeciSym.AI](https://www.decisym.ai/) a cyber security consulting company providing RDF-based software.
* [ACE IoT Solutions](https://aceiotsolutions.com/), a building IoT platform.
* [Albin Larsson](https://byabbe.se/) who is building [GovDirectory](https://www.govdirectory.org/), a directory of public agencies based on Wikidata.
And [others](https://github.com/sponsors/Tpt). Many thanks to them!

@ -2,19 +2,18 @@
DATASET_SIZE=100000 DATASET_SIZE=100000
PARALLELISM=16 PARALLELISM=16
cd bsbm-tools
set -eu
wget -nc https://github.com/blazegraph/database/releases/download/BLAZEGRAPH_RELEASE_2_1_5/blazegraph.jar
cd bsbm-tools || exit
./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
/usr/lib/jvm/java-8-openjdk/bin/java -server -jar ../blazegraph.jar & wget https://github.com/blazegraph/database/releases/download/BLAZEGRAPH_RELEASE_2_1_5/blazegraph.jar
/usr/lib/jvm/java-8-openjdk/bin/java -server -jar blazegraph.jar &
sleep 10 sleep 10
curl -f -X POST -H 'Content-Type:text/turtle' -T "explore-${DATASET_SIZE}.nt" http://localhost:9999/blazegraph/sparql curl -f -X POST -H 'Content-Type:text/turtle' -T "explore-${DATASET_SIZE}.nt" http://localhost:9999/blazegraph/sparql
./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.blazegraph.2.1.5.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:9999/blazegraph/sparql ./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.blazegraph.2.1.5.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:9999/blazegraph/sparql
./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.blazegraph.2.1.5.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:9999/blazegraph/sparql -u http://localhost:9999/blazegraph/sparql -udataset "explore-update-${DATASET_SIZE}.nt" ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.blazegraph.2.1.5.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:9999/blazegraph/sparql -u http://localhost:9999/blazegraph/sparql -udataset "explore-update-${DATASET_SIZE}.nt"
#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.blazegraph.2.1.5.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:9999/blazegraph/sparql #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.blazegraph.2.1.5.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:9999/blazegraph/sparql
kill $! kill $!
rm -f blazegraph.jnl rm blazegraph.jar
rm -f "explore-${DATASET_SIZE}.nt" rm blazegraph.jnl
rm -f "explore-update-${DATASET_SIZE}.nt" rm "explore-${DATASET_SIZE}.nt"
rm -rf td_data rm "explore-update-${DATASET_SIZE}.nt"
rm -r td_data

@ -3,22 +3,21 @@
DATASET_SIZE=100000 DATASET_SIZE=100000
PARALLELISM=16 PARALLELISM=16
VERSION="9.3.3" VERSION="9.3.3"
set -eu
cd bsbm-tools cd bsbm-tools
./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
export JAVA_HOME=/usr/lib/jvm/java-11-openjdk
../graphdb-free-9.3.3/bin/graphdb -s -Dgraphdb.logger.root.level=WARN & ../graphdb-free-9.3.3/bin/graphdb -s -Dgraphdb.logger.root.level=WARN &
sleep 10 sleep 10
curl -f -X POST http://localhost:7200/rest/repositories -H 'Content-Type:application/json' -d ' curl -f -X POST http://localhost:7200/rest/repositories -H 'Content-Type:application/json' -d '
{"id":"bsbm","params":{"ruleset":{"label":"Ruleset","name":"ruleset","value":"empty"},"title":{"label":"Repository title","name":"title","value":"GraphDB Free repository"},"checkForInconsistencies":{"label":"Check for inconsistencies","name":"checkForInconsistencies","value":"false"},"disableSameAs":{"label":"Disable owl:sameAs","name":"disableSameAs","value":"true"},"baseURL":{"label":"Base URL","name":"baseURL","value":"http://example.org/owlim#"},"repositoryType":{"label":"Repository type","name":"repositoryType","value":"file-repository"},"id":{"label":"Repository ID","name":"id","value":"repo-bsbm"},"storageFolder":{"label":"Storage folder","name":"storageFolder","value":"storage"}},"title":"BSBM","type":"free"} {"id":"test","params":{"ruleset":{"label":"Ruleset","name":"ruleset","value":"empty"},"title":{"label":"Repository title","name":"title","value":"GraphDB Free repository"},"checkForInconsistencies":{"label":"Check for inconsistencies","name":"checkForInconsistencies","value":"false"},"disableSameAs":{"label":"Disable owl:sameAs","name":"disableSameAs","value":"true"},"baseURL":{"label":"Base URL","name":"baseURL","value":"http://example.org/owlim#"},"repositoryType":{"label":"Repository type","name":"repositoryType","value":"file-repository"},"id":{"label":"Repository ID","name":"id","value":"repo-test"},"storageFolder":{"label":"Storage folder","name":"storageFolder","value":"storage"}},"title":"Test","type":"free"}
' '
curl -f -X PUT -H 'Content-Type:application/n-triples' -T "explore-${DATASET_SIZE}.nt" http://localhost:7200/repositories/bsbm/statements curl -f -X PUT -H 'Content-Type:application/n-triples' -T "explore-${DATASET_SIZE}.nt" http://localhost:7200/repositories/test/statements
./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.graphdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:7200/repositories/bsbm ./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.graphdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:7200/repositories/test
./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.graphdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:7200/repositories/bsbm -u http://localhost:7200/repositories/bsbm/statements -udataset "explore-update-${DATASET_SIZE}.nt" ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.graphdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:7200/repositories/test -u http://localhost:7200/repositories/test/statements -udataset "explore-update-${DATASET_SIZE}.nt"
#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.graphdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:7200/repositories/bsbm #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.graphdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:7200/repositories/test
kill $! kill $!
sleep 5 sleep 5
rm -rf ../graphdb-free-9.3.3/data rm -r ../graphdb-free-9.3.3/data
rm -f "explore-${DATASET_SIZE}.nt" rm "explore-${DATASET_SIZE}.nt"
rm -f "explore-update-${DATASET_SIZE}.nt" rm "explore-update-${DATASET_SIZE}.nt"
rm -rf td_data rm -r td_data

@ -3,12 +3,11 @@
DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product. DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product.
PARALLELISM=16 PARALLELISM=16
VERSION="4.3.2" VERSION="4.3.2"
cd bsbm-tools
set -eu
wget -nc https://downloads.apache.org/jena/binaries/apache-jena-${VERSION}.zip
cd bsbm-tools || exit
./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
unzip ../"apache-jena-${VERSION}.zip" wget https://downloads.apache.org/jena/binaries/apache-jena-${VERSION}.zip
unzip apache-jena-${VERSION}.zip
rm apache-jena-${VERSION}.zip
./apache-jena-${VERSION}/bin/tdb2.tdbloader --loader=parallel --loc=td_data "explore-${DATASET_SIZE}.nt" ./apache-jena-${VERSION}/bin/tdb2.tdbloader --loader=parallel --loc=td_data "explore-${DATASET_SIZE}.nt"
wget https://downloads.apache.org/jena/binaries/apache-jena-fuseki-${VERSION}.zip wget https://downloads.apache.org/jena/binaries/apache-jena-fuseki-${VERSION}.zip
unzip apache-jena-fuseki-${VERSION}.zip unzip apache-jena-fuseki-${VERSION}.zip
@ -20,9 +19,9 @@ sleep 10
./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.jena.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:3030/bsbm/query -u http://localhost:3030/bsbm/update -udataset "explore-update-${DATASET_SIZE}.nt" ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.jena.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:3030/bsbm/query -u http://localhost:3030/bsbm/update -udataset "explore-update-${DATASET_SIZE}.nt"
#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.jena.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:3030/bsbm/query #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.jena.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:3030/bsbm/query
kill $! kill $!
rm -f "explore-${DATASET_SIZE}.nt" rm "explore-${DATASET_SIZE}.nt"
rm -f "explore-update-${DATASET_SIZE}.nt" rm "explore-update-${DATASET_SIZE}.nt"
rm -rf td_data rm -r td_data
rm -rf run rm -r run
rm -rf apache-jena-${VERSION} rm -r apache-jena-${VERSION}
rm -rf apache-jena-fuseki-${VERSION} rm -r apache-jena-fuseki-${VERSION}

@ -2,20 +2,18 @@
DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product. DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product.
PARALLELISM=16 PARALLELISM=16
set -eu
cd bsbm-tools cd bsbm-tools
./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
cargo build --release --manifest-path="../../cli/Cargo.toml" cargo build --release --manifest-path="../../server/Cargo.toml"
VERSION=$(./../../target/release/oxigraph --version | sed 's/oxigraph //g') VERSION=$(./../../target/release/oxigraph_server --version | sed 's/oxigraph_server //g')
./../../target/release/oxigraph --location oxigraph_data load --file "explore-${DATASET_SIZE}.nt" ./../../target/release/oxigraph_server --location oxigraph_data load --file "explore-${DATASET_SIZE}.nt"
./../../target/release/oxigraph --location oxigraph_data serve --bind 127.0.0.1:7878 & ./../../target/release/oxigraph_server --location oxigraph_data serve --bind 127.0.0.1:7878 &
sleep 1 sleep 1
./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.oxigraph.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://127.0.0.1:7878/query ./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.oxigraph.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://127.0.0.1:7878/query
./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.oxigraph.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://127.0.0.1:7878/query -u http://127.0.0.1:7878/update -udataset "explore-update-${DATASET_SIZE}.nt" ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.oxigraph.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://127.0.0.1:7878/query -u http://127.0.0.1:7878/update -udataset "explore-update-${DATASET_SIZE}.nt"
#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" "http://127.0.0.1:7878/query" #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" "http://127.0.0.1:7878/query"
kill $! kill $!
rm -rf oxigraph_data rm -r oxigraph_data
rm -f "explore-${DATASET_SIZE}.nt" rm "explore-${DATASET_SIZE}.nt"
rm -f "explore-update-${DATASET_SIZE}.nt" rm "explore-update-${DATASET_SIZE}.nt"
rm -rf td_data rm -r td_data

@ -1,49 +0,0 @@
#!/usr/bin/env bash
DATASET_SIZE=100000
PARALLELISM=16
VERSION="4.2.2"
TOMCAT_VERSION="9.0.71"
set -eu
wget -nc -O "rdf4j-${VERSION}.zip" "https://www.eclipse.org/downloads/download.php?file=/rdf4j/eclipse-rdf4j-${VERSION}-sdk.zip&mirror_id=1"
wget -nc -O "tomcat-${TOMCAT_VERSION}.zip" "https://dlcdn.apache.org/tomcat/tomcat-9/v${TOMCAT_VERSION}/bin/apache-tomcat-${TOMCAT_VERSION}.zip"
cd bsbm-tools || exit
./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
wget -nc -O "rdf4j-${VERSION}.zip" "https://www.eclipse.org/downloads/download.php?file=/rdf4j/eclipse-rdf4j-${VERSION}-sdk.zip&mirror_id=1"
unzip ../"rdf4j-${VERSION}.zip"
unzip ../"tomcat-${TOMCAT_VERSION}.zip"
CATALINA_HOME="$(pwd)/apache-tomcat-${TOMCAT_VERSION}"
export CATALINA_HOME
export JAVA_OPTS="-Dorg.eclipse.rdf4j.appdata.basedir=${CATALINA_HOME}/rdf4j"
cp "eclipse-rdf4j-${VERSION}"/war/rdf4j-server.war "${CATALINA_HOME}"/webapps/
chmod +x "${CATALINA_HOME}"/bin/*.sh
"${CATALINA_HOME}"/bin/startup.sh
sleep 30
curl -f -X PUT http://localhost:8080/rdf4j-server/repositories/bsbm -H 'Content-Type:text/turtle' -d '
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix rep: <http://www.openrdf.org/config/repository#>.
@prefix sr: <http://www.openrdf.org/config/repository/sail#>.
@prefix sail: <http://www.openrdf.org/config/sail#>.
[] a rep:Repository ;
rep:repositoryID "bsbm" ;
rdfs:label "BSBM" ;
rep:repositoryImpl [
rep:repositoryType "openrdf:SailRepository" ;
sr:sailImpl [
sail:sailType "rdf4j:LmdbStore"
]
] .
'
sleep 10
curl -f -X PUT -H 'Content-Type:application/n-triples' -T "explore-${DATASET_SIZE}.nt" http://localhost:8080/rdf4j-server/repositories/bsbm/statements
./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.rdf4j-lmdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:8080/rdf4j-server/repositories/bsbm
./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.rdf4j-lmdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:8080/rdf4j-server/repositories/bsbm -u http://localhost:8080/rdf4j-server/repositories/bsbm/statements -udataset "explore-update-${DATASET_SIZE}.nt"
#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.rdf4j-lmdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:8080/rdf4j-server/repositories/bsbm
"${CATALINA_HOME}"/bin/shutdown.sh
rm -f "explore-${DATASET_SIZE}.nt"
rm -f "explore-update-${DATASET_SIZE}.nt"
rm -rf td_data
rm -rf "eclipse-rdf4j-${VERSION}"
rm -rf "apache-tomcat-${TOMCAT_VERSION}"

@ -3,8 +3,6 @@
DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product. DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product.
PARALLELISM=16 PARALLELISM=16
VERSION="7.2.5" VERSION="7.2.5"
set -eu
cd bsbm-tools cd bsbm-tools
./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
cp ../virtuoso-opensource/database/virtuoso.ini.sample virtuoso.ini cp ../virtuoso-opensource/database/virtuoso.ini.sample virtuoso.ini
@ -20,7 +18,7 @@ EOF
# ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.virtuoso.${DATASET_SIZE}.${PARALLELISM}.${PARALLELISM}.${VERSION}.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test' -u 'http://dba:dba@localhost:8890/sparql-auth?graph-uri=urn:graph:test' -udataset "explore-update-${DATASET_SIZE}.nt" # ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.virtuoso.${DATASET_SIZE}.${PARALLELISM}.${PARALLELISM}.${VERSION}.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test' -u 'http://dba:dba@localhost:8890/sparql-auth?graph-uri=urn:graph:test' -udataset "explore-update-${DATASET_SIZE}.nt"
# ./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.virtuoso.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test' # ./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.virtuoso.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test'
kill $! kill $!
rm -rf ../database rm -r ../database
rm -f "explore-${DATASET_SIZE}.nt" rm "explore-${DATASET_SIZE}.nt"
rm -f "explore-update-${DATASET_SIZE}.nt" rm "explore-update-${DATASET_SIZE}.nt"
rm -rf td_data rm -r td_data

@ -1,63 +0,0 @@
"""
Converts a SPARQL query JSON explanation file to a flamegraph.
Usage: python explanation_to_flamegraph.py explanation.json flamegraph.svg
"""
import json
import subprocess
from argparse import ArgumentParser
from pathlib import Path
from shutil import which
from tempfile import NamedTemporaryFile
parser = ArgumentParser(
prog='OxigraphFlamegraph',
description='Builds a flamegraph from the Oxigraph query explanation JSON format',
epilog='Text at the bottom of help')
parser.add_argument('json_explanation', type=Path)
parser.add_argument('flamegraph_svg', type=Path)
args = parser.parse_args()
def trace_line(label: str, value: float):
return f"{label} {int(value * 1_000_000)}"
with args.json_explanation.open('rt') as fp:
explanation = json.load(fp)
trace = []
if "parsing duration in seconds" in explanation:
trace.append(trace_line("parsing", explanation['parsing duration in seconds']))
if "planning duration in seconds" in explanation:
trace.append(trace_line("planning", explanation['planning duration in seconds']))
already_used_names = {}
def add_to_trace(node, path):
path = f"{path};{node['name'].replace(' ', '`')}"
if path in already_used_names:
already_used_names[path] += 1
path = f"{path}`{already_used_names[path]}"
else:
already_used_names[path] = 0
samples = node['duration in seconds'] - sum(child['duration in seconds'] for child in node.get("children", ()))
if int(samples * 1_000_000) > 0:
trace.append(trace_line(path, samples))
for i, child in enumerate(node.get("children", ())):
add_to_trace(child, path)
add_to_trace(explanation["plan"], 'eval')
inferno = which('inferno-flamegraph')
flamegraph_pl = which('flamegraph.pl')
if inferno:
args.flamegraph_svg.write_text(
subprocess.run([inferno], input='\n'.join(trace), stdout=subprocess.PIPE, text=True).stdout)
elif flamegraph_pl:
with NamedTemporaryFile('w+t') as fp:
fp.write('\n'.join(trace))
fp.flush()
args.flamegraph_svg.write_text(
subprocess.run([flamegraph_pl, fp.name], stdout=subprocess.PIPE, text=True).stdout)
else:
raise Exception(
'This script requires either the inferno-flamegraph from https://github.com/jonhoo/inferno either the flamegraph.pl script from https://github.com/brendangregg/FlameGraph to be installed and be in $PATH.')
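
For reference, the folded-stack format fed to `inferno-flamegraph` or `flamegraph.pl` above is one `label microseconds` pair per line, with `;` separating stack frames and spaces in operator names replaced by backticks. A minimal sketch with a hypothetical explanation (the labels mirror the keys the script reads; values are made up):

```python
# Illustration only: what the folded-stack lines look like for a tiny,
# made-up explanation with one operator ("Project") and one child ("BGP scan").
def trace_line(label: str, value: float) -> str:
    return f"{label} {int(value * 1_000_000)}"

# Self time of "Project" is its duration minus its child's: roughly 0.5 - 0.4 s.
print(trace_line("parsing", 0.001))              # parsing 1000
print(trace_line("planning", 0.002))             # planning 2000
print(trace_line("eval;Project", 0.1))           # eval;Project 100000
print(trace_line("eval;Project;BGP`scan", 0.4))  # eval;Project;BGP`scan 400000
```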

@ -1,52 +0,0 @@
"""
Converts a SPARQL query JSON explanation file to a tracing event file compatible with Chrome.
Usage: python explanation_to_trace.py explanation.json trace.json
"""
import json
from argparse import ArgumentParser
from pathlib import Path
parser = ArgumentParser(
prog='OxigraphTracing',
description='Builds a Trace Event Format file from the Oxigraph query explanation JSON format')
parser.add_argument('json_explanation', type=Path)
parser.add_argument('json_trace_event', type=Path)
args = parser.parse_args()
with args.json_explanation.open('rt') as fp:
explanation = json.load(fp)
trace = []
def trace_element(name: str, cat: str, start_s: float, duration_s: float):
return {
"name": name,
"cat": cat,
"ph": "X",
"ts": int(start_s * 1_000_000),
"dur": int(duration_s * 1_000_000),
"pid": 1
}
def add_to_trace(node, path, start_time: float):
path = f"{path};{node['name'].replace(' ', '`')}"
trace.append(trace_element(node["name"], node["name"].split("(")[0], start_time, node["duration in seconds"]))
for child in node.get("children", ()):
add_to_trace(child, path, start_time)
start_time += child["duration in seconds"]
current_time = 0
if "parsing duration in seconds" in explanation:
d = explanation["parsing duration in seconds"]
trace.append(trace_element(f"parsing", "parsing", current_time, d))
current_time += d
if "planning duration in seconds" in explanation:
d = explanation["planning duration in seconds"]
trace.append(trace_element(f"planning", "planning", current_time, d))
current_time += d
add_to_trace(explanation["plan"], 'eval', current_time)
with args.json_trace_event.open("wt") as fp:
json.dump(trace, fp)
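
Each entry the script emits is a Trace Event Format "complete" event (`"ph": "X"`) with microsecond timestamps; the resulting file can typically be opened in `chrome://tracing` or Perfetto. A sketch of a single hypothetical event, with field values chosen purely for illustration:

```python
# Illustration only: one event as built by trace_element() above, for an
# operator named "Project(?s)" starting at 3 ms and running for 500 ms.
import json

event = {
    "name": "Project(?s)",
    "cat": "Project",  # operator name up to the first "(", as in the script
    "ph": "X",         # "complete" event: carries both a start and a duration
    "ts": 3_000,       # start timestamp in microseconds
    "dur": 500_000,    # duration in microseconds
    "pid": 1,
}
print(json.dumps([event]))  # the script writes a JSON array of such events
```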

@ -1,4 +1,4 @@
avoid-breaking-exported-api = false avoid-breaking-exported-api = true
cognitive-complexity-threshold = 50 cognitive-complexity-threshold = 50
too-many-arguments-threshold = 10 too-many-arguments-threshold = 10
type-complexity-threshold = 500 type-complexity-threshold = 500

@ -1,18 +1,12 @@
[advisories]
version = 2
ignore = ["RUSTSEC-2018-0015"]
[licenses] [licenses]
version = 2 unlicensed = "deny"
allow = [ allow = [
"Apache-2.0",
"Apache-2.0 WITH LLVM-exception",
"BSD-3-Clause",
"ISC",
"MIT", "MIT",
"OpenSSL", "Apache-2.0",
"Unicode-DFS-2016" "Apache-2.0 WITH LLVM-exception"
] ]
allow-osi-fsf-free = "either"
default = "deny"
[[licenses.clarify]] [[licenses.clarify]]
name = "ring" name = "ring"

@ -1,120 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="624" height="384" class="svgbob">
<style>.svgbob line, .svgbob path, .svgbob circle, .svgbob rect, .svgbob polygon {
stroke: black;
stroke-width: 2;
stroke-opacity: 1;
fill-opacity: 1;
stroke-linecap: round;
stroke-linejoin: miter;
}
.svgbob text {
white-space: pre;
fill: black;
font-family: Iosevka Fixed, monospace;
font-size: 14px;
}
.svgbob rect.backdrop {
stroke: none;
fill: white;
}
.svgbob .broken {
stroke-dasharray: 8;
}
.svgbob .filled {
fill: black;
}
.svgbob .bg_filled {
fill: white;
stroke-width: 1;
}
.svgbob .nofill {
fill: white;
}
.svgbob .end_marked_arrow {
marker-end: url(#arrow);
}
.svgbob .start_marked_arrow {
marker-start: url(#arrow);
}
.svgbob .end_marked_diamond {
marker-end: url(#diamond);
}
.svgbob .start_marked_diamond {
marker-start: url(#diamond);
}
.svgbob .end_marked_circle {
marker-end: url(#circle);
}
.svgbob .start_marked_circle {
marker-start: url(#circle);
}
.svgbob .end_marked_open_circle {
marker-end: url(#open_circle);
}
.svgbob .start_marked_open_circle {
marker-start: url(#open_circle);
}
.svgbob .end_marked_big_open_circle {
marker-end: url(#big_open_circle);
}
.svgbob .start_marked_big_open_circle {
marker-start: url(#big_open_circle);
}<!--separator-->.svgbob .r{
fill: papayawhip;
}
.svgbob .p{
fill: lightyellow;
}
.svgbob .j{
fill: lightgreen;
}
</style>
<defs>
<marker id="arrow" viewBox="-2 -2 8 8" refX="4" refY="2" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<polygon points="0,0 0,4 4,2 0,0"></polygon>
</marker>
<marker id="diamond" viewBox="-2 -2 8 8" refX="4" refY="2" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<polygon points="0,2 2,0 4,2 2,4 0,2"></polygon>
</marker>
<marker id="circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="2" class="filled"></circle>
</marker>
<marker id="open_circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="2" class="bg_filled"></circle>
</marker>
<marker id="big_open_circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="3" class="bg_filled"></circle>
</marker>
</defs>
<rect class="backdrop" x="0" y="0" width="624" height="384"></rect>
<rect x="4" y="8" width="152" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="28">oxigraph CLI</text>
<rect x="244" y="8" width="136" height="32" class="solid nofill p" rx="0"></rect>
<text x="258" y="28">pyoxigraph</text>
<rect x="468" y="8" width="144" height="32" class="solid nofill j" rx="0"></rect>
<text x="482" y="28">oxigraph</text>
<text x="554" y="28">JS</text>
<rect x="4" y="72" width="608" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="92">oxigraph</text>
<rect x="68" y="136" width="232" height="32" class="solid nofill r" rx="0"></rect>
<text x="82" y="156">oxrdfio</text>
<rect x="348" y="136" width="112" height="32" class="solid nofill r" rx="0"></rect>
<text x="362" y="156">sparopt</text>
<rect x="68" y="200" width="96" height="32" class="solid nofill r" rx="0"></rect>
<text x="82" y="220">oxttl</text>
<rect x="180" y="200" width="120" height="32" class="solid nofill r" rx="0"></rect>
<text x="194" y="220">oxrdfxml</text>
<rect x="316" y="200" width="144" height="32" class="solid nofill r" rx="0"></rect>
<text x="330" y="220">spargebra</text>
<rect x="476" y="200" width="136" height="32" class="solid nofill r" rx="0"></rect>
<text x="490" y="220">sparesults</text>
<rect x="36" y="264" width="576" height="32" class="solid nofill r" rx="0"></rect>
<text x="50" y="284">oxrdf</text>
<rect x="4" y="328" width="152" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="348">oxsdatatypes</text>
</svg>


@ -1,35 +0,0 @@
+------------------+ +----------------+ +-----------------+
+ oxigraph CLI {r} + + pyoxigraph {p} + + oxigraph JS {j} +
+------------------+ +----------------+ +-----------------+
+---------------------------------------------------------------------------+
+ oxigraph (Rust) {r} +
+---------------------------------------------------------------------------+
+----------------------------+ +-------------+
+ oxrdfio {r} + + sparopt {r} +
+----------------------------+ +-------------+
+-----------+ +--------------+ +-----------------+ +----------------+
+ oxttl {r} + + oxrdfxml {r} + + spargebra {r} + + sparesults {r} +
+-----------+ +--------------+ +-----------------+ +----------------+
+-----------------------------------------------------------------------+
+ oxrdf {r} +
+-----------------------------------------------------------------------+
+------------------+
+ oxsdatatypes {r} +
+------------------+
# Legend:
r = {
fill: papayawhip;
}
p = {
fill: lightyellow;
}
j = {
fill: lightgreen;
}

@ -1,6 +1,7 @@
[package] [package]
name = "oxigraph-fuzz" name = "oxigraph-fuzz"
version = "0.0.0" version = "0.0.0"
authors = ["Automatically generated"]
publish = false publish = false
edition = "2021" edition = "2021"
@ -8,38 +9,12 @@ edition = "2021"
cargo-fuzz = true cargo-fuzz = true
[dependencies] [dependencies]
anyhow = "1.0.72"
libfuzzer-sys = "0.4" libfuzzer-sys = "0.4"
oxigraph = { path = "../lib/oxigraph", default-features = false } spargebra = { path = "../lib/spargebra", features = ["rdf-star"] }
oxrdf = { path = "../lib/oxrdf", features = ["rdf-star"] }
oxrdfxml = { path = "../lib/oxrdfxml" }
oxttl = { path = "../lib/oxttl", features = ["rdf-star"] }
sparesults = { path = "../lib/sparesults", features = ["rdf-star"] } sparesults = { path = "../lib/sparesults", features = ["rdf-star"] }
spargebra = { path = "../lib/spargebra", features = ["rdf-star", "sep-0006"] }
sparql-smith = { path = "../lib/sparql-smith", features = ["sep-0006"] }
[profile.release]
codegen-units = 1
debug = true
[workspace] [workspace]
[[bin]]
name = "nquads"
path = "fuzz_targets/nquads.rs"
[[bin]]
name = "n3"
path = "fuzz_targets/n3.rs"
[[bin]]
name = "rdf_xml"
path = "fuzz_targets/rdf_xml.rs"
[[bin]]
name = "sparql_eval"
path = "fuzz_targets/sparql_eval.rs"
[[bin]] [[bin]]
name = "sparql_query" name = "sparql_query"
path = "fuzz_targets/sparql_query.rs" path = "fuzz_targets/sparql_query.rs"
@ -59,7 +34,3 @@ path = "fuzz_targets/sparql_results_xml.rs"
[[bin]] [[bin]]
name = "sparql_results_tsv" name = "sparql_results_tsv"
path = "fuzz_targets/sparql_results_tsv.rs" path = "fuzz_targets/sparql_results_tsv.rs"
[[bin]]
name = "trig"
path = "fuzz_targets/trig.rs"

@ -1,28 +0,0 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxttl::N3Parser;
fuzz_target!(|data: &[u8]| {
let mut quads = Vec::new();
let mut parser = N3Parser::new()
.with_base_iri("http://example.com/")
.unwrap()
.parse();
for chunk in data.split(|c| *c == 0xFF) {
parser.extend_from_slice(chunk);
while let Some(result) = parser.read_next() {
if let Ok(quad) = result {
quads.push(quad);
}
}
}
parser.end();
while let Some(result) = parser.read_next() {
if let Ok(quad) = result {
quads.push(quad);
}
}
assert!(parser.is_end());
//TODO: serialize
});

@ -1,84 +0,0 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdf::Quad;
use oxttl::{NQuadsParser, NQuadsSerializer};
fn parse<'a>(
chunks: impl IntoIterator<Item = &'a [u8]>,
unchecked: bool,
) -> (Vec<Quad>, Vec<String>) {
let mut quads = Vec::new();
let mut errors = Vec::new();
let mut parser = NQuadsParser::new().with_quoted_triples();
if unchecked {
parser = parser.unchecked();
}
let mut reader = parser.parse();
for chunk in chunks {
reader.extend_from_slice(chunk);
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
}
reader.end();
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
assert!(reader.is_end());
(quads, errors)
}
fuzz_target!(|data: &[u8]| {
// We parse with splitting
let (quads, errors) = parse(data.split(|c| *c == 0xFF), false);
// We parse without splitting
let (quads_without_split, errors_without_split) = parse(
[data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()],
false,
);
assert_eq!(quads, quads_without_split);
assert_eq!(errors, errors_without_split);
// We test also unchecked if valid
if errors.is_empty() {
let (quads_unchecked, errors_unchecked) = parse(data.split(|c| *c == 0xFF), true);
assert!(errors_unchecked.is_empty());
assert_eq!(quads, quads_unchecked);
}
// We serialize
let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new());
for quad in &quads {
writer.write_quad(quad).unwrap();
}
let new_serialization = writer.finish();
// We parse the serialization
let new_quads = NQuadsParser::new()
.with_quoted_triples()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {quads:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_quads, quads);
});
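
The harness above exploits the fuzz input twice: `0xFF` bytes act as chunk separators for the incremental parser, and stripping them yields the equivalent single-chunk input, so chunked and whole-input parses must agree before the serialize/re-parse roundtrip is checked. A self-contained Python distillation of that property, using a toy line-based parser in place of `NQuadsParser` (illustrative only):

```python
# Toy stand-in for the streaming parser: records are newline-terminated, and
# input may arrive in arbitrary chunks (here: split at 0xFF sentinel bytes).
def parse(chunks):
    buffer = b""
    records = []
    for chunk in chunks:
        buffer += chunk
        while b"\n" in buffer:  # emit every complete record seen so far
            line, buffer = buffer.split(b"\n", 1)
            records.append(line)
    if buffer:  # end of input flushes the final record
        records.append(buffer)
    return records

def serialize(records):
    return b"\n".join(records)

data = b"<a> <b> <c> .\n<d> <e> <f> .\xff\n<g> <h> <i> ."
chunked = parse(data.split(b"\xff"))           # incremental parsing
whole = parse([data.replace(b"\xff", b"")])    # same bytes, one chunk
assert chunked == whole                        # chunking must not matter
assert parse([serialize(chunked)]) == chunked  # lossless roundtrip
```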

@ -1,35 +0,0 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdfxml::{RdfXmlParser, RdfXmlSerializer};
fuzz_target!(|data: &[u8]| {
// We parse
let triples = RdfXmlParser::new()
.parse_read(data)
.flatten()
.collect::<Vec<_>>();
// We serialize
let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new());
for triple in &triples {
writer.write_triple(triple).unwrap();
}
let new_serialization = writer.finish().unwrap();
// We parse the serialization
let new_triples = RdfXmlParser::new()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {triples:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_triples, triples);
});

@ -1,61 +0,0 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxigraph::io::RdfFormat;
use oxigraph::sparql::{Query, QueryOptions, QueryResults, QuerySolutionIter};
use oxigraph::store::Store;
use std::sync::OnceLock;
fuzz_target!(|data: sparql_smith::Query| {
static STORE: OnceLock<Store> = OnceLock::new();
let store = STORE.get_or_init(|| {
let store = Store::new().unwrap();
store
.load_from_read(RdfFormat::TriG, sparql_smith::DATA_TRIG.as_bytes())
.unwrap();
store
});
let query_str = data.to_string();
if let Ok(query) = Query::parse(&query_str, None) {
let options = QueryOptions::default();
let with_opt = store.query_opt(query.clone(), options.clone()).unwrap();
let without_opt = store
.query_opt(query, options.without_optimizations())
.unwrap();
match (with_opt, without_opt) {
(QueryResults::Solutions(with_opt), QueryResults::Solutions(without_opt)) => {
assert_eq!(
query_solutions_key(with_opt, query_str.contains(" REDUCED ")),
query_solutions_key(without_opt, query_str.contains(" REDUCED "))
)
}
(QueryResults::Graph(_), QueryResults::Graph(_)) => unimplemented!(),
(QueryResults::Boolean(with_opt), QueryResults::Boolean(without_opt)) => {
assert_eq!(with_opt, without_opt)
}
_ => panic!("Different query result types"),
}
}
});
fn query_solutions_key(iter: QuerySolutionIter, is_reduced: bool) -> String {
// TODO: ordering
let mut b = iter
.into_iter()
.map(|t| {
let mut b = t
.unwrap()
.iter()
.map(|(var, val)| format!("{var}: {val}"))
.collect::<Vec<_>>();
b.sort_unstable();
b.join(" ")
})
.collect::<Vec<_>>();
b.sort_unstable();
if is_reduced {
b.dedup();
}
b.join("\n")
}
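
`query_solutions_key` above normalizes each result set so the optimized and unoptimized evaluations can be compared independently of row and binding order (deduplicating when the query uses `REDUCED`). The same idea in a short, hypothetical Python sketch:

```python
# Illustration only: sort bindings within each row, then sort the rows, so
# two result sets compare equal iff they are the same multiset of solutions
# (or the same set, when REDUCED allows duplicates to be dropped).
def solutions_key(solutions: list[dict[str, str]], is_reduced: bool) -> str:
    rows = sorted(
        " ".join(sorted(f"{var}: {val}" for var, val in s.items()))
        for s in solutions
    )
    if is_reduced:  # drop consecutive duplicates, as Vec::dedup does after sorting
        rows = [r for i, r in enumerate(rows) if i == 0 or rows[i - 1] != r]
    return "\n".join(rows)

a = [{"s": "ex:1", "o": "ex:2"}, {"s": "ex:3", "o": "ex:4"}]
b = [{"o": "ex:4", "s": "ex:3"}, {"o": "ex:2", "s": "ex:1"}]
assert solutions_key(a, False) == solutions_key(b, False)
```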

@ -1,7 +1,10 @@
#![no_main] #![no_main]
use libfuzzer_sys::fuzz_target; use libfuzzer_sys::fuzz_target;
use spargebra::Query; use spargebra::Query;
use std::str;
fuzz_target!(|data: &str| { fuzz_target!(|data: &[u8]| {
let _ = Query::parse(data, None); if let Ok(data) = str::from_utf8(data) {
Query::parse(data, None);
}
}); });

@ -1,6 +1,15 @@
#![no_main] #![no_main]
use libfuzzer_sys::fuzz_target; use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format; use sparesults::{QueryResultsFormat, QueryResultsParser, QueryResultsReader};
use sparesults::QueryResultsFormat;
fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Json, data)); fuzz_target!(|data: &[u8]| {
let parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
if let Ok(QueryResultsReader::Solutions(solutions)) = parser.read_results(data) {
for s in solutions {
if s.is_err() {
// TODO: avoid infinite loop of errors
break;
}
}
}
});

@ -1,6 +1,10 @@
#![no_main] #![no_main]
use libfuzzer_sys::fuzz_target; use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format; use sparesults::{QueryResultsFormat, QueryResultsParser, QueryResultsReader};
use sparesults::QueryResultsFormat;
fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Tsv, data)); fuzz_target!(|data: &[u8]| {
let parser = QueryResultsParser::from_format(QueryResultsFormat::Tsv);
if let Ok(QueryResultsReader::Solutions(solutions)) = parser.read_results(data) {
for _ in solutions {}
}
});

@ -1,6 +1,10 @@
#![no_main] #![no_main]
use libfuzzer_sys::fuzz_target; use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format; use sparesults::{QueryResultsFormat, QueryResultsParser, QueryResultsReader};
use sparesults::QueryResultsFormat;
fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Xml, data)); fuzz_target!(|data: &[u8]| {
let parser = QueryResultsParser::from_format(QueryResultsFormat::Xml);
if let Ok(QueryResultsReader::Solutions(solutions)) = parser.read_results(data) {
for _ in solutions {}
}
});

@ -3,6 +3,8 @@ use libfuzzer_sys::fuzz_target;
use spargebra::Update; use spargebra::Update;
use std::str; use std::str;
fuzz_target!(|data: &str| { fuzz_target!(|data: &[u8]| {
let _ = Update::parse(data, None); if let Ok(data) = str::from_utf8(data) {
Update::parse(data, None);
}
}); });

@ -1,166 +0,0 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdf::graph::CanonicalizationAlgorithm;
use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple};
use oxttl::{TriGParser, TriGSerializer};
fn parse<'a>(
chunks: impl IntoIterator<Item = &'a [u8]>,
unchecked: bool,
) -> (Vec<Quad>, Vec<String>, Vec<(String, String)>) {
let mut quads = Vec::new();
let mut errors = Vec::new();
let mut parser = TriGParser::new()
.with_quoted_triples()
.with_base_iri("http://example.com/")
.unwrap();
if unchecked {
parser = parser.unchecked();
}
let mut reader = parser.parse();
for chunk in chunks {
reader.extend_from_slice(chunk);
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
}
reader.end();
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
assert!(reader.is_end());
(
quads,
errors,
reader
.prefixes()
.map(|(k, v)| (k.to_owned(), v.to_owned()))
.collect(),
)
}
fn count_triple_blank_nodes(triple: &Triple) -> usize {
(match &triple.subject {
Subject::BlankNode(_) => 1,
Subject::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
}) + (match &triple.object {
Term::BlankNode(_) => 1,
Term::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
})
}
fn count_quad_blank_nodes(quad: &Quad) -> usize {
(match &quad.subject {
Subject::BlankNode(_) => 1,
Subject::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
}) + (match &quad.object {
Term::BlankNode(_) => 1,
Term::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
}) + usize::from(matches!(quad.graph_name, GraphName::BlankNode(_)))
}
fn serialize_quads(quads: &[Quad], prefixes: Vec<(String, String)>) -> Vec<u8> {
let mut serializer = TriGSerializer::new();
for (prefix_name, prefix_iri) in prefixes {
serializer = serializer.with_prefix(prefix_name, prefix_iri).unwrap();
}
let mut writer = serializer.serialize_to_write(Vec::new());
for quad in quads {
writer.write_quad(quad).unwrap();
}
writer.finish().unwrap()
}
fuzz_target!(|data: &[u8]| {
// We parse with splitting
let (quads, errors, prefixes) = parse(data.split(|c| *c == 0xFF), false);
// We parse without splitting
let (quads_without_split, errors_without_split, _) = parse(
[data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()],
false,
);
let (quads_unchecked, errors_unchecked, _) = parse(data.split(|c| *c == 0xFF), true);
if errors.is_empty() {
assert!(errors_unchecked.is_empty());
}
let bnodes_count = quads.iter().map(count_quad_blank_nodes).sum::<usize>();
if bnodes_count == 0 {
assert_eq!(
quads,
quads_without_split,
"With split:\n{}\nWithout split:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new()))
);
if errors.is_empty() {
assert_eq!(
quads,
quads_unchecked,
"Validating:\n{}\nUnchecked:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new()))
);
}
} else if bnodes_count <= 4 {
let mut dataset_with_split = quads.iter().collect::<Dataset>();
let mut dataset_without_split = quads_without_split.iter().collect::<Dataset>();
dataset_with_split.canonicalize(CanonicalizationAlgorithm::Unstable);
dataset_without_split.canonicalize(CanonicalizationAlgorithm::Unstable);
assert_eq!(
dataset_with_split,
dataset_without_split,
"With split:\n{}\nWithout split:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new()))
);
if errors.is_empty() {
let mut dataset_unchecked = quads_unchecked.iter().collect::<Dataset>();
dataset_unchecked.canonicalize(CanonicalizationAlgorithm::Unstable);
assert_eq!(
dataset_with_split,
dataset_unchecked,
"Validating:\n{}\nUnchecked:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new()))
);
}
}
assert_eq!(errors, errors_without_split);
// We serialize
let new_serialization = serialize_quads(&quads, prefixes);
// We parse the serialization
let new_quads = TriGParser::new()
.with_quoted_triples()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {quads:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_quads, quads);
});
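
Blank-node labels are generated in parsing order, so the chunked and unchunked parses above may produce quads that differ only by blank-node relabeling; that is why, once blank nodes appear, the harness compares datasets after canonicalization instead of comparing quad lists directly. A toy illustration of the relabeling problem (not a real canonicalization algorithm, which must also handle symmetric structures):

```python
# Two "parses" of the same data that differ only in generated blank-node
# labels: a naive comparison fails, a first-seen relabeling makes them match.
parse_a = [("_:b0", "ex:p", "_:b1")]
parse_b = [("_:gen7", "ex:p", "_:gen8")]

def relabel(triples):
    mapping = {}
    def label(term):
        if term.startswith("_:"):  # rename blank nodes in first-seen order
            mapping.setdefault(term, f"_:c{len(mapping)}")
            return mapping[term]
        return term
    return [tuple(label(t) for t in triple) for triple in triples]

assert parse_a != parse_b
assert relabel(parse_a) == relabel(parse_b)
```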

@ -1 +0,0 @@
pub mod result_format;

@ -1,63 +0,0 @@
use anyhow::Context;
use sparesults::{
FromReadQueryResultsReader, QueryResultsFormat, QueryResultsParser, QueryResultsSerializer,
};
pub fn fuzz_result_format(format: QueryResultsFormat, data: &[u8]) {
let parser = QueryResultsParser::from_format(format);
let serializer = QueryResultsSerializer::from_format(format);
let Ok(reader) = parser.parse_read(data) else {
return;
};
match reader {
FromReadQueryResultsReader::Solutions(solutions) => {
let Ok(solutions) = solutions.collect::<Result<Vec<_>, _>>() else {
return;
};
// We try to write again
let mut writer = serializer
.serialize_solutions_to_write(
Vec::new(),
solutions
.first()
.map_or_else(Vec::new, |s| s.variables().to_vec()),
)
.unwrap();
for solution in &solutions {
writer.write(solution).unwrap();
}
let serialized = String::from_utf8(writer.finish().unwrap()).unwrap();
// And to parse again
if let FromReadQueryResultsReader::Solutions(roundtrip_solutions) = parser
.parse_read(serialized.as_bytes())
.with_context(|| format!("Parsing {serialized:?}"))
.unwrap()
{
assert_eq!(
roundtrip_solutions
.collect::<Result<Vec<_>, _>>()
.with_context(|| format!("Parsing {serialized:?}"))
.unwrap(),
solutions
)
}
}
FromReadQueryResultsReader::Boolean(value) => {
// We try to write again
let mut serialized = Vec::new();
serializer
.serialize_boolean_to_write(&mut serialized, value)
.unwrap();
// And to parse again
if let FromReadQueryResultsReader::Boolean(roundtrip_value) =
parser.parse_read(serialized.as_slice()).unwrap()
{
assert_eq!(roundtrip_value, value)
}
}
}
}

@ -1,26 +1,23 @@
[package] [package]
name = "oxigraph-js" name = "oxigraph_js"
version.workspace = true version = "0.4.0-alpha"
authors.workspace = true authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license.workspace = true license = "MIT OR Apache-2.0"
readme = "README.md" readme = "README.md"
keywords = ["RDF", "N-Triples", "Turtle", "XML", "SPARQL"] keywords = ["RDF", "N-Triples", "Turtle", "RDF/XML", "SPARQL"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/js" repository = "https://github.com/oxigraph/oxigraph/tree/main/js"
description = "JavaScript bindings of Oxigraph" description = "JavaScript bindings of Oxigraph"
edition.workspace = true edition = "2021"
rust-version.workspace = true
publish = false
[lib] [lib]
crate-type = ["cdylib"] crate-type = ["cdylib"]
name = "oxigraph" name = "oxigraph"
doc = false
[dependencies] [dependencies]
console_error_panic_hook.workspace = true oxigraph = { version = "0.4.0-alpha", path="../lib" }
js-sys.workspace = true wasm-bindgen = "0.2"
oxigraph = { workspace = true, features = ["js"] } js-sys = "0.3"
wasm-bindgen.workspace = true console_error_panic_hook = "0.1"
[lints] [dev-dependencies]
workspace = true wasm-bindgen-test = "0.3"

@ -3,7 +3,7 @@ Oxigraph for JavaScript
[![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph) [![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) [![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) [![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
This package provides a JavaScript API on top of [Oxigraph](https://crates.io/crates/oxigraph), compiled with WebAssembly. This package provides a JavaScript API on top of [Oxigraph](https://crates.io/crates/oxigraph), compiled with WebAssembly.
@ -13,7 +13,7 @@ Oxigraph for JavaScript is a work in progress and currently offers a simple in-m
The store is also able to load RDF serialized in [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/) and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/). The store is also able to load RDF serialized in [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/) and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/).
It is distributed using [an NPM package](https://www.npmjs.com/package/oxigraph) that should work with Node.JS 18+ and [modern web browsers compatible with WebAssembly reference types and JavaScript `WeakRef`](https://caniuse.com/wasm-reference-types,mdn-javascript_builtins_weakref). It is distributed using [an NPM package](https://www.npmjs.com/package/oxigraph) that should work with Node.JS 12+ and modern web browsers compatible with WebAssembly.
To install: To install:
```bash ```bash
@ -188,18 +188,6 @@ if (store.query("ASK { ?s ?s ?s }")) {
} }
``` ```
It is also possible to provide some options in an object given as second argument:
```js
if (store.query("ASK { <s> ?p ?o }", {
base_iri: "http://example.com/", // base IRI to resolve relative IRIs in the query
use_default_graph_as_union: true, // the default graph in the query is the union of all the dataset graphs
})) {
console.log("there is a triple with same subject, predicate and object");
}
```
#### `Store.prototype.update(String query)` #### `Store.prototype.update(String query)`
Executes a [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/). Executes a [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/).
The [`LOAD` operation](https://www.w3.org/TR/sparql11-update/#load) is not supported yet. The [`LOAD` operation](https://www.w3.org/TR/sparql11-update/#load) is not supported yet.
@ -209,50 +197,40 @@ Example of update:
store.update("DELETE WHERE { <http://example.com/s> ?p ?o }") store.update("DELETE WHERE { <http://example.com/s> ?p ?o }")
``` ```
It is also possible to provide some options in an object given as second argument: #### `Store.prototype.load(String data, String mimeType, NamedNode|String? baseIRI, NamedNode|BlankNode|DefaultGraph? toNamedGraph)`
```js
store.update("DELETE WHERE { <s> ?p ?o }", {
base_iri: "http://example.com/" // base IRI to resolve relative IRIs in the update
})
```
#### `Store.prototype.load(String data, String format, NamedNode|String? baseIRI, NamedNode|BlankNode|DefaultGraph? toNamedGraph)`
Loads serialized RDF triples or quads into the store. Loads serialized RDF triples or quads into the store.
The method arguments are: The method arguments are:
1. `data`: the serialized RDF triples or quads. 1. `data`: the serialized RDF triples or quads.
2. `format`: the format of the serialization. See below for the supported formats. 2. `mimeType`: the MIME type of the serialization. See below for the supported mime types.
3. `baseIRI`: the base IRI to use to resolve the relative IRIs in the serialization. 3. `baseIRI`: the base IRI to use to resolve the relative IRIs in the serialization.
4. `toNamedGraph`: for triple serialization formats, the name of the named graph the triples should be loaded into. 4. `toNamedGraph`: for triple serialization formats, the name of the named graph the triples should be loaded into.
The available formats are: The available formats are:
* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle` or `ttl` * [Turtle](https://www.w3.org/TR/turtle/): `text/turtle`
* [TriG](https://www.w3.org/TR/trig/): `application/trig` or `trig` * [TriG](https://www.w3.org/TR/trig/): `application/trig`
* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` or `nt` * [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples`
* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` or `nq` * [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads`
* [N3](https://w3c.github.io/N3/spec/): `text/n3` or `n3` * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml`
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` or `rdf`
Example of loading a Turtle file into the named graph `<http://example.com/graph>` with the base IRI `http://example.com`: Example of loading a Turtle file into the named graph `<http://example.com/graph>` with the base IRI `http://example.com`:
```js ```js
store.load("<http://example.com> <http://example.com> <> .", "text/turtle", "http://example.com", oxigraph.namedNode("http://example.com/graph")); store.load("<http://example.com> <http://example.com> <> .", "text/turtle", "http://example.com", oxigraph.namedNode("http://example.com/graph"));
``` ```
#### `Store.prototype.dump(String format, NamedNode|BlankNode|DefaultGraph? fromNamedGraph)` #### `Store.prototype.dump(String mimeType, NamedNode|BlankNode|DefaultGraph? fromNamedGraph)`
Returns serialized RDF triples or quads from the store. Returns serialized RDF triples or quads from the store.
The method arguments are: The method arguments are:
1. `format`: the format type of the serialization. See below for the supported types. 1. `mimeType`: the MIME type of the serialization. See below for the supported mime types.
2. `fromNamedGraph`: for triple serialization formats, the name of the named graph the triples should be dumped from. 2. `fromNamedGraph`: for triple serialization formats, the name of the named graph the triples should be dumped from.
The available formats are: The available formats are:
* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle` or `ttl` * [Turtle](https://www.w3.org/TR/turtle/): `text/turtle`
* [TriG](https://www.w3.org/TR/trig/): `application/trig` or `trig` * [TriG](https://www.w3.org/TR/trig/): `application/trig`
* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` or `nt` * [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples`
* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` or `nq` * [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads`
* [N3](https://w3c.github.io/N3/spec/): `text/n3` or `n3` * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml`
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` or `rdf`
Example of building a Turtle file from the named graph `<http://example.com/graph>`: Example of building a Turtle file from the named graph `<http://example.com/graph>`:
```js ```js
@ -264,8 +242,7 @@ store.dump("text/turtle", oxigraph.namedNode("http://example.com/graph"));
### From 0.2 to 0.3 ### From 0.2 to 0.3
* The `MemoryStore` class is now called `Store` (there is no other kind of store...). * The `MemoryStore` class is now called `Store` (there is no other kind of store...).
* RDF/JS datamodel functions (`namedNode`...) are now available at the root of the `oxigraph` package. You now need to call `oxigraph.namedNode` instead of `store.dataFactory.namedNode`. * RDF/JS datamodel functions (`namedNode`...) are now available at the root of the `oxigraph` package. You now need to call `oxigraph.namedNode` instead of `store.dataFactory.namedNode`.
* [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is now implemented. `Quad` is now a valid value for the `Quad` `subject` and `object` properties. * [RDF-star](https://w3c.github.io/rdf-star/cg-spec) is now implemented. `Quad` is now a valid value for the `Quad` `subject` and `object` properties.
## How to contribute ## How to contribute
@ -273,17 +250,7 @@ The Oxigraph bindings are written in Rust using [the Rust WASM toolkit](https://
The [The Rust Wasm Book](https://rustwasm.github.io/docs/book/) is a great tutorial to get started. The [The Rust Wasm Book](https://rustwasm.github.io/docs/book/) is a great tutorial to get started.
To set up a dev environment: To run the tests of the JS bindings written in JS, run `npm test`.
- ensure to have a Rust toolchain with `rustup` and `cargo` installed ([possible instructions](https://www.rust-lang.org/tools/install)).
- install [`wasm-pack`](https://rustwasm.github.io/wasm-pack/): `cargo install wasm-pack` (it is also in some Linux distribution repositories).
- `npm install` to install pure JS dependencies.
- you are good to go!
Testing and linting:
- Rust code is formatted with [rustfmt](https://github.com/rust-lang/rustfmt) and linted with [clippy](https://github.com/rust-lang/rust-clippy).
You can execute them with `cargo fmt` and `cargo clippy`.
- JS code is formatted and linted with [Biome](https://biomejs.dev/). `npm run fmt` to auto-format and `npm test` to lint and test.
- Tests are written in JavaScript using [Mocha](https://mochajs.org/) in the `test` directory. `npm test` to run them.
## License ## License

@ -1,14 +0,0 @@
{
"$schema": "https://biomejs.dev/schemas/1.0.0/schema.json",
"formatter": {
"indentStyle": "space",
"indentWidth": 4,
"lineWidth": 100
},
"linter": {
"ignore": ["pkg"]
},
"organizeImports": {
"enabled": true
}
}

@ -1,19 +1,31 @@
#! /usr/bin/env node #! /usr/bin/env node
const fs = require("node:fs"); const fs = require('fs')
const pkg = JSON.parse(fs.readFileSync("./pkg/package.json"));
pkg.name = "oxigraph"; // We copy file to the new directory
pkg.main = "node.js"; fs.mkdirSync('pkg')
pkg.browser = "web.js"; for (const file of fs.readdirSync('./pkg-web')) {
pkg.files = ["*.{js,wasm,d.ts}"]; fs.copyFileSync('./pkg-web/' + file, './pkg/' + file)
pkg.homepage = "https://github.com/oxigraph/oxigraph/tree/main/js"; }
for (const file of fs.readdirSync('./pkg-node')) {
fs.copyFileSync('./pkg-node/' + file, './pkg/' + file)
}
const pkg = JSON.parse(fs.readFileSync('./pkg/package.json'))
pkg.name = 'oxigraph'
pkg.main = 'node.js'
pkg.browser = 'web.js'
pkg.files = [
'*.{js,wasm,d.ts}'
]
pkg.homepage = 'https://github.com/oxigraph/oxigraph/tree/main/js'
pkg.bugs = { pkg.bugs = {
url: "https://github.com/oxigraph/oxigraph/issues", url: 'https://github.com/oxigraph/oxigraph/issues'
}; }
pkg.collaborators = undefined; pkg.collaborators = undefined
pkg.repository = { pkg.repository = {
type: "git", type: 'git',
url: "https://github.com/oxigraph/oxigraph.git", url: 'https://github.com/oxigraph/oxigraph.git',
directory: "js", directory: 'js'
}; }
fs.writeFileSync("./pkg/package.json", JSON.stringify(pkg, null, 2)); fs.writeFileSync('./pkg/package.json', JSON.stringify(pkg, null, 2))

js/package-lock.json (generated): file diff suppressed because it is too large.

@ -1,20 +1,21 @@
{ {
"name": "oxigraph_tests", "name": "oxigraph_tests",
"description": "Oxigraph JS build and tests", "description": "Oxigraph JS build and tests",
"private": true, "private": true,
"devDependencies": { "devDependencies": {
"@biomejs/biome": "^1.0.0", "mocha": "^10.0.0",
"@rdfjs/data-model": "^2.0.1", "@rdfjs/data-model": "^2.0.1",
"mocha": "^10.0.0" "standard": "^17.0.0"
}, },
"scripts": { "scripts": {
"fmt": "biome format . --write && biome check . --apply-unsafe && biome format . --write", "test": "standard && wasm-pack build --debug --target nodejs && mocha",
"test": "biome ci . && wasm-pack build --debug --target nodejs --weak-refs --reference-types && mocha", "build": "rm -rf pkg && wasm-pack build --release --target web --out-name web && mv pkg pkg-web && wasm-pack build --release --target nodejs --out-name node && mv pkg pkg-node && node build_package.js && rm -r pkg-web && rm -r pkg-node",
"build": "wasm-pack build --release --target web --out-name web --weak-refs --reference-types && wasm-pack build --release --target nodejs --out-name node --weak-refs --reference-types && node build_package.js", "release": "npm run build && npm publish ./pkg",
"release": "npm run build && npm publish ./pkg", "pack": "npm run build && npm pack ./pkg"
"pack": "npm run build && npm pack ./pkg" },
}, "standard": {
"standard": { "ignore": [
"ignore": ["pkg*"] "pkg*"
} ]
}
} }

@ -1,4 +1,4 @@
#![allow(clippy::mem_forget)] #![allow(clippy::unused_unit)]
use wasm_bindgen::prelude::*; use wasm_bindgen::prelude::*;
mod model; mod model;

@ -1,9 +1,10 @@
#![allow(dead_code, clippy::inherent_to_string, clippy::unused_self)] #![allow(dead_code, clippy::inherent_to_string)]
use crate::format_err; use crate::format_err;
use crate::utils::to_err; use crate::utils::to_err;
use js_sys::{Reflect, UriError}; use js_sys::{Reflect, UriError};
use oxigraph::model::*; use oxigraph::model::*;
use oxigraph::sparql::Variable;
use wasm_bindgen::prelude::*; use wasm_bindgen::prelude::*;
thread_local! { thread_local! {
@ -13,7 +14,7 @@ thread_local! {
#[wasm_bindgen(js_name = namedNode)] #[wasm_bindgen(js_name = namedNode)]
pub fn named_node(value: String) -> Result<JsNamedNode, JsValue> { pub fn named_node(value: String) -> Result<JsNamedNode, JsValue> {
NamedNode::new(value) NamedNode::new(value)
.map(Into::into) .map(|v| v.into())
.map_err(|v| UriError::new(&v.to_string()).into()) .map_err(|v| UriError::new(&v.to_string()).into())
} }
@ -50,7 +51,7 @@ pub fn literal(
#[wasm_bindgen(js_name = defaultGraph)] #[wasm_bindgen(js_name = defaultGraph)]
pub fn default_graph() -> JsDefaultGraph { pub fn default_graph() -> JsDefaultGraph {
JsDefaultGraph JsDefaultGraph {}
} }
#[wasm_bindgen(js_name = variable)] #[wasm_bindgen(js_name = variable)]
@ -296,7 +297,7 @@ impl From<JsLiteral> for Term {
#[wasm_bindgen(js_name = DefaultGraph)] #[wasm_bindgen(js_name = DefaultGraph)]
#[derive(Eq, PartialEq, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct JsDefaultGraph; pub struct JsDefaultGraph {}
#[wasm_bindgen(js_class = DefaultGraph)] #[wasm_bindgen(js_class = DefaultGraph)]
impl JsDefaultGraph { impl JsDefaultGraph {
@ -307,12 +308,12 @@ impl JsDefaultGraph {
#[wasm_bindgen(getter)] #[wasm_bindgen(getter)]
pub fn value(&self) -> String { pub fn value(&self) -> String {
String::new() "".to_owned()
} }
#[wasm_bindgen(js_name = toString)] #[wasm_bindgen(js_name = toString)]
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
"DEFAULT".to_owned() "DEFAULT".to_string()
} }
pub fn equals(&self, other: &JsValue) -> bool { pub fn equals(&self, other: &JsValue) -> bool {
@ -387,7 +388,7 @@ impl JsQuad {
#[wasm_bindgen(getter)] #[wasm_bindgen(getter)]
pub fn value(&self) -> String { pub fn value(&self) -> String {
String::new() "".to_owned()
} }
#[wasm_bindgen(getter = subject)] #[wasm_bindgen(getter = subject)]
@ -475,19 +476,19 @@ impl From<JsTerm> for JsValue {
impl From<NamedNode> for JsTerm { impl From<NamedNode> for JsTerm {
fn from(node: NamedNode) -> Self { fn from(node: NamedNode) -> Self {
Self::NamedNode(node.into()) JsTerm::NamedNode(node.into())
} }
} }
impl From<BlankNode> for JsTerm { impl From<BlankNode> for JsTerm {
fn from(node: BlankNode) -> Self { fn from(node: BlankNode) -> Self {
Self::BlankNode(node.into()) JsTerm::BlankNode(node.into())
} }
} }
impl From<Literal> for JsTerm { impl From<Literal> for JsTerm {
fn from(literal: Literal) -> Self { fn from(literal: Literal) -> Self {
Self::Literal(literal.into()) JsTerm::Literal(literal.into())
} }
} }
@ -526,20 +527,20 @@ impl From<GraphName> for JsTerm {
match name { match name {
GraphName::NamedNode(node) => node.into(), GraphName::NamedNode(node) => node.into(),
GraphName::BlankNode(node) => node.into(), GraphName::BlankNode(node) => node.into(),
GraphName::DefaultGraph => Self::DefaultGraph(JsDefaultGraph), GraphName::DefaultGraph => JsTerm::DefaultGraph(JsDefaultGraph {}),
} }
} }
} }
impl From<Variable> for JsTerm { impl From<Variable> for JsTerm {
fn from(variable: Variable) -> Self { fn from(variable: Variable) -> Self {
Self::Variable(variable.into()) JsTerm::Variable(variable.into())
} }
} }
impl From<Triple> for JsTerm { impl From<Triple> for JsTerm {
fn from(triple: Triple) -> Self { fn from(triple: Triple) -> Self {
Self::Quad(triple.into()) JsTerm::Quad(triple.into())
} }
} }
@ -551,14 +552,14 @@ impl From<Box<Triple>> for JsTerm {
impl From<Quad> for JsTerm { impl From<Quad> for JsTerm {
fn from(quad: Quad) -> Self { fn from(quad: Quad) -> Self {
Self::Quad(quad.into()) JsTerm::Quad(quad.into())
} }
} }
impl TryFrom<JsTerm> for NamedNode { impl TryFrom<JsTerm> for NamedNode {
type Error = JsValue; type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, Self::Error> { fn try_from(value: JsTerm) -> Result<Self, JsValue> {
match value { match value {
JsTerm::NamedNode(node) => Ok(node.into()), JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Err(format_err!( JsTerm::BlankNode(node) => Err(format_err!(
@ -582,7 +583,7 @@ impl TryFrom<JsTerm> for NamedNode {
impl TryFrom<JsTerm> for NamedOrBlankNode { impl TryFrom<JsTerm> for NamedOrBlankNode {
type Error = JsValue; type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, Self::Error> { fn try_from(value: JsTerm) -> Result<Self, JsValue> {
match value { match value {
JsTerm::NamedNode(node) => Ok(node.into()), JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Ok(node.into()), JsTerm::BlankNode(node) => Ok(node.into()),
@ -608,7 +609,7 @@ impl TryFrom<JsTerm> for NamedOrBlankNode {
impl TryFrom<JsTerm> for Subject { impl TryFrom<JsTerm> for Subject {
type Error = JsValue; type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, Self::Error> { fn try_from(value: JsTerm) -> Result<Self, JsValue> {
match value { match value {
JsTerm::NamedNode(node) => Ok(node.into()), JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Ok(node.into()), JsTerm::BlankNode(node) => Ok(node.into()),
@ -631,7 +632,7 @@ impl TryFrom<JsTerm> for Subject {
impl TryFrom<JsTerm> for Term { impl TryFrom<JsTerm> for Term {
type Error = JsValue; type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, Self::Error> { fn try_from(value: JsTerm) -> Result<Self, JsValue> {
match value { match value {
JsTerm::NamedNode(node) => Ok(node.into()), JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Ok(node.into()), JsTerm::BlankNode(node) => Ok(node.into()),
@ -651,7 +652,7 @@ impl TryFrom<JsTerm> for Term {
impl TryFrom<JsTerm> for GraphName { impl TryFrom<JsTerm> for GraphName {
type Error = JsValue; type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, Self::Error> { fn try_from(value: JsTerm) -> Result<Self, JsValue> {
match value { match value {
JsTerm::NamedNode(node) => Ok(node.into()), JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Ok(node.into()), JsTerm::BlankNode(node) => Ok(node.into()),
@ -659,7 +660,7 @@ impl TryFrom<JsTerm> for GraphName {
"The literal {} is not a possible graph name", "The literal {} is not a possible graph name",
literal.inner literal.inner
)), )),
JsTerm::DefaultGraph(_) => Ok(Self::DefaultGraph), JsTerm::DefaultGraph(_) => Ok(GraphName::DefaultGraph),
JsTerm::Variable(variable) => Err(format_err!( JsTerm::Variable(variable) => Err(format_err!(
"The variable {} is not a possible RDF term", "The variable {} is not a possible RDF term",
variable.inner variable.inner
@ -711,7 +712,7 @@ impl FromJsConverter {
.map_err(|v| UriError::new(&v.to_string()))? .map_err(|v| UriError::new(&v.to_string()))?
.into()), .into()),
"BlankNode" => Ok(BlankNode::new( "BlankNode" => Ok(BlankNode::new(
Reflect::get(value, &self.value)? &Reflect::get(value, &self.value)?
.as_string() .as_string()
.ok_or_else(|| format_err!("BlankNode should have a string value"))?, .ok_or_else(|| format_err!("BlankNode should have a string value"))?,
) )
@ -738,9 +739,9 @@ impl FromJsConverter {
)) ))
} }
} }
"DefaultGraph" => Ok(JsTerm::DefaultGraph(JsDefaultGraph)), "DefaultGraph" => Ok(JsTerm::DefaultGraph(JsDefaultGraph {})),
"Variable" => Ok(Variable::new( "Variable" => Ok(Variable::new(
Reflect::get(value, &self.value)? &Reflect::get(value, &self.value)?
.as_string() .as_string()
.ok_or_else(|| format_err!("Variable should have a string value"))?, .ok_or_else(|| format_err!("Variable should have a string value"))?,
) )
@ -748,7 +749,8 @@ impl FromJsConverter {
.into()), .into()),
"Quad" => Ok(self.to_quad(value)?.into()), "Quad" => Ok(self.to_quad(value)?.into()),
_ => Err(format_err!( _ => Err(format_err!(
"The termType {term_type} is not supported by Oxigraph" "The termType {} is not supported by Oxigraph",
term_type
)), )),
} }
} else if term_type.is_undefined() { } else if term_type.is_undefined() {

@ -1,11 +1,12 @@
use crate::format_err; use crate::format_err;
use crate::model::*; use crate::model::*;
use crate::utils::to_err; use crate::utils::to_err;
use js_sys::{Array, Map, Reflect}; use js_sys::{Array, Map};
use oxigraph::io::{RdfFormat, RdfParser}; use oxigraph::io::{DatasetFormat, GraphFormat};
use oxigraph::model::*; use oxigraph::model::*;
use oxigraph::sparql::{Query, QueryResults, Update}; use oxigraph::sparql::QueryResults;
use oxigraph::store::Store; use oxigraph::store::Store;
use std::io::Cursor;
use wasm_bindgen::prelude::*; use wasm_bindgen::prelude::*;
#[wasm_bindgen(js_name = Store)] #[wasm_bindgen(js_name = Store)]
@ -16,7 +17,6 @@ pub struct JsStore {
#[wasm_bindgen(js_class = Store)] #[wasm_bindgen(js_class = Store)]
impl JsStore { impl JsStore {
#[wasm_bindgen(constructor)] #[wasm_bindgen(constructor)]
#[allow(clippy::use_self)]
pub fn new(quads: Option<Box<[JsValue]>>) -> Result<JsStore, JsValue> { pub fn new(quads: Option<Box<[JsValue]>>) -> Result<JsStore, JsValue> {
console_error_panic_hook::set_once(); console_error_panic_hook::set_once();
@ -24,7 +24,7 @@ impl JsStore {
store: Store::new().map_err(to_err)?, store: Store::new().map_err(to_err)?,
}; };
if let Some(quads) = quads { if let Some(quads) = quads {
for quad in &*quads { for quad in quads.iter() {
store.add(quad)?; store.add(quad)?;
} }
} }
@ -73,28 +73,28 @@ impl JsStore {
None None
} }
.as_ref() .as_ref()
.map(<&Subject>::into), .map(|t: &NamedOrBlankNode| t.into()),
if let Some(predicate) = FROM_JS.with(|c| c.to_optional_term(predicate))? { if let Some(predicate) = FROM_JS.with(|c| c.to_optional_term(predicate))? {
Some(NamedNode::try_from(predicate)?) Some(NamedNode::try_from(predicate)?)
} else { } else {
None None
} }
.as_ref() .as_ref()
.map(<&NamedNode>::into), .map(|t: &NamedNode| t.into()),
if let Some(object) = FROM_JS.with(|c| c.to_optional_term(object))? { if let Some(object) = FROM_JS.with(|c| c.to_optional_term(object))? {
Some(object.try_into()?) Some(object.try_into()?)
} else { } else {
None None
} }
.as_ref() .as_ref()
.map(<&Term>::into), .map(|t: &Term| t.into()),
if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(graph_name))? { if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(graph_name))? {
Some(graph_name.try_into()?) Some(graph_name.try_into()?)
} else { } else {
None None
} }
.as_ref() .as_ref()
.map(<&GraphName>::into), .map(|t: &GraphName| t.into()),
) )
.map(|v| v.map(|v| JsQuad::from(v).into())) .map(|v| v.map(|v| JsQuad::from(v).into()))
.collect::<Result<Vec<_>, _>>() .collect::<Result<Vec<_>, _>>()
@ -102,21 +102,7 @@ impl JsStore {
.into_boxed_slice()) .into_boxed_slice())
} }
pub fn query(&self, query: &str, options: &JsValue) -> Result<JsValue, JsValue> { pub fn query(&self, query: &str) -> Result<JsValue, JsValue> {
// Parsing options
let mut base_iri = None;
let mut use_default_graph_as_union = false;
if !options.is_undefined() {
base_iri = Reflect::get(options, &JsValue::from_str("base_iri"))?.as_string();
use_default_graph_as_union =
Reflect::get(options, &JsValue::from_str("use_default_graph_as_union"))?
.is_truthy();
}
let mut query = Query::parse(query, base_iri.as_deref()).map_err(to_err)?;
if use_default_graph_as_union {
query.dataset_mut().set_default_graph_as_union();
}
let results = self.store.query(query).map_err(to_err)?; let results = self.store.query(query).map_err(to_err)?;
let output = match results { let output = match results {
QueryResults::Solutions(solutions) => { QueryResults::Solutions(solutions) => {
@ -149,25 +135,17 @@ impl JsStore {
Ok(output) Ok(output)
} }
pub fn update(&self, update: &str, options: &JsValue) -> Result<(), JsValue> { pub fn update(&self, update: &str) -> Result<(), JsValue> {
// Parsing options
let mut base_iri = None;
if !options.is_undefined() {
base_iri = Reflect::get(options, &JsValue::from_str("base_iri"))?.as_string();
}
let update = Update::parse(update, base_iri.as_deref()).map_err(to_err)?;
self.store.update(update).map_err(to_err) self.store.update(update).map_err(to_err)
} }
pub fn load( pub fn load(
&self, &self,
data: &str, data: &str,
format: &str, mime_type: &str,
base_iri: &JsValue, base_iri: &JsValue,
to_graph_name: &JsValue, to_graph_name: &JsValue,
) -> Result<(), JsValue> { ) -> Result<(), JsValue> {
let format = rdf_format(format)?;
let base_iri = if base_iri.is_null() || base_iri.is_undefined() { let base_iri = if base_iri.is_null() || base_iri.is_undefined() {
None None
} else if base_iri.is_string() { } else if base_iri.is_string() {
@ -180,41 +158,65 @@ impl JsStore {
)); ));
}; };
let mut parser = RdfParser::from_format(format); let to_graph_name =
if let Some(to_graph_name) = FROM_JS.with(|c| c.to_optional_term(to_graph_name))? { if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(to_graph_name))? {
parser = parser.with_default_graph(GraphName::try_from(to_graph_name)?); Some(graph_name.try_into()?)
} } else {
if let Some(base_iri) = base_iri { None
parser = parser.with_base_iri(base_iri).map_err(to_err)?; };
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
self.store
.load_graph(
Cursor::new(data),
graph_format,
&to_graph_name.unwrap_or(GraphName::DefaultGraph),
base_iri.as_deref(),
)
.map_err(to_err)
} else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) {
if to_graph_name.is_some() {
return Err(format_err!(
"The target graph name parameter is not available for dataset formats"
));
}
self.store
.load_dataset(Cursor::new(data), dataset_format, base_iri.as_deref())
.map_err(to_err)
} else {
Err(format_err!("Not supported MIME type: {}", mime_type))
} }
self.store
.load_from_read(parser, data.as_bytes())
.map_err(to_err)
} }
pub fn dump(&self, format: &str, from_graph_name: &JsValue) -> Result<String, JsValue> { pub fn dump(&self, mime_type: &str, from_graph_name: &JsValue) -> Result<String, JsValue> {
let format = rdf_format(format)?; let from_graph_name =
let buffer = if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(from_graph_name))? {
if let Some(from_graph_name) = FROM_JS.with(|c| c.to_optional_term(from_graph_name))? { Some(graph_name.try_into()?)
self.store.dump_graph_to_write(
&GraphName::try_from(from_graph_name)?,
format,
Vec::new(),
)
} else { } else {
self.store.dump_to_write(format, Vec::new()) None
};
let mut buffer = Vec::new();
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
self.store
.dump_graph(
&mut buffer,
graph_format,
&from_graph_name.unwrap_or(GraphName::DefaultGraph),
)
.map_err(to_err)?;
} else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) {
if from_graph_name.is_some() {
return Err(format_err!(
"The target graph name parameter is not available for dataset formats"
));
} }
.map_err(to_err)?; self.store
.dump_dataset(&mut buffer, dataset_format)
.map_err(to_err)?;
} else {
return Err(format_err!("Not supported MIME type: {}", mime_type));
}
String::from_utf8(buffer).map_err(to_err) String::from_utf8(buffer).map_err(to_err)
} }
} }
fn rdf_format(format: &str) -> Result<RdfFormat, JsValue> {
if format.contains('/') {
RdfFormat::from_media_type(format)
.ok_or_else(|| format_err!("Not supported RDF format media type: {format}"))
} else {
RdfFormat::from_extension(format)
.ok_or_else(|| format_err!("Not supported RDF format extension: {format}"))
}
}

@ -11,7 +11,6 @@ macro_rules! format_err {
}; };
} }
#[allow(clippy::needless_pass_by_value)]
pub fn to_err(e: impl ToString) -> JsValue { pub fn to_err(e: impl ToString) -> JsValue {
JsValue::from(Error::new(&e.to_string())) JsValue::from(Error::new(&e.to_string()))
} }

@ -1,52 +1,38 @@
/* global describe, it */

import oxigraph from '../pkg/oxigraph.js'
import assert from 'assert'
import runTests from '../node_modules/@rdfjs/data-model/test/index.js'

runTests({ factory: oxigraph })

describe('DataModel', function () {
  describe('#toString()', function () {
    it('namedNode().toString() should return SPARQL compatible syntax', function () {
      assert.strictEqual('<http://example.com>', oxigraph.namedNode('http://example.com').toString())
    })

    it('blankNode().toString() should return SPARQL compatible syntax', function () {
      assert.strictEqual('_:a', oxigraph.blankNode('a').toString())
    })

    it('literal().toString() should return SPARQL compatible syntax', function () {
      assert.strictEqual('"a\\"b"@en', oxigraph.literal('a"b', 'en').toString())
    })

    it('defaultGraph().toString() should return SPARQL compatible syntax', function () {
      assert.strictEqual('DEFAULT', oxigraph.defaultGraph().toString())
    })

    it('variable().toString() should return SPARQL compatible syntax', function () {
      assert.strictEqual('?a', oxigraph.variable('a').toString())
    })

    it('quad().toString() should return SPARQL compatible syntax', function () {
      assert.strictEqual(
        '<http://example.com/s> <http://example.com/p> <<<http://example.com/s1> <http://example.com/p1> <http://example.com/o1>>> <http://example.com/g>',
        oxigraph.quad(oxigraph.namedNode('http://example.com/s'), oxigraph.namedNode('http://example.com/p'), oxigraph.quad(oxigraph.namedNode('http://example.com/s1'), oxigraph.namedNode('http://example.com/p1'), oxigraph.namedNode('http://example.com/o1')), oxigraph.namedNode('http://example.com/g')).toString()
      )
    })
  })
})

@ -1,208 +1,161 @@
/* global describe, it */

import { Store } from '../pkg/oxigraph.js'
import assert from 'assert'
import dataModel from '@rdfjs/data-model'

const ex = dataModel.namedNode('http://example.com')
const triple = dataModel.quad(
  dataModel.blankNode('s'),
  dataModel.namedNode('http://example.com/p'),
  dataModel.literal('o')
)

describe('Store', function () {
  describe('#add()', function () {
    it('an added quad should be in the store', function () {
      const store = new Store()
      store.add(dataModel.quad(ex, ex, triple))
      assert(store.has(dataModel.quad(ex, ex, triple)))
    })
  })

  describe('#delete()', function () {
    it('an removed quad should not be in the store anymore', function () {
      const store = new Store([dataModel.quad(triple, ex, ex)])
      assert(store.has(dataModel.quad(triple, ex, ex)))
      store.delete(dataModel.quad(triple, ex, ex))
      assert(!store.has(dataModel.quad(triple, ex, ex)))
    })
  })

  describe('#has()', function () {
    it('an added quad should be in the store', function () {
      const store = new Store([dataModel.quad(ex, ex, ex)])
      assert(store.has(dataModel.quad(ex, ex, ex)))
    })
  })

  describe('#size()', function () {
    it('A store with one quad should have 1 for size', function () {
      const store = new Store([dataModel.quad(ex, ex, ex)])
      assert.strictEqual(1, store.size)
    })
  })

  describe('#match_quads()', function () {
    it('blank pattern should return all quads', function () {
      const store = new Store([dataModel.quad(ex, ex, ex)])
      const results = store.match()
      assert.strictEqual(1, results.length)
      assert(dataModel.quad(ex, ex, ex).equals(results[0]))
    })
  })

  describe('#query()', function () {
    it('ASK true', function () {
      const store = new Store([dataModel.quad(ex, ex, ex)])
      assert.strictEqual(true, store.query('ASK { ?s ?s ?s }'))
    })

    it('ASK false', function () {
      const store = new Store()
      assert.strictEqual(false, store.query('ASK { FILTER(false)}'))
    })

    it('CONSTRUCT', function () {
      const store = new Store([dataModel.quad(ex, ex, ex)])
      const results = store.query('CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }')
      assert.strictEqual(1, results.length)
      assert(dataModel.quad(ex, ex, ex).equals(results[0]))
    })

    it('SELECT', function () {
      const store = new Store([dataModel.quad(ex, ex, ex)])
      const results = store.query('SELECT ?s WHERE { ?s ?p ?o }')
      assert.strictEqual(1, results.length)
      assert(ex.equals(results[0].get('s')))
    })

    it('SELECT with NOW()', function () {
      const store = new Store([dataModel.quad(ex, ex, ex)])
      const results = store.query('SELECT (YEAR(NOW()) AS ?y) WHERE {}')
      assert.strictEqual(1, results.length)
    })

    it('SELECT with RAND()', function () {
      const store = new Store([dataModel.quad(ex, ex, ex)])
      const results = store.query('SELECT (RAND() AS ?y) WHERE {}')
      assert.strictEqual(1, results.length)
    })
  })

  describe('#update()', function () {
    it('INSERT DATA', function () {
      const store = new Store()
      store.update('INSERT DATA { <http://example.com> <http://example.com> <http://example.com> }')
      assert.strictEqual(1, store.size)
    })

    it('DELETE DATA', function () {
      const store = new Store([dataModel.quad(ex, ex, ex)])
      store.update('DELETE DATA { <http://example.com> <http://example.com> <http://example.com> }')
      assert.strictEqual(0, store.size)
    })

    it('DELETE WHERE', function () {
      const store = new Store([dataModel.quad(ex, ex, ex)])
      store.update('DELETE WHERE { ?v ?v ?v }')
      assert.strictEqual(0, store.size)
    })
  })

  describe('#load()', function () {
    it('load NTriples in the default graph', function () {
      const store = new Store()
      store.load('<http://example.com> <http://example.com> <http://example.com> .', 'application/n-triples')
      assert(store.has(dataModel.quad(ex, ex, ex)))
    })

    it('load NTriples in an other graph', function () {
      const store = new Store()
      store.load('<http://example.com> <http://example.com> <http://example.com> .', 'application/n-triples', null, ex)
      assert(store.has(dataModel.quad(ex, ex, ex, ex)))
    })

    it('load Turtle with a base IRI', function () {
      const store = new Store()
      store.load('<http://example.com> <http://example.com> <> .', 'text/turtle', 'http://example.com')
      assert(store.has(dataModel.quad(ex, ex, ex)))
    })

    it('load NQuads', function () {
      const store = new Store()
      store.load('<http://example.com> <http://example.com> <http://example.com> <http://example.com> .', 'application/n-quads')
      assert(store.has(dataModel.quad(ex, ex, ex, ex)))
    })

    it('load TriG with a base IRI', function () {
      const store = new Store()
      store.load('GRAPH <> { <http://example.com> <http://example.com> <> }', 'application/trig', 'http://example.com')
      assert(store.has(dataModel.quad(ex, ex, ex, ex)))
    })
  })

  describe('#dump()', function () {
    it('dump dataset content', function () {
      const store = new Store([dataModel.quad(ex, ex, ex, ex)])
      assert.strictEqual('<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n', store.dump('application/n-quads'))
    })

    it('dump named graph content', function () {
      const store = new Store([dataModel.quad(ex, ex, ex, ex)])
      assert.strictEqual('<http://example.com> <http://example.com> <http://example.com> .\n', store.dump('application/n-triples', ex))
    })

    it('dump default graph content', function () {
      const store = new Store([dataModel.quad(ex, ex, ex, ex)])
      assert.strictEqual('', store.dump('application/n-triples'))
    })
  })
})

@ -0,0 +1,63 @@
[package]
name = "oxigraph"
version = "0.4.0-alpha"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
keywords = ["RDF", "SPARQL", "graph-database", "database"]
categories = ["database-implementations"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib"
homepage = "https://oxigraph.org/"
description = """
a SPARQL database and RDF toolkit
"""
edition = "2021"
[package.metadata.docs.rs]
all-features = true
[features]
default = []
http_client = ["oxhttp", "oxhttp/rustls"]
[dependencies]
rand = "0.8"
md-5 = "0.10"
sha-1 = "0.10"
sha2 = "0.10"
digest = "0.10"
regex = "1"
oxilangtag = "0.1"
oxiri = "0.2"
rio_api = "0.7"
rio_turtle = "0.7"
rio_xml = "0.7"
hex = "0.4"
nom = "7"
siphasher = "0.3"
lazy_static = "1"
sysinfo = "0.26"
oxrdf = { version = "0.1.0", path="oxrdf", features = ["rdf-star"] }
spargebra = { version = "0.3.0-alpha", path="spargebra", features = ["rdf-star", "ex-lateral"] }
sparesults = { version = "0.1.1", path="sparesults", features = ["rdf-star"] }
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
libc = "0.2"
oxrocksdb-sys = { version = "0.3.7", path="../oxrocksdb-sys" }
oxhttp = { version = "0.1", optional = true }
[target.'cfg(target_arch = "wasm32")'.dependencies]
js-sys = "0.3"
getrandom = {version="0.2", features=["js"]}
[dev-dependencies]
criterion = "0.4"
oxhttp = "0.1"
zstd = "0.11"
[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
wasm-bindgen-test = "0.3"
[[bench]]
name = "store"
harness = false

@ -1,13 +1,72 @@
Oxigraph
========

[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph)
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph)
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)

Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.

Its goal is to provide a compliant, safe and fast on-disk graph database.
It also provides a set of utility functions for reading, writing, and processing RDF files.
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet.
Oxigraph also provides [a standalone HTTP server](https://crates.io/crates/oxigraph_server) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library.
Oxigraph implements the following specifications:
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio).
* [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
The main entry point of Oxigraph is the [`Store`](store::Store) struct:
```rust
use oxigraph::store::Store;
use oxigraph::model::*;
use oxigraph::sparql::QueryResults;
let store = Store::new().unwrap();
// insertion
let ex = NamedNode::new("http://example.com").unwrap();
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph);
store.insert(&quad).unwrap();
// quad filter
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap();
assert_eq!(vec![quad], results);
// SPARQL query
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() {
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into()));
}
```
Some parts of this library are available as standalone crates:
* [`oxrdf`](https://crates.io/crates/oxrdf) provides datastructures encoding RDF basic concepts (the `oxigraph::model` module).
* [`spargebra`](https://crates.io/crates/spargebra) provides a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults) provides parsers and serializers for SPARQL result formats.
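For example, `oxrdf` can be used on its own when only the RDF data model is needed. A minimal sketch (mirroring the `oxigraph::model` API; the IRI is illustrative):

```rust
use oxrdf::*;

// Build a small in-memory graph and query it by subject.
let ex = NamedNodeRef::new("http://example.com").unwrap();
let mut graph = Graph::default();
graph.insert(TripleRef::new(ex, ex, ex));
assert_eq!(graph.triples_for_subject(ex).count(), 1);
```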
To build the library, don't forget to clone the submodules: use `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository including its submodules, or `git submodule update --init` to add them to an already cloned repository.
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -0,0 +1,208 @@
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use oxhttp::model::{Method, Request, Status};
use oxigraph::io::GraphFormat;
use oxigraph::model::GraphNameRef;
use oxigraph::sparql::{Query, QueryResults, Update};
use oxigraph::store::Store;
use rand::random;
use std::env::temp_dir;
use std::fs::{remove_dir_all, File};
use std::io::{BufRead, BufReader, Cursor, Read};
use std::path::{Path, PathBuf};
fn store_load(c: &mut Criterion) {
{
let mut data = Vec::new();
read_data("explore-1000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let mut group = c.benchmark_group("store load");
group.throughput(Throughput::Bytes(data.len() as u64));
group.sample_size(10);
group.bench_function("load BSBM explore 1000 in memory", |b| {
b.iter(|| {
let store = Store::new().unwrap();
do_load(&store, &data);
})
});
group.bench_function("load BSBM explore 1000 in on disk", |b| {
b.iter(|| {
let path = TempDir::default();
let store = Store::open(&path.0).unwrap();
do_load(&store, &data);
})
});
group.bench_function("load BSBM explore 1000 in on disk with bulk load", |b| {
b.iter(|| {
let path = TempDir::default();
let store = Store::open(&path.0).unwrap();
do_bulk_load(&store, &data);
})
});
}
{
let mut data = Vec::new();
read_data("explore-10000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let mut group = c.benchmark_group("store load large");
group.throughput(Throughput::Bytes(data.len() as u64));
group.sample_size(10);
group.bench_function("load BSBM explore 10000 in on disk with bulk load", |b| {
b.iter(|| {
let path = TempDir::default();
let store = Store::open(&path.0).unwrap();
do_bulk_load(&store, &data);
})
});
}
}
fn do_load(store: &Store, data: &[u8]) {
store
.load_graph(
Cursor::new(&data),
GraphFormat::NTriples,
GraphNameRef::DefaultGraph,
None,
)
.unwrap();
store.optimize().unwrap();
}
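/// Same as `do_load` but through the bulk loader, which is faster for large initial
/// loads at the cost of transactional guarantees.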
fn do_bulk_load(store: &Store, data: &[u8]) {
store
.bulk_loader()
.load_graph(
Cursor::new(&data),
GraphFormat::NTriples,
GraphNameRef::DefaultGraph,
None,
)
.unwrap();
store.optimize().unwrap();
}
fn store_query_and_update(c: &mut Criterion) {
let mut data = Vec::new();
read_data("explore-1000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst")
.lines()
.map(|l| {
let l = l.unwrap();
let mut parts = l.trim().split('\t');
let kind = parts.next().unwrap();
let operation = parts.next().unwrap();
match kind {
"query" => Operation::Query(Query::parse(operation, None).unwrap()),
"update" => Operation::Update(Update::parse(operation, None).unwrap()),
_ => panic!("Unexpected operation kind {}", kind),
}
})
.collect::<Vec<_>>();
let query_operations = operations
.iter()
.filter(|o| matches!(o, Operation::Query(_)))
.cloned()
.collect::<Vec<_>>();
let mut group = c.benchmark_group("store operations");
group.throughput(Throughput::Elements(operations.len() as u64));
group.sample_size(10);
{
let memory_store = Store::new().unwrap();
do_bulk_load(&memory_store, &data);
group.bench_function("BSBM explore 1000 query in memory", |b| {
b.iter(|| run_operation(&memory_store, &query_operations))
});
group.bench_function("BSBM explore 1000 queryAndUpdate in memory", |b| {
b.iter(|| run_operation(&memory_store, &operations))
});
}
{
let path = TempDir::default();
let disk_store = Store::open(&path.0).unwrap();
do_bulk_load(&disk_store, &data);
group.bench_function("BSBM explore 1000 query on disk", |b| {
b.iter(|| run_operation(&disk_store, &query_operations))
});
group.bench_function("BSBM explore 1000 queryAndUpdate on disk", |b| {
b.iter(|| run_operation(&disk_store, &operations))
});
}
}
fn run_operation(store: &Store, operations: &[Operation]) {
for operation in operations {
match operation {
Operation::Query(q) => match store.query(q.clone()).unwrap() {
QueryResults::Boolean(_) => (),
QueryResults::Solutions(s) => {
for s in s {
s.unwrap();
}
}
QueryResults::Graph(g) => {
for t in g {
t.unwrap();
}
}
},
Operation::Update(u) => store.update(u.clone()).unwrap(),
}
}
}
criterion_group!(store, store_query_and_update, store_load);
criterion_main!(store);
fn read_data(file: &str) -> impl BufRead {
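    // Fetch the BSBM benchmark file from GitHub on first use, then decompress it on the fly.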
if !Path::new(file).exists() {
let mut client = oxhttp::Client::new();
client.set_redirection_limit(5);
let url = format!(
"https://github.com/Tpt/bsbm-tools/releases/download/v0.2/{}",
file
);
let request = Request::builder(Method::GET, url.parse().unwrap()).build();
let response = client.request(request).unwrap();
assert_eq!(
response.status(),
Status::OK,
"{}",
response.into_body().to_string().unwrap()
);
std::io::copy(&mut response.into_body(), &mut File::create(file).unwrap()).unwrap();
}
BufReader::new(zstd::Decoder::new(File::open(file).unwrap()).unwrap())
}
#[allow(clippy::large_enum_variant)]
#[derive(Clone)]
enum Operation {
Query(Query),
Update(Update),
}
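/// Scratch directory for the on-disk benchmarks, removed when the value is dropped.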
struct TempDir(PathBuf);
impl Default for TempDir {
fn default() -> Self {
Self(temp_dir().join(format!("oxigraph-bench-{}", random::<u128>())))
}
}
impl Drop for TempDir {
fn drop(&mut self) {
remove_dir_all(&self.0).unwrap()
}
}

@ -1,59 +0,0 @@
[package]
name = "oxigraph"
version.workspace = true
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF", "SPARQL", "graph-database", "database"]
categories = ["database-implementations"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxigraph"
homepage = "https://oxigraph.org/"
documentation = "https://docs.rs/oxigraph"
description = """
a SPARQL database and RDF toolkit
"""
edition.workspace = true
rust-version.workspace = true
[features]
js = ["getrandom/js", "oxsdatatypes/js", "js-sys"]
[dependencies]
digest.workspace = true
hex.workspace = true
json-event-parser.workspace = true
md-5.workspace = true
oxilangtag.workspace = true
oxiri.workspace = true
oxrdf = { workspace = true, features = ["rdf-star", "oxsdatatypes"] }
oxrdfio = { workspace = true, features = ["rdf-star"] }
oxsdatatypes.workspace = true
rand.workspace = true
regex.workspace = true
sha1.workspace = true
sha2.workspace = true
siphasher.workspace = true
sparesults = { workspace = true, features = ["rdf-star"] }
spargebra = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] }
sparopt = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] }
thiserror.workspace = true
[target.'cfg(not(target_family = "wasm"))'.dependencies]
libc = "0.2"
rocksdb.workspace = true
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]
getrandom.workspace = true
js-sys = { workspace = true, optional = true }
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
codspeed-criterion-compat.workspace = true
zstd.workspace = true
[lints]
workspace = true
[package.metadata.docs.rs]
rustdoc-args = ["--cfg", "docsrs"]

@ -1,82 +0,0 @@
Oxigraph
========
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph)
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph)
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
Its goal is to provide a compliant, safe and fast on-disk graph database.
It also provides a set of utility functions for reading, writing, and processing RDF files.
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet.
Oxigraph also provides [a CLI tool](https://crates.io/crates/oxigraph-cli) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library.
Oxigraph implements the following specifications:
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval.
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
The main entry point of Oxigraph is the [`Store`](store::Store) struct:
```rust
use oxigraph::store::Store;
use oxigraph::model::*;
use oxigraph::sparql::QueryResults;
let store = Store::new().unwrap();
// insertion
let ex = NamedNode::new("http://example.com").unwrap();
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph);
store.insert(&quad).unwrap();
// quad filter
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap();
assert_eq!(vec![quad], results);
// SPARQL query
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() {
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into()));
}
```
It is based on these crates that can be used separately:
* [`oxrdf`](https://crates.io/crates/oxrdf), datastructures encoding RDF basic concepts (the [`oxigraph::model`](crate::model) module).
* [`oxrdfio`](https://crates.io/crates/oxrdfio), a unified parser and serializer API for RDF formats (the [`oxigraph::io`](crate::io) module). It itself relies on:
* [`oxttl`](https://crates.io/crates/oxttl), N-Triple, N-Quad, Turtle, TriG and N3 parsing and serialization.
* [`oxrdfxml`](https://crates.io/crates/oxrdfxml), RDF/XML parsing and serialization.
* [`spargebra`](https://crates.io/crates/spargebra), a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults), parsers and serializers for SPARQL result formats (the [`oxigraph::sparql::results`](crate::sparql::results) module).
* [`sparopt`](https://crates.io/crates/sparopt), a SPARQL optimizer.
* [`oxsdatatypes`](https://crates.io/crates/oxsdatatypes), an implementation of some XML Schema datatypes.
To build the library locally, don't forget to clone the submodules: use `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository including its submodules, or `git submodule update --init` to add them to an already cloned repository.
It is possible to disable the RocksDB storage backend to only use the in-memory fallback by disabling the `rocksdb` default feature:
```toml
oxigraph = { version = "*", default-features = false }
```
This is the default behavior when compiling Oxigraph to WASM.
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -1,39 +0,0 @@
//! Utilities to read and write RDF graphs and datasets using [OxRDF I/O](https://crates.io/crates/oxrdfio).
//!
//! The entry points of this module are the two [`RdfParser`] and [`RdfSerializer`] structs.
//!
//! Usage example converting a Turtle file to a N-Triples file:
//! ```
//! use oxigraph::io::{RdfFormat, RdfParser, RdfSerializer};
//!
//! let turtle_file = b"@base <http://example.com/> .
//! @prefix schema: <http://schema.org/> .
//! <foo> a schema:Person ;
//! schema:name \"Foo\" .
//! <bar> a schema:Person ;
//! schema:name \"Bar\" .";
//!
//! let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
//! <http://example.com/foo> <http://schema.org/name> \"Foo\" .
//! <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
//! <http://example.com/bar> <http://schema.org/name> \"Bar\" .
//! ";
//!
//! let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
//! for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) {
//! writer.write_quad(&quad.unwrap()).unwrap();
//! }
//! assert_eq!(writer.finish().unwrap(), ntriples_file);
//! ```
mod format;
pub mod read;
pub mod write;
#[allow(deprecated)]
pub use self::format::{DatasetFormat, GraphFormat};
#[allow(deprecated)]
pub use self::read::{DatasetParser, GraphParser};
#[allow(deprecated)]
pub use self::write::{DatasetSerializer, GraphSerializer};
pub use oxrdfio::*;

@ -1,199 +0,0 @@
#![allow(deprecated)]
//! Utilities to read RDF graphs and datasets.
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser};
use std::io::Read;
/// A parser for RDF graph serialization formats.
///
/// It currently supports the following formats:
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[deprecated(note = "use RdfParser instead", since = "0.4.0")]
pub struct GraphParser {
inner: RdfParser,
}
impl GraphParser {
/// Builds a parser for the given format.
#[inline]
pub fn from_format(format: GraphFormat) -> Self {
Self {
inner: RdfParser::from_format(format.into())
.without_named_graphs()
.rename_blank_nodes(),
}
}
    /// Provides an IRI that can be used to resolve the file's relative IRIs.
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "</s> </p> </o> .";
///
/// let parser =
/// GraphParser::from_format(GraphFormat::Turtle).with_base_iri("http://example.com")?;
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self {
inner: self.inner.with_base_iri(base_iri)?,
})
}
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of triples.
pub fn read_triples<R: Read>(self, reader: R) -> TripleReader<R> {
TripleReader {
parser: self.inner.parse_read(reader),
}
}
}
/// An iterator yielding read triples.
/// Could be built using a [`GraphParser`].
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct TripleReader<R: Read> {
parser: FromReadQuadReader<R>,
}
impl<R: Read> Iterator for TripleReader<R> {
type Item = Result<Triple, RdfParseError>;
fn next(&mut self) -> Option<Self::Item> {
Some(self.parser.next()?.map(Into::into).map_err(Into::into))
}
}
/// A parser for RDF dataset serialization formats.
///
/// It currently supports the following formats:
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[deprecated(note = "use RdfParser instead", since = "0.4.0")]
pub struct DatasetParser {
inner: RdfParser,
}
impl DatasetParser {
/// Builds a parser for the given format.
#[inline]
pub fn from_format(format: DatasetFormat) -> Self {
Self {
inner: RdfParser::from_format(format.into()).rename_blank_nodes(),
}
}
    /// Provides an IRI that can be used to resolve the file's relative IRIs.
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<g> { </s> </p> </o> }";
///
/// let parser =
/// DatasetParser::from_format(DatasetFormat::TriG).with_base_iri("http://example.com")?;
    /// let quads = parser
    ///     .read_quads(file.as_bytes())
    ///     .collect::<Result<Vec<_>, _>>()?;
    ///
    /// assert_eq!(quads.len(), 1);
    /// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self {
inner: self.inner.with_base_iri(base_iri)?,
})
}
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of quads.
pub fn read_quads<R: Read>(self, reader: R) -> QuadReader<R> {
QuadReader {
parser: self.inner.parse_read(reader),
}
}
}
/// An iterator yielding read quads.
/// Could be built using a [`DatasetParser`].
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct QuadReader<R: Read> {
parser: FromReadQuadReader<R>,
}
impl<R: Read> Iterator for QuadReader<R> {
type Item = Result<Quad, RdfParseError>;
fn next(&mut self) -> Option<Self::Item> {
Some(self.parser.next()?.map_err(Into::into))
}
}

@ -1,185 +0,0 @@
#![allow(deprecated)]
//! Utilities to write RDF graphs and datasets.
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxrdfio::{RdfSerializer, ToWriteQuadWriter};
use std::io::{self, Write};
/// A serializer for RDF graph serialization formats.
///
/// It currently supports the following formats:
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(
/// buffer.as_slice(),
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")]
pub struct GraphSerializer {
inner: RdfSerializer,
}
impl GraphSerializer {
/// Builds a serializer for the given format
#[inline]
pub fn from_format(format: GraphFormat) -> Self {
Self {
inner: RdfSerializer::from_format(format.into()),
}
}
/// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`] implementation
pub fn triple_writer<W: Write>(self, write: W) -> TripleWriter<W> {
TripleWriter {
writer: self.inner.serialize_to_write(write),
}
}
}
/// Allows writing triples.
/// Could be built using a [`GraphSerializer`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](TripleWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(
/// buffer.as_slice(),
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct TripleWriter<W: Write> {
writer: ToWriteQuadWriter<W>,
}
impl<W: Write> TripleWriter<W> {
/// Writes a triple
pub fn write<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
self.writer.write_triple(triple)
}
/// Writes the last bytes of the file
pub fn finish(self) -> io::Result<()> {
self.writer.finish()?.flush()
}
}
/// A serializer for RDF dataset serialization formats.
///
/// It currently supports the following formats:
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")]
pub struct DatasetSerializer {
inner: RdfSerializer,
}
impl DatasetSerializer {
/// Builds a serializer for the given format
#[inline]
pub fn from_format(format: DatasetFormat) -> Self {
Self {
inner: RdfSerializer::from_format(format.into()),
}
}
    /// Returns a [`QuadWriter`] allowing writing quads into the given [`Write`] implementation
pub fn quad_writer<W: Write>(self, write: W) -> QuadWriter<W> {
QuadWriter {
writer: self.inner.serialize_to_write(write),
}
}
}
/// Allows writing quads.
/// Could be built using a [`DatasetSerializer`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](QuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct QuadWriter<W: Write> {
writer: ToWriteQuadWriter<W>,
}
impl<W: Write> QuadWriter<W> {
/// Writes a quad
pub fn write<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
self.writer.write_quad(quad)
}
/// Writes the last bytes of the file
pub fn finish(self) -> io::Result<()> {
self.writer.finish()?.flush()
}
}

@ -1,12 +0,0 @@
#![doc = include_str!("../README.md")]
#![doc(test(attr(deny(warnings))))]
#![doc(test(attr(allow(deprecated))))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
pub mod io;
pub mod model;
pub mod sparql;
mod storage;
pub mod store;

@ -1,22 +0,0 @@
//! Implements data structures for [RDF 1.1 Concepts](https://www.w3.org/TR/rdf11-concepts/) using [OxRDF](https://crates.io/crates/oxrdf).
//!
//! Usage example:
//!
//! ```
//! use oxigraph::model::*;
//!
//! let mut graph = Graph::default();
//!
//! // insertion
//! let ex = NamedNodeRef::new("http://example.com").unwrap();
//! let triple = TripleRef::new(ex, ex, ex);
//! graph.insert(triple);
//!
//! // simple filter
//! let results: Vec<_> = graph.triples_for_subject(ex).collect();
//! assert_eq!(vec![triple], results);
//! ```
pub use oxrdf::*;
pub use spargebra::term::GroundQuad;

@ -1,84 +0,0 @@
use crate::io::RdfParseError;
use crate::model::NamedNode;
use crate::sparql::results::QueryResultsParseError as ResultsParseError;
use crate::sparql::SparqlSyntaxError;
use crate::storage::StorageError;
use std::convert::Infallible;
use std::error::Error;
use std::io;
/// A SPARQL evaluation error.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum EvaluationError {
/// An error in SPARQL parsing.
#[error(transparent)]
Parsing(#[from] SparqlSyntaxError),
/// An error from the storage.
#[error(transparent)]
Storage(#[from] StorageError),
/// An error while parsing an external RDF file.
#[error(transparent)]
GraphParsing(#[from] RdfParseError),
/// An error while parsing an external result file (likely from a federated query).
#[error(transparent)]
ResultsParsing(#[from] ResultsParseError),
/// An error returned during results serialization.
#[error(transparent)]
ResultsSerialization(#[from] io::Error),
/// Error during `SERVICE` evaluation
#[error("{0}")]
Service(#[source] Box<dyn Error + Send + Sync + 'static>),
/// Error when `CREATE` tries to create an already existing graph
#[error("The graph {0} already exists")]
GraphAlreadyExists(NamedNode),
    /// Error when `DROP` or `CLEAR` tries to remove a graph that does not exist
#[error("The graph {0} does not exist")]
GraphDoesNotExist(NamedNode),
/// The variable storing the `SERVICE` name is unbound
#[error("The variable encoding the service name is unbound")]
UnboundService,
/// The given `SERVICE` is not supported
#[error("The service {0} is not supported")]
UnsupportedService(NamedNode),
/// The given content media type returned from an HTTP response is not supported (`SERVICE` and `LOAD`)
#[error("The content media type {0} is not supported")]
UnsupportedContentType(String),
    /// The `SERVICE` call did not return solutions
#[error("The service is not returning solutions but a boolean or a graph")]
ServiceDoesNotReturnSolutions,
    /// The results are not an RDF graph
    #[error("The query results are not an RDF graph")]
NotAGraph,
}
impl From<Infallible> for EvaluationError {
#[inline]
fn from(error: Infallible) -> Self {
match error {}
}
}
impl From<EvaluationError> for io::Error {
#[inline]
fn from(error: EvaluationError) -> Self {
match error {
EvaluationError::Parsing(error) => Self::new(io::ErrorKind::InvalidData, error),
EvaluationError::GraphParsing(error) => error.into(),
EvaluationError::ResultsParsing(error) => error.into(),
EvaluationError::ResultsSerialization(error) => error,
EvaluationError::Storage(error) => error.into(),
EvaluationError::Service(error) => match error.downcast() {
Ok(error) => *error,
Err(error) => Self::new(io::ErrorKind::Other, error),
},
EvaluationError::GraphAlreadyExists(_)
| EvaluationError::GraphDoesNotExist(_)
| EvaluationError::UnboundService
| EvaluationError::UnsupportedService(_)
| EvaluationError::UnsupportedContentType(_)
| EvaluationError::ServiceDoesNotReturnSolutions
| EvaluationError::NotAGraph => Self::new(io::ErrorKind::InvalidInput, error),
}
}
}
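// A hedged usage sketch (not part of this file): a caller can branch on these variants,
// since `Store::query` returns `Result<QueryResults, EvaluationError>`:
//
//     match store.query("SELECT * WHERE { ?s ?p ?o }") {
//         Ok(_results) => { /* consume the solutions */ }
//         Err(EvaluationError::Parsing(e)) => eprintln!("invalid query: {e}"),
//         Err(e) => eprintln!("evaluation failed: {e}"),
//     }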

File diff suppressed because it is too large

@ -1,9 +0,0 @@
#[cfg(not(feature = "http-client"))]
mod dummy;
#[cfg(feature = "http-client")]
mod simple;
#[cfg(not(feature = "http-client"))]
pub use dummy::Client;
#[cfg(feature = "http-client")]
pub use simple::Client;

@ -1,328 +0,0 @@
//! [SPARQL](https://www.w3.org/TR/sparql11-overview/) implementation.
//!
//! Stores execute SPARQL. See [`Store`](crate::store::Store::query()) for an example.
mod algebra;
mod dataset;
mod error;
mod eval;
mod http;
mod model;
pub mod results;
mod service;
mod update;
use crate::model::{NamedNode, Term};
pub use crate::sparql::algebra::{Query, QueryDataset, Update};
use crate::sparql::dataset::DatasetView;
pub use crate::sparql::error::EvaluationError;
use crate::sparql::eval::{EvalNodeWithStats, SimpleEvaluator, Timer};
pub use crate::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter};
pub use crate::sparql::service::ServiceHandler;
use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler};
pub(crate) use crate::sparql::update::evaluate_update;
use crate::storage::StorageReader;
use json_event_parser::{JsonEvent, ToWriteJsonWriter};
pub use oxrdf::{Variable, VariableNameParseError};
use oxsdatatypes::{DayTimeDuration, Float};
pub use spargebra::SparqlSyntaxError;
use sparopt::algebra::GraphPattern;
use sparopt::Optimizer;
use std::collections::HashMap;
use std::rc::Rc;
use std::sync::Arc;
use std::time::Duration;
use std::{fmt, io};
#[allow(clippy::needless_pass_by_value)]
pub(crate) fn evaluate_query(
reader: StorageReader,
query: impl TryInto<Query, Error = impl Into<EvaluationError>>,
options: QueryOptions,
run_stats: bool,
) -> Result<(Result<QueryResults, EvaluationError>, QueryExplanation), EvaluationError> {
let query = query.try_into().map_err(Into::into)?;
let dataset = DatasetView::new(reader, &query.dataset);
let start_planning = Timer::now();
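    // Each query form below follows the same shape: convert the parsed pattern into a
    // sparopt GraphPattern, optimize it unless optimizations are disabled, then evaluate.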
let (results, plan_node_with_stats, planning_duration) = match query.inner {
spargebra::Query::Select {
pattern, base_iri, ..
} => {
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(pattern);
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_select(&pattern);
(Ok(results), explanation, planning_duration)
}
spargebra::Query::Ask {
pattern, base_iri, ..
} => {
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
inner: Box::new(pattern),
});
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_ask(&pattern);
(results, explanation, planning_duration)
}
spargebra::Query::Construct {
template,
pattern,
base_iri,
..
} => {
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
inner: Box::new(pattern),
});
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_construct(&pattern, &template);
(Ok(results), explanation, planning_duration)
}
spargebra::Query::Describe {
pattern, base_iri, ..
} => {
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
inner: Box::new(pattern),
});
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_describe(&pattern);
(Ok(results), explanation, planning_duration)
}
};
let explanation = QueryExplanation {
inner: plan_node_with_stats,
with_stats: run_stats,
parsing_duration: query.parsing_duration,
planning_duration,
};
Ok((results, explanation))
}
/// Options for SPARQL query evaluation.
///
/// If the `"http-client"` optional feature is enabled,
/// a simple HTTP 1.1 client is used to execute [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE calls.
///
/// Usage example disabling the federated query support:
/// ```
/// use oxigraph::sparql::QueryOptions;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// store.query_opt(
/// "SELECT * WHERE { SERVICE <https://query.wikidata.org/sparql> {} }",
/// QueryOptions::default().without_service_handler(),
/// )?;
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Clone, Default)]
pub struct QueryOptions {
service_handler: Option<Arc<dyn ServiceHandler<Error = EvaluationError>>>,
custom_functions: CustomFunctionRegistry,
http_timeout: Option<Duration>,
http_redirection_limit: usize,
without_optimizations: bool,
}
pub(crate) type CustomFunctionRegistry =
HashMap<NamedNode, Arc<dyn (Fn(&[Term]) -> Option<Term>) + Send + Sync>>;
impl QueryOptions {
/// Use a given [`ServiceHandler`] to execute [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE calls.
#[inline]
#[must_use]
pub fn with_service_handler(mut self, service_handler: impl ServiceHandler + 'static) -> Self {
self.service_handler = Some(Arc::new(ErrorConversionServiceHandler::wrap(
service_handler,
)));
self
}
/// Disables the `SERVICE` calls
#[inline]
#[must_use]
pub fn without_service_handler(mut self) -> Self {
self.service_handler = Some(Arc::new(EmptyServiceHandler));
self
}
/// Sets a timeout for HTTP requests done during SPARQL evaluation.
#[cfg(feature = "http-client")]
#[inline]
#[must_use]
pub fn with_http_timeout(mut self, timeout: Duration) -> Self {
self.http_timeout = Some(timeout);
self
}
/// Sets an upper bound of the number of HTTP redirection followed per HTTP request done during SPARQL evaluation.
///
/// By default this value is `0`.
#[cfg(feature = "http-client")]
#[inline]
#[must_use]
pub fn with_http_redirection_limit(mut self, redirection_limit: usize) -> Self {
self.http_redirection_limit = redirection_limit;
self
}
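    // A hedged sketch combining the two HTTP knobs above (the values are illustrative):
    //
    //     let options = QueryOptions::default()
    //         .with_http_timeout(Duration::from_secs(10))
    //         .with_http_redirection_limit(3);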
/// Adds a custom SPARQL evaluation function.
///
/// Example with a function serializing terms to N-Triples:
/// ```
/// use oxigraph::model::*;
/// use oxigraph::sparql::{QueryOptions, QueryResults};
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
///
/// if let QueryResults::Solutions(mut solutions) = store.query_opt(
/// "SELECT (<http://www.w3.org/ns/formats/N-Triples>(1) AS ?nt) WHERE {}",
/// QueryOptions::default().with_custom_function(
/// NamedNode::new("http://www.w3.org/ns/formats/N-Triples")?,
/// |args| args.get(0).map(|t| Literal::from(t.to_string()).into()),
/// ),
/// )? {
/// assert_eq!(
/// solutions.next().unwrap()?.get("nt"),
/// Some(&Literal::from("\"1\"^^<http://www.w3.org/2001/XMLSchema#integer>").into())
/// );
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
#[must_use]
pub fn with_custom_function(
mut self,
name: NamedNode,
evaluator: impl Fn(&[Term]) -> Option<Term> + Send + Sync + 'static,
) -> Self {
self.custom_functions.insert(name, Arc::new(evaluator));
self
}
fn service_handler(&self) -> Arc<dyn ServiceHandler<Error = EvaluationError>> {
self.service_handler.clone().unwrap_or_else(|| {
if cfg!(feature = "http-client") {
Arc::new(service::SimpleServiceHandler::new(
self.http_timeout,
self.http_redirection_limit,
))
} else {
Arc::new(EmptyServiceHandler)
}
})
}
#[doc(hidden)]
#[inline]
#[must_use]
pub fn without_optimizations(mut self) -> Self {
self.without_optimizations = true;
self
}
}
/// Options for SPARQL update evaluation.
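///
/// They are built from [`QueryOptions`] and reuse its service handler and custom functions.
///
/// Usage example (a minimal sketch, assuming [`Store::update_opt`](crate::store::Store::update_opt) accepts these options):
/// ```
/// use oxigraph::sparql::{QueryOptions, UpdateOptions};
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// // Reuse the query options (custom functions, service handler...) for an update
/// let options: UpdateOptions = QueryOptions::default().without_service_handler().into();
/// store.update_opt(
///     "INSERT DATA { <http://example.com> <http://example.com> <http://example.com> }",
///     options,
/// )?;
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```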
#[derive(Clone, Default)]
pub struct UpdateOptions {
query_options: QueryOptions,
}
impl From<QueryOptions> for UpdateOptions {
#[inline]
fn from(query_options: QueryOptions) -> Self {
Self { query_options }
}
}
/// The explanation of a query.
#[derive(Clone)]
pub struct QueryExplanation {
inner: Rc<EvalNodeWithStats>,
with_stats: bool,
parsing_duration: Option<DayTimeDuration>,
planning_duration: Option<DayTimeDuration>,
}
impl QueryExplanation {
/// Writes the explanation as JSON.
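///
/// A minimal sketch, assuming the explanation was returned alongside the results by
/// [`Store::explain_query_opt`](crate::store::Store::explain_query_opt):
/// ```
/// use oxigraph::sparql::QueryOptions;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// let (results, explanation) = store.explain_query_opt("ASK { ?s ?p ?o }", QueryOptions::default(), false)?;
/// results?; // the query results themselves
/// let mut buffer = Vec::new();
/// explanation.write_in_json(&mut buffer)?; // the plan and timings as JSON
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```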
pub fn write_in_json(&self, write: impl io::Write) -> io::Result<()> {
let mut writer = ToWriteJsonWriter::new(write);
writer.write_event(JsonEvent::StartObject)?;
if let Some(parsing_duration) = self.parsing_duration {
writer.write_event(JsonEvent::ObjectKey("parsing duration in seconds".into()))?;
writer.write_event(JsonEvent::Number(
parsing_duration.as_seconds().to_string().into(),
))?;
}
if let Some(planning_duration) = self.planning_duration {
writer.write_event(JsonEvent::ObjectKey("planning duration in seconds".into()))?;
writer.write_event(JsonEvent::Number(
planning_duration.as_seconds().to_string().into(),
))?;
}
writer.write_event(JsonEvent::ObjectKey("plan".into()))?;
self.inner.json_node(&mut writer, self.with_stats)?;
writer.write_event(JsonEvent::EndObject)
}
}
impl fmt::Debug for QueryExplanation {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut obj = f.debug_struct("QueryExplanation");
if let Some(parsing_duration) = self.parsing_duration {
obj.field(
"parsing duration in seconds",
&f32::from(Float::from(parsing_duration.as_seconds())),
);
}
if let Some(planning_duration) = self.planning_duration {
obj.field(
"planning duration in seconds",
&f32::from(Float::from(planning_duration.as_seconds())),
);
}
obj.field("tree", &self.inner);
obj.finish_non_exhaustive()
}
}

@ -1,371 +0,0 @@
use crate::io::{RdfFormat, RdfSerializer};
use crate::model::*;
use crate::sparql::error::EvaluationError;
use crate::sparql::results::{
FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsFormat,
QueryResultsParseError, QueryResultsParser, QueryResultsSerializer,
};
pub use sparesults::QuerySolution;
use std::io::{Read, Write};
use std::sync::Arc;
/// Results of a [SPARQL query](https://www.w3.org/TR/sparql11-query/).
pub enum QueryResults {
/// Results of a [SELECT](https://www.w3.org/TR/sparql11-query/#select) query.
Solutions(QuerySolutionIter),
/// Result of an [ASK](https://www.w3.org/TR/sparql11-query/#ask) query.
Boolean(bool),
/// Results of a [CONSTRUCT](https://www.w3.org/TR/sparql11-query/#construct) or [DESCRIBE](https://www.w3.org/TR/sparql11-query/#describe) query.
Graph(QueryTripleIter),
}
impl QueryResults {
/// Reads a SPARQL query results serialization.
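///
/// Usage example with a JSON boolean result:
/// ```
/// use oxigraph::sparql::results::QueryResultsFormat;
/// use oxigraph::sparql::QueryResults;
///
/// let results = QueryResults::read(br#"{"boolean":true}"#.as_slice(), QueryResultsFormat::Json)?;
/// assert!(matches!(results, QueryResults::Boolean(true)));
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```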
pub fn read(
read: impl Read + 'static,
format: QueryResultsFormat,
) -> Result<Self, QueryResultsParseError> {
Ok(QueryResultsParser::from_format(format)
.parse_read(read)?
.into())
}
/// Writes the query results (solutions or boolean).
///
/// This method fails if it is called on the `Graph` results.
///
/// ```
/// use oxigraph::store::Store;
/// use oxigraph::model::*;
/// use oxigraph::sparql::results::QueryResultsFormat;
///
/// let store = Store::new()?;
/// let ex = NamedNodeRef::new("http://example.com")?;
/// store.insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
///
/// let results = store.query("SELECT ?s WHERE { ?s ?p ?o }")?;
/// assert_eq!(
/// results.write(Vec::new(), QueryResultsFormat::Json)?,
/// r#"{"head":{"vars":["s"]},"results":{"bindings":[{"s":{"type":"uri","value":"http://example.com"}}]}}"#.as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn write<W: Write>(
self,
write: W,
format: QueryResultsFormat,
) -> Result<W, EvaluationError> {
let serializer = QueryResultsSerializer::from_format(format);
match self {
Self::Boolean(value) => serializer.serialize_boolean_to_write(write, value),
Self::Solutions(solutions) => {
let mut writer = serializer
.serialize_solutions_to_write(write, solutions.variables().to_vec())
.map_err(EvaluationError::ResultsSerialization)?;
for solution in solutions {
writer
.write(&solution?)
.map_err(EvaluationError::ResultsSerialization)?;
}
writer.finish()
}
Self::Graph(triples) => {
let s = VariableRef::new_unchecked("subject");
let p = VariableRef::new_unchecked("predicate");
let o = VariableRef::new_unchecked("object");
let mut writer = serializer
.serialize_solutions_to_write(
write,
vec![s.into_owned(), p.into_owned(), o.into_owned()],
)
.map_err(EvaluationError::ResultsSerialization)?;
for triple in triples {
let triple = triple?;
writer
.write([
(s, &triple.subject.into()),
(p, &triple.predicate.into()),
(o, &triple.object),
])
.map_err(EvaluationError::ResultsSerialization)?;
}
writer.finish()
}
}
.map_err(EvaluationError::ResultsSerialization)
}
/// Writes the graph query results.
///
/// This method fails if it is called on the `Solution` or `Boolean` results.
///
/// ```
/// use oxigraph::io::RdfFormat;
/// use oxigraph::model::*;
/// use oxigraph::store::Store;
///
/// let graph = "<http://example.com> <http://example.com> <http://example.com> .\n";
///
/// let store = Store::new()?;
/// store.load_graph(
/// graph.as_bytes(),
/// RdfFormat::NTriples,
/// GraphName::DefaultGraph,
/// None,
/// )?;
///
/// let results = store.query("CONSTRUCT WHERE { ?s ?p ?o }")?;
/// assert_eq!(
/// results.write_graph(Vec::new(), RdfFormat::NTriples)?,
/// graph.as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn write_graph<W: Write>(
self,
write: W,
format: impl Into<RdfFormat>,
) -> Result<W, EvaluationError> {
if let Self::Graph(triples) = self {
let mut writer = RdfSerializer::from_format(format.into()).serialize_to_write(write);
for triple in triples {
writer
.write_triple(&triple?)
.map_err(EvaluationError::ResultsSerialization)?;
}
writer
.finish()
.map_err(EvaluationError::ResultsSerialization)
} else {
Err(EvaluationError::NotAGraph)
}
}
}
impl From<QuerySolutionIter> for QueryResults {
#[inline]
fn from(value: QuerySolutionIter) -> Self {
Self::Solutions(value)
}
}
impl<R: Read + 'static> From<FromReadQueryResultsReader<R>> for QueryResults {
fn from(reader: FromReadQueryResultsReader<R>) -> Self {
match reader {
FromReadQueryResultsReader::Solutions(s) => Self::Solutions(s.into()),
FromReadQueryResultsReader::Boolean(v) => Self::Boolean(v),
}
}
}
/// An iterator over [`QuerySolution`]s.
///
/// ```
/// use oxigraph::sparql::QueryResults;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }")? {
/// for solution in solutions {
/// println!("{:?}", solution?.get("s"));
/// }
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct QuerySolutionIter {
variables: Arc<[Variable]>,
iter: Box<dyn Iterator<Item = Result<QuerySolution, EvaluationError>>>,
}
impl QuerySolutionIter {
/// Constructs a new iterator of solutions from an ordered list of solution variables and an iterator of solution tuples
/// (each tuple uses the same ordering as the variable list, so tuple element 0 is the value of variable 0...).
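///
/// Usage example building a single-solution iterator by hand:
/// ```
/// use oxigraph::model::Term;
/// use oxigraph::sparql::{EvaluationError, QuerySolutionIter, Variable};
/// use std::sync::Arc;
///
/// let variables: Arc<[Variable]> = Arc::new([Variable::new("s")?]);
/// let solutions: Vec<Result<Vec<Option<Term>>, EvaluationError>> = vec![Ok(vec![None])];
/// let iter = QuerySolutionIter::new(Arc::clone(&variables), solutions.into_iter());
/// assert_eq!(iter.variables().len(), 1);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```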
pub fn new(
variables: Arc<[Variable]>,
iter: impl Iterator<Item = Result<Vec<Option<Term>>, EvaluationError>> + 'static,
) -> Self {
Self {
variables: Arc::clone(&variables),
iter: Box::new(
iter.map(move |t| t.map(|values| (Arc::clone(&variables), values).into())),
),
}
}
/// The variables used in the solutions.
///
/// ```
/// use oxigraph::sparql::{QueryResults, Variable};
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s ?o WHERE { ?s ?p ?o }")? {
/// assert_eq!(
/// solutions.variables(),
/// &[Variable::new("s")?, Variable::new("o")?]
/// );
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn variables(&self) -> &[Variable] {
&self.variables
}
}
impl<R: Read + 'static> From<FromReadSolutionsReader<R>> for QuerySolutionIter {
fn from(reader: FromReadSolutionsReader<R>) -> Self {
Self {
variables: reader.variables().into(),
iter: Box::new(reader.map(|t| t.map_err(EvaluationError::from))),
}
}
}
impl Iterator for QuerySolutionIter {
type Item = Result<QuerySolution, EvaluationError>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}
/// An iterator over the triples that compose a graph solution.
///
/// ```
/// use oxigraph::sparql::QueryResults;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Graph(triples) = store.query("CONSTRUCT WHERE { ?s ?p ?o }")? {
/// for triple in triples {
/// println!("{}", triple?);
/// }
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct QueryTripleIter {
pub(crate) iter: Box<dyn Iterator<Item = Result<Triple, EvaluationError>>>,
}
impl Iterator for QueryTripleIter {
type Item = Result<Triple, EvaluationError>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
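// Delegates to the inner iterator, which might provide a more efficient fold implementation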
#[inline]
fn fold<Acc, G>(self, init: Acc, g: G) -> Acc
where
G: FnMut(Acc, Self::Item) -> Acc,
{
self.iter.fold(init, g)
}
}
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
use super::*;
use std::io::Cursor;
#[test]
fn test_serialization_roundtrip() -> Result<(), EvaluationError> {
use std::str;
for format in [
QueryResultsFormat::Json,
QueryResultsFormat::Xml,
QueryResultsFormat::Tsv,
] {
let results = vec![
QueryResults::Boolean(true),
QueryResults::Boolean(false),
QueryResults::Solutions(QuerySolutionIter::new(
[
Variable::new_unchecked("foo"),
Variable::new_unchecked("bar"),
]
.as_ref()
.into(),
Box::new(
vec![
Ok(vec![None, None]),
Ok(vec![
Some(NamedNode::new_unchecked("http://example.com").into()),
None,
]),
Ok(vec![
None,
Some(NamedNode::new_unchecked("http://example.com").into()),
]),
Ok(vec![
Some(BlankNode::new_unchecked("foo").into()),
Some(BlankNode::new_unchecked("bar").into()),
]),
Ok(vec![Some(Literal::new_simple_literal("foo").into()), None]),
Ok(vec![
Some(
Literal::new_language_tagged_literal_unchecked("foo", "fr")
.into(),
),
None,
]),
Ok(vec![
Some(Literal::from(1).into()),
Some(Literal::from(true).into()),
]),
Ok(vec![
Some(Literal::from(1.33).into()),
Some(Literal::from(false).into()),
]),
Ok(vec![
Some(
Triple::new(
NamedNode::new_unchecked("http://example.com/s"),
NamedNode::new_unchecked("http://example.com/p"),
Triple::new(
NamedNode::new_unchecked("http://example.com/os"),
NamedNode::new_unchecked("http://example.com/op"),
NamedNode::new_unchecked("http://example.com/oo"),
),
)
.into(),
),
None,
]),
]
.into_iter(),
),
)),
];
for ex in results {
let mut buffer = Vec::new();
ex.write(&mut buffer, format)?;
let ex2 = QueryResults::read(Cursor::new(buffer.clone()), format)?;
let mut buffer2 = Vec::new();
ex2.write(&mut buffer2, format)?;
assert_eq!(
str::from_utf8(&buffer).unwrap(),
str::from_utf8(&buffer2).unwrap()
);
}
}
Ok(())
}
}

@ -1,44 +0,0 @@
//! Utilities to read and write SPARQL query results formats using [sparesults](https://crates.io/crates/sparesults).
//!
//! It supports [SPARQL Query Results XML Format (Second Edition)](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
//!
//! Usage example converting a JSON result file into a TSV result file:
//!
//! ```
//! use oxigraph::sparql::results::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader, QueryResultsSerializer};
//! use std::io::Result;
//!
//! fn convert_json_to_tsv(json_file: &[u8]) -> Result<Vec<u8>> {
//! let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
//! let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv);
//! // We start to read the JSON file and see which kind of results it is
//! match json_parser.parse_read(json_file)? {
//! FromReadQueryResultsReader::Boolean(value) => {
//! // it's a boolean result, we serialize it as TSV to the output buffer
//! tsv_serializer.serialize_boolean_to_write(Vec::new(), value)
//! }
//! FromReadQueryResultsReader::Solutions(solutions_reader) => {
//! // it's a set of solutions: we create a TSV writer and stream each solution into it while reading the JSON file
//! let mut serialize_solutions_to_write = tsv_serializer.serialize_solutions_to_write(Vec::new(), solutions_reader.variables().to_vec())?;
//! for solution in solutions_reader {
//! serialize_solutions_to_write.write(&solution?)?;
//! }
//! serialize_solutions_to_write.finish()
//! }
//! }
//! }
//!
//! // Let's test with a boolean
//! assert_eq!(
//! convert_json_to_tsv(br#"{"boolean":true}"#.as_slice()).unwrap(),
//! b"true"
//! );
//!
//! // And with a set of solutions
//! assert_eq!(
//! convert_json_to_tsv(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice()).unwrap(),
//! b"?foo\t?bar\n\"test\"\t\n"
//! );
//! ```
pub use sparesults::*;

@ -1,12 +0,0 @@
//! A storage backend.
//! RocksDB is used except on WASM targets, where an in-memory fallback is used instead.
#[cfg(target_family = "wasm")]
pub use fallback::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction};
#[cfg(not(target_family = "wasm"))]
pub use oxi_rocksdb::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction};
#[cfg(target_family = "wasm")]
mod fallback;
#[cfg(not(target_family = "wasm"))]
mod oxi_rocksdb;

File diff suppressed because it is too large

@ -1,139 +0,0 @@
use crate::io::{RdfFormat, RdfParseError};
use crate::storage::numeric_encoder::EncodedTerm;
use oxiri::IriParseError;
use oxrdf::TermRef;
use std::error::Error;
use std::io;
/// An error related to storage operations (reads, writes...).
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum StorageError {
/// Error from the OS I/O layer.
#[error(transparent)]
Io(#[from] io::Error),
/// Error related to data corruption.
#[error(transparent)]
Corruption(#[from] CorruptionError),
#[doc(hidden)]
#[error("{0}")]
Other(#[source] Box<dyn Error + Send + Sync + 'static>),
}
impl From<StorageError> for io::Error {
#[inline]
fn from(error: StorageError) -> Self {
match error {
StorageError::Io(error) => error,
StorageError::Corruption(error) => error.into(),
StorageError::Other(error) => Self::new(io::ErrorKind::Other, error),
}
}
}
/// An error returned if some content in the database is corrupted.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct CorruptionError(#[from] CorruptionErrorKind);
/// An error returned if some content in the database is corrupted.
#[derive(Debug, thiserror::Error)]
enum CorruptionErrorKind {
#[error("{0}")]
Msg(String),
#[error("{0}")]
Other(#[source] Box<dyn Error + Send + Sync + 'static>),
}
impl CorruptionError {
/// Builds an error from a printable error message.
#[inline]
pub(crate) fn new(error: impl Into<Box<dyn Error + Send + Sync + 'static>>) -> Self {
Self(CorruptionErrorKind::Other(error.into()))
}
#[inline]
pub(crate) fn from_encoded_term(encoded: &EncodedTerm, term: &TermRef<'_>) -> Self {
// TODO: eventually use a dedicated error enum value
Self::msg(format!("Invalid term encoding {encoded:?} for {term}"))
}
#[inline]
pub(crate) fn from_missing_column_family_name(name: &'static str) -> Self {
// TODO: eventually use a dedicated error enum value
Self::msg(format!("Column family {name} does not exist"))
}
/// Builds an error from a printable error message.
#[inline]
pub(crate) fn msg(msg: impl Into<String>) -> Self {
Self(CorruptionErrorKind::Msg(msg.into()))
}
}
impl From<CorruptionError> for io::Error {
#[inline]
fn from(error: CorruptionError) -> Self {
Self::new(io::ErrorKind::InvalidData, error)
}
}
/// An error raised while loading a file into a [`Store`](crate::store::Store).
#[derive(Debug, thiserror::Error)]
pub enum LoaderError {
/// An error raised while reading the file.
#[error(transparent)]
Parsing(#[from] RdfParseError),
/// An error raised during the insertion in the store.
#[error(transparent)]
Storage(#[from] StorageError),
/// The base IRI is invalid.
#[error("Invalid base IRI '{iri}': {error}")]
InvalidBaseIri {
/// The IRI itself.
iri: String,
/// The parsing error.
#[source]
error: IriParseError,
},
}
impl From<LoaderError> for io::Error {
#[inline]
fn from(error: LoaderError) -> Self {
match error {
LoaderError::Storage(error) => error.into(),
LoaderError::Parsing(error) => error.into(),
LoaderError::InvalidBaseIri { .. } => {
Self::new(io::ErrorKind::InvalidInput, error.to_string())
}
}
}
}
/// An error raised while writing a file from a [`Store`](crate::store::Store).
#[derive(Debug, thiserror::Error)]
pub enum SerializerError {
/// An error raised while writing the content.
#[error(transparent)]
Io(#[from] io::Error),
/// An error raised during the lookup in the store.
#[error(transparent)]
Storage(#[from] StorageError),
/// A format compatible with [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) is required.
#[error("A RDF format supporting datasets was expected, {0} found")]
DatasetFormatExpected(RdfFormat),
}
impl From<SerializerError> for io::Error {
#[inline]
fn from(error: SerializerError) -> Self {
match error {
SerializerError::Storage(error) => error.into(),
SerializerError::Io(error) => error,
SerializerError::DatasetFormatExpected(_) => {
Self::new(io::ErrorKind::InvalidInput, error.to_string())
}
}
}
}

@ -1,33 +1,26 @@
 [package]
 name = "oxrdf"
-version = "0.2.0-alpha.4"
+version = "0.1.0"
-authors.workspace = true
+authors = ["Tpt <thomas@pellissier-tanon.fr>"]
-license.workspace = true
+license = "MIT OR Apache-2.0"
 readme = "README.md"
 keywords = ["RDF"]
 repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxrdf"
-homepage = "https://oxigraph.org/"
 description = """
 A library providing basic data structures related to RDF
 """
-documentation = "https://docs.rs/oxrdf"
-edition.workspace = true
-rust-version.workspace = true
+edition = "2021"

 [features]
 default = []
 rdf-star = []

 [dependencies]
-oxilangtag.workspace = true
-oxiri.workspace = true
-oxsdatatypes = { workspace = true, optional = true }
-rand.workspace = true
-thiserror.workspace = true
-serde.workspace = true
+rand = "0.8"
+oxilangtag = "0.1"
+oxiri = "0.2"
+lasso = { version = "0.6", features = ["inline-more"] }

-[lints]
-workspace = true

 [package.metadata.docs.rs]
 all-features = true
-rustdoc-args = ["--cfg", "docsrs"]

@ -5,18 +5,16 @@ OxRDF
 [![Released API docs](https://docs.rs/oxrdf/badge.svg)](https://docs.rs/oxrdf)
 [![Crates.io downloads](https://img.shields.io/crates/d/oxrdf)](https://crates.io/crates/oxrdf)
 [![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
-[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
+[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)

 OxRDF is a simple library providing datastructures encoding [RDF 1.1 concepts](https://www.w3.org/TR/rdf11-concepts/).
 This crate is intended to be a basic building block of other crates like [Oxigraph](https://crates.io/crates/oxigraph) or [Spargebra](https://crates.io/crates/spargebra).

-Support for [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is available behind the `rdf-star` feature.
+Support for [RDF-star](https://w3c.github.io/rdf-star/cg-spec/) is available behind the `rdf-star` feature.

 OxRDF is inspired by [RDF/JS](https://rdf.js.org/data-model-spec/) and [Apache Commons RDF](http://commons.apache.org/proper/commons-rdf/).

-Use [`oxrdfio`](https://crates.io/crates/oxrdfio) if you need to read or write RDF files.

 Usage example:

 ```rust

@ -1,7 +1,8 @@
 use rand::random;
-use serde::{Deserialize, Serialize};
+use std::error::Error;
+use std::fmt;
 use std::io::Write;
-use std::{fmt, str};
+use std::str;

 /// An owned RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
 ///
@ -14,13 +15,16 @@ use std::{fmt, str};
 /// ```
 /// use oxrdf::BlankNode;
 ///
-/// assert_eq!("_:a122", BlankNode::new("a122")?.to_string());
+/// assert_eq!(
+///     "_:a122",
+///     BlankNode::new("a122")?.to_string()
+/// );
 /// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
 /// ```
-#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)]
+#[derive(Eq, PartialEq, Debug, Clone, Hash)]
 pub struct BlankNode(BlankNodeContent);

-#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)]
+#[derive(PartialEq, Eq, Debug, Clone, Hash)]
 enum BlankNodeContent {
     Named(String),
     Anonymous { id: u128, str: IdStr },
@ -32,7 +36,7 @@ impl BlankNode {
     /// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars.
     ///
     /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`]
-    /// that creates a random ID that could be easily inlined by Oxigraph stores.
+    ///that creates a random ID that could be easily inlined by Oxigraph stores.
     pub fn new(id: impl Into<String>) -> Result<Self, BlankNodeIdParseError> {
         let id = id.into();
         validate_blank_node_identifier(&id)?;
@ -107,14 +111,7 @@ impl Default for BlankNode {
     /// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id.
     #[inline]
     fn default() -> Self {
-        // We ensure the ID does not start with a number to be also valid with RDF/XML
-        loop {
-            let id = random();
-            let str = IdStr::new(id);
-            if matches!(str.as_str().as_bytes().first(), Some(b'a'..=b'f')) {
-                return Self(BlankNodeContent::Anonymous { id, str });
-            }
-        }
+        Self::new_from_unique_id(random::<u128>())
     }
 }
@ -129,7 +126,10 @@ impl Default for BlankNode {
 /// ```
 /// use oxrdf::BlankNodeRef;
 ///
-/// assert_eq!("_:a122", BlankNodeRef::new("a122")?.to_string());
+/// assert_eq!(
+///     "_:a122",
+///     BlankNodeRef::new("a122")?.to_string()
+/// );
 /// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
 /// ```
 #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
@ -173,7 +173,7 @@ impl<'a> BlankNodeRef<'a> {
     /// Returns the underlying ID of this blank node.
     #[inline]
-    pub const fn as_str(self) -> &'a str {
+    pub fn as_str(self) -> &'a str {
         match self.0 {
             BlankNodeRefContent::Named(id) => id,
             BlankNodeRefContent::Anonymous { str, .. } => str,
@ -185,15 +185,12 @@ impl<'a> BlankNodeRef<'a> {
     /// ```
     /// use oxrdf::BlankNode;
     ///
-    /// assert_eq!(
-    ///     BlankNode::new_from_unique_id(128).as_ref().unique_id(),
-    ///     Some(128)
-    /// );
+    /// assert_eq!(BlankNode::new_from_unique_id(128).as_ref().unique_id(), Some(128));
     /// assert_eq!(BlankNode::new("foo")?.as_ref().unique_id(), None);
     /// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
     /// ```
     #[inline]
-    pub const fn unique_id(&self) -> Option<u128> {
+    pub fn unique_id(&self) -> Option<u128> {
         match self.0 {
             BlankNodeRefContent::Named(_) => None,
             BlankNodeRefContent::Anonymous { id, .. } => Some(id),
@ -247,14 +244,14 @@ impl PartialEq<BlankNodeRef<'_>> for BlankNode {
     }
 }

-#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)]
+#[derive(PartialEq, Eq, Debug, Clone, Hash)]
 struct IdStr([u8; 32]);

 impl IdStr {
     #[inline]
     fn new(id: u128) -> Self {
         let mut str = [0; 32];
-        write!(&mut str[..], "{id:x}").unwrap();
+        write!(&mut str[..], "{:x}", id).unwrap();
         Self(str)
     }
@ -267,7 +264,7 @@ impl IdStr {
 fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError> {
     let mut chars = id.chars();
-    let front = chars.next().ok_or(BlankNodeIdParseError)?;
+    let front = chars.next().ok_or(BlankNodeIdParseError {})?;
     match front {
         '0'..='9'
         | '_'
@ -286,7 +283,7 @@ fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError>
         | '\u{F900}'..='\u{FDCF}'
         | '\u{FDF0}'..='\u{FFFD}'
         | '\u{10000}'..='\u{EFFFF}' => (),
-        _ => return Err(BlankNodeIdParseError),
+        _ => return Err(BlankNodeIdParseError {}),
     }
     for c in chars {
         match c {
@ -312,13 +309,13 @@ fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError>
             | '\u{F900}'..='\u{FDCF}'
             | '\u{FDF0}'..='\u{FFFD}'
             | '\u{10000}'..='\u{EFFFF}' => (),
-            _ => return Err(BlankNodeIdParseError),
+            _ => return Err(BlankNodeIdParseError {}),
         }
     }
     // Could not end with a dot
     if id.ends_with('.') {
-        Err(BlankNodeIdParseError)
+        Err(BlankNodeIdParseError {})
     } else {
         Ok(())
     }
@ -345,12 +342,19 @@ fn to_integer_id(id: &str) -> Option<u128> {
 }

 /// An error raised during [`BlankNode`] IDs validation.
-#[derive(Debug, thiserror::Error)]
-#[error("The blank node identifier is invalid")]
-pub struct BlankNodeIdParseError;
+#[derive(Debug)]
+pub struct BlankNodeIdParseError {}
+
+impl fmt::Display for BlankNodeIdParseError {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "The blank node identifier is invalid")
+    }
+}
+
+impl Error for BlankNodeIdParseError {}

 #[cfg(test)]
-#[allow(clippy::panic_in_result_fn)]
 mod tests {
     use super::*;
@ -368,13 +372,13 @@ mod tests {
     #[test]
     fn new_validation() {
-        BlankNode::new("").unwrap_err();
-        BlankNode::new("a").unwrap();
-        BlankNode::new("-").unwrap_err();
-        BlankNode::new("a-").unwrap();
-        BlankNode::new(".").unwrap_err();
-        BlankNode::new("a.").unwrap_err();
-        BlankNode::new("a.a").unwrap();
+        assert!(BlankNode::new("").is_err());
+        assert!(BlankNode::new("a").is_ok());
+        assert!(BlankNode::new("-").is_err());
+        assert!(BlankNode::new("a-").is_ok());
+        assert!(BlankNode::new(".").is_err());
+        assert!(BlankNode::new("a.").is_err());
+        assert!(BlankNode::new("a.a").is_ok());
     }

     #[test]

@ -1,4 +1,4 @@
//! [In-memory implementation](Dataset) of [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). //! [In-memory implementation](super::Dataset) of [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
//! //!
//! Usage example: //! Usage example:
//! ``` //! ```
@ -18,30 +18,25 @@
//! // direct access to a dataset graph //! // direct access to a dataset graph
//! let results: Vec<_> = dataset.graph(ex).iter().collect(); //! let results: Vec<_> = dataset.graph(ex).iter().collect();
//! assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); //! assert_eq!(vec![TripleRef::new(ex, ex, ex)], results);
//!
//! // Print
//! assert_eq!(
//! dataset.to_string(),
//! "<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n"
//! );
//! # Result::<_,Box<dyn std::error::Error>>::Ok(()) //! # Result::<_,Box<dyn std::error::Error>>::Ok(())
//! ``` //! ```
//! //!
//! See also [`Graph`] if you only care about plain triples. //! See also [`Graph`](super::Graph) if you only care about plain triples.
use crate::interning::*; use crate::interning::*;
use crate::SubjectRef;
use crate::*; use crate::*;
use std::collections::hash_map::DefaultHasher; use std::collections::hash_map::DefaultHasher;
use std::collections::{BTreeSet, HashMap, HashSet}; use std::collections::BTreeSet;
use std::collections::{HashMap, HashSet};
use std::fmt; use std::fmt;
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
/// An in-memory [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). /// An in-memory [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
/// ///
/// It can accommodate a fairly large number of quads (in the few millions). /// It can accommodate a fairly large number of quads (in the few millions).
/// /// Beware: it interns the string and does not do any garbage collection yet:
/// <div class="warning">It interns the strings and does not do any garbage collection yet: /// if you insert and remove a lot of different terms, memory will grow without any reduction.
/// if you insert and remove a lot of different terms, memory will grow without any reduction.</div>
/// ///
/// Usage example: /// Usage example:
/// ``` /// ```
@ -63,7 +58,7 @@ use std::hash::{Hash, Hasher};
/// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); /// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ``` /// ```
#[derive(Debug, Default, Clone)] #[derive(Debug, Default)]
pub struct Dataset { pub struct Dataset {
interner: Interner, interner: Interner,
gspo: BTreeSet<( gspo: BTreeSet<(
@ -110,7 +105,7 @@ impl Dataset {
Self::default() Self::default()
} }
/// Provides a read-only view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) contained in this dataset. /// Provides a read-only view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-graph) contained in this dataset.
/// ///
/// ``` /// ```
/// use oxrdf::*; /// use oxrdf::*;
@ -133,7 +128,7 @@ impl Dataset {
} }
} }
/// Provides a read/write view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) contained in this dataset. /// Provides a read/write view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-graph) contained in this dataset.
/// ///
/// ``` /// ```
/// use oxrdf::*; /// use oxrdf::*;
@ -185,7 +180,6 @@ impl Dataset {
.map(move |q| self.decode_spog(q)) .map(move |q| self.decode_spog(q))
} }
#[allow(clippy::map_identity)]
fn interned_quads_for_subject( fn interned_quads_for_subject(
&self, &self,
subject: &InternedSubject, subject: &InternedSubject,
@ -296,18 +290,6 @@ impl Dataset {
.map(|(o, s, p, g)| (s, p, o, g)) .map(|(o, s, p, g)| (s, p, o, g))
} }
pub fn quads_for_graph_name<'a, 'b>(
&'a self,
graph_name: impl Into<GraphNameRef<'b>>,
) -> impl Iterator<Item = QuadRef<'a>> + 'a {
let graph_name = self
.encoded_graph_name(graph_name)
.unwrap_or_else(InternedGraphName::impossible);
self.interned_quads_for_graph_name(&graph_name)
.map(move |q| self.decode_spog(q))
}
fn interned_quads_for_graph_name( fn interned_quads_for_graph_name(
&self, &self,
graph_name: &InternedGraphName, graph_name: &InternedGraphName,
@ -504,11 +486,11 @@ impl Dataset {
} }
} }
/// Canonicalizes the dataset by renaming blank nodes. /// Applies on the dataset the canonicalization process described in
/// [Canonical Forms for Isomorphic and Equivalent RDF Graphs: Algorithms for Leaning and Labelling Blank Nodes, Aidan Hogan, 2017](http://aidanhogan.com/docs/rdf-canonicalisation.pdf).
/// ///
/// Usage example ([Dataset isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-dataset-isomorphism)): /// Usage example ([Dataset isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-dataset-isomorphism)):
/// ``` /// ```
/// use oxrdf::dataset::CanonicalizationAlgorithm;
/// use oxrdf::*; /// use oxrdf::*;
/// ///
/// let iri = NamedNodeRef::new("http://example.com")?; /// let iri = NamedNodeRef::new("http://example.com")?;
@ -526,59 +508,29 @@ impl Dataset {
/// graph2.insert(QuadRef::new(&bnode2, iri, iri, &g2)); /// graph2.insert(QuadRef::new(&bnode2, iri, iri, &g2));
/// ///
/// assert_ne!(graph1, graph2); /// assert_ne!(graph1, graph2);
/// graph1.canonicalize(CanonicalizationAlgorithm::Unstable); /// graph1.canonicalize();
/// graph2.canonicalize(CanonicalizationAlgorithm::Unstable); /// graph2.canonicalize();
/// assert_eq!(graph1, graph2); /// assert_eq!(graph1, graph2);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ``` /// ```
/// ///
/// <div class="warning">Blank node ids depends on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes. /// Warning 1: Blank node ids depends on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes.
/// Hence, this canonization might not be suitable for diffs.</div> /// Hence, this canonization might not be suitable for diffs.
///
/// Warning 2: The canonicalization algorithm is not stable and canonical blank node ids might change between Oxigraph version.
/// ///
/// <div class="warning">This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset.</div> /// Warning 3: This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset.
pub fn canonicalize(&mut self, algorithm: CanonicalizationAlgorithm) { pub fn canonicalize(&mut self) {
let bnode_mapping = self.canonicalize_interned_blank_nodes(algorithm); let bnodes = self.blank_nodes();
let new_quads = self.map_blank_nodes(&bnode_mapping); let (hash, partition) =
self.hash_bnodes(bnodes.into_iter().map(|bnode| (bnode, 0)).collect());
let new_quads = self.distinguish(&hash, &partition);
self.clear(); self.clear();
for quad in new_quads { for quad in new_quads {
self.insert_encoded(quad); self.insert_encoded(quad);
} }
} }
/// Returns a map between the current dataset blank node and the canonicalized blank node
/// to create a canonical dataset.
///
/// See also [`canonicalize`](Self::canonicalize).
pub fn canonicalize_blank_nodes(
&self,
algorithm: CanonicalizationAlgorithm,
) -> HashMap<BlankNodeRef<'_>, BlankNode> {
self.canonicalize_interned_blank_nodes(algorithm)
.into_iter()
.map(|(from, to)| (from.decode_from(&self.interner), to))
.collect()
}
fn canonicalize_interned_blank_nodes(
&self,
algorithm: CanonicalizationAlgorithm,
) -> HashMap<InternedBlankNode, BlankNode> {
match algorithm {
CanonicalizationAlgorithm::Unstable => {
let bnodes = self.blank_nodes();
let quads_per_blank_node = self.quads_per_blank_nodes();
let (hash, partition) = self.hash_bnodes(
bnodes.into_iter().map(|bnode| (bnode, 0)).collect(),
&quads_per_blank_node,
);
self.distinguish(hash, &partition, &quads_per_blank_node)
.into_iter()
.map(|(from, to)| (from, BlankNode::new_from_unique_id(to.into())))
.collect()
}
}
}
fn blank_nodes(&self) -> HashSet<InternedBlankNode> { fn blank_nodes(&self) -> HashSet<InternedBlankNode> {
let mut bnodes = HashSet::new(); let mut bnodes = HashSet::new();
for (g, s, _, o) in &self.gspo { for (g, s, _, o) in &self.gspo {
@ -617,172 +569,107 @@ impl Dataset {
} }
} }
fn quads_per_blank_nodes(&self) -> QuadsPerBlankNode {
let mut map: HashMap<_, Vec<_>> = HashMap::new();
for quad in &self.spog {
if let InternedSubject::BlankNode(bnode) = &quad.0 {
map.entry(*bnode).or_default().push(quad.clone());
}
#[cfg(feature = "rdf-star")]
if let InternedSubject::Triple(t) = &quad.0 {
Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, &mut map);
}
if let InternedTerm::BlankNode(bnode) = &quad.2 {
map.entry(*bnode).or_default().push(quad.clone());
}
#[cfg(feature = "rdf-star")]
if let InternedTerm::Triple(t) = &quad.2 {
Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, &mut map);
}
if let InternedGraphName::BlankNode(bnode) = &quad.3 {
map.entry(*bnode).or_default().push(quad.clone());
}
}
map
}
#[cfg(feature = "rdf-star")]
fn add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(
quad: &(
InternedSubject,
InternedNamedNode,
InternedTerm,
InternedGraphName,
),
triple: &InternedTriple,
map: &mut QuadsPerBlankNode,
) {
if let InternedSubject::BlankNode(bnode) = &triple.subject {
map.entry(*bnode).or_default().push(quad.clone());
}
if let InternedSubject::Triple(t) = &triple.subject {
Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, map);
}
if let InternedTerm::BlankNode(bnode) = &triple.object {
map.entry(*bnode).or_default().push(quad.clone());
}
if let InternedTerm::Triple(t) = &triple.object {
Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, map);
}
}
fn hash_bnodes( fn hash_bnodes(
&self, &self,
mut hashes: HashMap<InternedBlankNode, u64>, mut hashes: HashMap<InternedBlankNode, u64>,
quads_per_blank_node: &QuadsPerBlankNode,
) -> ( ) -> (
HashMap<InternedBlankNode, u64>, HashMap<InternedBlankNode, u64>,
Vec<(u64, Vec<InternedBlankNode>)>, Vec<(u64, Vec<InternedBlankNode>)>,
) { ) {
let mut to_hash = Vec::new(); let mut to_hash = Vec::new();
let mut to_do = hashes let mut partition: HashMap<u64, Vec<InternedBlankNode>> = HashMap::new();
.keys() let mut partition_len = 0;
.map(|bnode| (*bnode, true)) loop {
.collect::<HashMap<_, _>>(); //TODO: improve termination
let mut partition = HashMap::<_, Vec<_>>::with_capacity(hashes.len()); let mut new_hashes = HashMap::new();
let mut old_partition_count = usize::MAX; for (bnode, old_hash) in &hashes {
while old_partition_count != partition.len() { for (_, p, o, g) in
old_partition_count = partition.len(); self.interned_quads_for_subject(&InternedSubject::BlankNode(*bnode))
partition.clear(); {
let mut new_hashes = hashes.clone(); to_hash.push((
for bnode in hashes.keys() { self.hash_named_node(*p),
let hash = if to_do.contains_key(bnode) { self.hash_term(o, &hashes),
for (s, p, o, g) in &quads_per_blank_node[bnode] { self.hash_graph_name(g, &hashes),
to_hash.push(( 0,
self.hash_subject(s, *bnode, &hashes), ));
self.hash_named_node(*p), }
self.hash_term(o, *bnode, &hashes), for (s, p, _, g) in self.interned_quads_for_object(&InternedTerm::BlankNode(*bnode))
self.hash_graph_name(g, *bnode, &hashes), {
)); to_hash.push((
} self.hash_subject(s, &hashes),
to_hash.sort_unstable(); self.hash_named_node(*p),
let hash = Self::hash_tuple((&to_hash, hashes[bnode])); self.hash_graph_name(g, &hashes),
to_hash.clear(); 1,
if hash == hashes[bnode] { ));
to_do.insert(*bnode, false); }
} else { for (s, p, o, _) in
new_hashes.insert(*bnode, hash); self.interned_quads_for_graph_name(&InternedGraphName::BlankNode(*bnode))
} {
hash to_hash.push((
} else { self.hash_subject(s, &hashes),
hashes[bnode] self.hash_named_node(*p),
}; self.hash_term(o, &hashes),
2,
));
}
to_hash.sort_unstable();
let hash = Self::hash_tuple((old_hash, &to_hash));
to_hash.clear();
new_hashes.insert(*bnode, hash);
partition.entry(hash).or_default().push(*bnode); partition.entry(hash).or_default().push(*bnode);
} }
if partition.len() == partition_len {
let mut partition: Vec<_> = partition.into_iter().collect();
partition.sort_by(|(h1, b1), (h2, b2)| (b1.len(), h1).cmp(&(b2.len(), h2)));
return (hashes, partition);
}
hashes = new_hashes; hashes = new_hashes;
partition_len = partition.len();
partition.clear();
} }
let mut partition: Vec<_> = partition.into_iter().collect();
partition.sort_unstable_by(|(h1, b1), (h2, b2)| (b1.len(), h1).cmp(&(b2.len(), h2)));
(hashes, partition)
} }
fn hash_named_node(&self, node: InternedNamedNode) -> u64 { fn hash_named_node(&self, node: InternedNamedNode) -> u64 {
Self::hash_tuple(node.decode_from(&self.interner)) Self::hash_tuple(node.decode_from(&self.interner))
} }
fn hash_blank_node(
node: InternedBlankNode,
current_blank_node: InternedBlankNode,
bnodes_hash: &HashMap<InternedBlankNode, u64>,
) -> u64 {
if node == current_blank_node {
u64::MAX
} else {
bnodes_hash[&node]
}
}
fn hash_subject( fn hash_subject(
&self, &self,
node: &InternedSubject, node: &InternedSubject,
current_blank_node: InternedBlankNode,
bnodes_hash: &HashMap<InternedBlankNode, u64>, bnodes_hash: &HashMap<InternedBlankNode, u64>,
) -> u64 { ) -> u64 {
match node { #[cfg(feature = "rdf-star")]
InternedSubject::NamedNode(node) => Self::hash_tuple(node.decode_from(&self.interner)), if let InternedSubject::Triple(triple) = node {
InternedSubject::BlankNode(bnode) => { return self.hash_triple(triple, bnodes_hash);
Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash) }
} if let InternedSubject::BlankNode(bnode) = node {
#[cfg(feature = "rdf-star")] bnodes_hash[bnode]
InternedSubject::Triple(triple) => { } else {
self.hash_triple(triple, current_blank_node, bnodes_hash) Self::hash_tuple(node.decode_from(&self.interner))
}
} }
} }
fn hash_term( fn hash_term(&self, term: &InternedTerm, bnodes_hash: &HashMap<InternedBlankNode, u64>) -> u64 {
&self, #[cfg(feature = "rdf-star")]
term: &InternedTerm, if let InternedTerm::Triple(triple) = term {
current_blank_node: InternedBlankNode, return self.hash_triple(triple, bnodes_hash);
bnodes_hash: &HashMap<InternedBlankNode, u64>, }
) -> u64 { if let InternedTerm::BlankNode(bnode) = term {
match term { bnodes_hash[bnode]
InternedTerm::NamedNode(node) => Self::hash_tuple(node.decode_from(&self.interner)), } else {
InternedTerm::BlankNode(bnode) => { Self::hash_tuple(term.decode_from(&self.interner))
Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
}
InternedTerm::Literal(literal) => Self::hash_tuple(literal.decode_from(&self.interner)),
#[cfg(feature = "rdf-star")]
InternedTerm::Triple(triple) => {
self.hash_triple(triple, current_blank_node, bnodes_hash)
}
} }
} }
fn hash_graph_name( fn hash_graph_name(
&self, &self,
graph_name: &InternedGraphName, graph_name: &InternedGraphName,
current_blank_node: InternedBlankNode,
bnodes_hash: &HashMap<InternedBlankNode, u64>, bnodes_hash: &HashMap<InternedBlankNode, u64>,
) -> u64 { ) -> u64 {
match graph_name { if let InternedGraphName::BlankNode(bnode) = graph_name {
InternedGraphName::NamedNode(node) => { bnodes_hash[bnode]
Self::hash_tuple(node.decode_from(&self.interner)) } else {
} Self::hash_tuple(graph_name.decode_from(&self.interner))
InternedGraphName::BlankNode(bnode) => {
Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
}
InternedGraphName::DefaultGraph => 0,
} }
} }
@ -790,13 +677,12 @@ impl Dataset {
fn hash_triple( fn hash_triple(
&self, &self,
triple: &InternedTriple, triple: &InternedTriple,
current_blank_node: InternedBlankNode,
bnodes_hash: &HashMap<InternedBlankNode, u64>, bnodes_hash: &HashMap<InternedBlankNode, u64>,
) -> u64 { ) -> u64 {
Self::hash_tuple(( Self::hash_tuple((
self.hash_subject(&triple.subject, current_blank_node, bnodes_hash), self.hash_subject(&triple.subject, bnodes_hash),
self.hash_named_node(triple.predicate), self.hash_named_node(triple.predicate),
self.hash_term(&triple.object, current_blank_node, bnodes_hash), self.hash_term(&triple.object, bnodes_hash),
)) ))
} }
@ -807,43 +693,48 @@ impl Dataset {
} }
fn distinguish( fn distinguish(
&self, &mut self,
hash: HashMap<InternedBlankNode, u64>, hash: &HashMap<InternedBlankNode, u64>,
partition: &[(u64, Vec<InternedBlankNode>)], partition: &[(u64, Vec<InternedBlankNode>)],
quads_per_blank_node: &QuadsPerBlankNode, ) -> Vec<(
) -> HashMap<InternedBlankNode, u64> { InternedSubject,
let b_prime = partition.iter().map(|(_, b)| b).find(|b| b.len() > 1); InternedNamedNode,
InternedTerm,
InternedGraphName,
)> {
let b_prime = partition
.iter()
.find_map(|(_, b)| if b.len() > 1 { Some(b) } else { None });
if let Some(b_prime) = b_prime { if let Some(b_prime) = b_prime {
b_prime b_prime
.iter() .iter()
.map(|b| { .map(|b| {
let mut hash_prime = hash.clone(); let mut hash_prime = hash.clone();
hash_prime.insert(*b, Self::hash_tuple((hash_prime[b], 22))); hash_prime.insert(*b, Self::hash_tuple((hash_prime[b], 22)));
let (hash_prime_prime, partition_prime) = let (hash_prime_prime, partition_prime) = self.hash_bnodes(hash_prime);
self.hash_bnodes(hash_prime, quads_per_blank_node); self.distinguish(&hash_prime_prime, &partition_prime)
self.distinguish(hash_prime_prime, &partition_prime, quads_per_blank_node)
}) })
.reduce(|a, b| { .fold(None, |a, b| {
let mut a_hashes = a.values().collect::<Vec<_>>(); Some(if let Some(a) = a {
a_hashes.sort(); if a <= b {
let mut b_hashes = a.values().collect::<Vec<_>>(); a
b_hashes.sort(); } else {
if a_hashes <= b_hashes { b
a }
} else { } else {
b b
} })
}) })
.unwrap_or_default() .unwrap_or_default()
} else { } else {
hash self.label(hash)
} }
} }
#[allow(clippy::needless_collect)] #[allow(clippy::needless_collect)]
fn map_blank_nodes( fn label(
&mut self, &mut self,
bnode_mapping: &HashMap<InternedBlankNode, BlankNode>, hashes: &HashMap<InternedBlankNode, u64>,
) -> Vec<( ) -> Vec<(
InternedSubject, InternedSubject,
InternedNamedNode, InternedNamedNode,
@ -851,81 +742,103 @@ impl Dataset {
InternedGraphName, InternedGraphName,
)> { )> {
let old_quads: Vec<_> = self.spog.iter().cloned().collect(); let old_quads: Vec<_> = self.spog.iter().cloned().collect();
old_quads let mut quads: Vec<_> = old_quads
.into_iter() .into_iter()
.map(|(s, p, o, g)| { .map(|(s, p, o, g)| {
( (
match s { if let InternedSubject::BlankNode(bnode) = s {
InternedSubject::NamedNode(_) => s, InternedSubject::BlankNode(self.map_bnode(bnode, hashes))
InternedSubject::BlankNode(bnode) => { } else {
InternedSubject::BlankNode(InternedBlankNode::encoded_into(
bnode_mapping[&bnode].as_ref(),
&mut self.interner,
))
}
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
InternedSubject::Triple(triple) => { {
InternedSubject::Triple(Box::new(InternedTriple::encoded_into( if let InternedSubject::Triple(triple) = s {
self.map_triple_blank_nodes(&triple, bnode_mapping).as_ref(), InternedSubject::Triple(Box::new(InternedTriple::encoded_into(
&mut self.interner, self.label_triple(&triple, hashes).as_ref(),
))) &mut self.interner,
)))
} else {
s
}
}
#[cfg(not(feature = "rdf-star"))]
{
s
} }
}, },
p, p,
match o { if let InternedTerm::BlankNode(bnode) = o {
InternedTerm::NamedNode(_) | InternedTerm::Literal(_) => o, InternedTerm::BlankNode(self.map_bnode(bnode, hashes))
InternedTerm::BlankNode(bnode) => { } else {
InternedTerm::BlankNode(InternedBlankNode::encoded_into(
bnode_mapping[&bnode].as_ref(),
&mut self.interner,
))
}
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
InternedTerm::Triple(triple) => { {
InternedTerm::Triple(Box::new(InternedTriple::encoded_into( if let InternedTerm::Triple(triple) = o {
self.map_triple_blank_nodes(&triple, bnode_mapping).as_ref(), InternedTerm::Triple(Box::new(InternedTriple::encoded_into(
&mut self.interner, self.label_triple(&triple, hashes).as_ref(),
))) &mut self.interner,
)))
} else {
o
}
} }
}, #[cfg(not(feature = "rdf-star"))]
match g { {
InternedGraphName::NamedNode(_) | InternedGraphName::DefaultGraph => g, o
InternedGraphName::BlankNode(bnode) => {
InternedGraphName::BlankNode(InternedBlankNode::encoded_into(
bnode_mapping[&bnode].as_ref(),
&mut self.interner,
))
} }
}, },
if let InternedGraphName::BlankNode(bnode) = g {
InternedGraphName::BlankNode(self.map_bnode(bnode, hashes))
} else {
g
},
) )
}) })
.collect() .collect();
quads.sort();
quads
} }
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
fn map_triple_blank_nodes( fn label_triple(
&mut self, &mut self,
triple: &InternedTriple, triple: &InternedTriple,
bnode_mapping: &HashMap<InternedBlankNode, BlankNode>, hashes: &HashMap<InternedBlankNode, u64>,
) -> Triple { ) -> Triple {
Triple { Triple {
subject: if let InternedSubject::BlankNode(bnode) = &triple.subject { subject: if let InternedSubject::BlankNode(bnode) = &triple.subject {
bnode_mapping[bnode].clone().into() Self::gen_bnode(*bnode, hashes).into()
} else if let InternedSubject::Triple(t) = &triple.subject { } else if let InternedSubject::Triple(t) = &triple.subject {
self.map_triple_blank_nodes(t, bnode_mapping).into() self.label_triple(t, hashes).into()
} else { } else {
triple.subject.decode_from(&self.interner).into_owned() triple.subject.decode_from(&self.interner).into_owned()
}, },
predicate: triple.predicate.decode_from(&self.interner).into_owned(), predicate: triple.predicate.decode_from(&self.interner).into_owned(),
object: if let InternedTerm::BlankNode(bnode) = &triple.object { object: if let InternedTerm::BlankNode(bnode) = &triple.object {
bnode_mapping[bnode].clone().into() Self::gen_bnode(*bnode, hashes).into()
} else if let InternedTerm::Triple(t) = &triple.object { } else if let InternedTerm::Triple(t) = &triple.object {
self.map_triple_blank_nodes(t, bnode_mapping).into() self.label_triple(t, hashes).into()
} else { } else {
triple.object.decode_from(&self.interner).into_owned() triple.object.decode_from(&self.interner).into_owned()
}, },
} }
} }
fn map_bnode(
&mut self,
old_bnode: InternedBlankNode,
hashes: &HashMap<InternedBlankNode, u64>,
) -> InternedBlankNode {
InternedBlankNode::encoded_into(
Self::gen_bnode(old_bnode, hashes).as_ref(),
&mut self.interner,
)
}
fn gen_bnode(
old_bnode: InternedBlankNode,
hashes: &HashMap<InternedBlankNode, u64>,
) -> BlankNode {
BlankNode::new_from_unique_id(hashes[&old_bnode].into())
}
} }
impl PartialEq for Dataset { impl PartialEq for Dataset {
@ -948,7 +861,7 @@ impl<'a> IntoIterator for &'a Dataset {
type Item = QuadRef<'a>; type Item = QuadRef<'a>;
type IntoIter = Iter<'a>; type IntoIter = Iter<'a>;
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> Iter<'a> {
self.iter() self.iter()
} }
} }
@ -988,13 +901,13 @@ impl<'a, T: Into<QuadRef<'a>>> Extend<T> for Dataset {
impl fmt::Display for Dataset { impl fmt::Display for Dataset {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for t in self { for t in self {
writeln!(f, "{t} .")?; writeln!(f, "{}", t)?;
} }
Ok(()) Ok(())
} }
} }
/// A read-only view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) contained in a [`Dataset`]. /// A read-only view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-graph) contained in a [`Dataset`].
/// ///
/// It is built using the [`Dataset::graph`] method. /// It is built using the [`Dataset::graph`] method.
/// ///
@ -1306,7 +1219,7 @@ impl<'a> IntoIterator for GraphView<'a> {
type Item = TripleRef<'a>; type Item = TripleRef<'a>;
type IntoIter = GraphViewIter<'a>; type IntoIter = GraphViewIter<'a>;
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> GraphViewIter<'a> {
self.iter() self.iter()
} }
} }
@ -1315,7 +1228,7 @@ impl<'a, 'b> IntoIterator for &'b GraphView<'a> {
type Item = TripleRef<'a>; type Item = TripleRef<'a>;
type IntoIter = GraphViewIter<'a>; type IntoIter = GraphViewIter<'a>;
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> GraphViewIter<'a> {
self.iter() self.iter()
} }
} }
@ -1323,13 +1236,13 @@ impl<'a, 'b> IntoIterator for &'b GraphView<'a> {
impl<'a> fmt::Display for GraphView<'a> { impl<'a> fmt::Display for GraphView<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for t in self { for t in self {
writeln!(f, "{t} .")?; writeln!(f, "{}", t)?;
} }
Ok(()) Ok(())
} }
} }
/// A read/write view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) contained in a [`Dataset`]. /// A read/write view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-graph) contained in a [`Dataset`].
/// ///
/// It is built using the [`Dataset::graph_mut`] method. /// It is built using the [`Dataset::graph_mut`] method.
/// ///
@ -1517,7 +1430,7 @@ impl<'a> IntoIterator for &'a GraphViewMut<'a> {
type Item = TripleRef<'a>; type Item = TripleRef<'a>;
type IntoIter = GraphViewIter<'a>; type IntoIter = GraphViewIter<'a>;
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> GraphViewIter<'a> {
self.iter() self.iter()
} }
} }
@ -1525,7 +1438,7 @@ impl<'a> IntoIterator for &'a GraphViewMut<'a> {
impl<'a> fmt::Display for GraphViewMut<'a> { impl<'a> fmt::Display for GraphViewMut<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for t in self { for t in self {
writeln!(f, "{t}")?; writeln!(f, "{}", t)?;
} }
Ok(()) Ok(())
} }
@ -1548,7 +1461,7 @@ pub struct Iter<'a> {
impl<'a> Iterator for Iter<'a> { impl<'a> Iterator for Iter<'a> {
type Item = QuadRef<'a>; type Item = QuadRef<'a>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<QuadRef<'a>> {
self.inner self.inner
.next() .next()
.map(|(s, p, o, g)| self.dataset.decode_spog((s, p, o, g))) .map(|(s, p, o, g)| self.dataset.decode_spog((s, p, o, g)))
@ -1572,70 +1485,9 @@ pub struct GraphViewIter<'a> {
impl<'a> Iterator for GraphViewIter<'a> { impl<'a> Iterator for GraphViewIter<'a> {
type Item = TripleRef<'a>; type Item = TripleRef<'a>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<TripleRef<'a>> {
self.inner self.inner
.next() .next()
.map(|(_, s, p, o)| self.dataset.decode_spo((s, p, o))) .map(|(_, s, p, o)| self.dataset.decode_spo((s, p, o)))
} }
} }
type QuadsPerBlankNode = HashMap<
InternedBlankNode,
Vec<(
InternedSubject,
InternedNamedNode,
InternedTerm,
InternedGraphName,
)>,
>;
/// An algorithm used to canonicalize graph and datasets.
///
/// See [`Graph::canonicalize`] and [`Dataset::canonicalize`].
#[derive(Default, Debug, Clone, Copy, Eq, PartialEq, Hash)]
#[non_exhaustive]
pub enum CanonicalizationAlgorithm {
/// The algorithm preferred by OxRDF.
///
/// <div class="warning">The canonicalization algorithm is not stable and canonical blank node ids might change between Oxigraph version.</div>
#[default]
Unstable,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_canon() {
let mut dataset = Dataset::new();
dataset.insert(QuadRef::new(
BlankNode::default().as_ref(),
NamedNodeRef::new_unchecked("http://ex"),
BlankNode::default().as_ref(),
GraphNameRef::DefaultGraph,
));
dataset.insert(QuadRef::new(
BlankNode::default().as_ref(),
NamedNodeRef::new_unchecked("http://ex"),
BlankNode::default().as_ref(),
GraphNameRef::DefaultGraph,
));
dataset.canonicalize(CanonicalizationAlgorithm::Unstable);
let mut dataset2 = Dataset::new();
dataset2.insert(QuadRef::new(
BlankNode::default().as_ref(),
NamedNodeRef::new_unchecked("http://ex"),
BlankNode::default().as_ref(),
GraphNameRef::DefaultGraph,
));
dataset2.insert(QuadRef::new(
BlankNode::default().as_ref(),
NamedNodeRef::new_unchecked("http://ex"),
BlankNode::default().as_ref(),
GraphNameRef::DefaultGraph,
));
dataset2.canonicalize(CanonicalizationAlgorithm::Unstable);
assert_eq!(dataset, dataset2);
}
}
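Because `CanonicalizationAlgorithm` is marked `#[non_exhaustive]`, downstream code cannot match on it exhaustively and must keep a wildcard arm. A minimal sketch of forward-compatible matching (the `describe` helper is hypothetical, not part of the crate):

```rust
use oxrdf::graph::CanonicalizationAlgorithm;

// Hypothetical helper: the wildcard arm is mandatory because the enum is
// #[non_exhaustive] and may gain new algorithms in later releases.
fn describe(algorithm: CanonicalizationAlgorithm) -> &'static str {
    match algorithm {
        CanonicalizationAlgorithm::Unstable => "OxRDF's preferred (unstable) algorithm",
        _ => "an algorithm added in a newer Oxigraph release",
    }
}

fn main() {
    // `Unstable` is the `Default` variant.
    println!("{}", describe(CanonicalizationAlgorithm::default()));
}
```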

@ -1,4 +1,4 @@
//! [In-memory implementation](Graph) of [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph). //! [In-memory implementation](super::Graph) of [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-graph).
//! //!
//! Usage example: //! Usage example:
//! ``` //! ```
@ -14,28 +14,20 @@
//! // simple filter //! // simple filter
//! let results: Vec<_> = graph.triples_for_subject(ex).collect(); //! let results: Vec<_> = graph.triples_for_subject(ex).collect();
//! assert_eq!(vec![triple], results); //! assert_eq!(vec![triple], results);
//!
//! // Print
//! assert_eq!(
//! graph.to_string(),
//! "<http://example.com> <http://example.com> <http://example.com> .\n"
//! );
//! # Result::<_,Box<dyn std::error::Error>>::Ok(()) //! # Result::<_,Box<dyn std::error::Error>>::Ok(())
//! ``` //! ```
//! //!
//! See also [`Dataset`] if you want support for multiple RDF graphs at the same time. //! See also [`Dataset`](super::Dataset) if you want support for multiple RDF graphs at the same time.
pub use crate::dataset::CanonicalizationAlgorithm;
use crate::dataset::*; use crate::dataset::*;
use crate::*; use crate::*;
use std::fmt; use std::fmt;
/// An in-memory [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph). /// An in-memory [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-graph).
/// ///
/// It can accommodate a fairly large number of triples (up to a few million). /// It can accommodate a fairly large number of triples (up to a few million).
/// /// Beware: it interns the strings and does not do any garbage collection yet:
/// <div class="warning">It interns the strings and does not do any garbage collection yet: /// if you insert and remove a lot of different terms, memory will grow without any reduction.
/// if you insert and remove a lot of different terms, memory will grow without any reduction.</div>
/// ///
/// Usage example: /// Usage example:
/// ``` /// ```
@ -53,7 +45,7 @@ use std::fmt;
/// assert_eq!(vec![triple], results); /// assert_eq!(vec![triple], results);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ``` /// ```
#[derive(Debug, Default, Clone)] #[derive(Debug, Default)]
pub struct Graph { pub struct Graph {
dataset: Dataset, dataset: Dataset,
} }
@ -184,11 +176,11 @@ impl Graph {
self.dataset.clear() self.dataset.clear()
} }
/// Canonicalizes the graph by renaming blank nodes. /// Applies on the graph the canonicalization process described in
/// [Canonical Forms for Isomorphic and Equivalent RDF Graphs: Algorithms for Leaning and Labelling Blank Nodes, Aidan Hogan, 2017](http://aidanhogan.com/docs/rdf-canonicalisation.pdf).
/// ///
/// Usage example ([Graph isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism)): /// Usage example ([Graph isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism)):
/// ``` /// ```
/// use oxrdf::graph::CanonicalizationAlgorithm;
/// use oxrdf::*; /// use oxrdf::*;
/// ///
/// let iri = NamedNodeRef::new("http://example.com")?; /// let iri = NamedNodeRef::new("http://example.com")?;
@ -204,18 +196,20 @@ impl Graph {
/// graph2.insert(TripleRef::new(&bnode2, iri, iri)); /// graph2.insert(TripleRef::new(&bnode2, iri, iri));
/// ///
/// assert_ne!(graph1, graph2); /// assert_ne!(graph1, graph2);
/// graph1.canonicalize(CanonicalizationAlgorithm::Unstable); /// graph1.canonicalize();
/// graph2.canonicalize(CanonicalizationAlgorithm::Unstable); /// graph2.canonicalize();
/// assert_eq!(graph1, graph2); /// assert_eq!(graph1, graph2);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ``` /// ```
/// ///
/// <div class="warning">Blank node ids depends on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes. /// Warning 1: Blank node ids depends on the current shape of the graph. Adding a new triple might change the ids of a lot of blank nodes.
/// Hence, this canonization might not be suitable for diffs.</div> /// Hence, this canonization might not be suitable for diffs.
///
/// Warning 2: The canonicalization algorithm is not stable and canonical blank node ids might change between Oxigraph versions.
/// ///
/// <div class="warning">This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset.</div> /// Warning 3: This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input graph.
pub fn canonicalize(&mut self, algorithm: CanonicalizationAlgorithm) { pub fn canonicalize(&mut self) {
self.dataset.canonicalize(algorithm) self.dataset.canonicalize()
} }
} }
@ -231,7 +225,7 @@ impl<'a> IntoIterator for &'a Graph {
type Item = TripleRef<'a>; type Item = TripleRef<'a>;
type IntoIter = Iter<'a>; type IntoIter = Iter<'a>;
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> Iter<'a> {
self.iter() self.iter()
} }
} }
@ -278,7 +272,7 @@ pub struct Iter<'a> {
impl<'a> Iterator for Iter<'a> { impl<'a> Iterator for Iter<'a> {
type Item = TripleRef<'a>; type Item = TripleRef<'a>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<TripleRef<'a>> {
self.inner.next() self.inner.next()
} }
} }

@ -1,172 +1,81 @@
//! Interning of RDF elements using Rodeo //! Interning of RDF elements using Rodeo
use crate::*; use crate::*;
use std::collections::hash_map::{Entry, HashMap, RandomState}; use lasso::{Key, Rodeo, Spur};
use std::hash::{BuildHasher, Hasher}; #[cfg(feature = "rdf-star")]
use std::collections::HashMap;
#[derive(Debug, Default, Clone)] #[derive(Debug, Default)]
pub struct Interner { pub struct Interner {
hasher: RandomState, strings: Rodeo,
string_for_hash: HashMap<u64, String, IdentityHasherBuilder>,
string_for_blank_node_id: HashMap<u128, String>,
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
triples: HashMap<InternedTriple, Triple>, triples: HashMap<InternedTriple, Triple>,
} }
impl Interner {
fn get_or_intern(&mut self, value: &str) -> Key {
let mut hash = self.hash(value);
loop {
match self.string_for_hash.entry(hash) {
Entry::Vacant(e) => {
e.insert(value.into());
return Key(hash);
}
Entry::Occupied(e) => {
if e.get() == value {
return Key(hash);
} else if hash == u64::MAX - 1 {
// Wrap around before the reserved u64::MAX sentinel; the outer
// loop then looks the new slot up again
hash = 0;
} else {
hash += 1;
}
}
}
}
}
fn get(&self, value: &str) -> Option<Key> {
let mut hash = self.hash(value);
loop {
let v = self.string_for_hash.get(&hash)?;
if v == value {
return Some(Key(hash));
} else if hash == u64::MAX - 1 {
hash = 0;
} else {
hash += 1;
}
}
}
fn hash(&self, value: &str) -> u64 {
let mut hasher = self.hasher.build_hasher();
hasher.write(value.as_bytes());
let hash = hasher.finish();
if hash == u64::MAX {
0
} else {
hash
}
}
fn resolve(&self, key: Key) -> &str {
&self.string_for_hash[&key.0]
}
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct Key(u64);
impl Key {
fn first() -> Self {
Self(0)
}
fn next(self) -> Self {
Self(self.0.saturating_add(1))
}
fn impossible() -> Self {
Self(u64::MAX)
}
}
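The new interner replaces Rodeo with open addressing over a `HashMap<u64, String>`: a string's key is its 64-bit hash, a collision probes the next slot linearly, and `u64::MAX` is kept free as the `impossible` sentinel (the `hash` method remaps it to 0). A minimal std-only sketch of that probing scheme, independent of OxRDF's types:

```rust
use std::collections::hash_map::{Entry, HashMap, RandomState};
use std::hash::{BuildHasher, Hasher};

#[derive(Default)]
struct ToyInterner {
    hasher: RandomState,
    string_for_hash: HashMap<u64, String>,
}

impl ToyInterner {
    fn get_or_intern(&mut self, value: &str) -> u64 {
        let mut hasher = self.hasher.build_hasher();
        hasher.write(value.as_bytes());
        let mut hash = hasher.finish();
        if hash == u64::MAX {
            hash = 0; // u64::MAX is reserved for the "impossible" key
        }
        loop {
            match self.string_for_hash.entry(hash) {
                Entry::Vacant(e) => {
                    e.insert(value.into());
                    return hash;
                }
                // Already interned in this slot
                Entry::Occupied(e) if e.get() == value => return hash,
                // Collision with a different string: probe the next slot,
                // wrapping around just before the reserved sentinel
                Entry::Occupied(_) => hash = if hash == u64::MAX - 1 { 0 } else { hash + 1 },
            }
        }
    }
}

fn main() {
    let mut interner = ToyInterner::default();
    let a = interner.get_or_intern("http://example.com");
    let b = interner.get_or_intern("http://example.com");
    assert_eq!(a, b); // interning is idempotent: same string, same key
}
```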
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct InternedNamedNode { pub struct InternedNamedNode {
id: Key, id: Spur,
} }
impl InternedNamedNode { impl InternedNamedNode {
pub fn encoded_into(named_node: NamedNodeRef<'_>, interner: &mut Interner) -> Self { pub fn encoded_into(named_node: NamedNodeRef<'_>, interner: &mut Interner) -> Self {
Self { Self {
id: interner.get_or_intern(named_node.as_str()), id: interner.strings.get_or_intern(named_node.as_str()),
} }
} }
pub fn encoded_from(named_node: NamedNodeRef<'_>, interner: &Interner) -> Option<Self> { pub fn encoded_from(named_node: NamedNodeRef<'_>, interner: &Interner) -> Option<Self> {
Some(Self { Some(Self {
id: interner.get(named_node.as_str())?, id: interner.strings.get(named_node.as_str())?,
}) })
} }
pub fn decode_from(self, interner: &Interner) -> NamedNodeRef<'_> { pub fn decode_from<'a>(&self, interner: &'a Interner) -> NamedNodeRef<'a> {
NamedNodeRef::new_unchecked(interner.resolve(self.id)) NamedNodeRef::new_unchecked(interner.strings.resolve(&self.id))
} }
pub fn first() -> Self { pub fn first() -> Self {
Self { id: Key::first() } Self { id: first_spur() }
} }
pub fn next(self) -> Self { pub fn next(self) -> Self {
Self { id: self.id.next() } Self {
id: next_spur(self.id),
}
} }
pub fn impossible() -> Self { pub fn impossible() -> Self {
Self { Self {
id: Key::impossible(), id: impossible_spur(),
} }
} }
} }
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub enum InternedBlankNode { pub struct InternedBlankNode {
Number { id: u128 }, id: Spur,
Other { id: Key },
} }
impl InternedBlankNode { impl InternedBlankNode {
pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Interner) -> Self { pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Interner) -> Self {
if let Some(id) = blank_node.unique_id() { Self {
interner id: interner.strings.get_or_intern(blank_node.as_str()),
.string_for_blank_node_id
.entry(id)
.or_insert_with(|| blank_node.as_str().into());
Self::Number { id }
} else {
Self::Other {
id: interner.get_or_intern(blank_node.as_str()),
}
} }
} }
pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Interner) -> Option<Self> { pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Interner) -> Option<Self> {
if let Some(id) = blank_node.unique_id() { Some(Self {
interner id: interner.strings.get(blank_node.as_str())?,
.string_for_blank_node_id })
.contains_key(&id)
.then_some(Self::Number { id })
} else {
Some(Self::Other {
id: interner.get(blank_node.as_str())?,
})
}
} }
pub fn decode_from(self, interner: &Interner) -> BlankNodeRef<'_> { pub fn decode_from<'a>(&self, interner: &'a Interner) -> BlankNodeRef<'a> {
BlankNodeRef::new_unchecked(match self { BlankNodeRef::new_unchecked(interner.strings.resolve(&self.id))
Self::Number { id } => &interner.string_for_blank_node_id[&id],
Self::Other { id } => interner.resolve(id),
})
} }
pub fn next(self) -> Self { pub fn next(self) -> Self {
match self { Self {
Self::Number { id } => Self::Number { id: next_spur(self.id),
id: id.saturating_add(1),
},
Self::Other { id } => Self::Other { id: id.next() },
} }
} }
} }
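The new `Number`/`Other` split means blank nodes carrying a numeric 128-bit id bypass string interning entirely; only user-supplied labels go through the interner. A small sketch of the distinction, assuming `BlankNodeRef::unique_id` is part of the public API (as its use above suggests):

```rust
use oxrdf::BlankNode;

fn main() {
    // Randomly generated blank nodes carry a 128-bit numeric id, so the
    // interner can store them as `InternedBlankNode::Number { id }`.
    let random = BlankNode::default();
    assert!(random.as_ref().unique_id().is_some());

    // A user-chosen label has no numeric id and falls back to the
    // string-interning `Other` variant.
    let labelled = BlankNode::new("ex").unwrap();
    assert_eq!(labelled.as_ref().unique_id(), None);
}
```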
@ -174,26 +83,26 @@ impl InternedBlankNode {
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub enum InternedLiteral { pub enum InternedLiteral {
String { String {
value_id: Key, value_id: Spur,
}, },
LanguageTaggedString { LanguageTaggedString {
value_id: Key, value_id: Spur,
language_id: Key, language_id: Spur,
}, },
TypedLiteral { TypedLiteral {
value_id: Key, value_id: Spur,
datatype: InternedNamedNode, datatype: InternedNamedNode,
}, },
} }
impl InternedLiteral { impl InternedLiteral {
pub fn encoded_into(literal: LiteralRef<'_>, interner: &mut Interner) -> Self { pub fn encoded_into(literal: LiteralRef<'_>, interner: &mut Interner) -> Self {
let value_id = interner.get_or_intern(literal.value()); let value_id = interner.strings.get_or_intern(literal.value());
if literal.is_plain() { if literal.is_plain() {
if let Some(language) = literal.language() { if let Some(language) = literal.language() {
Self::LanguageTaggedString { Self::LanguageTaggedString {
value_id, value_id,
language_id: interner.get_or_intern(language), language_id: interner.strings.get_or_intern(language),
} }
} else { } else {
Self::String { value_id } Self::String { value_id }
@ -207,12 +116,12 @@ impl InternedLiteral {
} }
pub fn encoded_from(literal: LiteralRef<'_>, interner: &Interner) -> Option<Self> { pub fn encoded_from(literal: LiteralRef<'_>, interner: &Interner) -> Option<Self> {
let value_id = interner.get(literal.value())?; let value_id = interner.strings.get(literal.value())?;
Some(if literal.is_plain() { Some(if literal.is_plain() {
if let Some(language) = literal.language() { if let Some(language) = literal.language() {
Self::LanguageTaggedString { Self::LanguageTaggedString {
value_id, value_id,
language_id: interner.get(language)?, language_id: interner.strings.get(language)?,
} }
} else { } else {
Self::String { value_id } Self::String { value_id }
@ -227,18 +136,18 @@ impl InternedLiteral {
pub fn decode_from<'a>(&self, interner: &'a Interner) -> LiteralRef<'a> { pub fn decode_from<'a>(&self, interner: &'a Interner) -> LiteralRef<'a> {
match self { match self {
Self::String { value_id } => { InternedLiteral::String { value_id } => {
LiteralRef::new_simple_literal(interner.resolve(*value_id)) LiteralRef::new_simple_literal(interner.strings.resolve(value_id))
} }
Self::LanguageTaggedString { InternedLiteral::LanguageTaggedString {
value_id, value_id,
language_id, language_id,
} => LiteralRef::new_language_tagged_literal_unchecked( } => LiteralRef::new_language_tagged_literal_unchecked(
interner.resolve(*value_id), interner.strings.resolve(value_id),
interner.resolve(*language_id), interner.strings.resolve(language_id),
), ),
Self::TypedLiteral { value_id, datatype } => LiteralRef::new_typed_literal( InternedLiteral::TypedLiteral { value_id, datatype } => LiteralRef::new_typed_literal(
interner.resolve(*value_id), interner.strings.resolve(value_id),
datatype.decode_from(interner), datatype.decode_from(interner),
), ),
} }
@ -247,14 +156,14 @@ impl InternedLiteral {
pub fn next(&self) -> Self { pub fn next(&self) -> Self {
match self { match self {
Self::String { value_id } => Self::String { Self::String { value_id } => Self::String {
value_id: value_id.next(), value_id: next_spur(*value_id),
}, },
Self::LanguageTaggedString { Self::LanguageTaggedString {
value_id, value_id,
language_id, language_id,
} => Self::LanguageTaggedString { } => Self::LanguageTaggedString {
value_id: *value_id, value_id: *value_id,
language_id: language_id.next(), language_id: next_spur(*language_id),
}, },
Self::TypedLiteral { value_id, datatype } => Self::TypedLiteral { Self::TypedLiteral { value_id, datatype } => Self::TypedLiteral {
value_id: *value_id, value_id: *value_id,
@ -489,10 +398,11 @@ impl InternedTriple {
predicate: InternedNamedNode::encoded_from(triple.predicate, interner)?, predicate: InternedNamedNode::encoded_from(triple.predicate, interner)?,
object: InternedTerm::encoded_from(triple.object, interner)?, object: InternedTerm::encoded_from(triple.object, interner)?,
}; };
interner if interner.triples.contains_key(&interned_triple) {
.triples Some(interned_triple)
.contains_key(&interned_triple) } else {
.then_some(interned_triple) None
}
} }
pub fn next(&self) -> Self { pub fn next(&self) -> Self {
@ -504,32 +414,14 @@ impl InternedTriple {
} }
} }
#[derive(Default, Clone)] fn first_spur() -> Spur {
struct IdentityHasherBuilder; Spur::try_from_usize(0).unwrap()
impl BuildHasher for IdentityHasherBuilder {
type Hasher = IdentityHasher;
fn build_hasher(&self) -> Self::Hasher {
Self::Hasher::default()
}
} }
#[derive(Default)] fn next_spur(value: Spur) -> Spur {
struct IdentityHasher { Spur::try_from_usize(value.into_usize() + 1).unwrap()
value: u64,
} }
impl Hasher for IdentityHasher { fn impossible_spur() -> Spur {
fn finish(&self) -> u64 { Spur::try_from_usize((u32::MAX - 10).try_into().unwrap()).unwrap()
self.value
}
fn write(&mut self, _bytes: &[u8]) {
unreachable!("Should only be used on u64 values")
}
fn write_u64(&mut self, i: u64) {
self.value = i
}
} }
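Since the interner's map keys are already hashes, `IdentityHasher` simply passes the `u64` through instead of hashing it a second time. A self-contained sketch of the same idea on a plain `HashMap`, using only std:

```rust
use std::collections::HashMap;
use std::hash::{BuildHasher, Hasher};

#[derive(Default, Clone)]
struct IdentityHasherBuilder;

impl BuildHasher for IdentityHasherBuilder {
    type Hasher = IdentityHasher;
    fn build_hasher(&self) -> IdentityHasher {
        IdentityHasher::default()
    }
}

#[derive(Default)]
struct IdentityHasher {
    value: u64,
}

impl Hasher for IdentityHasher {
    fn finish(&self) -> u64 {
        self.value
    }
    fn write(&mut self, _bytes: &[u8]) {
        unreachable!("only u64 keys are expected")
    }
    fn write_u64(&mut self, i: u64) {
        // `u64`'s Hash impl calls write_u64, so the key becomes its own hash
        self.value = i;
    }
}

fn main() {
    let mut map: HashMap<u64, &str, IdentityHasherBuilder> = HashMap::default();
    map.insert(42, "pre-hashed key");
    assert_eq!(map.get(&42), Some(&"pre-hashed key"));
}
```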

@ -1,6 +1,6 @@
#![doc = include_str!("../README.md")] #![doc = include_str!("../README.md")]
#![deny(unsafe_code)]
#![doc(test(attr(deny(warnings))))] #![doc(test(attr(deny(warnings))))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] #![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] #![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
@ -23,7 +23,7 @@ pub use crate::named_node::{NamedNode, NamedNodeRef};
pub use crate::parser::TermParseError; pub use crate::parser::TermParseError;
pub use crate::triple::{ pub use crate::triple::{
GraphName, GraphNameRef, NamedOrBlankNode, NamedOrBlankNodeRef, Quad, QuadRef, Subject, GraphName, GraphNameRef, NamedOrBlankNode, NamedOrBlankNodeRef, Quad, QuadRef, Subject,
SubjectRef, Term, TermRef, Triple, TripleRef, TryFromTermError, SubjectRef, Term, TermRef, Triple, TripleRef,
}; };
pub use crate::variable::{Variable, VariableNameParseError, VariableRef}; pub use crate::variable::{Variable, VariableNameParseError, VariableRef};
pub use oxilangtag::LanguageTagParseError; pub use oxilangtag::LanguageTagParseError;

@ -1,20 +1,20 @@
use crate::named_node::{NamedNode, NamedNodeRef}; use crate::named_node::NamedNode;
use crate::vocab::{rdf, xsd}; use crate::vocab::rdf;
use crate::vocab::xsd;
use crate::NamedNodeRef;
use oxilangtag::{LanguageTag, LanguageTagParseError}; use oxilangtag::{LanguageTag, LanguageTagParseError};
#[cfg(feature = "oxsdatatypes")]
use oxsdatatypes::*;
use serde::{Deserialize, Serialize};
use std::borrow::Cow; use std::borrow::Cow;
use std::fmt; use std::fmt;
use std::fmt::Write; use std::fmt::Write;
use std::option::Option;
/// An owned RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal). /// An owned RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal).
/// ///
/// The default string formatter returns an N-Triples, Turtle, and SPARQL compatible representation: /// The default string formatter returns an N-Triples, Turtle, and SPARQL compatible representation:
/// ``` /// ```
/// # use oxilangtag::LanguageTagParseError; /// # use oxilangtag::LanguageTagParseError;
/// use oxrdf::vocab::xsd;
/// use oxrdf::Literal; /// use oxrdf::Literal;
/// use oxrdf::vocab::xsd;
/// ///
/// assert_eq!( /// assert_eq!(
/// "\"foo\\nbar\"", /// "\"foo\\nbar\"",
@ -22,20 +22,20 @@ use std::fmt::Write;
/// ); /// );
/// ///
/// assert_eq!( /// assert_eq!(
/// r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#, /// "\"1999-01-01\"^^<http://www.w3.org/2001/XMLSchema#date>",
/// Literal::new_typed_literal("1999-01-01", xsd::DATE).to_string() /// Literal::new_typed_literal("1999-01-01", xsd::DATE).to_string()
/// ); /// );
/// ///
/// assert_eq!( /// assert_eq!(
/// r#""foo"@en"#, /// "\"foo\"@en",
/// Literal::new_language_tagged_literal("foo", "en")?.to_string() /// Literal::new_language_tagged_literal("foo", "en")?.to_string()
/// ); /// );
/// # Result::<(), LanguageTagParseError>::Ok(()) /// # Result::<(), LanguageTagParseError>::Ok(())
/// ``` /// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Literal(LiteralContent); pub struct Literal(LiteralContent);
#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)] #[derive(PartialEq, Eq, Debug, Clone, Hash)]
enum LiteralContent { enum LiteralContent {
String(String), String(String),
LanguageTaggedString { value: String, language: String }, LanguageTaggedString { value: String, language: String },
@ -110,17 +110,17 @@ impl Literal {
/// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri). /// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
/// ///
/// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). /// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](http://www.w3.org/1999/02/22-rdf-syntax-ns#langString).
/// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string). /// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](http://www.w3.org/2001/XMLSchema#string).
#[inline] #[inline]
pub fn datatype(&self) -> NamedNodeRef<'_> { pub fn datatype(&self) -> NamedNodeRef<'_> {
self.as_ref().datatype() self.as_ref().datatype()
} }
/// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/2004/REC-rdf-concepts-20040210/#dfn-plain-literal). /// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/rdf-concepts/#dfn-plain-literal).
/// ///
/// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) /// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
/// or has the datatype [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string). /// or has the datatype [xsd:string](http://www.w3.org/2001/XMLSchema#string).
#[inline] #[inline]
pub fn is_plain(&self) -> bool { pub fn is_plain(&self) -> bool {
self.as_ref().is_plain() self.as_ref().is_plain()
@ -266,9 +266,9 @@ impl From<f32> for Literal {
fn from(value: f32) -> Self { fn from(value: f32) -> Self {
Self(LiteralContent::TypedLiteral { Self(LiteralContent::TypedLiteral {
value: if value == f32::INFINITY { value: if value == f32::INFINITY {
"INF".to_owned() "INF".to_string()
} else if value == f32::NEG_INFINITY { } else if value == f32::NEG_INFINITY {
"-INF".to_owned() "-INF".to_string()
} else { } else {
value.to_string() value.to_string()
}, },
@ -282,9 +282,9 @@ impl From<f64> for Literal {
fn from(value: f64) -> Self { fn from(value: f64) -> Self {
Self(LiteralContent::TypedLiteral { Self(LiteralContent::TypedLiteral {
value: if value == f64::INFINITY { value: if value == f64::INFINITY {
"INF".to_owned() "INF".to_string()
} else if value == f64::NEG_INFINITY { } else if value == f64::NEG_INFINITY {
"-INF".to_owned() "-INF".to_string()
} else { } else {
value.to_string() value.to_string()
}, },
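The `to_owned`/`to_string` change here is cosmetic, but the surrounding logic matters: XSD's lexical space spells the infinities `INF` and `-INF`, not Rust's `inf`. A quick check of the conversions; the `f32` impl is assumed to pair with `xsd::FLOAT` (the datatype line falls outside this hunk):

```rust
use oxrdf::vocab::xsd;
use oxrdf::Literal;

fn main() {
    // Special float values use the XSD spellings...
    assert_eq!(
        Literal::from(f64::INFINITY),
        Literal::new_typed_literal("INF", xsd::DOUBLE)
    );
    assert_eq!(
        Literal::from(f32::NEG_INFINITY),
        Literal::new_typed_literal("-INF", xsd::FLOAT)
    );
    // ...while finite values keep Rust's Display form.
    assert_eq!(
        Literal::from(1.5_f64),
        Literal::new_typed_literal("1.5", xsd::DOUBLE)
    );
}
```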
@ -293,140 +293,12 @@ impl From<f64> for Literal {
} }
} }
#[cfg(feature = "oxsdatatypes")]
impl From<Boolean> for Literal {
#[inline]
fn from(value: Boolean) -> Self {
Self::new_typed_literal(value.to_string(), xsd::BOOLEAN)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<Float> for Literal {
#[inline]
fn from(value: Float) -> Self {
Self::new_typed_literal(value.to_string(), xsd::FLOAT)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<Double> for Literal {
#[inline]
fn from(value: Double) -> Self {
Self::new_typed_literal(value.to_string(), xsd::DOUBLE)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<Integer> for Literal {
#[inline]
fn from(value: Integer) -> Self {
Self::new_typed_literal(value.to_string(), xsd::INTEGER)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<Decimal> for Literal {
#[inline]
fn from(value: Decimal) -> Self {
Self::new_typed_literal(value.to_string(), xsd::DECIMAL)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<DateTime> for Literal {
#[inline]
fn from(value: DateTime) -> Self {
Self::new_typed_literal(value.to_string(), xsd::DATE_TIME)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<Time> for Literal {
#[inline]
fn from(value: Time) -> Self {
Self::new_typed_literal(value.to_string(), xsd::TIME)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<Date> for Literal {
#[inline]
fn from(value: Date) -> Self {
Self::new_typed_literal(value.to_string(), xsd::DATE)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<GYearMonth> for Literal {
#[inline]
fn from(value: GYearMonth) -> Self {
Self::new_typed_literal(value.to_string(), xsd::G_YEAR_MONTH)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<GYear> for Literal {
#[inline]
fn from(value: GYear) -> Self {
Self::new_typed_literal(value.to_string(), xsd::G_YEAR)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<GMonthDay> for Literal {
#[inline]
fn from(value: GMonthDay) -> Self {
Self::new_typed_literal(value.to_string(), xsd::G_MONTH_DAY)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<GMonth> for Literal {
#[inline]
fn from(value: GMonth) -> Self {
Self::new_typed_literal(value.to_string(), xsd::G_MONTH)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<GDay> for Literal {
#[inline]
fn from(value: GDay) -> Self {
Self::new_typed_literal(value.to_string(), xsd::G_DAY)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<Duration> for Literal {
#[inline]
fn from(value: Duration) -> Self {
Self::new_typed_literal(value.to_string(), xsd::DURATION)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<YearMonthDuration> for Literal {
#[inline]
fn from(value: YearMonthDuration) -> Self {
Self::new_typed_literal(value.to_string(), xsd::YEAR_MONTH_DURATION)
}
}
#[cfg(feature = "oxsdatatypes")]
impl From<DayTimeDuration> for Literal {
#[inline]
fn from(value: DayTimeDuration) -> Self {
Self::new_typed_literal(value.to_string(), xsd::DAY_TIME_DURATION)
}
}
/// A borrowed RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal). /// A borrowed RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal).
/// ///
/// The default string formatter returns an N-Triples, Turtle, and SPARQL compatible representation: /// The default string formatter returns an N-Triples, Turtle, and SPARQL compatible representation:
/// ``` /// ```
/// use oxrdf::vocab::xsd;
/// use oxrdf::LiteralRef; /// use oxrdf::LiteralRef;
/// use oxrdf::vocab::xsd;
/// ///
/// assert_eq!( /// assert_eq!(
/// "\"foo\\nbar\"", /// "\"foo\\nbar\"",
@ -434,7 +306,7 @@ impl From<DayTimeDuration> for Literal {
/// ); /// );
/// ///
/// assert_eq!( /// assert_eq!(
/// r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#, /// "\"1999-01-01\"^^<http://www.w3.org/2001/XMLSchema#date>",
/// LiteralRef::new_typed_literal("1999-01-01", xsd::DATE).to_string() /// LiteralRef::new_typed_literal("1999-01-01", xsd::DATE).to_string()
/// ); /// );
/// ``` /// ```
@ -457,7 +329,7 @@ enum LiteralRefContent<'a> {
impl<'a> LiteralRef<'a> { impl<'a> LiteralRef<'a> {
/// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal). /// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal).
#[inline] #[inline]
pub const fn new_simple_literal(value: &'a str) -> Self { pub fn new_simple_literal(value: &'a str) -> Self {
LiteralRef(LiteralRefContent::String(value)) LiteralRef(LiteralRefContent::String(value))
} }
@ -480,13 +352,13 @@ impl<'a> LiteralRef<'a> {
/// ///
/// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data. /// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data.
#[inline] #[inline]
pub const fn new_language_tagged_literal_unchecked(value: &'a str, language: &'a str) -> Self { pub fn new_language_tagged_literal_unchecked(value: &'a str, language: &'a str) -> Self {
LiteralRef(LiteralRefContent::LanguageTaggedString { value, language }) LiteralRef(LiteralRefContent::LanguageTaggedString { value, language })
} }
/// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form) /// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form)
#[inline] #[inline]
pub const fn value(self) -> &'a str { pub fn value(self) -> &'a str {
match self.0 { match self.0 {
LiteralRefContent::String(value) LiteralRefContent::String(value)
| LiteralRefContent::LanguageTaggedString { value, .. } | LiteralRefContent::LanguageTaggedString { value, .. }
@ -499,7 +371,7 @@ impl<'a> LiteralRef<'a> {
/// Language tags are defined by [BCP47](https://tools.ietf.org/html/bcp47). /// Language tags are defined by [BCP47](https://tools.ietf.org/html/bcp47).
/// They are normalized to lowercase by this implementation. /// They are normalized to lowercase by this implementation.
#[inline] #[inline]
pub const fn language(self) -> Option<&'a str> { pub fn language(self) -> Option<&'a str> {
match self.0 { match self.0 {
LiteralRefContent::LanguageTaggedString { language, .. } => Some(language), LiteralRefContent::LanguageTaggedString { language, .. } => Some(language),
_ => None, _ => None,
@ -508,10 +380,10 @@ impl<'a> LiteralRef<'a> {
/// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri). /// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
/// ///
/// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). /// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](http://www.w3.org/1999/02/22-rdf-syntax-ns#langString).
/// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string). /// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](http://www.w3.org/2001/XMLSchema#string).
#[inline] #[inline]
pub const fn datatype(self) -> NamedNodeRef<'a> { pub fn datatype(self) -> NamedNodeRef<'a> {
match self.0 { match self.0 {
LiteralRefContent::String(_) => xsd::STRING, LiteralRefContent::String(_) => xsd::STRING,
LiteralRefContent::LanguageTaggedString { .. } => rdf::LANG_STRING, LiteralRefContent::LanguageTaggedString { .. } => rdf::LANG_STRING,
@ -519,12 +391,12 @@ impl<'a> LiteralRef<'a> {
} }
} }
/// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/2004/REC-rdf-concepts-20040210/#dfn-plain-literal). /// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/rdf-concepts/#dfn-plain-literal).
/// ///
/// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) /// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
/// or has the datatype [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string). /// or has the datatype [xsd:string](http://www.w3.org/2001/XMLSchema#string).
#[inline] #[inline]
pub const fn is_plain(self) -> bool { pub fn is_plain(self) -> bool {
matches!( matches!(
self.0, self.0,
LiteralRefContent::String(_) | LiteralRefContent::LanguageTaggedString { .. } LiteralRefContent::String(_) | LiteralRefContent::LanguageTaggedString { .. }
@ -550,7 +422,7 @@ impl<'a> LiteralRef<'a> {
/// Extract components from this literal /// Extract components from this literal
#[inline] #[inline]
pub const fn destruct(self) -> (&'a str, Option<NamedNodeRef<'a>>, Option<&'a str>) { pub fn destruct(self) -> (&'a str, Option<NamedNodeRef<'a>>, Option<&'a str>) {
match self.0 { match self.0 {
LiteralRefContent::String(s) => (s, None, None), LiteralRefContent::String(s) => (s, None, None),
LiteralRefContent::LanguageTaggedString { value, language } => { LiteralRefContent::LanguageTaggedString { value, language } => {
@ -568,11 +440,11 @@ impl fmt::Display for LiteralRef<'_> {
LiteralRefContent::String(value) => print_quoted_str(value, f), LiteralRefContent::String(value) => print_quoted_str(value, f),
LiteralRefContent::LanguageTaggedString { value, language } => { LiteralRefContent::LanguageTaggedString { value, language } => {
print_quoted_str(value, f)?; print_quoted_str(value, f)?;
write!(f, "@{language}") write!(f, "@{}", language)
} }
LiteralRefContent::TypedLiteral { value, datatype } => { LiteralRefContent::TypedLiteral { value, datatype } => {
print_quoted_str(value, f)?; print_quoted_str(value, f)?;
write!(f, "^^{datatype}") write!(f, "^^{}", datatype)
} }
} }
} }
@ -614,26 +486,21 @@ impl PartialEq<LiteralRef<'_>> for Literal {
} }
#[inline] #[inline]
pub fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result { pub(crate) fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result {
f.write_char('"')?; f.write_char('"')?;
for c in string.chars() { for c in string.chars() {
match c { match c {
'\u{08}' => f.write_str("\\b"),
'\t' => f.write_str("\\t"),
'\n' => f.write_str("\\n"), '\n' => f.write_str("\\n"),
'\u{0C}' => f.write_str("\\f"),
'\r' => f.write_str("\\r"), '\r' => f.write_str("\\r"),
'"' => f.write_str("\\\""), '"' => f.write_str("\\\""),
'\\' => f.write_str("\\\\"), '\\' => f.write_str("\\\\"),
'\0'..='\u{1F}' | '\u{7F}' => write!(f, "\\u{:04X}", u32::from(c)), c => f.write_char(c),
_ => f.write_char(c),
}?; }?;
} }
f.write_char('"') f.write_char('"')
} }
#[cfg(test)] #[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests { mod tests {
use super::*; use super::*;

@ -1,5 +1,4 @@
use oxiri::{Iri, IriParseError}; use oxiri::{Iri, IriParseError};
use serde::{Deserialize, Serialize};
use std::cmp::Ordering; use std::cmp::Ordering;
use std::fmt; use std::fmt;
@ -15,7 +14,7 @@ use std::fmt;
/// ); /// );
/// # Result::<_,oxrdf::IriParseError>::Ok(()) /// # Result::<_,oxrdf::IriParseError>::Ok(())
/// ``` /// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash, Serialize, Deserialize)] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct NamedNode { pub struct NamedNode {
iri: String, iri: String,
} }
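With the new `Serialize`/`Deserialize` derives, terms can round-trip through serde. A hedged sketch: it assumes this branch uses plain structural derives (no custom impl) and that `serde_json` is available as a dev-dependency:

```rust
use oxrdf::NamedNode;

fn main() {
    let node = NamedNode::new("http://example.com").unwrap();
    // Derived serialization; the exact JSON shape follows the struct layout.
    let json = serde_json::to_string(&node).unwrap();
    let back: NamedNode = serde_json::from_str(&json).unwrap();
    assert_eq!(node, back);
}
```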
@ -217,21 +216,3 @@ impl PartialOrd<NamedNodeRef<'_>> for NamedNode {
self.as_ref().partial_cmp(other) self.as_ref().partial_cmp(other)
} }
} }
impl From<Iri<String>> for NamedNode {
#[inline]
fn from(iri: Iri<String>) -> Self {
Self {
iri: iri.into_inner(),
}
}
}
impl<'a> From<Iri<&'a str>> for NamedNodeRef<'a> {
#[inline]
fn from(iri: Iri<&'a str>) -> Self {
Self {
iri: iri.into_inner(),
}
}
}
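The new `From<Iri<_>>` conversions let callers that already hold a validated `oxiri::Iri` build a `NamedNode` without re-parsing. For example:

```rust
use oxiri::Iri;
use oxrdf::NamedNode;

fn main() {
    // Validation happens once, in oxiri...
    let iri = Iri::parse("http://example.com".to_owned()).unwrap();
    // ...and the conversion just moves the inner String.
    let node = NamedNode::from(iri);
    assert_eq!(node.as_str(), "http://example.com");
}
```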

@ -6,6 +6,8 @@ use crate::{
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
use crate::{Subject, Triple}; use crate::{Subject, Triple};
use std::char; use std::char;
use std::error::Error;
use std::fmt;
use std::str::{Chars, FromStr}; use std::str::{Chars, FromStr};
/// This limit is set to avoid stack overflow errors when parsing nested triples due to too many recursive calls.
@ -21,15 +23,12 @@ impl FromStr for NamedNode {
/// use oxrdf::NamedNode; /// use oxrdf::NamedNode;
/// use std::str::FromStr; /// use std::str::FromStr;
/// ///
/// assert_eq!( /// assert_eq!(NamedNode::from_str("<http://example.com>").unwrap(), NamedNode::new("http://example.com").unwrap())
/// NamedNode::from_str("<http://example.com>").unwrap(),
/// NamedNode::new("http://example.com").unwrap()
/// )
/// ``` /// ```
fn from_str(s: &str) -> Result<Self, Self::Err> { fn from_str(s: &str) -> Result<Self, TermParseError> {
let (term, left) = read_named_node(s)?; let (term, left) = read_named_node(s)?;
if !left.is_empty() { if !left.is_empty() {
return Err(Self::Err::msg( return Err(TermParseError::msg(
"Named node serialization should end with a >", "Named node serialization should end with a >",
)); ));
} }
@ -46,15 +45,12 @@ impl FromStr for BlankNode {
/// use oxrdf::BlankNode; /// use oxrdf::BlankNode;
/// use std::str::FromStr; /// use std::str::FromStr;
/// ///
/// assert_eq!( /// assert_eq!(BlankNode::from_str("_:ex").unwrap(), BlankNode::new("ex").unwrap())
/// BlankNode::from_str("_:ex").unwrap(),
/// BlankNode::new("ex").unwrap()
/// )
/// ``` /// ```
fn from_str(s: &str) -> Result<Self, Self::Err> { fn from_str(s: &str) -> Result<Self, TermParseError> {
let (term, left) = read_blank_node(s)?; let (term, left) = read_blank_node(s)?;
if !left.is_empty() { if !left.is_empty() {
return Err(Self::Err::msg( return Err(TermParseError::msg(
"Blank node serialization should not contain whitespaces", "Blank node serialization should not contain whitespaces",
)); ));
} }
@ -68,46 +64,21 @@ impl FromStr for Literal {
/// Parses a literal from its NTriples or Turtle serialization /// Parses a literal from its NTriples or Turtle serialization
/// ///
/// ``` /// ```
/// use oxrdf::vocab::xsd; /// use oxrdf::{Literal, NamedNode, vocab::xsd};
/// use oxrdf::{Literal, NamedNode};
/// use std::str::FromStr; /// use std::str::FromStr;
/// ///
/// assert_eq!( /// assert_eq!(Literal::from_str("\"ex\\n\"").unwrap(), Literal::new_simple_literal("ex\n"));
/// Literal::from_str("\"ex\\n\"").unwrap(), /// assert_eq!(Literal::from_str("\"ex\"@en").unwrap(), Literal::new_language_tagged_literal("ex", "en").unwrap());
/// Literal::new_simple_literal("ex\n") /// assert_eq!(Literal::from_str("\"2020\"^^<http://www.w3.org/2001/XMLSchema#gYear>").unwrap(), Literal::new_typed_literal("2020", NamedNode::new("http://www.w3.org/2001/XMLSchema#gYear").unwrap()));
/// ); /// assert_eq!(Literal::from_str("true").unwrap(), Literal::new_typed_literal("true", xsd::BOOLEAN));
/// assert_eq!( /// assert_eq!(Literal::from_str("+122").unwrap(), Literal::new_typed_literal("+122", xsd::INTEGER));
/// Literal::from_str("\"ex\"@en").unwrap(), /// assert_eq!(Literal::from_str("-122.23").unwrap(), Literal::new_typed_literal("-122.23", xsd::DECIMAL));
/// Literal::new_language_tagged_literal("ex", "en").unwrap() /// assert_eq!(Literal::from_str("-122e+1").unwrap(), Literal::new_typed_literal("-122e+1", xsd::DOUBLE));
/// );
/// assert_eq!(
/// Literal::from_str("\"2020\"^^<http://www.w3.org/2001/XMLSchema#gYear>").unwrap(),
/// Literal::new_typed_literal(
/// "2020",
/// NamedNode::new("http://www.w3.org/2001/XMLSchema#gYear").unwrap()
/// )
/// );
/// assert_eq!(
/// Literal::from_str("true").unwrap(),
/// Literal::new_typed_literal("true", xsd::BOOLEAN)
/// );
/// assert_eq!(
/// Literal::from_str("+122").unwrap(),
/// Literal::new_typed_literal("+122", xsd::INTEGER)
/// );
/// assert_eq!(
/// Literal::from_str("-122.23").unwrap(),
/// Literal::new_typed_literal("-122.23", xsd::DECIMAL)
/// );
/// assert_eq!(
/// Literal::from_str("-122e+1").unwrap(),
/// Literal::new_typed_literal("-122e+1", xsd::DOUBLE)
/// );
/// ``` /// ```
fn from_str(s: &str) -> Result<Self, Self::Err> { fn from_str(s: &str) -> Result<Self, TermParseError> {
let (term, left) = read_literal(s)?; let (term, left) = read_literal(s)?;
if !left.is_empty() { if !left.is_empty() {
return Err(Self::Err::msg("Invalid literal serialization")); return Err(TermParseError::msg("Invalid literal serialization"));
} }
Ok(term) Ok(term)
} }
@ -122,15 +93,17 @@ impl FromStr for Term {
/// use oxrdf::*; /// use oxrdf::*;
/// use std::str::FromStr; /// use std::str::FromStr;
/// ///
/// assert_eq!( /// assert_eq!(Term::from_str("\"ex\"").unwrap(), Literal::new_simple_literal("ex").into());
/// Term::from_str("\"ex\"").unwrap(), /// assert_eq!(Term::from_str("<< _:s <http://example.com/p> \"o\" >>").unwrap(), Triple::new(
/// Literal::new_simple_literal("ex").into() /// BlankNode::new("s").unwrap(),
/// ); /// NamedNode::new("http://example.com/p").unwrap(),
/// Literal::new_simple_literal("o")
/// ).into());
/// ``` /// ```
fn from_str(s: &str) -> Result<Self, Self::Err> { fn from_str(s: &str) -> Result<Self, TermParseError> {
let (term, left) = read_term(s, 0)?; let (term, left) = read_term(s, 0)?;
if !left.is_empty() { if !left.is_empty() {
return Err(Self::Err::msg("Invalid term serialization")); return Err(TermParseError::msg("Invalid term serialization"));
} }
Ok(term) Ok(term)
} }
@ -145,22 +118,19 @@ impl FromStr for Variable {
/// use oxrdf::Variable; /// use oxrdf::Variable;
/// use std::str::FromStr; /// use std::str::FromStr;
/// ///
/// assert_eq!( /// assert_eq!(Variable::from_str("$foo").unwrap(), Variable::new("foo").unwrap())
/// Variable::from_str("$foo").unwrap(),
/// Variable::new("foo").unwrap()
/// )
/// ``` /// ```
fn from_str(s: &str) -> Result<Self, Self::Err> { fn from_str(s: &str) -> Result<Self, TermParseError> {
if !s.starts_with('?') && !s.starts_with('$') { if !s.starts_with('?') && !s.starts_with('$') {
return Err(Self::Err::msg( return Err(TermParseError::msg(
"Variable serialization should start with ? or $", "Variable serialization should start with ? or $",
)); ));
} }
Self::new(&s[1..]).map_err(|error| { Self::new(&s[1..]).map_err(|error| TermParseError {
TermParseError(TermParseErrorKind::Variable { kind: TermParseErrorKind::Variable {
value: s.to_owned(), value: s.to_owned(),
error, error,
}) },
}) })
} }
} }
@ -173,11 +143,11 @@ fn read_named_node(s: &str) -> Result<(NamedNode, &str), TermParseError> {
.ok_or_else(|| TermParseError::msg("Named node serialization should end with a >"))?; .ok_or_else(|| TermParseError::msg("Named node serialization should end with a >"))?;
let (value, remain) = remain.split_at(end); let (value, remain) = remain.split_at(end);
let remain = &remain[1..]; let remain = &remain[1..];
let term = NamedNode::new(value).map_err(|error| { let term = NamedNode::new(value).map_err(|error| TermParseError {
TermParseError(TermParseErrorKind::Iri { kind: TermParseErrorKind::Iri {
value: value.to_owned(), value: value.to_owned(),
error, error,
}) },
})?; })?;
Ok((term, remain)) Ok((term, remain))
} else { } else {
@ -191,17 +161,14 @@ fn read_blank_node(s: &str) -> Result<(BlankNode, &str), TermParseError> {
let s = s.trim(); let s = s.trim();
if let Some(remain) = s.strip_prefix("_:") { if let Some(remain) = s.strip_prefix("_:") {
let end = remain let end = remain
.find(|v: char| { .find(|v: char| v.is_whitespace() || matches!(v, '<' | '_' | '?' | '$' | '"' | '\''))
v.is_whitespace()
|| matches!(v, '<' | '_' | '?' | '$' | '"' | '\'' | '>' | '@' | '^')
})
.unwrap_or(remain.len()); .unwrap_or(remain.len());
let (value, remain) = remain.split_at(end); let (value, remain) = remain.split_at(end);
let term = BlankNode::new(value).map_err(|error| { let term = BlankNode::new(value).map_err(|error| TermParseError {
TermParseError(TermParseErrorKind::BlankNode { kind: TermParseErrorKind::BlankNode {
value: value.to_owned(), value: value.to_owned(),
error, error,
}) },
})?; })?;
Ok((term, remain)) Ok((term, remain))
} else { } else {
@ -227,11 +194,11 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
let (language, remain) = remain.split_at(end); let (language, remain) = remain.split_at(end);
Ok(( Ok((
Literal::new_language_tagged_literal(value, language).map_err( Literal::new_language_tagged_literal(value, language).map_err(
|error| { |error| TermParseError {
TermParseError(TermParseErrorKind::LanguageTag { kind: TermParseErrorKind::LanguageTag {
value: language.to_owned(), value: language.to_owned(),
error, error,
}) },
}, },
)?, )?,
remain, remain,
@ -247,10 +214,10 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
if let Some(c) = chars.next() { if let Some(c) = chars.next() {
value.push(match c { value.push(match c {
't' => '\t', 't' => '\t',
'b' => '\u{08}', 'b' => '\u{8}',
'n' => '\n', 'n' => '\n',
'r' => '\r', 'r' => '\r',
'f' => '\u{0C}', 'f' => '\u{C}',
'"' => '"', '"' => '"',
'\'' => '\'', '\'' => '\'',
'\\' => '\\', '\\' => '\\',
@ -262,7 +229,7 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
return Err(TermParseError::msg("Unexpected literal end")); return Err(TermParseError::msg("Unexpected literal end"));
} }
} }
_ => value.push(c), c => value.push(c),
} }
} }
Err(TermParseError::msg("Unexpected literal end")) Err(TermParseError::msg("Unexpected literal end"))
@ -356,7 +323,7 @@ fn read_term(s: &str, number_of_recursive_calls: usize) -> Result<(Term, &str),
Term::Literal(_) => { Term::Literal(_) => {
return Err(TermParseError::msg( return Err(TermParseError::msg(
"Literals are not allowed in subject position", "Literals are not allowed in subject position",
)); ))
} }
Term::Triple(s) => Subject::Triple(s), Term::Triple(s) => Subject::Triple(s),
}, },
@ -400,7 +367,7 @@ fn read_hexa_char(input: &mut Chars<'_>, len: usize) -> Result<char, TermParseEr
_ => { _ => {
return Err(TermParseError::msg( return Err(TermParseError::msg(
"Unexpected character in a unicode escape", "Unexpected character in a unicode escape",
)); ))
} }
} }
} else { } else {
@ -411,59 +378,67 @@ fn read_hexa_char(input: &mut Chars<'_>, len: usize) -> Result<char, TermParseEr
} }
/// An error raised during term serialization parsing using the [`FromStr`] trait. /// An error raised during term serialization parsing using the [`FromStr`] trait.
#[derive(Debug, thiserror::Error)] #[derive(Debug)]
#[error(transparent)] pub struct TermParseError {
pub struct TermParseError(#[from] TermParseErrorKind); kind: TermParseErrorKind,
}
/// An internal error raised during term serialization parsing using the [`FromStr`] trait. #[derive(Debug)]
#[derive(Debug, thiserror::Error)]
enum TermParseErrorKind { enum TermParseErrorKind {
#[error("Error while parsing the named node '{value}': {error}")] Iri {
Iri { error: IriParseError, value: String }, error: IriParseError,
#[error("Error while parsing the blank node '{value}': {error}")] value: String,
},
BlankNode { BlankNode {
error: BlankNodeIdParseError, error: BlankNodeIdParseError,
value: String, value: String,
}, },
#[error("Error while parsing the language tag '{value}': {error}")]
LanguageTag { LanguageTag {
error: LanguageTagParseError, error: LanguageTagParseError,
value: String, value: String,
}, },
#[error("Error while parsing the variable '{value}': {error}")]
Variable { Variable {
error: VariableNameParseError, error: VariableNameParseError,
value: String, value: String,
}, },
#[error("{0}")] Msg {
Msg(&'static str), msg: &'static str,
},
} }
impl TermParseError { impl fmt::Display for TermParseError {
pub(crate) fn msg(msg: &'static str) -> Self { #[inline]
Self(TermParseErrorKind::Msg(msg)) fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self.kind {
TermParseErrorKind::Iri { error, value } => write!(
f,
"Error while parsing the named node '{}': {}",
value, error
),
TermParseErrorKind::BlankNode { error, value } => write!(
f,
"Error while parsing the blank node '{}': {}",
value, error
),
TermParseErrorKind::LanguageTag { error, value } => write!(
f,
"Error while parsing the language tag '{}': {}",
value, error
),
TermParseErrorKind::Variable { error, value } => {
write!(f, "Error while parsing the variable '{}': {}", value, error)
}
TermParseErrorKind::Msg { msg } => f.write_str(msg),
}
} }
} }
#[cfg(test)] impl Error for TermParseError {}
#[cfg(feature = "rdf-star")]
mod tests {
use super::*;
#[test] impl TermParseError {
fn triple_term_parsing() { pub(crate) fn msg(msg: &'static str) -> Self {
assert_eq!( Self {
Term::from_str("\"ex\"").unwrap(), kind: TermParseErrorKind::Msg { msg },
Literal::new_simple_literal("ex").into() }
);
assert_eq!(
Term::from_str("<< _:s <http://example.com/p> \"o\" >>").unwrap(),
Triple::new(
BlankNode::new("s").unwrap(),
NamedNode::new("http://example.com/p").unwrap(),
Literal::new_simple_literal("o"),
)
.into()
);
} }
} }
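The `thiserror` rewrite keeps the wrapper struct opaque while the `#[error(...)]` attributes reproduce the old hand-written `Display` messages. A sketch of what callers observe; the `@12` input is illustrative, and only the error conditions are asserted:

```rust
use oxrdf::{Literal, NamedNode};
use std::str::FromStr;

fn main() {
    // Trailing input after a complete term is rejected.
    assert!(NamedNode::from_str("<http://example.com> x").is_err());

    // An invalid language tag surfaces as the LanguageTag error kind,
    // e.g. "Error while parsing the language tag '12': ..."
    let err = Literal::from_str("\"foo\"@12").unwrap_err();
    println!("{err}");
}
```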

@ -2,11 +2,10 @@ use crate::blank_node::BlankNode;
use crate::literal::Literal; use crate::literal::Literal;
use crate::named_node::NamedNode; use crate::named_node::NamedNode;
use crate::{BlankNodeRef, LiteralRef, NamedNodeRef}; use crate::{BlankNodeRef, LiteralRef, NamedNodeRef};
use serde::{Deserialize, Serialize};
use std::fmt; use std::fmt;
/// The owned union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). /// The owned union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum NamedOrBlankNode { pub enum NamedOrBlankNode {
NamedNode(NamedNode), NamedNode(NamedNode),
BlankNode(BlankNode), BlankNode(BlankNode),
@ -153,7 +152,7 @@ impl<'a> From<NamedOrBlankNodeRef<'a>> for NamedOrBlankNode {
} }
/// The owned union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) (if the `rdf-star` feature is enabled). /// The owned union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) (if the `rdf-star` feature is enabled).
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum Subject { pub enum Subject {
NamedNode(NamedNode), NamedNode(NamedNode),
BlankNode(BlankNode), BlankNode(BlankNode),
@ -309,7 +308,7 @@ impl fmt::Display for SubjectRef<'_> {
Self::NamedNode(node) => node.fmt(f), Self::NamedNode(node) => node.fmt(f),
Self::BlankNode(node) => node.fmt(f), Self::BlankNode(node) => node.fmt(f),
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
Self::Triple(triple) => write!(f, "<<{triple}>>"), Self::Triple(triple) => write!(f, "<<{}>>", triple),
} }
} }
} }
@ -383,7 +382,7 @@ impl<'a> From<&'a NamedOrBlankNode> for SubjectRef<'a> {
/// An owned RDF [term](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term). /// An owned RDF [term](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term).
/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) (if the `rdf-star` feature is enabled). /// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) (if the `rdf-star` feature is enabled).
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum Term { pub enum Term {
NamedNode(NamedNode), NamedNode(NamedNode),
BlankNode(BlankNode), BlankNode(BlankNode),
@ -535,72 +534,6 @@ impl From<SubjectRef<'_>> for Term {
} }
} }
impl TryFrom<Term> for NamedNode {
type Error = TryFromTermError;
#[inline]
fn try_from(term: Term) -> Result<Self, Self::Error> {
if let Term::NamedNode(node) = term {
Ok(node)
} else {
Err(TryFromTermError {
term,
target: "NamedNode",
})
}
}
}
impl TryFrom<Term> for BlankNode {
type Error = TryFromTermError;
#[inline]
fn try_from(term: Term) -> Result<Self, Self::Error> {
if let Term::BlankNode(node) = term {
Ok(node)
} else {
Err(TryFromTermError {
term,
target: "BlankNode",
})
}
}
}
impl TryFrom<Term> for Literal {
type Error = TryFromTermError;
#[inline]
fn try_from(term: Term) -> Result<Self, Self::Error> {
if let Term::Literal(node) = term {
Ok(node)
} else {
Err(TryFromTermError {
term,
target: "Literal",
})
}
}
}
impl TryFrom<Term> for Subject {
type Error = TryFromTermError;
#[inline]
fn try_from(term: Term) -> Result<Self, Self::Error> {
match term {
Term::NamedNode(term) => Ok(Self::NamedNode(term)),
Term::BlankNode(term) => Ok(Self::BlankNode(term)),
#[cfg(feature = "rdf-star")]
Term::Triple(term) => Ok(Self::Triple(term)),
Term::Literal(_) => Err(TryFromTermError {
term,
target: "Subject",
}),
}
}
}
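These `TryFrom<Term>` impls give a checked way back down from the `Term` union; on failure, `TryFromTermError` reports the offending term and the target type. For example:

```rust
use oxrdf::{Literal, NamedNode, Term};

fn main() {
    let term: Term = NamedNode::new("http://example.com").unwrap().into();
    let named = NamedNode::try_from(term).unwrap();
    assert_eq!(named.as_str(), "http://example.com");

    // A literal cannot become a NamedNode (or a Subject): TryFromTermError.
    let term: Term = Literal::new_simple_literal("foo").into();
    assert!(NamedNode::try_from(term).is_err());
}
```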
/// A borrowed RDF [term](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term). /// A borrowed RDF [term](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term).
/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) (if the `rdf-star` feature is enabled). /// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) (if the `rdf-star` feature is enabled).
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
@ -655,7 +588,7 @@ impl fmt::Display for TermRef<'_> {
Self::Literal(literal) => literal.fmt(f), Self::Literal(literal) => literal.fmt(f),
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]
Self::Triple(triple) => { Self::Triple(triple) => {
write!(f, "<<{triple}>>") write!(f, "<<{}>>", triple)
} }
} }
} }
@ -765,7 +698,7 @@ impl<'a> From<TermRef<'a>> for Term {
/// ///
/// The default string formatter returns an N-Triples, Turtle, and SPARQL compatible representation: /// The default string formatter returns an N-Triples, Turtle, and SPARQL compatible representation:
/// ``` /// ```
/// use oxrdf::{NamedNode, Triple}; /// use oxrdf::{Triple, NamedNode};
/// ///
/// assert_eq!( /// assert_eq!(
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o>", /// "<http://example.com/s> <http://example.com/p> <http://example.com/o>",
@ -773,12 +706,11 @@ impl<'a> From<TermRef<'a>> for Term {
/// subject: NamedNode::new("http://example.com/s")?.into(), /// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?, /// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(), /// object: NamedNode::new("http://example.com/o")?.into(),
/// } /// }.to_string()
/// .to_string()
/// ); /// );
/// # Result::<_,oxrdf::IriParseError>::Ok(()) /// # Result::<_,oxrdf::IriParseError>::Ok(())
/// ``` /// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Triple { pub struct Triple {
/// The [subject](https://www.w3.org/TR/rdf11-concepts/#dfn-subject) of this triple. /// The [subject](https://www.w3.org/TR/rdf11-concepts/#dfn-subject) of this triple.
pub subject: Subject, pub subject: Subject,
@ -805,22 +737,6 @@ impl Triple {
} }
} }
/// Builds an RDF [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) from [`Term`]s.
///
/// Returns a [`TryFromTermError`] error if the generated triple would be ill-formed.
#[inline]
pub fn from_terms(
subject: impl Into<Term>,
predicate: impl Into<Term>,
object: impl Into<Term>,
) -> Result<Self, TryFromTermError> {
Ok(Self {
subject: subject.into().try_into()?,
predicate: predicate.into().try_into()?,
object: object.into(),
})
}
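`Triple::from_terms` builds on the `TryFrom<Term>` impls above: each position is converted with `try_into`, so an ill-formed combination is rejected instead of panicking. For example:

```rust
use oxrdf::{Literal, NamedNode, Triple};

fn main() {
    let p = NamedNode::new("http://example.com/p").unwrap();
    let o = NamedNode::new("http://example.com/o").unwrap();

    // A named node is a valid subject...
    let s = NamedNode::new("http://example.com/s").unwrap();
    assert!(Triple::from_terms(s, p.clone(), o.clone()).is_ok());

    // ...but a literal subject makes the triple ill-formed.
    assert!(Triple::from_terms(Literal::new_simple_literal("x"), p, o).is_err());
}
```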
/// Encodes that this triple is in an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). /// Encodes that this triple is in an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
#[inline] #[inline]
pub fn in_graph(self, graph_name: impl Into<GraphName>) -> Quad { pub fn in_graph(self, graph_name: impl Into<GraphName>) -> Quad {
@ -853,7 +769,7 @@ impl fmt::Display for Triple {
/// ///
/// The default string formatter returns an N-Triples, Turtle, and SPARQL compatible representation: /// The default string formatter returns an N-Triples, Turtle, and SPARQL compatible representation:
/// ``` /// ```
/// use oxrdf::{NamedNodeRef, TripleRef}; /// use oxrdf::{TripleRef, NamedNodeRef};
/// ///
/// assert_eq!( /// assert_eq!(
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o>", /// "<http://example.com/s> <http://example.com/p> <http://example.com/o>",
@ -861,8 +777,7 @@ impl fmt::Display for Triple {
/// subject: NamedNodeRef::new("http://example.com/s")?.into(), /// subject: NamedNodeRef::new("http://example.com/s")?.into(),
/// predicate: NamedNodeRef::new("http://example.com/p")?, /// predicate: NamedNodeRef::new("http://example.com/p")?,
/// object: NamedNodeRef::new("http://example.com/o")?.into(), /// object: NamedNodeRef::new("http://example.com/o")?.into(),
/// } /// }.to_string()
/// .to_string()
/// ); /// );
/// # Result::<_,oxrdf::IriParseError>::Ok(()) /// # Result::<_,oxrdf::IriParseError>::Ok(())
/// ``` /// ```
@@ -938,11 +853,10 @@ impl<'a> From<TripleRef<'a>> for Triple {
/// A possible owned graph name.
/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph).
- #[derive(Eq, PartialEq, Debug, Clone, Hash, Default, Serialize, Deserialize)]
+ #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum GraphName {
    NamedNode(NamedNode),
    BlankNode(BlankNode),
-     #[default]
    DefaultGraph,
}
@@ -1007,30 +921,12 @@ impl From<BlankNodeRef<'_>> for GraphName {
    }
}

- impl From<NamedOrBlankNode> for GraphName {
-     #[inline]
-     fn from(node: NamedOrBlankNode) -> Self {
-         match node {
-             NamedOrBlankNode::NamedNode(node) => node.into(),
-             NamedOrBlankNode::BlankNode(node) => node.into(),
-         }
-     }
- }
-
- impl From<NamedOrBlankNodeRef<'_>> for GraphName {
-     #[inline]
-     fn from(node: NamedOrBlankNodeRef<'_>) -> Self {
-         node.into_owned().into()
-     }
- }
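These impls are conveniences rather than behavior changes; a minimal sketch of what they allow on the side of the diff that keeps them:

```rust
use oxrdf::{BlankNode, GraphName, NamedOrBlankNode};

// With `From<NamedOrBlankNode>`, either node kind converts into a
// graph name without matching on the enum by hand.
let node: NamedOrBlankNode = BlankNode::default().into();
let graph: GraphName = node.into();
assert!(matches!(graph, GraphName::BlankNode(_)));
```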
/// A possible borrowed graph name.
/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph).
- #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash, Default)]
+ #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
pub enum GraphNameRef<'a> {
    NamedNode(NamedNodeRef<'a>),
    BlankNode(BlankNodeRef<'a>),
-     #[default]
    DefaultGraph,
}
@@ -1066,7 +962,7 @@ impl fmt::Display for GraphNameRef<'_> {
        match self {
            Self::NamedNode(node) => node.fmt(f),
            Self::BlankNode(node) => node.fmt(f),
-             Self::DefaultGraph => f.write_str("DEFAULT"),
+             Self::DefaultGraph => write!(f, "DEFAULT"),
        }
    }
}
@@ -1099,23 +995,6 @@ impl<'a> From<&'a BlankNode> for GraphNameRef<'a> {
    }
}

- impl<'a> From<NamedOrBlankNodeRef<'a>> for GraphNameRef<'a> {
-     #[inline]
-     fn from(node: NamedOrBlankNodeRef<'a>) -> Self {
-         match node {
-             NamedOrBlankNodeRef::NamedNode(node) => node.into(),
-             NamedOrBlankNodeRef::BlankNode(node) => node.into(),
-         }
-     }
- }
-
- impl<'a> From<&'a NamedOrBlankNode> for GraphNameRef<'a> {
-     #[inline]
-     fn from(node: &'a NamedOrBlankNode) -> Self {
-         node.as_ref().into()
-     }
- }
impl<'a> From<&'a GraphName> for GraphNameRef<'a> {
    #[inline]
    fn from(node: &'a GraphName) -> Self {
@@ -1147,7 +1026,7 @@ impl<'a> From<GraphNameRef<'a>> for GraphName {
/// );
/// # Result::<_,oxrdf::IriParseError>::Ok(())
/// ```
- #[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)]
+ #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Quad {
    /// The [subject](https://www.w3.org/TR/rdf11-concepts/#dfn-subject) of this triple.
    pub subject: Subject,
@@ -1271,7 +1150,7 @@ impl<'a> QuadRef<'a> {
impl fmt::Display for QuadRef<'_> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-         if self.graph_name.is_default_graph() {
+         if self.graph_name == GraphNameRef::DefaultGraph {
            write!(f, "{} {} {}", self.subject, self.predicate, self.object)
        } else {
            write!(
@@ -1307,62 +1186,3 @@ impl<'a> From<QuadRef<'a>> for Quad {
        quad.into_owned()
    }
}
- /// An error returned by some [`TryFrom<Term>`](TryFrom) implementations.
- #[derive(Debug, Clone, thiserror::Error)]
- #[error("{term} can not be converted to a {target}")]
- pub struct TryFromTermError {
-     pub(crate) term: Term,
-     pub(crate) target: &'static str,
- }
-
- impl TryFromTermError {
-     /// The term that can't be converted
-     #[inline]
-     pub fn into_term(self) -> Term {
-         self.term
-     }
- }
-
- #[cfg(test)]
- #[allow(clippy::panic_in_result_fn)]
- mod tests {
-     use super::*;
-
-     #[test]
-     fn triple_from_terms() -> Result<(), TryFromTermError> {
-         assert_eq!(
-             Triple::from_terms(
-                 NamedNode::new_unchecked("http://example.com/s"),
-                 NamedNode::new_unchecked("http://example.com/p"),
-                 NamedNode::new_unchecked("http://example.com/o"),
-             )?,
-             Triple::new(
-                 NamedNode::new_unchecked("http://example.com/s"),
-                 NamedNode::new_unchecked("http://example.com/p"),
-                 NamedNode::new_unchecked("http://example.com/o"),
-             )
-         );
-         assert_eq!(
-             Triple::from_terms(
-                 Literal::new_simple_literal("foo"),
-                 NamedNode::new_unchecked("http://example.com/p"),
-                 NamedNode::new_unchecked("http://example.com/o"),
-             )
-             .unwrap_err()
-             .into_term(),
-             Term::from(Literal::new_simple_literal("foo"))
-         );
-         assert_eq!(
-             Triple::from_terms(
-                 NamedNode::new_unchecked("http://example.com/s"),
-                 Literal::new_simple_literal("foo"),
-                 NamedNode::new_unchecked("http://example.com/o"),
-             )
-             .unwrap_err()
-             .into_term(),
-             Term::from(Literal::new_simple_literal("foo"))
-         );
-         Ok(())
-     }
- }

@@ -1,4 +1,5 @@
use std::cmp::Ordering;
+ use std::error::Error;
use std::fmt;

/// A [SPARQL query](https://www.w3.org/TR/sparql11-query/) owned variable.
@@ -7,7 +8,10 @@ use std::fmt;
/// ```
/// use oxrdf::{Variable, VariableNameParseError};
///
- /// assert_eq!("?foo", Variable::new("foo")?.to_string());
+ /// assert_eq!(
+ ///     "?foo",
+ ///     Variable::new("foo")?.to_string()
+ /// );
/// # Result::<_,VariableNameParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
@@ -63,9 +67,12 @@ impl fmt::Display for Variable {
///
/// The default string formatter returns a SPARQL-compatible representation:
/// ```
- /// use oxrdf::{VariableNameParseError, VariableRef};
+ /// use oxrdf::{VariableRef, VariableNameParseError};
///
- /// assert_eq!("?foo", VariableRef::new("foo")?.to_string());
+ /// assert_eq!(
+ ///     "?foo",
+ ///     VariableRef::new("foo")?.to_string()
+ /// );
/// # Result::<_,VariableNameParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
@@ -89,12 +96,12 @@ impl<'a> VariableRef<'a> {
    ///
    /// [`Variable::new()`] is a safe version of this constructor and should be used for untrusted data.
    #[inline]
-     pub const fn new_unchecked(name: &'a str) -> Self {
+     pub fn new_unchecked(name: &'a str) -> Self {
        Self { name }
    }

    #[inline]
-     pub const fn as_str(self) -> &'a str {
+     pub fn as_str(&self) -> &str {
        self.name
    }
@@ -162,7 +169,7 @@ impl PartialOrd<VariableRef<'_>> for Variable {
fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError> {
    let mut chars = id.chars();
-     let front = chars.next().ok_or(VariableNameParseError)?;
+     let front = chars.next().ok_or(VariableNameParseError {})?;
    match front {
        '0'..='9'
        | '_'
@@ -181,13 +188,13 @@ fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError>
        | '\u{F900}'..='\u{FDCF}'
        | '\u{FDF0}'..='\u{FFFD}'
        | '\u{10000}'..='\u{EFFFF}' => (),
-         _ => return Err(VariableNameParseError),
+         _ => return Err(VariableNameParseError {}),
    }
    for c in chars {
        match c {
            '0'..='9'
            | '\u{00B7}'
-             | '\u{0300}'..='\u{036F}'
+             | '\u{00300}'..='\u{036F}'
            | '\u{203F}'..='\u{2040}'
            | '_'
            | 'A'..='Z'
@@ -204,13 +211,21 @@ fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError>
            | '\u{F900}'..='\u{FDCF}'
            | '\u{FDF0}'..='\u{FFFD}'
            | '\u{10000}'..='\u{EFFFF}' => (),
-             _ => return Err(VariableNameParseError),
+             _ => return Err(VariableNameParseError {}),
        }
    }
    Ok(())
}
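The character ranges above track the SPARQL `VARNAME` production; a quick sketch of what the validator accepts and rejects (the names are illustrative):

```rust
use oxrdf::Variable;

// Leading digits are allowed by the VARNAME production,
// but characters such as '-' are outside every accepted range.
assert!(Variable::new("foo").is_ok());
assert!(Variable::new("42abc").is_ok());
assert!(Variable::new("foo-bar").is_err());
assert!(Variable::new("").is_err());
```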
/// An error raised during [`Variable`] name validation.
- #[derive(Debug, thiserror::Error)]
- #[error("The variable name is invalid")]
- pub struct VariableNameParseError;
+ #[derive(Debug)]
+ pub struct VariableNameParseError {}
+
+ impl fmt::Display for VariableNameParseError {
+     #[inline]
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         write!(f, "The variable name is invalid")
+     }
+ }
+
+ impl Error for VariableNameParseError {}

@@ -231,12 +231,3 @@ pub mod xsd {
    pub const YEAR_MONTH_DURATION: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#yearMonthDuration");
}
- pub mod geosparql {
-     //! [GeoSpatial](https://opengeospatial.github.io/ogc-geosparql/) vocabulary.
-
-     use crate::named_node::NamedNodeRef;
-
-     /// Geospatial datatype like `"Point({longitude} {latitude})"^^geo:wktLiteral`
-     pub const WKT_LITERAL: NamedNodeRef<'_> =
-         NamedNodeRef::new_unchecked("http://www.opengis.net/ont/geosparql#wktLiteral");
- }
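A minimal sketch of typing a WKT literal with this constant on the side of the diff that keeps the module (the coordinates are illustrative, and the `oxrdf::vocab::geosparql` path is an assumption based on the module above):

```rust
use oxrdf::vocab::geosparql;
use oxrdf::Literal;

// Build a WKT point literal typed with geo:wktLiteral.
let point = Literal::new_typed_literal("POINT(2.35 48.85)", geosparql::WKT_LITERAL);
assert_eq!(
    point.to_string(),
    "\"POINT(2.35 48.85)\"^^<http://www.opengis.net/ont/geosparql#wktLiteral>"
);
```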

@@ -1,36 +0,0 @@
[package]
name = "oxrdfio"
version = "0.1.0-alpha.5"
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF"]
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfxml"
documentation = "https://docs.rs/oxrdfio"
description = """
Parser and serializer for various RDF formats
"""
edition.workspace = true
rust-version.workspace = true
[features]
default = []
async-tokio = ["dep:tokio", "oxrdfxml/async-tokio", "oxttl/async-tokio"]
rdf-star = ["oxrdf/rdf-star", "oxttl/rdf-star"]
[dependencies]
oxrdf.workspace = true
oxrdfxml.workspace = true
oxttl.workspace = true
thiserror.workspace = true
tokio = { workspace = true, optional = true, features = ["io-util"] }
[dev-dependencies]
tokio = { workspace = true, features = ["rt", "macros"] }
[lints]
workspace = true
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]

@@ -1,67 +0,0 @@
OxRDF I/O
=========
[![Latest Version](https://img.shields.io/crates/v/oxrdfio.svg)](https://crates.io/crates/oxrdfio)
[![Released API docs](https://docs.rs/oxrdfio/badge.svg)](https://docs.rs/oxrdfio)
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfio)](https://crates.io/crates/oxrdfio)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
OxRDF I/O is a set of parsers and serializers for RDF.
It supports:
* [N3](https://w3c.github.io/N3/spec/) using [`oxttl`](https://crates.io/crates/oxttl)
* [N-Quads](https://www.w3.org/TR/n-quads/) using [`oxttl`](https://crates.io/crates/oxttl)
* [N-Triples](https://www.w3.org/TR/n-triples/) using [`oxttl`](https://crates.io/crates/oxttl)
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) using [`oxrdfxml`](https://crates.io/crates/oxrdfxml)
* [TriG](https://www.w3.org/TR/trig/) using [`oxttl`](https://crates.io/crates/oxttl)
* [Turtle](https://www.w3.org/TR/turtle/) using [`oxttl`](https://crates.io/crates/oxttl)
Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is also available behind the `rdf-star` feature for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star), [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star), [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) and [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star).
It is designed as a low level parser compatible with both synchronous and asynchronous I/O (behind the `async-tokio` feature).
The entry points of this library are the two [`RdfParser`] and [`RdfSerializer`] structs.
Usage example converting a Turtle file to a N-Triples file:
```rust
use oxrdfio::{RdfFormat, RdfParser, RdfSerializer};
let turtle_file = b"@base <http://example.com/> .
@prefix schema: <http://schema.org/> .
<foo> a schema:Person ;
schema:name \"Foo\" .
<bar> a schema:Person ;
schema:name \"Bar\" .";
let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
<http://example.com/foo> <http://schema.org/name> \"Foo\" .
<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
<http://example.com/bar> <http://schema.org/name> \"Bar\" .
";
let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) {
writer.write_quad(&quad.unwrap()).unwrap();
}
assert_eq!(writer.finish().unwrap(), ntriples_file);
```
Parsers for other RDF formats exist in Rust, such as [graph-rdfa-processor](https://github.com/nbittich/graph-rdfa-processor) for RDFa and [json-ld](https://github.com/timothee-haudebourg/json-ld) for JSON-LD.
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@@ -1,122 +0,0 @@
use std::io;
use std::ops::Range;
/// Error returned during RDF format parsing.
#[derive(Debug, thiserror::Error)]
pub enum RdfParseError {
/// I/O error during parsing (file not found...).
#[error(transparent)]
Io(#[from] io::Error),
/// An error in the file syntax.
#[error(transparent)]
Syntax(#[from] RdfSyntaxError),
}
impl RdfParseError {
pub(crate) fn msg(msg: &'static str) -> Self {
Self::Syntax(RdfSyntaxError(SyntaxErrorKind::Msg(msg)))
}
}
impl From<oxttl::TurtleSyntaxError> for RdfSyntaxError {
#[inline]
fn from(error: oxttl::TurtleSyntaxError) -> Self {
Self(SyntaxErrorKind::Turtle(error))
}
}
impl From<oxttl::TurtleParseError> for RdfParseError {
#[inline]
fn from(error: oxttl::TurtleParseError) -> Self {
match error {
oxttl::TurtleParseError::Syntax(e) => Self::Syntax(e.into()),
oxttl::TurtleParseError::Io(e) => Self::Io(e),
}
}
}
impl From<oxrdfxml::RdfXmlSyntaxError> for RdfSyntaxError {
#[inline]
fn from(error: oxrdfxml::RdfXmlSyntaxError) -> Self {
Self(SyntaxErrorKind::RdfXml(error))
}
}
impl From<oxrdfxml::RdfXmlParseError> for RdfParseError {
#[inline]
fn from(error: oxrdfxml::RdfXmlParseError) -> Self {
match error {
oxrdfxml::RdfXmlParseError::Syntax(e) => Self::Syntax(e.into()),
oxrdfxml::RdfXmlParseError::Io(e) => Self::Io(e),
}
}
}
impl From<RdfParseError> for io::Error {
#[inline]
fn from(error: RdfParseError) -> Self {
match error {
RdfParseError::Io(error) => error,
RdfParseError::Syntax(error) => error.into(),
}
}
}
/// An error in the syntax of the parsed file.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct RdfSyntaxError(#[from] SyntaxErrorKind);
/// An error in the syntax of the parsed file.
#[derive(Debug, thiserror::Error)]
enum SyntaxErrorKind {
#[error(transparent)]
Turtle(#[from] oxttl::TurtleSyntaxError),
#[error(transparent)]
RdfXml(#[from] oxrdfxml::RdfXmlSyntaxError),
#[error("{0}")]
Msg(&'static str),
}
impl RdfSyntaxError {
/// The location of the error inside of the file.
#[inline]
pub fn location(&self) -> Option<Range<TextPosition>> {
match &self.0 {
SyntaxErrorKind::Turtle(e) => {
let location = e.location();
Some(
TextPosition {
line: location.start.line,
column: location.start.column,
offset: location.start.offset,
}..TextPosition {
line: location.end.line,
column: location.end.column,
offset: location.end.offset,
},
)
}
SyntaxErrorKind::RdfXml(_) | SyntaxErrorKind::Msg(_) => None,
}
}
}
impl From<RdfSyntaxError> for io::Error {
#[inline]
fn from(error: RdfSyntaxError) -> Self {
match error.0 {
SyntaxErrorKind::Turtle(error) => error.into(),
SyntaxErrorKind::RdfXml(error) => error.into(),
SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg),
}
}
}
/// A position in a text i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points) and a global file `offset` starting from 0 (in number of bytes).
#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub struct TextPosition {
pub line: u64,
pub column: u64,
pub offset: u64,
}
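Taken together, `RdfSyntaxError::location` and `TextPosition` let a caller point at the offending spot in the input. A minimal sketch, assuming the malformed Turtle below yields a syntax error as the reader's first item:

```rust
use oxrdfio::{RdfFormat, RdfParseError, RdfParser};

// Malformed Turtle: the statement is never terminated properly.
let file = "<http://example.com/s> <http://example.com/p> ???";
let err = RdfParser::from_format(RdfFormat::Turtle)
    .parse_read(file.as_bytes())
    .next()
    .unwrap()
    .unwrap_err();
if let RdfParseError::Syntax(err) = err {
    // Only Turtle-family errors carry a location; RDF/XML returns None.
    if let Some(location) = err.location() {
        println!(
            "syntax error at line {}, column {}",
            location.start.line, location.start.column
        );
    }
}
```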

@@ -1,216 +0,0 @@
use std::fmt;
/// RDF serialization formats.
///
/// This enumeration is non-exhaustive. New formats like JSON-LD might be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
pub enum RdfFormat {
/// [N3](https://w3c.github.io/N3/spec/)
N3,
/// [N-Quads](https://www.w3.org/TR/n-quads/)
NQuads,
/// [N-Triples](https://www.w3.org/TR/n-triples/)
NTriples,
/// [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/)
RdfXml,
/// [TriG](https://www.w3.org/TR/trig/)
TriG,
/// [Turtle](https://www.w3.org/TR/turtle/)
Turtle,
}
impl RdfFormat {
/// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/).
///
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(
/// RdfFormat::NTriples.iri(),
/// "http://www.w3.org/ns/formats/N-Triples"
/// )
/// ```
#[inline]
pub const fn iri(self) -> &'static str {
match self {
Self::N3 => "http://www.w3.org/ns/formats/N3",
Self::NQuads => "http://www.w3.org/ns/formats/N-Quads",
Self::NTriples => "http://www.w3.org/ns/formats/N-Triples",
Self::RdfXml => "http://www.w3.org/ns/formats/RDF_XML",
Self::TriG => "http://www.w3.org/ns/formats/TriG",
Self::Turtle => "http://www.w3.org/ns/formats/Turtle",
}
}
/// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
///
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(RdfFormat::NTriples.media_type(), "application/n-triples")
/// ```
#[inline]
pub const fn media_type(self) -> &'static str {
match self {
Self::N3 => "text/n3",
Self::NQuads => "application/n-quads",
Self::NTriples => "application/n-triples",
Self::RdfXml => "application/rdf+xml",
Self::TriG => "application/trig",
Self::Turtle => "text/turtle",
}
}
/// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension.
///
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(RdfFormat::NTriples.file_extension(), "nt")
/// ```
#[inline]
pub const fn file_extension(self) -> &'static str {
match self {
Self::N3 => "n3",
Self::NQuads => "nq",
Self::NTriples => "nt",
Self::RdfXml => "rdf",
Self::TriG => "trig",
Self::Turtle => "ttl",
}
}
/// The format name.
///
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(RdfFormat::NTriples.name(), "N-Triples")
/// ```
#[inline]
pub const fn name(self) -> &'static str {
match self {
Self::N3 => "N3",
Self::NQuads => "N-Quads",
Self::NTriples => "N-Triples",
Self::RdfXml => "RDF/XML",
Self::TriG => "TriG",
Self::Turtle => "Turtle",
}
}
/// Checks if the format supports [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) and not only [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
///
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(RdfFormat::NTriples.supports_datasets(), false);
/// assert_eq!(RdfFormat::NQuads.supports_datasets(), true);
/// ```
#[inline]
pub const fn supports_datasets(self) -> bool {
matches!(self, Self::NQuads | Self::TriG)
}
/// Checks if the format supports [RDF-star quoted triples](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#dfn-quoted).
///
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(RdfFormat::NTriples.supports_rdf_star(), true);
/// assert_eq!(RdfFormat::RdfXml.supports_rdf_star(), false);
/// ```
#[inline]
#[cfg(feature = "rdf-star")]
pub const fn supports_rdf_star(self) -> bool {
matches!(
self,
Self::NTriples | Self::NQuads | Self::Turtle | Self::TriG
)
}
/// Looks for a known format from a media type.
///
/// It supports some media type aliases.
/// For example, "application/xml" is going to return `RdfFormat::RdfXml` even if it is not its canonical media type.
///
/// Example:
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(
/// RdfFormat::from_media_type("text/turtle; charset=utf-8"),
/// Some(RdfFormat::Turtle)
/// )
/// ```
#[inline]
pub fn from_media_type(media_type: &str) -> Option<Self> {
const MEDIA_SUBTYPES: [(&str, RdfFormat); 10] = [
("n-quads", RdfFormat::NQuads),
("n-triples", RdfFormat::NTriples),
("n3", RdfFormat::N3),
("nquads", RdfFormat::NQuads),
("ntriples", RdfFormat::NTriples),
("plain", RdfFormat::NTriples),
("rdf+xml", RdfFormat::RdfXml),
("trig", RdfFormat::TriG),
("turtle", RdfFormat::Turtle),
("xml", RdfFormat::RdfXml),
];
let (r#type, subtype) = media_type
.split_once(';')
.unwrap_or((media_type, ""))
.0
.split_once('/')?;
let r#type = r#type.trim();
if !r#type.eq_ignore_ascii_case("application") && !r#type.eq_ignore_ascii_case("text") {
return None;
}
let subtype = subtype.trim();
let subtype = subtype.strip_prefix("x-").unwrap_or(subtype);
for (candidate_subtype, candidate_id) in MEDIA_SUBTYPES {
if candidate_subtype.eq_ignore_ascii_case(subtype) {
return Some(candidate_id);
}
}
None
}
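A few consequences of the alias table and the normalization steps above, shown as a quick sketch:

```rust
use oxrdfio::RdfFormat;

// Parameters after ';' are ignored and the experimental "x-" subtype
// prefix is stripped, so aliases resolve to the canonical format.
assert_eq!(
    RdfFormat::from_media_type("application/x-turtle"),
    Some(RdfFormat::Turtle)
);
assert_eq!(
    RdfFormat::from_media_type("text/plain"),
    Some(RdfFormat::NTriples)
);
// Only the "application" and "text" top-level types are considered.
assert_eq!(RdfFormat::from_media_type("image/png"), None);
```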
/// Looks for a known format from an extension.
///
/// It supports some aliases.
///
/// Example:
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(RdfFormat::from_extension("nt"), Some(RdfFormat::NTriples))
/// ```
#[inline]
pub fn from_extension(extension: &str) -> Option<Self> {
const MEDIA_TYPES: [(&str, RdfFormat); 8] = [
("n3", RdfFormat::N3),
("nq", RdfFormat::NQuads),
("nt", RdfFormat::NTriples),
("rdf", RdfFormat::RdfXml),
("trig", RdfFormat::TriG),
("ttl", RdfFormat::Turtle),
("txt", RdfFormat::NTriples),
("xml", RdfFormat::RdfXml),
];
for (candidate_extension, candidate_id) in MEDIA_TYPES {
if candidate_extension.eq_ignore_ascii_case(extension) {
return Some(candidate_id);
}
}
None
}
}
impl fmt::Display for RdfFormat {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.name())
}
}

@@ -1,19 +0,0 @@
#![doc = include_str!("../README.md")]
#![doc(test(attr(deny(warnings))))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
mod error;
mod format;
mod parser;
mod serializer;
pub use error::{RdfParseError, RdfSyntaxError, TextPosition};
pub use format::RdfFormat;
#[cfg(feature = "async-tokio")]
pub use parser::FromTokioAsyncReadQuadReader;
pub use parser::{FromReadQuadReader, RdfParser};
#[cfg(feature = "async-tokio")]
pub use serializer::ToTokioAsyncWriteQuadWriter;
pub use serializer::{RdfSerializer, ToWriteQuadWriter};

@@ -1,807 +0,0 @@
//! Utilities to read RDF graphs and datasets.
pub use crate::error::RdfParseError;
use crate::format::RdfFormat;
use oxrdf::{BlankNode, GraphName, IriParseError, Quad, Subject, Term, Triple};
#[cfg(feature = "async-tokio")]
use oxrdfxml::FromTokioAsyncReadRdfXmlReader;
use oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser};
#[cfg(feature = "async-tokio")]
use oxttl::n3::FromTokioAsyncReadN3Reader;
use oxttl::n3::{FromReadN3Reader, N3Parser, N3PrefixesIter, N3Quad, N3Term};
#[cfg(feature = "async-tokio")]
use oxttl::nquads::FromTokioAsyncReadNQuadsReader;
use oxttl::nquads::{FromReadNQuadsReader, NQuadsParser};
#[cfg(feature = "async-tokio")]
use oxttl::ntriples::FromTokioAsyncReadNTriplesReader;
use oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser};
#[cfg(feature = "async-tokio")]
use oxttl::trig::FromTokioAsyncReadTriGReader;
use oxttl::trig::{FromReadTriGReader, TriGParser, TriGPrefixesIter};
#[cfg(feature = "async-tokio")]
use oxttl::turtle::FromTokioAsyncReadTurtleReader;
use oxttl::turtle::{FromReadTurtleReader, TurtleParser, TurtlePrefixesIter};
use std::collections::HashMap;
use std::io::Read;
#[cfg(feature = "async-tokio")]
use tokio::io::AsyncRead;
/// A parser for RDF serialization formats.
///
/// It currently supports the following formats:
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
///
/// Note the useful options:
/// - [`with_base_iri`](Self::with_base_iri) to resolve the relative IRIs.
/// - [`rename_blank_nodes`](Self::rename_blank_nodes) to rename the blank nodes to auto-generated numbers to avoid conflicts when merging RDF graphs together.
/// - [`without_named_graphs`](Self::without_named_graphs) to parse a single graph.
/// - [`unchecked`](Self::unchecked) to skip some validations if the file is already known to be valid.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let quads = parser
/// .parse_read(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct RdfParser {
inner: RdfParserKind,
default_graph: GraphName,
without_named_graphs: bool,
rename_blank_nodes: bool,
}
enum RdfParserKind {
N3(N3Parser),
NQuads(NQuadsParser),
NTriples(NTriplesParser),
RdfXml(RdfXmlParser),
TriG(TriGParser),
Turtle(TurtleParser),
}
impl RdfParser {
/// Builds a parser for the given format.
#[inline]
pub fn from_format(format: RdfFormat) -> Self {
Self {
inner: match format {
RdfFormat::N3 => RdfParserKind::N3(N3Parser::new()),
RdfFormat::NQuads => RdfParserKind::NQuads({
#[cfg(feature = "rdf-star")]
{
NQuadsParser::new().with_quoted_triples()
}
#[cfg(not(feature = "rdf-star"))]
{
NQuadsParser::new()
}
}),
RdfFormat::NTriples => RdfParserKind::NTriples({
#[cfg(feature = "rdf-star")]
{
NTriplesParser::new().with_quoted_triples()
}
#[cfg(not(feature = "rdf-star"))]
{
NTriplesParser::new()
}
}),
RdfFormat::RdfXml => RdfParserKind::RdfXml(RdfXmlParser::new()),
RdfFormat::TriG => RdfParserKind::TriG({
#[cfg(feature = "rdf-star")]
{
TriGParser::new().with_quoted_triples()
}
#[cfg(not(feature = "rdf-star"))]
{
TriGParser::new()
}
}),
RdfFormat::Turtle => RdfParserKind::Turtle({
#[cfg(feature = "rdf-star")]
{
TurtleParser::new().with_quoted_triples()
}
#[cfg(not(feature = "rdf-star"))]
{
TurtleParser::new()
}
}),
},
default_graph: GraphName::DefaultGraph,
without_named_graphs: false,
rename_blank_nodes: false,
}
}
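When the `rdf-star` feature is enabled, the N-Triples, N-Quads, Turtle, and TriG parsers are all built `with_quoted_triples()`, as the conditional blocks above show. A minimal sketch of what that unlocks, assuming the feature is on (the IRIs are illustrative):

```rust
use oxrdfio::{RdfFormat, RdfParser};

// With `rdf-star`, a quoted triple is accepted as the subject.
let file = r#"<< <http://example.com/s> <http://example.com/p> <http://example.com/o> >> <http://example.com/q> "annotation" ."#;
let quads = RdfParser::from_format(RdfFormat::NTriples)
    .parse_read(file.as_bytes())
    .collect::<Result<Vec<_>, _>>()
    .unwrap();
assert_eq!(quads.len(), 1);
```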
/// The format the parser uses.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// assert_eq!(
/// RdfParser::from_format(RdfFormat::Turtle).format(),
/// RdfFormat::Turtle
/// );
/// ```
pub fn format(&self) -> RdfFormat {
match &self.inner {
RdfParserKind::N3(_) => RdfFormat::N3,
RdfParserKind::NQuads(_) => RdfFormat::NQuads,
RdfParserKind::NTriples(_) => RdfFormat::NTriples,
RdfParserKind::RdfXml(_) => RdfFormat::RdfXml,
RdfParserKind::TriG(_) => RdfFormat::TriG,
RdfParserKind::Turtle(_) => RdfFormat::Turtle,
}
}
/// Provides an IRI that could be used to resolve the file's relative IRIs.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "</s> </p> </o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::Turtle).with_base_iri("http://example.com")?;
/// let quads = parser
/// .parse_read(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
self.inner = match self.inner {
RdfParserKind::N3(p) => RdfParserKind::N3(p),
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p),
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p),
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?),
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?),
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?),
};
Ok(self)
}
/// Provides the graph name that should replace the default graph in the returned quads.
///
/// ```
/// use oxrdf::NamedNode;
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::Turtle)
/// .with_default_graph(NamedNode::new("http://example.com/g")?);
/// let quads = parser
/// .parse_read(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].graph_name.to_string(), "<http://example.com/g>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_default_graph(mut self, default_graph: impl Into<GraphName>) -> Self {
self.default_graph = default_graph.into();
self
}
/// Makes the parser fail when it encounters a named graph.
///
/// This function restricts the parser to only parse a single [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) and not an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NQuads).without_named_graphs();
/// assert!(parser.parse_read(file.as_bytes()).next().unwrap().is_err());
/// ```
#[inline]
pub fn without_named_graphs(mut self) -> Self {
self.without_named_graphs = true;
self
}
/// Renames the blank node ids from the ones set in the serialization to random ids.
///
/// This helps avoid id conflicts when merging graphs together.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "_:a <http://example.com/p> <http://example.com/o> .";
///
/// let result1 = RdfParser::from_format(RdfFormat::NQuads)
/// .rename_blank_nodes()
/// .parse_read(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
/// let result2 = RdfParser::from_format(RdfFormat::NQuads)
/// .rename_blank_nodes()
/// .parse_read(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
/// assert_ne!(result1, result2);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn rename_blank_nodes(mut self) -> Self {
self.rename_blank_nodes = true;
self
}
/// Assumes the file is valid to make parsing faster.
///
/// It will skip some validations.
///
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
#[inline]
pub fn unchecked(mut self) -> Self {
self.inner = match self.inner {
RdfParserKind::N3(p) => RdfParserKind::N3(p.unchecked()),
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p.unchecked()),
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p.unchecked()),
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.unchecked()),
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.unchecked()),
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.unchecked()),
};
self
}
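A short sketch of the trade-off: `unchecked()` is only safe when the input is already known to be valid, since the skipped validations can let broken RDF through.

```rust
use oxrdfio::{RdfFormat, RdfParser};

// Data we already know to be valid N-Triples: skip the extra checks.
let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
let quads = RdfParser::from_format(RdfFormat::NTriples)
    .unchecked()
    .parse_read(file.as_bytes())
    .collect::<Result<Vec<_>, _>>()
    .unwrap();
assert_eq!(quads.len(), 1);
```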
/// Parses from a [`Read`] implementation and returns an iterator of quads.
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let quads = parser
/// .parse_read(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
pub fn parse_read<R: Read>(self, reader: R) -> FromReadQuadReader<R> {
FromReadQuadReader {
parser: match self.inner {
RdfParserKind::N3(p) => FromReadQuadReaderKind::N3(p.parse_read(reader)),
RdfParserKind::NQuads(p) => FromReadQuadReaderKind::NQuads(p.parse_read(reader)),
RdfParserKind::NTriples(p) => {
FromReadQuadReaderKind::NTriples(p.parse_read(reader))
}
RdfParserKind::RdfXml(p) => FromReadQuadReaderKind::RdfXml(p.parse_read(reader)),
RdfParserKind::TriG(p) => FromReadQuadReaderKind::TriG(p.parse_read(reader)),
RdfParserKind::Turtle(p) => FromReadQuadReaderKind::Turtle(p.parse_read(reader)),
},
mapper: QuadMapper {
default_graph: self.default_graph.clone(),
without_named_graphs: self.without_named_graphs,
blank_node_map: self.rename_blank_nodes.then(HashMap::new),
},
}
}
/// Parses from a Tokio [`AsyncRead`] implementation and returns an async iterator of quads.
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
/// if let Some(quad) = reader.next().await {
/// assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
/// }
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
self,
reader: R,
) -> FromTokioAsyncReadQuadReader<R> {
FromTokioAsyncReadQuadReader {
parser: match self.inner {
RdfParserKind::N3(p) => {
FromTokioAsyncReadQuadReaderKind::N3(p.parse_tokio_async_read(reader))
}
RdfParserKind::NQuads(p) => {
FromTokioAsyncReadQuadReaderKind::NQuads(p.parse_tokio_async_read(reader))
}
RdfParserKind::NTriples(p) => {
FromTokioAsyncReadQuadReaderKind::NTriples(p.parse_tokio_async_read(reader))
}
RdfParserKind::RdfXml(p) => {
FromTokioAsyncReadQuadReaderKind::RdfXml(p.parse_tokio_async_read(reader))
}
RdfParserKind::TriG(p) => {
FromTokioAsyncReadQuadReaderKind::TriG(p.parse_tokio_async_read(reader))
}
RdfParserKind::Turtle(p) => {
FromTokioAsyncReadQuadReaderKind::Turtle(p.parse_tokio_async_read(reader))
}
},
mapper: QuadMapper {
default_graph: self.default_graph.clone(),
without_named_graphs: self.without_named_graphs,
blank_node_map: self.rename_blank_nodes.then(HashMap::new),
},
}
}
}
impl From<RdfFormat> for RdfParser {
fn from(format: RdfFormat) -> Self {
Self::from_format(format)
}
}
/// Parses an RDF file from a [`Read`] implementation. Can be built using [`RdfParser::parse_read`].
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let quads = parser
/// .parse_read(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct FromReadQuadReader<R: Read> {
parser: FromReadQuadReaderKind<R>,
mapper: QuadMapper,
}
enum FromReadQuadReaderKind<R: Read> {
N3(FromReadN3Reader<R>),
NQuads(FromReadNQuadsReader<R>),
NTriples(FromReadNTriplesReader<R>),
RdfXml(FromReadRdfXmlReader<R>),
TriG(FromReadTriGReader<R>),
Turtle(FromReadTurtleReader<R>),
}
impl<R: Read> Iterator for FromReadQuadReader<R> {
type Item = Result<Quad, RdfParseError>;
fn next(&mut self) -> Option<Self::Item> {
Some(match &mut self.parser {
FromReadQuadReaderKind::N3(parser) => match parser.next()? {
Ok(quad) => self.mapper.map_n3_quad(quad),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::NQuads(parser) => match parser.next()? {
Ok(quad) => self.mapper.map_quad(quad),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::NTriples(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::RdfXml(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::TriG(parser) => match parser.next()? {
Ok(quad) => self.mapper.map_quad(quad),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::Turtle(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
})
}
}
impl<R: Read> FromReadQuadReader<R> {
/// The list of IRI prefixes considered at the current step of the parsing.
///
/// This method returns (prefix name, prefix value) tuples.
/// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
/// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
///
/// An empty iterator is returned if the format does not support prefixes.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = br#"@base <http://example.com/> .
/// @prefix schema: <http://schema.org/> .
/// <foo> a schema:Person ;
/// schema:name "Foo" ."#;
///
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
/// assert!(reader.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
///
/// reader.next().unwrap()?; // We read the first triple
/// assert_eq!(
/// reader.prefixes().collect::<Vec<_>>(),
/// [("schema", "http://schema.org/")]
/// ); // There are now prefixes
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn prefixes(&self) -> PrefixesIter<'_> {
PrefixesIter {
inner: match &self.parser {
FromReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()),
FromReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()),
FromReadQuadReaderKind::Turtle(p) => PrefixesIterKind::Turtle(p.prefixes()),
FromReadQuadReaderKind::NQuads(_)
| FromReadQuadReaderKind::NTriples(_)
| FromReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */
},
}
}
/// The base IRI considered at the current step of the parsing.
///
/// `None` is returned if no base IRI is set or the format does not support base IRIs.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = br#"@base <http://example.com/> .
/// @prefix schema: <http://schema.org/> .
/// <foo> a schema:Person ;
/// schema:name "Foo" ."#;
///
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
/// assert!(reader.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
///
/// reader.next().unwrap()?; // We read the first triple
/// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI.
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn base_iri(&self) -> Option<&str> {
match &self.parser {
FromReadQuadReaderKind::N3(p) => p.base_iri(),
FromReadQuadReaderKind::TriG(p) => p.base_iri(),
FromReadQuadReaderKind::Turtle(p) => p.base_iri(),
FromReadQuadReaderKind::NQuads(_)
| FromReadQuadReaderKind::NTriples(_)
| FromReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
}
}
}
/// Parses an RDF file from a Tokio [`AsyncRead`] implementation. Can be built using [`RdfParser::parse_tokio_async_read`].
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
/// if let Some(quad) = reader.next().await {
/// assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
/// }
/// # Ok(())
/// # }
/// ```
#[must_use]
#[cfg(feature = "async-tokio")]
pub struct FromTokioAsyncReadQuadReader<R: AsyncRead + Unpin> {
parser: FromTokioAsyncReadQuadReaderKind<R>,
mapper: QuadMapper,
}
#[cfg(feature = "async-tokio")]
enum FromTokioAsyncReadQuadReaderKind<R: AsyncRead + Unpin> {
N3(FromTokioAsyncReadN3Reader<R>),
NQuads(FromTokioAsyncReadNQuadsReader<R>),
NTriples(FromTokioAsyncReadNTriplesReader<R>),
RdfXml(FromTokioAsyncReadRdfXmlReader<R>),
TriG(FromTokioAsyncReadTriGReader<R>),
Turtle(FromTokioAsyncReadTurtleReader<R>),
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadQuadReader<R> {
pub async fn next(&mut self) -> Option<Result<Quad, RdfParseError>> {
Some(match &mut self.parser {
FromTokioAsyncReadQuadReaderKind::N3(parser) => match parser.next().await? {
Ok(quad) => self.mapper.map_n3_quad(quad),
Err(e) => Err(e.into()),
},
FromTokioAsyncReadQuadReaderKind::NQuads(parser) => match parser.next().await? {
Ok(quad) => self.mapper.map_quad(quad),
Err(e) => Err(e.into()),
},
FromTokioAsyncReadQuadReaderKind::NTriples(parser) => match parser.next().await? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
FromTokioAsyncReadQuadReaderKind::RdfXml(parser) => match parser.next().await? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
FromTokioAsyncReadQuadReaderKind::TriG(parser) => match parser.next().await? {
Ok(quad) => self.mapper.map_quad(quad),
Err(e) => Err(e.into()),
},
FromTokioAsyncReadQuadReaderKind::Turtle(parser) => match parser.next().await? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
})
}
/// The list of IRI prefixes considered at the current step of the parsing.
///
/// This method returns (prefix name, prefix value) tuples.
/// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
/// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
///
/// An empty iterator is returned if the format does not support prefixes.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
/// let file = br#"@base <http://example.com/> .
/// @prefix schema: <http://schema.org/> .
/// <foo> a schema:Person ;
/// schema:name "Foo" ."#;
///
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
/// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
///
/// reader.next().await.unwrap()?; // We read the first triple
/// assert_eq!(
/// reader.prefixes().collect::<Vec<_>>(),
/// [("schema", "http://schema.org/")]
/// ); // There are now prefixes
/// # Ok(())
/// # }
/// ```
pub fn prefixes(&self) -> PrefixesIter<'_> {
PrefixesIter {
inner: match &self.parser {
FromTokioAsyncReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()),
FromTokioAsyncReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()),
FromTokioAsyncReadQuadReaderKind::Turtle(p) => {
PrefixesIterKind::Turtle(p.prefixes())
}
FromTokioAsyncReadQuadReaderKind::NQuads(_)
| FromTokioAsyncReadQuadReaderKind::NTriples(_)
| FromTokioAsyncReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */
},
}
}
/// The base IRI considered at the current step of the parsing.
///
/// `None` is returned if no base IRI is set or the format does not support base IRIs.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
/// let file = br#"@base <http://example.com/> .
/// @prefix schema: <http://schema.org/> .
/// <foo> a schema:Person ;
/// schema:name "Foo" ."#;
///
/// let mut reader =
/// RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
/// assert!(reader.base_iri().is_none()); // No base IRI at the beginning
///
/// reader.next().await.unwrap()?; // We read the first triple
/// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI
/// # Ok(())
/// # }
/// ```
pub fn base_iri(&self) -> Option<&str> {
match &self.parser {
FromTokioAsyncReadQuadReaderKind::N3(p) => p.base_iri(),
FromTokioAsyncReadQuadReaderKind::TriG(p) => p.base_iri(),
FromTokioAsyncReadQuadReaderKind::Turtle(p) => p.base_iri(),
FromTokioAsyncReadQuadReaderKind::NQuads(_)
| FromTokioAsyncReadQuadReaderKind::NTriples(_)
| FromTokioAsyncReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
}
}
}
/// Iterator on the file prefixes.
///
/// See [`FromReadQuadReader::prefixes`].
pub struct PrefixesIter<'a> {
inner: PrefixesIterKind<'a>,
}
enum PrefixesIterKind<'a> {
Turtle(TurtlePrefixesIter<'a>),
TriG(TriGPrefixesIter<'a>),
N3(N3PrefixesIter<'a>),
None,
}
impl<'a> Iterator for PrefixesIter<'a> {
type Item = (&'a str, &'a str);
#[inline]
fn next(&mut self) -> Option<Self::Item> {
match &mut self.inner {
PrefixesIterKind::Turtle(iter) => iter.next(),
PrefixesIterKind::TriG(iter) => iter.next(),
PrefixesIterKind::N3(iter) => iter.next(),
PrefixesIterKind::None => None,
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
match &self.inner {
PrefixesIterKind::Turtle(iter) => iter.size_hint(),
PrefixesIterKind::TriG(iter) => iter.size_hint(),
PrefixesIterKind::N3(iter) => iter.size_hint(),
PrefixesIterKind::None => (0, Some(0)),
}
}
}
struct QuadMapper {
default_graph: GraphName,
without_named_graphs: bool,
blank_node_map: Option<HashMap<BlankNode, BlankNode>>,
}
impl QuadMapper {
fn map_blank_node(&mut self, node: BlankNode) -> BlankNode {
if let Some(blank_node_map) = &mut self.blank_node_map {
blank_node_map
.entry(node)
.or_insert_with(BlankNode::default)
.clone()
} else {
node
}
}
fn map_subject(&mut self, node: Subject) -> Subject {
match node {
Subject::NamedNode(node) => node.into(),
Subject::BlankNode(node) => self.map_blank_node(node).into(),
#[cfg(feature = "rdf-star")]
Subject::Triple(triple) => self.map_triple(*triple).into(),
}
}
fn map_term(&mut self, node: Term) -> Term {
match node {
Term::NamedNode(node) => node.into(),
Term::BlankNode(node) => self.map_blank_node(node).into(),
Term::Literal(literal) => literal.into(),
#[cfg(feature = "rdf-star")]
Term::Triple(triple) => self.map_triple(*triple).into(),
}
}
fn map_triple(&mut self, triple: Triple) -> Triple {
Triple {
subject: self.map_subject(triple.subject),
predicate: triple.predicate,
object: self.map_term(triple.object),
}
}
fn map_graph_name(&mut self, graph_name: GraphName) -> Result<GraphName, RdfParseError> {
match graph_name {
GraphName::NamedNode(node) => {
if self.without_named_graphs {
Err(RdfParseError::msg("Named graphs are not allowed"))
} else {
Ok(node.into())
}
}
GraphName::BlankNode(node) => {
if self.without_named_graphs {
Err(RdfParseError::msg("Named graphs are not allowed"))
} else {
Ok(self.map_blank_node(node).into())
}
}
GraphName::DefaultGraph => Ok(self.default_graph.clone()),
}
}
fn map_quad(&mut self, quad: Quad) -> Result<Quad, RdfParseError> {
Ok(Quad {
subject: self.map_subject(quad.subject),
predicate: quad.predicate,
object: self.map_term(quad.object),
graph_name: self.map_graph_name(quad.graph_name)?,
})
}
fn map_triple_to_quad(&mut self, triple: Triple) -> Quad {
self.map_triple(triple).in_graph(self.default_graph.clone())
}
fn map_n3_quad(&mut self, quad: N3Quad) -> Result<Quad, RdfParseError> {
Ok(Quad {
subject: match quad.subject {
N3Term::NamedNode(s) => Ok(s.into()),
N3Term::BlankNode(s) => Ok(self.map_blank_node(s).into()),
N3Term::Literal(_) => Err(RdfParseError::msg(
"literals are not allowed in regular RDF subjects",
)),
#[cfg(feature = "rdf-star")]
N3Term::Triple(s) => Ok(self.map_triple(*s).into()),
N3Term::Variable(_) => Err(RdfParseError::msg(
"variables are not allowed in regular RDF subjects",
)),
}?,
predicate: match quad.predicate {
N3Term::NamedNode(p) => Ok(p),
N3Term::BlankNode(_) => Err(RdfParseError::msg(
"blank nodes are not allowed in regular RDF predicates",
)),
N3Term::Literal(_) => Err(RdfParseError::msg(
"literals are not allowed in regular RDF predicates",
)),
#[cfg(feature = "rdf-star")]
N3Term::Triple(_) => Err(RdfParseError::msg(
"quoted triples are not allowed in regular RDF predicates",
)),
N3Term::Variable(_) => Err(RdfParseError::msg(
"variables are not allowed in regular RDF predicates",
)),
}?,
object: match quad.object {
N3Term::NamedNode(o) => Ok(o.into()),
N3Term::BlankNode(o) => Ok(self.map_blank_node(o).into()),
N3Term::Literal(o) => Ok(o.into()),
#[cfg(feature = "rdf-star")]
N3Term::Triple(o) => Ok(self.map_triple(*o).into()),
N3Term::Variable(_) => Err(RdfParseError::msg(
"variables are not allowed in regular RDF objects",
)),
}?,
graph_name: self.map_graph_name(quad.graph_name)?,
})
}
}
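`map_n3_quad` is why N3 input can fail even though the file is syntactically valid: N3-only terms such as variables have no plain-RDF counterpart. A sketch of how that surfaces through the public API, assuming the N3 parser passes the variable through to this mapper:

```rust
use oxrdfio::{RdfFormat, RdfParser};

// Valid N3, but `?s` is a variable and cannot become an RDF subject.
let file = "?s <http://example.com/p> <http://example.com/o> .";
let result = RdfParser::from_format(RdfFormat::N3)
    .parse_read(file.as_bytes())
    .next()
    .unwrap();
assert!(result.is_err());
```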

@@ -1,410 +0,0 @@
//! Utilities to write RDF graphs and datasets.
use crate::format::RdfFormat;
use oxrdf::{GraphNameRef, IriParseError, QuadRef, TripleRef};
#[cfg(feature = "async-tokio")]
use oxrdfxml::ToTokioAsyncWriteRdfXmlWriter;
use oxrdfxml::{RdfXmlSerializer, ToWriteRdfXmlWriter};
#[cfg(feature = "async-tokio")]
use oxttl::nquads::ToTokioAsyncWriteNQuadsWriter;
use oxttl::nquads::{NQuadsSerializer, ToWriteNQuadsWriter};
#[cfg(feature = "async-tokio")]
use oxttl::ntriples::ToTokioAsyncWriteNTriplesWriter;
use oxttl::ntriples::{NTriplesSerializer, ToWriteNTriplesWriter};
#[cfg(feature = "async-tokio")]
use oxttl::trig::ToTokioAsyncWriteTriGWriter;
use oxttl::trig::{ToWriteTriGWriter, TriGSerializer};
#[cfg(feature = "async-tokio")]
use oxttl::turtle::ToTokioAsyncWriteTurtleWriter;
use oxttl::turtle::{ToWriteTurtleWriter, TurtleSerializer};
use std::io::{self, Write};
#[cfg(feature = "async-tokio")]
use tokio::io::AsyncWrite;
/// A serializer for RDF serialization formats.
///
/// It currently supports the following formats:
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
/// * [canonical](https://www.w3.org/TR/n-triples/#canonical-ntriples) [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
/// writer.write_quad(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into()
/// })?;
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct RdfSerializer {
inner: RdfSerializerKind,
}
enum RdfSerializerKind {
NQuads(NQuadsSerializer),
NTriples(NTriplesSerializer),
RdfXml(RdfXmlSerializer),
TriG(TriGSerializer),
Turtle(TurtleSerializer),
}
impl RdfSerializer {
/// Builds a serializer for the given format.
#[inline]
pub fn from_format(format: RdfFormat) -> Self {
Self {
inner: match format {
RdfFormat::NQuads => RdfSerializerKind::NQuads(NQuadsSerializer::new()),
RdfFormat::NTriples => RdfSerializerKind::NTriples(NTriplesSerializer::new()),
RdfFormat::RdfXml => RdfSerializerKind::RdfXml(RdfXmlSerializer::new()),
RdfFormat::TriG => RdfSerializerKind::TriG(TriGSerializer::new()),
RdfFormat::Turtle | RdfFormat::N3 => {
RdfSerializerKind::Turtle(TurtleSerializer::new())
}
},
}
}
/// The format the serializer serializes to.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
///
/// assert_eq!(
/// RdfSerializer::from_format(RdfFormat::Turtle).format(),
/// RdfFormat::Turtle
/// );
/// ```
pub fn format(&self) -> RdfFormat {
match &self.inner {
RdfSerializerKind::NQuads(_) => RdfFormat::NQuads,
RdfSerializerKind::NTriples(_) => RdfFormat::NTriples,
RdfSerializerKind::RdfXml(_) => RdfFormat::RdfXml,
RdfSerializerKind::TriG(_) => RdfFormat::TriG,
RdfSerializerKind::Turtle(_) => RdfFormat::Turtle,
}
}
/// If the format supports it, sets a prefix.
///
/// ```
/// use oxrdf::vocab::rdf;
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxrdfio::{RdfFormat, RdfSerializer};
///
/// let mut writer = RdfSerializer::from_format(RdfFormat::Turtle)
/// .with_prefix("schema", "http://schema.org/")?
/// .serialize_to_write(Vec::new());
/// writer.write_triple(TripleRef {
/// subject: NamedNodeRef::new("http://example.com/s")?.into(),
/// predicate: rdf::TYPE.into(),
/// object: NamedNodeRef::new("http://schema.org/Person")?.into(),
/// })?;
/// assert_eq!(
/// writer.finish()?,
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com/s> a schema:Person .\n"
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_prefix(
mut self,
prefix_name: impl Into<String>,
prefix_iri: impl Into<String>,
) -> Result<Self, IriParseError> {
self.inner = match self.inner {
RdfSerializerKind::NQuads(s) => RdfSerializerKind::NQuads(s),
RdfSerializerKind::NTriples(s) => RdfSerializerKind::NTriples(s),
RdfSerializerKind::RdfXml(s) => {
RdfSerializerKind::RdfXml(s.with_prefix(prefix_name, prefix_iri)?)
}
RdfSerializerKind::TriG(s) => {
RdfSerializerKind::TriG(s.with_prefix(prefix_name, prefix_iri)?)
}
RdfSerializerKind::Turtle(s) => {
RdfSerializerKind::Turtle(s.with_prefix(prefix_name, prefix_iri)?)
}
};
Ok(self)
}
/// Writes to a [`Write`] implementation.
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// <div class="warning">
///
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
/// writer.write_quad(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into()
/// })?;
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteQuadWriter<W> {
ToWriteQuadWriter {
formatter: match self.inner {
RdfSerializerKind::NQuads(s) => {
ToWriteQuadWriterKind::NQuads(s.serialize_to_write(write))
}
RdfSerializerKind::NTriples(s) => {
ToWriteQuadWriterKind::NTriples(s.serialize_to_write(write))
}
RdfSerializerKind::RdfXml(s) => {
ToWriteQuadWriterKind::RdfXml(s.serialize_to_write(write))
}
RdfSerializerKind::TriG(s) => {
ToWriteQuadWriterKind::TriG(s.serialize_to_write(write))
}
RdfSerializerKind::Turtle(s) => {
ToWriteQuadWriterKind::Turtle(s.serialize_to_write(write))
}
},
}
}
/// Writes to a Tokio [`AsyncWrite`] implementation.
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// <div class="warning">
///
/// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> std::io::Result<()> {
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
/// writer.write_quad(&Quad {
/// subject: NamedNode::new_unchecked("http://example.com/s").into(),
/// predicate: NamedNode::new_unchecked("http://example.com/p"),
/// object: NamedNode::new_unchecked("http://example.com/o").into(),
/// graph_name: NamedNode::new_unchecked("http://example.com/g").into()
/// }).await?;
/// assert_eq!(writer.finish().await?, "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>(
self,
write: W,
) -> ToTokioAsyncWriteQuadWriter<W> {
ToTokioAsyncWriteQuadWriter {
formatter: match self.inner {
RdfSerializerKind::NQuads(s) => {
ToTokioAsyncWriteQuadWriterKind::NQuads(s.serialize_to_tokio_async_write(write))
}
RdfSerializerKind::NTriples(s) => ToTokioAsyncWriteQuadWriterKind::NTriples(
s.serialize_to_tokio_async_write(write),
),
RdfSerializerKind::RdfXml(s) => {
ToTokioAsyncWriteQuadWriterKind::RdfXml(s.serialize_to_tokio_async_write(write))
}
RdfSerializerKind::TriG(s) => {
ToTokioAsyncWriteQuadWriterKind::TriG(s.serialize_to_tokio_async_write(write))
}
RdfSerializerKind::Turtle(s) => {
ToTokioAsyncWriteQuadWriterKind::Turtle(s.serialize_to_tokio_async_write(write))
}
},
}
}
}
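/// Shorthand for [`RdfSerializer::from_format`], enabling e.g. `RdfFormat::NQuads.into()` where an [`RdfSerializer`] is expected.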
impl From<RdfFormat> for RdfSerializer {
fn from(format: RdfFormat) -> Self {
Self::from_format(format)
}
}
/// Writes quads or triples to a [`Write`] implementation.
///
/// Can be built using [`RdfSerializer::serialize_to_write`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// <div class="warning">
///
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
/// writer.write_quad(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct ToWriteQuadWriter<W: Write> {
formatter: ToWriteQuadWriterKind<W>,
}
enum ToWriteQuadWriterKind<W: Write> {
NQuads(ToWriteNQuadsWriter<W>),
NTriples(ToWriteNTriplesWriter<W>),
RdfXml(ToWriteRdfXmlWriter<W>),
TriG(ToWriteTriGWriter<W>),
Turtle(ToWriteTurtleWriter<W>),
}
impl<W: Write> ToWriteQuadWriter<W> {
/// Writes a [`QuadRef`]
pub fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
match &mut self.formatter {
ToWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad),
ToWriteQuadWriterKind::NTriples(writer) => writer.write_triple(to_triple(quad)?),
ToWriteQuadWriterKind::RdfXml(writer) => writer.write_triple(to_triple(quad)?),
ToWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad),
ToWriteQuadWriterKind::Turtle(writer) => writer.write_triple(to_triple(quad)?),
}
}
/// Writes a [`TripleRef`]
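///
/// A minimal sketch (the IRIs are placeholder values); the triple is emitted in the default graph:
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxrdfio::{RdfFormat, RdfSerializer};
///
/// let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com/s")?,
///     NamedNodeRef::new("http://example.com/p")?,
///     NamedNodeRef::new("http://example.com/o")?,
/// ))?;
/// assert_eq!(
///     writer.finish()?,
///     b"<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n"
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```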
pub fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph))
}
/// Writes the last bytes of the file
///
/// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](io::BufWriter).
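///
/// A minimal sketch of the buffered pattern (the file name is a placeholder):
/// ```no_run
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use std::fs::File;
/// use std::io::{BufWriter, Write};
///
/// let writer = RdfSerializer::from_format(RdfFormat::NQuads)
///     .serialize_to_write(BufWriter::new(File::create("example.nq")?));
/// // ... write_quad / write_triple calls ...
/// writer.finish()?.flush()?; // finish() returns the inner BufWriter, which still has to be flushed
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```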
pub fn finish(self) -> io::Result<W> {
Ok(match self.formatter {
ToWriteQuadWriterKind::NQuads(writer) => writer.finish(),
ToWriteQuadWriterKind::NTriples(writer) => writer.finish(),
ToWriteQuadWriterKind::RdfXml(writer) => writer.finish()?,
ToWriteQuadWriterKind::TriG(writer) => writer.finish()?,
ToWriteQuadWriterKind::Turtle(writer) => writer.finish()?,
})
}
}
/// Writes quads or triples to a Tokio [`AsyncWrite`] implementation.
///
/// Can be built using [`RdfSerializer::serialize_to_tokio_async_write`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// <div class="warning">
///
/// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> std::io::Result<()> {
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
/// writer.write_quad(&Quad {
/// subject: NamedNode::new_unchecked("http://example.com/s").into(),
/// predicate: NamedNode::new_unchecked("http://example.com/p"),
/// object: NamedNode::new_unchecked("http://example.com/o").into(),
/// graph_name: NamedNode::new_unchecked("http://example.com/g").into()
/// }).await?;
/// assert_eq!(writer.finish().await?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
/// # Ok(())
/// # }
/// ```
#[must_use]
#[cfg(feature = "async-tokio")]
pub struct ToTokioAsyncWriteQuadWriter<W: AsyncWrite + Unpin> {
formatter: ToTokioAsyncWriteQuadWriterKind<W>,
}
#[cfg(feature = "async-tokio")]
enum ToTokioAsyncWriteQuadWriterKind<W: AsyncWrite + Unpin> {
NQuads(ToTokioAsyncWriteNQuadsWriter<W>),
NTriples(ToTokioAsyncWriteNTriplesWriter<W>),
RdfXml(ToTokioAsyncWriteRdfXmlWriter<W>),
TriG(ToTokioAsyncWriteTriGWriter<W>),
Turtle(ToTokioAsyncWriteTurtleWriter<W>),
}
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteQuadWriter<W> {
/// Writes a [`QuadRef`]
pub async fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
match &mut self.formatter {
ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad).await,
ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => {
writer.write_triple(to_triple(quad)?).await
}
ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => {
writer.write_triple(to_triple(quad)?).await
}
ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad).await,
ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => {
writer.write_triple(to_triple(quad)?).await
}
}
}
/// Writes a [`TripleRef`]
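///
/// A minimal sketch (the IRIs are placeholder values), mirroring the synchronous variant:
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxrdfio::{RdfFormat, RdfSerializer};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> std::io::Result<()> {
/// let mut writer = RdfSerializer::from_format(RdfFormat::NTriples)
///     .serialize_to_tokio_async_write(Vec::new());
/// writer
///     .write_triple(TripleRef::new(
///         NamedNodeRef::new_unchecked("http://example.com/s"),
///         NamedNodeRef::new_unchecked("http://example.com/p"),
///         NamedNodeRef::new_unchecked("http://example.com/o"),
///     ))
///     .await?;
/// assert_eq!(
///     writer.finish().await?,
///     b"<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n"
/// );
/// # Ok(())
/// # }
/// ```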
pub async fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph))
.await
}
/// Writes the last bytes of the file
///
/// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](tokio::io::BufWriter).
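///
/// A minimal sketch of the buffered pattern (the file name is a placeholder; assumes Tokio's `fs` and `io-util` features):
/// ```no_run
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use tokio::io::{AsyncWriteExt, BufWriter};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> std::io::Result<()> {
/// let file = tokio::fs::File::create("example.nq").await?;
/// let writer = RdfSerializer::from_format(RdfFormat::NQuads)
///     .serialize_to_tokio_async_write(BufWriter::new(file));
/// // ... write_quad / write_triple calls ...
/// writer.finish().await?.flush().await?; // the returned BufWriter still has to be flushed
/// # Ok(())
/// # }
/// ```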
pub async fn finish(self) -> io::Result<W> {
Ok(match self.formatter {
ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.finish(),
ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => writer.finish(),
ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => writer.finish().await?,
ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.finish().await?,
ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => writer.finish().await?,
})
}
}
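/// Downgrades a quad to a triple, which only succeeds for quads in the default graph:
/// triple-only formats (Turtle, N-Triples, RDF/XML) have no way to encode a graph name.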
fn to_triple<'a>(quad: impl Into<QuadRef<'a>>) -> io::Result<TripleRef<'a>> {
let quad = quad.into();
if quad.graph_name.is_default_graph() {
Ok(quad.into())
} else {
Err(io::Error::new(
io::ErrorKind::InvalidInput,
"Only quads in the default graph can be serialized to a RDF graph format",
))
}
}
