Compare commits

..

3 Commits

  1. 137
      .cargo/config.toml
  2. 1
      .clusterfuzzlite/Dockerfile
  3. 8
      .clusterfuzzlite/build.sh
  4. 21
      .devcontainer/Dockerfile
  5. 69
      .devcontainer/devcontainer.json
  6. 2
      .github/ISSUE_TEMPLATE/bug_report.md
  7. 27
      .github/actions/setup-rust/action.yml
  8. 266
      .github/workflows/artifacts.yml
  9. 11
      .github/workflows/install_rocksdb.sh
  10. 14
      .github/workflows/manylinux_build.sh
  11. 12
      .github/workflows/musllinux_build.sh
  12. 432
      .github/workflows/tests.yml
  13. 7
      .gitmodules
  14. 2
      .readthedocs.yaml
  15. 126
      CHANGELOG.md
  16. 1541
      Cargo.lock
  17. 261
      Cargo.toml
  18. 40
      README.md
  19. 8
      bench/bsbm_oxigraph.sh
  20. 2
      clippy.toml
  21. 16
      deny.toml
  22. 120
      docs/arch-diagram.svg
  23. 35
      docs/arch-diagram.txt
  24. 27
      fuzz/Cargo.toml
  25. 28
      fuzz/fuzz_targets/n3.rs
  26. 84
      fuzz/fuzz_targets/nquads.rs
  27. 35
      fuzz/fuzz_targets/rdf_xml.rs
  28. 25
      fuzz/fuzz_targets/sparql_eval.rs
  29. 2
      fuzz/fuzz_targets/sparql_query.rs
  30. 2
      fuzz/fuzz_targets/sparql_results_json.rs
  31. 2
      fuzz/fuzz_targets/sparql_results_tsv.rs
  32. 2
      fuzz/fuzz_targets/sparql_results_xml.rs
  33. 2
      fuzz/fuzz_targets/sparql_update.rs
  34. 166
      fuzz/fuzz_targets/trig.rs
  35. 26
      fuzz/src/result_format.rs
  36. 26
      js/Cargo.toml
  37. 56
      js/README.md
  38. 12
      js/build_package.js
  39. 1027
      js/package-lock.json
  40. 14
      js/package.json
  41. 3
      js/rome.json
  42. 2
      js/src/lib.rs
  43. 33
      js/src/model.rs
  44. 117
      js/src/store.rs
  45. 18
      js/test/model.mjs
  46. 86
      js/test/store.mjs
  47. 63
      lib/Cargo.toml
  48. 85
      lib/README.md
  49. 265
      lib/benches/store.rs
  50. 59
      lib/oxigraph/Cargo.toml
  51. 82
      lib/oxigraph/README.md
  52. 39
      lib/oxigraph/src/io/mod.rs
  53. 199
      lib/oxigraph/src/io/read.rs
  54. 185
      lib/oxigraph/src/io/write.rs
  55. 12
      lib/oxigraph/src/lib.rs
  56. 22
      lib/oxigraph/src/model.rs
  57. 84
      lib/oxigraph/src/sparql/error.rs
  58. 9
      lib/oxigraph/src/sparql/http/mod.rs
  59. 371
      lib/oxigraph/src/sparql/model.rs
  60. 44
      lib/oxigraph/src/sparql/results.rs
  61. 12
      lib/oxigraph/src/storage/backend/mod.rs
  62. 139
      lib/oxigraph/src/storage/error.rs
  63. 25
      lib/oxrdf/Cargo.toml
  64. 4
      lib/oxrdf/README.md
  65. 78
      lib/oxrdf/src/blank_node.rs
  66. 473
      lib/oxrdf/src/dataset.rs
  67. 37
      lib/oxrdf/src/graph.rs
  68. 53
      lib/oxrdf/src/interning.rs
  69. 2
      lib/oxrdf/src/lib.rs
  70. 43
      lib/oxrdf/src/literal.rs
  71. 21
      lib/oxrdf/src/named_node.rs
  72. 192
      lib/oxrdf/src/parser.rs
  73. 170
      lib/oxrdf/src/triple.rs
  74. 39
      lib/oxrdf/src/variable.rs
  75. 9
      lib/oxrdf/src/vocab.rs
  76. 36
      lib/oxrdfio/Cargo.toml
  77. 67
      lib/oxrdfio/README.md
  78. 122
      lib/oxrdfio/src/error.rs
  79. 216
      lib/oxrdfio/src/format.rs
  80. 19
      lib/oxrdfio/src/lib.rs
  81. 807
      lib/oxrdfio/src/parser.rs
  82. 410
      lib/oxrdfio/src/serializer.rs
  83. 36
      lib/oxrdfxml/Cargo.toml
  84. 56
      lib/oxrdfxml/README.md
  85. 89
      lib/oxrdfxml/src/error.rs
  86. 18
      lib/oxrdfxml/src/lib.rs
  87. 1237
      lib/oxrdfxml/src/parser.rs
  88. 461
      lib/oxrdfxml/src/serializer.rs
  89. 26
      lib/oxrdfxml/src/utils.rs
  90. 25
      lib/oxsdatatypes/Cargo.toml
  91. 18
      lib/oxsdatatypes/README.md
  92. 11
      lib/oxsdatatypes/src/boolean.rs
  93. 1289
      lib/oxsdatatypes/src/date_time.rs
  94. 470
      lib/oxsdatatypes/src/decimal.rs
  95. 59
      lib/oxsdatatypes/src/double.rs
  96. 665
      lib/oxsdatatypes/src/duration.rs
  97. 59
      lib/oxsdatatypes/src/float.rs
  98. 142
      lib/oxsdatatypes/src/integer.rs
  99. 15
      lib/oxsdatatypes/src/lib.rs
  100. 626
      lib/oxsdatatypes/src/parser.rs
  101. Some files were not shown because too many files have changed in this diff Show More

@ -0,0 +1,137 @@
[build]
rustflags = [
"-Wtrivial-casts",
"-Wtrivial-numeric-casts",
"-Wunsafe-code",
"-Wunused-lifetimes",
"-Wunused-qualifications",
# TODO: 1.63+ "-Wclippy::as-underscore",
# TODO: 1.65+ "-Wclippy::bool-to-int-with-if",
"-Wclippy::borrow-as-ptr",
"-Wclippy::case-sensitive-file-extension-comparisons",
"-Wclippy::cast-lossless",
"-Wclippy::cast-possible-truncation",
"-Wclippy::cast-possible-wrap",
"-Wclippy::cast-precision-loss",
"-Wclippy::cast-ptr-alignment",
"-Wclippy::cast-sign-loss",
"-Wclippy::checked-conversions",
"-Wclippy::clone-on-ref-ptr",
"-Wclippy::cloned-instead-of-copied",
"-Wclippy::copy-iterator",
"-Wclippy::dbg-macro",
"-Wclippy::decimal-literal-representation",
"-Wclippy::default-trait-access",
"-Wclippy::default-union-representation",
# TODO: 1.61+ "-Wclippy::deref-by-slicing",
# TODO: 1.63+ "-Wclippy::doc-link-with-quotes",
# TODO: 1.62+ "-Wclippy::empty-drop",
"-Wclippy::empty-enum",
# TODO: on major version "-Wclippy::empty-structs-with-brackets",
"-Wclippy::enum-glob-use",
"-Wclippy::exit",
"-Wclippy::expect-used",
"-Wclippy::expl-impl-clone-on-copy",
"-Wclippy::explicit-deref-methods",
"-Wclippy::explicit-into-iter-loop",
"-Wclippy::explicit-iter-loop",
"-Wclippy::filter-map-next",
"-Wclippy::flat-map-option",
"-Wclippy::fn-to-numeric-cast-any",
# TODO: 1.62+ "-Wclippy::format-push-string",
"-Wclippy::from-iter-instead-of-collect",
"-Wclippy::get-unwrap",
"-Wclippy::if-not-else",
"-Wclippy::if-then-some-else-none",
"-Wclippy::implicit-clone",
"-Wclippy::inconsistent-struct-constructor",
"-Wclippy::index-refutable-slice",
"-Wclippy::inefficient-to-string",
"-Wclippy::inline-always",
"-Wclippy::inline-asm-x86-att-syntax",
"-Wclippy::inline-asm-x86-intel-syntax",
"-Wclippy::invalid-upcast-comparisons",
"-Wclippy::items-after-statements",
"-Wclippy::large-digit-groups",
# TODO: 1.68+ "-Wclippy::large-futures",
"-Wclippy::large-stack-arrays",
"-Wclippy::large-types-passed-by-value",
"-Wclippy::let-underscore-must-use",
"-Wclippy::let-unit-value",
"-Wclippy::linkedlist",
"-Wclippy::lossy-float-literal",
"-Wclippy::macro-use-imports",
"-Wclippy::manual-assert",
# TODO: 1.65+ "-Wclippy::manual-instant-elapsed",
# TODO: 1.67+ "-Wclippy::manual-let-else",
"-Wclippy::manual-ok-or",
# TODO: 1.65+ "-Wclippy::manual-string-new",
"-Wclippy::many-single-char-names",
"-Wclippy::map-unwrap-or",
"-Wclippy::match-bool",
"-Wclippy::match-same-arms",
"-Wclippy::match-wildcard-for-single-variants",
"-Wclippy::maybe-infinite-iter",
"-Wclippy::mem-forget",
# TODO: 1.63+ "-Wclippy::mismatching-type-param-order",
"-Wclippy::multiple-inherent-impl",
"-Wclippy::mut-mut",
"-Wclippy::mutex-atomic",
"-Wclippy::naive-bytecount",
"-Wclippy::needless-bitwise-bool",
"-Wclippy::needless-continue",
"-Wclippy::needless-pass-by-value",
"-Wclippy::no-effect-underscore-binding",
# TODO: 1.69+ "-Wclippy::no-mangle-with-rust-abi",
"-Wclippy::non-ascii-literal",
"-Wclippy::print-stderr",
"-Wclippy::print-stdout",
"-Wclippy::ptr-as-ptr",
"-Wclippy::range-minus-one",
"-Wclippy::range-plus-one",
"-Wclippy::rc-buffer",
"-Wclippy::rc-mutex",
"-Wclippy::redundant-closure-for-method-calls",
"-Wclippy::redundant-else",
"-Wclippy::redundant-feature-names",
"-Wclippy::ref-binding-to-reference",
"-Wclippy::ref-option-ref",
"-Wclippy::rest-pat-in-fully-bound-structs",
"-Wclippy::return-self-not-must-use",
"-Wclippy::same-functions-in-if-condition",
# TODO: strange failure on 1.60 "-Wclippy::same-name-method",
# TODO: 1.68+ "-Wclippy::semicolon-outside-block",
"-Wclippy::single-match-else",
"-Wclippy::stable-sort-primitive",
"-Wclippy::str-to-string",
"-Wclippy::string-add",
"-Wclippy::string-add-assign",
"-Wclippy::string-lit-as-bytes",
"-Wclippy::string-to-string",
# TODO: 1.67+ "-Wclippy::suspicious-xor-used-as-pow",
"-Wclippy::todo",
"-Wclippy::transmute-ptr-to-ptr",
"-Wclippy::trivially-copy-pass-by-ref",
"-Wclippy::try-err",
"-Wclippy::unicode-not-nfc",
"-Wclippy::unimplemented",
# TODO: 1.66+ "-Wclippy::uninlined-format-args",
# TODO: 1.70+ "-Wclippy::unnecessary-box-returns",
# TODO: 1.61+ "-Wclippy::unnecessary-join",
# TODO: 1.67+ "-Wclippy::unnecessary-safety-comment",
# TODO: 1.67+ "-Wclippy::unnecessary-safety-doc",
"-Wclippy::unnecessary-self-imports",
"-Wclippy::unnecessary-wraps",
"-Wclippy::unneeded-field-pattern",
"-Wclippy::unnested-or-patterns",
"-Wclippy::unreadable-literal",
"-Wclippy::unseparated-literal-suffix",
"-Wclippy::unused-async",
"-Wclippy::unused-self",
"-Wclippy::use-debug",
"-Wclippy::used-underscore-binding",
"-Wclippy::verbose-bit-mask",
"-Wclippy::verbose-file-reads",
"-Wclippy::wildcard-dependencies",
"-Wclippy::zero-sized-map-values",
]

@ -1,4 +1,5 @@
FROM gcr.io/oss-fuzz-base/base-builder-rust:v1
RUN apt-get update && apt-get install -y llvm-dev libclang-dev clang && apt-get clean && rm --recursive --force /var/lib/apt/lists/*
COPY . $SRC/oxigraph
WORKDIR oxigraph
COPY .clusterfuzzlite/build.sh $SRC/

@ -14,17 +14,11 @@ function build_seed_corpus() {
cd "$SRC"/oxigraph
git submodule init
git submodule update
cargo fuzz build -O --debug-assertions
for TARGET in sparql_eval sparql_results_json sparql_results_tsv sparql_results_xml n3 nquads trig rdf_xml
for TARGET in sparql_eval sparql_results_json sparql_results_tsv # sparql_results_xml https://github.com/tafia/quick-xml/issues/608
do
cp fuzz/target/x86_64-unknown-linux-gnu/release/$TARGET "$OUT"/
done
build_seed_corpus sparql_results_json srj
build_seed_corpus sparql_results_tsv tsv
build_seed_corpus sparql_results_xml srx
build_seed_corpus n3 n3
build_seed_corpus nquads nq
build_seed_corpus trig trig
build_seed_corpus rdf_xml rdf

@ -0,0 +1,21 @@
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust/.devcontainer/base.Dockerfile
# [Choice] Debian OS version (use bullseye on local arm64/Apple Silicon): buster, bullseye
ARG VARIANT="bullseye"
FROM mcr.microsoft.com/vscode/devcontainers/rust:0-${VARIANT}
# Install additional OS packages (Python 3 tooling and libclang).
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
&& apt-get -y install --no-install-recommends \
python3 \
python3-venv \
python-is-python3 \
libclang-dev
ENV VIRTUAL_ENV=/opt/venv
RUN python -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN pip install --no-cache-dir -r python/requirements.dev.txt
# Change owner to the devcontainer user
RUN chown -R 1000:1000 $VIRTUAL_ENV

@ -0,0 +1,69 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
// https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust
{
"name": "Rust",
"build": {
"dockerfile": "Dockerfile",
"args": {
// Use the VARIANT arg to pick a Debian OS version: buster, bullseye
// Use bullseye when running locally on arm64/Apple Silicon.
"VARIANT": "bullseye"
}
},
"runArgs": ["--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined"],
// Configure tool-specific properties.
"customizations": {
// Configure properties specific to VS Code.
"vscode": {
// Set *default* container specific settings.json values on container create.
"settings": {
"lldb.executable": "/usr/bin/lldb",
// Prevent VS Code from watching files under ./target
"files.watcherExclude": {
"**/target/**": true
},
"rust-analyzer.checkOnSave.command": "clippy",
"python.defaultInterpreterPath": "/opt/venv/bin/python",
"python.linting.enabled": true,
"python.linting.pylintEnabled": true,
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
"python.linting.pylintPath": "/opt/venv/bin/pylint",
"python.testing.pytestPath": "/opt/venv/bin/pytest"
},
// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"vadimcn.vscode-lldb",
"mutantdino.resourcemonitor",
"rust-lang.rust-analyzer",
"tamasfe.even-better-toml",
"serayuzgur.crates",
"ms-python.python",
"ms-python.vscode-pylance",
"esbenp.prettier-vscode",
"stardog-union.stardog-rdf-grammars"
]
}
},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "git submodule update --init && cargo build",
// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
"remoteUser": "vscode",
"features": {
"python": "3.10"
}
}

@ -13,4 +13,4 @@ A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Which version of Oxigraph are you using? On which platform?
2. A command-line or a code snippet that triggers the bug.
2. A command line or a code snippet that triggers the bug.

@ -1,27 +0,0 @@
name: 'Setup Rust'
description: 'Setup Rust using Rustup'
inputs:
version:
description: 'Rust version to use. By default latest stable version'
required: false
default: 'stable'
component:
description: 'Rust extra component to install like clippy'
required: false
target:
description: 'Rust extra target to install like wasm32-unknown-unknown'
required: false
runs:
using: "composite"
steps:
- run: rustup update
shell: bash
- run: rustup default ${{ inputs.version }}
shell: bash
- run: rustup component add ${{ inputs.component }}
shell: bash
if: ${{ inputs.component }}
- run: rustup target add ${{ inputs.target }}
shell: bash
if: ${{ inputs.target }}
- uses: Swatinem/rust-cache@v2

@ -4,7 +4,6 @@ on:
push:
branches:
- main
- next
release:
types:
- published
@ -17,39 +16,37 @@ jobs:
binary_linux:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
target: aarch64-unknown-linux-gnu
- run: rustup update && rustup target add aarch64-unknown-linux-gnu
- run: |
sudo apt-get update && sudo apt-get install -y g++-aarch64-linux-gnu
mkdir .cargo
echo -e "[target.aarch64-unknown-linux-gnu]\nlinker = \"aarch64-linux-gnu-gcc\"" >> .cargo/config.toml
- run: cargo build --release --no-default-features --features rustls-native
working-directory: ./cli
- run: cargo build --release --target aarch64-unknown-linux-gnu --no-default-features --features rustls-native
working-directory: ./cli
sudo apt update && sudo apt install -y g++-aarch64-linux-gnu
echo -e "\n\n[target.aarch64-unknown-linux-gnu]\nlinker = \"aarch64-linux-gnu-gcc\"" >> .cargo/config.toml
- uses: Swatinem/rust-cache@v2
- run: cargo build --release
working-directory: ./server
- run: cargo build --release --target aarch64-unknown-linux-gnu
working-directory: ./server
env:
BINDGEN_EXTRA_CLANG_ARGS: --sysroot /usr/aarch64-linux-gnu
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v3
with:
name: oxigraph_x86_64_linux_gnu
path: target/release/oxigraph
- uses: actions/upload-artifact@v4
name: oxigraph_server_x86_64_linux_gnu
path: target/release/oxigraph_server
- uses: actions/upload-artifact@v3
with:
name: oxigraph_aarch64_linux_gnu
path: target/aarch64-unknown-linux-gnu/release/oxigraph
- run: mv target/release/oxigraph oxigraph_${{ github.event.release.tag_name }}_x86_64_linux_gnu
name: oxigraph_server_aarch64-linux_gnu
path: target/aarch64-unknown-linux-gnu/release/oxigraph_server
- run: mv target/release/oxigraph_server oxigraph_server_${{ github.event.release.tag_name }}_x86_64_linux_gnu
if: github.event_name == 'release'
- run: mv target/aarch64-unknown-linux-gnu/release/oxigraph oxigraph_${{ github.event.release.tag_name }}_aarch64_linux_gnu
- run: mv target/aarch64-unknown-linux-gnu/release/oxigraph_server oxigraph_server_${{ github.event.release.tag_name }}_aarch64_linux_gnu
if: github.event_name == 'release'
- uses: softprops/action-gh-release@v1
with:
files: |
oxigraph_${{ github.event.release.tag_name }}_x86_64_linux_gnu
oxigraph_${{ github.event.release.tag_name }}_aarch64_linux_gnu
oxigraph_server_${{ github.event.release.tag_name }}_x86_64_linux_gnu
oxigraph_server_${{ github.event.release.tag_name }}_aarch64_linux_gnu
if: github.event_name == 'release'
binary_mac:
@ -59,64 +56,65 @@ jobs:
SDKROOT: '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk'
MACOSX_DEPLOYMENT_TARGET: '10.14'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
target: aarch64-apple-darwin
- run: rustup update && rustup target add aarch64-apple-darwin
- uses: Swatinem/rust-cache@v2
- run: cargo build --release
working-directory: ./cli
working-directory: ./server
- run: cargo build --release --target aarch64-apple-darwin
working-directory: ./cli
- uses: actions/upload-artifact@v4
working-directory: ./server
- uses: actions/upload-artifact@v3
with:
name: oxigraph_x86_64_apple
path: target/release/oxigraph
- uses: actions/upload-artifact@v4
name: oxigraph_server_x86_64_apple
path: target/release/oxigraph_server
- uses: actions/upload-artifact@v3
with:
name: oxigraph_aarch64_apple
path: target/aarch64-apple-darwin/release/oxigraph
- run: mv target/release/oxigraph oxigraph_${{ github.event.release.tag_name }}_x86_64_apple
name: oxigraph_server_aarch64_apple
path: target/aarch64-apple-darwin/release/oxigraph_server
- run: mv target/release/oxigraph_server oxigraph_server_${{ github.event.release.tag_name }}_x86_64_apple
if: github.event_name == 'release'
- run: mv target/aarch64-apple-darwin/release/oxigraph oxigraph_${{ github.event.release.tag_name }}_aarch64_apple
- run: mv target/aarch64-apple-darwin/release/oxigraph_server oxigraph_server_${{ github.event.release.tag_name }}_aarch64_apple
if: github.event_name == 'release'
- uses: softprops/action-gh-release@v1
with:
files: |
oxigraph_${{ github.event.release.tag_name }}_x86_64_apple
oxigraph_${{ github.event.release.tag_name }}_aarch64_apple
oxigraph_server_${{ github.event.release.tag_name }}_x86_64_apple
oxigraph_server_${{ github.event.release.tag_name }}_aarch64_apple
if: github.event_name == 'release'
binary_windows:
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
- run: cargo build --release
working-directory: ./cli
- uses: actions/upload-artifact@v4
working-directory: ./server
- uses: actions/upload-artifact@v3
with:
name: oxigraph_x86_64_windows_msvc
path: target/release/oxigraph.exe
- run: mv target/release/oxigraph.exe oxigraph_${{ github.event.release.tag_name }}_x86_64_windows_msvc.exe
name: oxigraph_server_x86_64_windows_msvc
path: target/release/oxigraph_server.exe
- run: mv target/release/oxigraph_server.exe oxigraph_server_${{ github.event.release.tag_name }}_x86_64_windows_msvc.exe
if: github.event_name == 'release'
- uses: softprops/action-gh-release@v1
with:
files: oxigraph_${{ github.event.release.tag_name }}_x86_64_windows_msvc.exe
files: oxigraph_server_${{ github.event.release.tag_name }}_x86_64_windows_msvc.exe
if: github.event_name == 'release'
python_sdist:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: actions/setup-python@v5
- run: rustup update
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-python@v4
with:
python-version: "3.12"
cache: pip
@ -125,13 +123,18 @@ jobs:
- run: maturin build -m python/Cargo.toml
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
working-directory: ./python
- run: maturin sdist -m python/Cargo.toml
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v3
with:
name: pyoxigraph_sdist
name: pyoxigraph_source
path: target/wheels/*.tar.gz
- run: pip install twine && twine upload target/wheels/*
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
if: github.event_name == 'release'
wheel_linux:
runs-on: ubuntu-latest
@ -140,21 +143,26 @@ jobs:
architecture: [ "x86_64", "aarch64" ]
continue-on-error: true
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: docker/setup-qemu-action@v2
with:
platforms: linux/${{ matrix.architecture }}
if: github.event_name == 'release' && matrix.architecture != 'x86_64'
- uses: ./.github/actions/setup-rust
- run: sed 's/%arch%/${{ matrix.architecture }}/g' .github/workflows/manylinux_build.sh | sed 's/%for_each_version%/${{ github.event_name == 'release' || '' }}/g' > .github/workflows/manylinux_build_script.sh
- run: docker run -v "$(pwd)":/workdir --platform linux/${{ matrix.architecture }} quay.io/pypa/manylinux2014_${{ matrix.architecture }} /bin/bash /workdir/.github/workflows/manylinux_build_script.sh
if: github.event_name == 'release' || matrix.architecture == 'x86_64'
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v3
with:
name: pyoxigraph_${{ matrix.architecture }}_linux_gnu
name: pyoxigraph_wheel_x86_64_linux
path: target/wheels/*.whl
- uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_PASSWORD }}
packages-dir: target/wheels
if: github.event_name == 'release'
wheel_linux_musl:
runs-on: ubuntu-latest
@ -163,21 +171,26 @@ jobs:
architecture: [ "x86_64", "aarch64" ]
continue-on-error: true
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: docker/setup-qemu-action@v2
with:
platforms: linux/${{ matrix.architecture }}
if: github.event_name == 'release' && matrix.architecture != 'x86_64'
- uses: ./.github/actions/setup-rust
- run: sed 's/%arch%/${{ matrix.architecture }}/g' .github/workflows/musllinux_build.sh | sed 's/%for_each_version%/${{ github.event_name == 'release' || '' }}/g' > .github/workflows/musllinux_build_script.sh
- run: docker run -v "$(pwd)":/workdir --platform linux/${{ matrix.architecture }} quay.io/pypa/musllinux_1_2_${{ matrix.architecture }} /bin/bash /workdir/.github/workflows/musllinux_build_script.sh
- run: docker run -v "$(pwd)":/workdir --platform linux/${{ matrix.architecture }} quay.io/pypa/musllinux_1_1_${{ matrix.architecture }} /bin/bash /workdir/.github/workflows/musllinux_build_script.sh
if: github.event_name == 'release' || matrix.architecture == 'x86_64'
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v3
with:
name: pyoxigraph_${{ matrix.architecture }}_linux_musl
name: pyoxigraph_wheel_x86_64_linux_musl
path: target/wheels/*.whl
- uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_PASSWORD }}
packages-dir: target/wheels
if: github.event_name == 'release'
wheel_mac:
runs-on: macos-latest
@ -186,98 +199,78 @@ jobs:
SDKROOT: '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk'
MACOSX_DEPLOYMENT_TARGET: '10.14'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
target: aarch64-apple-darwin
- uses: actions/setup-python@v5
- run: rustup update && rustup target add aarch64-apple-darwin
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-python@v4
with:
python-version: "3.12"
python-version: "3.10"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- run: pip install -r python/requirements.dev.txt
- run: maturin build --release --features abi3
working-directory: ./python
- run: maturin build --release -m python/Cargo.toml --features abi3
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
working-directory: ./python
- run: maturin build --release --target universal2-apple-darwin --features abi3
working-directory: ./python
- run: maturin build --release --features abi3
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
working-directory: ./python
- run: maturin build --release --target universal2-apple-darwin -m python/Cargo.toml --features abi3
- run: maturin build --release -m python/Cargo.toml --features abi3
if: github.event_name == 'release'
- run: maturin build --release --target aarch64-apple-darwin --features abi3
working-directory: ./python
- run: maturin build --release --target aarch64-apple-darwin -m python/Cargo.toml --features abi3
if: github.event_name == 'release'
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v3
with:
name: pyoxigraph_macos
name: pyoxigraph_wheel_universal2_mac
path: target/wheels/*.whl
- run: pip install twine && twine upload target/wheels/*
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
if: github.event_name == 'release'
wheel_windows:
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: actions/setup-python@v5
- run: rustup update
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-python@v4
with:
python-version: "3.12"
python-version: "3.10"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
- run: pip install -r python/requirements.dev.txt
- run: maturin build --release --features abi3
working-directory: ./python
- run: maturin build --release -m python/Cargo.toml --features abi3
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
working-directory: ./python
- run: maturin build --release -m python/Cargo.toml --features abi3
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v3
with:
name: pyoxigraph_windows
name: pyoxigraph_wheel_x86_64_windows
path: target/wheels/*.whl
publish_pypi:
- run: pip install twine && twine upload target/wheels/*
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
if: github.event_name == 'release'
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/pyoxigraph
permissions:
id-token: write
needs:
- python_sdist
- wheel_windows
- wheel_mac
- wheel_linux
- wheel_linux_musl
steps:
- uses: actions/download-artifact@v4
with:
pattern: pyoxigraph_*
path: dist
merge-multiple: true
- uses: pypa/gh-action-pypi-publish@release/v1
with:
packages-dir: dist
skip-existing: true
npm_tarball:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: taiki-e/install-action@v2
with: { tool: wasm-pack }
- uses: actions/setup-node@v4
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: cargo install wasm-pack || true
- uses: actions/setup-node@v3
with:
node-version: 16
cache: npm
@ -285,7 +278,7 @@ jobs:
registry-url: https://registry.npmjs.org
- run: npm run pack
working-directory: ./js
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v3
with:
name: oxigraph_wasm_npm
path: js/*.tgz
@ -298,32 +291,27 @@ jobs:
docker:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: docker/setup-buildx-action@v3
- uses: docker/login-action@v3
- uses: docker/setup-buildx-action@v2
- uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{github.actor}}
password: ${{secrets.GITHUB_TOKEN}}
- uses: docker/login-action@v3
- uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
if: github.event_name == 'release'
- uses: docker/metadata-action@v5
- uses: docker/metadata-action@v4
id: docker_meta
with:
images: |
${{ github.repository }},enable=${{ github.event_name == 'release' }}
ghcr.io/${{ github.repository }}
tags: |
type=ref,event=branch
type=ref,event=pr
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
- uses: docker/build-push-action@v5
- uses: docker/build-push-action@v4
with:
context: .
file: server/Dockerfile
@ -339,7 +327,7 @@ jobs:
if: github.event_name == 'release'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- run: rustup update
@ -355,39 +343,27 @@ jobs:
- run: cargo publish
working-directory: ./lib/oxrdf
continue-on-error: true
- run: cargo publish
working-directory: ./lib/oxrdfxml
continue-on-error: true
- run: cargo publish
working-directory: ./lib/oxttl
continue-on-error: true
- run: cargo publish
working-directory: ./lib/oxrdfio
continue-on-error: true
- run: cargo publish
working-directory: ./lib/sparesults
continue-on-error: true
- run: cargo publish
working-directory: ./lib/spargebra
continue-on-error: true
- run: cargo publish
working-directory: ./lib/sparopt
continue-on-error: true
- run: cargo publish
working-directory: ./lib/sparql-smith
continue-on-error: true
- run: cargo publish
working-directory: ./lib/oxigraph
working-directory: ./lib
continue-on-error: true
- run: cargo publish
working-directory: ./cli
working-directory: ./server
homebrew:
if: "github.event_name == 'release' && !contains('-', github.event.release.tag_name)"
runs-on: ubuntu-latest
needs: full_archive
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: oxigraph/homebrew-oxigraph
token: ${{ secrets.FULL_ACCESS_TOKEN }}
@ -407,7 +383,7 @@ jobs:
if: github.event_name == 'release'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- run: |

@ -1,11 +0,0 @@
# Install RocksDB as a system-wide shared library.
#
# If a ./rocksdb checkout is already present, it is reused as-is and only
# the install step runs; otherwise the v8.0.0 tag is cloned and built first.
#
# `git clone` creates a directory, so the existence check must use -d.
# The previous -f test only matches regular files and therefore never
# detected an existing checkout.
if [ -d "rocksdb" ]
then
  cd rocksdb || exit
else
  git clone https://github.com/facebook/rocksdb.git
  cd rocksdb || exit
  git checkout v8.0.0
  make shared_lib
fi
# Install the shared library, then refresh the dynamic linker cache,
# passing /usr/local/lib as the directory to scan.
sudo make install-shared
sudo ldconfig /usr/local/lib

@ -8,17 +8,17 @@ chmod +x rustup-init
source "$HOME/.cargo/env"
export PATH="${PATH}:/opt/python/cp37-cp37m/bin:/opt/python/cp38-cp38/bin:/opt/python/cp39-cp39/bin:/opt/python/cp310-cp310/bin:/opt/python/cp311-cp311/bin"
cd python
python3.12 -m venv venv
python3.10 -m venv venv
source venv/bin/activate
pip install -r requirements.dev.txt
maturin develop --release
python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
maturin build --release --features abi3 --compatibility manylinux2014
maturin develop --release -m Cargo.toml
python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
maturin build --release -m Cargo.toml --features abi3 --compatibility manylinux2014
if [ %for_each_version% ]; then
for VERSION in 8 9 10 11 12; do
maturin build --release --interpreter "python3.$VERSION" --compatibility manylinux2014
for VERSION in 7 8 9 10 11 12; do
maturin build --release -m Cargo.toml --interpreter "python3.$VERSION" --compatibility manylinux2014
done
for VERSION in 9 10; do
maturin build --release --interpreter "pypy3.$VERSION" --compatibility manylinux2014
maturin build --release -m Cargo.toml --interpreter "pypy3.$VERSION" --compatibility manylinux2014
done
fi

@ -6,14 +6,14 @@ chmod +x rustup-init
source "$HOME/.cargo/env"
export PATH="${PATH}:/opt/python/cp37-cp37m/bin:/opt/python/cp38-cp38/bin:/opt/python/cp39-cp39/bin:/opt/python/cp310-cp310/bin:/opt/python/cp311-cp311/bin"
cd python
python3.12 -m venv venv
python3.10 -m venv venv
source venv/bin/activate
pip install -r requirements.dev.txt
maturin develop --release
python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
maturin build --release --features abi3 --compatibility musllinux_1_2
maturin develop --release -m Cargo.toml
python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
maturin build --release -m Cargo.toml --features abi3 --compatibility musllinux_1_1
if [ %for_each_version% ]; then
for VERSION in 8 9 10 11 12; do
maturin build --release --interpreter "python3.$VERSION" --compatibility musllinux_1_2
for VERSION in 7 8 9 10 11 12; do
maturin build --release -m Cargo.toml --interpreter "python3.$VERSION" --compatibility musllinux_1_1
done
fi

@ -4,7 +4,6 @@ on:
pull_request:
branches:
- main
- next
schedule:
- cron: "12 3 * * *"
@ -16,259 +15,209 @@ jobs:
fmt:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/setup-rust
with:
component: rustfmt
- uses: actions/checkout@v3
- run: rustup update && rustup component add rustfmt
- run: cargo fmt -- --check
clippy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
version: 1.76.0
component: clippy
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
- run: rustup update && rustup component add clippy
- uses: Swatinem/rust-cache@v2
- run: cargo clippy
working-directory: ./lib/oxsdatatypes
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
- run: cargo clippy
working-directory: ./lib/oxrdf
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxrdfxml
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxttl
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxrdfio
- run: cargo clippy --all-targets --features async-tokio -- -D warnings -D clippy::all
working-directory: ./lib/oxrdfio
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
- run: cargo clippy
working-directory: ./lib/sparesults
- run: cargo clippy --all-targets --features async-tokio -- -D warnings -D clippy::all
working-directory: ./lib/sparesults
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
- run: cargo clippy
working-directory: ./lib/spargebra
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/sparopt
- run: cargo clippy --all-targets --no-default-features -- -D warnings -D clippy::all
working-directory: ./lib/oxigraph
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxigraph
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./python
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./cli
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./testsuite
- run: cargo clippy --all-targets --all-features
clippy_wasm_js:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
version: 1.76.0
target: wasm32-unknown-unknown
component: clippy
- run: cargo clippy --lib --tests --target wasm32-unknown-unknown -- -D warnings -D clippy::all
- run: rustup update && rustup target add wasm32-unknown-unknown && rustup component add clippy
- uses: Swatinem/rust-cache@v2
- run: cargo clippy --lib --tests --target wasm32-unknown-unknown
working-directory: ./js
clippy_wasi:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
version: 1.76.0
target: wasm32-wasi
component: clippy
- run: cargo clippy --lib --tests --target wasm32-wasi -- -D warnings -D clippy::all
working-directory: ./lib/oxigraph
- run: cargo clippy --target wasm32-wasi --features abi3 --no-default-features -- -D warnings -D clippy::all
working-directory: ./python
- run: rustup update && rustup target add wasm32-wasi && rustup component add clippy
- uses: Swatinem/rust-cache@v2
- run: cargo clippy --lib --tests --target wasm32-wasi
working-directory: ./lib
clippy_wasm_emscripten:
clippy_msv:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
version: 1.76.0
target: wasm32-unknown-emscripten
component: clippy
- run: cargo clippy --lib --tests --target wasm32-unknown-emscripten -- -D warnings -D clippy::all
working-directory: ./lib/oxigraph
- run: cargo clippy --target wasm32-unknown-emscripten --features abi3 -- -D warnings -D clippy::all
working-directory: ./python
- run: rustup update && rustup override set 1.60.0 && rustup component add clippy
- uses: Swatinem/rust-cache@v2
- run: cargo clippy -- -D warnings -D clippy::all
working-directory: ./lib/oxsdatatypes
- run: cargo clippy -- -D warnings -D clippy::all
working-directory: ./lib/oxrdf
- run: cargo clippy -- -D warnings -D clippy::all
working-directory: ./lib/sparesults
- run: cargo clippy -- -D warnings -D clippy::all
working-directory: ./lib/spargebra
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./server
clippy_wasm_unknown:
clippy_msv_wasm_js:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
version: 1.76.0
target: wasm32-unknown-unknown
component: clippy
- run: cargo clippy --lib --tests --target wasm32-unknown-unknown --features getrandom/custom --features oxsdatatypes/custom-now -- -D warnings -D clippy::all
working-directory: ./lib/oxigraph
deny:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: taiki-e/install-action@v2
with: { tool: cargo-deny }
- run: cargo deny check
- run: rustup update && rustup override set 1.60.0 && rustup target add wasm32-unknown-unknown && rustup component add clippy
- uses: Swatinem/rust-cache@v2
- run: cargo clippy --lib --tests --target wasm32-unknown-unknown -- -D warnings -D clippy::all
working-directory: ./js
semver_checks:
clippy_msv_wasi:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: taiki-e/install-action@v2
with: { tool: cargo-semver-checks }
- uses: actions/cache@v4
with:
path: rocksdb
key: ${{ runner.os }}-rocksdb-8.0.0
- run: bash .github/workflows/install_rocksdb.sh
- run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph-js --exclude pyoxigraph --exclude oxigraph-testsuite --exclude oxigraph-cli
- run: rustup update && rustup override set 1.60.0 && rustup target add wasm32-wasi && rustup component add clippy
- uses: Swatinem/rust-cache@v2
- run: cargo clippy --lib --tests --target wasm32-wasi -- -D warnings -D clippy::all
working-directory: ./lib
test_linux_x86_64:
deny:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- run: cargo test
- uses: actions/checkout@v3
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: cargo install cargo-deny || true
- run: cargo deny check
test_linux_i686:
semver_checks:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
target: i686-unknown-linux-gnu
- run: sudo apt-get update && sudo apt-get install -y g++-multilib
- run: cargo test --target i686-unknown-linux-gnu --no-default-features --features http-client-rustls-native
working-directory: ./lib/oxigraph
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: cargo install cargo-semver-checks || true
- run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph_js --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server
test_linux_msv:
test_linux:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- name: Read MSRV from Cargo.toml
id: metadata
run: echo "rust-version=$(sed -ne 's/rust-version *= *\"\(.*\)\"/\1/p' Cargo.toml)" >> $GITHUB_OUTPUT
- uses: ./.github/actions/setup-rust
with:
version: ${{ steps.metadata.outputs.rust-version }}
- run: rustup toolchain install nightly
- run: rm Cargo.lock && cargo +nightly update -Z direct-minimal-versions && cargo update -p bumpalo --precise 3.14.0
- run: cargo test
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: cargo test --all-features
env:
RUST_BACKTRACE: 1
test_linux_latest:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: rm Cargo.lock && cargo update
- run: cargo test
test_linux_address_sanitizer:
address_sanitizer:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
version: nightly
- run: sudo apt-get update && sudo apt-get install -y llvm
- run: cargo test --tests --target x86_64-unknown-linux-gnu --workspace --exclude pyoxigraph --exclude oxigraph-testsuite --exclude oxigraph-cli
- run: rustup update && rustup override set nightly
- run: sudo apt install -y llvm
- uses: Swatinem/rust-cache@v2
- run: cargo test --tests --target x86_64-unknown-linux-gnu --workspace --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server
env:
RUST_BACKTRACE: 1
RUSTFLAGS: -Z sanitizer=address
test_linux_dynamic_linking:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: actions/cache@v4
with:
path: rocksdb
key: ${{ runner.os }}-rocksdb-8.0.0
- run: bash .github/workflows/install_rocksdb.sh
- run: cargo test --tests --features oxrocksdb-sys/pkg-config
test_windows:
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
- run: cargo test
- run: cargo test --all-features
env:
RUST_BACKTRACE: 1
test_wasi:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
target: wasm32-wasi
- uses: taiki-e/install-action@v2
with: { tool: "wasmtime,cargo-wasi" }
- run: cargo wasi test --workspace --exclude oxigraph-js --exclude oxigraph-cli --exclude oxigraph-testsuite --exclude oxrocksdb-sys --exclude pyoxigraph
- run: rustup update && rustup target add wasm32-wasi
- uses: Swatinem/rust-cache@v2
- uses: taiki-e/install-action@wasmtime
- run: cargo install cargo-wasi || true
- run: cargo wasi test --workspace --exclude oxigraph_js --exclude oxigraph_server --exclude oxigraph_testsuite --exclude oxrocksdb-sys --exclude pyoxigraph
env:
RUST_BACKTRACE: 1
rustdoc:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: cargo doc --all-features
working-directory: ./lib
rustdoc_msrv:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
version: 1.76.0
- run: cargo doc
submodules: true
- run: rustup update && rustup override set 1.60.0
- uses: Swatinem/rust-cache@v2
- run: cargo doc --all-features
working-directory: ./lib
env:
RUSTDOCFLAGS: -D warnings
js:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/setup-rust
- uses: taiki-e/install-action@v2
with: { tool: wasm-pack }
- uses: actions/setup-node@v4
- uses: actions/checkout@v3
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: cargo install wasm-pack || true
- uses: actions/setup-node@v3
with:
node-version: 18
node-version: 16
cache: npm
cache-dependency-path: "js/package.json"
- run: npm install
@ -279,72 +228,48 @@ jobs:
python:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: actions/setup-python@v5
- run: rustup update
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-python@v4
with:
python-version: "3.12"
python-version: "3.10"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- run: pip install -r python/requirements.dev.txt
- run: python -m black --check --diff --color .
working-directory: ./python
- run: maturin build -m python/Cargo.toml
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python -m unittest
working-directory: ./python/tests
- run: sphinx-build -M doctest . build
working-directory: ./python/docs
- run: sphinx-build -M html . build
working-directory: ./python/docs
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
working-directory: ./python
- run: python -m mypy.stubtest pyoxigraph --allowlist=mypy_allowlist.txt
working-directory: ./python
- run: python -m mypy generate_stubs.py tests --strict
working-directory: ./python
- run: python -m ruff format --check .
working-directory: ./python
- run: python -m ruff check --output-format=github .
working-directory: ./python
- run: sphinx-lint docs
- run: python -m ruff check .
working-directory: ./python
python_msv:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- name: Read MSRV from Cargo.toml
id: metadata
run: echo "rust-version=$(sed -ne 's/rust-version *= *\"\(.*\)\"/\1/p' Cargo.toml)" >> $GITHUB_OUTPUT
- uses: ./.github/actions/setup-rust
- run: rustup update
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-python@v4
with:
version: ${{ steps.metadata.outputs.rust-version }}
- run: rustup toolchain install nightly
- uses: actions/setup-python@v5
with:
python-version: "3.8"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- run: pip install -r python/requirements.dev.txt
- run: rm Cargo.lock && cargo +nightly update -Z direct-minimal-versions
- run: maturin build -m python/Cargo.toml
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python -m unittest
working-directory: ./python/tests
python_pypy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: actions/setup-python@v5
with:
python-version: "pypy3.10"
python-version: "3.7"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- run: pip install -r python/requirements.dev.txt
@ -357,13 +282,14 @@ jobs:
python_windows:
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: actions/setup-python@v5
- run: rustup update
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-python@v4
with:
python-version: "3.12"
python-version: "3.10"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- run: pip install "maturin~=1.0"
@ -373,55 +299,28 @@ jobs:
- run: python -m unittest
working-directory: ./python/tests
python_pyodide:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-python@v5
with:
python-version: "3.11"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- uses: ./.github/actions/setup-rust
with:
version: nightly
target: wasm32-unknown-emscripten
- run: |
pip install pyodide-build
echo EMSCRIPTEN_VERSION=$(pyodide config get emscripten_version) >> $GITHUB_ENV
- uses: mymindstorm/setup-emsdk@v13
with:
version: ${{ env.EMSCRIPTEN_VERSION }}
- run: pyodide build
working-directory: ./python
- run: |
pyodide venv venv
source venv/bin/activate
pip install --no-index --find-links=../dist/ pyoxigraph
python -m unittest
working-directory: ./python/tests
typos:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: taiki-e/install-action@v2
with: { tool: typos-cli }
- uses: actions/checkout@v3
- uses: Swatinem/rust-cache@v2
- run: cargo install typos-cli || true
- run: typos
clang_fmt:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: sudo apt-get update && sudo apt-get install -y clang-format
- uses: actions/checkout@v3
- run: sudo apt install -y clang-format
- run: clang-format --Werror --dry-run oxrocksdb-sys/api/*
fuzz_changes:
if: github.event_name == 'pull_request'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- uses: google/clusterfuzzlite/actions/build_fuzzers@v1
with:
language: rust
@ -431,7 +330,7 @@ jobs:
- uses: google/clusterfuzzlite/actions/run_fuzzers@v1
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
fuzz-seconds: 900
fuzz-seconds: 600
mode: code-change
sanitizer: address
minimize-crashes: true
@ -442,6 +341,9 @@ jobs:
if: github.event_name != 'pull_request'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- uses: google/clusterfuzzlite/actions/build_fuzzers@v1
with:
language: rust
@ -450,7 +352,7 @@ jobs:
- uses: google/clusterfuzzlite/actions/run_fuzzers@v1
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
fuzz-seconds: 7200
fuzz-seconds: 3600
mode: batch
sanitizer: address
minimize-crashes: true
@ -485,52 +387,12 @@ jobs:
shellcheck:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: sudo apt-get update && sudo apt-get install -y shellcheck
- uses: actions/checkout@v3
- run: sudo apt install -y shellcheck
- run: git grep -l '^#\( *shellcheck \|!\(/bin/\|/usr/bin/env \)\(sh\|bash\|dash\|ksh\)\)' | xargs shellcheck
spec_links:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: python lints/test_spec_links.py
debian_compatibility:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/setup-rust
- run: python lints/test_debian_compatibility.py
codspeed:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: taiki-e/install-action@v2
with: { tool: cargo-codspeed }
- run: cargo codspeed build -p oxigraph --features http-client-native-tls
- run: cargo codspeed build -p oxigraph-testsuite
- uses: CodSpeedHQ/action@v2
with:
run: cargo codspeed run
token: ${{ secrets.CODSPEED_TOKEN }}
codecov:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: taiki-e/install-action@v2
with: { tool: cargo-llvm-cov }
- run: cargo llvm-cov --codecov --output-path codecov.json
- uses: codecov/codecov-action@v4
with:
files: codecov.json
flags: rust
fail_ci_if_error: true
token: ${{ secrets.CODECOV_TOKEN }}
- uses: actions/checkout@v3
- run: python test_spec_links.py

7
.gitmodules vendored

@ -10,10 +10,3 @@
[submodule "oxrocksdb-sys/lz4"]
path = oxrocksdb-sys/lz4
url = https://github.com/lz4/lz4.git
[submodule "testsuite/N3"]
path = testsuite/N3
url = https://github.com/w3c/N3.git
branch = master
[submodule "testsuite/rdf-canon"]
path = testsuite/rdf-canon
url = https://github.com/w3c/rdf-canon.git

@ -7,7 +7,7 @@ build:
os: "ubuntu-22.04"
tools:
python: "3"
rust: "1.70"
rust: "1.64"
apt_packages:
- clang

@ -1,123 +1,3 @@
## [0.4.0-alpha.6] - 2024-03-25
### Changed
- Fixes compatibility with OxIRI 0.2.3.
## [0.4.0-alpha.5] - 2024-03-23
### Added
- Python: `Dataset` class
### Changed
- Rust: `Dataset::canonicalize` and `Graph::canonicalize` takes for input a `CanonicalizationAlgorithm` to set which algorithm to use.
- Upgrades RocksDB to 9.0.0
- JS: Drops NodeJS 12-16 and older web browsers (Edge before Chromium...) support.
## [0.4.0-alpha.4] - 2024-03-07
### Added
- Rust: `From<spargebra::Update>` on `Update`.
- `sparesults`: Tokio Async readers for SPARQL query results (XML, JSON and TSV).
- `oxrdf`: `Term::from_terms` constructor.
- JS: options to set query and update base IRI and query union default graph.
### Changed
- Uses RocksDB atomic multi-columns flush.
- Rust: RocksDB is optional but enabled by default.
- Upgrades RocksDB to 8.11.3.
- Rust: `oxigraph` crate has been moved to `lib/oxigraph`.
- Rust: `QueryResults::write` returns the `Write` impl and not `()`.
- Rust: use `thiserror` for errors.
- Rust: rename `oxrdfio::{Parse,Syntax}Error` to `oxrdfio::Rdf{Parse,Syntax}Error`,
`oxrdfxml::{Parse,Syntax}Error` to `oxrdfxml::RdfXml{Parse,Syntax}Error`,
`oxttl::{Parse,Syntax}Error` to `oxttl::Turtle{Parse,Syntax}Error`,
`sparesults::{Parse,Syntax}Error` to `sparesults::QueryResults{Parse,Syntax}Error` and
`spargebra::SyntaxError` to `spargebra::SparqlSyntaxError`.
## [0.4.0-alpha.3] - 2024-01-25
### Added
- `oxttl`: expose base IRIs.
- `oxttl`: allows to inject prefixes for serialization.
- `oxrdf`: `vocab::geosparql::WKT_LITERAL`.
### Changed
- Turtle: Fixes parsing bug with escaped dot at the end of local name.
- `oxttl`: Changes `prefixes` getter return type.
- JS: simplify build.
- Python: uses rustls by default on all platforms that are not Windows/macOS/iOS/WASM.
- Strips debug info of the Rust std library in release build.
## [0.4.0-alpha.2] - 2024-01-08
### Added
- i686 linux support
### Changed
- Docker: fixes Docker image Glib version error.
- Docker: tags now use semver e.g. `0.3.22` and not `v0.3.22`. Preversions are also not tagged `latest` anymore.
- Python: `QuerySolution` is now thread safe.
## [0.4.0-alpha.1] - 2024-01-03
### Added
- `sparopt` crate: A new still quite naive query optimizer.
- `oxttl` crate: A N-Triples/N-Quads/Turtle/TriG/N3 parser and serializer compatible with Tokio.
- `oxrdfxml` crate: A RDF/XML parser and serializer compatible with Tokio.
- `oxrdfio` crate: A stand-alone crate with oxigraph I/O related APIs.
- Rust: SPARQL results I/O is now exposed in the `oxigraph` crate (`oxigraph::sparql::results` module).
- Rust: It is now possible to dynamically link rocksdb with the `rocksdb-pkg-config` feature.
- Python: error location is now included in some `SyntaxError` exceptions.
- Python: the file type can be guessed from the file path extension during parsing and serialization.
- Python: the serialization method returns a `bytes` value if no output-related argument is given.
- Python: SPARQL query results I/O is now exposed (`parse_query_results` function and `.serialize` method).
- Python: `RdfFormat` and `QueryResultsFormat` enum to encode supported formats.
- CLI: a `convert` command to convert RDF file between different formats.
### Removed
- Rust: automated flush at the end of serialization. This should be done explicitly now.
- oxsdatatypes: Deprecated methods.
- Python: 3.7 and Musl linux 1.1 support.
- Python: `GraphName.value`.
### Changed
- SPARQL: a digit after `.` is now required for `xsd:decimal`.
- SPARQL: calendar subtraction returns `xsd:dayTimeDuration` and not `xsd:duration`.
- SPARQL: Unicode escapes (`\u` and `\U`) are now only supported in IRIs and strings and not everywhere.
- Literal serialization now produces canonical N-Triples according to the RDF 1.2 and RDF Dataset Canonicalization drafts
- Rust: MSRV is now 1.70.
- Rust: makes `GraphName` implement `Default`.
- Rust: `wasm32-unknown-unknown` does not assume JS platform by default. Enable the `js` feature for that.
- Rust: Parsers take `Read` and not `BufRead` for input.
- Rust: `GraphFormat` and `DatasetFormat` have been merged into `RdfFormat`.
- Rust: `GraphParser` and `DatasetParser` have been merged into `RdfParser`.
- Rust: `GraphSerializer` and `DatasetSerializer` have been merged into `RdfSerializer`.
- Rust: query results are now `Send` and `Sync`.
- Rust: `Store.load_graph` and `Store.load_dataset` have been merged into a `load_from_read` method.
- Rust: `Store.dump_graph` and `Store.dump_dataset` have been renamed to `dump_graph_to_write` and `dump_to_write`.
- Rust: `BulkLoader.set_*` methods have been renamed to `BulkLoader.with_*`.
- oxsdatatypes: pass by-values instead of by-reference parameters when relevant.
- oxsdatatypes: error types have been redesigned.
- oxsdatatypes: return an error when building not serializable duration (year-month and day-time of opposite signs).
- sparesults: renames some methods to move closer to the new oxrdfio crate.
- Python: raise `OSError` instead of `IOError` on OS errors.
- Python: the `mime_type` parameter has been renamed to `format`.
- Python: boolean SPARQL results are now encoded with a `QueryBoolean` class and not a simple `bool`.
- Python: a `path` parameter has been added to all I/O methods to read from a file.
The existing `input` parameter now considers `str` values to be a serialization to parse.
- JS: the `mime_type` parameter has been renamed to `format`.
- CLI: the `oxigraph_server` binary has been renamed to `oxigraph`.
- CLI: the `--location` argument is now part of sub-commands where it is relevant.
`oxigraph_server --location foo serve` is not possible anymore.
One needs to write `oxigraph serve --location foo`.
- CLI: it is now possible to upload gzip encoded files to the HTTP API with the `Content-Encoding: gzip` header.
## [0.3.22] - 2023-11-29
### Changed
@ -441,7 +321,7 @@
### Added
- [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) support. `Triple` is now a possible `Term`. Serialization formats and SPARQL support have been updated to match the [latest version of the specification draft](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html).
- Fast data bulk load with the `Store` `bulk_load_dataset` and `bulk_load_graph` methods and a special command-line option of the server.
- Fast data bulk load with the `Store` `bulk_load_dataset` and `bulk_load_graph` methods and a special command line option of the server.
- It is now possible to quickly backup the database using the `backup` method.
- Rust: `*Syntax::from_extension` to easy guess a graph/dataset/sparql result format from a file extension.
- Rust: Custom SPARQL functions are now supported using `QueryOptions::with_custom_function`.
@ -457,7 +337,7 @@
- SPARQL: The parser now validates more carefully the inputs following the SPARQL specification and test suite.
- SPARQL: Variable scoping was buggy with "FILTER EXISTS". It is now fixed.
- Rust: RDF model, SPARQL parser and SPARQL result parsers have been moved to stand-alone reusable libraries.
- Rust: HTTPS is not supported by default with the `http_client` option. You need to enable the `native-tls` or the `rustls` feature of the `oxhttp` crate to enable a TLS layer.
- Rust: HTTPS is not supported by default with the `http_client` option. You need to enable the `native-tls` or the `rustls` feature of the `oxhttp` crate to enable a TSL layer.
- Rust: The error types have been cleaned.
Most of the `Store` methods now return a `StorageError` that is more descriptive than the previous `std::io::Error`.
The new error type all implements `Into<std::io::Error>` for easy conversion.
@ -465,7 +345,7 @@
It is The used type of the `subject` field of the `Triple` and `Quad` structs.
- Rust: The SPARQL algebra is not anymore publicly exposed in the `oxigraph` crate. The new `oxalgebra` crate exposes it.
- Rust: `UpdateOptions` API have been rewritten. It can now be built using `From<QueryOptions>` or `Default`.
- Server: The command-line API has been redesign. See the [server README](server/README.md) for more information.
- Server: The command line API has been redesign. See the [server README](server/README.md) for more information.
- Server: The HTTP implementation is now provided by [`oxhttp`](https://github.com/oxigraph/oxhttp).
- Server: The HTTP response bodies are now generated on the fly instead of being buffered.
- Python: The `SledStore` and `MemoryStore` classes have been removed in favor of the `Store` class.

1541
Cargo.lock generated

File diff suppressed because it is too large Load Diff

@ -1,272 +1,21 @@
[workspace]
members = [
"js",
"lib/oxigraph",
"lib",
"lib/oxrdf",
"lib/oxrdfio",
"lib/oxrdfxml",
"lib/oxsdatatypes",
"lib/oxttl",
"lib/sparesults",
"lib/spargebra",
"lib/sparopt",
"lib/sparesults",
"lib/sparql-smith",
"python",
"server",
"testsuite"
]
resolver = "2"
[workspace.package]
version = "0.4.0-alpha.7-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
edition = "2021"
rust-version = "1.70"
[workspace.dependencies]
rocksdb = {git = "https://git.nextgraph.org/NextGraph/rust-rocksdb.git", branch = "master", features = [ ] }
serde = { version = "1.0.142", features = ["derive"] }
anyhow = "1.0.72"
arbitrary = "1.3"
assert_cmd = "2.0"
assert_fs = "1.0"
bindgen = ">=0.60, <0.70"
cc = "1.0.73"
clap = "4.0"
codspeed-criterion-compat = "2.3.3"
console_error_panic_hook = "0.1.7"
digest = "0.10"
flate2 = "1.0"
getrandom = "0.2.8"
hex = "0.4"
js-sys = "0.3.60"
json-event-parser = "0.2.0-alpha.2"
md-5 = "0.10"
memchr = "2.5"
oxilangtag = "0.1"
oxiri = "0.2.3"
peg = "0.8"
pkg-config = "0.3.25"
predicates = ">=2.0, <4.0"
pyo3 = "0.21.0"
quick-xml = ">=0.29, <0.32"
rand = "0.8"
rayon-core = "1.11"
regex = "1.7"
sha1 = "0.10"
sha2 = "0.10"
siphasher = ">=0.3, <2.0"
text-diff = "0.4"
thiserror = "1.0.50"
time = "0.3"
tokio = "1.29"
url = "2.4"
wasm-bindgen = "0.2.83"
zstd = ">=0.12, <0.14"
# Internal dependencies
oxigraph = { version = "=0.4.0-alpha.7-dev", path = "lib/oxigraph" }
oxrdf = { version = "=0.2.0-alpha.4", path = "lib/oxrdf" }
oxrdfio = { version = "=0.1.0-alpha.5", path = "lib/oxrdfio" }
oxrdfxml = { version = "=0.1.0-alpha.5", path = "lib/oxrdfxml" }
oxsdatatypes = { version = "=0.2.0-alpha.1", path = "lib/oxsdatatypes" }
oxttl = { version = "=0.1.0-alpha.5", path = "lib/oxttl" }
sparesults = { version = "=0.2.0-alpha.4", path = "lib/sparesults" }
spargebra = { version = "=0.3.0-alpha.4", path = "lib/spargebra" }
sparopt = { version = "=0.1.0-alpha.5-dev", path = "lib/sparopt" }
[workspace.lints.rust]
absolute_paths_not_starting_with_crate = "warn"
elided_lifetimes_in_paths = "warn"
explicit_outlives_requirements = "warn"
let_underscore_drop = "warn"
macro_use_extern_crate = "warn"
# TODO missing_docs = "warn"
trivial_casts = "warn"
trivial_numeric_casts = "warn"
unsafe_code = "warn"
unused_import_braces = "warn"
unused_lifetimes = "warn"
unused_macro_rules = "warn"
unused_qualifications = "warn"
[workspace.lints.clippy]
allow_attributes = "warn"
allow_attributes_without_reason = "warn"
as_underscore = "warn"
assertions_on_result_states = "warn"
bool_to_int_with_if = "warn"
borrow_as_ptr = "warn"
case_sensitive_file_extension_comparisons = "warn"
cast_lossless = "warn"
cast_possible_truncation = "warn"
cast_possible_wrap = "warn"
cast_precision_loss = "warn"
cast_ptr_alignment = "warn"
cast_sign_loss = "warn"
checked_conversions = "warn"
clone_on_ref_ptr = "warn"
cloned_instead_of_copied = "warn"
copy_iterator = "warn"
create_dir = "warn"
dbg_macro = "warn"
decimal_literal_representation = "warn"
default_trait_access = "warn"
default_union_representation = "warn"
deref_by_slicing = "warn"
disallowed_script_idents = "warn"
doc_link_with_quotes = "warn"
empty_drop = "warn"
empty_enum = "warn"
empty_structs_with_brackets = "warn"
enum_glob_use = "warn"
error_impl_error = "warn"
exit = "warn"
expect_used = "warn"
expl_impl_clone_on_copy = "warn"
explicit_deref_methods = "warn"
explicit_into_iter_loop = "warn"
explicit_iter_loop = "warn"
filetype_is_file = "warn"
filter_map_next = "warn"
flat_map_option = "warn"
fn_params_excessive_bools = "warn"
fn_to_numeric_cast_any = "warn"
format_push_string = "warn"
from_iter_instead_of_collect = "warn"
get_unwrap = "warn"
host_endian_bytes = "warn"
if_not_else = "warn"
if_then_some_else_none = "warn"
ignored_unit_patterns = "warn"
implicit_clone = "warn"
implicit_hasher = "warn"
inconsistent_struct_constructor = "warn"
index_refutable_slice = "warn"
inefficient_to_string = "warn"
infinite_loop = "warn"
inline_always = "warn"
inline_asm_x86_att_syntax = "warn"
inline_asm_x86_intel_syntax = "warn"
into_iter_without_iter = "warn"
invalid_upcast_comparisons = "warn"
items_after_statements = "warn"
iter_not_returning_iterator = "warn"
iter_without_into_iter = "warn"
large_digit_groups = "warn"
large_futures = "warn"
large_include_file = "warn"
large_stack_arrays = "warn"
large_types_passed_by_value = "warn"
let_underscore_must_use = "warn"
let_underscore_untyped = "warn"
linkedlist = "warn"
lossy_float_literal = "warn"
macro_use_imports = "warn"
manual_assert = "warn"
manual_instant_elapsed = "warn"
manual_let_else = "warn"
manual_ok_or = "warn"
manual_string_new = "warn"
many_single_char_names = "warn"
map_unwrap_or = "warn"
match_bool = "warn"
match_on_vec_items = "warn"
match_same_arms = "warn"
match_wild_err_arm = "warn"
match_wildcard_for_single_variants = "warn"
maybe_infinite_iter = "warn"
mem_forget = "warn"
mismatching_type_param_order = "warn"
missing_assert_message = "warn"
missing_asserts_for_indexing = "warn"
missing_fields_in_debug = "warn"
multiple_inherent_impl = "warn"
mut_mut = "warn"
mutex_atomic = "warn"
naive_bytecount = "warn"
needless_bitwise_bool = "warn"
needless_continue = "warn"
needless_for_each = "warn"
needless_pass_by_value = "warn"
needless_raw_string_hashes = "warn"
needless_raw_strings = "warn"
negative_feature_names = "warn"
no_effect_underscore_binding = "warn"
no_mangle_with_rust_abi = "warn"
non_ascii_literal = "warn"
panic = "warn"
panic_in_result_fn = "warn"
partial_pub_fields = "warn"
print_stderr = "warn"
print_stdout = "warn"
ptr_as_ptr = "warn"
ptr_cast_constness = "warn"
pub_without_shorthand = "warn"
range_minus_one = "warn"
range_plus_one = "warn"
rc_buffer = "warn"
rc_mutex = "warn"
redundant_closure_for_method_calls = "warn"
redundant_else = "warn"
redundant_feature_names = "warn"
redundant_type_annotations = "warn"
ref_binding_to_reference = "warn"
ref_option_ref = "warn"
ref_patterns = "warn"
rest_pat_in_fully_bound_structs = "warn"
return_self_not_must_use = "warn"
same_functions_in_if_condition = "warn"
same_name_method = "warn"
semicolon_inside_block = "warn"
shadow_same = "warn"
should_panic_without_expect = "warn"
single_match_else = "warn"
stable_sort_primitive = "warn"
str_to_string = "warn"
string_add = "warn"
string_add_assign = "warn"
string_lit_chars_any = "warn"
string_to_string = "warn"
struct_excessive_bools = "warn"
struct_field_names = "warn"
suspicious_xor_used_as_pow = "warn"
tests_outside_test_module = "warn"
todo = "warn"
transmute_ptr_to_ptr = "warn"
trivially_copy_pass_by_ref = "warn"
try_err = "warn"
unchecked_duration_subtraction = "warn"
undocumented_unsafe_blocks = "warn"
unicode_not_nfc = "warn"
unimplemented = "warn"
uninlined_format_args = "warn"
unnecessary_box_returns = "warn"
unnecessary_join = "warn"
unnecessary_safety_comment = "warn"
unnecessary_safety_doc = "warn"
unnecessary_self_imports = "warn"
unnecessary_wraps = "warn"
unneeded_field_pattern = "warn"
unnested_or_patterns = "warn"
unreadable_literal = "warn"
unsafe_derive_deserialize = "warn"
unseparated_literal_suffix = "warn"
unused_async = "warn"
unused_self = "warn"
unwrap_in_result = "warn"
use_debug = "warn"
used_underscore_binding = "warn"
verbose_bit_mask = "warn"
verbose_file_reads = "warn"
wildcard_dependencies = "warn"
zero_sized_map_values = "warn"
[profile.release]
lto = true
codegen-units = 1
strip = "debuginfo"
[profile.release.package.oxigraph-js]
codegen-units = 1
[profile.release.package.oxigraph_js]
opt-level = "z"
strip = "debuginfo"

@ -4,10 +4,9 @@
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph)
[![PyPI](https://img.shields.io/pypi/v/pyoxigraph)](https://pypi.org/project/pyoxigraph/)
[![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph)
[![tests status](https://github.com/oxigraph/oxigraph/actions/workflows/tests.yml/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![artifacts status](https://github.com/oxigraph/oxigraph/actions/workflows/artifacts.yml/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![dependency status](https://deps.rs/repo/github/oxigraph/oxigraph/status.svg)](https://deps.rs/repo/github/oxigraph/oxigraph)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
[![Twitter URL](https://img.shields.io/twitter/url?style=social&url=https%3A%2F%2Ftwitter.com%2Foxigraph)](https://twitter.com/oxigraph)
Oxigraph is a graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
@ -20,11 +19,7 @@ Oxigraph is in heavy development and SPARQL query evaluation has not been optimi
The development roadmap is using [GitHub milestones](https://github.com/oxigraph/oxigraph/milestones?direction=desc&sort=completeness&state=open).
Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
Oxigraph implements the following specifications:
- [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
- [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval.
- [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
The future 0.4 release is currently in development in the [next branch](https://github.com/oxigraph/oxigraph/tree/next).
It is split into multiple parts:
@ -34,22 +29,14 @@ It is split into multiple parts:
- [`pyoxigraph` that exposes Oxigraph to the Python world](https://pyoxigraph.readthedocs.io/). Its source code is in the `python` directory. [![PyPI](https://img.shields.io/pypi/v/pyoxigraph)](https://pypi.org/project/pyoxigraph/)
- [JavaScript bindings for Oxigraph](https://www.npmjs.com/package/oxigraph). WebAssembly is used to package Oxigraph into a NodeJS compatible NPM package. Its source code is in the `js` directory.
[![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph)
- [Oxigraph binary](https://crates.io/crates/oxigraph-cli) that provides a standalone command-line tool allowing to manipulate RDF data and spawn a web server implementing the [SPARQL 1.1 Protocol](https://www.w3.org/TR/sparql11-protocol/) and the [SPARQL 1.1 Graph Store Protocol](https://www.w3.org/TR/sparql11-http-rdf-update/). Its source code is in the `cli` directory.
Note that it was previously named [Oxigraph server](https://crates.io/crates/oxigraph-server).
[![Latest Version](https://img.shields.io/crates/v/oxigraph-cli.svg)](https://crates.io/crates/oxigraph-cli)
Also, some parts of Oxigraph are available as standalone Rust crates:
* [`oxrdf`](https://crates.io/crates/oxrdf), datastructures encoding RDF basic concepts (the [`oxigraph::model`](crate::model) module).
* [`oxrdfio`](https://crates.io/crates/oxrdfio), a unified parser and serializer API for RDF formats (the [`oxigraph::io`](crate::io) module). It itself relies on:
* [`oxttl`](https://crates.io/crates/oxttl), N-Triple, N-Quad, Turtle, TriG and N3 parsing and serialization.
* [`oxrdfxml`](https://crates.io/crates/oxrdfxml), RDF/XML parsing and serialization.
* [`spargebra`](https://crates.io/crates/spargebra), a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults), parsers and serializers for SPARQL result formats.
* [`sparopt`](https://crates.io/crates/sparopt), a SPARQL optimizer.
* [`oxsdatatypes`](https://crates.io/crates/oxsdatatypes), an implementation of some XML Schema datatypes.
The library layers in Oxigraph. The elements above depend on the elements below:
![Oxigraph libraries architecture diagram](./docs/arch-diagram.svg)
- [Oxigraph server](https://crates.io/crates/oxigraph_server) that provides a standalone binary of a web server implementing the [SPARQL 1.1 Protocol](https://www.w3.org/TR/sparql11-protocol/) and the [SPARQL 1.1 Graph Store Protocol](https://www.w3.org/TR/sparql11-http-rdf-update/). Its source code is in the `server` directory.
[![Latest Version](https://img.shields.io/crates/v/oxigraph_server.svg)](https://crates.io/crates/oxigraph_server)
Oxigraph implements the following specifications:
- [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
- [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio).
- [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
A preliminary benchmark [is provided](bench/README.md). There is also [a document describing Oxigraph technical architecture](https://github.com/oxigraph/oxigraph/wiki/Architecture).
@ -85,9 +72,8 @@ Unless you explicitly state otherwise, any contribution intentionally submitted
## Sponsors
* [RelationLabs](https://relationlabs.ai/) that is building [Relation-Graph](https://github.com/relationlabs/Relation-Graph), a SPARQL database module for the [Substrate blockchain platform](https://substrate.io/) based on Oxigraph.
* [Field 33](https://field33.com) that was building [an ontology management platform](https://plow.pm/).
* [Magnus Bakken](https://github.com/magbak) who is building [Data Treehouse](https://www.data-treehouse.com/), a time-series + RDF datalake platform, and [chrontext](https://github.com/magbak/chrontext), a SPARQL query endpoint on top of joint RDF and time series databases.
* [DeciSym.AI](https://www.decisym.ai/), a cyber security consulting company providing RDF-based software.
* [Field 33](https://field33.com) that is building [an ontology management platform](https://plow.pm/).
* [Magnus Bakken](https://github.com/magbak) who is building [chrontext](https://github.com/magbak/chrontext), providing a SPARQL query endpoint on top of joint RDF and time series databases.
* [ACE IoT Solutions](https://aceiotsolutions.com/), a building IOT platform.
* [Albin Larsson](https://byabbe.se/) who is building [GovDirectory](https://www.govdirectory.org/), a directory of public agencies based on Wikidata.

@ -6,10 +6,10 @@ PARALLELISM=16
set -eu
cd bsbm-tools
./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
cargo build --release --manifest-path="../../cli/Cargo.toml"
VERSION=$(./../../target/release/oxigraph --version | sed 's/oxigraph //g')
./../../target/release/oxigraph --location oxigraph_data load --file "explore-${DATASET_SIZE}.nt"
./../../target/release/oxigraph --location oxigraph_data serve --bind 127.0.0.1:7878 &
cargo build --release --manifest-path="../../server/Cargo.toml"
VERSION=$(./../../target/release/oxigraph_server --version | sed 's/oxigraph_server //g')
./../../target/release/oxigraph_server --location oxigraph_data load --file "explore-${DATASET_SIZE}.nt"
./../../target/release/oxigraph_server --location oxigraph_data serve --bind 127.0.0.1:7878 &
sleep 1
./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.oxigraph.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://127.0.0.1:7878/query
./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.oxigraph.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://127.0.0.1:7878/query -u http://127.0.0.1:7878/update -udataset "explore-update-${DATASET_SIZE}.nt"

@ -1,4 +1,4 @@
avoid-breaking-exported-api = false
avoid-breaking-exported-api = true
cognitive-complexity-threshold = 50
too-many-arguments-threshold = 10
type-complexity-threshold = 500

@ -1,18 +1,12 @@
[advisories]
version = 2
ignore = ["RUSTSEC-2018-0015"]
[licenses]
version = 2
unlicensed = "deny"
allow = [
"Apache-2.0",
"Apache-2.0 WITH LLVM-exception",
"BSD-3-Clause",
"ISC",
"MIT",
"OpenSSL",
"Unicode-DFS-2016"
"Apache-2.0",
"Apache-2.0 WITH LLVM-exception"
]
allow-osi-fsf-free = "either"
default = "deny"
[[licenses.clarify]]
name = "ring"

@ -1,120 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="624" height="384" class="svgbob">
<style>.svgbob line, .svgbob path, .svgbob circle, .svgbob rect, .svgbob polygon {
stroke: black;
stroke-width: 2;
stroke-opacity: 1;
fill-opacity: 1;
stroke-linecap: round;
stroke-linejoin: miter;
}
.svgbob text {
white-space: pre;
fill: black;
font-family: Iosevka Fixed, monospace;
font-size: 14px;
}
.svgbob rect.backdrop {
stroke: none;
fill: white;
}
.svgbob .broken {
stroke-dasharray: 8;
}
.svgbob .filled {
fill: black;
}
.svgbob .bg_filled {
fill: white;
stroke-width: 1;
}
.svgbob .nofill {
fill: white;
}
.svgbob .end_marked_arrow {
marker-end: url(#arrow);
}
.svgbob .start_marked_arrow {
marker-start: url(#arrow);
}
.svgbob .end_marked_diamond {
marker-end: url(#diamond);
}
.svgbob .start_marked_diamond {
marker-start: url(#diamond);
}
.svgbob .end_marked_circle {
marker-end: url(#circle);
}
.svgbob .start_marked_circle {
marker-start: url(#circle);
}
.svgbob .end_marked_open_circle {
marker-end: url(#open_circle);
}
.svgbob .start_marked_open_circle {
marker-start: url(#open_circle);
}
.svgbob .end_marked_big_open_circle {
marker-end: url(#big_open_circle);
}
.svgbob .start_marked_big_open_circle {
marker-start: url(#big_open_circle);
}<!--separator-->.svgbob .r{
fill: papayawhip;
}
.svgbob .p{
fill: lightyellow;
}
.svgbob .j{
fill: lightgreen;
}
</style>
<defs>
<marker id="arrow" viewBox="-2 -2 8 8" refX="4" refY="2" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<polygon points="0,0 0,4 4,2 0,0"></polygon>
</marker>
<marker id="diamond" viewBox="-2 -2 8 8" refX="4" refY="2" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<polygon points="0,2 2,0 4,2 2,4 0,2"></polygon>
</marker>
<marker id="circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="2" class="filled"></circle>
</marker>
<marker id="open_circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="2" class="bg_filled"></circle>
</marker>
<marker id="big_open_circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="3" class="bg_filled"></circle>
</marker>
</defs>
<rect class="backdrop" x="0" y="0" width="624" height="384"></rect>
<rect x="4" y="8" width="152" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="28">oxigraph CLI</text>
<rect x="244" y="8" width="136" height="32" class="solid nofill p" rx="0"></rect>
<text x="258" y="28">pyoxigraph</text>
<rect x="468" y="8" width="144" height="32" class="solid nofill j" rx="0"></rect>
<text x="482" y="28">oxigraph</text>
<text x="554" y="28">JS</text>
<rect x="4" y="72" width="608" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="92">oxigraph</text>
<rect x="68" y="136" width="232" height="32" class="solid nofill r" rx="0"></rect>
<text x="82" y="156">oxrdfio</text>
<rect x="348" y="136" width="112" height="32" class="solid nofill r" rx="0"></rect>
<text x="362" y="156">sparopt</text>
<rect x="68" y="200" width="96" height="32" class="solid nofill r" rx="0"></rect>
<text x="82" y="220">oxttl</text>
<rect x="180" y="200" width="120" height="32" class="solid nofill r" rx="0"></rect>
<text x="194" y="220">oxrdfxml</text>
<rect x="316" y="200" width="144" height="32" class="solid nofill r" rx="0"></rect>
<text x="330" y="220">spargebra</text>
<rect x="476" y="200" width="136" height="32" class="solid nofill r" rx="0"></rect>
<text x="490" y="220">sparesults</text>
<rect x="36" y="264" width="576" height="32" class="solid nofill r" rx="0"></rect>
<text x="50" y="284">oxrdf</text>
<rect x="4" y="328" width="152" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="348">oxsdatatypes</text>
</svg>

Before

Width:  |  Height:  |  Size: 4.6 KiB

@ -1,35 +0,0 @@
+------------------+ +----------------+ +-----------------+
+ oxigraph CLI {r} + + pyoxigraph {p} + + oxigraph JS {j} +
+------------------+ +----------------+ +-----------------+
+---------------------------------------------------------------------------+
+ oxigraph (Rust) {r} +
+---------------------------------------------------------------------------+
+----------------------------+ +-------------+
+ oxrdfio {r} + + sparopt {r} +
+----------------------------+ +-------------+
+-----------+ +--------------+ +-----------------+ +----------------+
+ oxttl {r} + + oxrdfxml {r} + + spargebra {r} + + sparesults {r} +
+-----------+ +--------------+ +-----------------+ +----------------+
+-----------------------------------------------------------------------+
+ oxrdf {r} +
+-----------------------------------------------------------------------+
+------------------+
+ oxsdatatypes {r} +
+------------------+
# Legend:
r = {
fill: papayawhip;
}
p = {
fill: lightyellow;
}
j = {
fill: lightgreen;
}

@ -1,6 +1,7 @@
[package]
name = "oxigraph-fuzz"
version = "0.0.0"
authors = ["Automatically generated"]
publish = false
edition = "2021"
@ -8,15 +9,13 @@ edition = "2021"
cargo-fuzz = true
[dependencies]
anyhow = "1.0.72"
anyhow = "1"
lazy_static = "1"
libfuzzer-sys = "0.4"
oxigraph = { path = "../lib/oxigraph", default-features = false }
oxrdf = { path = "../lib/oxrdf", features = ["rdf-star"] }
oxrdfxml = { path = "../lib/oxrdfxml" }
oxttl = { path = "../lib/oxttl", features = ["rdf-star"] }
sparesults = { path = "../lib/sparesults", features = ["rdf-star"] }
spargebra = { path = "../lib/spargebra", features = ["rdf-star", "sep-0006"] }
sparesults = { path = "../lib/sparesults", features = ["rdf-star"] }
sparql-smith = { path = "../lib/sparql-smith", features = ["sep-0006"] }
oxigraph = { path = "../lib" }
[profile.release]
codegen-units = 1
@ -24,18 +23,6 @@ debug = true
[workspace]
[[bin]]
name = "nquads"
path = "fuzz_targets/nquads.rs"
[[bin]]
name = "n3"
path = "fuzz_targets/n3.rs"
[[bin]]
name = "rdf_xml"
path = "fuzz_targets/rdf_xml.rs"
[[bin]]
name = "sparql_eval"
path = "fuzz_targets/sparql_eval.rs"
@ -59,7 +46,3 @@ path = "fuzz_targets/sparql_results_xml.rs"
[[bin]]
name = "sparql_results_tsv"
path = "fuzz_targets/sparql_results_tsv.rs"
[[bin]]
name = "trig"
path = "fuzz_targets/trig.rs"

@ -1,28 +0,0 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxttl::N3Parser;
// Fuzz target for the N3 push parser: the input is fed chunk by chunk and the
// parser must report a clean end state once every chunk has been consumed.
fuzz_target!(|data: &[u8]| {
    let mut parsed = Vec::new();
    let mut reader = N3Parser::new()
        .with_base_iri("http://example.com/")
        .unwrap()
        .parse();
    // 0xFF bytes act as chunk separators and are never handed to the parser.
    for piece in data.split(|byte| *byte == 0xFF) {
        reader.extend_from_slice(piece);
        while let Some(outcome) = reader.read_next() {
            // Only successfully parsed quads are kept; errors are dropped.
            parsed.extend(outcome.ok());
        }
    }
    // Signal the end of the input, then drain whatever is still buffered.
    reader.end();
    while let Some(outcome) = reader.read_next() {
        parsed.extend(outcome.ok());
    }
    assert!(reader.is_end());
    //TODO: serialize
});

@ -1,84 +0,0 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdf::Quad;
use oxttl::{NQuadsParser, NQuadsSerializer};
/// Feeds `chunks` one after the other to a push-based N-Quads parser
/// (quoted triples enabled) and gathers everything it produces.
///
/// When `unchecked` is true the parser is switched to its `unchecked()` mode.
///
/// Returns the parsed quads together with the string form of every error
/// reported by the parser, both in the order they were produced.
fn parse<'a>(
    chunks: impl IntoIterator<Item = &'a [u8]>,
    unchecked: bool,
) -> (Vec<Quad>, Vec<String>) {
    let base = NQuadsParser::new().with_quoted_triples();
    let configured = if unchecked { base.unchecked() } else { base };
    let mut reader = configured.parse();

    let mut quads = Vec::new();
    let mut errors = Vec::new();
    for piece in chunks {
        reader.extend_from_slice(piece);
        // Drain everything that can already be read from the buffered bytes.
        while let Some(outcome) = reader.read_next() {
            match outcome {
                Err(failure) => errors.push(failure.to_string()),
                Ok(parsed) => quads.push(parsed),
            }
        }
    }

    // No more input: flush what the parser was still holding back.
    reader.end();
    while let Some(outcome) = reader.read_next() {
        match outcome {
            Err(failure) => errors.push(failure.to_string()),
            Ok(parsed) => quads.push(parsed),
        }
    }
    assert!(reader.is_end());

    (quads, errors)
}
// Differential fuzz target for the N-Quads parser and serializer.
// It checks that (1) chunked and single-buffer parsing agree, (2) the
// unchecked parser agrees with the validating one on error-free input and
// (3) serializing the parsed quads and parsing them again is lossless.
fuzz_target!(|data: &[u8]| {
// We parse with splitting (0xFF bytes are used as chunk boundaries)
let (quads, errors) = parse(data.split(|c| *c == 0xFF), false);
// We parse without splitting (the same bytes with the 0xFF separators removed, as a single chunk)
let (quads_without_split, errors_without_split) = parse(
[data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()],
false,
);
// Chunking must change neither the parsed quads nor the reported errors
assert_eq!(quads, quads_without_split);
assert_eq!(errors, errors_without_split);
// We test also unchecked if valid
if errors.is_empty() {
let (quads_unchecked, errors_unchecked) = parse(data.split(|c| *c == 0xFF), true);
assert!(errors_unchecked.is_empty());
assert_eq!(quads, quads_unchecked);
}
// We serialize
let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new());
for quad in &quads {
writer.write_quad(quad).unwrap();
}
let new_serialization = writer.finish();
// We parse the serialization
// Any parse error is turned into a panic message showing the serialization, the quads and the original input
let new_quads = NQuadsParser::new()
.with_quoted_triples()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {quads:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_quads, quads);
});

@ -1,35 +0,0 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdfxml::{RdfXmlParser, RdfXmlSerializer};
// Round-trip fuzz target for the RDF/XML parser and serializer: whatever
// triples can be parsed from the input must survive being serialized and
// parsed again unchanged.
fuzz_target!(|data: &[u8]| {
// We parse
// `flatten()` keeps only the `Ok` items, so parse errors on the fuzzer input are ignored
let triples = RdfXmlParser::new()
.parse_read(data)
.flatten()
.collect::<Vec<_>>();
// We serialize
let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new());
for triple in &triples {
writer.write_triple(triple).unwrap();
}
let new_serialization = writer.finish().unwrap();
// We parse the serialization
// Here any parse error is fatal: it is turned into a panic message showing the serialization, the triples and the original input
let new_triples = RdfXmlParser::new()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {triples:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_triples, triples);
});

@ -1,26 +1,31 @@
#![no_main]
use lazy_static::lazy_static;
use libfuzzer_sys::fuzz_target;
use oxigraph::io::RdfFormat;
use oxigraph::io::DatasetFormat;
use oxigraph::sparql::{Query, QueryOptions, QueryResults, QuerySolutionIter};
use oxigraph::store::Store;
use std::sync::OnceLock;
fuzz_target!(|data: sparql_smith::Query| {
static STORE: OnceLock<Store> = OnceLock::new();
let store = STORE.get_or_init(|| {
lazy_static! {
static ref STORE: Store = {
let store = Store::new().unwrap();
store
.load_from_read(RdfFormat::TriG, sparql_smith::DATA_TRIG.as_bytes())
.load_dataset(
sparql_smith::DATA_TRIG.as_bytes(),
DatasetFormat::TriG,
None,
)
.unwrap();
store
});
};
}
fuzz_target!(|data: sparql_smith::Query| {
let query_str = data.to_string();
if let Ok(query) = Query::parse(&query_str, None) {
let options = QueryOptions::default();
let with_opt = store.query_opt(query.clone(), options.clone()).unwrap();
let without_opt = store
let with_opt = STORE.query_opt(query.clone(), options.clone()).unwrap();
let without_opt = STORE
.query_opt(query, options.without_optimizations())
.unwrap();
match (with_opt, without_opt) {
@ -47,7 +52,7 @@ fn query_solutions_key(iter: QuerySolutionIter, is_reduced: bool) -> String {
let mut b = t
.unwrap()
.iter()
.map(|(var, val)| format!("{var}: {val}"))
.map(|(var, val)| format!("{}: {}", var, val))
.collect::<Vec<_>>();
b.sort_unstable();
b.join(" ")

@ -3,5 +3,5 @@ use libfuzzer_sys::fuzz_target;
use spargebra::Query;
fuzz_target!(|data: &str| {
let _ = Query::parse(data, None);
Query::parse(data, None);
});

@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format;
use sparesults::QueryResultsFormat;
fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Json, data));
fuzz_target!(|data: &[u8]| { fuzz_result_format(QueryResultsFormat::Json, data) });

@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format;
use sparesults::QueryResultsFormat;
fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Tsv, data));
fuzz_target!(|data: &[u8]| { fuzz_result_format(QueryResultsFormat::Tsv, data) });

@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format;
use sparesults::QueryResultsFormat;
fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Xml, data));
fuzz_target!(|data: &[u8]| { fuzz_result_format(QueryResultsFormat::Xml, data) });

@ -4,5 +4,5 @@ use spargebra::Update;
use std::str;
fuzz_target!(|data: &str| {
let _ = Update::parse(data, None);
Update::parse(data, None);
});

@ -1,166 +0,0 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdf::graph::CanonicalizationAlgorithm;
use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple};
use oxttl::{TriGParser, TriGSerializer};
/// Feeds `chunks` one after the other to a push-based TriG parser (quoted
/// triples enabled, base IRI `http://example.com/`) and gathers its output.
///
/// When `unchecked` is true the parser is switched to its `unchecked()` mode.
///
/// Returns, in this order: the parsed quads, the string form of every error
/// reported by the parser, and the `(name, IRI)` prefix pairs exposed by the
/// parser once the whole input has been consumed.
fn parse<'a>(
    chunks: impl IntoIterator<Item = &'a [u8]>,
    unchecked: bool,
) -> (Vec<Quad>, Vec<String>, Vec<(String, String)>) {
    let base = TriGParser::new()
        .with_quoted_triples()
        .with_base_iri("http://example.com/")
        .unwrap();
    let configured = if unchecked { base.unchecked() } else { base };
    let mut reader = configured.parse();

    let mut quads = Vec::new();
    let mut errors = Vec::new();
    for piece in chunks {
        reader.extend_from_slice(piece);
        // Drain everything that can already be read from the buffered bytes.
        while let Some(outcome) = reader.read_next() {
            match outcome {
                Err(failure) => errors.push(failure.to_string()),
                Ok(parsed) => quads.push(parsed),
            }
        }
    }

    // No more input: flush what the parser was still holding back.
    reader.end();
    while let Some(outcome) = reader.read_next() {
        match outcome {
            Err(failure) => errors.push(failure.to_string()),
            Ok(parsed) => quads.push(parsed),
        }
    }
    assert!(reader.is_end());

    let prefixes = reader
        .prefixes()
        .map(|(name, iri)| (name.to_owned(), iri.to_owned()))
        .collect();
    (quads, errors, prefixes)
}
/// Counts the blank node occurrences in the subject and object positions of
/// `triple`, recursing into quoted triples.
fn count_triple_blank_nodes(triple: &Triple) -> usize {
    let in_subject = match &triple.subject {
        Subject::BlankNode(_) => 1,
        Subject::Triple(nested) => count_triple_blank_nodes(nested),
        _ => 0,
    };
    let in_object = match &triple.object {
        Term::BlankNode(_) => 1,
        Term::Triple(nested) => count_triple_blank_nodes(nested),
        _ => 0,
    };
    in_subject + in_object
}
/// Counts the blank node occurrences in `quad`: subject and object (recursing
/// into quoted triples) plus one if the graph name is itself a blank node.
fn count_quad_blank_nodes(quad: &Quad) -> usize {
    let in_subject = match &quad.subject {
        Subject::BlankNode(_) => 1,
        Subject::Triple(nested) => count_triple_blank_nodes(nested),
        _ => 0,
    };
    let in_object = match &quad.object {
        Term::BlankNode(_) => 1,
        Term::Triple(nested) => count_triple_blank_nodes(nested),
        _ => 0,
    };
    let in_graph_name = usize::from(matches!(quad.graph_name, GraphName::BlankNode(_)));
    in_subject + in_object + in_graph_name
}
/// Serializes `quads` to TriG bytes, registering every `(name, IRI)` pair of
/// `prefixes` on the serializer first.
///
/// Panics if a prefix is rejected by the serializer or if writing fails.
fn serialize_quads(quads: &[Quad], prefixes: Vec<(String, String)>) -> Vec<u8> {
    let serializer = prefixes
        .into_iter()
        .fold(TriGSerializer::new(), |acc, (name, iri)| {
            acc.with_prefix(name, iri).unwrap()
        });
    let mut writer = serializer.serialize_to_write(Vec::new());
    quads
        .iter()
        .for_each(|quad| writer.write_quad(quad).unwrap());
    writer.finish().unwrap()
}
// Differential fuzz target for the TriG parser and serializer.
// It checks that chunked and single-buffer parsing agree, that the unchecked
// parser agrees with the validating one on error-free input, and that
// serializing the parsed quads and parsing them again is lossless.
fuzz_target!(|data: &[u8]| {
// We parse with splitting (0xFF bytes are used as chunk boundaries)
let (quads, errors, prefixes) = parse(data.split(|c| *c == 0xFF), false);
// We parse without splitting (the same bytes with the 0xFF separators removed, as a single chunk)
let (quads_without_split, errors_without_split, _) = parse(
[data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()],
false,
);
// We also parse in unchecked mode; its output is only compared when the validating parser found no error
let (quads_unchecked, errors_unchecked, _) = parse(data.split(|c| *c == 0xFF), true);
if errors.is_empty() {
assert!(errors_unchecked.is_empty());
}
// The comparison strategy depends on the number of blank nodes found by the chunked parse
let bnodes_count = quads.iter().map(count_quad_blank_nodes).sum::<usize>();
if bnodes_count == 0 {
// No blank node: the quad lists are compared directly
assert_eq!(
quads,
quads_without_split,
"With split:\n{}\nWithout split:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new()))
);
if errors.is_empty() {
assert_eq!(
quads,
quads_unchecked,
"Validating:\n{}\nUnchecked:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new()))
);
}
} else if bnodes_count <= 4 {
// Up to 4 blank nodes: the results are compared as canonicalized datasets instead of raw quad lists
// NOTE(review): the limit of 4 is presumably there to keep canonicalization cheap; confirm.
// With more than 4 blank nodes the parsed quads are not compared at all, only the errors below.
let mut dataset_with_split = quads.iter().collect::<Dataset>();
let mut dataset_without_split = quads_without_split.iter().collect::<Dataset>();
dataset_with_split.canonicalize(CanonicalizationAlgorithm::Unstable);
dataset_without_split.canonicalize(CanonicalizationAlgorithm::Unstable);
assert_eq!(
dataset_with_split,
dataset_without_split,
"With split:\n{}\nWithout split:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new()))
);
if errors.is_empty() {
let mut dataset_unchecked = quads_unchecked.iter().collect::<Dataset>();
dataset_unchecked.canonicalize(CanonicalizationAlgorithm::Unstable);
assert_eq!(
dataset_with_split,
dataset_unchecked,
"Validating:\n{}\nUnchecked:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new()))
);
}
}
// The reported errors must be the same whatever the chunking
assert_eq!(errors, errors_without_split);
// We serialize (using the prefixes collected by the chunked parse)
let new_serialization = serialize_quads(&quads, prefixes);
// We parse the serialization
// Any parse error is turned into a panic message showing the serialization, the quads and the original input
let new_quads = TriGParser::new()
.with_quoted_triples()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {quads:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_quads, quads);
});

@ -1,27 +1,27 @@
use anyhow::Context;
use sparesults::{
FromReadQueryResultsReader, QueryResultsFormat, QueryResultsParser, QueryResultsSerializer,
QueryResultsFormat, QueryResultsParser, QueryResultsReader, QueryResultsSerializer,
};
pub fn fuzz_result_format(format: QueryResultsFormat, data: &[u8]) {
let parser = QueryResultsParser::from_format(format);
let serializer = QueryResultsSerializer::from_format(format);
let Ok(reader) = parser.parse_read(data) else {
let Ok(reader) = parser.read_results(data) else {
return;
};
match reader {
FromReadQueryResultsReader::Solutions(solutions) => {
QueryResultsReader::Solutions(solutions) => {
let Ok(solutions) = solutions.collect::<Result<Vec<_>, _>>() else {
return;
};
// We try to write again
let mut writer = serializer
.serialize_solutions_to_write(
.solutions_writer(
Vec::new(),
solutions
.first()
.get(0)
.map_or_else(Vec::new, |s| s.variables().to_vec()),
)
.unwrap();
@ -31,30 +31,30 @@ pub fn fuzz_result_format(format: QueryResultsFormat, data: &[u8]) {
let serialized = String::from_utf8(writer.finish().unwrap()).unwrap();
// And to parse again
if let FromReadQueryResultsReader::Solutions(roundtrip_solutions) = parser
.parse_read(serialized.as_bytes())
.with_context(|| format!("Parsing {serialized:?}"))
if let QueryResultsReader::Solutions(roundtrip_solutions) = parser
.read_results(serialized.as_bytes())
.with_context(|| format!("Parsing {:?}", &serialized))
.unwrap()
{
assert_eq!(
roundtrip_solutions
.collect::<Result<Vec<_>, _>>()
.with_context(|| format!("Parsing {serialized:?}"))
.with_context(|| format!("Parsing {:?}", &serialized))
.unwrap(),
solutions
)
}
}
FromReadQueryResultsReader::Boolean(value) => {
QueryResultsReader::Boolean(value) => {
// We try to write again
let mut serialized = Vec::new();
serializer
.serialize_boolean_to_write(&mut serialized, value)
.write_boolean_result(&mut serialized, value)
.unwrap();
// And to parse again
if let FromReadQueryResultsReader::Boolean(roundtrip_value) =
parser.parse_read(serialized.as_slice()).unwrap()
if let QueryResultsReader::Boolean(roundtrip_value) =
parser.read_results(serialized.as_slice()).unwrap()
{
assert_eq!(roundtrip_value, value)
}

@ -1,26 +1,20 @@
[package]
name = "oxigraph-js"
version.workspace = true
authors.workspace = true
license.workspace = true
name = "oxigraph_js"
version = "0.3.22"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
keywords = ["RDF", "N-Triples", "Turtle", "XML", "SPARQL"]
keywords = ["RDF", "N-Triples", "Turtle", "RDF/XML", "SPARQL"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/js"
description = "JavaScript bindings of Oxigraph"
edition.workspace = true
rust-version.workspace = true
publish = false
edition = "2021"
[lib]
crate-type = ["cdylib"]
name = "oxigraph"
doc = false
[dependencies]
console_error_panic_hook.workspace = true
js-sys.workspace = true
oxigraph = { workspace = true, features = ["js"] }
wasm-bindgen.workspace = true
[lints]
workspace = true
oxigraph = { version = "0.3.22", path="../lib" }
wasm-bindgen = "0.2"
js-sys = "0.3"
console_error_panic_hook = "0.1"

@ -3,7 +3,7 @@ Oxigraph for JavaScript
[![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
This package provides a JavaScript API on top of [Oxigraph](https://crates.io/crates/oxigraph), compiled with WebAssembly.
@ -13,7 +13,7 @@ Oxigraph for JavaScript is a work in progress and currently offers a simple in-m
The store is also able to load RDF serialized in [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/) and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/).
It is distributed using [an NPM package](https://www.npmjs.com/package/oxigraph) that should work with Node.JS 18+ and [modern web browsers compatible with WebAssembly reference types and JavaScript `WeakRef`](https://caniuse.com/wasm-reference-types,mdn-javascript_builtins_weakref).
It is distributed using [an NPM package](https://www.npmjs.com/package/oxigraph) that should work with Node.JS 12+ and modern web browsers compatible with WebAssembly.
To install:
```bash
@ -188,18 +188,6 @@ if (store.query("ASK { ?s ?s ?s }")) {
}
```
It is also possible to provide some options in an object given as second argument:
```js
if (store.query("ASK { <s> ?p ?o }", {
base_iri: "http://example.com/", // base IRI to resolve relative IRIs in the query
use_default_graph_as_union: true, // the default graph in the query is the union of all the dataset graphs
})) {
console.log("there is a triple with same subject, predicate and object");
}
```
#### `Store.prototype.update(String query)`
Executes a [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/).
The [`LOAD` operation](https://www.w3.org/TR/sparql11-update/#load) is not supported yet.
@ -209,50 +197,40 @@ Example of update:
store.update("DELETE WHERE { <http://example.com/s> ?p ?o }")
```
It is also possible to provide some options in an object given as second argument:
```js
store.update("DELETE WHERE { <s> ?p ?o }", {
base_iri: "http://example.com/" // base IRI to resolve relative IRIs in the update
})
```
#### `Store.prototype.load(String data, String format, NamedNode|String? baseIRI, NamedNode|BlankNode|DefaultGraph? toNamedGraph)`
#### `Store.prototype.load(String data, String mimeType, NamedNode|String? baseIRI, NamedNode|BlankNode|DefaultGraph? toNamedGraph)`
Loads serialized RDF triples or quads into the store.
The method arguments are:
1. `data`: the serialized RDF triples or quads.
2. `format`: the format of the serialization. See below for the supported formats.
2. `mimeType`: the MIME type of the serialization. See below for the supported mime types.
3. `baseIRI`: the base IRI to use to resolve the relative IRIs in the serialization.
4. `toNamedGraph`: for triple serialization formats, the name of the named graph the triple should be loaded to.
The available formats are:
* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle` or `ttl`
* [TriG](https://www.w3.org/TR/trig/): `application/trig` or `trig`
* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` or `nt`
* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` or `nq`
* [N3](https://w3c.github.io/N3/spec/): `text/n3` or `n3`
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` or `rdf`
* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle`
* [TriG](https://www.w3.org/TR/trig/): `application/trig`
* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples`
* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads`
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml`
Example of loading a Turtle file into the named graph `<http://example.com/graph>` with the base IRI `http://example.com`:
```js
store.load("<http://example.com> <http://example.com> <> .", "text/turtle", "http://example.com", oxigraph.namedNode("http://example.com/graph"));
```
#### `Store.prototype.dump(String format, NamedNode|BlankNode|DefaultGraph? fromNamedGraph)`
#### `Store.prototype.dump(String mimeType, NamedNode|BlankNode|DefaultGraph? fromNamedGraph)`
Returns serialized RDF triples or quads from the store.
The method arguments are:
1. `format`: the format type of the serialization. See below for the supported types.
1. `mimeType`: the MIME type of the serialization. See below for the supported mime types.
2. `fromNamedGraph`: for triple serialization formats, the name of the named graph the triples should be dumped from.
The available formats are:
* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle` or `ttl`
* [TriG](https://www.w3.org/TR/trig/): `application/trig` or `trig`
* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` or `nt`
* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` or `nq`
* [N3](https://w3c.github.io/N3/spec/): `text/n3` or `n3`
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` or `rdf`
* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle`
* [TriG](https://www.w3.org/TR/trig/): `application/trig`
* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples`
* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads`
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml`
Example of building a Turtle file from the named graph `<http://example.com/graph>`:
```js
@ -282,7 +260,7 @@ To setup a dev environment:
Testing and linting:
- Rust code is formatted with [rustfmt](https://github.com/rust-lang/rustfmt) and linted with [clippy](https://github.com/rust-lang/rust-clippy).
You can execute them with `cargo fmt` and `cargo clippy`.
- JS code is formatted and linted with [Biome](https://biomejs.dev/). `npm run fmt` to auto-format and `npm test` to lint and test.
- JS code is formatted and linted with [Rome](https://rome.tools/). `npm run fmt` to auto-format and `npm test` to lint and test.
- Tests are written in JavaScript using [Mocha](https://mochajs.org/) in the `test` directory. `npm test` to run them.

@ -1,6 +1,16 @@
#! /usr/bin/env node
const fs = require("node:fs");
const fs = require("fs");
// We copy file to the new directory
fs.mkdirSync("pkg");
for (const file of fs.readdirSync("./pkg-web")) {
fs.copyFileSync(`./pkg-web/${file}`, `./pkg/${file}`);
}
for (const file of fs.readdirSync("./pkg-node")) {
fs.copyFileSync(`./pkg-node/${file}`, `./pkg/${file}`);
}
const pkg = JSON.parse(fs.readFileSync("./pkg/package.json"));
pkg.name = "oxigraph";
pkg.main = "node.js";

1027
js/package-lock.json generated

File diff suppressed because it is too large Load Diff

@ -3,18 +3,20 @@
"description": "Oxigraph JS build and tests",
"private": true,
"devDependencies": {
"@biomejs/biome": "^1.0.0",
"@rdfjs/data-model": "^2.0.1",
"mocha": "^10.0.0"
"mocha": "^10.0.0",
"rome": "^12.0.0"
},
"scripts": {
"fmt": "biome format . --write && biome check . --apply-unsafe && biome format . --write",
"test": "biome ci . && wasm-pack build --debug --target nodejs --weak-refs --reference-types && mocha",
"build": "wasm-pack build --release --target web --out-name web --weak-refs --reference-types && wasm-pack build --release --target nodejs --out-name node --weak-refs --reference-types && node build_package.js",
"fmt": "rome format . --write && rome check . --apply-unsafe",
"test": "rome ci . && wasm-pack build --debug --target nodejs && mocha",
"build": "rm -rf pkg && wasm-pack build --release --target web --out-name web && mv pkg pkg-web && wasm-pack build --release --target nodejs --out-name node && mv pkg pkg-node && node build_package.js && rm -r pkg-web && rm -r pkg-node",
"release": "npm run build && npm publish ./pkg",
"pack": "npm run build && npm pack ./pkg"
},
"standard": {
"ignore": ["pkg*"]
"ignore": [
"pkg*"
]
}
}

@ -1,8 +1,7 @@
{
"$schema": "https://biomejs.dev/schemas/1.0.0/schema.json",
"formatter": {
"indentStyle": "space",
"indentWidth": 4,
"indentSize": 4,
"lineWidth": 100
},
"linter": {

@ -1,4 +1,4 @@
#![allow(clippy::mem_forget)]
#![allow(clippy::unused_unit)]
use wasm_bindgen::prelude::*;
mod model;

@ -1,9 +1,15 @@
#![allow(dead_code, clippy::inherent_to_string, clippy::unused_self)]
#![allow(
dead_code,
clippy::inherent_to_string,
clippy::unused_self,
clippy::use_self
)]
use crate::format_err;
use crate::utils::to_err;
use js_sys::{Reflect, UriError};
use oxigraph::model::*;
use oxigraph::sparql::Variable;
use wasm_bindgen::prelude::*;
thread_local! {
@ -50,7 +56,7 @@ pub fn literal(
#[wasm_bindgen(js_name = defaultGraph)]
pub fn default_graph() -> JsDefaultGraph {
JsDefaultGraph
JsDefaultGraph {}
}
#[wasm_bindgen(js_name = variable)]
@ -296,7 +302,7 @@ impl From<JsLiteral> for Term {
#[wasm_bindgen(js_name = DefaultGraph)]
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct JsDefaultGraph;
pub struct JsDefaultGraph {}
#[wasm_bindgen(js_class = DefaultGraph)]
impl JsDefaultGraph {
@ -307,7 +313,7 @@ impl JsDefaultGraph {
#[wasm_bindgen(getter)]
pub fn value(&self) -> String {
String::new()
"".to_owned()
}
#[wasm_bindgen(js_name = toString)]
@ -387,7 +393,7 @@ impl JsQuad {
#[wasm_bindgen(getter)]
pub fn value(&self) -> String {
String::new()
"".to_owned()
}
#[wasm_bindgen(getter = subject)]
@ -526,7 +532,7 @@ impl From<GraphName> for JsTerm {
match name {
GraphName::NamedNode(node) => node.into(),
GraphName::BlankNode(node) => node.into(),
GraphName::DefaultGraph => Self::DefaultGraph(JsDefaultGraph),
GraphName::DefaultGraph => Self::DefaultGraph(JsDefaultGraph {}),
}
}
}
@ -558,7 +564,7 @@ impl From<Quad> for JsTerm {
impl TryFrom<JsTerm> for NamedNode {
type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
fn try_from(value: JsTerm) -> Result<Self, JsValue> {
match value {
JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Err(format_err!(
@ -582,7 +588,7 @@ impl TryFrom<JsTerm> for NamedNode {
impl TryFrom<JsTerm> for NamedOrBlankNode {
type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
fn try_from(value: JsTerm) -> Result<Self, JsValue> {
match value {
JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Ok(node.into()),
@ -608,7 +614,7 @@ impl TryFrom<JsTerm> for NamedOrBlankNode {
impl TryFrom<JsTerm> for Subject {
type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
fn try_from(value: JsTerm) -> Result<Self, JsValue> {
match value {
JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Ok(node.into()),
@ -631,7 +637,7 @@ impl TryFrom<JsTerm> for Subject {
impl TryFrom<JsTerm> for Term {
type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
fn try_from(value: JsTerm) -> Result<Self, JsValue> {
match value {
JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Ok(node.into()),
@ -651,7 +657,7 @@ impl TryFrom<JsTerm> for Term {
impl TryFrom<JsTerm> for GraphName {
type Error = JsValue;
fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
fn try_from(value: JsTerm) -> Result<Self, JsValue> {
match value {
JsTerm::NamedNode(node) => Ok(node.into()),
JsTerm::BlankNode(node) => Ok(node.into()),
@ -738,7 +744,7 @@ impl FromJsConverter {
))
}
}
"DefaultGraph" => Ok(JsTerm::DefaultGraph(JsDefaultGraph)),
"DefaultGraph" => Ok(JsTerm::DefaultGraph(JsDefaultGraph {})),
"Variable" => Ok(Variable::new(
Reflect::get(value, &self.value)?
.as_string()
@ -748,7 +754,8 @@ impl FromJsConverter {
.into()),
"Quad" => Ok(self.to_quad(value)?.into()),
_ => Err(format_err!(
"The termType {term_type} is not supported by Oxigraph"
"The termType {} is not supported by Oxigraph",
term_type
)),
}
} else if term_type.is_undefined() {

@ -1,11 +1,14 @@
#![allow(clippy::use_self)]
use crate::format_err;
use crate::model::*;
use crate::utils::to_err;
use js_sys::{Array, Map, Reflect};
use oxigraph::io::{RdfFormat, RdfParser};
use js_sys::{Array, Map};
use oxigraph::io::{DatasetFormat, GraphFormat};
use oxigraph::model::*;
use oxigraph::sparql::{Query, QueryResults, Update};
use oxigraph::sparql::QueryResults;
use oxigraph::store::Store;
use std::io::Cursor;
use wasm_bindgen::prelude::*;
#[wasm_bindgen(js_name = Store)]
@ -24,7 +27,7 @@ impl JsStore {
store: Store::new().map_err(to_err)?,
};
if let Some(quads) = quads {
for quad in &*quads {
for quad in quads.iter() {
store.add(quad)?;
}
}
@ -102,21 +105,7 @@ impl JsStore {
.into_boxed_slice())
}
pub fn query(&self, query: &str, options: &JsValue) -> Result<JsValue, JsValue> {
// Parsing options
let mut base_iri = None;
let mut use_default_graph_as_union = false;
if !options.is_undefined() {
base_iri = Reflect::get(options, &JsValue::from_str("base_iri"))?.as_string();
use_default_graph_as_union =
Reflect::get(options, &JsValue::from_str("use_default_graph_as_union"))?
.is_truthy();
}
let mut query = Query::parse(query, base_iri.as_deref()).map_err(to_err)?;
if use_default_graph_as_union {
query.dataset_mut().set_default_graph_as_union();
}
pub fn query(&self, query: &str) -> Result<JsValue, JsValue> {
let results = self.store.query(query).map_err(to_err)?;
let output = match results {
QueryResults::Solutions(solutions) => {
@ -149,25 +138,17 @@ impl JsStore {
Ok(output)
}
pub fn update(&self, update: &str, options: &JsValue) -> Result<(), JsValue> {
// Parsing options
let mut base_iri = None;
if !options.is_undefined() {
base_iri = Reflect::get(options, &JsValue::from_str("base_iri"))?.as_string();
}
let update = Update::parse(update, base_iri.as_deref()).map_err(to_err)?;
pub fn update(&self, update: &str) -> Result<(), JsValue> {
self.store.update(update).map_err(to_err)
}
pub fn load(
&self,
data: &str,
format: &str,
mime_type: &str,
base_iri: &JsValue,
to_graph_name: &JsValue,
) -> Result<(), JsValue> {
let format = rdf_format(format)?;
let base_iri = if base_iri.is_null() || base_iri.is_undefined() {
None
} else if base_iri.is_string() {
@ -180,41 +161,65 @@ impl JsStore {
));
};
let mut parser = RdfParser::from_format(format);
if let Some(to_graph_name) = FROM_JS.with(|c| c.to_optional_term(to_graph_name))? {
parser = parser.with_default_graph(GraphName::try_from(to_graph_name)?);
}
if let Some(base_iri) = base_iri {
parser = parser.with_base_iri(base_iri).map_err(to_err)?;
let to_graph_name =
if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(to_graph_name))? {
Some(graph_name.try_into()?)
} else {
None
};
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
self.store
.load_graph(
Cursor::new(data),
graph_format,
&to_graph_name.unwrap_or(GraphName::DefaultGraph),
base_iri.as_deref(),
)
.map_err(to_err)
} else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) {
if to_graph_name.is_some() {
return Err(format_err!(
"The target graph name parameter is not available for dataset formats"
));
}
self.store
.load_from_read(parser, data.as_bytes())
.load_dataset(Cursor::new(data), dataset_format, base_iri.as_deref())
.map_err(to_err)
} else {
Err(format_err!("Not supported MIME type: {}", mime_type))
}
}
pub fn dump(&self, format: &str, from_graph_name: &JsValue) -> Result<String, JsValue> {
let format = rdf_format(format)?;
let buffer =
if let Some(from_graph_name) = FROM_JS.with(|c| c.to_optional_term(from_graph_name))? {
self.store.dump_graph_to_write(
&GraphName::try_from(from_graph_name)?,
format,
Vec::new(),
)
pub fn dump(&self, mime_type: &str, from_graph_name: &JsValue) -> Result<String, JsValue> {
let from_graph_name =
if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(from_graph_name))? {
Some(graph_name.try_into()?)
} else {
self.store.dump_to_write(format, Vec::new())
}
None
};
let mut buffer = Vec::new();
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
self.store
.dump_graph(
&mut buffer,
graph_format,
&from_graph_name.unwrap_or(GraphName::DefaultGraph),
)
.map_err(to_err)?;
String::from_utf8(buffer).map_err(to_err)
} else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) {
if from_graph_name.is_some() {
return Err(format_err!(
"The target graph name parameter is not available for dataset formats"
));
}
}
fn rdf_format(format: &str) -> Result<RdfFormat, JsValue> {
if format.contains('/') {
RdfFormat::from_media_type(format)
.ok_or_else(|| format_err!("Not supported RDF format media type: {format}"))
self.store
.dump_dataset(&mut buffer, dataset_format)
.map_err(to_err)?;
} else {
RdfFormat::from_extension(format)
.ok_or_else(|| format_err!("Not supported RDF format extension: {format}"))
return Err(format_err!("Not supported MIME type: {}", mime_type));
}
String::from_utf8(buffer).map_err(to_err)
}
}

@ -1,37 +1,37 @@
/* global describe, it */
import assert from "node:assert";
import runTests from "../node_modules/@rdfjs/data-model/test/index.js";
import oxigraph from "../pkg/oxigraph.js";
import assert from "assert";
runTests({ factory: oxigraph });
describe("DataModel", () => {
describe("#toString()", () => {
it("namedNode().toString() should return SPARQL compatible syntax", () => {
describe("DataModel", function () {
describe("#toString()", function () {
it("namedNode().toString() should return SPARQL compatible syntax", function () {
assert.strictEqual(
"<http://example.com>",
oxigraph.namedNode("http://example.com").toString(),
);
});
it("blankNode().toString() should return SPARQL compatible syntax", () => {
it("blankNode().toString() should return SPARQL compatible syntax", function () {
assert.strictEqual("_:a", oxigraph.blankNode("a").toString());
});
it("literal().toString() should return SPARQL compatible syntax", () => {
it("literal().toString() should return SPARQL compatible syntax", function () {
assert.strictEqual('"a\\"b"@en', oxigraph.literal('a"b', "en").toString());
});
it("defaultGraph().toString() should return SPARQL compatible syntax", () => {
it("defaultGraph().toString() should return SPARQL compatible syntax", function () {
assert.strictEqual("DEFAULT", oxigraph.defaultGraph().toString());
});
it("variable().toString() should return SPARQL compatible syntax", () => {
it("variable().toString() should return SPARQL compatible syntax", function () {
assert.strictEqual("?a", oxigraph.variable("a").toString());
});
it("quad().toString() should return SPARQL compatible syntax", () => {
it("quad().toString() should return SPARQL compatible syntax", function () {
assert.strictEqual(
"<http://example.com/s> <http://example.com/p> <<<http://example.com/s1> <http://example.com/p1> <http://example.com/o1>>> <http://example.com/g>",
oxigraph

@ -1,8 +1,8 @@
/* global describe, it */
import assert from "node:assert";
import dataModel from "@rdfjs/data-model";
import { Store } from "../pkg/oxigraph.js";
import dataModel from "@rdfjs/data-model";
import assert from "assert";
const ex = dataModel.namedNode("http://example.com");
const triple = dataModel.quad(
@ -11,17 +11,17 @@ const triple = dataModel.quad(
dataModel.literal("o"),
);
describe("Store", () => {
describe("#add()", () => {
it("an added quad should be in the store", () => {
describe("Store", function () {
describe("#add()", function () {
it("an added quad should be in the store", function () {
const store = new Store();
store.add(dataModel.quad(ex, ex, triple));
assert(store.has(dataModel.quad(ex, ex, triple)));
});
});
describe("#delete()", () => {
it("an removed quad should not be in the store anymore", () => {
describe("#delete()", function () {
it("an removed quad should not be in the store anymore", function () {
const store = new Store([dataModel.quad(triple, ex, ex)]);
assert(store.has(dataModel.quad(triple, ex, ex)));
store.delete(dataModel.quad(triple, ex, ex));
@ -29,22 +29,22 @@ describe("Store", () => {
});
});
describe("#has()", () => {
it("an added quad should be in the store", () => {
describe("#has()", function () {
it("an added quad should be in the store", function () {
const store = new Store([dataModel.quad(ex, ex, ex)]);
assert(store.has(dataModel.quad(ex, ex, ex)));
});
});
describe("#size()", () => {
it("A store with one quad should have 1 for size", () => {
describe("#size()", function () {
it("A store with one quad should have 1 for size", function () {
const store = new Store([dataModel.quad(ex, ex, ex)]);
assert.strictEqual(1, store.size);
});
});
describe("#match_quads()", () => {
it("blank pattern should return all quads", () => {
describe("#match_quads()", function () {
it("blank pattern should return all quads", function () {
const store = new Store([dataModel.quad(ex, ex, ex)]);
const results = store.match();
assert.strictEqual(1, results.length);
@ -52,32 +52,32 @@ describe("Store", () => {
});
});
describe("#query()", () => {
it("ASK true", () => {
describe("#query()", function () {
it("ASK true", function () {
const store = new Store([dataModel.quad(ex, ex, ex)]);
assert.strictEqual(true, store.query("ASK { ?s ?s ?s }"));
});
it("ASK false", () => {
it("ASK false", function () {
const store = new Store();
assert.strictEqual(false, store.query("ASK { FILTER(false)}"));
});
it("CONSTRUCT", () => {
it("CONSTRUCT", function () {
const store = new Store([dataModel.quad(ex, ex, ex)]);
const results = store.query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }");
assert.strictEqual(1, results.length);
assert(dataModel.quad(ex, ex, ex).equals(results[0]));
});
it("SELECT", () => {
it("SELECT", function () {
const store = new Store([dataModel.quad(ex, ex, ex)]);
const results = store.query("SELECT ?s WHERE { ?s ?p ?o }");
assert.strictEqual(1, results.length);
assert(ex.equals(results[0].get("s")));
});
it("SELECT with NOW()", () => {
it("SELECT with NOW()", function () {
const store = new Store([dataModel.quad(ex, ex, ex)]);
const results = store.query(
"SELECT * WHERE { FILTER(2022 <= YEAR(NOW()) && YEAR(NOW()) <= 2100) }",
@ -85,31 +85,15 @@ describe("Store", () => {
assert.strictEqual(1, results.length);
});
it("SELECT with RAND()", () => {
it("SELECT with RAND()", function () {
const store = new Store([dataModel.quad(ex, ex, ex)]);
const results = store.query("SELECT (RAND() AS ?y) WHERE {}");
assert.strictEqual(1, results.length);
});
it("SELECT with base IRI", () => {
const store = new Store();
const results = store.query("SELECT * WHERE { BIND(<t> AS ?t) }", {
base_iri: "http://example.com/",
});
assert.strictEqual(1, results.length);
});
it("SELECT with union graph", () => {
const store = new Store([dataModel.quad(ex, ex, ex, ex)]);
const results = store.query("SELECT * WHERE { ?s ?p ?o }", {
use_default_graph_as_union: true,
});
assert.strictEqual(1, results.length);
});
});
describe("#update()", () => {
it("INSERT DATA", () => {
describe("#update()", function () {
it("INSERT DATA", function () {
const store = new Store();
store.update(
"INSERT DATA { <http://example.com> <http://example.com> <http://example.com> }",
@ -117,7 +101,7 @@ describe("Store", () => {
assert.strictEqual(1, store.size);
});
it("DELETE DATA", () => {
it("DELETE DATA", function () {
const store = new Store([dataModel.quad(ex, ex, ex)]);
store.update(
"DELETE DATA { <http://example.com> <http://example.com> <http://example.com> }",
@ -125,15 +109,15 @@ describe("Store", () => {
assert.strictEqual(0, store.size);
});
it("DELETE WHERE", () => {
it("DELETE WHERE", function () {
const store = new Store([dataModel.quad(ex, ex, ex)]);
store.update("DELETE WHERE { ?v ?v ?v }");
assert.strictEqual(0, store.size);
});
});
describe("#load()", () => {
it("load NTriples in the default graph", () => {
describe("#load()", function () {
it("load NTriples in the default graph", function () {
const store = new Store();
store.load(
"<http://example.com> <http://example.com> <http://example.com> .",
@ -142,7 +126,7 @@ describe("Store", () => {
assert(store.has(dataModel.quad(ex, ex, ex)));
});
it("load NTriples in an other graph", () => {
it("load NTriples in an other graph", function () {
const store = new Store();
store.load(
"<http://example.com> <http://example.com> <http://example.com> .",
@ -153,7 +137,7 @@ describe("Store", () => {
assert(store.has(dataModel.quad(ex, ex, ex, ex)));
});
it("load Turtle with a base IRI", () => {
it("load Turtle with a base IRI", function () {
const store = new Store();
store.load(
"<http://example.com> <http://example.com> <> .",
@ -163,7 +147,7 @@ describe("Store", () => {
assert(store.has(dataModel.quad(ex, ex, ex)));
});
it("load NQuads", () => {
it("load NQuads", function () {
const store = new Store();
store.load(
"<http://example.com> <http://example.com> <http://example.com> <http://example.com> .",
@ -172,7 +156,7 @@ describe("Store", () => {
assert(store.has(dataModel.quad(ex, ex, ex, ex)));
});
it("load TriG with a base IRI", () => {
it("load TriG with a base IRI", function () {
const store = new Store();
store.load(
"GRAPH <> { <http://example.com> <http://example.com> <> }",
@ -183,8 +167,8 @@ describe("Store", () => {
});
});
describe("#dump()", () => {
it("dump dataset content", () => {
describe("#dump()", function () {
it("dump dataset content", function () {
const store = new Store([dataModel.quad(ex, ex, ex, ex)]);
assert.strictEqual(
"<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n",
@ -192,7 +176,7 @@ describe("Store", () => {
);
});
it("dump named graph content", () => {
it("dump named graph content", function () {
const store = new Store([dataModel.quad(ex, ex, ex, ex)]);
assert.strictEqual(
"<http://example.com> <http://example.com> <http://example.com> .\n",
@ -200,9 +184,9 @@ describe("Store", () => {
);
});
it("dump default graph content", () => {
it("dump default graph content", function () {
const store = new Store([dataModel.quad(ex, ex, ex, ex)]);
assert.strictEqual("", store.dump("application/n-triples", dataModel.defaultGraph()));
assert.strictEqual("", store.dump("application/n-triples"));
});
});
});

@ -0,0 +1,63 @@
[package]
name = "oxigraph"
version = "0.3.22"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
keywords = ["RDF", "SPARQL", "graph-database", "database"]
categories = ["database-implementations"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib"
homepage = "https://oxigraph.org/"
documentation = "https://docs.rs/oxigraph"
description = """
a SPARQL database and RDF toolkit
"""
edition = "2021"
rust-version = "1.60"
[package.metadata.docs.rs]
all-features = true
[features]
default = []
http_client = ["oxhttp", "oxhttp/rustls"]
rocksdb_debug = []
[dependencies]
rand = "0.8"
md-5 = "0.10"
sha-1 = "0.10"
sha2 = "0.10"
digest = "0.10"
regex = "1"
oxilangtag = "0.1"
oxiri = "0.2"
rio_api = "0.8"
rio_turtle = "0.8"
rio_xml = "0.8"
hex = "0.4"
siphasher = ">=0.3,<2.0"
lazy_static = "1"
json-event-parser = "0.1"
oxrdf = { version = "0.1.7", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] }
oxsdatatypes = { version = "0.1.3", path="oxsdatatypes" }
spargebra = { version = "0.2.8", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] }
sparesults = { version = "0.1.8", path="sparesults", features = ["rdf-star"] }
[target.'cfg(not(target_family = "wasm"))'.dependencies]
libc = "0.2"
rocksdb = {git = "https://git.nextgraph.org/NextGraph/rust-rocksdb.git", branch = "master", features = [ ] }
oxhttp = { version = "0.1", optional = true }
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]
getrandom = { version = "0.2", features = ["js"] }
js-sys = "0.3"
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
criterion = "0.4"
oxhttp = "0.1"
zstd = "0.12"
[[bench]]
name = "store"
harness = false

@ -1,13 +1,72 @@
Oxigraph Rust crates
====================
Oxigraph is implemented in Rust.
It is composed of a main library, [`oxigraph`](./oxigraph), and a set of smaller crates used by the `oxigraph` crate:
* [`oxrdf`](./oxrdf), datastructures encoding RDF basic concepts (the `model` module of the `oxigraph` crate).
* [`oxrdfio`](./oxrdfio), a unified parser and serializer API for RDF formats (the `io` module of the `oxigraph` crate). It itself relies on:
* [`oxttl`](./oxttl), N-Triple, N-Quad, Turtle, TriG and N3 parsing and serialization.
* [`oxrdfxml`](./oxrdfxml), RDF/XML parsing and serialization.
* [`spargebra`](./spargebra), a SPARQL parser.
* [`sparesults`](./sparesults), parsers and serializers for SPARQL result formats (the `sparql::results` module of the `oxigraph` crate).
* [`sparopt`](./sparopt), a SPARQL optimizer.
* [`oxsdatatypes`](./oxsdatatypes), an implementation of some XML Schema datatypes.
Oxigraph
========
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph)
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph)
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
Its goal is to provide a compliant, safe and fast on-disk graph database.
It also provides a set of utility functions for reading, writing, and processing RDF files.
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet.
Oxigraph also provides [a standalone HTTP server](https://crates.io/crates/oxigraph_server) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library.
Oxigraph implements the following specifications:
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio).
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
The main entry point of Oxigraph is the [`Store`](store::Store) struct:
```rust
use oxigraph::store::Store;
use oxigraph::model::*;
use oxigraph::sparql::QueryResults;
let store = Store::new().unwrap();
// insertion
let ex = NamedNode::new("http://example.com").unwrap();
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph);
store.insert(&quad).unwrap();
// quad filter
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap();
assert_eq!(vec![quad], results);
// SPARQL query
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() {
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into()));
}
```
Some parts of this library are available as standalone crates:
* [`oxrdf`](https://crates.io/crates/oxrdf) provides datastructures encoding RDF basic concepts (the `oxigraph::model` module).
* [`spargebra`](https://crates.io/crates/spargebra) provides a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults) provides parsers and serializers for SPARQL result formats.
To build the library, don't forget to clone the submodules using `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository including submodules or `git submodule update --init` to add submodules to the already cloned repository.
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -0,0 +1,265 @@
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use oxhttp::model::{Method, Request, Status};
use oxigraph::io::GraphFormat;
use oxigraph::model::GraphNameRef;
use oxigraph::sparql::{Query, QueryResults, Update};
use oxigraph::store::Store;
use rand::random;
use std::env::temp_dir;
use std::fs::{remove_dir_all, File};
use std::io::{BufRead, BufReader, Cursor, Read};
use std::path::{Path, PathBuf};
/// Registers the "store load" and "store load large" benchmark groups.
///
/// Each group reads one BSBM N-Triples fixture fully into memory, reports
/// throughput in bytes of that buffer, and uses 10 samples per benchmark.
/// Every iteration starts from a fresh store so that loads do not accumulate.
fn store_load(c: &mut Criterion) {
    // 1000-product dataset: regular load in memory, regular load on disk,
    // and bulk load on disk.
    {
        let mut dump = Vec::new();
        read_data("explore-1000.nt.zst")
            .read_to_end(&mut dump)
            .unwrap();
        let dump_size = dump.len() as u64;

        let mut small = c.benchmark_group("store load");
        small.throughput(Throughput::Bytes(dump_size));
        small.sample_size(10);

        small.bench_function("load BSBM explore 1000 in memory", |bencher| {
            bencher.iter(|| {
                let store = Store::new().unwrap();
                do_load(&store, &dump);
            })
        });
        small.bench_function("load BSBM explore 1000 in on disk", |bencher| {
            bencher.iter(|| {
                // The directory handle is declared first so it outlives the store.
                let dir = TempDir::default();
                let store = Store::open(&dir).unwrap();
                do_load(&store, &dump);
            })
        });
        small.bench_function(
            "load BSBM explore 1000 in on disk with bulk load",
            |bencher| {
                bencher.iter(|| {
                    let dir = TempDir::default();
                    let store = Store::open(&dir).unwrap();
                    do_bulk_load(&store, &dump);
                })
            },
        );
    }

    // 10000-product dataset: only the on-disk bulk load is measured.
    {
        let mut dump = Vec::new();
        read_data("explore-10000.nt.zst")
            .read_to_end(&mut dump)
            .unwrap();
        let dump_size = dump.len() as u64;

        let mut large = c.benchmark_group("store load large");
        large.throughput(Throughput::Bytes(dump_size));
        large.sample_size(10);

        large.bench_function(
            "load BSBM explore 10000 in on disk with bulk load",
            |bencher| {
                bencher.iter(|| {
                    let dir = TempDir::default();
                    let store = Store::open(&dir).unwrap();
                    do_bulk_load(&store, &dump);
                })
            },
        );
    }
}
/// Loads the N-Triples bytes in `data` into the default graph of `store` using
/// `Store::load_graph`, then calls `Store::optimize`.
///
/// Panics if loading or optimizing fails.
fn do_load(store: &Store, data: &[u8]) {
    let reader = Cursor::new(data);
    let loaded = store.load_graph(
        reader,
        GraphFormat::NTriples,
        GraphNameRef::DefaultGraph,
        None,
    );
    loaded.unwrap();
    store.optimize().unwrap();
}
/// Loads the N-Triples bytes in `data` into the default graph of `store` through the
/// store's bulk loader, then calls `Store::optimize`.
///
/// Panics if loading or optimizing fails.
fn do_bulk_load(store: &Store, data: &[u8]) {
    let reader = Cursor::new(data);
    let loaded = store.bulk_loader().load_graph(
        reader,
        GraphFormat::NTriples,
        GraphNameRef::DefaultGraph,
        None,
    );
    loaded.unwrap();
    store.optimize().unwrap();
}
/// Benchmarks the evaluation of the BSBM "explore and update" operation mix.
///
/// The `explore-1000` dataset is bulk-loaded once into an in-memory store and once into an
/// on-disk store. For each store two benchmarks are run: one executing only the queries of
/// the mix and one executing every operation (queries and updates).
///
/// Throughput is reported in operations; it is set before each benchmark so that the
/// query-only benchmarks are normalized by the number of queries they actually run and not
/// by the size of the full mix.
fn store_query_and_update(c: &mut Criterion) {
    let mut data = Vec::new();
    read_data("explore-1000.nt.zst")
        .read_to_end(&mut data)
        .unwrap();

    // Each line of the mix is `<kind>\t<operation>` where kind is `query` or `update`.
    let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst")
        .lines()
        .map(|l| {
            let l = l.unwrap();
            let mut parts = l.trim().split('\t');
            let kind = parts.next().unwrap();
            let operation = parts.next().unwrap();
            match kind {
                "query" => Operation::Query(Query::parse(operation, None).unwrap()),
                "update" => Operation::Update(Update::parse(operation, None).unwrap()),
                _ => panic!("Unexpected operation kind {kind}"),
            }
        })
        .collect::<Vec<_>>();
    let query_operations = operations
        .iter()
        .filter(|o| matches!(o, Operation::Query(_)))
        .cloned()
        .collect::<Vec<_>>();

    let mut group = c.benchmark_group("store operations");
    group.sample_size(10);

    {
        let memory_store = Store::new().unwrap();
        do_bulk_load(&memory_store, &data);

        group.throughput(Throughput::Elements(query_operations.len() as u64));
        group.bench_function("BSBM explore 1000 query in memory", |b| {
            b.iter(|| run_operation(&memory_store, &query_operations))
        });

        group.throughput(Throughput::Elements(operations.len() as u64));
        group.bench_function("BSBM explore 1000 queryAndUpdate in memory", |b| {
            b.iter(|| run_operation(&memory_store, &operations))
        });
    }

    {
        // The directory is removed when `path` goes out of scope at the end of this block.
        let path = TempDir::default();
        let disk_store = Store::open(&path).unwrap();
        do_bulk_load(&disk_store, &data);

        group.throughput(Throughput::Elements(query_operations.len() as u64));
        group.bench_function("BSBM explore 1000 query on disk", |b| {
            b.iter(|| run_operation(&disk_store, &query_operations))
        });

        group.throughput(Throughput::Elements(operations.len() as u64));
        group.bench_function("BSBM explore 1000 queryAndUpdate on disk", |b| {
            b.iter(|| run_operation(&disk_store, &operations))
        });
    }

    group.finish();
}
/// Executes every operation of `operations` against `store`, in order.
///
/// Query results are fully consumed so that lazy evaluation is included in the measured
/// work. Panics on the first evaluation error.
fn run_operation(store: &Store, operations: &[Operation]) {
    for operation in operations {
        match operation {
            Operation::Update(update) => store.update(update.clone()).unwrap(),
            Operation::Query(query) => match store.query(query.clone()).unwrap() {
                QueryResults::Solutions(solutions) => solutions.for_each(|solution| {
                    solution.unwrap();
                }),
                QueryResults::Graph(triples) => triples.for_each(|triple| {
                    triple.unwrap();
                }),
                QueryResults::Boolean(_) => (),
            },
        }
    }
}
/// Benchmarks parsing of the SPARQL queries and updates of the BSBM
/// "explore and update" operation mix.
///
/// Only the operation mix is needed here: the operations are kept as raw strings and parsed
/// inside the measured closure. Throughput is reported in bytes of SPARQL text.
fn sparql_parsing(c: &mut Criterion) {
    // Each line of the mix is `<kind>\t<operation>` where kind is `query` or `update`.
    let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst")
        .lines()
        .map(|l| {
            let l = l.unwrap();
            let mut parts = l.trim().split('\t');
            let kind = parts.next().unwrap();
            let operation = parts.next().unwrap();
            match kind {
                "query" => RawOperation::Query(operation.to_owned()),
                "update" => RawOperation::Update(operation.to_owned()),
                _ => panic!("Unexpected operation kind {kind}"),
            }
        })
        .collect::<Vec<_>>();

    let mut group = c.benchmark_group("sparql parsing");
    group.sample_size(10);
    group.throughput(Throughput::Bytes(
        operations
            .iter()
            .map(|o| match o {
                RawOperation::Query(q) => q.len(),
                RawOperation::Update(u) => u.len(),
            })
            .sum::<usize>() as u64,
    ));
    group.bench_function("BSBM query and update set", |b| {
        b.iter(|| {
            for operation in &operations {
                match operation {
                    RawOperation::Query(q) => {
                        Query::parse(q, None).unwrap();
                    }
                    RawOperation::Update(u) => {
                        Update::parse(u, None).unwrap();
                    }
                }
            }
        })
    });
}
// Registers the three benchmark functions above under the `store` group and
// generates the `main` function of the benchmark binary from that group.
criterion_group!(store, sparql_parsing, store_query_and_update, store_load);
criterion_main!(store);
/// Opens the zstd-compressed benchmark data file named `file` and returns a buffered
/// reader over its decompressed content.
///
/// If `file` does not exist yet (the path is used as given), it is first downloaded from the
/// `Tpt/bsbm-tools` v0.2 GitHub release and written to that path. Panics on any HTTP or
/// I/O failure, printing the response body when the status is not `200 OK`.
fn read_data(file: &str) -> impl BufRead {
    let cached = Path::new(file);
    if !cached.exists() {
        let url = format!("https://github.com/Tpt/bsbm-tools/releases/download/v0.2/{file}");
        let mut client = oxhttp::Client::new();
        // GitHub release downloads redirect to the actual storage location.
        client.set_redirection_limit(5);
        let response = client
            .request(Request::builder(Method::GET, url.parse().unwrap()).build())
            .unwrap();
        assert_eq!(
            response.status(),
            Status::OK,
            "{}",
            response.into_body().to_string().unwrap()
        );
        let mut body = response.into_body();
        let mut target = File::create(cached).unwrap();
        std::io::copy(&mut body, &mut target).unwrap();
    }
    let compressed = File::open(cached).unwrap();
    BufReader::new(zstd::Decoder::new(compressed).unwrap())
}
/// A SPARQL operation kept as its unparsed text, used as input of the parsing benchmark.
#[derive(Clone)]
enum RawOperation {
/// The text of a SPARQL query.
Query(String),
/// The text of a SPARQL update.
Update(String),
}
/// An already parsed SPARQL operation, ready to be evaluated against a store.
// NOTE(review): the lint is presumably silenced because `Query` and `Update` differ a lot
// in size and boxing is not worth it in a benchmark — confirm.
#[allow(clippy::large_enum_variant)]
#[derive(Clone)]
enum Operation {
/// A parsed SPARQL query.
Query(Query),
/// A parsed SPARQL update.
Update(Update),
}
/// A path inside the system temporary directory that is recursively deleted on drop.
///
/// The directory itself is not created by this type.
struct TempDir(PathBuf);
impl Default for TempDir {
    /// Picks a fresh `oxigraph-bench-<random u128>` path inside the system temporary
    /// directory. Nothing is created on disk.
    fn default() -> Self {
        let mut path = temp_dir();
        path.push(format!("oxigraph-bench-{}", random::<u128>()));
        Self(path)
    }
}
impl AsRef<Path> for TempDir {
    /// Exposes the wrapped path so that a `TempDir` can be passed wherever a path is expected.
    fn as_ref(&self) -> &Path {
        self.0.as_path()
    }
}
impl Drop for TempDir {
    /// Recursively removes the directory.
    ///
    /// A removal failure panics, except when the thread is already unwinding from another
    /// panic: panicking again inside `drop` would abort the process and hide the original
    /// failure, so the error is ignored in that case.
    fn drop(&mut self) {
        let removal = remove_dir_all(&self.0);
        if !std::thread::panicking() {
            removal.unwrap()
        }
    }
}

@ -1,59 +0,0 @@
[package]
name = "oxigraph"
version.workspace = true
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF", "SPARQL", "graph-database", "database"]
categories = ["database-implementations"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxigraph"
homepage = "https://oxigraph.org/"
documentation = "https://docs.rs/oxigraph"
description = """
a SPARQL database and RDF toolkit
"""
edition.workspace = true
rust-version.workspace = true
[features]
js = ["getrandom/js", "oxsdatatypes/js", "js-sys"]
[dependencies]
digest.workspace = true
hex.workspace = true
json-event-parser.workspace = true
md-5.workspace = true
oxilangtag.workspace = true
oxiri.workspace = true
oxrdf = { workspace = true, features = ["rdf-star", "oxsdatatypes"] }
oxrdfio = { workspace = true, features = ["rdf-star"] }
oxsdatatypes.workspace = true
rand.workspace = true
regex.workspace = true
sha1.workspace = true
sha2.workspace = true
siphasher.workspace = true
sparesults = { workspace = true, features = ["rdf-star"] }
spargebra = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] }
sparopt = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] }
thiserror.workspace = true
[target.'cfg(not(target_family = "wasm"))'.dependencies]
libc = "0.2"
rocksdb.workspace = true
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]
getrandom.workspace = true
js-sys = { workspace = true, optional = true }
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
codspeed-criterion-compat.workspace = true
zstd.workspace = true
[lints]
workspace = true
[package.metadata.docs.rs]
rustdoc-args = ["--cfg", "docsrs"]

@ -1,82 +0,0 @@
Oxigraph
========
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph)
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph)
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
Its goal is to provide a compliant, safe and fast on-disk graph database.
It also provides a set of utility functions for reading, writing, and processing RDF files.
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet.
Oxigraph also provides [a CLI tool](https://crates.io/crates/oxigraph-cli) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library.
Oxigraph implements the following specifications:
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval.
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
The main entry point of Oxigraph is the [`Store`](store::Store) struct:
```rust
use oxigraph::store::Store;
use oxigraph::model::*;
use oxigraph::sparql::QueryResults;
let store = Store::new().unwrap();
// insertion
let ex = NamedNode::new("http://example.com").unwrap();
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph);
store.insert(&quad).unwrap();
// quad filter
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap();
assert_eq!(vec![quad], results);
// SPARQL query
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() {
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into()));
}
```
It is based on these crates that can be used separately:
* [`oxrdf`](https://crates.io/crates/oxrdf), data structures encoding RDF basic concepts (the [`oxigraph::model`](crate::model) module).
* [`oxrdfio`](https://crates.io/crates/oxrdfio), a unified parser and serializer API for RDF formats (the [`oxigraph::io`](crate::io) module). It itself relies on:
* [`oxttl`](https://crates.io/crates/oxttl), N-Triples, N-Quads, Turtle, TriG and N3 parsing and serialization.
* [`oxrdfxml`](https://crates.io/crates/oxrdfxml), RDF/XML parsing and serialization.
* [`spargebra`](https://crates.io/crates/spargebra), a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults), parsers and serializers for SPARQL result formats (the [`oxigraph::sparql::results`](crate::sparql::results) module).
* [`sparopt`](https://crates.io/crates/sparopt), a SPARQL optimizer.
* [`oxsdatatypes`](https://crates.io/crates/oxsdatatypes), an implementation of some XML Schema datatypes.
To build the library locally, don't forget to clone the submodules using `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository including submodules or `git submodule update --init` to add submodules to the already cloned repository.
It is possible to disable the RocksDB storage backend to only use the in-memory fallback by disabling the `rocksdb` default feature:
```toml
oxigraph = { version = "*", default-features = false }
```
This is the default behavior when compiling Oxigraph to WASM.
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -1,39 +0,0 @@
//! Utilities to read and write RDF graphs and datasets using [OxRDF I/O](https://crates.io/crates/oxrdfio).
//!
//! The entry points of this module are the two [`RdfParser`] and [`RdfSerializer`] structs.
//!
//! Usage example converting a Turtle file to an N-Triples file:
//! ```
//! use oxigraph::io::{RdfFormat, RdfParser, RdfSerializer};
//!
//! let turtle_file = b"@base <http://example.com/> .
//! @prefix schema: <http://schema.org/> .
//! <foo> a schema:Person ;
//! schema:name \"Foo\" .
//! <bar> a schema:Person ;
//! schema:name \"Bar\" .";
//!
//! let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
//! <http://example.com/foo> <http://schema.org/name> \"Foo\" .
//! <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
//! <http://example.com/bar> <http://schema.org/name> \"Bar\" .
//! ";
//!
//! let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
//! for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) {
//! writer.write_quad(&quad.unwrap()).unwrap();
//! }
//! assert_eq!(writer.finish().unwrap(), ntriples_file);
//! ```
mod format;
pub mod read;
pub mod write;
#[allow(deprecated)]
pub use self::format::{DatasetFormat, GraphFormat};
#[allow(deprecated)]
pub use self::read::{DatasetParser, GraphParser};
#[allow(deprecated)]
pub use self::write::{DatasetSerializer, GraphSerializer};
pub use oxrdfio::*;

@ -1,199 +0,0 @@
#![allow(deprecated)]
//! Utilities to read RDF graphs and datasets.
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser};
use std::io::Read;
/// A parser for RDF graph serialization formats.
///
/// It currently supports the following formats:
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[deprecated(note = "use RdfParser instead", since = "0.4.0")]
pub struct GraphParser {
// The wrapped `RdfParser` every method delegates to.
inner: RdfParser,
}
impl GraphParser {
    /// Creates a parser for the given graph format.
    ///
    /// The underlying [`RdfParser`] is configured with `without_named_graphs()` and
    /// `rename_blank_nodes()`.
    #[inline]
    pub fn from_format(format: GraphFormat) -> Self {
        let inner = RdfParser::from_format(format.into())
            .without_named_graphs()
            .rename_blank_nodes();
        Self { inner }
    }

    /// Sets the base IRI used to resolve the relative IRIs of the file.
    ///
    /// Returns an error if `base_iri` is not a valid IRI.
    ///
    /// ```
    /// use oxigraph::io::{GraphFormat, GraphParser};
    ///
    /// let file = "</s> </p> </o> .";
    ///
    /// let parser =
    ///     GraphParser::from_format(GraphFormat::Turtle).with_base_iri("http://example.com")?;
    /// let triples = parser
    ///     .read_triples(file.as_bytes())
    ///     .collect::<Result<Vec<_>, _>>()?;
    ///
    /// assert_eq!(triples.len(), 1);
    /// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    #[inline]
    pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
        let inner = self.inner.with_base_iri(base_iri)?;
        Ok(Self { inner })
    }

    /// Starts parsing the content of a [`Read`] implementation and returns an iterator
    /// over the read triples.
    pub fn read_triples<R: Read>(self, reader: R) -> TripleReader<R> {
        let parser = self.inner.parse_read(reader);
        TripleReader { parser }
    }
}
/// An iterator yielding read triples.
/// Could be built using a [`GraphParser`].
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct TripleReader<R: Read> {
// The underlying quad reader; each quad it yields is converted into a triple.
parser: FromReadQuadReader<R>,
}
impl<R: Read> Iterator for TripleReader<R> {
    type Item = Result<Triple, RdfParseError>;

    /// Reads the next quad from the underlying parser and converts it into a triple.
    fn next(&mut self) -> Option<Self::Item> {
        self.parser.next().map(|parsed| match parsed {
            Ok(quad) => Ok(quad.into()),
            Err(error) => Err(error.into()),
        })
    }
}
/// A parser for RDF dataset serialization formats.
///
/// It currently supports the following formats:
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[deprecated(note = "use RdfParser instead", since = "0.4.0")]
pub struct DatasetParser {
// The wrapped `RdfParser` every method delegates to.
inner: RdfParser,
}
impl DatasetParser {
    /// Creates a parser for the given dataset format.
    ///
    /// The underlying [`RdfParser`] is configured with `rename_blank_nodes()`.
    #[inline]
    pub fn from_format(format: DatasetFormat) -> Self {
        let inner = RdfParser::from_format(format.into()).rename_blank_nodes();
        Self { inner }
    }

    /// Sets the base IRI used to resolve the relative IRIs of the file.
    ///
    /// Returns an error if `base_iri` is not a valid IRI.
    ///
    /// ```
    /// use oxigraph::io::{DatasetFormat, DatasetParser};
    ///
    /// let file = "<g> { </s> </p> </o> }";
    ///
    /// let parser =
    ///     DatasetParser::from_format(DatasetFormat::TriG).with_base_iri("http://example.com")?;
    /// let quads = parser
    ///     .read_quads(file.as_bytes())
    ///     .collect::<Result<Vec<_>, _>>()?;
    ///
    /// assert_eq!(quads.len(), 1);
    /// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    #[inline]
    pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
        let inner = self.inner.with_base_iri(base_iri)?;
        Ok(Self { inner })
    }

    /// Starts parsing the content of a [`Read`] implementation and returns an iterator
    /// over the read quads.
    pub fn read_quads<R: Read>(self, reader: R) -> QuadReader<R> {
        let parser = self.inner.parse_read(reader);
        QuadReader { parser }
    }
}
/// An iterator yielding read quads.
/// Could be built using a [`DatasetParser`].
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct QuadReader<R: Read> {
// The underlying quad reader whose items are forwarded by the iterator.
parser: FromReadQuadReader<R>,
}
impl<R: Read> Iterator for QuadReader<R> {
    type Item = Result<Quad, RdfParseError>;

    /// Forwards the next quad (or parse error) of the underlying parser.
    fn next(&mut self) -> Option<Self::Item> {
        let parsed = self.parser.next()?;
        Some(parsed.map_err(Into::into))
    }
}

@ -1,185 +0,0 @@
#![allow(deprecated)]
//! Utilities to write RDF graphs and datasets.
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxrdfio::{RdfSerializer, ToWriteQuadWriter};
use std::io::{self, Write};
/// A serializer for RDF graph serialization formats.
///
/// It currently supports the following formats:
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(
/// buffer.as_slice(),
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")]
pub struct GraphSerializer {
// The wrapped `RdfSerializer` every method delegates to.
inner: RdfSerializer,
}
impl GraphSerializer {
    /// Creates a serializer for the given graph format.
    #[inline]
    pub fn from_format(format: GraphFormat) -> Self {
        let inner = RdfSerializer::from_format(format.into());
        Self { inner }
    }

    /// Returns a [`TripleWriter`] that writes triples into the given [`Write`] implementation.
    pub fn triple_writer<W: Write>(self, write: W) -> TripleWriter<W> {
        let writer = self.inner.serialize_to_write(write);
        TripleWriter { writer }
    }
}
/// Allows writing triples.
/// Could be built using a [`GraphSerializer`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](TripleWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(
/// buffer.as_slice(),
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct TripleWriter<W: Write> {
// The underlying writer the triples are forwarded to.
writer: ToWriteQuadWriter<W>,
}
impl<W: Write> TripleWriter<W> {
    /// Serializes one triple.
    pub fn write<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let triple: TripleRef<'a> = triple.into();
        self.writer.write_triple(triple)
    }

    /// Writes the last bytes of the file and flushes the underlying [`Write`] implementation.
    pub fn finish(self) -> io::Result<()> {
        let mut sink = self.writer.finish()?;
        sink.flush()
    }
}
/// A serializer for RDF dataset serialization formats.
///
/// It currently supports the following formats:
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")]
pub struct DatasetSerializer {
// The wrapped `RdfSerializer` every method delegates to.
inner: RdfSerializer,
}
impl DatasetSerializer {
    /// Creates a serializer for the given dataset format.
    #[inline]
    pub fn from_format(format: DatasetFormat) -> Self {
        let inner = RdfSerializer::from_format(format.into());
        Self { inner }
    }

    /// Returns a [`QuadWriter`] that writes quads into the given [`Write`] implementation.
    pub fn quad_writer<W: Write>(self, write: W) -> QuadWriter<W> {
        let writer = self.inner.serialize_to_write(write);
        QuadWriter { writer }
    }
}
/// Allows writing quads.
/// Could be built using a [`DatasetSerializer`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](QuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct QuadWriter<W: Write> {
// The underlying writer the quads are forwarded to.
writer: ToWriteQuadWriter<W>,
}
impl<W: Write> QuadWriter<W> {
    /// Serializes one quad.
    pub fn write<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
        let quad: QuadRef<'a> = quad.into();
        self.writer.write_quad(quad)
    }

    /// Writes the last bytes of the file and flushes the underlying [`Write`] implementation.
    pub fn finish(self) -> io::Result<()> {
        let mut sink = self.writer.finish()?;
        sink.flush()
    }
}

@ -1,12 +0,0 @@
#![doc = include_str!("../README.md")]
#![doc(test(attr(deny(warnings))))]
#![doc(test(attr(allow(deprecated))))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
pub mod io;
pub mod model;
pub mod sparql;
mod storage;
pub mod store;

@ -1,22 +0,0 @@
//! Implements data structures for [RDF 1.1 Concepts](https://www.w3.org/TR/rdf11-concepts/) using [OxRDF](https://crates.io/crates/oxrdf).
//!
//! Usage example:
//!
//! ```
//! use oxigraph::model::*;
//!
//! let mut graph = Graph::default();
//!
//! // insertion
//! let ex = NamedNodeRef::new("http://example.com").unwrap();
//! let triple = TripleRef::new(ex, ex, ex);
//! graph.insert(triple);
//!
//! // simple filter
//! let results: Vec<_> = graph.triples_for_subject(ex).collect();
//! assert_eq!(vec![triple], results);
//! ```
pub use oxrdf::*;
pub use spargebra::term::GroundQuad;

@ -1,84 +0,0 @@
use crate::io::RdfParseError;
use crate::model::NamedNode;
use crate::sparql::results::QueryResultsParseError as ResultsParseError;
use crate::sparql::SparqlSyntaxError;
use crate::storage::StorageError;
use std::convert::Infallible;
use std::error::Error;
use std::io;
/// A SPARQL evaluation error.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum EvaluationError {
/// An error in SPARQL parsing.
#[error(transparent)]
Parsing(#[from] SparqlSyntaxError),
/// An error from the storage.
#[error(transparent)]
Storage(#[from] StorageError),
/// An error while parsing an external RDF file.
#[error(transparent)]
GraphParsing(#[from] RdfParseError),
/// An error while parsing an external result file (likely from a federated query).
#[error(transparent)]
ResultsParsing(#[from] ResultsParseError),
/// An error returned during results serialization.
#[error(transparent)]
ResultsSerialization(#[from] io::Error),
/// An error during `SERVICE` evaluation.
#[error("{0}")]
Service(#[source] Box<dyn Error + Send + Sync + 'static>),
/// Error when `CREATE` tries to create an already existing graph.
#[error("The graph {0} already exists")]
GraphAlreadyExists(NamedNode),
/// Error when `DROP` or `CLEAR` tries to remove a graph that does not exist.
#[error("The graph {0} does not exist")]
GraphDoesNotExist(NamedNode),
/// The variable storing the `SERVICE` name is unbound.
#[error("The variable encoding the service name is unbound")]
UnboundService,
/// The given `SERVICE` is not supported.
#[error("The service {0} is not supported")]
UnsupportedService(NamedNode),
/// The given content media type returned from an HTTP response is not supported (`SERVICE` and `LOAD`).
#[error("The content media type {0} is not supported")]
UnsupportedContentType(String),
/// The `SERVICE` call did not return solutions but a boolean or a graph.
#[error("The service is not returning solutions but a boolean or a graph")]
ServiceDoesNotReturnSolutions,
/// The results are not an RDF graph.
#[error("The query results are not a RDF graph")]
NotAGraph,
}
/// Allows APIs that are generic over the error type to be used with operations that cannot fail.
impl From<Infallible> for EvaluationError {
#[inline]
fn from(error: Infallible) -> Self {
// `Infallible` has no value, so this empty match proves the function is never called.
match error {}
}
}
/// Maps a SPARQL evaluation error to the closest [`io::Error`].
///
/// * Syntax errors become [`io::ErrorKind::InvalidData`].
/// * Parsing, storage and serialization errors use their own conversion to [`io::Error`].
/// * `SERVICE` errors that already wrap an [`io::Error`] are unwrapped, the others are
///   wrapped with [`io::ErrorKind::Other`].
/// * The remaining variants become [`io::ErrorKind::InvalidInput`].
impl From<EvaluationError> for io::Error {
    #[inline]
    fn from(error: EvaluationError) -> Self {
        match error {
            EvaluationError::Parsing(syntax_error) => {
                Self::new(io::ErrorKind::InvalidData, syntax_error)
            }
            EvaluationError::GraphParsing(parse_error) => parse_error.into(),
            EvaluationError::ResultsParsing(parse_error) => parse_error.into(),
            EvaluationError::ResultsSerialization(io_error) => io_error,
            EvaluationError::Storage(storage_error) => storage_error.into(),
            EvaluationError::Service(service_error) => {
                // Give back the original I/O error instead of nesting it when possible.
                match service_error.downcast::<Self>() {
                    Ok(io_error) => *io_error,
                    Err(other) => Self::new(io::ErrorKind::Other, other),
                }
            }
            unsupported @ (EvaluationError::GraphAlreadyExists(_)
            | EvaluationError::GraphDoesNotExist(_)
            | EvaluationError::UnboundService
            | EvaluationError::UnsupportedService(_)
            | EvaluationError::UnsupportedContentType(_)
            | EvaluationError::ServiceDoesNotReturnSolutions
            | EvaluationError::NotAGraph) => Self::new(io::ErrorKind::InvalidInput, unsupported),
        }
    }
}

@ -1,9 +0,0 @@
#[cfg(not(feature = "http-client"))]
mod dummy;
#[cfg(feature = "http-client")]
mod simple;
#[cfg(not(feature = "http-client"))]
pub use dummy::Client;
#[cfg(feature = "http-client")]
pub use simple::Client;

@ -1,371 +0,0 @@
use crate::io::{RdfFormat, RdfSerializer};
use crate::model::*;
use crate::sparql::error::EvaluationError;
use crate::sparql::results::{
FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsFormat,
QueryResultsParseError, QueryResultsParser, QueryResultsSerializer,
};
pub use sparesults::QuerySolution;
use std::io::{Read, Write};
use std::sync::Arc;
/// Results of a [SPARQL query](https://www.w3.org/TR/sparql11-query/).
pub enum QueryResults {
/// Results of a [SELECT](https://www.w3.org/TR/sparql11-query/#select) query.
Solutions(QuerySolutionIter),
/// Result of an [ASK](https://www.w3.org/TR/sparql11-query/#ask) query.
Boolean(bool),
/// Results of a [CONSTRUCT](https://www.w3.org/TR/sparql11-query/#construct) or [DESCRIBE](https://www.w3.org/TR/sparql11-query/#describe) query.
Graph(QueryTripleIter),
}
impl QueryResults {
/// Reads a SPARQL query results serialization.
///
/// The returned value is either `Solutions` or `Boolean`, depending on the content of the file.
pub fn read(
read: impl Read + 'static,
format: QueryResultsFormat,
) -> Result<Self, QueryResultsParseError> {
Ok(QueryResultsParser::from_format(format)
.parse_read(read)?
.into())
}
/// Writes the query results using a SPARQL query results format.
///
/// `Graph` results are written as solutions with the three variables
/// `subject`, `predicate` and `object`, one solution per triple.
///
/// The given writer is returned once the serialization is done.
///
/// ```
/// use oxigraph::store::Store;
/// use oxigraph::model::*;
/// use oxigraph::sparql::results::QueryResultsFormat;
///
/// let store = Store::new()?;
/// let ex = NamedNodeRef::new("http://example.com")?;
/// store.insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
///
/// let results = store.query("SELECT ?s WHERE { ?s ?p ?o }")?;
/// assert_eq!(
/// results.write(Vec::new(), QueryResultsFormat::Json)?,
/// r#"{"head":{"vars":["s"]},"results":{"bindings":[{"s":{"type":"uri","value":"http://example.com"}}]}}"#.as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn write<W: Write>(
self,
write: W,
format: QueryResultsFormat,
) -> Result<W, EvaluationError> {
let serializer = QueryResultsSerializer::from_format(format);
match self {
Self::Boolean(value) => serializer.serialize_boolean_to_write(write, value),
Self::Solutions(solutions) => {
let mut writer = serializer
.serialize_solutions_to_write(write, solutions.variables().to_vec())
.map_err(EvaluationError::ResultsSerialization)?;
for solution in solutions {
writer
.write(&solution?)
.map_err(EvaluationError::ResultsSerialization)?;
}
writer.finish()
}
Self::Graph(triples) => {
// Triples are encoded as solutions over these three fixed variables.
let s = VariableRef::new_unchecked("subject");
let p = VariableRef::new_unchecked("predicate");
let o = VariableRef::new_unchecked("object");
let mut writer = serializer
.serialize_solutions_to_write(
write,
vec![s.into_owned(), p.into_owned(), o.into_owned()],
)
.map_err(EvaluationError::ResultsSerialization)?;
for triple in triples {
let triple = triple?;
writer
.write([
(s, &triple.subject.into()),
(p, &triple.predicate.into()),
(o, &triple.object),
])
.map_err(EvaluationError::ResultsSerialization)?;
}
writer.finish()
}
}
// Every arm yields an `io::Result<W>`; the I/O error is wrapped here once.
.map_err(EvaluationError::ResultsSerialization)
}
/// Writes the graph query results.
///
/// This method fails with [`EvaluationError::NotAGraph`] if it is called on the `Solutions` or `Boolean` results.
///
/// ```
/// use oxigraph::io::RdfFormat;
/// use oxigraph::model::*;
/// use oxigraph::store::Store;
///
/// let graph = "<http://example.com> <http://example.com> <http://example.com> .\n";
///
/// let store = Store::new()?;
/// store.load_graph(
/// graph.as_bytes(),
/// RdfFormat::NTriples,
/// GraphName::DefaultGraph,
/// None,
/// )?;
///
/// let results = store.query("CONSTRUCT WHERE { ?s ?p ?o }")?;
/// assert_eq!(
/// results.write_graph(Vec::new(), RdfFormat::NTriples)?,
/// graph.as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn write_graph<W: Write>(
self,
write: W,
format: impl Into<RdfFormat>,
) -> Result<W, EvaluationError> {
if let Self::Graph(triples) = self {
let mut writer = RdfSerializer::from_format(format.into()).serialize_to_write(write);
for triple in triples {
writer
.write_triple(&triple?)
.map_err(EvaluationError::ResultsSerialization)?;
}
writer
.finish()
.map_err(EvaluationError::ResultsSerialization)
} else {
Err(EvaluationError::NotAGraph)
}
}
}
impl From<QuerySolutionIter> for QueryResults {
#[inline]
fn from(value: QuerySolutionIter) -> Self {
Self::Solutions(value)
}
}
impl<R: Read + 'static> From<FromReadQueryResultsReader<R>> for QueryResults {
fn from(reader: FromReadQueryResultsReader<R>) -> Self {
match reader {
FromReadQueryResultsReader::Solutions(s) => Self::Solutions(s.into()),
FromReadQueryResultsReader::Boolean(v) => Self::Boolean(v),
}
}
}
/// An iterator over [`QuerySolution`]s.
///
/// Each item is either a solution or the `EvaluationError` raised while producing it.
///
/// ```
/// use oxigraph::sparql::QueryResults;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }")? {
/// for solution in solutions {
/// println!("{:?}", solution?.get("s"));
/// }
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct QuerySolutionIter {
// Ordered variables of the solutions, exposed through `variables()`.
variables: Arc<[Variable]>,
// Boxed source of solutions; `next` and `size_hint` are forwarded to it.
iter: Box<dyn Iterator<Item = Result<QuerySolution, EvaluationError>>>,
}
impl QuerySolutionIter {
/// Construct a new iterator of solution from an ordered list of solution variables and an iterator of solution tuples
/// (each tuple using the same ordering as the variable list such that tuple element 0 is the value for the variable 0...)
pub fn new(
variables: Arc<[Variable]>,
iter: impl Iterator<Item = Result<Vec<Option<Term>>, EvaluationError>> + 'static,
) -> Self {
Self {
variables: Arc::clone(&variables),
iter: Box::new(
iter.map(move |t| t.map(|values| (Arc::clone(&variables), values).into())),
),
}
}
/// The variables used in the solutions.
///
/// ```
/// use oxigraph::sparql::{QueryResults, Variable};
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s ?o WHERE { ?s ?p ?o }")? {
/// assert_eq!(
/// solutions.variables(),
/// &[Variable::new("s")?, Variable::new("o")?]
/// );
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn variables(&self) -> &[Variable] {
&self.variables
}
}
impl<R: Read + 'static> From<FromReadSolutionsReader<R>> for QuerySolutionIter {
fn from(reader: FromReadSolutionsReader<R>) -> Self {
Self {
variables: reader.variables().into(),
iter: Box::new(reader.map(|t| t.map_err(EvaluationError::from))),
}
}
}
/// Yields the solutions by forwarding every call to the boxed inner iterator.
impl Iterator for QuerySolutionIter {
type Item = Result<QuerySolution, EvaluationError>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
// Reports the size hint of the inner iterator unchanged.
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}
/// An iterator over the triples that compose a graph solution.
///
/// Each item is either a triple or the `EvaluationError` raised while producing it.
///
/// ```
/// use oxigraph::sparql::QueryResults;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Graph(triples) = store.query("CONSTRUCT WHERE { ?s ?p ?o }")? {
/// for triple in triples {
/// println!("{}", triple?);
/// }
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct QueryTripleIter {
// Boxed source of triples; every `Iterator` method of this type is forwarded to it.
pub(crate) iter: Box<dyn Iterator<Item = Result<Triple, EvaluationError>>>,
}
/// Yields the triples by forwarding every call to the boxed inner iterator.
impl Iterator for QueryTripleIter {
type Item = Result<Triple, EvaluationError>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
// Reports the size hint of the inner iterator unchanged.
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
// `fold` is forwarded explicitly so that the inner iterator's own `fold` is the one that runs,
// instead of the default implementation built on top of `next`.
#[inline]
fn fold<Acc, G>(self, init: Acc, g: G) -> Acc
where
G: FnMut(Acc, Self::Item) -> Acc,
{
self.iter.fold(init, g)
}
}
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
use super::*;
use std::io::Cursor;
/// Writes each sample result, reads it back and writes it again:
/// both serializations must be identical, for every tested format.
#[test]
fn test_serialization_roundtrip() -> Result<(), EvaluationError> {
use std::str;
for format in [
QueryResultsFormat::Json,
QueryResultsFormat::Xml,
QueryResultsFormat::Tsv,
] {
// Samples: both boolean values and one solution set mixing unbound values,
// named nodes, blank nodes, literals and a nested triple.
let results = vec![
QueryResults::Boolean(true),
QueryResults::Boolean(false),
QueryResults::Solutions(QuerySolutionIter::new(
[
Variable::new_unchecked("foo"),
Variable::new_unchecked("bar"),
]
.as_ref()
.into(),
Box::new(
vec![
Ok(vec![None, None]),
Ok(vec![
Some(NamedNode::new_unchecked("http://example.com").into()),
None,
]),
Ok(vec![
None,
Some(NamedNode::new_unchecked("http://example.com").into()),
]),
Ok(vec![
Some(BlankNode::new_unchecked("foo").into()),
Some(BlankNode::new_unchecked("bar").into()),
]),
Ok(vec![Some(Literal::new_simple_literal("foo").into()), None]),
Ok(vec![
Some(
Literal::new_language_tagged_literal_unchecked("foo", "fr")
.into(),
),
None,
]),
Ok(vec![
Some(Literal::from(1).into()),
Some(Literal::from(true).into()),
]),
Ok(vec![
Some(Literal::from(1.33).into()),
Some(Literal::from(false).into()),
]),
Ok(vec![
Some(
Triple::new(
NamedNode::new_unchecked("http://example.com/s"),
NamedNode::new_unchecked("http://example.com/p"),
Triple::new(
NamedNode::new_unchecked("http://example.com/os"),
NamedNode::new_unchecked("http://example.com/op"),
NamedNode::new_unchecked("http://example.com/oo"),
),
)
.into(),
),
None,
]),
]
.into_iter(),
),
)),
];
for ex in results {
// First pass: serialize the sample.
let mut buffer = Vec::new();
ex.write(&mut buffer, format)?;
// Second pass: parse the output and serialize it again.
let ex2 = QueryResults::read(Cursor::new(buffer.clone()), format)?;
let mut buffer2 = Vec::new();
ex2.write(&mut buffer2, format)?;
// Compared as text so that a failure prints readable output.
assert_eq!(
str::from_utf8(&buffer).unwrap(),
str::from_utf8(&buffer2).unwrap()
);
}
}
Ok(())
}
}

@ -1,44 +0,0 @@
//! Utilities to read and write SPARQL query results formats using [sparesults](https://crates.io/crates/sparesults).
//!
//! It supports [SPARQL Query Results XML Format (Second Edition)](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
//!
//! Usage example converting a JSON result file into a TSV result file:
//!
//! ```
//! use oxigraph::sparql::results::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader, QueryResultsSerializer};
//! use std::io::Result;
//!
//! fn convert_json_to_tsv(json_file: &[u8]) -> Result<Vec<u8>> {
//! let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
//! let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv);
//! // We start to read the JSON file and see which kind of results it is
//! match json_parser.parse_read(json_file)? {
//! FromReadQueryResultsReader::Boolean(value) => {
//! // it's a boolean result, we copy it in TSV to the output buffer
//! tsv_serializer.serialize_boolean_to_write(Vec::new(), value)
//! }
//! FromReadQueryResultsReader::Solutions(solutions_reader) => {
//! // it's a set of solutions, we create a writer and we write to it while reading in streaming from the JSON file
//! let mut serialize_solutions_to_write = tsv_serializer.serialize_solutions_to_write(Vec::new(), solutions_reader.variables().to_vec())?;
//! for solution in solutions_reader {
//! serialize_solutions_to_write.write(&solution?)?;
//! }
//! serialize_solutions_to_write.finish()
//! }
//! }
//! }
//!
//! // Let's test with a boolean
//! assert_eq!(
//! convert_json_to_tsv(br#"{"boolean":true}"#.as_slice()).unwrap(),
//! b"true"
//! );
//!
//! // And with a set of solutions
//! assert_eq!(
//! convert_json_to_tsv(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice()).unwrap(),
//! b"?foo\t?bar\n\"test\"\t\n"
//! );
//! ```
pub use sparesults::*;

@ -1,12 +0,0 @@
//! A storage backend.
//!
//! RocksDB is used on every target except WebAssembly;
//! on WebAssembly the `fallback` (in memory) backend is used instead.
#[cfg(target_family = "wasm")]
pub use fallback::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction};
#[cfg(not(target_family = "wasm"))]
pub use oxi_rocksdb::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction};
#[cfg(target_family = "wasm")]
mod fallback;
#[cfg(not(target_family = "wasm"))]
mod oxi_rocksdb;

@ -1,139 +0,0 @@
use crate::io::{RdfFormat, RdfParseError};
use crate::storage::numeric_encoder::EncodedTerm;
use oxiri::IriParseError;
use oxrdf::TermRef;
use std::error::Error;
use std::io;
/// An error related to storage operations (reads, writes...).
///
/// The enum is `#[non_exhaustive]`: code outside of this crate must keep a wildcard arm when matching on it.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum StorageError {
/// Error from the OS I/O layer.
#[error(transparent)]
Io(#[from] io::Error),
/// Error related to data corruption.
#[error(transparent)]
Corruption(#[from] CorruptionError),
// Arbitrary boxed error, displayed with the message of the wrapped error.
// Hidden from the generated documentation by `#[doc(hidden)]`.
#[doc(hidden)]
#[error("{0}")]
Other(#[source] Box<dyn Error + Send + Sync + 'static>),
}
impl From<StorageError> for io::Error {
#[inline]
fn from(error: StorageError) -> Self {
match error {
StorageError::Io(error) => error,
StorageError::Corruption(error) => error.into(),
StorageError::Other(error) => Self::new(io::ErrorKind::Other, error),
}
}
}
/// An error returned if some content in the database is corrupted.
///
/// Opaque wrapper around the private `CorruptionErrorKind`; being `#[error(transparent)]`,
/// it displays the message of the wrapped kind.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct CorruptionError(#[from] CorruptionErrorKind);
/// The possible causes of a [`CorruptionError`].
#[derive(Debug, thiserror::Error)]
enum CorruptionErrorKind {
/// A plain message describing the corruption.
#[error("{0}")]
Msg(String),
/// An underlying error, kept as the error source and displayed with its own message.
#[error("{0}")]
Other(#[source] Box<dyn Error + Send + Sync + 'static>),
}
impl CorruptionError {
    /// Builds an error wrapping an underlying error
    /// (or any value that can be converted into a boxed error).
    #[inline]
    pub(crate) fn new(error: impl Into<Box<dyn Error + Send + Sync + 'static>>) -> Self {
        let source = error.into();
        Self(CorruptionErrorKind::Other(source))
    }

    /// Builds the error reported when `encoded` is not a valid encoding of `term`.
    #[inline]
    pub(crate) fn from_encoded_term(encoded: &EncodedTerm, term: &TermRef<'_>) -> Self {
        // TODO: eventually use a dedicated error enum value
        let message = format!("Invalid term encoding {encoded:?} for {term}");
        Self::msg(message)
    }

    /// Builds the error reported when the column family `name` does not exist.
    #[inline]
    pub(crate) fn from_missing_column_family_name(name: &'static str) -> Self {
        // TODO: eventually use a dedicated error enum value
        let message = format!("Column family {name} does not exist");
        Self::msg(message)
    }

    /// Builds an error from a printable error message.
    #[inline]
    pub(crate) fn msg(msg: impl Into<String>) -> Self {
        let kind = CorruptionErrorKind::Msg(msg.into());
        Self(kind)
    }
}
/// Converts a corruption error into an I/O error of kind `InvalidData`, keeping it as the inner error.
impl From<CorruptionError> for io::Error {
    #[inline]
    fn from(error: CorruptionError) -> Self {
        let kind = io::ErrorKind::InvalidData;
        io::Error::new(kind, error)
    }
}
/// An error raised while loading a file into a [`Store`](crate::store::Store).
///
/// The `Parsing` and `Storage` variants are transparent: they display the message of the wrapped error.
#[derive(Debug, thiserror::Error)]
pub enum LoaderError {
/// An error raised while reading the file.
#[error(transparent)]
Parsing(#[from] RdfParseError),
/// An error raised during the insertion in the store.
#[error(transparent)]
Storage(#[from] StorageError),
/// The base IRI is invalid.
#[error("Invalid base IRI '{iri}': {error}")]
InvalidBaseIri {
/// The IRI itself.
iri: String,
/// The parsing error.
#[source]
error: IriParseError,
},
}
impl From<LoaderError> for io::Error {
#[inline]
fn from(error: LoaderError) -> Self {
match error {
LoaderError::Storage(error) => error.into(),
LoaderError::Parsing(error) => error.into(),
LoaderError::InvalidBaseIri { .. } => {
Self::new(io::ErrorKind::InvalidInput, error.to_string())
}
}
}
}
/// An error raised while writing a file from a [`Store`](crate::store::Store).
///
/// The `Io` and `Storage` variants are transparent: they display the message of the wrapped error.
#[derive(Debug, thiserror::Error)]
pub enum SerializerError {
/// An error raised while writing the content.
#[error(transparent)]
Io(#[from] io::Error),
/// An error raised during the lookup in the store.
#[error(transparent)]
Storage(#[from] StorageError),
/// A format compatible with [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) is required.
///
/// The payload is the format that was found instead.
#[error("A RDF format supporting datasets was expected, {0} found")]
DatasetFormatExpected(RdfFormat),
}
impl From<SerializerError> for io::Error {
#[inline]
fn from(error: SerializerError) -> Self {
match error {
SerializerError::Storage(error) => error.into(),
SerializerError::Io(error) => error,
SerializerError::DatasetFormatExpected(_) => {
Self::new(io::ErrorKind::InvalidInput, error.to_string())
}
}
}
}

@ -1,33 +1,28 @@
[package]
name = "oxrdf"
version = "0.2.0-alpha.4"
authors.workspace = true
license.workspace = true
version = "0.1.7"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
keywords = ["RDF"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxrdf"
homepage = "https://oxigraph.org/"
description = """
A library providing basic data structures related to RDF
"""
documentation = "https://docs.rs/oxrdf"
edition.workspace = true
rust-version.workspace = true
edition = "2021"
rust-version = "1.60"
[features]
default = []
rdf-star = []
[dependencies]
oxilangtag.workspace = true
oxiri.workspace = true
oxsdatatypes = { workspace = true, optional = true }
rand.workspace = true
thiserror.workspace = true
serde.workspace = true
[lints]
workspace = true
rand = "0.8"
oxilangtag = "0.1"
oxiri = "0.2"
oxsdatatypes = { version = "0.1.3", path="../oxsdatatypes", optional = true }
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]

@ -5,7 +5,7 @@ OxRDF
[![Released API docs](https://docs.rs/oxrdf/badge.svg)](https://docs.rs/oxrdf)
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdf)](https://crates.io/crates/oxrdf)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
OxRDF is a simple library providing datastructures encoding [RDF 1.1 concepts](https://www.w3.org/TR/rdf11-concepts/).
@ -15,8 +15,6 @@ Support for [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) i
OxRDF is inspired by [RDF/JS](https://rdf.js.org/data-model-spec/) and [Apache Commons RDF](http://commons.apache.org/proper/commons-rdf/).
Use [`oxrdfio`](https://crates.io/crates/oxrdfio) if you need to read or write RDF files.
Usage example:
```rust

@ -1,7 +1,8 @@
use rand::random;
use serde::{Deserialize, Serialize};
use std::error::Error;
use std::fmt;
use std::io::Write;
use std::{fmt, str};
use std::str;
/// An owned RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
///
@ -14,13 +15,16 @@ use std::{fmt, str};
/// ```
/// use oxrdf::BlankNode;
///
/// assert_eq!("_:a122", BlankNode::new("a122")?.to_string());
/// assert_eq!(
/// "_:a122",
/// BlankNode::new("a122")?.to_string()
/// );
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)]
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct BlankNode(BlankNodeContent);
#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)]
#[derive(PartialEq, Eq, Debug, Clone, Hash)]
enum BlankNodeContent {
Named(String),
Anonymous { id: u128, str: IdStr },
@ -32,7 +36,7 @@ impl BlankNode {
/// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars.
///
/// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`]
/// that creates a random ID that could be easily inlined by Oxigraph stores.
///that creates a random ID that could be easily inlined by Oxigraph stores.
pub fn new(id: impl Into<String>) -> Result<Self, BlankNodeIdParseError> {
let id = id.into();
validate_blank_node_identifier(&id)?;
@ -107,14 +111,7 @@ impl Default for BlankNode {
/// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id.
#[inline]
fn default() -> Self {
// We ensure the ID does not start with a number to be also valid with RDF/XML
loop {
let id = random();
let str = IdStr::new(id);
if matches!(str.as_str().as_bytes().first(), Some(b'a'..=b'f')) {
return Self(BlankNodeContent::Anonymous { id, str });
}
}
Self::new_from_unique_id(random::<u128>())
}
}
@ -129,7 +126,10 @@ impl Default for BlankNode {
/// ```
/// use oxrdf::BlankNodeRef;
///
/// assert_eq!("_:a122", BlankNodeRef::new("a122")?.to_string());
/// assert_eq!(
/// "_:a122",
/// BlankNodeRef::new("a122")?.to_string()
/// );
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
@ -173,7 +173,7 @@ impl<'a> BlankNodeRef<'a> {
/// Returns the underlying ID of this blank node.
#[inline]
pub const fn as_str(self) -> &'a str {
pub fn as_str(self) -> &'a str {
match self.0 {
BlankNodeRefContent::Named(id) => id,
BlankNodeRefContent::Anonymous { str, .. } => str,
@ -185,15 +185,12 @@ impl<'a> BlankNodeRef<'a> {
/// ```
/// use oxrdf::BlankNode;
///
/// assert_eq!(
/// BlankNode::new_from_unique_id(128).as_ref().unique_id(),
/// Some(128)
/// );
/// assert_eq!(BlankNode::new_from_unique_id(128).as_ref().unique_id(), Some(128));
/// assert_eq!(BlankNode::new("foo")?.as_ref().unique_id(), None);
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
/// ```
#[inline]
pub const fn unique_id(&self) -> Option<u128> {
pub fn unique_id(&self) -> Option<u128> {
match self.0 {
BlankNodeRefContent::Named(_) => None,
BlankNodeRefContent::Anonymous { id, .. } => Some(id),
@ -247,7 +244,7 @@ impl PartialEq<BlankNodeRef<'_>> for BlankNode {
}
}
#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)]
#[derive(PartialEq, Eq, Debug, Clone, Hash)]
struct IdStr([u8; 32]);
impl IdStr {
@ -267,7 +264,7 @@ impl IdStr {
fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError> {
let mut chars = id.chars();
let front = chars.next().ok_or(BlankNodeIdParseError)?;
let front = chars.next().ok_or(BlankNodeIdParseError {})?;
match front {
'0'..='9'
| '_'
@ -286,7 +283,7 @@ fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError>
| '\u{F900}'..='\u{FDCF}'
| '\u{FDF0}'..='\u{FFFD}'
| '\u{10000}'..='\u{EFFFF}' => (),
_ => return Err(BlankNodeIdParseError),
_ => return Err(BlankNodeIdParseError {}),
}
for c in chars {
match c {
@ -312,13 +309,13 @@ fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError>
| '\u{F900}'..='\u{FDCF}'
| '\u{FDF0}'..='\u{FFFD}'
| '\u{10000}'..='\u{EFFFF}' => (),
_ => return Err(BlankNodeIdParseError),
_ => return Err(BlankNodeIdParseError {}),
}
}
// Could not end with a dot
if id.ends_with('.') {
Err(BlankNodeIdParseError)
Err(BlankNodeIdParseError {})
} else {
Ok(())
}
@ -345,12 +342,19 @@ fn to_integer_id(id: &str) -> Option<u128> {
}
/// An error raised during [`BlankNode`] IDs validation.
#[derive(Debug, thiserror::Error)]
#[error("The blank node identifier is invalid")]
pub struct BlankNodeIdParseError;
#[derive(Debug)]
pub struct BlankNodeIdParseError {}
/// Displays the fixed message describing an invalid blank node identifier.
impl fmt::Display for BlankNodeIdParseError {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("The blank node identifier is invalid")
    }
}
impl Error for BlankNodeIdParseError {}
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
use super::*;
@ -368,13 +372,13 @@ mod tests {
#[test]
fn new_validation() {
BlankNode::new("").unwrap_err();
BlankNode::new("a").unwrap();
BlankNode::new("-").unwrap_err();
BlankNode::new("a-").unwrap();
BlankNode::new(".").unwrap_err();
BlankNode::new("a.").unwrap_err();
BlankNode::new("a.a").unwrap();
assert!(BlankNode::new("").is_err());
assert!(BlankNode::new("a").is_ok());
assert!(BlankNode::new("-").is_err());
assert!(BlankNode::new("a-").is_ok());
assert!(BlankNode::new(".").is_err());
assert!(BlankNode::new("a.").is_err());
assert!(BlankNode::new("a.a").is_ok());
}
#[test]

@ -20,28 +20,26 @@
//! assert_eq!(vec![TripleRef::new(ex, ex, ex)], results);
//!
//! // Print
//! assert_eq!(
//! dataset.to_string(),
//! "<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n"
//! );
//! assert_eq!(dataset.to_string(), "<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n");
//! # Result::<_,Box<dyn std::error::Error>>::Ok(())
//! ```
//!
//! See also [`Graph`] if you only care about plain triples.
use crate::interning::*;
use crate::SubjectRef;
use crate::*;
use std::collections::hash_map::DefaultHasher;
use std::collections::{BTreeSet, HashMap, HashSet};
use std::collections::BTreeSet;
use std::collections::{HashMap, HashSet};
use std::fmt;
use std::hash::{Hash, Hasher};
/// An in-memory [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
///
/// It can accommodate a fairly large number of quads (in the few millions).
///
/// <div class="warning">It interns the strings and does not do any garbage collection yet:
/// if you insert and remove a lot of different terms, memory will grow without any reduction.</div>
/// Beware: it interns the string and does not do any garbage collection yet:
/// if you insert and remove a lot of different terms, memory will grow without any reduction.
///
/// Usage example:
/// ```
@ -63,7 +61,7 @@ use std::hash::{Hash, Hasher};
/// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Debug, Default, Clone)]
#[derive(Debug, Default)]
pub struct Dataset {
interner: Interner,
gspo: BTreeSet<(
@ -185,7 +183,6 @@ impl Dataset {
.map(move |q| self.decode_spog(q))
}
#[allow(clippy::map_identity)]
fn interned_quads_for_subject(
&self,
subject: &InternedSubject,
@ -296,18 +293,6 @@ impl Dataset {
.map(|(o, s, p, g)| (s, p, o, g))
}
/// Returns the quads of the dataset that are in the given graph.
///
/// If the graph name is not known by the dataset, an "impossible" interned graph name
/// is looked up instead (presumably matching no quad, TODO confirm against `InternedGraphName::impossible`).
pub fn quads_for_graph_name<'a, 'b>(
&'a self,
graph_name: impl Into<GraphNameRef<'b>>,
) -> impl Iterator<Item = QuadRef<'a>> + 'a {
let graph_name = self
.encoded_graph_name(graph_name)
.unwrap_or_else(InternedGraphName::impossible);
// The interned quads are decoded back to borrowed quads using the dataset interner.
self.interned_quads_for_graph_name(&graph_name)
.map(move |q| self.decode_spog(q))
}
fn interned_quads_for_graph_name(
&self,
graph_name: &InternedGraphName,
@ -504,11 +489,11 @@ impl Dataset {
}
}
/// Canonicalizes the dataset by renaming blank nodes.
/// Applies on the dataset the canonicalization process described in
/// [Canonical Forms for Isomorphic and Equivalent RDF Graphs: Algorithms for Leaning and Labelling Blank Nodes, Aidan Hogan, 2017](http://aidanhogan.com/docs/rdf-canonicalisation.pdf).
///
/// Usage example ([Dataset isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-dataset-isomorphism)):
/// ```
/// use oxrdf::dataset::CanonicalizationAlgorithm;
/// use oxrdf::*;
///
/// let iri = NamedNodeRef::new("http://example.com")?;
@ -526,59 +511,29 @@ impl Dataset {
/// graph2.insert(QuadRef::new(&bnode2, iri, iri, &g2));
///
/// assert_ne!(graph1, graph2);
/// graph1.canonicalize(CanonicalizationAlgorithm::Unstable);
/// graph2.canonicalize(CanonicalizationAlgorithm::Unstable);
/// graph1.canonicalize();
/// graph2.canonicalize();
/// assert_eq!(graph1, graph2);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
///
/// <div class="warning">Blank node ids depends on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes.
/// Hence, this canonization might not be suitable for diffs.</div>
/// Warning 1: Blank node ids depend on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes.
/// Hence, this canonicalization might not be suitable for diffs.
///
/// Warning 2: The canonicalization algorithm is not stable and canonical blank node ids might change between Oxigraph versions.
///
/// <div class="warning">This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset.</div>
pub fn canonicalize(&mut self, algorithm: CanonicalizationAlgorithm) {
let bnode_mapping = self.canonicalize_interned_blank_nodes(algorithm);
let new_quads = self.map_blank_nodes(&bnode_mapping);
/// Warning 3: The worst-case complexity of this implementation is in *O(b!)* with *b* the number of blank nodes in the input dataset.
pub fn canonicalize(&mut self) {
let bnodes = self.blank_nodes();
let (hash, partition) =
self.hash_bnodes(bnodes.into_iter().map(|bnode| (bnode, 0)).collect());
let new_quads = self.distinguish(&hash, &partition);
self.clear();
for quad in new_quads {
self.insert_encoded(quad);
}
}
/// Returns a map between the current dataset blank node and the canonicalized blank node
/// to create a canonical dataset.
///
/// The keys borrow from the dataset: each interned blank node of the internal mapping
/// is decoded with the dataset interner.
///
/// See also [`canonicalize`](Self::canonicalize).
pub fn canonicalize_blank_nodes(
&self,
algorithm: CanonicalizationAlgorithm,
) -> HashMap<BlankNodeRef<'_>, BlankNode> {
self.canonicalize_interned_blank_nodes(algorithm)
.into_iter()
.map(|(from, to)| (from.decode_from(&self.interner), to))
.collect()
}
/// Computes, with the given algorithm, the mapping from each interned blank node
/// of the dataset to its canonical blank node.
fn canonicalize_interned_blank_nodes(
&self,
algorithm: CanonicalizationAlgorithm,
) -> HashMap<InternedBlankNode, BlankNode> {
match algorithm {
CanonicalizationAlgorithm::Unstable => {
let bnodes = self.blank_nodes();
let quads_per_blank_node = self.quads_per_blank_nodes();
// Every blank node starts with the same hash (0) before the refinement.
let (hash, partition) = self.hash_bnodes(
bnodes.into_iter().map(|bnode| (bnode, 0)).collect(),
&quads_per_blank_node,
);
// The final hash of each blank node is used as the unique id of its canonical blank node.
self.distinguish(hash, &partition, &quads_per_blank_node)
.into_iter()
.map(|(from, to)| (from, BlankNode::new_from_unique_id(to.into())))
.collect()
}
}
}
fn blank_nodes(&self) -> HashSet<InternedBlankNode> {
let mut bnodes = HashSet::new();
for (g, s, _, o) in &self.gspo {
@ -617,172 +572,107 @@ impl Dataset {
}
}
/// Builds the index from each blank node to the quads in which it appears.
///
/// A quad is registered once per position (subject, object or graph name) holding the blank node.
/// With the `rdf-star` feature, blank nodes nested in quoted triples in subject or object position
/// are indexed too.
fn quads_per_blank_nodes(&self) -> QuadsPerBlankNode {
let mut map: HashMap<_, Vec<_>> = HashMap::new();
for quad in &self.spog {
// Subject position.
if let InternedSubject::BlankNode(bnode) = &quad.0 {
map.entry(*bnode).or_default().push(quad.clone());
}
#[cfg(feature = "rdf-star")]
if let InternedSubject::Triple(t) = &quad.0 {
Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, &mut map);
}
// Object position.
if let InternedTerm::BlankNode(bnode) = &quad.2 {
map.entry(*bnode).or_default().push(quad.clone());
}
#[cfg(feature = "rdf-star")]
if let InternedTerm::Triple(t) = &quad.2 {
Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, &mut map);
}
// Graph name position.
if let InternedGraphName::BlankNode(bnode) = &quad.3 {
map.entry(*bnode).or_default().push(quad.clone());
}
}
map
}
/// Registers `quad` in `map` under every blank node found in the subject or the object of `triple`,
/// recursing into nested quoted triples.
#[cfg(feature = "rdf-star")]
fn add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(
quad: &(
InternedSubject,
InternedNamedNode,
InternedTerm,
InternedGraphName,
),
triple: &InternedTriple,
map: &mut QuadsPerBlankNode,
) {
// Subject of the quoted triple: a blank node or another quoted triple.
if let InternedSubject::BlankNode(bnode) = &triple.subject {
map.entry(*bnode).or_default().push(quad.clone());
}
if let InternedSubject::Triple(t) = &triple.subject {
Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, map);
}
// Object of the quoted triple: a blank node or another quoted triple.
if let InternedTerm::BlankNode(bnode) = &triple.object {
map.entry(*bnode).or_default().push(quad.clone());
}
if let InternedTerm::Triple(t) = &triple.object {
Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, map);
}
}
fn hash_bnodes(
&self,
mut hashes: HashMap<InternedBlankNode, u64>,
quads_per_blank_node: &QuadsPerBlankNode,
) -> (
HashMap<InternedBlankNode, u64>,
Vec<(u64, Vec<InternedBlankNode>)>,
) {
let mut to_hash = Vec::new();
let mut to_do = hashes
.keys()
.map(|bnode| (*bnode, true))
.collect::<HashMap<_, _>>();
let mut partition = HashMap::<_, Vec<_>>::with_capacity(hashes.len());
let mut old_partition_count = usize::MAX;
while old_partition_count != partition.len() {
old_partition_count = partition.len();
partition.clear();
let mut new_hashes = hashes.clone();
for bnode in hashes.keys() {
let hash = if to_do.contains_key(bnode) {
for (s, p, o, g) in &quads_per_blank_node[bnode] {
let mut partition: HashMap<u64, Vec<InternedBlankNode>> = HashMap::new();
let mut partition_len = 0;
loop {
//TODO: improve termination
let mut new_hashes = HashMap::new();
for (bnode, old_hash) in &hashes {
for (_, p, o, g) in
self.interned_quads_for_subject(&InternedSubject::BlankNode(*bnode))
{
to_hash.push((
self.hash_named_node(*p),
self.hash_term(o, &hashes),
self.hash_graph_name(g, &hashes),
0,
));
}
for (s, p, _, g) in self.interned_quads_for_object(&InternedTerm::BlankNode(*bnode))
{
to_hash.push((
self.hash_subject(s, *bnode, &hashes),
self.hash_subject(s, &hashes),
self.hash_named_node(*p),
self.hash_term(o, *bnode, &hashes),
self.hash_graph_name(g, *bnode, &hashes),
self.hash_graph_name(g, &hashes),
1,
));
}
for (s, p, o, _) in
self.interned_quads_for_graph_name(&InternedGraphName::BlankNode(*bnode))
{
to_hash.push((
self.hash_subject(s, &hashes),
self.hash_named_node(*p),
self.hash_term(o, &hashes),
2,
));
}
to_hash.sort_unstable();
let hash = Self::hash_tuple((&to_hash, hashes[bnode]));
let hash = Self::hash_tuple((old_hash, &to_hash));
to_hash.clear();
if hash == hashes[bnode] {
to_do.insert(*bnode, false);
} else {
new_hashes.insert(*bnode, hash);
}
hash
} else {
hashes[bnode]
};
partition.entry(hash).or_default().push(*bnode);
}
if partition.len() == partition_len {
let mut partition: Vec<_> = partition.into_iter().collect();
partition.sort_by(|(h1, b1), (h2, b2)| (b1.len(), h1).cmp(&(b2.len(), h2)));
return (hashes, partition);
}
hashes = new_hashes;
partition_len = partition.len();
partition.clear();
}
let mut partition: Vec<_> = partition.into_iter().collect();
partition.sort_unstable_by(|(h1, b1), (h2, b2)| (b1.len(), h1).cmp(&(b2.len(), h2)));
(hashes, partition)
}
/// Hashes a named node from its decoded (non interned) form.
fn hash_named_node(&self, node: InternedNamedNode) -> u64 {
Self::hash_tuple(node.decode_from(&self.interner))
}
/// Returns the hash to use for `node` while the hash of `current_blank_node` is being computed.
///
/// The current blank node itself is always mapped to `u64::MAX`;
/// any other blank node is mapped to its entry in `bnodes_hash` (which must exist).
fn hash_blank_node(
    node: InternedBlankNode,
    current_blank_node: InternedBlankNode,
    bnodes_hash: &HashMap<InternedBlankNode, u64>,
) -> u64 {
    if node != current_blank_node {
        return bnodes_hash[&node];
    }
    u64::MAX
}
fn hash_subject(
&self,
node: &InternedSubject,
current_blank_node: InternedBlankNode,
bnodes_hash: &HashMap<InternedBlankNode, u64>,
) -> u64 {
match node {
InternedSubject::NamedNode(node) => Self::hash_tuple(node.decode_from(&self.interner)),
InternedSubject::BlankNode(bnode) => {
Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
}
#[cfg(feature = "rdf-star")]
InternedSubject::Triple(triple) => {
self.hash_triple(triple, current_blank_node, bnodes_hash)
if let InternedSubject::Triple(triple) = node {
return self.hash_triple(triple, bnodes_hash);
}
if let InternedSubject::BlankNode(bnode) = node {
bnodes_hash[bnode]
} else {
Self::hash_tuple(node.decode_from(&self.interner))
}
}
fn hash_term(
&self,
term: &InternedTerm,
current_blank_node: InternedBlankNode,
bnodes_hash: &HashMap<InternedBlankNode, u64>,
) -> u64 {
match term {
InternedTerm::NamedNode(node) => Self::hash_tuple(node.decode_from(&self.interner)),
InternedTerm::BlankNode(bnode) => {
Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
}
InternedTerm::Literal(literal) => Self::hash_tuple(literal.decode_from(&self.interner)),
fn hash_term(&self, term: &InternedTerm, bnodes_hash: &HashMap<InternedBlankNode, u64>) -> u64 {
#[cfg(feature = "rdf-star")]
InternedTerm::Triple(triple) => {
self.hash_triple(triple, current_blank_node, bnodes_hash)
if let InternedTerm::Triple(triple) = term {
return self.hash_triple(triple, bnodes_hash);
}
if let InternedTerm::BlankNode(bnode) = term {
bnodes_hash[bnode]
} else {
Self::hash_tuple(term.decode_from(&self.interner))
}
}
fn hash_graph_name(
&self,
graph_name: &InternedGraphName,
current_blank_node: InternedBlankNode,
bnodes_hash: &HashMap<InternedBlankNode, u64>,
) -> u64 {
match graph_name {
InternedGraphName::NamedNode(node) => {
Self::hash_tuple(node.decode_from(&self.interner))
}
InternedGraphName::BlankNode(bnode) => {
Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
}
InternedGraphName::DefaultGraph => 0,
if let InternedGraphName::BlankNode(bnode) = graph_name {
bnodes_hash[bnode]
} else {
Self::hash_tuple(graph_name.decode_from(&self.interner))
}
}
@ -790,13 +680,12 @@ impl Dataset {
fn hash_triple(
&self,
triple: &InternedTriple,
current_blank_node: InternedBlankNode,
bnodes_hash: &HashMap<InternedBlankNode, u64>,
) -> u64 {
Self::hash_tuple((
self.hash_subject(&triple.subject, current_blank_node, bnodes_hash),
self.hash_subject(&triple.subject, bnodes_hash),
self.hash_named_node(triple.predicate),
self.hash_term(&triple.object, current_blank_node, bnodes_hash),
self.hash_term(&triple.object, bnodes_hash),
))
}
@ -807,43 +696,46 @@ impl Dataset {
}
fn distinguish(
&self,
hash: HashMap<InternedBlankNode, u64>,
&mut self,
hash: &HashMap<InternedBlankNode, u64>,
partition: &[(u64, Vec<InternedBlankNode>)],
quads_per_blank_node: &QuadsPerBlankNode,
) -> HashMap<InternedBlankNode, u64> {
let b_prime = partition.iter().map(|(_, b)| b).find(|b| b.len() > 1);
) -> Vec<(
InternedSubject,
InternedNamedNode,
InternedTerm,
InternedGraphName,
)> {
let b_prime = partition.iter().find_map(|(_, b)| (b.len() > 1).then(|| b));
if let Some(b_prime) = b_prime {
b_prime
.iter()
.map(|b| {
let mut hash_prime = hash.clone();
hash_prime.insert(*b, Self::hash_tuple((hash_prime[b], 22)));
let (hash_prime_prime, partition_prime) =
self.hash_bnodes(hash_prime, quads_per_blank_node);
self.distinguish(hash_prime_prime, &partition_prime, quads_per_blank_node)
let (hash_prime_prime, partition_prime) = self.hash_bnodes(hash_prime);
self.distinguish(&hash_prime_prime, &partition_prime)
})
.reduce(|a, b| {
let mut a_hashes = a.values().collect::<Vec<_>>();
a_hashes.sort();
let mut b_hashes = a.values().collect::<Vec<_>>();
b_hashes.sort();
if a_hashes <= b_hashes {
.fold(None, |a, b| {
Some(if let Some(a) = a {
if a <= b {
a
} else {
b
}
} else {
b
})
})
.unwrap_or_default()
} else {
hash
self.label(hash)
}
}
#[allow(clippy::needless_collect)]
fn map_blank_nodes(
fn label(
&mut self,
bnode_mapping: &HashMap<InternedBlankNode, BlankNode>,
hashes: &HashMap<InternedBlankNode, u64>,
) -> Vec<(
InternedSubject,
InternedNamedNode,
@ -851,81 +743,103 @@ impl Dataset {
InternedGraphName,
)> {
let old_quads: Vec<_> = self.spog.iter().cloned().collect();
old_quads
let mut quads: Vec<_> = old_quads
.into_iter()
.map(|(s, p, o, g)| {
(
match s {
InternedSubject::NamedNode(_) => s,
InternedSubject::BlankNode(bnode) => {
InternedSubject::BlankNode(InternedBlankNode::encoded_into(
bnode_mapping[&bnode].as_ref(),
&mut self.interner,
))
}
if let InternedSubject::BlankNode(bnode) = s {
InternedSubject::BlankNode(self.map_bnode(bnode, hashes))
} else {
#[cfg(feature = "rdf-star")]
InternedSubject::Triple(triple) => {
{
if let InternedSubject::Triple(triple) = s {
InternedSubject::Triple(Box::new(InternedTriple::encoded_into(
self.map_triple_blank_nodes(&triple, bnode_mapping).as_ref(),
self.label_triple(&triple, hashes).as_ref(),
&mut self.interner,
)))
} else {
s
}
}
#[cfg(not(feature = "rdf-star"))]
{
s
}
},
p,
match o {
InternedTerm::NamedNode(_) | InternedTerm::Literal(_) => o,
InternedTerm::BlankNode(bnode) => {
InternedTerm::BlankNode(InternedBlankNode::encoded_into(
bnode_mapping[&bnode].as_ref(),
&mut self.interner,
))
}
if let InternedTerm::BlankNode(bnode) = o {
InternedTerm::BlankNode(self.map_bnode(bnode, hashes))
} else {
#[cfg(feature = "rdf-star")]
InternedTerm::Triple(triple) => {
{
if let InternedTerm::Triple(triple) = o {
InternedTerm::Triple(Box::new(InternedTriple::encoded_into(
self.map_triple_blank_nodes(&triple, bnode_mapping).as_ref(),
self.label_triple(&triple, hashes).as_ref(),
&mut self.interner,
)))
} else {
o
}
},
match g {
InternedGraphName::NamedNode(_) | InternedGraphName::DefaultGraph => g,
InternedGraphName::BlankNode(bnode) => {
InternedGraphName::BlankNode(InternedBlankNode::encoded_into(
bnode_mapping[&bnode].as_ref(),
&mut self.interner,
))
}
#[cfg(not(feature = "rdf-star"))]
{
o
}
},
if let InternedGraphName::BlankNode(bnode) = g {
InternedGraphName::BlankNode(self.map_bnode(bnode, hashes))
} else {
g
},
)
})
.collect()
.collect();
quads.sort();
quads
}
#[cfg(feature = "rdf-star")]
fn map_triple_blank_nodes(
fn label_triple(
&mut self,
triple: &InternedTriple,
bnode_mapping: &HashMap<InternedBlankNode, BlankNode>,
hashes: &HashMap<InternedBlankNode, u64>,
) -> Triple {
Triple {
subject: if let InternedSubject::BlankNode(bnode) = &triple.subject {
bnode_mapping[bnode].clone().into()
Self::gen_bnode(*bnode, hashes).into()
} else if let InternedSubject::Triple(t) = &triple.subject {
self.map_triple_blank_nodes(t, bnode_mapping).into()
self.label_triple(t, hashes).into()
} else {
triple.subject.decode_from(&self.interner).into_owned()
},
predicate: triple.predicate.decode_from(&self.interner).into_owned(),
object: if let InternedTerm::BlankNode(bnode) = &triple.object {
bnode_mapping[bnode].clone().into()
Self::gen_bnode(*bnode, hashes).into()
} else if let InternedTerm::Triple(t) = &triple.object {
self.map_triple_blank_nodes(t, bnode_mapping).into()
self.label_triple(t, hashes).into()
} else {
triple.object.decode_from(&self.interner).into_owned()
},
}
}
/// Replaces an interned blank node by the interned form of its relabelled counterpart.
///
/// The new blank node is produced by `Self::gen_bnode` from the entry of `old_bnode`
/// in `hashes` and is then encoded into `self.interner`.
fn map_bnode(
    &mut self,
    old_bnode: InternedBlankNode,
    hashes: &HashMap<InternedBlankNode, u64>,
) -> InternedBlankNode {
    let relabelled = Self::gen_bnode(old_bnode, hashes);
    InternedBlankNode::encoded_into(relabelled.as_ref(), &mut self.interner)
}
/// Builds the blank node that replaces `old_bnode`.
///
/// The hash stored for `old_bnode` in `hashes` is converted with `.into()` and used as
/// the unique id given to `BlankNode::new_from_unique_id`.
///
/// # Panics
///
/// Panics if `old_bnode` has no entry in `hashes`.
fn gen_bnode(
    old_bnode: InternedBlankNode,
    hashes: &HashMap<InternedBlankNode, u64>,
) -> BlankNode {
    let hash = hashes[&old_bnode];
    BlankNode::new_from_unique_id(hash.into())
}
}
impl PartialEq for Dataset {
@ -948,7 +862,7 @@ impl<'a> IntoIterator for &'a Dataset {
type Item = QuadRef<'a>;
type IntoIter = Iter<'a>;
fn into_iter(self) -> Self::IntoIter {
fn into_iter(self) -> Iter<'a> {
self.iter()
}
}
@ -1306,7 +1220,7 @@ impl<'a> IntoIterator for GraphView<'a> {
type Item = TripleRef<'a>;
type IntoIter = GraphViewIter<'a>;
fn into_iter(self) -> Self::IntoIter {
fn into_iter(self) -> GraphViewIter<'a> {
self.iter()
}
}
@ -1315,7 +1229,7 @@ impl<'a, 'b> IntoIterator for &'b GraphView<'a> {
type Item = TripleRef<'a>;
type IntoIter = GraphViewIter<'a>;
fn into_iter(self) -> Self::IntoIter {
fn into_iter(self) -> GraphViewIter<'a> {
self.iter()
}
}
@ -1517,7 +1431,7 @@ impl<'a> IntoIterator for &'a GraphViewMut<'a> {
type Item = TripleRef<'a>;
type IntoIter = GraphViewIter<'a>;
fn into_iter(self) -> Self::IntoIter {
fn into_iter(self) -> GraphViewIter<'a> {
self.iter()
}
}
@ -1548,7 +1462,7 @@ pub struct Iter<'a> {
impl<'a> Iterator for Iter<'a> {
type Item = QuadRef<'a>;
fn next(&mut self) -> Option<Self::Item> {
fn next(&mut self) -> Option<QuadRef<'a>> {
self.inner
.next()
.map(|(s, p, o, g)| self.dataset.decode_spog((s, p, o, g)))
@ -1572,70 +1486,9 @@ pub struct GraphViewIter<'a> {
impl<'a> Iterator for GraphViewIter<'a> {
type Item = TripleRef<'a>;
fn next(&mut self) -> Option<Self::Item> {
fn next(&mut self) -> Option<TripleRef<'a>> {
self.inner
.next()
.map(|(_, s, p, o)| self.dataset.decode_spo((s, p, o)))
}
}
/// Map from an interned blank node to a list of interned
/// (subject, predicate, object, graph name) tuples.
///
/// NOTE(review): presumably each list holds the quads in which the key blank node
/// occurs; confirm against the code that fills this map.
type QuadsPerBlankNode = HashMap<
    InternedBlankNode,
    Vec<(
        InternedSubject,
        InternedNamedNode,
        InternedTerm,
        InternedGraphName,
    )>,
>;
/// An algorithm used to canonicalize graphs and datasets.
///
/// See [`Graph::canonicalize`] and [`Dataset::canonicalize`].
#[derive(Default, Debug, Clone, Copy, Eq, PartialEq, Hash)]
#[non_exhaustive]
pub enum CanonicalizationAlgorithm {
    /// The algorithm preferred by OxRDF.
    ///
    /// <div class="warning">The canonicalization algorithm is not stable and canonical blank node ids might change between Oxigraph versions.</div>
    #[default]
    Unstable,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a dataset made of two default-graph quads whose subject and object are
    /// each a `BlankNode::default()`, then canonicalizes it.
    fn canonicalized_bnode_pairs() -> Dataset {
        let mut dataset = Dataset::new();
        for _ in 0..2 {
            dataset.insert(QuadRef::new(
                BlankNode::default().as_ref(),
                NamedNodeRef::new_unchecked("http://ex"),
                BlankNode::default().as_ref(),
                GraphNameRef::DefaultGraph,
            ));
        }
        dataset.canonicalize(CanonicalizationAlgorithm::Unstable);
        dataset
    }

    /// Two datasets with the same shape must be equal once both are canonicalized.
    #[test]
    fn test_canon() {
        let dataset = canonicalized_bnode_pairs();
        let dataset2 = canonicalized_bnode_pairs();
        assert_eq!(dataset, dataset2);
    }
}

@ -16,16 +16,12 @@
//! assert_eq!(vec![triple], results);
//!
//! // Print
//! assert_eq!(
//! graph.to_string(),
//! "<http://example.com> <http://example.com> <http://example.com> .\n"
//! );
//! assert_eq!(graph.to_string(), "<http://example.com> <http://example.com> <http://example.com> .\n");
//! # Result::<_,Box<dyn std::error::Error>>::Ok(())
//! ```
//!
//! See also [`Dataset`] if you want to get support of multiple RDF graphs at the same time.
pub use crate::dataset::CanonicalizationAlgorithm;
use crate::dataset::*;
use crate::*;
use std::fmt;
@ -33,9 +29,8 @@ use std::fmt;
/// An in-memory [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
///
/// It can accommodate a fairly large number of triples (in the few millions).
///
/// <div class="warning">It interns the string and does not do any garbage collection yet:
/// if you insert and remove a lot of different terms, memory will grow without any reduction.</div>
/// Beware: it interns the string and does not do any garbage collection yet:
/// if you insert and remove a lot of different terms, memory will grow without any reduction.
///
/// Usage example:
/// ```
@ -53,7 +48,7 @@ use std::fmt;
/// assert_eq!(vec![triple], results);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Debug, Default, Clone)]
#[derive(Debug, Default)]
pub struct Graph {
dataset: Dataset,
}
@ -184,11 +179,11 @@ impl Graph {
self.dataset.clear()
}
/// Canonicalizes the dataset by renaming blank nodes.
/// Applies on the graph the canonicalization process described in
/// [Canonical Forms for Isomorphic and Equivalent RDF Graphs: Algorithms for Leaning and Labelling Blank Nodes, Aidan Hogan, 2017](http://aidanhogan.com/docs/rdf-canonicalisation.pdf).
///
/// Usage example ([Graph isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism)):
/// ```
/// use oxrdf::graph::CanonicalizationAlgorithm;
/// use oxrdf::*;
///
/// let iri = NamedNodeRef::new("http://example.com")?;
@ -204,18 +199,20 @@ impl Graph {
/// graph2.insert(TripleRef::new(&bnode2, iri, iri));
///
/// assert_ne!(graph1, graph2);
/// graph1.canonicalize(CanonicalizationAlgorithm::Unstable);
/// graph2.canonicalize(CanonicalizationAlgorithm::Unstable);
/// graph1.canonicalize();
/// graph2.canonicalize();
/// assert_eq!(graph1, graph2);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
///
/// <div class="warning">Blank node ids depend on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes.
/// Hence, this canonicalization might not be suitable for diffs.</div>
/// Warning 1: Blank node ids depend on the current shape of the graph. Adding a new triple might change the ids of a lot of blank nodes.
/// Hence, this canonicalization might not be suitable for diffs.
///
/// Warning 2: The canonicalization algorithm is not stable and canonical blank node ids might change between Oxigraph versions.
///
/// <div class="warning">This implementation's worst-case complexity is *O(b!)*, where *b* is the number of blank nodes in the input dataset.</div>
pub fn canonicalize(&mut self, algorithm: CanonicalizationAlgorithm) {
self.dataset.canonicalize(algorithm)
/// Warning 3: This implementation's worst-case complexity is *O(b!)*, where *b* is the number of blank nodes in the input graph.
pub fn canonicalize(&mut self) {
self.dataset.canonicalize()
}
}
@ -231,7 +228,7 @@ impl<'a> IntoIterator for &'a Graph {
type Item = TripleRef<'a>;
type IntoIter = Iter<'a>;
fn into_iter(self) -> Self::IntoIter {
fn into_iter(self) -> Iter<'a> {
self.iter()
}
}
@ -278,7 +275,7 @@ pub struct Iter<'a> {
impl<'a> Iterator for Iter<'a> {
type Item = TripleRef<'a>;
fn next(&mut self) -> Option<Self::Item> {
fn next(&mut self) -> Option<TripleRef<'a>> {
self.inner.next()
}
}

@ -4,11 +4,10 @@ use crate::*;
use std::collections::hash_map::{Entry, HashMap, RandomState};
use std::hash::{BuildHasher, Hasher};
#[derive(Debug, Default, Clone)]
#[derive(Debug, Default)]
pub struct Interner {
hasher: RandomState,
string_for_hash: HashMap<u64, String, IdentityHasherBuilder>,
string_for_blank_node_id: HashMap<u128, String>,
#[cfg(feature = "rdf-star")]
triples: HashMap<InternedTriple, Triple>,
}
@ -101,7 +100,7 @@ impl InternedNamedNode {
})
}
pub fn decode_from(self, interner: &Interner) -> NamedNodeRef<'_> {
pub fn decode_from(self, interner: &Interner) -> NamedNodeRef {
NamedNodeRef::new_unchecked(interner.resolve(self.id))
}
@ -121,53 +120,29 @@ impl InternedNamedNode {
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub enum InternedBlankNode {
Number { id: u128 },
Other { id: Key },
pub struct InternedBlankNode {
id: Key,
}
impl InternedBlankNode {
pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Interner) -> Self {
if let Some(id) = blank_node.unique_id() {
interner
.string_for_blank_node_id
.entry(id)
.or_insert_with(|| blank_node.as_str().into());
Self::Number { id }
} else {
Self::Other {
Self {
id: interner.get_or_intern(blank_node.as_str()),
}
}
}
pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Interner) -> Option<Self> {
if let Some(id) = blank_node.unique_id() {
interner
.string_for_blank_node_id
.contains_key(&id)
.then_some(Self::Number { id })
} else {
Some(Self::Other {
Some(Self {
id: interner.get(blank_node.as_str())?,
})
}
}
pub fn decode_from(self, interner: &Interner) -> BlankNodeRef<'_> {
BlankNodeRef::new_unchecked(match self {
Self::Number { id } => &interner.string_for_blank_node_id[&id],
Self::Other { id } => interner.resolve(id),
})
pub fn decode_from(self, interner: &Interner) -> BlankNodeRef {
BlankNodeRef::new_unchecked(interner.resolve(self.id))
}
pub fn next(self) -> Self {
match self {
Self::Number { id } => Self::Number {
id: id.saturating_add(1),
},
Self::Other { id } => Self::Other { id: id.next() },
}
Self { id: self.id.next() }
}
}
@ -492,7 +467,7 @@ impl InternedTriple {
interner
.triples
.contains_key(&interned_triple)
.then_some(interned_triple)
.then(|| interned_triple)
}
pub fn next(&self) -> Self {
@ -504,14 +479,14 @@ impl InternedTriple {
}
}
#[derive(Default, Clone)]
struct IdentityHasherBuilder;
#[derive(Default)]
struct IdentityHasherBuilder {}
impl BuildHasher for IdentityHasherBuilder {
type Hasher = IdentityHasher;
fn build_hasher(&self) -> Self::Hasher {
Self::Hasher::default()
fn build_hasher(&self) -> IdentityHasher {
IdentityHasher::default()
}
}

@ -23,7 +23,7 @@ pub use crate::named_node::{NamedNode, NamedNodeRef};
pub use crate::parser::TermParseError;
pub use crate::triple::{
GraphName, GraphNameRef, NamedOrBlankNode, NamedOrBlankNodeRef, Quad, QuadRef, Subject,
SubjectRef, Term, TermRef, Triple, TripleRef, TryFromTermError,
SubjectRef, Term, TermRef, Triple, TripleRef,
};
pub use crate::variable::{Variable, VariableNameParseError, VariableRef};
pub use oxilangtag::LanguageTagParseError;

@ -1,20 +1,22 @@
use crate::named_node::{NamedNode, NamedNodeRef};
use crate::vocab::{rdf, xsd};
use crate::named_node::NamedNode;
use crate::vocab::rdf;
use crate::vocab::xsd;
use crate::NamedNodeRef;
use oxilangtag::{LanguageTag, LanguageTagParseError};
#[cfg(feature = "oxsdatatypes")]
use oxsdatatypes::*;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::fmt;
use std::fmt::Write;
use std::option::Option;
/// An owned RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal).
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// # use oxilangtag::LanguageTagParseError;
/// use oxrdf::vocab::xsd;
/// use oxrdf::Literal;
/// use oxrdf::vocab::xsd;
///
/// assert_eq!(
/// "\"foo\\nbar\"",
@ -22,20 +24,20 @@ use std::fmt::Write;
/// );
///
/// assert_eq!(
/// r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#,
/// "\"1999-01-01\"^^<http://www.w3.org/2001/XMLSchema#date>",
/// Literal::new_typed_literal("1999-01-01", xsd::DATE).to_string()
/// );
///
/// assert_eq!(
/// r#""foo"@en"#,
/// "\"foo\"@en",
/// Literal::new_language_tagged_literal("foo", "en")?.to_string()
/// );
/// # Result::<(), LanguageTagParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)]
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Literal(LiteralContent);
#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)]
#[derive(PartialEq, Eq, Debug, Clone, Hash)]
enum LiteralContent {
String(String),
LanguageTaggedString { value: String, language: String },
@ -425,8 +427,8 @@ impl From<DayTimeDuration> for Literal {
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// use oxrdf::vocab::xsd;
/// use oxrdf::LiteralRef;
/// use oxrdf::vocab::xsd;
///
/// assert_eq!(
/// "\"foo\\nbar\"",
@ -434,7 +436,7 @@ impl From<DayTimeDuration> for Literal {
/// );
///
/// assert_eq!(
/// r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#,
/// "\"1999-01-01\"^^<http://www.w3.org/2001/XMLSchema#date>",
/// LiteralRef::new_typed_literal("1999-01-01", xsd::DATE).to_string()
/// );
/// ```
@ -457,7 +459,7 @@ enum LiteralRefContent<'a> {
impl<'a> LiteralRef<'a> {
/// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal).
#[inline]
pub const fn new_simple_literal(value: &'a str) -> Self {
pub fn new_simple_literal(value: &'a str) -> Self {
LiteralRef(LiteralRefContent::String(value))
}
@ -480,13 +482,13 @@ impl<'a> LiteralRef<'a> {
///
/// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data.
#[inline]
pub const fn new_language_tagged_literal_unchecked(value: &'a str, language: &'a str) -> Self {
pub fn new_language_tagged_literal_unchecked(value: &'a str, language: &'a str) -> Self {
LiteralRef(LiteralRefContent::LanguageTaggedString { value, language })
}
/// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form)
#[inline]
pub const fn value(self) -> &'a str {
pub fn value(self) -> &'a str {
match self.0 {
LiteralRefContent::String(value)
| LiteralRefContent::LanguageTaggedString { value, .. }
@ -499,7 +501,7 @@ impl<'a> LiteralRef<'a> {
/// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47).
/// They are normalized to lowercase by this implementation.
#[inline]
pub const fn language(self) -> Option<&'a str> {
pub fn language(self) -> Option<&'a str> {
match self.0 {
LiteralRefContent::LanguageTaggedString { language, .. } => Some(language),
_ => None,
@ -511,7 +513,7 @@ impl<'a> LiteralRef<'a> {
/// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
/// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
#[inline]
pub const fn datatype(self) -> NamedNodeRef<'a> {
pub fn datatype(self) -> NamedNodeRef<'a> {
match self.0 {
LiteralRefContent::String(_) => xsd::STRING,
LiteralRefContent::LanguageTaggedString { .. } => rdf::LANG_STRING,
@ -524,7 +526,7 @@ impl<'a> LiteralRef<'a> {
/// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
/// or has the datatype [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
#[inline]
pub const fn is_plain(self) -> bool {
pub fn is_plain(self) -> bool {
matches!(
self.0,
LiteralRefContent::String(_) | LiteralRefContent::LanguageTaggedString { .. }
@ -550,7 +552,7 @@ impl<'a> LiteralRef<'a> {
/// Extract components from this literal
#[inline]
pub const fn destruct(self) -> (&'a str, Option<NamedNodeRef<'a>>, Option<&'a str>) {
pub fn destruct(self) -> (&'a str, Option<NamedNodeRef<'a>>, Option<&'a str>) {
match self.0 {
LiteralRefContent::String(s) => (s, None, None),
LiteralRefContent::LanguageTaggedString { value, language } => {
@ -618,22 +620,17 @@ pub fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result {
f.write_char('"')?;
for c in string.chars() {
match c {
'\u{08}' => f.write_str("\\b"),
'\t' => f.write_str("\\t"),
'\n' => f.write_str("\\n"),
'\u{0C}' => f.write_str("\\f"),
'\r' => f.write_str("\\r"),
'"' => f.write_str("\\\""),
'\\' => f.write_str("\\\\"),
'\0'..='\u{1F}' | '\u{7F}' => write!(f, "\\u{:04X}", u32::from(c)),
_ => f.write_char(c),
c => f.write_char(c),
}?;
}
f.write_char('"')
}
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
use super::*;

@ -1,5 +1,4 @@
use oxiri::{Iri, IriParseError};
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::fmt;
@ -15,7 +14,7 @@ use std::fmt;
/// );
/// # Result::<_,oxrdf::IriParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash, Serialize, Deserialize)]
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct NamedNode {
iri: String,
}
@ -217,21 +216,3 @@ impl PartialOrd<NamedNodeRef<'_>> for NamedNode {
self.as_ref().partial_cmp(other)
}
}
impl From<Iri<String>> for NamedNode {
#[inline]
fn from(iri: Iri<String>) -> Self {
Self {
iri: iri.into_inner(),
}
}
}
impl<'a> From<Iri<&'a str>> for NamedNodeRef<'a> {
#[inline]
fn from(iri: Iri<&'a str>) -> Self {
Self {
iri: iri.into_inner(),
}
}
}

@ -6,6 +6,8 @@ use crate::{
#[cfg(feature = "rdf-star")]
use crate::{Subject, Triple};
use std::char;
use std::error::Error;
use std::fmt;
use std::str::{Chars, FromStr};
/// This limit is set in order to avoid stack overflow error when parsing nested triples due to too many recursive calls.
@ -21,15 +23,12 @@ impl FromStr for NamedNode {
/// use oxrdf::NamedNode;
/// use std::str::FromStr;
///
/// assert_eq!(
/// NamedNode::from_str("<http://example.com>").unwrap(),
/// NamedNode::new("http://example.com").unwrap()
/// )
/// assert_eq!(NamedNode::from_str("<http://example.com>").unwrap(), NamedNode::new("http://example.com").unwrap())
/// ```
fn from_str(s: &str) -> Result<Self, Self::Err> {
fn from_str(s: &str) -> Result<Self, TermParseError> {
let (term, left) = read_named_node(s)?;
if !left.is_empty() {
return Err(Self::Err::msg(
return Err(TermParseError::msg(
"Named node serialization should end with a >",
));
}
@ -46,15 +45,12 @@ impl FromStr for BlankNode {
/// use oxrdf::BlankNode;
/// use std::str::FromStr;
///
/// assert_eq!(
/// BlankNode::from_str("_:ex").unwrap(),
/// BlankNode::new("ex").unwrap()
/// )
/// assert_eq!(BlankNode::from_str("_:ex").unwrap(), BlankNode::new("ex").unwrap())
/// ```
fn from_str(s: &str) -> Result<Self, Self::Err> {
fn from_str(s: &str) -> Result<Self, TermParseError> {
let (term, left) = read_blank_node(s)?;
if !left.is_empty() {
return Err(Self::Err::msg(
return Err(TermParseError::msg(
"Blank node serialization should not contain whitespaces",
));
}
@ -68,46 +64,21 @@ impl FromStr for Literal {
/// Parses a literal from its NTriples or Turtle serialization
///
/// ```
/// use oxrdf::vocab::xsd;
/// use oxrdf::{Literal, NamedNode};
/// use oxrdf::{Literal, NamedNode, vocab::xsd};
/// use std::str::FromStr;
///
/// assert_eq!(
/// Literal::from_str("\"ex\\n\"").unwrap(),
/// Literal::new_simple_literal("ex\n")
/// );
/// assert_eq!(
/// Literal::from_str("\"ex\"@en").unwrap(),
/// Literal::new_language_tagged_literal("ex", "en").unwrap()
/// );
/// assert_eq!(
/// Literal::from_str("\"2020\"^^<http://www.w3.org/2001/XMLSchema#gYear>").unwrap(),
/// Literal::new_typed_literal(
/// "2020",
/// NamedNode::new("http://www.w3.org/2001/XMLSchema#gYear").unwrap()
/// )
/// );
/// assert_eq!(
/// Literal::from_str("true").unwrap(),
/// Literal::new_typed_literal("true", xsd::BOOLEAN)
/// );
/// assert_eq!(
/// Literal::from_str("+122").unwrap(),
/// Literal::new_typed_literal("+122", xsd::INTEGER)
/// );
/// assert_eq!(
/// Literal::from_str("-122.23").unwrap(),
/// Literal::new_typed_literal("-122.23", xsd::DECIMAL)
/// );
/// assert_eq!(
/// Literal::from_str("-122e+1").unwrap(),
/// Literal::new_typed_literal("-122e+1", xsd::DOUBLE)
/// );
/// assert_eq!(Literal::from_str("\"ex\\n\"").unwrap(), Literal::new_simple_literal("ex\n"));
/// assert_eq!(Literal::from_str("\"ex\"@en").unwrap(), Literal::new_language_tagged_literal("ex", "en").unwrap());
/// assert_eq!(Literal::from_str("\"2020\"^^<http://www.w3.org/2001/XMLSchema#gYear>").unwrap(), Literal::new_typed_literal("2020", NamedNode::new("http://www.w3.org/2001/XMLSchema#gYear").unwrap()));
/// assert_eq!(Literal::from_str("true").unwrap(), Literal::new_typed_literal("true", xsd::BOOLEAN));
/// assert_eq!(Literal::from_str("+122").unwrap(), Literal::new_typed_literal("+122", xsd::INTEGER));
/// assert_eq!(Literal::from_str("-122.23").unwrap(), Literal::new_typed_literal("-122.23", xsd::DECIMAL));
/// assert_eq!(Literal::from_str("-122e+1").unwrap(), Literal::new_typed_literal("-122e+1", xsd::DOUBLE));
/// ```
fn from_str(s: &str) -> Result<Self, Self::Err> {
fn from_str(s: &str) -> Result<Self, TermParseError> {
let (term, left) = read_literal(s)?;
if !left.is_empty() {
return Err(Self::Err::msg("Invalid literal serialization"));
return Err(TermParseError::msg("Invalid literal serialization"));
}
Ok(term)
}
@ -122,15 +93,17 @@ impl FromStr for Term {
/// use oxrdf::*;
/// use std::str::FromStr;
///
/// assert_eq!(
/// Term::from_str("\"ex\"").unwrap(),
/// Literal::new_simple_literal("ex").into()
/// );
/// assert_eq!(Term::from_str("\"ex\"").unwrap(), Literal::new_simple_literal("ex").into());
/// assert_eq!(Term::from_str("<< _:s <http://example.com/p> \"o\" >>").unwrap(), Triple::new(
/// BlankNode::new("s").unwrap(),
/// NamedNode::new("http://example.com/p").unwrap(),
/// Literal::new_simple_literal("o")
/// ).into());
/// ```
fn from_str(s: &str) -> Result<Self, Self::Err> {
fn from_str(s: &str) -> Result<Self, TermParseError> {
let (term, left) = read_term(s, 0)?;
if !left.is_empty() {
return Err(Self::Err::msg("Invalid term serialization"));
return Err(TermParseError::msg("Invalid term serialization"));
}
Ok(term)
}
@ -145,22 +118,19 @@ impl FromStr for Variable {
/// use oxrdf::Variable;
/// use std::str::FromStr;
///
/// assert_eq!(
/// Variable::from_str("$foo").unwrap(),
/// Variable::new("foo").unwrap()
/// )
/// assert_eq!(Variable::from_str("$foo").unwrap(), Variable::new("foo").unwrap())
/// ```
fn from_str(s: &str) -> Result<Self, Self::Err> {
fn from_str(s: &str) -> Result<Self, TermParseError> {
if !s.starts_with('?') && !s.starts_with('$') {
return Err(Self::Err::msg(
return Err(TermParseError::msg(
"Variable serialization should start with ? or $",
));
}
Self::new(&s[1..]).map_err(|error| {
TermParseError(TermParseErrorKind::Variable {
Self::new(&s[1..]).map_err(|error| TermParseError {
kind: TermParseErrorKind::Variable {
value: s.to_owned(),
error,
})
},
})
}
}
@ -173,11 +143,11 @@ fn read_named_node(s: &str) -> Result<(NamedNode, &str), TermParseError> {
.ok_or_else(|| TermParseError::msg("Named node serialization should end with a >"))?;
let (value, remain) = remain.split_at(end);
let remain = &remain[1..];
let term = NamedNode::new(value).map_err(|error| {
TermParseError(TermParseErrorKind::Iri {
let term = NamedNode::new(value).map_err(|error| TermParseError {
kind: TermParseErrorKind::Iri {
value: value.to_owned(),
error,
})
},
})?;
Ok((term, remain))
} else {
@ -197,11 +167,11 @@ fn read_blank_node(s: &str) -> Result<(BlankNode, &str), TermParseError> {
})
.unwrap_or(remain.len());
let (value, remain) = remain.split_at(end);
let term = BlankNode::new(value).map_err(|error| {
TermParseError(TermParseErrorKind::BlankNode {
let term = BlankNode::new(value).map_err(|error| TermParseError {
kind: TermParseErrorKind::BlankNode {
value: value.to_owned(),
error,
})
},
})?;
Ok((term, remain))
} else {
@ -227,11 +197,11 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
let (language, remain) = remain.split_at(end);
Ok((
Literal::new_language_tagged_literal(value, language).map_err(
|error| {
TermParseError(TermParseErrorKind::LanguageTag {
|error| TermParseError {
kind: TermParseErrorKind::LanguageTag {
value: language.to_owned(),
error,
})
},
},
)?,
remain,
@ -247,10 +217,10 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
if let Some(c) = chars.next() {
value.push(match c {
't' => '\t',
'b' => '\u{08}',
'b' => '\u{8}',
'n' => '\n',
'r' => '\r',
'f' => '\u{0C}',
'f' => '\u{C}',
'"' => '"',
'\'' => '\'',
'\\' => '\\',
@ -262,7 +232,7 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
return Err(TermParseError::msg("Unexpected literal end"));
}
}
_ => value.push(c),
c => value.push(c),
}
}
Err(TermParseError::msg("Unexpected literal end"))
@ -356,7 +326,7 @@ fn read_term(s: &str, number_of_recursive_calls: usize) -> Result<(Term, &str),
Term::Literal(_) => {
return Err(TermParseError::msg(
"Literals are not allowed in subject position",
));
))
}
Term::Triple(s) => Subject::Triple(s),
},
@ -400,7 +370,7 @@ fn read_hexa_char(input: &mut Chars<'_>, len: usize) -> Result<char, TermParseEr
_ => {
return Err(TermParseError::msg(
"Unexpected character in a unicode escape",
));
))
}
}
} else {
@ -411,59 +381,61 @@ fn read_hexa_char(input: &mut Chars<'_>, len: usize) -> Result<char, TermParseEr
}
/// An error raised during term serialization parsing using the [`FromStr`] trait.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct TermParseError(#[from] TermParseErrorKind);
#[derive(Debug)]
pub struct TermParseError {
kind: TermParseErrorKind,
}
/// An internal error raised during term serialization parsing using the [`FromStr`] trait.
#[derive(Debug, thiserror::Error)]
#[derive(Debug)]
enum TermParseErrorKind {
#[error("Error while parsing the named node '{value}': {error}")]
Iri { error: IriParseError, value: String },
#[error("Error while parsing the blank node '{value}': {error}")]
Iri {
error: IriParseError,
value: String,
},
BlankNode {
error: BlankNodeIdParseError,
value: String,
},
#[error("Error while parsing the language tag '{value}': {error}")]
LanguageTag {
error: LanguageTagParseError,
value: String,
},
#[error("Error while parsing the variable '{value}': {error}")]
Variable {
error: VariableNameParseError,
value: String,
},
#[error("{0}")]
Msg(&'static str),
Msg {
msg: &'static str,
},
}
impl TermParseError {
pub(crate) fn msg(msg: &'static str) -> Self {
Self(TermParseErrorKind::Msg(msg))
impl fmt::Display for TermParseError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self.kind {
TermParseErrorKind::Iri { error, value } => {
write!(f, "Error while parsing the named node '{value}': {error}")
}
TermParseErrorKind::BlankNode { error, value } => {
write!(f, "Error while parsing the blank node '{value}': {error}")
}
TermParseErrorKind::LanguageTag { error, value } => {
write!(f, "Error while parsing the language tag '{value}': {error}")
}
TermParseErrorKind::Variable { error, value } => {
write!(f, "Error while parsing the variable '{value}': {error}")
}
TermParseErrorKind::Msg { msg } => f.write_str(msg),
}
}
}
#[cfg(test)]
#[cfg(feature = "rdf-star")]
mod tests {
use super::*;
impl Error for TermParseError {}
#[test]
fn triple_term_parsing() {
assert_eq!(
Term::from_str("\"ex\"").unwrap(),
Literal::new_simple_literal("ex").into()
);
assert_eq!(
Term::from_str("<< _:s <http://example.com/p> \"o\" >>").unwrap(),
Triple::new(
BlankNode::new("s").unwrap(),
NamedNode::new("http://example.com/p").unwrap(),
Literal::new_simple_literal("o"),
)
.into()
);
impl TermParseError {
pub(crate) fn msg(msg: &'static str) -> Self {
Self {
kind: TermParseErrorKind::Msg { msg },
}
}
}

@ -2,11 +2,10 @@ use crate::blank_node::BlankNode;
use crate::literal::Literal;
use crate::named_node::NamedNode;
use crate::{BlankNodeRef, LiteralRef, NamedNodeRef};
use serde::{Deserialize, Serialize};
use std::fmt;
/// The owned union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)]
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum NamedOrBlankNode {
NamedNode(NamedNode),
BlankNode(BlankNode),
@ -153,7 +152,7 @@ impl<'a> From<NamedOrBlankNodeRef<'a>> for NamedOrBlankNode {
}
/// The owned union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) (if the `rdf-star` feature is enabled).
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)]
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum Subject {
NamedNode(NamedNode),
BlankNode(BlankNode),
@ -383,7 +382,7 @@ impl<'a> From<&'a NamedOrBlankNode> for SubjectRef<'a> {
/// An owned RDF [term](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term)
/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) (if the `rdf-star` feature is enabled).
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)]
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum Term {
NamedNode(NamedNode),
BlankNode(BlankNode),
@ -535,72 +534,6 @@ impl From<SubjectRef<'_>> for Term {
}
}
impl TryFrom<Term> for NamedNode {
    type Error = TryFromTermError;

    /// Unwraps the [`NamedNode`] held by `term`.
    ///
    /// Any other kind of term is handed back untouched inside the returned [`TryFromTermError`].
    #[inline]
    fn try_from(term: Term) -> Result<Self, Self::Error> {
        match term {
            Term::NamedNode(inner) => Ok(inner),
            other => Err(TryFromTermError {
                term: other,
                target: "NamedNode",
            }),
        }
    }
}
impl TryFrom<Term> for BlankNode {
    type Error = TryFromTermError;

    /// Unwraps the [`BlankNode`] held by `term`.
    ///
    /// Any other kind of term is handed back untouched inside the returned [`TryFromTermError`].
    #[inline]
    fn try_from(term: Term) -> Result<Self, Self::Error> {
        match term {
            Term::BlankNode(inner) => Ok(inner),
            other => Err(TryFromTermError {
                term: other,
                target: "BlankNode",
            }),
        }
    }
}
impl TryFrom<Term> for Literal {
    type Error = TryFromTermError;

    /// Unwraps the [`Literal`] held by `term`.
    ///
    /// Any other kind of term is handed back untouched inside the returned [`TryFromTermError`].
    #[inline]
    fn try_from(term: Term) -> Result<Self, Self::Error> {
        match term {
            Term::Literal(inner) => Ok(inner),
            other => Err(TryFromTermError {
                term: other,
                target: "Literal",
            }),
        }
    }
}
impl TryFrom<Term> for Subject {
    type Error = TryFromTermError;

    /// Converts `term` into a [`Subject`].
    ///
    /// Named nodes, blank nodes and (with the `rdf-star` feature) triples are accepted.
    /// A literal is rejected and handed back inside the returned [`TryFromTermError`].
    #[inline]
    fn try_from(term: Term) -> Result<Self, Self::Error> {
        Ok(match term {
            Term::NamedNode(node) => Self::NamedNode(node),
            Term::BlankNode(node) => Self::BlankNode(node),
            #[cfg(feature = "rdf-star")]
            Term::Triple(triple) => Self::Triple(triple),
            literal @ Term::Literal(_) => {
                return Err(TryFromTermError {
                    term: literal,
                    target: "Subject",
                });
            }
        })
    }
}
/// A borrowed RDF [term](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term)
/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) and [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) (if the `rdf-star` feature is enabled).
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
@ -765,7 +698,7 @@ impl<'a> From<TermRef<'a>> for Term {
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// use oxrdf::{NamedNode, Triple};
/// use oxrdf::{Triple, NamedNode};
///
/// assert_eq!(
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o>",
@ -773,12 +706,11 @@ impl<'a> From<TermRef<'a>> for Term {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// }
/// .to_string()
/// }.to_string()
/// );
/// # Result::<_,oxrdf::IriParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)]
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Triple {
/// The [subject](https://www.w3.org/TR/rdf11-concepts/#dfn-subject) of this triple.
pub subject: Subject,
@ -805,22 +737,6 @@ impl Triple {
}
}
/// Builds an RDF [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) from [`Term`]s.
///
/// Returns a [`TryFromTermError`] error if the generated triple would be ill-formed.
#[inline]
pub fn from_terms(
subject: impl Into<Term>,
predicate: impl Into<Term>,
object: impl Into<Term>,
) -> Result<Self, TryFromTermError> {
Ok(Self {
subject: subject.into().try_into()?,
predicate: predicate.into().try_into()?,
object: object.into(),
})
}
/// Encodes that this triple is in an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
#[inline]
pub fn in_graph(self, graph_name: impl Into<GraphName>) -> Quad {
@ -853,7 +769,7 @@ impl fmt::Display for Triple {
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxrdf::{TripleRef, NamedNodeRef};
///
/// assert_eq!(
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o>",
@ -861,8 +777,7 @@ impl fmt::Display for Triple {
/// subject: NamedNodeRef::new("http://example.com/s")?.into(),
/// predicate: NamedNodeRef::new("http://example.com/p")?,
/// object: NamedNodeRef::new("http://example.com/o")?.into(),
/// }
/// .to_string()
/// }.to_string()
/// );
/// # Result::<_,oxrdf::IriParseError>::Ok(())
/// ```
@ -938,11 +853,10 @@ impl<'a> From<TripleRef<'a>> for Triple {
/// A possible owned graph name.
/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph).
#[derive(Eq, PartialEq, Debug, Clone, Hash, Default, Serialize, Deserialize)]
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum GraphName {
NamedNode(NamedNode),
BlankNode(BlankNode),
#[default]
DefaultGraph,
}
@ -1026,11 +940,10 @@ impl From<NamedOrBlankNodeRef<'_>> for GraphName {
/// A possible borrowed graph name.
/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph).
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash, Default)]
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
pub enum GraphNameRef<'a> {
NamedNode(NamedNodeRef<'a>),
BlankNode(BlankNodeRef<'a>),
#[default]
DefaultGraph,
}
@ -1066,7 +979,7 @@ impl fmt::Display for GraphNameRef<'_> {
match self {
Self::NamedNode(node) => node.fmt(f),
Self::BlankNode(node) => node.fmt(f),
Self::DefaultGraph => f.write_str("DEFAULT"),
Self::DefaultGraph => write!(f, "DEFAULT"),
}
}
}
@ -1147,7 +1060,7 @@ impl<'a> From<GraphNameRef<'a>> for GraphName {
/// );
/// # Result::<_,oxrdf::IriParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)]
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Quad {
/// The [subject](https://www.w3.org/TR/rdf11-concepts/#dfn-subject) of this triple.
pub subject: Subject,
@ -1307,62 +1220,3 @@ impl<'a> From<QuadRef<'a>> for Quad {
quad.into_owned()
}
}
/// An error returned by some [`TryFrom<Term>`](TryFrom) implementations.
///
/// It keeps the term that could not be converted so the caller can get it back with [`into_term`](Self::into_term).
#[derive(Debug, Clone, thiserror::Error)]
#[error("{term} can not be converted to a {target}")]
pub struct TryFromTermError {
// The term whose conversion failed, given back to the caller by `into_term`.
pub(crate) term: Term,
// Name of the type the conversion was targeting, only used to build the error message.
pub(crate) target: &'static str,
}
impl TryFromTermError {
/// The term that can't be converted
///
/// Consumes the error and returns the term by value.
#[inline]
pub fn into_term(self) -> Term {
self.term
}
}
#[cfg(test)]
// The test returns a `Result` so `?` can be used, while still relying on the panicking `assert_eq!` and `unwrap_err`.
#[allow(clippy::panic_in_result_fn)]
mod tests {
use super::*;
/// Checks `Triple::from_terms` on a well-formed triple and on triples with a literal in subject or predicate position.
#[test]
fn triple_from_terms() -> Result<(), TryFromTermError> {
// Three named nodes: same result as the infallible `Triple::new`.
assert_eq!(
Triple::from_terms(
NamedNode::new_unchecked("http://example.com/s"),
NamedNode::new_unchecked("http://example.com/p"),
NamedNode::new_unchecked("http://example.com/o"),
)?,
Triple::new(
NamedNode::new_unchecked("http://example.com/s"),
NamedNode::new_unchecked("http://example.com/p"),
NamedNode::new_unchecked("http://example.com/o"),
)
);
// A literal subject is rejected and the error gives the literal back.
assert_eq!(
Triple::from_terms(
Literal::new_simple_literal("foo"),
NamedNode::new_unchecked("http://example.com/p"),
NamedNode::new_unchecked("http://example.com/o"),
)
.unwrap_err()
.into_term(),
Term::from(Literal::new_simple_literal("foo"))
);
// A literal predicate is rejected and the error gives the literal back.
assert_eq!(
Triple::from_terms(
NamedNode::new_unchecked("http://example.com/s"),
Literal::new_simple_literal("foo"),
NamedNode::new_unchecked("http://example.com/o"),
)
.unwrap_err()
.into_term(),
Term::from(Literal::new_simple_literal("foo"))
);
Ok(())
}
}

@ -1,4 +1,5 @@
use std::cmp::Ordering;
use std::error::Error;
use std::fmt;
/// A [SPARQL query](https://www.w3.org/TR/sparql11-query/) owned variable.
@ -7,7 +8,10 @@ use std::fmt;
/// ```
/// use oxrdf::{Variable, VariableNameParseError};
///
/// assert_eq!("?foo", Variable::new("foo")?.to_string());
/// assert_eq!(
/// "?foo",
/// Variable::new("foo")?.to_string()
/// );
/// # Result::<_,VariableNameParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
@ -63,9 +67,12 @@ impl fmt::Display for Variable {
///
/// The default string formatter is returning a SPARQL compatible representation:
/// ```
/// use oxrdf::{VariableNameParseError, VariableRef};
/// use oxrdf::{VariableRef, VariableNameParseError};
///
/// assert_eq!("?foo", VariableRef::new("foo")?.to_string());
/// assert_eq!(
/// "?foo",
/// VariableRef::new("foo")?.to_string()
/// );
/// # Result::<_,VariableNameParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
@ -89,12 +96,12 @@ impl<'a> VariableRef<'a> {
///
/// [`Variable::new()`] is a safe version of this constructor and should be used for untrusted data.
#[inline]
pub const fn new_unchecked(name: &'a str) -> Self {
pub fn new_unchecked(name: &'a str) -> Self {
Self { name }
}
#[inline]
pub const fn as_str(self) -> &'a str {
pub fn as_str(&self) -> &str {
self.name
}
@ -162,7 +169,7 @@ impl PartialOrd<VariableRef<'_>> for Variable {
fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError> {
let mut chars = id.chars();
let front = chars.next().ok_or(VariableNameParseError)?;
let front = chars.next().ok_or(VariableNameParseError {})?;
match front {
'0'..='9'
| '_'
@ -181,13 +188,13 @@ fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError>
| '\u{F900}'..='\u{FDCF}'
| '\u{FDF0}'..='\u{FFFD}'
| '\u{10000}'..='\u{EFFFF}' => (),
_ => return Err(VariableNameParseError),
_ => return Err(VariableNameParseError {}),
}
for c in chars {
match c {
'0'..='9'
| '\u{00B7}'
| '\u{0300}'..='\u{036F}'
| '\u{00300}'..='\u{036F}'
| '\u{203F}'..='\u{2040}'
| '_'
| 'A'..='Z'
@ -204,13 +211,21 @@ fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError>
| '\u{F900}'..='\u{FDCF}'
| '\u{FDF0}'..='\u{FFFD}'
| '\u{10000}'..='\u{EFFFF}' => (),
_ => return Err(VariableNameParseError),
_ => return Err(VariableNameParseError {}),
}
}
Ok(())
}
/// An error raised during [`Variable`] name validation.
#[derive(Debug, thiserror::Error)]
#[error("The variable name is invalid")]
pub struct VariableNameParseError;
#[derive(Debug)]
pub struct VariableNameParseError {}
impl fmt::Display for VariableNameParseError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "The variable name is invalid")
}
}
impl Error for VariableNameParseError {}

@ -231,12 +231,3 @@ pub mod xsd {
pub const YEAR_MONTH_DURATION: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#yearMonthDuration");
}
pub mod geosparql {
//! [GeoSPARQL](https://opengeospatial.github.io/ogc-geosparql/) vocabulary.
use crate::named_node::NamedNodeRef;
/// The `geo:wktLiteral` geospatial datatype, used by literals like `"Point({longitude} {latitude})"^^geo:wktLiteral`
pub const WKT_LITERAL: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://www.opengis.net/ont/geosparql#wktLiteral");
}

@ -1,36 +0,0 @@
[package]
name = "oxrdfio"
version = "0.1.0-alpha.5"
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxrdfio"
documentation = "https://docs.rs/oxrdfio"
description = """
Parser and serializer for various RDF formats
"""
edition.workspace = true
rust-version.workspace = true
[features]
default = []
async-tokio = ["dep:tokio", "oxrdfxml/async-tokio", "oxttl/async-tokio"]
rdf-star = ["oxrdf/rdf-star", "oxttl/rdf-star"]
[dependencies]
oxrdf.workspace = true
oxrdfxml.workspace = true
oxttl.workspace = true
thiserror.workspace = true
tokio = { workspace = true, optional = true, features = ["io-util"] }
[dev-dependencies]
tokio = { workspace = true, features = ["rt", "macros"] }
[lints]
workspace = true
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]

@ -1,67 +0,0 @@
OxRDF I/O
=========
[![Latest Version](https://img.shields.io/crates/v/oxrdfio.svg)](https://crates.io/crates/oxrdfio)
[![Released API docs](https://docs.rs/oxrdfio/badge.svg)](https://docs.rs/oxrdfio)
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfio)](https://crates.io/crates/oxrdfio)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
OxRDF I/O is a set of parsers and serializers for RDF.
It supports:
* [N3](https://w3c.github.io/N3/spec/) using [`oxttl`](https://crates.io/crates/oxttl)
* [N-Quads](https://www.w3.org/TR/n-quads/) using [`oxttl`](https://crates.io/crates/oxttl)
* [N-Triples](https://www.w3.org/TR/n-triples/) using [`oxttl`](https://crates.io/crates/oxttl)
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) using [`oxrdfxml`](https://crates.io/crates/oxrdfxml)
* [TriG](https://www.w3.org/TR/trig/) using [`oxttl`](https://crates.io/crates/oxttl)
* [Turtle](https://www.w3.org/TR/turtle/) using [`oxttl`](https://crates.io/crates/oxttl)
Support for [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is also available behind the `rdf-star` feature for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star), [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star), [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) and [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star).
It is designed as a low level parser compatible with both synchronous and asynchronous I/O (behind the `async-tokio` feature).
The entry points of this library are the two [`RdfParser`] and [`RdfSerializer`] structs.
Usage example converting a Turtle file to a N-Triples file:
```rust
use oxrdfio::{RdfFormat, RdfParser, RdfSerializer};
let turtle_file = b"@base <http://example.com/> .
@prefix schema: <http://schema.org/> .
<foo> a schema:Person ;
schema:name \"Foo\" .
<bar> a schema:Person ;
schema:name \"Bar\" .";
let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
<http://example.com/foo> <http://schema.org/name> \"Foo\" .
<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
<http://example.com/bar> <http://schema.org/name> \"Bar\" .
";
let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) {
writer.write_quad(&quad.unwrap()).unwrap();
}
assert_eq!(writer.finish().unwrap(), ntriples_file);
```
Parsers for other RDF formats exist in Rust, like [graph-rdfa-processor](https://github.com/nbittich/graph-rdfa-processor) for RDFa and [json-ld](https://github.com/timothee-haudebourg/json-ld) for JSON-LD.
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -1,122 +0,0 @@
use std::io;
use std::ops::Range;
/// Error returned during RDF format parsing.
///
/// Both variants are transparent: the `Display` output and the error source are those of the wrapped error.
#[derive(Debug, thiserror::Error)]
pub enum RdfParseError {
/// I/O error during parsing (file not found...).
#[error(transparent)]
Io(#[from] io::Error),
/// An error in the file syntax.
#[error(transparent)]
Syntax(#[from] RdfSyntaxError),
}
impl RdfParseError {
/// Builds a syntax error that only carries the static message `msg`.
pub(crate) fn msg(msg: &'static str) -> Self {
Self::Syntax(RdfSyntaxError(SyntaxErrorKind::Msg(msg)))
}
}
impl From<oxttl::TurtleSyntaxError> for RdfSyntaxError {
#[inline]
fn from(error: oxttl::TurtleSyntaxError) -> Self {
Self(SyntaxErrorKind::Turtle(error))
}
}
impl From<oxttl::TurtleParseError> for RdfParseError {
#[inline]
fn from(error: oxttl::TurtleParseError) -> Self {
match error {
oxttl::TurtleParseError::Syntax(e) => Self::Syntax(e.into()),
oxttl::TurtleParseError::Io(e) => Self::Io(e),
}
}
}
impl From<oxrdfxml::RdfXmlSyntaxError> for RdfSyntaxError {
#[inline]
fn from(error: oxrdfxml::RdfXmlSyntaxError) -> Self {
Self(SyntaxErrorKind::RdfXml(error))
}
}
impl From<oxrdfxml::RdfXmlParseError> for RdfParseError {
#[inline]
fn from(error: oxrdfxml::RdfXmlParseError) -> Self {
match error {
oxrdfxml::RdfXmlParseError::Syntax(e) => Self::Syntax(e.into()),
oxrdfxml::RdfXmlParseError::Io(e) => Self::Io(e),
}
}
}
impl From<RdfParseError> for io::Error {
    /// Gives back the underlying I/O error, or converts the syntax error into an I/O error.
    #[inline]
    fn from(error: RdfParseError) -> Self {
        match error {
            RdfParseError::Syntax(syntax_error) => Self::from(syntax_error),
            RdfParseError::Io(io_error) => io_error,
        }
    }
}
/// An error in the syntax of the parsed file.
///
/// The concrete cause is kept in a private enum; `Display` is forwarded to it.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct RdfSyntaxError(#[from] SyntaxErrorKind);
/// The private cause of an [`RdfSyntaxError`].
#[derive(Debug, thiserror::Error)]
enum SyntaxErrorKind {
// Error raised by one of the `oxttl` parsers.
#[error(transparent)]
Turtle(#[from] oxttl::TurtleSyntaxError),
// Error raised by the `oxrdfxml` parser.
#[error(transparent)]
RdfXml(#[from] oxrdfxml::RdfXmlSyntaxError),
// Error raised by this crate itself, displayed as the bare message.
#[error("{0}")]
Msg(&'static str),
}
impl RdfSyntaxError {
    /// The location of the error inside of the file.
    ///
    /// Only errors coming from the `oxttl` parsers carry a location, `None` is returned for the other ones.
    #[inline]
    pub fn location(&self) -> Option<Range<TextPosition>> {
        match &self.0 {
            SyntaxErrorKind::Turtle(e) => {
                let range = e.location();
                // Copy the positions field by field into this crate's own position type.
                let start = TextPosition {
                    line: range.start.line,
                    column: range.start.column,
                    offset: range.start.offset,
                };
                let end = TextPosition {
                    line: range.end.line,
                    column: range.end.column,
                    offset: range.end.offset,
                };
                Some(start..end)
            }
            SyntaxErrorKind::RdfXml(_) | SyntaxErrorKind::Msg(_) => None,
        }
    }
}
impl From<RdfSyntaxError> for io::Error {
#[inline]
fn from(error: RdfSyntaxError) -> Self {
match error.0 {
SyntaxErrorKind::Turtle(error) => error.into(),
SyntaxErrorKind::RdfXml(error) => error.into(),
SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg),
}
}
}
/// A position in a text i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points) and a global file `offset` starting from 0 (in number of bytes).
#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub struct TextPosition {
/// Line number, starting from 0.
pub line: u64,
/// Column number in the line, starting from 0 and counted in code points.
pub column: u64,
/// Offset from the beginning of the file, starting from 0 and counted in bytes.
pub offset: u64,
}

@ -1,216 +0,0 @@
use std::fmt;
/// RDF serialization formats.
///
/// This enumeration is non exhaustive. New formats like JSON-LD might be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
pub enum RdfFormat {
    /// [N3](https://w3c.github.io/N3/spec/)
    N3,
    /// [N-Quads](https://www.w3.org/TR/n-quads/)
    NQuads,
    /// [N-Triples](https://www.w3.org/TR/n-triples/)
    NTriples,
    /// [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/)
    RdfXml,
    /// [TriG](https://www.w3.org/TR/trig/)
    TriG,
    /// [Turtle](https://www.w3.org/TR/turtle/)
    Turtle,
}

impl RdfFormat {
    /// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/).
    ///
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(
    ///     RdfFormat::NTriples.iri(),
    ///     "http://www.w3.org/ns/formats/N-Triples"
    /// )
    /// ```
    #[inline]
    pub const fn iri(self) -> &'static str {
        match self {
            Self::N3 => "http://www.w3.org/ns/formats/N3",
            Self::NQuads => "http://www.w3.org/ns/formats/N-Quads",
            Self::NTriples => "http://www.w3.org/ns/formats/N-Triples",
            Self::RdfXml => "http://www.w3.org/ns/formats/RDF_XML",
            Self::TriG => "http://www.w3.org/ns/formats/TriG",
            Self::Turtle => "http://www.w3.org/ns/formats/Turtle",
        }
    }

    /// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
    ///
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(RdfFormat::NTriples.media_type(), "application/n-triples")
    /// ```
    #[inline]
    pub const fn media_type(self) -> &'static str {
        match self {
            Self::N3 => "text/n3",
            Self::NQuads => "application/n-quads",
            Self::NTriples => "application/n-triples",
            Self::RdfXml => "application/rdf+xml",
            Self::TriG => "application/trig",
            Self::Turtle => "text/turtle",
        }
    }

    /// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension.
    ///
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(RdfFormat::NTriples.file_extension(), "nt")
    /// ```
    #[inline]
    pub const fn file_extension(self) -> &'static str {
        match self {
            Self::N3 => "n3",
            Self::NQuads => "nq",
            Self::NTriples => "nt",
            Self::RdfXml => "rdf",
            Self::TriG => "trig",
            Self::Turtle => "ttl",
        }
    }

    /// The format name.
    ///
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(RdfFormat::NTriples.name(), "N-Triples")
    /// ```
    #[inline]
    pub const fn name(self) -> &'static str {
        match self {
            Self::N3 => "N3",
            Self::NQuads => "N-Quads",
            Self::NTriples => "N-Triples",
            Self::RdfXml => "RDF/XML",
            Self::TriG => "TriG",
            Self::Turtle => "Turtle",
        }
    }

    /// Checks if the formats supports [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) and not only [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
    ///
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(RdfFormat::NTriples.supports_datasets(), false);
    /// assert_eq!(RdfFormat::NQuads.supports_datasets(), true);
    /// ```
    #[inline]
    pub const fn supports_datasets(self) -> bool {
        matches!(self, Self::NQuads | Self::TriG)
    }

    /// Checks if the formats supports [RDF-star quoted triples](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#dfn-quoted).
    ///
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(RdfFormat::NTriples.supports_rdf_star(), true);
    /// assert_eq!(RdfFormat::RdfXml.supports_rdf_star(), false);
    /// ```
    #[inline]
    #[cfg(feature = "rdf-star")]
    pub const fn supports_rdf_star(self) -> bool {
        matches!(
            self,
            Self::NTriples | Self::NQuads | Self::Turtle | Self::TriG
        )
    }

    /// Looks for a known format from a media type.
    ///
    /// It supports some media type aliases.
    /// For example, "application/xml" is going to return `RdfFormat::RdfXml` even if it is not its canonical media type.
    ///
    /// The media type parameters (everything after the first `;`) are ignored.
    /// The type, the subtype and an optional `x-` subtype prefix are all matched case-insensitively.
    /// `None` is returned if there is no `/`, if the type is neither `application` nor `text`,
    /// or if the subtype is unknown.
    ///
    /// Example:
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(
    ///     RdfFormat::from_media_type("text/turtle; charset=utf-8"),
    ///     Some(RdfFormat::Turtle)
    /// )
    /// ```
    #[inline]
    pub fn from_media_type(media_type: &str) -> Option<Self> {
        const MEDIA_SUBTYPES: [(&str, RdfFormat); 10] = [
            ("n-quads", RdfFormat::NQuads),
            ("n-triples", RdfFormat::NTriples),
            ("n3", RdfFormat::N3),
            ("nquads", RdfFormat::NQuads),
            ("ntriples", RdfFormat::NTriples),
            ("plain", RdfFormat::NTriples),
            ("rdf+xml", RdfFormat::RdfXml),
            ("trig", RdfFormat::TriG),
            ("turtle", RdfFormat::Turtle),
            ("xml", RdfFormat::RdfXml),
        ];

        // Drop the parameters, then split "type/subtype".
        let essence = media_type
            .split_once(';')
            .map_or(media_type, |(essence, _)| essence);
        let (r#type, subtype) = essence.split_once('/')?;
        let r#type = r#type.trim();
        if !r#type.eq_ignore_ascii_case("application") && !r#type.eq_ignore_ascii_case("text") {
            return None;
        }
        let subtype = subtype.trim();
        // The rest of the lookup ignores ASCII case, so the "x-" prefix is stripped
        // whatever its case too. `str::get` returns `None` instead of panicking when
        // the subtype is shorter than 2 bytes or when byte 2 is not a char boundary.
        let subtype = match (subtype.get(..2), subtype.get(2..)) {
            (Some(prefix), Some(rest)) if prefix.eq_ignore_ascii_case("x-") => rest,
            _ => subtype,
        };
        MEDIA_SUBTYPES.iter().find_map(|(candidate, format)| {
            candidate.eq_ignore_ascii_case(subtype).then_some(*format)
        })
    }

    /// Looks for a known format from an extension.
    ///
    /// It supports some aliases.
    /// The extension is given without its leading dot and is matched case-insensitively.
    ///
    /// Example:
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(RdfFormat::from_extension("nt"), Some(RdfFormat::NTriples))
    /// ```
    #[inline]
    pub fn from_extension(extension: &str) -> Option<Self> {
        const EXTENSIONS: [(&str, RdfFormat); 8] = [
            ("n3", RdfFormat::N3),
            ("nq", RdfFormat::NQuads),
            ("nt", RdfFormat::NTriples),
            ("rdf", RdfFormat::RdfXml),
            ("trig", RdfFormat::TriG),
            ("ttl", RdfFormat::Turtle),
            ("txt", RdfFormat::NTriples),
            ("xml", RdfFormat::RdfXml),
        ];
        EXTENSIONS.iter().find_map(|(candidate, format)| {
            candidate.eq_ignore_ascii_case(extension).then_some(*format)
        })
    }
}
impl fmt::Display for RdfFormat {
/// Writes the format name, i.e. the string returned by [`RdfFormat::name`].
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.name())
}
}

@ -1,19 +0,0 @@
#![doc = include_str!("../README.md")]
#![doc(test(attr(deny(warnings))))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
mod error;
mod format;
mod parser;
mod serializer;
pub use error::{RdfParseError, RdfSyntaxError, TextPosition};
pub use format::RdfFormat;
#[cfg(feature = "async-tokio")]
pub use parser::FromTokioAsyncReadQuadReader;
pub use parser::{FromReadQuadReader, RdfParser};
#[cfg(feature = "async-tokio")]
pub use serializer::ToTokioAsyncWriteQuadWriter;
pub use serializer::{RdfSerializer, ToWriteQuadWriter};

@ -1,807 +0,0 @@
//! Utilities to read RDF graphs and datasets.
pub use crate::error::RdfParseError;
use crate::format::RdfFormat;
use oxrdf::{BlankNode, GraphName, IriParseError, Quad, Subject, Term, Triple};
#[cfg(feature = "async-tokio")]
use oxrdfxml::FromTokioAsyncReadRdfXmlReader;
use oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser};
#[cfg(feature = "async-tokio")]
use oxttl::n3::FromTokioAsyncReadN3Reader;
use oxttl::n3::{FromReadN3Reader, N3Parser, N3PrefixesIter, N3Quad, N3Term};
#[cfg(feature = "async-tokio")]
use oxttl::nquads::FromTokioAsyncReadNQuadsReader;
use oxttl::nquads::{FromReadNQuadsReader, NQuadsParser};
#[cfg(feature = "async-tokio")]
use oxttl::ntriples::FromTokioAsyncReadNTriplesReader;
use oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser};
#[cfg(feature = "async-tokio")]
use oxttl::trig::FromTokioAsyncReadTriGReader;
use oxttl::trig::{FromReadTriGReader, TriGParser, TriGPrefixesIter};
#[cfg(feature = "async-tokio")]
use oxttl::turtle::FromTokioAsyncReadTurtleReader;
use oxttl::turtle::{FromReadTurtleReader, TurtleParser, TurtlePrefixesIter};
use std::collections::HashMap;
use std::io::Read;
#[cfg(feature = "async-tokio")]
use tokio::io::AsyncRead;
/// Parsers for RDF serialization formats.
///
/// It currently supports the following formats:
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
///
/// Note the useful options:
/// - [`with_base_iri`](Self::with_base_iri) to resolve the relative IRIs.
/// - [`rename_blank_nodes`](Self::rename_blank_nodes) to rename the blank nodes to auto-generated numbers to avoid conflicts when merging RDF graphs together.
/// - [`without_named_graphs`](Self::without_named_graphs) to parse a single graph.
/// - [`unchecked`](Self::unchecked) to skip some validations if the file is already known to be valid.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let quads = parser
/// .parse_read(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct RdfParser {
// The format-specific parser, picked by `from_format`.
inner: RdfParserKind,
// Graph name set by `with_default_graph`; `GraphName::DefaultGraph` until then.
default_graph: GraphName,
// Set to `true` by `without_named_graphs`.
without_named_graphs: bool,
// Initialized to `false` by `from_format`; presumably switched on by `rename_blank_nodes` (its body is not visible here).
rename_blank_nodes: bool,
}
/// The format-specific parser wrapped by [`RdfParser`], one variant per [`RdfFormat`].
enum RdfParserKind {
N3(N3Parser),
NQuads(NQuadsParser),
NTriples(NTriplesParser),
RdfXml(RdfXmlParser),
TriG(TriGParser),
Turtle(TurtleParser),
}
impl RdfParser {
/// Builds a parser for the given format.
///
/// The parser starts with the default options: quads stay in the default graph, named graphs are allowed and blank nodes are not renamed.
/// When the `rdf-star` feature is enabled, quoted triples are turned on for the N-Quads, N-Triples, TriG and Turtle parsers.
#[inline]
pub fn from_format(format: RdfFormat) -> Self {
Self {
inner: match format {
RdfFormat::N3 => RdfParserKind::N3(N3Parser::new()),
RdfFormat::NQuads => RdfParserKind::NQuads({
// Exactly one of the two cfg-gated blocks below is compiled and is the value of the enclosing block.
#[cfg(feature = "rdf-star")]
{
NQuadsParser::new().with_quoted_triples()
}
#[cfg(not(feature = "rdf-star"))]
{
NQuadsParser::new()
}
}),
RdfFormat::NTriples => RdfParserKind::NTriples({
#[cfg(feature = "rdf-star")]
{
NTriplesParser::new().with_quoted_triples()
}
#[cfg(not(feature = "rdf-star"))]
{
NTriplesParser::new()
}
}),
RdfFormat::RdfXml => RdfParserKind::RdfXml(RdfXmlParser::new()),
RdfFormat::TriG => RdfParserKind::TriG({
#[cfg(feature = "rdf-star")]
{
TriGParser::new().with_quoted_triples()
}
#[cfg(not(feature = "rdf-star"))]
{
TriGParser::new()
}
}),
RdfFormat::Turtle => RdfParserKind::Turtle({
#[cfg(feature = "rdf-star")]
{
TurtleParser::new().with_quoted_triples()
}
#[cfg(not(feature = "rdf-star"))]
{
TurtleParser::new()
}
}),
},
default_graph: GraphName::DefaultGraph,
without_named_graphs: false,
rename_blank_nodes: false,
}
}
/// The format the parser uses.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// assert_eq!(
/// RdfParser::from_format(RdfFormat::Turtle).format(),
/// RdfFormat::Turtle
/// );
/// ```
pub fn format(&self) -> RdfFormat {
match &self.inner {
RdfParserKind::N3(_) => RdfFormat::N3,
RdfParserKind::NQuads(_) => RdfFormat::NQuads,
RdfParserKind::NTriples(_) => RdfFormat::NTriples,
RdfParserKind::RdfXml(_) => RdfFormat::RdfXml,
RdfParserKind::TriG(_) => RdfFormat::TriG,
RdfParserKind::Turtle(_) => RdfFormat::Turtle,
}
}
/// Provides an IRI that could be used to resolve the file relative IRIs.
///
/// The base IRI is forwarded to the N3, RDF/XML, TriG and Turtle parsers.
/// The N-Triples and N-Quads parsers are left unchanged and the base IRI is not validated for them.
///
/// # Errors
///
/// Returns an [`IriParseError`] if the underlying parser rejects `base_iri`.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "</s> </p> </o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::Turtle).with_base_iri("http://example.com")?;
/// let quads = parser
///     .parse_read(file.as_bytes())
///     .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
    self.inner = match self.inner {
        // N3 allows relative IRIs just like Turtle, so the base IRI must reach its parser too.
        RdfParserKind::N3(p) => RdfParserKind::N3(p.with_base_iri(base_iri)?),
        RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p),
        RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p),
        RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?),
        RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?),
        RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?),
    };
    Ok(self)
}
/// Provides the name graph name that should replace the default graph in the returned quads.
///
/// ```
/// use oxrdf::NamedNode;
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::Turtle)
/// .with_default_graph(NamedNode::new("http://example.com/g")?);
/// let quads = parser
/// .parse_read(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].graph_name.to_string(), "<http://example.com/g>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_default_graph(mut self, default_graph: impl Into<GraphName>) -> Self {
self.default_graph = default_graph.into();
self
}
/// Sets that the parser must fail if parsing a named graph.
///
/// This function restricts the parser to only parse a single [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) and not an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NQuads).without_named_graphs();
/// assert!(parser.parse_read(file.as_bytes()).next().unwrap().is_err());
/// ```
#[inline]
pub fn without_named_graphs(mut self) -> Self {
self.without_named_graphs = true;
self
}
/// Renames the blank nodes ids from the ones set in the serialization to random ids.
///
/// This allows to avoid id conflicts when merging graphs together.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "_:a <http://example.com/p> <http://example.com/o> .";
///
/// let result1 = RdfParser::from_format(RdfFormat::NQuads)
/// .rename_blank_nodes()
/// .parse_read(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
/// let result2 = RdfParser::from_format(RdfFormat::NQuads)
/// .rename_blank_nodes()
/// .parse_read(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
/// assert_ne!(result1, result2);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn rename_blank_nodes(mut self) -> Self {
self.rename_blank_nodes = true;
self
}
/// Assumes the file is valid to make parsing faster.
///
/// It will skip some validations.
///
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
#[inline]
pub fn unchecked(mut self) -> Self {
self.inner = match self.inner {
RdfParserKind::N3(p) => RdfParserKind::N3(p.unchecked()),
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p.unchecked()),
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p.unchecked()),
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.unchecked()),
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.unchecked()),
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.unchecked()),
};
self
}
/// Parses from a [`Read`] implementation and returns an iterator of quads.
///
/// Reads are buffered.
///
/// The parser is consumed: its default graph, named graph restriction and blank node renaming
/// settings are handed over to the returned reader.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let quads = parser
///     .parse_read(file.as_bytes())
///     .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
pub fn parse_read<R: Read>(self, reader: R) -> FromReadQuadReader<R> {
    FromReadQuadReader {
        parser: match self.inner {
            RdfParserKind::N3(p) => FromReadQuadReaderKind::N3(p.parse_read(reader)),
            RdfParserKind::NQuads(p) => FromReadQuadReaderKind::NQuads(p.parse_read(reader)),
            RdfParserKind::NTriples(p) => {
                FromReadQuadReaderKind::NTriples(p.parse_read(reader))
            }
            RdfParserKind::RdfXml(p) => FromReadQuadReaderKind::RdfXml(p.parse_read(reader)),
            RdfParserKind::TriG(p) => FromReadQuadReaderKind::TriG(p.parse_read(reader)),
            RdfParserKind::Turtle(p) => FromReadQuadReaderKind::Turtle(p.parse_read(reader)),
        },
        mapper: QuadMapper {
            // `self` is consumed, so the graph name is moved rather than cloned.
            default_graph: self.default_graph,
            without_named_graphs: self.without_named_graphs,
            // A renaming table is only allocated when blank node renaming is requested.
            blank_node_map: self.rename_blank_nodes.then(HashMap::new),
        },
    }
}
/// Parses from a Tokio [`AsyncRead`] implementation and returns an async iterator of quads.
///
/// Reads are buffered.
///
/// The parser is consumed: its default graph, named graph restriction and blank node renaming
/// settings are handed over to the returned reader.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
/// if let Some(quad) = reader.next().await {
///     assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
/// }
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
    self,
    reader: R,
) -> FromTokioAsyncReadQuadReader<R> {
    FromTokioAsyncReadQuadReader {
        parser: match self.inner {
            RdfParserKind::N3(p) => {
                FromTokioAsyncReadQuadReaderKind::N3(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::NQuads(p) => {
                FromTokioAsyncReadQuadReaderKind::NQuads(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::NTriples(p) => {
                FromTokioAsyncReadQuadReaderKind::NTriples(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::RdfXml(p) => {
                FromTokioAsyncReadQuadReaderKind::RdfXml(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::TriG(p) => {
                FromTokioAsyncReadQuadReaderKind::TriG(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::Turtle(p) => {
                FromTokioAsyncReadQuadReaderKind::Turtle(p.parse_tokio_async_read(reader))
            }
        },
        mapper: QuadMapper {
            // `self` is consumed, so the graph name is moved rather than cloned.
            default_graph: self.default_graph,
            without_named_graphs: self.without_named_graphs,
            // A renaming table is only allocated when blank node renaming is requested.
            blank_node_map: self.rename_blank_nodes.then(HashMap::new),
        },
    }
}
}
impl From<RdfFormat> for RdfParser {
fn from(format: RdfFormat) -> Self {
Self::from_format(format)
}
}
/// Parses a RDF file from a [`Read`] implementation. Can be built using [`RdfParser::parse_read`].
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let quads = parser
///     .parse_read(file.as_bytes())
///     .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct FromReadQuadReader<R: Read> {
// Format-specific reader producing the raw triples or quads.
parser: FromReadQuadReaderKind<R>,
// Converts what the format-specific reader yields into the quads returned to the caller
// (default graph substitution, named graph rejection, blank node renaming).
mapper: QuadMapper,
}
// One variant per supported format, each wrapping the matching format-specific [`Read`]-based reader.
enum FromReadQuadReaderKind<R: Read> {
N3(FromReadN3Reader<R>),
NQuads(FromReadNQuadsReader<R>),
NTriples(FromReadNTriplesReader<R>),
RdfXml(FromReadRdfXmlReader<R>),
TriG(FromReadTriGReader<R>),
Turtle(FromReadTurtleReader<R>),
}
impl<R: Read> Iterator for FromReadQuadReader<R> {
type Item = Result<Quad, RdfParseError>;
fn next(&mut self) -> Option<Self::Item> {
Some(match &mut self.parser {
FromReadQuadReaderKind::N3(parser) => match parser.next()? {
Ok(quad) => self.mapper.map_n3_quad(quad),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::NQuads(parser) => match parser.next()? {
Ok(quad) => self.mapper.map_quad(quad),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::NTriples(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::RdfXml(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::TriG(parser) => match parser.next()? {
Ok(quad) => self.mapper.map_quad(quad),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::Turtle(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
})
}
}
impl<R: Read> FromReadQuadReader<R> {
    /// The list of IRI prefixes known at the current step of the parsing.
    ///
    /// This method returns (prefix name, prefix value) tuples.
    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
    ///
    /// An empty iterator is returned if the format does not support prefixes.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name "Foo" ."#;
    ///
    /// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
    /// assert!(reader.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
    ///
    /// reader.next().unwrap()?; // We read the first triple
    /// assert_eq!(
    ///     reader.prefixes().collect::<Vec<_>>(),
    ///     [("schema", "http://schema.org/")]
    /// ); // There are now prefixes
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn prefixes(&self) -> PrefixesIter<'_> {
        let inner = match &self.parser {
            FromReadQuadReaderKind::Turtle(reader) => PrefixesIterKind::Turtle(reader.prefixes()),
            FromReadQuadReaderKind::TriG(reader) => PrefixesIterKind::TriG(reader.prefixes()),
            FromReadQuadReaderKind::N3(reader) => PrefixesIterKind::N3(reader.prefixes()),
            // TODO: implement for RDF/XML
            FromReadQuadReaderKind::RdfXml(_)
            | FromReadQuadReaderKind::NQuads(_)
            | FromReadQuadReaderKind::NTriples(_) => PrefixesIterKind::None,
        };
        PrefixesIter { inner }
    }

    /// The base IRI known at the current step of the parsing.
    ///
    /// `None` is returned if no base IRI is set or the format does not support base IRIs.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name "Foo" ."#;
    ///
    /// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
    /// assert!(reader.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
    ///
    /// reader.next().unwrap()?; // We read the first triple
    /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI.
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn base_iri(&self) -> Option<&str> {
        match &self.parser {
            FromReadQuadReaderKind::Turtle(reader) => reader.base_iri(),
            FromReadQuadReaderKind::TriG(reader) => reader.base_iri(),
            FromReadQuadReaderKind::N3(reader) => reader.base_iri(),
            // TODO: implement for RDF/XML
            FromReadQuadReaderKind::RdfXml(_)
            | FromReadQuadReaderKind::NQuads(_)
            | FromReadQuadReaderKind::NTriples(_) => None,
        }
    }
}
/// Parses a RDF file from a Tokio [`AsyncRead`] implementation. Can be built using [`RdfParser::parse_tokio_async_read`].
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
/// if let Some(quad) = reader.next().await {
///     assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
/// }
/// # Ok(())
/// # }
/// ```
#[must_use]
#[cfg(feature = "async-tokio")]
pub struct FromTokioAsyncReadQuadReader<R: AsyncRead + Unpin> {
// Format-specific async reader producing the raw triples or quads.
parser: FromTokioAsyncReadQuadReaderKind<R>,
// Converts what the format-specific reader yields into the quads returned to the caller
// (default graph substitution, named graph rejection, blank node renaming).
mapper: QuadMapper,
}
// One variant per supported format, each wrapping the matching format-specific Tokio-based reader.
#[cfg(feature = "async-tokio")]
enum FromTokioAsyncReadQuadReaderKind<R: AsyncRead + Unpin> {
N3(FromTokioAsyncReadN3Reader<R>),
NQuads(FromTokioAsyncReadNQuadsReader<R>),
NTriples(FromTokioAsyncReadNTriplesReader<R>),
RdfXml(FromTokioAsyncReadRdfXmlReader<R>),
TriG(FromTokioAsyncReadTriGReader<R>),
Turtle(FromTokioAsyncReadTurtleReader<R>),
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadQuadReader<R> {
    /// Reads the next quad, or returns `None` when the underlying reader is exhausted.
    ///
    /// Errors raised by the format-specific reader are converted into [`RdfParseError`].
    /// Quads coming from N3, N-Quads and TriG can additionally be rejected by the quad mapping step.
    pub async fn next(&mut self) -> Option<Result<Quad, RdfParseError>> {
        Some(match &mut self.parser {
            FromTokioAsyncReadQuadReaderKind::N3(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_n3_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::NQuads(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::NTriples(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::RdfXml(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::TriG(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::Turtle(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
        })
    }

    /// The list of IRI prefixes considered at the current step of the parsing.
    ///
    /// This method returns (prefix name, prefix value) tuples.
    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
    ///
    /// An empty iterator is returned if the format does not support prefixes.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name "Foo" ."#;
    ///
    /// let mut reader =
    ///     RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
    ///
    /// reader.next().await.unwrap()?; // We read the first triple
    /// assert_eq!(
    ///     reader.prefixes().collect::<Vec<_>>(),
    ///     [("schema", "http://schema.org/")]
    /// ); // There are now prefixes
    /// # Ok(())
    /// # }
    /// ```
    pub fn prefixes(&self) -> PrefixesIter<'_> {
        PrefixesIter {
            inner: match &self.parser {
                FromTokioAsyncReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()),
                FromTokioAsyncReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()),
                FromTokioAsyncReadQuadReaderKind::Turtle(p) => {
                    PrefixesIterKind::Turtle(p.prefixes())
                }
                FromTokioAsyncReadQuadReaderKind::NQuads(_)
                | FromTokioAsyncReadQuadReaderKind::NTriples(_)
                | FromTokioAsyncReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */
            },
        }
    }

    /// The base IRI considered at the current step of the parsing.
    ///
    /// `None` is returned if no base IRI is set or the format does not support base IRIs.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name "Foo" ."#;
    ///
    /// let mut reader =
    ///     RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
    /// assert!(reader.base_iri().is_none()); // No base IRI at the beginning
    ///
    /// reader.next().await.unwrap()?; // We read the first triple
    /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI
    /// # Ok(())
    /// # }
    /// ```
    pub fn base_iri(&self) -> Option<&str> {
        match &self.parser {
            FromTokioAsyncReadQuadReaderKind::N3(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::TriG(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::Turtle(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::NQuads(_)
            | FromTokioAsyncReadQuadReaderKind::NTriples(_)
            | FromTokioAsyncReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
        }
    }
}
/// Iterator on the file prefixes.
///
/// It yields (prefix name, prefix value) tuples and is empty for the formats without prefix support.
///
/// See [`FromReadQuadReader::prefixes`].
pub struct PrefixesIter<'a> {
// Format-specific prefix iterator, or the empty placeholder.
inner: PrefixesIterKind<'a>,
}
// Wraps the prefix iterator of each format that exposes prefixes.
// `None` is used for the other formats and never yields anything.
enum PrefixesIterKind<'a> {
Turtle(TurtlePrefixesIter<'a>),
TriG(TriGPrefixesIter<'a>),
N3(N3PrefixesIter<'a>),
None,
}
impl<'a> Iterator for PrefixesIter<'a> {
    type Item = (&'a str, &'a str);

    /// Forwards to the wrapped format-specific iterator; the placeholder variant is always exhausted.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        match &mut self.inner {
            PrefixesIterKind::None => None,
            PrefixesIterKind::N3(prefixes) => prefixes.next(),
            PrefixesIterKind::TriG(prefixes) => prefixes.next(),
            PrefixesIterKind::Turtle(prefixes) => prefixes.next(),
        }
    }

    /// Forwards to the wrapped format-specific iterator; the placeholder variant reports exactly zero items.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        match &self.inner {
            PrefixesIterKind::None => (0, Some(0)),
            PrefixesIterKind::N3(prefixes) => prefixes.size_hint(),
            PrefixesIterKind::TriG(prefixes) => prefixes.size_hint(),
            PrefixesIterKind::Turtle(prefixes) => prefixes.size_hint(),
        }
    }
}
// Post-processes what the format-specific readers produce before it is returned to the caller.
struct QuadMapper {
// Graph name substituted for the default graph.
default_graph: GraphName,
// When set, quads in a named graph are rejected with an error.
without_named_graphs: bool,
// When blank node renaming is enabled, maps each blank node found in the file to its replacement.
// `None` means blank nodes are left untouched.
blank_node_map: Option<HashMap<BlankNode, BlankNode>>,
}
impl QuadMapper {
fn map_blank_node(&mut self, node: BlankNode) -> BlankNode {
if let Some(blank_node_map) = &mut self.blank_node_map {
blank_node_map
.entry(node)
.or_insert_with(BlankNode::default)
.clone()
} else {
node
}
}
fn map_subject(&mut self, node: Subject) -> Subject {
match node {
Subject::NamedNode(node) => node.into(),
Subject::BlankNode(node) => self.map_blank_node(node).into(),
#[cfg(feature = "rdf-star")]
Subject::Triple(triple) => self.map_triple(*triple).into(),
}
}
fn map_term(&mut self, node: Term) -> Term {
match node {
Term::NamedNode(node) => node.into(),
Term::BlankNode(node) => self.map_blank_node(node).into(),
Term::Literal(literal) => literal.into(),
#[cfg(feature = "rdf-star")]
Term::Triple(triple) => self.map_triple(*triple).into(),
}
}
fn map_triple(&mut self, triple: Triple) -> Triple {
Triple {
subject: self.map_subject(triple.subject),
predicate: triple.predicate,
object: self.map_term(triple.object),
}
}
fn map_graph_name(&mut self, graph_name: GraphName) -> Result<GraphName, RdfParseError> {
match graph_name {
GraphName::NamedNode(node) => {
if self.without_named_graphs {
Err(RdfParseError::msg("Named graphs are not allowed"))
} else {
Ok(node.into())
}
}
GraphName::BlankNode(node) => {
if self.without_named_graphs {
Err(RdfParseError::msg("Named graphs are not allowed"))
} else {
Ok(self.map_blank_node(node).into())
}
}
GraphName::DefaultGraph => Ok(self.default_graph.clone()),
}
}
fn map_quad(&mut self, quad: Quad) -> Result<Quad, RdfParseError> {
Ok(Quad {
subject: self.map_subject(quad.subject),
predicate: quad.predicate,
object: self.map_term(quad.object),
graph_name: self.map_graph_name(quad.graph_name)?,
})
}
fn map_triple_to_quad(&mut self, triple: Triple) -> Quad {
self.map_triple(triple).in_graph(self.default_graph.clone())
}
fn map_n3_quad(&mut self, quad: N3Quad) -> Result<Quad, RdfParseError> {
Ok(Quad {
subject: match quad.subject {
N3Term::NamedNode(s) => Ok(s.into()),
N3Term::BlankNode(s) => Ok(self.map_blank_node(s).into()),
N3Term::Literal(_) => Err(RdfParseError::msg(
"literals are not allowed in regular RDF subjects",
)),
#[cfg(feature = "rdf-star")]
N3Term::Triple(s) => Ok(self.map_triple(*s).into()),
N3Term::Variable(_) => Err(RdfParseError::msg(
"variables are not allowed in regular RDF subjects",
)),
}?,
predicate: match quad.predicate {
N3Term::NamedNode(p) => Ok(p),
N3Term::BlankNode(_) => Err(RdfParseError::msg(
"blank nodes are not allowed in regular RDF predicates",
)),
N3Term::Literal(_) => Err(RdfParseError::msg(
"literals are not allowed in regular RDF predicates",
)),
#[cfg(feature = "rdf-star")]
N3Term::Triple(_) => Err(RdfParseError::msg(
"quoted triples are not allowed in regular RDF predicates",
)),
N3Term::Variable(_) => Err(RdfParseError::msg(
"variables are not allowed in regular RDF predicates",
)),
}?,
object: match quad.object {
N3Term::NamedNode(o) => Ok(o.into()),
N3Term::BlankNode(o) => Ok(self.map_blank_node(o).into()),
N3Term::Literal(o) => Ok(o.into()),
#[cfg(feature = "rdf-star")]
N3Term::Triple(o) => Ok(self.map_triple(*o).into()),
N3Term::Variable(_) => Err(RdfParseError::msg(
"variables are not allowed in regular RDF objects",
)),
}?,
graph_name: self.map_graph_name(quad.graph_name)?,
})
}
}

@ -1,410 +0,0 @@
//! Utilities to write RDF graphs and datasets.
use crate::format::RdfFormat;
use oxrdf::{GraphNameRef, IriParseError, QuadRef, TripleRef};
#[cfg(feature = "async-tokio")]
use oxrdfxml::ToTokioAsyncWriteRdfXmlWriter;
use oxrdfxml::{RdfXmlSerializer, ToWriteRdfXmlWriter};
#[cfg(feature = "async-tokio")]
use oxttl::nquads::ToTokioAsyncWriteNQuadsWriter;
use oxttl::nquads::{NQuadsSerializer, ToWriteNQuadsWriter};
#[cfg(feature = "async-tokio")]
use oxttl::ntriples::ToTokioAsyncWriteNTriplesWriter;
use oxttl::ntriples::{NTriplesSerializer, ToWriteNTriplesWriter};
#[cfg(feature = "async-tokio")]
use oxttl::trig::ToTokioAsyncWriteTriGWriter;
use oxttl::trig::{ToWriteTriGWriter, TriGSerializer};
#[cfg(feature = "async-tokio")]
use oxttl::turtle::ToTokioAsyncWriteTurtleWriter;
use oxttl::turtle::{ToWriteTurtleWriter, TurtleSerializer};
use std::io::{self, Write};
#[cfg(feature = "async-tokio")]
use tokio::io::AsyncWrite;
/// A serializer for RDF serialization formats.
///
/// It currently supports the following formats:
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
/// * [canonical](https://www.w3.org/TR/n-triples/#canonical-ntriples) [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
/// writer.write_quad(&Quad {
///    subject: NamedNode::new("http://example.com/s")?.into(),
///    predicate: NamedNode::new("http://example.com/p")?,
///    object: NamedNode::new("http://example.com/o")?.into(),
///    graph_name: NamedNode::new("http://example.com/g")?.into()
/// })?;
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct RdfSerializer {
// Format-specific serializer this wrapper dispatches to.
inner: RdfSerializerKind,
}
// One variant per format-specific serializer.
// There is no dedicated N3 variant: `RdfSerializer::from_format` uses the Turtle serializer for N3.
enum RdfSerializerKind {
NQuads(NQuadsSerializer),
NTriples(NTriplesSerializer),
RdfXml(RdfXmlSerializer),
TriG(TriGSerializer),
Turtle(TurtleSerializer),
}
impl RdfSerializer {
    /// Builds the serializer matching the given format.
    ///
    /// [`RdfFormat::N3`] is handled by the Turtle serializer.
    #[inline]
    pub fn from_format(format: RdfFormat) -> Self {
        let inner = match format {
            RdfFormat::N3 | RdfFormat::Turtle => {
                RdfSerializerKind::Turtle(TurtleSerializer::new())
            }
            RdfFormat::TriG => RdfSerializerKind::TriG(TriGSerializer::new()),
            RdfFormat::RdfXml => RdfSerializerKind::RdfXml(RdfXmlSerializer::new()),
            RdfFormat::NTriples => RdfSerializerKind::NTriples(NTriplesSerializer::new()),
            RdfFormat::NQuads => RdfSerializerKind::NQuads(NQuadsSerializer::new()),
        };
        Self { inner }
    }

    /// The format the serializer serializes to.
    ///
    /// A serializer built from [`RdfFormat::N3`] reports [`RdfFormat::Turtle`] because it relies on
    /// the Turtle serializer.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfSerializer};
    ///
    /// assert_eq!(
    ///     RdfSerializer::from_format(RdfFormat::Turtle).format(),
    ///     RdfFormat::Turtle
    /// );
    /// ```
    pub fn format(&self) -> RdfFormat {
        match self.inner {
            RdfSerializerKind::Turtle(_) => RdfFormat::Turtle,
            RdfSerializerKind::TriG(_) => RdfFormat::TriG,
            RdfSerializerKind::RdfXml(_) => RdfFormat::RdfXml,
            RdfSerializerKind::NTriples(_) => RdfFormat::NTriples,
            RdfSerializerKind::NQuads(_) => RdfFormat::NQuads,
        }
    }

    /// If the format supports it, sets a prefix.
    ///
    /// The prefix is forwarded to the RDF/XML, TriG and Turtle serializers and ignored by the
    /// N-Triples and N-Quads ones.
    ///
    /// ```
    /// use oxrdf::vocab::rdf;
    /// use oxrdf::{NamedNodeRef, TripleRef};
    /// use oxrdfio::{RdfFormat, RdfSerializer};
    ///
    /// let mut writer = RdfSerializer::from_format(RdfFormat::Turtle)
    ///     .with_prefix("schema", "http://schema.org/")?
    ///     .serialize_to_write(Vec::new());
    /// writer.write_triple(TripleRef {
    ///     subject: NamedNodeRef::new("http://example.com/s")?.into(),
    ///     predicate: rdf::TYPE.into(),
    ///     object: NamedNodeRef::new("http://schema.org/Person")?.into(),
    /// })?;
    /// assert_eq!(
    ///     writer.finish()?,
    ///     b"@prefix schema: <http://schema.org/> .\n<http://example.com/s> a schema:Person .\n"
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    #[inline]
    pub fn with_prefix(
        self,
        prefix_name: impl Into<String>,
        prefix_iri: impl Into<String>,
    ) -> Result<Self, IriParseError> {
        let inner = match self.inner {
            RdfSerializerKind::Turtle(serializer) => {
                RdfSerializerKind::Turtle(serializer.with_prefix(prefix_name, prefix_iri)?)
            }
            RdfSerializerKind::TriG(serializer) => {
                RdfSerializerKind::TriG(serializer.with_prefix(prefix_name, prefix_iri)?)
            }
            RdfSerializerKind::RdfXml(serializer) => {
                RdfSerializerKind::RdfXml(serializer.with_prefix(prefix_name, prefix_iri)?)
            }
            // These serializers are returned unchanged: the prefix is not passed to them.
            RdfSerializerKind::NTriples(serializer) => RdfSerializerKind::NTriples(serializer),
            RdfSerializerKind::NQuads(serializer) => RdfSerializerKind::NQuads(serializer),
        };
        Ok(Self { inner })
    }

    /// Writes to a [`Write`] implementation.
    ///
    /// <div class="warning">
    ///
    /// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
    ///
    /// <div class="warning">
    ///
    /// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfSerializer};
    /// use oxrdf::{Quad, NamedNode};
    ///
    /// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
    /// writer.write_quad(&Quad {
    ///    subject: NamedNode::new("http://example.com/s")?.into(),
    ///    predicate: NamedNode::new("http://example.com/p")?,
    ///    object: NamedNode::new("http://example.com/o")?.into(),
    ///    graph_name: NamedNode::new("http://example.com/g")?.into()
    /// })?;
    /// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteQuadWriter<W> {
        let formatter = match self.inner {
            RdfSerializerKind::Turtle(serializer) => {
                ToWriteQuadWriterKind::Turtle(serializer.serialize_to_write(write))
            }
            RdfSerializerKind::TriG(serializer) => {
                ToWriteQuadWriterKind::TriG(serializer.serialize_to_write(write))
            }
            RdfSerializerKind::RdfXml(serializer) => {
                ToWriteQuadWriterKind::RdfXml(serializer.serialize_to_write(write))
            }
            RdfSerializerKind::NTriples(serializer) => {
                ToWriteQuadWriterKind::NTriples(serializer.serialize_to_write(write))
            }
            RdfSerializerKind::NQuads(serializer) => {
                ToWriteQuadWriterKind::NQuads(serializer.serialize_to_write(write))
            }
        };
        ToWriteQuadWriter { formatter }
    }

    /// Writes to a Tokio [`AsyncWrite`] implementation.
    ///
    /// <div class="warning">
    ///
    /// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
    ///
    /// <div class="warning">
    ///
    /// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfSerializer};
    /// use oxrdf::{Quad, NamedNode};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> std::io::Result<()> {
    /// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
    /// writer.write_quad(&Quad {
    ///     subject: NamedNode::new_unchecked("http://example.com/s").into(),
    ///     predicate: NamedNode::new_unchecked("http://example.com/p"),
    ///     object: NamedNode::new_unchecked("http://example.com/o").into(),
    ///     graph_name: NamedNode::new_unchecked("http://example.com/g").into()
    /// }).await?;
    /// assert_eq!(writer.finish().await?, "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
    /// # Ok(())
    /// # }
    /// ```
    #[cfg(feature = "async-tokio")]
    pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>(
        self,
        write: W,
    ) -> ToTokioAsyncWriteQuadWriter<W> {
        let formatter = match self.inner {
            RdfSerializerKind::Turtle(serializer) => ToTokioAsyncWriteQuadWriterKind::Turtle(
                serializer.serialize_to_tokio_async_write(write),
            ),
            RdfSerializerKind::TriG(serializer) => ToTokioAsyncWriteQuadWriterKind::TriG(
                serializer.serialize_to_tokio_async_write(write),
            ),
            RdfSerializerKind::RdfXml(serializer) => ToTokioAsyncWriteQuadWriterKind::RdfXml(
                serializer.serialize_to_tokio_async_write(write),
            ),
            RdfSerializerKind::NTriples(serializer) => ToTokioAsyncWriteQuadWriterKind::NTriples(
                serializer.serialize_to_tokio_async_write(write),
            ),
            RdfSerializerKind::NQuads(serializer) => ToTokioAsyncWriteQuadWriterKind::NQuads(
                serializer.serialize_to_tokio_async_write(write),
            ),
        };
        ToTokioAsyncWriteQuadWriter { formatter }
    }
}
impl From<RdfFormat> for RdfSerializer {
fn from(format: RdfFormat) -> Self {
Self::from_format(format)
}
}
/// Writes quads or triples to a [`Write`] implementation.
///
/// Can be built using [`RdfSerializer::serialize_to_write`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// <div class="warning">
///
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
/// writer.write_quad(&Quad {
///    subject: NamedNode::new("http://example.com/s")?.into(),
///    predicate: NamedNode::new("http://example.com/p")?,
///    object: NamedNode::new("http://example.com/o")?.into(),
///    graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct ToWriteQuadWriter<W: Write> {
// Format-specific writer every call is dispatched to.
formatter: ToWriteQuadWriterKind<W>,
}
// One variant per format-specific [`Write`]-based writer.
enum ToWriteQuadWriterKind<W: Write> {
NQuads(ToWriteNQuadsWriter<W>),
NTriples(ToWriteNTriplesWriter<W>),
RdfXml(ToWriteRdfXmlWriter<W>),
TriG(ToWriteTriGWriter<W>),
Turtle(ToWriteTurtleWriter<W>),
}
impl<W: Write> ToWriteQuadWriter<W> {
    /// Writes a [`QuadRef`]
    ///
    /// For the triple-based formats (N-Triples, RDF/XML and Turtle) the quad is first converted
    /// with `to_triple`, and the error of that conversion is returned if it fails.
    pub fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
        let quad: QuadRef<'a> = quad.into();
        match &mut self.formatter {
            // Dataset formats take the quad as is.
            ToWriteQuadWriterKind::NQuads(sink) => sink.write_quad(quad),
            ToWriteQuadWriterKind::TriG(sink) => sink.write_quad(quad),
            // Graph formats only take triples.
            ToWriteQuadWriterKind::NTriples(sink) => sink.write_triple(to_triple(quad)?),
            ToWriteQuadWriterKind::RdfXml(sink) => sink.write_triple(to_triple(quad)?),
            ToWriteQuadWriterKind::Turtle(sink) => sink.write_triple(to_triple(quad)?),
        }
    }

    /// Writes a [`TripleRef`]
    ///
    /// The triple is written as a quad in the default graph.
    pub fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let triple: TripleRef<'a> = triple.into();
        let quad = triple.in_graph(GraphNameRef::DefaultGraph);
        self.write_quad(quad)
    }

    /// Writes the last bytes of the file
    ///
    /// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](io::BufWriter).
    pub fn finish(self) -> io::Result<W> {
        let write = match self.formatter {
            // These two writers hand back the underlying `Write` directly.
            ToWriteQuadWriterKind::NQuads(sink) => sink.finish(),
            ToWriteQuadWriterKind::NTriples(sink) => sink.finish(),
            // These writers can fail while finishing.
            ToWriteQuadWriterKind::RdfXml(sink) => sink.finish()?,
            ToWriteQuadWriterKind::TriG(sink) => sink.finish()?,
            ToWriteQuadWriterKind::Turtle(sink) => sink.finish()?,
        };
        Ok(write)
    }
}
/// Writes quads or triples to a Tokio [`AsyncWrite`] implementation.
///
/// Can be built using [`RdfSerializer::serialize_to_tokio_async_write`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// <div class="warning">
///
/// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> std::io::Result<()> {
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
/// writer.write_quad(&Quad {
///     subject: NamedNode::new_unchecked("http://example.com/s").into(),
///     predicate: NamedNode::new_unchecked("http://example.com/p"),
///     object: NamedNode::new_unchecked("http://example.com/o").into(),
///     graph_name: NamedNode::new_unchecked("http://example.com/g").into()
/// }).await?;
/// assert_eq!(writer.finish().await?, "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
/// # Ok(())
/// # }
/// ```
#[must_use]
#[cfg(feature = "async-tokio")]
pub struct ToTokioAsyncWriteQuadWriter<W: AsyncWrite + Unpin> {
// Format-specific async writer every call is dispatched to.
formatter: ToTokioAsyncWriteQuadWriterKind<W>,
}
/// Format-specific asynchronous writer wrapped by [`ToTokioAsyncWriteQuadWriter`], one variant per supported serialization.
///
/// `ToTokioAsyncWriteQuadWriter::write_quad` passes quads directly to the `NQuads` and `TriG` writers,
/// and converts them to triples with `to_triple` for the `NTriples`, `RdfXml` and `Turtle` writers.
#[cfg(feature = "async-tokio")]
enum ToTokioAsyncWriteQuadWriterKind<W: AsyncWrite + Unpin> {
NQuads(ToTokioAsyncWriteNQuadsWriter<W>),
NTriples(ToTokioAsyncWriteNTriplesWriter<W>),
RdfXml(ToTokioAsyncWriteRdfXmlWriter<W>),
TriG(ToTokioAsyncWriteTriGWriter<W>),
Turtle(ToTokioAsyncWriteTurtleWriter<W>),
}
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteQuadWriter<W> {
    /// Serializes a single [`QuadRef`] with the underlying format writer.
    ///
    /// N-Quads and TriG receive the quad unchanged. N-Triples, RDF/XML and Turtle only
    /// hold triples, so the quad is first converted with `to_triple` and the error
    /// it reports (quad outside of the default graph) is returned to the caller.
    pub async fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
        let quad = quad.into();
        match &mut self.formatter {
            // Dataset formats: named graphs are supported.
            ToTokioAsyncWriteQuadWriterKind::NQuads(nquads) => nquads.write_quad(quad).await,
            ToTokioAsyncWriteQuadWriterKind::TriG(trig) => trig.write_quad(quad).await,
            // Graph formats: only the default graph can be written.
            ToTokioAsyncWriteQuadWriterKind::NTriples(ntriples) => {
                let triple = to_triple(quad)?;
                ntriples.write_triple(triple).await
            }
            ToTokioAsyncWriteQuadWriterKind::RdfXml(rdf_xml) => {
                let triple = to_triple(quad)?;
                rdf_xml.write_triple(triple).await
            }
            ToTokioAsyncWriteQuadWriterKind::Turtle(turtle) => {
                let triple = to_triple(quad)?;
                turtle.write_triple(triple).await
            }
        }
    }

    /// Serializes a single [`TripleRef`] by writing it as a quad of the default graph.
    pub async fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let default_graph_quad = triple.into().in_graph(GraphNameRef::DefaultGraph);
        self.write_quad(default_graph_quad).await
    }

    /// Writes the last bytes of the file and gives back the wrapped writer.
    ///
    /// Note that this function does not flush the writer. You need to do that yourself if the writer is buffered.
    pub async fn finish(self) -> io::Result<W> {
        let write = match self.formatter {
            // These two writers have a synchronous, infallible `finish`.
            ToTokioAsyncWriteQuadWriterKind::NQuads(nquads) => nquads.finish(),
            ToTokioAsyncWriteQuadWriterKind::NTriples(ntriples) => ntriples.finish(),
            // These ones may still have to write closing content, which can fail.
            ToTokioAsyncWriteQuadWriterKind::RdfXml(rdf_xml) => rdf_xml.finish().await?,
            ToTokioAsyncWriteQuadWriterKind::TriG(trig) => trig.finish().await?,
            ToTokioAsyncWriteQuadWriterKind::Turtle(turtle) => turtle.finish().await?,
        };
        Ok(write)
    }
}
/// Converts a quad into the triple it carries.
///
/// Fails with [`io::ErrorKind::InvalidInput`] if the quad is not in the default graph,
/// because a graph-only format has nowhere to store the graph name.
fn to_triple<'a>(quad: impl Into<QuadRef<'a>>) -> io::Result<TripleRef<'a>> {
    let quad = quad.into();
    if !quad.graph_name.is_default_graph() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "Only quads in the default graph can be serialized to a RDF graph format",
        ));
    }
    Ok(quad.into())
}

@ -1,36 +0,0 @@
[package]
name = "oxrdfxml"
version = "0.1.0-alpha.5"
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDFXML", "XML", "RDF"]
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfxml"
description = """
Parser and serializer for the RDF/XML format
"""
documentation = "https://docs.rs/oxrdfxml"
edition.workspace = true
rust-version.workspace = true
[features]
default = []
async-tokio = ["dep:tokio", "quick-xml/async-tokio"]
[dependencies]
oxilangtag.workspace = true
oxiri.workspace = true
oxrdf.workspace = true
quick-xml.workspace = true
thiserror.workspace = true
tokio = { workspace = true, optional = true, features = ["io-util"] }
[dev-dependencies]
tokio = { workspace = true, features = ["rt", "macros"] }
[lints]
workspace = true
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]

@ -1,56 +0,0 @@
OxRDF/XML
=========
[![Latest Version](https://img.shields.io/crates/v/oxrdfxml.svg)](https://crates.io/crates/oxrdfxml)
[![Released API docs](https://docs.rs/oxrdfxml/badge.svg)](https://docs.rs/oxrdfxml)
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfxml)](https://crates.io/crates/oxrdfxml)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
OxRdfXml is a parser and serializer for [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/).
The entry points of this library are the two [`RdfXmlParser`] and [`RdfXmlSerializer`] structs.
Usage example counting the number of people in an RDF/XML file:
```rust
use oxrdf::{NamedNodeRef, vocab::rdf};
use oxrdfxml::RdfXmlParser;
fn main() {
let file = br#"<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
<rdf:Description rdf:about="http://example.com/foo">
<rdf:type rdf:resource="http://schema.org/Person" />
<schema:name>Foo</schema:name>
</rdf:Description>
<schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
</rdf:RDF>"#;
let schema_person = NamedNodeRef::new("http://schema.org/Person").unwrap();
let mut count = 0;
for triple in RdfXmlParser::new().parse_read(file.as_ref()) {
let triple = triple.unwrap();
if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
count += 1;
}
}
assert_eq!(2, count);
}
```
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -1,89 +0,0 @@
use oxilangtag::LanguageTagParseError;
use oxiri::IriParseError;
use std::io;
use std::sync::Arc;
/// Error returned during RDF/XML parsing.
///
/// It is either an I/O failure or an error in the syntax of the parsed content.
/// Both variants display and expose as source the error they wrap (`#[error(transparent)]`),
/// and the whole error can be converted into an [`io::Error`] with `From`/`Into`.
#[derive(Debug, thiserror::Error)]
pub enum RdfXmlParseError {
/// I/O error during parsing (file not found...).
#[error(transparent)]
Io(#[from] io::Error),
/// An error in the file syntax.
#[error(transparent)]
Syntax(#[from] RdfXmlSyntaxError),
}
impl From<RdfXmlParseError> for io::Error {
    /// Unwraps an I/O error as is and delegates syntax errors to the
    /// `RdfXmlSyntaxError` to `io::Error` conversion.
    #[inline]
    fn from(error: RdfXmlParseError) -> Self {
        match error {
            RdfXmlParseError::Syntax(syntax_error) => Self::from(syntax_error),
            RdfXmlParseError::Io(io_error) => io_error,
        }
    }
}
impl From<quick_xml::Error> for RdfXmlParseError {
#[inline]
fn from(error: quick_xml::Error) -> Self {
match error {
quick_xml::Error::Io(error) => {
Self::Io(Arc::try_unwrap(error).unwrap_or_else(|e| io::Error::new(e.kind(), e)))
}
_ => Self::Syntax(RdfXmlSyntaxError(SyntaxErrorKind::Xml(error))),
}
}
}
/// An error in the syntax of the parsed file.
///
/// This is an opaque wrapper: the actual cause is kept in the crate-visible [`SyntaxErrorKind`]
/// field and the `Display` and `source` implementations are forwarded to it (`#[error(transparent)]`).
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct RdfXmlSyntaxError(#[from] pub(crate) SyntaxErrorKind);
/// The possible causes of a [`RdfXmlSyntaxError`].
#[derive(Debug, thiserror::Error)]
pub enum SyntaxErrorKind {
/// An error reported by the `quick_xml` library, displayed as is.
#[error(transparent)]
Xml(#[from] quick_xml::Error),
/// An IRI that failed to parse, with the offending text and the parser error.
#[error("error while parsing IRI '{iri}': {error}")]
InvalidIri {
iri: String,
#[source]
error: IriParseError,
},
/// A language tag that failed to parse, with the offending text and the parser error.
#[error("error while parsing language tag '{tag}': {error}")]
InvalidLanguageTag {
tag: String,
#[source]
error: LanguageTagParseError,
},
/// Any other syntax error, described by a free-form message.
#[error("{0}")]
Msg(String),
}
impl RdfXmlSyntaxError {
    /// Creates a syntax error that only carries a human-readable message.
    #[inline]
    pub(crate) fn msg(msg: impl Into<String>) -> Self {
        let message: String = msg.into();
        Self(SyntaxErrorKind::Msg(message))
    }
}
impl From<RdfXmlSyntaxError> for io::Error {
#[inline]
fn from(error: RdfXmlSyntaxError) -> Self {
match error.0 {
SyntaxErrorKind::Xml(error) => match error {
quick_xml::Error::Io(error) => {
Arc::try_unwrap(error).unwrap_or_else(|e| Self::new(e.kind(), e))
}
quick_xml::Error::UnexpectedEof(error) => {
Self::new(io::ErrorKind::UnexpectedEof, error)
}
_ => Self::new(io::ErrorKind::InvalidData, error),
},
SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg),
_ => Self::new(io::ErrorKind::InvalidData, error),
}
}
}

@ -1,18 +0,0 @@
#![doc = include_str!("../README.md")]
#![doc(test(attr(deny(warnings))))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
mod error;
mod parser;
mod serializer;
mod utils;
pub use error::{RdfXmlParseError, RdfXmlSyntaxError};
#[cfg(feature = "async-tokio")]
pub use parser::FromTokioAsyncReadRdfXmlReader;
pub use parser::{FromReadRdfXmlReader, RdfXmlParser};
#[cfg(feature = "async-tokio")]
pub use serializer::ToTokioAsyncWriteRdfXmlWriter;
pub use serializer::{RdfXmlSerializer, ToWriteRdfXmlWriter};

File diff suppressed because it is too large Load Diff

@ -1,461 +0,0 @@
use crate::utils::*;
use oxiri::{Iri, IriParseError};
use oxrdf::vocab::rdf;
use oxrdf::{NamedNodeRef, Subject, SubjectRef, TermRef, TripleRef};
use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use quick_xml::Writer;
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::io;
use std::io::Write;
use std::sync::Arc;
#[cfg(feature = "async-tokio")]
use tokio::io::AsyncWrite;
/// A [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) serializer.
///
/// Namespace prefixes used to abbreviate element names can be registered with `with_prefix`.
///
/// ```
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
/// use oxrdfxml::RdfXmlSerializer;
///
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://schema.org/name")?,
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
/// ))?;
/// assert_eq!(
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
/// writer.finish()?.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
#[must_use]
pub struct RdfXmlSerializer {
// Registered namespaces, keyed by namespace IRI with the prefix name as value.
prefixes: BTreeMap<String, String>,
}
impl RdfXmlSerializer {
    /// Builds a new [`RdfXmlSerializer`] without any custom prefix.
    #[inline]
    pub fn new() -> Self {
        Self {
            prefixes: BTreeMap::new(),
        }
    }

    /// Registers `prefix_name` as the XML namespace prefix to use for the namespace `prefix_iri`.
    ///
    /// Returns an error if `prefix_iri` is rejected by [`Iri::parse`].
    /// Registering the same IRI a second time replaces the name that was bound to it.
    ///
    /// The name `rdf` is reserved for the RDF namespace: a binding of `rdf` to another
    /// IRI is dropped when the writer is built. `prefix_name` itself is not validated.
    #[inline]
    pub fn with_prefix(
        mut self,
        prefix_name: impl Into<String>,
        prefix_iri: impl Into<String>,
    ) -> Result<Self, IriParseError> {
        // The map is keyed by IRI because the writer looks prefixes up from IRIs.
        self.prefixes.insert(
            Iri::parse(prefix_iri.into())?.into_inner(),
            prefix_name.into(),
        );
        Ok(self)
    }

    /// Writes an RDF/XML file to a [`Write`] implementation.
    ///
    /// This writer does unbuffered writes.
    ///
    /// ```
    /// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
    /// use oxrdfxml::RdfXmlSerializer;
    ///
    /// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
    /// writer.write_triple(TripleRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    ///     NamedNodeRef::new("http://schema.org/Person")?,
    /// ))?;
    /// writer.write_triple(TripleRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://schema.org/name")?,
    ///     LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
    /// ))?;
    /// assert_eq!(
    ///     b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
    ///     writer.finish()?.as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    #[allow(clippy::unused_self)]
    pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteRdfXmlWriter<W> {
        ToWriteRdfXmlWriter {
            // One tab per nesting level.
            writer: Writer::new_with_indent(write, b'\t', 1),
            inner: self.inner_writer(),
        }
    }

    /// Writes an RDF/XML file to an [`AsyncWrite`] implementation.
    ///
    /// This writer does unbuffered writes.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef};
    /// use oxrdfxml::RdfXmlSerializer;
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new());
    /// writer.write_triple(TripleRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    ///     NamedNodeRef::new("http://schema.org/Person")?,
    /// )).await?;
    /// writer.write_triple(TripleRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://schema.org/name")?,
    ///     LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
    /// )).await?;
    /// assert_eq!(
    ///     b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
    ///     writer.finish().await?.as_slice()
    /// );
    /// # Ok(())
    /// # }
    /// ```
    #[allow(clippy::unused_self)]
    #[cfg(feature = "async-tokio")]
    pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>(
        self,
        write: W,
    ) -> ToTokioAsyncWriteRdfXmlWriter<W> {
        ToTokioAsyncWriteRdfXmlWriter {
            // One tab per nesting level.
            writer: Writer::new_with_indent(write, b'\t', 1),
            inner: self.inner_writer(),
        }
    }

    /// Builds the writer state shared by the synchronous and asynchronous writers.
    fn inner_writer(mut self) -> InnerRdfXmlWriter {
        // The writer emits hard-coded `rdf:`-prefixed names (`rdf:RDF`, `rdf:about`...),
        // so `rdf` must be bound to the RDF namespace and to nothing else.
        // The map is keyed by IRI: without this cleanup a user binding of `rdf` to another
        // IRI would survive the insertion below and `xmlns:rdf` would be declared twice.
        self.prefixes
            .retain(|_, prefix_name| prefix_name.as_str() != "rdf");
        self.prefixes.insert(
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#".into(),
            "rdf".into(),
        );
        InnerRdfXmlWriter {
            current_subject: None,
            current_resource_tag: None,
            prefixes: self.prefixes,
        }
    }
}
/// Writes an RDF/XML file to a [`Write`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_write`].
///
/// ```
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
/// use oxrdfxml::RdfXmlSerializer;
///
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://schema.org/name")?,
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
/// ))?;
/// assert_eq!(
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
/// writer.finish()?.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct ToWriteRdfXmlWriter<W: Write> {
// XML event writer wrapping the output.
writer: Writer<W>,
// Output-independent serialization state that turns triples into XML events.
inner: InnerRdfXmlWriter,
}
impl<W: Write> ToWriteRdfXmlWriter<W> {
    /// Serializes one more triple.
    ///
    /// The XML events are first collected in memory and only written to the
    /// output if the triple has been accepted.
    #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)]
    pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let mut events = Vec::new();
        self.inner.write_triple(t, &mut events)?;
        self.flush_buffer(&mut events)
    }

    /// Writes the closing tags and gives back the underlying [`Write`].
    pub fn finish(mut self) -> io::Result<W> {
        let mut events = Vec::new();
        self.inner.finish(&mut events);
        self.flush_buffer(&mut events)?;
        Ok(self.writer.into_inner())
    }

    /// Empties `buffer` into the XML writer, in order, stopping at the first write failure.
    fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> {
        buffer
            .drain(..)
            .try_for_each(|event| self.writer.write_event(event).map_err(map_err))
    }
}
/// Writes an RDF/XML file to an [`AsyncWrite`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_tokio_async_write`].
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef};
/// use oxrdfxml::RdfXmlSerializer;
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new());
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// )).await?;
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://schema.org/name")?,
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
/// )).await?;
/// assert_eq!(
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
/// writer.finish().await?.as_slice()
/// );
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct ToTokioAsyncWriteRdfXmlWriter<W: AsyncWrite + Unpin> {
// XML event writer wrapping the asynchronous output.
writer: Writer<W>,
// Output-independent serialization state that turns triples into XML events.
inner: InnerRdfXmlWriter,
}
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteRdfXmlWriter<W> {
    /// Serializes one more triple.
    ///
    /// The XML events are first collected in memory and only written to the
    /// output if the triple has been accepted.
    #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)]
    pub async fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let mut events = Vec::new();
        self.inner.write_triple(t, &mut events)?;
        self.flush_buffer(&mut events).await
    }

    /// Writes the closing tags and gives back the underlying [`AsyncWrite`].
    pub async fn finish(mut self) -> io::Result<W> {
        let mut events = Vec::new();
        self.inner.finish(&mut events);
        self.flush_buffer(&mut events).await?;
        Ok(self.writer.into_inner())
    }

    /// Empties `buffer` into the XML writer, in order, stopping at the first write failure.
    async fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> {
        for event in buffer.drain(..) {
            let written = self.writer.write_event_async(event).await;
            written.map_err(map_err)?;
        }
        Ok(())
    }
}
/// Serialization state shared by the synchronous and asynchronous RDF/XML writers.
///
/// It does no I/O itself: its methods push `quick_xml` events into a buffer provided by the caller.
pub struct InnerRdfXmlWriter {
// Subject of the node element currently open, `None` until the first triple is written.
current_subject: Option<Subject>,
// Name of the open node element when it is a typed node, `None` when it is `rdf:Description`.
current_resource_tag: Option<String>,
// Declared namespaces, keyed by namespace IRI with the prefix name as value.
prefixes: BTreeMap<String, String>,
}
impl InnerRdfXmlWriter {
/// Pushes into `output` the XML events needed to serialize the triple `t`.
///
/// Consecutive triples with the same subject are grouped in a single node element.
/// An error of kind `InvalidInput` is returned if the subject is not a named or blank node,
/// or if the object is not a named node, a blank node or a literal.
///
/// NOTE(review): `current_subject` is updated and events are pushed before the subject and
/// object kinds are checked. The callers visible in this file drop the buffered events on
/// error, so a later triple could be written without the opening tags. Confirm whether a
/// writer is expected to be discarded after an error.
#[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)]
fn write_triple<'a>(
&mut self,
t: impl Into<TripleRef<'a>>,
output: &mut Vec<Event<'a>>,
) -> io::Result<()> {
// Nothing has been written yet: start with the XML declaration and the root element.
if self.current_subject.is_none() {
self.write_start(output);
}
let triple = t.into();
// We open a new rdf:Description if useful
if self.current_subject.as_ref().map(Subject::as_ref) != Some(triple.subject) {
// Close the node element of the previous subject, using its typed name if it had one.
if self.current_subject.is_some() {
output.push(Event::End(
self.current_resource_tag
.take()
.map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new),
));
}
self.current_subject = Some(triple.subject.into_owned());
// If the first triple of the subject is an rdf:type with a named node as object,
// the type is used as the element name instead of a generic rdf:Description.
let (mut description_open, with_type_tag) = if triple.predicate == rdf::TYPE {
if let TermRef::NamedNode(t) = triple.object {
let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(t);
let mut description_open = BytesStart::new(prop_qname.clone());
if let Some(prop_xmlns) = prop_xmlns {
description_open.push_attribute(prop_xmlns);
}
// Remembered so that the matching end tag can be written later.
self.current_resource_tag = Some(prop_qname.into_owned());
(description_open, true)
} else {
(BytesStart::new("rdf:Description"), false)
}
} else {
(BytesStart::new("rdf:Description"), false)
};
// Identify the subject on the node element.
match triple.subject {
SubjectRef::NamedNode(node) => {
description_open.push_attribute(("rdf:about", node.as_str()))
}
SubjectRef::BlankNode(node) => {
description_open.push_attribute(("rdf:nodeID", node.as_str()))
}
_ => {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"RDF/XML only supports named or blank subject",
))
}
}
output.push(Event::Start(description_open));
// The rdf:type triple is already encoded by the element name.
if with_type_tag {
return Ok(()); // No need for a value
}
}
// Property element for the predicate.
let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(triple.predicate);
let mut property_open = BytesStart::new(prop_qname.clone());
if let Some(prop_xmlns) = prop_xmlns {
property_open.push_attribute(prop_xmlns);
}
// Resources are referenced with an attribute, only literals have a text content.
let content = match triple.object {
TermRef::NamedNode(node) => {
property_open.push_attribute(("rdf:resource", node.as_str()));
None
}
TermRef::BlankNode(node) => {
property_open.push_attribute(("rdf:nodeID", node.as_str()));
None
}
TermRef::Literal(literal) => {
// A language tag takes precedence, the datatype is only written for non-plain literals.
if let Some(language) = literal.language() {
property_open.push_attribute(("xml:lang", language));
} else if !literal.is_plain() {
property_open.push_attribute(("rdf:datatype", literal.datatype().as_str()));
}
Some(literal.value())
}
_ => {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"RDF/XML only supports named, blank or literal object",
))
}
};
if let Some(content) = content {
output.push(Event::Start(property_open));
output.push(Event::Text(BytesText::new(content)));
output.push(Event::End(BytesEnd::new(prop_qname)));
} else {
// No text content: a self-closing element is enough.
output.push(Event::Empty(property_open));
}
Ok(())
}
/// Pushes the XML declaration and the opening `rdf:RDF` tag with one `xmlns:` attribute per registered prefix.
fn write_start(&self, output: &mut Vec<Event<'_>>) {
output.push(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None)));
let mut rdf_open = BytesStart::new("rdf:RDF");
// The map is keyed by namespace IRI, the value is the prefix name.
for (prefix_value, prefix_name) in &self.prefixes {
rdf_open.push_attribute((
format!("xmlns:{prefix_name}").as_str(),
prefix_value.as_str(),
));
}
output.push(Event::Start(rdf_open))
}
/// Pushes the closing tags of the document.
///
/// If no triple has been written, the document start is emitted first so that the output is an empty `rdf:RDF` element.
fn finish(&mut self, output: &mut Vec<Event<'static>>) {
if self.current_subject.is_some() {
output.push(Event::End(
self.current_resource_tag
.take()
.map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new),
));
} else {
self.write_start(output);
}
output.push(Event::End(BytesEnd::new("rdf:RDF")));
}
/// Computes the XML name to use for the IRI `uri`.
///
/// Returns the name and, when the namespace of the IRI is not a registered prefix,
/// the namespace declaration attribute that must be added to the element.
fn uri_to_qname_and_xmlns<'a>(
&self,
uri: NamedNodeRef<'a>,
) -> (Cow<'a, str>, Option<(&'a str, &'a str)>) {
let (prop_prefix, prop_value) = split_iri(uri.as_str());
if let Some(prop_prefix) = self.prefixes.get(prop_prefix) {
// Registered namespace: the prefix is already declared on the root element.
(
if prop_prefix.is_empty() {
Cow::Borrowed(prop_value)
} else {
Cow::Owned(format!("{prop_prefix}:{prop_value}"))
},
None,
)
} else if prop_prefix == "http://www.w3.org/2000/xmlns/" {
(Cow::Owned(format!("xmlns:{prop_value}")), None)
} else if prop_value.is_empty() {
// The IRI could not be split: the whole IRI is declared as the namespace of an ad-hoc `p` prefix.
(Cow::Borrowed("p:"), Some(("xmlns:p", prop_prefix)))
} else {
// Unregistered namespace: it is declared as the default namespace of this element.
(Cow::Borrowed(prop_value), Some(("xmlns", prop_prefix)))
}
}
}
fn map_err(error: quick_xml::Error) -> io::Error {
if let quick_xml::Error::Io(error) = error {
Arc::try_unwrap(error).unwrap_or_else(|error| io::Error::new(error.kind(), error))
} else {
io::Error::new(io::ErrorKind::Other, error)
}
}
/// Splits an IRI into a namespace part and a local name usable as an XML name.
///
/// The local name is the longest suffix made of XML name characters (`:` excluded) that
/// starts with a name start character. It is empty if there is no such suffix, and the
/// whole IRI is then returned as the namespace.
fn split_iri(iri: &str) -> (&str, &str) {
    let split_position = iri
        // Last character that cannot be part of the local name.
        .rfind(|c| !is_name_char(c) || c == ':')
        .and_then(|base| {
            // First character after it that is allowed to start a name.
            iri[base..]
                .find(|c| is_name_start_char(c) && c != ':')
                .map(|offset| base + offset)
        });
    match split_position {
        Some(position) => iri.split_at(position),
        None => (iri, ""),
    }
}
#[cfg(test)]
mod tests {
use super::*;
/// Checks how `split_iri` cuts IRIs ending with a `/`-separated name, with nothing
/// after the last `/`, with a `#` fragment and with a `:`-separated URN part.
#[test]
fn test_split_iri() {
assert_eq!(
split_iri("http://schema.org/Person"),
("http://schema.org/", "Person")
);
// Nothing after the last separator: the local name is empty.
assert_eq!(split_iri("http://schema.org/"), ("http://schema.org/", ""));
assert_eq!(
split_iri("http://schema.org#foo"),
("http://schema.org#", "foo")
);
// ':' is treated as a separator, it stays in the namespace part.
assert_eq!(split_iri("urn:isbn:foo"), ("urn:isbn:", "foo"));
}
}

@ -1,26 +0,0 @@
/// Returns `true` if `c` is allowed as the first character of an XML name.
///
/// Implements the `NameStartChar` production:
/// ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
pub fn is_name_start_char(c: char) -> bool {
    match c {
        // ASCII punctuation and letters
        ':' | '_' => true,
        'A'..='Z' | 'a'..='z' => true,
        // Latin-1 letters (U+00D7 and U+00F7 are excluded)
        '\u{00C0}'..='\u{00D6}' | '\u{00D8}'..='\u{00F6}' | '\u{00F8}'..='\u{02FF}' => true,
        // Rest of the Basic Multilingual Plane
        '\u{0370}'..='\u{037D}' | '\u{037F}'..='\u{1FFF}' => true,
        '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' => true,
        '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' => true,
        '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' => true,
        // Supplementary planes
        '\u{10000}'..='\u{EFFFF}' => true,
        _ => false,
    }
}
/// Returns `true` if `c` is allowed inside an XML name.
///
/// Implements the `NameChar` production:
/// NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
pub fn is_name_char(c: char) -> bool {
    match c {
        // Characters that are only allowed after the first position
        '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}' => {
            true
        }
        _ => is_name_start_char(c),
    }
}

@ -1,32 +1,21 @@
[package]
name = "oxsdatatypes"
version = "0.2.0-alpha.1"
authors.workspace = true
license.workspace = true
version = "0.1.3"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
keywords = ["XSD"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxsdatatypes"
homepage = "https://oxigraph.org/"
description = """
An implementation of some XSD datatypes for SPARQL implementations
"""
documentation = "https://docs.rs/oxsdatatypes"
edition.workspace = true
rust-version.workspace = true
[features]
js = ["js-sys"]
custom-now = []
[dependencies]
thiserror.workspace = true
serde.workspace = true
edition = "2021"
rust-version = "1.60"
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]
js-sys = { workspace = true, optional = true }
[lints]
workspace = true
js-sys = "0.3"
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]

@ -5,7 +5,7 @@ oxsdatatypes
[![Released API docs](https://docs.rs/oxsdatatypes/badge.svg)](https://docs.rs/oxsdatatypes)
[![Crates.io downloads](https://img.shields.io/crates/d/oxsdatatypes)](https://crates.io/crates/oxsdatatypes)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
oxsdatatypes is an implementation of some [XML Schema Definition Language Datatypes](https://www.w3.org/TR/xmlschema11-2/).
Its main aim is to ease the implementation of SPARQL and XPath.
@ -32,22 +32,6 @@ Each datatype provides:
* `from_be_bytes` and `to_be_bytes` methods for serialization.
### `DateTime::now` behavior
The `DateTime::now()` function needs special OS support.
Currently:
- If the `custom-now` feature is enabled, a function computing `now` must be set:
```rust
use oxsdatatypes::Duration;
#[no_mangle]
fn custom_ox_now() -> Duration {
unimplemented!("now implementation")
}
```
- For `wasm32-unknown-unknown` if the `js` feature is enabled the `Date.now()` ECMAScript API is used.
- For all other targets `SystemTime::now()` is used.
## License
This project is licensed under either of

@ -1,14 +1,11 @@
use crate::{Decimal, Double, Float, Integer};
use serde::{Deserialize, Serialize};
use std::fmt;
use std::str::{FromStr, ParseBoolError};
/// [XML Schema `boolean` datatype](https://www.w3.org/TR/xmlschema11-2/#boolean)
///
/// Uses internally a [`bool`].
#[derive(
Debug, Clone, Copy, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize,
)]
#[derive(Debug, Clone, Copy, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[repr(transparent)]
pub struct Boolean {
value: bool,
@ -17,8 +14,7 @@ pub struct Boolean {
impl Boolean {
/// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
#[inline]
#[must_use]
pub fn is_identical_with(self, other: Self) -> bool {
pub fn is_identical_with(&self, other: &Self) -> bool {
self == other
}
}
@ -69,7 +65,7 @@ impl FromStr for Boolean {
type Err = ParseBoolError;
#[inline]
fn from_str(input: &str) -> Result<Self, Self::Err> {
fn from_str(input: &str) -> Result<Self, ParseBoolError> {
Ok(match input {
"true" | "1" => true,
"false" | "0" => false,
@ -87,7 +83,6 @@ impl fmt::Display for Boolean {
}
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
use super::*;

File diff suppressed because it is too large Load Diff

@ -1,7 +1,8 @@
use crate::{Boolean, Double, Float, Integer, TooLargeForIntegerError};
use serde::{Deserialize, Serialize};
use crate::{Boolean, Double, Float, Integer};
use std::error::Error;
use std::fmt;
use std::fmt::Write;
use std::ops::Neg;
use std::str::FromStr;
const DECIMAL_PART_DIGITS: u32 = 18;
@ -13,39 +14,26 @@ const DECIMAL_PART_POW_MINUS_ONE: i128 = 100_000_000_000_000_000;
/// It stores the decimal in a fix point encoding allowing nearly 18 digits before and 18 digits after ".".
///
/// It stores the value in a [`i128`] integer after multiplying it by 10¹⁸.
#[derive(
Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash, Default, Serialize, Deserialize,
)]
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash, Default)]
pub struct Decimal {
value: i128, // value * 10^18
}
impl Decimal {
pub const MAX: Self = Self { value: i128::MAX };
pub const MIN: Self = Self { value: i128::MIN };
#[cfg(test)]
pub const STEP: Self = Self { value: 1 };
/// Constructs the decimal i / 10^n
#[inline]
pub const fn new(i: i128, n: u32) -> Result<Self, TooLargeForDecimalError> {
let Some(shift) = DECIMAL_PART_DIGITS.checked_sub(n) else {
return Err(TooLargeForDecimalError);
};
let Some(value) = i.checked_mul(10_i128.pow(shift)) else {
return Err(TooLargeForDecimalError);
};
Ok(Self { value })
}
pub(crate) const fn new_from_i128_unchecked(value: i128) -> Self {
Self {
value: value * DECIMAL_PART_POW,
}
pub fn new(i: i128, n: u32) -> Result<Self, DecimalOverflowError> {
let shift = DECIMAL_PART_DIGITS
.checked_sub(n)
.ok_or(DecimalOverflowError)?;
Ok(Self {
value: i
.checked_mul(10_i128.pow(shift))
.ok_or(DecimalOverflowError)?,
})
}
#[inline]
#[must_use]
pub fn from_be_bytes(bytes: [u8; 16]) -> Self {
Self {
value: i128::from_be_bytes(bytes),
@ -53,39 +41,29 @@ impl Decimal {
}
#[inline]
#[must_use]
pub fn to_be_bytes(self) -> [u8; 16] {
self.value.to_be_bytes()
}
/// [op:numeric-add](https://www.w3.org/TR/xpath-functions-31/#func-numeric-add)
///
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_add(self, rhs: impl Into<Self>) -> Option<Self> {
pub fn checked_add(&self, rhs: impl Into<Self>) -> Option<Self> {
Some(Self {
value: self.value.checked_add(rhs.into().value)?,
})
}
/// [op:numeric-subtract](https://www.w3.org/TR/xpath-functions-31/#func-numeric-subtract)
///
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_sub(self, rhs: impl Into<Self>) -> Option<Self> {
pub fn checked_sub(&self, rhs: impl Into<Self>) -> Option<Self> {
Some(Self {
value: self.value.checked_sub(rhs.into().value)?,
})
}
/// [op:numeric-multiply](https://www.w3.org/TR/xpath-functions-31/#func-numeric-multiply)
///
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_mul(self, rhs: impl Into<Self>) -> Option<Self> {
pub fn checked_mul(&self, rhs: impl Into<Self>) -> Option<Self> {
// Idea: we shift right as much as possible to keep as much precision as possible
// Do the multiplication and do the required left shift
let mut left = self.value;
@ -116,11 +94,8 @@ impl Decimal {
}
/// [op:numeric-divide](https://www.w3.org/TR/xpath-functions-31/#func-numeric-divide)
///
/// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_div(self, rhs: impl Into<Self>) -> Option<Self> {
pub fn checked_div(&self, rhs: impl Into<Self>) -> Option<Self> {
// Idea: we shift the dividend left as much as possible to keep as much precision as possible
// And we shift right the divisor as much as possible
// Do the multiplication and do the required shift
@ -128,6 +103,7 @@ impl Decimal {
let mut shift_left = 0_u32;
if left != 0 {
while let Some(r) = left.checked_mul(10) {
assert_eq!(r / 10, left);
left = r;
shift_left += 1;
}
@ -151,122 +127,100 @@ impl Decimal {
}
/// [op:numeric-mod](https://www.w3.org/TR/xpath-functions-31/#func-numeric-mod)
///
/// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_rem(self, rhs: impl Into<Self>) -> Option<Self> {
pub fn checked_rem(&self, rhs: impl Into<Self>) -> Option<Self> {
Some(Self {
value: self.value.checked_rem(rhs.into().value)?,
})
}
/// Euclidean remainder
///
/// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_rem_euclid(self, rhs: impl Into<Self>) -> Option<Self> {
pub fn checked_rem_euclid(&self, rhs: impl Into<Self>) -> Option<Self> {
Some(Self {
value: self.value.checked_rem_euclid(rhs.into().value)?,
})
}
/// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions-31/#func-numeric-unary-minus)
///
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_neg(self) -> Option<Self> {
pub fn checked_neg(&self) -> Option<Self> {
Some(Self {
value: self.value.checked_neg()?,
})
}
/// [fn:abs](https://www.w3.org/TR/xpath-functions-31/#func-abs)
///
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_abs(self) -> Option<Self> {
Some(Self {
value: self.value.checked_abs()?,
})
pub const fn abs(&self) -> Self {
Self {
value: self.value.abs(),
}
}
/// [fn:round](https://www.w3.org/TR/xpath-functions-31/#func-round)
///
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_round(self) -> Option<Self> {
pub fn round(&self) -> Self {
let value = self.value / DECIMAL_PART_POW_MINUS_ONE;
Some(Self {
Self {
value: if value >= 0 {
value / 10 + i128::from(value % 10 >= 5)
(value / 10 + i128::from(value % 10 >= 5)) * DECIMAL_PART_POW
} else {
value / 10 - i128::from(-value % 10 > 5)
(value / 10 - i128::from(-value % 10 > 5)) * DECIMAL_PART_POW
},
}
.checked_mul(DECIMAL_PART_POW)?,
})
}
/// [fn:ceiling](https://www.w3.org/TR/xpath-functions-31/#func-ceiling)
///
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_ceil(self) -> Option<Self> {
Some(Self {
value: if self.value > 0 && self.value % DECIMAL_PART_POW != 0 {
self.value / DECIMAL_PART_POW + 1
pub fn ceil(&self) -> Self {
Self {
value: if self.value >= 0 && self.value % DECIMAL_PART_POW != 0 {
(self.value / DECIMAL_PART_POW + 1) * DECIMAL_PART_POW
} else {
self.value / DECIMAL_PART_POW
(self.value / DECIMAL_PART_POW) * DECIMAL_PART_POW
},
}
.checked_mul(DECIMAL_PART_POW)?,
})
}
/// [fn:floor](https://www.w3.org/TR/xpath-functions-31/#func-floor)
///
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_floor(self) -> Option<Self> {
Some(Self {
pub fn floor(&self) -> Self {
Self {
value: if self.value >= 0 || self.value % DECIMAL_PART_POW == 0 {
self.value / DECIMAL_PART_POW
(self.value / DECIMAL_PART_POW) * DECIMAL_PART_POW
} else {
self.value / DECIMAL_PART_POW - 1
(self.value / DECIMAL_PART_POW - 1) * DECIMAL_PART_POW
},
}
.checked_mul(DECIMAL_PART_POW)?,
})
}
#[inline]
#[must_use]
pub const fn is_negative(self) -> bool {
pub const fn is_negative(&self) -> bool {
self.value < 0
}
#[inline]
#[must_use]
pub const fn is_positive(self) -> bool {
pub const fn is_positive(&self) -> bool {
self.value > 0
}
/// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
#[inline]
#[must_use]
pub fn is_identical_with(self, other: Self) -> bool {
pub fn is_identical_with(&self, other: &Self) -> bool {
self == other
}
#[inline]
#[must_use]
pub(super) const fn as_i128(self) -> i128 {
pub(super) const fn as_i128(&self) -> i128 {
self.value / DECIMAL_PART_POW
}
pub const MIN: Self = Self { value: i128::MIN };
pub const MAX: Self = Self { value: i128::MAX };
#[cfg(test)]
pub const STEP: Self = Self { value: 1 };
}
impl From<bool> for Decimal {
@ -358,28 +312,28 @@ impl From<Integer> for Decimal {
}
impl TryFrom<i128> for Decimal {
type Error = TooLargeForDecimalError;
type Error = DecimalOverflowError;
#[inline]
fn try_from(value: i128) -> Result<Self, Self::Error> {
fn try_from(value: i128) -> Result<Self, DecimalOverflowError> {
Ok(Self {
value: value
.checked_mul(DECIMAL_PART_POW)
.ok_or(TooLargeForDecimalError)?,
.ok_or(DecimalOverflowError)?,
})
}
}
impl TryFrom<u128> for Decimal {
type Error = TooLargeForDecimalError;
type Error = DecimalOverflowError;
#[inline]
fn try_from(value: u128) -> Result<Self, Self::Error> {
fn try_from(value: u128) -> Result<Self, DecimalOverflowError> {
Ok(Self {
value: i128::try_from(value)
.map_err(|_| TooLargeForDecimalError)?
.map_err(|_| DecimalOverflowError)?
.checked_mul(DECIMAL_PART_POW)
.ok_or(TooLargeForDecimalError)?,
.ok_or(DecimalOverflowError)?,
})
}
}
@ -392,27 +346,27 @@ impl From<Boolean> for Decimal {
}
impl TryFrom<Float> for Decimal {
type Error = TooLargeForDecimalError;
type Error = DecimalOverflowError;
#[inline]
fn try_from(value: Float) -> Result<Self, Self::Error> {
fn try_from(value: Float) -> Result<Self, DecimalOverflowError> {
Double::from(value).try_into()
}
}
impl TryFrom<Double> for Decimal {
type Error = TooLargeForDecimalError;
type Error = DecimalOverflowError;
#[inline]
#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
fn try_from(value: Double) -> Result<Self, Self::Error> {
fn try_from(value: Double) -> Result<Self, DecimalOverflowError> {
let shifted = f64::from(value) * (DECIMAL_PART_POW as f64);
if (i128::MIN as f64) <= shifted && shifted <= (i128::MAX as f64) {
if shifted.is_finite() && (i128::MIN as f64) <= shifted && shifted <= (i128::MAX as f64) {
Ok(Self {
value: shifted as i128,
})
} else {
Err(TooLargeForDecimalError)
Err(DecimalOverflowError)
}
}
}
@ -445,17 +399,17 @@ impl From<Decimal> for Double {
}
impl TryFrom<Decimal> for Integer {
type Error = TooLargeForIntegerError;
type Error = DecimalOverflowError;
#[inline]
fn try_from(value: Decimal) -> Result<Self, Self::Error> {
fn try_from(value: Decimal) -> Result<Self, DecimalOverflowError> {
Ok(i64::try_from(
value
.value
.checked_div(DECIMAL_PART_POW)
.ok_or(TooLargeForIntegerError)?,
.ok_or(DecimalOverflowError)?,
)
.map_err(|_| TooLargeForIntegerError)?
.map_err(|_| DecimalOverflowError)?
.into())
}
}
@ -464,7 +418,7 @@ impl FromStr for Decimal {
type Err = ParseDecimalError;
/// Parses decimals lexical mapping
fn from_str(input: &str) -> Result<Self, Self::Err> {
fn from_str(input: &str) -> Result<Self, ParseDecimalError> {
// (\+|-)?([0-9]+(\.[0-9]*)?|\.[0-9]+)
let input = input.as_bytes();
if input.is_empty() {
@ -499,7 +453,7 @@ impl FromStr for Decimal {
}
input = &input[1..];
if input.is_empty() && !with_before_dot {
// We only have a dot
//We only have a dot
return Err(PARSE_UNEXPECTED_END);
}
while input.last() == Some(&b'0') {
@ -520,11 +474,11 @@ impl FromStr for Decimal {
}
}
if exp == 0 {
// Underflow
//Underflow
return Err(PARSE_UNDERFLOW);
}
} else if !with_before_dot {
// It's empty
//It's empty
return Err(PARSE_UNEXPECTED_END);
}
@ -570,7 +524,7 @@ impl fmt::Display for Decimal {
.find_map(|(i, v)| if v == b'0' { None } else { Some(i) })
.unwrap_or(40);
let decimal_part_digits = usize::try_from(DECIMAL_PART_DIGITS).map_err(|_| fmt::Error)?;
let decimal_part_digits = usize::try_from(DECIMAL_PART_DIGITS).unwrap();
if last_non_zero >= decimal_part_digits {
let end = if let Some(mut width) = f.width() {
if self.value.is_negative() {
@ -610,45 +564,78 @@ impl fmt::Display for Decimal {
}
}
/// Unary minus, applied directly to the raw `i128` representation.
///
/// Negating the smallest representable value overflows the underlying `i128`;
/// `checked_neg` reports that case as `None` instead.
impl Neg for Decimal {
    type Output = Self;

    #[inline]
    fn neg(self) -> Self {
        let Self { value } = self;
        Self { value: -value }
    }
}
/// An error when parsing a [`Decimal`].
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct ParseDecimalError(#[from] DecimalParseErrorKind);
#[derive(Debug, Clone)]
pub struct ParseDecimalError {
kind: DecimalParseErrorKind,
}
#[derive(Debug, Clone, thiserror::Error)]
#[derive(Debug, Clone)]
enum DecimalParseErrorKind {
#[error("Value overflow")]
Overflow,
#[error("Value underflow")]
Underflow,
#[error("Unexpected character")]
UnexpectedChar,
#[error("Unexpected end of string")]
UnexpectedEnd,
}
const PARSE_OVERFLOW: ParseDecimalError = ParseDecimalError(DecimalParseErrorKind::Overflow);
const PARSE_UNDERFLOW: ParseDecimalError = ParseDecimalError(DecimalParseErrorKind::Underflow);
const PARSE_UNEXPECTED_CHAR: ParseDecimalError =
ParseDecimalError(DecimalParseErrorKind::UnexpectedChar);
const PARSE_UNEXPECTED_END: ParseDecimalError =
ParseDecimalError(DecimalParseErrorKind::UnexpectedEnd);
const PARSE_OVERFLOW: ParseDecimalError = ParseDecimalError {
kind: DecimalParseErrorKind::Overflow,
};
const PARSE_UNDERFLOW: ParseDecimalError = ParseDecimalError {
kind: DecimalParseErrorKind::Underflow,
};
const PARSE_UNEXPECTED_CHAR: ParseDecimalError = ParseDecimalError {
kind: DecimalParseErrorKind::UnexpectedChar,
};
const PARSE_UNEXPECTED_END: ParseDecimalError = ParseDecimalError {
kind: DecimalParseErrorKind::UnexpectedEnd,
};
impl fmt::Display for ParseDecimalError {
    /// Writes the fixed message associated with the error kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self.kind {
            DecimalParseErrorKind::Overflow => "Value overflow",
            DecimalParseErrorKind::Underflow => "Value underflow",
            DecimalParseErrorKind::UnexpectedChar => "Unexpected character",
            DecimalParseErrorKind::UnexpectedEnd => "Unexpected end of string",
        };
        f.write_str(message)
    }
}
impl Error for ParseDecimalError {}
impl From<TooLargeForDecimalError> for ParseDecimalError {
fn from(_: TooLargeForDecimalError) -> Self {
Self(DecimalParseErrorKind::Overflow)
impl From<DecimalOverflowError> for ParseDecimalError {
fn from(_: DecimalOverflowError) -> Self {
Self {
kind: DecimalParseErrorKind::Overflow,
}
}
}
/// The input is too large to fit into a [`Decimal`].
///
/// Matches XPath [`FOCA0001` error](https://www.w3.org/TR/xpath-functions-31/#ERRFOCA0001).
#[derive(Debug, Clone, Copy, thiserror::Error)]
#[error("Value too large for xsd:decimal internal representation")]
pub struct TooLargeForDecimalError;
/// Signals that a value does not fit in the range a [`Decimal`] can hold.
///
/// Carries no data; its `Display` output is always `Value overflow`.
#[derive(Debug, Clone, Copy)]
pub struct DecimalOverflowError;

impl fmt::Display for DecimalOverflowError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("Value overflow")
    }
}

impl Error for DecimalOverflowError {}
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
use super::*;
@ -664,14 +651,14 @@ mod tests {
#[test]
fn from_str() -> Result<(), ParseDecimalError> {
Decimal::from_str("").unwrap_err();
Decimal::from_str("+").unwrap_err();
Decimal::from_str("-").unwrap_err();
Decimal::from_str(".").unwrap_err();
Decimal::from_str("+.").unwrap_err();
Decimal::from_str("-.").unwrap_err();
Decimal::from_str("a").unwrap_err();
Decimal::from_str(".a").unwrap_err();
assert!(Decimal::from_str("").is_err());
assert!(Decimal::from_str("+").is_err());
assert!(Decimal::from_str("-").is_err());
assert!(Decimal::from_str(".").is_err());
assert!(Decimal::from_str("+.").is_err());
assert!(Decimal::from_str("-.").is_err());
assert!(Decimal::from_str("a").is_err());
assert!(Decimal::from_str(".a").is_err());
assert_eq!(Decimal::from_str("210")?.to_string(), "210");
assert_eq!(Decimal::from_str("1000")?.to_string(), "1000");
assert_eq!(Decimal::from_str("-1.23")?.to_string(), "-1.23");
@ -689,8 +676,8 @@ mod tests {
assert_eq!(Decimal::from_str("-0")?.to_string(), "0");
assert_eq!(Decimal::from_str(&Decimal::MAX.to_string())?, Decimal::MAX);
assert_eq!(Decimal::from_str(&Decimal::MIN.to_string())?, Decimal::MIN);
Decimal::from_str("0.0000000000000000001").unwrap_err();
Decimal::from_str("1000000000000000000000").unwrap_err();
assert!(Decimal::from_str("0.0000000000000000001").is_err());
assert!(Decimal::from_str("1000000000000000000000").is_err());
assert_eq!(
Decimal::from_str("0.100000000000000000000000000").unwrap(),
Decimal::from_str("0.1").unwrap()
@ -805,156 +792,45 @@ mod tests {
Some(Decimal::from_str("0.9")?)
);
assert_eq!(Decimal::from(1).checked_rem(0), None);
assert_eq!(
Decimal::MAX.checked_rem(1),
Some(Decimal::from_str("0.687303715884105727")?)
);
assert_eq!(
Decimal::MIN.checked_rem(1),
Some(Decimal::from_str("-0.687303715884105728")?)
);
assert_eq!(
Decimal::MAX.checked_rem(Decimal::STEP),
Some(Decimal::default())
);
assert_eq!(
Decimal::MIN.checked_rem(Decimal::STEP),
Some(Decimal::default())
);
assert_eq!(
Decimal::MAX.checked_rem(Decimal::MAX),
Some(Decimal::default())
);
assert_eq!(
Decimal::MIN.checked_rem(Decimal::MIN),
Some(Decimal::default())
);
Ok(())
}
#[test]
fn round() -> Result<(), ParseDecimalError> {
assert_eq!(Decimal::from(10).checked_round(), Some(Decimal::from(10)));
assert_eq!(Decimal::from(-10).checked_round(), Some(Decimal::from(-10)));
assert_eq!(
Decimal::from(i64::MIN).checked_round(),
Some(Decimal::from(i64::MIN))
);
assert_eq!(
Decimal::from(i64::MAX).checked_round(),
Some(Decimal::from(i64::MAX))
);
assert_eq!(
Decimal::from_str("2.5")?.checked_round(),
Some(Decimal::from(3))
);
assert_eq!(
Decimal::from_str("2.4999")?.checked_round(),
Some(Decimal::from(2))
);
assert_eq!(
Decimal::from_str("-2.5")?.checked_round(),
Some(Decimal::from(-2))
);
assert_eq!(Decimal::MAX.checked_round(), None);
assert_eq!(
Decimal::MAX
.checked_sub(Decimal::from_str("0.5")?)
.unwrap()
.checked_round(),
Some(Decimal::from_str("170141183460469231731")?)
);
assert_eq!(Decimal::MIN.checked_round(), None);
assert_eq!(
Decimal::MIN
.checked_add(Decimal::from_str("0.5")?)
.unwrap()
.checked_round(),
Some(Decimal::from_str("-170141183460469231731")?)
);
assert_eq!(Decimal::from(10).round(), Decimal::from(10));
assert_eq!(Decimal::from(-10).round(), Decimal::from(-10));
assert_eq!(Decimal::from_str("2.5")?.round(), Decimal::from(3));
assert_eq!(Decimal::from_str("2.4999")?.round(), Decimal::from(2));
assert_eq!(Decimal::from_str("-2.5")?.round(), Decimal::from(-2));
assert_eq!(Decimal::from(i64::MIN).round(), Decimal::from(i64::MIN));
assert_eq!(Decimal::from(i64::MAX).round(), Decimal::from(i64::MAX));
Ok(())
}
#[test]
fn ceil() -> Result<(), ParseDecimalError> {
assert_eq!(Decimal::from(10).checked_ceil(), Some(Decimal::from(10)));
assert_eq!(Decimal::from(-10).checked_ceil(), Some(Decimal::from(-10)));
assert_eq!(
Decimal::from_str("10.5")?.checked_ceil(),
Some(Decimal::from(11))
);
assert_eq!(
Decimal::from_str("-10.5")?.checked_ceil(),
Some(Decimal::from(-10))
);
assert_eq!(
Decimal::from(i64::MIN).checked_ceil(),
Some(Decimal::from(i64::MIN))
);
assert_eq!(
Decimal::from(i64::MAX).checked_ceil(),
Some(Decimal::from(i64::MAX))
);
assert_eq!(Decimal::MAX.checked_ceil(), None);
assert_eq!(
Decimal::MAX
.checked_sub(Decimal::from(1))
.unwrap()
.checked_ceil(),
Some(Decimal::from_str("170141183460469231731")?)
);
assert_eq!(
Decimal::MIN.checked_ceil(),
Some(Decimal::from_str("-170141183460469231731")?)
);
assert_eq!(Decimal::from(10).ceil(), Decimal::from(10));
assert_eq!(Decimal::from(-10).ceil(), Decimal::from(-10));
assert_eq!(Decimal::from_str("10.5")?.ceil(), Decimal::from(11));
assert_eq!(Decimal::from_str("-10.5")?.ceil(), Decimal::from(-10));
assert_eq!(Decimal::from(i64::MIN).ceil(), Decimal::from(i64::MIN));
assert_eq!(Decimal::from(i64::MAX).ceil(), Decimal::from(i64::MAX));
Ok(())
}
/// Checks `floor` on integral, fractional, negative and extreme inputs.
#[test]
fn floor() -> Result<(), ParseDecimalError> {
    assert_eq!(Decimal::from(10).checked_floor(), Some(Decimal::from(10)));
    assert_eq!(Decimal::from(-10).checked_floor(), Some(Decimal::from(-10)));
    assert_eq!(
        Decimal::from_str("10.5")?.checked_floor(),
        Some(Decimal::from(10))
    );
    assert_eq!(
        Decimal::from_str("-10.5")?.checked_floor(),
        Some(Decimal::from(-11))
    );
    assert_eq!(
        Decimal::from(i64::MIN).checked_floor(),
        Some(Decimal::from(i64::MIN))
    );
    assert_eq!(
        Decimal::from(i64::MAX).checked_floor(),
        Some(Decimal::from(i64::MAX))
    );
    assert_eq!(
        Decimal::MAX.checked_floor(),
        Some(Decimal::from_str("170141183460469231731")?)
    );
    assert_eq!(Decimal::MIN.checked_floor(), None);
    assert_eq!(
        Decimal::MIN
            .checked_add(Decimal::from_str("1")?)
            .unwrap()
            .checked_floor(),
        Some(Decimal::from_str("-170141183460469231731")?)
    );
    // Integral inputs must come back unchanged from `floor` itself
    // (these two assertions previously called `ceil`, leaving `floor` untested here).
    assert_eq!(Decimal::from(10).floor(), Decimal::from(10));
    assert_eq!(Decimal::from(-10).floor(), Decimal::from(-10));
    assert_eq!(Decimal::from_str("10.5")?.floor(), Decimal::from(10));
    assert_eq!(Decimal::from_str("-10.5")?.floor(), Decimal::from(-11));
    assert_eq!(Decimal::from(i64::MIN).floor(), Decimal::from(i64::MIN));
    assert_eq!(Decimal::from(i64::MAX).floor(), Decimal::from(i64::MAX));
    Ok(())
}
#[test]
fn to_be_bytes() -> Result<(), ParseDecimalError> {
assert_eq!(
Decimal::from_be_bytes(Decimal::MIN.to_be_bytes()),
Decimal::MIN
);
assert_eq!(
Decimal::from_be_bytes(Decimal::MAX.to_be_bytes()),
Decimal::MAX
);
assert_eq!(
Decimal::from_be_bytes(Decimal::from(i64::MIN).to_be_bytes()),
Decimal::from(i64::MIN)
@ -998,18 +874,17 @@ mod tests {
Decimal::try_from(Float::from(-123.5)).ok(),
Some(Decimal::from_str("-123.5")?)
);
Decimal::try_from(Float::from(f32::NAN)).unwrap_err();
Decimal::try_from(Float::from(f32::INFINITY)).unwrap_err();
Decimal::try_from(Float::from(f32::NEG_INFINITY)).unwrap_err();
Decimal::try_from(Float::from(f32::MIN)).unwrap_err();
Decimal::try_from(Float::from(f32::MAX)).unwrap_err();
assert!(Decimal::try_from(Float::from(f32::NAN)).is_err());
assert!(Decimal::try_from(Float::from(f32::INFINITY)).is_err());
assert!(Decimal::try_from(Float::from(f32::NEG_INFINITY)).is_err());
assert!(Decimal::try_from(Float::from(f32::MIN)).is_err());
assert!(Decimal::try_from(Float::from(f32::MAX)).is_err());
assert!(
Decimal::try_from(Float::from(1_672_507_300_000.))
.unwrap()
.checked_sub(Decimal::from(1_672_507_293_696_i64))
.unwrap()
.checked_abs()
.unwrap()
.abs()
< Decimal::from(1)
);
Ok(())
@ -1034,15 +909,14 @@ mod tests {
.unwrap()
.checked_sub(Decimal::from(1_672_507_302_466_i64))
.unwrap()
.checked_abs()
.unwrap()
.abs()
< Decimal::from(1)
);
Decimal::try_from(Double::from(f64::NAN)).unwrap_err();
Decimal::try_from(Double::from(f64::INFINITY)).unwrap_err();
Decimal::try_from(Double::from(f64::NEG_INFINITY)).unwrap_err();
Decimal::try_from(Double::from(f64::MIN)).unwrap_err();
Decimal::try_from(Double::from(f64::MAX)).unwrap_err();
assert!(Decimal::try_from(Double::from(f64::NAN)).is_err());
assert!(Decimal::try_from(Double::from(f64::INFINITY)).is_err());
assert!(Decimal::try_from(Double::from(f64::NEG_INFINITY)).is_err());
assert!(Decimal::try_from(Double::from(f64::MIN)).is_err());
assert!(Decimal::try_from(Double::from(f64::MAX)).is_err());
Ok(())
}

@ -1,5 +1,4 @@
use crate::{Boolean, Float, Integer};
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::fmt;
use std::num::ParseFloatError;
@ -10,26 +9,15 @@ use std::str::FromStr;
///
/// Uses internally a [`f64`].
///
/// <div class="warning">Serialization does not follow the canonical mapping.</div>
#[derive(Debug, Clone, Copy, Default, PartialEq, Serialize, Deserialize)]
/// Beware: serialization is currently buggy and does not follow the canonical mapping yet.
#[derive(Debug, Clone, Copy, Default, PartialEq)]
#[repr(transparent)]
pub struct Double {
value: f64,
}
impl Double {
pub const INFINITY: Self = Self {
value: f64::INFINITY,
};
pub const MAX: Self = Self { value: f64::MAX };
pub const MIN: Self = Self { value: f64::MIN };
pub const NAN: Self = Self { value: f64::NAN };
pub const NEG_INFINITY: Self = Self {
value: f64::NEG_INFINITY,
};
#[inline]
#[must_use]
pub fn from_be_bytes(bytes: [u8; 8]) -> Self {
Self {
value: f64::from_be_bytes(bytes),
@ -37,57 +25,69 @@ impl Double {
}
#[inline]
#[must_use]
pub fn to_be_bytes(self) -> [u8; 8] {
self.value.to_be_bytes()
}
/// Absolute value, see [fn:abs](https://www.w3.org/TR/xpath-functions-31/#func-abs).
#[inline]
#[must_use]
pub fn abs(self) -> Self {
    let magnitude = f64::abs(self.value);
    magnitude.into()
}
/// Smallest integral value not less than `self`, see
/// [fn:ceiling](https://www.w3.org/TR/xpath-functions-31/#func-ceiling).
#[inline]
#[must_use]
pub fn ceil(self) -> Self {
    let rounded_up = f64::ceil(self.value);
    rounded_up.into()
}
/// Largest integral value not greater than `self`, see
/// [fn:floor](https://www.w3.org/TR/xpath-functions-31/#func-floor).
#[inline]
#[must_use]
pub fn floor(self) -> Self {
    let rounded_down = f64::floor(self.value);
    rounded_down.into()
}
/// [fn:round](https://www.w3.org/TR/xpath-functions-31/#func-round)
///
/// Rounds to the nearest integral value; when two integral values are equally
/// near, the one closest to positive infinity is returned (so `-2.5` gives `-2`).
/// `f64::round` cannot be used directly because it rounds ties away from zero.
/// NaN and the infinities are returned unchanged.
#[inline]
#[must_use]
pub fn round(self) -> Self {
    let floor = self.value.floor();
    // The fractional part `value - floor` is exact, so the tie test is exact too.
    // For NaN/infinite inputs the comparison is false and `floor` (== input) is kept.
    let rounded = if self.value - floor >= 0.5 {
        floor + 1.0
    } else {
        floor
    };
    // Keep the sign of the input so that values in [-0.5, -0] yield negative zero.
    rounded.copysign(self.value).into()
}
#[inline]
#[must_use]
pub fn is_nan(self) -> bool {
self.value.is_nan()
}
#[deprecated(note = "Use .is_nan()")]
#[inline]
pub fn is_naan(self) -> bool {
self.value.is_nan()
}
#[inline]
#[must_use]
pub fn is_finite(self) -> bool {
self.value.is_finite()
}
/// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
#[inline]
#[must_use]
pub fn is_identical_with(self, other: Self) -> bool {
self.value.to_bits() == other.value.to_bits()
pub fn is_identical_with(&self, other: &Self) -> bool {
self.value.to_ne_bytes() == other.value.to_ne_bytes()
}
pub const MIN: Self = Self { value: f64::MIN };
pub const MAX: Self = Self { value: f64::MAX };
pub const INFINITY: Self = Self {
value: f64::INFINITY,
};
pub const NEG_INFINITY: Self = Self {
value: f64::NEG_INFINITY,
};
pub const NAN: Self = Self { value: f64::NAN };
}
impl From<Double> for f64 {
@ -170,7 +170,7 @@ impl From<Float> for Double {
impl From<Boolean> for Double {
#[inline]
fn from(value: Boolean) -> Self {
f64::from(bool::from(value)).into()
if bool::from(value) { 1. } else { 0. }.into()
}
}
@ -186,7 +186,7 @@ impl FromStr for Double {
type Err = ParseFloatError;
#[inline]
fn from_str(input: &str) -> Result<Self, Self::Err> {
fn from_str(input: &str) -> Result<Self, ParseFloatError> {
Ok(f64::from_str(input)?.into())
}
}
@ -257,7 +257,6 @@ impl Div for Double {
}
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
use super::*;
@ -292,9 +291,9 @@ mod tests {
#[test]
fn is_identical_with() {
assert!(Double::from(0.).is_identical_with(Double::from(0.)));
assert!(Double::NAN.is_identical_with(Double::NAN));
assert!(!Double::from(-0.).is_identical_with(Double::from(0.)));
assert!(Double::from(0.).is_identical_with(&Double::from(0.)));
assert!(Double::NAN.is_identical_with(&Double::NAN));
assert!(!Double::from(-0.).is_identical_with(&Double::from(0.)));
}
#[test]

File diff suppressed because it is too large Load Diff

@ -1,5 +1,4 @@
use crate::{Boolean, Double, Integer};
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::fmt;
use std::num::ParseFloatError;
@ -10,26 +9,15 @@ use std::str::FromStr;
///
/// Uses internally a [`f32`].
///
/// <div class="warning">Serialization does not follow the canonical mapping.</div>
#[derive(Debug, Clone, Copy, Default, PartialEq, Serialize, Deserialize)]
/// Beware: serialization is currently buggy and does not follow the canonical mapping yet.
#[derive(Debug, Clone, Copy, Default, PartialEq)]
#[repr(transparent)]
pub struct Float {
value: f32,
}
impl Float {
pub const INFINITY: Self = Self {
value: f32::INFINITY,
};
pub const MAX: Self = Self { value: f32::MAX };
pub const MIN: Self = Self { value: f32::MIN };
pub const NAN: Self = Self { value: f32::NAN };
pub const NEG_INFINITY: Self = Self {
value: f32::NEG_INFINITY,
};
#[inline]
#[must_use]
pub fn from_be_bytes(bytes: [u8; 4]) -> Self {
Self {
value: f32::from_be_bytes(bytes),
@ -37,57 +25,69 @@ impl Float {
}
#[inline]
#[must_use]
pub fn to_be_bytes(self) -> [u8; 4] {
self.value.to_be_bytes()
}
/// Absolute value, see [fn:abs](https://www.w3.org/TR/xpath-functions-31/#func-abs).
#[inline]
#[must_use]
pub fn abs(self) -> Self {
    let magnitude = f32::abs(self.value);
    magnitude.into()
}
/// Smallest integral value not less than `self`, see
/// [fn:ceiling](https://www.w3.org/TR/xpath-functions-31/#func-ceiling).
#[inline]
#[must_use]
pub fn ceil(self) -> Self {
    let rounded_up = f32::ceil(self.value);
    rounded_up.into()
}
/// Largest integral value not greater than `self`, see
/// [fn:floor](https://www.w3.org/TR/xpath-functions-31/#func-floor).
#[inline]
#[must_use]
pub fn floor(self) -> Self {
    let rounded_down = f32::floor(self.value);
    rounded_down.into()
}
/// [fn:round](https://www.w3.org/TR/xpath-functions-31/#func-round)
///
/// Rounds to the nearest integral value; when two integral values are equally
/// near, the one closest to positive infinity is returned (so `-2.5` gives `-2`).
/// `f32::round` cannot be used directly because it rounds ties away from zero.
/// NaN and the infinities are returned unchanged.
#[inline]
#[must_use]
pub fn round(self) -> Self {
    let floor = self.value.floor();
    // The fractional part `value - floor` is exact, so the tie test is exact too.
    // For NaN/infinite inputs the comparison is false and `floor` (== input) is kept.
    let rounded = if self.value - floor >= 0.5 {
        floor + 1.0
    } else {
        floor
    };
    // Keep the sign of the input so that values in [-0.5, -0] yield negative zero.
    rounded.copysign(self.value).into()
}
#[deprecated(note = "Use .is_nan()")]
#[inline]
pub fn is_naan(self) -> bool {
self.value.is_nan()
}
#[inline]
#[must_use]
pub fn is_nan(self) -> bool {
self.value.is_nan()
}
#[inline]
#[must_use]
pub fn is_finite(self) -> bool {
self.value.is_finite()
}
/// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
#[inline]
#[must_use]
pub fn is_identical_with(self, other: Self) -> bool {
self.value.to_bits() == other.value.to_bits()
pub fn is_identical_with(&self, other: &Self) -> bool {
self.value.to_ne_bytes() == other.value.to_ne_bytes()
}
pub const MIN: Self = Self { value: f32::MIN };
pub const MAX: Self = Self { value: f32::MAX };
pub const INFINITY: Self = Self {
value: f32::INFINITY,
};
pub const NEG_INFINITY: Self = Self {
value: f32::NEG_INFINITY,
};
pub const NAN: Self = Self { value: f32::NAN };
}
impl From<Float> for f32 {
@ -150,7 +150,7 @@ impl From<u16> for Float {
impl From<Boolean> for Float {
#[inline]
fn from(value: Boolean) -> Self {
f32::from(bool::from(value)).into()
if bool::from(value) { 1. } else { 0. }.into()
}
}
@ -176,7 +176,7 @@ impl FromStr for Float {
type Err = ParseFloatError;
#[inline]
fn from_str(input: &str) -> Result<Self, Self::Err> {
fn from_str(input: &str) -> Result<Self, ParseFloatError> {
Ok(f32::from_str(input)?.into())
}
}
@ -247,7 +247,6 @@ impl Div for Float {
}
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
use super::*;
@ -282,9 +281,9 @@ mod tests {
#[test]
fn is_identical_with() {
assert!(Float::from(0.).is_identical_with(Float::from(0.)));
assert!(Float::NAN.is_identical_with(Float::NAN));
assert!(!Float::from(-0.).is_identical_with(Float::from(0.)));
assert!(Float::from(0.).is_identical_with(&Float::from(0.)));
assert!(Float::NAN.is_identical_with(&Float::NAN));
assert!(!Float::from(-0.).is_identical_with(&Float::from(0.)));
}
#[test]

@ -1,26 +1,20 @@
use crate::{Boolean, Decimal, Double, Float};
use serde::{Deserialize, Serialize};
use crate::{Boolean, Decimal, DecimalOverflowError, Double, Float};
use std::fmt;
use std::num::ParseIntError;
use std::ops::Neg;
use std::str::FromStr;
/// [XML Schema `integer` datatype](https://www.w3.org/TR/xmlschema11-2/#integer)
///
/// Uses internally a [`i64`].
#[derive(
Debug, Clone, Copy, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize,
)]
#[derive(Debug, Clone, Copy, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[repr(transparent)]
pub struct Integer {
value: i64,
}
impl Integer {
pub const MAX: Self = Self { value: i64::MAX };
pub const MIN: Self = Self { value: i64::MIN };
#[inline]
#[must_use]
pub fn from_be_bytes(bytes: [u8; 8]) -> Self {
Self {
value: i64::from_be_bytes(bytes),
@ -28,117 +22,92 @@ impl Integer {
}
#[inline]
#[must_use]
pub fn to_be_bytes(self) -> [u8; 8] {
self.value.to_be_bytes()
}
/// [op:numeric-add](https://www.w3.org/TR/xpath-functions-31/#func-numeric-add)
///
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_add(self, rhs: impl Into<Self>) -> Option<Self> {
pub fn checked_add(&self, rhs: impl Into<Self>) -> Option<Self> {
Some(Self {
value: self.value.checked_add(rhs.into().value)?,
})
}
/// [op:numeric-subtract](https://www.w3.org/TR/xpath-functions-31/#func-numeric-subtract)
///
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_sub(self, rhs: impl Into<Self>) -> Option<Self> {
pub fn checked_sub(&self, rhs: impl Into<Self>) -> Option<Self> {
Some(Self {
value: self.value.checked_sub(rhs.into().value)?,
})
}
/// [op:numeric-multiply](https://www.w3.org/TR/xpath-functions-31/#func-numeric-multiply)
///
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_mul(self, rhs: impl Into<Self>) -> Option<Self> {
pub fn checked_mul(&self, rhs: impl Into<Self>) -> Option<Self> {
Some(Self {
value: self.value.checked_mul(rhs.into().value)?,
})
}
/// [op:numeric-integer-divide](https://www.w3.org/TR/xpath-functions-31/#func-numeric-integer-divide)
///
/// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
/// [op:numeric-divide](https://www.w3.org/TR/xpath-functions-31/#func-numeric-divide)
#[inline]
#[must_use]
pub fn checked_div(self, rhs: impl Into<Self>) -> Option<Self> {
pub fn checked_div(&self, rhs: impl Into<Self>) -> Option<Self> {
Some(Self {
value: self.value.checked_div(rhs.into().value)?,
})
}
/// [op:numeric-mod](https://www.w3.org/TR/xpath-functions-31/#func-numeric-mod)
///
/// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_rem(self, rhs: impl Into<Self>) -> Option<Self> {
pub fn checked_rem(&self, rhs: impl Into<Self>) -> Option<Self> {
Some(Self {
value: self.value.checked_rem(rhs.into().value)?,
})
}
/// Euclidean remainder
///
/// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_rem_euclid(self, rhs: impl Into<Self>) -> Option<Self> {
pub fn checked_rem_euclid(&self, rhs: impl Into<Self>) -> Option<Self> {
Some(Self {
value: self.value.checked_rem_euclid(rhs.into().value)?,
})
}
/// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions-31/#func-numeric-unary-minus)
///
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_neg(self) -> Option<Self> {
pub fn checked_neg(&self) -> Option<Self> {
Some(Self {
value: self.value.checked_neg()?,
})
}
/// [fn:abs](https://www.w3.org/TR/xpath-functions-31/#func-abs)
///
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
#[inline]
#[must_use]
pub fn checked_abs(self) -> Option<Self> {
Some(Self {
value: self.value.checked_abs()?,
})
pub const fn abs(&self) -> Self {
Self {
value: self.value.abs(),
}
}
#[inline]
#[must_use]
pub const fn is_negative(self) -> bool {
pub const fn is_negative(&self) -> bool {
self.value < 0
}
#[inline]
#[must_use]
pub const fn is_positive(self) -> bool {
pub const fn is_positive(&self) -> bool {
self.value > 0
}
/// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
#[inline]
#[must_use]
pub fn is_identical_with(self, other: Self) -> bool {
pub fn is_identical_with(&self, other: &Self) -> bool {
self == other
}
pub const MIN: Self = Self { value: i64::MIN };
pub const MAX: Self = Self { value: i64::MAX };
}
impl From<bool> for Integer {
@ -229,7 +198,7 @@ impl FromStr for Integer {
type Err = ParseIntError;
#[inline]
fn from_str(input: &str) -> Result<Self, Self::Err> {
fn from_str(input: &str) -> Result<Self, ParseIntError> {
Ok(i64::from_str(input)?.into())
}
}
@ -241,37 +210,34 @@ impl fmt::Display for Integer {
}
}
/// Unary minus on the wrapped `i64`.
///
/// Negating the smallest representable value overflows the underlying `i64`;
/// `checked_neg` reports that case as `None` instead.
impl Neg for Integer {
    type Output = Self;

    #[inline]
    fn neg(self) -> Self {
        let negated: i64 = self.value.neg();
        Self::from(negated)
    }
}
impl TryFrom<Float> for Integer {
type Error = TooLargeForIntegerError;
type Error = DecimalOverflowError;
#[inline]
fn try_from(value: Float) -> Result<Self, Self::Error> {
Decimal::try_from(value)
.map_err(|_| TooLargeForIntegerError)?
.try_into()
fn try_from(value: Float) -> Result<Self, DecimalOverflowError> {
Decimal::try_from(value)?.try_into()
}
}
impl TryFrom<Double> for Integer {
type Error = TooLargeForIntegerError;
type Error = DecimalOverflowError;
#[inline]
fn try_from(value: Double) -> Result<Self, Self::Error> {
Decimal::try_from(value)
.map_err(|_| TooLargeForIntegerError)?
.try_into()
fn try_from(value: Double) -> Result<Self, DecimalOverflowError> {
Decimal::try_from(value)?.try_into()
}
}
/// The input is too large to fit into an [`Integer`].
///
/// Matches XPath [`FOCA0003` error](https://www.w3.org/TR/xpath-functions-31/#ERRFOCA0003).
#[derive(Debug, Clone, Copy, thiserror::Error)]
#[error("Value too large for xsd:integer internal representation")]
pub struct TooLargeForIntegerError;
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
use super::*;
@ -281,7 +247,7 @@ mod tests {
assert_eq!(Integer::from_str("-0")?.to_string(), "0");
assert_eq!(Integer::from_str("123")?.to_string(), "123");
assert_eq!(Integer::from_str("-123")?.to_string(), "-123");
Integer::from_str("123456789123456789123456789123456789123456789").unwrap_err();
assert!(Integer::from_str("123456789123456789123456789123456789123456789").is_err());
Ok(())
}
@ -299,18 +265,17 @@ mod tests {
Integer::try_from(Float::from(-123.1)).ok(),
Some(Integer::from_str("-123")?)
);
Integer::try_from(Float::from(f32::NAN)).unwrap_err();
Integer::try_from(Float::from(f32::INFINITY)).unwrap_err();
Integer::try_from(Float::from(f32::NEG_INFINITY)).unwrap_err();
Integer::try_from(Float::from(f32::MIN)).unwrap_err();
Integer::try_from(Float::from(f32::MAX)).unwrap_err();
assert!(Integer::try_from(Float::from(f32::NAN)).is_err());
assert!(Integer::try_from(Float::from(f32::INFINITY)).is_err());
assert!(Integer::try_from(Float::from(f32::NEG_INFINITY)).is_err());
assert!(Integer::try_from(Float::from(f32::MIN)).is_err());
assert!(Integer::try_from(Float::from(f32::MAX)).is_err());
assert!(
Integer::try_from(Float::from(1_672_507_300_000.))
.unwrap()
.checked_sub(Integer::from_str("1672507300000")?)
.unwrap()
.checked_abs()
.unwrap()
.abs()
< Integer::from(1_000_000)
);
Ok(())
@ -335,15 +300,14 @@ mod tests {
.unwrap()
.checked_sub(Integer::from_str("1672507300000").unwrap())
.unwrap()
.checked_abs()
.unwrap()
.abs()
< Integer::from(10)
);
Integer::try_from(Double::from(f64::NAN)).unwrap_err();
Integer::try_from(Double::from(f64::INFINITY)).unwrap_err();
Integer::try_from(Double::from(f64::NEG_INFINITY)).unwrap_err();
Integer::try_from(Double::from(f64::MIN)).unwrap_err();
Integer::try_from(Double::from(f64::MAX)).unwrap_err();
assert!(Integer::try_from(Double::from(f64::NAN)).is_err());
assert!(Integer::try_from(Double::from(f64::INFINITY)).is_err());
assert!(Integer::try_from(Double::from(f64::NEG_INFINITY)).is_err());
assert!(Integer::try_from(Double::from(f64::MIN)).is_err());
assert!(Integer::try_from(Double::from(f64::MAX)).is_err());
Ok(())
}
@ -357,8 +321,8 @@ mod tests {
Integer::try_from(Decimal::from_str("-123.1").unwrap()).ok(),
Some(Integer::from_str("-123")?)
);
Integer::try_from(Decimal::MIN).unwrap_err();
Integer::try_from(Decimal::MAX).unwrap_err();
assert!(Integer::try_from(Decimal::MIN).is_err());
assert!(Integer::try_from(Decimal::MAX).is_err());
Ok(())
}

@ -3,6 +3,7 @@
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![allow(clippy::return_self_not_must_use)]
mod boolean;
mod date_time;
@ -11,17 +12,15 @@ mod double;
mod duration;
mod float;
mod integer;
mod parser;
pub use self::boolean::Boolean;
pub use self::date_time::{
Date, DateTime, DateTimeOverflowError, GDay, GMonth, GMonthDay, GYear, GYearMonth,
InvalidTimezoneError, ParseDateTimeError, Time, TimezoneOffset,
Date, DateTime, DateTimeError, GDay, GMonth, GMonthDay, GYear, GYearMonth, Time, TimezoneOffset,
};
pub use self::decimal::{Decimal, ParseDecimalError, TooLargeForDecimalError};
pub use self::decimal::{Decimal, DecimalOverflowError, ParseDecimalError};
pub use self::double::Double;
pub use self::duration::{
DayTimeDuration, Duration, DurationOverflowError, OppositeSignInDurationComponentsError,
ParseDurationError, YearMonthDuration,
};
pub use self::duration::{DayTimeDuration, Duration, YearMonthDuration};
pub use self::float::Float;
pub use self::integer::{Integer, TooLargeForIntegerError};
pub use self::integer::Integer;
pub use self::parser::XsdParseError;

@ -0,0 +1,626 @@
use super::date_time::{DateTimeError, GDay, GMonth, GMonthDay, GYear, GYearMonth, TimezoneOffset};
use super::decimal::ParseDecimalError;
use super::duration::{DayTimeDuration, YearMonthDuration};
use super::*;
use std::error::Error;
use std::fmt;
use std::num::ParseIntError;
use std::str::FromStr;
/// A parsing error
///
/// The failure cause is stored in the private `kind` field; it is rendered by the
/// `Display` implementation and, when it wraps another error, exposed via `Error::source`.
#[derive(Debug, Clone)]
pub struct XsdParseError {
kind: XsdParseErrorKind,
}
/// The different causes of an [`XsdParseError`].
#[derive(Debug, Clone)]
enum XsdParseErrorKind {
// An integer component could not be parsed.
ParseInt(ParseIntError),
// A decimal component could not be parsed.
ParseDecimal(ParseDecimalError),
// A date/time related error reported as a `DateTimeError`.
DateTime(DateTimeError),
// A static, human-readable description of a lexical error.
Message(&'static str),
}
/// Shared error value returned when a checked arithmetic operation overflows during parsing.
const OVERFLOW_ERROR: XsdParseError = XsdParseError {
kind: XsdParseErrorKind::Message("Overflow error"),
};
impl fmt::Display for XsdParseError {
    /// Writes a human-readable description of the parsing failure.
    ///
    /// Integer and decimal failures are prefixed with a short context message,
    /// date/time failures are delegated to the wrapped error and static messages
    /// are written verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.kind {
            XsdParseErrorKind::Message(msg) => f.write_str(msg),
            XsdParseErrorKind::DateTime(error) => error.fmt(f),
            XsdParseErrorKind::ParseDecimal(error) => {
                write!(f, "Error while parsing decimal: {error}")
            }
            XsdParseErrorKind::ParseInt(error) => {
                write!(f, "Error while parsing integer: {error}")
            }
        }
    }
}
impl XsdParseError {
    /// Builds an error that only carries the given static description.
    const fn msg(message: &'static str) -> Self {
        let kind = XsdParseErrorKind::Message(message);
        Self { kind }
    }
}
impl Error for XsdParseError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match &self.kind {
XsdParseErrorKind::ParseInt(error) => Some(error),
XsdParseErrorKind::ParseDecimal(error) => Some(error),
XsdParseErrorKind::DateTime(error) => Some(error),
XsdParseErrorKind::Message(_) => None,
}
}
}
impl From<ParseIntError> for XsdParseError {
fn from(error: ParseIntError) -> Self {
Self {
kind: XsdParseErrorKind::ParseInt(error),
}
}
}
impl From<ParseDecimalError> for XsdParseError {
fn from(error: ParseDecimalError) -> Self {
Self {
kind: XsdParseErrorKind::ParseDecimal(error),
}
}
}
impl From<DateTimeError> for XsdParseError {
fn from(error: DateTimeError) -> Self {
Self {
kind: XsdParseErrorKind::DateTime(error),
}
}
}
// [6] duYearFrag ::= unsignedNoDecimalPtNumeral 'Y'
// [7] duMonthFrag ::= unsignedNoDecimalPtNumeral 'M'
// [8] duDayFrag ::= unsignedNoDecimalPtNumeral 'D'
// [9] duHourFrag ::= unsignedNoDecimalPtNumeral 'H'
// [10] duMinuteFrag ::= unsignedNoDecimalPtNumeral 'M'
// [11] duSecondFrag ::= (unsignedNoDecimalPtNumeral | unsignedDecimalPtNumeral) 'S'
// [12] duYearMonthFrag ::= (duYearFrag duMonthFrag?) | duMonthFrag
// [13] duTimeFrag ::= 'T' ((duHourFrag duMinuteFrag? duSecondFrag?) | (duMinuteFrag duSecondFrag?) | duSecondFrag)
// [14] duDayTimeFrag ::= (duDayFrag duTimeFrag?) | duTimeFrag
// [15] durationLexicalRep ::= '-'? 'P' ((duYearMonthFrag duDayTimeFrag?) | duDayTimeFrag)
/// The two components extracted from a duration lexical value by `duration_parts`.
struct DurationParts {
// Signed number of months (years are folded in as 12 months each); `None` if no year or month fragment was present.
year_month: Option<i64>,
// Signed number of seconds (days, hours and minutes are folded in); `None` if no day or time fragment was present.
day_time: Option<Decimal>,
}
/// Parses a duration lexical value (`durationLexicalRep`) into its year-month and day-time parts.
///
/// The year-month part is expressed in months and the day-time part in seconds; both carry
/// the sign of the optional leading `-`. A part is `None` when none of its fragments appear.
///
/// The returned `&str` is the unparsed remainder, which is always empty on success because
/// the loop only stops once the whole input is consumed.
///
/// # Errors
///
/// Returns an error if the value does not start with `P` (after the optional `-`), if a
/// fragment is out of order, duplicated or uses an unknown designator, if a number is missing,
/// if a decimal number is used anywhere other than seconds, if `T` is not followed by any
/// time fragment, or if an arithmetic overflow occurs.
fn duration_parts(input: &str) -> Result<(DurationParts, &str), XsdParseError> {
    // States: they are ordered so that fragments can only appear in grammar order.
    const START: u32 = 0;
    const AFTER_YEAR: u32 = 1;
    const AFTER_MONTH: u32 = 2;
    const AFTER_DAY: u32 = 3;
    const AFTER_T: u32 = 4;
    const AFTER_HOUR: u32 = 5;
    const AFTER_MINUTE: u32 = 6;
    const AFTER_SECOND: u32 = 7;

    let (is_negative, input) = if let Some(left) = input.strip_prefix('-') {
        (true, left)
    } else {
        (false, input)
    };
    let mut input = expect_char(input, 'P', "Durations must start with 'P'")?;
    let mut state = START;
    let mut year_month: Option<i64> = None;
    let mut day_time: Option<Decimal> = None;
    while !input.is_empty() {
        if let Some(left) = input.strip_prefix('T') {
            if state >= AFTER_T {
                return Err(XsdParseError::msg("Duplicated time separator 'T'"));
            }
            state = AFTER_T;
            input = left;
        } else {
            let (number_str, left) = decimal_prefix(input);
            match left.chars().next() {
                Some('Y') if state < AFTER_YEAR => {
                    year_month = Some(
                        year_month
                            .unwrap_or_default()
                            .checked_add(
                                apply_i64_neg(i64::from_str(number_str)?, is_negative)?
                                    .checked_mul(12)
                                    .ok_or(OVERFLOW_ERROR)?,
                            )
                            .ok_or(OVERFLOW_ERROR)?,
                    );
                    state = AFTER_YEAR;
                }
                Some('M') if state < AFTER_MONTH => {
                    year_month = Some(
                        year_month
                            .unwrap_or_default()
                            .checked_add(apply_i64_neg(i64::from_str(number_str)?, is_negative)?)
                            .ok_or(OVERFLOW_ERROR)?,
                    );
                    state = AFTER_MONTH;
                }
                Some('D') if state < AFTER_DAY => {
                    if number_str.contains('.') {
                        return Err(XsdParseError::msg(
                            "Decimal numbers are not allowed for days",
                        ));
                    }
                    day_time = Some(
                        day_time
                            .unwrap_or_default()
                            .checked_add(
                                apply_decimal_neg(Decimal::from_str(number_str)?, is_negative)?
                                    .checked_mul(86400)
                                    .ok_or(OVERFLOW_ERROR)?,
                            )
                            .ok_or(OVERFLOW_ERROR)?,
                    );
                    state = AFTER_DAY;
                }
                Some('H') if state == AFTER_T => {
                    if number_str.contains('.') {
                        return Err(XsdParseError::msg(
                            "Decimal numbers are not allowed for hours",
                        ));
                    }
                    day_time = Some(
                        day_time
                            .unwrap_or_default()
                            .checked_add(
                                apply_decimal_neg(Decimal::from_str(number_str)?, is_negative)?
                                    .checked_mul(3600)
                                    .ok_or(OVERFLOW_ERROR)?,
                            )
                            .ok_or(OVERFLOW_ERROR)?,
                    );
                    state = AFTER_HOUR;
                }
                Some('M') if (AFTER_T..AFTER_MINUTE).contains(&state) => {
                    if number_str.contains('.') {
                        return Err(XsdParseError::msg(
                            "Decimal numbers are not allowed for minutes",
                        ));
                    }
                    day_time = Some(
                        day_time
                            .unwrap_or_default()
                            .checked_add(
                                apply_decimal_neg(Decimal::from_str(number_str)?, is_negative)?
                                    .checked_mul(60)
                                    .ok_or(OVERFLOW_ERROR)?,
                            )
                            .ok_or(OVERFLOW_ERROR)?,
                    );
                    state = AFTER_MINUTE;
                }
                Some('S') if (AFTER_T..AFTER_SECOND).contains(&state) => {
                    day_time = Some(
                        day_time
                            .unwrap_or_default()
                            .checked_add(apply_decimal_neg(
                                Decimal::from_str(number_str)?,
                                is_negative,
                            )?)
                            .ok_or(OVERFLOW_ERROR)?,
                    );
                    state = AFTER_SECOND;
                }
                Some(_) => return Err(XsdParseError::msg("Unexpected type character")),
                None => {
                    return Err(XsdParseError::msg(
                        "Numbers in durations must be followed by a type character",
                    ))
                }
            }
            // The designator matched above is a single ASCII letter, so byte offset 1 is a
            // valid character boundary.
            input = &left[1..];
        }
    }
    // Grammar rule [13]: 'T' must introduce at least one hour, minute or second fragment.
    // Every time fragment moves the state past AFTER_T, so still being in AFTER_T here means
    // that the value ended right after the separator (e.g. "P1YT").
    if state == AFTER_T {
        return Err(XsdParseError::msg(
            "The time separator 'T' must be followed by at least one time component",
        ));
    }
    Ok((
        DurationParts {
            year_month,
            day_time,
        },
        input,
    ))
}
/// Negates `value` when `is_negative` is set, reporting an overflow error if the
/// negation cannot be represented.
fn apply_i64_neg(value: i64, is_negative: bool) -> Result<i64, XsdParseError> {
    if !is_negative {
        return Ok(value);
    }
    value.checked_neg().ok_or(OVERFLOW_ERROR)
}
/// Negates `value` when `is_negative` is set, reporting an overflow error if the
/// checked negation fails.
fn apply_decimal_neg(value: Decimal, is_negative: bool) -> Result<Decimal, XsdParseError> {
    if !is_negative {
        return Ok(value);
    }
    value.checked_neg().ok_or(OVERFLOW_ERROR)
}
pub fn parse_duration(input: &str) -> Result<Duration, XsdParseError> {
let parts = ensure_complete(input, duration_parts)?;
if parts.year_month.is_none() && parts.day_time.is_none() {
return Err(XsdParseError::msg("Empty duration"));
}
Ok(Duration::new(
parts.year_month.unwrap_or(0),
parts.day_time.unwrap_or_default(),
))
}
pub fn parse_year_month_duration(input: &str) -> Result<YearMonthDuration, XsdParseError> {
let parts = ensure_complete(input, duration_parts)?;
if parts.day_time.is_some() {
return Err(XsdParseError::msg(
"There must not be any day or time component in a yearMonthDuration",
));
}
Ok(YearMonthDuration::new(parts.year_month.ok_or(
XsdParseError::msg("No year and month values found"),
)?))
}
pub fn parse_day_time_duration(input: &str) -> Result<DayTimeDuration, XsdParseError> {
let parts = ensure_complete(input, duration_parts)?;
if parts.year_month.is_some() {
return Err(XsdParseError::msg(
"There must not be any year or month component in a dayTimeDuration",
));
}
Ok(DayTimeDuration::new(parts.day_time.ok_or(
XsdParseError::msg("No day or time values found"),
)?))
}
// [16] dateTimeLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag 'T' ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag?
/// Parses a dateTime lexical value from the start of `input`.
///
/// Returns the parsed value together with the unparsed remainder of `input`.
///
/// # Errors
///
/// Fails if a fragment or separator is malformed, if the hour is 24 with non-zero
/// minutes or seconds, or if `DateTime::new` rejects the components.
fn date_time_lexical_rep(input: &str) -> Result<(DateTime, &str), XsdParseError> {
    let (year, input) = year_frag(input)?;
    let input = expect_char(input, '-', "The year and month must be separated by '-'")?;
    let (month, input) = month_frag(input)?;
    let input = expect_char(input, '-', "The month and day must be separated by '-'")?;
    let (day, input) = day_frag(input)?;
    let input = expect_char(input, 'T', "The date and time must be separated by 'T'")?;
    let (hour, input) = hour_frag(input)?;
    let input = expect_char(input, ':', "The hours and minutes must be separated by ':'")?;
    let (minute, input) = minute_frag(input)?;
    let input = expect_char(
        input,
        ':',
        "The minutes and seconds must be separated by ':'",
    )?;
    let (second, input) = second_frag(input)?;
    // Hour 24 is only valid as the end-of-day value 24:00:00, so a non-zero minute
    // OR a non-zero second is enough to reject the value.
    if hour == 24 && (minute != 0 || second != Decimal::from(0)) {
        return Err(XsdParseError::msg(
            "Times are not allowed to be after 24:00:00",
        ));
    }
    let (timezone_offset, input) = optional_end(input, timezone_frag)?;
    Ok((
        DateTime::new(year, month, day, hour, minute, second, timezone_offset)?,
        input,
    ))
}

/// Parses a complete dateTime lexical value, rejecting any trailing characters.
pub fn parse_date_time(input: &str) -> Result<DateTime, XsdParseError> {
    ensure_complete(input, date_time_lexical_rep)
}
// [17] timeLexicalRep ::= ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag?
/// Parses a time lexical value from the start of `input`.
///
/// Returns the parsed value together with the unparsed remainder of `input`.
///
/// # Errors
///
/// Fails if a fragment or separator is malformed, if the hour is 24 with non-zero
/// minutes or seconds, or if `Time::new` rejects the components.
fn time_lexical_rep(input: &str) -> Result<(Time, &str), XsdParseError> {
    let (hour, input) = hour_frag(input)?;
    let input = expect_char(input, ':', "The hours and minutes must be separated by ':'")?;
    let (minute, input) = minute_frag(input)?;
    let input = expect_char(
        input,
        ':',
        "The minutes and seconds must be separated by ':'",
    )?;
    let (second, input) = second_frag(input)?;
    // Hour 24 is only valid as the end-of-day value 24:00:00, so a non-zero minute
    // OR a non-zero second is enough to reject the value.
    if hour == 24 && (minute != 0 || second != Decimal::from(0)) {
        return Err(XsdParseError::msg(
            "Times are not allowed to be after 24:00:00",
        ));
    }
    let (timezone_offset, input) = optional_end(input, timezone_frag)?;
    Ok((Time::new(hour, minute, second, timezone_offset)?, input))
}

/// Parses a complete time lexical value, rejecting any trailing characters.
pub fn parse_time(input: &str) -> Result<Time, XsdParseError> {
    ensure_complete(input, time_lexical_rep)
}
// [18] dateLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag timezoneFrag? Constraint: Day-of-month Representations
/// Reads a date (year, month, day and optional timezone) from the start of `input`
/// and returns it with the unparsed remainder.
fn date_lexical_rep(input: &str) -> Result<(Date, &str), XsdParseError> {
    let (year, rest) = year_frag(input)?;
    let rest = expect_char(rest, '-', "The year and month must be separated by '-'")?;
    let (month, rest) = month_frag(rest)?;
    let rest = expect_char(rest, '-', "The month and day must be separated by '-'")?;
    let (day, rest) = day_frag(rest)?;
    let (timezone_offset, rest) = optional_end(rest, timezone_frag)?;
    let date = Date::new(year, month, day, timezone_offset)?;
    Ok((date, rest))
}

/// Parses a complete date lexical value, rejecting any trailing characters.
pub fn parse_date(input: &str) -> Result<Date, XsdParseError> {
    ensure_complete(input, date_lexical_rep)
}
// [19] gYearMonthLexicalRep ::= yearFrag '-' monthFrag timezoneFrag?
/// Reads a gYearMonth (year, month and optional timezone) from the start of `input`
/// and returns it with the unparsed remainder.
fn g_year_month_lexical_rep(input: &str) -> Result<(GYearMonth, &str), XsdParseError> {
    let (year, rest) = year_frag(input)?;
    let rest = expect_char(rest, '-', "The year and month must be separated by '-'")?;
    let (month, rest) = month_frag(rest)?;
    let (timezone_offset, rest) = optional_end(rest, timezone_frag)?;
    let year_month = GYearMonth::new(year, month, timezone_offset)?;
    Ok((year_month, rest))
}

/// Parses a complete gYearMonth lexical value, rejecting any trailing characters.
pub fn parse_g_year_month(input: &str) -> Result<GYearMonth, XsdParseError> {
    ensure_complete(input, g_year_month_lexical_rep)
}
// [20] gYearLexicalRep ::= yearFrag timezoneFrag?
/// Reads a gYear (year and optional timezone) from the start of `input`
/// and returns it with the unparsed remainder.
fn g_year_lexical_rep(input: &str) -> Result<(GYear, &str), XsdParseError> {
    let (year, rest) = year_frag(input)?;
    let (timezone_offset, rest) = optional_end(rest, timezone_frag)?;
    let g_year = GYear::new(year, timezone_offset)?;
    Ok((g_year, rest))
}

/// Parses a complete gYear lexical value, rejecting any trailing characters.
pub fn parse_g_year(input: &str) -> Result<GYear, XsdParseError> {
    ensure_complete(input, g_year_lexical_rep)
}
// [21] gMonthDayLexicalRep ::= '--' monthFrag '-' dayFrag timezoneFrag? Constraint: Day-of-month Representations
/// Reads a gMonthDay (`--MM-DD` and optional timezone) from the start of `input`
/// and returns it with the unparsed remainder.
fn g_month_day_lexical_rep(input: &str) -> Result<(GMonthDay, &str), XsdParseError> {
    // The "--" prefix is consumed one dash at a time.
    let mut rest = input;
    for _ in 0..2 {
        rest = expect_char(rest, '-', "gMonthDay values must start with '--'")?;
    }
    let (month, rest) = month_frag(rest)?;
    let rest = expect_char(rest, '-', "The month and day must be separated by '-'")?;
    let (day, rest) = day_frag(rest)?;
    let (timezone_offset, rest) = optional_end(rest, timezone_frag)?;
    let month_day = GMonthDay::new(month, day, timezone_offset)?;
    Ok((month_day, rest))
}

/// Parses a complete gMonthDay lexical value, rejecting any trailing characters.
pub fn parse_g_month_day(input: &str) -> Result<GMonthDay, XsdParseError> {
    ensure_complete(input, g_month_day_lexical_rep)
}
// [22] gDayLexicalRep ::= '---' dayFrag timezoneFrag?
/// Reads a gDay (`---DD` and optional timezone) from the start of `input`
/// and returns it with the unparsed remainder.
fn g_day_lexical_rep(input: &str) -> Result<(GDay, &str), XsdParseError> {
    // The "---" prefix is consumed one dash at a time.
    let mut rest = input;
    for _ in 0..3 {
        rest = expect_char(rest, '-', "gDay values must start with '---'")?;
    }
    let (day, rest) = day_frag(rest)?;
    let (timezone_offset, rest) = optional_end(rest, timezone_frag)?;
    let g_day = GDay::new(day, timezone_offset)?;
    Ok((g_day, rest))
}

/// Parses a complete gDay lexical value, rejecting any trailing characters.
pub fn parse_g_day(input: &str) -> Result<GDay, XsdParseError> {
    ensure_complete(input, g_day_lexical_rep)
}
// [23] gMonthLexicalRep ::= '--' monthFrag timezoneFrag?
/// Reads a gMonth (`--MM` and optional timezone) from the start of `input`
/// and returns it with the unparsed remainder.
fn g_month_lexical_rep(input: &str) -> Result<(GMonth, &str), XsdParseError> {
    // The "--" prefix is consumed one dash at a time.
    let mut rest = input;
    for _ in 0..2 {
        rest = expect_char(rest, '-', "gMonth values must start with '--'")?;
    }
    let (month, rest) = month_frag(rest)?;
    let (timezone_offset, rest) = optional_end(rest, timezone_frag)?;
    let g_month = GMonth::new(month, timezone_offset)?;
    Ok((g_month, rest))
}

/// Parses a complete gMonth lexical value, rejecting any trailing characters.
pub fn parse_g_month(input: &str) -> Result<GMonth, XsdParseError> {
    ensure_complete(input, g_month_lexical_rep)
}
// [56] yearFrag ::= '-'? (([1-9] digit digit digit+) | ('0' digit digit digit))
/// Reads an optionally negative year of at least four digits from the start of `input`.
///
/// Returns the signed year and the unparsed remainder.
fn year_frag(input: &str) -> Result<(i64, &str), XsdParseError> {
    let (is_negative, unsigned) = match input.strip_prefix('-') {
        Some(left) => (true, left),
        None => (false, input),
    };
    let (digits, rest) = integer_prefix(unsigned);
    if digits.len() < 4 {
        return Err(XsdParseError::msg("The year should be encoded on 4 digits"));
    }
    if digits.len() > 4 && digits.starts_with('0') {
        return Err(XsdParseError::msg(
            "The years value must not start with 0 if it can be encoded in at least 4 digits",
        ));
    }
    // `digits` holds ASCII digits only, so the parsed value is non-negative and
    // negating it cannot overflow.
    let year = i64::from_str(digits)?;
    Ok((if is_negative { -year } else { year }, rest))
}
// [57] monthFrag ::= ('0' [1-9]) | ('1' [0-2])
/// Reads a two-digit month in `01..=12` from the start of `input`.
///
/// Returns the month and the unparsed remainder.
fn month_frag(input: &str) -> Result<(u8, &str), XsdParseError> {
    let (digits, rest) = integer_prefix(input);
    if digits.len() != 2 {
        return Err(XsdParseError::msg("Month must be encoded with two digits"));
    }
    let month = u8::from_str(digits)?;
    if matches!(month, 1..=12) {
        Ok((month, rest))
    } else {
        Err(XsdParseError::msg("Month must be between 01 and 12"))
    }
}
// [58] dayFrag ::= ('0' [1-9]) | ([12] digit) | ('3' [01])
/// Reads a two-digit day of month in `01..=31` from the start of `input`.
///
/// Returns the day and the unparsed remainder.
fn day_frag(input: &str) -> Result<(u8, &str), XsdParseError> {
    let (digits, rest) = integer_prefix(input);
    if digits.len() != 2 {
        return Err(XsdParseError::msg("Day must be encoded with two digits"));
    }
    let day = u8::from_str(digits)?;
    if matches!(day, 1..=31) {
        Ok((day, rest))
    } else {
        Err(XsdParseError::msg("Day must be between 01 and 31"))
    }
}
// [59] hourFrag ::= ([01] digit) | ('2' [0-3])
// We also allow 24 for ease of parsing
/// Reads a two-digit hour in `00..=24` from the start of `input`.
///
/// Returns the hour and the unparsed remainder.
fn hour_frag(input: &str) -> Result<(u8, &str), XsdParseError> {
    let (digits, rest) = integer_prefix(input);
    if digits.len() != 2 {
        return Err(XsdParseError::msg("Hours must be encoded with two digits"));
    }
    let hour = u8::from_str(digits)?;
    // The value is unsigned, so only the upper bound needs checking.
    if hour > 24 {
        return Err(XsdParseError::msg("Hours must be between 00 and 24"));
    }
    Ok((hour, rest))
}
// [60] minuteFrag ::= [0-5] digit
/// Reads a two-digit minute in `00..=59` from the start of `input`.
///
/// Returns the minute and the unparsed remainder.
fn minute_frag(input: &str) -> Result<(u8, &str), XsdParseError> {
    let (digits, rest) = integer_prefix(input);
    if digits.len() != 2 {
        return Err(XsdParseError::msg(
            "Minutes must be encoded with two digits",
        ));
    }
    let minute = u8::from_str(digits)?;
    // The value is unsigned, so only the upper bound needs checking.
    if minute > 59 {
        return Err(XsdParseError::msg("Minutes must be between 00 and 59"));
    }
    Ok((minute, rest))
}
// [61] secondFrag ::= ([0-5] digit) ('.' digit+)?
/// Reads a seconds value (two integer digits with an optional fractional part)
/// from the start of `input`.
///
/// Returns the seconds and the unparsed remainder.
fn second_frag(input: &str) -> Result<(Decimal, &str), XsdParseError> {
    let (number_str, rest) = decimal_prefix(input);
    // Only the digits before the (optional) dot count towards the two-digit rule.
    let integer_digits = number_str
        .find('.')
        .map_or(number_str, |dot| &number_str[..dot]);
    if integer_digits.len() != 2 {
        return Err(XsdParseError::msg(
            "Seconds must be encoded with two digits",
        ));
    }
    let seconds = Decimal::from_str(number_str)?;
    let lower = Decimal::from(0);
    let upper = Decimal::from(60);
    if seconds < lower || seconds >= upper {
        return Err(XsdParseError::msg("Seconds must be between 00 and 60"));
    }
    if number_str.ends_with('.') {
        return Err(XsdParseError::msg(
            "Seconds are not allowed to end with a dot",
        ));
    }
    Ok((seconds, rest))
}
// [63] timezoneFrag ::= 'Z' | ('+' | '-') (('0' digit | '1' [0-3]) ':' minuteFrag | '14:00')
/// Reads a timezone (`Z` or a signed `hh:mm` offset) from the start of `input`.
///
/// Returns the offset and the unparsed remainder.
fn timezone_frag(input: &str) -> Result<(TimezoneOffset, &str), XsdParseError> {
    if let Some(rest) = input.strip_prefix('Z') {
        return Ok((TimezoneOffset::UTC, rest));
    }
    // A missing sign is treated like '+'.
    let (sign, rest): (i16, &str) = match input.strip_prefix('-') {
        Some(rest) => (-1, rest),
        None => (1, input.strip_prefix('+').unwrap_or(input)),
    };
    let (hour_digits, rest) = integer_prefix(rest);
    if hour_digits.len() != 2 {
        return Err(XsdParseError::msg(
            "The timezone hours must be encoded with two digits",
        ));
    }
    let hours = i16::from_str(hour_digits)?;
    let rest = expect_char(
        rest,
        ':',
        "The timezone hours and minutes must be separated by ':'",
    )?;
    let (minutes, rest) = minute_frag(rest)?;
    // Offsets go up to 13:59, plus exactly 14:00.
    if hours > 14 || (hours == 14 && minutes != 0) {
        return Err(XsdParseError::msg(
            "The timezone hours must be between 00 and 13",
        ));
    }
    let total_minutes = sign * (hours * 60 + i16::from(minutes));
    Ok((TimezoneOffset::new(total_minutes)?, rest))
}
/// Runs `parse` on `input` and requires it to consume the whole input.
///
/// Any leftover characters are reported as an error.
fn ensure_complete<T>(
    input: &str,
    parse: impl FnOnce(&str) -> Result<(T, &str), XsdParseError>,
) -> Result<T, XsdParseError> {
    let (value, rest) = parse(input)?;
    if rest.is_empty() {
        Ok(value)
    } else {
        Err(XsdParseError::msg("Unrecognized value suffix"))
    }
}
/// Consumes `constant` from the start of `input` and returns the remainder,
/// or fails with `error_message` if `input` does not start with it.
fn expect_char<'a>(
    input: &'a str,
    constant: char,
    error_message: &'static str,
) -> Result<&'a str, XsdParseError> {
    input
        .strip_prefix(constant)
        .ok_or_else(|| XsdParseError::msg(error_message))
}
/// Splits `input` into its longest leading run of ASCII digits and the remainder.
///
/// The first element is empty when `input` does not start with a digit.
fn integer_prefix(input: &str) -> (&str, &str) {
    let end = input
        .char_indices()
        .find(|&(_, c)| !c.is_ascii_digit())
        .map_or(input.len(), |(index, _)| index);
    input.split_at(end)
}
/// Splits `input` into its longest leading run of ASCII digits containing at most
/// one `.` and the remainder.
///
/// A second `.` ends the prefix; the first element is empty when `input` starts
/// with neither a digit nor a dot.
fn decimal_prefix(input: &str) -> (&str, &str) {
    let mut dot_seen = false;
    let end = input
        .char_indices()
        .find(|&(_, c)| {
            if c.is_ascii_digit() {
                false
            } else if c == '.' && !dot_seen {
                // The first dot still belongs to the number.
                dot_seen = true;
                false
            } else {
                true
            }
        })
        .map_or(input.len(), |(index, _)| index);
    input.split_at(end)
}
/// Applies `parse` only if some input is left.
///
/// Returns `None` with the untouched (empty) input when nothing remains, otherwise
/// the parsed value wrapped in `Some` together with the parser's remainder.
fn optional_end<T>(
    input: &str,
    parse: impl FnOnce(&str) -> Result<(T, &str), XsdParseError>,
) -> Result<(Option<T>, &str), XsdParseError> {
    if input.is_empty() {
        return Ok((None, input));
    }
    let (value, rest) = parse(input)?;
    Ok((Some(value), rest))
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save