Compare commits
1 Commits
main
...
python-zig
Author | SHA1 | Date |
---|---|---|
Tpt | ae67bc1bfa | 1 year ago |
@ -0,0 +1,137 @@ |
||||
[build] |
||||
rustflags = [ |
||||
"-Wtrivial-casts", |
||||
"-Wtrivial-numeric-casts", |
||||
"-Wunsafe-code", |
||||
"-Wunused-lifetimes", |
||||
"-Wunused-qualifications", |
||||
# TODO: 1.63+ "-Wclippy::as-underscore", |
||||
# TODO: 1.65+ "-Wclippy::bool-to-int-with-if", |
||||
"-Wclippy::borrow-as-ptr", |
||||
"-Wclippy::case-sensitive-file-extension-comparisons", |
||||
"-Wclippy::cast-lossless", |
||||
"-Wclippy::cast-possible-truncation", |
||||
"-Wclippy::cast-possible-wrap", |
||||
"-Wclippy::cast-precision-loss", |
||||
"-Wclippy::cast-ptr-alignment", |
||||
"-Wclippy::cast-sign-loss", |
||||
"-Wclippy::checked-conversions", |
||||
"-Wclippy::clone-on-ref-ptr", |
||||
"-Wclippy::cloned-instead-of-copied", |
||||
"-Wclippy::copy-iterator", |
||||
"-Wclippy::dbg-macro", |
||||
"-Wclippy::decimal-literal-representation", |
||||
"-Wclippy::default-trait-access", |
||||
"-Wclippy::default-union-representation", |
||||
# TODO: 1.61+ "-Wclippy::deref-by-slicing", |
||||
# TODO: 1.63+ "-Wclippy::doc-link-with-quotes", |
||||
# TODO: 1.62+ "-Wclippy::empty-drop", |
||||
"-Wclippy::empty-enum", |
||||
# TODO: on major version "-Wclippy::empty-structs-with-brackets", |
||||
"-Wclippy::enum-glob-use", |
||||
"-Wclippy::exit", |
||||
"-Wclippy::expect-used", |
||||
"-Wclippy::expl-impl-clone-on-copy", |
||||
"-Wclippy::explicit-deref-methods", |
||||
"-Wclippy::explicit-into-iter-loop", |
||||
"-Wclippy::explicit-iter-loop", |
||||
"-Wclippy::filter-map-next", |
||||
"-Wclippy::flat-map-option", |
||||
"-Wclippy::fn-to-numeric-cast-any", |
||||
# TODO: 1.62+ "-Wclippy::format-push-string", |
||||
"-Wclippy::from-iter-instead-of-collect", |
||||
"-Wclippy::get-unwrap", |
||||
"-Wclippy::if-not-else", |
||||
"-Wclippy::if-then-some-else-none", |
||||
"-Wclippy::implicit-clone", |
||||
"-Wclippy::inconsistent-struct-constructor", |
||||
"-Wclippy::index-refutable-slice", |
||||
"-Wclippy::inefficient-to-string", |
||||
"-Wclippy::inline-always", |
||||
"-Wclippy::inline-asm-x86-att-syntax", |
||||
"-Wclippy::inline-asm-x86-intel-syntax", |
||||
"-Wclippy::invalid-upcast-comparisons", |
||||
"-Wclippy::items-after-statements", |
||||
"-Wclippy::large-digit-groups", |
||||
# TODO: 1.68+ "-Wclippy::large-futures", |
||||
"-Wclippy::large-stack-arrays", |
||||
"-Wclippy::large-types-passed-by-value", |
||||
"-Wclippy::let-underscore-must-use", |
||||
"-Wclippy::let-unit-value", |
||||
"-Wclippy::linkedlist", |
||||
"-Wclippy::lossy-float-literal", |
||||
"-Wclippy::macro-use-imports", |
||||
"-Wclippy::manual-assert", |
||||
# TODO: 1.65+ "-Wclippy::manual-instant-elapsed", |
||||
# TODO: 1.67+ "-Wclippy::manual-let-else", |
||||
"-Wclippy::manual-ok-or", |
||||
# TODO: 1.65+ "-Wclippy::manual-string-new", |
||||
"-Wclippy::many-single-char-names", |
||||
"-Wclippy::map-unwrap-or", |
||||
"-Wclippy::match-bool", |
||||
"-Wclippy::match-same-arms", |
||||
"-Wclippy::match-wildcard-for-single-variants", |
||||
"-Wclippy::maybe-infinite-iter", |
||||
"-Wclippy::mem-forget", |
||||
# TODO: 1.63+ "-Wclippy::mismatching-type-param-order", |
||||
"-Wclippy::multiple-inherent-impl", |
||||
"-Wclippy::mut-mut", |
||||
"-Wclippy::mutex-atomic", |
||||
"-Wclippy::naive-bytecount", |
||||
"-Wclippy::needless-bitwise-bool", |
||||
"-Wclippy::needless-continue", |
||||
"-Wclippy::needless-pass-by-value", |
||||
"-Wclippy::no-effect-underscore-binding", |
||||
# TODO: 1.69+ "-Wclippy::no-mangle-with-rust-abi", |
||||
"-Wclippy::non-ascii-literal", |
||||
"-Wclippy::print-stderr", |
||||
"-Wclippy::print-stdout", |
||||
"-Wclippy::ptr-as-ptr", |
||||
"-Wclippy::range-minus-one", |
||||
"-Wclippy::range-plus-one", |
||||
"-Wclippy::rc-buffer", |
||||
"-Wclippy::rc-mutex", |
||||
"-Wclippy::redundant-closure-for-method-calls", |
||||
"-Wclippy::redundant-else", |
||||
"-Wclippy::redundant-feature-names", |
||||
"-Wclippy::ref-binding-to-reference", |
||||
"-Wclippy::ref-option-ref", |
||||
"-Wclippy::rest-pat-in-fully-bound-structs", |
||||
"-Wclippy::return-self-not-must-use", |
||||
"-Wclippy::same-functions-in-if-condition", |
||||
# TODO: strange failure on 1.60 "-Wclippy::same-name-method", |
||||
# TODO: 1.68+ "-Wclippy::semicolon-outside-block", |
||||
"-Wclippy::single-match-else", |
||||
"-Wclippy::stable-sort-primitive", |
||||
"-Wclippy::str-to-string", |
||||
"-Wclippy::string-add", |
||||
"-Wclippy::string-add-assign", |
||||
"-Wclippy::string-lit-as-bytes", |
||||
"-Wclippy::string-to-string", |
||||
# TODO: 1.67+ "-Wclippy::suspicious-xor-used-as-pow", |
||||
"-Wclippy::todo", |
||||
"-Wclippy::transmute-ptr-to-ptr", |
||||
"-Wclippy::trivially-copy-pass-by-ref", |
||||
"-Wclippy::try-err", |
||||
"-Wclippy::unicode-not-nfc", |
||||
"-Wclippy::unimplemented", |
||||
# TODO: 1.66+ "-Wclippy::uninlined-format-args", |
||||
# TODO: 1.70+ "-Wclippy::unnecessary-box-returns", |
||||
# TODO: 1.61+ "-Wclippy::unnecessary-join", |
||||
# TODO: 1.67+ "-Wclippy::unnecessary-safety-comment", |
||||
# TODO: 1.67+ "-Wclippy::unnecessary-safety-doc", |
||||
"-Wclippy::unnecessary-self-imports", |
||||
"-Wclippy::unnecessary-wraps", |
||||
"-Wclippy::unneeded-field-pattern", |
||||
"-Wclippy::unnested-or-patterns", |
||||
"-Wclippy::unreadable-literal", |
||||
"-Wclippy::unseparated-literal-suffix", |
||||
"-Wclippy::unused-async", |
||||
"-Wclippy::unused-self", |
||||
"-Wclippy::use-debug", |
||||
"-Wclippy::used-underscore-binding", |
||||
"-Wclippy::verbose-bit-mask", |
||||
"-Wclippy::verbose-file-reads", |
||||
"-Wclippy::wildcard-dependencies", |
||||
"-Wclippy::zero-sized-map-values", |
||||
] |
@ -1,4 +1,5 @@ |
||||
FROM gcr.io/oss-fuzz-base/base-builder-rust:v1 |
||||
RUN apt-get update && apt-get install -y llvm-dev libclang-dev clang && apt-get clean && rm --recursive --force /var/lib/apt/lists/* |
||||
COPY . $SRC/oxigraph |
||||
WORKDIR oxigraph |
||||
COPY .clusterfuzzlite/build.sh $SRC/ |
||||
|
@ -0,0 +1,21 @@ |
||||
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust/.devcontainer/base.Dockerfile |
||||
|
||||
# [Choice] Debian OS version (use bullseye on local arm64/Apple Silicon): buster, bullseye |
||||
ARG VARIANT="bullseye" |
||||
FROM mcr.microsoft.com/vscode/devcontainers/rust:0-${VARIANT} |
||||
|
||||
# [Optional] Uncomment this section to install additional packages. |
||||
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ |
||||
&& apt-get -y install --no-install-recommends \ |
||||
python3 \ |
||||
python3-venv \ |
||||
python-is-python3 \ |
||||
libclang-dev |
||||
|
||||
ENV VIRTUAL_ENV=/opt/venv |
||||
RUN python -m venv $VIRTUAL_ENV |
||||
ENV PATH="$VIRTUAL_ENV/bin:$PATH" |
||||
RUN pip install --no-cache-dir -r python/requirements.dev.txt |
||||
|
||||
# Change owner to the devcontainer user |
||||
RUN chown -R 1000:1000 $VIRTUAL_ENV |
@ -0,0 +1,69 @@ |
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: |
||||
// https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust |
||||
{ |
||||
"name": "Rust", |
||||
"build": { |
||||
"dockerfile": "Dockerfile", |
||||
"args": { |
||||
// Use the VARIANT arg to pick a Debian OS version: buster, bullseye |
||||
// Use bullseye when on local on arm64/Apple Silicon. |
||||
"VARIANT": "bullseye" |
||||
} |
||||
}, |
||||
"runArgs": ["--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined"], |
||||
|
||||
// Configure tool-specific properties. |
||||
"customizations": { |
||||
// Configure properties specific to VS Code. |
||||
"vscode": { |
||||
// Set *default* container specific settings.json values on container create. |
||||
"settings": { |
||||
"lldb.executable": "/usr/bin/lldb", |
||||
// Prevent VS Code from watching files under ./target |
||||
"files.watcherExclude": { |
||||
"**/target/**": true |
||||
}, |
||||
"rust-analyzer.checkOnSave.command": "clippy", |
||||
|
||||
"python.defaultInterpreterPath": "/opt/venv/bin/python", |
||||
"python.linting.enabled": true, |
||||
"python.linting.pylintEnabled": true, |
||||
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8", |
||||
"python.formatting.blackPath": "/usr/local/py-utils/bin/black", |
||||
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf", |
||||
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit", |
||||
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8", |
||||
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy", |
||||
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle", |
||||
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle", |
||||
"python.linting.pylintPath": "/opt/venv/bin/pylint", |
||||
"python.testing.pytestPath": "/opt/venv/bin/pytest" |
||||
}, |
||||
|
||||
// Add the IDs of extensions you want installed when the container is created. |
||||
"extensions": [ |
||||
"vadimcn.vscode-lldb", |
||||
"mutantdino.resourcemonitor", |
||||
"rust-lang.rust-analyzer", |
||||
"tamasfe.even-better-toml", |
||||
"serayuzgur.crates", |
||||
"ms-python.python", |
||||
"ms-python.vscode-pylance", |
||||
"esbenp.prettier-vscode", |
||||
"stardog-union.stardog-rdf-grammars" |
||||
] |
||||
} |
||||
}, |
||||
|
||||
// Use 'forwardPorts' to make a list of ports inside the container available locally. |
||||
// "forwardPorts": [], |
||||
|
||||
// Use 'postCreateCommand' to run commands after the container is created. |
||||
"postCreateCommand": "git submodule update --init && cargo build", |
||||
|
||||
// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. |
||||
"remoteUser": "vscode", |
||||
"features": { |
||||
"python": "3.10" |
||||
} |
||||
} |
@ -1,27 +0,0 @@ |
||||
name: 'Setup Rust' |
||||
description: 'Setup Rust using Rustup' |
||||
inputs: |
||||
version: |
||||
description: 'Rust version to use. By default latest stable version' |
||||
required: false |
||||
default: 'stable' |
||||
component: |
||||
description: 'Rust extra component to install like clippy' |
||||
required: false |
||||
target: |
||||
description: 'Rust extra target to install like wasm32-unknown-unknown' |
||||
required: false |
||||
runs: |
||||
using: "composite" |
||||
steps: |
||||
- run: rustup update |
||||
shell: bash |
||||
- run: rustup default ${{ inputs.version }} |
||||
shell: bash |
||||
- run: rustup component add ${{ inputs.component }} |
||||
shell: bash |
||||
if: ${{ inputs.component }} |
||||
- run: rustup target add ${{ inputs.target }} |
||||
shell: bash |
||||
if: ${{ inputs.target }} |
||||
- uses: Swatinem/rust-cache@v2 |
@ -1,11 +0,0 @@ |
||||
# Builds RocksDB v8.0.0 as a shared library (unless a checkout is already present) and installs it system-wide. |
||||
# The existence check uses `-d` because the clone is a directory; `-f` only matches regular files and never detected it. |
||||
if [ -d "rocksdb" ] |
||||
then |
||||
cd rocksdb || exit |
||||
else |
||||
git clone https://github.com/facebook/rocksdb.git |
||||
cd rocksdb || exit |
||||
git checkout v8.0.0 |
||||
make shared_lib |
||||
fi |
||||
sudo make install-shared |
||||
# Refresh the dynamic linker cache for /usr/local/lib after installing the shared library. |
||||
sudo ldconfig /usr/local/lib |
@ -0,0 +1,28 @@ |
||||
name: Change tests |
||||
|
||||
on: [push] |
||||
|
||||
concurrency: |
||||
group: ${{ github.workflow }}-${{ github.ref }} |
||||
cancel-in-progress: true |
||||
|
||||
jobs: |
||||
python: |
||||
runs-on: ubuntu-latest |
||||
steps: |
||||
- uses: actions/checkout@v3 |
||||
with: |
||||
submodules: true |
||||
- run: rustup update && rustup target add aarch64-unknown-linux-gnu |
||||
- uses: Swatinem/rust-cache@v2 |
||||
- uses: actions/setup-python@v4 |
||||
with: |
||||
python-version: "3.10" |
||||
cache: pip |
||||
cache-dependency-path: '**/requirements.dev.txt' |
||||
- run: pip install -r python/requirements.dev.txt |
||||
- run: maturin build -m python/Cargo.toml --zig --compatibility manylinux2014 --target aarch64-unknown-linux-gnu --features abi3 |
||||
- uses: docker/setup-qemu-action@v2 |
||||
with: |
||||
platforms: linux/aarch64 |
||||
- run: docker run -v "$(pwd)":/workdir --platform linux/aarch64 quay.io/pypa/manylinux2014_aarch64 /bin/bash /workdir/.github/workflows/zig_test.sh |
@ -0,0 +1,9 @@ |
||||
# Installs the pyoxigraph wheel found in target/wheels into a fresh virtualenv and runs the Python unit tests.
# Expects the repository to be available at /workdir.
cd /workdir || exit |
||||
python3.10 -m venv venv |
||||
source venv/bin/activate |
||||
pip install auditwheel |
||||
# Print auditwheel's report for the built wheel(s).
auditwheel show target/wheels/*.whl |
||||
# --no-index restricts pip to the local wheel directory given by --find-links.
pip install --no-index --find-links=target/wheels/ pyoxigraph |
||||
rm -r target/wheels |
||||
cd python/tests || exit |
||||
python -m unittest |
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@ |
||||
avoid-breaking-exported-api = false |
||||
avoid-breaking-exported-api = true |
||||
cognitive-complexity-threshold = 50 |
||||
too-many-arguments-threshold = 10 |
||||
type-complexity-threshold = 500 |
Before Width: | Height: | Size: 4.6 KiB |
@ -1,35 +0,0 @@ |
||||
+------------------+ +----------------+ +-----------------+ |
||||
+ oxigraph CLI {r} + + pyoxigraph {p} + + oxigraph JS {j} + |
||||
+------------------+ +----------------+ +-----------------+ |
||||
|
||||
+---------------------------------------------------------------------------+ |
||||
+ oxigraph (Rust) {r} + |
||||
+---------------------------------------------------------------------------+ |
||||
|
||||
+----------------------------+ +-------------+ |
||||
+ oxrdfio {r} + + sparopt {r} + |
||||
+----------------------------+ +-------------+ |
||||
|
||||
+-----------+ +--------------+ +-----------------+ +----------------+ |
||||
+ oxttl {r} + + oxrdfxml {r} + + spargebra {r} + + sparesults {r} + |
||||
+-----------+ +--------------+ +-----------------+ +----------------+ |
||||
|
||||
+-----------------------------------------------------------------------+ |
||||
+ oxrdf {r} + |
||||
+-----------------------------------------------------------------------+ |
||||
|
||||
+------------------+ |
||||
+ oxsdatatypes {r} + |
||||
+------------------+ |
||||
|
||||
|
||||
# Legend: |
||||
r = { |
||||
fill: papayawhip; |
||||
} |
||||
p = { |
||||
fill: lightyellow; |
||||
} |
||||
j = { |
||||
fill: lightgreen; |
||||
} |
@ -1,28 +0,0 @@ |
||||
#![no_main] |
||||
|
||||
use libfuzzer_sys::fuzz_target; |
||||
use oxttl::N3Parser; |
||||
|
||||
// Fuzz target: feeds the input to the N3 parser chunk by chunk and checks that the parser
// reports having reached its end once all input has been consumed.
fuzz_target!(|data: &[u8]| { |
||||
let mut quads = Vec::new(); |
||||
let mut parser = N3Parser::new() |
||||
.with_base_iri("http://example.com/") |
||||
.unwrap() |
||||
.parse(); |
||||
// The byte 0xFF acts as the chunk separator: each chunk is pushed separately.
for chunk in data.split(|c| *c == 0xFF) { |
||||
parser.extend_from_slice(chunk); |
||||
// Drain every result available so far; parse errors are ignored, only quads are kept.
while let Some(result) = parser.read_next() { |
||||
if let Ok(quad) = result { |
||||
quads.push(quad); |
||||
} |
||||
} |
||||
} |
||||
// Signal the end of the input, then drain whatever the parser can still produce.
parser.end(); |
||||
while let Some(result) = parser.read_next() { |
||||
if let Ok(quad) = result { |
||||
quads.push(quad); |
||||
} |
||||
} |
||||
assert!(parser.is_end()); |
||||
//TODO: serialize
|
||||
}); |
@ -1,84 +0,0 @@ |
||||
#![no_main] |
||||
|
||||
use libfuzzer_sys::fuzz_target; |
||||
use oxrdf::Quad; |
||||
use oxttl::{NQuadsParser, NQuadsSerializer}; |
||||
|
||||
/// Parses `chunks` incrementally with an `NQuadsParser` that has quoted triples enabled.
///
/// Each chunk is pushed to the reader and every result available so far is drained before the
/// next chunk is pushed. When `unchecked` is true the parser is switched to its `unchecked()` mode.
///
/// Returns the parsed quads and the string form of every parse error, each in production order.
fn parse<'a>( |
||||
chunks: impl IntoIterator<Item = &'a [u8]>, |
||||
unchecked: bool, |
||||
) -> (Vec<Quad>, Vec<String>) { |
||||
let mut quads = Vec::new(); |
||||
let mut errors = Vec::new(); |
||||
let mut parser = NQuadsParser::new().with_quoted_triples(); |
||||
if unchecked { |
||||
parser = parser.unchecked(); |
||||
} |
||||
let mut reader = parser.parse(); |
||||
for chunk in chunks { |
||||
reader.extend_from_slice(chunk); |
||||
while let Some(result) = reader.read_next() { |
||||
match result { |
||||
Ok(quad) => quads.push(quad), |
||||
Err(error) => errors.push(error.to_string()), |
||||
} |
||||
} |
||||
} |
||||
// Signal the end of the input, then drain the remaining results.
reader.end(); |
||||
while let Some(result) = reader.read_next() { |
||||
match result { |
||||
Ok(quad) => quads.push(quad), |
||||
Err(error) => errors.push(error.to_string()), |
||||
} |
||||
} |
||||
assert!(reader.is_end()); |
||||
(quads, errors) |
||||
} |
||||
|
||||
// Fuzz target for N-Quads. It checks that:
// - parsing gives the same quads and errors whether or not the input is split into chunks,
// - unchecked parsing gives the same quads with no errors when the checked parse had no errors,
// - serializing the quads and parsing the result again gives back the same quads.
fuzz_target!(|data: &[u8]| { |
||||
// We parse with splitting
|
||||
let (quads, errors) = parse(data.split(|c| *c == 0xFF), false); |
||||
// We parse without splitting
|
||||
// Dropping the 0xFF separator bytes gives the concatenation of the chunks used above.
let (quads_without_split, errors_without_split) = parse( |
||||
[data |
||||
.iter() |
||||
.copied() |
||||
.filter(|c| *c != 0xFF) |
||||
.collect::<Vec<_>>() |
||||
.as_slice()], |
||||
false, |
||||
); |
||||
assert_eq!(quads, quads_without_split); |
||||
assert_eq!(errors, errors_without_split); |
||||
|
||||
// We test also unchecked if valid
|
||||
if errors.is_empty() { |
||||
let (quads_unchecked, errors_unchecked) = parse(data.split(|c| *c == 0xFF), true); |
||||
assert!(errors_unchecked.is_empty()); |
||||
assert_eq!(quads, quads_unchecked); |
||||
} |
||||
|
||||
// We serialize
|
||||
let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new()); |
||||
for quad in &quads { |
||||
writer.write_quad(quad).unwrap(); |
||||
} |
||||
let new_serialization = writer.finish(); |
||||
|
||||
// We parse the serialization
|
||||
// On failure the panic message includes the serialization, the quads and the original input.
let new_quads = NQuadsParser::new() |
||||
.with_quoted_triples() |
||||
.parse_read(new_serialization.as_slice()) |
||||
.collect::<Result<Vec<_>, _>>() |
||||
.map_err(|e| { |
||||
format!( |
||||
"Error on {:?} from {quads:?} based on {:?}: {e}", |
||||
String::from_utf8_lossy(&new_serialization), |
||||
String::from_utf8_lossy(data) |
||||
) |
||||
}) |
||||
.unwrap(); |
||||
|
||||
// We check the roundtrip has not changed anything
|
||||
assert_eq!(new_quads, quads); |
||||
}); |
@ -1,35 +0,0 @@ |
||||
#![no_main] |
||||
|
||||
use libfuzzer_sys::fuzz_target; |
||||
use oxrdfxml::{RdfXmlParser, RdfXmlSerializer}; |
||||
|
||||
// Fuzz target for RDF/XML: parses the input, serializes the resulting triples, and checks that
// parsing the serialization again gives back the same triples.
fuzz_target!(|data: &[u8]| { |
||||
// We parse
|
||||
// flatten() keeps only the successfully parsed triples and drops the errors.
let triples = RdfXmlParser::new() |
||||
.parse_read(data) |
||||
.flatten() |
||||
.collect::<Vec<_>>(); |
||||
|
||||
// We serialize
|
||||
let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new()); |
||||
for triple in &triples { |
||||
writer.write_triple(triple).unwrap(); |
||||
} |
||||
let new_serialization = writer.finish().unwrap(); |
||||
|
||||
// We parse the serialization
|
||||
// On failure the panic message includes the serialization, the triples and the original input.
let new_triples = RdfXmlParser::new() |
||||
.parse_read(new_serialization.as_slice()) |
||||
.collect::<Result<Vec<_>, _>>() |
||||
.map_err(|e| { |
||||
format!( |
||||
"Error on {:?} from {triples:?} based on {:?}: {e}", |
||||
String::from_utf8_lossy(&new_serialization), |
||||
String::from_utf8_lossy(data) |
||||
) |
||||
}) |
||||
.unwrap(); |
||||
|
||||
// We check the roundtrip has not changed anything
|
||||
assert_eq!(new_triples, triples); |
||||
}); |
@ -1,166 +0,0 @@ |
||||
#![no_main] |
||||
|
||||
use libfuzzer_sys::fuzz_target; |
||||
use oxrdf::graph::CanonicalizationAlgorithm; |
||||
use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple}; |
||||
use oxttl::{TriGParser, TriGSerializer}; |
||||
|
||||
/// Parses `chunks` incrementally with a `TriGParser` that has quoted triples enabled and uses
/// `http://example.com/` as base IRI.
///
/// Each chunk is pushed to the reader and every result available so far is drained before the
/// next chunk is pushed. When `unchecked` is true the parser is switched to its `unchecked()` mode.
///
/// Returns the parsed quads, the string form of every parse error, and the prefixes reported by
/// the reader after the end of the input, as owned pairs of strings.
fn parse<'a>( |
||||
chunks: impl IntoIterator<Item = &'a [u8]>, |
||||
unchecked: bool, |
||||
) -> (Vec<Quad>, Vec<String>, Vec<(String, String)>) { |
||||
let mut quads = Vec::new(); |
||||
let mut errors = Vec::new(); |
||||
let mut parser = TriGParser::new() |
||||
.with_quoted_triples() |
||||
.with_base_iri("http://example.com/") |
||||
.unwrap(); |
||||
if unchecked { |
||||
parser = parser.unchecked(); |
||||
} |
||||
let mut reader = parser.parse(); |
||||
for chunk in chunks { |
||||
reader.extend_from_slice(chunk); |
||||
while let Some(result) = reader.read_next() { |
||||
match result { |
||||
Ok(quad) => quads.push(quad), |
||||
Err(error) => errors.push(error.to_string()), |
||||
} |
||||
} |
||||
} |
||||
// Signal the end of the input, then drain the remaining results.
reader.end(); |
||||
while let Some(result) = reader.read_next() { |
||||
match result { |
||||
Ok(quad) => quads.push(quad), |
||||
Err(error) => errors.push(error.to_string()), |
||||
} |
||||
} |
||||
assert!(reader.is_end()); |
||||
( |
||||
quads, |
||||
errors, |
||||
reader |
||||
.prefixes() |
||||
.map(|(k, v)| (k.to_owned(), v.to_owned())) |
||||
.collect(), |
||||
) |
||||
} |
||||
|
||||
/// Counts the blank node occurrences in the subject and object positions of `triple`,
/// recursing into quoted triples. Occurrences are counted, not distinct blank nodes.
fn count_triple_blank_nodes(triple: &Triple) -> usize { |
||||
(match &triple.subject { |
||||
Subject::BlankNode(_) => 1, |
||||
Subject::Triple(t) => count_triple_blank_nodes(t), |
||||
_ => 0, |
||||
}) + (match &triple.object { |
||||
Term::BlankNode(_) => 1, |
||||
Term::Triple(t) => count_triple_blank_nodes(t), |
||||
_ => 0, |
||||
}) |
||||
} |
||||
|
||||
/// Counts the blank node occurrences in `quad`: subject and object positions (recursing into
/// quoted triples via `count_triple_blank_nodes`), plus one if the graph name is a blank node.
fn count_quad_blank_nodes(quad: &Quad) -> usize { |
||||
(match &quad.subject { |
||||
Subject::BlankNode(_) => 1, |
||||
Subject::Triple(t) => count_triple_blank_nodes(t), |
||||
_ => 0, |
||||
}) + (match &quad.object { |
||||
Term::BlankNode(_) => 1, |
||||
Term::Triple(t) => count_triple_blank_nodes(t), |
||||
_ => 0, |
||||
}) + usize::from(matches!(quad.graph_name, GraphName::BlankNode(_))) |
||||
} |
||||
|
||||
/// Serializes `quads` with a `TriGSerializer` configured with the given `prefixes` and returns
/// the produced bytes.
///
/// Panics (through `unwrap`) if a prefix is rejected, a quad cannot be written, or finishing fails.
fn serialize_quads(quads: &[Quad], prefixes: Vec<(String, String)>) -> Vec<u8> { |
||||
let mut serializer = TriGSerializer::new(); |
||||
for (prefix_name, prefix_iri) in prefixes { |
||||
serializer = serializer.with_prefix(prefix_name, prefix_iri).unwrap(); |
||||
} |
||||
let mut writer = serializer.serialize_to_write(Vec::new()); |
||||
for quad in quads { |
||||
writer.write_quad(quad).unwrap(); |
||||
} |
||||
writer.finish().unwrap() |
||||
} |
||||
|
||||
// Fuzz target for TriG. It parses the input three ways (split into chunks, unsplit, and split
// in unchecked mode), compares the outcomes, then checks that serializing the quads with the
// parsed prefixes and parsing the result again gives back the same quads.
fuzz_target!(|data: &[u8]| { |
||||
// We parse with splitting
|
||||
let (quads, errors, prefixes) = parse(data.split(|c| *c == 0xFF), false); |
||||
// We parse without splitting
|
||||
// Dropping the 0xFF separator bytes gives the concatenation of the chunks used above.
let (quads_without_split, errors_without_split, _) = parse( |
||||
[data |
||||
.iter() |
||||
.copied() |
||||
.filter(|c| *c != 0xFF) |
||||
.collect::<Vec<_>>() |
||||
.as_slice()], |
||||
false, |
||||
); |
||||
let (quads_unchecked, errors_unchecked, _) = parse(data.split(|c| *c == 0xFF), true); |
||||
if errors.is_empty() { |
||||
assert!(errors_unchecked.is_empty()); |
||||
} |
||||
|
||||
let bnodes_count = quads.iter().map(count_quad_blank_nodes).sum::<usize>(); |
||||
// Without any blank node the quad lists are compared directly.
if bnodes_count == 0 { |
||||
assert_eq!( |
||||
quads, |
||||
quads_without_split, |
||||
"With split:\n{}\nWithout split:\n{}", |
||||
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), |
||||
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new())) |
||||
); |
||||
if errors.is_empty() { |
||||
assert_eq!( |
||||
quads, |
||||
quads_unchecked, |
||||
"Validating:\n{}\nUnchecked:\n{}", |
||||
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), |
||||
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new())) |
||||
); |
||||
} |
||||
// With 1 to 4 blank node occurrences, canonicalized datasets are compared instead of the raw
// quad lists; with more than 4 the quads are not compared at all.
// NOTE(review): presumably because blank node labels may differ between parses and
// canonicalization gets expensive with many blank nodes - confirm.
} else if bnodes_count <= 4 { |
||||
let mut dataset_with_split = quads.iter().collect::<Dataset>(); |
||||
let mut dataset_without_split = quads_without_split.iter().collect::<Dataset>(); |
||||
dataset_with_split.canonicalize(CanonicalizationAlgorithm::Unstable); |
||||
dataset_without_split.canonicalize(CanonicalizationAlgorithm::Unstable); |
||||
assert_eq!( |
||||
dataset_with_split, |
||||
dataset_without_split, |
||||
"With split:\n{}\nWithout split:\n{}", |
||||
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), |
||||
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new())) |
||||
); |
||||
if errors.is_empty() { |
||||
let mut dataset_unchecked = quads_unchecked.iter().collect::<Dataset>(); |
||||
dataset_unchecked.canonicalize(CanonicalizationAlgorithm::Unstable); |
||||
assert_eq!( |
||||
dataset_with_split, |
||||
dataset_unchecked, |
||||
"Validating:\n{}\nUnchecked:\n{}", |
||||
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), |
||||
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new())) |
||||
); |
||||
} |
||||
} |
||||
// The error messages must match whatever the number of blank nodes.
assert_eq!(errors, errors_without_split); |
||||
|
||||
// We serialize
|
||||
let new_serialization = serialize_quads(&quads, prefixes); |
||||
|
||||
// We parse the serialization
|
||||
// On failure the panic message includes the serialization, the quads and the original input.
let new_quads = TriGParser::new() |
||||
.with_quoted_triples() |
||||
.parse_read(new_serialization.as_slice()) |
||||
.collect::<Result<Vec<_>, _>>() |
||||
.map_err(|e| { |
||||
format!( |
||||
"Error on {:?} from {quads:?} based on {:?}: {e}", |
||||
String::from_utf8_lossy(&new_serialization), |
||||
String::from_utf8_lossy(data) |
||||
) |
||||
}) |
||||
.unwrap(); |
||||
|
||||
// We check the roundtrip has not changed anything
|
||||
assert_eq!(new_quads, quads); |
||||
}); |
@ -1,26 +1,20 @@ |
||||
[package] |
||||
name = "oxigraph-js" |
||||
version.workspace = true |
||||
authors.workspace = true |
||||
license.workspace = true |
||||
name = "oxigraph_js" |
||||
version = "0.3.19" |
||||
authors = ["Tpt <thomas@pellissier-tanon.fr>"] |
||||
license = "MIT OR Apache-2.0" |
||||
readme = "README.md" |
||||
keywords = ["RDF", "N-Triples", "Turtle", "XML", "SPARQL"] |
||||
keywords = ["RDF", "N-Triples", "Turtle", "RDF/XML", "SPARQL"] |
||||
repository = "https://github.com/oxigraph/oxigraph/tree/main/js" |
||||
description = "JavaScript bindings of Oxigraph" |
||||
edition.workspace = true |
||||
rust-version.workspace = true |
||||
publish = false |
||||
edition = "2021" |
||||
|
||||
[lib] |
||||
crate-type = ["cdylib"] |
||||
name = "oxigraph" |
||||
doc = false |
||||
|
||||
[dependencies] |
||||
console_error_panic_hook.workspace = true |
||||
js-sys.workspace = true |
||||
oxigraph = { workspace = true, features = ["js"] } |
||||
wasm-bindgen.workspace = true |
||||
|
||||
[lints] |
||||
workspace = true |
||||
oxigraph = { version = "0.3.19", path="../lib" } |
||||
wasm-bindgen = "0.2" |
||||
js-sys = "0.3" |
||||
console_error_panic_hook = "0.1" |
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,20 +1,22 @@ |
||||
{ |
||||
"name": "oxigraph_tests", |
||||
"description": "Oxigraph JS build and tests", |
||||
"private": true, |
||||
"devDependencies": { |
||||
"@biomejs/biome": "^1.0.0", |
||||
"@rdfjs/data-model": "^2.0.1", |
||||
"mocha": "^10.0.0" |
||||
}, |
||||
"scripts": { |
||||
"fmt": "biome format . --write && biome check . --apply-unsafe && biome format . --write", |
||||
"test": "biome ci . && wasm-pack build --debug --target nodejs --weak-refs --reference-types && mocha", |
||||
"build": "wasm-pack build --release --target web --out-name web --weak-refs --reference-types && wasm-pack build --release --target nodejs --out-name node --weak-refs --reference-types && node build_package.js", |
||||
"release": "npm run build && npm publish ./pkg", |
||||
"pack": "npm run build && npm pack ./pkg" |
||||
}, |
||||
"standard": { |
||||
"ignore": ["pkg*"] |
||||
} |
||||
"name": "oxigraph_tests", |
||||
"description": "Oxigraph JS build and tests", |
||||
"private": true, |
||||
"devDependencies": { |
||||
"@rdfjs/data-model": "^2.0.1", |
||||
"mocha": "^10.0.0", |
||||
"rome": "^12.0.0" |
||||
}, |
||||
"scripts": { |
||||
"fmt": "rome format . --write && rome check . --apply-unsafe", |
||||
"test": "rome ci . && wasm-pack build --debug --target nodejs && mocha", |
||||
"build": "rm -rf pkg && wasm-pack build --release --target web --out-name web && mv pkg pkg-web && wasm-pack build --release --target nodejs --out-name node && mv pkg pkg-node && node build_package.js && rm -r pkg-web && rm -r pkg-node", |
||||
"release": "npm run build && npm publish ./pkg", |
||||
"pack": "npm run build && npm pack ./pkg" |
||||
}, |
||||
"standard": { |
||||
"ignore": [ |
||||
"pkg*" |
||||
] |
||||
} |
||||
} |
||||
|
@ -1,8 +1,7 @@ |
||||
{ |
||||
"$schema": "https://biomejs.dev/schemas/1.0.0/schema.json", |
||||
"formatter": { |
||||
"indentStyle": "space", |
||||
"indentWidth": 4, |
||||
"indentSize": 4, |
||||
"lineWidth": 100 |
||||
}, |
||||
"linter": { |
@ -0,0 +1,63 @@ |
||||
[package] |
||||
name = "oxigraph" |
||||
version = "0.3.19" |
||||
authors = ["Tpt <thomas@pellissier-tanon.fr>"] |
||||
license = "MIT OR Apache-2.0" |
||||
readme = "README.md" |
||||
keywords = ["RDF", "SPARQL", "graph-database", "database"] |
||||
categories = ["database-implementations"] |
||||
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib" |
||||
homepage = "https://oxigraph.org/" |
||||
documentation = "https://docs.rs/oxigraph" |
||||
description = """ |
||||
a SPARQL database and RDF toolkit |
||||
""" |
||||
edition = "2021" |
||||
rust-version = "1.60" |
||||
|
||||
[package.metadata.docs.rs] |
||||
all-features = true |
||||
|
||||
[features] |
||||
default = [] |
||||
http_client = ["oxhttp", "oxhttp/rustls"] |
||||
rocksdb_debug = [] |
||||
|
||||
[dependencies] |
||||
rand = "0.8" |
||||
md-5 = "0.10" |
||||
sha-1 = "0.10" |
||||
sha2 = "0.10" |
||||
digest = "0.10" |
||||
regex = "1" |
||||
oxilangtag = "0.1" |
||||
oxiri = "0.2" |
||||
rio_api = "0.8" |
||||
rio_turtle = "0.8" |
||||
rio_xml = "0.8" |
||||
hex = "0.4" |
||||
siphasher = "0.3" |
||||
lazy_static = "1" |
||||
json-event-parser = "0.1" |
||||
oxrdf = { version = "0.1.7", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] } |
||||
oxsdatatypes = { version = "0.1.3", path="oxsdatatypes" } |
||||
spargebra = { version = "0.2.8", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] } |
||||
sparesults = { version = "0.1.8", path="sparesults", features = ["rdf-star"] } |
||||
|
||||
[target.'cfg(not(target_family = "wasm"))'.dependencies] |
||||
libc = "0.2" |
||||
oxrocksdb-sys = { version = "0.3.19", path="../oxrocksdb-sys" } |
||||
oxhttp = { version = "0.1", optional = true } |
||||
|
||||
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] |
||||
getrandom = { version = "0.2", features = ["js"] } |
||||
js-sys = "0.3" |
||||
|
||||
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies] |
||||
criterion = "0.4" |
||||
oxhttp = "0.1" |
||||
zstd = "0.12" |
||||
|
||||
[[bench]] |
||||
name = "store" |
||||
harness = false |
@ -1,13 +1,72 @@ |
||||
Oxigraph Rust crates |
||||
==================== |
||||
|
||||
Oxigraph is implemented in Rust. |
||||
It is composed of a main library, [`oxigraph`](./oxigraph), and a set of smaller crates used by the `oxigraph` crate: |
||||
* [`oxrdf`](./oxrdf), datastructures encoding RDF basic concepts (the `model` module of the `oxigraph` crate). |
||||
* [`oxrdfio`](./oxrdfio), a unified parser and serializer API for RDF formats (the `io` module of the `oxigraph` crate). It itself relies on: |
||||
* [`oxttl`](./oxttl), N-Triple, N-Quad, Turtle, TriG and N3 parsing and serialization. |
||||
* [`oxrdfxml`](./oxrdfxml), RDF/XML parsing and serialization. |
||||
* [`spargebra`](./spargebra), a SPARQL parser. |
||||
* [`sparesults`](./sparesults), parsers and serializers for SPARQL result formats (the `sparql::results` module of the `oxigraph` crate). |
||||
* [`sparopt`](./sparopt), a SPARQL optimizer. |
||||
* [`oxsdatatypes`](./oxsdatatypes), an implementation of some XML Schema datatypes. |
||||
Oxigraph |
||||
======== |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph) |
||||
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) |
||||
|
||||
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. |
||||
|
||||
Its goal is to provide a compliant, safe and fast on-disk graph database. |
||||
It also provides a set of utility functions for reading, writing, and processing RDF files. |
||||
|
||||
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet. |
||||
|
||||
Oxigraph also provides [a standalone HTTP server](https://crates.io/crates/oxigraph_server) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library. |
||||
|
||||
|
||||
Oxigraph implements the following specifications: |
||||
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/). |
||||
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio). |
||||
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/). |
||||
|
||||
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture). |
||||
|
||||
The main entry point of Oxigraph is the [`Store`](store::Store) struct: |
||||
```rust |
||||
use oxigraph::store::Store; |
||||
use oxigraph::model::*; |
||||
use oxigraph::sparql::QueryResults; |
||||
|
||||
let store = Store::new().unwrap(); |
||||
|
||||
// insertion |
||||
let ex = NamedNode::new("http://example.com").unwrap(); |
||||
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph); |
||||
store.insert(&quad).unwrap(); |
||||
|
||||
// quad filter |
||||
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap(); |
||||
assert_eq!(vec![quad], results); |
||||
|
||||
// SPARQL query |
||||
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() { |
||||
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into())); |
||||
} |
||||
``` |
||||
|
||||
Some parts of this library are available as standalone crates: |
||||
* [`oxrdf`](https://crates.io/crates/oxrdf) provides data structures encoding basic RDF concepts (the `oxigraph::model` module). |
||||
* [`spargebra`](https://crates.io/crates/spargebra) provides a SPARQL parser. |
||||
* [`sparesults`](https://crates.io/crates/sparesults) provides parsers and serializers for SPARQL result formats. |
||||
|
||||
To build the library, don't forget to fetch the submodules: either run `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository together with its submodules, or run `git submodule update --init` to add the submodules to an already cloned repository. |
||||
|
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||
`<http://opensource.org/licenses/MIT>`) |
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
||||
|
@ -0,0 +1,265 @@ |
||||
use criterion::{criterion_group, criterion_main, Criterion, Throughput}; |
||||
use oxhttp::model::{Method, Request, Status}; |
||||
use oxigraph::io::GraphFormat; |
||||
use oxigraph::model::GraphNameRef; |
||||
use oxigraph::sparql::{Query, QueryResults, Update}; |
||||
use oxigraph::store::Store; |
||||
use rand::random; |
||||
use std::env::temp_dir; |
||||
use std::fs::{remove_dir_all, File}; |
||||
use std::io::{BufRead, BufReader, Cursor, Read}; |
||||
use std::path::{Path, PathBuf}; |
||||
|
||||
fn store_load(c: &mut Criterion) { |
||||
{ |
||||
let mut data = Vec::new(); |
||||
read_data("explore-1000.nt.zst") |
||||
.read_to_end(&mut data) |
||||
.unwrap(); |
||||
|
||||
let mut group = c.benchmark_group("store load"); |
||||
group.throughput(Throughput::Bytes(data.len() as u64)); |
||||
group.sample_size(10); |
||||
group.bench_function("load BSBM explore 1000 in memory", |b| { |
||||
b.iter(|| { |
||||
let store = Store::new().unwrap(); |
||||
do_load(&store, &data); |
||||
}) |
||||
}); |
||||
group.bench_function("load BSBM explore 1000 in on disk", |b| { |
||||
b.iter(|| { |
||||
let path = TempDir::default(); |
||||
let store = Store::open(&path).unwrap(); |
||||
do_load(&store, &data); |
||||
}) |
||||
}); |
||||
group.bench_function("load BSBM explore 1000 in on disk with bulk load", |b| { |
||||
b.iter(|| { |
||||
let path = TempDir::default(); |
||||
let store = Store::open(&path).unwrap(); |
||||
do_bulk_load(&store, &data); |
||||
}) |
||||
}); |
||||
} |
||||
|
||||
{ |
||||
let mut data = Vec::new(); |
||||
read_data("explore-10000.nt.zst") |
||||
.read_to_end(&mut data) |
||||
.unwrap(); |
||||
|
||||
let mut group = c.benchmark_group("store load large"); |
||||
group.throughput(Throughput::Bytes(data.len() as u64)); |
||||
group.sample_size(10); |
||||
group.bench_function("load BSBM explore 10000 in on disk with bulk load", |b| { |
||||
b.iter(|| { |
||||
let path = TempDir::default(); |
||||
let store = Store::open(&path).unwrap(); |
||||
do_bulk_load(&store, &data); |
||||
}) |
||||
}); |
||||
} |
||||
} |
||||
|
||||
fn do_load(store: &Store, data: &[u8]) { |
||||
store |
||||
.load_graph( |
||||
Cursor::new(&data), |
||||
GraphFormat::NTriples, |
||||
GraphNameRef::DefaultGraph, |
||||
None, |
||||
) |
||||
.unwrap(); |
||||
store.optimize().unwrap(); |
||||
} |
||||
|
||||
fn do_bulk_load(store: &Store, data: &[u8]) { |
||||
store |
||||
.bulk_loader() |
||||
.load_graph( |
||||
Cursor::new(&data), |
||||
GraphFormat::NTriples, |
||||
GraphNameRef::DefaultGraph, |
||||
None, |
||||
) |
||||
.unwrap(); |
||||
store.optimize().unwrap(); |
||||
} |
||||
|
||||
fn store_query_and_update(c: &mut Criterion) { |
||||
let mut data = Vec::new(); |
||||
read_data("explore-1000.nt.zst") |
||||
.read_to_end(&mut data) |
||||
.unwrap(); |
||||
|
||||
let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst") |
||||
.lines() |
||||
.map(|l| { |
||||
let l = l.unwrap(); |
||||
let mut parts = l.trim().split('\t'); |
||||
let kind = parts.next().unwrap(); |
||||
let operation = parts.next().unwrap(); |
||||
match kind { |
||||
"query" => Operation::Query(Query::parse(operation, None).unwrap()), |
||||
"update" => Operation::Update(Update::parse(operation, None).unwrap()), |
||||
_ => panic!("Unexpected operation kind {kind}"), |
||||
} |
||||
}) |
||||
.collect::<Vec<_>>(); |
||||
let query_operations = operations |
||||
.iter() |
||||
.filter(|o| matches!(o, Operation::Query(_))) |
||||
.cloned() |
||||
.collect::<Vec<_>>(); |
||||
|
||||
let mut group = c.benchmark_group("store operations"); |
||||
group.throughput(Throughput::Elements(operations.len() as u64)); |
||||
group.sample_size(10); |
||||
|
||||
{ |
||||
let memory_store = Store::new().unwrap(); |
||||
do_bulk_load(&memory_store, &data); |
||||
group.bench_function("BSBM explore 1000 query in memory", |b| { |
||||
b.iter(|| run_operation(&memory_store, &query_operations)) |
||||
}); |
||||
group.bench_function("BSBM explore 1000 queryAndUpdate in memory", |b| { |
||||
b.iter(|| run_operation(&memory_store, &operations)) |
||||
}); |
||||
} |
||||
|
||||
{ |
||||
let path = TempDir::default(); |
||||
let disk_store = Store::open(&path).unwrap(); |
||||
do_bulk_load(&disk_store, &data); |
||||
group.bench_function("BSBM explore 1000 query on disk", |b| { |
||||
b.iter(|| run_operation(&disk_store, &query_operations)) |
||||
}); |
||||
group.bench_function("BSBM explore 1000 queryAndUpdate on disk", |b| { |
||||
b.iter(|| run_operation(&disk_store, &operations)) |
||||
}); |
||||
} |
||||
} |
||||
|
||||
fn run_operation(store: &Store, operations: &[Operation]) { |
||||
for operation in operations { |
||||
match operation { |
||||
Operation::Query(q) => match store.query(q.clone()).unwrap() { |
||||
QueryResults::Boolean(_) => (), |
||||
QueryResults::Solutions(s) => { |
||||
for s in s { |
||||
s.unwrap(); |
||||
} |
||||
} |
||||
QueryResults::Graph(g) => { |
||||
for t in g { |
||||
t.unwrap(); |
||||
} |
||||
} |
||||
}, |
||||
Operation::Update(u) => store.update(u.clone()).unwrap(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn sparql_parsing(c: &mut Criterion) { |
||||
let mut data = Vec::new(); |
||||
read_data("explore-1000.nt.zst") |
||||
.read_to_end(&mut data) |
||||
.unwrap(); |
||||
|
||||
let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst") |
||||
.lines() |
||||
.map(|l| { |
||||
let l = l.unwrap(); |
||||
let mut parts = l.trim().split('\t'); |
||||
let kind = parts.next().unwrap(); |
||||
let operation = parts.next().unwrap(); |
||||
match kind { |
||||
"query" => RawOperation::Query(operation.to_owned()), |
||||
"update" => RawOperation::Update(operation.to_owned()), |
||||
_ => panic!("Unexpected operation kind {kind}"), |
||||
} |
||||
}) |
||||
.collect::<Vec<_>>(); |
||||
|
||||
let mut group = c.benchmark_group("sparql parsing"); |
||||
group.sample_size(10); |
||||
group.throughput(Throughput::Bytes( |
||||
operations |
||||
.iter() |
||||
.map(|o| match o { |
||||
RawOperation::Query(q) => q.len(), |
||||
RawOperation::Update(u) => u.len(), |
||||
}) |
||||
.sum::<usize>() as u64, |
||||
)); |
||||
group.bench_function("BSBM query and update set", |b| { |
||||
b.iter(|| { |
||||
for operation in &operations { |
||||
match operation { |
||||
RawOperation::Query(q) => { |
||||
Query::parse(q, None).unwrap(); |
||||
} |
||||
RawOperation::Update(u) => { |
||||
Update::parse(u, None).unwrap(); |
||||
} |
||||
} |
||||
} |
||||
}) |
||||
}); |
||||
} |
||||
|
||||
criterion_group!(store, sparql_parsing, store_query_and_update, store_load); |
||||
|
||||
criterion_main!(store); |
||||
|
||||
fn read_data(file: &str) -> impl BufRead { |
||||
if !Path::new(file).exists() { |
||||
let mut client = oxhttp::Client::new(); |
||||
client.set_redirection_limit(5); |
||||
let url = format!("https://github.com/Tpt/bsbm-tools/releases/download/v0.2/{file}"); |
||||
let request = Request::builder(Method::GET, url.parse().unwrap()).build(); |
||||
let response = client.request(request).unwrap(); |
||||
assert_eq!( |
||||
response.status(), |
||||
Status::OK, |
||||
"{}", |
||||
response.into_body().to_string().unwrap() |
||||
); |
||||
std::io::copy(&mut response.into_body(), &mut File::create(file).unwrap()).unwrap(); |
||||
} |
||||
BufReader::new(zstd::Decoder::new(File::open(file).unwrap()).unwrap()) |
||||
} |
||||
|
||||
/// A SPARQL operation of the benchmark mix kept as unparsed text.
#[derive(Clone)]
enum RawOperation {
    /// The text of a query.
    Query(String),
    /// The text of an update.
    Update(String),
}
||||
|
||||
#[allow(clippy::large_enum_variant)] |
||||
#[derive(Clone)] |
||||
enum Operation { |
||||
Query(Query), |
||||
Update(Update), |
||||
} |
||||
|
||||
/// Path of a benchmark scratch directory; the `Drop` implementation removes the directory.
struct TempDir(PathBuf);
||||
|
||||
impl Default for TempDir { |
||||
fn default() -> Self { |
||||
Self(temp_dir().join(format!("oxigraph-bench-{}", random::<u128>()))) |
||||
} |
||||
} |
||||
|
||||
impl AsRef<Path> for TempDir { |
||||
fn as_ref(&self) -> &Path { |
||||
&self.0 |
||||
} |
||||
} |
||||
|
||||
impl Drop for TempDir { |
||||
fn drop(&mut self) { |
||||
remove_dir_all(&self.0).unwrap() |
||||
} |
||||
} |
@ -1,59 +0,0 @@ |
||||
[package] |
||||
name = "oxigraph" |
||||
version.workspace = true |
||||
authors.workspace = true |
||||
license.workspace = true |
||||
readme = "README.md" |
||||
keywords = ["RDF", "SPARQL", "graph-database", "database"] |
||||
categories = ["database-implementations"] |
||||
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxigraph" |
||||
homepage = "https://oxigraph.org/" |
||||
documentation = "https://docs.rs/oxigraph" |
||||
description = """ |
||||
a SPARQL database and RDF toolkit |
||||
""" |
||||
edition.workspace = true |
||||
rust-version.workspace = true |
||||
|
||||
[features] |
||||
js = ["getrandom/js", "oxsdatatypes/js", "js-sys"] |
||||
|
||||
|
||||
[dependencies] |
||||
digest.workspace = true |
||||
hex.workspace = true |
||||
json-event-parser.workspace = true |
||||
md-5.workspace = true |
||||
oxilangtag.workspace = true |
||||
oxiri.workspace = true |
||||
oxrdf = { workspace = true, features = ["rdf-star", "oxsdatatypes"] } |
||||
oxrdfio = { workspace = true, features = ["rdf-star"] } |
||||
oxsdatatypes.workspace = true |
||||
rand.workspace = true |
||||
regex.workspace = true |
||||
sha1.workspace = true |
||||
sha2.workspace = true |
||||
siphasher.workspace = true |
||||
sparesults = { workspace = true, features = ["rdf-star"] } |
||||
spargebra = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] } |
||||
sparopt = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] } |
||||
thiserror.workspace = true |
||||
|
||||
[target.'cfg(not(target_family = "wasm"))'.dependencies] |
||||
libc = "0.2" |
||||
rocksdb.workspace = true |
||||
|
||||
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] |
||||
getrandom.workspace = true |
||||
js-sys = { workspace = true, optional = true } |
||||
|
||||
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies] |
||||
codspeed-criterion-compat.workspace = true |
||||
zstd.workspace = true |
||||
|
||||
[lints] |
||||
workspace = true |
||||
|
||||
[package.metadata.docs.rs] |
||||
rustdoc-args = ["--cfg", "docsrs"] |
||||
|
@ -1,82 +0,0 @@ |
||||
Oxigraph |
||||
======== |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph) |
||||
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
||||
|
||||
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. |
||||
|
||||
Its goal is to provide a compliant, safe and fast on-disk graph database. |
||||
It also provides a set of utility functions for reading, writing, and processing RDF files. |
||||
|
||||
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet. |
||||
|
||||
Oxigraph also provides [a CLI tool](https://crates.io/crates/oxigraph-cli) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library. |
||||
|
||||
|
||||
Oxigraph implements the following specifications: |
||||
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/). |
||||
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval. |
||||
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/). |
||||
|
||||
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture). |
||||
|
||||
The main entry point of Oxigraph is the [`Store`](store::Store) struct: |
||||
```rust |
||||
use oxigraph::store::Store; |
||||
use oxigraph::model::*; |
||||
use oxigraph::sparql::QueryResults; |
||||
|
||||
let store = Store::new().unwrap(); |
||||
|
||||
// insertion |
||||
let ex = NamedNode::new("http://example.com").unwrap(); |
||||
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph); |
||||
store.insert(&quad).unwrap(); |
||||
|
||||
// quad filter |
||||
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap(); |
||||
assert_eq!(vec![quad], results); |
||||
|
||||
// SPARQL query |
||||
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() { |
||||
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into())); |
||||
} |
||||
``` |
||||
|
||||
It is based on these crates that can be used separately: |
||||
* [`oxrdf`](https://crates.io/crates/oxrdf), data structures encoding basic RDF concepts (the [`oxigraph::model`](crate::model) module). |
||||
* [`oxrdfio`](https://crates.io/crates/oxrdfio), a unified parser and serializer API for RDF formats (the [`oxigraph::io`](crate::io) module). It itself relies on: |
||||
* [`oxttl`](https://crates.io/crates/oxttl), N-Triples, N-Quads, Turtle, TriG and N3 parsing and serialization. |
||||
* [`oxrdfxml`](https://crates.io/crates/oxrdfxml), RDF/XML parsing and serialization. |
||||
* [`spargebra`](https://crates.io/crates/spargebra), a SPARQL parser. |
||||
* [`sparesults`](https://crates.io/crates/sparesults), parsers and serializers for SPARQL result formats (the [`oxigraph::sparql::results`](crate::sparql::results) module). |
||||
* [`sparopt`](https://crates.io/crates/sparopt), a SPARQL optimizer. |
||||
* [`oxsdatatypes`](https://crates.io/crates/oxsdatatypes), an implementation of some XML Schema datatypes. |
||||
|
||||
To build the library locally, don't forget to fetch the submodules: either run `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository together with its submodules, or run `git submodule update --init` to add the submodules to an already cloned repository. |
||||
|
||||
It is possible to disable the RocksDB storage backend to only use the in-memory fallback by disabling the `rocksdb` default feature: |
||||
```toml |
||||
oxigraph = { version = "*", default-features = false } |
||||
``` |
||||
This is the default behavior when compiling Oxigraph to WASM. |
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||
`<http://opensource.org/licenses/MIT>`) |
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
@ -1,39 +0,0 @@ |
||||
//! Utilities to read and write RDF graphs and datasets using [OxRDF I/O](https://crates.io/crates/oxrdfio).
|
||||
//!
|
||||
//! The entry points of this module are the two [`RdfParser`] and [`RdfSerializer`] structs.
|
||||
//!
|
||||
//! Usage example converting a Turtle file to an N-Triples file:
|
||||
//! ```
|
||||
//! use oxigraph::io::{RdfFormat, RdfParser, RdfSerializer};
|
||||
//!
|
||||
//! let turtle_file = b"@base <http://example.com/> .
|
||||
//! @prefix schema: <http://schema.org/> .
|
||||
//! <foo> a schema:Person ;
|
||||
//! schema:name \"Foo\" .
|
||||
//! <bar> a schema:Person ;
|
||||
//! schema:name \"Bar\" .";
|
||||
//!
|
||||
//! let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
//! <http://example.com/foo> <http://schema.org/name> \"Foo\" .
|
||||
//! <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
//! <http://example.com/bar> <http://schema.org/name> \"Bar\" .
|
||||
//! ";
|
||||
//!
|
||||
//! let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
|
||||
//! for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) {
|
||||
//! writer.write_quad(&quad.unwrap()).unwrap();
|
||||
//! }
|
||||
//! assert_eq!(writer.finish().unwrap(), ntriples_file);
|
||||
//! ```
|
||||
|
||||
mod format; |
||||
pub mod read; |
||||
pub mod write; |
||||
|
||||
#[allow(deprecated)] |
||||
pub use self::format::{DatasetFormat, GraphFormat}; |
||||
#[allow(deprecated)] |
||||
pub use self::read::{DatasetParser, GraphParser}; |
||||
#[allow(deprecated)] |
||||
pub use self::write::{DatasetSerializer, GraphSerializer}; |
||||
pub use oxrdfio::*; |
@ -1,199 +0,0 @@ |
||||
#![allow(deprecated)] |
||||
|
||||
//! Utilities to read RDF graphs and datasets.
|
||||
|
||||
use crate::io::{DatasetFormat, GraphFormat}; |
||||
use crate::model::*; |
||||
use oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser}; |
||||
use std::io::Read; |
||||
|
||||
/// Parsers for RDF graph serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
|
||||
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
|
||||
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{GraphFormat, GraphParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
|
||||
/// let triples = parser
|
||||
/// .read_triples(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(triples.len(), 1);
|
||||
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[deprecated(note = "use RdfParser instead", since = "0.4.0")] |
||||
pub struct GraphParser { |
||||
inner: RdfParser, |
||||
} |
||||
|
||||
impl GraphParser { |
||||
/// Builds a parser for the given format.
|
||||
#[inline] |
||||
pub fn from_format(format: GraphFormat) -> Self { |
||||
Self { |
||||
inner: RdfParser::from_format(format.into()) |
||||
.without_named_graphs() |
||||
.rename_blank_nodes(), |
||||
} |
||||
} |
||||
|
||||
/// Provides an IRI that could be used to resolve the file relative IRIs.
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{GraphFormat, GraphParser};
|
||||
///
|
||||
/// let file = "</s> </p> </o> .";
|
||||
///
|
||||
/// let parser =
|
||||
/// GraphParser::from_format(GraphFormat::Turtle).with_base_iri("http://example.com")?;
|
||||
/// let triples = parser
|
||||
/// .read_triples(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(triples.len(), 1);
|
||||
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { |
||||
Ok(Self { |
||||
inner: self.inner.with_base_iri(base_iri)?, |
||||
}) |
||||
} |
||||
|
||||
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of triples.
|
||||
pub fn read_triples<R: Read>(self, reader: R) -> TripleReader<R> { |
||||
TripleReader { |
||||
parser: self.inner.parse_read(reader), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An iterator yielding read triples.
|
||||
/// Could be built using a [`GraphParser`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{GraphFormat, GraphParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
|
||||
/// let triples = parser
|
||||
/// .read_triples(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(triples.len(), 1);
|
||||
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct TripleReader<R: Read> { |
||||
parser: FromReadQuadReader<R>, |
||||
} |
||||
|
||||
impl<R: Read> Iterator for TripleReader<R> { |
||||
type Item = Result<Triple, RdfParseError>; |
||||
|
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
Some(self.parser.next()?.map(Into::into).map_err(Into::into)) |
||||
} |
||||
} |
||||
|
||||
/// A parser for RDF dataset serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
|
||||
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{DatasetFormat, DatasetParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
|
||||
///
|
||||
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
|
||||
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[deprecated(note = "use RdfParser instead", since = "0.4.0")] |
||||
pub struct DatasetParser { |
||||
inner: RdfParser, |
||||
} |
||||
|
||||
impl DatasetParser { |
||||
/// Builds a parser for the given format.
|
||||
#[inline] |
||||
pub fn from_format(format: DatasetFormat) -> Self { |
||||
Self { |
||||
inner: RdfParser::from_format(format.into()).rename_blank_nodes(), |
||||
} |
||||
} |
||||
|
||||
/// Provides an IRI that could be used to resolve the file relative IRIs.
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{DatasetFormat, DatasetParser};
|
||||
///
|
||||
/// let file = "<g> { </s> </p> </o> }";
|
||||
///
|
||||
/// let parser =
|
||||
/// DatasetParser::from_format(DatasetFormat::TriG).with_base_iri("http://example.com")?;
|
||||
/// let triples = parser
|
||||
/// .read_quads(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(triples.len(), 1);
|
||||
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { |
||||
Ok(Self { |
||||
inner: self.inner.with_base_iri(base_iri)?, |
||||
}) |
||||
} |
||||
|
||||
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of quads.
|
||||
pub fn read_quads<R: Read>(self, reader: R) -> QuadReader<R> { |
||||
QuadReader { |
||||
parser: self.inner.parse_read(reader), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An iterator yielding read quads.
|
||||
/// Could be built using a [`DatasetParser`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{DatasetFormat, DatasetParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
|
||||
///
|
||||
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
|
||||
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct QuadReader<R: Read> { |
||||
parser: FromReadQuadReader<R>, |
||||
} |
||||
|
||||
impl<R: Read> Iterator for QuadReader<R> { |
||||
type Item = Result<Quad, RdfParseError>; |
||||
|
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
Some(self.parser.next()?.map_err(Into::into)) |
||||
} |
||||
} |
@ -1,185 +0,0 @@ |
||||
#![allow(deprecated)] |
||||
|
||||
//! Utilities to write RDF graphs and datasets.
|
||||
|
||||
use crate::io::{DatasetFormat, GraphFormat}; |
||||
use crate::model::*; |
||||
use oxrdfio::{RdfSerializer, ToWriteQuadWriter}; |
||||
use std::io::{self, Write}; |
||||
|
||||
/// A serializer for RDF graph serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
|
||||
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
|
||||
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{GraphFormat, GraphSerializer};
|
||||
/// use oxigraph::model::*;
|
||||
///
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
|
||||
/// writer.write(&Triple {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// })?;
|
||||
/// writer.finish()?;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// buffer.as_slice(),
|
||||
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")] |
||||
pub struct GraphSerializer { |
||||
inner: RdfSerializer, |
||||
} |
||||
|
||||
impl GraphSerializer { |
||||
/// Builds a serializer for the given format
|
||||
#[inline] |
||||
pub fn from_format(format: GraphFormat) -> Self { |
||||
Self { |
||||
inner: RdfSerializer::from_format(format.into()), |
||||
} |
||||
} |
||||
|
||||
/// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`] implementation
|
||||
pub fn triple_writer<W: Write>(self, write: W) -> TripleWriter<W> { |
||||
TripleWriter { |
||||
writer: self.inner.serialize_to_write(write), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Allows writing triples.
|
||||
/// Could be built using a [`GraphSerializer`].
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](TripleWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{GraphFormat, GraphSerializer};
|
||||
/// use oxigraph::model::*;
|
||||
///
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
|
||||
/// writer.write(&Triple {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// })?;
|
||||
/// writer.finish()?;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// buffer.as_slice(),
|
||||
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct TripleWriter<W: Write> { |
||||
writer: ToWriteQuadWriter<W>, |
||||
} |
||||
|
||||
impl<W: Write> TripleWriter<W> { |
||||
/// Writes a triple
|
||||
pub fn write<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> { |
||||
self.writer.write_triple(triple) |
||||
} |
||||
|
||||
/// Writes the last bytes of the file
|
||||
pub fn finish(self) -> io::Result<()> { |
||||
self.writer.finish()?.flush() |
||||
} |
||||
} |
||||
|
||||
/// A serializer for RDF graph serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
|
||||
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
|
||||
/// use oxigraph::model::*;
|
||||
///
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
|
||||
/// writer.write(&Quad {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
|
||||
/// })?;
|
||||
/// writer.finish()?;
|
||||
///
|
||||
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")] |
||||
pub struct DatasetSerializer { |
||||
inner: RdfSerializer, |
||||
} |
||||
|
||||
impl DatasetSerializer { |
||||
/// Builds a serializer for the given format
|
||||
#[inline] |
||||
pub fn from_format(format: DatasetFormat) -> Self { |
||||
Self { |
||||
inner: RdfSerializer::from_format(format.into()), |
||||
} |
||||
} |
||||
|
||||
/// Returns a [`QuadWriter`] allowing writing triples into the given [`Write`] implementation
|
||||
pub fn quad_writer<W: Write>(self, write: W) -> QuadWriter<W> { |
||||
QuadWriter { |
||||
writer: self.inner.serialize_to_write(write), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Allows writing quads.
|
||||
/// Could be built using a [`DatasetSerializer`].
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](QuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
|
||||
/// use oxigraph::model::*;
|
||||
///
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
|
||||
/// writer.write(&Quad {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
|
||||
/// })?;
|
||||
/// writer.finish()?;
|
||||
///
|
||||
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct QuadWriter<W: Write> { |
||||
writer: ToWriteQuadWriter<W>, |
||||
} |
||||
|
||||
impl<W: Write> QuadWriter<W> { |
||||
/// Writes a quad
|
||||
pub fn write<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> { |
||||
self.writer.write_quad(quad) |
||||
} |
||||
|
||||
/// Writes the last bytes of the file
|
||||
pub fn finish(self) -> io::Result<()> { |
||||
self.writer.finish()?.flush() |
||||
} |
||||
} |
@ -1,12 +0,0 @@ |
||||
#![doc = include_str!("../README.md")] |
||||
#![doc(test(attr(deny(warnings))))] |
||||
#![doc(test(attr(allow(deprecated))))] |
||||
#![cfg_attr(docsrs, feature(doc_auto_cfg))] |
||||
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
||||
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
||||
|
||||
pub mod io; |
||||
pub mod model; |
||||
pub mod sparql; |
||||
mod storage; |
||||
pub mod store; |
@ -1,22 +0,0 @@ |
||||
//! Implements data structures for [RDF 1.1 Concepts](https://www.w3.org/TR/rdf11-concepts/) using [OxRDF](https://crates.io/crates/oxrdf).
|
||||
//!
|
||||
//! Usage example:
|
||||
//!
|
||||
//! ```
|
||||
//! use oxigraph::model::*;
|
||||
//!
|
||||
//! let mut graph = Graph::default();
|
||||
//!
|
||||
//! // insertion
|
||||
//! let ex = NamedNodeRef::new("http://example.com").unwrap();
|
||||
//! let triple = TripleRef::new(ex, ex, ex);
|
||||
//! graph.insert(triple);
|
||||
//!
|
||||
//! // simple filter
|
||||
//! let results: Vec<_> = graph.triples_for_subject(ex).collect();
|
||||
//! assert_eq!(vec![triple], results);
|
||||
//! ```
|
||||
|
||||
pub use oxrdf::*; |
||||
|
||||
pub use spargebra::term::GroundQuad; |
@ -1,84 +0,0 @@ |
||||
use crate::io::RdfParseError; |
||||
use crate::model::NamedNode; |
||||
use crate::sparql::results::QueryResultsParseError as ResultsParseError; |
||||
use crate::sparql::SparqlSyntaxError; |
||||
use crate::storage::StorageError; |
||||
use std::convert::Infallible; |
||||
use std::error::Error; |
||||
use std::io; |
||||
|
||||
/// A SPARQL evaluation error.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[non_exhaustive] |
||||
pub enum EvaluationError { |
||||
/// An error in SPARQL parsing.
|
||||
#[error(transparent)] |
||||
Parsing(#[from] SparqlSyntaxError), |
||||
/// An error from the storage.
|
||||
#[error(transparent)] |
||||
Storage(#[from] StorageError), |
||||
/// An error while parsing an external RDF file.
|
||||
#[error(transparent)] |
||||
GraphParsing(#[from] RdfParseError), |
||||
/// An error while parsing an external result file (likely from a federated query).
|
||||
#[error(transparent)] |
||||
ResultsParsing(#[from] ResultsParseError), |
||||
/// An error returned during results serialization.
|
||||
#[error(transparent)] |
||||
ResultsSerialization(#[from] io::Error), |
||||
/// Error during `SERVICE` evaluation
|
||||
#[error("{0}")] |
||||
Service(#[source] Box<dyn Error + Send + Sync + 'static>), |
||||
/// Error when `CREATE` tries to create an already existing graph
|
||||
#[error("The graph {0} already exists")] |
||||
GraphAlreadyExists(NamedNode), |
||||
/// Error when `DROP` or `CLEAR` tries to remove a graph that does not exist
|
||||
#[error("The graph {0} does not exist")] |
||||
GraphDoesNotExist(NamedNode), |
||||
/// The variable storing the `SERVICE` name is unbound
|
||||
#[error("The variable encoding the service name is unbound")] |
||||
UnboundService, |
||||
/// The given `SERVICE` is not supported
|
||||
#[error("The service {0} is not supported")] |
||||
UnsupportedService(NamedNode), |
||||
/// The given content media type returned from an HTTP response is not supported (`SERVICE` and `LOAD`)
|
||||
#[error("The content media type {0} is not supported")] |
||||
UnsupportedContentType(String), |
||||
/// The `SERVICE` call has not returned solutions
|
||||
#[error("The service is not returning solutions but a boolean or a graph")] |
||||
ServiceDoesNotReturnSolutions, |
||||
/// The results are not a RDF graph
|
||||
#[error("The query results are not a RDF graph")] |
||||
NotAGraph, |
||||
} |
||||
|
||||
/// Conversion from [`Infallible`], an error type that has no values.
impl From<Infallible> for EvaluationError { |
||||
#[inline] |
||||
fn from(error: Infallible) -> Self { |
||||
// `Infallible` has no variants, so the empty match covers every case.
match error {} |
||||
} |
||||
} |
||||
|
||||
impl From<EvaluationError> for io::Error { |
||||
#[inline] |
||||
fn from(error: EvaluationError) -> Self { |
||||
match error { |
||||
EvaluationError::Parsing(error) => Self::new(io::ErrorKind::InvalidData, error), |
||||
EvaluationError::GraphParsing(error) => error.into(), |
||||
EvaluationError::ResultsParsing(error) => error.into(), |
||||
EvaluationError::ResultsSerialization(error) => error, |
||||
EvaluationError::Storage(error) => error.into(), |
||||
EvaluationError::Service(error) => match error.downcast() { |
||||
Ok(error) => *error, |
||||
Err(error) => Self::new(io::ErrorKind::Other, error), |
||||
}, |
||||
EvaluationError::GraphAlreadyExists(_) |
||||
| EvaluationError::GraphDoesNotExist(_) |
||||
| EvaluationError::UnboundService |
||||
| EvaluationError::UnsupportedService(_) |
||||
| EvaluationError::UnsupportedContentType(_) |
||||
| EvaluationError::ServiceDoesNotReturnSolutions |
||||
| EvaluationError::NotAGraph => Self::new(io::ErrorKind::InvalidInput, error), |
||||
} |
||||
} |
||||
} |
@ -1,9 +0,0 @@ |
||||
#[cfg(not(feature = "http-client"))] |
||||
mod dummy; |
||||
#[cfg(feature = "http-client")] |
||||
mod simple; |
||||
|
||||
#[cfg(not(feature = "http-client"))] |
||||
pub use dummy::Client; |
||||
#[cfg(feature = "http-client")] |
||||
pub use simple::Client; |
@ -1,371 +0,0 @@ |
||||
use crate::io::{RdfFormat, RdfSerializer}; |
||||
use crate::model::*; |
||||
use crate::sparql::error::EvaluationError; |
||||
use crate::sparql::results::{ |
||||
FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsFormat, |
||||
QueryResultsParseError, QueryResultsParser, QueryResultsSerializer, |
||||
}; |
||||
pub use sparesults::QuerySolution; |
||||
use std::io::{Read, Write}; |
||||
use std::sync::Arc; |
||||
|
||||
/// Results of a [SPARQL query](https://www.w3.org/TR/sparql11-query/).
|
||||
pub enum QueryResults { |
||||
/// Results of a [SELECT](https://www.w3.org/TR/sparql11-query/#select) query.
|
||||
Solutions(QuerySolutionIter), |
||||
/// Result of a [ASK](https://www.w3.org/TR/sparql11-query/#ask) query.
|
||||
Boolean(bool), |
||||
/// Results of a [CONSTRUCT](https://www.w3.org/TR/sparql11-query/#construct) or [DESCRIBE](https://www.w3.org/TR/sparql11-query/#describe) query.
|
||||
Graph(QueryTripleIter), |
||||
} |
||||
|
||||
impl QueryResults { |
||||
/// Reads a SPARQL query results serialization.
|
||||
pub fn read( |
||||
read: impl Read + 'static, |
||||
format: QueryResultsFormat, |
||||
) -> Result<Self, QueryResultsParseError> { |
||||
Ok(QueryResultsParser::from_format(format) |
||||
.parse_read(read)? |
||||
.into()) |
||||
} |
||||
|
||||
/// Writes the query results (solutions or boolean).
|
||||
///
|
||||
/// This method fails if it is called on the `Graph` results.
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::store::Store;
|
||||
/// use oxigraph::model::*;
|
||||
/// use oxigraph::sparql::results::QueryResultsFormat;
|
||||
///
|
||||
/// let store = Store::new()?;
|
||||
/// let ex = NamedNodeRef::new("http://example.com")?;
|
||||
/// store.insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
|
||||
///
|
||||
/// let results = store.query("SELECT ?s WHERE { ?s ?p ?o }")?;
|
||||
/// assert_eq!(
|
||||
/// results.write(Vec::new(), QueryResultsFormat::Json)?,
|
||||
/// r#"{"head":{"vars":["s"]},"results":{"bindings":[{"s":{"type":"uri","value":"http://example.com"}}]}}"#.as_bytes()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn write<W: Write>( |
||||
self, |
||||
write: W, |
||||
format: QueryResultsFormat, |
||||
) -> Result<W, EvaluationError> { |
||||
let serializer = QueryResultsSerializer::from_format(format); |
||||
match self { |
||||
Self::Boolean(value) => serializer.serialize_boolean_to_write(write, value), |
||||
Self::Solutions(solutions) => { |
||||
let mut writer = serializer |
||||
.serialize_solutions_to_write(write, solutions.variables().to_vec()) |
||||
.map_err(EvaluationError::ResultsSerialization)?; |
||||
for solution in solutions { |
||||
writer |
||||
.write(&solution?) |
||||
.map_err(EvaluationError::ResultsSerialization)?; |
||||
} |
||||
writer.finish() |
||||
} |
||||
Self::Graph(triples) => { |
||||
let s = VariableRef::new_unchecked("subject"); |
||||
let p = VariableRef::new_unchecked("predicate"); |
||||
let o = VariableRef::new_unchecked("object"); |
||||
let mut writer = serializer |
||||
.serialize_solutions_to_write( |
||||
write, |
||||
vec![s.into_owned(), p.into_owned(), o.into_owned()], |
||||
) |
||||
.map_err(EvaluationError::ResultsSerialization)?; |
||||
for triple in triples { |
||||
let triple = triple?; |
||||
writer |
||||
.write([ |
||||
(s, &triple.subject.into()), |
||||
(p, &triple.predicate.into()), |
||||
(o, &triple.object), |
||||
]) |
||||
.map_err(EvaluationError::ResultsSerialization)?; |
||||
} |
||||
writer.finish() |
||||
} |
||||
} |
||||
.map_err(EvaluationError::ResultsSerialization) |
||||
} |
||||
|
||||
/// Writes the graph query results.
|
||||
///
|
||||
/// This method fails if it is called on the `Solution` or `Boolean` results.
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::RdfFormat;
|
||||
/// use oxigraph::model::*;
|
||||
/// use oxigraph::store::Store;
|
||||
///
|
||||
/// let graph = "<http://example.com> <http://example.com> <http://example.com> .\n";
|
||||
///
|
||||
/// let store = Store::new()?;
|
||||
/// store.load_graph(
|
||||
/// graph.as_bytes(),
|
||||
/// RdfFormat::NTriples,
|
||||
/// GraphName::DefaultGraph,
|
||||
/// None,
|
||||
/// )?;
|
||||
///
|
||||
/// let results = store.query("CONSTRUCT WHERE { ?s ?p ?o }")?;
|
||||
/// assert_eq!(
|
||||
/// results.write_graph(Vec::new(), RdfFormat::NTriples)?,
|
||||
/// graph.as_bytes()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn write_graph<W: Write>( |
||||
self, |
||||
write: W, |
||||
format: impl Into<RdfFormat>, |
||||
) -> Result<W, EvaluationError> { |
||||
if let Self::Graph(triples) = self { |
||||
let mut writer = RdfSerializer::from_format(format.into()).serialize_to_write(write); |
||||
for triple in triples { |
||||
writer |
||||
.write_triple(&triple?) |
||||
.map_err(EvaluationError::ResultsSerialization)?; |
||||
} |
||||
writer |
||||
.finish() |
||||
.map_err(EvaluationError::ResultsSerialization) |
||||
} else { |
||||
Err(EvaluationError::NotAGraph) |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<QuerySolutionIter> for QueryResults { |
||||
#[inline] |
||||
fn from(value: QuerySolutionIter) -> Self { |
||||
Self::Solutions(value) |
||||
} |
||||
} |
||||
|
||||
impl<R: Read + 'static> From<FromReadQueryResultsReader<R>> for QueryResults { |
||||
fn from(reader: FromReadQueryResultsReader<R>) -> Self { |
||||
match reader { |
||||
FromReadQueryResultsReader::Solutions(s) => Self::Solutions(s.into()), |
||||
FromReadQueryResultsReader::Boolean(v) => Self::Boolean(v), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An iterator over [`QuerySolution`]s.
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::sparql::QueryResults;
|
||||
/// use oxigraph::store::Store;
|
||||
///
|
||||
/// let store = Store::new()?;
|
||||
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }")? {
|
||||
/// for solution in solutions {
|
||||
/// println!("{:?}", solution?.get("s"));
|
||||
/// }
|
||||
/// }
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub struct QuerySolutionIter { |
||||
variables: Arc<[Variable]>, |
||||
iter: Box<dyn Iterator<Item = Result<QuerySolution, EvaluationError>>>, |
||||
} |
||||
|
||||
impl QuerySolutionIter { |
||||
/// Construct a new iterator of solution from an ordered list of solution variables and an iterator of solution tuples
|
||||
/// (each tuple using the same ordering as the variable list such that tuple element 0 is the value for the variable 0...)
|
||||
pub fn new( |
||||
variables: Arc<[Variable]>, |
||||
iter: impl Iterator<Item = Result<Vec<Option<Term>>, EvaluationError>> + 'static, |
||||
) -> Self { |
||||
Self { |
||||
variables: Arc::clone(&variables), |
||||
iter: Box::new( |
||||
iter.map(move |t| t.map(|values| (Arc::clone(&variables), values).into())), |
||||
), |
||||
} |
||||
} |
||||
|
||||
/// The variables used in the solutions.
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::sparql::{QueryResults, Variable};
|
||||
/// use oxigraph::store::Store;
|
||||
///
|
||||
/// let store = Store::new()?;
|
||||
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s ?o WHERE { ?s ?p ?o }")? {
|
||||
/// assert_eq!(
|
||||
/// solutions.variables(),
|
||||
/// &[Variable::new("s")?, Variable::new("o")?]
|
||||
/// );
|
||||
/// }
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn variables(&self) -> &[Variable] { |
||||
&self.variables |
||||
} |
||||
} |
||||
|
||||
impl<R: Read + 'static> From<FromReadSolutionsReader<R>> for QuerySolutionIter { |
||||
fn from(reader: FromReadSolutionsReader<R>) -> Self { |
||||
Self { |
||||
variables: reader.variables().into(), |
||||
iter: Box::new(reader.map(|t| t.map_err(EvaluationError::from))), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl Iterator for QuerySolutionIter { |
||||
type Item = Result<QuerySolution, EvaluationError>; |
||||
|
||||
#[inline] |
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
self.iter.next() |
||||
} |
||||
|
||||
#[inline] |
||||
fn size_hint(&self) -> (usize, Option<usize>) { |
||||
self.iter.size_hint() |
||||
} |
||||
} |
||||
|
||||
/// An iterator over the triples that compose a graph solution.
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::sparql::QueryResults;
|
||||
/// use oxigraph::store::Store;
|
||||
///
|
||||
/// let store = Store::new()?;
|
||||
/// if let QueryResults::Graph(triples) = store.query("CONSTRUCT WHERE { ?s ?p ?o }")? {
|
||||
/// for triple in triples {
|
||||
/// println!("{}", triple?);
|
||||
/// }
|
||||
/// }
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub struct QueryTripleIter { |
||||
pub(crate) iter: Box<dyn Iterator<Item = Result<Triple, EvaluationError>>>, |
||||
} |
||||
|
||||
impl Iterator for QueryTripleIter { |
||||
type Item = Result<Triple, EvaluationError>; |
||||
|
||||
#[inline] |
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
self.iter.next() |
||||
} |
||||
|
||||
#[inline] |
||||
fn size_hint(&self) -> (usize, Option<usize>) { |
||||
self.iter.size_hint() |
||||
} |
||||
|
||||
#[inline] |
||||
fn fold<Acc, G>(self, init: Acc, g: G) -> Acc |
||||
where |
||||
G: FnMut(Acc, Self::Item) -> Acc, |
||||
{ |
||||
self.iter.fold(init, g) |
||||
} |
||||
} |
||||
|
||||
#[cfg(test)] |
||||
#[allow(clippy::panic_in_result_fn)] |
||||
mod tests { |
||||
use super::*; |
||||
use std::io::Cursor; |
||||
|
||||
#[test] |
||||
fn test_serialization_roundtrip() -> Result<(), EvaluationError> { |
||||
use std::str; |
||||
|
||||
for format in [ |
||||
QueryResultsFormat::Json, |
||||
QueryResultsFormat::Xml, |
||||
QueryResultsFormat::Tsv, |
||||
] { |
||||
let results = vec![ |
||||
QueryResults::Boolean(true), |
||||
QueryResults::Boolean(false), |
||||
QueryResults::Solutions(QuerySolutionIter::new( |
||||
[ |
||||
Variable::new_unchecked("foo"), |
||||
Variable::new_unchecked("bar"), |
||||
] |
||||
.as_ref() |
||||
.into(), |
||||
Box::new( |
||||
vec![ |
||||
Ok(vec![None, None]), |
||||
Ok(vec![ |
||||
Some(NamedNode::new_unchecked("http://example.com").into()), |
||||
None, |
||||
]), |
||||
Ok(vec![ |
||||
None, |
||||
Some(NamedNode::new_unchecked("http://example.com").into()), |
||||
]), |
||||
Ok(vec![ |
||||
Some(BlankNode::new_unchecked("foo").into()), |
||||
Some(BlankNode::new_unchecked("bar").into()), |
||||
]), |
||||
Ok(vec![Some(Literal::new_simple_literal("foo").into()), None]), |
||||
Ok(vec![ |
||||
Some( |
||||
Literal::new_language_tagged_literal_unchecked("foo", "fr") |
||||
.into(), |
||||
), |
||||
None, |
||||
]), |
||||
Ok(vec![ |
||||
Some(Literal::from(1).into()), |
||||
Some(Literal::from(true).into()), |
||||
]), |
||||
Ok(vec![ |
||||
Some(Literal::from(1.33).into()), |
||||
Some(Literal::from(false).into()), |
||||
]), |
||||
Ok(vec![ |
||||
Some( |
||||
Triple::new( |
||||
NamedNode::new_unchecked("http://example.com/s"), |
||||
NamedNode::new_unchecked("http://example.com/p"), |
||||
Triple::new( |
||||
NamedNode::new_unchecked("http://example.com/os"), |
||||
NamedNode::new_unchecked("http://example.com/op"), |
||||
NamedNode::new_unchecked("http://example.com/oo"), |
||||
), |
||||
) |
||||
.into(), |
||||
), |
||||
None, |
||||
]), |
||||
] |
||||
.into_iter(), |
||||
), |
||||
)), |
||||
]; |
||||
|
||||
for ex in results { |
||||
let mut buffer = Vec::new(); |
||||
ex.write(&mut buffer, format)?; |
||||
let ex2 = QueryResults::read(Cursor::new(buffer.clone()), format)?; |
||||
let mut buffer2 = Vec::new(); |
||||
ex2.write(&mut buffer2, format)?; |
||||
assert_eq!( |
||||
str::from_utf8(&buffer).unwrap(), |
||||
str::from_utf8(&buffer2).unwrap() |
||||
); |
||||
} |
||||
} |
||||
|
||||
Ok(()) |
||||
} |
||||
} |
@ -1,44 +0,0 @@ |
||||
//! Utilities to read and write RDF results formats using [sparesults](https://crates.io/crates/sparesults).
|
||||
//!
|
||||
//! It supports [SPARQL Query Results XML Format (Second Edition)](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
|
||||
//!
|
||||
//! Usage example converting a JSON result file into a TSV result file:
|
||||
//!
|
||||
//! ```
|
||||
//! use oxigraph::sparql::results::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader, QueryResultsSerializer};
|
||||
//! use std::io::Result;
|
||||
//!
|
||||
//! fn convert_json_to_tsv(json_file: &[u8]) -> Result<Vec<u8>> {
|
||||
//! let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
|
||||
//! let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv);
|
||||
//! // We start to read the JSON file and see which kind of results it is
|
||||
//! match json_parser.parse_read(json_file)? {
|
||||
//! FromReadQueryResultsReader::Boolean(value) => {
|
||||
//! // it's a boolean result, we copy it in TSV to the output buffer
|
||||
//! tsv_serializer.serialize_boolean_to_write(Vec::new(), value)
|
||||
//! }
|
||||
//! FromReadQueryResultsReader::Solutions(solutions_reader) => {
|
||||
//! // it's a set of solutions, we create a writer and we write to it while reading in streaming from the JSON file
|
||||
//! let mut serialize_solutions_to_write = tsv_serializer.serialize_solutions_to_write(Vec::new(), solutions_reader.variables().to_vec())?;
|
||||
//! for solution in solutions_reader {
|
||||
//! serialize_solutions_to_write.write(&solution?)?;
|
||||
//! }
|
||||
//! serialize_solutions_to_write.finish()
|
||||
//! }
|
||||
//! }
|
||||
//! }
|
||||
//!
|
||||
//! // Let's test with a boolean
|
||||
//! assert_eq!(
|
||||
//! convert_json_to_tsv(br#"{"boolean":true}"#.as_slice()).unwrap(),
|
||||
//! b"true"
|
||||
//! );
|
||||
//!
|
||||
//! // And with a set of solutions
|
||||
//! assert_eq!(
|
||||
//! convert_json_to_tsv(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice()).unwrap(),
|
||||
//! b"?foo\t?bar\n\"test\"\t\n"
|
||||
//! );
|
||||
//! ```
|
||||
|
||||
pub use sparesults::*; |
@ -1,12 +0,0 @@ |
||||
//! A storage backend
|
||||
//! RocksDB is available, if not in memory
|
||||
|
||||
#[cfg(any(target_family = "wasm"))] |
||||
pub use fallback::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction}; |
||||
#[cfg(all(not(target_family = "wasm")))] |
||||
pub use oxi_rocksdb::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction}; |
||||
|
||||
#[cfg(any(target_family = "wasm"))] |
||||
mod fallback; |
||||
#[cfg(all(not(target_family = "wasm")))] |
||||
mod oxi_rocksdb; |
@ -1,139 +0,0 @@ |
||||
use crate::io::{RdfFormat, RdfParseError}; |
||||
use crate::storage::numeric_encoder::EncodedTerm; |
||||
use oxiri::IriParseError; |
||||
use oxrdf::TermRef; |
||||
use std::error::Error; |
||||
use std::io; |
||||
|
||||
/// An error related to storage operations (reads, writes...).
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[non_exhaustive] |
||||
pub enum StorageError { |
||||
/// Error from the OS I/O layer.
|
||||
#[error(transparent)] |
||||
Io(#[from] io::Error), |
||||
/// Error related to data corruption.
|
||||
#[error(transparent)] |
||||
Corruption(#[from] CorruptionError), |
||||
#[doc(hidden)] |
||||
#[error("{0}")] |
||||
Other(#[source] Box<dyn Error + Send + Sync + 'static>), |
||||
} |
||||
|
||||
impl From<StorageError> for io::Error { |
||||
#[inline] |
||||
fn from(error: StorageError) -> Self { |
||||
match error { |
||||
StorageError::Io(error) => error, |
||||
StorageError::Corruption(error) => error.into(), |
||||
StorageError::Other(error) => Self::new(io::ErrorKind::Other, error), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An error returned if some content in the database is corrupted.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[error(transparent)] |
||||
pub struct CorruptionError(#[from] CorruptionErrorKind); |
||||
|
||||
/// An error returned if some content in the database is corrupted.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
enum CorruptionErrorKind { |
||||
#[error("{0}")] |
||||
Msg(String), |
||||
#[error("{0}")] |
||||
Other(#[source] Box<dyn Error + Send + Sync + 'static>), |
||||
} |
||||
|
||||
impl CorruptionError { |
||||
/// Builds an error from a printable error message.
|
||||
#[inline] |
||||
pub(crate) fn new(error: impl Into<Box<dyn Error + Send + Sync + 'static>>) -> Self { |
||||
Self(CorruptionErrorKind::Other(error.into())) |
||||
} |
||||
|
||||
#[inline] |
||||
pub(crate) fn from_encoded_term(encoded: &EncodedTerm, term: &TermRef<'_>) -> Self { |
||||
// TODO: eventually use a dedicated error enum value
|
||||
Self::msg(format!("Invalid term encoding {encoded:?} for {term}")) |
||||
} |
||||
|
||||
#[inline] |
||||
pub(crate) fn from_missing_column_family_name(name: &'static str) -> Self { |
||||
// TODO: eventually use a dedicated error enum value
|
||||
Self::msg(format!("Column family {name} does not exist")) |
||||
} |
||||
|
||||
/// Builds an error from a printable error message.
|
||||
#[inline] |
||||
pub(crate) fn msg(msg: impl Into<String>) -> Self { |
||||
Self(CorruptionErrorKind::Msg(msg.into())) |
||||
} |
||||
} |
||||
|
||||
impl From<CorruptionError> for io::Error { |
||||
#[inline] |
||||
fn from(error: CorruptionError) -> Self { |
||||
Self::new(io::ErrorKind::InvalidData, error) |
||||
} |
||||
} |
||||
|
||||
/// An error raised while loading a file into a [`Store`](crate::store::Store).
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum LoaderError { |
||||
/// An error raised while reading the file.
|
||||
#[error(transparent)] |
||||
Parsing(#[from] RdfParseError), |
||||
/// An error raised during the insertion in the store.
|
||||
#[error(transparent)] |
||||
Storage(#[from] StorageError), |
||||
/// The base IRI is invalid.
|
||||
#[error("Invalid base IRI '{iri}': {error}")] |
||||
InvalidBaseIri { |
||||
/// The IRI itself.
|
||||
iri: String, |
||||
/// The parsing error.
|
||||
#[source] |
||||
error: IriParseError, |
||||
}, |
||||
} |
||||
|
||||
impl From<LoaderError> for io::Error { |
||||
#[inline] |
||||
fn from(error: LoaderError) -> Self { |
||||
match error { |
||||
LoaderError::Storage(error) => error.into(), |
||||
LoaderError::Parsing(error) => error.into(), |
||||
LoaderError::InvalidBaseIri { .. } => { |
||||
Self::new(io::ErrorKind::InvalidInput, error.to_string()) |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An error raised while writing a file from a [`Store`](crate::store::Store).
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum SerializerError { |
||||
/// An error raised while writing the content.
|
||||
#[error(transparent)] |
||||
Io(#[from] io::Error), |
||||
/// An error raised during the lookup in the store.
|
||||
#[error(transparent)] |
||||
Storage(#[from] StorageError), |
||||
/// A format compatible with [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) is required.
|
||||
#[error("A RDF format supporting datasets was expected, {0} found")] |
||||
DatasetFormatExpected(RdfFormat), |
||||
} |
||||
|
||||
impl From<SerializerError> for io::Error { |
||||
#[inline] |
||||
fn from(error: SerializerError) -> Self { |
||||
match error { |
||||
SerializerError::Storage(error) => error.into(), |
||||
SerializerError::Io(error) => error, |
||||
SerializerError::DatasetFormatExpected(_) => { |
||||
Self::new(io::ErrorKind::InvalidInput, error.to_string()) |
||||
} |
||||
} |
||||
} |
||||
} |
@ -1,36 +0,0 @@ |
||||
[package] |
||||
name = "oxrdfio" |
||||
version = "0.1.0-alpha.5" |
||||
authors.workspace = true |
||||
license.workspace = true |
||||
readme = "README.md" |
||||
keywords = ["RDF"] |
||||
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfio" |
||||
documentation = "https://docs.rs/oxrdfio" |
||||
description = """ |
||||
Parser and serializer for various RDF formats |
||||
""" |
||||
edition.workspace = true |
||||
rust-version.workspace = true |
||||
|
||||
[features] |
||||
default = [] |
||||
async-tokio = ["dep:tokio", "oxrdfxml/async-tokio", "oxttl/async-tokio"] |
||||
rdf-star = ["oxrdf/rdf-star", "oxttl/rdf-star"] |
||||
|
||||
[dependencies] |
||||
oxrdf.workspace = true |
||||
oxrdfxml.workspace = true |
||||
oxttl.workspace = true |
||||
thiserror.workspace = true |
||||
tokio = { workspace = true, optional = true, features = ["io-util"] } |
||||
|
||||
[dev-dependencies] |
||||
tokio = { workspace = true, features = ["rt", "macros"] } |
||||
|
||||
[lints] |
||||
workspace = true |
||||
|
||||
[package.metadata.docs.rs] |
||||
all-features = true |
||||
rustdoc-args = ["--cfg", "docsrs"] |
@ -1,67 +0,0 @@ |
||||
OxRDF I/O |
||||
========= |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/oxrdfio.svg)](https://crates.io/crates/oxrdfio) |
||||
[![Released API docs](https://docs.rs/oxrdfio/badge.svg)](https://docs.rs/oxrdfio) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfio)](https://crates.io/crates/oxrdfio) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
||||
|
||||
OxRDF I/O is a set of parsers and serializers for RDF. |
||||
|
||||
It supports: |
||||
* [N3](https://w3c.github.io/N3/spec/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
* [N-Quads](https://www.w3.org/TR/n-quads/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
* [N-Triples](https://www.w3.org/TR/n-triples/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) using [`oxrdfxml`](https://crates.io/crates/oxrdfxml) |
||||
* [TriG](https://www.w3.org/TR/trig/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
* [Turtle](https://www.w3.org/TR/turtle/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
|
||||
Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is also available behind the `rdf-star` feature for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star), [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star), [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) and [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star). |
||||
|
||||
It is designed as a low-level parser compatible with both synchronous and asynchronous I/O (behind the `async-tokio` feature). |
||||
|
||||
The entry points of this library are the two [`RdfParser`] and [`RdfSerializer`] structs. |
||||
|
||||
Usage example converting a Turtle file to a N-Triples file: |
||||
```rust |
||||
use oxrdfio::{RdfFormat, RdfParser, RdfSerializer}; |
||||
|
||||
let turtle_file = b"@base <http://example.com/> . |
||||
@prefix schema: <http://schema.org/> . |
||||
<foo> a schema:Person ; |
||||
schema:name \"Foo\" . |
||||
<bar> a schema:Person ; |
||||
schema:name \"Bar\" ."; |
||||
|
||||
let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . |
||||
<http://example.com/foo> <http://schema.org/name> \"Foo\" . |
||||
<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . |
||||
<http://example.com/bar> <http://schema.org/name> \"Bar\" . |
||||
"; |
||||
|
||||
let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new()); |
||||
for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) { |
||||
writer.write_quad(&quad.unwrap()).unwrap(); |
||||
} |
||||
assert_eq!(writer.finish().unwrap(), ntriples_file); |
||||
``` |
||||
|
||||
Parsers for other RDF formats exist in Rust, like [graph-rdfa-processor](https://github.com/nbittich/graph-rdfa-processor) for RDFa and [json-ld](https://github.com/timothee-haudebourg/json-ld) for JSON-LD. |
||||
|
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||
`<http://opensource.org/licenses/MIT>`) |
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
@ -1,122 +0,0 @@ |
||||
use std::io; |
||||
use std::ops::Range; |
||||
|
||||
/// Error returned during RDF format parsing.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum RdfParseError { |
||||
/// I/O error during parsing (file not found...).
|
||||
#[error(transparent)] |
||||
Io(#[from] io::Error), |
||||
/// An error in the file syntax.
|
||||
#[error(transparent)] |
||||
Syntax(#[from] RdfSyntaxError), |
||||
} |
||||
|
||||
impl RdfParseError { |
||||
pub(crate) fn msg(msg: &'static str) -> Self { |
||||
Self::Syntax(RdfSyntaxError(SyntaxErrorKind::Msg(msg))) |
||||
} |
||||
} |
||||
|
||||
impl From<oxttl::TurtleSyntaxError> for RdfSyntaxError { |
||||
#[inline] |
||||
fn from(error: oxttl::TurtleSyntaxError) -> Self { |
||||
Self(SyntaxErrorKind::Turtle(error)) |
||||
} |
||||
} |
||||
|
||||
impl From<oxttl::TurtleParseError> for RdfParseError { |
||||
#[inline] |
||||
fn from(error: oxttl::TurtleParseError) -> Self { |
||||
match error { |
||||
oxttl::TurtleParseError::Syntax(e) => Self::Syntax(e.into()), |
||||
oxttl::TurtleParseError::Io(e) => Self::Io(e), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<oxrdfxml::RdfXmlSyntaxError> for RdfSyntaxError { |
||||
#[inline] |
||||
fn from(error: oxrdfxml::RdfXmlSyntaxError) -> Self { |
||||
Self(SyntaxErrorKind::RdfXml(error)) |
||||
} |
||||
} |
||||
|
||||
impl From<oxrdfxml::RdfXmlParseError> for RdfParseError { |
||||
#[inline] |
||||
fn from(error: oxrdfxml::RdfXmlParseError) -> Self { |
||||
match error { |
||||
oxrdfxml::RdfXmlParseError::Syntax(e) => Self::Syntax(e.into()), |
||||
oxrdfxml::RdfXmlParseError::Io(e) => Self::Io(e), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<RdfParseError> for io::Error { |
||||
#[inline] |
||||
fn from(error: RdfParseError) -> Self { |
||||
match error { |
||||
RdfParseError::Io(error) => error, |
||||
RdfParseError::Syntax(error) => error.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An error in the syntax of the parsed file.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[error(transparent)] |
||||
pub struct RdfSyntaxError(#[from] SyntaxErrorKind); |
||||
|
||||
/// An error in the syntax of the parsed file.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
enum SyntaxErrorKind { |
||||
#[error(transparent)] |
||||
Turtle(#[from] oxttl::TurtleSyntaxError), |
||||
#[error(transparent)] |
||||
RdfXml(#[from] oxrdfxml::RdfXmlSyntaxError), |
||||
#[error("{0}")] |
||||
Msg(&'static str), |
||||
} |
||||
|
||||
impl RdfSyntaxError { |
||||
/// The location of the error inside of the file.
|
||||
#[inline] |
||||
pub fn location(&self) -> Option<Range<TextPosition>> { |
||||
match &self.0 { |
||||
SyntaxErrorKind::Turtle(e) => { |
||||
let location = e.location(); |
||||
Some( |
||||
TextPosition { |
||||
line: location.start.line, |
||||
column: location.start.column, |
||||
offset: location.start.offset, |
||||
}..TextPosition { |
||||
line: location.end.line, |
||||
column: location.end.column, |
||||
offset: location.end.offset, |
||||
}, |
||||
) |
||||
} |
||||
SyntaxErrorKind::RdfXml(_) | SyntaxErrorKind::Msg(_) => None, |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<RdfSyntaxError> for io::Error { |
||||
#[inline] |
||||
fn from(error: RdfSyntaxError) -> Self { |
||||
match error.0 { |
||||
SyntaxErrorKind::Turtle(error) => error.into(), |
||||
SyntaxErrorKind::RdfXml(error) => error.into(), |
||||
SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// A position in a text.
///
/// It is made of a `line` number, a `column` number and a global file `offset`, all starting from 0.
#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub struct TextPosition {
    /// Line number, starting from 0.
    pub line: u64,
    /// Column number in the line, starting from 0 (in number of code points).
    pub column: u64,
    /// Global offset in the file, starting from 0 (in number of bytes).
    pub offset: u64,
}
@ -1,216 +0,0 @@ |
||||
use std::fmt; |
||||
|
||||
/// The RDF serialization formats known by this crate.
///
/// This enumeration is non exhaustive. New formats like JSON-LD might be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
pub enum RdfFormat {
    /// The [N3](https://w3c.github.io/N3/spec/) format.
    N3,
    /// The [N-Quads](https://www.w3.org/TR/n-quads/) format.
    NQuads,
    /// The [N-Triples](https://www.w3.org/TR/n-triples/) format.
    NTriples,
    /// The [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) format.
    RdfXml,
    /// The [TriG](https://www.w3.org/TR/trig/) format.
    TriG,
    /// The [Turtle](https://www.w3.org/TR/turtle/) format.
    Turtle,
}
||||
|
||||
impl RdfFormat { |
||||
/// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// RdfFormat::NTriples.iri(),
|
||||
/// "http://www.w3.org/ns/formats/N-Triples"
|
||||
/// )
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn iri(self) -> &'static str { |
||||
match self { |
||||
Self::N3 => "http://www.w3.org/ns/formats/N3", |
||||
Self::NQuads => "http://www.w3.org/ns/formats/N-Quads", |
||||
Self::NTriples => "http://www.w3.org/ns/formats/N-Triples", |
||||
Self::RdfXml => "http://www.w3.org/ns/formats/RDF_XML", |
||||
Self::TriG => "http://www.w3.org/ns/formats/TriG", |
||||
Self::Turtle => "http://www.w3.org/ns/formats/Turtle", |
||||
} |
||||
} |
||||
|
||||
/// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.media_type(), "application/n-triples")
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn media_type(self) -> &'static str { |
||||
match self { |
||||
Self::N3 => "text/n3", |
||||
Self::NQuads => "application/n-quads", |
||||
Self::NTriples => "application/n-triples", |
||||
Self::RdfXml => "application/rdf+xml", |
||||
Self::TriG => "application/trig", |
||||
Self::Turtle => "text/turtle", |
||||
} |
||||
} |
||||
|
||||
/// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.file_extension(), "nt")
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn file_extension(self) -> &'static str { |
||||
match self { |
||||
Self::N3 => "n3", |
||||
Self::NQuads => "nq", |
||||
Self::NTriples => "nt", |
||||
Self::RdfXml => "rdf", |
||||
Self::TriG => "trig", |
||||
Self::Turtle => "ttl", |
||||
} |
||||
} |
||||
|
||||
/// The format name.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.name(), "N-Triples")
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn name(self) -> &'static str { |
||||
match self { |
||||
Self::N3 => "N3", |
||||
Self::NQuads => "N-Quads", |
||||
Self::NTriples => "N-Triples", |
||||
Self::RdfXml => "RDF/XML", |
||||
Self::TriG => "TriG", |
||||
Self::Turtle => "Turtle", |
||||
} |
||||
} |
||||
|
||||
/// Checks if the formats supports [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) and not only [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.supports_datasets(), false);
|
||||
/// assert_eq!(RdfFormat::NQuads.supports_datasets(), true);
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn supports_datasets(self) -> bool { |
||||
matches!(self, Self::NQuads | Self::TriG) |
||||
} |
||||
|
||||
/// Checks if the formats supports [RDF-star quoted triples](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#dfn-quoted).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.supports_rdf_star(), true);
|
||||
/// assert_eq!(RdfFormat::RdfXml.supports_rdf_star(), false);
|
||||
/// ```
|
||||
#[inline] |
||||
#[cfg(feature = "rdf-star")] |
||||
pub const fn supports_rdf_star(self) -> bool { |
||||
matches!( |
||||
self, |
||||
Self::NTriples | Self::NQuads | Self::Turtle | Self::TriG |
||||
) |
||||
} |
||||
|
||||
/// Looks for a known format from a media type.
|
||||
///
|
||||
/// It supports some media type aliases.
|
||||
/// For example, "application/xml" is going to return `RdfFormat::RdfXml` even if it is not its canonical media type.
|
||||
///
|
||||
/// Example:
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// RdfFormat::from_media_type("text/turtle; charset=utf-8"),
|
||||
/// Some(RdfFormat::Turtle)
|
||||
/// )
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn from_media_type(media_type: &str) -> Option<Self> { |
||||
const MEDIA_SUBTYPES: [(&str, RdfFormat); 10] = [ |
||||
("n-quads", RdfFormat::NQuads), |
||||
("n-triples", RdfFormat::NTriples), |
||||
("n3", RdfFormat::N3), |
||||
("nquads", RdfFormat::NQuads), |
||||
("ntriples", RdfFormat::NTriples), |
||||
("plain", RdfFormat::NTriples), |
||||
("rdf+xml", RdfFormat::RdfXml), |
||||
("trig", RdfFormat::TriG), |
||||
("turtle", RdfFormat::Turtle), |
||||
("xml", RdfFormat::RdfXml), |
||||
]; |
||||
|
||||
let (r#type, subtype) = media_type |
||||
.split_once(';') |
||||
.unwrap_or((media_type, "")) |
||||
.0 |
||||
.split_once('/')?; |
||||
let r#type = r#type.trim(); |
||||
if !r#type.eq_ignore_ascii_case("application") && !r#type.eq_ignore_ascii_case("text") { |
||||
return None; |
||||
} |
||||
let subtype = subtype.trim(); |
||||
let subtype = subtype.strip_prefix("x-").unwrap_or(subtype); |
||||
for (candidate_subtype, candidate_id) in MEDIA_SUBTYPES { |
||||
if candidate_subtype.eq_ignore_ascii_case(subtype) { |
||||
return Some(candidate_id); |
||||
} |
||||
} |
||||
None |
||||
} |
||||
|
||||
/// Looks for a known format from an extension.
|
||||
///
|
||||
/// It supports some aliases.
|
||||
///
|
||||
/// Example:
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::from_extension("nt"), Some(RdfFormat::NTriples))
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn from_extension(extension: &str) -> Option<Self> { |
||||
const MEDIA_TYPES: [(&str, RdfFormat); 8] = [ |
||||
("n3", RdfFormat::N3), |
||||
("nq", RdfFormat::NQuads), |
||||
("nt", RdfFormat::NTriples), |
||||
("rdf", RdfFormat::RdfXml), |
||||
("trig", RdfFormat::TriG), |
||||
("ttl", RdfFormat::Turtle), |
||||
("txt", RdfFormat::NTriples), |
||||
("xml", RdfFormat::RdfXml), |
||||
]; |
||||
for (candidate_extension, candidate_id) in MEDIA_TYPES { |
||||
if candidate_extension.eq_ignore_ascii_case(extension) { |
||||
return Some(candidate_id); |
||||
} |
||||
} |
||||
None |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for RdfFormat { |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
f.write_str(self.name()) |
||||
} |
||||
} |
@ -1,19 +0,0 @@ |
||||
#![doc = include_str!("../README.md")] |
||||
#![doc(test(attr(deny(warnings))))] |
||||
#![cfg_attr(docsrs, feature(doc_auto_cfg))] |
||||
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
||||
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
||||
|
||||
mod error; |
||||
mod format; |
||||
mod parser; |
||||
mod serializer; |
||||
|
||||
pub use error::{RdfParseError, RdfSyntaxError, TextPosition}; |
||||
pub use format::RdfFormat; |
||||
#[cfg(feature = "async-tokio")] |
||||
pub use parser::FromTokioAsyncReadQuadReader; |
||||
pub use parser::{FromReadQuadReader, RdfParser}; |
||||
#[cfg(feature = "async-tokio")] |
||||
pub use serializer::ToTokioAsyncWriteQuadWriter; |
||||
pub use serializer::{RdfSerializer, ToWriteQuadWriter}; |
@ -1,807 +0,0 @@ |
||||
//! Utilities to read RDF graphs and datasets.
|
||||
|
||||
pub use crate::error::RdfParseError; |
||||
use crate::format::RdfFormat; |
||||
use oxrdf::{BlankNode, GraphName, IriParseError, Quad, Subject, Term, Triple}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxrdfxml::FromTokioAsyncReadRdfXmlReader; |
||||
use oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::n3::FromTokioAsyncReadN3Reader; |
||||
use oxttl::n3::{FromReadN3Reader, N3Parser, N3PrefixesIter, N3Quad, N3Term}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::nquads::FromTokioAsyncReadNQuadsReader; |
||||
use oxttl::nquads::{FromReadNQuadsReader, NQuadsParser}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::ntriples::FromTokioAsyncReadNTriplesReader; |
||||
use oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::trig::FromTokioAsyncReadTriGReader; |
||||
use oxttl::trig::{FromReadTriGReader, TriGParser, TriGPrefixesIter}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::turtle::FromTokioAsyncReadTurtleReader; |
||||
use oxttl::turtle::{FromReadTurtleReader, TurtleParser, TurtlePrefixesIter}; |
||||
use std::collections::HashMap; |
||||
use std::io::Read; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::AsyncRead; |
||||
|
||||
/// Parsers for RDF serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
|
||||
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
|
||||
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
|
||||
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
|
||||
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
|
||||
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
|
||||
///
|
||||
/// Note the useful options:
|
||||
/// - [`with_base_iri`](Self::with_base_iri) to resolve the relative IRIs.
|
||||
/// - [`rename_blank_nodes`](Self::rename_blank_nodes) to rename the blank nodes to auto-generated numbers to avoid conflicts when merging RDF graphs together.
|
||||
/// - [`without_named_graphs`](Self::without_named_graphs) to parse a single graph.
|
||||
/// - [`unchecked`](Self::unchecked) to skip some validations if the file is already known to be valid.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct RdfParser { |
||||
inner: RdfParserKind, |
||||
default_graph: GraphName, |
||||
without_named_graphs: bool, |
||||
rename_blank_nodes: bool, |
||||
} |
||||
|
||||
enum RdfParserKind { |
||||
N3(N3Parser), |
||||
NQuads(NQuadsParser), |
||||
NTriples(NTriplesParser), |
||||
RdfXml(RdfXmlParser), |
||||
TriG(TriGParser), |
||||
Turtle(TurtleParser), |
||||
} |
||||
|
||||
impl RdfParser { |
||||
/// Builds a parser for the given format.
|
||||
#[inline] |
||||
pub fn from_format(format: RdfFormat) -> Self { |
||||
Self { |
||||
inner: match format { |
||||
RdfFormat::N3 => RdfParserKind::N3(N3Parser::new()), |
||||
RdfFormat::NQuads => RdfParserKind::NQuads({ |
||||
#[cfg(feature = "rdf-star")] |
||||
{ |
||||
NQuadsParser::new().with_quoted_triples() |
||||
} |
||||
#[cfg(not(feature = "rdf-star"))] |
||||
{ |
||||
NQuadsParser::new() |
||||
} |
||||
}), |
||||
RdfFormat::NTriples => RdfParserKind::NTriples({ |
||||
#[cfg(feature = "rdf-star")] |
||||
{ |
||||
NTriplesParser::new().with_quoted_triples() |
||||
} |
||||
#[cfg(not(feature = "rdf-star"))] |
||||
{ |
||||
NTriplesParser::new() |
||||
} |
||||
}), |
||||
RdfFormat::RdfXml => RdfParserKind::RdfXml(RdfXmlParser::new()), |
||||
RdfFormat::TriG => RdfParserKind::TriG({ |
||||
#[cfg(feature = "rdf-star")] |
||||
{ |
||||
TriGParser::new().with_quoted_triples() |
||||
} |
||||
#[cfg(not(feature = "rdf-star"))] |
||||
{ |
||||
TriGParser::new() |
||||
} |
||||
}), |
||||
RdfFormat::Turtle => RdfParserKind::Turtle({ |
||||
#[cfg(feature = "rdf-star")] |
||||
{ |
||||
TurtleParser::new().with_quoted_triples() |
||||
} |
||||
#[cfg(not(feature = "rdf-star"))] |
||||
{ |
||||
TurtleParser::new() |
||||
} |
||||
}), |
||||
}, |
||||
default_graph: GraphName::DefaultGraph, |
||||
without_named_graphs: false, |
||||
rename_blank_nodes: false, |
||||
} |
||||
} |
||||
|
||||
/// The format the parser uses.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// RdfParser::from_format(RdfFormat::Turtle).format(),
|
||||
/// RdfFormat::Turtle
|
||||
/// );
|
||||
/// ```
|
||||
pub fn format(&self) -> RdfFormat { |
||||
match &self.inner { |
||||
RdfParserKind::N3(_) => RdfFormat::N3, |
||||
RdfParserKind::NQuads(_) => RdfFormat::NQuads, |
||||
RdfParserKind::NTriples(_) => RdfFormat::NTriples, |
||||
RdfParserKind::RdfXml(_) => RdfFormat::RdfXml, |
||||
RdfParserKind::TriG(_) => RdfFormat::TriG, |
||||
RdfParserKind::Turtle(_) => RdfFormat::Turtle, |
||||
} |
||||
} |
||||
|
||||
/// Provides an IRI that could be used to resolve the file relative IRIs.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "</s> </p> </o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::Turtle).with_base_iri("http://example.com")?;
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { |
||||
self.inner = match self.inner { |
||||
RdfParserKind::N3(p) => RdfParserKind::N3(p), |
||||
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p), |
||||
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p), |
||||
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?), |
||||
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?), |
||||
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?), |
||||
}; |
||||
Ok(self) |
||||
} |
||||
|
||||
/// Provides the name graph name that should replace the default graph in the returned quads.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::NamedNode;
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::Turtle)
|
||||
/// .with_default_graph(NamedNode::new("http://example.com/g")?);
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].graph_name.to_string(), "<http://example.com/g>");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn with_default_graph(mut self, default_graph: impl Into<GraphName>) -> Self { |
||||
self.default_graph = default_graph.into(); |
||||
self |
||||
} |
||||
|
||||
/// Sets that the parser must fail if parsing a named graph.
|
||||
///
|
||||
/// This function restricts the parser to only parse a single [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) and not an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NQuads).without_named_graphs();
|
||||
/// assert!(parser.parse_read(file.as_bytes()).next().unwrap().is_err());
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn without_named_graphs(mut self) -> Self { |
||||
self.without_named_graphs = true; |
||||
self |
||||
} |
||||
|
||||
/// Renames the blank nodes ids from the ones set in the serialization to random ids.
|
||||
///
|
||||
/// This allows to avoid id conflicts when merging graphs together.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "_:a <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let result1 = RdfParser::from_format(RdfFormat::NQuads)
|
||||
/// .rename_blank_nodes()
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
/// let result2 = RdfParser::from_format(RdfFormat::NQuads)
|
||||
/// .rename_blank_nodes()
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
/// assert_ne!(result1, result2);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn rename_blank_nodes(mut self) -> Self { |
||||
self.rename_blank_nodes = true; |
||||
self |
||||
} |
||||
|
||||
/// Assumes the file is valid to make parsing faster.
|
||||
///
|
||||
/// It will skip some validations.
|
||||
///
|
||||
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
|
||||
#[inline] |
||||
pub fn unchecked(mut self) -> Self { |
||||
self.inner = match self.inner { |
||||
RdfParserKind::N3(p) => RdfParserKind::N3(p.unchecked()), |
||||
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p.unchecked()), |
||||
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p.unchecked()), |
||||
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.unchecked()), |
||||
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.unchecked()), |
||||
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.unchecked()), |
||||
}; |
||||
self |
||||
} |
||||
|
||||
/// Parses from a [`Read`] implementation and returns an iterator of quads.
|
||||
///
|
||||
/// Reads are buffered.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
pub fn parse_read<R: Read>(self, reader: R) -> FromReadQuadReader<R> { |
||||
FromReadQuadReader { |
||||
parser: match self.inner { |
||||
RdfParserKind::N3(p) => FromReadQuadReaderKind::N3(p.parse_read(reader)), |
||||
RdfParserKind::NQuads(p) => FromReadQuadReaderKind::NQuads(p.parse_read(reader)), |
||||
RdfParserKind::NTriples(p) => { |
||||
FromReadQuadReaderKind::NTriples(p.parse_read(reader)) |
||||
} |
||||
RdfParserKind::RdfXml(p) => FromReadQuadReaderKind::RdfXml(p.parse_read(reader)), |
||||
RdfParserKind::TriG(p) => FromReadQuadReaderKind::TriG(p.parse_read(reader)), |
||||
RdfParserKind::Turtle(p) => FromReadQuadReaderKind::Turtle(p.parse_read(reader)), |
||||
}, |
||||
mapper: QuadMapper { |
||||
default_graph: self.default_graph.clone(), |
||||
without_named_graphs: self.without_named_graphs, |
||||
blank_node_map: self.rename_blank_nodes.then(HashMap::new), |
||||
}, |
||||
} |
||||
} |
||||
|
||||
/// Parses from a Tokio [`AsyncRead`] implementation and returns an async iterator of quads.
|
||||
///
|
||||
/// Reads are buffered.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
||||
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
|
||||
/// if let Some(quad) = reader.next().await {
|
||||
/// assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
|
||||
/// }
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub fn parse_tokio_async_read<R: AsyncRead + Unpin>( |
||||
self, |
||||
reader: R, |
||||
) -> FromTokioAsyncReadQuadReader<R> { |
||||
FromTokioAsyncReadQuadReader { |
||||
parser: match self.inner { |
||||
RdfParserKind::N3(p) => { |
||||
FromTokioAsyncReadQuadReaderKind::N3(p.parse_tokio_async_read(reader)) |
||||
} |
||||
RdfParserKind::NQuads(p) => { |
||||
FromTokioAsyncReadQuadReaderKind::NQuads(p.parse_tokio_async_read(reader)) |
||||
} |
||||
RdfParserKind::NTriples(p) => { |
||||
FromTokioAsyncReadQuadReaderKind::NTriples(p.parse_tokio_async_read(reader)) |
||||
} |
||||
RdfParserKind::RdfXml(p) => { |
||||
FromTokioAsyncReadQuadReaderKind::RdfXml(p.parse_tokio_async_read(reader)) |
||||
} |
||||
RdfParserKind::TriG(p) => { |
||||
FromTokioAsyncReadQuadReaderKind::TriG(p.parse_tokio_async_read(reader)) |
||||
} |
||||
RdfParserKind::Turtle(p) => { |
||||
FromTokioAsyncReadQuadReaderKind::Turtle(p.parse_tokio_async_read(reader)) |
||||
} |
||||
}, |
||||
mapper: QuadMapper { |
||||
default_graph: self.default_graph.clone(), |
||||
without_named_graphs: self.without_named_graphs, |
||||
blank_node_map: self.rename_blank_nodes.then(HashMap::new), |
||||
}, |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<RdfFormat> for RdfParser { |
||||
fn from(format: RdfFormat) -> Self { |
||||
Self::from_format(format) |
||||
} |
||||
} |
||||
|
||||
/// Parses a RDF file from a [`Read`] implementation. Can be built using [`RdfParser::parse_read`].
|
||||
///
|
||||
/// Reads are buffered.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct FromReadQuadReader<R: Read> { |
||||
parser: FromReadQuadReaderKind<R>, |
||||
mapper: QuadMapper, |
||||
} |
||||
|
||||
enum FromReadQuadReaderKind<R: Read> { |
||||
N3(FromReadN3Reader<R>), |
||||
NQuads(FromReadNQuadsReader<R>), |
||||
NTriples(FromReadNTriplesReader<R>), |
||||
RdfXml(FromReadRdfXmlReader<R>), |
||||
TriG(FromReadTriGReader<R>), |
||||
Turtle(FromReadTurtleReader<R>), |
||||
} |
||||
|
||||
impl<R: Read> Iterator for FromReadQuadReader<R> { |
||||
type Item = Result<Quad, RdfParseError>; |
||||
|
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
Some(match &mut self.parser { |
||||
FromReadQuadReaderKind::N3(parser) => match parser.next()? { |
||||
Ok(quad) => self.mapper.map_n3_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::NQuads(parser) => match parser.next()? { |
||||
Ok(quad) => self.mapper.map_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::NTriples(parser) => match parser.next()? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::RdfXml(parser) => match parser.next()? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::TriG(parser) => match parser.next()? { |
||||
Ok(quad) => self.mapper.map_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::Turtle(parser) => match parser.next()? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl<R: Read> FromReadQuadReader<R> { |
||||
/// The list of IRI prefixes considered at the current step of the parsing.
|
||||
///
|
||||
/// This method returns (prefix name, prefix value) tuples.
|
||||
/// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
|
||||
/// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
|
||||
///
|
||||
/// An empty iterator is return if the format does not support prefixes.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" ."#;
|
||||
///
|
||||
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
|
||||
/// assert!(reader.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
|
||||
///
|
||||
/// reader.next().unwrap()?; // We read the first triple
|
||||
/// assert_eq!(
|
||||
/// reader.prefixes().collect::<Vec<_>>(),
|
||||
/// [("schema", "http://schema.org/")]
|
||||
/// ); // There are now prefixes
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn prefixes(&self) -> PrefixesIter<'_> { |
||||
PrefixesIter { |
||||
inner: match &self.parser { |
||||
FromReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()), |
||||
FromReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()), |
||||
FromReadQuadReaderKind::Turtle(p) => PrefixesIterKind::Turtle(p.prefixes()), |
||||
FromReadQuadReaderKind::NQuads(_) |
||||
| FromReadQuadReaderKind::NTriples(_) |
||||
| FromReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */ |
||||
}, |
||||
} |
||||
} |
||||
|
||||
/// The base IRI considered at the current step of the parsing.
|
||||
///
|
||||
/// `None` is returned if no base IRI is set or the format does not support base IRIs.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" ."#;
|
||||
///
|
||||
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
|
||||
/// assert!(reader.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
|
||||
///
|
||||
/// reader.next().unwrap()?; // We read the first triple
|
||||
/// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI.
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn base_iri(&self) -> Option<&str> { |
||||
match &self.parser { |
||||
FromReadQuadReaderKind::N3(p) => p.base_iri(), |
||||
FromReadQuadReaderKind::TriG(p) => p.base_iri(), |
||||
FromReadQuadReaderKind::Turtle(p) => p.base_iri(), |
||||
FromReadQuadReaderKind::NQuads(_) |
||||
| FromReadQuadReaderKind::NTriples(_) |
||||
| FromReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
|
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Parses a RDF file from a Tokio [`AsyncRead`] implementation. Can be built using [`RdfParser::parse_tokio_async_read`].
|
||||
///
|
||||
/// Reads are buffered.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
||||
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
|
||||
/// if let Some(quad) = reader.next().await {
|
||||
/// assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
|
||||
/// }
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct FromTokioAsyncReadQuadReader<R>
where
    R: AsyncRead + Unpin,
{
    // Format-specific asynchronous reader the items are pulled from.
    parser: FromTokioAsyncReadQuadReaderKind<R>,
    // Post-processing applied to every parsed item before it is handed to the caller.
    mapper: QuadMapper,
}
||||
|
||||
/// Format-specific asynchronous reader wrapped by [`FromTokioAsyncReadQuadReader`], one variant per input format.
#[cfg(feature = "async-tokio")]
enum FromTokioAsyncReadQuadReaderKind<R>
where
    R: AsyncRead + Unpin,
{
    N3(FromTokioAsyncReadN3Reader<R>),
    NQuads(FromTokioAsyncReadNQuadsReader<R>),
    NTriples(FromTokioAsyncReadNTriplesReader<R>),
    RdfXml(FromTokioAsyncReadRdfXmlReader<R>),
    TriG(FromTokioAsyncReadTriGReader<R>),
    Turtle(FromTokioAsyncReadTurtleReader<R>),
}
||||
|
||||
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadQuadReader<R> {
    /// Reads the next quad from the underlying format-specific reader.
    ///
    /// Returns `None` once the underlying reader has no more items to yield.
    /// Triples produced by graph formats are turned into quads by the internal mapper,
    /// and format-specific errors are converted into [`RdfParseError`].
    pub async fn next(&mut self) -> Option<Result<Quad, RdfParseError>> {
        Some(match &mut self.parser {
            FromTokioAsyncReadQuadReaderKind::N3(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_n3_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::NQuads(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::NTriples(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::RdfXml(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::TriG(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::Turtle(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
        })
    }

    /// The list of IRI prefixes considered at the current step of the parsing.
    ///
    /// This method returns (prefix name, prefix value) tuples.
    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
    ///
    /// An empty iterator is returned if the format does not support prefixes.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name "Foo" ."#;
    ///
    /// let mut reader =
    ///     RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
    ///
    /// reader.next().await.unwrap()?; // We read the first triple
    /// assert_eq!(
    ///     reader.prefixes().collect::<Vec<_>>(),
    ///     [("schema", "http://schema.org/")]
    /// ); // There are now prefixes
    /// # Ok(())
    /// # }
    /// ```
    pub fn prefixes(&self) -> PrefixesIter<'_> {
        PrefixesIter {
            inner: match &self.parser {
                FromTokioAsyncReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()),
                FromTokioAsyncReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()),
                FromTokioAsyncReadQuadReaderKind::Turtle(p) => {
                    PrefixesIterKind::Turtle(p.prefixes())
                }
                FromTokioAsyncReadQuadReaderKind::NQuads(_)
                | FromTokioAsyncReadQuadReaderKind::NTriples(_)
                | FromTokioAsyncReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */
            },
        }
    }

    /// The base IRI considered at the current step of the parsing.
    ///
    /// `None` is returned if no base IRI is set or the format does not support base IRIs.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name "Foo" ."#;
    ///
    /// let mut reader =
    ///     RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
    /// assert!(reader.base_iri().is_none()); // No base IRI at the beginning
    ///
    /// reader.next().await.unwrap()?; // We read the first triple
    /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI
    /// # Ok(())
    /// # }
    /// ```
    pub fn base_iri(&self) -> Option<&str> {
        match &self.parser {
            FromTokioAsyncReadQuadReaderKind::N3(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::TriG(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::Turtle(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::NQuads(_)
            | FromTokioAsyncReadQuadReaderKind::NTriples(_)
            | FromTokioAsyncReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
        }
    }
}
||||
|
||||
/// Iterator on the file prefixes.
|
||||
///
|
||||
/// See [`FromReadQuadReader::prefixes`].
|
||||
pub struct PrefixesIter<'a> { |
||||
inner: PrefixesIterKind<'a>, |
||||
} |
||||
|
||||
enum PrefixesIterKind<'a> { |
||||
Turtle(TurtlePrefixesIter<'a>), |
||||
TriG(TriGPrefixesIter<'a>), |
||||
N3(N3PrefixesIter<'a>), |
||||
None, |
||||
} |
||||
|
||||
impl<'a> Iterator for PrefixesIter<'a> { |
||||
type Item = (&'a str, &'a str); |
||||
|
||||
#[inline] |
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
match &mut self.inner { |
||||
PrefixesIterKind::Turtle(iter) => iter.next(), |
||||
PrefixesIterKind::TriG(iter) => iter.next(), |
||||
PrefixesIterKind::N3(iter) => iter.next(), |
||||
PrefixesIterKind::None => None, |
||||
} |
||||
} |
||||
|
||||
#[inline] |
||||
fn size_hint(&self) -> (usize, Option<usize>) { |
||||
match &self.inner { |
||||
PrefixesIterKind::Turtle(iter) => iter.size_hint(), |
||||
PrefixesIterKind::TriG(iter) => iter.size_hint(), |
||||
PrefixesIterKind::N3(iter) => iter.size_hint(), |
||||
PrefixesIterKind::None => (0, Some(0)), |
||||
} |
||||
} |
||||
} |
||||
|
||||
struct QuadMapper { |
||||
default_graph: GraphName, |
||||
without_named_graphs: bool, |
||||
blank_node_map: Option<HashMap<BlankNode, BlankNode>>, |
||||
} |
||||
|
||||
impl QuadMapper { |
||||
fn map_blank_node(&mut self, node: BlankNode) -> BlankNode { |
||||
if let Some(blank_node_map) = &mut self.blank_node_map { |
||||
blank_node_map |
||||
.entry(node) |
||||
.or_insert_with(BlankNode::default) |
||||
.clone() |
||||
} else { |
||||
node |
||||
} |
||||
} |
||||
|
||||
fn map_subject(&mut self, node: Subject) -> Subject { |
||||
match node { |
||||
Subject::NamedNode(node) => node.into(), |
||||
Subject::BlankNode(node) => self.map_blank_node(node).into(), |
||||
#[cfg(feature = "rdf-star")] |
||||
Subject::Triple(triple) => self.map_triple(*triple).into(), |
||||
} |
||||
} |
||||
|
||||
fn map_term(&mut self, node: Term) -> Term { |
||||
match node { |
||||
Term::NamedNode(node) => node.into(), |
||||
Term::BlankNode(node) => self.map_blank_node(node).into(), |
||||
Term::Literal(literal) => literal.into(), |
||||
#[cfg(feature = "rdf-star")] |
||||
Term::Triple(triple) => self.map_triple(*triple).into(), |
||||
} |
||||
} |
||||
|
||||
fn map_triple(&mut self, triple: Triple) -> Triple { |
||||
Triple { |
||||
subject: self.map_subject(triple.subject), |
||||
predicate: triple.predicate, |
||||
object: self.map_term(triple.object), |
||||
} |
||||
} |
||||
|
||||
fn map_graph_name(&mut self, graph_name: GraphName) -> Result<GraphName, RdfParseError> { |
||||
match graph_name { |
||||
GraphName::NamedNode(node) => { |
||||
if self.without_named_graphs { |
||||
Err(RdfParseError::msg("Named graphs are not allowed")) |
||||
} else { |
||||
Ok(node.into()) |
||||
} |
||||
} |
||||
GraphName::BlankNode(node) => { |
||||
if self.without_named_graphs { |
||||
Err(RdfParseError::msg("Named graphs are not allowed")) |
||||
} else { |
||||
Ok(self.map_blank_node(node).into()) |
||||
} |
||||
} |
||||
GraphName::DefaultGraph => Ok(self.default_graph.clone()), |
||||
} |
||||
} |
||||
|
||||
fn map_quad(&mut self, quad: Quad) -> Result<Quad, RdfParseError> { |
||||
Ok(Quad { |
||||
subject: self.map_subject(quad.subject), |
||||
predicate: quad.predicate, |
||||
object: self.map_term(quad.object), |
||||
graph_name: self.map_graph_name(quad.graph_name)?, |
||||
}) |
||||
} |
||||
|
||||
fn map_triple_to_quad(&mut self, triple: Triple) -> Quad { |
||||
self.map_triple(triple).in_graph(self.default_graph.clone()) |
||||
} |
||||
|
||||
fn map_n3_quad(&mut self, quad: N3Quad) -> Result<Quad, RdfParseError> { |
||||
Ok(Quad { |
||||
subject: match quad.subject { |
||||
N3Term::NamedNode(s) => Ok(s.into()), |
||||
N3Term::BlankNode(s) => Ok(self.map_blank_node(s).into()), |
||||
N3Term::Literal(_) => Err(RdfParseError::msg( |
||||
"literals are not allowed in regular RDF subjects", |
||||
)), |
||||
#[cfg(feature = "rdf-star")] |
||||
N3Term::Triple(s) => Ok(self.map_triple(*s).into()), |
||||
N3Term::Variable(_) => Err(RdfParseError::msg( |
||||
"variables are not allowed in regular RDF subjects", |
||||
)), |
||||
}?, |
||||
predicate: match quad.predicate { |
||||
N3Term::NamedNode(p) => Ok(p), |
||||
N3Term::BlankNode(_) => Err(RdfParseError::msg( |
||||
"blank nodes are not allowed in regular RDF predicates", |
||||
)), |
||||
N3Term::Literal(_) => Err(RdfParseError::msg( |
||||
"literals are not allowed in regular RDF predicates", |
||||
)), |
||||
#[cfg(feature = "rdf-star")] |
||||
N3Term::Triple(_) => Err(RdfParseError::msg( |
||||
"quoted triples are not allowed in regular RDF predicates", |
||||
)), |
||||
N3Term::Variable(_) => Err(RdfParseError::msg( |
||||
"variables are not allowed in regular RDF predicates", |
||||
)), |
||||
}?, |
||||
object: match quad.object { |
||||
N3Term::NamedNode(o) => Ok(o.into()), |
||||
N3Term::BlankNode(o) => Ok(self.map_blank_node(o).into()), |
||||
N3Term::Literal(o) => Ok(o.into()), |
||||
#[cfg(feature = "rdf-star")] |
||||
N3Term::Triple(o) => Ok(self.map_triple(*o).into()), |
||||
N3Term::Variable(_) => Err(RdfParseError::msg( |
||||
"variables are not allowed in regular RDF objects", |
||||
)), |
||||
}?, |
||||
graph_name: self.map_graph_name(quad.graph_name)?, |
||||
}) |
||||
} |
||||
} |
@ -1,410 +0,0 @@ |
||||
//! Utilities to write RDF graphs and datasets.
|
||||
|
||||
use crate::format::RdfFormat; |
||||
use oxrdf::{GraphNameRef, IriParseError, QuadRef, TripleRef}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxrdfxml::ToTokioAsyncWriteRdfXmlWriter; |
||||
use oxrdfxml::{RdfXmlSerializer, ToWriteRdfXmlWriter}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::nquads::ToTokioAsyncWriteNQuadsWriter; |
||||
use oxttl::nquads::{NQuadsSerializer, ToWriteNQuadsWriter}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::ntriples::ToTokioAsyncWriteNTriplesWriter; |
||||
use oxttl::ntriples::{NTriplesSerializer, ToWriteNTriplesWriter}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::trig::ToTokioAsyncWriteTriGWriter; |
||||
use oxttl::trig::{ToWriteTriGWriter, TriGSerializer}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::turtle::ToTokioAsyncWriteTurtleWriter; |
||||
use oxttl::turtle::{ToWriteTurtleWriter, TurtleSerializer}; |
||||
use std::io::{self, Write}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::AsyncWrite; |
||||
|
||||
/// A serializer for RDF serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
|
||||
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
|
||||
/// * [canonical](https://www.w3.org/TR/n-triples/#canonical-ntriples) [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
|
||||
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
|
||||
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
|
||||
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
/// use oxrdf::{Quad, NamedNode};
|
||||
///
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
|
||||
/// writer.write_quad(&Quad {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// graph_name: NamedNode::new("http://example.com/g")?.into()
|
||||
/// })?;
|
||||
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct RdfSerializer { |
||||
inner: RdfSerializerKind, |
||||
} |
||||
|
||||
enum RdfSerializerKind { |
||||
NQuads(NQuadsSerializer), |
||||
NTriples(NTriplesSerializer), |
||||
RdfXml(RdfXmlSerializer), |
||||
TriG(TriGSerializer), |
||||
Turtle(TurtleSerializer), |
||||
} |
||||
|
||||
impl RdfSerializer { |
||||
/// Builds a serializer for the given format
|
||||
#[inline] |
||||
pub fn from_format(format: RdfFormat) -> Self { |
||||
Self { |
||||
inner: match format { |
||||
RdfFormat::NQuads => RdfSerializerKind::NQuads(NQuadsSerializer::new()), |
||||
RdfFormat::NTriples => RdfSerializerKind::NTriples(NTriplesSerializer::new()), |
||||
RdfFormat::RdfXml => RdfSerializerKind::RdfXml(RdfXmlSerializer::new()), |
||||
RdfFormat::TriG => RdfSerializerKind::TriG(TriGSerializer::new()), |
||||
RdfFormat::Turtle | RdfFormat::N3 => { |
||||
RdfSerializerKind::Turtle(TurtleSerializer::new()) |
||||
} |
||||
}, |
||||
} |
||||
} |
||||
|
||||
/// The format the serializer serializes to.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// RdfSerializer::from_format(RdfFormat::Turtle).format(),
|
||||
/// RdfFormat::Turtle
|
||||
/// );
|
||||
/// ```
|
||||
pub fn format(&self) -> RdfFormat { |
||||
match &self.inner { |
||||
RdfSerializerKind::NQuads(_) => RdfFormat::NQuads, |
||||
RdfSerializerKind::NTriples(_) => RdfFormat::NTriples, |
||||
RdfSerializerKind::RdfXml(_) => RdfFormat::RdfXml, |
||||
RdfSerializerKind::TriG(_) => RdfFormat::TriG, |
||||
RdfSerializerKind::Turtle(_) => RdfFormat::Turtle, |
||||
} |
||||
} |
||||
|
||||
/// If the format supports it, sets a prefix.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::vocab::rdf;
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
///
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::Turtle)
|
||||
/// .with_prefix("schema", "http://schema.org/")?
|
||||
/// .serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef {
|
||||
/// subject: NamedNodeRef::new("http://example.com/s")?.into(),
|
||||
/// predicate: rdf::TYPE.into(),
|
||||
/// object: NamedNodeRef::new("http://schema.org/Person")?.into(),
|
||||
/// })?;
|
||||
/// assert_eq!(
|
||||
/// writer.finish()?,
|
||||
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com/s> a schema:Person .\n"
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn with_prefix( |
||||
mut self, |
||||
prefix_name: impl Into<String>, |
||||
prefix_iri: impl Into<String>, |
||||
) -> Result<Self, IriParseError> { |
||||
self.inner = match self.inner { |
||||
RdfSerializerKind::NQuads(s) => RdfSerializerKind::NQuads(s), |
||||
RdfSerializerKind::NTriples(s) => RdfSerializerKind::NTriples(s), |
||||
RdfSerializerKind::RdfXml(s) => { |
||||
RdfSerializerKind::RdfXml(s.with_prefix(prefix_name, prefix_iri)?) |
||||
} |
||||
RdfSerializerKind::TriG(s) => { |
||||
RdfSerializerKind::TriG(s.with_prefix(prefix_name, prefix_iri)?) |
||||
} |
||||
RdfSerializerKind::Turtle(s) => { |
||||
RdfSerializerKind::Turtle(s.with_prefix(prefix_name, prefix_iri)?) |
||||
} |
||||
}; |
||||
Ok(self) |
||||
} |
||||
|
||||
/// Writes to a [`Write`] implementation.
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
/// use oxrdf::{Quad, NamedNode};
|
||||
///
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
|
||||
/// writer.write_quad(&Quad {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// graph_name: NamedNode::new("http://example.com/g")?.into()
|
||||
/// })?;
|
||||
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteQuadWriter<W> { |
||||
ToWriteQuadWriter { |
||||
formatter: match self.inner { |
||||
RdfSerializerKind::NQuads(s) => { |
||||
ToWriteQuadWriterKind::NQuads(s.serialize_to_write(write)) |
||||
} |
||||
RdfSerializerKind::NTriples(s) => { |
||||
ToWriteQuadWriterKind::NTriples(s.serialize_to_write(write)) |
||||
} |
||||
RdfSerializerKind::RdfXml(s) => { |
||||
ToWriteQuadWriterKind::RdfXml(s.serialize_to_write(write)) |
||||
} |
||||
RdfSerializerKind::TriG(s) => { |
||||
ToWriteQuadWriterKind::TriG(s.serialize_to_write(write)) |
||||
} |
||||
RdfSerializerKind::Turtle(s) => { |
||||
ToWriteQuadWriterKind::Turtle(s.serialize_to_write(write)) |
||||
} |
||||
}, |
||||
} |
||||
} |
||||
|
||||
/// Writes to a Tokio [`AsyncWrite`] implementation.
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
/// use oxrdf::{Quad, NamedNode};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> std::io::Result<()> {
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
|
||||
/// writer.write_quad(&Quad {
|
||||
/// subject: NamedNode::new_unchecked("http://example.com/s").into(),
|
||||
/// predicate: NamedNode::new_unchecked("http://example.com/p"),
|
||||
/// object: NamedNode::new_unchecked("http://example.com/o").into(),
|
||||
/// graph_name: NamedNode::new_unchecked("http://example.com/g").into()
|
||||
/// }).await?;
|
||||
/// assert_eq!(writer.finish().await?, "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>( |
||||
self, |
||||
write: W, |
||||
) -> ToTokioAsyncWriteQuadWriter<W> { |
||||
ToTokioAsyncWriteQuadWriter { |
||||
formatter: match self.inner { |
||||
RdfSerializerKind::NQuads(s) => { |
||||
ToTokioAsyncWriteQuadWriterKind::NQuads(s.serialize_to_tokio_async_write(write)) |
||||
} |
||||
RdfSerializerKind::NTriples(s) => ToTokioAsyncWriteQuadWriterKind::NTriples( |
||||
s.serialize_to_tokio_async_write(write), |
||||
), |
||||
RdfSerializerKind::RdfXml(s) => { |
||||
ToTokioAsyncWriteQuadWriterKind::RdfXml(s.serialize_to_tokio_async_write(write)) |
||||
} |
||||
RdfSerializerKind::TriG(s) => { |
||||
ToTokioAsyncWriteQuadWriterKind::TriG(s.serialize_to_tokio_async_write(write)) |
||||
} |
||||
RdfSerializerKind::Turtle(s) => { |
||||
ToTokioAsyncWriteQuadWriterKind::Turtle(s.serialize_to_tokio_async_write(write)) |
||||
} |
||||
}, |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<RdfFormat> for RdfSerializer { |
||||
fn from(format: RdfFormat) -> Self { |
||||
Self::from_format(format) |
||||
} |
||||
} |
||||
|
||||
/// Writes quads or triples to a [`Write`] implementation.
|
||||
///
|
||||
/// Can be built using [`RdfSerializer::serialize_to_write`].
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
/// use oxrdf::{Quad, NamedNode};
|
||||
///
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
|
||||
/// writer.write_quad(&Quad {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
|
||||
/// })?;
|
||||
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct ToWriteQuadWriter<W: Write> { |
||||
formatter: ToWriteQuadWriterKind<W>, |
||||
} |
||||
|
||||
enum ToWriteQuadWriterKind<W: Write> { |
||||
NQuads(ToWriteNQuadsWriter<W>), |
||||
NTriples(ToWriteNTriplesWriter<W>), |
||||
RdfXml(ToWriteRdfXmlWriter<W>), |
||||
TriG(ToWriteTriGWriter<W>), |
||||
Turtle(ToWriteTurtleWriter<W>), |
||||
} |
||||
|
||||
impl<W: Write> ToWriteQuadWriter<W> { |
||||
/// Writes a [`QuadRef`]
|
||||
pub fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> { |
||||
match &mut self.formatter { |
||||
ToWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad), |
||||
ToWriteQuadWriterKind::NTriples(writer) => writer.write_triple(to_triple(quad)?), |
||||
ToWriteQuadWriterKind::RdfXml(writer) => writer.write_triple(to_triple(quad)?), |
||||
ToWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad), |
||||
ToWriteQuadWriterKind::Turtle(writer) => writer.write_triple(to_triple(quad)?), |
||||
} |
||||
} |
||||
|
||||
/// Writes a [`TripleRef`]
|
||||
pub fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> { |
||||
self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph)) |
||||
} |
||||
|
||||
/// Writes the last bytes of the file
|
||||
///
|
||||
/// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](io::BufWriter).
|
||||
pub fn finish(self) -> io::Result<W> { |
||||
Ok(match self.formatter { |
||||
ToWriteQuadWriterKind::NQuads(writer) => writer.finish(), |
||||
ToWriteQuadWriterKind::NTriples(writer) => writer.finish(), |
||||
ToWriteQuadWriterKind::RdfXml(writer) => writer.finish()?, |
||||
ToWriteQuadWriterKind::TriG(writer) => writer.finish()?, |
||||
ToWriteQuadWriterKind::Turtle(writer) => writer.finish()?, |
||||
}) |
||||
} |
||||
} |
||||
|
||||
/// Writes quads or triples to a Tokio [`AsyncWrite`] implementation.
///
/// Can be built using [`RdfSerializer::serialize_to_tokio_async_write`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// <div class="warning">
///
/// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> std::io::Result<()> {
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
/// writer.write_quad(&Quad {
///     subject: NamedNode::new_unchecked("http://example.com/s").into(),
///     predicate: NamedNode::new_unchecked("http://example.com/p"),
///     object: NamedNode::new_unchecked("http://example.com/o").into(),
///     graph_name: NamedNode::new_unchecked("http://example.com/g").into()
/// }).await?;
/// assert_eq!(writer.finish().await?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
/// # Ok(())
/// # }
/// ```
#[must_use]
#[cfg(feature = "async-tokio")]
pub struct ToTokioAsyncWriteQuadWriter<W: AsyncWrite + Unpin> {
    // Format-specific asynchronous writer every call is forwarded to.
    formatter: ToTokioAsyncWriteQuadWriterKind<W>,
}
||||
|
||||
/// Format-specific asynchronous writer wrapped by [`ToTokioAsyncWriteQuadWriter`].
#[cfg(feature = "async-tokio")]
enum ToTokioAsyncWriteQuadWriterKind<W>
where
    W: AsyncWrite + Unpin,
{
    NQuads(ToTokioAsyncWriteNQuadsWriter<W>),
    NTriples(ToTokioAsyncWriteNTriplesWriter<W>),
    RdfXml(ToTokioAsyncWriteRdfXmlWriter<W>),
    TriG(ToTokioAsyncWriteTriGWriter<W>),
    Turtle(ToTokioAsyncWriteTurtleWriter<W>),
}
||||
|
||||
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteQuadWriter<W> {
    /// Writes a [`QuadRef`]
    ///
    /// For the N-Triples, RDF/XML and Turtle writers the quad is first converted to a triple,
    /// which fails if the quad is not in the default graph.
    pub async fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
        match &mut self.formatter {
            ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad).await,
            ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => {
                writer.write_triple(to_triple(quad)?).await
            }
            ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => {
                writer.write_triple(to_triple(quad)?).await
            }
            ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad).await,
            ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => {
                writer.write_triple(to_triple(quad)?).await
            }
        }
    }

    /// Writes a [`TripleRef`]
    ///
    /// The triple is written as a quad in the default graph.
    pub async fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
        self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph))
            .await
    }

    /// Writes the last bytes of the file
    ///
    /// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](tokio::io::BufWriter).
    pub async fn finish(self) -> io::Result<W> {
        Ok(match self.formatter {
            ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.finish(),
            ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => writer.finish(),
            ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => writer.finish().await?,
            ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.finish().await?,
            ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => writer.finish().await?,
        })
    }
}
||||
|
||||
fn to_triple<'a>(quad: impl Into<QuadRef<'a>>) -> io::Result<TripleRef<'a>> { |
||||
let quad = quad.into(); |
||||
if quad.graph_name.is_default_graph() { |
||||
Ok(quad.into()) |
||||
} else { |
||||
Err(io::Error::new( |
||||
io::ErrorKind::InvalidInput, |
||||
"Only quads in the default graph can be serialized to a RDF graph format", |
||||
)) |
||||
} |
||||
} |
@ -1,36 +0,0 @@ |
||||
[package] |
||||
name = "oxrdfxml" |
||||
version = "0.1.0-alpha.5" |
||||
authors.workspace = true |
||||
license.workspace = true |
||||
readme = "README.md" |
||||
keywords = ["RDFXML", "XML", "RDF"] |
||||
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfxml" |
||||
description = """ |
||||
Parser and serializer for the RDF/XML format |
||||
""" |
||||
documentation = "https://docs.rs/oxrdfxml" |
||||
edition.workspace = true |
||||
rust-version.workspace = true |
||||
|
||||
[features] |
||||
default = [] |
||||
async-tokio = ["dep:tokio", "quick-xml/async-tokio"] |
||||
|
||||
[dependencies] |
||||
oxilangtag.workspace = true |
||||
oxiri.workspace = true |
||||
oxrdf.workspace = true |
||||
quick-xml.workspace = true |
||||
thiserror.workspace = true |
||||
tokio = { workspace = true, optional = true, features = ["io-util"] } |
||||
|
||||
[dev-dependencies] |
||||
tokio = { workspace = true, features = ["rt", "macros"] } |
||||
|
||||
[lints] |
||||
workspace = true |
||||
|
||||
[package.metadata.docs.rs] |
||||
all-features = true |
||||
rustdoc-args = ["--cfg", "docsrs"] |
@ -1,56 +0,0 @@ |
||||
OxRDF/XML |
||||
========= |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/oxrdfxml.svg)](https://crates.io/crates/oxrdfxml) |
||||
[![Released API docs](https://docs.rs/oxrdfxml/badge.svg)](https://docs.rs/oxrdfxml) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfxml)](https://crates.io/crates/oxrdfxml) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
||||
|
||||
OxRdfXml is a parser and serializer for [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/). |
||||
|
||||
The entry points of this library are the two [`RdfXmlParser`] and [`RdfXmlSerializer`] structs. |
||||
|
||||
Usage example counting the number of people in an RDF/XML file: |
||||
|
||||
```rust |
||||
use oxrdf::{NamedNodeRef, vocab::rdf}; |
||||
use oxrdfxml::RdfXmlParser; |
||||
|
||||
fn main() { |
||||
let file = br#"<?xml version="1.0"?> |
||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/"> |
||||
<rdf:Description rdf:about="http://example.com/foo"> |
||||
<rdf:type rdf:resource="http://schema.org/Person" /> |
||||
<schema:name>Foo</schema:name> |
||||
</rdf:Description> |
||||
<schema:Person rdf:about="http://example.com/bar" schema:name="Bar" /> |
||||
</rdf:RDF>"#; |
||||
|
||||
let schema_person = NamedNodeRef::new("http://schema.org/Person").unwrap(); |
||||
let mut count = 0; |
||||
for triple in RdfXmlParser::new().parse_read(file.as_ref()) { |
||||
let triple = triple.unwrap(); |
||||
if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { |
||||
count += 1; |
||||
} |
||||
} |
||||
assert_eq!(2, count); |
||||
} |
||||
``` |
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||
`<http://opensource.org/licenses/MIT>`) |
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
@ -1,89 +0,0 @@ |
||||
use oxilangtag::LanguageTagParseError; |
||||
use oxiri::IriParseError; |
||||
use std::io; |
||||
use std::sync::Arc; |
||||
|
||||
/// Error returned during RDF/XML parsing.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum RdfXmlParseError { |
||||
/// I/O error during parsing (file not found...).
|
||||
#[error(transparent)] |
||||
Io(#[from] io::Error), |
||||
/// An error in the file syntax.
|
||||
#[error(transparent)] |
||||
Syntax(#[from] RdfXmlSyntaxError), |
||||
} |
||||
|
||||
impl From<RdfXmlParseError> for io::Error { |
||||
#[inline] |
||||
fn from(error: RdfXmlParseError) -> Self { |
||||
match error { |
||||
RdfXmlParseError::Io(error) => error, |
||||
RdfXmlParseError::Syntax(error) => error.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<quick_xml::Error> for RdfXmlParseError { |
||||
#[inline] |
||||
fn from(error: quick_xml::Error) -> Self { |
||||
match error { |
||||
quick_xml::Error::Io(error) => { |
||||
Self::Io(Arc::try_unwrap(error).unwrap_or_else(|e| io::Error::new(e.kind(), e))) |
||||
} |
||||
_ => Self::Syntax(RdfXmlSyntaxError(SyntaxErrorKind::Xml(error))), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An error in the syntax of the parsed file.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[error(transparent)] |
||||
pub struct RdfXmlSyntaxError(#[from] pub(crate) SyntaxErrorKind); |
||||
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum SyntaxErrorKind { |
||||
#[error(transparent)] |
||||
Xml(#[from] quick_xml::Error), |
||||
#[error("error while parsing IRI '{iri}': {error}")] |
||||
InvalidIri { |
||||
iri: String, |
||||
#[source] |
||||
error: IriParseError, |
||||
}, |
||||
#[error("error while parsing language tag '{tag}': {error}")] |
||||
InvalidLanguageTag { |
||||
tag: String, |
||||
#[source] |
||||
error: LanguageTagParseError, |
||||
}, |
||||
#[error("{0}")] |
||||
Msg(String), |
||||
} |
||||
|
||||
impl RdfXmlSyntaxError { |
||||
/// Builds an error from a printable error message.
|
||||
#[inline] |
||||
pub(crate) fn msg(msg: impl Into<String>) -> Self { |
||||
Self(SyntaxErrorKind::Msg(msg.into())) |
||||
} |
||||
} |
||||
|
||||
impl From<RdfXmlSyntaxError> for io::Error { |
||||
#[inline] |
||||
fn from(error: RdfXmlSyntaxError) -> Self { |
||||
match error.0 { |
||||
SyntaxErrorKind::Xml(error) => match error { |
||||
quick_xml::Error::Io(error) => { |
||||
Arc::try_unwrap(error).unwrap_or_else(|e| Self::new(e.kind(), e)) |
||||
} |
||||
quick_xml::Error::UnexpectedEof(error) => { |
||||
Self::new(io::ErrorKind::UnexpectedEof, error) |
||||
} |
||||
_ => Self::new(io::ErrorKind::InvalidData, error), |
||||
}, |
||||
SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg), |
||||
_ => Self::new(io::ErrorKind::InvalidData, error), |
||||
} |
||||
} |
||||
} |
@ -1,18 +0,0 @@ |
||||
#![doc = include_str!("../README.md")] |
||||
#![doc(test(attr(deny(warnings))))] |
||||
#![cfg_attr(docsrs, feature(doc_auto_cfg))] |
||||
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
||||
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
||||
|
||||
mod error; |
||||
mod parser; |
||||
mod serializer; |
||||
mod utils; |
||||
|
||||
pub use error::{RdfXmlParseError, RdfXmlSyntaxError}; |
||||
#[cfg(feature = "async-tokio")] |
||||
pub use parser::FromTokioAsyncReadRdfXmlReader; |
||||
pub use parser::{FromReadRdfXmlReader, RdfXmlParser}; |
||||
#[cfg(feature = "async-tokio")] |
||||
pub use serializer::ToTokioAsyncWriteRdfXmlWriter; |
||||
pub use serializer::{RdfXmlSerializer, ToWriteRdfXmlWriter}; |
File diff suppressed because it is too large
Load Diff
@ -1,461 +0,0 @@ |
||||
use crate::utils::*; |
||||
use oxiri::{Iri, IriParseError}; |
||||
use oxrdf::vocab::rdf; |
||||
use oxrdf::{NamedNodeRef, Subject, SubjectRef, TermRef, TripleRef}; |
||||
use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; |
||||
use quick_xml::Writer; |
||||
use std::borrow::Cow; |
||||
use std::collections::BTreeMap; |
||||
use std::io; |
||||
use std::io::Write; |
||||
use std::sync::Arc; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::AsyncWrite; |
||||
|
||||
/// A [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) serializer.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
|
||||
/// use oxrdfxml::RdfXmlSerializer;
|
||||
///
|
||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
||||
/// writer.finish()?.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[derive(Default)]
#[must_use]
pub struct RdfXmlSerializer {
    // Declared namespaces, keyed by namespace IRI; the value is the prefix name.
    prefixes: BTreeMap<String, String>,
}
||||
|
||||
impl RdfXmlSerializer { |
||||
/// Builds a new [`RdfXmlSerializer`].
|
||||
#[inline] |
||||
pub fn new() -> Self { |
||||
Self { |
||||
prefixes: BTreeMap::new(), |
||||
} |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn with_prefix( |
||||
mut self, |
||||
prefix_name: impl Into<String>, |
||||
prefix_iri: impl Into<String>, |
||||
) -> Result<Self, IriParseError> { |
||||
self.prefixes.insert( |
||||
Iri::parse(prefix_iri.into())?.into_inner(), |
||||
prefix_name.into(), |
||||
); |
||||
Ok(self) |
||||
} |
||||
|
||||
/// Writes a RDF/XML file to a [`Write`] implementation.
|
||||
///
|
||||
/// This writer does unbuffered writes.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
|
||||
/// use oxrdfxml::RdfXmlSerializer;
|
||||
///
|
||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
||||
/// writer.finish()?.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[allow(clippy::unused_self)] |
||||
pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteRdfXmlWriter<W> { |
||||
ToWriteRdfXmlWriter { |
||||
writer: Writer::new_with_indent(write, b'\t', 1), |
||||
inner: self.inner_writer(), |
||||
} |
||||
} |
||||
|
||||
/// Writes a RDF/XML file to a [`AsyncWrite`] implementation.
|
||||
///
|
||||
/// This writer does unbuffered writes.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef};
|
||||
/// use oxrdfxml::RdfXmlSerializer;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// )).await?;
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
||||
/// )).await?;
|
||||
/// assert_eq!(
|
||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
||||
/// writer.finish().await?.as_slice()
|
||||
/// );
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[allow(clippy::unused_self)]
#[cfg(feature = "async-tokio")]
pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>(
    self,
    write: W,
) -> ToTokioAsyncWriteRdfXmlWriter<W> {
    // Output is indented with one tab per nesting level.
    let writer = Writer::new_with_indent(write, b'\t', 1);
    let inner = self.inner_writer();
    ToTokioAsyncWriteRdfXmlWriter { writer, inner }
}
||||
|
||||
fn inner_writer(mut self) -> InnerRdfXmlWriter { |
||||
self.prefixes.insert( |
||||
"http://www.w3.org/1999/02/22-rdf-syntax-ns#".into(), |
||||
"rdf".into(), |
||||
); |
||||
InnerRdfXmlWriter { |
||||
current_subject: None, |
||||
current_resource_tag: None, |
||||
prefixes: self.prefixes, |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Writes a RDF/XML file to a [`Write`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_write`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
|
||||
/// use oxrdfxml::RdfXmlSerializer;
|
||||
///
|
||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
||||
/// writer.finish()?.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct ToWriteRdfXmlWriter<W: Write> { |
||||
writer: Writer<W>, |
||||
inner: InnerRdfXmlWriter, |
||||
} |
||||
|
||||
impl<W: Write> ToWriteRdfXmlWriter<W> { |
||||
/// Writes an extra triple.
|
||||
#[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)] |
||||
pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> { |
||||
let mut buffer = Vec::new(); |
||||
self.inner.write_triple(t, &mut buffer)?; |
||||
self.flush_buffer(&mut buffer) |
||||
} |
||||
|
||||
/// Ends the write process and returns the underlying [`Write`].
|
||||
pub fn finish(mut self) -> io::Result<W> { |
||||
let mut buffer = Vec::new(); |
||||
self.inner.finish(&mut buffer); |
||||
self.flush_buffer(&mut buffer)?; |
||||
Ok(self.writer.into_inner()) |
||||
} |
||||
|
||||
fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> { |
||||
for event in buffer.drain(0..) { |
||||
self.writer.write_event(event).map_err(map_err)?; |
||||
} |
||||
Ok(()) |
||||
} |
||||
} |
||||
|
||||
/// Writes a RDF/XML file to a [`AsyncWrite`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_tokio_async_write`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef};
|
||||
/// use oxrdfxml::RdfXmlSerializer;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// )).await?;
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
||||
/// )).await?;
|
||||
/// assert_eq!(
|
||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
||||
/// writer.finish().await?.as_slice()
|
||||
/// );
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct ToTokioAsyncWriteRdfXmlWriter<W: AsyncWrite + Unpin> {
    // XML writer wrapping the output given to `serialize_to_tokio_async_write`.
    writer: Writer<W>,
    // Turns triples into XML events and tracks the currently open element.
    inner: InnerRdfXmlWriter,
}
||||
|
||||
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteRdfXmlWriter<W> {
    /// Serializes one more triple to the output.
    #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)]
    pub async fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let mut events = Vec::new();
        // Nothing is written if the triple cannot be turned into events.
        self.inner.write_triple(t, &mut events)?;
        self.flush_buffer(&mut events).await
    }

    /// Closes the document and gives back the underlying [`AsyncWrite`].
    pub async fn finish(mut self) -> io::Result<W> {
        let mut events = Vec::new();
        self.inner.finish(&mut events);
        self.flush_buffer(&mut events).await?;
        Ok(self.writer.into_inner())
    }

    // Writes the pending events in order, emptying `buffer`; stops at the first failure.
    async fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> {
        for event in buffer.drain(..) {
            let outcome = self.writer.write_event_async(event).await;
            outcome.map_err(map_err)?;
        }
        Ok(())
    }
}
||||
|
||||
pub struct InnerRdfXmlWriter { |
||||
current_subject: Option<Subject>, |
||||
current_resource_tag: Option<String>, |
||||
prefixes: BTreeMap<String, String>, |
||||
} |
||||
|
||||
impl InnerRdfXmlWriter { |
||||
#[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)] |
||||
fn write_triple<'a>( |
||||
&mut self, |
||||
t: impl Into<TripleRef<'a>>, |
||||
output: &mut Vec<Event<'a>>, |
||||
) -> io::Result<()> { |
||||
if self.current_subject.is_none() { |
||||
self.write_start(output); |
||||
} |
||||
|
||||
let triple = t.into(); |
||||
// We open a new rdf:Description if useful
|
||||
if self.current_subject.as_ref().map(Subject::as_ref) != Some(triple.subject) { |
||||
if self.current_subject.is_some() { |
||||
output.push(Event::End( |
||||
self.current_resource_tag |
||||
.take() |
||||
.map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new), |
||||
)); |
||||
} |
||||
self.current_subject = Some(triple.subject.into_owned()); |
||||
|
||||
let (mut description_open, with_type_tag) = if triple.predicate == rdf::TYPE { |
||||
if let TermRef::NamedNode(t) = triple.object { |
||||
let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(t); |
||||
let mut description_open = BytesStart::new(prop_qname.clone()); |
||||
if let Some(prop_xmlns) = prop_xmlns { |
||||
description_open.push_attribute(prop_xmlns); |
||||
} |
||||
self.current_resource_tag = Some(prop_qname.into_owned()); |
||||
(description_open, true) |
||||
} else { |
||||
(BytesStart::new("rdf:Description"), false) |
||||
} |
||||
} else { |
||||
(BytesStart::new("rdf:Description"), false) |
||||
}; |
||||
match triple.subject { |
||||
SubjectRef::NamedNode(node) => { |
||||
description_open.push_attribute(("rdf:about", node.as_str())) |
||||
} |
||||
SubjectRef::BlankNode(node) => { |
||||
description_open.push_attribute(("rdf:nodeID", node.as_str())) |
||||
} |
||||
_ => { |
||||
return Err(io::Error::new( |
||||
io::ErrorKind::InvalidInput, |
||||
"RDF/XML only supports named or blank subject", |
||||
)) |
||||
} |
||||
} |
||||
output.push(Event::Start(description_open)); |
||||
if with_type_tag { |
||||
return Ok(()); // No need for a value
|
||||
} |
||||
} |
||||
|
||||
let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(triple.predicate); |
||||
let mut property_open = BytesStart::new(prop_qname.clone()); |
||||
if let Some(prop_xmlns) = prop_xmlns { |
||||
property_open.push_attribute(prop_xmlns); |
||||
} |
||||
let content = match triple.object { |
||||
TermRef::NamedNode(node) => { |
||||
property_open.push_attribute(("rdf:resource", node.as_str())); |
||||
None |
||||
} |
||||
TermRef::BlankNode(node) => { |
||||
property_open.push_attribute(("rdf:nodeID", node.as_str())); |
||||
None |
||||
} |
||||
TermRef::Literal(literal) => { |
||||
if let Some(language) = literal.language() { |
||||
property_open.push_attribute(("xml:lang", language)); |
||||
} else if !literal.is_plain() { |
||||
property_open.push_attribute(("rdf:datatype", literal.datatype().as_str())); |
||||
} |
||||
Some(literal.value()) |
||||
} |
||||
_ => { |
||||
return Err(io::Error::new( |
||||
io::ErrorKind::InvalidInput, |
||||
"RDF/XML only supports named, blank or literal object", |
||||
)) |
||||
} |
||||
}; |
||||
if let Some(content) = content { |
||||
output.push(Event::Start(property_open)); |
||||
output.push(Event::Text(BytesText::new(content))); |
||||
output.push(Event::End(BytesEnd::new(prop_qname))); |
||||
} else { |
||||
output.push(Event::Empty(property_open)); |
||||
} |
||||
Ok(()) |
||||
} |
||||
|
||||
fn write_start(&self, output: &mut Vec<Event<'_>>) { |
||||
output.push(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None))); |
||||
let mut rdf_open = BytesStart::new("rdf:RDF"); |
||||
for (prefix_value, prefix_name) in &self.prefixes { |
||||
rdf_open.push_attribute(( |
||||
format!("xmlns:{prefix_name}").as_str(), |
||||
prefix_value.as_str(), |
||||
)); |
||||
} |
||||
output.push(Event::Start(rdf_open)) |
||||
} |
||||
|
||||
fn finish(&mut self, output: &mut Vec<Event<'static>>) { |
||||
if self.current_subject.is_some() { |
||||
output.push(Event::End( |
||||
self.current_resource_tag |
||||
.take() |
||||
.map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new), |
||||
)); |
||||
} else { |
||||
self.write_start(output); |
||||
} |
||||
output.push(Event::End(BytesEnd::new("rdf:RDF"))); |
||||
} |
||||
|
||||
fn uri_to_qname_and_xmlns<'a>( |
||||
&self, |
||||
uri: NamedNodeRef<'a>, |
||||
) -> (Cow<'a, str>, Option<(&'a str, &'a str)>) { |
||||
let (prop_prefix, prop_value) = split_iri(uri.as_str()); |
||||
if let Some(prop_prefix) = self.prefixes.get(prop_prefix) { |
||||
( |
||||
if prop_prefix.is_empty() { |
||||
Cow::Borrowed(prop_value) |
||||
} else { |
||||
Cow::Owned(format!("{prop_prefix}:{prop_value}")) |
||||
}, |
||||
None, |
||||
) |
||||
} else if prop_prefix == "http://www.w3.org/2000/xmlns/" { |
||||
(Cow::Owned(format!("xmlns:{prop_value}")), None) |
||||
} else if prop_value.is_empty() { |
||||
(Cow::Borrowed("p:"), Some(("xmlns:p", prop_prefix))) |
||||
} else { |
||||
(Cow::Borrowed(prop_value), Some(("xmlns", prop_prefix))) |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn map_err(error: quick_xml::Error) -> io::Error { |
||||
if let quick_xml::Error::Io(error) = error { |
||||
Arc::try_unwrap(error).unwrap_or_else(|error| io::Error::new(error.kind(), error)) |
||||
} else { |
||||
io::Error::new(io::ErrorKind::Other, error) |
||||
} |
||||
} |
||||
|
||||
fn split_iri(iri: &str) -> (&str, &str) { |
||||
if let Some(position_base) = iri.rfind(|c| !is_name_char(c) || c == ':') { |
||||
if let Some(position_add) = iri[position_base..].find(|c| is_name_start_char(c) && c != ':') |
||||
{ |
||||
( |
||||
&iri[..position_base + position_add], |
||||
&iri[position_base + position_add..], |
||||
) |
||||
} else { |
||||
(iri, "") |
||||
} |
||||
} else { |
||||
(iri, "") |
||||
} |
||||
} |
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Each case is (input IRI, expected (namespace, local name)).
    #[test]
    fn test_split_iri() {
        let cases = [
            (
                "http://schema.org/Person",
                ("http://schema.org/", "Person"),
            ),
            ("http://schema.org/", ("http://schema.org/", "")),
            ("http://schema.org#foo", ("http://schema.org#", "foo")),
            ("urn:isbn:foo", ("urn:isbn:", "foo")),
        ];
        for (iri, expected) in cases {
            assert_eq!(split_iri(iri), expected);
        }
    }
}
@ -1,26 +0,0 @@ |
||||
/// Tells whether `c` may start an XML name (the `NameStartChar` production).
pub fn is_name_start_char(c: char) -> bool {
    // ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
    match c {
        // ASCII part of the production
        ':' | '_' | 'A'..='Z' | 'a'..='z' => true,
        // Non-ASCII ranges, in code point order
        '\u{00C0}'..='\u{00D6}'
        | '\u{00D8}'..='\u{00F6}'
        | '\u{00F8}'..='\u{02FF}'
        | '\u{0370}'..='\u{037D}'
        | '\u{037F}'..='\u{1FFF}'
        | '\u{200C}'..='\u{200D}'
        | '\u{2070}'..='\u{218F}'
        | '\u{2C00}'..='\u{2FEF}'
        | '\u{3001}'..='\u{D7FF}'
        | '\u{F900}'..='\u{FDCF}'
        | '\u{FDF0}'..='\u{FFFD}'
        | '\u{10000}'..='\u{EFFFF}' => true,
        _ => false,
    }
}
||||
|
||||
pub fn is_name_char(c: char) -> bool { |
||||
// NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
|
||||
is_name_start_char(c) |
||||
|| matches!(c, '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}') |
||||
} |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue