Compare commits
3 Commits
Author | SHA1 | Date |
---|---|---|
Niko PLP | e963387b02 | 9 months ago |
Niko PLP | 7b8901718e | 9 months ago |
Niko PLP | b3ec66e21b | 9 months ago |
@ -0,0 +1,137 @@ |
|||||||
|
[build] |
||||||
|
rustflags = [ |
||||||
|
"-Wtrivial-casts", |
||||||
|
"-Wtrivial-numeric-casts", |
||||||
|
"-Wunsafe-code", |
||||||
|
"-Wunused-lifetimes", |
||||||
|
"-Wunused-qualifications", |
||||||
|
# TODO: 1.63+ "-Wclippy::as-underscore", |
||||||
|
# TODO: 1.65+ "-Wclippy::bool-to-int-with-if", |
||||||
|
"-Wclippy::borrow-as-ptr", |
||||||
|
"-Wclippy::case-sensitive-file-extension-comparisons", |
||||||
|
"-Wclippy::cast-lossless", |
||||||
|
"-Wclippy::cast-possible-truncation", |
||||||
|
"-Wclippy::cast-possible-wrap", |
||||||
|
"-Wclippy::cast-precision-loss", |
||||||
|
"-Wclippy::cast-ptr-alignment", |
||||||
|
"-Wclippy::cast-sign-loss", |
||||||
|
"-Wclippy::checked-conversions", |
||||||
|
"-Wclippy::clone-on-ref-ptr", |
||||||
|
"-Wclippy::cloned-instead-of-copied", |
||||||
|
"-Wclippy::copy-iterator", |
||||||
|
"-Wclippy::dbg-macro", |
||||||
|
"-Wclippy::decimal-literal-representation", |
||||||
|
"-Wclippy::default-trait-access", |
||||||
|
"-Wclippy::default-union-representation", |
||||||
|
# TODO: 1.61+ "-Wclippy::deref-by-slicing", |
||||||
|
# TODO: 1.63+ "-Wclippy::doc-link-with-quotes", |
||||||
|
# TODO: 1.62+ "-Wclippy::empty-drop", |
||||||
|
"-Wclippy::empty-enum", |
||||||
|
# TODO: on major version "-Wclippy::empty-structs-with-brackets", |
||||||
|
"-Wclippy::enum-glob-use", |
||||||
|
"-Wclippy::exit", |
||||||
|
"-Wclippy::expect-used", |
||||||
|
"-Wclippy::expl-impl-clone-on-copy", |
||||||
|
"-Wclippy::explicit-deref-methods", |
||||||
|
"-Wclippy::explicit-into-iter-loop", |
||||||
|
"-Wclippy::explicit-iter-loop", |
||||||
|
"-Wclippy::filter-map-next", |
||||||
|
"-Wclippy::flat-map-option", |
||||||
|
"-Wclippy::fn-to-numeric-cast-any", |
||||||
|
# TODO: 1.62+ "-Wclippy::format-push-string", |
||||||
|
"-Wclippy::from-iter-instead-of-collect", |
||||||
|
"-Wclippy::get-unwrap", |
||||||
|
"-Wclippy::if-not-else", |
||||||
|
"-Wclippy::if-then-some-else-none", |
||||||
|
"-Wclippy::implicit-clone", |
||||||
|
"-Wclippy::inconsistent-struct-constructor", |
||||||
|
"-Wclippy::index-refutable-slice", |
||||||
|
"-Wclippy::inefficient-to-string", |
||||||
|
"-Wclippy::inline-always", |
||||||
|
"-Wclippy::inline-asm-x86-att-syntax", |
||||||
|
"-Wclippy::inline-asm-x86-intel-syntax", |
||||||
|
"-Wclippy::invalid-upcast-comparisons", |
||||||
|
"-Wclippy::items-after-statements", |
||||||
|
"-Wclippy::large-digit-groups", |
||||||
|
# TODO: 1.68+ "-Wclippy::large-futures", |
||||||
|
"-Wclippy::large-stack-arrays", |
||||||
|
"-Wclippy::large-types-passed-by-value", |
||||||
|
"-Wclippy::let-underscore-must-use", |
||||||
|
"-Wclippy::let-unit-value", |
||||||
|
"-Wclippy::linkedlist", |
||||||
|
"-Wclippy::lossy-float-literal", |
||||||
|
"-Wclippy::macro-use-imports", |
||||||
|
"-Wclippy::manual-assert", |
||||||
|
# TODO: 1.65+ "-Wclippy::manual-instant-elapsed", |
||||||
|
# TODO: 1.67+ "-Wclippy::manual-let-else", |
||||||
|
"-Wclippy::manual-ok-or", |
||||||
|
# TODO: 1.65+ "-Wclippy::manual-string-new", |
||||||
|
"-Wclippy::many-single-char-names", |
||||||
|
"-Wclippy::map-unwrap-or", |
||||||
|
"-Wclippy::match-bool", |
||||||
|
"-Wclippy::match-same-arms", |
||||||
|
"-Wclippy::match-wildcard-for-single-variants", |
||||||
|
"-Wclippy::maybe-infinite-iter", |
||||||
|
"-Wclippy::mem-forget", |
||||||
|
# TODO: 1.63+ "-Wclippy::mismatching-type-param-order", |
||||||
|
"-Wclippy::multiple-inherent-impl", |
||||||
|
"-Wclippy::mut-mut", |
||||||
|
"-Wclippy::mutex-atomic", |
||||||
|
"-Wclippy::naive-bytecount", |
||||||
|
"-Wclippy::needless-bitwise-bool", |
||||||
|
"-Wclippy::needless-continue", |
||||||
|
"-Wclippy::needless-pass-by-value", |
||||||
|
"-Wclippy::no-effect-underscore-binding", |
||||||
|
# TODO: 1.69+ "-Wclippy::no-mangle-with-rust-abi", |
||||||
|
"-Wclippy::non-ascii-literal", |
||||||
|
"-Wclippy::print-stderr", |
||||||
|
"-Wclippy::print-stdout", |
||||||
|
"-Wclippy::ptr-as-ptr", |
||||||
|
"-Wclippy::range-minus-one", |
||||||
|
"-Wclippy::range-plus-one", |
||||||
|
"-Wclippy::rc-buffer", |
||||||
|
"-Wclippy::rc-mutex", |
||||||
|
"-Wclippy::redundant-closure-for-method-calls", |
||||||
|
"-Wclippy::redundant-else", |
||||||
|
"-Wclippy::redundant-feature-names", |
||||||
|
"-Wclippy::ref-binding-to-reference", |
||||||
|
"-Wclippy::ref-option-ref", |
||||||
|
"-Wclippy::rest-pat-in-fully-bound-structs", |
||||||
|
"-Wclippy::return-self-not-must-use", |
||||||
|
"-Wclippy::same-functions-in-if-condition", |
||||||
|
# TODO: strange failure on 1.60 "-Wclippy::same-name-method", |
||||||
|
# TODO: 1.68+ "-Wclippy::semicolon-outside-block", |
||||||
|
"-Wclippy::single-match-else", |
||||||
|
"-Wclippy::stable-sort-primitive", |
||||||
|
"-Wclippy::str-to-string", |
||||||
|
"-Wclippy::string-add", |
||||||
|
"-Wclippy::string-add-assign", |
||||||
|
"-Wclippy::string-lit-as-bytes", |
||||||
|
"-Wclippy::string-to-string", |
||||||
|
# TODO: 1.67+ "-Wclippy::suspicious-xor-used-as-pow", |
||||||
|
"-Wclippy::todo", |
||||||
|
"-Wclippy::transmute-ptr-to-ptr", |
||||||
|
"-Wclippy::trivially-copy-pass-by-ref", |
||||||
|
"-Wclippy::try-err", |
||||||
|
"-Wclippy::unicode-not-nfc", |
||||||
|
"-Wclippy::unimplemented", |
||||||
|
# TODO: 1.66+ "-Wclippy::uninlined-format-args", |
||||||
|
# TODO: 1.70+ "-Wclippy::unnecessary-box-returns", |
||||||
|
# TODO: 1.61+ "-Wclippy::unnecessary-join", |
||||||
|
# TODO: 1.67+ "-Wclippy::unnecessary-safety-comment", |
||||||
|
# TODO: 1.67+ "-Wclippy::unnecessary-safety-doc", |
||||||
|
"-Wclippy::unnecessary-self-imports", |
||||||
|
"-Wclippy::unnecessary-wraps", |
||||||
|
"-Wclippy::unneeded-field-pattern", |
||||||
|
"-Wclippy::unnested-or-patterns", |
||||||
|
"-Wclippy::unreadable-literal", |
||||||
|
"-Wclippy::unseparated-literal-suffix", |
||||||
|
"-Wclippy::unused-async", |
||||||
|
"-Wclippy::unused-self", |
||||||
|
"-Wclippy::use-debug", |
||||||
|
"-Wclippy::used-underscore-binding", |
||||||
|
"-Wclippy::verbose-bit-mask", |
||||||
|
"-Wclippy::verbose-file-reads", |
||||||
|
"-Wclippy::wildcard-dependencies", |
||||||
|
"-Wclippy::zero-sized-map-values", |
||||||
|
] |
@ -1,4 +1,5 @@ |
|||||||
FROM gcr.io/oss-fuzz-base/base-builder-rust:v1 |
FROM gcr.io/oss-fuzz-base/base-builder-rust:v1 |
||||||
|
RUN apt-get update && apt-get install -y llvm-dev libclang-dev clang && apt-get clean && rm --recursive --force /var/lib/apt/lists/* |
||||||
COPY . $SRC/oxigraph |
COPY . $SRC/oxigraph |
||||||
WORKDIR oxigraph |
WORKDIR oxigraph |
||||||
COPY .clusterfuzzlite/build.sh $SRC/ |
COPY .clusterfuzzlite/build.sh $SRC/ |
||||||
|
@ -0,0 +1,21 @@ |
|||||||
|
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust/.devcontainer/base.Dockerfile |
||||||
|
|
||||||
|
# [Choice] Debian OS version (use bullseye on local arm64/Apple Silicon): buster, bullseye |
||||||
|
ARG VARIANT="bullseye" |
||||||
|
FROM mcr.microsoft.com/vscode/devcontainers/rust:0-${VARIANT} |
||||||
|
|
||||||
|
# [Optional] Uncomment this section to install additional packages. |
||||||
|
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ |
||||||
|
&& apt-get -y install --no-install-recommends \ |
||||||
|
python3 \ |
||||||
|
python3-venv \ |
||||||
|
python-is-python3 \ |
||||||
|
libclang-dev |
||||||
|
|
||||||
|
ENV VIRTUAL_ENV=/opt/venv |
||||||
|
RUN python -m venv $VIRTUAL_ENV |
||||||
|
ENV PATH="$VIRTUAL_ENV/bin:$PATH" |
||||||
|
RUN pip install --no-cache-dir -r python/requirements.dev.txt |
||||||
|
|
||||||
|
# Change owner to the devcontainer user |
||||||
|
RUN chown -R 1000:1000 $VIRTUAL_ENV |
@ -0,0 +1,69 @@ |
|||||||
|
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: |
||||||
|
// https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust |
||||||
|
{ |
||||||
|
"name": "Rust", |
||||||
|
"build": { |
||||||
|
"dockerfile": "Dockerfile", |
||||||
|
"args": { |
||||||
|
// Use the VARIANT arg to pick a Debian OS version: buster, bullseye |
||||||
|
// Use bullseye when running locally on arm64/Apple Silicon. |
||||||
|
"VARIANT": "bullseye" |
||||||
|
} |
||||||
|
}, |
||||||
|
"runArgs": ["--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined"], |
||||||
|
|
||||||
|
// Configure tool-specific properties. |
||||||
|
"customizations": { |
||||||
|
// Configure properties specific to VS Code. |
||||||
|
"vscode": { |
||||||
|
// Set *default* container specific settings.json values on container create. |
||||||
|
"settings": { |
||||||
|
"lldb.executable": "/usr/bin/lldb", |
||||||
|
// Keep VS Code from watching files under ./target |
||||||
|
"files.watcherExclude": { |
||||||
|
"**/target/**": true |
||||||
|
}, |
||||||
|
"rust-analyzer.checkOnSave.command": "clippy", |
||||||
|
|
||||||
|
"python.defaultInterpreterPath": "/opt/venv/bin/python", |
||||||
|
"python.linting.enabled": true, |
||||||
|
"python.linting.pylintEnabled": true, |
||||||
|
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8", |
||||||
|
"python.formatting.blackPath": "/usr/local/py-utils/bin/black", |
||||||
|
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf", |
||||||
|
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit", |
||||||
|
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8", |
||||||
|
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy", |
||||||
|
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle", |
||||||
|
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle", |
||||||
|
"python.linting.pylintPath": "/opt/venv/bin/pylint", |
||||||
|
"python.testing.pytestPath": "/opt/venv/bin/pytest" |
||||||
|
}, |
||||||
|
|
||||||
|
// Add the IDs of extensions you want installed when the container is created. |
||||||
|
"extensions": [ |
||||||
|
"vadimcn.vscode-lldb", |
||||||
|
"mutantdino.resourcemonitor", |
||||||
|
"rust-lang.rust-analyzer", |
||||||
|
"tamasfe.even-better-toml", |
||||||
|
"serayuzgur.crates", |
||||||
|
"ms-python.python", |
||||||
|
"ms-python.vscode-pylance", |
||||||
|
"esbenp.prettier-vscode", |
||||||
|
"stardog-union.stardog-rdf-grammars" |
||||||
|
] |
||||||
|
} |
||||||
|
}, |
||||||
|
|
||||||
|
// Use 'forwardPorts' to make a list of ports inside the container available locally. |
||||||
|
// "forwardPorts": [], |
||||||
|
|
||||||
|
// Use 'postCreateCommand' to run commands after the container is created. |
||||||
|
"postCreateCommand": "git submodule update --init && cargo build", |
||||||
|
|
||||||
|
// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. |
||||||
|
"remoteUser": "vscode", |
||||||
|
"features": { |
||||||
|
"python": "3.10" |
||||||
|
} |
||||||
|
} |
@ -1,27 +0,0 @@ |
|||||||
name: 'Setup Rust' |
|
||||||
description: 'Setup Rust using Rustup' |
|
||||||
inputs: |
|
||||||
version: |
|
||||||
description: 'Rust version to use. By default latest stable version' |
|
||||||
required: false |
|
||||||
default: 'stable' |
|
||||||
component: |
|
||||||
description: 'Rust extra component to install like clippy' |
|
||||||
required: false |
|
||||||
target: |
|
||||||
description: 'Rust extra target to install like wasm32-unknown-unknown' |
|
||||||
required: false |
|
||||||
runs: |
|
||||||
using: "composite" |
|
||||||
steps: |
|
||||||
- run: rustup update |
|
||||||
shell: bash |
|
||||||
- run: rustup default ${{ inputs.version }} |
|
||||||
shell: bash |
|
||||||
- run: rustup component add ${{ inputs.component }} |
|
||||||
shell: bash |
|
||||||
if: ${{ inputs.component }} |
|
||||||
- run: rustup target add ${{ inputs.target }} |
|
||||||
shell: bash |
|
||||||
if: ${{ inputs.target }} |
|
||||||
- uses: Swatinem/rust-cache@v2 |
|
@ -1,11 +0,0 @@ |
|||||||
# Builds and installs the RocksDB shared library system-wide.
# An existing ./rocksdb checkout is reused as-is; otherwise v8.0.0 is cloned
# and its shared library is built before installation.
#
# The check uses `-d` because ./rocksdb is a directory: `-f` only matches
# regular files, so an existing checkout would never be detected.
if [ -d "rocksdb" ]
then
  cd rocksdb || exit
else
  git clone https://github.com/facebook/rocksdb.git
  cd rocksdb || exit
  git checkout v8.0.0
  make shared_lib
fi
# Install the shared library and refresh the dynamic linker cache.
sudo make install-shared
sudo ldconfig /usr/local/lib
|
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@ |
|||||||
avoid-breaking-exported-api = false |
avoid-breaking-exported-api = true |
||||||
cognitive-complexity-threshold = 50 |
cognitive-complexity-threshold = 50 |
||||||
too-many-arguments-threshold = 10 |
too-many-arguments-threshold = 10 |
||||||
type-complexity-threshold = 500 |
type-complexity-threshold = 500 |
Before Width: | Height: | Size: 4.6 KiB |
@ -1,35 +0,0 @@ |
|||||||
+------------------+ +----------------+ +-----------------+ |
|
||||||
+ oxigraph CLI {r} + + pyoxigraph {p} + + oxigraph JS {j} + |
|
||||||
+------------------+ +----------------+ +-----------------+ |
|
||||||
|
|
||||||
+---------------------------------------------------------------------------+ |
|
||||||
+ oxigraph (Rust) {r} + |
|
||||||
+---------------------------------------------------------------------------+ |
|
||||||
|
|
||||||
+----------------------------+ +-------------+ |
|
||||||
+ oxrdfio {r} + + sparopt {r} + |
|
||||||
+----------------------------+ +-------------+ |
|
||||||
|
|
||||||
+-----------+ +--------------+ +-----------------+ +----------------+ |
|
||||||
+ oxttl {r} + + oxrdfxml {r} + + spargebra {r} + + sparesults {r} + |
|
||||||
+-----------+ +--------------+ +-----------------+ +----------------+ |
|
||||||
|
|
||||||
+-----------------------------------------------------------------------+ |
|
||||||
+ oxrdf {r} + |
|
||||||
+-----------------------------------------------------------------------+ |
|
||||||
|
|
||||||
+------------------+ |
|
||||||
+ oxsdatatypes {r} + |
|
||||||
+------------------+ |
|
||||||
|
|
||||||
|
|
||||||
# Legend: |
|
||||||
r = { |
|
||||||
fill: papayawhip; |
|
||||||
} |
|
||||||
p = { |
|
||||||
fill: lightyellow; |
|
||||||
} |
|
||||||
j = { |
|
||||||
fill: lightgreen; |
|
||||||
} |
|
@ -1,28 +0,0 @@ |
|||||||
#![no_main] |
|
||||||
|
|
||||||
use libfuzzer_sys::fuzz_target; |
|
||||||
use oxttl::N3Parser; |
|
||||||
|
|
||||||
fuzz_target!(|data: &[u8]| { |
|
||||||
let mut quads = Vec::new(); |
|
||||||
let mut parser = N3Parser::new() |
|
||||||
.with_base_iri("http://example.com/") |
|
||||||
.unwrap() |
|
||||||
.parse(); |
|
||||||
for chunk in data.split(|c| *c == 0xFF) { |
|
||||||
parser.extend_from_slice(chunk); |
|
||||||
while let Some(result) = parser.read_next() { |
|
||||||
if let Ok(quad) = result { |
|
||||||
quads.push(quad); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
parser.end(); |
|
||||||
while let Some(result) = parser.read_next() { |
|
||||||
if let Ok(quad) = result { |
|
||||||
quads.push(quad); |
|
||||||
} |
|
||||||
} |
|
||||||
assert!(parser.is_end()); |
|
||||||
//TODO: serialize
|
|
||||||
}); |
|
@ -1,84 +0,0 @@ |
|||||||
#![no_main] |
|
||||||
|
|
||||||
use libfuzzer_sys::fuzz_target; |
|
||||||
use oxrdf::Quad; |
|
||||||
use oxttl::{NQuadsParser, NQuadsSerializer}; |
|
||||||
|
|
||||||
fn parse<'a>( |
|
||||||
chunks: impl IntoIterator<Item = &'a [u8]>, |
|
||||||
unchecked: bool, |
|
||||||
) -> (Vec<Quad>, Vec<String>) { |
|
||||||
let mut quads = Vec::new(); |
|
||||||
let mut errors = Vec::new(); |
|
||||||
let mut parser = NQuadsParser::new().with_quoted_triples(); |
|
||||||
if unchecked { |
|
||||||
parser = parser.unchecked(); |
|
||||||
} |
|
||||||
let mut reader = parser.parse(); |
|
||||||
for chunk in chunks { |
|
||||||
reader.extend_from_slice(chunk); |
|
||||||
while let Some(result) = reader.read_next() { |
|
||||||
match result { |
|
||||||
Ok(quad) => quads.push(quad), |
|
||||||
Err(error) => errors.push(error.to_string()), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
reader.end(); |
|
||||||
while let Some(result) = reader.read_next() { |
|
||||||
match result { |
|
||||||
Ok(quad) => quads.push(quad), |
|
||||||
Err(error) => errors.push(error.to_string()), |
|
||||||
} |
|
||||||
} |
|
||||||
assert!(reader.is_end()); |
|
||||||
(quads, errors) |
|
||||||
} |
|
||||||
|
|
||||||
fuzz_target!(|data: &[u8]| { |
|
||||||
// We parse with splitting
|
|
||||||
let (quads, errors) = parse(data.split(|c| *c == 0xFF), false); |
|
||||||
// We parse without splitting
|
|
||||||
let (quads_without_split, errors_without_split) = parse( |
|
||||||
[data |
|
||||||
.iter() |
|
||||||
.copied() |
|
||||||
.filter(|c| *c != 0xFF) |
|
||||||
.collect::<Vec<_>>() |
|
||||||
.as_slice()], |
|
||||||
false, |
|
||||||
); |
|
||||||
assert_eq!(quads, quads_without_split); |
|
||||||
assert_eq!(errors, errors_without_split); |
|
||||||
|
|
||||||
// We test also unchecked if valid
|
|
||||||
if errors.is_empty() { |
|
||||||
let (quads_unchecked, errors_unchecked) = parse(data.split(|c| *c == 0xFF), true); |
|
||||||
assert!(errors_unchecked.is_empty()); |
|
||||||
assert_eq!(quads, quads_unchecked); |
|
||||||
} |
|
||||||
|
|
||||||
// We serialize
|
|
||||||
let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new()); |
|
||||||
for quad in &quads { |
|
||||||
writer.write_quad(quad).unwrap(); |
|
||||||
} |
|
||||||
let new_serialization = writer.finish(); |
|
||||||
|
|
||||||
// We parse the serialization
|
|
||||||
let new_quads = NQuadsParser::new() |
|
||||||
.with_quoted_triples() |
|
||||||
.parse_read(new_serialization.as_slice()) |
|
||||||
.collect::<Result<Vec<_>, _>>() |
|
||||||
.map_err(|e| { |
|
||||||
format!( |
|
||||||
"Error on {:?} from {quads:?} based on {:?}: {e}", |
|
||||||
String::from_utf8_lossy(&new_serialization), |
|
||||||
String::from_utf8_lossy(data) |
|
||||||
) |
|
||||||
}) |
|
||||||
.unwrap(); |
|
||||||
|
|
||||||
// We check the roundtrip has not changed anything
|
|
||||||
assert_eq!(new_quads, quads); |
|
||||||
}); |
|
@ -1,35 +0,0 @@ |
|||||||
#![no_main] |
|
||||||
|
|
||||||
use libfuzzer_sys::fuzz_target; |
|
||||||
use oxrdfxml::{RdfXmlParser, RdfXmlSerializer}; |
|
||||||
|
|
||||||
fuzz_target!(|data: &[u8]| { |
|
||||||
// We parse
|
|
||||||
let triples = RdfXmlParser::new() |
|
||||||
.parse_read(data) |
|
||||||
.flatten() |
|
||||||
.collect::<Vec<_>>(); |
|
||||||
|
|
||||||
// We serialize
|
|
||||||
let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new()); |
|
||||||
for triple in &triples { |
|
||||||
writer.write_triple(triple).unwrap(); |
|
||||||
} |
|
||||||
let new_serialization = writer.finish().unwrap(); |
|
||||||
|
|
||||||
// We parse the serialization
|
|
||||||
let new_triples = RdfXmlParser::new() |
|
||||||
.parse_read(new_serialization.as_slice()) |
|
||||||
.collect::<Result<Vec<_>, _>>() |
|
||||||
.map_err(|e| { |
|
||||||
format!( |
|
||||||
"Error on {:?} from {triples:?} based on {:?}: {e}", |
|
||||||
String::from_utf8_lossy(&new_serialization), |
|
||||||
String::from_utf8_lossy(data) |
|
||||||
) |
|
||||||
}) |
|
||||||
.unwrap(); |
|
||||||
|
|
||||||
// We check the roundtrip has not changed anything
|
|
||||||
assert_eq!(new_triples, triples); |
|
||||||
}); |
|
@ -1,166 +0,0 @@ |
|||||||
#![no_main] |
|
||||||
|
|
||||||
use libfuzzer_sys::fuzz_target; |
|
||||||
use oxrdf::graph::CanonicalizationAlgorithm; |
|
||||||
use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple}; |
|
||||||
use oxttl::{TriGParser, TriGSerializer}; |
|
||||||
|
|
||||||
fn parse<'a>( |
|
||||||
chunks: impl IntoIterator<Item = &'a [u8]>, |
|
||||||
unchecked: bool, |
|
||||||
) -> (Vec<Quad>, Vec<String>, Vec<(String, String)>) { |
|
||||||
let mut quads = Vec::new(); |
|
||||||
let mut errors = Vec::new(); |
|
||||||
let mut parser = TriGParser::new() |
|
||||||
.with_quoted_triples() |
|
||||||
.with_base_iri("http://example.com/") |
|
||||||
.unwrap(); |
|
||||||
if unchecked { |
|
||||||
parser = parser.unchecked(); |
|
||||||
} |
|
||||||
let mut reader = parser.parse(); |
|
||||||
for chunk in chunks { |
|
||||||
reader.extend_from_slice(chunk); |
|
||||||
while let Some(result) = reader.read_next() { |
|
||||||
match result { |
|
||||||
Ok(quad) => quads.push(quad), |
|
||||||
Err(error) => errors.push(error.to_string()), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
reader.end(); |
|
||||||
while let Some(result) = reader.read_next() { |
|
||||||
match result { |
|
||||||
Ok(quad) => quads.push(quad), |
|
||||||
Err(error) => errors.push(error.to_string()), |
|
||||||
} |
|
||||||
} |
|
||||||
assert!(reader.is_end()); |
|
||||||
( |
|
||||||
quads, |
|
||||||
errors, |
|
||||||
reader |
|
||||||
.prefixes() |
|
||||||
.map(|(k, v)| (k.to_owned(), v.to_owned())) |
|
||||||
.collect(), |
|
||||||
) |
|
||||||
} |
|
||||||
|
|
||||||
fn count_triple_blank_nodes(triple: &Triple) -> usize { |
|
||||||
(match &triple.subject { |
|
||||||
Subject::BlankNode(_) => 1, |
|
||||||
Subject::Triple(t) => count_triple_blank_nodes(t), |
|
||||||
_ => 0, |
|
||||||
}) + (match &triple.object { |
|
||||||
Term::BlankNode(_) => 1, |
|
||||||
Term::Triple(t) => count_triple_blank_nodes(t), |
|
||||||
_ => 0, |
|
||||||
}) |
|
||||||
} |
|
||||||
|
|
||||||
fn count_quad_blank_nodes(quad: &Quad) -> usize { |
|
||||||
(match &quad.subject { |
|
||||||
Subject::BlankNode(_) => 1, |
|
||||||
Subject::Triple(t) => count_triple_blank_nodes(t), |
|
||||||
_ => 0, |
|
||||||
}) + (match &quad.object { |
|
||||||
Term::BlankNode(_) => 1, |
|
||||||
Term::Triple(t) => count_triple_blank_nodes(t), |
|
||||||
_ => 0, |
|
||||||
}) + usize::from(matches!(quad.graph_name, GraphName::BlankNode(_))) |
|
||||||
} |
|
||||||
|
|
||||||
fn serialize_quads(quads: &[Quad], prefixes: Vec<(String, String)>) -> Vec<u8> { |
|
||||||
let mut serializer = TriGSerializer::new(); |
|
||||||
for (prefix_name, prefix_iri) in prefixes { |
|
||||||
serializer = serializer.with_prefix(prefix_name, prefix_iri).unwrap(); |
|
||||||
} |
|
||||||
let mut writer = serializer.serialize_to_write(Vec::new()); |
|
||||||
for quad in quads { |
|
||||||
writer.write_quad(quad).unwrap(); |
|
||||||
} |
|
||||||
writer.finish().unwrap() |
|
||||||
} |
|
||||||
|
|
||||||
fuzz_target!(|data: &[u8]| { |
|
||||||
// We parse with splitting
|
|
||||||
let (quads, errors, prefixes) = parse(data.split(|c| *c == 0xFF), false); |
|
||||||
// We parse without splitting
|
|
||||||
let (quads_without_split, errors_without_split, _) = parse( |
|
||||||
[data |
|
||||||
.iter() |
|
||||||
.copied() |
|
||||||
.filter(|c| *c != 0xFF) |
|
||||||
.collect::<Vec<_>>() |
|
||||||
.as_slice()], |
|
||||||
false, |
|
||||||
); |
|
||||||
let (quads_unchecked, errors_unchecked, _) = parse(data.split(|c| *c == 0xFF), true); |
|
||||||
if errors.is_empty() { |
|
||||||
assert!(errors_unchecked.is_empty()); |
|
||||||
} |
|
||||||
|
|
||||||
let bnodes_count = quads.iter().map(count_quad_blank_nodes).sum::<usize>(); |
|
||||||
if bnodes_count == 0 { |
|
||||||
assert_eq!( |
|
||||||
quads, |
|
||||||
quads_without_split, |
|
||||||
"With split:\n{}\nWithout split:\n{}", |
|
||||||
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), |
|
||||||
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new())) |
|
||||||
); |
|
||||||
if errors.is_empty() { |
|
||||||
assert_eq!( |
|
||||||
quads, |
|
||||||
quads_unchecked, |
|
||||||
"Validating:\n{}\nUnchecked:\n{}", |
|
||||||
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), |
|
||||||
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new())) |
|
||||||
); |
|
||||||
} |
|
||||||
} else if bnodes_count <= 4 { |
|
||||||
let mut dataset_with_split = quads.iter().collect::<Dataset>(); |
|
||||||
let mut dataset_without_split = quads_without_split.iter().collect::<Dataset>(); |
|
||||||
dataset_with_split.canonicalize(CanonicalizationAlgorithm::Unstable); |
|
||||||
dataset_without_split.canonicalize(CanonicalizationAlgorithm::Unstable); |
|
||||||
assert_eq!( |
|
||||||
dataset_with_split, |
|
||||||
dataset_without_split, |
|
||||||
"With split:\n{}\nWithout split:\n{}", |
|
||||||
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), |
|
||||||
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new())) |
|
||||||
); |
|
||||||
if errors.is_empty() { |
|
||||||
let mut dataset_unchecked = quads_unchecked.iter().collect::<Dataset>(); |
|
||||||
dataset_unchecked.canonicalize(CanonicalizationAlgorithm::Unstable); |
|
||||||
assert_eq!( |
|
||||||
dataset_with_split, |
|
||||||
dataset_unchecked, |
|
||||||
"Validating:\n{}\nUnchecked:\n{}", |
|
||||||
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), |
|
||||||
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new())) |
|
||||||
); |
|
||||||
} |
|
||||||
} |
|
||||||
assert_eq!(errors, errors_without_split); |
|
||||||
|
|
||||||
// We serialize
|
|
||||||
let new_serialization = serialize_quads(&quads, prefixes); |
|
||||||
|
|
||||||
// We parse the serialization
|
|
||||||
let new_quads = TriGParser::new() |
|
||||||
.with_quoted_triples() |
|
||||||
.parse_read(new_serialization.as_slice()) |
|
||||||
.collect::<Result<Vec<_>, _>>() |
|
||||||
.map_err(|e| { |
|
||||||
format!( |
|
||||||
"Error on {:?} from {quads:?} based on {:?}: {e}", |
|
||||||
String::from_utf8_lossy(&new_serialization), |
|
||||||
String::from_utf8_lossy(data) |
|
||||||
) |
|
||||||
}) |
|
||||||
.unwrap(); |
|
||||||
|
|
||||||
// We check the roundtrip has not changed anything
|
|
||||||
assert_eq!(new_quads, quads); |
|
||||||
}); |
|
@ -1,26 +1,20 @@ |
|||||||
[package] |
[package] |
||||||
name = "oxigraph-js" |
name = "oxigraph_js" |
||||||
version.workspace = true |
version = "0.3.22" |
||||||
authors.workspace = true |
authors = ["Tpt <thomas@pellissier-tanon.fr>"] |
||||||
license.workspace = true |
license = "MIT OR Apache-2.0" |
||||||
readme = "README.md" |
readme = "README.md" |
||||||
keywords = ["RDF", "N-Triples", "Turtle", "XML", "SPARQL"] |
keywords = ["RDF", "N-Triples", "Turtle", "RDF/XML", "SPARQL"] |
||||||
repository = "https://github.com/oxigraph/oxigraph/tree/main/js" |
repository = "https://github.com/oxigraph/oxigraph/tree/main/js" |
||||||
description = "JavaScript bindings of Oxigraph" |
description = "JavaScript bindings of Oxigraph" |
||||||
edition.workspace = true |
edition = "2021" |
||||||
rust-version.workspace = true |
|
||||||
publish = false |
|
||||||
|
|
||||||
[lib] |
[lib] |
||||||
crate-type = ["cdylib"] |
crate-type = ["cdylib"] |
||||||
name = "oxigraph" |
name = "oxigraph" |
||||||
doc = false |
|
||||||
|
|
||||||
[dependencies] |
[dependencies] |
||||||
console_error_panic_hook.workspace = true |
oxigraph = { version = "0.3.22", path="../lib" } |
||||||
js-sys.workspace = true |
wasm-bindgen = "0.2" |
||||||
oxigraph = { workspace = true, features = ["js"] } |
js-sys = "0.3" |
||||||
wasm-bindgen.workspace = true |
console_error_panic_hook = "0.1" |
||||||
|
|
||||||
[lints] |
|
||||||
workspace = true |
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,20 +1,22 @@ |
|||||||
{ |
{ |
||||||
"name": "oxigraph_tests", |
"name": "oxigraph_tests", |
||||||
"description": "Oxigraph JS build and tests", |
"description": "Oxigraph JS build and tests", |
||||||
"private": true, |
"private": true, |
||||||
"devDependencies": { |
"devDependencies": { |
||||||
"@biomejs/biome": "^1.0.0", |
"@rdfjs/data-model": "^2.0.1", |
||||||
"@rdfjs/data-model": "^2.0.1", |
"mocha": "^10.0.0", |
||||||
"mocha": "^10.0.0" |
"rome": "^12.0.0" |
||||||
}, |
}, |
||||||
"scripts": { |
"scripts": { |
||||||
"fmt": "biome format . --write && biome check . --apply-unsafe && biome format . --write", |
"fmt": "rome format . --write && rome check . --apply-unsafe", |
||||||
"test": "biome ci . && wasm-pack build --debug --target nodejs --weak-refs --reference-types && mocha", |
"test": "rome ci . && wasm-pack build --debug --target nodejs && mocha", |
||||||
"build": "wasm-pack build --release --target web --out-name web --weak-refs --reference-types && wasm-pack build --release --target nodejs --out-name node --weak-refs --reference-types && node build_package.js", |
"build": "rm -rf pkg && wasm-pack build --release --target web --out-name web && mv pkg pkg-web && wasm-pack build --release --target nodejs --out-name node && mv pkg pkg-node && node build_package.js && rm -r pkg-web && rm -r pkg-node", |
||||||
"release": "npm run build && npm publish ./pkg", |
"release": "npm run build && npm publish ./pkg", |
||||||
"pack": "npm run build && npm pack ./pkg" |
"pack": "npm run build && npm pack ./pkg" |
||||||
}, |
}, |
||||||
"standard": { |
"standard": { |
||||||
"ignore": ["pkg*"] |
"ignore": [ |
||||||
} |
"pkg*" |
||||||
|
] |
||||||
|
} |
||||||
} |
} |
||||||
|
@ -1,8 +1,7 @@ |
|||||||
{ |
{ |
||||||
"$schema": "https://biomejs.dev/schemas/1.0.0/schema.json", |
|
||||||
"formatter": { |
"formatter": { |
||||||
"indentStyle": "space", |
"indentStyle": "space", |
||||||
"indentWidth": 4, |
"indentSize": 4, |
||||||
"lineWidth": 100 |
"lineWidth": 100 |
||||||
}, |
}, |
||||||
"linter": { |
"linter": { |
@ -0,0 +1,63 @@ |
|||||||
|
[package] |
||||||
|
name = "oxigraph" |
||||||
|
version = "0.3.22" |
||||||
|
authors = ["Tpt <thomas@pellissier-tanon.fr>"] |
||||||
|
license = "MIT OR Apache-2.0" |
||||||
|
readme = "README.md" |
||||||
|
keywords = ["RDF", "SPARQL", "graph-database", "database"] |
||||||
|
categories = ["database-implementations"] |
||||||
|
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib" |
||||||
|
homepage = "https://oxigraph.org/" |
||||||
|
documentation = "https://docs.rs/oxigraph" |
||||||
|
description = """ |
||||||
|
a SPARQL database and RDF toolkit |
||||||
|
""" |
||||||
|
edition = "2021" |
||||||
|
rust-version = "1.60" |
||||||
|
|
||||||
|
[package.metadata.docs.rs] |
||||||
|
all-features = true |
||||||
|
|
||||||
|
[features] |
||||||
|
default = [] |
||||||
|
http_client = ["oxhttp", "oxhttp/rustls"] |
||||||
|
rocksdb_debug = [] |
||||||
|
|
||||||
|
[dependencies] |
||||||
|
rand = "0.8" |
||||||
|
md-5 = "0.10" |
||||||
|
sha-1 = "0.10" |
||||||
|
sha2 = "0.10" |
||||||
|
digest = "0.10" |
||||||
|
regex = "1" |
||||||
|
oxilangtag = "0.1" |
||||||
|
oxiri = "0.2" |
||||||
|
rio_api = "0.8" |
||||||
|
rio_turtle = "0.8" |
||||||
|
rio_xml = "0.8" |
||||||
|
hex = "0.4" |
||||||
|
siphasher = ">=0.3,<2.0" |
||||||
|
lazy_static = "1" |
||||||
|
json-event-parser = "0.1" |
||||||
|
oxrdf = { version = "0.1.7", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] } |
||||||
|
oxsdatatypes = { version = "0.1.3", path="oxsdatatypes" } |
||||||
|
spargebra = { version = "0.2.8", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] } |
||||||
|
sparesults = { version = "0.1.8", path="sparesults", features = ["rdf-star"] } |
||||||
|
|
||||||
|
[target.'cfg(not(target_family = "wasm"))'.dependencies] |
||||||
|
libc = "0.2" |
||||||
|
rocksdb = {git = "https://git.nextgraph.org/NextGraph/rust-rocksdb.git", branch = "master", features = [ ] } |
||||||
|
oxhttp = { version = "0.1", optional = true } |
||||||
|
|
||||||
|
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] |
||||||
|
getrandom = { version = "0.2", features = ["js"] } |
||||||
|
js-sys = "0.3" |
||||||
|
|
||||||
|
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies] |
||||||
|
criterion = "0.4" |
||||||
|
oxhttp = "0.1" |
||||||
|
zstd = "0.12" |
||||||
|
|
||||||
|
[[bench]] |
||||||
|
name = "store" |
||||||
|
harness = false |
@ -1,13 +1,72 @@ |
|||||||
Oxigraph Rust crates |
Oxigraph |
||||||
==================== |
======== |
||||||
|
|
||||||
Oxigraph is implemented in Rust. |
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph) |
||||||
It is composed of a main library, [`oxigraph`](./oxigraph) and a set of smaller crates used by the `oxigraph` crate: |
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph) |
||||||
* [`oxrdf`](./oxrdf), datastructures encoding RDF basic concepts (the `model` module of the `oxigraph` crate). |
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph) |
||||||
* [`oxrdfio`](./oxrdfio), a unified parser and serializer API for RDF formats (the `io` module of the `oxigraph` crate). It itself relies on: |
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||||
* [`oxttl`](./oxttl), N-Triple, N-Quad, Turtle, TriG and N3 parsing and serialization. |
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) |
||||||
* [`oxrdfxml`](./oxrdfxml), RDF/XML parsing and serialization. |
|
||||||
* [`spargebra`](./spargebra), a SPARQL parser. |
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. |
||||||
* [`sparesults`](./sparesults), parsers and serializers for SPARQL result formats (the `sparql::results` module of the `oxigraph` crate). |
|
||||||
* [`sparopt`](./sparopt), a SPARQL optimizer. |
Its goal is to provide a compliant, safe and fast on-disk graph database. |
||||||
* [`oxsdatatypes`](./oxsdatatypes), an implementation of some XML Schema datatypes. |
It also provides a set of utility functions for reading, writing, and processing RDF files. |
||||||
|
|
||||||
|
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet. |
||||||
|
|
||||||
|
Oxigraph also provides [a standalone HTTP server](https://crates.io/crates/oxigraph_server) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library. |
||||||
|
|
||||||
|
|
||||||
|
Oxigraph implements the following specifications: |
||||||
|
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/). |
||||||
|
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio). |
||||||
|
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/). |
||||||
|
|
||||||
|
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture). |
||||||
|
|
||||||
|
The main entry point of Oxigraph is the [`Store`](store::Store) struct: |
||||||
|
```rust |
||||||
|
use oxigraph::store::Store; |
||||||
|
use oxigraph::model::*; |
||||||
|
use oxigraph::sparql::QueryResults; |
||||||
|
|
||||||
|
let store = Store::new().unwrap(); |
||||||
|
|
||||||
|
// insertion |
||||||
|
let ex = NamedNode::new("http://example.com").unwrap(); |
||||||
|
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph); |
||||||
|
store.insert(&quad).unwrap(); |
||||||
|
|
||||||
|
// quad filter |
||||||
|
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap(); |
||||||
|
assert_eq!(vec![quad], results); |
||||||
|
|
||||||
|
// SPARQL query |
||||||
|
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() { |
||||||
|
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into())); |
||||||
|
} |
||||||
|
``` |
||||||
|
|
||||||
|
Some parts of this library are available as standalone crates: |
||||||
|
* [`oxrdf`](https://crates.io/crates/oxrdf) provides datastructures encoding RDF basic concepts (the `oxigraph::model` module). |
||||||
|
* [`spargebra`](https://crates.io/crates/spargebra) provides a SPARQL parser. |
||||||
|
* [`sparesults`](https://crates.io/crates/sparesults) provides parsers and serializers for SPARQL result formats. |
||||||
|
|
||||||
|
To build the library, don't forget to fetch the submodules: use `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository together with its submodules, or run `git submodule update --init` to add them to an already cloned repository. |
||||||
|
|
||||||
|
|
||||||
|
## License |
||||||
|
|
||||||
|
This project is licensed under either of |
||||||
|
|
||||||
|
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||||
|
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||||
|
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||||
|
`<http://opensource.org/licenses/MIT>`) |
||||||
|
|
||||||
|
at your option. |
||||||
|
|
||||||
|
|
||||||
|
### Contribution |
||||||
|
|
||||||
|
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
||||||
|
@ -0,0 +1,265 @@ |
|||||||
|
use criterion::{criterion_group, criterion_main, Criterion, Throughput}; |
||||||
|
use oxhttp::model::{Method, Request, Status}; |
||||||
|
use oxigraph::io::GraphFormat; |
||||||
|
use oxigraph::model::GraphNameRef; |
||||||
|
use oxigraph::sparql::{Query, QueryResults, Update}; |
||||||
|
use oxigraph::store::Store; |
||||||
|
use rand::random; |
||||||
|
use std::env::temp_dir; |
||||||
|
use std::fs::{remove_dir_all, File}; |
||||||
|
use std::io::{BufRead, BufReader, Cursor, Read}; |
||||||
|
use std::path::{Path, PathBuf}; |
||||||
|
|
||||||
|
fn store_load(c: &mut Criterion) { |
||||||
|
{ |
||||||
|
let mut data = Vec::new(); |
||||||
|
read_data("explore-1000.nt.zst") |
||||||
|
.read_to_end(&mut data) |
||||||
|
.unwrap(); |
||||||
|
|
||||||
|
let mut group = c.benchmark_group("store load"); |
||||||
|
group.throughput(Throughput::Bytes(data.len() as u64)); |
||||||
|
group.sample_size(10); |
||||||
|
group.bench_function("load BSBM explore 1000 in memory", |b| { |
||||||
|
b.iter(|| { |
||||||
|
let store = Store::new().unwrap(); |
||||||
|
do_load(&store, &data); |
||||||
|
}) |
||||||
|
}); |
||||||
|
group.bench_function("load BSBM explore 1000 in on disk", |b| { |
||||||
|
b.iter(|| { |
||||||
|
let path = TempDir::default(); |
||||||
|
let store = Store::open(&path).unwrap(); |
||||||
|
do_load(&store, &data); |
||||||
|
}) |
||||||
|
}); |
||||||
|
group.bench_function("load BSBM explore 1000 in on disk with bulk load", |b| { |
||||||
|
b.iter(|| { |
||||||
|
let path = TempDir::default(); |
||||||
|
let store = Store::open(&path).unwrap(); |
||||||
|
do_bulk_load(&store, &data); |
||||||
|
}) |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
{ |
||||||
|
let mut data = Vec::new(); |
||||||
|
read_data("explore-10000.nt.zst") |
||||||
|
.read_to_end(&mut data) |
||||||
|
.unwrap(); |
||||||
|
|
||||||
|
let mut group = c.benchmark_group("store load large"); |
||||||
|
group.throughput(Throughput::Bytes(data.len() as u64)); |
||||||
|
group.sample_size(10); |
||||||
|
group.bench_function("load BSBM explore 10000 in on disk with bulk load", |b| { |
||||||
|
b.iter(|| { |
||||||
|
let path = TempDir::default(); |
||||||
|
let store = Store::open(&path).unwrap(); |
||||||
|
do_bulk_load(&store, &data); |
||||||
|
}) |
||||||
|
}); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn do_load(store: &Store, data: &[u8]) { |
||||||
|
store |
||||||
|
.load_graph( |
||||||
|
Cursor::new(&data), |
||||||
|
GraphFormat::NTriples, |
||||||
|
GraphNameRef::DefaultGraph, |
||||||
|
None, |
||||||
|
) |
||||||
|
.unwrap(); |
||||||
|
store.optimize().unwrap(); |
||||||
|
} |
||||||
|
|
||||||
|
fn do_bulk_load(store: &Store, data: &[u8]) { |
||||||
|
store |
||||||
|
.bulk_loader() |
||||||
|
.load_graph( |
||||||
|
Cursor::new(&data), |
||||||
|
GraphFormat::NTriples, |
||||||
|
GraphNameRef::DefaultGraph, |
||||||
|
None, |
||||||
|
) |
||||||
|
.unwrap(); |
||||||
|
store.optimize().unwrap(); |
||||||
|
} |
||||||
|
|
||||||
|
fn store_query_and_update(c: &mut Criterion) { |
||||||
|
let mut data = Vec::new(); |
||||||
|
read_data("explore-1000.nt.zst") |
||||||
|
.read_to_end(&mut data) |
||||||
|
.unwrap(); |
||||||
|
|
||||||
|
let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst") |
||||||
|
.lines() |
||||||
|
.map(|l| { |
||||||
|
let l = l.unwrap(); |
||||||
|
let mut parts = l.trim().split('\t'); |
||||||
|
let kind = parts.next().unwrap(); |
||||||
|
let operation = parts.next().unwrap(); |
||||||
|
match kind { |
||||||
|
"query" => Operation::Query(Query::parse(operation, None).unwrap()), |
||||||
|
"update" => Operation::Update(Update::parse(operation, None).unwrap()), |
||||||
|
_ => panic!("Unexpected operation kind {kind}"), |
||||||
|
} |
||||||
|
}) |
||||||
|
.collect::<Vec<_>>(); |
||||||
|
let query_operations = operations |
||||||
|
.iter() |
||||||
|
.filter(|o| matches!(o, Operation::Query(_))) |
||||||
|
.cloned() |
||||||
|
.collect::<Vec<_>>(); |
||||||
|
|
||||||
|
let mut group = c.benchmark_group("store operations"); |
||||||
|
group.throughput(Throughput::Elements(operations.len() as u64)); |
||||||
|
group.sample_size(10); |
||||||
|
|
||||||
|
{ |
||||||
|
let memory_store = Store::new().unwrap(); |
||||||
|
do_bulk_load(&memory_store, &data); |
||||||
|
group.bench_function("BSBM explore 1000 query in memory", |b| { |
||||||
|
b.iter(|| run_operation(&memory_store, &query_operations)) |
||||||
|
}); |
||||||
|
group.bench_function("BSBM explore 1000 queryAndUpdate in memory", |b| { |
||||||
|
b.iter(|| run_operation(&memory_store, &operations)) |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
{ |
||||||
|
let path = TempDir::default(); |
||||||
|
let disk_store = Store::open(&path).unwrap(); |
||||||
|
do_bulk_load(&disk_store, &data); |
||||||
|
group.bench_function("BSBM explore 1000 query on disk", |b| { |
||||||
|
b.iter(|| run_operation(&disk_store, &query_operations)) |
||||||
|
}); |
||||||
|
group.bench_function("BSBM explore 1000 queryAndUpdate on disk", |b| { |
||||||
|
b.iter(|| run_operation(&disk_store, &operations)) |
||||||
|
}); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn run_operation(store: &Store, operations: &[Operation]) { |
||||||
|
for operation in operations { |
||||||
|
match operation { |
||||||
|
Operation::Query(q) => match store.query(q.clone()).unwrap() { |
||||||
|
QueryResults::Boolean(_) => (), |
||||||
|
QueryResults::Solutions(s) => { |
||||||
|
for s in s { |
||||||
|
s.unwrap(); |
||||||
|
} |
||||||
|
} |
||||||
|
QueryResults::Graph(g) => { |
||||||
|
for t in g { |
||||||
|
t.unwrap(); |
||||||
|
} |
||||||
|
} |
||||||
|
}, |
||||||
|
Operation::Update(u) => store.update(u.clone()).unwrap(), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn sparql_parsing(c: &mut Criterion) { |
||||||
|
let mut data = Vec::new(); |
||||||
|
read_data("explore-1000.nt.zst") |
||||||
|
.read_to_end(&mut data) |
||||||
|
.unwrap(); |
||||||
|
|
||||||
|
let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst") |
||||||
|
.lines() |
||||||
|
.map(|l| { |
||||||
|
let l = l.unwrap(); |
||||||
|
let mut parts = l.trim().split('\t'); |
||||||
|
let kind = parts.next().unwrap(); |
||||||
|
let operation = parts.next().unwrap(); |
||||||
|
match kind { |
||||||
|
"query" => RawOperation::Query(operation.to_owned()), |
||||||
|
"update" => RawOperation::Update(operation.to_owned()), |
||||||
|
_ => panic!("Unexpected operation kind {kind}"), |
||||||
|
} |
||||||
|
}) |
||||||
|
.collect::<Vec<_>>(); |
||||||
|
|
||||||
|
let mut group = c.benchmark_group("sparql parsing"); |
||||||
|
group.sample_size(10); |
||||||
|
group.throughput(Throughput::Bytes( |
||||||
|
operations |
||||||
|
.iter() |
||||||
|
.map(|o| match o { |
||||||
|
RawOperation::Query(q) => q.len(), |
||||||
|
RawOperation::Update(u) => u.len(), |
||||||
|
}) |
||||||
|
.sum::<usize>() as u64, |
||||||
|
)); |
||||||
|
group.bench_function("BSBM query and update set", |b| { |
||||||
|
b.iter(|| { |
||||||
|
for operation in &operations { |
||||||
|
match operation { |
||||||
|
RawOperation::Query(q) => { |
||||||
|
Query::parse(q, None).unwrap(); |
||||||
|
} |
||||||
|
RawOperation::Update(u) => { |
||||||
|
Update::parse(u, None).unwrap(); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
}) |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
criterion_group!(store, sparql_parsing, store_query_and_update, store_load); |
||||||
|
|
||||||
|
criterion_main!(store); |
||||||
|
|
||||||
|
fn read_data(file: &str) -> impl BufRead { |
||||||
|
if !Path::new(file).exists() { |
||||||
|
let mut client = oxhttp::Client::new(); |
||||||
|
client.set_redirection_limit(5); |
||||||
|
let url = format!("https://github.com/Tpt/bsbm-tools/releases/download/v0.2/{file}"); |
||||||
|
let request = Request::builder(Method::GET, url.parse().unwrap()).build(); |
||||||
|
let response = client.request(request).unwrap(); |
||||||
|
assert_eq!( |
||||||
|
response.status(), |
||||||
|
Status::OK, |
||||||
|
"{}", |
||||||
|
response.into_body().to_string().unwrap() |
||||||
|
); |
||||||
|
std::io::copy(&mut response.into_body(), &mut File::create(file).unwrap()).unwrap(); |
||||||
|
} |
||||||
|
BufReader::new(zstd::Decoder::new(File::open(file).unwrap()).unwrap()) |
||||||
|
} |
||||||
|
|
||||||
|
/// An operation of the benchmark mix kept as unparsed text.
#[derive(Clone)]
enum RawOperation {
    /// Text of a query.
    Query(String),
    /// Text of an update.
    Update(String),
}
||||||
|
|
||||||
|
#[allow(clippy::large_enum_variant)] |
||||||
|
#[derive(Clone)] |
||||||
|
enum Operation { |
||||||
|
Query(Query), |
||||||
|
Update(Update), |
||||||
|
} |
||||||
|
|
||||||
|
struct TempDir(PathBuf); |
||||||
|
|
||||||
|
impl Default for TempDir { |
||||||
|
fn default() -> Self { |
||||||
|
Self(temp_dir().join(format!("oxigraph-bench-{}", random::<u128>()))) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl AsRef<Path> for TempDir { |
||||||
|
fn as_ref(&self) -> &Path { |
||||||
|
&self.0 |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl Drop for TempDir { |
||||||
|
fn drop(&mut self) { |
||||||
|
remove_dir_all(&self.0).unwrap() |
||||||
|
} |
||||||
|
} |
@ -1,59 +0,0 @@ |
|||||||
[package] |
|
||||||
name = "oxigraph" |
|
||||||
version.workspace = true |
|
||||||
authors.workspace = true |
|
||||||
license.workspace = true |
|
||||||
readme = "README.md" |
|
||||||
keywords = ["RDF", "SPARQL", "graph-database", "database"] |
|
||||||
categories = ["database-implementations"] |
|
||||||
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxigraph" |
|
||||||
homepage = "https://oxigraph.org/" |
|
||||||
documentation = "https://docs.rs/oxigraph" |
|
||||||
description = """ |
|
||||||
a SPARQL database and RDF toolkit |
|
||||||
""" |
|
||||||
edition.workspace = true |
|
||||||
rust-version.workspace = true |
|
||||||
|
|
||||||
[features] |
|
||||||
js = ["getrandom/js", "oxsdatatypes/js", "js-sys"] |
|
||||||
|
|
||||||
|
|
||||||
[dependencies] |
|
||||||
digest.workspace = true |
|
||||||
hex.workspace = true |
|
||||||
json-event-parser.workspace = true |
|
||||||
md-5.workspace = true |
|
||||||
oxilangtag.workspace = true |
|
||||||
oxiri.workspace = true |
|
||||||
oxrdf = { workspace = true, features = ["rdf-star", "oxsdatatypes"] } |
|
||||||
oxrdfio = { workspace = true, features = ["rdf-star"] } |
|
||||||
oxsdatatypes.workspace = true |
|
||||||
rand.workspace = true |
|
||||||
regex.workspace = true |
|
||||||
sha1.workspace = true |
|
||||||
sha2.workspace = true |
|
||||||
siphasher.workspace = true |
|
||||||
sparesults = { workspace = true, features = ["rdf-star"] } |
|
||||||
spargebra = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] } |
|
||||||
sparopt = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] } |
|
||||||
thiserror.workspace = true |
|
||||||
|
|
||||||
[target.'cfg(not(target_family = "wasm"))'.dependencies] |
|
||||||
libc = "0.2" |
|
||||||
rocksdb.workspace = true |
|
||||||
|
|
||||||
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] |
|
||||||
getrandom.workspace = true |
|
||||||
js-sys = { workspace = true, optional = true } |
|
||||||
|
|
||||||
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies] |
|
||||||
codspeed-criterion-compat.workspace = true |
|
||||||
zstd.workspace = true |
|
||||||
|
|
||||||
[lints] |
|
||||||
workspace = true |
|
||||||
|
|
||||||
[package.metadata.docs.rs] |
|
||||||
rustdoc-args = ["--cfg", "docsrs"] |
|
||||||
|
|
@ -1,82 +0,0 @@ |
|||||||
Oxigraph |
|
||||||
======== |
|
||||||
|
|
||||||
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph) |
|
||||||
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph) |
|
||||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph) |
|
||||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
|
||||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
|
||||||
|
|
||||||
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. |
|
||||||
|
|
||||||
Its goal is to provide a compliant, safe and fast on-disk graph database. |
|
||||||
It also provides a set of utility functions for reading, writing, and processing RDF files. |
|
||||||
|
|
||||||
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet. |
|
||||||
|
|
||||||
Oxigraph also provides [a CLI tool](https://crates.io/crates/oxigraph-cli) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library. |
|
||||||
|
|
||||||
|
|
||||||
Oxigraph implements the following specifications: |
|
||||||
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/). |
|
||||||
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval. |
|
||||||
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/). |
|
||||||
|
|
||||||
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture). |
|
||||||
|
|
||||||
The main entry point of Oxigraph is the [`Store`](store::Store) struct: |
|
||||||
```rust |
|
||||||
use oxigraph::store::Store; |
|
||||||
use oxigraph::model::*; |
|
||||||
use oxigraph::sparql::QueryResults; |
|
||||||
|
|
||||||
let store = Store::new().unwrap(); |
|
||||||
|
|
||||||
// insertion |
|
||||||
let ex = NamedNode::new("http://example.com").unwrap(); |
|
||||||
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph); |
|
||||||
store.insert(&quad).unwrap(); |
|
||||||
|
|
||||||
// quad filter |
|
||||||
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap(); |
|
||||||
assert_eq!(vec![quad], results); |
|
||||||
|
|
||||||
// SPARQL query |
|
||||||
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() { |
|
||||||
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into())); |
|
||||||
} |
|
||||||
``` |
|
||||||
|
|
||||||
It is based on these crates that can be used separately: |
|
||||||
* [`oxrdf`](https://crates.io/crates/oxrdf), datastructures encoding RDF basic concepts (the [`oxigraph::model`](crate::model) module). |
|
||||||
* [`oxrdfio`](https://crates.io/crates/oxrdfio), a unified parser and serializer API for RDF formats (the [`oxigraph::io`](crate::io) module). It itself relies on: |
|
||||||
* [`oxttl`](https://crates.io/crates/oxttl), N-Triple, N-Quad, Turtle, TriG and N3 parsing and serialization. |
|
||||||
* [`oxrdfxml`](https://crates.io/crates/oxrdfxml), RDF/XML parsing and serialization. |
|
||||||
* [`spargebra`](https://crates.io/crates/spargebra), a SPARQL parser. |
|
||||||
* [`sparesults`](https://crates.io/crates/sparesults), parsers and serializers for SPARQL result formats (the [`oxigraph::sparql::results`](crate::sparql::results) module). |
|
||||||
* [`sparopt`](https://crates.io/crates/sparopt), a SPARQL optimizer. |
|
||||||
* [`oxsdatatypes`](https://crates.io/crates/oxsdatatypes), an implementation of some XML Schema datatypes. |
|
||||||
|
|
||||||
To build the library locally, don't forget to fetch the submodules: use `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository together with its submodules, or run `git submodule update --init` to add them to an already cloned repository. |
|
||||||
|
|
||||||
It is possible to disable the RocksDB storage backend to only use the in-memory fallback by disabling the `rocksdb` default feature: |
|
||||||
```toml |
|
||||||
oxigraph = { version = "*", default-features = false } |
|
||||||
``` |
|
||||||
This is the default behavior when compiling Oxigraph to WASM. |
|
||||||
|
|
||||||
## License |
|
||||||
|
|
||||||
This project is licensed under either of |
|
||||||
|
|
||||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
|
||||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
|
||||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
|
||||||
`<http://opensource.org/licenses/MIT>`) |
|
||||||
|
|
||||||
at your option. |
|
||||||
|
|
||||||
|
|
||||||
### Contribution |
|
||||||
|
|
||||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
|
@ -1,39 +0,0 @@ |
|||||||
//! Utilities to read and write RDF graphs and datasets using [OxRDF I/O](https://crates.io/crates/oxrdfio).
|
|
||||||
//!
|
|
||||||
//! The entry points of this module are the two [`RdfParser`] and [`RdfSerializer`] structs.
|
|
||||||
//!
|
|
||||||
//! Usage example converting a Turtle file to a N-Triples file:
|
|
||||||
//! ```
|
|
||||||
//! use oxigraph::io::{RdfFormat, RdfParser, RdfSerializer};
|
|
||||||
//!
|
|
||||||
//! let turtle_file = b"@base <http://example.com/> .
|
|
||||||
//! @prefix schema: <http://schema.org/> .
|
|
||||||
//! <foo> a schema:Person ;
|
|
||||||
//! schema:name \"Foo\" .
|
|
||||||
//! <bar> a schema:Person ;
|
|
||||||
//! schema:name \"Bar\" .";
|
|
||||||
//!
|
|
||||||
//! let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
|
||||||
//! <http://example.com/foo> <http://schema.org/name> \"Foo\" .
|
|
||||||
//! <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
|
||||||
//! <http://example.com/bar> <http://schema.org/name> \"Bar\" .
|
|
||||||
//! ";
|
|
||||||
//!
|
|
||||||
//! let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
|
|
||||||
//! for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) {
|
|
||||||
//! writer.write_quad(&quad.unwrap()).unwrap();
|
|
||||||
//! }
|
|
||||||
//! assert_eq!(writer.finish().unwrap(), ntriples_file);
|
|
||||||
//! ```
|
|
||||||
|
|
||||||
mod format; |
|
||||||
pub mod read; |
|
||||||
pub mod write; |
|
||||||
|
|
||||||
#[allow(deprecated)] |
|
||||||
pub use self::format::{DatasetFormat, GraphFormat}; |
|
||||||
#[allow(deprecated)] |
|
||||||
pub use self::read::{DatasetParser, GraphParser}; |
|
||||||
#[allow(deprecated)] |
|
||||||
pub use self::write::{DatasetSerializer, GraphSerializer}; |
|
||||||
pub use oxrdfio::*; |
|
@ -1,199 +0,0 @@ |
|||||||
#![allow(deprecated)] |
|
||||||
|
|
||||||
//! Utilities to read RDF graphs and datasets.
|
|
||||||
|
|
||||||
use crate::io::{DatasetFormat, GraphFormat}; |
|
||||||
use crate::model::*; |
|
||||||
use oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser}; |
|
||||||
use std::io::Read; |
|
||||||
|
|
||||||
/// Parsers for RDF graph serialization formats.
|
|
||||||
///
|
|
||||||
/// It currently supports the following formats:
|
|
||||||
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
|
|
||||||
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
|
|
||||||
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::io::{GraphFormat, GraphParser};
|
|
||||||
///
|
|
||||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
|
||||||
///
|
|
||||||
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
|
|
||||||
/// let triples = parser
|
|
||||||
/// .read_triples(file.as_bytes())
|
|
||||||
/// .collect::<Result<Vec<_>, _>>()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(triples.len(), 1);
|
|
||||||
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
|
|
||||||
/// # std::io::Result::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[deprecated(note = "use RdfParser instead", since = "0.4.0")] |
|
||||||
pub struct GraphParser { |
|
||||||
inner: RdfParser, |
|
||||||
} |
|
||||||
|
|
||||||
impl GraphParser { |
|
||||||
/// Builds a parser for the given format.
|
|
||||||
#[inline] |
|
||||||
pub fn from_format(format: GraphFormat) -> Self { |
|
||||||
Self { |
|
||||||
inner: RdfParser::from_format(format.into()) |
|
||||||
.without_named_graphs() |
|
||||||
.rename_blank_nodes(), |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Provides an IRI that could be used to resolve the file relative IRIs.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::io::{GraphFormat, GraphParser};
|
|
||||||
///
|
|
||||||
/// let file = "</s> </p> </o> .";
|
|
||||||
///
|
|
||||||
/// let parser =
|
|
||||||
/// GraphParser::from_format(GraphFormat::Turtle).with_base_iri("http://example.com")?;
|
|
||||||
/// let triples = parser
|
|
||||||
/// .read_triples(file.as_bytes())
|
|
||||||
/// .collect::<Result<Vec<_>, _>>()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(triples.len(), 1);
|
|
||||||
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[inline] |
|
||||||
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { |
|
||||||
Ok(Self { |
|
||||||
inner: self.inner.with_base_iri(base_iri)?, |
|
||||||
}) |
|
||||||
} |
|
||||||
|
|
||||||
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of triples.
|
|
||||||
pub fn read_triples<R: Read>(self, reader: R) -> TripleReader<R> { |
|
||||||
TripleReader { |
|
||||||
parser: self.inner.parse_read(reader), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// An iterator yielding read triples.
|
|
||||||
/// Could be built using a [`GraphParser`].
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::io::{GraphFormat, GraphParser};
|
|
||||||
///
|
|
||||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
|
||||||
///
|
|
||||||
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
|
|
||||||
/// let triples = parser
|
|
||||||
/// .read_triples(file.as_bytes())
|
|
||||||
/// .collect::<Result<Vec<_>, _>>()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(triples.len(), 1);
|
|
||||||
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
|
|
||||||
/// # std::io::Result::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[must_use] |
|
||||||
pub struct TripleReader<R: Read> { |
|
||||||
parser: FromReadQuadReader<R>, |
|
||||||
} |
|
||||||
|
|
||||||
impl<R: Read> Iterator for TripleReader<R> { |
|
||||||
type Item = Result<Triple, RdfParseError>; |
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> { |
|
||||||
Some(self.parser.next()?.map(Into::into).map_err(Into::into)) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// A parser for RDF dataset serialization formats.
|
|
||||||
///
|
|
||||||
/// It currently supports the following formats:
|
|
||||||
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
|
|
||||||
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::io::{DatasetFormat, DatasetParser};
|
|
||||||
///
|
|
||||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
|
|
||||||
///
|
|
||||||
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
|
|
||||||
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(quads.len(), 1);
|
|
||||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
|
||||||
/// # std::io::Result::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[deprecated(note = "use RdfParser instead", since = "0.4.0")] |
|
||||||
pub struct DatasetParser { |
|
||||||
inner: RdfParser, |
|
||||||
} |
|
||||||
|
|
||||||
impl DatasetParser { |
|
||||||
/// Builds a parser for the given format.
|
|
||||||
#[inline] |
|
||||||
pub fn from_format(format: DatasetFormat) -> Self { |
|
||||||
Self { |
|
||||||
inner: RdfParser::from_format(format.into()).rename_blank_nodes(), |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Provides an IRI that could be used to resolve the file relative IRIs.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::io::{DatasetFormat, DatasetParser};
|
|
||||||
///
|
|
||||||
/// let file = "<g> { </s> </p> </o> }";
|
|
||||||
///
|
|
||||||
/// let parser =
|
|
||||||
/// DatasetParser::from_format(DatasetFormat::TriG).with_base_iri("http://example.com")?;
|
|
||||||
/// let triples = parser
|
|
||||||
/// .read_quads(file.as_bytes())
|
|
||||||
/// .collect::<Result<Vec<_>, _>>()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(triples.len(), 1);
|
|
||||||
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[inline] |
|
||||||
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { |
|
||||||
Ok(Self { |
|
||||||
inner: self.inner.with_base_iri(base_iri)?, |
|
||||||
}) |
|
||||||
} |
|
||||||
|
|
||||||
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of quads.
|
|
||||||
pub fn read_quads<R: Read>(self, reader: R) -> QuadReader<R> { |
|
||||||
QuadReader { |
|
||||||
parser: self.inner.parse_read(reader), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// An iterator yielding read quads.
|
|
||||||
/// Could be built using a [`DatasetParser`].
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::io::{DatasetFormat, DatasetParser};
|
|
||||||
///
|
|
||||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
|
|
||||||
///
|
|
||||||
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
|
|
||||||
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(quads.len(), 1);
|
|
||||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
|
||||||
/// # std::io::Result::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[must_use] |
|
||||||
pub struct QuadReader<R: Read> { |
|
||||||
parser: FromReadQuadReader<R>, |
|
||||||
} |
|
||||||
|
|
||||||
impl<R: Read> Iterator for QuadReader<R> { |
|
||||||
type Item = Result<Quad, RdfParseError>; |
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> { |
|
||||||
Some(self.parser.next()?.map_err(Into::into)) |
|
||||||
} |
|
||||||
} |
|
@ -1,185 +0,0 @@ |
|||||||
#![allow(deprecated)] |
|
||||||
|
|
||||||
//! Utilities to write RDF graphs and datasets.
|
|
||||||
|
|
||||||
use crate::io::{DatasetFormat, GraphFormat}; |
|
||||||
use crate::model::*; |
|
||||||
use oxrdfio::{RdfSerializer, ToWriteQuadWriter}; |
|
||||||
use std::io::{self, Write}; |
|
||||||
|
|
||||||
/// A serializer for RDF graph serialization formats.
|
|
||||||
///
|
|
||||||
/// It currently supports the following formats:
|
|
||||||
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
|
|
||||||
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
|
|
||||||
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::io::{GraphFormat, GraphSerializer};
|
|
||||||
/// use oxigraph::model::*;
|
|
||||||
///
|
|
||||||
/// let mut buffer = Vec::new();
|
|
||||||
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
|
|
||||||
/// writer.write(&Triple {
|
|
||||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
|
||||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
|
||||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
|
||||||
/// })?;
|
|
||||||
/// writer.finish()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(
|
|
||||||
/// buffer.as_slice(),
|
|
||||||
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
|
|
||||||
/// );
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")] |
|
||||||
pub struct GraphSerializer { |
|
||||||
inner: RdfSerializer, |
|
||||||
} |
|
||||||
|
|
||||||
impl GraphSerializer { |
|
||||||
/// Builds a serializer for the given format
|
|
||||||
#[inline] |
|
||||||
pub fn from_format(format: GraphFormat) -> Self { |
|
||||||
Self { |
|
||||||
inner: RdfSerializer::from_format(format.into()), |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`] implementation
|
|
||||||
pub fn triple_writer<W: Write>(self, write: W) -> TripleWriter<W> { |
|
||||||
TripleWriter { |
|
||||||
writer: self.inner.serialize_to_write(write), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Allows writing triples.
|
|
||||||
/// Could be built using a [`GraphSerializer`].
|
|
||||||
///
|
|
||||||
/// <div class="warning">
|
|
||||||
///
|
|
||||||
/// Do not forget to run the [`finish`](TripleWriter::finish()) method to properly write the last bytes of the file.</div>
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::io::{GraphFormat, GraphSerializer};
|
|
||||||
/// use oxigraph::model::*;
|
|
||||||
///
|
|
||||||
/// let mut buffer = Vec::new();
|
|
||||||
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
|
|
||||||
/// writer.write(&Triple {
|
|
||||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
|
||||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
|
||||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
|
||||||
/// })?;
|
|
||||||
/// writer.finish()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(
|
|
||||||
/// buffer.as_slice(),
|
|
||||||
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
|
|
||||||
/// );
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[must_use] |
|
||||||
pub struct TripleWriter<W: Write> { |
|
||||||
writer: ToWriteQuadWriter<W>, |
|
||||||
} |
|
||||||
|
|
||||||
impl<W: Write> TripleWriter<W> { |
|
||||||
/// Writes a triple
|
|
||||||
pub fn write<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> { |
|
||||||
self.writer.write_triple(triple) |
|
||||||
} |
|
||||||
|
|
||||||
/// Writes the last bytes of the file
|
|
||||||
pub fn finish(self) -> io::Result<()> { |
|
||||||
self.writer.finish()?.flush() |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// A serializer for RDF dataset serialization formats.
|
|
||||||
///
|
|
||||||
/// It currently supports the following formats:
|
|
||||||
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
|
|
||||||
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
|
|
||||||
/// use oxigraph::model::*;
|
|
||||||
///
|
|
||||||
/// let mut buffer = Vec::new();
|
|
||||||
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
|
|
||||||
/// writer.write(&Quad {
|
|
||||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
|
||||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
|
||||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
|
||||||
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
|
|
||||||
/// })?;
|
|
||||||
/// writer.finish()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")] |
|
||||||
pub struct DatasetSerializer { |
|
||||||
inner: RdfSerializer, |
|
||||||
} |
|
||||||
|
|
||||||
impl DatasetSerializer { |
|
||||||
/// Builds a serializer for the given format
|
|
||||||
#[inline] |
|
||||||
pub fn from_format(format: DatasetFormat) -> Self { |
|
||||||
Self { |
|
||||||
inner: RdfSerializer::from_format(format.into()), |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Returns a [`QuadWriter`] allowing writing triples into the given [`Write`] implementation
|
|
||||||
pub fn quad_writer<W: Write>(self, write: W) -> QuadWriter<W> { |
|
||||||
QuadWriter { |
|
||||||
writer: self.inner.serialize_to_write(write), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Allows writing quads.
|
|
||||||
/// Could be built using a [`DatasetSerializer`].
|
|
||||||
///
|
|
||||||
/// <div class="warning">
|
|
||||||
///
|
|
||||||
/// Do not forget to run the [`finish`](QuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
|
|
||||||
/// use oxigraph::model::*;
|
|
||||||
///
|
|
||||||
/// let mut buffer = Vec::new();
|
|
||||||
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
|
|
||||||
/// writer.write(&Quad {
|
|
||||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
|
||||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
|
||||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
|
||||||
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
|
|
||||||
/// })?;
|
|
||||||
/// writer.finish()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[must_use] |
|
||||||
pub struct QuadWriter<W: Write> { |
|
||||||
writer: ToWriteQuadWriter<W>, |
|
||||||
} |
|
||||||
|
|
||||||
impl<W: Write> QuadWriter<W> { |
|
||||||
/// Writes a quad
|
|
||||||
pub fn write<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> { |
|
||||||
self.writer.write_quad(quad) |
|
||||||
} |
|
||||||
|
|
||||||
/// Writes the last bytes of the file
|
|
||||||
pub fn finish(self) -> io::Result<()> { |
|
||||||
self.writer.finish()?.flush() |
|
||||||
} |
|
||||||
} |
|
@ -1,12 +0,0 @@ |
|||||||
#![doc = include_str!("../README.md")] |
|
||||||
#![doc(test(attr(deny(warnings))))] |
|
||||||
#![doc(test(attr(allow(deprecated))))] |
|
||||||
#![cfg_attr(docsrs, feature(doc_auto_cfg))] |
|
||||||
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
|
||||||
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
|
||||||
|
|
||||||
pub mod io; |
|
||||||
pub mod model; |
|
||||||
pub mod sparql; |
|
||||||
mod storage; |
|
||||||
pub mod store; |
|
@ -1,22 +0,0 @@ |
|||||||
//! Implements data structures for [RDF 1.1 Concepts](https://www.w3.org/TR/rdf11-concepts/) using [OxRDF](https://crates.io/crates/oxrdf).
|
|
||||||
//!
|
|
||||||
//! Usage example:
|
|
||||||
//!
|
|
||||||
//! ```
|
|
||||||
//! use oxigraph::model::*;
|
|
||||||
//!
|
|
||||||
//! let mut graph = Graph::default();
|
|
||||||
//!
|
|
||||||
//! // insertion
|
|
||||||
//! let ex = NamedNodeRef::new("http://example.com").unwrap();
|
|
||||||
//! let triple = TripleRef::new(ex, ex, ex);
|
|
||||||
//! graph.insert(triple);
|
|
||||||
//!
|
|
||||||
//! // simple filter
|
|
||||||
//! let results: Vec<_> = graph.triples_for_subject(ex).collect();
|
|
||||||
//! assert_eq!(vec![triple], results);
|
|
||||||
//! ```
|
|
||||||
|
|
||||||
pub use oxrdf::*; |
|
||||||
|
|
||||||
pub use spargebra::term::GroundQuad; |
|
@ -1,84 +0,0 @@ |
|||||||
use crate::io::RdfParseError; |
|
||||||
use crate::model::NamedNode; |
|
||||||
use crate::sparql::results::QueryResultsParseError as ResultsParseError; |
|
||||||
use crate::sparql::SparqlSyntaxError; |
|
||||||
use crate::storage::StorageError; |
|
||||||
use std::convert::Infallible; |
|
||||||
use std::error::Error; |
|
||||||
use std::io; |
|
||||||
|
|
||||||
/// A SPARQL evaluation error.
|
|
||||||
#[derive(Debug, thiserror::Error)] |
|
||||||
#[non_exhaustive] |
|
||||||
pub enum EvaluationError { |
|
||||||
/// An error in SPARQL parsing.
|
|
||||||
#[error(transparent)] |
|
||||||
Parsing(#[from] SparqlSyntaxError), |
|
||||||
/// An error from the storage.
|
|
||||||
#[error(transparent)] |
|
||||||
Storage(#[from] StorageError), |
|
||||||
/// An error while parsing an external RDF file.
|
|
||||||
#[error(transparent)] |
|
||||||
GraphParsing(#[from] RdfParseError), |
|
||||||
/// An error while parsing an external result file (likely from a federated query).
|
|
||||||
#[error(transparent)] |
|
||||||
ResultsParsing(#[from] ResultsParseError), |
|
||||||
/// An error returned during results serialization.
|
|
||||||
#[error(transparent)] |
|
||||||
ResultsSerialization(#[from] io::Error), |
|
||||||
/// Error during `SERVICE` evaluation
|
|
||||||
#[error("{0}")] |
|
||||||
Service(#[source] Box<dyn Error + Send + Sync + 'static>), |
|
||||||
/// Error when `CREATE` tries to create an already existing graph
|
|
||||||
#[error("The graph {0} already exists")] |
|
||||||
GraphAlreadyExists(NamedNode), |
|
||||||
/// Error when `DROP` or `CLEAR` tries to remove a not existing graph
|
|
||||||
#[error("The graph {0} does not exist")] |
|
||||||
GraphDoesNotExist(NamedNode), |
|
||||||
/// The variable storing the `SERVICE` name is unbound
|
|
||||||
#[error("The variable encoding the service name is unbound")] |
|
||||||
UnboundService, |
|
||||||
/// The given `SERVICE` is not supported
|
|
||||||
#[error("The service {0} is not supported")] |
|
||||||
UnsupportedService(NamedNode), |
|
||||||
/// The given content media type returned from an HTTP response is not supported (`SERVICE` and `LOAD`)
|
|
||||||
#[error("The content media type {0} is not supported")] |
|
||||||
UnsupportedContentType(String), |
|
||||||
/// The `SERVICE` call has not returns solutions
|
|
||||||
#[error("The service is not returning solutions but a boolean or a graph")] |
|
||||||
ServiceDoesNotReturnSolutions, |
|
||||||
/// The results are not a RDF graph
|
|
||||||
#[error("The query results are not a RDF graph")] |
|
||||||
NotAGraph, |
|
||||||
} |
|
||||||
|
|
||||||
impl From<Infallible> for EvaluationError { |
|
||||||
#[inline] |
|
||||||
fn from(error: Infallible) -> Self { |
|
||||||
match error {} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl From<EvaluationError> for io::Error { |
|
||||||
#[inline] |
|
||||||
fn from(error: EvaluationError) -> Self { |
|
||||||
match error { |
|
||||||
EvaluationError::Parsing(error) => Self::new(io::ErrorKind::InvalidData, error), |
|
||||||
EvaluationError::GraphParsing(error) => error.into(), |
|
||||||
EvaluationError::ResultsParsing(error) => error.into(), |
|
||||||
EvaluationError::ResultsSerialization(error) => error, |
|
||||||
EvaluationError::Storage(error) => error.into(), |
|
||||||
EvaluationError::Service(error) => match error.downcast() { |
|
||||||
Ok(error) => *error, |
|
||||||
Err(error) => Self::new(io::ErrorKind::Other, error), |
|
||||||
}, |
|
||||||
EvaluationError::GraphAlreadyExists(_) |
|
||||||
| EvaluationError::GraphDoesNotExist(_) |
|
||||||
| EvaluationError::UnboundService |
|
||||||
| EvaluationError::UnsupportedService(_) |
|
||||||
| EvaluationError::UnsupportedContentType(_) |
|
||||||
| EvaluationError::ServiceDoesNotReturnSolutions |
|
||||||
| EvaluationError::NotAGraph => Self::new(io::ErrorKind::InvalidInput, error), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
@ -1,9 +0,0 @@ |
|||||||
#[cfg(not(feature = "http-client"))] |
|
||||||
mod dummy; |
|
||||||
#[cfg(feature = "http-client")] |
|
||||||
mod simple; |
|
||||||
|
|
||||||
#[cfg(not(feature = "http-client"))] |
|
||||||
pub use dummy::Client; |
|
||||||
#[cfg(feature = "http-client")] |
|
||||||
pub use simple::Client; |
|
@ -1,371 +0,0 @@ |
|||||||
use crate::io::{RdfFormat, RdfSerializer}; |
|
||||||
use crate::model::*; |
|
||||||
use crate::sparql::error::EvaluationError; |
|
||||||
use crate::sparql::results::{ |
|
||||||
FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsFormat, |
|
||||||
QueryResultsParseError, QueryResultsParser, QueryResultsSerializer, |
|
||||||
}; |
|
||||||
pub use sparesults::QuerySolution; |
|
||||||
use std::io::{Read, Write}; |
|
||||||
use std::sync::Arc; |
|
||||||
|
|
||||||
/// Results of a [SPARQL query](https://www.w3.org/TR/sparql11-query/).
|
|
||||||
pub enum QueryResults { |
|
||||||
/// Results of a [SELECT](https://www.w3.org/TR/sparql11-query/#select) query.
|
|
||||||
Solutions(QuerySolutionIter), |
|
||||||
/// Result of a [ASK](https://www.w3.org/TR/sparql11-query/#ask) query.
|
|
||||||
Boolean(bool), |
|
||||||
/// Results of a [CONSTRUCT](https://www.w3.org/TR/sparql11-query/#construct) or [DESCRIBE](https://www.w3.org/TR/sparql11-query/#describe) query.
|
|
||||||
Graph(QueryTripleIter), |
|
||||||
} |
|
||||||
|
|
||||||
impl QueryResults { |
|
||||||
/// Reads a SPARQL query results serialization.
|
|
||||||
pub fn read( |
|
||||||
read: impl Read + 'static, |
|
||||||
format: QueryResultsFormat, |
|
||||||
) -> Result<Self, QueryResultsParseError> { |
|
||||||
Ok(QueryResultsParser::from_format(format) |
|
||||||
.parse_read(read)? |
|
||||||
.into()) |
|
||||||
} |
|
||||||
|
|
||||||
/// Writes the query results (solutions or boolean).
|
|
||||||
///
|
|
||||||
/// This method fails if it is called on the `Graph` results.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::store::Store;
|
|
||||||
/// use oxigraph::model::*;
|
|
||||||
/// use oxigraph::sparql::results::QueryResultsFormat;
|
|
||||||
///
|
|
||||||
/// let store = Store::new()?;
|
|
||||||
/// let ex = NamedNodeRef::new("http://example.com")?;
|
|
||||||
/// store.insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
|
|
||||||
///
|
|
||||||
/// let results = store.query("SELECT ?s WHERE { ?s ?p ?o }")?;
|
|
||||||
/// assert_eq!(
|
|
||||||
/// results.write(Vec::new(), QueryResultsFormat::Json)?,
|
|
||||||
/// r#"{"head":{"vars":["s"]},"results":{"bindings":[{"s":{"type":"uri","value":"http://example.com"}}]}}"#.as_bytes()
|
|
||||||
/// );
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
pub fn write<W: Write>( |
|
||||||
self, |
|
||||||
write: W, |
|
||||||
format: QueryResultsFormat, |
|
||||||
) -> Result<W, EvaluationError> { |
|
||||||
let serializer = QueryResultsSerializer::from_format(format); |
|
||||||
match self { |
|
||||||
Self::Boolean(value) => serializer.serialize_boolean_to_write(write, value), |
|
||||||
Self::Solutions(solutions) => { |
|
||||||
let mut writer = serializer |
|
||||||
.serialize_solutions_to_write(write, solutions.variables().to_vec()) |
|
||||||
.map_err(EvaluationError::ResultsSerialization)?; |
|
||||||
for solution in solutions { |
|
||||||
writer |
|
||||||
.write(&solution?) |
|
||||||
.map_err(EvaluationError::ResultsSerialization)?; |
|
||||||
} |
|
||||||
writer.finish() |
|
||||||
} |
|
||||||
Self::Graph(triples) => { |
|
||||||
let s = VariableRef::new_unchecked("subject"); |
|
||||||
let p = VariableRef::new_unchecked("predicate"); |
|
||||||
let o = VariableRef::new_unchecked("object"); |
|
||||||
let mut writer = serializer |
|
||||||
.serialize_solutions_to_write( |
|
||||||
write, |
|
||||||
vec![s.into_owned(), p.into_owned(), o.into_owned()], |
|
||||||
) |
|
||||||
.map_err(EvaluationError::ResultsSerialization)?; |
|
||||||
for triple in triples { |
|
||||||
let triple = triple?; |
|
||||||
writer |
|
||||||
.write([ |
|
||||||
(s, &triple.subject.into()), |
|
||||||
(p, &triple.predicate.into()), |
|
||||||
(o, &triple.object), |
|
||||||
]) |
|
||||||
.map_err(EvaluationError::ResultsSerialization)?; |
|
||||||
} |
|
||||||
writer.finish() |
|
||||||
} |
|
||||||
} |
|
||||||
.map_err(EvaluationError::ResultsSerialization) |
|
||||||
} |
|
||||||
|
|
||||||
/// Writes the graph query results.
|
|
||||||
///
|
|
||||||
/// This method fails if it is called on the `Solution` or `Boolean` results.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::io::RdfFormat;
|
|
||||||
/// use oxigraph::model::*;
|
|
||||||
/// use oxigraph::store::Store;
|
|
||||||
///
|
|
||||||
/// let graph = "<http://example.com> <http://example.com> <http://example.com> .\n";
|
|
||||||
///
|
|
||||||
/// let store = Store::new()?;
|
|
||||||
/// store.load_graph(
|
|
||||||
/// graph.as_bytes(),
|
|
||||||
/// RdfFormat::NTriples,
|
|
||||||
/// GraphName::DefaultGraph,
|
|
||||||
/// None,
|
|
||||||
/// )?;
|
|
||||||
///
|
|
||||||
/// let results = store.query("CONSTRUCT WHERE { ?s ?p ?o }")?;
|
|
||||||
/// assert_eq!(
|
|
||||||
/// results.write_graph(Vec::new(), RdfFormat::NTriples)?,
|
|
||||||
/// graph.as_bytes()
|
|
||||||
/// );
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
pub fn write_graph<W: Write>( |
|
||||||
self, |
|
||||||
write: W, |
|
||||||
format: impl Into<RdfFormat>, |
|
||||||
) -> Result<W, EvaluationError> { |
|
||||||
if let Self::Graph(triples) = self { |
|
||||||
let mut writer = RdfSerializer::from_format(format.into()).serialize_to_write(write); |
|
||||||
for triple in triples { |
|
||||||
writer |
|
||||||
.write_triple(&triple?) |
|
||||||
.map_err(EvaluationError::ResultsSerialization)?; |
|
||||||
} |
|
||||||
writer |
|
||||||
.finish() |
|
||||||
.map_err(EvaluationError::ResultsSerialization) |
|
||||||
} else { |
|
||||||
Err(EvaluationError::NotAGraph) |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl From<QuerySolutionIter> for QueryResults { |
|
||||||
#[inline] |
|
||||||
fn from(value: QuerySolutionIter) -> Self { |
|
||||||
Self::Solutions(value) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl<R: Read + 'static> From<FromReadQueryResultsReader<R>> for QueryResults { |
|
||||||
fn from(reader: FromReadQueryResultsReader<R>) -> Self { |
|
||||||
match reader { |
|
||||||
FromReadQueryResultsReader::Solutions(s) => Self::Solutions(s.into()), |
|
||||||
FromReadQueryResultsReader::Boolean(v) => Self::Boolean(v), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// An iterator over [`QuerySolution`]s.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::sparql::QueryResults;
|
|
||||||
/// use oxigraph::store::Store;
|
|
||||||
///
|
|
||||||
/// let store = Store::new()?;
|
|
||||||
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }")? {
|
|
||||||
/// for solution in solutions {
|
|
||||||
/// println!("{:?}", solution?.get("s"));
|
|
||||||
/// }
|
|
||||||
/// }
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
pub struct QuerySolutionIter { |
|
||||||
variables: Arc<[Variable]>, |
|
||||||
iter: Box<dyn Iterator<Item = Result<QuerySolution, EvaluationError>>>, |
|
||||||
} |
|
||||||
|
|
||||||
impl QuerySolutionIter { |
|
||||||
/// Construct a new iterator of solution from an ordered list of solution variables and an iterator of solution tuples
|
|
||||||
/// (each tuple using the same ordering as the variable list such that tuple element 0 is the value for the variable 0...)
|
|
||||||
pub fn new( |
|
||||||
variables: Arc<[Variable]>, |
|
||||||
iter: impl Iterator<Item = Result<Vec<Option<Term>>, EvaluationError>> + 'static, |
|
||||||
) -> Self { |
|
||||||
Self { |
|
||||||
variables: Arc::clone(&variables), |
|
||||||
iter: Box::new( |
|
||||||
iter.map(move |t| t.map(|values| (Arc::clone(&variables), values).into())), |
|
||||||
), |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// The variables used in the solutions.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::sparql::{QueryResults, Variable};
|
|
||||||
/// use oxigraph::store::Store;
|
|
||||||
///
|
|
||||||
/// let store = Store::new()?;
|
|
||||||
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s ?o WHERE { ?s ?p ?o }")? {
|
|
||||||
/// assert_eq!(
|
|
||||||
/// solutions.variables(),
|
|
||||||
/// &[Variable::new("s")?, Variable::new("o")?]
|
|
||||||
/// );
|
|
||||||
/// }
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[inline] |
|
||||||
pub fn variables(&self) -> &[Variable] { |
|
||||||
&self.variables |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl<R: Read + 'static> From<FromReadSolutionsReader<R>> for QuerySolutionIter { |
|
||||||
fn from(reader: FromReadSolutionsReader<R>) -> Self { |
|
||||||
Self { |
|
||||||
variables: reader.variables().into(), |
|
||||||
iter: Box::new(reader.map(|t| t.map_err(EvaluationError::from))), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl Iterator for QuerySolutionIter { |
|
||||||
type Item = Result<QuerySolution, EvaluationError>; |
|
||||||
|
|
||||||
#[inline] |
|
||||||
fn next(&mut self) -> Option<Self::Item> { |
|
||||||
self.iter.next() |
|
||||||
} |
|
||||||
|
|
||||||
#[inline] |
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) { |
|
||||||
self.iter.size_hint() |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// An iterator over the triples that compose a graph solution.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxigraph::sparql::QueryResults;
|
|
||||||
/// use oxigraph::store::Store;
|
|
||||||
///
|
|
||||||
/// let store = Store::new()?;
|
|
||||||
/// if let QueryResults::Graph(triples) = store.query("CONSTRUCT WHERE { ?s ?p ?o }")? {
|
|
||||||
/// for triple in triples {
|
|
||||||
/// println!("{}", triple?);
|
|
||||||
/// }
|
|
||||||
/// }
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
pub struct QueryTripleIter { |
|
||||||
pub(crate) iter: Box<dyn Iterator<Item = Result<Triple, EvaluationError>>>, |
|
||||||
} |
|
||||||
|
|
||||||
impl Iterator for QueryTripleIter { |
|
||||||
type Item = Result<Triple, EvaluationError>; |
|
||||||
|
|
||||||
#[inline] |
|
||||||
fn next(&mut self) -> Option<Self::Item> { |
|
||||||
self.iter.next() |
|
||||||
} |
|
||||||
|
|
||||||
#[inline] |
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) { |
|
||||||
self.iter.size_hint() |
|
||||||
} |
|
||||||
|
|
||||||
#[inline] |
|
||||||
fn fold<Acc, G>(self, init: Acc, g: G) -> Acc |
|
||||||
where |
|
||||||
G: FnMut(Acc, Self::Item) -> Acc, |
|
||||||
{ |
|
||||||
self.iter.fold(init, g) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    use std::io::Cursor;
    use std::str;

    /// Solution rows for the `foo`/`bar` variables: unbound values, IRIs, blank nodes,
    /// several kinds of literals and a triple whose object is itself a triple.
    fn sample_rows() -> Vec<Result<Vec<Option<Term>>, EvaluationError>> {
        vec![
            Ok(vec![None, None]),
            Ok(vec![
                Some(NamedNode::new_unchecked("http://example.com").into()),
                None,
            ]),
            Ok(vec![
                None,
                Some(NamedNode::new_unchecked("http://example.com").into()),
            ]),
            Ok(vec![
                Some(BlankNode::new_unchecked("foo").into()),
                Some(BlankNode::new_unchecked("bar").into()),
            ]),
            Ok(vec![Some(Literal::new_simple_literal("foo").into()), None]),
            Ok(vec![
                Some(Literal::new_language_tagged_literal_unchecked("foo", "fr").into()),
                None,
            ]),
            Ok(vec![
                Some(Literal::from(1).into()),
                Some(Literal::from(true).into()),
            ]),
            Ok(vec![
                Some(Literal::from(1.33).into()),
                Some(Literal::from(false).into()),
            ]),
            Ok(vec![
                Some(
                    Triple::new(
                        NamedNode::new_unchecked("http://example.com/s"),
                        NamedNode::new_unchecked("http://example.com/p"),
                        Triple::new(
                            NamedNode::new_unchecked("http://example.com/os"),
                            NamedNode::new_unchecked("http://example.com/op"),
                            NamedNode::new_unchecked("http://example.com/oo"),
                        ),
                    )
                    .into(),
                ),
                None,
            ]),
        ]
    }

    /// Writing results, reading them back and writing them again must give the same text.
    #[test]
    fn test_serialization_roundtrip() -> Result<(), EvaluationError> {
        let formats = [
            QueryResultsFormat::Json,
            QueryResultsFormat::Xml,
            QueryResultsFormat::Tsv,
        ];
        for format in formats {
            let solutions = QuerySolutionIter::new(
                vec![
                    Variable::new_unchecked("foo"),
                    Variable::new_unchecked("bar"),
                ]
                .into(),
                sample_rows().into_iter(),
            );
            let results = vec![
                QueryResults::Boolean(true),
                QueryResults::Boolean(false),
                QueryResults::Solutions(solutions),
            ];

            for ex in results {
                let mut first_pass = Vec::new();
                ex.write(&mut first_pass, format)?;
                let reread = QueryResults::read(Cursor::new(first_pass.clone()), format)?;
                let mut second_pass = Vec::new();
                reread.write(&mut second_pass, format)?;
                assert_eq!(
                    str::from_utf8(&first_pass).unwrap(),
                    str::from_utf8(&second_pass).unwrap()
                );
            }
        }

        Ok(())
    }
}
|
@ -1,44 +0,0 @@ |
|||||||
//! Utilities to read and write RDF results formats using [sparesults](https://crates.io/crates/sparesults).
|
|
||||||
//!
|
|
||||||
//! It supports [SPARQL Query Results XML Format (Second Edition)](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
|
|
||||||
//!
|
|
||||||
//! Usage example converting a JSON result file into a TSV result file:
|
|
||||||
//!
|
|
||||||
//! ```
|
|
||||||
//! use oxigraph::sparql::results::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader, QueryResultsSerializer};
|
|
||||||
//! use std::io::Result;
|
|
||||||
//!
|
|
||||||
//! fn convert_json_to_tsv(json_file: &[u8]) -> Result<Vec<u8>> {
|
|
||||||
//! let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
|
|
||||||
//! let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv);
|
|
||||||
//! // We start to read the JSON file and see which kind of results it is
|
|
||||||
//! match json_parser.parse_read(json_file)? {
|
|
||||||
//! FromReadQueryResultsReader::Boolean(value) => {
|
|
||||||
//! // it's a boolean result, we copy it in TSV to the output buffer
|
|
||||||
//! tsv_serializer.serialize_boolean_to_write(Vec::new(), value)
|
|
||||||
//! }
|
|
||||||
//! FromReadQueryResultsReader::Solutions(solutions_reader) => {
|
|
||||||
//! // it's a set of solutions, we create a writer and we write to it while reading in streaming from the JSON file
|
|
||||||
//! let mut serialize_solutions_to_write = tsv_serializer.serialize_solutions_to_write(Vec::new(), solutions_reader.variables().to_vec())?;
|
|
||||||
//! for solution in solutions_reader {
|
|
||||||
//! serialize_solutions_to_write.write(&solution?)?;
|
|
||||||
//! }
|
|
||||||
//! serialize_solutions_to_write.finish()
|
|
||||||
//! }
|
|
||||||
//! }
|
|
||||||
//! }
|
|
||||||
//!
|
|
||||||
//! // Let's test with a boolean
|
|
||||||
//! assert_eq!(
|
|
||||||
//! convert_json_to_tsv(br#"{"boolean":true}"#.as_slice()).unwrap(),
|
|
||||||
//! b"true"
|
|
||||||
//! );
|
|
||||||
//!
|
|
||||||
//! // And with a set of solutions
|
|
||||||
//! assert_eq!(
|
|
||||||
//! convert_json_to_tsv(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice()).unwrap(),
|
|
||||||
//! b"?foo\t?bar\n\"test\"\t\n"
|
|
||||||
//! );
|
|
||||||
//! ```
|
|
||||||
|
|
||||||
pub use sparesults::*; |
|
@ -1,12 +0,0 @@ |
|||||||
//! A storage backend
|
|
||||||
//! RocksDB is available, if not in memory
|
|
||||||
|
|
||||||
#[cfg(any(target_family = "wasm"))] |
|
||||||
pub use fallback::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction}; |
|
||||||
#[cfg(all(not(target_family = "wasm")))] |
|
||||||
pub use oxi_rocksdb::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction}; |
|
||||||
|
|
||||||
#[cfg(any(target_family = "wasm"))] |
|
||||||
mod fallback; |
|
||||||
#[cfg(all(not(target_family = "wasm")))] |
|
||||||
mod oxi_rocksdb; |
|
@ -1,139 +0,0 @@ |
|||||||
use crate::io::{RdfFormat, RdfParseError}; |
|
||||||
use crate::storage::numeric_encoder::EncodedTerm; |
|
||||||
use oxiri::IriParseError; |
|
||||||
use oxrdf::TermRef; |
|
||||||
use std::error::Error; |
|
||||||
use std::io; |
|
||||||
|
|
||||||
/// An error related to storage operations (reads, writes...).
|
|
||||||
#[derive(Debug, thiserror::Error)] |
|
||||||
#[non_exhaustive] |
|
||||||
pub enum StorageError { |
|
||||||
/// Error from the OS I/O layer.
|
|
||||||
#[error(transparent)] |
|
||||||
Io(#[from] io::Error), |
|
||||||
/// Error related to data corruption.
|
|
||||||
#[error(transparent)] |
|
||||||
Corruption(#[from] CorruptionError), |
|
||||||
#[doc(hidden)] |
|
||||||
#[error("{0}")] |
|
||||||
Other(#[source] Box<dyn Error + Send + Sync + 'static>), |
|
||||||
} |
|
||||||
|
|
||||||
impl From<StorageError> for io::Error { |
|
||||||
#[inline] |
|
||||||
fn from(error: StorageError) -> Self { |
|
||||||
match error { |
|
||||||
StorageError::Io(error) => error, |
|
||||||
StorageError::Corruption(error) => error.into(), |
|
||||||
StorageError::Other(error) => Self::new(io::ErrorKind::Other, error), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// An error return if some content in the database is corrupted.
|
|
||||||
#[derive(Debug, thiserror::Error)] |
|
||||||
#[error(transparent)] |
|
||||||
pub struct CorruptionError(#[from] CorruptionErrorKind); |
|
||||||
|
|
||||||
/// An error return if some content in the database is corrupted.
|
|
||||||
#[derive(Debug, thiserror::Error)] |
|
||||||
enum CorruptionErrorKind { |
|
||||||
#[error("{0}")] |
|
||||||
Msg(String), |
|
||||||
#[error("{0}")] |
|
||||||
Other(#[source] Box<dyn Error + Send + Sync + 'static>), |
|
||||||
} |
|
||||||
|
|
||||||
impl CorruptionError { |
|
||||||
/// Builds an error from a printable error message.
|
|
||||||
#[inline] |
|
||||||
pub(crate) fn new(error: impl Into<Box<dyn Error + Send + Sync + 'static>>) -> Self { |
|
||||||
Self(CorruptionErrorKind::Other(error.into())) |
|
||||||
} |
|
||||||
|
|
||||||
#[inline] |
|
||||||
pub(crate) fn from_encoded_term(encoded: &EncodedTerm, term: &TermRef<'_>) -> Self { |
|
||||||
// TODO: eventually use a dedicated error enum value
|
|
||||||
Self::msg(format!("Invalid term encoding {encoded:?} for {term}")) |
|
||||||
} |
|
||||||
|
|
||||||
#[inline] |
|
||||||
pub(crate) fn from_missing_column_family_name(name: &'static str) -> Self { |
|
||||||
// TODO: eventually use a dedicated error enum value
|
|
||||||
Self::msg(format!("Column family {name} does not exist")) |
|
||||||
} |
|
||||||
|
|
||||||
/// Builds an error from a printable error message.
|
|
||||||
#[inline] |
|
||||||
pub(crate) fn msg(msg: impl Into<String>) -> Self { |
|
||||||
Self(CorruptionErrorKind::Msg(msg.into())) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl From<CorruptionError> for io::Error { |
|
||||||
#[inline] |
|
||||||
fn from(error: CorruptionError) -> Self { |
|
||||||
Self::new(io::ErrorKind::InvalidData, error) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// An error raised while loading a file into a [`Store`](crate::store::Store).
|
|
||||||
#[derive(Debug, thiserror::Error)] |
|
||||||
pub enum LoaderError { |
|
||||||
/// An error raised while reading the file.
|
|
||||||
#[error(transparent)] |
|
||||||
Parsing(#[from] RdfParseError), |
|
||||||
/// An error raised during the insertion in the store.
|
|
||||||
#[error(transparent)] |
|
||||||
Storage(#[from] StorageError), |
|
||||||
/// The base IRI is invalid.
|
|
||||||
#[error("Invalid base IRI '{iri}': {error}")] |
|
||||||
InvalidBaseIri { |
|
||||||
/// The IRI itself.
|
|
||||||
iri: String, |
|
||||||
/// The parsing error.
|
|
||||||
#[source] |
|
||||||
error: IriParseError, |
|
||||||
}, |
|
||||||
} |
|
||||||
|
|
||||||
impl From<LoaderError> for io::Error { |
|
||||||
#[inline] |
|
||||||
fn from(error: LoaderError) -> Self { |
|
||||||
match error { |
|
||||||
LoaderError::Storage(error) => error.into(), |
|
||||||
LoaderError::Parsing(error) => error.into(), |
|
||||||
LoaderError::InvalidBaseIri { .. } => { |
|
||||||
Self::new(io::ErrorKind::InvalidInput, error.to_string()) |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// An error raised while writing a file from a [`Store`](crate::store::Store).
|
|
||||||
#[derive(Debug, thiserror::Error)] |
|
||||||
pub enum SerializerError { |
|
||||||
/// An error raised while writing the content.
|
|
||||||
#[error(transparent)] |
|
||||||
Io(#[from] io::Error), |
|
||||||
/// An error raised during the lookup in the store.
|
|
||||||
#[error(transparent)] |
|
||||||
Storage(#[from] StorageError), |
|
||||||
/// A format compatible with [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) is required.
|
|
||||||
#[error("A RDF format supporting datasets was expected, {0} found")] |
|
||||||
DatasetFormatExpected(RdfFormat), |
|
||||||
} |
|
||||||
|
|
||||||
impl From<SerializerError> for io::Error { |
|
||||||
#[inline] |
|
||||||
fn from(error: SerializerError) -> Self { |
|
||||||
match error { |
|
||||||
SerializerError::Storage(error) => error.into(), |
|
||||||
SerializerError::Io(error) => error, |
|
||||||
SerializerError::DatasetFormatExpected(_) => { |
|
||||||
Self::new(io::ErrorKind::InvalidInput, error.to_string()) |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
@ -1,36 +0,0 @@ |
|||||||
[package] |
|
||||||
name = "oxrdfio" |
|
||||||
version = "0.1.0-alpha.5" |
|
||||||
authors.workspace = true |
|
||||||
license.workspace = true |
|
||||||
readme = "README.md" |
|
||||||
keywords = ["RDF"] |
|
||||||
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfio" |
|
||||||
documentation = "https://docs.rs/oxrdfio" |
|
||||||
description = """ |
|
||||||
Parser and serializer for various RDF formats |
|
||||||
""" |
|
||||||
edition.workspace = true |
|
||||||
rust-version.workspace = true |
|
||||||
|
|
||||||
[features] |
|
||||||
default = [] |
|
||||||
async-tokio = ["dep:tokio", "oxrdfxml/async-tokio", "oxttl/async-tokio"] |
|
||||||
rdf-star = ["oxrdf/rdf-star", "oxttl/rdf-star"] |
|
||||||
|
|
||||||
[dependencies] |
|
||||||
oxrdf.workspace = true |
|
||||||
oxrdfxml.workspace = true |
|
||||||
oxttl.workspace = true |
|
||||||
thiserror.workspace = true |
|
||||||
tokio = { workspace = true, optional = true, features = ["io-util"] } |
|
||||||
|
|
||||||
[dev-dependencies] |
|
||||||
tokio = { workspace = true, features = ["rt", "macros"] } |
|
||||||
|
|
||||||
[lints] |
|
||||||
workspace = true |
|
||||||
|
|
||||||
[package.metadata.docs.rs] |
|
||||||
all-features = true |
|
||||||
rustdoc-args = ["--cfg", "docsrs"] |
|
@ -1,67 +0,0 @@ |
|||||||
OxRDF I/O |
|
||||||
========= |
|
||||||
|
|
||||||
[![Latest Version](https://img.shields.io/crates/v/oxrdfio.svg)](https://crates.io/crates/oxrdfio) |
|
||||||
[![Released API docs](https://docs.rs/oxrdfio/badge.svg)](https://docs.rs/oxrdfio) |
|
||||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfio)](https://crates.io/crates/oxrdfio) |
|
||||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
|
||||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
|
||||||
|
|
||||||
OxRDF I/O is a set of parsers and serializers for RDF. |
|
||||||
|
|
||||||
It supports: |
|
||||||
* [N3](https://w3c.github.io/N3/spec/) using [`oxttl`](https://crates.io/crates/oxttl) |
|
||||||
* [N-Quads](https://www.w3.org/TR/n-quads/) using [`oxttl`](https://crates.io/crates/oxttl) |
|
||||||
* [N-Triples](https://www.w3.org/TR/n-triples/) using [`oxttl`](https://crates.io/crates/oxttl) |
|
||||||
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) using [`oxrdfxml`](https://crates.io/crates/oxrdfxml) |
|
||||||
* [TriG](https://www.w3.org/TR/trig/) using [`oxttl`](https://crates.io/crates/oxttl) |
|
||||||
* [Turtle](https://www.w3.org/TR/turtle/) using [`oxttl`](https://crates.io/crates/oxttl) |
|
||||||
|
|
||||||
Support for [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is also available behind the `rdf-star` feature for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star), [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star), [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) and [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star). |
|
||||||
|
|
||||||
It is designed as a low level parser compatible with both synchronous and asynchronous I/O (behind the `async-tokio` feature). |
|
||||||
|
|
||||||
The entry points of this library are the two [`RdfParser`] and [`RdfSerializer`] structs. |
|
||||||
|
|
||||||
Usage example converting a Turtle file to a N-Triples file: |
|
||||||
```rust |
|
||||||
use oxrdfio::{RdfFormat, RdfParser, RdfSerializer}; |
|
||||||
|
|
||||||
let turtle_file = b"@base <http://example.com/> . |
|
||||||
@prefix schema: <http://schema.org/> . |
|
||||||
<foo> a schema:Person ; |
|
||||||
schema:name \"Foo\" . |
|
||||||
<bar> a schema:Person ; |
|
||||||
schema:name \"Bar\" ."; |
|
||||||
|
|
||||||
let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . |
|
||||||
<http://example.com/foo> <http://schema.org/name> \"Foo\" . |
|
||||||
<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . |
|
||||||
<http://example.com/bar> <http://schema.org/name> \"Bar\" . |
|
||||||
"; |
|
||||||
|
|
||||||
let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new()); |
|
||||||
for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) { |
|
||||||
writer.write_quad(&quad.unwrap()).unwrap(); |
|
||||||
} |
|
||||||
assert_eq!(writer.finish().unwrap(), ntriples_file); |
|
||||||
``` |
|
||||||
|
|
||||||
Parsers for other RDF formats exist in Rust, like [graph-rdfa-processor](https://github.com/nbittich/graph-rdfa-processor) for RDFa and [json-ld](https://github.com/timothee-haudebourg/json-ld) for JSON-LD. |
|
||||||
|
|
||||||
|
|
||||||
## License |
|
||||||
|
|
||||||
This project is licensed under either of |
|
||||||
|
|
||||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
|
||||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
|
||||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
|
||||||
`<http://opensource.org/licenses/MIT>`) |
|
||||||
|
|
||||||
at your option. |
|
||||||
|
|
||||||
|
|
||||||
### Contribution |
|
||||||
|
|
||||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
|
@ -1,122 +0,0 @@ |
|||||||
use std::io; |
|
||||||
use std::ops::Range; |
|
||||||
|
|
||||||
/// Error returned during RDF format parsing.
|
|
||||||
#[derive(Debug, thiserror::Error)] |
|
||||||
pub enum RdfParseError { |
|
||||||
/// I/O error during parsing (file not found...).
|
|
||||||
#[error(transparent)] |
|
||||||
Io(#[from] io::Error), |
|
||||||
/// An error in the file syntax.
|
|
||||||
#[error(transparent)] |
|
||||||
Syntax(#[from] RdfSyntaxError), |
|
||||||
} |
|
||||||
|
|
||||||
impl RdfParseError { |
|
||||||
pub(crate) fn msg(msg: &'static str) -> Self { |
|
||||||
Self::Syntax(RdfSyntaxError(SyntaxErrorKind::Msg(msg))) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl From<oxttl::TurtleSyntaxError> for RdfSyntaxError { |
|
||||||
#[inline] |
|
||||||
fn from(error: oxttl::TurtleSyntaxError) -> Self { |
|
||||||
Self(SyntaxErrorKind::Turtle(error)) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl From<oxttl::TurtleParseError> for RdfParseError { |
|
||||||
#[inline] |
|
||||||
fn from(error: oxttl::TurtleParseError) -> Self { |
|
||||||
match error { |
|
||||||
oxttl::TurtleParseError::Syntax(e) => Self::Syntax(e.into()), |
|
||||||
oxttl::TurtleParseError::Io(e) => Self::Io(e), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl From<oxrdfxml::RdfXmlSyntaxError> for RdfSyntaxError { |
|
||||||
#[inline] |
|
||||||
fn from(error: oxrdfxml::RdfXmlSyntaxError) -> Self { |
|
||||||
Self(SyntaxErrorKind::RdfXml(error)) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl From<oxrdfxml::RdfXmlParseError> for RdfParseError { |
|
||||||
#[inline] |
|
||||||
fn from(error: oxrdfxml::RdfXmlParseError) -> Self { |
|
||||||
match error { |
|
||||||
oxrdfxml::RdfXmlParseError::Syntax(e) => Self::Syntax(e.into()), |
|
||||||
oxrdfxml::RdfXmlParseError::Io(e) => Self::Io(e), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl From<RdfParseError> for io::Error { |
|
||||||
#[inline] |
|
||||||
fn from(error: RdfParseError) -> Self { |
|
||||||
match error { |
|
||||||
RdfParseError::Io(error) => error, |
|
||||||
RdfParseError::Syntax(error) => error.into(), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// An error in the syntax of the parsed file.
|
|
||||||
#[derive(Debug, thiserror::Error)] |
|
||||||
#[error(transparent)] |
|
||||||
pub struct RdfSyntaxError(#[from] SyntaxErrorKind); |
|
||||||
|
|
||||||
/// An error in the syntax of the parsed file.
|
|
||||||
#[derive(Debug, thiserror::Error)] |
|
||||||
enum SyntaxErrorKind { |
|
||||||
#[error(transparent)] |
|
||||||
Turtle(#[from] oxttl::TurtleSyntaxError), |
|
||||||
#[error(transparent)] |
|
||||||
RdfXml(#[from] oxrdfxml::RdfXmlSyntaxError), |
|
||||||
#[error("{0}")] |
|
||||||
Msg(&'static str), |
|
||||||
} |
|
||||||
|
|
||||||
impl RdfSyntaxError { |
|
||||||
/// The location of the error inside of the file.
|
|
||||||
#[inline] |
|
||||||
pub fn location(&self) -> Option<Range<TextPosition>> { |
|
||||||
match &self.0 { |
|
||||||
SyntaxErrorKind::Turtle(e) => { |
|
||||||
let location = e.location(); |
|
||||||
Some( |
|
||||||
TextPosition { |
|
||||||
line: location.start.line, |
|
||||||
column: location.start.column, |
|
||||||
offset: location.start.offset, |
|
||||||
}..TextPosition { |
|
||||||
line: location.end.line, |
|
||||||
column: location.end.column, |
|
||||||
offset: location.end.offset, |
|
||||||
}, |
|
||||||
) |
|
||||||
} |
|
||||||
SyntaxErrorKind::RdfXml(_) | SyntaxErrorKind::Msg(_) => None, |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl From<RdfSyntaxError> for io::Error { |
|
||||||
#[inline] |
|
||||||
fn from(error: RdfSyntaxError) -> Self { |
|
||||||
match error.0 { |
|
||||||
SyntaxErrorKind::Turtle(error) => error.into(), |
|
||||||
SyntaxErrorKind::RdfXml(error) => error.into(), |
|
||||||
SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// A position in a text i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points) and a global file `offset` starting from 0 (in number of bytes).
#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub struct TextPosition {
    /// Line number, starting from 0.
    pub line: u64,
    /// Column number inside the line, starting from 0, counted in code points.
    pub column: u64,
    /// Offset from the beginning of the file, starting from 0, counted in bytes.
    pub offset: u64,
}
|
@ -1,216 +0,0 @@ |
|||||||
use std::fmt; |
|
||||||
|
|
||||||
/// RDF serialization formats.
///
/// This enumeration is non exhaustive. New formats like JSON-LD might be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
pub enum RdfFormat {
    /// [N3](https://w3c.github.io/N3/spec/)
    N3,
    /// [N-Quads](https://www.w3.org/TR/n-quads/)
    NQuads,
    /// [N-Triples](https://www.w3.org/TR/n-triples/)
    NTriples,
    /// [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/)
    RdfXml,
    /// [TriG](https://www.w3.org/TR/trig/)
    TriG,
    /// [Turtle](https://www.w3.org/TR/turtle/)
    Turtle,
}

impl RdfFormat {
    /// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/).
    ///
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(
    ///     RdfFormat::NTriples.iri(),
    ///     "http://www.w3.org/ns/formats/N-Triples"
    /// )
    /// ```
    #[inline]
    pub const fn iri(self) -> &'static str {
        match self {
            Self::N3 => "http://www.w3.org/ns/formats/N3",
            Self::NQuads => "http://www.w3.org/ns/formats/N-Quads",
            Self::NTriples => "http://www.w3.org/ns/formats/N-Triples",
            Self::RdfXml => "http://www.w3.org/ns/formats/RDF_XML",
            Self::TriG => "http://www.w3.org/ns/formats/TriG",
            Self::Turtle => "http://www.w3.org/ns/formats/Turtle",
        }
    }

    /// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
    ///
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(RdfFormat::NTriples.media_type(), "application/n-triples")
    /// ```
    #[inline]
    pub const fn media_type(self) -> &'static str {
        match self {
            Self::N3 => "text/n3",
            Self::NQuads => "application/n-quads",
            Self::NTriples => "application/n-triples",
            Self::RdfXml => "application/rdf+xml",
            Self::TriG => "application/trig",
            Self::Turtle => "text/turtle",
        }
    }

    /// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension.
    ///
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(RdfFormat::NTriples.file_extension(), "nt")
    /// ```
    #[inline]
    pub const fn file_extension(self) -> &'static str {
        match self {
            Self::N3 => "n3",
            Self::NQuads => "nq",
            Self::NTriples => "nt",
            Self::RdfXml => "rdf",
            Self::TriG => "trig",
            Self::Turtle => "ttl",
        }
    }

    /// The format name.
    ///
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(RdfFormat::NTriples.name(), "N-Triples")
    /// ```
    #[inline]
    pub const fn name(self) -> &'static str {
        match self {
            Self::N3 => "N3",
            Self::NQuads => "N-Quads",
            Self::NTriples => "N-Triples",
            Self::RdfXml => "RDF/XML",
            Self::TriG => "TriG",
            Self::Turtle => "Turtle",
        }
    }

    /// Checks if the format supports [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) and not only [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
    ///
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(RdfFormat::NTriples.supports_datasets(), false);
    /// assert_eq!(RdfFormat::NQuads.supports_datasets(), true);
    /// ```
    #[inline]
    pub const fn supports_datasets(self) -> bool {
        matches!(self, Self::NQuads | Self::TriG)
    }

    /// Checks if the format supports [RDF-star quoted triples](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#dfn-quoted).
    ///
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(RdfFormat::NTriples.supports_rdf_star(), true);
    /// assert_eq!(RdfFormat::RdfXml.supports_rdf_star(), false);
    /// ```
    #[inline]
    #[cfg(feature = "rdf-star")]
    pub const fn supports_rdf_star(self) -> bool {
        matches!(
            self,
            Self::NTriples | Self::NQuads | Self::Turtle | Self::TriG
        )
    }

    /// Looks for a known format from a media type.
    ///
    /// It supports some media type aliases.
    /// For example, "application/xml" is going to return `RdfFormat::RdfXml` even if it is not its canonical media type.
    ///
    /// Parameters (everything after the first `;`) are ignored, only the `application`
    /// and `text` top-level types are accepted, and the comparison is ASCII
    /// case-insensitive, including for the optional `x-` subtype prefix.
    ///
    /// Example:
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(
    ///     RdfFormat::from_media_type("text/turtle; charset=utf-8"),
    ///     Some(RdfFormat::Turtle)
    /// )
    /// ```
    #[inline]
    pub fn from_media_type(media_type: &str) -> Option<Self> {
        const MEDIA_SUBTYPES: [(&str, RdfFormat); 10] = [
            ("n-quads", RdfFormat::NQuads),
            ("n-triples", RdfFormat::NTriples),
            ("n3", RdfFormat::N3),
            ("nquads", RdfFormat::NQuads),
            ("ntriples", RdfFormat::NTriples),
            ("plain", RdfFormat::NTriples),
            ("rdf+xml", RdfFormat::RdfXml),
            ("trig", RdfFormat::TriG),
            ("turtle", RdfFormat::Turtle),
            ("xml", RdfFormat::RdfXml),
        ];

        // Drop the parameters (`; charset=...`) before splitting type and subtype.
        let essence = media_type
            .split_once(';')
            .map_or(media_type, |(essence, _)| essence);
        let (r#type, subtype) = essence.split_once('/')?;
        let r#type = r#type.trim();
        if !r#type.eq_ignore_ascii_case("application") && !r#type.eq_ignore_ascii_case("text") {
            return None;
        }
        let subtype = subtype.trim();
        // Media types are case-insensitive, so `X-` must be stripped just like `x-`.
        // `get(..2)` is `None` when the string is too short or byte 2 is not a char
        // boundary, so the slicing below cannot panic.
        let subtype = match subtype.get(..2) {
            Some(prefix) if prefix.eq_ignore_ascii_case("x-") => &subtype[2..],
            _ => subtype,
        };
        MEDIA_SUBTYPES
            .iter()
            .find(|(candidate_subtype, _)| candidate_subtype.eq_ignore_ascii_case(subtype))
            .map(|&(_, format)| format)
    }

    /// Looks for a known format from an extension.
    ///
    /// It supports some aliases. The comparison is ASCII case-insensitive and the
    /// extension is expected without its leading dot.
    ///
    /// Example:
    /// ```
    /// use oxrdfio::RdfFormat;
    ///
    /// assert_eq!(RdfFormat::from_extension("nt"), Some(RdfFormat::NTriples))
    /// ```
    #[inline]
    pub fn from_extension(extension: &str) -> Option<Self> {
        const EXTENSIONS: [(&str, RdfFormat); 8] = [
            ("n3", RdfFormat::N3),
            ("nq", RdfFormat::NQuads),
            ("nt", RdfFormat::NTriples),
            ("rdf", RdfFormat::RdfXml),
            ("trig", RdfFormat::TriG),
            ("ttl", RdfFormat::Turtle),
            ("txt", RdfFormat::NTriples),
            ("xml", RdfFormat::RdfXml),
        ];
        EXTENSIONS
            .iter()
            .find(|(candidate_extension, _)| candidate_extension.eq_ignore_ascii_case(extension))
            .map(|&(_, format)| format)
    }
}

impl fmt::Display for RdfFormat {
    /// Writes the human-readable format name, as returned by [`RdfFormat::name`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.name())
    }
}
|
@ -1,19 +0,0 @@ |
|||||||
#![doc = include_str!("../README.md")] |
|
||||||
#![doc(test(attr(deny(warnings))))] |
|
||||||
#![cfg_attr(docsrs, feature(doc_auto_cfg))] |
|
||||||
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
|
||||||
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
|
||||||
|
|
||||||
mod error; |
|
||||||
mod format; |
|
||||||
mod parser; |
|
||||||
mod serializer; |
|
||||||
|
|
||||||
pub use error::{RdfParseError, RdfSyntaxError, TextPosition}; |
|
||||||
pub use format::RdfFormat; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
pub use parser::FromTokioAsyncReadQuadReader; |
|
||||||
pub use parser::{FromReadQuadReader, RdfParser}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
pub use serializer::ToTokioAsyncWriteQuadWriter; |
|
||||||
pub use serializer::{RdfSerializer, ToWriteQuadWriter}; |
|
@ -1,807 +0,0 @@ |
|||||||
//! Utilities to read RDF graphs and datasets.
|
|
||||||
|
|
||||||
pub use crate::error::RdfParseError; |
|
||||||
use crate::format::RdfFormat; |
|
||||||
use oxrdf::{BlankNode, GraphName, IriParseError, Quad, Subject, Term, Triple}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
use oxrdfxml::FromTokioAsyncReadRdfXmlReader; |
|
||||||
use oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
use oxttl::n3::FromTokioAsyncReadN3Reader; |
|
||||||
use oxttl::n3::{FromReadN3Reader, N3Parser, N3PrefixesIter, N3Quad, N3Term}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
use oxttl::nquads::FromTokioAsyncReadNQuadsReader; |
|
||||||
use oxttl::nquads::{FromReadNQuadsReader, NQuadsParser}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
use oxttl::ntriples::FromTokioAsyncReadNTriplesReader; |
|
||||||
use oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
use oxttl::trig::FromTokioAsyncReadTriGReader; |
|
||||||
use oxttl::trig::{FromReadTriGReader, TriGParser, TriGPrefixesIter}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
use oxttl::turtle::FromTokioAsyncReadTurtleReader; |
|
||||||
use oxttl::turtle::{FromReadTurtleReader, TurtleParser, TurtlePrefixesIter}; |
|
||||||
use std::collections::HashMap; |
|
||||||
use std::io::Read; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
use tokio::io::AsyncRead; |
|
||||||
|
|
||||||
/// Parsers for RDF serialization formats.
|
|
||||||
///
|
|
||||||
/// It currently supports the following formats:
|
|
||||||
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
|
|
||||||
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
|
|
||||||
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
|
|
||||||
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
|
|
||||||
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
|
|
||||||
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
|
|
||||||
///
|
|
||||||
/// Note the useful options:
|
|
||||||
/// - [`with_base_iri`](Self::with_base_iri) to resolve the relative IRIs.
|
|
||||||
/// - [`rename_blank_nodes`](Self::rename_blank_nodes) to rename the blank nodes to auto-generated numbers to avoid conflicts when merging RDF graphs together.
|
|
||||||
/// - [`without_named_graphs`](Self::without_named_graphs) to parse a single graph.
|
|
||||||
/// - [`unchecked`](Self::unchecked) to skip some validations if the file is already known to be valid.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
||||||
///
|
|
||||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
|
||||||
///
|
|
||||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
|
||||||
/// let quads = parser
|
|
||||||
/// .parse_read(file.as_bytes())
|
|
||||||
/// .collect::<Result<Vec<_>, _>>()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(quads.len(), 1);
|
|
||||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
|
||||||
/// # std::io::Result::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[must_use] |
|
||||||
pub struct RdfParser { |
|
||||||
inner: RdfParserKind, |
|
||||||
default_graph: GraphName, |
|
||||||
without_named_graphs: bool, |
|
||||||
rename_blank_nodes: bool, |
|
||||||
} |
|
||||||
|
|
||||||
enum RdfParserKind { |
|
||||||
N3(N3Parser), |
|
||||||
NQuads(NQuadsParser), |
|
||||||
NTriples(NTriplesParser), |
|
||||||
RdfXml(RdfXmlParser), |
|
||||||
TriG(TriGParser), |
|
||||||
Turtle(TurtleParser), |
|
||||||
} |
|
||||||
|
|
||||||
impl RdfParser { |
|
||||||
/// Builds a parser for the given format.
|
|
||||||
#[inline] |
|
||||||
pub fn from_format(format: RdfFormat) -> Self { |
|
||||||
Self { |
|
||||||
inner: match format { |
|
||||||
RdfFormat::N3 => RdfParserKind::N3(N3Parser::new()), |
|
||||||
RdfFormat::NQuads => RdfParserKind::NQuads({ |
|
||||||
#[cfg(feature = "rdf-star")] |
|
||||||
{ |
|
||||||
NQuadsParser::new().with_quoted_triples() |
|
||||||
} |
|
||||||
#[cfg(not(feature = "rdf-star"))] |
|
||||||
{ |
|
||||||
NQuadsParser::new() |
|
||||||
} |
|
||||||
}), |
|
||||||
RdfFormat::NTriples => RdfParserKind::NTriples({ |
|
||||||
#[cfg(feature = "rdf-star")] |
|
||||||
{ |
|
||||||
NTriplesParser::new().with_quoted_triples() |
|
||||||
} |
|
||||||
#[cfg(not(feature = "rdf-star"))] |
|
||||||
{ |
|
||||||
NTriplesParser::new() |
|
||||||
} |
|
||||||
}), |
|
||||||
RdfFormat::RdfXml => RdfParserKind::RdfXml(RdfXmlParser::new()), |
|
||||||
RdfFormat::TriG => RdfParserKind::TriG({ |
|
||||||
#[cfg(feature = "rdf-star")] |
|
||||||
{ |
|
||||||
TriGParser::new().with_quoted_triples() |
|
||||||
} |
|
||||||
#[cfg(not(feature = "rdf-star"))] |
|
||||||
{ |
|
||||||
TriGParser::new() |
|
||||||
} |
|
||||||
}), |
|
||||||
RdfFormat::Turtle => RdfParserKind::Turtle({ |
|
||||||
#[cfg(feature = "rdf-star")] |
|
||||||
{ |
|
||||||
TurtleParser::new().with_quoted_triples() |
|
||||||
} |
|
||||||
#[cfg(not(feature = "rdf-star"))] |
|
||||||
{ |
|
||||||
TurtleParser::new() |
|
||||||
} |
|
||||||
}), |
|
||||||
}, |
|
||||||
default_graph: GraphName::DefaultGraph, |
|
||||||
without_named_graphs: false, |
|
||||||
rename_blank_nodes: false, |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// The format the parser uses.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
||||||
///
|
|
||||||
/// assert_eq!(
|
|
||||||
/// RdfParser::from_format(RdfFormat::Turtle).format(),
|
|
||||||
/// RdfFormat::Turtle
|
|
||||||
/// );
|
|
||||||
/// ```
|
|
||||||
pub fn format(&self) -> RdfFormat { |
|
||||||
match &self.inner { |
|
||||||
RdfParserKind::N3(_) => RdfFormat::N3, |
|
||||||
RdfParserKind::NQuads(_) => RdfFormat::NQuads, |
|
||||||
RdfParserKind::NTriples(_) => RdfFormat::NTriples, |
|
||||||
RdfParserKind::RdfXml(_) => RdfFormat::RdfXml, |
|
||||||
RdfParserKind::TriG(_) => RdfFormat::TriG, |
|
||||||
RdfParserKind::Turtle(_) => RdfFormat::Turtle, |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Provides an IRI that could be used to resolve the file relative IRIs.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
||||||
///
|
|
||||||
/// let file = "</s> </p> </o> .";
|
|
||||||
///
|
|
||||||
/// let parser = RdfParser::from_format(RdfFormat::Turtle).with_base_iri("http://example.com")?;
|
|
||||||
/// let quads = parser
|
|
||||||
/// .parse_read(file.as_bytes())
|
|
||||||
/// .collect::<Result<Vec<_>, _>>()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(quads.len(), 1);
|
|
||||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[inline] |
|
||||||
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { |
|
||||||
self.inner = match self.inner { |
|
||||||
RdfParserKind::N3(p) => RdfParserKind::N3(p), |
|
||||||
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p), |
|
||||||
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p), |
|
||||||
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?), |
|
||||||
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?), |
|
||||||
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?), |
|
||||||
}; |
|
||||||
Ok(self) |
|
||||||
} |
|
||||||
|
|
||||||
/// Provides the name graph name that should replace the default graph in the returned quads.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdf::NamedNode;
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
||||||
///
|
|
||||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
|
||||||
///
|
|
||||||
/// let parser = RdfParser::from_format(RdfFormat::Turtle)
|
|
||||||
/// .with_default_graph(NamedNode::new("http://example.com/g")?);
|
|
||||||
/// let quads = parser
|
|
||||||
/// .parse_read(file.as_bytes())
|
|
||||||
/// .collect::<Result<Vec<_>, _>>()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(quads.len(), 1);
|
|
||||||
/// assert_eq!(quads[0].graph_name.to_string(), "<http://example.com/g>");
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[inline] |
|
||||||
pub fn with_default_graph(mut self, default_graph: impl Into<GraphName>) -> Self { |
|
||||||
self.default_graph = default_graph.into(); |
|
||||||
self |
|
||||||
} |
|
||||||
|
|
||||||
/// Sets that the parser must fail if parsing a named graph.
|
|
||||||
///
|
|
||||||
/// This function restricts the parser to only parse a single [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) and not an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
||||||
///
|
|
||||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
|
|
||||||
///
|
|
||||||
/// let parser = RdfParser::from_format(RdfFormat::NQuads).without_named_graphs();
|
|
||||||
/// assert!(parser.parse_read(file.as_bytes()).next().unwrap().is_err());
|
|
||||||
/// ```
|
|
||||||
#[inline] |
|
||||||
pub fn without_named_graphs(mut self) -> Self { |
|
||||||
self.without_named_graphs = true; |
|
||||||
self |
|
||||||
} |
|
||||||
|
|
||||||
/// Renames the blank nodes ids from the ones set in the serialization to random ids.
|
|
||||||
///
|
|
||||||
/// This allows to avoid id conflicts when merging graphs together.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
||||||
///
|
|
||||||
/// let file = "_:a <http://example.com/p> <http://example.com/o> .";
|
|
||||||
///
|
|
||||||
/// let result1 = RdfParser::from_format(RdfFormat::NQuads)
|
|
||||||
/// .rename_blank_nodes()
|
|
||||||
/// .parse_read(file.as_bytes())
|
|
||||||
/// .collect::<Result<Vec<_>, _>>()?;
|
|
||||||
/// let result2 = RdfParser::from_format(RdfFormat::NQuads)
|
|
||||||
/// .rename_blank_nodes()
|
|
||||||
/// .parse_read(file.as_bytes())
|
|
||||||
/// .collect::<Result<Vec<_>, _>>()?;
|
|
||||||
/// assert_ne!(result1, result2);
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[inline] |
|
||||||
pub fn rename_blank_nodes(mut self) -> Self { |
|
||||||
self.rename_blank_nodes = true; |
|
||||||
self |
|
||||||
} |
|
||||||
|
|
||||||
/// Assumes the file is valid to make parsing faster.
|
|
||||||
///
|
|
||||||
/// It will skip some validations.
|
|
||||||
///
|
|
||||||
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
|
|
||||||
#[inline] |
|
||||||
pub fn unchecked(mut self) -> Self { |
|
||||||
self.inner = match self.inner { |
|
||||||
RdfParserKind::N3(p) => RdfParserKind::N3(p.unchecked()), |
|
||||||
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p.unchecked()), |
|
||||||
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p.unchecked()), |
|
||||||
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.unchecked()), |
|
||||||
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.unchecked()), |
|
||||||
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.unchecked()), |
|
||||||
}; |
|
||||||
self |
|
||||||
} |
|
||||||
|
|
||||||
/// Parses from a [`Read`] implementation and returns an iterator of quads.
|
|
||||||
///
|
|
||||||
/// Reads are buffered.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
||||||
///
|
|
||||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
|
||||||
///
|
|
||||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
|
||||||
/// let quads = parser
|
|
||||||
/// .parse_read(file.as_bytes())
|
|
||||||
/// .collect::<Result<Vec<_>, _>>()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(quads.len(), 1);
|
|
||||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
|
||||||
/// # std::io::Result::Ok(())
|
|
||||||
/// ```
|
|
||||||
pub fn parse_read<R: Read>(self, reader: R) -> FromReadQuadReader<R> { |
|
||||||
FromReadQuadReader { |
|
||||||
parser: match self.inner { |
|
||||||
RdfParserKind::N3(p) => FromReadQuadReaderKind::N3(p.parse_read(reader)), |
|
||||||
RdfParserKind::NQuads(p) => FromReadQuadReaderKind::NQuads(p.parse_read(reader)), |
|
||||||
RdfParserKind::NTriples(p) => { |
|
||||||
FromReadQuadReaderKind::NTriples(p.parse_read(reader)) |
|
||||||
} |
|
||||||
RdfParserKind::RdfXml(p) => FromReadQuadReaderKind::RdfXml(p.parse_read(reader)), |
|
||||||
RdfParserKind::TriG(p) => FromReadQuadReaderKind::TriG(p.parse_read(reader)), |
|
||||||
RdfParserKind::Turtle(p) => FromReadQuadReaderKind::Turtle(p.parse_read(reader)), |
|
||||||
}, |
|
||||||
mapper: QuadMapper { |
|
||||||
default_graph: self.default_graph.clone(), |
|
||||||
without_named_graphs: self.without_named_graphs, |
|
||||||
blank_node_map: self.rename_blank_nodes.then(HashMap::new), |
|
||||||
}, |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Parses from a Tokio [`AsyncRead`] implementation and returns an async iterator of quads.
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
/// if let Some(quad) = reader.next().await {
///     assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
/// }
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
    self,
    reader: R,
) -> FromTokioAsyncReadQuadReader<R> {
    FromTokioAsyncReadQuadReader {
        parser: match self.inner {
            RdfParserKind::N3(p) => {
                FromTokioAsyncReadQuadReaderKind::N3(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::NQuads(p) => {
                FromTokioAsyncReadQuadReaderKind::NQuads(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::NTriples(p) => {
                FromTokioAsyncReadQuadReaderKind::NTriples(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::RdfXml(p) => {
                FromTokioAsyncReadQuadReaderKind::RdfXml(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::TriG(p) => {
                FromTokioAsyncReadQuadReaderKind::TriG(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::Turtle(p) => {
                FromTokioAsyncReadQuadReaderKind::Turtle(p.parse_tokio_async_read(reader))
            }
        },
        mapper: QuadMapper {
            // `self` is consumed by this method, so the graph name is moved instead of cloned.
            default_graph: self.default_graph,
            without_named_graphs: self.without_named_graphs,
            // A renaming table is only allocated when blank node renaming was requested.
            blank_node_map: self.rename_blank_nodes.then(HashMap::new),
        },
    }
}
|
||||||
} |
|
||||||
|
|
||||||
impl From<RdfFormat> for RdfParser { |
|
||||||
fn from(format: RdfFormat) -> Self { |
|
||||||
Self::from_format(format) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Parses a RDF file from a [`Read`] implementation. Can be built using [`RdfParser::parse_read`].
|
|
||||||
///
|
|
||||||
/// Reads are buffered.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
||||||
///
|
|
||||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
|
||||||
///
|
|
||||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
|
||||||
/// let quads = parser
|
|
||||||
/// .parse_read(file.as_bytes())
|
|
||||||
/// .collect::<Result<Vec<_>, _>>()?;
|
|
||||||
///
|
|
||||||
/// assert_eq!(quads.len(), 1);
|
|
||||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
|
||||||
/// # std::io::Result::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[must_use] |
|
||||||
pub struct FromReadQuadReader<R: Read> { |
|
||||||
parser: FromReadQuadReaderKind<R>, |
|
||||||
mapper: QuadMapper, |
|
||||||
} |
|
||||||
|
|
||||||
enum FromReadQuadReaderKind<R: Read> { |
|
||||||
N3(FromReadN3Reader<R>), |
|
||||||
NQuads(FromReadNQuadsReader<R>), |
|
||||||
NTriples(FromReadNTriplesReader<R>), |
|
||||||
RdfXml(FromReadRdfXmlReader<R>), |
|
||||||
TriG(FromReadTriGReader<R>), |
|
||||||
Turtle(FromReadTurtleReader<R>), |
|
||||||
} |
|
||||||
|
|
||||||
impl<R: Read> Iterator for FromReadQuadReader<R> { |
|
||||||
type Item = Result<Quad, RdfParseError>; |
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> { |
|
||||||
Some(match &mut self.parser { |
|
||||||
FromReadQuadReaderKind::N3(parser) => match parser.next()? { |
|
||||||
Ok(quad) => self.mapper.map_n3_quad(quad), |
|
||||||
Err(e) => Err(e.into()), |
|
||||||
}, |
|
||||||
FromReadQuadReaderKind::NQuads(parser) => match parser.next()? { |
|
||||||
Ok(quad) => self.mapper.map_quad(quad), |
|
||||||
Err(e) => Err(e.into()), |
|
||||||
}, |
|
||||||
FromReadQuadReaderKind::NTriples(parser) => match parser.next()? { |
|
||||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
|
||||||
Err(e) => Err(e.into()), |
|
||||||
}, |
|
||||||
FromReadQuadReaderKind::RdfXml(parser) => match parser.next()? { |
|
||||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
|
||||||
Err(e) => Err(e.into()), |
|
||||||
}, |
|
||||||
FromReadQuadReaderKind::TriG(parser) => match parser.next()? { |
|
||||||
Ok(quad) => self.mapper.map_quad(quad), |
|
||||||
Err(e) => Err(e.into()), |
|
||||||
}, |
|
||||||
FromReadQuadReaderKind::Turtle(parser) => match parser.next()? { |
|
||||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
|
||||||
Err(e) => Err(e.into()), |
|
||||||
}, |
|
||||||
}) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl<R: Read> FromReadQuadReader<R> { |
|
||||||
/// The list of IRI prefixes considered at the current step of the parsing.
|
|
||||||
///
|
|
||||||
/// This method returns (prefix name, prefix value) tuples.
|
|
||||||
/// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
|
|
||||||
/// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
|
|
||||||
///
|
|
||||||
/// An empty iterator is return if the format does not support prefixes.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
||||||
///
|
|
||||||
/// let file = br#"@base <http://example.com/> .
|
|
||||||
/// @prefix schema: <http://schema.org/> .
|
|
||||||
/// <foo> a schema:Person ;
|
|
||||||
/// schema:name "Foo" ."#;
|
|
||||||
///
|
|
||||||
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
|
|
||||||
/// assert!(reader.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
|
|
||||||
///
|
|
||||||
/// reader.next().unwrap()?; // We read the first triple
|
|
||||||
/// assert_eq!(
|
|
||||||
/// reader.prefixes().collect::<Vec<_>>(),
|
|
||||||
/// [("schema", "http://schema.org/")]
|
|
||||||
/// ); // There are now prefixes
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
pub fn prefixes(&self) -> PrefixesIter<'_> { |
|
||||||
PrefixesIter { |
|
||||||
inner: match &self.parser { |
|
||||||
FromReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()), |
|
||||||
FromReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()), |
|
||||||
FromReadQuadReaderKind::Turtle(p) => PrefixesIterKind::Turtle(p.prefixes()), |
|
||||||
FromReadQuadReaderKind::NQuads(_) |
|
||||||
| FromReadQuadReaderKind::NTriples(_) |
|
||||||
| FromReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */ |
|
||||||
}, |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// The base IRI considered at the current step of the parsing.
|
|
||||||
///
|
|
||||||
/// `None` is returned if no base IRI is set or the format does not support base IRIs.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
||||||
///
|
|
||||||
/// let file = br#"@base <http://example.com/> .
|
|
||||||
/// @prefix schema: <http://schema.org/> .
|
|
||||||
/// <foo> a schema:Person ;
|
|
||||||
/// schema:name "Foo" ."#;
|
|
||||||
///
|
|
||||||
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
|
|
||||||
/// assert!(reader.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
|
|
||||||
///
|
|
||||||
/// reader.next().unwrap()?; // We read the first triple
|
|
||||||
/// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI.
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
pub fn base_iri(&self) -> Option<&str> { |
|
||||||
match &self.parser { |
|
||||||
FromReadQuadReaderKind::N3(p) => p.base_iri(), |
|
||||||
FromReadQuadReaderKind::TriG(p) => p.base_iri(), |
|
||||||
FromReadQuadReaderKind::Turtle(p) => p.base_iri(), |
|
||||||
FromReadQuadReaderKind::NQuads(_) |
|
||||||
| FromReadQuadReaderKind::NTriples(_) |
|
||||||
| FromReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
|
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Parses a RDF file from a Tokio [`AsyncRead`] implementation. Can be built using [`RdfParser::parse_tokio_async_read`].
|
|
||||||
///
|
|
||||||
/// Reads are buffered.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
||||||
///
|
|
||||||
/// # #[tokio::main(flavor = "current_thread")]
|
|
||||||
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
|
|
||||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
|
||||||
///
|
|
||||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
|
||||||
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
|
|
||||||
/// if let Some(quad) = reader.next().await {
|
|
||||||
/// assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
|
|
||||||
/// }
|
|
||||||
/// # Ok(())
|
|
||||||
/// # }
|
|
||||||
/// ```
|
|
||||||
#[must_use]
#[cfg(feature = "async-tokio")]
pub struct FromTokioAsyncReadQuadReader<R>
where
    R: AsyncRead + Unpin,
{
    // Format-specific asynchronous reader wrapping `R`.
    parser: FromTokioAsyncReadQuadReaderKind<R>,
    // Post-processing applied to what the format-specific reader returns
    // (default graph substitution, named graph rejection, blank node renaming).
    mapper: QuadMapper,
}

// One variant per supported format, each holding the matching Tokio-based reader.
#[cfg(feature = "async-tokio")]
enum FromTokioAsyncReadQuadReaderKind<R>
where
    R: AsyncRead + Unpin,
{
    N3(FromTokioAsyncReadN3Reader<R>),
    NQuads(FromTokioAsyncReadNQuadsReader<R>),
    NTriples(FromTokioAsyncReadNTriplesReader<R>),
    RdfXml(FromTokioAsyncReadRdfXmlReader<R>),
    TriG(FromTokioAsyncReadTriGReader<R>),
    Turtle(FromTokioAsyncReadTurtleReader<R>),
}
|
||||||
|
|
||||||
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadQuadReader<R> {
    /// Reads the next quad.
    ///
    /// Returns `None` as soon as the underlying format-specific reader returns `None`.
    /// Parsing errors and mapping errors (for example a named graph when named graphs
    /// are disabled) are returned as `Some(Err(_))`.
    pub async fn next(&mut self) -> Option<Result<Quad, RdfParseError>> {
        Some(match &mut self.parser {
            FromTokioAsyncReadQuadReaderKind::N3(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_n3_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::NQuads(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::NTriples(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::RdfXml(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::TriG(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::Turtle(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
        })
    }

    /// The list of IRI prefixes considered at the current step of the parsing.
    ///
    /// This method returns (prefix name, prefix value) tuples.
    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
    ///
    /// An empty iterator is returned if the format does not support prefixes.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    /// schema:name "Foo" ."#;
    ///
    /// let mut reader =
    ///     RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
    ///
    /// reader.next().await.unwrap()?; // We read the first triple
    /// assert_eq!(
    ///     reader.prefixes().collect::<Vec<_>>(),
    ///     [("schema", "http://schema.org/")]
    /// ); // There are now prefixes
    /// # Ok(())
    /// # }
    /// ```
    pub fn prefixes(&self) -> PrefixesIter<'_> {
        PrefixesIter {
            inner: match &self.parser {
                FromTokioAsyncReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()),
                FromTokioAsyncReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()),
                FromTokioAsyncReadQuadReaderKind::Turtle(p) => {
                    PrefixesIterKind::Turtle(p.prefixes())
                }
                FromTokioAsyncReadQuadReaderKind::NQuads(_)
                | FromTokioAsyncReadQuadReaderKind::NTriples(_)
                | FromTokioAsyncReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */
            },
        }
    }

    /// The base IRI considered at the current step of the parsing.
    ///
    /// `None` is returned if no base IRI is set or the format does not support base IRIs.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    /// schema:name "Foo" ."#;
    ///
    /// let mut reader =
    ///     RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
    /// assert!(reader.base_iri().is_none()); // No base IRI at the beginning
    ///
    /// reader.next().await.unwrap()?; // We read the first triple
    /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI
    /// # Ok(())
    /// # }
    /// ```
    pub fn base_iri(&self) -> Option<&str> {
        match &self.parser {
            FromTokioAsyncReadQuadReaderKind::N3(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::TriG(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::Turtle(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::NQuads(_)
            | FromTokioAsyncReadQuadReaderKind::NTriples(_)
            | FromTokioAsyncReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
        }
    }
}
|
||||||
|
|
||||||
/// Iterator on the file prefixes.
|
|
||||||
///
|
|
||||||
/// See [`FromReadQuadReader::prefixes`].
|
|
||||||
pub struct PrefixesIter<'a> { |
|
||||||
inner: PrefixesIterKind<'a>, |
|
||||||
} |
|
||||||
|
|
||||||
enum PrefixesIterKind<'a> { |
|
||||||
Turtle(TurtlePrefixesIter<'a>), |
|
||||||
TriG(TriGPrefixesIter<'a>), |
|
||||||
N3(N3PrefixesIter<'a>), |
|
||||||
None, |
|
||||||
} |
|
||||||
|
|
||||||
impl<'a> Iterator for PrefixesIter<'a> { |
|
||||||
type Item = (&'a str, &'a str); |
|
||||||
|
|
||||||
#[inline] |
|
||||||
fn next(&mut self) -> Option<Self::Item> { |
|
||||||
match &mut self.inner { |
|
||||||
PrefixesIterKind::Turtle(iter) => iter.next(), |
|
||||||
PrefixesIterKind::TriG(iter) => iter.next(), |
|
||||||
PrefixesIterKind::N3(iter) => iter.next(), |
|
||||||
PrefixesIterKind::None => None, |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
#[inline] |
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) { |
|
||||||
match &self.inner { |
|
||||||
PrefixesIterKind::Turtle(iter) => iter.size_hint(), |
|
||||||
PrefixesIterKind::TriG(iter) => iter.size_hint(), |
|
||||||
PrefixesIterKind::N3(iter) => iter.size_hint(), |
|
||||||
PrefixesIterKind::None => (0, Some(0)), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
struct QuadMapper { |
|
||||||
default_graph: GraphName, |
|
||||||
without_named_graphs: bool, |
|
||||||
blank_node_map: Option<HashMap<BlankNode, BlankNode>>, |
|
||||||
} |
|
||||||
|
|
||||||
impl QuadMapper { |
|
||||||
fn map_blank_node(&mut self, node: BlankNode) -> BlankNode { |
|
||||||
if let Some(blank_node_map) = &mut self.blank_node_map { |
|
||||||
blank_node_map |
|
||||||
.entry(node) |
|
||||||
.or_insert_with(BlankNode::default) |
|
||||||
.clone() |
|
||||||
} else { |
|
||||||
node |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
fn map_subject(&mut self, node: Subject) -> Subject { |
|
||||||
match node { |
|
||||||
Subject::NamedNode(node) => node.into(), |
|
||||||
Subject::BlankNode(node) => self.map_blank_node(node).into(), |
|
||||||
#[cfg(feature = "rdf-star")] |
|
||||||
Subject::Triple(triple) => self.map_triple(*triple).into(), |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
fn map_term(&mut self, node: Term) -> Term { |
|
||||||
match node { |
|
||||||
Term::NamedNode(node) => node.into(), |
|
||||||
Term::BlankNode(node) => self.map_blank_node(node).into(), |
|
||||||
Term::Literal(literal) => literal.into(), |
|
||||||
#[cfg(feature = "rdf-star")] |
|
||||||
Term::Triple(triple) => self.map_triple(*triple).into(), |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
fn map_triple(&mut self, triple: Triple) -> Triple { |
|
||||||
Triple { |
|
||||||
subject: self.map_subject(triple.subject), |
|
||||||
predicate: triple.predicate, |
|
||||||
object: self.map_term(triple.object), |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
fn map_graph_name(&mut self, graph_name: GraphName) -> Result<GraphName, RdfParseError> { |
|
||||||
match graph_name { |
|
||||||
GraphName::NamedNode(node) => { |
|
||||||
if self.without_named_graphs { |
|
||||||
Err(RdfParseError::msg("Named graphs are not allowed")) |
|
||||||
} else { |
|
||||||
Ok(node.into()) |
|
||||||
} |
|
||||||
} |
|
||||||
GraphName::BlankNode(node) => { |
|
||||||
if self.without_named_graphs { |
|
||||||
Err(RdfParseError::msg("Named graphs are not allowed")) |
|
||||||
} else { |
|
||||||
Ok(self.map_blank_node(node).into()) |
|
||||||
} |
|
||||||
} |
|
||||||
GraphName::DefaultGraph => Ok(self.default_graph.clone()), |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
fn map_quad(&mut self, quad: Quad) -> Result<Quad, RdfParseError> { |
|
||||||
Ok(Quad { |
|
||||||
subject: self.map_subject(quad.subject), |
|
||||||
predicate: quad.predicate, |
|
||||||
object: self.map_term(quad.object), |
|
||||||
graph_name: self.map_graph_name(quad.graph_name)?, |
|
||||||
}) |
|
||||||
} |
|
||||||
|
|
||||||
fn map_triple_to_quad(&mut self, triple: Triple) -> Quad { |
|
||||||
self.map_triple(triple).in_graph(self.default_graph.clone()) |
|
||||||
} |
|
||||||
|
|
||||||
fn map_n3_quad(&mut self, quad: N3Quad) -> Result<Quad, RdfParseError> { |
|
||||||
Ok(Quad { |
|
||||||
subject: match quad.subject { |
|
||||||
N3Term::NamedNode(s) => Ok(s.into()), |
|
||||||
N3Term::BlankNode(s) => Ok(self.map_blank_node(s).into()), |
|
||||||
N3Term::Literal(_) => Err(RdfParseError::msg( |
|
||||||
"literals are not allowed in regular RDF subjects", |
|
||||||
)), |
|
||||||
#[cfg(feature = "rdf-star")] |
|
||||||
N3Term::Triple(s) => Ok(self.map_triple(*s).into()), |
|
||||||
N3Term::Variable(_) => Err(RdfParseError::msg( |
|
||||||
"variables are not allowed in regular RDF subjects", |
|
||||||
)), |
|
||||||
}?, |
|
||||||
predicate: match quad.predicate { |
|
||||||
N3Term::NamedNode(p) => Ok(p), |
|
||||||
N3Term::BlankNode(_) => Err(RdfParseError::msg( |
|
||||||
"blank nodes are not allowed in regular RDF predicates", |
|
||||||
)), |
|
||||||
N3Term::Literal(_) => Err(RdfParseError::msg( |
|
||||||
"literals are not allowed in regular RDF predicates", |
|
||||||
)), |
|
||||||
#[cfg(feature = "rdf-star")] |
|
||||||
N3Term::Triple(_) => Err(RdfParseError::msg( |
|
||||||
"quoted triples are not allowed in regular RDF predicates", |
|
||||||
)), |
|
||||||
N3Term::Variable(_) => Err(RdfParseError::msg( |
|
||||||
"variables are not allowed in regular RDF predicates", |
|
||||||
)), |
|
||||||
}?, |
|
||||||
object: match quad.object { |
|
||||||
N3Term::NamedNode(o) => Ok(o.into()), |
|
||||||
N3Term::BlankNode(o) => Ok(self.map_blank_node(o).into()), |
|
||||||
N3Term::Literal(o) => Ok(o.into()), |
|
||||||
#[cfg(feature = "rdf-star")] |
|
||||||
N3Term::Triple(o) => Ok(self.map_triple(*o).into()), |
|
||||||
N3Term::Variable(_) => Err(RdfParseError::msg( |
|
||||||
"variables are not allowed in regular RDF objects", |
|
||||||
)), |
|
||||||
}?, |
|
||||||
graph_name: self.map_graph_name(quad.graph_name)?, |
|
||||||
}) |
|
||||||
} |
|
||||||
} |
|
@ -1,410 +0,0 @@ |
|||||||
//! Utilities to write RDF graphs and datasets.
|
|
||||||
|
|
||||||
use crate::format::RdfFormat; |
|
||||||
use oxrdf::{GraphNameRef, IriParseError, QuadRef, TripleRef}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
use oxrdfxml::ToTokioAsyncWriteRdfXmlWriter; |
|
||||||
use oxrdfxml::{RdfXmlSerializer, ToWriteRdfXmlWriter}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
use oxttl::nquads::ToTokioAsyncWriteNQuadsWriter; |
|
||||||
use oxttl::nquads::{NQuadsSerializer, ToWriteNQuadsWriter}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
use oxttl::ntriples::ToTokioAsyncWriteNTriplesWriter; |
|
||||||
use oxttl::ntriples::{NTriplesSerializer, ToWriteNTriplesWriter}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
use oxttl::trig::ToTokioAsyncWriteTriGWriter; |
|
||||||
use oxttl::trig::{ToWriteTriGWriter, TriGSerializer}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
use oxttl::turtle::ToTokioAsyncWriteTurtleWriter; |
|
||||||
use oxttl::turtle::{ToWriteTurtleWriter, TurtleSerializer}; |
|
||||||
use std::io::{self, Write}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
use tokio::io::AsyncWrite; |
|
||||||
|
|
||||||
/// A serializer for RDF serialization formats.
|
|
||||||
///
|
|
||||||
/// It currently supports the following formats:
|
|
||||||
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
|
|
||||||
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
|
|
||||||
/// * [canonical](https://www.w3.org/TR/n-triples/#canonical-ntriples) [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
|
|
||||||
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
|
|
||||||
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
|
|
||||||
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
|
||||||
/// use oxrdf::{Quad, NamedNode};
|
|
||||||
///
|
|
||||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
|
|
||||||
/// writer.write_quad(&Quad {
|
|
||||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
|
||||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
|
||||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
|
||||||
/// graph_name: NamedNode::new("http://example.com/g")?.into()
|
|
||||||
/// })?;
|
|
||||||
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[must_use] |
|
||||||
pub struct RdfSerializer { |
|
||||||
inner: RdfSerializerKind, |
|
||||||
} |
|
||||||
|
|
||||||
enum RdfSerializerKind { |
|
||||||
NQuads(NQuadsSerializer), |
|
||||||
NTriples(NTriplesSerializer), |
|
||||||
RdfXml(RdfXmlSerializer), |
|
||||||
TriG(TriGSerializer), |
|
||||||
Turtle(TurtleSerializer), |
|
||||||
} |
|
||||||
|
|
||||||
impl RdfSerializer { |
|
||||||
/// Builds a serializer for the given format
|
|
||||||
#[inline] |
|
||||||
pub fn from_format(format: RdfFormat) -> Self { |
|
||||||
Self { |
|
||||||
inner: match format { |
|
||||||
RdfFormat::NQuads => RdfSerializerKind::NQuads(NQuadsSerializer::new()), |
|
||||||
RdfFormat::NTriples => RdfSerializerKind::NTriples(NTriplesSerializer::new()), |
|
||||||
RdfFormat::RdfXml => RdfSerializerKind::RdfXml(RdfXmlSerializer::new()), |
|
||||||
RdfFormat::TriG => RdfSerializerKind::TriG(TriGSerializer::new()), |
|
||||||
RdfFormat::Turtle | RdfFormat::N3 => { |
|
||||||
RdfSerializerKind::Turtle(TurtleSerializer::new()) |
|
||||||
} |
|
||||||
}, |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// The format the serializer serializes to.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
|
||||||
///
|
|
||||||
/// assert_eq!(
|
|
||||||
/// RdfSerializer::from_format(RdfFormat::Turtle).format(),
|
|
||||||
/// RdfFormat::Turtle
|
|
||||||
/// );
|
|
||||||
/// ```
|
|
||||||
pub fn format(&self) -> RdfFormat { |
|
||||||
match &self.inner { |
|
||||||
RdfSerializerKind::NQuads(_) => RdfFormat::NQuads, |
|
||||||
RdfSerializerKind::NTriples(_) => RdfFormat::NTriples, |
|
||||||
RdfSerializerKind::RdfXml(_) => RdfFormat::RdfXml, |
|
||||||
RdfSerializerKind::TriG(_) => RdfFormat::TriG, |
|
||||||
RdfSerializerKind::Turtle(_) => RdfFormat::Turtle, |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// If the format supports it, sets a prefix.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdf::vocab::rdf;
|
|
||||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
|
||||||
///
|
|
||||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::Turtle)
|
|
||||||
/// .with_prefix("schema", "http://schema.org/")?
|
|
||||||
/// .serialize_to_write(Vec::new());
|
|
||||||
/// writer.write_triple(TripleRef {
|
|
||||||
/// subject: NamedNodeRef::new("http://example.com/s")?.into(),
|
|
||||||
/// predicate: rdf::TYPE.into(),
|
|
||||||
/// object: NamedNodeRef::new("http://schema.org/Person")?.into(),
|
|
||||||
/// })?;
|
|
||||||
/// assert_eq!(
|
|
||||||
/// writer.finish()?,
|
|
||||||
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com/s> a schema:Person .\n"
|
|
||||||
/// );
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[inline] |
|
||||||
pub fn with_prefix( |
|
||||||
mut self, |
|
||||||
prefix_name: impl Into<String>, |
|
||||||
prefix_iri: impl Into<String>, |
|
||||||
) -> Result<Self, IriParseError> { |
|
||||||
self.inner = match self.inner { |
|
||||||
RdfSerializerKind::NQuads(s) => RdfSerializerKind::NQuads(s), |
|
||||||
RdfSerializerKind::NTriples(s) => RdfSerializerKind::NTriples(s), |
|
||||||
RdfSerializerKind::RdfXml(s) => { |
|
||||||
RdfSerializerKind::RdfXml(s.with_prefix(prefix_name, prefix_iri)?) |
|
||||||
} |
|
||||||
RdfSerializerKind::TriG(s) => { |
|
||||||
RdfSerializerKind::TriG(s.with_prefix(prefix_name, prefix_iri)?) |
|
||||||
} |
|
||||||
RdfSerializerKind::Turtle(s) => { |
|
||||||
RdfSerializerKind::Turtle(s.with_prefix(prefix_name, prefix_iri)?) |
|
||||||
} |
|
||||||
}; |
|
||||||
Ok(self) |
|
||||||
} |
|
||||||
|
|
||||||
/// Writes to a [`Write`] implementation.
|
|
||||||
///
|
|
||||||
/// <div class="warning">
|
|
||||||
///
|
|
||||||
/// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
|
||||||
///
|
|
||||||
/// <div class="warning">
|
|
||||||
///
|
|
||||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
|
||||||
/// use oxrdf::{Quad, NamedNode};
|
|
||||||
///
|
|
||||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
|
|
||||||
/// writer.write_quad(&Quad {
|
|
||||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
|
||||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
|
||||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
|
||||||
/// graph_name: NamedNode::new("http://example.com/g")?.into()
|
|
||||||
/// })?;
|
|
||||||
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteQuadWriter<W> { |
|
||||||
ToWriteQuadWriter { |
|
||||||
formatter: match self.inner { |
|
||||||
RdfSerializerKind::NQuads(s) => { |
|
||||||
ToWriteQuadWriterKind::NQuads(s.serialize_to_write(write)) |
|
||||||
} |
|
||||||
RdfSerializerKind::NTriples(s) => { |
|
||||||
ToWriteQuadWriterKind::NTriples(s.serialize_to_write(write)) |
|
||||||
} |
|
||||||
RdfSerializerKind::RdfXml(s) => { |
|
||||||
ToWriteQuadWriterKind::RdfXml(s.serialize_to_write(write)) |
|
||||||
} |
|
||||||
RdfSerializerKind::TriG(s) => { |
|
||||||
ToWriteQuadWriterKind::TriG(s.serialize_to_write(write)) |
|
||||||
} |
|
||||||
RdfSerializerKind::Turtle(s) => { |
|
||||||
ToWriteQuadWriterKind::Turtle(s.serialize_to_write(write)) |
|
||||||
} |
|
||||||
}, |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Starts serializing to a Tokio [`AsyncWrite`] implementation.
///
/// The returned [`ToTokioAsyncWriteQuadWriter`] wraps the asynchronous writer of the format
/// this serializer was built for.
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// <div class="warning">
///
/// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> std::io::Result<()> {
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
/// writer.write_quad(&Quad {
///     subject: NamedNode::new_unchecked("http://example.com/s").into(),
///     predicate: NamedNode::new_unchecked("http://example.com/p"),
///     object: NamedNode::new_unchecked("http://example.com/o").into(),
///     graph_name: NamedNode::new_unchecked("http://example.com/g").into()
/// }).await?;
/// assert_eq!(writer.finish().await?, "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>(
    self,
    write: W,
) -> ToTokioAsyncWriteQuadWriter<W> {
    // Each serializer builds its own async writer; only the wrapping variant differs.
    let formatter = match self.inner {
        RdfSerializerKind::NQuads(nquads) => {
            ToTokioAsyncWriteQuadWriterKind::NQuads(nquads.serialize_to_tokio_async_write(write))
        }
        RdfSerializerKind::NTriples(ntriples) => ToTokioAsyncWriteQuadWriterKind::NTriples(
            ntriples.serialize_to_tokio_async_write(write),
        ),
        RdfSerializerKind::RdfXml(xml) => {
            ToTokioAsyncWriteQuadWriterKind::RdfXml(xml.serialize_to_tokio_async_write(write))
        }
        RdfSerializerKind::TriG(trig) => {
            ToTokioAsyncWriteQuadWriterKind::TriG(trig.serialize_to_tokio_async_write(write))
        }
        RdfSerializerKind::Turtle(turtle) => {
            ToTokioAsyncWriteQuadWriterKind::Turtle(turtle.serialize_to_tokio_async_write(write))
        }
    };
    ToTokioAsyncWriteQuadWriter { formatter }
}
|
||||||
} |
|
||||||
|
|
||||||
impl From<RdfFormat> for RdfSerializer { |
|
||||||
fn from(format: RdfFormat) -> Self { |
|
||||||
Self::from_format(format) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Writes quads or triples to a [`Write`] implementation.
|
|
||||||
///
|
|
||||||
/// Can be built using [`RdfSerializer::serialize_to_write`].
|
|
||||||
///
|
|
||||||
/// When the target format is N-Triples, RDF/XML or Turtle, writing a quad that is not
/// in the default graph fails with an [`io::ErrorKind::InvalidInput`] error.
///
/// <div class="warning">
|
|
||||||
///
|
|
||||||
/// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
|
||||||
///
|
|
||||||
/// <div class="warning">
|
|
||||||
///
|
|
||||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
|
||||||
/// use oxrdf::{Quad, NamedNode};
|
|
||||||
///
|
|
||||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
|
|
||||||
/// writer.write_quad(&Quad {
|
|
||||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
|
||||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
|
||||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
|
||||||
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
|
|
||||||
/// })?;
|
|
||||||
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[must_use] |
|
||||||
pub struct ToWriteQuadWriter<W: Write> { |
|
||||||
// Format-specific writer that every write/finish call is dispatched to.
formatter: ToWriteQuadWriterKind<W>, |
|
||||||
} |
|
||||||
|
|
||||||
// Private dispatch enum: one variant per format-specific synchronous writer
// that `ToWriteQuadWriter` can wrap.
enum ToWriteQuadWriterKind<W: Write> { |
|
||||||
NQuads(ToWriteNQuadsWriter<W>), |
|
||||||
NTriples(ToWriteNTriplesWriter<W>), |
|
||||||
RdfXml(ToWriteRdfXmlWriter<W>), |
|
||||||
TriG(ToWriteTriGWriter<W>), |
|
||||||
Turtle(ToWriteTurtleWriter<W>), |
|
||||||
} |
|
||||||
|
|
||||||
impl<W: Write> ToWriteQuadWriter<W> { |
|
||||||
/// Writes a [`QuadRef`]
|
|
||||||
pub fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> { |
|
||||||
match &mut self.formatter { |
|
||||||
ToWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad), |
|
||||||
ToWriteQuadWriterKind::NTriples(writer) => writer.write_triple(to_triple(quad)?), |
|
||||||
ToWriteQuadWriterKind::RdfXml(writer) => writer.write_triple(to_triple(quad)?), |
|
||||||
ToWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad), |
|
||||||
ToWriteQuadWriterKind::Turtle(writer) => writer.write_triple(to_triple(quad)?), |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Writes a [`TripleRef`]
|
|
||||||
pub fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> { |
|
||||||
self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph)) |
|
||||||
} |
|
||||||
|
|
||||||
/// Writes the last bytes of the file
|
|
||||||
///
|
|
||||||
/// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](io::BufWriter).
|
|
||||||
pub fn finish(self) -> io::Result<W> { |
|
||||||
Ok(match self.formatter { |
|
||||||
ToWriteQuadWriterKind::NQuads(writer) => writer.finish(), |
|
||||||
ToWriteQuadWriterKind::NTriples(writer) => writer.finish(), |
|
||||||
ToWriteQuadWriterKind::RdfXml(writer) => writer.finish()?, |
|
||||||
ToWriteQuadWriterKind::TriG(writer) => writer.finish()?, |
|
||||||
ToWriteQuadWriterKind::Turtle(writer) => writer.finish()?, |
|
||||||
}) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Writes quads or triples to a Tokio [`AsyncWrite`] implementation.
|
|
||||||
///
|
|
||||||
/// Can be built using [`RdfSerializer::serialize_to_tokio_async_write`].
|
|
||||||
///
|
|
||||||
/// <div class="warning">
|
|
||||||
///
|
|
||||||
/// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
|
||||||
///
|
|
||||||
/// <div class="warning">
|
|
||||||
///
|
|
||||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
|
||||||
/// use oxrdf::{Quad, NamedNode};
|
|
||||||
///
|
|
||||||
/// # #[tokio::main(flavor = "current_thread")]
|
|
||||||
/// # async fn main() -> std::io::Result<()> {
|
|
||||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
|
|
||||||
/// writer.write_quad(&Quad {
|
|
||||||
/// subject: NamedNode::new_unchecked("http://example.com/s").into(),
|
|
||||||
/// predicate: NamedNode::new_unchecked("http://example.com/p"),
|
|
||||||
/// object: NamedNode::new_unchecked("http://example.com/o").into(),
|
|
||||||
/// graph_name: NamedNode::new_unchecked("http://example.com/g").into()
|
|
||||||
/// }).await?;
|
|
||||||
/// assert_eq!(writer.finish().await?, "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
|
||||||
/// # Ok(())
|
|
||||||
/// # }
|
|
||||||
/// ```
|
|
||||||
#[must_use] |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
pub struct ToTokioAsyncWriteQuadWriter<W: AsyncWrite + Unpin> { |
|
||||||
// Format-specific asynchronous writer that every write/finish call is dispatched to.
formatter: ToTokioAsyncWriteQuadWriterKind<W>, |
|
||||||
} |
|
||||||
|
|
||||||
// Private dispatch enum: one variant per format-specific Tokio writer
// that `ToTokioAsyncWriteQuadWriter` can wrap. Only compiled with the `async-tokio` feature.
#[cfg(feature = "async-tokio")] |
|
||||||
enum ToTokioAsyncWriteQuadWriterKind<W: AsyncWrite + Unpin> { |
|
||||||
NQuads(ToTokioAsyncWriteNQuadsWriter<W>), |
|
||||||
NTriples(ToTokioAsyncWriteNTriplesWriter<W>), |
|
||||||
RdfXml(ToTokioAsyncWriteRdfXmlWriter<W>), |
|
||||||
TriG(ToTokioAsyncWriteTriGWriter<W>), |
|
||||||
Turtle(ToTokioAsyncWriteTurtleWriter<W>), |
|
||||||
} |
|
||||||
|
|
||||||
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteQuadWriter<W> {
    /// Writes a [`QuadRef`]
    ///
    /// N-Quads and TriG writers receive the quad unchanged. The N-Triples, RDF/XML and
    /// Turtle writers only accept triples, so the quad must be in the default graph,
    /// otherwise an [`io::ErrorKind::InvalidInput`] error is returned.
    pub async fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
        match &mut self.formatter {
            // Dataset formats.
            ToTokioAsyncWriteQuadWriterKind::NQuads(w) => w.write_quad(quad).await,
            ToTokioAsyncWriteQuadWriterKind::TriG(w) => w.write_quad(quad).await,
            // Graph formats: the quad is first narrowed to a triple.
            ToTokioAsyncWriteQuadWriterKind::NTriples(w) => {
                let triple = to_triple(quad)?;
                w.write_triple(triple).await
            }
            ToTokioAsyncWriteQuadWriterKind::RdfXml(w) => {
                let triple = to_triple(quad)?;
                w.write_triple(triple).await
            }
            ToTokioAsyncWriteQuadWriterKind::Turtle(w) => {
                let triple = to_triple(quad)?;
                w.write_triple(triple).await
            }
        }
    }

    /// Writes a [`TripleRef`]
    ///
    /// The triple is written as a quad of the default graph.
    pub async fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let quad = triple.into().in_graph(GraphNameRef::DefaultGraph);
        self.write_quad(quad).await
    }

    /// Writes the last bytes of the file
    ///
    /// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](tokio::io::BufWriter).
    pub async fn finish(self) -> io::Result<W> {
        // The N-Quads and N-Triples writers finish synchronously; the others must be awaited and may fail.
        let write = match self.formatter {
            ToTokioAsyncWriteQuadWriterKind::NQuads(w) => w.finish(),
            ToTokioAsyncWriteQuadWriterKind::NTriples(w) => w.finish(),
            ToTokioAsyncWriteQuadWriterKind::RdfXml(w) => w.finish().await?,
            ToTokioAsyncWriteQuadWriterKind::TriG(w) => w.finish().await?,
            ToTokioAsyncWriteQuadWriterKind::Turtle(w) => w.finish().await?,
        };
        Ok(write)
    }
}
|
||||||
|
|
||||||
fn to_triple<'a>(quad: impl Into<QuadRef<'a>>) -> io::Result<TripleRef<'a>> { |
|
||||||
let quad = quad.into(); |
|
||||||
if quad.graph_name.is_default_graph() { |
|
||||||
Ok(quad.into()) |
|
||||||
} else { |
|
||||||
Err(io::Error::new( |
|
||||||
io::ErrorKind::InvalidInput, |
|
||||||
"Only quads in the default graph can be serialized to a RDF graph format", |
|
||||||
)) |
|
||||||
} |
|
||||||
} |
|
@ -1,36 +0,0 @@ |
|||||||
[package] |
|
||||||
name = "oxrdfxml" |
|
||||||
version = "0.1.0-alpha.5" |
|
||||||
authors.workspace = true |
|
||||||
license.workspace = true |
|
||||||
readme = "README.md" |
|
||||||
keywords = ["RDFXML", "XML", "RDF"] |
|
||||||
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfxml" |
|
||||||
description = """ |
|
||||||
Parser and serializer for the RDF/XML format |
|
||||||
""" |
|
||||||
documentation = "https://docs.rs/oxrdfxml" |
|
||||||
edition.workspace = true |
|
||||||
rust-version.workspace = true |
|
||||||
|
|
||||||
[features] |
|
||||||
default = [] |
|
||||||
async-tokio = ["dep:tokio", "quick-xml/async-tokio"] |
|
||||||
|
|
||||||
[dependencies] |
|
||||||
oxilangtag.workspace = true |
|
||||||
oxiri.workspace = true |
|
||||||
oxrdf.workspace = true |
|
||||||
quick-xml.workspace = true |
|
||||||
thiserror.workspace = true |
|
||||||
tokio = { workspace = true, optional = true, features = ["io-util"] } |
|
||||||
|
|
||||||
[dev-dependencies] |
|
||||||
tokio = { workspace = true, features = ["rt", "macros"] } |
|
||||||
|
|
||||||
[lints] |
|
||||||
workspace = true |
|
||||||
|
|
||||||
[package.metadata.docs.rs] |
|
||||||
all-features = true |
|
||||||
rustdoc-args = ["--cfg", "docsrs"] |
|
@ -1,56 +0,0 @@ |
|||||||
OxRDF/XML |
|
||||||
========= |
|
||||||
|
|
||||||
[![Latest Version](https://img.shields.io/crates/v/oxrdfxml.svg)](https://crates.io/crates/oxrdfxml) |
|
||||||
[![Released API docs](https://docs.rs/oxrdfxml/badge.svg)](https://docs.rs/oxrdfxml) |
|
||||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfxml)](https://crates.io/crates/oxrdfxml) |
|
||||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
|
||||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
|
||||||
|
|
||||||
OxRdfXml is a parser and serializer for [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/). |
|
||||||
|
|
||||||
The entry points of this library are the two structs [`RdfXmlParser`] and [`RdfXmlSerializer`]. |
|
||||||
|
|
||||||
Usage example counting the number of people in an RDF/XML file: |
|
||||||
|
|
||||||
```rust |
|
||||||
use oxrdf::{NamedNodeRef, vocab::rdf}; |
|
||||||
use oxrdfxml::RdfXmlParser; |
|
||||||
|
|
||||||
fn main() { |
|
||||||
let file = br#"<?xml version="1.0"?> |
|
||||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/"> |
|
||||||
<rdf:Description rdf:about="http://example.com/foo"> |
|
||||||
<rdf:type rdf:resource="http://schema.org/Person" /> |
|
||||||
<schema:name>Foo</schema:name> |
|
||||||
</rdf:Description> |
|
||||||
<schema:Person rdf:about="http://example.com/bar" schema:name="Bar" /> |
|
||||||
</rdf:RDF>"#; |
|
||||||
|
|
||||||
let schema_person = NamedNodeRef::new("http://schema.org/Person").unwrap(); |
|
||||||
let mut count = 0; |
|
||||||
for triple in RdfXmlParser::new().parse_read(file.as_ref()) { |
|
||||||
let triple = triple.unwrap(); |
|
||||||
if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { |
|
||||||
count += 1; |
|
||||||
} |
|
||||||
} |
|
||||||
assert_eq!(2, count); |
|
||||||
} |
|
||||||
``` |
|
||||||
|
|
||||||
## License |
|
||||||
|
|
||||||
This project is licensed under either of |
|
||||||
|
|
||||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
|
||||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
|
||||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
|
||||||
`<http://opensource.org/licenses/MIT>`) |
|
||||||
|
|
||||||
at your option. |
|
||||||
|
|
||||||
|
|
||||||
### Contribution |
|
||||||
|
|
||||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
|
@ -1,89 +0,0 @@ |
|||||||
use oxilangtag::LanguageTagParseError; |
|
||||||
use oxiri::IriParseError; |
|
||||||
use std::io; |
|
||||||
use std::sync::Arc; |
|
||||||
|
|
||||||
/// Error returned during RDF/XML parsing.
///
/// Both variants are marked `#[error(transparent)]`, so the message and source of this
/// error are those of the wrapped error, and `#[from]` provides the matching `From` conversions.
|
|
||||||
#[derive(Debug, thiserror::Error)] |
|
||||||
pub enum RdfXmlParseError { |
|
||||||
/// I/O error during parsing (file not found...).
|
|
||||||
#[error(transparent)] |
|
||||||
Io(#[from] io::Error), |
|
||||||
/// An error in the file syntax.
|
|
||||||
#[error(transparent)] |
|
||||||
Syntax(#[from] RdfXmlSyntaxError), |
|
||||||
} |
|
||||||
|
|
||||||
impl From<RdfXmlParseError> for io::Error { |
|
||||||
#[inline] |
|
||||||
fn from(error: RdfXmlParseError) -> Self { |
|
||||||
match error { |
|
||||||
RdfXmlParseError::Io(error) => error, |
|
||||||
RdfXmlParseError::Syntax(error) => error.into(), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl From<quick_xml::Error> for RdfXmlParseError { |
|
||||||
#[inline] |
|
||||||
fn from(error: quick_xml::Error) -> Self { |
|
||||||
match error { |
|
||||||
quick_xml::Error::Io(error) => { |
|
||||||
Self::Io(Arc::try_unwrap(error).unwrap_or_else(|e| io::Error::new(e.kind(), e))) |
|
||||||
} |
|
||||||
_ => Self::Syntax(RdfXmlSyntaxError(SyntaxErrorKind::Xml(error))), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// An error in the syntax of the parsed file.
///
/// Opaque wrapper: the wrapped `SyntaxErrorKind` is only accessible inside this crate.
/// Being `#[error(transparent)]`, its message and source are those of the wrapped kind.
|
|
||||||
#[derive(Debug, thiserror::Error)] |
|
||||||
#[error(transparent)] |
|
||||||
pub struct RdfXmlSyntaxError(#[from] pub(crate) SyntaxErrorKind); |
|
||||||
|
|
||||||
/// The possible causes of a [`RdfXmlSyntaxError`].
#[derive(Debug, thiserror::Error)] |
|
||||||
pub enum SyntaxErrorKind { |
|
||||||
/// An error reported by quick-xml; displayed exactly as quick-xml formats it.
#[error(transparent)] |
|
||||||
Xml(#[from] quick_xml::Error), |
|
||||||
/// An IRI that failed to parse, kept together with the parser error.
#[error("error while parsing IRI '{iri}': {error}")] |
|
||||||
InvalidIri { |
|
||||||
iri: String, |
|
||||||
#[source] |
|
||||||
error: IriParseError, |
|
||||||
}, |
|
||||||
/// A language tag that failed to parse, kept together with the parser error.
#[error("error while parsing language tag '{tag}': {error}")] |
|
||||||
InvalidLanguageTag { |
|
||||||
tag: String, |
|
||||||
#[source] |
|
||||||
error: LanguageTagParseError, |
|
||||||
}, |
|
||||||
/// A free-form message, displayed as is.
#[error("{0}")] |
|
||||||
Msg(String), |
|
||||||
} |
|
||||||
|
|
||||||
impl RdfXmlSyntaxError { |
|
||||||
/// Builds an error from a printable error message.
|
|
||||||
#[inline] |
|
||||||
pub(crate) fn msg(msg: impl Into<String>) -> Self { |
|
||||||
Self(SyntaxErrorKind::Msg(msg.into())) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
impl From<RdfXmlSyntaxError> for io::Error { |
|
||||||
#[inline] |
|
||||||
fn from(error: RdfXmlSyntaxError) -> Self { |
|
||||||
match error.0 { |
|
||||||
SyntaxErrorKind::Xml(error) => match error { |
|
||||||
quick_xml::Error::Io(error) => { |
|
||||||
Arc::try_unwrap(error).unwrap_or_else(|e| Self::new(e.kind(), e)) |
|
||||||
} |
|
||||||
quick_xml::Error::UnexpectedEof(error) => { |
|
||||||
Self::new(io::ErrorKind::UnexpectedEof, error) |
|
||||||
} |
|
||||||
_ => Self::new(io::ErrorKind::InvalidData, error), |
|
||||||
}, |
|
||||||
SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg), |
|
||||||
_ => Self::new(io::ErrorKind::InvalidData, error), |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
@ -1,18 +0,0 @@ |
|||||||
#![doc = include_str!("../README.md")] |
|
||||||
#![doc(test(attr(deny(warnings))))] |
|
||||||
#![cfg_attr(docsrs, feature(doc_auto_cfg))] |
|
||||||
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
|
||||||
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
|
||||||
|
|
||||||
mod error; |
|
||||||
mod parser; |
|
||||||
mod serializer; |
|
||||||
mod utils; |
|
||||||
|
|
||||||
pub use error::{RdfXmlParseError, RdfXmlSyntaxError}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
pub use parser::FromTokioAsyncReadRdfXmlReader; |
|
||||||
pub use parser::{FromReadRdfXmlReader, RdfXmlParser}; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
pub use serializer::ToTokioAsyncWriteRdfXmlWriter; |
|
||||||
pub use serializer::{RdfXmlSerializer, ToWriteRdfXmlWriter}; |
|
File diff suppressed because it is too large
Load Diff
@ -1,461 +0,0 @@ |
|||||||
use crate::utils::*; |
|
||||||
use oxiri::{Iri, IriParseError}; |
|
||||||
use oxrdf::vocab::rdf; |
|
||||||
use oxrdf::{NamedNodeRef, Subject, SubjectRef, TermRef, TripleRef}; |
|
||||||
use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; |
|
||||||
use quick_xml::Writer; |
|
||||||
use std::borrow::Cow; |
|
||||||
use std::collections::BTreeMap; |
|
||||||
use std::io; |
|
||||||
use std::io::Write; |
|
||||||
use std::sync::Arc; |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
use tokio::io::AsyncWrite; |
|
||||||
|
|
||||||
/// A [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) serializer.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
|
|
||||||
/// use oxrdfxml::RdfXmlSerializer;
|
|
||||||
///
|
|
||||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
|
|
||||||
/// writer.write_triple(TripleRef::new(
|
|
||||||
/// NamedNodeRef::new("http://example.com#me")?,
|
|
||||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
|
||||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
|
||||||
/// ))?;
|
|
||||||
/// writer.write_triple(TripleRef::new(
|
|
||||||
/// NamedNodeRef::new("http://example.com#me")?,
|
|
||||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
|
||||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
|
||||||
/// ))?;
|
|
||||||
/// assert_eq!(
|
|
||||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
|
||||||
/// writer.finish()?.as_slice()
|
|
||||||
/// );
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[derive(Default)] |
|
||||||
#[must_use] |
|
||||||
pub struct RdfXmlSerializer { |
|
||||||
// Registered prefixes, keyed by namespace IRI with the prefix name as value.
prefixes: BTreeMap<String, String>, |
|
||||||
} |
|
||||||
|
|
||||||
impl RdfXmlSerializer { |
|
||||||
/// Builds a new [`RdfXmlSerializer`].
|
|
||||||
#[inline] |
|
||||||
pub fn new() -> Self { |
|
||||||
Self { |
|
||||||
prefixes: BTreeMap::new(), |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
#[inline] |
|
||||||
pub fn with_prefix( |
|
||||||
mut self, |
|
||||||
prefix_name: impl Into<String>, |
|
||||||
prefix_iri: impl Into<String>, |
|
||||||
) -> Result<Self, IriParseError> { |
|
||||||
self.prefixes.insert( |
|
||||||
Iri::parse(prefix_iri.into())?.into_inner(), |
|
||||||
prefix_name.into(), |
|
||||||
); |
|
||||||
Ok(self) |
|
||||||
} |
|
||||||
|
|
||||||
/// Writes a RDF/XML file to a [`Write`] implementation.
|
|
||||||
///
|
|
||||||
/// This writer does unbuffered writes.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
|
|
||||||
/// use oxrdfxml::RdfXmlSerializer;
|
|
||||||
///
|
|
||||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
|
|
||||||
/// writer.write_triple(TripleRef::new(
|
|
||||||
/// NamedNodeRef::new("http://example.com#me")?,
|
|
||||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
|
||||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
|
||||||
/// ))?;
|
|
||||||
/// writer.write_triple(TripleRef::new(
|
|
||||||
/// NamedNodeRef::new("http://example.com#me")?,
|
|
||||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
|
||||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
|
||||||
/// ))?;
|
|
||||||
/// assert_eq!(
|
|
||||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
|
||||||
/// writer.finish()?.as_slice()
|
|
||||||
/// );
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[allow(clippy::unused_self)] |
|
||||||
pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteRdfXmlWriter<W> { |
|
||||||
ToWriteRdfXmlWriter { |
|
||||||
writer: Writer::new_with_indent(write, b'\t', 1), |
|
||||||
inner: self.inner_writer(), |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Writes a RDF/XML file to a [`AsyncWrite`] implementation.
|
|
||||||
///
|
|
||||||
/// This writer does unbuffered writes.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef};
|
|
||||||
/// use oxrdfxml::RdfXmlSerializer;
|
|
||||||
///
|
|
||||||
/// # #[tokio::main(flavor = "current_thread")]
|
|
||||||
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
||||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new());
|
|
||||||
/// writer.write_triple(TripleRef::new(
|
|
||||||
/// NamedNodeRef::new("http://example.com#me")?,
|
|
||||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
|
||||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
|
||||||
/// )).await?;
|
|
||||||
/// writer.write_triple(TripleRef::new(
|
|
||||||
/// NamedNodeRef::new("http://example.com#me")?,
|
|
||||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
|
||||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
|
||||||
/// )).await?;
|
|
||||||
/// assert_eq!(
|
|
||||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
|
||||||
/// writer.finish().await?.as_slice()
|
|
||||||
/// );
|
|
||||||
/// # Ok(())
|
|
||||||
/// # }
|
|
||||||
/// ```
|
|
||||||
#[allow(clippy::unused_self)] |
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>( |
|
||||||
self, |
|
||||||
write: W, |
|
||||||
) -> ToTokioAsyncWriteRdfXmlWriter<W> { |
|
||||||
ToTokioAsyncWriteRdfXmlWriter { |
|
||||||
writer: Writer::new_with_indent(write, b'\t', 1), |
|
||||||
inner: self.inner_writer(), |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
fn inner_writer(mut self) -> InnerRdfXmlWriter { |
|
||||||
self.prefixes.insert( |
|
||||||
"http://www.w3.org/1999/02/22-rdf-syntax-ns#".into(), |
|
||||||
"rdf".into(), |
|
||||||
); |
|
||||||
InnerRdfXmlWriter { |
|
||||||
current_subject: None, |
|
||||||
current_resource_tag: None, |
|
||||||
prefixes: self.prefixes, |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Writes a RDF/XML file to a [`Write`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_write`].
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
|
|
||||||
/// use oxrdfxml::RdfXmlSerializer;
|
|
||||||
///
|
|
||||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
|
|
||||||
/// writer.write_triple(TripleRef::new(
|
|
||||||
/// NamedNodeRef::new("http://example.com#me")?,
|
|
||||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
|
||||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
|
||||||
/// ))?;
|
|
||||||
/// writer.write_triple(TripleRef::new(
|
|
||||||
/// NamedNodeRef::new("http://example.com#me")?,
|
|
||||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
|
||||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
|
||||||
/// ))?;
|
|
||||||
/// assert_eq!(
|
|
||||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
|
||||||
/// writer.finish()?.as_slice()
|
|
||||||
/// );
|
|
||||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
||||||
/// ```
|
|
||||||
#[must_use] |
|
||||||
pub struct ToWriteRdfXmlWriter<W: Write> { |
|
||||||
// quick-xml writer around the output; the serializer creates it with tab indentation.
writer: Writer<W>, |
|
||||||
// Serialization state that turns each triple into XML events for `writer`.
inner: InnerRdfXmlWriter, |
|
||||||
} |
|
||||||
|
|
||||||
impl<W: Write> ToWriteRdfXmlWriter<W> { |
|
||||||
/// Writes an extra triple.
|
|
||||||
#[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)] |
|
||||||
pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> { |
|
||||||
let mut buffer = Vec::new(); |
|
||||||
self.inner.write_triple(t, &mut buffer)?; |
|
||||||
self.flush_buffer(&mut buffer) |
|
||||||
} |
|
||||||
|
|
||||||
/// Ends the write process and returns the underlying [`Write`].
|
|
||||||
pub fn finish(mut self) -> io::Result<W> { |
|
||||||
let mut buffer = Vec::new(); |
|
||||||
self.inner.finish(&mut buffer); |
|
||||||
self.flush_buffer(&mut buffer)?; |
|
||||||
Ok(self.writer.into_inner()) |
|
||||||
} |
|
||||||
|
|
||||||
fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> { |
|
||||||
for event in buffer.drain(0..) { |
|
||||||
self.writer.write_event(event).map_err(map_err)?; |
|
||||||
} |
|
||||||
Ok(()) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/// Writes a RDF/XML file to a [`AsyncWrite`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_tokio_async_write`].
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef};
|
|
||||||
/// use oxrdfxml::RdfXmlSerializer;
|
|
||||||
///
|
|
||||||
/// # #[tokio::main(flavor = "current_thread")]
|
|
||||||
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
||||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new());
|
|
||||||
/// writer.write_triple(TripleRef::new(
|
|
||||||
/// NamedNodeRef::new("http://example.com#me")?,
|
|
||||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
|
||||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
|
||||||
/// )).await?;
|
|
||||||
/// writer.write_triple(TripleRef::new(
|
|
||||||
/// NamedNodeRef::new("http://example.com#me")?,
|
|
||||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
|
||||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
|
||||||
/// )).await?;
|
|
||||||
/// assert_eq!(
|
|
||||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
|
||||||
/// writer.finish().await?.as_slice()
|
|
||||||
/// );
|
|
||||||
/// # Ok(())
|
|
||||||
/// # }
|
|
||||||
/// ```
|
|
||||||
#[cfg(feature = "async-tokio")] |
|
||||||
#[must_use] |
|
||||||
pub struct ToTokioAsyncWriteRdfXmlWriter<W: AsyncWrite + Unpin> { |
|
||||||
// quick-xml writer around the output; the serializer creates it with tab indentation.
writer: Writer<W>, |
|
||||||
// Serialization state, of the same type as the one used by the synchronous writer.
inner: InnerRdfXmlWriter, |
|
||||||
} |
|
||||||
|
|
||||||
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteRdfXmlWriter<W> {
    /// Writes an extra triple.
    ///
    /// The triple is first turned into XML events by the inner serializer,
    /// then those events are written asynchronously to the underlying writer.
    ///
    /// # Errors
    ///
    /// Returns an [`io::Error`] if the inner serializer rejects the triple or
    /// if writing an event fails.
    #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)]
    pub async fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let mut buffer = Vec::new();
        self.inner.write_triple(t, &mut buffer)?;
        self.flush_buffer(&mut buffer).await
    }

    /// Ends the write process and returns the underlying [`AsyncWrite`].
    ///
    /// # Errors
    ///
    /// Returns an [`io::Error`] if writing the closing events fails.
    pub async fn finish(mut self) -> io::Result<W> {
        let mut buffer = Vec::new();
        self.inner.finish(&mut buffer);
        self.flush_buffer(&mut buffer).await?;
        Ok(self.writer.into_inner())
    }

    /// Drains `buffer` and writes each event in order, stopping at the first failure.
    async fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> {
        for event in buffer.drain(..) {
            self.writer
                .write_event_async(event)
                .await
                .map_err(map_err)?;
        }
        Ok(())
    }
}
|
||||||
|
|
||||||
pub struct InnerRdfXmlWriter { |
|
||||||
current_subject: Option<Subject>, |
|
||||||
current_resource_tag: Option<String>, |
|
||||||
prefixes: BTreeMap<String, String>, |
|
||||||
} |
|
||||||
|
|
||||||
impl InnerRdfXmlWriter { |
|
||||||
#[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)] |
|
||||||
fn write_triple<'a>( |
|
||||||
&mut self, |
|
||||||
t: impl Into<TripleRef<'a>>, |
|
||||||
output: &mut Vec<Event<'a>>, |
|
||||||
) -> io::Result<()> { |
|
||||||
if self.current_subject.is_none() { |
|
||||||
self.write_start(output); |
|
||||||
} |
|
||||||
|
|
||||||
let triple = t.into(); |
|
||||||
// We open a new rdf:Description if useful
|
|
||||||
if self.current_subject.as_ref().map(Subject::as_ref) != Some(triple.subject) { |
|
||||||
if self.current_subject.is_some() { |
|
||||||
output.push(Event::End( |
|
||||||
self.current_resource_tag |
|
||||||
.take() |
|
||||||
.map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new), |
|
||||||
)); |
|
||||||
} |
|
||||||
self.current_subject = Some(triple.subject.into_owned()); |
|
||||||
|
|
||||||
let (mut description_open, with_type_tag) = if triple.predicate == rdf::TYPE { |
|
||||||
if let TermRef::NamedNode(t) = triple.object { |
|
||||||
let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(t); |
|
||||||
let mut description_open = BytesStart::new(prop_qname.clone()); |
|
||||||
if let Some(prop_xmlns) = prop_xmlns { |
|
||||||
description_open.push_attribute(prop_xmlns); |
|
||||||
} |
|
||||||
self.current_resource_tag = Some(prop_qname.into_owned()); |
|
||||||
(description_open, true) |
|
||||||
} else { |
|
||||||
(BytesStart::new("rdf:Description"), false) |
|
||||||
} |
|
||||||
} else { |
|
||||||
(BytesStart::new("rdf:Description"), false) |
|
||||||
}; |
|
||||||
match triple.subject { |
|
||||||
SubjectRef::NamedNode(node) => { |
|
||||||
description_open.push_attribute(("rdf:about", node.as_str())) |
|
||||||
} |
|
||||||
SubjectRef::BlankNode(node) => { |
|
||||||
description_open.push_attribute(("rdf:nodeID", node.as_str())) |
|
||||||
} |
|
||||||
_ => { |
|
||||||
return Err(io::Error::new( |
|
||||||
io::ErrorKind::InvalidInput, |
|
||||||
"RDF/XML only supports named or blank subject", |
|
||||||
)) |
|
||||||
} |
|
||||||
} |
|
||||||
output.push(Event::Start(description_open)); |
|
||||||
if with_type_tag { |
|
||||||
return Ok(()); // No need for a value
|
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(triple.predicate); |
|
||||||
let mut property_open = BytesStart::new(prop_qname.clone()); |
|
||||||
if let Some(prop_xmlns) = prop_xmlns { |
|
||||||
property_open.push_attribute(prop_xmlns); |
|
||||||
} |
|
||||||
let content = match triple.object { |
|
||||||
TermRef::NamedNode(node) => { |
|
||||||
property_open.push_attribute(("rdf:resource", node.as_str())); |
|
||||||
None |
|
||||||
} |
|
||||||
TermRef::BlankNode(node) => { |
|
||||||
property_open.push_attribute(("rdf:nodeID", node.as_str())); |
|
||||||
None |
|
||||||
} |
|
||||||
TermRef::Literal(literal) => { |
|
||||||
if let Some(language) = literal.language() { |
|
||||||
property_open.push_attribute(("xml:lang", language)); |
|
||||||
} else if !literal.is_plain() { |
|
||||||
property_open.push_attribute(("rdf:datatype", literal.datatype().as_str())); |
|
||||||
} |
|
||||||
Some(literal.value()) |
|
||||||
} |
|
||||||
_ => { |
|
||||||
return Err(io::Error::new( |
|
||||||
io::ErrorKind::InvalidInput, |
|
||||||
"RDF/XML only supports named, blank or literal object", |
|
||||||
)) |
|
||||||
} |
|
||||||
}; |
|
||||||
if let Some(content) = content { |
|
||||||
output.push(Event::Start(property_open)); |
|
||||||
output.push(Event::Text(BytesText::new(content))); |
|
||||||
output.push(Event::End(BytesEnd::new(prop_qname))); |
|
||||||
} else { |
|
||||||
output.push(Event::Empty(property_open)); |
|
||||||
} |
|
||||||
Ok(()) |
|
||||||
} |
|
||||||
|
|
||||||
fn write_start(&self, output: &mut Vec<Event<'_>>) { |
|
||||||
output.push(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None))); |
|
||||||
let mut rdf_open = BytesStart::new("rdf:RDF"); |
|
||||||
for (prefix_value, prefix_name) in &self.prefixes { |
|
||||||
rdf_open.push_attribute(( |
|
||||||
format!("xmlns:{prefix_name}").as_str(), |
|
||||||
prefix_value.as_str(), |
|
||||||
)); |
|
||||||
} |
|
||||||
output.push(Event::Start(rdf_open)) |
|
||||||
} |
|
||||||
|
|
||||||
fn finish(&mut self, output: &mut Vec<Event<'static>>) { |
|
||||||
if self.current_subject.is_some() { |
|
||||||
output.push(Event::End( |
|
||||||
self.current_resource_tag |
|
||||||
.take() |
|
||||||
.map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new), |
|
||||||
)); |
|
||||||
} else { |
|
||||||
self.write_start(output); |
|
||||||
} |
|
||||||
output.push(Event::End(BytesEnd::new("rdf:RDF"))); |
|
||||||
} |
|
||||||
|
|
||||||
fn uri_to_qname_and_xmlns<'a>( |
|
||||||
&self, |
|
||||||
uri: NamedNodeRef<'a>, |
|
||||||
) -> (Cow<'a, str>, Option<(&'a str, &'a str)>) { |
|
||||||
let (prop_prefix, prop_value) = split_iri(uri.as_str()); |
|
||||||
if let Some(prop_prefix) = self.prefixes.get(prop_prefix) { |
|
||||||
( |
|
||||||
if prop_prefix.is_empty() { |
|
||||||
Cow::Borrowed(prop_value) |
|
||||||
} else { |
|
||||||
Cow::Owned(format!("{prop_prefix}:{prop_value}")) |
|
||||||
}, |
|
||||||
None, |
|
||||||
) |
|
||||||
} else if prop_prefix == "http://www.w3.org/2000/xmlns/" { |
|
||||||
(Cow::Owned(format!("xmlns:{prop_value}")), None) |
|
||||||
} else if prop_value.is_empty() { |
|
||||||
(Cow::Borrowed("p:"), Some(("xmlns:p", prop_prefix))) |
|
||||||
} else { |
|
||||||
(Cow::Borrowed(prop_value), Some(("xmlns", prop_prefix))) |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
fn map_err(error: quick_xml::Error) -> io::Error { |
|
||||||
if let quick_xml::Error::Io(error) = error { |
|
||||||
Arc::try_unwrap(error).unwrap_or_else(|error| io::Error::new(error.kind(), error)) |
|
||||||
} else { |
|
||||||
io::Error::new(io::ErrorKind::Other, error) |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
fn split_iri(iri: &str) -> (&str, &str) { |
|
||||||
if let Some(position_base) = iri.rfind(|c| !is_name_char(c) || c == ':') { |
|
||||||
if let Some(position_add) = iri[position_base..].find(|c| is_name_start_char(c) && c != ':') |
|
||||||
{ |
|
||||||
( |
|
||||||
&iri[..position_base + position_add], |
|
||||||
&iri[position_base + position_add..], |
|
||||||
) |
|
||||||
} else { |
|
||||||
(iri, "") |
|
||||||
} |
|
||||||
} else { |
|
||||||
(iri, "") |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `split_iri` on slash, hash and URN style IRIs.
    #[test]
    fn test_split_iri() {
        let cases = [
            ("http://schema.org/Person", ("http://schema.org/", "Person")),
            ("http://schema.org/", ("http://schema.org/", "")),
            ("http://schema.org#foo", ("http://schema.org#", "foo")),
            ("urn:isbn:foo", ("urn:isbn:", "foo")),
        ];
        for (iri, expected) in cases {
            assert_eq!(split_iri(iri), expected);
        }
    }
}
|
@ -1,26 +0,0 @@ |
|||||||
/// Returns `true` if `c` may start an XML name (the `NameStartChar` production).
pub fn is_name_start_char(c: char) -> bool {
    // ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
    match c {
        ':' | '_' => true,
        'A'..='Z' | 'a'..='z' => true,
        '\u{00C0}'..='\u{00D6}' | '\u{00D8}'..='\u{00F6}' | '\u{00F8}'..='\u{02FF}' => true,
        '\u{0370}'..='\u{037D}' | '\u{037F}'..='\u{1FFF}' => true,
        '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' => true,
        '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' => true,
        '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' => true,
        '\u{10000}'..='\u{EFFFF}' => true,
        _ => false,
    }
}
|
||||||
|
|
||||||
pub fn is_name_char(c: char) -> bool { |
|
||||||
// NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
|
|
||||||
is_name_start_char(c) |
|
||||||
|| matches!(c, '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}') |
|
||||||
} |
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,626 @@ |
|||||||
|
use super::date_time::{DateTimeError, GDay, GMonth, GMonthDay, GYear, GYearMonth, TimezoneOffset}; |
||||||
|
use super::decimal::ParseDecimalError; |
||||||
|
use super::duration::{DayTimeDuration, YearMonthDuration}; |
||||||
|
use super::*; |
||||||
|
use std::error::Error; |
||||||
|
use std::fmt; |
||||||
|
use std::num::ParseIntError; |
||||||
|
use std::str::FromStr; |
||||||
|
|
||||||
|
/// A parsing error
|
||||||
|
#[derive(Debug, Clone)] |
||||||
|
pub struct XsdParseError { |
||||||
|
kind: XsdParseErrorKind, |
||||||
|
} |
||||||
|
|
||||||
|
#[derive(Debug, Clone)] |
||||||
|
enum XsdParseErrorKind { |
||||||
|
ParseInt(ParseIntError), |
||||||
|
ParseDecimal(ParseDecimalError), |
||||||
|
DateTime(DateTimeError), |
||||||
|
Message(&'static str), |
||||||
|
} |
||||||
|
|
||||||
|
const OVERFLOW_ERROR: XsdParseError = XsdParseError { |
||||||
|
kind: XsdParseErrorKind::Message("Overflow error"), |
||||||
|
}; |
||||||
|
|
||||||
|
impl fmt::Display for XsdParseError { |
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||||
|
match &self.kind { |
||||||
|
XsdParseErrorKind::ParseInt(error) => { |
||||||
|
write!(f, "Error while parsing integer: {error}") |
||||||
|
} |
||||||
|
XsdParseErrorKind::ParseDecimal(error) => { |
||||||
|
write!(f, "Error while parsing decimal: {error}") |
||||||
|
} |
||||||
|
XsdParseErrorKind::DateTime(error) => error.fmt(f), |
||||||
|
XsdParseErrorKind::Message(msg) => write!(f, "{msg}"), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl XsdParseError { |
||||||
|
const fn msg(message: &'static str) -> Self { |
||||||
|
Self { |
||||||
|
kind: XsdParseErrorKind::Message(message), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl Error for XsdParseError { |
||||||
|
fn source(&self) -> Option<&(dyn Error + 'static)> { |
||||||
|
match &self.kind { |
||||||
|
XsdParseErrorKind::ParseInt(error) => Some(error), |
||||||
|
XsdParseErrorKind::ParseDecimal(error) => Some(error), |
||||||
|
XsdParseErrorKind::DateTime(error) => Some(error), |
||||||
|
XsdParseErrorKind::Message(_) => None, |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl From<ParseIntError> for XsdParseError { |
||||||
|
fn from(error: ParseIntError) -> Self { |
||||||
|
Self { |
||||||
|
kind: XsdParseErrorKind::ParseInt(error), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl From<ParseDecimalError> for XsdParseError { |
||||||
|
fn from(error: ParseDecimalError) -> Self { |
||||||
|
Self { |
||||||
|
kind: XsdParseErrorKind::ParseDecimal(error), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl From<DateTimeError> for XsdParseError { |
||||||
|
fn from(error: DateTimeError) -> Self { |
||||||
|
Self { |
||||||
|
kind: XsdParseErrorKind::DateTime(error), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// [6] duYearFrag ::= unsignedNoDecimalPtNumeral 'Y'
|
||||||
|
// [7] duMonthFrag ::= unsignedNoDecimalPtNumeral 'M'
|
||||||
|
// [8] duDayFrag ::= unsignedNoDecimalPtNumeral 'D'
|
||||||
|
// [9] duHourFrag ::= unsignedNoDecimalPtNumeral 'H'
|
||||||
|
// [10] duMinuteFrag ::= unsignedNoDecimalPtNumeral 'M'
|
||||||
|
// [11] duSecondFrag ::= (unsignedNoDecimalPtNumeral | unsignedDecimalPtNumeral) 'S'
|
||||||
|
// [12] duYearMonthFrag ::= (duYearFrag duMonthFrag?) | duMonthFrag
|
||||||
|
// [13] duTimeFrag ::= 'T' ((duHourFrag duMinuteFrag? duSecondFrag?) | (duMinuteFrag duSecondFrag?) | duSecondFrag)
|
||||||
|
// [14] duDayTimeFrag ::= (duDayFrag duTimeFrag?) | duTimeFrag
|
||||||
|
// [15] durationLexicalRep ::= '-'? 'P' ((duYearMonthFrag duDayTimeFrag?) | duDayTimeFrag)
|
||||||
|
struct DurationParts { |
||||||
|
year_month: Option<i64>, |
||||||
|
day_time: Option<Decimal>, |
||||||
|
} |
||||||
|
|
||||||
|
fn duration_parts(input: &str) -> Result<(DurationParts, &str), XsdParseError> { |
||||||
|
// States
|
||||||
|
const START: u32 = 0; |
||||||
|
const AFTER_YEAR: u32 = 1; |
||||||
|
const AFTER_MONTH: u32 = 2; |
||||||
|
const AFTER_DAY: u32 = 3; |
||||||
|
const AFTER_T: u32 = 4; |
||||||
|
const AFTER_HOUR: u32 = 5; |
||||||
|
const AFTER_MINUTE: u32 = 6; |
||||||
|
const AFTER_SECOND: u32 = 7; |
||||||
|
|
||||||
|
let (is_negative, input) = if let Some(left) = input.strip_prefix('-') { |
||||||
|
(true, left) |
||||||
|
} else { |
||||||
|
(false, input) |
||||||
|
}; |
||||||
|
let mut input = expect_char(input, 'P', "Durations must start with 'P'")?; |
||||||
|
let mut state = START; |
||||||
|
let mut year_month: Option<i64> = None; |
||||||
|
let mut day_time: Option<Decimal> = None; |
||||||
|
while !input.is_empty() { |
||||||
|
if let Some(left) = input.strip_prefix('T') { |
||||||
|
if state >= AFTER_T { |
||||||
|
return Err(XsdParseError::msg("Duplicated time separator 'T'")); |
||||||
|
} |
||||||
|
state = AFTER_T; |
||||||
|
input = left; |
||||||
|
} else { |
||||||
|
let (number_str, left) = decimal_prefix(input); |
||||||
|
match left.chars().next() { |
||||||
|
Some('Y') if state < AFTER_YEAR => { |
||||||
|
year_month = Some( |
||||||
|
year_month |
||||||
|
.unwrap_or_default() |
||||||
|
.checked_add( |
||||||
|
apply_i64_neg(i64::from_str(number_str)?, is_negative)? |
||||||
|
.checked_mul(12) |
||||||
|
.ok_or(OVERFLOW_ERROR)?, |
||||||
|
) |
||||||
|
.ok_or(OVERFLOW_ERROR)?, |
||||||
|
); |
||||||
|
state = AFTER_YEAR; |
||||||
|
} |
||||||
|
Some('M') if state < AFTER_MONTH => { |
||||||
|
year_month = Some( |
||||||
|
year_month |
||||||
|
.unwrap_or_default() |
||||||
|
.checked_add(apply_i64_neg(i64::from_str(number_str)?, is_negative)?) |
||||||
|
.ok_or(OVERFLOW_ERROR)?, |
||||||
|
); |
||||||
|
state = AFTER_MONTH; |
||||||
|
} |
||||||
|
Some('D') if state < AFTER_DAY => { |
||||||
|
if number_str.contains('.') { |
||||||
|
return Err(XsdParseError::msg( |
||||||
|
"Decimal numbers are not allowed for days", |
||||||
|
)); |
||||||
|
} |
||||||
|
day_time = Some( |
||||||
|
day_time |
||||||
|
.unwrap_or_default() |
||||||
|
.checked_add( |
||||||
|
apply_decimal_neg(Decimal::from_str(number_str)?, is_negative)? |
||||||
|
.checked_mul(86400) |
||||||
|
.ok_or(OVERFLOW_ERROR)?, |
||||||
|
) |
||||||
|
.ok_or(OVERFLOW_ERROR)?, |
||||||
|
); |
||||||
|
state = AFTER_DAY; |
||||||
|
} |
||||||
|
Some('H') if state == AFTER_T => { |
||||||
|
if number_str.contains('.') { |
||||||
|
return Err(XsdParseError::msg( |
||||||
|
"Decimal numbers are not allowed for hours", |
||||||
|
)); |
||||||
|
} |
||||||
|
day_time = Some( |
||||||
|
day_time |
||||||
|
.unwrap_or_default() |
||||||
|
.checked_add( |
||||||
|
apply_decimal_neg(Decimal::from_str(number_str)?, is_negative)? |
||||||
|
.checked_mul(3600) |
||||||
|
.ok_or(OVERFLOW_ERROR)?, |
||||||
|
) |
||||||
|
.ok_or(OVERFLOW_ERROR)?, |
||||||
|
); |
||||||
|
state = AFTER_HOUR; |
||||||
|
} |
||||||
|
Some('M') if (AFTER_T..AFTER_MINUTE).contains(&state) => { |
||||||
|
if number_str.contains('.') { |
||||||
|
return Err(XsdParseError::msg( |
||||||
|
"Decimal numbers are not allowed for minutes", |
||||||
|
)); |
||||||
|
} |
||||||
|
day_time = Some( |
||||||
|
day_time |
||||||
|
.unwrap_or_default() |
||||||
|
.checked_add( |
||||||
|
apply_decimal_neg(Decimal::from_str(number_str)?, is_negative)? |
||||||
|
.checked_mul(60) |
||||||
|
.ok_or(OVERFLOW_ERROR)?, |
||||||
|
) |
||||||
|
.ok_or(OVERFLOW_ERROR)?, |
||||||
|
); |
||||||
|
state = AFTER_MINUTE; |
||||||
|
} |
||||||
|
Some('S') if (AFTER_T..AFTER_SECOND).contains(&state) => { |
||||||
|
day_time = Some( |
||||||
|
day_time |
||||||
|
.unwrap_or_default() |
||||||
|
.checked_add(apply_decimal_neg( |
||||||
|
Decimal::from_str(number_str)?, |
||||||
|
is_negative, |
||||||
|
)?) |
||||||
|
.ok_or(OVERFLOW_ERROR)?, |
||||||
|
); |
||||||
|
state = AFTER_SECOND; |
||||||
|
} |
||||||
|
Some(_) => return Err(XsdParseError::msg("Unexpected type character")), |
||||||
|
None => { |
||||||
|
return Err(XsdParseError::msg( |
||||||
|
"Numbers in durations must be followed by a type character", |
||||||
|
)) |
||||||
|
} |
||||||
|
} |
||||||
|
input = &left[1..]; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
Ok(( |
||||||
|
DurationParts { |
||||||
|
year_month, |
||||||
|
day_time, |
||||||
|
}, |
||||||
|
input, |
||||||
|
)) |
||||||
|
} |
||||||
|
|
||||||
|
fn apply_i64_neg(value: i64, is_negative: bool) -> Result<i64, XsdParseError> { |
||||||
|
if is_negative { |
||||||
|
value.checked_neg().ok_or(OVERFLOW_ERROR) |
||||||
|
} else { |
||||||
|
Ok(value) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn apply_decimal_neg(value: Decimal, is_negative: bool) -> Result<Decimal, XsdParseError> { |
||||||
|
if is_negative { |
||||||
|
value.checked_neg().ok_or(OVERFLOW_ERROR) |
||||||
|
} else { |
||||||
|
Ok(value) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
pub fn parse_duration(input: &str) -> Result<Duration, XsdParseError> { |
||||||
|
let parts = ensure_complete(input, duration_parts)?; |
||||||
|
if parts.year_month.is_none() && parts.day_time.is_none() { |
||||||
|
return Err(XsdParseError::msg("Empty duration")); |
||||||
|
} |
||||||
|
Ok(Duration::new( |
||||||
|
parts.year_month.unwrap_or(0), |
||||||
|
parts.day_time.unwrap_or_default(), |
||||||
|
)) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn parse_year_month_duration(input: &str) -> Result<YearMonthDuration, XsdParseError> { |
||||||
|
let parts = ensure_complete(input, duration_parts)?; |
||||||
|
if parts.day_time.is_some() { |
||||||
|
return Err(XsdParseError::msg( |
||||||
|
"There must not be any day or time component in a yearMonthDuration", |
||||||
|
)); |
||||||
|
} |
||||||
|
Ok(YearMonthDuration::new(parts.year_month.ok_or( |
||||||
|
XsdParseError::msg("No year and month values found"), |
||||||
|
)?)) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn parse_day_time_duration(input: &str) -> Result<DayTimeDuration, XsdParseError> { |
||||||
|
let parts = ensure_complete(input, duration_parts)?; |
||||||
|
if parts.year_month.is_some() { |
||||||
|
return Err(XsdParseError::msg( |
||||||
|
"There must not be any year or month component in a dayTimeDuration", |
||||||
|
)); |
||||||
|
} |
||||||
|
Ok(DayTimeDuration::new(parts.day_time.ok_or( |
||||||
|
XsdParseError::msg("No day or time values found"), |
||||||
|
)?)) |
||||||
|
} |
||||||
|
|
||||||
|
// [16] dateTimeLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag 'T' ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag?
|
||||||
|
fn date_time_lexical_rep(input: &str) -> Result<(DateTime, &str), XsdParseError> { |
||||||
|
let (year, input) = year_frag(input)?; |
||||||
|
let input = expect_char(input, '-', "The year and month must be separated by '-'")?; |
||||||
|
let (month, input) = month_frag(input)?; |
||||||
|
let input = expect_char(input, '-', "The month and day must be separated by '-'")?; |
||||||
|
let (day, input) = day_frag(input)?; |
||||||
|
let input = expect_char(input, 'T', "The date and time must be separated by 'T'")?; |
||||||
|
let (hour, input) = hour_frag(input)?; |
||||||
|
let input = expect_char(input, ':', "The hours and minutes must be separated by ':'")?; |
||||||
|
let (minute, input) = minute_frag(input)?; |
||||||
|
let input = expect_char( |
||||||
|
input, |
||||||
|
':', |
||||||
|
"The minutes and seconds must be separated by ':'", |
||||||
|
)?; |
||||||
|
let (second, input) = second_frag(input)?; |
||||||
|
// We validate 24:00:00
|
||||||
|
if hour == 24 && minute != 0 && second != Decimal::from(0) { |
||||||
|
return Err(XsdParseError::msg( |
||||||
|
"Times are not allowed to be after 24:00:00", |
||||||
|
)); |
||||||
|
} |
||||||
|
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||||
|
Ok(( |
||||||
|
DateTime::new(year, month, day, hour, minute, second, timezone_offset)?, |
||||||
|
input, |
||||||
|
)) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn parse_date_time(input: &str) -> Result<DateTime, XsdParseError> { |
||||||
|
ensure_complete(input, date_time_lexical_rep) |
||||||
|
} |
||||||
|
|
||||||
|
// [17] timeLexicalRep ::= ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag?
|
||||||
|
fn time_lexical_rep(input: &str) -> Result<(Time, &str), XsdParseError> { |
||||||
|
let (hour, input) = hour_frag(input)?; |
||||||
|
let input = expect_char(input, ':', "The hours and minutes must be separated by ':'")?; |
||||||
|
let (minute, input) = minute_frag(input)?; |
||||||
|
let input = expect_char( |
||||||
|
input, |
||||||
|
':', |
||||||
|
"The minutes and seconds must be separated by ':'", |
||||||
|
)?; |
||||||
|
let (second, input) = second_frag(input)?; |
||||||
|
// We validate 24:00:00
|
||||||
|
if hour == 24 && minute != 0 && second != Decimal::from(0) { |
||||||
|
return Err(XsdParseError::msg( |
||||||
|
"Times are not allowed to be after 24:00:00", |
||||||
|
)); |
||||||
|
} |
||||||
|
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||||
|
Ok((Time::new(hour, minute, second, timezone_offset)?, input)) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn parse_time(input: &str) -> Result<Time, XsdParseError> { |
||||||
|
ensure_complete(input, time_lexical_rep) |
||||||
|
} |
||||||
|
|
||||||
|
// [18] dateLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag timezoneFrag? Constraint: Day-of-month Representations
|
||||||
|
fn date_lexical_rep(input: &str) -> Result<(Date, &str), XsdParseError> { |
||||||
|
let (year, input) = year_frag(input)?; |
||||||
|
let input = expect_char(input, '-', "The year and month must be separated by '-'")?; |
||||||
|
let (month, input) = month_frag(input)?; |
||||||
|
let input = expect_char(input, '-', "The month and day must be separated by '-'")?; |
||||||
|
let (day, input) = day_frag(input)?; |
||||||
|
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||||
|
Ok((Date::new(year, month, day, timezone_offset)?, input)) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn parse_date(input: &str) -> Result<Date, XsdParseError> { |
||||||
|
ensure_complete(input, date_lexical_rep) |
||||||
|
} |
||||||
|
|
||||||
|
// [19] gYearMonthLexicalRep ::= yearFrag '-' monthFrag timezoneFrag?
|
||||||
|
fn g_year_month_lexical_rep(input: &str) -> Result<(GYearMonth, &str), XsdParseError> { |
||||||
|
let (year, input) = year_frag(input)?; |
||||||
|
let input = expect_char(input, '-', "The year and month must be separated by '-'")?; |
||||||
|
let (month, input) = month_frag(input)?; |
||||||
|
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||||
|
Ok((GYearMonth::new(year, month, timezone_offset)?, input)) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn parse_g_year_month(input: &str) -> Result<GYearMonth, XsdParseError> { |
||||||
|
ensure_complete(input, g_year_month_lexical_rep) |
||||||
|
} |
||||||
|
|
||||||
|
// [20] gYearLexicalRep ::= yearFrag timezoneFrag?
|
||||||
|
fn g_year_lexical_rep(input: &str) -> Result<(GYear, &str), XsdParseError> { |
||||||
|
let (year, input) = year_frag(input)?; |
||||||
|
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||||
|
Ok((GYear::new(year, timezone_offset)?, input)) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn parse_g_year(input: &str) -> Result<GYear, XsdParseError> { |
||||||
|
ensure_complete(input, g_year_lexical_rep) |
||||||
|
} |
||||||
|
|
||||||
|
// [21] gMonthDayLexicalRep ::= '--' monthFrag '-' dayFrag timezoneFrag? Constraint: Day-of-month Representations
|
||||||
|
fn g_month_day_lexical_rep(input: &str) -> Result<(GMonthDay, &str), XsdParseError> { |
||||||
|
let input = expect_char(input, '-', "gMonthDay values must start with '--'")?; |
||||||
|
let input = expect_char(input, '-', "gMonthDay values must start with '--'")?; |
||||||
|
let (month, input) = month_frag(input)?; |
||||||
|
let input = expect_char(input, '-', "The month and day must be separated by '-'")?; |
||||||
|
let (day, input) = day_frag(input)?; |
||||||
|
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||||
|
Ok((GMonthDay::new(month, day, timezone_offset)?, input)) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn parse_g_month_day(input: &str) -> Result<GMonthDay, XsdParseError> { |
||||||
|
ensure_complete(input, g_month_day_lexical_rep) |
||||||
|
} |
||||||
|
|
||||||
|
// [22] gDayLexicalRep ::= '---' dayFrag timezoneFrag?
|
||||||
|
fn g_day_lexical_rep(input: &str) -> Result<(GDay, &str), XsdParseError> { |
||||||
|
let input = expect_char(input, '-', "gDay values must start with '---'")?; |
||||||
|
let input = expect_char(input, '-', "gDay values must start with '---'")?; |
||||||
|
let input = expect_char(input, '-', "gDay values must start with '---'")?; |
||||||
|
let (day, input) = day_frag(input)?; |
||||||
|
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||||
|
Ok((GDay::new(day, timezone_offset)?, input)) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn parse_g_day(input: &str) -> Result<GDay, XsdParseError> { |
||||||
|
ensure_complete(input, g_day_lexical_rep) |
||||||
|
} |
||||||
|
|
||||||
|
// [23] gMonthLexicalRep ::= '--' monthFrag timezoneFrag?
|
||||||
|
fn g_month_lexical_rep(input: &str) -> Result<(GMonth, &str), XsdParseError> { |
||||||
|
let input = expect_char(input, '-', "gMonth values must start with '--'")?; |
||||||
|
let input = expect_char(input, '-', "gMonth values must start with '--'")?; |
||||||
|
let (month, input) = month_frag(input)?; |
||||||
|
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||||
|
Ok((GMonth::new(month, timezone_offset)?, input)) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn parse_g_month(input: &str) -> Result<GMonth, XsdParseError> { |
||||||
|
ensure_complete(input, g_month_lexical_rep) |
||||||
|
} |
||||||
|
|
||||||
|
// [56] yearFrag ::= '-'? (([1-9] digit digit digit+)) | ('0' digit digit digit))
|
||||||
|
fn year_frag(input: &str) -> Result<(i64, &str), XsdParseError> { |
||||||
|
let (sign, input) = if let Some(left) = input.strip_prefix('-') { |
||||||
|
(-1, left) |
||||||
|
} else { |
||||||
|
(1, input) |
||||||
|
}; |
||||||
|
let (number_str, input) = integer_prefix(input); |
||||||
|
if number_str.len() < 4 { |
||||||
|
return Err(XsdParseError::msg("The year should be encoded on 4 digits")); |
||||||
|
} |
||||||
|
if number_str.len() > 4 && number_str.starts_with('0') { |
||||||
|
return Err(XsdParseError::msg( |
||||||
|
"The years value must not start with 0 if it can be encoded in at least 4 digits", |
||||||
|
)); |
||||||
|
} |
||||||
|
let number = i64::from_str(number_str)?; |
||||||
|
Ok((sign * number, input)) |
||||||
|
} |
||||||
|
|
||||||
|
// [57] monthFrag ::= ('0' [1-9]) | ('1' [0-2])
|
||||||
|
fn month_frag(input: &str) -> Result<(u8, &str), XsdParseError> { |
||||||
|
let (number_str, input) = integer_prefix(input); |
||||||
|
if number_str.len() != 2 { |
||||||
|
return Err(XsdParseError::msg("Month must be encoded with two digits")); |
||||||
|
} |
||||||
|
let number = u8::from_str(number_str)?; |
||||||
|
if !(1..=12).contains(&number) { |
||||||
|
return Err(XsdParseError::msg("Month must be between 01 and 12")); |
||||||
|
} |
||||||
|
Ok((number, input)) |
||||||
|
} |
||||||
|
|
||||||
|
// [58] dayFrag ::= ('0' [1-9]) | ([12] digit) | ('3' [01])
|
||||||
|
fn day_frag(input: &str) -> Result<(u8, &str), XsdParseError> { |
||||||
|
let (number_str, input) = integer_prefix(input); |
||||||
|
if number_str.len() != 2 { |
||||||
|
return Err(XsdParseError::msg("Day must be encoded with two digits")); |
||||||
|
} |
||||||
|
let number = u8::from_str(number_str)?; |
||||||
|
if !(1..=31).contains(&number) { |
||||||
|
return Err(XsdParseError::msg("Day must be between 01 and 31")); |
||||||
|
} |
||||||
|
Ok((number, input)) |
||||||
|
} |
||||||
|
|
||||||
|
// [59] hourFrag ::= ([01] digit) | ('2' [0-3])
|
||||||
|
// We also allow 24 for ease of parsing
|
||||||
|
fn hour_frag(input: &str) -> Result<(u8, &str), XsdParseError> { |
||||||
|
let (number_str, input) = integer_prefix(input); |
||||||
|
if number_str.len() != 2 { |
||||||
|
return Err(XsdParseError::msg("Hours must be encoded with two digits")); |
||||||
|
} |
||||||
|
let number = u8::from_str(number_str)?; |
||||||
|
if !(0..=24).contains(&number) { |
||||||
|
return Err(XsdParseError::msg("Hours must be between 00 and 24")); |
||||||
|
} |
||||||
|
Ok((number, input)) |
||||||
|
} |
||||||
|
|
||||||
|
// [60] minuteFrag ::= [0-5] digit
|
||||||
|
fn minute_frag(input: &str) -> Result<(u8, &str), XsdParseError> { |
||||||
|
let (number_str, input) = integer_prefix(input); |
||||||
|
if number_str.len() != 2 { |
||||||
|
return Err(XsdParseError::msg( |
||||||
|
"Minutes must be encoded with two digits", |
||||||
|
)); |
||||||
|
} |
||||||
|
let number = u8::from_str(number_str)?; |
||||||
|
if !(0..=59).contains(&number) { |
||||||
|
return Err(XsdParseError::msg("Minutes must be between 00 and 59")); |
||||||
|
} |
||||||
|
Ok((number, input)) |
||||||
|
} |
||||||
|
|
||||||
|
// [61] secondFrag ::= ([0-5] digit) ('.' digit+)?
|
||||||
|
fn second_frag(input: &str) -> Result<(Decimal, &str), XsdParseError> { |
||||||
|
let (number_str, input) = decimal_prefix(input); |
||||||
|
let (before_dot_str, _) = number_str.split_once('.').unwrap_or((number_str, "")); |
||||||
|
if before_dot_str.len() != 2 { |
||||||
|
return Err(XsdParseError::msg( |
||||||
|
"Seconds must be encoded with two digits", |
||||||
|
)); |
||||||
|
} |
||||||
|
let number = Decimal::from_str(number_str)?; |
||||||
|
if number < Decimal::from(0) || number >= Decimal::from(60) { |
||||||
|
return Err(XsdParseError::msg("Seconds must be between 00 and 60")); |
||||||
|
} |
||||||
|
if number_str.ends_with('.') { |
||||||
|
return Err(XsdParseError::msg( |
||||||
|
"Seconds are not allowed to end with a dot", |
||||||
|
)); |
||||||
|
} |
||||||
|
Ok((number, input)) |
||||||
|
} |
||||||
|
|
||||||
|
// [63] timezoneFrag ::= 'Z' | ('+' | '-') (('0' digit | '1' [0-3]) ':' minuteFrag | '14:00')
|
||||||
|
fn timezone_frag(input: &str) -> Result<(TimezoneOffset, &str), XsdParseError> { |
||||||
|
if let Some(left) = input.strip_prefix('Z') { |
||||||
|
return Ok((TimezoneOffset::UTC, left)); |
||||||
|
} |
||||||
|
let (sign, input) = if let Some(left) = input.strip_prefix('-') { |
||||||
|
(-1, left) |
||||||
|
} else if let Some(left) = input.strip_prefix('+') { |
||||||
|
(1, left) |
||||||
|
} else { |
||||||
|
(1, input) |
||||||
|
}; |
||||||
|
|
||||||
|
let (hour_str, input) = integer_prefix(input); |
||||||
|
if hour_str.len() != 2 { |
||||||
|
return Err(XsdParseError::msg( |
||||||
|
"The timezone hours must be encoded with two digits", |
||||||
|
)); |
||||||
|
} |
||||||
|
let hours = i16::from_str(hour_str)?; |
||||||
|
|
||||||
|
let input = expect_char( |
||||||
|
input, |
||||||
|
':', |
||||||
|
"The timezone hours and minutes must be separated by ':'", |
||||||
|
)?; |
||||||
|
let (minutes, input) = minute_frag(input)?; |
||||||
|
|
||||||
|
if hours > 13 && !(hours == 14 && minutes == 0) { |
||||||
|
return Err(XsdParseError::msg( |
||||||
|
"The timezone hours must be between 00 and 13", |
||||||
|
)); |
||||||
|
} |
||||||
|
|
||||||
|
Ok(( |
||||||
|
TimezoneOffset::new(sign * (hours * 60 + i16::from(minutes)))?, |
||||||
|
input, |
||||||
|
)) |
||||||
|
} |
||||||
|
|
||||||
|
fn ensure_complete<T>( |
||||||
|
input: &str, |
||||||
|
parse: impl FnOnce(&str) -> Result<(T, &str), XsdParseError>, |
||||||
|
) -> Result<T, XsdParseError> { |
||||||
|
let (result, left) = parse(input)?; |
||||||
|
if !left.is_empty() { |
||||||
|
return Err(XsdParseError::msg("Unrecognized value suffix")); |
||||||
|
} |
||||||
|
Ok(result) |
||||||
|
} |
||||||
|
|
||||||
|
fn expect_char<'a>( |
||||||
|
input: &'a str, |
||||||
|
constant: char, |
||||||
|
error_message: &'static str, |
||||||
|
) -> Result<&'a str, XsdParseError> { |
||||||
|
if let Some(left) = input.strip_prefix(constant) { |
||||||
|
Ok(left) |
||||||
|
} else { |
||||||
|
Err(XsdParseError::msg(error_message)) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Splits `input` into its leading run of ASCII digits and the rest of the string.
//
// The first element is empty when `input` does not start with an ASCII digit, and the
// second is empty when `input` consists only of ASCII digits. Non-ASCII digits are not
// treated as digits.
fn integer_prefix(input: &str) -> (&str, &str) {
    // `find` yields the byte offset of the first non-digit character, which is always a
    // valid char boundary for `split_at`.
    let digits_len = input
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(input.len());
    input.split_at(digits_len)
}
||||||
|
|
||||||
|
// Splits `input` into its leading decimal-looking prefix and the rest of the string.
//
// The prefix is made of ASCII digits with at most one '.', which may appear anywhere in
// it (including first or last). Scanning stops at a second '.' or at any other character.
fn decimal_prefix(input: &str) -> (&str, &str) {
    let mut dot_seen = false;
    let prefix_len = input
        .char_indices()
        .find(|&(_, c)| {
            if c == '.' && !dot_seen {
                // The first dot belongs to the prefix; any later one ends it.
                dot_seen = true;
                return false;
            }
            !c.is_ascii_digit()
        })
        .map_or(input.len(), |(offset, _)| offset);
    input.split_at(prefix_len)
}
||||||
|
|
||||||
|
fn optional_end<T>( |
||||||
|
input: &str, |
||||||
|
parse: impl FnOnce(&str) -> Result<(T, &str), XsdParseError>, |
||||||
|
) -> Result<(Option<T>, &str), XsdParseError> { |
||||||
|
Ok(if input.is_empty() { |
||||||
|
(None, input) |
||||||
|
} else { |
||||||
|
let (result, input) = parse(input)?; |
||||||
|
(Some(result), input) |
||||||
|
}) |
||||||
|
} |
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue