Compare commits
4 Commits
main
...
nextgraph2
Author | SHA1 | Date |
---|---|---|
Niko PLP | 86a8100121 | 9 months ago |
Niko PLP | e963387b02 | 9 months ago |
Niko PLP | 7b8901718e | 9 months ago |
Niko PLP | b3ec66e21b | 9 months ago |
@ -0,0 +1,137 @@ |
||||
[build] |
||||
rustflags = [ |
||||
"-Wtrivial-casts", |
||||
"-Wtrivial-numeric-casts", |
||||
"-Wunsafe-code", |
||||
"-Wunused-lifetimes", |
||||
"-Wunused-qualifications", |
||||
# TODO: 1.63+ "-Wclippy::as-underscore", |
||||
# TODO: 1.65+ "-Wclippy::bool-to-int-with-if", |
||||
"-Wclippy::borrow-as-ptr", |
||||
"-Wclippy::case-sensitive-file-extension-comparisons", |
||||
"-Wclippy::cast-lossless", |
||||
"-Wclippy::cast-possible-truncation", |
||||
"-Wclippy::cast-possible-wrap", |
||||
"-Wclippy::cast-precision-loss", |
||||
"-Wclippy::cast-ptr-alignment", |
||||
"-Wclippy::cast-sign-loss", |
||||
"-Wclippy::checked-conversions", |
||||
"-Wclippy::clone-on-ref-ptr", |
||||
"-Wclippy::cloned-instead-of-copied", |
||||
"-Wclippy::copy-iterator", |
||||
"-Wclippy::dbg-macro", |
||||
"-Wclippy::decimal-literal-representation", |
||||
"-Wclippy::default-trait-access", |
||||
"-Wclippy::default-union-representation", |
||||
# TODO: 1.61+ "-Wclippy::deref-by-slicing", |
||||
# TODO: 1.63+ "-Wclippy::doc-link-with-quotes", |
||||
# TODO: 1.62+ "-Wclippy::empty-drop", |
||||
"-Wclippy::empty-enum", |
||||
# TODO: on major version "-Wclippy::empty-structs-with-brackets", |
||||
"-Wclippy::enum-glob-use", |
||||
"-Wclippy::exit", |
||||
"-Wclippy::expect-used", |
||||
"-Wclippy::expl-impl-clone-on-copy", |
||||
"-Wclippy::explicit-deref-methods", |
||||
"-Wclippy::explicit-into-iter-loop", |
||||
"-Wclippy::explicit-iter-loop", |
||||
"-Wclippy::filter-map-next", |
||||
"-Wclippy::flat-map-option", |
||||
"-Wclippy::fn-to-numeric-cast-any", |
||||
# TODO: 1.62+ "-Wclippy::format-push-string", |
||||
"-Wclippy::from-iter-instead-of-collect", |
||||
"-Wclippy::get-unwrap", |
||||
"-Wclippy::if-not-else", |
||||
"-Wclippy::if-then-some-else-none", |
||||
"-Wclippy::implicit-clone", |
||||
"-Wclippy::inconsistent-struct-constructor", |
||||
"-Wclippy::index-refutable-slice", |
||||
"-Wclippy::inefficient-to-string", |
||||
"-Wclippy::inline-always", |
||||
"-Wclippy::inline-asm-x86-att-syntax", |
||||
"-Wclippy::inline-asm-x86-intel-syntax", |
||||
"-Wclippy::invalid-upcast-comparisons", |
||||
"-Wclippy::items-after-statements", |
||||
"-Wclippy::large-digit-groups", |
||||
# TODO: 1.68+ "-Wclippy::large-futures", |
||||
"-Wclippy::large-stack-arrays", |
||||
"-Wclippy::large-types-passed-by-value", |
||||
"-Wclippy::let-underscore-must-use", |
||||
"-Wclippy::let-unit-value", |
||||
"-Wclippy::linkedlist", |
||||
"-Wclippy::lossy-float-literal", |
||||
"-Wclippy::macro-use-imports", |
||||
"-Wclippy::manual-assert", |
||||
# TODO: 1.65+ "-Wclippy::manual-instant-elapsed", |
||||
# TODO: 1.67+ "-Wclippy::manual-let-else", |
||||
"-Wclippy::manual-ok-or", |
||||
# TODO: 1.65+ "-Wclippy::manual-string-new", |
||||
"-Wclippy::many-single-char-names", |
||||
"-Wclippy::map-unwrap-or", |
||||
"-Wclippy::match-bool", |
||||
"-Wclippy::match-same-arms", |
||||
"-Wclippy::match-wildcard-for-single-variants", |
||||
"-Wclippy::maybe-infinite-iter", |
||||
"-Wclippy::mem-forget", |
||||
# TODO: 1.63+ "-Wclippy::mismatching-type-param-order", |
||||
"-Wclippy::multiple-inherent-impl", |
||||
"-Wclippy::mut-mut", |
||||
"-Wclippy::mutex-atomic", |
||||
"-Wclippy::naive-bytecount", |
||||
"-Wclippy::needless-bitwise-bool", |
||||
"-Wclippy::needless-continue", |
||||
"-Wclippy::needless-pass-by-value", |
||||
"-Wclippy::no-effect-underscore-binding", |
||||
# TODO: 1.69+ "-Wclippy::no-mangle-with-rust-abi", |
||||
"-Wclippy::non-ascii-literal", |
||||
"-Wclippy::print-stderr", |
||||
"-Wclippy::print-stdout", |
||||
"-Wclippy::ptr-as-ptr", |
||||
"-Wclippy::range-minus-one", |
||||
"-Wclippy::range-plus-one", |
||||
"-Wclippy::rc-buffer", |
||||
"-Wclippy::rc-mutex", |
||||
"-Wclippy::redundant-closure-for-method-calls", |
||||
"-Wclippy::redundant-else", |
||||
"-Wclippy::redundant-feature-names", |
||||
"-Wclippy::ref-binding-to-reference", |
||||
"-Wclippy::ref-option-ref", |
||||
"-Wclippy::rest-pat-in-fully-bound-structs", |
||||
"-Wclippy::return-self-not-must-use", |
||||
"-Wclippy::same-functions-in-if-condition", |
||||
# TODO: strange failure on 1.60 "-Wclippy::same-name-method", |
||||
# TODO: 1.68+ "-Wclippy::semicolon-outside-block", |
||||
"-Wclippy::single-match-else", |
||||
"-Wclippy::stable-sort-primitive", |
||||
"-Wclippy::str-to-string", |
||||
"-Wclippy::string-add", |
||||
"-Wclippy::string-add-assign", |
||||
"-Wclippy::string-lit-as-bytes", |
||||
"-Wclippy::string-to-string", |
||||
# TODO: 1.67+ "-Wclippy::suspicious-xor-used-as-pow", |
||||
"-Wclippy::todo", |
||||
"-Wclippy::transmute-ptr-to-ptr", |
||||
"-Wclippy::trivially-copy-pass-by-ref", |
||||
"-Wclippy::try-err", |
||||
"-Wclippy::unicode-not-nfc", |
||||
"-Wclippy::unimplemented", |
||||
# TODO: 1.66+ "-Wclippy::uninlined-format-args", |
||||
# TODO: 1.70+ "-Wclippy::unnecessary-box-returns", |
||||
# TODO: 1.61+ "-Wclippy::unnecessary-join", |
||||
# TODO: 1.67+ "-Wclippy::unnecessary-safety-comment", |
||||
# TODO: 1.67+ "-Wclippy::unnecessary-safety-doc", |
||||
"-Wclippy::unnecessary-self-imports", |
||||
"-Wclippy::unnecessary-wraps", |
||||
"-Wclippy::unneeded-field-pattern", |
||||
"-Wclippy::unnested-or-patterns", |
||||
"-Wclippy::unreadable-literal", |
||||
"-Wclippy::unseparated-literal-suffix", |
||||
"-Wclippy::unused-async", |
||||
"-Wclippy::unused-self", |
||||
"-Wclippy::use-debug", |
||||
"-Wclippy::used-underscore-binding", |
||||
"-Wclippy::verbose-bit-mask", |
||||
"-Wclippy::verbose-file-reads", |
||||
"-Wclippy::wildcard-dependencies", |
||||
"-Wclippy::zero-sized-map-values", |
||||
] |
@ -1,4 +1,5 @@ |
||||
FROM gcr.io/oss-fuzz-base/base-builder-rust:v1 |
||||
RUN apt-get update && apt-get install -y llvm-dev libclang-dev clang && apt-get clean && rm --recursive --force /var/lib/apt/lists/* |
||||
COPY . $SRC/oxigraph |
||||
WORKDIR oxigraph |
||||
COPY .clusterfuzzlite/build.sh $SRC/ |
||||
|
@ -0,0 +1,21 @@ |
||||
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust/.devcontainer/base.Dockerfile |
||||
|
||||
# [Choice] Debian OS version (use bullseye on local arm64/Apple Silicon): buster, bullseye |
||||
ARG VARIANT="bullseye" |
||||
FROM mcr.microsoft.com/vscode/devcontainers/rust:0-${VARIANT} |
||||
|
||||
# [Optional] Uncomment this section to install additional packages. |
||||
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ |
||||
&& apt-get -y install --no-install-recommends \ |
||||
python3 \ |
||||
python3-venv \ |
||||
python-is-python3 \ |
||||
libclang-dev |
||||
|
||||
ENV VIRTUAL_ENV=/opt/venv |
||||
RUN python -m venv $VIRTUAL_ENV |
||||
ENV PATH="$VIRTUAL_ENV/bin:$PATH" |
||||
RUN pip install --no-cache-dir -r python/requirements.dev.txt |
||||
|
||||
# Change owner to the devcontainer user |
||||
RUN chown -R 1000:1000 $VIRTUAL_ENV |
@ -0,0 +1,69 @@ |
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: |
||||
// https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust |
||||
{ |
||||
"name": "Rust", |
||||
"build": { |
||||
"dockerfile": "Dockerfile", |
||||
"args": { |
||||
// Use the VARIANT arg to pick a Debian OS version: buster, bullseye |
||||
// Use bullseye when on local on arm64/Apple Silicon. |
||||
"VARIANT": "bullseye" |
||||
} |
||||
}, |
||||
"runArgs": ["--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined"], |
||||
|
||||
// Configure tool-specific properties. |
||||
"customizations": { |
||||
// Configure properties specific to VS Code. |
||||
"vscode": { |
||||
// Set *default* container specific settings.json values on container create. |
||||
"settings": { |
||||
"lldb.executable": "/usr/bin/lldb", |
||||
// VS Code doesn't watch files under ./target |
||||
"files.watcherExclude": { |
||||
"**/target/**": true |
||||
}, |
||||
"rust-analyzer.checkOnSave.command": "clippy", |
||||
|
||||
"python.defaultInterpreterPath": "/opt/venv/bin/python", |
||||
"python.linting.enabled": true, |
||||
"python.linting.pylintEnabled": true, |
||||
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8", |
||||
"python.formatting.blackPath": "/usr/local/py-utils/bin/black", |
||||
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf", |
||||
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit", |
||||
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8", |
||||
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy", |
||||
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle", |
||||
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle", |
||||
"python.linting.pylintPath": "/opt/venv/bin/pylint", |
||||
"python.testing.pytestPath": "/opt/venv/bin/pytest" |
||||
}, |
||||
|
||||
// Add the IDs of extensions you want installed when the container is created. |
||||
"extensions": [ |
||||
"vadimcn.vscode-lldb", |
||||
"mutantdino.resourcemonitor", |
||||
"rust-lang.rust-analyzer", |
||||
"tamasfe.even-better-toml", |
||||
"serayuzgur.crates", |
||||
"ms-python.python", |
||||
"ms-python.vscode-pylance", |
||||
"esbenp.prettier-vscode", |
||||
"stardog-union.stardog-rdf-grammars" |
||||
] |
||||
} |
||||
}, |
||||
|
||||
// Use 'forwardPorts' to make a list of ports inside the container available locally. |
||||
// "forwardPorts": [], |
||||
|
||||
// Use 'postCreateCommand' to run commands after the container is created. |
||||
"postCreateCommand": "git submodule update --init && cargo build", |
||||
|
||||
// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. |
||||
"remoteUser": "vscode", |
||||
"features": { |
||||
"python": "3.10" |
||||
} |
||||
} |
@ -1,27 +0,0 @@ |
||||
name: 'Setup Rust' |
||||
description: 'Setup Rust using Rustup' |
||||
inputs: |
||||
version: |
||||
description: 'Rust version to use. By default latest stable version' |
||||
required: false |
||||
default: 'stable' |
||||
component: |
||||
description: 'Rust extra component to install like clippy' |
||||
required: false |
||||
target: |
||||
description: 'Rust extra target to install like wasm32-unknown-unknown' |
||||
required: false |
||||
runs: |
||||
using: "composite" |
||||
steps: |
||||
- run: rustup update |
||||
shell: bash |
||||
- run: rustup default ${{ inputs.version }} |
||||
shell: bash |
||||
- run: rustup component add ${{ inputs.component }} |
||||
shell: bash |
||||
if: ${{ inputs.component }} |
||||
- run: rustup target add ${{ inputs.target }} |
||||
shell: bash |
||||
if: ${{ inputs.target }} |
||||
- uses: Swatinem/rust-cache@v2 |
@ -1,11 +0,0 @@ |
||||
# Installs the RocksDB shared library system-wide.
#
# If a ./rocksdb checkout is already present it is reused as is; otherwise
# RocksDB is cloned, pinned to v8.0.0 and its shared library is built first.
#
# The existence check must use `-d`: `rocksdb` is the directory created by
# `git clone`, and `-f` only matches regular files, so the reuse branch
# could never be taken.
if [ -d "rocksdb" ]
then
  cd rocksdb || exit
else
  git clone https://github.com/facebook/rocksdb.git
  cd rocksdb || exit
  git checkout v8.0.0
  make shared_lib
fi
# Install the shared library, then refresh the dynamic linker cache for /usr/local/lib
sudo make install-shared
sudo ldconfig /usr/local/lib
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@ |
||||
avoid-breaking-exported-api = false |
||||
avoid-breaking-exported-api = true |
||||
cognitive-complexity-threshold = 50 |
||||
too-many-arguments-threshold = 10 |
||||
type-complexity-threshold = 500 |
Before Width: | Height: | Size: 4.6 KiB |
@ -1,35 +0,0 @@ |
||||
+------------------+ +----------------+ +-----------------+ |
||||
+ oxigraph CLI {r} + + pyoxigraph {p} + + oxigraph JS {j} + |
||||
+------------------+ +----------------+ +-----------------+ |
||||
|
||||
+---------------------------------------------------------------------------+ |
||||
+ oxigraph (Rust) {r} + |
||||
+---------------------------------------------------------------------------+ |
||||
|
||||
+----------------------------+ +-------------+ |
||||
+ oxrdfio {r} + + sparopt {r} + |
||||
+----------------------------+ +-------------+ |
||||
|
||||
+-----------+ +--------------+ +-----------------+ +----------------+ |
||||
+ oxttl {r} + + oxrdfxml {r} + + spargebra {r} + + sparesults {r} + |
||||
+-----------+ +--------------+ +-----------------+ +----------------+ |
||||
|
||||
+-----------------------------------------------------------------------+ |
||||
+ oxrdf {r} + |
||||
+-----------------------------------------------------------------------+ |
||||
|
||||
+------------------+ |
||||
+ oxsdatatypes {r} + |
||||
+------------------+ |
||||
|
||||
|
||||
# Legend: |
||||
r = { |
||||
fill: papayawhip; |
||||
} |
||||
p = { |
||||
fill: lightyellow; |
||||
} |
||||
j = { |
||||
fill: lightgreen; |
||||
} |
@ -1,28 +0,0 @@ |
||||
#![no_main] |
||||
|
||||
use libfuzzer_sys::fuzz_target; |
||||
use oxttl::N3Parser; |
||||
|
||||
fuzz_target!(|data: &[u8]| { |
||||
let mut quads = Vec::new(); |
||||
let mut parser = N3Parser::new() |
||||
.with_base_iri("http://example.com/") |
||||
.unwrap() |
||||
.parse(); |
||||
for chunk in data.split(|c| *c == 0xFF) { |
||||
parser.extend_from_slice(chunk); |
||||
while let Some(result) = parser.read_next() { |
||||
if let Ok(quad) = result { |
||||
quads.push(quad); |
||||
} |
||||
} |
||||
} |
||||
parser.end(); |
||||
while let Some(result) = parser.read_next() { |
||||
if let Ok(quad) = result { |
||||
quads.push(quad); |
||||
} |
||||
} |
||||
assert!(parser.is_end()); |
||||
//TODO: serialize
|
||||
}); |
@ -1,84 +0,0 @@ |
||||
#![no_main] |
||||
|
||||
use libfuzzer_sys::fuzz_target; |
||||
use oxrdf::Quad; |
||||
use oxttl::{NQuadsParser, NQuadsSerializer}; |
||||
|
||||
fn parse<'a>( |
||||
chunks: impl IntoIterator<Item = &'a [u8]>, |
||||
unchecked: bool, |
||||
) -> (Vec<Quad>, Vec<String>) { |
||||
let mut quads = Vec::new(); |
||||
let mut errors = Vec::new(); |
||||
let mut parser = NQuadsParser::new().with_quoted_triples(); |
||||
if unchecked { |
||||
parser = parser.unchecked(); |
||||
} |
||||
let mut reader = parser.parse(); |
||||
for chunk in chunks { |
||||
reader.extend_from_slice(chunk); |
||||
while let Some(result) = reader.read_next() { |
||||
match result { |
||||
Ok(quad) => quads.push(quad), |
||||
Err(error) => errors.push(error.to_string()), |
||||
} |
||||
} |
||||
} |
||||
reader.end(); |
||||
while let Some(result) = reader.read_next() { |
||||
match result { |
||||
Ok(quad) => quads.push(quad), |
||||
Err(error) => errors.push(error.to_string()), |
||||
} |
||||
} |
||||
assert!(reader.is_end()); |
||||
(quads, errors) |
||||
} |
||||
|
||||
fuzz_target!(|data: &[u8]| { |
||||
// We parse with splitting
|
||||
let (quads, errors) = parse(data.split(|c| *c == 0xFF), false); |
||||
// We parse without splitting
|
||||
let (quads_without_split, errors_without_split) = parse( |
||||
[data |
||||
.iter() |
||||
.copied() |
||||
.filter(|c| *c != 0xFF) |
||||
.collect::<Vec<_>>() |
||||
.as_slice()], |
||||
false, |
||||
); |
||||
assert_eq!(quads, quads_without_split); |
||||
assert_eq!(errors, errors_without_split); |
||||
|
||||
// We test also unchecked if valid
|
||||
if errors.is_empty() { |
||||
let (quads_unchecked, errors_unchecked) = parse(data.split(|c| *c == 0xFF), true); |
||||
assert!(errors_unchecked.is_empty()); |
||||
assert_eq!(quads, quads_unchecked); |
||||
} |
||||
|
||||
// We serialize
|
||||
let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new()); |
||||
for quad in &quads { |
||||
writer.write_quad(quad).unwrap(); |
||||
} |
||||
let new_serialization = writer.finish(); |
||||
|
||||
// We parse the serialization
|
||||
let new_quads = NQuadsParser::new() |
||||
.with_quoted_triples() |
||||
.parse_read(new_serialization.as_slice()) |
||||
.collect::<Result<Vec<_>, _>>() |
||||
.map_err(|e| { |
||||
format!( |
||||
"Error on {:?} from {quads:?} based on {:?}: {e}", |
||||
String::from_utf8_lossy(&new_serialization), |
||||
String::from_utf8_lossy(data) |
||||
) |
||||
}) |
||||
.unwrap(); |
||||
|
||||
// We check the roundtrip has not changed anything
|
||||
assert_eq!(new_quads, quads); |
||||
}); |
@ -1,35 +0,0 @@ |
||||
#![no_main] |
||||
|
||||
use libfuzzer_sys::fuzz_target; |
||||
use oxrdfxml::{RdfXmlParser, RdfXmlSerializer}; |
||||
|
||||
fuzz_target!(|data: &[u8]| { |
||||
// We parse
|
||||
let triples = RdfXmlParser::new() |
||||
.parse_read(data) |
||||
.flatten() |
||||
.collect::<Vec<_>>(); |
||||
|
||||
// We serialize
|
||||
let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new()); |
||||
for triple in &triples { |
||||
writer.write_triple(triple).unwrap(); |
||||
} |
||||
let new_serialization = writer.finish().unwrap(); |
||||
|
||||
// We parse the serialization
|
||||
let new_triples = RdfXmlParser::new() |
||||
.parse_read(new_serialization.as_slice()) |
||||
.collect::<Result<Vec<_>, _>>() |
||||
.map_err(|e| { |
||||
format!( |
||||
"Error on {:?} from {triples:?} based on {:?}: {e}", |
||||
String::from_utf8_lossy(&new_serialization), |
||||
String::from_utf8_lossy(data) |
||||
) |
||||
}) |
||||
.unwrap(); |
||||
|
||||
// We check the roundtrip has not changed anything
|
||||
assert_eq!(new_triples, triples); |
||||
}); |
@ -1,166 +0,0 @@ |
||||
#![no_main] |
||||
|
||||
use libfuzzer_sys::fuzz_target; |
||||
use oxrdf::graph::CanonicalizationAlgorithm; |
||||
use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple}; |
||||
use oxttl::{TriGParser, TriGSerializer}; |
||||
|
||||
fn parse<'a>( |
||||
chunks: impl IntoIterator<Item = &'a [u8]>, |
||||
unchecked: bool, |
||||
) -> (Vec<Quad>, Vec<String>, Vec<(String, String)>) { |
||||
let mut quads = Vec::new(); |
||||
let mut errors = Vec::new(); |
||||
let mut parser = TriGParser::new() |
||||
.with_quoted_triples() |
||||
.with_base_iri("http://example.com/") |
||||
.unwrap(); |
||||
if unchecked { |
||||
parser = parser.unchecked(); |
||||
} |
||||
let mut reader = parser.parse(); |
||||
for chunk in chunks { |
||||
reader.extend_from_slice(chunk); |
||||
while let Some(result) = reader.read_next() { |
||||
match result { |
||||
Ok(quad) => quads.push(quad), |
||||
Err(error) => errors.push(error.to_string()), |
||||
} |
||||
} |
||||
} |
||||
reader.end(); |
||||
while let Some(result) = reader.read_next() { |
||||
match result { |
||||
Ok(quad) => quads.push(quad), |
||||
Err(error) => errors.push(error.to_string()), |
||||
} |
||||
} |
||||
assert!(reader.is_end()); |
||||
( |
||||
quads, |
||||
errors, |
||||
reader |
||||
.prefixes() |
||||
.map(|(k, v)| (k.to_owned(), v.to_owned())) |
||||
.collect(), |
||||
) |
||||
} |
||||
|
||||
fn count_triple_blank_nodes(triple: &Triple) -> usize { |
||||
(match &triple.subject { |
||||
Subject::BlankNode(_) => 1, |
||||
Subject::Triple(t) => count_triple_blank_nodes(t), |
||||
_ => 0, |
||||
}) + (match &triple.object { |
||||
Term::BlankNode(_) => 1, |
||||
Term::Triple(t) => count_triple_blank_nodes(t), |
||||
_ => 0, |
||||
}) |
||||
} |
||||
|
||||
fn count_quad_blank_nodes(quad: &Quad) -> usize { |
||||
(match &quad.subject { |
||||
Subject::BlankNode(_) => 1, |
||||
Subject::Triple(t) => count_triple_blank_nodes(t), |
||||
_ => 0, |
||||
}) + (match &quad.object { |
||||
Term::BlankNode(_) => 1, |
||||
Term::Triple(t) => count_triple_blank_nodes(t), |
||||
_ => 0, |
||||
}) + usize::from(matches!(quad.graph_name, GraphName::BlankNode(_))) |
||||
} |
||||
|
||||
fn serialize_quads(quads: &[Quad], prefixes: Vec<(String, String)>) -> Vec<u8> { |
||||
let mut serializer = TriGSerializer::new(); |
||||
for (prefix_name, prefix_iri) in prefixes { |
||||
serializer = serializer.with_prefix(prefix_name, prefix_iri).unwrap(); |
||||
} |
||||
let mut writer = serializer.serialize_to_write(Vec::new()); |
||||
for quad in quads { |
||||
writer.write_quad(quad).unwrap(); |
||||
} |
||||
writer.finish().unwrap() |
||||
} |
||||
|
||||
fuzz_target!(|data: &[u8]| { |
||||
// We parse with splitting
|
||||
let (quads, errors, prefixes) = parse(data.split(|c| *c == 0xFF), false); |
||||
// We parse without splitting
|
||||
let (quads_without_split, errors_without_split, _) = parse( |
||||
[data |
||||
.iter() |
||||
.copied() |
||||
.filter(|c| *c != 0xFF) |
||||
.collect::<Vec<_>>() |
||||
.as_slice()], |
||||
false, |
||||
); |
||||
let (quads_unchecked, errors_unchecked, _) = parse(data.split(|c| *c == 0xFF), true); |
||||
if errors.is_empty() { |
||||
assert!(errors_unchecked.is_empty()); |
||||
} |
||||
|
||||
let bnodes_count = quads.iter().map(count_quad_blank_nodes).sum::<usize>(); |
||||
if bnodes_count == 0 { |
||||
assert_eq!( |
||||
quads, |
||||
quads_without_split, |
||||
"With split:\n{}\nWithout split:\n{}", |
||||
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), |
||||
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new())) |
||||
); |
||||
if errors.is_empty() { |
||||
assert_eq!( |
||||
quads, |
||||
quads_unchecked, |
||||
"Validating:\n{}\nUnchecked:\n{}", |
||||
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), |
||||
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new())) |
||||
); |
||||
} |
||||
} else if bnodes_count <= 4 { |
||||
let mut dataset_with_split = quads.iter().collect::<Dataset>(); |
||||
let mut dataset_without_split = quads_without_split.iter().collect::<Dataset>(); |
||||
dataset_with_split.canonicalize(CanonicalizationAlgorithm::Unstable); |
||||
dataset_without_split.canonicalize(CanonicalizationAlgorithm::Unstable); |
||||
assert_eq!( |
||||
dataset_with_split, |
||||
dataset_without_split, |
||||
"With split:\n{}\nWithout split:\n{}", |
||||
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), |
||||
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new())) |
||||
); |
||||
if errors.is_empty() { |
||||
let mut dataset_unchecked = quads_unchecked.iter().collect::<Dataset>(); |
||||
dataset_unchecked.canonicalize(CanonicalizationAlgorithm::Unstable); |
||||
assert_eq!( |
||||
dataset_with_split, |
||||
dataset_unchecked, |
||||
"Validating:\n{}\nUnchecked:\n{}", |
||||
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), |
||||
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new())) |
||||
); |
||||
} |
||||
} |
||||
assert_eq!(errors, errors_without_split); |
||||
|
||||
// We serialize
|
||||
let new_serialization = serialize_quads(&quads, prefixes); |
||||
|
||||
// We parse the serialization
|
||||
let new_quads = TriGParser::new() |
||||
.with_quoted_triples() |
||||
.parse_read(new_serialization.as_slice()) |
||||
.collect::<Result<Vec<_>, _>>() |
||||
.map_err(|e| { |
||||
format!( |
||||
"Error on {:?} from {quads:?} based on {:?}: {e}", |
||||
String::from_utf8_lossy(&new_serialization), |
||||
String::from_utf8_lossy(data) |
||||
) |
||||
}) |
||||
.unwrap(); |
||||
|
||||
// We check the roundtrip has not changed anything
|
||||
assert_eq!(new_quads, quads); |
||||
}); |
@ -1,26 +1,20 @@ |
||||
[package] |
||||
name = "oxigraph-js" |
||||
version.workspace = true |
||||
authors.workspace = true |
||||
license.workspace = true |
||||
name = "oxigraph_js" |
||||
version = "0.3.22" |
||||
authors = ["Tpt <thomas@pellissier-tanon.fr>"] |
||||
license = "MIT OR Apache-2.0" |
||||
readme = "README.md" |
||||
keywords = ["RDF", "N-Triples", "Turtle", "XML", "SPARQL"] |
||||
keywords = ["RDF", "N-Triples", "Turtle", "RDF/XML", "SPARQL"] |
||||
repository = "https://github.com/oxigraph/oxigraph/tree/main/js" |
||||
description = "JavaScript bindings of Oxigraph" |
||||
edition.workspace = true |
||||
rust-version.workspace = true |
||||
publish = false |
||||
edition = "2021" |
||||
|
||||
[lib] |
||||
crate-type = ["cdylib"] |
||||
name = "oxigraph" |
||||
doc = false |
||||
|
||||
[dependencies] |
||||
console_error_panic_hook.workspace = true |
||||
js-sys.workspace = true |
||||
oxigraph = { workspace = true, features = ["js"] } |
||||
wasm-bindgen.workspace = true |
||||
|
||||
[lints] |
||||
workspace = true |
||||
oxigraph = { version = "0.3.22", path="../lib" } |
||||
wasm-bindgen = "0.2" |
||||
js-sys = "0.3" |
||||
console_error_panic_hook = "0.1" |
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,8 +1,7 @@ |
||||
{ |
||||
"$schema": "https://biomejs.dev/schemas/1.0.0/schema.json", |
||||
"formatter": { |
||||
"indentStyle": "space", |
||||
"indentWidth": 4, |
||||
"indentSize": 4, |
||||
"lineWidth": 100 |
||||
}, |
||||
"linter": { |
@ -0,0 +1,63 @@ |
||||
[package] |
||||
name = "oxigraph" |
||||
version = "0.3.22" |
||||
authors = ["Tpt <thomas@pellissier-tanon.fr>"] |
||||
license = "MIT OR Apache-2.0" |
||||
readme = "README.md" |
||||
keywords = ["RDF", "SPARQL", "graph-database", "database"] |
||||
categories = ["database-implementations"] |
||||
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib" |
||||
homepage = "https://oxigraph.org/" |
||||
documentation = "https://docs.rs/oxigraph" |
||||
description = """ |
||||
a SPARQL database and RDF toolkit |
||||
""" |
||||
edition = "2021" |
||||
rust-version = "1.60" |
||||
|
||||
[package.metadata.docs.rs] |
||||
all-features = true |
||||
|
||||
[features] |
||||
default = [] |
||||
http_client = ["oxhttp", "oxhttp/rustls"] |
||||
rocksdb_debug = [] |
||||
|
||||
[dependencies] |
||||
rand = "0.8" |
||||
md-5 = "0.10" |
||||
sha-1 = "0.10" |
||||
sha2 = "0.10" |
||||
digest = "0.10" |
||||
regex = "1" |
||||
oxilangtag = "0.1" |
||||
oxiri = "0.2" |
||||
rio_api = "0.8" |
||||
rio_turtle = "0.8" |
||||
rio_xml = "0.8" |
||||
hex = "0.4" |
||||
siphasher = ">=0.3,<2.0" |
||||
lazy_static = "1" |
||||
json-event-parser = "0.1" |
||||
oxrdf = { version = "0.1.7", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] } |
||||
oxsdatatypes = { version = "0.1.3", path="oxsdatatypes" } |
||||
spargebra = { version = "0.2.8", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] } |
||||
sparesults = { version = "0.1.8", path="sparesults", features = ["rdf-star"] } |
||||
|
||||
[target.'cfg(not(target_family = "wasm"))'.dependencies] |
||||
libc = "0.2" |
||||
rocksdb = {git = "https://git.nextgraph.org/NextGraph/rust-rocksdb.git", branch = "master", features = [ ] } |
||||
oxhttp = { version = "0.1", optional = true } |
||||
|
||||
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] |
||||
getrandom = { version = "0.2", features = ["js"] } |
||||
js-sys = "0.3" |
||||
|
||||
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies] |
||||
criterion = "0.4" |
||||
oxhttp = "0.1" |
||||
zstd = "0.12" |
||||
|
||||
[[bench]] |
||||
name = "store" |
||||
harness = false |
@ -1,13 +1,72 @@ |
||||
Oxigraph Rust crates |
||||
==================== |
||||
|
||||
Oxigraph is implemented in Rust. |
||||
It is composed of a main library, [`oxigraph`](./oxigraph), and a set of smaller crates used by the `oxigraph` crate: |
||||
* [`oxrdf`](./oxrdf), datastructures encoding RDF basic concepts (the `model` module of the `oxigraph` crate). |
||||
* [`oxrdfio`](./oxrdfio), a unified parser and serializer API for RDF formats (the `io` module of the `oxigraph` crate). It itself relies on: |
||||
* [`oxttl`](./oxttl), N-Triple, N-Quad, Turtle, TriG and N3 parsing and serialization. |
||||
* [`oxrdfxml`](./oxrdfxml), RDF/XML parsing and serialization. |
||||
* [`spargebra`](./spargebra), a SPARQL parser. |
||||
* [`sparesults`](./sparesults), parsers and serializers for SPARQL result formats (the `sparql::results` module of the `oxigraph` crate). |
||||
* [`sparopt`](./sparopt), a SPARQL optimizer. |
||||
* [`oxsdatatypes`](./oxsdatatypes), an implementation of some XML Schema datatypes. |
||||
Oxigraph |
||||
======== |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph) |
||||
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) |
||||
|
||||
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. |
||||
|
||||
Its goal is to provide a compliant, safe and fast on-disk graph database. |
||||
It also provides a set of utility functions for reading, writing, and processing RDF files. |
||||
|
||||
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet. |
||||
|
||||
Oxigraph also provides [a standalone HTTP server](https://crates.io/crates/oxigraph_server) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library. |
||||
|
||||
|
||||
Oxigraph implements the following specifications: |
||||
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/). |
||||
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio). |
||||
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/). |
||||
|
||||
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture). |
||||
|
||||
The main entry point of Oxigraph is the [`Store`](store::Store) struct: |
||||
```rust |
||||
use oxigraph::store::Store; |
||||
use oxigraph::model::*; |
||||
use oxigraph::sparql::QueryResults; |
||||
|
||||
let store = Store::new().unwrap(); |
||||
|
||||
// insertion |
||||
let ex = NamedNode::new("http://example.com").unwrap(); |
||||
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph); |
||||
store.insert(&quad).unwrap(); |
||||
|
||||
// quad filter |
||||
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap(); |
||||
assert_eq!(vec![quad], results); |
||||
|
||||
// SPARQL query |
||||
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() { |
||||
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into())); |
||||
} |
||||
``` |
||||
|
||||
Some parts of this library are available as standalone crates: |
||||
* [`oxrdf`](https://crates.io/crates/oxrdf) provides datastructures encoding RDF basic concepts (the `oxigraph::model` module). |
||||
* [`spargebra`](https://crates.io/crates/spargebra) provides a SPARQL parser. |
||||
* [`sparesults`](https://crates.io/crates/sparesults) provides parsers and serializers for SPARQL result formats. |
||||
|
||||
To build the library, don't forget to clone the submodules using `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository including submodules or `git submodule update --init` to add submodules to the already cloned repository. |
||||
|
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||
`<http://opensource.org/licenses/MIT>`) |
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
||||
|
@ -0,0 +1,265 @@ |
||||
use criterion::{criterion_group, criterion_main, Criterion, Throughput}; |
||||
use oxhttp::model::{Method, Request, Status}; |
||||
use oxigraph::io::GraphFormat; |
||||
use oxigraph::model::GraphNameRef; |
||||
use oxigraph::sparql::{Query, QueryResults, Update}; |
||||
use oxigraph::store::Store; |
||||
use rand::random; |
||||
use std::env::temp_dir; |
||||
use std::fs::{remove_dir_all, File}; |
||||
use std::io::{BufRead, BufReader, Cursor, Read}; |
||||
use std::path::{Path, PathBuf}; |
||||
|
||||
fn store_load(c: &mut Criterion) { |
||||
{ |
||||
let mut data = Vec::new(); |
||||
read_data("explore-1000.nt.zst") |
||||
.read_to_end(&mut data) |
||||
.unwrap(); |
||||
|
||||
let mut group = c.benchmark_group("store load"); |
||||
group.throughput(Throughput::Bytes(data.len() as u64)); |
||||
group.sample_size(10); |
||||
group.bench_function("load BSBM explore 1000 in memory", |b| { |
||||
b.iter(|| { |
||||
let store = Store::new().unwrap(); |
||||
do_load(&store, &data); |
||||
}) |
||||
}); |
||||
group.bench_function("load BSBM explore 1000 in on disk", |b| { |
||||
b.iter(|| { |
||||
let path = TempDir::default(); |
||||
let store = Store::open(&path).unwrap(); |
||||
do_load(&store, &data); |
||||
}) |
||||
}); |
||||
group.bench_function("load BSBM explore 1000 in on disk with bulk load", |b| { |
||||
b.iter(|| { |
||||
let path = TempDir::default(); |
||||
let store = Store::open(&path).unwrap(); |
||||
do_bulk_load(&store, &data); |
||||
}) |
||||
}); |
||||
} |
||||
|
||||
{ |
||||
let mut data = Vec::new(); |
||||
read_data("explore-10000.nt.zst") |
||||
.read_to_end(&mut data) |
||||
.unwrap(); |
||||
|
||||
let mut group = c.benchmark_group("store load large"); |
||||
group.throughput(Throughput::Bytes(data.len() as u64)); |
||||
group.sample_size(10); |
||||
group.bench_function("load BSBM explore 10000 in on disk with bulk load", |b| { |
||||
b.iter(|| { |
||||
let path = TempDir::default(); |
||||
let store = Store::open(&path).unwrap(); |
||||
do_bulk_load(&store, &data); |
||||
}) |
||||
}); |
||||
} |
||||
} |
||||
|
||||
fn do_load(store: &Store, data: &[u8]) { |
||||
store |
||||
.load_graph( |
||||
Cursor::new(&data), |
||||
GraphFormat::NTriples, |
||||
GraphNameRef::DefaultGraph, |
||||
None, |
||||
) |
||||
.unwrap(); |
||||
store.optimize().unwrap(); |
||||
} |
||||
|
||||
fn do_bulk_load(store: &Store, data: &[u8]) { |
||||
store |
||||
.bulk_loader() |
||||
.load_graph( |
||||
Cursor::new(&data), |
||||
GraphFormat::NTriples, |
||||
GraphNameRef::DefaultGraph, |
||||
None, |
||||
) |
||||
.unwrap(); |
||||
store.optimize().unwrap(); |
||||
} |
||||
|
||||
fn store_query_and_update(c: &mut Criterion) { |
||||
let mut data = Vec::new(); |
||||
read_data("explore-1000.nt.zst") |
||||
.read_to_end(&mut data) |
||||
.unwrap(); |
||||
|
||||
let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst") |
||||
.lines() |
||||
.map(|l| { |
||||
let l = l.unwrap(); |
||||
let mut parts = l.trim().split('\t'); |
||||
let kind = parts.next().unwrap(); |
||||
let operation = parts.next().unwrap(); |
||||
match kind { |
||||
"query" => Operation::Query(Query::parse(operation, None).unwrap()), |
||||
"update" => Operation::Update(Update::parse(operation, None).unwrap()), |
||||
_ => panic!("Unexpected operation kind {kind}"), |
||||
} |
||||
}) |
||||
.collect::<Vec<_>>(); |
||||
let query_operations = operations |
||||
.iter() |
||||
.filter(|o| matches!(o, Operation::Query(_))) |
||||
.cloned() |
||||
.collect::<Vec<_>>(); |
||||
|
||||
let mut group = c.benchmark_group("store operations"); |
||||
group.throughput(Throughput::Elements(operations.len() as u64)); |
||||
group.sample_size(10); |
||||
|
||||
{ |
||||
let memory_store = Store::new().unwrap(); |
||||
do_bulk_load(&memory_store, &data); |
||||
group.bench_function("BSBM explore 1000 query in memory", |b| { |
||||
b.iter(|| run_operation(&memory_store, &query_operations)) |
||||
}); |
||||
group.bench_function("BSBM explore 1000 queryAndUpdate in memory", |b| { |
||||
b.iter(|| run_operation(&memory_store, &operations)) |
||||
}); |
||||
} |
||||
|
||||
{ |
||||
let path = TempDir::default(); |
||||
let disk_store = Store::open(&path).unwrap(); |
||||
do_bulk_load(&disk_store, &data); |
||||
group.bench_function("BSBM explore 1000 query on disk", |b| { |
||||
b.iter(|| run_operation(&disk_store, &query_operations)) |
||||
}); |
||||
group.bench_function("BSBM explore 1000 queryAndUpdate on disk", |b| { |
||||
b.iter(|| run_operation(&disk_store, &operations)) |
||||
}); |
||||
} |
||||
} |
||||
|
||||
fn run_operation(store: &Store, operations: &[Operation]) { |
||||
for operation in operations { |
||||
match operation { |
||||
Operation::Query(q) => match store.query(q.clone()).unwrap() { |
||||
QueryResults::Boolean(_) => (), |
||||
QueryResults::Solutions(s) => { |
||||
for s in s { |
||||
s.unwrap(); |
||||
} |
||||
} |
||||
QueryResults::Graph(g) => { |
||||
for t in g { |
||||
t.unwrap(); |
||||
} |
||||
} |
||||
}, |
||||
Operation::Update(u) => store.update(u.clone()).unwrap(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn sparql_parsing(c: &mut Criterion) { |
||||
let mut data = Vec::new(); |
||||
read_data("explore-1000.nt.zst") |
||||
.read_to_end(&mut data) |
||||
.unwrap(); |
||||
|
||||
let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst") |
||||
.lines() |
||||
.map(|l| { |
||||
let l = l.unwrap(); |
||||
let mut parts = l.trim().split('\t'); |
||||
let kind = parts.next().unwrap(); |
||||
let operation = parts.next().unwrap(); |
||||
match kind { |
||||
"query" => RawOperation::Query(operation.to_owned()), |
||||
"update" => RawOperation::Update(operation.to_owned()), |
||||
_ => panic!("Unexpected operation kind {kind}"), |
||||
} |
||||
}) |
||||
.collect::<Vec<_>>(); |
||||
|
||||
let mut group = c.benchmark_group("sparql parsing"); |
||||
group.sample_size(10); |
||||
group.throughput(Throughput::Bytes( |
||||
operations |
||||
.iter() |
||||
.map(|o| match o { |
||||
RawOperation::Query(q) => q.len(), |
||||
RawOperation::Update(u) => u.len(), |
||||
}) |
||||
.sum::<usize>() as u64, |
||||
)); |
||||
group.bench_function("BSBM query and update set", |b| { |
||||
b.iter(|| { |
||||
for operation in &operations { |
||||
match operation { |
||||
RawOperation::Query(q) => { |
||||
Query::parse(q, None).unwrap(); |
||||
} |
||||
RawOperation::Update(u) => { |
||||
Update::parse(u, None).unwrap(); |
||||
} |
||||
} |
||||
} |
||||
}) |
||||
}); |
||||
} |
||||
|
||||
criterion_group!(store, sparql_parsing, store_query_and_update, store_load); |
||||
|
||||
criterion_main!(store); |
||||
|
||||
fn read_data(file: &str) -> impl BufRead { |
||||
if !Path::new(file).exists() { |
||||
let mut client = oxhttp::Client::new(); |
||||
client.set_redirection_limit(5); |
||||
let url = format!("https://github.com/Tpt/bsbm-tools/releases/download/v0.2/{file}"); |
||||
let request = Request::builder(Method::GET, url.parse().unwrap()).build(); |
||||
let response = client.request(request).unwrap(); |
||||
assert_eq!( |
||||
response.status(), |
||||
Status::OK, |
||||
"{}", |
||||
response.into_body().to_string().unwrap() |
||||
); |
||||
std::io::copy(&mut response.into_body(), &mut File::create(file).unwrap()).unwrap(); |
||||
} |
||||
BufReader::new(zstd::Decoder::new(File::open(file).unwrap()).unwrap()) |
||||
} |
||||
|
||||
/// A SPARQL operation kept as unparsed text; `sparql_parsing` parses these inside its benchmark loop.
#[derive(Clone)] |
||||
enum RawOperation { |
||||
/// Text that `sparql_parsing` parses with `Query::parse`.
Query(String), |
||||
/// Text that `sparql_parsing` parses with `Update::parse`.
Update(String), |
||||
} |
||||
|
||||
/// A SPARQL operation parsed ahead of time, executed against a store by `run_operation`.
// NOTE(review): the lint is presumably silenced because boxing a variant is not worth it in a benchmark helper — confirm.
#[allow(clippy::large_enum_variant)] |
||||
#[derive(Clone)] |
||||
enum Operation { |
||||
/// A parsed query, run with `Store::query`.
Query(Query), |
||||
/// A parsed update, run with `Store::update`.
Update(Update), |
||||
} |
||||
|
||||
struct TempDir(PathBuf); |
||||
|
||||
impl Default for TempDir { |
||||
fn default() -> Self { |
||||
Self(temp_dir().join(format!("oxigraph-bench-{}", random::<u128>()))) |
||||
} |
||||
} |
||||
|
||||
impl AsRef<Path> for TempDir { |
||||
fn as_ref(&self) -> &Path { |
||||
&self.0 |
||||
} |
||||
} |
||||
|
||||
impl Drop for TempDir { |
||||
fn drop(&mut self) { |
||||
remove_dir_all(&self.0).unwrap() |
||||
} |
||||
} |
@ -1,59 +0,0 @@ |
||||
[package] |
||||
name = "oxigraph" |
||||
version.workspace = true |
||||
authors.workspace = true |
||||
license.workspace = true |
||||
readme = "README.md" |
||||
keywords = ["RDF", "SPARQL", "graph-database", "database"] |
||||
categories = ["database-implementations"] |
||||
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxigraph" |
||||
homepage = "https://oxigraph.org/" |
||||
documentation = "https://docs.rs/oxigraph" |
||||
description = """ |
||||
a SPARQL database and RDF toolkit |
||||
""" |
||||
edition.workspace = true |
||||
rust-version.workspace = true |
||||
|
||||
[features] |
||||
js = ["getrandom/js", "oxsdatatypes/js", "js-sys"] |
||||
|
||||
|
||||
[dependencies] |
||||
digest.workspace = true |
||||
hex.workspace = true |
||||
json-event-parser.workspace = true |
||||
md-5.workspace = true |
||||
oxilangtag.workspace = true |
||||
oxiri.workspace = true |
||||
oxrdf = { workspace = true, features = ["rdf-star", "oxsdatatypes"] } |
||||
oxrdfio = { workspace = true, features = ["rdf-star"] } |
||||
oxsdatatypes.workspace = true |
||||
rand.workspace = true |
||||
regex.workspace = true |
||||
sha1.workspace = true |
||||
sha2.workspace = true |
||||
siphasher.workspace = true |
||||
sparesults = { workspace = true, features = ["rdf-star"] } |
||||
spargebra = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] } |
||||
sparopt = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] } |
||||
thiserror.workspace = true |
||||
|
||||
[target.'cfg(not(target_family = "wasm"))'.dependencies] |
||||
libc = "0.2" |
||||
rocksdb.workspace = true |
||||
|
||||
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] |
||||
getrandom.workspace = true |
||||
js-sys = { workspace = true, optional = true } |
||||
|
||||
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies] |
||||
codspeed-criterion-compat.workspace = true |
||||
zstd.workspace = true |
||||
|
||||
[lints] |
||||
workspace = true |
||||
|
||||
[package.metadata.docs.rs] |
||||
rustdoc-args = ["--cfg", "docsrs"] |
||||
|
@ -1,82 +0,0 @@ |
||||
Oxigraph |
||||
======== |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph) |
||||
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
||||
|
||||
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. |
||||
|
||||
Its goal is to provide a compliant, safe and fast on-disk graph database. |
||||
It also provides a set of utility functions for reading, writing, and processing RDF files. |
||||
|
||||
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet. |
||||
|
||||
Oxigraph also provides [a CLI tool](https://crates.io/crates/oxigraph-cli) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library. |
||||
|
||||
|
||||
Oxigraph implements the following specifications: |
||||
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/). |
||||
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval. |
||||
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/). |
||||
|
||||
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture). |
||||
|
||||
The main entry point of Oxigraph is the [`Store`](store::Store) struct: |
||||
```rust |
||||
use oxigraph::store::Store; |
||||
use oxigraph::model::*; |
||||
use oxigraph::sparql::QueryResults; |
||||
|
||||
let store = Store::new().unwrap(); |
||||
|
||||
// insertion |
||||
let ex = NamedNode::new("http://example.com").unwrap(); |
||||
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph); |
||||
store.insert(&quad).unwrap(); |
||||
|
||||
// quad filter |
||||
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap(); |
||||
assert_eq!(vec![quad], results); |
||||
|
||||
// SPARQL query |
||||
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() { |
||||
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into())); |
||||
} |
||||
``` |
||||
|
||||
It is based on these crates that can be used separately: |
||||
* [`oxrdf`](https://crates.io/crates/oxrdf), datastructures encoding RDF basic concepts (the [`oxigraph::model`](crate::model) module). |
||||
* [`oxrdfio`](https://crates.io/crates/oxrdfio), a unified parser and serializer API for RDF formats (the [`oxigraph::io`](crate::io) module). It itself relies on: |
||||
* [`oxttl`](https://crates.io/crates/oxttl), N-Triples, N-Quads, Turtle, TriG and N3 parsing and serialization. |
||||
* [`oxrdfxml`](https://crates.io/crates/oxrdfxml), RDF/XML parsing and serialization. |
||||
* [`spargebra`](https://crates.io/crates/spargebra), a SPARQL parser. |
||||
* [`sparesults`](https://crates.io/crates/sparesults), parsers and serializers for SPARQL result formats (the [`oxigraph::sparql::results`](crate::sparql::results) module). |
||||
* [`sparopt`](https://crates.io/crates/sparopt), a SPARQL optimizer. |
||||
* [`oxsdatatypes`](https://crates.io/crates/oxsdatatypes), an implementation of some XML Schema datatypes. |
||||
|
||||
To build the library locally, don't forget to fetch the submodules: use `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository including its submodules, or `git submodule update --init` to add the submodules to an already cloned repository. |
||||
|
||||
It is possible to disable the RocksDB storage backend to only use the in-memory fallback by disabling the `rocksdb` default feature: |
||||
```toml |
||||
oxigraph = { version = "*", default-features = false } |
||||
``` |
||||
This is the default behavior when compiling Oxigraph to WASM. |
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||
`<http://opensource.org/licenses/MIT>`) |
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
@ -1,39 +0,0 @@ |
||||
//! Utilities to read and write RDF graphs and datasets using [OxRDF I/O](https://crates.io/crates/oxrdfio).
|
||||
//!
|
||||
//! The entry points of this module are the two [`RdfParser`] and [`RdfSerializer`] structs.
|
||||
//!
|
||||
//! Usage example converting a Turtle file to a N-Triples file:
|
||||
//! ```
|
||||
//! use oxigraph::io::{RdfFormat, RdfParser, RdfSerializer};
|
||||
//!
|
||||
//! let turtle_file = b"@base <http://example.com/> .
|
||||
//! @prefix schema: <http://schema.org/> .
|
||||
//! <foo> a schema:Person ;
|
||||
//! schema:name \"Foo\" .
|
||||
//! <bar> a schema:Person ;
|
||||
//! schema:name \"Bar\" .";
|
||||
//!
|
||||
//! let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
//! <http://example.com/foo> <http://schema.org/name> \"Foo\" .
|
||||
//! <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
//! <http://example.com/bar> <http://schema.org/name> \"Bar\" .
|
||||
//! ";
|
||||
//!
|
||||
//! let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
|
||||
//! for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) {
|
||||
//! writer.write_quad(&quad.unwrap()).unwrap();
|
||||
//! }
|
||||
//! assert_eq!(writer.finish().unwrap(), ntriples_file);
|
||||
//! ```
|
||||
|
||||
mod format; |
||||
pub mod read; |
||||
pub mod write; |
||||
|
||||
#[allow(deprecated)] |
||||
pub use self::format::{DatasetFormat, GraphFormat}; |
||||
#[allow(deprecated)] |
||||
pub use self::read::{DatasetParser, GraphParser}; |
||||
#[allow(deprecated)] |
||||
pub use self::write::{DatasetSerializer, GraphSerializer}; |
||||
pub use oxrdfio::*; |
@ -1,199 +0,0 @@ |
||||
#![allow(deprecated)] |
||||
|
||||
//! Utilities to read RDF graphs and datasets.
|
||||
|
||||
use crate::io::{DatasetFormat, GraphFormat}; |
||||
use crate::model::*; |
||||
use oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser}; |
||||
use std::io::Read; |
||||
|
||||
/// Parsers for RDF graph serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
|
||||
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
|
||||
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{GraphFormat, GraphParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
|
||||
/// let triples = parser
|
||||
/// .read_triples(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(triples.len(), 1);
|
||||
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[deprecated(note = "use RdfParser instead", since = "0.4.0")] |
||||
pub struct GraphParser { |
||||
inner: RdfParser, |
||||
} |
||||
|
||||
impl GraphParser { |
||||
/// Builds a parser for the given format.
|
||||
#[inline] |
||||
pub fn from_format(format: GraphFormat) -> Self { |
||||
Self { |
||||
inner: RdfParser::from_format(format.into()) |
||||
.without_named_graphs() |
||||
.rename_blank_nodes(), |
||||
} |
||||
} |
||||
|
||||
/// Provides an IRI that could be used to resolve the file relative IRIs.
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{GraphFormat, GraphParser};
|
||||
///
|
||||
/// let file = "</s> </p> </o> .";
|
||||
///
|
||||
/// let parser =
|
||||
/// GraphParser::from_format(GraphFormat::Turtle).with_base_iri("http://example.com")?;
|
||||
/// let triples = parser
|
||||
/// .read_triples(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(triples.len(), 1);
|
||||
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { |
||||
Ok(Self { |
||||
inner: self.inner.with_base_iri(base_iri)?, |
||||
}) |
||||
} |
||||
|
||||
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of triples.
|
||||
pub fn read_triples<R: Read>(self, reader: R) -> TripleReader<R> { |
||||
TripleReader { |
||||
parser: self.inner.parse_read(reader), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An iterator yielding read triples.
|
||||
/// Could be built using a [`GraphParser`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{GraphFormat, GraphParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
|
||||
/// let triples = parser
|
||||
/// .read_triples(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(triples.len(), 1);
|
||||
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct TripleReader<R: Read> { |
||||
parser: FromReadQuadReader<R>, |
||||
} |
||||
|
||||
impl<R: Read> Iterator for TripleReader<R> { |
||||
type Item = Result<Triple, RdfParseError>; |
||||
|
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
Some(self.parser.next()?.map(Into::into).map_err(Into::into)) |
||||
} |
||||
} |
||||
|
||||
/// A parser for RDF dataset serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
|
||||
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{DatasetFormat, DatasetParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
|
||||
///
|
||||
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
|
||||
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[deprecated(note = "use RdfParser instead", since = "0.4.0")] |
||||
pub struct DatasetParser { |
||||
inner: RdfParser, |
||||
} |
||||
|
||||
impl DatasetParser { |
||||
/// Builds a parser for the given format.
|
||||
#[inline] |
||||
pub fn from_format(format: DatasetFormat) -> Self { |
||||
Self { |
||||
inner: RdfParser::from_format(format.into()).rename_blank_nodes(), |
||||
} |
||||
} |
||||
|
||||
/// Provides an IRI that could be used to resolve the file relative IRIs.
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{DatasetFormat, DatasetParser};
|
||||
///
|
||||
/// let file = "<g> { </s> </p> </o> }";
|
||||
///
|
||||
/// let parser =
|
||||
/// DatasetParser::from_format(DatasetFormat::TriG).with_base_iri("http://example.com")?;
|
||||
/// let triples = parser
|
||||
/// .read_quads(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(triples.len(), 1);
|
||||
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { |
||||
Ok(Self { |
||||
inner: self.inner.with_base_iri(base_iri)?, |
||||
}) |
||||
} |
||||
|
||||
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of quads.
|
||||
pub fn read_quads<R: Read>(self, reader: R) -> QuadReader<R> { |
||||
QuadReader { |
||||
parser: self.inner.parse_read(reader), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An iterator yielding read quads.
|
||||
/// Could be built using a [`DatasetParser`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{DatasetFormat, DatasetParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
|
||||
///
|
||||
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
|
||||
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct QuadReader<R: Read> { |
||||
parser: FromReadQuadReader<R>, |
||||
} |
||||
|
||||
impl<R: Read> Iterator for QuadReader<R> { |
||||
type Item = Result<Quad, RdfParseError>; |
||||
|
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
Some(self.parser.next()?.map_err(Into::into)) |
||||
} |
||||
} |
@ -1,185 +0,0 @@ |
||||
#![allow(deprecated)] |
||||
|
||||
//! Utilities to write RDF graphs and datasets.
|
||||
|
||||
use crate::io::{DatasetFormat, GraphFormat}; |
||||
use crate::model::*; |
||||
use oxrdfio::{RdfSerializer, ToWriteQuadWriter}; |
||||
use std::io::{self, Write}; |
||||
|
||||
/// A serializer for RDF graph serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
|
||||
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
|
||||
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{GraphFormat, GraphSerializer};
|
||||
/// use oxigraph::model::*;
|
||||
///
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
|
||||
/// writer.write(&Triple {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// })?;
|
||||
/// writer.finish()?;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// buffer.as_slice(),
|
||||
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")] |
||||
pub struct GraphSerializer { |
||||
inner: RdfSerializer, |
||||
} |
||||
|
||||
impl GraphSerializer { |
||||
/// Builds a serializer for the given format
|
||||
#[inline] |
||||
pub fn from_format(format: GraphFormat) -> Self { |
||||
Self { |
||||
inner: RdfSerializer::from_format(format.into()), |
||||
} |
||||
} |
||||
|
||||
/// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`] implementation
|
||||
pub fn triple_writer<W: Write>(self, write: W) -> TripleWriter<W> { |
||||
TripleWriter { |
||||
writer: self.inner.serialize_to_write(write), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Allows writing triples.
|
||||
/// Could be built using a [`GraphSerializer`].
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](TripleWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{GraphFormat, GraphSerializer};
|
||||
/// use oxigraph::model::*;
|
||||
///
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
|
||||
/// writer.write(&Triple {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// })?;
|
||||
/// writer.finish()?;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// buffer.as_slice(),
|
||||
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct TripleWriter<W: Write> { |
||||
writer: ToWriteQuadWriter<W>, |
||||
} |
||||
|
||||
impl<W: Write> TripleWriter<W> { |
||||
/// Writes a triple
|
||||
pub fn write<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> { |
||||
self.writer.write_triple(triple) |
||||
} |
||||
|
||||
/// Writes the last bytes of the file
|
||||
pub fn finish(self) -> io::Result<()> { |
||||
self.writer.finish()?.flush() |
||||
} |
||||
} |
||||
|
||||
/// A serializer for RDF dataset serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
|
||||
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
|
||||
/// use oxigraph::model::*;
|
||||
///
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
|
||||
/// writer.write(&Quad {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
|
||||
/// })?;
|
||||
/// writer.finish()?;
|
||||
///
|
||||
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")] |
||||
pub struct DatasetSerializer { |
||||
inner: RdfSerializer, |
||||
} |
||||
|
||||
impl DatasetSerializer { |
||||
/// Builds a serializer for the given format
|
||||
#[inline] |
||||
pub fn from_format(format: DatasetFormat) -> Self { |
||||
Self { |
||||
inner: RdfSerializer::from_format(format.into()), |
||||
} |
||||
} |
||||
|
||||
/// Returns a [`QuadWriter`] allowing writing triples into the given [`Write`] implementation
|
||||
pub fn quad_writer<W: Write>(self, write: W) -> QuadWriter<W> { |
||||
QuadWriter { |
||||
writer: self.inner.serialize_to_write(write), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Allows writing quads.
|
||||
/// Could be built using a [`DatasetSerializer`].
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](QuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
|
||||
/// use oxigraph::model::*;
|
||||
///
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
|
||||
/// writer.write(&Quad {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
|
||||
/// })?;
|
||||
/// writer.finish()?;
|
||||
///
|
||||
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct QuadWriter<W: Write> { |
||||
writer: ToWriteQuadWriter<W>, |
||||
} |
||||
|
||||
impl<W: Write> QuadWriter<W> { |
||||
/// Writes a quad
|
||||
pub fn write<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> { |
||||
self.writer.write_quad(quad) |
||||
} |
||||
|
||||
/// Writes the last bytes of the file
|
||||
pub fn finish(self) -> io::Result<()> { |
||||
self.writer.finish()?.flush() |
||||
} |
||||
} |
@ -1,12 +0,0 @@ |
||||
#![doc = include_str!("../README.md")] |
||||
#![doc(test(attr(deny(warnings))))] |
||||
#![doc(test(attr(allow(deprecated))))] |
||||
#![cfg_attr(docsrs, feature(doc_auto_cfg))] |
||||
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
||||
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
||||
|
||||
pub mod io; |
||||
pub mod model; |
||||
pub mod sparql; |
||||
mod storage; |
||||
pub mod store; |
@ -1,22 +0,0 @@ |
||||
//! Implements data structures for [RDF 1.1 Concepts](https://www.w3.org/TR/rdf11-concepts/) using [OxRDF](https://crates.io/crates/oxrdf).
|
||||
//!
|
||||
//! Usage example:
|
||||
//!
|
||||
//! ```
|
||||
//! use oxigraph::model::*;
|
||||
//!
|
||||
//! let mut graph = Graph::default();
|
||||
//!
|
||||
//! // insertion
|
||||
//! let ex = NamedNodeRef::new("http://example.com").unwrap();
|
||||
//! let triple = TripleRef::new(ex, ex, ex);
|
||||
//! graph.insert(triple);
|
||||
//!
|
||||
//! // simple filter
|
||||
//! let results: Vec<_> = graph.triples_for_subject(ex).collect();
|
||||
//! assert_eq!(vec![triple], results);
|
||||
//! ```
|
||||
|
||||
pub use oxrdf::*; |
||||
|
||||
pub use spargebra::term::GroundQuad; |
@ -1,84 +0,0 @@ |
||||
use crate::io::RdfParseError; |
||||
use crate::model::NamedNode; |
||||
use crate::sparql::results::QueryResultsParseError as ResultsParseError; |
||||
use crate::sparql::SparqlSyntaxError; |
||||
use crate::storage::StorageError; |
||||
use std::convert::Infallible; |
||||
use std::error::Error; |
||||
use std::io; |
||||
|
||||
/// A SPARQL evaluation error.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[non_exhaustive] |
||||
pub enum EvaluationError { |
||||
/// An error in SPARQL parsing.
|
||||
#[error(transparent)] |
||||
Parsing(#[from] SparqlSyntaxError), |
||||
/// An error from the storage.
|
||||
#[error(transparent)] |
||||
Storage(#[from] StorageError), |
||||
/// An error while parsing an external RDF file.
|
||||
#[error(transparent)] |
||||
GraphParsing(#[from] RdfParseError), |
||||
/// An error while parsing an external result file (likely from a federated query).
|
||||
#[error(transparent)] |
||||
ResultsParsing(#[from] ResultsParseError), |
||||
/// An error returned during results serialization.
|
||||
#[error(transparent)] |
||||
ResultsSerialization(#[from] io::Error), |
||||
/// Error during `SERVICE` evaluation
|
||||
#[error("{0}")] |
||||
Service(#[source] Box<dyn Error + Send + Sync + 'static>), |
||||
/// Error when `CREATE` tries to create an already existing graph
|
||||
#[error("The graph {0} already exists")] |
||||
GraphAlreadyExists(NamedNode), |
||||
/// Error when `DROP` or `CLEAR` tries to remove a not existing graph
|
||||
#[error("The graph {0} does not exist")] |
||||
GraphDoesNotExist(NamedNode), |
||||
/// The variable storing the `SERVICE` name is unbound
|
||||
#[error("The variable encoding the service name is unbound")] |
||||
UnboundService, |
||||
/// The given `SERVICE` is not supported
|
||||
#[error("The service {0} is not supported")] |
||||
UnsupportedService(NamedNode), |
||||
/// The given content media type returned from an HTTP response is not supported (`SERVICE` and `LOAD`)
|
||||
#[error("The content media type {0} is not supported")] |
||||
UnsupportedContentType(String), |
||||
/// The `SERVICE` call has not returns solutions
|
||||
#[error("The service is not returning solutions but a boolean or a graph")] |
||||
ServiceDoesNotReturnSolutions, |
||||
/// The results are not a RDF graph
|
||||
#[error("The query results are not a RDF graph")] |
||||
NotAGraph, |
||||
} |
||||
|
||||
impl From<Infallible> for EvaluationError { |
||||
#[inline] |
||||
fn from(error: Infallible) -> Self { |
||||
match error {} |
||||
} |
||||
} |
||||
|
||||
impl From<EvaluationError> for io::Error { |
||||
#[inline] |
||||
fn from(error: EvaluationError) -> Self { |
||||
match error { |
||||
EvaluationError::Parsing(error) => Self::new(io::ErrorKind::InvalidData, error), |
||||
EvaluationError::GraphParsing(error) => error.into(), |
||||
EvaluationError::ResultsParsing(error) => error.into(), |
||||
EvaluationError::ResultsSerialization(error) => error, |
||||
EvaluationError::Storage(error) => error.into(), |
||||
EvaluationError::Service(error) => match error.downcast() { |
||||
Ok(error) => *error, |
||||
Err(error) => Self::new(io::ErrorKind::Other, error), |
||||
}, |
||||
EvaluationError::GraphAlreadyExists(_) |
||||
| EvaluationError::GraphDoesNotExist(_) |
||||
| EvaluationError::UnboundService |
||||
| EvaluationError::UnsupportedService(_) |
||||
| EvaluationError::UnsupportedContentType(_) |
||||
| EvaluationError::ServiceDoesNotReturnSolutions |
||||
| EvaluationError::NotAGraph => Self::new(io::ErrorKind::InvalidInput, error), |
||||
} |
||||
} |
||||
} |
@ -1,9 +0,0 @@ |
||||
#[cfg(not(feature = "http-client"))] |
||||
mod dummy; |
||||
#[cfg(feature = "http-client")] |
||||
mod simple; |
||||
|
||||
#[cfg(not(feature = "http-client"))] |
||||
pub use dummy::Client; |
||||
#[cfg(feature = "http-client")] |
||||
pub use simple::Client; |
@ -1,371 +0,0 @@ |
||||
use crate::io::{RdfFormat, RdfSerializer}; |
||||
use crate::model::*; |
||||
use crate::sparql::error::EvaluationError; |
||||
use crate::sparql::results::{ |
||||
FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsFormat, |
||||
QueryResultsParseError, QueryResultsParser, QueryResultsSerializer, |
||||
}; |
||||
pub use sparesults::QuerySolution; |
||||
use std::io::{Read, Write}; |
||||
use std::sync::Arc; |
||||
|
||||
/// Results of a [SPARQL query](https://www.w3.org/TR/sparql11-query/).
|
||||
pub enum QueryResults { |
||||
/// Results of a [SELECT](https://www.w3.org/TR/sparql11-query/#select) query.
|
||||
Solutions(QuerySolutionIter), |
||||
/// Result of a [ASK](https://www.w3.org/TR/sparql11-query/#ask) query.
|
||||
Boolean(bool), |
||||
/// Results of a [CONSTRUCT](https://www.w3.org/TR/sparql11-query/#construct) or [DESCRIBE](https://www.w3.org/TR/sparql11-query/#describe) query.
|
||||
Graph(QueryTripleIter), |
||||
} |
||||
|
||||
impl QueryResults { |
||||
/// Reads a SPARQL query results serialization.
|
||||
pub fn read( |
||||
read: impl Read + 'static, |
||||
format: QueryResultsFormat, |
||||
) -> Result<Self, QueryResultsParseError> { |
||||
Ok(QueryResultsParser::from_format(format) |
||||
.parse_read(read)? |
||||
.into()) |
||||
} |
||||
|
||||
/// Writes the query results (solutions or boolean).
|
||||
///
|
||||
/// This method fails if it is called on the `Graph` results.
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::store::Store;
|
||||
/// use oxigraph::model::*;
|
||||
/// use oxigraph::sparql::results::QueryResultsFormat;
|
||||
///
|
||||
/// let store = Store::new()?;
|
||||
/// let ex = NamedNodeRef::new("http://example.com")?;
|
||||
/// store.insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
|
||||
///
|
||||
/// let results = store.query("SELECT ?s WHERE { ?s ?p ?o }")?;
|
||||
/// assert_eq!(
|
||||
/// results.write(Vec::new(), QueryResultsFormat::Json)?,
|
||||
/// r#"{"head":{"vars":["s"]},"results":{"bindings":[{"s":{"type":"uri","value":"http://example.com"}}]}}"#.as_bytes()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn write<W: Write>( |
||||
self, |
||||
write: W, |
||||
format: QueryResultsFormat, |
||||
) -> Result<W, EvaluationError> { |
||||
let serializer = QueryResultsSerializer::from_format(format); |
||||
match self { |
||||
Self::Boolean(value) => serializer.serialize_boolean_to_write(write, value), |
||||
Self::Solutions(solutions) => { |
||||
let mut writer = serializer |
||||
.serialize_solutions_to_write(write, solutions.variables().to_vec()) |
||||
.map_err(EvaluationError::ResultsSerialization)?; |
||||
for solution in solutions { |
||||
writer |
||||
.write(&solution?) |
||||
.map_err(EvaluationError::ResultsSerialization)?; |
||||
} |
||||
writer.finish() |
||||
} |
||||
Self::Graph(triples) => { |
||||
let s = VariableRef::new_unchecked("subject"); |
||||
let p = VariableRef::new_unchecked("predicate"); |
||||
let o = VariableRef::new_unchecked("object"); |
||||
let mut writer = serializer |
||||
.serialize_solutions_to_write( |
||||
write, |
||||
vec![s.into_owned(), p.into_owned(), o.into_owned()], |
||||
) |
||||
.map_err(EvaluationError::ResultsSerialization)?; |
||||
for triple in triples { |
||||
let triple = triple?; |
||||
writer |
||||
.write([ |
||||
(s, &triple.subject.into()), |
||||
(p, &triple.predicate.into()), |
||||
(o, &triple.object), |
||||
]) |
||||
.map_err(EvaluationError::ResultsSerialization)?; |
||||
} |
||||
writer.finish() |
||||
} |
||||
} |
||||
.map_err(EvaluationError::ResultsSerialization) |
||||
} |
||||
|
||||
/// Writes the graph query results.
|
||||
///
|
||||
/// This method fails if it is called on the `Solution` or `Boolean` results.
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::io::RdfFormat;
|
||||
/// use oxigraph::model::*;
|
||||
/// use oxigraph::store::Store;
|
||||
///
|
||||
/// let graph = "<http://example.com> <http://example.com> <http://example.com> .\n";
|
||||
///
|
||||
/// let store = Store::new()?;
|
||||
/// store.load_graph(
|
||||
/// graph.as_bytes(),
|
||||
/// RdfFormat::NTriples,
|
||||
/// GraphName::DefaultGraph,
|
||||
/// None,
|
||||
/// )?;
|
||||
///
|
||||
/// let results = store.query("CONSTRUCT WHERE { ?s ?p ?o }")?;
|
||||
/// assert_eq!(
|
||||
/// results.write_graph(Vec::new(), RdfFormat::NTriples)?,
|
||||
/// graph.as_bytes()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn write_graph<W: Write>( |
||||
self, |
||||
write: W, |
||||
format: impl Into<RdfFormat>, |
||||
) -> Result<W, EvaluationError> { |
||||
if let Self::Graph(triples) = self { |
||||
let mut writer = RdfSerializer::from_format(format.into()).serialize_to_write(write); |
||||
for triple in triples { |
||||
writer |
||||
.write_triple(&triple?) |
||||
.map_err(EvaluationError::ResultsSerialization)?; |
||||
} |
||||
writer |
||||
.finish() |
||||
.map_err(EvaluationError::ResultsSerialization) |
||||
} else { |
||||
Err(EvaluationError::NotAGraph) |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<QuerySolutionIter> for QueryResults { |
||||
#[inline] |
||||
fn from(value: QuerySolutionIter) -> Self { |
||||
Self::Solutions(value) |
||||
} |
||||
} |
||||
|
||||
impl<R: Read + 'static> From<FromReadQueryResultsReader<R>> for QueryResults { |
||||
fn from(reader: FromReadQueryResultsReader<R>) -> Self { |
||||
match reader { |
||||
FromReadQueryResultsReader::Solutions(s) => Self::Solutions(s.into()), |
||||
FromReadQueryResultsReader::Boolean(v) => Self::Boolean(v), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An iterator over [`QuerySolution`]s.
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::sparql::QueryResults;
|
||||
/// use oxigraph::store::Store;
|
||||
///
|
||||
/// let store = Store::new()?;
|
||||
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }")? {
|
||||
/// for solution in solutions {
|
||||
/// println!("{:?}", solution?.get("s"));
|
||||
/// }
|
||||
/// }
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub struct QuerySolutionIter { |
||||
variables: Arc<[Variable]>, |
||||
iter: Box<dyn Iterator<Item = Result<QuerySolution, EvaluationError>>>, |
||||
} |
||||
|
||||
impl QuerySolutionIter { |
||||
/// Construct a new iterator of solution from an ordered list of solution variables and an iterator of solution tuples
|
||||
/// (each tuple using the same ordering as the variable list such that tuple element 0 is the value for the variable 0...)
|
||||
pub fn new( |
||||
variables: Arc<[Variable]>, |
||||
iter: impl Iterator<Item = Result<Vec<Option<Term>>, EvaluationError>> + 'static, |
||||
) -> Self { |
||||
Self { |
||||
variables: Arc::clone(&variables), |
||||
iter: Box::new( |
||||
iter.map(move |t| t.map(|values| (Arc::clone(&variables), values).into())), |
||||
), |
||||
} |
||||
} |
||||
|
||||
/// The variables used in the solutions.
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::sparql::{QueryResults, Variable};
|
||||
/// use oxigraph::store::Store;
|
||||
///
|
||||
/// let store = Store::new()?;
|
||||
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s ?o WHERE { ?s ?p ?o }")? {
|
||||
/// assert_eq!(
|
||||
/// solutions.variables(),
|
||||
/// &[Variable::new("s")?, Variable::new("o")?]
|
||||
/// );
|
||||
/// }
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn variables(&self) -> &[Variable] { |
||||
&self.variables |
||||
} |
||||
} |
||||
|
||||
impl<R: Read + 'static> From<FromReadSolutionsReader<R>> for QuerySolutionIter { |
||||
fn from(reader: FromReadSolutionsReader<R>) -> Self { |
||||
Self { |
||||
variables: reader.variables().into(), |
||||
iter: Box::new(reader.map(|t| t.map_err(EvaluationError::from))), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl Iterator for QuerySolutionIter { |
||||
type Item = Result<QuerySolution, EvaluationError>; |
||||
|
||||
#[inline] |
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
self.iter.next() |
||||
} |
||||
|
||||
#[inline] |
||||
fn size_hint(&self) -> (usize, Option<usize>) { |
||||
self.iter.size_hint() |
||||
} |
||||
} |
||||
|
||||
/// An iterator over the triples that compose a graph solution.
|
||||
///
|
||||
/// ```
|
||||
/// use oxigraph::sparql::QueryResults;
|
||||
/// use oxigraph::store::Store;
|
||||
///
|
||||
/// let store = Store::new()?;
|
||||
/// if let QueryResults::Graph(triples) = store.query("CONSTRUCT WHERE { ?s ?p ?o }")? {
|
||||
/// for triple in triples {
|
||||
/// println!("{}", triple?);
|
||||
/// }
|
||||
/// }
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub struct QueryTripleIter { |
||||
pub(crate) iter: Box<dyn Iterator<Item = Result<Triple, EvaluationError>>>, |
||||
} |
||||
|
||||
impl Iterator for QueryTripleIter { |
||||
type Item = Result<Triple, EvaluationError>; |
||||
|
||||
#[inline] |
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
self.iter.next() |
||||
} |
||||
|
||||
#[inline] |
||||
fn size_hint(&self) -> (usize, Option<usize>) { |
||||
self.iter.size_hint() |
||||
} |
||||
|
||||
#[inline] |
||||
fn fold<Acc, G>(self, init: Acc, g: G) -> Acc |
||||
where |
||||
G: FnMut(Acc, Self::Item) -> Acc, |
||||
{ |
||||
self.iter.fold(init, g) |
||||
} |
||||
} |
||||
|
||||
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Builds the results used by the round-trip test: both booleans and a set of solutions.
    ///
    /// `QueryResults` is consumed by `write`, so a fresh set is built for every format.
    fn sample_results() -> Vec<QueryResults> {
        vec![
            QueryResults::Boolean(true),
            QueryResults::Boolean(false),
            QueryResults::Solutions(QuerySolutionIter::new(
                [
                    Variable::new_unchecked("foo"),
                    Variable::new_unchecked("bar"),
                ]
                .as_ref()
                .into(),
                Box::new(
                    vec![
                        Ok(vec![None, None]),
                        Ok(vec![
                            Some(NamedNode::new_unchecked("http://example.com").into()),
                            None,
                        ]),
                        Ok(vec![
                            None,
                            Some(NamedNode::new_unchecked("http://example.com").into()),
                        ]),
                        Ok(vec![
                            Some(BlankNode::new_unchecked("foo").into()),
                            Some(BlankNode::new_unchecked("bar").into()),
                        ]),
                        Ok(vec![Some(Literal::new_simple_literal("foo").into()), None]),
                        Ok(vec![
                            Some(
                                Literal::new_language_tagged_literal_unchecked("foo", "fr")
                                    .into(),
                            ),
                            None,
                        ]),
                        Ok(vec![
                            Some(Literal::from(1).into()),
                            Some(Literal::from(true).into()),
                        ]),
                        Ok(vec![
                            Some(Literal::from(1.33).into()),
                            Some(Literal::from(false).into()),
                        ]),
                        Ok(vec![
                            Some(
                                Triple::new(
                                    NamedNode::new_unchecked("http://example.com/s"),
                                    NamedNode::new_unchecked("http://example.com/p"),
                                    Triple::new(
                                        NamedNode::new_unchecked("http://example.com/os"),
                                        NamedNode::new_unchecked("http://example.com/op"),
                                        NamedNode::new_unchecked("http://example.com/oo"),
                                    ),
                                )
                                .into(),
                            ),
                            None,
                        ]),
                    ]
                    .into_iter(),
                ),
            )),
        ]
    }

    /// Writes each sample, reads it back, writes it again and compares both serializations.
    #[test]
    fn test_serialization_roundtrip() -> Result<(), EvaluationError> {
        use std::str;

        for format in [
            QueryResultsFormat::Json,
            QueryResultsFormat::Xml,
            QueryResultsFormat::Tsv,
        ] {
            for original in sample_results() {
                let mut first_pass = Vec::new();
                original.write(&mut first_pass, format)?;

                let reparsed = QueryResults::read(Cursor::new(first_pass.clone()), format)?;
                let mut second_pass = Vec::new();
                reparsed.write(&mut second_pass, format)?;

                assert_eq!(
                    str::from_utf8(&first_pass).unwrap(),
                    str::from_utf8(&second_pass).unwrap()
                );
            }
        }

        Ok(())
    }
}
@ -1,44 +0,0 @@ |
||||
//! Utilities to read and write RDF results formats using [sparesults](https://crates.io/crates/sparesults).
|
||||
//!
|
||||
//! It supports [SPARQL Query Results XML Format (Second Edition)](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
|
||||
//!
|
||||
//! Usage example converting a JSON result file into a TSV result file:
|
||||
//!
|
||||
//! ```
|
||||
//! use oxigraph::sparql::results::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader, QueryResultsSerializer};
|
||||
//! use std::io::Result;
|
||||
//!
|
||||
//! fn convert_json_to_tsv(json_file: &[u8]) -> Result<Vec<u8>> {
|
||||
//! let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
|
||||
//! let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv);
|
||||
//! // We start to read the JSON file and see which kind of results it is
|
||||
//! match json_parser.parse_read(json_file)? {
|
||||
//! FromReadQueryResultsReader::Boolean(value) => {
|
||||
//! // it's a boolean result, we copy it in TSV to the output buffer
|
||||
//! tsv_serializer.serialize_boolean_to_write(Vec::new(), value)
|
||||
//! }
|
||||
//! FromReadQueryResultsReader::Solutions(solutions_reader) => {
|
||||
//! // it's a set of solutions, we create a writer and we write to it while reading in streaming from the JSON file
|
||||
//! let mut serialize_solutions_to_write = tsv_serializer.serialize_solutions_to_write(Vec::new(), solutions_reader.variables().to_vec())?;
|
||||
//! for solution in solutions_reader {
|
||||
//! serialize_solutions_to_write.write(&solution?)?;
|
||||
//! }
|
||||
//! serialize_solutions_to_write.finish()
|
||||
//! }
|
||||
//! }
|
||||
//! }
|
||||
//!
|
||||
//! // Let's test with a boolean
|
||||
//! assert_eq!(
|
||||
//! convert_json_to_tsv(br#"{"boolean":true}"#.as_slice()).unwrap(),
|
||||
//! b"true"
|
||||
//! );
|
||||
//!
|
||||
//! // And with a set of solutions
|
||||
//! assert_eq!(
|
||||
//! convert_json_to_tsv(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice()).unwrap(),
|
||||
//! b"?foo\t?bar\n\"test\"\t\n"
|
||||
//! );
|
||||
//! ```
|
||||
|
||||
pub use sparesults::*; |
@ -1,12 +0,0 @@ |
||||
//! A storage backend
|
||||
//! RocksDB is available, if not in memory
|
||||
|
||||
#[cfg(any(target_family = "wasm"))] |
||||
pub use fallback::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction}; |
||||
#[cfg(all(not(target_family = "wasm")))] |
||||
pub use oxi_rocksdb::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction}; |
||||
|
||||
#[cfg(any(target_family = "wasm"))] |
||||
mod fallback; |
||||
#[cfg(all(not(target_family = "wasm")))] |
||||
mod oxi_rocksdb; |
@ -1,139 +0,0 @@ |
||||
use crate::io::{RdfFormat, RdfParseError}; |
||||
use crate::storage::numeric_encoder::EncodedTerm; |
||||
use oxiri::IriParseError; |
||||
use oxrdf::TermRef; |
||||
use std::error::Error; |
||||
use std::io; |
||||
|
||||
/// An error related to storage operations (reads, writes...).
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[non_exhaustive] |
||||
pub enum StorageError { |
||||
/// Error from the OS I/O layer.
|
||||
#[error(transparent)] |
||||
Io(#[from] io::Error), |
||||
/// Error related to data corruption.
|
||||
#[error(transparent)] |
||||
Corruption(#[from] CorruptionError), |
||||
#[doc(hidden)] |
||||
#[error("{0}")] |
||||
Other(#[source] Box<dyn Error + Send + Sync + 'static>), |
||||
} |
||||
|
||||
impl From<StorageError> for io::Error { |
||||
#[inline] |
||||
fn from(error: StorageError) -> Self { |
||||
match error { |
||||
StorageError::Io(error) => error, |
||||
StorageError::Corruption(error) => error.into(), |
||||
StorageError::Other(error) => Self::new(io::ErrorKind::Other, error), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An error return if some content in the database is corrupted.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[error(transparent)] |
||||
pub struct CorruptionError(#[from] CorruptionErrorKind); |
||||
|
||||
/// An error return if some content in the database is corrupted.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
enum CorruptionErrorKind { |
||||
#[error("{0}")] |
||||
Msg(String), |
||||
#[error("{0}")] |
||||
Other(#[source] Box<dyn Error + Send + Sync + 'static>), |
||||
} |
||||
|
||||
impl CorruptionError { |
||||
/// Builds an error from a printable error message.
|
||||
#[inline] |
||||
pub(crate) fn new(error: impl Into<Box<dyn Error + Send + Sync + 'static>>) -> Self { |
||||
Self(CorruptionErrorKind::Other(error.into())) |
||||
} |
||||
|
||||
#[inline] |
||||
pub(crate) fn from_encoded_term(encoded: &EncodedTerm, term: &TermRef<'_>) -> Self { |
||||
// TODO: eventually use a dedicated error enum value
|
||||
Self::msg(format!("Invalid term encoding {encoded:?} for {term}")) |
||||
} |
||||
|
||||
#[inline] |
||||
pub(crate) fn from_missing_column_family_name(name: &'static str) -> Self { |
||||
// TODO: eventually use a dedicated error enum value
|
||||
Self::msg(format!("Column family {name} does not exist")) |
||||
} |
||||
|
||||
/// Builds an error from a printable error message.
|
||||
#[inline] |
||||
pub(crate) fn msg(msg: impl Into<String>) -> Self { |
||||
Self(CorruptionErrorKind::Msg(msg.into())) |
||||
} |
||||
} |
||||
|
||||
impl From<CorruptionError> for io::Error { |
||||
#[inline] |
||||
fn from(error: CorruptionError) -> Self { |
||||
Self::new(io::ErrorKind::InvalidData, error) |
||||
} |
||||
} |
||||
|
||||
/// An error raised while loading a file into a [`Store`](crate::store::Store).
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum LoaderError { |
||||
/// An error raised while reading the file.
|
||||
#[error(transparent)] |
||||
Parsing(#[from] RdfParseError), |
||||
/// An error raised during the insertion in the store.
|
||||
#[error(transparent)] |
||||
Storage(#[from] StorageError), |
||||
/// The base IRI is invalid.
|
||||
#[error("Invalid base IRI '{iri}': {error}")] |
||||
InvalidBaseIri { |
||||
/// The IRI itself.
|
||||
iri: String, |
||||
/// The parsing error.
|
||||
#[source] |
||||
error: IriParseError, |
||||
}, |
||||
} |
||||
|
||||
impl From<LoaderError> for io::Error { |
||||
#[inline] |
||||
fn from(error: LoaderError) -> Self { |
||||
match error { |
||||
LoaderError::Storage(error) => error.into(), |
||||
LoaderError::Parsing(error) => error.into(), |
||||
LoaderError::InvalidBaseIri { .. } => { |
||||
Self::new(io::ErrorKind::InvalidInput, error.to_string()) |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An error raised while writing a file from a [`Store`](crate::store::Store).
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum SerializerError { |
||||
/// An error raised while writing the content.
|
||||
#[error(transparent)] |
||||
Io(#[from] io::Error), |
||||
/// An error raised during the lookup in the store.
|
||||
#[error(transparent)] |
||||
Storage(#[from] StorageError), |
||||
/// A format compatible with [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) is required.
|
||||
#[error("A RDF format supporting datasets was expected, {0} found")] |
||||
DatasetFormatExpected(RdfFormat), |
||||
} |
||||
|
||||
impl From<SerializerError> for io::Error { |
||||
#[inline] |
||||
fn from(error: SerializerError) -> Self { |
||||
match error { |
||||
SerializerError::Storage(error) => error.into(), |
||||
SerializerError::Io(error) => error, |
||||
SerializerError::DatasetFormatExpected(_) => { |
||||
Self::new(io::ErrorKind::InvalidInput, error.to_string()) |
||||
} |
||||
} |
||||
} |
||||
} |
@ -1,36 +0,0 @@ |
||||
[package] |
||||
name = "oxrdfio" |
||||
version = "0.1.0-alpha.5" |
||||
authors.workspace = true |
||||
license.workspace = true |
||||
readme = "README.md" |
||||
keywords = ["RDF"] |
||||
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfio" |
||||
documentation = "https://docs.rs/oxrdfio" |
||||
description = """ |
||||
Parser and serializer for various RDF formats |
||||
""" |
||||
edition.workspace = true |
||||
rust-version.workspace = true |
||||
|
||||
[features] |
||||
default = [] |
||||
async-tokio = ["dep:tokio", "oxrdfxml/async-tokio", "oxttl/async-tokio"] |
||||
rdf-star = ["oxrdf/rdf-star", "oxttl/rdf-star"] |
||||
|
||||
[dependencies] |
||||
oxrdf.workspace = true |
||||
oxrdfxml.workspace = true |
||||
oxttl.workspace = true |
||||
thiserror.workspace = true |
||||
tokio = { workspace = true, optional = true, features = ["io-util"] } |
||||
|
||||
[dev-dependencies] |
||||
tokio = { workspace = true, features = ["rt", "macros"] } |
||||
|
||||
[lints] |
||||
workspace = true |
||||
|
||||
[package.metadata.docs.rs] |
||||
all-features = true |
||||
rustdoc-args = ["--cfg", "docsrs"] |
@ -1,67 +0,0 @@ |
||||
OxRDF I/O |
||||
========= |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/oxrdfio.svg)](https://crates.io/crates/oxrdfio) |
||||
[![Released API docs](https://docs.rs/oxrdfio/badge.svg)](https://docs.rs/oxrdfio) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfio)](https://crates.io/crates/oxrdfio) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
||||
|
||||
OxRDF I/O is a set of parsers and serializers for RDF. |
||||
|
||||
It supports: |
||||
* [N3](https://w3c.github.io/N3/spec/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
* [N-Quads](https://www.w3.org/TR/n-quads/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
* [N-Triples](https://www.w3.org/TR/n-triples/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) using [`oxrdfxml`](https://crates.io/crates/oxrdfxml) |
||||
* [TriG](https://www.w3.org/TR/trig/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
* [Turtle](https://www.w3.org/TR/turtle/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
|
||||
Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is also available behind the `rdf-star` feature for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star), [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star), [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) and [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star). |
||||
|
||||
It is designed as a low level parser compatible with both synchronous and asynchronous I/O (behind the `async-tokio` feature). |
||||
|
||||
The entry points of this library are the two [`RdfParser`] and [`RdfSerializer`] structs. |
||||
|
||||
Usage example converting a Turtle file to a N-Triples file: |
||||
```rust |
||||
use oxrdfio::{RdfFormat, RdfParser, RdfSerializer}; |
||||
|
||||
let turtle_file = b"@base <http://example.com/> . |
||||
@prefix schema: <http://schema.org/> . |
||||
<foo> a schema:Person ; |
||||
schema:name \"Foo\" . |
||||
<bar> a schema:Person ; |
||||
schema:name \"Bar\" ."; |
||||
|
||||
let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . |
||||
<http://example.com/foo> <http://schema.org/name> \"Foo\" . |
||||
<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . |
||||
<http://example.com/bar> <http://schema.org/name> \"Bar\" . |
||||
"; |
||||
|
||||
let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new()); |
||||
for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) { |
||||
writer.write_quad(&quad.unwrap()).unwrap(); |
||||
} |
||||
assert_eq!(writer.finish().unwrap(), ntriples_file); |
||||
``` |
||||
|
||||
Parsers for other RDF formats exist in Rust, such as [graph-rdfa-processor](https://github.com/nbittich/graph-rdfa-processor) for RDFa and [json-ld](https://github.com/timothee-haudebourg/json-ld) for JSON-LD. |
||||
|
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||
`<http://opensource.org/licenses/MIT>`) |
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
@ -1,122 +0,0 @@ |
||||
use std::io; |
||||
use std::ops::Range; |
||||
|
||||
/// Error returned during RDF format parsing.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum RdfParseError { |
||||
/// I/O error during parsing (file not found...).
|
||||
#[error(transparent)] |
||||
Io(#[from] io::Error), |
||||
/// An error in the file syntax.
|
||||
#[error(transparent)] |
||||
Syntax(#[from] RdfSyntaxError), |
||||
} |
||||
|
||||
impl RdfParseError { |
||||
pub(crate) fn msg(msg: &'static str) -> Self { |
||||
Self::Syntax(RdfSyntaxError(SyntaxErrorKind::Msg(msg))) |
||||
} |
||||
} |
||||
|
||||
impl From<oxttl::TurtleSyntaxError> for RdfSyntaxError { |
||||
#[inline] |
||||
fn from(error: oxttl::TurtleSyntaxError) -> Self { |
||||
Self(SyntaxErrorKind::Turtle(error)) |
||||
} |
||||
} |
||||
|
||||
impl From<oxttl::TurtleParseError> for RdfParseError { |
||||
#[inline] |
||||
fn from(error: oxttl::TurtleParseError) -> Self { |
||||
match error { |
||||
oxttl::TurtleParseError::Syntax(e) => Self::Syntax(e.into()), |
||||
oxttl::TurtleParseError::Io(e) => Self::Io(e), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<oxrdfxml::RdfXmlSyntaxError> for RdfSyntaxError { |
||||
#[inline] |
||||
fn from(error: oxrdfxml::RdfXmlSyntaxError) -> Self { |
||||
Self(SyntaxErrorKind::RdfXml(error)) |
||||
} |
||||
} |
||||
|
||||
impl From<oxrdfxml::RdfXmlParseError> for RdfParseError { |
||||
#[inline] |
||||
fn from(error: oxrdfxml::RdfXmlParseError) -> Self { |
||||
match error { |
||||
oxrdfxml::RdfXmlParseError::Syntax(e) => Self::Syntax(e.into()), |
||||
oxrdfxml::RdfXmlParseError::Io(e) => Self::Io(e), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<RdfParseError> for io::Error { |
||||
#[inline] |
||||
fn from(error: RdfParseError) -> Self { |
||||
match error { |
||||
RdfParseError::Io(error) => error, |
||||
RdfParseError::Syntax(error) => error.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An error in the syntax of the parsed file.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[error(transparent)] |
||||
pub struct RdfSyntaxError(#[from] SyntaxErrorKind); |
||||
|
||||
/// An error in the syntax of the parsed file.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
enum SyntaxErrorKind { |
||||
#[error(transparent)] |
||||
Turtle(#[from] oxttl::TurtleSyntaxError), |
||||
#[error(transparent)] |
||||
RdfXml(#[from] oxrdfxml::RdfXmlSyntaxError), |
||||
#[error("{0}")] |
||||
Msg(&'static str), |
||||
} |
||||
|
||||
impl RdfSyntaxError { |
||||
/// The location of the error inside of the file.
|
||||
#[inline] |
||||
pub fn location(&self) -> Option<Range<TextPosition>> { |
||||
match &self.0 { |
||||
SyntaxErrorKind::Turtle(e) => { |
||||
let location = e.location(); |
||||
Some( |
||||
TextPosition { |
||||
line: location.start.line, |
||||
column: location.start.column, |
||||
offset: location.start.offset, |
||||
}..TextPosition { |
||||
line: location.end.line, |
||||
column: location.end.column, |
||||
offset: location.end.offset, |
||||
}, |
||||
) |
||||
} |
||||
SyntaxErrorKind::RdfXml(_) | SyntaxErrorKind::Msg(_) => None, |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<RdfSyntaxError> for io::Error { |
||||
#[inline] |
||||
fn from(error: RdfSyntaxError) -> Self { |
||||
match error.0 { |
||||
SyntaxErrorKind::Turtle(error) => error.into(), |
||||
SyntaxErrorKind::RdfXml(error) => error.into(), |
||||
SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// A position in a text.
///
/// It is made of a `line` number starting from 0, a `column` number starting from 0
/// (in number of code points) and a global file `offset` starting from 0 (in number of bytes).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TextPosition {
    /// Line number, starting from 0.
    pub line: u64,
    /// Column number in code points, starting from 0.
    pub column: u64,
    /// Offset in bytes from the beginning of the file, starting from 0.
    pub offset: u64,
}
@ -1,216 +0,0 @@ |
||||
use std::fmt; |
||||
|
||||
/// The RDF serialization formats known to this crate.
///
/// This enumeration is non exhaustive. New formats like JSON-LD might be added in the future.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum RdfFormat {
    /// [N3](https://w3c.github.io/N3/spec/)
    N3,
    /// [N-Quads](https://www.w3.org/TR/n-quads/)
    NQuads,
    /// [N-Triples](https://www.w3.org/TR/n-triples/)
    NTriples,
    /// [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/)
    RdfXml,
    /// [TriG](https://www.w3.org/TR/trig/)
    TriG,
    /// [Turtle](https://www.w3.org/TR/turtle/)
    Turtle,
}
||||
|
||||
impl RdfFormat { |
||||
/// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// RdfFormat::NTriples.iri(),
|
||||
/// "http://www.w3.org/ns/formats/N-Triples"
|
||||
/// )
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn iri(self) -> &'static str { |
||||
match self { |
||||
Self::N3 => "http://www.w3.org/ns/formats/N3", |
||||
Self::NQuads => "http://www.w3.org/ns/formats/N-Quads", |
||||
Self::NTriples => "http://www.w3.org/ns/formats/N-Triples", |
||||
Self::RdfXml => "http://www.w3.org/ns/formats/RDF_XML", |
||||
Self::TriG => "http://www.w3.org/ns/formats/TriG", |
||||
Self::Turtle => "http://www.w3.org/ns/formats/Turtle", |
||||
} |
||||
} |
||||
|
||||
/// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.media_type(), "application/n-triples")
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn media_type(self) -> &'static str { |
||||
match self { |
||||
Self::N3 => "text/n3", |
||||
Self::NQuads => "application/n-quads", |
||||
Self::NTriples => "application/n-triples", |
||||
Self::RdfXml => "application/rdf+xml", |
||||
Self::TriG => "application/trig", |
||||
Self::Turtle => "text/turtle", |
||||
} |
||||
} |
||||
|
||||
/// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.file_extension(), "nt")
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn file_extension(self) -> &'static str { |
||||
match self { |
||||
Self::N3 => "n3", |
||||
Self::NQuads => "nq", |
||||
Self::NTriples => "nt", |
||||
Self::RdfXml => "rdf", |
||||
Self::TriG => "trig", |
||||
Self::Turtle => "ttl", |
||||
} |
||||
} |
||||
|
||||
/// The format name.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.name(), "N-Triples")
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn name(self) -> &'static str { |
||||
match self { |
||||
Self::N3 => "N3", |
||||
Self::NQuads => "N-Quads", |
||||
Self::NTriples => "N-Triples", |
||||
Self::RdfXml => "RDF/XML", |
||||
Self::TriG => "TriG", |
||||
Self::Turtle => "Turtle", |
||||
} |
||||
} |
||||
|
||||
/// Checks if the formats supports [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) and not only [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.supports_datasets(), false);
|
||||
/// assert_eq!(RdfFormat::NQuads.supports_datasets(), true);
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn supports_datasets(self) -> bool { |
||||
matches!(self, Self::NQuads | Self::TriG) |
||||
} |
||||
|
||||
/// Checks if the formats supports [RDF-star quoted triples](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#dfn-quoted).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.supports_rdf_star(), true);
|
||||
/// assert_eq!(RdfFormat::RdfXml.supports_rdf_star(), false);
|
||||
/// ```
|
||||
#[inline] |
||||
#[cfg(feature = "rdf-star")] |
||||
pub const fn supports_rdf_star(self) -> bool { |
||||
matches!( |
||||
self, |
||||
Self::NTriples | Self::NQuads | Self::Turtle | Self::TriG |
||||
) |
||||
} |
||||
|
||||
/// Looks for a known format from a media type.
|
||||
///
|
||||
/// It supports some media type aliases.
|
||||
/// For example, "application/xml" is going to return `RdfFormat::RdfXml` even if it is not its canonical media type.
|
||||
///
|
||||
/// Example:
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// RdfFormat::from_media_type("text/turtle; charset=utf-8"),
|
||||
/// Some(RdfFormat::Turtle)
|
||||
/// )
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn from_media_type(media_type: &str) -> Option<Self> { |
||||
const MEDIA_SUBTYPES: [(&str, RdfFormat); 10] = [ |
||||
("n-quads", RdfFormat::NQuads), |
||||
("n-triples", RdfFormat::NTriples), |
||||
("n3", RdfFormat::N3), |
||||
("nquads", RdfFormat::NQuads), |
||||
("ntriples", RdfFormat::NTriples), |
||||
("plain", RdfFormat::NTriples), |
||||
("rdf+xml", RdfFormat::RdfXml), |
||||
("trig", RdfFormat::TriG), |
||||
("turtle", RdfFormat::Turtle), |
||||
("xml", RdfFormat::RdfXml), |
||||
]; |
||||
|
||||
let (r#type, subtype) = media_type |
||||
.split_once(';') |
||||
.unwrap_or((media_type, "")) |
||||
.0 |
||||
.split_once('/')?; |
||||
let r#type = r#type.trim(); |
||||
if !r#type.eq_ignore_ascii_case("application") && !r#type.eq_ignore_ascii_case("text") { |
||||
return None; |
||||
} |
||||
let subtype = subtype.trim(); |
||||
let subtype = subtype.strip_prefix("x-").unwrap_or(subtype); |
||||
for (candidate_subtype, candidate_id) in MEDIA_SUBTYPES { |
||||
if candidate_subtype.eq_ignore_ascii_case(subtype) { |
||||
return Some(candidate_id); |
||||
} |
||||
} |
||||
None |
||||
} |
||||
|
||||
/// Looks for a known format from an extension.
|
||||
///
|
||||
/// It supports some aliases.
|
||||
///
|
||||
/// Example:
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::from_extension("nt"), Some(RdfFormat::NTriples))
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn from_extension(extension: &str) -> Option<Self> { |
||||
const MEDIA_TYPES: [(&str, RdfFormat); 8] = [ |
||||
("n3", RdfFormat::N3), |
||||
("nq", RdfFormat::NQuads), |
||||
("nt", RdfFormat::NTriples), |
||||
("rdf", RdfFormat::RdfXml), |
||||
("trig", RdfFormat::TriG), |
||||
("ttl", RdfFormat::Turtle), |
||||
("txt", RdfFormat::NTriples), |
||||
("xml", RdfFormat::RdfXml), |
||||
]; |
||||
for (candidate_extension, candidate_id) in MEDIA_TYPES { |
||||
if candidate_extension.eq_ignore_ascii_case(extension) { |
||||
return Some(candidate_id); |
||||
} |
||||
} |
||||
None |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for RdfFormat { |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
f.write_str(self.name()) |
||||
} |
||||
} |
@ -1,19 +0,0 @@ |
||||
#![doc = include_str!("../README.md")] |
||||
#![doc(test(attr(deny(warnings))))] |
||||
#![cfg_attr(docsrs, feature(doc_auto_cfg))] |
||||
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
||||
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
||||
|
||||
mod error; |
||||
mod format; |
||||
mod parser; |
||||
mod serializer; |
||||
|
||||
pub use error::{RdfParseError, RdfSyntaxError, TextPosition}; |
||||
pub use format::RdfFormat; |
||||
#[cfg(feature = "async-tokio")] |
||||
pub use parser::FromTokioAsyncReadQuadReader; |
||||
pub use parser::{FromReadQuadReader, RdfParser}; |
||||
#[cfg(feature = "async-tokio")] |
||||
pub use serializer::ToTokioAsyncWriteQuadWriter; |
||||
pub use serializer::{RdfSerializer, ToWriteQuadWriter}; |
@ -1,807 +0,0 @@ |
||||
//! Utilities to read RDF graphs and datasets.
|
||||
|
||||
pub use crate::error::RdfParseError; |
||||
use crate::format::RdfFormat; |
||||
use oxrdf::{BlankNode, GraphName, IriParseError, Quad, Subject, Term, Triple}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxrdfxml::FromTokioAsyncReadRdfXmlReader; |
||||
use oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::n3::FromTokioAsyncReadN3Reader; |
||||
use oxttl::n3::{FromReadN3Reader, N3Parser, N3PrefixesIter, N3Quad, N3Term}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::nquads::FromTokioAsyncReadNQuadsReader; |
||||
use oxttl::nquads::{FromReadNQuadsReader, NQuadsParser}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::ntriples::FromTokioAsyncReadNTriplesReader; |
||||
use oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::trig::FromTokioAsyncReadTriGReader; |
||||
use oxttl::trig::{FromReadTriGReader, TriGParser, TriGPrefixesIter}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::turtle::FromTokioAsyncReadTurtleReader; |
||||
use oxttl::turtle::{FromReadTurtleReader, TurtleParser, TurtlePrefixesIter}; |
||||
use std::collections::HashMap; |
||||
use std::io::Read; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::AsyncRead; |
||||
|
||||
/// Parsers for RDF serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
|
||||
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
|
||||
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
|
||||
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
|
||||
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
|
||||
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
|
||||
///
|
||||
/// Note the useful options:
|
||||
/// - [`with_base_iri`](Self::with_base_iri) to resolve the relative IRIs.
|
||||
/// - [`rename_blank_nodes`](Self::rename_blank_nodes) to rename the blank nodes to auto-generated numbers to avoid conflicts when merging RDF graphs together.
|
||||
/// - [`without_named_graphs`](Self::without_named_graphs) to parse a single graph.
|
||||
/// - [`unchecked`](Self::unchecked) to skip some validations if the file is already known to be valid.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct RdfParser { |
||||
inner: RdfParserKind, |
||||
default_graph: GraphName, |
||||
without_named_graphs: bool, |
||||
rename_blank_nodes: bool, |
||||
} |
||||
|
||||
enum RdfParserKind { |
||||
N3(N3Parser), |
||||
NQuads(NQuadsParser), |
||||
NTriples(NTriplesParser), |
||||
RdfXml(RdfXmlParser), |
||||
TriG(TriGParser), |
||||
Turtle(TurtleParser), |
||||
} |
||||
|
||||
impl RdfParser { |
||||
/// Builds a parser for the given format.
|
||||
#[inline] |
||||
pub fn from_format(format: RdfFormat) -> Self { |
||||
Self { |
||||
inner: match format { |
||||
RdfFormat::N3 => RdfParserKind::N3(N3Parser::new()), |
||||
RdfFormat::NQuads => RdfParserKind::NQuads({ |
||||
#[cfg(feature = "rdf-star")] |
||||
{ |
||||
NQuadsParser::new().with_quoted_triples() |
||||
} |
||||
#[cfg(not(feature = "rdf-star"))] |
||||
{ |
||||
NQuadsParser::new() |
||||
} |
||||
}), |
||||
RdfFormat::NTriples => RdfParserKind::NTriples({ |
||||
#[cfg(feature = "rdf-star")] |
||||
{ |
||||
NTriplesParser::new().with_quoted_triples() |
||||
} |
||||
#[cfg(not(feature = "rdf-star"))] |
||||
{ |
||||
NTriplesParser::new() |
||||
} |
||||
}), |
||||
RdfFormat::RdfXml => RdfParserKind::RdfXml(RdfXmlParser::new()), |
||||
RdfFormat::TriG => RdfParserKind::TriG({ |
||||
#[cfg(feature = "rdf-star")] |
||||
{ |
||||
TriGParser::new().with_quoted_triples() |
||||
} |
||||
#[cfg(not(feature = "rdf-star"))] |
||||
{ |
||||
TriGParser::new() |
||||
} |
||||
}), |
||||
RdfFormat::Turtle => RdfParserKind::Turtle({ |
||||
#[cfg(feature = "rdf-star")] |
||||
{ |
||||
TurtleParser::new().with_quoted_triples() |
||||
} |
||||
#[cfg(not(feature = "rdf-star"))] |
||||
{ |
||||
TurtleParser::new() |
||||
} |
||||
}), |
||||
}, |
||||
default_graph: GraphName::DefaultGraph, |
||||
without_named_graphs: false, |
||||
rename_blank_nodes: false, |
||||
} |
||||
} |
||||
|
||||
/// The format the parser uses.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// RdfParser::from_format(RdfFormat::Turtle).format(),
|
||||
/// RdfFormat::Turtle
|
||||
/// );
|
||||
/// ```
|
||||
pub fn format(&self) -> RdfFormat { |
||||
match &self.inner { |
||||
RdfParserKind::N3(_) => RdfFormat::N3, |
||||
RdfParserKind::NQuads(_) => RdfFormat::NQuads, |
||||
RdfParserKind::NTriples(_) => RdfFormat::NTriples, |
||||
RdfParserKind::RdfXml(_) => RdfFormat::RdfXml, |
||||
RdfParserKind::TriG(_) => RdfFormat::TriG, |
||||
RdfParserKind::Turtle(_) => RdfFormat::Turtle, |
||||
} |
||||
} |
||||
|
||||
/// Provides an IRI that could be used to resolve the file relative IRIs.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "</s> </p> </o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::Turtle).with_base_iri("http://example.com")?;
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { |
||||
self.inner = match self.inner { |
||||
RdfParserKind::N3(p) => RdfParserKind::N3(p), |
||||
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p), |
||||
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p), |
||||
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?), |
||||
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?), |
||||
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?), |
||||
}; |
||||
Ok(self) |
||||
} |
||||
|
||||
/// Provides the name graph name that should replace the default graph in the returned quads.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::NamedNode;
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::Turtle)
|
||||
/// .with_default_graph(NamedNode::new("http://example.com/g")?);
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].graph_name.to_string(), "<http://example.com/g>");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn with_default_graph(mut self, default_graph: impl Into<GraphName>) -> Self { |
||||
self.default_graph = default_graph.into(); |
||||
self |
||||
} |
||||
|
||||
/// Sets that the parser must fail if parsing a named graph.
|
||||
///
|
||||
/// This function restricts the parser to only parse a single [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) and not an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NQuads).without_named_graphs();
|
||||
/// assert!(parser.parse_read(file.as_bytes()).next().unwrap().is_err());
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn without_named_graphs(mut self) -> Self { |
||||
self.without_named_graphs = true; |
||||
self |
||||
} |
||||
|
||||
/// Renames the blank nodes ids from the ones set in the serialization to random ids.
|
||||
///
|
||||
/// This allows to avoid id conflicts when merging graphs together.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "_:a <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let result1 = RdfParser::from_format(RdfFormat::NQuads)
|
||||
/// .rename_blank_nodes()
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
/// let result2 = RdfParser::from_format(RdfFormat::NQuads)
|
||||
/// .rename_blank_nodes()
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
/// assert_ne!(result1, result2);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn rename_blank_nodes(mut self) -> Self { |
||||
self.rename_blank_nodes = true; |
||||
self |
||||
} |
||||
|
||||
/// Assumes the file is valid to make parsing faster.
|
||||
///
|
||||
/// It will skip some validations.
|
||||
///
|
||||
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
|
||||
#[inline] |
||||
pub fn unchecked(mut self) -> Self { |
||||
self.inner = match self.inner { |
||||
RdfParserKind::N3(p) => RdfParserKind::N3(p.unchecked()), |
||||
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p.unchecked()), |
||||
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p.unchecked()), |
||||
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.unchecked()), |
||||
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.unchecked()), |
||||
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.unchecked()), |
||||
}; |
||||
self |
||||
} |
||||
|
||||
/// Parses from a [`Read`] implementation and returns an iterator of quads.
|
||||
///
|
||||
/// Reads are buffered.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
pub fn parse_read<R: Read>(self, reader: R) -> FromReadQuadReader<R> { |
||||
FromReadQuadReader { |
||||
parser: match self.inner { |
||||
RdfParserKind::N3(p) => FromReadQuadReaderKind::N3(p.parse_read(reader)), |
||||
RdfParserKind::NQuads(p) => FromReadQuadReaderKind::NQuads(p.parse_read(reader)), |
||||
RdfParserKind::NTriples(p) => { |
||||
FromReadQuadReaderKind::NTriples(p.parse_read(reader)) |
||||
} |
||||
RdfParserKind::RdfXml(p) => FromReadQuadReaderKind::RdfXml(p.parse_read(reader)), |
||||
RdfParserKind::TriG(p) => FromReadQuadReaderKind::TriG(p.parse_read(reader)), |
||||
RdfParserKind::Turtle(p) => FromReadQuadReaderKind::Turtle(p.parse_read(reader)), |
||||
}, |
||||
mapper: QuadMapper { |
||||
default_graph: self.default_graph.clone(), |
||||
without_named_graphs: self.without_named_graphs, |
||||
blank_node_map: self.rename_blank_nodes.then(HashMap::new), |
||||
}, |
||||
} |
||||
} |
||||
|
||||
/// Parses from a Tokio [`AsyncRead`] implementation and returns an async iterator of quads.
///
/// Reads are buffered.
///
/// The parser is consumed: its options are moved into the returned reader.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
/// if let Some(quad) = reader.next().await {
///     assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
/// }
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
    self,
    reader: R,
) -> FromTokioAsyncReadQuadReader<R> {
    // `self` is owned, so its fields can be moved out instead of being cloned.
    let Self {
        inner,
        default_graph,
        without_named_graphs,
        rename_blank_nodes,
    } = self;
    FromTokioAsyncReadQuadReader {
        parser: match inner {
            RdfParserKind::N3(p) => {
                FromTokioAsyncReadQuadReaderKind::N3(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::NQuads(p) => {
                FromTokioAsyncReadQuadReaderKind::NQuads(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::NTriples(p) => {
                FromTokioAsyncReadQuadReaderKind::NTriples(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::RdfXml(p) => {
                FromTokioAsyncReadQuadReaderKind::RdfXml(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::TriG(p) => {
                FromTokioAsyncReadQuadReaderKind::TriG(p.parse_tokio_async_read(reader))
            }
            RdfParserKind::Turtle(p) => {
                FromTokioAsyncReadQuadReaderKind::Turtle(p.parse_tokio_async_read(reader))
            }
        },
        mapper: QuadMapper {
            default_graph,
            without_named_graphs,
            // The renaming map only exists when blank node renaming has been requested.
            blank_node_map: rename_blank_nodes.then(HashMap::new),
        },
    }
}
||||
} |
||||
|
||||
impl From<RdfFormat> for RdfParser { |
||||
fn from(format: RdfFormat) -> Self { |
||||
Self::from_format(format) |
||||
} |
||||
} |
||||
|
||||
/// Parses a RDF file from a [`Read`] implementation. Can be built using [`RdfParser::parse_read`].
|
||||
///
|
||||
/// Reads are buffered.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct FromReadQuadReader<R: Read> { |
||||
parser: FromReadQuadReaderKind<R>, |
||||
mapper: QuadMapper, |
||||
} |
||||
|
||||
enum FromReadQuadReaderKind<R: Read> { |
||||
N3(FromReadN3Reader<R>), |
||||
NQuads(FromReadNQuadsReader<R>), |
||||
NTriples(FromReadNTriplesReader<R>), |
||||
RdfXml(FromReadRdfXmlReader<R>), |
||||
TriG(FromReadTriGReader<R>), |
||||
Turtle(FromReadTurtleReader<R>), |
||||
} |
||||
|
||||
impl<R: Read> Iterator for FromReadQuadReader<R> { |
||||
type Item = Result<Quad, RdfParseError>; |
||||
|
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
Some(match &mut self.parser { |
||||
FromReadQuadReaderKind::N3(parser) => match parser.next()? { |
||||
Ok(quad) => self.mapper.map_n3_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::NQuads(parser) => match parser.next()? { |
||||
Ok(quad) => self.mapper.map_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::NTriples(parser) => match parser.next()? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::RdfXml(parser) => match parser.next()? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::TriG(parser) => match parser.next()? { |
||||
Ok(quad) => self.mapper.map_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::Turtle(parser) => match parser.next()? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl<R: Read> FromReadQuadReader<R> { |
||||
/// The list of IRI prefixes considered at the current step of the parsing.
|
||||
///
|
||||
/// This method returns (prefix name, prefix value) tuples.
|
||||
/// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
|
||||
/// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
|
||||
///
|
||||
/// An empty iterator is return if the format does not support prefixes.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" ."#;
|
||||
///
|
||||
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
|
||||
/// assert!(reader.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
|
||||
///
|
||||
/// reader.next().unwrap()?; // We read the first triple
|
||||
/// assert_eq!(
|
||||
/// reader.prefixes().collect::<Vec<_>>(),
|
||||
/// [("schema", "http://schema.org/")]
|
||||
/// ); // There are now prefixes
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn prefixes(&self) -> PrefixesIter<'_> { |
||||
PrefixesIter { |
||||
inner: match &self.parser { |
||||
FromReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()), |
||||
FromReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()), |
||||
FromReadQuadReaderKind::Turtle(p) => PrefixesIterKind::Turtle(p.prefixes()), |
||||
FromReadQuadReaderKind::NQuads(_) |
||||
| FromReadQuadReaderKind::NTriples(_) |
||||
| FromReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */ |
||||
}, |
||||
} |
||||
} |
||||
|
||||
/// The base IRI considered at the current step of the parsing.
|
||||
///
|
||||
/// `None` is returned if no base IRI is set or the format does not support base IRIs.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" ."#;
|
||||
///
|
||||
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
|
||||
/// assert!(reader.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
|
||||
///
|
||||
/// reader.next().unwrap()?; // We read the first triple
|
||||
/// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI.
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn base_iri(&self) -> Option<&str> { |
||||
match &self.parser { |
||||
FromReadQuadReaderKind::N3(p) => p.base_iri(), |
||||
FromReadQuadReaderKind::TriG(p) => p.base_iri(), |
||||
FromReadQuadReaderKind::Turtle(p) => p.base_iri(), |
||||
FromReadQuadReaderKind::NQuads(_) |
||||
| FromReadQuadReaderKind::NTriples(_) |
||||
| FromReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
|
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Async reader of quads parsed from a Tokio [`AsyncRead`] implementation. Can be built using [`RdfParser::parse_tokio_async_read`].
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
/// if let Some(quad) = reader.next().await {
///     assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
/// }
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct FromTokioAsyncReadQuadReader<R: AsyncRead + Unpin> {
    /// The format-specific async reader producing triples or quads.
    parser: FromTokioAsyncReadQuadReaderKind<R>,
    /// Converts what the format-specific reader emits into the returned quads.
    mapper: QuadMapper,
}
||||
|
||||
/// The format-specific async reader wrapped by [`FromTokioAsyncReadQuadReader`],
/// one variant per supported format.
#[cfg(feature = "async-tokio")]
enum FromTokioAsyncReadQuadReaderKind<R: AsyncRead + Unpin> {
    N3(FromTokioAsyncReadN3Reader<R>),
    NQuads(FromTokioAsyncReadNQuadsReader<R>),
    NTriples(FromTokioAsyncReadNTriplesReader<R>),
    RdfXml(FromTokioAsyncReadRdfXmlReader<R>),
    TriG(FromTokioAsyncReadTriGReader<R>),
    Turtle(FromTokioAsyncReadTurtleReader<R>),
}
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
impl<R: AsyncRead + Unpin> FromTokioAsyncReadQuadReader<R> { |
||||
pub async fn next(&mut self) -> Option<Result<Quad, RdfParseError>> { |
||||
Some(match &mut self.parser { |
||||
FromTokioAsyncReadQuadReaderKind::N3(parser) => match parser.next().await? { |
||||
Ok(quad) => self.mapper.map_n3_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromTokioAsyncReadQuadReaderKind::NQuads(parser) => match parser.next().await? { |
||||
Ok(quad) => self.mapper.map_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromTokioAsyncReadQuadReaderKind::NTriples(parser) => match parser.next().await? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromTokioAsyncReadQuadReaderKind::RdfXml(parser) => match parser.next().await? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromTokioAsyncReadQuadReaderKind::TriG(parser) => match parser.next().await? { |
||||
Ok(quad) => self.mapper.map_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromTokioAsyncReadQuadReaderKind::Turtle(parser) => match parser.next().await? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
}) |
||||
} |
||||
|
||||
/// The list of IRI prefixes considered at the current step of the parsing.
|
||||
///
|
||||
/// This method returns (prefix name, prefix value) tuples.
|
||||
/// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
|
||||
/// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
|
||||
///
|
||||
/// An empty iterator is return if the format does not support prefixes.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" ."#;
|
||||
///
|
||||
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
|
||||
/// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
|
||||
///
|
||||
/// reader.next().await.unwrap()?; // We read the first triple
|
||||
/// assert_eq!(
|
||||
/// reader.prefixes().collect::<Vec<_>>(),
|
||||
/// [("schema", "http://schema.org/")]
|
||||
/// ); // There are now prefixes
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
pub fn prefixes(&self) -> PrefixesIter<'_> { |
||||
PrefixesIter { |
||||
inner: match &self.parser { |
||||
FromTokioAsyncReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()), |
||||
FromTokioAsyncReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()), |
||||
FromTokioAsyncReadQuadReaderKind::Turtle(p) => { |
||||
PrefixesIterKind::Turtle(p.prefixes()) |
||||
} |
||||
FromTokioAsyncReadQuadReaderKind::NQuads(_) |
||||
| FromTokioAsyncReadQuadReaderKind::NTriples(_) |
||||
| FromTokioAsyncReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */ |
||||
}, |
||||
} |
||||
} |
||||
|
||||
/// The base IRI considered at the current step of the parsing.
|
||||
///
|
||||
/// `None` is returned if no base IRI is set or the format does not support base IRIs.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" ."#;
|
||||
///
|
||||
/// let mut reader =
|
||||
/// RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
|
||||
/// assert!(reader.base_iri().is_none()); // No base IRI at the beginning
|
||||
///
|
||||
/// reader.next().await.unwrap()?; // We read the first triple
|
||||
/// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
pub fn base_iri(&self) -> Option<&str> { |
||||
match &self.parser { |
||||
FromTokioAsyncReadQuadReaderKind::N3(p) => p.base_iri(), |
||||
FromTokioAsyncReadQuadReaderKind::TriG(p) => p.base_iri(), |
||||
FromTokioAsyncReadQuadReaderKind::Turtle(p) => p.base_iri(), |
||||
FromTokioAsyncReadQuadReaderKind::NQuads(_) |
||||
| FromTokioAsyncReadQuadReaderKind::NTriples(_) |
||||
| FromTokioAsyncReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
|
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Iterator on the file prefixes.
|
||||
///
|
||||
/// See [`FromReadQuadReader::prefixes`].
|
||||
pub struct PrefixesIter<'a> { |
||||
inner: PrefixesIterKind<'a>, |
||||
} |
||||
|
||||
enum PrefixesIterKind<'a> { |
||||
Turtle(TurtlePrefixesIter<'a>), |
||||
TriG(TriGPrefixesIter<'a>), |
||||
N3(N3PrefixesIter<'a>), |
||||
None, |
||||
} |
||||
|
||||
impl<'a> Iterator for PrefixesIter<'a> { |
||||
type Item = (&'a str, &'a str); |
||||
|
||||
#[inline] |
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
match &mut self.inner { |
||||
PrefixesIterKind::Turtle(iter) => iter.next(), |
||||
PrefixesIterKind::TriG(iter) => iter.next(), |
||||
PrefixesIterKind::N3(iter) => iter.next(), |
||||
PrefixesIterKind::None => None, |
||||
} |
||||
} |
||||
|
||||
#[inline] |
||||
fn size_hint(&self) -> (usize, Option<usize>) { |
||||
match &self.inner { |
||||
PrefixesIterKind::Turtle(iter) => iter.size_hint(), |
||||
PrefixesIterKind::TriG(iter) => iter.size_hint(), |
||||
PrefixesIterKind::N3(iter) => iter.size_hint(), |
||||
PrefixesIterKind::None => (0, Some(0)), |
||||
} |
||||
} |
||||
} |
||||
|
||||
struct QuadMapper { |
||||
default_graph: GraphName, |
||||
without_named_graphs: bool, |
||||
blank_node_map: Option<HashMap<BlankNode, BlankNode>>, |
||||
} |
||||
|
||||
impl QuadMapper { |
||||
fn map_blank_node(&mut self, node: BlankNode) -> BlankNode { |
||||
if let Some(blank_node_map) = &mut self.blank_node_map { |
||||
blank_node_map |
||||
.entry(node) |
||||
.or_insert_with(BlankNode::default) |
||||
.clone() |
||||
} else { |
||||
node |
||||
} |
||||
} |
||||
|
||||
fn map_subject(&mut self, node: Subject) -> Subject { |
||||
match node { |
||||
Subject::NamedNode(node) => node.into(), |
||||
Subject::BlankNode(node) => self.map_blank_node(node).into(), |
||||
#[cfg(feature = "rdf-star")] |
||||
Subject::Triple(triple) => self.map_triple(*triple).into(), |
||||
} |
||||
} |
||||
|
||||
fn map_term(&mut self, node: Term) -> Term { |
||||
match node { |
||||
Term::NamedNode(node) => node.into(), |
||||
Term::BlankNode(node) => self.map_blank_node(node).into(), |
||||
Term::Literal(literal) => literal.into(), |
||||
#[cfg(feature = "rdf-star")] |
||||
Term::Triple(triple) => self.map_triple(*triple).into(), |
||||
} |
||||
} |
||||
|
||||
fn map_triple(&mut self, triple: Triple) -> Triple { |
||||
Triple { |
||||
subject: self.map_subject(triple.subject), |
||||
predicate: triple.predicate, |
||||
object: self.map_term(triple.object), |
||||
} |
||||
} |
||||
|
||||
fn map_graph_name(&mut self, graph_name: GraphName) -> Result<GraphName, RdfParseError> { |
||||
match graph_name { |
||||
GraphName::NamedNode(node) => { |
||||
if self.without_named_graphs { |
||||
Err(RdfParseError::msg("Named graphs are not allowed")) |
||||
} else { |
||||
Ok(node.into()) |
||||
} |
||||
} |
||||
GraphName::BlankNode(node) => { |
||||
if self.without_named_graphs { |
||||
Err(RdfParseError::msg("Named graphs are not allowed")) |
||||
} else { |
||||
Ok(self.map_blank_node(node).into()) |
||||
} |
||||
} |
||||
GraphName::DefaultGraph => Ok(self.default_graph.clone()), |
||||
} |
||||
} |
||||
|
||||
fn map_quad(&mut self, quad: Quad) -> Result<Quad, RdfParseError> { |
||||
Ok(Quad { |
||||
subject: self.map_subject(quad.subject), |
||||
predicate: quad.predicate, |
||||
object: self.map_term(quad.object), |
||||
graph_name: self.map_graph_name(quad.graph_name)?, |
||||
}) |
||||
} |
||||
|
||||
fn map_triple_to_quad(&mut self, triple: Triple) -> Quad { |
||||
self.map_triple(triple).in_graph(self.default_graph.clone()) |
||||
} |
||||
|
||||
fn map_n3_quad(&mut self, quad: N3Quad) -> Result<Quad, RdfParseError> { |
||||
Ok(Quad { |
||||
subject: match quad.subject { |
||||
N3Term::NamedNode(s) => Ok(s.into()), |
||||
N3Term::BlankNode(s) => Ok(self.map_blank_node(s).into()), |
||||
N3Term::Literal(_) => Err(RdfParseError::msg( |
||||
"literals are not allowed in regular RDF subjects", |
||||
)), |
||||
#[cfg(feature = "rdf-star")] |
||||
N3Term::Triple(s) => Ok(self.map_triple(*s).into()), |
||||
N3Term::Variable(_) => Err(RdfParseError::msg( |
||||
"variables are not allowed in regular RDF subjects", |
||||
)), |
||||
}?, |
||||
predicate: match quad.predicate { |
||||
N3Term::NamedNode(p) => Ok(p), |
||||
N3Term::BlankNode(_) => Err(RdfParseError::msg( |
||||
"blank nodes are not allowed in regular RDF predicates", |
||||
)), |
||||
N3Term::Literal(_) => Err(RdfParseError::msg( |
||||
"literals are not allowed in regular RDF predicates", |
||||
)), |
||||
#[cfg(feature = "rdf-star")] |
||||
N3Term::Triple(_) => Err(RdfParseError::msg( |
||||
"quoted triples are not allowed in regular RDF predicates", |
||||
)), |
||||
N3Term::Variable(_) => Err(RdfParseError::msg( |
||||
"variables are not allowed in regular RDF predicates", |
||||
)), |
||||
}?, |
||||
object: match quad.object { |
||||
N3Term::NamedNode(o) => Ok(o.into()), |
||||
N3Term::BlankNode(o) => Ok(self.map_blank_node(o).into()), |
||||
N3Term::Literal(o) => Ok(o.into()), |
||||
#[cfg(feature = "rdf-star")] |
||||
N3Term::Triple(o) => Ok(self.map_triple(*o).into()), |
||||
N3Term::Variable(_) => Err(RdfParseError::msg( |
||||
"variables are not allowed in regular RDF objects", |
||||
)), |
||||
}?, |
||||
graph_name: self.map_graph_name(quad.graph_name)?, |
||||
}) |
||||
} |
||||
} |
@ -1,410 +0,0 @@ |
||||
//! Utilities to write RDF graphs and datasets.
|
||||
|
||||
use crate::format::RdfFormat; |
||||
use oxrdf::{GraphNameRef, IriParseError, QuadRef, TripleRef}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxrdfxml::ToTokioAsyncWriteRdfXmlWriter; |
||||
use oxrdfxml::{RdfXmlSerializer, ToWriteRdfXmlWriter}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::nquads::ToTokioAsyncWriteNQuadsWriter; |
||||
use oxttl::nquads::{NQuadsSerializer, ToWriteNQuadsWriter}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::ntriples::ToTokioAsyncWriteNTriplesWriter; |
||||
use oxttl::ntriples::{NTriplesSerializer, ToWriteNTriplesWriter}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::trig::ToTokioAsyncWriteTriGWriter; |
||||
use oxttl::trig::{ToWriteTriGWriter, TriGSerializer}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use oxttl::turtle::ToTokioAsyncWriteTurtleWriter; |
||||
use oxttl::turtle::{ToWriteTurtleWriter, TurtleSerializer}; |
||||
use std::io::{self, Write}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::AsyncWrite; |
||||
|
||||
/// A serializer for RDF serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
|
||||
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
|
||||
/// * [canonical](https://www.w3.org/TR/n-triples/#canonical-ntriples) [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
|
||||
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
|
||||
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
|
||||
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
/// use oxrdf::{Quad, NamedNode};
|
||||
///
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
|
||||
/// writer.write_quad(&Quad {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// graph_name: NamedNode::new("http://example.com/g")?.into()
|
||||
/// })?;
|
||||
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct RdfSerializer { |
||||
inner: RdfSerializerKind, |
||||
} |
||||
|
||||
enum RdfSerializerKind { |
||||
NQuads(NQuadsSerializer), |
||||
NTriples(NTriplesSerializer), |
||||
RdfXml(RdfXmlSerializer), |
||||
TriG(TriGSerializer), |
||||
Turtle(TurtleSerializer), |
||||
} |
||||
|
||||
impl RdfSerializer { |
||||
/// Builds a serializer for the given format
|
||||
#[inline] |
||||
pub fn from_format(format: RdfFormat) -> Self { |
||||
Self { |
||||
inner: match format { |
||||
RdfFormat::NQuads => RdfSerializerKind::NQuads(NQuadsSerializer::new()), |
||||
RdfFormat::NTriples => RdfSerializerKind::NTriples(NTriplesSerializer::new()), |
||||
RdfFormat::RdfXml => RdfSerializerKind::RdfXml(RdfXmlSerializer::new()), |
||||
RdfFormat::TriG => RdfSerializerKind::TriG(TriGSerializer::new()), |
||||
RdfFormat::Turtle | RdfFormat::N3 => { |
||||
RdfSerializerKind::Turtle(TurtleSerializer::new()) |
||||
} |
||||
}, |
||||
} |
||||
} |
||||
|
||||
/// The format the serializer serializes to.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// RdfSerializer::from_format(RdfFormat::Turtle).format(),
|
||||
/// RdfFormat::Turtle
|
||||
/// );
|
||||
/// ```
|
||||
pub fn format(&self) -> RdfFormat { |
||||
match &self.inner { |
||||
RdfSerializerKind::NQuads(_) => RdfFormat::NQuads, |
||||
RdfSerializerKind::NTriples(_) => RdfFormat::NTriples, |
||||
RdfSerializerKind::RdfXml(_) => RdfFormat::RdfXml, |
||||
RdfSerializerKind::TriG(_) => RdfFormat::TriG, |
||||
RdfSerializerKind::Turtle(_) => RdfFormat::Turtle, |
||||
} |
||||
} |
||||
|
||||
/// If the format supports it, sets a prefix.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::vocab::rdf;
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
///
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::Turtle)
|
||||
/// .with_prefix("schema", "http://schema.org/")?
|
||||
/// .serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef {
|
||||
/// subject: NamedNodeRef::new("http://example.com/s")?.into(),
|
||||
/// predicate: rdf::TYPE.into(),
|
||||
/// object: NamedNodeRef::new("http://schema.org/Person")?.into(),
|
||||
/// })?;
|
||||
/// assert_eq!(
|
||||
/// writer.finish()?,
|
||||
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com/s> a schema:Person .\n"
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn with_prefix( |
||||
mut self, |
||||
prefix_name: impl Into<String>, |
||||
prefix_iri: impl Into<String>, |
||||
) -> Result<Self, IriParseError> { |
||||
self.inner = match self.inner { |
||||
RdfSerializerKind::NQuads(s) => RdfSerializerKind::NQuads(s), |
||||
RdfSerializerKind::NTriples(s) => RdfSerializerKind::NTriples(s), |
||||
RdfSerializerKind::RdfXml(s) => { |
||||
RdfSerializerKind::RdfXml(s.with_prefix(prefix_name, prefix_iri)?) |
||||
} |
||||
RdfSerializerKind::TriG(s) => { |
||||
RdfSerializerKind::TriG(s.with_prefix(prefix_name, prefix_iri)?) |
||||
} |
||||
RdfSerializerKind::Turtle(s) => { |
||||
RdfSerializerKind::Turtle(s.with_prefix(prefix_name, prefix_iri)?) |
||||
} |
||||
}; |
||||
Ok(self) |
||||
} |
||||
|
||||
/// Writes to a [`Write`] implementation.
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
/// use oxrdf::{Quad, NamedNode};
|
||||
///
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
|
||||
/// writer.write_quad(&Quad {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// graph_name: NamedNode::new("http://example.com/g")?.into()
|
||||
/// })?;
|
||||
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteQuadWriter<W> { |
||||
ToWriteQuadWriter { |
||||
formatter: match self.inner { |
||||
RdfSerializerKind::NQuads(s) => { |
||||
ToWriteQuadWriterKind::NQuads(s.serialize_to_write(write)) |
||||
} |
||||
RdfSerializerKind::NTriples(s) => { |
||||
ToWriteQuadWriterKind::NTriples(s.serialize_to_write(write)) |
||||
} |
||||
RdfSerializerKind::RdfXml(s) => { |
||||
ToWriteQuadWriterKind::RdfXml(s.serialize_to_write(write)) |
||||
} |
||||
RdfSerializerKind::TriG(s) => { |
||||
ToWriteQuadWriterKind::TriG(s.serialize_to_write(write)) |
||||
} |
||||
RdfSerializerKind::Turtle(s) => { |
||||
ToWriteQuadWriterKind::Turtle(s.serialize_to_write(write)) |
||||
} |
||||
}, |
||||
} |
||||
} |
||||
|
||||
/// Writes to a Tokio [`AsyncWrite`] implementation.
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
/// use oxrdf::{Quad, NamedNode};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> std::io::Result<()> {
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
|
||||
/// writer.write_quad(&Quad {
|
||||
/// subject: NamedNode::new_unchecked("http://example.com/s").into(),
|
||||
/// predicate: NamedNode::new_unchecked("http://example.com/p"),
|
||||
/// object: NamedNode::new_unchecked("http://example.com/o").into(),
|
||||
/// graph_name: NamedNode::new_unchecked("http://example.com/g").into()
|
||||
/// }).await?;
|
||||
/// assert_eq!(writer.finish().await?, "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>( |
||||
self, |
||||
write: W, |
||||
) -> ToTokioAsyncWriteQuadWriter<W> { |
||||
ToTokioAsyncWriteQuadWriter { |
||||
formatter: match self.inner { |
||||
RdfSerializerKind::NQuads(s) => { |
||||
ToTokioAsyncWriteQuadWriterKind::NQuads(s.serialize_to_tokio_async_write(write)) |
||||
} |
||||
RdfSerializerKind::NTriples(s) => ToTokioAsyncWriteQuadWriterKind::NTriples( |
||||
s.serialize_to_tokio_async_write(write), |
||||
), |
||||
RdfSerializerKind::RdfXml(s) => { |
||||
ToTokioAsyncWriteQuadWriterKind::RdfXml(s.serialize_to_tokio_async_write(write)) |
||||
} |
||||
RdfSerializerKind::TriG(s) => { |
||||
ToTokioAsyncWriteQuadWriterKind::TriG(s.serialize_to_tokio_async_write(write)) |
||||
} |
||||
RdfSerializerKind::Turtle(s) => { |
||||
ToTokioAsyncWriteQuadWriterKind::Turtle(s.serialize_to_tokio_async_write(write)) |
||||
} |
||||
}, |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<RdfFormat> for RdfSerializer { |
||||
fn from(format: RdfFormat) -> Self { |
||||
Self::from_format(format) |
||||
} |
||||
} |
||||
|
||||
/// Writes quads or triples to a [`Write`] implementation.
|
||||
///
|
||||
/// Can be built using [`RdfSerializer::serialize_to_write`].
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
/// use oxrdf::{Quad, NamedNode};
|
||||
///
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
|
||||
/// writer.write_quad(&Quad {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
|
||||
/// })?;
|
||||
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct ToWriteQuadWriter<W: Write> { |
||||
formatter: ToWriteQuadWriterKind<W>, |
||||
} |
||||
|
||||
enum ToWriteQuadWriterKind<W: Write> { |
||||
NQuads(ToWriteNQuadsWriter<W>), |
||||
NTriples(ToWriteNTriplesWriter<W>), |
||||
RdfXml(ToWriteRdfXmlWriter<W>), |
||||
TriG(ToWriteTriGWriter<W>), |
||||
Turtle(ToWriteTurtleWriter<W>), |
||||
} |
||||
|
||||
impl<W: Write> ToWriteQuadWriter<W> { |
||||
/// Writes a [`QuadRef`]
|
||||
pub fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> { |
||||
match &mut self.formatter { |
||||
ToWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad), |
||||
ToWriteQuadWriterKind::NTriples(writer) => writer.write_triple(to_triple(quad)?), |
||||
ToWriteQuadWriterKind::RdfXml(writer) => writer.write_triple(to_triple(quad)?), |
||||
ToWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad), |
||||
ToWriteQuadWriterKind::Turtle(writer) => writer.write_triple(to_triple(quad)?), |
||||
} |
||||
} |
||||
|
||||
/// Writes a [`TripleRef`]
|
||||
pub fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> { |
||||
self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph)) |
||||
} |
||||
|
||||
/// Writes the last bytes of the file
|
||||
///
|
||||
/// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](io::BufWriter).
|
||||
pub fn finish(self) -> io::Result<W> { |
||||
Ok(match self.formatter { |
||||
ToWriteQuadWriterKind::NQuads(writer) => writer.finish(), |
||||
ToWriteQuadWriterKind::NTriples(writer) => writer.finish(), |
||||
ToWriteQuadWriterKind::RdfXml(writer) => writer.finish()?, |
||||
ToWriteQuadWriterKind::TriG(writer) => writer.finish()?, |
||||
ToWriteQuadWriterKind::Turtle(writer) => writer.finish()?, |
||||
}) |
||||
} |
||||
} |
||||
|
||||
/// Writes quads or triples to a Tokio [`AsyncWrite`] implementation.
///
/// Can be built using [`RdfSerializer::serialize_to_tokio_async_write`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// <div class="warning">
///
/// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> std::io::Result<()> {
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
/// writer.write_quad(&Quad {
///     subject: NamedNode::new_unchecked("http://example.com/s").into(),
///     predicate: NamedNode::new_unchecked("http://example.com/p"),
///     object: NamedNode::new_unchecked("http://example.com/o").into(),
///     graph_name: NamedNode::new_unchecked("http://example.com/g").into()
/// }).await?;
/// assert_eq!(writer.finish().await?, "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
/// # Ok(())
/// # }
/// ```
#[must_use]
#[cfg(feature = "async-tokio")]
pub struct ToTokioAsyncWriteQuadWriter<W: AsyncWrite + Unpin> {
    formatter: ToTokioAsyncWriteQuadWriterKind<W>,
}

/// The format-specific asynchronous writer wrapped by [`ToTokioAsyncWriteQuadWriter`].
#[cfg(feature = "async-tokio")]
enum ToTokioAsyncWriteQuadWriterKind<W: AsyncWrite + Unpin> {
    NQuads(ToTokioAsyncWriteNQuadsWriter<W>),
    NTriples(ToTokioAsyncWriteNTriplesWriter<W>),
    RdfXml(ToTokioAsyncWriteRdfXmlWriter<W>),
    TriG(ToTokioAsyncWriteTriGWriter<W>),
    Turtle(ToTokioAsyncWriteTurtleWriter<W>),
}

#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteQuadWriter<W> {
    /// Writes a [`QuadRef`]
    ///
    /// With a graph format (N-Triples, RDF/XML, Turtle) the quad must be in the default graph,
    /// an [`io::ErrorKind::InvalidInput`] error is returned otherwise.
    pub async fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
        match &mut self.formatter {
            ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad).await,
            ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => {
                writer.write_triple(to_triple(quad)?).await
            }
            ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => {
                writer.write_triple(to_triple(quad)?).await
            }
            ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad).await,
            ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => {
                writer.write_triple(to_triple(quad)?).await
            }
        }
    }

    /// Writes a [`TripleRef`]
    ///
    /// The triple is written as a quad of the default graph.
    pub async fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
        self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph))
            .await
    }

    /// Writes the last bytes of the file
    ///
    /// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](tokio::io::BufWriter).
    pub async fn finish(self) -> io::Result<W> {
        Ok(match self.formatter {
            ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.finish(),
            ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => writer.finish(),
            ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => writer.finish().await?,
            ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.finish().await?,
            ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => writer.finish().await?,
        })
    }
}
||||
|
||||
fn to_triple<'a>(quad: impl Into<QuadRef<'a>>) -> io::Result<TripleRef<'a>> { |
||||
let quad = quad.into(); |
||||
if quad.graph_name.is_default_graph() { |
||||
Ok(quad.into()) |
||||
} else { |
||||
Err(io::Error::new( |
||||
io::ErrorKind::InvalidInput, |
||||
"Only quads in the default graph can be serialized to a RDF graph format", |
||||
)) |
||||
} |
||||
} |
@ -1,36 +0,0 @@ |
||||
[package] |
||||
name = "oxrdfxml" |
||||
version = "0.1.0-alpha.5" |
||||
authors.workspace = true |
||||
license.workspace = true |
||||
readme = "README.md" |
||||
keywords = ["RDFXML", "XML", "RDF"] |
||||
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfxml" |
||||
description = """ |
||||
Parser and serializer for the RDF/XML format |
||||
""" |
||||
documentation = "https://docs.rs/oxrdfxml" |
||||
edition.workspace = true |
||||
rust-version.workspace = true |
||||
|
||||
[features] |
||||
default = [] |
||||
async-tokio = ["dep:tokio", "quick-xml/async-tokio"] |
||||
|
||||
[dependencies] |
||||
oxilangtag.workspace = true |
||||
oxiri.workspace = true |
||||
oxrdf.workspace = true |
||||
quick-xml.workspace = true |
||||
thiserror.workspace = true |
||||
tokio = { workspace = true, optional = true, features = ["io-util"] } |
||||
|
||||
[dev-dependencies] |
||||
tokio = { workspace = true, features = ["rt", "macros"] } |
||||
|
||||
[lints] |
||||
workspace = true |
||||
|
||||
[package.metadata.docs.rs] |
||||
all-features = true |
||||
rustdoc-args = ["--cfg", "docsrs"] |
@ -1,56 +0,0 @@ |
||||
OxRDF/XML |
||||
========= |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/oxrdfxml.svg)](https://crates.io/crates/oxrdfxml) |
||||
[![Released API docs](https://docs.rs/oxrdfxml/badge.svg)](https://docs.rs/oxrdfxml) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfxml)](https://crates.io/crates/oxrdfxml) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
||||
|
||||
OxRdfXml is a parser and serializer for [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/). |
||||
|
||||
The entry points of this library are the two [`RdfXmlParser`] and [`RdfXmlSerializer`] structs. |
||||
|
||||
Usage example counting the number of people in an RDF/XML file: |
||||
|
||||
```rust |
||||
use oxrdf::{NamedNodeRef, vocab::rdf}; |
||||
use oxrdfxml::RdfXmlParser; |
||||
|
||||
fn main() { |
||||
let file = br#"<?xml version="1.0"?> |
||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/"> |
||||
<rdf:Description rdf:about="http://example.com/foo"> |
||||
<rdf:type rdf:resource="http://schema.org/Person" /> |
||||
<schema:name>Foo</schema:name> |
||||
</rdf:Description> |
||||
<schema:Person rdf:about="http://example.com/bar" schema:name="Bar" /> |
||||
</rdf:RDF>"#; |
||||
|
||||
let schema_person = NamedNodeRef::new("http://schema.org/Person").unwrap(); |
||||
let mut count = 0; |
||||
for triple in RdfXmlParser::new().parse_read(file.as_ref()) { |
||||
let triple = triple.unwrap(); |
||||
if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { |
||||
count += 1; |
||||
} |
||||
} |
||||
assert_eq!(2, count); |
||||
} |
||||
``` |
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||
`<http://opensource.org/licenses/MIT>`) |
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
@ -1,89 +0,0 @@ |
||||
use oxilangtag::LanguageTagParseError; |
||||
use oxiri::IriParseError; |
||||
use std::io; |
||||
use std::sync::Arc; |
||||
|
||||
/// Error returned during RDF/XML parsing.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum RdfXmlParseError { |
||||
/// I/O error during parsing (file not found...).
|
||||
#[error(transparent)] |
||||
Io(#[from] io::Error), |
||||
/// An error in the file syntax.
|
||||
#[error(transparent)] |
||||
Syntax(#[from] RdfXmlSyntaxError), |
||||
} |
||||
|
||||
impl From<RdfXmlParseError> for io::Error { |
||||
#[inline] |
||||
fn from(error: RdfXmlParseError) -> Self { |
||||
match error { |
||||
RdfXmlParseError::Io(error) => error, |
||||
RdfXmlParseError::Syntax(error) => error.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<quick_xml::Error> for RdfXmlParseError { |
||||
#[inline] |
||||
fn from(error: quick_xml::Error) -> Self { |
||||
match error { |
||||
quick_xml::Error::Io(error) => { |
||||
Self::Io(Arc::try_unwrap(error).unwrap_or_else(|e| io::Error::new(e.kind(), e))) |
||||
} |
||||
_ => Self::Syntax(RdfXmlSyntaxError(SyntaxErrorKind::Xml(error))), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An error in the syntax of the parsed file.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[error(transparent)] |
||||
pub struct RdfXmlSyntaxError(#[from] pub(crate) SyntaxErrorKind); |
||||
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum SyntaxErrorKind { |
||||
#[error(transparent)] |
||||
Xml(#[from] quick_xml::Error), |
||||
#[error("error while parsing IRI '{iri}': {error}")] |
||||
InvalidIri { |
||||
iri: String, |
||||
#[source] |
||||
error: IriParseError, |
||||
}, |
||||
#[error("error while parsing language tag '{tag}': {error}")] |
||||
InvalidLanguageTag { |
||||
tag: String, |
||||
#[source] |
||||
error: LanguageTagParseError, |
||||
}, |
||||
#[error("{0}")] |
||||
Msg(String), |
||||
} |
||||
|
||||
impl RdfXmlSyntaxError { |
||||
/// Builds an error from a printable error message.
|
||||
#[inline] |
||||
pub(crate) fn msg(msg: impl Into<String>) -> Self { |
||||
Self(SyntaxErrorKind::Msg(msg.into())) |
||||
} |
||||
} |
||||
|
||||
impl From<RdfXmlSyntaxError> for io::Error { |
||||
#[inline] |
||||
fn from(error: RdfXmlSyntaxError) -> Self { |
||||
match error.0 { |
||||
SyntaxErrorKind::Xml(error) => match error { |
||||
quick_xml::Error::Io(error) => { |
||||
Arc::try_unwrap(error).unwrap_or_else(|e| Self::new(e.kind(), e)) |
||||
} |
||||
quick_xml::Error::UnexpectedEof(error) => { |
||||
Self::new(io::ErrorKind::UnexpectedEof, error) |
||||
} |
||||
_ => Self::new(io::ErrorKind::InvalidData, error), |
||||
}, |
||||
SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg), |
||||
_ => Self::new(io::ErrorKind::InvalidData, error), |
||||
} |
||||
} |
||||
} |
@ -1,18 +0,0 @@ |
||||
#![doc = include_str!("../README.md")] |
||||
#![doc(test(attr(deny(warnings))))] |
||||
#![cfg_attr(docsrs, feature(doc_auto_cfg))] |
||||
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
||||
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
||||
|
||||
mod error; |
||||
mod parser; |
||||
mod serializer; |
||||
mod utils; |
||||
|
||||
pub use error::{RdfXmlParseError, RdfXmlSyntaxError}; |
||||
#[cfg(feature = "async-tokio")] |
||||
pub use parser::FromTokioAsyncReadRdfXmlReader; |
||||
pub use parser::{FromReadRdfXmlReader, RdfXmlParser}; |
||||
#[cfg(feature = "async-tokio")] |
||||
pub use serializer::ToTokioAsyncWriteRdfXmlWriter; |
||||
pub use serializer::{RdfXmlSerializer, ToWriteRdfXmlWriter}; |
File diff suppressed because it is too large
Load Diff
@ -1,461 +0,0 @@ |
||||
use crate::utils::*; |
||||
use oxiri::{Iri, IriParseError}; |
||||
use oxrdf::vocab::rdf; |
||||
use oxrdf::{NamedNodeRef, Subject, SubjectRef, TermRef, TripleRef}; |
||||
use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; |
||||
use quick_xml::Writer; |
||||
use std::borrow::Cow; |
||||
use std::collections::BTreeMap; |
||||
use std::io; |
||||
use std::io::Write; |
||||
use std::sync::Arc; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::AsyncWrite; |
||||
|
||||
/// Builder for [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) writers.
///
/// Namespace prefixes are declared with [`RdfXmlSerializer::with_prefix`] before a writer is created.
///
/// ```
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
/// use oxrdfxml::RdfXmlSerializer;
///
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
///     NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://schema.org/name")?,
///     LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
/// ))?;
/// assert_eq!(
///     b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
///     writer.finish()?.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
#[must_use]
pub struct RdfXmlSerializer {
    // Maps a namespace IRI to the prefix name declared for it.
    prefixes: BTreeMap<String, String>,
}
||||
|
||||
impl RdfXmlSerializer { |
||||
/// Builds a new [`RdfXmlSerializer`].
|
||||
#[inline] |
||||
pub fn new() -> Self { |
||||
Self { |
||||
prefixes: BTreeMap::new(), |
||||
} |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn with_prefix( |
||||
mut self, |
||||
prefix_name: impl Into<String>, |
||||
prefix_iri: impl Into<String>, |
||||
) -> Result<Self, IriParseError> { |
||||
self.prefixes.insert( |
||||
Iri::parse(prefix_iri.into())?.into_inner(), |
||||
prefix_name.into(), |
||||
); |
||||
Ok(self) |
||||
} |
||||
|
||||
/// Writes a RDF/XML file to a [`Write`] implementation.
|
||||
///
|
||||
/// This writer does unbuffered writes.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
|
||||
/// use oxrdfxml::RdfXmlSerializer;
|
||||
///
|
||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
||||
/// writer.finish()?.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[allow(clippy::unused_self)] |
||||
pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteRdfXmlWriter<W> { |
||||
ToWriteRdfXmlWriter { |
||||
writer: Writer::new_with_indent(write, b'\t', 1), |
||||
inner: self.inner_writer(), |
||||
} |
||||
} |
||||
|
||||
/// Writes a RDF/XML file to a [`AsyncWrite`] implementation.
|
||||
///
|
||||
/// This writer does unbuffered writes.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef};
|
||||
/// use oxrdfxml::RdfXmlSerializer;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// )).await?;
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
||||
/// )).await?;
|
||||
/// assert_eq!(
|
||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
||||
/// writer.finish().await?.as_slice()
|
||||
/// );
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[allow(clippy::unused_self)] |
||||
#[cfg(feature = "async-tokio")] |
||||
pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>( |
||||
self, |
||||
write: W, |
||||
) -> ToTokioAsyncWriteRdfXmlWriter<W> { |
||||
ToTokioAsyncWriteRdfXmlWriter { |
||||
writer: Writer::new_with_indent(write, b'\t', 1), |
||||
inner: self.inner_writer(), |
||||
} |
||||
} |
||||
|
||||
fn inner_writer(mut self) -> InnerRdfXmlWriter { |
||||
self.prefixes.insert( |
||||
"http://www.w3.org/1999/02/22-rdf-syntax-ns#".into(), |
||||
"rdf".into(), |
||||
); |
||||
InnerRdfXmlWriter { |
||||
current_subject: None, |
||||
current_resource_tag: None, |
||||
prefixes: self.prefixes, |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Writes a RDF/XML file to a [`Write`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_write`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
|
||||
/// use oxrdfxml::RdfXmlSerializer;
|
||||
///
|
||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
||||
/// writer.finish()?.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct ToWriteRdfXmlWriter<W: Write> { |
||||
writer: Writer<W>, |
||||
inner: InnerRdfXmlWriter, |
||||
} |
||||
|
||||
impl<W: Write> ToWriteRdfXmlWriter<W> { |
||||
/// Writes an extra triple.
|
||||
#[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)] |
||||
pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> { |
||||
let mut buffer = Vec::new(); |
||||
self.inner.write_triple(t, &mut buffer)?; |
||||
self.flush_buffer(&mut buffer) |
||||
} |
||||
|
||||
/// Ends the write process and returns the underlying [`Write`].
|
||||
pub fn finish(mut self) -> io::Result<W> { |
||||
let mut buffer = Vec::new(); |
||||
self.inner.finish(&mut buffer); |
||||
self.flush_buffer(&mut buffer)?; |
||||
Ok(self.writer.into_inner()) |
||||
} |
||||
|
||||
fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> { |
||||
for event in buffer.drain(0..) { |
||||
self.writer.write_event(event).map_err(map_err)?; |
||||
} |
||||
Ok(()) |
||||
} |
||||
} |
||||
|
||||
/// RDF/XML writer targeting an [`AsyncWrite`] implementation. It is created by [`RdfXmlSerializer::serialize_to_tokio_async_write`].
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef};
/// use oxrdfxml::RdfXmlSerializer;
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new());
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
///     NamedNodeRef::new("http://schema.org/Person")?,
/// )).await?;
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://schema.org/name")?,
///     LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
/// )).await?;
/// assert_eq!(
///     b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
///     writer.finish().await?.as_slice()
/// );
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct ToTokioAsyncWriteRdfXmlWriter<W: AsyncWrite + Unpin> {
    // XML event sink wrapping the user-provided async writer.
    writer: Writer<W>,
    // Serialization state turning triples into XML events.
    inner: InnerRdfXmlWriter,
}
||||
|
||||
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteRdfXmlWriter<W> {
    /// Serializes one more triple.
    #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)]
    pub async fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let mut events = Vec::new();
        self.inner.write_triple(t, &mut events)?;
        self.flush_buffer(&mut events).await
    }

    /// Closes the document and gives back the underlying [`AsyncWrite`].
    pub async fn finish(mut self) -> io::Result<W> {
        let mut events = Vec::new();
        self.inner.finish(&mut events);
        self.flush_buffer(&mut events).await?;
        Ok(self.writer.into_inner())
    }

    /// Writes the queued events in order, stopping at the first failure.
    /// `buffer` is left empty in every case.
    async fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> {
        for event in std::mem::take(buffer) {
            if let Err(error) = self.writer.write_event_async(event).await {
                return Err(map_err(error));
            }
        }
        Ok(())
    }
}
||||
|
||||
pub struct InnerRdfXmlWriter { |
||||
current_subject: Option<Subject>, |
||||
current_resource_tag: Option<String>, |
||||
prefixes: BTreeMap<String, String>, |
||||
} |
||||
|
||||
impl InnerRdfXmlWriter { |
||||
#[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)] |
||||
fn write_triple<'a>( |
||||
&mut self, |
||||
t: impl Into<TripleRef<'a>>, |
||||
output: &mut Vec<Event<'a>>, |
||||
) -> io::Result<()> { |
||||
if self.current_subject.is_none() { |
||||
self.write_start(output); |
||||
} |
||||
|
||||
let triple = t.into(); |
||||
// We open a new rdf:Description if useful
|
||||
if self.current_subject.as_ref().map(Subject::as_ref) != Some(triple.subject) { |
||||
if self.current_subject.is_some() { |
||||
output.push(Event::End( |
||||
self.current_resource_tag |
||||
.take() |
||||
.map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new), |
||||
)); |
||||
} |
||||
self.current_subject = Some(triple.subject.into_owned()); |
||||
|
||||
let (mut description_open, with_type_tag) = if triple.predicate == rdf::TYPE { |
||||
if let TermRef::NamedNode(t) = triple.object { |
||||
let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(t); |
||||
let mut description_open = BytesStart::new(prop_qname.clone()); |
||||
if let Some(prop_xmlns) = prop_xmlns { |
||||
description_open.push_attribute(prop_xmlns); |
||||
} |
||||
self.current_resource_tag = Some(prop_qname.into_owned()); |
||||
(description_open, true) |
||||
} else { |
||||
(BytesStart::new("rdf:Description"), false) |
||||
} |
||||
} else { |
||||
(BytesStart::new("rdf:Description"), false) |
||||
}; |
||||
match triple.subject { |
||||
SubjectRef::NamedNode(node) => { |
||||
description_open.push_attribute(("rdf:about", node.as_str())) |
||||
} |
||||
SubjectRef::BlankNode(node) => { |
||||
description_open.push_attribute(("rdf:nodeID", node.as_str())) |
||||
} |
||||
_ => { |
||||
return Err(io::Error::new( |
||||
io::ErrorKind::InvalidInput, |
||||
"RDF/XML only supports named or blank subject", |
||||
)) |
||||
} |
||||
} |
||||
output.push(Event::Start(description_open)); |
||||
if with_type_tag { |
||||
return Ok(()); // No need for a value
|
||||
} |
||||
} |
||||
|
||||
let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(triple.predicate); |
||||
let mut property_open = BytesStart::new(prop_qname.clone()); |
||||
if let Some(prop_xmlns) = prop_xmlns { |
||||
property_open.push_attribute(prop_xmlns); |
||||
} |
||||
let content = match triple.object { |
||||
TermRef::NamedNode(node) => { |
||||
property_open.push_attribute(("rdf:resource", node.as_str())); |
||||
None |
||||
} |
||||
TermRef::BlankNode(node) => { |
||||
property_open.push_attribute(("rdf:nodeID", node.as_str())); |
||||
None |
||||
} |
||||
TermRef::Literal(literal) => { |
||||
if let Some(language) = literal.language() { |
||||
property_open.push_attribute(("xml:lang", language)); |
||||
} else if !literal.is_plain() { |
||||
property_open.push_attribute(("rdf:datatype", literal.datatype().as_str())); |
||||
} |
||||
Some(literal.value()) |
||||
} |
||||
_ => { |
||||
return Err(io::Error::new( |
||||
io::ErrorKind::InvalidInput, |
||||
"RDF/XML only supports named, blank or literal object", |
||||
)) |
||||
} |
||||
}; |
||||
if let Some(content) = content { |
||||
output.push(Event::Start(property_open)); |
||||
output.push(Event::Text(BytesText::new(content))); |
||||
output.push(Event::End(BytesEnd::new(prop_qname))); |
||||
} else { |
||||
output.push(Event::Empty(property_open)); |
||||
} |
||||
Ok(()) |
||||
} |
||||
|
||||
fn write_start(&self, output: &mut Vec<Event<'_>>) { |
||||
output.push(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None))); |
||||
let mut rdf_open = BytesStart::new("rdf:RDF"); |
||||
for (prefix_value, prefix_name) in &self.prefixes { |
||||
rdf_open.push_attribute(( |
||||
format!("xmlns:{prefix_name}").as_str(), |
||||
prefix_value.as_str(), |
||||
)); |
||||
} |
||||
output.push(Event::Start(rdf_open)) |
||||
} |
||||
|
||||
fn finish(&mut self, output: &mut Vec<Event<'static>>) { |
||||
if self.current_subject.is_some() { |
||||
output.push(Event::End( |
||||
self.current_resource_tag |
||||
.take() |
||||
.map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new), |
||||
)); |
||||
} else { |
||||
self.write_start(output); |
||||
} |
||||
output.push(Event::End(BytesEnd::new("rdf:RDF"))); |
||||
} |
||||
|
||||
fn uri_to_qname_and_xmlns<'a>( |
||||
&self, |
||||
uri: NamedNodeRef<'a>, |
||||
) -> (Cow<'a, str>, Option<(&'a str, &'a str)>) { |
||||
let (prop_prefix, prop_value) = split_iri(uri.as_str()); |
||||
if let Some(prop_prefix) = self.prefixes.get(prop_prefix) { |
||||
( |
||||
if prop_prefix.is_empty() { |
||||
Cow::Borrowed(prop_value) |
||||
} else { |
||||
Cow::Owned(format!("{prop_prefix}:{prop_value}")) |
||||
}, |
||||
None, |
||||
) |
||||
} else if prop_prefix == "http://www.w3.org/2000/xmlns/" { |
||||
(Cow::Owned(format!("xmlns:{prop_value}")), None) |
||||
} else if prop_value.is_empty() { |
||||
(Cow::Borrowed("p:"), Some(("xmlns:p", prop_prefix))) |
||||
} else { |
||||
(Cow::Borrowed(prop_value), Some(("xmlns", prop_prefix))) |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn map_err(error: quick_xml::Error) -> io::Error { |
||||
if let quick_xml::Error::Io(error) = error { |
||||
Arc::try_unwrap(error).unwrap_or_else(|error| io::Error::new(error.kind(), error)) |
||||
} else { |
||||
io::Error::new(io::ErrorKind::Other, error) |
||||
} |
||||
} |
||||
|
||||
fn split_iri(iri: &str) -> (&str, &str) { |
||||
if let Some(position_base) = iri.rfind(|c| !is_name_char(c) || c == ':') { |
||||
if let Some(position_add) = iri[position_base..].find(|c| is_name_start_char(c) && c != ':') |
||||
{ |
||||
( |
||||
&iri[..position_base + position_add], |
||||
&iri[position_base + position_add..], |
||||
) |
||||
} else { |
||||
(iri, "") |
||||
} |
||||
} else { |
||||
(iri, "") |
||||
} |
||||
} |
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the namespace / local name split on a few representative IRIs.
    #[test]
    fn test_split_iri() {
        let cases = [
            ("http://schema.org/Person", ("http://schema.org/", "Person")),
            ("http://schema.org/", ("http://schema.org/", "")),
            ("http://schema.org#foo", ("http://schema.org#", "foo")),
            ("urn:isbn:foo", ("urn:isbn:", "foo")),
        ];
        for (iri, expected) in cases {
            assert_eq!(split_iri(iri), expected);
        }
    }
}
@ -1,26 +0,0 @@ |
||||
/// Tells whether `c` matches the XML `NameStartChar` production.
pub fn is_name_start_char(c: char) -> bool {
    // ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
    match c {
        // ASCII part
        ':' | '_' | 'A'..='Z' | 'a'..='z' => true,
        // Latin-1 letters and following blocks (the signs U+00D7 and U+00F7 are excluded)
        '\u{00C0}'..='\u{00D6}' | '\u{00D8}'..='\u{00F6}' | '\u{00F8}'..='\u{02FF}' => true,
        '\u{0370}'..='\u{037D}' | '\u{037F}'..='\u{1FFF}' => true,
        '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' => true,
        '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' => true,
        // Supplementary planes
        '\u{10000}'..='\u{EFFFF}' => true,
        _ => false,
    }
}
||||
|
||||
pub fn is_name_char(c: char) -> bool { |
||||
// NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
|
||||
is_name_start_char(c) |
||||
|| matches!(c, '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}') |
||||
} |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,626 @@ |
||||
use super::date_time::{DateTimeError, GDay, GMonth, GMonthDay, GYear, GYearMonth, TimezoneOffset}; |
||||
use super::decimal::ParseDecimalError; |
||||
use super::duration::{DayTimeDuration, YearMonthDuration}; |
||||
use super::*; |
||||
use std::error::Error; |
||||
use std::fmt; |
||||
use std::num::ParseIntError; |
||||
use std::str::FromStr; |
||||
|
||||
/// A parsing error
|
||||
#[derive(Debug, Clone)] |
||||
pub struct XsdParseError { |
||||
kind: XsdParseErrorKind, |
||||
} |
||||
|
||||
#[derive(Debug, Clone)] |
||||
enum XsdParseErrorKind { |
||||
ParseInt(ParseIntError), |
||||
ParseDecimal(ParseDecimalError), |
||||
DateTime(DateTimeError), |
||||
Message(&'static str), |
||||
} |
||||
|
||||
const OVERFLOW_ERROR: XsdParseError = XsdParseError { |
||||
kind: XsdParseErrorKind::Message("Overflow error"), |
||||
}; |
||||
|
||||
impl fmt::Display for XsdParseError { |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
match &self.kind { |
||||
XsdParseErrorKind::ParseInt(error) => { |
||||
write!(f, "Error while parsing integer: {error}") |
||||
} |
||||
XsdParseErrorKind::ParseDecimal(error) => { |
||||
write!(f, "Error while parsing decimal: {error}") |
||||
} |
||||
XsdParseErrorKind::DateTime(error) => error.fmt(f), |
||||
XsdParseErrorKind::Message(msg) => write!(f, "{msg}"), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl XsdParseError { |
||||
const fn msg(message: &'static str) -> Self { |
||||
Self { |
||||
kind: XsdParseErrorKind::Message(message), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl Error for XsdParseError { |
||||
fn source(&self) -> Option<&(dyn Error + 'static)> { |
||||
match &self.kind { |
||||
XsdParseErrorKind::ParseInt(error) => Some(error), |
||||
XsdParseErrorKind::ParseDecimal(error) => Some(error), |
||||
XsdParseErrorKind::DateTime(error) => Some(error), |
||||
XsdParseErrorKind::Message(_) => None, |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<ParseIntError> for XsdParseError { |
||||
fn from(error: ParseIntError) -> Self { |
||||
Self { |
||||
kind: XsdParseErrorKind::ParseInt(error), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<ParseDecimalError> for XsdParseError { |
||||
fn from(error: ParseDecimalError) -> Self { |
||||
Self { |
||||
kind: XsdParseErrorKind::ParseDecimal(error), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<DateTimeError> for XsdParseError { |
||||
fn from(error: DateTimeError) -> Self { |
||||
Self { |
||||
kind: XsdParseErrorKind::DateTime(error), |
||||
} |
||||
} |
||||
} |
||||
|
||||
// [6] duYearFrag ::= unsignedNoDecimalPtNumeral 'Y'
|
||||
// [7] duMonthFrag ::= unsignedNoDecimalPtNumeral 'M'
|
||||
// [8] duDayFrag ::= unsignedNoDecimalPtNumeral 'D'
|
||||
// [9] duHourFrag ::= unsignedNoDecimalPtNumeral 'H'
|
||||
// [10] duMinuteFrag ::= unsignedNoDecimalPtNumeral 'M'
|
||||
// [11] duSecondFrag ::= (unsignedNoDecimalPtNumeral | unsignedDecimalPtNumeral) 'S'
|
||||
// [12] duYearMonthFrag ::= (duYearFrag duMonthFrag?) | duMonthFrag
|
||||
// [13] duTimeFrag ::= 'T' ((duHourFrag duMinuteFrag? duSecondFrag?) | (duMinuteFrag duSecondFrag?) | duSecondFrag)
|
||||
// [14] duDayTimeFrag ::= (duDayFrag duTimeFrag?) | duTimeFrag
|
||||
// [15] durationLexicalRep ::= '-'? 'P' ((duYearMonthFrag duDayTimeFrag?) | duDayTimeFrag)
|
||||
struct DurationParts { |
||||
year_month: Option<i64>, |
||||
day_time: Option<Decimal>, |
||||
} |
||||
|
||||
fn duration_parts(input: &str) -> Result<(DurationParts, &str), XsdParseError> { |
||||
// States
|
||||
const START: u32 = 0; |
||||
const AFTER_YEAR: u32 = 1; |
||||
const AFTER_MONTH: u32 = 2; |
||||
const AFTER_DAY: u32 = 3; |
||||
const AFTER_T: u32 = 4; |
||||
const AFTER_HOUR: u32 = 5; |
||||
const AFTER_MINUTE: u32 = 6; |
||||
const AFTER_SECOND: u32 = 7; |
||||
|
||||
let (is_negative, input) = if let Some(left) = input.strip_prefix('-') { |
||||
(true, left) |
||||
} else { |
||||
(false, input) |
||||
}; |
||||
let mut input = expect_char(input, 'P', "Durations must start with 'P'")?; |
||||
let mut state = START; |
||||
let mut year_month: Option<i64> = None; |
||||
let mut day_time: Option<Decimal> = None; |
||||
while !input.is_empty() { |
||||
if let Some(left) = input.strip_prefix('T') { |
||||
if state >= AFTER_T { |
||||
return Err(XsdParseError::msg("Duplicated time separator 'T'")); |
||||
} |
||||
state = AFTER_T; |
||||
input = left; |
||||
} else { |
||||
let (number_str, left) = decimal_prefix(input); |
||||
match left.chars().next() { |
||||
Some('Y') if state < AFTER_YEAR => { |
||||
year_month = Some( |
||||
year_month |
||||
.unwrap_or_default() |
||||
.checked_add( |
||||
apply_i64_neg(i64::from_str(number_str)?, is_negative)? |
||||
.checked_mul(12) |
||||
.ok_or(OVERFLOW_ERROR)?, |
||||
) |
||||
.ok_or(OVERFLOW_ERROR)?, |
||||
); |
||||
state = AFTER_YEAR; |
||||
} |
||||
Some('M') if state < AFTER_MONTH => { |
||||
year_month = Some( |
||||
year_month |
||||
.unwrap_or_default() |
||||
.checked_add(apply_i64_neg(i64::from_str(number_str)?, is_negative)?) |
||||
.ok_or(OVERFLOW_ERROR)?, |
||||
); |
||||
state = AFTER_MONTH; |
||||
} |
||||
Some('D') if state < AFTER_DAY => { |
||||
if number_str.contains('.') { |
||||
return Err(XsdParseError::msg( |
||||
"Decimal numbers are not allowed for days", |
||||
)); |
||||
} |
||||
day_time = Some( |
||||
day_time |
||||
.unwrap_or_default() |
||||
.checked_add( |
||||
apply_decimal_neg(Decimal::from_str(number_str)?, is_negative)? |
||||
.checked_mul(86400) |
||||
.ok_or(OVERFLOW_ERROR)?, |
||||
) |
||||
.ok_or(OVERFLOW_ERROR)?, |
||||
); |
||||
state = AFTER_DAY; |
||||
} |
||||
Some('H') if state == AFTER_T => { |
||||
if number_str.contains('.') { |
||||
return Err(XsdParseError::msg( |
||||
"Decimal numbers are not allowed for hours", |
||||
)); |
||||
} |
||||
day_time = Some( |
||||
day_time |
||||
.unwrap_or_default() |
||||
.checked_add( |
||||
apply_decimal_neg(Decimal::from_str(number_str)?, is_negative)? |
||||
.checked_mul(3600) |
||||
.ok_or(OVERFLOW_ERROR)?, |
||||
) |
||||
.ok_or(OVERFLOW_ERROR)?, |
||||
); |
||||
state = AFTER_HOUR; |
||||
} |
||||
Some('M') if (AFTER_T..AFTER_MINUTE).contains(&state) => { |
||||
if number_str.contains('.') { |
||||
return Err(XsdParseError::msg( |
||||
"Decimal numbers are not allowed for minutes", |
||||
)); |
||||
} |
||||
day_time = Some( |
||||
day_time |
||||
.unwrap_or_default() |
||||
.checked_add( |
||||
apply_decimal_neg(Decimal::from_str(number_str)?, is_negative)? |
||||
.checked_mul(60) |
||||
.ok_or(OVERFLOW_ERROR)?, |
||||
) |
||||
.ok_or(OVERFLOW_ERROR)?, |
||||
); |
||||
state = AFTER_MINUTE; |
||||
} |
||||
Some('S') if (AFTER_T..AFTER_SECOND).contains(&state) => { |
||||
day_time = Some( |
||||
day_time |
||||
.unwrap_or_default() |
||||
.checked_add(apply_decimal_neg( |
||||
Decimal::from_str(number_str)?, |
||||
is_negative, |
||||
)?) |
||||
.ok_or(OVERFLOW_ERROR)?, |
||||
); |
||||
state = AFTER_SECOND; |
||||
} |
||||
Some(_) => return Err(XsdParseError::msg("Unexpected type character")), |
||||
None => { |
||||
return Err(XsdParseError::msg( |
||||
"Numbers in durations must be followed by a type character", |
||||
)) |
||||
} |
||||
} |
||||
input = &left[1..]; |
||||
} |
||||
} |
||||
|
||||
Ok(( |
||||
DurationParts { |
||||
year_month, |
||||
day_time, |
||||
}, |
||||
input, |
||||
)) |
||||
} |
||||
|
||||
fn apply_i64_neg(value: i64, is_negative: bool) -> Result<i64, XsdParseError> { |
||||
if is_negative { |
||||
value.checked_neg().ok_or(OVERFLOW_ERROR) |
||||
} else { |
||||
Ok(value) |
||||
} |
||||
} |
||||
|
||||
fn apply_decimal_neg(value: Decimal, is_negative: bool) -> Result<Decimal, XsdParseError> { |
||||
if is_negative { |
||||
value.checked_neg().ok_or(OVERFLOW_ERROR) |
||||
} else { |
||||
Ok(value) |
||||
} |
||||
} |
||||
|
||||
pub fn parse_duration(input: &str) -> Result<Duration, XsdParseError> { |
||||
let parts = ensure_complete(input, duration_parts)?; |
||||
if parts.year_month.is_none() && parts.day_time.is_none() { |
||||
return Err(XsdParseError::msg("Empty duration")); |
||||
} |
||||
Ok(Duration::new( |
||||
parts.year_month.unwrap_or(0), |
||||
parts.day_time.unwrap_or_default(), |
||||
)) |
||||
} |
||||
|
||||
pub fn parse_year_month_duration(input: &str) -> Result<YearMonthDuration, XsdParseError> { |
||||
let parts = ensure_complete(input, duration_parts)?; |
||||
if parts.day_time.is_some() { |
||||
return Err(XsdParseError::msg( |
||||
"There must not be any day or time component in a yearMonthDuration", |
||||
)); |
||||
} |
||||
Ok(YearMonthDuration::new(parts.year_month.ok_or( |
||||
XsdParseError::msg("No year and month values found"), |
||||
)?)) |
||||
} |
||||
|
||||
pub fn parse_day_time_duration(input: &str) -> Result<DayTimeDuration, XsdParseError> { |
||||
let parts = ensure_complete(input, duration_parts)?; |
||||
if parts.year_month.is_some() { |
||||
return Err(XsdParseError::msg( |
||||
"There must not be any year or month component in a dayTimeDuration", |
||||
)); |
||||
} |
||||
Ok(DayTimeDuration::new(parts.day_time.ok_or( |
||||
XsdParseError::msg("No day or time values found"), |
||||
)?)) |
||||
} |
||||
|
||||
// [16] dateTimeLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag 'T' ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag?
|
||||
fn date_time_lexical_rep(input: &str) -> Result<(DateTime, &str), XsdParseError> { |
||||
let (year, input) = year_frag(input)?; |
||||
let input = expect_char(input, '-', "The year and month must be separated by '-'")?; |
||||
let (month, input) = month_frag(input)?; |
||||
let input = expect_char(input, '-', "The month and day must be separated by '-'")?; |
||||
let (day, input) = day_frag(input)?; |
||||
let input = expect_char(input, 'T', "The date and time must be separated by 'T'")?; |
||||
let (hour, input) = hour_frag(input)?; |
||||
let input = expect_char(input, ':', "The hours and minutes must be separated by ':'")?; |
||||
let (minute, input) = minute_frag(input)?; |
||||
let input = expect_char( |
||||
input, |
||||
':', |
||||
"The minutes and seconds must be separated by ':'", |
||||
)?; |
||||
let (second, input) = second_frag(input)?; |
||||
// We validate 24:00:00
|
||||
if hour == 24 && minute != 0 && second != Decimal::from(0) { |
||||
return Err(XsdParseError::msg( |
||||
"Times are not allowed to be after 24:00:00", |
||||
)); |
||||
} |
||||
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||
Ok(( |
||||
DateTime::new(year, month, day, hour, minute, second, timezone_offset)?, |
||||
input, |
||||
)) |
||||
} |
||||
|
||||
pub fn parse_date_time(input: &str) -> Result<DateTime, XsdParseError> { |
||||
ensure_complete(input, date_time_lexical_rep) |
||||
} |
||||
|
||||
// [17] timeLexicalRep ::= ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag?
|
||||
fn time_lexical_rep(input: &str) -> Result<(Time, &str), XsdParseError> { |
||||
let (hour, input) = hour_frag(input)?; |
||||
let input = expect_char(input, ':', "The hours and minutes must be separated by ':'")?; |
||||
let (minute, input) = minute_frag(input)?; |
||||
let input = expect_char( |
||||
input, |
||||
':', |
||||
"The minutes and seconds must be separated by ':'", |
||||
)?; |
||||
let (second, input) = second_frag(input)?; |
||||
// We validate 24:00:00
|
||||
if hour == 24 && minute != 0 && second != Decimal::from(0) { |
||||
return Err(XsdParseError::msg( |
||||
"Times are not allowed to be after 24:00:00", |
||||
)); |
||||
} |
||||
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||
Ok((Time::new(hour, minute, second, timezone_offset)?, input)) |
||||
} |
||||
|
||||
pub fn parse_time(input: &str) -> Result<Time, XsdParseError> { |
||||
ensure_complete(input, time_lexical_rep) |
||||
} |
||||
|
||||
// [18] dateLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag timezoneFrag? Constraint: Day-of-month Representations
|
||||
fn date_lexical_rep(input: &str) -> Result<(Date, &str), XsdParseError> { |
||||
let (year, input) = year_frag(input)?; |
||||
let input = expect_char(input, '-', "The year and month must be separated by '-'")?; |
||||
let (month, input) = month_frag(input)?; |
||||
let input = expect_char(input, '-', "The month and day must be separated by '-'")?; |
||||
let (day, input) = day_frag(input)?; |
||||
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||
Ok((Date::new(year, month, day, timezone_offset)?, input)) |
||||
} |
||||
|
||||
pub fn parse_date(input: &str) -> Result<Date, XsdParseError> { |
||||
ensure_complete(input, date_lexical_rep) |
||||
} |
||||
|
||||
// [19] gYearMonthLexicalRep ::= yearFrag '-' monthFrag timezoneFrag?
|
||||
fn g_year_month_lexical_rep(input: &str) -> Result<(GYearMonth, &str), XsdParseError> { |
||||
let (year, input) = year_frag(input)?; |
||||
let input = expect_char(input, '-', "The year and month must be separated by '-'")?; |
||||
let (month, input) = month_frag(input)?; |
||||
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||
Ok((GYearMonth::new(year, month, timezone_offset)?, input)) |
||||
} |
||||
|
||||
pub fn parse_g_year_month(input: &str) -> Result<GYearMonth, XsdParseError> { |
||||
ensure_complete(input, g_year_month_lexical_rep) |
||||
} |
||||
|
||||
// [20] gYearLexicalRep ::= yearFrag timezoneFrag?
|
||||
fn g_year_lexical_rep(input: &str) -> Result<(GYear, &str), XsdParseError> { |
||||
let (year, input) = year_frag(input)?; |
||||
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||
Ok((GYear::new(year, timezone_offset)?, input)) |
||||
} |
||||
|
||||
pub fn parse_g_year(input: &str) -> Result<GYear, XsdParseError> { |
||||
ensure_complete(input, g_year_lexical_rep) |
||||
} |
||||
|
||||
// [21] gMonthDayLexicalRep ::= '--' monthFrag '-' dayFrag timezoneFrag? Constraint: Day-of-month Representations
|
||||
fn g_month_day_lexical_rep(input: &str) -> Result<(GMonthDay, &str), XsdParseError> { |
||||
let input = expect_char(input, '-', "gMonthDay values must start with '--'")?; |
||||
let input = expect_char(input, '-', "gMonthDay values must start with '--'")?; |
||||
let (month, input) = month_frag(input)?; |
||||
let input = expect_char(input, '-', "The month and day must be separated by '-'")?; |
||||
let (day, input) = day_frag(input)?; |
||||
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||
Ok((GMonthDay::new(month, day, timezone_offset)?, input)) |
||||
} |
||||
|
||||
pub fn parse_g_month_day(input: &str) -> Result<GMonthDay, XsdParseError> { |
||||
ensure_complete(input, g_month_day_lexical_rep) |
||||
} |
||||
|
||||
// [22] gDayLexicalRep ::= '---' dayFrag timezoneFrag?
|
||||
fn g_day_lexical_rep(input: &str) -> Result<(GDay, &str), XsdParseError> { |
||||
let input = expect_char(input, '-', "gDay values must start with '---'")?; |
||||
let input = expect_char(input, '-', "gDay values must start with '---'")?; |
||||
let input = expect_char(input, '-', "gDay values must start with '---'")?; |
||||
let (day, input) = day_frag(input)?; |
||||
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||
Ok((GDay::new(day, timezone_offset)?, input)) |
||||
} |
||||
|
||||
pub fn parse_g_day(input: &str) -> Result<GDay, XsdParseError> { |
||||
ensure_complete(input, g_day_lexical_rep) |
||||
} |
||||
|
||||
// [23] gMonthLexicalRep ::= '--' monthFrag timezoneFrag?
|
||||
fn g_month_lexical_rep(input: &str) -> Result<(GMonth, &str), XsdParseError> { |
||||
let input = expect_char(input, '-', "gMonth values must start with '--'")?; |
||||
let input = expect_char(input, '-', "gMonth values must start with '--'")?; |
||||
let (month, input) = month_frag(input)?; |
||||
let (timezone_offset, input) = optional_end(input, timezone_frag)?; |
||||
Ok((GMonth::new(month, timezone_offset)?, input)) |
||||
} |
||||
|
||||
pub fn parse_g_month(input: &str) -> Result<GMonth, XsdParseError> { |
||||
ensure_complete(input, g_month_lexical_rep) |
||||
} |
||||
|
||||
// [56] yearFrag ::= '-'? (([1-9] digit digit digit+)) | ('0' digit digit digit))
|
||||
fn year_frag(input: &str) -> Result<(i64, &str), XsdParseError> { |
||||
let (sign, input) = if let Some(left) = input.strip_prefix('-') { |
||||
(-1, left) |
||||
} else { |
||||
(1, input) |
||||
}; |
||||
let (number_str, input) = integer_prefix(input); |
||||
if number_str.len() < 4 { |
||||
return Err(XsdParseError::msg("The year should be encoded on 4 digits")); |
||||
} |
||||
if number_str.len() > 4 && number_str.starts_with('0') { |
||||
return Err(XsdParseError::msg( |
||||
"The years value must not start with 0 if it can be encoded in at least 4 digits", |
||||
)); |
||||
} |
||||
let number = i64::from_str(number_str)?; |
||||
Ok((sign * number, input)) |
||||
} |
||||
|
||||
// [57] monthFrag ::= ('0' [1-9]) | ('1' [0-2])
|
||||
fn month_frag(input: &str) -> Result<(u8, &str), XsdParseError> { |
||||
let (number_str, input) = integer_prefix(input); |
||||
if number_str.len() != 2 { |
||||
return Err(XsdParseError::msg("Month must be encoded with two digits")); |
||||
} |
||||
let number = u8::from_str(number_str)?; |
||||
if !(1..=12).contains(&number) { |
||||
return Err(XsdParseError::msg("Month must be between 01 and 12")); |
||||
} |
||||
Ok((number, input)) |
||||
} |
||||
|
||||
// [58] dayFrag ::= ('0' [1-9]) | ([12] digit) | ('3' [01])
|
||||
fn day_frag(input: &str) -> Result<(u8, &str), XsdParseError> { |
||||
let (number_str, input) = integer_prefix(input); |
||||
if number_str.len() != 2 { |
||||
return Err(XsdParseError::msg("Day must be encoded with two digits")); |
||||
} |
||||
let number = u8::from_str(number_str)?; |
||||
if !(1..=31).contains(&number) { |
||||
return Err(XsdParseError::msg("Day must be between 01 and 31")); |
||||
} |
||||
Ok((number, input)) |
||||
} |
||||
|
||||
// [59] hourFrag ::= ([01] digit) | ('2' [0-3])
|
||||
// We also allow 24 for ease of parsing
|
||||
fn hour_frag(input: &str) -> Result<(u8, &str), XsdParseError> { |
||||
let (number_str, input) = integer_prefix(input); |
||||
if number_str.len() != 2 { |
||||
return Err(XsdParseError::msg("Hours must be encoded with two digits")); |
||||
} |
||||
let number = u8::from_str(number_str)?; |
||||
if !(0..=24).contains(&number) { |
||||
return Err(XsdParseError::msg("Hours must be between 00 and 24")); |
||||
} |
||||
Ok((number, input)) |
||||
} |
||||
|
||||
// [60] minuteFrag ::= [0-5] digit
|
||||
fn minute_frag(input: &str) -> Result<(u8, &str), XsdParseError> { |
||||
let (number_str, input) = integer_prefix(input); |
||||
if number_str.len() != 2 { |
||||
return Err(XsdParseError::msg( |
||||
"Minutes must be encoded with two digits", |
||||
)); |
||||
} |
||||
let number = u8::from_str(number_str)?; |
||||
if !(0..=59).contains(&number) { |
||||
return Err(XsdParseError::msg("Minutes must be between 00 and 59")); |
||||
} |
||||
Ok((number, input)) |
||||
} |
||||
|
||||
// [61] secondFrag ::= ([0-5] digit) ('.' digit+)?
|
||||
fn second_frag(input: &str) -> Result<(Decimal, &str), XsdParseError> { |
||||
let (number_str, input) = decimal_prefix(input); |
||||
let (before_dot_str, _) = number_str.split_once('.').unwrap_or((number_str, "")); |
||||
if before_dot_str.len() != 2 { |
||||
return Err(XsdParseError::msg( |
||||
"Seconds must be encoded with two digits", |
||||
)); |
||||
} |
||||
let number = Decimal::from_str(number_str)?; |
||||
if number < Decimal::from(0) || number >= Decimal::from(60) { |
||||
return Err(XsdParseError::msg("Seconds must be between 00 and 60")); |
||||
} |
||||
if number_str.ends_with('.') { |
||||
return Err(XsdParseError::msg( |
||||
"Seconds are not allowed to end with a dot", |
||||
)); |
||||
} |
||||
Ok((number, input)) |
||||
} |
||||
|
||||
// [63] timezoneFrag ::= 'Z' | ('+' | '-') (('0' digit | '1' [0-3]) ':' minuteFrag | '14:00')
|
||||
fn timezone_frag(input: &str) -> Result<(TimezoneOffset, &str), XsdParseError> { |
||||
if let Some(left) = input.strip_prefix('Z') { |
||||
return Ok((TimezoneOffset::UTC, left)); |
||||
} |
||||
let (sign, input) = if let Some(left) = input.strip_prefix('-') { |
||||
(-1, left) |
||||
} else if let Some(left) = input.strip_prefix('+') { |
||||
(1, left) |
||||
} else { |
||||
(1, input) |
||||
}; |
||||
|
||||
let (hour_str, input) = integer_prefix(input); |
||||
if hour_str.len() != 2 { |
||||
return Err(XsdParseError::msg( |
||||
"The timezone hours must be encoded with two digits", |
||||
)); |
||||
} |
||||
let hours = i16::from_str(hour_str)?; |
||||
|
||||
let input = expect_char( |
||||
input, |
||||
':', |
||||
"The timezone hours and minutes must be separated by ':'", |
||||
)?; |
||||
let (minutes, input) = minute_frag(input)?; |
||||
|
||||
if hours > 13 && !(hours == 14 && minutes == 0) { |
||||
return Err(XsdParseError::msg( |
||||
"The timezone hours must be between 00 and 13", |
||||
)); |
||||
} |
||||
|
||||
Ok(( |
||||
TimezoneOffset::new(sign * (hours * 60 + i16::from(minutes)))?, |
||||
input, |
||||
)) |
||||
} |
||||
|
||||
fn ensure_complete<T>( |
||||
input: &str, |
||||
parse: impl FnOnce(&str) -> Result<(T, &str), XsdParseError>, |
||||
) -> Result<T, XsdParseError> { |
||||
let (result, left) = parse(input)?; |
||||
if !left.is_empty() { |
||||
return Err(XsdParseError::msg("Unrecognized value suffix")); |
||||
} |
||||
Ok(result) |
||||
} |
||||
|
||||
fn expect_char<'a>( |
||||
input: &'a str, |
||||
constant: char, |
||||
error_message: &'static str, |
||||
) -> Result<&'a str, XsdParseError> { |
||||
if let Some(left) = input.strip_prefix(constant) { |
||||
Ok(left) |
||||
} else { |
||||
Err(XsdParseError::msg(error_message)) |
||||
} |
||||
} |
||||
|
||||
/// Splits `input` into its longest leading run of ASCII digits and the remainder.
///
/// The first element is empty when `input` does not start with a digit.
fn integer_prefix(input: &str) -> (&str, &str) {
    let end = input
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(input.len());
    input.split_at(end)
}
||||
|
||||
/// Splits `input` into its longest leading run of ASCII digits containing at most one
/// `.`, and the remainder.
///
/// A second `.` or any other character ends the prefix.
fn decimal_prefix(input: &str) -> (&str, &str) {
    let bytes = input.as_bytes();
    let mut end = 0;
    let mut dot_seen = false;
    // Only ASCII bytes are ever accepted, so `end` always stays on a char boundary.
    while let Some(&byte) = bytes.get(end) {
        match byte {
            b'0'..=b'9' => {}
            b'.' if !dot_seen => dot_seen = true,
            _ => break,
        }
        end += 1;
    }
    input.split_at(end)
}
||||
|
||||
fn optional_end<T>( |
||||
input: &str, |
||||
parse: impl FnOnce(&str) -> Result<(T, &str), XsdParseError>, |
||||
) -> Result<(Option<T>, &str), XsdParseError> { |
||||
Ok(if input.is_empty() { |
||||
(None, input) |
||||
} else { |
||||
let (result, input) = parse(input)?; |
||||
(Some(result), input) |
||||
}) |
||||
} |
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue