Introduces OxRDF I/O stand-alone crate

pull/589/head
Tpt 1 year ago committed by Thomas Tanon
parent 73af297b4c
commit 7cd383af79
  1. 6
      .github/workflows/tests.yml
  2. 13
      Cargo.lock
  3. 1
      Cargo.toml
  4. 3
      lib/Cargo.toml
  5. 33
      lib/oxrdfio/Cargo.toml
  6. 61
      lib/oxrdfio/README.md
  7. 148
      lib/oxrdfio/src/error.rs
  8. 203
      lib/oxrdfio/src/format.rs
  9. 19
      lib/oxrdfio/src/lib.rs
  10. 577
      lib/oxrdfio/src/parser.rs
  11. 322
      lib/oxrdfio/src/serializer.rs
  12. 45
      lib/src/io/error.rs
  13. 21
      lib/src/io/format.rs
  14. 4
      lib/src/io/mod.rs
  15. 191
      lib/src/io/read.rs
  16. 84
      lib/src/io/write.rs

@ -36,6 +36,8 @@ jobs:
working-directory: ./lib/oxrdfxml working-directory: ./lib/oxrdfxml
- run: cargo clippy - run: cargo clippy
working-directory: ./lib/oxttl working-directory: ./lib/oxttl
- run: cargo clippy
working-directory: ./lib/oxrdfio
- run: cargo clippy - run: cargo clippy
working-directory: ./lib/sparesults working-directory: ./lib/sparesults
- run: cargo clippy - run: cargo clippy
@ -102,6 +104,8 @@ jobs:
working-directory: ./lib/oxrdfxml working-directory: ./lib/oxrdfxml
- run: cargo clippy -- -D warnings -D clippy::all - run: cargo clippy -- -D warnings -D clippy::all
working-directory: ./lib/oxttl working-directory: ./lib/oxttl
- run: cargo clippy -- -D warnings -D clippy::all
working-directory: ./lib/oxrdfio
- run: cargo clippy -- -D warnings -D clippy::all - run: cargo clippy -- -D warnings -D clippy::all
working-directory: ./lib/sparesults working-directory: ./lib/sparesults
- run: cargo clippy -- -D warnings -D clippy::all - run: cargo clippy -- -D warnings -D clippy::all
@ -159,7 +163,7 @@ jobs:
- run: rustup update - run: rustup update
- uses: Swatinem/rust-cache@v2 - uses: Swatinem/rust-cache@v2
- run: cargo install cargo-semver-checks || true - run: cargo install cargo-semver-checks || true
- run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph_js --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server --exclude oxrdfxml --exclude oxttl --exclude sparopt - run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph_js --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server --exclude oxrdfxml --exclude oxttl --exclude oxrdfio --exclude sparopt
test_linux: test_linux:
runs-on: ubuntu-latest runs-on: ubuntu-latest

13
Cargo.lock generated

@ -952,10 +952,9 @@ dependencies = [
"oxilangtag", "oxilangtag",
"oxiri", "oxiri",
"oxrdf", "oxrdf",
"oxrdfxml", "oxrdfio",
"oxrocksdb-sys", "oxrocksdb-sys",
"oxsdatatypes", "oxsdatatypes",
"oxttl",
"rand", "rand",
"regex", "regex",
"sha-1", "sha-1",
@ -1036,6 +1035,16 @@ dependencies = [
"rand", "rand",
] ]
[[package]]
name = "oxrdfio"
version = "0.1.0-alpha.1-dev"
dependencies = [
"oxrdf",
"oxrdfxml",
"oxttl",
"tokio",
]
[[package]] [[package]]
name = "oxrdfxml" name = "oxrdfxml"
version = "0.1.0-alpha.1-dev" version = "0.1.0-alpha.1-dev"

@ -3,6 +3,7 @@ members = [
"js", "js",
"lib", "lib",
"lib/oxrdf", "lib/oxrdf",
"lib/oxrdfio",
"lib/oxrdfxml", "lib/oxrdfxml",
"lib/oxsdatatypes", "lib/oxsdatatypes",
"lib/oxttl", "lib/oxttl",

@ -35,9 +35,8 @@ siphasher = "0.3"
lazy_static = "1" lazy_static = "1"
json-event-parser = "0.1" json-event-parser = "0.1"
oxrdf = { version = "0.2.0-alpha.1-dev", path = "oxrdf", features = ["rdf-star", "oxsdatatypes"] } oxrdf = { version = "0.2.0-alpha.1-dev", path = "oxrdf", features = ["rdf-star", "oxsdatatypes"] }
oxrdfxml = { version = "0.1.0-alpha.1-dev", path = "oxrdfxml" }
oxsdatatypes = { version = "0.2.0-alpha.1-dev", path="oxsdatatypes" } oxsdatatypes = { version = "0.2.0-alpha.1-dev", path="oxsdatatypes" }
oxttl = { version = "0.1.0-alpha.1-dev" , path = "oxttl", features = ["rdf-star"] } oxrdfio = { version = "0.1.0-alpha.1-dev" , path = "oxrdfio", features = ["rdf-star"] }
spargebra = { version = "0.3.0-alpha.1-dev", path = "spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] } spargebra = { version = "0.3.0-alpha.1-dev", path = "spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] }
sparopt = { version = "0.1.0-alpha.1-dev", path="sparopt", features = ["rdf-star", "sep-0002", "sep-0006"] } sparopt = { version = "0.1.0-alpha.1-dev", path="sparopt", features = ["rdf-star", "sep-0002", "sep-0006"] }
sparesults = { version = "0.2.0-alpha.1-dev", path = "sparesults", features = ["rdf-star"] } sparesults = { version = "0.2.0-alpha.1-dev", path = "sparesults", features = ["rdf-star"] }

@ -0,0 +1,33 @@
[package]
name = "oxrdfio"
version = "0.1.0-alpha.1-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
keywords = ["RDF"]
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfxml"
homepage = "https://oxigraph.org/"
documentation = "https://docs.rs/oxrdfio"
description = """
Parser for various RDF serializations
"""
edition = "2021"
rust-version = "1.65"
[features]
default = []
async-tokio = ["dep:tokio", "oxrdfxml/async-tokio", "oxttl/async-tokio"]
rdf-star = ["oxrdf/rdf-star", "oxttl/rdf-star"]
[dependencies]
oxrdf = { version = "0.2.0-alpha.1-dev", path = "../oxrdf" }
oxrdfxml = { version = "0.1.0-alpha.1-dev", path = "../oxrdfxml" }
oxttl = { version = "0.1.0-alpha.1-dev" , path = "../oxttl" }
tokio = { version = "1", optional = true, features = ["io-util"] }
[dev-dependencies]
tokio = { version = "1", features = ["rt", "macros"] }
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]

@ -0,0 +1,61 @@
OxRDF I/O
=========
[![Latest Version](https://img.shields.io/crates/v/oxrdfio.svg)](https://crates.io/crates/oxrdfio)
[![Released API docs](https://docs.rs/oxrdfio/badge.svg)](https://docs.rs/oxrdfio)
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfio)](https://crates.io/crates/oxrdfio)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
OxRDF I/O is a set of parsers and serializers for RDF.
It supports:
* [N3](https://w3c.github.io/N3/spec/) using [`oxttl`](https://crates.io/crates/oxttl)
* [N-Quads](https://www.w3.org/TR/n-quads/) using [`oxttl`](https://crates.io/crates/oxttl)
* [N-Triples](https://www.w3.org/TR/n-triples/) using [`oxttl`](https://crates.io/crates/oxttl)
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) using [`oxrdfxml`](https://crates.io/crates/oxrdfxml)
* [TriG](https://www.w3.org/TR/trig/) using [`oxttl`](https://crates.io/crates/oxttl)
* [Turtle](https://www.w3.org/TR/turtle/) using [`oxttl`](https://crates.io/crates/oxttl)
Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is also available behind the `rdf-star`feature for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star), [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star), [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) and [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star).
It is designed as a low level parser compatible with both synchronous and asynchronous I/O (behind the `async-tokio` feature).
Usage example counting the number of people in a Turtle file:
```rust
use oxrdf::{NamedNodeRef, vocab::rdf};
use oxrdfio::{RdfFormat, RdfParser};
let file = b"@base <http://example.com/> .
@prefix schema: <http://schema.org/> .
<foo> a schema:Person ;
schema:name \"Foo\" .
<bar> a schema:Person ;
schema:name \"Bar\" .";
let schema_person = NamedNodeRef::new("http://schema.org/Person").unwrap();
let mut count = 0;
for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_ref()) {
let quad = quad.unwrap();
if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
count += 1;
}
}
assert_eq!(2, count);
```
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -0,0 +1,148 @@
use std::error::Error;
use std::{fmt, io};
/// Error returned during RDF format parsing.
#[derive(Debug)]
pub enum ParseError {
/// I/O error during parsing (file not found...).
Io(io::Error),
/// An error in the file syntax.
Syntax(SyntaxError),
}
impl ParseError {
pub(crate) fn msg(msg: &'static str) -> Self {
Self::Syntax(SyntaxError {
inner: SyntaxErrorKind::Msg { msg },
})
}
}
impl fmt::Display for ParseError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Io(e) => e.fmt(f),
Self::Syntax(e) => e.fmt(f),
}
}
}
impl Error for ParseError {
#[inline]
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::Io(e) => Some(e),
Self::Syntax(e) => Some(e),
}
}
}
impl From<oxttl::SyntaxError> for SyntaxError {
#[inline]
fn from(error: oxttl::SyntaxError) -> Self {
SyntaxError {
inner: SyntaxErrorKind::Turtle(error),
}
}
}
impl From<oxttl::ParseError> for ParseError {
#[inline]
fn from(error: oxttl::ParseError) -> Self {
match error {
oxttl::ParseError::Syntax(e) => Self::Syntax(e.into()),
oxttl::ParseError::Io(e) => Self::Io(e),
}
}
}
impl From<oxrdfxml::SyntaxError> for SyntaxError {
#[inline]
fn from(error: oxrdfxml::SyntaxError) -> Self {
SyntaxError {
inner: SyntaxErrorKind::RdfXml(error),
}
}
}
impl From<oxrdfxml::ParseError> for ParseError {
#[inline]
fn from(error: oxrdfxml::ParseError) -> Self {
match error {
oxrdfxml::ParseError::Syntax(e) => Self::Syntax(e.into()),
oxrdfxml::ParseError::Io(e) => Self::Io(e),
}
}
}
impl From<io::Error> for ParseError {
#[inline]
fn from(error: io::Error) -> Self {
Self::Io(error)
}
}
impl From<SyntaxError> for ParseError {
#[inline]
fn from(error: SyntaxError) -> Self {
Self::Syntax(error)
}
}
impl From<ParseError> for io::Error {
#[inline]
fn from(error: ParseError) -> Self {
match error {
ParseError::Io(error) => error,
ParseError::Syntax(error) => error.into(),
}
}
}
/// An error in the syntax of the parsed file.
#[derive(Debug)]
pub struct SyntaxError {
inner: SyntaxErrorKind,
}
#[derive(Debug)]
enum SyntaxErrorKind {
Turtle(oxttl::SyntaxError),
RdfXml(oxrdfxml::SyntaxError),
Msg { msg: &'static str },
}
impl fmt::Display for SyntaxError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self.inner {
SyntaxErrorKind::Turtle(e) => e.fmt(f),
SyntaxErrorKind::RdfXml(e) => e.fmt(f),
SyntaxErrorKind::Msg { msg } => write!(f, "{msg}"),
}
}
}
impl Error for SyntaxError {
#[inline]
fn source(&self) -> Option<&(dyn Error + 'static)> {
match &self.inner {
SyntaxErrorKind::Turtle(e) => Some(e),
SyntaxErrorKind::RdfXml(e) => Some(e),
SyntaxErrorKind::Msg { .. } => None,
}
}
}
impl From<SyntaxError> for io::Error {
#[inline]
fn from(error: SyntaxError) -> Self {
match error.inner {
SyntaxErrorKind::Turtle(error) => error.into(),
SyntaxErrorKind::RdfXml(error) => error.into(),
SyntaxErrorKind::Msg { msg } => io::Error::new(io::ErrorKind::InvalidData, msg),
}
}
}

@ -0,0 +1,203 @@
use std::fmt;
/// RDF serialization formats.
///
/// This enumeration is non exhaustive. New formats like JSON-LD might be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
pub enum RdfFormat {
/// [N3](https://w3c.github.io/N3/spec/)
N3,
/// [N-Quads](https://www.w3.org/TR/n-quads/)
NQuads,
/// [N-Triples](https://www.w3.org/TR/n-triples/)
NTriples,
/// [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/)
RdfXml,
/// [TriG](https://www.w3.org/TR/trig/)
TriG,
/// [Turtle](https://www.w3.org/TR/turtle/)
Turtle,
}
impl RdfFormat {
/// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/).
///
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(RdfFormat::NTriples.iri(), "http://www.w3.org/ns/formats/N-Triples")
/// ```
#[inline]
pub const fn iri(self) -> &'static str {
match self {
Self::N3 => "http://www.w3.org/ns/formats/N3",
Self::NQuads => "http://www.w3.org/ns/formats/N-Quads",
Self::NTriples => "http://www.w3.org/ns/formats/N-Triples",
Self::RdfXml => "http://www.w3.org/ns/formats/RDF_XML",
Self::TriG => "http://www.w3.org/ns/formats/TriG",
Self::Turtle => "http://www.w3.org/ns/formats/Turtle",
}
}
/// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
///
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(RdfFormat::NTriples.media_type(), "application/n-triples")
/// ```
#[inline]
pub const fn media_type(self) -> &'static str {
match self {
Self::N3 => "text/n3",
Self::NQuads => "application/n-quads",
Self::NTriples => "application/n-triples",
Self::RdfXml => "application/rdf+xml",
Self::TriG => "application/trig",
Self::Turtle => "text/turtle",
}
}
/// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension.
///
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(RdfFormat::NTriples.file_extension(), "nt")
/// ```
#[inline]
pub const fn file_extension(self) -> &'static str {
match self {
Self::N3 => "n3",
Self::NQuads => "nq",
Self::NTriples => "nt",
Self::RdfXml => "rdf",
Self::TriG => "trig",
Self::Turtle => "ttl",
}
}
/// The format name.
///
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(RdfFormat::NTriples.name(), "N-Triples")
/// ```
#[inline]
pub const fn name(self) -> &'static str {
match self {
Self::N3 => "N3",
Self::NQuads => "N-Quads",
Self::NTriples => "N-Triples",
Self::RdfXml => "RDF/XML",
Self::TriG => "TriG",
Self::Turtle => "Turtle",
}
}
/// Checks if the formats supports [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) and not only [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
///
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(RdfFormat::NTriples.supports_datasets(), false);
/// assert_eq!(RdfFormat::NQuads.supports_datasets(), true);
/// ```
#[inline]
pub const fn supports_datasets(self) -> bool {
matches!(self, Self::NQuads | Self::TriG)
}
/// Checks if the formats supports [RDF-star quoted triples](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#dfn-quoted).
///
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(RdfFormat::NTriples.supports_rdf_star(), true);
/// assert_eq!(RdfFormat::RdfXml.supports_rdf_star(), false);
/// ```
#[inline]
#[cfg(feature = "rdf-star")]
pub const fn supports_rdf_star(self) -> bool {
matches!(
self,
Self::NTriples | Self::NQuads | Self::Turtle | Self::TriG
)
}
/// Looks for a known format from a media type.
///
/// It supports some media type aliases.
/// For example, "application/xml" is going to return `RdfFormat::RdfXml` even if it is not its canonical media type.
///
/// Example:
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(RdfFormat::from_media_type("text/turtle; charset=utf-8"), Some(RdfFormat::Turtle))
/// ```
#[inline]
pub fn from_media_type(media_type: &str) -> Option<Self> {
const MEDIA_TYPES: [(&str, RdfFormat); 14] = [
("application/n-quads", RdfFormat::NQuads),
("application/n-triples", RdfFormat::NTriples),
("application/rdf+xml", RdfFormat::RdfXml),
("application/trig", RdfFormat::TriG),
("application/turtle", RdfFormat::Turtle),
("application/xml", RdfFormat::RdfXml),
("application/x-trig", RdfFormat::TriG),
("application/x-turtle", RdfFormat::Turtle),
("text/n3", RdfFormat::N3),
("text/nquads", RdfFormat::NQuads),
("text/plain", RdfFormat::NTriples),
("text/turtle", RdfFormat::Turtle),
("text/xml", RdfFormat::RdfXml),
("text/x-nquads", RdfFormat::NQuads),
];
let media_type = media_type.split(';').next()?.trim();
for (candidate_media_type, candidate_id) in MEDIA_TYPES {
if candidate_media_type.eq_ignore_ascii_case(media_type) {
return Some(candidate_id);
}
}
None
}
/// Looks for a known format from an extension.
///
/// It supports some aliases.
///
/// Example:
/// ```
/// use oxrdfio::RdfFormat;
///
/// assert_eq!(RdfFormat::from_extension("nt"), Some(RdfFormat::NTriples))
/// ```
#[inline]
pub fn from_extension(extension: &str) -> Option<Self> {
const MEDIA_TYPES: [(&str, RdfFormat); 8] = [
("n3", RdfFormat::N3),
("nq", RdfFormat::NQuads),
("nt", RdfFormat::NTriples),
("rdf", RdfFormat::RdfXml),
("trig", RdfFormat::TriG),
("ttl", RdfFormat::Turtle),
("txt", RdfFormat::NTriples),
("xml", RdfFormat::RdfXml),
];
for (candidate_extension, candidate_id) in MEDIA_TYPES {
if candidate_extension.eq_ignore_ascii_case(extension) {
return Some(candidate_id);
}
}
None
}
}
impl fmt::Display for RdfFormat {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.name())
}
}

@ -0,0 +1,19 @@
#![doc = include_str!("../README.md")]
#![doc(test(attr(deny(warnings))))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
mod error;
mod format;
mod parser;
mod serializer;
pub use error::{ParseError, SyntaxError};
pub use format::RdfFormat;
#[cfg(feature = "async-tokio")]
pub use parser::FromTokioAsyncReadQuadReader;
pub use parser::{FromReadQuadReader, RdfParser};
#[cfg(feature = "async-tokio")]
pub use serializer::ToTokioAsyncWriteQuadWriter;
pub use serializer::{RdfSerializer, ToWriteQuadWriter};

@ -0,0 +1,577 @@
//! Utilities to read RDF graphs and datasets.
pub use crate::error::{ParseError, SyntaxError};
use crate::format::RdfFormat;
use oxrdf::{BlankNode, GraphName, IriParseError, Quad, Subject, Term, Triple};
#[cfg(feature = "async-tokio")]
use oxrdfxml::FromTokioAsyncReadRdfXmlReader;
use oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser};
#[cfg(feature = "async-tokio")]
use oxttl::n3::FromTokioAsyncReadN3Reader;
use oxttl::n3::{FromReadN3Reader, N3Parser, N3Quad, N3Term};
#[cfg(feature = "async-tokio")]
use oxttl::nquads::FromTokioAsyncReadNQuadsReader;
use oxttl::nquads::{FromReadNQuadsReader, NQuadsParser};
#[cfg(feature = "async-tokio")]
use oxttl::ntriples::FromTokioAsyncReadNTriplesReader;
use oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser};
#[cfg(feature = "async-tokio")]
use oxttl::trig::FromTokioAsyncReadTriGReader;
use oxttl::trig::{FromReadTriGReader, TriGParser};
#[cfg(feature = "async-tokio")]
use oxttl::turtle::FromTokioAsyncReadTurtleReader;
use oxttl::turtle::{FromReadTurtleReader, TurtleParser};
use std::collections::HashMap;
use std::io::Read;
#[cfg(feature = "async-tokio")]
use tokio::io::AsyncRead;
/// Parsers for RDF serialization formats.
///
/// It currently supports the following formats:
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
///
/// Note the useful options:
/// - [`with_base_iri`](RdfParser::with_base_iri) to resolve the relative IRIs.
/// - [`rename_blank_nodes`](RdfParser::rename_blank_nodes) to rename the blank nodes to auto-generated numbers to avoid conflicts when merging RDF graphs together.
/// - [`without_named_graphs`](RdfParser::without_named_graphs) to parse a single graph.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let quads = parser.parse_read(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
pub struct RdfParser {
inner: RdfParserKind,
default_graph: GraphName,
without_named_graphs: bool,
rename_blank_nodes: bool,
}
enum RdfParserKind {
N3(N3Parser),
NQuads(NQuadsParser),
NTriples(NTriplesParser),
RdfXml(RdfXmlParser),
TriG(TriGParser),
Turtle(TurtleParser),
}
impl RdfParser {
/// Builds a parser for the given format.
#[inline]
#[must_use]
pub fn from_format(format: RdfFormat) -> Self {
Self {
inner: match format {
RdfFormat::N3 => RdfParserKind::N3(N3Parser::new()),
RdfFormat::NQuads => RdfParserKind::NQuads({
#[cfg(feature = "rdf-star")]
{
NQuadsParser::new().with_quoted_triples()
}
#[cfg(not(feature = "rdf-star"))]
{
NQuadsParser::new()
}
}),
RdfFormat::NTriples => RdfParserKind::NTriples({
#[cfg(feature = "rdf-star")]
{
NTriplesParser::new().with_quoted_triples()
}
#[cfg(not(feature = "rdf-star"))]
{
NTriplesParser::new()
}
}),
RdfFormat::RdfXml => RdfParserKind::RdfXml(RdfXmlParser::new()),
RdfFormat::TriG => RdfParserKind::TriG({
#[cfg(feature = "rdf-star")]
{
TriGParser::new().with_quoted_triples()
}
#[cfg(not(feature = "rdf-star"))]
{
TriGParser::new()
}
}),
RdfFormat::Turtle => RdfParserKind::Turtle({
#[cfg(feature = "rdf-star")]
{
TurtleParser::new().with_quoted_triples()
}
#[cfg(not(feature = "rdf-star"))]
{
TurtleParser::new()
}
}),
},
default_graph: GraphName::DefaultGraph,
without_named_graphs: false,
rename_blank_nodes: false,
}
}
/// Provides an IRI that could be used to resolve the file relative IRIs.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "</s> </p> </o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::Turtle).with_base_iri("http://example.com")?;
/// let quads = parser.parse_read(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self {
inner: match self.inner {
RdfParserKind::N3(p) => RdfParserKind::N3(p),
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p),
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p),
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?),
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?),
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?),
},
default_graph: self.default_graph,
without_named_graphs: self.without_named_graphs,
rename_blank_nodes: self.rename_blank_nodes,
})
}
/// Provides the name graph name that should replace the default graph in the returned quads.
///
/// ```
/// use oxrdf::NamedNode;
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::Turtle).with_default_graph(NamedNode::new("http://example.com/g")?);
/// let quads = parser.parse_read(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].graph_name.to_string(), "<http://example.com/g>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
#[must_use]
pub fn with_default_graph(self, default_graph: impl Into<GraphName>) -> Self {
Self {
inner: self.inner,
default_graph: default_graph.into(),
without_named_graphs: self.without_named_graphs,
rename_blank_nodes: self.rename_blank_nodes,
}
}
/// Sets that the parser must fail if parsing a named graph.
///
/// This function restricts the parser to only parse a single [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) and not an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NQuads).without_named_graphs();
/// assert!(parser.parse_read(file.as_bytes()).next().unwrap().is_err());
/// ```
#[inline]
#[must_use]
pub fn without_named_graphs(self) -> Self {
Self {
inner: self.inner,
default_graph: self.default_graph,
without_named_graphs: true,
rename_blank_nodes: self.rename_blank_nodes,
}
}
/// Renames the blank nodes ids from the ones set in the serialization to random ids.
///
/// This allows to avoid id conflicts when merging graphs together.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "_:a <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NQuads).rename_blank_nodes();
/// let result1 = parser.parse_read(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
/// let result2 = parser.parse_read(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
/// assert_ne!(result1, result2);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
#[must_use]
pub fn rename_blank_nodes(self) -> Self {
Self {
inner: self.inner,
default_graph: self.default_graph,
without_named_graphs: self.without_named_graphs,
rename_blank_nodes: true,
}
}
/// Parses from a [`Read`] implementation and returns an iterator of quads.
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let quads = parser.parse_read(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
pub fn parse_read<R: Read>(&self, reader: R) -> FromReadQuadReader<R> {
FromReadQuadReader {
parser: match &self.inner {
RdfParserKind::N3(p) => FromReadQuadReaderKind::N3(p.parse_read(reader)),
RdfParserKind::NQuads(p) => FromReadQuadReaderKind::NQuads(p.parse_read(reader)),
RdfParserKind::NTriples(p) => {
FromReadQuadReaderKind::NTriples(p.parse_read(reader))
}
RdfParserKind::RdfXml(p) => FromReadQuadReaderKind::RdfXml(p.parse_read(reader)),
RdfParserKind::TriG(p) => FromReadQuadReaderKind::TriG(p.parse_read(reader)),
RdfParserKind::Turtle(p) => FromReadQuadReaderKind::Turtle(p.parse_read(reader)),
},
mapper: QuadMapper {
default_graph: self.default_graph.clone(),
without_named_graphs: self.without_named_graphs,
blank_node_map: self.rename_blank_nodes.then(HashMap::new),
},
}
}
/// Parses from a Tokio [`AsyncRead`] implementation and returns an async iterator of quads.
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser, ParseError};
///
/// #[tokio::main(flavor = "current_thread")]
/// async fn main() -> Result<(), ParseError> {
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
/// if let Some(quad) = reader.next().await {
/// assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
/// }
/// Ok(())
/// }
/// ```
#[cfg(feature = "async-tokio")]
pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
&self,
reader: R,
) -> FromTokioAsyncReadQuadReader<R> {
FromTokioAsyncReadQuadReader {
parser: match &self.inner {
RdfParserKind::N3(p) => {
FromTokioAsyncReadQuadReaderKind::N3(p.parse_tokio_async_read(reader))
}
RdfParserKind::NQuads(p) => {
FromTokioAsyncReadQuadReaderKind::NQuads(p.parse_tokio_async_read(reader))
}
RdfParserKind::NTriples(p) => {
FromTokioAsyncReadQuadReaderKind::NTriples(p.parse_tokio_async_read(reader))
}
RdfParserKind::RdfXml(p) => {
FromTokioAsyncReadQuadReaderKind::RdfXml(p.parse_tokio_async_read(reader))
}
RdfParserKind::TriG(p) => {
FromTokioAsyncReadQuadReaderKind::TriG(p.parse_tokio_async_read(reader))
}
RdfParserKind::Turtle(p) => {
FromTokioAsyncReadQuadReaderKind::Turtle(p.parse_tokio_async_read(reader))
}
},
mapper: QuadMapper {
default_graph: self.default_graph.clone(),
without_named_graphs: self.without_named_graphs,
blank_node_map: self.rename_blank_nodes.then(HashMap::new),
},
}
}
}
/// Parses a RDF file from a [`Read`] implementation. Can be built using [`RdfParser::parse_read`].
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let quads = parser.parse_read(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct FromReadQuadReader<R: Read> {
parser: FromReadQuadReaderKind<R>,
mapper: QuadMapper,
}
enum FromReadQuadReaderKind<R: Read> {
N3(FromReadN3Reader<R>),
NQuads(FromReadNQuadsReader<R>),
NTriples(FromReadNTriplesReader<R>),
RdfXml(FromReadRdfXmlReader<R>),
TriG(FromReadTriGReader<R>),
Turtle(FromReadTurtleReader<R>),
}
impl<R: Read> Iterator for FromReadQuadReader<R> {
type Item = Result<Quad, ParseError>;
fn next(&mut self) -> Option<Result<Quad, ParseError>> {
Some(match &mut self.parser {
FromReadQuadReaderKind::N3(parser) => match parser.next()? {
Ok(quad) => self.mapper.map_n3_quad(quad),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::NQuads(parser) => match parser.next()? {
Ok(quad) => self.mapper.map_quad(quad),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::NTriples(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::RdfXml(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::TriG(parser) => match parser.next()? {
Ok(quad) => self.mapper.map_quad(quad),
Err(e) => Err(e.into()),
},
FromReadQuadReaderKind::Turtle(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
})
}
}
/// Parses a RDF file from a Tokio [`AsyncRead`] implementation. Can be built using [`RdfParser::parse_tokio_async_read`].
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser, ParseError};
///
/// #[tokio::main(flavor = "current_thread")]
/// async fn main() -> Result<(), ParseError> {
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
/// if let Some(quad) = reader.next().await {
/// assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
/// }
/// Ok(())
/// }
/// ```
#[must_use]
#[cfg(feature = "async-tokio")]
pub struct FromTokioAsyncReadQuadReader<R: AsyncRead + Unpin> {
parser: FromTokioAsyncReadQuadReaderKind<R>,
mapper: QuadMapper,
}
#[cfg(feature = "async-tokio")]
enum FromTokioAsyncReadQuadReaderKind<R: AsyncRead + Unpin> {
N3(FromTokioAsyncReadN3Reader<R>),
NQuads(FromTokioAsyncReadNQuadsReader<R>),
NTriples(FromTokioAsyncReadNTriplesReader<R>),
RdfXml(FromTokioAsyncReadRdfXmlReader<R>),
TriG(FromTokioAsyncReadTriGReader<R>),
Turtle(FromTokioAsyncReadTurtleReader<R>),
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadQuadReader<R> {
pub async fn next(&mut self) -> Option<Result<Quad, ParseError>> {
Some(match &mut self.parser {
FromTokioAsyncReadQuadReaderKind::N3(parser) => match parser.next().await? {
Ok(quad) => self.mapper.map_n3_quad(quad),
Err(e) => Err(e.into()),
},
FromTokioAsyncReadQuadReaderKind::NQuads(parser) => match parser.next().await? {
Ok(quad) => self.mapper.map_quad(quad),
Err(e) => Err(e.into()),
},
FromTokioAsyncReadQuadReaderKind::NTriples(parser) => match parser.next().await? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
FromTokioAsyncReadQuadReaderKind::RdfXml(parser) => match parser.next().await? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
FromTokioAsyncReadQuadReaderKind::TriG(parser) => match parser.next().await? {
Ok(quad) => self.mapper.map_quad(quad),
Err(e) => Err(e.into()),
},
FromTokioAsyncReadQuadReaderKind::Turtle(parser) => match parser.next().await? {
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
Err(e) => Err(e.into()),
},
})
}
}
struct QuadMapper {
default_graph: GraphName,
without_named_graphs: bool,
blank_node_map: Option<HashMap<BlankNode, BlankNode>>,
}
impl QuadMapper {
fn map_blank_node(&mut self, node: BlankNode) -> BlankNode {
if let Some(blank_node_map) = &mut self.blank_node_map {
blank_node_map
.entry(node)
.or_insert_with(BlankNode::default)
.clone()
} else {
node
}
}
fn map_subject(&mut self, node: Subject) -> Subject {
match node {
Subject::NamedNode(node) => node.into(),
Subject::BlankNode(node) => self.map_blank_node(node).into(),
#[cfg(feature = "rdf-star")]
Subject::Triple(triple) => self.map_triple(*triple).into(),
}
}
fn map_term(&mut self, node: Term) -> Term {
match node {
Term::NamedNode(node) => node.into(),
Term::BlankNode(node) => self.map_blank_node(node).into(),
Term::Literal(literal) => literal.into(),
#[cfg(feature = "rdf-star")]
Term::Triple(triple) => self.map_triple(*triple).into(),
}
}
fn map_triple(&mut self, triple: Triple) -> Triple {
Triple {
subject: self.map_subject(triple.subject),
predicate: triple.predicate,
object: self.map_term(triple.object),
}
}
fn map_graph_name(&mut self, graph_name: GraphName) -> Result<GraphName, ParseError> {
match graph_name {
GraphName::NamedNode(node) => {
if self.without_named_graphs {
Err(ParseError::msg("Named graphs are not allowed"))
} else {
Ok(node.into())
}
}
GraphName::BlankNode(node) => {
if self.without_named_graphs {
Err(ParseError::msg("Named graphs are not allowed"))
} else {
Ok(self.map_blank_node(node).into())
}
}
GraphName::DefaultGraph => Ok(self.default_graph.clone()),
}
}
fn map_quad(&mut self, quad: Quad) -> Result<Quad, ParseError> {
Ok(Quad {
subject: self.map_subject(quad.subject),
predicate: quad.predicate,
object: self.map_term(quad.object),
graph_name: self.map_graph_name(quad.graph_name)?,
})
}
fn map_triple_to_quad(&mut self, triple: Triple) -> Quad {
self.map_triple(triple).in_graph(self.default_graph.clone())
}
fn map_n3_quad(&mut self, quad: N3Quad) -> Result<Quad, ParseError> {
Ok(Quad {
subject: match quad.subject {
N3Term::NamedNode(s) => Ok(s.into()),
N3Term::BlankNode(s) => Ok(self.map_blank_node(s).into()),
N3Term::Literal(_) => Err(ParseError::msg(
"literals are not allowed in regular RDF subjects",
)),
#[cfg(feature = "rdf-star")]
N3Term::Triple(s) => Ok(self.map_triple(*s).into()),
N3Term::Variable(_) => Err(ParseError::msg(
"variables are not allowed in regular RDF subjects",
)),
}?,
predicate: match quad.predicate {
N3Term::NamedNode(p) => Ok(p),
N3Term::BlankNode(_) => Err(ParseError::msg(
"blank nodes are not allowed in regular RDF predicates",
)),
N3Term::Literal(_) => Err(ParseError::msg(
"literals are not allowed in regular RDF predicates",
)),
#[cfg(feature = "rdf-star")]
N3Term::Triple(_) => Err(ParseError::msg(
"quoted triples are not allowed in regular RDF predicates",
)),
N3Term::Variable(_) => Err(ParseError::msg(
"variables are not allowed in regular RDF predicates",
)),
}?,
object: match quad.object {
N3Term::NamedNode(o) => Ok(o.into()),
N3Term::BlankNode(o) => Ok(self.map_blank_node(o).into()),
N3Term::Literal(o) => Ok(o.into()),
#[cfg(feature = "rdf-star")]
N3Term::Triple(o) => Ok(self.map_triple(*o).into()),
N3Term::Variable(_) => Err(ParseError::msg(
"variables are not allowed in regular RDF objects",
)),
}?,
graph_name: self.map_graph_name(quad.graph_name)?,
})
}
}

@ -0,0 +1,322 @@
//! Utilities to write RDF graphs and datasets.
use crate::format::RdfFormat;
use oxrdf::{GraphNameRef, QuadRef, TripleRef};
#[cfg(feature = "async-tokio")]
use oxrdfxml::ToTokioAsyncWriteRdfXmlWriter;
use oxrdfxml::{RdfXmlSerializer, ToWriteRdfXmlWriter};
#[cfg(feature = "async-tokio")]
use oxttl::nquads::ToTokioAsyncWriteNQuadsWriter;
use oxttl::nquads::{NQuadsSerializer, ToWriteNQuadsWriter};
#[cfg(feature = "async-tokio")]
use oxttl::ntriples::ToTokioAsyncWriteNTriplesWriter;
use oxttl::ntriples::{NTriplesSerializer, ToWriteNTriplesWriter};
#[cfg(feature = "async-tokio")]
use oxttl::trig::ToTokioAsyncWriteTriGWriter;
use oxttl::trig::{ToWriteTriGWriter, TriGSerializer};
#[cfg(feature = "async-tokio")]
use oxttl::turtle::ToTokioAsyncWriteTurtleWriter;
use oxttl::turtle::{ToWriteTurtleWriter, TurtleSerializer};
use std::io::{self, Write};
#[cfg(feature = "async-tokio")]
use tokio::io::{AsyncWrite, AsyncWriteExt};
/// A serializer for RDF serialization formats.
///
/// It currently supports the following formats:
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// let mut buffer = Vec::new();
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(&mut buffer);
/// writer.write_quad(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into()
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct RdfSerializer {
format: RdfFormat,
}
impl RdfSerializer {
/// Builds a serializer for the given format
#[inline]
pub fn from_format(format: RdfFormat) -> Self {
Self { format }
}
/// Writes to a [`Write`] implementation.
///
/// Warning: Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.
///
/// Warning: This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// let mut buffer = Vec::new();
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(&mut buffer);
/// writer.write_quad(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into()
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn serialize_to_write<W: Write>(&self, writer: W) -> ToWriteQuadWriter<W> {
ToWriteQuadWriter {
formatter: match self.format {
RdfFormat::NQuads => ToWriteQuadWriterKind::NQuads(
NQuadsSerializer::new().serialize_to_write(writer),
),
RdfFormat::NTriples => ToWriteQuadWriterKind::NTriples(
NTriplesSerializer::new().serialize_to_write(writer),
),
RdfFormat::RdfXml => ToWriteQuadWriterKind::RdfXml(
RdfXmlSerializer::new().serialize_to_write(writer),
),
RdfFormat::TriG => {
ToWriteQuadWriterKind::TriG(TriGSerializer::new().serialize_to_write(writer))
}
RdfFormat::Turtle | RdfFormat::N3 => ToWriteQuadWriterKind::Turtle(
TurtleSerializer::new().serialize_to_write(writer),
),
},
}
}
/// Writes to a Tokio [`AsyncWrite`] implementation.
///
/// Warning: Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.
///
/// Warning: This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
/// use std::io;
///
/// #[tokio::main(flavor = "current_thread")]
/// async fn main() -> io::Result<()> {
/// let mut buffer = Vec::new();
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(&mut buffer);
/// writer.write_quad(&Quad {
/// subject: NamedNode::new_unchecked("http://example.com/s").into(),
/// predicate: NamedNode::new_unchecked("http://example.com/p"),
/// object: NamedNode::new_unchecked("http://example.com/o").into(),
/// graph_name: NamedNode::new_unchecked("http://example.com/g").into()
/// }).await?;
/// writer.finish().await?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// Ok(())
/// }
/// ```
#[cfg(feature = "async-tokio")]
pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>(
&self,
writer: W,
) -> ToTokioAsyncWriteQuadWriter<W> {
ToTokioAsyncWriteQuadWriter {
formatter: match self.format {
RdfFormat::NQuads => ToTokioAsyncWriteQuadWriterKind::NQuads(
NQuadsSerializer::new().serialize_to_tokio_async_write(writer),
),
RdfFormat::NTriples => ToTokioAsyncWriteQuadWriterKind::NTriples(
NTriplesSerializer::new().serialize_to_tokio_async_write(writer),
),
RdfFormat::RdfXml => ToTokioAsyncWriteQuadWriterKind::RdfXml(
RdfXmlSerializer::new().serialize_to_tokio_async_write(writer),
),
RdfFormat::TriG => ToTokioAsyncWriteQuadWriterKind::TriG(
TriGSerializer::new().serialize_to_tokio_async_write(writer),
),
RdfFormat::Turtle | RdfFormat::N3 => ToTokioAsyncWriteQuadWriterKind::Turtle(
TurtleSerializer::new().serialize_to_tokio_async_write(writer),
),
},
}
}
}
/// Writes quads or triples to a [`Write`] implementation.
///
/// Can be built using [`RdfSerializer::serialize_to_write`].
///
/// Warning: Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.
///
/// Warning: This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// let mut buffer = Vec::new();
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(&mut buffer);
/// writer.write_quad(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct ToWriteQuadWriter<W: Write> {
formatter: ToWriteQuadWriterKind<W>,
}
enum ToWriteQuadWriterKind<W: Write> {
NQuads(ToWriteNQuadsWriter<W>),
NTriples(ToWriteNTriplesWriter<W>),
RdfXml(ToWriteRdfXmlWriter<W>),
TriG(ToWriteTriGWriter<W>),
Turtle(ToWriteTurtleWriter<W>),
}
impl<W: Write> ToWriteQuadWriter<W> {
/// Writes a [`QuadRef`]
pub fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
match &mut self.formatter {
ToWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad),
ToWriteQuadWriterKind::NTriples(writer) => writer.write_triple(to_triple(quad)?),
ToWriteQuadWriterKind::RdfXml(writer) => writer.write_triple(to_triple(quad)?),
ToWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad),
ToWriteQuadWriterKind::Turtle(writer) => writer.write_triple(to_triple(quad)?),
}
}
/// Writes a [`TripleRef`]
pub fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph))
}
/// Writes the last bytes of the file
pub fn finish(self) -> io::Result<()> {
match self.formatter {
ToWriteQuadWriterKind::NQuads(writer) => writer.finish(),
ToWriteQuadWriterKind::NTriples(writer) => writer.finish(),
ToWriteQuadWriterKind::RdfXml(writer) => writer.finish()?,
ToWriteQuadWriterKind::TriG(writer) => writer.finish()?,
ToWriteQuadWriterKind::Turtle(writer) => writer.finish()?,
}
.flush()
}
}
/// Writes quads or triples to a [`Write`] implementation.
///
/// Can be built using [`RdfSerializer::serialize_to_write`].
///
/// Warning: Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.
///
/// Warning: This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
/// use std::io;
///
/// #[tokio::main(flavor = "current_thread")]
/// async fn main() -> io::Result<()> {
/// let mut buffer = Vec::new();
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(&mut buffer);
/// writer.write_quad(&Quad {
/// subject: NamedNode::new_unchecked("http://example.com/s").into(),
/// predicate: NamedNode::new_unchecked("http://example.com/p"),
/// object: NamedNode::new_unchecked("http://example.com/o").into(),
/// graph_name: NamedNode::new_unchecked("http://example.com/g").into()
/// }).await?;
/// writer.finish().await?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// Ok(())
/// }
/// ```
#[must_use]
#[cfg(feature = "async-tokio")]
pub struct ToTokioAsyncWriteQuadWriter<W: AsyncWrite + Unpin> {
formatter: ToTokioAsyncWriteQuadWriterKind<W>,
}
#[cfg(feature = "async-tokio")]
enum ToTokioAsyncWriteQuadWriterKind<W: AsyncWrite + Unpin> {
NQuads(ToTokioAsyncWriteNQuadsWriter<W>),
NTriples(ToTokioAsyncWriteNTriplesWriter<W>),
RdfXml(ToTokioAsyncWriteRdfXmlWriter<W>),
TriG(ToTokioAsyncWriteTriGWriter<W>),
Turtle(ToTokioAsyncWriteTurtleWriter<W>),
}
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteQuadWriter<W> {
/// Writes a [`QuadRef`]
pub async fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
match &mut self.formatter {
ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad).await,
ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => {
writer.write_triple(to_triple(quad)?).await
}
ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => {
writer.write_triple(to_triple(quad)?).await
}
ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad).await,
ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => {
writer.write_triple(to_triple(quad)?).await
}
}
}
/// Writes a [`TripleRef`]
pub async fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph))
.await
}
/// Writes the last bytes of the file
pub async fn finish(self) -> io::Result<()> {
match self.formatter {
ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.finish(),
ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => writer.finish(),
ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => writer.finish().await?,
ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.finish().await?,
ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => writer.finish().await?,
}
.flush()
.await
}
}
fn to_triple<'a>(quad: impl Into<QuadRef<'a>>) -> io::Result<TripleRef<'a>> {
let quad = quad.into();
if quad.graph_name.is_default_graph() {
Ok(quad.into())
} else {
Err(io::Error::new(
io::ErrorKind::InvalidInput,
"Only quads in the default graph can be serialized to a RDF graph format",
))
}
}

@ -43,40 +43,21 @@ impl Error for ParseError {
} }
} }
impl From<oxttl::SyntaxError> for SyntaxError { impl From<oxrdfio::SyntaxError> for SyntaxError {
#[inline] #[inline]
fn from(error: oxttl::SyntaxError) -> Self { fn from(error: oxrdfio::SyntaxError) -> Self {
SyntaxError { SyntaxError {
inner: SyntaxErrorKind::Turtle(error), inner: SyntaxErrorKind::IO(error),
} }
} }
} }
impl From<oxttl::ParseError> for ParseError { impl From<oxrdfio::ParseError> for ParseError {
#[inline] #[inline]
fn from(error: oxttl::ParseError) -> Self { fn from(error: oxrdfio::ParseError) -> Self {
match error { match error {
oxttl::ParseError::Syntax(e) => Self::Syntax(e.into()), oxrdfio::ParseError::Syntax(e) => Self::Syntax(e.into()),
oxttl::ParseError::Io(e) => Self::Io(e), oxrdfio::ParseError::Io(e) => Self::Io(e),
}
}
}
impl From<oxrdfxml::SyntaxError> for SyntaxError {
#[inline]
fn from(error: oxrdfxml::SyntaxError) -> Self {
SyntaxError {
inner: SyntaxErrorKind::RdfXml(error),
}
}
}
impl From<oxrdfxml::ParseError> for ParseError {
#[inline]
fn from(error: oxrdfxml::ParseError) -> Self {
match error {
oxrdfxml::ParseError::Syntax(e) => Self::Syntax(e.into()),
oxrdfxml::ParseError::Io(e) => Self::Io(e),
} }
} }
} }
@ -113,8 +94,7 @@ pub struct SyntaxError {
#[derive(Debug)] #[derive(Debug)]
enum SyntaxErrorKind { enum SyntaxErrorKind {
Turtle(oxttl::SyntaxError), IO(oxrdfio::SyntaxError),
RdfXml(oxrdfxml::SyntaxError),
InvalidBaseIri { iri: String, error: IriParseError }, InvalidBaseIri { iri: String, error: IriParseError },
} }
@ -122,8 +102,7 @@ impl fmt::Display for SyntaxError {
#[inline] #[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self.inner { match &self.inner {
SyntaxErrorKind::Turtle(e) => e.fmt(f), SyntaxErrorKind::IO(e) => e.fmt(f),
SyntaxErrorKind::RdfXml(e) => e.fmt(f),
SyntaxErrorKind::InvalidBaseIri { iri, error } => { SyntaxErrorKind::InvalidBaseIri { iri, error } => {
write!(f, "Invalid base IRI '{iri}': {error}") write!(f, "Invalid base IRI '{iri}': {error}")
} }
@ -135,8 +114,7 @@ impl Error for SyntaxError {
#[inline] #[inline]
fn source(&self) -> Option<&(dyn Error + 'static)> { fn source(&self) -> Option<&(dyn Error + 'static)> {
match &self.inner { match &self.inner {
SyntaxErrorKind::Turtle(e) => Some(e), SyntaxErrorKind::IO(e) => Some(e),
SyntaxErrorKind::RdfXml(e) => Some(e),
SyntaxErrorKind::InvalidBaseIri { .. } => None, SyntaxErrorKind::InvalidBaseIri { .. } => None,
} }
} }
@ -146,8 +124,7 @@ impl From<SyntaxError> for io::Error {
#[inline] #[inline]
fn from(error: SyntaxError) -> Self { fn from(error: SyntaxError) -> Self {
match error.inner { match error.inner {
SyntaxErrorKind::Turtle(error) => error.into(), SyntaxErrorKind::IO(error) => error.into(),
SyntaxErrorKind::RdfXml(error) => error.into(),
SyntaxErrorKind::InvalidBaseIri { iri, error } => Self::new( SyntaxErrorKind::InvalidBaseIri { iri, error } => Self::new(
io::ErrorKind::InvalidInput, io::ErrorKind::InvalidInput,
format!("Invalid IRI '{iri}': {error}"), format!("Invalid IRI '{iri}': {error}"),

@ -1,3 +1,5 @@
use oxrdfio::RdfFormat;
/// [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) serialization formats. /// [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) serialization formats.
/// ///
/// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future. /// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future.
@ -102,6 +104,16 @@ impl GraphFormat {
} }
} }
impl From<GraphFormat> for RdfFormat {
fn from(format: GraphFormat) -> Self {
match format {
GraphFormat::NTriples => Self::NTriples,
GraphFormat::Turtle => Self::Turtle,
GraphFormat::RdfXml => Self::RdfXml,
}
}
}
/// [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) serialization formats. /// [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) serialization formats.
/// ///
/// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future. /// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future.
@ -198,6 +210,15 @@ impl DatasetFormat {
} }
} }
impl From<DatasetFormat> for RdfFormat {
fn from(format: DatasetFormat) -> Self {
match format {
DatasetFormat::NQuads => Self::NQuads,
DatasetFormat::TriG => Self::TriG,
}
}
}
impl TryFrom<DatasetFormat> for GraphFormat { impl TryFrom<DatasetFormat> for GraphFormat {
type Error = (); type Error = ();

@ -8,3 +8,7 @@ pub mod write;
pub use self::format::{DatasetFormat, GraphFormat}; pub use self::format::{DatasetFormat, GraphFormat};
pub use self::read::{DatasetParser, GraphParser}; pub use self::read::{DatasetParser, GraphParser};
pub use self::write::{DatasetSerializer, GraphSerializer}; pub use self::write::{DatasetSerializer, GraphSerializer};
pub use oxrdfio::{
FromReadQuadReader, ParseError, RdfFormat, RdfParser, RdfSerializer, SyntaxError,
ToWriteQuadWriter,
};

@ -4,12 +4,7 @@ pub use crate::io::error::{ParseError, SyntaxError};
use crate::io::{DatasetFormat, GraphFormat}; use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*; use crate::model::*;
use oxiri::IriParseError; use oxiri::IriParseError;
use oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser}; use oxrdfio::{FromReadQuadReader, RdfParser};
use oxttl::nquads::{FromReadNQuadsReader, NQuadsParser};
use oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser};
use oxttl::trig::{FromReadTriGReader, TriGParser};
use oxttl::turtle::{FromReadTurtleReader, TurtleParser};
use std::collections::HashMap;
use std::io::Read; use std::io::Read;
/// Parsers for RDF graph serialization formats. /// Parsers for RDF graph serialization formats.
@ -27,18 +22,12 @@ use std::io::Read;
/// let parser = GraphParser::from_format(GraphFormat::NTriples); /// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser.read_triples(file.as_bytes()).collect::<Result<Vec<_>,_>>()?; /// let triples = parser.read_triples(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
/// ///
///assert_eq!(triples.len(), 1); /// assert_eq!(triples.len(), 1);
///assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>"); /// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(()) /// # std::io::Result::Ok(())
/// ``` /// ```
pub struct GraphParser { pub struct GraphParser {
inner: GraphParserKind, inner: RdfParser,
}
enum GraphParserKind {
NTriples(NTriplesParser),
Turtle(TurtleParser),
RdfXml(RdfXmlParser),
} }
impl GraphParser { impl GraphParser {
@ -46,15 +35,9 @@ impl GraphParser {
#[inline] #[inline]
pub fn from_format(format: GraphFormat) -> Self { pub fn from_format(format: GraphFormat) -> Self {
Self { Self {
inner: match format { inner: RdfParser::from_format(format.into())
GraphFormat::NTriples => { .without_named_graphs()
GraphParserKind::NTriples(NTriplesParser::new().with_quoted_triples()) .rename_blank_nodes(),
}
GraphFormat::Turtle => {
GraphParserKind::Turtle(TurtleParser::new().with_quoted_triples())
}
GraphFormat::RdfXml => GraphParserKind::RdfXml(RdfXmlParser::new()),
},
} }
} }
@ -68,30 +51,21 @@ impl GraphParser {
/// let parser = GraphParser::from_format(GraphFormat::Turtle).with_base_iri("http://example.com")?; /// let parser = GraphParser::from_format(GraphFormat::Turtle).with_base_iri("http://example.com")?;
/// let triples = parser.read_triples(file.as_bytes()).collect::<Result<Vec<_>,_>>()?; /// let triples = parser.read_triples(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
/// ///
///assert_eq!(triples.len(), 1); /// assert_eq!(triples.len(), 1);
///assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>"); /// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ``` /// ```
#[inline] #[inline]
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self { Ok(Self {
inner: match self.inner { inner: self.inner.with_base_iri(base_iri)?,
GraphParserKind::NTriples(p) => GraphParserKind::NTriples(p),
GraphParserKind::Turtle(p) => GraphParserKind::Turtle(p.with_base_iri(base_iri)?),
GraphParserKind::RdfXml(p) => GraphParserKind::RdfXml(p.with_base_iri(base_iri)?),
},
}) })
} }
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of triples. /// Executes the parsing itself on a [`Read`] implementation and returns an iterator of triples.
pub fn read_triples<R: Read>(&self, reader: R) -> TripleReader<R> { pub fn read_triples<R: Read>(&self, reader: R) -> TripleReader<R> {
TripleReader { TripleReader {
mapper: BlankNodeMapper::default(), parser: self.inner.parse_read(reader),
parser: match &self.inner {
GraphParserKind::NTriples(p) => TripleReaderKind::NTriples(p.parse_read(reader)),
GraphParserKind::Turtle(p) => TripleReaderKind::Turtle(p.parse_read(reader)),
GraphParserKind::RdfXml(p) => TripleReaderKind::RdfXml(p.parse_read(reader)),
},
} }
} }
} }
@ -107,41 +81,20 @@ impl GraphParser {
/// let parser = GraphParser::from_format(GraphFormat::NTriples); /// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser.read_triples(file.as_bytes()).collect::<Result<Vec<_>,_>>()?; /// let triples = parser.read_triples(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
/// ///
///assert_eq!(triples.len(), 1); /// assert_eq!(triples.len(), 1);
///assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>"); /// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(()) /// # std::io::Result::Ok(())
/// ``` /// ```
#[must_use] #[must_use]
pub struct TripleReader<R: Read> { pub struct TripleReader<R: Read> {
mapper: BlankNodeMapper, parser: FromReadQuadReader<R>,
parser: TripleReaderKind<R>,
}
#[allow(clippy::large_enum_variant)]
enum TripleReaderKind<R: Read> {
NTriples(FromReadNTriplesReader<R>),
Turtle(FromReadTurtleReader<R>),
RdfXml(FromReadRdfXmlReader<R>),
} }
impl<R: Read> Iterator for TripleReader<R> { impl<R: Read> Iterator for TripleReader<R> {
type Item = Result<Triple, ParseError>; type Item = Result<Triple, ParseError>;
fn next(&mut self) -> Option<Result<Triple, ParseError>> { fn next(&mut self) -> Option<Result<Triple, ParseError>> {
Some(match &mut self.parser { Some(self.parser.next()?.map(Into::into).map_err(Into::into))
TripleReaderKind::NTriples(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.triple(triple)),
Err(e) => Err(e.into()),
},
TripleReaderKind::Turtle(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.triple(triple)),
Err(e) => Err(e.into()),
},
TripleReaderKind::RdfXml(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.triple(triple)),
Err(e) => Err(e.into()),
},
})
} }
} }
@ -159,17 +112,12 @@ impl<R: Read> Iterator for TripleReader<R> {
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads); /// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?; /// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
/// ///
///assert_eq!(quads.len(), 1); /// assert_eq!(quads.len(), 1);
///assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>"); /// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(()) /// # std::io::Result::Ok(())
/// ``` /// ```
pub struct DatasetParser { pub struct DatasetParser {
inner: DatasetParserKind, inner: RdfParser,
}
enum DatasetParserKind {
NQuads(NQuadsParser),
TriG(TriGParser),
} }
impl DatasetParser { impl DatasetParser {
@ -177,14 +125,7 @@ impl DatasetParser {
#[inline] #[inline]
pub fn from_format(format: DatasetFormat) -> Self { pub fn from_format(format: DatasetFormat) -> Self {
Self { Self {
inner: match format { inner: RdfParser::from_format(format.into()).rename_blank_nodes(),
DatasetFormat::NQuads => {
DatasetParserKind::NQuads(NQuadsParser::new().with_quoted_triples())
}
DatasetFormat::TriG => {
DatasetParserKind::TriG(TriGParser::new().with_quoted_triples())
}
},
} }
} }
@ -198,28 +139,21 @@ impl DatasetParser {
/// let parser = DatasetParser::from_format(DatasetFormat::TriG).with_base_iri("http://example.com")?; /// let parser = DatasetParser::from_format(DatasetFormat::TriG).with_base_iri("http://example.com")?;
/// let triples = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?; /// let triples = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
/// ///
///assert_eq!(triples.len(), 1); /// assert_eq!(triples.len(), 1);
///assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>"); /// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ``` /// ```
#[inline] #[inline]
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self { Ok(Self {
inner: match self.inner { inner: self.inner.with_base_iri(base_iri)?,
DatasetParserKind::NQuads(p) => DatasetParserKind::NQuads(p),
DatasetParserKind::TriG(p) => DatasetParserKind::TriG(p.with_base_iri(base_iri)?),
},
}) })
} }
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of quads. /// Executes the parsing itself on a [`Read`] implementation and returns an iterator of quads.
pub fn read_quads<R: Read>(&self, reader: R) -> QuadReader<R> { pub fn read_quads<R: Read>(&self, reader: R) -> QuadReader<R> {
QuadReader { QuadReader {
mapper: BlankNodeMapper::default(), parser: self.inner.parse_read(reader),
parser: match &self.inner {
DatasetParserKind::NQuads(p) => QuadReaderKind::NQuads(p.parse_read(reader)),
DatasetParserKind::TriG(p) => QuadReaderKind::TriG(p.parse_read(reader)),
},
} }
} }
} }
@ -235,90 +169,19 @@ impl DatasetParser {
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads); /// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?; /// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
/// ///
///assert_eq!(quads.len(), 1); /// assert_eq!(quads.len(), 1);
///assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>"); /// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(()) /// # std::io::Result::Ok(())
/// ``` /// ```
#[must_use] #[must_use]
pub struct QuadReader<R: Read> { pub struct QuadReader<R: Read> {
mapper: BlankNodeMapper, parser: FromReadQuadReader<R>,
parser: QuadReaderKind<R>,
}
enum QuadReaderKind<R: Read> {
NQuads(FromReadNQuadsReader<R>),
TriG(FromReadTriGReader<R>),
} }
impl<R: Read> Iterator for QuadReader<R> { impl<R: Read> Iterator for QuadReader<R> {
type Item = Result<Quad, ParseError>; type Item = Result<Quad, ParseError>;
fn next(&mut self) -> Option<Result<Quad, ParseError>> { fn next(&mut self) -> Option<Result<Quad, ParseError>> {
Some(match &mut self.parser { Some(self.parser.next()?.map_err(Into::into))
QuadReaderKind::NQuads(parser) => match parser.next()? {
Ok(quad) => Ok(self.mapper.quad(quad)),
Err(e) => Err(e.into()),
},
QuadReaderKind::TriG(parser) => match parser.next()? {
Ok(quad) => Ok(self.mapper.quad(quad)),
Err(e) => Err(e.into()),
},
})
}
}
#[derive(Default)]
struct BlankNodeMapper {
bnode_map: HashMap<BlankNode, BlankNode>,
}
impl BlankNodeMapper {
fn blank_node(&mut self, node: BlankNode) -> BlankNode {
self.bnode_map
.entry(node)
.or_insert_with(BlankNode::default)
.clone()
}
fn subject(&mut self, node: Subject) -> Subject {
match node {
Subject::NamedNode(node) => node.into(),
Subject::BlankNode(node) => self.blank_node(node).into(),
Subject::Triple(triple) => self.triple(*triple).into(),
}
}
fn term(&mut self, node: Term) -> Term {
match node {
Term::NamedNode(node) => node.into(),
Term::BlankNode(node) => self.blank_node(node).into(),
Term::Literal(literal) => literal.into(),
Term::Triple(triple) => self.triple(*triple).into(),
}
}
fn triple(&mut self, triple: Triple) -> Triple {
Triple {
subject: self.subject(triple.subject),
predicate: triple.predicate,
object: self.term(triple.object),
}
}
fn graph_name(&mut self, graph_name: GraphName) -> GraphName {
match graph_name {
GraphName::NamedNode(node) => node.into(),
GraphName::BlankNode(node) => self.blank_node(node).into(),
GraphName::DefaultGraph => GraphName::DefaultGraph,
}
}
fn quad(&mut self, quad: Quad) -> Quad {
Quad {
subject: self.subject(quad.subject),
predicate: quad.predicate,
object: self.term(quad.object),
graph_name: self.graph_name(quad.graph_name),
}
} }
} }

@ -2,11 +2,7 @@
use crate::io::{DatasetFormat, GraphFormat}; use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*; use crate::model::*;
use oxrdfxml::{RdfXmlSerializer, ToWriteRdfXmlWriter}; use oxrdfio::{RdfSerializer, ToWriteQuadWriter};
use oxttl::nquads::{NQuadsSerializer, ToWriteNQuadsWriter};
use oxttl::ntriples::{NTriplesSerializer, ToWriteNTriplesWriter};
use oxttl::trig::{ToWriteTriGWriter, TriGSerializer};
use oxttl::turtle::{ToWriteTurtleWriter, TurtleSerializer};
use std::io::{self, Write}; use std::io::{self, Write};
/// A serializer for RDF graph serialization formats. /// A serializer for RDF graph serialization formats.
@ -29,34 +25,26 @@ use std::io::{self, Write};
/// })?; /// })?;
/// writer.finish()?; /// writer.finish()?;
/// ///
///assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()); /// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ``` /// ```
pub struct GraphSerializer { pub struct GraphSerializer {
format: GraphFormat, inner: RdfSerializer,
} }
impl GraphSerializer { impl GraphSerializer {
/// Builds a serializer for the given format /// Builds a serializer for the given format
#[inline] #[inline]
pub fn from_format(format: GraphFormat) -> Self { pub fn from_format(format: GraphFormat) -> Self {
Self { format } Self {
inner: RdfSerializer::from_format(format.into()),
}
} }
/// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`] implementation /// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`] implementation
pub fn triple_writer<W: Write>(&self, writer: W) -> TripleWriter<W> { pub fn triple_writer<W: Write>(&self, writer: W) -> TripleWriter<W> {
TripleWriter { TripleWriter {
formatter: match self.format { writer: self.inner.serialize_to_write(writer),
GraphFormat::NTriples => {
TripleWriterKind::NTriples(NTriplesSerializer::new().serialize_to_write(writer))
}
GraphFormat::Turtle => {
TripleWriterKind::Turtle(TurtleSerializer::new().serialize_to_write(writer))
}
GraphFormat::RdfXml => {
TripleWriterKind::RdfXml(RdfXmlSerializer::new().serialize_to_write(writer))
}
},
} }
} }
} }
@ -79,37 +67,23 @@ impl GraphSerializer {
/// })?; /// })?;
/// writer.finish()?; /// writer.finish()?;
/// ///
///assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()); /// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ``` /// ```
#[must_use] #[must_use]
pub struct TripleWriter<W: Write> { pub struct TripleWriter<W: Write> {
formatter: TripleWriterKind<W>, writer: ToWriteQuadWriter<W>,
}
enum TripleWriterKind<W: Write> {
NTriples(ToWriteNTriplesWriter<W>),
Turtle(ToWriteTurtleWriter<W>),
RdfXml(ToWriteRdfXmlWriter<W>),
} }
impl<W: Write> TripleWriter<W> { impl<W: Write> TripleWriter<W> {
/// Writes a triple /// Writes a triple
pub fn write<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> { pub fn write<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
match &mut self.formatter { self.writer.write_triple(triple)
TripleWriterKind::NTriples(writer) => writer.write_triple(triple),
TripleWriterKind::Turtle(writer) => writer.write_triple(triple),
TripleWriterKind::RdfXml(writer) => writer.write_triple(triple),
}
} }
/// Writes the last bytes of the file /// Writes the last bytes of the file
pub fn finish(self) -> io::Result<()> { pub fn finish(self) -> io::Result<()> {
match self.formatter { self.writer.finish()
TripleWriterKind::NTriples(writer) => writer.finish().flush(),
TripleWriterKind::Turtle(writer) => writer.finish()?.flush(),
TripleWriterKind::RdfXml(formatter) => formatter.finish()?.flush(),
}
} }
} }
@ -133,31 +107,26 @@ impl<W: Write> TripleWriter<W> {
/// })?; /// })?;
/// writer.finish()?; /// writer.finish()?;
/// ///
///assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes()); /// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ``` /// ```
pub struct DatasetSerializer { pub struct DatasetSerializer {
format: DatasetFormat, inner: RdfSerializer,
} }
impl DatasetSerializer { impl DatasetSerializer {
/// Builds a serializer for the given format /// Builds a serializer for the given format
#[inline] #[inline]
pub fn from_format(format: DatasetFormat) -> Self { pub fn from_format(format: DatasetFormat) -> Self {
Self { format } Self {
inner: RdfSerializer::from_format(format.into()),
}
} }
/// Returns a [`QuadWriter`] allowing writing triples into the given [`Write`] implementation /// Returns a [`QuadWriter`] allowing writing triples into the given [`Write`] implementation
pub fn quad_writer<W: Write>(&self, writer: W) -> QuadWriter<W> { pub fn quad_writer<W: Write>(&self, writer: W) -> QuadWriter<W> {
QuadWriter { QuadWriter {
formatter: match self.format { writer: self.inner.serialize_to_write(writer),
DatasetFormat::NQuads => {
QuadWriterKind::NQuads(NQuadsSerializer::new().serialize_to_write(writer))
}
DatasetFormat::TriG => {
QuadWriterKind::TriG(TriGSerializer::new().serialize_to_write(writer))
}
},
} }
} }
} }
@ -181,33 +150,22 @@ impl DatasetSerializer {
/// })?; /// })?;
/// writer.finish()?; /// writer.finish()?;
/// ///
///assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes()); /// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ``` /// ```
#[must_use] #[must_use]
pub struct QuadWriter<W: Write> { pub struct QuadWriter<W: Write> {
formatter: QuadWriterKind<W>, writer: ToWriteQuadWriter<W>,
}
enum QuadWriterKind<W: Write> {
NQuads(ToWriteNQuadsWriter<W>),
TriG(ToWriteTriGWriter<W>),
} }
impl<W: Write> QuadWriter<W> { impl<W: Write> QuadWriter<W> {
/// Writes a quad /// Writes a quad
pub fn write<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> { pub fn write<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
match &mut self.formatter { self.writer.write_quad(quad)
QuadWriterKind::NQuads(writer) => writer.write_quad(quad),
QuadWriterKind::TriG(writer) => writer.write_quad(quad),
}
} }
/// Writes the last bytes of the file /// Writes the last bytes of the file
pub fn finish(self) -> io::Result<()> { pub fn finish(self) -> io::Result<()> {
match self.formatter { self.writer.finish()
QuadWriterKind::NQuads(writer) => writer.finish().flush(),
QuadWriterKind::TriG(writer) => writer.finish()?.flush(),
}
} }
} }

Loading…
Cancel
Save