From 7cd383af79b812f51e0d05959b6ca624fc1570f1 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 6 Aug 2023 21:48:39 +0200 Subject: [PATCH] Introduces OxRDF I/O stand-alone crate --- .github/workflows/tests.yml | 6 +- Cargo.lock | 13 +- Cargo.toml | 1 + lib/Cargo.toml | 3 +- lib/oxrdfio/Cargo.toml | 33 ++ lib/oxrdfio/README.md | 61 ++++ lib/oxrdfio/src/error.rs | 148 +++++++++ lib/oxrdfio/src/format.rs | 203 ++++++++++++ lib/oxrdfio/src/lib.rs | 19 ++ lib/oxrdfio/src/parser.rs | 577 ++++++++++++++++++++++++++++++++++ lib/oxrdfio/src/serializer.rs | 322 +++++++++++++++++++ lib/src/io/error.rs | 45 +-- lib/src/io/format.rs | 21 ++ lib/src/io/mod.rs | 4 + lib/src/io/read.rs | 191 ++--------- lib/src/io/write.rs | 84 ++--- 16 files changed, 1465 insertions(+), 266 deletions(-) create mode 100644 lib/oxrdfio/Cargo.toml create mode 100644 lib/oxrdfio/README.md create mode 100644 lib/oxrdfio/src/error.rs create mode 100644 lib/oxrdfio/src/format.rs create mode 100644 lib/oxrdfio/src/lib.rs create mode 100644 lib/oxrdfio/src/parser.rs create mode 100644 lib/oxrdfio/src/serializer.rs diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c5010d9c..a58e710c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,6 +36,8 @@ jobs: working-directory: ./lib/oxrdfxml - run: cargo clippy working-directory: ./lib/oxttl + - run: cargo clippy + working-directory: ./lib/oxrdfio - run: cargo clippy working-directory: ./lib/sparesults - run: cargo clippy @@ -102,6 +104,8 @@ jobs: working-directory: ./lib/oxrdfxml - run: cargo clippy -- -D warnings -D clippy::all working-directory: ./lib/oxttl + - run: cargo clippy -- -D warnings -D clippy::all + working-directory: ./lib/oxrdfio - run: cargo clippy -- -D warnings -D clippy::all working-directory: ./lib/sparesults - run: cargo clippy -- -D warnings -D clippy::all @@ -159,7 +163,7 @@ jobs: - run: rustup update - uses: Swatinem/rust-cache@v2 - run: cargo install cargo-semver-checks || true - - run: 
cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph_js --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server --exclude oxrdfxml --exclude oxttl --exclude sparopt + - run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph_js --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server --exclude oxrdfxml --exclude oxttl --exclude oxrdfio --exclude sparopt test_linux: runs-on: ubuntu-latest diff --git a/Cargo.lock b/Cargo.lock index ab40b360..1e3337d5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -952,10 +952,9 @@ dependencies = [ "oxilangtag", "oxiri", "oxrdf", - "oxrdfxml", + "oxrdfio", "oxrocksdb-sys", "oxsdatatypes", - "oxttl", "rand", "regex", "sha-1", @@ -1036,6 +1035,16 @@ dependencies = [ "rand", ] +[[package]] +name = "oxrdfio" +version = "0.1.0-alpha.1-dev" +dependencies = [ + "oxrdf", + "oxrdfxml", + "oxttl", + "tokio", +] + [[package]] name = "oxrdfxml" version = "0.1.0-alpha.1-dev" diff --git a/Cargo.toml b/Cargo.toml index 0ad8536d..8b6c3e48 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ members = [ "js", "lib", "lib/oxrdf", + "lib/oxrdfio", "lib/oxrdfxml", "lib/oxsdatatypes", "lib/oxttl", diff --git a/lib/Cargo.toml b/lib/Cargo.toml index aef5f012..0cf839de 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -35,9 +35,8 @@ siphasher = "0.3" lazy_static = "1" json-event-parser = "0.1" oxrdf = { version = "0.2.0-alpha.1-dev", path = "oxrdf", features = ["rdf-star", "oxsdatatypes"] } -oxrdfxml = { version = "0.1.0-alpha.1-dev", path = "oxrdfxml" } oxsdatatypes = { version = "0.2.0-alpha.1-dev", path="oxsdatatypes" } -oxttl = { version = "0.1.0-alpha.1-dev" , path = "oxttl", features = ["rdf-star"] } +oxrdfio = { version = "0.1.0-alpha.1-dev" , path = "oxrdfio", features = ["rdf-star"] } spargebra = { version = "0.3.0-alpha.1-dev", path = "spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] } sparopt = { version = "0.1.0-alpha.1-dev", path="sparopt", 
features = ["rdf-star", "sep-0002", "sep-0006"] } sparesults = { version = "0.2.0-alpha.1-dev", path = "sparesults", features = ["rdf-star"] } diff --git a/lib/oxrdfio/Cargo.toml b/lib/oxrdfio/Cargo.toml new file mode 100644 index 00000000..70f266b0 --- /dev/null +++ b/lib/oxrdfio/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "oxrdfio" +version = "0.1.0-alpha.1-dev" +authors = ["Tpt "] +license = "MIT OR Apache-2.0" +readme = "README.md" +keywords = ["RDF"] +repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfio" +homepage = "https://oxigraph.org/" +documentation = "https://docs.rs/oxrdfio" +description = """ +Parser and serializer for various RDF serializations +""" +edition = "2021" +rust-version = "1.65" + +[features] +default = [] +async-tokio = ["dep:tokio", "oxrdfxml/async-tokio", "oxttl/async-tokio"] +rdf-star = ["oxrdf/rdf-star", "oxttl/rdf-star"] + +[dependencies] +oxrdf = { version = "0.2.0-alpha.1-dev", path = "../oxrdf" } +oxrdfxml = { version = "0.1.0-alpha.1-dev", path = "../oxrdfxml" } +oxttl = { version = "0.1.0-alpha.1-dev" , path = "../oxttl" } +tokio = { version = "1", optional = true, features = ["io-util"] } + +[dev-dependencies] +tokio = { version = "1", features = ["rt", "macros"] } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] diff --git a/lib/oxrdfio/README.md b/lib/oxrdfio/README.md new file mode 100644 index 00000000..1712b8ae --- /dev/null +++ b/lib/oxrdfio/README.md @@ -0,0 +1,61 @@ +OxRDF I/O +========= + +[![Latest Version](https://img.shields.io/crates/v/oxrdfio.svg)](https://crates.io/crates/oxrdfio) +[![Released API docs](https://docs.rs/oxrdfio/badge.svg)](https://docs.rs/oxrdfio) +[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfio)](https://crates.io/crates/oxrdfio) +[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) 
+[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) + +OxRDF I/O is a set of parsers and serializers for RDF. + +It supports: +* [N3](https://w3c.github.io/N3/spec/) using [`oxttl`](https://crates.io/crates/oxttl) +* [N-Quads](https://www.w3.org/TR/n-quads/) using [`oxttl`](https://crates.io/crates/oxttl) +* [N-Triples](https://www.w3.org/TR/n-triples/) using [`oxttl`](https://crates.io/crates/oxttl) +* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) using [`oxrdfxml`](https://crates.io/crates/oxrdfxml) +* [TriG](https://www.w3.org/TR/trig/) using [`oxttl`](https://crates.io/crates/oxttl) +* [Turtle](https://www.w3.org/TR/turtle/) using [`oxttl`](https://crates.io/crates/oxttl) + +Support for [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is also available behind the `rdf-star` feature for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star), [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star), [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) and [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star). + +It is designed as a low-level parser compatible with both synchronous and asynchronous I/O (behind the `async-tokio` feature). + +Usage example counting the number of people in a Turtle file: +```rust +use oxrdf::{NamedNodeRef, vocab::rdf}; +use oxrdfio::{RdfFormat, RdfParser}; + +let file = b"@base . +@prefix schema: . + a schema:Person ; + schema:name \"Foo\" . 
+ a schema:Person ; + schema:name \"Bar\" ."; + +let schema_person = NamedNodeRef::new("http://schema.org/Person").unwrap(); +let mut count = 0; +for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_ref()) { + let quad = quad.unwrap(); + if quad.predicate == rdf::TYPE && quad.object == schema_person.into() { + count += 1; + } +} +assert_eq!(2, count); +``` + +## License + +This project is licensed under either of + +* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or + ``) +* MIT license ([LICENSE-MIT](../LICENSE-MIT) or + ``) + +at your option. + + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. diff --git a/lib/oxrdfio/src/error.rs b/lib/oxrdfio/src/error.rs new file mode 100644 index 00000000..ac8173a7 --- /dev/null +++ b/lib/oxrdfio/src/error.rs @@ -0,0 +1,148 @@ +use std::error::Error; +use std::{fmt, io}; + +/// Error returned during RDF format parsing. +#[derive(Debug)] +pub enum ParseError { + /// I/O error during parsing (file not found...). + Io(io::Error), + /// An error in the file syntax. 
+ Syntax(SyntaxError), +} + +impl ParseError { + pub(crate) fn msg(msg: &'static str) -> Self { + Self::Syntax(SyntaxError { + inner: SyntaxErrorKind::Msg { msg }, + }) + } +} + +impl fmt::Display for ParseError { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Io(e) => e.fmt(f), + Self::Syntax(e) => e.fmt(f), + } + } +} + +impl Error for ParseError { + #[inline] + fn source(&self) -> Option<&(dyn Error + 'static)> { + match self { + Self::Io(e) => Some(e), + Self::Syntax(e) => Some(e), + } + } +} + +impl From for SyntaxError { + #[inline] + fn from(error: oxttl::SyntaxError) -> Self { + SyntaxError { + inner: SyntaxErrorKind::Turtle(error), + } + } +} + +impl From for ParseError { + #[inline] + fn from(error: oxttl::ParseError) -> Self { + match error { + oxttl::ParseError::Syntax(e) => Self::Syntax(e.into()), + oxttl::ParseError::Io(e) => Self::Io(e), + } + } +} + +impl From for SyntaxError { + #[inline] + fn from(error: oxrdfxml::SyntaxError) -> Self { + SyntaxError { + inner: SyntaxErrorKind::RdfXml(error), + } + } +} + +impl From for ParseError { + #[inline] + fn from(error: oxrdfxml::ParseError) -> Self { + match error { + oxrdfxml::ParseError::Syntax(e) => Self::Syntax(e.into()), + oxrdfxml::ParseError::Io(e) => Self::Io(e), + } + } +} + +impl From for ParseError { + #[inline] + fn from(error: io::Error) -> Self { + Self::Io(error) + } +} + +impl From for ParseError { + #[inline] + fn from(error: SyntaxError) -> Self { + Self::Syntax(error) + } +} + +impl From for io::Error { + #[inline] + fn from(error: ParseError) -> Self { + match error { + ParseError::Io(error) => error, + ParseError::Syntax(error) => error.into(), + } + } +} + +/// An error in the syntax of the parsed file. 
+#[derive(Debug)] +pub struct SyntaxError { + inner: SyntaxErrorKind, +} + +#[derive(Debug)] +enum SyntaxErrorKind { + Turtle(oxttl::SyntaxError), + RdfXml(oxrdfxml::SyntaxError), + + Msg { msg: &'static str }, +} + +impl fmt::Display for SyntaxError { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.inner { + SyntaxErrorKind::Turtle(e) => e.fmt(f), + SyntaxErrorKind::RdfXml(e) => e.fmt(f), + SyntaxErrorKind::Msg { msg } => write!(f, "{msg}"), + } + } +} + +impl Error for SyntaxError { + #[inline] + fn source(&self) -> Option<&(dyn Error + 'static)> { + match &self.inner { + SyntaxErrorKind::Turtle(e) => Some(e), + SyntaxErrorKind::RdfXml(e) => Some(e), + SyntaxErrorKind::Msg { .. } => None, + } + } +} + +impl From for io::Error { + #[inline] + fn from(error: SyntaxError) -> Self { + match error.inner { + SyntaxErrorKind::Turtle(error) => error.into(), + SyntaxErrorKind::RdfXml(error) => error.into(), + SyntaxErrorKind::Msg { msg } => io::Error::new(io::ErrorKind::InvalidData, msg), + } + } +} diff --git a/lib/oxrdfio/src/format.rs b/lib/oxrdfio/src/format.rs new file mode 100644 index 00000000..8c4ce230 --- /dev/null +++ b/lib/oxrdfio/src/format.rs @@ -0,0 +1,203 @@ +use std::fmt; + +/// RDF serialization formats. +/// +/// This enumeration is non exhaustive. New formats like JSON-LD might be added in the future. +#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +#[non_exhaustive] +pub enum RdfFormat { + /// [N3](https://w3c.github.io/N3/spec/) + N3, + /// [N-Quads](https://www.w3.org/TR/n-quads/) + NQuads, + /// [N-Triples](https://www.w3.org/TR/n-triples/) + NTriples, + /// [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) + RdfXml, + /// [TriG](https://www.w3.org/TR/trig/) + TriG, + /// [Turtle](https://www.w3.org/TR/turtle/) + Turtle, +} + +impl RdfFormat { + /// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/). 
+ /// + /// ``` + /// use oxrdfio::RdfFormat; + /// + /// assert_eq!(RdfFormat::NTriples.iri(), "http://www.w3.org/ns/formats/N-Triples") + /// ``` + #[inline] + pub const fn iri(self) -> &'static str { + match self { + Self::N3 => "http://www.w3.org/ns/formats/N3", + Self::NQuads => "http://www.w3.org/ns/formats/N-Quads", + Self::NTriples => "http://www.w3.org/ns/formats/N-Triples", + Self::RdfXml => "http://www.w3.org/ns/formats/RDF_XML", + Self::TriG => "http://www.w3.org/ns/formats/TriG", + Self::Turtle => "http://www.w3.org/ns/formats/Turtle", + } + } + + /// The format [IANA media type](https://tools.ietf.org/html/rfc2046). + /// + /// ``` + /// use oxrdfio::RdfFormat; + /// + /// assert_eq!(RdfFormat::NTriples.media_type(), "application/n-triples") + /// ``` + #[inline] + pub const fn media_type(self) -> &'static str { + match self { + Self::N3 => "text/n3", + Self::NQuads => "application/n-quads", + Self::NTriples => "application/n-triples", + Self::RdfXml => "application/rdf+xml", + Self::TriG => "application/trig", + Self::Turtle => "text/turtle", + } + } + + /// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension. + /// + /// ``` + /// use oxrdfio::RdfFormat; + /// + /// assert_eq!(RdfFormat::NTriples.file_extension(), "nt") + /// ``` + #[inline] + pub const fn file_extension(self) -> &'static str { + match self { + Self::N3 => "n3", + Self::NQuads => "nq", + Self::NTriples => "nt", + Self::RdfXml => "rdf", + Self::TriG => "trig", + Self::Turtle => "ttl", + } + } + + /// The format name. 
+ /// + /// ``` + /// use oxrdfio::RdfFormat; + /// + /// assert_eq!(RdfFormat::NTriples.name(), "N-Triples") + /// ``` + #[inline] + pub const fn name(self) -> &'static str { + match self { + Self::N3 => "N3", + Self::NQuads => "N-Quads", + Self::NTriples => "N-Triples", + Self::RdfXml => "RDF/XML", + Self::TriG => "TriG", + Self::Turtle => "Turtle", + } + } + + /// Checks if the format supports [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) and not only [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph). + /// + /// ``` + /// use oxrdfio::RdfFormat; + /// + /// assert_eq!(RdfFormat::NTriples.supports_datasets(), false); + /// assert_eq!(RdfFormat::NQuads.supports_datasets(), true); + /// ``` + #[inline] + pub const fn supports_datasets(self) -> bool { + matches!(self, Self::NQuads | Self::TriG) + } + + /// Checks if the format supports [RDF-star quoted triples](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#dfn-quoted). + /// + /// ``` + /// use oxrdfio::RdfFormat; + /// + /// assert_eq!(RdfFormat::NTriples.supports_rdf_star(), true); + /// assert_eq!(RdfFormat::RdfXml.supports_rdf_star(), false); + /// ``` + #[inline] + #[cfg(feature = "rdf-star")] + pub const fn supports_rdf_star(self) -> bool { + matches!( + self, + Self::NTriples | Self::NQuads | Self::Turtle | Self::TriG + ) + } + + /// Looks for a known format from a media type. + /// + /// It supports some media type aliases. + /// For example, "application/xml" is going to return `RdfFormat::RdfXml` even if it is not its canonical media type. 
+ /// + /// Example: + /// ``` + /// use oxrdfio::RdfFormat; + /// + /// assert_eq!(RdfFormat::from_media_type("text/turtle; charset=utf-8"), Some(RdfFormat::Turtle)) + /// ``` + #[inline] + pub fn from_media_type(media_type: &str) -> Option { + const MEDIA_TYPES: [(&str, RdfFormat); 14] = [ + ("application/n-quads", RdfFormat::NQuads), + ("application/n-triples", RdfFormat::NTriples), + ("application/rdf+xml", RdfFormat::RdfXml), + ("application/trig", RdfFormat::TriG), + ("application/turtle", RdfFormat::Turtle), + ("application/xml", RdfFormat::RdfXml), + ("application/x-trig", RdfFormat::TriG), + ("application/x-turtle", RdfFormat::Turtle), + ("text/n3", RdfFormat::N3), + ("text/nquads", RdfFormat::NQuads), + ("text/plain", RdfFormat::NTriples), + ("text/turtle", RdfFormat::Turtle), + ("text/xml", RdfFormat::RdfXml), + ("text/x-nquads", RdfFormat::NQuads), + ]; + let media_type = media_type.split(';').next()?.trim(); + for (candidate_media_type, candidate_id) in MEDIA_TYPES { + if candidate_media_type.eq_ignore_ascii_case(media_type) { + return Some(candidate_id); + } + } + None + } + + /// Looks for a known format from an extension. + /// + /// It supports some aliases. 
+ /// + /// Example: + /// ``` + /// use oxrdfio::RdfFormat; + /// + /// assert_eq!(RdfFormat::from_extension("nt"), Some(RdfFormat::NTriples)) + /// ``` + #[inline] + pub fn from_extension(extension: &str) -> Option { + const MEDIA_TYPES: [(&str, RdfFormat); 8] = [ + ("n3", RdfFormat::N3), + ("nq", RdfFormat::NQuads), + ("nt", RdfFormat::NTriples), + ("rdf", RdfFormat::RdfXml), + ("trig", RdfFormat::TriG), + ("ttl", RdfFormat::Turtle), + ("txt", RdfFormat::NTriples), + ("xml", RdfFormat::RdfXml), + ]; + for (candidate_extension, candidate_id) in MEDIA_TYPES { + if candidate_extension.eq_ignore_ascii_case(extension) { + return Some(candidate_id); + } + } + None + } +} + +impl fmt::Display for RdfFormat { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.name()) + } +} diff --git a/lib/oxrdfio/src/lib.rs b/lib/oxrdfio/src/lib.rs new file mode 100644 index 00000000..d31ec656 --- /dev/null +++ b/lib/oxrdfio/src/lib.rs @@ -0,0 +1,19 @@ +#![doc = include_str!("../README.md")] +#![doc(test(attr(deny(warnings))))] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] +#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] +#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] + +mod error; +mod format; +mod parser; +mod serializer; + +pub use error::{ParseError, SyntaxError}; +pub use format::RdfFormat; +#[cfg(feature = "async-tokio")] +pub use parser::FromTokioAsyncReadQuadReader; +pub use parser::{FromReadQuadReader, RdfParser}; +#[cfg(feature = "async-tokio")] +pub use serializer::ToTokioAsyncWriteQuadWriter; +pub use serializer::{RdfSerializer, ToWriteQuadWriter}; diff --git a/lib/oxrdfio/src/parser.rs b/lib/oxrdfio/src/parser.rs new file mode 100644 index 00000000..b0e4a419 --- /dev/null +++ b/lib/oxrdfio/src/parser.rs @@ -0,0 +1,577 @@ +//! Utilities to read RDF graphs and datasets. 
+ +pub use crate::error::{ParseError, SyntaxError}; +use crate::format::RdfFormat; +use oxrdf::{BlankNode, GraphName, IriParseError, Quad, Subject, Term, Triple}; +#[cfg(feature = "async-tokio")] +use oxrdfxml::FromTokioAsyncReadRdfXmlReader; +use oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser}; +#[cfg(feature = "async-tokio")] +use oxttl::n3::FromTokioAsyncReadN3Reader; +use oxttl::n3::{FromReadN3Reader, N3Parser, N3Quad, N3Term}; +#[cfg(feature = "async-tokio")] +use oxttl::nquads::FromTokioAsyncReadNQuadsReader; +use oxttl::nquads::{FromReadNQuadsReader, NQuadsParser}; +#[cfg(feature = "async-tokio")] +use oxttl::ntriples::FromTokioAsyncReadNTriplesReader; +use oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser}; +#[cfg(feature = "async-tokio")] +use oxttl::trig::FromTokioAsyncReadTriGReader; +use oxttl::trig::{FromReadTriGReader, TriGParser}; +#[cfg(feature = "async-tokio")] +use oxttl::turtle::FromTokioAsyncReadTurtleReader; +use oxttl::turtle::{FromReadTurtleReader, TurtleParser}; +use std::collections::HashMap; +use std::io::Read; +#[cfg(feature = "async-tokio")] +use tokio::io::AsyncRead; + +/// Parsers for RDF serialization formats. +/// +/// It currently supports the following formats: +/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`]) +/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`]) +/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`]) +/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`]) +/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`]) +/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`]) +/// +/// Note the useful options: +/// - [`with_base_iri`](RdfParser::with_base_iri) to resolve the relative IRIs. +/// - [`rename_blank_nodes`](RdfParser::rename_blank_nodes) to rename the blank nodes to auto-generated numbers to avoid conflicts when merging RDF graphs together. 
+/// - [`without_named_graphs`](RdfParser::without_named_graphs) to parse a single graph. +/// +/// ``` +/// use oxrdfio::{RdfFormat, RdfParser}; +/// +/// let file = " ."; +/// +/// let parser = RdfParser::from_format(RdfFormat::NTriples); +/// let quads = parser.parse_read(file.as_bytes()).collect::,_>>()?; +/// +/// assert_eq!(quads.len(), 1); +/// assert_eq!(quads[0].subject.to_string(), ""); +/// # std::io::Result::Ok(()) +/// ``` +pub struct RdfParser { + inner: RdfParserKind, + default_graph: GraphName, + without_named_graphs: bool, + rename_blank_nodes: bool, +} + +enum RdfParserKind { + N3(N3Parser), + NQuads(NQuadsParser), + NTriples(NTriplesParser), + RdfXml(RdfXmlParser), + TriG(TriGParser), + Turtle(TurtleParser), +} + +impl RdfParser { + /// Builds a parser for the given format. + #[inline] + #[must_use] + pub fn from_format(format: RdfFormat) -> Self { + Self { + inner: match format { + RdfFormat::N3 => RdfParserKind::N3(N3Parser::new()), + RdfFormat::NQuads => RdfParserKind::NQuads({ + #[cfg(feature = "rdf-star")] + { + NQuadsParser::new().with_quoted_triples() + } + #[cfg(not(feature = "rdf-star"))] + { + NQuadsParser::new() + } + }), + RdfFormat::NTriples => RdfParserKind::NTriples({ + #[cfg(feature = "rdf-star")] + { + NTriplesParser::new().with_quoted_triples() + } + #[cfg(not(feature = "rdf-star"))] + { + NTriplesParser::new() + } + }), + RdfFormat::RdfXml => RdfParserKind::RdfXml(RdfXmlParser::new()), + RdfFormat::TriG => RdfParserKind::TriG({ + #[cfg(feature = "rdf-star")] + { + TriGParser::new().with_quoted_triples() + } + #[cfg(not(feature = "rdf-star"))] + { + TriGParser::new() + } + }), + RdfFormat::Turtle => RdfParserKind::Turtle({ + #[cfg(feature = "rdf-star")] + { + TurtleParser::new().with_quoted_triples() + } + #[cfg(not(feature = "rdf-star"))] + { + TurtleParser::new() + } + }), + }, + default_graph: GraphName::DefaultGraph, + without_named_graphs: false, + rename_blank_nodes: false, + } + } + + /// Provides an IRI that could be 
used to resolve the file relative IRIs. + /// + /// ``` + /// use oxrdfio::{RdfFormat, RdfParser}; + /// + /// let file = "

."; + /// + /// let parser = RdfParser::from_format(RdfFormat::Turtle).with_base_iri("http://example.com")?; + /// let quads = parser.parse_read(file.as_bytes()).collect::,_>>()?; + /// + /// assert_eq!(quads.len(), 1); + /// assert_eq!(quads[0].subject.to_string(), ""); + /// # Result::<_,Box>::Ok(()) + /// ``` + #[inline] + pub fn with_base_iri(self, base_iri: impl Into) -> Result { + Ok(Self { + inner: match self.inner { + RdfParserKind::N3(p) => RdfParserKind::N3(p.with_base_iri(base_iri)?), + RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p), + RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p), + RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?), + RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?), + RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?), + }, + default_graph: self.default_graph, + without_named_graphs: self.without_named_graphs, + rename_blank_nodes: self.rename_blank_nodes, + }) + } + + /// Provides the graph name that should replace the default graph in the returned quads. + /// + /// ``` + /// use oxrdf::NamedNode; + /// use oxrdfio::{RdfFormat, RdfParser}; + /// + /// let file = " ."; + /// + /// let parser = RdfParser::from_format(RdfFormat::Turtle).with_default_graph(NamedNode::new("http://example.com/g")?); + /// let quads = parser.parse_read(file.as_bytes()).collect::,_>>()?; + /// + /// assert_eq!(quads.len(), 1); + /// assert_eq!(quads[0].graph_name.to_string(), ""); + /// # Result::<_,Box>::Ok(()) + /// ``` + #[inline] + #[must_use] + pub fn with_default_graph(self, default_graph: impl Into) -> Self { + Self { + inner: self.inner, + default_graph: default_graph.into(), + without_named_graphs: self.without_named_graphs, + rename_blank_nodes: self.rename_blank_nodes, + } + } + + /// Sets that the parser must fail if parsing a named graph. 
+ /// + /// This function restricts the parser to only parse a single [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) and not an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). + /// + /// ``` + /// use oxrdfio::{RdfFormat, RdfParser}; + /// + /// let file = " ."; + /// + /// let parser = RdfParser::from_format(RdfFormat::NQuads).without_named_graphs(); + /// assert!(parser.parse_read(file.as_bytes()).next().unwrap().is_err()); + /// ``` + #[inline] + #[must_use] + pub fn without_named_graphs(self) -> Self { + Self { + inner: self.inner, + default_graph: self.default_graph, + without_named_graphs: true, + rename_blank_nodes: self.rename_blank_nodes, + } + } + + /// Renames the blank nodes ids from the ones set in the serialization to random ids. + /// + /// This allows to avoid id conflicts when merging graphs together. + /// + /// ``` + /// use oxrdfio::{RdfFormat, RdfParser}; + /// + /// let file = "_:a ."; + /// + /// let parser = RdfParser::from_format(RdfFormat::NQuads).rename_blank_nodes(); + /// let result1 = parser.parse_read(file.as_bytes()).collect::,_>>()?; + /// let result2 = parser.parse_read(file.as_bytes()).collect::,_>>()?; + /// assert_ne!(result1, result2); + /// # Result::<_,Box>::Ok(()) + /// ``` + #[inline] + #[must_use] + pub fn rename_blank_nodes(self) -> Self { + Self { + inner: self.inner, + default_graph: self.default_graph, + without_named_graphs: self.without_named_graphs, + rename_blank_nodes: true, + } + } + + /// Parses from a [`Read`] implementation and returns an iterator of quads. + /// + /// Reads are buffered. 
+ /// + /// ``` + /// use oxrdfio::{RdfFormat, RdfParser}; + /// + /// let file = " ."; + /// + /// let parser = RdfParser::from_format(RdfFormat::NTriples); + /// let quads = parser.parse_read(file.as_bytes()).collect::,_>>()?; + /// + /// assert_eq!(quads.len(), 1); + /// assert_eq!(quads[0].subject.to_string(), ""); + /// # std::io::Result::Ok(()) + /// ``` + pub fn parse_read(&self, reader: R) -> FromReadQuadReader { + FromReadQuadReader { + parser: match &self.inner { + RdfParserKind::N3(p) => FromReadQuadReaderKind::N3(p.parse_read(reader)), + RdfParserKind::NQuads(p) => FromReadQuadReaderKind::NQuads(p.parse_read(reader)), + RdfParserKind::NTriples(p) => { + FromReadQuadReaderKind::NTriples(p.parse_read(reader)) + } + RdfParserKind::RdfXml(p) => FromReadQuadReaderKind::RdfXml(p.parse_read(reader)), + RdfParserKind::TriG(p) => FromReadQuadReaderKind::TriG(p.parse_read(reader)), + RdfParserKind::Turtle(p) => FromReadQuadReaderKind::Turtle(p.parse_read(reader)), + }, + mapper: QuadMapper { + default_graph: self.default_graph.clone(), + without_named_graphs: self.without_named_graphs, + blank_node_map: self.rename_blank_nodes.then(HashMap::new), + }, + } + } + + /// Parses from a Tokio [`AsyncRead`] implementation and returns an async iterator of quads. + /// + /// Reads are buffered. 
+ /// + /// ``` + /// use oxrdfio::{RdfFormat, RdfParser, ParseError}; + /// + /// #[tokio::main(flavor = "current_thread")] + /// async fn main() -> Result<(), ParseError> { + /// let file = " ."; + /// + /// let parser = RdfParser::from_format(RdfFormat::NTriples); + /// let mut reader = parser.parse_tokio_async_read(file.as_bytes()); + /// if let Some(quad) = reader.next().await { + /// assert_eq!(quad?.subject.to_string(), ""); + /// } + /// Ok(()) + /// } + /// ``` + #[cfg(feature = "async-tokio")] + pub fn parse_tokio_async_read( + &self, + reader: R, + ) -> FromTokioAsyncReadQuadReader { + FromTokioAsyncReadQuadReader { + parser: match &self.inner { + RdfParserKind::N3(p) => { + FromTokioAsyncReadQuadReaderKind::N3(p.parse_tokio_async_read(reader)) + } + RdfParserKind::NQuads(p) => { + FromTokioAsyncReadQuadReaderKind::NQuads(p.parse_tokio_async_read(reader)) + } + RdfParserKind::NTriples(p) => { + FromTokioAsyncReadQuadReaderKind::NTriples(p.parse_tokio_async_read(reader)) + } + RdfParserKind::RdfXml(p) => { + FromTokioAsyncReadQuadReaderKind::RdfXml(p.parse_tokio_async_read(reader)) + } + RdfParserKind::TriG(p) => { + FromTokioAsyncReadQuadReaderKind::TriG(p.parse_tokio_async_read(reader)) + } + RdfParserKind::Turtle(p) => { + FromTokioAsyncReadQuadReaderKind::Turtle(p.parse_tokio_async_read(reader)) + } + }, + mapper: QuadMapper { + default_graph: self.default_graph.clone(), + without_named_graphs: self.without_named_graphs, + blank_node_map: self.rename_blank_nodes.then(HashMap::new), + }, + } + } +} + +/// Parses a RDF file from a [`Read`] implementation. Can be built using [`RdfParser::parse_read`]. +/// +/// Reads are buffered. 
+/// +/// ``` +/// use oxrdfio::{RdfFormat, RdfParser}; +/// +/// let file = " ."; +/// +/// let parser = RdfParser::from_format(RdfFormat::NTriples); +/// let quads = parser.parse_read(file.as_bytes()).collect::,_>>()?; +/// +/// assert_eq!(quads.len(), 1); +/// assert_eq!(quads[0].subject.to_string(), ""); +/// # std::io::Result::Ok(()) +/// ``` +#[must_use] +pub struct FromReadQuadReader { + parser: FromReadQuadReaderKind, + mapper: QuadMapper, +} + +enum FromReadQuadReaderKind { + N3(FromReadN3Reader), + NQuads(FromReadNQuadsReader), + NTriples(FromReadNTriplesReader), + RdfXml(FromReadRdfXmlReader), + TriG(FromReadTriGReader), + Turtle(FromReadTurtleReader), +} + +impl Iterator for FromReadQuadReader { + type Item = Result; + + fn next(&mut self) -> Option> { + Some(match &mut self.parser { + FromReadQuadReaderKind::N3(parser) => match parser.next()? { + Ok(quad) => self.mapper.map_n3_quad(quad), + Err(e) => Err(e.into()), + }, + FromReadQuadReaderKind::NQuads(parser) => match parser.next()? { + Ok(quad) => self.mapper.map_quad(quad), + Err(e) => Err(e.into()), + }, + FromReadQuadReaderKind::NTriples(parser) => match parser.next()? { + Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), + Err(e) => Err(e.into()), + }, + FromReadQuadReaderKind::RdfXml(parser) => match parser.next()? { + Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), + Err(e) => Err(e.into()), + }, + FromReadQuadReaderKind::TriG(parser) => match parser.next()? { + Ok(quad) => self.mapper.map_quad(quad), + Err(e) => Err(e.into()), + }, + FromReadQuadReaderKind::Turtle(parser) => match parser.next()? { + Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), + Err(e) => Err(e.into()), + }, + }) + } +} + +/// Parses a RDF file from a Tokio [`AsyncRead`] implementation. Can be built using [`RdfParser::parse_tokio_async_read`]. +/// +/// Reads are buffered. 
+/// +/// ``` +/// use oxrdfio::{RdfFormat, RdfParser, ParseError}; +/// +/// #[tokio::main(flavor = "current_thread")] +/// async fn main() -> Result<(), ParseError> { +/// let file = " ."; +/// +/// let parser = RdfParser::from_format(RdfFormat::NTriples); +/// let mut reader = parser.parse_tokio_async_read(file.as_bytes()); +/// if let Some(quad) = reader.next().await { +/// assert_eq!(quad?.subject.to_string(), ""); +/// } +/// Ok(()) +/// } +/// ``` +#[must_use] +#[cfg(feature = "async-tokio")] +pub struct FromTokioAsyncReadQuadReader { + parser: FromTokioAsyncReadQuadReaderKind, + mapper: QuadMapper, +} + +#[cfg(feature = "async-tokio")] +enum FromTokioAsyncReadQuadReaderKind { + N3(FromTokioAsyncReadN3Reader), + NQuads(FromTokioAsyncReadNQuadsReader), + NTriples(FromTokioAsyncReadNTriplesReader), + RdfXml(FromTokioAsyncReadRdfXmlReader), + TriG(FromTokioAsyncReadTriGReader), + Turtle(FromTokioAsyncReadTurtleReader), +} + +#[cfg(feature = "async-tokio")] +impl FromTokioAsyncReadQuadReader { + pub async fn next(&mut self) -> Option> { + Some(match &mut self.parser { + FromTokioAsyncReadQuadReaderKind::N3(parser) => match parser.next().await? { + Ok(quad) => self.mapper.map_n3_quad(quad), + Err(e) => Err(e.into()), + }, + FromTokioAsyncReadQuadReaderKind::NQuads(parser) => match parser.next().await? { + Ok(quad) => self.mapper.map_quad(quad), + Err(e) => Err(e.into()), + }, + FromTokioAsyncReadQuadReaderKind::NTriples(parser) => match parser.next().await? { + Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), + Err(e) => Err(e.into()), + }, + FromTokioAsyncReadQuadReaderKind::RdfXml(parser) => match parser.next().await? { + Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), + Err(e) => Err(e.into()), + }, + FromTokioAsyncReadQuadReaderKind::TriG(parser) => match parser.next().await? { + Ok(quad) => self.mapper.map_quad(quad), + Err(e) => Err(e.into()), + }, + FromTokioAsyncReadQuadReaderKind::Turtle(parser) => match parser.next().await? 
{ + Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), + Err(e) => Err(e.into()), + }, + }) + } +} + +struct QuadMapper { + default_graph: GraphName, + without_named_graphs: bool, + blank_node_map: Option>, +} + +impl QuadMapper { + fn map_blank_node(&mut self, node: BlankNode) -> BlankNode { + if let Some(blank_node_map) = &mut self.blank_node_map { + blank_node_map + .entry(node) + .or_insert_with(BlankNode::default) + .clone() + } else { + node + } + } + + fn map_subject(&mut self, node: Subject) -> Subject { + match node { + Subject::NamedNode(node) => node.into(), + Subject::BlankNode(node) => self.map_blank_node(node).into(), + #[cfg(feature = "rdf-star")] + Subject::Triple(triple) => self.map_triple(*triple).into(), + } + } + + fn map_term(&mut self, node: Term) -> Term { + match node { + Term::NamedNode(node) => node.into(), + Term::BlankNode(node) => self.map_blank_node(node).into(), + Term::Literal(literal) => literal.into(), + #[cfg(feature = "rdf-star")] + Term::Triple(triple) => self.map_triple(*triple).into(), + } + } + + fn map_triple(&mut self, triple: Triple) -> Triple { + Triple { + subject: self.map_subject(triple.subject), + predicate: triple.predicate, + object: self.map_term(triple.object), + } + } + + fn map_graph_name(&mut self, graph_name: GraphName) -> Result { + match graph_name { + GraphName::NamedNode(node) => { + if self.without_named_graphs { + Err(ParseError::msg("Named graphs are not allowed")) + } else { + Ok(node.into()) + } + } + GraphName::BlankNode(node) => { + if self.without_named_graphs { + Err(ParseError::msg("Named graphs are not allowed")) + } else { + Ok(self.map_blank_node(node).into()) + } + } + GraphName::DefaultGraph => Ok(self.default_graph.clone()), + } + } + + fn map_quad(&mut self, quad: Quad) -> Result { + Ok(Quad { + subject: self.map_subject(quad.subject), + predicate: quad.predicate, + object: self.map_term(quad.object), + graph_name: self.map_graph_name(quad.graph_name)?, + }) + } + + fn 
map_triple_to_quad(&mut self, triple: Triple) -> Quad { + self.map_triple(triple).in_graph(self.default_graph.clone()) + } + + fn map_n3_quad(&mut self, quad: N3Quad) -> Result { + Ok(Quad { + subject: match quad.subject { + N3Term::NamedNode(s) => Ok(s.into()), + N3Term::BlankNode(s) => Ok(self.map_blank_node(s).into()), + N3Term::Literal(_) => Err(ParseError::msg( + "literals are not allowed in regular RDF subjects", + )), + #[cfg(feature = "rdf-star")] + N3Term::Triple(s) => Ok(self.map_triple(*s).into()), + N3Term::Variable(_) => Err(ParseError::msg( + "variables are not allowed in regular RDF subjects", + )), + }?, + predicate: match quad.predicate { + N3Term::NamedNode(p) => Ok(p), + N3Term::BlankNode(_) => Err(ParseError::msg( + "blank nodes are not allowed in regular RDF predicates", + )), + N3Term::Literal(_) => Err(ParseError::msg( + "literals are not allowed in regular RDF predicates", + )), + #[cfg(feature = "rdf-star")] + N3Term::Triple(_) => Err(ParseError::msg( + "quoted triples are not allowed in regular RDF predicates", + )), + N3Term::Variable(_) => Err(ParseError::msg( + "variables are not allowed in regular RDF predicates", + )), + }?, + object: match quad.object { + N3Term::NamedNode(o) => Ok(o.into()), + N3Term::BlankNode(o) => Ok(self.map_blank_node(o).into()), + N3Term::Literal(o) => Ok(o.into()), + #[cfg(feature = "rdf-star")] + N3Term::Triple(o) => Ok(self.map_triple(*o).into()), + N3Term::Variable(_) => Err(ParseError::msg( + "variables are not allowed in regular RDF objects", + )), + }?, + graph_name: self.map_graph_name(quad.graph_name)?, + }) + } +} diff --git a/lib/oxrdfio/src/serializer.rs b/lib/oxrdfio/src/serializer.rs new file mode 100644 index 00000000..35a9f29e --- /dev/null +++ b/lib/oxrdfio/src/serializer.rs @@ -0,0 +1,322 @@ +//! Utilities to write RDF graphs and datasets. 
+ +use crate::format::RdfFormat; +use oxrdf::{GraphNameRef, QuadRef, TripleRef}; +#[cfg(feature = "async-tokio")] +use oxrdfxml::ToTokioAsyncWriteRdfXmlWriter; +use oxrdfxml::{RdfXmlSerializer, ToWriteRdfXmlWriter}; +#[cfg(feature = "async-tokio")] +use oxttl::nquads::ToTokioAsyncWriteNQuadsWriter; +use oxttl::nquads::{NQuadsSerializer, ToWriteNQuadsWriter}; +#[cfg(feature = "async-tokio")] +use oxttl::ntriples::ToTokioAsyncWriteNTriplesWriter; +use oxttl::ntriples::{NTriplesSerializer, ToWriteNTriplesWriter}; +#[cfg(feature = "async-tokio")] +use oxttl::trig::ToTokioAsyncWriteTriGWriter; +use oxttl::trig::{ToWriteTriGWriter, TriGSerializer}; +#[cfg(feature = "async-tokio")] +use oxttl::turtle::ToTokioAsyncWriteTurtleWriter; +use oxttl::turtle::{ToWriteTurtleWriter, TurtleSerializer}; +use std::io::{self, Write}; +#[cfg(feature = "async-tokio")] +use tokio::io::{AsyncWrite, AsyncWriteExt}; + +/// A serializer for RDF serialization formats. +/// +/// It currently supports the following formats: +/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`]) +/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`]) +/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`]) +/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`]) +/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`]) +/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`]) +/// +/// ``` +/// use oxrdfio::{RdfFormat, RdfSerializer}; +/// use oxrdf::{Quad, NamedNode}; +/// +/// let mut buffer = Vec::new(); +/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(&mut buffer); +/// writer.write_quad(&Quad { +/// subject: NamedNode::new("http://example.com/s")?.into(), +/// predicate: NamedNode::new("http://example.com/p")?, +/// object: NamedNode::new("http://example.com/o")?.into(), +/// graph_name: NamedNode::new("http://example.com/g")?.into() +/// })?; +/// writer.finish()?; +/// +/// 
assert_eq!(buffer.as_slice(), " .\n".as_bytes()); +/// # Result::<_,Box>::Ok(()) +/// ``` +pub struct RdfSerializer { + format: RdfFormat, +} + +impl RdfSerializer { + /// Builds a serializer for the given format + #[inline] + pub fn from_format(format: RdfFormat) -> Self { + Self { format } + } + + /// Writes to a [`Write`] implementation. + /// + /// Warning: Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file. + /// + /// Warning: This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that. + /// + /// ``` + /// use oxrdfio::{RdfFormat, RdfSerializer}; + /// use oxrdf::{Quad, NamedNode}; + /// + /// let mut buffer = Vec::new(); + /// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(&mut buffer); + /// writer.write_quad(&Quad { + /// subject: NamedNode::new("http://example.com/s")?.into(), + /// predicate: NamedNode::new("http://example.com/p")?, + /// object: NamedNode::new("http://example.com/o")?.into(), + /// graph_name: NamedNode::new("http://example.com/g")?.into() + /// })?; + /// writer.finish()?; + /// + /// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); + /// # Result::<_,Box>::Ok(()) + /// ``` + pub fn serialize_to_write(&self, writer: W) -> ToWriteQuadWriter { + ToWriteQuadWriter { + formatter: match self.format { + RdfFormat::NQuads => ToWriteQuadWriterKind::NQuads( + NQuadsSerializer::new().serialize_to_write(writer), + ), + RdfFormat::NTriples => ToWriteQuadWriterKind::NTriples( + NTriplesSerializer::new().serialize_to_write(writer), + ), + RdfFormat::RdfXml => ToWriteQuadWriterKind::RdfXml( + RdfXmlSerializer::new().serialize_to_write(writer), + ), + RdfFormat::TriG => { + ToWriteQuadWriterKind::TriG(TriGSerializer::new().serialize_to_write(writer)) + } + RdfFormat::Turtle | RdfFormat::N3 => ToWriteQuadWriterKind::Turtle( + TurtleSerializer::new().serialize_to_write(writer), + ), + }, + } + } + + /// Writes 
to a Tokio [`AsyncWrite`] implementation. + /// + /// Warning: Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file. + /// + /// Warning: This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that. + /// + /// ``` + /// use oxrdfio::{RdfFormat, RdfSerializer}; + /// use oxrdf::{Quad, NamedNode}; + /// use std::io; + /// + /// #[tokio::main(flavor = "current_thread")] + /// async fn main() -> io::Result<()> { + /// let mut buffer = Vec::new(); + /// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(&mut buffer); + /// writer.write_quad(&Quad { + /// subject: NamedNode::new_unchecked("http://example.com/s").into(), + /// predicate: NamedNode::new_unchecked("http://example.com/p"), + /// object: NamedNode::new_unchecked("http://example.com/o").into(), + /// graph_name: NamedNode::new_unchecked("http://example.com/g").into() + /// }).await?; + /// writer.finish().await?; + /// + /// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); + /// Ok(()) + /// } + /// ``` + #[cfg(feature = "async-tokio")] + pub fn serialize_to_tokio_async_write( + &self, + writer: W, + ) -> ToTokioAsyncWriteQuadWriter { + ToTokioAsyncWriteQuadWriter { + formatter: match self.format { + RdfFormat::NQuads => ToTokioAsyncWriteQuadWriterKind::NQuads( + NQuadsSerializer::new().serialize_to_tokio_async_write(writer), + ), + RdfFormat::NTriples => ToTokioAsyncWriteQuadWriterKind::NTriples( + NTriplesSerializer::new().serialize_to_tokio_async_write(writer), + ), + RdfFormat::RdfXml => ToTokioAsyncWriteQuadWriterKind::RdfXml( + RdfXmlSerializer::new().serialize_to_tokio_async_write(writer), + ), + RdfFormat::TriG => ToTokioAsyncWriteQuadWriterKind::TriG( + TriGSerializer::new().serialize_to_tokio_async_write(writer), + ), + RdfFormat::Turtle | RdfFormat::N3 => ToTokioAsyncWriteQuadWriterKind::Turtle( + 
TurtleSerializer::new().serialize_to_tokio_async_write(writer), + ), + }, + } + } +} + +/// Writes quads or triples to a [`Write`] implementation. +/// +/// Can be built using [`RdfSerializer::serialize_to_write`]. +/// +/// Warning: Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file. +/// +/// Warning: This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that. +/// +/// ``` +/// use oxrdfio::{RdfFormat, RdfSerializer}; +/// use oxrdf::{Quad, NamedNode}; +/// +/// let mut buffer = Vec::new(); +/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(&mut buffer); +/// writer.write_quad(&Quad { +/// subject: NamedNode::new("http://example.com/s")?.into(), +/// predicate: NamedNode::new("http://example.com/p")?, +/// object: NamedNode::new("http://example.com/o")?.into(), +/// graph_name: NamedNode::new("http://example.com/g")?.into(), +/// })?; +/// writer.finish()?; +/// +/// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); +/// # Result::<_,Box>::Ok(()) +/// ``` +#[must_use] +pub struct ToWriteQuadWriter { + formatter: ToWriteQuadWriterKind, +} + +enum ToWriteQuadWriterKind { + NQuads(ToWriteNQuadsWriter), + NTriples(ToWriteNTriplesWriter), + RdfXml(ToWriteRdfXmlWriter), + TriG(ToWriteTriGWriter), + Turtle(ToWriteTurtleWriter), +} + +impl ToWriteQuadWriter { + /// Writes a [`QuadRef`] + pub fn write_quad<'a>(&mut self, quad: impl Into>) -> io::Result<()> { + match &mut self.formatter { + ToWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad), + ToWriteQuadWriterKind::NTriples(writer) => writer.write_triple(to_triple(quad)?), + ToWriteQuadWriterKind::RdfXml(writer) => writer.write_triple(to_triple(quad)?), + ToWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad), + ToWriteQuadWriterKind::Turtle(writer) => writer.write_triple(to_triple(quad)?), + } + } + + /// Writes a [`TripleRef`] + pub fn 
write_triple<'a>(&mut self, triple: impl Into>) -> io::Result<()> { + self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph)) + } + + /// Writes the last bytes of the file + pub fn finish(self) -> io::Result<()> { + match self.formatter { + ToWriteQuadWriterKind::NQuads(writer) => writer.finish(), + ToWriteQuadWriterKind::NTriples(writer) => writer.finish(), + ToWriteQuadWriterKind::RdfXml(writer) => writer.finish()?, + ToWriteQuadWriterKind::TriG(writer) => writer.finish()?, + ToWriteQuadWriterKind::Turtle(writer) => writer.finish()?, + } + .flush() + } +} + +/// Writes quads or triples to a Tokio [`AsyncWrite`] implementation. +/// +/// Can be built using [`RdfSerializer::serialize_to_tokio_async_write`]. +/// +/// Warning: Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file. +/// +/// Warning: This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that. +/// +/// ``` +/// use oxrdfio::{RdfFormat, RdfSerializer}; +/// use oxrdf::{Quad, NamedNode}; +/// use std::io; +/// +/// #[tokio::main(flavor = "current_thread")] +/// async fn main() -> io::Result<()> { +/// let mut buffer = Vec::new(); +/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(&mut buffer); +/// writer.write_quad(&Quad { +/// subject: NamedNode::new_unchecked("http://example.com/s").into(), +/// predicate: NamedNode::new_unchecked("http://example.com/p"), +/// object: NamedNode::new_unchecked("http://example.com/o").into(), +/// graph_name: NamedNode::new_unchecked("http://example.com/g").into() +/// }).await?; +/// writer.finish().await?; +/// +/// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); +/// Ok(()) +/// } +/// ``` +#[must_use] +#[cfg(feature = "async-tokio")] +pub struct ToTokioAsyncWriteQuadWriter { + formatter: ToTokioAsyncWriteQuadWriterKind, +} + +#[cfg(feature = "async-tokio")] +enum ToTokioAsyncWriteQuadWriterKind { +
NQuads(ToTokioAsyncWriteNQuadsWriter), + NTriples(ToTokioAsyncWriteNTriplesWriter), + RdfXml(ToTokioAsyncWriteRdfXmlWriter), + TriG(ToTokioAsyncWriteTriGWriter), + Turtle(ToTokioAsyncWriteTurtleWriter), +} + +#[cfg(feature = "async-tokio")] +impl ToTokioAsyncWriteQuadWriter { + /// Writes a [`QuadRef`] + pub async fn write_quad<'a>(&mut self, quad: impl Into>) -> io::Result<()> { + match &mut self.formatter { + ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad).await, + ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => { + writer.write_triple(to_triple(quad)?).await + } + ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => { + writer.write_triple(to_triple(quad)?).await + } + ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad).await, + ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => { + writer.write_triple(to_triple(quad)?).await + } + } + } + + /// Writes a [`TripleRef`] + pub async fn write_triple<'a>(&mut self, triple: impl Into>) -> io::Result<()> { + self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph)) + .await + } + + /// Writes the last bytes of the file + pub async fn finish(self) -> io::Result<()> { + match self.formatter { + ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.finish(), + ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => writer.finish(), + ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => writer.finish().await?, + ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.finish().await?, + ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => writer.finish().await?, + } + .flush() + .await + } +} + +fn to_triple<'a>(quad: impl Into>) -> io::Result> { + let quad = quad.into(); + if quad.graph_name.is_default_graph() { + Ok(quad.into()) + } else { + Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Only quads in the default graph can be serialized to a RDF graph format", + )) + } +} diff --git a/lib/src/io/error.rs b/lib/src/io/error.rs index bea22d8e..7cbdc8ac 100644 
--- a/lib/src/io/error.rs +++ b/lib/src/io/error.rs @@ -43,40 +43,21 @@ impl Error for ParseError { } } -impl From for SyntaxError { +impl From for SyntaxError { #[inline] - fn from(error: oxttl::SyntaxError) -> Self { + fn from(error: oxrdfio::SyntaxError) -> Self { SyntaxError { - inner: SyntaxErrorKind::Turtle(error), + inner: SyntaxErrorKind::IO(error), } } } -impl From for ParseError { +impl From for ParseError { #[inline] - fn from(error: oxttl::ParseError) -> Self { + fn from(error: oxrdfio::ParseError) -> Self { match error { - oxttl::ParseError::Syntax(e) => Self::Syntax(e.into()), - oxttl::ParseError::Io(e) => Self::Io(e), - } - } -} - -impl From for SyntaxError { - #[inline] - fn from(error: oxrdfxml::SyntaxError) -> Self { - SyntaxError { - inner: SyntaxErrorKind::RdfXml(error), - } - } -} - -impl From for ParseError { - #[inline] - fn from(error: oxrdfxml::ParseError) -> Self { - match error { - oxrdfxml::ParseError::Syntax(e) => Self::Syntax(e.into()), - oxrdfxml::ParseError::Io(e) => Self::Io(e), + oxrdfio::ParseError::Syntax(e) => Self::Syntax(e.into()), + oxrdfio::ParseError::Io(e) => Self::Io(e), } } } @@ -113,8 +94,7 @@ pub struct SyntaxError { #[derive(Debug)] enum SyntaxErrorKind { - Turtle(oxttl::SyntaxError), - RdfXml(oxrdfxml::SyntaxError), + IO(oxrdfio::SyntaxError), InvalidBaseIri { iri: String, error: IriParseError }, } @@ -122,8 +102,7 @@ impl fmt::Display for SyntaxError { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match &self.inner { - SyntaxErrorKind::Turtle(e) => e.fmt(f), - SyntaxErrorKind::RdfXml(e) => e.fmt(f), + SyntaxErrorKind::IO(e) => e.fmt(f), SyntaxErrorKind::InvalidBaseIri { iri, error } => { write!(f, "Invalid base IRI '{iri}': {error}") } @@ -135,8 +114,7 @@ impl Error for SyntaxError { #[inline] fn source(&self) -> Option<&(dyn Error + 'static)> { match &self.inner { - SyntaxErrorKind::Turtle(e) => Some(e), - SyntaxErrorKind::RdfXml(e) => Some(e), + SyntaxErrorKind::IO(e) => Some(e), 
SyntaxErrorKind::InvalidBaseIri { .. } => None, } } @@ -146,8 +124,7 @@ impl From for io::Error { #[inline] fn from(error: SyntaxError) -> Self { match error.inner { - SyntaxErrorKind::Turtle(error) => error.into(), - SyntaxErrorKind::RdfXml(error) => error.into(), + SyntaxErrorKind::IO(error) => error.into(), SyntaxErrorKind::InvalidBaseIri { iri, error } => Self::new( io::ErrorKind::InvalidInput, format!("Invalid IRI '{iri}': {error}"), diff --git a/lib/src/io/format.rs b/lib/src/io/format.rs index 7c95ddcb..01e112ac 100644 --- a/lib/src/io/format.rs +++ b/lib/src/io/format.rs @@ -1,3 +1,5 @@ +use oxrdfio::RdfFormat; + /// [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) serialization formats. /// /// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future. @@ -102,6 +104,16 @@ impl GraphFormat { } } +impl From for RdfFormat { + fn from(format: GraphFormat) -> Self { + match format { + GraphFormat::NTriples => Self::NTriples, + GraphFormat::Turtle => Self::Turtle, + GraphFormat::RdfXml => Self::RdfXml, + } + } +} + /// [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) serialization formats. /// /// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future. 
@@ -198,6 +210,15 @@ impl DatasetFormat { } } +impl From for RdfFormat { + fn from(format: DatasetFormat) -> Self { + match format { + DatasetFormat::NQuads => Self::NQuads, + DatasetFormat::TriG => Self::TriG, + } + } +} + impl TryFrom for GraphFormat { type Error = (); diff --git a/lib/src/io/mod.rs b/lib/src/io/mod.rs index 2e5269de..a8185918 100644 --- a/lib/src/io/mod.rs +++ b/lib/src/io/mod.rs @@ -8,3 +8,7 @@ pub mod write; pub use self::format::{DatasetFormat, GraphFormat}; pub use self::read::{DatasetParser, GraphParser}; pub use self::write::{DatasetSerializer, GraphSerializer}; +pub use oxrdfio::{ + FromReadQuadReader, ParseError, RdfFormat, RdfParser, RdfSerializer, SyntaxError, + ToWriteQuadWriter, +}; diff --git a/lib/src/io/read.rs b/lib/src/io/read.rs index eddaabed..fe414aa1 100644 --- a/lib/src/io/read.rs +++ b/lib/src/io/read.rs @@ -4,12 +4,7 @@ pub use crate::io::error::{ParseError, SyntaxError}; use crate::io::{DatasetFormat, GraphFormat}; use crate::model::*; use oxiri::IriParseError; -use oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser}; -use oxttl::nquads::{FromReadNQuadsReader, NQuadsParser}; -use oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser}; -use oxttl::trig::{FromReadTriGReader, TriGParser}; -use oxttl::turtle::{FromReadTurtleReader, TurtleParser}; -use std::collections::HashMap; +use oxrdfio::{FromReadQuadReader, RdfParser}; use std::io::Read; /// Parsers for RDF graph serialization formats. 
@@ -27,18 +22,12 @@ use std::io::Read; /// let parser = GraphParser::from_format(GraphFormat::NTriples); /// let triples = parser.read_triples(file.as_bytes()).collect::,_>>()?; /// -///assert_eq!(triples.len(), 1); -///assert_eq!(triples[0].subject.to_string(), ""); +/// assert_eq!(triples.len(), 1); +/// assert_eq!(triples[0].subject.to_string(), ""); /// # std::io::Result::Ok(()) /// ``` pub struct GraphParser { - inner: GraphParserKind, -} - -enum GraphParserKind { - NTriples(NTriplesParser), - Turtle(TurtleParser), - RdfXml(RdfXmlParser), + inner: RdfParser, } impl GraphParser { @@ -46,15 +35,9 @@ impl GraphParser { #[inline] pub fn from_format(format: GraphFormat) -> Self { Self { - inner: match format { - GraphFormat::NTriples => { - GraphParserKind::NTriples(NTriplesParser::new().with_quoted_triples()) - } - GraphFormat::Turtle => { - GraphParserKind::Turtle(TurtleParser::new().with_quoted_triples()) - } - GraphFormat::RdfXml => GraphParserKind::RdfXml(RdfXmlParser::new()), - }, + inner: RdfParser::from_format(format.into()) + .without_named_graphs() + .rename_blank_nodes(), } } @@ -68,30 +51,21 @@ impl GraphParser { /// let parser = GraphParser::from_format(GraphFormat::Turtle).with_base_iri("http://example.com")?; /// let triples = parser.read_triples(file.as_bytes()).collect::,_>>()?; /// - ///assert_eq!(triples.len(), 1); - ///assert_eq!(triples[0].subject.to_string(), ""); + /// assert_eq!(triples.len(), 1); + /// assert_eq!(triples[0].subject.to_string(), ""); /// # Result::<_,Box>::Ok(()) /// ``` #[inline] pub fn with_base_iri(self, base_iri: impl Into) -> Result { Ok(Self { - inner: match self.inner { - GraphParserKind::NTriples(p) => GraphParserKind::NTriples(p), - GraphParserKind::Turtle(p) => GraphParserKind::Turtle(p.with_base_iri(base_iri)?), - GraphParserKind::RdfXml(p) => GraphParserKind::RdfXml(p.with_base_iri(base_iri)?), - }, + inner: self.inner.with_base_iri(base_iri)?, }) } /// Executes the parsing itself on a [`Read`] implementation and 
returns an iterator of triples. pub fn read_triples(&self, reader: R) -> TripleReader { TripleReader { - mapper: BlankNodeMapper::default(), - parser: match &self.inner { - GraphParserKind::NTriples(p) => TripleReaderKind::NTriples(p.parse_read(reader)), - GraphParserKind::Turtle(p) => TripleReaderKind::Turtle(p.parse_read(reader)), - GraphParserKind::RdfXml(p) => TripleReaderKind::RdfXml(p.parse_read(reader)), - }, + parser: self.inner.parse_read(reader), } } } @@ -107,41 +81,20 @@ impl GraphParser { /// let parser = GraphParser::from_format(GraphFormat::NTriples); /// let triples = parser.read_triples(file.as_bytes()).collect::,_>>()?; /// -///assert_eq!(triples.len(), 1); -///assert_eq!(triples[0].subject.to_string(), ""); +/// assert_eq!(triples.len(), 1); +/// assert_eq!(triples[0].subject.to_string(), ""); /// # std::io::Result::Ok(()) /// ``` #[must_use] pub struct TripleReader { - mapper: BlankNodeMapper, - parser: TripleReaderKind, -} - -#[allow(clippy::large_enum_variant)] -enum TripleReaderKind { - NTriples(FromReadNTriplesReader), - Turtle(FromReadTurtleReader), - RdfXml(FromReadRdfXmlReader), + parser: FromReadQuadReader, } impl Iterator for TripleReader { type Item = Result; fn next(&mut self) -> Option> { - Some(match &mut self.parser { - TripleReaderKind::NTriples(parser) => match parser.next()? { - Ok(triple) => Ok(self.mapper.triple(triple)), - Err(e) => Err(e.into()), - }, - TripleReaderKind::Turtle(parser) => match parser.next()? { - Ok(triple) => Ok(self.mapper.triple(triple)), - Err(e) => Err(e.into()), - }, - TripleReaderKind::RdfXml(parser) => match parser.next()? 
{ - Ok(triple) => Ok(self.mapper.triple(triple)), - Err(e) => Err(e.into()), - }, - }) + Some(self.parser.next()?.map(Into::into).map_err(Into::into)) } } @@ -159,17 +112,12 @@ impl Iterator for TripleReader { /// let parser = DatasetParser::from_format(DatasetFormat::NQuads); /// let quads = parser.read_quads(file.as_bytes()).collect::,_>>()?; /// -///assert_eq!(quads.len(), 1); -///assert_eq!(quads[0].subject.to_string(), ""); +/// assert_eq!(quads.len(), 1); +/// assert_eq!(quads[0].subject.to_string(), ""); /// # std::io::Result::Ok(()) /// ``` pub struct DatasetParser { - inner: DatasetParserKind, -} - -enum DatasetParserKind { - NQuads(NQuadsParser), - TriG(TriGParser), + inner: RdfParser, } impl DatasetParser { @@ -177,14 +125,7 @@ impl DatasetParser { #[inline] pub fn from_format(format: DatasetFormat) -> Self { Self { - inner: match format { - DatasetFormat::NQuads => { - DatasetParserKind::NQuads(NQuadsParser::new().with_quoted_triples()) - } - DatasetFormat::TriG => { - DatasetParserKind::TriG(TriGParser::new().with_quoted_triples()) - } - }, + inner: RdfParser::from_format(format.into()).rename_blank_nodes(), } } @@ -198,28 +139,21 @@ impl DatasetParser { /// let parser = DatasetParser::from_format(DatasetFormat::TriG).with_base_iri("http://example.com")?; /// let triples = parser.read_quads(file.as_bytes()).collect::,_>>()?; /// - ///assert_eq!(triples.len(), 1); - ///assert_eq!(triples[0].subject.to_string(), ""); + /// assert_eq!(triples.len(), 1); + /// assert_eq!(triples[0].subject.to_string(), ""); /// # Result::<_,Box>::Ok(()) /// ``` #[inline] pub fn with_base_iri(self, base_iri: impl Into) -> Result { Ok(Self { - inner: match self.inner { - DatasetParserKind::NQuads(p) => DatasetParserKind::NQuads(p), - DatasetParserKind::TriG(p) => DatasetParserKind::TriG(p.with_base_iri(base_iri)?), - }, + inner: self.inner.with_base_iri(base_iri)?, }) } /// Executes the parsing itself on a [`Read`] implementation and returns an iterator of quads. 
pub fn read_quads(&self, reader: R) -> QuadReader { QuadReader { - mapper: BlankNodeMapper::default(), - parser: match &self.inner { - DatasetParserKind::NQuads(p) => QuadReaderKind::NQuads(p.parse_read(reader)), - DatasetParserKind::TriG(p) => QuadReaderKind::TriG(p.parse_read(reader)), - }, + parser: self.inner.parse_read(reader), } } } @@ -235,90 +169,19 @@ impl DatasetParser { /// let parser = DatasetParser::from_format(DatasetFormat::NQuads); /// let quads = parser.read_quads(file.as_bytes()).collect::,_>>()?; /// -///assert_eq!(quads.len(), 1); -///assert_eq!(quads[0].subject.to_string(), ""); +/// assert_eq!(quads.len(), 1); +/// assert_eq!(quads[0].subject.to_string(), ""); /// # std::io::Result::Ok(()) /// ``` #[must_use] pub struct QuadReader { - mapper: BlankNodeMapper, - parser: QuadReaderKind, -} - -enum QuadReaderKind { - NQuads(FromReadNQuadsReader), - TriG(FromReadTriGReader), + parser: FromReadQuadReader, } impl Iterator for QuadReader { type Item = Result; fn next(&mut self) -> Option> { - Some(match &mut self.parser { - QuadReaderKind::NQuads(parser) => match parser.next()? { - Ok(quad) => Ok(self.mapper.quad(quad)), - Err(e) => Err(e.into()), - }, - QuadReaderKind::TriG(parser) => match parser.next()? 
{ - Ok(quad) => Ok(self.mapper.quad(quad)), - Err(e) => Err(e.into()), - }, - }) - } -} - -#[derive(Default)] -struct BlankNodeMapper { - bnode_map: HashMap, -} - -impl BlankNodeMapper { - fn blank_node(&mut self, node: BlankNode) -> BlankNode { - self.bnode_map - .entry(node) - .or_insert_with(BlankNode::default) - .clone() - } - - fn subject(&mut self, node: Subject) -> Subject { - match node { - Subject::NamedNode(node) => node.into(), - Subject::BlankNode(node) => self.blank_node(node).into(), - Subject::Triple(triple) => self.triple(*triple).into(), - } - } - - fn term(&mut self, node: Term) -> Term { - match node { - Term::NamedNode(node) => node.into(), - Term::BlankNode(node) => self.blank_node(node).into(), - Term::Literal(literal) => literal.into(), - Term::Triple(triple) => self.triple(*triple).into(), - } - } - - fn triple(&mut self, triple: Triple) -> Triple { - Triple { - subject: self.subject(triple.subject), - predicate: triple.predicate, - object: self.term(triple.object), - } - } - - fn graph_name(&mut self, graph_name: GraphName) -> GraphName { - match graph_name { - GraphName::NamedNode(node) => node.into(), - GraphName::BlankNode(node) => self.blank_node(node).into(), - GraphName::DefaultGraph => GraphName::DefaultGraph, - } - } - - fn quad(&mut self, quad: Quad) -> Quad { - Quad { - subject: self.subject(quad.subject), - predicate: quad.predicate, - object: self.term(quad.object), - graph_name: self.graph_name(quad.graph_name), - } + Some(self.parser.next()?.map_err(Into::into)) } } diff --git a/lib/src/io/write.rs b/lib/src/io/write.rs index 1661343f..b9373afc 100644 --- a/lib/src/io/write.rs +++ b/lib/src/io/write.rs @@ -2,11 +2,7 @@ use crate::io::{DatasetFormat, GraphFormat}; use crate::model::*; -use oxrdfxml::{RdfXmlSerializer, ToWriteRdfXmlWriter}; -use oxttl::nquads::{NQuadsSerializer, ToWriteNQuadsWriter}; -use oxttl::ntriples::{NTriplesSerializer, ToWriteNTriplesWriter}; -use oxttl::trig::{ToWriteTriGWriter, TriGSerializer}; -use 
oxttl::turtle::{ToWriteTurtleWriter, TurtleSerializer}; +use oxrdfio::{RdfSerializer, ToWriteQuadWriter}; use std::io::{self, Write}; /// A serializer for RDF graph serialization formats. @@ -29,34 +25,26 @@ use std::io::{self, Write}; /// })?; /// writer.finish()?; /// -///assert_eq!(buffer.as_slice(), " .\n".as_bytes()); +/// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); /// # Result::<_,Box>::Ok(()) /// ``` pub struct GraphSerializer { - format: GraphFormat, + inner: RdfSerializer, } impl GraphSerializer { /// Builds a serializer for the given format #[inline] pub fn from_format(format: GraphFormat) -> Self { - Self { format } + Self { + inner: RdfSerializer::from_format(format.into()), + } } /// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`] implementation pub fn triple_writer(&self, writer: W) -> TripleWriter { TripleWriter { - formatter: match self.format { - GraphFormat::NTriples => { - TripleWriterKind::NTriples(NTriplesSerializer::new().serialize_to_write(writer)) - } - GraphFormat::Turtle => { - TripleWriterKind::Turtle(TurtleSerializer::new().serialize_to_write(writer)) - } - GraphFormat::RdfXml => { - TripleWriterKind::RdfXml(RdfXmlSerializer::new().serialize_to_write(writer)) - } - }, + writer: self.inner.serialize_to_write(writer), } } } @@ -79,37 +67,23 @@ impl GraphSerializer { /// })?; /// writer.finish()?; /// -///assert_eq!(buffer.as_slice(), " .\n".as_bytes()); +/// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); /// # Result::<_,Box>::Ok(()) /// ``` #[must_use] pub struct TripleWriter { - formatter: TripleWriterKind, -} - -enum TripleWriterKind { - NTriples(ToWriteNTriplesWriter), - Turtle(ToWriteTurtleWriter), - RdfXml(ToWriteRdfXmlWriter), + writer: ToWriteQuadWriter, } impl TripleWriter { /// Writes a triple pub fn write<'a>(&mut self, triple: impl Into>) -> io::Result<()> { - match &mut self.formatter { - TripleWriterKind::NTriples(writer) => writer.write_triple(triple), - TripleWriterKind::Turtle(writer) 
=> writer.write_triple(triple), - TripleWriterKind::RdfXml(writer) => writer.write_triple(triple), - } + self.writer.write_triple(triple) } /// Writes the last bytes of the file pub fn finish(self) -> io::Result<()> { - match self.formatter { - TripleWriterKind::NTriples(writer) => writer.finish().flush(), - TripleWriterKind::Turtle(writer) => writer.finish()?.flush(), - TripleWriterKind::RdfXml(formatter) => formatter.finish()?.flush(), - } + self.writer.finish() } } @@ -133,31 +107,26 @@ impl TripleWriter { /// })?; /// writer.finish()?; /// -///assert_eq!(buffer.as_slice(), " .\n".as_bytes()); +/// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); /// # Result::<_,Box>::Ok(()) /// ``` pub struct DatasetSerializer { - format: DatasetFormat, + inner: RdfSerializer, } impl DatasetSerializer { /// Builds a serializer for the given format #[inline] pub fn from_format(format: DatasetFormat) -> Self { - Self { format } + Self { + inner: RdfSerializer::from_format(format.into()), + } } /// Returns a [`QuadWriter`] allowing writing triples into the given [`Write`] implementation pub fn quad_writer(&self, writer: W) -> QuadWriter { QuadWriter { - formatter: match self.format { - DatasetFormat::NQuads => { - QuadWriterKind::NQuads(NQuadsSerializer::new().serialize_to_write(writer)) - } - DatasetFormat::TriG => { - QuadWriterKind::TriG(TriGSerializer::new().serialize_to_write(writer)) - } - }, + writer: self.inner.serialize_to_write(writer), } } } @@ -181,33 +150,22 @@ impl DatasetSerializer { /// })?; /// writer.finish()?; /// -///assert_eq!(buffer.as_slice(), " .\n".as_bytes()); +/// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); /// # Result::<_,Box>::Ok(()) /// ``` #[must_use] pub struct QuadWriter { - formatter: QuadWriterKind, -} - -enum QuadWriterKind { - NQuads(ToWriteNQuadsWriter), - TriG(ToWriteTriGWriter), + writer: ToWriteQuadWriter, } impl QuadWriter { /// Writes a quad pub fn write<'a>(&mut self, quad: impl Into>) -> io::Result<()> { - match &mut 
self.formatter { - QuadWriterKind::NQuads(writer) => writer.write_quad(quad), - QuadWriterKind::TriG(writer) => writer.write_quad(quad), - } + self.writer.write_quad(quad) } /// Writes the last bytes of the file pub fn finish(self) -> io::Result<()> { - match self.formatter { - QuadWriterKind::NQuads(writer) => writer.finish().flush(), - QuadWriterKind::TriG(writer) => writer.finish()?.flush(), - } + self.writer.finish() } }