From ef765666bef2f7b19071056d2fcc78414d2acd95 Mon Sep 17 00:00:00 2001 From: Tpt Date: Fri, 19 Jan 2024 22:14:24 +0100 Subject: [PATCH] Serialization: allows to set prefixes --- .github/workflows/tests.yml | 4 + Cargo.lock | 10 +- cli/src/main.rs | 19 +- fuzz/fuzz_targets/trig.rs | 43 +++-- lib/Cargo.toml | 8 +- lib/oxrdf/Cargo.toml | 2 +- lib/oxrdfio/Cargo.toml | 4 +- lib/oxrdfio/src/parser.rs | 26 +-- lib/oxrdfio/src/serializer.rs | 164 ++++++++++------ lib/oxrdfxml/Cargo.toml | 4 +- lib/oxrdfxml/src/serializer.rs | 209 ++++++++++++++------ lib/oxttl/Cargo.toml | 2 +- lib/oxttl/src/lexer.rs | 8 +- lib/oxttl/src/line_formats.rs | 4 +- lib/oxttl/src/n3.rs | 4 +- lib/oxttl/src/nquads.rs | 2 +- lib/oxttl/src/ntriples.rs | 2 +- lib/oxttl/src/terse.rs | 4 +- lib/oxttl/src/trig.rs | 341 ++++++++++++++++++++++++++------- lib/oxttl/src/turtle.rs | 108 +++++++---- lib/sparesults/Cargo.toml | 4 +- lib/sparesults/src/xml.rs | 7 +- lib/spargebra/Cargo.toml | 4 +- lib/sparopt/Cargo.toml | 6 +- 24 files changed, 695 insertions(+), 294 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 689bb227..76074704 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -42,8 +42,12 @@ jobs: working-directory: ./lib/oxttl - run: cargo clippy --all-targets -- -D warnings -D clippy::all working-directory: ./lib/oxrdfio + - run: cargo clippy --all-targets --features async-tokio -- -D warnings -D clippy::all + working-directory: ./lib/oxrdfio - run: cargo clippy --all-targets -- -D warnings -D clippy::all working-directory: ./lib/sparesults + - run: cargo clippy --all-targets --features async-tokio -- -D warnings -D clippy::all + working-directory: ./lib/sparesults - run: cargo clippy --all-targets -- -D warnings -D clippy::all working-directory: ./lib/spargebra - run: cargo clippy --all-targets -- -D warnings -D clippy::all diff --git a/Cargo.lock b/Cargo.lock index c9fb9e90..70b1abfb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1104,7 +1104,7 @@ checksum = "b225dad32cfaa43a960b93f01fa7f87528ac07e794b80f6d9a0153e0222557e2" [[package]] name = "oxrdf" -version = "0.2.0-alpha.1" +version = "0.2.0-alpha.2-dev" dependencies = [ "oxilangtag", "oxiri", @@ -1124,7 +1124,7 @@ dependencies = [ [[package]] name = "oxrdfxml" -version = "0.1.0-alpha.1" +version = "0.1.0-alpha.2-dev" dependencies = [ "oxilangtag", "oxiri", @@ -1719,7 +1719,7 @@ checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "sparesults" -version = "0.2.0-alpha.1" +version = "0.2.0-alpha.2-dev" dependencies = [ "json-event-parser", "memchr", @@ -1730,7 +1730,7 @@ dependencies = [ [[package]] name = "spargebra" -version = "0.3.0-alpha.1" +version = "0.3.0-alpha.2-dev" dependencies = [ "oxilangtag", "oxiri", @@ -1741,7 +1741,7 @@ dependencies = [ [[package]] name = "sparopt" -version = "0.1.0-alpha.1" +version = "0.1.0-alpha.2-dev" dependencies = [ "oxrdf", "rand", diff --git a/cli/src/main.rs b/cli/src/main.rs index cabf08c4..cff5cf79 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -714,7 +714,7 @@ pub fn main() -> anyhow::Result<()> { let to_format = if let Some(format) = to_format { rdf_format_from_name(&format)? - } else if let Some(file) = &from_file { + } else if let Some(file) = &to_file { rdf_format_from_path(file)? 
} else { bail!("The --to-format option must be set when writing to stdout") @@ -826,14 +826,21 @@ fn dump( fn do_convert( parser: RdfParser, read: R, - serializer: RdfSerializer, + mut serializer: RdfSerializer, write: W, lenient: bool, from_graph: &Option, default_graph: &GraphName, ) -> anyhow::Result { + let mut parser = parser.parse_read(read); + let first = parser.next(); // We read the first element to get prefixes + for (prefix_name, prefix_iri) in parser.prefixes() { + serializer = serializer + .with_prefix(prefix_name, prefix_iri) + .with_context(|| format!("Invalid IRI for prefix {prefix_name}: {prefix_iri}"))?; + } let mut writer = serializer.serialize_to_write(write); - for quad_result in parser.parse_read(read) { + for quad_result in first.into_iter().chain(parser) { match quad_result { Ok(mut quad) => { if let Some(from_graph) = from_graph { @@ -2239,8 +2246,8 @@ mod tests { #[test] fn cli_convert_file() -> Result<()> { let input_file = NamedTempFile::new("input.ttl")?; - input_file.write_str("

.")?; - let output_file = NamedTempFile::new("output.nt")?; + input_file.write_str("@prefix schema: .\n a schema:Person ;\n\tschema:name \"Foo Bar\"@en .\n")?; + let output_file = NamedTempFile::new("output.rdf")?; cli_command()? .arg("convert") .arg("--from-file") @@ -2252,7 +2259,7 @@ mod tests { .assert() .success(); output_file - .assert(" .\n"); + .assert("\n\n\t\n\t\tFoo Bar\n\t\n"); Ok(()) } diff --git a/fuzz/fuzz_targets/trig.rs b/fuzz/fuzz_targets/trig.rs index 1ce03d1b..64253e3c 100644 --- a/fuzz/fuzz_targets/trig.rs +++ b/fuzz/fuzz_targets/trig.rs @@ -7,7 +7,7 @@ use oxttl::{TriGParser, TriGSerializer}; fn parse<'a>( chunks: impl IntoIterator, unchecked: bool, -) -> (Vec, Vec) { +) -> (Vec, Vec, Vec<(String, String)>) { let mut quads = Vec::new(); let mut errors = Vec::new(); let mut parser = TriGParser::new() @@ -35,7 +35,14 @@ fn parse<'a>( } } assert!(reader.is_end()); - (quads, errors) + ( + quads, + errors, + reader + .prefixes() + .map(|(k, v)| (k.to_owned(), v.to_owned())) + .collect(), + ) } fn count_triple_blank_nodes(triple: &Triple) -> usize { @@ -62,8 +69,12 @@ fn count_quad_blank_nodes(quad: &Quad) -> usize { }) + usize::from(matches!(quad.graph_name, GraphName::BlankNode(_))) } -fn serialize_quads(quads: &[Quad]) -> Vec { - let mut writer = TriGSerializer::new().serialize_to_write(Vec::new()); +fn serialize_quads(quads: &[Quad], prefixes: Vec<(String, String)>) -> Vec { + let mut serializer = TriGSerializer::new(); + for (prefix_name, prefix_iri) in prefixes { + serializer = serializer.with_prefix(prefix_name, prefix_iri).unwrap(); + } + let mut writer = serializer.serialize_to_write(Vec::new()); for quad in quads { writer.write_quad(quad).unwrap(); } @@ -72,9 +83,9 @@ fn serialize_quads(quads: &[Quad]) -> Vec { fuzz_target!(|data: &[u8]| { // We parse with splitting - let (quads, errors) = parse(data.split(|c| *c == 0xFF), false); + let (quads, errors, prefixes) = parse(data.split(|c| *c == 0xFF), false); // We parse without splitting - let (quads_without_split, errors_without_split) = parse( + let (quads_without_split, errors_without_split, _) = parse( [data .iter() .copied() @@ -83,7 +94,7 @@ fuzz_target!(|data: &[u8]| { .as_slice()], false, ); - let (quads_unchecked, errors_unchecked) = parse(data.split(|c| *c == 0xFF), true); + let (quads_unchecked, errors_unchecked, _) = parse(data.split(|c| *c == 0xFF), true); if errors.is_empty() { assert!(errors_unchecked.is_empty()); } @@ -94,16 +105,16 @@ fuzz_target!(|data: &[u8]| { quads, quads_without_split, "With split:\n{}\nWithout split:\n{}", - String::from_utf8_lossy(&serialize_quads(&quads)), - String::from_utf8_lossy(&serialize_quads(&quads_without_split)) + String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), + String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new())) ); if errors.is_empty() { assert_eq!( quads, quads_unchecked, "Validating:\n{}\nUnchecked:\n{}", - String::from_utf8_lossy(&serialize_quads(&quads)), - String::from_utf8_lossy(&serialize_quads(&quads_unchecked)) + String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), + String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new())) ); } } else if bnodes_count <= 4 { @@ -115,8 +126,8 @@ fuzz_target!(|data: &[u8]| { dataset_with_split, dataset_without_split, "With split:\n{}\nWithout split:\n{}", - String::from_utf8_lossy(&serialize_quads(&quads)), - String::from_utf8_lossy(&serialize_quads(&quads_without_split)) + String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), + 
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new())) ); if errors.is_empty() { if errors.is_empty() { @@ -126,8 +137,8 @@ fuzz_target!(|data: &[u8]| { dataset_with_split, dataset_unchecked, "Validating:\n{}\nUnchecked:\n{}", - String::from_utf8_lossy(&serialize_quads(&quads)), - String::from_utf8_lossy(&serialize_quads(&quads_unchecked)) + String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())), + String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new())) ); } } @@ -135,7 +146,7 @@ fuzz_target!(|data: &[u8]| { assert_eq!(errors, errors_without_split); // We serialize - let new_serialization = serialize_quads(&quads); + let new_serialization = serialize_quads(&quads, prefixes); // We parse the serialization let new_quads = TriGParser::new() diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 8465ce8f..44005104 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -32,7 +32,7 @@ json-event-parser = "0.2.0-alpha.2" md-5 = "0.10" oxilangtag = "0.1" oxiri = "0.2.3-alpha.1" -oxrdf = { version = "0.2.0-alpha.1", path = "oxrdf", features = ["rdf-star", "oxsdatatypes"] } +oxrdf = { version = "0.2.0-alpha.2-dev", path = "oxrdf", features = ["rdf-star", "oxsdatatypes"] } oxrdfio = { version = "0.1.0-alpha.2-dev", path = "oxrdfio", features = ["rdf-star"] } oxsdatatypes = { version = "0.2.0-alpha.1", path = "oxsdatatypes" } rand = "0.8" @@ -40,9 +40,9 @@ regex = "1.7" sha1 = "0.10" sha2 = "0.10" siphasher = ">=0.3, <2.0" -sparesults = { version = "0.2.0-alpha.1", path = "sparesults", features = ["rdf-star"] } -spargebra = { version = "0.3.0-alpha.1", path = "spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] } -sparopt = { version = "0.1.0-alpha.1", path = "sparopt", features = ["rdf-star", "sep-0002", "sep-0006"] } +sparesults = { version = "0.2.0-alpha.2-dev", path = "sparesults", features = ["rdf-star"] } +spargebra = { version = "0.3.0-alpha.2-dev", path = "spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] } +sparopt = { version = "0.1.0-alpha.2-dev", path = "sparopt", features = ["rdf-star", "sep-0002", "sep-0006"] } [target.'cfg(not(target_family = "wasm"))'.dependencies] libc = "0.2.147" diff --git a/lib/oxrdf/Cargo.toml b/lib/oxrdf/Cargo.toml index fdf94a51..c52b13ca 100644 --- a/lib/oxrdf/Cargo.toml +++ b/lib/oxrdf/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxrdf" -version = "0.2.0-alpha.1" +version = "0.2.0-alpha.2-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" diff --git a/lib/oxrdfio/Cargo.toml b/lib/oxrdfio/Cargo.toml index c2930046..027c5eba 100644 --- a/lib/oxrdfio/Cargo.toml +++ b/lib/oxrdfio/Cargo.toml @@ -19,8 +19,8 @@ async-tokio = ["dep:tokio", "oxrdfxml/async-tokio", "oxttl/async-tokio"] rdf-star = ["oxrdf/rdf-star", "oxttl/rdf-star"] [dependencies] -oxrdf = { version = "0.2.0-alpha.1", path = "../oxrdf" } -oxrdfxml = { version = "0.1.0-alpha.1", path = "../oxrdfxml" } +oxrdf = { version = "0.2.0-alpha.2-dev", path = "../oxrdf" } +oxrdfxml = { version = "0.1.0-alpha.2-dev", path = "../oxrdfxml" } oxttl = { version = "0.1.0-alpha.2-dev", path = "../oxttl" } tokio = { version = "1.29", optional = true, features = ["io-util"] } diff --git a/lib/oxrdfio/src/parser.rs b/lib/oxrdfio/src/parser.rs index 766be9ce..d1536141 100644 --- a/lib/oxrdfio/src/parser.rs +++ b/lib/oxrdfio/src/parser.rs @@ -598,12 +598,14 @@ impl FromTokioAsyncReadQuadReader { pub fn prefixes(&self) -> PrefixesIter<'_> { PrefixesIter { inner: match &self.parser { - FromReadQuadReaderKind::N3(p) => 
PrefixesIterKind::N3(p.prefixes()), - FromReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()), - FromReadQuadReaderKind::Turtle(p) => PrefixesIterKind::Turtle(p.prefixes()), - FromReadQuadReaderKind::NQuads(_) - | FromReadQuadReaderKind::NTriples(_) - | FromReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */ + FromTokioAsyncReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()), + FromTokioAsyncReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()), + FromTokioAsyncReadQuadReaderKind::Turtle(p) => { + PrefixesIterKind::Turtle(p.prefixes()) + } + FromTokioAsyncReadQuadReaderKind::NQuads(_) + | FromTokioAsyncReadQuadReaderKind::NTriples(_) + | FromTokioAsyncReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */ }, } } @@ -633,12 +635,12 @@ impl FromTokioAsyncReadQuadReader { /// ``` pub fn base_iri(&self) -> Option<&str> { match &self.parser { - FromReadQuadReaderKind::N3(p) => p.base_iri(), - FromReadQuadReaderKind::TriG(p) => p.base_iri(), - FromReadQuadReaderKind::Turtle(p) => p.base_iri(), - FromReadQuadReaderKind::NQuads(_) - | FromReadQuadReaderKind::NTriples(_) - | FromReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML + FromTokioAsyncReadQuadReaderKind::N3(p) => p.base_iri(), + FromTokioAsyncReadQuadReaderKind::TriG(p) => p.base_iri(), + FromTokioAsyncReadQuadReaderKind::Turtle(p) => p.base_iri(), + FromTokioAsyncReadQuadReaderKind::NQuads(_) + | FromTokioAsyncReadQuadReaderKind::NTriples(_) + | FromTokioAsyncReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML } } } diff --git a/lib/oxrdfio/src/serializer.rs b/lib/oxrdfio/src/serializer.rs index 7abf7696..3b32dc32 100644 --- a/lib/oxrdfio/src/serializer.rs +++ b/lib/oxrdfio/src/serializer.rs @@ -1,7 +1,7 @@ //! Utilities to write RDF graphs and datasets. 
use crate::format::RdfFormat; -use oxrdf::{GraphNameRef, QuadRef, TripleRef}; +use oxrdf::{GraphNameRef, IriParseError, QuadRef, TripleRef}; #[cfg(feature = "async-tokio")] use oxrdfxml::ToTokioAsyncWriteRdfXmlWriter; use oxrdfxml::{RdfXmlSerializer, ToWriteRdfXmlWriter}; @@ -35,29 +35,44 @@ use tokio::io::AsyncWrite; /// use oxrdfio::{RdfFormat, RdfSerializer}; /// use oxrdf::{Quad, NamedNode}; /// -/// let mut buffer = Vec::new(); -/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(&mut buffer); +/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new()); /// writer.write_quad(&Quad { /// subject: NamedNode::new("http://example.com/s")?.into(), /// predicate: NamedNode::new("http://example.com/p")?, /// object: NamedNode::new("http://example.com/o")?.into(), /// graph_name: NamedNode::new("http://example.com/g")?.into() /// })?; -/// writer.finish()?; -/// -/// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); +/// assert_eq!(writer.finish()?, b" .\n"); /// # Result::<_,Box>::Ok(()) /// ``` #[must_use] pub struct RdfSerializer { - format: RdfFormat, + inner: RdfSerializerKind, +} + +enum RdfSerializerKind { + NQuads(NQuadsSerializer), + NTriples(NTriplesSerializer), + RdfXml(RdfXmlSerializer), + TriG(TriGSerializer), + Turtle(TurtleSerializer), } impl RdfSerializer { /// Builds a serializer for the given format #[inline] pub fn from_format(format: RdfFormat) -> Self { - Self { format } + Self { + inner: match format { + RdfFormat::NQuads => RdfSerializerKind::NQuads(NQuadsSerializer::new()), + RdfFormat::NTriples => RdfSerializerKind::NTriples(NTriplesSerializer::new()), + RdfFormat::RdfXml => RdfSerializerKind::RdfXml(RdfXmlSerializer::new()), + RdfFormat::TriG => RdfSerializerKind::TriG(TriGSerializer::new()), + RdfFormat::Turtle | RdfFormat::N3 => { + RdfSerializerKind::Turtle(TurtleSerializer::new()) + } + }, + } } /// The format the serializer serializes to. @@ -71,7 +86,56 @@ impl RdfSerializer { /// ); /// ``` pub fn format(&self) -> RdfFormat { - self.format + match &self.inner { + RdfSerializerKind::NQuads(_) => RdfFormat::NQuads, + RdfSerializerKind::NTriples(_) => RdfFormat::NTriples, + RdfSerializerKind::RdfXml(_) => RdfFormat::RdfXml, + RdfSerializerKind::TriG(_) => RdfFormat::TriG, + RdfSerializerKind::Turtle(_) => RdfFormat::Turtle, + } + } + + /// If the format supports it, sets a prefix. + /// + /// ``` + /// use oxrdf::vocab::rdf; + /// use oxrdf::{NamedNodeRef, TripleRef}; + /// use oxrdfio::{RdfFormat, RdfSerializer}; + /// + /// let mut writer = RdfSerializer::from_format(RdfFormat::Turtle) + /// .with_prefix("schema", "http://schema.org/")? + /// .serialize_to_write(Vec::new()); + /// writer.write_triple(TripleRef { + /// subject: NamedNodeRef::new("http://example.com/s")?.into(), + /// predicate: rdf::TYPE.into(), + /// object: NamedNodeRef::new("http://schema.org/Person")?.into(), + /// })?; + /// assert_eq!( + /// writer.finish()?, + /// b"@prefix schema: .\n a schema:Person .\n" + /// ); + /// # Result::<_,Box>::Ok(()) + /// ``` + #[inline] + pub fn with_prefix( + mut self, + prefix_name: impl Into, + prefix_iri: impl Into, + ) -> Result { + self.inner = match self.inner { + RdfSerializerKind::NQuads(s) => RdfSerializerKind::NQuads(s), + RdfSerializerKind::NTriples(s) => RdfSerializerKind::NTriples(s), + RdfSerializerKind::RdfXml(s) => { + RdfSerializerKind::RdfXml(s.with_prefix(prefix_name, prefix_iri)?) 
+ } + RdfSerializerKind::TriG(s) => { + RdfSerializerKind::TriG(s.with_prefix(prefix_name, prefix_iri)?) + } + RdfSerializerKind::Turtle(s) => { + RdfSerializerKind::Turtle(s.with_prefix(prefix_name, prefix_iri)?) + } + }; + Ok(self) } /// Writes to a [`Write`] implementation. @@ -88,36 +152,33 @@ impl RdfSerializer { /// use oxrdfio::{RdfFormat, RdfSerializer}; /// use oxrdf::{Quad, NamedNode}; /// - /// let mut buffer = Vec::new(); - /// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(&mut buffer); + /// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new()); /// writer.write_quad(&Quad { /// subject: NamedNode::new("http://example.com/s")?.into(), /// predicate: NamedNode::new("http://example.com/p")?, /// object: NamedNode::new("http://example.com/o")?.into(), /// graph_name: NamedNode::new("http://example.com/g")?.into() /// })?; - /// writer.finish()?; - /// - /// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); + /// assert_eq!(writer.finish()?, b" .\n"); /// # Result::<_,Box>::Ok(()) /// ``` pub fn serialize_to_write(self, write: W) -> ToWriteQuadWriter { ToWriteQuadWriter { - formatter: match self.format { - RdfFormat::NQuads => { - ToWriteQuadWriterKind::NQuads(NQuadsSerializer::new().serialize_to_write(write)) + formatter: match self.inner { + RdfSerializerKind::NQuads(s) => { + ToWriteQuadWriterKind::NQuads(s.serialize_to_write(write)) } - RdfFormat::NTriples => ToWriteQuadWriterKind::NTriples( - NTriplesSerializer::new().serialize_to_write(write), - ), - RdfFormat::RdfXml => { - ToWriteQuadWriterKind::RdfXml(RdfXmlSerializer::new().serialize_to_write(write)) + RdfSerializerKind::NTriples(s) => { + ToWriteQuadWriterKind::NTriples(s.serialize_to_write(write)) } - RdfFormat::TriG => { - ToWriteQuadWriterKind::TriG(TriGSerializer::new().serialize_to_write(write)) + RdfSerializerKind::RdfXml(s) => { + ToWriteQuadWriterKind::RdfXml(s.serialize_to_write(write)) } - RdfFormat::Turtle | RdfFormat::N3 => { - ToWriteQuadWriterKind::Turtle(TurtleSerializer::new().serialize_to_write(write)) + RdfSerializerKind::TriG(s) => { + ToWriteQuadWriterKind::TriG(s.serialize_to_write(write)) + } + RdfSerializerKind::Turtle(s) => { + ToWriteQuadWriterKind::Turtle(s.serialize_to_write(write)) } }, } @@ -139,17 +200,14 @@ impl RdfSerializer { /// /// # #[tokio::main(flavor = "current_thread")] /// # async fn main() -> std::io::Result<()> { - /// let mut buffer = Vec::new(); - /// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(&mut buffer); + /// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new()); /// writer.write_quad(&Quad { /// subject: NamedNode::new_unchecked("http://example.com/s").into(), /// predicate: NamedNode::new_unchecked("http://example.com/p"), /// object: NamedNode::new_unchecked("http://example.com/o").into(), /// graph_name: NamedNode::new_unchecked("http://example.com/g").into() /// }).await?; - /// writer.finish().await?; - /// - /// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); + /// assert_eq!(writer.finish().await?, " .\n"); /// # Ok(()) /// # } /// ``` @@ -159,22 +217,22 @@ impl RdfSerializer { write: W, ) -> ToTokioAsyncWriteQuadWriter { ToTokioAsyncWriteQuadWriter { - formatter: match self.format { - RdfFormat::NQuads => ToTokioAsyncWriteQuadWriterKind::NQuads( - NQuadsSerializer::new().serialize_to_tokio_async_write(write), - ), - RdfFormat::NTriples => ToTokioAsyncWriteQuadWriterKind::NTriples( - 
NTriplesSerializer::new().serialize_to_tokio_async_write(write), - ), - RdfFormat::RdfXml => ToTokioAsyncWriteQuadWriterKind::RdfXml( - RdfXmlSerializer::new().serialize_to_tokio_async_write(write), - ), - RdfFormat::TriG => ToTokioAsyncWriteQuadWriterKind::TriG( - TriGSerializer::new().serialize_to_tokio_async_write(write), - ), - RdfFormat::Turtle | RdfFormat::N3 => ToTokioAsyncWriteQuadWriterKind::Turtle( - TurtleSerializer::new().serialize_to_tokio_async_write(write), + formatter: match self.inner { + RdfSerializerKind::NQuads(s) => { + ToTokioAsyncWriteQuadWriterKind::NQuads(s.serialize_to_tokio_async_write(write)) + } + RdfSerializerKind::NTriples(s) => ToTokioAsyncWriteQuadWriterKind::NTriples( + s.serialize_to_tokio_async_write(write), ), + RdfSerializerKind::RdfXml(s) => { + ToTokioAsyncWriteQuadWriterKind::RdfXml(s.serialize_to_tokio_async_write(write)) + } + RdfSerializerKind::TriG(s) => { + ToTokioAsyncWriteQuadWriterKind::TriG(s.serialize_to_tokio_async_write(write)) + } + RdfSerializerKind::Turtle(s) => { + ToTokioAsyncWriteQuadWriterKind::Turtle(s.serialize_to_tokio_async_write(write)) + } }, } } @@ -202,17 +260,14 @@ impl From for RdfSerializer { /// use oxrdfio::{RdfFormat, RdfSerializer}; /// use oxrdf::{Quad, NamedNode}; /// -/// let mut buffer = Vec::new(); -/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(&mut buffer); +/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new()); /// writer.write_quad(&Quad { /// subject: NamedNode::new("http://example.com/s")?.into(), /// predicate: NamedNode::new("http://example.com/p")?, /// object: NamedNode::new("http://example.com/o")?.into(), /// graph_name: NamedNode::new("http://example.com/g")?.into(), /// })?; -/// writer.finish()?; -/// -/// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); +/// assert_eq!(writer.finish()?, b" .\n"); /// # Result::<_,Box>::Ok(()) /// ``` #[must_use] @@ -277,17 +332,14 @@ impl ToWriteQuadWriter { /// /// # #[tokio::main(flavor = "current_thread")] /// # async fn main() -> std::io::Result<()> { -/// let mut buffer = Vec::new(); -/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(&mut buffer); +/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new()); /// writer.write_quad(&Quad { /// subject: NamedNode::new_unchecked("http://example.com/s").into(), /// predicate: NamedNode::new_unchecked("http://example.com/p"), /// object: NamedNode::new_unchecked("http://example.com/o").into(), /// graph_name: NamedNode::new_unchecked("http://example.com/g").into() /// }).await?; -/// writer.finish().await?; -/// -/// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); +/// assert_eq!(writer.finish().await?, " .\n"); /// # Ok(()) /// # } /// ``` diff --git a/lib/oxrdfxml/Cargo.toml b/lib/oxrdfxml/Cargo.toml index 2906ac0e..6546b809 100644 --- a/lib/oxrdfxml/Cargo.toml +++ b/lib/oxrdfxml/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxrdfxml" -version = "0.1.0-alpha.1" +version = "0.1.0-alpha.2-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -18,7 +18,7 @@ default = [] async-tokio = ["dep:tokio", "quick-xml/async-tokio"] [dependencies] -oxrdf = { version = "0.2.0-alpha.1", path = "../oxrdf" } +oxrdf = { version = "0.2.0-alpha.2-dev", path = "../oxrdf" } oxilangtag = "0.1" oxiri = "0.2.3-alpha.1" quick-xml = ">=0.29, <0.32" diff --git a/lib/oxrdfxml/src/serializer.rs b/lib/oxrdfxml/src/serializer.rs index 
a19856f0..742b026e 100644 --- a/lib/oxrdfxml/src/serializer.rs +++ b/lib/oxrdfxml/src/serializer.rs @@ -1,8 +1,11 @@ use crate::utils::*; -use oxrdf::{Subject, SubjectRef, TermRef, TripleRef}; -use quick_xml::events::*; +use oxiri::{Iri, IriParseError}; +use oxrdf::vocab::rdf; +use oxrdf::{NamedNodeRef, Subject, SubjectRef, TermRef, TripleRef}; +use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use quick_xml::Writer; use std::borrow::Cow; +use std::collections::BTreeMap; use std::io; use std::io::Write; use std::sync::Arc; @@ -12,30 +15,52 @@ use tokio::io::AsyncWrite; /// A [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) serializer. /// /// ``` -/// use oxrdf::{NamedNodeRef, TripleRef}; +/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef}; /// use oxrdfxml::RdfXmlSerializer; /// -/// let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new()); +/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new()); /// writer.write_triple(TripleRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, /// NamedNodeRef::new("http://schema.org/Person")?, /// ))?; +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://schema.org/name")?, +/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"), +/// ))?; /// assert_eq!( -/// b"\n\n\t\n\t\t\n\t\n", +/// b"\n\n\t\n\t\tFoo Bar\n\t\n", /// writer.finish()?.as_slice() /// ); /// # Result::<_,Box>::Ok(()) /// ``` #[derive(Default)] #[must_use] -pub struct RdfXmlSerializer; +pub struct RdfXmlSerializer { + prefixes: BTreeMap, +} impl RdfXmlSerializer { /// Builds a new [`RdfXmlSerializer`]. #[inline] pub fn new() -> Self { - Self + Self { + prefixes: BTreeMap::new(), + } + } + + #[inline] + pub fn with_prefix( + mut self, + prefix_name: impl Into, + prefix_iri: impl Into, + ) -> Result { + self.prefixes.insert( + Iri::parse(prefix_iri.into())?.into_inner(), + prefix_name.into(), + ); + Ok(self) } /// Writes a RDF/XML file to a [`Write`] implementation. @@ -43,17 +68,22 @@ impl RdfXmlSerializer { /// This writer does unbuffered writes. 
/// /// ``` - /// use oxrdf::{NamedNodeRef, TripleRef}; + /// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef}; /// use oxrdfxml::RdfXmlSerializer; /// - /// let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new()); + /// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new()); /// writer.write_triple(TripleRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, /// NamedNodeRef::new("http://schema.org/Person")?, /// ))?; + /// writer.write_triple(TripleRef::new( + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://schema.org/name")?, + /// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"), + /// ))?; /// assert_eq!( - /// b"\n\n\t\n\t\t\n\t\n", + /// b"\n\n\t\n\t\tFoo Bar\n\t\n", /// writer.finish()?.as_slice() /// ); /// # Result::<_,Box>::Ok(()) @@ -62,9 +92,7 @@ impl RdfXmlSerializer { pub fn serialize_to_write(self, write: W) -> ToWriteRdfXmlWriter { ToWriteRdfXmlWriter { writer: Writer::new_with_indent(write, b'\t', 1), - inner: InnerRdfXmlWriter { - current_subject: None, - }, + inner: self.inner_writer(), } } @@ -73,19 +101,24 @@ impl RdfXmlSerializer { /// This writer does unbuffered writes. /// /// ``` - /// use oxrdf::{NamedNodeRef, TripleRef}; + /// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef}; /// use oxrdfxml::RdfXmlSerializer; /// /// # #[tokio::main(flavor = "current_thread")] - /// # async fn main() -> std::io::Result<()> { - /// let mut writer = RdfXmlSerializer::new().serialize_to_tokio_async_write(Vec::new()); + /// # async fn main() -> Result<(), Box> { + /// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new()); /// writer.write_triple(TripleRef::new( - /// NamedNodeRef::new_unchecked("http://example.com#me"), - /// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), - /// NamedNodeRef::new_unchecked("http://schema.org/Person"), + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, + /// NamedNodeRef::new("http://schema.org/Person")?, + /// )).await?; + /// writer.write_triple(TripleRef::new( + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://schema.org/name")?, + /// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"), /// )).await?; /// assert_eq!( - /// b"\n\n\t\n\t\t\n\t\n", + /// b"\n\n\t\n\t\tFoo Bar\n\t\n", /// writer.finish().await?.as_slice() /// ); /// # Ok(()) @@ -99,9 +132,19 @@ impl RdfXmlSerializer { ) -> ToTokioAsyncWriteRdfXmlWriter { ToTokioAsyncWriteRdfXmlWriter { writer: Writer::new_with_indent(write, b'\t', 1), - inner: InnerRdfXmlWriter { - current_subject: None, - }, + inner: self.inner_writer(), + } + } + + fn inner_writer(mut self) -> InnerRdfXmlWriter { + self.prefixes.insert( + "http://www.w3.org/1999/02/22-rdf-syntax-ns#".into(), + "rdf".into(), + ); + InnerRdfXmlWriter { + current_subject: None, + current_resource_tag: None, + prefixes: self.prefixes, } } } @@ -109,17 +152,22 @@ impl RdfXmlSerializer { /// Writes a RDF/XML file to a [`Write`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_write`]. 
/// /// ``` -/// use oxrdf::{NamedNodeRef, TripleRef}; +/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef}; /// use oxrdfxml::RdfXmlSerializer; /// -/// let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new()); +/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new()); /// writer.write_triple(TripleRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, /// NamedNodeRef::new("http://schema.org/Person")?, /// ))?; +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://schema.org/name")?, +/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"), +/// ))?; /// assert_eq!( -/// b"\n\n\t\n\t\t\n\t\n", +/// b"\n\n\t\n\t\tFoo Bar\n\t\n", /// writer.finish()?.as_slice() /// ); /// # Result::<_,Box>::Ok(()) @@ -158,19 +206,24 @@ impl ToWriteRdfXmlWriter { /// Writes a RDF/XML file to a [`AsyncWrite`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_tokio_async_write`]. /// /// ``` -/// use oxrdf::{NamedNodeRef, TripleRef}; +/// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef}; /// use oxrdfxml::RdfXmlSerializer; /// /// # #[tokio::main(flavor = "current_thread")] -/// # async fn main() -> std::io::Result<()> { -/// let mut writer = RdfXmlSerializer::new().serialize_to_tokio_async_write(Vec::new()); +/// # async fn main() -> Result<(), Box> { +/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new()); /// writer.write_triple(TripleRef::new( -/// NamedNodeRef::new_unchecked("http://example.com#me"), -/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), -/// NamedNodeRef::new_unchecked("http://schema.org/Person"), +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// )).await?; +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://schema.org/name")?, +/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"), /// )).await?; /// assert_eq!( -/// b"\n\n\t\n\t\t\n\t\n", +/// b"\n\n\t\n\t\tFoo Bar\n\t\n", /// writer.finish().await?.as_slice() /// ); /// # Ok(()) @@ -214,6 +267,8 @@ impl ToTokioAsyncWriteRdfXmlWriter { pub struct InnerRdfXmlWriter { current_subject: Option, + current_resource_tag: Option, + prefixes: BTreeMap, } impl InnerRdfXmlWriter { @@ -224,17 +279,36 @@ impl InnerRdfXmlWriter { output: &mut Vec>, ) -> io::Result<()> { if self.current_subject.is_none() { - Self::write_start(output); + self.write_start(output); } let triple = t.into(); // We open a new rdf:Description if useful if self.current_subject.as_ref().map(Subject::as_ref) != Some(triple.subject) { if self.current_subject.is_some() { - output.push(Event::End(BytesEnd::new("rdf:Description"))); + output.push(Event::End( + self.current_resource_tag + .take() + .map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new), + )); } + self.current_subject = Some(triple.subject.into_owned()); - let mut description_open = BytesStart::new("rdf:Description"); + let (mut description_open, with_type_tag) = if triple.predicate == rdf::TYPE { + if let TermRef::NamedNode(t) = triple.object { + let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(t); + let mut 
description_open = BytesStart::new(prop_qname.clone()); + if let Some(prop_xmlns) = prop_xmlns { + description_open.push_attribute(prop_xmlns); + } + self.current_resource_tag = Some(prop_qname.into_owned()); + (description_open, true) + } else { + (BytesStart::new("rdf:Description"), false) + } + } else { + (BytesStart::new("rdf:Description"), false) + }; match triple.subject { SubjectRef::NamedNode(node) => { description_open.push_attribute(("rdf:about", node.as_str())) @@ -250,20 +324,12 @@ impl InnerRdfXmlWriter { } } output.push(Event::Start(description_open)); + if with_type_tag { + return Ok(()); // No need for a value + } } - self.current_subject = Some(triple.subject.into_owned()); - let (prop_prefix, prop_value) = split_iri(triple.predicate.as_str()); - let (prop_qname, prop_xmlns) = - if prop_prefix == "http://www.w3.org/1999/02/22-rdf-syntax-ns#" { - (Cow::Owned(format!("rdf:{prop_value}")), None) - } else if prop_prefix == "http://www.w3.org/2000/xmlns/" { - (Cow::Owned(format!("xmlns:{prop_value}")), None) - } else if prop_value.is_empty() { - (Cow::Borrowed("p:"), Some(("xmlns:p", prop_prefix))) - } else { - (Cow::Borrowed(prop_value), Some(("xmlns", prop_prefix))) - }; + let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(triple.predicate); let mut property_open = BytesStart::new(prop_qname.clone()); if let Some(prop_xmlns) = prop_xmlns { property_open.push_attribute(prop_xmlns); @@ -302,29 +368,58 @@ impl InnerRdfXmlWriter { Ok(()) } - fn write_start(output: &mut Vec>) { + fn write_start(&self, output: &mut Vec>) { output.push(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None))); let mut rdf_open = BytesStart::new("rdf:RDF"); - rdf_open.push_attribute(("xmlns:rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")); + for (prefix_value, prefix_name) in &self.prefixes { + rdf_open.push_attribute(( + format!("xmlns:{prefix_name}").as_str(), + prefix_value.as_str(), + )); + } output.push(Event::Start(rdf_open)) } - fn finish(&self, output: &mut Vec>) { + fn finish(&mut self, output: &mut Vec>) { if self.current_subject.is_some() { - output.push(Event::End(BytesEnd::new("rdf:Description"))); + output.push(Event::End( + self.current_resource_tag + .take() + .map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new), + )); } else { - Self::write_start(output); + self.write_start(output); } output.push(Event::End(BytesEnd::new("rdf:RDF"))); } + + fn uri_to_qname_and_xmlns<'a>( + &self, + uri: NamedNodeRef<'a>, + ) -> (Cow<'a, str>, Option<(&'a str, &'a str)>) { + let (prop_prefix, prop_value) = split_iri(uri.as_str()); + if let Some(prop_prefix) = self.prefixes.get(prop_prefix) { + ( + if prop_prefix.is_empty() { + Cow::Borrowed(prop_value) + } else { + Cow::Owned(format!("{prop_prefix}:{prop_value}")) + }, + None, + ) + } else if prop_prefix == "http://www.w3.org/2000/xmlns/" { + (Cow::Owned(format!("xmlns:{prop_value}")), None) + } else if prop_value.is_empty() { + (Cow::Borrowed("p:"), Some(("xmlns:p", prop_prefix))) + } else { + (Cow::Borrowed(prop_value), Some(("xmlns", prop_prefix))) + } + } } fn map_err(error: quick_xml::Error) -> io::Error { if let quick_xml::Error::Io(error) = error { - match Arc::try_unwrap(error) { - Ok(error) => error, - Err(error) => io::Error::new(error.kind(), error), - } + Arc::try_unwrap(error).unwrap_or_else(|error| io::Error::new(error.kind(), error)) } else { io::Error::new(io::ErrorKind::Other, error) } diff --git a/lib/oxttl/Cargo.toml b/lib/oxttl/Cargo.toml index 7f7428f6..25d87b48 100644 --- a/lib/oxttl/Cargo.toml +++ 
b/lib/oxttl/Cargo.toml @@ -20,7 +20,7 @@ async-tokio = ["dep:tokio"] [dependencies] memchr = "2.5" -oxrdf = { version = "0.2.0-alpha.1", path = "../oxrdf" } +oxrdf = { version = "0.2.0-alpha.2-dev", path = "../oxrdf" } oxiri = "0.2.3-alpha.1" oxilangtag = "0.1" tokio = { version = "1.29", optional = true, features = ["io-util"] } diff --git a/lib/oxttl/src/lexer.rs b/lib/oxttl/src/lexer.rs index d11bd2fe..1eac849e 100644 --- a/lib/oxttl/src/lexer.rs +++ b/lib/oxttl/src/lexer.rs @@ -49,14 +49,14 @@ pub struct N3Lexer { // TODO: simplify by not giving is_end and fail with an "unexpected eof" is none is returned when is_end=true? impl TokenRecognizer for N3Lexer { - type Token<'a> = N3Token<'a>; type Options = N3LexerOptions; + type Token<'a> = N3Token<'a>; fn recognize_next_token<'a>( &mut self, data: &'a [u8], is_ending: bool, - options: &Self::Options, + options: &N3LexerOptions, ) -> Option<(usize, Result, TokenRecognizerError>)> { match *data.first()? { b'<' => match *data.get(1)? { @@ -914,12 +914,12 @@ impl N3Lexer { } // [158s] PN_CHARS_U ::= PN_CHARS_BASE | '_' | ':' - fn is_possible_pn_chars_u(c: char) -> bool { + pub(super) fn is_possible_pn_chars_u(c: char) -> bool { Self::is_possible_pn_chars_base(c) || c == '_' } // [160s] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] - fn is_possible_pn_chars(c: char) -> bool { + pub(crate) fn is_possible_pn_chars(c: char) -> bool { Self::is_possible_pn_chars_u(c) || matches!(c, '-' | '0'..='9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}') diff --git a/lib/oxttl/src/line_formats.rs b/lib/oxttl/src/line_formats.rs index e522bd53..5932f7a2 100644 --- a/lib/oxttl/src/line_formats.rs +++ b/lib/oxttl/src/line_formats.rs @@ -39,9 +39,9 @@ enum NQuadsState { } impl RuleRecognizer for NQuadsRecognizer { - type TokenRecognizer = N3Lexer; - type Output = Quad; type Context = NQuadsRecognizerContext; + type Output = Quad; + type TokenRecognizer = N3Lexer; fn error_recovery_state(mut self) -> Self { self.stack.clear(); diff --git a/lib/oxttl/src/n3.rs b/lib/oxttl/src/n3.rs index 8b70a01e..59ba9cf3 100644 --- a/lib/oxttl/src/n3.rs +++ b/lib/oxttl/src/n3.rs @@ -723,9 +723,9 @@ struct N3RecognizerContext { } impl RuleRecognizer for N3Recognizer { - type TokenRecognizer = N3Lexer; - type Output = N3Quad; type Context = N3RecognizerContext; + type Output = N3Quad; + type TokenRecognizer = N3Lexer; fn error_recovery_state(mut self) -> Self { self.stack.clear(); diff --git a/lib/oxttl/src/nquads.rs b/lib/oxttl/src/nquads.rs index f5108828..0ae22119 100644 --- a/lib/oxttl/src/nquads.rs +++ b/lib/oxttl/src/nquads.rs @@ -441,7 +441,7 @@ impl NQuadsSerializer { /// # Result::<_,Box>::Ok(()) /// ``` #[allow(clippy::unused_self)] - pub fn serialize(&self) -> LowLevelNQuadsWriter { + pub fn serialize(self) -> LowLevelNQuadsWriter { LowLevelNQuadsWriter } } diff --git a/lib/oxttl/src/ntriples.rs b/lib/oxttl/src/ntriples.rs index 4e0f7d7c..686907dc 100644 --- a/lib/oxttl/src/ntriples.rs +++ b/lib/oxttl/src/ntriples.rs @@ -437,7 +437,7 @@ impl NTriplesSerializer { /// # Result::<_,Box>::Ok(()) /// ``` #[allow(clippy::unused_self)] - pub fn serialize(&self) -> LowLevelNTriplesWriter { + pub fn serialize(self) -> LowLevelNTriplesWriter { LowLevelNTriplesWriter } } diff --git a/lib/oxttl/src/terse.rs b/lib/oxttl/src/terse.rs index c233c735..ebeff436 100644 --- a/lib/oxttl/src/terse.rs +++ b/lib/oxttl/src/terse.rs @@ -35,9 +35,9 @@ impl TriGRecognizerContext { } impl RuleRecognizer for TriGRecognizer { - type 
TokenRecognizer = N3Lexer; - type Output = Quad; type Context = TriGRecognizerContext; + type Output = Quad; + type TokenRecognizer = N3Lexer; fn error_recovery_state(mut self) -> Self { self.stack.clear(); diff --git a/lib/oxttl/src/trig.rs b/lib/oxttl/src/trig.rs index a27dd24d..aca75110 100644 --- a/lib/oxttl/src/trig.rs +++ b/lib/oxttl/src/trig.rs @@ -1,15 +1,18 @@ //! A [TriG](https://www.w3.org/TR/trig/) streaming parser implemented by [`TriGParser`] //! and a serializer implemented by [`TriGSerializer`]. +use crate::lexer::N3Lexer; use crate::terse::TriGRecognizer; #[cfg(feature = "async-tokio")] use crate::toolkit::FromTokioAsyncReadIterator; use crate::toolkit::{FromReadIterator, ParseError, Parser, SyntaxError}; use oxiri::{Iri, IriParseError}; -use oxrdf::vocab::xsd; -use oxrdf::{GraphName, NamedNode, Quad, QuadRef, Subject, TermRef}; +use oxrdf::vocab::{rdf, xsd}; +use oxrdf::{ + GraphName, GraphNameRef, LiteralRef, NamedNode, NamedNodeRef, Quad, QuadRef, Subject, TermRef, +}; use std::collections::hash_map::Iter; -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use std::fmt; use std::io::{self, Read, Write}; #[cfg(feature = "async-tokio")] @@ -582,7 +585,9 @@ impl<'a> Iterator for TriGPrefixesIter<'a> { /// use oxrdf::{NamedNodeRef, QuadRef}; /// use oxttl::TriGSerializer; /// -/// let mut writer = TriGSerializer::new().serialize_to_write(Vec::new()); +/// let mut writer = TriGSerializer::new() +/// .with_prefix("schema", "http://schema.org/")? +/// .serialize_to_write(Vec::new()); /// writer.write_quad(QuadRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, @@ -590,20 +595,37 @@ impl<'a> Iterator for TriGPrefixesIter<'a> { /// NamedNodeRef::new("http://example.com")?, /// ))?; /// assert_eq!( -/// b" {\n\t .\n}\n", +/// b"@prefix schema: .\n {\n\t a schema:Person .\n}\n", /// writer.finish()?.as_slice() /// ); /// # Result::<_,Box>::Ok(()) /// ``` #[derive(Default)] #[must_use] -pub struct TriGSerializer; +pub struct TriGSerializer { + prefixes: BTreeMap, +} impl TriGSerializer { /// Builds a new [`TriGSerializer`]. #[inline] pub fn new() -> Self { - Self + Self { + prefixes: BTreeMap::new(), + } + } + + #[inline] + pub fn with_prefix( + mut self, + prefix_name: impl Into, + prefix_iri: impl Into, + ) -> Result { + self.prefixes.insert( + Iri::parse(prefix_iri.into())?.into_inner(), + prefix_name.into(), + ); + Ok(self) } /// Writes a TriG file to a [`Write`] implementation. @@ -612,7 +634,9 @@ impl TriGSerializer { /// use oxrdf::{NamedNodeRef, QuadRef}; /// use oxttl::TriGSerializer; /// - /// let mut writer = TriGSerializer::new().serialize_to_write(Vec::new()); + /// let mut writer = TriGSerializer::new() + /// .with_prefix("schema", "http://schema.org/")? 
+ /// .serialize_to_write(Vec::new()); /// writer.write_quad(QuadRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, @@ -620,7 +644,7 @@ impl TriGSerializer { /// NamedNodeRef::new("http://example.com")?, /// ))?; /// assert_eq!( - /// b" {\n\t .\n}\n", + /// b"@prefix schema: .\n {\n\t a schema:Person .\n}\n", /// writer.finish()?.as_slice() /// ); /// # Result::<_,Box>::Ok(()) @@ -639,16 +663,20 @@ impl TriGSerializer { /// use oxttl::TriGSerializer; /// /// # #[tokio::main(flavor = "current_thread")] - /// # async fn main() -> std::io::Result<()> { - /// let mut writer = TriGSerializer::new().serialize_to_tokio_async_write(Vec::new()); - /// writer.write_quad(QuadRef::new( - /// NamedNodeRef::new_unchecked("http://example.com#me"), - /// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), - /// NamedNodeRef::new_unchecked("http://schema.org/Person"), - /// NamedNodeRef::new_unchecked("http://example.com"), - /// )).await?; + /// # async fn main() -> Result<(), Box> { + /// let mut writer = TriGSerializer::new() + /// .with_prefix("schema", "http://schema.org/")? + /// .serialize_to_tokio_async_write(Vec::new()); + /// writer + /// .write_quad(QuadRef::new( + /// NamedNodeRef::new_unchecked("http://example.com#me"), + /// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + /// NamedNodeRef::new_unchecked("http://schema.org/Person"), + /// NamedNodeRef::new_unchecked("http://example.com"), + /// )) + /// .await?; /// assert_eq!( - /// b" {\n\t .\n}\n", + /// b"@prefix schema: .\n {\n\t a schema:Person .\n}\n", /// writer.finish().await?.as_slice() /// ); /// # Ok(()) @@ -673,23 +701,29 @@ impl TriGSerializer { /// use oxttl::TriGSerializer; /// /// let mut buf = Vec::new(); - /// let mut writer = TriGSerializer::new().serialize(); - /// writer.write_quad(QuadRef::new( - /// NamedNodeRef::new("http://example.com#me")?, - /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, - /// NamedNodeRef::new("http://schema.org/Person")?, - /// NamedNodeRef::new("http://example.com")?, - /// ), &mut buf)?; + /// let mut writer = TriGSerializer::new() + /// .with_prefix("schema", "http://schema.org/")? + /// .serialize(); + /// writer.write_quad( + /// QuadRef::new( + /// NamedNodeRef::new("http://example.com#me")?, + /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, + /// NamedNodeRef::new("http://schema.org/Person")?, + /// NamedNodeRef::new("http://example.com")?, + /// ), + /// &mut buf, + /// )?; /// writer.finish(&mut buf)?; /// assert_eq!( - /// b" {\n\t .\n}\n", + /// b"@prefix schema: .\n {\n\t a schema:Person .\n}\n", /// buf.as_slice() /// ); /// # Result::<_,Box>::Ok(()) /// ``` - #[allow(clippy::unused_self)] - pub fn serialize(&self) -> LowLevelTriGWriter { + pub fn serialize(self) -> LowLevelTriGWriter { LowLevelTriGWriter { + prefixes: self.prefixes, + prelude_written: false, current_graph_name: GraphName::DefaultGraph, current_subject_predicate: None, } @@ -702,7 +736,9 @@ impl TriGSerializer { /// use oxrdf::{NamedNodeRef, QuadRef}; /// use oxttl::TriGSerializer; /// -/// let mut writer = TriGSerializer::new().serialize_to_write(Vec::new()); +/// let mut writer = TriGSerializer::new() +/// .with_prefix("schema", "http://schema.org/")? 
+/// .serialize_to_write(Vec::new()); /// writer.write_quad(QuadRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, @@ -710,7 +746,7 @@ impl TriGSerializer { /// NamedNodeRef::new("http://example.com")?, /// ))?; /// assert_eq!( -/// b" {\n\t .\n}\n", +/// b"@prefix schema: .\n {\n\t a schema:Person .\n}\n", /// writer.finish()?.as_slice() /// ); /// # Result::<_,Box>::Ok(()) @@ -741,16 +777,20 @@ impl ToWriteTriGWriter { /// use oxttl::TriGSerializer; /// /// # #[tokio::main(flavor = "current_thread")] -/// # async fn main() -> std::io::Result<()> { -/// let mut writer = TriGSerializer::new().serialize_to_tokio_async_write(Vec::new()); -/// writer.write_quad(QuadRef::new( -/// NamedNodeRef::new_unchecked("http://example.com#me"), -/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), -/// NamedNodeRef::new_unchecked("http://schema.org/Person"), -/// NamedNodeRef::new_unchecked("http://example.com"), -/// )).await?; +/// # async fn main() -> Result<(), Box> { +/// let mut writer = TriGSerializer::new() +/// .with_prefix("schema", "http://schema.org/")? +/// .serialize_to_tokio_async_write(Vec::new()); +/// writer +/// .write_quad(QuadRef::new( +/// NamedNodeRef::new_unchecked("http://example.com#me"), +/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), +/// NamedNodeRef::new_unchecked("http://schema.org/Person"), +/// NamedNodeRef::new_unchecked("http://example.com"), +/// )) +/// .await?; /// assert_eq!( -/// b" {\n\t .\n}\n", +/// b"@prefix schema: .\n {\n\t a schema:Person .\n}\n", /// writer.finish().await?.as_slice() /// ); /// # Ok(()) @@ -790,21 +830,28 @@ impl ToTokioAsyncWriteTriGWriter { /// use oxttl::TriGSerializer; /// /// let mut buf = Vec::new(); -/// let mut writer = TriGSerializer::new().serialize(); -/// writer.write_quad(QuadRef::new( -/// NamedNodeRef::new("http://example.com#me")?, -/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, -/// NamedNodeRef::new("http://schema.org/Person")?, -/// NamedNodeRef::new("http://example.com")?, -/// ), &mut buf)?; +/// let mut writer = TriGSerializer::new() +/// .with_prefix("schema", "http://schema.org/")? 
+/// .serialize(); +/// writer.write_quad( +/// QuadRef::new( +/// NamedNodeRef::new("http://example.com#me")?, +/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, +/// NamedNodeRef::new("http://schema.org/Person")?, +/// NamedNodeRef::new("http://example.com")?, +/// ), +/// &mut buf, +/// )?; /// writer.finish(&mut buf)?; /// assert_eq!( -/// b" {\n\t .\n}\n", +/// b"@prefix schema: .\n {\n\t a schema:Person .\n}\n", /// buf.as_slice() /// ); /// # Result::<_,Box>::Ok(()) /// ``` pub struct LowLevelTriGWriter { + prefixes: BTreeMap, + prelude_written: bool, current_graph_name: GraphName, current_subject_predicate: Option<(Subject, NamedNode)>, } @@ -816,6 +863,12 @@ impl LowLevelTriGWriter { q: impl Into>, mut write: impl Write, ) -> io::Result<()> { + if !self.prelude_written { + self.prelude_written = true; + for (prefix_iri, prefix_name) in &self.prefixes { + writeln!(write, "@prefix {prefix_name}: <{prefix_iri}> .")?; + } + } let q = q.into(); if q.graph_name == self.current_graph_name.as_ref() { if let Some((current_subject, current_predicate)) = @@ -824,7 +877,7 @@ impl LowLevelTriGWriter { if q.subject == current_subject.as_ref() { if q.predicate == current_predicate { self.current_subject_predicate = Some((current_subject, current_predicate)); - write!(write, " , {}", TurtleTerm(q.object)) + write!(write, " , {}", self.term(q.object)) } else { self.current_subject_predicate = Some((current_subject, q.predicate.into_owned())); @@ -832,7 +885,12 @@ impl LowLevelTriGWriter { if !self.current_graph_name.is_default_graph() { write!(write, "\t")?; } - write!(write, "\t{} {}", q.predicate, TurtleTerm(q.object)) + write!( + write, + "\t{} {}", + self.predicate(q.predicate), + self.term(q.object) + ) } } else { self.current_subject_predicate = @@ -844,9 +902,9 @@ impl LowLevelTriGWriter { write!( write, "{} {} {}", - TurtleTerm(q.subject.into()), - q.predicate, - TurtleTerm(q.object) + self.term(q.subject), + self.predicate(q.predicate), + self.term(q.object) ) } } else { @@ -858,9 +916,9 @@ impl LowLevelTriGWriter { write!( write, "{} {} {}", - TurtleTerm(q.subject.into()), - q.predicate, - TurtleTerm(q.object) + self.term(q.subject), + self.predicate(q.predicate), + self.term(q.object) ) } } else { @@ -873,20 +931,42 @@ impl LowLevelTriGWriter { self.current_graph_name = q.graph_name.into_owned(); self.current_subject_predicate = Some((q.subject.into_owned(), q.predicate.into_owned())); - if !self.current_graph_name.is_default_graph() { - writeln!(write, "{} {{", q.graph_name)?; - write!(write, "\t")?; + match self.current_graph_name.as_ref() { + GraphNameRef::NamedNode(g) => { + writeln!(write, "{} {{", self.term(g))?; + write!(write, "\t")?; + } + GraphNameRef::BlankNode(g) => { + writeln!(write, "{} {{", self.term(g))?; + write!(write, "\t")?; + } + GraphNameRef::DefaultGraph => (), } + write!( write, "{} {} {}", - TurtleTerm(q.subject.into()), - q.predicate, - TurtleTerm(q.object) + self.term(q.subject), + self.predicate(q.predicate), + self.term(q.object) ) } } + fn predicate<'a>(&'a self, named_node: impl Into>) -> TurtlePredicate<'a> { + TurtlePredicate { + named_node: named_node.into(), + prefixes: &self.prefixes, + } + } + + fn term<'a>(&'a self, term: impl Into>) -> TurtleTerm<'a> { + TurtleTerm { + term: term.into(), + prefixes: &self.prefixes, + } + } + /// Finishes to write the file. 
pub fn finish(&mut self, mut write: impl Write) -> io::Result<()> { if self.current_subject_predicate.is_some() { @@ -899,12 +979,43 @@ impl LowLevelTriGWriter { } } -struct TurtleTerm<'a>(TermRef<'a>); +struct TurtlePredicate<'a> { + named_node: NamedNodeRef<'a>, + prefixes: &'a BTreeMap, +} + +impl<'a> fmt::Display for TurtlePredicate<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.named_node == rdf::TYPE { + write!(f, "a") + } else { + TurtleTerm { + term: self.named_node.into(), + prefixes: self.prefixes, + } + .fmt(f) + } + } +} + +struct TurtleTerm<'a> { + term: TermRef<'a>, + prefixes: &'a BTreeMap, +} impl<'a> fmt::Display for TurtleTerm<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.0 { - TermRef::NamedNode(v) => write!(f, "{v}"), + match self.term { + TermRef::NamedNode(v) => { + for (prefix_iri, prefix_name) in self.prefixes { + if let Some(local_name) = v.as_str().strip_prefix(prefix_iri) { + if let Some(escaped_local_name) = escape_local_name(local_name) { + return write!(f, "{prefix_name}:{escaped_local_name}"); + } + } + } + write!(f, "{v}") + } TermRef::BlankNode(v) => write!(f, "{v}"), TermRef::Literal(v) => { let value = v.value(); @@ -917,8 +1028,18 @@ impl<'a> fmt::Display for TurtleTerm<'a> { }; if inline { write!(f, "{value}") - } else { + } else if v.is_plain() { write!(f, "{v}") + } else { + write!( + f, + "{}^^{}", + LiteralRef::new_simple_literal(v.value()), + TurtleTerm { + term: v.datatype().into(), + prefixes: self.prefixes + } + ) } } #[cfg(feature = "rdf-star")] @@ -926,9 +1047,18 @@ impl<'a> fmt::Display for TurtleTerm<'a> { write!( f, "<< {} {} {} >>", - TurtleTerm(t.subject.as_ref().into()), - t.predicate, - TurtleTerm(t.object.as_ref()) + TurtleTerm { + term: t.subject.as_ref().into(), + prefixes: self.prefixes + }, + TurtleTerm { + term: t.predicate.as_ref().into(), + prefixes: self.prefixes + }, + TurtleTerm { + term: t.object.as_ref(), + prefixes: self.prefixes + } ) } } @@ -1004,6 +1134,61 @@ fn is_turtle_double(value: &str) -> bool { (with_before || with_after) && !value.is_empty() && value.iter().all(u8::is_ascii_digit) } +fn escape_local_name(value: &str) -> Option { + // TODO: PLX + // [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? + let mut output = String::with_capacity(value.len()); + let mut chars = value.chars(); + let first = chars.next()?; + if N3Lexer::is_possible_pn_chars_u(first) || first == ':' || first.is_ascii_digit() { + output.push(first); + } else if can_be_escaped_in_local_name(first) { + output.push('\\'); + output.push(first); + } else { + return None; + } + + while let Some(c) = chars.next() { + if N3Lexer::is_possible_pn_chars(c) || c == ':' || (c == '.' && !chars.as_str().is_empty()) + { + output.push(c); + } else if can_be_escaped_in_local_name(c) { + output.push('\\'); + output.push(c); + } else { + return None; + } + } + + Some(output) +} + +fn can_be_escaped_in_local_name(c: char) -> bool { + matches!( + c, + '_' | '~' + | '.' + | '-' + | '!' + | '$' + | '&' + | '\'' + | '(' + | ')' + | '*' + | '+' + | ',' + | ';' + | '=' + | '/' + | '?' 
+ | '#' + | '@' + | '%' + ) +} + #[cfg(test)] mod tests { #![allow(clippy::panic_in_result_fn)] @@ -1014,11 +1199,20 @@ mod tests { #[test] fn test_write() -> io::Result<()> { - let mut writer = TriGSerializer::new().serialize_to_write(Vec::new()); + let mut writer = TriGSerializer::new() + .with_prefix("ex", "http://example.com/") + .unwrap() + .serialize_to_write(Vec::new()); writer.write_quad(QuadRef::new( NamedNodeRef::new_unchecked("http://example.com/s"), NamedNodeRef::new_unchecked("http://example.com/p"), - NamedNodeRef::new_unchecked("http://example.com/o"), + NamedNodeRef::new_unchecked("http://example.com/o."), + NamedNodeRef::new_unchecked("http://example.com/g"), + ))?; + writer.write_quad(QuadRef::new( + NamedNodeRef::new_unchecked("http://example.com/s"), + NamedNodeRef::new_unchecked("http://example.com/p"), + NamedNodeRef::new_unchecked("http://example.com/o{o}"), NamedNodeRef::new_unchecked("http://example.com/g"), ))?; writer.write_quad(QuadRef::new( @@ -1047,11 +1241,14 @@ mod tests { ))?; writer.write_quad(QuadRef::new( BlankNodeRef::new_unchecked("b"), - NamedNodeRef::new_unchecked("http://example.com/p2"), + NamedNodeRef::new_unchecked("http://example.org/p2"), LiteralRef::new_typed_literal("false", xsd::BOOLEAN), NamedNodeRef::new_unchecked("http://example.com/g2"), ))?; - assert_eq!(String::from_utf8(writer.finish()?).unwrap(), " {\n\t , \"foo\" ;\n\t\t \"foo\"@en .\n\t_:b _:b2 .\n}\n_:b true .\n {\n\t_:b false .\n}\n"); + assert_eq!( + String::from_utf8(writer.finish()?).unwrap(), + "@prefix ex: .\nex:g {\n\tex:s ex:p ex:o\\. , , \"foo\" ;\n\t\tex:p2 \"foo\"@en .\n\t_:b ex:p2 _:b2 .\n}\n_:b ex:p2 true .\nex:g2 {\n\t_:b false .\n}\n" + ); Ok(()) } } diff --git a/lib/oxttl/src/turtle.rs b/lib/oxttl/src/turtle.rs index f5193059..0cc9fd77 100644 --- a/lib/oxttl/src/turtle.rs +++ b/lib/oxttl/src/turtle.rs @@ -583,14 +583,16 @@ impl<'a> Iterator for TurtlePrefixesIter<'a> { /// use oxrdf::{NamedNodeRef, TripleRef}; /// use oxttl::TurtleSerializer; /// -/// let mut writer = TurtleSerializer::new().serialize_to_write(Vec::new()); +/// let mut writer = TurtleSerializer::new() +/// .with_prefix("schema", "http://schema.org/")? +/// .serialize_to_write(Vec::new()); /// writer.write_triple(TripleRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, /// NamedNodeRef::new("http://schema.org/Person")?, /// ))?; /// assert_eq!( -/// b" .\n", +/// b"@prefix schema: .\n a schema:Person .\n", /// writer.finish()?.as_slice() /// ); /// # Result::<_,Box>::Ok(()) @@ -608,20 +610,32 @@ impl TurtleSerializer { Self::default() } + #[inline] + pub fn with_prefix( + mut self, + prefix_name: impl Into, + prefix_iri: impl Into, + ) -> Result { + self.inner = self.inner.with_prefix(prefix_name, prefix_iri)?; + Ok(self) + } + /// Writes a Turtle file to a [`Write`] implementation. /// /// ``` /// use oxrdf::{NamedNodeRef, TripleRef}; /// use oxttl::TurtleSerializer; /// - /// let mut writer = TurtleSerializer::new().serialize_to_write(Vec::new()); + /// let mut writer = TurtleSerializer::new() + /// .with_prefix("schema", "http://schema.org/")? 
+    ///     .serialize_to_write(Vec::new());
     /// writer.write_triple(TripleRef::new(
     ///     NamedNodeRef::new("http://example.com#me")?,
     ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
     ///     NamedNodeRef::new("http://schema.org/Person")?,
     /// ))?;
     /// assert_eq!(
-    ///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
+    ///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
     ///     writer.finish()?.as_slice()
     /// );
     /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
@@ -639,15 +653,19 @@ impl TurtleSerializer {
     /// use oxttl::TurtleSerializer;
     ///
     /// # #[tokio::main(flavor = "current_thread")]
-    /// # async fn main() -> std::io::Result<()> {
-    /// let mut writer = TurtleSerializer::new().serialize_to_tokio_async_write(Vec::new());
-    /// writer.write_triple(TripleRef::new(
-    ///     NamedNodeRef::new_unchecked("http://example.com#me"),
-    ///     NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
-    ///     NamedNodeRef::new_unchecked("http://schema.org/Person"),
-    /// )).await?;
+    /// # async fn main() -> Result<(),Box<dyn std::error::Error>> {
+    /// let mut writer = TurtleSerializer::new()
+    ///     .with_prefix("schema", "http://schema.org/")?
+    ///     .serialize_to_tokio_async_write(Vec::new());
+    /// writer
+    ///     .write_triple(TripleRef::new(
+    ///         NamedNodeRef::new_unchecked("http://example.com#me"),
+    ///         NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+    ///         NamedNodeRef::new_unchecked("http://schema.org/Person"),
+    ///     ))
+    ///     .await?;
     /// assert_eq!(
-    ///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
+    ///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
     ///     writer.finish().await?.as_slice()
     /// );
     /// # Ok(())
@@ -670,20 +688,25 @@ impl TurtleSerializer {
     /// use oxttl::TurtleSerializer;
     ///
     /// let mut buf = Vec::new();
-    /// let mut writer = TurtleSerializer::new().serialize();
-    /// writer.write_triple(TripleRef::new(
-    ///     NamedNodeRef::new("http://example.com#me")?,
-    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
-    ///     NamedNodeRef::new("http://schema.org/Person")?,
-    /// ), &mut buf)?;
+    /// let mut writer = TurtleSerializer::new()
+    ///     .with_prefix("schema", "http://schema.org/")?
+    ///     .serialize();
+    /// writer.write_triple(
+    ///     TripleRef::new(
+    ///         NamedNodeRef::new("http://example.com#me")?,
+    ///         NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
+    ///         NamedNodeRef::new("http://schema.org/Person")?,
+    ///     ),
+    ///     &mut buf,
+    /// )?;
     /// writer.finish(&mut buf)?;
     /// assert_eq!(
-    ///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
+    ///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
     ///     buf.as_slice()
     /// );
     /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
     /// ```
-    pub fn serialize(&self) -> LowLevelTurtleWriter {
+    pub fn serialize(self) -> LowLevelTurtleWriter {
         LowLevelTurtleWriter {
             inner: self.inner.serialize(),
         }
@@ -696,14 +719,16 @@ impl TurtleSerializer {
 /// use oxrdf::{NamedNodeRef, TripleRef};
 /// use oxttl::TurtleSerializer;
 ///
-/// let mut writer = TurtleSerializer::new().serialize_to_write(Vec::new());
+/// let mut writer = TurtleSerializer::new()
+///     .with_prefix("schema", "http://schema.org/")?
+///     .serialize_to_write(Vec::new());
 /// writer.write_triple(TripleRef::new(
 ///     NamedNodeRef::new("http://example.com#me")?,
 ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
 ///     NamedNodeRef::new("http://schema.org/Person")?,
 /// ))?;
 /// assert_eq!(
-///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
+///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
 ///     writer.finish()?.as_slice()
 /// );
 /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
@@ -733,15 +758,19 @@ impl ToWriteTurtleWriter {
 /// use oxttl::TurtleSerializer;
 ///
 /// # #[tokio::main(flavor = "current_thread")]
-/// # async fn main() -> std::io::Result<()> {
-/// let mut writer = TurtleSerializer::new().serialize_to_tokio_async_write(Vec::new());
-/// writer.write_triple(TripleRef::new(
-///     NamedNodeRef::new_unchecked("http://example.com#me"),
-///     NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
-///     NamedNodeRef::new_unchecked("http://schema.org/Person")
-/// )).await?;
+/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
+/// let mut writer = TurtleSerializer::new()
+///     .with_prefix("schema", "http://schema.org/")?
+///     .serialize_to_tokio_async_write(Vec::new());
+/// writer
+///     .write_triple(TripleRef::new(
+///         NamedNodeRef::new_unchecked("http://example.com#me"),
+///         NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+///         NamedNodeRef::new_unchecked("http://schema.org/Person"),
+///     ))
+///     .await?;
 /// assert_eq!(
-///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
+///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
 ///     writer.finish().await?.as_slice()
 /// );
 /// # Ok(())
@@ -775,15 +804,20 @@ impl ToTokioAsyncWriteTurtleWriter {
 /// use oxttl::TurtleSerializer;
 ///
 /// let mut buf = Vec::new();
-/// let mut writer = TurtleSerializer::new().serialize();
-/// writer.write_triple(TripleRef::new(
-///     NamedNodeRef::new("http://example.com#me")?,
-///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
-///     NamedNodeRef::new("http://schema.org/Person")?,
-/// ), &mut buf)?;
+/// let mut writer = TurtleSerializer::new()
+///     .with_prefix("schema", "http://schema.org/")?
+///     .serialize();
+/// writer.write_triple(
+///     TripleRef::new(
+///         NamedNodeRef::new("http://example.com#me")?,
+///         NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
+///         NamedNodeRef::new("http://schema.org/Person")?,
+///     ),
+///     &mut buf,
+/// )?;
 /// writer.finish(&mut buf)?;
 /// assert_eq!(
-///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
+///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
 ///     buf.as_slice()
 /// );
 /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
diff --git a/lib/sparesults/Cargo.toml b/lib/sparesults/Cargo.toml
index 0ef5eb23..1bfdf4ab 100644
--- a/lib/sparesults/Cargo.toml
+++ b/lib/sparesults/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "sparesults"
-version = "0.2.0-alpha.1"
+version = "0.2.0-alpha.2-dev"
 authors = ["Tpt <thomas@pellissier-tanon.fr>"]
 license = "MIT OR Apache-2.0"
 readme = "README.md"
@@ -21,7 +21,7 @@ async-tokio = ["dep:tokio", "quick-xml/async-tokio", "json-event-parser/async-to
 [dependencies]
 json-event-parser = "0.2.0-alpha.2"
 memchr = "2.5"
-oxrdf = { version = "0.2.0-alpha.1", path = "../oxrdf" }
+oxrdf = { version = "0.2.0-alpha.2-dev", path = "../oxrdf" }
 quick-xml = ">=0.29, <0.32"
 tokio = { version = "1.29", optional = true, features = ["io-util"] }
 
diff --git a/lib/sparesults/src/xml.rs b/lib/sparesults/src/xml.rs
index c0450fac..fb038d2d 100644
--- a/lib/sparesults/src/xml.rs
+++ b/lib/sparesults/src/xml.rs
@@ -665,10 +665,9 @@ fn decode<'a, T>(
 
 fn map_xml_error(error: quick_xml::Error) -> io::Error {
     match error {
-        quick_xml::Error::Io(error) => match Arc::try_unwrap(error) {
-            Ok(error) => error,
-            Err(error) => io::Error::new(error.kind(), error),
-        },
+        quick_xml::Error::Io(error) => {
+            Arc::try_unwrap(error).unwrap_or_else(|error| io::Error::new(error.kind(), error))
+        }
         quick_xml::Error::UnexpectedEof(_) => io::Error::new(io::ErrorKind::UnexpectedEof, error),
         _ => io::Error::new(io::ErrorKind::InvalidData, error),
     }
diff --git a/lib/spargebra/Cargo.toml b/lib/spargebra/Cargo.toml
index 620bb041..09d42f88 100644
--- a/lib/spargebra/Cargo.toml
+++ b/lib/spargebra/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "spargebra"
-version = "0.3.0-alpha.1"
+version = "0.3.0-alpha.2-dev"
 authors = ["Tpt <thomas@pellissier-tanon.fr>"]
 license = "MIT OR Apache-2.0"
 readme = "README.md"
@@ -24,7 +24,7 @@ peg = "0.8"
 rand = "0.8"
 oxiri = "0.2.3-alpha.1"
 oxilangtag = "0.1"
-oxrdf = { version = "0.2.0-alpha.1", path = "../oxrdf" }
+oxrdf = { version = "0.2.0-alpha.2-dev", path = "../oxrdf" }
 
 [lints]
 workspace = true
diff --git a/lib/sparopt/Cargo.toml b/lib/sparopt/Cargo.toml
index 27698646..bdf55805 100644
--- a/lib/sparopt/Cargo.toml
+++ b/lib/sparopt/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "sparopt"
-version = "0.1.0-alpha.1"
+version = "0.1.0-alpha.2-dev"
 authors = ["Tpt <thomas@pellissier-tanon.fr>"]
 license = "MIT OR Apache-2.0"
 readme = "README.md"
@@ -20,9 +20,9 @@ sep-0002 = ["spargebra/sep-0002"]
 sep-0006 = ["spargebra/sep-0006"]
 
 [dependencies]
-oxrdf = { version = "0.2.0-alpha.1", path = "../oxrdf" }
+oxrdf = { version = "0.2.0-alpha.2-dev", path = "../oxrdf" }
 rand = "0.8"
-spargebra = { version = "0.3.0-alpha.1", path = "../spargebra" }
+spargebra = { version = "0.3.0-alpha.2-dev", path = "../spargebra" }
 
 [lints]
 workspace = true
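
Editor's note: for readers skimming the patch, a minimal standalone sketch of the prefix API it adds, assembled only from the `with_prefix`, `serialize_to_write`, `write_triple` and `finish` calls shown in the doc examples above; the `fn main` wrapper, the `print!` call and the expected-output comment are illustrative, not part of the patch.

    use oxrdf::{NamedNodeRef, TripleRef};
    use oxttl::TurtleSerializer;

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        // Prefixes are registered on the serializer before any triple is written.
        // IRIs that do not start with a registered prefix IRI are still written in
        // the usual <...> form, as the updated TriG test above demonstrates.
        let mut writer = TurtleSerializer::new()
            .with_prefix("schema", "http://schema.org/")?
            .serialize_to_write(Vec::new());
        writer.write_triple(TripleRef::new(
            NamedNodeRef::new("http://example.com#me")?,
            NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
            NamedNodeRef::new("http://schema.org/Person")?,
        ))?;
        // Expected output:
        // @prefix schema: <http://schema.org/> .
        // <http://example.com#me> a schema:Person .
        print!("{}", String::from_utf8(writer.finish()?)?);
        Ok(())
    }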