//! A [N-Triples](https://www.w3.org/TR/n-triples/) streaming parser implemented by [`NTriplesParser`] //! and a serializer implemented by [`NTriplesSerializer`]. use crate::line_formats::NQuadsRecognizer; #[cfg(feature = "async-tokio")] use crate::toolkit::FromTokioAsyncReadIterator; use crate::toolkit::{FromReadIterator, Parser, TurtleParseError, TurtleSyntaxError}; use oxrdf::{Triple, TripleRef}; use std::io::{self, Read, Write}; #[cfg(feature = "async-tokio")] use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; /// A [N-Triples](https://www.w3.org/TR/n-triples/) streaming parser. /// /// Support for [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) is available behind the `rdf-star` feature and the [`NTriplesParser::with_quoted_triples`] option. /// /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; /// use oxttl::NTriplesParser; /// /// let file = br#" . /// "Foo" . /// . /// "Bar" ."#; /// /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; /// let mut count = 0; /// for triple in NTriplesParser::new().parse_read(file.as_ref()) { /// let triple = triple?; /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { /// count += 1; /// } /// } /// assert_eq!(2, count); /// # Result::<_,Box>::Ok(()) /// ``` #[derive(Default)] #[must_use] pub struct NTriplesParser { unchecked: bool, #[cfg(feature = "rdf-star")] with_quoted_triples: bool, } impl NTriplesParser { /// Builds a new [`NTriplesParser`]. #[inline] pub fn new() -> Self { Self::default() } /// Assumes the file is valid to make parsing faster. /// /// It will skip some validations. /// /// Note that if the file is actually not valid, then broken RDF might be emitted by the parser. /// #[inline] pub fn unchecked(mut self) -> Self { self.unchecked = true; self } /// Enables [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star). #[cfg(feature = "rdf-star")] #[inline] pub fn with_quoted_triples(mut self) -> Self { self.with_quoted_triples = true; self } /// Parses a N-Triples file from a [`Read`] implementation. /// /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; /// use oxttl::NTriplesParser; /// /// let file = br#" . /// "Foo" . /// . /// "Bar" ."#; /// /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; /// let mut count = 0; /// for triple in NTriplesParser::new().parse_read(file.as_ref()) { /// let triple = triple?; /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { /// count += 1; /// } /// } /// assert_eq!(2, count); /// # Result::<_,Box>::Ok(()) /// ``` pub fn parse_read(self, read: R) -> FromReadNTriplesReader { FromReadNTriplesReader { inner: self.parse().parser.parse_read(read), } } /// Parses a N-Triples file from a [`AsyncRead`] implementation. /// /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; /// use oxttl::NTriplesParser; /// /// # #[tokio::main(flavor = "current_thread")] /// # async fn main() -> Result<(), oxttl::TurtleParseError> { /// let file = br#" . /// "Foo" . /// . /// "Bar" ."#; /// /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); /// let mut count = 0; /// let mut parser = NTriplesParser::new().parse_tokio_async_read(file.as_ref()); /// while let Some(triple) = parser.next().await { /// let triple = triple?; /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { /// count += 1; /// } /// } /// assert_eq!(2, count); /// # Ok(()) /// # } /// ``` #[cfg(feature = "async-tokio")] pub fn parse_tokio_async_read( self, read: R, ) -> FromTokioAsyncReadNTriplesReader { FromTokioAsyncReadNTriplesReader { inner: self.parse().parser.parse_tokio_async_read(read), } } /// Allows to parse a N-Triples file by using a low-level API. /// /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; /// use oxttl::NTriplesParser; /// /// let file: [&[u8]; 4] = [ /// b" .\n", /// b" \"Foo\" .\n", /// b" .\n", /// b" \"Bar\" .\n" /// ]; /// /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; /// let mut count = 0; /// let mut parser = NTriplesParser::new().parse(); /// let mut file_chunks = file.iter(); /// while !parser.is_end() { /// // We feed more data to the parser /// if let Some(chunk) = file_chunks.next() { /// parser.extend_from_slice(chunk); /// } else { /// parser.end(); // It's finished /// } /// // We read as many triples from the parser as possible /// while let Some(triple) = parser.read_next() { /// let triple = triple?; /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { /// count += 1; /// } /// } /// } /// assert_eq!(2, count); /// # Result::<_,Box>::Ok(()) /// ``` #[allow(clippy::unused_self)] pub fn parse(self) -> LowLevelNTriplesReader { LowLevelNTriplesReader { parser: NQuadsRecognizer::new_parser( false, #[cfg(feature = "rdf-star")] self.with_quoted_triples, self.unchecked, ), } } } /// Parses a N-Triples file from a [`Read`] implementation. Can be built using [`NTriplesParser::parse_read`]. /// /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; /// use oxttl::NTriplesParser; /// /// let file = br#" . /// "Foo" . /// . /// "Bar" ."#; /// /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; /// let mut count = 0; /// for triple in NTriplesParser::new().parse_read(file.as_ref()) { /// let triple = triple?; /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { /// count += 1; /// } /// } /// assert_eq!(2, count); /// # Result::<_,Box>::Ok(()) /// ``` #[must_use] pub struct FromReadNTriplesReader { inner: FromReadIterator, } impl Iterator for FromReadNTriplesReader { type Item = Result; fn next(&mut self) -> Option { Some(self.inner.next()?.map(Into::into)) } } /// Parses a N-Triples file from a [`AsyncRead`] implementation. Can be built using [`NTriplesParser::parse_tokio_async_read`]. /// /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; /// use oxttl::NTriplesParser; /// /// # #[tokio::main(flavor = "current_thread")] /// # async fn main() -> Result<(), oxttl::TurtleParseError> { /// let file = br#" . /// "Foo" . /// . /// "Bar" ."#; /// /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); /// let mut count = 0; /// let mut parser = NTriplesParser::new().parse_tokio_async_read(file.as_ref()); /// while let Some(triple) = parser.next().await { /// let triple = triple?; /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { /// count += 1; /// } /// } /// assert_eq!(2, count); /// # Ok(()) /// # } /// ``` #[cfg(feature = "async-tokio")] #[must_use] pub struct FromTokioAsyncReadNTriplesReader { inner: FromTokioAsyncReadIterator, } #[cfg(feature = "async-tokio")] impl FromTokioAsyncReadNTriplesReader { /// Reads the next triple or returns `None` if the file is finished. pub async fn next(&mut self) -> Option> { Some(self.inner.next().await?.map(Into::into)) } } /// Parses a N-Triples file by using a low-level API. Can be built using [`NTriplesParser::parse`]. /// /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; /// use oxttl::NTriplesParser; /// /// let file: [&[u8]; 4] = [ /// b" .\n", /// b" \"Foo\" .\n", /// b" .\n", /// b" \"Bar\" .\n" /// ]; /// /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; /// let mut count = 0; /// let mut parser = NTriplesParser::new().parse(); /// let mut file_chunks = file.iter(); /// while !parser.is_end() { /// // We feed more data to the parser /// if let Some(chunk) = file_chunks.next() { /// parser.extend_from_slice(chunk); /// } else { /// parser.end(); // It's finished /// } /// // We read as many triples from the parser as possible /// while let Some(triple) = parser.read_next() { /// let triple = triple?; /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { /// count += 1; /// } /// } /// } /// assert_eq!(2, count); /// # Result::<_,Box>::Ok(()) /// ``` pub struct LowLevelNTriplesReader { parser: Parser, } impl LowLevelNTriplesReader { /// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data. pub fn extend_from_slice(&mut self, other: &[u8]) { self.parser.extend_from_slice(other) } /// Tell the parser that the file is finished. /// /// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values. pub fn end(&mut self) { self.parser.end() } /// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`. pub fn is_end(&self) -> bool { self.parser.is_end() } /// Attempt to parse a new triple from the already provided data. /// /// Returns [`None`] if the parsing is finished or more data is required. /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice). pub fn read_next(&mut self) -> Option> { Some(self.parser.read_next()?.map(Into::into)) } } /// A [canonical](https://www.w3.org/TR/n-triples/#canonical-ntriples) [N-Triples](https://www.w3.org/TR/n-triples/) serializer. /// /// Support for [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) is available behind the `rdf-star` feature. /// /// ``` /// use oxrdf::{NamedNodeRef, TripleRef}; /// use oxttl::NTriplesSerializer; /// /// let mut writer = NTriplesSerializer::new().serialize_to_write(Vec::new()); /// writer.write_triple(TripleRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, /// NamedNodeRef::new("http://schema.org/Person")?, /// ))?; /// assert_eq!( /// b" .\n", /// writer.finish().as_slice() /// ); /// # Result::<_,Box>::Ok(()) /// ``` #[derive(Default)] #[must_use] pub struct NTriplesSerializer; impl NTriplesSerializer { /// Builds a new [`NTriplesSerializer`]. #[inline] pub fn new() -> Self { Self } /// Writes a N-Triples file to a [`Write`] implementation. /// /// ``` /// use oxrdf::{NamedNodeRef, TripleRef}; /// use oxttl::NTriplesSerializer; /// /// let mut writer = NTriplesSerializer::new().serialize_to_write(Vec::new()); /// writer.write_triple(TripleRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, /// NamedNodeRef::new("http://schema.org/Person")?, /// ))?; /// assert_eq!( /// b" .\n", /// writer.finish().as_slice() /// ); /// # Result::<_,Box>::Ok(()) /// ``` pub fn serialize_to_write(self, write: W) -> ToWriteNTriplesWriter { ToWriteNTriplesWriter { write, writer: self.serialize(), } } /// Writes a N-Triples file to a [`AsyncWrite`] implementation. /// /// ``` /// use oxrdf::{NamedNodeRef, TripleRef}; /// use oxttl::NTriplesSerializer; /// /// # #[tokio::main(flavor = "current_thread")] /// # async fn main() -> std::io::Result<()> { /// let mut writer = NTriplesSerializer::new().serialize_to_tokio_async_write(Vec::new()); /// writer.write_triple(TripleRef::new( /// NamedNodeRef::new_unchecked("http://example.com#me"), /// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), /// NamedNodeRef::new_unchecked("http://schema.org/Person"), /// )).await?; /// assert_eq!( /// b" .\n", /// writer.finish().as_slice() /// ); /// # Ok(()) /// # } /// ``` #[cfg(feature = "async-tokio")] pub fn serialize_to_tokio_async_write( self, write: W, ) -> ToTokioAsyncWriteNTriplesWriter { ToTokioAsyncWriteNTriplesWriter { write, writer: self.serialize(), buffer: Vec::new(), } } /// Builds a low-level N-Triples writer. /// /// ``` /// use oxrdf::{NamedNodeRef, TripleRef}; /// use oxttl::NTriplesSerializer; /// /// let mut buf = Vec::new(); /// let mut writer = NTriplesSerializer::new().serialize(); /// writer.write_triple(TripleRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, /// NamedNodeRef::new("http://schema.org/Person")?, /// ), &mut buf)?; /// assert_eq!( /// b" .\n", /// buf.as_slice() /// ); /// # Result::<_,Box>::Ok(()) /// ``` #[allow(clippy::unused_self)] pub fn serialize(self) -> LowLevelNTriplesWriter { LowLevelNTriplesWriter } } /// Writes a N-Triples file to a [`Write`] implementation. Can be built using [`NTriplesSerializer::serialize_to_write`]. /// /// ``` /// use oxrdf::{NamedNodeRef, TripleRef}; /// use oxttl::NTriplesSerializer; /// /// let mut writer = NTriplesSerializer::new().serialize_to_write(Vec::new()); /// writer.write_triple(TripleRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, /// NamedNodeRef::new("http://schema.org/Person")?, /// ))?; /// assert_eq!( /// b" .\n", /// writer.finish().as_slice() /// ); /// # Result::<_,Box>::Ok(()) /// ``` #[must_use] pub struct ToWriteNTriplesWriter { write: W, writer: LowLevelNTriplesWriter, } impl ToWriteNTriplesWriter { /// Writes an extra triple. pub fn write_triple<'a>(&mut self, t: impl Into>) -> io::Result<()> { self.writer.write_triple(t, &mut self.write) } /// Ends the write process and returns the underlying [`Write`]. pub fn finish(self) -> W { self.write } } /// Writes a N-Triples file to a [`AsyncWrite`] implementation. Can be built using [`NTriplesSerializer::serialize_to_tokio_async_write`]. /// /// ``` /// use oxrdf::{NamedNodeRef, TripleRef}; /// use oxttl::NTriplesSerializer; /// /// # #[tokio::main(flavor = "current_thread")] /// # async fn main() -> std::io::Result<()> { /// let mut writer = NTriplesSerializer::new().serialize_to_tokio_async_write(Vec::new()); /// writer.write_triple(TripleRef::new( /// NamedNodeRef::new_unchecked("http://example.com#me"), /// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), /// NamedNodeRef::new_unchecked("http://schema.org/Person") /// )).await?; /// assert_eq!( /// b" .\n", /// writer.finish().as_slice() /// ); /// # Ok(()) /// # } /// ``` #[cfg(feature = "async-tokio")] #[must_use] pub struct ToTokioAsyncWriteNTriplesWriter { write: W, writer: LowLevelNTriplesWriter, buffer: Vec, } #[cfg(feature = "async-tokio")] impl ToTokioAsyncWriteNTriplesWriter { /// Writes an extra triple. pub async fn write_triple<'a>(&mut self, t: impl Into>) -> io::Result<()> { self.writer.write_triple(t, &mut self.buffer)?; self.write.write_all(&self.buffer).await?; self.buffer.clear(); Ok(()) } /// Ends the write process and returns the underlying [`Write`]. pub fn finish(self) -> W { self.write } } /// Writes a N-Triples file by using a low-level API. Can be built using [`NTriplesSerializer::serialize`]. /// /// ``` /// use oxrdf::{NamedNodeRef, TripleRef}; /// use oxttl::NTriplesSerializer; /// /// let mut buf = Vec::new(); /// let mut writer = NTriplesSerializer::new().serialize(); /// writer.write_triple(TripleRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, /// NamedNodeRef::new("http://schema.org/Person")?, /// ), &mut buf)?; /// assert_eq!( /// b" .\n", /// buf.as_slice() /// ); /// # Result::<_,Box>::Ok(()) /// ``` pub struct LowLevelNTriplesWriter; impl LowLevelNTriplesWriter { /// Writes an extra triple. #[allow(clippy::unused_self)] pub fn write_triple<'a>( &mut self, t: impl Into>, mut write: impl Write, ) -> io::Result<()> { writeln!(write, "{} .", t.into()) } } #[cfg(test)] mod tests { use super::*; use oxrdf::{Literal, NamedNode}; #[test] fn unchecked_parsing() { let triples = NTriplesParser::new() .unchecked() .parse_read(r#" "baz"@toolonglangtag ."#.as_bytes()) .collect::, _>>() .unwrap(); assert_eq!( triples, [Triple::new( NamedNode::new_unchecked("foo"), NamedNode::new_unchecked("bar"), Literal::new_language_tagged_literal_unchecked("baz", "toolonglangtag"), )] ) } }