//! A [TriG](https://www.w3.org/TR/trig/) streaming parser implemented by [`TriGParser`]. use crate::terse::TriGRecognizer; use crate::toolkit::{FromReadIterator, ParseError, ParseOrIoError, Parser}; use oxiri::{Iri, IriParseError}; use oxrdf::{vocab::xsd, GraphName, NamedNode, Quad, QuadRef, Subject, TermRef}; use std::collections::HashMap; use std::fmt; use std::io::{self, Read, Write}; /// A [TriG](https://www.w3.org/TR/trig/) streaming parser. /// /// Support for [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star) is available behind the `rdf-star` feature and the [`TriGParser::with_quoted_triples`] option. /// /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; /// use oxttl::TriGParser; /// /// let file = b"@base . /// @prefix schema: . /// a schema:Person ; /// schema:name \"Foo\" . /// a schema:Person ; /// schema:name \"Bar\" ."; /// /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; /// let mut count = 0; /// for quad in TriGParser::new().parse_from_read(file.as_ref()) { /// let quad = quad?; /// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() { /// count += 1; /// } /// } /// assert_eq!(2, count); /// # Result::<_,Box>::Ok(()) /// ``` #[derive(Default)] pub struct TriGParser { base: Option>, prefixes: HashMap>, #[cfg(feature = "rdf-star")] with_quoted_triples: bool, } impl TriGParser { /// Builds a new [`TriGParser`]. #[inline] pub fn new() -> Self { Self::default() } #[inline] pub fn with_base_iri(mut self, base_iri: impl Into) -> Result { self.base = Some(Iri::parse(base_iri.into())?); Ok(self) } #[inline] pub fn with_prefix( mut self, prefix_name: impl Into, prefix_iri: impl Into, ) -> Result { self.prefixes .insert(prefix_name.into(), Iri::parse(prefix_iri.into())?); Ok(self) } /// Enables [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star). #[cfg(feature = "rdf-star")] #[inline] #[must_use] pub fn with_quoted_triples(mut self) -> Self { self.with_quoted_triples = true; self } /// Parses a TriG file from a [`Read`] implementation. /// /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; /// use oxttl::TriGParser; /// /// let file = b"@base . /// @prefix schema: . /// a schema:Person ; /// schema:name \"Foo\" . /// a schema:Person ; /// schema:name \"Bar\" ."; /// /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; /// let mut count = 0; /// for quad in TriGParser::new().parse_from_read(file.as_ref()) { /// let quad = quad?; /// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() { /// count += 1; /// } /// } /// assert_eq!(2, count); /// # Result::<_,Box>::Ok(()) /// ``` pub fn parse_from_read(&self, read: R) -> FromReadTriGReader { FromReadTriGReader { inner: self.parse().parser.parse_from_read(read), } } /// Allows to parse a TriG file by using a low-level API. /// /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; /// use oxttl::TriGParser; /// /// let file: [&[u8]; 5] = [b"@base ", /// b". @prefix schema: .", /// b" a schema:Person", /// b" ; schema:name \"Foo\" . ", /// b" a schema:Person ; schema:name \"Bar\" ." /// ]; /// /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; /// let mut count = 0; /// let mut parser = TriGParser::new().parse(); /// let mut file_chunks = file.iter(); /// while !parser.is_end() { /// // We feed more data to the parser /// if let Some(chunk) = file_chunks.next() { /// parser.extend_from_slice(chunk); /// } else { /// parser.end(); // It's finished /// } /// // We read as many quads from the parser as possible /// while let Some(quad) = parser.read_next() { /// let quad = quad?; /// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() { /// count += 1; /// } /// } /// } /// assert_eq!(2, count); /// # Result::<_,Box>::Ok(()) /// ``` pub fn parse(&self) -> LowLevelTriGReader { LowLevelTriGReader { parser: TriGRecognizer::new_parser( true, #[cfg(feature = "rdf-star")] self.with_quoted_triples, self.base.clone(), self.prefixes.clone(), ), } } } /// Parses a TriG file from a [`Read`] implementation. Can be built using [`TriGParser::parse_from_read`]. /// /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; /// use oxttl::TriGParser; /// /// let file = b"@base . /// @prefix schema: . /// a schema:Person ; /// schema:name \"Foo\" . /// a schema:Person ; /// schema:name \"Bar\" ."; /// /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; /// let mut count = 0; /// for quad in TriGParser::new().parse_from_read(file.as_ref()) { /// let quad = quad?; /// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() { /// count += 1; /// } /// } /// assert_eq!(2, count); /// # Result::<_,Box>::Ok(()) /// ``` pub struct FromReadTriGReader { inner: FromReadIterator, } impl Iterator for FromReadTriGReader { type Item = Result; fn next(&mut self) -> Option> { self.inner.next() } } /// Parses a TriG file by using a low-level API. Can be built using [`TriGParser::parse`]. /// /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; /// use oxttl::TriGParser; /// /// let file: [&[u8]; 5] = [b"@base ", /// b". @prefix schema: .", /// b" a schema:Person", /// b" ; schema:name \"Foo\" . ", /// b" a schema:Person ; schema:name \"Bar\" ." /// ]; /// /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?; /// let mut count = 0; /// let mut parser = TriGParser::new().parse(); /// let mut file_chunks = file.iter(); /// while !parser.is_end() { /// // We feed more data to the parser /// if let Some(chunk) = file_chunks.next() { /// parser.extend_from_slice(chunk); /// } else { /// parser.end(); // It's finished /// } /// // We read as many quads from the parser as possible /// while let Some(quad) = parser.read_next() { /// let quad = quad?; /// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() { /// count += 1; /// } /// } /// } /// assert_eq!(2, count); /// # Result::<_,Box>::Ok(()) /// ``` pub struct LowLevelTriGReader { parser: Parser, } impl LowLevelTriGReader { /// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data. pub fn extend_from_slice(&mut self, other: &[u8]) { self.parser.extend_from_slice(other) } /// Tell the parser that the file is finished. /// /// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values. pub fn end(&mut self) { self.parser.end() } /// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`. pub fn is_end(&self) -> bool { self.parser.is_end() } /// Attempt to parse a new quad from the already provided data. /// /// Returns [`None`] if the parsing is finished or more data is required. /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice). pub fn read_next(&mut self) -> Option> { self.parser.read_next() } } /// A [TriG](https://www.w3.org/TR/trig/) serializer. /// /// Support for [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star) is available behind the `rdf-star` feature. /// /// ``` /// use oxrdf::{NamedNodeRef, QuadRef}; /// use oxttl::TriGSerializer; /// /// let mut writer = TriGSerializer::new().serialize_to_write(Vec::new()); /// writer.write_quad(QuadRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, /// NamedNodeRef::new("http://schema.org/Person")?, /// NamedNodeRef::new("http://example.com")?, /// ))?; /// assert_eq!( /// b" {\n\t .\n}\n", /// writer.finish()?.as_slice() /// ); /// # Result::<_,Box>::Ok(()) /// ``` #[derive(Default)] pub struct TriGSerializer; impl TriGSerializer { /// Builds a new [`TriGSerializer`]. #[inline] pub fn new() -> Self { Self } /// Writes a TriG file to a [`Write`] implementation. /// /// ``` /// use oxrdf::{NamedNodeRef, QuadRef}; /// use oxttl::TriGSerializer; /// /// let mut writer = TriGSerializer::new().serialize_to_write(Vec::new()); /// writer.write_quad(QuadRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, /// NamedNodeRef::new("http://schema.org/Person")?, /// NamedNodeRef::new("http://example.com")?, /// ))?; /// assert_eq!( /// b" {\n\t .\n}\n", /// writer.finish()?.as_slice() /// ); /// # Result::<_,Box>::Ok(()) /// ``` pub fn serialize_to_write(&self, write: W) -> ToWriteTriGWriter { ToWriteTriGWriter { write, writer: self.serialize(), } } /// Builds a low-level TriG writer. /// /// ``` /// use oxrdf::{NamedNodeRef, QuadRef}; /// use oxttl::TriGSerializer; /// /// let mut buf = Vec::new(); /// let mut writer = TriGSerializer::new().serialize(); /// writer.write_quad(QuadRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, /// NamedNodeRef::new("http://schema.org/Person")?, /// NamedNodeRef::new("http://example.com")?, /// ), &mut buf)?; /// writer.finish(&mut buf)?; /// assert_eq!( /// b" {\n\t .\n}\n", /// buf.as_slice() /// ); /// # Result::<_,Box>::Ok(()) /// ``` #[allow(clippy::unused_self)] pub fn serialize(&self) -> LowLevelTriGWriter { LowLevelTriGWriter { current_graph_name: GraphName::DefaultGraph, current_subject_predicate: None, } } } /// Writes a TriG file to a [`Write`] implementation. Can be built using [`TriGSerializer::serialize_to_write`]. /// /// ``` /// use oxrdf::{NamedNodeRef, QuadRef}; /// use oxttl::TriGSerializer; /// /// let mut writer = TriGSerializer::new().serialize_to_write(Vec::new()); /// writer.write_quad(QuadRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, /// NamedNodeRef::new("http://schema.org/Person")?, /// NamedNodeRef::new("http://example.com")?, /// ))?; /// assert_eq!( /// b" {\n\t .\n}\n", /// writer.finish()?.as_slice() /// ); /// # Result::<_,Box>::Ok(()) /// ``` pub struct ToWriteTriGWriter { write: W, writer: LowLevelTriGWriter, } impl ToWriteTriGWriter { /// Writes an extra quad. pub fn write_quad<'a>(&mut self, q: impl Into>) -> io::Result<()> { self.writer.write_quad(q, &mut self.write) } /// Ends the write process and returns the underlying [`Write`]. pub fn finish(mut self) -> io::Result { self.writer.finish(&mut self.write)?; Ok(self.write) } } /// Writes a TriG file by using a low-level API. Can be built using [`TriGSerializer::serialize`]. /// /// ``` /// use oxrdf::{NamedNodeRef, QuadRef}; /// use oxttl::TriGSerializer; /// /// let mut buf = Vec::new(); /// let mut writer = TriGSerializer::new().serialize(); /// writer.write_quad(QuadRef::new( /// NamedNodeRef::new("http://example.com#me")?, /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?, /// NamedNodeRef::new("http://schema.org/Person")?, /// NamedNodeRef::new("http://example.com")?, /// ), &mut buf)?; /// writer.finish(&mut buf)?; /// assert_eq!( /// b" {\n\t .\n}\n", /// buf.as_slice() /// ); /// # Result::<_,Box>::Ok(()) /// ``` pub struct LowLevelTriGWriter { current_graph_name: GraphName, current_subject_predicate: Option<(Subject, NamedNode)>, } impl LowLevelTriGWriter { /// Writes an extra quad. pub fn write_quad<'a>( &mut self, q: impl Into>, mut write: impl Write, ) -> io::Result<()> { let q = q.into(); if q.graph_name == self.current_graph_name.as_ref() { if let Some((current_subject, current_predicate)) = self.current_subject_predicate.take() { if q.subject == current_subject.as_ref() { if q.predicate == current_predicate { self.current_subject_predicate = Some((current_subject, current_predicate)); write!(write, " , {}", TurtleTerm(q.object)) } else { self.current_subject_predicate = Some((current_subject, q.predicate.into_owned())); writeln!(write, " ;")?; if !self.current_graph_name.is_default_graph() { write!(write, "\t")?; } write!(write, "\t{} {}", q.predicate, TurtleTerm(q.object)) } } else { self.current_subject_predicate = Some((q.subject.into_owned(), q.predicate.into_owned())); writeln!(write, " .")?; if !self.current_graph_name.is_default_graph() { write!(write, "\t")?; } write!( write, "{} {} {}", TurtleTerm(q.subject.into()), q.predicate, TurtleTerm(q.object) ) } } else { self.current_subject_predicate = Some((q.subject.into_owned(), q.predicate.into_owned())); if !self.current_graph_name.is_default_graph() { write!(write, "\t")?; } write!( write, "{} {} {}", TurtleTerm(q.subject.into()), q.predicate, TurtleTerm(q.object) ) } } else { if self.current_subject_predicate.is_some() { writeln!(write, " .")?; } if !self.current_graph_name.is_default_graph() { writeln!(write, "}}")?; } self.current_graph_name = q.graph_name.into_owned(); self.current_subject_predicate = Some((q.subject.into_owned(), q.predicate.into_owned())); if !self.current_graph_name.is_default_graph() { writeln!(write, "{} {{", q.graph_name)?; write!(write, "\t")?; } write!( write, "{} {} {}", TurtleTerm(q.subject.into()), q.predicate, TurtleTerm(q.object) ) } } /// Finishes to write the file. pub fn finish(&mut self, mut write: impl Write) -> io::Result<()> { if self.current_subject_predicate.is_some() { writeln!(write, " .")?; } if !self.current_graph_name.is_default_graph() { writeln!(write, "}}")?; } Ok(()) } } struct TurtleTerm<'a>(TermRef<'a>); impl<'a> fmt::Display for TurtleTerm<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.0 { TermRef::NamedNode(v) => write!(f, "{v}"), TermRef::BlankNode(v) => write!(f, "{v}"), TermRef::Literal(v) => { let value = v.value(); let inline = match v.datatype() { xsd::BOOLEAN => is_turtle_boolean(value), xsd::INTEGER => is_turtle_integer(value), xsd::DECIMAL => is_turtle_decimal(value), xsd::DOUBLE => is_turtle_double(value), _ => false, }; if inline { write!(f, "{value}") } else { write!(f, "{v}") } } #[cfg(feature = "rdf-star")] TermRef::Triple(t) => { write!( f, "<< {} {} {} >>", TurtleTerm(t.subject.as_ref().into()), t.predicate, TurtleTerm(t.object.as_ref()) ) } } } } fn is_turtle_boolean(value: &str) -> bool { matches!(value, "true" | "false") } fn is_turtle_integer(value: &str) -> bool { // [19] INTEGER ::= [+-]? [0-9]+ let mut value = value.as_bytes(); if let Some(v) = value.strip_prefix(b"+") { value = v; } else if let Some(v) = value.strip_prefix(b"-") { value = v; } !value.is_empty() && value.iter().all(u8::is_ascii_digit) } fn is_turtle_decimal(value: &str) -> bool { // [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+ let mut value = value.as_bytes(); if let Some(v) = value.strip_prefix(b"+") { value = v; } else if let Some(v) = value.strip_prefix(b"-") { value = v; } while value.first().map_or(false, u8::is_ascii_digit) { value = &value[1..]; } let Some(value) = value.strip_prefix(b".") else { return false; }; !value.is_empty() && value.iter().all(u8::is_ascii_digit) } fn is_turtle_double(value: &str) -> bool { // [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT) // [154s] EXPONENT ::= [eE] [+-]? [0-9]+ let mut value = value.as_bytes(); if let Some(v) = value.strip_prefix(b"+") { value = v; } else if let Some(v) = value.strip_prefix(b"-") { value = v; } let mut with_before = false; while value.first().map_or(false, u8::is_ascii_digit) { value = &value[1..]; with_before = true; } let mut with_after = false; if let Some(v) = value.strip_prefix(b".") { value = v; while value.first().map_or(false, u8::is_ascii_digit) { value = &value[1..]; with_after = true; } } if let Some(v) = value.strip_prefix(b"e") { value = v; } else if let Some(v) = value.strip_prefix(b"E") { value = v; } else { return false; } if let Some(v) = value.strip_prefix(b"+") { value = v; } else if let Some(v) = value.strip_prefix(b"-") { value = v; } (with_before || with_after) && !value.is_empty() && value.iter().all(u8::is_ascii_digit) } #[cfg(test)] mod tests { use super::*; use oxrdf::vocab::xsd; use oxrdf::{BlankNodeRef, GraphNameRef, LiteralRef, NamedNodeRef}; #[test] fn test_write() -> io::Result<()> { let mut writer = TriGSerializer::new().serialize_to_write(Vec::new()); writer.write_quad(QuadRef::new( NamedNodeRef::new_unchecked("http://example.com/s"), NamedNodeRef::new_unchecked("http://example.com/p"), NamedNodeRef::new_unchecked("http://example.com/o"), NamedNodeRef::new_unchecked("http://example.com/g"), ))?; writer.write_quad(QuadRef::new( NamedNodeRef::new_unchecked("http://example.com/s"), NamedNodeRef::new_unchecked("http://example.com/p"), LiteralRef::new_simple_literal("foo"), NamedNodeRef::new_unchecked("http://example.com/g"), ))?; writer.write_quad(QuadRef::new( NamedNodeRef::new_unchecked("http://example.com/s"), NamedNodeRef::new_unchecked("http://example.com/p2"), LiteralRef::new_language_tagged_literal_unchecked("foo", "en"), NamedNodeRef::new_unchecked("http://example.com/g"), ))?; writer.write_quad(QuadRef::new( BlankNodeRef::new_unchecked("b"), NamedNodeRef::new_unchecked("http://example.com/p2"), BlankNodeRef::new_unchecked("b2"), NamedNodeRef::new_unchecked("http://example.com/g"), ))?; writer.write_quad(QuadRef::new( BlankNodeRef::new_unchecked("b"), NamedNodeRef::new_unchecked("http://example.com/p2"), LiteralRef::new_typed_literal("true", xsd::BOOLEAN), GraphNameRef::DefaultGraph, ))?; writer.write_quad(QuadRef::new( BlankNodeRef::new_unchecked("b"), NamedNodeRef::new_unchecked("http://example.com/p2"), LiteralRef::new_typed_literal("false", xsd::BOOLEAN), NamedNodeRef::new_unchecked("http://example.com/g2"), ))?; assert_eq!(String::from_utf8(writer.finish()?).unwrap(), " {\n\t , \"foo\" ;\n\t\t \"foo\"@en .\n\t_:b _:b2 .\n}\n_:b true .\n {\n\t_:b false .\n}\n"); Ok(()) } }