//! Utilities to read RDF graphs and datasets. pub use crate::error::{ParseError, SyntaxError}; use crate::format::RdfFormat; use oxrdf::{BlankNode, GraphName, IriParseError, Quad, Subject, Term, Triple}; #[cfg(feature = "async-tokio")] use oxrdfxml::FromTokioAsyncReadRdfXmlReader; use oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser}; #[cfg(feature = "async-tokio")] use oxttl::n3::FromTokioAsyncReadN3Reader; use oxttl::n3::{FromReadN3Reader, N3Parser, N3Quad, N3Term}; #[cfg(feature = "async-tokio")] use oxttl::nquads::FromTokioAsyncReadNQuadsReader; use oxttl::nquads::{FromReadNQuadsReader, NQuadsParser}; #[cfg(feature = "async-tokio")] use oxttl::ntriples::FromTokioAsyncReadNTriplesReader; use oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser}; #[cfg(feature = "async-tokio")] use oxttl::trig::FromTokioAsyncReadTriGReader; use oxttl::trig::{FromReadTriGReader, TriGParser}; #[cfg(feature = "async-tokio")] use oxttl::turtle::FromTokioAsyncReadTurtleReader; use oxttl::turtle::{FromReadTurtleReader, TurtleParser}; use std::collections::HashMap; use std::io::Read; #[cfg(feature = "async-tokio")] use tokio::io::AsyncRead; /// Parsers for RDF serialization formats. /// /// It currently supports the following formats: /// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`]) /// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`]) /// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`]) /// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`]) /// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`]) /// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`]) /// /// Note the useful options: /// - [`with_base_iri`](RdfParser::with_base_iri) to resolve the relative IRIs. /// - [`rename_blank_nodes`](RdfParser::rename_blank_nodes) to rename the blank nodes to auto-generated numbers to avoid conflicts when merging RDF graphs together. /// - [`without_named_graphs`](RdfParser::without_named_graphs) to parse a single graph. /// /// ``` /// use oxrdfio::{RdfFormat, RdfParser}; /// /// let file = " ."; /// /// let parser = RdfParser::from_format(RdfFormat::NTriples); /// let quads = parser.parse_read(file.as_bytes()).collect::,_>>()?; /// /// assert_eq!(quads.len(), 1); /// assert_eq!(quads[0].subject.to_string(), ""); /// # std::io::Result::Ok(()) /// ``` pub struct RdfParser { inner: RdfParserKind, default_graph: GraphName, without_named_graphs: bool, rename_blank_nodes: bool, } enum RdfParserKind { N3(N3Parser), NQuads(NQuadsParser), NTriples(NTriplesParser), RdfXml(RdfXmlParser), TriG(TriGParser), Turtle(TurtleParser), } impl RdfParser { /// Builds a parser for the given format. #[inline] #[must_use] pub fn from_format(format: RdfFormat) -> Self { Self { inner: match format { RdfFormat::N3 => RdfParserKind::N3(N3Parser::new()), RdfFormat::NQuads => RdfParserKind::NQuads({ #[cfg(feature = "rdf-star")] { NQuadsParser::new().with_quoted_triples() } #[cfg(not(feature = "rdf-star"))] { NQuadsParser::new() } }), RdfFormat::NTriples => RdfParserKind::NTriples({ #[cfg(feature = "rdf-star")] { NTriplesParser::new().with_quoted_triples() } #[cfg(not(feature = "rdf-star"))] { NTriplesParser::new() } }), RdfFormat::RdfXml => RdfParserKind::RdfXml(RdfXmlParser::new()), RdfFormat::TriG => RdfParserKind::TriG({ #[cfg(feature = "rdf-star")] { TriGParser::new().with_quoted_triples() } #[cfg(not(feature = "rdf-star"))] { TriGParser::new() } }), RdfFormat::Turtle => RdfParserKind::Turtle({ #[cfg(feature = "rdf-star")] { TurtleParser::new().with_quoted_triples() } #[cfg(not(feature = "rdf-star"))] { TurtleParser::new() } }), }, default_graph: GraphName::DefaultGraph, without_named_graphs: false, rename_blank_nodes: false, } } /// Provides an IRI that could be used to resolve the file relative IRIs. /// /// ``` /// use oxrdfio::{RdfFormat, RdfParser}; /// /// let file = "

."; /// /// let parser = RdfParser::from_format(RdfFormat::Turtle).with_base_iri("http://example.com")?; /// let quads = parser.parse_read(file.as_bytes()).collect::,_>>()?; /// /// assert_eq!(quads.len(), 1); /// assert_eq!(quads[0].subject.to_string(), ""); /// # Result::<_,Box>::Ok(()) /// ``` #[inline] pub fn with_base_iri(self, base_iri: impl Into) -> Result { Ok(Self { inner: match self.inner { RdfParserKind::N3(p) => RdfParserKind::N3(p), RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p), RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p), RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?), RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?), RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?), }, default_graph: self.default_graph, without_named_graphs: self.without_named_graphs, rename_blank_nodes: self.rename_blank_nodes, }) } /// Provides the name graph name that should replace the default graph in the returned quads. /// /// ``` /// use oxrdf::NamedNode; /// use oxrdfio::{RdfFormat, RdfParser}; /// /// let file = " ."; /// /// let parser = RdfParser::from_format(RdfFormat::Turtle).with_default_graph(NamedNode::new("http://example.com/g")?); /// let quads = parser.parse_read(file.as_bytes()).collect::,_>>()?; /// /// assert_eq!(quads.len(), 1); /// assert_eq!(quads[0].graph_name.to_string(), ""); /// # Result::<_,Box>::Ok(()) /// ``` #[inline] #[must_use] pub fn with_default_graph(self, default_graph: impl Into) -> Self { Self { inner: self.inner, default_graph: default_graph.into(), without_named_graphs: self.without_named_graphs, rename_blank_nodes: self.rename_blank_nodes, } } /// Sets that the parser must fail if parsing a named graph. /// /// This function restricts the parser to only parse a single [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) and not an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). /// /// ``` /// use oxrdfio::{RdfFormat, RdfParser}; /// /// let file = " ."; /// /// let parser = RdfParser::from_format(RdfFormat::NQuads).without_named_graphs(); /// assert!(parser.parse_read(file.as_bytes()).next().unwrap().is_err()); /// ``` #[inline] #[must_use] pub fn without_named_graphs(self) -> Self { Self { inner: self.inner, default_graph: self.default_graph, without_named_graphs: true, rename_blank_nodes: self.rename_blank_nodes, } } /// Renames the blank nodes ids from the ones set in the serialization to random ids. /// /// This allows to avoid id conflicts when merging graphs together. /// /// ``` /// use oxrdfio::{RdfFormat, RdfParser}; /// /// let file = "_:a ."; /// /// let result1 = RdfParser::from_format(RdfFormat::NQuads) /// .rename_blank_nodes() /// .parse_read(file.as_bytes()).collect::,_>>()?; /// let result2 = RdfParser::from_format(RdfFormat::NQuads) /// .rename_blank_nodes() /// .parse_read(file.as_bytes()).collect::,_>>()?; /// assert_ne!(result1, result2); /// # Result::<_,Box>::Ok(()) /// ``` #[inline] #[must_use] pub fn rename_blank_nodes(self) -> Self { Self { inner: self.inner, default_graph: self.default_graph, without_named_graphs: self.without_named_graphs, rename_blank_nodes: true, } } /// Parses from a [`Read`] implementation and returns an iterator of quads. /// /// Reads are buffered. /// /// ``` /// use oxrdfio::{RdfFormat, RdfParser}; /// /// let file = " ."; /// /// let parser = RdfParser::from_format(RdfFormat::NTriples); /// let quads = parser.parse_read(file.as_bytes()).collect::,_>>()?; /// /// assert_eq!(quads.len(), 1); /// assert_eq!(quads[0].subject.to_string(), ""); /// # std::io::Result::Ok(()) /// ``` pub fn parse_read(self, reader: R) -> FromReadQuadReader { FromReadQuadReader { parser: match self.inner { RdfParserKind::N3(p) => FromReadQuadReaderKind::N3(p.parse_read(reader)), RdfParserKind::NQuads(p) => FromReadQuadReaderKind::NQuads(p.parse_read(reader)), RdfParserKind::NTriples(p) => { FromReadQuadReaderKind::NTriples(p.parse_read(reader)) } RdfParserKind::RdfXml(p) => FromReadQuadReaderKind::RdfXml(p.parse_read(reader)), RdfParserKind::TriG(p) => FromReadQuadReaderKind::TriG(p.parse_read(reader)), RdfParserKind::Turtle(p) => FromReadQuadReaderKind::Turtle(p.parse_read(reader)), }, mapper: QuadMapper { default_graph: self.default_graph.clone(), without_named_graphs: self.without_named_graphs, blank_node_map: self.rename_blank_nodes.then(HashMap::new), }, } } /// Parses from a Tokio [`AsyncRead`] implementation and returns an async iterator of quads. /// /// Reads are buffered. /// /// ``` /// use oxrdfio::{RdfFormat, RdfParser, ParseError}; /// /// #[tokio::main(flavor = "current_thread")] /// async fn main() -> Result<(), ParseError> { /// let file = " ."; /// /// let parser = RdfParser::from_format(RdfFormat::NTriples); /// let mut reader = parser.parse_tokio_async_read(file.as_bytes()); /// if let Some(quad) = reader.next().await { /// assert_eq!(quad?.subject.to_string(), ""); /// } /// Ok(()) /// } /// ``` #[cfg(feature = "async-tokio")] pub fn parse_tokio_async_read( self, reader: R, ) -> FromTokioAsyncReadQuadReader { FromTokioAsyncReadQuadReader { parser: match self.inner { RdfParserKind::N3(p) => { FromTokioAsyncReadQuadReaderKind::N3(p.parse_tokio_async_read(reader)) } RdfParserKind::NQuads(p) => { FromTokioAsyncReadQuadReaderKind::NQuads(p.parse_tokio_async_read(reader)) } RdfParserKind::NTriples(p) => { FromTokioAsyncReadQuadReaderKind::NTriples(p.parse_tokio_async_read(reader)) } RdfParserKind::RdfXml(p) => { FromTokioAsyncReadQuadReaderKind::RdfXml(p.parse_tokio_async_read(reader)) } RdfParserKind::TriG(p) => { FromTokioAsyncReadQuadReaderKind::TriG(p.parse_tokio_async_read(reader)) } RdfParserKind::Turtle(p) => { FromTokioAsyncReadQuadReaderKind::Turtle(p.parse_tokio_async_read(reader)) } }, mapper: QuadMapper { default_graph: self.default_graph.clone(), without_named_graphs: self.without_named_graphs, blank_node_map: self.rename_blank_nodes.then(HashMap::new), }, } } } /// Parses a RDF file from a [`Read`] implementation. Can be built using [`RdfParser::parse_read`]. /// /// Reads are buffered. /// /// ``` /// use oxrdfio::{RdfFormat, RdfParser}; /// /// let file = " ."; /// /// let parser = RdfParser::from_format(RdfFormat::NTriples); /// let quads = parser.parse_read(file.as_bytes()).collect::,_>>()?; /// /// assert_eq!(quads.len(), 1); /// assert_eq!(quads[0].subject.to_string(), ""); /// # std::io::Result::Ok(()) /// ``` #[must_use] pub struct FromReadQuadReader { parser: FromReadQuadReaderKind, mapper: QuadMapper, } enum FromReadQuadReaderKind { N3(FromReadN3Reader), NQuads(FromReadNQuadsReader), NTriples(FromReadNTriplesReader), RdfXml(FromReadRdfXmlReader), TriG(FromReadTriGReader), Turtle(FromReadTurtleReader), } impl Iterator for FromReadQuadReader { type Item = Result; fn next(&mut self) -> Option> { Some(match &mut self.parser { FromReadQuadReaderKind::N3(parser) => match parser.next()? { Ok(quad) => self.mapper.map_n3_quad(quad), Err(e) => Err(e.into()), }, FromReadQuadReaderKind::NQuads(parser) => match parser.next()? { Ok(quad) => self.mapper.map_quad(quad), Err(e) => Err(e.into()), }, FromReadQuadReaderKind::NTriples(parser) => match parser.next()? { Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), Err(e) => Err(e.into()), }, FromReadQuadReaderKind::RdfXml(parser) => match parser.next()? { Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), Err(e) => Err(e.into()), }, FromReadQuadReaderKind::TriG(parser) => match parser.next()? { Ok(quad) => self.mapper.map_quad(quad), Err(e) => Err(e.into()), }, FromReadQuadReaderKind::Turtle(parser) => match parser.next()? { Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), Err(e) => Err(e.into()), }, }) } } /// Parses a RDF file from a Tokio [`AsyncRead`] implementation. Can be built using [`RdfParser::parse_tokio_async_read`]. /// /// Reads are buffered. /// /// ``` /// use oxrdfio::{RdfFormat, RdfParser, ParseError}; /// /// #[tokio::main(flavor = "current_thread")] /// async fn main() -> Result<(), ParseError> { /// let file = " ."; /// /// let parser = RdfParser::from_format(RdfFormat::NTriples); /// let mut reader = parser.parse_tokio_async_read(file.as_bytes()); /// if let Some(quad) = reader.next().await { /// assert_eq!(quad?.subject.to_string(), ""); /// } /// Ok(()) /// } /// ``` #[must_use] #[cfg(feature = "async-tokio")] pub struct FromTokioAsyncReadQuadReader { parser: FromTokioAsyncReadQuadReaderKind, mapper: QuadMapper, } #[cfg(feature = "async-tokio")] enum FromTokioAsyncReadQuadReaderKind { N3(FromTokioAsyncReadN3Reader), NQuads(FromTokioAsyncReadNQuadsReader), NTriples(FromTokioAsyncReadNTriplesReader), RdfXml(FromTokioAsyncReadRdfXmlReader), TriG(FromTokioAsyncReadTriGReader), Turtle(FromTokioAsyncReadTurtleReader), } #[cfg(feature = "async-tokio")] impl FromTokioAsyncReadQuadReader { pub async fn next(&mut self) -> Option> { Some(match &mut self.parser { FromTokioAsyncReadQuadReaderKind::N3(parser) => match parser.next().await? { Ok(quad) => self.mapper.map_n3_quad(quad), Err(e) => Err(e.into()), }, FromTokioAsyncReadQuadReaderKind::NQuads(parser) => match parser.next().await? { Ok(quad) => self.mapper.map_quad(quad), Err(e) => Err(e.into()), }, FromTokioAsyncReadQuadReaderKind::NTriples(parser) => match parser.next().await? { Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), Err(e) => Err(e.into()), }, FromTokioAsyncReadQuadReaderKind::RdfXml(parser) => match parser.next().await? { Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), Err(e) => Err(e.into()), }, FromTokioAsyncReadQuadReaderKind::TriG(parser) => match parser.next().await? { Ok(quad) => self.mapper.map_quad(quad), Err(e) => Err(e.into()), }, FromTokioAsyncReadQuadReaderKind::Turtle(parser) => match parser.next().await? { Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), Err(e) => Err(e.into()), }, }) } } struct QuadMapper { default_graph: GraphName, without_named_graphs: bool, blank_node_map: Option>, } impl QuadMapper { fn map_blank_node(&mut self, node: BlankNode) -> BlankNode { if let Some(blank_node_map) = &mut self.blank_node_map { blank_node_map .entry(node) .or_insert_with(BlankNode::default) .clone() } else { node } } fn map_subject(&mut self, node: Subject) -> Subject { match node { Subject::NamedNode(node) => node.into(), Subject::BlankNode(node) => self.map_blank_node(node).into(), #[cfg(feature = "rdf-star")] Subject::Triple(triple) => self.map_triple(*triple).into(), } } fn map_term(&mut self, node: Term) -> Term { match node { Term::NamedNode(node) => node.into(), Term::BlankNode(node) => self.map_blank_node(node).into(), Term::Literal(literal) => literal.into(), #[cfg(feature = "rdf-star")] Term::Triple(triple) => self.map_triple(*triple).into(), } } fn map_triple(&mut self, triple: Triple) -> Triple { Triple { subject: self.map_subject(triple.subject), predicate: triple.predicate, object: self.map_term(triple.object), } } fn map_graph_name(&mut self, graph_name: GraphName) -> Result { match graph_name { GraphName::NamedNode(node) => { if self.without_named_graphs { Err(ParseError::msg("Named graphs are not allowed")) } else { Ok(node.into()) } } GraphName::BlankNode(node) => { if self.without_named_graphs { Err(ParseError::msg("Named graphs are not allowed")) } else { Ok(self.map_blank_node(node).into()) } } GraphName::DefaultGraph => Ok(self.default_graph.clone()), } } fn map_quad(&mut self, quad: Quad) -> Result { Ok(Quad { subject: self.map_subject(quad.subject), predicate: quad.predicate, object: self.map_term(quad.object), graph_name: self.map_graph_name(quad.graph_name)?, }) } fn map_triple_to_quad(&mut self, triple: Triple) -> Quad { self.map_triple(triple).in_graph(self.default_graph.clone()) } fn map_n3_quad(&mut self, quad: N3Quad) -> Result { Ok(Quad { subject: match quad.subject { N3Term::NamedNode(s) => Ok(s.into()), N3Term::BlankNode(s) => Ok(self.map_blank_node(s).into()), N3Term::Literal(_) => Err(ParseError::msg( "literals are not allowed in regular RDF subjects", )), #[cfg(feature = "rdf-star")] N3Term::Triple(s) => Ok(self.map_triple(*s).into()), N3Term::Variable(_) => Err(ParseError::msg( "variables are not allowed in regular RDF subjects", )), }?, predicate: match quad.predicate { N3Term::NamedNode(p) => Ok(p), N3Term::BlankNode(_) => Err(ParseError::msg( "blank nodes are not allowed in regular RDF predicates", )), N3Term::Literal(_) => Err(ParseError::msg( "literals are not allowed in regular RDF predicates", )), #[cfg(feature = "rdf-star")] N3Term::Triple(_) => Err(ParseError::msg( "quoted triples are not allowed in regular RDF predicates", )), N3Term::Variable(_) => Err(ParseError::msg( "variables are not allowed in regular RDF predicates", )), }?, object: match quad.object { N3Term::NamedNode(o) => Ok(o.into()), N3Term::BlankNode(o) => Ok(self.map_blank_node(o).into()), N3Term::Literal(o) => Ok(o.into()), #[cfg(feature = "rdf-star")] N3Term::Triple(o) => Ok(self.map_triple(*o).into()), N3Term::Variable(_) => Err(ParseError::msg( "variables are not allowed in regular RDF objects", )), }?, graph_name: self.map_graph_name(quad.graph_name)?, }) } }