Fork of https://github.com/oxigraph/oxigraph.git for the purposes of the NextGraph project
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
807 lines
31 KiB
807 lines
31 KiB
//! Utilities to read RDF graphs and datasets.
|
|
|
|
pub use crate::error::RdfParseError;
|
|
use crate::format::RdfFormat;
|
|
use oxrdf::{BlankNode, GraphName, IriParseError, Quad, Subject, Term, Triple};
|
|
#[cfg(feature = "async-tokio")]
|
|
use oxrdfxml::FromTokioAsyncReadRdfXmlReader;
|
|
use oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser};
|
|
#[cfg(feature = "async-tokio")]
|
|
use oxttl::n3::FromTokioAsyncReadN3Reader;
|
|
use oxttl::n3::{FromReadN3Reader, N3Parser, N3PrefixesIter, N3Quad, N3Term};
|
|
#[cfg(feature = "async-tokio")]
|
|
use oxttl::nquads::FromTokioAsyncReadNQuadsReader;
|
|
use oxttl::nquads::{FromReadNQuadsReader, NQuadsParser};
|
|
#[cfg(feature = "async-tokio")]
|
|
use oxttl::ntriples::FromTokioAsyncReadNTriplesReader;
|
|
use oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser};
|
|
#[cfg(feature = "async-tokio")]
|
|
use oxttl::trig::FromTokioAsyncReadTriGReader;
|
|
use oxttl::trig::{FromReadTriGReader, TriGParser, TriGPrefixesIter};
|
|
#[cfg(feature = "async-tokio")]
|
|
use oxttl::turtle::FromTokioAsyncReadTurtleReader;
|
|
use oxttl::turtle::{FromReadTurtleReader, TurtleParser, TurtlePrefixesIter};
|
|
use std::collections::HashMap;
|
|
use std::io::Read;
|
|
#[cfg(feature = "async-tokio")]
|
|
use tokio::io::AsyncRead;
|
|
|
|
/// Parsers for RDF serialization formats.
|
|
///
|
|
/// It currently supports the following formats:
|
|
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
|
|
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
|
|
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
|
|
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
|
|
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
|
|
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
|
|
///
|
|
/// Note the useful options:
|
|
/// - [`with_base_iri`](Self::with_base_iri) to resolve the relative IRIs.
|
|
/// - [`rename_blank_nodes`](Self::rename_blank_nodes) to rename the blank nodes to auto-generated numbers to avoid conflicts when merging RDF graphs together.
|
|
/// - [`without_named_graphs`](Self::without_named_graphs) to parse a single graph.
|
|
/// - [`unchecked`](Self::unchecked) to skip some validations if the file is already known to be valid.
|
|
///
|
|
/// ```
|
|
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
///
|
|
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
|
///
|
|
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
|
/// let quads = parser
|
|
/// .parse_read(file.as_bytes())
|
|
/// .collect::<Result<Vec<_>, _>>()?;
|
|
///
|
|
/// assert_eq!(quads.len(), 1);
|
|
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
|
/// # std::io::Result::Ok(())
|
|
/// ```
|
|
#[must_use]
|
|
pub struct RdfParser {
|
|
inner: RdfParserKind,
|
|
default_graph: GraphName,
|
|
without_named_graphs: bool,
|
|
rename_blank_nodes: bool,
|
|
}
|
|
|
|
enum RdfParserKind {
|
|
N3(N3Parser),
|
|
NQuads(NQuadsParser),
|
|
NTriples(NTriplesParser),
|
|
RdfXml(RdfXmlParser),
|
|
TriG(TriGParser),
|
|
Turtle(TurtleParser),
|
|
}
|
|
|
|
impl RdfParser {
|
|
/// Builds a parser for the given format.
|
|
#[inline]
|
|
pub fn from_format(format: RdfFormat) -> Self {
|
|
Self {
|
|
inner: match format {
|
|
RdfFormat::N3 => RdfParserKind::N3(N3Parser::new()),
|
|
RdfFormat::NQuads => RdfParserKind::NQuads({
|
|
#[cfg(feature = "rdf-star")]
|
|
{
|
|
NQuadsParser::new().with_quoted_triples()
|
|
}
|
|
#[cfg(not(feature = "rdf-star"))]
|
|
{
|
|
NQuadsParser::new()
|
|
}
|
|
}),
|
|
RdfFormat::NTriples => RdfParserKind::NTriples({
|
|
#[cfg(feature = "rdf-star")]
|
|
{
|
|
NTriplesParser::new().with_quoted_triples()
|
|
}
|
|
#[cfg(not(feature = "rdf-star"))]
|
|
{
|
|
NTriplesParser::new()
|
|
}
|
|
}),
|
|
RdfFormat::RdfXml => RdfParserKind::RdfXml(RdfXmlParser::new()),
|
|
RdfFormat::TriG => RdfParserKind::TriG({
|
|
#[cfg(feature = "rdf-star")]
|
|
{
|
|
TriGParser::new().with_quoted_triples()
|
|
}
|
|
#[cfg(not(feature = "rdf-star"))]
|
|
{
|
|
TriGParser::new()
|
|
}
|
|
}),
|
|
RdfFormat::Turtle => RdfParserKind::Turtle({
|
|
#[cfg(feature = "rdf-star")]
|
|
{
|
|
TurtleParser::new().with_quoted_triples()
|
|
}
|
|
#[cfg(not(feature = "rdf-star"))]
|
|
{
|
|
TurtleParser::new()
|
|
}
|
|
}),
|
|
},
|
|
default_graph: GraphName::DefaultGraph,
|
|
without_named_graphs: false,
|
|
rename_blank_nodes: false,
|
|
}
|
|
}
|
|
|
|
/// The format the parser uses.
|
|
///
|
|
/// ```
|
|
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
///
|
|
/// assert_eq!(
|
|
/// RdfParser::from_format(RdfFormat::Turtle).format(),
|
|
/// RdfFormat::Turtle
|
|
/// );
|
|
/// ```
|
|
pub fn format(&self) -> RdfFormat {
|
|
match &self.inner {
|
|
RdfParserKind::N3(_) => RdfFormat::N3,
|
|
RdfParserKind::NQuads(_) => RdfFormat::NQuads,
|
|
RdfParserKind::NTriples(_) => RdfFormat::NTriples,
|
|
RdfParserKind::RdfXml(_) => RdfFormat::RdfXml,
|
|
RdfParserKind::TriG(_) => RdfFormat::TriG,
|
|
RdfParserKind::Turtle(_) => RdfFormat::Turtle,
|
|
}
|
|
}
|
|
|
|
/// Provides an IRI that could be used to resolve the file relative IRIs.
|
|
///
|
|
/// ```
|
|
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
///
|
|
/// let file = "</s> </p> </o> .";
|
|
///
|
|
/// let parser = RdfParser::from_format(RdfFormat::Turtle).with_base_iri("http://example.com")?;
|
|
/// let quads = parser
|
|
/// .parse_read(file.as_bytes())
|
|
/// .collect::<Result<Vec<_>, _>>()?;
|
|
///
|
|
/// assert_eq!(quads.len(), 1);
|
|
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
/// ```
|
|
#[inline]
|
|
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
|
|
self.inner = match self.inner {
|
|
RdfParserKind::N3(p) => RdfParserKind::N3(p),
|
|
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p),
|
|
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p),
|
|
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?),
|
|
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?),
|
|
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?),
|
|
};
|
|
Ok(self)
|
|
}
|
|
|
|
/// Provides the name graph name that should replace the default graph in the returned quads.
|
|
///
|
|
/// ```
|
|
/// use oxrdf::NamedNode;
|
|
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
///
|
|
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
|
///
|
|
/// let parser = RdfParser::from_format(RdfFormat::Turtle)
|
|
/// .with_default_graph(NamedNode::new("http://example.com/g")?);
|
|
/// let quads = parser
|
|
/// .parse_read(file.as_bytes())
|
|
/// .collect::<Result<Vec<_>, _>>()?;
|
|
///
|
|
/// assert_eq!(quads.len(), 1);
|
|
/// assert_eq!(quads[0].graph_name.to_string(), "<http://example.com/g>");
|
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
/// ```
|
|
#[inline]
|
|
pub fn with_default_graph(mut self, default_graph: impl Into<GraphName>) -> Self {
|
|
self.default_graph = default_graph.into();
|
|
self
|
|
}
|
|
|
|
/// Sets that the parser must fail if parsing a named graph.
|
|
///
|
|
/// This function restricts the parser to only parse a single [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) and not an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
|
|
///
|
|
/// ```
|
|
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
///
|
|
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
|
|
///
|
|
/// let parser = RdfParser::from_format(RdfFormat::NQuads).without_named_graphs();
|
|
/// assert!(parser.parse_read(file.as_bytes()).next().unwrap().is_err());
|
|
/// ```
|
|
#[inline]
|
|
pub fn without_named_graphs(mut self) -> Self {
|
|
self.without_named_graphs = true;
|
|
self
|
|
}
|
|
|
|
/// Renames the blank nodes ids from the ones set in the serialization to random ids.
|
|
///
|
|
/// This allows to avoid id conflicts when merging graphs together.
|
|
///
|
|
/// ```
|
|
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
///
|
|
/// let file = "_:a <http://example.com/p> <http://example.com/o> .";
|
|
///
|
|
/// let result1 = RdfParser::from_format(RdfFormat::NQuads)
|
|
/// .rename_blank_nodes()
|
|
/// .parse_read(file.as_bytes())
|
|
/// .collect::<Result<Vec<_>, _>>()?;
|
|
/// let result2 = RdfParser::from_format(RdfFormat::NQuads)
|
|
/// .rename_blank_nodes()
|
|
/// .parse_read(file.as_bytes())
|
|
/// .collect::<Result<Vec<_>, _>>()?;
|
|
/// assert_ne!(result1, result2);
|
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
/// ```
|
|
#[inline]
|
|
pub fn rename_blank_nodes(mut self) -> Self {
|
|
self.rename_blank_nodes = true;
|
|
self
|
|
}
|
|
|
|
/// Assumes the file is valid to make parsing faster.
|
|
///
|
|
/// It will skip some validations.
|
|
///
|
|
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
|
|
#[inline]
|
|
pub fn unchecked(mut self) -> Self {
|
|
self.inner = match self.inner {
|
|
RdfParserKind::N3(p) => RdfParserKind::N3(p.unchecked()),
|
|
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p.unchecked()),
|
|
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p.unchecked()),
|
|
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.unchecked()),
|
|
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.unchecked()),
|
|
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.unchecked()),
|
|
};
|
|
self
|
|
}
|
|
|
|
/// Parses from a [`Read`] implementation and returns an iterator of quads.
|
|
///
|
|
/// Reads are buffered.
|
|
///
|
|
/// ```
|
|
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
///
|
|
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
|
///
|
|
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
|
/// let quads = parser
|
|
/// .parse_read(file.as_bytes())
|
|
/// .collect::<Result<Vec<_>, _>>()?;
|
|
///
|
|
/// assert_eq!(quads.len(), 1);
|
|
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
|
/// # std::io::Result::Ok(())
|
|
/// ```
|
|
pub fn parse_read<R: Read>(self, reader: R) -> FromReadQuadReader<R> {
|
|
FromReadQuadReader {
|
|
parser: match self.inner {
|
|
RdfParserKind::N3(p) => FromReadQuadReaderKind::N3(p.parse_read(reader)),
|
|
RdfParserKind::NQuads(p) => FromReadQuadReaderKind::NQuads(p.parse_read(reader)),
|
|
RdfParserKind::NTriples(p) => {
|
|
FromReadQuadReaderKind::NTriples(p.parse_read(reader))
|
|
}
|
|
RdfParserKind::RdfXml(p) => FromReadQuadReaderKind::RdfXml(p.parse_read(reader)),
|
|
RdfParserKind::TriG(p) => FromReadQuadReaderKind::TriG(p.parse_read(reader)),
|
|
RdfParserKind::Turtle(p) => FromReadQuadReaderKind::Turtle(p.parse_read(reader)),
|
|
},
|
|
mapper: QuadMapper {
|
|
default_graph: self.default_graph.clone(),
|
|
without_named_graphs: self.without_named_graphs,
|
|
blank_node_map: self.rename_blank_nodes.then(HashMap::new),
|
|
},
|
|
}
|
|
}
|
|
|
|
/// Parses from a Tokio [`AsyncRead`] implementation and returns an async iterator of quads.
|
|
///
|
|
/// Reads are buffered.
|
|
///
|
|
/// ```
|
|
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
///
|
|
/// # #[tokio::main(flavor = "current_thread")]
|
|
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
|
|
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
|
///
|
|
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
|
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
|
|
/// if let Some(quad) = reader.next().await {
|
|
/// assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
|
|
/// }
|
|
/// # Ok(())
|
|
/// # }
|
|
/// ```
|
|
#[cfg(feature = "async-tokio")]
|
|
pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
|
|
self,
|
|
reader: R,
|
|
) -> FromTokioAsyncReadQuadReader<R> {
|
|
FromTokioAsyncReadQuadReader {
|
|
parser: match self.inner {
|
|
RdfParserKind::N3(p) => {
|
|
FromTokioAsyncReadQuadReaderKind::N3(p.parse_tokio_async_read(reader))
|
|
}
|
|
RdfParserKind::NQuads(p) => {
|
|
FromTokioAsyncReadQuadReaderKind::NQuads(p.parse_tokio_async_read(reader))
|
|
}
|
|
RdfParserKind::NTriples(p) => {
|
|
FromTokioAsyncReadQuadReaderKind::NTriples(p.parse_tokio_async_read(reader))
|
|
}
|
|
RdfParserKind::RdfXml(p) => {
|
|
FromTokioAsyncReadQuadReaderKind::RdfXml(p.parse_tokio_async_read(reader))
|
|
}
|
|
RdfParserKind::TriG(p) => {
|
|
FromTokioAsyncReadQuadReaderKind::TriG(p.parse_tokio_async_read(reader))
|
|
}
|
|
RdfParserKind::Turtle(p) => {
|
|
FromTokioAsyncReadQuadReaderKind::Turtle(p.parse_tokio_async_read(reader))
|
|
}
|
|
},
|
|
mapper: QuadMapper {
|
|
default_graph: self.default_graph.clone(),
|
|
without_named_graphs: self.without_named_graphs,
|
|
blank_node_map: self.rename_blank_nodes.then(HashMap::new),
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<RdfFormat> for RdfParser {
|
|
fn from(format: RdfFormat) -> Self {
|
|
Self::from_format(format)
|
|
}
|
|
}
|
|
|
|
/// Parses a RDF file from a [`Read`] implementation. Can be built using [`RdfParser::parse_read`].
|
|
///
|
|
/// Reads are buffered.
|
|
///
|
|
/// ```
|
|
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
///
|
|
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
|
///
|
|
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
|
/// let quads = parser
|
|
/// .parse_read(file.as_bytes())
|
|
/// .collect::<Result<Vec<_>, _>>()?;
|
|
///
|
|
/// assert_eq!(quads.len(), 1);
|
|
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
|
/// # std::io::Result::Ok(())
|
|
/// ```
|
|
#[must_use]
|
|
pub struct FromReadQuadReader<R: Read> {
|
|
parser: FromReadQuadReaderKind<R>,
|
|
mapper: QuadMapper,
|
|
}
|
|
|
|
enum FromReadQuadReaderKind<R: Read> {
|
|
N3(FromReadN3Reader<R>),
|
|
NQuads(FromReadNQuadsReader<R>),
|
|
NTriples(FromReadNTriplesReader<R>),
|
|
RdfXml(FromReadRdfXmlReader<R>),
|
|
TriG(FromReadTriGReader<R>),
|
|
Turtle(FromReadTurtleReader<R>),
|
|
}
|
|
|
|
impl<R: Read> Iterator for FromReadQuadReader<R> {
|
|
type Item = Result<Quad, RdfParseError>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
Some(match &mut self.parser {
|
|
FromReadQuadReaderKind::N3(parser) => match parser.next()? {
|
|
Ok(quad) => self.mapper.map_n3_quad(quad),
|
|
Err(e) => Err(e.into()),
|
|
},
|
|
FromReadQuadReaderKind::NQuads(parser) => match parser.next()? {
|
|
Ok(quad) => self.mapper.map_quad(quad),
|
|
Err(e) => Err(e.into()),
|
|
},
|
|
FromReadQuadReaderKind::NTriples(parser) => match parser.next()? {
|
|
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
|
|
Err(e) => Err(e.into()),
|
|
},
|
|
FromReadQuadReaderKind::RdfXml(parser) => match parser.next()? {
|
|
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
|
|
Err(e) => Err(e.into()),
|
|
},
|
|
FromReadQuadReaderKind::TriG(parser) => match parser.next()? {
|
|
Ok(quad) => self.mapper.map_quad(quad),
|
|
Err(e) => Err(e.into()),
|
|
},
|
|
FromReadQuadReaderKind::Turtle(parser) => match parser.next()? {
|
|
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
|
|
Err(e) => Err(e.into()),
|
|
},
|
|
})
|
|
}
|
|
}
|
|
|
|
impl<R: Read> FromReadQuadReader<R> {
|
|
/// The list of IRI prefixes considered at the current step of the parsing.
|
|
///
|
|
/// This method returns (prefix name, prefix value) tuples.
|
|
/// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
|
|
/// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
|
|
///
|
|
/// An empty iterator is return if the format does not support prefixes.
|
|
///
|
|
/// ```
|
|
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
///
|
|
/// let file = br#"@base <http://example.com/> .
|
|
/// @prefix schema: <http://schema.org/> .
|
|
/// <foo> a schema:Person ;
|
|
/// schema:name "Foo" ."#;
|
|
///
|
|
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
|
|
/// assert!(reader.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
|
|
///
|
|
/// reader.next().unwrap()?; // We read the first triple
|
|
/// assert_eq!(
|
|
/// reader.prefixes().collect::<Vec<_>>(),
|
|
/// [("schema", "http://schema.org/")]
|
|
/// ); // There are now prefixes
|
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
/// ```
|
|
pub fn prefixes(&self) -> PrefixesIter<'_> {
|
|
PrefixesIter {
|
|
inner: match &self.parser {
|
|
FromReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()),
|
|
FromReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()),
|
|
FromReadQuadReaderKind::Turtle(p) => PrefixesIterKind::Turtle(p.prefixes()),
|
|
FromReadQuadReaderKind::NQuads(_)
|
|
| FromReadQuadReaderKind::NTriples(_)
|
|
| FromReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */
|
|
},
|
|
}
|
|
}
|
|
|
|
/// The base IRI considered at the current step of the parsing.
|
|
///
|
|
/// `None` is returned if no base IRI is set or the format does not support base IRIs.
|
|
///
|
|
/// ```
|
|
/// use oxrdfio::{RdfFormat, RdfParser};
|
|
///
|
|
/// let file = br#"@base <http://example.com/> .
|
|
/// @prefix schema: <http://schema.org/> .
|
|
/// <foo> a schema:Person ;
|
|
/// schema:name "Foo" ."#;
|
|
///
|
|
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
|
|
/// assert!(reader.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
|
|
///
|
|
/// reader.next().unwrap()?; // We read the first triple
|
|
/// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI.
|
|
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
|
/// ```
|
|
pub fn base_iri(&self) -> Option<&str> {
|
|
match &self.parser {
|
|
FromReadQuadReaderKind::N3(p) => p.base_iri(),
|
|
FromReadQuadReaderKind::TriG(p) => p.base_iri(),
|
|
FromReadQuadReaderKind::Turtle(p) => p.base_iri(),
|
|
FromReadQuadReaderKind::NQuads(_)
|
|
| FromReadQuadReaderKind::NTriples(_)
|
|
| FromReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Asynchronous reader of quads parsed from a Tokio [`AsyncRead`] implementation. Obtained from [`RdfParser::parse_tokio_async_read`].
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
/// if let Some(quad) = reader.next().await {
///     assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
/// }
/// # Ok(())
/// # }
/// ```
#[must_use]
#[cfg(feature = "async-tokio")]
pub struct FromTokioAsyncReadQuadReader<R: AsyncRead + Unpin> {
    /// Format-specific asynchronous reader producing the raw triples or quads.
    parser: FromTokioAsyncReadQuadReaderKind<R>,
    /// Post-processing applied to every parsed item before it is returned.
    mapper: QuadMapper,
}
|
|
|
|
/// Format-specific asynchronous reader wrapped by [`FromTokioAsyncReadQuadReader`].
#[cfg(feature = "async-tokio")]
enum FromTokioAsyncReadQuadReaderKind<R: AsyncRead + Unpin> {
    /// Notation3 reader.
    N3(FromTokioAsyncReadN3Reader<R>),
    /// N-Quads reader.
    NQuads(FromTokioAsyncReadNQuadsReader<R>),
    /// N-Triples reader.
    NTriples(FromTokioAsyncReadNTriplesReader<R>),
    /// RDF/XML reader.
    RdfXml(FromTokioAsyncReadRdfXmlReader<R>),
    /// TriG reader.
    TriG(FromTokioAsyncReadTriGReader<R>),
    /// Turtle reader.
    Turtle(FromTokioAsyncReadTurtleReader<R>),
}
|
|
|
|
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadQuadReader<R> {
    /// Reads the next quad from the underlying format-specific reader.
    ///
    /// Returns `None` once the underlying reader yields no more items. Reader errors are
    /// converted into [`RdfParseError`]; quad-producing formats may additionally fail while
    /// the parsed quad is mapped.
    pub async fn next(&mut self) -> Option<Result<Quad, RdfParseError>> {
        Some(match &mut self.parser {
            FromTokioAsyncReadQuadReaderKind::N3(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_n3_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::NQuads(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::NTriples(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::RdfXml(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::TriG(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::Turtle(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
        })
    }

    /// The list of IRI prefixes considered at the current step of the parsing.
    ///
    /// This method returns (prefix name, prefix value) tuples.
    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
    ///
    /// An empty iterator is returned if the format does not support prefixes.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name "Foo" ."#;
    ///
    /// let mut reader =
    ///     RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
    /// assert!(reader.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
    ///
    /// reader.next().await.unwrap()?; // We read the first triple
    /// assert_eq!(
    ///     reader.prefixes().collect::<Vec<_>>(),
    ///     [("schema", "http://schema.org/")]
    /// ); // There are now prefixes
    /// # Ok(())
    /// # }
    /// ```
    pub fn prefixes(&self) -> PrefixesIter<'_> {
        PrefixesIter {
            inner: match &self.parser {
                FromTokioAsyncReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()),
                FromTokioAsyncReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()),
                FromTokioAsyncReadQuadReaderKind::Turtle(p) => {
                    PrefixesIterKind::Turtle(p.prefixes())
                }
                FromTokioAsyncReadQuadReaderKind::NQuads(_)
                | FromTokioAsyncReadQuadReaderKind::NTriples(_)
                | FromTokioAsyncReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */
            },
        }
    }

    /// The base IRI considered at the current step of the parsing.
    ///
    /// `None` is returned if no base IRI is set or the format does not support base IRIs.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name "Foo" ."#;
    ///
    /// let mut reader =
    ///     RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
    /// assert!(reader.base_iri().is_none()); // No base IRI at the beginning
    ///
    /// reader.next().await.unwrap()?; // We read the first triple
    /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI
    /// # Ok(())
    /// # }
    /// ```
    pub fn base_iri(&self) -> Option<&str> {
        match &self.parser {
            FromTokioAsyncReadQuadReaderKind::N3(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::TriG(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::Turtle(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::NQuads(_)
            | FromTokioAsyncReadQuadReaderKind::NTriples(_)
            | FromTokioAsyncReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
        }
    }
}
|
|
|
|
/// Iterator on the file prefixes.
|
|
///
|
|
/// See [`FromReadQuadReader::prefixes`].
|
|
pub struct PrefixesIter<'a> {
|
|
inner: PrefixesIterKind<'a>,
|
|
}
|
|
|
|
enum PrefixesIterKind<'a> {
|
|
Turtle(TurtlePrefixesIter<'a>),
|
|
TriG(TriGPrefixesIter<'a>),
|
|
N3(N3PrefixesIter<'a>),
|
|
None,
|
|
}
|
|
|
|
impl<'a> Iterator for PrefixesIter<'a> {
|
|
type Item = (&'a str, &'a str);
|
|
|
|
#[inline]
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
match &mut self.inner {
|
|
PrefixesIterKind::Turtle(iter) => iter.next(),
|
|
PrefixesIterKind::TriG(iter) => iter.next(),
|
|
PrefixesIterKind::N3(iter) => iter.next(),
|
|
PrefixesIterKind::None => None,
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
|
match &self.inner {
|
|
PrefixesIterKind::Turtle(iter) => iter.size_hint(),
|
|
PrefixesIterKind::TriG(iter) => iter.size_hint(),
|
|
PrefixesIterKind::N3(iter) => iter.size_hint(),
|
|
PrefixesIterKind::None => (0, Some(0)),
|
|
}
|
|
}
|
|
}
|
|
|
|
struct QuadMapper {
|
|
default_graph: GraphName,
|
|
without_named_graphs: bool,
|
|
blank_node_map: Option<HashMap<BlankNode, BlankNode>>,
|
|
}
|
|
|
|
impl QuadMapper {
|
|
fn map_blank_node(&mut self, node: BlankNode) -> BlankNode {
|
|
if let Some(blank_node_map) = &mut self.blank_node_map {
|
|
blank_node_map
|
|
.entry(node)
|
|
.or_insert_with(BlankNode::default)
|
|
.clone()
|
|
} else {
|
|
node
|
|
}
|
|
}
|
|
|
|
fn map_subject(&mut self, node: Subject) -> Subject {
|
|
match node {
|
|
Subject::NamedNode(node) => node.into(),
|
|
Subject::BlankNode(node) => self.map_blank_node(node).into(),
|
|
#[cfg(feature = "rdf-star")]
|
|
Subject::Triple(triple) => self.map_triple(*triple).into(),
|
|
}
|
|
}
|
|
|
|
fn map_term(&mut self, node: Term) -> Term {
|
|
match node {
|
|
Term::NamedNode(node) => node.into(),
|
|
Term::BlankNode(node) => self.map_blank_node(node).into(),
|
|
Term::Literal(literal) => literal.into(),
|
|
#[cfg(feature = "rdf-star")]
|
|
Term::Triple(triple) => self.map_triple(*triple).into(),
|
|
}
|
|
}
|
|
|
|
fn map_triple(&mut self, triple: Triple) -> Triple {
|
|
Triple {
|
|
subject: self.map_subject(triple.subject),
|
|
predicate: triple.predicate,
|
|
object: self.map_term(triple.object),
|
|
}
|
|
}
|
|
|
|
fn map_graph_name(&mut self, graph_name: GraphName) -> Result<GraphName, RdfParseError> {
|
|
match graph_name {
|
|
GraphName::NamedNode(node) => {
|
|
if self.without_named_graphs {
|
|
Err(RdfParseError::msg("Named graphs are not allowed"))
|
|
} else {
|
|
Ok(node.into())
|
|
}
|
|
}
|
|
GraphName::BlankNode(node) => {
|
|
if self.without_named_graphs {
|
|
Err(RdfParseError::msg("Named graphs are not allowed"))
|
|
} else {
|
|
Ok(self.map_blank_node(node).into())
|
|
}
|
|
}
|
|
GraphName::DefaultGraph => Ok(self.default_graph.clone()),
|
|
}
|
|
}
|
|
|
|
fn map_quad(&mut self, quad: Quad) -> Result<Quad, RdfParseError> {
|
|
Ok(Quad {
|
|
subject: self.map_subject(quad.subject),
|
|
predicate: quad.predicate,
|
|
object: self.map_term(quad.object),
|
|
graph_name: self.map_graph_name(quad.graph_name)?,
|
|
})
|
|
}
|
|
|
|
fn map_triple_to_quad(&mut self, triple: Triple) -> Quad {
|
|
self.map_triple(triple).in_graph(self.default_graph.clone())
|
|
}
|
|
|
|
fn map_n3_quad(&mut self, quad: N3Quad) -> Result<Quad, RdfParseError> {
|
|
Ok(Quad {
|
|
subject: match quad.subject {
|
|
N3Term::NamedNode(s) => Ok(s.into()),
|
|
N3Term::BlankNode(s) => Ok(self.map_blank_node(s).into()),
|
|
N3Term::Literal(_) => Err(RdfParseError::msg(
|
|
"literals are not allowed in regular RDF subjects",
|
|
)),
|
|
#[cfg(feature = "rdf-star")]
|
|
N3Term::Triple(s) => Ok(self.map_triple(*s).into()),
|
|
N3Term::Variable(_) => Err(RdfParseError::msg(
|
|
"variables are not allowed in regular RDF subjects",
|
|
)),
|
|
}?,
|
|
predicate: match quad.predicate {
|
|
N3Term::NamedNode(p) => Ok(p),
|
|
N3Term::BlankNode(_) => Err(RdfParseError::msg(
|
|
"blank nodes are not allowed in regular RDF predicates",
|
|
)),
|
|
N3Term::Literal(_) => Err(RdfParseError::msg(
|
|
"literals are not allowed in regular RDF predicates",
|
|
)),
|
|
#[cfg(feature = "rdf-star")]
|
|
N3Term::Triple(_) => Err(RdfParseError::msg(
|
|
"quoted triples are not allowed in regular RDF predicates",
|
|
)),
|
|
N3Term::Variable(_) => Err(RdfParseError::msg(
|
|
"variables are not allowed in regular RDF predicates",
|
|
)),
|
|
}?,
|
|
object: match quad.object {
|
|
N3Term::NamedNode(o) => Ok(o.into()),
|
|
N3Term::BlankNode(o) => Ok(self.map_blank_node(o).into()),
|
|
N3Term::Literal(o) => Ok(o.into()),
|
|
#[cfg(feature = "rdf-star")]
|
|
N3Term::Triple(o) => Ok(self.map_triple(*o).into()),
|
|
N3Term::Variable(_) => Err(RdfParseError::msg(
|
|
"variables are not allowed in regular RDF objects",
|
|
)),
|
|
}?,
|
|
graph_name: self.map_graph_name(quad.graph_name)?,
|
|
})
|
|
}
|
|
}
|
|
|