Adds a serializers API and improves the parser API

pull/46/head
Tpt 5 years ago
parent 35fd315585
commit 0bc5e235eb
  1. 7
      lib/src/io/mod.rs
  2. 252
      lib/src/io/read.rs
  3. 211
      lib/src/io/write.rs
  4. 30
      lib/src/sparql/model.rs
  5. 2
      lib/src/store/mod.rs

@ -1,5 +1,8 @@
mod read;
//! Utilities to read and write RDF graphs and datasets
pub mod read;
mod syntax;
pub mod write;
pub use self::read::DatasetParser;
pub use self::read::GraphParser;
@ -7,3 +10,5 @@ pub use self::syntax::DatasetSyntax;
#[allow(deprecated)]
pub use self::syntax::FileSyntax;
pub use self::syntax::GraphSyntax;
pub use self::write::DatasetSerializer;
pub use self::write::GraphSerializer;

@ -1,3 +1,5 @@
//! Utilities to read RDF graphs and datasets
use super::GraphSyntax;
use crate::model::*;
use crate::DatasetSyntax;
@ -7,9 +9,9 @@ use rio_api::parser::{QuadsParser, TriplesParser};
use rio_turtle::{NQuadsParser, NTriplesParser, TriGParser, TurtleParser};
use rio_xml::RdfXmlParser;
use std::collections::HashMap;
use std::error::Error;
use std::io;
use std::io::BufRead;
use std::iter::once;
/// A reader for RDF graph serialization formats.
///
@ -25,7 +27,7 @@ use std::iter::once;
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_syntax(GraphSyntax::NTriples);
/// let triples = parser.read(Cursor::new(file)).collect::<Result<Vec<_>,_>>()?;
/// let triples = parser.read_triples(Cursor::new(file))?.collect::<Result<Vec<_>,_>>()?;
///
///assert_eq!(triples.len(), 1);
///assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
@ -53,7 +55,7 @@ impl GraphParser {
/// let file = "</s> </p> </o> .";
///
/// let parser = GraphParser::from_syntax(GraphSyntax::Turtle).with_base_iri("http://example.com")?;
/// let triples = parser.read(Cursor::new(file)).collect::<Result<Vec<_>,_>>()?;
/// let triples = parser.read_triples(Cursor::new(file))?.collect::<Result<Vec<_>,_>>()?;
///
///assert_eq!(triples.len(), 1);
///assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
@ -65,47 +67,98 @@ impl GraphParser {
}
/// Executes the parsing itself
pub fn read<'a>(
&self,
reader: impl BufRead + 'a,
) -> impl Iterator<Item = Result<Triple, io::Error>> + 'a {
match self.parse(reader) {
Ok(iter) => iter,
Err(error) => Box::new(once(Err(error))),
}
/// Returns a `TripleReader` iterating over the triples parsed from `reader`.
///
/// The concrete parser is chosen from the syntax this `GraphParser` was built with.
/// A parser that cannot be constructed is reported as an `io::Error` created by
/// `invalid_input`.
pub fn read_triples<R: BufRead>(&self, reader: R) -> Result<TripleReader<R>, io::Error> {
    //TODO: drop the error when possible
    let parser = match self.syntax {
        GraphSyntax::NTriples => {
            let inner = NTriplesParser::new(reader).map_err(invalid_input)?;
            TripleReaderKind::NTriples(inner)
        }
        GraphSyntax::Turtle => {
            let inner = TurtleParser::new(reader, &self.base_iri).map_err(invalid_input)?;
            TripleReaderKind::Turtle(inner)
        }
        GraphSyntax::RdfXml => {
            let inner = RdfXmlParser::new(reader, &self.base_iri).map_err(invalid_input)?;
            TripleReaderKind::RdfXml(inner)
        }
    };
    Ok(TripleReader {
        mapper: RioMapper::default(),
        parser,
        buffer: Vec::new(),
    })
}
}
fn parse<'a>(
&self,
reader: impl BufRead + 'a,
) -> Result<Box<dyn Iterator<Item = Result<Triple, io::Error>> + 'a>, io::Error> {
Ok(match self.syntax {
GraphSyntax::NTriples => {
Box::new(self.parse_from_triple_parser(NTriplesParser::new(reader))?)
}
GraphSyntax::Turtle => {
Box::new(self.parse_from_triple_parser(TurtleParser::new(reader, &self.base_iri))?)
/// An iterator over the triples read from an RDF graph serialization.
///
/// It could be built using a `GraphParser` and yields `Result<Triple, io::Error>` items.
///
/// ```
/// use oxigraph::io::{GraphSyntax, GraphParser};
/// use std::io::Cursor;
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_syntax(GraphSyntax::NTriples);
/// let triples = parser.read_triples(Cursor::new(file))?.collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct TripleReader<R: BufRead> {
/// Converts the triples emitted by the underlying parser into `Triple` values.
mapper: RioMapper,
/// The syntax-specific parser wrapped by this reader.
parser: TripleReaderKind<R>,
/// Triples that have been parsed but not yet returned by the iterator.
buffer: Vec<Triple>,
}
/// The parser backing a `TripleReader`: one variant per supported graph syntax,
/// each holding the matching parser over the reader `R`.
enum TripleReaderKind<R: BufRead> {
NTriples(NTriplesParser<R>),
Turtle(TurtleParser<R>),
RdfXml(RdfXmlParser<R>),
}
impl<R: BufRead> Iterator for TripleReader<R> {
type Item = Result<Triple, io::Error>;
/// Returns the next parsed triple, a parsing error, or `None` once the parser is at its end.
fn next(&mut self) -> Option<Result<Triple, io::Error>> {
loop {
// Serve already buffered triples first. `pop` takes from the end of the buffer,
// so triples pushed during the same parsing step come out in reverse push order.
if let Some(r) = self.buffer.pop() {
return Some(Ok(r));
}
// NOTE(review): the next two lines look like removed-side leftovers of the old `parse`
// method kept by the diff rendering; confirm against the real file.
GraphSyntax::RdfXml => {
Box::new(self.parse_from_triple_parser(RdfXmlParser::new(reader, &self.base_iri))?)
// Run one parsing step. `?` ends the iteration when `read` returns `None`,
// a failed step is handed to the caller as `Some(Err(_))`.
if let Err(error) = match &mut self.parser {
TripleReaderKind::NTriples(parser) => {
Self::read(parser, &mut self.buffer, &mut self.mapper, invalid_data)
}
TripleReaderKind::Turtle(parser) => {
Self::read(parser, &mut self.buffer, &mut self.mapper, invalid_data)
}
TripleReaderKind::RdfXml(parser) => {
Self::read(parser, &mut self.buffer, &mut self.mapper, invalid_data)
}
}? {
return Some(Err(error));
}
// NOTE(review): the `})` below also appears to be a removed-side leftover; confirm.
})
}
}
}
fn parse_from_triple_parser<P: TriplesParser>(
&self,
parser: Result<P, P::Error>,
) -> Result<impl Iterator<Item = Result<Triple, io::Error>>, io::Error>
where
P::Error: Send + Sync + 'static,
{
let parser = parser.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
let mut mapper = RioMapper::default();
Ok(parser
.into_iter(move |t| Ok(mapper.triple(&t)))
.map(|e: Result<_, P::Error>| {
e.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
}))
impl<R: BufRead> TripleReader<R> {
    /// Runs a single parsing step of `parser`.
    ///
    /// Every triple emitted during the step is converted with `mapper` and pushed to `buffer`.
    ///
    /// Returns `None` if the parser is already at its end, `Some(Ok(()))` if the step
    /// succeeded, and `Some(Err(_))` with the failure converted by `error` otherwise.
    fn read<P: TriplesParser>(
        parser: &mut P,
        buffer: &mut Vec<Triple>,
        mapper: &mut RioMapper,
        error: impl Fn(P::Error) -> io::Error,
    ) -> Option<Result<(), io::Error>> {
        if parser.is_end() {
            return None;
        }
        let step: Result<(), P::Error> = parser.parse_step(&mut |rio_triple| {
            buffer.push(mapper.triple(&rio_triple));
            Ok(())
        });
        Some(step.map_err(error))
    }
}
@ -122,7 +175,7 @@ impl GraphParser {
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_syntax(DatasetSyntax::NQuads);
/// let quads = parser.read(Cursor::new(file)).collect::<Result<Vec<_>,_>>()?;
/// let quads = parser.read_quads(Cursor::new(file))?.collect::<Result<Vec<_>,_>>()?;
///
///assert_eq!(quads.len(), 1);
///assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
@ -150,7 +203,7 @@ impl DatasetParser {
/// let file = "<g> { </s> </p> </o> }";
///
/// let parser = DatasetParser::from_syntax(DatasetSyntax::TriG).with_base_iri("http://example.com")?;
/// let triples = parser.read(Cursor::new(file)).collect::<Result<Vec<_>,_>>()?;
/// let triples = parser.read_quads(Cursor::new(file))?.collect::<Result<Vec<_>,_>>()?;
///
///assert_eq!(triples.len(), 1);
///assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
@ -162,44 +215,91 @@ impl DatasetParser {
}
/// Executes the parsing itself
pub fn read<'a>(
&self,
reader: impl BufRead + 'a,
) -> impl Iterator<Item = Result<Quad, io::Error>> + 'a {
match self.parse(reader) {
Ok(iter) => iter,
Err(error) => Box::new(once(Err(error))),
}
/// Returns a `QuadReader` iterating over the quads parsed from `reader`.
///
/// The concrete parser is chosen from the syntax this `DatasetParser` was built with.
/// A parser that cannot be constructed is reported as an `io::Error` created by
/// `invalid_input`.
pub fn read_quads<R: BufRead>(&self, reader: R) -> Result<QuadReader<R>, io::Error> {
    //TODO: drop the error when possible
    let parser = match self.syntax {
        DatasetSyntax::NQuads => {
            let inner = NQuadsParser::new(reader).map_err(invalid_input)?;
            QuadReaderKind::NQuads(inner)
        }
        DatasetSyntax::TriG => {
            let inner = TriGParser::new(reader, &self.base_iri).map_err(invalid_input)?;
            QuadReaderKind::TriG(inner)
        }
    };
    Ok(QuadReader {
        mapper: RioMapper::default(),
        parser,
        buffer: Vec::new(),
    })
}
}
fn parse<'a>(
&self,
reader: impl BufRead + 'a,
) -> Result<Box<dyn Iterator<Item = Result<Quad, io::Error>> + 'a>, io::Error> {
Ok(match self.syntax {
DatasetSyntax::NQuads => {
Box::new(self.parse_from_quad_parser(NQuadsParser::new(reader))?)
/// An iterator over the quads read from an RDF dataset serialization.
///
/// It could be built using a `DatasetParser` and yields `Result<Quad, io::Error>` items.
///
/// ```
/// use oxigraph::io::{DatasetSyntax, DatasetParser};
/// use std::io::Cursor;
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_syntax(DatasetSyntax::NQuads);
/// let quads = parser.read_quads(Cursor::new(file))?.collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct QuadReader<R: BufRead> {
/// Converts the quads emitted by the underlying parser into `Quad` values.
mapper: RioMapper,
/// The syntax-specific parser wrapped by this reader.
parser: QuadReaderKind<R>,
/// Quads that have been parsed but not yet returned by the iterator.
buffer: Vec<Quad>,
}
/// The parser backing a `QuadReader`: one variant per supported dataset syntax,
/// each holding the matching parser over the reader `R`.
enum QuadReaderKind<R: BufRead> {
NQuads(NQuadsParser<R>),
TriG(TriGParser<R>),
}
impl<R: BufRead> Iterator for QuadReader<R> {
type Item = Result<Quad, io::Error>;
/// Returns the next parsed quad, a parsing error, or `None` once the parser is at its end.
fn next(&mut self) -> Option<Result<Quad, io::Error>> {
loop {
// Serve already buffered quads first. `pop` takes from the end of the buffer,
// so quads pushed during the same parsing step come out in reverse push order.
if let Some(r) = self.buffer.pop() {
return Some(Ok(r));
}
// NOTE(review): the next two lines look like removed-side leftovers of the old `parse`
// method kept by the diff rendering; confirm against the real file.
DatasetSyntax::TriG => {
Box::new(self.parse_from_quad_parser(TriGParser::new(reader, &self.base_iri))?)
// Run one parsing step. `?` ends the iteration when `read` returns `None`,
// a failed step is handed to the caller as `Some(Err(_))`.
if let Err(error) = match &mut self.parser {
QuadReaderKind::NQuads(parser) => {
Self::read(parser, &mut self.buffer, &mut self.mapper, invalid_data)
}
QuadReaderKind::TriG(parser) => {
Self::read(parser, &mut self.buffer, &mut self.mapper, invalid_data)
}
}? {
return Some(Err(error));
}
// NOTE(review): the `})` below also appears to be a removed-side leftover; confirm.
})
}
}
}
fn parse_from_quad_parser<P: QuadsParser>(
&self,
parser: Result<P, P::Error>,
) -> Result<impl Iterator<Item = Result<Quad, io::Error>>, io::Error>
where
P::Error: Send + Sync + 'static,
{
let parser = parser.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
let mut mapper = RioMapper::default();
Ok(parser
.into_iter(move |q| Ok(mapper.quad(&q)))
.map(|e: Result<_, P::Error>| {
e.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
}))
impl<R: BufRead> QuadReader<R> {
    /// Runs a single parsing step of `parser`.
    ///
    /// Every quad emitted during the step is converted with `mapper` and pushed to `buffer`.
    ///
    /// Returns `None` if the parser is already at its end, `Some(Ok(()))` if the step
    /// succeeded, and `Some(Err(_))` with the failure converted by `error` otherwise.
    fn read<P: QuadsParser>(
        parser: &mut P,
        buffer: &mut Vec<Quad>,
        mapper: &mut RioMapper,
        error: impl Fn(P::Error) -> io::Error,
    ) -> Option<Result<(), io::Error>> {
        if parser.is_end() {
            return None;
        }
        let step: Result<(), P::Error> = parser.parse_step(&mut |rio_quad| {
            buffer.push(mapper.quad(&rio_quad));
            Ok(())
        });
        Some(step.map_err(error))
    }
}
@ -272,3 +372,11 @@ impl<'a> RioMapper {
}
}
}
/// Wraps `error` into an `io::Error` of kind `InvalidInput`, keeping `error` as its source payload.
fn invalid_input(error: impl Error + Send + Sync + 'static) -> io::Error {
    let kind = io::ErrorKind::InvalidInput;
    io::Error::new(kind, error)
}
/// Wraps `error` into an `io::Error` of kind `InvalidData`, keeping `error` as its source payload.
fn invalid_data(error: impl Error + Send + Sync + 'static) -> io::Error {
    //TODO: drop
    let kind = io::ErrorKind::InvalidData;
    io::Error::new(kind, error)
}

@ -0,0 +1,211 @@
//! Utilities to write RDF graphs and datasets
use super::GraphSyntax;
use crate::model::*;
use crate::DatasetSyntax;
use rio_api::formatter::{QuadsFormatter, TriplesFormatter};
use rio_turtle::{NQuadsFormatter, NTriplesFormatter, TriGFormatter, TurtleFormatter};
use rio_xml::{RdfXmlError, RdfXmlFormatter};
use std::io;
use std::io::Write;
/// A serializer for RDF graph serialization formats.
///
/// It currently supports the following formats:
/// * [N-Triples](https://www.w3.org/TR/n-triples/) (`GraphSyntax::NTriples`)
/// * [Turtle](https://www.w3.org/TR/turtle/) (`GraphSyntax::Turtle`)
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) (`GraphSyntax::RdfXml`)
///
/// ```
/// use oxigraph::io::{GraphSyntax, GraphSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_syntax(GraphSyntax::NTriples).triple_writer(&mut buffer)?;
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into()
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes());
/// # oxigraph::Result::Ok(())
/// ```
#[allow(missing_copy_implementations)]
pub struct GraphSerializer {
/// The graph syntax the writers created by this serializer emit.
syntax: GraphSyntax,
}
impl GraphSerializer {
pub fn from_syntax(syntax: GraphSyntax) -> Self {
Self { syntax }
}
/// Returns a `TripleWriter` allowing writing triples into the given `Write` implementation
pub fn triple_writer<W: Write>(&self, writer: W) -> Result<TripleWriter<W>, io::Error> {
Ok(TripleWriter {
formatter: match self.syntax {
GraphSyntax::NTriples => TripleWriterKind::NTriples(NTriplesFormatter::new(writer)),
GraphSyntax::Turtle => TripleWriterKind::Turtle(TurtleFormatter::new(writer)),
GraphSyntax::RdfXml => {
TripleWriterKind::RdfXml(RdfXmlFormatter::new(writer).map_err(map_xml_err)?)
}
},
})
}
}
/// Allows writing triples.
/// Could be built using a `GraphSerializer`.
///
/// Warning: Do not forget to run the `finish` method to properly write the last bytes of the file.
///
/// ```
/// use oxigraph::io::{GraphSyntax, GraphSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_syntax(GraphSyntax::NTriples).triple_writer(&mut buffer)?;
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into()
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes());
/// # oxigraph::Result::Ok(())
/// ```
#[must_use]
pub struct TripleWriter<W: Write> {
/// The syntax-specific formatter that owns the underlying writer.
formatter: TripleWriterKind<W>,
}
/// The formatter backing a `TripleWriter`: one variant per supported graph syntax,
/// each holding the matching formatter over the writer `W`.
enum TripleWriterKind<W: Write> {
NTriples(NTriplesFormatter<W>),
Turtle(TurtleFormatter<W>),
RdfXml(RdfXmlFormatter<W>),
}
impl<W: Write> TripleWriter<W> {
    /// Writes `triple` using the syntax this writer was created for.
    ///
    /// # Errors
    ///
    /// Returns the `io::Error` raised by the formatter. RDF/XML errors are converted
    /// by `map_xml_err`.
    pub fn write(&mut self, triple: &Triple) -> Result<(), io::Error> {
        match &mut self.formatter {
            TripleWriterKind::NTriples(formatter) => formatter.format(&triple.into())?,
            TripleWriterKind::Turtle(formatter) => formatter.format(&triple.into())?,
            TripleWriterKind::RdfXml(formatter) => {
                formatter.format(&triple.into()).map_err(map_xml_err)?
            }
        }
        Ok(())
    }

    /// Writes the last bytes of the file and flushes the underlying writer.
    ///
    /// # Errors
    ///
    /// Returns the `io::Error` raised while finishing the serialization or while flushing.
    pub fn finish(self) -> Result<(), io::Error> {
        // Each formatter gives the underlying writer back from `finish`.
        let mut sink = match self.formatter {
            TripleWriterKind::NTriples(formatter) => formatter.finish(),
            TripleWriterKind::Turtle(formatter) => formatter.finish()?,
            TripleWriterKind::RdfXml(formatter) => formatter.finish().map_err(map_xml_err)?,
        };
        // The writer is consumed here, so the caller cannot flush it afterwards.
        // Flush explicitly instead of relying on `Drop`, which would hide write errors
        // of buffered writers.
        sink.flush()
    }
}
/// A serializer for RDF dataset serialization formats.
///
/// It currently supports the following formats:
/// * [N-Quads](https://www.w3.org/TR/n-quads/) (`DatasetSyntax::NQuads`)
/// * [TriG](https://www.w3.org/TR/trig/) (`DatasetSyntax::TriG`)
///
/// ```
/// use oxigraph::io::{DatasetSyntax, DatasetSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_syntax(DatasetSyntax::NQuads).quad_writer(&mut buffer)?;
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # oxigraph::Result::Ok(())
/// ```
#[allow(missing_copy_implementations)]
pub struct DatasetSerializer {
/// The dataset syntax the writers created by this serializer emit.
syntax: DatasetSyntax,
}
impl DatasetSerializer {
pub fn from_syntax(syntax: DatasetSyntax) -> Self {
Self { syntax }
}
/// Returns a `QuadWriter` allowing writing triples into the given `Write` implementation
pub fn quad_writer<W: Write>(&self, writer: W) -> Result<QuadWriter<W>, io::Error> {
Ok(QuadWriter {
formatter: match self.syntax {
DatasetSyntax::NQuads => QuadWriterKind::NQuads(NQuadsFormatter::new(writer)),
DatasetSyntax::TriG => QuadWriterKind::TriG(TriGFormatter::new(writer)),
},
})
}
}
/// Allows writing quads.
/// Could be built using a `DatasetSerializer`.
///
/// Warning: Do not forget to run the `finish` method to properly write the last bytes of the file.
///
/// ```
/// use oxigraph::io::{DatasetSyntax, DatasetSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_syntax(DatasetSyntax::NQuads).quad_writer(&mut buffer)?;
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # oxigraph::Result::Ok(())
/// ```
#[must_use]
pub struct QuadWriter<W: Write> {
/// The syntax-specific formatter that owns the underlying writer.
formatter: QuadWriterKind<W>,
}
/// The formatter backing a `QuadWriter`: one variant per supported dataset syntax,
/// each holding the matching formatter over the writer `W`.
enum QuadWriterKind<W: Write> {
NQuads(NQuadsFormatter<W>),
TriG(TriGFormatter<W>),
}
impl<W: Write> QuadWriter<W> {
    /// Writes a quad using the syntax this writer was created for.
    ///
    /// The parameter keeps its historical name `triple` but is a `Quad`.
    ///
    /// # Errors
    ///
    /// Returns the `io::Error` raised by the formatter.
    pub fn write(&mut self, triple: &Quad) -> Result<(), io::Error> {
        match &mut self.formatter {
            QuadWriterKind::NQuads(formatter) => formatter.format(&triple.into())?,
            QuadWriterKind::TriG(formatter) => formatter.format(&triple.into())?,
        }
        Ok(())
    }

    /// Writes the last bytes of the file and flushes the underlying writer.
    ///
    /// # Errors
    ///
    /// Returns the `io::Error` raised while finishing the serialization or while flushing.
    pub fn finish(self) -> Result<(), io::Error> {
        // Each formatter gives the underlying writer back from `finish`.
        let mut sink = match self.formatter {
            QuadWriterKind::NQuads(formatter) => formatter.finish(),
            QuadWriterKind::TriG(formatter) => formatter.finish()?,
        };
        // The writer is consumed here, so the caller cannot flush it afterwards.
        // Flush explicitly instead of relying on `Drop`, which would hide write errors
        // of buffered writers.
        sink.flush()
    }
}
/// Wraps an `RdfXmlError` into an `io::Error` of kind `Other`.
fn map_xml_err(e: RdfXmlError) -> io::Error {
    //TODO: drop
    let kind = io::ErrorKind::Other;
    io::Error::new(kind, e)
}

@ -1,3 +1,4 @@
use crate::io::GraphSerializer;
#[allow(deprecated)]
use crate::io::{FileSyntax, GraphSyntax};
use crate::model::*;
@ -5,9 +6,6 @@ use crate::sparql::json_results::write_json_results;
use crate::sparql::xml_results::{read_xml_results, write_xml_results};
use crate::{Error, Result};
use rand::random;
use rio_api::formatter::TriplesFormatter;
use rio_turtle::{NTriplesFormatter, TurtleFormatter};
use rio_xml::RdfXmlFormatter;
use std::fmt;
use std::io::{BufRead, Write};
use std::rc::Rc;
@ -80,29 +78,11 @@ impl QueryResult {
/// ```
pub fn write_graph(self, write: impl Write, syntax: GraphSyntax) -> Result<()> {
if let QueryResult::Graph(triples) = self {
match syntax {
GraphSyntax::NTriples => {
let mut formatter = NTriplesFormatter::new(write);
for triple in triples {
formatter.format(&(&triple?).into())?;
}
formatter.finish();
}
GraphSyntax::Turtle => {
let mut formatter = TurtleFormatter::new(write);
for triple in triples {
formatter.format(&(&triple?).into())?;
}
formatter.finish()?;
}
GraphSyntax::RdfXml => {
let mut formatter = RdfXmlFormatter::new(write)?;
for triple in triples {
formatter.format(&(&triple?).into())?;
}
formatter.finish()?;
}
let mut writer = GraphSerializer::from_syntax(syntax).triple_writer(write)?;
for triple in triples {
writer.write(&triple?)?;
}
writer.finish()?;
Ok(())
} else {
Err(Error::msg(

@ -139,7 +139,7 @@ fn dump_graph(
}
/// Wraps an `RdfXmlError` into an `io::Error` of kind `Other`.
fn map_xml_err(e: RdfXmlError) -> io::Error {
// NOTE(review): the two lines below are the same expression without and with the TODO
// marker, presumably the removed and added sides of the diff; confirm only one is in the file.
io::Error::new(io::ErrorKind::Other, e)
io::Error::new(io::ErrorKind::Other, e) // TODO: drop
}
fn load_dataset<S: WritableEncodedStore>(

Loading…
Cancel
Save