Moves I/O out of Graph and Dataset structs

Makes basic model independent of I/O
pull/190/head
Tpt 3 years ago
parent a91ea89fff
commit 837d5d4ff7
  1. 197
      lib/src/model/dataset.rs
  2. 73
      lib/src/model/graph.rs
  3. 50
      testsuite/src/files.rs

@ -23,19 +23,14 @@
//!
//! See also [`Graph`](super::Graph) if you only care about plain triples.
use crate::io::read::ParserError;
use crate::io::{
DatasetFormat, DatasetParser, DatasetSerializer, GraphFormat, GraphParser, GraphSerializer,
};
use crate::model::interning::*;
use crate::model::SubjectRef;
use crate::model::*;
use std::collections::hash_map::DefaultHasher;
use std::collections::BTreeSet;
use std::collections::{HashMap, HashSet};
use std::fmt;
use std::hash::{Hash, Hasher};
use std::io::{BufRead, Write};
use std::{fmt, io};
/// An in-memory [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
///
@ -404,77 +399,6 @@ impl Dataset {
self.ospg.clear();
}
/// Loads a file into the dataset.
///
/// To load a specific graph use [`GraphViewMut::load`].
///
/// Usage example:
/// ```
/// use oxigraph::model::*;
/// use oxigraph::io::DatasetFormat;
///
/// let mut dataset = Dataset::new();
///
/// // insertion
/// let file = b"<http://example.com> <http://example.com> <http://example.com> <http://example.com> .";
/// dataset.load(file.as_ref(), DatasetFormat::NQuads, None)?;
///
/// // we inspect the store contents
/// let ex = NamedNodeRef::new("http://example.com")?;
/// assert!(dataset.contains(QuadRef::new(ex, ex, ex, ex)));
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
///
/// Warning: This function inserts the quads during the parsing.
/// If the parsing fails in the middle of the file, the quads read before stay in the dataset.
///
/// Errors related to parameter validation like the base IRI use the [`InvalidInput`](std::io::ErrorKind::InvalidInput) error kind.
/// Errors related to a bad syntax in the loaded file use the [`InvalidData`](std::io::ErrorKind::InvalidData) or [`UnexpectedEof`](std::io::ErrorKind::UnexpectedEof) error kinds.
pub fn load(
&mut self,
reader: impl BufRead,
format: DatasetFormat,
base_iri: Option<&str>,
) -> Result<(), ParserError> {
let mut parser = DatasetParser::from_format(format);
if let Some(base_iri) = base_iri {
parser = parser
.with_base_iri(base_iri)
.map_err(|e| ParserError::invalid_base_iri(base_iri, e))?;
}
for t in parser.read_quads(reader)? {
self.insert(&t?);
}
Ok(())
}
/// Dumps the dataset into a file.
///
/// To dump a specific graph use [`GraphView::dump`].
///
/// Usage example:
/// ```
/// use oxigraph::io::DatasetFormat;
/// use oxigraph::model::Dataset;
///
/// let file = "<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n".as_bytes();
///
/// let mut store = Dataset::new();
/// store.load(file, DatasetFormat::NQuads, None)?;
///
/// let mut buffer = Vec::new();
/// store.dump(&mut buffer, DatasetFormat::NQuads)?;
/// assert_eq!(file, buffer.as_slice());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn dump(&self, writer: impl Write, format: DatasetFormat) -> io::Result<()> {
    // Configure the serializer for the requested format, then wrap the sink.
    let serializer = DatasetSerializer::from_format(format);
    let mut quad_writer = serializer.quad_writer(writer)?;

    // Stop at the first quad that fails to be written.
    for quad in self {
        quad_writer.write(quad)?;
    }

    // `finish` produces the final result of the whole dump.
    quad_writer.finish()
}
fn encode_quad(
&mut self,
quad: QuadRef<'_>,
@ -567,20 +491,26 @@ impl Dataset {
///
/// Usage example ([Dataset isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-dataset-isomorphism)):
/// ```
/// use oxigraph::io::DatasetFormat;
/// use oxigraph::model::Dataset;
/// use oxigraph::model::*;
///
/// let iri = NamedNodeRef::new("http://example.com")?;
///
/// let file = "GRAPH _:a1 { <http://example.com> <http://example.com> [ <http://example.com/p> <http://example.com/o> ] . }".as_bytes();
/// let mut graph1 = Graph::new();
/// let bnode1 = BlankNode::default();
/// let g1 = BlankNode::default();
/// graph1.insert(QuadRef::new(iri, iri, &bnode1, &g1));
/// graph1.insert(QuadRef::new(&bnode1, iri, iri, &g1));
///
/// let mut dataset1 = Dataset::new();
/// dataset1.load(file, DatasetFormat::TriG, None)?;
/// let mut dataset2 = Dataset::new();
/// dataset2.load(file, DatasetFormat::TriG, None)?;
/// let mut graph2 = Graph::new();
/// let bnode2 = BlankNode::default();
/// let g2 = BlankNode::default();
/// graph2.insert(QuadRef::new(iri, iri, &bnode2, &g2));
/// graph2.insert(QuadRef::new(&bnode2, iri, iri, &g2));
///
/// assert_ne!(dataset1, dataset2);
/// dataset1.canonicalize();
/// dataset2.canonicalize();
/// assert_eq!(dataset1, dataset2);
/// assert_ne!(graph1, graph2);
/// graph1.canonicalize();
/// graph2.canonicalize();
/// assert_eq!(graph1, graph2);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
///
@ -1247,31 +1177,6 @@ impl<'a> GraphView<'a> {
self.iter().next().is_none()
}
/// Dumps the graph into a file.
///
/// Usage example:
/// ```
/// use oxigraph::io::GraphFormat;
/// use oxigraph::model::*;
///
/// let mut dataset = Dataset::new();
/// let ex = NamedNodeRef::new("http://example.com")?;
/// dataset.insert(QuadRef::new(ex, ex, ex, ex));
///
/// let file = "<http://example.com> <http://example.com> <http://example.com> .\n".as_bytes();
/// let mut buffer = Vec::new();
/// dataset.graph(ex).dump(&mut buffer, GraphFormat::NTriples)?;
/// assert_eq!(file, buffer.as_slice());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn dump(&self, writer: impl Write, format: GraphFormat) -> io::Result<()> {
    // Configure the serializer for the requested format, then wrap the sink.
    let serializer = GraphSerializer::from_format(format);
    let mut triple_writer = serializer.triple_writer(writer)?;

    // Stop at the first triple that fails to be written.
    for triple in self {
        triple_writer.write(triple)?;
    }

    // `finish` produces the final result of the whole dump.
    triple_writer.finish()
}
fn encoded_triple(&self, triple: TripleRef<'_>) -> Option<InternedTriple> {
Some(InternedTriple {
subject: self.dataset.encoded_subject(triple.subject)?,
@ -1371,49 +1276,6 @@ impl<'a> GraphViewMut<'a> {
}
}
/// Loads a file into the graph.
///
/// Usage example:
/// ```
/// use oxigraph::model::*;
/// use oxigraph::io::GraphFormat;
///
/// let mut dataset = Dataset::new();
/// let mut graph = dataset.graph_mut(NamedNodeRef::new("http://example.com")?);
///
/// // insertion
/// let file = b"<http://example.com> <http://example.com> <http://example.com> .";
/// graph.load(file.as_ref(), GraphFormat::NTriples, None)?;
///
/// // we inspect the dataset contents
/// let ex = NamedNodeRef::new("http://example.com")?;
/// assert!(graph.contains(TripleRef::new(ex, ex, ex)));
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
///
/// Warning: This function inserts the triples during the parsing.
/// If the parsing fails in the middle of the file, the triples read before stay in the graph.
///
/// Errors related to parameter validation like the base IRI use the [`InvalidInput`](std::io::ErrorKind::InvalidInput) error kind.
/// Errors related to a bad syntax in the loaded file use the [`InvalidData`](std::io::ErrorKind::InvalidData) or [`UnexpectedEof`](std::io::ErrorKind::UnexpectedEof) error kinds.
pub fn load(
&mut self,
reader: impl BufRead,
format: GraphFormat,
base_iri: Option<&str>,
) -> Result<(), ParserError> {
let mut parser = GraphParser::from_format(format);
if let Some(base_iri) = base_iri {
parser = parser
.with_base_iri(base_iri)
.map_err(|e| ParserError::invalid_base_iri(base_iri, e))?;
}
for t in parser.read_triples(reader)? {
self.insert(&t?);
}
Ok(())
}
fn encode_triple(&mut self, triple: TripleRef<'_>) -> InternedTriple {
InternedTriple {
subject: InternedSubject::encoded_into(triple.subject, &mut self.dataset.interner),
@ -1517,29 +1379,6 @@ impl<'a> GraphViewMut<'a> {
pub fn is_empty(&self) -> bool {
// Pure delegation: the answer comes from `is_empty` on whatever `self.read()`
// returns (presumably the read-only view of this graph; confirm against `read`).
self.read().is_empty()
}
/// Dumps the graph into a file.
///
/// Usage example:
/// ```
/// use oxigraph::io::GraphFormat;
/// use oxigraph::model::*;
///
/// let mut dataset = Dataset::new();
/// let mut graph = dataset.graph_mut(NamedNodeRef::new("http://example.com")?);
///
/// let ex = NamedNodeRef::new("http://example.com")?;
/// graph.insert(TripleRef::new(ex, ex, ex));
///
/// let file = "<http://example.com> <http://example.com> <http://example.com> .\n".as_bytes();
/// let mut buffer = Vec::new();
/// graph.dump(&mut buffer, GraphFormat::NTriples)?;
/// assert_eq!(file, buffer.as_slice());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn dump(self, writer: impl Write, format: GraphFormat) -> io::Result<()> {
// Consumes `self` and forwards `writer` and `format` unchanged to the `dump`
// of whatever `self.read()` returns; no serialization logic lives here.
self.read().dump(writer, format)
}
}
impl<'a> Extend<Triple> for GraphViewMut<'a> {

@ -19,12 +19,9 @@
//!
//! See also [`Dataset`](super::Dataset) if you want to get support of multiple RDF graphs at the same time.
use crate::io::read::ParserError;
use crate::io::GraphFormat;
use crate::model::dataset::*;
use crate::model::*;
use std::io::{BufRead, Write};
use std::{fmt, io};
use std::fmt;
/// An in-memory [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-graph).
///
@ -179,74 +176,24 @@ impl Graph {
self.dataset.clear()
}
/// Loads a file into the graph.
///
/// Usage example:
/// ```
/// use oxigraph::model::*;
/// use oxigraph::io::GraphFormat;
///
/// let mut graph = Graph::new();
///
/// // insertion
/// let file = b"<http://example.com> <http://example.com> <http://example.com> .";
/// graph.load(file.as_ref(), GraphFormat::NTriples, None)?;
///
/// // we inspect the graph contents
/// let ex = NamedNodeRef::new("http://example.com")?;
/// assert!(graph.contains(TripleRef::new(ex, ex, ex)));
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
///
/// Warning: This function inserts the triples during the parsing.
/// If the parsing fails in the middle of the file, the triples read before stay in the graph.
///
/// Errors related to parameter validation like the base IRI use the [`InvalidInput`](std::io::ErrorKind::InvalidInput) error kind.
/// Errors related to a bad syntax in the loaded file use the [`InvalidData`](std::io::ErrorKind::InvalidData) or [`UnexpectedEof`](std::io::ErrorKind::UnexpectedEof) error kinds.
pub fn load(
&mut self,
reader: impl BufRead,
format: GraphFormat,
base_iri: Option<&str>,
) -> Result<(), ParserError> {
// Pure delegation: all three arguments are passed through unchanged to the
// `load` of the value returned by `self.graph_mut()`, and its result is returned as is.
self.graph_mut().load(reader, format, base_iri)
}
/// Dumps the graph into a file.
///
/// Usage example:
/// ```
/// use oxigraph::io::GraphFormat;
/// use oxigraph::model::Graph;
///
/// let file = "<http://example.com> <http://example.com> <http://example.com> .\n".as_bytes();
///
/// let mut graph = Graph::new();
/// graph.load(file, GraphFormat::NTriples, None)?;
///
/// let mut buffer = Vec::new();
/// graph.dump(&mut buffer, GraphFormat::NTriples)?;
/// assert_eq!(file, buffer.as_slice());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn dump(&self, writer: impl Write, format: GraphFormat) -> io::Result<()> {
// Pure delegation: `writer` and `format` are passed through unchanged to the
// `dump` of the value returned by `self.graph()`.
self.graph().dump(writer, format)
}
/// Applies on the graph the canonicalization process described in
/// [Canonical Forms for Isomorphic and Equivalent RDF Graphs: Algorithms for Leaning and Labelling Blank Nodes, Aidan Hogan, 2017](http://aidanhogan.com/docs/rdf-canonicalisation.pdf)
///
/// Usage example ([Graph isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism)):
/// ```
/// use oxigraph::io::GraphFormat;
/// use oxigraph::model::Graph;
/// use oxigraph::model::*;
///
/// let file = "<http://example.com> <http://example.com> [ <http://example.com/p> <http://example.com/o> ] .".as_bytes();
/// let iri = NamedNodeRef::new("http://example.com")?;
///
/// let mut graph1 = Graph::new();
/// graph1.load(file, GraphFormat::Turtle, None)?;
/// let bnode1 = BlankNode::default();
/// graph1.insert(TripleRef::new(iri, iri, &bnode1));
/// graph1.insert(TripleRef::new(&bnode1, iri, iri));
///
/// let mut graph2 = Graph::new();
/// graph2.load(file, GraphFormat::Turtle, None)?;
/// let bnode2 = BlankNode::default();
/// graph2.insert(TripleRef::new(iri, iri, &bnode2));
/// graph2.insert(TripleRef::new(&bnode2, iri, iri));
///
/// assert_ne!(graph1, graph2);
/// graph1.canonicalize();

@ -1,5 +1,5 @@
use anyhow::{anyhow, Result};
use oxigraph::io::{DatasetFormat, GraphFormat};
use oxigraph::io::{DatasetFormat, DatasetParser, GraphFormat, GraphParser};
use oxigraph::model::{Dataset, Graph, GraphNameRef};
use oxigraph::store::Store;
use std::fs::File;
@ -78,14 +78,13 @@ pub fn load_to_store<'a>(
}
pub fn load_to_graph(url: &str, graph: &mut Graph) -> Result<()> {
if url.ends_with(".nt") {
graph.load(read_file(url)?, GraphFormat::NTriples, Some(url))?
} else if url.ends_with(".ttl") {
graph.load(read_file(url)?, GraphFormat::Turtle, Some(url))?
} else if url.ends_with(".rdf") {
graph.load(read_file(url)?, GraphFormat::RdfXml, Some(url))?
} else {
return Err(anyhow!("Serialization type not found for {}", url));
let format = url
.rsplit_once(".")
.and_then(|(_, extension)| GraphFormat::from_extension(extension))
.ok_or_else(|| anyhow!("Serialization type not found for {}", url))?;
let parser = GraphParser::from_format(format).with_base_iri(url)?;
for t in parser.read_triples(read_file(url)?)? {
graph.insert(&t?);
}
Ok(())
}
@ -101,26 +100,23 @@ pub fn load_to_dataset<'a>(
dataset: &mut Dataset,
to_graph_name: impl Into<GraphNameRef<'a>>,
) -> Result<()> {
if url.ends_with(".nt") {
dataset
.graph_mut(to_graph_name)
.load(read_file(url)?, GraphFormat::NTriples, Some(url))?
} else if url.ends_with(".ttl") {
dataset
.graph_mut(to_graph_name)
.load(read_file(url)?, GraphFormat::Turtle, Some(url))?
} else if url.ends_with(".rdf") {
dataset
.graph_mut(to_graph_name)
.load(read_file(url)?, GraphFormat::RdfXml, Some(url))?
} else if url.ends_with(".nq") {
dataset.load(read_file(url)?, DatasetFormat::NQuads, Some(url))?
} else if url.ends_with(".trig") {
dataset.load(read_file(url)?, DatasetFormat::TriG, Some(url))?
let to_graph_name = to_graph_name.into();
let extension = url.rsplit_once(".").map(|(_, ext)| ext);
if let Some(format) = extension.and_then(GraphFormat::from_extension) {
let parser = GraphParser::from_format(format).with_base_iri(url)?;
for t in parser.read_triples(read_file(url)?)? {
dataset.insert(&t?.in_graph(to_graph_name));
}
Ok(())
} else if let Some(format) = extension.and_then(DatasetFormat::from_extension) {
let parser = DatasetParser::from_format(format).with_base_iri(url)?;
for q in parser.read_quads(read_file(url)?)? {
dataset.insert(&q?);
}
Ok(())
} else {
return Err(anyhow!("Serialization type not found for {}", url));
Err(anyhow!("Serialization type not found for {}", url))
}
Ok(())
}
pub fn load_dataset(url: &str) -> Result<Dataset> {

Loading…
Cancel
Save