Moves I/O out of Graph and Dataset structs

Makes basic model independent of I/O
pull/190/head
Tpt 3 years ago
parent a91ea89fff
commit 837d5d4ff7
  1. 197
      lib/src/model/dataset.rs
  2. 73
      lib/src/model/graph.rs
  3. 50
      testsuite/src/files.rs

@ -23,19 +23,14 @@
//! //!
//! See also [`Graph`](super::Graph) if you only care about plain triples. //! See also [`Graph`](super::Graph) if you only care about plain triples.
use crate::io::read::ParserError;
use crate::io::{
DatasetFormat, DatasetParser, DatasetSerializer, GraphFormat, GraphParser, GraphSerializer,
};
use crate::model::interning::*; use crate::model::interning::*;
use crate::model::SubjectRef; use crate::model::SubjectRef;
use crate::model::*; use crate::model::*;
use std::collections::hash_map::DefaultHasher; use std::collections::hash_map::DefaultHasher;
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::fmt;
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
use std::io::{BufRead, Write};
use std::{fmt, io};
/// An in-memory [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). /// An in-memory [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
/// ///
@ -404,77 +399,6 @@ impl Dataset {
self.ospg.clear(); self.ospg.clear();
} }
/// Loads a file into the dataset.
///
/// To load a specific graph use [`GraphViewMut::load`].
///
/// Usage example:
/// ```
/// use oxigraph::model::*;
/// use oxigraph::io::DatasetFormat;
///
/// let mut dataset = Dataset::new();
///
/// // insertion
/// let file = b"<http://example.com> <http://example.com> <http://example.com> <http://example.com> .";
/// dataset.load(file.as_ref(), DatasetFormat::NQuads, None)?;
///
/// // we inspect the store contents
/// let ex = NamedNodeRef::new("http://example.com")?;
/// assert!(dataset.contains(QuadRef::new(ex, ex, ex, ex)));
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
///
/// Warning: This function inserts the quads during parsing.
/// If parsing fails in the middle of the file, the quads read so far stay in the dataset.
///
/// Errors related to parameter validation like the base IRI use the [`InvalidInput`](std::io::ErrorKind::InvalidInput) error kind.
/// Errors related to a bad syntax in the loaded file use the [`InvalidData`](std::io::ErrorKind::InvalidData) or [`UnexpectedEof`](std::io::ErrorKind::UnexpectedEof) error kinds.
pub fn load(
&mut self,
reader: impl BufRead,
format: DatasetFormat,
base_iri: Option<&str>,
) -> Result<(), ParserError> {
let mut parser = DatasetParser::from_format(format);
if let Some(base_iri) = base_iri {
parser = parser
.with_base_iri(base_iri)
.map_err(|e| ParserError::invalid_base_iri(base_iri, e))?;
}
for t in parser.read_quads(reader)? {
self.insert(&t?);
}
Ok(())
}
/// Dumps the dataset into a file.
///
/// To dump a specific graph use [`GraphView::dump`].
///
/// Usage example:
/// ```
/// use oxigraph::io::DatasetFormat;
/// use oxigraph::model::Dataset;
///
/// let file = "<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n".as_bytes();
///
/// let mut store = Dataset::new();
/// store.load(file, DatasetFormat::NQuads, None)?;
///
/// let mut buffer = Vec::new();
/// store.dump(&mut buffer, DatasetFormat::NQuads)?;
/// assert_eq!(file, buffer.as_slice());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn dump(&self, writer: impl Write, format: DatasetFormat) -> io::Result<()> {
let mut writer = DatasetSerializer::from_format(format).quad_writer(writer)?;
for t in self {
writer.write(t)?;
}
writer.finish()
}
fn encode_quad( fn encode_quad(
&mut self, &mut self,
quad: QuadRef<'_>, quad: QuadRef<'_>,
@ -567,20 +491,26 @@ impl Dataset {
/// ///
/// Usage example ([Dataset isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-dataset-isomorphism)): /// Usage example ([Dataset isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-dataset-isomorphism)):
/// ``` /// ```
/// use oxigraph::io::DatasetFormat; /// use oxigraph::model::*;
/// use oxigraph::model::Dataset;
/// ///
/// let file = "GRAPH _:a1 { <http://example.com> <http://example.com> [ <http://example.com/p> <http://example.com/o> ] . }".as_bytes(); /// let iri = NamedNodeRef::new("http://example.com")?;
/// ///
/// let mut dataset1 = Dataset::new(); /// let mut graph1 = Graph::new();
/// dataset1.load(file, DatasetFormat::TriG, None)?; /// let bnode1 = BlankNode::default();
/// let mut dataset2 = Dataset::new(); /// let g1 = BlankNode::default();
/// dataset2.load(file, DatasetFormat::TriG, None)?; /// graph1.insert(QuadRef::new(iri, iri, &bnode1, &g1));
/// graph1.insert(QuadRef::new(&bnode1, iri, iri, &g1));
/// ///
/// assert_ne!(dataset1, dataset2); /// let mut graph2 = Graph::new();
/// dataset1.canonicalize(); /// let bnode2 = BlankNode::default();
/// dataset2.canonicalize(); /// let g2 = BlankNode::default();
/// assert_eq!(dataset1, dataset2); /// graph1.insert(QuadRef::new(iri, iri, &bnode2, &g2));
/// graph1.insert(QuadRef::new(&bnode2, iri, iri, &g2));
///
/// assert_ne!(graph1, graph2);
/// graph1.canonicalize();
/// graph2.canonicalize();
/// assert_eq!(graph1, graph2);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ``` /// ```
/// ///
@ -1247,31 +1177,6 @@ impl<'a> GraphView<'a> {
self.iter().next().is_none() self.iter().next().is_none()
} }
/// Dumps the graph into a file.
///
/// Usage example:
/// ```
/// use oxigraph::io::GraphFormat;
/// use oxigraph::model::*;
///
/// let mut dataset = Dataset::new();
/// let ex = NamedNodeRef::new("http://example.com")?;
/// dataset.insert(QuadRef::new(ex, ex, ex, ex));
///
/// let file = "<http://example.com> <http://example.com> <http://example.com> .\n".as_bytes();
/// let mut buffer = Vec::new();
/// dataset.graph(ex).dump(&mut buffer, GraphFormat::NTriples)?;
/// assert_eq!(file, buffer.as_slice());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn dump(&self, writer: impl Write, format: GraphFormat) -> io::Result<()> {
let mut writer = GraphSerializer::from_format(format).triple_writer(writer)?;
for t in self {
writer.write(t)?;
}
writer.finish()
}
fn encoded_triple(&self, triple: TripleRef<'_>) -> Option<InternedTriple> { fn encoded_triple(&self, triple: TripleRef<'_>) -> Option<InternedTriple> {
Some(InternedTriple { Some(InternedTriple {
subject: self.dataset.encoded_subject(triple.subject)?, subject: self.dataset.encoded_subject(triple.subject)?,
@ -1371,49 +1276,6 @@ impl<'a> GraphViewMut<'a> {
} }
} }
/// Loads a file into the graph.
///
/// Usage example:
/// ```
/// use oxigraph::model::*;
/// use oxigraph::io::GraphFormat;
///
/// let mut dataset = Dataset::new();
/// let mut graph = dataset.graph_mut(NamedNodeRef::new("http://example.com")?);
///
/// // insertion
/// let file = b"<http://example.com> <http://example.com> <http://example.com> .";
/// graph.load(file.as_ref(), GraphFormat::NTriples, None)?;
///
/// // we inspect the dataset contents
/// let ex = NamedNodeRef::new("http://example.com")?;
/// assert!(graph.contains(TripleRef::new(ex, ex, ex)));
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
///
/// Warning: This function inserts the triples during parsing.
/// If parsing fails in the middle of the file, the triples read so far stay in the graph.
///
/// Errors related to parameter validation like the base IRI use the [`InvalidInput`](std::io::ErrorKind::InvalidInput) error kind.
/// Errors related to a bad syntax in the loaded file use the [`InvalidData`](std::io::ErrorKind::InvalidData) or [`UnexpectedEof`](std::io::ErrorKind::UnexpectedEof) error kinds.
pub fn load(
&mut self,
reader: impl BufRead,
format: GraphFormat,
base_iri: Option<&str>,
) -> Result<(), ParserError> {
let mut parser = GraphParser::from_format(format);
if let Some(base_iri) = base_iri {
parser = parser
.with_base_iri(base_iri)
.map_err(|e| ParserError::invalid_base_iri(base_iri, e))?;
}
for t in parser.read_triples(reader)? {
self.insert(&t?);
}
Ok(())
}
fn encode_triple(&mut self, triple: TripleRef<'_>) -> InternedTriple { fn encode_triple(&mut self, triple: TripleRef<'_>) -> InternedTriple {
InternedTriple { InternedTriple {
subject: InternedSubject::encoded_into(triple.subject, &mut self.dataset.interner), subject: InternedSubject::encoded_into(triple.subject, &mut self.dataset.interner),
@ -1517,29 +1379,6 @@ impl<'a> GraphViewMut<'a> {
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
self.read().is_empty() self.read().is_empty()
} }
/// Dumps the graph into a file.
///
/// Usage example:
/// ```
/// use oxigraph::io::GraphFormat;
/// use oxigraph::model::*;
///
/// let mut dataset = Dataset::new();
/// let mut graph = dataset.graph_mut(NamedNodeRef::new("http://example.com")?);
///
/// let ex = NamedNodeRef::new("http://example.com")?;
/// graph.insert(TripleRef::new(ex, ex, ex));
///
/// let file = "<http://example.com> <http://example.com> <http://example.com> .\n".as_bytes();
/// let mut buffer = Vec::new();
/// graph.dump(&mut buffer, GraphFormat::NTriples)?;
/// assert_eq!(file, buffer.as_slice());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn dump(self, writer: impl Write, format: GraphFormat) -> io::Result<()> {
self.read().dump(writer, format)
}
} }
impl<'a> Extend<Triple> for GraphViewMut<'a> { impl<'a> Extend<Triple> for GraphViewMut<'a> {

@ -19,12 +19,9 @@
//! //!
//! See also [`Dataset`](super::Dataset) if you want to get support of multiple RDF graphs at the same time. //! See also [`Dataset`](super::Dataset) if you want to get support of multiple RDF graphs at the same time.
use crate::io::read::ParserError;
use crate::io::GraphFormat;
use crate::model::dataset::*; use crate::model::dataset::*;
use crate::model::*; use crate::model::*;
use std::io::{BufRead, Write}; use std::fmt;
use std::{fmt, io};
/// An in-memory [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-graph). /// An in-memory [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-graph).
/// ///
@ -179,74 +176,24 @@ impl Graph {
self.dataset.clear() self.dataset.clear()
} }
/// Loads a file into the graph.
///
/// Usage example:
/// ```
/// use oxigraph::model::*;
/// use oxigraph::io::GraphFormat;
///
/// let mut graph = Graph::new();
///
/// // insertion
/// let file = b"<http://example.com> <http://example.com> <http://example.com> .";
/// graph.load(file.as_ref(), GraphFormat::NTriples, None)?;
///
/// // we inspect the graph contents
/// let ex = NamedNodeRef::new("http://example.com")?;
/// assert!(graph.contains(TripleRef::new(ex, ex, ex)));
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
///
/// Warning: This function inserts the triples during parsing.
/// If parsing fails in the middle of the file, the triples read so far stay in the graph.
///
/// Errors related to parameter validation like the base IRI use the [`InvalidInput`](std::io::ErrorKind::InvalidInput) error kind.
/// Errors related to a bad syntax in the loaded file use the [`InvalidData`](std::io::ErrorKind::InvalidData) or [`UnexpectedEof`](std::io::ErrorKind::UnexpectedEof) error kinds.
pub fn load(
&mut self,
reader: impl BufRead,
format: GraphFormat,
base_iri: Option<&str>,
) -> Result<(), ParserError> {
self.graph_mut().load(reader, format, base_iri)
}
/// Dumps the graph into a file.
///
/// Usage example:
/// ```
/// use oxigraph::io::GraphFormat;
/// use oxigraph::model::Graph;
///
/// let file = "<http://example.com> <http://example.com> <http://example.com> .\n".as_bytes();
///
/// let mut graph = Graph::new();
/// graph.load(file, GraphFormat::NTriples, None)?;
///
/// let mut buffer = Vec::new();
/// graph.dump(&mut buffer, GraphFormat::NTriples)?;
/// assert_eq!(file, buffer.as_slice());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn dump(&self, writer: impl Write, format: GraphFormat) -> io::Result<()> {
self.graph().dump(writer, format)
}
/// Applies on the graph the canonicalization process described in /// Applies on the graph the canonicalization process described in
/// [Canonical Forms for Isomorphic and Equivalent RDF Graphs: Algorithms for Leaning and Labelling Blank Nodes, Aidan Hogan, 2017](http://aidanhogan.com/docs/rdf-canonicalisation.pdf) /// [Canonical Forms for Isomorphic and Equivalent RDF Graphs: Algorithms for Leaning and Labelling Blank Nodes, Aidan Hogan, 2017](http://aidanhogan.com/docs/rdf-canonicalisation.pdf)
/// ///
/// Usage example ([Graph isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism)): /// Usage example ([Graph isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism)):
/// ``` /// ```
/// use oxigraph::io::GraphFormat; /// use oxigraph::model::*;
/// use oxigraph::model::Graph;
/// ///
/// let file = "<http://example.com> <http://example.com> [ <http://example.com/p> <http://example.com/o> ] .".as_bytes(); /// let iri = NamedNodeRef::new("http://example.com")?;
/// ///
/// let mut graph1 = Graph::new(); /// let mut graph1 = Graph::new();
/// graph1.load(file, GraphFormat::Turtle, None)?; /// let bnode1 = BlankNode::default();
/// graph1.insert(TripleRef::new(iri, iri, &bnode1));
/// graph1.insert(TripleRef::new(&bnode1, iri, iri));
///
/// let mut graph2 = Graph::new(); /// let mut graph2 = Graph::new();
/// graph2.load(file, GraphFormat::Turtle, None)?; /// let bnode2 = BlankNode::default();
/// graph2.insert(TripleRef::new(iri, iri, &bnode2));
/// graph2.insert(TripleRef::new(&bnode2, iri, iri));
/// ///
/// assert_ne!(graph1, graph2); /// assert_ne!(graph1, graph2);
/// graph1.canonicalize(); /// graph1.canonicalize();

@ -1,5 +1,5 @@
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Result};
use oxigraph::io::{DatasetFormat, GraphFormat}; use oxigraph::io::{DatasetFormat, DatasetParser, GraphFormat, GraphParser};
use oxigraph::model::{Dataset, Graph, GraphNameRef}; use oxigraph::model::{Dataset, Graph, GraphNameRef};
use oxigraph::store::Store; use oxigraph::store::Store;
use std::fs::File; use std::fs::File;
@ -78,14 +78,13 @@ pub fn load_to_store<'a>(
} }
pub fn load_to_graph(url: &str, graph: &mut Graph) -> Result<()> { pub fn load_to_graph(url: &str, graph: &mut Graph) -> Result<()> {
if url.ends_with(".nt") { let format = url
graph.load(read_file(url)?, GraphFormat::NTriples, Some(url))? .rsplit_once(".")
} else if url.ends_with(".ttl") { .and_then(|(_, extension)| GraphFormat::from_extension(extension))
graph.load(read_file(url)?, GraphFormat::Turtle, Some(url))? .ok_or_else(|| anyhow!("Serialization type not found for {}", url))?;
} else if url.ends_with(".rdf") { let parser = GraphParser::from_format(format).with_base_iri(url)?;
graph.load(read_file(url)?, GraphFormat::RdfXml, Some(url))? for t in parser.read_triples(read_file(url)?)? {
} else { graph.insert(&t?);
return Err(anyhow!("Serialization type not found for {}", url));
} }
Ok(()) Ok(())
} }
@ -101,26 +100,23 @@ pub fn load_to_dataset<'a>(
dataset: &mut Dataset, dataset: &mut Dataset,
to_graph_name: impl Into<GraphNameRef<'a>>, to_graph_name: impl Into<GraphNameRef<'a>>,
) -> Result<()> { ) -> Result<()> {
if url.ends_with(".nt") { let to_graph_name = to_graph_name.into();
dataset let extension = url.rsplit_once(".").map(|(_, ext)| ext);
.graph_mut(to_graph_name) if let Some(format) = extension.and_then(GraphFormat::from_extension) {
.load(read_file(url)?, GraphFormat::NTriples, Some(url))? let parser = GraphParser::from_format(format).with_base_iri(url)?;
} else if url.ends_with(".ttl") { for t in parser.read_triples(read_file(url)?)? {
dataset dataset.insert(&t?.in_graph(to_graph_name));
.graph_mut(to_graph_name)
.load(read_file(url)?, GraphFormat::Turtle, Some(url))?
} else if url.ends_with(".rdf") {
dataset
.graph_mut(to_graph_name)
.load(read_file(url)?, GraphFormat::RdfXml, Some(url))?
} else if url.ends_with(".nq") {
dataset.load(read_file(url)?, DatasetFormat::NQuads, Some(url))?
} else if url.ends_with(".trig") {
dataset.load(read_file(url)?, DatasetFormat::TriG, Some(url))?
} else {
return Err(anyhow!("Serialization type not found for {}", url));
} }
Ok(()) Ok(())
} else if let Some(format) = extension.and_then(DatasetFormat::from_extension) {
let parser = DatasetParser::from_format(format).with_base_iri(url)?;
for q in parser.read_quads(read_file(url)?)? {
dataset.insert(&q?);
}
Ok(())
} else {
Err(anyhow!("Serialization type not found for {}", url))
}
} }
pub fn load_dataset(url: &str) -> Result<Dataset> { pub fn load_dataset(url: &str) -> Result<Dataset> {

Loading…
Cancel
Save