From 837d5d4ff77bdf798410de33d59e80b213d0d7e2 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sat, 1 Jan 2022 19:55:42 +0100 Subject: [PATCH] Moves I/O out of Graph and Dataset structs Makes basic model independent of I/O --- lib/src/model/dataset.rs | 197 ++++----------------------------------- lib/src/model/graph.rs | 73 ++------------- testsuite/src/files.rs | 50 +++++----- 3 files changed, 51 insertions(+), 269 deletions(-) diff --git a/lib/src/model/dataset.rs b/lib/src/model/dataset.rs index a9a1d989..da2e39ce 100644 --- a/lib/src/model/dataset.rs +++ b/lib/src/model/dataset.rs @@ -23,19 +23,14 @@ //! //! See also [`Graph`](super::Graph) if you only care about plain triples. -use crate::io::read::ParserError; -use crate::io::{ - DatasetFormat, DatasetParser, DatasetSerializer, GraphFormat, GraphParser, GraphSerializer, -}; use crate::model::interning::*; use crate::model::SubjectRef; use crate::model::*; use std::collections::hash_map::DefaultHasher; use std::collections::BTreeSet; use std::collections::{HashMap, HashSet}; +use std::fmt; use std::hash::{Hash, Hasher}; -use std::io::{BufRead, Write}; -use std::{fmt, io}; /// An in-memory [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). /// @@ -404,77 +399,6 @@ impl Dataset { self.ospg.clear(); } - /// Loads a file into the dataset. - /// - /// To load a specific graph use [`GraphViewMut::load`]. - /// - /// Usage example: - /// ``` - /// use oxigraph::model::*; - /// use oxigraph::io::DatasetFormat; - /// - /// let mut dataset = Dataset::new(); - /// - /// // insertion - /// let file = b" ."; - /// dataset.load(file.as_ref(), DatasetFormat::NQuads, None)?; - /// - /// // we inspect the store contents - /// let ex = NamedNodeRef::new("http://example.com")?; - /// assert!(dataset.contains(QuadRef::new(ex, ex, ex, ex))); - /// # Result::<_,Box>::Ok(()) - /// ``` - /// - /// Warning: This functions inserts the quads during the parsing. 
- /// If the parsing fails in the middle of the file, the quads read before stay in the dataset. - /// - /// Errors related to parameter validation like the base IRI use the [`InvalidInput`](std::io::ErrorKind::InvalidInput) error kind. - /// Errors related to a bad syntax in the loaded file use the [`InvalidData`](std::io::ErrorKind::InvalidData) or [`UnexpectedEof`](std::io::ErrorKind::UnexpectedEof) error kinds. - pub fn load( - &mut self, - reader: impl BufRead, - format: DatasetFormat, - base_iri: Option<&str>, - ) -> Result<(), ParserError> { - let mut parser = DatasetParser::from_format(format); - if let Some(base_iri) = base_iri { - parser = parser - .with_base_iri(base_iri) - .map_err(|e| ParserError::invalid_base_iri(base_iri, e))?; - } - for t in parser.read_quads(reader)? { - self.insert(&t?); - } - Ok(()) - } - - /// Dumps the dataset into a file. - /// - /// To dump a specific graph use [`GraphView::dump`]. - /// - /// Usage example: - /// ``` - /// use oxigraph::io::DatasetFormat; - /// use oxigraph::model::Dataset; - /// - /// let file = " .\n".as_bytes(); - /// - /// let mut store = Dataset::new(); - /// store.load(file, DatasetFormat::NQuads, None)?; - /// - /// let mut buffer = Vec::new(); - /// store.dump(&mut buffer, DatasetFormat::NQuads)?; - /// assert_eq!(file, buffer.as_slice()); - /// # Result::<_,Box>::Ok(()) - /// ``` - pub fn dump(&self, writer: impl Write, format: DatasetFormat) -> io::Result<()> { - let mut writer = DatasetSerializer::from_format(format).quad_writer(writer)?; - for t in self { - writer.write(t)?; - } - writer.finish() - } - fn encode_quad( &mut self, quad: QuadRef<'_>, @@ -567,20 +491,26 @@ impl Dataset { /// /// Usage example ([Dataset isomorphim](https://www.w3.org/TR/rdf11-concepts/#dfn-dataset-isomorphism)): /// ``` - /// use oxigraph::io::DatasetFormat; - /// use oxigraph::model::Dataset; + /// use oxigraph::model::*; + /// + /// let iri = NamedNodeRef::new("http://example.com")?; /// - /// let file = "GRAPH _:a1 
{ [ ] . }".as_bytes(); /// + /// let mut dataset1 = Dataset::new(); + /// let bnode1 = BlankNode::default(); + /// let g1 = BlankNode::default(); + /// dataset1.insert(QuadRef::new(iri, iri, &bnode1, &g1)); + /// dataset1.insert(QuadRef::new(&bnode1, iri, iri, &g1)); /// - /// let mut dataset1 = Dataset::new(); - /// dataset1.load(file, DatasetFormat::TriG, None)?; - /// let mut dataset2 = Dataset::new(); - /// dataset2.load(file, DatasetFormat::TriG, None)?; + /// let mut dataset2 = Dataset::new(); + /// let bnode2 = BlankNode::default(); + /// let g2 = BlankNode::default(); + /// dataset2.insert(QuadRef::new(iri, iri, &bnode2, &g2)); + /// dataset2.insert(QuadRef::new(&bnode2, iri, iri, &g2)); /// - /// assert_ne!(dataset1, dataset2); - /// dataset1.canonicalize(); - /// dataset2.canonicalize(); - /// assert_eq!(dataset1, dataset2); + /// assert_ne!(dataset1, dataset2); + /// dataset1.canonicalize(); + /// dataset2.canonicalize(); + /// assert_eq!(dataset1, dataset2); /// # Result::<_,Box>::Ok(()) /// ``` /// @@ -1247,31 +1177,6 @@ impl<'a> GraphView<'a> { self.iter().next().is_none() } - /// Dumps the graph into a file. 
- /// - /// Usage example: - /// ``` - /// use oxigraph::io::GraphFormat; - /// use oxigraph::model::*; - /// - /// let mut dataset = Dataset::new(); - /// let ex = NamedNodeRef::new("http://example.com")?; - /// dataset.insert(QuadRef::new(ex, ex, ex, ex)); - /// - /// let file = " .\n".as_bytes(); - /// let mut buffer = Vec::new(); - /// dataset.graph(ex).dump(&mut buffer, GraphFormat::NTriples)?; - /// assert_eq!(file, buffer.as_slice()); - /// # Result::<_,Box>::Ok(()) - /// ``` - pub fn dump(&self, writer: impl Write, format: GraphFormat) -> io::Result<()> { - let mut writer = GraphSerializer::from_format(format).triple_writer(writer)?; - for t in self { - writer.write(t)?; - } - writer.finish() - } - fn encoded_triple(&self, triple: TripleRef<'_>) -> Option { Some(InternedTriple { subject: self.dataset.encoded_subject(triple.subject)?, @@ -1371,49 +1276,6 @@ impl<'a> GraphViewMut<'a> { } } - /// Loads a file into the graph. - /// - /// Usage example: - /// ``` - /// use oxigraph::model::*; - /// use oxigraph::io::GraphFormat; - /// - /// let mut dataset = Dataset::new(); - /// let mut graph = dataset.graph_mut(NamedNodeRef::new("http://example.com")?); - /// - /// // insertion - /// let file = b" ."; - /// graph.load(file.as_ref(), GraphFormat::NTriples, None)?; - /// - /// // we inspect the dataset contents - /// let ex = NamedNodeRef::new("http://example.com")?; - /// assert!(graph.contains(TripleRef::new(ex, ex, ex))); - /// # Result::<_,Box>::Ok(()) - /// ``` - /// - /// Warning: This functions inserts the triples during the parsing. - /// If the parsing fails in the middle of the file, the triples read before stay in the graph. - /// - /// Errors related to parameter validation like the base IRI use the [`InvalidInput`](std::io::ErrorKind::InvalidInput) error kind. - /// Errors related to a bad syntax in the loaded file use the [`InvalidData`](std::io::ErrorKind::InvalidData) or [`UnexpectedEof`](std::io::ErrorKind::UnexpectedEof) error kinds. 
- pub fn load( - &mut self, - reader: impl BufRead, - format: GraphFormat, - base_iri: Option<&str>, - ) -> Result<(), ParserError> { - let mut parser = GraphParser::from_format(format); - if let Some(base_iri) = base_iri { - parser = parser - .with_base_iri(base_iri) - .map_err(|e| ParserError::invalid_base_iri(base_iri, e))?; - } - for t in parser.read_triples(reader)? { - self.insert(&t?); - } - Ok(()) - } - fn encode_triple(&mut self, triple: TripleRef<'_>) -> InternedTriple { InternedTriple { subject: InternedSubject::encoded_into(triple.subject, &mut self.dataset.interner), @@ -1517,29 +1379,6 @@ impl<'a> GraphViewMut<'a> { pub fn is_empty(&self) -> bool { self.read().is_empty() } - - /// Dumps the graph into a file. - /// - /// Usage example: - /// ``` - /// use oxigraph::io::GraphFormat; - /// use oxigraph::model::*; - /// - /// let mut dataset = Dataset::new(); - /// let mut graph = dataset.graph_mut(NamedNodeRef::new("http://example.com")?); - /// - /// let ex = NamedNodeRef::new("http://example.com")?; - /// graph.insert(TripleRef::new(ex, ex, ex)); - /// - /// let file = " .\n".as_bytes(); - /// let mut buffer = Vec::new(); - /// graph.dump(&mut buffer, GraphFormat::NTriples)?; - /// assert_eq!(file, buffer.as_slice()); - /// # Result::<_,Box>::Ok(()) - /// ``` - pub fn dump(self, writer: impl Write, format: GraphFormat) -> io::Result<()> { - self.read().dump(writer, format) - } } impl<'a> Extend for GraphViewMut<'a> { diff --git a/lib/src/model/graph.rs b/lib/src/model/graph.rs index deaeb481..b02a47ba 100644 --- a/lib/src/model/graph.rs +++ b/lib/src/model/graph.rs @@ -19,12 +19,9 @@ //! //! See also [`Dataset`](super::Dataset) if you want to get support of multiple RDF graphs at the same time. -use crate::io::read::ParserError; -use crate::io::GraphFormat; use crate::model::dataset::*; use crate::model::*; -use std::io::{BufRead, Write}; -use std::{fmt, io}; +use std::fmt; /// An in-memory [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-graph). 
/// @@ -179,74 +176,24 @@ impl Graph { self.dataset.clear() } - /// Loads a file into the graph. - /// - /// Usage example: - /// ``` - /// use oxigraph::model::*; - /// use oxigraph::io::GraphFormat; - /// - /// let mut graph = Graph::new(); - /// - /// // insertion - /// let file = b" ."; - /// graph.load(file.as_ref(), GraphFormat::NTriples, None)?; - /// - /// // we inspect the graph contents - /// let ex = NamedNodeRef::new("http://example.com")?; - /// assert!(graph.contains(TripleRef::new(ex, ex, ex))); - /// # Result::<_,Box>::Ok(()) - /// ``` - /// - /// Warning: This functions inserts the triples during the parsing. - /// If the parsing fails in the middle of the file, the triples read before stay in the graph. - /// - /// Errors related to parameter validation like the base IRI use the [`InvalidInput`](std::io::ErrorKind::InvalidInput) error kind. - /// Errors related to a bad syntax in the loaded file use the [`InvalidData`](std::io::ErrorKind::InvalidData) or [`UnexpectedEof`](std::io::ErrorKind::UnexpectedEof) error kinds. - pub fn load( - &mut self, - reader: impl BufRead, - format: GraphFormat, - base_iri: Option<&str>, - ) -> Result<(), ParserError> { - self.graph_mut().load(reader, format, base_iri) - } - - /// Dumps the graph into a file. 
- /// - /// Usage example: - /// ``` - /// use oxigraph::io::GraphFormat; - /// use oxigraph::model::Graph; - /// - /// let file = " .\n".as_bytes(); - /// - /// let mut graph = Graph::new(); - /// graph.load(file, GraphFormat::NTriples, None)?; - /// - /// let mut buffer = Vec::new(); - /// graph.dump(&mut buffer, GraphFormat::NTriples)?; - /// assert_eq!(file, buffer.as_slice()); - /// # Result::<_,Box>::Ok(()) - /// ``` - pub fn dump(&self, writer: impl Write, format: GraphFormat) -> io::Result<()> { - self.graph().dump(writer, format) - } - /// Applies on the graph the canonicalization process described in /// [Canonical Forms for Isomorphic and Equivalent RDF Graphs: Algorithms for Leaning and Labelling Blank Nodes, Aidan Hogan, 2017](http://aidanhogan.com/docs/rdf-canonicalisation.pdf) /// /// Usage example ([Graph isomorphim](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism)): /// ``` - /// use oxigraph::io::GraphFormat; - /// use oxigraph::model::Graph; + /// use oxigraph::model::*; /// - /// let file = " [ ] .".as_bytes(); + /// let iri = NamedNodeRef::new("http://example.com")?; /// /// let mut graph1 = Graph::new(); - /// graph1.load(file, GraphFormat::Turtle, None)?; + /// let bnode1 = BlankNode::default(); + /// graph1.insert(TripleRef::new(iri, iri, &bnode1)); + /// graph1.insert(TripleRef::new(&bnode1, iri, iri)); + /// /// let mut graph2 = Graph::new(); - /// graph2.load(file, GraphFormat::Turtle, None)?; + /// let bnode2 = BlankNode::default(); + /// graph2.insert(TripleRef::new(iri, iri, &bnode2)); + /// graph2.insert(TripleRef::new(&bnode2, iri, iri)); /// /// assert_ne!(graph1, graph2); /// graph1.canonicalize(); diff --git a/testsuite/src/files.rs b/testsuite/src/files.rs index 37262691..2d918acb 100644 --- a/testsuite/src/files.rs +++ b/testsuite/src/files.rs @@ -1,5 +1,5 @@ use anyhow::{anyhow, Result}; -use oxigraph::io::{DatasetFormat, GraphFormat}; +use oxigraph::io::{DatasetFormat, DatasetParser, GraphFormat, GraphParser}; use 
oxigraph::model::{Dataset, Graph, GraphNameRef}; use oxigraph::store::Store; use std::fs::File; @@ -78,14 +78,13 @@ pub fn load_to_store<'a>( } pub fn load_to_graph(url: &str, graph: &mut Graph) -> Result<()> { - if url.ends_with(".nt") { - graph.load(read_file(url)?, GraphFormat::NTriples, Some(url))? - } else if url.ends_with(".ttl") { - graph.load(read_file(url)?, GraphFormat::Turtle, Some(url))? - } else if url.ends_with(".rdf") { - graph.load(read_file(url)?, GraphFormat::RdfXml, Some(url))? - } else { - return Err(anyhow!("Serialization type not found for {}", url)); + let format = url + .rsplit_once(".") + .and_then(|(_, extension)| GraphFormat::from_extension(extension)) + .ok_or_else(|| anyhow!("Serialization type not found for {}", url))?; + let parser = GraphParser::from_format(format).with_base_iri(url)?; + for t in parser.read_triples(read_file(url)?)? { + graph.insert(&t?); } Ok(()) } @@ -101,26 +100,23 @@ pub fn load_to_dataset<'a>( dataset: &mut Dataset, to_graph_name: impl Into>, ) -> Result<()> { - if url.ends_with(".nt") { - dataset - .graph_mut(to_graph_name) - .load(read_file(url)?, GraphFormat::NTriples, Some(url))? - } else if url.ends_with(".ttl") { - dataset - .graph_mut(to_graph_name) - .load(read_file(url)?, GraphFormat::Turtle, Some(url))? - } else if url.ends_with(".rdf") { - dataset - .graph_mut(to_graph_name) - .load(read_file(url)?, GraphFormat::RdfXml, Some(url))? - } else if url.ends_with(".nq") { - dataset.load(read_file(url)?, DatasetFormat::NQuads, Some(url))? - } else if url.ends_with(".trig") { - dataset.load(read_file(url)?, DatasetFormat::TriG, Some(url))? + let to_graph_name = to_graph_name.into(); + let extension = url.rsplit_once(".").map(|(_, ext)| ext); + if let Some(format) = extension.and_then(GraphFormat::from_extension) { + let parser = GraphParser::from_format(format).with_base_iri(url)?; + for t in parser.read_triples(read_file(url)?)? 
{ + dataset.insert(&t?.in_graph(to_graph_name)); + } + Ok(()) + } else if let Some(format) = extension.and_then(DatasetFormat::from_extension) { + let parser = DatasetParser::from_format(format).with_base_iri(url)?; + for q in parser.read_quads(read_file(url)?)? { + dataset.insert(&q?); + } + Ok(()) } else { - return Err(anyhow!("Serialization type not found for {}", url)); + Err(anyhow!("Serialization type not found for {}", url)) } - Ok(()) } pub fn load_dataset(url: &str) -> Result {