From f75dc6a61d2462ecb0a8eb0349fb2b960960ca6f Mon Sep 17 00:00:00 2001 From: Tpt Date: Wed, 20 Jan 2021 08:13:32 +0100 Subject: [PATCH] Adds simple in memory Graph and Dataset --- lib/benches/store.rs | 43 +- lib/src/model/dataset.rs | 1521 +++++++++++++++++++++++++++++ lib/src/model/graph.rs | 302 ++++++ lib/src/model/interning.rs | 344 +++++++ lib/src/model/mod.rs | 15 +- lib/src/store/small_string.rs | 2 +- testsuite/src/files.rs | 56 +- testsuite/src/manifest.rs | 350 ++++--- testsuite/src/parser_evaluator.rs | 22 +- testsuite/src/report.rs | 16 +- testsuite/src/sparql_evaluator.rs | 250 ++--- 11 files changed, 2562 insertions(+), 359 deletions(-) create mode 100644 lib/src/model/dataset.rs create mode 100644 lib/src/model/graph.rs create mode 100644 lib/src/model/interning.rs diff --git a/lib/benches/store.rs b/lib/benches/store.rs index 7453489e..93f40e43 100644 --- a/lib/benches/store.rs +++ b/lib/benches/store.rs @@ -1,12 +1,51 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; -use oxigraph::model::{NamedNode, Quad}; +use oxigraph::model::{Dataset, Graph, NamedNode, Quad, Triple}; use oxigraph::{MemoryStore, SledStore}; use rand::random; +use std::iter::FromIterator; -criterion_group!(store_load, memory_load_bench, sled_load_bench); +criterion_group!( + store_load, + graph_load_bench, + dataset_load_bench, + memory_load_bench, + sled_load_bench +); criterion_main!(store_load); +fn graph_load_bench(c: &mut Criterion) { + let mut group = c.benchmark_group("graph"); + group.nresamples(10); + group.sample_size(10); + for size in [100, 1_000, 10_000].iter() { + group.throughput(Throughput::Elements(*size as u64)); + let triples: Vec<_> = create_quads(*size).into_iter().map(Triple::from).collect(); + group.bench_function(BenchmarkId::from_parameter(size), |b| { + b.iter(|| { + Graph::from_iter(triples.iter()); + }); + }); + } + group.finish(); +} + +fn dataset_load_bench(c: &mut Criterion) { + let mut group = 
c.benchmark_group("dataset"); + group.nresamples(10); + group.sample_size(10); + for size in [100, 1_000, 10_000].iter() { + group.throughput(Throughput::Elements(*size as u64)); + let quads = create_quads(*size); + group.bench_function(BenchmarkId::from_parameter(size), |b| { + b.iter(|| { + Dataset::from_iter(quads.iter()); + }); + }); + } + group.finish(); +} + fn memory_load_bench(c: &mut Criterion) { let mut group = c.benchmark_group("memory"); group.nresamples(10); diff --git a/lib/src/model/dataset.rs b/lib/src/model/dataset.rs new file mode 100644 index 00000000..b51ad5a3 --- /dev/null +++ b/lib/src/model/dataset.rs @@ -0,0 +1,1521 @@ +//! [In-memory implementation](super::Dataset) of [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). +//! +//! Usage example: +//! ``` +//! use oxigraph::model::*; +//! +//! let mut dataset = Dataset::default(); +//! +//! // insertion +//! let ex = NamedNodeRef::new("http://example.com")?; +//! let quad = QuadRef::new(ex, ex, ex, ex); +//! dataset.insert(quad); +//! +//! // simple filter +//! let results: Vec<_> = dataset.quads_for_subject(ex).collect(); +//! assert_eq!(vec![quad], results); +//! +//! // direct access to a dataset graph +//! let results: Vec<_> = dataset.graph(ex).iter().collect(); +//! assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); +//! # Result::<_,Box>::Ok(()) +//! ``` +//! +//! See also [`Graph`](super::Graph) if you only care about plain triples. 
+ +use crate::io::{ + DatasetFormat, DatasetParser, DatasetSerializer, GraphFormat, GraphParser, GraphSerializer, +}; +use crate::model::interning::*; +use crate::model::NamedOrBlankNodeRef; +use crate::model::*; +use lasso::Rodeo; +use std::collections::hash_map::DefaultHasher; +use std::collections::BTreeSet; +use std::collections::{HashMap, HashSet}; +use std::hash::{Hash, Hasher}; +use std::io::{BufRead, Write}; +use std::iter::FromIterator; +use std::{fmt, io}; + +/// An in-memory [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). +/// +/// It can accommodate a fairly large number of quads (in the few millions). +/// Beware: it interns the strings and does not do any garbage collection yet: +/// if you insert and remove a lot of different terms, memory will grow without any reduction. +/// +/// Usage example: +/// ``` +/// use oxigraph::model::*; +/// +/// let mut dataset = Dataset::default(); +/// +/// // insertion +/// let ex = NamedNodeRef::new("http://example.com")?; +/// let quad = QuadRef::new(ex, ex, ex, ex); +/// dataset.insert(quad); +/// +/// // simple filter +/// let results: Vec<_> = dataset.quads_for_subject(ex).collect(); +/// assert_eq!(vec![quad], results); +/// +/// // direct access to a dataset graph +/// let results: Vec<_> = dataset.graph(ex).iter().collect(); +/// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[derive(Debug, Default)] +pub struct Dataset { + interner: Rodeo, + gspo: BTreeSet<( + InternedGraphName, + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + )>, + gpos: BTreeSet<( + InternedGraphName, + InternedNamedNode, + InternedTerm, + InternedNamedOrBlankNode, + )>, + gosp: BTreeSet<( + InternedGraphName, + InternedTerm, + InternedNamedOrBlankNode, + InternedNamedNode, + )>, + spog: BTreeSet<( + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + InternedGraphName, + )>, + posg: BTreeSet<( + InternedNamedNode, + InternedTerm, 
InternedNamedOrBlankNode, + InternedGraphName, + )>, + ospg: BTreeSet<( + InternedTerm, + InternedNamedOrBlankNode, + InternedNamedNode, + InternedGraphName, + )>, +} + +impl Dataset { + /// Creates a new dataset + pub fn new() -> Self { + Self::default() + } + + /// Provides a read-only view on a [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-graph) contained in this dataset. + /// + /// ``` + /// use oxigraph::model::*; + /// + /// let mut dataset = Dataset::default(); + /// let ex = NamedNodeRef::new("http://example.com")?; + /// dataset.insert(QuadRef::new(ex, ex, ex, ex)); + /// + /// let results: Vec<_> = dataset.graph(ex).iter().collect(); + /// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); + /// # Result::<_,Box>::Ok(()) + /// ``` + pub fn graph<'a, 'b>(&'a self, graph_name: impl Into>) -> GraphView<'a> { + let graph_name = self + .encoded_graph_name(graph_name) + .unwrap_or_else(InternedGraphName::impossible); + GraphView { + dataset: self, + graph_name, + } + } + + /// Provides a read/write view on a [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-graph) contained in this dataset. 
+ /// + /// ``` + /// use oxigraph::model::*; + /// + /// let mut dataset = Dataset::default(); + /// let ex = NamedNodeRef::new("http://example.com")?; + /// + /// // We edit and query the dataset http://example.com graph + /// { + /// let mut graph = dataset.graph_mut(ex); + /// graph.insert(TripleRef::new(ex, ex, ex)); + /// let results: Vec<_> = graph.iter().collect(); + /// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); + /// } + /// + /// // We have also changed the dataset itself + /// let results: Vec<_> = dataset.iter().collect(); + /// assert_eq!(vec![QuadRef::new(ex, ex, ex, ex)], results); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn graph_mut<'a, 'b>( + &'a mut self, + graph_name: impl Into>, + ) -> GraphViewMut<'a> { + let graph_name = InternedGraphName::encoded_into(graph_name.into(), &mut self.interner); + GraphViewMut { + dataset: self, + graph_name, + } + } + + /// Returns all the quads contained by the dataset + pub fn iter(&self) -> Iter<'_> { + let iter = self.spog.iter(); + Iter { + dataset: self, + inner: iter, + } + } + + pub fn quads_for_subject<'a, 'b>( + &'a self, + subject: impl Into>, + ) -> impl Iterator> + 'a { + let subject = self + .encoded_named_or_blank_node(subject) + .unwrap_or_else(InternedNamedOrBlankNode::impossible); + self.interned_quads_for_subject(subject) + .map(move |q| self.decode_spog(q)) + } + + fn interned_quads_for_subject( + &self, + subject: InternedNamedOrBlankNode, + ) -> impl Iterator< + Item = ( + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + > + '_ { + self.spog + .range( + &( + subject, + InternedNamedNode::first(), + InternedTerm::first(), + InternedGraphName::first(), + ) + ..&( + subject.next(), + InternedNamedNode::first(), + InternedTerm::first(), + InternedGraphName::first(), + ), + ) + .copied() + } + + pub fn quads_for_predicate<'a, 'b>( + &'a self, + predicate: impl Into>, + ) -> impl Iterator> + 'a { + let predicate = self + 
.encoded_named_node(predicate) + .unwrap_or_else(InternedNamedNode::impossible); + self.interned_quads_for_predicate(predicate) + .map(move |q| self.decode_spog(q)) + } + + fn interned_quads_for_predicate( + &self, + predicate: InternedNamedNode, + ) -> impl Iterator< + Item = ( + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + > + '_ { + self.posg + .range( + &( + predicate, + InternedTerm::first(), + InternedNamedOrBlankNode::first(), + InternedGraphName::first(), + ) + ..&( + predicate.next(), + InternedTerm::first(), + InternedNamedOrBlankNode::first(), + InternedGraphName::first(), + ), + ) + .copied() + .map(|(p, o, s, g)| (s, p, o, g)) + } + + pub fn quads_for_object<'a, 'b>( + &'a self, + object: impl Into>, + ) -> impl Iterator> + 'a { + let object = self + .encoded_term(object) + .unwrap_or_else(InternedTerm::impossible); + + self.interned_quads_for_object(object) + .map(move |q| self.decode_spog(q)) + } + + fn interned_quads_for_object( + &self, + object: InternedTerm, + ) -> impl Iterator< + Item = ( + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + > + '_ { + self.ospg + .range( + &( + object, + InternedNamedOrBlankNode::first(), + InternedNamedNode::first(), + InternedGraphName::first(), + ) + ..&( + object.next(), + InternedNamedOrBlankNode::first(), + InternedNamedNode::first(), + InternedGraphName::first(), + ), + ) + .copied() + .map(|(o, s, p, g)| (s, p, o, g)) + } + + fn interned_quads_for_graph_name( + &self, + graph_name: InternedGraphName, + ) -> impl Iterator< + Item = ( + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + > + '_ { + self.gspo + .range( + &( + graph_name, + InternedNamedOrBlankNode::first(), + InternedNamedNode::first(), + InternedTerm::first(), + ) + ..&( + graph_name.next(), + InternedNamedOrBlankNode::first(), + InternedNamedNode::first(), + InternedTerm::first(), + ), + ) + .copied() + .map(|(g, 
s, p, o)| (s, p, o, g)) + } + + /// Checks if the dataset contains the given quad + pub fn contains<'a>(&self, quad: impl Into>) -> bool { + if let Some(q) = self.encoded_quad(quad.into()) { + self.spog.contains(&q) + } else { + false + } + } + + /// Returns the number of quads in this dataset + pub fn len(&self) -> usize { + self.gspo.len() + } + + /// Checks if this dataset contains a quad + pub fn is_empty(&self) -> bool { + self.gspo.is_empty() + } + + /// Adds a quad to the dataset + pub fn insert<'a>(&mut self, quad: impl Into>) -> bool { + let quad = self.encode_quad(quad.into()); + self.insert_encoded(quad) + } + + fn insert_encoded( + &mut self, + quad: ( + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + ) -> bool { + let (s, p, o, g) = quad; + self.gspo.insert((g, s, p, o)); + self.gpos.insert((g, p, o, s)); + self.gosp.insert((g, o, s, p)); + self.spog.insert((s, p, o, g)); + self.posg.insert((p, o, s, g)); + self.ospg.insert((o, s, p, g)) + } + + /// Removes a concrete quad from the dataset + pub fn remove<'a>(&mut self, quad: impl Into>) -> bool { + if let Some(quad) = self.encoded_quad(quad.into()) { + self.remove_encoded(quad) + } else { + false + } + } + + fn remove_encoded( + &mut self, + quad: ( + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + ) -> bool { + let (s, p, o, g) = quad; + self.gspo.remove(&(g, s, p, o)); + self.gpos.remove(&(g, p, o, s)); + self.gosp.remove(&(g, o, s, p)); + self.spog.remove(&(s, p, o, g)); + self.posg.remove(&(p, o, s, g)); + self.ospg.remove(&(o, s, p, g)) + } + + /// Clears the dataset + pub fn clear(&mut self) { + self.gspo.clear(); + self.gpos.clear(); + self.gosp.clear(); + self.spog.clear(); + self.posg.clear(); + self.ospg.clear(); + } + + /// Loads a file into the dataset. + /// + /// To load a specific graph use [`GraphViewMut::load`]. 
+ /// + /// Usage example: + /// ``` + /// use oxigraph::model::*; + /// use oxigraph::io::DatasetFormat; + /// + /// let mut dataset = Dataset::new(); + /// + /// // insertion + /// let file = b"<http://example.com> <http://example.com> <http://example.com> <http://example.com> ."; + /// dataset.load(file.as_ref(), DatasetFormat::NQuads, None)?; + /// + /// // we inspect the store contents + /// let ex = NamedNodeRef::new("http://example.com")?; + /// assert!(dataset.contains(QuadRef::new(ex, ex, ex, ex))); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + /// + /// Warning: This function inserts the quads during the parsing. + /// If the parsing fails in the middle of the file, the quads read before stay in the dataset. + /// + /// Errors related to parameter validation like the base IRI use the [`InvalidInput`](std::io::ErrorKind::InvalidInput) error kind. + /// Errors related to a bad syntax in the loaded file use the [`InvalidData`](std::io::ErrorKind::InvalidData) or [`UnexpectedEof`](std::io::ErrorKind::UnexpectedEof) error kinds. + pub fn load( + &mut self, + reader: impl BufRead, + format: DatasetFormat, + base_iri: Option<&str>, + ) -> Result<(), io::Error> { + let mut parser = DatasetParser::from_format(format); + if let Some(base_iri) = base_iri { + parser = parser + .with_base_iri(base_iri) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + } + for t in parser.read_quads(reader)? { + self.insert(&t?); + } + Ok(()) + } + + /// Dumps the dataset into a file. + /// + /// To dump a specific graph use [`GraphView::dump`]. 
+ /// + /// Usage example: + /// ``` + /// use oxigraph::io::DatasetFormat; + /// use oxigraph::model::Dataset; + /// + /// let file = " .\n".as_bytes(); + /// + /// let mut store = Dataset::new(); + /// store.load(file, DatasetFormat::NQuads,None)?; + /// + /// let mut buffer = Vec::new(); + /// store.dump(&mut buffer, DatasetFormat::NQuads)?; + /// assert_eq!(file, buffer.as_slice()); + /// # Result::<_,Box>::Ok(()) + /// ``` + pub fn dump(&self, writer: impl Write, format: DatasetFormat) -> Result<(), io::Error> { + let mut writer = DatasetSerializer::from_format(format).quad_writer(writer)?; + for t in self { + writer.write(t)?; + } + writer.finish() + } + + fn encode_quad( + &mut self, + quad: QuadRef<'_>, + ) -> ( + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ) { + ( + InternedNamedOrBlankNode::encoded_into(quad.subject, &mut self.interner), + InternedNamedNode::encoded_into(quad.predicate, &mut self.interner), + InternedTerm::encoded_into(quad.object, &mut self.interner), + InternedGraphName::encoded_into(quad.graph_name, &mut self.interner), + ) + } + + fn encoded_quad( + &self, + quad: QuadRef<'_>, + ) -> Option<( + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + InternedGraphName, + )> { + Some(( + self.encoded_named_or_blank_node(quad.subject)?, + self.encoded_named_node(quad.predicate)?, + self.encoded_term(quad.object)?, + self.encoded_graph_name(quad.graph_name)?, + )) + } + + pub(super) fn encoded_named_node<'a>( + &self, + node: impl Into>, + ) -> Option { + InternedNamedNode::encoded_from(node.into(), &self.interner) + } + + pub(super) fn encoded_named_or_blank_node<'a>( + &self, + node: impl Into>, + ) -> Option { + InternedNamedOrBlankNode::encoded_from(node.into(), &self.interner) + } + + pub(super) fn encoded_term<'a>(&self, term: impl Into>) -> Option { + InternedTerm::encoded_from(term.into(), &self.interner) + } + + pub(super) fn encoded_graph_name<'a>( + &self, + graph_name: impl 
Into>, + ) -> Option { + InternedGraphName::encoded_from(graph_name.into(), &self.interner) + } + + fn decode_spog( + &self, + quad: ( + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + ) -> QuadRef<'_> { + QuadRef { + subject: quad.0.decode_from(&self.interner), + predicate: quad.1.decode_from(&self.interner), + object: quad.2.decode_from(&self.interner), + graph_name: quad.3.decode_from(&self.interner), + } + } + + /// Applies on the dataset the canonicalization process described in + /// [Canonical Forms for Isomorphic and Equivalent RDF Graphs: Algorithms for Leaning and Labelling Blank Nodes, Aidan Hogan, 2017](http://aidanhogan.com/docs/rdf-canonicalisation.pdf) + /// + /// Warning: This implementation worst-case complexity is in O(b!) with b the number of blank nodes in the input graphs. + pub fn canonicalize(&mut self) { + let bnodes = self.blank_nodes(); + let (hash, partition) = + self.hash_bnodes(bnodes.into_iter().map(|bnode| (bnode, 0)).collect()); + let new_quads = self.distinguish(&hash, &partition); + self.clear(); + for quad in new_quads { + self.insert_encoded(quad); + } + } + + fn blank_nodes(&self) -> HashSet { + let mut bnodes = HashSet::new(); + for (g, s, _, o) in &self.gspo { + if let InternedNamedOrBlankNode::BlankNode(bnode) = s { + bnodes.insert(*bnode); + } + if let InternedTerm::BlankNode(bnode) = o { + bnodes.insert(*bnode); + } + if let InternedGraphName::BlankNode(bnode) = g { + bnodes.insert(*bnode); + } + } + bnodes + } + + fn hash_bnodes( + &self, + mut hashes: HashMap, + ) -> ( + HashMap, + Vec<(u64, Vec)>, + ) { + let mut to_hash = Vec::new(); + let mut partition: HashMap> = HashMap::new(); + let mut partition_len = 0; + loop { + //TODO: improve termination + let mut new_hashes = HashMap::new(); + for (bnode, old_hash) in &hashes { + for (_, p, o, g) in + self.interned_quads_for_subject(InternedNamedOrBlankNode::BlankNode(*bnode)) + { + to_hash.push(( + self.hash_named_node(p), + 
self.hash_term(o, &hashes), + self.hash_graph_name(g, &hashes), + 0, + )); + } + for (s, p, _, g) in self.interned_quads_for_object(InternedTerm::BlankNode(*bnode)) + { + to_hash.push(( + self.hash_named_or_blank_node(s, &hashes), + self.hash_named_node(p), + self.hash_graph_name(g, &hashes), + 1, + )); + } + for (s, p, o, _) in + self.interned_quads_for_graph_name(InternedGraphName::BlankNode(*bnode)) + { + to_hash.push(( + self.hash_named_or_blank_node(s, &hashes), + self.hash_named_node(p), + self.hash_term(o, &hashes), + 2, + )); + } + to_hash.sort_unstable(); + let hash = self.hash_tuple((old_hash, &to_hash)); + to_hash.clear(); + new_hashes.insert(*bnode, hash); + partition.entry(hash).or_default().push(*bnode); + } + if partition.len() == partition_len { + let mut partition: Vec<_> = partition.into_iter().collect(); + partition.sort_by(|(h1, b1), (h2, b2)| (b1.len(), h1).cmp(&(b2.len(), h2))); + return (hashes, partition); + } + hashes = new_hashes; + partition_len = partition.len(); + partition.clear(); + } + } + + fn hash_named_node(&self, node: InternedNamedNode) -> u64 { + self.hash_tuple(node.decode_from(&self.interner)) + } + + fn hash_named_or_blank_node( + &self, + node: InternedNamedOrBlankNode, + bnodes_hash: &HashMap, + ) -> u64 { + if let InternedNamedOrBlankNode::BlankNode(bnode) = node { + *bnodes_hash.get(&bnode).unwrap() + } else { + self.hash_tuple(node.decode_from(&self.interner)) + } + } + + fn hash_term(&self, term: InternedTerm, bnodes_hash: &HashMap) -> u64 { + if let InternedTerm::BlankNode(bnode) = term { + *bnodes_hash.get(&bnode).unwrap() + } else { + self.hash_tuple(term.decode_from(&self.interner)) + } + } + + fn hash_graph_name( + &self, + graph_name: InternedGraphName, + bnodes_hash: &HashMap, + ) -> u64 { + if let InternedGraphName::BlankNode(bnode) = graph_name { + *bnodes_hash.get(&bnode).unwrap() + } else { + self.hash_tuple(graph_name.decode_from(&self.interner)) + } + } + + fn hash_tuple(&self, v: impl Hash) -> u64 { + let 
mut hasher = DefaultHasher::new(); + v.hash(&mut hasher); + hasher.finish() + } + + fn distinguish( + &mut self, + hash: &HashMap, + partition: &[(u64, Vec)], + ) -> Vec<( + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + InternedGraphName, + )> { + let b_prime = partition + .iter() + .find_map(|(_, b)| if b.len() > 1 { Some(b) } else { None }); + if let Some(b_prime) = b_prime { + b_prime + .iter() + .map(|b| { + let mut hash_prime = hash.clone(); + hash_prime.insert(*b, self.hash_tuple((hash_prime[b], 22))); + let (hash_prime_prime, partition_prime) = self.hash_bnodes(hash_prime); + self.distinguish(&hash_prime_prime, &partition_prime) + }) + .fold(None, |a, b| { + Some(if let Some(a) = a { + if a <= b { + a + } else { + b + } + } else { + b + }) + }) + .unwrap_or_else(Vec::new) + } else { + self.label(hash) + } + } + + fn label( + &mut self, + hashes: &HashMap, + ) -> Vec<( + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + InternedGraphName, + )> { + let old_quads: Vec<_> = self.spog.iter().copied().collect(); + let mut quads: Vec<_> = old_quads + .into_iter() + .map(|(s, p, o, g)| { + ( + if let InternedNamedOrBlankNode::BlankNode(bnode) = s { + InternedNamedOrBlankNode::BlankNode(self.map_bnode(bnode, hashes)) + } else { + s + }, + p, + if let InternedTerm::BlankNode(bnode) = o { + InternedTerm::BlankNode(self.map_bnode(bnode, hashes)) + } else { + o + }, + if let InternedGraphName::BlankNode(bnode) = g { + InternedGraphName::BlankNode(self.map_bnode(bnode, hashes)) + } else { + g + }, + ) + }) + .collect(); + quads.sort(); + quads + } + + fn map_bnode( + &mut self, + old_bnode: InternedBlankNode, + hashes: &HashMap, + ) -> InternedBlankNode { + InternedBlankNode::encoded_into( + BlankNode::new_from_unique_id(*hashes.get(&old_bnode).unwrap()).as_ref(), + &mut self.interner, + ) + } +} + +impl PartialEq for Dataset { + fn eq(&self, other: &Self) -> bool { + if self.len() != other.len() { + return false; + } + for q in self { + 
if !other.contains(q) { + return false; + } + } + true + } +} + +impl Eq for Dataset {} + +impl<'a> IntoIterator for &'a Dataset { + type Item = QuadRef<'a>; + type IntoIter = Iter<'a>; + + fn into_iter(self) -> Iter<'a> { + self.iter() + } +} + +impl FromIterator for Dataset { + fn from_iter>(iter: I) -> Self { + let mut g = Dataset::new(); + g.extend(iter); + g + } +} + +impl<'a, T: Into>> FromIterator for Dataset { + fn from_iter>(iter: I) -> Self { + let mut g = Dataset::new(); + g.extend(iter); + g + } +} + +impl Extend for Dataset { + fn extend>(&mut self, iter: I) { + for t in iter { + self.insert(&t); + } + } +} + +impl<'a, T: Into>> Extend for Dataset { + fn extend>(&mut self, iter: I) { + for t in iter { + self.insert(t); + } + } +} + +impl fmt::Display for Dataset { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for t in self { + writeln!(f, "{}", t)?; + } + Ok(()) + } +} + +/// A read-only view on a [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-graph) contained in a [`Dataset`]. +/// +/// It is built using the [`Dataset::graph`] method. 
+/// +/// Usage example: +/// ``` +/// use oxigraph::model::*; +/// +/// let mut dataset = Dataset::default(); +/// let ex = NamedNodeRef::new("http://example.com")?; +/// dataset.insert(QuadRef::new(ex, ex, ex, ex)); +/// +/// let results: Vec<_> = dataset.graph(ex).iter().collect(); +/// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); +/// # Result::<_,Box>::Ok(()) +/// ``` +#[derive(Clone, Copy, Debug)] +pub struct GraphView<'a> { + dataset: &'a Dataset, + graph_name: InternedGraphName, +} + +impl<'a> GraphView<'a> { + /// Returns all the triples contained by the graph + pub fn iter(self) -> GraphViewIter<'a> { + let iter = self.dataset.gspo.range( + &( + self.graph_name, + InternedNamedOrBlankNode::first(), + InternedNamedNode::first(), + InternedTerm::first(), + ) + ..&( + self.graph_name.next(), + InternedNamedOrBlankNode::first(), + InternedNamedNode::first(), + InternedTerm::first(), + ), + ); + GraphViewIter { + graph: self, + inner: iter, + } + } + + pub fn triples_for_subject<'b>( + self, + subject: impl Into>, + ) -> impl Iterator> + 'a { + self.triples_for_interned_subject(self.dataset.encoded_named_or_blank_node(subject)) + } + + pub(super) fn triples_for_interned_subject( + self, + subject: Option, + ) -> impl Iterator> + 'a { + let subject = subject.unwrap_or_else(InternedNamedOrBlankNode::impossible); + self.dataset + .gspo + .range( + &( + self.graph_name, + subject, + InternedNamedNode::first(), + InternedTerm::first(), + ) + ..&( + self.graph_name, + subject.next(), + InternedNamedNode::first(), + InternedTerm::first(), + ), + ) + .map(move |q| self.decode_gspo(*q)) + } + + pub fn objects_for_subject_predicate<'b>( + self, + subject: impl Into>, + predicate: impl Into>, + ) -> impl Iterator> + 'a { + self.objects_for_interned_subject_predicate( + self.dataset.encoded_named_or_blank_node(subject), + self.dataset.encoded_named_node(predicate), + ) + } + + pub fn objects_for_interned_subject_predicate( + self, + subject: Option, + predicate: 
Option, + ) -> impl Iterator> + 'a { + let subject = subject.unwrap_or_else(InternedNamedOrBlankNode::impossible); + let predicate = predicate.unwrap_or_else(InternedNamedNode::impossible); + self.dataset + .gspo + .range( + &(self.graph_name, subject, predicate, InternedTerm::first()) + ..&( + self.graph_name, + subject, + predicate.next(), + InternedTerm::first(), + ), + ) + .map(move |q| q.3.decode_from(&self.dataset.interner)) + } + + pub fn object_for_subject_predicate<'b>( + self, + subject: impl Into>, + predicate: impl Into>, + ) -> Option> { + self.objects_for_subject_predicate(subject, predicate) + .next() + } + + pub fn predicates_for_subject_object<'b>( + self, + subject: impl Into>, + object: impl Into>, + ) -> impl Iterator> + 'a { + self.predicates_for_interned_subject_object( + self.dataset.encoded_named_or_blank_node(subject), + self.dataset.encoded_term(object), + ) + } + + pub(super) fn predicates_for_interned_subject_object( + self, + subject: Option, + object: Option, + ) -> impl Iterator> + 'a { + let subject = subject.unwrap_or_else(InternedNamedOrBlankNode::impossible); + let object = object.unwrap_or_else(InternedTerm::impossible); + self.dataset + .gosp + .range( + &(self.graph_name, object, subject, InternedNamedNode::first()) + ..&( + self.graph_name, + object, + subject.next(), + InternedNamedNode::first(), + ), + ) + .map(move |q| q.3.decode_from(&self.dataset.interner)) + } + + pub fn triples_for_predicate<'b>( + self, + predicate: impl Into>, + ) -> impl Iterator> + 'a { + self.triples_for_interned_predicate(self.dataset.encoded_named_node(predicate)) + } + + pub(super) fn triples_for_interned_predicate( + self, + predicate: Option, + ) -> impl Iterator> + 'a { + let predicate = predicate.unwrap_or_else(InternedNamedNode::impossible); + self.dataset + .gpos + .range( + &( + self.graph_name, + predicate, + InternedTerm::first(), + InternedNamedOrBlankNode::first(), + ) + ..&( + self.graph_name, + predicate.next(), + 
InternedTerm::first(), + InternedNamedOrBlankNode::first(), + ), + ) + .map(move |q| self.decode_gpos(*q)) + } + + pub fn subjects_for_predicate_object<'b>( + self, + predicate: impl Into>, + object: impl Into>, + ) -> impl Iterator> + 'a { + self.subjects_for_interned_predicate_object( + self.dataset.encoded_named_node(predicate), + self.dataset.encoded_term(object), + ) + } + + pub(super) fn subjects_for_interned_predicate_object( + self, + predicate: Option, + object: Option, + ) -> impl Iterator> + 'a { + let predicate = predicate.unwrap_or_else(InternedNamedNode::impossible); + let object = object.unwrap_or_else(InternedTerm::impossible); + self.dataset + .gpos + .range( + &( + self.graph_name, + predicate, + object, + InternedNamedOrBlankNode::first(), + ) + ..&( + self.graph_name, + predicate, + object.next(), + InternedNamedOrBlankNode::first(), + ), + ) + .map(move |q| q.3.decode_from(&self.dataset.interner)) + } + + pub fn subject_for_predicate_object<'b>( + self, + predicate: impl Into>, + object: impl Into>, + ) -> Option> { + self.subjects_for_predicate_object(predicate, object).next() + } + + pub fn triples_for_object<'b>( + self, + object: impl Into>, + ) -> impl Iterator> + 'a { + self.triples_for_interned_object(self.dataset.encoded_term(object)) + } + + pub fn triples_for_interned_object( + self, + object: Option, + ) -> impl Iterator> + 'a { + let object = object.unwrap_or_else(InternedTerm::impossible); + self.dataset + .gosp + .range( + &( + self.graph_name, + object, + InternedNamedOrBlankNode::first(), + InternedNamedNode::first(), + ) + ..&( + self.graph_name, + object.next(), + InternedNamedOrBlankNode::first(), + InternedNamedNode::first(), + ), + ) + .map(move |q| self.decode_gosp(*q)) + } + + /// Checks if the graph contains the given triple + pub fn contains<'b>(&self, triple: impl Into>) -> bool { + if let Some((s, p, o)) = self.encoded_triple(triple.into()) { + self.dataset.gspo.contains(&(self.graph_name, s, p, o)) + } else { + false 
+ } + } + + /// Returns the number of triples in this graph + pub fn len(&self) -> usize { + self.iter().count() + } + + /// Checks if this graph contains a triple + pub fn is_empty(&self) -> bool { + self.iter().next().is_none() + } + + /// Dumps the graph into a file. + /// + /// Usage example: + /// ``` + /// use oxigraph::io::GraphFormat; + /// use oxigraph::model::Graph; + /// + /// let file = " .\n".as_bytes(); + /// + /// let mut store = Graph::new(); + /// store.load(file, GraphFormat::NTriples,None)?; + /// + /// let mut buffer = Vec::new(); + /// store.dump(&mut buffer, GraphFormat::NTriples)?; + /// assert_eq!(file, buffer.as_slice()); + /// # Result::<_,Box>::Ok(()) + /// ``` + pub fn dump(self, writer: impl Write, format: GraphFormat) -> Result<(), io::Error> { + let mut writer = GraphSerializer::from_format(format).triple_writer(writer)?; + for t in self { + writer.write(t)?; + } + writer.finish() + } + + fn encoded_triple( + &self, + triple: TripleRef<'_>, + ) -> Option<(InternedNamedOrBlankNode, InternedNamedNode, InternedTerm)> { + Some(( + self.dataset.encoded_named_or_blank_node(triple.subject)?, + self.dataset.encoded_named_node(triple.predicate)?, + self.dataset.encoded_term(triple.object)?, + )) + } + + fn decode_gspo( + self, + quad: ( + InternedGraphName, + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + ), + ) -> TripleRef<'a> { + TripleRef { + subject: quad.1.decode_from(&self.dataset.interner), + predicate: quad.2.decode_from(&self.dataset.interner), + object: quad.3.decode_from(&self.dataset.interner), + } + } + + fn decode_gpos( + self, + quad: ( + InternedGraphName, + InternedNamedNode, + InternedTerm, + InternedNamedOrBlankNode, + ), + ) -> TripleRef<'a> { + self.decode_gspo((quad.0, quad.3, quad.1, quad.2)) + } + + fn decode_gosp( + self, + quad: ( + InternedGraphName, + InternedTerm, + InternedNamedOrBlankNode, + InternedNamedNode, + ), + ) -> TripleRef<'a> { + self.decode_gspo((quad.0, quad.2, quad.3, quad.1)) + } 
+} + +impl<'a> IntoIterator for GraphView<'a> { + type Item = TripleRef<'a>; + type IntoIter = GraphViewIter<'a>; + + fn into_iter(self) -> GraphViewIter<'a> { + self.iter() + } +} + +impl<'a, 'b> IntoIterator for &'b GraphView<'a> { + type Item = TripleRef<'a>; + type IntoIter = GraphViewIter<'a>; + + fn into_iter(self) -> GraphViewIter<'a> { + self.iter() + } +} + +impl<'a> fmt::Display for GraphView<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for t in self { + writeln!(f, "{}", t)?; + } + Ok(()) + } +} + +/// A read/write view on a [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-graph) contained in a [`Dataset`]. +/// +/// It is built using the [`Dataset::graph_mut`] method. +/// +/// Usage example: +/// ``` +/// use oxigraph::model::*; +/// +/// let mut dataset = Dataset::default(); +/// let ex = NamedNodeRef::new("http://example.com")?; +/// +/// // We edit and query the dataset http://example.com graph +/// { +/// let mut graph = dataset.graph_mut(ex); +/// graph.insert(TripleRef::new(ex, ex, ex)); +/// let results: Vec<_> = graph.iter().collect(); +/// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); +/// } +/// +/// // We have also changed the dataset itself +/// let results: Vec<_> = dataset.iter().collect(); +/// assert_eq!(vec![QuadRef::new(ex, ex, ex, ex)], results); +/// # Result::<_,Box<dyn std::error::Error>>::Ok(()) +/// ``` +#[derive(Debug)] +pub struct GraphViewMut<'a> { + dataset: &'a mut Dataset, + graph_name: InternedGraphName, +} + +impl<'a> GraphViewMut<'a> { + fn read(&self) -> GraphView<'_> { + GraphView { + dataset: self.dataset, + graph_name: self.graph_name, + } + } + + /// Adds a triple to the graph + pub fn insert<'b>(&mut self, triple: impl Into>) -> bool { + let (s, p, o) = self.encode_triple(triple.into()); + self.dataset.insert_encoded((s, p, o, self.graph_name)) + } + + /// Removes a concrete triple from the graph + pub fn remove<'b>(&mut self, triple: impl Into>) -> bool { + if let Some((s, p, o)) = 
self.read().encoded_triple(triple.into()) { + self.dataset.remove_encoded((s, p, o, self.graph_name)) + } else { + false + } + } + + /// Loads a file into the graph. + /// + /// Usage example: + /// ``` + /// use oxigraph::model::*; + /// use oxigraph::io::GraphFormat; + /// + /// let mut graph = Graph::new(); + /// + /// // insertion + /// let file = b" ."; + /// graph.load(file.as_ref(), GraphFormat::NTriples, None)?; + /// + /// // we inspect the store contents + /// let ex = NamedNodeRef::new("http://example.com")?; + /// assert!(graph.contains(TripleRef::new(ex, ex, ex))); + /// # Result::<_,Box>::Ok(()) + /// ``` + /// + /// Warning: This functions inserts the triples during the parsing. + /// If the parsing fails in the middle of the file, the triples read before stay in the graph. + /// + /// Errors related to parameter validation like the base IRI use the [`InvalidInput`](std::io::ErrorKind::InvalidInput) error kind. + /// Errors related to a bad syntax in the loaded file use the [`InvalidData`](std::io::ErrorKind::InvalidData) or [`UnexpectedEof`](std::io::ErrorKind::UnexpectedEof) error kinds. + pub fn load( + &mut self, + reader: impl BufRead, + format: GraphFormat, + base_iri: Option<&str>, + ) -> Result<(), io::Error> { + let mut parser = GraphParser::from_format(format); + if let Some(base_iri) = base_iri { + parser = parser + .with_base_iri(base_iri) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + } + for t in parser.read_triples(reader)? 
{ + self.insert(&t?); + } + Ok(()) + } + + fn encode_triple( + &mut self, + triple: TripleRef<'_>, + ) -> (InternedNamedOrBlankNode, InternedNamedNode, InternedTerm) { + ( + InternedNamedOrBlankNode::encoded_into(triple.subject, &mut self.dataset.interner), + InternedNamedNode::encoded_into(triple.predicate, &mut self.dataset.interner), + InternedTerm::encoded_into(triple.object, &mut self.dataset.interner), + ) + } + + /// Returns all the triples contained by the graph + pub fn iter(&'a self) -> GraphViewIter<'a> { + self.read().iter() + } + + pub fn triples_for_subject<'b>( + &'a self, + subject: impl Into>, + ) -> impl Iterator> + 'a { + self.read() + .triples_for_interned_subject(self.dataset.encoded_named_or_blank_node(subject)) + } + + pub fn objects_for_subject_predicate<'b>( + &'a self, + subject: impl Into>, + predicate: impl Into>, + ) -> impl Iterator> + 'a { + self.read().objects_for_interned_subject_predicate( + self.dataset.encoded_named_or_blank_node(subject), + self.dataset.encoded_named_node(predicate), + ) + } + + pub fn object_for_subject_predicate<'b>( + &'a self, + subject: impl Into>, + predicate: impl Into>, + ) -> Option> { + self.read().object_for_subject_predicate(subject, predicate) + } + + pub fn predicates_for_subject_object<'b>( + &'a self, + subject: impl Into>, + object: impl Into>, + ) -> impl Iterator> + 'a { + self.read().predicates_for_interned_subject_object( + self.dataset.encoded_named_or_blank_node(subject), + self.dataset.encoded_term(object), + ) + } + + pub fn triples_for_predicate<'b>( + &'a self, + predicate: impl Into>, + ) -> impl Iterator> + 'a { + self.read() + .triples_for_interned_predicate(self.dataset.encoded_named_node(predicate)) + } + + pub fn subjects_for_predicate_object<'b>( + &'a self, + predicate: impl Into>, + object: impl Into>, + ) -> impl Iterator> + 'a { + self.read().subjects_for_interned_predicate_object( + self.dataset.encoded_named_node(predicate), + self.dataset.encoded_term(object), + ) + } + + 
pub fn subject_for_predicate_object<'b>( + &'a self, + predicate: impl Into>, + object: impl Into>, + ) -> Option> { + self.read().subject_for_predicate_object(predicate, object) + } + + pub fn triples_for_object<'b>( + &'a self, + object: TermRef<'b>, + ) -> impl Iterator> + 'a { + self.read() + .triples_for_interned_object(self.dataset.encoded_term(object)) + } + + /// Checks if the graph contains the given triple + pub fn contains<'b>(&self, triple: impl Into>) -> bool { + self.read().contains(triple) + } + + /// Returns the number of triples in this graph + pub fn len(&self) -> usize { + self.read().len() + } + + /// Checks if this graph contains a triple + pub fn is_empty(&self) -> bool { + self.read().is_empty() + } + + /// Dumps the graph into a file. + /// + /// Usage example: + /// ``` + /// use oxigraph::io::GraphFormat; + /// use oxigraph::model::Graph; + /// + /// let file = " .\n".as_bytes(); + /// + /// let mut store = Graph::new(); + /// store.load(file, GraphFormat::NTriples,None)?; + /// + /// let mut buffer = Vec::new(); + /// store.dump(&mut buffer, GraphFormat::NTriples)?; + /// assert_eq!(file, buffer.as_slice()); + /// # Result::<_,Box>::Ok(()) + /// ``` + pub fn dump(self, writer: impl Write, format: GraphFormat) -> Result<(), io::Error> { + self.read().dump(writer, format) + } +} + +impl<'a> Extend for GraphViewMut<'a> { + fn extend>(&mut self, iter: I) { + for t in iter { + self.insert(&t); + } + } +} + +impl<'a, 'b, T: Into>> Extend for GraphViewMut<'a> { + fn extend>(&mut self, iter: I) { + for t in iter { + self.insert(t); + } + } +} + +impl<'a> IntoIterator for &'a GraphViewMut<'a> { + type Item = TripleRef<'a>; + type IntoIter = GraphViewIter<'a>; + + fn into_iter(self) -> GraphViewIter<'a> { + self.iter() + } +} + +impl<'a> fmt::Display for GraphViewMut<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for t in self { + writeln!(f, "{}", t)?; + } + Ok(()) + } +} + +/// Iterator returned by [`Dataset::iter`] +pub 
struct Iter<'a> { + dataset: &'a Dataset, + inner: std::collections::btree_set::Iter< + 'a, + ( + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + >, +} + +impl<'a> Iterator for Iter<'a> { + type Item = QuadRef<'a>; + + fn next(&mut self) -> Option> { + self.inner.next().map(|q| self.dataset.decode_spog(*q)) + } +} + +/// Iterator returned by [`GraphView::iter`] +pub struct GraphViewIter<'a> { + graph: GraphView<'a>, + inner: std::collections::btree_set::Range< + 'a, + ( + InternedGraphName, + InternedNamedOrBlankNode, + InternedNamedNode, + InternedTerm, + ), + >, +} + +impl<'a> Iterator for GraphViewIter<'a> { + type Item = TripleRef<'a>; + + fn next(&mut self) -> Option> { + self.inner.next().map(|t| self.graph.decode_gspo(*t)) + } +} diff --git a/lib/src/model/graph.rs b/lib/src/model/graph.rs new file mode 100644 index 00000000..b40f6582 --- /dev/null +++ b/lib/src/model/graph.rs @@ -0,0 +1,302 @@ +//! [In-memory implementation](super::Graph) of [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-graph). +//! +//! Usage example: +//! ``` +//! use oxigraph::model::*; +//! +//! let mut graph = Graph::default(); +//! +//! // insertion +//! let ex = NamedNodeRef::new("http://example.com")?; +//! let triple = TripleRef::new(ex, ex, ex); +//! graph.insert(triple); +//! +//! // simple filter +//! let results: Vec<_> = graph.triples_for_subject(ex).collect(); +//! assert_eq!(vec![triple], results); +//! # Result::<_,Box>::Ok(()) +//! ``` +//! +//! See also [`Dataset`](super::Dataset) if you want to get support of multiple RDF graphs at the same time. + +use crate::io::GraphFormat; +use crate::model::dataset::*; +use crate::model::*; +use std::io::{BufRead, Write}; +use std::iter::FromIterator; +use std::{fmt, io}; + +/// An in-memory [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-graph). +/// +/// It can accomodate a fairly large number of triples (in the few millions). 
+/// Beware: it interns the string and does not do any garbage collection yet: +/// if you insert and remove a lot of different terms, memory will grow without any reduction. +/// +/// Usage example: +/// ``` +/// use oxigraph::model::*; +/// +/// let mut graph = Graph::default(); +/// +/// // insertion +/// let ex = NamedNodeRef::new("http://example.com")?; +/// let triple = TripleRef::new(ex, ex, ex); +/// graph.insert(triple); +/// +/// // simple filter +/// let results: Vec<_> = graph.triples_for_subject(ex).collect(); +/// assert_eq!(vec![triple], results); +/// # Result::<_,Box>::Ok(()) +/// ``` +#[derive(Debug, Default)] +pub struct Graph { + dataset: Dataset, +} + +impl Graph { + /// Creates a new graph + pub fn new() -> Self { + Self::default() + } + + fn graph(&self) -> GraphView<'_> { + self.dataset.graph(GraphNameRef::DefaultGraph) + } + + fn graph_mut(&mut self) -> GraphViewMut<'_> { + self.dataset.graph_mut(GraphNameRef::DefaultGraph) + } + + /// Returns all the triples contained by the graph + pub fn iter(&self) -> Iter<'_> { + Iter { + inner: self.graph().iter(), + } + } + + pub fn triples_for_subject<'a, 'b>( + &'a self, + subject: impl Into>, + ) -> impl Iterator> + 'a { + self.graph() + .triples_for_interned_subject(self.dataset.encoded_named_or_blank_node(subject)) + } + + pub fn objects_for_subject_predicate<'a, 'b>( + &'a self, + subject: impl Into>, + predicate: impl Into>, + ) -> impl Iterator> + 'a { + self.graph().objects_for_interned_subject_predicate( + self.dataset.encoded_named_or_blank_node(subject), + self.dataset.encoded_named_node(predicate), + ) + } + + pub fn object_for_subject_predicate<'a, 'b>( + &'a self, + subject: impl Into>, + predicate: impl Into>, + ) -> Option> { + self.graph() + .objects_for_subject_predicate(subject, predicate) + .next() + } + + pub fn predicates_for_subject_object<'a, 'b>( + &'a self, + subject: impl Into>, + object: impl Into>, + ) -> impl Iterator> + 'a { + 
self.graph().predicates_for_interned_subject_object( + self.dataset.encoded_named_or_blank_node(subject), + self.dataset.encoded_term(object), + ) + } + + pub fn triples_for_predicate<'a, 'b>( + &'a self, + predicate: impl Into>, + ) -> impl Iterator> + 'a { + self.graph() + .triples_for_interned_predicate(self.dataset.encoded_named_node(predicate)) + } + + pub fn subjects_for_predicate_object<'a, 'b>( + &'a self, + predicate: impl Into>, + object: impl Into>, + ) -> impl Iterator> + 'a { + self.graph().subjects_for_interned_predicate_object( + self.dataset.encoded_named_node(predicate), + self.dataset.encoded_term(object), + ) + } + + pub fn subject_for_predicate_object<'a, 'b>( + &'a self, + predicate: impl Into>, + object: impl Into>, + ) -> Option> { + self.graph().subject_for_predicate_object(predicate, object) + } + + pub fn triples_for_object<'a, 'b>( + &'a self, + object: impl Into>, + ) -> impl Iterator> + 'a { + self.graph() + .triples_for_interned_object(self.dataset.encoded_term(object)) + } + + /// Checks if the graph contains the given triple + pub fn contains<'a>(&self, triple: impl Into>) -> bool { + self.graph().contains(triple) + } + + /// Returns the number of triples in this graph + pub fn len(&self) -> usize { + self.dataset.len() + } + + /// Checks if this graph contains a triple + pub fn is_empty(&self) -> bool { + self.dataset.is_empty() + } + + /// Adds a triple to the graph + pub fn insert<'a>(&mut self, triple: impl Into>) -> bool { + self.graph_mut().insert(triple) + } + + /// Removes a concrete triple from the graph + pub fn remove<'a>(&mut self, triple: impl Into>) -> bool { + self.graph_mut().remove(triple) + } + + /// Loads a file into the graph. 
+ /// + /// Usage example: + /// ``` + /// use oxigraph::model::*; + /// use oxigraph::io::GraphFormat; + /// + /// let mut graph = Graph::new(); + /// + /// // insertion + /// let file = b" ."; + /// graph.load(file.as_ref(), GraphFormat::NTriples, None)?; + /// + /// // we inspect the store contents + /// let ex = NamedNodeRef::new("http://example.com")?; + /// assert!(graph.contains(TripleRef::new(ex, ex, ex))); + /// # Result::<_,Box>::Ok(()) + /// ``` + /// + /// Warning: This functions inserts the triples during the parsing. + /// If the parsing fails in the middle of the file, the triples read before stay in the graph. + /// + /// Errors related to parameter validation like the base IRI use the [`InvalidInput`](std::io::ErrorKind::InvalidInput) error kind. + /// Errors related to a bad syntax in the loaded file use the [`InvalidData`](std::io::ErrorKind::InvalidData) or [`UnexpectedEof`](std::io::ErrorKind::UnexpectedEof) error kinds. + pub fn load( + &mut self, + reader: impl BufRead, + format: GraphFormat, + base_iri: Option<&str>, + ) -> Result<(), io::Error> { + self.graph_mut().load(reader, format, base_iri) + } + + /// Dumps the graph into a file. 
+ /// + /// Usage example: + /// ``` + /// use oxigraph::io::GraphFormat; + /// use oxigraph::model::Graph; + /// + /// let file = " .\n".as_bytes(); + /// + /// let mut store = Graph::new(); + /// store.load(file, GraphFormat::NTriples,None)?; + /// + /// let mut buffer = Vec::new(); + /// store.dump(&mut buffer, GraphFormat::NTriples)?; + /// assert_eq!(file, buffer.as_slice()); + /// # Result::<_,Box>::Ok(()) + /// ``` + pub fn dump(&self, writer: impl Write, format: GraphFormat) -> Result<(), io::Error> { + self.graph().dump(writer, format) + } + + /// Applies on the graph the canonicalization process described in + /// [Canonical Forms for Isomorphic and Equivalent RDF Graphs: Algorithms for Leaning and Labelling Blank Nodes, Aidan Hogan, 2017](http://aidanhogan.com/docs/rdf-canonicalisation.pdf) + /// + /// Warning: This implementation worst-case complexity is in O(b!) with b the number of blank nodes in the input graphs. + pub fn canonicalize(&mut self) { + self.dataset.canonicalize() + } +} + +impl PartialEq for Graph { + fn eq(&self, other: &Self) -> bool { + self.dataset == other.dataset + } +} + +impl Eq for Graph {} + +impl<'a> IntoIterator for &'a Graph { + type Item = TripleRef<'a>; + type IntoIter = Iter<'a>; + + fn into_iter(self) -> Iter<'a> { + self.iter() + } +} + +impl FromIterator for Graph { + fn from_iter>(iter: I) -> Self { + let mut g = Graph::new(); + g.extend(iter); + g + } +} + +impl<'a, T: Into>> FromIterator for Graph { + fn from_iter>(iter: I) -> Self { + let mut g = Graph::new(); + g.extend(iter); + g + } +} + +impl Extend for Graph { + fn extend>(&mut self, iter: I) { + self.graph_mut().extend(iter) + } +} + +impl<'a, T: Into>> Extend for Graph { + fn extend>(&mut self, iter: I) { + self.graph_mut().extend(iter) + } +} + +impl fmt::Display for Graph { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.graph().fmt(f) + } +} + +/// Iterator returned by [`Graph::iter`] +pub struct Iter<'a> { + inner: GraphViewIter<'a>, 
+} + +impl<'a> Iterator for Iter<'a> { + type Item = TripleRef<'a>; + + fn next(&mut self) -> Option> { + self.inner.next() + } +} diff --git a/lib/src/model/interning.rs b/lib/src/model/interning.rs new file mode 100644 index 00000000..93ad232c --- /dev/null +++ b/lib/src/model/interning.rs @@ -0,0 +1,344 @@ +//! Interning of RDF elements using Rodeo + +use crate::model::*; +use lasso::{Key, Rodeo, Spur}; +use std::convert::TryInto; + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] +pub struct InternedNamedNode { + id: Spur, +} + +impl InternedNamedNode { + pub fn encoded_into(named_node: NamedNodeRef<'_>, interner: &mut Rodeo) -> Self { + Self { + id: interner.get_or_intern(named_node.as_str()), + } + } + + pub fn encoded_from(named_node: NamedNodeRef<'_>, interner: &Rodeo) -> Option { + Some(Self { + id: interner.get(named_node.as_str())?, + }) + } + + pub fn decode_from<'a>(&self, interner: &'a Rodeo) -> NamedNodeRef<'a> { + NamedNodeRef::new_unchecked(interner.resolve(&self.id)) + } + + pub fn first() -> Self { + Self { id: fist_spur() } + } + + pub fn next(&self) -> Self { + Self { + id: next_spur(self.id), + } + } + + pub fn impossible() -> Self { + Self { + id: impossible_spur(), + } + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] +pub struct InternedBlankNode { + id: Spur, +} + +impl InternedBlankNode { + pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Rodeo) -> Self { + Self { + id: interner.get_or_intern(blank_node.as_str()), + } + } + + pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Rodeo) -> Option { + Some(Self { + id: interner.get(blank_node.as_str())?, + }) + } + + pub fn decode_from<'a>(&self, interner: &'a Rodeo) -> BlankNodeRef<'a> { + BlankNodeRef::new_unchecked(interner.resolve(&self.id)) + } + + pub fn next(&self) -> Self { + Self { + id: next_spur(self.id), + } + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] +pub enum 
InternedLiteral { + String { + value_id: Spur, + }, + LanguageTaggedString { + value_id: Spur, + language_id: Spur, + }, + TypedLiteral { + value_id: Spur, + datatype: InternedNamedNode, + }, +} + +impl InternedLiteral { + pub fn encoded_into(literal: LiteralRef<'_>, interner: &mut Rodeo) -> Self { + let value_id = interner.get_or_intern(literal.value()); + if literal.is_plain() { + if let Some(language) = literal.language() { + Self::LanguageTaggedString { + value_id, + language_id: interner.get_or_intern(language), + } + } else { + Self::String { value_id } + } + } else { + Self::TypedLiteral { + value_id, + datatype: InternedNamedNode::encoded_into(literal.datatype(), interner), + } + } + } + + pub fn encoded_from(literal: LiteralRef<'_>, interner: &Rodeo) -> Option { + let value_id = interner.get(literal.value())?; + Some(if literal.is_plain() { + if let Some(language) = literal.language() { + Self::LanguageTaggedString { + value_id, + language_id: interner.get(language)?, + } + } else { + Self::String { value_id } + } + } else { + Self::TypedLiteral { + value_id, + datatype: InternedNamedNode::encoded_from(literal.datatype(), interner)?, + } + }) + } + + pub fn decode_from<'a>(&self, interner: &'a Rodeo) -> LiteralRef<'a> { + match self { + InternedLiteral::String { value_id } => { + LiteralRef::new_simple_literal(interner.resolve(value_id)) + } + InternedLiteral::LanguageTaggedString { + value_id, + language_id, + } => LiteralRef::new_language_tagged_literal_unchecked( + interner.resolve(value_id), + interner.resolve(language_id), + ), + InternedLiteral::TypedLiteral { value_id, datatype } => LiteralRef::new_typed_literal( + interner.resolve(value_id), + datatype.decode_from(interner), + ), + } + } + + pub fn next(&self) -> Self { + match self { + Self::String { value_id } => Self::String { + value_id: next_spur(*value_id), + }, + Self::LanguageTaggedString { + value_id, + language_id, + } => Self::LanguageTaggedString { + value_id: *value_id, + language_id: 
next_spur(*language_id), + }, + Self::TypedLiteral { value_id, datatype } => Self::TypedLiteral { + value_id: *value_id, + datatype: datatype.next(), + }, + } + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] +pub enum InternedNamedOrBlankNode { + NamedNode(InternedNamedNode), + BlankNode(InternedBlankNode), +} + +impl InternedNamedOrBlankNode { + pub fn encoded_into(node: NamedOrBlankNodeRef<'_>, interner: &mut Rodeo) -> Self { + match node { + NamedOrBlankNodeRef::NamedNode(node) => { + Self::NamedNode(InternedNamedNode::encoded_into(node, interner)) + } + NamedOrBlankNodeRef::BlankNode(node) => { + Self::BlankNode(InternedBlankNode::encoded_into(node, interner)) + } + } + } + + pub fn encoded_from(node: NamedOrBlankNodeRef<'_>, interner: &Rodeo) -> Option { + Some(match node { + NamedOrBlankNodeRef::NamedNode(node) => { + Self::NamedNode(InternedNamedNode::encoded_from(node, interner)?) + } + NamedOrBlankNodeRef::BlankNode(node) => { + Self::BlankNode(InternedBlankNode::encoded_from(node, interner)?) 
+ } + }) + } + + pub fn decode_from<'a>(&self, interner: &'a Rodeo) -> NamedOrBlankNodeRef<'a> { + match self { + Self::NamedNode(node) => NamedOrBlankNodeRef::NamedNode(node.decode_from(interner)), + Self::BlankNode(node) => NamedOrBlankNodeRef::BlankNode(node.decode_from(interner)), + } + } + + pub fn first() -> Self { + Self::NamedNode(InternedNamedNode::first()) + } + + pub fn next(&self) -> Self { + match self { + Self::NamedNode(node) => Self::NamedNode(node.next()), + Self::BlankNode(node) => Self::BlankNode(node.next()), + } + } + + pub fn impossible() -> Self { + Self::NamedNode(InternedNamedNode::impossible()) + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] +pub enum InternedGraphName { + DefaultGraph, + NamedNode(InternedNamedNode), + BlankNode(InternedBlankNode), +} + +impl InternedGraphName { + pub fn encoded_into(node: GraphNameRef<'_>, interner: &mut Rodeo) -> Self { + match node { + GraphNameRef::DefaultGraph => Self::DefaultGraph, + GraphNameRef::NamedNode(node) => { + Self::NamedNode(InternedNamedNode::encoded_into(node, interner)) + } + GraphNameRef::BlankNode(node) => { + Self::BlankNode(InternedBlankNode::encoded_into(node, interner)) + } + } + } + + pub fn encoded_from(node: GraphNameRef<'_>, interner: &Rodeo) -> Option { + Some(match node { + GraphNameRef::DefaultGraph => Self::DefaultGraph, + GraphNameRef::NamedNode(node) => { + Self::NamedNode(InternedNamedNode::encoded_from(node, interner)?) + } + GraphNameRef::BlankNode(node) => { + Self::BlankNode(InternedBlankNode::encoded_from(node, interner)?) 
+ } + }) + } + + pub fn decode_from<'a>(&self, interner: &'a Rodeo) -> GraphNameRef<'a> { + match self { + Self::DefaultGraph => GraphNameRef::DefaultGraph, + Self::NamedNode(node) => GraphNameRef::NamedNode(node.decode_from(interner)), + Self::BlankNode(node) => GraphNameRef::BlankNode(node.decode_from(interner)), + } + } + + pub fn first() -> Self { + Self::DefaultGraph + } + + pub fn next(&self) -> Self { + match self { + Self::DefaultGraph => Self::NamedNode(InternedNamedNode::first()), + Self::NamedNode(node) => Self::NamedNode(node.next()), + Self::BlankNode(node) => Self::BlankNode(node.next()), + } + } + + pub fn impossible() -> Self { + Self::NamedNode(InternedNamedNode::impossible()) + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] +pub enum InternedTerm { + NamedNode(InternedNamedNode), + BlankNode(InternedBlankNode), + Literal(InternedLiteral), +} + +impl InternedTerm { + pub fn encoded_into(term: TermRef<'_>, interner: &mut Rodeo) -> Self { + match term { + TermRef::NamedNode(term) => { + Self::NamedNode(InternedNamedNode::encoded_into(term, interner)) + } + TermRef::BlankNode(term) => { + Self::BlankNode(InternedBlankNode::encoded_into(term, interner)) + } + TermRef::Literal(term) => Self::Literal(InternedLiteral::encoded_into(term, interner)), + } + } + + pub fn encoded_from(term: TermRef<'_>, interner: &Rodeo) -> Option { + Some(match term { + TermRef::NamedNode(term) => { + Self::NamedNode(InternedNamedNode::encoded_from(term, interner)?) + } + TermRef::BlankNode(term) => { + Self::BlankNode(InternedBlankNode::encoded_from(term, interner)?) 
+ } + TermRef::Literal(term) => Self::Literal(InternedLiteral::encoded_from(term, interner)?), + }) + } + + pub fn decode_from<'a>(&self, interner: &'a Rodeo) -> TermRef<'a> { + match self { + Self::NamedNode(term) => TermRef::NamedNode(term.decode_from(interner)), + Self::BlankNode(term) => TermRef::BlankNode(term.decode_from(interner)), + Self::Literal(term) => TermRef::Literal(term.decode_from(interner)), + } + } + + pub fn first() -> Self { + Self::NamedNode(InternedNamedNode::first()) + } + + pub fn next(&self) -> Self { + match self { + Self::NamedNode(node) => Self::NamedNode(node.next()), + Self::BlankNode(node) => Self::BlankNode(node.next()), + Self::Literal(node) => Self::Literal(node.next()), + } + } + + pub fn impossible() -> Self { + Self::NamedNode(InternedNamedNode::impossible()) + } +} + +fn fist_spur() -> Spur { + Spur::try_from_usize(0).unwrap() +} + +fn next_spur(value: Spur) -> Spur { + Spur::try_from_usize(value.into_usize() + 1).unwrap() +} + +fn impossible_spur() -> Spur { + Spur::try_from_usize((u32::max_value() - 10).try_into().unwrap()).unwrap() +} diff --git a/lib/src/model/mod.rs b/lib/src/model/mod.rs index c9fde7fa..220fdbb0 100644 --- a/lib/src/model/mod.rs +++ b/lib/src/model/mod.rs @@ -3,6 +3,9 @@ //! 
Inspired by [RDF/JS](https://rdf.js.org/data-model-spec/) and [Apache Commons RDF](http://commons.apache.org/proper/commons-rdf/) mod blank_node; +pub mod dataset; +pub mod graph; +mod interning; mod literal; mod named_node; mod parser; @@ -12,11 +15,13 @@ mod triple; pub mod vocab; pub(crate) mod xsd; -pub use crate::model::blank_node::{BlankNode, BlankNodeIdParseError, BlankNodeRef}; -pub use crate::model::literal::{Literal, LiteralRef}; -pub use crate::model::named_node::{NamedNode, NamedNodeRef}; -pub use crate::model::parser::TermParseError; -pub use crate::model::triple::{ +pub use self::blank_node::{BlankNode, BlankNodeIdParseError, BlankNodeRef}; +pub use self::dataset::Dataset; +pub use self::graph::Graph; +pub use self::literal::{Literal, LiteralRef}; +pub use self::named_node::{NamedNode, NamedNodeRef}; +pub use self::parser::TermParseError; +pub use self::triple::{ GraphName, GraphNameRef, NamedOrBlankNode, NamedOrBlankNodeRef, Quad, QuadRef, Term, TermRef, Triple, TripleRef, }; diff --git a/lib/src/store/small_string.rs b/lib/src/store/small_string.rs index a97b98be..f3b586e3 100644 --- a/lib/src/store/small_string.rs +++ b/lib/src/store/small_string.rs @@ -1,6 +1,6 @@ -use nom::lib::std::convert::TryFrom; use std::borrow::Borrow; use std::cmp::Ordering; +use std::convert::TryFrom; use std::convert::TryInto; use std::error::Error; use std::fmt; diff --git a/testsuite/src/files.rs b/testsuite/src/files.rs index e31d3daa..ed651f9b 100644 --- a/testsuite/src/files.rs +++ b/testsuite/src/files.rs @@ -1,6 +1,6 @@ use anyhow::{anyhow, Result}; use oxigraph::io::{DatasetFormat, GraphFormat}; -use oxigraph::model::{GraphName, GraphNameRef}; +use oxigraph::model::{Dataset, Graph, GraphNameRef}; use oxigraph::MemoryStore; use std::fs::File; use std::io::{BufRead, BufReader, Read}; @@ -75,8 +75,54 @@ pub fn load_to_store<'a>( Ok(()) } -pub fn load_store(url: &str) -> Result { - let store = MemoryStore::new(); - load_to_store(url, &store, 
&GraphName::DefaultGraph)?; - Ok(store) +pub fn load_to_graph(url: &str, graph: &mut Graph) -> Result<()> { + if url.ends_with(".nt") { + graph.load(read_file(url)?, GraphFormat::NTriples, Some(url))? + } else if url.ends_with(".ttl") { + graph.load(read_file(url)?, GraphFormat::Turtle, Some(url))? + } else if url.ends_with(".rdf") { + graph.load(read_file(url)?, GraphFormat::RdfXml, Some(url))? + } else { + return Err(anyhow!("Serialization type not found for {}", url)); + } + Ok(()) +} + +pub fn load_graph(url: &str) -> Result { + let mut graph = Graph::new(); + load_to_graph(url, &mut graph)?; + Ok(graph) +} + +pub fn load_to_dataset<'a>( + url: &str, + dataset: &mut Dataset, + to_graph_name: impl Into>, +) -> Result<()> { + if url.ends_with(".nt") { + dataset + .graph_mut(to_graph_name) + .load(read_file(url)?, GraphFormat::NTriples, Some(url))? + } else if url.ends_with(".ttl") { + dataset + .graph_mut(to_graph_name) + .load(read_file(url)?, GraphFormat::Turtle, Some(url))? + } else if url.ends_with(".rdf") { + dataset + .graph_mut(to_graph_name) + .load(read_file(url)?, GraphFormat::RdfXml, Some(url))? + } else if url.ends_with(".nq") { + dataset.load(read_file(url)?, DatasetFormat::NQuads, Some(url))? + } else if url.ends_with(".trig") { + dataset.load(read_file(url)?, DatasetFormat::TriG, Some(url))? 
+ } else { + return Err(anyhow!("Serialization type not found for {}", url)); + } + Ok(()) +} + +pub fn load_dataset(url: &str) -> Result { + let mut dataset = Dataset::new(); + load_to_dataset(url, &mut dataset, GraphNameRef::DefaultGraph)?; + Ok(dataset) } diff --git a/testsuite/src/manifest.rs b/testsuite/src/manifest.rs index 9dbcb222..a50942f7 100644 --- a/testsuite/src/manifest.rs +++ b/testsuite/src/manifest.rs @@ -1,9 +1,8 @@ -use crate::files::load_to_store; +use crate::files::load_to_graph; use crate::vocab::*; use anyhow::{anyhow, Result}; use oxigraph::model::vocab::*; use oxigraph::model::*; -use oxigraph::MemoryStore; use std::fmt; pub struct Test { @@ -50,7 +49,7 @@ impl fmt::Display for Test { } pub struct TestManifest { - graph: MemoryStore, + graph: Graph, tests_to_do: Vec, manifests_to_do: Vec, } @@ -58,7 +57,7 @@ pub struct TestManifest { impl TestManifest { pub fn new(manifest_urls: impl IntoIterator) -> Self { Self { - graph: MemoryStore::new(), + graph: Graph::new(), tests_to_do: Vec::new(), manifests_to_do: manifest_urls .into_iter() @@ -74,158 +73,160 @@ impl Iterator for TestManifest { fn next(&mut self) -> Option> { match self.tests_to_do.pop() { Some(Term::NamedNode(test_node)) => { - let kind = match object_for_subject_predicate(&self.graph, &test_node, rdf::TYPE) { - Some(Term::NamedNode(c)) => c, + let kind = match self + .graph + .object_for_subject_predicate(&test_node, rdf::TYPE) + { + Some(TermRef::NamedNode(c)) => c.into_owned(), _ => return self.next(), //We ignore the test }; - let name = match object_for_subject_predicate(&self.graph, &test_node, mf::NAME) { - Some(Term::Literal(c)) => Some(c.value().to_string()), + let name = match self + .graph + .object_for_subject_predicate(&test_node, mf::NAME) + { + Some(TermRef::Literal(c)) => Some(c.value().to_string()), _ => None, }; - let comment = - match object_for_subject_predicate(&self.graph, &test_node, rdfs::COMMENT) { - Some(Term::Literal(c)) => Some(c.value().to_string()), - 
_ => None, - }; - let (action, query, update, data, graph_data, service_data) = - match object_for_subject_predicate(&self.graph, &test_node, mf::ACTION) { - Some(Term::NamedNode(n)) => { - (Some(n.into_string()), None, None, None, vec![], vec![]) - } - Some(Term::BlankNode(n)) => { - let query = - match object_for_subject_predicate(&self.graph, &n, qt::QUERY) { - Some(Term::NamedNode(q)) => Some(q.into_string()), - _ => None, - }; - let update = - match object_for_subject_predicate(&self.graph, &n, ut::REQUEST) { - Some(Term::NamedNode(q)) => Some(q.into_string()), - _ => None, - }; - let data = match object_for_subject_predicate(&self.graph, &n, qt::DATA) - .or_else(|| object_for_subject_predicate(&self.graph, &n, ut::DATA)) - { - Some(Term::NamedNode(q)) => Some(q.into_string()), - _ => None, - }; - let graph_data = - objects_for_subject_predicate(&self.graph, &n, qt::GRAPH_DATA) - .chain(objects_for_subject_predicate( - &self.graph, - &n, - ut::GRAPH_DATA, - )) - .filter_map(|g| match g { - Term::NamedNode(q) => Some((q.clone(), q.into_string())), - Term::BlankNode(node) => { - if let Some(Term::NamedNode(graph)) = - object_for_subject_predicate( - &self.graph, - &node, - ut::GRAPH, - ) - { - if let Some(Term::Literal(name)) = - object_for_subject_predicate( - &self.graph, - &node, - rdfs::LABEL, - ) - { - Some(( - NamedNode::new(name.value()).unwrap(), - graph.into_string(), - )) - } else { - Some((graph.clone(), graph.into_string())) - } - } else { - None - } - } - _ => None, - }) - .collect(); - let service_data = - objects_for_subject_predicate(&self.graph, &n, qt::SERVICE_DATA) - .filter_map(|g| match g { - Term::NamedNode(g) => Some(g.into()), - Term::BlankNode(g) => Some(g.into()), - _ => None, - }) - .filter_map(|g: NamedOrBlankNode| { - if let ( - Some(Term::NamedNode(endpoint)), - Some(Term::NamedNode(data)), - ) = ( - object_for_subject_predicate( - &self.graph, - &g, - qt::ENDPOINT, - ), - object_for_subject_predicate(&self.graph, &g, qt::DATA), - ) 
{ - Some((endpoint.into_string(), data.into_string())) + let comment = match self + .graph + .object_for_subject_predicate(&test_node, rdfs::COMMENT) + { + Some(TermRef::Literal(c)) => Some(c.value().to_string()), + _ => None, + }; + let (action, query, update, data, graph_data, service_data) = match self + .graph + .object_for_subject_predicate(&test_node, mf::ACTION) + { + Some(TermRef::NamedNode(n)) => ( + Some(n.as_str().to_owned()), + None, + None, + None, + vec![], + vec![], + ), + Some(TermRef::BlankNode(n)) => { + let query = match self.graph.object_for_subject_predicate(n, qt::QUERY) { + Some(TermRef::NamedNode(q)) => Some(q.as_str().to_owned()), + _ => None, + }; + let update = match self.graph.object_for_subject_predicate(n, ut::REQUEST) { + Some(TermRef::NamedNode(q)) => Some(q.as_str().to_owned()), + _ => None, + }; + let data = match self + .graph + .object_for_subject_predicate(n, qt::DATA) + .or_else(|| self.graph.object_for_subject_predicate(n, ut::DATA)) + { + Some(TermRef::NamedNode(q)) => Some(q.as_str().to_owned()), + _ => None, + }; + let graph_data = self + .graph + .objects_for_subject_predicate(n, qt::GRAPH_DATA) + .chain(self.graph.objects_for_subject_predicate(n, ut::GRAPH_DATA)) + .filter_map(|g| match g { + TermRef::NamedNode(q) => { + Some((q.into_owned(), q.as_str().to_owned())) + } + TermRef::BlankNode(node) => { + if let Some(TermRef::NamedNode(graph)) = + self.graph.object_for_subject_predicate(node, ut::GRAPH) + { + if let Some(TermRef::Literal(name)) = self + .graph + .object_for_subject_predicate(node, rdfs::LABEL) + { + Some(( + NamedNode::new(name.value()).unwrap(), + graph.as_str().to_owned(), + )) } else { - None + Some((graph.into_owned(), graph.as_str().to_owned())) } - }) - .collect(); - (None, query, update, data, graph_data, service_data) - } - Some(_) => return Some(Err(anyhow!("invalid action"))), - None => { - return Some(Err(anyhow!("action not found for test {}", test_node))); - } - }; - let (result, 
result_graph_data) = - match object_for_subject_predicate(&self.graph, &test_node, mf::RESULT) { - Some(Term::NamedNode(n)) => (Some(n.into_string()), Vec::new()), - Some(Term::BlankNode(n)) => ( - if let Some(Term::NamedNode(result)) = - object_for_subject_predicate(&self.graph, &n, ut::DATA) - { - Some(result.into_string()) - } else { - None - }, - objects_for_subject_predicate(&self.graph, &n, ut::GRAPH_DATA) - .filter_map(|g| match g { - Term::NamedNode(q) => Some((q.clone(), q.into_string())), - Term::BlankNode(node) => { - if let Some(Term::NamedNode(graph)) = - object_for_subject_predicate( - &self.graph, - &node, - ut::GRAPH, - ) + } else { + None + } + } + _ => None, + }) + .collect(); + let service_data = self + .graph + .objects_for_subject_predicate(n, qt::SERVICE_DATA) + .filter_map(|g| match g { + TermRef::NamedNode(g) => Some(g.into()), + TermRef::BlankNode(g) => Some(g.into()), + _ => None, + }) + .filter_map(|g: NamedOrBlankNodeRef<'_>| { + if let ( + Some(TermRef::NamedNode(endpoint)), + Some(TermRef::NamedNode(data)), + ) = ( + self.graph.object_for_subject_predicate(g, qt::ENDPOINT), + self.graph.object_for_subject_predicate(g, qt::DATA), + ) { + Some((endpoint.as_str().to_owned(), data.as_str().to_owned())) + } else { + None + } + }) + .collect(); + (None, query, update, data, graph_data, service_data) + } + Some(_) => return Some(Err(anyhow!("invalid action"))), + None => { + return Some(Err(anyhow!("action not found for test {}", test_node))); + } + }; + let (result, result_graph_data) = match self + .graph + .object_for_subject_predicate(&test_node, mf::RESULT) + { + Some(TermRef::NamedNode(n)) => (Some(n.as_str().to_owned()), Vec::new()), + Some(TermRef::BlankNode(n)) => ( + if let Some(TermRef::NamedNode(result)) = + self.graph.object_for_subject_predicate(n, ut::DATA) + { + Some(result.as_str().to_owned()) + } else { + None + }, + self.graph + .objects_for_subject_predicate(n, ut::GRAPH_DATA) + .filter_map(|g| match g { + 
TermRef::NamedNode(q) => { + Some((q.into_owned(), q.as_str().to_owned())) + } + TermRef::BlankNode(node) => { + if let Some(TermRef::NamedNode(graph)) = + self.graph.object_for_subject_predicate(node, ut::GRAPH) + { + if let Some(TermRef::Literal(name)) = self + .graph + .object_for_subject_predicate(node, rdfs::LABEL) { - if let Some(Term::Literal(name)) = - object_for_subject_predicate( - &self.graph, - &node, - rdfs::LABEL, - ) - { - Some(( - NamedNode::new(name.value()).unwrap(), - graph.into_string(), - )) - } else { - Some((graph.clone(), graph.into_string())) - } + Some(( + NamedNode::new(name.value()).unwrap(), + graph.as_str().to_owned(), + )) } else { - None + Some((graph.into_owned(), graph.as_str().to_owned())) } + } else { + None } - _ => None, - }) - .collect(), - ), - Some(_) => return Some(Err(anyhow!("invalid result"))), - None => (None, Vec::new()), - }; + } + _ => None, + }) + .collect(), + ), + Some(_) => return Some(Err(anyhow!("invalid result"))), + None => (None, Vec::new()), + }; Some(Ok(Test { id: test_node, kind, @@ -247,15 +248,16 @@ impl Iterator for TestManifest { Some(url) => { let manifest = NamedOrBlankNodeRef::from(NamedNodeRef::new(url.as_str()).unwrap()); - if let Err(error) = - load_to_store(&url, &self.graph, GraphNameRef::DefaultGraph) - { + if let Err(error) = load_to_graph(&url, &mut self.graph) { return Some(Err(error)); } // New manifests - match object_for_subject_predicate(&self.graph, manifest, mf::INCLUDE) { - Some(Term::BlankNode(list)) => { + match self + .graph + .object_for_subject_predicate(manifest, mf::INCLUDE) + { + Some(TermRef::BlankNode(list)) => { self.manifests_to_do.extend( RdfListIterator::iter(&self.graph, list.into()).filter_map( |m| match m { @@ -270,8 +272,11 @@ impl Iterator for TestManifest { } // New tests - match object_for_subject_predicate(&self.graph, manifest, mf::ENTRIES) { - Some(Term::BlankNode(list)) => { + match self + .graph + .object_for_subject_predicate(manifest, mf::ENTRIES) + { + 
Some(TermRef::BlankNode(list)) => { self.tests_to_do .extend(RdfListIterator::iter(&self.graph, list.into())); } @@ -290,12 +295,12 @@ impl Iterator for TestManifest { } struct RdfListIterator<'a> { - graph: &'a MemoryStore, - current_node: Option, + graph: &'a Graph, + current_node: Option>, } impl<'a> RdfListIterator<'a> { - fn iter(graph: &'a MemoryStore, root: NamedOrBlankNode) -> RdfListIterator<'a> { + fn iter(graph: &'a Graph, root: NamedOrBlankNodeRef<'a>) -> RdfListIterator<'a> { RdfListIterator { graph, current_node: Some(root), @@ -307,14 +312,17 @@ impl<'a> Iterator for RdfListIterator<'a> { type Item = Term; fn next(&mut self) -> Option { - match self.current_node.clone() { + match self.current_node { Some(current) => { - let result = object_for_subject_predicate(&self.graph, ¤t, rdf::FIRST); + let result = self + .graph + .object_for_subject_predicate(current, rdf::FIRST) + .map(|v| v.into_owned()); self.current_node = - match object_for_subject_predicate(&self.graph, ¤t, rdf::REST) { - Some(Term::NamedNode(n)) if n == rdf::NIL => None, - Some(Term::NamedNode(n)) => Some(n.into()), - Some(Term::BlankNode(n)) => Some(n.into()), + match self.graph.object_for_subject_predicate(current, rdf::REST) { + Some(TermRef::NamedNode(n)) if n == rdf::NIL => None, + Some(TermRef::NamedNode(n)) => Some(n.into()), + Some(TermRef::BlankNode(n)) => Some(n.into()), _ => None, }; result @@ -323,21 +331,3 @@ impl<'a> Iterator for RdfListIterator<'a> { } } } - -fn object_for_subject_predicate<'a>( - store: &MemoryStore, - subject: impl Into>, - predicate: impl Into>, -) -> Option { - objects_for_subject_predicate(store, subject, predicate).next() -} - -fn objects_for_subject_predicate<'a>( - store: &MemoryStore, - subject: impl Into>, - predicate: impl Into>, -) -> impl Iterator { - store - .quads_for_pattern(Some(subject.into()), Some(predicate.into()), None, None) - .map(|t| t.object) -} diff --git a/testsuite/src/parser_evaluator.rs b/testsuite/src/parser_evaluator.rs 
index 6018b31c..04804811 100644 --- a/testsuite/src/parser_evaluator.rs +++ b/testsuite/src/parser_evaluator.rs @@ -1,6 +1,6 @@ -use crate::files::load_store; +use crate::files::load_dataset; use crate::manifest::Test; -use crate::report::{store_diff, TestResult}; +use crate::report::{dataset_diff, TestResult}; use anyhow::{anyhow, Result}; use chrono::Utc; @@ -30,7 +30,7 @@ fn evaluate_parser_test(test: &Test) -> Result<()> { || test.kind == "http://www.w3.org/ns/rdftest#TestTurtlePositiveSyntax" || test.kind == "http://www.w3.org/ns/rdftest#TestTrigPositiveSyntax" { - match load_store(action) { + match load_dataset(action) { Ok(_) => Ok(()), Err(e) => Err(anyhow!(format!("Parse error: {}", e))), } @@ -42,7 +42,7 @@ fn evaluate_parser_test(test: &Test) -> Result<()> { || test.kind == "http://www.w3.org/ns/rdftest#TestTrigNegativeEval" || test.kind == "http://www.w3.org/ns/rdftest#TestXMLNegativeSyntax" { - match load_store(action) { + match load_dataset(action) { Ok(_) => Err(anyhow!("File parsed with an error even if it should not",)), Err(_) => Ok(()), } @@ -50,17 +50,19 @@ fn evaluate_parser_test(test: &Test) -> Result<()> { || test.kind == "http://www.w3.org/ns/rdftest#TestTrigEval" || test.kind == "http://www.w3.org/ns/rdftest#TestXMLEval" { - match load_store(action) { - Ok(actual_graph) => { + match load_dataset(action) { + Ok(mut actual_graph) => { + actual_graph.canonicalize(); if let Some(result) = &test.result { - match load_store(result) { - Ok(expected_graph) => { - if expected_graph.is_isomorphic(&actual_graph) { + match load_dataset(result) { + Ok(mut expected_graph) => { + expected_graph.canonicalize(); + if expected_graph == actual_graph { Ok(()) } else { Err(anyhow!( "The two files are not isomorphic. 
Diff:\n{}", - store_diff(&expected_graph, &actual_graph) + dataset_diff(&expected_graph, &actual_graph) )) } } diff --git a/testsuite/src/report.rs b/testsuite/src/report.rs index bc10f8a7..68a4accb 100644 --- a/testsuite/src/report.rs +++ b/testsuite/src/report.rs @@ -1,7 +1,6 @@ use anyhow::Result; use chrono::{DateTime, Utc}; -use oxigraph::model::NamedNode; -use oxigraph::MemoryStore; +use oxigraph::model::{Dataset, NamedNode}; use text_diff::{diff, Difference}; #[derive(Debug)] @@ -11,10 +10,10 @@ pub struct TestResult { pub date: DateTime, } -pub fn store_diff(expected: &MemoryStore, actual: &MemoryStore) -> String { +pub fn dataset_diff(expected: &Dataset, actual: &Dataset) -> String { let (_, changeset) = diff( - &normalize_store_text(expected), - &normalize_store_text(actual), + &normalize_dataset_text(expected), + &normalize_dataset_text(actual), "\n", ); let mut ret = String::new(); @@ -42,11 +41,8 @@ pub fn store_diff(expected: &MemoryStore, actual: &MemoryStore) -> String { ret } -fn normalize_store_text(store: &MemoryStore) -> String { - let mut quads: Vec<_> = store - .quads_for_pattern(None, None, None, None) - .map(|q| q.to_string()) - .collect(); +fn normalize_dataset_text(store: &Dataset) -> String { + let mut quads: Vec<_> = store.iter().map(|q| q.to_string()).collect(); quads.sort(); quads.join("\n") } diff --git a/testsuite/src/sparql_evaluator.rs b/testsuite/src/sparql_evaluator.rs index 767617ae..c88cbb4d 100644 --- a/testsuite/src/sparql_evaluator.rs +++ b/testsuite/src/sparql_evaluator.rs @@ -1,6 +1,6 @@ use crate::files::*; use crate::manifest::*; -use crate::report::{store_diff, TestResult}; +use crate::report::{dataset_diff, TestResult}; use crate::vocab::*; use anyhow::{anyhow, Result}; use chrono::Utc; @@ -223,13 +223,17 @@ fn evaluate_sparql_test(test: &Test) -> Result<()> { error )), Ok(()) => { - if store.is_isomorphic(&result_store) { + let mut store_dataset: Dataset = store.iter().collect(); + store_dataset.canonicalize(); + let 
mut result_store_dataset: Dataset = result_store.iter().collect(); + result_store_dataset.canonicalize(); + if store_dataset == result_store_dataset { Ok(()) } else { Err(anyhow!( "Failure on {}.\nDiff:\n{}\nParsed update:\n{}\n", test, - store_diff(&result_store, &store), + dataset_diff(&result_store_dataset, &store_dataset), Update::parse(&read_file_to_string(update_file)?, Some(update_file)) .unwrap(), )) @@ -259,7 +263,7 @@ fn load_sparql_query_result(url: &str) -> Result { false, ) } else { - Ok(StaticQueryResults::from_dataset(load_store(url)?)) + Ok(StaticQueryResults::from_graph(load_graph(url)?)) } } @@ -309,82 +313,56 @@ impl ServiceHandler for StaticServiceHandler { } } -fn to_dataset(result: QueryResults, with_order: bool) -> Result { - match result { - QueryResults::Graph(graph) => Ok(graph - .map(|t| t.map(|t| t.in_graph(None))) - .collect::>()?), +fn to_graph(result: QueryResults, with_order: bool) -> Result { + Ok(match result { + QueryResults::Graph(graph) => graph.collect::>()?, QueryResults::Boolean(value) => { - let store = MemoryStore::new(); + let mut graph = Graph::new(); let result_set = BlankNode::default(); - store.insert(Quad::new( - result_set.clone(), - rdf::TYPE, - rs::RESULT_SET, - None, - )); - store.insert(Quad::new( - result_set, + graph.insert(TripleRef::new(&result_set, rdf::TYPE, rs::RESULT_SET)); + graph.insert(TripleRef::new( + &result_set, rs::BOOLEAN, - Literal::from(value), - None, + &Literal::from(value), )); - Ok(store) + graph } QueryResults::Solutions(solutions) => { - let store = MemoryStore::new(); + let mut graph = Graph::new(); let result_set = BlankNode::default(); - store.insert(Quad::new( - result_set.clone(), - rdf::TYPE, - rs::RESULT_SET, - None, - )); + graph.insert(TripleRef::new(&result_set, rdf::TYPE, rs::RESULT_SET)); for variable in solutions.variables() { - store.insert(Quad::new( - result_set.clone(), + graph.insert(TripleRef::new( + &result_set, rs::RESULT_VARIABLE, - 
Literal::new_simple_literal(variable.as_str()), - None, + LiteralRef::new_simple_literal(variable.as_str()), )); } for (i, solution) in solutions.enumerate() { let solution = solution?; let solution_id = BlankNode::default(); - store.insert(Quad::new( - result_set.clone(), - rs::SOLUTION, - solution_id.clone(), - None, - )); + graph.insert(TripleRef::new(&result_set, rs::SOLUTION, &solution_id)); for (variable, value) in solution.iter() { let binding = BlankNode::default(); - store.insert(Quad::new( - solution_id.clone(), - rs::BINDING, - binding.clone(), - None, - )); - store.insert(Quad::new(binding.clone(), rs::VALUE, value.clone(), None)); - store.insert(Quad::new( - binding, + graph.insert(TripleRef::new(&solution_id, rs::BINDING, &binding)); + graph.insert(TripleRef::new(&binding, rs::VALUE, value)); + graph.insert(TripleRef::new( + &binding, rs::VARIABLE, - Literal::new_simple_literal(variable.as_str()), - None, + LiteralRef::new_simple_literal(variable.as_str()), )); } if with_order { - store.insert(Quad::new( - solution_id, + graph.insert(TripleRef::new( + &solution_id, rs::INDEX, - Literal::from((i + 1) as i128), - None, + &Literal::from((i + 1) as i128), )); } } - Ok(store) + graph } - } + }) } fn are_query_results_isomorphic( @@ -423,7 +401,7 @@ fn are_query_results_isomorphic( expected == actual } (StaticQueryResults::Graph(expected), StaticQueryResults::Graph(actual)) => { - expected.is_isomorphic(&actual) + expected == actual } _ => false, } @@ -445,7 +423,7 @@ fn compare_solutions(expected: &[(Variable, Term)], actual: &[(Variable, Term)]) } enum StaticQueryResults { - Graph(MemoryStore), + Graph(Graph), Solutions { variables: Vec, solutions: Vec>, @@ -483,83 +461,80 @@ impl fmt::Display for StaticQueryResults { impl StaticQueryResults { fn from_query_results(results: QueryResults, with_order: bool) -> Result { - Ok(Self::from_dataset(to_dataset(results, with_order)?)) + Ok(Self::from_graph(to_graph(results, with_order)?)) } - fn 
from_dataset(dataset: MemoryStore) -> StaticQueryResults { - if let Some(result_set) = dataset - .quads_for_pattern(None, Some(rdf::TYPE), Some(rs::RESULT_SET.into()), None) - .map(|q| q.subject) - .next() - { - if let Some(bool) = object_for_subject_predicate(&dataset, &result_set, rs::BOOLEAN) { + fn from_graph(graph: Graph) -> StaticQueryResults { + // Hack to normalize literals + let mut graph: Graph = graph + .iter() + .map(|t| t.into_owned().in_graph(GraphName::DefaultGraph)) + .collect::() + .into_iter() + .map(Triple::from) + .collect(); + + if let Some(result_set) = graph.subject_for_predicate_object(rdf::TYPE, rs::RESULT_SET) { + if let Some(bool) = graph.object_for_subject_predicate(result_set, rs::BOOLEAN) { // Boolean query - StaticQueryResults::Boolean(bool == Literal::from(true).into()) + StaticQueryResults::Boolean(bool == Literal::from(true).as_ref().into()) } else { // Regular query - let mut variables: Vec = - objects_for_subject_predicate(&dataset, &result_set, rs::RESULT_VARIABLE) - .filter_map(|object| { - if let Term::Literal(l) = object { - Some(Variable::new_unchecked(l.value())) - } else { - None - } - }) - .collect(); + let mut variables: Vec = graph + .objects_for_subject_predicate(result_set, rs::RESULT_VARIABLE) + .filter_map(|object| { + if let TermRef::Literal(l) = object { + Some(Variable::new_unchecked(l.value())) + } else { + None + } + }) + .collect(); variables.sort(); - let mut solutions: Vec<_> = - objects_for_subject_predicate(&dataset, &result_set, rs::SOLUTION) - .filter_map(|object| { - if let Term::BlankNode(solution) = object { - let mut bindings = - objects_for_subject_predicate(&dataset, &solution, rs::BINDING) - .filter_map(|object| { - if let Term::BlankNode(binding) = object { - if let ( - Some(Term::Literal(variable)), - Some(value), - ) = ( - object_for_subject_predicate( - &dataset, - &binding, - rs::VARIABLE, - ), - object_for_subject_predicate( - &dataset, - &binding, - rs::VALUE, - ), - ) { - Some(( - 
Variable::new_unchecked(variable.value()), - value, - )) - } else { - None - } - } else { - None - } - }) - .collect::>(); - bindings.sort_by(|(a, _), (b, _)| a.cmp(&b)); - let index = - object_for_subject_predicate(&dataset, &solution, rs::INDEX) - .and_then(|object| { - if let Term::Literal(l) = object { - u64::from_str(l.value()).ok() - } else { - None - } - }); - Some((bindings, index)) - } else { - None - } - }) - .collect(); + let mut solutions: Vec<_> = graph + .objects_for_subject_predicate(result_set, rs::SOLUTION) + .filter_map(|object| { + if let TermRef::BlankNode(solution) = object { + let mut bindings = graph + .objects_for_subject_predicate(solution, rs::BINDING) + .filter_map(|object| { + if let TermRef::BlankNode(binding) = object { + if let (Some(TermRef::Literal(variable)), Some(value)) = ( + graph.object_for_subject_predicate( + binding, + rs::VARIABLE, + ), + graph.object_for_subject_predicate(binding, rs::VALUE), + ) { + Some(( + Variable::new_unchecked(variable.value()), + value.into_owned(), + )) + } else { + None + } + } else { + None + } + }) + .collect::>(); + bindings.sort_by(|(a, _), (b, _)| a.cmp(&b)); + let index = graph + .object_for_subject_predicate(solution, rs::INDEX) + .and_then(|object| { + if let TermRef::Literal(l) = object { + u64::from_str(l.value()).ok() + } else { + None + } + }); + Some((bindings, index)) + } else { + None + } + }) + .collect(); solutions.sort_by(|(_, index_a), (_, index_b)| index_a.cmp(index_b)); let ordered = solutions.iter().all(|(_, index)| index.is_some()); @@ -574,25 +549,8 @@ impl StaticQueryResults { } } } else { - StaticQueryResults::Graph(dataset) + graph.canonicalize(); + StaticQueryResults::Graph(graph) } } } - -fn object_for_subject_predicate<'a>( - store: &MemoryStore, - subject: impl Into>, - predicate: impl Into>, -) -> Option { - objects_for_subject_predicate(store, subject, predicate).next() -} - -fn objects_for_subject_predicate<'a>( - store: &MemoryStore, - subject: impl Into>, - 
predicate: impl Into>, -) -> impl Iterator { - store - .quads_for_pattern(Some(subject.into()), Some(predicate.into()), None, None) - .map(|t| t.object) -}