diff --git a/src/errors.rs b/src/errors.rs index bdce774a..5778e10c 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,3 +1,5 @@ +use std::fmt; +use std::sync::PoisonError; error_chain! { foreign_links { Url(::url::ParseError); @@ -10,3 +12,16 @@ error_chain! { SparqlParser(::sparql::parser::ParseError); } } + +impl From> for Error { + fn from(_: PoisonError) -> Self { + //TODO: improve conversion + "Unexpected lock error".into() + } +} + +impl From for fmt::Error { + fn from(_: Error) -> Self { + fmt::Error + } +} diff --git a/src/store/isomorphism.rs b/src/store/isomorphism.rs index a1673af7..c84155cc 100644 --- a/src/store/isomorphism.rs +++ b/src/store/isomorphism.rs @@ -1,3 +1,4 @@ +use errors::*; use model::*; use std::collections::hash_map::DefaultHasher; use std::collections::BTreeSet; @@ -5,55 +6,55 @@ use std::collections::HashMap; use std::collections::HashSet; use std::hash::Hash; use std::hash::Hasher; -use store::memory::MemoryGraph; +use store::Graph; #[derive(Eq, PartialEq, Hash, Ord, PartialOrd)] -struct SubjectPredicate<'a> { - subject: &'a NamedOrBlankNode, - predicate: &'a NamedNode, +struct SubjectPredicate { + subject: NamedOrBlankNode, + predicate: NamedNode, } -impl<'a> SubjectPredicate<'a> { - fn new(subject: &'a NamedOrBlankNode, predicate: &'a NamedNode) -> Self { +impl SubjectPredicate { + fn new(subject: NamedOrBlankNode, predicate: NamedNode) -> Self { Self { subject, predicate } } } #[derive(Eq, PartialEq, Hash, Ord, PartialOrd)] -struct PredicateObject<'a> { - predicate: &'a NamedNode, - object: &'a Term, +struct PredicateObject { + predicate: NamedNode, + object: Term, } -impl<'a> PredicateObject<'a> { - fn new(predicate: &'a NamedNode, object: &'a Term) -> Self { +impl PredicateObject { + fn new(predicate: NamedNode, object: Term) -> Self { Self { predicate, object } } } -fn subject_predicates_for_object<'a>( - graph: &'a MemoryGraph, - object: &'a Term, -) -> impl Iterator> { - graph - .triples_for_object(object) - .map(|t| SubjectPredicate::new(t.subject(), t.predicate())) +fn subject_predicates_for_object( + graph: &impl Graph, + object: &Term, +) -> Result>> { + Ok(graph + .triples_for_object(object)? + .map(|t| t.map(|t| SubjectPredicate::new(t.subject().clone(), t.predicate_owned())))) } -fn predicate_objects_for_subject<'a>( - graph: &'a MemoryGraph, - subject: &'a NamedOrBlankNode, -) -> impl Iterator> { - graph - .triples_for_subject(subject) - .map(|t| PredicateObject::new(t.predicate(), t.object())) +fn predicate_objects_for_subject( + graph: &impl Graph, + subject: &NamedOrBlankNode, +) -> Result>> { + Ok(graph + .triples_for_subject(subject)? + .map(|t| t.map(|t| PredicateObject::new(t.predicate().clone(), t.object_owned())))) } -fn hash_blank_nodes<'a>( - bnodes: HashSet<&'a BlankNode>, - graph: &'a MemoryGraph, -) -> HashMap> { - let mut bnodes_by_hash: HashMap> = HashMap::default(); +fn hash_blank_nodes( + bnodes: HashSet, + graph: &impl Graph, +) -> Result>> { + let mut bnodes_by_hash: HashMap> = HashMap::default(); // NB: we need to sort the triples to have the same hash for bnode in bnodes.into_iter() { @@ -62,7 +63,8 @@ fn hash_blank_nodes<'a>( { let subject = NamedOrBlankNode::from(bnode.clone()); let mut po_set: BTreeSet = BTreeSet::default(); - for po in predicate_objects_for_subject(&graph, &subject) { + for po in predicate_objects_for_subject(graph, &subject)? { + let po = po?; if !po.object.is_blank_node() { po_set.insert(po); } @@ -75,7 +77,8 @@ fn hash_blank_nodes<'a>( { let object = Term::from(bnode.clone()); let mut sp_set: BTreeSet = BTreeSet::default(); - for sp in subject_predicates_for_object(&graph, &object) { + for sp in subject_predicates_for_object(graph, &object)? { + let sp = sp?; if !sp.subject.is_blank_node() { sp_set.insert(sp); } @@ -91,64 +94,66 @@ fn hash_blank_nodes<'a>( .push(bnode); } - bnodes_by_hash + Ok(bnodes_by_hash) } pub trait GraphIsomorphism { /// Checks if two graphs are [isomorphic](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism) - fn is_isomorphic(&self, other: &Self) -> bool; + fn is_isomorphic(&self, other: &Self) -> Result; } -impl GraphIsomorphism for MemoryGraph { +impl GraphIsomorphism for G { //TODO: proper isomorphism building - fn is_isomorphic(&self, other: &Self) -> bool { - if self.len() != other.len() { - return false; + fn is_isomorphic(&self, other: &Self) -> Result { + if self.len()? != other.len()? { + return Ok(false); } - let mut self_bnodes: HashSet<&BlankNode> = HashSet::default(); - let mut other_bnodes: HashSet<&BlankNode> = HashSet::default(); + let mut self_bnodes: HashSet = HashSet::default(); + let mut other_bnodes: HashSet = HashSet::default(); - for t in self { + for t in self.iter()? { + let t = t?; if let NamedOrBlankNode::BlankNode(subject) = t.subject() { - self_bnodes.insert(subject); + self_bnodes.insert(subject.clone()); if let Term::BlankNode(object) = t.object() { - self_bnodes.insert(object); + self_bnodes.insert(object.clone()); } } else if let Term::BlankNode(object) = t.object() { - self_bnodes.insert(object); - } else if !other.contains(t) { - return false; + self_bnodes.insert(object.clone()); + } else if !other.contains(&t)? { + return Ok(false); } } - for t in other { + for t in other.iter()? { + let t = t?; if let NamedOrBlankNode::BlankNode(subject) = t.subject() { - other_bnodes.insert(subject); + other_bnodes.insert(subject.clone()); if let Term::BlankNode(object) = t.object() { - other_bnodes.insert(object); + other_bnodes.insert(object.clone()); } } else if let Term::BlankNode(object) = t.object() { - other_bnodes.insert(object); - } else if !self.contains(t) { - return false; + other_bnodes.insert(object.clone()); + } else if !self.contains(&t)? { + return Ok(false); } } - let self_bnodes_by_hash = hash_blank_nodes(self_bnodes, &self); - let other_bnodes_by_hash = hash_blank_nodes(other_bnodes, &other); + let self_bnodes_by_hash = hash_blank_nodes(self_bnodes, self)?; + let other_bnodes_by_hash = hash_blank_nodes(other_bnodes, other)?; if self_bnodes_by_hash.len() != other_bnodes_by_hash.len() { - return false; + return Ok(false); } for hash in self_bnodes_by_hash.keys() { if self_bnodes_by_hash.get(hash).map(|l| l.len()) != other_bnodes_by_hash.get(hash).map(|l| l.len()) { - return false; + return Ok(false); } } - true + Ok(true) } } diff --git a/src/store/memory.rs b/src/store/memory.rs index 880bbb4e..d6e7e156 100644 --- a/src/store/memory.rs +++ b/src/store/memory.rs @@ -1,187 +1,457 @@ -use model::vocab::rdf; -use model::*; -use std::collections::HashSet; -use std::fmt; -use std::iter::FromIterator; - -#[derive(Debug, Clone, Default)] -pub struct MemoryGraph { - triples: HashSet, +use errors::*; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::sync::RwLock; +use store::numeric_encoder::*; +use store::store::*; + +pub type MemoryDataset = StoreDataset; +pub type MemoryGraph = StoreDefaultGraph; + +#[derive(Default)] +pub struct MemoryStore { + id2str: RwLock>>, + str2id: RwLock, u64>>, + graph_indexes: RwLock>, } -impl MemoryGraph { - pub fn iter(&self) -> impl Iterator { - self.triples.iter() - } +#[derive(Default)] +struct MemoryGraphIndexes { + spo: BTreeMap>>, + pos: BTreeMap>>, + osp: BTreeMap>>, +} - pub fn triples_for_subject<'a>( - &'a self, - subject: &'a NamedOrBlankNode, - ) -> impl Iterator { - self.iter().filter(move |t| t.subject() == subject) - } +impl BytesStore for MemoryStore { + type BytesOutput = Vec; - pub fn triples_for_predicate<'a>( - &'a self, - predicate: &'a NamedNode, - ) -> impl Iterator { - self.iter().filter(move |t| t.predicate() == predicate) + fn insert_bytes(&self, value: &[u8]) -> Result { + let mut id2str = self.id2str.write()?; + let mut str2id = self.str2id.write()?; + let id = str2id.entry(value.to_vec()).or_insert_with(|| { + let id = id2str.len() as u64; + id2str.insert(id, value.to_vec()); + id + }); + Ok(*id) } - pub fn triples_for_object<'a>(&'a self, object: &'a Term) -> impl Iterator { - self.iter().filter(move |t| t.object() == object) + fn get_bytes(&self, id: u64) -> Result>> { + Ok(self.id2str.read()?.get(&id).map(|s| s.to_owned())) } +} - pub fn triples_for_subject_predicate<'a>( - &'a self, - subject: &'a NamedOrBlankNode, - predicate: &'a NamedNode, - ) -> impl Iterator { - self.iter() - .filter(move |t| t.subject() == subject && t.predicate() == predicate) - } +impl EncodedQuadsStore for MemoryStore { + type QuadsIterator = > as IntoIterator>::IntoIter; + type QuadsForSubjectIterator = > as IntoIterator>::IntoIter; + type QuadsForSubjectPredicateIterator = > as IntoIterator>::IntoIter; + type QuadsForSubjectPredicateObjectIterator = + > as IntoIterator>::IntoIter; + type QuadsForSubjectObjectIterator = > as IntoIterator>::IntoIter; + type QuadsForPredicateIterator = > as IntoIterator>::IntoIter; + type QuadsForPredicateObjectIterator = > as IntoIterator>::IntoIter; + type QuadsForObjectIterator = > as IntoIterator>::IntoIter; + type QuadsForGraphIterator = > as IntoIterator>::IntoIter; + type QuadsForSubjectGraphIterator = > as IntoIterator>::IntoIter; + type QuadsForSubjectPredicateGraphIterator = + > as IntoIterator>::IntoIter; + type QuadsForSubjectObjectGraphIterator = > as IntoIterator>::IntoIter; + type QuadsForPredicateGraphIterator = > as IntoIterator>::IntoIter; + type QuadsForPredicateObjectGraphIterator = + > as IntoIterator>::IntoIter; + type QuadsForObjectGraphIterator = > as IntoIterator>::IntoIter; - pub fn objects_for_subject_predicate<'a>( - &'a self, - subject: &'a NamedOrBlankNode, - predicate: &'a NamedNode, - ) -> impl Iterator { - self.triples_for_subject_predicate(subject, predicate) - .map(|t| t.object()) + fn quads(&self) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + for (graph_name, graph) in self.graph_indexes.read()?.iter() { + for (s, pos) in graph.spo.iter() { + for (p, os) in pos.iter() { + for o in os.iter() { + result.push(Ok(encoded_quad(s, p, o, graph_name))) + } + } + } + } + Ok(result.into_iter()) } - pub fn object_for_subject_predicate<'a>( - &'a self, - subject: &'a NamedOrBlankNode, - predicate: &'a NamedNode, - ) -> Option<&'a Term> { - self.objects_for_subject_predicate(subject, predicate) - .nth(0) + fn quads_for_subject( + &self, + subject: &EncodedTerm, + ) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + for (graph_name, graph) in self.graph_indexes.read()?.iter() { + if let Some(pos) = graph.spo.get(subject) { + for (p, os) in pos.iter() { + for o in os.iter() { + result.push(Ok(encoded_quad(subject, p, o, graph_name))) + } + } + } + } + Ok(result.into_iter()) } - pub fn triples_for_predicate_object<'a>( - &'a self, - predicate: &'a NamedNode, - object: &'a Term, - ) -> impl Iterator { - self.iter() - .filter(move |t| t.predicate() == predicate && t.object() == object) + fn quads_for_subject_predicate( + &self, + subject: &EncodedTerm, + predicate: &EncodedTerm, + ) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + for (graph_name, graph) in self.graph_indexes.read()?.iter() { + if let Some(pos) = graph.spo.get(subject) { + if let Some(os) = pos.get(predicate) { + for o in os.iter() { + result.push(Ok(encoded_quad(subject, predicate, o, graph_name))) + } + } + } + } + Ok(result.into_iter()) } - pub fn subjects_for_predicate_object<'a>( - &'a self, - predicate: &'a NamedNode, - object: &'a Term, - ) -> impl Iterator { - self.triples_for_predicate_object(predicate, object) - .map(|t| t.subject()) + fn quads_for_subject_predicate_object( + &self, + subject: &EncodedTerm, + predicate: &EncodedTerm, + object: &EncodedTerm, + ) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + for (graph_name, graph) in self.graph_indexes.read()?.iter() { + if let Some(pos) = graph.spo.get(subject) { + if let Some(os) = pos.get(predicate) { + if os.contains(object) { + result.push(Ok(encoded_quad(subject, predicate, object, graph_name))) + } + } + } + } + Ok(result.into_iter()) } - pub fn subject_for_predicate_object<'a>( - &'a self, - predicate: &'a NamedNode, - object: &'a Term, - ) -> Option<&'a NamedOrBlankNode> { - self.subjects_for_predicate_object(predicate, object).nth(0) + fn quads_for_subject_object( + &self, + subject: &EncodedTerm, + object: &EncodedTerm, + ) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + for (graph_name, graph) in self.graph_indexes.read()?.iter() { + if let Some(sps) = graph.osp.get(object) { + if let Some(ps) = sps.get(subject) { + for p in ps.iter() { + result.push(Ok(encoded_quad(subject, p, object, graph_name))) + } + } + } + } + Ok(result.into_iter()) } - pub fn values_for_list<'a>(&'a self, root: NamedOrBlankNode) -> ListIterator<'a> { - ListIterator { - graph: self, - current_node: Some(root), + fn quads_for_predicate( + &self, + predicate: &EncodedTerm, + ) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + for (graph_name, graph) in self.graph_indexes.read()?.iter() { + if let Some(oss) = graph.pos.get(predicate) { + for (o, ss) in oss.iter() { + for s in ss.iter() { + result.push(Ok(encoded_quad(s, predicate, o, graph_name))) + } + } + } } + Ok(result.into_iter()) } - pub fn len(&self) -> usize { - self.triples.len() + fn quads_for_predicate_object( + &self, + predicate: &EncodedTerm, + object: &EncodedTerm, + ) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + for (graph_name, graph) in self.graph_indexes.read()?.iter() { + if let Some(oss) = graph.pos.get(predicate) { + if let Some(ss) = oss.get(object) { + for s in ss.iter() { + result.push(Ok(encoded_quad(s, predicate, object, graph_name))) + } + } + } + } + Ok(result.into_iter()) } - pub fn is_empty(&self) -> bool { - self.triples.is_empty() + fn quads_for_object( + &self, + object: &EncodedTerm, + ) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + for (graph_name, graph) in self.graph_indexes.read()?.iter() { + if let Some(sps) = graph.osp.get(object) { + for (s, ps) in sps.iter() { + for p in ps.iter() { + result.push(Ok(encoded_quad(s, p, object, graph_name))) + } + } + } + } + Ok(result.into_iter()) } - pub fn contains(&self, value: &Triple) -> bool { - self.triples.contains(value) + fn quads_for_graph( + &self, + graph_name: &EncodedTerm, + ) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + if let Some(graph) = self.graph_indexes.read()?.get(graph_name) { + for (s, pos) in graph.spo.iter() { + for (p, os) in pos.iter() { + for o in os.iter() { + result.push(Ok(encoded_quad(s, p, o, graph_name))) + } + } + } + } + Ok(result.into_iter()) } - pub fn insert(&mut self, value: Triple) -> bool { - self.triples.insert(value) + fn quads_for_subject_graph( + &self, + subject: &EncodedTerm, + graph_name: &EncodedTerm, + ) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + if let Some(graph) = self.graph_indexes.read()?.get(graph_name) { + if let Some(pos) = graph.spo.get(subject) { + for (p, os) in pos.iter() { + for o in os.iter() { + result.push(Ok(encoded_quad(subject, p, o, graph_name))) + } + } + } + } + Ok(result.into_iter()) } -} -impl fmt::Display for MemoryGraph { - fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - for triple in &self.triples { - write!(fmt, "{}\n", triple)?; + fn quads_for_subject_predicate_graph( + &self, + subject: &EncodedTerm, + predicate: &EncodedTerm, + graph_name: &EncodedTerm, + ) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + if let Some(graph) = self.graph_indexes.read()?.get(graph_name) { + if let Some(pos) = graph.spo.get(subject) { + if let Some(os) = pos.get(predicate) { + for o in os.iter() { + result.push(Ok(encoded_quad(subject, predicate, o, graph_name))) + } + } + } } - Ok(()) + Ok(result.into_iter()) } -} -impl IntoIterator for MemoryGraph { - type Item = Triple; - type IntoIter = as IntoIterator>::IntoIter; - - fn into_iter(self) -> ::IntoIter { - self.triples.into_iter() + fn quads_for_subject_object_graph( + &self, + subject: &EncodedTerm, + object: &EncodedTerm, + graph_name: &EncodedTerm, + ) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + if let Some(graph) = self.graph_indexes.read()?.get(graph_name) { + if let Some(sps) = graph.osp.get(object) { + if let Some(ps) = sps.get(subject) { + for p in ps.iter() { + result.push(Ok(encoded_quad(subject, p, object, graph_name))) + } + } + } + } + Ok(result.into_iter()) } -} -impl<'a> IntoIterator for &'a MemoryGraph { - type Item = &'a Triple; - type IntoIter = <&'a HashSet as IntoIterator>::IntoIter; + fn quads_for_predicate_graph( + &self, + predicate: &EncodedTerm, + graph_name: &EncodedTerm, + ) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + if let Some(graph) = self.graph_indexes.read()?.get(graph_name) { + if let Some(oss) = graph.pos.get(predicate) { + for (o, ss) in oss.iter() { + for s in ss.iter() { + result.push(Ok(encoded_quad(s, predicate, o, graph_name))) + } + } + } + } + Ok(result.into_iter()) + } - fn into_iter(self) -> ::IntoIter { - self.triples.iter() + fn quads_for_predicate_object_graph( + &self, + predicate: &EncodedTerm, + object: &EncodedTerm, + graph_name: &EncodedTerm, + ) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + if let Some(graph) = self.graph_indexes.read()?.get(graph_name) { + if let Some(oss) = graph.pos.get(predicate) { + if let Some(ss) = oss.get(object) { + for s in ss.iter() { + result.push(Ok(encoded_quad(s, predicate, object, graph_name))) + } + } + } + } + Ok(result.into_iter()) } -} -impl FromIterator for MemoryGraph { - fn from_iter>(iter: I) -> Self { - let triples = HashSet::from_iter(iter); - Self { triples } + fn quads_for_object_graph( + &self, + object: &EncodedTerm, + graph_name: &EncodedTerm, + ) -> Result<> as IntoIterator>::IntoIter> { + let mut result = Vec::default(); + if let Some(graph) = self.graph_indexes.read()?.get(graph_name) { + if let Some(sps) = graph.osp.get(object) { + for (s, ps) in sps.iter() { + for p in ps.iter() { + result.push(Ok(encoded_quad(s, p, object, graph_name))) + } + } + } + } + Ok(result.into_iter()) } -} -impl Extend for MemoryGraph { - fn extend>(&mut self, iter: I) { - self.triples.extend(iter) + fn contains(&self, quad: &EncodedQuad) -> Result { + Ok(self + .graph_indexes + .read()? + .get(&quad.graph_name) + .map(|graph| { + graph + .spo + .get(&quad.subject) + .map(|po| { + po.get(&quad.predicate) + .map(|o| o.contains(&quad.object)) + .unwrap_or(false) + }) + .unwrap_or(false) + }) + .unwrap_or(false)) } -} -impl<'a> Extend<&'a Triple> for MemoryGraph { - fn extend>(&mut self, iter: I) { - self.triples.extend(iter.into_iter().cloned()) + fn insert(&self, quad: &EncodedQuad) -> Result<()> { + let mut graph_indexes = self.graph_indexes.write()?; + let graph = graph_indexes + .entry(quad.graph_name.clone()) + .or_insert_with(MemoryGraphIndexes::default); + graph + .spo + .entry(quad.subject.clone()) + .or_default() + .entry(quad.predicate.clone()) + .or_default() + .insert(quad.object.clone()); + graph + .pos + .entry(quad.predicate.clone()) + .or_default() + .entry(quad.object.clone()) + .or_default() + .insert(quad.subject.clone()); + graph + .osp + .entry(quad.object.clone()) + .or_default() + .entry(quad.subject.clone()) + .or_default() + .insert(quad.predicate.clone()); + Ok(()) } -} -pub struct ListIterator<'a> { - graph: &'a MemoryGraph, - current_node: Option, -} + fn remove(&self, quad: &EncodedQuad) -> Result<()> { + let mut graph_indexes = self.graph_indexes.write()?; + let mut empty_graph = false; + if let Some(graph) = graph_indexes.get_mut(&quad.graph_name) { + { + let mut empty_pos = false; + if let Some(mut pos) = graph.spo.get_mut(&quad.subject) { + let mut empty_os = false; + if let Some(mut os) = pos.get_mut(&quad.predicate) { + os.remove(&quad.object); + empty_os = os.is_empty(); + } + if empty_os { + pos.remove(&quad.predicate); + } + empty_pos = pos.is_empty(); + } + if empty_pos { + graph.spo.remove(&quad.subject); + } + } + + { + let mut empty_oss = false; + if let Some(mut oss) = graph.pos.get_mut(&quad.predicate) { + let mut empty_ss = false; + if let Some(mut ss) = oss.get_mut(&quad.object) { + ss.remove(&quad.subject); + empty_ss = ss.is_empty(); + } + if empty_ss { + oss.remove(&quad.object); + } + empty_oss = oss.is_empty(); + } + if empty_oss { + graph.pos.remove(&quad.predicate); + } + } + + { + let mut empty_sps = false; + if let Some(mut sps) = graph.osp.get_mut(&quad.object) { + let mut empty_ps = false; + if let Some(mut ps) = sps.get_mut(&quad.subject) { + ps.remove(&quad.predicate); + empty_ps = ps.is_empty(); + } + if empty_ps { + sps.remove(&quad.subject); + } + empty_sps = sps.is_empty(); + } + if empty_sps { + graph.osp.remove(&quad.object); + } + } -impl<'a> Iterator for ListIterator<'a> { - type Item = Term; - - fn next(&mut self) -> Option { - match self.current_node.clone() { - Some(current) => { - let result = self - .graph - .object_for_subject_predicate(¤t, &rdf::FIRST)? - .clone(); - self.current_node = match self - .graph - .object_for_subject_predicate(¤t, &rdf::REST) - { - Some(Term::NamedNode(n)) if *n == *rdf::NIL => None, - Some(Term::NamedNode(n)) => Some(n.clone().into()), - Some(Term::BlankNode(n)) => Some(n.clone().into()), - _ => None, - }; - Some(result) - } - None => None, + empty_graph = graph.spo.is_empty(); + } + if empty_graph { + graph_indexes.remove(&quad.graph_name); } + Ok(()) } } + +fn encoded_quad( + subject: &EncodedTerm, + predicate: &EncodedTerm, + object: &EncodedTerm, + graph_name: &EncodedTerm, +) -> EncodedQuad { + EncodedQuad::new( + subject.clone(), + predicate.clone(), + object.clone(), + graph_name.clone(), + ) +} diff --git a/src/store/mod.rs b/src/store/mod.rs index bd3fa28b..b8c81d73 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -1,19 +1,23 @@ pub mod isomorphism; -pub mod memory; +mod memory; mod numeric_encoder; -pub mod rocksdb; +mod rocksdb; mod store; use errors::*; use model::*; +pub use store::memory::MemoryDataset; +pub use store::memory::MemoryGraph; +pub use store::rocksdb::RocksDbDataset; + pub trait Graph { type TriplesIterator: Iterator>; type TriplesForSubjectIterator: Iterator>; - type TriplesForSubjectPredicateIterator: Iterator>; - type TriplesForSubjectObjectIterator: Iterator>; + type ObjectsForSubjectPredicateIterator: Iterator>; + type PredicatesForSubjectObjectIterator: Iterator>; type TriplesForPredicateIterator: Iterator>; - type TriplesForPredicateObjectIterator: Iterator>; + type SubjectsForPredicateObjectIterator: Iterator>; type TriplesForObjectIterator: Iterator>; fn iter(&self) -> Result { @@ -27,28 +31,43 @@ pub trait Graph { subject: &NamedOrBlankNode, ) -> Result; - fn triples_for_subject_predicate( + fn objects_for_subject_predicate( + &self, + subject: &NamedOrBlankNode, + predicate: &NamedNode, + ) -> Result; + + fn object_for_subject_predicate( &self, subject: &NamedOrBlankNode, predicate: &NamedNode, - ) -> Result; + ) -> Result> { + //TODO use transpose when stable + match self + .objects_for_subject_predicate(subject, predicate)? + .nth(0) + { + Some(object) => Ok(Some(object?)), + None => Ok(None), + } + } - fn triples_for_subject_object( + fn predicates_for_subject_object( &self, subject: &NamedOrBlankNode, object: &Term, - ) -> Result; + ) -> Result; fn triples_for_predicate( &self, predicate: &NamedNode, ) -> Result; - fn triples_for_predicate_object( + fn subjects_for_predicate_object( &self, predicate: &NamedNode, object: &Term, - ) -> Result; + ) -> Result; fn triples_for_object(&self, object: &Term) -> Result; diff --git a/src/store/numeric_encoder.rs b/src/store/numeric_encoder.rs index 6eac616c..e8433512 100644 --- a/src/store/numeric_encoder.rs +++ b/src/store/numeric_encoder.rs @@ -53,6 +53,22 @@ pub struct EncodedQuad { pub graph_name: EncodedTerm, } +impl EncodedQuad { + pub fn new( + subject: EncodedTerm, + predicate: EncodedTerm, + object: EncodedTerm, + graph_name: EncodedTerm, + ) -> Self { + Self { + subject, + predicate, + object, + graph_name, + } + } +} + pub trait TermReader { fn read_term(&mut self) -> Result; fn read_spog_quad(&mut self) -> Result; diff --git a/src/store/rocksdb.rs b/src/store/rocksdb.rs index 00d144b3..be492519 100644 --- a/src/store/rocksdb.rs +++ b/src/store/rocksdb.rs @@ -79,11 +79,7 @@ impl BytesStore for RocksDbStore { Ok(match self.db.get_cf(self.str2id_cf, value)? { Some(id) => from_bytes_slice(&id), None => { - let id = self - .str_id_counter - .lock() - .unwrap() - .get_and_increment(&self.db)? as u64; + let id = self.str_id_counter.lock()?.get_and_increment(&self.db)? as u64; let id_bytes = to_bytes(id); let mut batch = WriteBatch::default(); batch.put_cf(self.id2str_cf, &id_bytes, value)?; diff --git a/src/store/rocksdb/storage.rs b/src/store/rocksdb/storage.rs deleted file mode 100644 index 48770a0e..00000000 --- a/src/store/rocksdb/storage.rs +++ /dev/null @@ -1,524 +0,0 @@ -use errors::*; -use rocksdb::ColumnFamily; -use rocksdb::DBRawIterator; -use rocksdb::DBVector; -use rocksdb::Options; -use rocksdb::WriteBatch; -use rocksdb::DB; -use std::io::Cursor; -use std::mem::size_of; -use std::path::Path; -use std::str; -use std::sync::Mutex; -use store::numeric_encoder::*; -use store::store::EncodedQuadsStore; -use utils::from_bytes; -use utils::from_bytes_slice; -use utils::to_bytes; - -const ID2STR_CF: &'static str = "id2str"; -const STR2ID_CF: &'static str = "id2str"; -const SPOG_CF: &'static str = "spog"; -const POSG_CF: &'static str = "posg"; -const OSPG_CF: &'static str = "ospg"; - -const EMPTY_BUF: [u8; 0] = [0 as u8; 0]; - -//TODO: indexes for the default graph and indexes for the named graphs (no more Optional and space saving) - -const COLUMN_FAMILIES: [&'static str; 5] = [ID2STR_CF, STR2ID_CF, SPOG_CF, POSG_CF, OSPG_CF]; - -pub struct RocksDbStore { - db: DB, - str_id_counter: Mutex, - id2str_cf: ColumnFamily, - str2id_cf: ColumnFamily, - spog_cf: ColumnFamily, - posg_cf: ColumnFamily, - ospg_cf: ColumnFamily, -} - -impl RocksDbStore { - pub fn open(path: impl AsRef) -> Result { - let mut options = Options::default(); - options.create_if_missing(true); - options.create_missing_column_families(true); - - let db = DB::open_cf(&options, path, &COLUMN_FAMILIES)?; - let id2str_cf = get_cf(&db, STR2ID_CF)?; - let str2id_cf = get_cf(&db, ID2STR_CF)?; - let spog_cf = get_cf(&db, SPOG_CF)?; - let posg_cf = get_cf(&db, POSG_CF)?; - let ospg_cf = get_cf(&db, OSPG_CF)?; - - Ok(Self { - db, - str_id_counter: Mutex::new(RocksDBCounter::new("bsc")), - id2str_cf, - str2id_cf, - spog_cf, - posg_cf, - ospg_cf, - }) - } -} - -impl BytesStore for RocksDbStore { - type BytesOutput = DBVector; - - fn insert_bytes(&self, value: &[u8]) -> Result { - Ok(match self.db.get_cf(self.str2id_cf, value)? { - Some(id) => from_bytes_slice(&id), - None => { - let id = self - .str_id_counter - .lock() - .unwrap() - .get_and_increment(&self.db)? as u64; - let id_bytes = to_bytes(id); - let mut batch = WriteBatch::default(); - batch.put_cf(self.id2str_cf, &id_bytes, value)?; - batch.put_cf(self.str2id_cf, value, &id_bytes)?; - self.db.write(batch)?; - id - } - }) - } - - fn get_bytes(&self, id: u64) -> Result> { - Ok(self.db.get_cf(self.id2str_cf, &to_bytes(id))?) - } -} - -impl EncodedQuadsStore for RocksDbStore { - type QuadsIterator = SPOGIndexIterator; - type QuadsForSubjectIterator = FilteringEncodedQuadsIterator; - type QuadsForSubjectPredicateIterator = FilteringEncodedQuadsIterator; - type QuadsForSubjectPredicateObjectIterator = FilteringEncodedQuadsIterator; - type QuadsForSubjectObjectIterator = FilteringEncodedQuadsIterator; - type QuadsForPredicateIterator = FilteringEncodedQuadsIterator; - type QuadsForPredicateObjectIterator = FilteringEncodedQuadsIterator; - type QuadsForObjectIterator = FilteringEncodedQuadsIterator; - type QuadsForGraphIterator = InGraphQuadsIterator; - type QuadsForSubjectGraphIterator = - InGraphQuadsIterator>; - type QuadsForSubjectPredicateGraphIterator = - InGraphQuadsIterator>; - type QuadsForSubjectObjectGraphIterator = - InGraphQuadsIterator>; - type QuadsForPredicateGraphIterator = - InGraphQuadsIterator>; - type QuadsForPredicateObjectGraphIterator = - InGraphQuadsIterator>; - type QuadsForObjectGraphIterator = - InGraphQuadsIterator>; - - fn quads(&self) -> Result { - let mut iter = self.db.raw_iterator_cf(self.spog_cf)?; - iter.seek_to_first(); - Ok(SPOGIndexIterator { iter }) - } - - fn quads_for_subject( - &self, - subject: &EncodedTerm, - ) -> Result> { - let mut iter = self.db.raw_iterator_cf(self.spog_cf)?; - iter.seek(&encode_term(subject)?); - Ok(FilteringEncodedQuadsIterator { - iter: SPOGIndexIterator { iter }, - filter: EncodedQuadPattern::new(Some(subject.clone()), None, None, None), - }) - } - - fn quads_for_subject_predicate( - &self, - subject: &EncodedTerm, - predicate: &EncodedTerm, - ) -> Result> { - let mut iter = self.db.raw_iterator_cf(self.spog_cf)?; - iter.seek(&encode_term_pair(subject, predicate)?); - Ok(FilteringEncodedQuadsIterator { - iter: SPOGIndexIterator { iter }, - filter: EncodedQuadPattern::new( - Some(subject.clone()), - Some(predicate.clone()), - None, - None, - ), - }) - } - - fn quads_for_subject_predicate_object( - &self, - subject: &EncodedTerm, - predicate: &EncodedTerm, - object: &EncodedTerm, - ) -> Result> { - let mut iter = self.db.raw_iterator_cf(self.spog_cf)?; - iter.seek(&encode_term_triple(subject, predicate, object)?); - Ok(FilteringEncodedQuadsIterator { - iter: SPOGIndexIterator { iter }, - filter: EncodedQuadPattern::new( - Some(subject.clone()), - Some(predicate.clone()), - Some(object.clone()), - None, - ), - }) - } - - fn quads_for_subject_object( - &self, - subject: &EncodedTerm, - object: &EncodedTerm, - ) -> Result> { - let mut iter = self.db.raw_iterator_cf(self.spog_cf)?; - iter.seek(&encode_term_pair(object, subject)?); - Ok(FilteringEncodedQuadsIterator { - iter: OSPGIndexIterator { iter }, - filter: EncodedQuadPattern::new( - Some(subject.clone()), - None, - Some(object.clone()), - None, - ), - }) - } - - fn quads_for_predicate( - &self, - predicate: &EncodedTerm, - ) -> Result> { - let mut iter = self.db.raw_iterator_cf(self.posg_cf)?; - iter.seek(&encode_term(predicate)?); - Ok(FilteringEncodedQuadsIterator { - iter: POSGIndexIterator { iter }, - filter: EncodedQuadPattern::new(None, Some(predicate.clone()), None, None), - }) - } - - fn quads_for_predicate_object( - &self, - predicate: &EncodedTerm, - object: &EncodedTerm, - ) -> Result> { - let mut iter = self.db.raw_iterator_cf(self.spog_cf)?; - iter.seek(&encode_term_pair(predicate, object)?); - Ok(FilteringEncodedQuadsIterator { - iter: POSGIndexIterator { iter }, - filter: EncodedQuadPattern::new( - None, - Some(predicate.clone()), - Some(object.clone()), - None, - ), - }) - } - - fn quads_for_object( - &self, - object: &EncodedTerm, - ) -> Result> { - let mut iter = self.db.raw_iterator_cf(self.ospg_cf)?; - iter.seek(&encode_term(&object)?); - Ok(FilteringEncodedQuadsIterator { - iter: OSPGIndexIterator { iter }, - filter: EncodedQuadPattern::new(None, None, Some(object.clone()), None), - }) - } - - fn quads_for_graph( - &self, - graph_name: &EncodedTerm, - ) -> Result> { - Ok(InGraphQuadsIterator { - iter: self.quads()?, - graph_name: graph_name.clone(), - }) - } - - fn quads_for_subject_graph( - &self, - subject: &EncodedTerm, - graph_name: &EncodedTerm, - ) -> Result>> { - Ok(InGraphQuadsIterator { - iter: self.quads_for_subject(subject)?, - graph_name: graph_name.clone(), - }) - } - - fn quads_for_subject_predicate_graph( - &self, - subject: &EncodedTerm, - predicate: &EncodedTerm, - graph_name: &EncodedTerm, - ) -> Result>> { - Ok(InGraphQuadsIterator { - iter: self.quads_for_subject_predicate(subject, predicate)?, - graph_name: graph_name.clone(), - }) - } - - fn quads_for_subject_object_graph( - &self, - subject: &EncodedTerm, - object: &EncodedTerm, - graph_name: &EncodedTerm, - ) -> Result>> { - Ok(InGraphQuadsIterator { - iter: self.quads_for_subject_object(subject, object)?, - graph_name: graph_name.clone(), - }) - } - - fn quads_for_predicate_graph( - &self, - predicate: &EncodedTerm, - graph_name: &EncodedTerm, - ) -> Result>> { - Ok(InGraphQuadsIterator { - iter: self.quads_for_predicate(predicate)?, - graph_name: graph_name.clone(), - }) - } - - fn quads_for_predicate_object_graph( - &self, - predicate: &EncodedTerm, - object: &EncodedTerm, - graph_name: &EncodedTerm, - ) -> Result>> { - Ok(InGraphQuadsIterator { - iter: self.quads_for_predicate_object(predicate, object)?, - graph_name: graph_name.clone(), - }) - } - - fn quads_for_object_graph( - &self, - object: &EncodedTerm, - graph_name: &EncodedTerm, - ) -> Result>> { - Ok(InGraphQuadsIterator { - iter: self.quads_for_object(object)?, - graph_name: graph_name.clone(), - }) - } - - fn contains(&self, quad: &EncodedQuad) -> Result { - Ok(self - .db - .get_cf(self.spog_cf, &encode_spog_quad(quad)?)? - .is_some()) - } - - fn insert(&self, quad: &EncodedQuad) -> Result<()> { - let mut batch = WriteBatch::default(); - batch.put_cf(self.spog_cf, &encode_spog_quad(quad)?, &EMPTY_BUF)?; - batch.put_cf(self.posg_cf, &encode_posg_quad(quad)?, &EMPTY_BUF)?; - batch.put_cf(self.ospg_cf, &encode_ospg_quad(quad)?, &EMPTY_BUF)?; - Ok(self.db.write(batch)?) //TODO: check what's going on if the key already exists - } - - fn remove(&self, quad: &EncodedQuad) -> Result<()> { - let mut batch = WriteBatch::default(); - batch.delete_cf(self.spog_cf, &encode_spog_quad(quad)?)?; - batch.delete_cf(self.posg_cf, &encode_posg_quad(quad)?)?; - batch.delete_cf(self.ospg_cf, &encode_ospg_quad(quad)?)?; - Ok(self.db.write(batch)?) - } -} - -pub fn get_cf(db: &DB, name: &str) -> Result { - db.cf_handle(name) - .ok_or_else(|| "column family not found".into()) -} - -struct RocksDBCounter { - name: &'static str, -} - -impl RocksDBCounter { - fn new(name: &'static str) -> Self { - Self { name } - } - - fn get_and_increment(&self, db: &DB) -> Result { - let value = db - .get(self.name.as_bytes())? - .map(|b| { - let mut buf = [0 as u8; size_of::()]; - buf.copy_from_slice(&b); - from_bytes(buf) - }) - .unwrap_or(0); - db.put(self.name.as_bytes(), &to_bytes(value + 1))?; - Ok(value) - } -} - -struct EncodedQuadPattern { - subject: Option, - predicate: Option, - object: Option, - graph_name: Option, -} - -impl EncodedQuadPattern { - fn new( - subject: Option, - predicate: Option, - object: Option, - graph_name: Option, - ) -> Self { - Self { - subject, - predicate, - object, - graph_name, - } - } - - fn filter(&self, quad: &EncodedQuad) -> bool { - if let Some(ref subject) = self.subject { - if &quad.subject != subject { - return false; - } - } - if let Some(ref predicate) = self.predicate { - if &quad.predicate != predicate { - return false; - } - } - if let Some(ref object) = self.object { - if &quad.object != object { - return false; - } - } - if let Some(ref graph_name) = self.graph_name { - if &quad.graph_name != graph_name { - return false; - } - } - true - } -} - -fn encode_term(t: &EncodedTerm) -> Result> { - let mut vec = Vec::default(); - vec.write_term(&t)?; - Ok(vec) -} - -fn encode_term_pair(t1: &EncodedTerm, t2: &EncodedTerm) -> Result> { - let mut vec = Vec::default(); - vec.write_term(&t1)?; - vec.write_term(&t2)?; - Ok(vec) -} - -fn encode_term_triple(t1: &EncodedTerm, t2: &EncodedTerm, t3: &EncodedTerm) -> Result> { - let mut vec = Vec::default(); - vec.write_term(&t1)?; - vec.write_term(&t2)?; - vec.write_term(&t3)?; - Ok(vec) -} - -fn encode_spog_quad(quad: &EncodedQuad) -> Result> { - let mut vec = Vec::default(); - vec.write_spog_quad(quad)?; - Ok(vec) -} - -fn encode_posg_quad(quad: &EncodedQuad) -> Result> { - let mut vec = Vec::default(); - vec.write_posg_quad(quad)?; - Ok(vec) -} - -fn encode_ospg_quad(quad: &EncodedQuad) -> Result> { - let mut vec = Vec::default(); - vec.write_ospg_quad(quad)?; - Ok(vec) -} - -pub struct SPOGIndexIterator { - iter: DBRawIterator, -} - -impl Iterator for SPOGIndexIterator { - type Item = Result; - - fn next(&mut self) -> Option> { - self.iter.next(); - self.iter - .key() - .map(|buffer| Cursor::new(buffer).read_spog_quad()) - } -} - -pub struct POSGIndexIterator { - iter: DBRawIterator, -} - -impl Iterator for POSGIndexIterator { - type Item = Result; - - fn next(&mut self) -> Option> { - self.iter.next(); - self.iter - .key() - .map(|buffer| Cursor::new(buffer).read_posg_quad()) - } -} - -pub struct OSPGIndexIterator { - iter: DBRawIterator, -} - -impl Iterator for OSPGIndexIterator { - type Item = Result; - - fn next(&mut self) -> Option> { - self.iter.next(); - self.iter - .key() - .map(|buffer| Cursor::new(buffer).read_ospg_quad()) - } -} - -pub struct FilteringEncodedQuadsIterator>> { - iter: I, - filter: EncodedQuadPattern, -} - -impl>> Iterator for FilteringEncodedQuadsIterator { - type Item = Result; - - fn next(&mut self) -> Option> { - self.iter.next().filter(|quad| match quad { - Ok(quad) => self.filter.filter(quad), - Err(_) => true, - }) - } -} - -pub struct InGraphQuadsIterator>> { - iter: I, - graph_name: EncodedTerm, -} - -impl>> Iterator for InGraphQuadsIterator { - type Item = Result; - - fn next(&mut self) -> Option> { - let graph_name = &self.graph_name; - self.iter.find(|quad| match quad { - Ok(quad) => graph_name == &quad.graph_name, - Err(_) => true, - }) - } -} diff --git a/src/store/store.rs b/src/store/store.rs index 56daa362..831a55bd 100644 --- a/src/store/store.rs +++ b/src/store/store.rs @@ -1,5 +1,8 @@ use errors::*; use model::*; +use std::fmt; +use std::iter::FromIterator; +use std::iter::Iterator; use std::sync::Arc; use store::numeric_encoder::*; use store::Dataset; @@ -270,6 +273,41 @@ impl Dataset for StoreDataset { } } +impl fmt::Display for StoreDataset { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + for quad in self.iter()? { + write!(fmt, "{}\n", quad?)?; + } + Ok(()) + } +} + +impl Default for StoreDataset { + fn default() -> Self { + Self::new_from_store(S::default()) + } +} + +impl FromIterator for StoreDataset { + fn from_iter>(iter: I) -> Self { + let dataset = StoreDataset::default(); + for quad in iter { + dataset.insert(&quad).unwrap(); + } + dataset + } +} + +impl<'a, S: EncodedQuadsStore + Default> FromIterator<&'a Quad> for StoreDataset { + fn from_iter>(iter: I) -> Self { + let dataset = StoreDataset::default(); + for quad in iter { + dataset.insert(quad).unwrap(); + } + dataset + } +} + pub struct StoreNamedGraph { store: Arc, name: NamedOrBlankNode, @@ -279,13 +317,13 @@ pub struct StoreNamedGraph { impl Graph for StoreNamedGraph { type TriplesIterator = TriplesIterator; type TriplesForSubjectIterator = TriplesIterator; - type TriplesForSubjectPredicateIterator = - TriplesIterator; - type TriplesForSubjectObjectIterator = - TriplesIterator; + type ObjectsForSubjectPredicateIterator = + ObjectsIterator; + type PredicatesForSubjectObjectIterator = + PredicatesIterator; type TriplesForPredicateIterator = TriplesIterator; - type TriplesForPredicateObjectIterator = - TriplesIterator; + type SubjectsForPredicateObjectIterator = + SubjectsIterator; type TriplesForObjectIterator = TriplesIterator; fn triples(&self) -> Result> { @@ -308,13 +346,13 @@ impl Graph for StoreNamedGraph { store: self.store.clone(), }) } - fn triples_for_subject_predicate( + fn objects_for_subject_predicate( &self, subject: &NamedOrBlankNode, predicate: &NamedNode, - ) -> Result> { + ) -> Result> { let encoder = self.store.encoder(); - Ok(TriplesIterator { + Ok(ObjectsIterator { iter: self.store.quads_for_subject_predicate_graph( &encoder.encode_named_or_blank_node(subject)?, &encoder.encode_named_node(predicate)?, @@ -323,13 +361,13 @@ impl Graph for StoreNamedGraph { store: self.store.clone(), }) } - fn triples_for_subject_object( + fn predicates_for_subject_object( &self, subject: &NamedOrBlankNode, object: &Term, - ) -> Result> { + ) -> Result> { let encoder = self.store.encoder(); - Ok(TriplesIterator { + Ok(PredicatesIterator { iter: self.store.quads_for_subject_object_graph( &encoder.encode_named_or_blank_node(subject)?, &encoder.encode_term(object)?, @@ -351,13 +389,13 @@ impl Graph for StoreNamedGraph { store: self.store.clone(), }) } - fn triples_for_predicate_object( + fn subjects_for_predicate_object( &self, predicate: &NamedNode, object: &Term, - ) -> Result> { + ) -> Result> { let encoder = self.store.encoder(); - Ok(TriplesIterator { + Ok(SubjectsIterator { iter: self.store.quads_for_predicate_object_graph( &encoder.encode_named_node(predicate)?, &encoder.encode_term(object)?, @@ -424,6 +462,15 @@ impl NamedGraph for StoreNamedGraph { } } +impl fmt::Display for StoreNamedGraph { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + for triple in self.iter()? { + write!(fmt, "{}\n", triple?)?; + } + Ok(()) + } +} + pub struct StoreDefaultGraph { store: Arc, } @@ -431,13 +478,13 @@ pub struct StoreDefaultGraph { impl Graph for StoreDefaultGraph { type TriplesIterator = TriplesIterator; type TriplesForSubjectIterator = TriplesIterator; - type TriplesForSubjectPredicateIterator = - TriplesIterator; - type TriplesForSubjectObjectIterator = - TriplesIterator; + type ObjectsForSubjectPredicateIterator = + ObjectsIterator; + type PredicatesForSubjectObjectIterator = + PredicatesIterator; type TriplesForPredicateIterator = TriplesIterator; - type TriplesForPredicateObjectIterator = - TriplesIterator; + type SubjectsForPredicateObjectIterator = + SubjectsIterator; type TriplesForObjectIterator = TriplesIterator; fn triples(&self) -> Result> { @@ -460,13 +507,13 @@ impl Graph for StoreDefaultGraph { store: self.store.clone(), }) } - fn triples_for_subject_predicate( + fn objects_for_subject_predicate( &self, subject: &NamedOrBlankNode, predicate: &NamedNode, - ) -> Result> { + ) -> Result> { let encoder = self.store.encoder(); - Ok(TriplesIterator { + Ok(ObjectsIterator { iter: self.store.quads_for_subject_predicate_graph( &encoder.encode_named_or_blank_node(subject)?, &encoder.encode_named_node(predicate)?, @@ -475,13 +522,13 @@ impl Graph for StoreDefaultGraph { store: self.store.clone(), }) } - fn triples_for_subject_object( + fn predicates_for_subject_object( &self, subject: &NamedOrBlankNode, object: &Term, - ) -> Result> { + ) -> Result> { let encoder = self.store.encoder(); - Ok(TriplesIterator { + Ok(PredicatesIterator { iter: self.store.quads_for_subject_object_graph( &encoder.encode_named_or_blank_node(subject)?, &encoder.encode_term(object)?, @@ -503,13 +550,13 @@ impl Graph for StoreDefaultGraph { store: self.store.clone(), }) } - fn triples_for_predicate_object( + fn subjects_for_predicate_object( &self, predicate: &NamedNode, object: &Term, - ) -> Result> { + ) -> Result> { let encoder = self.store.encoder(); - Ok(TriplesIterator { + Ok(SubjectsIterator { iter: self.store.quads_for_predicate_object_graph( &encoder.encode_named_node(predicate)?, &encoder.encode_term(object)?, @@ -570,6 +617,41 @@ impl Graph for StoreDefaultGraph { } } +impl fmt::Display for StoreDefaultGraph { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + for triple in self.iter()? { + write!(fmt, "{}\n", triple?)?; + } + Ok(()) + } +} + +impl Default for StoreDefaultGraph { + fn default() -> Self { + StoreDataset::default().default_graph() + } +} + +impl FromIterator for StoreDefaultGraph { + fn from_iter>(iter: I) -> Self { + let graph = StoreDefaultGraph::default(); + for triple in iter { + graph.insert(&triple).unwrap(); + } + graph + } +} + +impl<'a, S: EncodedQuadsStore + Default> FromIterator<&'a Triple> for StoreDefaultGraph { + fn from_iter>(iter: I) -> Self { + let graph = StoreDefaultGraph::default(); + for triple in iter { + graph.insert(triple).unwrap(); + } + graph + } +} + pub struct StoreUnionGraph { store: Arc, } @@ -577,11 +659,13 @@ pub struct StoreUnionGraph { impl Graph for StoreUnionGraph { type TriplesIterator = TriplesIterator; type TriplesForSubjectIterator = TriplesIterator; - type TriplesForSubjectPredicateIterator = - TriplesIterator; - type TriplesForSubjectObjectIterator = TriplesIterator; + type ObjectsForSubjectPredicateIterator = + ObjectsIterator; + type PredicatesForSubjectObjectIterator = + PredicatesIterator; type TriplesForPredicateIterator = TriplesIterator; - type TriplesForPredicateObjectIterator = TriplesIterator; + type SubjectsForPredicateObjectIterator = + SubjectsIterator; type TriplesForObjectIterator = TriplesIterator; fn triples(&self) -> Result> { @@ -603,13 +687,13 @@ impl Graph for StoreUnionGraph { store: self.store.clone(), }) } - fn triples_for_subject_predicate( + fn objects_for_subject_predicate( &self, subject: &NamedOrBlankNode, predicate: &NamedNode, - ) -> Result> { + ) -> Result> { let encoder = self.store.encoder(); - Ok(TriplesIterator { + Ok(ObjectsIterator { iter: self.store.quads_for_subject_predicate( &encoder.encode_named_or_blank_node(subject)?, &encoder.encode_named_node(predicate)?, @@ -617,13 +701,13 @@ impl Graph for StoreUnionGraph { store: self.store.clone(), }) } - fn triples_for_subject_object( + fn predicates_for_subject_object( &self, subject: &NamedOrBlankNode, object: &Term, - ) -> Result> { + ) -> Result> { let encoder = self.store.encoder(); - Ok(TriplesIterator { + Ok(PredicatesIterator { iter: self.store.quads_for_subject_object( &encoder.encode_named_or_blank_node(subject)?, &encoder.encode_term(object)?, @@ -643,13 +727,13 @@ impl Graph for StoreUnionGraph { store: self.store.clone(), }) } - fn triples_for_predicate_object( + fn subjects_for_predicate_object( &self, predicate: &NamedNode, object: &Term, - ) -> Result> { + ) -> Result> { let encoder = self.store.encoder(); - Ok(TriplesIterator { + Ok(SubjectsIterator { iter: self.store.quads_for_predicate_object( &encoder.encode_named_node(predicate)?, &encoder.encode_term(object)?, @@ -682,11 +766,11 @@ impl Graph for StoreUnionGraph { } fn insert(&self, triple: &Triple) -> Result<()> { - unimplemented!() + Err("Union graph is not writable".into()) } fn remove(&self, triple: &Triple) -> Result<()> { - unimplemented!() + Err("Union graph is not writable".into()) } fn len(&self) -> Result { @@ -698,6 +782,15 @@ impl Graph for StoreUnionGraph { } } +impl fmt::Display for StoreUnionGraph { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + for triple in self.iter()? { + write!(fmt, "{}\n", triple?)?; + } + Ok(()) + } +} + pub struct DelegatingBytesStore<'a, S: 'a + BytesStore + Sized>(&'a S); impl<'a, S: BytesStore> BytesStore for DelegatingBytesStore<'a, S> { @@ -745,3 +838,58 @@ impl>, S: EncodedQuadsStore> Iterator .map(|k| k.and_then(|quad| self.store.encoder().decode_triple(&quad))) } } + +pub struct SubjectsIterator>, S: EncodedQuadsStore> { + iter: I, + store: Arc, +} + +impl>, S: EncodedQuadsStore> Iterator + for SubjectsIterator +{ + type Item = Result; + + fn next(&mut self) -> Option> { + self.iter.next().map(|k| { + k.and_then(|quad| { + self.store + .encoder() + .decode_named_or_blank_node(&quad.subject) + }) + }) + } +} + +pub struct PredicatesIterator>, S: EncodedQuadsStore> { + iter: I, + store: Arc, +} + +impl>, S: EncodedQuadsStore> Iterator + for PredicatesIterator +{ + type Item = Result; + + fn next(&mut self) -> Option> { + self.iter + .next() + .map(|k| k.and_then(|quad| self.store.encoder().decode_named_node(&quad.predicate))) + } +} + +pub struct ObjectsIterator>, S: EncodedQuadsStore> { + iter: I, + store: Arc, +} + +impl>, S: EncodedQuadsStore> Iterator + for ObjectsIterator +{ + type Item = Result; + + fn next(&mut self) -> Option> { + self.iter + .next() + .map(|k| k.and_then(|quad| self.store.encoder().decode_term(&quad.object))) + } +} diff --git a/tests/rdf_test_cases.rs b/tests/rdf_test_cases.rs index 4ed7f0ce..80a2db7b 100644 --- a/tests/rdf_test_cases.rs +++ b/tests/rdf_test_cases.rs @@ -17,7 +17,8 @@ use rudf::rio::turtle::read_turtle; use rudf::sparql::algebra::Query; use rudf::sparql::parser::read_sparql_query; use rudf::store::isomorphism::GraphIsomorphism; -use rudf::store::memory::MemoryGraph; +use rudf::store::Graph; +use rudf::store::MemoryGraph; use std::error::Error; use std::fmt; use url::Url; @@ -70,7 +71,7 @@ fn turtle_w3c_testsuite() { match client.load_turtle(test.action.clone()) { Ok(action_graph) => match client.load_turtle(test.result.clone().unwrap()) { Ok(result_graph) => assert!( - action_graph.is_isomorphic(&result_graph), + action_graph.is_isomorphic(&result_graph).unwrap(), "Failure on {}. Expected file:\n{}\nParsed file:\n{}\n", test, action_graph, @@ -95,7 +96,10 @@ fn turtle_w3c_testsuite() { .unwrap_or_else(|| Ok(MemoryGraph::default())); assert!( action_graph.is_err() - || !action_graph.unwrap().is_isomorphic(&result_graph.unwrap()), + || !action_graph + .unwrap() + .is_isomorphic(&result_graph.unwrap()) + .unwrap(), "Failure on {}", test ); @@ -276,6 +280,7 @@ impl<'a> Iterator for TestManifest<'a> { let kind = match self .graph .object_for_subject_predicate(&test_subject, &rdf::TYPE) + .unwrap() { Some(Term::NamedNode(c)) => match c.value().split("#").last() { Some(k) => k.to_string(), @@ -286,6 +291,7 @@ impl<'a> Iterator for TestManifest<'a> { let name = match self .graph .object_for_subject_predicate(&test_subject, &mf::NAME) + .unwrap() { Some(Term::Literal(c)) => Some(c.value().to_string()), _ => None, @@ -293,6 +299,7 @@ impl<'a> Iterator for TestManifest<'a> { let comment = match self .graph .object_for_subject_predicate(&test_subject, &rdfs::COMMENT) + .unwrap() { Some(Term::Literal(c)) => Some(c.value().to_string()), _ => None, @@ -300,6 +307,7 @@ impl<'a> Iterator for TestManifest<'a> { let action = match self .graph .object_for_subject_predicate(&test_subject, &*mf::ACTION) + .unwrap() { Some(Term::NamedNode(n)) => n.url().clone(), Some(_) => return Some(Err("invalid action".into())), @@ -308,6 +316,7 @@ impl<'a> Iterator for TestManifest<'a> { let result = match self .graph .object_for_subject_predicate(&test_subject, &*mf::RESULT) + .unwrap() { Some(Term::NamedNode(n)) => Some(n.url().clone()), Some(_) => return Some(Err("invalid result".into())), @@ -328,7 +337,10 @@ impl<'a> Iterator for TestManifest<'a> { Some(url) => { let manifest = NamedOrBlankNode::from(NamedNode::new(url.clone())); match self.client.load_turtle(url) { - Ok(g) => self.graph.extend(g.into_iter()), + Ok(g) => g + .iter() + .unwrap() + .for_each(|g| self.graph.insert(&g.unwrap()).unwrap()), Err(e) => return Some(Err(e.into())), } @@ -336,11 +348,11 @@ impl<'a> Iterator for TestManifest<'a> { match self .graph .object_for_subject_predicate(&manifest, &*mf::INCLUDE) + .unwrap() { Some(Term::BlankNode(list)) => { self.manifests_to_do.extend( - self.graph - .values_for_list(list.clone().into()) + RdfListIterator::iter(&self.graph, list.clone().into()) .flat_map(|m| match m { Term::NamedNode(nm) => Some(nm.url().clone()), _ => None, @@ -355,12 +367,19 @@ impl<'a> Iterator for TestManifest<'a> { match self .graph .object_for_subject_predicate(&manifest, &*mf::ENTRIES) + .unwrap() { Some(Term::BlankNode(list)) => { - self.tests_to_do - .extend(self.graph.values_for_list(list.clone().into())); + self.tests_to_do.extend(RdfListIterator::iter( + &self.graph, + list.clone().into(), + )); + } + Some(term) => { + return Some(Err( + format!("Invalid tests list. Got term {}", term).into() + )) } - Some(_) => return Some(Err("invalid tests list".into())), None => (), } } @@ -371,3 +390,45 @@ impl<'a> Iterator for TestManifest<'a> { } } } + +pub struct RdfListIterator<'a, G: 'a + Graph> { + graph: &'a G, + current_node: Option, +} + +impl<'a, G: 'a + Graph> RdfListIterator<'a, G> { + fn iter(graph: &'a G, root: NamedOrBlankNode) -> RdfListIterator<'a, G> { + RdfListIterator { + graph, + current_node: Some(root), + } + } +} + +impl<'a, G: 'a + Graph> Iterator for RdfListIterator<'a, G> { + type Item = Term; + + fn next(&mut self) -> Option { + match self.current_node.clone() { + Some(current) => { + let result = self + .graph + .object_for_subject_predicate(¤t, &rdf::FIRST) + .unwrap()? + .clone(); + self.current_node = match self + .graph + .object_for_subject_predicate(¤t, &rdf::REST) + .unwrap() + { + Some(Term::NamedNode(ref n)) if *n == *rdf::NIL => None, + Some(Term::NamedNode(n)) => Some(n.clone().into()), + Some(Term::BlankNode(n)) => Some(n.clone().into()), + _ => None, + }; + Some(result) + } + None => None, + } + } +}