diff --git a/src/lib.rs b/src/lib.rs index b34fdf24..398e2f29 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,3 +4,4 @@ extern crate url; pub mod model; pub mod rio; +pub mod store; diff --git a/src/store/isomorphism.rs b/src/store/isomorphism.rs new file mode 100644 index 00000000..2e3f372e --- /dev/null +++ b/src/store/isomorphism.rs @@ -0,0 +1,154 @@ +use model::data::*; +use std::collections::BTreeSet; +use std::collections::HashMap; +use std::collections::HashSet; +use std::collections::hash_map::DefaultHasher; +use std::hash::Hash; +use std::hash::Hasher; +use store::memory::MemoryGraph; + +#[derive(Eq, PartialEq, Hash, Ord, PartialOrd)] +struct SubjectPredicate<'a> { + subject: &'a NamedOrBlankNode, + predicate: &'a NamedNode, +} + +impl<'a> SubjectPredicate<'a> { + fn new(subject: &'a NamedOrBlankNode, predicate: &'a NamedNode) -> Self { + Self { subject, predicate } + } +} + +#[derive(Eq, PartialEq, Hash, Ord, PartialOrd)] +struct PredicateObject<'a> { + predicate: &'a NamedNode, + object: &'a Term, +} + +impl<'a> PredicateObject<'a> { + fn new(predicate: &'a NamedNode, object: &'a Term) -> Self { + Self { predicate, object } + } +} + +fn subject_predicates_for_object<'a>( + graph: &'a MemoryGraph, + object: &'a Term, +) -> impl Iterator> { + graph + .triples_for_object(object) + .map(|t| SubjectPredicate::new(t.subject(), t.predicate())) +} + +fn predicate_objects_for_subject<'a>( + graph: &'a MemoryGraph, + subject: &'a NamedOrBlankNode, +) -> impl Iterator> { + graph + .triples_for_subject(subject) + .map(|t| PredicateObject::new(t.predicate(), t.object())) +} + +fn hash_blank_nodes<'a>( + bnodes: HashSet<&'a BlankNode>, + graph: &'a MemoryGraph, +) -> HashMap> { + let mut bnodes_by_hash: HashMap> = HashMap::default(); + + // NB: we need to sort the triples to have the same hash + for bnode in bnodes.into_iter() { + let mut hasher = DefaultHasher::new(); + + { + let subject = NamedOrBlankNode::from(bnode.clone()); + let mut po_set: BTreeSet = BTreeSet::default(); + for po in predicate_objects_for_subject(&graph, &subject) { + if !po.object.is_blank_node() { + po_set.insert(po); + } + } + for po in po_set { + po.hash(&mut hasher); + } + } + + { + let object = Term::from(bnode.clone()); + let mut sp_set: BTreeSet = BTreeSet::default(); + for sp in subject_predicates_for_object(&graph, &object) { + if !sp.subject.is_blank_node() { + sp_set.insert(sp); + } + } + for sp in sp_set { + sp.hash(&mut hasher); + } + } + + bnodes_by_hash + .entry(hasher.finish()) + .or_insert_with(Vec::default) + .push(bnode); + } + + bnodes_by_hash +} + +pub trait GraphIsomorphism { + /// Checks if two graphs are [isomorphic](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism) + fn is_isomorphic(&self, other: &Self) -> bool; +} + +impl GraphIsomorphism for MemoryGraph { + //TODO: proper isomorphism building + fn is_isomorphic(&self, other: &Self) -> bool { + if self.len() != other.len() { + return false; + } + + let mut self_bnodes: HashSet<&BlankNode> = HashSet::default(); + let mut other_bnodes: HashSet<&BlankNode> = HashSet::default(); + + for t in self { + if let NamedOrBlankNode::BlankNode(subject) = t.subject() { + self_bnodes.insert(subject); + if let Term::BlankNode(object) = t.object() { + self_bnodes.insert(object); + } + } else if let Term::BlankNode(object) = t.object() { + self_bnodes.insert(object); + } else if !other.contains(t) { + return false; + } + } + for t in other { + if let NamedOrBlankNode::BlankNode(subject) = t.subject() { + other_bnodes.insert(subject); + if let Term::BlankNode(object) = t.object() { + other_bnodes.insert(object); + } + } else if let Term::BlankNode(object) = t.object() { + other_bnodes.insert(object); + } else if !self.contains(t) { + return false; + } + } + + let self_bnodes_by_hash = hash_blank_nodes(self_bnodes, &self); + let other_bnodes_by_hash = hash_blank_nodes(other_bnodes, &other); + + if self_bnodes_by_hash.len() != other_bnodes_by_hash.len() { + return false; + } + + for hash in self_bnodes_by_hash.keys() { + if self_bnodes_by_hash.get(hash).map(|l| l.len()) + != other_bnodes_by_hash.get(hash).map(|l| l.len()) + { + return false; + } + } + + true + } +} diff --git a/src/store/memory.rs b/src/store/memory.rs new file mode 100644 index 00000000..b7a987d2 --- /dev/null +++ b/src/store/memory.rs @@ -0,0 +1,148 @@ +use model::data::*; +use std::collections::HashSet; +use std::fmt; +use std::iter::FromIterator; + +#[derive(Debug, Clone, Default)] +pub struct MemoryGraph { + triples: HashSet, +} + +impl MemoryGraph { + pub fn iter(&self) -> impl Iterator { + self.triples.iter() + } + + pub fn triples_for_subject<'a>( + &'a self, + subject: &'a NamedOrBlankNode, + ) -> impl Iterator { + self.iter().filter(move |t| t.subject() == subject) + } + + pub fn triples_for_predicate<'a>( + &'a self, + predicate: &'a NamedNode, + ) -> impl Iterator { + self.iter().filter(move |t| t.predicate() == predicate) + } + + pub fn triples_for_object<'a>(&'a self, object: &'a Term) -> impl Iterator { + self.iter().filter(move |t| t.object() == object) + } + + pub fn triples_for_subject_predicate<'a>( + &'a self, + subject: &'a NamedOrBlankNode, + predicate: &'a NamedNode, + ) -> impl Iterator { + self.iter() + .filter(move |t| t.subject() == subject && t.predicate() == predicate) + } + + pub fn objects_for_subject_predicate<'a>( + &'a self, + subject: &'a NamedOrBlankNode, + predicate: &'a NamedNode, + ) -> impl Iterator { + self.triples_for_subject_predicate(subject, predicate) + .map(|t| t.object()) + } + + pub fn object_for_subject_predicate<'a>( + &'a self, + subject: &'a NamedOrBlankNode, + predicate: &'a NamedNode, + ) -> Option<&'a Term> { + self.objects_for_subject_predicate(subject, predicate) + .nth(0) + } + + pub fn triples_for_predicate_object<'a>( + &'a self, + predicate: &'a NamedNode, + object: &'a Term, + ) -> impl Iterator { + self.iter() + .filter(move |t| t.predicate() == predicate && t.object() == object) + } + + pub fn subjects_for_predicate_object<'a>( + &'a self, + predicate: &'a NamedNode, + object: &'a Term, + ) -> impl Iterator { + self.triples_for_predicate_object(predicate, object) + .map(|t| t.subject()) + } + + pub fn subject_for_predicate_object<'a>( + &'a self, + predicate: &'a NamedNode, + object: &'a Term, + ) -> Option<&'a NamedOrBlankNode> { + self.subjects_for_predicate_object(predicate, object).nth(0) + } + + pub fn len(&self) -> usize { + self.triples.len() + } + + pub fn is_empty(&self) -> bool { + self.triples.is_empty() + } + + pub fn contains(&self, value: &Triple) -> bool { + self.triples.contains(value) + } + + pub fn insert(&mut self, value: Triple) -> bool { + self.triples.insert(value) + } +} + +impl fmt::Display for MemoryGraph { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + for triple in &self.triples { + write!(fmt, "{}\n", triple)?; + } + Ok(()) + } +} + +impl IntoIterator for MemoryGraph { + type Item = Triple; + type IntoIter = as IntoIterator>::IntoIter; + + fn into_iter(self) -> ::IntoIter { + self.triples.into_iter() + } +} + +impl<'a> IntoIterator for &'a MemoryGraph { + type Item = &'a Triple; + type IntoIter = <&'a HashSet as IntoIterator>::IntoIter; + + fn into_iter(self) -> ::IntoIter { + self.triples.iter() + } +} + +impl FromIterator for MemoryGraph { + fn from_iter>(iter: I) -> Self { + let triples = HashSet::from_iter(iter); + Self { triples } + } +} + +impl Extend for MemoryGraph { + fn extend>(&mut self, iter: I) { + self.triples.extend(iter) + } +} + +impl<'a> Extend<&'a Triple> for MemoryGraph { + fn extend>(&mut self, iter: I) { + self.triples.extend(iter.into_iter().cloned()) + } +} diff --git a/src/store/mod.rs b/src/store/mod.rs new file mode 100644 index 00000000..959f8e6c --- /dev/null +++ b/src/store/mod.rs @@ -0,0 +1,2 @@ +pub mod isomorphism; +pub mod memory; diff --git a/tests/rdf_test_cases.rs b/tests/rdf_test_cases.rs index 4e7a8336..b37b99be 100644 --- a/tests/rdf_test_cases.rs +++ b/tests/rdf_test_cases.rs @@ -9,13 +9,8 @@ use rudf::rio::RioError; use rudf::rio::RioResult; use rudf::rio::ntriples::read_ntriples; use rudf::rio::turtle::read_turtle; -use std::collections::BTreeSet; -use std::collections::HashMap; -use std::collections::HashSet; -use std::collections::hash_map::DefaultHasher; -use std::fmt; -use std::hash::Hash; -use std::hash::Hasher; +use rudf::store::isomorphism::GraphIsomorphism; +use rudf::store::memory::MemoryGraph; use std::iter::FromIterator; use std::str::FromStr; use url::Url; @@ -33,14 +28,14 @@ impl Default for RDFClient { } impl RDFClient { - fn load_turtle(&self, uri: Url) -> RioResult> { + fn load_turtle(&self, uri: Url) -> RioResult { match self.client.get(uri.clone()).send() { - Ok(response) => Ok(HashSet::from_iter(read_turtle(response, Some(uri))?)), + Ok(response) => Ok(MemoryGraph::from_iter(read_turtle(response, Some(uri))?)), Err(error) => Err(RioError::new(error)), } } - fn load_ntriples(&self, uri: Url) -> RioResult> { + fn load_ntriples(&self, uri: Url) -> RioResult { match self.client.get(uri).send() { Ok(response) => read_ntriples(response).collect(), Err(error) => Err(RioError::new(error)), @@ -48,199 +43,6 @@ impl RDFClient { } } -#[derive(Eq, PartialEq, Clone)] -struct Graph(HashSet); - -impl fmt::Display for Graph { - fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - for triple in &self.0 { - write!(fmt, "{}\n", triple)?; - } - Ok(()) - } -} - -#[derive(Eq, PartialEq, Hash, Ord, PartialOrd)] -struct SubjectPredicate<'a> { - subject: &'a NamedOrBlankNode, - predicate: &'a NamedNode, -} - -impl<'a> SubjectPredicate<'a> { - fn new(subject: &'a NamedOrBlankNode, predicate: &'a NamedNode) -> Self { - Self { subject, predicate } - } -} - -#[derive(Eq, PartialEq, Hash, Ord, PartialOrd)] -struct PredicateObject<'a> { - predicate: &'a NamedNode, - object: &'a Term, -} - -impl<'a> PredicateObject<'a> { - fn new(predicate: &'a NamedNode, object: &'a Term) -> Self { - Self { predicate, object } - } -} - -fn objects_for_subject_predicate<'a>( - graph: &'a HashSet, - subject: &'a NamedOrBlankNode, - predicate: &'a NamedNode, -) -> impl Iterator { - graph - .iter() - .filter(move |t| t.subject() == subject && t.predicate() == predicate) - .map(|t| t.object()) -} - -fn object_for_subject_predicate<'a>( - graph: &'a HashSet, - subject: &'a NamedOrBlankNode, - predicate: &'a NamedNode, -) -> Option<&'a Term> { - objects_for_subject_predicate(graph, subject, predicate).nth(0) -} - -fn subjects_for_predicate_object<'a>( - graph: &'a HashSet, - predicate: &'a NamedNode, - object: &'a Term, -) -> impl Iterator { - graph - .iter() - .filter(move |t| t.predicate() == predicate && t.object() == object) - .map(|t| t.subject()) -} - -fn subject_for_predicate_object<'a>( - graph: &'a HashSet, - predicate: &'a NamedNode, - object: &'a Term, -) -> Option<&'a NamedOrBlankNode> { - subjects_for_predicate_object(graph, predicate, object).nth(0) -} - -fn subject_predicates_for_object<'a>( - graph: &'a HashSet, - object: &'a Term, -) -> impl Iterator> { - graph - .iter() - .filter(move |t| t.object() == object) - .map(|t| SubjectPredicate::new(t.subject(), t.predicate())) -} - -fn predicate_objects_for_subject<'a>( - graph: &'a HashSet, - subject: &'a NamedOrBlankNode, -) -> impl Iterator> { - graph - .iter() - .filter(move |t| t.subject() == subject) - .map(|t| PredicateObject::new(t.predicate(), t.object())) -} - -fn hash_blank_nodes<'a>( - bnodes: HashSet<&'a BlankNode>, - graph: &'a HashSet, -) -> HashMap> { - let mut bnodes_by_hash: HashMap> = HashMap::default(); - - // NB: we need to sort the triples to have the same hash - for bnode in bnodes.into_iter() { - let mut hasher = DefaultHasher::new(); - - { - let subject = NamedOrBlankNode::from(bnode.clone()); - let mut po_set: BTreeSet = BTreeSet::default(); - for po in predicate_objects_for_subject(&graph, &subject) { - if !po.object.is_blank_node() { - po_set.insert(po); - } - } - for po in po_set { - po.hash(&mut hasher); - } - } - - { - let object = Term::from(bnode.clone()); - let mut sp_set: BTreeSet = BTreeSet::default(); - for sp in subject_predicates_for_object(&graph, &object) { - if !sp.subject.is_blank_node() { - sp_set.insert(sp); - } - } - for sp in sp_set { - sp.hash(&mut hasher); - } - } - - bnodes_by_hash - .entry(hasher.finish()) - .or_insert_with(Vec::default) - .push(bnode); - } - - bnodes_by_hash -} - -//TODO: use a better datastructure -fn is_isomorphic(a: &HashSet, b: &HashSet) -> bool { - if a.len() != b.len() { - return false; - } - - let mut a_bnodes: HashSet<&BlankNode> = HashSet::default(); - let mut b_bnodes: HashSet<&BlankNode> = HashSet::default(); - - for t in a { - if let NamedOrBlankNode::BlankNode(subject) = t.subject() { - a_bnodes.insert(subject); - if let Term::BlankNode(object) = t.object() { - a_bnodes.insert(object); - } - } else if let Term::BlankNode(object) = t.object() { - a_bnodes.insert(object); - } else if !b.contains(t) { - return false; - } - } - for t in b { - if let NamedOrBlankNode::BlankNode(subject) = t.subject() { - b_bnodes.insert(subject); - if let Term::BlankNode(object) = t.object() { - b_bnodes.insert(object); - } - } else if let Term::BlankNode(object) = t.object() { - b_bnodes.insert(object); - } else if !a.contains(t) { - return false; - } - } - - let a_bnodes_by_hash = hash_blank_nodes(a_bnodes, &a); - let b_bnodes_by_hash = hash_blank_nodes(b_bnodes, &b); - - if a_bnodes_by_hash.len() != b_bnodes_by_hash.len() { - return false; - } - - for hash in a_bnodes_by_hash.keys() { - if a_bnodes_by_hash.get(hash).map(|l| l.len()) - != b_bnodes_by_hash.get(hash).map(|l| l.len()) - { - return false; - } - } - - //TODO: proper isomorphism building - - true -} - #[test] fn turtle_w3c_testsuite() { let manifest_url = Url::parse("http://www.w3.org/2013/TurtleTests/manifest.ttl").unwrap(); @@ -289,11 +91,14 @@ fn turtle_w3c_testsuite() { ).into(), ]; - subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_positive_syntax) + manifest + .subjects_for_predicate_object(&rdf::TYPE, &rdft_test_turtle_positive_syntax) .for_each(|test| { - let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); + let comment = manifest + .object_for_subject_predicate(test, &rdfs_comment) + .unwrap(); if let Some(Term::NamedNode(file)) = - object_for_subject_predicate(&manifest, test, &mf_action) + manifest.object_for_subject_predicate(test, &mf_action) { if let Err(error) = client.load_turtle(file.url().clone()) { assert!( @@ -304,11 +109,14 @@ fn turtle_w3c_testsuite() { } } }); - subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_negative_syntax) + manifest + .subjects_for_predicate_object(&rdf::TYPE, &rdft_test_turtle_negative_syntax) .for_each(|test| { - let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); + let comment = manifest + .object_for_subject_predicate(test, &rdfs_comment) + .unwrap(); if let Some(Term::NamedNode(file)) = - object_for_subject_predicate(&manifest, test, &mf_action) + manifest.object_for_subject_predicate(test, &mf_action) { assert!( client.load_turtle(file.url().clone()).is_err(), @@ -318,27 +126,31 @@ fn turtle_w3c_testsuite() { ); } }); - subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_eval).for_each(|test| { - if test_blacklist.contains(test) { - return; - } - let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); - if let Some(Term::NamedNode(input)) = - object_for_subject_predicate(&manifest, test, &mf_action) - { - if let Some(Term::NamedNode(result)) = - object_for_subject_predicate(&manifest, test, &mf_result) + manifest + .subjects_for_predicate_object(&rdf::TYPE, &rdft_test_turtle_eval) + .for_each(|test| { + if test_blacklist.contains(test) { + return; + } + let comment = manifest + .object_for_subject_predicate(test, &rdfs_comment) + .unwrap(); + if let Some(Term::NamedNode(input)) = + manifest.object_for_subject_predicate(test, &mf_action) { - match client.load_turtle(input.url().clone()) { + if let Some(Term::NamedNode(result)) = + manifest.object_for_subject_predicate(test, &mf_result) + { + match client.load_turtle(input.url().clone()) { Ok(action_graph) => match client.load_turtle(result.url().clone()) { Ok(result_graph) => assert!( - is_isomorphic(&action_graph, &result_graph), + action_graph.is_isomorphic(&result_graph), "Failure on positive evaluation test file {} against {} about {}. Expected file:\n{}\nParsed file:\n{}\n", input, result, comment, - Graph(action_graph), - Graph(result_graph) + action_graph, + result_graph ), Err(error) => assert!( false, @@ -352,30 +164,32 @@ fn turtle_w3c_testsuite() { input, comment, error ) } + } } - } - }); - subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_negative_eval).for_each( - |test| { - let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); + }); + manifest + .subjects_for_predicate_object(&rdf::TYPE, &rdft_test_turtle_negative_eval) + .for_each(|test| { + let comment = manifest + .object_for_subject_predicate(test, &rdfs_comment) + .unwrap(); if let Some(Term::NamedNode(file)) = - object_for_subject_predicate(&manifest, test, &mf_action) + manifest.object_for_subject_predicate(test, &mf_action) { if let Some(Term::NamedNode(result)) = - object_for_subject_predicate(&manifest, test, &mf_result) + manifest.object_for_subject_predicate(test, &mf_result) { let action_graph = client.load_turtle(file.url().clone()); let result_graph = client.load_turtle(result.url().clone()); assert!( - !is_isomorphic(&action_graph.unwrap(), &result_graph.unwrap()), + !action_graph.unwrap().is_isomorphic(&result_graph.unwrap()), "Failure on positive evaluation test file {} about {}", file, comment ); } } - }, - ); + }); } #[test] @@ -395,11 +209,14 @@ fn ntriples_w3c_testsuite() { NamedNode::from_str("http://www.w3.org/ns/rdftest#TestNTriplesNegativeSyntax").unwrap(), ); - subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_ntriples_positive_syntax) + manifest + .subjects_for_predicate_object(&rdf::TYPE, &rdft_test_ntriples_positive_syntax) .for_each(|test| { - let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); + let comment = manifest + .object_for_subject_predicate(test, &rdfs_comment) + .unwrap(); if let Some(Term::NamedNode(file)) = - object_for_subject_predicate(&manifest, test, &mf_action) + manifest.object_for_subject_predicate(test, &mf_action) { if let Err(error) = client.load_ntriples(file.url().clone()) { assert!( @@ -410,11 +227,14 @@ fn ntriples_w3c_testsuite() { } } }); - subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_ntriples_negative_syntax) + manifest + .subjects_for_predicate_object(&rdf::TYPE, &rdft_test_ntriples_negative_syntax) .for_each(|test| { - let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); + let comment = manifest + .object_for_subject_predicate(test, &rdfs_comment) + .unwrap(); if let Some(Term::NamedNode(file)) = - object_for_subject_predicate(&manifest, test, &mf_action) + manifest.object_for_subject_predicate(test, &mf_action) { assert!( client.load_ntriples(file.url().clone()).is_err(),