parent
4dee8a9aa2
commit
ce1c198552
@ -0,0 +1,156 @@ |
||||
use crate::model::isomorphism::are_graphs_isomorphic; |
||||
use crate::model::*; |
||||
use std::collections::HashSet; |
||||
use std::fmt; |
||||
use std::iter::FromIterator; |
||||
|
||||
/// Simple data structure [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-graph).
|
||||
///
|
||||
/// It is not done to hold big graphs.
|
||||
///
|
||||
/// Usage example:
|
||||
/// ```
|
||||
/// use rudf::model::*;
|
||||
/// use rudf::model::SimpleGraph;
|
||||
/// use std::str::FromStr;
|
||||
///
|
||||
/// let mut graph = SimpleGraph::default();
|
||||
/// let ex = NamedNode::from_str("http://example.com").unwrap();
|
||||
/// let triple = Triple::new(ex.clone(), ex.clone(), ex.clone());
|
||||
/// graph.insert(triple.clone());
|
||||
/// let results: Vec<Triple> = graph.triples_for_subject(&ex.into()).cloned().collect();
|
||||
/// assert_eq!(vec![triple], results);
|
||||
/// ```
|
||||
#[derive(Eq, PartialEq, Debug, Clone, Default)] |
||||
pub struct SimpleGraph { |
||||
triples: HashSet<Triple>, |
||||
} |
||||
|
||||
impl SimpleGraph { |
||||
/// Returns all triples contained by the graph
|
||||
pub fn iter(&self) -> impl Iterator<Item = &Triple> { |
||||
self.triples.iter() |
||||
} |
||||
|
||||
pub fn triples_for_subject<'a>( |
||||
&'a self, |
||||
subject: &'a NamedOrBlankNode, |
||||
) -> impl Iterator<Item = &Triple> + 'a { |
||||
self.iter().filter(move |t| t.subject() == subject) |
||||
} |
||||
|
||||
pub fn objects_for_subject_predicate<'a>( |
||||
&'a self, |
||||
subject: &'a NamedOrBlankNode, |
||||
predicate: &'a NamedNode, |
||||
) -> impl Iterator<Item = &Term> + 'a { |
||||
self.iter() |
||||
.filter(move |t| t.subject() == subject && t.predicate() == predicate) |
||||
.map(|t| t.object()) |
||||
} |
||||
|
||||
pub fn object_for_subject_predicate<'a>( |
||||
&'a self, |
||||
subject: &'a NamedOrBlankNode, |
||||
predicate: &'a NamedNode, |
||||
) -> Option<&'a Term> { |
||||
self.objects_for_subject_predicate(subject, predicate) |
||||
.next() |
||||
} |
||||
|
||||
pub fn predicates_for_subject_object<'a>( |
||||
&'a self, |
||||
subject: &'a NamedOrBlankNode, |
||||
object: &'a Term, |
||||
) -> impl Iterator<Item = &NamedNode> + 'a { |
||||
self.iter() |
||||
.filter(move |t| t.subject() == subject && t.object() == object) |
||||
.map(|t| t.predicate()) |
||||
} |
||||
|
||||
pub fn triples_for_predicate<'a>( |
||||
&'a self, |
||||
predicate: &'a NamedNode, |
||||
) -> impl Iterator<Item = &Triple> + 'a { |
||||
self.iter().filter(move |t| t.predicate() == predicate) |
||||
} |
||||
|
||||
pub fn subjects_for_predicate_object<'a>( |
||||
&'a self, |
||||
predicate: &'a NamedNode, |
||||
object: &'a Term, |
||||
) -> impl Iterator<Item = &NamedOrBlankNode> + 'a { |
||||
self.iter() |
||||
.filter(move |t| t.predicate() == predicate && t.object() == object) |
||||
.map(|t| t.subject()) |
||||
} |
||||
|
||||
pub fn triples_for_object<'a>( |
||||
&'a self, |
||||
object: &'a Term, |
||||
) -> impl Iterator<Item = &Triple> + 'a { |
||||
self.iter().filter(move |t| t.object() == object) |
||||
} |
||||
|
||||
/// Checks if the graph contains the given triple
|
||||
pub fn contains(&self, triple: &Triple) -> bool { |
||||
self.triples.contains(triple) |
||||
} |
||||
|
||||
/// Adds a triple to the graph
|
||||
pub fn insert(&mut self, triple: Triple) -> bool { |
||||
self.triples.insert(triple) |
||||
} |
||||
|
||||
/// Removes a concrete triple from the graph
|
||||
pub fn remove(&mut self, triple: &Triple) -> bool { |
||||
self.triples.remove(triple) |
||||
} |
||||
|
||||
/// Returns the number of triples in this graph
|
||||
pub fn len(&self) -> usize { |
||||
self.triples.len() |
||||
} |
||||
|
||||
/// Checks if this graph contains a triple
|
||||
pub fn is_empty(&self) -> bool { |
||||
self.triples.is_empty() |
||||
} |
||||
|
||||
/// Checks if the current graph is [isomorphic](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism) with an other one
|
||||
pub fn is_isomorphic(&self, other: &SimpleGraph) -> bool { |
||||
are_graphs_isomorphic(self, other) |
||||
} |
||||
} |
||||
|
||||
impl IntoIterator for SimpleGraph { |
||||
type Item = Triple; |
||||
type IntoIter = <HashSet<Triple> as IntoIterator>::IntoIter; |
||||
|
||||
fn into_iter(self) -> Self::IntoIter { |
||||
self.triples.into_iter() |
||||
} |
||||
} |
||||
|
||||
impl FromIterator<Triple> for SimpleGraph { |
||||
fn from_iter<I: IntoIterator<Item = Triple>>(iter: I) -> Self { |
||||
Self { |
||||
triples: HashSet::from_iter(iter), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl Extend<Triple> for SimpleGraph { |
||||
fn extend<I: IntoIterator<Item = Triple>>(&mut self, iter: I) { |
||||
self.triples.extend(iter) |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for SimpleGraph { |
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
||||
for t in &self.triples { |
||||
writeln!(f, "{}", t)?; |
||||
} |
||||
Ok(()) |
||||
} |
||||
} |
@ -0,0 +1,223 @@ |
||||
use crate::model::*; |
||||
use permutohedron::LexicalPermutation; |
||||
use std::collections::hash_map::DefaultHasher; |
||||
use std::collections::HashSet; |
||||
use std::collections::{BTreeSet, HashMap}; |
||||
use std::hash::Hash; |
||||
use std::hash::Hasher; |
||||
|
||||
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] |
||||
struct SubjectPredicate<'a> { |
||||
subject: &'a NamedOrBlankNode, |
||||
predicate: &'a NamedNode, |
||||
} |
||||
|
||||
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] |
||||
struct PredicateObject<'a> { |
||||
predicate: &'a NamedNode, |
||||
object: &'a Term, |
||||
} |
||||
|
||||
fn subject_predicates_for_object<'a>( |
||||
graph: &'a SimpleGraph, |
||||
object: &'a Term, |
||||
) -> impl Iterator<Item = SubjectPredicate<'a>> + 'a { |
||||
graph.triples_for_object(object).map(|t| SubjectPredicate { |
||||
subject: t.subject(), |
||||
predicate: t.predicate(), |
||||
}) |
||||
} |
||||
|
||||
fn predicate_objects_for_subject<'a>( |
||||
graph: &'a SimpleGraph, |
||||
subject: &'a NamedOrBlankNode, |
||||
) -> impl Iterator<Item = PredicateObject<'a>> + 'a { |
||||
graph.triples_for_subject(subject).map(|t| PredicateObject { |
||||
predicate: t.predicate(), |
||||
object: t.object(), |
||||
}) |
||||
} |
||||
|
||||
fn hash_blank_nodes<'a>( |
||||
bnodes: HashSet<&'a BlankNode>, |
||||
graph: &'a SimpleGraph, |
||||
) -> HashMap<u64, Vec<&'a BlankNode>> { |
||||
let mut bnodes_by_hash = HashMap::default(); |
||||
|
||||
// NB: we need to sort the triples to have the same hash
|
||||
for bnode in bnodes { |
||||
let mut hasher = DefaultHasher::new(); |
||||
|
||||
{ |
||||
let subject = NamedOrBlankNode::from(bnode.clone()); |
||||
let mut po_set: BTreeSet<PredicateObject> = BTreeSet::default(); |
||||
for po in predicate_objects_for_subject(graph, &subject) { |
||||
match &po.object { |
||||
Term::BlankNode(_) => (), |
||||
_ => { |
||||
po_set.insert(po); |
||||
} |
||||
} |
||||
} |
||||
for po in po_set { |
||||
po.hash(&mut hasher); |
||||
} |
||||
} |
||||
|
||||
{ |
||||
let object = Term::from(bnode.clone()); |
||||
let mut sp_set: BTreeSet<SubjectPredicate> = BTreeSet::default(); |
||||
for sp in subject_predicates_for_object(graph, &object) { |
||||
match &sp.subject { |
||||
NamedOrBlankNode::BlankNode(_) => (), |
||||
_ => { |
||||
sp_set.insert(sp); |
||||
} |
||||
} |
||||
} |
||||
for sp in sp_set { |
||||
sp.hash(&mut hasher); |
||||
} |
||||
} |
||||
|
||||
bnodes_by_hash |
||||
.entry(hasher.finish()) |
||||
.or_insert_with(Vec::default) |
||||
.push(bnode); |
||||
} |
||||
bnodes_by_hash |
||||
} |
||||
|
||||
fn build_and_check_containment_from_hashes<'a>( |
||||
hashes_to_see: &mut Vec<&u64>, |
||||
a_bnodes_by_hash: &'a HashMap<u64, Vec<&'a BlankNode>>, |
||||
b_bnodes_by_hash: &'a HashMap<u64, Vec<&'a BlankNode>>, |
||||
a_to_b_mapping: &mut HashMap<&'a BlankNode, &'a BlankNode>, |
||||
a: &SimpleGraph, |
||||
b: &SimpleGraph, |
||||
) -> bool { |
||||
let hash = match hashes_to_see.pop() { |
||||
Some(h) => h, |
||||
None => return check_is_contained(a_to_b_mapping, a, b), |
||||
}; |
||||
|
||||
let a_nodes = a_bnodes_by_hash |
||||
.get(hash) |
||||
.map_or(&[] as &[&BlankNode], |v| v.as_slice()); |
||||
let b_nodes = b_bnodes_by_hash |
||||
.get(hash) |
||||
.map_or(&[] as &[&BlankNode], |v| v.as_slice()); |
||||
if a_nodes.len() != b_nodes.len() { |
||||
return false; |
||||
} |
||||
if a_nodes.len() == 1 { |
||||
// Avoid allocation for len == 1
|
||||
a_to_b_mapping.insert(a_nodes[0], b_nodes[0]); |
||||
let result = build_and_check_containment_from_hashes( |
||||
hashes_to_see, |
||||
a_bnodes_by_hash, |
||||
b_bnodes_by_hash, |
||||
a_to_b_mapping, |
||||
a, |
||||
b, |
||||
); |
||||
a_to_b_mapping.remove(a_nodes[0]); |
||||
hashes_to_see.push(hash); |
||||
result |
||||
} else { |
||||
// We compute all the rotations of a_nodes and then zip it with b_nodes to have all the possible pairs (a,b)
|
||||
let mut a_nodes_rotated = a_nodes.to_vec(); |
||||
a_nodes_rotated.sort(); |
||||
loop { |
||||
for (a_node, b_node) in a_nodes_rotated.iter().zip(b_nodes.iter()) { |
||||
a_to_b_mapping.insert(a_node, b_node); |
||||
} |
||||
let result = if build_and_check_containment_from_hashes( |
||||
hashes_to_see, |
||||
a_bnodes_by_hash, |
||||
b_bnodes_by_hash, |
||||
a_to_b_mapping, |
||||
a, |
||||
b, |
||||
) { |
||||
Some(true) |
||||
} else if a_nodes_rotated.next_permutation() { |
||||
None //keep going
|
||||
} else { |
||||
Some(false) // No more permutation
|
||||
}; |
||||
|
||||
if let Some(result) = result { |
||||
for a_node in &a_nodes_rotated { |
||||
a_to_b_mapping.remove(a_node); |
||||
} |
||||
hashes_to_see.push(hash); |
||||
return result; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn check_is_contained<'a>( |
||||
a_to_b_mapping: &mut HashMap<&'a BlankNode, &'a BlankNode>, |
||||
a: &SimpleGraph, |
||||
b: &SimpleGraph, |
||||
) -> bool { |
||||
for t_a in a.iter() { |
||||
let subject = if let NamedOrBlankNode::BlankNode(s_a) = &t_a.subject() { |
||||
a_to_b_mapping[s_a].clone().into() |
||||
} else { |
||||
t_a.subject().clone() |
||||
}; |
||||
let predicate = t_a.predicate().clone(); |
||||
let object = if let Term::BlankNode(o_a) = &t_a.object() { |
||||
a_to_b_mapping[o_a].clone().into() |
||||
} else { |
||||
t_a.object().clone() |
||||
}; |
||||
if !b.contains(&Triple::new(subject, predicate, object)) { |
||||
return false; |
||||
} |
||||
} |
||||
|
||||
true |
||||
} |
||||
|
||||
fn graph_blank_nodes(graph: &SimpleGraph) -> HashSet<&BlankNode> { |
||||
let mut blank_nodes = HashSet::default(); |
||||
for t in graph.iter() { |
||||
if let NamedOrBlankNode::BlankNode(subject) = t.subject() { |
||||
blank_nodes.insert(subject); |
||||
} |
||||
if let Term::BlankNode(object) = &t.object() { |
||||
blank_nodes.insert(object); |
||||
} |
||||
} |
||||
blank_nodes |
||||
} |
||||
|
||||
pub fn are_graphs_isomorphic(a: &SimpleGraph, b: &SimpleGraph) -> bool { |
||||
if a.len() != b.len() { |
||||
return false; |
||||
} |
||||
|
||||
let a_bnodes = graph_blank_nodes(a); |
||||
let a_bnodes_by_hash = hash_blank_nodes(a_bnodes, a); |
||||
|
||||
let b_bnodes = graph_blank_nodes(b); |
||||
let b_bnodes_by_hash = hash_blank_nodes(b_bnodes, b); |
||||
|
||||
// Hashes should have the same size everywhere
|
||||
if a_bnodes_by_hash.len() != b_bnodes_by_hash.len() { |
||||
return false; |
||||
} |
||||
|
||||
build_and_check_containment_from_hashes( |
||||
&mut a_bnodes_by_hash.keys().collect(), |
||||
&a_bnodes_by_hash, |
||||
&b_bnodes_by_hash, |
||||
&mut HashMap::default(), |
||||
a, |
||||
b, |
||||
) |
||||
} |
@ -1,158 +0,0 @@ |
||||
use crate::model::*; |
||||
use crate::Result; |
||||
use std::collections::hash_map::DefaultHasher; |
||||
use std::collections::BTreeSet; |
||||
use std::collections::HashMap; |
||||
use std::collections::HashSet; |
||||
use std::hash::Hash; |
||||
use std::hash::Hasher; |
||||
|
||||
#[derive(Eq, PartialEq, Hash, Ord, PartialOrd)] |
||||
struct SubjectPredicate { |
||||
subject: NamedOrBlankNode, |
||||
predicate: NamedNode, |
||||
} |
||||
|
||||
impl SubjectPredicate { |
||||
fn new(subject: NamedOrBlankNode, predicate: NamedNode) -> Self { |
||||
Self { subject, predicate } |
||||
} |
||||
} |
||||
|
||||
#[derive(Eq, PartialEq, Hash, Ord, PartialOrd)] |
||||
struct PredicateObject { |
||||
predicate: NamedNode, |
||||
object: Term, |
||||
} |
||||
|
||||
impl PredicateObject { |
||||
fn new(predicate: NamedNode, object: Term) -> Self { |
||||
Self { predicate, object } |
||||
} |
||||
} |
||||
|
||||
fn subject_predicates_for_object( |
||||
graph: &impl Graph, |
||||
object: &Term, |
||||
) -> Result<impl Iterator<Item = Result<SubjectPredicate>>> { |
||||
Ok(graph |
||||
.triples_for_object(object)? |
||||
.map(|t| t.map(|t| SubjectPredicate::new(t.subject().clone(), t.predicate_owned())))) |
||||
} |
||||
|
||||
fn predicate_objects_for_subject( |
||||
graph: &impl Graph, |
||||
subject: &NamedOrBlankNode, |
||||
) -> Result<impl Iterator<Item = Result<PredicateObject>>> { |
||||
Ok(graph |
||||
.triples_for_subject(subject)? |
||||
.map(|t| t.map(|t| PredicateObject::new(t.predicate().clone(), t.object_owned())))) |
||||
} |
||||
|
||||
fn hash_blank_nodes( |
||||
bnodes: HashSet<BlankNode>, |
||||
graph: &impl Graph, |
||||
) -> Result<HashMap<u64, Vec<BlankNode>>> { |
||||
let mut bnodes_by_hash: HashMap<u64, Vec<BlankNode>> = HashMap::default(); |
||||
|
||||
// NB: we need to sort the triples to have the same hash
|
||||
for bnode in bnodes { |
||||
let mut hasher = DefaultHasher::new(); |
||||
|
||||
{ |
||||
let subject = NamedOrBlankNode::from(bnode.clone()); |
||||
let mut po_set: BTreeSet<PredicateObject> = BTreeSet::default(); |
||||
for po in predicate_objects_for_subject(graph, &subject)? { |
||||
let po = po?; |
||||
if !po.object.is_blank_node() { |
||||
po_set.insert(po); |
||||
} |
||||
} |
||||
for po in po_set { |
||||
po.hash(&mut hasher); |
||||
} |
||||
} |
||||
|
||||
{ |
||||
let object = Term::from(bnode.clone()); |
||||
let mut sp_set: BTreeSet<SubjectPredicate> = BTreeSet::default(); |
||||
for sp in subject_predicates_for_object(graph, &object)? { |
||||
let sp = sp?; |
||||
if !sp.subject.is_blank_node() { |
||||
sp_set.insert(sp); |
||||
} |
||||
} |
||||
for sp in sp_set { |
||||
sp.hash(&mut hasher); |
||||
} |
||||
} |
||||
|
||||
bnodes_by_hash |
||||
.entry(hasher.finish()) |
||||
.or_insert_with(Vec::default) |
||||
.push(bnode); |
||||
} |
||||
|
||||
Ok(bnodes_by_hash) |
||||
} |
||||
|
||||
pub trait GraphIsomorphism { |
||||
/// Checks if two graphs are [isomorphic](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism)
|
||||
fn is_isomorphic(&self, other: &Self) -> Result<bool>; |
||||
} |
||||
|
||||
impl<G: Graph> GraphIsomorphism for G { |
||||
//TODO: proper isomorphism building
|
||||
fn is_isomorphic(&self, other: &Self) -> Result<bool> { |
||||
if self.len()? != other.len()? { |
||||
return Ok(false); |
||||
} |
||||
|
||||
let mut self_bnodes: HashSet<BlankNode> = HashSet::default(); |
||||
let mut other_bnodes: HashSet<BlankNode> = HashSet::default(); |
||||
|
||||
for t in self.iter()? { |
||||
let t = t?; |
||||
if let NamedOrBlankNode::BlankNode(subject) = t.subject() { |
||||
self_bnodes.insert(subject.clone()); |
||||
if let Term::BlankNode(object) = t.object() { |
||||
self_bnodes.insert(object.clone()); |
||||
} |
||||
} else if let Term::BlankNode(object) = t.object() { |
||||
self_bnodes.insert(object.clone()); |
||||
} else if !other.contains(&t)? { |
||||
return Ok(false); |
||||
} |
||||
} |
||||
for t in other.iter()? { |
||||
let t = t?; |
||||
if let NamedOrBlankNode::BlankNode(subject) = t.subject() { |
||||
other_bnodes.insert(subject.clone()); |
||||
if let Term::BlankNode(object) = t.object() { |
||||
other_bnodes.insert(object.clone()); |
||||
} |
||||
} else if let Term::BlankNode(object) = t.object() { |
||||
other_bnodes.insert(object.clone()); |
||||
} else if !self.contains(&t)? { |
||||
return Ok(false); |
||||
} |
||||
} |
||||
|
||||
let self_bnodes_by_hash = hash_blank_nodes(self_bnodes, self)?; |
||||
let other_bnodes_by_hash = hash_blank_nodes(other_bnodes, other)?; |
||||
|
||||
if self_bnodes_by_hash.len() != other_bnodes_by_hash.len() { |
||||
return Ok(false); |
||||
} |
||||
|
||||
for hash in self_bnodes_by_hash.keys() { |
||||
if self_bnodes_by_hash.get(hash).map(|l| l.len()) |
||||
!= other_bnodes_by_hash.get(hash).map(|l| l.len()) |
||||
{ |
||||
return Ok(false); |
||||
} |
||||
} |
||||
|
||||
Ok(true) |
||||
} |
||||
} |
Loading…
Reference in new issue