parent
5039da163b
commit
69f94777b6
@ -1,178 +0,0 @@ |
||||
use crate::model::isomorphism::are_graphs_isomorphic; |
||||
use crate::model::*; |
||||
use std::collections::HashSet; |
||||
use std::fmt; |
||||
use std::iter::FromIterator; |
||||
|
||||
/// A simple implementation of [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-graph).
|
||||
///
|
||||
/// It is not done to hold big graphs.
|
||||
///
|
||||
/// Usage example:
|
||||
/// ```
|
||||
/// use oxigraph::model::*;
|
||||
/// use oxigraph::model::SimpleGraph;
|
||||
///
|
||||
/// let mut graph = SimpleGraph::default();
|
||||
/// let ex = NamedNode::parse("http://example.com").unwrap();
|
||||
/// let triple = Triple::new(ex.clone(), ex.clone(), ex.clone());
|
||||
/// graph.insert(triple.clone());
|
||||
/// let results: Vec<Triple> = graph.triples_for_subject(&ex.into()).cloned().collect();
|
||||
/// assert_eq!(vec![triple], results);
|
||||
/// ```
|
||||
#[derive(Eq, PartialEq, Debug, Clone, Default)] |
||||
pub struct SimpleGraph { |
||||
triples: HashSet<Triple>, |
||||
} |
||||
|
||||
impl SimpleGraph { |
||||
/// Returns all triples contained by the graph
|
||||
pub fn iter(&self) -> impl Iterator<Item = &Triple> { |
||||
self.triples.iter() |
||||
} |
||||
|
||||
pub fn triples_for_subject<'a>( |
||||
&'a self, |
||||
subject: &'a NamedOrBlankNode, |
||||
) -> impl Iterator<Item = &Triple> + 'a { |
||||
self.iter().filter(move |t| t.subject() == subject) |
||||
} |
||||
|
||||
pub fn objects_for_subject_predicate<'a>( |
||||
&'a self, |
||||
subject: &'a NamedOrBlankNode, |
||||
predicate: &'a NamedNode, |
||||
) -> impl Iterator<Item = &Term> + 'a { |
||||
self.iter().filter_map(move |t| { |
||||
if t.subject() == subject && t.predicate() == predicate { |
||||
Some(t.object()) |
||||
} else { |
||||
None |
||||
} |
||||
}) |
||||
} |
||||
|
||||
pub fn object_for_subject_predicate<'a>( |
||||
&'a self, |
||||
subject: &'a NamedOrBlankNode, |
||||
predicate: &'a NamedNode, |
||||
) -> Option<&'a Term> { |
||||
self.objects_for_subject_predicate(subject, predicate) |
||||
.next() |
||||
} |
||||
|
||||
pub fn predicates_for_subject_object<'a>( |
||||
&'a self, |
||||
subject: &'a NamedOrBlankNode, |
||||
object: &'a Term, |
||||
) -> impl Iterator<Item = &NamedNode> + 'a { |
||||
self.iter().filter_map(move |t| { |
||||
if t.subject() == subject && t.object() == object { |
||||
Some(t.predicate()) |
||||
} else { |
||||
None |
||||
} |
||||
}) |
||||
} |
||||
|
||||
pub fn triples_for_predicate<'a>( |
||||
&'a self, |
||||
predicate: &'a NamedNode, |
||||
) -> impl Iterator<Item = &Triple> + 'a { |
||||
self.iter().filter(move |t| t.predicate() == predicate) |
||||
} |
||||
|
||||
pub fn subjects_for_predicate_object<'a>( |
||||
&'a self, |
||||
predicate: &'a NamedNode, |
||||
object: &'a Term, |
||||
) -> impl Iterator<Item = &NamedOrBlankNode> + 'a { |
||||
self.iter().filter_map(move |t| { |
||||
if t.predicate() == predicate && t.object() == object { |
||||
Some(t.subject()) |
||||
} else { |
||||
None |
||||
} |
||||
}) |
||||
} |
||||
|
||||
pub fn triples_for_object<'a>( |
||||
&'a self, |
||||
object: &'a Term, |
||||
) -> impl Iterator<Item = &Triple> + 'a { |
||||
self.iter().filter(move |t| t.object() == object) |
||||
} |
||||
|
||||
/// Checks if the graph contains the given triple
|
||||
pub fn contains(&self, triple: &Triple) -> bool { |
||||
self.triples.contains(triple) |
||||
} |
||||
|
||||
/// Adds a triple to the graph
|
||||
pub fn insert(&mut self, triple: Triple) -> bool { |
||||
self.triples.insert(triple) |
||||
} |
||||
|
||||
/// Removes a concrete triple from the graph
|
||||
pub fn remove(&mut self, triple: &Triple) -> bool { |
||||
self.triples.remove(triple) |
||||
} |
||||
|
||||
/// Returns the number of triples in this graph
|
||||
pub fn len(&self) -> usize { |
||||
self.triples.len() |
||||
} |
||||
|
||||
/// Checks if this graph contains a triple
|
||||
pub fn is_empty(&self) -> bool { |
||||
self.triples.is_empty() |
||||
} |
||||
|
||||
/// Checks if the current graph is [isomorphic](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism) with another one
|
||||
///
|
||||
/// Warning: This algorithm worst case complexity is in O(n!)
|
||||
pub fn is_isomorphic(&self, other: &SimpleGraph) -> bool { |
||||
are_graphs_isomorphic(self, other) |
||||
} |
||||
} |
||||
|
||||
impl IntoIterator for SimpleGraph { |
||||
type Item = Triple; |
||||
type IntoIter = <HashSet<Triple> as IntoIterator>::IntoIter; |
||||
|
||||
fn into_iter(self) -> Self::IntoIter { |
||||
self.triples.into_iter() |
||||
} |
||||
} |
||||
|
||||
impl<'a> IntoIterator for &'a SimpleGraph { |
||||
type Item = &'a Triple; |
||||
type IntoIter = <&'a HashSet<Triple> as IntoIterator>::IntoIter; |
||||
|
||||
fn into_iter(self) -> Self::IntoIter { |
||||
self.triples.iter() |
||||
} |
||||
} |
||||
|
||||
impl FromIterator<Triple> for SimpleGraph { |
||||
fn from_iter<I: IntoIterator<Item = Triple>>(iter: I) -> Self { |
||||
Self { |
||||
triples: HashSet::from_iter(iter), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl Extend<Triple> for SimpleGraph { |
||||
fn extend<I: IntoIterator<Item = Triple>>(&mut self, iter: I) { |
||||
self.triples.extend(iter) |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for SimpleGraph { |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
for t in &self.triples { |
||||
writeln!(f, "{}", t)?; |
||||
} |
||||
Ok(()) |
||||
} |
||||
} |
@ -1,289 +0,0 @@ |
||||
use crate::model::*; |
||||
use std::collections::hash_map::{DefaultHasher, RandomState}; |
||||
use std::collections::{BTreeSet, HashMap, HashSet}; |
||||
use std::hash::Hash; |
||||
use std::hash::Hasher; |
||||
|
||||
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] |
||||
struct SubjectPredicate<'a> { |
||||
subject: &'a NamedOrBlankNode, |
||||
predicate: &'a NamedNode, |
||||
} |
||||
|
||||
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] |
||||
struct PredicateObject<'a> { |
||||
predicate: &'a NamedNode, |
||||
object: &'a Term, |
||||
} |
||||
|
||||
fn subject_predicates_for_object<'a>( |
||||
graph: &'a SimpleGraph, |
||||
object: &'a Term, |
||||
) -> impl Iterator<Item = SubjectPredicate<'a>> + 'a { |
||||
graph.triples_for_object(object).map(|t| SubjectPredicate { |
||||
subject: t.subject(), |
||||
predicate: t.predicate(), |
||||
}) |
||||
} |
||||
|
||||
fn predicate_objects_for_subject<'a>( |
||||
graph: &'a SimpleGraph, |
||||
subject: &'a NamedOrBlankNode, |
||||
) -> impl Iterator<Item = PredicateObject<'a>> + 'a { |
||||
graph.triples_for_subject(subject).map(|t| PredicateObject { |
||||
predicate: t.predicate(), |
||||
object: t.object(), |
||||
}) |
||||
} |
||||
|
||||
fn split_hash_buckets<'a>( |
||||
bnodes_by_hash: HashMap<u64, Vec<&'a BlankNode>>, |
||||
graph: &'a SimpleGraph, |
||||
distance: usize, |
||||
) -> HashMap<u64, Vec<&'a BlankNode>> { |
||||
let mut new_bnodes_by_hash = HashMap::default(); |
||||
|
||||
for (hash, bnodes) in bnodes_by_hash { |
||||
if bnodes.len() == 1 { |
||||
new_bnodes_by_hash.insert(hash, bnodes); // Nothing to improve
|
||||
} else { |
||||
for bnode in bnodes { |
||||
let mut starts = vec![NamedOrBlankNode::from(*bnode)]; |
||||
for _ in 0..distance { |
||||
let mut new_starts = Vec::default(); |
||||
for s in starts { |
||||
for t in graph.triples_for_subject(&s) { |
||||
match t.object() { |
||||
Term::NamedNode(t) => new_starts.push(t.clone().into()), |
||||
Term::BlankNode(t) => new_starts.push(t.clone().into()), |
||||
Term::Literal(_) => (), |
||||
} |
||||
} |
||||
for t in graph.triples_for_object(&s.into()) { |
||||
new_starts.push(t.subject().clone()); |
||||
} |
||||
} |
||||
starts = new_starts; |
||||
} |
||||
|
||||
// We do the hashing
|
||||
let mut hasher = DefaultHasher::default(); |
||||
hash.hash(&mut hasher); // We start with the previous hash
|
||||
|
||||
// NB: we need to sort the triples to have the same hash
|
||||
let mut po_set: BTreeSet<PredicateObject<'_>> = BTreeSet::default(); |
||||
for start in &starts { |
||||
for po in predicate_objects_for_subject(graph, start) { |
||||
match &po.object { |
||||
Term::BlankNode(_) => (), |
||||
_ => { |
||||
po_set.insert(po); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
for po in &po_set { |
||||
po.hash(&mut hasher); |
||||
} |
||||
|
||||
let mut sp_set: BTreeSet<SubjectPredicate<'_>> = BTreeSet::default(); |
||||
let term_starts: Vec<_> = starts.into_iter().map(|t| t.into()).collect(); |
||||
for start in &term_starts { |
||||
for sp in subject_predicates_for_object(graph, start) { |
||||
match &sp.subject { |
||||
NamedOrBlankNode::BlankNode(_) => (), |
||||
_ => { |
||||
sp_set.insert(sp); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
for sp in &sp_set { |
||||
sp.hash(&mut hasher); |
||||
} |
||||
|
||||
new_bnodes_by_hash |
||||
.entry(hasher.finish()) |
||||
.or_insert_with(Vec::default) |
||||
.push(bnode); |
||||
} |
||||
} |
||||
} |
||||
new_bnodes_by_hash |
||||
} |
||||
|
||||
fn build_and_check_containment_from_hashes<'a>( |
||||
a_bnodes_by_hash: &mut Vec<(u64, Vec<&'a BlankNode>)>, |
||||
b_bnodes_by_hash: &'a HashMap<u64, Vec<&'a BlankNode>>, |
||||
a_to_b_mapping: &mut HashMap<&'a BlankNode, &'a BlankNode>, |
||||
a: &'a SimpleGraph, |
||||
b: &'a SimpleGraph, |
||||
current_a_nodes: &[&'a BlankNode], |
||||
current_b_nodes: &mut BTreeSet<&'a BlankNode>, |
||||
) -> bool { |
||||
if let Some((a_node, remaining_a_node)) = current_a_nodes.split_last() { |
||||
let b_nodes = current_b_nodes.iter().cloned().collect::<Vec<_>>(); |
||||
for b_node in b_nodes { |
||||
current_b_nodes.remove(b_node); |
||||
a_to_b_mapping.insert(a_node, b_node); |
||||
if check_is_contained_focused(a_to_b_mapping, a_node, a, b) |
||||
&& build_and_check_containment_from_hashes( |
||||
a_bnodes_by_hash, |
||||
b_bnodes_by_hash, |
||||
a_to_b_mapping, |
||||
a, |
||||
b, |
||||
remaining_a_node, |
||||
current_b_nodes, |
||||
) |
||||
{ |
||||
return true; |
||||
} |
||||
current_b_nodes.insert(b_node); |
||||
} |
||||
a_to_b_mapping.remove(a_node); |
||||
false |
||||
} else { |
||||
let (hash, new_a_nodes) = match a_bnodes_by_hash.pop() { |
||||
Some(v) => v, |
||||
None => return true, |
||||
}; |
||||
|
||||
let mut new_b_nodes = b_bnodes_by_hash |
||||
.get(&hash) |
||||
.map_or(BTreeSet::default(), |v| v.iter().cloned().collect()); |
||||
if new_a_nodes.len() != new_b_nodes.len() { |
||||
return false; |
||||
} |
||||
|
||||
if new_a_nodes.len() > 10 { |
||||
eprintln!("Too big instance, aborting"); |
||||
return true; //TODO: Very very very bad
|
||||
} |
||||
|
||||
if build_and_check_containment_from_hashes( |
||||
a_bnodes_by_hash, |
||||
b_bnodes_by_hash, |
||||
a_to_b_mapping, |
||||
a, |
||||
b, |
||||
&new_a_nodes, |
||||
&mut new_b_nodes, |
||||
) { |
||||
true |
||||
} else { |
||||
a_bnodes_by_hash.push((hash, new_a_nodes)); |
||||
false |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn check_is_contained_focused<'a>( |
||||
a_to_b_mapping: &mut HashMap<&'a BlankNode, &'a BlankNode>, |
||||
a_bnode_focus: &'a BlankNode, |
||||
a: &'a SimpleGraph, |
||||
b: &'a SimpleGraph, |
||||
) -> bool { |
||||
let a_bnode_subject = a_bnode_focus.clone().into(); |
||||
let a_bnode_object = a_bnode_focus.clone().into(); |
||||
let ts_a = a |
||||
.triples_for_subject(&a_bnode_subject) |
||||
.chain(a.triples_for_object(&a_bnode_object)); |
||||
for t_a in ts_a { |
||||
let subject: NamedOrBlankNode = if let NamedOrBlankNode::BlankNode(s_a) = &t_a.subject() { |
||||
if let Some(s_a) = a_to_b_mapping.get(s_a) { |
||||
(*s_a).clone().into() |
||||
} else { |
||||
continue; // We skip for now
|
||||
} |
||||
} else { |
||||
t_a.subject().clone() |
||||
}; |
||||
let predicate = t_a.predicate().clone(); |
||||
let object: Term = if let Term::BlankNode(o_a) = &t_a.object() { |
||||
if let Some(o_a) = a_to_b_mapping.get(o_a) { |
||||
(*o_a).clone().into() |
||||
} else { |
||||
continue; // We skip for now
|
||||
} |
||||
} else { |
||||
t_a.object().clone() |
||||
}; |
||||
if !b.contains(&Triple::new(subject, predicate, object)) { |
||||
return false; |
||||
} |
||||
} |
||||
|
||||
true |
||||
} |
||||
|
||||
fn graph_blank_nodes(graph: &SimpleGraph) -> Vec<&BlankNode> { |
||||
let mut blank_nodes: HashSet<&BlankNode, RandomState> = HashSet::default(); |
||||
for t in graph { |
||||
if let NamedOrBlankNode::BlankNode(subject) = t.subject() { |
||||
blank_nodes.insert(subject); |
||||
} |
||||
if let Term::BlankNode(object) = &t.object() { |
||||
blank_nodes.insert(object); |
||||
} |
||||
} |
||||
blank_nodes.into_iter().collect() |
||||
} |
||||
|
||||
pub fn are_graphs_isomorphic(a: &SimpleGraph, b: &SimpleGraph) -> bool { |
||||
if a.len() != b.len() { |
||||
return false; |
||||
} |
||||
|
||||
// We check containment of everything buts triples with blank nodes
|
||||
let mut a_bnodes_triples = SimpleGraph::default(); |
||||
for t in a { |
||||
if t.subject().is_blank_node() || t.object().is_blank_node() { |
||||
a_bnodes_triples.insert(t.clone()); |
||||
} else if !b.contains(t) { |
||||
return false; // Triple in a not in b without blank nodes
|
||||
} |
||||
} |
||||
|
||||
let mut b_bnodes_triples = SimpleGraph::default(); |
||||
for t in b { |
||||
if t.subject().is_blank_node() || t.object().is_blank_node() { |
||||
b_bnodes_triples.insert(t.clone()); |
||||
} else if !a.contains(t) { |
||||
return false; // Triple in a not in b without blank nodes
|
||||
} |
||||
} |
||||
|
||||
let mut a_bnodes_by_hash = HashMap::default(); |
||||
a_bnodes_by_hash.insert(0, graph_blank_nodes(&a_bnodes_triples)); |
||||
let mut b_bnodes_by_hash = HashMap::default(); |
||||
b_bnodes_by_hash.insert(0, graph_blank_nodes(&b_bnodes_triples)); |
||||
|
||||
for distance in 0..5 { |
||||
let max_size = a_bnodes_by_hash.values().map(Vec::len).max().unwrap_or(0); |
||||
if max_size < 2 { |
||||
break; // We only have small buckets
|
||||
} |
||||
|
||||
a_bnodes_by_hash = split_hash_buckets(a_bnodes_by_hash, a, distance); |
||||
b_bnodes_by_hash = split_hash_buckets(b_bnodes_by_hash, b, distance); |
||||
|
||||
// Hashes should have the same size
|
||||
if a_bnodes_by_hash.len() != b_bnodes_by_hash.len() { |
||||
return false; |
||||
} |
||||
} |
||||
|
||||
let mut sorted_a_bnodes_by_hash: Vec<_> = a_bnodes_by_hash.into_iter().collect(); |
||||
sorted_a_bnodes_by_hash.sort_by(|(_, l1), (_, l2)| l1.len().cmp(&l2.len())); |
||||
|
||||
build_and_check_containment_from_hashes( |
||||
&mut sorted_a_bnodes_by_hash, |
||||
&b_bnodes_by_hash, |
||||
&mut HashMap::default(), |
||||
&a_bnodes_triples, |
||||
&b_bnodes_triples, |
||||
&[], |
||||
&mut BTreeSet::default(), |
||||
) |
||||
} |
Loading…
Reference in new issue