From bbf184f7aea4d60ec01a33d680d8a3dbb6567170 Mon Sep 17 00:00:00 2001
From: Tpt <thomaspt@hotmail.fr>
Date: Thu, 6 Jul 2023 17:56:05 +0200
Subject: [PATCH] Isomorphism: make sure that new hashes depend on the old
 ones

This allows the "distinguish" step to work
---
 fuzz/fuzz_targets/trig.rs | 11 +++++++-
 lib/oxrdf/src/dataset.rs  | 56 +++++++++++++++++++++------------------
 2 files changed, 40 insertions(+), 27 deletions(-)
diff --git a/fuzz/fuzz_targets/trig.rs b/fuzz/fuzz_targets/trig.rs
index a96ca86c..6a930a97 100644
--- a/fuzz/fuzz_targets/trig.rs
+++ b/fuzz/fuzz_targets/trig.rs
@@ -74,7 +74,16 @@ fuzz_target!(|data: &[u8]| {
         .filter(|c| *c != 0xFF)
         .collect::<Vec<_>>()
         .as_slice()]);
-    if quads.iter().map(count_quad_blank_nodes).sum::<usize>() < 2 {
+    let bnodes_count = quads.iter().map(count_quad_blank_nodes).sum::<usize>();
+    if bnodes_count == 0 {
+        assert_eq!(
+            quads,
+            quads_without_split,
+            "With split:\n{}\nWithout split:\n{}",
+            String::from_utf8_lossy(&serialize_quads(&quads)),
+            String::from_utf8_lossy(&serialize_quads(&quads_without_split))
+        );
+    } else if bnodes_count <= 4 {
         let mut dataset_with_split = quads.iter().collect::<Dataset>();
         let mut dataset_without_split = quads_without_split.iter().collect::<Dataset>();
         dataset_with_split.canonicalize();
diff --git a/lib/oxrdf/src/dataset.rs b/lib/oxrdf/src/dataset.rs
index dbf82c86..4b7736eb 100644
--- a/lib/oxrdf/src/dataset.rs
+++ b/lib/oxrdf/src/dataset.rs
@@ -646,37 +646,41 @@ impl Dataset {
         Vec<(u64, Vec<InternedBlankNode>)>,
     ) {
         let mut to_hash = Vec::new();
-        let mut to_do = hashes.keys().copied().collect::<Vec<_>>();
+        let mut to_do = hashes
+            .keys()
+            .map(|bnode| (*bnode, true))
+            .collect::<HashMap<_, _>>();
         let mut partition = HashMap::<_, Vec<_>>::with_capacity(hashes.len());
-        let mut partition_count = to_do.len();
-        while !to_do.is_empty() {
+        let mut old_partition_count = usize::MAX;
+        while old_partition_count != partition.len() {
+            old_partition_count = partition.len();
             partition.clear();
             let mut new_hashes = hashes.clone();
-            let mut new_todo = Vec::with_capacity(to_do.len());
-            for bnode in to_do {
-                for (s, p, o, g) in &quads_per_blank_node[&bnode] {
-                    to_hash.push((
-                        self.hash_subject(s, bnode, &hashes),
-                        self.hash_named_node(*p),
-                        self.hash_term(o, bnode, &hashes),
-                        self.hash_graph_name(g, bnode, &hashes),
-                    ));
-                }
-                to_hash.sort_unstable();
-                let hash = Self::hash_tuple((&to_hash,));
-                to_hash.clear();
-                if hash != hashes[&bnode] {
-                    new_hashes.insert(bnode, hash);
-                    new_todo.push(bnode);
-                }
-                partition.entry(hash).or_default().push(bnode);
+            for bnode in hashes.keys() {
+                let hash = if to_do.contains_key(bnode) {
+                    for (s, p, o, g) in &quads_per_blank_node[bnode] {
+                        to_hash.push((
+                            self.hash_subject(s, *bnode, &hashes),
+                            self.hash_named_node(*p),
+                            self.hash_term(o, *bnode, &hashes),
+                            self.hash_graph_name(g, *bnode, &hashes),
+                        ));
+                    }
+                    to_hash.sort_unstable();
+                    let hash = Self::hash_tuple((&to_hash, hashes[bnode]));
+                    to_hash.clear();
+                    if hash == hashes[bnode] {
+                        to_do.insert(*bnode, false);
+                    } else {
+                        new_hashes.insert(*bnode, hash);
+                    }
+                    hash
+                } else {
+                    hashes[bnode]
+                };
+                partition.entry(hash).or_default().push(*bnode);
             }
             hashes = new_hashes;
-            to_do = new_todo;
-            if partition_count == partition.len() {
-                break; // no improvement
-            }
-            partition_count = partition.len();
         }
         let mut partition: Vec<_> = partition.into_iter().collect();
         partition.sort_unstable_by(|(h1, b1), (h2, b2)| (b1.len(), h1).cmp(&(b2.len(), h2)));