From 82de4660f3e5c4a84081a153978137952e4c7f1a Mon Sep 17 00:00:00 2001 From: Niko PLP Date: Wed, 8 Oct 2025 14:44:32 +0300 Subject: [PATCH] SPARQL CONSTRUCT: avoid emitting multiple times the same triples (backport of 3314b4dd30bd61ae67074ef52f93edd6b28f490c) --- Cargo.lock | 9 +++- ng-oxigraph/Cargo.toml | 1 + ng-oxigraph/src/oxigraph/sparql/eval.rs | 55 ++++++++++++++++--------- 3 files changed, 45 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ecd443f..524c76a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -457,7 +457,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 1.1.0", "shlex", "syn 2.0.106", ] @@ -2292,6 +2292,7 @@ dependencies = [ "quick-xml 0.31.0", "rand 0.8.5", "regex", + "rustc-hash 2.1.1", "serde", "sha1", "sha2 0.10.9", @@ -3272,6 +3273,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustc_version" version = "0.4.1" diff --git a/ng-oxigraph/Cargo.toml b/ng-oxigraph/Cargo.toml index fdf7339..e3b6de1 100644 --- a/ng-oxigraph/Cargo.toml +++ b/ng-oxigraph/Cargo.toml @@ -37,6 +37,7 @@ oxilangtag = "0.1" oxiri = "0.2.3" rand = "0.8" regex = "1.8.4" +rustc-hash = "2" serde = { version = "1.0.142", features = ["derive"] } sha1 = "0.10" sha2 = "0.10" diff --git a/ng-oxigraph/src/oxigraph/sparql/eval.rs b/ng-oxigraph/src/oxigraph/sparql/eval.rs index d6bd322..7dd3b1a 100644 --- a/ng-oxigraph/src/oxigraph/sparql/eval.rs +++ b/ng-oxigraph/src/oxigraph/sparql/eval.rs @@ -27,6 +27,7 @@ use oxilangtag::LanguageTag; use oxiri::Iri; use rand::random; use regex::{Regex, RegexBuilder}; +use rustc_hash::FxHashSet; use sha1::Sha1; use sha2::{Sha256, Sha384, Sha512}; use 
std::cell::Cell; @@ -214,6 +215,7 @@ impl SimpleEvaluator { iter: eval(from), template, buffered_results: Vec::default(), + already_emitted_results: FxHashSet::default(), bnodes: Vec::default(), }), }), @@ -4939,6 +4941,7 @@ struct ConstructIterator { iter: EncodedTuplesIterator, template: Vec, buffered_results: Vec>, + already_emitted_results: FxHashSet, bnodes: Vec, } @@ -4961,15 +4964,29 @@ impl Iterator for ConstructIterator { get_triple_template_value(&template.predicate, &tuple, &mut self.bnodes), get_triple_template_value(&template.object, &tuple, &mut self.bnodes), ) { - self.buffered_results.push(decode_triple( - &*self.eval.dataset, - &subject, - &predicate, - &object, - )); + let triple = EncodedTriple { + subject, + predicate, + object, + }; + // We allocate new blank nodes for each solution, + // so triples with blank nodes are likely to be new. + let new_triple = triple.subject.is_blank_node() + || triple.subject.is_triple() + || triple.object.is_blank_node() + || triple.object.is_triple() + || self.already_emitted_results.insert(triple.clone()); + if new_triple { + self.buffered_results + .push(self.eval.dataset.decode_triple(&triple).map_err(Into::into)); + if self.already_emitted_results.len() > 1024 * 1024 { + // Reset the set to bound its memory use; duplicates may be emitted again afterwards + self.already_emitted_results.clear(); + } + } } } - self.bnodes.clear(); // We do not reuse old bnodes + self.bnodes.clear(); // We do not reuse blank nodes } } } @@ -5025,18 +5042,18 @@ fn new_bnode() -> EncodedTerm { EncodedTerm::NumericalBlankNode { id: random() } } -fn decode_triple( - decoder: &D, - subject: &EncodedTerm, - predicate: &EncodedTerm, - object: &EncodedTerm, -) -> Result { - Ok(Triple::new( - decoder.decode_subject(subject)?, - decoder.decode_named_node(predicate)?, - decoder.decode_term(object)?, - )) -} +// fn decode_triple( +// decoder: &D, +// subject: &EncodedTerm, +// predicate: &EncodedTerm, +// object: &EncodedTerm, +// ) -> Result { +// Ok(Triple::new( +// 
decoder.decode_subject(subject)?, +// decoder.decode_named_node(predicate)?, +// decoder.decode_term(object)?, +// )) +// } struct DescribeIterator { eval: SimpleEvaluator,