SPARQL CONSTRUCT: avoid emitting the same triple multiple times (backport of 3314b4dd30bd61ae67074ef52f93edd6b28f490c)

feat/orm
Niko PLP 2 weeks ago
parent b2b72580b5
commit 82de4660f3
  1. 9
      Cargo.lock
  2. 1
      ng-oxigraph/Cargo.toml
  3. 55
      ng-oxigraph/src/oxigraph/sparql/eval.rs

9
Cargo.lock generated

@ -457,7 +457,7 @@ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"regex", "regex",
"rustc-hash", "rustc-hash 1.1.0",
"shlex", "shlex",
"syn 2.0.106", "syn 2.0.106",
] ]
@ -2292,6 +2292,7 @@ dependencies = [
"quick-xml 0.31.0", "quick-xml 0.31.0",
"rand 0.8.5", "rand 0.8.5",
"regex", "regex",
"rustc-hash 2.1.1",
"serde", "serde",
"sha1", "sha1",
"sha2 0.10.9", "sha2 0.10.9",
@ -3272,6 +3273,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]] [[package]]
name = "rustc_version" name = "rustc_version"
version = "0.4.1" version = "0.4.1"

@ -37,6 +37,7 @@ oxilangtag = "0.1"
oxiri = "0.2.3" oxiri = "0.2.3"
rand = "0.8" rand = "0.8"
regex = "1.8.4" regex = "1.8.4"
rustc-hash = "2"
serde = { version = "1.0.142", features = ["derive"] } serde = { version = "1.0.142", features = ["derive"] }
sha1 = "0.10" sha1 = "0.10"
sha2 = "0.10" sha2 = "0.10"

@ -27,6 +27,7 @@ use oxilangtag::LanguageTag;
use oxiri::Iri; use oxiri::Iri;
use rand::random; use rand::random;
use regex::{Regex, RegexBuilder}; use regex::{Regex, RegexBuilder};
use rustc_hash::FxHashSet;
use sha1::Sha1; use sha1::Sha1;
use sha2::{Sha256, Sha384, Sha512}; use sha2::{Sha256, Sha384, Sha512};
use std::cell::Cell; use std::cell::Cell;
@ -214,6 +215,7 @@ impl SimpleEvaluator {
iter: eval(from), iter: eval(from),
template, template,
buffered_results: Vec::default(), buffered_results: Vec::default(),
already_emitted_results: FxHashSet::default(),
bnodes: Vec::default(), bnodes: Vec::default(),
}), }),
}), }),
@ -4939,6 +4941,7 @@ struct ConstructIterator {
iter: EncodedTuplesIterator, iter: EncodedTuplesIterator,
template: Vec<TripleTemplate>, template: Vec<TripleTemplate>,
buffered_results: Vec<Result<Triple, EvaluationError>>, buffered_results: Vec<Result<Triple, EvaluationError>>,
already_emitted_results: FxHashSet<EncodedTriple>,
bnodes: Vec<EncodedTerm>, bnodes: Vec<EncodedTerm>,
} }
@ -4961,15 +4964,29 @@ impl Iterator for ConstructIterator {
get_triple_template_value(&template.predicate, &tuple, &mut self.bnodes), get_triple_template_value(&template.predicate, &tuple, &mut self.bnodes),
get_triple_template_value(&template.object, &tuple, &mut self.bnodes), get_triple_template_value(&template.object, &tuple, &mut self.bnodes),
) { ) {
self.buffered_results.push(decode_triple( let triple = EncodedTriple {
&*self.eval.dataset, subject,
&subject, predicate,
&predicate, object,
&object, };
)); // We allocate new blank nodes for each solution,
// so triples containing blank nodes are likely to be new and skip deduplication.
let new_triple = triple.subject.is_blank_node()
|| triple.subject.is_triple()
|| triple.object.is_blank_node()
|| triple.object.is_triple()
|| self.already_emitted_results.insert(triple.clone());
if new_triple {
self.buffered_results
.push(self.eval.dataset.decode_triple(&triple).map_err(Into::into));
if self.already_emitted_results.len() > 1024 * 1024 {
// Bound memory usage: reset the deduplication set once it grows too large
// (triples emitted before the reset may then be emitted again).
self.already_emitted_results.clear();
}
}
} }
} }
self.bnodes.clear(); // We do not reuse old bnodes self.bnodes.clear(); // We do not reuse blank nodes
} }
} }
} }
@ -5025,18 +5042,18 @@ fn new_bnode() -> EncodedTerm {
EncodedTerm::NumericalBlankNode { id: random() } EncodedTerm::NumericalBlankNode { id: random() }
} }
fn decode_triple<D: Decoder>( // fn decode_triple<D: Decoder>(
decoder: &D, // decoder: &D,
subject: &EncodedTerm, // subject: &EncodedTerm,
predicate: &EncodedTerm, // predicate: &EncodedTerm,
object: &EncodedTerm, // object: &EncodedTerm,
) -> Result<Triple, EvaluationError> { // ) -> Result<Triple, EvaluationError> {
Ok(Triple::new( // Ok(Triple::new(
decoder.decode_subject(subject)?, // decoder.decode_subject(subject)?,
decoder.decode_named_node(predicate)?, // decoder.decode_named_node(predicate)?,
decoder.decode_term(object)?, // decoder.decode_term(object)?,
)) // ))
} // }
struct DescribeIterator { struct DescribeIterator {
eval: SimpleEvaluator, eval: SimpleEvaluator,

Loading…
Cancel
Save