SPARQL CONSTRUCT: avoid emitting the same triples multiple times (backport of 3314b4dd30bd61ae67074ef52f93edd6b28f490c)

feat/orm
Niko PLP, 1 week ago
parent b2b72580b5
commit 82de4660f3
1. Cargo.lock (9 changed lines)
2. ng-oxigraph/Cargo.toml (1 changed line)
3. ng-oxigraph/src/oxigraph/sparql/eval.rs (55 changed lines)
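What the change does: ConstructIterator now remembers the EncodedTriples it has already produced in an FxHashSet and skips a triple that was emitted before. Triples containing blank nodes or quoted triples are always treated as new, because fresh blank nodes are allocated for every solution, and the set is cleared once it grows beyond 1024 * 1024 entries so deduplication never costs unbounded memory. Below is a minimal sketch of the symptom this fixes, written against the public Oxigraph Store API; the IRIs, the query and the main function are illustrative and not part of this commit:

use oxigraph::model::{GraphName, Literal, NamedNode, Quad};
use oxigraph::sparql::QueryResults;
use oxigraph::store::Store;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = Store::new()?;
    let s = NamedNode::new("http://example.com/s")?;
    let name = NamedNode::new("http://example.com/name")?;
    // Two solutions for ?o that map to the exact same constructed triple.
    store.insert(&Quad::new(s.clone(), name.clone(), Literal::from("a"), GraphName::DefaultGraph))?;
    store.insert(&Quad::new(s.clone(), name.clone(), Literal::from("b"), GraphName::DefaultGraph))?;

    // The template only uses ?s, so every solution yields the same triple.
    let query = "CONSTRUCT { ?s a <http://example.com/Thing> } \
                 WHERE { ?s <http://example.com/name> ?o }";
    if let QueryResults::Graph(triples) = store.query(query)? {
        let triples: Vec<_> = triples.collect::<Result<_, _>>()?;
        // Before this commit the iterator yielded the same triple twice;
        // with the deduplication set it is emitted only once.
        println!("{} triple(s)", triples.len());
    }
    Ok(())
}
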

Cargo.lock (generated)

@@ -457,7 +457,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "regex",
- "rustc-hash",
+ "rustc-hash 1.1.0",
  "shlex",
  "syn 2.0.106",
 ]
@@ -2292,6 +2292,7 @@ dependencies = [
  "quick-xml 0.31.0",
  "rand 0.8.5",
  "regex",
+ "rustc-hash 2.1.1",
  "serde",
  "sha1",
  "sha2 0.10.9",
@@ -3272,6 +3273,12 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"

+[[package]]
+name = "rustc-hash"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
+
 [[package]]
 name = "rustc_version"
 version = "0.4.1"

ng-oxigraph/Cargo.toml

@@ -37,6 +37,7 @@ oxilangtag = "0.1"
 oxiri = "0.2.3"
 rand = "0.8"
 regex = "1.8.4"
+rustc-hash = "2"
 serde = { version = "1.0.142", features = ["derive"] }
 sha1 = "0.10"
 sha2 = "0.10"

ng-oxigraph/src/oxigraph/sparql/eval.rs

@@ -27,6 +27,7 @@ use oxilangtag::LanguageTag;
 use oxiri::Iri;
 use rand::random;
 use regex::{Regex, RegexBuilder};
+use rustc_hash::FxHashSet;
 use sha1::Sha1;
 use sha2::{Sha256, Sha384, Sha512};
 use std::cell::Cell;
@@ -214,6 +215,7 @@ impl SimpleEvaluator {
                     iter: eval(from),
                     template,
                     buffered_results: Vec::default(),
+                    already_emitted_results: FxHashSet::default(),
                     bnodes: Vec::default(),
                 }),
             }),
@@ -4939,6 +4941,7 @@ struct ConstructIterator {
     iter: EncodedTuplesIterator,
     template: Vec<TripleTemplate>,
     buffered_results: Vec<Result<Triple, EvaluationError>>,
+    already_emitted_results: FxHashSet<EncodedTriple>,
     bnodes: Vec<EncodedTerm>,
 }

@@ -4961,15 +4964,29 @@ impl Iterator for ConstructIterator {
                         get_triple_template_value(&template.predicate, &tuple, &mut self.bnodes),
                         get_triple_template_value(&template.object, &tuple, &mut self.bnodes),
                     ) {
-                        self.buffered_results.push(decode_triple(
-                            &*self.eval.dataset,
-                            &subject,
-                            &predicate,
-                            &object,
-                        ));
+                        let triple = EncodedTriple {
+                            subject,
+                            predicate,
+                            object,
+                        };
+                        // We allocate new blank nodes for each solution,
+                        // triples with blank nodes are likely to be new.
+                        let new_triple = triple.subject.is_blank_node()
+                            || triple.subject.is_triple()
+                            || triple.object.is_blank_node()
+                            || triple.object.is_triple()
+                            || self.already_emitted_results.insert(triple.clone());
+                        if new_triple {
+                            self.buffered_results
+                                .push(self.eval.dataset.decode_triple(&triple).map_err(Into::into));
+                            if self.already_emitted_results.len() > 1024 * 1024 {
+                                // We don't want to have a too big memory impact
+                                self.already_emitted_results.clear();
+                            }
+                        }
                     }
                 }
-                self.bnodes.clear(); // We do not reuse old bnodes
+                self.bnodes.clear(); // We do not reuse blank nodes
             }
         }
     }
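The deduplication above is a bounded "seen set": insert() reports whether the triple is new, and the whole set is dropped once it passes 1024 * 1024 entries, so memory stays bounded at the price of best-effort (not exact) deduplication after a clear. Here is a self-contained sketch of the same pattern as an iterator adapter, not code from this repository: it uses std::collections::HashSet instead of rustc_hash::FxHashSet, BoundedDedup is a hypothetical name, and it omits the blank-node shortcut the evaluator applies before consulting the set.

use std::collections::HashSet;
use std::hash::Hash;

struct BoundedDedup<I: Iterator> {
    inner: I,
    seen: HashSet<I::Item>,
    cap: usize,
}

impl<I> Iterator for BoundedDedup<I>
where
    I: Iterator,
    I::Item: Eq + Hash + Clone,
{
    type Item = I::Item;

    fn next(&mut self) -> Option<I::Item> {
        loop {
            let item = self.inner.next()?;
            // insert() returns false when the value was already present.
            if self.seen.insert(item.clone()) {
                if self.seen.len() > self.cap {
                    // Forget history instead of growing without bound.
                    self.seen.clear();
                }
                return Some(item);
            }
        }
    }
}

fn main() {
    let deduped: Vec<_> = BoundedDedup {
        inner: [1, 1, 2, 1, 3, 3].into_iter(),
        seen: HashSet::new(),
        cap: 1024 * 1024,
    }
    .collect();
    assert_eq!(deduped, vec![1, 2, 3]);
}
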
@@ -5025,18 +5042,18 @@ fn new_bnode() -> EncodedTerm {
     EncodedTerm::NumericalBlankNode { id: random() }
 }

-fn decode_triple<D: Decoder>(
-    decoder: &D,
-    subject: &EncodedTerm,
-    predicate: &EncodedTerm,
-    object: &EncodedTerm,
-) -> Result<Triple, EvaluationError> {
-    Ok(Triple::new(
-        decoder.decode_subject(subject)?,
-        decoder.decode_named_node(predicate)?,
-        decoder.decode_term(object)?,
-    ))
-}
+// fn decode_triple<D: Decoder>(
+//     decoder: &D,
+//     subject: &EncodedTerm,
+//     predicate: &EncodedTerm,
+//     object: &EncodedTerm,
+// ) -> Result<Triple, EvaluationError> {
+//     Ok(Triple::new(
+//         decoder.decode_subject(subject)?,
+//         decoder.decode_named_node(predicate)?,
+//         decoder.decode_term(object)?,
+//     ))
+// }

 struct DescribeIterator {
     eval: SimpleEvaluator,
