Bulk load: avoid unneeded lookups

It's fine to perform some redundant inserts in exchange for not having to read the DB state
pull/173/head
Tpt 3 years ago
parent 7bb41e655e
commit b9d4f912dc
  1. 20
      lib/src/storage/mod.rs

@ -957,7 +957,6 @@ impl StorageWriter {
#[cfg(not(target_arch = "wasm32"))] #[cfg(not(target_arch = "wasm32"))]
pub struct BulkLoader<'a> { pub struct BulkLoader<'a> {
storage: &'a Storage, storage: &'a Storage,
reader: Reader,
id2str: HashMap<StrHash, Box<str>>, id2str: HashMap<StrHash, Box<str>>,
quads: HashSet<EncodedQuad>, quads: HashSet<EncodedQuad>,
triples: HashSet<EncodedQuad>, triples: HashSet<EncodedQuad>,
@ -970,7 +969,6 @@ impl<'a> BulkLoader<'a> {
pub fn new(storage: &'a Storage) -> Self { pub fn new(storage: &'a Storage) -> Self {
Self { Self {
storage, storage,
reader: storage.db.reader(),
id2str: HashMap::default(), id2str: HashMap::default(),
quads: HashSet::default(), quads: HashSet::default(),
triples: HashSet::default(), triples: HashSet::default(),
@ -987,33 +985,21 @@ impl<'a> BulkLoader<'a> {
self.buffer.clear(); self.buffer.clear();
if quad.graph_name.is_default_graph() { if quad.graph_name.is_default_graph() {
write_spo_quad(&mut self.buffer, &encoded); write_spo_quad(&mut self.buffer, &encoded);
if !self if self.triples.insert(encoded.clone()) {
.reader
.contains_key(&self.storage.dspo_cf, &self.buffer)?
&& self.triples.insert(encoded.clone())
{
self.insert_term(quad.subject.as_ref().into(), &encoded.subject)?; self.insert_term(quad.subject.as_ref().into(), &encoded.subject)?;
self.insert_term(quad.predicate.as_ref().into(), &encoded.predicate)?; self.insert_term(quad.predicate.as_ref().into(), &encoded.predicate)?;
self.insert_term(quad.object.as_ref(), &encoded.object)?; self.insert_term(quad.object.as_ref(), &encoded.object)?;
} }
} else { } else {
write_spog_quad(&mut self.buffer, &encoded); write_spog_quad(&mut self.buffer, &encoded);
if !self if self.quads.insert(encoded.clone()) {
.reader
.contains_key(&self.storage.spog_cf, &self.buffer)?
&& self.quads.insert(encoded.clone())
{
self.insert_term(quad.subject.as_ref().into(), &encoded.subject)?; self.insert_term(quad.subject.as_ref().into(), &encoded.subject)?;
self.insert_term(quad.predicate.as_ref().into(), &encoded.predicate)?; self.insert_term(quad.predicate.as_ref().into(), &encoded.predicate)?;
self.insert_term(quad.object.as_ref(), &encoded.object)?; self.insert_term(quad.object.as_ref(), &encoded.object)?;
self.buffer.clear(); self.buffer.clear();
write_term(&mut self.buffer, &encoded.graph_name); write_term(&mut self.buffer, &encoded.graph_name);
if !self if self.graphs.insert(encoded.graph_name.clone()) {
.reader
.contains_key(&self.storage.graphs_cf, &self.buffer)?
&& self.graphs.insert(encoded.graph_name.clone())
{
self.insert_term( self.insert_term(
match quad.graph_name.as_ref() { match quad.graph_name.as_ref() {
GraphNameRef::NamedNode(n) => n.into(), GraphNameRef::NamedNode(n) => n.into(),

Loading…
Cancel
Save