Bulk loader: removes dead code

There was some dead code, and key-values were deduplicated twice
encoder
Tpt 3 years ago
parent c815782631
commit ad710dc825
  1. 54
      lib/src/storage/mod.rs

@ -1323,36 +1323,28 @@ impl FileBulkLoader {
} }
fn encode(&mut self, quads: impl IntoIterator<Item = Quad>) -> Result<(), StorageError> { fn encode(&mut self, quads: impl IntoIterator<Item = Quad>) -> Result<(), StorageError> {
let mut buffer = Vec::new();
for quad in quads { for quad in quads {
let encoded = EncodedQuad::from(quad.as_ref()); let encoded = EncodedQuad::from(quad.as_ref());
buffer.clear();
if quad.graph_name.is_default_graph() { if quad.graph_name.is_default_graph() {
write_spo_quad(&mut buffer, &encoded);
if self.triples.insert(encoded.clone()) { if self.triples.insert(encoded.clone()) {
self.insert_term(quad.subject.as_ref().into(), &encoded.subject)?; self.insert_term(quad.subject.as_ref().into(), &encoded.subject)?;
self.insert_term(quad.predicate.as_ref().into(), &encoded.predicate)?; self.insert_term(quad.predicate.as_ref().into(), &encoded.predicate)?;
self.insert_term(quad.object.as_ref(), &encoded.object)?; self.insert_term(quad.object.as_ref(), &encoded.object)?;
} }
} else { } else if self.quads.insert(encoded.clone()) {
write_spog_quad(&mut buffer, &encoded); self.insert_term(quad.subject.as_ref().into(), &encoded.subject)?;
if self.quads.insert(encoded.clone()) { self.insert_term(quad.predicate.as_ref().into(), &encoded.predicate)?;
self.insert_term(quad.subject.as_ref().into(), &encoded.subject)?; self.insert_term(quad.object.as_ref(), &encoded.object)?;
self.insert_term(quad.predicate.as_ref().into(), &encoded.predicate)?;
self.insert_term(quad.object.as_ref(), &encoded.object)?; if self.graphs.insert(encoded.graph_name.clone()) {
self.insert_term(
buffer.clear(); match quad.graph_name.as_ref() {
write_term(&mut buffer, &encoded.graph_name); GraphNameRef::NamedNode(n) => n.into(),
if self.graphs.insert(encoded.graph_name.clone()) { GraphNameRef::BlankNode(n) => n.into(),
self.insert_term( GraphNameRef::DefaultGraph => unreachable!(),
match quad.graph_name.as_ref() { },
GraphNameRef::NamedNode(n) => n.into(), &encoded.graph_name,
GraphNameRef::BlankNode(n) => n.into(), )?;
GraphNameRef::DefaultGraph => unreachable!(),
},
&encoded.graph_name,
)?;
}
} }
} }
} }
@ -1405,11 +1397,11 @@ impl FileBulkLoader {
} }
if !self.quads.is_empty() { if !self.quads.is_empty() {
let quads = take(&mut self.graphs);
to_load.push(( to_load.push((
&self.storage.graphs_cf, &self.storage.graphs_cf,
self.build_sst_for_keys(quads.into_iter().map(|g| encode_term(&g)))?, self.build_sst_for_keys(self.graphs.iter().map(encode_term))?,
)); ));
self.graphs.clear();
to_load.push(( to_load.push((
&self.storage.gspo_cf, &self.storage.gspo_cf,
@ -1500,19 +1492,9 @@ impl FileBulkLoader {
) -> Result<PathBuf, StorageError> { ) -> Result<PathBuf, StorageError> {
let mut values = values.collect::<Vec<_>>(); let mut values = values.collect::<Vec<_>>();
values.sort_unstable(); values.sort_unstable();
let deduplicated_values = values.iter().enumerate().filter_map(|(i, value)| {
if values
.get(i + 1)
.map_or(true, |next_value| value != next_value)
{
Some(value)
} else {
None
}
});
let mut sst = self.storage.db.new_sst_file()?; let mut sst = self.storage.db.new_sst_file()?;
for value in deduplicated_values { for value in values {
sst.insert_empty(value)?; sst.insert_empty(&value)?;
} }
sst.finish() sst.finish()
} }

Loading…
Cancel
Save