Bulk loader: removes dead code

There was some dead code, and key-values were deduplicated twice
encoder
Tpt 3 years ago
parent c815782631
commit ad710dc825
  1. 54
      lib/src/storage/mod.rs

@ -1323,36 +1323,28 @@ impl FileBulkLoader {
}
fn encode(&mut self, quads: impl IntoIterator<Item = Quad>) -> Result<(), StorageError> {
let mut buffer = Vec::new();
for quad in quads {
let encoded = EncodedQuad::from(quad.as_ref());
buffer.clear();
if quad.graph_name.is_default_graph() {
write_spo_quad(&mut buffer, &encoded);
if self.triples.insert(encoded.clone()) {
self.insert_term(quad.subject.as_ref().into(), &encoded.subject)?;
self.insert_term(quad.predicate.as_ref().into(), &encoded.predicate)?;
self.insert_term(quad.object.as_ref(), &encoded.object)?;
}
} else {
write_spog_quad(&mut buffer, &encoded);
if self.quads.insert(encoded.clone()) {
self.insert_term(quad.subject.as_ref().into(), &encoded.subject)?;
self.insert_term(quad.predicate.as_ref().into(), &encoded.predicate)?;
self.insert_term(quad.object.as_ref(), &encoded.object)?;
buffer.clear();
write_term(&mut buffer, &encoded.graph_name);
if self.graphs.insert(encoded.graph_name.clone()) {
self.insert_term(
match quad.graph_name.as_ref() {
GraphNameRef::NamedNode(n) => n.into(),
GraphNameRef::BlankNode(n) => n.into(),
GraphNameRef::DefaultGraph => unreachable!(),
},
&encoded.graph_name,
)?;
}
} else if self.quads.insert(encoded.clone()) {
self.insert_term(quad.subject.as_ref().into(), &encoded.subject)?;
self.insert_term(quad.predicate.as_ref().into(), &encoded.predicate)?;
self.insert_term(quad.object.as_ref(), &encoded.object)?;
if self.graphs.insert(encoded.graph_name.clone()) {
self.insert_term(
match quad.graph_name.as_ref() {
GraphNameRef::NamedNode(n) => n.into(),
GraphNameRef::BlankNode(n) => n.into(),
GraphNameRef::DefaultGraph => unreachable!(),
},
&encoded.graph_name,
)?;
}
}
}
@ -1405,11 +1397,11 @@ impl FileBulkLoader {
}
if !self.quads.is_empty() {
let quads = take(&mut self.graphs);
to_load.push((
&self.storage.graphs_cf,
self.build_sst_for_keys(quads.into_iter().map(|g| encode_term(&g)))?,
self.build_sst_for_keys(self.graphs.iter().map(encode_term))?,
));
self.graphs.clear();
to_load.push((
&self.storage.gspo_cf,
@ -1500,19 +1492,9 @@ impl FileBulkLoader {
) -> Result<PathBuf, StorageError> {
let mut values = values.collect::<Vec<_>>();
values.sort_unstable();
let deduplicated_values = values.iter().enumerate().filter_map(|(i, value)| {
if values
.get(i + 1)
.map_or(true, |next_value| value != next_value)
{
Some(value)
} else {
None
}
});
let mut sst = self.storage.db.new_sst_file()?;
for value in deduplicated_values {
sst.insert_empty(value)?;
for value in values {
sst.insert_empty(&value)?;
}
sst.finish()
}

Loading…
Cancel
Save