Range deletion microoptimizations

Summary:
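- Made RangeDelAggregator's InternalKeyComparator member a reference-to-const so we don't need to copy-construct it. Because the aggregator now only refers to the comparator, callers must keep the comparator alive for as long as the aggregator exists; call sites that previously passed a temporary InternalKeyComparator were changed to pass a named local instead. Also added an InternalKeyComparator to ImmutableCFOptions so we don't need to construct one for each DBIter.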
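- Made MemTable::NewRangeTombstoneIterator() and the table readers' NewRangeTombstoneIterator() functions return nullptr instead of the result of NewEmptyInternalIterator() when there are no range tombstones to iterate (or range deletions are ignored), which avoids the allocation. Updated callers to check for nullptr accordingly.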
Closes https://github.com/facebook/rocksdb/pull/1548

Differential Revision: D4208169

Pulled By: ajkr

fbshipit-source-id: 2fd65cf
main
Andrew Kryczka 8 years ago committed by Facebook Github Bot
parent 23a18ca5ad
commit fd43ee09da
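  1. db/builder.cc (1 changed line)
  2. db/db_compaction_filter_test.cc (12 changed lines)
  3. db/db_iter.cc (2 changed lines)
  4. db/db_test_util.cc (8 changed lines)
  5. db/flush_job.cc (6 changed lines)
  6. db/memtable.cc (8 changed lines)
  7. db/memtable.h (1 changed line)
  8. db/range_del_aggregator.h (2 changed lines)
  9. db/table_cache.cc (10 changed lines)
  10. db/write_batch_test.cc (3 changed lines)
  11. table/block_based_table_reader.cc (4 changed lines)
  12. table/table_reader.h (2 changed lines)
  13. table/table_test.cc (3 changed lines)
  14. tools/ldb_cmd.cc (4 changed lines)
  15. util/cf_options.cc (1 changed line)
  16. util/cf_options.h (2 changed lines)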

@ -82,7 +82,6 @@ Status BuildTable(
Status s; Status s;
meta->fd.file_size = 0; meta->fd.file_size = 0;
iter->SeekToFirst(); iter->SeekToFirst();
range_del_iter->SeekToFirst();
std::unique_ptr<RangeDelAggregator> range_del_agg( std::unique_ptr<RangeDelAggregator> range_del_agg(
new RangeDelAggregator(internal_comparator, snapshots)); new RangeDelAggregator(internal_comparator, snapshots));
s = range_del_agg->AddTombstones(std::move(range_del_iter)); s = range_del_agg->AddTombstones(std::move(range_del_iter));

@ -261,8 +261,8 @@ TEST_F(DBTestCompactionFilter, CompactionFilter) {
int total = 0; int total = 0;
Arena arena; Arena arena;
{ {
RangeDelAggregator range_del_agg(InternalKeyComparator(options.comparator), InternalKeyComparator icmp(options.comparator);
{} /* snapshots */); RangeDelAggregator range_del_agg(icmp, {} /* snapshots */);
ScopedArenaIterator iter( ScopedArenaIterator iter(
dbfull()->NewInternalIterator(&arena, &range_del_agg, handles_[1])); dbfull()->NewInternalIterator(&arena, &range_del_agg, handles_[1]));
iter->SeekToFirst(); iter->SeekToFirst();
@ -351,8 +351,8 @@ TEST_F(DBTestCompactionFilter, CompactionFilter) {
// level Lmax because this record is at the tip // level Lmax because this record is at the tip
count = 0; count = 0;
{ {
RangeDelAggregator range_del_agg(InternalKeyComparator(options.comparator), InternalKeyComparator icmp(options.comparator);
{} /* snapshots */); RangeDelAggregator range_del_agg(icmp, {} /* snapshots */);
ScopedArenaIterator iter( ScopedArenaIterator iter(
dbfull()->NewInternalIterator(&arena, &range_del_agg, handles_[1])); dbfull()->NewInternalIterator(&arena, &range_del_agg, handles_[1]));
iter->SeekToFirst(); iter->SeekToFirst();
@ -570,8 +570,8 @@ TEST_F(DBTestCompactionFilter, CompactionFilterContextManual) {
int count = 0; int count = 0;
int total = 0; int total = 0;
Arena arena; Arena arena;
RangeDelAggregator range_del_agg(InternalKeyComparator(options.comparator), InternalKeyComparator icmp(options.comparator);
{} /* snapshots */); RangeDelAggregator range_del_agg(icmp, {} /* snapshots */);
ScopedArenaIterator iter( ScopedArenaIterator iter(
dbfull()->NewInternalIterator(&arena, &range_del_agg)); dbfull()->NewInternalIterator(&arena, &range_del_agg));
iter->SeekToFirst(); iter->SeekToFirst();

@ -123,7 +123,7 @@ class DBIter: public Iterator {
prefix_same_as_start_(prefix_same_as_start), prefix_same_as_start_(prefix_same_as_start),
pin_thru_lifetime_(pin_data), pin_thru_lifetime_(pin_data),
total_order_seek_(total_order_seek), total_order_seek_(total_order_seek),
range_del_agg_(InternalKeyComparator(cmp), s) { range_del_agg_(ioptions.internal_comparator, s) {
RecordTick(statistics_, NO_ITERATORS); RecordTick(statistics_, NO_ITERATORS);
prefix_extractor_ = ioptions.prefix_extractor; prefix_extractor_ = ioptions.prefix_extractor;
max_skip_ = max_sequential_skip_in_iterations; max_skip_ = max_sequential_skip_in_iterations;

@ -590,8 +590,8 @@ std::string DBTestBase::Contents(int cf) {
std::string DBTestBase::AllEntriesFor(const Slice& user_key, int cf) { std::string DBTestBase::AllEntriesFor(const Slice& user_key, int cf) {
Arena arena; Arena arena;
auto options = CurrentOptions(); auto options = CurrentOptions();
RangeDelAggregator range_del_agg(InternalKeyComparator(options.comparator), InternalKeyComparator icmp(options.comparator);
{} /* snapshots */); RangeDelAggregator range_del_agg(icmp, {} /* snapshots */);
ScopedArenaIterator iter; ScopedArenaIterator iter;
if (cf == 0) { if (cf == 0) {
iter.set(dbfull()->NewInternalIterator(&arena, &range_del_agg)); iter.set(dbfull()->NewInternalIterator(&arena, &range_del_agg));
@ -999,8 +999,8 @@ void DBTestBase::validateNumberOfEntries(int numValues, int cf) {
ScopedArenaIterator iter; ScopedArenaIterator iter;
Arena arena; Arena arena;
auto options = CurrentOptions(); auto options = CurrentOptions();
RangeDelAggregator range_del_agg(InternalKeyComparator(options.comparator), InternalKeyComparator icmp(options.comparator);
{} /* snapshots */); RangeDelAggregator range_del_agg(icmp, {} /* snapshots */);
if (cf != 0) { if (cf != 0) {
iter.set( iter.set(
dbfull()->NewInternalIterator(&arena, &range_del_agg, handles_[cf])); dbfull()->NewInternalIterator(&arena, &range_del_agg, handles_[cf]));

@ -256,12 +256,14 @@ Status FlushJob::WriteLevel0Table() {
"[%s] [JOB %d] Flushing memtable with next log file: %" PRIu64 "\n", "[%s] [JOB %d] Flushing memtable with next log file: %" PRIu64 "\n",
cfd_->GetName().c_str(), job_context_->job_id, m->GetNextLogNumber()); cfd_->GetName().c_str(), job_context_->job_id, m->GetNextLogNumber());
memtables.push_back(m->NewIterator(ro, &arena)); memtables.push_back(m->NewIterator(ro, &arena));
range_del_iters.push_back(m->NewRangeTombstoneIterator(ro)); auto* range_del_iter = m->NewRangeTombstoneIterator(ro);
if (range_del_iter != nullptr) {
range_del_iters.push_back(range_del_iter);
}
total_num_entries += m->num_entries(); total_num_entries += m->num_entries();
total_num_deletes += m->num_deletes(); total_num_deletes += m->num_deletes();
total_memory_usage += m->ApproximateMemoryUsage(); total_memory_usage += m->ApproximateMemoryUsage();
} }
assert(memtables.size() == range_del_iters.size());
event_logger_->Log() << "job" << job_context_->job_id << "event" event_logger_->Log() << "job" << job_context_->job_id << "event"
<< "flush_started" << "flush_started"

@ -75,6 +75,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
range_del_table_(SkipListFactory().CreateMemTableRep( range_del_table_(SkipListFactory().CreateMemTableRep(
comparator_, &allocator_, nullptr /* transform */, comparator_, &allocator_, nullptr /* transform */,
ioptions.info_log)), ioptions.info_log)),
is_range_del_table_empty_(true),
data_size_(0), data_size_(0),
num_entries_(0), num_entries_(0),
num_deletes_(0), num_deletes_(0),
@ -375,8 +376,8 @@ InternalIterator* MemTable::NewIterator(const ReadOptions& read_options,
InternalIterator* MemTable::NewRangeTombstoneIterator( InternalIterator* MemTable::NewRangeTombstoneIterator(
const ReadOptions& read_options) { const ReadOptions& read_options) {
if (read_options.ignore_range_deletions) { if (read_options.ignore_range_deletions || is_range_del_table_empty_) {
return NewEmptyInternalIterator(); return nullptr;
} }
return new MemTableIterator(*this, read_options, nullptr /* arena */, return new MemTableIterator(*this, read_options, nullptr /* arena */,
true /* use_range_del_table */); true /* use_range_del_table */);
@ -508,6 +509,9 @@ void MemTable::Add(SequenceNumber s, ValueType type,
!first_seqno_.compare_exchange_weak(cur_earliest_seqno, s)) { !first_seqno_.compare_exchange_weak(cur_earliest_seqno, s)) {
} }
} }
if (is_range_del_table_empty_ && type == kTypeRangeDeletion) {
is_range_del_table_empty_ = false;
}
} }
// Callback from MemTable::Get() // Callback from MemTable::Get()

@ -352,6 +352,7 @@ class MemTable {
MemTableAllocator allocator_; MemTableAllocator allocator_;
unique_ptr<MemTableRep> table_; unique_ptr<MemTableRep> table_;
unique_ptr<MemTableRep> range_del_table_; unique_ptr<MemTableRep> range_del_table_;
bool is_range_del_table_empty_;
// Total data size of all data inserted // Total data size of all data inserted
std::atomic<uint64_t> data_size_; std::atomic<uint64_t> data_size_;

@ -105,7 +105,7 @@ class RangeDelAggregator {
SequenceNumber upper_bound_; SequenceNumber upper_bound_;
std::unique_ptr<Rep> rep_; std::unique_ptr<Rep> rep_;
const InternalKeyComparator icmp_; const InternalKeyComparator& icmp_;
}; };
} // namespace rocksdb } // namespace rocksdb

@ -178,7 +178,9 @@ InternalIterator* TableCache::NewIterator(
if (range_del_agg != nullptr && !options.ignore_range_deletions) { if (range_del_agg != nullptr && !options.ignore_range_deletions) {
std::unique_ptr<InternalIterator> range_del_iter(NewRangeDeletionIterator( std::unique_ptr<InternalIterator> range_del_iter(NewRangeDeletionIterator(
options, icomparator, fd, file_read_hist, skip_filters, level)); options, icomparator, fd, file_read_hist, skip_filters, level));
if (range_del_iter != nullptr) {
s = range_del_iter->status(); s = range_del_iter->status();
}
if (s.ok()) { if (s.ok()) {
s = range_del_agg->AddTombstones(std::move(range_del_iter)); s = range_del_agg->AddTombstones(std::move(range_del_iter));
} }
@ -253,7 +255,7 @@ InternalIterator* TableCache::NewRangeDeletionIterator(
const FileDescriptor& fd, HistogramImpl* file_read_hist, bool skip_filters, const FileDescriptor& fd, HistogramImpl* file_read_hist, bool skip_filters,
int level) { int level) {
if (options.ignore_range_deletions) { if (options.ignore_range_deletions) {
return NewEmptyInternalIterator(); return nullptr;
} }
Status s; Status s;
TableReader* table_reader = fd.table_reader; TableReader* table_reader = fd.table_reader;
@ -270,8 +272,12 @@ InternalIterator* TableCache::NewRangeDeletionIterator(
if (s.ok()) { if (s.ok()) {
auto* result = table_reader->NewRangeTombstoneIterator(options); auto* result = table_reader->NewRangeTombstoneIterator(options);
if (cache_handle != nullptr) { if (cache_handle != nullptr) {
if (result == nullptr) {
ReleaseHandle(cache_handle);
} else {
result->RegisterCleanup(&UnrefEntry, cache_, cache_handle); result->RegisterCleanup(&UnrefEntry, cache_, cache_handle);
} }
}
return result; return result;
} }
return NewErrorInternalIterator(s); return NewErrorInternalIterator(s);
@ -287,7 +293,9 @@ Status TableCache::Get(const ReadOptions& options,
!options.ignore_range_deletions) { !options.ignore_range_deletions) {
std::unique_ptr<InternalIterator> range_del_iter(NewRangeDeletionIterator( std::unique_ptr<InternalIterator> range_del_iter(NewRangeDeletionIterator(
options, internal_comparator, fd, file_read_hist, skip_filters, level)); options, internal_comparator, fd, file_read_hist, skip_filters, level));
if (range_del_iter != nullptr) {
s = range_del_iter->status(); s = range_del_iter->status();
}
if (s.ok()) { if (s.ok()) {
s = get_context->range_del_agg()->AddTombstones( s = get_context->range_del_agg()->AddTombstones(
std::move(range_del_iter)); std::move(range_del_iter));

@ -55,6 +55,9 @@ static std::string PrintContents(WriteBatch* b) {
iter = mem->NewRangeTombstoneIterator(ReadOptions()); iter = mem->NewRangeTombstoneIterator(ReadOptions());
iter_guard.reset(iter); iter_guard.reset(iter);
} }
if (iter == nullptr) {
continue;
}
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
ParsedInternalKey ikey; ParsedInternalKey ikey;
memset((void*)&ikey, 0, sizeof(ikey)); memset((void*)&ikey, 0, sizeof(ikey));

@ -1489,7 +1489,7 @@ InternalIterator* BlockBasedTable::NewIterator(const ReadOptions& read_options,
InternalIterator* BlockBasedTable::NewRangeTombstoneIterator( InternalIterator* BlockBasedTable::NewRangeTombstoneIterator(
const ReadOptions& read_options) { const ReadOptions& read_options) {
if (rep_->range_del_handle.IsNull()) { if (rep_->range_del_handle.IsNull()) {
return NewEmptyInternalIterator(); return nullptr;
} }
std::string str; std::string str;
rep_->range_del_handle.EncodeTo(&str); rep_->range_del_handle.EncodeTo(&str);
@ -1967,6 +1967,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
} }
// Output range deletions block // Output range deletions block
auto* range_del_iter = NewRangeTombstoneIterator(ReadOptions()); auto* range_del_iter = NewRangeTombstoneIterator(ReadOptions());
if (range_del_iter != nullptr) {
range_del_iter->SeekToFirst(); range_del_iter->SeekToFirst();
if (range_del_iter->Valid()) { if (range_del_iter->Valid()) {
out_file->Append( out_file->Append(
@ -1979,6 +1980,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
out_file->Append("\n"); out_file->Append("\n");
} }
delete range_del_iter; delete range_del_iter;
}
// Output Data blocks // Output Data blocks
s = DumpDataBlocks(out_file); s = DumpDataBlocks(out_file);

@ -44,7 +44,7 @@ class TableReader {
virtual InternalIterator* NewRangeTombstoneIterator( virtual InternalIterator* NewRangeTombstoneIterator(
const ReadOptions& read_options) { const ReadOptions& read_options) {
return NewEmptyInternalIterator(); return nullptr;
} }
// Given a key, return an approximate byte offset in the file where // Given a key, return an approximate byte offset in the file where

@ -2459,6 +2459,9 @@ TEST_F(MemTableTest, Simple) {
iter = memtable->NewRangeTombstoneIterator(ReadOptions()); iter = memtable->NewRangeTombstoneIterator(ReadOptions());
iter_guard.reset(iter); iter_guard.reset(iter);
} }
if (iter == nullptr) {
continue;
}
iter->SeekToFirst(); iter->SeekToFirst();
while (iter->Valid()) { while (iter->Valid()) {
fprintf(stderr, "key: '%s' -> '%s'\n", iter->key().ToString().c_str(), fprintf(stderr, "key: '%s' -> '%s'\n", iter->key().ToString().c_str(),

@ -1187,8 +1187,8 @@ void InternalDumpCommand::DoCommand() {
uint64_t s1=0,s2=0; uint64_t s1=0,s2=0;
// Setup internal key iterator // Setup internal key iterator
Arena arena; Arena arena;
RangeDelAggregator range_del_agg(InternalKeyComparator(options_.comparator), auto icmp = InternalKeyComparator(options_.comparator);
{} /* snapshots */); RangeDelAggregator range_del_agg(icmp, {} /* snapshots */);
ScopedArenaIterator iter(idb->NewInternalIterator(&arena, &range_del_agg)); ScopedArenaIterator iter(idb->NewInternalIterator(&arena, &range_del_agg));
Status st = iter->status(); Status st = iter->status();
if (!st.ok()) { if (!st.ok()) {

@ -31,6 +31,7 @@ ImmutableCFOptions::ImmutableCFOptions(const ImmutableDBOptions& db_options,
compaction_options_fifo(cf_options.compaction_options_fifo), compaction_options_fifo(cf_options.compaction_options_fifo),
prefix_extractor(cf_options.prefix_extractor.get()), prefix_extractor(cf_options.prefix_extractor.get()),
user_comparator(cf_options.comparator), user_comparator(cf_options.comparator),
internal_comparator(InternalKeyComparator(cf_options.comparator)),
merge_operator(cf_options.merge_operator.get()), merge_operator(cf_options.merge_operator.get()),
compaction_filter(cf_options.compaction_filter), compaction_filter(cf_options.compaction_filter),
compaction_filter_factory(cf_options.compaction_filter_factory.get()), compaction_filter_factory(cf_options.compaction_filter_factory.get()),

@ -8,6 +8,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "db/dbformat.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "util/compression.h" #include "util/compression.h"
#include "util/db_options.h" #include "util/db_options.h"
@ -35,6 +36,7 @@ struct ImmutableCFOptions {
const SliceTransform* prefix_extractor; const SliceTransform* prefix_extractor;
const Comparator* user_comparator; const Comparator* user_comparator;
InternalKeyComparator internal_comparator;
MergeOperator* merge_operator; MergeOperator* merge_operator;

Loading…
Cancel
Save