Eliminate redundant cache lookup with range deletion

Summary:
When we introduced the range deletion block, TableCache::Get() and TableCache::NewIterator() each did two table cache lookups: one for the range deletion block iterator and another to get the table reader to which the Get()/NewIterator() call is delegated. This extra cache lookup was very CPU-intensive (about 10% overhead in a read-heavy benchmark). We can avoid it by reusing the Cache::Handle created for the range deletion block iterator to get the file reader.
Closes https://github.com/facebook/rocksdb/pull/1537

Differential Revision: D4201167

Pulled By: ajkr

fbshipit-source-id: d33ffd8
main
Andrew Kryczka 8 years ago committed by Facebook Github Bot
parent 182b940e70
commit 734e4acafb
  1. 21
      db/db_compaction_test.cc
  2. 95
      db/table_cache.cc
  3. 10
      db/table_cache.h

@ -294,8 +294,7 @@ TEST_F(DBCompactionTest, TestTableReaderForCompaction) {
num_new_table_reader = 0; num_new_table_reader = 0;
ASSERT_EQ(Key(k), Get(Key(k))); ASSERT_EQ(Key(k), Get(Key(k)));
// lookup iterator from table cache and no need to create a new one. // lookup iterator from table cache and no need to create a new one.
// a second table cache iterator is created for range tombstones ASSERT_EQ(num_table_cache_lookup, 1);
ASSERT_EQ(num_table_cache_lookup, 2);
ASSERT_EQ(num_new_table_reader, 0); ASSERT_EQ(num_new_table_reader, 0);
} }
} }
@ -306,9 +305,9 @@ TEST_F(DBCompactionTest, TestTableReaderForCompaction) {
dbfull()->TEST_WaitForCompact(); dbfull()->TEST_WaitForCompact();
// Preloading iterator issues one table cache lookup and creates // Preloading iterator issues one table cache lookup and creates
// a new table reader. One file is created for flush and one for compaction. // a new table reader. One file is created for flush and one for compaction.
// Compaction inputs make no table cache look-up for data iterators and one // Compaction inputs make no table cache look-up for data/range deletion
// look-up per compaction input file (three). // iterators
ASSERT_EQ(num_table_cache_lookup, 5); ASSERT_EQ(num_table_cache_lookup, 2);
// Create new iterator for: // Create new iterator for:
// (1) 1 for verifying flush results // (1) 1 for verifying flush results
// (2) 3 for compaction input files // (2) 3 for compaction input files
@ -318,8 +317,7 @@ TEST_F(DBCompactionTest, TestTableReaderForCompaction) {
num_table_cache_lookup = 0; num_table_cache_lookup = 0;
num_new_table_reader = 0; num_new_table_reader = 0;
ASSERT_EQ(Key(1), Get(Key(1))); ASSERT_EQ(Key(1), Get(Key(1)));
// a second table cache iterator is created for range tombstones ASSERT_EQ(num_table_cache_lookup, 1);
ASSERT_EQ(num_table_cache_lookup, 2);
ASSERT_EQ(num_new_table_reader, 0); ASSERT_EQ(num_new_table_reader, 0);
num_table_cache_lookup = 0; num_table_cache_lookup = 0;
@ -329,17 +327,16 @@ TEST_F(DBCompactionTest, TestTableReaderForCompaction) {
cro.target_level = 2; cro.target_level = 2;
cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
db_->CompactRange(cro, nullptr, nullptr); db_->CompactRange(cro, nullptr, nullptr);
// Only verifying compaction outputs issues two table cache lookup // Only verifying compaction outputs issues one table cache lookup
// (one for data block, one for range deletion block). // for both data block and range deletion block).
ASSERT_EQ(num_table_cache_lookup, 2); ASSERT_EQ(num_table_cache_lookup, 1);
// One for compaction input, one for verifying compaction results. // One for compaction input, one for verifying compaction results.
ASSERT_EQ(num_new_table_reader, 2); ASSERT_EQ(num_new_table_reader, 2);
num_table_cache_lookup = 0; num_table_cache_lookup = 0;
num_new_table_reader = 0; num_new_table_reader = 0;
ASSERT_EQ(Key(1), Get(Key(1))); ASSERT_EQ(Key(1), Get(Key(1)));
// a second table cache iterator is created for range tombstones ASSERT_EQ(num_table_cache_lookup, 1);
ASSERT_EQ(num_table_cache_lookup, 2);
ASSERT_EQ(num_new_table_reader, 0); ASSERT_EQ(num_new_table_reader, 0);
rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks(); rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();

@ -175,17 +175,6 @@ InternalIterator* TableCache::NewIterator(
PERF_TIMER_GUARD(new_table_iterator_nanos); PERF_TIMER_GUARD(new_table_iterator_nanos);
Status s; Status s;
if (range_del_agg != nullptr && !options.ignore_range_deletions) {
std::unique_ptr<InternalIterator> range_del_iter(NewRangeDeletionIterator(
options, icomparator, fd, file_read_hist, skip_filters, level));
if (range_del_iter != nullptr) {
s = range_del_iter->status();
}
if (s.ok()) {
s = range_del_agg->AddTombstones(std::move(range_del_iter));
}
}
bool create_new_table_reader = false; bool create_new_table_reader = false;
TableReader* table_reader = nullptr; TableReader* table_reader = nullptr;
Cache::Handle* handle = nullptr; Cache::Handle* handle = nullptr;
@ -226,14 +215,15 @@ InternalIterator* TableCache::NewIterator(
} }
} }
} }
InternalIterator* result = nullptr;
if (s.ok()) { if (s.ok()) {
InternalIterator* result = result = table_reader->NewIterator(options, arena, skip_filters);
table_reader->NewIterator(options, arena, skip_filters);
if (create_new_table_reader) { if (create_new_table_reader) {
assert(handle == nullptr); assert(handle == nullptr);
result->RegisterCleanup(&DeleteTableReader, table_reader, nullptr); result->RegisterCleanup(&DeleteTableReader, table_reader, nullptr);
} else if (handle != nullptr) { } else if (handle != nullptr) {
result->RegisterCleanup(&UnrefEntry, cache_, handle); result->RegisterCleanup(&UnrefEntry, cache_, handle);
handle = nullptr; // prevent from releasing below
} }
if (for_compaction) { if (for_compaction) {
@ -242,74 +232,38 @@ InternalIterator* TableCache::NewIterator(
if (table_reader_ptr != nullptr) { if (table_reader_ptr != nullptr) {
*table_reader_ptr = table_reader; *table_reader_ptr = table_reader;
} }
return result;
}
if (handle != nullptr) {
ReleaseHandle(handle);
} }
return NewErrorInternalIterator(s, arena); if (s.ok() && range_del_agg != nullptr && !options.ignore_range_deletions) {
} std::unique_ptr<InternalIterator> range_del_iter(
table_reader->NewRangeTombstoneIterator(options));
InternalIterator* TableCache::NewRangeDeletionIterator( if (range_del_iter != nullptr) {
const ReadOptions& options, const InternalKeyComparator& icmp, s = range_del_iter->status();
const FileDescriptor& fd, HistogramImpl* file_read_hist, bool skip_filters,
int level) {
if (options.ignore_range_deletions) {
return nullptr;
} }
Status s;
TableReader* table_reader = fd.table_reader;
Cache::Handle* cache_handle = nullptr;
if (table_reader == nullptr) {
s = FindTable(env_options_, icmp, fd, &cache_handle,
options.read_tier == kBlockCacheTier /* no_io */,
true /* record_read_stats */, file_read_hist, skip_filters,
level);
if (s.ok()) { if (s.ok()) {
table_reader = GetTableReaderFromHandle(cache_handle); s = range_del_agg->AddTombstones(std::move(range_del_iter));
} }
} }
if (s.ok()) {
auto* result = table_reader->NewRangeTombstoneIterator(options); if (handle != nullptr) {
if (cache_handle != nullptr) { ReleaseHandle(handle);
if (result == nullptr) {
ReleaseHandle(cache_handle);
} else {
result->RegisterCleanup(&UnrefEntry, cache_, cache_handle);
} }
if (!s.ok()) {
assert(result == nullptr);
result = NewErrorInternalIterator(s, arena);
} }
return result; return result;
} }
return NewErrorInternalIterator(s);
}
Status TableCache::Get(const ReadOptions& options, Status TableCache::Get(const ReadOptions& options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const FileDescriptor& fd, const Slice& k, const FileDescriptor& fd, const Slice& k,
GetContext* get_context, HistogramImpl* file_read_hist, GetContext* get_context, HistogramImpl* file_read_hist,
bool skip_filters, int level) { bool skip_filters, int level) {
Status s;
if (get_context->range_del_agg() != nullptr &&
!options.ignore_range_deletions) {
std::unique_ptr<InternalIterator> range_del_iter(NewRangeDeletionIterator(
options, internal_comparator, fd, file_read_hist, skip_filters, level));
if (range_del_iter != nullptr) {
s = range_del_iter->status();
}
if (s.ok()) {
s = get_context->range_del_agg()->AddTombstones(
std::move(range_del_iter));
}
}
TableReader* t = fd.table_reader;
Cache::Handle* handle = nullptr;
std::string* row_cache_entry = nullptr; std::string* row_cache_entry = nullptr;
bool done = false; bool done = false;
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
IterKey row_cache_key; IterKey row_cache_key;
std::string row_cache_entry_buffer; std::string row_cache_entry_buffer;
if (s.ok()) {
// Check row cache if enabled. Since row cache does not currently store // Check row cache if enabled. Since row cache does not currently store
// sequence numbers, we cannot use it if we need to fetch the sequence. // sequence numbers, we cannot use it if we need to fetch the sequence.
if (ioptions_.row_cache && !get_context->NeedToReadSequence()) { if (ioptions_.row_cache && !get_context->NeedToReadSequence()) {
@ -345,10 +299,12 @@ Status TableCache::Get(const ReadOptions& options,
row_cache_entry = &row_cache_entry_buffer; row_cache_entry = &row_cache_entry_buffer;
} }
} }
}
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
Status s;
TableReader* t = fd.table_reader;
Cache::Handle* handle = nullptr;
if (!done && s.ok()) { if (!done && s.ok()) {
if (!t) { if (t == nullptr) {
s = FindTable(env_options_, internal_comparator, fd, &handle, s = FindTable(env_options_, internal_comparator, fd, &handle,
options.read_tier == kBlockCacheTier /* no_io */, options.read_tier == kBlockCacheTier /* no_io */,
true /* record_read_stats */, file_read_hist, skip_filters, true /* record_read_stats */, file_read_hist, skip_filters,
@ -368,6 +324,19 @@ Status TableCache::Get(const ReadOptions& options,
done = true; done = true;
} }
} }
if (!done && s.ok() && get_context->range_del_agg() != nullptr &&
!options.ignore_range_deletions) {
std::unique_ptr<InternalIterator> range_del_iter(
t->NewRangeTombstoneIterator(options));
if (range_del_iter != nullptr) {
s = range_del_iter->status();
}
if (s.ok()) {
s = get_context->range_del_agg()->AddTombstones(
std::move(range_del_iter));
}
}
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
// Put the replay log in row cache only if something was found. // Put the replay log in row cache only if something was found.
if (!done && s.ok() && row_cache_entry && !row_cache_entry->empty()) { if (!done && s.ok() && row_cache_entry && !row_cache_entry->empty()) {

@ -58,16 +58,6 @@ class TableCache {
HistogramImpl* file_read_hist = nullptr, bool for_compaction = false, HistogramImpl* file_read_hist = nullptr, bool for_compaction = false,
Arena* arena = nullptr, bool skip_filters = false, int level = -1); Arena* arena = nullptr, bool skip_filters = false, int level = -1);
// Return an iterator over the range deletion meta-block for the specified
// file number.
// @param skip_filters Disables loading/accessing the filter block
// @param level The level this table is at, -1 for "not set / don't know"
InternalIterator* NewRangeDeletionIterator(const ReadOptions& options,
const InternalKeyComparator& icmp,
const FileDescriptor& fd,
HistogramImpl* file_read_hist,
bool skip_filters, int level);
// If a seek to internal key "k" in specified file finds an entry, // If a seek to internal key "k" in specified file finds an entry,
// call (*handle_result)(arg, found_key, found_value) repeatedly until // call (*handle_result)(arg, found_key, found_value) repeatedly until
// it returns false. // it returns false.

Loading…
Cancel
Save