Add a ticker stat for number of keys skipped during iteration

Summary:
This diff adds a new ticker stat, NUMBER_ITER_SKIP, to count the
number of internal keys skipped during iteration. Keys can be skipped
due to deletes, or lower sequence number, or higher sequence number
than the one requested.

Also, fix the issue when StatisticsData is naturally aligned on cacheline boundary,
padding becomes a zero size array, which the Windows compiler doesn't
like. So add a cacheline worth of padding in that case to keep it happy.
We cannot conditionally add padding as gcc doesn't allow using sizeof
in preprocessor directives.
Closes https://github.com/facebook/rocksdb/pull/3177

Differential Revision: D6353897

Pulled By: anand1976

fbshipit-source-id: 441d5a09af9c4e22e7355242dfc0c7b27aa0a6c2
main
anand1976 7 years ago committed by Facebook Github Bot
parent 578f36e431
commit d394a6bb48
  1. 13
      db/db_iter.cc
  2. 81
      db/db_iterator_test.cc
  3. 4
      include/rocksdb/statistics.h
  4. 6
      java/rocksjni/portal.h
  5. 3
      monitoring/statistics.h

@ -76,6 +76,7 @@ class DBIter final: public Iterator {
prev_count_ = 0; prev_count_ = 0;
prev_found_count_ = 0; prev_found_count_ = 0;
bytes_read_ = 0; bytes_read_ = 0;
skip_count_ = 0;
} }
void BumpGlobalStatistics(Statistics* global_statistics) { void BumpGlobalStatistics(Statistics* global_statistics) {
@ -84,6 +85,7 @@ class DBIter final: public Iterator {
RecordTick(global_statistics, NUMBER_DB_PREV, prev_count_); RecordTick(global_statistics, NUMBER_DB_PREV, prev_count_);
RecordTick(global_statistics, NUMBER_DB_PREV_FOUND, prev_found_count_); RecordTick(global_statistics, NUMBER_DB_PREV_FOUND, prev_found_count_);
RecordTick(global_statistics, ITER_BYTES_READ, bytes_read_); RecordTick(global_statistics, ITER_BYTES_READ, bytes_read_);
RecordTick(global_statistics, NUMBER_ITER_SKIP, skip_count_);
PERF_COUNTER_ADD(iter_read_bytes, bytes_read_); PERF_COUNTER_ADD(iter_read_bytes, bytes_read_);
ResetCounters(); ResetCounters();
} }
@ -98,6 +100,8 @@ class DBIter final: public Iterator {
uint64_t prev_found_count_; uint64_t prev_found_count_;
// Map to Tickers::ITER_BYTES_READ // Map to Tickers::ITER_BYTES_READ
uint64_t bytes_read_; uint64_t bytes_read_;
// Map to Tickers::NUMBER_ITER_SKIP
uint64_t skip_count_;
}; };
DBIter(Env* _env, const ReadOptions& read_options, DBIter(Env* _env, const ReadOptions& read_options,
@ -147,6 +151,7 @@ class DBIter final: public Iterator {
// Compiler warning issue filed: // Compiler warning issue filed:
// https://github.com/facebook/rocksdb/issues/3013 // https://github.com/facebook/rocksdb/issues/3013
RecordTick(statistics_, NO_ITERATORS, uint64_t(-1)); RecordTick(statistics_, NO_ITERATORS, uint64_t(-1));
ResetInternalKeysSkippedCounter();
local_stats_.BumpGlobalStatistics(statistics_); local_stats_.BumpGlobalStatistics(statistics_);
if (!arena_mode_) { if (!arena_mode_) {
delete iter_; delete iter_;
@ -267,6 +272,10 @@ class DBIter final: public Iterator {
} }
inline void ResetInternalKeysSkippedCounter() { inline void ResetInternalKeysSkippedCounter() {
local_stats_.skip_count_ += num_internal_keys_skipped_;
if (valid_) {
local_stats_.skip_count_--;
}
num_internal_keys_skipped_ = 0; num_internal_keys_skipped_ = 0;
} }
@ -1143,6 +1152,7 @@ void DBIter::Seek(const Slice& target) {
} }
if (statistics_ != nullptr) { if (statistics_ != nullptr) {
if (valid_) { if (valid_) {
// Decrement since we don't want to count this key as skipped
RecordTick(statistics_, NUMBER_DB_SEEK_FOUND); RecordTick(statistics_, NUMBER_DB_SEEK_FOUND);
RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size()); RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size()); PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size());
@ -1269,7 +1279,8 @@ void DBIter::SeekToLast() {
if (!Valid()) { if (!Valid()) {
return; return;
} else if (user_comparator_->Equal(*iterate_upper_bound_, key())) { } else if (user_comparator_->Equal(*iterate_upper_bound_, key())) {
Prev(); ReleaseTempPinnedData();
PrevInternal();
} }
} else { } else {
PrevInternal(); PrevInternal();

@ -2076,6 +2076,87 @@ TEST_F(DBIteratorTest, TableFilter) {
} }
} }
TEST_F(DBIteratorTest, SkipStatistics) {
Options options = CurrentOptions();
options.statistics = rocksdb::CreateDBStatistics();
DestroyAndReopen(options);
int skip_count = 0;
// write a bunch of kvs to the database.
ASSERT_OK(Put("a", "1"));
ASSERT_OK(Put("b", "1"));
ASSERT_OK(Put("c", "1"));
ASSERT_OK(Flush());
ASSERT_OK(Put("d", "1"));
ASSERT_OK(Put("e", "1"));
ASSERT_OK(Put("f", "1"));
ASSERT_OK(Put("a", "2"));
ASSERT_OK(Put("b", "2"));
ASSERT_OK(Flush());
ASSERT_OK(Delete("d"));
ASSERT_OK(Delete("e"));
ASSERT_OK(Delete("f"));
Iterator* iter = db_->NewIterator(ReadOptions());
int count = 0;
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
ASSERT_OK(iter->status());
count++;
}
ASSERT_EQ(count, 3);
delete iter;
skip_count += 8; // 3 deletes + 3 original keys + 2 lower in sequence
ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
iter = db_->NewIterator(ReadOptions());
count = 0;
for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
ASSERT_OK(iter->status());
count++;
}
ASSERT_EQ(count, 3);
delete iter;
skip_count += 8; // Same as above, but in reverse order
ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
ASSERT_OK(Put("aa", "1"));
ASSERT_OK(Put("ab", "1"));
ASSERT_OK(Put("ac", "1"));
ASSERT_OK(Put("ad", "1"));
ASSERT_OK(Flush());
ASSERT_OK(Delete("ab"));
ASSERT_OK(Delete("ac"));
ASSERT_OK(Delete("ad"));
ReadOptions ro;
Slice prefix("b");
ro.iterate_upper_bound = &prefix;
iter = db_->NewIterator(ro);
count = 0;
for(iter->Seek("aa"); iter->Valid(); iter->Next()) {
ASSERT_OK(iter->status());
count++;
}
ASSERT_EQ(count, 1);
delete iter;
skip_count += 6; // 3 deletes + 3 original keys
ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
iter = db_->NewIterator(ro);
count = 0;
for(iter->SeekToLast(); iter->Valid(); iter->Prev()) {
ASSERT_OK(iter->status());
count++;
}
ASSERT_EQ(count, 2);
delete iter;
// 3 deletes + 3 original keys + 2 keys of "b" + lower sequence of "a"
skip_count += 9;
ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
}
} // namespace rocksdb } // namespace rocksdb
int main(int argc, char** argv) { int main(int argc, char** argv) {

@ -223,6 +223,9 @@ enum Tickers : uint32_t {
// Number of refill intervals where rate limiter's bytes are fully consumed. // Number of refill intervals where rate limiter's bytes are fully consumed.
NUMBER_RATE_LIMITER_DRAINS, NUMBER_RATE_LIMITER_DRAINS,
// Number of internal keys skipped by Iterator
NUMBER_ITER_SKIP,
TICKER_ENUM_MAX TICKER_ENUM_MAX
}; };
@ -328,6 +331,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{READ_AMP_ESTIMATE_USEFUL_BYTES, "rocksdb.read.amp.estimate.useful.bytes"}, {READ_AMP_ESTIMATE_USEFUL_BYTES, "rocksdb.read.amp.estimate.useful.bytes"},
{READ_AMP_TOTAL_READ_BYTES, "rocksdb.read.amp.total.read.bytes"}, {READ_AMP_TOTAL_READ_BYTES, "rocksdb.read.amp.total.read.bytes"},
{NUMBER_RATE_LIMITER_DRAINS, "rocksdb.number.rate_limiter.drains"}, {NUMBER_RATE_LIMITER_DRAINS, "rocksdb.number.rate_limiter.drains"},
{NUMBER_ITER_SKIP, "rocksdb.number.iter.skip"},
}; };
/** /**

@ -2523,8 +2523,10 @@ class TickerTypeJni {
return 0x5B; return 0x5B;
case rocksdb::Tickers::NUMBER_RATE_LIMITER_DRAINS: case rocksdb::Tickers::NUMBER_RATE_LIMITER_DRAINS:
return 0x5C; return 0x5C;
case rocksdb::Tickers::TICKER_ENUM_MAX: case rocksdb::Tickers::NUMBER_ITER_SKIP:
return 0x5D; return 0x5D;
case rocksdb::Tickers::TICKER_ENUM_MAX:
return 0x5E;
default: default:
// undefined/default // undefined/default
@ -2723,6 +2725,8 @@ class TickerTypeJni {
case 0x5C: case 0x5C:
return rocksdb::Tickers::NUMBER_RATE_LIMITER_DRAINS; return rocksdb::Tickers::NUMBER_RATE_LIMITER_DRAINS;
case 0x5D: case 0x5D:
return rocksdb::Tickers::NUMBER_ITER_SKIP;
case 0x5E:
return rocksdb::Tickers::TICKER_ENUM_MAX; return rocksdb::Tickers::TICKER_ENUM_MAX;
default: default:

@ -76,8 +76,7 @@ class StatisticsImpl : public Statistics {
padding[(CACHE_LINE_SIZE - padding[(CACHE_LINE_SIZE -
(INTERNAL_TICKER_ENUM_MAX * sizeof(std::atomic_uint_fast64_t) + (INTERNAL_TICKER_ENUM_MAX * sizeof(std::atomic_uint_fast64_t) +
INTERNAL_HISTOGRAM_ENUM_MAX * sizeof(HistogramImpl)) % INTERNAL_HISTOGRAM_ENUM_MAX * sizeof(HistogramImpl)) %
CACHE_LINE_SIZE) % CACHE_LINE_SIZE)] ROCKSDB_FIELD_UNUSED;
CACHE_LINE_SIZE] ROCKSDB_FIELD_UNUSED;
}; };
static_assert(sizeof(StatisticsData) % 64 == 0, "Expected 64-byte aligned"); static_assert(sizeof(StatisticsData) % 64 == 0, "Expected 64-byte aligned");

Loading…
Cancel
Save