Add experimental PerfContext counters for db iterator Prev/Next/Seek* APIs (#11320)

Summary:
**Context/Summary:**
Motived by user need of investigating db iterator behavior during an interval of any time length of a certain thread, we decide to collect and expose related counters in `PerfContext` as an experimental feature, in addition to the existing db-scope ones (i.e, tickers)

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11320

Test Plan:
- new UT
- db bench

Setup
```
./db_bench -db=/dev/shm/testdb/ -benchmarks="fillseq" -key_size=32 -value_size=512 -num=1000000 -compression_type=none -bloom_bits=3
```
Test till converges
```
./db_bench -seed=1679526311157283 -use_existing_db=1 -perf_level=2 -db=/dev/shm/testdb/ -benchmarks="seekrandom[-X60]"
```
pre-change
`seekrandom [AVG 33 runs] : 7545 (± 100) ops/sec`
post-change (no regression)
`seekrandom [AVG 33 runs] : 7688 (± 67) ops/sec`

Reviewed By: cbi42

Differential Revision: D44321931

Pulled By: hx235

fbshipit-source-id: f98a254ba3e3ced95eb5928884e33f1b99dca401
oxigraph-8.3.2
Hui Xiao 2 years ago committed by Facebook GitHub Bot
parent 601320164b
commit c14eb134ed
  1. 3
      HISTORY.md
  2. 6
      db/db_iter.cc
  3. 55
      db/db_iterator_test.cc
  4. 7
      include/rocksdb/perf_context.h
  5. 15
      monitoring/perf_context.cc

@ -7,6 +7,9 @@
### Behavior changes
* For level compaction with `level_compaction_dynamic_level_bytes=true`, RocksDB now trivially moves levels down to fill LSM starting from bottommost level during DB open. See more in comments for option `level_compaction_dynamic_level_bytes`.
### New Features
* Add experimental `PerfContext` counters `iter_{next|prev|seek}_count` for db iterator, each counting the times of corresponding API being called.
## 8.1.0 (03/18/2023)
### Behavior changes
* Compaction output file cutting logic now considers range tombstone start keys. For example, SST partitioner now may receive ParitionRequest for range tombstone start keys.

@ -131,6 +131,7 @@ void DBIter::Next() {
assert(valid_);
assert(status_.ok());
PERF_COUNTER_ADD(iter_next_count, 1);
PERF_CPU_TIMER_GUARD(iter_next_cpu_nanos, clock_);
// Release temporarily pinned blocks from last operation
ReleaseTempPinnedData();
@ -637,6 +638,7 @@ void DBIter::Prev() {
assert(valid_);
assert(status_.ok());
PERF_COUNTER_ADD(iter_prev_count, 1);
PERF_CPU_TIMER_GUARD(iter_prev_cpu_nanos, clock_);
ReleaseTempPinnedData();
ResetBlobValue();
@ -1444,6 +1446,7 @@ void DBIter::SetSavedKeyToSeekForPrevTarget(const Slice& target) {
}
void DBIter::Seek(const Slice& target) {
PERF_COUNTER_ADD(iter_seek_count, 1);
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
StopWatch sw(clock_, statistics_, DB_SEEK);
@ -1517,6 +1520,7 @@ void DBIter::Seek(const Slice& target) {
}
void DBIter::SeekForPrev(const Slice& target) {
PERF_COUNTER_ADD(iter_seek_count, 1);
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
StopWatch sw(clock_, statistics_, DB_SEEK);
@ -1590,6 +1594,7 @@ void DBIter::SeekToFirst() {
Seek(*iterate_lower_bound_);
return;
}
PERF_COUNTER_ADD(iter_seek_count, 1);
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
// Don't use iter_::Seek() if we set a prefix extractor
// because prefix seek will be used.
@ -1652,6 +1657,7 @@ void DBIter::SeekToLast() {
return;
}
PERF_COUNTER_ADD(iter_seek_count, 1);
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
// Don't use iter_::Seek() if we set a prefix extractor
// because prefix seek will be used.

@ -30,12 +30,63 @@ class DummyReadCallback : public ReadCallback {
void SetSnapshot(SequenceNumber seq) { max_visible_seq_ = seq; }
};
class DBIteratorBaseTest : public DBTestBase {
public:
DBIteratorBaseTest()
: DBTestBase("db_iterator_test", /*env_do_fsync=*/true) {}
};
TEST_F(DBIteratorBaseTest, APICallsWithPerfContext) {
// Set up the DB
Options options = CurrentOptions();
DestroyAndReopen(options);
Random rnd(301);
for (int i = 1; i <= 3; i++) {
ASSERT_OK(Put(std::to_string(i), std::to_string(i)));
}
// Setup iterator and PerfContext
Iterator* iter = db_->NewIterator(ReadOptions());
std::string key_str = std::to_string(2);
Slice key(key_str);
SetPerfLevel(kEnableCount);
get_perf_context()->Reset();
// Initial PerfContext counters
ASSERT_EQ(0, get_perf_context()->iter_seek_count);
ASSERT_EQ(0, get_perf_context()->iter_next_count);
ASSERT_EQ(0, get_perf_context()->iter_prev_count);
// Test Seek-related API calls PerfContext counter
iter->Seek(key);
iter->SeekToFirst();
iter->SeekToLast();
iter->SeekForPrev(key);
ASSERT_EQ(4, get_perf_context()->iter_seek_count);
ASSERT_EQ(0, get_perf_context()->iter_next_count);
ASSERT_EQ(0, get_perf_context()->iter_prev_count);
// Test Next() calls PerfContext counter
iter->Next();
ASSERT_EQ(4, get_perf_context()->iter_seek_count);
ASSERT_EQ(1, get_perf_context()->iter_next_count);
ASSERT_EQ(0, get_perf_context()->iter_prev_count);
// Test Prev() calls PerfContext counter
iter->Prev();
ASSERT_EQ(4, get_perf_context()->iter_seek_count);
ASSERT_EQ(1, get_perf_context()->iter_next_count);
ASSERT_EQ(1, get_perf_context()->iter_prev_count);
delete iter;
}
// Test param:
// bool: whether to pass read_callback to NewIterator().
class DBIteratorTest : public DBTestBase,
class DBIteratorTest : public DBIteratorBaseTest,
public testing::WithParamInterface<bool> {
public:
DBIteratorTest() : DBTestBase("db_iterator_test", /*env_do_fsync=*/true) {}
DBIteratorTest() {}
Iterator* NewIterator(const ReadOptions& read_options,
ColumnFamilyHandle* column_family = nullptr) {

@ -254,6 +254,13 @@ struct PerfContext {
uint64_t iter_prev_cpu_nanos;
uint64_t iter_seek_cpu_nanos;
// EXPERIMENTAL
// Total number of db iterator's Next(), Prev(), Seek-related APIs being
// called
uint64_t iter_next_count;
uint64_t iter_prev_count;
uint64_t iter_seek_count;
// Time spent in encrypting data. Populated when EncryptedEnv is used.
uint64_t encrypt_data_nanos;
// Time spent in decrypting data. Populated when EncryptedEnv is used.

@ -134,6 +134,9 @@ PerfContext::PerfContext(const PerfContext& other) {
iter_next_cpu_nanos = other.iter_next_cpu_nanos;
iter_prev_cpu_nanos = other.iter_prev_cpu_nanos;
iter_seek_cpu_nanos = other.iter_seek_cpu_nanos;
iter_next_count = other.iter_next_count;
iter_prev_count = other.iter_prev_count;
iter_seek_count = other.iter_seek_count;
number_async_seek = other.number_async_seek;
if (per_level_perf_context_enabled && level_to_perf_context != nullptr) {
ClearPerLevelPerfContext();
@ -254,6 +257,9 @@ PerfContext::PerfContext(PerfContext&& other) noexcept {
iter_next_cpu_nanos = other.iter_next_cpu_nanos;
iter_prev_cpu_nanos = other.iter_prev_cpu_nanos;
iter_seek_cpu_nanos = other.iter_seek_cpu_nanos;
iter_next_count = other.iter_next_count;
iter_prev_count = other.iter_prev_count;
iter_seek_count = other.iter_seek_count;
number_async_seek = other.number_async_seek;
if (per_level_perf_context_enabled && level_to_perf_context != nullptr) {
ClearPerLevelPerfContext();
@ -376,6 +382,9 @@ PerfContext& PerfContext::operator=(const PerfContext& other) {
iter_next_cpu_nanos = other.iter_next_cpu_nanos;
iter_prev_cpu_nanos = other.iter_prev_cpu_nanos;
iter_seek_cpu_nanos = other.iter_seek_cpu_nanos;
iter_next_count = other.iter_next_count;
iter_prev_count = other.iter_prev_count;
iter_seek_count = other.iter_seek_count;
number_async_seek = other.number_async_seek;
if (per_level_perf_context_enabled && level_to_perf_context != nullptr) {
ClearPerLevelPerfContext();
@ -488,6 +497,9 @@ void PerfContext::Reset() {
iter_next_cpu_nanos = 0;
iter_prev_cpu_nanos = 0;
iter_seek_cpu_nanos = 0;
iter_next_count = 0;
iter_prev_count = 0;
iter_seek_count = 0;
number_async_seek = 0;
if (per_level_perf_context_enabled && level_to_perf_context) {
for (auto& kv : *level_to_perf_context) {
@ -621,6 +633,9 @@ std::string PerfContext::ToString(bool exclude_zero_counters) const {
PERF_CONTEXT_OUTPUT(iter_next_cpu_nanos);
PERF_CONTEXT_OUTPUT(iter_prev_cpu_nanos);
PERF_CONTEXT_OUTPUT(iter_seek_cpu_nanos);
PERF_CONTEXT_OUTPUT(iter_next_count);
PERF_CONTEXT_OUTPUT(iter_prev_count);
PERF_CONTEXT_OUTPUT(iter_seek_count);
PERF_CONTEXT_OUTPUT(number_async_seek);
PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_useful);
PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_positive);

Loading…
Cancel
Save