From 01e6badbb6723158af3a951d681121f1c74aebd2 Mon Sep 17 00:00:00 2001
From: Shi Feng
Date: Tue, 26 Mar 2019 16:20:52 -0700
Subject: [PATCH] Introduce CPU timers for iterator seek and next (#5076)

Summary:
Introduce CPU timers for iterator seek and next operations. The seek
counter includes SeekToFirst, SeekToLast and SeekForPrev, with the
caveat that the SeekToLast timer doesn't include some post-processing
time if an upper bound is defined.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5076

Differential Revision: D14525218

Pulled By: fredfsh

fbshipit-source-id: 03ba25df3b22b06c072621e4de0eacfa1445f0d9
---
 db/db_iter.cc                  |   7 ++
 db/db_test2.cc                 |  87 ++++++++++++++++++++++-
 db/perf_context_test.cc        | 125 +++++++++++++++++++++++++++++++++
 include/rocksdb/perf_context.h |   3 +
 monitoring/perf_context.cc     |  15 ++++
 5 files changed, 236 insertions(+), 1 deletion(-)

diff --git a/db/db_iter.cc b/db/db_iter.cc
index d71e78a08..7d91ef027 100644
--- a/db/db_iter.cc
+++ b/db/db_iter.cc
@@ -374,6 +374,7 @@ void DBIter::Next() {
   assert(valid_);
   assert(status_.ok());
 
+  PERF_CPU_TIMER_GUARD(iter_next_cpu_nanos, env_);
   // Release temporarily pinned blocks from last operation
   ReleaseTempPinnedData();
   ResetInternalKeysSkippedCounter();
@@ -731,6 +732,8 @@ bool DBIter::MergeValuesNewToOld() {
 void DBIter::Prev() {
   assert(valid_);
   assert(status_.ok());
+
+  PERF_CPU_TIMER_GUARD(iter_prev_cpu_nanos, env_);
   ReleaseTempPinnedData();
   ResetInternalKeysSkippedCounter();
   bool ok = true;
@@ -1261,6 +1264,7 @@ SequenceNumber DBIter::MaxVisibleSequenceNumber() {
 }
 
 void DBIter::Seek(const Slice& target) {
+  PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
   StopWatch sw(env_, statistics_, DB_SEEK);
   status_ = Status::OK();
   ReleaseTempPinnedData();
@@ -1318,6 +1322,7 @@ void DBIter::Seek(const Slice& target) {
 }
 
 void DBIter::SeekForPrev(const Slice& target) {
+  PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
   StopWatch sw(env_, statistics_, DB_SEEK);
   status_ = Status::OK();
   ReleaseTempPinnedData();
@@ -1378,6 +1383,7 @@ void DBIter::SeekToFirst() {
     Seek(*iterate_lower_bound_);
     return;
   }
+  PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
   // Don't use iter_::Seek() if we set a prefix extractor
   // because prefix seek will be used.
   if (prefix_extractor_ != nullptr && !total_order_seek_) {
@@ -1429,6 +1435,7 @@ void DBIter::SeekToLast() {
     return;
   }
 
+  PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
   // Don't use iter_::Seek() if we set a prefix extractor
   // because prefix seek will be used.
   if (prefix_extractor_ != nullptr && !total_order_seek_) {
diff --git a/db/db_test2.cc b/db/db_test2.cc
index 7cec8e66b..b62a1d577 100644
--- a/db/db_test2.cc
+++ b/db/db_test2.cc
@@ -1805,7 +1805,7 @@ class MockPersistentCache : public PersistentCache {
 #ifdef OS_LINUX
 // Make sure that in CPU time perf context counters, Env::NowCPUNanos()
 // is used, rather than Env::NowNanos();
-TEST_F(DBTest2, TestPerfContextCpuTime) {
+TEST_F(DBTest2, TestPerfContextGetCpuTime) {
   // force resizing table cache so table handle is not preloaded so that
   // we can measure find_table_nanos during Get().
   dbfull()->TEST_table_cache()->SetCapacity(0);
@@ -1836,6 +1836,91 @@ TEST_F(DBTest2, TestPerfContextCpuTime) {
   SetPerfLevel(PerfLevel::kDisable);
   rocksdb::SyncPoint::GetInstance()->DisableProcessing();
 }
+
+TEST_F(DBTest2, TestPerfContextIterCpuTime) {
+  DestroyAndReopen(CurrentOptions());
+  // force resizing table cache so table handle is not preloaded so that
+  // we can measure find_table_nanos during iteration
+  dbfull()->TEST_table_cache()->SetCapacity(0);
+
+  const size_t kNumEntries = 10;
+  for (size_t i = 0; i < kNumEntries; ++i) {
+    ASSERT_OK(Put("k" + ToString(i), "v" + ToString(i)));
+  }
+  ASSERT_OK(Flush());
+  for (size_t i = 0; i < kNumEntries; ++i) {
+    ASSERT_EQ("v" + ToString(i), Get("k" + ToString(i)));
+  }
+  std::string last_key = "k" + ToString(kNumEntries - 1);
+  std::string last_value = "v" + ToString(kNumEntries - 1);
+  env_->now_cpu_count_.store(0);
+
+  // CPU timing is not enabled with kEnableTimeExceptForMutex
+  SetPerfLevel(PerfLevel::kEnableTimeExceptForMutex);
+  Iterator* iter = db_->NewIterator(ReadOptions());
+  iter->Seek("k0");
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ("v0", iter->value().ToString());
+  iter->SeekForPrev(last_key);
+  ASSERT_TRUE(iter->Valid());
+  iter->SeekToLast();
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ(last_value, iter->value().ToString());
+  iter->SeekToFirst();
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ("v0", iter->value().ToString());
+  ASSERT_EQ(0, get_perf_context()->iter_seek_cpu_nanos);
+  iter->Next();
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ("v1", iter->value().ToString());
+  ASSERT_EQ(0, get_perf_context()->iter_next_cpu_nanos);
+  iter->Prev();
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ("v0", iter->value().ToString());
+  ASSERT_EQ(0, get_perf_context()->iter_prev_cpu_nanos);
+  ASSERT_EQ(0, env_->now_cpu_count_.load());
+  delete iter;
+
+  uint64_t kDummyAddonTime = uint64_t{1000000000000};
+
+  // Add time to NowNanos() reading.
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "TableCache::FindTable:0",
+      [&](void* /*arg*/) { env_->addon_time_.fetch_add(kDummyAddonTime); });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+
+  SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex);
+  iter = db_->NewIterator(ReadOptions());
+  iter->Seek("k0");
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ("v0", iter->value().ToString());
+  iter->SeekForPrev(last_key);
+  ASSERT_TRUE(iter->Valid());
+  iter->SeekToLast();
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ(last_value, iter->value().ToString());
+  iter->SeekToFirst();
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ("v0", iter->value().ToString());
+  ASSERT_GT(get_perf_context()->iter_seek_cpu_nanos, 0);
+  ASSERT_LT(get_perf_context()->iter_seek_cpu_nanos, kDummyAddonTime);
+  iter->Next();
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ("v1", iter->value().ToString());
+  ASSERT_GT(get_perf_context()->iter_next_cpu_nanos, 0);
+  ASSERT_LT(get_perf_context()->iter_next_cpu_nanos, kDummyAddonTime);
+  iter->Prev();
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ("v0", iter->value().ToString());
+  ASSERT_GT(get_perf_context()->iter_prev_cpu_nanos, 0);
+  ASSERT_LT(get_perf_context()->iter_prev_cpu_nanos, kDummyAddonTime);
+  ASSERT_GE(env_->now_cpu_count_.load(), 12);
+  ASSERT_GT(get_perf_context()->find_table_nanos, kDummyAddonTime);
+
+  SetPerfLevel(PerfLevel::kDisable);
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  delete iter;
+}
 #endif  // OS_LINUX
 
 #ifndef OS_SOLARIS  // GetUniqueIdFromFile is not implemented
diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc
index 4500b810a..1e3830636 100644
--- a/db/perf_context_test.cc
+++ b/db/perf_context_test.cc
@@ -813,6 +813,131 @@ TEST_F(PerfContextTest, PerfContextByLevelGetSet) {
   ASSERT_NE(std::string::npos,
             zero_excluded.find("block_cache_miss_count = 4@level1, 2@level3"));
 }
+
+TEST_F(PerfContextTest, CPUTimer) {
+  DestroyDB(kDbName, Options());
+  auto db = OpenDb();
+  WriteOptions write_options;
+  ReadOptions read_options;
+  SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex);
+
+  std::string max_str = "0";
+  for (int i = 0; i < FLAGS_total_keys; ++i) {
+    std::string i_str = ToString(i);
+    std::string key = "k" + i_str;
+    std::string value = "v" + i_str;
+    max_str = max_str > i_str ? max_str : i_str;
+
+    db->Put(write_options, key, value);
+  }
+  std::string last_key = "k" + max_str;
+  std::string last_value = "v" + max_str;
+
+  {
+    // Get
+    get_perf_context()->Reset();
+    std::string value;
+    ASSERT_OK(db->Get(read_options, "k0", &value));
+    ASSERT_EQ(value, "v0");
+
+    if (FLAGS_verbose) {
+      std::cout << "Get CPU time nanos: " << get_perf_context()->get_cpu_nanos
+                << "ns\n";
+    }
+
+    // Iter
+    std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
+
+    // Seek
+    get_perf_context()->Reset();
+    iter->Seek(last_key);
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(last_value, iter->value().ToString());
+
+    if (FLAGS_verbose) {
+      std::cout << "Iter Seek CPU time nanos: "
+                << get_perf_context()->iter_seek_cpu_nanos << "ns\n";
+    }
+
+    // SeekForPrev
+    get_perf_context()->Reset();
+    iter->SeekForPrev(last_key);
+    ASSERT_TRUE(iter->Valid());
+
+    if (FLAGS_verbose) {
+      std::cout << "Iter SeekForPrev CPU time nanos: "
+                << get_perf_context()->iter_seek_cpu_nanos << "ns\n";
+    }
+
+    // SeekToLast
+    get_perf_context()->Reset();
+    iter->SeekToLast();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(last_value, iter->value().ToString());
+
+    if (FLAGS_verbose) {
+      std::cout << "Iter SeekToLast CPU time nanos: "
+                << get_perf_context()->iter_seek_cpu_nanos << "ns\n";
+    }
+
+    // SeekToFirst
+    get_perf_context()->Reset();
+    iter->SeekToFirst();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ("v0", iter->value().ToString());
+
+    if (FLAGS_verbose) {
+      std::cout << "Iter SeekToFirst CPU time nanos: "
+                << get_perf_context()->iter_seek_cpu_nanos << "ns\n";
+    }
+
+    // Next
+    get_perf_context()->Reset();
+    iter->Next();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ("v1", iter->value().ToString());
+
+    if (FLAGS_verbose) {
+      std::cout << "Iter Next CPU time nanos: "
+                << get_perf_context()->iter_next_cpu_nanos << "ns\n";
+    }
+
+    // Prev
+    get_perf_context()->Reset();
+    iter->Prev();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ("v0", iter->value().ToString());
+
+    if (FLAGS_verbose) {
+      std::cout << "Iter Prev CPU time nanos: "
+                << get_perf_context()->iter_prev_cpu_nanos << "ns\n";
+    }
+
+    // monotonically increasing
+    get_perf_context()->Reset();
+    auto count = get_perf_context()->iter_seek_cpu_nanos;
+    for (int i = 0; i < FLAGS_total_keys; ++i) {
+      iter->Seek("k" + ToString(i));
+      ASSERT_TRUE(iter->Valid());
+      ASSERT_EQ("v" + ToString(i), iter->value().ToString());
+      auto next_count = get_perf_context()->iter_seek_cpu_nanos;
+      ASSERT_GT(next_count, count);
+      count = next_count;
+    }
+
+    // iterator creation/destruction; multiple iterators
+    {
+      std::unique_ptr<Iterator> iter2(db->NewIterator(read_options));
+      ASSERT_EQ(count, get_perf_context()->iter_seek_cpu_nanos);
+      iter2->Seek(last_key);
+      ASSERT_TRUE(iter2->Valid());
+      ASSERT_EQ(last_value, iter2->value().ToString());
+      ASSERT_GT(get_perf_context()->iter_seek_cpu_nanos, count);
+      count = get_perf_context()->iter_seek_cpu_nanos;
+    }
+    ASSERT_EQ(count, get_perf_context()->iter_seek_cpu_nanos);
+  }
+}
 }  // namespace rocksdb
 
 int main(int argc, char** argv) {
diff --git a/include/rocksdb/perf_context.h b/include/rocksdb/perf_context.h
index 4e299b373..b6123e845 100644
--- a/include/rocksdb/perf_context.h
+++ b/include/rocksdb/perf_context.h
@@ -218,6 +218,9 @@ struct PerfContext {
   uint64_t env_new_logger_nanos;
 
   uint64_t get_cpu_nanos;
+  uint64_t iter_next_cpu_nanos;
+  uint64_t iter_prev_cpu_nanos;
+  uint64_t iter_seek_cpu_nanos;
 
   std::map<uint32_t, PerfContextByLevel>* level_to_perf_context = nullptr;
   bool per_level_perf_context_enabled = false;
diff --git a/monitoring/perf_context.cc b/monitoring/perf_context.cc
index 90199c3ca..40b0b215c 100644
--- a/monitoring/perf_context.cc
+++ b/monitoring/perf_context.cc
@@ -118,6 +118,9 @@ PerfContext::PerfContext(const PerfContext& other) {
   env_unlock_file_nanos = other.env_unlock_file_nanos;
   env_new_logger_nanos = other.env_new_logger_nanos;
   get_cpu_nanos = other.get_cpu_nanos;
+  iter_next_cpu_nanos = other.iter_next_cpu_nanos;
+  iter_prev_cpu_nanos = other.iter_prev_cpu_nanos;
+  iter_seek_cpu_nanos = other.iter_seek_cpu_nanos;
   if (per_level_perf_context_enabled && level_to_perf_context != nullptr) {
     ClearPerLevelPerfContext();
   }
@@ -210,6 +213,9 @@ PerfContext::PerfContext(PerfContext&& other) noexcept {
   env_unlock_file_nanos = other.env_unlock_file_nanos;
   env_new_logger_nanos = other.env_new_logger_nanos;
   get_cpu_nanos = other.get_cpu_nanos;
+  iter_next_cpu_nanos = other.iter_next_cpu_nanos;
+  iter_prev_cpu_nanos = other.iter_prev_cpu_nanos;
+  iter_seek_cpu_nanos = other.iter_seek_cpu_nanos;
   if (per_level_perf_context_enabled && level_to_perf_context != nullptr) {
     ClearPerLevelPerfContext();
   }
@@ -304,6 +310,9 @@ PerfContext& PerfContext::operator=(const PerfContext& other) {
   env_unlock_file_nanos = other.env_unlock_file_nanos;
   env_new_logger_nanos = other.env_new_logger_nanos;
   get_cpu_nanos = other.get_cpu_nanos;
+  iter_next_cpu_nanos = other.iter_next_cpu_nanos;
+  iter_prev_cpu_nanos = other.iter_prev_cpu_nanos;
+  iter_seek_cpu_nanos = other.iter_seek_cpu_nanos;
   if (per_level_perf_context_enabled && level_to_perf_context != nullptr) {
     ClearPerLevelPerfContext();
   }
@@ -395,6 +404,9 @@ void PerfContext::Reset() {
   env_unlock_file_nanos = 0;
   env_new_logger_nanos = 0;
   get_cpu_nanos = 0;
+  iter_next_cpu_nanos = 0;
+  iter_prev_cpu_nanos = 0;
+  iter_seek_cpu_nanos = 0;
   if (per_level_perf_context_enabled && level_to_perf_context) {
     for (auto& kv : *level_to_perf_context) {
       kv.second.Reset();
@@ -509,6 +521,9 @@ std::string PerfContext::ToString(bool exclude_zero_counters) const {
   PERF_CONTEXT_OUTPUT(env_unlock_file_nanos);
   PERF_CONTEXT_OUTPUT(env_new_logger_nanos);
   PERF_CONTEXT_OUTPUT(get_cpu_nanos);
+  PERF_CONTEXT_OUTPUT(iter_next_cpu_nanos);
+  PERF_CONTEXT_OUTPUT(iter_prev_cpu_nanos);
+  PERF_CONTEXT_OUTPUT(iter_seek_cpu_nanos);
   PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_useful);
   PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_positive);
   PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_true_positive);
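
Note for reviewers (not part of the diff): for intuition about the PERF_CPU_TIMER_GUARD calls added in db_iter.cc, the guard samples the thread's CPU clock on construction and adds the delta to the named perf-context counter when it leaves scope. The stand-alone sketch below illustrates that shape only; ThreadCpuNanos, ScopedCpuTimer and the bare counter are illustrative stand-ins, not RocksDB's actual macro or Env API, and the real macro additionally no-ops below the CPU-time perf level and reads the clock via Env::NowCPUNanos(). CLOCK_THREAD_CPUTIME_ID matches the Linux-only scope of the tests above.

#include <cstdint>
#include <cstdio>
#include <ctime>

// Illustrative stand-in for the perf-context counter being updated.
uint64_t iter_seek_cpu_nanos = 0;

// Thread CPU clock reading, analogous in spirit to Env::NowCPUNanos().
static uint64_t ThreadCpuNanos() {
  timespec ts;
  clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
  return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
}

// RAII guard: adds elapsed thread CPU time to `counter` when it goes out
// of scope. This is the shape of what PERF_CPU_TIMER_GUARD sets up, minus
// the perf-level check.
class ScopedCpuTimer {
 public:
  explicit ScopedCpuTimer(uint64_t* counter)
      : counter_(counter), start_(ThreadCpuNanos()) {}
  ~ScopedCpuTimer() { *counter_ += ThreadCpuNanos() - start_; }

 private:
  uint64_t* counter_;
  uint64_t start_;
};

void Seek() {
  ScopedCpuTimer timer(&iter_seek_cpu_nanos);  // like the guard in DBIter::Seek
  // ... seek work would happen here ...
}

int main() {
  Seek();
  std::printf("iter_seek_cpu_nanos = %llu\n",
              static_cast<unsigned long long>(iter_seek_cpu_nanos));
  return 0;
}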
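
And a minimal consumer-side sketch of reading the new counters through the public perf-context API exercised in the tests (SetPerfLevel, get_perf_context); the DB path and keys here are arbitrary placeholders:

#include <cassert>
#include <iostream>

#include "rocksdb/db.h"
#include "rocksdb/iterator.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/perf_level.h"

int main() {
  rocksdb::DB* db;
  rocksdb::Options options;
  options.create_if_missing = true;
  // "/tmp/iter_cpu_demo" is an arbitrary path for this sketch.
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/iter_cpu_demo", &db);
  assert(s.ok());
  db->Put(rocksdb::WriteOptions(), "k1", "v1");
  db->Put(rocksdb::WriteOptions(), "k2", "v2");

  // The new timers are populated only at
  // kEnableTimeAndCPUTimeExceptForMutex (see the tests above).
  rocksdb::SetPerfLevel(
      rocksdb::PerfLevel::kEnableTimeAndCPUTimeExceptForMutex);
  rocksdb::get_perf_context()->Reset();

  rocksdb::Iterator* it = db->NewIterator(rocksdb::ReadOptions());
  it->Seek("k1");  // accumulates into iter_seek_cpu_nanos
  it->Next();      // accumulates into iter_next_cpu_nanos
  it->Prev();      // accumulates into iter_prev_cpu_nanos

  std::cout << "iter_seek_cpu_nanos: "
            << rocksdb::get_perf_context()->iter_seek_cpu_nanos << "\n"
            << "iter_next_cpu_nanos: "
            << rocksdb::get_perf_context()->iter_next_cpu_nanos << "\n"
            << "iter_prev_cpu_nanos: "
            << rocksdb::get_perf_context()->iter_prev_cpu_nanos << "\n";

  rocksdb::SetPerfLevel(rocksdb::PerfLevel::kDisable);
  delete it;
  delete db;
  return 0;
}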