Introduce a CPU time counter in perf_context (#4741)

Summary:
Introduce the first CPU timing counter, perf_context.get_cpu_nanos. This opens a door to more CPU counters in the future.
Only Posix Env has it implemented using clock_gettime() with CLOCK_THREAD_CPUTIME_ID. How accurate the counter is depends on the platform.
Make PerfStepTimer to take an Env as an argument, and sometimes pass it in. The direct reason is to make the unit tests to use SpecialEnv where we can ingest logic there. But in long term, this is a good change.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4741

Differential Revision: D13287798

Pulled By: siying

fbshipit-source-id: 090361049d9d5095d1d1a369fe1338d2e2e1c73f
main
Siying Dong 6 years ago committed by Facebook Github Bot
parent 02bfc5831e
commit da1c64b6e7
  1. 2
      db/db_impl.cc
  2. 34
      db/db_test2.cc
  3. 7
      db/db_test_util.h
  4. 2
      db/table_cache.cc
  5. 9
      env/env_posix.cc
  6. 3
      include/rocksdb/env.h
  7. 3
      include/rocksdb/perf_context.h
  8. 7
      include/rocksdb/perf_level.h
  9. 2
      monitoring/perf_context.cc
  10. 25
      monitoring/perf_context_imp.h
  11. 32
      monitoring/perf_step_timer.h

@ -1216,6 +1216,7 @@ Status DBImpl::GetImpl(const ReadOptions& read_options,
PinnableSlice* pinnable_val, bool* value_found, PinnableSlice* pinnable_val, bool* value_found,
ReadCallback* callback, bool* is_blob_index) { ReadCallback* callback, bool* is_blob_index) {
assert(pinnable_val != nullptr); assert(pinnable_val != nullptr);
PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
StopWatch sw(env_, stats_, DB_GET); StopWatch sw(env_, stats_, DB_GET);
PERF_TIMER_GUARD(get_snapshot_time); PERF_TIMER_GUARD(get_snapshot_time);
@ -1330,6 +1331,7 @@ std::vector<Status> DBImpl::MultiGet(
const ReadOptions& read_options, const ReadOptions& read_options,
const std::vector<ColumnFamilyHandle*>& column_family, const std::vector<ColumnFamilyHandle*>& column_family,
const std::vector<Slice>& keys, std::vector<std::string>* values) { const std::vector<Slice>& keys, std::vector<std::string>* values) {
PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
StopWatch sw(env_, stats_, DB_MULTIGET); StopWatch sw(env_, stats_, DB_MULTIGET);
PERF_TIMER_GUARD(get_snapshot_time); PERF_TIMER_GUARD(get_snapshot_time);

@ -1646,6 +1646,40 @@ class MockPersistentCache : public PersistentCache {
const size_t max_size_ = 10 * 1024; // 10KiB const size_t max_size_ = 10 * 1024; // 10KiB
}; };
#ifdef OS_LINUX
// Make sure that in CPU time perf context counters, Env::NowCPUNanos()
// is used, rather than Env::CPUNanos();
TEST_F(DBTest2, TestPerfContextCpuTime) {
ASSERT_OK(Put("foo", "bar"));
ASSERT_OK(Flush());
env_->now_cpu_count_.store(0);
// CPU timing is not enabled with kEnableTimeExceptForMutex
SetPerfLevel(PerfLevel::kEnableTimeExceptForMutex);
ASSERT_EQ("bar", Get("foo"));
ASSERT_EQ(0, get_perf_context()->get_cpu_nanos);
ASSERT_EQ(0, env_->now_cpu_count_.load());
uint64_t kDummyAddonTime = uint64_t{1000000000000};
// Add time to NowNanos() reading.
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"TableCache::FindTable:0",
[&](void* /*arg*/) { env_->addon_time_.fetch_add(kDummyAddonTime); });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex);
ASSERT_EQ("bar", Get("foo"));
ASSERT_EQ(env_->now_cpu_count_.load(), 2);
ASSERT_LT(get_perf_context()->get_cpu_nanos, kDummyAddonTime);
ASSERT_GT(get_perf_context()->find_table_nanos, kDummyAddonTime);
SetPerfLevel(PerfLevel::kDisable);
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}
#endif // OS_LINUX
#ifndef OS_SOLARIS // GetUniqueIdFromFile is not implemented #ifndef OS_SOLARIS // GetUniqueIdFromFile is not implemented
TEST_F(DBTest2, PersistentCache) { TEST_F(DBTest2, PersistentCache) {
int num_iter = 80; int num_iter = 80;

@ -503,6 +503,11 @@ class SpecialEnv : public EnvWrapper {
return s; return s;
} }
virtual uint64_t NowCPUNanos() override {
now_cpu_count_.fetch_add(1);
return target()->NowCPUNanos();
}
virtual uint64_t NowNanos() override { virtual uint64_t NowNanos() override {
return (time_elapse_only_sleep_ ? 0 : target()->NowNanos()) + return (time_elapse_only_sleep_ ? 0 : target()->NowNanos()) +
addon_time_.load() * 1000; addon_time_.load() * 1000;
@ -572,6 +577,8 @@ class SpecialEnv : public EnvWrapper {
std::atomic<int64_t> addon_time_; std::atomic<int64_t> addon_time_;
std::atomic<int> now_cpu_count_;
std::atomic<int> delete_count_; std::atomic<int> delete_count_;
std::atomic<bool> time_elapse_only_sleep_; std::atomic<bool> time_elapse_only_sleep_;

@ -147,7 +147,7 @@ Status TableCache::FindTable(const EnvOptions& env_options,
HistogramImpl* file_read_hist, bool skip_filters, HistogramImpl* file_read_hist, bool skip_filters,
int level, int level,
bool prefetch_index_and_filter_in_cache) { bool prefetch_index_and_filter_in_cache) {
PERF_TIMER_GUARD(find_table_nanos); PERF_TIMER_GUARD_WITH_ENV(find_table_nanos, ioptions_.env);
Status s; Status s;
uint64_t number = fd.GetNumber(); uint64_t number = fd.GetNumber();
Slice key = GetSliceForFileNumber(&number); Slice key = GetSliceForFileNumber(&number);

9
env/env_posix.cc vendored

@ -843,6 +843,15 @@ class PosixEnv : public Env {
#endif #endif
} }
virtual uint64_t NowCPUNanos() override {
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_AIX)
struct timespec ts;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#endif
return 0;
}
virtual void SleepForMicroseconds(int micros) override { usleep(micros); } virtual void SleepForMicroseconds(int micros) override { usleep(micros); }
virtual Status GetHostName(char* name, uint64_t len) override { virtual Status GetHostName(char* name, uint64_t len) override {

@ -384,6 +384,9 @@ class Env {
return NowMicros() * 1000; return NowMicros() * 1000;
} }
// 0 indicates not supported.
virtual uint64_t NowCPUNanos() { return 0; }
// Sleep/delay the thread for the prescribed number of micro-seconds. // Sleep/delay the thread for the prescribed number of micro-seconds.
virtual void SleepForMicroseconds(int micros) = 0; virtual void SleepForMicroseconds(int micros) = 0;

@ -205,6 +205,9 @@ struct PerfContext {
uint64_t env_lock_file_nanos; uint64_t env_lock_file_nanos;
uint64_t env_unlock_file_nanos; uint64_t env_unlock_file_nanos;
uint64_t env_new_logger_nanos; uint64_t env_new_logger_nanos;
uint64_t get_cpu_nanos;
std::map<uint32_t, PerfContextByLevel>* level_to_perf_context; std::map<uint32_t, PerfContextByLevel>* level_to_perf_context;
bool per_level_perf_context_enabled; bool per_level_perf_context_enabled;
}; };

@ -17,8 +17,11 @@ enum PerfLevel : unsigned char {
kEnableCount = 2, // enable only count stats kEnableCount = 2, // enable only count stats
kEnableTimeExceptForMutex = 3, // Other than count stats, also enable time kEnableTimeExceptForMutex = 3, // Other than count stats, also enable time
// stats except for mutexes // stats except for mutexes
kEnableTime = 4, // enable count and time stats // Other than time, also measure CPU time counters. Still don't measure
kOutOfBounds = 5 // N.B. Must always be the last value! // time (neither wall time nor CPU time) for mutexes.
kEnableTimeAndCPUTimeExceptForMutex = 4,
kEnableTime = 5, // enable count and time stats
kOutOfBounds = 6 // N.B. Must always be the last value!
}; };
// set the perf stats level for current thread // set the perf stats level for current thread

@ -114,6 +114,7 @@ void PerfContext::Reset() {
env_lock_file_nanos = 0; env_lock_file_nanos = 0;
env_unlock_file_nanos = 0; env_unlock_file_nanos = 0;
env_new_logger_nanos = 0; env_new_logger_nanos = 0;
get_cpu_nanos = 0;
if (per_level_perf_context_enabled && level_to_perf_context) { if (per_level_perf_context_enabled && level_to_perf_context) {
for (auto& kv : *level_to_perf_context) { for (auto& kv : *level_to_perf_context) {
kv.second.Reset(); kv.second.Reset();
@ -224,6 +225,7 @@ std::string PerfContext::ToString(bool exclude_zero_counters) const {
PERF_CONTEXT_OUTPUT(env_lock_file_nanos); PERF_CONTEXT_OUTPUT(env_lock_file_nanos);
PERF_CONTEXT_OUTPUT(env_unlock_file_nanos); PERF_CONTEXT_OUTPUT(env_unlock_file_nanos);
PERF_CONTEXT_OUTPUT(env_new_logger_nanos); PERF_CONTEXT_OUTPUT(env_new_logger_nanos);
PERF_CONTEXT_OUTPUT(get_cpu_nanos);
PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_useful); PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_useful);
PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_positive); PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_positive);
PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_true_positive); PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_true_positive);

@ -41,12 +41,25 @@ extern thread_local PerfContext perf_context;
PerfStepTimer perf_step_timer_##metric(&(perf_context.metric)); \ PerfStepTimer perf_step_timer_##metric(&(perf_context.metric)); \
perf_step_timer_##metric.Start(); perf_step_timer_##metric.Start();
#define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition, stats, \ // Declare and set start time of the timer
ticker_type) \ #define PERF_TIMER_GUARD_WITH_ENV(metric, env) \
PerfStepTimer perf_step_timer_##metric(&(perf_context.metric), true, stats, \ PerfStepTimer perf_step_timer_##metric(&(perf_context.metric), env); \
ticker_type); \ perf_step_timer_##metric.Start();
if (condition) { \
perf_step_timer_##metric.Start(); \ // Declare and set start time of the timer
#define PERF_CPU_TIMER_GUARD(metric, env) \
PerfStepTimer perf_step_timer_##metric( \
&(perf_context.metric), env, true, \
PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); \
perf_step_timer_##metric.Start();
#define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition, stats, \
ticker_type) \
PerfStepTimer perf_step_timer_##metric(&(perf_context.metric), nullptr, \
false, PerfLevel::kEnableTime, stats, \
ticker_type); \
if (condition) { \
perf_step_timer_##metric.Start(); \
} }
// Update metric with time elapsed since last START. start time is reset // Update metric with time elapsed since last START. start time is reset

@ -12,14 +12,15 @@ namespace rocksdb {
class PerfStepTimer { class PerfStepTimer {
public: public:
explicit PerfStepTimer(uint64_t* metric, bool for_mutex = false, explicit PerfStepTimer(
Statistics* statistics = nullptr, uint64_t* metric, Env* env = nullptr, bool use_cpu_time = false,
uint32_t ticker_type = 0) PerfLevel enable_level = PerfLevel::kEnableTimeExceptForMutex,
: perf_counter_enabled_( Statistics* statistics = nullptr, uint32_t ticker_type = 0)
perf_level >= PerfLevel::kEnableTime || : perf_counter_enabled_(perf_level >= enable_level),
(!for_mutex && perf_level >= kEnableTimeExceptForMutex)), use_cpu_time_(use_cpu_time),
env_((perf_counter_enabled_ || statistics != nullptr) ? Env::Default() env_((perf_counter_enabled_ || statistics != nullptr)
: nullptr), ? ((env != nullptr) ? env : Env::Default())
: nullptr),
start_(0), start_(0),
metric_(metric), metric_(metric),
statistics_(statistics), statistics_(statistics),
@ -31,13 +32,21 @@ class PerfStepTimer {
void Start() { void Start() {
if (perf_counter_enabled_ || statistics_ != nullptr) { if (perf_counter_enabled_ || statistics_ != nullptr) {
start_ = env_->NowNanos(); start_ = time_now();
}
}
uint64_t time_now() {
if (!use_cpu_time_) {
return env_->NowNanos();
} else {
return env_->NowCPUNanos();
} }
} }
void Measure() { void Measure() {
if (start_) { if (start_) {
uint64_t now = env_->NowNanos(); uint64_t now = time_now();
*metric_ += now - start_; *metric_ += now - start_;
start_ = now; start_ = now;
} }
@ -45,7 +54,7 @@ class PerfStepTimer {
void Stop() { void Stop() {
if (start_) { if (start_) {
uint64_t duration = env_->NowNanos() - start_; uint64_t duration = time_now() - start_;
if (perf_counter_enabled_) { if (perf_counter_enabled_) {
*metric_ += duration; *metric_ += duration;
} }
@ -59,6 +68,7 @@ class PerfStepTimer {
private: private:
const bool perf_counter_enabled_; const bool perf_counter_enabled_;
const bool use_cpu_time_;
Env* const env_; Env* const env_;
uint64_t start_; uint64_t start_;
uint64_t* metric_; uint64_t* metric_;

Loading…
Cancel
Save