|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
//
|
|
|
|
#include "rocksdb/perf_context.h"
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <iostream>
|
|
|
|
#include <thread>
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
12 years ago
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include "monitoring/histogram.h"
|
|
|
|
#include "monitoring/instrumented_mutex.h"
|
|
|
|
#include "monitoring/perf_context_imp.h"
|
|
|
|
#include "monitoring/thread_status_util.h"
|
|
|
|
#include "port/port.h"
|
|
|
|
#include "rocksdb/db.h"
|
|
|
|
#include "rocksdb/memtablerep.h"
|
|
|
|
#include "rocksdb/slice_transform.h"
|
|
|
|
#include "rocksdb/system_clock.h"
|
|
|
|
#include "test_util/testharness.h"
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
12 years ago
|
|
|
#include "util/stop_watch.h"
|
|
|
|
#include "util/string_util.h"
|
|
|
|
#include "utilities/merge_operators.h"
|
|
|
|
|
|
|
|
bool FLAGS_random_key = false;
|
|
|
|
bool FLAGS_use_set_based_memetable = false;
|
|
|
|
int FLAGS_total_keys = 100;
|
|
|
|
int FLAGS_write_buffer_size = 1000000000;
|
|
|
|
int FLAGS_max_write_buffer_number = 8;
|
|
|
|
int FLAGS_min_write_buffer_number_to_merge = 7;
|
|
|
|
bool FLAGS_verbose = false;
|
|
|
|
|
|
|
|
// Path to the database on file system
|
|
|
|
const std::string kDbName =
|
|
|
|
ROCKSDB_NAMESPACE::test::PerThreadDBPath("perf_context_test");
|
|
|
|
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
|
|
|
|
std::shared_ptr<DB> OpenDb(bool read_only = false) {
|
|
|
|
DB* db;
|
|
|
|
Options options;
|
|
|
|
options.create_if_missing = true;
|
|
|
|
options.max_open_files = -1;
|
|
|
|
options.write_buffer_size = FLAGS_write_buffer_size;
|
|
|
|
options.max_write_buffer_number = FLAGS_max_write_buffer_number;
|
|
|
|
options.min_write_buffer_number_to_merge =
|
|
|
|
FLAGS_min_write_buffer_number_to_merge;
|
|
|
|
|
|
|
|
if (FLAGS_use_set_based_memetable) {
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
options.prefix_extractor.reset(
|
|
|
|
ROCKSDB_NAMESPACE::NewFixedPrefixTransform(0));
|
|
|
|
options.memtable_factory.reset(NewHashSkipListRepFactory());
|
|
|
|
#endif // ROCKSDB_LITE
|
|
|
|
}
|
|
|
|
|
|
|
|
Status s;
|
|
|
|
if (!read_only) {
|
|
|
|
s = DB::Open(options, kDbName, &db);
|
|
|
|
} else {
|
|
|
|
s = DB::OpenForReadOnly(options, kDbName, &db);
|
|
|
|
}
|
rocksdb: Replace ASSERT* with EXPECT* in functions that does not return void value
Summary:
gtest does not use exceptions to fail a unit test by design, and `ASSERT*`s are implemented using `return`. As a consequence we cannot use `ASSERT*` in a function that does not return `void` value ([[ https://code.google.com/p/googletest/wiki/AdvancedGuide#Assertion_Placement | 1]]), and have to fix our existing code. This diff does this in a generic way, with no manual changes.
In order to detect all existing `ASSERT*` that are used in functions that doesn't return void value, I change the code to generate compile errors for such cases.
In `util/testharness.h` I defined `EXPECT*` assertions, the same way as `ASSERT*`, and redefined `ASSERT*` to return `void`. Then executed:
```lang=bash
% USE_CLANG=1 make all -j55 -k 2> build.log
% perl -naF: -e 'print "-- -number=".$F[1]." ".$F[0]."\n" if /: error:/' \
build.log | xargs -L 1 perl -spi -e 's/ASSERT/EXPECT/g if $. == $number'
% make format
```
After that I reverted back change to `ASSERT*` in `util/testharness.h`. But preserved introduced `EXPECT*`, which is the same as `ASSERT*`. This will be deleted once switched to gtest.
This diff is independent and contains manual changes only in `util/testharness.h`.
Test Plan:
Make sure all tests are passing.
```lang=bash
% USE_CLANG=1 make check
```
Reviewers: igor, lgalanis, sdong, yufei.zhu, rven, meyering
Reviewed By: meyering
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D33333
10 years ago
|
|
|
EXPECT_OK(s);
|
|
|
|
return std::shared_ptr<DB>(db);
|
|
|
|
}
|
|
|
|
|
|
|
|
class PerfContextTest : public testing::Test {};
|
|
|
|
|
|
|
|
TEST_F(PerfContextTest, SeekIntoDeletion) {
|
|
|
|
ASSERT_OK(DestroyDB(kDbName, Options()));
|
|
|
|
auto db = OpenDb();
|
|
|
|
WriteOptions write_options;
|
|
|
|
ReadOptions read_options;
|
|
|
|
|
|
|
|
for (int i = 0; i < FLAGS_total_keys; ++i) {
|
|
|
|
std::string key = "k" + std::to_string(i);
|
|
|
|
std::string value = "v" + std::to_string(i);
|
|
|
|
|
|
|
|
ASSERT_OK(db->Put(write_options, key, value));
|
|
|
|
}
|
|
|
|
|
|
|
|
for (int i = 0; i < FLAGS_total_keys -1 ; ++i) {
|
|
|
|
std::string key = "k" + std::to_string(i);
|
|
|
|
ASSERT_OK(db->Delete(write_options, key));
|
|
|
|
}
|
|
|
|
|
|
|
|
HistogramImpl hist_get;
|
|
|
|
HistogramImpl hist_get_time;
|
|
|
|
for (int i = 0; i < FLAGS_total_keys - 1; ++i) {
|
|
|
|
std::string key = "k" + std::to_string(i);
|
|
|
|
std::string value;
|
|
|
|
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
StopWatchNano timer(SystemClock::Default().get());
|
|
|
|
timer.Start();
|
|
|
|
auto status = db->Get(read_options, key, &value);
|
|
|
|
auto elapsed_nanos = timer.ElapsedNanos();
|
|
|
|
ASSERT_TRUE(status.IsNotFound());
|
|
|
|
hist_get.Add(get_perf_context()->user_key_comparison_count);
|
|
|
|
hist_get_time.Add(elapsed_nanos);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "Get user key comparison: \n" << hist_get.ToString()
|
|
|
|
<< "Get time: \n" << hist_get_time.ToString();
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
HistogramImpl hist_seek_to_first;
|
|
|
|
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
|
|
|
|
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
StopWatchNano timer(SystemClock::Default().get(), true);
|
|
|
|
iter->SeekToFirst();
|
|
|
|
hist_seek_to_first.Add(get_perf_context()->user_key_comparison_count);
|
|
|
|
auto elapsed_nanos = timer.ElapsedNanos();
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "SeekToFirst user key comparison: \n"
|
|
|
|
<< hist_seek_to_first.ToString() << "ikey skipped: "
|
|
|
|
<< get_perf_context()->internal_key_skipped_count << "\n"
|
|
|
|
<< "idelete skipped: "
|
|
|
|
<< get_perf_context()->internal_delete_skipped_count << "\n"
|
|
|
|
<< "elapsed: " << elapsed_nanos << "\n";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
HistogramImpl hist_seek;
|
|
|
|
for (int i = 0; i < FLAGS_total_keys; ++i) {
|
|
|
|
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
|
|
|
|
std::string key = "k" + std::to_string(i);
|
|
|
|
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
StopWatchNano timer(SystemClock::Default().get(), true);
|
|
|
|
iter->Seek(key);
|
|
|
|
auto elapsed_nanos = timer.ElapsedNanos();
|
|
|
|
hist_seek.Add(get_perf_context()->user_key_comparison_count);
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "seek cmp: " << get_perf_context()->user_key_comparison_count
|
|
|
|
<< " ikey skipped " << get_perf_context()->internal_key_skipped_count
|
|
|
|
<< " idelete skipped "
|
|
|
|
<< get_perf_context()->internal_delete_skipped_count
|
|
|
|
<< " elapsed: " << elapsed_nanos << "ns\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
StopWatchNano timer2(SystemClock::Default().get(), true);
|
|
|
|
iter->Next();
|
|
|
|
auto elapsed_nanos2 = timer2.ElapsedNanos();
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "next cmp: " << get_perf_context()->user_key_comparison_count
|
|
|
|
<< "elapsed: " << elapsed_nanos2 << "ns\n";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "Seek user key comparison: \n" << hist_seek.ToString();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(PerfContextTest, StopWatchNanoOverhead) {
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
12 years ago
|
|
|
// profile the timer cost by itself!
|
|
|
|
const int kTotalIterations = 1000000;
|
|
|
|
std::vector<uint64_t> timings(kTotalIterations);
|
|
|
|
|
|
|
|
StopWatchNano timer(SystemClock::Default().get(), true);
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
12 years ago
|
|
|
for (auto& timing : timings) {
|
|
|
|
timing = timer.ElapsedNanos(true /* reset */);
|
|
|
|
}
|
|
|
|
|
|
|
|
HistogramImpl histogram;
|
|
|
|
for (const auto timing : timings) {
|
|
|
|
histogram.Add(timing);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << histogram.ToString();
|
|
|
|
}
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
12 years ago
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(PerfContextTest, StopWatchOverhead) {
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
12 years ago
|
|
|
// profile the timer cost by itself!
|
|
|
|
const int kTotalIterations = 1000000;
|
|
|
|
uint64_t elapsed = 0;
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
12 years ago
|
|
|
std::vector<uint64_t> timings(kTotalIterations);
|
|
|
|
|
|
|
|
StopWatch timer(SystemClock::Default().get(), nullptr, 0, &elapsed);
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
12 years ago
|
|
|
for (auto& timing : timings) {
|
|
|
|
timing = elapsed;
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
12 years ago
|
|
|
}
|
|
|
|
|
|
|
|
HistogramImpl histogram;
|
|
|
|
uint64_t prev_timing = 0;
|
|
|
|
for (const auto timing : timings) {
|
|
|
|
histogram.Add(timing - prev_timing);
|
|
|
|
prev_timing = timing;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << histogram.ToString();
|
|
|
|
}
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
12 years ago
|
|
|
}
|
|
|
|
|
|
|
|
void ProfileQueries(bool enabled_time = false) {
|
|
|
|
ASSERT_OK(DestroyDB(kDbName, Options())); // Start this test with a fresh DB
|
|
|
|
|
|
|
|
auto db = OpenDb();
|
|
|
|
|
|
|
|
WriteOptions write_options;
|
|
|
|
ReadOptions read_options;
|
|
|
|
|
|
|
|
HistogramImpl hist_put;
|
|
|
|
|
|
|
|
HistogramImpl hist_get;
|
|
|
|
HistogramImpl hist_get_snapshot;
|
|
|
|
HistogramImpl hist_get_memtable;
|
|
|
|
HistogramImpl hist_get_files;
|
|
|
|
HistogramImpl hist_get_post_process;
|
|
|
|
HistogramImpl hist_num_memtable_checked;
|
|
|
|
|
|
|
|
HistogramImpl hist_mget;
|
|
|
|
HistogramImpl hist_mget_snapshot;
|
|
|
|
HistogramImpl hist_mget_memtable;
|
|
|
|
HistogramImpl hist_mget_files;
|
|
|
|
HistogramImpl hist_mget_post_process;
|
|
|
|
HistogramImpl hist_mget_num_memtable_checked;
|
|
|
|
|
|
|
|
HistogramImpl hist_write_pre_post;
|
|
|
|
HistogramImpl hist_write_wal_time;
|
|
|
|
HistogramImpl hist_write_memtable_time;
|
|
|
|
HistogramImpl hist_write_delay_time;
|
|
|
|
HistogramImpl hist_write_thread_wait_nanos;
|
|
|
|
HistogramImpl hist_write_scheduling_time;
|
|
|
|
|
|
|
|
uint64_t total_db_mutex_nanos = 0;
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "Inserting " << FLAGS_total_keys << " key/value pairs\n...\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<int> keys;
|
|
|
|
const int kFlushFlag = -1;
|
|
|
|
for (int i = 0; i < FLAGS_total_keys; ++i) {
|
|
|
|
keys.push_back(i);
|
|
|
|
if (i == FLAGS_total_keys / 2) {
|
|
|
|
// Issuing a flush in the middle.
|
|
|
|
keys.push_back(kFlushFlag);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FLAGS_random_key) {
|
|
|
|
RandomShuffle(std::begin(keys), std::end(keys));
|
|
|
|
}
|
|
|
|
#ifndef NDEBUG
|
|
|
|
ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 1U);
|
|
|
|
#endif
|
|
|
|
int num_mutex_waited = 0;
|
|
|
|
for (const int i : keys) {
|
|
|
|
if (i == kFlushFlag) {
|
|
|
|
FlushOptions fo;
|
|
|
|
db->Flush(fo);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string key = "k" + std::to_string(i);
|
|
|
|
std::string value = "v" + std::to_string(i);
|
|
|
|
|
|
|
|
std::vector<std::string> values;
|
|
|
|
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
ASSERT_OK(db->Put(write_options, key, value));
|
|
|
|
if (++num_mutex_waited > 3) {
|
|
|
|
#ifndef NDEBUG
|
|
|
|
ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0U);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
hist_write_pre_post.Add(
|
|
|
|
get_perf_context()->write_pre_and_post_process_time);
|
|
|
|
hist_write_wal_time.Add(get_perf_context()->write_wal_time);
|
|
|
|
hist_write_memtable_time.Add(get_perf_context()->write_memtable_time);
|
|
|
|
hist_write_delay_time.Add(get_perf_context()->write_delay_time);
|
|
|
|
hist_write_thread_wait_nanos.Add(
|
|
|
|
get_perf_context()->write_thread_wait_nanos);
|
|
|
|
hist_write_scheduling_time.Add(
|
|
|
|
get_perf_context()->write_scheduling_flushes_compactions_time);
|
|
|
|
hist_put.Add(get_perf_context()->user_key_comparison_count);
|
|
|
|
total_db_mutex_nanos += get_perf_context()->db_mutex_lock_nanos;
|
|
|
|
}
|
|
|
|
#ifndef NDEBUG
|
|
|
|
ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0U);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
for (const int i : keys) {
|
|
|
|
if (i == kFlushFlag) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
std::string key = "k" + std::to_string(i);
|
|
|
|
std::string expected_value = "v" + std::to_string(i);
|
|
|
|
std::string value;
|
|
|
|
|
|
|
|
std::vector<Slice> multiget_keys = {Slice(key)};
|
|
|
|
std::vector<std::string> values;
|
|
|
|
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
ASSERT_OK(db->Get(read_options, key, &value));
|
|
|
|
ASSERT_EQ(expected_value, value);
|
|
|
|
hist_get_snapshot.Add(get_perf_context()->get_snapshot_time);
|
|
|
|
hist_get_memtable.Add(get_perf_context()->get_from_memtable_time);
|
|
|
|
hist_get_files.Add(get_perf_context()->get_from_output_files_time);
|
|
|
|
hist_num_memtable_checked.Add(get_perf_context()->get_from_memtable_count);
|
|
|
|
hist_get_post_process.Add(get_perf_context()->get_post_process_time);
|
|
|
|
hist_get.Add(get_perf_context()->user_key_comparison_count);
|
|
|
|
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
auto statuses = db->MultiGet(read_options, multiget_keys, &values);
|
|
|
|
for (const auto& s : statuses) {
|
|
|
|
ASSERT_OK(s);
|
|
|
|
}
|
|
|
|
hist_mget_snapshot.Add(get_perf_context()->get_snapshot_time);
|
|
|
|
hist_mget_memtable.Add(get_perf_context()->get_from_memtable_time);
|
|
|
|
hist_mget_files.Add(get_perf_context()->get_from_output_files_time);
|
|
|
|
hist_mget_num_memtable_checked.Add(get_perf_context()->get_from_memtable_count);
|
|
|
|
hist_mget_post_process.Add(get_perf_context()->get_post_process_time);
|
|
|
|
hist_mget.Add(get_perf_context()->user_key_comparison_count);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "Put user key comparison: \n"
|
|
|
|
<< hist_put.ToString() << "Get user key comparison: \n"
|
|
|
|
<< hist_get.ToString() << "MultiGet user key comparison: \n"
|
|
|
|
<< hist_get.ToString();
|
|
|
|
std::cout << "Put(): Pre and Post Process Time: \n"
|
|
|
|
<< hist_write_pre_post.ToString() << " Writing WAL time: \n"
|
|
|
|
<< hist_write_wal_time.ToString() << "\n"
|
|
|
|
<< " Writing Mem Table time: \n"
|
|
|
|
<< hist_write_memtable_time.ToString() << "\n"
|
|
|
|
<< " Write Delay: \n" << hist_write_delay_time.ToString() << "\n"
|
|
|
|
<< " Waiting for Batch time: \n"
|
|
|
|
<< hist_write_thread_wait_nanos.ToString() << "\n"
|
|
|
|
<< " Scheduling Flushes and Compactions Time: \n"
|
|
|
|
<< hist_write_scheduling_time.ToString() << "\n"
|
|
|
|
<< " Total DB mutex nanos: \n" << total_db_mutex_nanos << "\n";
|
|
|
|
|
|
|
|
std::cout << "Get(): Time to get snapshot: \n"
|
|
|
|
<< hist_get_snapshot.ToString()
|
|
|
|
<< " Time to get value from memtables: \n"
|
|
|
|
<< hist_get_memtable.ToString() << "\n"
|
|
|
|
<< " Time to get value from output files: \n"
|
|
|
|
<< hist_get_files.ToString() << "\n"
|
|
|
|
<< " Number of memtables checked: \n"
|
|
|
|
<< hist_num_memtable_checked.ToString() << "\n"
|
|
|
|
<< " Time to post process: \n" << hist_get_post_process.ToString()
|
|
|
|
<< "\n";
|
|
|
|
|
|
|
|
std::cout << "MultiGet(): Time to get snapshot: \n"
|
|
|
|
<< hist_mget_snapshot.ToString()
|
|
|
|
<< " Time to get value from memtables: \n"
|
|
|
|
<< hist_mget_memtable.ToString() << "\n"
|
|
|
|
<< " Time to get value from output files: \n"
|
|
|
|
<< hist_mget_files.ToString() << "\n"
|
|
|
|
<< " Number of memtables checked: \n"
|
|
|
|
<< hist_mget_num_memtable_checked.ToString() << "\n"
|
|
|
|
<< " Time to post process: \n"
|
|
|
|
<< hist_mget_post_process.ToString() << "\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
if (enabled_time) {
|
|
|
|
ASSERT_GT(hist_get.Average(), 0);
|
|
|
|
ASSERT_GT(hist_get_snapshot.Average(), 0);
|
|
|
|
ASSERT_GT(hist_get_memtable.Average(), 0);
|
|
|
|
ASSERT_GT(hist_get_files.Average(), 0);
|
|
|
|
ASSERT_GT(hist_get_post_process.Average(), 0);
|
|
|
|
ASSERT_GT(hist_num_memtable_checked.Average(), 0);
|
|
|
|
|
|
|
|
ASSERT_GT(hist_mget.Average(), 0);
|
|
|
|
ASSERT_GT(hist_mget_snapshot.Average(), 0);
|
|
|
|
ASSERT_GT(hist_mget_memtable.Average(), 0);
|
|
|
|
ASSERT_GT(hist_mget_files.Average(), 0);
|
|
|
|
ASSERT_GT(hist_mget_post_process.Average(), 0);
|
|
|
|
ASSERT_GT(hist_mget_num_memtable_checked.Average(), 0);
|
|
|
|
|
|
|
|
EXPECT_GT(hist_write_pre_post.Average(), 0);
|
|
|
|
EXPECT_GT(hist_write_wal_time.Average(), 0);
|
|
|
|
EXPECT_GT(hist_write_memtable_time.Average(), 0);
|
|
|
|
EXPECT_EQ(hist_write_delay_time.Average(), 0);
|
|
|
|
EXPECT_EQ(hist_write_thread_wait_nanos.Average(), 0);
|
|
|
|
EXPECT_GT(hist_write_scheduling_time.Average(), 0);
|
|
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
ASSERT_LT(total_db_mutex_nanos, 100U);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
db.reset();
|
|
|
|
db = OpenDb(true);
|
|
|
|
|
|
|
|
hist_get.Clear();
|
|
|
|
hist_get_snapshot.Clear();
|
|
|
|
hist_get_memtable.Clear();
|
|
|
|
hist_get_files.Clear();
|
|
|
|
hist_get_post_process.Clear();
|
|
|
|
hist_num_memtable_checked.Clear();
|
|
|
|
|
|
|
|
hist_mget.Clear();
|
|
|
|
hist_mget_snapshot.Clear();
|
|
|
|
hist_mget_memtable.Clear();
|
|
|
|
hist_mget_files.Clear();
|
|
|
|
hist_mget_post_process.Clear();
|
|
|
|
hist_mget_num_memtable_checked.Clear();
|
|
|
|
|
|
|
|
for (const int i : keys) {
|
|
|
|
if (i == kFlushFlag) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
std::string key = "k" + std::to_string(i);
|
|
|
|
std::string expected_value = "v" + std::to_string(i);
|
|
|
|
std::string value;
|
|
|
|
|
|
|
|
std::vector<Slice> multiget_keys = {Slice(key)};
|
|
|
|
std::vector<std::string> values;
|
|
|
|
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
ASSERT_OK(db->Get(read_options, key, &value));
|
|
|
|
ASSERT_EQ(expected_value, value);
|
|
|
|
hist_get_snapshot.Add(get_perf_context()->get_snapshot_time);
|
|
|
|
hist_get_memtable.Add(get_perf_context()->get_from_memtable_time);
|
|
|
|
hist_get_files.Add(get_perf_context()->get_from_output_files_time);
|
|
|
|
hist_num_memtable_checked.Add(get_perf_context()->get_from_memtable_count);
|
|
|
|
hist_get_post_process.Add(get_perf_context()->get_post_process_time);
|
|
|
|
hist_get.Add(get_perf_context()->user_key_comparison_count);
|
|
|
|
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
auto statuses = db->MultiGet(read_options, multiget_keys, &values);
|
|
|
|
for (const auto& s : statuses) {
|
|
|
|
ASSERT_OK(s);
|
|
|
|
}
|
|
|
|
hist_mget_snapshot.Add(get_perf_context()->get_snapshot_time);
|
|
|
|
hist_mget_memtable.Add(get_perf_context()->get_from_memtable_time);
|
|
|
|
hist_mget_files.Add(get_perf_context()->get_from_output_files_time);
|
|
|
|
hist_mget_num_memtable_checked.Add(get_perf_context()->get_from_memtable_count);
|
|
|
|
hist_mget_post_process.Add(get_perf_context()->get_post_process_time);
|
|
|
|
hist_mget.Add(get_perf_context()->user_key_comparison_count);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "ReadOnly Get user key comparison: \n"
|
|
|
|
<< hist_get.ToString()
|
|
|
|
<< "ReadOnly MultiGet user key comparison: \n"
|
|
|
|
<< hist_mget.ToString();
|
|
|
|
|
|
|
|
std::cout << "ReadOnly Get(): Time to get snapshot: \n"
|
|
|
|
<< hist_get_snapshot.ToString()
|
|
|
|
<< " Time to get value from memtables: \n"
|
|
|
|
<< hist_get_memtable.ToString() << "\n"
|
|
|
|
<< " Time to get value from output files: \n"
|
|
|
|
<< hist_get_files.ToString() << "\n"
|
|
|
|
<< " Number of memtables checked: \n"
|
|
|
|
<< hist_num_memtable_checked.ToString() << "\n"
|
|
|
|
<< " Time to post process: \n" << hist_get_post_process.ToString()
|
|
|
|
<< "\n";
|
|
|
|
|
|
|
|
std::cout << "ReadOnly MultiGet(): Time to get snapshot: \n"
|
|
|
|
<< hist_mget_snapshot.ToString()
|
|
|
|
<< " Time to get value from memtables: \n"
|
|
|
|
<< hist_mget_memtable.ToString() << "\n"
|
|
|
|
<< " Time to get value from output files: \n"
|
|
|
|
<< hist_mget_files.ToString() << "\n"
|
|
|
|
<< " Number of memtables checked: \n"
|
|
|
|
<< hist_mget_num_memtable_checked.ToString() << "\n"
|
|
|
|
<< " Time to post process: \n"
|
|
|
|
<< hist_mget_post_process.ToString() << "\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
if (enabled_time) {
|
|
|
|
ASSERT_GT(hist_get.Average(), 0);
|
|
|
|
ASSERT_GT(hist_get_memtable.Average(), 0);
|
|
|
|
ASSERT_GT(hist_get_files.Average(), 0);
|
|
|
|
ASSERT_GT(hist_num_memtable_checked.Average(), 0);
|
|
|
|
// In read-only mode Get(), no super version operation is needed
|
|
|
|
ASSERT_EQ(hist_get_post_process.Average(), 0);
|
|
|
|
ASSERT_GT(hist_get_snapshot.Average(), 0);
|
|
|
|
|
|
|
|
ASSERT_GT(hist_mget.Average(), 0);
|
|
|
|
ASSERT_GT(hist_mget_snapshot.Average(), 0);
|
|
|
|
ASSERT_GT(hist_mget_memtable.Average(), 0);
|
|
|
|
ASSERT_GT(hist_mget_files.Average(), 0);
|
|
|
|
ASSERT_GT(hist_mget_post_process.Average(), 0);
|
|
|
|
ASSERT_GT(hist_mget_num_memtable_checked.Average(), 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
TEST_F(PerfContextTest, KeyComparisonCount) {
|
|
|
|
SetPerfLevel(kEnableCount);
|
|
|
|
ProfileQueries();
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
12 years ago
|
|
|
|
|
|
|
SetPerfLevel(kDisable);
|
|
|
|
ProfileQueries();
|
|
|
|
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
12 years ago
|
|
|
SetPerfLevel(kEnableTime);
|
|
|
|
ProfileQueries(true);
|
|
|
|
}
|
|
|
|
#endif // ROCKSDB_LITE
|
|
|
|
|
|
|
|
// make perf_context_test
|
|
|
|
// export ROCKSDB_TESTS=PerfContextTest.SeekKeyComparison
|
|
|
|
// For one memtable:
|
|
|
|
// ./perf_context_test --write_buffer_size=500000 --total_keys=10000
|
|
|
|
// For two memtables:
|
|
|
|
// ./perf_context_test --write_buffer_size=250000 --total_keys=10000
|
|
|
|
// Specify --random_key=1 to shuffle the key before insertion
|
|
|
|
// Results show that, for sequential insertion, worst-case Seek Key comparison
|
|
|
|
// is close to the total number of keys (linear), when there is only one
|
|
|
|
// memtable. When there are two memtables, even the avg Seek Key comparison
|
|
|
|
// starts to become linear to the input size.
|
|
|
|
|
|
|
|
TEST_F(PerfContextTest, SeekKeyComparison) {
|
|
|
|
ASSERT_OK(DestroyDB(kDbName, Options()));
|
|
|
|
auto db = OpenDb();
|
|
|
|
WriteOptions write_options;
|
|
|
|
ReadOptions read_options;
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "Inserting " << FLAGS_total_keys << " key/value pairs\n...\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<int> keys;
|
|
|
|
for (int i = 0; i < FLAGS_total_keys; ++i) {
|
|
|
|
keys.push_back(i);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FLAGS_random_key) {
|
|
|
|
RandomShuffle(std::begin(keys), std::end(keys));
|
|
|
|
}
|
|
|
|
|
|
|
|
HistogramImpl hist_put_time;
|
|
|
|
HistogramImpl hist_wal_time;
|
|
|
|
HistogramImpl hist_time_diff;
|
|
|
|
|
|
|
|
SetPerfLevel(kEnableTime);
|
|
|
|
StopWatchNano timer(SystemClock::Default().get());
|
|
|
|
for (const int i : keys) {
|
|
|
|
std::string key = "k" + std::to_string(i);
|
|
|
|
std::string value = "v" + std::to_string(i);
|
|
|
|
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
timer.Start();
|
|
|
|
ASSERT_OK(db->Put(write_options, key, value));
|
|
|
|
auto put_time = timer.ElapsedNanos();
|
|
|
|
hist_put_time.Add(put_time);
|
|
|
|
hist_wal_time.Add(get_perf_context()->write_wal_time);
|
|
|
|
hist_time_diff.Add(put_time - get_perf_context()->write_wal_time);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "Put time:\n" << hist_put_time.ToString() << "WAL time:\n"
|
|
|
|
<< hist_wal_time.ToString() << "time diff:\n"
|
|
|
|
<< hist_time_diff.ToString();
|
|
|
|
}
|
|
|
|
|
|
|
|
HistogramImpl hist_seek;
|
|
|
|
HistogramImpl hist_next;
|
|
|
|
|
|
|
|
for (int i = 0; i < FLAGS_total_keys; ++i) {
|
|
|
|
std::string key = "k" + std::to_string(i);
|
|
|
|
std::string value = "v" + std::to_string(i);
|
|
|
|
|
|
|
|
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
iter->Seek(key);
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ(iter->value().ToString(), value);
|
|
|
|
hist_seek.Add(get_perf_context()->user_key_comparison_count);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
|
|
|
|
for (iter->SeekToFirst(); iter->Valid();) {
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
iter->Next();
|
|
|
|
hist_next.Add(get_perf_context()->user_key_comparison_count);
|
|
|
|
}
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "Seek:\n" << hist_seek.ToString() << "Next:\n"
|
|
|
|
<< hist_next.ToString();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(PerfContextTest, DBMutexLockCounter) {
|
|
|
|
int stats_code[] = {0, static_cast<int>(DB_MUTEX_WAIT_MICROS)};
|
|
|
|
for (PerfLevel perf_level_test :
|
|
|
|
{PerfLevel::kEnableTimeExceptForMutex, PerfLevel::kEnableTime}) {
|
|
|
|
for (int c = 0; c < 2; ++c) {
|
|
|
|
InstrumentedMutex mutex(nullptr, SystemClock::Default().get(),
|
|
|
|
stats_code[c]);
|
|
|
|
mutex.Lock();
|
|
|
|
ROCKSDB_NAMESPACE::port::Thread child_thread([&] {
|
|
|
|
SetPerfLevel(perf_level_test);
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0);
|
|
|
|
mutex.Lock();
|
|
|
|
mutex.Unlock();
|
|
|
|
if (perf_level_test == PerfLevel::kEnableTimeExceptForMutex ||
|
|
|
|
stats_code[c] != DB_MUTEX_WAIT_MICROS) {
|
|
|
|
ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0);
|
|
|
|
} else {
|
|
|
|
// increment the counter only when it's a DB Mutex
|
|
|
|
ASSERT_GT(get_perf_context()->db_mutex_lock_nanos, 0);
|
|
|
|
}
|
|
|
|
});
|
|
|
|
SystemClock::Default()->SleepForMicroseconds(100);
|
|
|
|
mutex.Unlock();
|
|
|
|
child_thread.join();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(PerfContextTest, FalseDBMutexWait) {
|
|
|
|
SetPerfLevel(kEnableTime);
|
|
|
|
int stats_code[] = {0, static_cast<int>(DB_MUTEX_WAIT_MICROS)};
|
|
|
|
for (int c = 0; c < 2; ++c) {
|
|
|
|
InstrumentedMutex mutex(nullptr, SystemClock::Default().get(),
|
|
|
|
stats_code[c]);
|
|
|
|
InstrumentedCondVar lock(&mutex);
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
mutex.Lock();
|
|
|
|
lock.TimedWait(100);
|
|
|
|
mutex.Unlock();
|
|
|
|
if (stats_code[c] == static_cast<int>(DB_MUTEX_WAIT_MICROS)) {
|
|
|
|
// increment the counter only when it's a DB Mutex
|
|
|
|
ASSERT_GT(get_perf_context()->db_condition_wait_nanos, 0);
|
|
|
|
} else {
|
|
|
|
ASSERT_EQ(get_perf_context()->db_condition_wait_nanos, 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(PerfContextTest, ToString) {
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
get_perf_context()->block_read_count = 12345;
|
|
|
|
|
|
|
|
std::string zero_included = get_perf_context()->ToString();
|
|
|
|
ASSERT_NE(std::string::npos, zero_included.find("= 0"));
|
|
|
|
ASSERT_NE(std::string::npos, zero_included.find("= 12345"));
|
|
|
|
|
|
|
|
std::string zero_excluded = get_perf_context()->ToString(true);
|
|
|
|
ASSERT_EQ(std::string::npos, zero_excluded.find("= 0"));
|
|
|
|
ASSERT_NE(std::string::npos, zero_excluded.find("= 12345"));
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(PerfContextTest, MergeOperatorTime) {
|
|
|
|
ASSERT_OK(DestroyDB(kDbName, Options()));
|
|
|
|
DB* db;
|
|
|
|
Options options;
|
|
|
|
options.create_if_missing = true;
|
|
|
|
options.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
|
|
Status s = DB::Open(options, kDbName, &db);
|
|
|
|
EXPECT_OK(s);
|
|
|
|
|
|
|
|
std::string val;
|
|
|
|
ASSERT_OK(db->Merge(WriteOptions(), "k1", "val1"));
|
|
|
|
ASSERT_OK(db->Merge(WriteOptions(), "k1", "val2"));
|
|
|
|
ASSERT_OK(db->Merge(WriteOptions(), "k1", "val3"));
|
|
|
|
ASSERT_OK(db->Merge(WriteOptions(), "k1", "val4"));
|
|
|
|
|
|
|
|
SetPerfLevel(kEnableTime);
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
ASSERT_OK(db->Get(ReadOptions(), "k1", &val));
|
|
|
|
#ifdef OS_SOLARIS
|
|
|
|
for (int i = 0; i < 100; i++) {
|
|
|
|
ASSERT_OK(db->Get(ReadOptions(), "k1", &val));
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
EXPECT_GT(get_perf_context()->merge_operator_time_nanos, 0);
|
|
|
|
|
|
|
|
ASSERT_OK(db->Flush(FlushOptions()));
|
|
|
|
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
ASSERT_OK(db->Get(ReadOptions(), "k1", &val));
|
|
|
|
#ifdef OS_SOLARIS
|
|
|
|
for (int i = 0; i < 100; i++) {
|
|
|
|
ASSERT_OK(db->Get(ReadOptions(), "k1", &val));
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
EXPECT_GT(get_perf_context()->merge_operator_time_nanos, 0);
|
|
|
|
|
|
|
|
ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
|
|
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
ASSERT_OK(db->Get(ReadOptions(), "k1", &val));
|
|
|
|
#ifdef OS_SOLARIS
|
|
|
|
for (int i = 0; i < 100; i++) {
|
|
|
|
ASSERT_OK(db->Get(ReadOptions(), "k1", &val));
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
EXPECT_GT(get_perf_context()->merge_operator_time_nanos, 0);
|
|
|
|
|
|
|
|
delete db;
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(PerfContextTest, CopyAndMove) {
|
|
|
|
// Assignment operator
|
|
|
|
{
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
get_perf_context()->EnablePerLevelPerfContext();
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5);
|
|
|
|
ASSERT_EQ(
|
|
|
|
1,
|
|
|
|
(*(get_perf_context()->level_to_perf_context))[5].bloom_filter_useful);
|
|
|
|
PerfContext perf_context_assign;
|
|
|
|
perf_context_assign = *get_perf_context();
|
|
|
|
ASSERT_EQ(
|
|
|
|
1,
|
|
|
|
(*(perf_context_assign.level_to_perf_context))[5].bloom_filter_useful);
|
|
|
|
get_perf_context()->ClearPerLevelPerfContext();
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
ASSERT_EQ(
|
|
|
|
1,
|
|
|
|
(*(perf_context_assign.level_to_perf_context))[5].bloom_filter_useful);
|
|
|
|
perf_context_assign.ClearPerLevelPerfContext();
|
|
|
|
perf_context_assign.Reset();
|
|
|
|
}
|
|
|
|
// Copy constructor
|
|
|
|
{
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
get_perf_context()->EnablePerLevelPerfContext();
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5);
|
|
|
|
ASSERT_EQ(
|
|
|
|
1,
|
|
|
|
(*(get_perf_context()->level_to_perf_context))[5].bloom_filter_useful);
|
|
|
|
PerfContext perf_context_copy(*get_perf_context());
|
|
|
|
ASSERT_EQ(
|
|
|
|
1, (*(perf_context_copy.level_to_perf_context))[5].bloom_filter_useful);
|
|
|
|
get_perf_context()->ClearPerLevelPerfContext();
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
ASSERT_EQ(
|
|
|
|
1, (*(perf_context_copy.level_to_perf_context))[5].bloom_filter_useful);
|
|
|
|
perf_context_copy.ClearPerLevelPerfContext();
|
|
|
|
perf_context_copy.Reset();
|
|
|
|
}
|
|
|
|
// Move constructor
|
|
|
|
{
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
get_perf_context()->EnablePerLevelPerfContext();
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5);
|
|
|
|
ASSERT_EQ(
|
|
|
|
1,
|
|
|
|
(*(get_perf_context()->level_to_perf_context))[5].bloom_filter_useful);
|
|
|
|
PerfContext perf_context_move = std::move(*get_perf_context());
|
|
|
|
ASSERT_EQ(
|
|
|
|
1, (*(perf_context_move.level_to_perf_context))[5].bloom_filter_useful);
|
|
|
|
get_perf_context()->ClearPerLevelPerfContext();
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
ASSERT_EQ(
|
|
|
|
1, (*(perf_context_move.level_to_perf_context))[5].bloom_filter_useful);
|
|
|
|
perf_context_move.ClearPerLevelPerfContext();
|
|
|
|
perf_context_move.Reset();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(PerfContextTest, PerfContextDisableEnable) {
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
get_perf_context()->EnablePerLevelPerfContext();
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, 0);
|
|
|
|
get_perf_context()->DisablePerLevelPerfContext();
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5);
|
|
|
|
get_perf_context()->EnablePerLevelPerfContext();
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 1, 0);
|
|
|
|
get_perf_context()->DisablePerLevelPerfContext();
|
|
|
|
PerfContext perf_context_copy(*get_perf_context());
|
|
|
|
ASSERT_EQ(1, (*(perf_context_copy.level_to_perf_context))[0]
|
|
|
|
.bloom_filter_full_positive);
|
|
|
|
// this was set when per level perf context is disabled, should not be copied
|
|
|
|
ASSERT_NE(
|
|
|
|
1, (*(perf_context_copy.level_to_perf_context))[5].bloom_filter_useful);
|
|
|
|
ASSERT_EQ(
|
|
|
|
1, (*(perf_context_copy.level_to_perf_context))[0].block_cache_hit_count);
|
|
|
|
perf_context_copy.ClearPerLevelPerfContext();
|
|
|
|
perf_context_copy.Reset();
|
|
|
|
get_perf_context()->ClearPerLevelPerfContext();
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(PerfContextTest, PerfContextByLevelGetSet) {
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
get_perf_context()->EnablePerLevelPerfContext();
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, 0);
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5);
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 7);
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 7);
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, 2);
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 1, 0);
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 5, 2);
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(block_cache_miss_count, 2, 3);
|
|
|
|
PERF_COUNTER_BY_LEVEL_ADD(block_cache_miss_count, 4, 1);
|
|
|
|
ASSERT_EQ(
|
|
|
|
0, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
|
|
|
|
ASSERT_EQ(
|
|
|
|
1, (*(get_perf_context()->level_to_perf_context))[5].bloom_filter_useful);
|
|
|
|
ASSERT_EQ(
|
|
|
|
2, (*(get_perf_context()->level_to_perf_context))[7].bloom_filter_useful);
|
|
|
|
ASSERT_EQ(1, (*(get_perf_context()->level_to_perf_context))[0]
|
|
|
|
.bloom_filter_full_positive);
|
|
|
|
ASSERT_EQ(1, (*(get_perf_context()->level_to_perf_context))[2]
|
|
|
|
.bloom_filter_full_true_positive);
|
|
|
|
ASSERT_EQ(1, (*(get_perf_context()->level_to_perf_context))[0]
|
|
|
|
.block_cache_hit_count);
|
|
|
|
ASSERT_EQ(5, (*(get_perf_context()->level_to_perf_context))[2]
|
|
|
|
.block_cache_hit_count);
|
|
|
|
ASSERT_EQ(2, (*(get_perf_context()->level_to_perf_context))[3]
|
|
|
|
.block_cache_miss_count);
|
|
|
|
ASSERT_EQ(4, (*(get_perf_context()->level_to_perf_context))[1]
|
|
|
|
.block_cache_miss_count);
|
|
|
|
std::string zero_excluded = get_perf_context()->ToString(true);
|
|
|
|
ASSERT_NE(std::string::npos,
|
|
|
|
zero_excluded.find("bloom_filter_useful = 1@level5, 2@level7"));
|
|
|
|
ASSERT_NE(std::string::npos,
|
|
|
|
zero_excluded.find("bloom_filter_full_positive = 1@level0"));
|
|
|
|
ASSERT_NE(std::string::npos,
|
|
|
|
zero_excluded.find("bloom_filter_full_true_positive = 1@level2"));
|
|
|
|
ASSERT_NE(std::string::npos,
|
|
|
|
zero_excluded.find("block_cache_hit_count = 1@level0, 5@level2"));
|
|
|
|
ASSERT_NE(std::string::npos,
|
|
|
|
zero_excluded.find("block_cache_miss_count = 4@level1, 2@level3"));
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
12 years ago
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(PerfContextTest, CPUTimer) {
|
|
|
|
if (SystemClock::Default()->CPUNanos() == 0) {
|
|
|
|
ROCKSDB_GTEST_SKIP("Target without CPUNanos support");
|
Fix MSVC-related build issues (#7439)
Summary:
This PR addresses some build and functional issues on MSVC targets, as a step towards an eventual goal of having RocksDB build successfully for Windows on ARM64.
Addressed issues include:
- BitsSetToOne and CountTrailingZeroBits do not compile on non-x64 MSVC targets. A fallback implementation of BitsSetToOne when Intel intrinsics are not available is added, based on the C++20 `<bit>` popcount implementation in Microsoft's STL.
- The implementation of FloorLog2 for MSVC targets (including x64) gives incorrect results. The unit test easily detects this, but CircleCI is currently configured to only run a specific set of tests for Windows CMake builds, so this seems to have been unnoticed.
- AsmVolatilePause does not use YieldProcessor on Windows ARM64 targets, even though it is available.
- When CondVar::TimedWait calls Microsoft STL's condition_variable::wait_for, it can potentially trigger a bug (just recently fixed in the upcoming VS 16.8's STL) that deadlocks various tests that wait for a timer to execute, since `Timer::Run` doesn't get a chance to execute before being blocked by the test function acquiring the mutex.
- In c_test, `GetTempDir` assumes a POSIX-style temp path.
- `NormalizePath` did not eliminate consecutive POSIX-style path separators on Windows, resulting in test failures in e.g., wal_manager_test.
- Various other test failures.
In a followup PR I hope to modify CircleCI's config.yml to invoke all RocksDB unit tests in Windows CMake builds with CTest, instead of the current use of `run_ci_db_test.ps1` which requires individual tests to be specified and is missing many of the existing tests.
Notes from peterd: FloorLog2 is not yet used in production code (it's for something in progress). I also added a few more inexpensive platform-dependent tests to Windows CircleCI runs. And included facebook/folly#1461 as requested
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7439
Reviewed By: jay-zhuang
Differential Revision: D24021563
Pulled By: pdillinger
fbshipit-source-id: 0ec2027c0d6a494d8a0fe38d9667fc2f7e29f7e7
4 years ago
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
ASSERT_OK(DestroyDB(kDbName, Options()));
|
|
|
|
auto db = OpenDb();
|
|
|
|
WriteOptions write_options;
|
|
|
|
ReadOptions read_options;
|
|
|
|
SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex);
|
|
|
|
|
|
|
|
std::string max_str = "0";
|
|
|
|
for (int i = 0; i < FLAGS_total_keys; ++i) {
|
|
|
|
std::string i_str = std::to_string(i);
|
|
|
|
std::string key = "k" + i_str;
|
|
|
|
std::string value = "v" + i_str;
|
|
|
|
max_str = max_str > i_str ? max_str : i_str;
|
|
|
|
|
|
|
|
ASSERT_OK(db->Put(write_options, key, value));
|
|
|
|
}
|
|
|
|
std::string last_key = "k" + max_str;
|
|
|
|
std::string last_value = "v" + max_str;
|
|
|
|
|
|
|
|
{
|
|
|
|
// Get
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
std::string value;
|
|
|
|
ASSERT_OK(db->Get(read_options, "k0", &value));
|
|
|
|
ASSERT_EQ(value, "v0");
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "Get CPU time nanos: " << get_perf_context()->get_cpu_nanos
|
|
|
|
<< "ns\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
// Iter
|
|
|
|
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
|
|
|
|
|
|
|
|
// Seek
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
iter->Seek(last_key);
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ(last_value, iter->value().ToString());
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "Iter Seek CPU time nanos: "
|
|
|
|
<< get_perf_context()->iter_seek_cpu_nanos << "ns\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
// SeekForPrev
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
iter->SeekForPrev(last_key);
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "Iter SeekForPrev CPU time nanos: "
|
|
|
|
<< get_perf_context()->iter_seek_cpu_nanos << "ns\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
// SeekToLast
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
iter->SeekToLast();
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ(last_value, iter->value().ToString());
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "Iter SeekToLast CPU time nanos: "
|
|
|
|
<< get_perf_context()->iter_seek_cpu_nanos << "ns\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
// SeekToFirst
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
iter->SeekToFirst();
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ("v0", iter->value().ToString());
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "Iter SeekToFirst CPU time nanos: "
|
|
|
|
<< get_perf_context()->iter_seek_cpu_nanos << "ns\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
// Next
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
iter->Next();
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ("v1", iter->value().ToString());
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "Iter Next CPU time nanos: "
|
|
|
|
<< get_perf_context()->iter_next_cpu_nanos << "ns\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
// Prev
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
iter->Prev();
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ("v0", iter->value().ToString());
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << "Iter Prev CPU time nanos: "
|
|
|
|
<< get_perf_context()->iter_prev_cpu_nanos << "ns\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
// monotonically increasing
|
|
|
|
get_perf_context()->Reset();
|
|
|
|
auto count = get_perf_context()->iter_seek_cpu_nanos;
|
|
|
|
for (int i = 0; i < FLAGS_total_keys; ++i) {
|
|
|
|
iter->Seek("k" + std::to_string(i));
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ("v" + std::to_string(i), iter->value().ToString());
|
|
|
|
auto next_count = get_perf_context()->iter_seek_cpu_nanos;
|
|
|
|
ASSERT_GT(next_count, count);
|
|
|
|
count = next_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
// iterator creation/destruction; multiple iterators
|
|
|
|
{
|
|
|
|
std::unique_ptr<Iterator> iter2(db->NewIterator(read_options));
|
|
|
|
ASSERT_EQ(count, get_perf_context()->iter_seek_cpu_nanos);
|
|
|
|
iter2->Seek(last_key);
|
|
|
|
ASSERT_TRUE(iter2->Valid());
|
|
|
|
ASSERT_EQ(last_value, iter2->value().ToString());
|
|
|
|
ASSERT_GT(get_perf_context()->iter_seek_cpu_nanos, count);
|
|
|
|
count = get_perf_context()->iter_seek_cpu_nanos;
|
|
|
|
}
|
|
|
|
ASSERT_EQ(count, get_perf_context()->iter_seek_cpu_nanos);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
|
|
|
|
for (int i = 1; i < argc; i++) {
|
|
|
|
int n;
|
|
|
|
char junk;
|
|
|
|
|
|
|
|
if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) {
|
|
|
|
FLAGS_write_buffer_size = n;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sscanf(argv[i], "--total_keys=%d%c", &n, &junk) == 1) {
|
|
|
|
FLAGS_total_keys = n;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sscanf(argv[i], "--random_key=%d%c", &n, &junk) == 1 &&
|
|
|
|
(n == 0 || n == 1)) {
|
|
|
|
FLAGS_random_key = n;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sscanf(argv[i], "--use_set_based_memetable=%d%c", &n, &junk) == 1 &&
|
|
|
|
(n == 0 || n == 1)) {
|
|
|
|
FLAGS_use_set_based_memetable = n;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sscanf(argv[i], "--verbose=%d%c", &n, &junk) == 1 &&
|
|
|
|
(n == 0 || n == 1)) {
|
|
|
|
FLAGS_verbose = n;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FLAGS_verbose) {
|
|
|
|
std::cout << kDbName << "\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
return RUN_ALL_TESTS();
|
|
|
|
}
|