Add last level and non-last level read statistics (#9519)

Summary:
Add last level and non-last level read statistics:
```
LAST_LEVEL_READ_BYTES,
LAST_LEVEL_READ_COUNT,
NON_LAST_LEVEL_READ_BYTES,
NON_LAST_LEVEL_READ_COUNT,
```

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9519

Test Plan: added a unit test (`DBTest2.LastLevelStatistics`)

Reviewed By: siying

Differential Revision: D34062539

Pulled By: jay-zhuang

fbshipit-source-id: 908644c3050878b4234febdc72e3e19d89af38cd
main
Jay Zhuang 3 years ago committed by Facebook GitHub Bot
parent 30b08878d8
commit f4b2500e12
  1. 1
      HISTORY.md
  2. 52
      db/db_test2.cc
  3. 2
      db/table_cache.cc
  4. 125
      file/random_access_file_reader.cc
  5. 9
      file/random_access_file_reader.h
  6. 6
      include/rocksdb/statistics.h
  7. 16
      java/rocksjni/portal.h
  8. 8
      java/src/main/java/org/rocksdb/TickerType.java
  9. 70
      microbench/db_basic_bench.cc
  10. 4
      monitoring/statistics.cc

@ -83,6 +83,7 @@
* Improved the SstDumpTool to read the comparator from table properties and use it to read the SST File. * Improved the SstDumpTool to read the comparator from table properties and use it to read the SST File.
* Extended the column family statistics in the info log so the total amount of garbage in the blob files and the blob file space amplification factor are also logged. Also exposed the blob file space amp via the `rocksdb.blob-stats` DB property. * Extended the column family statistics in the info log so the total amount of garbage in the blob files and the blob file space amplification factor are also logged. Also exposed the blob file space amp via the `rocksdb.blob-stats` DB property.
* Introduced the API rocksdb_create_dir_if_missing in c.h that calls underlying file system's CreateDirIfMissing API to create the directory. * Introduced the API rocksdb_create_dir_if_missing in c.h that calls underlying file system's CreateDirIfMissing API to create the directory.
* Added last level and non-last level read statistics: `LAST_LEVEL_READ_*`, `NON_LAST_LEVEL_READ_*`.
## 6.29.0 (01/21/2022) ## 6.29.0 (01/21/2022)
Note: The next release will be major release 7.0. See https://github.com/facebook/rocksdb/issues/9390 for more info. Note: The next release will be major release 7.0. See https://github.com/facebook/rocksdb/issues/9390 for more info.

@ -6847,6 +6847,58 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) {
size = GetSstSizeHelper(Temperature::kCold); size = GetSstSizeHelper(Temperature::kCold);
ASSERT_GT(size, 0); ASSERT_GT(size, 0);
} }
// Checks that reads are attributed to the LAST_LEVEL_READ_* /
// NON_LAST_LEVEL_READ_* tickers: a freshly flushed file only bumps the
// non-last-level tickers, and after a compaction the last-level tickers are
// expected to equal the warm-file tickers (the bottommost temperature is
// configured as kWarm below).
TEST_F(DBTest2, LastLevelStatistics) {
  Options options = CurrentOptions();
  options.bottommost_temperature = Temperature::kWarm;
  options.level0_file_num_compaction_trigger = 2;
  options.level_compaction_dynamic_level_bytes = true;
  options.statistics = CreateDBStatistics();
  Reopen(options);

  // Shorthand for reading the current value of one ticker.
  auto ticker = [&options](uint32_t ticker_type) {
    return options.statistics->getTickerCount(ticker_type);
  };

  // First flush: a single SST on level 0, so the read below must only be
  // accounted to the non-last-level tickers.
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("bar", "bar"));
  ASSERT_OK(Flush());
  ASSERT_EQ("bar", Get("bar"));

  ASSERT_GT(ticker(NON_LAST_LEVEL_READ_BYTES), 0);
  ASSERT_GT(ticker(NON_LAST_LEVEL_READ_COUNT), 0);
  ASSERT_EQ(ticker(LAST_LEVEL_READ_BYTES), 0);
  ASSERT_EQ(ticker(LAST_LEVEL_READ_COUNT), 0);

  // Second flush reaches the level-0 trigger and kicks off a compaction.
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("bar", "bar"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("bar", Get("bar"));

  // Last-level reads and warm-file reads must now agree.
  ASSERT_EQ(ticker(LAST_LEVEL_READ_BYTES), ticker(WARM_FILE_READ_BYTES));
  ASSERT_EQ(ticker(LAST_LEVEL_READ_COUNT), ticker(WARM_FILE_READ_COUNT));

  // Snapshot the non-last-level tickers before adding another level-0 file.
  auto non_last_bytes_before = ticker(NON_LAST_LEVEL_READ_BYTES);
  auto non_last_count_before = ticker(NON_LAST_LEVEL_READ_COUNT);

  // Third flush: one new SST on level 0 again.
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("bar", "bar"));
  ASSERT_OK(Flush());
  ASSERT_EQ("bar", Get("bar"));

  // The read must have grown the non-last-level tickers while the last-level
  // tickers still match the warm-file tickers.
  ASSERT_GT(ticker(NON_LAST_LEVEL_READ_BYTES), non_last_bytes_before);
  ASSERT_GT(ticker(NON_LAST_LEVEL_READ_COUNT), non_last_count_before);
  ASSERT_EQ(ticker(LAST_LEVEL_READ_BYTES), ticker(WARM_FILE_READ_BYTES));
  ASSERT_EQ(ticker(LAST_LEVEL_READ_COUNT), ticker(WARM_FILE_READ_COUNT));
}
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
// WAL recovery mode is WALRecoveryMode::kPointInTimeRecovery. // WAL recovery mode is WALRecoveryMode::kPointInTimeRecovery.

@ -135,7 +135,7 @@ Status TableCache::GetTableReader(
std::move(file), fname, ioptions_.clock, io_tracer_, std::move(file), fname, ioptions_.clock, io_tracer_,
record_read_stats ? ioptions_.stats : nullptr, SST_READ_MICROS, record_read_stats ? ioptions_.stats : nullptr, SST_READ_MICROS,
file_read_hist, ioptions_.rate_limiter.get(), ioptions_.listeners, file_read_hist, ioptions_.rate_limiter.get(), ioptions_.listeners,
file_temperature)); file_temperature, level == ioptions_.num_levels - 1));
s = ioptions_.table_factory->NewTableReader( s = ioptions_.table_factory->NewTableReader(
ro, ro,
TableReaderOptions( TableReaderOptions(

@ -22,85 +22,43 @@
#include "util/rate_limiter.h" #include "util/rate_limiter.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
inline void IOStatsAddBytesByTemperature(Temperature file_temperature,
size_t value) {
if (file_temperature == Temperature::kUnknown) {
return;
}
switch (file_temperature) {
case Temperature::kHot:
IOSTATS_ADD(file_io_stats_by_temperature.hot_file_bytes_read, value);
break;
case Temperature::kWarm:
IOSTATS_ADD(file_io_stats_by_temperature.warm_file_bytes_read, value);
break;
case Temperature::kCold:
IOSTATS_ADD(file_io_stats_by_temperature.cold_file_bytes_read, value);
break;
default:
break;
}
}
inline void IOStatsAddCountByTemperature(Temperature file_temperature,
size_t value) {
if (file_temperature == Temperature::kUnknown) {
return;
}
switch (file_temperature) {
case Temperature::kHot:
IOSTATS_ADD(file_io_stats_by_temperature.hot_file_read_count, value);
break;
case Temperature::kWarm:
IOSTATS_ADD(file_io_stats_by_temperature.warm_file_read_count, value);
break;
case Temperature::kCold:
IOSTATS_ADD(file_io_stats_by_temperature.cold_file_read_count, value);
break;
default:
break;
}
}
inline void StatisticAddBytesByTemperature(Statistics* stats, inline void RecordIOStats(Statistics* stats, Temperature file_temperature,
Temperature file_temperature, bool is_last_level, size_t size) {
size_t value) { IOSTATS_ADD(bytes_read, size);
if (stats == nullptr || file_temperature == Temperature::kUnknown) { // record for last/non-last level
return; if (is_last_level) {
} RecordTick(stats, LAST_LEVEL_READ_BYTES, size);
switch (file_temperature) { RecordTick(stats, LAST_LEVEL_READ_COUNT, 1);
case Temperature::kHot: } else {
RecordTick(stats, HOT_FILE_READ_BYTES, value); RecordTick(stats, NON_LAST_LEVEL_READ_BYTES, size);
break; RecordTick(stats, NON_LAST_LEVEL_READ_COUNT, 1);
case Temperature::kWarm:
RecordTick(stats, WARM_FILE_READ_BYTES, value);
break;
case Temperature::kCold:
RecordTick(stats, COLD_FILE_READ_BYTES, value);
break;
default:
break;
} }
}
inline void StatisticAddCountByTemperature(Statistics* stats, // record for temperature file
Temperature file_temperature, if (file_temperature != Temperature::kUnknown) {
size_t value) { switch (file_temperature) {
if (stats == nullptr || file_temperature == Temperature::kUnknown) { case Temperature::kHot:
return; IOSTATS_ADD(file_io_stats_by_temperature.hot_file_bytes_read, size);
} IOSTATS_ADD(file_io_stats_by_temperature.hot_file_read_count, 1);
switch (file_temperature) { RecordTick(stats, HOT_FILE_READ_BYTES, size);
case Temperature::kHot: RecordTick(stats, HOT_FILE_READ_COUNT, 1);
RecordTick(stats, HOT_FILE_READ_COUNT, value); break;
break; case Temperature::kWarm:
case Temperature::kWarm: IOSTATS_ADD(file_io_stats_by_temperature.warm_file_bytes_read, size);
RecordTick(stats, WARM_FILE_READ_COUNT, value); IOSTATS_ADD(file_io_stats_by_temperature.warm_file_read_count, 1);
break; RecordTick(stats, WARM_FILE_READ_BYTES, size);
case Temperature::kCold: RecordTick(stats, WARM_FILE_READ_COUNT, 1);
RecordTick(stats, COLD_FILE_READ_COUNT, value); break;
break; case Temperature::kCold:
default: IOSTATS_ADD(file_io_stats_by_temperature.cold_file_bytes_read, size);
break; IOSTATS_ADD(file_io_stats_by_temperature.cold_file_read_count, 1);
RecordTick(stats, COLD_FILE_READ_BYTES, size);
RecordTick(stats, COLD_FILE_READ_COUNT, 1);
break;
default:
break;
}
} }
} }
@ -273,11 +231,7 @@ IOStatus RandomAccessFileReader::Read(
} }
*result = Slice(res_scratch, io_s.ok() ? pos : 0); *result = Slice(res_scratch, io_s.ok() ? pos : 0);
} }
IOSTATS_ADD(bytes_read, result->size()); RecordIOStats(stats_, file_temperature_, is_last_level_, result->size());
IOStatsAddBytesByTemperature(file_temperature_, result->size());
IOStatsAddCountByTemperature(file_temperature_, 1);
StatisticAddBytesByTemperature(stats_, file_temperature_, result->size());
StatisticAddCountByTemperature(stats_, file_temperature_, 1);
SetPerfLevel(prev_perf_level); SetPerfLevel(prev_perf_level);
} }
if (stats_ != nullptr && file_read_hist_ != nullptr) { if (stats_ != nullptr && file_read_hist_ != nullptr) {
@ -450,13 +404,8 @@ IOStatus RandomAccessFileReader::MultiRead(
} }
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
IOSTATS_ADD(bytes_read, read_reqs[i].result.size()); RecordIOStats(stats_, file_temperature_, is_last_level_,
IOStatsAddBytesByTemperature(file_temperature_, read_reqs[i].result.size());
read_reqs[i].result.size());
IOStatsAddCountByTemperature(file_temperature_, 1);
StatisticAddBytesByTemperature(stats_, file_temperature_,
read_reqs[i].result.size());
StatisticAddCountByTemperature(stats_, file_temperature_, 1);
} }
SetPerfLevel(prev_perf_level); SetPerfLevel(prev_perf_level);
} }

@ -89,7 +89,8 @@ class RandomAccessFileReader {
HistogramImpl* file_read_hist_; HistogramImpl* file_read_hist_;
RateLimiter* rate_limiter_; RateLimiter* rate_limiter_;
std::vector<std::shared_ptr<EventListener>> listeners_; std::vector<std::shared_ptr<EventListener>> listeners_;
Temperature file_temperature_; const Temperature file_temperature_;
const bool is_last_level_;
public: public:
explicit RandomAccessFileReader( explicit RandomAccessFileReader(
@ -100,7 +101,8 @@ class RandomAccessFileReader {
HistogramImpl* file_read_hist = nullptr, HistogramImpl* file_read_hist = nullptr,
RateLimiter* rate_limiter = nullptr, RateLimiter* rate_limiter = nullptr,
const std::vector<std::shared_ptr<EventListener>>& listeners = {}, const std::vector<std::shared_ptr<EventListener>>& listeners = {},
Temperature file_temperature = Temperature::kUnknown) Temperature file_temperature = Temperature::kUnknown,
bool is_last_level = false)
: file_(std::move(raf), io_tracer, _file_name), : file_(std::move(raf), io_tracer, _file_name),
file_name_(std::move(_file_name)), file_name_(std::move(_file_name)),
clock_(clock), clock_(clock),
@ -109,7 +111,8 @@ class RandomAccessFileReader {
file_read_hist_(file_read_hist), file_read_hist_(file_read_hist),
rate_limiter_(rate_limiter), rate_limiter_(rate_limiter),
listeners_(), listeners_(),
file_temperature_(file_temperature) { file_temperature_(file_temperature),
is_last_level_(is_last_level) {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
std::for_each(listeners.begin(), listeners.end(), std::for_each(listeners.begin(), listeners.end(),
[this](const std::shared_ptr<EventListener>& e) { [this](const std::shared_ptr<EventListener>& e) {

@ -425,6 +425,12 @@ enum Tickers : uint32_t {
WARM_FILE_READ_COUNT, WARM_FILE_READ_COUNT,
COLD_FILE_READ_COUNT, COLD_FILE_READ_COUNT,
// Last level and non-last level read statistics
LAST_LEVEL_READ_BYTES,
LAST_LEVEL_READ_COUNT,
NON_LAST_LEVEL_READ_BYTES,
NON_LAST_LEVEL_READ_COUNT,
TICKER_ENUM_MAX TICKER_ENUM_MAX
}; };

@ -5045,6 +5045,14 @@ class TickerTypeJni {
return -0x28; return -0x28;
case ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_COUNT: case ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_COUNT:
return -0x29; return -0x29;
case ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_BYTES:
return -0x2A;
case ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_COUNT:
return -0x2B;
case ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_BYTES:
return -0x2C;
case ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT:
return -0x2D;
case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
// 0x5F was the max value in the initial copy of tickers to Java. // 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep // Since these values are exposed directly to Java clients, we keep
@ -5406,6 +5414,14 @@ class TickerTypeJni {
return ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_COUNT; return ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_COUNT;
case -0x29: case -0x29:
return ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_COUNT; return ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_COUNT;
case -0x2A:
return ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_BYTES;
case -0x2B:
return ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_COUNT;
case -0x2C:
return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_BYTES;
case -0x2D:
return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT;
case 0x5F: case 0x5F:
// 0x5F was the max value in the initial copy of tickers to Java. // 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep // Since these values are exposed directly to Java clients, we keep

@ -796,6 +796,14 @@ public enum TickerType {
WARM_FILE_READ_COUNT((byte) -0x28), WARM_FILE_READ_COUNT((byte) -0x28),
COLD_FILE_READ_COUNT((byte) -0x29), COLD_FILE_READ_COUNT((byte) -0x29),
/**
* (non-)last level read statistics
*/
LAST_LEVEL_READ_BYTES((byte) -0x2A),
LAST_LEVEL_READ_COUNT((byte) -0x2B),
NON_LAST_LEVEL_READ_BYTES((byte) -0x2C),
NON_LAST_LEVEL_READ_COUNT((byte) -0x2D),
TICKER_ENUM_MAX((byte) 0x5F); TICKER_ENUM_MAX((byte) 0x5F);
private final byte value; private final byte value;

@ -128,7 +128,8 @@ static void SetupDB(benchmark::State& state, Options& options, DB** dpptr,
state.SkipWithError(s.ToString().c_str()); state.SkipWithError(s.ToString().c_str());
return; return;
} }
std::string db_name = db_path + "/" + test_name + std::to_string(getpid()); std::string db_name =
db_path + kFilePathSeparator + test_name + std::to_string(getpid());
DestroyDB(db_name, options); DestroyDB(db_name, options);
s = DB::Open(options, db_name, dpptr); s = DB::Open(options, db_name, dpptr);
@ -785,6 +786,7 @@ void GenerateRandomKVs(std::vector<std::string>* keys,
} }
} }
// TODO: move it to different files, as it's testing an internal API
static void DataBlockSeek(benchmark::State& state) { static void DataBlockSeek(benchmark::State& state) {
Random rnd(301); Random rnd(301);
Options options = Options(); Options options = Options();
@ -1287,6 +1289,72 @@ BENCHMARK(PrefixSeek)
->Iterations(kPrefixSeekNum / 8) ->Iterations(kPrefixSeekNum / 8)
->Apply(PrefixSeekArguments); ->Apply(PrefixSeekArguments);
// TODO: move it to different files, as it's testing an internal API
// Micro-benchmark for `RandomAccessFileReader::Read()`.
//
// `state.range(0)` selects whether a `Statistics` object is attached to the
// readers (non-zero) or not (zero). The benchmark creates `kFileNum` small
// files in the Env test directory, wraps each one in a reader with a varying
// temperature / last-level flag, and then reads from the readers round-robin
// inside the timed loop. Any setup or read failure is reported through
// `state.SkipWithError()` and ends the benchmark.
static void RandomAccessFileReaderRead(benchmark::State& state) {
  const bool enable_statistics = state.range(0) != 0;
  constexpr int kFileNum = 10;
  auto env = Env::Default();
  auto fs = env->GetFileSystem();
  std::string db_path;
  Status s = env->GetTestDirectory(&db_path);
  if (!s.ok()) {
    state.SkipWithError(s.ToString().c_str());
    return;
  }

  // Setup multiple `RandomAccessFileReader`s with different parameters to be
  // used for test
  Random rand(301);
  std::string fname_base =
      db_path + kFilePathSeparator + "random-access-file-reader-read";

  std::vector<std::unique_ptr<RandomAccessFileReader>> readers;
  readers.reserve(kFileNum);
  auto statistics_share = CreateDBStatistics();
  Statistics* statistics = enable_statistics ? statistics_share.get() : nullptr;

  // Releases the readers and removes every file this benchmark may have
  // created. Used on both the success path and the setup-failure path.
  auto cleanup = [&]() {
    readers.clear();  // drop the open file handles before deleting the files
    for (int i = 0; i < kFileNum; i++) {
      std::string fname = fname_base + ToString(i);
      env->DeleteFile(fname);  // ignore return, okay to fail cleanup
    }
  };

  for (int i = 0; i < kFileNum; i++) {
    std::string fname = fname_base + ToString(i);
    std::string content = rand.RandomString(kDefaultPageSize);

    // Create the file and fill it; stop at the first failing step so that a
    // null `tgt_file` is never dereferenced.
    std::unique_ptr<WritableFile> tgt_file;
    s = env->NewWritableFile(fname, &tgt_file, EnvOptions());
    if (s.ok()) {
      s = tgt_file->Append(content);
    }
    if (s.ok()) {
      s = tgt_file->Close();
    }

    std::unique_ptr<FSRandomAccessFile> f;
    if (s.ok()) {
      s = fs->NewRandomAccessFile(fname, FileOptions(), &f, nullptr);
    }
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
      cleanup();
      return;
    }

    // Vary temperature and last-level flag across the readers.
    int rand_num = rand.Next() % 3;
    auto temperature = rand_num == 0   ? Temperature::kUnknown
                       : rand_num == 1 ? Temperature::kWarm
                                       : Temperature::kCold;
    readers.emplace_back(new RandomAccessFileReader(
        std::move(f), fname, env->GetSystemClock().get(), nullptr, statistics,
        0, nullptr, nullptr, {}, temperature, rand_num == 1));
  }

  IOOptions io_options;
  // Size the scratch buffer from the read length instead of a fixed constant
  // so the two can never drift apart.
  const size_t read_len = kDefaultPageSize / 3;
  std::unique_ptr<char[]> scratch(new char[read_len]);
  Slice result;
  uint64_t idx = 0;
  for (auto _ : state) {
    s = readers[idx++ % kFileNum]->Read(io_options, 0, read_len, &result,
                                        scratch.get(), nullptr, Env::IO_TOTAL);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
      // A ranged-for benchmark loop has to be left explicitly after
      // SkipWithError(); otherwise the failing read would keep being issued.
      break;
    }
  }

  // clean up
  cleanup();
}
// Register the benchmark with two argument values; the benchmark body reads
// the value through `state.range(0)` as `enable_statistics`
// (0 = no Statistics object attached, 1 = Statistics attached).
BENCHMARK(RandomAccessFileReaderRead)
->Iterations(1000000)
->Arg(0)
->Arg(1)
->ArgName("enable_statistics");
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE
BENCHMARK_MAIN(); BENCHMARK_MAIN();

@ -222,6 +222,10 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{HOT_FILE_READ_COUNT, "rocksdb.hot.file.read.count"}, {HOT_FILE_READ_COUNT, "rocksdb.hot.file.read.count"},
{WARM_FILE_READ_COUNT, "rocksdb.warm.file.read.count"}, {WARM_FILE_READ_COUNT, "rocksdb.warm.file.read.count"},
{COLD_FILE_READ_COUNT, "rocksdb.cold.file.read.count"}, {COLD_FILE_READ_COUNT, "rocksdb.cold.file.read.count"},
{LAST_LEVEL_READ_BYTES, "rocksdb.last.level.read.bytes"},
{LAST_LEVEL_READ_COUNT, "rocksdb.last.level.read.count"},
{NON_LAST_LEVEL_READ_BYTES, "rocksdb.non.last.level.read.bytes"},
{NON_LAST_LEVEL_READ_COUNT, "rocksdb.non.last.level.read.count"},
}; };
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = { const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {

Loading…
Cancel
Save