Add last level and non-last level read statistics (#9519)

Summary:
Add last level and non-last level read statistics:
```
LAST_LEVEL_READ_BYTES,
LAST_LEVEL_READ_COUNT,
NON_LAST_LEVEL_READ_BYTES,
NON_LAST_LEVEL_READ_COUNT,
```

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9519

Test Plan: added a unit test (`DBTest2.LastLevelStatistics`)

Reviewed By: siying

Differential Revision: D34062539

Pulled By: jay-zhuang

fbshipit-source-id: 908644c3050878b4234febdc72e3e19d89af38cd
main
Jay Zhuang 3 years ago committed by Facebook GitHub Bot
parent 30b08878d8
commit f4b2500e12
  1. 1
      HISTORY.md
  2. 52
      db/db_test2.cc
  3. 2
      db/table_cache.cc
  4. 103
      file/random_access_file_reader.cc
  5. 9
      file/random_access_file_reader.h
  6. 6
      include/rocksdb/statistics.h
  7. 16
      java/rocksjni/portal.h
  8. 8
      java/src/main/java/org/rocksdb/TickerType.java
  9. 70
      microbench/db_basic_bench.cc
  10. 4
      monitoring/statistics.cc

@ -83,6 +83,7 @@
* Improved the SstDumpTool to read the comparator from table properties and use it to read the SST File.
* Extended the column family statistics in the info log so the total amount of garbage in the blob files and the blob file space amplification factor are also logged. Also exposed the blob file space amp via the `rocksdb.blob-stats` DB property.
* Introduced the API rocksdb_create_dir_if_missing in c.h that calls underlying file system's CreateDirIfMissing API to create the directory.
* Added last level and non-last level read statistics: `LAST_LEVEL_READ_*`, `NON_LAST_LEVEL_READ_*`.
## 6.29.0 (01/21/2022)
Note: The next release will be major release 7.0. See https://github.com/facebook/rocksdb/issues/9390 for more info.

@ -6847,6 +6847,58 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) {
size = GetSstSizeHelper(Temperature::kCold);
ASSERT_GT(size, 0);
}
// Exercises the LAST_LEVEL_READ_* and NON_LAST_LEVEL_READ_* tickers.
//
// The bottommost temperature is set to kWarm, so the test can cross-check the
// last-level tickers against the WARM_FILE_READ_* tickers after the second
// flush has triggered a compaction.
TEST_F(DBTest2, LastLevelStatistics) {
  Options options = CurrentOptions();
  options.statistics = CreateDBStatistics();
  options.bottommost_temperature = Temperature::kWarm;
  options.level_compaction_dynamic_level_bytes = true;
  options.level0_file_num_compaction_trigger = 2;
  Reopen(options);

  // Shorthand for reading the current value of a ticker.
  const auto ticker = [&options](Tickers type) {
    return options.statistics->getTickerCount(type);
  };

  // Step 1: a single flush leaves one SST on level 0; reading from it must
  // only move the non-last-level tickers.
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("bar", "bar"));
  ASSERT_OK(Flush());
  ASSERT_EQ("bar", Get("bar"));

  ASSERT_GT(ticker(NON_LAST_LEVEL_READ_BYTES), 0);
  ASSERT_GT(ticker(NON_LAST_LEVEL_READ_COUNT), 0);
  ASSERT_EQ(ticker(LAST_LEVEL_READ_BYTES), 0);
  ASSERT_EQ(ticker(LAST_LEVEL_READ_COUNT), 0);

  // Step 2: the second flush reaches the L0 compaction trigger. Once the
  // compaction is done, last-level reads must equal warm-file reads.
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("bar", "bar"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("bar", Get("bar"));

  ASSERT_EQ(ticker(LAST_LEVEL_READ_BYTES), ticker(WARM_FILE_READ_BYTES));
  ASSERT_EQ(ticker(LAST_LEVEL_READ_COUNT), ticker(WARM_FILE_READ_COUNT));

  // Snapshot the non-last-level tickers before adding another L0 file.
  const auto pre_bytes = ticker(NON_LAST_LEVEL_READ_BYTES);
  const auto pre_count = ticker(NON_LAST_LEVEL_READ_COUNT);

  // Step 3: a third flush puts one SST on level 0 again; the next read must
  // grow the non-last-level tickers while last-level still matches warm.
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("bar", "bar"));
  ASSERT_OK(Flush());
  ASSERT_EQ("bar", Get("bar"));

  ASSERT_GT(ticker(NON_LAST_LEVEL_READ_BYTES), pre_bytes);
  ASSERT_GT(ticker(NON_LAST_LEVEL_READ_COUNT), pre_count);
  ASSERT_EQ(ticker(LAST_LEVEL_READ_BYTES), ticker(WARM_FILE_READ_BYTES));
  ASSERT_EQ(ticker(LAST_LEVEL_READ_COUNT), ticker(WARM_FILE_READ_COUNT));
}
#endif // ROCKSDB_LITE
// WAL recovery mode is WALRecoveryMode::kPointInTimeRecovery.

@ -135,7 +135,7 @@ Status TableCache::GetTableReader(
std::move(file), fname, ioptions_.clock, io_tracer_,
record_read_stats ? ioptions_.stats : nullptr, SST_READ_MICROS,
file_read_hist, ioptions_.rate_limiter.get(), ioptions_.listeners,
file_temperature));
file_temperature, level == ioptions_.num_levels - 1));
s = ioptions_.table_factory->NewTableReader(
ro,
TableReaderOptions(

@ -22,85 +22,43 @@
#include "util/rate_limiter.h"
namespace ROCKSDB_NAMESPACE {
// Adds `value` to the IOSTATS bytes-read counter that corresponds to
// `file_temperature` (hot, warm or cold). Temperature::kUnknown, and any
// other temperature value, is not counted.
inline void IOStatsAddBytesByTemperature(Temperature file_temperature,
                                         size_t value) {
  if (file_temperature == Temperature::kHot) {
    IOSTATS_ADD(file_io_stats_by_temperature.hot_file_bytes_read, value);
  } else if (file_temperature == Temperature::kWarm) {
    IOSTATS_ADD(file_io_stats_by_temperature.warm_file_bytes_read, value);
  } else if (file_temperature == Temperature::kCold) {
    IOSTATS_ADD(file_io_stats_by_temperature.cold_file_bytes_read, value);
  }
  // Anything else (including kUnknown) falls through without recording.
}
inline void IOStatsAddCountByTemperature(Temperature file_temperature,
size_t value) {
if (file_temperature == Temperature::kUnknown) {
return;
}
switch (file_temperature) {
case Temperature::kHot:
IOSTATS_ADD(file_io_stats_by_temperature.hot_file_read_count, value);
break;
case Temperature::kWarm:
IOSTATS_ADD(file_io_stats_by_temperature.warm_file_read_count, value);
break;
case Temperature::kCold:
IOSTATS_ADD(file_io_stats_by_temperature.cold_file_read_count, value);
break;
default:
break;
inline void RecordIOStats(Statistics* stats, Temperature file_temperature,
bool is_last_level, size_t size) {
IOSTATS_ADD(bytes_read, size);
// record for last/non-last level
if (is_last_level) {
RecordTick(stats, LAST_LEVEL_READ_BYTES, size);
RecordTick(stats, LAST_LEVEL_READ_COUNT, 1);
} else {
RecordTick(stats, NON_LAST_LEVEL_READ_BYTES, size);
RecordTick(stats, NON_LAST_LEVEL_READ_COUNT, 1);
}
}
inline void StatisticAddBytesByTemperature(Statistics* stats,
Temperature file_temperature,
size_t value) {
if (stats == nullptr || file_temperature == Temperature::kUnknown) {
return;
}
// record for temperature file
if (file_temperature != Temperature::kUnknown) {
switch (file_temperature) {
case Temperature::kHot:
RecordTick(stats, HOT_FILE_READ_BYTES, value);
IOSTATS_ADD(file_io_stats_by_temperature.hot_file_bytes_read, size);
IOSTATS_ADD(file_io_stats_by_temperature.hot_file_read_count, 1);
RecordTick(stats, HOT_FILE_READ_BYTES, size);
RecordTick(stats, HOT_FILE_READ_COUNT, 1);
break;
case Temperature::kWarm:
RecordTick(stats, WARM_FILE_READ_BYTES, value);
IOSTATS_ADD(file_io_stats_by_temperature.warm_file_bytes_read, size);
IOSTATS_ADD(file_io_stats_by_temperature.warm_file_read_count, 1);
RecordTick(stats, WARM_FILE_READ_BYTES, size);
RecordTick(stats, WARM_FILE_READ_COUNT, 1);
break;
case Temperature::kCold:
RecordTick(stats, COLD_FILE_READ_BYTES, value);
IOSTATS_ADD(file_io_stats_by_temperature.cold_file_bytes_read, size);
IOSTATS_ADD(file_io_stats_by_temperature.cold_file_read_count, 1);
RecordTick(stats, COLD_FILE_READ_BYTES, size);
RecordTick(stats, COLD_FILE_READ_COUNT, 1);
break;
default:
break;
}
}
inline void StatisticAddCountByTemperature(Statistics* stats,
Temperature file_temperature,
size_t value) {
if (stats == nullptr || file_temperature == Temperature::kUnknown) {
return;
}
switch (file_temperature) {
case Temperature::kHot:
RecordTick(stats, HOT_FILE_READ_COUNT, value);
break;
case Temperature::kWarm:
RecordTick(stats, WARM_FILE_READ_COUNT, value);
break;
case Temperature::kCold:
RecordTick(stats, COLD_FILE_READ_COUNT, value);
break;
default:
break;
}
}
@ -273,11 +231,7 @@ IOStatus RandomAccessFileReader::Read(
}
*result = Slice(res_scratch, io_s.ok() ? pos : 0);
}
IOSTATS_ADD(bytes_read, result->size());
IOStatsAddBytesByTemperature(file_temperature_, result->size());
IOStatsAddCountByTemperature(file_temperature_, 1);
StatisticAddBytesByTemperature(stats_, file_temperature_, result->size());
StatisticAddCountByTemperature(stats_, file_temperature_, 1);
RecordIOStats(stats_, file_temperature_, is_last_level_, result->size());
SetPerfLevel(prev_perf_level);
}
if (stats_ != nullptr && file_read_hist_ != nullptr) {
@ -450,13 +404,8 @@ IOStatus RandomAccessFileReader::MultiRead(
}
#endif // ROCKSDB_LITE
IOSTATS_ADD(bytes_read, read_reqs[i].result.size());
IOStatsAddBytesByTemperature(file_temperature_,
read_reqs[i].result.size());
IOStatsAddCountByTemperature(file_temperature_, 1);
StatisticAddBytesByTemperature(stats_, file_temperature_,
RecordIOStats(stats_, file_temperature_, is_last_level_,
read_reqs[i].result.size());
StatisticAddCountByTemperature(stats_, file_temperature_, 1);
}
SetPerfLevel(prev_perf_level);
}

@ -89,7 +89,8 @@ class RandomAccessFileReader {
HistogramImpl* file_read_hist_;
RateLimiter* rate_limiter_;
std::vector<std::shared_ptr<EventListener>> listeners_;
Temperature file_temperature_;
const Temperature file_temperature_;
const bool is_last_level_;
public:
explicit RandomAccessFileReader(
@ -100,7 +101,8 @@ class RandomAccessFileReader {
HistogramImpl* file_read_hist = nullptr,
RateLimiter* rate_limiter = nullptr,
const std::vector<std::shared_ptr<EventListener>>& listeners = {},
Temperature file_temperature = Temperature::kUnknown)
Temperature file_temperature = Temperature::kUnknown,
bool is_last_level = false)
: file_(std::move(raf), io_tracer, _file_name),
file_name_(std::move(_file_name)),
clock_(clock),
@ -109,7 +111,8 @@ class RandomAccessFileReader {
file_read_hist_(file_read_hist),
rate_limiter_(rate_limiter),
listeners_(),
file_temperature_(file_temperature) {
file_temperature_(file_temperature),
is_last_level_(is_last_level) {
#ifndef ROCKSDB_LITE
std::for_each(listeners.begin(), listeners.end(),
[this](const std::shared_ptr<EventListener>& e) {

@ -425,6 +425,12 @@ enum Tickers : uint32_t {
WARM_FILE_READ_COUNT,
COLD_FILE_READ_COUNT,
// Last level and non-last level read statistics
LAST_LEVEL_READ_BYTES,
LAST_LEVEL_READ_COUNT,
NON_LAST_LEVEL_READ_BYTES,
NON_LAST_LEVEL_READ_COUNT,
TICKER_ENUM_MAX
};

@ -5045,6 +5045,14 @@ class TickerTypeJni {
return -0x28;
case ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_COUNT:
return -0x29;
case ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_BYTES:
return -0x2A;
case ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_COUNT:
return -0x2B;
case ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_BYTES:
return -0x2C;
case ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT:
return -0x2D;
case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
// 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep
@ -5406,6 +5414,14 @@ class TickerTypeJni {
return ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_COUNT;
case -0x29:
return ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_COUNT;
case -0x2A:
return ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_BYTES;
case -0x2B:
return ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_COUNT;
case -0x2C:
return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_BYTES;
case -0x2D:
return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT;
case 0x5F:
// 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep

@ -796,6 +796,14 @@ public enum TickerType {
WARM_FILE_READ_COUNT((byte) -0x28),
COLD_FILE_READ_COUNT((byte) -0x29),
/**
* (non-)last level read statistics
*/
LAST_LEVEL_READ_BYTES((byte) -0x2A),
LAST_LEVEL_READ_COUNT((byte) -0x2B),
NON_LAST_LEVEL_READ_BYTES((byte) -0x2C),
NON_LAST_LEVEL_READ_COUNT((byte) -0x2D),
TICKER_ENUM_MAX((byte) 0x5F);
private final byte value;

@ -128,7 +128,8 @@ static void SetupDB(benchmark::State& state, Options& options, DB** dpptr,
state.SkipWithError(s.ToString().c_str());
return;
}
std::string db_name = db_path + "/" + test_name + std::to_string(getpid());
std::string db_name =
db_path + kFilePathSeparator + test_name + std::to_string(getpid());
DestroyDB(db_name, options);
s = DB::Open(options, db_name, dpptr);
@ -785,6 +786,7 @@ void GenerateRandomKVs(std::vector<std::string>* keys,
}
}
// TODO: move it to different files, as it's testing an internal API
static void DataBlockSeek(benchmark::State& state) {
Random rnd(301);
Options options = Options();
@ -1287,6 +1289,72 @@ BENCHMARK(PrefixSeek)
->Iterations(kPrefixSeekNum / 8)
->Apply(PrefixSeekArguments);
// TODO: move it to different files, as it's testing an internal API
// Benchmarks `RandomAccessFileReader::Read()`.
//
// Creates kFileNum one-page files in the Env test directory, wraps each in a
// `RandomAccessFileReader` with a pseudo-randomly chosen temperature (and
// `is_last_level` set for the kWarm ones), then reads the first third of a
// page from the readers in round-robin order.
//
// state.range(0): non-zero passes a Statistics object to the readers, zero
// passes nullptr.
//
// Any failure while preparing the files or while reading marks the benchmark
// as skipped with the error message; the files are removed in either case.
static void RandomAccessFileReaderRead(benchmark::State& state) {
  const bool enable_statistics = state.range(0) != 0;
  constexpr int kFileNum = 10;
  auto env = Env::Default();
  auto fs = env->GetFileSystem();
  std::string db_path;
  Status s = env->GetTestDirectory(&db_path);
  if (!s.ok()) {
    state.SkipWithError(s.ToString().c_str());
    return;
  }

  // Setup multiple `RandomAccessFileReader`s with different parameters to be
  // used for test
  Random rand(301);
  std::string fname_base =
      db_path + kFilePathSeparator + "random-access-file-reader-read";
  std::vector<std::unique_ptr<RandomAccessFileReader>> readers;
  auto statistics_share = CreateDBStatistics();
  Statistics* statistics = enable_statistics ? statistics_share.get() : nullptr;

  // Best-effort removal of every file this benchmark may have created.
  auto cleanup = [&]() {
    for (int i = 0; i < kFileNum; i++) {
      std::string fname = fname_base + ToString(i);
      env->DeleteFile(fname);  // ignore return, okay to fail cleanup
    }
  };

  for (int i = 0; i < kFileNum; i++) {
    std::string fname = fname_base + ToString(i);
    std::string content = rand.RandomString(kDefaultPageSize);

    // Write the file, stopping at the first failing step so that a failed
    // create never leads to a null `tgt_file` being dereferenced.
    std::unique_ptr<WritableFile> tgt_file;
    s = env->NewWritableFile(fname, &tgt_file, EnvOptions());
    if (s.ok()) {
      s = tgt_file->Append(content);
    }
    if (s.ok()) {
      s = tgt_file->Close();
    }

    std::unique_ptr<FSRandomAccessFile> f;
    if (s.ok()) {
      s = fs->NewRandomAccessFile(fname, FileOptions(), &f, nullptr);
    }
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
      readers.clear();  // release open readers before deleting their files
      cleanup();
      return;
    }

    int rand_num = rand.Next() % 3;
    auto temperature = rand_num == 0   ? Temperature::kUnknown
                       : rand_num == 1 ? Temperature::kWarm
                                       : Temperature::kCold;
    readers.emplace_back(new RandomAccessFileReader(
        std::move(f), fname, env->GetSystemClock().get(), nullptr, statistics,
        0, nullptr, nullptr, {}, temperature, rand_num == 1));
  }

  IOOptions io_options;
  // Size the scratch buffer from the read length instead of a magic number so
  // the two cannot drift apart.
  const size_t kReadSize = kDefaultPageSize / 3;
  std::unique_ptr<char[]> scratch(new char[kReadSize]);
  Slice result;
  uint64_t idx = 0;
  for (auto _ : state) {
    s = readers[idx++ % kFileNum]->Read(io_options, 0, kReadSize, &result,
                                        scratch.get(), nullptr, Env::IO_TOTAL);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
      // A ranged-for benchmark loop keeps iterating after SkipWithError()
      // unless it is exited explicitly.
      break;
    }
  }

  // clean up
  cleanup();
}
// Register the benchmark with a fixed 1,000,000 iterations, once with the
// argument 0 and once with the argument 1. The argument is labelled
// "enable_statistics" and is read by the benchmark via state.range(0).
BENCHMARK(RandomAccessFileReaderRead)
->Iterations(1000000)
->Arg(0)
->Arg(1)
->ArgName("enable_statistics");
} // namespace ROCKSDB_NAMESPACE
BENCHMARK_MAIN();

@ -222,6 +222,10 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{HOT_FILE_READ_COUNT, "rocksdb.hot.file.read.count"},
{WARM_FILE_READ_COUNT, "rocksdb.warm.file.read.count"},
{COLD_FILE_READ_COUNT, "rocksdb.cold.file.read.count"},
{LAST_LEVEL_READ_BYTES, "rocksdb.last.level.read.bytes"},
{LAST_LEVEL_READ_COUNT, "rocksdb.last.level.read.count"},
{NON_LAST_LEVEL_READ_BYTES, "rocksdb.non.last.level.read.bytes"},
{NON_LAST_LEVEL_READ_COUNT, "rocksdb.non.last.level.read.count"},
};
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {

Loading…
Cancel
Save