Add tiered storage related read bytes stats to Statistic (#9123)

Summary:
Add three read bytes tickers (and the matching read count tickers) for hot, warm, and cold files to Statistics. They will be used by storage tiering to obtain read information for files with different temperatures.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9123

Test Plan: added new test cases.

Reviewed By: siying

Differential Revision: D32154745

Pulled By: zhichao-cao

fbshipit-source-id: b7905d6dae469a72428742364ec07b634b6f15da
main
Zhichao Cao 3 years ago committed by Facebook GitHub Bot
parent 1178d348aa
commit b694cd0e0d
  1. 1
      HISTORY.md
  2. 36
      db/db_test2.cc
  3. 47
      file/random_access_file_reader.cc
  4. 8
      include/rocksdb/statistics.h
  5. 24
      java/rocksjni/portal.h
  6. 10
      java/src/main/java/org/rocksdb/TickerType.java
  7. 6
      monitoring/statistics.cc

@ -5,6 +5,7 @@
* Added a new online consistency check for BlobDB which validates that the number/total size of garbage blobs does not exceed the number/total size of all blobs in any given blob file. * Added a new online consistency check for BlobDB which validates that the number/total size of garbage blobs does not exceed the number/total size of all blobs in any given blob file.
* Provided support for tracking per-sst user-defined timestamp information in MANIFEST. * Provided support for tracking per-sst user-defined timestamp information in MANIFEST.
* Added new option "adaptive_readahead" in ReadOptions. For iterators, RocksDB does auto-readahead on noticing sequential reads and by enabling this option, readahead_size of current file (if reads are sequential) will be carried forward to next file instead of starting from the scratch at each level (except L0 level files). If reads are not sequential it will fall back to 8KB. This option is applicable only for RocksDB internal prefetch buffer and isn't supported with underlying file system prefetching. * Added new option "adaptive_readahead" in ReadOptions. For iterators, RocksDB does auto-readahead on noticing sequential reads and by enabling this option, readahead_size of current file (if reads are sequential) will be carried forward to next file instead of starting from the scratch at each level (except L0 level files). If reads are not sequential it will fall back to 8KB. This option is applicable only for RocksDB internal prefetch buffer and isn't supported with underlying file system prefetching.
* Added the read count and read bytes related stats to Statistics for tiered storage hot, warm, and cold file reads.
### Bug Fixes ### Bug Fixes
* Prevent a `CompactRange()` with `CompactRangeOptions::change_level == true` from possibly causing corruption to the LSM state (overlapping files within a level) when run in parallel with another manual compaction. Note that setting `force_consistency_checks == true` (the default) would cause the DB to enter read-only mode in this scenario and return `Status::Corruption`, rather than committing any corruption. * Prevent a `CompactRange()` with `CompactRangeOptions::change_level == true` from possibly causing corruption to the LSM state (overlapping files within a level) when run in parallel with another manual compaction. Note that setting `force_consistency_checks == true` (the default) would cause the DB to enter read-only mode in this scenario and return `Status::Corruption`, rather than committing any corruption.

@ -6501,6 +6501,7 @@ TEST_F(DBTest2, BottommostTemperature) {
Options options = CurrentOptions(); Options options = CurrentOptions();
options.bottommost_temperature = Temperature::kWarm; options.bottommost_temperature = Temperature::kWarm;
options.level0_file_num_compaction_trigger = 2; options.level0_file_num_compaction_trigger = 2;
options.statistics = CreateDBStatistics();
Reopen(options); Reopen(options);
auto size = GetSstSizeHelper(Temperature::kUnknown); auto size = GetSstSizeHelper(Temperature::kUnknown);
@ -6532,6 +6533,9 @@ TEST_F(DBTest2, BottommostTemperature) {
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
ASSERT_EQ("bar", Get("foo")); ASSERT_EQ("bar", Get("foo"));
@ -6541,6 +6545,12 @@ TEST_F(DBTest2, BottommostTemperature) {
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
ASSERT_GT(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0); ASSERT_GT(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);
// non-bottommost file still has unknown temperature // non-bottommost file still has unknown temperature
ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Put("foo", "bar"));
@ -6553,6 +6563,12 @@ TEST_F(DBTest2, BottommostTemperature) {
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
ASSERT_GT(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0); ASSERT_GT(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);
db_->GetColumnFamilyMetaData(&metadata); db_->GetColumnFamilyMetaData(&metadata);
ASSERT_EQ(2, metadata.file_count); ASSERT_EQ(2, metadata.file_count);
@ -6593,7 +6609,7 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) {
options.compaction_style = kCompactionStyleUniversal; options.compaction_style = kCompactionStyleUniversal;
options.level0_file_num_compaction_trigger = kTriggerNum; options.level0_file_num_compaction_trigger = kTriggerNum;
options.num_levels = kNumLevels; options.num_levels = kNumLevels;
options.statistics = CreateDBStatistics();
DestroyAndReopen(options); DestroyAndReopen(options);
auto size = GetSstSizeHelper(Temperature::kUnknown); auto size = GetSstSizeHelper(Temperature::kUnknown);
@ -6624,6 +6640,12 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) {
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);
ASSERT_EQ("bar", Get("foo")); ASSERT_EQ("bar", Get("foo"));
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
@ -6632,6 +6654,12 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) {
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);
ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Put("foo", "bar"));
ASSERT_OK(Put("bar", "bar")); ASSERT_OK(Put("bar", "bar"));
@ -6667,6 +6695,12 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) {
ASSERT_EQ(size, 0); ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kWarm); size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_GT(size, 0); ASSERT_GT(size, 0);
ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);
// non-bottommost file still has unknown temperature // non-bottommost file still has unknown temperature
ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Put("foo", "bar"));

@ -62,6 +62,48 @@ inline void IOStatsAddCountByTemperature(Temperature file_temperature,
} }
} }
// Adds `value` to the read-bytes ticker that corresponds to
// `file_temperature`:
//   kHot  -> HOT_FILE_READ_BYTES
//   kWarm -> WARM_FILE_READ_BYTES
//   kCold -> COLD_FILE_READ_BYTES
// Does nothing when `stats` is null or when the temperature is anything
// other than hot/warm/cold (e.g. Temperature::kUnknown).
inline void StatisticAddBytesByTemperature(Statistics* stats,
                                           Temperature file_temperature,
                                           size_t value) {
  if (stats == nullptr) {
    return;
  }
  if (file_temperature == Temperature::kHot) {
    RecordTick(stats, HOT_FILE_READ_BYTES, value);
  } else if (file_temperature == Temperature::kWarm) {
    RecordTick(stats, WARM_FILE_READ_BYTES, value);
  } else if (file_temperature == Temperature::kCold) {
    RecordTick(stats, COLD_FILE_READ_BYTES, value);
  }
  // Any other temperature (including kUnknown) is intentionally not recorded.
}
// Adds `value` to the read-count ticker that corresponds to
// `file_temperature`:
//   kHot  -> HOT_FILE_READ_COUNT
//   kWarm -> WARM_FILE_READ_COUNT
//   kCold -> COLD_FILE_READ_COUNT
// Does nothing when `stats` is null or when the temperature is anything
// other than hot/warm/cold (e.g. Temperature::kUnknown).
inline void StatisticAddCountByTemperature(Statistics* stats,
                                           Temperature file_temperature,
                                           size_t value) {
  if (stats == nullptr) {
    return;
  }
  if (file_temperature == Temperature::kHot) {
    RecordTick(stats, HOT_FILE_READ_COUNT, value);
  } else if (file_temperature == Temperature::kWarm) {
    RecordTick(stats, WARM_FILE_READ_COUNT, value);
  } else if (file_temperature == Temperature::kCold) {
    RecordTick(stats, COLD_FILE_READ_COUNT, value);
  }
  // Any other temperature (including kUnknown) is intentionally not recorded.
}
IOStatus RandomAccessFileReader::Create( IOStatus RandomAccessFileReader::Create(
const std::shared_ptr<FileSystem>& fs, const std::string& fname, const std::shared_ptr<FileSystem>& fs, const std::string& fname,
const FileOptions& file_opts, const FileOptions& file_opts,
@ -224,6 +266,8 @@ IOStatus RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
IOSTATS_ADD(bytes_read, result->size()); IOSTATS_ADD(bytes_read, result->size());
IOStatsAddBytesByTemperature(file_temperature_, result->size()); IOStatsAddBytesByTemperature(file_temperature_, result->size());
IOStatsAddCountByTemperature(file_temperature_, 1); IOStatsAddCountByTemperature(file_temperature_, 1);
StatisticAddBytesByTemperature(stats_, file_temperature_, result->size());
StatisticAddCountByTemperature(stats_, file_temperature_, 1);
SetPerfLevel(prev_perf_level); SetPerfLevel(prev_perf_level);
} }
if (stats_ != nullptr && file_read_hist_ != nullptr) { if (stats_ != nullptr && file_read_hist_ != nullptr) {
@ -392,6 +436,9 @@ IOStatus RandomAccessFileReader::MultiRead(const IOOptions& opts,
IOStatsAddBytesByTemperature(file_temperature_, IOStatsAddBytesByTemperature(file_temperature_,
read_reqs[i].result.size()); read_reqs[i].result.size());
IOStatsAddCountByTemperature(file_temperature_, 1); IOStatsAddCountByTemperature(file_temperature_, 1);
StatisticAddBytesByTemperature(stats_, file_temperature_,
read_reqs[i].result.size());
StatisticAddCountByTemperature(stats_, file_temperature_, 1);
} }
SetPerfLevel(prev_perf_level); SetPerfLevel(prev_perf_level);
} }

@ -417,6 +417,14 @@ enum Tickers : uint32_t {
REMOTE_COMPACT_READ_BYTES, REMOTE_COMPACT_READ_BYTES,
REMOTE_COMPACT_WRITE_BYTES, REMOTE_COMPACT_WRITE_BYTES,
// Tiered storage related statistics
HOT_FILE_READ_BYTES,
WARM_FILE_READ_BYTES,
COLD_FILE_READ_BYTES,
HOT_FILE_READ_COUNT,
WARM_FILE_READ_COUNT,
COLD_FILE_READ_COUNT,
TICKER_ENUM_MAX TICKER_ENUM_MAX
}; };

@ -5012,6 +5012,18 @@ class TickerTypeJni {
return -0x22; return -0x22;
case ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_WRITE_BYTES: case ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_WRITE_BYTES:
return -0x23; return -0x23;
case ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_BYTES:
return -0x24;
case ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_BYTES:
return -0x25;
case ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_BYTES:
return -0x26;
case ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_COUNT:
return -0x27;
case ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_COUNT:
return -0x28;
case ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_COUNT:
return -0x29;
case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
// 0x5F was the max value in the initial copy of tickers to Java. // 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep // Since these values are exposed directly to Java clients, we keep
@ -5361,6 +5373,18 @@ class TickerTypeJni {
return ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_READ_BYTES; return ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_READ_BYTES;
case -0x23: case -0x23:
return ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_WRITE_BYTES; return ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_WRITE_BYTES;
case -0x24:
return ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_BYTES;
case -0x25:
return ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_BYTES;
case -0x26:
return ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_BYTES;
case -0x27:
return ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_COUNT;
case -0x28:
return ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_COUNT;
case -0x29:
return ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_COUNT;
case 0x5F: case 0x5F:
// 0x5F was the max value in the initial copy of tickers to Java. // 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep // Since these values are exposed directly to Java clients, we keep

@ -786,6 +786,16 @@ public enum TickerType {
REMOTE_COMPACT_READ_BYTES((byte) -0x22), REMOTE_COMPACT_READ_BYTES((byte) -0x22),
REMOTE_COMPACT_WRITE_BYTES((byte) -0x23), REMOTE_COMPACT_WRITE_BYTES((byte) -0x23),
/**
* Tiered storage related statistics
*/
HOT_FILE_READ_BYTES((byte) -0x24),
WARM_FILE_READ_BYTES((byte) -0x25),
COLD_FILE_READ_BYTES((byte) -0x26),
HOT_FILE_READ_COUNT((byte) -0x27),
WARM_FILE_READ_COUNT((byte) -0x28),
COLD_FILE_READ_COUNT((byte) -0x29),
TICKER_ENUM_MAX((byte) 0x5F); TICKER_ENUM_MAX((byte) 0x5F);
private final byte value; private final byte value;

@ -216,6 +216,12 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{BACKUP_WRITE_BYTES, "rocksdb.backup.write.bytes"}, {BACKUP_WRITE_BYTES, "rocksdb.backup.write.bytes"},
{REMOTE_COMPACT_READ_BYTES, "rocksdb.remote.compact.read.bytes"}, {REMOTE_COMPACT_READ_BYTES, "rocksdb.remote.compact.read.bytes"},
{REMOTE_COMPACT_WRITE_BYTES, "rocksdb.remote.compact.write.bytes"}, {REMOTE_COMPACT_WRITE_BYTES, "rocksdb.remote.compact.write.bytes"},
{HOT_FILE_READ_BYTES, "rocksdb.hot.file.read.bytes"},
{WARM_FILE_READ_BYTES, "rocksdb.warm.file.read.bytes"},
{COLD_FILE_READ_BYTES, "rocksdb.cold.file.read.bytes"},
{HOT_FILE_READ_COUNT, "rocksdb.hot.file.read.count"},
{WARM_FILE_READ_COUNT, "rocksdb.warm.file.read.count"},
{COLD_FILE_READ_COUNT, "rocksdb.cold.file.read.count"},
}; };
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = { const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {

Loading…
Cancel
Save