From b694cd0e0d90a5dea6f78e494c7d54e0f0d65d9a Mon Sep 17 00:00:00 2001 From: Zhichao Cao Date: Tue, 16 Nov 2021 15:15:48 -0800 Subject: [PATCH] Add tiered storage related read bytes stats to Statistic (#9123) Summary: Add three read-bytes tickers and three read-count tickers (one each for hot, warm, and cold files) to Statistics. Storage tiering will use them to get read information for files of different temperatures. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9123 Test Plan: added new testing cases. Reviewed By: siying Differential Revision: D32154745 Pulled By: zhichao-cao fbshipit-source-id: b7905d6dae469a72428742364ec07b634b6f15da --- HISTORY.md | 1 + db/db_test2.cc | 36 +++++++++++++- file/random_access_file_reader.cc | 47 +++++++++++++++++++ include/rocksdb/statistics.h | 8 ++++ java/rocksjni/portal.h | 24 ++++++++++ .../src/main/java/org/rocksdb/TickerType.java | 10 ++++ monitoring/statistics.cc | 6 +++ 7 files changed, 131 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 11128c4ea..ae04ad740 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,7 @@ * Added a new online consistency check for BlobDB which validates that the number/total size of garbage blobs does not exceed the number/total size of all blobs in any given blob file. * Provided support for tracking per-sst user-defined timestamp information in MANIFEST. * Added new option "adaptive_readahead" in ReadOptions. For iterators, RocksDB does auto-readahead on noticing sequential reads and by enabling this option, readahead_size of current file (if reads are sequential) will be carried forward to next file instead of starting from the scratch at each level (except L0 level files). If reads are not sequential it will fall back to 8KB. This option is applicable only for RocksDB internal prefetch buffer and isn't supported with underlying file system prefetching. +* Added the read count and read bytes related stats to Statistics for tiered storage hot, warm, and cold file reads. 
### Bug Fixes * Prevent a `CompactRange()` with `CompactRangeOptions::change_level == true` from possibly causing corruption to the LSM state (overlapping files within a level) when run in parallel with another manual compaction. Note that setting `force_consistency_checks == true` (the default) would cause the DB to enter read-only mode in this scenario and return `Status::Corruption`, rather than committing any corruption. diff --git a/db/db_test2.cc b/db/db_test2.cc index 5904865f6..0ce6f1e4d 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -6501,6 +6501,7 @@ TEST_F(DBTest2, BottommostTemperature) { Options options = CurrentOptions(); options.bottommost_temperature = Temperature::kWarm; options.level0_file_num_compaction_trigger = 2; + options.statistics = CreateDBStatistics(); Reopen(options); auto size = GetSstSizeHelper(Temperature::kUnknown); @@ -6532,6 +6533,9 @@ TEST_F(DBTest2, BottommostTemperature) { ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0); + ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0); + ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0); + ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0); ASSERT_EQ("bar", Get("foo")); @@ -6541,6 +6545,12 @@ TEST_F(DBTest2, BottommostTemperature) { ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0); ASSERT_GT(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0); + ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0); + ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0); + ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0); + ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0); + 
ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0); + ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0); // non-bottommost file still has unknown temperature ASSERT_OK(Put("foo", "bar")); @@ -6553,6 +6563,12 @@ TEST_F(DBTest2, BottommostTemperature) { ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0); ASSERT_GT(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0); + ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0); + ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0); + ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0); + ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0); + ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0); + ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0); db_->GetColumnFamilyMetaData(&metadata); ASSERT_EQ(2, metadata.file_count); @@ -6593,7 +6609,7 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) { options.compaction_style = kCompactionStyleUniversal; options.level0_file_num_compaction_trigger = kTriggerNum; options.num_levels = kNumLevels; - + options.statistics = CreateDBStatistics(); DestroyAndReopen(options); auto size = GetSstSizeHelper(Temperature::kUnknown); @@ -6624,6 +6640,12 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) { ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0); + ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0); + ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0); + ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0); + ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0); + 
ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0); + ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0); ASSERT_EQ("bar", Get("foo")); ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0); @@ -6632,6 +6654,12 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) { ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0); ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0); + ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0); + ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0); + ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0); + ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0); + ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0); + ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Put("bar", "bar")); @@ -6667,6 +6695,12 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) { ASSERT_EQ(size, 0); size = GetSstSizeHelper(Temperature::kWarm); ASSERT_GT(size, 0); + ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0); + ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0); + ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0); + ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0); + ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0); + ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0); // non-bottommost file still has unknown temperature ASSERT_OK(Put("foo", "bar")); diff --git a/file/random_access_file_reader.cc b/file/random_access_file_reader.cc index 4ffcce25c..4af420c62 100644 --- a/file/random_access_file_reader.cc +++ b/file/random_access_file_reader.cc @@ -62,6 +62,48 @@ inline void IOStatsAddCountByTemperature(Temperature 
file_temperature, } } +inline void StatisticAddBytesByTemperature(Statistics* stats, + Temperature file_temperature, + size_t value) { + if (stats == nullptr || file_temperature == Temperature::kUnknown) { + return; + } + switch (file_temperature) { + case Temperature::kHot: + RecordTick(stats, HOT_FILE_READ_BYTES, value); + break; + case Temperature::kWarm: + RecordTick(stats, WARM_FILE_READ_BYTES, value); + break; + case Temperature::kCold: + RecordTick(stats, COLD_FILE_READ_BYTES, value); + break; + default: + break; + } +} + +inline void StatisticAddCountByTemperature(Statistics* stats, + Temperature file_temperature, + size_t value) { + if (stats == nullptr || file_temperature == Temperature::kUnknown) { + return; + } + switch (file_temperature) { + case Temperature::kHot: + RecordTick(stats, HOT_FILE_READ_COUNT, value); + break; + case Temperature::kWarm: + RecordTick(stats, WARM_FILE_READ_COUNT, value); + break; + case Temperature::kCold: + RecordTick(stats, COLD_FILE_READ_COUNT, value); + break; + default: + break; + } +} + IOStatus RandomAccessFileReader::Create( const std::shared_ptr& fs, const std::string& fname, const FileOptions& file_opts, @@ -224,6 +266,8 @@ IOStatus RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset, IOSTATS_ADD(bytes_read, result->size()); IOStatsAddBytesByTemperature(file_temperature_, result->size()); IOStatsAddCountByTemperature(file_temperature_, 1); + StatisticAddBytesByTemperature(stats_, file_temperature_, result->size()); + StatisticAddCountByTemperature(stats_, file_temperature_, 1); SetPerfLevel(prev_perf_level); } if (stats_ != nullptr && file_read_hist_ != nullptr) { @@ -392,6 +436,9 @@ IOStatus RandomAccessFileReader::MultiRead(const IOOptions& opts, IOStatsAddBytesByTemperature(file_temperature_, read_reqs[i].result.size()); IOStatsAddCountByTemperature(file_temperature_, 1); + StatisticAddBytesByTemperature(stats_, file_temperature_, + read_reqs[i].result.size()); + 
StatisticAddCountByTemperature(stats_, file_temperature_, 1); } SetPerfLevel(prev_perf_level); } diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 795f23fe4..6525493f4 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -417,6 +417,14 @@ enum Tickers : uint32_t { REMOTE_COMPACT_READ_BYTES, REMOTE_COMPACT_WRITE_BYTES, + // Tiered storage related statistics + HOT_FILE_READ_BYTES, + WARM_FILE_READ_BYTES, + COLD_FILE_READ_BYTES, + HOT_FILE_READ_COUNT, + WARM_FILE_READ_COUNT, + COLD_FILE_READ_COUNT, + TICKER_ENUM_MAX }; diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 261769764..512701ae0 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -5012,6 +5012,18 @@ class TickerTypeJni { return -0x22; case ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_WRITE_BYTES: return -0x23; + case ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_BYTES: + return -0x24; + case ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_BYTES: + return -0x25; + case ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_BYTES: + return -0x26; + case ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_COUNT: + return -0x27; + case ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_COUNT: + return -0x28; + case ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_COUNT: + return -0x29; case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: // 0x5F was the max value in the initial copy of tickers to Java. 
// Since these values are exposed directly to Java clients, we keep @@ -5361,6 +5373,18 @@ class TickerTypeJni { return ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_READ_BYTES; case -0x23: return ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_WRITE_BYTES; + case -0x24: + return ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_BYTES; + case -0x25: + return ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_BYTES; + case -0x26: + return ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_BYTES; + case -0x27: + return ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_COUNT; + case -0x28: + return ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_COUNT; + case -0x29: + return ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_COUNT; case 0x5F: // 0x5F was the max value in the initial copy of tickers to Java. // Since these values are exposed directly to Java clients, we keep diff --git a/java/src/main/java/org/rocksdb/TickerType.java b/java/src/main/java/org/rocksdb/TickerType.java index 9fa1942bf..237b4cb88 100644 --- a/java/src/main/java/org/rocksdb/TickerType.java +++ b/java/src/main/java/org/rocksdb/TickerType.java @@ -786,6 +786,16 @@ public enum TickerType { REMOTE_COMPACT_READ_BYTES((byte) -0x22), REMOTE_COMPACT_WRITE_BYTES((byte) -0x23), + /** + * Tiered storage related statistics + */ + HOT_FILE_READ_BYTES((byte) -0x24), + WARM_FILE_READ_BYTES((byte) -0x25), + COLD_FILE_READ_BYTES((byte) -0x26), + HOT_FILE_READ_COUNT((byte) -0x27), + WARM_FILE_READ_COUNT((byte) -0x28), + COLD_FILE_READ_COUNT((byte) -0x29), + TICKER_ENUM_MAX((byte) 0x5F); private final byte value; diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index 3bb4b5964..eb5941ba5 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -216,6 +216,12 @@ const std::vector> TickersNameMap = { {BACKUP_WRITE_BYTES, "rocksdb.backup.write.bytes"}, {REMOTE_COMPACT_READ_BYTES, "rocksdb.remote.compact.read.bytes"}, {REMOTE_COMPACT_WRITE_BYTES, "rocksdb.remote.compact.write.bytes"}, + {HOT_FILE_READ_BYTES, 
"rocksdb.hot.file.read.bytes"}, + {WARM_FILE_READ_BYTES, "rocksdb.warm.file.read.bytes"}, + {COLD_FILE_READ_BYTES, "rocksdb.cold.file.read.bytes"}, + {HOT_FILE_READ_COUNT, "rocksdb.hot.file.read.count"}, + {WARM_FILE_READ_COUNT, "rocksdb.warm.file.read.count"}, + {COLD_FILE_READ_COUNT, "rocksdb.cold.file.read.count"}, }; const std::vector> HistogramsNameMap = {