Add file temperature related counter and bytes stats to and io_stats (#8710)

Summary:
For tiered storage project, we need to know the block read count and read bytes of files with different temperature. Add FileIOByTemperature to IOStatsContext and collect the bytes read and read count from different temperature files through the RandomAccessFileReader.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8710

Test Plan: make check, add the testing cases

Reviewed By: siying

Differential Revision: D30582400

Pulled By: zhichao-cao

fbshipit-source-id: d83173de594374fc8404af5ce93a6a9be72c7141
main
Zhichao Cao 3 years ago committed by Facebook GitHub Bot
parent 699f45049d
commit b632ed0c67
  1. 37
      db/db_test2.cc
  2. 37
      db/table_cache.cc
  3. 6
      db/table_cache.h
  4. 3
      db/version_builder.cc
  5. 45
      file/random_access_file_reader.cc
  6. 7
      file/random_access_file_reader.h
  7. 28
      include/rocksdb/iostats_context.h
  8. 8
      monitoring/iostats_context.cc
  9. 27
      monitoring/perf_context_imp.h

@ -17,6 +17,7 @@
#include "options/options_helper.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/iostats_context.h"
#include "rocksdb/persistent_cache.h"
#include "rocksdb/trace_record.h"
#include "rocksdb/trace_record_result.h"
@ -6522,6 +6523,9 @@ TEST_F(DBTest2, BottommostTemperature) {
ASSERT_OK(Flush());
ASSERT_OK(dbfull()->TEST_WaitForCompact());
get_iostats_context()->Reset();
IOStatsContext* iostats = get_iostats_context();
ColumnFamilyMetaData metadata;
db_->GetColumnFamilyMetaData(&metadata);
ASSERT_EQ(1, metadata.file_count);
@ -6530,11 +6534,31 @@ TEST_F(DBTest2, BottommostTemperature) {
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_GT(size, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
ASSERT_EQ("bar", Get("foo"));
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 1);
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
ASSERT_GT(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
// non-bottommost file still has unknown temperature
ASSERT_OK(Put("foo", "bar"));
ASSERT_OK(Put("bar", "bar"));
ASSERT_OK(Flush());
ASSERT_EQ("bar", Get("bar"));
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 1);
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
ASSERT_GT(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
db_->GetColumnFamilyMetaData(&metadata);
ASSERT_EQ(2, metadata.file_count);
ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
@ -6583,6 +6607,8 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) {
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kHot);
ASSERT_EQ(size, 0);
get_iostats_context()->Reset();
IOStatsContext* iostats = get_iostats_context();
for (int i = 0; i < kTriggerNum; i++) {
ASSERT_OK(Put("foo", "bar"));
@ -6600,6 +6626,17 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) {
ASSERT_GT(size, 0);
size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_EQ(size, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
ASSERT_EQ("bar", Get("foo"));
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
ASSERT_OK(Put("foo", "bar"));
ASSERT_OK(Put("bar", "bar"));

@ -103,7 +103,7 @@ Status TableCache::GetTableReader(
std::unique_ptr<TableReader>* table_reader,
const SliceTransform* prefix_extractor, bool skip_filters, int level,
bool prefetch_index_and_filter_in_cache,
size_t max_file_size_for_l0_meta_pin) {
size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) {
std::string fname =
TableFileName(ioptions_.cf_paths, fd.GetNumber(), fd.GetPathId());
std::unique_ptr<FSRandomAccessFile> file;
@ -132,7 +132,8 @@ Status TableCache::GetTableReader(
new RandomAccessFileReader(
std::move(file), fname, ioptions_.clock, io_tracer_,
record_read_stats ? ioptions_.stats : nullptr, SST_READ_MICROS,
file_read_hist, ioptions_.rate_limiter.get(), ioptions_.listeners));
file_read_hist, ioptions_.rate_limiter.get(), ioptions_.listeners,
file_temperature));
s = ioptions_.table_factory->NewTableReader(
ro,
TableReaderOptions(
@ -154,15 +155,13 @@ void TableCache::EraseHandle(const FileDescriptor& fd, Cache::Handle* handle) {
cache_->Erase(key);
}
Status TableCache::FindTable(const ReadOptions& ro,
const FileOptions& file_options,
const InternalKeyComparator& internal_comparator,
const FileDescriptor& fd, Cache::Handle** handle,
const SliceTransform* prefix_extractor,
const bool no_io, bool record_read_stats,
HistogramImpl* file_read_hist, bool skip_filters,
int level, bool prefetch_index_and_filter_in_cache,
size_t max_file_size_for_l0_meta_pin) {
Status TableCache::FindTable(
const ReadOptions& ro, const FileOptions& file_options,
const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
Cache::Handle** handle, const SliceTransform* prefix_extractor,
const bool no_io, bool record_read_stats, HistogramImpl* file_read_hist,
bool skip_filters, int level, bool prefetch_index_and_filter_in_cache,
size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) {
PERF_TIMER_GUARD_WITH_CLOCK(find_table_nanos, ioptions_.clock);
uint64_t number = fd.GetNumber();
Slice key = GetSliceForFileNumber(&number);
@ -186,7 +185,7 @@ Status TableCache::FindTable(const ReadOptions& ro,
ro, file_options, internal_comparator, fd, false /* sequential mode */,
record_read_stats, file_read_hist, &table_reader, prefix_extractor,
skip_filters, level, prefetch_index_and_filter_in_cache,
max_file_size_for_l0_meta_pin);
max_file_size_for_l0_meta_pin, file_temperature);
if (!s.ok()) {
assert(table_reader == nullptr);
RecordTick(ioptions_.stats, NO_FILE_ERRORS);
@ -231,7 +230,7 @@ InternalIterator* TableCache::NewIterator(
options.read_tier == kBlockCacheTier /* no_io */,
!for_compaction /* record_read_stats */, file_read_hist, skip_filters,
level, true /* prefetch_index_and_filter_in_cache */,
max_file_size_for_l0_meta_pin);
max_file_size_for_l0_meta_pin, file_meta.temperature);
if (s.ok()) {
table_reader = GetTableReaderFromHandle(handle);
}
@ -431,7 +430,7 @@ Status TableCache::Get(const ReadOptions& options,
options.read_tier == kBlockCacheTier /* no_io */,
true /* record_read_stats */, file_read_hist, skip_filters,
level, true /* prefetch_index_and_filter_in_cache */,
max_file_size_for_l0_meta_pin);
max_file_size_for_l0_meta_pin, file_meta.temperature);
if (s.ok()) {
t = GetTableReaderFromHandle(handle);
}
@ -531,10 +530,12 @@ Status TableCache::MultiGet(const ReadOptions& options,
// found in the row cache and thus the range may now be empty
if (s.ok() && !table_range.empty()) {
if (t == nullptr) {
s = FindTable(
options, file_options_, internal_comparator, fd, &handle,
prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */,
true /* record_read_stats */, file_read_hist, skip_filters, level);
s = FindTable(options, file_options_, internal_comparator, fd, &handle,
prefix_extractor,
options.read_tier == kBlockCacheTier /* no_io */,
true /* record_read_stats */, file_read_hist, skip_filters,
level, true /* prefetch_index_and_filter_in_cache */,
0 /*max_file_size_for_l0_meta_pin*/, file_meta.temperature);
TEST_SYNC_POINT_CALLBACK("TableCache::MultiGet:FindTable", &s);
if (s.ok()) {
t = GetTableReaderFromHandle(handle);

@ -140,7 +140,8 @@ class TableCache {
HistogramImpl* file_read_hist = nullptr,
bool skip_filters = false, int level = -1,
bool prefetch_index_and_filter_in_cache = true,
size_t max_file_size_for_l0_meta_pin = 0);
size_t max_file_size_for_l0_meta_pin = 0,
Temperature file_temperature = Temperature::kUnknown);
// Get TableReader from a cache handle.
TableReader* GetTableReaderFromHandle(Cache::Handle* handle);
@ -206,7 +207,8 @@ class TableCache {
const SliceTransform* prefix_extractor = nullptr,
bool skip_filters = false, int level = -1,
bool prefetch_index_and_filter_in_cache = true,
size_t max_file_size_for_l0_meta_pin = 0);
size_t max_file_size_for_l0_meta_pin = 0,
Temperature file_temperature = Temperature::kUnknown);
// Create a key prefix for looking up the row cache. The prefix is of the
// format row_cache_id + fd_number + seq_no. Later, the user key can be

@ -1017,7 +1017,8 @@ class VersionBuilder::Rep {
&file_meta->table_reader_handle, prefix_extractor, false /*no_io */,
true /* record_read_stats */,
internal_stats->GetFileReadHist(level), false, level,
prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin);
prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin,
file_meta->temperature);
if (file_meta->table_reader_handle != nullptr) {
// Load table_reader
file_meta->fd.table_reader = table_cache_->GetTableReaderFromHandle(

@ -22,6 +22,46 @@
#include "util/rate_limiter.h"
namespace ROCKSDB_NAMESPACE {
inline void IOStatsAddBytesByTemperature(Temperature file_temperature,
size_t value) {
if (file_temperature == Temperature::kUnknown) {
return;
}
switch (file_temperature) {
case Temperature::kHot:
IOSTATS_ADD(file_io_stats_by_temperature.hot_file_bytes_read, value);
break;
case Temperature::kWarm:
IOSTATS_ADD(file_io_stats_by_temperature.warm_file_bytes_read, value);
break;
case Temperature::kCold:
IOSTATS_ADD(file_io_stats_by_temperature.cold_file_bytes_read, value);
break;
default:
break;
}
}
inline void IOStatsAddCountByTemperature(Temperature file_temperature,
size_t value) {
if (file_temperature == Temperature::kUnknown) {
return;
}
switch (file_temperature) {
case Temperature::kHot:
IOSTATS_ADD(file_io_stats_by_temperature.hot_file_read_count, value);
break;
case Temperature::kWarm:
IOSTATS_ADD(file_io_stats_by_temperature.warm_file_read_count, value);
break;
case Temperature::kCold:
IOSTATS_ADD(file_io_stats_by_temperature.cold_file_read_count, value);
break;
default:
break;
}
}
IOStatus RandomAccessFileReader::Create(
const std::shared_ptr<FileSystem>& fs, const std::string& fname,
const FileOptions& file_opts,
@ -182,6 +222,8 @@ IOStatus RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
*result = Slice(res_scratch, io_s.ok() ? pos : 0);
}
IOSTATS_ADD(bytes_read, result->size());
IOStatsAddBytesByTemperature(file_temperature_, result->size());
IOStatsAddCountByTemperature(file_temperature_, 1);
SetPerfLevel(prev_perf_level);
}
if (stats_ != nullptr && file_read_hist_ != nullptr) {
@ -347,6 +389,9 @@ IOStatus RandomAccessFileReader::MultiRead(const IOOptions& opts,
}
#endif // ROCKSDB_LITE
IOSTATS_ADD(bytes_read, read_reqs[i].result.size());
IOStatsAddBytesByTemperature(file_temperature_,
read_reqs[i].result.size());
IOStatsAddCountByTemperature(file_temperature_, 1);
}
SetPerfLevel(prev_perf_level);
}

@ -73,6 +73,7 @@ class RandomAccessFileReader {
HistogramImpl* file_read_hist_;
RateLimiter* rate_limiter_;
std::vector<std::shared_ptr<EventListener>> listeners_;
Temperature file_temperature_;
public:
explicit RandomAccessFileReader(
@ -82,7 +83,8 @@ class RandomAccessFileReader {
Statistics* stats = nullptr, uint32_t hist_type = 0,
HistogramImpl* file_read_hist = nullptr,
RateLimiter* rate_limiter = nullptr,
const std::vector<std::shared_ptr<EventListener>>& listeners = {})
const std::vector<std::shared_ptr<EventListener>>& listeners = {},
Temperature file_temperature = Temperature::kUnknown)
: file_(std::move(raf), io_tracer, _file_name),
file_name_(std::move(_file_name)),
clock_(clock),
@ -90,7 +92,8 @@ class RandomAccessFileReader {
hist_type_(hist_type),
file_read_hist_(file_read_hist),
rate_limiter_(rate_limiter),
listeners_() {
listeners_(),
file_temperature_(file_temperature) {
#ifndef ROCKSDB_LITE
std::for_each(listeners.begin(), listeners.end(),
[this](const std::shared_ptr<EventListener>& e) {

@ -14,6 +14,32 @@
namespace ROCKSDB_NAMESPACE {
// EXPERIMENTAL: the IO statistics for tiered storage. It matches with each
// item in Temperature class.
struct FileIOByTemperature {
// the number of bytes read to Temperature::kHot file
uint64_t hot_file_bytes_read;
// the number of bytes read to Temperature::kWarm file
uint64_t warm_file_bytes_read;
// the number of bytes read to Temperature::kCold file
uint64_t cold_file_bytes_read;
// total number of reads to Temperature::kHot file
uint64_t hot_file_read_count;
// total number of reads to Temperature::kWarm file
uint64_t warm_file_read_count;
// total number of reads to Temperature::kCold file
uint64_t cold_file_read_count;
// reset all the statistics to 0.
void Reset() {
hot_file_bytes_read = 0;
warm_file_bytes_read = 0;
cold_file_bytes_read = 0;
hot_file_read_count = 0;
warm_file_read_count = 0;
cold_file_read_count = 0;
}
};
struct IOStatsContext {
// reset all io-stats counter to zero
void Reset();
@ -48,6 +74,8 @@ struct IOStatsContext {
uint64_t cpu_write_nanos;
// CPU time spent in read() and pread()
uint64_t cpu_read_nanos;
FileIOByTemperature file_io_stats_by_temperature;
};
// If RocksDB is compiled with -DNIOSTATS_CONTEXT, then a pointer to a global,

@ -39,6 +39,7 @@ void IOStatsContext::Reset() {
logger_nanos = 0;
cpu_write_nanos = 0;
cpu_read_nanos = 0;
file_io_stats_by_temperature.Reset();
#endif //! NIOSTATS_CONTEXT
}
@ -66,7 +67,12 @@ std::string IOStatsContext::ToString(bool exclude_zero_counters) const {
IOSTATS_CONTEXT_OUTPUT(logger_nanos);
IOSTATS_CONTEXT_OUTPUT(cpu_write_nanos);
IOSTATS_CONTEXT_OUTPUT(cpu_read_nanos);
IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.hot_file_bytes_read);
IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.warm_file_bytes_read);
IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.cold_file_bytes_read);
IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.hot_file_read_count);
IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.warm_file_read_count);
IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.cold_file_read_count);
std::string str = ss.str();
str.erase(str.find_last_not_of(", ") + 1);
return str;

@ -77,20 +77,19 @@ extern thread_local PerfContext perf_context;
}
// Increase metric value
#define PERF_COUNTER_BY_LEVEL_ADD(metric, value, level) \
if (perf_level >= PerfLevel::kEnableCount && \
perf_context.per_level_perf_context_enabled && \
perf_context.level_to_perf_context) { \
if ((*(perf_context.level_to_perf_context)).find(level) != \
(*(perf_context.level_to_perf_context)).end()) { \
(*(perf_context.level_to_perf_context))[level].metric += value; \
} \
else { \
PerfContextByLevel empty_context; \
(*(perf_context.level_to_perf_context))[level] = empty_context; \
(*(perf_context.level_to_perf_context))[level].metric += value; \
} \
} \
#define PERF_COUNTER_BY_LEVEL_ADD(metric, value, level) \
if (perf_level >= PerfLevel::kEnableCount && \
perf_context.per_level_perf_context_enabled && \
perf_context.level_to_perf_context) { \
if ((*(perf_context.level_to_perf_context)).find(level) != \
(*(perf_context.level_to_perf_context)).end()) { \
(*(perf_context.level_to_perf_context))[level].metric += value; \
} else { \
PerfContextByLevel empty_context; \
(*(perf_context.level_to_perf_context))[level] = empty_context; \
(*(perf_context.level_to_perf_context))[level].metric += value; \
} \
}
#endif

Loading…
Cancel
Save