From 3ae386eafe2eeca911b20642048a7ae490020ace Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 5 Aug 2015 12:11:30 -0700 Subject: [PATCH] Add statistic histogram "rocksdb.sst.read.micros" Summary: Measure read latency histogram and put in statistics. Compaction inputs are excluded from it when possible (unfortunately usually not possible as we usually take table reader from table cache). Test Plan: Run db_bench and it shows the stats, like: rocksdb.sst.read.micros statistics Percentiles :=> 50 : 1.238522 95 : 2.529740 99 : 3.912180 Reviewers: kradhakrishnan, rven, anthony, IslamAbdelRahman, MarkCallaghan, yhchiang Reviewed By: yhchiang Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D43275 --- db/compaction_job.cc | 3 ++- db/table_cache.cc | 9 +++++--- db/table_cache.h | 2 +- include/rocksdb/statistics.h | 42 +++++++++++++++++++----------------- util/file_reader_writer.cc | 1 + util/file_reader_writer.h | 16 ++++++++++++-- 6 files changed, 46 insertions(+), 27 deletions(-) diff --git a/db/compaction_job.cc b/db/compaction_job.cc index bc9f318ca..380b79f73 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -716,7 +716,8 @@ Status CompactionJob::FinishCompactionOutputFile(const Status& input_status) { ColumnFamilyData* cfd = compact_->compaction->column_family_data(); FileDescriptor fd(output_number, output_path_id, current_bytes); Iterator* iter = cfd->table_cache()->NewIterator( - ReadOptions(), env_options_, cfd->internal_comparator(), fd); + ReadOptions(), env_options_, cfd->internal_comparator(), fd, nullptr, + true); s = iter->status(); if (s.ok() && paranoid_file_checks_) { diff --git a/db/table_cache.cc b/db/table_cache.cc index 3e0fdc3eb..8b11d0fed 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -79,7 +79,7 @@ void TableCache::ReleaseHandle(Cache::Handle* handle) { Status TableCache::FindTable(const EnvOptions& env_options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, 
Cache::Handle** handle, - const bool no_io) { + const bool no_io, bool record_read_stats) { PERF_TIMER_GUARD(find_table_nanos); Status s; uint64_t number = fd.GetNumber(); @@ -101,7 +101,10 @@ Status TableCache::FindTable(const EnvOptions& env_options, } StopWatch sw(ioptions_.env, ioptions_.statistics, TABLE_OPEN_IO_MICROS); std::unique_ptr file_reader( - new RandomAccessFileReader(std::move(file))); + new RandomAccessFileReader( + std::move(file), ioptions_.env, + record_read_stats ? ioptions_.statistics : nullptr, + SST_READ_MICROS)); s = ioptions_.table_factory->NewTableReader( ioptions_, env_options, internal_comparator, std::move(file_reader), fd.GetFileSize(), &table_reader); @@ -136,7 +139,7 @@ Iterator* TableCache::NewIterator(const ReadOptions& options, Status s; if (table_reader == nullptr) { s = FindTable(env_options, icomparator, fd, &handle, - options.read_tier == kBlockCacheTier); + options.read_tier == kBlockCacheTier, !for_compaction); if (!s.ok()) { return NewErrorIterator(s, arena); } diff --git a/db/table_cache.h b/db/table_cache.h index 5212de717..95b74b54f 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -63,7 +63,7 @@ class TableCache { Status FindTable(const EnvOptions& toptions, const InternalKeyComparator& internal_comparator, const FileDescriptor& file_fd, Cache::Handle**, - const bool no_io = false); + const bool no_io = false, bool record_read_stats = true); // Get TableReader from a cache handle. 
TableReader* GetTableReaderFromHandle(Cache::Handle* handle); diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index b7644f1e3..61a9958ae 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -251,30 +251,32 @@ enum Histograms : uint32_t { NUM_FILES_IN_SINGLE_COMPACTION, DB_SEEK, WRITE_STALL, + SST_READ_MICROS, HISTOGRAM_ENUM_MAX, // TODO(ldemailly): enforce HistogramsNameMap match }; const std::vector> HistogramsNameMap = { - { DB_GET, "rocksdb.db.get.micros" }, - { DB_WRITE, "rocksdb.db.write.micros" }, - { COMPACTION_TIME, "rocksdb.compaction.times.micros" }, - { TABLE_SYNC_MICROS, "rocksdb.table.sync.micros" }, - { COMPACTION_OUTFILE_SYNC_MICROS, "rocksdb.compaction.outfile.sync.micros" }, - { WAL_FILE_SYNC_MICROS, "rocksdb.wal.file.sync.micros" }, - { MANIFEST_FILE_SYNC_MICROS, "rocksdb.manifest.file.sync.micros" }, - { TABLE_OPEN_IO_MICROS, "rocksdb.table.open.io.micros" }, - { DB_MULTIGET, "rocksdb.db.multiget.micros" }, - { READ_BLOCK_COMPACTION_MICROS, "rocksdb.read.block.compaction.micros" }, - { READ_BLOCK_GET_MICROS, "rocksdb.read.block.get.micros" }, - { WRITE_RAW_BLOCK_MICROS, "rocksdb.write.raw.block.micros" }, - { STALL_L0_SLOWDOWN_COUNT, "rocksdb.l0.slowdown.count"}, - { STALL_MEMTABLE_COMPACTION_COUNT, "rocksdb.memtable.compaction.count"}, - { STALL_L0_NUM_FILES_COUNT, "rocksdb.num.files.stall.count"}, - { HARD_RATE_LIMIT_DELAY_COUNT, "rocksdb.hard.rate.limit.delay.count"}, - { SOFT_RATE_LIMIT_DELAY_COUNT, "rocksdb.soft.rate.limit.delay.count"}, - { NUM_FILES_IN_SINGLE_COMPACTION, "rocksdb.numfiles.in.singlecompaction" }, - { DB_SEEK, "rocksdb.db.seek.micros" }, - { WRITE_STALL, "rocksdb.db.write.stall" }, + {DB_GET, "rocksdb.db.get.micros"}, + {DB_WRITE, "rocksdb.db.write.micros"}, + {COMPACTION_TIME, "rocksdb.compaction.times.micros"}, + {TABLE_SYNC_MICROS, "rocksdb.table.sync.micros"}, + {COMPACTION_OUTFILE_SYNC_MICROS, "rocksdb.compaction.outfile.sync.micros"}, + {WAL_FILE_SYNC_MICROS, 
"rocksdb.wal.file.sync.micros"}, + {MANIFEST_FILE_SYNC_MICROS, "rocksdb.manifest.file.sync.micros"}, + {TABLE_OPEN_IO_MICROS, "rocksdb.table.open.io.micros"}, + {DB_MULTIGET, "rocksdb.db.multiget.micros"}, + {READ_BLOCK_COMPACTION_MICROS, "rocksdb.read.block.compaction.micros"}, + {READ_BLOCK_GET_MICROS, "rocksdb.read.block.get.micros"}, + {WRITE_RAW_BLOCK_MICROS, "rocksdb.write.raw.block.micros"}, + {STALL_L0_SLOWDOWN_COUNT, "rocksdb.l0.slowdown.count"}, + {STALL_MEMTABLE_COMPACTION_COUNT, "rocksdb.memtable.compaction.count"}, + {STALL_L0_NUM_FILES_COUNT, "rocksdb.num.files.stall.count"}, + {HARD_RATE_LIMIT_DELAY_COUNT, "rocksdb.hard.rate.limit.delay.count"}, + {SOFT_RATE_LIMIT_DELAY_COUNT, "rocksdb.soft.rate.limit.delay.count"}, + {NUM_FILES_IN_SINGLE_COMPACTION, "rocksdb.numfiles.in.singlecompaction"}, + {DB_SEEK, "rocksdb.db.seek.micros"}, + {WRITE_STALL, "rocksdb.db.write.stall"}, + {SST_READ_MICROS, "rocksdb.sst.read.micros"}, }; struct HistogramData { diff --git a/util/file_reader_writer.cc b/util/file_reader_writer.cc index 5f8bd134f..179dc77bd 100644 --- a/util/file_reader_writer.cc +++ b/util/file_reader_writer.cc @@ -27,6 +27,7 @@ Status SequentialFileReader::Skip(uint64_t n) { return file_->Skip(n); } Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { + StopWatch sw(env_, stats_, hist_type_); IOSTATS_TIMER_GUARD(read_nanos); Status s = file_->Read(offset, n, result, scratch); IOSTATS_ADD_IF_POSITIVE(bytes_read, result->size()); diff --git a/util/file_reader_writer.h b/util/file_reader_writer.h index c089a5022..a16acb266 100644 --- a/util/file_reader_writer.h +++ b/util/file_reader_writer.h @@ -10,6 +10,9 @@ #include "rocksdb/env.h" namespace rocksdb { + +class Statistics; + class SequentialFileReader { private: std::unique_ptr file_; @@ -27,10 +30,19 @@ class SequentialFileReader { class RandomAccessFileReader : public RandomAccessFile { private: std::unique_ptr file_; + Env* env_; + Statistics* stats_; 
+ uint32_t hist_type_; public: - explicit RandomAccessFileReader(std::unique_ptr&& raf) - : file_(std::move(raf)) {} + explicit RandomAccessFileReader(std::unique_ptr&& raf, + Env* env = nullptr, + Statistics* stats = nullptr, + uint32_t hist_type = 0) + : file_(std::move(raf)), + env_(env), + stats_(stats), + hist_type_(hist_type) {} Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const;