From 37b75e13641db2d534ede95bee154ba4f895573a Mon Sep 17 00:00:00 2001 From: anand76 Date: Thu, 15 Sep 2022 22:48:06 -0700 Subject: [PATCH] Fix some MultiGet stats (#10673) Summary: The stats were not accurate for the coroutine version of MultiGet. This PR fixes it. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10673 Reviewed By: akankshamahajan15 Differential Revision: D39492615 Pulled By: anand1976 fbshipit-source-id: b46c04e15ea27e66f4c31f00c66497aa283bf9d3 --- HISTORY.md | 1 + db/version_set.cc | 62 ++++++++++++++++++++++++++++++++--------------- db/version_set.h | 17 ++++++------- 3 files changed, 51 insertions(+), 29 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 9cea81b18..9615b07cf 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -9,6 +9,7 @@ * Update rocksdb.multiget.io.batch.size stat in non-async MultiGet as well. * Fix a bug in key range overlap checking with concurrent compactions when user-defined timestamp is enabled. User-defined timestamps should be EXCLUDED when checking if two ranges overlap. * Fixed a bug where the blob cache prepopulating logic did not consider the secondary cache (see #10603). +* Fixed the rocksdb.num.sst.read.per.level, rocksdb.num.index.and.filter.blocks.read.per.level and rocksdb.num.level.read.per.multiget stats in the MultiGet coroutines ### Public API changes * Add `rocksdb_column_family_handle_get_id`, `rocksdb_column_family_handle_get_name` to get name, id of column family in C API diff --git a/db/version_set.cc b/db/version_set.cc index b068d06c2..0d291d16f 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2655,8 +2655,8 @@ Status Version::ProcessBatch( std::unordered_map* blob_ctxs, autovector& batches, std::deque& waiting, std::deque& to_process, unsigned int& num_tasks_queued, - uint64_t& num_filter_read, uint64_t& num_index_read, - uint64_t& num_sst_read) { + std::unordered_map>& + mget_stats) { FilePickerMultiGet& fp = *batch; MultiGetRange range = fp.GetRange(); // Initialize a new empty range. Any keys that are not in this level will @@ -2706,19 +2706,29 @@ Status Version::ProcessBatch( leftover += ~file_range; range -= ~file_range; if (!file_range.empty()) { + int level = fp.GetHitFileLevel(); + auto stat = mget_stats.find(level); + if (stat == mget_stats.end()) { + auto entry = mget_stats.insert({level, {0, 0, 0}}); + assert(entry.second); + stat = entry.first; + } + if (waiting.empty() && to_process.empty() && !fp.RemainingOverlapInLevel() && leftover.empty() && mget_tasks.empty()) { // All keys are in one SST file, so take the fast path s = MultiGetFromSST(read_options, file_range, fp.GetHitFileLevel(), skip_filters, skip_range_deletions, f, *blob_ctxs, - table_handle, num_filter_read, num_index_read, - num_sst_read); + table_handle, std::get<0>(stat->second), + std::get<1>(stat->second), + std::get<2>(stat->second)); } else { mget_tasks.emplace_back(MultiGetFromSSTCoroutine( read_options, file_range, fp.GetHitFileLevel(), skip_filters, - skip_range_deletions, f, *blob_ctxs, table_handle, num_filter_read, - num_index_read, num_sst_read)); + skip_range_deletions, f, *blob_ctxs, table_handle, + std::get<0>(stat->second), std::get<1>(stat->second), + std::get<2>(stat->second))); ++num_tasks_queued; } } @@ -2756,9 +2766,7 @@ Status Version::MultiGetAsync( std::deque to_process; Status s; std::vector> mget_tasks; - uint64_t num_filter_read = 0; - uint64_t num_index_read = 0; - uint64_t num_sst_read = 0; + std::unordered_map> mget_stats; // Create the initial batch with the input range batches.emplace_back(range, &storage_info_.level_files_brief_, @@ -2780,20 +2788,10 @@ Status Version::MultiGetAsync( // Look through one level. This may split the batch and enqueue it to // to_process s = ProcessBatch(options, batch, mget_tasks, blob_ctxs, batches, waiting, - to_process, num_tasks_queued, num_filter_read, - num_index_read, num_sst_read); + to_process, num_tasks_queued, mget_stats); if (!s.ok()) { break; } - // Dump the stats since the search has moved to the next level - if (num_filter_read + num_index_read) { - RecordInHistogram(db_statistics_, - NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, - num_index_read + num_filter_read); - } - if (num_sst_read) { - RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read); - } // If ProcessBatch didn't enqueue any coroutine tasks, it means all // keys were filtered out. So put the batch back in to_process to // lookup in the next level @@ -2840,6 +2838,30 @@ Status Version::MultiGetAsync( } } + uint64_t num_levels = 0; + for (auto& stat : mget_stats) { + if (stat.first == 0) { + num_levels += std::get<2>(stat.second); + } else { + num_levels++; + } + + uint64_t num_meta_reads = + std::get<0>(stat.second) + std::get<1>(stat.second); + uint64_t num_sst_reads = std::get<2>(stat.second); + if (num_meta_reads > 0) { + RecordInHistogram(db_statistics_, + NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, + num_meta_reads); + } + if (num_sst_reads > 0) { + RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_reads); + } + } + if (num_levels > 0) { + RecordInHistogram(db_statistics_, NUM_LEVEL_READ_PER_MULTIGET, num_levels); + } + return s; } #endif diff --git a/db/version_set.h b/db/version_set.h index 077bb26c3..99625c550 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -1014,15 +1014,14 @@ class Version { // queue coroutine tasks to mget_tasks. It may also split the input batch // by creating a new batch with keys definitely not in this level and // enqueuing it to to_process. - Status ProcessBatch(const ReadOptions& read_options, - FilePickerMultiGet* batch, - std::vector>& mget_tasks, - std::unordered_map* blob_ctxs, - autovector& batches, - std::deque& waiting, - std::deque& to_process, - unsigned int& num_tasks_queued, uint64_t& num_filter_read, - uint64_t& num_index_read, uint64_t& num_sst_read); + Status ProcessBatch( + const ReadOptions& read_options, FilePickerMultiGet* batch, + std::vector>& mget_tasks, + std::unordered_map* blob_ctxs, + autovector& batches, std::deque& waiting, + std::deque& to_process, unsigned int& num_tasks_queued, + std::unordered_map>& + mget_stats); #endif ColumnFamilyData* cfd_; // ColumnFamilyData to which this Version belongs