diff --git a/HISTORY.md b/HISTORY.md
index 7e9532340..4b80bc66b 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -25,6 +25,7 @@
 * And Add C API's for Transaction, SstFileWriter, Compaction as mentioned [here](https://github.com/facebook/rocksdb/wiki/User-defined-Timestamp-(Experimental))
 * The contract for implementations of Comparator::IsSameLengthImmediateSuccessor has been updated to work around a design bug in `auto_prefix_mode`.
 * The API documentation for `auto_prefix_mode` now notes some corner cases in which it returns different results than `total_order_seek`, due to design bugs that are not easily fixed. Users using built-in comparators and keys at least the size of a fixed prefix length are not affected.
+* Obsoleted the NUM_DATA_BLOCKS_READ_PER_LEVEL stat and introduced the NUM_LEVEL_READ_PER_MULTIGET and MULTIGET_COROUTINE_COUNT stats
 
 ### New Features
 * Add FileSystem::ReadAsync API in io_tracing
diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc
index 840f332dc..55258434e 100644
--- a/db/db_basic_test.cc
+++ b/db/db_basic_test.cc
@@ -2221,6 +2221,7 @@ TEST_F(DBMultiGetAsyncIOTest, GetFromL0) {
   // No async IO in this case since we don't do parallel lookup in L0
   ASSERT_EQ(multiget_io_batch_size.count, 0);
   ASSERT_EQ(multiget_io_batch_size.max, 0);
+  ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0);
 }
 
 TEST_F(DBMultiGetAsyncIOTest, GetFromL1) {
@@ -2257,6 +2258,7 @@ TEST_F(DBMultiGetAsyncIOTest, GetFromL1) {
   // A batch of 3 async IOs is expected, one for each overlapping file in L1
   ASSERT_EQ(multiget_io_batch_size.count, 1);
   ASSERT_EQ(multiget_io_batch_size.max, 3);
+  ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 3);
 }
 
 TEST_F(DBMultiGetAsyncIOTest, LastKeyInFile) {
@@ -2407,27 +2409,37 @@ TEST_F(DBBasicTest, MultiGetStats) {
                 values.data(), s.data(), false);
 
   ASSERT_EQ(values.size(), kMultiGetBatchSize);
-  HistogramData hist_data_blocks;
+  HistogramData hist_level;
   HistogramData hist_index_and_filter_blocks;
   HistogramData hist_sst;
-  options.statistics->histogramData(NUM_DATA_BLOCKS_READ_PER_LEVEL,
-                                    &hist_data_blocks);
+  options.statistics->histogramData(NUM_LEVEL_READ_PER_MULTIGET, &hist_level);
   options.statistics->histogramData(NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
                                     &hist_index_and_filter_blocks);
   options.statistics->histogramData(NUM_SST_READ_PER_LEVEL, &hist_sst);
 
   // Maximum number of blocks read from a file system in a level.
-  ASSERT_EQ(hist_data_blocks.max, 32);
+  ASSERT_EQ(hist_level.max, 1);
   ASSERT_GT(hist_index_and_filter_blocks.max, 0);
   // Maximum number of sst files read from file system in a level.
   ASSERT_EQ(hist_sst.max, 2);
   // Minimun number of blocks read in a level.
-  ASSERT_EQ(hist_data_blocks.min, 4);
+  ASSERT_EQ(hist_level.min, 1);
   ASSERT_GT(hist_index_and_filter_blocks.min, 0);
   // Minimun number of sst files read in a level.
   ASSERT_EQ(hist_sst.min, 1);
+
+  for (PinnableSlice& value : values) {
+    value.Reset();
+  }
+  for (Status& status : s) {
+    status = Status::OK();
+  }
+  db_->MultiGet(read_opts, handles_[1], kMultiGetBatchSize, &keys[950],
+                values.data(), s.data(), false);
+  options.statistics->histogramData(NUM_LEVEL_READ_PER_MULTIGET, &hist_level);
+  ASSERT_EQ(hist_level.max, 2);
 }
 
 // Test class for batched MultiGet with prefix extractor
diff --git a/db/version_set.cc b/db/version_set.cc
index 729d535cf..3b6157fda 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -2252,8 +2252,8 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
   Status s;
   uint64_t num_index_read = 0;
   uint64_t num_filter_read = 0;
-  uint64_t num_data_read = 0;
   uint64_t num_sst_read = 0;
+  uint64_t num_level_read = 0;
 
   MultiGetRange keys_with_blobs_range(*range, range->begin(), range->end());
   // blob_file => [[blob_idx, it], ...]
@@ -2275,7 +2275,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
         s = MultiGetFromSST(read_options, fp.CurrentFileRange(),
                             fp.GetHitFileLevel(), fp.IsHitFileLastInLevel(), f,
                             blob_rqs, num_filter_read, num_index_read,
-                            num_data_read, num_sst_read);
+                            num_sst_read);
         if (fp.GetHitFileLevel() == 0) {
           dump_stats_for_l0_file = true;
         }
@@ -2290,13 +2290,14 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
         mget_tasks.emplace_back(MultiGetFromSSTCoroutine(
             read_options, fp.CurrentFileRange(), fp.GetHitFileLevel(),
             fp.IsHitFileLastInLevel(), f, blob_rqs, num_filter_read,
-            num_index_read, num_data_read, num_sst_read));
+            num_index_read, num_sst_read));
         if (fp.KeyMaySpanNextFile()) {
           break;
         }
         f = fp.GetNextFileInLevel();
       }
       if (mget_tasks.size() > 0) {
+        RecordTick(db_statistics_, MULTIGET_COROUTINE_COUNT, mget_tasks.size());
         // Collect all results so far
         std::vector<Status> statuses = folly::coro::blockingWait(
             folly::coro::collectAllRange(std::move(mget_tasks))
@@ -2328,18 +2329,18 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
           (prev_level != 0 && prev_level != (int)fp.GetHitFileLevel())) {
         // Dump the stats if the search has moved to the next level and
         // reset for next level.
-        if (num_sst_read || (num_filter_read + num_index_read)) {
+        if (num_filter_read + num_index_read) {
           RecordInHistogram(db_statistics_,
                             NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
                             num_index_read + num_filter_read);
-          RecordInHistogram(db_statistics_, NUM_DATA_BLOCKS_READ_PER_LEVEL,
-                            num_data_read);
+        }
+        if (num_sst_read) {
           RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL,
                             num_sst_read);
+          num_level_read++;
         }
         num_filter_read = 0;
         num_index_read = 0;
-        num_data_read = 0;
         num_sst_read = 0;
       }
       prev_level = fp.GetHitFileLevel();
@@ -2347,11 +2348,19 @@
     }
   }
 
   // Dump stats for most recent level
-  RecordInHistogram(db_statistics_, NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
-                    num_index_read + num_filter_read);
-  RecordInHistogram(db_statistics_, NUM_DATA_BLOCKS_READ_PER_LEVEL,
-                    num_data_read);
-  RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read);
+  if (num_filter_read + num_index_read) {
+    RecordInHistogram(db_statistics_,
+                      NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
+                      num_index_read + num_filter_read);
+  }
+  if (num_sst_read) {
+    RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read);
+    num_level_read++;
+  }
+  if (num_level_read) {
+    RecordInHistogram(db_statistics_, NUM_LEVEL_READ_PER_MULTIGET,
+                      num_level_read);
+  }
 
   if (s.ok() && !blob_rqs.empty()) {
     MultiGetBlob(read_options, keys_with_blobs_range, blob_rqs);
diff --git a/db/version_set.h b/db/version_set.h
index 72398f162..4bfb4bf74 100644
--- a/db/version_set.h
+++ b/db/version_set.h
@@ -958,7 +958,7 @@ class Version {
       int hit_file_level, bool is_hit_file_last_in_level, FdWithKeyRange* f,
       std::unordered_map<uint64_t, BlobReadRequests>& blob_rqs,
       uint64_t& num_filter_read, uint64_t& num_index_read,
-      uint64_t& num_data_read, uint64_t& num_sst_read);
+      uint64_t& num_sst_read);
 
   ColumnFamilyData* cfd_;  // ColumnFamilyData to which this Version belongs
   Logger* info_log_;
diff --git a/db/version_set_sync_and_async.h b/db/version_set_sync_and_async.h
index 9dd50d21b..229c59a6e 100644
--- a/db/version_set_sync_and_async.h
+++ b/db/version_set_sync_and_async.h
@@ -15,8 +15,7 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
 (const ReadOptions& read_options, MultiGetRange file_range, int hit_file_level,
  bool is_hit_file_last_in_level, FdWithKeyRange* f,
  std::unordered_map<uint64_t, BlobReadRequests>& blob_rqs,
- uint64_t& num_filter_read, uint64_t& num_index_read, uint64_t& num_data_read,
- uint64_t& num_sst_read) {
+ uint64_t& num_filter_read, uint64_t& num_index_read, uint64_t& num_sst_read) {
   bool timer_enabled = GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex &&
                        get_perf_context()->per_level_perf_context_enabled;
@@ -63,12 +62,10 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
       batch_size++;
       num_index_read += get_context.get_context_stats_.num_index_read;
       num_filter_read += get_context.get_context_stats_.num_filter_read;
-      num_data_read += get_context.get_context_stats_.num_data_read;
       num_sst_read += get_context.get_context_stats_.num_sst_read;
       // Reset these stats since they're specific to a level
       get_context.get_context_stats_.num_index_read = 0;
       get_context.get_context_stats_.num_filter_read = 0;
-      get_context.get_context_stats_.num_data_read = 0;
       get_context.get_context_stats_.num_sst_read = 0;
 
       // report the counters before returning
diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h
index 6f9ca1da7..b89a0ab68 100644
--- a/include/rocksdb/statistics.h
+++ b/include/rocksdb/statistics.h
@@ -432,6 +432,7 @@ enum Tickers : uint32_t {
   NON_LAST_LEVEL_READ_COUNT,
   BLOCK_CHECKSUM_COMPUTE_COUNT,
+  MULTIGET_COROUTINE_COUNT,
 
   TICKER_ENUM_MAX
 };
@@ -529,6 +530,7 @@ enum Histograms : uint32_t {
   // Num of index and filter blocks read from file system per level.
   NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
   // Num of data blocks read from file system per level.
+  // Obsolete
   NUM_DATA_BLOCKS_READ_PER_LEVEL,
   // Num of sst files read from file system per level.
   NUM_SST_READ_PER_LEVEL,
@@ -546,6 +548,9 @@ enum Histograms : uint32_t {
   // Number of IOs issued in parallel in a MultiGet batch
   MULTIGET_IO_BATCH_SIZE,
 
+  // Number of levels requiring IO for MultiGet
+  NUM_LEVEL_READ_PER_MULTIGET,
+
   HISTOGRAM_ENUM_MAX,
 };
diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc
index 8afffa5f6..3f9155d1f 100644
--- a/monitoring/statistics.cc
+++ b/monitoring/statistics.cc
@@ -226,7 +226,8 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
     {LAST_LEVEL_READ_COUNT, "rocksdb.last.level.read.count"},
     {NON_LAST_LEVEL_READ_BYTES, "rocksdb.non.last.level.read.bytes"},
     {NON_LAST_LEVEL_READ_COUNT, "rocksdb.non.last.level.read.count"},
-    {BLOCK_CHECKSUM_COMPUTE_COUNT, "rocksdb.block.checksum.compute.count"}};
+    {BLOCK_CHECKSUM_COMPUTE_COUNT, "rocksdb.block.checksum.compute.count"},
+    {MULTIGET_COROUTINE_COUNT, "rocksdb.multiget.coroutine.count"}};
 
 const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
     {DB_GET, "rocksdb.db.get.micros"},
@@ -287,6 +288,7 @@ const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
     {POLL_WAIT_MICROS, "rocksdb.poll.wait.micros"},
     {PREFETCHED_BYTES_DISCARDED, "rocksdb.prefetched.bytes.discarded"},
     {MULTIGET_IO_BATCH_SIZE, "rocksdb.multiget.io.batch.size"},
+    {NUM_LEVEL_READ_PER_MULTIGET, "rocksdb.num.level.read.per.multiget"},
 };
 
 std::shared_ptr<Statistics> CreateDBStatistics() {
diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc
index 31b75cf5e..7074df313 100644
--- a/table/block_based/block_based_table_reader.cc
+++ b/table/block_based/block_based_table_reader.cc
@@ -1611,9 +1611,6 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
       case BlockType::kDeprecatedFilter:
         ++get_context->get_context_stats_.num_filter_read;
         break;
-      case BlockType::kData:
-        ++get_context->get_context_stats_.num_data_read;
-        break;
       default:
         break;
     }
@@ -1771,9 +1768,6 @@ Status BlockBasedTable::RetrieveBlock(
       case BlockType::kDeprecatedFilter:
         ++(get_context->get_context_stats_.num_filter_read);
         break;
-      case BlockType::kData:
-        ++(get_context->get_context_stats_.num_data_read);
-        break;
       default:
         break;
     }
diff --git a/table/block_based/block_based_table_reader_sync_and_async.h b/table/block_based/block_based_table_reader_sync_and_async.h
index ab42220c3..9eeea40da 100644
--- a/table/block_based/block_based_table_reader_sync_and_async.h
+++ b/table/block_based/block_based_table_reader_sync_and_async.h
@@ -177,9 +177,6 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
     size_t& req_idx = req_idx_for_block[valid_batch_idx];
     size_t& req_offset = req_offset_for_block[valid_batch_idx];
     valid_batch_idx++;
-    if (mget_iter->get_context) {
-      ++(mget_iter->get_context->get_context_stats_.num_data_read);
-    }
     FSReadRequest& req = read_reqs[req_idx];
     Status s = req.status;
     if (s.ok()) {
diff --git a/table/get_context.h b/table/get_context.h
index 8120cfcbb..636af6e61 100644
--- a/table/get_context.h
+++ b/table/get_context.h
@@ -53,7 +53,6 @@ struct GetContextStats {
   // MultiGet stats.
   uint64_t num_filter_read = 0;
   uint64_t num_index_read = 0;
-  uint64_t num_data_read = 0;
   uint64_t num_sst_read = 0;
 };
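Usage sketch (not part of the patch above): one way to read the new MULTIGET_COROUTINE_COUNT ticker and NUM_LEVEL_READ_PER_MULTIGET histogram through the public Statistics API, assuming a DB opened with statistics enabled. The path "/tmp/multiget_stats_example" and the elided MultiGet calls are placeholders.

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    #include "rocksdb/db.h"
    #include "rocksdb/statistics.h"

    int main() {
      rocksdb::Options options;
      options.create_if_missing = true;
      // Enable statistics so the new ticker and histogram are populated.
      options.statistics = rocksdb::CreateDBStatistics();

      rocksdb::DB* db = nullptr;
      rocksdb::Status s =
          rocksdb::DB::Open(options, "/tmp/multiget_stats_example", &db);
      assert(s.ok());

      // ... issue MultiGet() calls here ...

      // Coroutines launched for parallel SST lookups; stays 0 unless the
      // binary is built with coroutine support and async IO is in use.
      uint64_t coroutines =
          options.statistics->getTickerCount(rocksdb::MULTIGET_COROUTINE_COUNT);

      // Distribution of the number of levels that required SST reads per
      // MultiGet batch.
      rocksdb::HistogramData levels_per_multiget;
      options.statistics->histogramData(rocksdb::NUM_LEVEL_READ_PER_MULTIGET,
                                        &levels_per_multiget);

      std::cout << "multiget coroutines: " << coroutines
                << ", max levels read per MultiGet: "
                << levels_per_multiget.max << std::endl;

      delete db;
      return 0;
    }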