diff --git a/HISTORY.md b/HISTORY.md index 39b1f73a6..19e903d64 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,7 @@ * Fixed more cases of EventListener::OnTableFileCreated called with OK status, file_size==0, and no SST file kept. Now the status is Aborted. * Fixed a read-after-free bug in `DB::GetMergeOperands()`. * Fix a data loss bug for 2PC write-committed transaction caused by concurrent transaction commit and memtable switch (#9571). +* Fixed NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, NUM_DATA_BLOCKS_READ_PER_LEVEL, and NUM_SST_READ_PER_LEVEL stats to be reported once per MultiGet batch per level. ### Performance Improvements * Mitigated the overhead of building the file location hash table used by the online LSM tree consistency checks, which can improve performance for certain workloads (see #9351). diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index f27532c11..e54e485dd 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -1956,8 +1956,9 @@ TEST_F(DBBasicTest, MultiGetStats) { int total_keys = 2000; std::vector keys_str(total_keys); std::vector keys(total_keys); - std::vector values(total_keys); - std::vector s(total_keys); + static size_t kMultiGetBatchSize = 100; + std::vector values(kMultiGetBatchSize); + std::vector s(kMultiGetBatchSize); ReadOptions read_opts; Random rnd(309); @@ -1994,15 +1995,16 @@ TEST_F(DBBasicTest, MultiGetStats) { } } ASSERT_OK(Flush(1)); + MoveFilesToLevel(1, 1); Close(); ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_OK(options.statistics->Reset()); - db_->MultiGet(read_opts, handles_[1], total_keys, keys.data(), values.data(), - s.data(), false); + db_->MultiGet(read_opts, handles_[1], kMultiGetBatchSize, &keys[1250], + values.data(), s.data(), false); - ASSERT_EQ(values.size(), total_keys); + ASSERT_EQ(values.size(), kMultiGetBatchSize); HistogramData hist_data_blocks; HistogramData hist_index_and_filter_blocks; HistogramData hist_sst; @@ -2014,16 +2016,16 @@ TEST_F(DBBasicTest, MultiGetStats) { options.statistics->histogramData(NUM_SST_READ_PER_LEVEL, &hist_sst); // Maximum number of blocks read from a file system in a level. - ASSERT_GT(hist_data_blocks.max, 0); + ASSERT_EQ(hist_data_blocks.max, 32); ASSERT_GT(hist_index_and_filter_blocks.max, 0); // Maximum number of sst files read from file system in a level. - ASSERT_GT(hist_sst.max, 0); + ASSERT_EQ(hist_sst.max, 2); // Minimun number of blocks read in a level. - ASSERT_EQ(hist_data_blocks.min, 3); + ASSERT_EQ(hist_data_blocks.min, 4); ASSERT_GT(hist_index_and_filter_blocks.min, 0); // Minimun number of sst files read in a level. - ASSERT_GT(hist_sst.max, 0); + ASSERT_EQ(hist_sst.min, 1); } // Test class for batched MultiGet with prefix extractor diff --git a/db/version_set.cc b/db/version_set.cc index 1bef2a46c..daf1d9145 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2189,12 +2189,31 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range, MultiGetRange keys_with_blobs_range(*range, range->begin(), range->end()); // blob_file => [[blob_idx, it], ...] std::unordered_map blob_rqs; + int level = -1; while (f != nullptr) { MultiGetRange file_range = fp.CurrentFileRange(); bool timer_enabled = GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex && get_perf_context()->per_level_perf_context_enabled; + + // Report MultiGet stats per level. + if (level >= 0 && level != (int)fp.GetHitFileLevel()) { + // Dump the stats if the search has moved to the next level and + // reset for next level. + RecordInHistogram(db_statistics_, + NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, + num_index_read + num_filter_read); + RecordInHistogram(db_statistics_, NUM_DATA_BLOCKS_READ_PER_LEVEL, + num_data_read); + RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read); + num_filter_read = 0; + num_index_read = 0; + num_data_read = 0; + num_sst_read = 0; + level = fp.GetHitFileLevel(); + } + StopWatchNano timer(clock_, timer_enabled /* auto_start */); s = table_cache_->MultiGet( read_options, *internal_comparator(), *f->file_metadata, &file_range, @@ -2239,6 +2258,11 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range, num_filter_read += get_context.get_context_stats_.num_filter_read; num_data_read += get_context.get_context_stats_.num_data_read; num_sst_read += get_context.get_context_stats_.num_sst_read; + // Reset these stats since they're specific to a level + get_context.get_context_stats_.num_index_read = 0; + get_context.get_context_stats_.num_filter_read = 0; + get_context.get_context_stats_.num_data_read = 0; + get_context.get_context_stats_.num_sst_read = 0; // report the counters before returning if (get_context.State() != GetContext::kNotFound && @@ -2315,22 +2339,6 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range, } } - // Report MultiGet stats per level. - if (fp.IsHitFileLastInLevel()) { - // Dump the stats if this is the last file of this level and reset for - // next level. - RecordInHistogram(db_statistics_, - NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, - num_index_read + num_filter_read); - RecordInHistogram(db_statistics_, NUM_DATA_BLOCKS_READ_PER_LEVEL, - num_data_read); - RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read); - num_filter_read = 0; - num_index_read = 0; - num_data_read = 0; - num_sst_read = 0; - } - RecordInHistogram(db_statistics_, SST_BATCH_SIZE, batch_size); if (!s.ok() || file_picker_range.empty()) { break; @@ -2338,6 +2346,13 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range, f = fp.GetNextFile(); } + // Dump stats for most recent level + RecordInHistogram(db_statistics_, NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, + num_index_read + num_filter_read); + RecordInHistogram(db_statistics_, NUM_DATA_BLOCKS_READ_PER_LEVEL, + num_data_read); + RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read); + if (s.ok() && !blob_rqs.empty()) { MultiGetBlob(read_options, keys_with_blobs_range, blob_rqs); }