Add Stats for MultiGet (#7366)

Summary:
Add the following histogram stats for MultiGet to give more insight into MultiGet requests:
    1. Number of index and filter blocks read from file as part of MultiGet
    request per level.
    2. Number of data blocks read from file per level.
    3. Number of SST files loaded from file system per level.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7366

Reviewed By: anand1976

Differential Revision: D24127040

Pulled By: akankshamahajan15

fbshipit-source-id: e63a003056b833729b277edc0639c08fb432756b
main
Akanksha Mahajan 4 years ago committed by Facebook GitHub Bot
parent 8891e9a0eb
commit 38d0a365e3
  1. 1
      HISTORY.md
  2. 89
      db/db_basic_test.cc
  3. 26
      db/version_set.cc
  4. 8
      include/rocksdb/statistics.h
  5. 14
      java/rocksjni/portal.h
  6. 16
      java/src/main/java/org/rocksdb/HistogramType.java
  7. 4
      monitoring/statistics.cc
  8. 35
      table/block_based/block_based_table_reader.cc
  9. 5
      table/get_context.h

@ -18,6 +18,7 @@
* Methods to configure serialize, and compare -- such as TableFactory -- are exposed directly through the Configurable base class (from which these objects inherit). This change will allow for better and more thorough configuration management and retrieval in the future. The options for a Configurable object can be set via the ConfigureFromMap, ConfigureFromString, or ConfigureOption method. The serialized version of the options of an object can be retrieved via the GetOptionString, ToString, or GetOption methods. The list of options supported by an object can be obtained via the GetOptionNames method. The "raw" object (such as the BlockBasedTableOption) for an option may be retrieved via the GetOptions method. Configurable options can be compared via the AreEquivalent method. The settings within a Configurable object may be validated via the ValidateOptions method. The object may be intialized (at which point only mutable options may be updated) via the PrepareOptions method. * Methods to configure serialize, and compare -- such as TableFactory -- are exposed directly through the Configurable base class (from which these objects inherit). This change will allow for better and more thorough configuration management and retrieval in the future. The options for a Configurable object can be set via the ConfigureFromMap, ConfigureFromString, or ConfigureOption method. The serialized version of the options of an object can be retrieved via the GetOptionString, ToString, or GetOption methods. The list of options supported by an object can be obtained via the GetOptionNames method. The "raw" object (such as the BlockBasedTableOption) for an option may be retrieved via the GetOptions method. Configurable options can be compared via the AreEquivalent method. The settings within a Configurable object may be validated via the ValidateOptions method. The object may be intialized (at which point only mutable options may be updated) via the PrepareOptions method.
* Introduce options.check_flush_compaction_key_order with default value to be true. With this option, during flush and compaction, key order will be checked when writing to each SST file. If the order is violated, the flush or compaction will fail. * Introduce options.check_flush_compaction_key_order with default value to be true. With this option, during flush and compaction, key order will be checked when writing to each SST file. If the order is violated, the flush or compaction will fail.
* Added is_full_compaction to CompactionJobStats, so that the information is available through the EventListener interface. * Added is_full_compaction to CompactionJobStats, so that the information is available through the EventListener interface.
* Add more stats for MultiGet in Histogram to get number of data blocks, index blocks, filter blocks and sst files read from file system per level.
## 6.13 (09/12/2020) ## 6.13 (09/12/2020)
### Bug fixes ### Bug fixes

@ -1871,6 +1871,95 @@ TEST_F(DBBasicTest, MultiGetBatchedValueSizeMultiLevelMerge) {
} }
} }
// Verifies that a batched MultiGet populates the per-level histograms
// NUM_DATA_BLOCKS_READ_PER_LEVEL, NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL
// and NUM_SST_READ_PER_LEVEL.
//
// The test writes keys into column family 1 in three batches, flushing
// periodically so that several SST files are produced. The first two batches
// are moved to level 2; the third batch is only flushed. The DB is then
// reopened, the statistics are reset, and a single MultiGet over all keys is
// issued before the histograms are inspected.
TEST_F(DBBasicTest, MultiGetStats) {
  Options options;
  options.create_if_missing = true;
  options.disable_auto_compactions = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  // Presumably a 1-byte block size forces (roughly) one key per data block so
  // that many blocks are read -- TODO confirm.
  table_options.block_size = 1;
  table_options.index_type =
      BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
  table_options.partition_filters = true;
  // Block cache is disabled and index/filter blocks are not cached; presumably
  // so that block reads reach the file and are counted -- TODO confirm.
  table_options.no_block_cache = true;
  table_options.cache_index_and_filter_blocks = false;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  int total_keys = 2000;
  std::vector<std::string> keys_str(total_keys);
  std::vector<Slice> keys(total_keys);
  std::vector<PinnableSlice> values(total_keys);
  std::vector<Status> s(total_keys);
  ReadOptions read_opts;

  Random rnd(309);
  // Create multiple SST files at multiple levels.
  //
  // NOTE(review): the second and third loops start at 501 and 1001, so "k500"
  // and "k1000" are never written and keys[500] / keys[1000] remain
  // default-constructed Slices when passed to MultiGet. This looks like an
  // off-by-one, but it is left untouched here because changing the data set
  // may shift the histogram values asserted below -- confirm intent.
  for (int i = 0; i < 500; ++i) {
    keys_str[i] = "k" + std::to_string(i);
    keys[i] = Slice(keys_str[i]);
    ASSERT_OK(Put(1, "k" + std::to_string(i), rnd.RandomString(1000)));
    if (i % 100 == 0) {
      ASSERT_OK(Flush(1));
    }
  }
  ASSERT_OK(Flush(1));
  MoveFilesToLevel(2, 1);

  for (int i = 501; i < 1000; ++i) {
    keys_str[i] = "k" + std::to_string(i);
    keys[i] = Slice(keys_str[i]);
    ASSERT_OK(Put(1, "k" + std::to_string(i), rnd.RandomString(1000)));
    if (i % 100 == 0) {
      ASSERT_OK(Flush(1));
    }
  }
  ASSERT_OK(Flush(1));
  MoveFilesToLevel(2, 1);

  // The last batch is flushed but not moved to another level.
  for (int i = 1001; i < total_keys; ++i) {
    keys_str[i] = "k" + std::to_string(i);
    keys[i] = Slice(keys_str[i]);
    ASSERT_OK(Put(1, "k" + std::to_string(i), rnd.RandomString(1000)));
    if (i % 100 == 0) {
      ASSERT_OK(Flush(1));
    }
  }
  ASSERT_OK(Flush(1));

  // Reopen the DB and reset the statistics so that only the MultiGet below
  // contributes to the histograms.
  Close();
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_OK(options.statistics->Reset());

  db_->MultiGet(read_opts, handles_[1], total_keys, keys.data(), values.data(),
                s.data(), false);

  ASSERT_EQ(values.size(), total_keys);
  HistogramData hist_data_blocks;
  HistogramData hist_index_and_filter_blocks;
  HistogramData hist_sst;

  options.statistics->histogramData(NUM_DATA_BLOCKS_READ_PER_LEVEL,
                                    &hist_data_blocks);
  options.statistics->histogramData(NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
                                    &hist_index_and_filter_blocks);
  options.statistics->histogramData(NUM_SST_READ_PER_LEVEL, &hist_sst);

  // Maximum number of blocks read from a file system in a level.
  ASSERT_GT(hist_data_blocks.max, 0);
  ASSERT_GT(hist_index_and_filter_blocks.max, 0);
  // Maximum number of sst files read from file system in a level.
  ASSERT_GT(hist_sst.max, 0);

  // Minimum number of blocks read in a level.
  ASSERT_EQ(hist_data_blocks.min, 0);
  ASSERT_GT(hist_index_and_filter_blocks.min, 0);
  // Minimum number of sst files read in a level. The original asserted
  // hist_sst.max a second time here, leaving the minimum unchecked.
  ASSERT_GT(hist_sst.min, 0);
}
// Test class for batched MultiGet with prefix extractor // Test class for batched MultiGet with prefix extractor
// Param bool - If true, use partitioned filters // Param bool - If true, use partitioned filters
// If false, use full filter block // If false, use full filter block

@ -1968,6 +1968,10 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
&storage_info_.file_indexer_, user_comparator(), internal_comparator()); &storage_info_.file_indexer_, user_comparator(), internal_comparator());
FdWithKeyRange* f = fp.GetNextFile(); FdWithKeyRange* f = fp.GetNextFile();
Status s; Status s;
uint64_t num_index_read = 0;
uint64_t num_filter_read = 0;
uint64_t num_data_read = 0;
uint64_t num_sst_read = 0;
while (f != nullptr) { while (f != nullptr) {
MultiGetRange file_range = fp.CurrentFileRange(); MultiGetRange file_range = fp.CurrentFileRange();
@ -2014,6 +2018,11 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
sample_file_read_inc(f->file_metadata); sample_file_read_inc(f->file_metadata);
} }
batch_size++; batch_size++;
num_index_read += get_context.get_context_stats_.num_index_read;
num_filter_read += get_context.get_context_stats_.num_filter_read;
num_data_read += get_context.get_context_stats_.num_data_read;
num_sst_read += get_context.get_context_stats_.num_sst_read;
// report the counters before returning // report the counters before returning
if (get_context.State() != GetContext::kNotFound && if (get_context.State() != GetContext::kNotFound &&
get_context.State() != GetContext::kMerge && get_context.State() != GetContext::kMerge &&
@ -2069,6 +2078,23 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
continue; continue;
} }
} }
// Report MultiGet stats per level.
if (fp.IsHitFileLastInLevel()) {
// Dump the stats if this is the last file of this level and reset for
// next level.
RecordInHistogram(db_statistics_,
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
num_index_read + num_filter_read);
RecordInHistogram(db_statistics_, NUM_DATA_BLOCKS_READ_PER_LEVEL,
num_data_read);
RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read);
num_filter_read = 0;
num_index_read = 0;
num_data_read = 0;
num_sst_read = 0;
}
RecordInHistogram(db_statistics_, SST_BATCH_SIZE, batch_size); RecordInHistogram(db_statistics_, SST_BATCH_SIZE, batch_size);
if (!s.ok() || file_picker_range.empty()) { if (!s.ok() || file_picker_range.empty()) {
break; break;

@ -464,6 +464,14 @@ enum Histograms : uint32_t {
FLUSH_TIME, FLUSH_TIME,
SST_BATCH_SIZE, SST_BATCH_SIZE,
// MultiGet stats logged per level
// Num of index and filter blocks read from file system per level.
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
// Num of data blocks read from file system per level.
NUM_DATA_BLOCKS_READ_PER_LEVEL,
// Num of sst files read from file system per level.
NUM_SST_READ_PER_LEVEL,
HISTOGRAM_ENUM_MAX, HISTOGRAM_ENUM_MAX,
}; };

@ -5385,6 +5385,13 @@ class HistogramTypeJni {
return 0x2D; return 0x2D;
case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_DECOMPRESSION_MICROS: case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_DECOMPRESSION_MICROS:
return 0x2E; return 0x2E;
case ROCKSDB_NAMESPACE::Histograms::
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL:
return 0x2F;
case ROCKSDB_NAMESPACE::Histograms::NUM_DATA_BLOCKS_READ_PER_LEVEL:
return 0x30;
case ROCKSDB_NAMESPACE::Histograms::NUM_SST_READ_PER_LEVEL:
return 0x31;
case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX: case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX:
// 0x1F for backwards compatibility on current minor version. // 0x1F for backwards compatibility on current minor version.
return 0x1F; return 0x1F;
@ -5492,6 +5499,13 @@ class HistogramTypeJni {
return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_COMPRESSION_MICROS; return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_COMPRESSION_MICROS;
case 0x2E: case 0x2E:
return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_DECOMPRESSION_MICROS; return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_DECOMPRESSION_MICROS;
case 0x2F:
return ROCKSDB_NAMESPACE::Histograms::
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL;
case 0x30:
return ROCKSDB_NAMESPACE::Histograms::NUM_DATA_BLOCKS_READ_PER_LEVEL;
case 0x31:
return ROCKSDB_NAMESPACE::Histograms::NUM_SST_READ_PER_LEVEL;
case 0x1F: case 0x1F:
// 0x1F for backwards compatibility on current minor version. // 0x1F for backwards compatibility on current minor version.
return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX; return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX;

@ -159,6 +159,22 @@ public enum HistogramType {
*/ */
BLOB_DB_DECOMPRESSION_MICROS((byte) 0x2E), BLOB_DB_DECOMPRESSION_MICROS((byte) 0x2E),
/**
* Num of Index and Filter blocks read from file system per level in MultiGet
* request
*/
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL((byte) 0x2F),
/**
* Num of Data blocks read from file system per level in MultiGet request.
*/
NUM_DATA_BLOCKS_READ_PER_LEVEL((byte) 0x30),
/**
* Num of SST files read from file system per level in MultiGet request.
*/
NUM_SST_READ_PER_LEVEL((byte) 0x31),
// 0x1F for backwards compatibility on current minor version. // 0x1F for backwards compatibility on current minor version.
HISTOGRAM_ENUM_MAX((byte) 0x1F); HISTOGRAM_ENUM_MAX((byte) 0x1F);

@ -243,6 +243,10 @@ const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
{BLOB_DB_DECOMPRESSION_MICROS, "rocksdb.blobdb.decompression.micros"}, {BLOB_DB_DECOMPRESSION_MICROS, "rocksdb.blobdb.decompression.micros"},
{FLUSH_TIME, "rocksdb.db.flush.micros"}, {FLUSH_TIME, "rocksdb.db.flush.micros"},
{SST_BATCH_SIZE, "rocksdb.sst.batch.size"}, {SST_BATCH_SIZE, "rocksdb.sst.batch.size"},
{NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
"rocksdb.num.index.and.filter.blocks.read.per.level"},
{NUM_DATA_BLOCKS_READ_PER_LEVEL, "rocksdb.num.data.blocks.read.per.level"},
{NUM_SST_READ_PER_LEVEL, "rocksdb.num.sst.read.per.level"},
}; };
std::shared_ptr<Statistics> CreateDBStatistics() { std::shared_ptr<Statistics> CreateDBStatistics() {

@ -1482,6 +1482,21 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
s = block_fetcher.ReadBlockContents(); s = block_fetcher.ReadBlockContents();
raw_block_comp_type = block_fetcher.get_compression_type(); raw_block_comp_type = block_fetcher.get_compression_type();
contents = &raw_block_contents; contents = &raw_block_contents;
if (get_context) {
switch (block_type) {
case BlockType::kIndex:
++get_context->get_context_stats_.num_index_read;
break;
case BlockType::kFilter:
++get_context->get_context_stats_.num_filter_read;
break;
case BlockType::kData:
++get_context->get_context_stats_.num_data_read;
break;
default:
break;
}
}
} else { } else {
raw_block_comp_type = contents->get_compression_type(); raw_block_comp_type = contents->get_compression_type();
} }
@ -1889,6 +1904,22 @@ Status BlockBasedTable::RetrieveBlock(
GetMemoryAllocator(rep_->table_options), for_compaction, GetMemoryAllocator(rep_->table_options), for_compaction,
rep_->blocks_definitely_zstd_compressed, rep_->blocks_definitely_zstd_compressed,
rep_->table_options.filter_policy.get()); rep_->table_options.filter_policy.get());
if (get_context) {
switch (block_type) {
case BlockType::kIndex:
++(get_context->get_context_stats_.num_index_read);
break;
case BlockType::kFilter:
++(get_context->get_context_stats_.num_filter_read);
break;
case BlockType::kData:
++(get_context->get_context_stats_.num_data_read);
break;
default:
break;
}
}
} }
if (!s.ok()) { if (!s.ok()) {
@ -2553,6 +2584,10 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
} }
RetrieveMultipleBlocks(read_options, &data_block_range, &block_handles, RetrieveMultipleBlocks(read_options, &data_block_range, &block_handles,
&statuses, &results, scratch, dict); &statuses, &results, scratch, dict);
if (sst_file_range.begin()->get_context) {
++(sst_file_range.begin()
->get_context->get_context_stats_.num_sst_read);
}
} }
} }

@ -47,6 +47,11 @@ struct GetContextStats {
uint64_t num_cache_compression_dict_add = 0; uint64_t num_cache_compression_dict_add = 0;
uint64_t num_cache_compression_dict_add_redundant = 0; uint64_t num_cache_compression_dict_add_redundant = 0;
uint64_t num_cache_compression_dict_bytes_insert = 0; uint64_t num_cache_compression_dict_bytes_insert = 0;
// MultiGet stats.
uint64_t num_filter_read = 0;
uint64_t num_index_read = 0;
uint64_t num_data_read = 0;
uint64_t num_sst_read = 0;
}; };
// A class to hold context about a point lookup, such as pointer to value // A class to hold context about a point lookup, such as pointer to value

Loading…
Cancel
Save