Fix StopWatch bug; Remove setting `record_read_stats` (#11474)

Summary:
**Context/Summary:**
- StopWatch enable stats even when `StatsLevel::kExceptTimers` is set. It's a harmless bug though since `reportTimeToHistogram()` will not report it anyway according to https://github.com/facebook/rocksdb/blob/main/include/rocksdb/statistics.h#L705
-  https://github.com/facebook/rocksdb/pull/11288 should have removed logics of setting `record_read_stats = !for_compaction` as we don't differentiate `RandomAccessFileReader`'s stats behavior based on compaction or not (instead we now report stats of different IO activities including compaction to different stats). Fixing this should report more compaction related file read micros that aren't reported previously due to `for_compaction==true`

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11474

Test Plan:
- DB bench pre vs post fix with small max_open_files

Setup command
`./db_ bench  -db=/dev/shm/testdb/ -statistics=true -benchmarks=fillseq -key_size=32 -value_size=512 -num=5000 -write_buffer_size=655 -target_file_size_base=655 -disable_auto_compactions=true -compression_type=none -bloom_bits=3`

Run command
`./db_bench --open_files=1 -use_existing_db=true -db=/dev/shm/testdb2/ -statistics=true -benchmarks=compactall -key_size=32 -value_size=512 -num=5000 -write_buffer_size=655 -target_file_size_base=655 -disable_auto_compactions=true -compression_type=none -bloom_bits=3`

Pre-fix
```
rocksdb.sst.read.micros P50 : 2.056175 P95 : 4.647739 P99 : 8.948475 P100 : 25.000000 COUNT : 4451 SUM : 12827
rocksdb.file.read.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.read.compaction.micros P50 : 2.057397 P95 : 4.625253 P99 : 8.749474 P100 : 25.000000 COUNT : 4382 SUM : 12608
rocksdb.file.read.db.open.micros P50 : 1.985294 P95 : 9.100000 P99 : 13.000000 P100 : 13.000000 COUNT : 69 SUM : 219
```

Post-fix (with a higher `rocksdb.file.read.compaction.micros` count)
```
rocksdb.sst.read.micros P50 : 1.858968 P95 : 3.653086 P99 : 5.968000 P100 : 21.000000 COUNT : 3548 SUM : 9119
rocksdb.file.read.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.read.compaction.micros P50 : 1.857027 P95 : 3.627614 P99 : 5.738621 P100 : 21.000000 COUNT : 3479 SUM : 8904
rocksdb.file.read.db.open.micros P50 : 2.000000 P95 : 6.733333 P99 : 11.000000 P100 : 11.000000 COUNT : 69 SUM : 215
```
- CI

Reviewed By: ajkr

Differential Revision: D46137221

Pulled By: hx235

fbshipit-source-id: e5b4ee7001af26f2ee0377bc6334f43b2a527388
oxigraph-main
Hui Xiao 2 years ago committed by Facebook GitHub Bot
parent e8710303d9
commit dcc6fc99f9
  1. 4
      HISTORY.md
  2. 45
      db/table_cache.cc
  3. 10
      db/table_cache.h
  4. 4
      db/table_cache_sync_and_async.h
  5. 1
      db/version_builder.cc
  6. 3
      db/version_set.cc
  7. 2
      util/stop_watch.h

@ -2,6 +2,10 @@
## Unreleased
### New Features
* Add a new option OptimisticTransactionDBOptions::shared_lock_buckets that enables sharing mutexes for validating transactions between DB instances, for better balancing memory efficiency and validation contention across DB instances. Different column families and DBs also now use different hash seeds in this validation, so that the same set of key names will not contend across DBs or column families.
### Behavior changes
* Statistics `rocksdb.sst.read.micros` scope is expanded to all SST reads except for file ingestion and column family import (some compaction reads were previously excluded).
## 8.3.0 (05/19/2023)
### New Features
* Introduced a new option `block_protection_bytes_per_key`, which can be used to enable per key-value integrity protection for in-memory blocks in block cache (#11287).

@ -90,7 +90,7 @@ TableCache::~TableCache() {}
Status TableCache::GetTableReader(
const ReadOptions& ro, const FileOptions& file_options,
const InternalKeyComparator& internal_comparator,
const FileMetaData& file_meta, bool sequential_mode, bool record_read_stats,
const FileMetaData& file_meta, bool sequential_mode,
uint8_t block_protection_bytes_per_key, HistogramImpl* file_read_hist,
std::unique_ptr<TableReader>* table_reader,
const std::shared_ptr<const SliceTransform>& prefix_extractor,
@ -127,11 +127,11 @@ Status TableCache::GetTableReader(
}
StopWatch sw(ioptions_.clock, ioptions_.stats, TABLE_OPEN_IO_MICROS);
std::unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader(
std::move(file), fname, ioptions_.clock, io_tracer_,
record_read_stats ? ioptions_.stats : nullptr, SST_READ_MICROS,
file_read_hist, ioptions_.rate_limiter.get(), ioptions_.listeners,
file_temperature, level == ioptions_.num_levels - 1));
new RandomAccessFileReader(std::move(file), fname, ioptions_.clock,
io_tracer_, ioptions_.stats, SST_READ_MICROS,
file_read_hist, ioptions_.rate_limiter.get(),
ioptions_.listeners, file_temperature,
level == ioptions_.num_levels - 1));
UniqueId64x2 expected_unique_id;
if (ioptions_.verify_sst_unique_id_in_manifest) {
expected_unique_id = file_meta.unique_id;
@ -160,8 +160,8 @@ Status TableCache::FindTable(
const FileMetaData& file_meta, TypedHandle** handle,
uint8_t block_protection_bytes_per_key,
const std::shared_ptr<const SliceTransform>& prefix_extractor,
const bool no_io, bool record_read_stats, HistogramImpl* file_read_hist,
bool skip_filters, int level, bool prefetch_index_and_filter_in_cache,
const bool no_io, HistogramImpl* file_read_hist, bool skip_filters,
int level, bool prefetch_index_and_filter_in_cache,
size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) {
PERF_TIMER_GUARD_WITH_CLOCK(find_table_nanos, ioptions_.clock);
uint64_t number = file_meta.fd.GetNumber();
@ -183,7 +183,7 @@ Status TableCache::FindTable(
std::unique_ptr<TableReader> table_reader;
Status s = GetTableReader(ro, file_options, internal_comparator, file_meta,
false /* sequential mode */, record_read_stats,
false /* sequential mode */,
block_protection_bytes_per_key, file_read_hist,
&table_reader, prefix_extractor, skip_filters,
level, prefetch_index_and_filter_in_cache,
@ -232,8 +232,7 @@ InternalIterator* TableCache::NewIterator(
s = FindTable(options, file_options, icomparator, file_meta, &handle,
block_protection_bytes_per_key, prefix_extractor,
options.read_tier == kBlockCacheTier /* no_io */,
!for_compaction /* record_read_stats */, file_read_hist,
skip_filters, level,
file_read_hist, skip_filters, level,
true /* prefetch_index_and_filter_in_cache */,
max_file_size_for_l0_meta_pin, file_meta.temperature);
if (s.ok()) {
@ -438,8 +437,8 @@ Status TableCache::Get(
s = FindTable(options, file_options_, internal_comparator, file_meta,
&handle, block_protection_bytes_per_key, prefix_extractor,
options.read_tier == kBlockCacheTier /* no_io */,
true /* record_read_stats */, file_read_hist, skip_filters,
level, true /* prefetch_index_and_filter_in_cache */,
file_read_hist, skip_filters, level,
true /* prefetch_index_and_filter_in_cache */,
max_file_size_for_l0_meta_pin, file_meta.temperature);
if (s.ok()) {
t = cache_.Value(handle);
@ -541,7 +540,7 @@ Status TableCache::MultiGetFilter(
s = FindTable(options, file_options_, internal_comparator, file_meta,
&handle, block_protection_bytes_per_key, prefix_extractor,
options.read_tier == kBlockCacheTier /* no_io */,
true /* record_read_stats */, file_read_hist,
file_read_hist,
/*skip_filters=*/false, level,
true /* prefetch_index_and_filter_in_cache */,
/*max_file_size_for_l0_meta_pin=*/0, file_meta.temperature);
@ -658,11 +657,10 @@ uint64_t TableCache::ApproximateOffsetOf(
TableReader* table_reader = file_meta.fd.table_reader;
TypedHandle* table_handle = nullptr;
if (table_reader == nullptr) {
const bool for_compaction = (caller == TableReaderCaller::kCompaction);
Status s = FindTable(
read_options, file_options_, internal_comparator, file_meta,
&table_handle, block_protection_bytes_per_key, prefix_extractor,
false /* no_io */, !for_compaction /* record_read_stats */);
Status s =
FindTable(read_options, file_options_, internal_comparator, file_meta,
&table_handle, block_protection_bytes_per_key,
prefix_extractor, false /* no_io */);
if (s.ok()) {
table_reader = cache_.Value(table_handle);
}
@ -688,11 +686,10 @@ uint64_t TableCache::ApproximateSize(
TableReader* table_reader = file_meta.fd.table_reader;
TypedHandle* table_handle = nullptr;
if (table_reader == nullptr) {
const bool for_compaction = (caller == TableReaderCaller::kCompaction);
Status s = FindTable(
read_options, file_options_, internal_comparator, file_meta,
&table_handle, block_protection_bytes_per_key, prefix_extractor,
false /* no_io */, !for_compaction /* record_read_stats */);
Status s =
FindTable(read_options, file_options_, internal_comparator, file_meta,
&table_handle, block_protection_bytes_per_key,
prefix_extractor, false /* no_io */);
if (s.ok()) {
table_reader = cache_.Value(table_handle);
}

@ -171,9 +171,9 @@ class TableCache {
const FileMetaData& file_meta, TypedHandle**,
uint8_t block_protection_bytes_per_key,
const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
const bool no_io = false, bool record_read_stats = true,
HistogramImpl* file_read_hist = nullptr, bool skip_filters = false,
int level = -1, bool prefetch_index_and_filter_in_cache = true,
const bool no_io = false, HistogramImpl* file_read_hist = nullptr,
bool skip_filters = false, int level = -1,
bool prefetch_index_and_filter_in_cache = true,
size_t max_file_size_for_l0_meta_pin = 0,
Temperature file_temperature = Temperature::kUnknown);
@ -243,8 +243,8 @@ class TableCache {
const ReadOptions& ro, const FileOptions& file_options,
const InternalKeyComparator& internal_comparator,
const FileMetaData& file_meta, bool sequential_mode,
bool record_read_stats, uint8_t block_protection_bytes_per_key,
HistogramImpl* file_read_hist, std::unique_ptr<TableReader>* table_reader,
uint8_t block_protection_bytes_per_key, HistogramImpl* file_read_hist,
std::unique_ptr<TableReader>* table_reader,
const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
bool skip_filters = false, int level = -1,
bool prefetch_index_and_filter_in_cache = true,

@ -68,8 +68,8 @@ DEFINE_SYNC_AND_ASYNC(Status, TableCache::MultiGet)
s = FindTable(options, file_options_, internal_comparator, file_meta,
&handle, block_protection_bytes_per_key, prefix_extractor,
options.read_tier == kBlockCacheTier /* no_io */,
true /* record_read_stats */, file_read_hist, skip_filters,
level, true /* prefetch_index_and_filter_in_cache */,
file_read_hist, skip_filters, level,
true /* prefetch_index_and_filter_in_cache */,
0 /*max_file_size_for_l0_meta_pin*/, file_meta.temperature);
TEST_SYNC_POINT_CALLBACK("TableCache::MultiGet:FindTable", &s);
if (s.ok()) {

@ -1328,7 +1328,6 @@ class VersionBuilder::Rep {
read_options, file_options_,
*(base_vstorage_->InternalComparator()), *file_meta, &handle,
block_protection_bytes_per_key, prefix_extractor, false /*no_io */,
true /* record_read_stats */,
internal_stats->GetFileReadHist(level), false, level,
prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin,
file_meta->temperature);

@ -7126,8 +7126,7 @@ Status VersionSet::VerifyFileMetadata(const ReadOptions& read_options,
status = table_cache->FindTable(
read_options, file_opts, *icmp, meta_copy, &handle,
cf_opts->block_protection_bytes_per_key, pe,
/*no_io=*/false, /*record_read_stats=*/true,
internal_stats->GetFileReadHist(level), false, level,
/*no_io=*/false, internal_stats->GetFileReadHist(level), false, level,
/*prefetch_index_and_filter_in_cache*/ false, max_sz_for_l0_meta_pin,
meta_copy.temperature);
if (handle) {

@ -32,7 +32,7 @@ class StopWatch {
elapsed_(elapsed),
overwrite_(overwrite),
stats_enabled_(statistics &&
statistics->get_stats_level() >=
statistics->get_stats_level() >
StatsLevel::kExceptTimers &&
(hist_type_1_ != Histograms::HISTOGRAM_ENUM_MAX ||
hist_type_2_ != Histograms::HISTOGRAM_ENUM_MAX)),

Loading…
Cancel
Save