From dcc6fc99f99821ef7c31853675c389cdef6cb9f7 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Thu, 25 May 2023 10:16:58 -0700 Subject: [PATCH] Fix StopWatch bug; Remove setting `record_read_stats` (#11474) Summary: **Context/Summary:** - StopWatch enable stats even when `StatsLevel::kExceptTimers` is set. It's a harmless bug though since `reportTimeToHistogram()` will not report it anyway according to https://github.com/facebook/rocksdb/blob/main/include/rocksdb/statistics.h#L705 - https://github.com/facebook/rocksdb/pull/11288 should have removed logics of setting `record_read_stats = !for_compaction` as we don't differentiate `RandomAccessFileReader`'s stats behavior based on compaction or not (instead we now report stats of different IO activities including compaction to different stats). Fixing this should report more compaction related file read micros that aren't reported previously due to `for_compaction==true` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11474 Test Plan: - DB bench pre vs post fix with small max_open_files Setup command `./db_ bench -db=/dev/shm/testdb/ -statistics=true -benchmarks=fillseq -key_size=32 -value_size=512 -num=5000 -write_buffer_size=655 -target_file_size_base=655 -disable_auto_compactions=true -compression_type=none -bloom_bits=3` Run command `./db_bench --open_files=1 -use_existing_db=true -db=/dev/shm/testdb2/ -statistics=true -benchmarks=compactall -key_size=32 -value_size=512 -num=5000 -write_buffer_size=655 -target_file_size_base=655 -disable_auto_compactions=true -compression_type=none -bloom_bits=3` Pre-fix ``` rocksdb.sst.read.micros P50 : 2.056175 P95 : 4.647739 P99 : 8.948475 P100 : 25.000000 COUNT : 4451 SUM : 12827 rocksdb.file.read.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0 rocksdb.file.read.compaction.micros P50 : 2.057397 P95 : 4.625253 P99 : 8.749474 P100 : 25.000000 COUNT : 4382 SUM : 12608 rocksdb.file.read.db.open.micros P50 : 1.985294 P95 : 9.100000 P99 : 13.000000 P100 : 13.000000 COUNT : 69 SUM : 219 ``` Post-fix (with a higher `rocksdb.file.read.compaction.micros` count) ``` rocksdb.sst.read.micros P50 : 1.858968 P95 : 3.653086 P99 : 5.968000 P100 : 21.000000 COUNT : 3548 SUM : 9119 rocksdb.file.read.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0 rocksdb.file.read.compaction.micros P50 : 1.857027 P95 : 3.627614 P99 : 5.738621 P100 : 21.000000 COUNT : 3479 SUM : 8904 rocksdb.file.read.db.open.micros P50 : 2.000000 P95 : 6.733333 P99 : 11.000000 P100 : 11.000000 COUNT : 69 SUM : 215 ``` - CI Reviewed By: ajkr Differential Revision: D46137221 Pulled By: hx235 fbshipit-source-id: e5b4ee7001af26f2ee0377bc6334f43b2a527388 --- HISTORY.md | 4 +++ db/table_cache.cc | 45 +++++++++++++++------------------ db/table_cache.h | 10 ++++---- db/table_cache_sync_and_async.h | 4 +-- db/version_builder.cc | 1 - db/version_set.cc | 3 +-- util/stop_watch.h | 2 +- 7 files changed, 34 insertions(+), 35 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 1bcad2ed6..894c950f1 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,6 +2,10 @@ ## Unreleased ### New Features * Add a new option OptimisticTransactionDBOptions::shared_lock_buckets that enables sharing mutexes for validating transactions between DB instances, for better balancing memory efficiency and validation contention across DB instances. Different column families and DBs also now use different hash seeds in this validation, so that the same set of key names will not contend across DBs or column families. + +### Behavior changes +* Statistics `rocksdb.sst.read.micros` scope is expanded to all SST reads except for file ingestion and column family import (some compaction reads were previously excluded). + ## 8.3.0 (05/19/2023) ### New Features * Introduced a new option `block_protection_bytes_per_key`, which can be used to enable per key-value integrity protection for in-memory blocks in block cache (#11287). diff --git a/db/table_cache.cc b/db/table_cache.cc index 3575fa24d..9f41e8555 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -90,7 +90,7 @@ TableCache::~TableCache() {} Status TableCache::GetTableReader( const ReadOptions& ro, const FileOptions& file_options, const InternalKeyComparator& internal_comparator, - const FileMetaData& file_meta, bool sequential_mode, bool record_read_stats, + const FileMetaData& file_meta, bool sequential_mode, uint8_t block_protection_bytes_per_key, HistogramImpl* file_read_hist, std::unique_ptr* table_reader, const std::shared_ptr& prefix_extractor, @@ -127,11 +127,11 @@ Status TableCache::GetTableReader( } StopWatch sw(ioptions_.clock, ioptions_.stats, TABLE_OPEN_IO_MICROS); std::unique_ptr file_reader( - new RandomAccessFileReader( - std::move(file), fname, ioptions_.clock, io_tracer_, - record_read_stats ? ioptions_.stats : nullptr, SST_READ_MICROS, - file_read_hist, ioptions_.rate_limiter.get(), ioptions_.listeners, - file_temperature, level == ioptions_.num_levels - 1)); + new RandomAccessFileReader(std::move(file), fname, ioptions_.clock, + io_tracer_, ioptions_.stats, SST_READ_MICROS, + file_read_hist, ioptions_.rate_limiter.get(), + ioptions_.listeners, file_temperature, + level == ioptions_.num_levels - 1)); UniqueId64x2 expected_unique_id; if (ioptions_.verify_sst_unique_id_in_manifest) { expected_unique_id = file_meta.unique_id; @@ -160,8 +160,8 @@ Status TableCache::FindTable( const FileMetaData& file_meta, TypedHandle** handle, uint8_t block_protection_bytes_per_key, const std::shared_ptr& prefix_extractor, - const bool no_io, bool record_read_stats, HistogramImpl* file_read_hist, - bool skip_filters, int level, bool prefetch_index_and_filter_in_cache, + const bool no_io, HistogramImpl* file_read_hist, bool skip_filters, + int level, bool prefetch_index_and_filter_in_cache, size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) { PERF_TIMER_GUARD_WITH_CLOCK(find_table_nanos, ioptions_.clock); uint64_t number = file_meta.fd.GetNumber(); @@ -183,7 +183,7 @@ Status TableCache::FindTable( std::unique_ptr table_reader; Status s = GetTableReader(ro, file_options, internal_comparator, file_meta, - false /* sequential mode */, record_read_stats, + false /* sequential mode */, block_protection_bytes_per_key, file_read_hist, &table_reader, prefix_extractor, skip_filters, level, prefetch_index_and_filter_in_cache, @@ -232,8 +232,7 @@ InternalIterator* TableCache::NewIterator( s = FindTable(options, file_options, icomparator, file_meta, &handle, block_protection_bytes_per_key, prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */, - !for_compaction /* record_read_stats */, file_read_hist, - skip_filters, level, + file_read_hist, skip_filters, level, true /* prefetch_index_and_filter_in_cache */, max_file_size_for_l0_meta_pin, file_meta.temperature); if (s.ok()) { @@ -438,8 +437,8 @@ Status TableCache::Get( s = FindTable(options, file_options_, internal_comparator, file_meta, &handle, block_protection_bytes_per_key, prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */, - true /* record_read_stats */, file_read_hist, skip_filters, - level, true /* prefetch_index_and_filter_in_cache */, + file_read_hist, skip_filters, level, + true /* prefetch_index_and_filter_in_cache */, max_file_size_for_l0_meta_pin, file_meta.temperature); if (s.ok()) { t = cache_.Value(handle); @@ -541,7 +540,7 @@ Status TableCache::MultiGetFilter( s = FindTable(options, file_options_, internal_comparator, file_meta, &handle, block_protection_bytes_per_key, prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */, - true /* record_read_stats */, file_read_hist, + file_read_hist, /*skip_filters=*/false, level, true /* prefetch_index_and_filter_in_cache */, /*max_file_size_for_l0_meta_pin=*/0, file_meta.temperature); @@ -658,11 +657,10 @@ uint64_t TableCache::ApproximateOffsetOf( TableReader* table_reader = file_meta.fd.table_reader; TypedHandle* table_handle = nullptr; if (table_reader == nullptr) { - const bool for_compaction = (caller == TableReaderCaller::kCompaction); - Status s = FindTable( - read_options, file_options_, internal_comparator, file_meta, - &table_handle, block_protection_bytes_per_key, prefix_extractor, - false /* no_io */, !for_compaction /* record_read_stats */); + Status s = + FindTable(read_options, file_options_, internal_comparator, file_meta, + &table_handle, block_protection_bytes_per_key, + prefix_extractor, false /* no_io */); if (s.ok()) { table_reader = cache_.Value(table_handle); } @@ -688,11 +686,10 @@ uint64_t TableCache::ApproximateSize( TableReader* table_reader = file_meta.fd.table_reader; TypedHandle* table_handle = nullptr; if (table_reader == nullptr) { - const bool for_compaction = (caller == TableReaderCaller::kCompaction); - Status s = FindTable( - read_options, file_options_, internal_comparator, file_meta, - &table_handle, block_protection_bytes_per_key, prefix_extractor, - false /* no_io */, !for_compaction /* record_read_stats */); + Status s = + FindTable(read_options, file_options_, internal_comparator, file_meta, + &table_handle, block_protection_bytes_per_key, + prefix_extractor, false /* no_io */); if (s.ok()) { table_reader = cache_.Value(table_handle); } diff --git a/db/table_cache.h b/db/table_cache.h index 09ca10ada..39e41cc6c 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -171,9 +171,9 @@ class TableCache { const FileMetaData& file_meta, TypedHandle**, uint8_t block_protection_bytes_per_key, const std::shared_ptr& prefix_extractor = nullptr, - const bool no_io = false, bool record_read_stats = true, - HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, - int level = -1, bool prefetch_index_and_filter_in_cache = true, + const bool no_io = false, HistogramImpl* file_read_hist = nullptr, + bool skip_filters = false, int level = -1, + bool prefetch_index_and_filter_in_cache = true, size_t max_file_size_for_l0_meta_pin = 0, Temperature file_temperature = Temperature::kUnknown); @@ -243,8 +243,8 @@ class TableCache { const ReadOptions& ro, const FileOptions& file_options, const InternalKeyComparator& internal_comparator, const FileMetaData& file_meta, bool sequential_mode, - bool record_read_stats, uint8_t block_protection_bytes_per_key, - HistogramImpl* file_read_hist, std::unique_ptr* table_reader, + uint8_t block_protection_bytes_per_key, HistogramImpl* file_read_hist, + std::unique_ptr* table_reader, const std::shared_ptr& prefix_extractor = nullptr, bool skip_filters = false, int level = -1, bool prefetch_index_and_filter_in_cache = true, diff --git a/db/table_cache_sync_and_async.h b/db/table_cache_sync_and_async.h index df8e9337f..8ff03ec50 100644 --- a/db/table_cache_sync_and_async.h +++ b/db/table_cache_sync_and_async.h @@ -68,8 +68,8 @@ DEFINE_SYNC_AND_ASYNC(Status, TableCache::MultiGet) s = FindTable(options, file_options_, internal_comparator, file_meta, &handle, block_protection_bytes_per_key, prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */, - true /* record_read_stats */, file_read_hist, skip_filters, - level, true /* prefetch_index_and_filter_in_cache */, + file_read_hist, skip_filters, level, + true /* prefetch_index_and_filter_in_cache */, 0 /*max_file_size_for_l0_meta_pin*/, file_meta.temperature); TEST_SYNC_POINT_CALLBACK("TableCache::MultiGet:FindTable", &s); if (s.ok()) { diff --git a/db/version_builder.cc b/db/version_builder.cc index d87ef9449..210b0de86 100644 --- a/db/version_builder.cc +++ b/db/version_builder.cc @@ -1328,7 +1328,6 @@ class VersionBuilder::Rep { read_options, file_options_, *(base_vstorage_->InternalComparator()), *file_meta, &handle, block_protection_bytes_per_key, prefix_extractor, false /*no_io */, - true /* record_read_stats */, internal_stats->GetFileReadHist(level), false, level, prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin, file_meta->temperature); diff --git a/db/version_set.cc b/db/version_set.cc index 674c0e4aa..e81e42b52 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -7126,8 +7126,7 @@ Status VersionSet::VerifyFileMetadata(const ReadOptions& read_options, status = table_cache->FindTable( read_options, file_opts, *icmp, meta_copy, &handle, cf_opts->block_protection_bytes_per_key, pe, - /*no_io=*/false, /*record_read_stats=*/true, - internal_stats->GetFileReadHist(level), false, level, + /*no_io=*/false, internal_stats->GetFileReadHist(level), false, level, /*prefetch_index_and_filter_in_cache*/ false, max_sz_for_l0_meta_pin, meta_copy.temperature); if (handle) { diff --git a/util/stop_watch.h b/util/stop_watch.h index ece72007b..287813045 100644 --- a/util/stop_watch.h +++ b/util/stop_watch.h @@ -32,7 +32,7 @@ class StopWatch { elapsed_(elapsed), overwrite_(overwrite), stats_enabled_(statistics && - statistics->get_stats_level() >= + statistics->get_stats_level() > StatsLevel::kExceptTimers && (hist_type_1_ != Histograms::HISTOGRAM_ENUM_MAX || hist_type_2_ != Histograms::HISTOGRAM_ENUM_MAX)),