From 0cc05438931aa344a4d0bd681c399b5c03574a79 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Mon, 7 Feb 2022 09:14:40 -0800 Subject: [PATCH] Mitigate the overhead of building the hash of file locations (#9504) Summary: The patch builds on the refactoring done in https://github.com/facebook/rocksdb/issues/9494 and improves the performance of building the hash of file locations in `VersionStorageInfo` in two ways. First, the hash building is moved from `AddFile` (which is called under the DB mutex) to a separate post-processing step done as part of `PrepareForVersionAppend` (during which the mutex is *not* held). Second, the space necessary for the hash is preallocated to prevent costly reallocation/rehashing operations. These changes mitigate the overhead of the file location hash, which can be significant with certain workloads where the baseline CPU usage is low (see https://github.com/facebook/rocksdb/issues/9351, which is a workload where keys are sorted, WAL is turned off, the vector memtable implementation is used, and there are lots of small SST files). Fixes https://github.com/facebook/rocksdb/issues/9351 Pull Request resolved: https://github.com/facebook/rocksdb/pull/9504 Test Plan: `make check` ``` numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --disable_wal=1 --seed= ``` Final statistics before this patch: ``` Cumulative writes: 0 writes, 697M keys, 0 commit groups, 0.0 writes per commit group, ingest: 283.25 GB, 241.08 MB/s Interval writes: 0 writes, 1264K keys, 0 commit groups, 0.0 writes per commit group, ingest: 525.69 MB, 176.67 MB/s ``` With the patch: ``` Cumulative writes: 0 writes, 759M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.57 GB, 262.63 MB/s Interval writes: 0 writes, 1555K keys, 0 commit groups, 0.0 writes per commit group, ingest: 646.61 MB, 215.11 MB/s ``` Reviewed By: riversand963 Differential Revision: D34014734 Pulled By: ltamasi fbshipit-source-id: acb2703677451d5ccaa7e9d950844b33d240695b --- db/version_set.cc | 29 +++++++++++++++++++++++------ db/version_set.h | 5 +++-- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index 532e37361..c34fa385d 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2413,6 +2413,7 @@ void VersionStorageInfo::PrepareForVersionAppend( GenerateLevelFilesBrief(); GenerateLevel0NonOverlapping(); GenerateBottommostFiles(); + GenerateFileLocationIndex(); } void Version::PrepareAppend(const MutableCFOptions& mutable_cf_options, @@ -3046,12 +3047,6 @@ void VersionStorageInfo::AddFile(int level, FileMetaData* f) { level_files.push_back(f); f->refs++; - - const uint64_t file_number = f->fd.GetNumber(); - - assert(file_locations_.find(file_number) == file_locations_.end()); - file_locations_.emplace(file_number, - FileLocation(level, level_files.size() - 1)); } void VersionStorageInfo::AddBlobFile( @@ -3286,6 +3281,28 @@ void VersionStorageInfo::GenerateBottommostFiles() { } } +void VersionStorageInfo::GenerateFileLocationIndex() { + size_t num_files = 0; + + for (int level = 0; level < num_levels_; ++level) { + num_files += files_[level].size(); + } + + file_locations_.reserve(num_files); + + for (int level = 0; level < num_levels_; ++level) { + for (size_t pos = 0; pos < files_[level].size(); ++pos) { + const FileMetaData* const meta = files_[level][pos]; + assert(meta); + + const uint64_t file_number = meta->fd.GetNumber(); + + assert(file_locations_.find(file_number) == file_locations_.end()); + file_locations_.emplace(file_number, FileLocation(level, pos)); + } + } +} + void VersionStorageInfo::UpdateOldestSnapshot(SequenceNumber seqnum) { assert(seqnum >= oldest_snapshot_seqnum_); oldest_snapshot_seqnum_ = seqnum; diff --git a/db/version_set.h b/db/version_set.h index 53f97012d..ce55e755d 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -302,7 +302,7 @@ class VersionStorageInfo { size_t position_ = 0; }; - // REQUIRES: This version has been saved (see VersionSet::SaveTo) + // REQUIRES: PrepareForVersionAppend has been called FileLocation GetFileLocation(uint64_t file_number) const { const auto it = file_locations_.find(file_number); @@ -319,7 +319,7 @@ class VersionStorageInfo { return it->second; } - // REQUIRES: This version has been saved (see VersionSet::SaveTo) + // REQUIRES: PrepareForVersionAppend has been called FileMetaData* GetFileMetaDataByNumber(uint64_t file_number) const { auto location = GetFileLocation(file_number); @@ -520,6 +520,7 @@ class VersionStorageInfo { void GenerateLevelFilesBrief(); void GenerateLevel0NonOverlapping(); void GenerateBottommostFiles(); + void GenerateFileLocationIndex(); const InternalKeyComparator* internal_comparator_; const Comparator* user_comparator_;