Mitigate the overhead of building the hash of file locations (#9504)

Summary:
The patch builds on the refactoring done in https://github.com/facebook/rocksdb/issues/9494
and improves the performance of building the hash of file
locations in `VersionStorageInfo` in two ways. First, the hash
building is moved from `AddFile` (which is called under the DB mutex)
to a separate post-processing step done as part of `PrepareForVersionAppend`
(during which the mutex is *not* held). Second, the space necessary
for the hash is preallocated to prevent costly reallocation/rehashing
operations. These changes mitigate the overhead of the file location hash,
which can be significant with certain workloads where the baseline CPU usage
is low (see https://github.com/facebook/rocksdb/issues/9351,
which is a workload where keys are sorted, WAL is turned
off, the vector memtable implementation is used, and there are lots of small
SST files).

Fixes https://github.com/facebook/rocksdb/issues/9351

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9504

Test Plan:
`make check`

```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --disable_wal=1 --seed=<some_seed>
```

Final statistics before this patch:
```
Cumulative writes: 0 writes, 697M keys, 0 commit groups, 0.0 writes per commit group, ingest: 283.25 GB, 241.08 MB/s
Interval writes: 0 writes, 1264K keys, 0 commit groups, 0.0 writes per commit group, ingest: 525.69 MB, 176.67 MB/s
```

With the patch:
```
Cumulative writes: 0 writes, 759M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.57 GB, 262.63 MB/s
Interval writes: 0 writes, 1555K keys, 0 commit groups, 0.0 writes per commit group, ingest: 646.61 MB, 215.11 MB/s
```

Reviewed By: riversand963

Differential Revision: D34014734

Pulled By: ltamasi

fbshipit-source-id: acb2703677451d5ccaa7e9d950844b33d240695b
main
Levi Tamasi 2 years ago committed by Facebook GitHub Bot
parent b69f4360ea
commit 0cc0543893
  1. 29
      db/version_set.cc
  2. 5
      db/version_set.h

@ -2413,6 +2413,7 @@ void VersionStorageInfo::PrepareForVersionAppend(
GenerateLevelFilesBrief();
GenerateLevel0NonOverlapping();
GenerateBottommostFiles();
GenerateFileLocationIndex();
}
void Version::PrepareAppend(const MutableCFOptions& mutable_cf_options,
@ -3046,12 +3047,6 @@ void VersionStorageInfo::AddFile(int level, FileMetaData* f) {
level_files.push_back(f);
f->refs++;
const uint64_t file_number = f->fd.GetNumber();
assert(file_locations_.find(file_number) == file_locations_.end());
file_locations_.emplace(file_number,
FileLocation(level, level_files.size() - 1));
}
void VersionStorageInfo::AddBlobFile(
@ -3286,6 +3281,28 @@ void VersionStorageInfo::GenerateBottommostFiles() {
}
}
void VersionStorageInfo::GenerateFileLocationIndex() {
size_t num_files = 0;
for (int level = 0; level < num_levels_; ++level) {
num_files += files_[level].size();
}
file_locations_.reserve(num_files);
for (int level = 0; level < num_levels_; ++level) {
for (size_t pos = 0; pos < files_[level].size(); ++pos) {
const FileMetaData* const meta = files_[level][pos];
assert(meta);
const uint64_t file_number = meta->fd.GetNumber();
assert(file_locations_.find(file_number) == file_locations_.end());
file_locations_.emplace(file_number, FileLocation(level, pos));
}
}
}
void VersionStorageInfo::UpdateOldestSnapshot(SequenceNumber seqnum) {
assert(seqnum >= oldest_snapshot_seqnum_);
oldest_snapshot_seqnum_ = seqnum;

@ -302,7 +302,7 @@ class VersionStorageInfo {
size_t position_ = 0;
};
// REQUIRES: This version has been saved (see VersionSet::SaveTo)
// REQUIRES: PrepareForVersionAppend has been called
FileLocation GetFileLocation(uint64_t file_number) const {
const auto it = file_locations_.find(file_number);
@ -319,7 +319,7 @@ class VersionStorageInfo {
return it->second;
}
// REQUIRES: This version has been saved (see VersionSet::SaveTo)
// REQUIRES: PrepareForVersionAppend has been called
FileMetaData* GetFileMetaDataByNumber(uint64_t file_number) const {
auto location = GetFileLocation(file_number);
@ -520,6 +520,7 @@ class VersionStorageInfo {
void GenerateLevelFilesBrief();
void GenerateLevel0NonOverlapping();
void GenerateBottommostFiles();
void GenerateFileLocationIndex();
const InternalKeyComparator* internal_comparator_;
const Comparator* user_comparator_;

Loading…
Cancel
Save