From 320d9a8e8a1b6998f92934f87fc71ad8bd6d4596 Mon Sep 17 00:00:00 2001
From: Levi Tamasi
Date: Wed, 9 Feb 2022 12:35:39 -0800
Subject: [PATCH] Use a sorted vector instead of a map to store blob file
 metadata (#9526)

Summary:
The patch replaces `std::map` with a sorted `std::vector` for `VersionStorageInfo::blob_files_` and preallocates the space for the `vector` before saving the `BlobFileMetaData` into the new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`. These changes reduce the time the DB mutex is held while saving new `Version`s, and using a sorted `vector` also makes lookups faster thanks to better memory locality. In addition, the patch introduces helper methods `VersionStorageInfo::GetBlobFileMetaData` and `VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by clients to perform lookups in the `vector`, and does some general cleanup in the parts of code where blob file metadata are used.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526

Test Plan:
Ran `make check` and the crash test script for a while. Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:

```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=
```

Final statistics before the patch:

```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```

With the patch:

```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```

Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
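For illustration, here is a minimal, self-contained sketch of the sorted-vector lookup pattern described above. The simplified `BlobFileMetaData` struct and the free functions below are stand-ins for the real RocksDB classes and for the new `VersionStorageInfo::GetBlobFileMetaDataLB`/`GetBlobFileMetaData` helpers; only the `std::lower_bound`-based lookup mirrors the actual patch, everything else (field names, `main`, the sample numbers) is made up for the example.

```cpp
// Sketch only -- not RocksDB code. Shows a vector of shared_ptr metadata
// objects kept sorted by blob file number, with lower-bound and exact-match
// lookups analogous to the new VersionStorageInfo helpers.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <memory>
#include <vector>

struct BlobFileMetaData {  // simplified stand-in for the real class
  uint64_t blob_file_number = 0;
  uint64_t blob_file_size = 0;

  uint64_t GetBlobFileNumber() const { return blob_file_number; }
};

using BlobFiles = std::vector<std::shared_ptr<BlobFileMetaData>>;

// First element whose file number is not less than blob_file_number.
// Precondition: blob_files is sorted by blob file number.
BlobFiles::const_iterator GetBlobFileMetaDataLB(const BlobFiles& blob_files,
                                                uint64_t blob_file_number) {
  return std::lower_bound(
      blob_files.begin(), blob_files.end(), blob_file_number,
      [](const std::shared_ptr<BlobFileMetaData>& lhs, uint64_t rhs) {
        assert(lhs);
        return lhs->GetBlobFileNumber() < rhs;
      });
}

// Exact-match lookup; returns nullptr if the file number is not present.
std::shared_ptr<BlobFileMetaData> GetBlobFileMetaData(
    const BlobFiles& blob_files, uint64_t blob_file_number) {
  const auto it = GetBlobFileMetaDataLB(blob_files, blob_file_number);
  if (it != blob_files.end() &&
      (*it)->GetBlobFileNumber() == blob_file_number) {
    return *it;
  }
  return nullptr;
}

int main() {
  BlobFiles blob_files;
  blob_files.reserve(3);  // preallocate, as the patched SaveBlobFilesTo does

  // Blob files are appended in increasing file number order.
  for (uint64_t number : {3, 5, 9}) {
    auto meta = std::make_shared<BlobFileMetaData>();
    meta->blob_file_number = number;
    meta->blob_file_size = number * 100;
    blob_files.push_back(std::move(meta));
  }

  const auto found = GetBlobFileMetaData(blob_files, 5);
  std::cout << "blob file 5 size: " << (found ? found->blob_file_size : 0)
            << "\n";

  const auto missing = GetBlobFileMetaData(blob_files, 4);
  std::cout << "blob file 4 present: " << (missing != nullptr) << "\n";

  return 0;
}
```

The design trade-off is the same one the summary describes: lookups stay O(log n) as with `std::map`, but the contiguous storage gives better cache locality, and the container can be built in one pass with an exact `reserve()` instead of per-node allocations, which is what shortens the critical section while the new `Version` is being saved.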
Reviewed By: riversand963 Differential Revision: D34082728 Pulled By: ltamasi fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc --- HISTORY.md | 1 + db/blob/db_blob_compaction_test.cc | 6 +- db/compaction/compaction_iterator.cc | 15 ++- db/compaction/compaction_job.cc | 20 ++-- db/db_compaction_test.cc | 2 +- db/db_filesnapshot.cc | 5 +- db/db_flush_test.cc | 2 +- db/db_impl/db_impl.cc | 7 +- db/db_impl/db_impl_compaction_flush.cc | 26 +++-- db/db_impl/db_impl_debug.cc | 29 ++++-- db/db_test_util.cc | 3 +- db/db_wal_test.cc | 2 +- db/flush_job.cc | 7 +- db/internal_stats.cc | 63 +++++++++--- db/listener_test.cc | 81 ++++++--------- db/version_builder.cc | 46 ++++----- db/version_builder_test.cc | 110 +++++++++----------- db/version_set.cc | 137 +++++++++++++++---------- db/version_set.h | 35 +++++-- 19 files changed, 341 insertions(+), 256 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 989745e8d..2dc424cfb 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,6 +6,7 @@ ### Performance Improvements * Mitigated the overhead of building the file location hash table used by the online LSM tree consistency checks, which can improve performance for certain workloads (see #9351). +* Switched to using a sorted `std::vector` instead of `std::map` for storing the metadata objects for blob files, which can improve performance for certain workloads, especially when the number of blob files is high. ### Public API changes * Require C++17 compatible compiler (GCC >= 7, Clang >= 5, Visual Studio >= 2017). See #9388. diff --git a/db/blob/db_blob_compaction_test.cc b/db/blob/db_blob_compaction_test.cc index e4d709c8c..73ce82ec0 100644 --- a/db/blob/db_blob_compaction_test.cc +++ b/db/blob/db_blob_compaction_test.cc @@ -512,8 +512,7 @@ TEST_F(DBBlobCompactionTest, TrackGarbage) { ASSERT_EQ(blob_files.size(), 2); { - auto it = blob_files.begin(); - const auto& meta = it->second; + const auto& meta = blob_files.front(); assert(meta); constexpr uint64_t first_expected_bytes = @@ -543,8 +542,7 @@ TEST_F(DBBlobCompactionTest, TrackGarbage) { } { - auto it = blob_files.rbegin(); - const auto& meta = it->second; + const auto& meta = blob_files.back(); assert(meta); constexpr uint64_t new_first_expected_bytes = diff --git a/db/compaction/compaction_iterator.cc b/db/compaction/compaction_iterator.cc index 5ccd1a922..b52325c7f 100644 --- a/db/compaction/compaction_iterator.cc +++ b/db/compaction/compaction_iterator.cc @@ -1164,12 +1164,17 @@ uint64_t CompactionIterator::ComputeBlobGarbageCollectionCutoffFileNumber( const auto& blob_files = storage_info->GetBlobFiles(); - auto it = blob_files.begin(); - std::advance( - it, compaction->blob_garbage_collection_age_cutoff() * blob_files.size()); + const size_t cutoff_index = static_cast( + compaction->blob_garbage_collection_age_cutoff() * blob_files.size()); - return it != blob_files.end() ? 
it->first - : std::numeric_limits::max(); + if (cutoff_index >= blob_files.size()) { + return std::numeric_limits::max(); + } + + const auto& meta = blob_files[cutoff_index]; + assert(meta); + + return meta->GetBlobFileNumber(); } std::unique_ptr CompactionIterator::CreateBlobFetcherIfNeeded( diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index 28de326be..3c2890258 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -963,11 +963,14 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) { const auto& blob_files = vstorage->GetBlobFiles(); if (!blob_files.empty()) { - ROCKS_LOG_BUFFER(log_buffer_, - "[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64 - "\n", - column_family_name.c_str(), blob_files.begin()->first, - blob_files.rbegin()->first); + assert(blob_files.front()); + assert(blob_files.back()); + + ROCKS_LOG_BUFFER( + log_buffer_, + "[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64 "\n", + column_family_name.c_str(), blob_files.front()->GetBlobFileNumber(), + blob_files.back()->GetBlobFileNumber()); } UpdateCompactionJobStats(stats); @@ -1014,8 +1017,11 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) { stream.EndArray(); if (!blob_files.empty()) { - stream << "blob_file_head" << blob_files.begin()->first; - stream << "blob_file_tail" << blob_files.rbegin()->first; + assert(blob_files.front()); + stream << "blob_file_head" << blob_files.front()->GetBlobFileNumber(); + + assert(blob_files.back()); + stream << "blob_file_tail" << blob_files.back()->GetBlobFileNumber(); } CleanupCompaction(); diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index d4ca3963a..8943f6b9d 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -6064,7 +6064,7 @@ TEST_F(DBCompactionTest, CompactionWithBlob) { const auto& blob_files = storage_info->GetBlobFiles(); ASSERT_EQ(blob_files.size(), 1); - const auto& blob_file = blob_files.begin()->second; + const auto& blob_file = blob_files.front(); ASSERT_NE(blob_file, nullptr); ASSERT_EQ(table_file->smallest.user_key(), first_key); diff --git a/db/db_filesnapshot.cc b/db/db_filesnapshot.cc index 35d75c823..7d4da8c22 100644 --- a/db/db_filesnapshot.cc +++ b/db/db_filesnapshot.cc @@ -260,15 +260,14 @@ Status DBImpl::GetLiveFilesStorageInfo( } } const auto& blob_files = vsi.GetBlobFiles(); - for (const auto& pair : blob_files) { - const auto& meta = pair.second; + for (const auto& meta : blob_files) { assert(meta); results.emplace_back(); LiveFileStorageInfo& info = results.back(); info.relative_filename = BlobFileName(meta->GetBlobFileNumber()); - info.directory = GetName(); // TODO?: support db_paths/cf_paths + info.directory = GetDir(/* path_id */ 0); info.file_number = meta->GetBlobFileNumber(); info.file_type = kBlobFile; info.size = meta->GetBlobFileSize(); diff --git a/db/db_flush_test.cc b/db/db_flush_test.cc index fcee2f1a6..b4ca7f019 100644 --- a/db/db_flush_test.cc +++ b/db/db_flush_test.cc @@ -1629,7 +1629,7 @@ TEST_F(DBFlushTest, FlushWithBlob) { const auto& blob_files = storage_info->GetBlobFiles(); ASSERT_EQ(blob_files.size(), 1); - const auto& blob_file = blob_files.begin()->second; + const auto& blob_file = blob_files.front(); assert(blob_file); ASSERT_EQ(table_file->smallest.user_key(), "key1"); diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index fd4d8a0c9..ec0275cc3 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -5108,10 +5108,11 @@ Status 
DBImpl::VerifyChecksumInternal(const ReadOptions& read_options, if (s.ok() && use_file_checksum) { const auto& blob_files = vstorage->GetBlobFiles(); - for (const auto& pair : blob_files) { - const uint64_t blob_file_number = pair.first; - const auto& meta = pair.second; + for (const auto& meta : blob_files) { assert(meta); + + const uint64_t blob_file_number = meta->GetBlobFileNumber(); + const std::string blob_file_name = BlobFileName( cfd->ioptions()->cf_paths.front().path, blob_file_number); s = VerifyFullFileChecksum(meta->GetChecksumValue(), diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 4ff87c1e4..5550d935b 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -265,11 +265,14 @@ Status DBImpl::FlushMemTableToOutputFile( const auto& blob_files = storage_info->GetBlobFiles(); if (!blob_files.empty()) { - ROCKS_LOG_BUFFER(log_buffer, - "[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64 - "\n", - column_family_name.c_str(), blob_files.begin()->first, - blob_files.rbegin()->first); + assert(blob_files.front()); + assert(blob_files.back()); + + ROCKS_LOG_BUFFER( + log_buffer, + "[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64 "\n", + column_family_name.c_str(), blob_files.front()->GetBlobFileNumber(), + blob_files.back()->GetBlobFileNumber()); } } @@ -706,11 +709,14 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles( const auto& blob_files = storage_info->GetBlobFiles(); if (!blob_files.empty()) { - ROCKS_LOG_BUFFER(log_buffer, - "[%s] Blob file summary: head=%" PRIu64 - ", tail=%" PRIu64 "\n", - column_family_name.c_str(), blob_files.begin()->first, - blob_files.rbegin()->first); + assert(blob_files.front()); + assert(blob_files.back()); + + ROCKS_LOG_BUFFER( + log_buffer, + "[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64 "\n", + column_family_name.c_str(), blob_files.front()->GetBlobFileNumber(), + blob_files.back()->GetBlobFileNumber()); } } if (made_progress) { diff --git a/db/db_impl/db_impl_debug.cc b/db/db_impl/db_impl_debug.cc index c28569528..e05e82c2c 100644 --- a/db/db_impl/db_impl_debug.cc +++ b/db/db_impl/db_impl_debug.cc @@ -60,24 +60,37 @@ void DBImpl::TEST_GetFilesMetaData( ColumnFamilyHandle* column_family, std::vector>* metadata, std::vector>* blob_metadata) { + assert(metadata); + auto cfh = static_cast_with_check(column_family); + assert(cfh); + auto cfd = cfh->cfd(); + assert(cfd); + InstrumentedMutexLock l(&mutex_); + + const auto* current = cfd->current(); + assert(current); + + const auto* vstorage = current->storage_info(); + assert(vstorage); + metadata->resize(NumberLevels()); - for (int level = 0; level < NumberLevels(); level++) { - const std::vector& files = - cfd->current()->storage_info()->LevelFiles(level); + + for (int level = 0; level < NumberLevels(); ++level) { + const std::vector& files = vstorage->LevelFiles(level); (*metadata)[level].clear(); + (*metadata)[level].reserve(files.size()); + for (const auto& f : files) { (*metadata)[level].push_back(*f); } } - if (blob_metadata != nullptr) { - blob_metadata->clear(); - for (const auto& blob : cfd->current()->storage_info()->GetBlobFiles()) { - blob_metadata->push_back(blob.second); - } + + if (blob_metadata) { + *blob_metadata = vstorage->GetBlobFiles(); } } diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 4dec2f4ff..2cae1b6ef 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -1143,7 +1143,8 @@ std::vector DBTestBase::GetBlobFileNumbers() { 
result.reserve(blob_files.size()); for (const auto& blob_file : blob_files) { - result.emplace_back(blob_file.first); + assert(blob_file); + result.emplace_back(blob_file->GetBlobFileNumber()); } return result; diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc index b24a5c633..c46db3362 100644 --- a/db/db_wal_test.cc +++ b/db/db_wal_test.cc @@ -433,7 +433,7 @@ TEST_F(DBWALTest, RecoverWithBlob) { const auto& blob_files = storage_info->GetBlobFiles(); ASSERT_EQ(blob_files.size(), 1); - const auto& blob_file = blob_files.begin()->second; + const auto& blob_file = blob_files.front(); ASSERT_NE(blob_file, nullptr); ASSERT_EQ(table_file->smallest.user_key(), "key1"); diff --git a/db/flush_job.cc b/db/flush_job.cc index f2046abec..877b76025 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -312,8 +312,11 @@ Status FlushJob::Run(LogsWithPrepTracker* prep_tracker, FileMetaData* file_meta, const auto& blob_files = vstorage->GetBlobFiles(); if (!blob_files.empty()) { - stream << "blob_file_head" << blob_files.begin()->first; - stream << "blob_file_tail" << blob_files.rbegin()->first; + assert(blob_files.front()); + stream << "blob_file_head" << blob_files.front()->GetBlobFileNumber(); + + assert(blob_files.back()); + stream << "blob_file_tail" << blob_files.back()->GetBlobFileNumber(); } stream << "immutable_memtables" << cfd_->imm()->NumNotFlushed(); diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 7db16ba9a..b7809a675 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -757,42 +757,75 @@ bool InternalStats::HandleLiveSstFilesSizeAtTemperature(std::string* value, bool InternalStats::HandleNumBlobFiles(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { - const auto* vstorage = cfd_->current()->storage_info(); + assert(cfd_); + + const auto* current = cfd_->current(); + assert(current); + + const auto* vstorage = current->storage_info(); + assert(vstorage); + const auto& blob_files = vstorage->GetBlobFiles(); + *value = blob_files.size(); + return true; } bool InternalStats::HandleBlobStats(std::string* value, Slice /*suffix*/) { - std::ostringstream oss; - auto* current_version = cfd_->current(); - const auto& blob_files = current_version->storage_info()->GetBlobFiles(); - uint64_t current_num_blob_files = blob_files.size(); - uint64_t current_file_size = 0; - uint64_t current_garbage_size = 0; - for (const auto& pair : blob_files) { - const auto& meta = pair.second; - current_file_size += meta->GetBlobFileSize(); - current_garbage_size += meta->GetGarbageBlobBytes(); + assert(cfd_); + + const auto* current = cfd_->current(); + assert(current); + + const auto* vstorage = current->storage_info(); + assert(vstorage); + + const auto& blob_files = vstorage->GetBlobFiles(); + + uint64_t total_file_size = 0; + uint64_t total_garbage_size = 0; + + for (const auto& meta : blob_files) { + assert(meta); + + total_file_size += meta->GetBlobFileSize(); + total_garbage_size += meta->GetGarbageBlobBytes(); } - oss << "Number of blob files: " << current_num_blob_files - << "\nTotal size of blob files: " << current_file_size - << "\nTotal size of garbage in blob files: " << current_garbage_size + + std::ostringstream oss; + + oss << "Number of blob files: " << blob_files.size() + << "\nTotal size of blob files: " << total_file_size + << "\nTotal size of garbage in blob files: " << total_garbage_size << '\n'; + value->append(oss.str()); + return true; } bool InternalStats::HandleTotalBlobFileSize(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { + assert(cfd_); + *value = 
cfd_->GetTotalBlobFileSize(); + return true; } bool InternalStats::HandleLiveBlobFileSize(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { - const auto* vstorage = cfd_->current()->storage_info(); + assert(cfd_); + + const auto* current = cfd_->current(); + assert(current); + + const auto* vstorage = current->storage_info(); + assert(vstorage); + *value = vstorage->GetTotalBlobFileSize(); + return true; } diff --git a/db/listener_test.cc b/db/listener_test.cc index ffe419c6e..1bad5c19f 100644 --- a/db/listener_test.cc +++ b/db/listener_test.cc @@ -1256,22 +1256,7 @@ class BlobDBJobLevelEventListenerTest : public EventListener { explicit BlobDBJobLevelEventListenerTest(EventListenerTest* test) : test_(test), call_count_(0) {} - std::shared_ptr GetBlobFileMetaData( - const VersionStorageInfo::BlobFiles& blob_files, - uint64_t blob_file_number) { - const auto it = blob_files.find(blob_file_number); - - if (it == blob_files.end()) { - return nullptr; - } - - const auto& meta = it->second; - assert(meta); - - return meta; - } - - const VersionStorageInfo::BlobFiles& GetBlobFiles() { + const VersionStorageInfo* GetVersionStorageInfo() const { VersionSet* const versions = test_->dbfull()->GetVersionSet(); assert(versions); @@ -1284,8 +1269,28 @@ class BlobDBJobLevelEventListenerTest : public EventListener { const VersionStorageInfo* const storage_info = current->storage_info(); EXPECT_NE(storage_info, nullptr); - const auto& blob_files = storage_info->GetBlobFiles(); - return blob_files; + return storage_info; + } + + void CheckBlobFileAdditions( + const std::vector& blob_file_addition_infos) const { + const auto* vstorage = GetVersionStorageInfo(); + + EXPECT_FALSE(blob_file_addition_infos.empty()); + + for (const auto& blob_file_addition_info : blob_file_addition_infos) { + const auto meta = vstorage->GetBlobFileMetaData( + blob_file_addition_info.blob_file_number); + + EXPECT_NE(meta, nullptr); + EXPECT_EQ(meta->GetBlobFileNumber(), + blob_file_addition_info.blob_file_number); + EXPECT_EQ(meta->GetTotalBlobBytes(), + blob_file_addition_info.total_blob_bytes); + EXPECT_EQ(meta->GetTotalBlobCount(), + blob_file_addition_info.total_blob_count); + EXPECT_FALSE(blob_file_addition_info.blob_file_path.empty()); + } } std::vector GetFlushedFiles() { @@ -1299,46 +1304,28 @@ class BlobDBJobLevelEventListenerTest : public EventListener { void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override { call_count_++; - EXPECT_FALSE(info.blob_file_addition_infos.empty()); - const auto& blob_files = GetBlobFiles(); + { std::lock_guard lock(mutex_); flushed_files_.push_back(info.file_path); } + EXPECT_EQ(info.blob_compression_type, kNoCompression); - for (const auto& blob_file_addition_info : info.blob_file_addition_infos) { - const auto meta = GetBlobFileMetaData( - blob_files, blob_file_addition_info.blob_file_number); - EXPECT_EQ(meta->GetBlobFileNumber(), - blob_file_addition_info.blob_file_number); - EXPECT_EQ(meta->GetTotalBlobBytes(), - blob_file_addition_info.total_blob_bytes); - EXPECT_EQ(meta->GetTotalBlobCount(), - blob_file_addition_info.total_blob_count); - EXPECT_FALSE(blob_file_addition_info.blob_file_path.empty()); - } + CheckBlobFileAdditions(info.blob_file_addition_infos); } - void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override { + void OnCompactionCompleted(DB* /*db*/, + const CompactionJobInfo& info) override { call_count_++; - EXPECT_FALSE(ci.blob_file_garbage_infos.empty()); - const auto& blob_files = GetBlobFiles(); - 
EXPECT_EQ(ci.blob_compression_type, kNoCompression); - for (const auto& blob_file_addition_info : ci.blob_file_addition_infos) { - const auto meta = GetBlobFileMetaData( - blob_files, blob_file_addition_info.blob_file_number); - EXPECT_EQ(meta->GetBlobFileNumber(), - blob_file_addition_info.blob_file_number); - EXPECT_EQ(meta->GetTotalBlobBytes(), - blob_file_addition_info.total_blob_bytes); - EXPECT_EQ(meta->GetTotalBlobCount(), - blob_file_addition_info.total_blob_count); - EXPECT_FALSE(blob_file_addition_info.blob_file_path.empty()); - } + EXPECT_EQ(info.blob_compression_type, kNoCompression); + + CheckBlobFileAdditions(info.blob_file_addition_infos); + + EXPECT_FALSE(info.blob_file_garbage_infos.empty()); - for (const auto& blob_file_garbage_info : ci.blob_file_garbage_infos) { + for (const auto& blob_file_garbage_info : info.blob_file_garbage_infos) { EXPECT_GT(blob_file_garbage_info.blob_file_number, 0U); EXPECT_GT(blob_file_garbage_info.garbage_blob_count, 0U); EXPECT_GT(blob_file_garbage_info.garbage_blob_bytes, 0U); diff --git a/db/version_builder.cc b/db/version_builder.cc index 10908e323..e76985687 100644 --- a/db/version_builder.cc +++ b/db/version_builder.cc @@ -464,11 +464,11 @@ class VersionBuilder::Rep { // Make sure that all blob files in the version have non-garbage data and // the links between them and the table files are consistent. const auto& blob_files = vstorage->GetBlobFiles(); - for (const auto& pair : blob_files) { - const uint64_t blob_file_number = pair.first; - const auto& blob_file_meta = pair.second; + for (const auto& blob_file_meta : blob_files) { assert(blob_file_meta); + const uint64_t blob_file_number = blob_file_meta->GetBlobFileNumber(); + if (blob_file_meta->GetGarbageBlobCount() >= blob_file_meta->GetTotalBlobCount()) { std::ostringstream oss; @@ -543,15 +543,9 @@ class VersionBuilder::Rep { } assert(base_vstorage_); + const auto meta = base_vstorage_->GetBlobFileMetaData(blob_file_number); - const auto& base_blob_files = base_vstorage_->GetBlobFiles(); - - auto base_it = base_blob_files.find(blob_file_number); - if (base_it != base_blob_files.end()) { - return true; - } - - return false; + return !!meta; } MutableBlobFileMetaData* GetOrCreateMutableBlobFileMetaData( @@ -562,16 +556,11 @@ class VersionBuilder::Rep { } assert(base_vstorage_); + const auto meta = base_vstorage_->GetBlobFileMetaData(blob_file_number); - const auto& base_blob_files = base_vstorage_->GetBlobFiles(); - - auto base_it = base_blob_files.find(blob_file_number); - if (base_it != base_blob_files.end()) { - assert(base_it->second); - + if (meta) { mutable_it = mutable_blob_file_metas_ - .emplace(blob_file_number, - MutableBlobFileMetaData(base_it->second)) + .emplace(blob_file_number, MutableBlobFileMetaData(meta)) .first; return &mutable_it->second; } @@ -862,20 +851,20 @@ class VersionBuilder::Rep { ProcessBoth process_both) const { assert(base_vstorage_); - const auto& base_blob_files = base_vstorage_->GetBlobFiles(); - auto base_it = base_blob_files.lower_bound(first_blob_file); - const auto base_it_end = base_blob_files.end(); + auto base_it = base_vstorage_->GetBlobFileMetaDataLB(first_blob_file); + const auto base_it_end = base_vstorage_->GetBlobFiles().end(); auto mutable_it = mutable_blob_file_metas_.lower_bound(first_blob_file); const auto mutable_it_end = mutable_blob_file_metas_.end(); while (base_it != base_it_end && mutable_it != mutable_it_end) { - const uint64_t base_blob_file_number = base_it->first; + const auto& base_meta = *base_it; + assert(base_meta); + + 
const uint64_t base_blob_file_number = base_meta->GetBlobFileNumber(); const uint64_t mutable_blob_file_number = mutable_it->first; if (base_blob_file_number < mutable_blob_file_number) { - const auto& base_meta = base_it->second; - if (!process_base(base_meta)) { return; } @@ -892,7 +881,6 @@ class VersionBuilder::Rep { } else { assert(base_blob_file_number == mutable_blob_file_number); - const auto& base_meta = base_it->second; const auto& mutable_meta = mutable_it->second; if (!process_both(base_meta, mutable_meta)) { @@ -905,7 +893,7 @@ class VersionBuilder::Rep { } while (base_it != base_it_end) { - const auto& base_meta = base_it->second; + const auto& base_meta = *base_it; if (!process_base(base_meta)) { return; @@ -1007,6 +995,10 @@ class VersionBuilder::Rep { void SaveBlobFilesTo(VersionStorageInfo* vstorage) const { assert(vstorage); + assert(base_vstorage_); + vstorage->ReserveBlob(base_vstorage_->GetBlobFiles().size() + + mutable_blob_file_metas_.size()); + const uint64_t oldest_blob_file_with_linked_ssts = GetMinOldestBlobFileNumber(); diff --git a/db/version_builder_test.cc b/db/version_builder_test.cc index 62ed05326..82eb25684 100644 --- a/db/version_builder_test.cc +++ b/db/version_builder_test.cc @@ -137,21 +137,6 @@ class VersionBuilderTest : public testing::Test { kDisableUserTimestamp, kDisableUserTimestamp); } - static std::shared_ptr GetBlobFileMetaData( - const VersionStorageInfo::BlobFiles& blob_files, - uint64_t blob_file_number) { - const auto it = blob_files.find(blob_file_number); - - if (it == blob_files.end()) { - return std::shared_ptr(); - } - - const auto& meta = it->second; - assert(meta); - - return meta; - } - void UpdateVersionStorageInfo(VersionStorageInfo* vstorage) { assert(vstorage); @@ -743,7 +728,7 @@ TEST_F(VersionBuilderTest, ApplyBlobFileAddition) { const auto& new_blob_files = new_vstorage.GetBlobFiles(); ASSERT_EQ(new_blob_files.size(), 1); - const auto new_meta = GetBlobFileMetaData(new_blob_files, blob_file_number); + const auto new_meta = new_vstorage.GetBlobFileMetaData(blob_file_number); ASSERT_NE(new_meta, nullptr); ASSERT_EQ(new_meta->GetBlobFileNumber(), blob_file_number); @@ -845,8 +830,7 @@ TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileInBase) { checksum_value, BlobFileMetaData::LinkedSsts{table_file_number}, garbage_blob_count, garbage_blob_bytes); - const auto meta = - GetBlobFileMetaData(vstorage_.GetBlobFiles(), blob_file_number); + const auto meta = vstorage_.GetBlobFileMetaData(blob_file_number); ASSERT_NE(meta, nullptr); // Add dummy table file to ensure the blob file is referenced. 
@@ -883,7 +867,7 @@ TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileInBase) { const auto& new_blob_files = new_vstorage.GetBlobFiles(); ASSERT_EQ(new_blob_files.size(), 1); - const auto new_meta = GetBlobFileMetaData(new_blob_files, blob_file_number); + const auto new_meta = new_vstorage.GetBlobFileMetaData(blob_file_number); ASSERT_NE(new_meta, nullptr); ASSERT_EQ(new_meta->GetSharedMeta(), meta->GetSharedMeta()); @@ -955,7 +939,7 @@ TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileAdditionApplied) { const auto& new_blob_files = new_vstorage.GetBlobFiles(); ASSERT_EQ(new_blob_files.size(), 1); - const auto new_meta = GetBlobFileMetaData(new_blob_files, blob_file_number); + const auto new_meta = new_vstorage.GetBlobFileMetaData(blob_file_number); ASSERT_NE(new_meta, nullptr); ASSERT_EQ(new_meta->GetBlobFileNumber(), blob_file_number); @@ -1065,9 +1049,9 @@ TEST_F(VersionBuilderTest, BlobFileGarbageOverflow) { TEST_F(VersionBuilderTest, SaveBlobFilesTo) { // Add three blob files to base version. - for (uint64_t i = 3; i >= 1; --i) { - const uint64_t table_file_number = i; - const uint64_t blob_file_number = i; + for (uint64_t i = 1; i <= 3; ++i) { + const uint64_t table_file_number = 2 * i; + const uint64_t blob_file_number = 2 * i + 1; const uint64_t total_blob_count = i * 1000; const uint64_t total_blob_bytes = i * 1000000; const uint64_t garbage_blob_count = i * 100; @@ -1078,8 +1062,15 @@ TEST_F(VersionBuilderTest, SaveBlobFilesTo) { /* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts{table_file_number}, garbage_blob_count, garbage_blob_bytes); + } + + // Add dummy table files to ensure the blob files are referenced. + // Note: files are added to L0, so they have to be added in reverse order + // (newest first). + for (uint64_t i = 3; i >= 1; --i) { + const uint64_t table_file_number = 2 * i; + const uint64_t blob_file_number = 2 * i + 1; - // Add dummy table file to ensure the blob file is referenced. AddDummyFile(table_file_number, blob_file_number); } @@ -1099,16 +1090,16 @@ TEST_F(VersionBuilderTest, SaveBlobFilesTo) { // blob file is all garbage after the edit and will not be part of the new // version. The corresponding dummy table file is also removed for // consistency. - edit.AddBlobFileGarbage(/* blob_file_number */ 2, + edit.AddBlobFileGarbage(/* blob_file_number */ 5, /* garbage_blob_count */ 200, /* garbage_blob_bytes */ 100000); - edit.AddBlobFileGarbage(/* blob_file_number */ 3, + edit.AddBlobFileGarbage(/* blob_file_number */ 7, /* garbage_blob_count */ 2700, /* garbage_blob_bytes */ 2940000); - edit.DeleteFile(/* level */ 0, /* file_number */ 3); + edit.DeleteFile(/* level */ 0, /* file_number */ 6); // Add a fourth blob file. 
- edit.AddBlobFile(/* blob_file_number */ 4, /* total_blob_count */ 4000, + edit.AddBlobFile(/* blob_file_number */ 9, /* total_blob_count */ 4000, /* total_blob_bytes */ 4000000, /* checksum_method */ std::string(), /* checksum_value */ std::string()); @@ -1127,32 +1118,32 @@ TEST_F(VersionBuilderTest, SaveBlobFilesTo) { const auto& new_blob_files = new_vstorage.GetBlobFiles(); ASSERT_EQ(new_blob_files.size(), 3); - const auto meta1 = GetBlobFileMetaData(new_blob_files, 1); + const auto meta3 = new_vstorage.GetBlobFileMetaData(/* blob_file_number */ 3); - ASSERT_NE(meta1, nullptr); - ASSERT_EQ(meta1->GetBlobFileNumber(), 1); - ASSERT_EQ(meta1->GetTotalBlobCount(), 1000); - ASSERT_EQ(meta1->GetTotalBlobBytes(), 1000000); - ASSERT_EQ(meta1->GetGarbageBlobCount(), 100); - ASSERT_EQ(meta1->GetGarbageBlobBytes(), 20000); + ASSERT_NE(meta3, nullptr); + ASSERT_EQ(meta3->GetBlobFileNumber(), 3); + ASSERT_EQ(meta3->GetTotalBlobCount(), 1000); + ASSERT_EQ(meta3->GetTotalBlobBytes(), 1000000); + ASSERT_EQ(meta3->GetGarbageBlobCount(), 100); + ASSERT_EQ(meta3->GetGarbageBlobBytes(), 20000); - const auto meta2 = GetBlobFileMetaData(new_blob_files, 2); + const auto meta5 = new_vstorage.GetBlobFileMetaData(/* blob_file_number */ 5); - ASSERT_NE(meta2, nullptr); - ASSERT_EQ(meta2->GetBlobFileNumber(), 2); - ASSERT_EQ(meta2->GetTotalBlobCount(), 2000); - ASSERT_EQ(meta2->GetTotalBlobBytes(), 2000000); - ASSERT_EQ(meta2->GetGarbageBlobCount(), 400); - ASSERT_EQ(meta2->GetGarbageBlobBytes(), 140000); + ASSERT_NE(meta5, nullptr); + ASSERT_EQ(meta5->GetBlobFileNumber(), 5); + ASSERT_EQ(meta5->GetTotalBlobCount(), 2000); + ASSERT_EQ(meta5->GetTotalBlobBytes(), 2000000); + ASSERT_EQ(meta5->GetGarbageBlobCount(), 400); + ASSERT_EQ(meta5->GetGarbageBlobBytes(), 140000); - const auto meta4 = GetBlobFileMetaData(new_blob_files, 4); + const auto meta9 = new_vstorage.GetBlobFileMetaData(/* blob_file_number */ 9); - ASSERT_NE(meta4, nullptr); - ASSERT_EQ(meta4->GetBlobFileNumber(), 4); - ASSERT_EQ(meta4->GetTotalBlobCount(), 4000); - ASSERT_EQ(meta4->GetTotalBlobBytes(), 4000000); - ASSERT_EQ(meta4->GetGarbageBlobCount(), 0); - ASSERT_EQ(meta4->GetGarbageBlobBytes(), 0); + ASSERT_NE(meta9, nullptr); + ASSERT_EQ(meta9->GetBlobFileNumber(), 9); + ASSERT_EQ(meta9->GetTotalBlobCount(), 4000); + ASSERT_EQ(meta9->GetTotalBlobBytes(), 4000000); + ASSERT_EQ(meta9->GetGarbageBlobCount(), 0); + ASSERT_EQ(meta9->GetGarbageBlobBytes(), 0); // Delete the first table file, which makes the first blob file obsolete // since it's at the head and unreferenced. 
@@ -1160,7 +1151,7 @@ TEST_F(VersionBuilderTest, SaveBlobFilesTo) { &new_vstorage, version_set); VersionEdit second_edit; - second_edit.DeleteFile(/* level */ 0, /* file_number */ 1); + second_edit.DeleteFile(/* level */ 0, /* file_number */ 2); ASSERT_OK(second_builder.Apply(&second_edit)); @@ -1175,9 +1166,10 @@ TEST_F(VersionBuilderTest, SaveBlobFilesTo) { const auto& newer_blob_files = newer_vstorage.GetBlobFiles(); ASSERT_EQ(newer_blob_files.size(), 2); - const auto newer_meta1 = GetBlobFileMetaData(newer_blob_files, 1); + const auto newer_meta3 = + newer_vstorage.GetBlobFileMetaData(/* blob_file_number */ 3); - ASSERT_EQ(newer_meta1, nullptr); + ASSERT_EQ(newer_meta3, nullptr); UnrefFilesInVersion(&newer_vstorage); UnrefFilesInVersion(&new_vstorage); @@ -1259,7 +1251,7 @@ TEST_F(VersionBuilderTest, SaveBlobFilesToConcurrentJobs) { ASSERT_EQ(new_blob_files.size(), 2); const auto base_meta = - GetBlobFileMetaData(new_blob_files, base_blob_file_number); + new_vstorage.GetBlobFileMetaData(base_blob_file_number); ASSERT_NE(base_meta, nullptr); ASSERT_EQ(base_meta->GetBlobFileNumber(), base_blob_file_number); @@ -1270,7 +1262,7 @@ TEST_F(VersionBuilderTest, SaveBlobFilesToConcurrentJobs) { ASSERT_EQ(base_meta->GetChecksumMethod(), checksum_method); ASSERT_EQ(base_meta->GetChecksumValue(), checksum_value); - const auto added_meta = GetBlobFileMetaData(new_blob_files, blob_file_number); + const auto added_meta = new_vstorage.GetBlobFileMetaData(blob_file_number); ASSERT_NE(added_meta, nullptr); ASSERT_EQ(added_meta->GetBlobFileNumber(), blob_file_number); @@ -1542,7 +1534,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { for (size_t i = 0; i < 5; ++i) { const auto meta = - GetBlobFileMetaData(blob_files, /* blob_file_number */ i + 1); + vstorage_.GetBlobFileMetaData(/* blob_file_number */ i + 1); ASSERT_NE(meta, nullptr); ASSERT_EQ(meta->GetLinkedSsts(), expected_linked_ssts[i]); } @@ -1582,8 +1574,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { // Trivially move a file that references a blob file. Note that we save // the original BlobFileMetaData object so we can check that no new object // gets created. - auto meta3 = - GetBlobFileMetaData(vstorage_.GetBlobFiles(), /* blob_file_number */ 3); + auto meta3 = vstorage_.GetBlobFileMetaData(/* blob_file_number */ 3); edit.DeleteFile(/* level */ 1, /* file_number */ 3); edit.AddFile(/* level */ 2, /* file_number */ 3, /* path_id */ 0, @@ -1655,14 +1646,15 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { for (size_t i = 0; i < 5; ++i) { const auto meta = - GetBlobFileMetaData(blob_files, /* blob_file_number */ i + 1); + new_vstorage.GetBlobFileMetaData(/* blob_file_number */ i + 1); ASSERT_NE(meta, nullptr); ASSERT_EQ(meta->GetLinkedSsts(), expected_linked_ssts[i]); } // Make sure that no new BlobFileMetaData got created for the blob file // affected by the trivial move. 
- ASSERT_EQ(GetBlobFileMetaData(blob_files, /* blob_file_number */ 3), meta3); + ASSERT_EQ(new_vstorage.GetBlobFileMetaData(/* blob_file_number */ 3), + meta3); } UnrefFilesInVersion(&new_vstorage); diff --git a/db/version_set.cc b/db/version_set.cc index c34fa385d..942b3d5f4 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1491,15 +1491,16 @@ void Version::GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta) { level, level_size, std::move(files)); cf_meta->size += level_size; } - for (const auto& iter : vstorage->GetBlobFiles()) { - const auto meta = iter.second.get(); + for (const auto& meta : vstorage->GetBlobFiles()) { + assert(meta); + cf_meta->blob_files.emplace_back( meta->GetBlobFileNumber(), BlobFileName("", meta->GetBlobFileNumber()), ioptions->cf_paths.front().path, meta->GetBlobFileSize(), meta->GetTotalBlobCount(), meta->GetTotalBlobBytes(), meta->GetGarbageBlobCount(), meta->GetGarbageBlobBytes(), meta->GetChecksumMethod(), meta->GetChecksumValue()); - cf_meta->blob_file_count++; + ++cf_meta->blob_file_count; cf_meta->blob_file_size += meta->GetBlobFileSize(); } } @@ -1821,12 +1822,9 @@ Status Version::GetBlob(const ReadOptions& read_options, const Slice& user_key, return Status::Corruption("Unexpected TTL/inlined blob index"); } - const auto& blob_files = storage_info_.GetBlobFiles(); - const uint64_t blob_file_number = blob_index.file_number(); - const auto it = blob_files.find(blob_file_number); - if (it == blob_files.end()) { + if (!storage_info_.GetBlobFileMetaData(blob_file_number)) { return Status::Corruption("Invalid blob file number"); } @@ -1870,10 +1868,11 @@ void Version::MultiGetBlob( assert(!blob_rqs.empty()); Status status; - const auto& blob_files = storage_info_.GetBlobFiles(); + for (auto& elem : blob_rqs) { - uint64_t blob_file_number = elem.first; - if (blob_files.find(blob_file_number) == blob_files.end()) { + const uint64_t blob_file_number = elem.first; + + if (!storage_info_.GetBlobFileMetaData(blob_file_number)) { auto& blobs_in_file = elem.second; for (const auto& blob : blobs_in_file) { const KeyContext& key_context = blob.second; @@ -1881,6 +1880,7 @@ void Version::MultiGetBlob( } continue; } + CacheHandleGuard blob_file_reader; assert(blob_file_cache_); status = blob_file_cache_->GetBlobFileReader(blob_file_number, @@ -2972,9 +2972,7 @@ void VersionStorageInfo::ComputeFilesMarkedForForcedBlobGC( // blob_garbage_collection_force_threshold and the entire batch has to be // eligible for GC according to blob_garbage_collection_age_cutoff in order // for us to schedule any compactions. 
- const auto oldest_it = blob_files_.begin(); - - const auto& oldest_meta = oldest_it->second; + const auto& oldest_meta = blob_files_.front(); assert(oldest_meta); const auto& linked_ssts = oldest_meta->GetLinkedSsts(); @@ -2984,9 +2982,8 @@ void VersionStorageInfo::ComputeFilesMarkedForForcedBlobGC( uint64_t sum_total_blob_bytes = oldest_meta->GetTotalBlobBytes(); uint64_t sum_garbage_blob_bytes = oldest_meta->GetGarbageBlobBytes(); - auto it = oldest_it; - for (++it; it != blob_files_.end(); ++it) { - const auto& meta = it->second; + while (true) { + const auto& meta = blob_files_[count]; assert(meta); if (!meta->GetLinkedSsts().empty()) { @@ -3053,12 +3050,21 @@ void VersionStorageInfo::AddBlobFile( std::shared_ptr blob_file_meta) { assert(blob_file_meta); - const uint64_t blob_file_number = blob_file_meta->GetBlobFileNumber(); + assert(blob_files_.empty() || + (blob_files_.back() && blob_files_.back()->GetBlobFileNumber() < + blob_file_meta->GetBlobFileNumber())); - auto it = blob_files_.lower_bound(blob_file_number); - assert(it == blob_files_.end() || it->first != blob_file_number); + blob_files_.emplace_back(std::move(blob_file_meta)); +} - blob_files_.emplace_hint(it, blob_file_number, std::move(blob_file_meta)); +VersionStorageInfo::BlobFiles::const_iterator +VersionStorageInfo::GetBlobFileMetaDataLB(uint64_t blob_file_number) const { + return std::lower_bound( + blob_files_.begin(), blob_files_.end(), blob_file_number, + [](const std::shared_ptr& lhs, uint64_t rhs) { + assert(lhs); + return lhs->GetBlobFileNumber() < rhs; + }); } void VersionStorageInfo::SetFinalized() { @@ -3845,14 +3851,16 @@ uint64_t VersionStorageInfo::EstimateLiveDataSize() const { } } } + // For BlobDB, the result also includes the exact value of live bytes in the // blob files of the version. 
- const auto& blobFiles = GetBlobFiles(); - for (const auto& pair : blobFiles) { - const auto& meta = pair.second; + for (const auto& meta : blob_files_) { + assert(meta); + size += meta->GetTotalBlobBytes(); size -= meta->GetGarbageBlobBytes(); } + return size; } @@ -3902,8 +3910,7 @@ void Version::AddLiveFiles(std::vector* live_table_files, } const auto& blob_files = storage_info_.GetBlobFiles(); - for (const auto& pair : blob_files) { - const auto& meta = pair.second; + for (const auto& meta : blob_files) { assert(meta); live_blob_files->emplace_back(meta->GetBlobFileNumber()); @@ -3960,8 +3967,7 @@ std::string Version::DebugString(bool hex, bool print_stats) const { r.append("--- blob files --- version# "); AppendNumberTo(&r, version_number_); r.append(" ---\n"); - for (const auto& pair : blob_files) { - const auto& blob_file_meta = pair.second; + for (const auto& blob_file_meta : blob_files) { assert(blob_file_meta); r.append(blob_file_meta->DebugString()); @@ -5251,13 +5257,25 @@ Status VersionSet::GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) { checksum_list->reset(); for (auto cfd : *column_family_set_) { + assert(cfd); + if (cfd->IsDropped() || !cfd->initialized()) { continue; } + + const auto* current = cfd->current(); + assert(current); + + const auto* vstorage = current->storage_info(); + assert(vstorage); + /* SST files */ for (int level = 0; level < cfd->NumberLevels(); level++) { - for (const auto& file : - cfd->current()->storage_info()->LevelFiles(level)) { + const auto& level_files = vstorage->LevelFiles(level); + + for (const auto& file : level_files) { + assert(file); + s = checksum_list->InsertOneFileChecksum(file->fd.GetNumber(), file->file_checksum, file->file_checksum_func_name); @@ -5268,13 +5286,9 @@ Status VersionSet::GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) { } /* Blob files */ - const auto& blob_files = cfd->current()->storage_info()->GetBlobFiles(); - for (const auto& pair : blob_files) { - const uint64_t blob_file_number = pair.first; - const auto& meta = pair.second; - + const auto& blob_files = vstorage->GetBlobFiles(); + for (const auto& meta : blob_files) { assert(meta); - assert(blob_file_number == meta->GetBlobFileNumber()); std::string checksum_value = meta->GetChecksumValue(); std::string checksum_method = meta->GetChecksumMethod(); @@ -5284,8 +5298,8 @@ Status VersionSet::GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) { checksum_method = kUnknownFileChecksumFuncName; } - s = checksum_list->InsertOneFileChecksum(blob_file_number, checksum_value, - checksum_method); + s = checksum_list->InsertOneFileChecksum(meta->GetBlobFileNumber(), + checksum_value, checksum_method); if (!s.ok()) { return s; } @@ -5421,12 +5435,18 @@ Status VersionSet::WriteCurrentStateToManifest( VersionEdit edit; edit.SetColumnFamily(cfd->GetID()); - assert(cfd->current()); - assert(cfd->current()->storage_info()); + const auto* current = cfd->current(); + assert(current); + + const auto* vstorage = current->storage_info(); + assert(vstorage); for (int level = 0; level < cfd->NumberLevels(); level++) { - for (const auto& f : - cfd->current()->storage_info()->LevelFiles(level)) { + const auto& level_files = vstorage->LevelFiles(level); + + for (const auto& f : level_files) { + assert(f); + edit.AddFile( level, f->fd.GetNumber(), f->fd.GetPathId(), f->fd.GetFileSize(), f->smallest, f->largest, f->fd.smallest_seqno, @@ -5437,13 +5457,11 @@ Status VersionSet::WriteCurrentStateToManifest( } } - const auto& blob_files = 
cfd->current()->storage_info()->GetBlobFiles(); - for (const auto& pair : blob_files) { - const uint64_t blob_file_number = pair.first; - const auto& meta = pair.second; - + const auto& blob_files = vstorage->GetBlobFiles(); + for (const auto& meta : blob_files) { assert(meta); - assert(blob_file_number == meta->GetBlobFileNumber()); + + const uint64_t blob_file_number = meta->GetBlobFileNumber(); edit.AddBlobFile(blob_file_number, meta->GetTotalBlobCount(), meta->GetTotalBlobBytes(), meta->GetChecksumMethod(), @@ -5970,21 +5988,30 @@ uint64_t VersionSet::GetTotalSstFilesSize(Version* dummy_versions) { uint64_t VersionSet::GetTotalBlobFileSize(Version* dummy_versions) { std::unordered_set unique_blob_files; - uint64_t all_v_blob_file_size = 0; + + uint64_t all_versions_blob_file_size = 0; + for (auto* v = dummy_versions->next_; v != dummy_versions; v = v->next_) { // iterate all the versions - auto* vstorage = v->storage_info(); + const auto* vstorage = v->storage_info(); + assert(vstorage); + const auto& blob_files = vstorage->GetBlobFiles(); - for (const auto& pair : blob_files) { - if (unique_blob_files.find(pair.first) == unique_blob_files.end()) { + + for (const auto& meta : blob_files) { + assert(meta); + + const uint64_t blob_file_number = meta->GetBlobFileNumber(); + + if (unique_blob_files.find(blob_file_number) == unique_blob_files.end()) { // find Blob file that has not been counted - unique_blob_files.insert(pair.first); - const auto& meta = pair.second; - all_v_blob_file_size += meta->GetBlobFileSize(); + unique_blob_files.insert(blob_file_number); + all_versions_blob_file_size += meta->GetBlobFileSize(); } } } - return all_v_blob_file_size; + + return all_versions_blob_file_size; } Status VersionSet::VerifyFileMetadata(const std::string& fpath, diff --git a/db/version_set.h b/db/version_set.h index ce55e755d..14026052e 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -127,6 +127,8 @@ class VersionStorageInfo { void AddFile(int level, FileMetaData* f); + void ReserveBlob(size_t size) { blob_files_.reserve(size); } + void AddBlobFile(std::shared_ptr blob_file_meta); void PrepareForVersionAppend(const ImmutableOptions& immutable_options, @@ -264,7 +266,7 @@ class VersionStorageInfo { void set_l0_delay_trigger_count(int v) { l0_delay_trigger_count_ = v; } - // REQUIRES: This version has been saved (see VersionSet::SaveTo) + // REQUIRES: This version has been saved (see VersionBuilder::SaveTo) int NumLevelFiles(int level) const { assert(finalized_); return static_cast(files_[level].size()); @@ -273,7 +275,7 @@ class VersionStorageInfo { // Return the combined file size of all files at the specified level. 
   uint64_t NumLevelBytes(int level) const;
 
-  // REQUIRES: This version has been saved (see VersionSet::SaveTo)
+  // REQUIRES: This version has been saved (see VersionBuilder::SaveTo)
   const std::vector<FileMetaData*>& LevelFiles(int level) const {
     return files_[level];
   }
@@ -330,15 +332,34 @@ class VersionStorageInfo {
     return files_[location.GetLevel()][location.GetPosition()];
   }
 
-  // REQUIRES: This version has been saved (see VersionSet::SaveTo)
-  using BlobFiles = std::map<uint64_t, std::shared_ptr<BlobFileMetaData>>;
+  // REQUIRES: This version has been saved (see VersionBuilder::SaveTo)
+  using BlobFiles = std::vector<std::shared_ptr<BlobFileMetaData>>;
 
   const BlobFiles& GetBlobFiles() const { return blob_files_; }
 
+  // REQUIRES: This version has been saved (see VersionBuilder::SaveTo)
+  BlobFiles::const_iterator GetBlobFileMetaDataLB(
+      uint64_t blob_file_number) const;
+
+  // REQUIRES: This version has been saved (see VersionBuilder::SaveTo)
+  std::shared_ptr<BlobFileMetaData> GetBlobFileMetaData(
+      uint64_t blob_file_number) const {
+    const auto it = GetBlobFileMetaDataLB(blob_file_number);
+
+    assert(it == blob_files_.end() || *it);
+
+    if (it != blob_files_.end() &&
+        (*it)->GetBlobFileNumber() == blob_file_number) {
+      return *it;
+    }
+
+    return std::shared_ptr<BlobFileMetaData>();
+  }
+
+  // REQUIRES: This version has been saved (see VersionBuilder::SaveTo)
   uint64_t GetTotalBlobFileSize() const {
     uint64_t total_blob_bytes = 0;
 
-    for (const auto& pair : blob_files_) {
-      const auto& meta = pair.second;
+    for (const auto& meta : blob_files_) {
       assert(meta);
 
       total_blob_bytes += meta->GetBlobFileSize();
@@ -546,7 +567,7 @@ class VersionStorageInfo {
   using FileLocations = std::unordered_map<uint64_t, FileLocation>;
   FileLocations file_locations_;
 
-  // Map of blob files in version by number.
+  // Vector of blob files in version sorted by blob file number.
   BlobFiles blob_files_;
 
   // Level that L0 data should be compacted to. All levels < base_level_ should