diff --git a/HISTORY.md b/HISTORY.md index f50320d41..3659b8532 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -49,6 +49,7 @@ ### Performance Improvements * Avoid calling malloc_usable_size() in LRU Cache's mutex. +* Reduce DB mutex holding time when finding obsolete files to delete. When a file is trivial moved to another level, the internal files will be referenced twice internally and sometimes opened twice too. If a deletion candidate file is not the last reference, we need to destroy the reference and close the file but not deleting the file. Right now we determine it by building a set of all live files. With the improvement, we check the file against all live LSM-tree versions instead. ## 7.2.0 (04/15/2022) ### Bug Fixes diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index 86a7808b2..cc1b1a362 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -166,8 +166,8 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force, job_context->log_number = MinLogNumberToKeep(); job_context->prev_log_number = versions_->prev_log_number(); - versions_->AddLiveFiles(&job_context->sst_live, &job_context->blob_live); if (doing_the_full_scan) { + versions_->AddLiveFiles(&job_context->sst_live, &job_context->blob_live); InfoLogPrefix info_log_prefix(!immutable_db_options_.db_log_dir.empty(), dbname_); std::set paths; @@ -242,6 +242,14 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force, log_file, immutable_db_options_.db_log_dir); } } + } else { + // Instead of filling ob_context->sst_live and job_context->blob_live, + // directly remove files that show up in any Version. This is because + // candidate files tend to be a small percentage of all files, so it is + // usually cheaper to check them against every version, compared to + // building a map for all files. + versions_->RemoveLiveFiles(job_context->sst_delete_files, + job_context->blob_delete_files); } // logs_ is empty when called during recovery, in which case there can't yet @@ -395,8 +403,10 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) { state.manifest_delete_files.size()); // We may ignore the dbname when generating the file names. for (auto& file : state.sst_delete_files) { - candidate_files.emplace_back( - MakeTableFileName(file.metadata->fd.GetNumber()), file.path); + if (!file.only_delete_metadata) { + candidate_files.emplace_back( + MakeTableFileName(file.metadata->fd.GetNumber()), file.path); + } if (file.metadata->table_reader_handle) { table_cache_->Release(file.metadata->table_reader_handle); } diff --git a/db/version_set.cc b/db/version_set.cc index 223bdba6a..bbe450a72 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -3941,6 +3941,26 @@ void Version::AddLiveFiles(std::vector* live_table_files, } } +void Version::RemoveLiveFiles( + std::vector& sst_delete_candidates, + std::vector& blob_delete_candidates) const { + for (ObsoleteFileInfo& fi : sst_delete_candidates) { + if (!fi.only_delete_metadata && + storage_info()->GetFileLocation(fi.metadata->fd.GetNumber()) != + VersionStorageInfo::FileLocation::Invalid()) { + fi.only_delete_metadata = true; + } + } + + blob_delete_candidates.erase( + std::remove_if( + blob_delete_candidates.begin(), blob_delete_candidates.end(), + [this](ObsoleteBlobFileInfo& x) { + return storage_info()->GetBlobFileMetaData(x.GetBlobFileNumber()); + }), + blob_delete_candidates.end()); +} + std::string Version::DebugString(bool hex, bool print_stats) const { std::string r; for (int level = 0; level < storage_info_.num_levels_; level++) { @@ -5745,6 +5765,38 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f, v->GetMutableCFOptions().prefix_extractor); } +void VersionSet::RemoveLiveFiles( + std::vector& sst_delete_candidates, + std::vector& blob_delete_candidates) const { + assert(column_family_set_); + for (auto cfd : *column_family_set_) { + assert(cfd); + if (!cfd->initialized()) { + continue; + } + + auto* current = cfd->current(); + bool found_current = false; + + Version* const dummy_versions = cfd->dummy_versions(); + assert(dummy_versions); + + for (Version* v = dummy_versions->next_; v != dummy_versions; + v = v->next_) { + v->RemoveLiveFiles(sst_delete_candidates, blob_delete_candidates); + if (v == current) { + found_current = true; + } + } + + if (!found_current && current != nullptr) { + // Should never happen unless it is a bug. + assert(false); + current->RemoveLiveFiles(sst_delete_candidates, blob_delete_candidates); + } + } +} + void VersionSet::AddLiveFiles(std::vector* live_table_files, std::vector* live_blob_files) const { assert(live_table_files); diff --git a/db/version_set.h b/db/version_set.h index d2fd8ddb5..8f0073a89 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -688,6 +688,53 @@ class VersionStorageInfo { friend class VersionSet; }; +struct ObsoleteFileInfo { + FileMetaData* metadata; + std::string path; + // If true, the FileMataData should be destroyed but the file should + // not be deleted. This is because another FileMetaData still references + // the file, usually because the file is trivial moved so two FileMetadata + // is managing the file. + bool only_delete_metadata = false; + + ObsoleteFileInfo() noexcept + : metadata(nullptr), only_delete_metadata(false) {} + ObsoleteFileInfo(FileMetaData* f, const std::string& file_path) + : metadata(f), path(file_path), only_delete_metadata(false) {} + + ObsoleteFileInfo(const ObsoleteFileInfo&) = delete; + ObsoleteFileInfo& operator=(const ObsoleteFileInfo&) = delete; + + ObsoleteFileInfo(ObsoleteFileInfo&& rhs) noexcept : ObsoleteFileInfo() { + *this = std::move(rhs); + } + + ObsoleteFileInfo& operator=(ObsoleteFileInfo&& rhs) noexcept { + path = std::move(rhs.path); + metadata = rhs.metadata; + rhs.metadata = nullptr; + + return *this; + } + void DeleteMetadata() { + delete metadata; + metadata = nullptr; + } +}; + +class ObsoleteBlobFileInfo { + public: + ObsoleteBlobFileInfo(uint64_t blob_file_number, std::string path) + : blob_file_number_(blob_file_number), path_(std::move(path)) {} + + uint64_t GetBlobFileNumber() const { return blob_file_number_; } + const std::string& GetPath() const { return path_; } + + private: + uint64_t blob_file_number_; + std::string path_; +}; + using MultiGetRange = MultiGetContext::Range; // A column family's version consists of the table and blob files owned by // the column family at a certain point in time. @@ -789,6 +836,11 @@ class Version { void AddLiveFiles(std::vector* live_table_files, std::vector* live_blob_files) const; + // Remove live files that are in the delete candidate lists. + void RemoveLiveFiles( + std::vector& sst_delete_candidates, + std::vector& blob_delete_candidates) const; + // Return a human readable string that describes this version's contents. std::string DebugString(bool hex = false, bool print_stats = false) const; @@ -927,49 +979,6 @@ class Version { void operator=(const Version&) = delete; }; -struct ObsoleteFileInfo { - FileMetaData* metadata; - std::string path; - - ObsoleteFileInfo() noexcept : metadata(nullptr) {} - ObsoleteFileInfo(FileMetaData* f, const std::string& file_path) - : metadata(f), path(file_path) {} - - ObsoleteFileInfo(const ObsoleteFileInfo&) = delete; - ObsoleteFileInfo& operator=(const ObsoleteFileInfo&) = delete; - - ObsoleteFileInfo(ObsoleteFileInfo&& rhs) noexcept : - ObsoleteFileInfo() { - *this = std::move(rhs); - } - - ObsoleteFileInfo& operator=(ObsoleteFileInfo&& rhs) noexcept { - path = std::move(rhs.path); - metadata = rhs.metadata; - rhs.metadata = nullptr; - - return *this; - } - - void DeleteMetadata() { - delete metadata; - metadata = nullptr; - } -}; - -class ObsoleteBlobFileInfo { - public: - ObsoleteBlobFileInfo(uint64_t blob_file_number, std::string path) - : blob_file_number_(blob_file_number), path_(std::move(path)) {} - - uint64_t GetBlobFileNumber() const { return blob_file_number_; } - const std::string& GetPath() const { return path_; } - - private: - uint64_t blob_file_number_; - std::string path_; -}; - class BaseReferencedVersionBuilder; class AtomicGroupReadBuffer { @@ -1287,6 +1296,11 @@ class VersionSet { void AddLiveFiles(std::vector* live_table_files, std::vector* live_blob_files) const; + // Remove live files that are in the delete candidate lists. + void RemoveLiveFiles( + std::vector& sst_delete_candidates, + std::vector& blob_delete_candidates) const; + // Return the approximate size of data to be scanned for range [start, end) // in levels [start_level, end_level). If end_level == -1 it will search // through all non-empty levels