From 081722780bdd338565ac9630284e9e7503d47880 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Wed, 3 Nov 2021 11:51:22 -0700 Subject: [PATCH] Refactor the detailed consistency checks and the SST saving logic in VersionBuilder (#9099) Summary: The patch refactors the parts of `VersionBuilder` that deal with SST file comparisons. Specifically, it makes the following changes: * Turns `NewestFirstBySeqNo` and `BySmallestKey` from free-standing functions into function objects. Note: `BySmallestKey` has a pointer to the `InternalKeyComparator`, while `NewestFirstBySeqNo` is completely stateless. * Eliminates the wrapper `FileComparator`, which was essentially an unnecessary DIY virtual function call mechanism. * Refactors `CheckConsistencyDetails` and `SaveSSTFilesTo` using helper function templates that take comparator/checker function objects. Using static polymorphism eliminates the need to make runtime decisions about which comparator to use. * Extends some error messages returned by the consistency checks and makes them more uniform. * Removes some incomplete/redundant consistency checks from `VersionBuilder` and `FilePicker`. * Improves const correctness in several places. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9099 Test Plan: `make check` Reviewed By: riversand963 Differential Revision: D32027503 Pulled By: ltamasi fbshipit-source-id: 621326ae41f4f55f7ad6a91abbd6e666d5c7857c --- db/version_builder.cc | 382 +++++++++++++++++++++++++----------------- db/version_builder.h | 5 +- db/version_set.cc | 49 +----- db/version_set.h | 2 +- 4 files changed, 236 insertions(+), 202 deletions(-) diff --git a/db/version_builder.cc b/db/version_builder.cc index 69dc34fe0..17d2d108e 100644 --- a/db/version_builder.cc +++ b/db/version_builder.cc @@ -34,52 +34,48 @@ namespace ROCKSDB_NAMESPACE { -bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b) { - if (a->fd.largest_seqno != b->fd.largest_seqno) { - return a->fd.largest_seqno > b->fd.largest_seqno; - } - if (a->fd.smallest_seqno != b->fd.smallest_seqno) { - return a->fd.smallest_seqno > b->fd.smallest_seqno; - } - // Break ties by file number - return a->fd.GetNumber() > b->fd.GetNumber(); -} +class VersionBuilder::Rep { + class NewestFirstBySeqNo { + public: + bool operator()(const FileMetaData* lhs, const FileMetaData* rhs) const { + assert(lhs); + assert(rhs); -namespace { -bool BySmallestKey(FileMetaData* a, FileMetaData* b, - const InternalKeyComparator* cmp) { - int r = cmp->Compare(a->smallest, b->smallest); - if (r != 0) { - return (r < 0); - } - // Break ties by file number - return (a->fd.GetNumber() < b->fd.GetNumber()); -} -} // namespace + if (lhs->fd.largest_seqno != rhs->fd.largest_seqno) { + return lhs->fd.largest_seqno > rhs->fd.largest_seqno; + } -class VersionBuilder::Rep { - private: - // Helper to sort files_ in v - // kLevel0 -- NewestFirstBySeqNo - // kLevelNon0 -- BySmallestKey - struct FileComparator { - enum SortMethod { kLevel0 = 0, kLevelNon0 = 1, } sort_method; - const InternalKeyComparator* internal_comparator; - - FileComparator() : internal_comparator(nullptr) {} - - bool operator()(FileMetaData* f1, FileMetaData* f2) const { - switch (sort_method) { - case kLevel0: - return NewestFirstBySeqNo(f1, f2); - case kLevelNon0: - return BySmallestKey(f1, f2, internal_comparator); + if (lhs->fd.smallest_seqno != rhs->fd.smallest_seqno) { + return lhs->fd.smallest_seqno > rhs->fd.smallest_seqno; } - assert(false); - return false; + + // Break ties by file number + return lhs->fd.GetNumber() > rhs->fd.GetNumber(); } }; + class BySmallestKey { + public: + explicit BySmallestKey(const InternalKeyComparator* cmp) : cmp_(cmp) {} + + bool operator()(const FileMetaData* lhs, const FileMetaData* rhs) const { + assert(lhs); + assert(rhs); + assert(cmp_); + + const int r = cmp_->Compare(lhs->smallest, rhs->smallest); + if (r != 0) { + return (r < 0); + } + + // Break ties by file number + return (lhs->fd.GetNumber() < rhs->fd.GetNumber()); + } + + private: + const InternalKeyComparator* cmp_; + }; + struct LevelState { std::unordered_set deleted_files; // Map from file number to file meta data. @@ -247,8 +243,8 @@ class VersionBuilder::Rep { bool has_invalid_levels_; // Current levels of table files affected by additions/deletions. std::unordered_map table_file_levels_; - FileComparator level_zero_cmp_; - FileComparator level_nonzero_cmp_; + NewestFirstBySeqNo level_zero_cmp_; + BySmallestKey level_nonzero_cmp_; // Mutable metadata objects for all blob files affected by the series of // version edits. @@ -264,14 +260,11 @@ class VersionBuilder::Rep { base_vstorage_(base_vstorage), version_set_(version_set), num_levels_(base_vstorage->num_levels()), - has_invalid_levels_(false) { + has_invalid_levels_(false), + level_nonzero_cmp_(base_vstorage_->InternalComparator()) { assert(ioptions_); levels_ = new LevelState[num_levels_]; - level_zero_cmp_.sort_method = FileComparator::kLevel0; - level_nonzero_cmp_.sort_method = FileComparator::kLevelNon0; - level_nonzero_cmp_.internal_comparator = - base_vstorage_->InternalComparator(); } ~Rep() { @@ -297,6 +290,10 @@ class VersionBuilder::Rep { } } + // Mapping used for checking the consistency of links between SST files and + // blob files. It is built using the forward links (table file -> blob file), + // and is subsequently compared with the inverse mapping stored in the + // BlobFileMetaData objects. using ExpectedLinkedSsts = std::unordered_map; @@ -312,91 +309,155 @@ class VersionBuilder::Rep { (*expected_linked_ssts)[blob_file_number].emplace(table_file_number); } - Status CheckConsistencyDetails(VersionStorageInfo* vstorage) { - // Make sure the files are sorted correctly and that the links between - // table files and blob files are consistent. The latter is checked using - // the following mapping, which is built using the forward links - // (table file -> blob file), and is subsequently compared with the inverse - // mapping stored in the BlobFileMetaData objects. - ExpectedLinkedSsts expected_linked_ssts; + template + Status CheckConsistencyDetailsForLevel( + const VersionStorageInfo* vstorage, int level, Checker checker, + const std::string& sync_point, + ExpectedLinkedSsts* expected_linked_ssts) const { +#ifdef NDEBUG + (void)sync_point; +#endif - for (int level = 0; level < num_levels_; level++) { - auto& level_files = vstorage->LevelFiles(level); + assert(vstorage); + assert(level >= 0 && level < num_levels_); + assert(expected_linked_ssts); - if (level_files.empty()) { - continue; - } + const auto& level_files = vstorage->LevelFiles(level); + + if (level_files.empty()) { + return Status::OK(); + } + + assert(level_files[0]); + UpdateExpectedLinkedSsts(level_files[0]->fd.GetNumber(), + level_files[0]->oldest_blob_file_number, + expected_linked_ssts); + + for (size_t i = 1; i < level_files.size(); ++i) { + assert(level_files[i]); + UpdateExpectedLinkedSsts(level_files[i]->fd.GetNumber(), + level_files[i]->oldest_blob_file_number, + expected_linked_ssts); + + auto lhs = level_files[i - 1]; + auto rhs = level_files[i]; - assert(level_files[0]); - UpdateExpectedLinkedSsts(level_files[0]->fd.GetNumber(), - level_files[0]->oldest_blob_file_number, - &expected_linked_ssts); - for (size_t i = 1; i < level_files.size(); i++) { - assert(level_files[i]); - UpdateExpectedLinkedSsts(level_files[i]->fd.GetNumber(), - level_files[i]->oldest_blob_file_number, - &expected_linked_ssts); - - auto f1 = level_files[i - 1]; - auto f2 = level_files[i]; - if (level == 0) { #ifndef NDEBUG - auto pair = std::make_pair(&f1, &f2); - TEST_SYNC_POINT_CALLBACK("VersionBuilder::CheckConsistency0", &pair); + auto pair = std::make_pair(&lhs, &rhs); + TEST_SYNC_POINT_CALLBACK(sync_point, &pair); #endif - if (!level_zero_cmp_(f1, f2)) { - return Status::Corruption("L0 files are not sorted properly"); + + const Status s = checker(lhs, rhs); + if (!s.ok()) { + return s; + } + } + + return Status::OK(); + } + + // Make sure table files are sorted correctly and that the links between + // table files and blob files are consistent. + Status CheckConsistencyDetails(const VersionStorageInfo* vstorage) const { + assert(vstorage); + + ExpectedLinkedSsts expected_linked_ssts; + + if (num_levels_ > 0) { + // Check L0 + { + auto l0_checker = [this](const FileMetaData* lhs, + const FileMetaData* rhs) { + assert(lhs); + assert(rhs); + + if (!level_zero_cmp_(lhs, rhs)) { + std::ostringstream oss; + oss << "L0 files are not sorted properly: files #" + << lhs->fd.GetNumber() << ", #" << rhs->fd.GetNumber(); + + return Status::Corruption("VersionBuilder", oss.str()); } - if (f2->fd.smallest_seqno == f2->fd.largest_seqno) { + if (rhs->fd.smallest_seqno == rhs->fd.largest_seqno) { // This is an external file that we ingested - SequenceNumber external_file_seqno = f2->fd.smallest_seqno; - if (!(external_file_seqno < f1->fd.largest_seqno || + const SequenceNumber external_file_seqno = rhs->fd.smallest_seqno; + + if (!(external_file_seqno < lhs->fd.largest_seqno || external_file_seqno == 0)) { - return Status::Corruption( - "L0 file with seqno " + ToString(f1->fd.smallest_seqno) + - " " + ToString(f1->fd.largest_seqno) + - " vs. file with global_seqno" + - ToString(external_file_seqno) + " with fileNumber " + - ToString(f1->fd.GetNumber())); + std::ostringstream oss; + oss << "L0 file #" << lhs->fd.GetNumber() << " with seqno " + << lhs->fd.smallest_seqno << ' ' << lhs->fd.largest_seqno + << " vs. file #" << rhs->fd.GetNumber() + << " with global_seqno " << external_file_seqno; + + return Status::Corruption("VersionBuilder", oss.str()); } - } else if (f1->fd.smallest_seqno <= f2->fd.smallest_seqno) { - return Status::Corruption("L0 files seqno " + - ToString(f1->fd.smallest_seqno) + " " + - ToString(f1->fd.largest_seqno) + " " + - ToString(f1->fd.GetNumber()) + " vs. " + - ToString(f2->fd.smallest_seqno) + " " + - ToString(f2->fd.largest_seqno) + " " + - ToString(f2->fd.GetNumber())); + } else if (lhs->fd.smallest_seqno <= rhs->fd.smallest_seqno) { + std::ostringstream oss; + oss << "L0 file #" << lhs->fd.GetNumber() << " with seqno " + << lhs->fd.smallest_seqno << ' ' << lhs->fd.largest_seqno + << " vs. file #" << rhs->fd.GetNumber() << " with seqno " + << rhs->fd.smallest_seqno << ' ' << rhs->fd.largest_seqno; + + return Status::Corruption("VersionBuilder", oss.str()); } - } else { -#ifndef NDEBUG - auto pair = std::make_pair(&f1, &f2); - TEST_SYNC_POINT_CALLBACK("VersionBuilder::CheckConsistency1", &pair); -#endif - if (!level_nonzero_cmp_(f1, f2)) { - return Status::Corruption( - "L" + ToString(level) + - " files are not sorted properly: files #" + - ToString(f1->fd.GetNumber()) + ", #" + - ToString(f2->fd.GetNumber())); + + return Status::OK(); + }; + + const Status s = CheckConsistencyDetailsForLevel( + vstorage, /* level */ 0, l0_checker, + "VersionBuilder::CheckConsistency0", &expected_linked_ssts); + if (!s.ok()) { + return s; + } + } + + // Check L1 and up + const InternalKeyComparator* const icmp = vstorage->InternalComparator(); + assert(icmp); + + for (int level = 1; level < num_levels_; ++level) { + auto checker = [this, level, icmp](const FileMetaData* lhs, + const FileMetaData* rhs) { + assert(lhs); + assert(rhs); + + if (!level_nonzero_cmp_(lhs, rhs)) { + std::ostringstream oss; + oss << 'L' << level << " files are not sorted properly: files #" + << lhs->fd.GetNumber() << ", #" << rhs->fd.GetNumber(); + + return Status::Corruption("VersionBuilder", oss.str()); } - // Make sure there is no overlap in levels > 0 - if (vstorage->InternalComparator()->Compare(f1->largest, - f2->smallest) >= 0) { - return Status::Corruption( - "L" + ToString(level) + " have overlapping ranges: file #" + - ToString(f1->fd.GetNumber()) + - " largest key: " + (f1->largest).DebugString(true) + - " vs. file #" + ToString(f2->fd.GetNumber()) + - " smallest key: " + (f2->smallest).DebugString(true)); + // Make sure there is no overlap in level + if (icmp->Compare(lhs->largest, rhs->smallest) >= 0) { + std::ostringstream oss; + oss << 'L' << level << " has overlapping ranges: file #" + << lhs->fd.GetNumber() + << " largest key: " << lhs->largest.DebugString(true) + << " vs. file #" << rhs->fd.GetNumber() + << " smallest key: " << rhs->smallest.DebugString(true); + + return Status::Corruption("VersionBuilder", oss.str()); } + + return Status::OK(); + }; + + const Status s = CheckConsistencyDetailsForLevel( + vstorage, level, checker, "VersionBuilder::CheckConsistency1", + &expected_linked_ssts); + if (!s.ok()) { + return s; } } } - // Make sure that all blob files in the version have non-garbage data. + // Make sure that all blob files in the version have non-garbage data and + // the links between them and the table files are consistent. const auto& blob_files = vstorage->GetBlobFiles(); for (const auto& pair : blob_files) { const uint64_t blob_file_number = pair.first; @@ -428,7 +489,9 @@ class VersionBuilder::Rep { return ret_s; } - Status CheckConsistency(VersionStorageInfo* vstorage) { + Status CheckConsistency(const VersionStorageInfo* vstorage) const { + assert(vstorage); + // Always run consistency checks in debug build #ifdef NDEBUG if (!vstorage->force_consistency_checks()) { @@ -748,7 +811,7 @@ class VersionBuilder::Rep { } // Apply all of the edits in *edit to the current state. - Status Apply(VersionEdit* edit) { + Status Apply(const VersionEdit* edit) { { const Status s = CheckConsistency(base_vstorage_); if (!s.ok()) { @@ -910,7 +973,8 @@ class VersionBuilder::Rep { } } - void MaybeAddFile(VersionStorageInfo* vstorage, int level, FileMetaData* f) { + void MaybeAddFile(VersionStorageInfo* vstorage, int level, + FileMetaData* f) const { const uint64_t file_number = f->fd.GetNumber(); const auto& level_state = levels_[level]; @@ -935,52 +999,52 @@ class VersionBuilder::Rep { } } - void SaveSSTFilesTo(VersionStorageInfo* vstorage) { - for (int level = 0; level < num_levels_; level++) { - const auto& cmp = (level == 0) ? level_zero_cmp_ : level_nonzero_cmp_; - // Merge the set of added files with the set of pre-existing files. - // Drop any deleted files. Store the result in *v. - const auto& base_files = base_vstorage_->LevelFiles(level); - const auto& unordered_added_files = levels_[level].added_files; - vstorage->Reserve(level, - base_files.size() + unordered_added_files.size()); - - // Sort added files for the level. - std::vector added_files; - added_files.reserve(unordered_added_files.size()); - for (const auto& pair : unordered_added_files) { - added_files.push_back(pair.second); + template + void SaveSSTFilesTo(VersionStorageInfo* vstorage, int level, Cmp cmp) const { + // Merge the set of added files with the set of pre-existing files. + // Drop any deleted files. Store the result in *vstorage. + const auto& base_files = base_vstorage_->LevelFiles(level); + const auto& unordered_added_files = levels_[level].added_files; + vstorage->Reserve(level, base_files.size() + unordered_added_files.size()); + + // Sort added files for the level. + std::vector added_files; + added_files.reserve(unordered_added_files.size()); + for (const auto& pair : unordered_added_files) { + added_files.push_back(pair.second); + } + std::sort(added_files.begin(), added_files.end(), cmp); + + auto base_iter = base_files.begin(); + auto base_end = base_files.end(); + auto added_iter = added_files.begin(); + auto added_end = added_files.end(); + while (added_iter != added_end || base_iter != base_end) { + if (base_iter == base_end || + (added_iter != added_end && cmp(*added_iter, *base_iter))) { + MaybeAddFile(vstorage, level, *added_iter++); + } else { + MaybeAddFile(vstorage, level, *base_iter++); } - std::sort(added_files.begin(), added_files.end(), cmp); + } + } -#ifndef NDEBUG - FileMetaData* prev_added_file = nullptr; - for (const auto& added : added_files) { - if (level > 0 && prev_added_file != nullptr) { - assert(base_vstorage_->InternalComparator()->Compare( - prev_added_file->smallest, added->smallest) <= 0); - } - prev_added_file = added; - } -#endif + void SaveSSTFilesTo(VersionStorageInfo* vstorage) const { + assert(vstorage); - auto base_iter = base_files.begin(); - auto base_end = base_files.end(); - auto added_iter = added_files.begin(); - auto added_end = added_files.end(); - while (added_iter != added_end || base_iter != base_end) { - if (base_iter == base_end || - (added_iter != added_end && cmp(*added_iter, *base_iter))) { - MaybeAddFile(vstorage, level, *added_iter++); - } else { - MaybeAddFile(vstorage, level, *base_iter++); - } - } + if (!num_levels_) { + return; + } + + SaveSSTFilesTo(vstorage, /* level */ 0, level_zero_cmp_); + + for (int level = 1; level < num_levels_; ++level) { + SaveSSTFilesTo(vstorage, level, level_nonzero_cmp_); } } // Save the current state in *vstorage. - Status SaveTo(VersionStorageInfo* vstorage) { + Status SaveTo(VersionStorageInfo* vstorage) const { Status s = CheckConsistency(base_vstorage_); if (!s.ok()) { return s; @@ -1117,9 +1181,11 @@ bool VersionBuilder::CheckConsistencyForNumLevels() { return rep_->CheckConsistencyForNumLevels(); } -Status VersionBuilder::Apply(VersionEdit* edit) { return rep_->Apply(edit); } +Status VersionBuilder::Apply(const VersionEdit* edit) { + return rep_->Apply(edit); +} -Status VersionBuilder::SaveTo(VersionStorageInfo* vstorage) { +Status VersionBuilder::SaveTo(VersionStorageInfo* vstorage) const { return rep_->SaveTo(vstorage); } diff --git a/db/version_builder.h b/db/version_builder.h index 5a5c9ea18..b56257a13 100644 --- a/db/version_builder.h +++ b/db/version_builder.h @@ -37,8 +37,8 @@ class VersionBuilder { ~VersionBuilder(); bool CheckConsistencyForNumLevels(); - Status Apply(VersionEdit* edit); - Status SaveTo(VersionStorageInfo* vstorage); + Status Apply(const VersionEdit* edit); + Status SaveTo(VersionStorageInfo* vstorage) const; Status LoadTableHandlers(InternalStats* internal_stats, int max_threads, bool prefetch_index_and_filter_in_cache, bool is_initial_load, @@ -66,5 +66,4 @@ class BaseReferencedVersionBuilder { Version* version_; }; -extern bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b); } // namespace ROCKSDB_NAMESPACE diff --git a/db/version_set.cc b/db/version_set.cc index f68a6fee8..2913f4e8f 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -120,10 +120,9 @@ Status OverlapWithIterator(const Comparator* ucmp, // are MergeInProgress). class FilePicker { public: - FilePicker(std::vector* files, const Slice& user_key, - const Slice& ikey, autovector* file_levels, - unsigned int num_levels, FileIndexer* file_indexer, - const Comparator* user_comparator, + FilePicker(const Slice& user_key, const Slice& ikey, + autovector* file_levels, unsigned int num_levels, + FileIndexer* file_indexer, const Comparator* user_comparator, const InternalKeyComparator* internal_comparator) : num_levels_(num_levels), curr_level_(static_cast(-1)), @@ -131,9 +130,6 @@ class FilePicker { hit_file_level_(static_cast(-1)), search_left_bound_(0), search_right_bound_(FileIndexer::kLevelMaxIndex), -#ifndef NDEBUG - files_(files), -#endif level_files_brief_(file_levels), is_hit_file_last_in_level_(false), curr_file_level_(nullptr), @@ -142,9 +138,6 @@ class FilePicker { file_indexer_(file_indexer), user_comparator_(user_comparator), internal_comparator_(internal_comparator) { -#ifdef NDEBUG - (void)files; -#endif // Setup member variables to search first level. search_ended_ = !PrepareNextLevel(); if (!search_ended_) { @@ -214,23 +207,7 @@ class FilePicker { } } } -#ifndef NDEBUG - // Sanity check to make sure that the files are correctly sorted - if (prev_file_) { - if (curr_level_ != 0) { - int comp_sign = internal_comparator_->Compare( - prev_file_->largest_key, f->smallest_key); - assert(comp_sign < 0); - } else { - // level == 0, the current file cannot be newer than the previous - // one. Use compressed data structure, has no attribute seqNo - assert(curr_index_in_curr_level_ > 0); - assert(!NewestFirstBySeqNo(files_[0][curr_index_in_curr_level_], - files_[0][curr_index_in_curr_level_-1])); - } - } - prev_file_ = f; -#endif + returned_file_level_ = curr_level_; if (curr_level_ > 0 && cmp_largest < 0) { // No more files to search in this level. @@ -262,9 +239,6 @@ class FilePicker { unsigned int hit_file_level_; int32_t search_left_bound_; int32_t search_right_bound_; -#ifndef NDEBUG - std::vector* files_; -#endif autovector* level_files_brief_; bool search_ended_; bool is_hit_file_last_in_level_; @@ -276,9 +250,6 @@ class FilePicker { FileIndexer* file_indexer_; const Comparator* user_comparator_; const InternalKeyComparator* internal_comparator_; -#ifndef NDEBUG - FdWithKeyRange* prev_file_; -#endif // Setup local variables to search next level. // Returns false if there are no more levels to search. @@ -348,9 +319,7 @@ class FilePicker { } start_index_in_curr_level_ = start_index; curr_index_in_curr_level_ = start_index; -#ifndef NDEBUG - prev_file_ = nullptr; -#endif + return true; } // curr_level_ = num_levels_. So, no more levels to search. @@ -2022,10 +1991,10 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k, pinned_iters_mgr.StartPinning(); } - FilePicker fp( - storage_info_.files_, user_key, ikey, &storage_info_.level_files_brief_, - storage_info_.num_non_empty_levels_, &storage_info_.file_indexer_, - user_comparator(), internal_comparator()); + FilePicker fp(user_key, ikey, &storage_info_.level_files_brief_, + storage_info_.num_non_empty_levels_, + &storage_info_.file_indexer_, user_comparator(), + internal_comparator()); FdWithKeyRange* f = fp.GetNextFile(); while (f != nullptr) { diff --git a/db/version_set.h b/db/version_set.h index 6bfe6763f..63874b2f9 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -491,7 +491,7 @@ class VersionStorageInfo { next_file_to_compact_by_size_[level] = 0; } - const InternalKeyComparator* InternalComparator() { + const InternalKeyComparator* InternalComparator() const { return internal_comparator_; }