// Copyright (c) 2013, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/version_builder.h" #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS #endif #include #include #include #include #include #include #include "db/dbformat.h" #include "db/table_cache.h" #include "db/version_set.h" #include "table/table_reader.h" namespace rocksdb { bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b) { if (a->smallest_seqno != b->smallest_seqno) { return a->smallest_seqno > b->smallest_seqno; } if (a->largest_seqno != b->largest_seqno) { return a->largest_seqno > b->largest_seqno; } // Break ties by file number return a->fd.GetNumber() > b->fd.GetNumber(); } namespace { bool BySmallestKey(FileMetaData* a, FileMetaData* b, const InternalKeyComparator* cmp) { int r = cmp->Compare(a->smallest, b->smallest); if (r != 0) { return (r < 0); } // Break ties by file number return (a->fd.GetNumber() < b->fd.GetNumber()); } } // namespace class VersionBuilder::Rep { private: // Helper to sort files_ in v // kLevel0 -- NewestFirstBySeqNo // kLevelNon0 -- BySmallestKey struct FileComparator { enum SortMethod { kLevel0 = 0, kLevelNon0 = 1, } sort_method; const InternalKeyComparator* internal_comparator; bool operator()(FileMetaData* f1, FileMetaData* f2) const { switch (sort_method) { case kLevel0: return NewestFirstBySeqNo(f1, f2); case kLevelNon0: return BySmallestKey(f1, f2, internal_comparator); } assert(false); return false; } }; struct LevelState { std::unordered_set deleted_files; // Map from file number to file meta data. std::unordered_map added_files; }; const EnvOptions& env_options_; TableCache* table_cache_; VersionStorageInfo* base_vstorage_; LevelState* levels_; FileComparator level_zero_cmp_; FileComparator level_nonzero_cmp_; public: Rep(const EnvOptions& env_options, TableCache* table_cache, VersionStorageInfo* base_vstorage) : env_options_(env_options), table_cache_(table_cache), base_vstorage_(base_vstorage) { levels_ = new LevelState[base_vstorage_->num_levels()]; level_zero_cmp_.sort_method = FileComparator::kLevel0; level_nonzero_cmp_.sort_method = FileComparator::kLevelNon0; level_nonzero_cmp_.internal_comparator = base_vstorage_->InternalComparator(); } ~Rep() { for (int level = 0; level < base_vstorage_->num_levels(); level++) { const auto& added = levels_[level].added_files; for (auto& pair : added) { UnrefFile(pair.second); } } delete[] levels_; } void UnrefFile(FileMetaData* f) { f->refs--; if (f->refs <= 0) { if (f->table_reader_handle) { assert(table_cache_ != nullptr); table_cache_->ReleaseHandle(f->table_reader_handle); f->table_reader_handle = nullptr; } delete f; } } void CheckConsistency(VersionStorageInfo* vstorage) { #ifndef NDEBUG // make sure the files are sorted correctly for (int level = 0; level < vstorage->num_levels(); level++) { auto& level_files = vstorage->LevelFiles(level); for (size_t i = 1; i < level_files.size(); i++) { auto f1 = level_files[i - 1]; auto f2 = level_files[i]; if (level == 0) { assert(level_zero_cmp_(f1, f2)); assert(f1->largest_seqno > f2->largest_seqno); } else { assert(level_nonzero_cmp_(f1, f2)); // Make sure there is no overlap in levels > 0 if (vstorage->InternalComparator()->Compare(f1->largest, f2->smallest) >= 0) { fprintf(stderr, "overlapping ranges in same level %s vs. %s\n", (f1->largest).DebugString().c_str(), (f2->smallest).DebugString().c_str()); abort(); } } } } #endif } void CheckConsistencyForDeletes(VersionEdit* edit, uint64_t number, int level) { #ifndef NDEBUG // a file to be deleted better exist in the previous version bool found = false; for (int l = 0; !found && l < base_vstorage_->num_levels(); l++) { const std::vector& base_files = base_vstorage_->LevelFiles(l); for (unsigned int i = 0; i < base_files.size(); i++) { FileMetaData* f = base_files[i]; if (f->fd.GetNumber() == number) { found = true; break; } } } // if the file did not exist in the previous version, then it // is possibly moved from lower level to higher level in current // version for (int l = level + 1; !found && l < base_vstorage_->num_levels(); l++) { auto& level_added = levels_[l].added_files; auto got = level_added.find(number); if (got != level_added.end()) { found = true; break; } } // maybe this file was added in a previous edit that was Applied if (!found) { auto& level_added = levels_[level].added_files; auto got = level_added.find(number); if (got != level_added.end()) { found = true; } } if (!found) { fprintf(stderr, "not found %" PRIu64 "\n", number); } assert(found); #endif } // Apply all of the edits in *edit to the current state. void Apply(VersionEdit* edit) { CheckConsistency(base_vstorage_); // Delete files const VersionEdit::DeletedFileSet& del = edit->GetDeletedFiles(); for (const auto& del_file : del) { const auto level = del_file.first; const auto number = del_file.second; levels_[level].deleted_files.insert(number); CheckConsistencyForDeletes(edit, number, level); auto exising = levels_[level].added_files.find(number); if (exising != levels_[level].added_files.end()) { UnrefFile(exising->second); levels_[level].added_files.erase(number); } } // Add new files for (const auto& new_file : edit->GetNewFiles()) { const int level = new_file.first; FileMetaData* f = new FileMetaData(new_file.second); f->refs = 1; assert(levels_[level].added_files.find(f->fd.GetNumber()) == levels_[level].added_files.end()); levels_[level].deleted_files.erase(f->fd.GetNumber()); levels_[level].added_files[f->fd.GetNumber()] = f; } } // Save the current state in *v. void SaveTo(VersionStorageInfo* vstorage) { CheckConsistency(base_vstorage_); CheckConsistency(vstorage); for (int level = 0; level < base_vstorage_->num_levels(); level++) { const auto& cmp = (level == 0) ? level_zero_cmp_ : level_nonzero_cmp_; // Merge the set of added files with the set of pre-existing files. // Drop any deleted files. Store the result in *v. const auto& base_files = base_vstorage_->LevelFiles(level); auto base_iter = base_files.begin(); auto base_end = base_files.end(); const auto& unordered_added_files = levels_[level].added_files; vstorage->Reserve(level, base_files.size() + unordered_added_files.size()); // Sort added files for the level. std::vector added_files; added_files.reserve(unordered_added_files.size()); for (const auto& pair : unordered_added_files) { added_files.push_back(pair.second); } std::sort(added_files.begin(), added_files.end(), cmp); #ifndef NDEBUG FileMetaData* prev_file = nullptr; #endif for (const auto& added : added_files) { #ifndef NDEBUG if (level > 0 && prev_file != nullptr) { assert(base_vstorage_->InternalComparator()->Compare( prev_file->smallest, added->smallest) <= 0); } prev_file = added; #endif // Add all smaller files listed in base_ for (auto bpos = std::upper_bound(base_iter, base_end, added, cmp); base_iter != bpos; ++base_iter) { MaybeAddFile(vstorage, level, *base_iter); } MaybeAddFile(vstorage, level, added); } // Add remaining base files for (; base_iter != base_end; ++base_iter) { MaybeAddFile(vstorage, level, *base_iter); } } CheckConsistency(vstorage); } void LoadTableHandlers() { assert(table_cache_ != nullptr); for (int level = 0; level < base_vstorage_->num_levels(); level++) { for (auto& file_meta_pair : levels_[level].added_files) { auto* file_meta = file_meta_pair.second; assert(!file_meta->table_reader_handle); table_cache_->FindTable( env_options_, *(base_vstorage_->InternalComparator()), file_meta->fd, &file_meta->table_reader_handle, false); if (file_meta->table_reader_handle != nullptr) { // Load table_reader file_meta->fd.table_reader = table_cache_->GetTableReaderFromHandle( file_meta->table_reader_handle); } } } } void MaybeAddFile(VersionStorageInfo* vstorage, int level, FileMetaData* f) { if (levels_[level].deleted_files.count(f->fd.GetNumber()) > 0) { // File is deleted: do nothing } else { vstorage->AddFile(level, f); } } }; VersionBuilder::VersionBuilder(const EnvOptions& env_options, TableCache* table_cache, VersionStorageInfo* base_vstorage) : rep_(new Rep(env_options, table_cache, base_vstorage)) {} VersionBuilder::~VersionBuilder() { delete rep_; } void VersionBuilder::CheckConsistency(VersionStorageInfo* vstorage) { rep_->CheckConsistency(vstorage); } void VersionBuilder::CheckConsistencyForDeletes(VersionEdit* edit, uint64_t number, int level) { rep_->CheckConsistencyForDeletes(edit, number, level); } void VersionBuilder::Apply(VersionEdit* edit) { rep_->Apply(edit); } void VersionBuilder::SaveTo(VersionStorageInfo* vstorage) { rep_->SaveTo(vstorage); } void VersionBuilder::LoadTableHandlers() { rep_->LoadTableHandlers(); } void VersionBuilder::MaybeAddFile(VersionStorageInfo* vstorage, int level, FileMetaData* f) { rep_->MaybeAddFile(vstorage, level, f); } } // namespace rocksdb