From a2e0d890ed6da025c0d34ab46b63389f4e6b3f2d Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Tue, 1 Jul 2014 08:55:04 +0200 Subject: [PATCH] No need for files_by_size_ in universal compaction Summary: files_by_size_ is sorted by time in case of universal compaction. However, Version::files_ is also sorted by time. So no need for files_by_size_ Test Plan: 1) make check with the change 2) make check with `assert(last_index == c->input_version_->files_[level].size() - 1);` in compaction picker Reviewers: dhruba, haobo, yhchiang, sdong, ljin Reviewed By: ljin Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D19125 --- db/compaction_picker.cc | 79 ++++++++++++++++------------------------- db/version_set.cc | 41 +++++---------------- db/version_set.h | 2 +- 3 files changed, 41 insertions(+), 81 deletions(-) diff --git a/db/compaction_picker.cc b/db/compaction_picker.cc index f5551f774..92bd81d3f 100644 --- a/db/compaction_picker.cc +++ b/db/compaction_picker.cc @@ -585,15 +585,9 @@ Compaction* UniversalCompactionPicker::PickCompaction(Version* version, newerfile = f; } - // The files are sorted from newest first to oldest last. - std::vector& file_by_time = c->input_version_->files_by_size_[level]; - // Is the earliest file part of this compaction? - int last_index = file_by_time[file_by_time.size()-1]; - FileMetaData* last_file = c->input_version_->files_[level][last_index]; - if (c->inputs_[0][c->inputs_[0].size()-1] == last_file) { - c->bottommost_level_ = true; - } + FileMetaData* last_file = c->input_version_->files_[level].back(); + c->bottommost_level_ = c->inputs_[0].back() == last_file; // update statistics MeasureTime(options_->statistics.get(), NUM_FILES_IN_SINGLE_COMPACTION, @@ -628,12 +622,12 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp( options_->compaction_options_universal.max_merge_width; // The files are sorted from newest first to oldest last. - std::vector& file_by_time = version->files_by_size_[level]; + const auto& files = version->files_[level]; + FileMetaData* f = nullptr; bool done = false; int start_index = 0; unsigned int candidate_count = 0; - assert(file_by_time.size() == version->files_[level].size()); unsigned int max_files_to_compact = std::min(max_merge_width, max_number_of_files_to_compact); @@ -641,14 +635,13 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp( // Considers a candidate file only if it is smaller than the // total size accumulated so far. - for (unsigned int loop = 0; loop < file_by_time.size(); loop++) { + for (unsigned int loop = 0; loop < files.size(); loop++) { candidate_count = 0; // Skip files that are already being compacted - for (f = nullptr; loop < file_by_time.size(); loop++) { - int index = file_by_time[loop]; - f = version->files_[level][index]; + for (f = nullptr; loop < files.size(); loop++) { + f = files[loop]; if (!f->being_compacted) { candidate_count = 1; @@ -670,11 +663,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp( } // Check if the suceeding files need compaction. - for (unsigned int i = loop+1; - candidate_count < max_files_to_compact && i < file_by_time.size(); - i++) { - int index = file_by_time[i]; - FileMetaData* f = version->files_[level][index]; + for (unsigned int i = loop + 1; + candidate_count < max_files_to_compact && i < files.size(); i++) { + FileMetaData* f = files[i]; if (f->being_compacted) { break; } @@ -713,14 +704,14 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp( break; } else { for (unsigned int i = loop; - i < loop + candidate_count && i < file_by_time.size(); i++) { - int index = file_by_time[i]; - FileMetaData* f = version->files_[level][index]; - LogToBuffer(log_buffer, - "[%s] Universal: Skipping file %" PRIu64 "[%d] " - "with size %" PRIu64 " (compensated size %" PRIu64 ") %d\n", - version->cfd_->GetName().c_str(), f->fd.GetNumber(), - i, f->fd.GetFileSize(), f->compensated_file_size, f->being_compacted); + i < loop + candidate_count && i < files.size(); i++) { + FileMetaData* f = files[i]; + LogToBuffer(log_buffer, "[%s] Universal: Skipping file %" PRIu64 + "[%d] with size %" PRIu64 + " (compensated size %" PRIu64 ") %d\n", + version->cfd_->GetName().c_str(), f->fd.GetNumber(), i, + f->fd.GetFileSize(), f->compensated_file_size, + f->being_compacted); } } } @@ -736,10 +727,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp( if (ratio_to_compress >= 0) { uint64_t total_size = version->NumLevelBytes(level); uint64_t older_file_size = 0; - for (unsigned int i = file_by_time.size() - 1; i >= first_index_after; - i--) { - older_file_size += - version->files_[level][file_by_time[i]]->fd.GetFileSize(); + for (unsigned int i = files.size() - 1; + i >= first_index_after; i--) { + older_file_size += files[i]->fd.GetFileSize(); if (older_file_size * 100L >= total_size * (long) ratio_to_compress) { enable_compression = false; break; @@ -752,8 +742,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp( c->score_ = score; for (unsigned int i = start_index; i < first_index_after; i++) { - int index = file_by_time[i]; - FileMetaData* f = c->input_version_->files_[level][index]; + FileMetaData* f = c->input_version_->files_[level][i]; c->inputs_[0].push_back(f); LogToBuffer(log_buffer, "[%s] Universal: Picking file %" PRIu64 "[%d] " @@ -780,8 +769,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( max_size_amplification_percent; // The files are sorted from newest first to oldest last. - std::vector& file_by_time = version->files_by_size_[level]; - assert(file_by_time.size() == version->files_[level].size()); + const auto& files = version->files_[level]; unsigned int candidate_count = 0; uint64_t candidate_size = 0; @@ -789,9 +777,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( FileMetaData* f = nullptr; // Skip files that are already being compacted - for (unsigned int loop = 0; loop < file_by_time.size() - 1; loop++) { - int index = file_by_time[loop]; - f = version->files_[level][index]; + for (unsigned int loop = 0; loop < files.size() - 1; loop++) { + f = files[loop]; if (!f->being_compacted) { start_index = loop; // Consider this as the first candidate. break; @@ -812,10 +799,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( " to reduce size amp.\n"); // keep adding up all the remaining files - for (unsigned int loop = start_index; loop < file_by_time.size() - 1; - loop++) { - int index = file_by_time[loop]; - f = version->files_[level][index]; + for (unsigned int loop = start_index; loop < files.size() - 1; loop++) { + f = files[loop]; if (f->being_compacted) { LogToBuffer( log_buffer, @@ -832,8 +817,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( } // size of earliest file - int index = file_by_time[file_by_time.size() - 1]; - uint64_t earliest_file_size = version->files_[level][index]->fd.GetFileSize(); + uint64_t earliest_file_size = files.back()->fd.GetFileSize(); // size amplification = percentage of additional size if (candidate_size * 100 < ratio * earliest_file_size) { @@ -850,7 +834,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( "earliest-file-size %" PRIu64, version->cfd_->GetName().c_str(), candidate_size, earliest_file_size); } - assert(start_index >= 0 && start_index < file_by_time.size() - 1); + assert(start_index >= 0 && start_index < files.size() - 1); // create a compaction request // We always compact all the files, so always compress. @@ -858,9 +842,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( new Compaction(version, level, level, MaxFileSizeForLevel(level), LLONG_MAX, false, true); c->score_ = score; - for (unsigned int loop = start_index; loop < file_by_time.size(); loop++) { - int index = file_by_time[loop]; - f = c->input_version_->files_[level][index]; + for (unsigned int loop = start_index; loop < files.size(); loop++) { + f = c->input_version_->files_[level][loop]; c->inputs_[0].push_back(f); LogToBuffer(log_buffer, "[%s] Universal: size amp picking file %" PRIu64 "[%d] " diff --git a/db/version_set.cc b/db/version_set.cc index 29611f0a0..c54f0b591 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -861,7 +861,6 @@ void Version::ComputeCompactionScore( } namespace { - // Compator that is used to sort files based on their size // In normal mode: descending size bool CompareCompensatedSizeDescending(const Version::Fsize& first, @@ -869,18 +868,6 @@ bool CompareCompensatedSizeDescending(const Version::Fsize& first, return (first.file->compensated_file_size > second.file->compensated_file_size); } -// A static compator used to sort files based on their seqno -// In universal style : descending seqno -bool CompareSeqnoDescending(const Version::Fsize& first, - const Version::Fsize& second) { - if (first.file->smallest_seqno > second.file->smallest_seqno) { - assert(first.file->largest_seqno > second.file->largest_seqno); - return true; - } - assert(first.file->largest_seqno <= second.file->largest_seqno); - return false; -} - } // anonymous namespace void Version::UpdateNumNonEmptyLevels() { @@ -895,19 +882,15 @@ void Version::UpdateNumNonEmptyLevels() { } void Version::UpdateFilesBySize() { - if (cfd_->options()->compaction_style == kCompactionStyleFIFO) { + if (cfd_->options()->compaction_style == kCompactionStyleFIFO || + cfd_->options()->compaction_style == kCompactionStyleUniversal) { // don't need this return; } // No need to sort the highest level because it is never compacted. - int max_level = - (cfd_->options()->compaction_style == kCompactionStyleUniversal) - ? NumberLevels() - : NumberLevels() - 1; - - for (int level = 0; level < max_level; level++) { + for (int level = 0; level < NumberLevels() - 1; level++) { const std::vector& files = files_[level]; - std::vector& files_by_size = files_by_size_[level]; + auto& files_by_size = files_by_size_[level]; assert(files_by_size.size() == 0); // populate a temp vector for sorting based on size @@ -918,18 +901,12 @@ void Version::UpdateFilesBySize() { } // sort the top number_of_files_to_sort_ based on file size - if (cfd_->options()->compaction_style == kCompactionStyleUniversal) { - int num = temp.size(); - std::partial_sort(temp.begin(), temp.begin() + num, temp.end(), - CompareSeqnoDescending); - } else { - int num = Version::number_of_files_to_sort_; - if (num > (int)temp.size()) { - num = temp.size(); - } - std::partial_sort(temp.begin(), temp.begin() + num, temp.end(), - CompareCompensatedSizeDescending); + size_t num = Version::number_of_files_to_sort_; + if (num > temp.size()) { + num = temp.size(); } + std::partial_sort(temp.begin(), temp.begin() + num, temp.end(), + CompareCompensatedSizeDescending); assert(temp.size() == files.size()); // initialize files_by_size_ diff --git a/db/version_set.h b/db/version_set.h index 542db7466..04f52a508 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -294,7 +294,7 @@ class Version { // that on a running system, we need to look at only the first // few largest files because a new version is created every few // seconds/minutes (because of concurrent compactions). - static const int number_of_files_to_sort_ = 50; + static const size_t number_of_files_to_sort_ = 50; // Level that should be compacted next and its compaction score. // Score < 1 means compaction is not strictly needed. These fields