Add a mode to always pick the oldest file to compact for each level

Summary:
Add options.compaction_pri, which specifies the policy about which file to compact first.
kCompactionPriByLargestSeq will compact oldest files first.
Verified the behavior in db_bench but did not write unit tests yet. Also need to make it settable through option string and dynamically changeable.

Test Plan: Will write unit tests

Reviewers: igor, rven, anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, MarkCallaghan

Reviewed By: yhchiang

Subscribers: leveldb, dhruba

Differential Revision: https://reviews.facebook.net/D45951
main
sdong 9 years ago
parent dd2e1eeb35
commit f1b9f804e9
  1. 7
      db/compaction_picker.cc
  2. 2
      db/compaction_picker_test.cc
  3. 6
      db/db_bench.cc
  4. 2
      db/version_builder_test.cc
  5. 36
      db/version_set.cc
  6. 27
      db/version_set.h
  7. 12
      include/rocksdb/options.h
  8. 3
      util/mutable_cf_options.h
  9. 4
      util/options.cc

@ -1028,7 +1028,7 @@ bool LevelCompactionPicker::PickCompactionBySize(VersionStorageInfo* vstorage,
// Pick the largest file in this level that is not already
// being compacted
const std::vector<int>& file_size = vstorage->FilesBySize(level);
const std::vector<int>& file_size = vstorage->FilesByCompactionPri(level);
const std::vector<FileMetaData*>& level_files = vstorage->LevelFiles(level);
// record the first file that is not yet compacted
@ -1039,11 +1039,6 @@ bool LevelCompactionPicker::PickCompactionBySize(VersionStorageInfo* vstorage,
int index = file_size[i];
auto* f = level_files[index];
assert((i == file_size.size() - 1) ||
(i >= VersionStorageInfo::kNumberFilesToSort - 1) ||
(f->compensated_file_size >=
level_files[file_size[i + 1]]->compensated_file_size));
// do not pick a file to compact if it is being compacted
// from n-1 level.
if (f->being_compacted) {

@ -117,7 +117,7 @@ class CompactionPickerTest : public testing::Test {
void UpdateVersionStorageInfo() {
vstorage_->CalculateBaseBytes(ioptions_, mutable_cf_options_);
vstorage_->UpdateFilesBySize();
vstorage_->UpdateFilesByCompactionPri(mutable_cf_options_);
vstorage_->UpdateNumNonEmptyLevels();
vstorage_->GenerateFileIndexer();
vstorage_->GenerateLevelFilesBrief();

@ -314,6 +314,10 @@ static rocksdb::CompactionStyle FLAGS_compaction_style_e;
DEFINE_int32(compaction_style, (int32_t) rocksdb::Options().compaction_style,
"style of compaction: level-based vs universal");
static rocksdb::CompactionPri FLAGS_compaction_pri_e;
DEFINE_int32(compaction_pri, (int32_t)rocksdb::Options().compaction_style,
"priority of files to compaction: by size or by data age");
DEFINE_int32(universal_size_ratio, 0,
"Percentage flexibility while comparing file size"
" (for universal compaction only).");
@ -2248,6 +2252,7 @@ class Benchmark {
options.max_subcompactions = static_cast<uint32_t>(FLAGS_subcompactions);
options.max_background_flushes = FLAGS_max_background_flushes;
options.compaction_style = FLAGS_compaction_style_e;
options.compaction_pri = FLAGS_compaction_pri_e;
if (FLAGS_prefix_size != 0) {
options.prefix_extractor.reset(
NewFixedPrefixTransform(FLAGS_prefix_size));
@ -3957,6 +3962,7 @@ int main(int argc, char** argv) {
if (FLAGS_statistics) {
dbstats = rocksdb::CreateDBStatistics();
}
FLAGS_compaction_pri_e = (rocksdb::CompactionPri)FLAGS_compaction_pri;
std::vector<std::string> fanout = rocksdb::StringSplit(
FLAGS_max_bytes_for_level_multiplier_additional, ',');

@ -77,7 +77,7 @@ class VersionBuilderTest : public testing::Test {
}
void UpdateVersionStorageInfo() {
vstorage_.UpdateFilesBySize();
vstorage_.UpdateFilesByCompactionPri(mutable_cf_options_);
vstorage_.UpdateNumNonEmptyLevels();
vstorage_.GenerateFileIndexer();
vstorage_.GenerateLevelFilesBrief();

@ -782,7 +782,7 @@ VersionStorageInfo::VersionStorageInfo(
compaction_style_(compaction_style),
files_(new std::vector<FileMetaData*>[num_levels_]),
base_level_(num_levels_ == 1 ? -1 : 1),
files_by_size_(num_levels_),
files_by_compaction_pri_(num_levels_),
level0_non_overlapping_(false),
next_file_to_compact_by_size_(num_levels_),
compaction_score_(num_levels_),
@ -923,7 +923,7 @@ void Version::PrepareApply(
UpdateAccumulatedStats(update_stats);
storage_info_.UpdateNumNonEmptyLevels();
storage_info_.CalculateBaseBytes(*cfd_->ioptions(), mutable_cf_options);
storage_info_.UpdateFilesBySize();
storage_info_.UpdateFilesByCompactionPri(mutable_cf_options);
storage_info_.GenerateFileIndexer();
storage_info_.GenerateLevelFilesBrief();
storage_info_.GenerateLevel0NonOverlapping();
@ -1227,7 +1227,6 @@ bool CompareCompensatedSizeDescending(const Fsize& first, const Fsize& second) {
return (first.file->compensated_file_size >
second.file->compensated_file_size);
}
} // anonymous namespace
void VersionStorageInfo::AddFile(int level, FileMetaData* f) {
@ -1245,7 +1244,7 @@ void VersionStorageInfo::AddFile(int level, FileMetaData* f) {
// following functions called:
// 1. UpdateNumNonEmptyLevels();
// 2. CalculateBaseBytes();
// 3. UpdateFilesBySize();
// 3. UpdateFilesByCompactionPri();
// 4. GenerateFileIndexer();
// 5. GenerateLevelFilesBrief();
// 6. GenerateLevel0NonOverlapping();
@ -1297,7 +1296,8 @@ void VersionStorageInfo::UpdateNumNonEmptyLevels() {
}
}
void VersionStorageInfo::UpdateFilesBySize() {
void VersionStorageInfo::UpdateFilesByCompactionPri(
const MutableCFOptions& mutable_cf_options) {
if (compaction_style_ == kCompactionStyleFIFO ||
compaction_style_ == kCompactionStyleUniversal) {
// don't need this
@ -1306,8 +1306,8 @@ void VersionStorageInfo::UpdateFilesBySize() {
// No need to sort the highest level because it is never compacted.
for (int level = 0; level < num_levels() - 1; level++) {
const std::vector<FileMetaData*>& files = files_[level];
auto& files_by_size = files_by_size_[level];
assert(files_by_size.size() == 0);
auto& files_by_compaction_pri = files_by_compaction_pri_[level];
assert(files_by_compaction_pri.size() == 0);
// populate a temp vector for sorting based on size
std::vector<Fsize> temp(files.size());
@ -1321,16 +1321,28 @@ void VersionStorageInfo::UpdateFilesBySize() {
if (num > temp.size()) {
num = temp.size();
}
std::partial_sort(temp.begin(), temp.begin() + num, temp.end(),
CompareCompensatedSizeDescending);
switch (mutable_cf_options.compaction_pri) {
case kCompactionPriByCompensatedSize:
std::partial_sort(temp.begin(), temp.begin() + num, temp.end(),
CompareCompensatedSizeDescending);
break;
case kCompactionPriByLargestSeq:
std::sort(temp.begin(), temp.end(),
[this](const Fsize& f1, const Fsize& f2) -> bool {
return f1.file->largest_seqno < f2.file->largest_seqno;
});
break;
default:
assert(false);
}
assert(temp.size() == files.size());
// initialize files_by_size_
// initialize files_by_compaction_pri_
for (unsigned int i = 0; i < temp.size(); i++) {
files_by_size.push_back(temp[i].index);
files_by_compaction_pri.push_back(temp[i].index);
}
next_file_to_compact_by_size_[level] = 0;
assert(files_[level].size() == files_by_size_[level].size());
assert(files_[level].size() == files_by_compaction_pri_[level].size());
}
}

@ -132,8 +132,9 @@ class VersionStorageInfo {
// Generate level_files_brief_ from files_
void GenerateLevelFilesBrief();
// Sort all files for this version based on their file size and
// record results in files_by_size_. The largest files are listed first.
void UpdateFilesBySize();
// record results in files_by_compaction_pri_. The largest files are listed
// first.
void UpdateFilesByCompactionPri(const MutableCFOptions& mutable_cf_options);
void GenerateLevel0NonOverlapping();
bool level0_non_overlapping() const {
@ -226,9 +227,9 @@ class VersionStorageInfo {
}
// REQUIRES: This version has been saved (see VersionSet::SaveTo)
const std::vector<int>& FilesBySize(int level) const {
const std::vector<int>& FilesByCompactionPri(int level) const {
assert(finalized_);
return files_by_size_[level];
return files_by_compaction_pri_[level];
}
// REQUIRES: This version has been saved (see VersionSet::SaveTo)
@ -242,7 +243,7 @@ class VersionStorageInfo {
int base_level() const { return base_level_; }
// REQUIRES: lock is held
// Set the index that is used to offset into files_by_size_ to find
// Set the index that is used to offset into files_by_compaction_pri_ to find
// the next compaction candidate file.
void SetNextCompactionIndex(int level, int index) {
next_file_to_compact_by_size_[level] = index;
@ -259,7 +260,7 @@ class VersionStorageInfo {
return file_indexer_;
}
// Only the first few entries of files_by_size_ are sorted.
// Only the first few entries of files_by_compaction_pri_ are sorted.
// There is no need to sort all the files because it is likely
// that on a running system, we need to look at only the first
// few largest files because a new version is created every few
@ -299,7 +300,8 @@ class VersionStorageInfo {
uint64_t GetEstimatedActiveKeys() const;
// re-initializes the index that is used to offset into files_by_size_
// re-initializes the index that is used to offset into
// files_by_compaction_pri_
// to find the next compaction candidate file.
void ResetNextCompactionIndex(int level) {
next_file_to_compact_by_size_[level] = 0;
@ -351,16 +353,16 @@ class VersionStorageInfo {
// but files in each level are now sorted based on file
// size. The file with the largest size is at the front.
// This vector stores the index of the file from files_.
std::vector<std::vector<int>> files_by_size_;
std::vector<std::vector<int>> files_by_compaction_pri_;
// If true, means that files in L0 have keys with non overlapping ranges
bool level0_non_overlapping_;
// An index into files_by_size_ that specifies the first
// An index into files_by_compaction_pri_ that specifies the first
// file that is not yet compacted
std::vector<int> next_file_to_compact_by_size_;
// Only the first few entries of files_by_size_ are sorted.
// Only the first few entries of files_by_compaction_pri_ are sorted.
// There is no need to sort all the files because it is likely
// that on a running system, we need to look at only the first
// few largest files because a new version is created every few
@ -513,8 +515,9 @@ class Version {
void UpdateAccumulatedStats(bool update_stats);
// Sort all files for this version based on their file size and
// record results in files_by_size_. The largest files are listed first.
void UpdateFilesBySize();
// record results in files_by_compaction_pri_. The largest files are listed
// first.
void UpdateFilesByCompactionPri();
ColumnFamilyData* cfd_; // ColumnFamilyData to which this Version belongs
Logger* info_log_;

@ -80,6 +80,13 @@ enum CompactionStyle : char {
kCompactionStyleNone = 0x3,
};
enum CompactionPri : char {
// Slightly Priotize larger files by size compensated by #deletes
kCompactionPriByCompensatedSize = 0x0,
// First compact files whose data is oldest.
kCompactionPriByLargestSeq = 0x1,
};
enum class WALRecoveryMode : char {
// Original levelDB recovery
// We tolerate incomplete record in trailing data on all logs
@ -547,6 +554,11 @@ struct ColumnFamilyOptions {
// The compaction style. Default: kCompactionStyleLevel
CompactionStyle compaction_style;
// If level compaction_style = kCompactionStyleLevel, for each level,
// which files are prioritized to be picked to compact.
// Default: kCompactionPriByCompensatedSize
CompactionPri compaction_pri;
// If true, compaction will verify checksum on every read that happens
// as part of compaction
//

@ -31,6 +31,7 @@ struct MutableCFOptions {
options.level0_file_num_compaction_trigger),
level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger),
level0_stop_writes_trigger(options.level0_stop_writes_trigger),
compaction_pri(options.compaction_pri),
max_grandparent_overlap_factor(options.max_grandparent_overlap_factor),
expanded_compaction_factor(options.expanded_compaction_factor),
source_compaction_factor(options.source_compaction_factor),
@ -66,6 +67,7 @@ struct MutableCFOptions {
level0_file_num_compaction_trigger(0),
level0_slowdown_writes_trigger(0),
level0_stop_writes_trigger(0),
compaction_pri(kCompactionPriByCompensatedSize),
max_grandparent_overlap_factor(0),
expanded_compaction_factor(0),
source_compaction_factor(0),
@ -117,6 +119,7 @@ struct MutableCFOptions {
int level0_file_num_compaction_trigger;
int level0_slowdown_writes_trigger;
int level0_stop_writes_trigger;
CompactionPri compaction_pri;
int max_grandparent_overlap_factor;
int expanded_compaction_factor;
int source_compaction_factor;

@ -109,6 +109,7 @@ ColumnFamilyOptions::ColumnFamilyOptions()
disable_auto_compactions(false),
purge_redundant_kvs_while_flush(true),
compaction_style(kCompactionStyleLevel),
compaction_pri(kCompactionPriByCompensatedSize),
verify_checksums_in_compaction(true),
filter_deletes(false),
max_sequential_skip_in_iterations(8),
@ -170,6 +171,7 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
disable_auto_compactions(options.disable_auto_compactions),
purge_redundant_kvs_while_flush(options.purge_redundant_kvs_while_flush),
compaction_style(options.compaction_style),
compaction_pri(options.compaction_pri),
verify_checksums_in_compaction(options.verify_checksums_in_compaction),
compaction_options_universal(options.compaction_options_universal),
compaction_options_fifo(options.compaction_options_fifo),
@ -492,6 +494,8 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
verify_checksums_in_compaction);
Header(log, " Options.compaction_style: %d",
compaction_style);
Header(log, " Options.compaction_pri: %d",
compaction_pri);
Header(log, " Options.compaction_options_universal.size_ratio: %u",
compaction_options_universal.size_ratio);
Header(log, "Options.compaction_options_universal.min_merge_width: %u",

Loading…
Cancel
Save