From c0b23dd5b07772d44eb9af3b4c475c3e3b27cfc2 Mon Sep 17 00:00:00 2001 From: Poornima Chozhiyath Raman Date: Tue, 7 Jul 2015 14:18:55 -0700 Subject: [PATCH] Enabling trivial move in universal compaction Summary: This change enables trivial move if all the input files are non-overlapping while doing Universal Compaction. Test Plan: ./compaction_picker_test and db_test ran successfully with the new testcases. Reviewers: sdong Reviewed By: sdong Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D40875 --- HISTORY.md | 1 + db/compaction.cc | 6 ++ db/compaction.h | 18 +++++ db/compaction_picker.cc | 107 +++++++++++++++++++++++++ db/compaction_picker.h | 6 ++ db/compaction_picker_test.cc | 60 ++++++++++++++ db/db_impl.cc | 34 ++++---- db/db_test.cc | 43 ++++++++++ include/rocksdb/universal_compaction.h | 8 +- 9 files changed, 268 insertions(+), 15 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 7eced978e..c67620383 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -8,6 +8,7 @@ * Several new features on EventListener (see include/rocksdb/listener.h): - OnCompationCompleted() now returns per-compaciton job statistics, defined in include/rocksdb/compaction_job_stats.h. - Added OnTableFileCreated() and OnTableFileDeleted(). +* Add option compaction_options_universal.allow_trivial_move; set it to true to allow trivial move while performing universal compaction. Trivial move will happen only when all the input files are non-overlapping. ### Public API changes * EventListener::OnFlushCompleted() now passes FlushJobInfo instead of a list of parameters.
diff --git a/db/compaction.cc b/db/compaction.cc index 02077923f..a7f2a9742 100644 --- a/db/compaction.cc +++ b/db/compaction.cc @@ -167,6 +167,12 @@ bool Compaction::IsTrivialMove() const { return false; } + // Used in universal compaction, where trivial move can be done if the + // input files are non overlapping + if (cfd_->ioptions()->compaction_options_universal.allow_trivial_move) { + return is_trivial_move_; + } + return (start_level_ != output_level_ && num_input_levels() == 1 && input(0, 0)->fd.GetPathId() == GetOutputPathId() && InputCompressionMatchesOutput() && diff --git a/db/compaction.h b/db/compaction.h index beddf2363..d40864f39 100644 --- a/db/compaction.h +++ b/db/compaction.h @@ -158,6 +158,19 @@ class Compaction { // Was this compaction triggered manually by the client? bool IsManualCompaction() { return is_manual_compaction_; } + // Used when allow_trivial_move option is set in + // Universal compaction. If all the input files are + // non overlapping, then is_trivial_move_ variable + // will be set true, else false + void set_is_trivial_move(bool trivial_move) { + is_trivial_move_ = trivial_move; + } + + // Used when allow_trivial_move option is set in + // Universal compaction. Returns true, if the input files + // are non-overlapping and can be trivially moved. + bool is_trivial_move() { return is_trivial_move_; } + // Return the MutableCFOptions that should be used throughout the compaction // procedure const MutableCFOptions* mutable_cf_options() { return &mutable_cf_options_; } @@ -238,6 +251,11 @@ class Compaction { // Is this compaction requested by the client? const bool is_manual_compaction_; + // True if we can do trivial move in Universal multi level + // compaction + + bool is_trivial_move_; + // "level_ptrs_" holds indices into "input_version_->levels_", where each // index remembers which file of an associated level we are currently used // to check KeyNotExistsBeyondOutputLevel() for deletion operation. 
diff --git a/db/compaction_picker.cc b/db/compaction_picker.cc index 70e48146b..ec18498ce 100644 --- a/db/compaction_picker.cc +++ b/db/compaction_picker.cc @@ -15,6 +15,7 @@ #include #include +#include <queue> #include #include @@ -37,6 +38,64 @@ uint64_t TotalCompensatedFileSize(const std::vector& files) { return sum; } +// Used in universal compaction when trivial move is enabled. +// This structure is used for the construction of min heap +// that contains the file meta data, the level of the file +// and the index of the file in that level + +struct InputFileInfo { + FileMetaData* f; + unsigned int level; + unsigned int index; +}; + +// Used in universal compaction when trivial move is enabled. +// This comparator is used for the construction of min heap +// based on the smallest key of the file. +struct UserKeyComparator { + explicit UserKeyComparator(const Comparator* ucmp) { ucmp_ = ucmp; } + + bool operator()(InputFileInfo i1, InputFileInfo i2) const { + return (ucmp_->Compare(i1.f->smallest.user_key(), + i2.f->smallest.user_key()) > 0); + } + + private: + const Comparator* ucmp_; +}; + +typedef std::priority_queue<InputFileInfo, std::vector<InputFileInfo>, + UserKeyComparator> SmallestKeyHeap; + +// This function creates the heap that is used to find if the files are +// overlapping during universal compaction when the allow_trivial_move +// is set.
+SmallestKeyHeap create_level_heap(Compaction* c, const Comparator* ucmp) { + SmallestKeyHeap smallest_key_priority_q = + SmallestKeyHeap(UserKeyComparator(ucmp)); + + InputFileInfo input_file; + + for (unsigned int l = 0; l < c->num_input_levels(); l++) { + if (c->num_input_files(l) != 0) { + if (l == 0 && c->start_level() == 0) { + for (size_t i = 0; i < c->num_input_files(0); i++) { + input_file.f = c->input(0, i); + input_file.level = 0; + input_file.index = i; + smallest_key_priority_q.push(std::move(input_file)); + } + } else { + input_file.f = c->input(l, 0); + input_file.level = l; + input_file.index = 0; + smallest_key_priority_q.push(std::move(input_file)); + } + } + } + return smallest_key_priority_q; +} + } // anonymous namespace // Determine compression type, based on user options, level of the output @@ -1106,6 +1165,50 @@ void GetSmallestLargestSeqno(const std::vector& files, } // namespace #endif +// Algorithm that checks to see if there are any overlapping +// files in the input +bool CompactionPicker::IsInputNonOverlapping(Compaction* c) { + auto comparator = icmp_->user_comparator(); + int first_iter = 1; + + InputFileInfo prev, curr, next; + + SmallestKeyHeap smallest_key_priority_q = + create_level_heap(c, icmp_->user_comparator()); + + while (!smallest_key_priority_q.empty()) { + curr = smallest_key_priority_q.top(); + smallest_key_priority_q.pop(); + + if (first_iter) { + prev = curr; + first_iter = 0; + } else { + if (comparator->Compare(prev.f->largest.user_key(), + curr.f->smallest.user_key()) >= 0) { + // found overlapping files, return false + return false; + } + assert(comparator->Compare(curr.f->largest.user_key(), + prev.f->largest.user_key()) > 0); + prev = curr; + } + + next.f = nullptr; + + if (c->level(curr.level) != 0 && curr.index < c->num_input_files(curr.level) - 1) { + next.f = c->input(curr.level, curr.index + 1); + next.level = curr.level; + next.index = curr.index + 1; + } + + if (next.f) { +
smallest_key_priority_q.push(std::move(next)); + } + } + return true; +} + // Universal style of compaction. Pick files that are contiguous in // time-range to compact. // @@ -1168,6 +1271,10 @@ Compaction* UniversalCompactionPicker::PickCompaction( return nullptr; } + if (ioptions_.compaction_options_universal.allow_trivial_move == true) { + c->set_is_trivial_move(IsInputNonOverlapping(c)); + } + // validate that all the chosen files of L0 are non overlapping in time #ifndef NDEBUG SequenceNumber prev_smallest_seqno = 0U; diff --git a/db/compaction_picker.h b/db/compaction_picker.h index 403410196..65ca73abf 100644 --- a/db/compaction_picker.h +++ b/db/compaction_picker.h @@ -105,6 +105,12 @@ class CompactionPicker { const VersionStorageInfo* vstorage, const CompactionOptions& compact_options) const; + // Used in universal compaction when the allow_trivial_move + // option is set. Checks whether there are any overlapping files + // in the input. Returns true if the input files are non + // overlapping.
+ bool IsInputNonOverlapping(Compaction* c); + protected: int NumberLevels() const { return ioptions_.num_levels; } diff --git a/db/compaction_picker_test.cc b/db/compaction_picker_test.cc index f1145afc5..e6b31fbfa 100644 --- a/db/compaction_picker_test.cc +++ b/db/compaction_picker_test.cc @@ -77,6 +77,8 @@ class CompactionPickerTest : public testing::Test { f->fd = FileDescriptor(file_number, path_id, file_size); f->smallest = InternalKey(smallest, smallest_seq, kTypeValue); f->largest = InternalKey(largest, largest_seq, kTypeValue); + f->smallest_seqno = smallest_seq; + f->largest_seqno = largest_seq; f->compensated_file_size = file_size; f->refs = 0; vstorage_->AddFile(level, f); @@ -365,6 +367,64 @@ TEST_F(CompactionPickerTest, NeedsCompactionUniversal) { vstorage_->CompactionScore(0) >= 1); } } +// Tests if the files can be trivially moved in multi level +// universal compaction when allow_trivial_move option is set +// In this test as the input files overlaps, they cannot +// be trivially moved. + +TEST_F(CompactionPickerTest, CannotTrivialMoveUniversal) { + const uint64_t kFileSize = 100000; + + ioptions_.compaction_options_universal.allow_trivial_move = true; + NewVersionStorage(1, kCompactionStyleUniversal); + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + // must return false when there's no files. 
+ ASSERT_EQ(universal_compaction_picker.NeedsCompaction(vstorage_.get()), + false); + + NewVersionStorage(3, kCompactionStyleUniversal); + + Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); + Add(0, 2U, "201", "250", kFileSize, 0, 401, 450); + Add(0, 4U, "260", "300", kFileSize, 0, 260, 300); + Add(1, 5U, "100", "151", kFileSize, 0, 200, 251); + Add(1, 3U, "301", "350", kFileSize, 0, 101, 150); + Add(2, 6U, "120", "200", kFileSize, 0, 20, 100); + + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + + ASSERT_TRUE(!compaction->is_trivial_move()); +} +// Tests if the files can be trivially moved in multi level +// universal compaction when allow_trivial_move option is set +// In this test the input files do not overlap, so they should +// be trivially moved. +TEST_F(CompactionPickerTest, AllowsTrivialMoveUniversal) { + const uint64_t kFileSize = 100000; + + ioptions_.compaction_options_universal.allow_trivial_move = true; + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + + NewVersionStorage(3, kCompactionStyleUniversal); + + Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); + Add(0, 2U, "201", "250", kFileSize, 0, 401, 450); + Add(0, 4U, "260", "300", kFileSize, 0, 260, 300); + Add(1, 5U, "010", "080", kFileSize, 0, 200, 251); + Add(2, 3U, "301", "350", kFileSize, 0, 101, 150); + + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + + ASSERT_TRUE(compaction->is_trivial_move()); +} TEST_F(CompactionPickerTest, NeedsCompactionFIFO) { NewVersionStorage(1, kCompactionStyleFIFO); diff --git a/db/db_impl.cc b/db/db_impl.cc index 66eac6ad0..75535c27d 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -2538,21 +2538,27 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, JobContext* job_context, //
Move files to next level int32_t moved_files = 0; int64_t moved_bytes = 0; - for (size_t i = 0; i < c->num_input_files(0); i++) { - FileMetaData* f = c->input(0, i); - c->edit()->DeleteFile(c->level(), f->fd.GetNumber()); - c->edit()->AddFile(c->output_level(), f->fd.GetNumber(), - f->fd.GetPathId(), f->fd.GetFileSize(), f->smallest, - f->largest, f->smallest_seqno, f->largest_seqno, - f->marked_for_compaction); - - LogToBuffer(log_buffer, - "[%s] Moving #%" PRIu64 " to level-%d %" PRIu64 " bytes\n", - c->column_family_data()->GetName().c_str(), f->fd.GetNumber(), - c->output_level(), f->fd.GetFileSize()); - ++moved_files; - moved_bytes += f->fd.GetFileSize(); + for (unsigned int l = 0; l < c->num_input_levels(); l++) { + if (c->level(l) == c->output_level()) { + continue; + } + for (size_t i = 0; i < c->num_input_files(l); i++) { + FileMetaData* f = c->input(l, i); + c->edit()->DeleteFile(c->level(l), f->fd.GetNumber()); + c->edit()->AddFile(c->output_level(), f->fd.GetNumber(), + f->fd.GetPathId(), f->fd.GetFileSize(), f->smallest, + f->largest, f->smallest_seqno, f->largest_seqno, + f->marked_for_compaction); + + LogToBuffer(log_buffer, + "[%s] Moving #%" PRIu64 " to level-%d %" PRIu64 " bytes\n", + c->column_family_data()->GetName().c_str(), + f->fd.GetNumber(), c->output_level(), f->fd.GetFileSize()); + ++moved_files; + moved_bytes += f->fd.GetFileSize(); + } } + status = versions_->LogAndApply(c->column_family_data(), *c->mutable_cf_options(), c->edit(), &mutex_, directories_.GetDbDir()); diff --git a/db/db_test.cc b/db/db_test.cc index 6ae8f878a..124f6afbb 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -4528,7 +4528,50 @@ TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionMultiLevels) { ASSERT_EQ(Get(1, Key(i % num_keys)), Key(i)); } } +// Tests universal compaction with trivial move enabled +TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionTrivialMove) { + int32_t trivial_move = 0; + int32_t non_trivial_move = 0; +
rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:TrivialMove", + [&](void* arg) { trivial_move++; }); + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:NonTrivial", + [&](void* arg) { non_trivial_move++; }); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + Options options; + options.compaction_style = kCompactionStyleUniversal; + options.compaction_options_universal.allow_trivial_move = true; + options.num_levels = 3; + options.write_buffer_size = 100 << 10; // 100KB + options.level0_file_num_compaction_trigger = 3; + options.max_background_compactions = 1; + options.target_file_size_base = 32 * 1024; + options = CurrentOptions(options); + DestroyAndReopen(options); + CreateAndReopenWithCF({"pikachu"}, options); + + // Trigger compaction if size amplification exceeds 110% + options.compaction_options_universal.max_size_amplification_percent = 110; + options = CurrentOptions(options); + ReopenWithColumnFamilies({"default", "pikachu"}, options); + Random rnd(301); + int num_keys = 15000; + for (int i = 0; i < num_keys; i++) { + ASSERT_OK(Put(1, Key(i), Key(i))); + } + std::vector values; + + ASSERT_OK(Flush(1)); + dbfull()->TEST_WaitForCompact(); + + ASSERT_GT(trivial_move, 0); + ASSERT_EQ(non_trivial_move, 0); + + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); +} INSTANTIATE_TEST_CASE_P(DBTestUniversalCompactionMultiLevels, DBTestUniversalCompactionMultiLevels, ::testing::Values(3, 20)); diff --git a/include/rocksdb/universal_compaction.h b/include/rocksdb/universal_compaction.h index 229e50b25..e0f9f830f 100644 --- a/include/rocksdb/universal_compaction.h +++ b/include/rocksdb/universal_compaction.h @@ -69,6 +69,11 @@ class CompactionOptionsUniversal { // Default: kCompactionStopStyleTotalSize CompactionStopStyle stop_style; + // Option to optimize the universal multi level compaction by enabling + // trivial move for non overlapping files. 
+ // Default: false + bool allow_trivial_move; + // Default set of parameters CompactionOptionsUniversal() : size_ratio(1), @@ -76,7 +81,8 @@ class CompactionOptionsUniversal { max_merge_width(UINT_MAX), max_size_amplification_percent(200), compression_size_percent(-1), - stop_style(kCompactionStopStyleTotalSize) {} + stop_style(kCompactionStopStyleTotalSize), + allow_trivial_move(false) {} }; } // namespace rocksdb