From e07aa8669daa887a7599d4588e15529d9b087c70 Mon Sep 17 00:00:00 2001 From: Dmitry Fink Date: Wed, 23 Jan 2019 21:43:44 -0800 Subject: [PATCH] Allow full merge when root of history for a key is reached (#4909) Summary: Previously compaction was not collapsing operands for a first key on a layer, even in cases when it was its root of history. Some tests (CompactionJobTest.NonAssocMerge) was actually accounting for that bug, Pull Request resolved: https://github.com/facebook/rocksdb/pull/4909 Differential Revision: D13781169 Pulled By: finik fbshipit-source-id: d2de353ecf05bec39b942cd8d5b97a8dc445f336 --- db/compaction_job_test.cc | 5 ++--- db/db_range_del_test.cc | 13 +++++++++++-- db/merge_helper.cc | 26 ++++++++++++++------------ db/merge_helper_test.cc | 2 +- include/rocksdb/merge_operator.h | 25 +++++++++++++++++++++---- 5 files changed, 49 insertions(+), 22 deletions(-) diff --git a/db/compaction_job_test.cc b/db/compaction_job_test.cc index c7da191dd..16114ccaa 100644 --- a/db/compaction_job_test.cc +++ b/db/compaction_job_test.cc @@ -456,8 +456,7 @@ TEST_F(CompactionJobTest, NonAssocMerge) { auto expected_results = mock::MakeMockFile({{KeyStr("a", 0U, kTypeValue), "3,4,5"}, - {KeyStr("b", 2U, kTypeMerge), "2"}, - {KeyStr("b", 1U, kTypeMerge), "1"}}); + {KeyStr("b", 2U, kTypeValue), "1,2"}}); SetLastSequence(5U); auto files = cfd_->current()->storage_info()->LevelFiles(0); @@ -484,7 +483,7 @@ TEST_F(CompactionJobTest, MergeOperandFilter) { auto expected_results = mock::MakeMockFile({{KeyStr("a", 0U, kTypeValue), test::EncodeInt(8U)}, - {KeyStr("b", 2U, kTypeMerge), test::EncodeInt(2U)}}); + {KeyStr("b", 2U, kTypeValue), test::EncodeInt(2U)}}); SetLastSequence(5U); auto files = cfd_->current()->storage_info()->LevelFiles(0); diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index 73bc4d275..f17b3fa74 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -1138,6 +1138,14 @@ TEST_F(DBRangeDelTest, KeyAtOverlappingEndpointReappears) { options.target_file_size_base = kFileBytes; Reopen(options); + // Push dummy data to L3 so that our actual test files on L0-L2 + // will not be considered "bottommost" level, otherwise compaction + // may prevent us from creating overlapping user keys + // as on the bottommost layer MergeHelper + ASSERT_OK(db_->Merge(WriteOptions(), "key", "dummy")); + ASSERT_OK(db_->Flush(FlushOptions())); + MoveFilesToLevel(3); + Random rnd(301); const Snapshot* snapshot = nullptr; for (int i = 0; i < kNumFiles; ++i) { @@ -1159,8 +1167,9 @@ TEST_F(DBRangeDelTest, KeyAtOverlappingEndpointReappears) { std::string value; ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound()); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */, - nullptr /* end_key */)); + dbfull()->TEST_CompactRange(0 /* level */, nullptr /* begin */, + nullptr /* end */, nullptr /* column_family */, + true /* disallow_trivial_move */); ASSERT_EQ(0, NumTableFilesAtLevel(0)); // Now we have multiple files at L1 all containing a single user key, thus // guaranteeing overlap in the file endpoints. diff --git a/db/merge_helper.cc b/db/merge_helper.cc index 2dea15ff9..2c8046fbc 100644 --- a/db/merge_helper.cc +++ b/db/merge_helper.cc @@ -282,22 +282,24 @@ Status MergeHelper::MergeUntil(InternalIterator* iter, return Status::OK(); } - // We are sure we have seen this key's entire history if we are at the - // last level and exhausted all internal keys of this user key. - // NOTE: !iter->Valid() does not necessarily mean we hit the - // beginning of a user key, as versions of a user key might be - // split into multiple files (even files on the same level) - // and some files might not be included in the compaction/merge. + // We are sure we have seen this key's entire history if: + // at_bottom == true (this does not necessarily mean it is the bottommost + // layer, but rather that we are confident the key does not appear on any of + // the lower layers, at_bottom == false doesn't mean it does appear, just + // that we can't be sure, see Compaction::IsBottommostLevel for details) + // AND + // we have either encountered another key or end of key history on this + // layer. // - // There are also cases where we have seen the root of history of this - // key without being sure of it. Then, we simply miss the opportunity + // When these conditions are true we are able to merge all the keys + // using full merge. + // + // For these cases we are not sure about, we simply miss the opportunity // to combine the keys. Since VersionSet::SetupOtherInputs() always makes // sure that all merge-operands on the same level get compacted together, // this will simply lead to these merge operands moving to the next level. - // - // So, we only perform the following logic (to merge all operands together - // without a Put/Delete) if we are certain that we have seen the end of key. - bool surely_seen_the_beginning = hit_the_next_user_key && at_bottom; + bool surely_seen_the_beginning = (hit_the_next_user_key || !iter->Valid()) + && at_bottom; if (surely_seen_the_beginning) { // do a final merge with nullptr as the existing value and say // bye to the merge type (it's now converted to a Put) diff --git a/db/merge_helper_test.cc b/db/merge_helper_test.cc index dc43db0d1..d7589d78a 100644 --- a/db/merge_helper_test.cc +++ b/db/merge_helper_test.cc @@ -130,7 +130,7 @@ TEST_F(MergeHelperTest, SingleOperand) { AddKeyVal("a", 50, kTypeMerge, test::EncodeInt(1U)); - ASSERT_TRUE(Run(31, true).IsMergeInProgress()); + ASSERT_TRUE(Run(31, false).IsMergeInProgress()); ASSERT_FALSE(iter_->Valid()); ASSERT_EQ(test::KeyStr("a", 50, kTypeMerge), merge_helper_->keys()[0]); ASSERT_EQ(test::EncodeInt(1U), merge_helper_->values()[0]); diff --git a/include/rocksdb/merge_operator.h b/include/rocksdb/merge_operator.h index b90f3d72f..3817974e3 100644 --- a/include/rocksdb/merge_operator.h +++ b/include/rocksdb/merge_operator.h @@ -108,6 +108,23 @@ class MergeOperator { Slice& existing_operand; }; + // This function applies a stack of merge operands in chrionological order + // on top of an existing value. There are two ways in which this method is + // being used: + // a) During Get() operation, it used to calculate the final value of a key + // b) During compaction, in order to collapse some operands with the based + // value. + // + // Note: The name of the method is somewhat misleading, as both in the cases + // of Get() or compaction it may be called on a subset of operands: + // K: 0 +1 +2 +7 +4 +5 2 +1 +2 + // ^ + // | + // snapshot + // In the example above, Get(K) operation will call FullMerge with a base + // value of 2 and operands [+1, +2]. Compaction process might decide to + // collapse the beginning of the history up to the snapshot by performing + // full Merge with base value of 0 and operands [+1, +2, +7, +3]. virtual bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const; @@ -182,11 +199,11 @@ class MergeOperator { // consistent MergeOperator between DB opens. virtual const char* Name() const = 0; - // Determines whether the MergeOperator can be called with just a single + // Determines whether the PartialMerge can be called with just a single // merge operand. - // Override and return true for allowing a single operand. Both FullMergeV2 - // and PartialMerge/PartialMergeMulti should be overridden and implemented - // correctly to handle a single operand. + // Override and return true for allowing a single operand. PartialMerge + // and PartialMergeMulti should be overridden and implemented + // correctly to properly handle a single operand. virtual bool AllowSingleOperand() const { return false; } // Allows to control when to invoke a full merge during Get.