diff --git a/HISTORY.md b/HISTORY.md index 7ee49bbc5..a4aa2667f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -11,6 +11,7 @@ * Properly set the stop key for a truncated manual CompactRange * Fix slow flush/compaction when DB contains many snapshots. The problem became noticeable to us in DBs with 100,000+ snapshots, though it will affect others at different thresholds. * Fix the bug that WriteBatchWithIndex's SeekForPrev() doesn't see the entries with the same key. +* Fix the bug where user comparator was sometimes fed with InternalKey instead of the user key. The bug manifests during GenerateBottommostFiles. ## 5.17.0 (10/05/2018) ### Public API Change diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 5136b0392..e46a2cf2f 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -2953,6 +2953,12 @@ TEST_F(DBCompactionTest, SuggestCompactRangeNoTwoLevel0Compactions) { dbfull()->TEST_WaitForCompact(); } +static std::string ShortKey(int i) { + assert(i < 10000); + char buf[100]; + snprintf(buf, sizeof(buf), "key%04d", i); + return std::string(buf); +} TEST_P(DBCompactionTestWithParam, ForceBottommostLevelCompaction) { int32_t trivial_move = 0; @@ -2965,10 +2971,28 @@ TEST_P(DBCompactionTestWithParam, ForceBottommostLevelCompaction) { [&](void* /*arg*/) { non_trivial_move++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + // The key size is guaranteed to be <= 8 + class ShortKeyComparator : public Comparator { + int Compare(const rocksdb::Slice& a, + const rocksdb::Slice& b) const override { + assert(a.size() <= 8); + assert(b.size() <= 8); + return BytewiseComparator()->Compare(a, b); + } + const char* Name() const override { return "ShortKeyComparator"; } + void FindShortestSeparator(std::string* start, + const rocksdb::Slice& limit) const override { + return BytewiseComparator()->FindShortestSeparator(start, limit); + } + void FindShortSuccessor(std::string* key) const override { + return 
BytewiseComparator()->FindShortSuccessor(key); + } + } short_key_cmp; Options options = CurrentOptions(); options.target_file_size_base = 100000000; options.write_buffer_size = 100000000; options.max_subcompactions = max_subcompactions_; + options.comparator = &short_key_cmp; DestroyAndReopen(options); int32_t value_size = 10 * 1024; // 10 KB @@ -2978,7 +3002,7 @@ TEST_P(DBCompactionTestWithParam, ForceBottommostLevelCompaction) { // File with keys [ 0 => 99 ] for (int i = 0; i < 100; i++) { values.push_back(RandomString(&rnd, value_size)); - ASSERT_OK(Put(Key(i), values[i])); + ASSERT_OK(Put(ShortKey(i), values[i])); } ASSERT_OK(Flush()); @@ -2995,7 +3019,7 @@ TEST_P(DBCompactionTestWithParam, ForceBottommostLevelCompaction) { // File with keys [ 100 => 199 ] for (int i = 100; i < 200; i++) { values.push_back(RandomString(&rnd, value_size)); - ASSERT_OK(Put(Key(i), values[i])); + ASSERT_OK(Put(ShortKey(i), values[i])); } ASSERT_OK(Flush()); @@ -3013,7 +3037,7 @@ TEST_P(DBCompactionTestWithParam, ForceBottommostLevelCompaction) { // File with keys [ 200 => 299 ] for (int i = 200; i < 300; i++) { values.push_back(RandomString(&rnd, value_size)); - ASSERT_OK(Put(Key(i), values[i])); + ASSERT_OK(Put(ShortKey(i), values[i])); } ASSERT_OK(Flush()); @@ -3031,7 +3055,7 @@ TEST_P(DBCompactionTestWithParam, ForceBottommostLevelCompaction) { ASSERT_EQ(non_trivial_move, 0); for (int i = 0; i < 300; i++) { - ASSERT_EQ(Get(Key(i)), values[i]); + ASSERT_EQ(Get(ShortKey(i)), values[i]); } rocksdb::SyncPoint::GetInstance()->DisableProcessing(); diff --git a/db/version_set.cc b/db/version_set.cc index aec3469f1..f25f579cf 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1975,7 +1975,9 @@ void VersionStorageInfo::GenerateBottommostFiles() { } else { l0_file_idx = -1; } - if (!RangeMightExistAfterSortedRun(f.smallest_key, f.largest_key, + Slice smallest_user_key = ExtractUserKey(f.smallest_key); + Slice largest_user_key = ExtractUserKey(f.largest_key); + if 
(!RangeMightExistAfterSortedRun(smallest_user_key, largest_user_key, static_cast<int>(level), l0_file_idx)) { bottommost_files_.emplace_back(static_cast<int>(level), @@ -2640,8 +2642,8 @@ uint64_t VersionStorageInfo::EstimateLiveDataSize() const { } bool VersionStorageInfo::RangeMightExistAfterSortedRun( - const Slice& smallest_key, const Slice& largest_key, int last_level, - int last_l0_idx) { + const Slice& smallest_user_key, const Slice& largest_user_key, + int last_level, int last_l0_idx) { assert((last_l0_idx != -1) == (last_level == 0)); // TODO(ajkr): this preserves earlier behavior where we considered an L0 file // bottommost only if it's the oldest L0 file and there are no files on older @@ -2663,7 +2665,7 @@ bool VersionStorageInfo::RangeMightExistAfterSortedRun( // which overlap with [`smallest_key`, `largest_key`]. if (files_[level].size() > 0 && (last_level == 0 || - OverlapInLevel(level, &smallest_key, &largest_key))) { + OverlapInLevel(level, &smallest_user_key, &largest_user_key))) { return true; } } diff --git a/db/version_set.h b/db/version_set.h index e3bea32f4..814c5a0e4 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -408,9 +408,9 @@ class VersionStorageInfo { // @param last_level Level after which we check for overlap // @param last_l0_idx If `last_level == 0`, index of L0 file after which we // check for overlap; otherwise, must be -1 - bool RangeMightExistAfterSortedRun(const Slice& smallest_key, - const Slice& largest_key, int last_level, - int last_l0_idx); + bool RangeMightExistAfterSortedRun(const Slice& smallest_user_key, + const Slice& largest_user_key, + int last_level, int last_l0_idx); private: const InternalKeyComparator* internal_comparator_;