From 894c6d21aff74d1edd2a355f52f2315d5ba58a60 Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 13 Jan 2020 16:25:28 -0800 Subject: [PATCH] Bug when multiple files at one level contains the same smallest key (#6285) Summary: The fractional cascading index is not correctly generated when two files at the same level contains the same smallest or largest user key. The result would be that it would hit an assertion in debug mode and lower level files might be skipped. This might cause wrong results when the same user keys are of merge operands and Get() is called using the exact user key. In that case, the lower files would need to further checked. The fix is to fix the fractional cascading index. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6285 Test Plan: Add a unit test which would cause the assertion which would be fixed. Differential Revision: D19358426 fbshipit-source-id: 39b2b1558075fd95e99491d462a67f9f2298c48e --- HISTORY.md | 1 + db/db_test2.cc | 32 ++++++++++++++++++++++++++++++++ db/file_indexer.cc | 2 -- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index a6c59d032..9e58ddfaa 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -16,6 +16,7 @@ * BlobDB no longer updates the SST to blob file mapping upon failed compactions. * Fixed a bug where BlobDB was comparing the `ColumnFamilyHandle` pointers themselves instead of only the column family IDs when checking whether an API call uses the default column family or not. * Fix a race condition for cfd->log_number_ between manifest switch and memtable switch (PR 6249) when number of column families is greater than 1. +* Fix a bug on fractional cascading index when multiple files at the same level contain the same smallest user key, and those user keys are for merge operands. In this case, Get() the exact key may miss some merge operands. ### New Features * It is now possible to enable periodic compactions for the base DB when using BlobDB. diff --git a/db/db_test2.cc b/db/db_test2.cc index 07ed8608b..07a486186 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -4266,6 +4266,38 @@ TEST_F(DBTest2, SwitchMemtableRaceWithNewManifest) { ASSERT_OK(dbfull()->TEST_WaitForCompact()); thread.join(); } + +TEST_F(DBTest2, SameSmallestInSameLevel) { + // This test validates fractional casacading logic when several files at one + // one level only contains the same user key. + Options options = CurrentOptions(); + options.merge_operator = MergeOperators::CreateStringAppendOperator(); + DestroyAndReopen(options); + + ASSERT_OK(Put("key", "1")); + ASSERT_OK(Put("key", "2")); + ASSERT_OK(db_->Merge(WriteOptions(), "key", "3")); + ASSERT_OK(db_->Merge(WriteOptions(), "key", "4")); + Flush(); + CompactRangeOptions cro; + cro.change_level = true; + cro.target_level = 2; + ASSERT_OK(dbfull()->CompactRange(cro, db_->DefaultColumnFamily(), nullptr, + nullptr)); + + ASSERT_OK(db_->Merge(WriteOptions(), "key", "5")); + Flush(); + ASSERT_OK(db_->Merge(WriteOptions(), "key", "6")); + Flush(); + ASSERT_OK(db_->Merge(WriteOptions(), "key", "7")); + Flush(); + ASSERT_OK(db_->Merge(WriteOptions(), "key", "8")); + Flush(); + dbfull()->TEST_WaitForCompact(true); + ASSERT_EQ("0,4,1", FilesPerLevel()); + + ASSERT_EQ("2,3,4,5,6,7,8", Get("key")); +} } // namespace rocksdb #ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS diff --git a/db/file_indexer.cc b/db/file_indexer.cc index f22211f04..fd0180992 100644 --- a/db/file_indexer.cc +++ b/db/file_indexer.cc @@ -157,7 +157,6 @@ void FileIndexer::CalculateLB( if (cmp == 0) { set_index(&index[upper_idx], lower_idx); ++upper_idx; - ++lower_idx; } else if (cmp > 0) { // Lower level's file (largest) is smaller, a key won't hit in that // file. Move to next lower file @@ -195,7 +194,6 @@ void FileIndexer::CalculateRB( if (cmp == 0) { set_index(&index[upper_idx], lower_idx); --upper_idx; - --lower_idx; } else if (cmp < 0) { // Lower level's file (smallest) is larger, a key won't hit in that // file. Move to next lower file.