// Copyright (c) 2013, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/db_test_util.h" #include "port/stack_trace.h" #include "rocksdb/experimental.h" #include "util/sync_point.h" namespace rocksdb { // SYNC_POINT is not supported in released Windows mode. #if !defined(ROCKSDB_LITE) class DBCompactionTest : public DBTestBase { public: DBCompactionTest() : DBTestBase("/db_compaction_test") {} }; class DBCompactionTestWithParam : public DBTestBase, public testing::WithParamInterface { public: DBCompactionTestWithParam() : DBTestBase("/db_compaction_test") { max_subcompactions_ = GetParam(); } // Required if inheriting from testing::WithParamInterface<> static void SetUpTestCase() {} static void TearDownTestCase() {} uint32_t max_subcompactions_; }; namespace { class OnFileDeletionListener : public EventListener { public: OnFileDeletionListener() : matched_count_(0), expected_file_name_("") {} void SetExpectedFileName( const std::string file_name) { expected_file_name_ = file_name; } void VerifyMatchedCount(size_t expected_value) { ASSERT_EQ(matched_count_, expected_value); } void OnTableFileDeleted( const TableFileDeletionInfo& info) override { if (expected_file_name_ != "") { ASSERT_EQ(expected_file_name_, info.file_path); expected_file_name_ = ""; matched_count_++; } } private: size_t matched_count_; std::string expected_file_name_; }; static const int kCDTValueSize = 1000; static const int kCDTKeysPerBuffer = 4; static const int kCDTNumLevels = 8; Options DeletionTriggerOptions() { Options options; options.compression = kNoCompression; options.write_buffer_size = kCDTKeysPerBuffer * (kCDTValueSize + 24); options.min_write_buffer_number_to_merge = 1; options.max_write_buffer_number_to_maintain = 0; options.num_levels = kCDTNumLevels; options.level0_file_num_compaction_trigger = 1; options.target_file_size_base = options.write_buffer_size * 2; options.target_file_size_multiplier = 2; options.max_bytes_for_level_base = options.target_file_size_base * options.target_file_size_multiplier; options.max_bytes_for_level_multiplier = 2; options.disable_auto_compactions = false; return options; } bool HaveOverlappingKeyRanges( const Comparator* c, const SstFileMetaData& a, const SstFileMetaData& b) { if (c->Compare(a.smallestkey, b.smallestkey) >= 0) { if (c->Compare(a.smallestkey, b.largestkey) <= 0) { // b.smallestkey <= a.smallestkey <= b.largestkey return true; } } else if (c->Compare(a.largestkey, b.smallestkey) >= 0) { // a.smallestkey < b.smallestkey <= a.largestkey return true; } if (c->Compare(a.largestkey, b.largestkey) <= 0) { if (c->Compare(a.largestkey, b.smallestkey) >= 0) { // b.smallestkey <= a.largestkey <= b.largestkey return true; } } else if (c->Compare(a.smallestkey, b.largestkey) <= 0) { // a.smallestkey <= b.largestkey < a.largestkey return true; } return false; } // Identifies all files between level "min_level" and "max_level" // which has overlapping key range with "input_file_meta". void GetOverlappingFileNumbersForLevelCompaction( const ColumnFamilyMetaData& cf_meta, const Comparator* comparator, int min_level, int max_level, const SstFileMetaData* input_file_meta, std::set* overlapping_file_names) { std::set overlapping_files; overlapping_files.insert(input_file_meta); for (int m = min_level; m <= max_level; ++m) { for (auto& file : cf_meta.levels[m].files) { for (auto* included_file : overlapping_files) { if (HaveOverlappingKeyRanges( comparator, *included_file, file)) { overlapping_files.insert(&file); overlapping_file_names->insert(file.name); break; } } } } } void VerifyCompactionResult( const ColumnFamilyMetaData& cf_meta, const std::set& overlapping_file_numbers) { #ifndef NDEBUG for (auto& level : cf_meta.levels) { for (auto& file : level.files) { assert(overlapping_file_numbers.find(file.name) == overlapping_file_numbers.end()); } } #endif } const SstFileMetaData* PickFileRandomly( const ColumnFamilyMetaData& cf_meta, Random* rand, int* level = nullptr) { auto file_id = rand->Uniform(static_cast( cf_meta.file_count)) + 1; for (auto& level_meta : cf_meta.levels) { if (file_id <= level_meta.files.size()) { if (level != nullptr) { *level = level_meta.level; } auto result = rand->Uniform(file_id); return &(level_meta.files[result]); } file_id -= level_meta.files.size(); } assert(false); return nullptr; } } // anonymous namespace // All the TEST_P tests run once with sub_compactions disabled (i.e. // options.max_subcompactions = 1) and once with it enabled TEST_P(DBCompactionTestWithParam, CompactionDeletionTrigger) { for (int tid = 0; tid < 3; ++tid) { uint64_t db_size[2]; Options options = CurrentOptions(DeletionTriggerOptions()); options.max_subcompactions = max_subcompactions_; if (tid == 1) { // the following only disable stats update in DB::Open() // and should not affect the result of this test. options.skip_stats_update_on_db_open = true; } else if (tid == 2) { // third pass with universal compaction options.compaction_style = kCompactionStyleUniversal; options.num_levels = 1; } DestroyAndReopen(options); Random rnd(301); const int kTestSize = kCDTKeysPerBuffer * 1024; std::vector values; for (int k = 0; k < kTestSize; ++k) { values.push_back(RandomString(&rnd, kCDTValueSize)); ASSERT_OK(Put(Key(k), values[k])); } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); db_size[0] = Size(Key(0), Key(kTestSize - 1)); for (int k = 0; k < kTestSize; ++k) { ASSERT_OK(Delete(Key(k))); } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); db_size[1] = Size(Key(0), Key(kTestSize - 1)); // must have much smaller db size. ASSERT_GT(db_size[0] / 3, db_size[1]); } } TEST_F(DBCompactionTest, SkipStatsUpdateTest) { // This test verify UpdateAccumulatedStats is not on by observing // the compaction behavior when there are many of deletion entries. // The test will need to be updated if the internal behavior changes. Options options = DeletionTriggerOptions(); options = CurrentOptions(options); options.env = env_; DestroyAndReopen(options); Random rnd(301); const int kTestSize = kCDTKeysPerBuffer * 512; std::vector values; for (int k = 0; k < kTestSize; ++k) { values.push_back(RandomString(&rnd, kCDTValueSize)); ASSERT_OK(Put(Key(k), values[k])); } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); for (int k = 0; k < kTestSize; ++k) { ASSERT_OK(Delete(Key(k))); } // Reopen the DB with stats-update disabled options.skip_stats_update_on_db_open = true; env_->random_file_open_counter_.store(0); Reopen(options); // As stats-update is disabled, we expect a very low number of // random file open. // Note that this number must be changed accordingly if we change // the number of files needed to be opened in the DB::Open process. const int kMaxFileOpenCount = 10; ASSERT_LT(env_->random_file_open_counter_.load(), kMaxFileOpenCount); // Repeat the reopen process, but this time we enable // stats-update. options.skip_stats_update_on_db_open = false; env_->random_file_open_counter_.store(0); Reopen(options); // Since we do a normal stats update on db-open, there // will be more random open files. ASSERT_GT(env_->random_file_open_counter_.load(), kMaxFileOpenCount); } TEST_F(DBCompactionTest, TestTableReaderForCompaction) { Options options; options = CurrentOptions(options); options.env = env_; options.new_table_reader_for_compaction_inputs = true; options.max_open_files = 100; options.level0_file_num_compaction_trigger = 3; DestroyAndReopen(options); Random rnd(301); int num_table_cache_lookup = 0; int num_new_table_reader = 0; rocksdb::SyncPoint::GetInstance()->SetCallBack( "TableCache::FindTable:0", [&](void* arg) { assert(arg != nullptr); bool no_io = *(reinterpret_cast(arg)); if (!no_io) { // filter out cases for table properties queries. num_table_cache_lookup++; } }); rocksdb::SyncPoint::GetInstance()->SetCallBack( "TableCache::GetTableReader:0", [&](void* arg) { num_new_table_reader++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); for (int k = 0; k < options.level0_file_num_compaction_trigger; ++k) { ASSERT_OK(Put(Key(k), Key(k))); ASSERT_OK(Put(Key(10 - k), "bar")); if (k < options.level0_file_num_compaction_trigger - 1) { num_table_cache_lookup = 0; Flush(); dbfull()->TEST_WaitForCompact(); // preloading iterator issues one table cache lookup and create // a new table reader. ASSERT_EQ(num_table_cache_lookup, 1); ASSERT_EQ(num_new_table_reader, 1); num_table_cache_lookup = 0; num_new_table_reader = 0; ASSERT_EQ(Key(k), Get(Key(k))); // lookup iterator from table cache and no need to create a new one. ASSERT_EQ(num_table_cache_lookup, 1); ASSERT_EQ(num_new_table_reader, 0); } } num_table_cache_lookup = 0; num_new_table_reader = 0; Flush(); dbfull()->TEST_WaitForCompact(); // Preloading iterator issues one table cache lookup and creates // a new table reader. One file is created for flush and one for compaction. // Compaction inputs make no table cache look-up. ASSERT_EQ(num_table_cache_lookup, 2); // Create new iterator for: // (1) 1 for verifying flush results // (2) 3 for compaction input files // (3) 1 for verifying compaction results. ASSERT_EQ(num_new_table_reader, 5); num_table_cache_lookup = 0; num_new_table_reader = 0; ASSERT_EQ(Key(1), Get(Key(1))); ASSERT_EQ(num_table_cache_lookup, 1); ASSERT_EQ(num_new_table_reader, 0); num_table_cache_lookup = 0; num_new_table_reader = 0; CompactRangeOptions cro; cro.change_level = true; cro.target_level = 2; cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; db_->CompactRange(cro, nullptr, nullptr); // Only verifying compaction outputs issues one table cache lookup. ASSERT_EQ(num_table_cache_lookup, 1); // One for compaction input, one for verifying compaction results. ASSERT_EQ(num_new_table_reader, 2); num_table_cache_lookup = 0; num_new_table_reader = 0; ASSERT_EQ(Key(1), Get(Key(1))); ASSERT_EQ(num_table_cache_lookup, 1); ASSERT_EQ(num_new_table_reader, 0); rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks(); } TEST_P(DBCompactionTestWithParam, CompactionDeletionTriggerReopen) { for (int tid = 0; tid < 2; ++tid) { uint64_t db_size[3]; Options options = CurrentOptions(DeletionTriggerOptions()); options.max_subcompactions = max_subcompactions_; if (tid == 1) { // second pass with universal compaction options.compaction_style = kCompactionStyleUniversal; options.num_levels = 1; } DestroyAndReopen(options); Random rnd(301); // round 1 --- insert key/value pairs. const int kTestSize = kCDTKeysPerBuffer * 512; std::vector values; for (int k = 0; k < kTestSize; ++k) { values.push_back(RandomString(&rnd, kCDTValueSize)); ASSERT_OK(Put(Key(k), values[k])); } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); db_size[0] = Size(Key(0), Key(kTestSize - 1)); Close(); // round 2 --- disable auto-compactions and issue deletions. options.create_if_missing = false; options.disable_auto_compactions = true; Reopen(options); for (int k = 0; k < kTestSize; ++k) { ASSERT_OK(Delete(Key(k))); } db_size[1] = Size(Key(0), Key(kTestSize - 1)); Close(); // as auto_compaction is off, we shouldn't see too much reduce // in db size. ASSERT_LT(db_size[0] / 3, db_size[1]); // round 3 --- reopen db with auto_compaction on and see if // deletion compensation still work. options.disable_auto_compactions = false; Reopen(options); // insert relatively small amount of data to trigger auto compaction. for (int k = 0; k < kTestSize / 10; ++k) { ASSERT_OK(Put(Key(k), values[k])); } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); db_size[2] = Size(Key(0), Key(kTestSize - 1)); // this time we're expecting significant drop in size. ASSERT_GT(db_size[0] / 3, db_size[2]); } } TEST_F(DBCompactionTest, DisableStatsUpdateReopen) { uint64_t db_size[3]; for (int test = 0; test < 2; ++test) { Options options = CurrentOptions(DeletionTriggerOptions()); options.skip_stats_update_on_db_open = (test == 0); env_->random_read_counter_.Reset(); DestroyAndReopen(options); Random rnd(301); // round 1 --- insert key/value pairs. const int kTestSize = kCDTKeysPerBuffer * 512; std::vector values; for (int k = 0; k < kTestSize; ++k) { values.push_back(RandomString(&rnd, kCDTValueSize)); ASSERT_OK(Put(Key(k), values[k])); } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); db_size[0] = Size(Key(0), Key(kTestSize - 1)); Close(); // round 2 --- disable auto-compactions and issue deletions. options.create_if_missing = false; options.disable_auto_compactions = true; env_->random_read_counter_.Reset(); Reopen(options); for (int k = 0; k < kTestSize; ++k) { ASSERT_OK(Delete(Key(k))); } db_size[1] = Size(Key(0), Key(kTestSize - 1)); Close(); // as auto_compaction is off, we shouldn't see too much reduce // in db size. ASSERT_LT(db_size[0] / 3, db_size[1]); // round 3 --- reopen db with auto_compaction on and see if // deletion compensation still work. options.disable_auto_compactions = false; Reopen(options); dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); db_size[2] = Size(Key(0), Key(kTestSize - 1)); if (options.skip_stats_update_on_db_open) { // If update stats on DB::Open is disable, we don't expect // deletion entries taking effect. ASSERT_LT(db_size[0] / 3, db_size[2]); } else { // Otherwise, we should see a significant drop in db size. ASSERT_GT(db_size[0] / 3, db_size[2]); } } } TEST_P(DBCompactionTestWithParam, CompactionTrigger) { const int kNumKeysPerFile = 100; Options options; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.num_levels = 3; options.level0_file_num_compaction_trigger = 3; options.max_subcompactions = max_subcompactions_; options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); options = CurrentOptions(options); CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; num++) { std::vector values; // Write 100KB (100 values, each 1K) for (int i = 0; i < kNumKeysPerFile; i++) { values.push_back(RandomString(&rnd, 990)); ASSERT_OK(Put(1, Key(i), values[i])); } // put extra key to trigger flush ASSERT_OK(Put(1, "", "")); dbfull()->TEST_WaitForFlushMemTable(handles_[1]); ASSERT_EQ(NumTableFilesAtLevel(0, 1), num + 1); } // generate one more file in level-0, and should trigger level-0 compaction std::vector values; for (int i = 0; i < kNumKeysPerFile; i++) { values.push_back(RandomString(&rnd, 990)); ASSERT_OK(Put(1, Key(i), values[i])); } // put extra key to trigger flush ASSERT_OK(Put(1, "", "")); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); ASSERT_EQ(NumTableFilesAtLevel(1, 1), 1); } TEST_P(DBCompactionTestWithParam, CompactionsGenerateMultipleFiles) { Options options; options.write_buffer_size = 100000000; // Large write buffer options.max_subcompactions = max_subcompactions_; options = CurrentOptions(options); CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); // Write 8MB (80 values, each 100K) ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); std::vector values; for (int i = 0; i < 80; i++) { values.push_back(RandomString(&rnd, 100000)); ASSERT_OK(Put(1, Key(i), values[i])); } // Reopening moves updates to level-0 ReopenWithColumnFamilies({"default", "pikachu"}, options); dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1], true /* disallow trivial move */); ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); ASSERT_GT(NumTableFilesAtLevel(1, 1), 1); for (int i = 0; i < 80; i++) { ASSERT_EQ(Get(1, Key(i)), values[i]); } } TEST_F(DBCompactionTest, MinorCompactionsHappen) { do { Options options; options.write_buffer_size = 10000; options = CurrentOptions(options); CreateAndReopenWithCF({"pikachu"}, options); const int N = 500; int starting_num_tables = TotalTableFiles(1); for (int i = 0; i < N; i++) { ASSERT_OK(Put(1, Key(i), Key(i) + std::string(1000, 'v'))); } int ending_num_tables = TotalTableFiles(1); ASSERT_GT(ending_num_tables, starting_num_tables); for (int i = 0; i < N; i++) { ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(1, Key(i))); } ReopenWithColumnFamilies({"default", "pikachu"}, options); for (int i = 0; i < N; i++) { ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(1, Key(i))); } } while (ChangeCompactOptions()); } // Check that writes done during a memtable compaction are recovered // if the database is shutdown during the memtable compaction. TEST_F(DBCompactionTest, RecoverDuringMemtableCompaction) { do { Options options; options.env = env_; options.write_buffer_size = 1000000; options = CurrentOptions(options); CreateAndReopenWithCF({"pikachu"}, options); // Trigger a long memtable compaction and reopen the database during it ASSERT_OK(Put(1, "foo", "v1")); // Goes to 1st log file ASSERT_OK(Put(1, "big1", std::string(10000000, 'x'))); // Fills memtable ASSERT_OK(Put(1, "big2", std::string(1000, 'y'))); // Triggers compaction ASSERT_OK(Put(1, "bar", "v2")); // Goes to new log file ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_EQ("v1", Get(1, "foo")); ASSERT_EQ("v2", Get(1, "bar")); ASSERT_EQ(std::string(10000000, 'x'), Get(1, "big1")); ASSERT_EQ(std::string(1000, 'y'), Get(1, "big2")); } while (ChangeOptions()); } TEST_P(DBCompactionTestWithParam, TrivialMoveOneFile) { int32_t trivial_move = 0; rocksdb::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:TrivialMove", [&](void* arg) { trivial_move++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); Options options; options.write_buffer_size = 100000000; options.max_subcompactions = max_subcompactions_; options = CurrentOptions(options); DestroyAndReopen(options); int32_t num_keys = 80; int32_t value_size = 100 * 1024; // 100 KB Random rnd(301); std::vector values; for (int i = 0; i < num_keys; i++) { values.push_back(RandomString(&rnd, value_size)); ASSERT_OK(Put(Key(i), values[i])); } // Reopening moves updates to L0 Reopen(options); ASSERT_EQ(NumTableFilesAtLevel(0, 0), 1); // 1 file in L0 ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0); // 0 files in L1 std::vector metadata; db_->GetLiveFilesMetaData(&metadata); ASSERT_EQ(metadata.size(), 1U); LiveFileMetaData level0_file = metadata[0]; // L0 file meta // Compaction will initiate a trivial move from L0 to L1 dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); // File moved From L0 to L1 ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0); // 0 files in L0 ASSERT_EQ(NumTableFilesAtLevel(1, 0), 1); // 1 file in L1 metadata.clear(); db_->GetLiveFilesMetaData(&metadata); ASSERT_EQ(metadata.size(), 1U); ASSERT_EQ(metadata[0].name /* level1_file.name */, level0_file.name); ASSERT_EQ(metadata[0].size /* level1_file.size */, level0_file.size); for (int i = 0; i < num_keys; i++) { ASSERT_EQ(Get(Key(i)), values[i]); } ASSERT_EQ(trivial_move, 1); rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } TEST_P(DBCompactionTestWithParam, TrivialMoveNonOverlappingFiles) { int32_t trivial_move = 0; int32_t non_trivial_move = 0; rocksdb::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:TrivialMove", [&](void* arg) { trivial_move++; }); rocksdb::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* arg) { non_trivial_move++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); options.disable_auto_compactions = true; options.write_buffer_size = 10 * 1024 * 1024; options.max_subcompactions = max_subcompactions_; DestroyAndReopen(options); // non overlapping ranges std::vector> ranges = { {100, 199}, {300, 399}, {0, 99}, {200, 299}, {600, 699}, {400, 499}, {500, 550}, {551, 599}, }; int32_t value_size = 10 * 1024; // 10 KB Random rnd(301); std::map values; for (uint32_t i = 0; i < ranges.size(); i++) { for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { values[j] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(j), values[j])); } ASSERT_OK(Flush()); } int32_t level0_files = NumTableFilesAtLevel(0, 0); ASSERT_EQ(level0_files, ranges.size()); // Multiple files in L0 ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0); // No files in L1 // Since data is non-overlapping we expect compaction to initiate // a trivial move db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); // We expect that all the files were trivially moved from L0 to L1 ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0); ASSERT_EQ(NumTableFilesAtLevel(1, 0) /* level1_files */, level0_files); for (uint32_t i = 0; i < ranges.size(); i++) { for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { ASSERT_EQ(Get(Key(j)), values[j]); } } ASSERT_EQ(trivial_move, 1); ASSERT_EQ(non_trivial_move, 0); trivial_move = 0; non_trivial_move = 0; values.clear(); DestroyAndReopen(options); // Same ranges as above but overlapping ranges = { {100, 199}, {300, 399}, {0, 99}, {200, 299}, {600, 699}, {400, 499}, {500, 560}, // this range overlap with the next one {551, 599}, }; for (uint32_t i = 0; i < ranges.size(); i++) { for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { values[j] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(j), values[j])); } ASSERT_OK(Flush()); } db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); for (uint32_t i = 0; i < ranges.size(); i++) { for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { ASSERT_EQ(Get(Key(j)), values[j]); } } ASSERT_EQ(trivial_move, 0); ASSERT_EQ(non_trivial_move, 1); rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } TEST_P(DBCompactionTestWithParam, TrivialMoveTargetLevel) { int32_t trivial_move = 0; int32_t non_trivial_move = 0; rocksdb::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:TrivialMove", [&](void* arg) { trivial_move++; }); rocksdb::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* arg) { non_trivial_move++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); options.disable_auto_compactions = true; options.write_buffer_size = 10 * 1024 * 1024; options.num_levels = 7; options.max_subcompactions = max_subcompactions_; DestroyAndReopen(options); int32_t value_size = 10 * 1024; // 10 KB // Add 2 non-overlapping files Random rnd(301); std::map values; // file 1 [0 => 300] for (int32_t i = 0; i <= 300; i++) { values[i] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // file 2 [600 => 700] for (int32_t i = 600; i <= 700; i++) { values[i] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // 2 files in L0 ASSERT_EQ("2", FilesPerLevel(0)); CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 6; ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); // 2 files in L6 ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel(0)); ASSERT_EQ(trivial_move, 1); ASSERT_EQ(non_trivial_move, 0); for (int32_t i = 0; i <= 300; i++) { ASSERT_EQ(Get(Key(i)), values[i]); } for (int32_t i = 600; i <= 700; i++) { ASSERT_EQ(Get(Key(i)), values[i]); } } TEST_P(DBCompactionTestWithParam, TrivialMoveToLastLevelWithFiles) { int32_t trivial_move = 0; int32_t non_trivial_move = 0; rocksdb::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:TrivialMove", [&](void* arg) { trivial_move++; }); rocksdb::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* arg) { non_trivial_move++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); Options options; options.write_buffer_size = 100000000; options.max_subcompactions = max_subcompactions_; options = CurrentOptions(options); DestroyAndReopen(options); int32_t value_size = 10 * 1024; // 10 KB Random rnd(301); std::vector values; // File with keys [ 0 => 99 ] for (int i = 0; i < 100; i++) { values.push_back(RandomString(&rnd, value_size)); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); ASSERT_EQ("1", FilesPerLevel(0)); // Compaction will do L0=>L1 (trivial move) then move L1 files to L3 CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 3; ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); ASSERT_EQ(trivial_move, 1); ASSERT_EQ(non_trivial_move, 0); // File with keys [ 100 => 199 ] for (int i = 100; i < 200; i++) { values.push_back(RandomString(&rnd, value_size)); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); ASSERT_EQ("1,0,0,1", FilesPerLevel(0)); // Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves) ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ("0,0,0,2", FilesPerLevel(0)); ASSERT_EQ(trivial_move, 4); ASSERT_EQ(non_trivial_move, 0); for (int i = 0; i < 200; i++) { ASSERT_EQ(Get(Key(i)), values[i]); } rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } TEST_P(DBCompactionTestWithParam, LevelCompactionThirdPath) { Options options = CurrentOptions(); options.db_paths.emplace_back(dbname_, 500 * 1024); options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024); options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024); options.compaction_style = kCompactionStyleLevel; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 2; options.num_levels = 4; options.max_bytes_for_level_base = 400 * 1024; options.max_subcompactions = max_subcompactions_; // options = CurrentOptions(options); std::vector filenames; env_->GetChildren(options.db_paths[1].path, &filenames); // Delete archival files. for (size_t i = 0; i < filenames.size(); ++i) { env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i]); } env_->DeleteDir(options.db_paths[1].path); Reopen(options); Random rnd(301); int key_idx = 0; // First three 110KB files are not going to second path. // After that, (100K, 200K) for (int num = 0; num < 3; num++) { GenerateNewFile(&rnd, &key_idx); } // Another 110KB triggers a compaction to 400K file to fill up first path GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(3, GetSstFileCount(options.db_paths[1].path)); // (1, 4) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4", FilesPerLevel(0)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 1) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,1", FilesPerLevel(0)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 2) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,2", FilesPerLevel(0)); ASSERT_EQ(2, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 3) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,3", FilesPerLevel(0)); ASSERT_EQ(3, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 4) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,4", FilesPerLevel(0)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 5) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,5", FilesPerLevel(0)); ASSERT_EQ(5, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 6) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,6", FilesPerLevel(0)); ASSERT_EQ(6, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 7) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,7", FilesPerLevel(0)); ASSERT_EQ(7, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,8", FilesPerLevel(0)); ASSERT_EQ(8, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); for (int i = 0; i < key_idx; i++) { auto v = Get(Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } Reopen(options); for (int i = 0; i < key_idx; i++) { auto v = Get(Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } Destroy(options); } TEST_P(DBCompactionTestWithParam, LevelCompactionPathUse) { Options options = CurrentOptions(); options.db_paths.emplace_back(dbname_, 500 * 1024); options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024); options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024); options.compaction_style = kCompactionStyleLevel; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 2; options.num_levels = 4; options.max_bytes_for_level_base = 400 * 1024; options.max_subcompactions = max_subcompactions_; // options = CurrentOptions(options); std::vector filenames; env_->GetChildren(options.db_paths[1].path, &filenames); // Delete archival files. for (size_t i = 0; i < filenames.size(); ++i) { env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i]); } env_->DeleteDir(options.db_paths[1].path); Reopen(options); Random rnd(301); int key_idx = 0; // Always gets compacted into 1 Level1 file, // 0/1 Level 0 file for (int num = 0; num < 3; num++) { key_idx = 0; GenerateNewFile(&rnd, &key_idx); } key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,1", FilesPerLevel(0)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("0,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("0,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("0,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("0,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); for (int i = 0; i < key_idx; i++) { auto v = Get(Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } Reopen(options); for (int i = 0; i < key_idx; i++) { auto v = Get(Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } Destroy(options); } TEST_P(DBCompactionTestWithParam, ConvertCompactionStyle) { Random rnd(301); int max_key_level_insert = 200; int max_key_universal_insert = 600; // Stage 1: generate a db with level compaction Options options; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.num_levels = 4; options.level0_file_num_compaction_trigger = 3; options.max_bytes_for_level_base = 500 << 10; // 500KB options.max_bytes_for_level_multiplier = 1; options.target_file_size_base = 200 << 10; // 200KB options.target_file_size_multiplier = 1; options.max_subcompactions = max_subcompactions_; options = CurrentOptions(options); CreateAndReopenWithCF({"pikachu"}, options); for (int i = 0; i <= max_key_level_insert; i++) { // each value is 10K ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000))); } ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); ASSERT_GT(TotalTableFiles(1, 4), 1); int non_level0_num_files = 0; for (int i = 1; i < options.num_levels; i++) { non_level0_num_files += NumTableFilesAtLevel(i, 1); } ASSERT_GT(non_level0_num_files, 0); // Stage 2: reopen with universal compaction - should fail options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.num_levels = 1; options = CurrentOptions(options); Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_TRUE(s.IsInvalidArgument()); // Stage 3: compact into a single file and move the file to level 0 options = CurrentOptions(); options.disable_auto_compactions = true; options.target_file_size_base = INT_MAX; options.target_file_size_multiplier = 1; options.max_bytes_for_level_base = INT_MAX; options.max_bytes_for_level_multiplier = 1; options.num_levels = 4; options = CurrentOptions(options); ReopenWithColumnFamilies({"default", "pikachu"}, options); CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 0; compact_options.bottommost_level_compaction = BottommostLevelCompaction::kForce; dbfull()->CompactRange(compact_options, handles_[1], nullptr, nullptr); // Only 1 file in L0 ASSERT_EQ("1", FilesPerLevel(1)); // Stage 4: re-open in universal compaction style and do some db operations options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.num_levels = 4; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 3; options = CurrentOptions(options); ReopenWithColumnFamilies({"default", "pikachu"}, options); options.num_levels = 1; ReopenWithColumnFamilies({"default", "pikachu"}, options); for (int i = max_key_level_insert / 2; i <= max_key_universal_insert; i++) { ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000))); } dbfull()->Flush(FlushOptions()); ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); for (int i = 1; i < options.num_levels; i++) { ASSERT_EQ(NumTableFilesAtLevel(i, 1), 0); } // verify keys inserted in both level compaction style and universal // compaction style std::string keys_in_db; Iterator* iter = dbfull()->NewIterator(ReadOptions(), handles_[1]); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { keys_in_db.append(iter->key().ToString()); keys_in_db.push_back(','); } delete iter; std::string expected_keys; for (int i = 0; i <= max_key_universal_insert; i++) { expected_keys.append(Key(i)); expected_keys.push_back(','); } ASSERT_EQ(keys_in_db, expected_keys); } TEST_F(DBCompactionTest, L0_CompactionBug_Issue44_a) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "b", "v")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_OK(Delete(1, "b")); ASSERT_OK(Delete(1, "a")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_OK(Delete(1, "a")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "a", "v")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_EQ("(a->v)", Contents(1)); env_->SleepForMicroseconds(1000000); // Wait for compaction to finish ASSERT_EQ("(a->v)", Contents(1)); } while (ChangeCompactOptions()); } TEST_F(DBCompactionTest, L0_CompactionBug_Issue44_b) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); Put(1, "", ""); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); Delete(1, "e"); Put(1, "", ""); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); Put(1, "c", "cv"); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); Put(1, "", ""); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); Put(1, "", ""); env_->SleepForMicroseconds(1000000); // Wait for compaction to finish ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); Put(1, "d", "dv"); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); Put(1, "", ""); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); Delete(1, "d"); Delete(1, "b"); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_EQ("(->)(c->cv)", Contents(1)); env_->SleepForMicroseconds(1000000); // Wait for compaction to finish ASSERT_EQ("(->)(c->cv)", Contents(1)); } while (ChangeCompactOptions()); } TEST_P(DBCompactionTestWithParam, ManualCompaction) { Options options = CurrentOptions(); options.max_subcompactions = max_subcompactions_; options.statistics = rocksdb::CreateDBStatistics(); CreateAndReopenWithCF({"pikachu"}, options); // iter - 0 with 7 levels // iter - 1 with 3 levels for (int iter = 0; iter < 2; ++iter) { MakeTables(3, "p", "q", 1); ASSERT_EQ("1,1,1", FilesPerLevel(1)); // Compaction range falls before files Compact(1, "", "c"); ASSERT_EQ("1,1,1", FilesPerLevel(1)); // Compaction range falls after files Compact(1, "r", "z"); ASSERT_EQ("1,1,1", FilesPerLevel(1)); // Compaction range overlaps files Compact(1, "p1", "p9"); ASSERT_EQ("0,0,1", FilesPerLevel(1)); // Populate a different range MakeTables(3, "c", "e", 1); ASSERT_EQ("1,1,2", FilesPerLevel(1)); // Compact just the new range Compact(1, "b", "f"); ASSERT_EQ("0,0,2", FilesPerLevel(1)); // Compact all MakeTables(1, "a", "z", 1); ASSERT_EQ("1,0,2", FilesPerLevel(1)); uint64_t prev_block_cache_add = options.statistics->getTickerCount(BLOCK_CACHE_ADD); db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); // Verify manual compaction doesn't fill block cache ASSERT_EQ(prev_block_cache_add, options.statistics->getTickerCount(BLOCK_CACHE_ADD)); ASSERT_EQ("0,0,1", FilesPerLevel(1)); if (iter == 0) { options = CurrentOptions(); options.max_background_flushes = 0; options.num_levels = 3; options.create_if_missing = true; options.statistics = rocksdb::CreateDBStatistics(); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); } } } TEST_P(DBCompactionTestWithParam, ManualLevelCompactionOutputPathId) { Options options = CurrentOptions(); options.db_paths.emplace_back(dbname_ + "_2", 2 * 10485760); options.db_paths.emplace_back(dbname_ + "_3", 100 * 10485760); options.db_paths.emplace_back(dbname_ + "_4", 120 * 10485760); options.max_subcompactions = max_subcompactions_; CreateAndReopenWithCF({"pikachu"}, options); // iter - 0 with 7 levels // iter - 1 with 3 levels for (int iter = 0; iter < 2; ++iter) { for (int i = 0; i < 3; ++i) { ASSERT_OK(Put(1, "p", "begin")); ASSERT_OK(Put(1, "q", "end")); ASSERT_OK(Flush(1)); } ASSERT_EQ("3", FilesPerLevel(1)); ASSERT_EQ(3, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // Compaction range falls before files Compact(1, "", "c"); ASSERT_EQ("3", FilesPerLevel(1)); // Compaction range falls after files Compact(1, "r", "z"); ASSERT_EQ("3", FilesPerLevel(1)); // Compaction range overlaps files Compact(1, "p1", "p9", 1); ASSERT_EQ("0,1", FilesPerLevel(1)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // Populate a different range for (int i = 0; i < 3; ++i) { ASSERT_OK(Put(1, "c", "begin")); ASSERT_OK(Put(1, "e", "end")); ASSERT_OK(Flush(1)); } ASSERT_EQ("3,1", FilesPerLevel(1)); // Compact just the new range Compact(1, "b", "f", 1); ASSERT_EQ("0,2", FilesPerLevel(1)); ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // Compact all ASSERT_OK(Put(1, "a", "begin")); ASSERT_OK(Put(1, "z", "end")); ASSERT_OK(Flush(1)); ASSERT_EQ("1,2", FilesPerLevel(1)); ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path)); CompactRangeOptions compact_options; compact_options.target_path_id = 1; db_->CompactRange(compact_options, handles_[1], nullptr, nullptr); ASSERT_EQ("0,1", FilesPerLevel(1)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); if (iter == 0) { DestroyAndReopen(options); options = CurrentOptions(); options.db_paths.emplace_back(dbname_ + "_2", 2 * 10485760); options.db_paths.emplace_back(dbname_ + "_3", 100 * 10485760); options.db_paths.emplace_back(dbname_ + "_4", 120 * 10485760); options.max_background_flushes = 1; options.num_levels = 3; options.create_if_missing = true; CreateAndReopenWithCF({"pikachu"}, options); } } } TEST_F(DBCompactionTest, FilesDeletedAfterCompaction) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "foo", "v2")); Compact(1, "a", "z"); const size_t num_files = CountLiveFiles(); for (int i = 0; i < 10; i++) { ASSERT_OK(Put(1, "foo", "v2")); Compact(1, "a", "z"); } ASSERT_EQ(CountLiveFiles(), num_files); } while (ChangeCompactOptions()); } // Check level comapction with compact files TEST_P(DBCompactionTestWithParam, DISABLED_CompactFilesOnLevelCompaction) { const int kTestKeySize = 16; const int kTestValueSize = 984; const int kEntrySize = kTestKeySize + kTestValueSize; const int kEntriesPerBuffer = 100; Options options; options.create_if_missing = true; options.write_buffer_size = kEntrySize * kEntriesPerBuffer; options.compaction_style = kCompactionStyleLevel; options.target_file_size_base = options.write_buffer_size; options.max_bytes_for_level_base = options.target_file_size_base * 2; options.level0_stop_writes_trigger = 2; options.max_bytes_for_level_multiplier = 2; options.compression = kNoCompression; options.max_subcompactions = max_subcompactions_; options = CurrentOptions(options); CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) { ASSERT_OK(Put(1, ToString(key), RandomString(&rnd, kTestValueSize))); } dbfull()->TEST_WaitForFlushMemTable(handles_[1]); dbfull()->TEST_WaitForCompact(); ColumnFamilyMetaData cf_meta; dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta); int output_level = static_cast(cf_meta.levels.size()) - 1; for (int file_picked = 5; file_picked > 0; --file_picked) { std::set overlapping_file_names; std::vector compaction_input_file_names; for (int f = 0; f < file_picked; ++f) { int level; auto file_meta = PickFileRandomly(cf_meta, &rnd, &level); compaction_input_file_names.push_back(file_meta->name); GetOverlappingFileNumbersForLevelCompaction( cf_meta, options.comparator, level, output_level, file_meta, &overlapping_file_names); } ASSERT_OK(dbfull()->CompactFiles( CompactionOptions(), handles_[1], compaction_input_file_names, output_level)); // Make sure all overlapping files do not exist after compaction dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta); VerifyCompactionResult(cf_meta, overlapping_file_names); } // make sure all key-values are still there. for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) { ASSERT_NE(Get(1, ToString(key)), "NOT_FOUND"); } } TEST_P(DBCompactionTestWithParam, PartialCompactionFailure) { Options options; const int kKeySize = 16; const int kKvSize = 1000; const int kKeysPerBuffer = 100; const int kNumL1Files = 5; options.create_if_missing = true; options.write_buffer_size = kKeysPerBuffer * kKvSize; options.max_write_buffer_number = 2; options.target_file_size_base = options.write_buffer_size * (options.max_write_buffer_number - 1); options.level0_file_num_compaction_trigger = kNumL1Files; options.max_bytes_for_level_base = options.level0_file_num_compaction_trigger * options.target_file_size_base; options.max_bytes_for_level_multiplier = 2; options.compression = kNoCompression; options.max_subcompactions = max_subcompactions_; env_->SetBackgroundThreads(1, Env::HIGH); env_->SetBackgroundThreads(1, Env::LOW); // stop the compaction thread until we simulate the file creation failure. test::SleepingBackgroundTask sleeping_task_low; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); options.env = env_; DestroyAndReopen(options); const int kNumInsertedKeys = options.level0_file_num_compaction_trigger * (options.max_write_buffer_number - 1) * kKeysPerBuffer; Random rnd(301); std::vector keys; std::vector values; for (int k = 0; k < kNumInsertedKeys; ++k) { keys.emplace_back(RandomString(&rnd, kKeySize)); values.emplace_back(RandomString(&rnd, kKvSize - kKeySize)); ASSERT_OK(Put(Slice(keys[k]), Slice(values[k]))); dbfull()->TEST_WaitForFlushMemTable(); } dbfull()->TEST_FlushMemTable(true); // Make sure the number of L0 files can trigger compaction. ASSERT_GE(NumTableFilesAtLevel(0), options.level0_file_num_compaction_trigger); auto previous_num_level0_files = NumTableFilesAtLevel(0); // Fail the first file creation. env_->non_writable_count_ = 1; sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilDone(); // Expect compaction to fail here as one file will fail its // creation. ASSERT_TRUE(!dbfull()->TEST_WaitForCompact().ok()); // Verify L0 -> L1 compaction does fail. ASSERT_EQ(NumTableFilesAtLevel(1), 0); // Verify all L0 files are still there. ASSERT_EQ(NumTableFilesAtLevel(0), previous_num_level0_files); // All key-values must exist after compaction fails. for (int k = 0; k < kNumInsertedKeys; ++k) { ASSERT_EQ(values[k], Get(keys[k])); } env_->non_writable_count_ = 0; // Make sure RocksDB will not get into corrupted state. Reopen(options); // Verify again after reopen. for (int k = 0; k < kNumInsertedKeys; ++k) { ASSERT_EQ(values[k], Get(keys[k])); } } TEST_P(DBCompactionTestWithParam, DeleteMovedFileAfterCompaction) { // iter 1 -- delete_obsolete_files_period_micros == 0 for (int iter = 0; iter < 2; ++iter) { // This test triggers move compaction and verifies that the file is not // deleted when it's part of move compaction Options options = CurrentOptions(); options.env = env_; if (iter == 1) { options.delete_obsolete_files_period_micros = 0; } options.create_if_missing = true; options.level0_file_num_compaction_trigger = 2; // trigger compaction when we have 2 files OnFileDeletionListener* listener = new OnFileDeletionListener(); options.listeners.emplace_back(listener); options.max_subcompactions = max_subcompactions_; DestroyAndReopen(options); Random rnd(301); // Create two 1MB sst files for (int i = 0; i < 2; ++i) { // Create 1MB sst file for (int j = 0; j < 100; ++j) { ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024))); } ASSERT_OK(Flush()); } // this should execute L0->L1 dbfull()->TEST_WaitForCompact(); ASSERT_EQ("0,1", FilesPerLevel(0)); // block compactions test::SleepingBackgroundTask sleeping_task; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, Env::Priority::LOW); options.max_bytes_for_level_base = 1024 * 1024; // 1 MB Reopen(options); std::unique_ptr iterator(db_->NewIterator(ReadOptions())); ASSERT_EQ("0,1", FilesPerLevel(0)); // let compactions go sleeping_task.WakeUp(); sleeping_task.WaitUntilDone(); // this should execute L1->L2 (move) dbfull()->TEST_WaitForCompact(); ASSERT_EQ("0,0,1", FilesPerLevel(0)); std::vector metadata; db_->GetLiveFilesMetaData(&metadata); ASSERT_EQ(metadata.size(), 1U); auto moved_file_name = metadata[0].name; // Create two more 1MB sst files for (int i = 0; i < 2; ++i) { // Create 1MB sst file for (int j = 0; j < 100; ++j) { ASSERT_OK(Put(Key(i * 50 + j + 100), RandomString(&rnd, 10 * 1024))); } ASSERT_OK(Flush()); } // this should execute both L0->L1 and L1->L2 (merge with previous file) dbfull()->TEST_WaitForCompact(); ASSERT_EQ("0,0,2", FilesPerLevel(0)); // iterator is holding the file ASSERT_OK(env_->FileExists(dbname_ + moved_file_name)); listener->SetExpectedFileName(dbname_ + moved_file_name); iterator.reset(); // this file should have been compacted away ASSERT_NOK(env_->FileExists(dbname_ + moved_file_name)); listener->VerifyMatchedCount(1); } } TEST_P(DBCompactionTestWithParam, CompressLevelCompaction) { if (!Zlib_Supported()) { return; } Options options = CurrentOptions(); options.compaction_style = kCompactionStyleLevel; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 2; options.num_levels = 4; options.max_bytes_for_level_base = 400 * 1024; options.max_subcompactions = max_subcompactions_; // First two levels have no compression, so that a trivial move between // them will be allowed. Level 2 has Zlib compression so that a trivial // move to level 3 will not be allowed options.compression_per_level = {kNoCompression, kNoCompression, kZlibCompression}; int matches = 0, didnt_match = 0, trivial_move = 0, non_trivial = 0; rocksdb::SyncPoint::GetInstance()->SetCallBack( "Compaction::InputCompressionMatchesOutput:Matches", [&](void* arg) { matches++; }); rocksdb::SyncPoint::GetInstance()->SetCallBack( "Compaction::InputCompressionMatchesOutput:DidntMatch", [&](void* arg) { didnt_match++; }); rocksdb::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* arg) { non_trivial++; }); rocksdb::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:TrivialMove", [&](void* arg) { trivial_move++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); Reopen(options); Random rnd(301); int key_idx = 0; // First three 110KB files are going to level 0 // After that, (100K, 200K) for (int num = 0; num < 3; num++) { GenerateNewFile(&rnd, &key_idx); } // Another 110KB triggers a compaction to 400K file to fill up level 0 GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(4, GetSstFileCount(dbname_)); // (1, 4) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4", FilesPerLevel(0)); // (1, 4, 1) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,1", FilesPerLevel(0)); // (1, 4, 2) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,2", FilesPerLevel(0)); // (1, 4, 3) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,3", FilesPerLevel(0)); // (1, 4, 4) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,4", FilesPerLevel(0)); // (1, 4, 5) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,5", FilesPerLevel(0)); // (1, 4, 6) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,6", FilesPerLevel(0)); // (1, 4, 7) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,7", FilesPerLevel(0)); // (1, 4, 8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,8", FilesPerLevel(0)); ASSERT_EQ(matches, 12); // Currently, the test relies on the number of calls to // InputCompressionMatchesOutput() per compaction. const int kCallsToInputCompressionMatch = 2; ASSERT_EQ(didnt_match, 8 * kCallsToInputCompressionMatch); ASSERT_EQ(trivial_move, 12); ASSERT_EQ(non_trivial, 8); rocksdb::SyncPoint::GetInstance()->DisableProcessing(); for (int i = 0; i < key_idx; i++) { auto v = Get(Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } Reopen(options); for (int i = 0; i < key_idx; i++) { auto v = Get(Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } Destroy(options); } // This tests for a bug that could cause two level0 compactions running // concurrently // TODO(aekmekji): Make sure that the reason this fails when run with // max_subcompactions > 1 is not a correctness issue but just inherent to // running parallel L0-L1 compactions TEST_F(DBCompactionTest, SuggestCompactRangeNoTwoLevel0Compactions) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleLevel; options.write_buffer_size = 110 << 10; options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 4; options.num_levels = 4; options.compression = kNoCompression; options.max_bytes_for_level_base = 450 << 10; options.target_file_size_base = 98 << 10; options.max_write_buffer_number = 2; options.max_background_compactions = 2; DestroyAndReopen(options); // fill up the DB Random rnd(301); for (int num = 0; num < 10; num++) { GenerateNewRandomFile(&rnd); } db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); rocksdb::SyncPoint::GetInstance()->LoadDependency( {{"CompactionJob::Run():Start", "DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:1"}, {"DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:2", "CompactionJob::Run():End"}}); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); // trigger L0 compaction for (int num = 0; num < options.level0_file_num_compaction_trigger + 1; num++) { GenerateNewRandomFile(&rnd, /* nowait */ true); ASSERT_OK(Flush()); } TEST_SYNC_POINT( "DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:1"); GenerateNewRandomFile(&rnd, /* nowait */ true); dbfull()->TEST_WaitForFlushMemTable(); ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr)); for (int num = 0; num < options.level0_file_num_compaction_trigger + 1; num++) { GenerateNewRandomFile(&rnd, /* nowait */ true); ASSERT_OK(Flush()); } TEST_SYNC_POINT( "DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:2"); dbfull()->TEST_WaitForCompact(); } TEST_P(DBCompactionTestWithParam, ForceBottommostLevelCompaction) { int32_t trivial_move = 0; int32_t non_trivial_move = 0; rocksdb::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:TrivialMove", [&](void* arg) { trivial_move++; }); rocksdb::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* arg) { non_trivial_move++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); Options options; options.write_buffer_size = 100000000; options.max_subcompactions = max_subcompactions_; options = CurrentOptions(options); DestroyAndReopen(options); int32_t value_size = 10 * 1024; // 10 KB Random rnd(301); std::vector values; // File with keys [ 0 => 99 ] for (int i = 0; i < 100; i++) { values.push_back(RandomString(&rnd, value_size)); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); ASSERT_EQ("1", FilesPerLevel(0)); // Compaction will do L0=>L1 (trivial move) then move L1 files to L3 CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 3; ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); ASSERT_EQ(trivial_move, 1); ASSERT_EQ(non_trivial_move, 0); // File with keys [ 100 => 199 ] for (int i = 100; i < 200; i++) { values.push_back(RandomString(&rnd, value_size)); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); ASSERT_EQ("1,0,0,1", FilesPerLevel(0)); // Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves) // then compacte the bottommost level L3=>L3 (non trivial move) compact_options = CompactRangeOptions(); compact_options.bottommost_level_compaction = BottommostLevelCompaction::kForce; ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); ASSERT_EQ(trivial_move, 4); ASSERT_EQ(non_trivial_move, 1); // File with keys [ 200 => 299 ] for (int i = 200; i < 300; i++) { values.push_back(RandomString(&rnd, value_size)); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); ASSERT_EQ("1,0,0,1", FilesPerLevel(0)); trivial_move = 0; non_trivial_move = 0; compact_options = CompactRangeOptions(); compact_options.bottommost_level_compaction = BottommostLevelCompaction::kSkip; // Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves) // and will skip bottommost level compaction ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); ASSERT_EQ("0,0,0,2", FilesPerLevel(0)); ASSERT_EQ(trivial_move, 3); ASSERT_EQ(non_trivial_move, 0); for (int i = 0; i < 300; i++) { ASSERT_EQ(Get(Key(i)), values[i]); } rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam, ::testing::Values(1, 4)); #endif // !defined(ROCKSDB_LITE) } // namespace rocksdb int main(int argc, char** argv) { #if !defined(ROCKSDB_LITE) rocksdb::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); #else return 0; #endif }