From 1b59a490ef8d8da78c826b379167207dfa682b4c Mon Sep 17 00:00:00 2001
From: Sagar Vemuri
Date: Thu, 30 May 2019 16:07:57 -0700
Subject: [PATCH] Fix flaky DBTest2.PresetCompressionDict test (#5378)

Summary:
Fix the flaky DBTest2.PresetCompressionDict test. This PR fixes two issues
with the test:
1. Replaces `GetSstFiles` with `TotalSize`, which is based on
`DB::GetColumnFamilyMetaData`, so that only the live SST files are taken
into consideration when computing the total size of all SST files.
Earlier, with `GetSstFiles`, even obsolete files were getting picked up.
2. In ZSTD compression, it is sometimes possible that using a trained
dictionary is not better than using an untrained one. A trained dictionary
performs better in roughly 99% of cases, but in the remaining ~1%
(observed over 10,000 runs) an untrained dictionary achieves a better
compression ratio.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/5378

Differential Revision: D15559100

Pulled By: sagar0

fbshipit-source-id: c35adbf13871f520a2cec48f8bad9ff27ff7a0b4
---
 db/db_test2.cc | 58 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 20 deletions(-)

diff --git a/db/db_test2.cc b/db/db_test2.cc
index d93beb447..109a7a377 100644
--- a/db/db_test2.cc
+++ b/db/db_test2.cc
@@ -1036,8 +1036,7 @@ TEST_F(DBTest2, WalFilterTestWithColumnFamilies) {
   ASSERT_TRUE(index == keys_cf.size());
 }
 
-// Temporarily disable it because the test is flaky.
-TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
+TEST_F(DBTest2, PresetCompressionDict) {
   // Verifies that compression ratio improves when dictionary is enabled, and
   // improves even further when the dictionary is trained by ZSTD.
   const size_t kBlockSizeBytes = 4 << 10;
@@ -1046,7 +1045,8 @@ TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
   const int kNumL0Files = 5;
 
   Options options;
-  options.env = CurrentOptions().env; // Make sure to use any custom env that the test is configured with.
+  // Make sure to use any custom env that the test is configured with.
+  options.env = CurrentOptions().env;
   options.allow_concurrent_memtable_write = false;
   options.arena_block_size = kBlockSizeBytes;
   options.create_if_missing = true;
@@ -1072,10 +1072,19 @@ TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
     compression_types.push_back(kZSTD);
   }
 
+  enum DictionaryTypes : int {
+    kWithoutDict,
+    kWithDict,
+    kWithZSTDTrainedDict,
+    kDictEnd,
+  };
+
   for (auto compression_type : compression_types) {
     options.compression = compression_type;
-    size_t prev_out_bytes;
-    for (int i = 0; i < 3; ++i) {
+    size_t bytes_without_dict = 0;
+    size_t bytes_with_dict = 0;
+    size_t bytes_with_zstd_trained_dict = 0;
+    for (int i = kWithoutDict; i < kDictEnd; i++) {
       // First iteration: compress without preset dictionary
       // Second iteration: compress with preset dictionary
       // Third iteration (zstd only): compress with zstd-trained dictionary
@@ -1085,19 +1094,19 @@ TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
       // the non-first iterations, verify the data we get out is the same data
       // we put in.
       switch (i) {
-        case 0:
+        case kWithoutDict:
           options.compression_opts.max_dict_bytes = 0;
           options.compression_opts.zstd_max_train_bytes = 0;
           break;
-        case 1:
-          options.compression_opts.max_dict_bytes = 4 * kBlockSizeBytes;
+        case kWithDict:
+          options.compression_opts.max_dict_bytes = kBlockSizeBytes;
           options.compression_opts.zstd_max_train_bytes = 0;
           break;
-        case 2:
+        case kWithZSTDTrainedDict:
           if (compression_type != kZSTD) {
             continue;
           }
-          options.compression_opts.max_dict_bytes = 4 * kBlockSizeBytes;
+          options.compression_opts.max_dict_bytes = kBlockSizeBytes;
           options.compression_opts.zstd_max_train_bytes = kL0FileBytes;
           break;
         default:
@@ -1129,23 +1138,32 @@ TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
       ASSERT_EQ(0, NumTableFilesAtLevel(0, 1));
       ASSERT_GT(NumTableFilesAtLevel(1, 1), 0);
 
-      size_t out_bytes = 0;
-      std::vector<std::string> files;
-      GetSstFiles(env_, dbname_, &files);
-      for (const auto& file : files) {
-        uint64_t curr_bytes;
-        env_->GetFileSize(dbname_ + "/" + file, &curr_bytes);
-        out_bytes += static_cast<size_t>(curr_bytes);
+      // Get the live sst files size
+      size_t total_sst_bytes = TotalSize(1);
+      if (i == kWithoutDict) {
+        bytes_without_dict = total_sst_bytes;
+      } else if (i == kWithDict) {
+        bytes_with_dict = total_sst_bytes;
+      } else if (i == kWithZSTDTrainedDict) {
+        bytes_with_zstd_trained_dict = total_sst_bytes;
       }
 
       for (size_t j = 0; j < kNumL0Files * (kL0FileBytes / kBlockSizeBytes);
            j++) {
         ASSERT_EQ(seq_datas[(j / 10) % 10], Get(1, Key(static_cast<int>(j))));
       }
-      if (i) {
-        ASSERT_GT(prev_out_bytes, out_bytes);
+      if (i == kWithDict) {
+        ASSERT_GT(bytes_without_dict, bytes_with_dict);
+      } else if (i == kWithZSTDTrainedDict) {
+        // In zstd compression, it is sometimes possible that using a trained
+        // dictionary does not get as good a compression ratio as without
+        // training.
+        // But using a dictionary (with or without training) should always get
+        // better compression ratio than not using one.
+        ASSERT_TRUE(bytes_with_dict > bytes_with_zstd_trained_dict ||
+                    bytes_without_dict > bytes_with_zstd_trained_dict);
       }
-      prev_out_bytes = out_bytes;
+
       DestroyAndReopen(options);
     }
   }
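
Note (illustrative, not part of the patch): `TotalSize` is a `DBTestBase` helper; per the summary above it is based on `DB::GetColumnFamilyMetaData`, which only reports files referenced by the current version. A minimal sketch of that approach, using a hypothetical free function `LiveSstBytes` rather than the helper's actual implementation:

```cpp
#include <cassert>
#include <cstdint>

#include "rocksdb/db.h"
#include "rocksdb/metadata.h"

// Sum the sizes of the live SST files in one column family. Obsolete files
// that are physically still on disk are not listed in the column family
// metadata, so they cannot inflate the result the way a raw directory scan
// (the old GetSstFiles approach) could.
uint64_t LiveSstBytes(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* cf) {
  rocksdb::ColumnFamilyMetaData cf_meta;
  db->GetColumnFamilyMetaData(cf, &cf_meta);
  uint64_t total = 0;
  for (const auto& level : cf_meta.levels) {
    for (const auto& file : level.files) {
      total += file.size;  // SstFileMetaData::size: file size in bytes
    }
  }
  // ColumnFamilyMetaData::size is the same sum, precomputed by RocksDB.
  assert(total == cf_meta.size);
  return total;
}
```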
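
For anyone reproducing the dictionary comparison outside the test harness, a sketch of how the three modes the loop cycles through map onto `CompressionOptions` (field names are the real ones used in the patch; the `kL0FileBytes` value is an assumption here, only `kBlockSizeBytes = 4 << 10` appears in the diff):

```cpp
#include "rocksdb/options.h"

// mode mirrors the test's DictionaryTypes: 0 = kWithoutDict,
// 1 = kWithDict, 2 = kWithZSTDTrainedDict.
rocksdb::Options MakeDictOptions(int mode) {
  rocksdb::Options options;
  options.compression = rocksdb::kZSTD;
  switch (mode) {
    case 0:  // no preset dictionary at all
      options.compression_opts.max_dict_bytes = 0;
      options.compression_opts.zstd_max_train_bytes = 0;
      break;
    case 1:  // sampled data used directly as an untrained dictionary
      options.compression_opts.max_dict_bytes = 4 << 10;  // kBlockSizeBytes
      options.compression_opts.zstd_max_train_bytes = 0;
      break;
    case 2:  // samples additionally passed to ZSTD's dictionary trainer
      options.compression_opts.max_dict_bytes = 4 << 10;  // kBlockSizeBytes
      options.compression_opts.zstd_max_train_bytes =
          128 << 10;  // assumed value of the test's kL0FileBytes
      break;
  }
  return options;
}
```

The assertion change in the patch reflects exactly this split: mode 1 must always beat mode 0, but mode 2 is only required to beat one of the other two, since ZDICT training can occasionally produce a dictionary that compresses worse than the raw samples.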