Fix flaky DBTest2.PresetCompressionDict test (#5378)

Summary:
Fix flaky DBTest2.PresetCompressionDict test.

This PR fixes two issues with the test:
1. Replaces `GetSstFiles` with `TotalSize`, which is based on `DB::GetColumnFamilyMetaData` so that only the size of the live SST files is taken into consideration when computing the total size of all sst files. Earlier, with `GetSstFiles`, even obsolete files were getting picked up.
2. In ZSTD compression, it is sometimes possible that using a trained dictionary does not compress better than using an untrained one. Using a trained dictionary performs well in ~99% of cases, but in the remaining ~1% of cases (out of 10000 runs) an untrained dictionary achieves better compression results.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5378

Differential Revision: D15559100

Pulled By: sagar0

fbshipit-source-id: c35adbf13871f520a2cec48f8bad9ff27ff7a0b4
main
Sagar Vemuri 6 years ago committed by Facebook Github Bot
parent 50e470791d
commit 1b59a490ef
Changed files (1): db/db_test2.cc (58 lines changed)

@ -1036,8 +1036,7 @@ TEST_F(DBTest2, WalFilterTestWithColumnFamilies) {
ASSERT_TRUE(index == keys_cf.size()); ASSERT_TRUE(index == keys_cf.size());
} }
// Temporarily disable it because the test is flaky. TEST_F(DBTest2, PresetCompressionDict) {
TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
// Verifies that compression ratio improves when dictionary is enabled, and // Verifies that compression ratio improves when dictionary is enabled, and
// improves even further when the dictionary is trained by ZSTD. // improves even further when the dictionary is trained by ZSTD.
const size_t kBlockSizeBytes = 4 << 10; const size_t kBlockSizeBytes = 4 << 10;
@ -1046,7 +1045,8 @@ TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
const int kNumL0Files = 5; const int kNumL0Files = 5;
Options options; Options options;
options.env = CurrentOptions().env; // Make sure to use any custom env that the test is configured with. // Make sure to use any custom env that the test is configured with.
options.env = CurrentOptions().env;
options.allow_concurrent_memtable_write = false; options.allow_concurrent_memtable_write = false;
options.arena_block_size = kBlockSizeBytes; options.arena_block_size = kBlockSizeBytes;
options.create_if_missing = true; options.create_if_missing = true;
@ -1072,10 +1072,19 @@ TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
compression_types.push_back(kZSTD); compression_types.push_back(kZSTD);
} }
enum DictionaryTypes : int {
kWithoutDict,
kWithDict,
kWithZSTDTrainedDict,
kDictEnd,
};
for (auto compression_type : compression_types) { for (auto compression_type : compression_types) {
options.compression = compression_type; options.compression = compression_type;
size_t prev_out_bytes; size_t bytes_without_dict = 0;
for (int i = 0; i < 3; ++i) { size_t bytes_with_dict = 0;
size_t bytes_with_zstd_trained_dict = 0;
for (int i = kWithoutDict; i < kDictEnd; i++) {
// First iteration: compress without preset dictionary // First iteration: compress without preset dictionary
// Second iteration: compress with preset dictionary // Second iteration: compress with preset dictionary
// Third iteration (zstd only): compress with zstd-trained dictionary // Third iteration (zstd only): compress with zstd-trained dictionary
@ -1085,19 +1094,19 @@ TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
// the non-first iterations, verify the data we get out is the same data // the non-first iterations, verify the data we get out is the same data
// we put in. // we put in.
switch (i) { switch (i) {
case 0: case kWithoutDict:
options.compression_opts.max_dict_bytes = 0; options.compression_opts.max_dict_bytes = 0;
options.compression_opts.zstd_max_train_bytes = 0; options.compression_opts.zstd_max_train_bytes = 0;
break; break;
case 1: case kWithDict:
options.compression_opts.max_dict_bytes = 4 * kBlockSizeBytes; options.compression_opts.max_dict_bytes = kBlockSizeBytes;
options.compression_opts.zstd_max_train_bytes = 0; options.compression_opts.zstd_max_train_bytes = 0;
break; break;
case 2: case kWithZSTDTrainedDict:
if (compression_type != kZSTD) { if (compression_type != kZSTD) {
continue; continue;
} }
options.compression_opts.max_dict_bytes = 4 * kBlockSizeBytes; options.compression_opts.max_dict_bytes = kBlockSizeBytes;
options.compression_opts.zstd_max_train_bytes = kL0FileBytes; options.compression_opts.zstd_max_train_bytes = kL0FileBytes;
break; break;
default: default:
@ -1129,23 +1138,32 @@ TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
ASSERT_EQ(0, NumTableFilesAtLevel(0, 1)); ASSERT_EQ(0, NumTableFilesAtLevel(0, 1));
ASSERT_GT(NumTableFilesAtLevel(1, 1), 0); ASSERT_GT(NumTableFilesAtLevel(1, 1), 0);
size_t out_bytes = 0; // Get the live sst files size
std::vector<std::string> files; size_t total_sst_bytes = TotalSize(1);
GetSstFiles(env_, dbname_, &files); if (i == kWithoutDict) {
for (const auto& file : files) { bytes_without_dict = total_sst_bytes;
uint64_t curr_bytes; } else if (i == kWithDict) {
env_->GetFileSize(dbname_ + "/" + file, &curr_bytes); bytes_with_dict = total_sst_bytes;
out_bytes += static_cast<size_t>(curr_bytes); } else if (i == kWithZSTDTrainedDict) {
bytes_with_zstd_trained_dict = total_sst_bytes;
} }
for (size_t j = 0; j < kNumL0Files * (kL0FileBytes / kBlockSizeBytes); for (size_t j = 0; j < kNumL0Files * (kL0FileBytes / kBlockSizeBytes);
j++) { j++) {
ASSERT_EQ(seq_datas[(j / 10) % 10], Get(1, Key(static_cast<int>(j)))); ASSERT_EQ(seq_datas[(j / 10) % 10], Get(1, Key(static_cast<int>(j))));
} }
if (i) { if (i == kWithDict) {
ASSERT_GT(prev_out_bytes, out_bytes); ASSERT_GT(bytes_without_dict, bytes_with_dict);
} else if (i == kWithZSTDTrainedDict) {
// In zstd compression, it is sometimes possible that using a trained
// dictionary does not get as good a compression ratio as without
// training.
// But using a dictionary (with or without training) should always get
// better compression ratio than not using one.
ASSERT_TRUE(bytes_with_dict > bytes_with_zstd_trained_dict ||
bytes_without_dict > bytes_with_zstd_trained_dict);
} }
prev_out_bytes = out_bytes;
DestroyAndReopen(options); DestroyAndReopen(options);
} }
} }

Loading…
Cancel
Save