@@ -1036,8 +1036,7 @@ TEST_F(DBTest2, WalFilterTestWithColumnFamilies) {
   ASSERT_TRUE(index == keys_cf.size());
 }
 
-// Temporarily disable it because the test is flaky.
-TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
+TEST_F(DBTest2, PresetCompressionDict) {
   // Verifies that compression ratio improves when dictionary is enabled, and
   // improves even further when the dictionary is trained by ZSTD.
   const size_t kBlockSizeBytes = 4 << 10;
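
For readers outside the test harness: dictionary compression is driven entirely by two `CompressionOptions` fields, which are real RocksDB API. A minimal sketch of enabling them on a standalone DB follows; the path `/tmp/dict_demo` and the byte sizes are illustrative, not taken from the patch.

```cpp
#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.compression = rocksdb::kZSTD;
  // Cap the preset dictionary sampled from data blocks at 4 KB.
  options.compression_opts.max_dict_bytes = 4 << 10;
  // Feed up to 128 KB of samples to ZSTD's dictionary trainer; leaving this
  // at 0 uses the raw (untrained) samples as the dictionary instead.
  options.compression_opts.zstd_max_train_bytes = 128 << 10;

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/dict_demo", &db);
  if (s.ok()) {
    s = db->Put(rocksdb::WriteOptions(), "key", "value");
    delete db;
  }
  return s.ok() ? 0 : 1;
}
```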
@@ -1046,7 +1045,8 @@ TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
   const int kNumL0Files = 5;
 
   Options options;
-  options.env = CurrentOptions().env;  // Make sure to use any custom env that the test is configured with.
+  // Make sure to use any custom env that the test is configured with.
+  options.env = CurrentOptions().env;
   options.allow_concurrent_memtable_write = false;
   options.arena_block_size = kBlockSizeBytes;
   options.create_if_missing = true;
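
The moved line matters because a freshly constructed `Options` defaults its `env` to `Env::Default()`, silently bypassing whatever custom `Env` (mock, encrypted, etc.) the suite was launched with. A small sketch of the pitfall; `harness_env` is a hypothetical stand-in for such a custom env:

```cpp
#include <cassert>

#include "rocksdb/env.h"
#include "rocksdb/options.h"

int main() {
  // A default-constructed Options always points at the real Env...
  rocksdb::Options fresh;
  assert(fresh.env == rocksdb::Env::Default());

  // ...so a harness carrying a custom Env must propagate it explicitly,
  // which is what `options.env = CurrentOptions().env;` does in the test.
  rocksdb::Env* harness_env = rocksdb::Env::Default();  // stand-in
  fresh.env = harness_env;
  return 0;
}
```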
@@ -1072,10 +1072,19 @@ TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
     compression_types.push_back(kZSTD);
   }
 
+  enum DictionaryTypes : int {
+    kWithoutDict,
+    kWithDict,
+    kWithZSTDTrainedDict,
+    kDictEnd,
+  };
+
   for (auto compression_type : compression_types) {
     options.compression = compression_type;
-    size_t prev_out_bytes;
-    for (int i = 0; i < 3; ++i) {
+    size_t bytes_without_dict = 0;
+    size_t bytes_with_dict = 0;
+    size_t bytes_with_zstd_trained_dict = 0;
+    for (int i = kWithoutDict; i < kDictEnd; i++) {
       // First iteration: compress without preset dictionary
       // Second iteration: compress with preset dictionary
       // Third iteration (zstd only): compress with zstd-trained dictionary
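
The new enum replaces the magic loop bound `3` and the bare case labels `0`/`1`/`2`. A standalone sketch of the sentinel-enumerator pattern it uses (the names here are illustrative, not from the patch):

```cpp
#include <cstdio>

// The last enumerator is not a real mode, only a loop bound, so inserting a
// new mode before kModeEnd automatically extends every loop written this way.
enum Mode : int {
  kModeA,
  kModeB,
  kModeEnd,  // sentinel: one past the last valid mode
};

int main() {
  for (int m = kModeA; m < kModeEnd; m++) {
    std::printf("running mode %d\n", m);
  }
  return 0;
}
```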
@@ -1085,19 +1094,19 @@ TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
       // the non-first iterations, verify the data we get out is the same data
       // we put in.
       switch (i) {
-        case 0:
+        case kWithoutDict:
           options.compression_opts.max_dict_bytes = 0;
           options.compression_opts.zstd_max_train_bytes = 0;
           break;
-        case 1:
-          options.compression_opts.max_dict_bytes = 4 * kBlockSizeBytes;
+        case kWithDict:
+          options.compression_opts.max_dict_bytes = kBlockSizeBytes;
           options.compression_opts.zstd_max_train_bytes = 0;
           break;
-        case 2:
+        case kWithZSTDTrainedDict:
           if (compression_type != kZSTD) {
             continue;
           }
-          options.compression_opts.max_dict_bytes = 4 * kBlockSizeBytes;
+          options.compression_opts.max_dict_bytes = kBlockSizeBytes;
           options.compression_opts.zstd_max_train_bytes = kL0FileBytes;
           break;
         default:
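
Beyond renaming the labels, this hunk also shrinks `max_dict_bytes` from `4 * kBlockSizeBytes` to `kBlockSizeBytes`. A sketch summarizing the three configurations the switch produces; the `DictConfig` struct is illustrative, not RocksDB API:

```cpp
#include <cstddef>
#include <cstdio>

struct DictConfig {
  size_t max_dict_bytes;        // preset dictionary cap; 0 disables it
  size_t zstd_max_train_bytes;  // ZSTD training buffer; 0 skips training
};

constexpr size_t kBlockSizeBytes = 4 << 10;
constexpr size_t kL0FileBytes = 128 << 10;

// Mirrors kWithoutDict, kWithDict, kWithZSTDTrainedDict in the patch.
constexpr DictConfig kConfigs[] = {
    {0, 0},                           // no dictionary at all
    {kBlockSizeBytes, 0},             // raw (untrained) preset dictionary
    {kBlockSizeBytes, kL0FileBytes},  // ZSTD-trained dictionary (zstd only)
};

int main() {
  for (const auto& c : kConfigs) {
    std::printf("dict=%zu train=%zu\n", c.max_dict_bytes,
                c.zstd_max_train_bytes);
  }
  return 0;
}
```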
@@ -1129,23 +1138,32 @@ TEST_F(DBTest2, DISABLED_PresetCompressionDict) {
       ASSERT_EQ(0, NumTableFilesAtLevel(0, 1));
       ASSERT_GT(NumTableFilesAtLevel(1, 1), 0);
 
-      size_t out_bytes = 0;
-      std::vector<std::string> files;
-      GetSstFiles(env_, dbname_, &files);
-      for (const auto& file : files) {
-        uint64_t curr_bytes;
-        env_->GetFileSize(dbname_ + "/" + file, &curr_bytes);
-        out_bytes += static_cast<size_t>(curr_bytes);
+      // Get the live sst files size
+      size_t total_sst_bytes = TotalSize(1);
+      if (i == kWithoutDict) {
+        bytes_without_dict = total_sst_bytes;
+      } else if (i == kWithDict) {
+        bytes_with_dict = total_sst_bytes;
+      } else if (i == kWithZSTDTrainedDict) {
+        bytes_with_zstd_trained_dict = total_sst_bytes;
       }
+
       for (size_t j = 0; j < kNumL0Files * (kL0FileBytes / kBlockSizeBytes);
            j++) {
         ASSERT_EQ(seq_datas[(j / 10) % 10], Get(1, Key(static_cast<int>(j))));
       }
-      if (i) {
-        ASSERT_GT(prev_out_bytes, out_bytes);
+      if (i == kWithDict) {
+        ASSERT_GT(bytes_without_dict, bytes_with_dict);
+      } else if (i == kWithZSTDTrainedDict) {
+        // In zstd compression, it is sometimes possible that using a trained
+        // dictionary does not get as good a compression ratio as without
+        // training.
+        // But using a dictionary (with or without training) should always
+        // get a better compression ratio than not using one.
+        ASSERT_TRUE(bytes_with_dict > bytes_with_zstd_trained_dict ||
+                    bytes_without_dict > bytes_with_zstd_trained_dict);
       }
-      prev_out_bytes = out_bytes;
 
       DestroyAndReopen(options);
     }
   }
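
`TotalSize(1)` is a `DBTestBase` helper; it replaces the old hand-rolled scan of the DB directory, which also counted files that were no longer live. Outside the harness, a comparable measurement can be taken through the public `GetLiveFilesMetaData()` API; a hedged sketch, assuming an already-open `db`:

```cpp
#include <cstdint>
#include <vector>

#include "rocksdb/db.h"

// Totals the sizes of all live SST files, similar in spirit to the test
// harness's TotalSize() helper used in the patch.
uint64_t LiveSstBytes(rocksdb::DB* db) {
  std::vector<rocksdb::LiveFileMetaData> files;
  db->GetLiveFilesMetaData(&files);  // one entry per live SST file
  uint64_t total = 0;
  for (const auto& f : files) {
    total += f.size;
  }
  return total;
}
```

Note the deliberately lenient final assertion: the trained dictionary only has to beat at least one of the other two configurations, since ZSTD training does not guarantee a win over the untrained dictionary. That is what lets the strict ordering (`prev_out_bytes > out_bytes`), the source of the flakiness, be retired.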