pass key/value samples through zstd compression dictionary generator

Summary:
Instead of using the samples directly, we now support passing them through zstd's dictionary generator when `CompressionOptions::zstd_max_train_bytes` is set to a nonzero value. If it is zero, we use the samples directly as the dictionary -- same as before.
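As a sketch, enabling the new mode from application code might look like the following (option values are hypothetical; per the compaction changes below, sampling only happens when the output level is bottommost):

    rocksdb::Options options;
    options.compression = rocksdb::kZSTD;
    // Cap the final dictionary at 16 KB ...
    options.compression_opts.max_dict_bytes = 1 << 14;
    // ... and pass up to 256 KB of samples through zstd's trainer. Setting
    // this to 0 would use the 16 KB of raw samples directly, as before.
    options.compression_opts.zstd_max_train_bytes = 1 << 18;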

Note this is the first step of #2987, extracted into a separate PR per reviewer request.
Closes https://github.com/facebook/rocksdb/pull/3057

Differential Revision: D6116891

Pulled By: ajkr

fbshipit-source-id: 70ab13cc4c734fa02e554180eed0618b75255497
Branch: main
Author: Andrew Kryczka (committed by Facebook GitHub Bot)
Parent: c4c1f961e7
Commit: 24ad430600
Changed files (lines changed):
  db/column_family.cc                  16
  db/compaction_job.cc                 37
  db/db_test2.cc                       36
  include/rocksdb/advanced_options.h   44
  util/compression.h                   43

db/column_family.cc
@@ -123,6 +123,22 @@ Status CheckCompressionSupported(const ColumnFamilyOptions& cf_options) {
           " is not linked with the binary.");
     }
   }
+  if (cf_options.compression_opts.zstd_max_train_bytes > 0) {
+    if (!CompressionTypeSupported(CompressionType::kZSTD)) {
+      // Dictionary trainer is available since v0.6.1, but ZSTD was marked
+      // stable only since v0.8.0. For now we enable the feature in stable
+      // versions only.
+      return Status::InvalidArgument(
+          "zstd dictionary trainer cannot be used because " +
+          CompressionTypeToString(CompressionType::kZSTD) +
+          " is not linked with the binary.");
+    }
+    if (cf_options.compression_opts.max_dict_bytes == 0) {
+      return Status::InvalidArgument(
+          "The dictionary size limit (`CompressionOptions::max_dict_bytes`) "
+          "should be nonzero if we're using zstd's dictionary generator.");
+    }
+  }
   return Status::OK();
 }
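For illustration, the configurations the new check accepts and rejects (a sketch; option values are hypothetical):

    rocksdb::ColumnFamilyOptions cf_options;

    // OK: trainer disabled, raw samples used as the dictionary (pre-existing
    // behavior).
    cf_options.compression_opts.max_dict_bytes = 1 << 14;
    cf_options.compression_opts.zstd_max_train_bytes = 0;

    // InvalidArgument: the trainer's output needs a nonzero size cap.
    cf_options.compression_opts.max_dict_bytes = 0;
    cf_options.compression_opts.zstd_max_train_bytes = 1 << 18;

    // Also InvalidArgument in binaries built without ZSTD: the trainer
    // requires the zstd library to be linked in.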

db/compaction_job.cc
@@ -702,15 +702,18 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
       sub_compact->compaction->mutable_cf_options();

   // To build compression dictionary, we sample the first output file, assuming
-  // it'll reach the maximum length, and then use the dictionary for compressing
-  // subsequent output files. The dictionary may be less than max_dict_bytes if
-  // the first output file's length is less than the maximum.
+  // it'll reach the maximum length. We optionally pass these samples through
+  // zstd's dictionary trainer, or just use them directly. Then, the dictionary
+  // is used for compressing subsequent output files in the same subcompaction.
+  const bool kUseZstdTrainer =
+      cfd->ioptions()->compression_opts.zstd_max_train_bytes > 0;
+  const size_t kSampleBytes =
+      kUseZstdTrainer ? cfd->ioptions()->compression_opts.zstd_max_train_bytes
+                      : cfd->ioptions()->compression_opts.max_dict_bytes;
   const int kSampleLenShift = 6;  // 2^6 = 64-byte samples
   std::set<size_t> sample_begin_offsets;
-  if (bottommost_level_ &&
-      cfd->ioptions()->compression_opts.max_dict_bytes > 0) {
-    const size_t kMaxSamples =
-        cfd->ioptions()->compression_opts.max_dict_bytes >> kSampleLenShift;
+  if (bottommost_level_ && kSampleBytes > 0) {
+    const size_t kMaxSamples = kSampleBytes >> kSampleLenShift;
     const size_t kOutFileLen = mutable_cf_options->MaxFileSizeForLevel(
         compact_->compaction->output_level());
     if (kOutFileLen != port::kMaxSizet) {

@@ -780,11 +783,11 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
   }
   const auto& c_iter_stats = c_iter->iter_stats();
   auto sample_begin_offset_iter = sample_begin_offsets.cbegin();
-  // data_begin_offset and compression_dict are only valid while generating
+  // data_begin_offset and dict_sample_data are only valid while generating
   // dictionary from the first output file.
   size_t data_begin_offset = 0;
-  std::string compression_dict;
-  compression_dict.reserve(cfd->ioptions()->compression_opts.max_dict_bytes);
+  std::string dict_sample_data;
+  dict_sample_data.reserve(kSampleBytes);

   while (status.ok() && !cfd->IsDropped() && c_iter->Valid()) {
     // Invariant: c_iter.status() is guaranteed to be OK if c_iter->Valid()

@@ -856,7 +859,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
           data_elmt_copy_len =
               data_end_offset - (data_begin_offset + data_elmt_copy_offset);
         }
-        compression_dict.append(&data_elmt.data()[data_elmt_copy_offset],
-                                data_elmt_copy_len);
+        dict_sample_data.append(&data_elmt.data()[data_elmt_copy_offset],
+                                data_elmt_copy_len);
         if (sample_end_offset > data_end_offset) {
           // Didn't finish sample. Try to finish it with the next data_elmt.

@@ -911,9 +914,15 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
       RecordDroppedKeys(range_del_out_stats,
                         &sub_compact->compaction_job_stats);
       if (sub_compact->outputs.size() == 1) {
-        // Use dictionary from first output file for compression of subsequent
-        // files.
-        sub_compact->compression_dict = std::move(compression_dict);
+        // Use samples from first output file to create dictionary for
+        // compression of subsequent files.
+        if (kUseZstdTrainer) {
+          sub_compact->compression_dict = ZSTD_TrainDictionary(
+              dict_sample_data, kSampleLenShift,
+              cfd->ioptions()->compression_opts.max_dict_bytes);
+        } else {
+          sub_compact->compression_dict = std::move(dict_sample_data);
+        }
       }
     }
   }
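For intuition, the sample budget above works out as follows (a worked sketch with hypothetical option values):

    // kSampleLenShift = 6, so each sample is 2^6 = 64 bytes.
    //
    // With zstd_max_train_bytes = 1 << 18 (256 KB):
    //   kUseZstdTrainer = true, kSampleBytes = 262144,
    //   kMaxSamples = 262144 >> 6 = 4096 samples fed to the trainer.
    //
    // With zstd_max_train_bytes = 0 and max_dict_bytes = 1 << 14 (16 KB):
    //   kUseZstdTrainer = false, kSampleBytes = 16384,
    //   kMaxSamples = 16384 >> 6 = 256 samples used directly as the dictionary.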

db/db_test2.cc
@@ -1027,6 +1027,7 @@ TEST_F(DBTest2, PresetCompressionDict) {
   const size_t kL0FileBytes = 128 << 10;
   const size_t kApproxPerBlockOverheadBytes = 50;
   const int kNumL0Files = 5;
+  const int kZstdTrainFactor = 16;

   Options options;
   options.env = CurrentOptions().env;  // Make sure to use any custom env that the test is configured with.

@@ -1059,17 +1060,34 @@ TEST_F(DBTest2, PresetCompressionDict) {
   for (auto compression_type : compression_types) {
     options.compression = compression_type;
     size_t prev_out_bytes;
-    for (int i = 0; i < 2; ++i) {
+    for (int i = 0; i < 3; ++i) {
       // First iteration: compress without preset dictionary
       // Second iteration: compress with preset dictionary
-      // To make sure the compression dictionary was actually used, we verify
-      // the compressed size is smaller in the second iteration. Also in the
-      // second iteration, verify the data we get out is the same data we put
-      // in.
-      if (i) {
-        options.compression_opts.max_dict_bytes = kBlockSizeBytes;
-      } else {
-        options.compression_opts.max_dict_bytes = 0;
+      // Third iteration (zstd only): compress with zstd-trained dictionary
+      //
+      // To make sure the compression dictionary has the intended effect, we
+      // verify the compressed size is smaller in successive iterations. Also in
+      // the non-first iterations, verify the data we get out is the same data
+      // we put in.
+      switch (i) {
+        case 0:
+          options.compression_opts.max_dict_bytes = 0;
+          options.compression_opts.zstd_max_train_bytes = 0;
+          break;
+        case 1:
+          options.compression_opts.max_dict_bytes = kBlockSizeBytes;
+          options.compression_opts.zstd_max_train_bytes = 0;
+          break;
+        case 2:
+          if (compression_type != kZSTD) {
+            continue;
+          }
+          options.compression_opts.max_dict_bytes = kBlockSizeBytes;
+          options.compression_opts.zstd_max_train_bytes =
+              kZstdTrainFactor * kBlockSizeBytes;
+          break;
+        default:
+          assert(false);
       }
       options.statistics = rocksdb::CreateDBStatistics();
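Per the updated comment, each non-first iteration compares its compressed output against the previous iteration's; the excerpt ends before the assertion, but the check presumably has a shape like the following (hypothetical, not shown in this hunk):

    //   ASSERT_GT(prev_out_bytes, out_bytes);  // dictionary shrank the output
    //   prev_out_bytes = out_bytes;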

include/rocksdb/advanced_options.h
@@ -90,23 +90,47 @@ struct CompressionOptions {
   int window_bits;
   int level;
   int strategy;
-  // Maximum size of dictionary used to prime the compression library. Currently
-  // this dictionary will be constructed by sampling the first output file in a
-  // subcompaction when the target level is bottommost. This dictionary will be
-  // loaded into the compression library before compressing/uncompressing each
-  // data block of subsequent files in the subcompaction. Effectively, this
-  // improves compression ratios when there are repetitions across data blocks.
-  // A value of 0 indicates the feature is disabled.
+
+  // Maximum size of dictionaries used to prime the compression library.
+  // Enabling dictionary can improve compression ratios when there are
+  // repetitions across data blocks.
+  //
+  // The dictionary is created by sampling the SST file data. If
+  // `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's
+  // dictionary generator. Otherwise, the random samples are used directly as
+  // the dictionary.
+  //
+  // When compression dictionary is disabled, we compress and write each block
+  // before buffering data for the next one. When compression dictionary is
+  // enabled, we buffer all SST file data in-memory so we can sample it, as data
+  // can only be compressed and written after the dictionary has been finalized.
+  // So users of this feature may see increased memory usage.
+  //
   // Default: 0.
   uint32_t max_dict_bytes;
+
+  // Maximum size of training data passed to zstd's dictionary trainer. Using
+  // zstd's dictionary trainer can achieve even better compression ratio
+  // improvements than using `max_dict_bytes` alone.
+  //
+  // The training data will be used to generate a dictionary of max_dict_bytes.
+  //
+  // Default: 0.
+  uint32_t zstd_max_train_bytes;
+
   CompressionOptions()
-      : window_bits(-14), level(-1), strategy(0), max_dict_bytes(0) {}
-  CompressionOptions(int wbits, int _lev, int _strategy, int _max_dict_bytes)
+      : window_bits(-14),
+        level(-1),
+        strategy(0),
+        max_dict_bytes(0),
+        zstd_max_train_bytes(0) {}
+  CompressionOptions(int wbits, int _lev, int _strategy, int _max_dict_bytes,
+                     int _zstd_max_train_bytes)
       : window_bits(wbits),
         level(_lev),
         strategy(_strategy),
-        max_dict_bytes(_max_dict_bytes) {}
+        max_dict_bytes(_max_dict_bytes),
+        zstd_max_train_bytes(_zstd_max_train_bytes) {}
 };

 enum UpdateStatus {    // Return status For inplace update callback
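Since the five-argument constructor replaces the old four-argument one, existing callers of the explicit constructor must now pass the trainer cap as well; a sketch (values hypothetical):

    rocksdb::CompressionOptions compression_opts(
        -14 /* window_bits */, -1 /* level */, 0 /* strategy */,
        1 << 14 /* max_dict_bytes */, 1 << 18 /* zstd_max_train_bytes */);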

util/compression.h
@@ -35,7 +35,10 @@
 #if defined(ZSTD)
 #include <zstd.h>
-#endif
+#if ZSTD_VERSION_NUMBER >= 800  // v0.8.0+
+#include <zdict.h>
+#endif  // ZSTD_VERSION_NUMBER >= 800
+#endif  // ZSTD

 #if defined(XPRESS)
 #include "port/xpress.h"

@@ -796,4 +799,42 @@ inline char* ZSTD_Uncompress(const char* input_data, size_t input_length,
   return nullptr;
 }

+inline std::string ZSTD_TrainDictionary(const std::string& samples,
+                                        const std::vector<size_t>& sample_lens,
+                                        size_t max_dict_bytes) {
+  // Dictionary trainer is available since v0.6.1, but ZSTD was marked stable
+  // only since v0.8.0. For now we enable the feature in stable versions only.
+#if ZSTD_VERSION_NUMBER >= 800  // v0.8.0+
+  std::string dict_data(max_dict_bytes, '\0');
+  size_t dict_len =
+      ZDICT_trainFromBuffer(&dict_data[0], max_dict_bytes, &samples[0],
+                            &sample_lens[0], sample_lens.size());
+  if (ZDICT_isError(dict_len)) {
+    return "";
+  }
+  assert(dict_len <= max_dict_bytes);
+  dict_data.resize(dict_len);
+  return dict_data;
+#else   // up to v0.7.x
+  assert(false);
+  return "";
+#endif  // ZSTD_VERSION_NUMBER >= 800
+}
+
+inline std::string ZSTD_TrainDictionary(const std::string& samples,
+                                        size_t sample_len_shift,
+                                        size_t max_dict_bytes) {
+  // Dictionary trainer is available since v0.6.1, but ZSTD was marked stable
+  // only since v0.8.0. For now we enable the feature in stable versions only.
+#if ZSTD_VERSION_NUMBER >= 800  // v0.8.0+
+  // skips potential partial sample at the end of "samples"
+  size_t num_samples = samples.size() >> sample_len_shift;
+  std::vector<size_t> sample_lens(num_samples, 1 << sample_len_shift);
+  return ZSTD_TrainDictionary(samples, sample_lens, max_dict_bytes);
+#else   // up to v0.7.x
+  assert(false);
+  return "";
+#endif  // ZSTD_VERSION_NUMBER >= 800
+}
+
 }  // namespace rocksdb
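For reference, a sketch of calling the convenience overload the compaction code uses (internal RocksDB helper; it only returns a trained dictionary when built against zstd >= v0.8.0):

    std::string samples;               // 64-byte samples appended back to back
    // ... fill samples from data blocks (collection omitted) ...
    const size_t kSampleLenShift = 6;  // each sample is 2^6 = 64 bytes
    std::string dict = rocksdb::ZSTD_TrainDictionary(
        samples, kSampleLenShift, 16384 /* max_dict_bytes */);
    // dict is empty if the trainer failed (or zstd < v0.8.0); otherwise
    // dict.size() <= max_dict_bytes.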
