pass key/value samples through zstd compression dictionary generator

Summary:
Instead of using the samples directly, we now support passing them through zstd's dictionary generator when `CompressionOptions::zstd_max_train_bytes` is set to a nonzero value. If it is zero, we use the samples directly as the dictionary -- same as before.
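As a sketch, enabling the new mode from application code might look like the following (option values are hypothetical; per the compaction changes below, sampling only happens when the output level is bottommost):

    rocksdb::Options options;
    options.compression = rocksdb::kZSTD;
    // Cap the final dictionary at 16 KB ...
    options.compression_opts.max_dict_bytes = 1 << 14;
    // ... and pass up to 256 KB of samples through zstd's trainer. Setting
    // this to 0 would use the 16 KB of raw samples directly, as before.
    options.compression_opts.zstd_max_train_bytes = 1 << 18;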

Note this is the first step of #2987, extracted into a separate PR per reviewer request.
Closes https://github.com/facebook/rocksdb/pull/3057

Differential Revision: D6116891

Pulled By: ajkr

fbshipit-source-id: 70ab13cc4c734fa02e554180eed0618b75255497
Branch: main
Author: Andrew Kryczka (committed by Facebook GitHub Bot)
Parent: c4c1f961e7
Commit: 24ad430600
Changed files (lines changed):
  db/column_family.cc                  16
  db/compaction_job.cc                 37
  db/db_test2.cc                       36
  include/rocksdb/advanced_options.h   44
  util/compression.h                   43

db/column_family.cc
@@ -123,6 +123,22 @@ Status CheckCompressionSupported(const ColumnFamilyOptions& cf_options) {
           " is not linked with the binary.");
     }
   }
+  if (cf_options.compression_opts.zstd_max_train_bytes > 0) {
+    if (!CompressionTypeSupported(CompressionType::kZSTD)) {
+      // Dictionary trainer is available since v0.6.1, but ZSTD was marked
+      // stable only since v0.8.0. For now we enable the feature in stable
+      // versions only.
+      return Status::InvalidArgument(
+          "zstd dictionary trainer cannot be used because " +
+          CompressionTypeToString(CompressionType::kZSTD) +
+          " is not linked with the binary.");
+    }
+    if (cf_options.compression_opts.max_dict_bytes == 0) {
+      return Status::InvalidArgument(
+          "The dictionary size limit (`CompressionOptions::max_dict_bytes`) "
+          "should be nonzero if we're using zstd's dictionary generator.");
+    }
+  }
   return Status::OK();
 }
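For illustration, the configurations the new check accepts and rejects (a sketch; option values are hypothetical):

    rocksdb::ColumnFamilyOptions cf_options;

    // OK: trainer disabled, raw samples used as the dictionary (pre-existing
    // behavior).
    cf_options.compression_opts.max_dict_bytes = 1 << 14;
    cf_options.compression_opts.zstd_max_train_bytes = 0;

    // InvalidArgument: the trainer's output needs a nonzero size cap.
    cf_options.compression_opts.max_dict_bytes = 0;
    cf_options.compression_opts.zstd_max_train_bytes = 1 << 18;

    // Also InvalidArgument in binaries built without ZSTD: the trainer
    // requires the zstd library to be linked in.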

db/compaction_job.cc
@@ -702,15 +702,18 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
       sub_compact->compaction->mutable_cf_options();

   // To build compression dictionary, we sample the first output file, assuming
-  // it'll reach the maximum length, and then use the dictionary for compressing
-  // subsequent output files. The dictionary may be less than max_dict_bytes if
-  // the first output file's length is less than the maximum.
+  // it'll reach the maximum length. We optionally pass these samples through
+  // zstd's dictionary trainer, or just use them directly. Then, the dictionary
+  // is used for compressing subsequent output files in the same subcompaction.
+  const bool kUseZstdTrainer =
+      cfd->ioptions()->compression_opts.zstd_max_train_bytes > 0;
+  const size_t kSampleBytes =
+      kUseZstdTrainer ? cfd->ioptions()->compression_opts.zstd_max_train_bytes
+                      : cfd->ioptions()->compression_opts.max_dict_bytes;
   const int kSampleLenShift = 6;  // 2^6 = 64-byte samples
   std::set<size_t> sample_begin_offsets;
-  if (bottommost_level_ &&
-      cfd->ioptions()->compression_opts.max_dict_bytes > 0) {
-    const size_t kMaxSamples =
-        cfd->ioptions()->compression_opts.max_dict_bytes >> kSampleLenShift;
+  if (bottommost_level_ && kSampleBytes > 0) {
+    const size_t kMaxSamples = kSampleBytes >> kSampleLenShift;
     const size_t kOutFileLen = mutable_cf_options->MaxFileSizeForLevel(
         compact_->compaction->output_level());
     if (kOutFileLen != port::kMaxSizet) {

@@ -780,11 +783,11 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
   }
   const auto& c_iter_stats = c_iter->iter_stats();
   auto sample_begin_offset_iter = sample_begin_offsets.cbegin();
-  // data_begin_offset and compression_dict are only valid while generating
+  // data_begin_offset and dict_sample_data are only valid while generating
   // dictionary from the first output file.
   size_t data_begin_offset = 0;
-  std::string compression_dict;
-  compression_dict.reserve(cfd->ioptions()->compression_opts.max_dict_bytes);
+  std::string dict_sample_data;
+  dict_sample_data.reserve(kSampleBytes);

   while (status.ok() && !cfd->IsDropped() && c_iter->Valid()) {
     // Invariant: c_iter.status() is guaranteed to be OK if c_iter->Valid()

@@ -856,7 +859,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
           data_elmt_copy_len =
               data_end_offset - (data_begin_offset + data_elmt_copy_offset);
         }
-        compression_dict.append(&data_elmt.data()[data_elmt_copy_offset],
-                                data_elmt_copy_len);
+        dict_sample_data.append(&data_elmt.data()[data_elmt_copy_offset],
+                                data_elmt_copy_len);
         if (sample_end_offset > data_end_offset) {
           // Didn't finish sample. Try to finish it with the next data_elmt.

@@ -911,9 +914,15 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
       RecordDroppedKeys(range_del_out_stats,
                         &sub_compact->compaction_job_stats);
       if (sub_compact->outputs.size() == 1) {
-        // Use dictionary from first output file for compression of subsequent
-        // files.
-        sub_compact->compression_dict = std::move(compression_dict);
+        // Use samples from first output file to create dictionary for
+        // compression of subsequent files.
+        if (kUseZstdTrainer) {
+          sub_compact->compression_dict = ZSTD_TrainDictionary(
+              dict_sample_data, kSampleLenShift,
+              cfd->ioptions()->compression_opts.max_dict_bytes);
+        } else {
+          sub_compact->compression_dict = std::move(dict_sample_data);
+        }
       }
     }
   }
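For intuition, the sample budget above works out as follows (a worked sketch with hypothetical option values):

    // kSampleLenShift = 6, so each sample is 2^6 = 64 bytes.
    //
    // With zstd_max_train_bytes = 1 << 18 (256 KB):
    //   kUseZstdTrainer = true, kSampleBytes = 262144,
    //   kMaxSamples = 262144 >> 6 = 4096 samples fed to the trainer.
    //
    // With zstd_max_train_bytes = 0 and max_dict_bytes = 1 << 14 (16 KB):
    //   kUseZstdTrainer = false, kSampleBytes = 16384,
    //   kMaxSamples = 16384 >> 6 = 256 samples used directly as the dictionary.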

db/db_test2.cc
@@ -1027,6 +1027,7 @@ TEST_F(DBTest2, PresetCompressionDict) {
   const size_t kL0FileBytes = 128 << 10;
   const size_t kApproxPerBlockOverheadBytes = 50;
   const int kNumL0Files = 5;
+  const int kZstdTrainFactor = 16;

   Options options;
   options.env = CurrentOptions().env;  // Make sure to use any custom env that the test is configured with.

@@ -1059,17 +1060,34 @@ TEST_F(DBTest2, PresetCompressionDict) {
   for (auto compression_type : compression_types) {
     options.compression = compression_type;
     size_t prev_out_bytes;
-    for (int i = 0; i < 2; ++i) {
+    for (int i = 0; i < 3; ++i) {
       // First iteration: compress without preset dictionary
       // Second iteration: compress with preset dictionary
-      // To make sure the compression dictionary was actually used, we verify
-      // the compressed size is smaller in the second iteration. Also in the
-      // second iteration, verify the data we get out is the same data we put
-      // in.
-      if (i) {
-        options.compression_opts.max_dict_bytes = kBlockSizeBytes;
-      } else {
-        options.compression_opts.max_dict_bytes = 0;
+      // Third iteration (zstd only): compress with zstd-trained dictionary
+      //
+      // To make sure the compression dictionary has the intended effect, we
+      // verify the compressed size is smaller in successive iterations. Also in
+      // the non-first iterations, verify the data we get out is the same data
+      // we put in.
+      switch (i) {
+        case 0:
+          options.compression_opts.max_dict_bytes = 0;
+          options.compression_opts.zstd_max_train_bytes = 0;
+          break;
+        case 1:
+          options.compression_opts.max_dict_bytes = kBlockSizeBytes;
+          options.compression_opts.zstd_max_train_bytes = 0;
+          break;
+        case 2:
+          if (compression_type != kZSTD) {
+            continue;
+          }
+          options.compression_opts.max_dict_bytes = kBlockSizeBytes;
+          options.compression_opts.zstd_max_train_bytes =
+              kZstdTrainFactor * kBlockSizeBytes;
+          break;
+        default:
+          assert(false);
       }
       options.statistics = rocksdb::CreateDBStatistics();
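Per the updated comment, each non-first iteration compares its compressed output against the previous iteration's; the excerpt ends before the assertion, but the check presumably has a shape like the following (hypothetical, not shown in this hunk):

    //   ASSERT_GT(prev_out_bytes, out_bytes);  // dictionary shrank the output
    //   prev_out_bytes = out_bytes;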

include/rocksdb/advanced_options.h
@@ -90,23 +90,47 @@ struct CompressionOptions {
   int window_bits;
   int level;
   int strategy;
-  // Maximum size of dictionary used to prime the compression library. Currently
-  // this dictionary will be constructed by sampling the first output file in a
-  // subcompaction when the target level is bottommost. This dictionary will be
-  // loaded into the compression library before compressing/uncompressing each
-  // data block of subsequent files in the subcompaction. Effectively, this
-  // improves compression ratios when there are repetitions across data blocks.
-  // A value of 0 indicates the feature is disabled.
+
+  // Maximum size of dictionaries used to prime the compression library.
+  // Enabling dictionary can improve compression ratios when there are
+  // repetitions across data blocks.
+  //
+  // The dictionary is created by sampling the SST file data. If
+  // `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's
+  // dictionary generator. Otherwise, the random samples are used directly as
+  // the dictionary.
+  //
+  // When compression dictionary is disabled, we compress and write each block
+  // before buffering data for the next one. When compression dictionary is
+  // enabled, we buffer all SST file data in-memory so we can sample it, as data
+  // can only be compressed and written after the dictionary has been finalized.
+  // So users of this feature may see increased memory usage.
+  //
   // Default: 0.
   uint32_t max_dict_bytes;
+
+  // Maximum size of training data passed to zstd's dictionary trainer. Using
+  // zstd's dictionary trainer can achieve even better compression ratio
+  // improvements than using `max_dict_bytes` alone.
+  //
+  // The training data will be used to generate a dictionary of max_dict_bytes.
+  //
+  // Default: 0.
+  uint32_t zstd_max_train_bytes;
+
   CompressionOptions()
-      : window_bits(-14), level(-1), strategy(0), max_dict_bytes(0) {}
-  CompressionOptions(int wbits, int _lev, int _strategy, int _max_dict_bytes)
+      : window_bits(-14),
+        level(-1),
+        strategy(0),
+        max_dict_bytes(0),
+        zstd_max_train_bytes(0) {}
+  CompressionOptions(int wbits, int _lev, int _strategy, int _max_dict_bytes,
+                     int _zstd_max_train_bytes)
       : window_bits(wbits),
         level(_lev),
         strategy(_strategy),
-        max_dict_bytes(_max_dict_bytes) {}
+        max_dict_bytes(_max_dict_bytes),
+        zstd_max_train_bytes(_zstd_max_train_bytes) {}
 };

 enum UpdateStatus {    // Return status For inplace update callback
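Since the five-argument constructor replaces the old four-argument one, existing callers of the explicit constructor must now pass the trainer cap as well; a sketch (values hypothetical):

    rocksdb::CompressionOptions compression_opts(
        -14 /* window_bits */, -1 /* level */, 0 /* strategy */,
        1 << 14 /* max_dict_bytes */, 1 << 18 /* zstd_max_train_bytes */);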

util/compression.h
@@ -35,7 +35,10 @@
 #if defined(ZSTD)
 #include <zstd.h>
-#endif
+#if ZSTD_VERSION_NUMBER >= 800  // v0.8.0+
+#include <zdict.h>
+#endif  // ZSTD_VERSION_NUMBER >= 800
+#endif  // ZSTD

 #if defined(XPRESS)
 #include "port/xpress.h"

@@ -796,4 +799,42 @@ inline char* ZSTD_Uncompress(const char* input_data, size_t input_length,
   return nullptr;
 }

+inline std::string ZSTD_TrainDictionary(const std::string& samples,
+                                        const std::vector<size_t>& sample_lens,
+                                        size_t max_dict_bytes) {
+  // Dictionary trainer is available since v0.6.1, but ZSTD was marked stable
+  // only since v0.8.0. For now we enable the feature in stable versions only.
+#if ZSTD_VERSION_NUMBER >= 800  // v0.8.0+
+  std::string dict_data(max_dict_bytes, '\0');
+  size_t dict_len =
+      ZDICT_trainFromBuffer(&dict_data[0], max_dict_bytes, &samples[0],
+                            &sample_lens[0], sample_lens.size());
+  if (ZDICT_isError(dict_len)) {
+    return "";
+  }
+  assert(dict_len <= max_dict_bytes);
+  dict_data.resize(dict_len);
+  return dict_data;
+#else   // up to v0.7.x
+  assert(false);
+  return "";
+#endif  // ZSTD_VERSION_NUMBER >= 800
+}
+
+inline std::string ZSTD_TrainDictionary(const std::string& samples,
+                                        size_t sample_len_shift,
+                                        size_t max_dict_bytes) {
+  // Dictionary trainer is available since v0.6.1, but ZSTD was marked stable
+  // only since v0.8.0. For now we enable the feature in stable versions only.
+#if ZSTD_VERSION_NUMBER >= 800  // v0.8.0+
+  // skips potential partial sample at the end of "samples"
+  size_t num_samples = samples.size() >> sample_len_shift;
+  std::vector<size_t> sample_lens(num_samples, 1 << sample_len_shift);
+  return ZSTD_TrainDictionary(samples, sample_lens, max_dict_bytes);
+#else   // up to v0.7.x
+  assert(false);
+  return "";
+#endif  // ZSTD_VERSION_NUMBER >= 800
+}
+
 }  // namespace rocksdb
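For reference, a sketch of calling the convenience overload the compaction code uses (internal RocksDB helper; it only returns a trained dictionary when built against zstd >= v0.8.0):

    std::string samples;               // 64-byte samples appended back to back
    // ... fill samples from data blocks (collection omitted) ...
    const size_t kSampleLenShift = 6;  // each sample is 2^6 = 64 bytes
    std::string dict = rocksdb::ZSTD_TrainDictionary(
        samples, kSampleLenShift, 16384 /* max_dict_bytes */);
    // dict is empty if the trainer failed (or zstd < v0.8.0); otherwise
    // dict.size() <= max_dict_bytes.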
