Introduce library-independent default compression level

Summary:
Previously we used -1 as the default compression level for every library, which was a legacy of our zlib options. That worked for a while, but after zstd introduced commit a146ee04ae, a level of -1 started giving poor compression ratios by default in zstd.

This PR adds a constant to the RocksDB public API, `CompressionOptions::kDefaultCompressionLevel`, which is translated in "util/compression.h" to the default level specific to the compression library being used. The constant uses a number that appears to be larger than any library's maximum compression level.
Closes https://github.com/facebook/rocksdb/pull/3895

Differential Revision: D8125780

Pulled By: ajkr

fbshipit-source-id: 2db157a89118cd4f94577c2f4a0a5ff31c8391c6
main
Andrew Kryczka 7 years ago committed by Facebook Github Bot
parent 4011012d9d
commit 01bcc34896
  1. 1
      HISTORY.md
  2. 10
      include/rocksdb/advanced_options.h
  3. 10
      include/rocksdb/options.h
  4. 30
      util/compression.h

@ -13,6 +13,7 @@
* Now, `DBOptions::use_direct_io_for_flush_and_compaction` only applies to background writes, and `DBOptions::use_direct_reads` applies to both user reads and background reads. This conforms with Linux's `open(2)` manpage, which advises against simultaneously reading a file in buffered and direct modes, due to possibly undefined behavior and degraded performance. * Now, `DBOptions::use_direct_io_for_flush_and_compaction` only applies to background writes, and `DBOptions::use_direct_reads` applies to both user reads and background reads. This conforms with Linux's `open(2)` manpage, which advises against simultaneously reading a file in buffered and direct modes, due to possibly undefined behavior and degraded performance.
* Iterator::Valid() always returns false if !status().ok(). So, now when doing a Seek() followed by some Next()s, there's no need to check status() after every operation. * Iterator::Valid() always returns false if !status().ok(). So, now when doing a Seek() followed by some Next()s, there's no need to check status() after every operation.
* Iterator::Seek()/SeekForPrev()/SeekToFirst()/SeekToLast() always resets status(). * Iterator::Seek()/SeekForPrev()/SeekToFirst()/SeekToLast() always resets status().
* Introduced `CompressionOptions::kDefaultCompressionLevel`, which is a generic way to tell RocksDB to use the compression library's default level. It is now the default value for `CompressionOptions::level`. Previously the level defaulted to -1, which gave poor compression ratios in ZSTD.
### New Features ### New Features
* Introduce TTL for level compaction so that all files older than ttl go through the compaction process to get rid of old data. * Introduce TTL for level compaction so that all files older than ttl go through the compaction process to get rid of old data.

@ -87,6 +87,14 @@ struct CompactionOptionsFIFO {
// Compression options for different compression algorithms like Zlib // Compression options for different compression algorithms like Zlib
struct CompressionOptions { struct CompressionOptions {
// RocksDB's generic default compression level. Internally it'll be translated
// to the default compression level specific to the library being used (see
// comment above `ColumnFamilyOptions::compression`).
//
// The default value is the max 16-bit int as it'll be written out in OPTIONS
// file, which should be portable.
const static int kDefaultCompressionLevel = 32767;
int window_bits; int window_bits;
int level; int level;
int strategy; int strategy;
@ -120,7 +128,7 @@ struct CompressionOptions {
CompressionOptions() CompressionOptions()
: window_bits(-14), : window_bits(-14),
level(-1), level(kDefaultCompressionLevel),
strategy(0), strategy(0),
max_dict_bytes(0), max_dict_bytes(0),
zstd_max_train_bytes(0) {} zstd_max_train_bytes(0) {}

@ -198,11 +198,21 @@ struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions {
// Typical speeds of kSnappyCompression on an Intel(R) Core(TM)2 2.4GHz: // Typical speeds of kSnappyCompression on an Intel(R) Core(TM)2 2.4GHz:
// ~200-500MB/s compression // ~200-500MB/s compression
// ~400-800MB/s decompression // ~400-800MB/s decompression
//
// Note that these speeds are significantly faster than most // Note that these speeds are significantly faster than most
// persistent storage speeds, and therefore it is typically never // persistent storage speeds, and therefore it is typically never
// worth switching to kNoCompression. Even if the input data is // worth switching to kNoCompression. Even if the input data is
// incompressible, the kSnappyCompression implementation will // incompressible, the kSnappyCompression implementation will
// efficiently detect that and will switch to uncompressed mode. // efficiently detect that and will switch to uncompressed mode.
//
// If you do not set `compression_opts.level`, or set it to
// `CompressionOptions::kDefaultCompressionLevel`, we will attempt to pick the
// default corresponding to `compression` as follows:
//
// - kZSTD: 3
// - kZlibCompression: Z_DEFAULT_COMPRESSION (currently -1)
// - kLZ4HCCompression: 0
// - For all others, we do not specify a compression level
CompressionType compression; CompressionType compression;
// Compression algorithm that will be used for the bottommost level that // Compression algorithm that will be used for the bottommost level that

@ -254,9 +254,15 @@ inline bool Zlib_Compress(const CompressionOptions& opts,
// memLevel=9 uses maximum memory for optimal speed. // memLevel=9 uses maximum memory for optimal speed.
// The default value is 8. See zconf.h for more details. // The default value is 8. See zconf.h for more details.
static const int memLevel = 8; static const int memLevel = 8;
int level;
if (opts.level == CompressionOptions::kDefaultCompressionLevel) {
level = Z_DEFAULT_COMPRESSION;
} else {
level = opts.level;
}
z_stream _stream; z_stream _stream;
memset(&_stream, 0, sizeof(z_stream)); memset(&_stream, 0, sizeof(z_stream));
int st = deflateInit2(&_stream, opts.level, Z_DEFLATED, opts.window_bits, int st = deflateInit2(&_stream, level, Z_DEFLATED, opts.window_bits,
memLevel, opts.strategy); memLevel, opts.strategy);
if (st != Z_OK) { if (st != Z_OK) {
return false; return false;
@ -719,9 +725,15 @@ inline bool LZ4HC_Compress(const CompressionOptions& opts,
output->resize(static_cast<size_t>(output_header_len + compress_bound)); output->resize(static_cast<size_t>(output_header_len + compress_bound));
int outlen; int outlen;
int level;
if (opts.level == CompressionOptions::kDefaultCompressionLevel) {
level = 0; // lz4hc.h says any value < 1 will be sanitized to default
} else {
level = opts.level;
}
#if LZ4_VERSION_NUMBER >= 10400 // r124+ #if LZ4_VERSION_NUMBER >= 10400 // r124+
LZ4_streamHC_t* stream = LZ4_createStreamHC(); LZ4_streamHC_t* stream = LZ4_createStreamHC();
LZ4_resetStreamHC(stream, opts.level); LZ4_resetStreamHC(stream, level);
const char* compression_dict_data = const char* compression_dict_data =
compression_dict.size() > 0 ? compression_dict.data() : nullptr; compression_dict.size() > 0 ? compression_dict.data() : nullptr;
size_t compression_dict_size = compression_dict.size(); size_t compression_dict_size = compression_dict.size();
@ -742,7 +754,7 @@ inline bool LZ4HC_Compress(const CompressionOptions& opts,
#elif LZ4_VERSION_MAJOR // r113-r123 #elif LZ4_VERSION_MAJOR // r113-r123
outlen = LZ4_compressHC2_limitedOutput(input, &(*output)[output_header_len], outlen = LZ4_compressHC2_limitedOutput(input, &(*output)[output_header_len],
static_cast<int>(length), static_cast<int>(length),
compress_bound, opts.level); compress_bound, level);
#else // up to r112 #else // up to r112
outlen = outlen =
LZ4_compressHC_limitedOutput(input, &(*output)[output_header_len], LZ4_compressHC_limitedOutput(input, &(*output)[output_header_len],
@ -807,15 +819,23 @@ inline bool ZSTD_Compress(const CompressionOptions& opts, const char* input,
size_t compressBound = ZSTD_compressBound(length); size_t compressBound = ZSTD_compressBound(length);
output->resize(static_cast<size_t>(output_header_len + compressBound)); output->resize(static_cast<size_t>(output_header_len + compressBound));
size_t outlen; size_t outlen;
int level;
if (opts.level == CompressionOptions::kDefaultCompressionLevel) {
// 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see
// https://github.com/facebook/zstd/issues/1148
level = 3;
} else {
level = opts.level;
}
#if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ #if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+
ZSTD_CCtx* context = ZSTD_createCCtx(); ZSTD_CCtx* context = ZSTD_createCCtx();
outlen = ZSTD_compress_usingDict( outlen = ZSTD_compress_usingDict(
context, &(*output)[output_header_len], compressBound, input, length, context, &(*output)[output_header_len], compressBound, input, length,
compression_dict.data(), compression_dict.size(), opts.level); compression_dict.data(), compression_dict.size(), level);
ZSTD_freeCCtx(context); ZSTD_freeCCtx(context);
#else // up to v0.4.x #else // up to v0.4.x
outlen = ZSTD_compress(&(*output)[output_header_len], compressBound, input, outlen = ZSTD_compress(&(*output)[output_header_len], compressBound, input,
length, opts.level); length, level);
#endif // ZSTD_VERSION_NUMBER >= 500 #endif // ZSTD_VERSION_NUMBER >= 500
if (outlen == 0) { if (outlen == 0) {
return false; return false;

Loading…
Cancel
Save