Add ZSTD (not final format) compression type

Summary: Add the ZSTD compression type, following the same approach that was used when adding LZ4.

Test Plan: Run all tests. Generate files with db_bench and make sure reads succeed. Note that the resulting SST files cannot be opened by older versions. Also ran some other ad hoc tests.

Reviewers: rven, anthony, IslamAbdelRahman, kradhakrishnan, igor

Reviewed By: igor

Subscribers: MarkCallaghan, maykov, yoshinorim, leveldb, dhruba

Differential Revision: https://reviews.facebook.net/D45747
main
sdong 10 years ago
parent e2db15efde
commit 7a0dbdf3ac
  1. 1
      HISTORY.md
  2. 12
      build_tools/build_detect_platform
  3. 11
      build_tools/fbcode_config.sh
  4. 12
      build_tools/fbcode_config4.8.1.sh
  5. 11
      db/db_bench.cc
  6. 4
      include/rocksdb/env.h
  7. 10
      include/rocksdb/options.h
  8. 7
      table/block_based_table_builder.cc
  9. 11
      table/format.cc
  10. 4
      table/table_test.cc
  11. 17
      tools/db_sanity_test.cc
  12. 2
      tools/db_stress.cc
  13. 59
      util/compression.h
  14. 2
      util/ldb_cmd.cc
  15. 2
      util/options_helper.cc
  16. 6
      util/options_test.cc
  17. 6
      util/sst_dump_tool.cc

@ -4,6 +4,7 @@
### New Features
* DB::GetProperty() now accepts "rocksdb.aggregated-table-properties" and "rocksdb.aggregated-table-properties-at-levelN", in which case it returns aggregated table properties of the target column family, or the aggregated table properties of the specified level N if the "at-level" version is used.
* Add compression option kZSTDNotFinalCompression for people to experiment with ZSTD, although its format is not finalized.
### Public API Changes
* Removed class Env::RandomRWFile and Env::NewRandomRWFile().

@ -22,6 +22,7 @@
# -DLEVELDB_PLATFORM_NOATOMIC if it is not
# -DSNAPPY if the Snappy library is present
# -DLZ4 if the LZ4 library is present
# -DZSTD if the ZSTD library is present
# -DNUMA if the NUMA library is present
#
# Using gflags in rocksdb:
@ -269,6 +270,17 @@ EOF
JAVA_LDFLAGS="$JAVA_LDFLAGS -llz4"
fi
# Test whether zstd library is installed
$CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <zstd.h>
int main() {}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DZSTD"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lzstd"
JAVA_LDFLAGS="$JAVA_LDFLAGS -lzstd"
fi
# Test whether numa is available
$CXX $CFLAGS -x c++ - -o /dev/null -lnuma 2>/dev/null <<EOF
#include <numa.h>

@ -42,6 +42,11 @@ if test -z $PIC_BUILD; then
LZ4_INCLUDE=" -I /mnt/gvfs/third-party2/lz4/79d2943e2dd7208a3e0b06cf95e9f85f05fe9e1b/r124/gcc-4.9-glibc-2.20/4230243/include/"
LZ4_LIBS=" /mnt/gvfs/third-party2/lz4/79d2943e2dd7208a3e0b06cf95e9f85f05fe9e1b/r124/gcc-4.9-glibc-2.20/4230243/lib/liblz4.a"
CFLAGS+=" -DLZ4"
ZSTD_REV=8df2d01673ae6afcc8c8d16fec862b2d67ecc1e9
ZSTD_INCLUDE=" -I /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.1.1/gcc-4.8.1-glibc-2.17/c3f970a/include"
ZSTD_LIBS=" /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.1.1/gcc-4.8.1-glibc-2.17/c3f970a/lib/libzstd.a"
CFLAGS+=" -DZSTD"
fi
# location of gflags headers and libraries
@ -73,7 +78,7 @@ export USE_SSE=1
BINUTILS="/mnt/gvfs/third-party2/binutils/0b6ad0c88ddd903333a48ae8bff134efac468e4a/2.25/centos6-native/da39a3e/bin"
AR="$BINUTILS/ar"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE"
GCC_BASE="/mnt/gvfs/third-party2/gcc/1c67a0b88f64d4d9ced0382d141c76aaa7d62fba/4.9.x/centos6-native/1317bc4"
STDLIBS="-L $GCC_BASE/lib64"
@ -114,14 +119,14 @@ CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE"
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $GFLAGS_LIBS $NUMA_LIB"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-4.9-glibc-2.20/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-4.9-glibc-2.20/lib"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $GFLAGS_LIBS"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS"
VALGRIND_VER="/mnt/gvfs/third-party2/valgrind/6c45ef049cbf11c2df593addb712cd891049e737/3.10.0/gcc-4.9-glibc-2.20/4230243/bin/"

@ -30,6 +30,10 @@ LZ4_REV=065ec7e38fe83329031f6668c43bef83eff5808b
LZ4_INCLUDE=" -I /mnt/gvfs/third-party2/lz4/$LZ4_REV/r108/gcc-4.8.1-glibc-2.17/c3f970a/include"
LZ4_LIBS=" /mnt/gvfs/third-party2/lz4/$LZ4_REV/r108/gcc-4.8.1-glibc-2.17/c3f970a/lib/liblz4.a"
ZSTD_REV=8df2d01673ae6afcc8c8d16fec862b2d67ecc1e9
ZSTD_INCLUDE=" -I /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.1.1/gcc-4.8.1-glibc-2.17/c3f970a/include"
ZSTD_LIBS=" /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.1.1/gcc-4.8.1-glibc-2.17/c3f970a/lib/libzstd.a"
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I /mnt/gvfs/third-party2/gflags/1ad047a6e6f6673991918ecadc670868205a243a/1.6/gcc-4.8.1-glibc-2.17/c3f970a/include/"
GFLAGS_LIBS=" /mnt/gvfs/third-party2/gflags/1ad047a6e6f6673991918ecadc670868205a243a/1.6/gcc-4.8.1-glibc-2.17/c3f970a/lib/libgflags.a"
@ -53,7 +57,7 @@ export USE_SSE=1
BINUTILS="/mnt/gvfs/third-party2/binutils/2aff2e7b474cd3e6ab23495ad1224b7d214b9f8e/2.21.1/centos6-native/da39a3e/bin"
AR="$BINUTILS/ar"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE"
GCC_BASE="/mnt/gvfs/third-party2/gcc/1ec615e23800f0815d474478ba476a0adc3fe788/4.8.1/centos6-native/cc6c9dc"
STDLIBS="-L $GCC_BASE/lib64"
@ -88,17 +92,17 @@ fi
CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE"
CFLAGS+=" -DSNAPPY -DGFLAGS=google -DZLIB -DBZIP2 -DLZ4 -DNUMA"
CFLAGS+=" -DSNAPPY -DGFLAGS=google -DZLIB -DBZIP2 -DLZ4 -DZSTD -DNUMA"
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $GFLAGS_LIBS $NUMA_LIB"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $GFLAGS_LIBS"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS"
VALGRIND_REV=b2a9f85e4b70cd03abc85a7f3027fbc4cef35bd0
VALGRIND_VER="/mnt/gvfs/third-party2/valgrind/$VALGRIND_REV/3.8.1/gcc-4.8.1-glibc-2.17/c3f970a/bin/"

@ -510,6 +510,8 @@ enum rocksdb::CompressionType StringToCompressionType(const char* ctype) {
return rocksdb::kLZ4Compression;
else if (!strcasecmp(ctype, "lz4hc"))
return rocksdb::kLZ4HCCompression;
else if (!strcasecmp(ctype, "zstd"))
return rocksdb::kZSTDNotFinalCompression;
fprintf(stdout, "Cannot parse compression type '%s'\n", ctype);
return rocksdb::kSnappyCompression; //default value
@ -1487,6 +1489,10 @@ class Benchmark {
ok = LZ4HC_Compress(Options().compression_opts, 2, input.data(),
input.size(), compressed);
break;
case rocksdb::kZSTDNotFinalCompression:
ok = ZSTD_Compress(Options().compression_opts, input.data(),
input.size(), compressed);
break;
default:
ok = false;
}
@ -2181,6 +2187,11 @@ class Benchmark {
&decompress_size, 2);
ok = uncompressed != nullptr;
break;
case rocksdb::kZSTDNotFinalCompression:
uncompressed = ZSTD_Uncompress(compressed.data(), compressed.size(),
&decompress_size);
ok = uncompressed != nullptr;
break;
default:
ok = false;
}

@ -459,9 +459,7 @@ class WritableFile {
// Indicates the upper layers if the current WritableFile implementation
// uses direct IO.
virtual bool UseDirectIO() const {
return false;
}
virtual bool UseDirectIO() const { return false; }
/*
* Change the priority in rate limiter if rate limiting is enabled.

@ -55,8 +55,14 @@ class InternalKeyComparator;
enum CompressionType : char {
// NOTE: do not change the values of existing entries, as these are
// part of the persistent format on disk.
kNoCompression = 0x0, kSnappyCompression = 0x1, kZlibCompression = 0x2,
kBZip2Compression = 0x3, kLZ4Compression = 0x4, kLZ4HCCompression = 0x5
kNoCompression = 0x0,
kSnappyCompression = 0x1,
kZlibCompression = 0x2,
kBZip2Compression = 0x3,
kLZ4Compression = 0x4,
kLZ4HCCompression = 0x5,
// zstd format is not finalized yet so it's subject to changes.
kZSTDNotFinalCompression = 0x40,
};
enum CompactionStyle : char {

@ -360,6 +360,13 @@ Slice CompressBlock(const Slice& raw,
return *compressed_output;
}
break; // fall back to no compression.
case kZSTDNotFinalCompression:
if (ZSTD_Compress(compression_options, raw.data(), raw.size(),
compressed_output) &&
GoodCompressionRatio(compressed_output->size(), raw.size())) {
return *compressed_output;
}
break; // fall back to no compression.
default: {} // Do not recognize this compression type
}

@ -426,6 +426,17 @@ Status UncompressBlockContents(const char* data, size_t n,
*contents =
BlockContents(std::move(ubuf), decompress_size, true, kNoCompression);
break;
case kZSTDNotFinalCompression:
ubuf =
std::unique_ptr<char[]>(ZSTD_Uncompress(data, n, &decompress_size));
if (!ubuf) {
static char zstd_corrupt_msg[] =
"ZSTD not supported or corrupted ZSTD compressed block contents";
return Status::Corruption(zstd_corrupt_msg);
}
*contents =
BlockContents(std::move(ubuf), decompress_size, true, kNoCompression);
break;
default:
return Status::Corruption("bad block type");
}

@ -531,6 +531,10 @@ static std::vector<TestArgs> GenerateArgList() {
compression_types.emplace_back(kLZ4HCCompression, false);
compression_types.emplace_back(kLZ4HCCompression, true);
}
if (ZSTD_Supported()) {
compression_types.emplace_back(kZSTDNotFinalCompression, false);
compression_types.emplace_back(kZSTDNotFinalCompression, true);
}
for (auto test_type : test_types) {
for (auto reverse_compare : reverse_compare_types) {

@ -182,6 +182,19 @@ class SanityTestLZ4HCCompression : public SanityTest {
Options options_;
};
class SanityTestZSTDCompression : public SanityTest {
public:
explicit SanityTestZSTDCompression(const std::string& path)
: SanityTest(path) {
options_.compression = kZSTDNotFinalCompression;
}
virtual Options GetOptions() const override { return options_; }
virtual std::string Name() const override { return "ZSTDCompression"; }
private:
Options options_;
};
#ifndef ROCKSDB_LITE
class SanityTestPlainTableFactory : public SanityTest {
public:
@ -218,11 +231,13 @@ class SanityTestBloomFilter : public SanityTest {
namespace {
bool RunSanityTests(const std::string& command, const std::string& path) {
std::vector<SanityTest*> sanity_tests = {
new SanityTestBasic(path), new SanityTestSpecialComparator(path),
new SanityTestBasic(path),
new SanityTestSpecialComparator(path),
new SanityTestZlibCompression(path),
new SanityTestZlibCompressionVersion2(path),
new SanityTestLZ4Compression(path),
new SanityTestLZ4HCCompression(path),
new SanityTestZSTDCompression(path),
#ifndef ROCKSDB_LITE
new SanityTestPlainTableFactory(path),
#endif // ROCKSDB_LITE

@ -343,6 +343,8 @@ enum rocksdb::CompressionType StringToCompressionType(const char* ctype) {
return rocksdb::kLZ4Compression;
else if (!strcasecmp(ctype, "lz4hc"))
return rocksdb::kLZ4HCCompression;
else if (!strcasecmp(ctype, "zstd"))
return rocksdb::kZSTDNotFinalCompression;
fprintf(stdout, "Cannot parse compression type '%s'\n", ctype);
return rocksdb::kSnappyCompression; //default value

@ -11,6 +11,7 @@
#include <algorithm>
#include <limits>
#include <string>
#include "rocksdb/options.h"
#include "util/coding.h"
@ -32,6 +33,10 @@
#include <lz4hc.h>
#endif
#if defined(ZSTD)
#include <zstd.h>
#endif
namespace rocksdb {
inline bool Snappy_Supported() {
@ -62,6 +67,13 @@ inline bool LZ4_Supported() {
return false;
}
// Reports whether this build was compiled with the ZSTD macro defined
// (i.e. with -DZSTD), which is what enables the ZSTD code paths below.
inline bool ZSTD_Supported() {
#if defined(ZSTD)
  constexpr bool kBuiltWithZstd = true;
#else
  constexpr bool kBuiltWithZstd = false;
#endif
  return kBuiltWithZstd;
}
inline bool CompressionTypeSupported(CompressionType compression_type) {
switch (compression_type) {
case kNoCompression:
@ -76,6 +88,8 @@ inline bool CompressionTypeSupported(CompressionType compression_type) {
return LZ4_Supported();
case kLZ4HCCompression:
return LZ4_Supported();
case kZSTDNotFinalCompression:
  // ZSTD availability depends on the ZSTD build flag, not on LZ4's; asking
  // LZ4_Supported() here misreports support whenever only one of the two
  // libraries is compiled in.
  return ZSTD_Supported();
default:
assert(false);
return false;
@ -96,6 +110,8 @@ inline std::string CompressionTypeToString(CompressionType compression_type) {
return "LZ4";
case kLZ4HCCompression:
return "LZ4HC";
case kZSTDNotFinalCompression:
return "ZSTD";
default:
assert(false);
return "";
@ -590,4 +606,47 @@ inline bool LZ4HC_Compress(const CompressionOptions& opts,
return false;
}
// Compresses `length` bytes starting at `input` with ZSTD and stores the
// result in `*output`.
//
// The output consists of the header written by
// compression::PutDecompressedSizeInfo() (recording the uncompressed length
// as a uint32_t) followed by the ZSTD payload.
//
// `opts` is accepted for signature parity with the other *_Compress helpers
// but is not consulted here.
//
// Returns true on success. Returns false when ZSTD support is not compiled
// in, when the input is too large to be described by the 32-bit size header,
// or when the ZSTD library reports a failure. On failure the contents of
// `*output` are unspecified.
inline bool ZSTD_Compress(const CompressionOptions& opts, const char* input,
                          size_t length, ::std::string* output) {
#ifdef ZSTD
  if (length > std::numeric_limits<uint32_t>::max()) {
    // Can't compress more than 4GB
    return false;
  }
  const size_t output_header_len = compression::PutDecompressedSizeInfo(
      output, static_cast<uint32_t>(length));
  const size_t compress_bound = ZSTD_compressBound(length);
  output->resize(output_header_len + compress_bound);
  const size_t outlen = ZSTD_compress(&(*output)[output_header_len],
                                      compress_bound, input, length);
  // ZSTD_compress() signals failure with an error code rather than 0, so a
  // plain zero check would let an error code through and use it as a length
  // in the resize() below.
  if (outlen == 0 || ZSTD_isError(outlen)) {
    return false;
  }
  output->resize(output_header_len + outlen);
  return true;
#else
  return false;
#endif
}
// Decompresses a block in the layout produced by ZSTD_Compress(): a
// decompressed-size header (read with compression::GetDecompressedSizeInfo())
// followed by the ZSTD payload.
//
// On success returns a buffer allocated with new[] holding the decompressed
// bytes and stores its length in `*decompress_size`; the caller takes
// ownership and must release it with delete[].
//
// Returns nullptr, leaving `*decompress_size` untouched, when ZSTD support is
// not compiled in, when the size header cannot be parsed, or when the payload
// does not decompress to exactly the number of bytes the header advertises.
inline char* ZSTD_Uncompress(const char* input_data, size_t input_length,
                             int* decompress_size) {
#ifdef ZSTD
  uint32_t output_len = 0;
  if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
                                            &output_len)) {
    return nullptr;
  }
  char* output = new char[output_len];
  const size_t actual_output_length =
      ZSTD_decompress(output, output_len, input_data, input_length);
  // A corrupted block must be rejected in release builds too: an assert alone
  // would hand back a garbage buffer and an error code cast to int as its
  // size. Free the buffer and let the caller report the corruption.
  if (ZSTD_isError(actual_output_length) ||
      actual_output_length != output_len) {
    delete[] output;
    return nullptr;
  }
  *decompress_size = static_cast<int>(actual_output_length);
  return output;
#else
  return nullptr;
#endif
}
} // namespace rocksdb

@ -281,6 +281,8 @@ Options LDBCommand::PrepareOptionsForOpenDB() {
opt.compression = kLZ4Compression;
} else if (comp == "lz4hc") {
opt.compression = kLZ4HCCompression;
} else if (comp == "zstd") {
opt.compression = kZSTDNotFinalCompression;
} else {
// Unknown compression.
exec_state_ =

@ -37,6 +37,8 @@ CompressionType ParseCompressionType(const std::string& type) {
return kLZ4Compression;
} else if (type == "kLZ4HCCompression") {
return kLZ4HCCompression;
} else if (type == "kZSTDNotFinalCompression") {
return kZSTDNotFinalCompression;
} else {
throw std::invalid_argument("Unknown compression type: " + type);
}

@ -104,7 +104,8 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
"kZlibCompression:"
"kBZip2Compression:"
"kLZ4Compression:"
"kLZ4HCCompression"},
"kLZ4HCCompression:"
"kZSTDNotFinalCompression"},
{"compression_opts", "4:5:6"},
{"num_levels", "7"},
{"level0_file_num_compaction_trigger", "8"},
@ -185,13 +186,14 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
ASSERT_EQ(new_cf_opt.min_write_buffer_number_to_merge, 3);
ASSERT_EQ(new_cf_opt.max_write_buffer_number_to_maintain, 99);
ASSERT_EQ(new_cf_opt.compression, kSnappyCompression);
ASSERT_EQ(new_cf_opt.compression_per_level.size(), 6U);
ASSERT_EQ(new_cf_opt.compression_per_level.size(), 7U);
ASSERT_EQ(new_cf_opt.compression_per_level[0], kNoCompression);
ASSERT_EQ(new_cf_opt.compression_per_level[1], kSnappyCompression);
ASSERT_EQ(new_cf_opt.compression_per_level[2], kZlibCompression);
ASSERT_EQ(new_cf_opt.compression_per_level[3], kBZip2Compression);
ASSERT_EQ(new_cf_opt.compression_per_level[4], kLZ4Compression);
ASSERT_EQ(new_cf_opt.compression_per_level[5], kLZ4HCCompression);
ASSERT_EQ(new_cf_opt.compression_per_level[6], kZSTDNotFinalCompression);
ASSERT_EQ(new_cf_opt.compression_opts.window_bits, 4);
ASSERT_EQ(new_cf_opt.compression_opts.level, 5);
ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6);

@ -163,11 +163,15 @@ int SstFileReader::ShowAllCompressionSizes(size_t block_size) {
std::make_pair(CompressionType::kLZ4Compression, "kLZ4Compression"));
compress_type.insert(
std::make_pair(CompressionType::kLZ4HCCompression, "kLZ4HCCompression"));
compress_type.insert(std::make_pair(CompressionType::kZSTDNotFinalCompression,
"kZSTDNotFinalCompression"));
fprintf(stdout, "Block Size: %lu\n", block_size);
for (CompressionType i = CompressionType::kNoCompression;
i != CompressionType::kLZ4HCCompression; i = CompressionType(i + 1)) {
i <= CompressionType::kZSTDNotFinalCompression;
i = (i == kLZ4HCCompression) ? kZSTDNotFinalCompression
: CompressionType(i + 1)) {
CompressionOptions compress_opt;
TableBuilderOptions tb_opts(imoptions, ikc, &block_based_table_factories, i,
compress_opt, false);

Loading…
Cancel
Save