fork of https://github.com/oxigraph/rocksdb and https://github.com/facebook/rocksdb for nextgraph and oxigraph
				
			
			
		
			You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							1532 lines
						
					
					
						
							49 KiB
						
					
					
				
			
		
		
	
	
							1532 lines
						
					
					
						
							49 KiB
						
					
					
				| // Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
 | |
| //  This source code is licensed under both the GPLv2 (found in the
 | |
| //  COPYING file in the root directory) and Apache 2.0 License
 | |
| //  (found in the LICENSE.Apache file in the root directory).
 | |
| //
 | |
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style license that can be
 | |
| // found in the LICENSE file. See the AUTHORS file for names of contributors.
 | |
| //
 | |
| #pragma once
 | |
| 
 | |
| #include <algorithm>
 | |
| #include <limits>
 | |
| #ifdef ROCKSDB_MALLOC_USABLE_SIZE
 | |
| #ifdef OS_FREEBSD
 | |
| #include <malloc_np.h>
 | |
| #else  // OS_FREEBSD
 | |
| #include <malloc.h>
 | |
| #endif  // OS_FREEBSD
 | |
| #endif  // ROCKSDB_MALLOC_USABLE_SIZE
 | |
| #include <string>
 | |
| 
 | |
| #include "memory/memory_allocator.h"
 | |
| #include "rocksdb/options.h"
 | |
| #include "rocksdb/table.h"
 | |
| #include "test_util/sync_point.h"
 | |
| #include "util/coding.h"
 | |
| #include "util/compression_context_cache.h"
 | |
| #include "util/string_util.h"
 | |
| 
 | |
| #ifdef SNAPPY
 | |
| #include <snappy.h>
 | |
| #endif
 | |
| 
 | |
| #ifdef ZLIB
 | |
| #include <zlib.h>
 | |
| #endif
 | |
| 
 | |
| #ifdef BZIP2
 | |
| #include <bzlib.h>
 | |
| #endif
 | |
| 
 | |
| #if defined(LZ4)
 | |
| #include <lz4.h>
 | |
| #include <lz4hc.h>
 | |
| #endif
 | |
| 
 | |
| #if defined(ZSTD)
 | |
| #include <zstd.h>
 | |
| #if ZSTD_VERSION_NUMBER >= 10103  // v1.1.3+
 | |
| #include <zdict.h>
 | |
| #endif  // ZSTD_VERSION_NUMBER >= 10103
 | |
| namespace ROCKSDB_NAMESPACE {
 | |
| // Need this for the context allocation override
 | |
| // On windows we need to do this explicitly
 | |
| #if (ZSTD_VERSION_NUMBER >= 500)
 | |
| #if defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) && \
 | |
|     defined(ZSTD_STATIC_LINKING_ONLY)
 | |
| #define ROCKSDB_ZSTD_CUSTOM_MEM
 | |
| namespace port {
 | |
| ZSTD_customMem GetJeZstdAllocationOverrides();
 | |
| }  // namespace port
 | |
| #endif  // defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) &&
 | |
|         // defined(ZSTD_STATIC_LINKING_ONLY)
 | |
| 
 | |
| // We require `ZSTD_sizeof_DDict` and `ZSTD_createDDict_byReference` to use
 | |
| // `ZSTD_DDict`. The former was introduced in v1.0.0 and the latter was
 | |
| // introduced in v1.1.3. But an important bug fix for `ZSTD_sizeof_DDict` came
 | |
| // in v1.1.4, so that is the version we require. As of today's latest version
 | |
| // (v1.3.8), they are both still in the experimental API, which means they are
 | |
| // only exported when the compiler flag `ZSTD_STATIC_LINKING_ONLY` is set.
 | |
| #if defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104
 | |
| #define ROCKSDB_ZSTD_DDICT
 | |
| #endif  // defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104
 | |
| 
 | |
| // Cached data represents a portion that can be re-used
 | |
| // If, in the future we have more than one native context to
 | |
| // cache we can arrange this as a tuple
 | |
| class ZSTDUncompressCachedData {
 | |
|  public:
 | |
|   using ZSTDNativeContext = ZSTD_DCtx*;
 | |
|   ZSTDUncompressCachedData() {}
 | |
|   // Init from cache
 | |
|   ZSTDUncompressCachedData(const ZSTDUncompressCachedData& o) = delete;
 | |
|   ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete;
 | |
|   ZSTDUncompressCachedData(ZSTDUncompressCachedData&& o) ROCKSDB_NOEXCEPT
 | |
|       : ZSTDUncompressCachedData() {
 | |
|     *this = std::move(o);
 | |
|   }
 | |
|   ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&& o)
 | |
|       ROCKSDB_NOEXCEPT {
 | |
|     assert(zstd_ctx_ == nullptr);
 | |
|     std::swap(zstd_ctx_, o.zstd_ctx_);
 | |
|     std::swap(cache_idx_, o.cache_idx_);
 | |
|     return *this;
 | |
|   }
 | |
|   ZSTDNativeContext Get() const { return zstd_ctx_; }
 | |
|   int64_t GetCacheIndex() const { return cache_idx_; }
 | |
|   void CreateIfNeeded() {
 | |
|     if (zstd_ctx_ == nullptr) {
 | |
| #ifdef ROCKSDB_ZSTD_CUSTOM_MEM
 | |
|       zstd_ctx_ =
 | |
|           ZSTD_createDCtx_advanced(port::GetJeZstdAllocationOverrides());
 | |
| #else   // ROCKSDB_ZSTD_CUSTOM_MEM
 | |
|       zstd_ctx_ = ZSTD_createDCtx();
 | |
| #endif  // ROCKSDB_ZSTD_CUSTOM_MEM
 | |
|       cache_idx_ = -1;
 | |
|     }
 | |
|   }
 | |
|   void InitFromCache(const ZSTDUncompressCachedData& o, int64_t idx) {
 | |
|     zstd_ctx_ = o.zstd_ctx_;
 | |
|     cache_idx_ = idx;
 | |
|   }
 | |
|   ~ZSTDUncompressCachedData() {
 | |
|     if (zstd_ctx_ != nullptr && cache_idx_ == -1) {
 | |
|       ZSTD_freeDCtx(zstd_ctx_);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|  private:
 | |
|   ZSTDNativeContext zstd_ctx_ = nullptr;
 | |
|   int64_t cache_idx_ = -1;  // -1 means this instance owns the context
 | |
| };
 | |
| #endif  // (ZSTD_VERSION_NUMBER >= 500)
 | |
| }  // namespace ROCKSDB_NAMESPACE
 | |
| #endif  // ZSTD
 | |
| 
 | |
| #if !(defined ZSTD) || !(ZSTD_VERSION_NUMBER >= 500)
 | |
| namespace ROCKSDB_NAMESPACE {
 | |
| class ZSTDUncompressCachedData {
 | |
|   void* padding;  // unused
 | |
|  public:
 | |
|   using ZSTDNativeContext = void*;
 | |
|   ZSTDUncompressCachedData() {}
 | |
|   ZSTDUncompressCachedData(const ZSTDUncompressCachedData&) {}
 | |
|   ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete;
 | |
|   ZSTDUncompressCachedData(ZSTDUncompressCachedData&&)
 | |
|       ROCKSDB_NOEXCEPT = default;
 | |
|   ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&&)
 | |
|       ROCKSDB_NOEXCEPT = default;
 | |
|   ZSTDNativeContext Get() const { return nullptr; }
 | |
|   int64_t GetCacheIndex() const { return -1; }
 | |
|   void CreateIfNeeded() {}
 | |
|   void InitFromCache(const ZSTDUncompressCachedData&, int64_t) {}
 | |
|  private:
 | |
|   void ignore_padding__() { padding = nullptr; }
 | |
| };
 | |
| }  // namespace ROCKSDB_NAMESPACE
 | |
| #endif
 | |
| 
 | |
| #if defined(XPRESS)
 | |
| #include "port/xpress.h"
 | |
| #endif
 | |
| 
 | |
| namespace ROCKSDB_NAMESPACE {
 | |
| 
 | |
| // Holds dictionary and related data, like ZSTD's digested compression
 | |
| // dictionary.
 | |
| struct CompressionDict {
 | |
| #if ZSTD_VERSION_NUMBER >= 700
 | |
|   ZSTD_CDict* zstd_cdict_ = nullptr;
 | |
| #endif  // ZSTD_VERSION_NUMBER >= 700
 | |
|   std::string dict_;
 | |
| 
 | |
|  public:
 | |
| #if ZSTD_VERSION_NUMBER >= 700
 | |
|   CompressionDict(std::string dict, CompressionType type, int level) {
 | |
| #else   // ZSTD_VERSION_NUMBER >= 700
 | |
|   CompressionDict(std::string dict, CompressionType /*type*/, int /*level*/) {
 | |
| #endif  // ZSTD_VERSION_NUMBER >= 700
 | |
|     dict_ = std::move(dict);
 | |
| #if ZSTD_VERSION_NUMBER >= 700
 | |
|     zstd_cdict_ = nullptr;
 | |
|     if (!dict_.empty() && (type == kZSTD || type == kZSTDNotFinalCompression)) {
 | |
|       if (level == CompressionOptions::kDefaultCompressionLevel) {
 | |
|         // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see
 | |
|         // https://github.com/facebook/zstd/issues/1148
 | |
|         level = 3;
 | |
|       }
 | |
|       // Should be safe (but slower) if below call fails as we'll use the
 | |
|       // raw dictionary to compress.
 | |
|       zstd_cdict_ = ZSTD_createCDict(dict_.data(), dict_.size(), level);
 | |
|       assert(zstd_cdict_ != nullptr);
 | |
|     }
 | |
| #endif  // ZSTD_VERSION_NUMBER >= 700
 | |
|   }
 | |
| 
 | |
|   ~CompressionDict() {
 | |
| #if ZSTD_VERSION_NUMBER >= 700
 | |
|     size_t res = 0;
 | |
|     if (zstd_cdict_ != nullptr) {
 | |
|       res = ZSTD_freeCDict(zstd_cdict_);
 | |
|     }
 | |
|     assert(res == 0);  // Last I checked they can't fail
 | |
|     (void)res;         // prevent unused var warning
 | |
| #endif                 // ZSTD_VERSION_NUMBER >= 700
 | |
|   }
 | |
| 
 | |
| #if ZSTD_VERSION_NUMBER >= 700
 | |
|   const ZSTD_CDict* GetDigestedZstdCDict() const { return zstd_cdict_; }
 | |
| #endif  // ZSTD_VERSION_NUMBER >= 700
 | |
| 
 | |
|   Slice GetRawDict() const { return dict_; }
 | |
| 
 | |
|   static const CompressionDict& GetEmptyDict() {
 | |
|     static CompressionDict empty_dict{};
 | |
|     return empty_dict;
 | |
|   }
 | |
| 
 | |
|   CompressionDict() = default;
 | |
|   // Disable copy/move
 | |
|   CompressionDict(const CompressionDict&) = delete;
 | |
|   CompressionDict& operator=(const CompressionDict&) = delete;
 | |
|   CompressionDict(CompressionDict&&) = delete;
 | |
|   CompressionDict& operator=(CompressionDict&&) = delete;
 | |
| };
 | |
| 
 | |
| // Holds dictionary and related data, like ZSTD's digested uncompression
 | |
| // dictionary.
 | |
| struct UncompressionDict {
 | |
|   // Block containing the data for the compression dictionary in case the
 | |
|   // constructor that takes a string parameter is used.
 | |
|   std::string dict_;
 | |
| 
 | |
|   // Block containing the data for the compression dictionary in case the
 | |
|   // constructor that takes a Slice parameter is used and the passed in
 | |
|   // CacheAllocationPtr is not nullptr.
 | |
|   CacheAllocationPtr allocation_;
 | |
| 
 | |
|   // Slice pointing to the compression dictionary data. Can point to
 | |
|   // dict_, allocation_, or some other memory location, depending on how
 | |
|   // the object was constructed.
 | |
|   Slice slice_;
 | |
| 
 | |
| #ifdef ROCKSDB_ZSTD_DDICT
 | |
|   // Processed version of the contents of slice_ for ZSTD compression.
 | |
|   ZSTD_DDict* zstd_ddict_ = nullptr;
 | |
| #endif  // ROCKSDB_ZSTD_DDICT
 | |
| 
 | |
| #ifdef ROCKSDB_ZSTD_DDICT
 | |
|   UncompressionDict(std::string dict, bool using_zstd)
 | |
| #else   // ROCKSDB_ZSTD_DDICT
 | |
|   UncompressionDict(std::string dict, bool /* using_zstd */)
 | |
| #endif  // ROCKSDB_ZSTD_DDICT
 | |
|       : dict_(std::move(dict)), slice_(dict_) {
 | |
| #ifdef ROCKSDB_ZSTD_DDICT
 | |
|     if (!slice_.empty() && using_zstd) {
 | |
|       zstd_ddict_ = ZSTD_createDDict_byReference(slice_.data(), slice_.size());
 | |
|       assert(zstd_ddict_ != nullptr);
 | |
|     }
 | |
| #endif  // ROCKSDB_ZSTD_DDICT
 | |
|   }
 | |
| 
 | |
| #ifdef ROCKSDB_ZSTD_DDICT
 | |
|   UncompressionDict(Slice slice, CacheAllocationPtr&& allocation,
 | |
|                     bool using_zstd)
 | |
| #else   // ROCKSDB_ZSTD_DDICT
 | |
|   UncompressionDict(Slice slice, CacheAllocationPtr&& allocation,
 | |
|                     bool /* using_zstd */)
 | |
| #endif  // ROCKSDB_ZSTD_DDICT
 | |
|       : allocation_(std::move(allocation)), slice_(std::move(slice)) {
 | |
| #ifdef ROCKSDB_ZSTD_DDICT
 | |
|     if (!slice_.empty() && using_zstd) {
 | |
|       zstd_ddict_ = ZSTD_createDDict_byReference(slice_.data(), slice_.size());
 | |
|       assert(zstd_ddict_ != nullptr);
 | |
|     }
 | |
| #endif  // ROCKSDB_ZSTD_DDICT
 | |
|   }
 | |
| 
 | |
|   UncompressionDict(UncompressionDict&& rhs)
 | |
|       : dict_(std::move(rhs.dict_)),
 | |
|         allocation_(std::move(rhs.allocation_)),
 | |
|         slice_(std::move(rhs.slice_))
 | |
| #ifdef ROCKSDB_ZSTD_DDICT
 | |
|         ,
 | |
|         zstd_ddict_(rhs.zstd_ddict_)
 | |
| #endif
 | |
|   {
 | |
| #ifdef ROCKSDB_ZSTD_DDICT
 | |
|     rhs.zstd_ddict_ = nullptr;
 | |
| #endif
 | |
|   }
 | |
| 
 | |
|   ~UncompressionDict() {
 | |
| #ifdef ROCKSDB_ZSTD_DDICT
 | |
|     size_t res = 0;
 | |
|     if (zstd_ddict_ != nullptr) {
 | |
|       res = ZSTD_freeDDict(zstd_ddict_);
 | |
|     }
 | |
|     assert(res == 0);  // Last I checked they can't fail
 | |
|     (void)res;         // prevent unused var warning
 | |
| #endif                 // ROCKSDB_ZSTD_DDICT
 | |
|   }
 | |
| 
 | |
|   UncompressionDict& operator=(UncompressionDict&& rhs) {
 | |
|     if (this == &rhs) {
 | |
|       return *this;
 | |
|     }
 | |
| 
 | |
|     dict_ = std::move(rhs.dict_);
 | |
|     allocation_ = std::move(rhs.allocation_);
 | |
|     slice_ = std::move(rhs.slice_);
 | |
| 
 | |
| #ifdef ROCKSDB_ZSTD_DDICT
 | |
|     zstd_ddict_ = rhs.zstd_ddict_;
 | |
|     rhs.zstd_ddict_ = nullptr;
 | |
| #endif
 | |
| 
 | |
|     return *this;
 | |
|   }
 | |
| 
 | |
|   // The object is self-contained if the string constructor is used, or the
 | |
|   // Slice constructor is invoked with a non-null allocation. Otherwise, it
 | |
|   // is the caller's responsibility to ensure that the underlying storage
 | |
|   // outlives this object.
 | |
|   bool own_bytes() const { return !dict_.empty() || allocation_; }
 | |
| 
 | |
|   const Slice& GetRawDict() const { return slice_; }
 | |
| 
 | |
| #ifdef ROCKSDB_ZSTD_DDICT
 | |
|   const ZSTD_DDict* GetDigestedZstdDDict() const { return zstd_ddict_; }
 | |
| #endif  // ROCKSDB_ZSTD_DDICT
 | |
| 
 | |
|   static const UncompressionDict& GetEmptyDict() {
 | |
|     static UncompressionDict empty_dict{};
 | |
|     return empty_dict;
 | |
|   }
 | |
| 
 | |
|   size_t ApproximateMemoryUsage() const {
 | |
|     size_t usage = sizeof(struct UncompressionDict);
 | |
|     usage += dict_.size();
 | |
|     if (allocation_) {
 | |
|       auto allocator = allocation_.get_deleter().allocator;
 | |
|       if (allocator) {
 | |
|         usage += allocator->UsableSize(allocation_.get(), slice_.size());
 | |
|       } else {
 | |
|         usage += slice_.size();
 | |
|       }
 | |
|     }
 | |
| #ifdef ROCKSDB_ZSTD_DDICT
 | |
|     usage += ZSTD_sizeof_DDict(zstd_ddict_);
 | |
| #endif  // ROCKSDB_ZSTD_DDICT
 | |
|     return usage;
 | |
|   }
 | |
| 
 | |
|   UncompressionDict() = default;
 | |
|   // Disable copy
 | |
|   UncompressionDict(const CompressionDict&) = delete;
 | |
|   UncompressionDict& operator=(const CompressionDict&) = delete;
 | |
| };
 | |
| 
 | |
| class CompressionContext {
 | |
|  private:
 | |
| #if defined(ZSTD) && (ZSTD_VERSION_NUMBER >= 500)
 | |
|   ZSTD_CCtx* zstd_ctx_ = nullptr;
 | |
|   void CreateNativeContext(CompressionType type) {
 | |
|     if (type == kZSTD || type == kZSTDNotFinalCompression) {
 | |
| #ifdef ROCKSDB_ZSTD_CUSTOM_MEM
 | |
|       zstd_ctx_ =
 | |
|           ZSTD_createCCtx_advanced(port::GetJeZstdAllocationOverrides());
 | |
| #else   // ROCKSDB_ZSTD_CUSTOM_MEM
 | |
|       zstd_ctx_ = ZSTD_createCCtx();
 | |
| #endif  // ROCKSDB_ZSTD_CUSTOM_MEM
 | |
|     }
 | |
|   }
 | |
|   void DestroyNativeContext() {
 | |
|     if (zstd_ctx_ != nullptr) {
 | |
|       ZSTD_freeCCtx(zstd_ctx_);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|  public:
 | |
|   // callable inside ZSTD_Compress
 | |
|   ZSTD_CCtx* ZSTDPreallocCtx() const {
 | |
|     assert(zstd_ctx_ != nullptr);
 | |
|     return zstd_ctx_;
 | |
|   }
 | |
| 
 | |
| #else   // ZSTD && (ZSTD_VERSION_NUMBER >= 500)
 | |
|  private:
 | |
|   void CreateNativeContext(CompressionType /* type */) {}
 | |
|   void DestroyNativeContext() {}
 | |
| #endif  // ZSTD && (ZSTD_VERSION_NUMBER >= 500)
 | |
|  public:
 | |
|   explicit CompressionContext(CompressionType type) {
 | |
|     CreateNativeContext(type);
 | |
|   }
 | |
|   ~CompressionContext() { DestroyNativeContext(); }
 | |
|   CompressionContext(const CompressionContext&) = delete;
 | |
|   CompressionContext& operator=(const CompressionContext&) = delete;
 | |
| };
 | |
| 
 | |
| class CompressionInfo {
 | |
|   const CompressionOptions& opts_;
 | |
|   const CompressionContext& context_;
 | |
|   const CompressionDict& dict_;
 | |
|   const CompressionType type_;
 | |
|   const uint64_t sample_for_compression_;
 | |
| 
 | |
|  public:
 | |
|   CompressionInfo(const CompressionOptions& _opts,
 | |
|                   const CompressionContext& _context,
 | |
|                   const CompressionDict& _dict, CompressionType _type,
 | |
|                   uint64_t _sample_for_compression)
 | |
|       : opts_(_opts),
 | |
|         context_(_context),
 | |
|         dict_(_dict),
 | |
|         type_(_type),
 | |
|         sample_for_compression_(_sample_for_compression) {}
 | |
| 
 | |
|   const CompressionOptions& options() const { return opts_; }
 | |
|   const CompressionContext& context() const { return context_; }
 | |
|   const CompressionDict& dict() const { return dict_; }
 | |
|   CompressionType type() const { return type_; }
 | |
|   uint64_t SampleForCompression() const { return sample_for_compression_; }
 | |
| };
 | |
| 
 | |
| class UncompressionContext {
 | |
|  private:
 | |
|   CompressionContextCache* ctx_cache_ = nullptr;
 | |
|   ZSTDUncompressCachedData uncomp_cached_data_;
 | |
| 
 | |
|  public:
 | |
|   explicit UncompressionContext(CompressionType type) {
 | |
|     if (type == kZSTD || type == kZSTDNotFinalCompression) {
 | |
|       ctx_cache_ = CompressionContextCache::Instance();
 | |
|       uncomp_cached_data_ = ctx_cache_->GetCachedZSTDUncompressData();
 | |
|     }
 | |
|   }
 | |
|   ~UncompressionContext() {
 | |
|     if (uncomp_cached_data_.GetCacheIndex() != -1) {
 | |
|       assert(ctx_cache_ != nullptr);
 | |
|       ctx_cache_->ReturnCachedZSTDUncompressData(
 | |
|           uncomp_cached_data_.GetCacheIndex());
 | |
|     }
 | |
|   }
 | |
|   UncompressionContext(const UncompressionContext&) = delete;
 | |
|   UncompressionContext& operator=(const UncompressionContext&) = delete;
 | |
| 
 | |
|   ZSTDUncompressCachedData::ZSTDNativeContext GetZSTDContext() const {
 | |
|     return uncomp_cached_data_.Get();
 | |
|   }
 | |
| };
 | |
| 
 | |
| class UncompressionInfo {
 | |
|   const UncompressionContext& context_;
 | |
|   const UncompressionDict& dict_;
 | |
|   const CompressionType type_;
 | |
| 
 | |
|  public:
 | |
|   UncompressionInfo(const UncompressionContext& _context,
 | |
|                     const UncompressionDict& _dict, CompressionType _type)
 | |
|       : context_(_context), dict_(_dict), type_(_type) {}
 | |
| 
 | |
|   const UncompressionContext& context() const { return context_; }
 | |
|   const UncompressionDict& dict() const { return dict_; }
 | |
|   CompressionType type() const { return type_; }
 | |
| };
 | |
| 
 | |
// Returns true when this build was compiled with SNAPPY defined.
inline bool Snappy_Supported() {
#if defined(SNAPPY)
  constexpr bool kCompiledIn = true;
#else
  constexpr bool kCompiledIn = false;
#endif
  return kCompiledIn;
}
 | |
| 
 | |
// Returns true when this build was compiled with ZLIB defined.
inline bool Zlib_Supported() {
#if defined(ZLIB)
  constexpr bool kCompiledIn = true;
#else
  constexpr bool kCompiledIn = false;
#endif
  return kCompiledIn;
}
 | |
| 
 | |
// Returns true when this build was compiled with BZIP2 defined.
inline bool BZip2_Supported() {
#if defined(BZIP2)
  constexpr bool kCompiledIn = true;
#else
  constexpr bool kCompiledIn = false;
#endif
  return kCompiledIn;
}
 | |
| 
 | |
// Returns true when this build was compiled with LZ4 defined.
inline bool LZ4_Supported() {
#if defined(LZ4)
  constexpr bool kCompiledIn = true;
#else
  constexpr bool kCompiledIn = false;
#endif
  return kCompiledIn;
}
 | |
| 
 | |
// Returns true when this build was compiled with XPRESS defined.
inline bool XPRESS_Supported() {
#if defined(XPRESS)
  constexpr bool kCompiledIn = true;
#else
  constexpr bool kCompiledIn = false;
#endif
  return kCompiledIn;
}
 | |
| 
 | |
// Returns true when this build was compiled with ZSTD defined and the ZSTD
// library in use reports a version of at least 0.8.0.
inline bool ZSTD_Supported() {
#if defined(ZSTD)
  // ZSTD format is finalized since version 0.8.0.
  const bool format_is_final = (ZSTD_versionNumber() >= 800);
  return format_is_final;
#else
  return false;
#endif
}
 | |
| 
 | |
// Returns true when this build was compiled with ZSTD defined, regardless of
// the library version.
inline bool ZSTDNotFinal_Supported() {
#if defined(ZSTD)
  constexpr bool kCompiledIn = true;
#else
  constexpr bool kCompiledIn = false;
#endif
  return kCompiledIn;
}
 | |
| 
 | |
| inline bool CompressionTypeSupported(CompressionType compression_type) {
 | |
|   switch (compression_type) {
 | |
|     case kNoCompression:
 | |
|       return true;
 | |
|     case kSnappyCompression:
 | |
|       return Snappy_Supported();
 | |
|     case kZlibCompression:
 | |
|       return Zlib_Supported();
 | |
|     case kBZip2Compression:
 | |
|       return BZip2_Supported();
 | |
|     case kLZ4Compression:
 | |
|       return LZ4_Supported();
 | |
|     case kLZ4HCCompression:
 | |
|       return LZ4_Supported();
 | |
|     case kXpressCompression:
 | |
|       return XPRESS_Supported();
 | |
|     case kZSTDNotFinalCompression:
 | |
|       return ZSTDNotFinal_Supported();
 | |
|     case kZSTD:
 | |
|       return ZSTD_Supported();
 | |
|     default:
 | |
|       assert(false);
 | |
|       return false;
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline bool DictCompressionTypeSupported(CompressionType compression_type) {
 | |
|   switch (compression_type) {
 | |
|     case kNoCompression:
 | |
|       return false;
 | |
|     case kSnappyCompression:
 | |
|       return false;
 | |
|     case kZlibCompression:
 | |
|       return Zlib_Supported();
 | |
|     case kBZip2Compression:
 | |
|       return false;
 | |
|     case kLZ4Compression:
 | |
|     case kLZ4HCCompression:
 | |
| #if LZ4_VERSION_NUMBER >= 10400  // r124+
 | |
|       return LZ4_Supported();
 | |
| #else
 | |
|       return false;
 | |
| #endif
 | |
|     case kXpressCompression:
 | |
|       return false;
 | |
|     case kZSTDNotFinalCompression:
 | |
| #if ZSTD_VERSION_NUMBER >= 500  // v0.5.0+
 | |
|       return ZSTDNotFinal_Supported();
 | |
| #else
 | |
|       return false;
 | |
| #endif
 | |
|     case kZSTD:
 | |
| #if ZSTD_VERSION_NUMBER >= 500  // v0.5.0+
 | |
|       return ZSTD_Supported();
 | |
| #else
 | |
|       return false;
 | |
| #endif
 | |
|     default:
 | |
|       assert(false);
 | |
|       return false;
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline std::string CompressionTypeToString(CompressionType compression_type) {
 | |
|   switch (compression_type) {
 | |
|     case kNoCompression:
 | |
|       return "NoCompression";
 | |
|     case kSnappyCompression:
 | |
|       return "Snappy";
 | |
|     case kZlibCompression:
 | |
|       return "Zlib";
 | |
|     case kBZip2Compression:
 | |
|       return "BZip2";
 | |
|     case kLZ4Compression:
 | |
|       return "LZ4";
 | |
|     case kLZ4HCCompression:
 | |
|       return "LZ4HC";
 | |
|     case kXpressCompression:
 | |
|       return "Xpress";
 | |
|     case kZSTD:
 | |
|       return "ZSTD";
 | |
|     case kZSTDNotFinalCompression:
 | |
|       return "ZSTDNotFinal";
 | |
|     case kDisableCompressionOption:
 | |
|       return "DisableOption";
 | |
|     default:
 | |
|       assert(false);
 | |
|       return "";
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline std::string CompressionOptionsToString(
 | |
|     CompressionOptions& compression_options) {
 | |
|   std::string result;
 | |
|   result.reserve(512);
 | |
|   result.append("window_bits=")
 | |
|       .append(ToString(compression_options.window_bits))
 | |
|       .append("; ");
 | |
|   result.append("level=")
 | |
|       .append(ToString(compression_options.level))
 | |
|       .append("; ");
 | |
|   result.append("strategy=")
 | |
|       .append(ToString(compression_options.strategy))
 | |
|       .append("; ");
 | |
|   result.append("max_dict_bytes=")
 | |
|       .append(ToString(compression_options.max_dict_bytes))
 | |
|       .append("; ");
 | |
|   result.append("zstd_max_train_bytes=")
 | |
|       .append(ToString(compression_options.zstd_max_train_bytes))
 | |
|       .append("; ");
 | |
|   result.append("enabled=")
 | |
|       .append(ToString(compression_options.enabled))
 | |
|       .append("; ");
 | |
|   result.append("max_dict_buffer_bytes=")
 | |
|       .append(ToString(compression_options.max_dict_buffer_bytes))
 | |
|       .append("; ");
 | |
|   return result;
 | |
| }
 | |
| 
 | |
| // compress_format_version can have two values:
 | |
| // 1 -- decompressed sizes for BZip2 and Zlib are not included in the compressed
 | |
| // block. Also, decompressed sizes for LZ4 are encoded in platform-dependent
 | |
| // way.
 | |
| // 2 -- Zlib, BZip2 and LZ4 encode decompressed size as Varint32 just before the
 | |
| // start of compressed block. Snappy format is the same as version 1.
 | |
| 
 | |
| inline bool Snappy_Compress(const CompressionInfo& /*info*/, const char* input,
 | |
|                             size_t length, ::std::string* output) {
 | |
| #ifdef SNAPPY
 | |
|   output->resize(snappy::MaxCompressedLength(length));
 | |
|   size_t outlen;
 | |
|   snappy::RawCompress(input, length, &(*output)[0], &outlen);
 | |
|   output->resize(outlen);
 | |
|   return true;
 | |
| #else
 | |
|   (void)input;
 | |
|   (void)length;
 | |
|   (void)output;
 | |
|   return false;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| inline CacheAllocationPtr Snappy_Uncompress(
 | |
|     const char* input, size_t length, size_t* uncompressed_size,
 | |
|     MemoryAllocator* allocator = nullptr) {
 | |
| #ifdef SNAPPY
 | |
|   size_t uncompressed_length = 0;
 | |
|   if (!snappy::GetUncompressedLength(input, length, &uncompressed_length)) {
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   CacheAllocationPtr output = AllocateBlock(uncompressed_length, allocator);
 | |
| 
 | |
|   if (!snappy::RawUncompress(input, length, output.get())) {
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   *uncompressed_size = uncompressed_length;
 | |
| 
 | |
|   return output;
 | |
| #else
 | |
|   (void)input;
 | |
|   (void)length;
 | |
|   (void)uncompressed_size;
 | |
|   (void)allocator;
 | |
|   return nullptr;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| namespace compression {
 | |
| // returns size
 | |
| inline size_t PutDecompressedSizeInfo(std::string* output, uint32_t length) {
 | |
|   PutVarint32(output, length);
 | |
|   return output->size();
 | |
| }
 | |
| 
 | |
| inline bool GetDecompressedSizeInfo(const char** input_data,
 | |
|                                     size_t* input_length,
 | |
|                                     uint32_t* output_len) {
 | |
|   auto new_input_data =
 | |
|       GetVarint32Ptr(*input_data, *input_data + *input_length, output_len);
 | |
|   if (new_input_data == nullptr) {
 | |
|     return false;
 | |
|   }
 | |
|   *input_length -= (new_input_data - *input_data);
 | |
|   *input_data = new_input_data;
 | |
|   return true;
 | |
| }
 | |
| }  // namespace compression
 | |
| 
 | |
| // compress_format_version == 1 -- decompressed size is not included in the
 | |
| // block header
 | |
| // compress_format_version == 2 -- decompressed size is included in the block
 | |
| // header in varint32 format
 | |
| // @param compression_dict Data for presetting the compression library's
 | |
| //    dictionary.
 | |
| inline bool Zlib_Compress(const CompressionInfo& info,
 | |
|                           uint32_t compress_format_version, const char* input,
 | |
|                           size_t length, ::std::string* output) {
 | |
| #ifdef ZLIB
 | |
|   if (length > std::numeric_limits<uint32_t>::max()) {
 | |
|     // Can't compress more than 4GB
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   size_t output_header_len = 0;
 | |
|   if (compress_format_version == 2) {
 | |
|     output_header_len = compression::PutDecompressedSizeInfo(
 | |
|         output, static_cast<uint32_t>(length));
 | |
|   }
 | |
|   // Resize output to be the plain data length.
 | |
|   // This may not be big enough if the compression actually expands data.
 | |
|   output->resize(output_header_len + length);
 | |
| 
 | |
|   // The memLevel parameter specifies how much memory should be allocated for
 | |
|   // the internal compression state.
 | |
|   // memLevel=1 uses minimum memory but is slow and reduces compression ratio.
 | |
|   // memLevel=9 uses maximum memory for optimal speed.
 | |
|   // The default value is 8. See zconf.h for more details.
 | |
|   static const int memLevel = 8;
 | |
|   int level;
 | |
|   if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
 | |
|     level = Z_DEFAULT_COMPRESSION;
 | |
|   } else {
 | |
|     level = info.options().level;
 | |
|   }
 | |
|   z_stream _stream;
 | |
|   memset(&_stream, 0, sizeof(z_stream));
 | |
|   int st = deflateInit2(&_stream, level, Z_DEFLATED, info.options().window_bits,
 | |
|                         memLevel, info.options().strategy);
 | |
|   if (st != Z_OK) {
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   Slice compression_dict = info.dict().GetRawDict();
 | |
|   if (compression_dict.size()) {
 | |
|     // Initialize the compression library's dictionary
 | |
|     st = deflateSetDictionary(
 | |
|         &_stream, reinterpret_cast<const Bytef*>(compression_dict.data()),
 | |
|         static_cast<unsigned int>(compression_dict.size()));
 | |
|     if (st != Z_OK) {
 | |
|       deflateEnd(&_stream);
 | |
|       return false;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // Compress the input, and put compressed data in output.
 | |
|   _stream.next_in = (Bytef*)input;
 | |
|   _stream.avail_in = static_cast<unsigned int>(length);
 | |
| 
 | |
|   // Initialize the output size.
 | |
|   _stream.avail_out = static_cast<unsigned int>(length);
 | |
|   _stream.next_out = reinterpret_cast<Bytef*>(&(*output)[output_header_len]);
 | |
| 
 | |
|   bool compressed = false;
 | |
|   st = deflate(&_stream, Z_FINISH);
 | |
|   if (st == Z_STREAM_END) {
 | |
|     compressed = true;
 | |
|     output->resize(output->size() - _stream.avail_out);
 | |
|   }
 | |
|   // The only return value we really care about is Z_STREAM_END.
 | |
|   // Z_OK means insufficient output space. This means the compression is
 | |
|   // bigger than decompressed size. Just fail the compression in that case.
 | |
| 
 | |
|   deflateEnd(&_stream);
 | |
|   return compressed;
 | |
| #else
 | |
|   (void)info;
 | |
|   (void)compress_format_version;
 | |
|   (void)input;
 | |
|   (void)length;
 | |
|   (void)output;
 | |
|   return false;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| // compress_format_version == 1 -- decompressed size is not included in the
 | |
| // block header
 | |
| // compress_format_version == 2 -- decompressed size is included in the block
 | |
| // header in varint32 format
 | |
| // @param compression_dict Data for presetting the compression library's
 | |
| //    dictionary.
 | |
| inline CacheAllocationPtr Zlib_Uncompress(
 | |
|     const UncompressionInfo& info, const char* input_data, size_t input_length,
 | |
|     size_t* uncompressed_size, uint32_t compress_format_version,
 | |
|     MemoryAllocator* allocator = nullptr, int windowBits = -14) {
 | |
| #ifdef ZLIB
 | |
|   uint32_t output_len = 0;
 | |
|   if (compress_format_version == 2) {
 | |
|     if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
 | |
|                                               &output_len)) {
 | |
|       return nullptr;
 | |
|     }
 | |
|   } else {
 | |
|     // Assume the decompressed data size will 5x of compressed size, but round
 | |
|     // to the page size
 | |
|     size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096;
 | |
|     output_len = static_cast<uint32_t>(
 | |
|         std::min(proposed_output_len,
 | |
|                  static_cast<size_t>(std::numeric_limits<uint32_t>::max())));
 | |
|   }
 | |
| 
 | |
|   z_stream _stream;
 | |
|   memset(&_stream, 0, sizeof(z_stream));
 | |
| 
 | |
|   // For raw inflate, the windowBits should be -8..-15.
 | |
|   // If windowBits is bigger than zero, it will use either zlib
 | |
|   // header or gzip header. Adding 32 to it will do automatic detection.
 | |
|   int st =
 | |
|       inflateInit2(&_stream, windowBits > 0 ? windowBits + 32 : windowBits);
 | |
|   if (st != Z_OK) {
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   const Slice& compression_dict = info.dict().GetRawDict();
 | |
|   if (compression_dict.size()) {
 | |
|     // Initialize the compression library's dictionary
 | |
|     st = inflateSetDictionary(
 | |
|         &_stream, reinterpret_cast<const Bytef*>(compression_dict.data()),
 | |
|         static_cast<unsigned int>(compression_dict.size()));
 | |
|     if (st != Z_OK) {
 | |
|       return nullptr;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   _stream.next_in = (Bytef*)input_data;
 | |
|   _stream.avail_in = static_cast<unsigned int>(input_length);
 | |
| 
 | |
|   auto output = AllocateBlock(output_len, allocator);
 | |
| 
 | |
|   _stream.next_out = (Bytef*)output.get();
 | |
|   _stream.avail_out = static_cast<unsigned int>(output_len);
 | |
| 
 | |
|   bool done = false;
 | |
|   while (!done) {
 | |
|     st = inflate(&_stream, Z_SYNC_FLUSH);
 | |
|     switch (st) {
 | |
|       case Z_STREAM_END:
 | |
|         done = true;
 | |
|         break;
 | |
|       case Z_OK: {
 | |
|         // No output space. Increase the output space by 20%.
 | |
|         // We should never run out of output space if
 | |
|         // compress_format_version == 2
 | |
|         assert(compress_format_version != 2);
 | |
|         size_t old_sz = output_len;
 | |
|         uint32_t output_len_delta = output_len / 5;
 | |
|         output_len += output_len_delta < 10 ? 10 : output_len_delta;
 | |
|         auto tmp = AllocateBlock(output_len, allocator);
 | |
|         memcpy(tmp.get(), output.get(), old_sz);
 | |
|         output = std::move(tmp);
 | |
| 
 | |
|         // Set more output.
 | |
|         _stream.next_out = (Bytef*)(output.get() + old_sz);
 | |
|         _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
 | |
|         break;
 | |
|       }
 | |
|       case Z_BUF_ERROR:
 | |
|       default:
 | |
|         inflateEnd(&_stream);
 | |
|         return nullptr;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // If we encoded decompressed block size, we should have no bytes left
 | |
|   assert(compress_format_version != 2 || _stream.avail_out == 0);
 | |
|   assert(output_len >= _stream.avail_out);
 | |
|   *uncompressed_size = output_len - _stream.avail_out;
 | |
|   inflateEnd(&_stream);
 | |
|   return output;
 | |
| #else
 | |
|   (void)info;
 | |
|   (void)input_data;
 | |
|   (void)input_length;
 | |
|   (void)uncompressed_size;
 | |
|   (void)compress_format_version;
 | |
|   (void)allocator;
 | |
|   (void)windowBits;
 | |
|   return nullptr;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| // compress_format_version == 1 -- decompressed size is not included in the
 | |
| // block header
 | |
| // compress_format_version == 2 -- decompressed size is included in the block
 | |
| // header in varint32 format
 | |
| inline bool BZip2_Compress(const CompressionInfo& /*info*/,
 | |
|                            uint32_t compress_format_version, const char* input,
 | |
|                            size_t length, ::std::string* output) {
 | |
| #ifdef BZIP2
 | |
|   if (length > std::numeric_limits<uint32_t>::max()) {
 | |
|     // Can't compress more than 4GB
 | |
|     return false;
 | |
|   }
 | |
|   size_t output_header_len = 0;
 | |
|   if (compress_format_version == 2) {
 | |
|     output_header_len = compression::PutDecompressedSizeInfo(
 | |
|         output, static_cast<uint32_t>(length));
 | |
|   }
 | |
|   // Resize output to be the plain data length.
 | |
|   // This may not be big enough if the compression actually expands data.
 | |
|   output->resize(output_header_len + length);
 | |
| 
 | |
|   bz_stream _stream;
 | |
|   memset(&_stream, 0, sizeof(bz_stream));
 | |
| 
 | |
|   // Block size 1 is 100K.
 | |
|   // 0 is for silent.
 | |
|   // 30 is the default workFactor
 | |
|   int st = BZ2_bzCompressInit(&_stream, 1, 0, 30);
 | |
|   if (st != BZ_OK) {
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   // Compress the input, and put compressed data in output.
 | |
|   _stream.next_in = (char*)input;
 | |
|   _stream.avail_in = static_cast<unsigned int>(length);
 | |
| 
 | |
|   // Initialize the output size.
 | |
|   _stream.avail_out = static_cast<unsigned int>(length);
 | |
|   _stream.next_out = reinterpret_cast<char*>(&(*output)[output_header_len]);
 | |
| 
 | |
|   bool compressed = false;
 | |
|   st = BZ2_bzCompress(&_stream, BZ_FINISH);
 | |
|   if (st == BZ_STREAM_END) {
 | |
|     compressed = true;
 | |
|     output->resize(output->size() - _stream.avail_out);
 | |
|   }
 | |
|   // The only return value we really care about is BZ_STREAM_END.
 | |
|   // BZ_FINISH_OK means insufficient output space. This means the compression
 | |
|   // is bigger than decompressed size. Just fail the compression in that case.
 | |
| 
 | |
|   BZ2_bzCompressEnd(&_stream);
 | |
|   return compressed;
 | |
| #else
 | |
|   (void)compress_format_version;
 | |
|   (void)input;
 | |
|   (void)length;
 | |
|   (void)output;
 | |
|   return false;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| // compress_format_version == 1 -- decompressed size is not included in the
 | |
| // block header
 | |
| // compress_format_version == 2 -- decompressed size is included in the block
 | |
| // header in varint32 format
 | |
| inline CacheAllocationPtr BZip2_Uncompress(
 | |
|     const char* input_data, size_t input_length, size_t* uncompressed_size,
 | |
|     uint32_t compress_format_version, MemoryAllocator* allocator = nullptr) {
 | |
| #ifdef BZIP2
 | |
|   uint32_t output_len = 0;
 | |
|   if (compress_format_version == 2) {
 | |
|     if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
 | |
|                                               &output_len)) {
 | |
|       return nullptr;
 | |
|     }
 | |
|   } else {
 | |
|     // Assume the decompressed data size will 5x of compressed size, but round
 | |
|     // to the next page size
 | |
|     size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096;
 | |
|     output_len = static_cast<uint32_t>(
 | |
|         std::min(proposed_output_len,
 | |
|                  static_cast<size_t>(std::numeric_limits<uint32_t>::max())));
 | |
|   }
 | |
| 
 | |
|   bz_stream _stream;
 | |
|   memset(&_stream, 0, sizeof(bz_stream));
 | |
| 
 | |
|   int st = BZ2_bzDecompressInit(&_stream, 0, 0);
 | |
|   if (st != BZ_OK) {
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   _stream.next_in = (char*)input_data;
 | |
|   _stream.avail_in = static_cast<unsigned int>(input_length);
 | |
| 
 | |
|   auto output = AllocateBlock(output_len, allocator);
 | |
| 
 | |
|   _stream.next_out = (char*)output.get();
 | |
|   _stream.avail_out = static_cast<unsigned int>(output_len);
 | |
| 
 | |
|   bool done = false;
 | |
|   while (!done) {
 | |
|     st = BZ2_bzDecompress(&_stream);
 | |
|     switch (st) {
 | |
|       case BZ_STREAM_END:
 | |
|         done = true;
 | |
|         break;
 | |
|       case BZ_OK: {
 | |
|         // No output space. Increase the output space by 20%.
 | |
|         // We should never run out of output space if
 | |
|         // compress_format_version == 2
 | |
|         assert(compress_format_version != 2);
 | |
|         uint32_t old_sz = output_len;
 | |
|         output_len = output_len * 1.2;
 | |
|         auto tmp = AllocateBlock(output_len, allocator);
 | |
|         memcpy(tmp.get(), output.get(), old_sz);
 | |
|         output = std::move(tmp);
 | |
| 
 | |
|         // Set more output.
 | |
|         _stream.next_out = (char*)(output.get() + old_sz);
 | |
|         _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
 | |
|         break;
 | |
|       }
 | |
|       default:
 | |
|         BZ2_bzDecompressEnd(&_stream);
 | |
|         return nullptr;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // If we encoded decompressed block size, we should have no bytes left
 | |
|   assert(compress_format_version != 2 || _stream.avail_out == 0);
 | |
|   assert(output_len >= _stream.avail_out);
 | |
|   *uncompressed_size = output_len - _stream.avail_out;
 | |
|   BZ2_bzDecompressEnd(&_stream);
 | |
|   return output;
 | |
| #else
 | |
|   (void)input_data;
 | |
|   (void)input_length;
 | |
|   (void)uncompressed_size;
 | |
|   (void)compress_format_version;
 | |
|   (void)allocator;
 | |
|   return nullptr;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| // compress_format_version == 1 -- decompressed size is included in the
 | |
| // block header using memcpy, which makes database non-portable)
 | |
| // compress_format_version == 2 -- decompressed size is included in the block
 | |
| // header in varint32 format
 | |
| // @param compression_dict Data for presetting the compression library's
 | |
| //    dictionary.
 | |
| inline bool LZ4_Compress(const CompressionInfo& info,
 | |
|                          uint32_t compress_format_version, const char* input,
 | |
|                          size_t length, ::std::string* output) {
 | |
| #ifdef LZ4
 | |
|   if (length > std::numeric_limits<uint32_t>::max()) {
 | |
|     // Can't compress more than 4GB
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   size_t output_header_len = 0;
 | |
|   if (compress_format_version == 2) {
 | |
|     // new encoding, using varint32 to store size information
 | |
|     output_header_len = compression::PutDecompressedSizeInfo(
 | |
|         output, static_cast<uint32_t>(length));
 | |
|   } else {
 | |
|     // legacy encoding, which is not really portable (depends on big/little
 | |
|     // endianness)
 | |
|     output_header_len = 8;
 | |
|     output->resize(output_header_len);
 | |
|     char* p = const_cast<char*>(output->c_str());
 | |
|     memcpy(p, &length, sizeof(length));
 | |
|   }
 | |
|   int compress_bound = LZ4_compressBound(static_cast<int>(length));
 | |
|   output->resize(static_cast<size_t>(output_header_len + compress_bound));
 | |
| 
 | |
|   int outlen;
 | |
| #if LZ4_VERSION_NUMBER >= 10400  // r124+
 | |
|   LZ4_stream_t* stream = LZ4_createStream();
 | |
|   Slice compression_dict = info.dict().GetRawDict();
 | |
|   if (compression_dict.size()) {
 | |
|     LZ4_loadDict(stream, compression_dict.data(),
 | |
|                  static_cast<int>(compression_dict.size()));
 | |
|   }
 | |
| #if LZ4_VERSION_NUMBER >= 10700  // r129+
 | |
|   outlen =
 | |
|       LZ4_compress_fast_continue(stream, input, &(*output)[output_header_len],
 | |
|                                  static_cast<int>(length), compress_bound, 1);
 | |
| #else  // up to r128
 | |
|   outlen = LZ4_compress_limitedOutput_continue(
 | |
|       stream, input, &(*output)[output_header_len], static_cast<int>(length),
 | |
|       compress_bound);
 | |
| #endif
 | |
|   LZ4_freeStream(stream);
 | |
| #else   // up to r123
 | |
|   outlen = LZ4_compress_limitedOutput(input, &(*output)[output_header_len],
 | |
|                                       static_cast<int>(length), compress_bound);
 | |
| #endif  // LZ4_VERSION_NUMBER >= 10400
 | |
| 
 | |
|   if (outlen == 0) {
 | |
|     return false;
 | |
|   }
 | |
|   output->resize(static_cast<size_t>(output_header_len + outlen));
 | |
|   return true;
 | |
| #else  // LZ4
 | |
|   (void)info;
 | |
|   (void)compress_format_version;
 | |
|   (void)input;
 | |
|   (void)length;
 | |
|   (void)output;
 | |
|   return false;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| // compress_format_version == 1 -- decompressed size is included in the
 | |
| // block header using memcpy, which makes database non-portable)
 | |
| // compress_format_version == 2 -- decompressed size is included in the block
 | |
| // header in varint32 format
 | |
| // @param compression_dict Data for presetting the compression library's
 | |
| //    dictionary.
 | |
| inline CacheAllocationPtr LZ4_Uncompress(const UncompressionInfo& info,
 | |
|                                          const char* input_data,
 | |
|                                          size_t input_length,
 | |
|                                          size_t* uncompressed_size,
 | |
|                                          uint32_t compress_format_version,
 | |
|                                          MemoryAllocator* allocator = nullptr) {
 | |
| #ifdef LZ4
 | |
|   uint32_t output_len = 0;
 | |
|   if (compress_format_version == 2) {
 | |
|     // new encoding, using varint32 to store size information
 | |
|     if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
 | |
|                                               &output_len)) {
 | |
|       return nullptr;
 | |
|     }
 | |
|   } else {
 | |
|     // legacy encoding, which is not really portable (depends on big/little
 | |
|     // endianness)
 | |
|     if (input_length < 8) {
 | |
|       return nullptr;
 | |
|     }
 | |
|     memcpy(&output_len, input_data, sizeof(output_len));
 | |
|     input_length -= 8;
 | |
|     input_data += 8;
 | |
|   }
 | |
| 
 | |
|   auto output = AllocateBlock(output_len, allocator);
 | |
| 
 | |
|   int decompress_bytes = 0;
 | |
| 
 | |
| #if LZ4_VERSION_NUMBER >= 10400  // r124+
 | |
|   LZ4_streamDecode_t* stream = LZ4_createStreamDecode();
 | |
|   const Slice& compression_dict = info.dict().GetRawDict();
 | |
|   if (compression_dict.size()) {
 | |
|     LZ4_setStreamDecode(stream, compression_dict.data(),
 | |
|                         static_cast<int>(compression_dict.size()));
 | |
|   }
 | |
|   decompress_bytes = LZ4_decompress_safe_continue(
 | |
|       stream, input_data, output.get(), static_cast<int>(input_length),
 | |
|       static_cast<int>(output_len));
 | |
|   LZ4_freeStreamDecode(stream);
 | |
| #else   // up to r123
 | |
|   decompress_bytes = LZ4_decompress_safe(input_data, output.get(),
 | |
|                                          static_cast<int>(input_length),
 | |
|                                          static_cast<int>(output_len));
 | |
| #endif  // LZ4_VERSION_NUMBER >= 10400
 | |
| 
 | |
|   if (decompress_bytes < 0) {
 | |
|     return nullptr;
 | |
|   }
 | |
|   assert(decompress_bytes == static_cast<int>(output_len));
 | |
|   *uncompressed_size = decompress_bytes;
 | |
|   return output;
 | |
| #else  // LZ4
 | |
|   (void)info;
 | |
|   (void)input_data;
 | |
|   (void)input_length;
 | |
|   (void)uncompressed_size;
 | |
|   (void)compress_format_version;
 | |
|   (void)allocator;
 | |
|   return nullptr;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| // compress_format_version == 1 -- decompressed size is included in the
 | |
| // block header using memcpy, which makes database non-portable)
 | |
| // compress_format_version == 2 -- decompressed size is included in the block
 | |
| // header in varint32 format
 | |
| // @param compression_dict Data for presetting the compression library's
 | |
| //    dictionary.
 | |
| inline bool LZ4HC_Compress(const CompressionInfo& info,
 | |
|                            uint32_t compress_format_version, const char* input,
 | |
|                            size_t length, ::std::string* output) {
 | |
| #ifdef LZ4
 | |
|   if (length > std::numeric_limits<uint32_t>::max()) {
 | |
|     // Can't compress more than 4GB
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   size_t output_header_len = 0;
 | |
|   if (compress_format_version == 2) {
 | |
|     // new encoding, using varint32 to store size information
 | |
|     output_header_len = compression::PutDecompressedSizeInfo(
 | |
|         output, static_cast<uint32_t>(length));
 | |
|   } else {
 | |
|     // legacy encoding, which is not really portable (depends on big/little
 | |
|     // endianness)
 | |
|     output_header_len = 8;
 | |
|     output->resize(output_header_len);
 | |
|     char* p = const_cast<char*>(output->c_str());
 | |
|     memcpy(p, &length, sizeof(length));
 | |
|   }
 | |
|   int compress_bound = LZ4_compressBound(static_cast<int>(length));
 | |
|   output->resize(static_cast<size_t>(output_header_len + compress_bound));
 | |
| 
 | |
|   int outlen;
 | |
|   int level;
 | |
|   if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
 | |
|     level = 0;  // lz4hc.h says any value < 1 will be sanitized to default
 | |
|   } else {
 | |
|     level = info.options().level;
 | |
|   }
 | |
| #if LZ4_VERSION_NUMBER >= 10400  // r124+
 | |
|   LZ4_streamHC_t* stream = LZ4_createStreamHC();
 | |
|   LZ4_resetStreamHC(stream, level);
 | |
|   Slice compression_dict = info.dict().GetRawDict();
 | |
|   const char* compression_dict_data =
 | |
|       compression_dict.size() > 0 ? compression_dict.data() : nullptr;
 | |
|   size_t compression_dict_size = compression_dict.size();
 | |
|   LZ4_loadDictHC(stream, compression_dict_data,
 | |
|                  static_cast<int>(compression_dict_size));
 | |
| 
 | |
| #if LZ4_VERSION_NUMBER >= 10700  // r129+
 | |
|   outlen =
 | |
|       LZ4_compress_HC_continue(stream, input, &(*output)[output_header_len],
 | |
|                                static_cast<int>(length), compress_bound);
 | |
| #else   // r124-r128
 | |
|   outlen = LZ4_compressHC_limitedOutput_continue(
 | |
|       stream, input, &(*output)[output_header_len], static_cast<int>(length),
 | |
|       compress_bound);
 | |
| #endif  // LZ4_VERSION_NUMBER >= 10700
 | |
|   LZ4_freeStreamHC(stream);
 | |
| 
 | |
| #elif LZ4_VERSION_MAJOR  // r113-r123
 | |
|   outlen = LZ4_compressHC2_limitedOutput(input, &(*output)[output_header_len],
 | |
|                                          static_cast<int>(length),
 | |
|                                          compress_bound, level);
 | |
| #else                    // up to r112
 | |
|   outlen =
 | |
|       LZ4_compressHC_limitedOutput(input, &(*output)[output_header_len],
 | |
|                                    static_cast<int>(length), compress_bound);
 | |
| #endif                   // LZ4_VERSION_NUMBER >= 10400
 | |
| 
 | |
|   if (outlen == 0) {
 | |
|     return false;
 | |
|   }
 | |
|   output->resize(static_cast<size_t>(output_header_len + outlen));
 | |
|   return true;
 | |
| #else  // LZ4
 | |
|   (void)info;
 | |
|   (void)compress_format_version;
 | |
|   (void)input;
 | |
|   (void)length;
 | |
|   (void)output;
 | |
|   return false;
 | |
| #endif
 | |
| }
 | |
| 
 | |
// Forwards to port::xpress::Compress() when built with XPRESS support;
// otherwise ignores its arguments and returns false.
inline bool XPRESS_Compress(const char* input, size_t length,
                            std::string* output) {
#ifdef XPRESS
  return port::xpress::Compress(input, length, output);
#else
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
 | |
| 
 | |
// Forwards to port::xpress::Decompress() when built with XPRESS support;
// otherwise ignores its arguments and returns nullptr.
inline char* XPRESS_Uncompress(const char* input_data, size_t input_length,
                               size_t* uncompressed_size) {
#ifdef XPRESS
  return port::xpress::Decompress(input_data, input_length, uncompressed_size);
#else
  (void)input_data;
  (void)input_length;
  (void)uncompressed_size;
  return nullptr;
#endif
}
 | |
| 
 | |
| inline bool ZSTD_Compress(const CompressionInfo& info, const char* input,
 | |
|                           size_t length, ::std::string* output) {
 | |
| #ifdef ZSTD
 | |
|   if (length > std::numeric_limits<uint32_t>::max()) {
 | |
|     // Can't compress more than 4GB
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   size_t output_header_len = compression::PutDecompressedSizeInfo(
 | |
|       output, static_cast<uint32_t>(length));
 | |
| 
 | |
|   size_t compressBound = ZSTD_compressBound(length);
 | |
|   output->resize(static_cast<size_t>(output_header_len + compressBound));
 | |
|   size_t outlen = 0;
 | |
|   int level;
 | |
|   if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
 | |
|     // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see
 | |
|     // https://github.com/facebook/zstd/issues/1148
 | |
|     level = 3;
 | |
|   } else {
 | |
|     level = info.options().level;
 | |
|   }
 | |
| #if ZSTD_VERSION_NUMBER >= 500  // v0.5.0+
 | |
|   ZSTD_CCtx* context = info.context().ZSTDPreallocCtx();
 | |
|   assert(context != nullptr);
 | |
| #if ZSTD_VERSION_NUMBER >= 700  // v0.7.0+
 | |
|   if (info.dict().GetDigestedZstdCDict() != nullptr) {
 | |
|     outlen = ZSTD_compress_usingCDict(context, &(*output)[output_header_len],
 | |
|                                       compressBound, input, length,
 | |
|                                       info.dict().GetDigestedZstdCDict());
 | |
|   }
 | |
| #endif  // ZSTD_VERSION_NUMBER >= 700
 | |
|   if (outlen == 0) {
 | |
|     outlen = ZSTD_compress_usingDict(context, &(*output)[output_header_len],
 | |
|                                      compressBound, input, length,
 | |
|                                      info.dict().GetRawDict().data(),
 | |
|                                      info.dict().GetRawDict().size(), level);
 | |
|   }
 | |
| #else   // up to v0.4.x
 | |
|   outlen = ZSTD_compress(&(*output)[output_header_len], compressBound, input,
 | |
|                          length, level);
 | |
| #endif  // ZSTD_VERSION_NUMBER >= 500
 | |
|   if (outlen == 0) {
 | |
|     return false;
 | |
|   }
 | |
|   output->resize(output_header_len + outlen);
 | |
|   return true;
 | |
| #else  // ZSTD
 | |
|   (void)info;
 | |
|   (void)input;
 | |
|   (void)length;
 | |
|   (void)output;
 | |
|   return false;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| // @param compression_dict Data for presetting the compression library's
 | |
| //    dictionary.
 | |
| inline CacheAllocationPtr ZSTD_Uncompress(
 | |
|     const UncompressionInfo& info, const char* input_data, size_t input_length,
 | |
|     size_t* uncompressed_size, MemoryAllocator* allocator = nullptr) {
 | |
| #ifdef ZSTD
 | |
|   uint32_t output_len = 0;
 | |
|   if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
 | |
|                                             &output_len)) {
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   auto output = AllocateBlock(output_len, allocator);
 | |
|   size_t actual_output_length = 0;
 | |
| #if ZSTD_VERSION_NUMBER >= 500  // v0.5.0+
 | |
|   ZSTD_DCtx* context = info.context().GetZSTDContext();
 | |
|   assert(context != nullptr);
 | |
| #ifdef ROCKSDB_ZSTD_DDICT
 | |
|   if (info.dict().GetDigestedZstdDDict() != nullptr) {
 | |
|     actual_output_length = ZSTD_decompress_usingDDict(
 | |
|         context, output.get(), output_len, input_data, input_length,
 | |
|         info.dict().GetDigestedZstdDDict());
 | |
|   }
 | |
| #endif  // ROCKSDB_ZSTD_DDICT
 | |
|   if (actual_output_length == 0) {
 | |
|     actual_output_length = ZSTD_decompress_usingDict(
 | |
|         context, output.get(), output_len, input_data, input_length,
 | |
|         info.dict().GetRawDict().data(), info.dict().GetRawDict().size());
 | |
|   }
 | |
| #else   // up to v0.4.x
 | |
|   (void)info;
 | |
|   actual_output_length =
 | |
|       ZSTD_decompress(output.get(), output_len, input_data, input_length);
 | |
| #endif  // ZSTD_VERSION_NUMBER >= 500
 | |
|   assert(actual_output_length == output_len);
 | |
|   *uncompressed_size = actual_output_length;
 | |
|   return output;
 | |
| #else  // ZSTD
 | |
|   (void)info;
 | |
|   (void)input_data;
 | |
|   (void)input_length;
 | |
|   (void)uncompressed_size;
 | |
|   (void)allocator;
 | |
|   return nullptr;
 | |
| #endif
 | |
| }
 | |
| 
 | |
// Reports whether dictionary training can be used: true only when built with
// ZSTD and the runtime library version is at least v1.1.3.
inline bool ZSTD_TrainDictionarySupported() {
#ifndef ZSTD
  return false;
#else
  // Dictionary trainer is available since v0.6.1 for static linking, but not
  // available for dynamic linking until v1.1.3. For now we enable the feature
  // in v1.1.3+ only.
  const bool new_enough = ZSTD_versionNumber() >= 10103;
  return new_enough;
#endif
}
 | |
| 
 | |
// Trains a ZSTD dictionary of at most `max_dict_bytes` bytes.
//
// @param samples      All samples concatenated into one buffer.
// @param sample_lens  Length of each sample, in order.
//
// Returns the trained dictionary, or an empty string when there are no
// samples or the trainer reports an error. Must only be called when built
// against ZSTD v1.1.3+; older builds assert.
inline std::string ZSTD_TrainDictionary(const std::string& samples,
                                        const std::vector<size_t>& sample_lens,
                                        size_t max_dict_bytes) {
  // Dictionary trainer is available since v0.6.1 for static linking, but not
  // available for dynamic linking until v1.1.3. For now we enable the feature
  // in v1.1.3+ only.
#if ZSTD_VERSION_NUMBER >= 10103  // v1.1.3+
  assert(samples.empty() == sample_lens.empty());
  if (samples.empty()) {
    return std::string();
  }

  // Train straight into a buffer of the maximum size, then shrink it.
  std::string dict(max_dict_bytes, '\0');
  const size_t trained_len = ZDICT_trainFromBuffer(
      &dict[0], max_dict_bytes, samples.data(), sample_lens.data(),
      static_cast<unsigned>(sample_lens.size()));
  if (ZDICT_isError(trained_len)) {
    return std::string();
  }
  assert(trained_len <= max_dict_bytes);
  dict.resize(trained_len);
  return dict;
#else   // up to v1.1.2
  assert(false);
  (void)samples;
  (void)sample_lens;
  (void)max_dict_bytes;
  return std::string();
#endif  // ZSTD_VERSION_NUMBER >= 10103
}
 | |
| 
 | |
// Trains a ZSTD dictionary from equally sized samples.
//
// @param samples           All samples concatenated into one buffer.
// @param sample_len_shift  Each sample is (1 << sample_len_shift) bytes; a
//    trailing partial sample is ignored.
// @param max_dict_bytes    Upper bound on the size of the dictionary.
//
// Returns the trained dictionary, or an empty string when `samples` holds no
// complete sample or training fails. Must only be called when built against
// ZSTD v1.1.3+; older builds assert.
inline std::string ZSTD_TrainDictionary(const std::string& samples,
                                        size_t sample_len_shift,
                                        size_t max_dict_bytes) {
  // Dictionary trainer is available since v0.6.1 for static linking, but not
  // available for dynamic linking until v1.1.3. For now we enable the feature
  // in v1.1.3+ only.
#if ZSTD_VERSION_NUMBER >= 10103  // v1.1.3+
  // skips potential partial sample at the end of "samples"
  size_t num_samples = samples.size() >> sample_len_shift;
  if (num_samples == 0) {
    // Not even one complete sample. Passing a non-empty buffer with an empty
    // length list to the overload below would violate its precondition.
    return "";
  }
  std::vector<size_t> sample_lens(num_samples, size_t(1) << sample_len_shift);
  return ZSTD_TrainDictionary(samples, sample_lens, max_dict_bytes);
#else   // up to v1.1.2
  assert(false);
  (void)samples;
  (void)sample_len_shift;
  (void)max_dict_bytes;
  return "";
#endif  // ZSTD_VERSION_NUMBER >= 10103
}
 | |
| 
 | |
| inline bool CompressData(const Slice& raw,
 | |
|                          const CompressionInfo& compression_info,
 | |
|                          uint32_t compress_format_version,
 | |
|                          std::string* compressed_output) {
 | |
|   bool ret = false;
 | |
| 
 | |
|   // Will return compressed block contents if (1) the compression method is
 | |
|   // supported in this platform and (2) the compression rate is "good enough".
 | |
|   switch (compression_info.type()) {
 | |
|     case kSnappyCompression:
 | |
|       ret = Snappy_Compress(compression_info, raw.data(), raw.size(),
 | |
|                             compressed_output);
 | |
|       break;
 | |
|     case kZlibCompression:
 | |
|       ret = Zlib_Compress(compression_info, compress_format_version, raw.data(),
 | |
|                           raw.size(), compressed_output);
 | |
|       break;
 | |
|     case kBZip2Compression:
 | |
|       ret = BZip2_Compress(compression_info, compress_format_version,
 | |
|                            raw.data(), raw.size(), compressed_output);
 | |
|       break;
 | |
|     case kLZ4Compression:
 | |
|       ret = LZ4_Compress(compression_info, compress_format_version, raw.data(),
 | |
|                          raw.size(), compressed_output);
 | |
|       break;
 | |
|     case kLZ4HCCompression:
 | |
|       ret = LZ4HC_Compress(compression_info, compress_format_version,
 | |
|                            raw.data(), raw.size(), compressed_output);
 | |
|       break;
 | |
|     case kXpressCompression:
 | |
|       ret = XPRESS_Compress(raw.data(), raw.size(), compressed_output);
 | |
|       break;
 | |
|     case kZSTD:
 | |
|     case kZSTDNotFinalCompression:
 | |
|       ret = ZSTD_Compress(compression_info, raw.data(), raw.size(),
 | |
|                           compressed_output);
 | |
|       break;
 | |
|     default:
 | |
|       // Do not recognize this compression type
 | |
|       break;
 | |
|   }
 | |
| 
 | |
|   TEST_SYNC_POINT_CALLBACK("CompressData:TamperWithReturnValue",
 | |
|                            static_cast<void*>(&ret));
 | |
| 
 | |
|   return ret;
 | |
| }
 | |
| 
 | |
| inline CacheAllocationPtr UncompressData(
 | |
|     const UncompressionInfo& uncompression_info, const char* data, size_t n,
 | |
|     size_t* uncompressed_size, uint32_t compress_format_version,
 | |
|     MemoryAllocator* allocator = nullptr) {
 | |
|   switch (uncompression_info.type()) {
 | |
|     case kSnappyCompression:
 | |
|       return Snappy_Uncompress(data, n, uncompressed_size, allocator);
 | |
|     case kZlibCompression:
 | |
|       return Zlib_Uncompress(uncompression_info, data, n, uncompressed_size,
 | |
|                              compress_format_version, allocator);
 | |
|     case kBZip2Compression:
 | |
|       return BZip2_Uncompress(data, n, uncompressed_size,
 | |
|                               compress_format_version, allocator);
 | |
|     case kLZ4Compression:
 | |
|     case kLZ4HCCompression:
 | |
|       return LZ4_Uncompress(uncompression_info, data, n, uncompressed_size,
 | |
|                             compress_format_version, allocator);
 | |
|     case kXpressCompression:
 | |
|       // XPRESS allocates memory internally, thus no support for custom
 | |
|       // allocator.
 | |
|       return CacheAllocationPtr(XPRESS_Uncompress(data, n, uncompressed_size));
 | |
|     case kZSTD:
 | |
|     case kZSTDNotFinalCompression:
 | |
|       return ZSTD_Uncompress(uncompression_info, data, n, uncompressed_size,
 | |
|                              allocator);
 | |
|     default:
 | |
|       return CacheAllocationPtr();
 | |
|   }
 | |
| }
 | |
| 
 | |
| }  // namespace ROCKSDB_NAMESPACE
 | |
| 
 |