diff --git a/HISTORY.md b/HISTORY.md
index e91350297..21b8e76d8 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -7,7 +7,7 @@
 * Improve subcompaction range partition so that it is likely to be more even. More evenly distribution of subcompaction will improve compaction throughput for some workloads. All input files' index blocks to sample some anchor key points from which we pick positions to partition the input range. This would introduce some CPU overhead in compaction preparation phase, if subcompaction is enabled, but it should be a small fraction of the CPU usage of the whole compaction process. This also brings a behavier change: subcompaction number is much more likely to maxed out than before.
 * Add CompactionPri::kRoundRobin, a compaction picking mode that cycles through all the files with a compact cursor in a round-robin manner. This feature is available since 7.5.
 * Provide support for subcompactions for user_defined_timestamp.
-* Added an option `memtable_protection_bytes_per_key` that turns on memtable per key-value checksum protection. Each memtable entry will be suffixed by a checksum that is computed during writes, and verified in reads/compaction. Detected corruption will be logged and with corruption status returned to user. 
+* Added an option `memtable_protection_bytes_per_key` that turns on memtable per key-value checksum protection. Each memtable entry will be suffixed by a checksum that is computed during writes, and verified in reads/compaction. Detected corruption will be logged and with corruption status returned to user.
 * Added a blob-specific cache priority level - bottom level. Blobs are typically lower-value targets for caching than data blocks, since 1) with BlobDB, data blocks containing blob references conceptually form an index structure which has to be consulted before we can read the blob value, and 2) cached blobs represent only a single key-value, while cached data blocks generally contain multiple KVs. The user can specify the new option `low_pri_pool_ratio` in `LRUCacheOptions` to configure the ratio of capacity reserved for low priority cache entries (and therefore the remaining ratio is the space reserved for the bottom level), or configuring the new argument `low_pri_pool_ratio` in `NewLRUCache()` to achieve the same effect.
 
 ### Public API changes
@@ -27,6 +27,7 @@
 * Fixed a bug where blobs read by iterators would be inserted into the cache even with the `fill_cache` read option set to false.
 * Fixed the segfault caused by `AllocateData()` in `CompressedSecondaryCache::SplitValueIntoChunks()` and `MergeChunksIntoValueTest`.
 * Fixed a bug in BlobDB where a mix of inlined and blob values could result in an incorrect value being passed to the compaction filter (see #10391).
+* Fixed a memory leak bug in stress tests caused by `FaultInjectionSecondaryCache`.
 
 ### Behavior Change
 * Added checksum handshake during the copying of decompressed WAL fragment. This together with #9875, #10037, #10212, #10114 and #10319 provides end-to-end integrity protection for write batch during recovery.
@@ -36,6 +37,7 @@
 * Improve universal tiered storage compaction picker to avoid extra major compaction triggered by size amplification. If `preclude_last_level_data_seconds` is enabled, the size amplification is calculated within non last_level data only which skip the last level and use the penultimate level as the size base.
 * If an error is hit when writing to a file (append, sync, etc), RocksDB is more strict with not issuing more operations to it, except closing the file, with exceptions of some WAL file operations in error recovery path.
 * A `WriteBufferManager` constructed with `allow_stall == false` will no longer trigger write stall implicitly by thrashing until memtable count limit is reached. Instead, a column family can continue accumulating writes while that CF is flushing, which means memory may increase. Users who prefer stalling writes must now explicitly set `allow_stall == true`.
+* Add `CompressedSecondaryCache` into the stress tests.
 
 ### Performance Improvements
 * Instead of constructing `FragmentedRangeTombstoneList` during every read operation, it is now constructed once and stored in immutable memtables. This improves speed of querying range tombstones from immutable memtables.
diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py
index 53a2c5c28..e00ad2fed 100644
--- a/tools/db_crashtest.py
+++ b/tools/db_crashtest.py
@@ -179,7 +179,8 @@ default_params = {
     "async_io": lambda: random.choice([0, 1]),
     "wal_compression": lambda: random.choice(["none", "zstd"]),
     "verify_sst_unique_id_in_manifest": 1,  # always do unique_id verification
-    "secondary_cache_uri": "",
+    "secondary_cache_uri": lambda: random.choice(
+        ["", "compressed_secondary_cache://capacity=8388608"]),
     "allow_data_in_errors": True,
 }
 
diff --git a/utilities/fault_injection_secondary_cache.cc b/utilities/fault_injection_secondary_cache.cc
index 143862d98..502fd773b 100644
--- a/utilities/fault_injection_secondary_cache.cc
+++ b/utilities/fault_injection_secondary_cache.cc
@@ -88,14 +88,22 @@ std::unique_ptr<SecondaryCacheResultHandle>
 FaultInjectionSecondaryCache::Lookup(const Slice& key,
                                      const Cache::CreateCallback& create_cb,
                                      bool wait, bool& is_in_sec_cache) {
-  std::unique_ptr<SecondaryCacheResultHandle> hdl =
-      base_->Lookup(key, create_cb, wait, is_in_sec_cache);
   ErrorContext* ctx = GetErrorContext();
-  if (wait && ctx->rand.OneIn(prob_)) {
-    hdl.reset();
+  if (base_is_compressed_sec_cache_) {
+    if (ctx->rand.OneIn(prob_)) {
+      return nullptr;
+    } else {
+      return base_->Lookup(key, create_cb, wait, is_in_sec_cache);
+    }
+  } else {
+    std::unique_ptr<SecondaryCacheResultHandle> hdl =
+        base_->Lookup(key, create_cb, wait, is_in_sec_cache);
+    if (wait && ctx->rand.OneIn(prob_)) {
+      hdl.reset();
+    }
+    return std::unique_ptr<FaultInjectionSecondaryCache::ResultHandle>(
+        new FaultInjectionSecondaryCache::ResultHandle(this, std::move(hdl)));
   }
-  return std::unique_ptr<FaultInjectionSecondaryCache::ResultHandle>(
-      new FaultInjectionSecondaryCache::ResultHandle(this, std::move(hdl)));
 }
 
 void FaultInjectionSecondaryCache::Erase(const Slice& key) {
@@ -104,7 +112,19 @@ void FaultInjectionSecondaryCache::Erase(const Slice& key) {
 
 void FaultInjectionSecondaryCache::WaitAll(
     std::vector<SecondaryCacheResultHandle*> handles) {
-  FaultInjectionSecondaryCache::ResultHandle::WaitAll(this, handles);
+  if (base_is_compressed_sec_cache_) {
+    ErrorContext* ctx = GetErrorContext();
+    std::vector<SecondaryCacheResultHandle*> base_handles;
+    for (SecondaryCacheResultHandle* hdl : handles) {
+      if (ctx->rand.OneIn(prob_)) {
+        continue;
+      }
+      base_handles.push_back(hdl);
+    }
+    base_->WaitAll(base_handles);
+  } else {
+    FaultInjectionSecondaryCache::ResultHandle::WaitAll(this, handles);
+  }
 }
 
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/utilities/fault_injection_secondary_cache.h b/utilities/fault_injection_secondary_cache.h
index f9fb0b15d..acd960747 100644
--- a/utilities/fault_injection_secondary_cache.h
+++ b/utilities/fault_injection_secondary_cache.h
@@ -22,6 +22,9 @@ class FaultInjectionSecondaryCache : public SecondaryCache {
         seed_(seed),
         prob_(prob),
         thread_local_error_(new ThreadLocalPtr(DeleteThreadLocalErrorContext)) {
+    if (std::strcmp(base_->Name(), "CompressedSecondaryCache") == 0) {
+      base_is_compressed_sec_cache_ = true;
+    }
   }
 
   virtual ~FaultInjectionSecondaryCache() override {}
@@ -35,13 +38,13 @@ class FaultInjectionSecondaryCache : public SecondaryCache {
       const Slice& key, const Cache::CreateCallback& create_cb, bool wait,
       bool& is_in_sec_cache) override;
 
-  void Erase(const Slice& /*key*/) override;
+  void Erase(const Slice& key) override;
 
   void WaitAll(std::vector<SecondaryCacheResultHandle*> handles) override;
 
-  std::string GetPrintableOptions() const override { return ""; }
-
-  void EnableErrorInjection(uint64_t prob);
+  std::string GetPrintableOptions() const override {
+    return base_->GetPrintableOptions();
+  }
 
  private:
   class ResultHandle : public SecondaryCacheResultHandle {
@@ -80,6 +83,7 @@ class FaultInjectionSecondaryCache : public SecondaryCache {
   const std::shared_ptr<SecondaryCache> base_;
   uint32_t seed_;
   int prob_;
+  bool base_is_compressed_sec_cache_{false};
 
   struct ErrorContext {
     Random rand;
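
As background for the `secondary_cache_uri` choice added above, `compressed_secondary_cache://capacity=8388608` makes the stress test run with a `CompressedSecondaryCache` attached behind the block cache. Below is a minimal sketch, not part of this change, of an equivalent programmatic setup using the RocksDB 7.x public cache API (`NewCompressedSecondaryCache`, `LRUCacheOptions::secondary_cache`); the 8 MiB secondary capacity mirrors the URI above, while the 64 MiB primary cache size is an arbitrary example value.

```cpp
#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/options.h"
#include "rocksdb/secondary_cache.h"
#include "rocksdb/table.h"

using namespace ROCKSDB_NAMESPACE;

int main() {
  // Compressed secondary cache sized like the stress-test URI
  // (capacity=8388608, i.e. 8 MiB).
  CompressedSecondaryCacheOptions sec_opts;
  sec_opts.capacity = 8 << 20;
  std::shared_ptr<SecondaryCache> sec_cache =
      NewCompressedSecondaryCache(sec_opts);

  // Primary LRU block cache with the secondary cache attached: blocks
  // evicted from the primary tier can be demoted to, and later looked up
  // from, the compressed secondary tier.
  LRUCacheOptions lru_opts;
  lru_opts.capacity = 64 << 20;  // arbitrary example size
  lru_opts.secondary_cache = sec_cache;
  std::shared_ptr<Cache> block_cache = NewLRUCache(lru_opts);

  // Plug the two-tier cache into the table factory used to open a DB.
  BlockBasedTableOptions bbto;
  bbto.block_cache = block_cache;

  Options options;
  options.create_if_missing = true;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  return 0;
}
```

In the diff above, `FaultInjectionSecondaryCache` detects a compressed base cache by comparing `base_->Name()` against `"CompressedSecondaryCache"` and, in that mode, returns the base cache's lookup handle directly (or `nullptr` on an injected fault) instead of wrapping it in its own `ResultHandle`.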