From 13cb7a84b62838ec7761671aa46ee55b4b1ce1c2 Mon Sep 17 00:00:00 2001
From: Bo Wang
Date: Thu, 18 Aug 2022 21:53:27 -0700
Subject: [PATCH] Fix the memory leak in db_stress tests that are caused by `FaultInjectionSecondaryCache` and add `CompressedSecondaryCache` into stress tests. (#10523)

Summary:
1. Fix the memory leak in db_stress tests caused by `FaultInjectionSecondaryCache`. To meet the test requirements of both `CompressedSecondaryCache` and `CachelibWrapper`, a new member variable `base_is_compressed_sec_cache_` is added to select the appropriate behavior in `Lookup()` and `WaitAll()`.
2. Add `CompressedSecondaryCache` into the stress tests. Before this PR, a memory leak was reported during crash tests whenever `CompressedSecondaryCache` was enabled in the stress tests. One example is shown below:

```
==70722==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 6648240 byte(s) in 83103 object(s) allocated from:
    #0 0x13de9d7 in operator new(unsigned long) (/data/sandcastle/boxes/eden-trunk-hg-fbcode-fbsource/fbcode/buck-out/dbgo/gen/aab7ed39/internal_repo_rocksdb/repo/db_stress+0x13de9d7)
    #1 0x9084c7 in rocksdb::BlocklikeTraits::Create(rocksdb::BlockContents&&, unsigned long, rocksdb::Statistics*, bool, rocksdb::FilterPolicy const*) internal_repo_rocksdb/repo/table/block_based/block_like_traits.h:128
    #2 0x9084c7 in std::function rocksdb::GetCreateCallback(unsigned long, rocksdb::Statistics*, bool, rocksdb::FilterPolicy const*)::'lambda'(void const*, unsigned long, void**, unsigned long*)::operator()(void const*, unsigned long, void**, unsigned long*) const internal_repo_rocksdb/repo/table/block_based/block_like_traits.h:34
    #3 0x9082c9 in rocksdb::Block std::__invoke_impl rocksdb::GetCreateCallback(unsigned long, rocksdb::Statistics*, bool, rocksdb::FilterPolicy const*)::'lambda'(void const*, unsigned long, void**, unsigned long*)&, void const*, unsigned long, void**, unsigned long*>(std::__invoke_other, std::function rocksdb::GetCreateCallback(unsigned long, rocksdb::Statistics*, bool, rocksdb::FilterPolicy const*)::'lambda'(void const*, unsigned long, void**, unsigned long*)&, void const*&&, unsigned long&&, void**&&, unsigned long*&&) third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/invoke.h:61
    #4 0x90825d in std::enable_if rocksdb::GetCreateCallback(unsigned long, rocksdb::Statistics*, bool, rocksdb::FilterPolicy const*)::'lambda'(void const*, unsigned long, void**, unsigned long*)&, void const*, unsigned long, void**, unsigned long*>, rocksdb::Block>::type std::__invoke_r rocksdb::GetCreateCallback(unsigned long, rocksdb::Statistics*, bool, rocksdb::FilterPolicy const*)::'lambda'(void const*, unsigned long, void**, unsigned long*)&, void const*, unsigned long, void**, unsigned long*>(std::function rocksdb::GetCreateCallback(unsigned long, rocksdb::Statistics*, bool, rocksdb::FilterPolicy const*)::'lambda'(void const*, unsigned long, void**, unsigned long*)&, void const*&&, unsigned long&&, void**&&, unsigned long*&&) third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/invoke.h:114
    #5 0x9081b0 in std::_Function_handler rocksdb::GetCreateCallback(unsigned long, rocksdb::Statistics*, bool, rocksdb::FilterPolicy const*)::'lambda'(void const*, unsigned long, void**, unsigned long*)>::_M_invoke(std::_Any_data const&, void const*&&, unsigned long&&, void**&&, unsigned long*&&) third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/std_function.h:291
    #6 0x991f2c in std::function::operator()(void const*, unsigned long, void**, unsigned long*) const third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/std_function.h:560
    #7 0x990277 in rocksdb::CompressedSecondaryCache::Lookup(rocksdb::Slice const&, std::function const&, bool, bool&) internal_repo_rocksdb/repo/cache/compressed_secondary_cache.cc:77
    #8 0xd3aa4d in rocksdb::FaultInjectionSecondaryCache::Lookup(rocksdb::Slice const&, std::function const&, bool, bool&) internal_repo_rocksdb/repo/utilities/fault_injection_secondary_cache.cc:92
    #9 0xeadaab in rocksdb::lru_cache::LRUCacheShard::Lookup(rocksdb::Slice const&, unsigned int, rocksdb::Cache::CacheItemHelper const*, std::function const&, rocksdb::Cache::Priority, bool, rocksdb::Statistics*) internal_repo_rocksdb/repo/cache/lru_cache.cc:445
    #10 0x1064573 in rocksdb::ShardedCache::Lookup(rocksdb::Slice const&, rocksdb::Cache::CacheItemHelper const*, std::function const&, rocksdb::Cache::Priority, bool, rocksdb::Statistics*) internal_repo_rocksdb/repo/cache/sharded_cache.cc:89
    #11 0x8be0df in rocksdb::BlockBasedTable::GetEntryFromCache(rocksdb::CacheTier const&, rocksdb::Cache*, rocksdb::Slice const&, rocksdb::BlockType, bool, rocksdb::GetContext*, rocksdb::Cache::CacheItemHelper const*, std::function const&, rocksdb::Cache::Priority) const internal_repo_rocksdb/repo/table/block_based/block_based_table_reader.cc:389
    #12 0x905790 in rocksdb::Status rocksdb::BlockBasedTable::GetDataBlockFromCache(rocksdb::Slice const&, rocksdb::Cache*, rocksdb::Cache*, rocksdb::ReadOptions const&, rocksdb::CachableEntry*, rocksdb::UncompressionDict const&, rocksdb::BlockType, bool, rocksdb::GetContext*) const internal_repo_rocksdb/repo/table/block_based/block_based_table_reader.cc:1263
    #13 0x8b9259 in rocksdb::Status rocksdb::BlockBasedTable::MaybeReadBlockAndLoadToCache(rocksdb::FilePrefetchBuffer*, rocksdb::ReadOptions const&, rocksdb::BlockHandle const&, rocksdb::UncompressionDict const&, bool, bool, rocksdb::CachableEntry*, rocksdb::BlockType, rocksdb::GetContext*, rocksdb::BlockCacheLookupContext*, rocksdb::BlockContents*, bool) const internal_repo_rocksdb/repo/table/block_based/block_based_table_reader.cc:1559
    #14 0x8b710c in rocksdb::Status rocksdb::BlockBasedTable::RetrieveBlock(rocksdb::FilePrefetchBuffer*, rocksdb::ReadOptions const&, rocksdb::BlockHandle const&, rocksdb::UncompressionDict const&, rocksdb::CachableEntry*, rocksdb::BlockType, rocksdb::GetContext*, rocksdb::BlockCacheLookupContext*, bool, bool, bool, bool) const internal_repo_rocksdb/repo/table/block_based/block_based_table_reader.cc:1726
    #15 0x8c329f in rocksdb::DataBlockIter* rocksdb::BlockBasedTable::NewDataBlockIterator(rocksdb::ReadOptions const&, rocksdb::BlockHandle const&, rocksdb::DataBlockIter*, rocksdb::BlockType, rocksdb::GetContext*, rocksdb::BlockCacheLookupContext*, rocksdb::FilePrefetchBuffer*, bool, bool, rocksdb::Status&) const internal_repo_rocksdb/repo/table/block_based/block_based_table_reader_impl.h:58
    #16 0x920117 in rocksdb::BlockBasedTableIterator::InitDataBlock() internal_repo_rocksdb/repo/table/block_based/block_based_table_iterator.cc:262
    #17 0x920d42 in rocksdb::BlockBasedTableIterator::MaterializeCurrentBlock() internal_repo_rocksdb/repo/table/block_based/block_based_table_iterator.cc:332
    #18 0xc6a201 in rocksdb::IteratorWrapperBase::PrepareValue() internal_repo_rocksdb/repo/table/iterator_wrapper.h:78
    #19 0xc6a201 in rocksdb::IteratorWrapperBase::PrepareValue() internal_repo_rocksdb/repo/table/iterator_wrapper.h:78
    #20 0xef9f6c in rocksdb::MergingIterator::PrepareValue() internal_repo_rocksdb/repo/table/merging_iterator.cc:260
    #21 0xc6a201 in rocksdb::IteratorWrapperBase::PrepareValue() internal_repo_rocksdb/repo/table/iterator_wrapper.h:78
    #22 0xc67bcd in rocksdb::DBIter::FindNextUserEntryInternal(bool, rocksdb::Slice const*) internal_repo_rocksdb/repo/db/db_iter.cc:326
    #23 0xc66d36 in rocksdb::DBIter::FindNextUserEntry(bool, rocksdb::Slice const*) internal_repo_rocksdb/repo/db/db_iter.cc:234
    #24 0xc7ab47 in rocksdb::DBIter::Next() internal_repo_rocksdb/repo/db/db_iter.cc:161
    #25 0x70d938 in rocksdb::BatchedOpsStressTest::TestPrefixScan(rocksdb::ThreadState*, rocksdb::ReadOptions const&, std::vector > const&, std::vector > const&) internal_repo_rocksdb/repo/db_stress_tool/batched_ops_stress.cc:320
    #26 0x6dc6a8 in rocksdb::StressTest::OperateDb(rocksdb::ThreadState*) internal_repo_rocksdb/repo/db_stress_tool/db_stress_test_base.cc:907
    #27 0x6867de in rocksdb::ThreadBody(void*) internal_repo_rocksdb/repo/db_stress_tool/db_stress_driver.cc:33
    #28 0xce4cc2 in rocksdb::(anonymous namespace)::StartThreadWrapper(void*) internal_repo_rocksdb/repo/env/env_posix.cc:461
    #29 0x7f23f9068c0e in start_thread /home/engshare/third-party2/glibc/2.34/src/glibc-2.34/nptl/pthread_create.c:434:8
```

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10523

Test Plan:
```
$COMPILE_WITH_ASAN=1 make -j 24
$db_stress J=40 crash_test_with_txn
```

Reviewed By: anand1976

Differential Revision: D38646839

Pulled By: gitbw95

fbshipit-source-id: 9452895c7dc95481a9d7afe83b15193cf5b1c43e
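For orientation before the diff: the sketch below condenses the `Lookup()` dispatch that point 1 of the Summary describes. It is a hypothetical, simplified stand-in rather than the patched class itself; `LeakAwareWrapperSketch` and the `inject_error` parameter are invented names (the real `FaultInjectionSecondaryCache` draws the error decision from `ctx->rand.OneIn(prob_)` and, on the non-compressed path, wraps the handle in its own `ResultHandle`).

```cpp
// Hypothetical, condensed sketch of the dispatch added by this patch.
#include <cstring>
#include <memory>
#include <utility>

#include "rocksdb/secondary_cache.h"

class LeakAwareWrapperSketch {
 public:
  explicit LeakAwareWrapperSketch(std::shared_ptr<rocksdb::SecondaryCache> base)
      : base_(std::move(base)),
        // Detect the wrapped cache type once, by name, as the patch does.
        base_is_compressed_sec_cache_(
            std::strcmp(base_->Name(), "CompressedSecondaryCache") == 0) {}

  std::unique_ptr<rocksdb::SecondaryCacheResultHandle> Lookup(
      const rocksdb::Slice& key,
      const rocksdb::Cache::CreateCallback& create_cb, bool wait,
      bool& is_in_sec_cache,
      bool inject_error /* stand-in for ctx->rand.OneIn(prob_) */) {
    if (base_is_compressed_sec_cache_) {
      // CompressedSecondaryCache materializes the value inside Lookup(), so
      // decide up front whether to simulate a miss; materializing a value and
      // then dropping its handle is where the reported leak showed up (see
      // the LeakSanitizer trace above).
      return inject_error
                 ? nullptr
                 : base_->Lookup(key, create_cb, wait, is_in_sec_cache);
    }
    // Other secondary caches (e.g. the CacheLib-based wrapper) keep the
    // original behavior: look up first, then possibly drop the handle.
    std::unique_ptr<rocksdb::SecondaryCacheResultHandle> hdl =
        base_->Lookup(key, create_cb, wait, is_in_sec_cache);
    if (wait && inject_error) {
      hdl.reset();
    }
    // The real class returns this wrapped in its own ResultHandle.
    return hdl;
  }

 private:
  std::shared_ptr<rocksdb::SecondaryCache> base_;
  bool base_is_compressed_sec_cache_;
};
```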
---
 HISTORY.md                                   |  4 ++-
 tools/db_crashtest.py                        |  3 +-
 utilities/fault_injection_secondary_cache.cc | 34 ++++++++++++++++----
 utilities/fault_injection_secondary_cache.h  | 12 ++++---
 4 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index e91350297..21b8e76d8 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -7,7 +7,7 @@
 * Improve subcompaction range partition so that it is likely to be more even. More evenly distribution of subcompaction will improve compaction throughput for some workloads. All input files' index blocks to sample some anchor key points from which we pick positions to partition the input range. This would introduce some CPU overhead in compaction preparation phase, if subcompaction is enabled, but it should be a small fraction of the CPU usage of the whole compaction process. This also brings a behavier change: subcompaction number is much more likely to maxed out than before.
 * Add CompactionPri::kRoundRobin, a compaction picking mode that cycles through all the files with a compact cursor in a round-robin manner. This feature is available since 7.5.
 * Provide support for subcompactions for user_defined_timestamp.
-* Added an option `memtable_protection_bytes_per_key` that turns on memtable per key-value checksum protection. Each memtable entry will be suffixed by a checksum that is computed during writes, and verified in reads/compaction. Detected corruption will be logged and with corruption status returned to user. 
+* Added an option `memtable_protection_bytes_per_key` that turns on memtable per key-value checksum protection. Each memtable entry will be suffixed by a checksum that is computed during writes, and verified in reads/compaction. Detected corruption will be logged and with corruption status returned to user.
 * Added a blob-specific cache priority level - bottom level. Blobs are typically lower-value targets for caching than data blocks, since 1) with BlobDB, data blocks containing blob references conceptually form an index structure which has to be consulted before we can read the blob value, and 2) cached blobs represent only a single key-value, while cached data blocks generally contain multiple KVs. The user can specify the new option `low_pri_pool_ratio` in `LRUCacheOptions` to configure the ratio of capacity reserved for low priority cache entries (and therefore the remaining ratio is the space reserved for the bottom level), or configuring the new argument `low_pri_pool_ratio` in `NewLRUCache()` to achieve the same effect.
 
 ### Public API changes
@@ -27,6 +27,7 @@
 * Fixed a bug where blobs read by iterators would be inserted into the cache even with the `fill_cache` read option set to false.
 * Fixed the segfault caused by `AllocateData()` in `CompressedSecondaryCache::SplitValueIntoChunks()` and `MergeChunksIntoValueTest`.
 * Fixed a bug in BlobDB where a mix of inlined and blob values could result in an incorrect value being passed to the compaction filter (see #10391).
+* Fixed a memory leak bug in stress tests caused by `FaultInjectionSecondaryCache`.
 
 ### Behavior Change
 * Added checksum handshake during the copying of decompressed WAL fragment. This together with #9875, #10037, #10212, #10114 and #10319 provides end-to-end integrity protection for write batch during recovery.
@@ -36,6 +37,7 @@
 * Improve universal tiered storage compaction picker to avoid extra major compaction triggered by size amplification. If `preclude_last_level_data_seconds` is enabled, the size amplification is calculated within non last_level data only which skip the last level and use the penultimate level as the size base.
 * If an error is hit when writing to a file (append, sync, etc), RocksDB is more strict with not issuing more operations to it, except closing the file, with exceptions of some WAL file operations in error recovery path.
 * A `WriteBufferManager` constructed with `allow_stall == false` will no longer trigger write stall implicitly by thrashing until memtable count limit is reached. Instead, a column family can continue accumulating writes while that CF is flushing, which means memory may increase. Users who prefer stalling writes must now explicitly set `allow_stall == true`.
+* Add `CompressedSecondaryCache` into the stress tests.
 ### Performance Improvements
 * Instead of constructing `FragmentedRangeTombstoneList` during every read operation, it is now constructed once and stored in immutable memtables. This improves speed of querying range tombstones from immutable memtables.

diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py
index 53a2c5c28..e00ad2fed 100644
--- a/tools/db_crashtest.py
+++ b/tools/db_crashtest.py
@@ -179,7 +179,8 @@ default_params = {
     "async_io": lambda: random.choice([0, 1]),
     "wal_compression": lambda: random.choice(["none", "zstd"]),
     "verify_sst_unique_id_in_manifest": 1, # always do unique_id verification
-    "secondary_cache_uri": "",
+    "secondary_cache_uri": lambda: random.choice(
+        ["", "compressed_secondary_cache://capacity=8388608"]),
     "allow_data_in_errors": True,
 }
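For context on what the new `secondary_cache_uri` choice exercises, here is a minimal, hypothetical sketch of turning that URI into a secondary cache and attaching it to a block cache. It assumes the public `SecondaryCache::CreateFromString()` and `LRUCacheOptions::secondary_cache` APIs; the helper name and capacity values are illustrative, and db_stress's real wiring (including the `FaultInjectionSecondaryCache` wrapper) is more involved.

```cpp
// Hypothetical helper: build Options whose block cache is backed by a
// compressed secondary cache created from the same URI used above.
#include <cassert>
#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/convenience.h"
#include "rocksdb/options.h"
#include "rocksdb/secondary_cache.h"
#include "rocksdb/table.h"

rocksdb::Options MakeOptionsWithCompressedSecondaryCache() {
  rocksdb::ConfigOptions config_options;
  std::shared_ptr<rocksdb::SecondaryCache> secondary_cache;
  // Same URI format that db_crashtest.py now picks at random.
  rocksdb::Status s = rocksdb::SecondaryCache::CreateFromString(
      config_options, "compressed_secondary_cache://capacity=8388608",
      &secondary_cache);
  assert(s.ok());

  rocksdb::LRUCacheOptions cache_opts;
  cache_opts.capacity = 32 << 20;  // 32 MiB primary block cache (illustrative)
  cache_opts.secondary_cache = secondary_cache;

  rocksdb::BlockBasedTableOptions table_opts;
  table_opts.block_cache = rocksdb::NewLRUCache(cache_opts);

  rocksdb::Options options;
  options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_opts));
  return options;
}
```

On a miss in the primary LRU cache, lookups then consult the attached secondary cache, which is the code path the LeakSanitizer trace above goes through.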
diff --git a/utilities/fault_injection_secondary_cache.cc b/utilities/fault_injection_secondary_cache.cc
index 143862d98..502fd773b 100644
--- a/utilities/fault_injection_secondary_cache.cc
+++ b/utilities/fault_injection_secondary_cache.cc
@@ -88,14 +88,22 @@ std::unique_ptr<SecondaryCacheResultHandle>
 FaultInjectionSecondaryCache::Lookup(const Slice& key,
                                      const Cache::CreateCallback& create_cb,
                                      bool wait, bool& is_in_sec_cache) {
-  std::unique_ptr<SecondaryCacheResultHandle> hdl =
-      base_->Lookup(key, create_cb, wait, is_in_sec_cache);
   ErrorContext* ctx = GetErrorContext();
-  if (wait && ctx->rand.OneIn(prob_)) {
-    hdl.reset();
+  if (base_is_compressed_sec_cache_) {
+    if (ctx->rand.OneIn(prob_)) {
+      return nullptr;
+    } else {
+      return base_->Lookup(key, create_cb, wait, is_in_sec_cache);
+    }
+  } else {
+    std::unique_ptr<SecondaryCacheResultHandle> hdl =
+        base_->Lookup(key, create_cb, wait, is_in_sec_cache);
+    if (wait && ctx->rand.OneIn(prob_)) {
+      hdl.reset();
+    }
+    return std::unique_ptr<SecondaryCacheResultHandle>(
+        new FaultInjectionSecondaryCache::ResultHandle(this, std::move(hdl)));
   }
-  return std::unique_ptr<SecondaryCacheResultHandle>(
-      new FaultInjectionSecondaryCache::ResultHandle(this, std::move(hdl)));
 }
 
 void FaultInjectionSecondaryCache::Erase(const Slice& key) {
@@ -104,7 +112,19 @@
 void FaultInjectionSecondaryCache::WaitAll(
     std::vector<SecondaryCacheResultHandle*> handles) {
-  FaultInjectionSecondaryCache::ResultHandle::WaitAll(this, handles);
+  if (base_is_compressed_sec_cache_) {
+    ErrorContext* ctx = GetErrorContext();
+    std::vector<SecondaryCacheResultHandle*> base_handles;
+    for (SecondaryCacheResultHandle* hdl : handles) {
+      if (ctx->rand.OneIn(prob_)) {
+        continue;
+      }
+      base_handles.push_back(hdl);
+    }
+    base_->WaitAll(base_handles);
+  } else {
+    FaultInjectionSecondaryCache::ResultHandle::WaitAll(this, handles);
+  }
 }
 
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/utilities/fault_injection_secondary_cache.h b/utilities/fault_injection_secondary_cache.h
index f9fb0b15d..acd960747 100644
--- a/utilities/fault_injection_secondary_cache.h
+++ b/utilities/fault_injection_secondary_cache.h
@@ -22,6 +22,9 @@ class FaultInjectionSecondaryCache : public SecondaryCache {
         seed_(seed),
         prob_(prob),
         thread_local_error_(new ThreadLocalPtr(DeleteThreadLocalErrorContext)) {
+    if (std::strcmp(base_->Name(), "CompressedSecondaryCache") == 0) {
+      base_is_compressed_sec_cache_ = true;
+    }
   }
 
   virtual ~FaultInjectionSecondaryCache() override {}
@@ -35,13 +38,13 @@ class FaultInjectionSecondaryCache : public SecondaryCache {
       const Slice& key, const Cache::CreateCallback& create_cb, bool wait,
       bool& is_in_sec_cache) override;
 
-  void Erase(const Slice& /*key*/) override;
+  void Erase(const Slice& key) override;
 
   void WaitAll(std::vector<SecondaryCacheResultHandle*> handles) override;
 
-  std::string GetPrintableOptions() const override { return ""; }
-
-  void EnableErrorInjection(uint64_t prob);
+  std::string GetPrintableOptions() const override {
+    return base_->GetPrintableOptions();
+  }
 
  private:
  class ResultHandle : public SecondaryCacheResultHandle {
@@ -80,6 +83,7 @@ class FaultInjectionSecondaryCache : public SecondaryCache {
   const std::shared_ptr<SecondaryCache> base_;
   uint32_t seed_;
   int prob_;
+  bool base_is_compressed_sec_cache_{false};
 
   struct ErrorContext {
     Random rand;