From e102de73180a5bead2d534f27b10258dcd2386e6 Mon Sep 17 00:00:00 2001 From: Zhichao Cao Date: Fri, 4 Dec 2020 20:30:23 -0800 Subject: [PATCH] Fix assert(cfd->imm()->NumNotFlushed() > 0) in FlushMemtable (#7744) Summary: In current code base, in FlushMemtable, when `(Flush_reason == FlushReason::kErrorRecoveryRetryFlush && (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load()))`, we assert that cfd->imm()->NumNotFlushed() > 0. However, there are some corner cases that can fail this assert: 1) if there are multiple CFs, some CF has immutable memtable, some CFs don't. In ResumeImpl, all CFs will call FlushMemtable, which will hit the assert. 2) Regular flush is scheduled and running, the resume thread is waiting. New KVs are inserted and SchedulePendingFlush is called. Regular flush will continue call MaybeScheduleFlushAndCompaction until all the immutable memtables are flushed. When regular flush ends and auto resume thread starts to schedule new flushes, cfd->imm()->NumNotFlushed() can be 0. Remove the assert and added the comments. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7744 Test Plan: make check and pass the stress test Reviewed By: riversand963 Differential Revision: D25340573 Pulled By: zhichao-cao fbshipit-source-id: eac357bdace660247c197f01a9ff6857e3c97672 --- db/db_impl/db_impl_compaction_flush.cc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 50d59b17d..04186a220 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -1765,12 +1765,16 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd, } WaitForPendingWrites(); - if (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load()) { - if (flush_reason != FlushReason::kErrorRecoveryRetryFlush) { - s = SwitchMemtable(cfd, &context); - } else { - assert(cfd->imm()->NumNotFlushed() > 0); - } + if (flush_reason != FlushReason::kErrorRecoveryRetryFlush && + (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load())) { + // Note that, when flush reason is kErrorRecoveryRetryFlush, during the + // auto retry resume, we want to avoid creating new small memtables. + // Therefore, SwitchMemtable will not be called. Also, since ResumeImpl + // will iterate through all the CFs and call FlushMemtable during auto + // retry resume, it is possible that in some CFs, + // cfd->imm()->NumNotFlushed() = 0. In this case, so no flush request will + // be created and scheduled, status::OK() will be returned. + s = SwitchMemtable(cfd, &context); } const uint64_t flush_memtable_id = port::kMaxUint64; if (s.ok()) {