From 11266440825ddebe0527458bceb6bb28209dc9f2 Mon Sep 17 00:00:00 2001 From: krad Date: Wed, 9 Sep 2015 21:59:28 -0700 Subject: [PATCH] Relaxing consistency detection to include errors while inserting to memtable as WAL recovery error. Summary: The current code considers data to be consistent if the record checksum passes. We do have customer issues where the record checksum passed but the data was incomprehensible. There is no way to get out of this error case since all WAL recovery modes will consider this error as unrelated to WAL. This change relaxes the definition to include errors while inserting to memtable as WAL errors, handling them as per the recovery level. Test Plan: Used customer dump to verify the fix for different levels. The db opens for kSkipAnyCorruptedRecords and kPointInTimeRecovery, but fails for kAbsoluteConsistency and kTolerateCorruptedTailRecords. Reviewers: sdon igor CC: leveldb@ Task ID: #7918721 Blame Rev: --- db/db_impl.cc | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 364c57df3..d3f1a7e77 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1148,8 +1148,12 @@ Status DBImpl::RecoverLogFiles(const std::vector& log_numbers, MaybeIgnoreError(&status); if (!status.ok()) { - return status; + // We are treating this as a failure while reading since we read valid + // blocks that do not form coherent data + reporter.Corruption(record.size(), status); + continue; } + const SequenceNumber last_seq = WriteBatchInternal::Sequence(&batch) + WriteBatchInternal::Count(&batch) - 1; if ((*max_sequence == kMaxSequenceNumber) || (last_seq > *max_sequence)) { @@ -1183,12 +1187,11 @@ Status DBImpl::RecoverLogFiles(const std::vector& log_numbers, } if (!status.ok()) { - // The hook function is designed to ignore all IO errors from reader - // during recovery for kSkipAnyCorruptedRecords. Status variable is - // unmodified by the reader. 
- assert(db_options_.wal_recovery_mode != - WALRecoveryMode::kSkipAnyCorruptedRecords); if (db_options_.wal_recovery_mode == + WALRecoveryMode::kSkipAnyCorruptedRecords) { + // We should ignore all errors unconditionally + status = Status::OK(); + } else if (db_options_.wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery) { // We should ignore the error but not continue replaying status = Status::OK(); @@ -1197,8 +1200,11 @@ Status DBImpl::RecoverLogFiles(const std::vector& log_numbers, Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log, "Point in time recovered to log #%" PRIu64 " seq #%" PRIu64, log_number, *max_sequence); - } else if (db_options_.wal_recovery_mode != - WALRecoveryMode::kSkipAnyCorruptedRecords) { + } else { + assert(db_options_.wal_recovery_mode == + WALRecoveryMode::kTolerateCorruptedTailRecords + || db_options_.wal_recovery_mode == + WALRecoveryMode::kAbsoluteConsistency); return status; } }