From 87d0af15d8ad6a82d69ff8d632e0e13df6befa30 Mon Sep 17 00:00:00 2001 From: Haobo Xu Date: Tue, 21 May 2013 11:53:33 -0700 Subject: [PATCH] [RocksDB] Introduce an option to skip log error on recovery Summary: Currently, with paranoid_checks on, DB::Open will fail on any log read error during recovery. If the client is OK with losing the most recent updates, we could simply skip those errors. However, it's important to introduce an additional flag, so that paranoid_checks can still guard against more serious problems. Test Plan: make check; db_stress Reviewers: dhruba, emayanke Reviewed By: emayanke CC: leveldb, emayanke Differential Revision: https://reviews.facebook.net/D10869 --- db/db_impl.cc | 9 +++++---- include/leveldb/options.h | 6 ++++++ util/options.cc | 5 ++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 3d01d5481..ba8d9ff53 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -590,7 +590,8 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, Env* env; Logger* info_log; const char* fname; - Status* status; // nullptr if options_.paranoid_checks==false + Status* status; // nullptr if options_.paranoid_checks==false or + // options_.skip_log_error_on_recovery==true virtual void Corruption(size_t bytes, const Status& s) { Log(info_log, "%s%s: dropping %d bytes; %s", (this->status == nullptr ? "(ignoring error) " : ""), @@ -615,7 +616,8 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, reporter.env = env_; reporter.info_log = options_.info_log.get(); reporter.fname = fname.c_str(); - reporter.status = (options_.paranoid_checks ? &status : nullptr); + reporter.status = (options_.paranoid_checks && + !options_.skip_log_error_on_recovery ? 
&status : nullptr); // We intentially make log::Reader do checksumming even if // paranoid_checks==false so that corruptions cause entire commits // to be skipped instead of propagating bad information (like overly @@ -633,8 +635,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, if (external_table) { mem = external_table; } - while (reader.ReadRecord(&record, &scratch) && - status.ok()) { + while (reader.ReadRecord(&record, &scratch) && status.ok()) { if (record.size() < 12) { reporter.Corruption( record.size(), Status::Corruption("log record too small")); diff --git a/include/leveldb/options.h b/include/leveldb/options.h index f180c74eb..043991872 100644 --- a/include/leveldb/options.h +++ b/include/leveldb/options.h @@ -424,6 +424,12 @@ struct Options { // Disable child process inherit open files. Default: true bool is_fd_close_on_exec; + + // Skip log corruption error on recovery (If client is ok with + // losing most recent changes) + // Default: false + bool skip_log_error_on_recovery; + }; // Options that control read operations diff --git a/util/options.cc b/util/options.cc index 6ac122218..7e67e8cbd 100644 --- a/util/options.cc +++ b/util/options.cc @@ -68,7 +68,8 @@ Options::Options() allow_readahead_compactions(true), allow_mmap_reads(false), allow_mmap_writes(true), - is_fd_close_on_exec(true) { + is_fd_close_on_exec(true), + skip_log_error_on_recovery(false) { } void @@ -189,6 +190,8 @@ Options::Dump(Logger* log) const allow_mmap_writes); Log(log," Options.is_fd_close_on_exec: %d", is_fd_close_on_exec); + Log(log," Options.skip_log_error_on_recovery: %d", + skip_log_error_on_recovery); } // Options::Dump //