From a00bf1b3cfa191c866fb1e2fd007c03b8e4b810e Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 27 Jun 2016 17:42:14 -0700 Subject: [PATCH] Add More Logging to track total_log_size Summary: We saw instances where total_log_size is off the real value, but I'm not able to reproduce it. Add more logging to help debugging when it happens again. Test Plan: Run the unit test and see the logging. Reviewers: andrewkr, yhchiang, igor, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: leveldb, andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D60081 --- db/column_family_test.cc | 3 +++ db/db_impl.cc | 16 ++++++++++++++++ db/db_impl.h | 2 ++ db/job_context.h | 3 +++ 4 files changed, 24 insertions(+) diff --git a/db/column_family_test.cc b/db/column_family_test.cc index 6c2351cce..302be3b71 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -1950,6 +1950,9 @@ TEST_F(ColumnFamilyTest, FlushStaleColumnFamilies) { // 3 files for default column families, 1 file for column family [two], zero // files for column family [one], because it's empty AssertCountLiveFiles(4); + + Flush(0); + ASSERT_EQ(0, dbfull()->TEST_total_log_size()); Close(); } diff --git a/db/db_impl.cc b/db/db_impl.cc index cabbcd82a..26044da32 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -838,6 +838,7 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force, if (!alive_log_files_.empty()) { uint64_t min_log_number = job_context->log_number; + size_t num_alive_log_files = alive_log_files_.size(); // find newly obsoleted log files while (alive_log_files_.begin()->number < min_log_number) { auto& earliest = *alive_log_files_.begin(); @@ -848,6 +849,11 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force, } else { job_context->log_delete_files.push_back(earliest.number); } + if (job_context->size_log_to_delete == 0) { + job_context->prev_total_log_size = total_log_size_; + job_context->num_alive_log_files = num_alive_log_files; + } + job_context->size_log_to_delete += earliest.size; total_log_size_ -= earliest.size; alive_log_files_.pop_front(); // Current log should always stay alive since it can't have @@ -971,6 +977,15 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state, bool schedule_only) { std::unique(candidate_files.begin(), candidate_files.end()), candidate_files.end()); + if (state.prev_total_log_size > 0) { + Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log, + "[JOB %d] Try to delete WAL files size %" PRIu64 + ", prev total WAL file size %" PRIu64 + ", number of live WAL files %" ROCKSDB_PRIszt ".\n", + state.job_id, state.size_log_to_delete, state.prev_total_log_size, + state.num_alive_log_files); + } + std::vector old_info_log_files; InfoLogPrefix info_log_prefix(!db_options_.db_log_dir.empty(), dbname_); for (const auto& candidate_file : candidate_files) { @@ -1049,6 +1064,7 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state, bool schedule_only) { continue; } #endif // !ROCKSDB_LITE + Status file_deletion_status; if (schedule_only) { InstrumentedMutexLock guard_lock(&mutex_); diff --git a/db/db_impl.h b/db/db_impl.h index ff1d226fd..de9f4f8a1 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -341,6 +341,8 @@ class DBImpl : public DB { uint64_t TEST_LogfileNumber(); + uint64_t TEST_total_log_size() const { return total_log_size_; } + // Returns column family name to ImmutableCFOptions map. Status TEST_GetAllImmutableCFOptions( std::unordered_map* iopts_map); diff --git a/db/job_context.h b/db/job_context.h index 0f24136a3..7761a8312 100644 --- a/db/job_context.h +++ b/db/job_context.h @@ -76,6 +76,9 @@ struct JobContext { uint64_t prev_log_number; uint64_t min_pending_output = 0; + uint64_t prev_total_log_size = 0; + size_t num_alive_log_files = 0; + uint64_t size_log_to_delete = 0; explicit JobContext(int _job_id, bool create_superversion = false) { job_id = _job_id;