From 9e4aa798c3d47c6be64324bd9d38f0813c8ead7b Mon Sep 17 00:00:00 2001 From: somnathr Date: Mon, 12 Sep 2016 16:53:42 -0700 Subject: [PATCH] Summary: (#1313) If log recycling is enabled in RocksDB (recycle_log_file_num=16), db->Writebatch errors out with keynotfound after ~5-6 hours of running (1M seq, but it can presumably happen with any workload). See my detailed bug report here (https://github.com/facebook/rocksdb/issues/1303). This commit fixes that: a check has been added so that a log file is not deleted if it is already in the recycle list. Test Plan: Unit tested it and ran a similar profile. The issue no longer reproduces. --- db/db_impl.cc | 85 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 5b4ad092a..e2c26bf5e 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -812,38 +812,6 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force, job_context->prev_log_number = versions_->prev_log_number(); versions_->AddLiveFiles(&job_context->sst_live); - if (doing_the_full_scan) { - for (size_t path_id = 0; path_id < db_options_.db_paths.size(); path_id++) { - // set of all files in the directory. We'll exclude files that are still - // alive in the subsequent processings. 
- std::vector files; - env_->GetChildren(db_options_.db_paths[path_id].path, - &files); // Ignore errors - for (std::string file : files) { - // TODO(icanadi) clean up this mess to avoid having one-off "/" prefixes - job_context->full_scan_candidate_files.emplace_back( - "/" + file, static_cast(path_id)); - } - } - - //Add log files in wal_dir - if (db_options_.wal_dir != dbname_) { - std::vector log_files; - env_->GetChildren(db_options_.wal_dir, &log_files); // Ignore errors - for (std::string log_file : log_files) { - job_context->full_scan_candidate_files.emplace_back(log_file, 0); - } - } - // Add info log files in db_log_dir - if (!db_options_.db_log_dir.empty() && db_options_.db_log_dir != dbname_) { - std::vector info_log_files; - // Ignore errors - env_->GetChildren(db_options_.db_log_dir, &info_log_files); - for (std::string log_file : info_log_files) { - job_context->full_scan_candidate_files.emplace_back(log_file, 0); - } - } - } if (!alive_log_files_.empty()) { uint64_t min_log_number = job_context->log_number; @@ -883,6 +851,59 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force, assert(!logs_.empty()); } + if (doing_the_full_scan) { + for (size_t path_id = 0; path_id < db_options_.db_paths.size(); path_id++) { + // set of all files in the directory. We'll exclude files that are still + // alive in the subsequent processings. 
+ std::vector files; + env_->GetChildren(db_options_.db_paths[path_id].path, + &files); // Ignore errors + for (std::string file : files) { + // TODO(icanadi) clean up this mess to avoid having one-off "/" prefixes + job_context->full_scan_candidate_files.emplace_back( + "/" + file, static_cast(path_id)); + } + } + + //Add log files in wal_dir + if (db_options_.wal_dir != dbname_) { + std::vector log_files; + env_->GetChildren(db_options_.wal_dir, &log_files); // Ignore errors + InfoLogPrefix info_log_prefix(!db_options_.db_log_dir.empty(), dbname_); + for (std::string log_file : log_files) { + uint64_t number; + FileType type; + // Ignore file if we cannot recognize it. + if (!ParseFileName(log_file, &number, info_log_prefix.prefix, &type)) { + Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log, + "Unrecognized log file %s \n",log_file.c_str()); + continue; + } + // If the log file is already in the log recycle list , don't put + // it in the candidate list. + if (std::find(log_recycle_files.begin(), log_recycle_files.end(),number) != + log_recycle_files.end()) { + + Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log, + "Log %" PRIu64 " Already added in the recycle list, skipping.\n", + number); + continue; + } + + job_context->full_scan_candidate_files.emplace_back(log_file, 0); + } + } + // Add info log files in db_log_dir + if (!db_options_.db_log_dir.empty() && db_options_.db_log_dir != dbname_) { + std::vector info_log_files; + // Ignore errors + env_->GetChildren(db_options_.db_log_dir, &info_log_files); + for (std::string log_file : info_log_files) { + job_context->full_scan_candidate_files.emplace_back(log_file, 0); + } + } + } + // We're just cleaning up for DB::Write(). assert(job_context->logs_to_free.empty()); job_context->logs_to_free = logs_to_free_;