From cb094e13bbadb4031ecab95e084418da60973312 Mon Sep 17 00:00:00 2001 From: Siying Dong Date: Fri, 31 May 2019 10:45:20 -0700 Subject: [PATCH] Auto roll logger to enforce options.keep_log_file_num immediately after a new file is created (#5370) Summary: Right now, with auto roll logger, options.keep_log_file_num enforcement is triggered by events like DB reopen or full obsolete scan happens. In the mean time, the size and number of log files can grow without a limit. We put a stronger enforcement to the option, so that the number of log files can always under control. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5370 Differential Revision: D15570413 Pulled By: siying fbshipit-source-id: 0916c3c4d42ab8fdd29389ee7fd7e1557b03176e --- HISTORY.md | 1 + file/filename.cc | 32 ++++ file/filename.h | 8 + util/auto_roll_logger.cc | 103 +++++++++++- util/auto_roll_logger.h | 33 ++-- util/auto_roll_logger_test.cc | 194 ++++++++++++++++++++--- utilities/convenience/info_log_finder.cc | 29 +--- 7 files changed, 328 insertions(+), 72 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index f645d5cc2..b9b6998c6 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,6 +4,7 @@ * Now DB::Close() will return Aborted() error when there is unreleased snapshot. Users can retry after all snapshots are released. * Partitions of partitioned indexes no longer affect the read amplification statistics. * Due to a refactoring, block cache eviction statistics for indexes are temporarily broken. We plan to reintroduce them in a later phase. +* options.keep_log_file_num will be enforced strictly all the time. File names of all log files will be tracked, which may take significantly amount of memory if options.keep_log_file_num is large and either of options.max_log_file_size or options.log_file_time_to_roll is set. ### New Features * Add an option `snap_refresh_nanos` (default to 0.1s) to periodically refresh the snapshot list in compaction jobs. Assign to 0 to disable the feature. diff --git a/file/filename.cc b/file/filename.cc index a8fb78005..6f00d15eb 100644 --- a/file/filename.cc +++ b/file/filename.cc @@ -407,4 +407,36 @@ Status SyncManifest(Env* env, const ImmutableDBOptions* db_options, return file->Sync(db_options->use_fsync); } +Status GetInfoLogFiles(Env* env, const std::string& db_log_dir, + const std::string& dbname, std::string* parent_dir, + std::vector* info_log_list) { + assert(parent_dir != nullptr); + assert(info_log_list != nullptr); + uint64_t number = 0; + FileType type; + + if (!db_log_dir.empty()) { + *parent_dir = db_log_dir; + } else { + *parent_dir = dbname; + } + + InfoLogPrefix info_log_prefix(!db_log_dir.empty(), dbname); + + std::vector file_names; + Status s = env->GetChildren(*parent_dir, &file_names); + + if (!s.ok()) { + return s; + } + + for (auto& f : file_names) { + if (ParseFileName(f, &number, info_log_prefix.prefix, &type) && + (type == kInfoLogFile)) { + info_log_list->push_back(f); + } + } + return Status::OK(); +} + } // namespace rocksdb diff --git a/file/filename.h b/file/filename.h index eea6b1b02..db06f4664 100644 --- a/file/filename.h +++ b/file/filename.h @@ -169,4 +169,12 @@ extern Status SetIdentityFile(Env* env, const std::string& dbname); extern Status SyncManifest(Env* env, const ImmutableDBOptions* db_options, WritableFileWriter* file); +// Return list of file names of info logs in `file_names`. +// The list only contains file name. The parent directory name is stored +// in `parent_dir`. +// `db_log_dir` should be the one as in options.db_log_dir +extern Status GetInfoLogFiles(Env* env, const std::string& db_log_dir, + const std::string& dbname, + std::string* parent_dir, + std::vector* file_names); } // namespace rocksdb diff --git a/util/auto_roll_logger.cc b/util/auto_roll_logger.cc index ae6061aed..9e8d67503 100644 --- a/util/auto_roll_logger.cc +++ b/util/auto_roll_logger.cc @@ -4,12 +4,53 @@ // (found in the LICENSE.Apache file in the root directory). // #include "util/auto_roll_logger.h" +#include +#include "file/filename.h" +#include "util/logging.h" #include "util/mutexlock.h" namespace rocksdb { #ifndef ROCKSDB_LITE // -- AutoRollLogger + +AutoRollLogger::AutoRollLogger(Env* env, const std::string& dbname, + const std::string& db_log_dir, + size_t log_max_size, + size_t log_file_time_to_roll, + size_t keep_log_file_num, + const InfoLogLevel log_level) + : Logger(log_level), + dbname_(dbname), + db_log_dir_(db_log_dir), + env_(env), + status_(Status::OK()), + kMaxLogFileSize(log_max_size), + kLogFileTimeToRoll(log_file_time_to_roll), + kKeepLogFileNum(keep_log_file_num), + cached_now(static_cast(env_->NowMicros() * 1e-6)), + ctime_(cached_now), + cached_now_access_count(0), + call_NowMicros_every_N_records_(100), + mutex_() { + Status s = env->GetAbsolutePath(dbname, &db_absolute_path_); + if (s.IsNotSupported()) { + db_absolute_path_ = dbname; + } else { + status_ = s; + } + log_fname_ = InfoLogFileName(dbname_, db_absolute_path_, db_log_dir_); + if (env_->FileExists(log_fname_).ok()) { + RollLogFile(); + } + GetExistingFiles(); + ResetLogger(); + s = TrimOldLogFiles(); + if (!status_.ok()) { + status_ = s; + } +} + Status AutoRollLogger::ResetLogger() { TEST_SYNC_POINT("AutoRollLogger::ResetLogger:BeforeNewLogger"); status_ = env_->NewLogger(log_fname_, &logger_); @@ -44,6 +85,58 @@ void AutoRollLogger::RollLogFile() { now++; } while (env_->FileExists(old_fname).ok()); env_->RenameFile(log_fname_, old_fname); + old_log_files_.push(old_fname); +} + +void AutoRollLogger::GetExistingFiles() { + { + // Empty the queue to avoid duplicated entries in the queue. + std::queue empty; + std::swap(old_log_files_, empty); + } + + std::string parent_dir; + std::vector info_log_files; + Status s = + GetInfoLogFiles(env_, db_log_dir_, dbname_, &parent_dir, &info_log_files); + if (status_.ok()) { + status_ = s; + } + // We need to sort the file before enqueing it so that when we + // delete file from the front, it is the oldest file. + std::sort(info_log_files.begin(), info_log_files.end()); + + for (const std::string& f : info_log_files) { + old_log_files_.push(parent_dir + "/" + f); + } +} + +Status AutoRollLogger::TrimOldLogFiles() { + // Here we directly list info files and delete them through Env. + // The deletion isn't going through DB, so there are shortcomes: + // 1. the deletion is not rate limited by SstFileManager + // 2. there is a chance that an I/O will be issued here + // Since it's going to be complicated to pass DB object down to + // here, we take a simple approach to keep the code easier to + // maintain. + + // old_log_files_.empty() is helpful for the corner case that + // kKeepLogFileNum == 0. We can instead check kKeepLogFileNum != 0 but + // it's essentially the same thing, and checking empty before accessing + // the queue feels safer. + while (!old_log_files_.empty() && old_log_files_.size() >= kKeepLogFileNum) { + Status s = env_->DeleteFile(old_log_files_.front()); + // Remove the file from the tracking anyway. It's possible that + // DB cleaned up the old log file, or people cleaned it up manually. + old_log_files_.pop(); + // To make the file really go away, we should sync parent directory. + // Since there isn't any consistency issue involved here, skipping + // this part to avoid one I/O here. + if (!s.ok()) { + return s; + } + } + return Status::OK(); } std::string AutoRollLogger::ValistToString(const char* format, @@ -78,12 +171,19 @@ void AutoRollLogger::Logv(const char* format, va_list ap) { (kMaxLogFileSize > 0 && logger_->GetLogFileSize() >= kMaxLogFileSize)) { RollLogFile(); Status s = ResetLogger(); + Status s2 = TrimOldLogFiles(); + if (!s.ok()) { // can't really log the error if creating a new LOG file failed return; } WriteHeaderInfo(); + + if (!s2.ok()) { + ROCKS_LOG_WARN(logger.get(), "Fail to trim old info log file: %s", + s2.ToString().c_str()); + } } // pin down the current logger_ instance before releasing the mutex. @@ -153,7 +253,8 @@ Status CreateLoggerFromOptions(const std::string& dbname, if (options.log_file_time_to_roll > 0 || options.max_log_file_size > 0) { AutoRollLogger* result = new AutoRollLogger( env, dbname, options.db_log_dir, options.max_log_file_size, - options.log_file_time_to_roll, options.info_log_level); + options.log_file_time_to_roll, options.keep_log_file_num, + options.info_log_level); Status s = result->GetStatus(); if (!s.ok()) { delete result; diff --git a/util/auto_roll_logger.h b/util/auto_roll_logger.h index a5b2139fc..a14fbfd58 100644 --- a/util/auto_roll_logger.h +++ b/util/auto_roll_logger.h @@ -8,6 +8,7 @@ #pragma once #include +#include #include #include "file/filename.h" @@ -24,25 +25,8 @@ class AutoRollLogger : public Logger { public: AutoRollLogger(Env* env, const std::string& dbname, const std::string& db_log_dir, size_t log_max_size, - size_t log_file_time_to_roll, - const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL) - : Logger(log_level), - dbname_(dbname), - db_log_dir_(db_log_dir), - env_(env), - status_(Status::OK()), - kMaxLogFileSize(log_max_size), - kLogFileTimeToRoll(log_file_time_to_roll), - cached_now(static_cast(env_->NowMicros() * 1e-6)), - ctime_(cached_now), - cached_now_access_count(0), - call_NowMicros_every_N_records_(100), - mutex_() { - env->GetAbsolutePath(dbname, &db_absolute_path_); - log_fname_ = InfoLogFileName(dbname_, db_absolute_path_, db_log_dir_); - RollLogFile(); - ResetLogger(); - } + size_t log_file_time_to_roll, size_t keep_log_file_num, + const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL); using Logger::Logv; void Logv(const char* format, va_list ap) override; @@ -110,6 +94,11 @@ class AutoRollLogger : public Logger { bool LogExpired(); Status ResetLogger(); void RollLogFile(); + // Read all names of old log files into old_log_files_ + // If there is any error, put the error code in status_ + void GetExistingFiles(); + // Delete old log files if it excceeds the limit. + Status TrimOldLogFiles(); // Log message to logger without rolling void LogInternal(const char* format, ...); // Serialize the va_list to a string @@ -126,8 +115,14 @@ class AutoRollLogger : public Logger { Status status_; const size_t kMaxLogFileSize; const size_t kLogFileTimeToRoll; + const size_t kKeepLogFileNum; // header information std::list headers_; + // List of all existing info log files. Used for enforcing number of + // info log files. + // Full path is stored here. It consumes signifianctly more memory + // than only storing file name. Can optimize if it causes a problem. + std::queue old_log_files_; // to avoid frequent env->NowMicros() calls, we cached the current time uint64_t cached_now; uint64_t ctime_; diff --git a/util/auto_roll_logger_test.cc b/util/auto_roll_logger_test.cc index 87de5ed5b..ff47719d4 100644 --- a/util/auto_roll_logger_test.cc +++ b/util/auto_roll_logger_test.cc @@ -41,6 +41,21 @@ class NoSleepEnv : public EnvWrapper { }; } // namespace +// In this test we only want to Log some simple log message with +// no format. LogMessage() provides such a simple interface and +// avoids the [format-security] warning which occurs when you +// call ROCKS_LOG_INFO(logger, log_message) directly. +namespace { +void LogMessage(Logger* logger, const char* message) { + ROCKS_LOG_INFO(logger, "%s", message); +} + +void LogMessage(const InfoLogLevel log_level, Logger* logger, + const char* message) { + Log(log_level, logger, "%s", message); +} +} // namespace + class AutoRollLoggerTest : public testing::Test { public: static void InitTestDb() { @@ -62,6 +77,41 @@ class AutoRollLoggerTest : public testing::Test { const std::string& log_message); void RollLogFileByTimeTest(Env*, AutoRollLogger* logger, size_t time, const std::string& log_message); + // return list of files under kTestDir that contains "LOG" + std::vector GetLogFiles() { + std::vector ret; + std::vector files; + Status s = default_env->GetChildren(kTestDir, &files); + // Should call ASSERT_OK() here but it doesn't compile. It's not + // worth the time figuring out why. + EXPECT_TRUE(s.ok()); + for (const auto& f : files) { + if (f.find("LOG") != std::string::npos) { + ret.push_back(f); + } + } + return ret; + } + + // Delete all log files under kTestDir + void CleanupLogFiles() { + for (const std::string& f : GetLogFiles()) { + ASSERT_OK(default_env->DeleteFile(kTestDir + "/" + f)); + } + } + + void RollNTimesBySize(Logger* auto_roll_logger, size_t file_num, + size_t max_log_file_size) { + // Roll the log 4 times, and it will trim to 3 files. + std::string dummy_large_string; + dummy_large_string.assign(max_log_file_size, '='); + auto_roll_logger->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); + for (size_t i = 0; i < file_num + 1; i++) { + // Log enough bytes to trigger at least one roll. + LogMessage(auto_roll_logger, dummy_large_string.c_str()); + LogMessage(auto_roll_logger, ""); + } + } static const std::string kSampleMessage; static const std::string kTestDir; @@ -77,21 +127,6 @@ const std::string AutoRollLoggerTest::kLogFile( test::PerThreadDBPath("db_log_test") + "/LOG"); Env* AutoRollLoggerTest::default_env = Env::Default(); -// In this test we only want to Log some simple log message with -// no format. LogMessage() provides such a simple interface and -// avoids the [format-security] warning which occurs when you -// call ROCKS_LOG_INFO(logger, log_message) directly. -namespace { -void LogMessage(Logger* logger, const char* message) { - ROCKS_LOG_INFO(logger, "%s", message); -} - -void LogMessage(const InfoLogLevel log_level, Logger* logger, - const char* message) { - Log(log_level, logger, "%s", message); -} -} // namespace - void AutoRollLoggerTest::RollLogFileBySizeTest(AutoRollLogger* logger, size_t log_max_size, const std::string& log_message) { @@ -159,8 +194,10 @@ void AutoRollLoggerTest::RollLogFileByTimeTest(Env* env, AutoRollLogger* logger, TEST_F(AutoRollLoggerTest, RollLogFileBySize) { InitTestDb(); size_t log_max_size = 1024 * 5; + size_t keep_log_file_num = 10; - AutoRollLogger logger(Env::Default(), kTestDir, "", log_max_size, 0); + AutoRollLogger logger(Env::Default(), kTestDir, "", log_max_size, 0, + keep_log_file_num); RollLogFileBySizeTest(&logger, log_max_size, kSampleMessage + ":RollLogFileBySize"); @@ -171,11 +208,12 @@ TEST_F(AutoRollLoggerTest, RollLogFileByTime) { size_t time = 2; size_t log_size = 1024 * 5; + size_t keep_log_file_num = 10; InitTestDb(); // -- Test the existence of file during the server restart. ASSERT_EQ(Status::NotFound(), default_env->FileExists(kLogFile)); - AutoRollLogger logger(&nse, kTestDir, "", log_size, time); + AutoRollLogger logger(&nse, kTestDir, "", log_size, time, keep_log_file_num); ASSERT_OK(default_env->FileExists(kLogFile)); RollLogFileByTimeTest(&nse, &logger, time, @@ -192,28 +230,30 @@ TEST_F(AutoRollLoggerTest, OpenLogFilesMultipleTimesWithOptionLog_max_size) { // treated as "singed". size_t kZero = 0; size_t log_size = 1024; + size_t keep_log_file_num = 10; - AutoRollLogger* logger = new AutoRollLogger( - Env::Default(), kTestDir, "", log_size, 0); + AutoRollLogger* logger = new AutoRollLogger(Env::Default(), kTestDir, "", + log_size, 0, keep_log_file_num); LogMessage(logger, kSampleMessage.c_str()); ASSERT_GT(logger->GetLogFileSize(), kZero); delete logger; // reopens the log file and an empty log file will be created. - logger = new AutoRollLogger( - Env::Default(), kTestDir, "", log_size, 0); + logger = new AutoRollLogger(Env::Default(), kTestDir, "", log_size, 0, 10); ASSERT_EQ(logger->GetLogFileSize(), kZero); delete logger; } TEST_F(AutoRollLoggerTest, CompositeRollByTimeAndSizeLogger) { size_t time = 2, log_max_size = 1024 * 5; + size_t keep_log_file_num = 10; InitTestDb(); NoSleepEnv nse(Env::Default()); - AutoRollLogger logger(&nse, kTestDir, "", log_max_size, time); + AutoRollLogger logger(&nse, kTestDir, "", log_max_size, time, + keep_log_file_num); // Test the ability to roll by size RollLogFileBySizeTest(&logger, log_max_size, @@ -269,6 +309,107 @@ TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) { kSampleMessage + ":CreateLoggerFromOptions - both"); RollLogFileByTimeTest(&nse, auto_roll_logger, options.log_file_time_to_roll, kSampleMessage + ":CreateLoggerFromOptions - both"); + + // Set keep_log_file_num + { + const size_t kFileNum = 3; + InitTestDb(); + options.max_log_file_size = 512; + options.log_file_time_to_roll = 2; + options.keep_log_file_num = kFileNum; + ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); + auto_roll_logger = dynamic_cast(logger.get()); + + // Roll the log 4 times, and it will trim to 3 files. + std::string dummy_large_string; + dummy_large_string.assign(options.max_log_file_size, '='); + auto_roll_logger->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); + for (size_t i = 0; i < kFileNum + 1; i++) { + // Log enough bytes to trigger at least one roll. + LogMessage(auto_roll_logger, dummy_large_string.c_str()); + LogMessage(auto_roll_logger, ""); + } + + std::vector files = GetLogFiles(); + ASSERT_EQ(kFileNum, files.size()); + + CleanupLogFiles(); + } + + // Set keep_log_file_num and dbname is different from + // db_log_dir. + { + const size_t kFileNum = 3; + InitTestDb(); + options.max_log_file_size = 512; + options.log_file_time_to_roll = 2; + options.keep_log_file_num = kFileNum; + options.db_log_dir = kTestDir; + ASSERT_OK(CreateLoggerFromOptions("/dummy/db/name", options, &logger)); + auto_roll_logger = dynamic_cast(logger.get()); + + // Roll the log 4 times, and it will trim to 3 files. + std::string dummy_large_string; + dummy_large_string.assign(options.max_log_file_size, '='); + auto_roll_logger->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); + for (size_t i = 0; i < kFileNum + 1; i++) { + // Log enough bytes to trigger at least one roll. + LogMessage(auto_roll_logger, dummy_large_string.c_str()); + LogMessage(auto_roll_logger, ""); + } + + std::vector files = GetLogFiles(); + ASSERT_EQ(kFileNum, files.size()); + for (const auto& f : files) { + ASSERT_TRUE(f.find("dummy") != std::string::npos); + } + + // Cleaning up those files. + CleanupLogFiles(); + } +} + +TEST_F(AutoRollLoggerTest, AutoDeleting) { + for (int attempt = 0; attempt < 2; attempt++) { + // In the first attemp, db_log_dir is not set, while in the + // second it is set. + std::string dbname = (attempt == 0) ? kTestDir : "/test/dummy/dir"; + std::string db_log_dir = (attempt == 0) ? "" : kTestDir; + + InitTestDb(); + const size_t kMaxFileSize = 512; + { + size_t log_num = 8; + AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0, + log_num); + RollNTimesBySize(&logger, log_num, kMaxFileSize); + + ASSERT_EQ(log_num, GetLogFiles().size()); + } + // Shrink number of files + { + size_t log_num = 5; + AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0, + log_num); + ASSERT_EQ(log_num, GetLogFiles().size()); + + RollNTimesBySize(&logger, 3, kMaxFileSize); + ASSERT_EQ(log_num, GetLogFiles().size()); + } + + // Increase number of files again. + { + size_t log_num = 7; + AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0, + log_num); + ASSERT_EQ(6, GetLogFiles().size()); + + RollNTimesBySize(&logger, 3, kMaxFileSize); + ASSERT_EQ(log_num, GetLogFiles().size()); + } + + CleanupLogFiles(); + } } TEST_F(AutoRollLoggerTest, LogFlushWhileRolling) { @@ -322,7 +463,7 @@ TEST_F(AutoRollLoggerTest, InfoLogLevel) { // an extra-scope to force the AutoRollLogger to flush the log file when it // becomes out of scope. { - AutoRollLogger logger(Env::Default(), kTestDir, "", log_size, 0); + AutoRollLogger logger(Env::Default(), kTestDir, "", log_size, 0, 10); for (int log_level = InfoLogLevel::HEADER_LEVEL; log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) { logger.SetInfoLogLevel((InfoLogLevel)log_level); @@ -360,7 +501,7 @@ TEST_F(AutoRollLoggerTest, Close) { size_t log_size = 8192; size_t log_lines = 0; - AutoRollLogger logger(Env::Default(), kTestDir, "", log_size, 0); + AutoRollLogger logger(Env::Default(), kTestDir, "", log_size, 0, 10); for (int log_level = InfoLogLevel::HEADER_LEVEL; log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) { logger.SetInfoLogLevel((InfoLogLevel)log_level); @@ -446,8 +587,9 @@ TEST_F(AutoRollLoggerTest, LogHeaderTest) { InitTestDb(); - AutoRollLogger logger(Env::Default(), kTestDir, /*db_log_dir=*/ "", - LOG_MAX_SIZE, /*log_file_time_to_roll=*/ 0); + AutoRollLogger logger(Env::Default(), kTestDir, /*db_log_dir=*/"", + LOG_MAX_SIZE, /*log_file_time_to_roll=*/0, + /*keep_log_file_num=*/10); if (test_num == 0) { // Log some headers explicitly using Header() diff --git a/utilities/convenience/info_log_finder.cc b/utilities/convenience/info_log_finder.cc index 3e5999616..646362aa2 100644 --- a/utilities/convenience/info_log_finder.cc +++ b/utilities/convenience/info_log_finder.cc @@ -14,35 +14,12 @@ namespace rocksdb { Status GetInfoLogList(DB* db, std::vector* info_log_list) { - uint64_t number = 0; - FileType type; - std::string path; - if (!db) { return Status::InvalidArgument("DB pointer is not valid"); } - + std::string parent_path; const Options& options = db->GetOptions(); - if (!options.db_log_dir.empty()) { - path = options.db_log_dir; - } else { - path = db->GetName(); - } - InfoLogPrefix info_log_prefix(!options.db_log_dir.empty(), db->GetName()); - auto* env = options.env; - std::vector file_names; - Status s = env->GetChildren(path, &file_names); - - if (!s.ok()) { - return s; - } - - for (auto f : file_names) { - if (ParseFileName(f, &number, info_log_prefix.prefix, &type) && - (type == kInfoLogFile)) { - info_log_list->push_back(f); - } - } - return Status::OK(); + return GetInfoLogFiles(options.env, options.db_log_dir, db->GetName(), + &parent_path, info_log_list); } } // namespace rocksdb