Auto roll logger to enforce options.keep_log_file_num immediately after a new file is created (#5370)

Summary:
Right now, with auto roll logger, options.keep_log_file_num enforcement is triggered by events like DB reopen or full obsolete scan happens. In the mean time, the size and number of log files can grow without a limit. We put a stronger enforcement to the option, so that the number of log files can always under control.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5370

Differential Revision: D15570413

Pulled By: siying

fbshipit-source-id: 0916c3c4d42ab8fdd29389ee7fd7e1557b03176e
main
Siying Dong 5 years ago committed by Facebook Github Bot
parent 0834bbd0b1
commit cb094e13bb
  1. 1
      HISTORY.md
  2. 32
      file/filename.cc
  3. 8
      file/filename.h
  4. 103
      util/auto_roll_logger.cc
  5. 33
      util/auto_roll_logger.h
  6. 194
      util/auto_roll_logger_test.cc
  7. 29
      utilities/convenience/info_log_finder.cc

@ -4,6 +4,7 @@
* Now DB::Close() will return Aborted() error when there is unreleased snapshot. Users can retry after all snapshots are released.
* Partitions of partitioned indexes no longer affect the read amplification statistics.
* Due to a refactoring, block cache eviction statistics for indexes are temporarily broken. We plan to reintroduce them in a later phase.
* options.keep_log_file_num will be enforced strictly all the time. File names of all log files will be tracked, which may take significantly amount of memory if options.keep_log_file_num is large and either of options.max_log_file_size or options.log_file_time_to_roll is set.
### New Features
* Add an option `snap_refresh_nanos` (default to 0.1s) to periodically refresh the snapshot list in compaction jobs. Assign to 0 to disable the feature.

@ -407,4 +407,36 @@ Status SyncManifest(Env* env, const ImmutableDBOptions* db_options,
return file->Sync(db_options->use_fsync);
}
Status GetInfoLogFiles(Env* env, const std::string& db_log_dir,
const std::string& dbname, std::string* parent_dir,
std::vector<std::string>* info_log_list) {
assert(parent_dir != nullptr);
assert(info_log_list != nullptr);
uint64_t number = 0;
FileType type;
if (!db_log_dir.empty()) {
*parent_dir = db_log_dir;
} else {
*parent_dir = dbname;
}
InfoLogPrefix info_log_prefix(!db_log_dir.empty(), dbname);
std::vector<std::string> file_names;
Status s = env->GetChildren(*parent_dir, &file_names);
if (!s.ok()) {
return s;
}
for (auto& f : file_names) {
if (ParseFileName(f, &number, info_log_prefix.prefix, &type) &&
(type == kInfoLogFile)) {
info_log_list->push_back(f);
}
}
return Status::OK();
}
} // namespace rocksdb

@ -169,4 +169,12 @@ extern Status SetIdentityFile(Env* env, const std::string& dbname);
extern Status SyncManifest(Env* env, const ImmutableDBOptions* db_options,
WritableFileWriter* file);
// Return list of file names of info logs in `file_names`.
// The list only contains file name. The parent directory name is stored
// in `parent_dir`.
// `db_log_dir` should be the one as in options.db_log_dir
extern Status GetInfoLogFiles(Env* env, const std::string& db_log_dir,
const std::string& dbname,
std::string* parent_dir,
std::vector<std::string>* file_names);
} // namespace rocksdb

@ -4,12 +4,53 @@
// (found in the LICENSE.Apache file in the root directory).
//
#include "util/auto_roll_logger.h"
#include <algorithm>
#include "file/filename.h"
#include "util/logging.h"
#include "util/mutexlock.h"
namespace rocksdb {
#ifndef ROCKSDB_LITE
// -- AutoRollLogger
AutoRollLogger::AutoRollLogger(Env* env, const std::string& dbname,
const std::string& db_log_dir,
size_t log_max_size,
size_t log_file_time_to_roll,
size_t keep_log_file_num,
const InfoLogLevel log_level)
: Logger(log_level),
dbname_(dbname),
db_log_dir_(db_log_dir),
env_(env),
status_(Status::OK()),
kMaxLogFileSize(log_max_size),
kLogFileTimeToRoll(log_file_time_to_roll),
kKeepLogFileNum(keep_log_file_num),
cached_now(static_cast<uint64_t>(env_->NowMicros() * 1e-6)),
ctime_(cached_now),
cached_now_access_count(0),
call_NowMicros_every_N_records_(100),
mutex_() {
Status s = env->GetAbsolutePath(dbname, &db_absolute_path_);
if (s.IsNotSupported()) {
db_absolute_path_ = dbname;
} else {
status_ = s;
}
log_fname_ = InfoLogFileName(dbname_, db_absolute_path_, db_log_dir_);
if (env_->FileExists(log_fname_).ok()) {
RollLogFile();
}
GetExistingFiles();
ResetLogger();
s = TrimOldLogFiles();
if (!status_.ok()) {
status_ = s;
}
}
Status AutoRollLogger::ResetLogger() {
TEST_SYNC_POINT("AutoRollLogger::ResetLogger:BeforeNewLogger");
status_ = env_->NewLogger(log_fname_, &logger_);
@ -44,6 +85,58 @@ void AutoRollLogger::RollLogFile() {
now++;
} while (env_->FileExists(old_fname).ok());
env_->RenameFile(log_fname_, old_fname);
old_log_files_.push(old_fname);
}
void AutoRollLogger::GetExistingFiles() {
{
// Empty the queue to avoid duplicated entries in the queue.
std::queue<std::string> empty;
std::swap(old_log_files_, empty);
}
std::string parent_dir;
std::vector<std::string> info_log_files;
Status s =
GetInfoLogFiles(env_, db_log_dir_, dbname_, &parent_dir, &info_log_files);
if (status_.ok()) {
status_ = s;
}
// We need to sort the file before enqueing it so that when we
// delete file from the front, it is the oldest file.
std::sort(info_log_files.begin(), info_log_files.end());
for (const std::string& f : info_log_files) {
old_log_files_.push(parent_dir + "/" + f);
}
}
Status AutoRollLogger::TrimOldLogFiles() {
// Here we directly list info files and delete them through Env.
// The deletion isn't going through DB, so there are shortcomes:
// 1. the deletion is not rate limited by SstFileManager
// 2. there is a chance that an I/O will be issued here
// Since it's going to be complicated to pass DB object down to
// here, we take a simple approach to keep the code easier to
// maintain.
// old_log_files_.empty() is helpful for the corner case that
// kKeepLogFileNum == 0. We can instead check kKeepLogFileNum != 0 but
// it's essentially the same thing, and checking empty before accessing
// the queue feels safer.
while (!old_log_files_.empty() && old_log_files_.size() >= kKeepLogFileNum) {
Status s = env_->DeleteFile(old_log_files_.front());
// Remove the file from the tracking anyway. It's possible that
// DB cleaned up the old log file, or people cleaned it up manually.
old_log_files_.pop();
// To make the file really go away, we should sync parent directory.
// Since there isn't any consistency issue involved here, skipping
// this part to avoid one I/O here.
if (!s.ok()) {
return s;
}
}
return Status::OK();
}
std::string AutoRollLogger::ValistToString(const char* format,
@ -78,12 +171,19 @@ void AutoRollLogger::Logv(const char* format, va_list ap) {
(kMaxLogFileSize > 0 && logger_->GetLogFileSize() >= kMaxLogFileSize)) {
RollLogFile();
Status s = ResetLogger();
Status s2 = TrimOldLogFiles();
if (!s.ok()) {
// can't really log the error if creating a new LOG file failed
return;
}
WriteHeaderInfo();
if (!s2.ok()) {
ROCKS_LOG_WARN(logger.get(), "Fail to trim old info log file: %s",
s2.ToString().c_str());
}
}
// pin down the current logger_ instance before releasing the mutex.
@ -153,7 +253,8 @@ Status CreateLoggerFromOptions(const std::string& dbname,
if (options.log_file_time_to_roll > 0 || options.max_log_file_size > 0) {
AutoRollLogger* result = new AutoRollLogger(
env, dbname, options.db_log_dir, options.max_log_file_size,
options.log_file_time_to_roll, options.info_log_level);
options.log_file_time_to_roll, options.keep_log_file_num,
options.info_log_level);
Status s = result->GetStatus();
if (!s.ok()) {
delete result;

@ -8,6 +8,7 @@
#pragma once
#include <list>
#include <queue>
#include <string>
#include "file/filename.h"
@ -24,25 +25,8 @@ class AutoRollLogger : public Logger {
public:
AutoRollLogger(Env* env, const std::string& dbname,
const std::string& db_log_dir, size_t log_max_size,
size_t log_file_time_to_roll,
const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL)
: Logger(log_level),
dbname_(dbname),
db_log_dir_(db_log_dir),
env_(env),
status_(Status::OK()),
kMaxLogFileSize(log_max_size),
kLogFileTimeToRoll(log_file_time_to_roll),
cached_now(static_cast<uint64_t>(env_->NowMicros() * 1e-6)),
ctime_(cached_now),
cached_now_access_count(0),
call_NowMicros_every_N_records_(100),
mutex_() {
env->GetAbsolutePath(dbname, &db_absolute_path_);
log_fname_ = InfoLogFileName(dbname_, db_absolute_path_, db_log_dir_);
RollLogFile();
ResetLogger();
}
size_t log_file_time_to_roll, size_t keep_log_file_num,
const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL);
using Logger::Logv;
void Logv(const char* format, va_list ap) override;
@ -110,6 +94,11 @@ class AutoRollLogger : public Logger {
bool LogExpired();
Status ResetLogger();
void RollLogFile();
// Read all names of old log files into old_log_files_
// If there is any error, put the error code in status_
void GetExistingFiles();
// Delete old log files if it excceeds the limit.
Status TrimOldLogFiles();
// Log message to logger without rolling
void LogInternal(const char* format, ...);
// Serialize the va_list to a string
@ -126,8 +115,14 @@ class AutoRollLogger : public Logger {
Status status_;
const size_t kMaxLogFileSize;
const size_t kLogFileTimeToRoll;
const size_t kKeepLogFileNum;
// header information
std::list<std::string> headers_;
// List of all existing info log files. Used for enforcing number of
// info log files.
// Full path is stored here. It consumes signifianctly more memory
// than only storing file name. Can optimize if it causes a problem.
std::queue<std::string> old_log_files_;
// to avoid frequent env->NowMicros() calls, we cached the current time
uint64_t cached_now;
uint64_t ctime_;

@ -41,6 +41,21 @@ class NoSleepEnv : public EnvWrapper {
};
} // namespace
// In this test we only want to Log some simple log message with
// no format. LogMessage() provides such a simple interface and
// avoids the [format-security] warning which occurs when you
// call ROCKS_LOG_INFO(logger, log_message) directly.
namespace {
void LogMessage(Logger* logger, const char* message) {
ROCKS_LOG_INFO(logger, "%s", message);
}
void LogMessage(const InfoLogLevel log_level, Logger* logger,
const char* message) {
Log(log_level, logger, "%s", message);
}
} // namespace
class AutoRollLoggerTest : public testing::Test {
public:
static void InitTestDb() {
@ -62,6 +77,41 @@ class AutoRollLoggerTest : public testing::Test {
const std::string& log_message);
void RollLogFileByTimeTest(Env*, AutoRollLogger* logger, size_t time,
const std::string& log_message);
// return list of files under kTestDir that contains "LOG"
std::vector<std::string> GetLogFiles() {
std::vector<std::string> ret;
std::vector<std::string> files;
Status s = default_env->GetChildren(kTestDir, &files);
// Should call ASSERT_OK() here but it doesn't compile. It's not
// worth the time figuring out why.
EXPECT_TRUE(s.ok());
for (const auto& f : files) {
if (f.find("LOG") != std::string::npos) {
ret.push_back(f);
}
}
return ret;
}
// Delete all log files under kTestDir
void CleanupLogFiles() {
for (const std::string& f : GetLogFiles()) {
ASSERT_OK(default_env->DeleteFile(kTestDir + "/" + f));
}
}
void RollNTimesBySize(Logger* auto_roll_logger, size_t file_num,
size_t max_log_file_size) {
// Roll the log 4 times, and it will trim to 3 files.
std::string dummy_large_string;
dummy_large_string.assign(max_log_file_size, '=');
auto_roll_logger->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL);
for (size_t i = 0; i < file_num + 1; i++) {
// Log enough bytes to trigger at least one roll.
LogMessage(auto_roll_logger, dummy_large_string.c_str());
LogMessage(auto_roll_logger, "");
}
}
static const std::string kSampleMessage;
static const std::string kTestDir;
@ -77,21 +127,6 @@ const std::string AutoRollLoggerTest::kLogFile(
test::PerThreadDBPath("db_log_test") + "/LOG");
Env* AutoRollLoggerTest::default_env = Env::Default();
// In this test we only want to Log some simple log message with
// no format. LogMessage() provides such a simple interface and
// avoids the [format-security] warning which occurs when you
// call ROCKS_LOG_INFO(logger, log_message) directly.
namespace {
void LogMessage(Logger* logger, const char* message) {
ROCKS_LOG_INFO(logger, "%s", message);
}
void LogMessage(const InfoLogLevel log_level, Logger* logger,
const char* message) {
Log(log_level, logger, "%s", message);
}
} // namespace
void AutoRollLoggerTest::RollLogFileBySizeTest(AutoRollLogger* logger,
size_t log_max_size,
const std::string& log_message) {
@ -159,8 +194,10 @@ void AutoRollLoggerTest::RollLogFileByTimeTest(Env* env, AutoRollLogger* logger,
TEST_F(AutoRollLoggerTest, RollLogFileBySize) {
InitTestDb();
size_t log_max_size = 1024 * 5;
size_t keep_log_file_num = 10;
AutoRollLogger logger(Env::Default(), kTestDir, "", log_max_size, 0);
AutoRollLogger logger(Env::Default(), kTestDir, "", log_max_size, 0,
keep_log_file_num);
RollLogFileBySizeTest(&logger, log_max_size,
kSampleMessage + ":RollLogFileBySize");
@ -171,11 +208,12 @@ TEST_F(AutoRollLoggerTest, RollLogFileByTime) {
size_t time = 2;
size_t log_size = 1024 * 5;
size_t keep_log_file_num = 10;
InitTestDb();
// -- Test the existence of file during the server restart.
ASSERT_EQ(Status::NotFound(), default_env->FileExists(kLogFile));
AutoRollLogger logger(&nse, kTestDir, "", log_size, time);
AutoRollLogger logger(&nse, kTestDir, "", log_size, time, keep_log_file_num);
ASSERT_OK(default_env->FileExists(kLogFile));
RollLogFileByTimeTest(&nse, &logger, time,
@ -192,28 +230,30 @@ TEST_F(AutoRollLoggerTest, OpenLogFilesMultipleTimesWithOptionLog_max_size) {
// treated as "singed".
size_t kZero = 0;
size_t log_size = 1024;
size_t keep_log_file_num = 10;
AutoRollLogger* logger = new AutoRollLogger(
Env::Default(), kTestDir, "", log_size, 0);
AutoRollLogger* logger = new AutoRollLogger(Env::Default(), kTestDir, "",
log_size, 0, keep_log_file_num);
LogMessage(logger, kSampleMessage.c_str());
ASSERT_GT(logger->GetLogFileSize(), kZero);
delete logger;
// reopens the log file and an empty log file will be created.
logger = new AutoRollLogger(
Env::Default(), kTestDir, "", log_size, 0);
logger = new AutoRollLogger(Env::Default(), kTestDir, "", log_size, 0, 10);
ASSERT_EQ(logger->GetLogFileSize(), kZero);
delete logger;
}
TEST_F(AutoRollLoggerTest, CompositeRollByTimeAndSizeLogger) {
size_t time = 2, log_max_size = 1024 * 5;
size_t keep_log_file_num = 10;
InitTestDb();
NoSleepEnv nse(Env::Default());
AutoRollLogger logger(&nse, kTestDir, "", log_max_size, time);
AutoRollLogger logger(&nse, kTestDir, "", log_max_size, time,
keep_log_file_num);
// Test the ability to roll by size
RollLogFileBySizeTest(&logger, log_max_size,
@ -269,6 +309,107 @@ TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) {
kSampleMessage + ":CreateLoggerFromOptions - both");
RollLogFileByTimeTest(&nse, auto_roll_logger, options.log_file_time_to_roll,
kSampleMessage + ":CreateLoggerFromOptions - both");
// Set keep_log_file_num
{
const size_t kFileNum = 3;
InitTestDb();
options.max_log_file_size = 512;
options.log_file_time_to_roll = 2;
options.keep_log_file_num = kFileNum;
ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger));
auto_roll_logger = dynamic_cast<AutoRollLogger*>(logger.get());
// Roll the log 4 times, and it will trim to 3 files.
std::string dummy_large_string;
dummy_large_string.assign(options.max_log_file_size, '=');
auto_roll_logger->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL);
for (size_t i = 0; i < kFileNum + 1; i++) {
// Log enough bytes to trigger at least one roll.
LogMessage(auto_roll_logger, dummy_large_string.c_str());
LogMessage(auto_roll_logger, "");
}
std::vector<std::string> files = GetLogFiles();
ASSERT_EQ(kFileNum, files.size());
CleanupLogFiles();
}
// Set keep_log_file_num and dbname is different from
// db_log_dir.
{
const size_t kFileNum = 3;
InitTestDb();
options.max_log_file_size = 512;
options.log_file_time_to_roll = 2;
options.keep_log_file_num = kFileNum;
options.db_log_dir = kTestDir;
ASSERT_OK(CreateLoggerFromOptions("/dummy/db/name", options, &logger));
auto_roll_logger = dynamic_cast<AutoRollLogger*>(logger.get());
// Roll the log 4 times, and it will trim to 3 files.
std::string dummy_large_string;
dummy_large_string.assign(options.max_log_file_size, '=');
auto_roll_logger->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL);
for (size_t i = 0; i < kFileNum + 1; i++) {
// Log enough bytes to trigger at least one roll.
LogMessage(auto_roll_logger, dummy_large_string.c_str());
LogMessage(auto_roll_logger, "");
}
std::vector<std::string> files = GetLogFiles();
ASSERT_EQ(kFileNum, files.size());
for (const auto& f : files) {
ASSERT_TRUE(f.find("dummy") != std::string::npos);
}
// Cleaning up those files.
CleanupLogFiles();
}
}
TEST_F(AutoRollLoggerTest, AutoDeleting) {
for (int attempt = 0; attempt < 2; attempt++) {
// In the first attemp, db_log_dir is not set, while in the
// second it is set.
std::string dbname = (attempt == 0) ? kTestDir : "/test/dummy/dir";
std::string db_log_dir = (attempt == 0) ? "" : kTestDir;
InitTestDb();
const size_t kMaxFileSize = 512;
{
size_t log_num = 8;
AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0,
log_num);
RollNTimesBySize(&logger, log_num, kMaxFileSize);
ASSERT_EQ(log_num, GetLogFiles().size());
}
// Shrink number of files
{
size_t log_num = 5;
AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0,
log_num);
ASSERT_EQ(log_num, GetLogFiles().size());
RollNTimesBySize(&logger, 3, kMaxFileSize);
ASSERT_EQ(log_num, GetLogFiles().size());
}
// Increase number of files again.
{
size_t log_num = 7;
AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0,
log_num);
ASSERT_EQ(6, GetLogFiles().size());
RollNTimesBySize(&logger, 3, kMaxFileSize);
ASSERT_EQ(log_num, GetLogFiles().size());
}
CleanupLogFiles();
}
}
TEST_F(AutoRollLoggerTest, LogFlushWhileRolling) {
@ -322,7 +463,7 @@ TEST_F(AutoRollLoggerTest, InfoLogLevel) {
// an extra-scope to force the AutoRollLogger to flush the log file when it
// becomes out of scope.
{
AutoRollLogger logger(Env::Default(), kTestDir, "", log_size, 0);
AutoRollLogger logger(Env::Default(), kTestDir, "", log_size, 0, 10);
for (int log_level = InfoLogLevel::HEADER_LEVEL;
log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) {
logger.SetInfoLogLevel((InfoLogLevel)log_level);
@ -360,7 +501,7 @@ TEST_F(AutoRollLoggerTest, Close) {
size_t log_size = 8192;
size_t log_lines = 0;
AutoRollLogger logger(Env::Default(), kTestDir, "", log_size, 0);
AutoRollLogger logger(Env::Default(), kTestDir, "", log_size, 0, 10);
for (int log_level = InfoLogLevel::HEADER_LEVEL;
log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) {
logger.SetInfoLogLevel((InfoLogLevel)log_level);
@ -446,8 +587,9 @@ TEST_F(AutoRollLoggerTest, LogHeaderTest) {
InitTestDb();
AutoRollLogger logger(Env::Default(), kTestDir, /*db_log_dir=*/ "",
LOG_MAX_SIZE, /*log_file_time_to_roll=*/ 0);
AutoRollLogger logger(Env::Default(), kTestDir, /*db_log_dir=*/"",
LOG_MAX_SIZE, /*log_file_time_to_roll=*/0,
/*keep_log_file_num=*/10);
if (test_num == 0) {
// Log some headers explicitly using Header()

@ -14,35 +14,12 @@
namespace rocksdb {
Status GetInfoLogList(DB* db, std::vector<std::string>* info_log_list) {
uint64_t number = 0;
FileType type;
std::string path;
if (!db) {
return Status::InvalidArgument("DB pointer is not valid");
}
std::string parent_path;
const Options& options = db->GetOptions();
if (!options.db_log_dir.empty()) {
path = options.db_log_dir;
} else {
path = db->GetName();
}
InfoLogPrefix info_log_prefix(!options.db_log_dir.empty(), db->GetName());
auto* env = options.env;
std::vector<std::string> file_names;
Status s = env->GetChildren(path, &file_names);
if (!s.ok()) {
return s;
}
for (auto f : file_names) {
if (ParseFileName(f, &number, info_log_prefix.prefix, &type) &&
(type == kInfoLogFile)) {
info_log_list->push_back(f);
}
}
return Status::OK();
return GetInfoLogFiles(options.env, options.db_log_dir, db->GetName(),
&parent_path, info_log_list);
}
} // namespace rocksdb

Loading…
Cancel
Save