Reduce `env_->GetChildren()` calls in DBImpl::Recover() (#7044)

Summary:
There currently exist multiple `GetChildren()` calls in `DBImpl::Recover()`, which can be expensive in cases of distributed file systems.
This pull request try to call `DBImpl::Recover()` of each necessary directory only _once_ and reuse the results in the places of repeated calls in current code.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7044

Test Plan:
Run `make check` and use the default test suite. The modified code should be semantically identical to the current code. As a proof of this solution, we may optionally deploy the system onto a (real or simulated) distributed system and expect reduced latency caused by manifest fetching.

(WIP)

Reviewed By: riversand963

Differential Revision: D22419925

Pulled By: roghnin

fbshipit-source-id: d3774fbfbc246c5527101bc16747eb5c90919886
main
wenh 4 years ago committed by Facebook GitHub Bot
parent a9a973869a
commit 4924a506b9
  1. 4
      db/db_impl/db_impl.h
  2. 60
      db/db_impl/db_impl_open.cc
  3. 2
      db/repair.cc
  4. 38
      db/version_set.cc
  5. 5
      db/version_set.h

@ -852,8 +852,8 @@ class DBImpl : public DB {
InstrumentedMutex* mutex() const { return &mutex_; } InstrumentedMutex* mutex() const { return &mutex_; }
// Initialize a brand new DB. The DB directory is expected to be empty before // Initialize a brand new DB. The DB directory is expected to be empty before
// calling it. // calling it. Push new manifest file name into `new_filenames`.
Status NewDB(); Status NewDB(std::vector<std::string>* new_filenames);
// This is to be used only by internal rocksdb classes. // This is to be used only by internal rocksdb classes.
static Status Open(const DBOptions& db_options, const std::string& name, static Status Open(const DBOptions& db_options, const std::string& name,

@ -253,7 +253,7 @@ Status DBImpl::ValidateOptions(const DBOptions& db_options) {
return Status::OK(); return Status::OK();
} }
Status DBImpl::NewDB() { Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
VersionEdit new_db; VersionEdit new_db;
Status s = SetIdentityFile(env_, dbname_); Status s = SetIdentityFile(env_, dbname_);
if (!s.ok()) { if (!s.ok()) {
@ -293,6 +293,10 @@ Status DBImpl::NewDB() {
if (s.ok()) { if (s.ok()) {
// Make "CURRENT" file that points to the new manifest file. // Make "CURRENT" file that points to the new manifest file.
s = SetCurrentFile(fs_.get(), dbname_, 1, directories_.GetDbDir()); s = SetCurrentFile(fs_.get(), dbname_, 1, directories_.GetDbDir());
if (new_filenames) {
new_filenames->emplace_back(
manifest.substr(manifest.find_last_of("/\\") + 1));
}
} else { } else {
fs_->DeleteFile(manifest, IOOptions(), nullptr); fs_->DeleteFile(manifest, IOOptions(), nullptr);
} }
@ -356,6 +360,7 @@ Status DBImpl::Recover(
bool is_new_db = false; bool is_new_db = false;
assert(db_lock_ == nullptr); assert(db_lock_ == nullptr);
std::vector<std::string> files_in_dbname;
if (!read_only) { if (!read_only) {
Status s = directories_.SetDirectories(fs_.get(), dbname_, Status s = directories_.SetDirectories(fs_.get(), dbname_,
immutable_db_options_.wal_dir, immutable_db_options_.wal_dir,
@ -379,10 +384,12 @@ Status DBImpl::Recover(
s = env_->FileExists(current_fname); s = env_->FileExists(current_fname);
} else { } else {
s = Status::NotFound(); s = Status::NotFound();
std::vector<std::string> files; Status io_s = env_->GetChildren(dbname_, &files_in_dbname);
// No need to check return value if (!io_s.ok()) {
env_->GetChildren(dbname_, &files); s = io_s;
for (const std::string& file : files) { files_in_dbname.clear();
}
for (const std::string& file : files_in_dbname) {
uint64_t number = 0; uint64_t number = 0;
FileType type = kLogFile; // initialize FileType type = kLogFile; // initialize
if (ParseFileName(file, &number, &type) && type == kDescriptorFile) { if (ParseFileName(file, &number, &type) && type == kDescriptorFile) {
@ -396,7 +403,7 @@ Status DBImpl::Recover(
} }
if (s.IsNotFound()) { if (s.IsNotFound()) {
if (immutable_db_options_.create_if_missing) { if (immutable_db_options_.create_if_missing) {
s = NewDB(); s = NewDB(&files_in_dbname);
is_new_db = true; is_new_db = true;
if (!s.ok()) { if (!s.ok()) {
return s; return s;
@ -438,6 +445,16 @@ Status DBImpl::Recover(
} }
} }
} }
} else if (immutable_db_options_.best_efforts_recovery) {
assert(files_in_dbname.empty());
Status s = env_->GetChildren(dbname_, &files_in_dbname);
if (s.IsNotFound()) {
return Status::InvalidArgument(dbname_,
"does not exist (open for read only)");
} else if (s.IsIOError()) {
return s;
}
assert(s.ok());
} }
assert(db_id_.empty()); assert(db_id_.empty());
Status s; Status s;
@ -445,8 +462,9 @@ Status DBImpl::Recover(
if (!immutable_db_options_.best_efforts_recovery) { if (!immutable_db_options_.best_efforts_recovery) {
s = versions_->Recover(column_families, read_only, &db_id_); s = versions_->Recover(column_families, read_only, &db_id_);
} else { } else {
s = versions_->TryRecover(column_families, read_only, &db_id_, assert(!files_in_dbname.empty());
&missing_table_file); s = versions_->TryRecover(column_families, read_only, files_in_dbname,
&db_id_, &missing_table_file);
if (s.ok()) { if (s.ok()) {
// TryRecover may delete previous column_family_set_. // TryRecover may delete previous column_family_set_.
column_family_memtables_.reset( column_family_memtables_.reset(
@ -506,6 +524,7 @@ Status DBImpl::Recover(
s = InitPersistStatsColumnFamily(); s = InitPersistStatsColumnFamily();
} }
std::vector<std::string> files_in_wal_dir;
if (s.ok()) { if (s.ok()) {
// Initial max_total_in_memory_state_ before recovery logs. Log recovery // Initial max_total_in_memory_state_ before recovery logs. Log recovery
// may check this value to decide whether to flush. // may check this value to decide whether to flush.
@ -532,9 +551,8 @@ Status DBImpl::Recover(
// Note that prev_log_number() is no longer used, but we pay // Note that prev_log_number() is no longer used, but we pay
// attention to it in case we are recovering a database // attention to it in case we are recovering a database
// produced by an older version of rocksdb. // produced by an older version of rocksdb.
std::vector<std::string> filenames;
if (!immutable_db_options_.best_efforts_recovery) { if (!immutable_db_options_.best_efforts_recovery) {
s = env_->GetChildren(immutable_db_options_.wal_dir, &filenames); s = env_->GetChildren(immutable_db_options_.wal_dir, &files_in_wal_dir);
} }
if (s.IsNotFound()) { if (s.IsNotFound()) {
return Status::InvalidArgument("wal_dir not found", return Status::InvalidArgument("wal_dir not found",
@ -544,15 +562,15 @@ Status DBImpl::Recover(
} }
std::vector<uint64_t> logs; std::vector<uint64_t> logs;
for (size_t i = 0; i < filenames.size(); i++) { for (const auto& file : files_in_wal_dir) {
uint64_t number; uint64_t number;
FileType type; FileType type;
if (ParseFileName(filenames[i], &number, &type) && type == kLogFile) { if (ParseFileName(file, &number, &type) && type == kLogFile) {
if (is_new_db) { if (is_new_db) {
return Status::Corruption( return Status::Corruption(
"While creating a new Db, wal_dir contains " "While creating a new Db, wal_dir contains "
"existing log file: ", "existing log file: ",
filenames[i]); file);
} else { } else {
logs.push_back(number); logs.push_back(number);
} }
@ -604,15 +622,24 @@ Status DBImpl::Recover(
// to reflect the most recent OPTIONS file. It does not matter for regular // to reflect the most recent OPTIONS file. It does not matter for regular
// read-write db instance because options_file_number_ will later be // read-write db instance because options_file_number_ will later be
// updated to versions_->NewFileNumber() in RenameTempFileToOptionsFile. // updated to versions_->NewFileNumber() in RenameTempFileToOptionsFile.
std::vector<std::string> file_names; std::vector<std::string> filenames;
if (s.ok()) { if (s.ok()) {
s = env_->GetChildren(GetName(), &file_names); const std::string normalized_dbname = NormalizePath(dbname_);
const std::string normalized_wal_dir =
NormalizePath(immutable_db_options_.wal_dir);
if (immutable_db_options_.best_efforts_recovery) {
filenames = std::move(files_in_dbname);
} else if (normalized_dbname == normalized_wal_dir) {
filenames = std::move(files_in_wal_dir);
} else {
s = env_->GetChildren(GetName(), &filenames);
}
} }
if (s.ok()) { if (s.ok()) {
uint64_t number = 0; uint64_t number = 0;
uint64_t options_file_number = 0; uint64_t options_file_number = 0;
FileType type; FileType type;
for (const auto& fname : file_names) { for (const auto& fname : filenames) {
if (ParseFileName(fname, &number, &type) && type == kOptionsFile) { if (ParseFileName(fname, &number, &type) && type == kOptionsFile) {
options_file_number = std::max(number, options_file_number); options_file_number = std::max(number, options_file_number);
} }
@ -620,7 +647,6 @@ Status DBImpl::Recover(
versions_->options_file_number_ = options_file_number; versions_->options_file_number_ = options_file_number;
} }
} }
return s; return s;
} }

@ -185,7 +185,7 @@ class Repairer {
DBImpl* db_impl = new DBImpl(db_options_, dbname_); DBImpl* db_impl = new DBImpl(db_options_, dbname_);
// Also use this temp DBImpl to get a session id // Also use this temp DBImpl to get a session id
db_impl->GetDbSessionId(db_session_id_); db_impl->GetDbSessionId(db_session_id_);
status = db_impl->NewDB(); status = db_impl->NewDB(/*new_filenames=*/nullptr);
delete db_impl; delete db_impl;
} }

@ -4730,34 +4730,25 @@ Status VersionSet::Recover(
namespace { namespace {
class ManifestPicker { class ManifestPicker {
public: public:
explicit ManifestPicker(const std::string& dbname, FileSystem* fs); explicit ManifestPicker(const std::string& dbname,
void SeekToFirstManifest(); const std::vector<std::string>& files_in_dbname);
// REQUIRES Valid() == true // REQUIRES Valid() == true
std::string GetNextManifest(uint64_t* file_number, std::string* file_name); std::string GetNextManifest(uint64_t* file_number, std::string* file_name);
bool Valid() const { return manifest_file_iter_ != manifest_files_.end(); } bool Valid() const { return manifest_file_iter_ != manifest_files_.end(); }
const Status& status() const { return status_; }
private: private:
const std::string& dbname_; const std::string& dbname_;
FileSystem* const fs_;
// MANIFEST file names(s) // MANIFEST file names(s)
std::vector<std::string> manifest_files_; std::vector<std::string> manifest_files_;
std::vector<std::string>::const_iterator manifest_file_iter_; std::vector<std::string>::const_iterator manifest_file_iter_;
Status status_;
}; };
ManifestPicker::ManifestPicker(const std::string& dbname, FileSystem* fs) ManifestPicker::ManifestPicker(const std::string& dbname,
: dbname_(dbname), fs_(fs) {} const std::vector<std::string>& files_in_dbname)
: dbname_(dbname) {
void ManifestPicker::SeekToFirstManifest() { // populate manifest files
assert(fs_ != nullptr); assert(!files_in_dbname.empty());
std::vector<std::string> children; for (const auto& fname : files_in_dbname) {
Status s = fs_->GetChildren(dbname_, IOOptions(), &children, /*dbg=*/nullptr);
if (!s.ok()) {
status_ = s;
return;
}
for (const auto& fname : children) {
uint64_t file_num = 0; uint64_t file_num = 0;
FileType file_type; FileType file_type;
bool parse_ok = ParseFileName(fname, &file_num, &file_type); bool parse_ok = ParseFileName(fname, &file_num, &file_type);
@ -4765,6 +4756,7 @@ void ManifestPicker::SeekToFirstManifest() {
manifest_files_.push_back(fname); manifest_files_.push_back(fname);
} }
} }
// seek to first manifest
std::sort(manifest_files_.begin(), manifest_files_.end(), std::sort(manifest_files_.begin(), manifest_files_.end(),
[](const std::string& lhs, const std::string& rhs) { [](const std::string& lhs, const std::string& rhs) {
uint64_t num1 = 0; uint64_t num1 = 0;
@ -4787,7 +4779,6 @@ void ManifestPicker::SeekToFirstManifest() {
std::string ManifestPicker::GetNextManifest(uint64_t* number, std::string ManifestPicker::GetNextManifest(uint64_t* number,
std::string* file_name) { std::string* file_name) {
assert(status_.ok());
assert(Valid()); assert(Valid());
std::string ret; std::string ret;
if (manifest_file_iter_ != manifest_files_.end()) { if (manifest_file_iter_ != manifest_files_.end()) {
@ -4817,16 +4808,13 @@ std::string ManifestPicker::GetNextManifest(uint64_t* number,
Status VersionSet::TryRecover( Status VersionSet::TryRecover(
const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only, const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
std::string* db_id, bool* has_missing_table_file) { const std::vector<std::string>& files_in_dbname, std::string* db_id,
ManifestPicker manifest_picker(dbname_, fs_); bool* has_missing_table_file) {
manifest_picker.SeekToFirstManifest(); ManifestPicker manifest_picker(dbname_, files_in_dbname);
Status s = manifest_picker.status();
if (!s.ok()) {
return s;
}
if (!manifest_picker.Valid()) { if (!manifest_picker.Valid()) {
return Status::Corruption("Cannot locate MANIFEST file in " + dbname_); return Status::Corruption("Cannot locate MANIFEST file in " + dbname_);
} }
Status s;
std::string manifest_path = std::string manifest_path =
manifest_picker.GetNextManifest(&manifest_file_number_, nullptr); manifest_picker.GetNextManifest(&manifest_file_number_, nullptr);
while (!manifest_path.empty()) { while (!manifest_path.empty()) {

@ -962,8 +962,9 @@ class VersionSet {
bool read_only = false, std::string* db_id = nullptr); bool read_only = false, std::string* db_id = nullptr);
Status TryRecover(const std::vector<ColumnFamilyDescriptor>& column_families, Status TryRecover(const std::vector<ColumnFamilyDescriptor>& column_families,
bool read_only, std::string* db_id, bool read_only,
bool* has_missing_table_file); const std::vector<std::string>& files_in_dbname,
std::string* db_id, bool* has_missing_table_file);
// Try to recover the version set to the most recent consistent state // Try to recover the version set to the most recent consistent state
// recorded in the specified manifest. // recorded in the specified manifest.

Loading…
Cancel
Save