Reduce `env_->GetChildren()` calls in DBImpl::Recover() (#7044)

Summary:
There currently exist multiple `GetChildren()` calls in `DBImpl::Recover()`, which can be expensive in cases of distributed file systems.
This pull request try to call `DBImpl::Recover()` of each necessary directory only _once_ and reuse the results in the places of repeated calls in current code.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7044

Test Plan:
Run `make check` and use the default test suite. The modified code should be semantically identical to the current code. As a proof of this solution, we may optionally deploy the system onto a (real or simulated) distributed system and expect reduced latency caused by manifest fetching.

(WIP)

Reviewed By: riversand963

Differential Revision: D22419925

Pulled By: roghnin

fbshipit-source-id: d3774fbfbc246c5527101bc16747eb5c90919886
main
wenh 4 years ago committed by Facebook GitHub Bot
parent a9a973869a
commit 4924a506b9
  1. 4
      db/db_impl/db_impl.h
  2. 60
      db/db_impl/db_impl_open.cc
  3. 2
      db/repair.cc
  4. 38
      db/version_set.cc
  5. 5
      db/version_set.h

@ -852,8 +852,8 @@ class DBImpl : public DB {
InstrumentedMutex* mutex() const { return &mutex_; }
// Initialize a brand new DB. The DB directory is expected to be empty before
// calling it.
Status NewDB();
// calling it. Push new manifest file name into `new_filenames`.
Status NewDB(std::vector<std::string>* new_filenames);
// This is to be used only by internal rocksdb classes.
static Status Open(const DBOptions& db_options, const std::string& name,

@ -253,7 +253,7 @@ Status DBImpl::ValidateOptions(const DBOptions& db_options) {
return Status::OK();
}
Status DBImpl::NewDB() {
Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
VersionEdit new_db;
Status s = SetIdentityFile(env_, dbname_);
if (!s.ok()) {
@ -293,6 +293,10 @@ Status DBImpl::NewDB() {
if (s.ok()) {
// Make "CURRENT" file that points to the new manifest file.
s = SetCurrentFile(fs_.get(), dbname_, 1, directories_.GetDbDir());
if (new_filenames) {
new_filenames->emplace_back(
manifest.substr(manifest.find_last_of("/\\") + 1));
}
} else {
fs_->DeleteFile(manifest, IOOptions(), nullptr);
}
@ -356,6 +360,7 @@ Status DBImpl::Recover(
bool is_new_db = false;
assert(db_lock_ == nullptr);
std::vector<std::string> files_in_dbname;
if (!read_only) {
Status s = directories_.SetDirectories(fs_.get(), dbname_,
immutable_db_options_.wal_dir,
@ -379,10 +384,12 @@ Status DBImpl::Recover(
s = env_->FileExists(current_fname);
} else {
s = Status::NotFound();
std::vector<std::string> files;
// No need to check return value
env_->GetChildren(dbname_, &files);
for (const std::string& file : files) {
Status io_s = env_->GetChildren(dbname_, &files_in_dbname);
if (!io_s.ok()) {
s = io_s;
files_in_dbname.clear();
}
for (const std::string& file : files_in_dbname) {
uint64_t number = 0;
FileType type = kLogFile; // initialize
if (ParseFileName(file, &number, &type) && type == kDescriptorFile) {
@ -396,7 +403,7 @@ Status DBImpl::Recover(
}
if (s.IsNotFound()) {
if (immutable_db_options_.create_if_missing) {
s = NewDB();
s = NewDB(&files_in_dbname);
is_new_db = true;
if (!s.ok()) {
return s;
@ -438,6 +445,16 @@ Status DBImpl::Recover(
}
}
}
} else if (immutable_db_options_.best_efforts_recovery) {
assert(files_in_dbname.empty());
Status s = env_->GetChildren(dbname_, &files_in_dbname);
if (s.IsNotFound()) {
return Status::InvalidArgument(dbname_,
"does not exist (open for read only)");
} else if (s.IsIOError()) {
return s;
}
assert(s.ok());
}
assert(db_id_.empty());
Status s;
@ -445,8 +462,9 @@ Status DBImpl::Recover(
if (!immutable_db_options_.best_efforts_recovery) {
s = versions_->Recover(column_families, read_only, &db_id_);
} else {
s = versions_->TryRecover(column_families, read_only, &db_id_,
&missing_table_file);
assert(!files_in_dbname.empty());
s = versions_->TryRecover(column_families, read_only, files_in_dbname,
&db_id_, &missing_table_file);
if (s.ok()) {
// TryRecover may delete previous column_family_set_.
column_family_memtables_.reset(
@ -506,6 +524,7 @@ Status DBImpl::Recover(
s = InitPersistStatsColumnFamily();
}
std::vector<std::string> files_in_wal_dir;
if (s.ok()) {
// Initial max_total_in_memory_state_ before recovery logs. Log recovery
// may check this value to decide whether to flush.
@ -532,9 +551,8 @@ Status DBImpl::Recover(
// Note that prev_log_number() is no longer used, but we pay
// attention to it in case we are recovering a database
// produced by an older version of rocksdb.
std::vector<std::string> filenames;
if (!immutable_db_options_.best_efforts_recovery) {
s = env_->GetChildren(immutable_db_options_.wal_dir, &filenames);
s = env_->GetChildren(immutable_db_options_.wal_dir, &files_in_wal_dir);
}
if (s.IsNotFound()) {
return Status::InvalidArgument("wal_dir not found",
@ -544,15 +562,15 @@ Status DBImpl::Recover(
}
std::vector<uint64_t> logs;
for (size_t i = 0; i < filenames.size(); i++) {
for (const auto& file : files_in_wal_dir) {
uint64_t number;
FileType type;
if (ParseFileName(filenames[i], &number, &type) && type == kLogFile) {
if (ParseFileName(file, &number, &type) && type == kLogFile) {
if (is_new_db) {
return Status::Corruption(
"While creating a new Db, wal_dir contains "
"existing log file: ",
filenames[i]);
file);
} else {
logs.push_back(number);
}
@ -604,15 +622,24 @@ Status DBImpl::Recover(
// to reflect the most recent OPTIONS file. It does not matter for regular
// read-write db instance because options_file_number_ will later be
// updated to versions_->NewFileNumber() in RenameTempFileToOptionsFile.
std::vector<std::string> file_names;
std::vector<std::string> filenames;
if (s.ok()) {
s = env_->GetChildren(GetName(), &file_names);
const std::string normalized_dbname = NormalizePath(dbname_);
const std::string normalized_wal_dir =
NormalizePath(immutable_db_options_.wal_dir);
if (immutable_db_options_.best_efforts_recovery) {
filenames = std::move(files_in_dbname);
} else if (normalized_dbname == normalized_wal_dir) {
filenames = std::move(files_in_wal_dir);
} else {
s = env_->GetChildren(GetName(), &filenames);
}
}
if (s.ok()) {
uint64_t number = 0;
uint64_t options_file_number = 0;
FileType type;
for (const auto& fname : file_names) {
for (const auto& fname : filenames) {
if (ParseFileName(fname, &number, &type) && type == kOptionsFile) {
options_file_number = std::max(number, options_file_number);
}
@ -620,7 +647,6 @@ Status DBImpl::Recover(
versions_->options_file_number_ = options_file_number;
}
}
return s;
}

@ -185,7 +185,7 @@ class Repairer {
DBImpl* db_impl = new DBImpl(db_options_, dbname_);
// Also use this temp DBImpl to get a session id
db_impl->GetDbSessionId(db_session_id_);
status = db_impl->NewDB();
status = db_impl->NewDB(/*new_filenames=*/nullptr);
delete db_impl;
}

@ -4730,34 +4730,25 @@ Status VersionSet::Recover(
namespace {
class ManifestPicker {
public:
explicit ManifestPicker(const std::string& dbname, FileSystem* fs);
void SeekToFirstManifest();
explicit ManifestPicker(const std::string& dbname,
const std::vector<std::string>& files_in_dbname);
// REQUIRES Valid() == true
std::string GetNextManifest(uint64_t* file_number, std::string* file_name);
bool Valid() const { return manifest_file_iter_ != manifest_files_.end(); }
const Status& status() const { return status_; }
private:
const std::string& dbname_;
FileSystem* const fs_;
// MANIFEST file names(s)
std::vector<std::string> manifest_files_;
std::vector<std::string>::const_iterator manifest_file_iter_;
Status status_;
};
ManifestPicker::ManifestPicker(const std::string& dbname, FileSystem* fs)
: dbname_(dbname), fs_(fs) {}
void ManifestPicker::SeekToFirstManifest() {
assert(fs_ != nullptr);
std::vector<std::string> children;
Status s = fs_->GetChildren(dbname_, IOOptions(), &children, /*dbg=*/nullptr);
if (!s.ok()) {
status_ = s;
return;
}
for (const auto& fname : children) {
ManifestPicker::ManifestPicker(const std::string& dbname,
const std::vector<std::string>& files_in_dbname)
: dbname_(dbname) {
// populate manifest files
assert(!files_in_dbname.empty());
for (const auto& fname : files_in_dbname) {
uint64_t file_num = 0;
FileType file_type;
bool parse_ok = ParseFileName(fname, &file_num, &file_type);
@ -4765,6 +4756,7 @@ void ManifestPicker::SeekToFirstManifest() {
manifest_files_.push_back(fname);
}
}
// seek to first manifest
std::sort(manifest_files_.begin(), manifest_files_.end(),
[](const std::string& lhs, const std::string& rhs) {
uint64_t num1 = 0;
@ -4787,7 +4779,6 @@ void ManifestPicker::SeekToFirstManifest() {
std::string ManifestPicker::GetNextManifest(uint64_t* number,
std::string* file_name) {
assert(status_.ok());
assert(Valid());
std::string ret;
if (manifest_file_iter_ != manifest_files_.end()) {
@ -4817,16 +4808,13 @@ std::string ManifestPicker::GetNextManifest(uint64_t* number,
Status VersionSet::TryRecover(
const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
std::string* db_id, bool* has_missing_table_file) {
ManifestPicker manifest_picker(dbname_, fs_);
manifest_picker.SeekToFirstManifest();
Status s = manifest_picker.status();
if (!s.ok()) {
return s;
}
const std::vector<std::string>& files_in_dbname, std::string* db_id,
bool* has_missing_table_file) {
ManifestPicker manifest_picker(dbname_, files_in_dbname);
if (!manifest_picker.Valid()) {
return Status::Corruption("Cannot locate MANIFEST file in " + dbname_);
}
Status s;
std::string manifest_path =
manifest_picker.GetNextManifest(&manifest_file_number_, nullptr);
while (!manifest_path.empty()) {

@ -962,8 +962,9 @@ class VersionSet {
bool read_only = false, std::string* db_id = nullptr);
Status TryRecover(const std::vector<ColumnFamilyDescriptor>& column_families,
bool read_only, std::string* db_id,
bool* has_missing_table_file);
bool read_only,
const std::vector<std::string>& files_in_dbname,
std::string* db_id, bool* has_missing_table_file);
// Try to recover the version set to the most recent consistent state
// recorded in the specified manifest.

Loading…
Cancel
Save