From 36c504be17d9e7c81567ba0732ef81632d3d2c74 Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 3 Feb 2020 14:11:50 -0800 Subject: [PATCH] Avoid create directory for every column families (#6358) Summary: A relatively recent regression causes for every CF, create and open directory is called for the DB directory, unless CF has a private directory. This doesn't scale well with large number of column families. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6358 Test Plan: Run all existing tests and see it pass. strace with db_bench --num_column_families and observe it doesn't open directory for number of column families. Differential Revision: D19675141 fbshipit-source-id: da01d9216f1dae3f03d4064fbd88ce71245bd9be --- db/column_family.cc | 23 ++++++++++++++++------- db/column_family.h | 7 +++++-- db/db_impl/db_impl.cc | 3 ++- db/db_impl/db_impl.h | 1 - db/db_impl/db_impl_open.cc | 3 ++- 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/db/column_family.cc b/db/column_family.cc index c65ab90f7..135f0c9b0 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -1320,17 +1320,26 @@ Env::WriteLifeTimeHint ColumnFamilyData::CalculateSSTWriteHint(int level) { static_cast(Env::WLTH_MEDIUM)); } -Status ColumnFamilyData::AddDirectories() { +Status ColumnFamilyData::AddDirectories( + std::map>* created_dirs) { Status s; + assert(created_dirs != nullptr); assert(data_dirs_.empty()); for (auto& p : ioptions_.cf_paths) { - std::unique_ptr path_directory; - s = DBImpl::CreateAndNewDirectory(ioptions_.env, p.path, &path_directory); - if (!s.ok()) { - return s; + auto existing_dir = created_dirs->find(p.path); + + if (existing_dir == created_dirs->end()) { + std::unique_ptr path_directory; + s = DBImpl::CreateAndNewDirectory(ioptions_.env, p.path, &path_directory); + if (!s.ok()) { + return s; + } + assert(path_directory != nullptr); + data_dirs_.emplace_back(path_directory.release()); + (*created_dirs)[p.path] = data_dirs_.back(); + } else { + data_dirs_.emplace_back(existing_dir->second); } - assert(path_directory != nullptr); - data_dirs_.emplace_back(path_directory.release()); } assert(data_dirs_.size() == ioptions_.cf_paths.size()); return s; diff --git a/db/column_family.h b/db/column_family.h index f8d0edc13..351819839 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -497,7 +497,10 @@ class ColumnFamilyData { Env::WriteLifeTimeHint CalculateSSTWriteHint(int level); - Status AddDirectories(); + // created_dirs remembers directory created, so that we don't need to call + // the same data creation operation again. + Status AddDirectories( + std::map>* created_dirs); Directory* GetDataDir(size_t path_id) const; @@ -589,7 +592,7 @@ class ColumnFamilyData { std::atomic last_memtable_id_; // Directories corresponding to cf_paths. - std::vector> data_dirs_; + std::vector> data_dirs_; }; // ColumnFamilySet has interesting thread-safety requirements diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 9366d49d4..423af5dcf 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -2301,7 +2301,8 @@ Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, auto* cfd = versions_->GetColumnFamilySet()->GetColumnFamily(column_family_name); assert(cfd != nullptr); - s = cfd->AddDirectories(); + std::map> dummy_created_dirs; + s = cfd->AddDirectories(&dummy_created_dirs); } if (s.ok()) { single_column_family_mode_ = false; diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 931daeee3..8f65a5ca4 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -826,7 +826,6 @@ class DBImpl : public DB { std::vector* handles, DB** dbptr, const bool seq_per_batch, const bool batch_per_txn); - static Status CreateAndNewDirectory(Env* env, const std::string& dirname, std::unique_ptr* directory); diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 95d917f32..f574981f8 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -454,8 +454,9 @@ Status DBImpl::Recover( s = CheckConsistency(); } if (s.ok() && !read_only) { + std::map> created_dirs; for (auto cfd : *versions_->GetColumnFamilySet()) { - s = cfd->AddDirectories(); + s = cfd->AddDirectories(&created_dirs); if (!s.ok()) { return s; }