From c5a64cffd221a1f2e5878c261215769f00b41847 Mon Sep 17 00:00:00 2001
From: Yanqin Jin <yanqin@fb.com>
Date: Tue, 12 Feb 2019 12:01:55 -0800
Subject: [PATCH] Avoid fsync on the same directory in atomic flush (#4817)

Summary:
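In `DBImpl::AtomicFlushMemTablesToOutputFiles`, we need to call fsync only once
on each distinct data directory. If two column families share a common
directory for their data, that directory is fsynced only once. To detect
sharing, output directories are now de-duplicated by comparing their paths
(`cf_paths[0].path`) instead of comparing `Directory*` pointers.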
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4817

Differential Revision: D13543689

Pulled By: riversand963

fbshipit-source-id: 4701d77c96a47802fbf6cb9f3337ee65d46b95f5
---
 db/db_impl_compaction_flush.cc | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/db/db_impl_compaction_flush.cc b/db/db_impl_compaction_flush.cc
index 2c8c42685..050b3f106 100644
--- a/db/db_impl_compaction_flush.cc
+++ b/db/db_impl_compaction_flush.cc
@@ -296,6 +296,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
                      &earliest_write_conflict_snapshot, &snapshot_checker);
 
   autovector<Directory*> distinct_output_dirs;
+  autovector<std::string> distinct_output_dir_paths;
   std::vector<FlushJob> jobs;
   std::vector<MutableCFOptions> all_mutable_cf_options;
   int num_cfs = static_cast<int>(cfds.size());
@@ -303,18 +304,20 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
   for (int i = 0; i < num_cfs; ++i) {
     auto cfd = cfds[i];
     Directory* data_dir = GetDataDir(cfd, 0U);
+    const std::string& curr_path = cfd->ioptions()->cf_paths[0].path;
 
     // Add to distinct output directories if eligible. Use linear search. Since
     // the number of elements in the vector is not large, performance should be
     // tolerable.
     bool found = false;
-    for (const auto dir : distinct_output_dirs) {
-      if (dir == data_dir) {
+    for (const auto& path : distinct_output_dir_paths) {
+      if (path == curr_path) {
         found = true;
         break;
       }
     }
     if (!found) {
+      distinct_output_dir_paths.emplace_back(curr_path);
       distinct_output_dirs.emplace_back(data_dir);
     }
 
@@ -322,7 +325,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
     const MutableCFOptions& mutable_cf_options = all_mutable_cf_options.back();
     const uint64_t* max_memtable_id = &(bg_flush_args[i].max_memtable_id_);
     jobs.emplace_back(
-        dbname_, cfds[i], immutable_db_options_, mutable_cf_options,
+        dbname_, cfd, immutable_db_options_, mutable_cf_options,
         max_memtable_id, env_options_for_compaction_, versions_.get(), &mutex_,
         &shutting_down_, snapshot_seqs, earliest_write_conflict_snapshot,
         snapshot_checker, job_context, log_buffer, directories_.GetDbDir(),