diff --git a/HISTORY.md b/HISTORY.md index 1c8509337..f7fac7b0d 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,11 @@ # Rocksdb Change Log -## Unreleased +## Unreleased (will be released in 3.0) +* By default, max_background_flushes is 1 and flush process is + removed from background compaction process. Flush process is now always + executed in high priority thread pool. + +## Unreleased (will be released in 2.8) * By default, checksums are verified on every read from database diff --git a/db/db_impl.cc b/db/db_impl.cc index d346d915b..91e327a8b 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -142,6 +142,9 @@ Options SanitizeOptions(const std::string& dbname, DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) { DBOptions result = src; ClipToRange(&result.max_open_files, 20, 1000000); + if (result.max_background_flushes == 0) { + result.max_background_flushes = 1; + } if (result.info_log == nullptr) { Status s = CreateLoggerFromOptions(dbname, result.db_log_dir, src.env, @@ -1704,11 +1707,15 @@ void DBImpl::MaybeScheduleFlushOrCompaction() { is_flush_pending = true; } } - if (is_flush_pending && - (bg_flush_scheduled_ < options_.max_background_flushes)) { + if (is_flush_pending) { // memtable flush needed - bg_flush_scheduled_++; - env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH); + // max_background_flushes should not be 0, because that means + // flush will never get executed + assert(options_.max_background_flushes != 0); + if (bg_flush_scheduled_ < options_.max_background_flushes) { + bg_flush_scheduled_++; + env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH); + } } bool is_compaction_needed = false; for (auto cfd : *versions_->GetColumnFamilySet()) { @@ -1718,12 +1725,10 @@ void DBImpl::MaybeScheduleFlushOrCompaction() { } } - // Schedule BGWorkCompaction if there's a compaction pending (or a memtable - // flush, but the HIGH pool is not enabled). 
Do it only if - // max_background_compactions hasn't been reached and, in case + // Schedule BGWorkCompaction if there's a compaction pending + // Do it only if max_background_compactions hasn't been reached and, in case // bg_manual_only_ > 0, if it's a manual compaction. - if ((manual_compaction_ || is_compaction_needed || - (is_flush_pending && (options_.max_background_flushes <= 0))) && + if ((manual_compaction_ || is_compaction_needed) && bg_compaction_scheduled_ < options_.max_background_compactions && (!bg_manual_only_ || manual_compaction_)) { @@ -1868,41 +1873,14 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, *madeProgress = false; mutex_.AssertHeld(); + unique_ptr c; bool is_manual = (manual_compaction_ != nullptr) && (manual_compaction_->in_progress == false); - if (is_manual) { - // another thread cannot pick up the same work - manual_compaction_->in_progress = true; - } - - // TODO: remove memtable flush from formal compaction - for (auto cfd : *versions_->GetColumnFamilySet()) { - while (cfd->imm()->IsFlushPending()) { - Log(options_.info_log, - "BackgroundCompaction doing FlushMemTableToOutputFile with column " - "family %d, compaction slots available %d", - cfd->GetID(), - options_.max_background_compactions - bg_compaction_scheduled_); - Status stat = - FlushMemTableToOutputFile(cfd, madeProgress, deletion_state); - if (!stat.ok()) { - if (is_manual) { - manual_compaction_->status = stat; - manual_compaction_->done = true; - manual_compaction_->in_progress = false; - manual_compaction_ = nullptr; - } - return stat; - } - } - } - - unique_ptr c; InternalKey manual_end_storage; InternalKey* manual_end = &manual_end_storage; if (is_manual) { ManualCompaction* m = manual_compaction_; - assert(m->in_progress); + m->in_progress = true; c.reset(m->cfd->CompactRange(m->input_level, m->output_level, m->begin, m->end, &manual_end)); if (!c) { @@ -2299,20 +2277,6 @@ Status DBImpl::DoCompactionWork(CompactionState* compact, } for (; input->Valid() 
&& !shutting_down_.Acquire_Load(); ) { - // Prioritize immutable compaction work - // TODO: remove memtable flush from normal compaction work - if (cfd->imm()->imm_flush_needed.NoBarrier_Load() != nullptr) { - const uint64_t imm_start = env_->NowMicros(); - LogFlush(options_.info_log); - mutex_.Lock(); - if (cfd->imm()->IsFlushPending()) { - FlushMemTableToOutputFile(cfd, nullptr, deletion_state); - bg_cv_.SignalAll(); // Wakeup MakeRoomForWrite() if necessary - } - mutex_.Unlock(); - imm_micros += (env_->NowMicros() - imm_start); - } - Slice key = input->key(); Slice value = input->value(); diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index e7994d798..47ee930e8 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -526,13 +526,17 @@ struct DBOptions { // regardless of this setting uint64_t delete_obsolete_files_period_micros; - // Maximum number of concurrent background jobs, submitted to - // the default LOW priority thread pool + // Maximum number of concurrent background compaction jobs, submitted to + // the default LOW priority thread pool. + // If you're increasing this, also consider increasing number of threads in + // LOW priority thread pool. For more information, see + // Env::SetBackgroundThreads // Default: 1 int max_background_compactions; // Maximum number of concurrent background memtable flush jobs, submitted to // the HIGH priority thread pool. + // // By default, all background jobs (major compaction and memtable flush) go // to the LOW priority pool. If this option is set to a positive number, // memtable flush jobs will be submitted to the HIGH priority pool. @@ -540,7 +544,11 @@ struct DBOptions { // Without a separate pool, long running major compaction jobs could // potentially block memtable flush jobs of other db instances, leading to // unnecessary Put stalls. - // Default: 0 + // + // If you're increasing this, also consider increasing number of threads in + // HIGH priority thread pool. 
For more information, see + // Env::SetBackgroundThreads + // Default: 1 int max_background_flushes; // Specify the maximal size of the info log file. If the log file diff --git a/util/options.cc b/util/options.cc index 50d1e850e..212dc4653 100644 --- a/util/options.cc +++ b/util/options.cc @@ -150,7 +150,7 @@ DBOptions::DBOptions() wal_dir(""), delete_obsolete_files_period_micros(6 * 60 * 60 * 1000000UL), max_background_compactions(1), - max_background_flushes(0), + max_background_flushes(1), max_log_file_size(0), log_file_time_to_roll(0), keep_log_file_num(1000),