Make DBOption compaction_readahead_size dynamic

Summary: Closes https://github.com/facebook/rocksdb/pull/3004

Differential Revision: D6056141

Pulled By: miasantreble

fbshipit-source-id: 56df1630f464fd56b07d25d38161f699e0528b7f
main
Zhongyi Xie 7 years ago committed by Facebook Github Bot
parent e27f60b1c8
commit 32e31d49d1
  1. 2
      HISTORY.md
  2. 7
      db/compaction_job.cc
  3. 2
      db/compaction_job.h
  4. 12
      db/db_impl.cc
  5. 8
      db/db_impl_compaction_flush.cc
  6. 26
      db/db_options_test.cc
  7. 5
      db/db_test_util.h
  8. 4
      db/table_cache.cc
  9. 13
      db/version_set.cc
  10. 9
      db/version_set.h
  11. 1
      options/cf_options.cc
  12. 2
      options/cf_options.h
  13. 13
      options/db_options.cc
  14. 2
      options/db_options.h
  15. 2
      options/options_helper.cc
  16. 3
      options/options_helper.h

@ -15,7 +15,7 @@
### New Features ### New Features
* `DBOptions::writable_file_max_buffer_size` can now be changed dynamically. * `DBOptions::writable_file_max_buffer_size` can now be changed dynamically.
* `DBOptions::bytes_per_sync` and `DBOptions::wal_bytes_per_sync` can now be changed dynamically, `DBOptions::wal_bytes_per_sync` will flush all memtables and switch to a new WAL file. * `DBOptions::bytes_per_sync`, `DBOptions::compaction_readahead_size`, and `DBOptions::wal_bytes_per_sync` can now be changed dynamically, `DBOptions::wal_bytes_per_sync` will flush all memtables and switch to a new WAL file.
* Support dynamic adjustment of rate limit according to demand for background I/O. It can be enabled by passing `true` to the `auto_tuned` parameter in `NewGenericRateLimiter()`. The value passed as `rate_bytes_per_sec` will still be respected as an upper-bound. * Support dynamic adjustment of rate limit according to demand for background I/O. It can be enabled by passing `true` to the `auto_tuned` parameter in `NewGenericRateLimiter()`. The value passed as `rate_bytes_per_sec` will still be respected as an upper-bound.
* Support dynamically changing `ColumnFamilyOptions::compaction_options_fifo`. * Support dynamically changing `ColumnFamilyOptions::compaction_options_fifo`.
* Introduce `EventListener::OnStallConditionsChanged()` callback. Users can implement it to be notified when user writes are stalled, stopped, or resumed. * Introduce `EventListener::OnStallConditionsChanged()` callback. Users can implement it to be notified when user writes are stalled, stopped, or resumed.

@ -265,8 +265,7 @@ CompactionJob::CompactionJob(
int job_id, Compaction* compaction, const ImmutableDBOptions& db_options, int job_id, Compaction* compaction, const ImmutableDBOptions& db_options,
const EnvOptions env_options, VersionSet* versions, const EnvOptions env_options, VersionSet* versions,
const std::atomic<bool>* shutting_down, const std::atomic<bool>* shutting_down,
const SequenceNumber preserve_deletes_seqnum, const SequenceNumber preserve_deletes_seqnum, LogBuffer* log_buffer,
LogBuffer* log_buffer,
Directory* db_directory, Directory* output_directory, Statistics* stats, Directory* db_directory, Directory* output_directory, Statistics* stats,
InstrumentedMutex* db_mutex, Status* db_bg_error, InstrumentedMutex* db_mutex, Status* db_bg_error,
std::vector<SequenceNumber> existing_snapshots, std::vector<SequenceNumber> existing_snapshots,
@ -282,6 +281,8 @@ CompactionJob::CompactionJob(
db_options_(db_options), db_options_(db_options),
env_options_(env_options), env_options_(env_options),
env_(db_options.env), env_(db_options.env),
env_optiosn_for_read_(
env_->OptimizeForCompactionTableRead(env_options, db_options_)),
versions_(versions), versions_(versions),
shutting_down_(shutting_down), shutting_down_(shutting_down),
preserve_deletes_seqnum_(preserve_deletes_seqnum), preserve_deletes_seqnum_(preserve_deletes_seqnum),
@ -680,7 +681,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
std::unique_ptr<RangeDelAggregator> range_del_agg( std::unique_ptr<RangeDelAggregator> range_del_agg(
new RangeDelAggregator(cfd->internal_comparator(), existing_snapshots_)); new RangeDelAggregator(cfd->internal_comparator(), existing_snapshots_));
std::unique_ptr<InternalIterator> input(versions_->MakeInputIterator( std::unique_ptr<InternalIterator> input(versions_->MakeInputIterator(
sub_compact->compaction, range_del_agg.get())); sub_compact->compaction, range_del_agg.get(), env_optiosn_for_read_));
AutoThreadOperationStageUpdater stage_updater( AutoThreadOperationStageUpdater stage_updater(
ThreadStatus::STAGE_COMPACTION_PROCESS_KV); ThreadStatus::STAGE_COMPACTION_PROCESS_KV);

@ -134,6 +134,8 @@ class CompactionJob {
const EnvOptions env_options_; const EnvOptions env_options_;
Env* env_; Env* env_;
// env_option optimized for compaction table reads
EnvOptions env_optiosn_for_read_;
VersionSet* versions_; VersionSet* versions_;
const std::atomic<bool>* shutting_down_; const std::atomic<bool>* shutting_down_;
const SequenceNumber preserve_deletes_seqnum_; const SequenceNumber preserve_deletes_seqnum_;

@ -587,13 +587,15 @@ Status DBImpl::SetDBOptions(
new_options.bytes_per_sync = 1024 * 1024; new_options.bytes_per_sync = 1024 * 1024;
} }
mutable_db_options_ = new_options; mutable_db_options_ = new_options;
env_options_for_compaction_ = EnvOptions(BuildDBOptions( env_options_for_compaction_ = EnvOptions(
immutable_db_options_, BuildDBOptions(immutable_db_options_, mutable_db_options_));
mutable_db_options_));
env_options_for_compaction_ = env_->OptimizeForCompactionTableWrite( env_options_for_compaction_ = env_->OptimizeForCompactionTableWrite(
env_options_for_compaction_, env_options_for_compaction_, immutable_db_options_);
immutable_db_options_);
versions_->ChangeEnvOptions(mutable_db_options_); versions_->ChangeEnvOptions(mutable_db_options_);
env_options_for_compaction_ = env_->OptimizeForCompactionTableRead(
env_options_for_compaction_, immutable_db_options_);
env_options_for_compaction_.compaction_readahead_size =
mutable_db_options_.compaction_readahead_size;
write_thread_.EnterUnbatched(&w, &mutex_); write_thread_.EnterUnbatched(&w, &mutex_);
if (total_log_size_ > GetMaxTotalWalSize() || wal_changed) { if (total_log_size_ > GetMaxTotalWalSize() || wal_changed) {
Status purge_wal_status = SwitchWAL(&write_context); Status purge_wal_status = SwitchWAL(&write_context);

@ -544,10 +544,10 @@ Status DBImpl::CompactFilesImpl(
job_context->job_id, c.get(), immutable_db_options_, job_context->job_id, c.get(), immutable_db_options_,
env_options_for_compaction_, versions_.get(), &shutting_down_, env_options_for_compaction_, versions_.get(), &shutting_down_,
preserve_deletes_seqnum_.load(), log_buffer, directories_.GetDbDir(), preserve_deletes_seqnum_.load(), log_buffer, directories_.GetDbDir(),
directories_.GetDataDir(c->output_path_id()), directories_.GetDataDir(c->output_path_id()), stats_, &mutex_, &bg_error_,
stats_, &mutex_, &bg_error_, snapshot_seqs, snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
earliest_write_conflict_snapshot, snapshot_checker, table_cache_, table_cache_, &event_logger_,
&event_logger_, c->mutable_cf_options()->paranoid_file_checks, c->mutable_cf_options()->paranoid_file_checks,
c->mutable_cf_options()->report_bg_io_stats, dbname_, c->mutable_cf_options()->report_bg_io_stats, dbname_,
nullptr); // Here we pass a nullptr for CompactionJobStats because nullptr); // Here we pass a nullptr for CompactionJobStats because
// CompactFiles does not trigger OnCompactionCompleted(), // CompactFiles does not trigger OnCompactionCompleted(),

@ -692,6 +692,32 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) {
ASSERT_LE(NumTableFilesAtLevel(0), 5); ASSERT_LE(NumTableFilesAtLevel(0), 5);
} }
TEST_F(DBOptionsTest, CompactionReadaheadSizeChange) {
SpecialEnv env(env_);
Options options;
options.env = &env;
options.compaction_readahead_size = 0;
options.new_table_reader_for_compaction_inputs = true;
options.level0_file_num_compaction_trigger = 2;
const std::string kValue(1024, 'v');
Reopen(options);
ASSERT_EQ(0, dbfull()->GetDBOptions().compaction_readahead_size);
ASSERT_OK(dbfull()->SetDBOptions({{"compaction_readahead_size", "256"}}));
ASSERT_EQ(256, dbfull()->GetDBOptions().compaction_readahead_size);
for (int i = 0; i < 1024; i++) {
Put(Key(i), kValue);
}
Flush();
for (int i = 0; i < 1024 * 2; i++) {
Put(Key(i), kValue);
}
Flush();
dbfull()->TEST_WaitForCompact();
ASSERT_EQ(256, env_->compaction_readahead_size_);
Close();
}
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
} // namespace rocksdb } // namespace rocksdb

@ -445,6 +445,9 @@ class SpecialEnv : public EnvWrapper {
r->reset(new CountingFile(std::move(*r), &random_read_counter_, r->reset(new CountingFile(std::move(*r), &random_read_counter_,
&random_read_bytes_counter_)); &random_read_bytes_counter_));
} }
if (s.ok() && soptions.compaction_readahead_size > 0) {
compaction_readahead_size_ = soptions.compaction_readahead_size;
}
return s; return s;
} }
@ -570,6 +573,8 @@ class SpecialEnv : public EnvWrapper {
bool no_slowdown_; bool no_slowdown_;
std::atomic<bool> is_wal_sync_thread_safe_{true}; std::atomic<bool> is_wal_sync_thread_safe_{true};
size_t compaction_readahead_size_;
}; };
class MockTimeEnv : public EnvWrapper { class MockTimeEnv : public EnvWrapper {

@ -191,7 +191,9 @@ InternalIterator* TableCache::NewIterator(
&use_direct_reads_for_compaction); &use_direct_reads_for_compaction);
#endif // !NDEBUG #endif // !NDEBUG
if (ioptions_.new_table_reader_for_compaction_inputs) { if (ioptions_.new_table_reader_for_compaction_inputs) {
readahead = ioptions_.compaction_readahead_size; // get compaction_readahead_size from env_options allows us to set the
// value dynamically
readahead = env_options.compaction_readahead_size;
create_new_table_reader = true; create_new_table_reader = true;
} }
} else { } else {

@ -2418,9 +2418,7 @@ VersionSet::VersionSet(const std::string& dbname,
prev_log_number_(0), prev_log_number_(0),
current_version_number_(0), current_version_number_(0),
manifest_file_size_(0), manifest_file_size_(0),
env_options_(storage_options), env_options_(storage_options) {}
env_options_compactions_(
env_->OptimizeForCompactionTableRead(env_options_, *db_options_)) {}
void CloseTables(void* ptr, size_t) { void CloseTables(void* ptr, size_t) {
TableReader* table_reader = reinterpret_cast<TableReader*>(ptr); TableReader* table_reader = reinterpret_cast<TableReader*>(ptr);
@ -3688,7 +3686,8 @@ void VersionSet::AddLiveFiles(std::vector<FileDescriptor>* live_list) {
} }
InternalIterator* VersionSet::MakeInputIterator( InternalIterator* VersionSet::MakeInputIterator(
const Compaction* c, RangeDelAggregator* range_del_agg) { const Compaction* c, RangeDelAggregator* range_del_agg,
const EnvOptions& env_options_compactions) {
auto cfd = c->column_family_data(); auto cfd = c->column_family_data();
ReadOptions read_options; ReadOptions read_options;
read_options.verify_checksums = true; read_options.verify_checksums = true;
@ -3713,8 +3712,8 @@ InternalIterator* VersionSet::MakeInputIterator(
const LevelFilesBrief* flevel = c->input_levels(which); const LevelFilesBrief* flevel = c->input_levels(which);
for (size_t i = 0; i < flevel->num_files; i++) { for (size_t i = 0; i < flevel->num_files; i++) {
list[num++] = cfd->table_cache()->NewIterator( list[num++] = cfd->table_cache()->NewIterator(
read_options, env_options_compactions_, read_options, env_options_compactions, cfd->internal_comparator(),
cfd->internal_comparator(), flevel->files[i].fd, range_del_agg, flevel->files[i].fd, range_del_agg,
nullptr /* table_reader_ptr */, nullptr /* table_reader_ptr */,
nullptr /* no per level latency histogram */, nullptr /* no per level latency histogram */,
true /* for_compaction */, nullptr /* arena */, true /* for_compaction */, nullptr /* arena */,
@ -3724,7 +3723,7 @@ InternalIterator* VersionSet::MakeInputIterator(
// Create concatenating iterator for the files from this level // Create concatenating iterator for the files from this level
list[num++] = NewTwoLevelIterator( list[num++] = NewTwoLevelIterator(
new LevelFileIteratorState( new LevelFileIteratorState(
cfd->table_cache(), read_options, env_options_compactions_, cfd->table_cache(), read_options, env_options_compactions,
cfd->internal_comparator(), cfd->internal_comparator(),
nullptr /* no per level latency histogram */, nullptr /* no per level latency histogram */,
true /* for_compaction */, false /* prefix enabled */, true /* for_compaction */, false /* prefix enabled */,

@ -812,8 +812,9 @@ class VersionSet {
// Create an iterator that reads over the compaction inputs for "*c". // Create an iterator that reads over the compaction inputs for "*c".
// The caller should delete the iterator when no longer needed. // The caller should delete the iterator when no longer needed.
InternalIterator* MakeInputIterator(const Compaction* c, InternalIterator* MakeInputIterator(
RangeDelAggregator* range_del_agg); const Compaction* c, RangeDelAggregator* range_del_agg,
const EnvOptions& env_options_compactions);
// Add all files listed in any live version to *live. // Add all files listed in any live version to *live.
void AddLiveFiles(std::vector<FileDescriptor>* live_list); void AddLiveFiles(std::vector<FileDescriptor>* live_list);
@ -916,10 +917,6 @@ class VersionSet {
// env options for all reads and writes except compactions // env options for all reads and writes except compactions
EnvOptions env_options_; EnvOptions env_options_;
// env options used for compactions. This is a copy of
// env_options_ but with readaheads set to readahead_compactions_.
const EnvOptions env_options_compactions_;
// No copying allowed // No copying allowed
VersionSet(const VersionSet&); VersionSet(const VersionSet&);
void operator=(const VersionSet&); void operator=(const VersionSet&);

@ -65,7 +65,6 @@ ImmutableCFOptions::ImmutableCFOptions(const ImmutableDBOptions& db_options,
db_options.access_hint_on_compaction_start), db_options.access_hint_on_compaction_start),
new_table_reader_for_compaction_inputs( new_table_reader_for_compaction_inputs(
db_options.new_table_reader_for_compaction_inputs), db_options.new_table_reader_for_compaction_inputs),
compaction_readahead_size(db_options.compaction_readahead_size),
num_levels(cf_options.num_levels), num_levels(cf_options.num_levels),
optimize_filters_for_hits(cf_options.optimize_filters_for_hits), optimize_filters_for_hits(cf_options.optimize_filters_for_hits),
force_consistency_checks(cf_options.force_consistency_checks), force_consistency_checks(cf_options.force_consistency_checks),

@ -101,8 +101,6 @@ struct ImmutableCFOptions {
bool new_table_reader_for_compaction_inputs; bool new_table_reader_for_compaction_inputs;
size_t compaction_readahead_size;
int num_levels; int num_levels;
bool optimize_filters_for_hits; bool optimize_filters_for_hits;

@ -62,7 +62,6 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
access_hint_on_compaction_start(options.access_hint_on_compaction_start), access_hint_on_compaction_start(options.access_hint_on_compaction_start),
new_table_reader_for_compaction_inputs( new_table_reader_for_compaction_inputs(
options.new_table_reader_for_compaction_inputs), options.new_table_reader_for_compaction_inputs),
compaction_readahead_size(options.compaction_readahead_size),
random_access_max_buffer_size(options.random_access_max_buffer_size), random_access_max_buffer_size(options.random_access_max_buffer_size),
use_adaptive_mutex(options.use_adaptive_mutex), use_adaptive_mutex(options.use_adaptive_mutex),
listeners(options.listeners), listeners(options.listeners),
@ -168,9 +167,6 @@ void ImmutableDBOptions::Dump(Logger* log) const {
static_cast<int>(access_hint_on_compaction_start)); static_cast<int>(access_hint_on_compaction_start));
ROCKS_LOG_HEADER(log, " Options.new_table_reader_for_compaction_inputs: %d", ROCKS_LOG_HEADER(log, " Options.new_table_reader_for_compaction_inputs: %d",
new_table_reader_for_compaction_inputs); new_table_reader_for_compaction_inputs);
ROCKS_LOG_HEADER(
log, " Options.compaction_readahead_size: %" ROCKSDB_PRIszt,
compaction_readahead_size);
ROCKS_LOG_HEADER( ROCKS_LOG_HEADER(
log, " Options.random_access_max_buffer_size: %" ROCKSDB_PRIszt, log, " Options.random_access_max_buffer_size: %" ROCKSDB_PRIszt,
random_access_max_buffer_size); random_access_max_buffer_size);
@ -234,7 +230,8 @@ MutableDBOptions::MutableDBOptions()
stats_dump_period_sec(600), stats_dump_period_sec(600),
max_open_files(-1), max_open_files(-1),
bytes_per_sync(0), bytes_per_sync(0),
wal_bytes_per_sync(0) {} wal_bytes_per_sync(0),
compaction_readahead_size(0) {}
MutableDBOptions::MutableDBOptions(const DBOptions& options) MutableDBOptions::MutableDBOptions(const DBOptions& options)
: max_background_jobs(options.max_background_jobs), : max_background_jobs(options.max_background_jobs),
@ -249,7 +246,8 @@ MutableDBOptions::MutableDBOptions(const DBOptions& options)
stats_dump_period_sec(options.stats_dump_period_sec), stats_dump_period_sec(options.stats_dump_period_sec),
max_open_files(options.max_open_files), max_open_files(options.max_open_files),
bytes_per_sync(options.bytes_per_sync), bytes_per_sync(options.bytes_per_sync),
wal_bytes_per_sync(options.wal_bytes_per_sync) {} wal_bytes_per_sync(options.wal_bytes_per_sync),
compaction_readahead_size(options.compaction_readahead_size) {}
void MutableDBOptions::Dump(Logger* log) const { void MutableDBOptions::Dump(Logger* log) const {
ROCKS_LOG_HEADER(log, " Options.max_background_jobs: %d", ROCKS_LOG_HEADER(log, " Options.max_background_jobs: %d",
@ -278,6 +276,9 @@ void MutableDBOptions::Dump(Logger* log) const {
ROCKS_LOG_HEADER(log, ROCKS_LOG_HEADER(log,
" Options.wal_bytes_per_sync: %" PRIu64, " Options.wal_bytes_per_sync: %" PRIu64,
wal_bytes_per_sync); wal_bytes_per_sync);
ROCKS_LOG_HEADER(log,
" Options.compaction_readahead_size: %" ROCKSDB_PRIszt,
compaction_readahead_size);
} }
} // namespace rocksdb } // namespace rocksdb

@ -55,7 +55,6 @@ struct ImmutableDBOptions {
std::shared_ptr<WriteBufferManager> write_buffer_manager; std::shared_ptr<WriteBufferManager> write_buffer_manager;
DBOptions::AccessHint access_hint_on_compaction_start; DBOptions::AccessHint access_hint_on_compaction_start;
bool new_table_reader_for_compaction_inputs; bool new_table_reader_for_compaction_inputs;
size_t compaction_readahead_size;
size_t random_access_max_buffer_size; size_t random_access_max_buffer_size;
bool use_adaptive_mutex; bool use_adaptive_mutex;
std::vector<std::shared_ptr<EventListener>> listeners; std::vector<std::shared_ptr<EventListener>> listeners;
@ -100,6 +99,7 @@ struct MutableDBOptions {
int max_open_files; int max_open_files;
uint64_t bytes_per_sync; uint64_t bytes_per_sync;
uint64_t wal_bytes_per_sync; uint64_t wal_bytes_per_sync;
size_t compaction_readahead_size;
}; };
} // namespace rocksdb } // namespace rocksdb

@ -87,7 +87,7 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
options.new_table_reader_for_compaction_inputs = options.new_table_reader_for_compaction_inputs =
immutable_db_options.new_table_reader_for_compaction_inputs; immutable_db_options.new_table_reader_for_compaction_inputs;
options.compaction_readahead_size = options.compaction_readahead_size =
immutable_db_options.compaction_readahead_size; mutable_db_options.compaction_readahead_size;
options.random_access_max_buffer_size = options.random_access_max_buffer_size =
immutable_db_options.random_access_max_buffer_size; immutable_db_options.random_access_max_buffer_size;
options.writable_file_max_buffer_size = options.writable_file_max_buffer_size =

@ -216,7 +216,8 @@ static std::unordered_map<std::string, OptionTypeInfo> db_options_type_info = {
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}}, OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
{"compaction_readahead_size", {"compaction_readahead_size",
{offsetof(struct DBOptions, compaction_readahead_size), OptionType::kSizeT, {offsetof(struct DBOptions, compaction_readahead_size), OptionType::kSizeT,
OptionVerificationType::kNormal, false, 0}}, OptionVerificationType::kNormal, true,
offsetof(struct MutableDBOptions, compaction_readahead_size)}},
{"random_access_max_buffer_size", {"random_access_max_buffer_size",
{offsetof(struct DBOptions, random_access_max_buffer_size), {offsetof(struct DBOptions, random_access_max_buffer_size),
OptionType::kSizeT, OptionVerificationType::kNormal, false, 0}}, OptionType::kSizeT, OptionVerificationType::kNormal, false, 0}},

Loading…
Cancel
Save