diff --git a/HISTORY.md b/HISTORY.md index 29a4d86ff..ede253dbb 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,7 @@ # Rocksdb Change Log ## 4.7.0 (4/8/2016) ### Public API Change +* Rename option compaction_measure_io_stats to report_bg_io_stats; it now covers flushes as well as compactions. * Change some default options. Now default options will optimize for server-workloads. Also enable slowdown and full stop triggers for pending compaction bytes. These changes may cause sub-optimal performance or significant increase of resource usage. To avoid these risks, users can open existing RocksDB with options extracted from RocksDB option files. See https://github.com/facebook/rocksdb/wiki/RocksDB-Options-File for how to use RocksDB option files. Or you can call Options.OldDefaults() to recover old defaults. DEFAULT_OPTIONS_HISTORY.md will track change history of default options. ## 4.6.0 (3/10/2016) diff --git a/db/compaction_job_stats_test.cc b/db/compaction_job_stats_test.cc index 4ead93510..2cd317404 100644 --- a/db/compaction_job_stats_test.cc +++ b/db/compaction_job_stats_test.cc @@ -660,7 +660,7 @@ TEST_P(CompactionJobStatsTest, CompactionJobStatsTest) { options.max_subcompactions = max_subcompactions_; options.bytes_per_sync = 512 * 1024; - options.compaction_measure_io_stats = true; + options.report_bg_io_stats = true; for (int test = 0; test < 2; ++test) { DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); diff --git a/db/db_impl.cc b/db/db_impl.cc index 4970d7792..49a123a1d 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1551,12 +1551,13 @@ Status DBImpl::FlushMemTableToOutputFile( std::vector snapshot_seqs = snapshots_.GetAll(&earliest_write_conflict_snapshot); - FlushJob flush_job( - dbname_, cfd, db_options_, mutable_cf_options, env_options_, - versions_.get(), &mutex_, &shutting_down_, snapshot_seqs, - earliest_write_conflict_snapshot, job_context, log_buffer, - directories_.GetDbDir(), directories_.GetDataDir(0U), - 
GetCompressionFlush(*cfd->ioptions()), stats_, &event_logger_); + FlushJob flush_job(dbname_, cfd, db_options_, mutable_cf_options, + env_options_, versions_.get(), &mutex_, &shutting_down_, + snapshot_seqs, earliest_write_conflict_snapshot, + job_context, log_buffer, directories_.GetDbDir(), + directories_.GetDataDir(0U), + GetCompressionFlush(*cfd->ioptions()), stats_, + &event_logger_, mutable_cf_options.report_bg_io_stats); FileMetaData file_meta; @@ -1905,7 +1906,7 @@ Status DBImpl::CompactFilesImpl( directories_.GetDataDir(c->output_path_id()), stats_, &mutex_, &bg_error_, snapshot_seqs, earliest_write_conflict_snapshot, table_cache_, &event_logger_, c->mutable_cf_options()->paranoid_file_checks, - c->mutable_cf_options()->compaction_measure_io_stats, dbname_, + c->mutable_cf_options()->report_bg_io_stats, dbname_, nullptr); // Here we pass a nullptr for CompactionJobStats because // CompactFiles does not trigger OnCompactionCompleted(), // which is the only place where CompactionJobStats is @@ -3111,7 +3112,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, &bg_error_, snapshot_seqs, earliest_write_conflict_snapshot, table_cache_, &event_logger_, c->mutable_cf_options()->paranoid_file_checks, - c->mutable_cf_options()->compaction_measure_io_stats, dbname_, + c->mutable_cf_options()->report_bg_io_stats, dbname_, &compaction_job_stats); compaction_job.Prepare(); diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 7ad1c2fac..1af09e286 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -303,6 +303,7 @@ Options DBTestBase::CurrentOptions( case kPerfOptions: options.soft_rate_limit = 2.0; options.delayed_write_rate = 8 * 1024 * 1024; + options.report_bg_io_stats = true; // TODO(3.13) -- test more options break; case kDeletesFilterFirst: diff --git a/db/flush_job.cc b/db/flush_job.cc index 958c4460c..2616180a1 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -66,7 +66,7 @@ FlushJob::FlushJob(const std::string& dbname, 
ColumnFamilyData* cfd, JobContext* job_context, LogBuffer* log_buffer, Directory* db_directory, Directory* output_file_directory, CompressionType output_compression, Statistics* stats, - EventLogger* event_logger) + EventLogger* event_logger, bool measure_io_stats) : dbname_(dbname), cfd_(cfd), db_options_(db_options), @@ -83,7 +83,8 @@ FlushJob::FlushJob(const std::string& dbname, ColumnFamilyData* cfd, output_file_directory_(output_file_directory), output_compression_(output_compression), stats_(stats), - event_logger_(event_logger) { + event_logger_(event_logger), + measure_io_stats_(measure_io_stats) { // Update the thread status to indicate flush. ReportStartedFlush(); TEST_SYNC_POINT("FlushJob::FlushJob()"); @@ -121,6 +122,21 @@ void FlushJob::RecordFlushIOStats() { Status FlushJob::Run(FileMetaData* file_meta) { AutoThreadOperationStageUpdater stage_run( ThreadStatus::STAGE_FLUSH_RUN); + // I/O measurement variables + PerfLevel prev_perf_level = PerfLevel::kEnableTime; + uint64_t prev_write_nanos = 0; + uint64_t prev_fsync_nanos = 0; + uint64_t prev_range_sync_nanos = 0; + uint64_t prev_prepare_write_nanos = 0; + if (measure_io_stats_) { + prev_perf_level = GetPerfLevel(); + SetPerfLevel(PerfLevel::kEnableTime); + prev_write_nanos = IOSTATS(write_nanos); + prev_fsync_nanos = IOSTATS(fsync_nanos); + prev_range_sync_nanos = IOSTATS(range_sync_nanos); + prev_prepare_write_nanos = IOSTATS(prepare_write_nanos); + } + // Save the contents of the earliest memtable as a new Table FileMetaData meta; autovector mems; @@ -180,6 +196,18 @@ Status FlushJob::Run(FileMetaData* file_meta) { } stream.EndArray(); + if (measure_io_stats_) { + if (prev_perf_level != PerfLevel::kEnableTime) { + SetPerfLevel(prev_perf_level); + } + stream << "file_write_nanos" << (IOSTATS(write_nanos) - prev_write_nanos); + stream << "file_range_sync_nanos" + << (IOSTATS(range_sync_nanos) - prev_range_sync_nanos); + stream << "file_fsync_nanos" << (IOSTATS(fsync_nanos) - prev_fsync_nanos); + 
stream << "file_prepare_write_nanos" + << (IOSTATS(prepare_write_nanos) - prev_prepare_write_nanos); + } + return s; } diff --git a/db/flush_job.h b/db/flush_job.h index 4d088b58e..8c7de4b4d 100644 --- a/db/flush_job.h +++ b/db/flush_job.h @@ -62,7 +62,7 @@ class FlushJob { JobContext* job_context, LogBuffer* log_buffer, Directory* db_directory, Directory* output_file_directory, CompressionType output_compression, Statistics* stats, - EventLogger* event_logger); + EventLogger* event_logger, bool measure_io_stats); ~FlushJob(); @@ -93,6 +93,7 @@ class FlushJob { Statistics* stats_; EventLogger* event_logger_; TableProperties table_properties_; + bool measure_io_stats_; }; } // namespace rocksdb diff --git a/db/flush_job_test.cc b/db/flush_job_test.cc index 3bba6337b..c285987d9 100644 --- a/db/flush_job_test.cc +++ b/db/flush_job_test.cc @@ -93,7 +93,7 @@ TEST_F(FlushJobTest, Empty) { db_options_, *cfd->GetLatestMutableCFOptions(), env_options_, versions_.get(), &mutex_, &shutting_down_, {}, kMaxSequenceNumber, &job_context, nullptr, nullptr, - nullptr, kNoCompression, nullptr, &event_logger); + nullptr, kNoCompression, nullptr, &event_logger, false); ASSERT_OK(flush_job.Run()); job_context.Clean(); } @@ -132,7 +132,7 @@ TEST_F(FlushJobTest, NonEmpty) { db_options_, *cfd->GetLatestMutableCFOptions(), env_options_, versions_.get(), &mutex_, &shutting_down_, {}, kMaxSequenceNumber, &job_context, nullptr, nullptr, - nullptr, kNoCompression, nullptr, &event_logger); + nullptr, kNoCompression, nullptr, &event_logger, true); FileMetaData fd; mutex_.Lock(); ASSERT_OK(flush_job.Run(&fd)); @@ -192,11 +192,11 @@ TEST_F(FlushJobTest, Snapshots) { } EventLogger event_logger(db_options_.info_log.get()); - FlushJob flush_job(dbname_, versions_->GetColumnFamilySet()->GetDefault(), - db_options_, *cfd->GetLatestMutableCFOptions(), - env_options_, versions_.get(), &mutex_, &shutting_down_, - snapshots, kMaxSequenceNumber, &job_context, nullptr, - nullptr, nullptr, kNoCompression, 
nullptr, &event_logger); + FlushJob flush_job( + dbname_, versions_->GetColumnFamilySet()->GetDefault(), db_options_, + *cfd->GetLatestMutableCFOptions(), env_options_, versions_.get(), &mutex_, + &shutting_down_, snapshots, kMaxSequenceNumber, &job_context, nullptr, + nullptr, nullptr, kNoCompression, nullptr, &event_logger, true); mutex_.Lock(); ASSERT_OK(flush_job.Run()); mutex_.Unlock(); diff --git a/include/rocksdb/compaction_job_stats.h b/include/rocksdb/compaction_job_stats.h index d06fbe403..cfd81f80e 100644 --- a/include/rocksdb/compaction_job_stats.h +++ b/include/rocksdb/compaction_job_stats.h @@ -61,7 +61,7 @@ struct CompactionJobStats { uint64_t num_corrupt_keys; // Following counters are only populated if - // options.compaction_measure_io_stats = true; + // options.report_bg_io_stats = true; // Time spent on file's Append() call. uint64_t file_write_nanos; diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index c6e13b420..3e8d8278b 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -790,9 +790,9 @@ struct ColumnFamilyOptions { // Default: false bool paranoid_file_checks; - // Measure IO stats in compactions, if true. + // Measure IO stats in compactions and flushes, if true. // Default: false - bool compaction_measure_io_stats; + bool report_bg_io_stats; // Create ColumnFamilyOptions with default values for all fields ColumnFamilyOptions(); diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 9b2482bb2..888e96d71 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -516,7 +516,7 @@ DEFINE_uint64(transaction_lock_timeout, 100, " milliseconds before failing a transaction waiting on a lock"); #endif // ROCKSDB_LITE -DEFINE_bool(compaction_measure_io_stats, false, +DEFINE_bool(report_bg_io_stats, false, "Measure times spents on I/Os while in compactions. 
"); enum rocksdb::CompressionType StringToCompressionType(const char* ctype) { @@ -2599,7 +2599,7 @@ class Benchmark { exit(1); } options.max_successive_merges = FLAGS_max_successive_merges; - options.compaction_measure_io_stats = FLAGS_compaction_measure_io_stats; + options.report_bg_io_stats = FLAGS_report_bg_io_stats; // set universal style compaction configurations, if applicable if (FLAGS_universal_size_ratio != 0) { diff --git a/util/mutable_cf_options.h b/util/mutable_cf_options.h index dbae48e33..8d9dc7d64 100644 --- a/util/mutable_cf_options.h +++ b/util/mutable_cf_options.h @@ -47,7 +47,7 @@ struct MutableCFOptions { max_sequential_skip_in_iterations( options.max_sequential_skip_in_iterations), paranoid_file_checks(options.paranoid_file_checks), - compaction_measure_io_stats(options.compaction_measure_io_stats) + report_bg_io_stats(options.report_bg_io_stats) { RefreshDerivedOptions(ioptions); @@ -80,7 +80,7 @@ struct MutableCFOptions { max_subcompactions(1), max_sequential_skip_in_iterations(0), paranoid_file_checks(false), - compaction_measure_io_stats(false) {} + report_bg_io_stats(false) {} // Must be called after any change to MutableCFOptions void RefreshDerivedOptions(const ImmutableCFOptions& ioptions); @@ -135,7 +135,7 @@ struct MutableCFOptions { // Misc options uint64_t max_sequential_skip_in_iterations; bool paranoid_file_checks; - bool compaction_measure_io_stats; + bool report_bg_io_stats; // Derived options // Per-level target file size. 
diff --git a/util/options.cc b/util/options.cc index 356736afd..1c4b63a53 100644 --- a/util/options.cc +++ b/util/options.cc @@ -129,7 +129,7 @@ ColumnFamilyOptions::ColumnFamilyOptions() min_partial_merge_operands(2), optimize_filters_for_hits(false), paranoid_file_checks(false), - compaction_measure_io_stats(false) { + report_bg_io_stats(false) { assert(memtable_factory.get() != nullptr); } @@ -198,7 +198,7 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options) min_partial_merge_operands(options.min_partial_merge_operands), optimize_filters_for_hits(options.optimize_filters_for_hits), paranoid_file_checks(options.paranoid_file_checks), - compaction_measure_io_stats(options.compaction_measure_io_stats) { + report_bg_io_stats(options.report_bg_io_stats) { assert(memtable_factory.get() != nullptr); if (max_bytes_for_level_multiplier_additional.size() < static_cast(num_levels)) { @@ -604,8 +604,8 @@ void ColumnFamilyOptions::Dump(Logger* log) const { optimize_filters_for_hits); Header(log, " Options.paranoid_file_checks: %d", paranoid_file_checks); - Header(log, " Options.compaction_measure_io_stats: %d", - compaction_measure_io_stats); + Header(log, " Options.report_bg_io_stats: %d", + report_bg_io_stats); } // ColumnFamilyOptions::Dump void Options::Dump(Logger* log) const { diff --git a/util/options_helper.cc b/util/options_helper.cc index 679d0a1b4..674aed7b6 100644 --- a/util/options_helper.cc +++ b/util/options_helper.cc @@ -1452,8 +1452,7 @@ ColumnFamilyOptions BuildColumnFamilyOptions( cf_opts.max_sequential_skip_in_iterations = mutable_cf_options.max_sequential_skip_in_iterations; cf_opts.paranoid_file_checks = mutable_cf_options.paranoid_file_checks; - cf_opts.compaction_measure_io_stats = - mutable_cf_options.compaction_measure_io_stats; + cf_opts.report_bg_io_stats = mutable_cf_options.report_bg_io_stats; cf_opts.table_factory = options.table_factory; // TODO(yhchiang): find some way to handle the following derived options diff --git 
a/util/options_helper.h b/util/options_helper.h index f2e4878b1..953f2494d 100644 --- a/util/options_helper.h +++ b/util/options_helper.h @@ -329,9 +329,12 @@ static std::unordered_map cf_options_type_info = { std::string* merged_value); std::vector max_bytes_for_level_multiplier_additional; */ - {"compaction_measure_io_stats", - {offsetof(struct ColumnFamilyOptions, compaction_measure_io_stats), + {"report_bg_io_stats", + {offsetof(struct ColumnFamilyOptions, report_bg_io_stats), OptionType::kBoolean, OptionVerificationType::kNormal}}, + {"compaction_measure_io_stats", + {offsetof(struct ColumnFamilyOptions, report_bg_io_stats), + OptionType::kBoolean, OptionVerificationType::kDeprecated}}, {"disable_auto_compactions", {offsetof(struct ColumnFamilyOptions, disable_auto_compactions), OptionType::kBoolean, OptionVerificationType::kNormal}}, diff --git a/util/options_settable_test.cc b/util/options_settable_test.cc index 374b95115..40a094dd7 100644 --- a/util/options_settable_test.cc +++ b/util/options_settable_test.cc @@ -420,7 +420,7 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { "filter_deletes=false;" "hard_pending_compaction_bytes_limit=0;" "disable_auto_compactions=false;" - "compaction_measure_io_stats=true;", + "report_bg_io_stats=true;", new_options)); ASSERT_EQ(unset_bytes_base, diff --git a/util/options_test.cc b/util/options_test.cc index 43671b6b8..405e2b36a 100644 --- a/util/options_test.cc +++ b/util/options_test.cc @@ -127,7 +127,8 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { {"filter_deletes", "0"}, {"max_sequential_skip_in_iterations", "24"}, {"inplace_update_support", "true"}, - {"compaction_measure_io_stats", "true"}, + {"report_bg_io_stats", "true"}, + {"compaction_measure_io_stats", "false"}, {"inplace_update_num_locks", "25"}, {"memtable_prefix_bloom_bits", "26"}, {"memtable_prefix_bloom_probes", "27"}, diff --git a/util/testutil.cc b/util/testutil.cc index 8c587511f..fbaa75e64 100644 --- a/util/testutil.cc +++ 
b/util/testutil.cc @@ -294,7 +294,7 @@ void RandomInitCFOptions(ColumnFamilyOptions* cf_opt, Random* rnd) { cf_opt->compaction_style = (CompactionStyle)(rnd->Uniform(4)); // boolean options - cf_opt->compaction_measure_io_stats = rnd->Uniform(2); + cf_opt->report_bg_io_stats = rnd->Uniform(2); cf_opt->disable_auto_compactions = rnd->Uniform(2); cf_opt->filter_deletes = rnd->Uniform(2); cf_opt->inplace_update_support = rnd->Uniform(2);