Add DBOptions. avoid_unnecessary_blocking_io to defer file deletions (#5043)

Summary:
Just like ReadOptions::background_purge_on_iterator_cleanup but for ColumnFamilyHandle instead of Iterator.

In our use case we sometimes call ColumnFamilyHandle's destructor from low-latency threads, and sometimes it blocks the thread for a few seconds deleting the files. To avoid that, we can either offload ColumnFamilyHandle's destruction to a background thread on our side, or add this option on rocksdb side. This PR does the latter, to be consistent with how we solve exactly the same problem for iterators using background_purge_on_iterator_cleanup option.

(EDIT: It's avoid_unnecessary_blocking_io now, and affects both CF drops and iterator destructors.)
I'm not quite comfortable with having two separate options (background_purge_on_iterator_cleanup and background_purge_on_cf_cleanup) for such a rarely used thing. Maybe we should merge them? Rename background_purge_on_cf_cleanup to something like delete_files_on_background_threads_only or avoid_blocking_io_in_unexpected_places, and make iterators use it instead of the one in ReadOptions? I can do that here if you guys think it's better.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5043

Differential Revision: D14339233

Pulled By: al13n321

fbshipit-source-id: ccf7efa11c85c9a5b91d969bb55627d0fb01e7b8
main
Mike Kolupaev 6 years ago committed by Facebook Github Bot
parent 127a850beb
commit 120bc4715b
  1. 2
      HISTORY.md
  2. 9
      db/column_family.cc
  3. 3
      db/db_impl.cc
  4. 49
      db/deletefile_test.cc
  5. 8
      db/forward_iterator.cc
  6. 8
      include/rocksdb/options.h
  7. 7
      options/db_options.cc
  8. 1
      options/db_options.h
  9. 9
      options/options_helper.cc
  10. 3
      options/options_settable_test.cc

@ -9,6 +9,8 @@
* Introduce two more stats levels, kExceptHistogramOrTimers and kExceptTimers. * Introduce two more stats levels, kExceptHistogramOrTimers and kExceptTimers.
* Added a feature to perform data-block sampling for compressibility, and report stats to user. * Added a feature to perform data-block sampling for compressibility, and report stats to user.
* Add support for trace filtering. * Add support for trace filtering.
* Add DBOptions.avoid_unnecessary_blocking_io. If true, we avoid file deletion when destorying ColumnFamilyHandle and Iterator. Instead, a job is scheduled to delete the files in background.
### Public API Change ### Public API Change
* Remove bundled fbson library. * Remove bundled fbson library.
* statistics.stats_level_ becomes atomic. It is preferred to use statistics.set_stats_level() and statistics.get_stats_level() to access it. * statistics.stats_level_ becomes atomic. It is preferred to use statistics.set_stats_level() and statistics.get_stats_level() to access it.

@ -68,7 +68,14 @@ ColumnFamilyHandleImpl::~ColumnFamilyHandleImpl() {
db_->FindObsoleteFiles(&job_context, false, true); db_->FindObsoleteFiles(&job_context, false, true);
mutex_->Unlock(); mutex_->Unlock();
if (job_context.HaveSomethingToDelete()) { if (job_context.HaveSomethingToDelete()) {
db_->PurgeObsoleteFiles(job_context); bool defer_purge =
db_->immutable_db_options().avoid_unnecessary_blocking_io;
db_->PurgeObsoleteFiles(job_context, defer_purge);
if (defer_purge) {
mutex_->Lock();
db_->SchedulePurge();
mutex_->Unlock();
}
} }
job_context.Clean(); job_context.Clean();
} }

@ -1313,7 +1313,8 @@ InternalIterator* DBImpl::NewInternalIterator(const ReadOptions& read_options,
internal_iter = merge_iter_builder.Finish(); internal_iter = merge_iter_builder.Finish();
IterState* cleanup = IterState* cleanup =
new IterState(this, &mutex_, super_version, new IterState(this, &mutex_, super_version,
read_options.background_purge_on_iterator_cleanup); read_options.background_purge_on_iterator_cleanup ||
immutable_db_options_.avoid_unnecessary_blocking_io);
internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, nullptr); internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, nullptr);
return internal_iter; return internal_iter;

@ -241,7 +241,7 @@ TEST_F(DeleteFileTest, PurgeObsoleteFilesTest) {
CloseDB(); CloseDB();
} }
TEST_F(DeleteFileTest, BackgroundPurgeTest) { TEST_F(DeleteFileTest, BackgroundPurgeIteratorTest) {
std::string first("0"), last("999999"); std::string first("0"), last("999999");
CompactRangeOptions compact_options; CompactRangeOptions compact_options;
compact_options.change_level = true; compact_options.change_level = true;
@ -279,6 +279,53 @@ TEST_F(DeleteFileTest, BackgroundPurgeTest) {
CloseDB(); CloseDB();
} }
TEST_F(DeleteFileTest, BackgroundPurgeCFDropTest) {
auto do_test = [&](bool bg_purge) {
ColumnFamilyOptions co;
WriteOptions wo;
FlushOptions fo;
ColumnFamilyHandle* cfh = nullptr;
ASSERT_OK(db_->CreateColumnFamily(co, "dropme", &cfh));
ASSERT_OK(db_->Put(wo, cfh, "pika", "chu"));
ASSERT_OK(db_->Flush(fo, cfh));
// Expect 1 sst file.
CheckFileTypeCounts(dbname_, 0, 1, 1);
ASSERT_OK(db_->DropColumnFamily(cfh));
// Still 1 file, it won't be deleted while ColumnFamilyHandle is alive.
CheckFileTypeCounts(dbname_, 0, 1, 1);
delete cfh;
test::SleepingBackgroundTask sleeping_task_after;
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
&sleeping_task_after, Env::Priority::HIGH);
// If background purge is enabled, the file should still be there.
CheckFileTypeCounts(dbname_, 0, bg_purge ? 1 : 0, 1);
// Execute background purges.
sleeping_task_after.WakeUp();
sleeping_task_after.WaitUntilDone();
// The file should have been deleted.
CheckFileTypeCounts(dbname_, 0, 0, 1);
};
{
SCOPED_TRACE("avoid_unnecessary_blocking_io = false");
do_test(false);
}
options_.avoid_unnecessary_blocking_io = true;
ASSERT_OK(ReopenDB(false));
{
SCOPED_TRACE("avoid_unnecessary_blocking_io = true");
do_test(true);
}
CloseDB();
}
// This test is to reproduce a bug that read invalid ReadOption in iterator // This test is to reproduce a bug that read invalid ReadOption in iterator
// cleanup function // cleanup function
TEST_F(DeleteFileTest, BackgroundPurgeCopyOptions) { TEST_F(DeleteFileTest, BackgroundPurgeCopyOptions) {

@ -265,16 +265,18 @@ void ForwardIterator::SVCleanup() {
if (sv_ == nullptr) { if (sv_ == nullptr) {
return; return;
} }
bool background_purge =
read_options_.background_purge_on_iterator_cleanup ||
db_->immutable_db_options().avoid_unnecessary_blocking_io;
if (pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled()) { if (pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled()) {
// pinned_iters_mgr_ tells us to make sure that all visited key-value slices // pinned_iters_mgr_ tells us to make sure that all visited key-value slices
// are alive until pinned_iters_mgr_->ReleasePinnedData() is called. // are alive until pinned_iters_mgr_->ReleasePinnedData() is called.
// The slices may point into some memtables owned by sv_, so we need to keep // The slices may point into some memtables owned by sv_, so we need to keep
// sv_ referenced until pinned_iters_mgr_ unpins everything. // sv_ referenced until pinned_iters_mgr_ unpins everything.
auto p = new SVCleanupParams{ auto p = new SVCleanupParams{db_, sv_, background_purge};
db_, sv_, read_options_.background_purge_on_iterator_cleanup};
pinned_iters_mgr_->PinPtr(p, &ForwardIterator::DeferredSVCleanup); pinned_iters_mgr_->PinPtr(p, &ForwardIterator::DeferredSVCleanup);
} else { } else {
SVCleanup(db_, sv_, read_options_.background_purge_on_iterator_cleanup); SVCleanup(db_, sv_, background_purge);
} }
} }

@ -1003,6 +1003,14 @@ struct DBOptions {
// Currently, any WAL-enabled writes after atomic flush may be replayed // Currently, any WAL-enabled writes after atomic flush may be replayed
// independently if the process crashes later and tries to recover. // independently if the process crashes later and tries to recover.
bool atomic_flush = false; bool atomic_flush = false;
// If true, ColumnFamilyHandle's and Iterator's destructors won't delete
// obsolete files directly and will instead schedule a background job
// to do it. Use it if you're destroying iterators or ColumnFamilyHandle-s
// from latency-sensitive threads.
// If set to true, takes precedence over
// ReadOptions::background_purge_on_iterator_cleanup.
bool avoid_unnecessary_blocking_io = false;
}; };
// Options to control the behavior of a database (passed to DB::Open) // Options to control the behavior of a database (passed to DB::Open)

@ -86,7 +86,8 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
preserve_deletes(options.preserve_deletes), preserve_deletes(options.preserve_deletes),
two_write_queues(options.two_write_queues), two_write_queues(options.two_write_queues),
manual_wal_flush(options.manual_wal_flush), manual_wal_flush(options.manual_wal_flush),
atomic_flush(options.atomic_flush) { atomic_flush(options.atomic_flush),
avoid_unnecessary_blocking_io(options.avoid_unnecessary_blocking_io) {
} }
void ImmutableDBOptions::Dump(Logger* log) const { void ImmutableDBOptions::Dump(Logger* log) const {
@ -217,6 +218,10 @@ void ImmutableDBOptions::Dump(Logger* log) const {
two_write_queues); two_write_queues);
ROCKS_LOG_HEADER(log, " Options.manual_wal_flush: %d", ROCKS_LOG_HEADER(log, " Options.manual_wal_flush: %d",
manual_wal_flush); manual_wal_flush);
ROCKS_LOG_HEADER(log, " Options.atomic_flush: %d", atomic_flush);
ROCKS_LOG_HEADER(log,
" Options.avoid_unnecessary_blocking_io: %d",
avoid_unnecessary_blocking_io);
} }
MutableDBOptions::MutableDBOptions() MutableDBOptions::MutableDBOptions()

@ -79,6 +79,7 @@ struct ImmutableDBOptions {
bool two_write_queues; bool two_write_queues;
bool manual_wal_flush; bool manual_wal_flush;
bool atomic_flush; bool atomic_flush;
bool avoid_unnecessary_blocking_io;
}; };
struct MutableDBOptions { struct MutableDBOptions {

@ -132,6 +132,8 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
options.two_write_queues = immutable_db_options.two_write_queues; options.two_write_queues = immutable_db_options.two_write_queues;
options.manual_wal_flush = immutable_db_options.manual_wal_flush; options.manual_wal_flush = immutable_db_options.manual_wal_flush;
options.atomic_flush = immutable_db_options.atomic_flush; options.atomic_flush = immutable_db_options.atomic_flush;
options.avoid_unnecessary_blocking_io =
immutable_db_options.avoid_unnecessary_blocking_io;
return options; return options;
} }
@ -1615,7 +1617,12 @@ std::unordered_map<std::string, OptionTypeInfo>
{"atomic_flush", {"atomic_flush",
{offsetof(struct DBOptions, atomic_flush), OptionType::kBoolean, {offsetof(struct DBOptions, atomic_flush), OptionType::kBoolean,
OptionVerificationType::kNormal, false, OptionVerificationType::kNormal, false,
offsetof(struct ImmutableDBOptions, atomic_flush)}}}; offsetof(struct ImmutableDBOptions, atomic_flush)}},
{"avoid_unnecessary_blocking_io",
{offsetof(struct DBOptions, avoid_unnecessary_blocking_io),
OptionType::kBoolean, OptionVerificationType::kNormal, false,
offsetof(struct ImmutableDBOptions, avoid_unnecessary_blocking_io)}}
};
std::unordered_map<std::string, BlockBasedTableOptions::IndexType> std::unordered_map<std::string, BlockBasedTableOptions::IndexType>
OptionsHelper::block_base_table_index_type_string_map = { OptionsHelper::block_base_table_index_type_string_map = {

@ -294,7 +294,8 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
"two_write_queues=false;" "two_write_queues=false;"
"manual_wal_flush=false;" "manual_wal_flush=false;"
"seq_per_batch=false;" "seq_per_batch=false;"
"atomic_flush=false", "atomic_flush=false;"
"avoid_unnecessary_blocking_io=false",
new_options)); new_options));
ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions), ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions),

Loading…
Cancel
Save