DisableManualCompaction may fail to cancel an unscheduled task (#9659)

Summary:
https://github.com/facebook/rocksdb/issues/9625 didn't change the unschedule condition which was waiting for the background thread to clean-up the compaction.
make sure we only unschedule the task when it's scheduled.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9659

Reviewed By: ajkr

Differential Revision: D34651820

Pulled By: jay-zhuang

fbshipit-source-id: 23f42081b15ec8886cd81cbf131b116e0c74dc2f
main
Jay Zhuang 3 years ago committed by Facebook GitHub Bot
parent 09b0e8f2c7
commit 4dff279b19
  1. 2
      HISTORY.md
  2. 1
      db/column_family.cc
  3. 1
      db/compaction/compaction_picker_universal.cc
  4. 106
      db/db_compaction_test.cc
  5. 31
      db/db_impl/db_impl.h
  6. 113
      db/db_impl/db_impl_compaction_flush.cc

@ -13,6 +13,8 @@
* Fixed a data race on `versions_` between `DBImpl::ResumeImpl()` and threads waiting for recovery to complete (#9496) * Fixed a data race on `versions_` between `DBImpl::ResumeImpl()` and threads waiting for recovery to complete (#9496)
* Fixed a bug caused by race among flush, incoming writes and taking snapshots. Queries to snapshots created with these race condition can return incorrect result, e.g. resurfacing deleted data. * Fixed a bug caused by race among flush, incoming writes and taking snapshots. Queries to snapshots created with these race condition can return incorrect result, e.g. resurfacing deleted data.
* Fixed a bug that DB flush uses `options.compression` even `options.compression_per_level` is set. * Fixed a bug that DB flush uses `options.compression` even `options.compression_per_level` is set.
* Fixed a bug that DisableManualCompaction may assert when disable an unscheduled manual compaction.
* Fixed a potential timer crash when open close DB concurrently.
### Public API changes ### Public API changes
* Remove BlockBasedTableOptions.hash_index_allow_collision which already takes no effect. * Remove BlockBasedTableOptions.hash_index_allow_collision which already takes no effect.

@ -989,7 +989,6 @@ WriteStallCondition ColumnFamilyData::RecalculateWriteStallConditions(
GetL0ThresholdSpeedupCompaction( GetL0ThresholdSpeedupCompaction(
mutable_cf_options.level0_file_num_compaction_trigger, mutable_cf_options.level0_file_num_compaction_trigger,
mutable_cf_options.level0_slowdown_writes_trigger)) { mutable_cf_options.level0_slowdown_writes_trigger)) {
fprintf(stdout, "JJJ2\n");
write_controller_token_ = write_controller_token_ =
write_controller->GetCompactionPressureToken(); write_controller->GetCompactionPressureToken();
ROCKS_LOG_INFO( ROCKS_LOG_INFO(

@ -372,7 +372,6 @@ Compaction* UniversalCompactionBuilder::PickCompaction() {
const int kLevel0 = 0; const int kLevel0 = 0;
score_ = vstorage_->CompactionScore(kLevel0); score_ = vstorage_->CompactionScore(kLevel0);
sorted_runs_ = CalculateSortedRuns(*vstorage_); sorted_runs_ = CalculateSortedRuns(*vstorage_);
fprintf(stdout, "JJJ1\n");
if (sorted_runs_.size() == 0 || if (sorted_runs_.size() == 0 ||
(vstorage_->FilesMarkedForPeriodicCompaction().empty() && (vstorage_->FilesMarkedForPeriodicCompaction().empty() &&

@ -6889,6 +6889,112 @@ TEST_F(DBCompactionTest, FIFOWarm) {
Destroy(options); Destroy(options);
} }
TEST_F(DBCompactionTest, DisableMultiManualCompaction) {
const int kNumL0Files = 10;
Options options = CurrentOptions();
options.level0_file_num_compaction_trigger = kNumL0Files;
Reopen(options);
// Generate 2 levels of file to make sure the manual compaction is not skipped
for (int i = 0; i < 10; i++) {
ASSERT_OK(Put(Key(i), "value"));
if (i % 2) {
ASSERT_OK(Flush());
}
}
MoveFilesToLevel(2);
for (int i = 0; i < 10; i++) {
ASSERT_OK(Put(Key(i), "value"));
if (i % 2) {
ASSERT_OK(Flush());
}
}
MoveFilesToLevel(1);
// Block compaction queue
test::SleepingBackgroundTask sleeping_task_low;
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
Env::Priority::LOW);
port::Thread compact_thread1([&]() {
CompactRangeOptions cro;
cro.exclusive_manual_compaction = false;
std::string begin_str = Key(0);
std::string end_str = Key(3);
Slice b = begin_str;
Slice e = end_str;
auto s = db_->CompactRange(cro, &b, &e);
ASSERT_TRUE(s.IsIncomplete());
});
port::Thread compact_thread2([&]() {
CompactRangeOptions cro;
cro.exclusive_manual_compaction = false;
std::string begin_str = Key(4);
std::string end_str = Key(7);
Slice b = begin_str;
Slice e = end_str;
auto s = db_->CompactRange(cro, &b, &e);
ASSERT_TRUE(s.IsIncomplete());
});
// Disable manual compaction should cancel both manual compactions and both
// compaction should return incomplete.
db_->DisableManualCompaction();
compact_thread1.join();
compact_thread2.join();
sleeping_task_low.WakeUp();
sleeping_task_low.WaitUntilDone();
ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
}
TEST_F(DBCompactionTest, DisableJustStartedManualCompaction) {
const int kNumL0Files = 4;
Options options = CurrentOptions();
options.level0_file_num_compaction_trigger = kNumL0Files;
Reopen(options);
// generate files, but avoid trigger auto compaction
for (int i = 0; i < kNumL0Files / 2; i++) {
ASSERT_OK(Put(Key(1), "value1"));
ASSERT_OK(Put(Key(2), "value2"));
ASSERT_OK(Flush());
}
// make sure the manual compaction background is started but not yet set the
// status to in_progress, then cancel the manual compaction, which should not
// result in segfault
SyncPoint::GetInstance()->LoadDependency(
{{"DBImpl::BGWorkCompaction",
"DBCompactionTest::DisableJustStartedManualCompaction:"
"PreDisableManualCompaction"},
{"DBCompactionTest::DisableJustStartedManualCompaction:"
"ManualCompactionReturn",
"BackgroundCallCompaction:0"}});
SyncPoint::GetInstance()->EnableProcessing();
port::Thread compact_thread([&]() {
CompactRangeOptions cro;
cro.exclusive_manual_compaction = true;
auto s = db_->CompactRange(cro, nullptr, nullptr);
ASSERT_TRUE(s.IsIncomplete());
TEST_SYNC_POINT(
"DBCompactionTest::DisableJustStartedManualCompaction:"
"ManualCompactionReturn");
});
TEST_SYNC_POINT(
"DBCompactionTest::DisableJustStartedManualCompaction:"
"PreDisableManualCompaction");
db_->DisableManualCompaction();
compact_thread.join();
}
TEST_F(DBCompactionTest, DisableManualCompactionThreadQueueFull) { TEST_F(DBCompactionTest, DisableManualCompactionThreadQueueFull) {
const int kNumL0Files = 4; const int kNumL0Files = 4;

@ -1516,19 +1516,31 @@ class DBImpl : public DB {
// Information for a manual compaction // Information for a manual compaction
struct ManualCompactionState { struct ManualCompactionState {
ManualCompactionState(ColumnFamilyData* _cfd, int _input_level,
int _output_level, uint32_t _output_path_id,
bool _exclusive, bool _disallow_trivial_move,
std::atomic<bool>* _canceled)
: cfd(_cfd),
input_level(_input_level),
output_level(_output_level),
output_path_id(_output_path_id),
exclusive(_exclusive),
disallow_trivial_move(_disallow_trivial_move),
canceled(_canceled) {}
ColumnFamilyData* cfd; ColumnFamilyData* cfd;
int input_level; int input_level;
int output_level; int output_level;
uint32_t output_path_id; uint32_t output_path_id;
Status status; Status status;
bool done; bool done = false;
bool in_progress; // compaction request being processed? bool in_progress = false; // compaction request being processed?
bool incomplete; // only part of requested range compacted bool incomplete = false; // only part of requested range compacted
bool exclusive; // current behavior of only one manual bool exclusive; // current behavior of only one manual
bool disallow_trivial_move; // Force actual compaction to run bool disallow_trivial_move; // Force actual compaction to run
const InternalKey* begin; // nullptr means beginning of key range const InternalKey* begin = nullptr; // nullptr means beginning of key range
const InternalKey* end; // nullptr means end of key range const InternalKey* end = nullptr; // nullptr means end of key range
InternalKey* manual_end; // how far we are compacting InternalKey* manual_end = nullptr; // how far we are compacting
InternalKey tmp_storage; // Used to keep track of compaction progress InternalKey tmp_storage; // Used to keep track of compaction progress
InternalKey tmp_storage1; // Used to keep track of compaction progress InternalKey tmp_storage1; // Used to keep track of compaction progress
std::atomic<bool>* canceled; // Compaction canceled by the user? std::atomic<bool>* canceled; // Compaction canceled by the user?
@ -1538,7 +1550,8 @@ class DBImpl : public DB {
Compaction* compaction; Compaction* compaction;
// caller retains ownership of `manual_compaction_state` as it is reused // caller retains ownership of `manual_compaction_state` as it is reused
// across background compactions. // across background compactions.
ManualCompactionState* manual_compaction_state; // nullptr if non-manual std::shared_ptr<ManualCompactionState>
manual_compaction_state; // nullptr if non-manual
// task limiter token is requested during compaction picking. // task limiter token is requested during compaction picking.
std::unique_ptr<TaskLimiterToken> task_token; std::unique_ptr<TaskLimiterToken> task_token;
}; };

@ -285,7 +285,6 @@ Status DBImpl::FlushMemTableToOutputFile(
assert(storage_info); assert(storage_info);
VersionStorageInfo::LevelSummaryStorage tmp; VersionStorageInfo::LevelSummaryStorage tmp;
fprintf(stdout, "JJJ4\n");
ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n", ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n",
column_family_name.c_str(), column_family_name.c_str(),
storage_info->LevelSummary(&tmp)); storage_info->LevelSummary(&tmp));
@ -731,7 +730,6 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
assert(storage_info); assert(storage_info);
VersionStorageInfo::LevelSummaryStorage tmp; VersionStorageInfo::LevelSummaryStorage tmp;
fprintf(stdout, "JJJ3\n");
ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n", ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n",
column_family_name.c_str(), column_family_name.c_str(),
storage_info->LevelSummary(&tmp)); storage_info->LevelSummary(&tmp));
@ -1800,34 +1798,27 @@ Status DBImpl::RunManualCompaction(
bool scheduled = false; bool scheduled = false;
Env::Priority thread_pool_priority = Env::Priority::TOTAL; Env::Priority thread_pool_priority = Env::Priority::TOTAL;
bool manual_conflict = false; bool manual_conflict = false;
ManualCompactionState manual;
manual.cfd = cfd; auto manual = std::make_shared<ManualCompactionState>(
manual.input_level = input_level; cfd, input_level, output_level, compact_range_options.target_path_id,
manual.output_level = output_level; exclusive, disallow_trivial_move, compact_range_options.canceled);
manual.output_path_id = compact_range_options.target_path_id;
manual.done = false;
manual.in_progress = false;
manual.incomplete = false;
manual.exclusive = exclusive;
manual.disallow_trivial_move = disallow_trivial_move;
manual.canceled = compact_range_options.canceled;
// For universal compaction, we enforce every manual compaction to compact // For universal compaction, we enforce every manual compaction to compact
// all files. // all files.
if (begin == nullptr || if (begin == nullptr ||
cfd->ioptions()->compaction_style == kCompactionStyleUniversal || cfd->ioptions()->compaction_style == kCompactionStyleUniversal ||
cfd->ioptions()->compaction_style == kCompactionStyleFIFO) { cfd->ioptions()->compaction_style == kCompactionStyleFIFO) {
manual.begin = nullptr; manual->begin = nullptr;
} else { } else {
begin_storage.SetMinPossibleForUserKey(*begin); begin_storage.SetMinPossibleForUserKey(*begin);
manual.begin = &begin_storage; manual->begin = &begin_storage;
} }
if (end == nullptr || if (end == nullptr ||
cfd->ioptions()->compaction_style == kCompactionStyleUniversal || cfd->ioptions()->compaction_style == kCompactionStyleUniversal ||
cfd->ioptions()->compaction_style == kCompactionStyleFIFO) { cfd->ioptions()->compaction_style == kCompactionStyleFIFO) {
manual.end = nullptr; manual->end = nullptr;
} else { } else {
end_storage.SetMaxPossibleForUserKey(*end); end_storage.SetMaxPossibleForUserKey(*end);
manual.end = &end_storage; manual->end = &end_storage;
} }
TEST_SYNC_POINT("DBImpl::RunManualCompaction:0"); TEST_SYNC_POINT("DBImpl::RunManualCompaction:0");
@ -1839,10 +1830,10 @@ Status DBImpl::RunManualCompaction(
// `DisableManualCompaction()` just waited for the manual compaction queue // `DisableManualCompaction()` just waited for the manual compaction queue
// to drain. So return immediately. // to drain. So return immediately.
TEST_SYNC_POINT("DBImpl::RunManualCompaction:PausedAtStart"); TEST_SYNC_POINT("DBImpl::RunManualCompaction:PausedAtStart");
manual.status = manual->status =
Status::Incomplete(Status::SubCode::kManualCompactionPaused); Status::Incomplete(Status::SubCode::kManualCompactionPaused);
manual.done = true; manual->done = true;
return manual.status; return manual->status;
} }
// When a manual compaction arrives, temporarily disable scheduling of // When a manual compaction arrives, temporarily disable scheduling of
@ -1862,7 +1853,7 @@ Status DBImpl::RunManualCompaction(
// However, only one of them will actually schedule compaction, while // However, only one of them will actually schedule compaction, while
// others will wait on a condition variable until it completes. // others will wait on a condition variable until it completes.
AddManualCompaction(&manual); AddManualCompaction(manual.get());
TEST_SYNC_POINT_CALLBACK("DBImpl::RunManualCompaction:NotScheduled", &mutex_); TEST_SYNC_POINT_CALLBACK("DBImpl::RunManualCompaction:NotScheduled", &mutex_);
if (exclusive) { if (exclusive) {
// Limitation: there's no way to wake up the below loop when user sets // Limitation: there's no way to wake up the below loop when user sets
@ -1871,11 +1862,11 @@ Status DBImpl::RunManualCompaction(
while (bg_bottom_compaction_scheduled_ > 0 || while (bg_bottom_compaction_scheduled_ > 0 ||
bg_compaction_scheduled_ > 0) { bg_compaction_scheduled_ > 0) {
if (manual_compaction_paused_ > 0 || if (manual_compaction_paused_ > 0 ||
(manual.canceled != nullptr && *manual.canceled == true)) { (manual->canceled != nullptr && *manual->canceled == true)) {
// Pretend the error came from compaction so the below cleanup/error // Pretend the error came from compaction so the below cleanup/error
// handling code can process it. // handling code can process it.
manual.done = true; manual->done = true;
manual.status = manual->status =
Status::Incomplete(Status::SubCode::kManualCompactionPaused); Status::Incomplete(Status::SubCode::kManualCompactionPaused);
break; break;
} }
@ -1897,56 +1888,64 @@ Status DBImpl::RunManualCompaction(
// We don't check bg_error_ here, because if we get the error in compaction, // We don't check bg_error_ here, because if we get the error in compaction,
// the compaction will set manual.status to bg_error_ and set manual.done to // the compaction will set manual.status to bg_error_ and set manual.done to
// true. // true.
while (!manual.done) { while (!manual->done) {
assert(HasPendingManualCompaction()); assert(HasPendingManualCompaction());
manual_conflict = false; manual_conflict = false;
Compaction* compaction = nullptr; Compaction* compaction = nullptr;
if (ShouldntRunManualCompaction(&manual) || (manual.in_progress == true) || if (ShouldntRunManualCompaction(manual.get()) ||
scheduled || (manual->in_progress == true) || scheduled ||
(((manual.manual_end = &manual.tmp_storage1) != nullptr) && (((manual->manual_end = &manual->tmp_storage1) != nullptr) &&
((compaction = manual.cfd->CompactRange( ((compaction = manual->cfd->CompactRange(
*manual.cfd->GetLatestMutableCFOptions(), mutable_db_options_, *manual->cfd->GetLatestMutableCFOptions(), mutable_db_options_,
manual.input_level, manual.output_level, compact_range_options, manual->input_level, manual->output_level, compact_range_options,
manual.begin, manual.end, &manual.manual_end, &manual_conflict, manual->begin, manual->end, &manual->manual_end,
max_file_num_to_ignore, trim_ts)) == nullptr && &manual_conflict, max_file_num_to_ignore, trim_ts)) == nullptr &&
manual_conflict))) { manual_conflict))) {
// exclusive manual compactions should not see a conflict during // exclusive manual compactions should not see a conflict during
// CompactRange // CompactRange
assert(!exclusive || !manual_conflict); assert(!exclusive || !manual_conflict);
// Running either this or some other manual compaction // Running either this or some other manual compaction
bg_cv_.Wait(); bg_cv_.Wait();
if (manual_compaction_paused_ > 0 && !manual.done && if (manual_compaction_paused_ > 0) {
!manual.in_progress) { manual->done = true;
manual.done = true; manual->status =
manual.status =
Status::Incomplete(Status::SubCode::kManualCompactionPaused); Status::Incomplete(Status::SubCode::kManualCompactionPaused);
assert(thread_pool_priority != Env::Priority::TOTAL); if (scheduled) {
env_->UnSchedule(GetTaskTag(TaskType::kManualCompaction), assert(thread_pool_priority != Env::Priority::TOTAL);
thread_pool_priority); auto unscheduled_task_num = env_->UnSchedule(
GetTaskTag(TaskType::kManualCompaction), thread_pool_priority);
if (unscheduled_task_num > 0) {
ROCKS_LOG_INFO(
immutable_db_options_.info_log,
"[%s] Unscheduled %d number of manual compactions from the "
"thread-pool",
cfd->GetName().c_str(), unscheduled_task_num);
}
}
break; break;
} }
if (scheduled && manual.incomplete == true) { if (scheduled && manual->incomplete == true) {
assert(!manual.in_progress); assert(!manual->in_progress);
scheduled = false; scheduled = false;
manual.incomplete = false; manual->incomplete = false;
} }
} else if (!scheduled) { } else if (!scheduled) {
if (compaction == nullptr) { if (compaction == nullptr) {
manual.done = true; manual->done = true;
bg_cv_.SignalAll(); bg_cv_.SignalAll();
continue; continue;
} }
ca = new CompactionArg; ca = new CompactionArg;
ca->db = this; ca->db = this;
ca->prepicked_compaction = new PrepickedCompaction; ca->prepicked_compaction = new PrepickedCompaction;
ca->prepicked_compaction->manual_compaction_state = &manual; ca->prepicked_compaction->manual_compaction_state = manual;
ca->prepicked_compaction->compaction = compaction; ca->prepicked_compaction->compaction = compaction;
if (!RequestCompactionToken( if (!RequestCompactionToken(
cfd, true, &ca->prepicked_compaction->task_token, &log_buffer)) { cfd, true, &ca->prepicked_compaction->task_token, &log_buffer)) {
// Don't throttle manual compaction, only count outstanding tasks. // Don't throttle manual compaction, only count outstanding tasks.
assert(false); assert(false);
} }
manual.incomplete = false; manual->incomplete = false;
if (compaction->bottommost_level() && if (compaction->bottommost_level() &&
env_->GetBackgroundThreads(Env::Priority::BOTTOM) > 0) { env_->GetBackgroundThreads(Env::Priority::BOTTOM) > 0) {
bg_bottom_compaction_scheduled_++; bg_bottom_compaction_scheduled_++;
@ -1970,18 +1969,18 @@ Status DBImpl::RunManualCompaction(
} }
log_buffer.FlushBufferToLog(); log_buffer.FlushBufferToLog();
assert(!manual.in_progress); assert(!manual->in_progress);
assert(HasPendingManualCompaction()); assert(HasPendingManualCompaction());
RemoveManualCompaction(&manual); RemoveManualCompaction(manual.get());
// if the manual job is unscheduled, try schedule other jobs in case there's // if the manual job is unscheduled, try schedule other jobs in case there's
// any unscheduled compaction job which was blocked by exclusive manual // any unscheduled compaction job which was blocked by exclusive manual
// compaction. // compaction.
if (manual.status.IsIncomplete() && if (manual->status.IsIncomplete() &&
manual.status.subcode() == Status::SubCode::kManualCompactionPaused) { manual->status.subcode() == Status::SubCode::kManualCompactionPaused) {
MaybeScheduleFlushOrCompaction(); MaybeScheduleFlushOrCompaction();
} }
bg_cv_.SignalAll(); bg_cv_.SignalAll();
return manual.status; return manual->status;
} }
void DBImpl::GenerateFlushRequest(const autovector<ColumnFamilyData*>& cfds, void DBImpl::GenerateFlushRequest(const autovector<ColumnFamilyData*>& cfds,
@ -2949,7 +2948,7 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
mutex_.Lock(); mutex_.Lock();
} else if (s.IsManualCompactionPaused()) { } else if (s.IsManualCompactionPaused()) {
assert(prepicked_compaction); assert(prepicked_compaction);
ManualCompactionState* m = prepicked_compaction->manual_compaction_state; auto m = prepicked_compaction->manual_compaction_state;
assert(m); assert(m);
ROCKS_LOG_BUFFER(&log_buffer, "[%s] [JOB %d] Manual compaction paused", ROCKS_LOG_BUFFER(&log_buffer, "[%s] [JOB %d] Manual compaction paused",
m->cfd->GetName().c_str(), job_context.job_id); m->cfd->GetName().c_str(), job_context.job_id);
@ -3031,7 +3030,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
LogBuffer* log_buffer, LogBuffer* log_buffer,
PrepickedCompaction* prepicked_compaction, PrepickedCompaction* prepicked_compaction,
Env::Priority thread_pri) { Env::Priority thread_pri) {
ManualCompactionState* manual_compaction = std::shared_ptr<ManualCompactionState> manual_compaction =
prepicked_compaction == nullptr prepicked_compaction == nullptr
? nullptr ? nullptr
: prepicked_compaction->manual_compaction_state; : prepicked_compaction->manual_compaction_state;
@ -3075,6 +3074,10 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
if (!status.ok()) { if (!status.ok()) {
if (is_manual) { if (is_manual) {
manual_compaction->status = status; manual_compaction->status = status;
manual_compaction->status
.PermitUncheckedError(); // the manual compaction thread may exit
// first, which won't be able to check the
// status
manual_compaction->done = true; manual_compaction->done = true;
manual_compaction->in_progress = false; manual_compaction->in_progress = false;
manual_compaction = nullptr; manual_compaction = nullptr;
@ -3097,7 +3100,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
// InternalKey* manual_end = &manual_end_storage; // InternalKey* manual_end = &manual_end_storage;
bool sfm_reserved_compact_space = false; bool sfm_reserved_compact_space = false;
if (is_manual) { if (is_manual) {
ManualCompactionState* m = manual_compaction; auto m = manual_compaction;
assert(m->in_progress); assert(m->in_progress);
if (!c) { if (!c) {
m->done = true; m->done = true;
@ -3481,7 +3484,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
c.reset(); c.reset();
if (is_manual) { if (is_manual) {
ManualCompactionState* m = manual_compaction; auto m = manual_compaction;
if (!status.ok()) { if (!status.ok()) {
m->status = status; m->status = status;
m->done = true; m->done = true;

Loading…
Cancel
Save