Abort compactions more reliably when closing DB

Summary:
DB shutdown aborts running compactions by setting an atomic shutting_down=true that CompactionJob periodically checks. Without this PR it checks it before processing every _output_ value. If compaction filter filters everything out, the compaction is uninterruptible. This PR adds checks for shutting_down on every _input_ value (in CompactionIterator and MergeHelper).

There's also some minor code cleanup along the way.
Closes https://github.com/facebook/rocksdb/pull/1639

Differential Revision: D4306571

Pulled By: yiwu-arbug

fbshipit-source-id: f050890
main
Mike Kolupaev 8 years ago committed by Facebook Github Bot
parent 62384ebe9c
commit d18dd2c41f
  1. 28
      db/compaction_iterator.cc
  2. 12
      db/compaction_iterator.h
  3. 165
      db/compaction_iterator_test.cc
  4. 40
      db/compaction_job.cc
  5. 4
      db/compaction_job.h
  6. 5
      db/db_impl.cc
  7. 8
      db/merge_helper.cc
  8. 14
      db/merge_helper.h
  9. 6
      include/rocksdb/compaction_filter.h

@ -16,14 +16,14 @@ CompactionIterator::CompactionIterator(
SequenceNumber earliest_write_conflict_snapshot, Env* env, SequenceNumber earliest_write_conflict_snapshot, Env* env,
bool expect_valid_internal_key, RangeDelAggregator* range_del_agg, bool expect_valid_internal_key, RangeDelAggregator* range_del_agg,
const Compaction* compaction, const CompactionFilter* compaction_filter, const Compaction* compaction, const CompactionFilter* compaction_filter,
LogBuffer* log_buffer) const std::atomic<bool>* shutting_down)
: CompactionIterator( : CompactionIterator(
input, cmp, merge_helper, last_sequence, snapshots, input, cmp, merge_helper, last_sequence, snapshots,
earliest_write_conflict_snapshot, env, expect_valid_internal_key, earliest_write_conflict_snapshot, env, expect_valid_internal_key,
range_del_agg, range_del_agg,
std::unique_ptr<CompactionProxy>( std::unique_ptr<CompactionProxy>(
compaction ? new CompactionProxy(compaction) : nullptr), compaction ? new CompactionProxy(compaction) : nullptr),
compaction_filter, log_buffer) {} compaction_filter, shutting_down) {}
CompactionIterator::CompactionIterator( CompactionIterator::CompactionIterator(
InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper, InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
@ -31,7 +31,8 @@ CompactionIterator::CompactionIterator(
SequenceNumber earliest_write_conflict_snapshot, Env* env, SequenceNumber earliest_write_conflict_snapshot, Env* env,
bool expect_valid_internal_key, RangeDelAggregator* range_del_agg, bool expect_valid_internal_key, RangeDelAggregator* range_del_agg,
std::unique_ptr<CompactionProxy> compaction, std::unique_ptr<CompactionProxy> compaction,
const CompactionFilter* compaction_filter, LogBuffer* log_buffer) const CompactionFilter* compaction_filter,
const std::atomic<bool>* shutting_down)
: input_(input), : input_(input),
cmp_(cmp), cmp_(cmp),
merge_helper_(merge_helper), merge_helper_(merge_helper),
@ -42,7 +43,7 @@ CompactionIterator::CompactionIterator(
range_del_agg_(range_del_agg), range_del_agg_(range_del_agg),
compaction_(std::move(compaction)), compaction_(std::move(compaction)),
compaction_filter_(compaction_filter), compaction_filter_(compaction_filter),
log_buffer_(log_buffer), shutting_down_(shutting_down),
merge_out_iter_(merge_helper_) { merge_out_iter_(merge_helper_) {
assert(compaction_filter_ == nullptr || compaction_ != nullptr); assert(compaction_filter_ == nullptr || compaction_ != nullptr);
bottommost_level_ = bottommost_level_ =
@ -136,7 +137,7 @@ void CompactionIterator::NextFromInput() {
at_next_ = false; at_next_ = false;
valid_ = false; valid_ = false;
while (!valid_ && input_->Valid()) { while (!valid_ && input_->Valid() && !IsShuttingDown()) {
key_ = input_->key(); key_ = input_->key();
value_ = input_->value(); value_ = input_->value();
iter_stats_.num_input_records++; iter_stats_.num_input_records++;
@ -217,7 +218,8 @@ void CompactionIterator::NextFromInput() {
} }
if (filter == CompactionFilter::Decision::kRemove) { if (filter == CompactionFilter::Decision::kRemove) {
// convert the current key to a delete // convert the current key to a delete; key_ is pointing into
// current_key_ at this point, so updating current_key_ updates key()
ikey_.type = kTypeDeletion; ikey_.type = kTypeDeletion;
current_key_.UpdateInternalKey(ikey_.sequence, kTypeDeletion); current_key_.UpdateInternalKey(ikey_.sequence, kTypeDeletion);
// no value associated with delete // no value associated with delete
@ -422,7 +424,6 @@ void CompactionIterator::NextFromInput() {
input_->Next(); input_->Next();
} else if (ikey_.type == kTypeMerge) { } else if (ikey_.type == kTypeMerge) {
if (!merge_helper_->HasOperator()) { if (!merge_helper_->HasOperator()) {
LogToBuffer(log_buffer_, "Options::merge_operator is null.");
status_ = Status::InvalidArgument( status_ = Status::InvalidArgument(
"merge_operator is not properly initialized."); "merge_operator is not properly initialized.");
return; return;
@ -433,11 +434,14 @@ void CompactionIterator::NextFromInput() {
// have hit (A) // have hit (A)
// We encapsulate the merge related state machine in a different // We encapsulate the merge related state machine in a different
// object to minimize change to the existing flow. // object to minimize change to the existing flow.
merge_helper_->MergeUntil(input_, range_del_agg_, prev_snapshot, Status s = merge_helper_->MergeUntil(input_, range_del_agg_,
bottommost_level_); prev_snapshot, bottommost_level_);
merge_out_iter_.SeekToFirst(); merge_out_iter_.SeekToFirst();
if (merge_out_iter_.Valid()) { if (!s.ok() && !s.IsMergeInProgress()) {
status_ = s;
return;
} else if (merge_out_iter_.Valid()) {
// NOTE: key, value, and ikey_ refer to old entries. // NOTE: key, value, and ikey_ refer to old entries.
// These will be correctly set below. // These will be correctly set below.
key_ = merge_out_iter_.key(); key_ = merge_out_iter_.key();
@ -481,6 +485,10 @@ void CompactionIterator::NextFromInput() {
input_->Seek(skip_until); input_->Seek(skip_until);
} }
} }
if (!valid_ && IsShuttingDown()) {
status_ = Status::ShutdownInProgress();
}
} }
void CompactionIterator::PrepareOutput() { void CompactionIterator::PrepareOutput() {

@ -17,7 +17,6 @@
#include "db/pinned_iterators_manager.h" #include "db/pinned_iterators_manager.h"
#include "db/range_del_aggregator.h" #include "db/range_del_aggregator.h"
#include "rocksdb/compaction_filter.h" #include "rocksdb/compaction_filter.h"
#include "util/log_buffer.h"
namespace rocksdb { namespace rocksdb {
@ -61,7 +60,7 @@ class CompactionIterator {
RangeDelAggregator* range_del_agg, RangeDelAggregator* range_del_agg,
const Compaction* compaction = nullptr, const Compaction* compaction = nullptr,
const CompactionFilter* compaction_filter = nullptr, const CompactionFilter* compaction_filter = nullptr,
LogBuffer* log_buffer = nullptr); const std::atomic<bool>* shutting_down = nullptr);
// Constructor with custom CompactionProxy, used for tests. // Constructor with custom CompactionProxy, used for tests.
CompactionIterator(InternalIterator* input, const Comparator* cmp, CompactionIterator(InternalIterator* input, const Comparator* cmp,
@ -72,7 +71,7 @@ class CompactionIterator {
RangeDelAggregator* range_del_agg, RangeDelAggregator* range_del_agg,
std::unique_ptr<CompactionProxy> compaction, std::unique_ptr<CompactionProxy> compaction,
const CompactionFilter* compaction_filter = nullptr, const CompactionFilter* compaction_filter = nullptr,
LogBuffer* log_buffer = nullptr); const std::atomic<bool>* shutting_down = nullptr);
~CompactionIterator(); ~CompactionIterator();
@ -125,7 +124,7 @@ class CompactionIterator {
RangeDelAggregator* range_del_agg_; RangeDelAggregator* range_del_agg_;
std::unique_ptr<CompactionProxy> compaction_; std::unique_ptr<CompactionProxy> compaction_;
const CompactionFilter* compaction_filter_; const CompactionFilter* compaction_filter_;
LogBuffer* log_buffer_; const std::atomic<bool>* shutting_down_;
bool bottommost_level_; bool bottommost_level_;
bool valid_ = false; bool valid_ = false;
bool visible_at_tip_; bool visible_at_tip_;
@ -180,5 +179,10 @@ class CompactionIterator {
// is in or beyond the last file checked during the previous call // is in or beyond the last file checked during the previous call
std::vector<size_t> level_ptrs_; std::vector<size_t> level_ptrs_;
CompactionIterationStats iter_stats_; CompactionIterationStats iter_stats_;
bool IsShuttingDown() {
// This is a best-effort facility, so memory_order_relaxed is sufficient.
return shutting_down_ && shutting_down_->load(std::memory_order_relaxed);
}
}; };
} // namespace rocksdb } // namespace rocksdb

@ -13,6 +13,64 @@
namespace rocksdb { namespace rocksdb {
// Expects no merging attempts.
class NoMergingMergeOp : public MergeOperator {
public:
bool FullMergeV2(const MergeOperationInput& merge_in,
MergeOperationOutput* merge_out) const override {
ADD_FAILURE();
return false;
}
bool PartialMergeMulti(const Slice& key,
const std::deque<Slice>& operand_list,
std::string* new_value,
Logger* logger) const override {
ADD_FAILURE();
return false;
}
const char* Name() const override {
return "CompactionIteratorTest NoMergingMergeOp";
}
};
// Compaction filter that gets stuck when it sees a particular key,
// then gets unstuck when told to.
// Always returns Decition::kRemove.
class StallingFilter : public CompactionFilter {
public:
virtual Decision FilterV2(int level, const Slice& key, ValueType t,
const Slice& existing_value, std::string* new_value,
std::string* skip_until) const override {
int k = std::atoi(key.ToString().c_str());
last_seen.store(k);
while (k >= stall_at.load()) {
std::this_thread::yield();
}
return Decision::kRemove;
}
const char* Name() const override {
return "CompactionIteratorTest StallingFilter";
}
// Wait until the filter sees a key >= k and stalls at that key.
// If `exact`, asserts that the seen key is equal to k.
void WaitForStall(int k, bool exact = true) {
stall_at.store(k);
while (last_seen.load() < k) {
std::this_thread::yield();
}
if (exact) {
EXPECT_EQ(k, last_seen.load());
}
}
// Filter will stall on key >= stall_at. Advance stall_at to unstall.
mutable std::atomic<int> stall_at{0};
// Last key the filter was called with.
mutable std::atomic<int> last_seen{0};
};
class LoggingForwardVectorIterator : public InternalIterator { class LoggingForwardVectorIterator : public InternalIterator {
public: public:
struct Action { struct Action {
@ -88,13 +146,15 @@ class FakeCompaction : public CompactionIterator::CompactionProxy {
virtual int level(size_t compaction_input_level) const { return 0; } virtual int level(size_t compaction_input_level) const { return 0; }
virtual bool KeyNotExistsBeyondOutputLevel( virtual bool KeyNotExistsBeyondOutputLevel(
const Slice& user_key, std::vector<size_t>* level_ptrs) const { const Slice& user_key, std::vector<size_t>* level_ptrs) const {
return false; return key_not_exists_beyond_output_level;
} }
virtual bool bottommost_level() const { return false; } virtual bool bottommost_level() const { return false; }
virtual int number_levels() const { return 1; } virtual int number_levels() const { return 1; }
virtual Slice GetLargestUserKey() const { virtual Slice GetLargestUserKey() const {
return "\xff\xff\xff\xff\xff\xff\xff\xff\xff"; return "\xff\xff\xff\xff\xff\xff\xff\xff\xff";
} }
bool key_not_exists_beyond_output_level = false;
}; };
class CompactionIteratorTest : public testing::Test { class CompactionIteratorTest : public testing::Test {
@ -116,17 +176,19 @@ class CompactionIteratorTest : public testing::Test {
std::unique_ptr<CompactionIterator::CompactionProxy> compaction; std::unique_ptr<CompactionIterator::CompactionProxy> compaction;
if (filter) { if (filter) {
compaction.reset(new FakeCompaction()); compaction_proxy_ = new FakeCompaction();
compaction.reset(compaction_proxy_);
} }
merge_helper_.reset(new MergeHelper(Env::Default(), cmp_, merge_op, filter, merge_helper_.reset(new MergeHelper(Env::Default(), cmp_, merge_op, filter,
nullptr, 0U, false, 0)); nullptr, 0U, false, 0, 0, nullptr,
&shutting_down_));
iter_.reset(new LoggingForwardVectorIterator(ks, vs)); iter_.reset(new LoggingForwardVectorIterator(ks, vs));
iter_->SeekToFirst(); iter_->SeekToFirst();
c_iter_.reset(new CompactionIterator( c_iter_.reset(new CompactionIterator(
iter_.get(), cmp_, merge_helper_.get(), last_sequence, &snapshots_, iter_.get(), cmp_, merge_helper_.get(), last_sequence, &snapshots_,
kMaxSequenceNumber, Env::Default(), false, range_del_agg_.get(), kMaxSequenceNumber, Env::Default(), false, range_del_agg_.get(),
std::move(compaction), filter)); std::move(compaction), filter, &shutting_down_));
} }
void AddSnapshot(SequenceNumber snapshot) { snapshots_.push_back(snapshot); } void AddSnapshot(SequenceNumber snapshot) { snapshots_.push_back(snapshot); }
@ -138,6 +200,8 @@ class CompactionIteratorTest : public testing::Test {
std::unique_ptr<LoggingForwardVectorIterator> iter_; std::unique_ptr<LoggingForwardVectorIterator> iter_;
std::unique_ptr<CompactionIterator> c_iter_; std::unique_ptr<CompactionIterator> c_iter_;
std::unique_ptr<RangeDelAggregator> range_del_agg_; std::unique_ptr<RangeDelAggregator> range_del_agg_;
std::atomic<bool> shutting_down_{false};
FakeCompaction* compaction_proxy_;
}; };
// It is possible that the output of the compaction iterator is empty even if // It is possible that the output of the compaction iterator is empty even if
@ -209,26 +273,6 @@ TEST_F(CompactionIteratorTest, RangeDeletionWithSnapshots) {
} }
TEST_F(CompactionIteratorTest, CompactionFilterSkipUntil) { TEST_F(CompactionIteratorTest, CompactionFilterSkipUntil) {
// Expect no merging attempts.
class MergeOp : public MergeOperator {
public:
bool FullMergeV2(const MergeOperationInput& merge_in,
MergeOperationOutput* merge_out) const override {
ADD_FAILURE();
return false;
}
bool PartialMergeMulti(const Slice& key,
const std::deque<Slice>& operand_list,
std::string* new_value,
Logger* logger) const override {
ADD_FAILURE();
return false;
}
const char* Name() const override {
return "CompactionIteratorTest.CompactionFilterSkipUntil::MergeOp";
}
};
class Filter : public CompactionFilter { class Filter : public CompactionFilter {
virtual Decision FilterV2(int level, const Slice& key, ValueType t, virtual Decision FilterV2(int level, const Slice& key, ValueType t,
const Slice& existing_value, const Slice& existing_value,
@ -286,7 +330,7 @@ TEST_F(CompactionIteratorTest, CompactionFilterSkipUntil) {
} }
}; };
MergeOp merge_op; NoMergingMergeOp merge_op;
Filter filter; Filter filter;
InitIterators( InitIterators(
{test::KeyStr("a", 50, kTypeValue), // keep {test::KeyStr("a", 50, kTypeValue), // keep
@ -338,6 +382,77 @@ TEST_F(CompactionIteratorTest, CompactionFilterSkipUntil) {
ASSERT_EQ(expected_actions, iter_->log); ASSERT_EQ(expected_actions, iter_->log);
} }
TEST_F(CompactionIteratorTest, ShuttingDownInFilter) {
NoMergingMergeOp merge_op;
StallingFilter filter;
InitIterators(
{test::KeyStr("1", 1, kTypeValue), test::KeyStr("2", 2, kTypeValue),
test::KeyStr("3", 3, kTypeValue), test::KeyStr("4", 4, kTypeValue)},
{"v1", "v2", "v3", "v4"}, {}, {}, kMaxSequenceNumber, &merge_op, &filter);
// Don't leave tombstones (kTypeDeletion) for filtered keys.
compaction_proxy_->key_not_exists_beyond_output_level = true;
std::atomic<bool> seek_done{false};
std::thread compaction_thread([&] {
c_iter_->SeekToFirst();
EXPECT_FALSE(c_iter_->Valid());
EXPECT_TRUE(c_iter_->status().IsShutdownInProgress());
seek_done.store(true);
});
// Let key 1 through.
filter.WaitForStall(1);
// Shutdown during compaction filter call for key 2.
filter.WaitForStall(2);
shutting_down_.store(true);
EXPECT_FALSE(seek_done.load());
// Unstall filter and wait for SeekToFirst() to return.
filter.stall_at.store(3);
compaction_thread.join();
assert(seek_done.load());
// Check that filter was never called again.
EXPECT_EQ(2, filter.last_seen.load());
}
// Same as ShuttingDownInFilter, but shutdown happens during filter call for
// a merge operand, not for a value.
TEST_F(CompactionIteratorTest, ShuttingDownInMerge) {
NoMergingMergeOp merge_op;
StallingFilter filter;
InitIterators(
{test::KeyStr("1", 1, kTypeValue), test::KeyStr("2", 2, kTypeMerge),
test::KeyStr("3", 3, kTypeMerge), test::KeyStr("4", 4, kTypeValue)},
{"v1", "v2", "v3", "v4"}, {}, {}, kMaxSequenceNumber, &merge_op, &filter);
compaction_proxy_->key_not_exists_beyond_output_level = true;
std::atomic<bool> seek_done{false};
std::thread compaction_thread([&] {
c_iter_->SeekToFirst();
ASSERT_FALSE(c_iter_->Valid());
ASSERT_TRUE(c_iter_->status().IsShutdownInProgress());
seek_done.store(true);
});
// Let key 1 through.
filter.WaitForStall(1);
// Shutdown during compaction filter call for key 2.
filter.WaitForStall(2);
shutting_down_.store(true);
EXPECT_FALSE(seek_done.load());
// Unstall filter and wait for SeekToFirst() to return.
filter.stall_at.store(3);
compaction_thread.join();
assert(seek_done.load());
// Check that filter was never called again.
EXPECT_EQ(2, filter.last_seen.load());
}
} // namespace rocksdb } // namespace rocksdb
int main(int argc, char** argv) { int main(int argc, char** argv) {

@ -264,7 +264,7 @@ void CompactionJob::AggregateStatistics() {
CompactionJob::CompactionJob( CompactionJob::CompactionJob(
int job_id, Compaction* compaction, const ImmutableDBOptions& db_options, int job_id, Compaction* compaction, const ImmutableDBOptions& db_options,
const EnvOptions& env_options, VersionSet* versions, const EnvOptions& env_options, VersionSet* versions,
std::atomic<bool>* shutting_down, LogBuffer* log_buffer, const std::atomic<bool>* shutting_down, LogBuffer* log_buffer,
Directory* db_directory, Directory* output_directory, Statistics* stats, Directory* db_directory, Directory* output_directory, Statistics* stats,
InstrumentedMutex* db_mutex, Status* db_bg_error, InstrumentedMutex* db_mutex, Status* db_bg_error,
std::vector<SequenceNumber> existing_snapshots, std::vector<SequenceNumber> existing_snapshots,
@ -724,7 +724,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
mutable_cf_options->min_partial_merge_operands, mutable_cf_options->min_partial_merge_operands,
false /* internal key corruption is expected */, false /* internal key corruption is expected */,
existing_snapshots_.empty() ? 0 : existing_snapshots_.back(), existing_snapshots_.empty() ? 0 : existing_snapshots_.back(),
compact_->compaction->level(), db_options_.statistics.get()); compact_->compaction->level(), db_options_.statistics.get(),
shutting_down_);
TEST_SYNC_POINT("CompactionJob::Run():Inprogress"); TEST_SYNC_POINT("CompactionJob::Run():Inprogress");
@ -742,7 +743,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
sub_compact->c_iter.reset(new CompactionIterator( sub_compact->c_iter.reset(new CompactionIterator(
input.get(), cfd->user_comparator(), &merge, versions_->LastSequence(), input.get(), cfd->user_comparator(), &merge, versions_->LastSequence(),
&existing_snapshots_, earliest_write_conflict_snapshot_, env_, false, &existing_snapshots_, earliest_write_conflict_snapshot_, env_, false,
range_del_agg.get(), sub_compact->compaction, compaction_filter)); range_del_agg.get(), sub_compact->compaction, compaction_filter,
shutting_down_));
auto c_iter = sub_compact->c_iter.get(); auto c_iter = sub_compact->c_iter.get();
c_iter->SeekToFirst(); c_iter->SeekToFirst();
const auto& c_iter_stats = c_iter->iter_stats(); const auto& c_iter_stats = c_iter->iter_stats();
@ -753,10 +755,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
std::string compression_dict; std::string compression_dict;
compression_dict.reserve(cfd->ioptions()->compression_opts.max_dict_bytes); compression_dict.reserve(cfd->ioptions()->compression_opts.max_dict_bytes);
// TODO(noetzli): check whether we could check !shutting_down_->... only while (status.ok() && !cfd->IsDropped() && c_iter->Valid()) {
// only occasionally (see diff D42687)
while (status.ok() && !shutting_down_->load(std::memory_order_acquire) &&
!cfd->IsDropped() && c_iter->Valid()) {
// Invariant: c_iter.status() is guaranteed to be OK if c_iter->Valid() // Invariant: c_iter.status() is guaranteed to be OK if c_iter->Valid()
// returns true. // returns true.
const Slice& key = c_iter->key(); const Slice& key = c_iter->key();
@ -903,26 +902,35 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
RecordDroppedKeys(c_iter_stats, &sub_compact->compaction_job_stats); RecordDroppedKeys(c_iter_stats, &sub_compact->compaction_job_stats);
RecordCompactionIOStats(); RecordCompactionIOStats();
if (status.ok() && if (status.ok() && (shutting_down_->load(std::memory_order_relaxed) ||
(shutting_down_->load(std::memory_order_acquire) || cfd->IsDropped())) { cfd->IsDropped())) {
status = Status::ShutdownInProgress( status = Status::ShutdownInProgress(
"Database shutdown or Column family drop during compaction"); "Database shutdown or Column family drop during compaction");
} }
if (status.ok()) {
status = input->status();
}
if (status.ok()) {
status = c_iter->status();
}
if (status.ok() && sub_compact->builder == nullptr && if (status.ok() && sub_compact->builder == nullptr &&
sub_compact->outputs.size() == 0 && sub_compact->outputs.size() == 0 &&
range_del_agg->ShouldAddTombstones(bottommost_level_)) { range_del_agg->ShouldAddTombstones(bottommost_level_)) {
// handle subcompaction containing only range deletions // handle subcompaction containing only range deletions
status = OpenCompactionOutputFile(sub_compact); status = OpenCompactionOutputFile(sub_compact);
} }
if (status.ok() && sub_compact->builder != nullptr) {
// Call FinishCompactionOutputFile() even if status is not ok: it needs to
// close the output file.
if (sub_compact->builder != nullptr) {
CompactionIterationStats range_del_out_stats; CompactionIterationStats range_del_out_stats;
status = Status s = FinishCompactionOutputFile(
FinishCompactionOutputFile(input->status(), sub_compact, status, sub_compact, range_del_agg.get(), &range_del_out_stats);
range_del_agg.get(), &range_del_out_stats);
RecordDroppedKeys(range_del_out_stats, &sub_compact->compaction_job_stats);
}
if (status.ok()) { if (status.ok()) {
status = input->status(); status = s;
}
RecordDroppedKeys(range_del_out_stats, &sub_compact->compaction_job_stats);
} }
if (measure_io_stats_) { if (measure_io_stats_) {

@ -57,7 +57,7 @@ class CompactionJob {
CompactionJob(int job_id, Compaction* compaction, CompactionJob(int job_id, Compaction* compaction,
const ImmutableDBOptions& db_options, const ImmutableDBOptions& db_options,
const EnvOptions& env_options, VersionSet* versions, const EnvOptions& env_options, VersionSet* versions,
std::atomic<bool>* shutting_down, LogBuffer* log_buffer, const std::atomic<bool>* shutting_down, LogBuffer* log_buffer,
Directory* db_directory, Directory* output_directory, Directory* db_directory, Directory* output_directory,
Statistics* stats, InstrumentedMutex* db_mutex, Statistics* stats, InstrumentedMutex* db_mutex,
Status* db_bg_error, Status* db_bg_error,
@ -131,7 +131,7 @@ class CompactionJob {
Env* env_; Env* env_;
VersionSet* versions_; VersionSet* versions_;
std::atomic<bool>* shutting_down_; const std::atomic<bool>* shutting_down_;
LogBuffer* log_buffer_; LogBuffer* log_buffer_;
Directory* db_directory_; Directory* db_directory_;
Directory* output_directory_; Directory* output_directory_;

@ -379,6 +379,9 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname)
void DBImpl::CancelAllBackgroundWork(bool wait) { void DBImpl::CancelAllBackgroundWork(bool wait) {
InstrumentedMutexLock l(&mutex_); InstrumentedMutexLock l(&mutex_);
Log(InfoLogLevel::INFO_LEVEL, immutable_db_options_.info_log,
"Shutdown: canceling all background work");
if (!shutting_down_.load(std::memory_order_acquire) && if (!shutting_down_.load(std::memory_order_acquire) &&
has_unpersisted_data_ && has_unpersisted_data_ &&
!mutable_db_options_.avoid_flush_during_shutdown) { !mutable_db_options_.avoid_flush_during_shutdown) {
@ -503,6 +506,8 @@ DBImpl::~DBImpl() {
env_->UnlockFile(db_lock_); env_->UnlockFile(db_lock_);
} }
Log(InfoLogLevel::INFO_LEVEL, immutable_db_options_.info_log,
"Shutdown complete");
LogFlush(immutable_db_options_.info_log); LogFlush(immutable_db_options_.info_log);
} }

@ -104,6 +104,10 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
Status s; Status s;
bool hit_the_next_user_key = false; bool hit_the_next_user_key = false;
for (; iter->Valid(); iter->Next(), original_key_is_iter = false) { for (; iter->Valid(); iter->Next(), original_key_is_iter = false) {
if (IsShuttingDown()) {
return Status::ShutdownInProgress();
}
ParsedInternalKey ikey; ParsedInternalKey ikey;
assert(keys_.size() == merge_context_.GetNumOperands()); assert(keys_.size() == merge_context_.GetNumOperands());
@ -278,10 +282,6 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
// We haven't seen the beginning of the key nor a Put/Delete. // We haven't seen the beginning of the key nor a Put/Delete.
// Attempt to use the user's associative merge function to // Attempt to use the user's associative merge function to
// merge the stacked merge operands into a single operand. // merge the stacked merge operands into a single operand.
//
// TODO(noetzli) The docblock of MergeUntil suggests that a successful
// partial merge returns Status::OK(). Should we change the status code
// after a successful partial merge?
s = Status::MergeInProgress(); s = Status::MergeInProgress();
if (merge_context_.GetNumOperands() >= 2 && if (merge_context_.GetNumOperands() >= 2 &&
merge_context_.GetNumOperands() >= min_partial_merge_operands_) { merge_context_.GetNumOperands() >= min_partial_merge_operands_) {

@ -34,11 +34,13 @@ class MergeHelper {
const CompactionFilter* compaction_filter, Logger* logger, const CompactionFilter* compaction_filter, Logger* logger,
unsigned min_partial_merge_operands, unsigned min_partial_merge_operands,
bool assert_valid_internal_key, SequenceNumber latest_snapshot, bool assert_valid_internal_key, SequenceNumber latest_snapshot,
int level = 0, Statistics* stats = nullptr) int level = 0, Statistics* stats = nullptr,
const std::atomic<bool>* shutting_down = nullptr)
: env_(env), : env_(env),
user_comparator_(user_comparator), user_comparator_(user_comparator),
user_merge_operator_(user_merge_operator), user_merge_operator_(user_merge_operator),
compaction_filter_(compaction_filter), compaction_filter_(compaction_filter),
shutting_down_(shutting_down),
logger_(logger), logger_(logger),
min_partial_merge_operands_(min_partial_merge_operands), min_partial_merge_operands_(min_partial_merge_operands),
assert_valid_internal_key_(assert_valid_internal_key), assert_valid_internal_key_(assert_valid_internal_key),
@ -81,10 +83,12 @@ class MergeHelper {
// //
// Returns one of the following statuses: // Returns one of the following statuses:
// - OK: Entries were successfully merged. // - OK: Entries were successfully merged.
// - MergeInProgress: Put/Delete not encountered and unable to merge operands. // - MergeInProgress: Put/Delete not encountered, and didn't reach the start
// of key's history. Output consists of merge operands only.
// - Corruption: Merge operator reported unsuccessful merge or a corrupted // - Corruption: Merge operator reported unsuccessful merge or a corrupted
// key has been encountered and not expected (applies only when compiling // key has been encountered and not expected (applies only when compiling
// with asserts removed). // with asserts removed).
// - ShutdownInProgress: interrupted by shutdown (*shutting_down == true).
// //
// REQUIRED: The first key in the input is not corrupted. // REQUIRED: The first key in the input is not corrupted.
Status MergeUntil(InternalIterator* iter, Status MergeUntil(InternalIterator* iter,
@ -150,6 +154,7 @@ class MergeHelper {
const Comparator* user_comparator_; const Comparator* user_comparator_;
const MergeOperator* user_merge_operator_; const MergeOperator* user_merge_operator_;
const CompactionFilter* compaction_filter_; const CompactionFilter* compaction_filter_;
const std::atomic<bool>* shutting_down_;
Logger* logger_; Logger* logger_;
unsigned min_partial_merge_operands_; unsigned min_partial_merge_operands_;
bool assert_valid_internal_key_; // enforce no internal key corruption? bool assert_valid_internal_key_; // enforce no internal key corruption?
@ -171,6 +176,11 @@ class MergeHelper {
bool has_compaction_filter_skip_until_ = false; bool has_compaction_filter_skip_until_ = false;
std::string compaction_filter_value_; std::string compaction_filter_value_;
InternalKey compaction_filter_skip_until_; InternalKey compaction_filter_skip_until_;
bool IsShuttingDown() {
// This is a best-effort facility, so memory_order_relaxed is sufficient.
return shutting_down_ && shutting_down_->load(std::memory_order_relaxed);
}
}; };
// MergeOutputIterator can be used to iterate over the result of a merge. // MergeOutputIterator can be used to iterate over the result of a merge.

@ -140,6 +140,12 @@ class CompactionFilter {
// by kRemoveAndSkipUntil can disappear from a snapshot - beware // by kRemoveAndSkipUntil can disappear from a snapshot - beware
// if you're using TransactionDB or DB::GetSnapshot(). // if you're using TransactionDB or DB::GetSnapshot().
// //
// Another warning: if value for a key was overwritten or merged into
// (multiple Put()s or Merge()s), and compaction filter skips this key
// with kRemoveAndSkipUntil, it's possible that it will remove only
// the new value, exposing the old value that was supposed to be
// overwritten.
//
// If you use kRemoveAndSkipUntil, consider also reducing // If you use kRemoveAndSkipUntil, consider also reducing
// compaction_readahead_size option. // compaction_readahead_size option.
// //

Loading…
Cancel
Save