Call PreReleaseCallback between WAL and memtable write (#5015)

Summary:
PreReleaseCallback meant to be called before the writes are visible to the readers. Since the sequence number is known after the WAL write, there is no reason to delay calling PreReleaseCallback to after the memtable write, which would complicates the reader's logic in presence of our memtable writes that are made visible by the other write thread.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5015

Differential Revision: D14221670

Pulled By: maysamyabandeh

fbshipit-source-id: a504dd665cf923226d7af09cc8e9c7739a25edc6
main
Maysam Yabandeh 6 years ago committed by Facebook Github Bot
parent 68a2f94d5d
commit 77ebc82b92
  1. 56
      db/db_impl_write.cc
  2. 4
      db/pre_release_callback.h

@ -146,17 +146,6 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
if (write_thread_.CompleteParallelMemTableWriter(&w)) {
// we're responsible for exit batch group
for (auto* writer : *(w.write_group)) {
if (!writer->CallbackFailed() && writer->pre_release_callback) {
assert(writer->sequence != kMaxSequenceNumber);
Status ws = writer->pre_release_callback->Callback(writer->sequence,
disable_memtable);
if (!ws.ok()) {
status = ws;
break;
}
}
}
// TODO(myabandeh): propagate status to write_group
auto last_sequence = w.write_group->last_sequence;
versions_->SetLastSequence(last_sequence);
@ -309,17 +298,8 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
const SequenceNumber current_sequence = last_sequence + 1;
last_sequence += seq_inc;
// PreReleaseCallback is called after WAL write and before memtable write
if (status.ok()) {
PERF_TIMER_GUARD(write_memtable_time);
if (!parallel) {
// w.sequence will be set inside InsertInto
w.status = WriteBatchInternal::InsertInto(
write_group, current_sequence, column_family_memtables_.get(),
&flush_scheduler_, write_options.ignore_missing_column_families,
0 /*recovery_log_number*/, this, parallel, seq_per_batch_,
batch_per_txn_);
} else {
SequenceNumber next_sequence = current_sequence;
// Note: the logic for advancing seq here must be consistent with the
// logic in WriteBatchInternal::InsertInto(write_group...) as well as
@ -330,6 +310,14 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
continue;
}
writer->sequence = next_sequence;
if (writer->pre_release_callback) {
Status ws = writer->pre_release_callback->Callback(writer->sequence,
disable_memtable);
if (!ws.ok()) {
status = ws;
break;
}
}
if (seq_per_batch_) {
assert(writer->batch_cnt);
next_sequence += writer->batch_cnt;
@ -337,6 +325,19 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
next_sequence += WriteBatchInternal::Count(writer->batch);
}
}
}
if (status.ok()) {
PERF_TIMER_GUARD(write_memtable_time);
if (!parallel) {
// w.sequence will be set inside InsertInto
w.status = WriteBatchInternal::InsertInto(
write_group, current_sequence, column_family_memtables_.get(),
&flush_scheduler_, write_options.ignore_missing_column_families,
0 /*recovery_log_number*/, this, parallel, seq_per_batch_,
batch_per_txn_);
} else {
write_group.last_sequence = last_sequence;
write_thread_.LaunchParallelMemTableWriters(&write_group);
in_parallel_group = true;
@ -388,17 +389,8 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
}
if (should_exit_batch_group) {
if (status.ok()) {
for (auto* writer : write_group) {
if (!writer->CallbackFailed() && writer->pre_release_callback) {
assert(writer->sequence != kMaxSequenceNumber);
Status ws = writer->pre_release_callback->Callback(writer->sequence,
disable_memtable);
if (!ws.ok()) {
status = ws;
break;
}
}
}
// Note: if we are to resume after non-OK statuses we need to revisit how
// we reacts to non-OK statuses here.
versions_->SetLastSequence(last_sequence);
}
MemTableInsertStatusCheck(w.status);

@ -15,8 +15,8 @@ class PreReleaseCallback {
public:
virtual ~PreReleaseCallback() {}
// Will be called while on the write thread after the write and before the
// release of the sequence number. This is useful if any operation needs to be
// Will be called while on the write thread after the write to the WAL and
// before the write to memtable. This is useful if any operation needs to be
// done before the write gets visible to the readers, or if we want to reduce
// the overhead of locking by updating something sequentially while we are on
// the write thread. If the callback fails, this function returns a non-OK

Loading…
Cancel
Save