[CF] Fix CF bugs in WriteBatch

Summary:
This diff fixes two bugs:
* Increase the sequence number even if a WriteBatch fails. This is important because WriteBatches in WAL logs have implicitly increasing sequence numbers, even if one update in a write batch fails. This caused some writes to get lost in my CF stress testing.
* Tolerate 'invalid column family' errors on recovery. When a column family is dropped, the WAL logs being processed can contain WriteBatches that still refer to the dropped column family. In a recovery environment, we want to ignore those errors. In the client's Write() code path, however, we want to return the failure to the client if they are trying to add data to an invalid column family.

Test Plan: db_stress's verification works now

Reviewers: dhruba, haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D16533
main
Igor Canadi 11 years ago
parent 8ea21a778b
commit f9b2f0ad79
  1. 4
      db/db_impl.cc
  2. 71
      db/write_batch.cc
  3. 12
      db/write_batch_internal.h

@ -955,7 +955,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
WriteBatchInternal::SetContents(&batch, record); WriteBatchInternal::SetContents(&batch, record);
status = WriteBatchInternal::InsertInto( status = WriteBatchInternal::InsertInto(
&batch, column_family_memtables_.get(), log_number); &batch, column_family_memtables_.get(), true, log_number);
MaybeIgnoreError(&status); MaybeIgnoreError(&status);
if (!status.ok()) { if (!status.ok()) {
@ -3311,7 +3311,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
StartPerfTimer(&write_memtable_timer); StartPerfTimer(&write_memtable_timer);
status = WriteBatchInternal::InsertInto( status = WriteBatchInternal::InsertInto(
updates, column_family_memtables_.get(), 0, this, false); updates, column_family_memtables_.get(), false, 0, this, false);
BumpPerfTime(&perf_context.write_memtable_time, &write_memtable_timer); BumpPerfTime(&perf_context.write_memtable_time, &write_memtable_timer);
if (!status.ok()) { if (!status.ok()) {

@ -234,14 +234,17 @@ class MemTableInserter : public WriteBatch::Handler {
public: public:
SequenceNumber sequence_; SequenceNumber sequence_;
ColumnFamilyMemTables* cf_mems_; ColumnFamilyMemTables* cf_mems_;
bool recovery_;
uint64_t log_number_; uint64_t log_number_;
DBImpl* db_; DBImpl* db_;
const bool dont_filter_deletes_; const bool dont_filter_deletes_;
MemTableInserter(SequenceNumber sequence, ColumnFamilyMemTables* cf_mems, MemTableInserter(SequenceNumber sequence, ColumnFamilyMemTables* cf_mems,
uint64_t log_number, DB* db, const bool dont_filter_deletes) bool recovery, uint64_t log_number, DB* db,
const bool dont_filter_deletes)
: sequence_(sequence), : sequence_(sequence),
cf_mems_(cf_mems), cf_mems_(cf_mems),
recovery_(recovery),
log_number_(log_number), log_number_(log_number),
db_(reinterpret_cast<DBImpl*>(db)), db_(reinterpret_cast<DBImpl*>(db)),
dont_filter_deletes_(dont_filter_deletes) { dont_filter_deletes_(dont_filter_deletes) {
@ -251,19 +254,39 @@ class MemTableInserter : public WriteBatch::Handler {
} }
} }
bool IgnoreUpdate() { bool SeekToColumnFamily(uint32_t column_family_id, Status* s) {
return log_number_ != 0 && log_number_ < cf_mems_->GetLogNumber();
}
virtual Status PutCF(uint32_t column_family_id, const Slice& key,
const Slice& value) {
bool found = cf_mems_->Seek(column_family_id); bool found = cf_mems_->Seek(column_family_id);
if (recovery_ && (!found || log_number_ < cf_mems_->GetLogNumber())) {
// If in a recovery environment:
// * If column family was not found, it might mean that the WAL write
// batch references to the column family that was dropped after the
// insert. We don't want to fail the whole write batch in that case -- we
// just ignore the update.
// * If log_number_ < cf_mems_->GetLogNumber(), this means that column
// family already contains updates from this log. We can't apply updates
// twice because of update-in-place or merge workloads -- ignore the
// update
*s = Status::OK();
return false;
}
if (!found) { if (!found) {
return Status::InvalidArgument( assert(!recovery_);
// If the column family was not found in a non-recovery environment
// (client's write code-path), we have to fail the write and return
// the failure status to the client.
*s = Status::InvalidArgument(
"Invalid column family specified in write batch"); "Invalid column family specified in write batch");
return false;
} }
if (IgnoreUpdate()) { return true;
return Status::OK(); }
virtual Status PutCF(uint32_t column_family_id, const Slice& key,
const Slice& value) {
Status seek_status;
if (!SeekToColumnFamily(column_family_id, &seek_status)) {
++sequence_;
return seek_status;
} }
MemTable* mem = cf_mems_->GetMemTable(); MemTable* mem = cf_mems_->GetMemTable();
const Options* options = cf_mems_->GetFullOptions(); const Options* options = cf_mems_->GetFullOptions();
@ -315,13 +338,10 @@ class MemTableInserter : public WriteBatch::Handler {
virtual Status MergeCF(uint32_t column_family_id, const Slice& key, virtual Status MergeCF(uint32_t column_family_id, const Slice& key,
const Slice& value) { const Slice& value) {
bool found = cf_mems_->Seek(column_family_id); Status seek_status;
if (!found) { if (!SeekToColumnFamily(column_family_id, &seek_status)) {
return Status::InvalidArgument( ++sequence_;
"Invalid column family specified in write batch"); return seek_status;
}
if (IgnoreUpdate()) {
return Status::OK();
} }
MemTable* mem = cf_mems_->GetMemTable(); MemTable* mem = cf_mems_->GetMemTable();
const Options* options = cf_mems_->GetFullOptions(); const Options* options = cf_mems_->GetFullOptions();
@ -387,13 +407,10 @@ class MemTableInserter : public WriteBatch::Handler {
} }
virtual Status DeleteCF(uint32_t column_family_id, const Slice& key) { virtual Status DeleteCF(uint32_t column_family_id, const Slice& key) {
bool found = cf_mems_->Seek(column_family_id); Status seek_status;
if (!found) { if (!SeekToColumnFamily(column_family_id, &seek_status)) {
return Status::InvalidArgument( ++sequence_;
"Invalid column family specified in write batch"); return seek_status;
}
if (IgnoreUpdate()) {
return Status::OK();
} }
MemTable* mem = cf_mems_->GetMemTable(); MemTable* mem = cf_mems_->GetMemTable();
const Options* options = cf_mems_->GetFullOptions(); const Options* options = cf_mems_->GetFullOptions();
@ -421,10 +438,10 @@ class MemTableInserter : public WriteBatch::Handler {
Status WriteBatchInternal::InsertInto(const WriteBatch* b, Status WriteBatchInternal::InsertInto(const WriteBatch* b,
ColumnFamilyMemTables* memtables, ColumnFamilyMemTables* memtables,
uint64_t log_number, DB* db, bool recovery, uint64_t log_number,
const bool dont_filter_deletes) { DB* db, const bool dont_filter_deletes) {
MemTableInserter inserter(WriteBatchInternal::Sequence(b), memtables, MemTableInserter inserter(WriteBatchInternal::Sequence(b), memtables,
log_number, db, dont_filter_deletes); recovery, log_number, db, dont_filter_deletes);
return b->Iterate(&inserter); return b->Iterate(&inserter);
} }

@ -90,12 +90,18 @@ class WriteBatchInternal {
// Inserts batch entries into memtable // Inserts batch entries into memtable
// If dont_filter_deletes is false AND options.filter_deletes is true, // If dont_filter_deletes is false AND options.filter_deletes is true,
// then --> Drops deletes in batch if db->KeyMayExist returns false // then --> Drops deletes in batch if db->KeyMayExist returns false
// If log_number is not-null, the memtable will be updated only if // If recovery == true, this means InsertInto is executed on a recovery
// code-path. WriteBatch referencing a dropped column family can be
// found on a recovery code-path and should be ignored (recovery should not
// fail). Additionally, the memtable will be updated only if
// memtables->GetLogNumber() >= log_number // memtables->GetLogNumber() >= log_number
// See MemTableInserter::IgnoreUpdate() // However, if recovery == false, any WriteBatch referencing
// non-existing column family will return a failure. Also, log_number is
// ignored in that case
static Status InsertInto(const WriteBatch* batch, static Status InsertInto(const WriteBatch* batch,
ColumnFamilyMemTables* memtables, ColumnFamilyMemTables* memtables,
uint64_t log_number = 0, DB* db = nullptr, bool recovery = false, uint64_t log_number = 0,
DB* db = nullptr,
const bool dont_filter_deletes = true); const bool dont_filter_deletes = true);
static void Append(WriteBatch* dst, const WriteBatch* src); static void Append(WriteBatch* dst, const WriteBatch* src);

Loading…
Cancel
Save