Fix the log number bug when updating MANIFEST file

Summary:
Crash may occur during the flushes of more than two mem tables.

As the info log suggested, even when both were successfully flushed,
the recovery process still pick up one of the memtable's log for recovery.

This diff fix the problem by setting the correct "log number" in MANIFEST.

Test Plan: make test; deployed to leaf4 and make sure it doesn't result in crashes of this type.

Reviewers: haobo, dhruba

CC: leveldb

Differential Revision: https://reviews.facebook.net/D13659
main
Kai Liu 11 years ago
parent e44976b199
commit c17607a251
  1. 43
      db/db_impl.cc
  2. 22
      db/memtablelist.cc
  3. 3
      db/memtablelist.h
  4. 2
      db/version_edit.h

@ -871,6 +871,9 @@ Status DBImpl::WriteLevel0Table(std::vector<MemTable*> &mems, VersionEdit* edit,
std::vector<Iterator*> list; std::vector<Iterator*> list;
for (MemTable* m : mems) { for (MemTable* m : mems) {
Log(options_.info_log,
"Flushing memtable with log file: %lu\n",
m->GetLogNumber());
list.push_back(m->NewIterator()); list.push_back(m->NewIterator());
} }
Iterator* iter = NewMergingIterator(&internal_comparator_, &list[0], Iterator* iter = NewMergingIterator(&internal_comparator_, &list[0],
@ -964,11 +967,22 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress) {
} }
// record the logfile_number_ before we release the mutex // record the logfile_number_ before we release the mutex
// entries mems are (implicitly) sorted in ascending order by their created
// time. We will use the first memtable's `edit` to keep the meta info for
// this flush.
MemTable* m = mems[0]; MemTable* m = mems[0];
VersionEdit* edit = m->GetEdits(); VersionEdit* edit = m->GetEdits();
edit->SetPrevLogNumber(0); edit->SetPrevLogNumber(0);
edit->SetLogNumber(m->GetNextLogNumber()); // Earlier logs no longer needed // SetLogNumber(log_num) indicates logs with number smaller than log_num
auto to_delete = m->GetLogNumber(); // will no longer be picked up for recovery.
edit->SetLogNumber(
mems.back()->GetNextLogNumber()
);
std::vector<uint64_t> logs_to_delete;
for (auto mem : mems) {
logs_to_delete.push_back(mem->GetLogNumber());
}
// This will release and re-acquire the mutex. // This will release and re-acquire the mutex.
Status s = WriteLevel0Table(mems, edit, &file_number); Status s = WriteLevel0Table(mems, edit, &file_number);
@ -994,14 +1008,19 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress) {
// should store the file number in the shared state, and retry // should store the file number in the shared state, and retry
// However, for now, PurgeObsoleteFiles will take care of that // However, for now, PurgeObsoleteFiles will take care of that
// anyways. // anyways.
if (options_.purge_log_after_memtable_flush && bool should_delete_log = options_.purge_log_after_memtable_flush &&
!disable_delete_obsolete_files_ && !disable_delete_obsolete_files_;
to_delete > 0) { if (should_delete_log) {
for (auto log_num : logs_to_delete) {
if (log_num < 0) {
continue;
}
mutex_.Unlock(); mutex_.Unlock();
DeleteLogFile(to_delete); DeleteLogFile(log_num);
mutex_.Lock(); mutex_.Lock();
} }
} }
}
return s; return s;
} }
@ -2874,9 +2893,12 @@ Status DBImpl::MakeRoomForWrite(bool force) {
if (force) { if (force) {
imm_.FlushRequested(); imm_.FlushRequested();
} }
mem_ = new MemTable(internal_comparator_, mem_rep_factory_, mem_ = new MemTable(
NumberLevels(), options_); internal_comparator_, mem_rep_factory_, NumberLevels(), options_);
mem_->Ref(); mem_->Ref();
Log(options_.info_log,
"New memtable created with log file: #%lu\n",
logfile_number_);
mem_->SetLogNumber(logfile_number_); mem_->SetLogNumber(logfile_number_);
force = false; // Do not force another compaction if have room force = false; // Do not force another compaction if have room
MaybeScheduleFlushOrCompaction(); MaybeScheduleFlushOrCompaction();
@ -3129,8 +3151,9 @@ Status DBImpl::DeleteFile(std::string name) {
FileType type; FileType type;
if (!ParseFileName(name, &number, &type) || if (!ParseFileName(name, &number, &type) ||
(type != kTableFile)) { (type != kTableFile)) {
Log(options_.info_log, "DeleteFile #%lld FAILED. Invalid file name\n", Log(options_.info_log,
static_cast<unsigned long long>(number)); "DeleteFile #%ld FAILED. Invalid file name\n",
number);
return Status::InvalidArgument("Invalid file name"); return Status::InvalidArgument("Invalid file name");
} }

@ -98,7 +98,6 @@ Status MemTableList::InstallMemtableFlushResults(
// flush was sucessful // flush was sucessful
bool first = true; bool first = true;
for (MemTable* m : mems) { for (MemTable* m : mems) {
// All the edits are associated with the first memtable of this batch. // All the edits are associated with the first memtable of this batch.
assert(first || m->GetEdits()->NumEntries() == 0); assert(first || m->GetEdits()->NumEntries() == 0);
first = false; first = false;
@ -124,22 +123,21 @@ Status MemTableList::InstallMemtableFlushResults(
if (!m->flush_completed_) { if (!m->flush_completed_) {
break; break;
} }
first = true;
Log(info_log, Log(info_log, "Level-0 commit table #%lu started", m->file_number_);
"Level-0 commit table #%llu: started",
(unsigned long long)m->file_number_);
// this can release and reacquire the mutex. // this can release and reacquire the mutex.
s = vset->LogAndApply(&m->edit_, mu); s = vset->LogAndApply(&m->edit_, mu);
// All the later memtables that have the same filenum // All the later memtables that have the same filenum
// are part of the same batch. They can be committed now. // are part of the same batch. They can be committed now.
uint64_t mem_id = 1; // how many memtables has been flushed.
do { do {
if (s.ok()) { // commit new state if (s.ok()) { // commit new state
Log(info_log, "Level-0 commit table #%llu: done %s", Log(info_log,
(unsigned long long)m->file_number_, "Level-0 commit table #%lu: memtable #%lu done",
first ? "": "bulk"); m->file_number_,
mem_id);
memlist_.remove(m); memlist_.remove(m);
assert(m->file_number_ > 0); assert(m->file_number_ > 0);
@ -152,8 +150,10 @@ Status MemTableList::InstallMemtableFlushResults(
size_--; size_--;
} else { } else {
//commit failed. setup state so that we can flush again. //commit failed. setup state so that we can flush again.
Log(info_log, "Level-0 commit table #%llu: failed", Log(info_log,
(unsigned long long)m->file_number_); "Level-0 commit table #%lu: memtable #%lu failed",
m->file_number_,
mem_id);
m->flush_completed_ = false; m->flush_completed_ = false;
m->flush_in_progress_ = false; m->flush_in_progress_ = false;
m->edit_.Clear(); m->edit_.Clear();
@ -163,7 +163,7 @@ Status MemTableList::InstallMemtableFlushResults(
imm_flush_needed.Release_Store((void *)1); imm_flush_needed.Release_Store((void *)1);
s = Status::IOError("Unable to commit flushed memtable"); s = Status::IOError("Unable to commit flushed memtable");
} }
first = false; ++mem_id;
} while (!memlist_.empty() && (m = memlist_.back()) && } while (!memlist_.empty() && (m = memlist_.back()) &&
m->file_number_ == file_number); m->file_number_ == file_number);
} }

@ -54,7 +54,8 @@ class MemTableList {
// not yet started. // not yet started.
bool IsFlushPending(int min_write_buffer_number_to_merge); bool IsFlushPending(int min_write_buffer_number_to_merge);
// Returns the earliest memtables that needs to be flushed. // Returns the earliest memtables that needs to be flushed. The returned
// memtables are guaranteed to be in the ascending order of created time.
void PickMemtablesToFlush(std::vector<MemTable*>* mems); void PickMemtablesToFlush(std::vector<MemTable*>* mems);
// Commit a successful flush in the manifest file // Commit a successful flush in the manifest file

@ -34,7 +34,7 @@ struct FileMetaData {
class VersionEdit { class VersionEdit {
public: public:
/* implicit */ VersionEdit(int number_levels) : explicit VersionEdit(int number_levels) :
number_levels_(number_levels) { number_levels_(number_levels) {
Clear(); Clear();
} }

Loading…
Cancel
Save