|
|
@ -863,16 +863,13 @@ void DBImpl::PurgeObsoleteWALFiles() { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// If externalTable is set, then apply recovered transactions
|
|
|
|
|
|
|
|
// to that table. This is used for readonly mode.
|
|
|
|
|
|
|
|
Status DBImpl::Recover( |
|
|
|
Status DBImpl::Recover( |
|
|
|
VersionEdit* edit, |
|
|
|
const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only, |
|
|
|
const std::vector<ColumnFamilyDescriptor>& column_families, |
|
|
|
bool error_if_log_file_exist) { |
|
|
|
MemTable* external_table, bool error_if_log_file_exist) { |
|
|
|
|
|
|
|
mutex_.AssertHeld(); |
|
|
|
mutex_.AssertHeld(); |
|
|
|
|
|
|
|
|
|
|
|
assert(db_lock_ == nullptr); |
|
|
|
assert(db_lock_ == nullptr); |
|
|
|
if (!external_table) { |
|
|
|
if (!read_only) { |
|
|
|
// We call CreateDirIfMissing() as the directory may already exist (if we
|
|
|
|
// We call CreateDirIfMissing() as the directory may already exist (if we
|
|
|
|
// are reopening a DB), when this happens we don't want creating the
|
|
|
|
// are reopening a DB), when this happens we don't want creating the
|
|
|
|
// directory to cause an error. However, we need to check if creating the
|
|
|
|
// directory to cause an error. However, we need to check if creating the
|
|
|
@ -966,12 +963,12 @@ Status DBImpl::Recover( |
|
|
|
|
|
|
|
|
|
|
|
// Recover in the order in which the logs were generated
|
|
|
|
// Recover in the order in which the logs were generated
|
|
|
|
std::sort(logs.begin(), logs.end()); |
|
|
|
std::sort(logs.begin(), logs.end()); |
|
|
|
for (size_t i = 0; i < logs.size(); i++) { |
|
|
|
for (size_t i = 0; s.ok() && i < logs.size(); i++) { |
|
|
|
s = RecoverLogFile(logs[i], edit, &max_sequence, external_table); |
|
|
|
|
|
|
|
// The previous incarnation may not have written any MANIFEST
|
|
|
|
// The previous incarnation may not have written any MANIFEST
|
|
|
|
// records after allocating this log number. So we manually
|
|
|
|
// records after allocating this log number. So we manually
|
|
|
|
// update the file number allocation counter in VersionSet.
|
|
|
|
// update the file number allocation counter in VersionSet.
|
|
|
|
versions_->MarkFileNumberUsed(logs[i]); |
|
|
|
versions_->MarkFileNumberUsed(logs[i]); |
|
|
|
|
|
|
|
s = RecoverLogFile(logs[i], &max_sequence, read_only); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (s.ok()) { |
|
|
|
if (s.ok()) { |
|
|
@ -986,10 +983,8 @@ Status DBImpl::Recover( |
|
|
|
return s; |
|
|
|
return s; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
Status DBImpl::RecoverLogFile(uint64_t log_number, |
|
|
|
Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence, |
|
|
|
VersionEdit* edit, |
|
|
|
bool read_only) { |
|
|
|
SequenceNumber* max_sequence, |
|
|
|
|
|
|
|
MemTable* external_table) { |
|
|
|
|
|
|
|
struct LogReporter : public log::Reader::Reporter { |
|
|
|
struct LogReporter : public log::Reader::Reporter { |
|
|
|
Env* env; |
|
|
|
Env* env; |
|
|
|
Logger* info_log; |
|
|
|
Logger* info_log; |
|
|
@ -1006,6 +1001,8 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, |
|
|
|
|
|
|
|
|
|
|
|
mutex_.AssertHeld(); |
|
|
|
mutex_.AssertHeld(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
VersionEdit edit; |
|
|
|
|
|
|
|
|
|
|
|
// Open the log file
|
|
|
|
// Open the log file
|
|
|
|
std::string fname = LogFileName(options_.wal_dir, log_number); |
|
|
|
std::string fname = LogFileName(options_.wal_dir, log_number); |
|
|
|
unique_ptr<SequentialFile> file; |
|
|
|
unique_ptr<SequentialFile> file; |
|
|
@ -1035,11 +1032,8 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, |
|
|
|
std::string scratch; |
|
|
|
std::string scratch; |
|
|
|
Slice record; |
|
|
|
Slice record; |
|
|
|
WriteBatch batch; |
|
|
|
WriteBatch batch; |
|
|
|
MemTable* mem = nullptr; |
|
|
|
bool memtable_empty = true; |
|
|
|
if (external_table) { |
|
|
|
while (reader.ReadRecord(&record, &scratch)) { |
|
|
|
mem = external_table; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
while (reader.ReadRecord(&record, &scratch) && status.ok()) { |
|
|
|
|
|
|
|
if (record.size() < 12) { |
|
|
|
if (record.size() < 12) { |
|
|
|
reporter.Corruption( |
|
|
|
reporter.Corruption( |
|
|
|
record.size(), Status::Corruption("log record too small")); |
|
|
|
record.size(), Status::Corruption("log record too small")); |
|
|
@ -1047,14 +1041,11 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, |
|
|
|
} |
|
|
|
} |
|
|
|
WriteBatchInternal::SetContents(&batch, record); |
|
|
|
WriteBatchInternal::SetContents(&batch, record); |
|
|
|
|
|
|
|
|
|
|
|
if (mem == nullptr) { |
|
|
|
status = WriteBatchInternal::InsertInto(&batch, mem_, &options_); |
|
|
|
mem = new MemTable(internal_comparator_, options_); |
|
|
|
memtable_empty = false; |
|
|
|
mem->Ref(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
status = WriteBatchInternal::InsertInto(&batch, mem, &options_); |
|
|
|
|
|
|
|
MaybeIgnoreError(&status); |
|
|
|
MaybeIgnoreError(&status); |
|
|
|
if (!status.ok()) { |
|
|
|
if (!status.ok()) { |
|
|
|
break; |
|
|
|
return status; |
|
|
|
} |
|
|
|
} |
|
|
|
const SequenceNumber last_seq = |
|
|
|
const SequenceNumber last_seq = |
|
|
|
WriteBatchInternal::Sequence(&batch) + |
|
|
|
WriteBatchInternal::Sequence(&batch) + |
|
|
@ -1063,28 +1054,44 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, |
|
|
|
*max_sequence = last_seq; |
|
|
|
*max_sequence = last_seq; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (!external_table && |
|
|
|
if (!read_only && |
|
|
|
mem->ApproximateMemoryUsage() > options_.write_buffer_size) { |
|
|
|
mem_->ApproximateMemoryUsage() > options_.write_buffer_size) { |
|
|
|
status = WriteLevel0TableForRecovery(mem, edit); |
|
|
|
status = WriteLevel0TableForRecovery(mem_, &edit); |
|
|
|
|
|
|
|
// we still want to clear memtable, even if the recovery failed
|
|
|
|
|
|
|
|
delete mem_->Unref(); |
|
|
|
|
|
|
|
mem_ = new MemTable(internal_comparator_, options_); |
|
|
|
|
|
|
|
mem_->Ref(); |
|
|
|
|
|
|
|
memtable_empty = true; |
|
|
|
if (!status.ok()) { |
|
|
|
if (!status.ok()) { |
|
|
|
// Reflect errors immediately so that conditions like full
|
|
|
|
// Reflect errors immediately so that conditions like full
|
|
|
|
// file-systems cause the DB::Open() to fail.
|
|
|
|
// file-systems cause the DB::Open() to fail.
|
|
|
|
break; |
|
|
|
return status; |
|
|
|
} |
|
|
|
} |
|
|
|
delete mem->Unref(); |
|
|
|
|
|
|
|
mem = nullptr; |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (status.ok() && mem != nullptr && !external_table) { |
|
|
|
if (!memtable_empty && !read_only) { |
|
|
|
status = WriteLevel0TableForRecovery(mem, edit); |
|
|
|
status = WriteLevel0TableForRecovery(mem_, &edit); |
|
|
|
// Reflect errors immediately so that conditions like full
|
|
|
|
delete mem_->Unref(); |
|
|
|
// file-systems cause the DB::Open() to fail.
|
|
|
|
mem_ = new MemTable(internal_comparator_, options_); |
|
|
|
|
|
|
|
mem_->Ref(); |
|
|
|
|
|
|
|
if (!status.ok()) { |
|
|
|
|
|
|
|
return status; |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (mem != nullptr && !external_table) { |
|
|
|
if (edit.NumEntries() > 0) { |
|
|
|
delete mem->Unref(); |
|
|
|
// if read_only, NumEntries() will be 0
|
|
|
|
|
|
|
|
assert(!read_only); |
|
|
|
|
|
|
|
// writing log number in the manifest means that any log file
|
|
|
|
|
|
|
|
// with number strongly less than (log_number + 1) is already
|
|
|
|
|
|
|
|
// recovered and should be ignored on next reincarnation.
|
|
|
|
|
|
|
|
// Since we already recovered log_number, we want all logs
|
|
|
|
|
|
|
|
// with numbers `<= log_number` (includes this one) to be ignored
|
|
|
|
|
|
|
|
edit.SetLogNumber(log_number + 1); |
|
|
|
|
|
|
|
status = versions_->LogAndApply(&edit, &mutex_); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return status; |
|
|
|
return status; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
@ -3939,9 +3946,7 @@ Status DB::OpenWithColumnFamilies( |
|
|
|
return s; |
|
|
|
return s; |
|
|
|
} |
|
|
|
} |
|
|
|
impl->mutex_.Lock(); |
|
|
|
impl->mutex_.Lock(); |
|
|
|
VersionEdit edit; |
|
|
|
s = impl->Recover(); // Handles create_if_missing, error_if_exists
|
|
|
|
// Handles create_if_missing, error_if_exists
|
|
|
|
|
|
|
|
s = impl->Recover(&edit, column_families); |
|
|
|
|
|
|
|
if (s.ok()) { |
|
|
|
if (s.ok()) { |
|
|
|
uint64_t new_log_number = impl->versions_->NewFileNumber(); |
|
|
|
uint64_t new_log_number = impl->versions_->NewFileNumber(); |
|
|
|
unique_ptr<WritableFile> lfile; |
|
|
|
unique_ptr<WritableFile> lfile; |
|
|
@ -3953,6 +3958,7 @@ Status DB::OpenWithColumnFamilies( |
|
|
|
); |
|
|
|
); |
|
|
|
if (s.ok()) { |
|
|
|
if (s.ok()) { |
|
|
|
lfile->SetPreallocationBlockSize(1.1 * impl->options_.write_buffer_size); |
|
|
|
lfile->SetPreallocationBlockSize(1.1 * impl->options_.write_buffer_size); |
|
|
|
|
|
|
|
VersionEdit edit; |
|
|
|
edit.SetLogNumber(new_log_number); |
|
|
|
edit.SetLogNumber(new_log_number); |
|
|
|
impl->logfile_number_ = new_log_number; |
|
|
|
impl->logfile_number_ = new_log_number; |
|
|
|
impl->log_.reset(new log::Writer(std::move(lfile))); |
|
|
|
impl->log_.reset(new log::Writer(std::move(lfile))); |
|
|
|