@ -1219,14 +1219,16 @@ Status DBImpl::Recover(
" flag but a log file already exists " ) ;
" flag but a log file already exists " ) ;
}
}
if ( ! logs . empty ( ) ) {
// Recover in the order in which the logs were generated
// Recover in the order in which the logs were generated
std : : sort ( logs . begin ( ) , logs . end ( ) ) ;
std : : sort ( logs . begin ( ) , logs . end ( ) ) ;
for ( const auto & log : logs ) {
s = RecoverLogFiles ( logs , & max_sequence , read_only ) ;
// The previous incarnation may not have written any MANIFEST
if ( ! s . ok ( ) ) {
// records after allocating this log number. So we manually
// Clear memtables if recovery failed
// update the file number allocation counter in VersionSet.
for ( auto cfd : * versions_ - > GetColumnFamilySet ( ) ) {
versions_ - > MarkFileNumberUsed ( log ) ;
cfd - > CreateNewMemtable ( ) ;
s = RecoverLogFile ( log , & max_sequence , read_only ) ;
}
}
}
}
SetTickerCount ( stats_ , SEQUENCE_NUMBER , versions_ - > LastSequence ( ) ) ;
SetTickerCount ( stats_ , SEQUENCE_NUMBER , versions_ - > LastSequence ( ) ) ;
}
}
@ -1239,8 +1241,9 @@ Status DBImpl::Recover(
return s ;
return s ;
}
}
Status DBImpl : : RecoverLogFile ( uint64_t log_number , SequenceNumber * max_sequence ,
// REQUIRES: log_numbers are sorted in ascending order
bool read_only ) {
Status DBImpl : : RecoverLogFiles ( const std : : vector < uint64_t > & log_numbers ,
SequenceNumber * max_sequence , bool read_only ) {
struct LogReporter : public log : : Reader : : Reporter {
struct LogReporter : public log : : Reader : : Reporter {
Env * env ;
Env * env ;
Logger * info_log ;
Logger * info_log ;
@ -1256,7 +1259,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
} ;
} ;
mutex_ . AssertHeld ( ) ;
mutex_ . AssertHeld ( ) ;
Status status ;
std : : unordered_map < int , VersionEdit > version_edits ;
std : : unordered_map < int , VersionEdit > version_edits ;
// no need to refcount because iteration is under mutex
// no need to refcount because iteration is under mutex
for ( auto cfd : * versions_ - > GetColumnFamilySet ( ) ) {
for ( auto cfd : * versions_ - > GetColumnFamilySet ( ) ) {
@ -1265,13 +1268,24 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
version_edits . insert ( { cfd - > GetID ( ) , edit } ) ;
version_edits . insert ( { cfd - > GetID ( ) , edit } ) ;
}
}
for ( auto log_number : log_numbers ) {
// The previous incarnation may not have written any MANIFEST
// records after allocating this log number. So we manually
// update the file number allocation counter in VersionSet.
versions_ - > MarkFileNumberUsed ( log_number ) ;
// Open the log file
// Open the log file
std : : string fname = LogFileName ( db_options_ . wal_dir , log_number ) ;
std : : string fname = LogFileName ( db_options_ . wal_dir , log_number ) ;
unique_ptr < SequentialFile > file ;
unique_ptr < SequentialFile > file ;
Status status = env_ - > NewSequentialFile ( fname , & file , env_options_ ) ;
status = env_ - > NewSequentialFile ( fname , & file , env_options_ ) ;
if ( ! status . ok ( ) ) {
if ( ! status . ok ( ) ) {
MaybeIgnoreError ( & status ) ;
MaybeIgnoreError ( & status ) ;
if ( ! status . ok ( ) ) {
return status ;
return status ;
} else {
// Fail with one log file, but that's ok.
// Try next one.
continue ;
}
}
}
// Create the log reader.
// Create the log reader.
@ -1279,8 +1293,9 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
reporter . env = env_ ;
reporter . env = env_ ;
reporter . info_log = db_options_ . info_log . get ( ) ;
reporter . info_log = db_options_ . info_log . get ( ) ;
reporter . fname = fname . c_str ( ) ;
reporter . fname = fname . c_str ( ) ;
reporter . status = ( db_options_ . paranoid_checks & &
reporter . status =
! db_options_ . skip_log_error_on_recovery ? & status
( db_options_ . paranoid_checks & & ! db_options_ . skip_log_error_on_recovery
? & status
: nullptr ) ;
: nullptr ) ;
// We intentionally make log::Reader do checksumming even if
// We intentionally make log::Reader do checksumming even if
// paranoid_checks==false so that corruptions cause entire commits
// paranoid_checks==false so that corruptions cause entire commits
@ -1304,19 +1319,17 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
// If column family was not found, it might mean that the WAL write
// If column family was not found, it might mean that the WAL write
// batch references to the column family that was dropped after the
// batch references to the column family that was dropped after the
// insert. We don't want to fail the whole write batch in that case -- we
// insert. We don't want to fail the whole write batch in that case --
// just ignore the update. That's why we set ignore missing column families
// we just ignore the update.
// to true
// That's why we set ignore missing column families to true
status = WriteBatchInternal : : InsertInto (
status = WriteBatchInternal : : InsertInto (
& batch , column_family_memtables_ . get ( ) ,
& batch , column_family_memtables_ . get ( ) , true , log_number ) ;
true /* ignore missing column families */ , log_number ) ;
MaybeIgnoreError ( & status ) ;
MaybeIgnoreError ( & status ) ;
if ( ! status . ok ( ) ) {
if ( ! status . ok ( ) ) {
return status ;
return status ;
}
}
const SequenceNumber last_seq =
const SequenceNumber last_seq = WriteBatchInternal : : Sequence ( & batch ) +
WriteBatchInternal : : Sequence ( & batch ) +
WriteBatchInternal : : Count ( & batch ) - 1 ;
WriteBatchInternal : : Count ( & batch ) - 1 ;
if ( last_seq > * max_sequence ) {
if ( last_seq > * max_sequence ) {
* max_sequence = last_seq ;
* max_sequence = last_seq ;
@ -1334,13 +1347,12 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
assert ( iter ! = version_edits . end ( ) ) ;
assert ( iter ! = version_edits . end ( ) ) ;
VersionEdit * edit = & iter - > second ;
VersionEdit * edit = & iter - > second ;
status = WriteLevel0TableForRecovery ( cfd , cfd - > mem ( ) , edit ) ;
status = WriteLevel0TableForRecovery ( cfd , cfd - > mem ( ) , edit ) ;
// we still want to clear the memtable, even if the recovery failed
cfd - > CreateNewMemtable ( ) ;
if ( ! status . ok ( ) ) {
if ( ! status . ok ( ) ) {
// Reflect errors immediately so that conditions like full
// Reflect errors immediately so that conditions like full
// file-systems cause the DB::Open() to fail.
// file-systems cause the DB::Open() to fail.
return status ;
return status ;
}
}
cfd - > CreateNewMemtable ( ) ;
}
}
}
}
}
}
@ -1349,18 +1361,20 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
if ( versions_ - > LastSequence ( ) < * max_sequence ) {
if ( versions_ - > LastSequence ( ) < * max_sequence ) {
versions_ - > SetLastSequence ( * max_sequence ) ;
versions_ - > SetLastSequence ( * max_sequence ) ;
}
}
}
if ( ! read_only ) {
if ( ! read_only ) {
// no need to refcount since client still doesn't have access
// no need to refcount since client still doesn't have access
// to the DB and can not drop column families while we iterate
// to the DB and can not drop column families while we iterate
auto max_log_number = log_numbers . back ( ) ;
for ( auto cfd : * versions_ - > GetColumnFamilySet ( ) ) {
for ( auto cfd : * versions_ - > GetColumnFamilySet ( ) ) {
auto iter = version_edits . find ( cfd - > GetID ( ) ) ;
auto iter = version_edits . find ( cfd - > GetID ( ) ) ;
assert ( iter ! = version_edits . end ( ) ) ;
assert ( iter ! = version_edits . end ( ) ) ;
VersionEdit * edit = & iter - > second ;
VersionEdit * edit = & iter - > second ;
if ( cfd - > GetLogNumber ( ) > log_number ) {
if ( cfd - > GetLogNumber ( ) > max_ log_number) {
// Column family cfd has already flushed the data
// Column family cfd has already flushed the data
// from log_number . Memtable has to be empty because
// from all logs . Memtable has to be empty because
// we filter the updates based on log_number
// we filter the updates based on log_number
// (in WriteBatch::InsertInto)
// (in WriteBatch::InsertInto)
assert ( cfd - > mem ( ) - > GetFirstSequenceNumber ( ) = = 0 ) ;
assert ( cfd - > mem ( ) - > GetFirstSequenceNumber ( ) = = 0 ) ;
@ -1371,28 +1385,29 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
// flush the final memtable (if non-empty)
// flush the final memtable (if non-empty)
if ( cfd - > mem ( ) - > GetFirstSequenceNumber ( ) ! = 0 ) {
if ( cfd - > mem ( ) - > GetFirstSequenceNumber ( ) ! = 0 ) {
status = WriteLevel0TableForRecovery ( cfd , cfd - > mem ( ) , edit ) ;
status = WriteLevel0TableForRecovery ( cfd , cfd - > mem ( ) , edit ) ;
if ( ! status . ok ( ) ) {
// Recovery failed
break ;
}
}
// we still want to clear the memtable, even if the recovery failed
cfd - > CreateNewMemtable ( ) ;
cfd - > CreateNewMemtable ( ) ;
if ( ! status . ok ( ) ) {
return status ;
}
}
// write MANIFEST with update
// write MANIFEST with update
// writing log number in the manifest means that any log file
// writing log_ number in the manifest means that any log file
// with number strictly less than (log_number + 1) is already
// with number strictly less than (log_number + 1) is already
// recovered and should be ignored on next reincarnation.
// recovered and should be ignored on next reincarnation.
// Since we already recovered log_number, we want all logs
// Since we already recovered max_ log_number, we want all logs
// with numbers `<= log_number` (includes this one) to be ignored
// with numbers `<= max_ log_number` (includes this one) to be ignored
edit - > SetLogNumber ( log_number + 1 ) ;
edit - > SetLogNumber ( max_ log_number + 1 ) ;
// we must mark the next log number as used, even though it's
// we must mark the next log number as used, even though it's
// not actually used. that is because VersionSet assumes
// not actually used. that is because VersionSet assumes
// VersionSet::next_file_number_ always to be strictly greater than any
// VersionSet::next_file_number_ always to be strictly greater than any
// log number
// log number
versions_ - > MarkFileNumberUsed ( log_number + 1 ) ;
versions_ - > MarkFileNumberUsed ( max_ log_number + 1 ) ;
status = versions_ - > LogAndApply ( cfd , edit , & mutex_ ) ;
status = versions_ - > LogAndApply ( cfd , edit , & mutex_ ) ;
if ( ! status . ok ( ) ) {
if ( ! status . ok ( ) ) {
return status ;
// Recovery failed
break ;
}
}
}
}
}
}