log_reader: pass in WALRecoveryMode instead of bool report_eof_inconsistency

Soon our behavior will depend on more than just whether we are in
kAbsoluteConsistency or not.

Signed-off-by: Sage Weil <sage@redhat.com>
main
Sage Weil 9 years ago
parent 7188052107
commit 9c33f64d19
  1. 22
      db/db_impl.cc
  2. 24
      db/log_reader.cc
  3. 9
      db/log_reader.h
  4. 13
      db/log_test.cc

@ -1126,21 +1126,6 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
db_options_.wal_recovery_mode, !continue_replay_log); db_options_.wal_recovery_mode, !continue_replay_log);
// Determine if we should tolerate incomplete records at the tail end of the // Determine if we should tolerate incomplete records at the tail end of the
// log
bool report_eof_inconsistency;
if (db_options_.wal_recovery_mode ==
WALRecoveryMode::kAbsoluteConsistency) {
// in clean shutdown we don't expect any error in the log files
report_eof_inconsistency = true;
} else {
// for other modes ignore only incomplete records in the last log file
// which is presumably due to write in progress during restart
report_eof_inconsistency = false;
// TODO krad: Evaluate if we need to move to a more strict mode where we
// restrict the inconsistency to only the last log
}
// Read all the records and add to a memtable // Read all the records and add to a memtable
std::string scratch; std::string scratch;
Slice record; Slice record;
@ -1155,9 +1140,10 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
} }
} }
while (continue_replay_log && while (
reader.ReadRecord(&record, &scratch, report_eof_inconsistency) && continue_replay_log &&
status.ok()) { reader.ReadRecord(&record, &scratch, db_options_.wal_recovery_mode) &&
status.ok()) {
if (record.size() < 12) { if (record.size() < 12) {
reporter.Corruption(record.size(), reporter.Corruption(record.size(),
Status::Corruption("log record too small")); Status::Corruption("log record too small"));

@ -66,8 +66,15 @@ bool Reader::SkipToInitialBlock() {
return true; return true;
} }
// For kAbsoluteConsistency, on clean shutdown we don't expect any error
// in the log files. For other modes, we can ignore only incomplete records
// in the last log file, which are presumably due to a write in progress
// during restart (or from log recycling).
//
// TODO krad: Evaluate if we need to move to a more strict mode where we
// restrict the inconsistency to only the last log
bool Reader::ReadRecord(Slice* record, std::string* scratch, bool Reader::ReadRecord(Slice* record, std::string* scratch,
const bool report_eof_inconsistency) { WALRecoveryMode wal_recovery_mode) {
if (last_record_offset_ < initial_offset_) { if (last_record_offset_ < initial_offset_) {
if (!SkipToInitialBlock()) { if (!SkipToInitialBlock()) {
return false; return false;
@ -85,7 +92,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch,
while (true) { while (true) {
uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size(); uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size();
const unsigned int record_type = const unsigned int record_type =
ReadPhysicalRecord(&fragment, report_eof_inconsistency); ReadPhysicalRecord(&fragment, wal_recovery_mode);
switch (record_type) { switch (record_type) {
case kFullType: case kFullType:
if (in_fragmented_record && !scratch->empty()) { if (in_fragmented_record && !scratch->empty()) {
@ -137,7 +144,8 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch,
case kEof: case kEof:
if (in_fragmented_record) { if (in_fragmented_record) {
if (report_eof_inconsistency) { if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency) {
// in clean shutdown we don't expect any error in the log files
ReportCorruption(scratch->size(), "error reading trailing data"); ReportCorruption(scratch->size(), "error reading trailing data");
} }
// This can be caused by the writer dying immediately after // This can be caused by the writer dying immediately after
@ -249,7 +257,7 @@ void Reader::ReportDrop(size_t bytes, const Status& reason) {
} }
unsigned int Reader::ReadPhysicalRecord(Slice* result, unsigned int Reader::ReadPhysicalRecord(Slice* result,
const bool report_eof_inconsistency) { WALRecoveryMode wal_recovery_mode) {
while (true) { while (true) {
if (buffer_.size() < (size_t)kHeaderSize) { if (buffer_.size() < (size_t)kHeaderSize) {
if (!eof_ && !read_error_) { if (!eof_ && !read_error_) {
@ -272,7 +280,9 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result,
// end of the file, which can be caused by the writer crashing in the // end of the file, which can be caused by the writer crashing in the
// middle of writing the header. Unless explicitly requested we don't // middle of writing the header. Unless explicitly requested we don't
// considering this an error, just report EOF. // considering this an error, just report EOF.
if (buffer_.size() && report_eof_inconsistency) { if (buffer_.size() &&
wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency) {
// in clean shutdown we don't expect any error in the log files
ReportCorruption(buffer_.size(), "truncated header"); ReportCorruption(buffer_.size(), "truncated header");
} }
buffer_.clear(); buffer_.clear();
@ -296,7 +306,9 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result,
// If the end of the file has been reached without reading |length| bytes // If the end of the file has been reached without reading |length| bytes
// of payload, assume the writer died in the middle of writing the record. // of payload, assume the writer died in the middle of writing the record.
// Don't report a corruption unless requested. // Don't report a corruption unless requested.
if (drop_size && report_eof_inconsistency) { if (drop_size &&
wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency) {
// in clean shutdown we don't expect any error in the log files
ReportCorruption(drop_size, "truncated header"); ReportCorruption(drop_size, "truncated header");
} }
return kEof; return kEof;

@ -14,6 +14,7 @@
#include "db/log_format.h" #include "db/log_format.h"
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
#include "rocksdb/status.h" #include "rocksdb/status.h"
#include "rocksdb/options.h"
namespace rocksdb { namespace rocksdb {
@ -65,7 +66,8 @@ class Reader {
// will only be valid until the next mutating operation on this // will only be valid until the next mutating operation on this
// reader or the next mutation to *scratch. // reader or the next mutation to *scratch.
bool ReadRecord(Slice* record, std::string* scratch, bool ReadRecord(Slice* record, std::string* scratch,
bool report_eof_inconsistency = false); WALRecoveryMode wal_recovery_mode =
WALRecoveryMode::kTolerateCorruptedTailRecords);
// Returns the physical offset of the last record returned by ReadRecord. // Returns the physical offset of the last record returned by ReadRecord.
// //
@ -128,8 +130,9 @@ class Reader {
bool SkipToInitialBlock(); bool SkipToInitialBlock();
// Return type, or one of the preceding special values // Return type, or one of the preceding special values
unsigned int ReadPhysicalRecord(Slice* result, unsigned int ReadPhysicalRecord(
bool report_eof_inconsistency = false); Slice* result, WALRecoveryMode wal_recovery_mode =
WALRecoveryMode::kTolerateCorruptedTailRecords);
// Reports dropped bytes to the reporter. // Reports dropped bytes to the reporter.
// buffer_ must be updated to remove the dropped bytes prior to invocation. // buffer_ must be updated to remove the dropped bytes prior to invocation.

@ -174,10 +174,11 @@ class LogTest : public ::testing::TestWithParam<int> {
return dest_contents().size(); return dest_contents().size();
} }
std::string Read(const bool report_eof_inconsistency = false) { std::string Read(const WALRecoveryMode wal_recovery_mode =
WALRecoveryMode::kTolerateCorruptedTailRecords) {
std::string scratch; std::string scratch;
Slice record; Slice record;
if (reader_.ReadRecord(&record, &scratch, report_eof_inconsistency)) { if (reader_.ReadRecord(&record, &scratch, wal_recovery_mode)) {
return record.ToString(); return record.ToString();
} else { } else {
return "EOF"; return "EOF";
@ -424,7 +425,7 @@ TEST_P(LogTest, TruncatedTrailingRecordIsIgnored) {
TEST_P(LogTest, TruncatedTrailingRecordIsNotIgnored) { TEST_P(LogTest, TruncatedTrailingRecordIsNotIgnored) {
Write("foo"); Write("foo");
ShrinkSize(4); // Drop all payload as well as a header byte ShrinkSize(4); // Drop all payload as well as a header byte
ASSERT_EQ("EOF", Read(/*report_eof_inconsistency*/ true)); ASSERT_EQ("EOF", Read(WALRecoveryMode::kAbsoluteConsistency));
// Truncated last record is ignored, not treated as an error // Truncated last record is ignored, not treated as an error
ASSERT_GT(DroppedBytes(), 0U); ASSERT_GT(DroppedBytes(), 0U);
ASSERT_EQ("OK", MatchError("Corruption: truncated header")); ASSERT_EQ("OK", MatchError("Corruption: truncated header"));
@ -452,7 +453,7 @@ TEST_P(LogTest, BadLengthAtEndIsIgnored) {
TEST_P(LogTest, BadLengthAtEndIsNotIgnored) { TEST_P(LogTest, BadLengthAtEndIsNotIgnored) {
Write("foo"); Write("foo");
ShrinkSize(1); ShrinkSize(1);
ASSERT_EQ("EOF", Read(/*report_eof_inconsistency=*/true)); ASSERT_EQ("EOF", Read(WALRecoveryMode::kAbsoluteConsistency));
ASSERT_GT(DroppedBytes(), 0U); ASSERT_GT(DroppedBytes(), 0U);
ASSERT_EQ("OK", MatchError("Corruption: truncated header")); ASSERT_EQ("OK", MatchError("Corruption: truncated header"));
} }
@ -518,7 +519,7 @@ TEST_P(LogTest, MissingLastIsNotIgnored) {
Write(BigString("bar", kBlockSize)); Write(BigString("bar", kBlockSize));
// Remove the LAST block, including header. // Remove the LAST block, including header.
ShrinkSize(14); ShrinkSize(14);
ASSERT_EQ("EOF", Read(/*report_eof_inconsistency=*/true)); ASSERT_EQ("EOF", Read(WALRecoveryMode::kAbsoluteConsistency));
ASSERT_GT(DroppedBytes(), 0U); ASSERT_GT(DroppedBytes(), 0U);
ASSERT_EQ("OK", MatchError("Corruption: error reading trailing data")); ASSERT_EQ("OK", MatchError("Corruption: error reading trailing data"));
} }
@ -536,7 +537,7 @@ TEST_P(LogTest, PartialLastIsNotIgnored) {
Write(BigString("bar", kBlockSize)); Write(BigString("bar", kBlockSize));
// Cause a bad record length in the LAST block. // Cause a bad record length in the LAST block.
ShrinkSize(1); ShrinkSize(1);
ASSERT_EQ("EOF", Read(/*report_eof_inconsistency=*/true)); ASSERT_EQ("EOF", Read(WALRecoveryMode::kAbsoluteConsistency));
ASSERT_GT(DroppedBytes(), 0U); ASSERT_GT(DroppedBytes(), 0U);
ASSERT_EQ("OK", MatchError( ASSERT_EQ("OK", MatchError(
"Corruption: truncated headerCorruption: " "Corruption: truncated headerCorruption: "

Loading…
Cancel
Save