|
|
|
@ -127,6 +127,634 @@ TEST_F(DBWALTest, SyncWALNotWaitWrite) { |
|
|
|
|
ASSERT_EQ(Get("foo2"), "bar2"); |
|
|
|
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing(); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
TEST_F(DBWALTest, Recover) { |
|
|
|
|
do { |
|
|
|
|
CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); |
|
|
|
|
ASSERT_OK(Put(1, "foo", "v1")); |
|
|
|
|
ASSERT_OK(Put(1, "baz", "v5")); |
|
|
|
|
|
|
|
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); |
|
|
|
|
ASSERT_EQ("v1", Get(1, "foo")); |
|
|
|
|
|
|
|
|
|
ASSERT_EQ("v1", Get(1, "foo")); |
|
|
|
|
ASSERT_EQ("v5", Get(1, "baz")); |
|
|
|
|
ASSERT_OK(Put(1, "bar", "v2")); |
|
|
|
|
ASSERT_OK(Put(1, "foo", "v3")); |
|
|
|
|
|
|
|
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); |
|
|
|
|
ASSERT_EQ("v3", Get(1, "foo")); |
|
|
|
|
ASSERT_OK(Put(1, "foo", "v4")); |
|
|
|
|
ASSERT_EQ("v4", Get(1, "foo")); |
|
|
|
|
ASSERT_EQ("v2", Get(1, "bar")); |
|
|
|
|
ASSERT_EQ("v5", Get(1, "baz")); |
|
|
|
|
} while (ChangeOptions()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
TEST_F(DBWALTest, RecoverWithTableHandle) { |
|
|
|
|
do { |
|
|
|
|
Options options = CurrentOptions(); |
|
|
|
|
options.create_if_missing = true; |
|
|
|
|
options.disable_auto_compactions = true; |
|
|
|
|
DestroyAndReopen(options); |
|
|
|
|
CreateAndReopenWithCF({"pikachu"}, options); |
|
|
|
|
|
|
|
|
|
ASSERT_OK(Put(1, "foo", "v1")); |
|
|
|
|
ASSERT_OK(Put(1, "bar", "v2")); |
|
|
|
|
ASSERT_OK(Flush(1)); |
|
|
|
|
ASSERT_OK(Put(1, "foo", "v3")); |
|
|
|
|
ASSERT_OK(Put(1, "bar", "v4")); |
|
|
|
|
ASSERT_OK(Flush(1)); |
|
|
|
|
ASSERT_OK(Put(1, "big", std::string(100, 'a'))); |
|
|
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); |
|
|
|
|
|
|
|
|
|
std::vector<std::vector<FileMetaData>> files; |
|
|
|
|
dbfull()->TEST_GetFilesMetaData(handles_[1], &files); |
|
|
|
|
size_t total_files = 0; |
|
|
|
|
for (const auto& level : files) { |
|
|
|
|
total_files += level.size(); |
|
|
|
|
} |
|
|
|
|
ASSERT_EQ(total_files, 3); |
|
|
|
|
for (const auto& level : files) { |
|
|
|
|
for (const auto& file : level) { |
|
|
|
|
if (kInfiniteMaxOpenFiles == option_config_) { |
|
|
|
|
ASSERT_TRUE(file.table_reader_handle != nullptr); |
|
|
|
|
} else { |
|
|
|
|
ASSERT_TRUE(file.table_reader_handle == nullptr); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} while (ChangeOptions()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
TEST_F(DBWALTest, IgnoreRecoveredLog) { |
|
|
|
|
std::string backup_logs = dbname_ + "/backup_logs"; |
|
|
|
|
|
|
|
|
|
// delete old files in backup_logs directory
|
|
|
|
|
env_->CreateDirIfMissing(backup_logs); |
|
|
|
|
std::vector<std::string> old_files; |
|
|
|
|
env_->GetChildren(backup_logs, &old_files); |
|
|
|
|
for (auto& file : old_files) { |
|
|
|
|
if (file != "." && file != "..") { |
|
|
|
|
env_->DeleteFile(backup_logs + "/" + file); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
do { |
|
|
|
|
Options options = CurrentOptions(); |
|
|
|
|
options.create_if_missing = true; |
|
|
|
|
options.merge_operator = MergeOperators::CreateUInt64AddOperator(); |
|
|
|
|
options.wal_dir = dbname_ + "/logs"; |
|
|
|
|
DestroyAndReopen(options); |
|
|
|
|
|
|
|
|
|
// fill up the DB
|
|
|
|
|
std::string one, two; |
|
|
|
|
PutFixed64(&one, 1); |
|
|
|
|
PutFixed64(&two, 2); |
|
|
|
|
ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one))); |
|
|
|
|
ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one))); |
|
|
|
|
ASSERT_OK(db_->Merge(WriteOptions(), Slice("bar"), Slice(one))); |
|
|
|
|
|
|
|
|
|
// copy the logs to backup
|
|
|
|
|
std::vector<std::string> logs; |
|
|
|
|
env_->GetChildren(options.wal_dir, &logs); |
|
|
|
|
for (auto& log : logs) { |
|
|
|
|
if (log != ".." && log != ".") { |
|
|
|
|
CopyFile(options.wal_dir + "/" + log, backup_logs + "/" + log); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// recover the DB
|
|
|
|
|
Reopen(options); |
|
|
|
|
ASSERT_EQ(two, Get("foo")); |
|
|
|
|
ASSERT_EQ(one, Get("bar")); |
|
|
|
|
Close(); |
|
|
|
|
|
|
|
|
|
// copy the logs from backup back to wal dir
|
|
|
|
|
for (auto& log : logs) { |
|
|
|
|
if (log != ".." && log != ".") { |
|
|
|
|
CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
// this should ignore the log files, recovery should not happen again
|
|
|
|
|
// if the recovery happens, the same merge operator would be called twice,
|
|
|
|
|
// leading to incorrect results
|
|
|
|
|
Reopen(options); |
|
|
|
|
ASSERT_EQ(two, Get("foo")); |
|
|
|
|
ASSERT_EQ(one, Get("bar")); |
|
|
|
|
Close(); |
|
|
|
|
Destroy(options); |
|
|
|
|
Reopen(options); |
|
|
|
|
Close(); |
|
|
|
|
|
|
|
|
|
// copy the logs from backup back to wal dir
|
|
|
|
|
env_->CreateDirIfMissing(options.wal_dir); |
|
|
|
|
for (auto& log : logs) { |
|
|
|
|
if (log != ".." && log != ".") { |
|
|
|
|
CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
// assert that we successfully recovered only from logs, even though we
|
|
|
|
|
// destroyed the DB
|
|
|
|
|
Reopen(options); |
|
|
|
|
ASSERT_EQ(two, Get("foo")); |
|
|
|
|
ASSERT_EQ(one, Get("bar")); |
|
|
|
|
|
|
|
|
|
// Recovery will fail if DB directory doesn't exist.
|
|
|
|
|
Destroy(options); |
|
|
|
|
// copy the logs from backup back to wal dir
|
|
|
|
|
env_->CreateDirIfMissing(options.wal_dir); |
|
|
|
|
for (auto& log : logs) { |
|
|
|
|
if (log != ".." && log != ".") { |
|
|
|
|
CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); |
|
|
|
|
// we won't be needing this file no more
|
|
|
|
|
env_->DeleteFile(backup_logs + "/" + log); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
Status s = TryReopen(options); |
|
|
|
|
ASSERT_TRUE(!s.ok()); |
|
|
|
|
} while (ChangeOptions(kSkipHashCuckoo)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
TEST_F(DBWALTest, RecoveryWithEmptyLog) { |
|
|
|
|
do { |
|
|
|
|
CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); |
|
|
|
|
ASSERT_OK(Put(1, "foo", "v1")); |
|
|
|
|
ASSERT_OK(Put(1, "foo", "v2")); |
|
|
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); |
|
|
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); |
|
|
|
|
ASSERT_OK(Put(1, "foo", "v3")); |
|
|
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); |
|
|
|
|
ASSERT_EQ("v3", Get(1, "foo")); |
|
|
|
|
} while (ChangeOptions()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#ifndef ROCKSDB_LITE |
|
|
|
|
TEST_F(DBWALTest, RecoverWithLargeLog) { |
|
|
|
|
do { |
|
|
|
|
{ |
|
|
|
|
Options options = CurrentOptions(); |
|
|
|
|
CreateAndReopenWithCF({"pikachu"}, options); |
|
|
|
|
ASSERT_OK(Put(1, "big1", std::string(200000, '1'))); |
|
|
|
|
ASSERT_OK(Put(1, "big2", std::string(200000, '2'))); |
|
|
|
|
ASSERT_OK(Put(1, "small3", std::string(10, '3'))); |
|
|
|
|
ASSERT_OK(Put(1, "small4", std::string(10, '4'))); |
|
|
|
|
ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Make sure that if we re-open with a small write buffer size that
|
|
|
|
|
// we flush table files in the middle of a large log file.
|
|
|
|
|
Options options; |
|
|
|
|
options.write_buffer_size = 100000; |
|
|
|
|
options = CurrentOptions(options); |
|
|
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, options); |
|
|
|
|
ASSERT_EQ(NumTableFilesAtLevel(0, 1), 3); |
|
|
|
|
ASSERT_EQ(std::string(200000, '1'), Get(1, "big1")); |
|
|
|
|
ASSERT_EQ(std::string(200000, '2'), Get(1, "big2")); |
|
|
|
|
ASSERT_EQ(std::string(10, '3'), Get(1, "small3")); |
|
|
|
|
ASSERT_EQ(std::string(10, '4'), Get(1, "small4")); |
|
|
|
|
ASSERT_GT(NumTableFilesAtLevel(0, 1), 1); |
|
|
|
|
} while (ChangeCompactOptions()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// In https://reviews.facebook.net/D20661 we change
|
|
|
|
|
// recovery behavior: previously for each log file each column family
|
|
|
|
|
// memtable was flushed, even it was empty. Now it's changed:
|
|
|
|
|
// we try to create the smallest number of table files by merging
|
|
|
|
|
// updates from multiple logs
|
|
|
|
|
TEST_F(DBWALTest, RecoverCheckFileAmountWithSmallWriteBuffer) { |
|
|
|
|
Options options = CurrentOptions(); |
|
|
|
|
options.write_buffer_size = 5000000; |
|
|
|
|
CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options); |
|
|
|
|
|
|
|
|
|
// Since we will reopen DB with smaller write_buffer_size,
|
|
|
|
|
// each key will go to new SST file
|
|
|
|
|
ASSERT_OK(Put(1, Key(10), DummyString(1000000))); |
|
|
|
|
ASSERT_OK(Put(1, Key(10), DummyString(1000000))); |
|
|
|
|
ASSERT_OK(Put(1, Key(10), DummyString(1000000))); |
|
|
|
|
ASSERT_OK(Put(1, Key(10), DummyString(1000000))); |
|
|
|
|
|
|
|
|
|
ASSERT_OK(Put(3, Key(10), DummyString(1))); |
|
|
|
|
// Make 'dobrynia' to be flushed and new WAL file to be created
|
|
|
|
|
ASSERT_OK(Put(2, Key(10), DummyString(7500000))); |
|
|
|
|
ASSERT_OK(Put(2, Key(1), DummyString(1))); |
|
|
|
|
dbfull()->TEST_WaitForFlushMemTable(handles_[2]); |
|
|
|
|
{ |
|
|
|
|
auto tables = ListTableFiles(env_, dbname_); |
|
|
|
|
ASSERT_EQ(tables.size(), static_cast<size_t>(1)); |
|
|
|
|
// Make sure 'dobrynia' was flushed: check sst files amount
|
|
|
|
|
ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), |
|
|
|
|
static_cast<uint64_t>(1)); |
|
|
|
|
} |
|
|
|
|
// New WAL file
|
|
|
|
|
ASSERT_OK(Put(1, Key(1), DummyString(1))); |
|
|
|
|
ASSERT_OK(Put(1, Key(1), DummyString(1))); |
|
|
|
|
ASSERT_OK(Put(3, Key(10), DummyString(1))); |
|
|
|
|
ASSERT_OK(Put(3, Key(10), DummyString(1))); |
|
|
|
|
ASSERT_OK(Put(3, Key(10), DummyString(1))); |
|
|
|
|
|
|
|
|
|
options.write_buffer_size = 4096; |
|
|
|
|
options.arena_block_size = 4096; |
|
|
|
|
ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"}, |
|
|
|
|
options); |
|
|
|
|
{ |
|
|
|
|
// No inserts => default is empty
|
|
|
|
|
ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), |
|
|
|
|
static_cast<uint64_t>(0)); |
|
|
|
|
// First 4 keys goes to separate SSTs + 1 more SST for 2 smaller keys
|
|
|
|
|
ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), |
|
|
|
|
static_cast<uint64_t>(5)); |
|
|
|
|
// 1 SST for big key + 1 SST for small one
|
|
|
|
|
ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), |
|
|
|
|
static_cast<uint64_t>(2)); |
|
|
|
|
// 1 SST for all keys
|
|
|
|
|
ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), |
|
|
|
|
static_cast<uint64_t>(1)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// In https://reviews.facebook.net/D20661 we change
|
|
|
|
|
// recovery behavior: previously for each log file each column family
|
|
|
|
|
// memtable was flushed, even it wasn't empty. Now it's changed:
|
|
|
|
|
// we try to create the smallest number of table files by merging
|
|
|
|
|
// updates from multiple logs
|
|
|
|
|
TEST_F(DBWALTest, RecoverCheckFileAmount) { |
|
|
|
|
Options options = CurrentOptions(); |
|
|
|
|
options.write_buffer_size = 100000; |
|
|
|
|
options.arena_block_size = 4 * 1024; |
|
|
|
|
CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options); |
|
|
|
|
|
|
|
|
|
ASSERT_OK(Put(0, Key(1), DummyString(1))); |
|
|
|
|
ASSERT_OK(Put(1, Key(1), DummyString(1))); |
|
|
|
|
ASSERT_OK(Put(2, Key(1), DummyString(1))); |
|
|
|
|
|
|
|
|
|
// Make 'nikitich' memtable to be flushed
|
|
|
|
|
ASSERT_OK(Put(3, Key(10), DummyString(1002400))); |
|
|
|
|
ASSERT_OK(Put(3, Key(1), DummyString(1))); |
|
|
|
|
dbfull()->TEST_WaitForFlushMemTable(handles_[3]); |
|
|
|
|
// 4 memtable are not flushed, 1 sst file
|
|
|
|
|
{ |
|
|
|
|
auto tables = ListTableFiles(env_, dbname_); |
|
|
|
|
ASSERT_EQ(tables.size(), static_cast<size_t>(1)); |
|
|
|
|
ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), |
|
|
|
|
static_cast<uint64_t>(1)); |
|
|
|
|
} |
|
|
|
|
// Memtable for 'nikitich' has flushed, new WAL file has opened
|
|
|
|
|
// 4 memtable still not flushed
|
|
|
|
|
|
|
|
|
|
// Write to new WAL file
|
|
|
|
|
ASSERT_OK(Put(0, Key(1), DummyString(1))); |
|
|
|
|
ASSERT_OK(Put(1, Key(1), DummyString(1))); |
|
|
|
|
ASSERT_OK(Put(2, Key(1), DummyString(1))); |
|
|
|
|
|
|
|
|
|
// Fill up 'nikitich' one more time
|
|
|
|
|
ASSERT_OK(Put(3, Key(10), DummyString(1002400))); |
|
|
|
|
// make it flush
|
|
|
|
|
ASSERT_OK(Put(3, Key(1), DummyString(1))); |
|
|
|
|
dbfull()->TEST_WaitForFlushMemTable(handles_[3]); |
|
|
|
|
// There are still 4 memtable not flushed, and 2 sst tables
|
|
|
|
|
ASSERT_OK(Put(0, Key(1), DummyString(1))); |
|
|
|
|
ASSERT_OK(Put(1, Key(1), DummyString(1))); |
|
|
|
|
ASSERT_OK(Put(2, Key(1), DummyString(1))); |
|
|
|
|
|
|
|
|
|
{ |
|
|
|
|
auto tables = ListTableFiles(env_, dbname_); |
|
|
|
|
ASSERT_EQ(tables.size(), static_cast<size_t>(2)); |
|
|
|
|
ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), |
|
|
|
|
static_cast<uint64_t>(2)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"}, |
|
|
|
|
options); |
|
|
|
|
{ |
|
|
|
|
std::vector<uint64_t> table_files = ListTableFiles(env_, dbname_); |
|
|
|
|
// Check, that records for 'default', 'dobrynia' and 'pikachu' from
|
|
|
|
|
// first, second and third WALs went to the same SST.
|
|
|
|
|
// So, there is 6 SSTs: three for 'nikitich', one for 'default', one for
|
|
|
|
|
// 'dobrynia', one for 'pikachu'
|
|
|
|
|
ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), |
|
|
|
|
static_cast<uint64_t>(1)); |
|
|
|
|
ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), |
|
|
|
|
static_cast<uint64_t>(3)); |
|
|
|
|
ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), |
|
|
|
|
static_cast<uint64_t>(1)); |
|
|
|
|
ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), |
|
|
|
|
static_cast<uint64_t>(1)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
TEST_F(DBWALTest, SyncMultipleLogs) { |
|
|
|
|
const uint64_t kNumBatches = 2; |
|
|
|
|
const int kBatchSize = 1000; |
|
|
|
|
|
|
|
|
|
Options options = CurrentOptions(); |
|
|
|
|
options.create_if_missing = true; |
|
|
|
|
options.write_buffer_size = 4096; |
|
|
|
|
Reopen(options); |
|
|
|
|
|
|
|
|
|
WriteBatch batch; |
|
|
|
|
WriteOptions wo; |
|
|
|
|
wo.sync = true; |
|
|
|
|
|
|
|
|
|
for (uint64_t b = 0; b < kNumBatches; b++) { |
|
|
|
|
batch.Clear(); |
|
|
|
|
for (int i = 0; i < kBatchSize; i++) { |
|
|
|
|
batch.Put(Key(i), DummyString(128)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
dbfull()->Write(wo, &batch); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
ASSERT_OK(dbfull()->SyncWAL()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
// Test WAL recovery for the various modes available
|
|
|
|
|
//
|
|
|
|
|
class RecoveryTestHelper { |
|
|
|
|
public: |
|
|
|
|
// Number of WAL files to generate
|
|
|
|
|
static const int kWALFilesCount = 10; |
|
|
|
|
// Starting number for the WAL file name like 00010.log
|
|
|
|
|
static const int kWALFileOffset = 10; |
|
|
|
|
// Keys to be written per WAL file
|
|
|
|
|
static const int kKeysPerWALFile = 1024; |
|
|
|
|
// Size of the value
|
|
|
|
|
static const int kValueSize = 10; |
|
|
|
|
|
|
|
|
|
// Create WAL files with values filled in
|
|
|
|
|
static void FillData(DBWALTest* test, const Options& options, |
|
|
|
|
const size_t wal_count, size_t* count) { |
|
|
|
|
const DBOptions& db_options = options; |
|
|
|
|
|
|
|
|
|
*count = 0; |
|
|
|
|
|
|
|
|
|
shared_ptr<Cache> table_cache = NewLRUCache(50000, 16); |
|
|
|
|
EnvOptions env_options; |
|
|
|
|
WriteBuffer write_buffer(db_options.db_write_buffer_size); |
|
|
|
|
|
|
|
|
|
unique_ptr<VersionSet> versions; |
|
|
|
|
unique_ptr<WalManager> wal_manager; |
|
|
|
|
WriteController write_controller; |
|
|
|
|
|
|
|
|
|
versions.reset(new VersionSet(test->dbname_, &db_options, env_options, |
|
|
|
|
table_cache.get(), &write_buffer, |
|
|
|
|
&write_controller)); |
|
|
|
|
|
|
|
|
|
wal_manager.reset(new WalManager(db_options, env_options)); |
|
|
|
|
|
|
|
|
|
std::unique_ptr<log::Writer> current_log_writer; |
|
|
|
|
|
|
|
|
|
for (size_t j = kWALFileOffset; j < wal_count + kWALFileOffset; j++) { |
|
|
|
|
uint64_t current_log_number = j; |
|
|
|
|
std::string fname = LogFileName(test->dbname_, current_log_number); |
|
|
|
|
unique_ptr<WritableFile> file; |
|
|
|
|
ASSERT_OK(db_options.env->NewWritableFile(fname, &file, env_options)); |
|
|
|
|
unique_ptr<WritableFileWriter> file_writer( |
|
|
|
|
new WritableFileWriter(std::move(file), env_options)); |
|
|
|
|
current_log_writer.reset( |
|
|
|
|
new log::Writer(std::move(file_writer), current_log_number, |
|
|
|
|
db_options.recycle_log_file_num > 0)); |
|
|
|
|
|
|
|
|
|
for (int i = 0; i < kKeysPerWALFile; i++) { |
|
|
|
|
std::string key = "key" + ToString((*count)++); |
|
|
|
|
std::string value = test->DummyString(kValueSize); |
|
|
|
|
assert(current_log_writer.get() != nullptr); |
|
|
|
|
uint64_t seq = versions->LastSequence() + 1; |
|
|
|
|
WriteBatch batch; |
|
|
|
|
batch.Put(key, value); |
|
|
|
|
WriteBatchInternal::SetSequence(&batch, seq); |
|
|
|
|
current_log_writer->AddRecord(WriteBatchInternal::Contents(&batch)); |
|
|
|
|
versions->SetLastSequence(seq); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Recreate and fill the store with some data
|
|
|
|
|
static size_t FillData(DBWALTest* test, Options* options) { |
|
|
|
|
options->create_if_missing = true; |
|
|
|
|
test->DestroyAndReopen(*options); |
|
|
|
|
test->Close(); |
|
|
|
|
|
|
|
|
|
size_t count = 0; |
|
|
|
|
FillData(test, *options, kWALFilesCount, &count); |
|
|
|
|
return count; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Read back all the keys we wrote and return the number of keys found
|
|
|
|
|
static size_t GetData(DBWALTest* test) { |
|
|
|
|
size_t count = 0; |
|
|
|
|
for (size_t i = 0; i < kWALFilesCount * kKeysPerWALFile; i++) { |
|
|
|
|
if (test->Get("key" + ToString(i)) != "NOT_FOUND") { |
|
|
|
|
++count; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return count; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Manuall corrupt the specified WAL
|
|
|
|
|
static void CorruptWAL(DBWALTest* test, const Options& options, |
|
|
|
|
const double off, const double len, |
|
|
|
|
const int wal_file_id, const bool trunc = false) { |
|
|
|
|
Env* env = options.env; |
|
|
|
|
std::string fname = LogFileName(test->dbname_, wal_file_id); |
|
|
|
|
uint64_t size; |
|
|
|
|
ASSERT_OK(env->GetFileSize(fname, &size)); |
|
|
|
|
ASSERT_GT(size, 0); |
|
|
|
|
#ifdef OS_WIN |
|
|
|
|
// Windows disk cache behaves differently. When we truncate
|
|
|
|
|
// the original content is still in the cache due to the original
|
|
|
|
|
// handle is still open. Generally, in Windows, one prohibits
|
|
|
|
|
// shared access to files and it is not needed for WAL but we allow
|
|
|
|
|
// it to induce corruption at various tests.
|
|
|
|
|
test->Close(); |
|
|
|
|
#endif |
|
|
|
|
if (trunc) { |
|
|
|
|
ASSERT_EQ(0, truncate(fname.c_str(), static_cast<int64_t>(size * off))); |
|
|
|
|
} else { |
|
|
|
|
InduceCorruption(fname, static_cast<size_t>(size * off), |
|
|
|
|
static_cast<size_t>(size * len)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Overwrite data with 'a' from offset for length len
|
|
|
|
|
static void InduceCorruption(const std::string& filename, size_t offset, |
|
|
|
|
size_t len) { |
|
|
|
|
ASSERT_GT(len, 0U); |
|
|
|
|
|
|
|
|
|
int fd = open(filename.c_str(), O_RDWR); |
|
|
|
|
|
|
|
|
|
ASSERT_GT(fd, 0); |
|
|
|
|
ASSERT_EQ(offset, lseek(fd, offset, SEEK_SET)); |
|
|
|
|
|
|
|
|
|
void* buf = alloca(len); |
|
|
|
|
memset(buf, 'a', len); |
|
|
|
|
ASSERT_EQ(len, write(fd, buf, static_cast<unsigned int>(len))); |
|
|
|
|
|
|
|
|
|
close(fd); |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
// Test scope:
|
|
|
|
|
// - We expect to open the data store when there is incomplete trailing writes
|
|
|
|
|
// at the end of any of the logs
|
|
|
|
|
// - We do not expect to open the data store for corruption
|
|
|
|
|
TEST_F(DBWALTest, kTolerateCorruptedTailRecords) { |
|
|
|
|
const int jstart = RecoveryTestHelper::kWALFileOffset; |
|
|
|
|
const int jend = jstart + RecoveryTestHelper::kWALFilesCount; |
|
|
|
|
|
|
|
|
|
for (auto trunc : {true, false}) { /* Corruption style */ |
|
|
|
|
for (int i = 0; i < 4; i++) { /* Corruption offset position */ |
|
|
|
|
for (int j = jstart; j < jend; j++) { /* WAL file */ |
|
|
|
|
// Fill data for testing
|
|
|
|
|
Options options = CurrentOptions(); |
|
|
|
|
const size_t row_count = RecoveryTestHelper::FillData(this, &options); |
|
|
|
|
// test checksum failure or parsing
|
|
|
|
|
RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3, |
|
|
|
|
/*len%=*/.1, /*wal=*/j, trunc); |
|
|
|
|
|
|
|
|
|
if (trunc) { |
|
|
|
|
options.wal_recovery_mode = |
|
|
|
|
WALRecoveryMode::kTolerateCorruptedTailRecords; |
|
|
|
|
options.create_if_missing = false; |
|
|
|
|
ASSERT_OK(TryReopen(options)); |
|
|
|
|
const size_t recovered_row_count = RecoveryTestHelper::GetData(this); |
|
|
|
|
ASSERT_TRUE(i == 0 || recovered_row_count > 0); |
|
|
|
|
ASSERT_LT(recovered_row_count, row_count); |
|
|
|
|
} else { |
|
|
|
|
options.wal_recovery_mode = |
|
|
|
|
WALRecoveryMode::kTolerateCorruptedTailRecords; |
|
|
|
|
ASSERT_NOK(TryReopen(options)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Test scope:
|
|
|
|
|
// We don't expect the data store to be opened if there is any corruption
|
|
|
|
|
// (leading, middle or trailing -- incomplete writes or corruption)
|
|
|
|
|
TEST_F(DBWALTest, kAbsoluteConsistency) { |
|
|
|
|
const int jstart = RecoveryTestHelper::kWALFileOffset; |
|
|
|
|
const int jend = jstart + RecoveryTestHelper::kWALFilesCount; |
|
|
|
|
|
|
|
|
|
// Verify clean slate behavior
|
|
|
|
|
Options options = CurrentOptions(); |
|
|
|
|
const size_t row_count = RecoveryTestHelper::FillData(this, &options); |
|
|
|
|
options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency; |
|
|
|
|
options.create_if_missing = false; |
|
|
|
|
ASSERT_OK(TryReopen(options)); |
|
|
|
|
ASSERT_EQ(RecoveryTestHelper::GetData(this), row_count); |
|
|
|
|
|
|
|
|
|
for (auto trunc : {true, false}) { /* Corruption style */ |
|
|
|
|
for (int i = 0; i < 4; i++) { /* Corruption offset position */ |
|
|
|
|
if (trunc && i == 0) { |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for (int j = jstart; j < jend; j++) { /* wal files */ |
|
|
|
|
// fill with new date
|
|
|
|
|
RecoveryTestHelper::FillData(this, &options); |
|
|
|
|
// corrupt the wal
|
|
|
|
|
RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3, |
|
|
|
|
/*len%=*/.1, j, trunc); |
|
|
|
|
// verify
|
|
|
|
|
options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency; |
|
|
|
|
options.create_if_missing = false; |
|
|
|
|
ASSERT_NOK(TryReopen(options)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Test scope:
|
|
|
|
|
// - We expect to open data store under all circumstances
|
|
|
|
|
// - We expect only data upto the point where the first error was encountered
|
|
|
|
|
TEST_F(DBWALTest, kPointInTimeRecovery) { |
|
|
|
|
const int jstart = RecoveryTestHelper::kWALFileOffset; |
|
|
|
|
const int jend = jstart + RecoveryTestHelper::kWALFilesCount; |
|
|
|
|
const int maxkeys = |
|
|
|
|
RecoveryTestHelper::kWALFilesCount * RecoveryTestHelper::kKeysPerWALFile; |
|
|
|
|
|
|
|
|
|
for (auto trunc : {true, false}) { /* Corruption style */ |
|
|
|
|
for (int i = 0; i < 4; i++) { /* Offset of corruption */ |
|
|
|
|
for (int j = jstart; j < jend; j++) { /* WAL file */ |
|
|
|
|
// Fill data for testing
|
|
|
|
|
Options options = CurrentOptions(); |
|
|
|
|
const size_t row_count = RecoveryTestHelper::FillData(this, &options); |
|
|
|
|
|
|
|
|
|
// Corrupt the wal
|
|
|
|
|
RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3, |
|
|
|
|
/*len%=*/.1, j, trunc); |
|
|
|
|
|
|
|
|
|
// Verify
|
|
|
|
|
options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; |
|
|
|
|
options.create_if_missing = false; |
|
|
|
|
ASSERT_OK(TryReopen(options)); |
|
|
|
|
|
|
|
|
|
// Probe data for invariants
|
|
|
|
|
size_t recovered_row_count = RecoveryTestHelper::GetData(this); |
|
|
|
|
ASSERT_LT(recovered_row_count, row_count); |
|
|
|
|
|
|
|
|
|
bool expect_data = true; |
|
|
|
|
for (size_t k = 0; k < maxkeys; ++k) { |
|
|
|
|
bool found = Get("key" + ToString(i)) != "NOT_FOUND"; |
|
|
|
|
if (expect_data && !found) { |
|
|
|
|
expect_data = false; |
|
|
|
|
} |
|
|
|
|
ASSERT_EQ(found, expect_data); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
const size_t min = RecoveryTestHelper::kKeysPerWALFile * |
|
|
|
|
(j - RecoveryTestHelper::kWALFileOffset); |
|
|
|
|
ASSERT_GE(recovered_row_count, min); |
|
|
|
|
if (!trunc && i != 0) { |
|
|
|
|
const size_t max = RecoveryTestHelper::kKeysPerWALFile * |
|
|
|
|
(j - RecoveryTestHelper::kWALFileOffset + 1); |
|
|
|
|
ASSERT_LE(recovered_row_count, max); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Test scope:
|
|
|
|
|
// - We expect to open the data store under all scenarios
|
|
|
|
|
// - We expect to have recovered records past the corruption zone
|
|
|
|
|
TEST_F(DBWALTest, kSkipAnyCorruptedRecords) { |
|
|
|
|
const int jstart = RecoveryTestHelper::kWALFileOffset; |
|
|
|
|
const int jend = jstart + RecoveryTestHelper::kWALFilesCount; |
|
|
|
|
|
|
|
|
|
for (auto trunc : {true, false}) { /* Corruption style */ |
|
|
|
|
for (int i = 0; i < 4; i++) { /* Corruption offset */ |
|
|
|
|
for (int j = jstart; j < jend; j++) { /* wal files */ |
|
|
|
|
// Fill data for testing
|
|
|
|
|
Options options = CurrentOptions(); |
|
|
|
|
const size_t row_count = RecoveryTestHelper::FillData(this, &options); |
|
|
|
|
|
|
|
|
|
// Corrupt the WAL
|
|
|
|
|
RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3, |
|
|
|
|
/*len%=*/.1, j, trunc); |
|
|
|
|
|
|
|
|
|
// Verify behavior
|
|
|
|
|
options.wal_recovery_mode = WALRecoveryMode::kSkipAnyCorruptedRecords; |
|
|
|
|
options.create_if_missing = false; |
|
|
|
|
ASSERT_OK(TryReopen(options)); |
|
|
|
|
|
|
|
|
|
// Probe data for invariants
|
|
|
|
|
size_t recovered_row_count = RecoveryTestHelper::GetData(this); |
|
|
|
|
ASSERT_LT(recovered_row_count, row_count); |
|
|
|
|
|
|
|
|
|
if (!trunc) { |
|
|
|
|
ASSERT_TRUE(i != 0 || recovered_row_count > 0); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#endif // ROCKSDB_LITE
|
|
|
|
|
|
|
|
|
|
} // namespace rocksdb
|
|
|
|
|
|
|
|
|
|
int main(int argc, char** argv) { |
|
|
|
|