// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright 2014 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

// This test uses a custom Env to keep track of the state of a filesystem as of
// the last "sync". It then checks for data loss errors by purposely dropping
// file data (or entire files) not protected by a "sync".
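//
// A rough sketch of the crash-recovery flow this test drives (using only the
// FaultInjectionTestEnv calls exercised below; see
// util/fault_injection_test_env.h for the actual interface):
//
//   FaultInjectionTestEnv* env = new FaultInjectionTestEnv(Env::Default());
//   options.env = env;
//   // ... write some data, syncing part of it ...
//   env->SetFilesystemActive(false);  // simulate a crash point
//   env->DropUnsyncedFileData();      // lose whatever was not synced
//   // ... reopen the DB and verify that the synced data is still there ...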
#include "db/db_impl.h"
|
|
|
|
#include "db/log_format.h"
|
|
|
|
#include "db/version_set.h"
|
|
|
|
#include "env/mock_env.h"
|
|
|
|
#include "rocksdb/cache.h"
|
|
|
|
#include "rocksdb/db.h"
|
|
|
|
#include "rocksdb/env.h"
|
|
|
|
#include "rocksdb/table.h"
|
|
|
|
#include "rocksdb/write_batch.h"
|
#include "util/fault_injection_test_env.h"
|
|
|
|
#include "util/filename.h"
|
|
|
|
#include "util/logging.h"
|
|
|
|
#include "util/mutexlock.h"
|
|
|
|
#include "util/sync_point.h"
|
|
|
|
#include "util/testharness.h"
|
|
|
|
#include "util/testutil.h"
|
|
|
|
|
|
|
|

namespace rocksdb {

static const int kValueSize = 1000;
static const int kMaxNumValues = 2000;
static const size_t kNumIterations = 3;

class FaultInjectionTest : public testing::Test,
                           public testing::WithParamInterface<bool> {
 protected:
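  // Each OptionConfig exercises a different combination of WAL directory,
  // data directory, durability mechanism (WAL sync vs. CompactRange) and
  // LSM shape; see CurrentOptions() below.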
  enum OptionConfig {
    kDefault,
    kDifferentDataDir,
    kWalDir,
    kSyncWal,
    kWalDirSyncWal,
    kMultiLevels,
    kEnd,
  };

  int option_config_;
  // When data needs to be made persistent, sync the WAL.
  bool sync_use_wal_;
  // When data needs to be made persistent, call DB::CompactRange().
  bool sync_use_compact_;

  bool sequential_order_;

 public:
  enum ExpectedVerifResult { kValExpectFound, kValExpectNoError };

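  // How ResetDBState() simulates the post-crash filesystem:
  //  - kResetDropUnsyncedData: drop all file data written since the last sync
  //  - kResetDropRandomUnsyncedData: drop a random subset of the unsynced data
  //  - kResetDeleteUnsyncedFiles: delete files created after the last
  //    directory sync
  //  - kResetDropAndDeleteUnsynced: do both of the above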
  enum ResetMethod {
    kResetDropUnsyncedData,
    kResetDropRandomUnsyncedData,
    kResetDeleteUnsyncedFiles,
    kResetDropAndDeleteUnsynced
  };

  std::unique_ptr<Env> base_env_;
  FaultInjectionTestEnv* env_;
  std::string dbname_;
  shared_ptr<Cache> tiny_cache_;
  Options options_;
  DB* db_;

  FaultInjectionTest()
      : option_config_(kDefault),
        sync_use_wal_(false),
        sync_use_compact_(true),
        base_env_(nullptr),
        env_(NULL),
        db_(NULL) {}

  ~FaultInjectionTest() {
    rocksdb::SyncPoint::GetInstance()->DisableProcessing();
    rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
  }

  bool ChangeOptions() {
    option_config_++;
    if (option_config_ >= kEnd) {
      return false;
    } else {
      if (option_config_ == kMultiLevels) {
        base_env_.reset(new MockEnv(Env::Default()));
      }
      return true;
    }
  }

  // Return the current option configuration.
  Options CurrentOptions() {
    sync_use_wal_ = false;
    sync_use_compact_ = true;
    Options options;
    switch (option_config_) {
      case kWalDir:
        options.wal_dir = test::TmpDir(env_) + "/fault_test_wal";
        break;
      case kDifferentDataDir:
        options.db_paths.emplace_back(test::TmpDir(env_) + "/fault_test_data",
                                      1000000U);
        break;
      case kSyncWal:
        sync_use_wal_ = true;
        sync_use_compact_ = false;
        break;
      case kWalDirSyncWal:
        options.wal_dir = test::TmpDir(env_) + "/fault_test_wal";
        sync_use_wal_ = true;
        sync_use_compact_ = false;
        break;
      case kMultiLevels:
        options.write_buffer_size = 64 * 1024;
        options.target_file_size_base = 64 * 1024;
        options.level0_file_num_compaction_trigger = 2;
        options.level0_slowdown_writes_trigger = 2;
        options.level0_stop_writes_trigger = 4;
        options.max_bytes_for_level_base = 128 * 1024;
        options.max_write_buffer_number = 2;
        options.max_background_compactions = 8;
        options.max_background_flushes = 8;
        sync_use_wal_ = true;
        sync_use_compact_ = false;
        break;
      default:
        break;
    }
    return options;
  }

  Status NewDB() {
    assert(db_ == NULL);
    assert(tiny_cache_ == nullptr);
    assert(env_ == NULL);

    env_ =
        new FaultInjectionTestEnv(base_env_ ? base_env_.get() : Env::Default());

    options_ = CurrentOptions();
    options_.env = env_;
    options_.paranoid_checks = true;

    BlockBasedTableOptions table_options;
    tiny_cache_ = NewLRUCache(100);
    table_options.block_cache = tiny_cache_;
    options_.table_factory.reset(NewBlockBasedTableFactory(table_options));

    dbname_ = test::TmpDir() + "/fault_test";

    EXPECT_OK(DestroyDB(dbname_, options_));

    options_.create_if_missing = true;
    Status s = OpenDB();
    options_.create_if_missing = false;
    return s;
  }

  void SetUp() override {
    sequential_order_ = GetParam();
    ASSERT_OK(NewDB());
  }

  void TearDown() override {
    CloseDB();

    Status s = DestroyDB(dbname_, options_);

    delete env_;
    env_ = NULL;

    tiny_cache_.reset();

    ASSERT_OK(s);
  }

  void Build(const WriteOptions& write_options, int start_idx, int num_vals) {
    std::string key_space, value_space;
    WriteBatch batch;
    for (int i = start_idx; i < start_idx + num_vals; i++) {
      Slice key = Key(i, &key_space);
      batch.Clear();
      batch.Put(key, Value(i, &value_space));
      ASSERT_OK(db_->Write(write_options, &batch));
    }
  }

  Status ReadValue(int i, std::string* val) const {
    std::string key_space, value_space;
    Slice key = Key(i, &key_space);
    Value(i, &value_space);
    ReadOptions options;
    return db_->Get(options, key, val);
  }

  Status Verify(int start_idx, int num_vals,
                ExpectedVerifResult expected) const {
    std::string val;
    std::string value_space;
    Status s;
    for (int i = start_idx; i < start_idx + num_vals && s.ok(); i++) {
      Value(i, &value_space);
      s = ReadValue(i, &val);
      if (s.ok()) {
        EXPECT_EQ(value_space, val);
      }
      if (expected == kValExpectFound) {
        if (!s.ok()) {
          fprintf(stderr,
                  "Error when reading record %d (expected found): %s\n", i,
                  s.ToString().c_str());
          return s;
        }
      } else if (!s.ok() && !s.IsNotFound()) {
        fprintf(stderr, "Error when reading record %d: %s\n", i,
                s.ToString().c_str());
        return s;
      }
    }
    return Status::OK();
  }

  // Return the ith key
  Slice Key(int i, std::string* storage) const {
    unsigned long long num = i;
    if (!sequential_order_) {
      // Scramble the key so that writes are in non-sequential order.
      const int m = 0x5bd1e995;
      num *= m;
      num ^= num << 24;
    }
    char buf[100];
    snprintf(buf, sizeof(buf), "%016d", static_cast<int>(num));
    storage->assign(buf, strlen(buf));
    return Slice(*storage);
  }

  // Return the value to associate with the specified key
  Slice Value(int k, std::string* storage) const {
    Random r(k);
    return test::RandomString(&r, kValueSize, storage);
  }

  void CloseDB() {
    delete db_;
    db_ = nullptr;
  }

  Status OpenDB() {
    CloseDB();
    env_->ResetState();
    Status s = DB::Open(options_, dbname_, &db_);
    assert(db_ != nullptr);
    return s;
  }

  void DeleteAllData() {
    Iterator* iter = db_->NewIterator(ReadOptions());
    WriteOptions options;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      ASSERT_OK(db_->Delete(WriteOptions(), iter->key()));
    }

    delete iter;

    FlushOptions flush_options;
    flush_options.wait = true;
    db_->Flush(flush_options);
  }

  // rnd cannot be null for kResetDropRandomUnsyncedData
  void ResetDBState(ResetMethod reset_method, Random* rnd = nullptr) {
    env_->AssertNoOpenFile();
    switch (reset_method) {
      case kResetDropUnsyncedData:
        ASSERT_OK(env_->DropUnsyncedFileData());
        break;
      case kResetDropRandomUnsyncedData:
        ASSERT_OK(env_->DropRandomUnsyncedFileData(rnd));
        break;
      case kResetDeleteUnsyncedFiles:
        ASSERT_OK(env_->DeleteFilesCreatedAfterLastDirSync());
        break;
      case kResetDropAndDeleteUnsynced:
        ASSERT_OK(env_->DropUnsyncedFileData());
        ASSERT_OK(env_->DeleteFilesCreatedAfterLastDirSync());
        break;
      default:
        assert(false);
    }
  }

  void PartialCompactTestPreFault(int num_pre_sync, int num_post_sync) {
    DeleteAllData();

    WriteOptions write_options;
    write_options.sync = sync_use_wal_;

    Build(write_options, 0, num_pre_sync);
    if (sync_use_compact_) {
      db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
    }
    write_options.sync = false;
    Build(write_options, num_pre_sync, num_post_sync);
  }

  void PartialCompactTestReopenWithFault(ResetMethod reset_method,
                                         int num_pre_sync, int num_post_sync,
                                         Random* rnd = nullptr) {
    env_->SetFilesystemActive(false);
    CloseDB();
    ResetDBState(reset_method, rnd);
    ASSERT_OK(OpenDB());
    ASSERT_OK(Verify(0, num_pre_sync, FaultInjectionTest::kValExpectFound));
    ASSERT_OK(Verify(num_pre_sync, num_post_sync,
                     FaultInjectionTest::kValExpectNoError));
    WaitCompactionFinish();
    ASSERT_OK(Verify(0, num_pre_sync, FaultInjectionTest::kValExpectFound));
    ASSERT_OK(Verify(num_pre_sync, num_post_sync,
                     FaultInjectionTest::kValExpectNoError));
  }

  void NoWriteTestPreFault() {}

  void NoWriteTestReopenWithFault(ResetMethod reset_method) {
    CloseDB();
    ResetDBState(reset_method);
    ASSERT_OK(OpenDB());
  }

  void WaitCompactionFinish() {
    static_cast<DBImpl*>(db_->GetRootDB())->TEST_WaitForCompact();
    ASSERT_OK(db_->Put(WriteOptions(), "", ""));
  }
};

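// For each option configuration: write one batch of keys with the configured
// durability mechanism (WAL sync or CompactRange), write a second batch
// without it, simulate a crash, and verify that the first batch is fully
// readable while reads of the second batch at least return no error.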
TEST_P(FaultInjectionTest, FaultTest) {
  do {
    Random rnd(301);

    for (size_t idx = 0; idx < kNumIterations; idx++) {
      int num_pre_sync = rnd.Uniform(kMaxNumValues);
      int num_post_sync = rnd.Uniform(kMaxNumValues);

      PartialCompactTestPreFault(num_pre_sync, num_post_sync);
      PartialCompactTestReopenWithFault(kResetDropUnsyncedData, num_pre_sync,
                                        num_post_sync);
      NoWriteTestPreFault();
      NoWriteTestReopenWithFault(kResetDropUnsyncedData);

      PartialCompactTestPreFault(num_pre_sync, num_post_sync);
      PartialCompactTestReopenWithFault(kResetDropRandomUnsyncedData,
                                        num_pre_sync, num_post_sync, &rnd);
      NoWriteTestPreFault();
      NoWriteTestReopenWithFault(kResetDropUnsyncedData);

      // Setting a separate data path won't pass the test as we don't sync
      // it after creating new files.
      PartialCompactTestPreFault(num_pre_sync, num_post_sync);
      PartialCompactTestReopenWithFault(kResetDropAndDeleteUnsynced,
                                        num_pre_sync, num_post_sync);
      NoWriteTestPreFault();
      NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced);

      PartialCompactTestPreFault(num_pre_sync, num_post_sync);
      // No new files are created, so we expect all values to be present since
      // no files will be dropped.
      PartialCompactTestReopenWithFault(kResetDeleteUnsyncedFiles, num_pre_sync,
                                        num_post_sync);
      NoWriteTestPreFault();
      NoWriteTestReopenWithFault(kResetDeleteUnsyncedFiles);
    }
  } while (ChangeOptions());
}

// Previous log file is not fsynced if sync is forced after log rolling.
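// The failure scenario guarded against (from the change this test was added
// for) is:
//   1. non-sync write of key 1
//   2. a flush is triggered (rolling the WAL) but has not run yet
//   3. sync write of key 2
//   4. crash
// If the sync write does not also sync the previous, still-unsynced WAL,
// key 2 can survive the crash while key 1 is lost, violating the sync-write
// contract.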
TEST_P(FaultInjectionTest, WriteOptionSyncTest) {
  test::SleepingBackgroundTask sleeping_task_low;
  env_->SetBackgroundThreads(1, Env::HIGH);
  // Block the job queue to prevent flush job from running.
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::HIGH);
  sleeping_task_low.WaitUntilSleeping();

  WriteOptions write_options;
  write_options.sync = false;

  std::string key_space, value_space;
  ASSERT_OK(
      db_->Put(write_options, Key(1, &key_space), Value(1, &value_space)));
  FlushOptions flush_options;
  flush_options.wait = false;
  ASSERT_OK(db_->Flush(flush_options));
  write_options.sync = true;
  ASSERT_OK(
      db_->Put(write_options, Key(2, &key_space), Value(2, &value_space)));
  db_->FlushWAL(false);

  env_->SetFilesystemActive(false);
  NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced);
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();

  ASSERT_OK(OpenDB());
  std::string val;
  Value(2, &value_space);
  ASSERT_OK(ReadValue(2, &val));
  ASSERT_EQ(value_space, val);

  Value(1, &value_space);
  ASSERT_OK(ReadValue(1, &val));
  ASSERT_EQ(value_space, val);
}

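// Simulate a crash while a compaction has produced its output but its result
// has not been installed yet. After reopening with unsynced data dropped, all
// keys must still be readable, and (checked via the sync-point callbacks
// below) no background compaction may run before DB::Open finishes recovery.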
TEST_P(FaultInjectionTest, UninstalledCompaction) {
  options_.target_file_size_base = 32 * 1024;
  options_.write_buffer_size = 100 << 10;  // 100KB
  options_.level0_file_num_compaction_trigger = 6;
  options_.level0_stop_writes_trigger = 1 << 10;
  options_.level0_slowdown_writes_trigger = 1 << 10;
  options_.max_background_compactions = 1;
  ASSERT_OK(OpenDB());

  if (!sequential_order_) {
    rocksdb::SyncPoint::GetInstance()->LoadDependency({
        {"FaultInjectionTest::FaultTest:0", "DBImpl::BGWorkCompaction"},
        {"CompactionJob::Run():End", "FaultInjectionTest::FaultTest:1"},
        {"FaultInjectionTest::FaultTest:2",
         "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"},
    });
  }
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  int kNumKeys = 1000;
  Build(WriteOptions(), 0, kNumKeys);
  FlushOptions flush_options;
  flush_options.wait = true;
  db_->Flush(flush_options);
  ASSERT_OK(db_->Put(WriteOptions(), "", ""));
  TEST_SYNC_POINT("FaultInjectionTest::FaultTest:0");
  TEST_SYNC_POINT("FaultInjectionTest::FaultTest:1");
  env_->SetFilesystemActive(false);
  TEST_SYNC_POINT("FaultInjectionTest::FaultTest:2");
  CloseDB();
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  ResetDBState(kResetDropUnsyncedData);

  std::atomic<bool> opened(false);
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::Open:Opened", [&](void* arg) { opened.store(true); });
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BGWorkCompaction",
      [&](void* arg) { ASSERT_TRUE(opened.load()); });
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(OpenDB());
  ASSERT_OK(Verify(0, kNumKeys, FaultInjectionTest::kValExpectFound));
  WaitCompactionFinish();
  ASSERT_OK(Verify(0, kNumKeys, FaultInjectionTest::kValExpectFound));
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
}

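// Exercises DB::FlushWAL(true), which syncs the WAL without blocking writers;
// data written with sync=false before the simulated crash must survive the
// reopen below because the WAL was explicitly synced.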
TEST_P(FaultInjectionTest, ManualLogSyncTest) {
  test::SleepingBackgroundTask sleeping_task_low;
  env_->SetBackgroundThreads(1, Env::HIGH);
  // Block the job queue to prevent flush job from running.
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::HIGH);
  sleeping_task_low.WaitUntilSleeping();

  WriteOptions write_options;
  write_options.sync = false;

  std::string key_space, value_space;
  ASSERT_OK(
      db_->Put(write_options, Key(1, &key_space), Value(1, &value_space)));
  FlushOptions flush_options;
  flush_options.wait = false;
  ASSERT_OK(db_->Flush(flush_options));
  ASSERT_OK(
      db_->Put(write_options, Key(2, &key_space), Value(2, &value_space)));
  ASSERT_OK(db_->FlushWAL(true));

  env_->SetFilesystemActive(false);
  NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced);
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();

  ASSERT_OK(OpenDB());
  std::string val;
  Value(2, &value_space);
  ASSERT_OK(ReadValue(2, &val));
  ASSERT_EQ(value_space, val);

  Value(1, &value_space);
  ASSERT_OK(ReadValue(1, &val));
  ASSERT_EQ(value_space, val);
}

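// WriteBatch::MarkWalTerminationPoint() marks the point after which batch
// operations are still applied to the memtable but are not written to the
// WAL, so "boys" must not survive a crash that drops unsynced data while
// "cats" must.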
TEST_P(FaultInjectionTest, WriteBatchWalTerminationTest) {
  ReadOptions ro;
  Options options = CurrentOptions();
  options.env = env_;

  WriteOptions wo;
  wo.sync = true;
  wo.disableWAL = false;
  WriteBatch batch;
  batch.Put("cats", "dogs");
  batch.MarkWalTerminationPoint();
  batch.Put("boys", "girls");
  ASSERT_OK(db_->Write(wo, &batch));

  env_->SetFilesystemActive(false);
  NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced);
  ASSERT_OK(OpenDB());

  std::string val;
  ASSERT_OK(db_->Get(ro, "cats", &val));
  ASSERT_EQ("dogs", val);
  ASSERT_EQ(db_->Get(ro, "boys", &val), Status::NotFound());
}

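// The bool parameter controls sequential_order_ (see SetUp() and Key()):
// true keeps key insertion order sequential, false scrambles it.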
INSTANTIATE_TEST_CASE_P(FaultTest, FaultInjectionTest, ::testing::Bool());

}  // namespace rocksdb

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}