|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#ifdef GFLAGS
|
|
|
|
#include "db_stress_tool/db_stress_common.h"
|
|
|
|
#include "file/file_util.h"
|
|
|
|
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
class CfConsistencyStressTest : public StressTest {
|
|
|
|
public:
|
|
|
|
// Starts `batch_id_` at zero; TestPut draws each write's value base from it.
CfConsistencyStressTest() : batch_id_(0) {}
|
|
|
|
|
|
|
|
// Nothing to release beyond what the members and base class clean up.
~CfConsistencyStressTest() override = default;
|
|
|
|
|
|
|
|
// Always reports false for this test.
bool IsStateTracked() const override { return false; }
|
|
|
|
|
|
|
|
Status TestPut(ThreadState* thread, WriteOptions& write_opts,
|
|
|
|
const ReadOptions& /* read_opts */,
|
|
|
|
const std::vector<int>& rand_column_families,
|
|
|
|
const std::vector<int64_t>& rand_keys,
|
|
|
|
char (&value)[100]) override {
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
assert(!rand_column_families.empty());
|
|
|
|
assert(!rand_keys.empty());
|
|
|
|
|
|
|
|
const std::string k = Key(rand_keys[0]);
|
|
|
|
|
|
|
|
const uint32_t value_base = batch_id_.fetch_add(1);
|
|
|
|
const size_t sz = GenerateValue(value_base, value, sizeof(value));
|
|
|
|
const Slice v(value, sz);
|
|
|
|
|
|
|
|
WriteBatch batch;
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
|
|
|
|
const bool use_put_entity = !FLAGS_use_merge &&
|
|
|
|
FLAGS_use_put_entity_one_in > 0 &&
|
|
|
|
(value_base % FLAGS_use_put_entity_one_in) == 0;
|
|
|
|
|
|
|
|
for (auto cf : rand_column_families) {
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
ColumnFamilyHandle* const cfh = column_families_[cf];
|
|
|
|
assert(cfh);
|
|
|
|
|
|
|
|
if (FLAGS_use_merge) {
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
batch.Merge(cfh, k, v);
|
|
|
|
} else if (use_put_entity) {
|
|
|
|
batch.PutEntity(cfh, k, GenerateWideColumns(value_base, v));
|
|
|
|
} else {
|
|
|
|
batch.Put(cfh, k, v);
|
|
|
|
}
|
|
|
|
}
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
|
|
|
|
Status s = db_->Write(write_opts, &batch);
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
|
|
|
|
if (!s.ok()) {
|
|
|
|
fprintf(stderr, "multi put or merge error: %s\n", s.ToString().c_str());
|
|
|
|
thread->stats.AddErrors(1);
|
|
|
|
} else {
|
|
|
|
auto num = static_cast<long>(rand_column_families.size());
|
|
|
|
thread->stats.AddBytesForWrites(num, (sz + 1) * num);
|
|
|
|
}
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Deletes the key derived from `rand_keys[0]` from every column family listed
// in `rand_column_families`, using a single WriteBatch.
//
// On failure the error is printed to stderr and counted in the thread stats;
// on success one delete per column family is recorded. Returns the status of
// the write.
Status TestDelete(ThreadState* thread, WriteOptions& write_opts,
                  const std::vector<int>& rand_column_families,
                  const std::vector<int64_t>& rand_keys) override {
  std::string target = Key(rand_keys[0]);
  Slice target_slice = target;

  WriteBatch wb;
  for (auto cf_idx : rand_column_families) {
    wb.Delete(column_families_[cf_idx], target_slice);
  }

  Status status = db_->Write(write_opts, &wb);
  if (status.ok()) {
    thread->stats.AddDeletes(static_cast<long>(rand_column_families.size()));
  } else {
    fprintf(stderr, "multidel error: %s\n", status.ToString().c_str());
    thread->stats.AddErrors(1);
  }
  return status;
}
|
|
|
|
|
|
|
|
// Deletes the key range [Key(k), Key(k + FLAGS_range_deletion_width)) from
// every column family listed in `rand_column_families`, using a single
// WriteBatch.
//
// `k` starts as `rand_keys[0]`; if the range would extend past the shared max
// key, a new start is drawn so that the whole range fits.
//
// On failure the error is printed to stderr and counted in the thread stats;
// on success one range deletion per column family is recorded. Returns the
// status of the write.
Status TestDeleteRange(ThreadState* thread, WriteOptions& write_opts,
                       const std::vector<int>& rand_column_families,
                       const std::vector<int64_t>& rand_keys) override {
  assert(!rand_column_families.empty());
  assert(!rand_keys.empty());

  int64_t rand_key = rand_keys[0];
  auto shared = thread->shared;
  int64_t max_key = shared->GetMaxKey();
  if (rand_key > max_key - FLAGS_range_deletion_width) {
    rand_key =
        thread->rand.Next() % (max_key - FLAGS_range_deletion_width + 1);
  }
  std::string key_str = Key(rand_key);
  Slice key = key_str;
  std::string end_key_str = Key(rand_key + FLAGS_range_deletion_width);
  Slice end_key = end_key_str;

  WriteBatch batch;
  for (auto cf : rand_column_families) {
    // `cf` is already an element of `rand_column_families`, i.e. an index into
    // `column_families_`. Indexing `rand_column_families` with it a second
    // time would select the wrong handle (or read out of bounds) whenever the
    // vector is not the identity mapping.
    ColumnFamilyHandle* cfh = column_families_[cf];
    batch.DeleteRange(cfh, key, end_key);
  }

  Status s = db_->Write(write_opts, &batch);
  if (!s.ok()) {
    fprintf(stderr, "multi del range error: %s\n", s.ToString().c_str());
    thread->stats.AddErrors(1);
  } else {
    thread->stats.AddRangeDeletions(
        static_cast<long>(rand_column_families.size()));
  }
  return s;
}
|
|
|
|
|
|
|
|
// Not supported by this test: asserts in debug builds, otherwise prints an
// explanation to stderr and terminates the process.
void TestIngestExternalFile(
    ThreadState* /* thread */,
    const std::vector<int>& /* rand_column_families */,
    const std::vector<int64_t>& /* rand_keys */) override {
  assert(false);
  fputs(
      "CfConsistencyStressTest does not support TestIngestExternalFile "
      "because it's not possible to verify the result\n",
      stderr);
  std::terminate();
}
|
|
|
|
|
|
|
|
// Reads the key derived from `rand_keys[0]`.
//
// With a 1/2 chance, performs a single Get from one randomly chosen column
// family in `rand_column_families`. Otherwise takes a snapshot and checks that
// every column family in `rand_column_families` returns the same result for
// the key (all not found, or all found with equal values).
//
// An inconsistency is printed to stderr, counted as an error, and flagged as a
// verification failure on the shared state. Other non-OK, non-NotFound
// statuses are printed and counted as errors. Returns the status of the last
// read performed.
Status TestGet(ThreadState* thread, const ReadOptions& readoptions,
               const std::vector<int>& rand_column_families,
               const std::vector<int64_t>& rand_keys) override {
  assert(!rand_column_families.empty());
  assert(!rand_keys.empty());

  std::string key_str = Key(rand_keys[0]);
  Slice key = key_str;
  Status s;
  bool is_consistent = true;

  if (thread->rand.OneIn(2)) {
    // 1/2 chance, does a random read from random CF
    auto cfh =
        column_families_[rand_column_families[thread->rand.Next() %
                                              rand_column_families.size()]];
    std::string from_db;
    s = db_->Get(readoptions, cfh, key, &from_db);
  } else {
    // 1/2 chance, comparing one key is the same across all CFs.
    // The guard releases the snapshot on every exit path from this scope.
    ManagedSnapshot snapshot_guard(db_);
    ReadOptions readoptionscopy = readoptions;
    readoptionscopy.snapshot = snapshot_guard.snapshot();

    ColumnFamilyHandle* const cfh0 = column_families_[rand_column_families[0]];
    std::string value0;
    s = db_->Get(readoptionscopy, cfh0, key, &value0);
    if (s.ok() || s.IsNotFound()) {
      bool found = s.ok();
      for (size_t i = 1; i < rand_column_families.size(); i++) {
        ColumnFamilyHandle* const cfh_i =
            column_families_[rand_column_families[i]];
        std::string value1;
        s = db_->Get(readoptionscopy, cfh_i, key, &value1);
        if (!s.ok() && !s.IsNotFound()) {
          break;
        }
        // Diagnostics name the handles that were actually read rather than
        // the loop position, so the report stays accurate even when
        // `rand_column_families` is not the identity mapping.
        if (!found && s.ok()) {
          fprintf(stderr, "Get() return different results with key %s\n",
                  Slice(key_str).ToString(true).c_str());
          fprintf(stderr, "CF %s is not found\n", cfh0->GetName().c_str());
          fprintf(stderr, "CF %s returns value %s\n",
                  cfh_i->GetName().c_str(),
                  Slice(value1).ToString(true).c_str());
          is_consistent = false;
        } else if (found && s.IsNotFound()) {
          fprintf(stderr, "Get() return different results with key %s\n",
                  Slice(key_str).ToString(true).c_str());
          fprintf(stderr, "CF %s returns value %s\n", cfh0->GetName().c_str(),
                  Slice(value0).ToString(true).c_str());
          fprintf(stderr, "CF %s is not found\n", cfh_i->GetName().c_str());
          is_consistent = false;
        } else if (s.ok() && value0 != value1) {
          fprintf(stderr, "Get() return different results with key %s\n",
                  Slice(key_str).ToString(true).c_str());
          fprintf(stderr, "CF %s returns value %s\n", cfh0->GetName().c_str(),
                  Slice(value0).ToString(true).c_str());
          fprintf(stderr, "CF %s returns value %s\n",
                  cfh_i->GetName().c_str(),
                  Slice(value1).ToString(true).c_str());
          is_consistent = false;
        }
        if (!is_consistent) {
          break;
        }
      }
    }
  }

  if (!is_consistent) {
    fprintf(stderr, "TestGet error: is_consistent is false\n");
    thread->stats.AddErrors(1);
    // Fail fast to preserve the DB state.
    thread->shared->SetVerificationFailure();
  } else if (s.ok()) {
    thread->stats.AddGets(1, 1);
  } else if (s.IsNotFound()) {
    thread->stats.AddGets(1, 0);
  } else {
    fprintf(stderr, "TestGet error: %s\n", s.ToString().c_str());
    thread->stats.AddErrors(1);
  }
  return s;
}
|
|
|
|
|
|
|
|
// Issues one MultiGet for all keys in `rand_keys` against the first column
// family in `rand_column_families`.
//
// The read options are copied and their rate limiter priority is set from
// `FLAGS_rate_limit_user_ops`. Each per-key status is tallied in the thread
// stats (found, not found, or error; errors are also printed to stderr).
// Returns the per-key statuses in the same order as `rand_keys`.
std::vector<Status> TestMultiGet(
    ThreadState* thread, const ReadOptions& read_opts,
    const std::vector<int>& rand_column_families,
    const std::vector<int64_t>& rand_keys) override {
  assert(!rand_column_families.empty());

  size_t num_keys = rand_keys.size();
  std::vector<std::string> key_str;
  std::vector<Slice> keys;
  keys.reserve(num_keys);
  // Reserved up front so the strings never move while `keys` refers to them.
  key_str.reserve(num_keys);
  std::vector<PinnableSlice> values(num_keys);
  std::vector<Status> statuses(num_keys);
  ColumnFamilyHandle* cfh = column_families_[rand_column_families[0]];
  ReadOptions readoptionscopy = read_opts;
  readoptionscopy.rate_limiter_priority =
      FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL;

  for (size_t i = 0; i < num_keys; ++i) {
    key_str.emplace_back(Key(rand_keys[i]));
    keys.emplace_back(key_str.back());
  }
  db_->MultiGet(readoptionscopy, cfh, num_keys, keys.data(), values.data(),
                statuses.data());
  // Iterate by const reference: the statuses are only inspected, so there is
  // no reason to copy each one.
  for (const auto& s : statuses) {
    if (s.ok()) {
      // found case
      thread->stats.AddGets(1, 1);
    } else if (s.IsNotFound()) {
      // not found case
      thread->stats.AddGets(1, 0);
    } else {
      // errors case
      fprintf(stderr, "MultiGet error: %s\n", s.ToString().c_str());
      thread->stats.AddErrors(1);
    }
  }
  return statuses;
}
|
|
|
|
|
|
|
|
// Reads the key derived from `rand_keys[0]` as a wide-column entity.
//
// With a 1/2 chance, reads from one randomly chosen column family and checks
// the returned columns with VerifyWideColumns. Otherwise takes a snapshot,
// reads the entity from the first column family in `rand_column_families`,
// verifies it, and then checks that every other listed column family returns
// the same result (all not found, or all found with equal entities).
//
// Any inconsistency is printed to stderr, counted as an error, and flagged as
// a verification failure on the shared state. Other non-OK, non-NotFound
// statuses are printed and counted as errors.
void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts,
                   const std::vector<int>& rand_column_families,
                   const std::vector<int64_t>& rand_keys) override {
  assert(thread);
  assert(!rand_column_families.empty());
  assert(!rand_keys.empty());

  const std::string key = Key(rand_keys[0]);

  Status s;
  bool is_consistent = true;

  if (thread->rand.OneIn(2)) {
    // With a 1/2 chance, do a random read from a random CF
    const size_t slot = thread->rand.Next() % rand_column_families.size();

    assert(rand_column_families[slot] >= 0);
    assert(rand_column_families[slot] <
           static_cast<int>(column_families_.size()));

    ColumnFamilyHandle* const handle =
        column_families_[rand_column_families[slot]];
    assert(handle);

    PinnableWideColumns entity;
    s = db_->GetEntity(read_opts, handle, key, &entity);

    if (s.ok() && !VerifyWideColumns(entity.columns())) {
      fprintf(
          stderr,
          "GetEntity error: inconsistent columns for key %s, entity %s\n",
          StringToHex(key).c_str(),
          WideColumnsToHex(entity.columns()).c_str());
      is_consistent = false;
    }
  } else {
    // With a 1/2 chance, compare one key across all CFs
    ManagedSnapshot snapshot_guard(db_);

    ReadOptions snapshot_opts = read_opts;
    snapshot_opts.snapshot = snapshot_guard.snapshot();

    assert(rand_column_families[0] >= 0);
    assert(rand_column_families[0] <
           static_cast<int>(column_families_.size()));

    // The entity from the first listed CF is the baseline for all the others.
    PinnableWideColumns baseline;
    s = db_->GetEntity(snapshot_opts,
                       column_families_[rand_column_families[0]], key,
                       &baseline);

    if (s.ok() || s.IsNotFound()) {
      const bool baseline_found = s.ok();

      if (baseline_found && !VerifyWideColumns(baseline.columns())) {
        fprintf(stderr,
                "GetEntity error: inconsistent columns for key %s, "
                "entity %s\n",
                StringToHex(key).c_str(),
                WideColumnsToHex(baseline.columns()).c_str());
        is_consistent = false;
      }

      // Only compare against the other CFs if the baseline itself is sane.
      for (size_t i = 1; is_consistent && i < rand_column_families.size();
           ++i) {
        assert(rand_column_families[i] >= 0);
        assert(rand_column_families[i] <
               static_cast<int>(column_families_.size()));

        PinnableWideColumns entity;
        s = db_->GetEntity(snapshot_opts,
                           column_families_[rand_column_families[i]], key,
                           &entity);

        if (!s.ok() && !s.IsNotFound()) {
          break;
        }

        const bool entity_found = s.ok();

        assert(!column_family_names_.empty());
        assert(i < column_family_names_.size());

        if (!baseline_found && entity_found) {
          fprintf(stderr,
                  "GetEntity returns different results for key %s: CF %s "
                  "returns not found, CF %s returns entity %s\n",
                  StringToHex(key).c_str(), column_family_names_[0].c_str(),
                  column_family_names_[i].c_str(),
                  WideColumnsToHex(entity.columns()).c_str());
          is_consistent = false;
        } else if (baseline_found && !entity_found) {
          fprintf(stderr,
                  "GetEntity returns different results for key %s: CF %s "
                  "returns entity %s, CF %s returns not found\n",
                  StringToHex(key).c_str(), column_family_names_[0].c_str(),
                  WideColumnsToHex(baseline.columns()).c_str(),
                  column_family_names_[i].c_str());
          is_consistent = false;
        } else if (entity_found && entity != baseline) {
          fprintf(stderr,
                  "GetEntity returns different results for key %s: CF %s "
                  "returns entity %s, CF %s returns entity %s\n",
                  StringToHex(key).c_str(), column_family_names_[0].c_str(),
                  WideColumnsToHex(baseline.columns()).c_str(),
                  column_family_names_[i].c_str(),
                  WideColumnsToHex(entity.columns()).c_str());
          is_consistent = false;
        }
        // The loop condition stops the scan after the first mismatch.
      }
    }
  }

  if (!is_consistent) {
    fprintf(stderr, "TestGetEntity error: results are not consistent\n");
    thread->stats.AddErrors(1);
    // Fail fast to preserve the DB state.
    thread->shared->SetVerificationFailure();
  } else if (s.ok()) {
    thread->stats.AddGets(1, 1);
  } else if (s.IsNotFound()) {
    thread->stats.AddGets(1, 0);
  } else {
    fprintf(stderr, "TestGetEntity error: %s\n", s.ToString().c_str());
    thread->stats.AddErrors(1);
  }
}
|
|
|
|
|
|
|
|
// Scans all keys sharing a prefix with the key derived from `rand_keys[0]`, in
// one randomly chosen column family from `rand_column_families`.
//
// The prefix length is `FLAGS_prefix_size`, or 7 when that flag is negative.
// With a 1/2 chance (and only if a next prefix exists) the next prefix is set
// as the iterator's upper bound. Every visited entry is checked with
// VerifyWideColumns; a mismatch ends the scan with a Corruption status.
//
// On error the status is printed to stderr, counted, and returned; otherwise
// the number of scanned keys is recorded and OK is returned.
Status TestPrefixScan(ThreadState* thread, const ReadOptions& readoptions,
                      const std::vector<int>& rand_column_families,
                      const std::vector<int64_t>& rand_keys) override {
  assert(!rand_column_families.empty());
  assert(!rand_keys.empty());

  const std::string seek_key = Key(rand_keys[0]);

  size_t prefix_len = 7;
  if (!(FLAGS_prefix_size < 0)) {
    prefix_len = static_cast<size_t>(FLAGS_prefix_size);
  }

  const Slice prefix(seek_key.data(), prefix_len);

  std::string next_prefix;
  Slice bound_slice;

  ReadOptions scan_opts = readoptions;

  // The next prefix is always computed first because the assertion after the
  // scan needs it; whether it also becomes the upper bound is a coin flip.
  if (GetNextPrefix(prefix, &next_prefix) && thread->rand.OneIn(2)) {
    bound_slice = Slice(next_prefix);
    scan_opts.iterate_upper_bound = &bound_slice;
  }

  const auto slot =
      thread->rand.Uniform(static_cast<int>(rand_column_families.size()));
  ColumnFamilyHandle* const handle =
      column_families_[rand_column_families[slot]];
  assert(handle);

  std::unique_ptr<Iterator> it(db_->NewIterator(scan_opts, handle));

  uint64_t num_seen = 0;
  Status status;

  it->Seek(prefix);
  while (it->Valid() && it->key().starts_with(prefix)) {
    ++num_seen;

    if (!VerifyWideColumns(it->value(), it->columns())) {
      status = Status::Corruption("Value and columns inconsistent",
                                  DebugString(it->value(), it->columns()));
      break;
    }

    it->Next();
  }

  assert(prefix_len == 0 ||
         num_seen <= GetPrefixKeyCount(prefix.ToString(), next_prefix));

  // A verification failure takes precedence over the iterator's own status.
  if (status.ok()) {
    status = it->status();
  }

  if (status.ok()) {
    thread->stats.AddPrefixes(1, num_seen);
    return Status::OK();
  }

  fprintf(stderr, "TestPrefixScan error: %s\n", status.ToString().c_str());
  thread->stats.AddErrors(1);

  return status;
}
|
|
|
|
|
|
|
|
// Returns a randomly chosen column family handle; the requested id is ignored
// because all column families should contain the same data.
ColumnFamilyHandle* GetControlCfh(ThreadState* thread,
                                  int /*column_family_id*/
                                  ) override {
  const auto pick = thread->rand.Next() % column_families_.size();
  return column_families_[pick];
}
|
|
|
|
|
|
|
|
// Walks all column families in lockstep under one snapshot and checks that
// they hold the same sequence of keys.
//
// One total-order iterator per column family is positioned at the first key.
// On every round:
//   * each valid iterator's value/columns pair is checked with
//     VerifyWideColumns;
//   * if no iterator passed, the scan ends (reporting any stored errors);
//   * if only some passed, a verification failure is flagged and the state of
//     every column family is printed;
//   * otherwise the keys of all iterators are compared against the first
//     column family's key; a mismatch prints both entries plus the internal
//     key versions in between and flags a verification failure.
// The scan also stops as soon as the shared state reports a verification
// failure.
void VerifyDb(ThreadState* thread) const override {
  // This `ReadOptions` is for validation purposes. Ignore
  // `FLAGS_rate_limit_user_ops` to avoid slowing any validation.
  ReadOptions options(FLAGS_verify_checksum, true);

  // We must set total_order_seek to true because we are doing a SeekToFirst
  // on a column family whose memtables may support (by default) prefix-based
  // iterator. In this case, NewIterator with options.total_order_seek being
  // false returns a prefix-based iterator. Calling SeekToFirst using this
  // iterator causes the iterator to become invalid. That means we cannot
  // iterate the memtable using this iterator any more, although the memtable
  // contains the most up-to-date key-values.
  options.total_order_seek = true;

  ManagedSnapshot snapshot_guard(db_);
  options.snapshot = snapshot_guard.snapshot();

  const size_t num = column_families_.size();

  std::vector<std::unique_ptr<Iterator>> iters;
  iters.reserve(num);

  for (size_t i = 0; i < num; ++i) {
    iters.emplace_back(db_->NewIterator(options, column_families_[i]));
    iters.back()->SeekToFirst();
  }

  std::vector<Status> statuses(num, Status::OK());

  assert(thread);

  auto shared = thread->shared;
  assert(shared);

  do {
    if (shared->HasVerificationFailedYet()) {
      break;
    }

    // Number of iterators that are valid AND passed the wide-column check.
    size_t valid_cnt = 0;

    for (size_t i = 0; i < num; ++i) {
      const auto& iter = iters[i];
      assert(iter);

      if (iter->Valid()) {
        if (!VerifyWideColumns(iter->value(), iter->columns())) {
          statuses[i] =
              Status::Corruption("Value and columns inconsistent",
                                 DebugString(iter->value(), iter->columns()));
        } else {
          ++valid_cnt;
        }
      } else {
        statuses[i] = iter->status();
      }
    }

    if (valid_cnt == 0) {
      for (size_t i = 0; i < num; ++i) {
        const auto& s = statuses[i];
        if (!s.ok()) {
          fprintf(stderr, "Iterator on cf %s has error: %s\n",
                  column_families_[i]->GetName().c_str(),
                  s.ToString().c_str());
          shared->SetVerificationFailure();
        }
      }

      break;
    }

    if (valid_cnt < num) {
      shared->SetVerificationFailure();

      for (size_t i = 0; i < num; ++i) {
        assert(iters[i]);

        if (!iters[i]->Valid()) {
          if (statuses[i].ok()) {
            fprintf(stderr, "Finished scanning cf %s\n",
                    column_families_[i]->GetName().c_str());
          } else {
            fprintf(stderr, "Iterator on cf %s has error: %s\n",
                    column_families_[i]->GetName().c_str(),
                    statuses[i].ToString().c_str());
          }
        } else if (!statuses[i].ok()) {
          // A valid iterator whose entry failed the wide-column check: report
          // the stored corruption instead of "remaining data", so the actual
          // cause of the failure is not lost.
          fprintf(stderr, "Iterator on cf %s has error: %s\n",
                  column_families_[i]->GetName().c_str(),
                  statuses[i].ToString().c_str());
        } else {
          fprintf(stderr, "cf %s has remaining data to scan\n",
                  column_families_[i]->GetName().c_str());
        }
      }

      break;
    }

    if (shared->HasVerificationFailedYet()) {
      break;
    }

    // If the program reaches here, then all column families' iterators are
    // still valid.
    assert(valid_cnt == num);

    // Retry the round (without advancing) while the shared state reports that
    // verification results are already being printed.
    if (shared->PrintingVerificationResults()) {
      continue;
    }

    assert(iters[0]);

    const Slice key = iters[0]->key();
    const Slice value = iters[0]->value();

    int num_mismatched_cfs = 0;

    for (size_t i = 1; i < num; ++i) {
      assert(iters[i]);

      const int cmp = key.compare(iters[i]->key());

      if (cmp != 0) {
        ++num_mismatched_cfs;

        // The reference entry (first CF) is printed once, on the first
        // mismatch of this round.
        if (1 == num_mismatched_cfs) {
          fprintf(stderr, "Verification failed\n");
          fprintf(stderr, "Latest Sequence Number: %" PRIu64 "\n",
                  db_->GetLatestSequenceNumber());
          fprintf(stderr, "[%s] %s => %s\n",
                  column_families_[0]->GetName().c_str(),
                  key.ToString(true /* hex */).c_str(),
                  value.ToString(true /* hex */).c_str());
        }

        fprintf(stderr, "[%s] %s => %s\n",
                column_families_[i]->GetName().c_str(),
                iters[i]->key().ToString(true /* hex */).c_str(),
                iters[i]->value().ToString(true /* hex */).c_str());

        // Order the two differing keys so the dump below covers the range
        // between them.
        Slice begin_key;
        Slice end_key;
        if (cmp < 0) {
          begin_key = key;
          end_key = iters[i]->key();
        } else {
          begin_key = iters[i]->key();
          end_key = key;
        }

        // Prints up to kMaxNumIKeys internal key versions of `cfh` within
        // [begin_key, end_key].
        const auto print_key_versions = [&](ColumnFamilyHandle* cfh) {
          constexpr size_t kMaxNumIKeys = 8;

          std::vector<KeyVersion> versions;
          const Status s = GetAllKeyVersions(db_, cfh, begin_key, end_key,
                                             kMaxNumIKeys, &versions);
          if (!s.ok()) {
            fprintf(stderr, "%s\n", s.ToString().c_str());
            return;
          }

          assert(cfh);

          fprintf(stderr,
                  "Internal keys in CF '%s', [%s, %s] (max %" ROCKSDB_PRIszt
                  ")\n",
                  cfh->GetName().c_str(),
                  begin_key.ToString(true /* hex */).c_str(),
                  end_key.ToString(true /* hex */).c_str(), kMaxNumIKeys);

          for (const KeyVersion& kv : versions) {
            fprintf(stderr, "  key %s seq %" PRIu64 " type %d\n",
                    Slice(kv.user_key).ToString(true).c_str(), kv.sequence,
                    kv.type);
          }
        };

        if (1 == num_mismatched_cfs) {
          print_key_versions(column_families_[0]);
        }

        print_key_versions(column_families_[i]);

        shared->SetVerificationFailure();
      }
    }

    shared->FinishPrintingVerificationResults();

    for (auto& iter : iters) {
      assert(iter);
      iter->Next();
    }
  } while (true);
}
|
|
|
|
|
|
|
|
// Checks that all column families hold identical contents by comparing
// CRC32C checksums computed over full iterator scans.
//
// If a comparison DB (`cmp_db_`) is set, it is first asked to catch up with
// the primary and is then the instance that gets scanned (via `cmp_cfhs_`);
// otherwise the primary `db_` is scanned through a snapshot so that every
// column family is read at the same point in time.
//
// The cross-column-family comparison is only performed when
// `FLAGS_disable_wal` is set (see the comment above that branch).
//
// Every failure is printed to stderr and flags `shared` so that the test
// stops; builds with assertions enabled additionally trip `assert(false)`.
void ContinuouslyVerifyDb(ThreadState* thread) const override {
  assert(thread);
  Status status;

  DB* db_ptr = cmp_db_ ? cmp_db_ : db_;
  const auto& cfhs = cmp_db_ ? cmp_cfhs_ : column_families_;

  // Take a snapshot to preserve the state of primary db.
  ManagedSnapshot snapshot_guard(db_);

  SharedState* shared = thread->shared;
  assert(shared);

  if (cmp_db_) {
    status = cmp_db_->TryCatchUpWithPrimary();
    if (!status.ok()) {
      fprintf(stderr, "TryCatchUpWithPrimary: %s\n",
              status.ToString().c_str());
      shared->SetShouldStopTest();
      assert(false);
      return;
    }
  }

  // Scans `iter` from the first entry to the last and folds each key, value
  // and wide column (name and value) into a CRC32C. The result is always
  // stored in `*checksum`; the returned status is the iterator's final
  // status, so a scan that ended early because of an error is reported.
  const auto checksum_column_family = [](Iterator* iter,
                                         uint32_t* checksum) -> Status {
    assert(nullptr != checksum);

    uint32_t ret = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      ret = crc32c::Extend(ret, iter->key().data(), iter->key().size());
      ret = crc32c::Extend(ret, iter->value().data(), iter->value().size());

      for (const auto& column : iter->columns()) {
        ret = crc32c::Extend(ret, column.name().data(), column.name().size());
        ret =
            crc32c::Extend(ret, column.value().data(), column.value().size());
      }
    }

    *checksum = ret;
    return iter->status();
  };

  // This `ReadOptions` is for validation purposes. Ignore
  // `FLAGS_rate_limit_user_ops` to avoid slowing any validation.
  ReadOptions ropts(FLAGS_verify_checksum, true);
  ropts.total_order_seek = true;
  if (nullptr == cmp_db_) {
    // The snapshot belongs to the primary, so it is only applied when the
    // primary itself is the instance being scanned.
    ropts.snapshot = snapshot_guard.snapshot();
  }

  uint32_t crc = 0;
  {
    // Compute crc for all key-values of default column family.
    std::unique_ptr<Iterator> it(db_ptr->NewIterator(ropts));
    status = checksum_column_family(it.get(), &crc);
    if (!status.ok()) {
      fprintf(stderr, "Computing checksum of default cf: %s\n",
              status.ToString().c_str());
      // Stop the test here as well, like the other failure paths do;
      // otherwise a build without assertions would silently carry on.
      shared->SetShouldStopTest();
      assert(false);
      return;
    }
  }

  // Since we currently intentionally disallow reading from the secondary
  // instance with snapshot, we cannot achieve cross-cf consistency if WAL is
  // enabled because there is no guarantee that secondary instance replays
  // the primary's WAL to a consistent point where all cfs have the same
  // data.
  if (status.ok() && FLAGS_disable_wal) {
    // Seed with the reference checksum so that, if no column family ends up
    // being scanned below, no spurious mismatch is reported. Every scan
    // overwrites this value.
    uint32_t tmp_crc = crc;
    for (ColumnFamilyHandle* cfh : cfhs) {
      if (cfh == db_ptr->DefaultColumnFamily()) {
        continue;
      }
      std::unique_ptr<Iterator> it(db_ptr->NewIterator(ropts, cfh));
      status = checksum_column_family(it.get(), &tmp_crc);
      if (!status.ok() || tmp_crc != crc) {
        // Stop at the first failure; it is reported below.
        break;
      }
    }

    if (!status.ok()) {
      fprintf(stderr, "status: %s\n", status.ToString().c_str());
      shared->SetShouldStopTest();
      assert(false);
    } else if (tmp_crc != crc) {
      fprintf(stderr, "tmp_crc=%" PRIu32 " crc=%" PRIu32 "\n", tmp_crc, crc);
      shared->SetShouldStopTest();
      assert(false);
    }
  }
}
|
|
|
|
|
|
|
|
std::vector<int> GenerateColumnFamilies(
|
|
|
|
const int /* num_column_families */,
|
|
|
|
int /* rand_column_family */) const override {
|
|
|
|
std::vector<int> ret;
|
|
|
|
int num = static_cast<int>(column_families_.size());
|
|
|
|
int k = 0;
|
|
|
|
std::generate_n(back_inserter(ret), num, [&k]() -> int { return k++; });
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
std::atomic<uint32_t> batch_id_;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Factory for the column family consistency stress test. Returns a raw
// pointer to a newly heap-allocated `CfConsistencyStressTest`; nothing in
// this function releases it.
StressTest* CreateCfConsistencyStressTest() {
  StressTest* const stress_test = new CfConsistencyStressTest();
  return stress_test;
}
|
|
|
|
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|
|
#endif // GFLAGS
|