|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#ifdef GFLAGS
|
|
|
|
#include "db_stress_tool/db_stress_common.h"
|
|
|
|
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
class BatchedOpsStressTest : public StressTest {
|
|
|
|
public:
|
|
|
|
BatchedOpsStressTest() {}
|
|
|
|
virtual ~BatchedOpsStressTest() {}
|
|
|
|
|
|
|
|
bool IsStateTracked() const override { return false; }
|
|
|
|
|
|
|
|
// Given a key K and value V, this puts ("0"+K, V+"0"), ("1"+K, V+"1"), ...,
|
|
|
|
// ("9"+K, V+"9") in DB atomically i.e in a single batch.
|
|
|
|
// Also refer BatchedOpsStressTest::TestGet
|
|
|
|
Status TestPut(ThreadState* thread, WriteOptions& write_opts,
|
|
|
|
const ReadOptions& /* read_opts */,
|
|
|
|
const std::vector<int>& rand_column_families,
|
|
|
|
const std::vector<int64_t>& rand_keys,
|
|
|
|
char (&value)[100]) override {
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
assert(!rand_column_families.empty());
|
|
|
|
assert(!rand_keys.empty());
|
|
|
|
|
|
|
|
const std::string key_body = Key(rand_keys[0]);
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
|
Support parallel read and write/delete to same key in NonBatchedOpsStressTest (#11058)
Summary:
**Context:**
Current `NonBatchedOpsStressTest` does not allow multi-thread read (i.e, Get, Iterator) and write (i.e, Put, Merge) or delete to the same key. Every read or write/delete operation will acquire lock (`GetLocksForKeyRange`) on the target key to gain exclusive access to it. This does not align with RocksDB's nature of allowing multi-thread read and write/delete to the same key, that is concurrent threads can issue read/write/delete to RocksDB without external locking. Therefore this is a gap in our testing coverage.
To close the gap, biggest challenge remains in verifying db value against expected state in presence of parallel read and write/delete. The challenge is due to read/write/delete to the db and read/write to expected state is not within one atomic operation. Therefore we may not know the exact expected state of a certain db read, as by the time we read the expected state for that db read, another write to expected state for another db write to the same key might have changed the expected state.
**Summary:**
Credited to ajkr's idea, we now solve this challenge by breaking the 32-bits expected value of a key into different parts that can be read and write to in parallel.
Basically we divide the 32-bits expected value into `value_base` (corresponding to the previous whole 32 bits but now with some shrinking in the value base range we allow), `pending_write` (i.e, whether there is an ongoing concurrent write), `del_counter` (i.e, number of times a value has been deleted, analogous to value_base for write), `pending_delete` (similar to pending_write) and `deleted` (i.e whether a key is deleted).
Also, we need to use incremental `value_base` instead of random value base as before because we want to control the range of value base a correct db read result can possibly be in presence of parallel read and write. In that way, we can verify the correctness of the read against expected state more easily. This is at the cost of reducing the randomness of the value generated in NonBatchedOpsStressTest we are willing to accept.
(For detailed algorithm of how to use these parts to infer expected state of a key, see the PR)
Misc: hide value_base detail from callers of ExpectedState by abstracting related logics into ExpectedValue class
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11058
Test Plan:
- Manual test of small number of keys (i.e, high chances of parallel read and write/delete to same key) with equally distributed read/write/deleted for 30 min
```
python3 tools/db_crashtest.py --simple {blackbox|whitebox} --sync_fault_injection=1 --skip_verifydb=0 --continuous_verification_interval=1000 --clear_column_family_one_in=0 --max_key=10 --column_families=1 --threads=32 --readpercent=25 --writepercent=25 --nooverwritepercent=0 --iterpercent=25 --verify_iterator_with_expected_state_one_in=1 --num_iterations=5 --delpercent=15 --delrangepercent=10 --range_deletion_width=5 --use_merge={0|1} --use_put_entity_one_in=0 --use_txn=0 --verify_before_write=0 --user_timestamp_size=0 --compact_files_one_in=1000 --compact_range_one_in=1000 --flush_one_in=1000 --get_property_one_in=1000 --ingest_external_file_one_in=100 --backup_one_in=100 --checkpoint_one_in=100 --approximate_size_one_in=0 --acquire_snapshot_one_in=100 --use_multiget=0 --prefixpercent=0 --get_live_files_one_in=1000 --manual_wal_flush_one_in=1000 --pause_background_one_in=1000 --target_file_size_base=524288 --write_buffer_size=524288 --verify_checksum_one_in=1000 --verify_db_one_in=1000
```
- Rehearsal stress test for normal parameter and aggressive parameter to see if such change can find what existing stress test can find (i.e, no regression in testing capability)
- [Ongoing]Try to find new bugs with this change that are not found by current NonBatchedOpsStressTest with no parallel read and write/delete to same key
Reviewed By: ajkr
Differential Revision: D42257258
Pulled By: hx235
fbshipit-source-id: e6fdc18f1fad3753e5ac91731483a644d9b5b6eb
2 years ago
|
|
|
const uint32_t value_base = thread->rand.Next();
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
const size_t sz = GenerateValue(value_base, value, sizeof(value));
|
|
|
|
const std::string value_body = Slice(value, sz).ToString();
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
|
|
|
|
WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
|
Revise APIs related to user-defined timestamp (#8946)
Summary:
ajkr reminded me that we have a rule of not including per-kv related data in `WriteOptions`.
Namely, `WriteOptions` should not include information about "what-to-write", but should just
include information about "how-to-write".
According to this rule, `WriteOptions::timestamp` (experimental) is clearly a violation. Therefore,
this PR removes `WriteOptions::timestamp` for compliance.
After the removal, we need to pass timestamp info via another set of APIs. This PR proposes a set
of overloaded functions `Put(write_opts, key, value, ts)`, `Delete(write_opts, key, ts)`, and
`SingleDelete(write_opts, key, ts)`. Planned to add `Write(write_opts, batch, ts)`, but its complexity
made me reconsider doing it in another PR (maybe).
For better checking and returning error early, we also add a new set of APIs to `WriteBatch` that take
extra `timestamp` information when writing to `WriteBatch`es.
These set of APIs in `WriteBatchWithIndex` are currently not supported, and are on our TODO list.
Removed `WriteBatch::AssignTimestamps()` and renamed `WriteBatch::AssignTimestamp()` to
`WriteBatch::UpdateTimestamps()` since this method require that all keys have space for timestamps
allocated already and multiple timestamps can be updated.
The constructor of `WriteBatch` now takes a fourth argument `default_cf_ts_sz` which is the timestamp
size of the default column family. This will be used to allocate space when calling APIs that do not
specify a column family handle.
Also, updated `DB::Get()`, `DB::MultiGet()`, `DB::NewIterator()`, `DB::NewIterators()` methods, replacing
some assertions about timestamp to returning Status code.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/8946
Test Plan:
make check
./db_bench -benchmarks=fillseq,fillrandom,readrandom,readseq,deleterandom -user_timestamp_size=8
./db_stress --user_timestamp_size=8 -nooverwritepercent=0 -test_secondary=0 -secondary_catch_up_one_in=0 -continuous_verification_interval=0
Make sure there is no perf regression by running the following
```
./db_bench_opt -db=/dev/shm/rocksdb -use_existing_db=0 -level0_stop_writes_trigger=256 -level0_slowdown_writes_trigger=256 -level0_file_num_compaction_trigger=256 -disable_wal=1 -duration=10 -benchmarks=fillrandom
```
Before this PR
```
DB path: [/dev/shm/rocksdb]
fillrandom : 1.831 micros/op 546235 ops/sec; 60.4 MB/s
```
After this PR
```
DB path: [/dev/shm/rocksdb]
fillrandom : 1.820 micros/op 549404 ops/sec; 60.8 MB/s
```
Reviewed By: ltamasi
Differential Revision: D33721359
Pulled By: riversand963
fbshipit-source-id: c131561534272c120ffb80711d42748d21badf09
3 years ago
|
|
|
FLAGS_batch_protection_bytes_per_key,
|
|
|
|
FLAGS_user_timestamp_size);
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
|
|
|
|
ColumnFamilyHandle* const cfh = column_families_[rand_column_families[0]];
|
|
|
|
assert(cfh);
|
|
|
|
|
|
|
|
for (int i = 9; i >= 0; --i) {
|
|
|
|
const std::string num = std::to_string(i);
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
|
|
|
|
// Note: the digit in num is prepended to the key; however, it is appended
|
|
|
|
// to the value because we want the "value base" to be encoded uniformly
|
|
|
|
// at the beginning of the value for all types of stress tests (e.g.
|
|
|
|
// batched, non-batched, CF consistency).
|
|
|
|
const std::string k = num + key_body;
|
|
|
|
const std::string v = value_body + num;
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
|
|
|
|
if (FLAGS_use_merge) {
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
batch.Merge(cfh, k, v);
|
|
|
|
} else if (FLAGS_use_put_entity_one_in > 0 &&
|
|
|
|
(value_base % FLAGS_use_put_entity_one_in) == 0) {
|
|
|
|
batch.PutEntity(cfh, k, GenerateWideColumns(value_base, v));
|
|
|
|
} else {
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
batch.Put(cfh, k, v);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2 years ago
|
|
|
const Status s = db_->Write(write_opts, &batch);
|
|
|
|
|
|
|
|
if (!s.ok()) {
|
|
|
|
fprintf(stderr, "multiput error: %s\n", s.ToString().c_str());
|
|
|
|
thread->stats.AddErrors(1);
|
|
|
|
} else {
|
|
|
|
// we did 10 writes each of size sz + 1
|
|
|
|
thread->stats.AddBytesForWrites(10, (sz + 1) * 10);
|
|
|
|
}
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Given a key K, this deletes ("0"+K), ("1"+K), ..., ("9"+K)
|
|
|
|
// in DB atomically i.e in a single batch. Also refer MultiGet.
|
|
|
|
Status TestDelete(ThreadState* thread, WriteOptions& writeoptions,
|
|
|
|
const std::vector<int>& rand_column_families,
|
|
|
|
const std::vector<int64_t>& rand_keys) override {
|
|
|
|
std::string keys[10] = {"9", "7", "5", "3", "1", "8", "6", "4", "2", "0"};
|
|
|
|
|
|
|
|
WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
|
Revise APIs related to user-defined timestamp (#8946)
Summary:
ajkr reminded me that we have a rule of not including per-kv related data in `WriteOptions`.
Namely, `WriteOptions` should not include information about "what-to-write", but should just
include information about "how-to-write".
According to this rule, `WriteOptions::timestamp` (experimental) is clearly a violation. Therefore,
this PR removes `WriteOptions::timestamp` for compliance.
After the removal, we need to pass timestamp info via another set of APIs. This PR proposes a set
of overloaded functions `Put(write_opts, key, value, ts)`, `Delete(write_opts, key, ts)`, and
`SingleDelete(write_opts, key, ts)`. Planned to add `Write(write_opts, batch, ts)`, but its complexity
made me reconsider doing it in another PR (maybe).
For better checking and returning error early, we also add a new set of APIs to `WriteBatch` that take
extra `timestamp` information when writing to `WriteBatch`es.
These set of APIs in `WriteBatchWithIndex` are currently not supported, and are on our TODO list.
Removed `WriteBatch::AssignTimestamps()` and renamed `WriteBatch::AssignTimestamp()` to
`WriteBatch::UpdateTimestamps()` since this method require that all keys have space for timestamps
allocated already and multiple timestamps can be updated.
The constructor of `WriteBatch` now takes a fourth argument `default_cf_ts_sz` which is the timestamp
size of the default column family. This will be used to allocate space when calling APIs that do not
specify a column family handle.
Also, updated `DB::Get()`, `DB::MultiGet()`, `DB::NewIterator()`, `DB::NewIterators()` methods, replacing
some assertions about timestamp to returning Status code.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/8946
Test Plan:
make check
./db_bench -benchmarks=fillseq,fillrandom,readrandom,readseq,deleterandom -user_timestamp_size=8
./db_stress --user_timestamp_size=8 -nooverwritepercent=0 -test_secondary=0 -secondary_catch_up_one_in=0 -continuous_verification_interval=0
Make sure there is no perf regression by running the following
```
./db_bench_opt -db=/dev/shm/rocksdb -use_existing_db=0 -level0_stop_writes_trigger=256 -level0_slowdown_writes_trigger=256 -level0_file_num_compaction_trigger=256 -disable_wal=1 -duration=10 -benchmarks=fillrandom
```
Before this PR
```
DB path: [/dev/shm/rocksdb]
fillrandom : 1.831 micros/op 546235 ops/sec; 60.4 MB/s
```
After this PR
```
DB path: [/dev/shm/rocksdb]
fillrandom : 1.820 micros/op 549404 ops/sec; 60.8 MB/s
```
Reviewed By: ltamasi
Differential Revision: D33721359
Pulled By: riversand963
fbshipit-source-id: c131561534272c120ffb80711d42748d21badf09
3 years ago
|
|
|
FLAGS_batch_protection_bytes_per_key,
|
|
|
|
FLAGS_user_timestamp_size);
|
|
|
|
Status s;
|
|
|
|
auto cfh = column_families_[rand_column_families[0]];
|
|
|
|
std::string key_str = Key(rand_keys[0]);
|
|
|
|
for (int i = 0; i < 10; i++) {
|
|
|
|
keys[i] += key_str;
|
|
|
|
batch.Delete(cfh, keys[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
s = db_->Write(writeoptions, &batch);
|
|
|
|
if (!s.ok()) {
|
|
|
|
fprintf(stderr, "multidelete error: %s\n", s.ToString().c_str());
|
|
|
|
thread->stats.AddErrors(1);
|
|
|
|
} else {
|
|
|
|
thread->stats.AddDeletes(10);
|
|
|
|
}
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status TestDeleteRange(ThreadState* /* thread */,
|
|
|
|
WriteOptions& /* write_opts */,
|
|
|
|
const std::vector<int>& /* rand_column_families */,
|
|
|
|
const std::vector<int64_t>& /* rand_keys */) override {
|
|
|
|
assert(false);
|
|
|
|
return Status::NotSupported(
|
|
|
|
"BatchedOpsStressTest does not support "
|
|
|
|
"TestDeleteRange");
|
|
|
|
}
|
|
|
|
|
|
|
|
void TestIngestExternalFile(
|
|
|
|
ThreadState* /* thread */,
|
|
|
|
const std::vector<int>& /* rand_column_families */,
|
|
|
|
const std::vector<int64_t>& /* rand_keys */) override {
|
|
|
|
assert(false);
|
|
|
|
fprintf(stderr,
|
|
|
|
"BatchedOpsStressTest does not support "
|
|
|
|
"TestIngestExternalFile\n");
|
|
|
|
std::terminate();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Given a key K, this gets values for "0"+K, "1"+K, ..., "9"+K
|
|
|
|
// in the same snapshot, and verifies that all the values are of the form
|
|
|
|
// V+"0", V+"1", ..., V+"9".
|
|
|
|
// ASSUMES that BatchedOpsStressTest::TestPut was used to put (K, V) into
|
|
|
|
// the DB.
|
|
|
|
Status TestGet(ThreadState* thread, const ReadOptions& readoptions,
|
|
|
|
const std::vector<int>& rand_column_families,
|
|
|
|
const std::vector<int64_t>& rand_keys) override {
|
|
|
|
std::string keys[10] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"};
|
|
|
|
Slice key_slices[10];
|
|
|
|
std::string values[10];
|
|
|
|
ReadOptions readoptionscopy = readoptions;
|
|
|
|
readoptionscopy.snapshot = db_->GetSnapshot();
|
|
|
|
std::string key_str = Key(rand_keys[0]);
|
|
|
|
Slice key = key_str;
|
|
|
|
auto cfh = column_families_[rand_column_families[0]];
|
|
|
|
std::string from_db;
|
|
|
|
Status s;
|
|
|
|
for (int i = 0; i < 10; i++) {
|
|
|
|
keys[i] += key.ToString();
|
|
|
|
key_slices[i] = keys[i];
|
|
|
|
s = db_->Get(readoptionscopy, cfh, key_slices[i], &from_db);
|
|
|
|
if (!s.ok() && !s.IsNotFound()) {
|
|
|
|
fprintf(stderr, "get error: %s\n", s.ToString().c_str());
|
|
|
|
values[i] = "";
|
|
|
|
thread->stats.AddErrors(1);
|
|
|
|
// we continue after error rather than exiting so that we can
|
|
|
|
// find more errors if any
|
|
|
|
} else if (s.IsNotFound()) {
|
|
|
|
values[i] = "";
|
|
|
|
thread->stats.AddGets(1, 0);
|
|
|
|
} else {
|
|
|
|
values[i] = from_db;
|
|
|
|
|
|
|
|
assert(!keys[i].empty());
|
|
|
|
assert(!values[i].empty());
|
|
|
|
|
|
|
|
const char expected = keys[i].front();
|
|
|
|
const char actual = values[i].back();
|
|
|
|
|
|
|
|
if (expected != actual) {
|
|
|
|
fprintf(stderr, "get error expected = %c actual = %c\n", expected,
|
|
|
|
actual);
|
|
|
|
}
|
|
|
|
|
|
|
|
values[i].pop_back(); // get rid of the differing character
|
|
|
|
|
|
|
|
thread->stats.AddGets(1, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
db_->ReleaseSnapshot(readoptionscopy.snapshot);
|
|
|
|
|
|
|
|
// Now that we retrieved all values, check that they all match
|
|
|
|
for (int i = 1; i < 10; i++) {
|
|
|
|
if (values[i] != values[0]) {
|
|
|
|
fprintf(stderr, "get error: inconsistent values for key %s: %s, %s\n",
|
|
|
|
key.ToString(true).c_str(), StringToHex(values[0]).c_str(),
|
|
|
|
StringToHex(values[i]).c_str());
|
|
|
|
// we continue after error rather than exiting so that we can
|
|
|
|
// find more errors if any
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<Status> TestMultiGet(
|
|
|
|
ThreadState* thread, const ReadOptions& readoptions,
|
|
|
|
const std::vector<int>& rand_column_families,
|
|
|
|
const std::vector<int64_t>& rand_keys) override {
|
|
|
|
size_t num_keys = rand_keys.size();
|
|
|
|
std::vector<Status> ret_status(num_keys);
|
|
|
|
std::array<std::string, 10> keys = {
|
|
|
|
{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}};
|
|
|
|
size_t num_prefixes = keys.size();
|
|
|
|
for (size_t rand_key = 0; rand_key < num_keys; ++rand_key) {
|
|
|
|
std::vector<Slice> key_slices;
|
|
|
|
std::vector<PinnableSlice> values(num_prefixes);
|
|
|
|
std::vector<Status> statuses(num_prefixes);
|
|
|
|
ReadOptions readoptionscopy = readoptions;
|
|
|
|
readoptionscopy.snapshot = db_->GetSnapshot();
|
|
|
|
readoptionscopy.rate_limiter_priority =
|
|
|
|
FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL;
|
|
|
|
std::vector<std::string> key_str;
|
|
|
|
key_str.reserve(num_prefixes);
|
|
|
|
key_slices.reserve(num_prefixes);
|
|
|
|
std::string from_db;
|
|
|
|
ColumnFamilyHandle* cfh = column_families_[rand_column_families[0]];
|
|
|
|
|
|
|
|
for (size_t key = 0; key < num_prefixes; ++key) {
|
|
|
|
key_str.emplace_back(keys[key] + Key(rand_keys[rand_key]));
|
|
|
|
key_slices.emplace_back(key_str.back());
|
|
|
|
}
|
|
|
|
db_->MultiGet(readoptionscopy, cfh, num_prefixes, key_slices.data(),
|
|
|
|
values.data(), statuses.data());
|
|
|
|
for (size_t i = 0; i < num_prefixes; i++) {
|
|
|
|
Status s = statuses[i];
|
|
|
|
if (!s.ok() && !s.IsNotFound()) {
|
|
|
|
fprintf(stderr, "multiget error: %s\n", s.ToString().c_str());
|
|
|
|
thread->stats.AddErrors(1);
|
|
|
|
ret_status[rand_key] = s;
|
|
|
|
// we continue after error rather than exiting so that we can
|
|
|
|
// find more errors if any
|
|
|
|
} else if (s.IsNotFound()) {
|
|
|
|
thread->stats.AddGets(1, 0);
|
|
|
|
ret_status[rand_key] = s;
|
|
|
|
} else {
|
|
|
|
assert(!keys[i].empty());
|
|
|
|
assert(!values[i].empty());
|
|
|
|
|
|
|
|
const char expected = keys[i][0];
|
|
|
|
const char actual = values[i][values[i].size() - 1];
|
|
|
|
|
|
|
|
if (expected != actual) {
|
|
|
|
fprintf(stderr, "multiget error expected = %c actual = %c\n",
|
|
|
|
expected, actual);
|
|
|
|
}
|
|
|
|
|
|
|
|
values[i].remove_suffix(1); // get rid of the differing character
|
|
|
|
|
|
|
|
thread->stats.AddGets(1, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
db_->ReleaseSnapshot(readoptionscopy.snapshot);
|
|
|
|
|
|
|
|
// Now that we retrieved all values, check that they all match
|
|
|
|
for (size_t i = 1; i < num_prefixes; i++) {
|
|
|
|
if (values[i] != values[0]) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"multiget error: inconsistent values for key %s: %s, %s\n",
|
|
|
|
StringToHex(key_str[i]).c_str(),
|
|
|
|
StringToHex(values[0].ToString()).c_str(),
|
|
|
|
StringToHex(values[i].ToString()).c_str());
|
|
|
|
// we continue after error rather than exiting so that we can
|
|
|
|
// find more errors if any
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret_status;
|
|
|
|
}
|
|
|
|
|
|
|
|
void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts,
|
|
|
|
const std::vector<int>& rand_column_families,
|
|
|
|
const std::vector<int64_t>& rand_keys) override {
|
|
|
|
assert(thread);
|
|
|
|
|
|
|
|
ManagedSnapshot snapshot_guard(db_);
|
|
|
|
|
|
|
|
ReadOptions read_opts_copy(read_opts);
|
|
|
|
read_opts_copy.snapshot = snapshot_guard.snapshot();
|
|
|
|
|
|
|
|
assert(!rand_keys.empty());
|
|
|
|
|
|
|
|
const std::string key_suffix = Key(rand_keys[0]);
|
|
|
|
|
|
|
|
assert(!rand_column_families.empty());
|
|
|
|
assert(rand_column_families[0] >= 0);
|
|
|
|
assert(rand_column_families[0] < static_cast<int>(column_families_.size()));
|
|
|
|
|
|
|
|
ColumnFamilyHandle* const cfh = column_families_[rand_column_families[0]];
|
|
|
|
assert(cfh);
|
|
|
|
|
|
|
|
constexpr size_t num_keys = 10;
|
|
|
|
|
|
|
|
std::array<PinnableWideColumns, num_keys> results;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < num_keys; ++i) {
|
|
|
|
const std::string key = std::to_string(i) + key_suffix;
|
|
|
|
|
|
|
|
const Status s = db_->GetEntity(read_opts_copy, cfh, key, &results[i]);
|
|
|
|
|
|
|
|
if (!s.ok() && !s.IsNotFound()) {
|
|
|
|
fprintf(stderr, "GetEntity error: %s\n", s.ToString().c_str());
|
|
|
|
thread->stats.AddErrors(1);
|
|
|
|
} else if (s.IsNotFound()) {
|
|
|
|
thread->stats.AddGets(1, 0);
|
|
|
|
} else {
|
|
|
|
thread->stats.AddGets(1, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t i = 0; i < num_keys; ++i) {
|
|
|
|
const WideColumns& columns = results[i].columns();
|
|
|
|
|
|
|
|
if (!CompareColumns(results[0].columns(), columns)) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"GetEntity error: inconsistent entities for key %s: %s, %s\n",
|
|
|
|
StringToHex(key_suffix).c_str(),
|
|
|
|
WideColumnsToHex(results[0].columns()).c_str(),
|
|
|
|
WideColumnsToHex(columns).c_str());
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!columns.empty()) {
|
|
|
|
// The last character of each column value should be 'i' as a decimal
|
|
|
|
// digit
|
|
|
|
const char expected = static_cast<char>('0' + i);
|
|
|
|
|
|
|
|
for (const auto& column : columns) {
|
|
|
|
const Slice& value = column.value();
|
|
|
|
|
|
|
|
if (value.empty() || value[value.size() - 1] != expected) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"GetEntity error: incorrect column value for key "
|
|
|
|
"%s, entity %s, column value %s, expected %c\n",
|
|
|
|
StringToHex(key_suffix).c_str(),
|
|
|
|
WideColumnsToHex(columns).c_str(),
|
|
|
|
value.ToString(/* hex */ true).c_str(), expected);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!VerifyWideColumns(columns)) {
|
|
|
|
fprintf(
|
|
|
|
stderr,
|
|
|
|
"GetEntity error: inconsistent columns for key %s, entity %s\n",
|
|
|
|
StringToHex(key_suffix).c_str(),
|
|
|
|
WideColumnsToHex(columns).c_str());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void TestMultiGetEntity(ThreadState* thread, const ReadOptions& read_opts,
|
|
|
|
const std::vector<int>& rand_column_families,
|
|
|
|
const std::vector<int64_t>& rand_keys) override {
|
|
|
|
assert(thread);
|
|
|
|
|
|
|
|
assert(!rand_column_families.empty());
|
|
|
|
assert(rand_column_families[0] >= 0);
|
|
|
|
assert(rand_column_families[0] < static_cast<int>(column_families_.size()));
|
|
|
|
|
|
|
|
ColumnFamilyHandle* const cfh = column_families_[rand_column_families[0]];
|
|
|
|
assert(cfh);
|
|
|
|
|
|
|
|
assert(!rand_keys.empty());
|
|
|
|
|
|
|
|
ManagedSnapshot snapshot_guard(db_);
|
|
|
|
|
|
|
|
ReadOptions read_opts_copy(read_opts);
|
|
|
|
read_opts_copy.snapshot = snapshot_guard.snapshot();
|
|
|
|
|
|
|
|
const size_t num_keys = rand_keys.size();
|
|
|
|
|
|
|
|
for (size_t i = 0; i < num_keys; ++i) {
|
|
|
|
const std::string key_suffix = Key(rand_keys[i]);
|
|
|
|
|
|
|
|
constexpr size_t num_prefixes = 10;
|
|
|
|
|
|
|
|
std::array<std::string, num_prefixes> keys;
|
|
|
|
std::array<Slice, num_prefixes> key_slices;
|
|
|
|
std::array<PinnableWideColumns, num_prefixes> results;
|
|
|
|
std::array<Status, num_prefixes> statuses;
|
|
|
|
|
|
|
|
for (size_t j = 0; j < num_prefixes; ++j) {
|
|
|
|
keys[j] = std::to_string(j) + key_suffix;
|
|
|
|
key_slices[j] = keys[j];
|
|
|
|
}
|
|
|
|
|
|
|
|
db_->MultiGetEntity(read_opts_copy, cfh, num_prefixes, key_slices.data(),
|
|
|
|
results.data(), statuses.data());
|
|
|
|
|
|
|
|
for (size_t j = 0; j < num_prefixes; ++j) {
|
|
|
|
const Status& s = statuses[j];
|
|
|
|
|
|
|
|
if (!s.ok() && !s.IsNotFound()) {
|
|
|
|
fprintf(stderr, "MultiGetEntity error: %s\n", s.ToString().c_str());
|
|
|
|
thread->stats.AddErrors(1);
|
|
|
|
} else if (s.IsNotFound()) {
|
|
|
|
thread->stats.AddGets(1, 0);
|
|
|
|
} else {
|
|
|
|
thread->stats.AddGets(1, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
const WideColumns& cmp_columns = results[0].columns();
|
|
|
|
const WideColumns& columns = results[j].columns();
|
|
|
|
|
|
|
|
if (!CompareColumns(cmp_columns, columns)) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"MultiGetEntity error: inconsistent entities for key %s: %s, "
|
|
|
|
"%s\n",
|
|
|
|
StringToHex(key_suffix).c_str(),
|
|
|
|
WideColumnsToHex(cmp_columns).c_str(),
|
|
|
|
WideColumnsToHex(columns).c_str());
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!columns.empty()) {
|
|
|
|
// The last character of each column value should be 'j' as a decimal
|
|
|
|
// digit
|
|
|
|
const char expected = static_cast<char>('0' + j);
|
|
|
|
|
|
|
|
for (const auto& column : columns) {
|
|
|
|
const Slice& value = column.value();
|
|
|
|
|
|
|
|
if (value.empty() || value[value.size() - 1] != expected) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"MultiGetEntity error: incorrect column value for key "
|
|
|
|
"%s, entity %s, column value %s, expected %c\n",
|
|
|
|
StringToHex(key_suffix).c_str(),
|
|
|
|
WideColumnsToHex(columns).c_str(),
|
|
|
|
value.ToString(/* hex */ true).c_str(), expected);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!VerifyWideColumns(columns)) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"MultiGetEntity error: inconsistent columns for key %s, "
|
|
|
|
"entity %s\n",
|
|
|
|
StringToHex(key_suffix).c_str(),
|
|
|
|
WideColumnsToHex(columns).c_str());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Given a key, this does prefix scans for "0"+P, "1"+P, ..., "9"+P
|
|
|
|
// in the same snapshot where P is the first FLAGS_prefix_size - 1 bytes
|
|
|
|
// of the key. Each of these 10 scans returns a series of values;
|
|
|
|
// each series should be the same length, and it is verified for each
|
|
|
|
// index i that all the i'th values are of the form V+"0", V+"1", ..., V+"9".
|
|
|
|
// ASSUMES that MultiPut was used to put (K, V)
|
|
|
|
Status TestPrefixScan(ThreadState* thread, const ReadOptions& readoptions,
|
|
|
|
const std::vector<int>& rand_column_families,
|
|
|
|
const std::vector<int64_t>& rand_keys) override {
|
|
|
|
assert(!rand_column_families.empty());
|
|
|
|
assert(!rand_keys.empty());
|
|
|
|
|
|
|
|
const std::string key = Key(rand_keys[0]);
|
|
|
|
|
|
|
|
assert(FLAGS_prefix_size > 0);
|
|
|
|
const size_t prefix_to_use = static_cast<size_t>(FLAGS_prefix_size);
|
|
|
|
|
|
|
|
constexpr size_t num_prefixes = 10;
|
|
|
|
|
|
|
|
std::array<std::string, num_prefixes> prefixes;
|
|
|
|
std::array<Slice, num_prefixes> prefix_slices;
|
|
|
|
std::array<ReadOptions, num_prefixes> ro_copies;
|
|
|
|
std::array<std::string, num_prefixes> upper_bounds;
|
|
|
|
std::array<Slice, num_prefixes> ub_slices;
|
|
|
|
std::array<std::unique_ptr<Iterator>, num_prefixes> iters;
|
|
|
|
|
|
|
|
const Snapshot* const snapshot = db_->GetSnapshot();
|
|
|
|
|
|
|
|
ColumnFamilyHandle* const cfh = column_families_[rand_column_families[0]];
|
|
|
|
assert(cfh);
|
|
|
|
|
|
|
|
for (size_t i = 0; i < num_prefixes; ++i) {
|
|
|
|
prefixes[i] = std::to_string(i) + key;
|
|
|
|
prefix_slices[i] = Slice(prefixes[i].data(), prefix_to_use);
|
|
|
|
|
|
|
|
ro_copies[i] = readoptions;
|
|
|
|
ro_copies[i].snapshot = snapshot;
|
|
|
|
if (thread->rand.OneIn(2) &&
|
|
|
|
GetNextPrefix(prefix_slices[i], &(upper_bounds[i]))) {
|
|
|
|
// For half of the time, set the upper bound to the next prefix
|
|
|
|
ub_slices[i] = upper_bounds[i];
|
|
|
|
ro_copies[i].iterate_upper_bound = &(ub_slices[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
iters[i].reset(db_->NewIterator(ro_copies[i], cfh));
|
|
|
|
iters[i]->Seek(prefix_slices[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t count = 0;
|
|
|
|
|
|
|
|
while (iters[0]->Valid() && iters[0]->key().starts_with(prefix_slices[0])) {
|
|
|
|
++count;
|
|
|
|
|
|
|
|
std::array<std::string, num_prefixes> values;
|
|
|
|
|
|
|
|
// get list of all values for this iteration
|
|
|
|
for (size_t i = 0; i < num_prefixes; ++i) {
|
|
|
|
// no iterator should finish before the first one
|
|
|
|
assert(iters[i]->Valid() &&
|
|
|
|
iters[i]->key().starts_with(prefix_slices[i]));
|
|
|
|
values[i] = iters[i]->value().ToString();
|
|
|
|
|
|
|
|
// make sure the last character of the value is the expected digit
|
|
|
|
assert(!prefixes[i].empty());
|
|
|
|
assert(!values[i].empty());
|
|
|
|
|
|
|
|
const char expected = prefixes[i].front();
|
|
|
|
const char actual = values[i].back();
|
|
|
|
|
|
|
|
if (expected != actual) {
|
|
|
|
fprintf(stderr, "prefix scan error expected = %c actual = %c\n",
|
|
|
|
expected, actual);
|
|
|
|
}
|
|
|
|
|
|
|
|
values[i].pop_back(); // get rid of the differing character
|
|
|
|
|
|
|
|
// make sure all values are equivalent
|
|
|
|
if (values[i] != values[0]) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"prefix scan error : %" ROCKSDB_PRIszt
|
|
|
|
", inconsistent values for prefix %s: %s, %s\n",
|
|
|
|
i, prefix_slices[i].ToString(/* hex */ true).c_str(),
|
|
|
|
StringToHex(values[0]).c_str(),
|
|
|
|
StringToHex(values[i]).c_str());
|
|
|
|
// we continue after error rather than exiting so that we can
|
|
|
|
// find more errors if any
|
|
|
|
}
|
|
|
|
|
|
|
|
// make sure value() and columns() are consistent
|
|
|
|
if (!VerifyWideColumns(iters[i]->value(), iters[i]->columns())) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"prefix scan error : %" ROCKSDB_PRIszt
|
|
|
|
", value and columns inconsistent for prefix %s: value: %s, "
|
|
|
|
"columns: %s\n",
|
|
|
|
i, prefix_slices[i].ToString(/* hex */ true).c_str(),
|
|
|
|
iters[i]->value().ToString(/* hex */ true).c_str(),
|
|
|
|
WideColumnsToHex(iters[i]->columns()).c_str());
|
|
|
|
}
|
|
|
|
|
|
|
|
iters[i]->Next();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// cleanup iterators and snapshot
|
|
|
|
for (size_t i = 0; i < num_prefixes; ++i) {
|
|
|
|
// if the first iterator finished, they should have all finished
|
|
|
|
assert(!iters[i]->Valid() ||
|
|
|
|
!iters[i]->key().starts_with(prefix_slices[i]));
|
|
|
|
assert(iters[i]->status().ok());
|
|
|
|
}
|
|
|
|
|
|
|
|
db_->ReleaseSnapshot(snapshot);
|
|
|
|
|
|
|
|
thread->stats.AddPrefixes(1, count);
|
|
|
|
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
|
|
|
|
void VerifyDb(ThreadState* /* thread */) const override {}
|
|
|
|
|
|
|
|
void ContinuouslyVerifyDb(ThreadState* /* thread */) const override {}
|
|
|
|
|
|
|
|
// Compare columns ignoring the last character of column values
|
|
|
|
bool CompareColumns(const WideColumns& lhs, const WideColumns& rhs) {
|
|
|
|
if (lhs.size() != rhs.size()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t i = 0; i < lhs.size(); ++i) {
|
|
|
|
if (lhs[i].name() != rhs[i].name()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (lhs[i].value().size() != rhs[i].value().size()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (lhs[i].value().difference_offset(rhs[i].value()) <
|
|
|
|
lhs[i].value().size() - 1) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
StressTest* CreateBatchedOpsStressTest() { return new BatchedOpsStressTest(); }
|
|
|
|
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|
|
#endif // GFLAGS
|