Ignore `total_order_seek` in DB::Get (#9427)

Summary:
Apparently setting total_order_seek=true for DB::Get was
intended to allow accurate read semantics if the current prefix
extractor doesn't match what was used to generate SST files on
disk. But since prefix_extractor was made a mutable option in 5.14.0, we
have been able to detect this case and provide the correct semantics
regardless of the total_order_seek option. Since that time, the option
has only made Get() slower in a reasonably common case: prefix_extractor
unchanged and whole_key_filtering=false.

So this change primarily removes unnecessary effect of
total_order_seek on Get. Also cleans up some related comments.

Also adds a -total_order_seek option to db_bench and canonicalizes
handling of ReadOptions in db_bench so that command line options have
the expected association with library features. (There is potential
for change in regression test behavior, but the old behavior is likely
indefensible, or some other inconsistency would need to be fixed.)

TODO in follow-up work: there should be no reason for Get() to depend on
current prefix extractor at all.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9427

Test Plan:
Unit tests updated.

Performance (using db_bench update)

Create DB with `TEST_TMPDIR=/dev/shm/rocksdb ./db_bench -benchmarks=fillrandom -num=10000000 -disable_wal=1 -write_buffer_size=10000000 -bloom_bits=16 -compaction_style=2 -fifo_compaction_max_table_files_size_mb=10000 -fifo_compaction_allow_compaction=0 -prefix_size=12 -whole_key_filtering=0`

Test with and without `-total_order_seek` on `TEST_TMPDIR=/dev/shm/rocksdb ./db_bench -use_existing_db -readonly -benchmarks=readrandom -num=10000000 -duration=40 -disable_wal=1 -bloom_bits=16 -compaction_style=2 -fifo_compaction_max_table_files_size_mb=10000 -fifo_compaction_allow_compaction=0 -prefix_size=12`

Before this change, total_order_seek=false: 25188 ops/sec
Before this change, total_order_seek=true:   1222 ops/sec (~20x slower)

After this change, total_order_seek=false: 24570 ops/sec
After this change, total_order_seek=true:  25012 ops/sec (indistinguishable)

Reviewed By: siying

Differential Revision: D33753458

Pulled By: pdillinger

fbshipit-source-id: bf892f34907a5e407d9c40bd4d42f0adbcbe0014
main
Peter Dillinger 3 years ago committed by Facebook GitHub Bot
parent c7ce03dce1
commit f6d7ec1d02
  1. 1
      HISTORY.md
  2. 36
      db/db_bloom_filter_test.cc
  3. 3
      include/rocksdb/options.h
  4. 47
      table/block_based/block_based_table_reader.cc
  5. 10
      table/block_based/block_based_table_reader.h
  6. 86
      tools/db_bench_tool.cc

@ -23,6 +23,7 @@
### Behavior Changes ### Behavior Changes
* Disallow the combination of DBOptions.use_direct_io_for_flush_and_compaction == true and DBOptions.writable_file_max_buffer_size == 0. This combination can cause WritableFileWriter::Append() to loop forever, and it does not make much sense in direct IO. * Disallow the combination of DBOptions.use_direct_io_for_flush_and_compaction == true and DBOptions.writable_file_max_buffer_size == 0. This combination can cause WritableFileWriter::Append() to loop forever, and it does not make much sense in direct IO.
* `ReadOptions::total_order_seek` no longer affects `DB::Get()`. The original motivation for this interaction has been obsolete since RocksDB has been able to detect whether the current prefix extractor is compatible with that used to generate table files, probably RocksDB 5.14.0.
## 6.29.0 (01/21/2022) ## 6.29.0 (01/21/2022)
Note: The next release will be major release 7.0. See https://github.com/facebook/rocksdb/issues/9390 for more info. Note: The next release will be major release 7.0. See https://github.com/facebook/rocksdb/issues/9390 for more info.

@ -217,11 +217,23 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) {
(*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
ro.total_order_seek = true; ro.total_order_seek = true;
ASSERT_TRUE(db_->Get(ro, "foobarbar", &value).IsNotFound()); // NOTE: total_order_seek no longer affects Get()
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
ASSERT_EQ( ASSERT_EQ(
2, 3,
(*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
// No bloom on extractor changed
#ifndef ROCKSDB_LITE
ASSERT_OK(db_->SetOptions({{"prefix_extractor", "capped:10"}}));
ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
ASSERT_EQ(
3,
(*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
#endif // ROCKSDB_LITE
get_perf_context()->Reset(); get_perf_context()->Reset();
} }
} }
@ -268,11 +280,23 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) {
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
ro.total_order_seek = true; ro.total_order_seek = true;
ASSERT_TRUE(db_->Get(ro, "foobarbar", &value).IsNotFound()); // NOTE: total_order_seek no longer affects Get()
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
ASSERT_EQ( ASSERT_EQ(
2, 3,
(*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
// No bloom on extractor changed
#ifndef ROCKSDB_LITE
ASSERT_OK(db_->SetOptions({{"prefix_extractor", "capped:10"}}));
ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
ASSERT_EQ(
3,
(*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
#endif // ROCKSDB_LITE
get_perf_context()->Reset(); get_perf_context()->Reset();
} }
} }

@ -1499,8 +1499,7 @@ struct ReadOptions {
// used in the table. Some table format (e.g. plain table) may not support // used in the table. Some table format (e.g. plain table) may not support
// this option. // this option.
// If true when calling Get(), we also skip prefix bloom when reading from // If true when calling Get(), we also skip prefix bloom when reading from
// block based table. It provides a way to read existing data after // block based table, which only affects Get() performance.
// changing implementation of prefix extractor.
// Default: false // Default: false
bool total_order_seek; bool total_order_seek;

@ -122,9 +122,8 @@ void ReleaseCachedEntry(void* arg, void* h) {
cache->Release(handle, false /* force_erase */); cache->Release(handle, false /* force_erase */);
} }
// For hash based index, return true if prefix_extractor and // For hash based index, return false if table_properties->prefix_extractor_name
// prefix_extractor_block mismatch, false otherwise. This flag will be used // and prefix_extractor both exist and match, otherwise true.
// as total_order_seek via NewIndexIterator
inline bool PrefixExtractorChangedHelper( inline bool PrefixExtractorChangedHelper(
const TableProperties* table_properties, const TableProperties* table_properties,
const SliceTransform* prefix_extractor) { const SliceTransform* prefix_extractor) {
@ -616,10 +615,6 @@ Status BlockBasedTable::Open(
"version of RocksDB?"); "version of RocksDB?");
} }
// We've successfully read the footer. We are ready to serve requests.
// Better not mutate rep_ after the creation. eg. internal_prefix_transform
// raw pointer will be used to create HashIndexReader, whose reset may
// access a dangling pointer.
BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch}; BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch};
Rep* rep = new BlockBasedTable::Rep(ioptions, env_options, table_options, Rep* rep = new BlockBasedTable::Rep(ioptions, env_options, table_options,
internal_comparator, skip_filters, internal_comparator, skip_filters,
@ -627,8 +622,14 @@ Status BlockBasedTable::Open(
rep->file = std::move(file); rep->file = std::move(file);
rep->footer = footer; rep->footer = footer;
rep->hash_index_allow_collision = table_options.hash_index_allow_collision; rep->hash_index_allow_collision = table_options.hash_index_allow_collision;
// We've successfully read the footer. We are ready to serve requests.
// Better not mutate rep_ after the creation. eg. internal_prefix_transform
// raw pointer will be used to create HashIndexReader, whose reset may
// access a dangling pointer.
// We need to wrap data with internal_prefix_transform to make sure it can // We need to wrap data with internal_prefix_transform to make sure it can
// handle prefix correctly. // handle prefix correctly.
// FIXME: is changed prefix_extractor handled anywhere for hash index?
if (prefix_extractor != nullptr) { if (prefix_extractor != nullptr) {
rep->internal_prefix_transform.reset( rep->internal_prefix_transform.reset(
new InternalKeySliceTransform(prefix_extractor.get())); new InternalKeySliceTransform(prefix_extractor.get()));
@ -2212,8 +2213,7 @@ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator(
} }
bool BlockBasedTable::FullFilterKeyMayMatch( bool BlockBasedTable::FullFilterKeyMayMatch(
const ReadOptions& read_options, FilterBlockReader* filter, FilterBlockReader* filter, const Slice& internal_key, const bool no_io,
const Slice& internal_key, const bool no_io,
const SliceTransform* prefix_extractor, GetContext* get_context, const SliceTransform* prefix_extractor, GetContext* get_context,
BlockCacheLookupContext* lookup_context) const { BlockCacheLookupContext* lookup_context) const {
if (filter == nullptr || filter->IsBlockBased()) { if (filter == nullptr || filter->IsBlockBased()) {
@ -2228,13 +2228,14 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
may_match = may_match =
filter->KeyMayMatch(user_key_without_ts, prefix_extractor, kNotValid, filter->KeyMayMatch(user_key_without_ts, prefix_extractor, kNotValid,
no_io, const_ikey_ptr, get_context, lookup_context); no_io, const_ikey_ptr, get_context, lookup_context);
} else if (!read_options.total_order_seek && } else if (!PrefixExtractorChanged(prefix_extractor) &&
!PrefixExtractorChanged(prefix_extractor) &&
prefix_extractor->InDomain(user_key_without_ts) && prefix_extractor->InDomain(user_key_without_ts) &&
!filter->PrefixMayMatch( !filter->PrefixMayMatch(
prefix_extractor->Transform(user_key_without_ts), prefix_extractor->Transform(user_key_without_ts),
prefix_extractor, kNotValid, no_io, const_ikey_ptr, prefix_extractor, kNotValid, no_io, const_ikey_ptr,
get_context, lookup_context)) { get_context, lookup_context)) {
// FIXME ^^^: there should be no reason for Get() to depend on current
// prefix_extractor at all. It should always use table_prefix_extractor.
may_match = false; may_match = false;
} }
if (may_match) { if (may_match) {
@ -2245,8 +2246,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
} }
void BlockBasedTable::FullFilterKeysMayMatch( void BlockBasedTable::FullFilterKeysMayMatch(
const ReadOptions& read_options, FilterBlockReader* filter, FilterBlockReader* filter, MultiGetRange* range, const bool no_io,
MultiGetRange* range, const bool no_io,
const SliceTransform* prefix_extractor, const SliceTransform* prefix_extractor,
BlockCacheLookupContext* lookup_context) const { BlockCacheLookupContext* lookup_context) const {
if (filter == nullptr || filter->IsBlockBased()) { if (filter == nullptr || filter->IsBlockBased()) {
@ -2269,8 +2269,9 @@ void BlockBasedTable::FullFilterKeysMayMatch(
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, filtered_keys, PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, filtered_keys,
rep_->level); rep_->level);
} }
} else if (!read_options.total_order_seek && } else if (!PrefixExtractorChanged(prefix_extractor)) {
!PrefixExtractorChanged(prefix_extractor)) { // FIXME ^^^: there should be no reason for MultiGet() to depend on current
// prefix_extractor at all. It should always use table_prefix_extractor.
filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false, filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false,
lookup_context); lookup_context);
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED, before_keys); RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED, before_keys);
@ -2308,9 +2309,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
read_options.snapshot != nullptr; read_options.snapshot != nullptr;
} }
TEST_SYNC_POINT("BlockBasedTable::Get:BeforeFilterMatch"); TEST_SYNC_POINT("BlockBasedTable::Get:BeforeFilterMatch");
const bool may_match = const bool may_match = FullFilterKeyMayMatch(
FullFilterKeyMayMatch(read_options, filter, key, no_io, prefix_extractor, filter, key, no_io, prefix_extractor, get_context, &lookup_context);
get_context, &lookup_context);
TEST_SYNC_POINT("BlockBasedTable::Get:AfterFilterMatch"); TEST_SYNC_POINT("BlockBasedTable::Get:AfterFilterMatch");
if (!may_match) { if (!may_match) {
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL); RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL);
@ -2496,8 +2496,8 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
BlockCacheLookupContext lookup_context{ BlockCacheLookupContext lookup_context{
TableReaderCaller::kUserMultiGet, tracing_mget_id, TableReaderCaller::kUserMultiGet, tracing_mget_id,
/*get_from_user_specified_snapshot=*/read_options.snapshot != nullptr}; /*get_from_user_specified_snapshot=*/read_options.snapshot != nullptr};
FullFilterKeysMayMatch(read_options, filter, &sst_file_range, no_io, FullFilterKeysMayMatch(filter, &sst_file_range, no_io, prefix_extractor,
prefix_extractor, &lookup_context); &lookup_context);
if (!sst_file_range.empty()) { if (!sst_file_range.empty()) {
IndexBlockIter iiter_on_stack; IndexBlockIter iiter_on_stack;
@ -3110,12 +3110,6 @@ Status BlockBasedTable::CreateIndexReader(
InternalIterator* meta_iter, bool use_cache, bool prefetch, bool pin, InternalIterator* meta_iter, bool use_cache, bool prefetch, bool pin,
BlockCacheLookupContext* lookup_context, BlockCacheLookupContext* lookup_context,
std::unique_ptr<IndexReader>* index_reader) { std::unique_ptr<IndexReader>* index_reader) {
// kHashSearch requires non-empty prefix_extractor but bypass checking
// prefix_extractor here since we have no access to MutableCFOptions.
// Add need_upper_bound_check flag in BlockBasedTable::NewIndexIterator.
// If prefix_extractor does not match prefix_extractor_name from table
// properties, turn off Hash Index by setting total_order_seek to true
switch (rep_->index_type) { switch (rep_->index_type) {
case BlockBasedTableOptions::kTwoLevelIndexSearch: { case BlockBasedTableOptions::kTwoLevelIndexSearch: {
return PartitionIndexReader::Create(this, ro, prefetch_buffer, use_cache, return PartitionIndexReader::Create(this, ro, prefetch_buffer, use_cache,
@ -3133,6 +3127,7 @@ Status BlockBasedTable::CreateIndexReader(
std::unique_ptr<Block> metaindex_guard; std::unique_ptr<Block> metaindex_guard;
std::unique_ptr<InternalIterator> metaindex_iter_guard; std::unique_ptr<InternalIterator> metaindex_iter_guard;
bool should_fallback = false; bool should_fallback = false;
// FIXME: is changed prefix_extractor handled anywhere for hash index?
if (rep_->internal_prefix_transform.get() == nullptr) { if (rep_->internal_prefix_transform.get() == nullptr) {
ROCKS_LOG_WARN(rep_->ioptions.logger, ROCKS_LOG_WARN(rep_->ioptions.logger,
"No prefix extractor passed in. Fall back to binary" "No prefix extractor passed in. Fall back to binary"

@ -439,15 +439,13 @@ class BlockBasedTable : public TableReader {
BlockCacheLookupContext* lookup_context, BlockCacheLookupContext* lookup_context,
std::unique_ptr<IndexReader>* index_reader); std::unique_ptr<IndexReader>* index_reader);
bool FullFilterKeyMayMatch(const ReadOptions& read_options, bool FullFilterKeyMayMatch(FilterBlockReader* filter, const Slice& user_key,
FilterBlockReader* filter, const Slice& user_key,
const bool no_io, const bool no_io,
const SliceTransform* prefix_extractor, const SliceTransform* prefix_extractor,
GetContext* get_context, GetContext* get_context,
BlockCacheLookupContext* lookup_context) const; BlockCacheLookupContext* lookup_context) const;
void FullFilterKeysMayMatch(const ReadOptions& read_options, void FullFilterKeysMayMatch(FilterBlockReader* filter, MultiGetRange* range,
FilterBlockReader* filter, MultiGetRange* range,
const bool no_io, const bool no_io,
const SliceTransform* prefix_extractor, const SliceTransform* prefix_extractor,
BlockCacheLookupContext* lookup_context) const; BlockCacheLookupContext* lookup_context) const;
@ -505,8 +503,8 @@ class BlockBasedTable : public TableReader {
void DumpKeyValue(const Slice& key, const Slice& value, void DumpKeyValue(const Slice& key, const Slice& value,
std::ostream& out_stream); std::ostream& out_stream);
// Returns true if prefix_extractor is compatible with that used in building // Returns false if prefix_extractor exists and is compatible with that used
// the table file. // in building the table file, otherwise true.
bool PrefixExtractorChanged(const SliceTransform* prefix_extractor) const; bool PrefixExtractorChanged(const SliceTransform* prefix_extractor) const;
// A cumulative data block file read in MultiGet lower than this size will // A cumulative data block file read in MultiGet lower than this size will

@ -688,6 +688,10 @@ DEFINE_bool(memtable_whole_key_filtering, false,
DEFINE_bool(memtable_use_huge_page, false, DEFINE_bool(memtable_use_huge_page, false,
"Try to use huge page in memtables."); "Try to use huge page in memtables.");
DEFINE_bool(whole_key_filtering,
ROCKSDB_NAMESPACE::BlockBasedTableOptions().whole_key_filtering,
"Use whole keys (in addition to prefixes) in SST bloom filter.");
DEFINE_bool(use_existing_db, false, "If true, do not destroy the existing" DEFINE_bool(use_existing_db, false, "If true, do not destroy the existing"
" database. If you set this flag and also specify a benchmark that" " database. If you set this flag and also specify a benchmark that"
" wants a fresh database, that benchmark will fail."); " wants a fresh database, that benchmark will fail.");
@ -2622,6 +2626,7 @@ class Benchmark {
int64_t writes_per_range_tombstone_; int64_t writes_per_range_tombstone_;
int64_t range_tombstone_width_; int64_t range_tombstone_width_;
int64_t max_num_range_tombstones_; int64_t max_num_range_tombstones_;
ReadOptions read_options_;
WriteOptions write_options_; WriteOptions write_options_;
Options open_options_; // keep options around to properly destroy db later Options open_options_; // keep options around to properly destroy db later
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
@ -3245,6 +3250,12 @@ class Benchmark {
write_options_.sync = true; write_options_.sync = true;
} }
write_options_.disableWAL = FLAGS_disable_wal; write_options_.disableWAL = FLAGS_disable_wal;
read_options_ = ReadOptions(FLAGS_verify_checksum, true);
read_options_.total_order_seek = FLAGS_total_order_seek;
read_options_.prefix_same_as_start = FLAGS_prefix_same_as_start;
read_options_.tailing = FLAGS_use_tailing_iterator;
read_options_.readahead_size = FLAGS_readahead_size;
read_options_.adaptive_readahead = FLAGS_adaptive_readahead;
void (Benchmark::*method)(ThreadState*) = nullptr; void (Benchmark::*method)(ThreadState*) = nullptr;
void (Benchmark::*post_process_method)() = nullptr; void (Benchmark::*post_process_method)() = nullptr;
@ -4163,6 +4174,7 @@ class Benchmark {
block_based_options.enable_index_compression = block_based_options.enable_index_compression =
FLAGS_enable_index_compression; FLAGS_enable_index_compression;
block_based_options.block_align = FLAGS_block_align; block_based_options.block_align = FLAGS_block_align;
block_based_options.whole_key_filtering = FLAGS_whole_key_filtering;
BlockBasedTableOptions::PrepopulateBlockCache prepopulate_block_cache = BlockBasedTableOptions::PrepopulateBlockCache prepopulate_block_cache =
block_based_options.prepopulate_block_cache; block_based_options.prepopulate_block_cache;
switch (FLAGS_prepopulate_block_cache) { switch (FLAGS_prepopulate_block_cache) {
@ -4484,7 +4496,7 @@ class Benchmark {
if (FLAGS_use_existing_keys) { if (FLAGS_use_existing_keys) {
// Only work on single database // Only work on single database
assert(db_.db != nullptr); assert(db_.db != nullptr);
ReadOptions read_opts; ReadOptions read_opts; // before read_options_ initialized
read_opts.total_order_seek = true; read_opts.total_order_seek = true;
Iterator* iter = db_.db->NewIterator(read_opts); Iterator* iter = db_.db->NewIterator(read_opts);
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
@ -5483,8 +5495,7 @@ class Benchmark {
} }
void ReadSequential(ThreadState* thread, DB* db) { void ReadSequential(ThreadState* thread, DB* db) {
ReadOptions options(FLAGS_verify_checksum, true); ReadOptions options = read_options_;
options.tailing = FLAGS_use_tailing_iterator;
std::unique_ptr<char[]> ts_guard; std::unique_ptr<char[]> ts_guard;
Slice ts; Slice ts;
if (user_timestamp_size_ > 0) { if (user_timestamp_size_ > 0) {
@ -5523,7 +5534,6 @@ class Benchmark {
int64_t found = 0; int64_t found = 0;
int64_t bytes = 0; int64_t bytes = 0;
int64_t key_rand = 0; int64_t key_rand = 0;
ReadOptions options(FLAGS_verify_checksum, true);
std::unique_ptr<const char[]> key_guard; std::unique_ptr<const char[]> key_guard;
Slice key = AllocateKey(&key_guard); Slice key = AllocateKey(&key_guard);
PinnableSlice pinnable_val; PinnableSlice pinnable_val;
@ -5538,11 +5548,11 @@ class Benchmark {
read++; read++;
Status s; Status s;
if (FLAGS_num_column_families > 1) { if (FLAGS_num_column_families > 1) {
s = db_with_cfh->db->Get(options, db_with_cfh->GetCfh(key_rand), key, s = db_with_cfh->db->Get(read_options_, db_with_cfh->GetCfh(key_rand),
&pinnable_val); key, &pinnable_val);
} else { } else {
pinnable_val.Reset(); pinnable_val.Reset();
s = db_with_cfh->db->Get(options, s = db_with_cfh->db->Get(read_options_,
db_with_cfh->db->DefaultColumnFamily(), key, db_with_cfh->db->DefaultColumnFamily(), key,
&pinnable_val); &pinnable_val);
} }
@ -5588,9 +5598,7 @@ class Benchmark {
} }
void ReadReverse(ThreadState* thread, DB* db) { void ReadReverse(ThreadState* thread, DB* db) {
ReadOptions options(FLAGS_verify_checksum, true); Iterator* iter = db->NewIterator(read_options_);
options.adaptive_readahead = FLAGS_adaptive_readahead;
Iterator* iter = db->NewIterator(options);
int64_t i = 0; int64_t i = 0;
int64_t bytes = 0; int64_t bytes = 0;
for (iter->SeekToLast(); i < reads_ && iter->Valid(); iter->Prev()) { for (iter->SeekToLast(); i < reads_ && iter->Valid(); iter->Prev()) {
@ -5612,7 +5620,7 @@ class Benchmark {
int64_t read = 0; int64_t read = 0;
int64_t found = 0; int64_t found = 0;
int64_t nonexist = 0; int64_t nonexist = 0;
ReadOptions options(FLAGS_verify_checksum, true); ReadOptions options = read_options_;
std::unique_ptr<const char[]> key_guard; std::unique_ptr<const char[]> key_guard;
Slice key = AllocateKey(&key_guard); Slice key = AllocateKey(&key_guard);
std::string value; std::string value;
@ -5702,7 +5710,7 @@ class Benchmark {
int64_t bytes = 0; int64_t bytes = 0;
int num_keys = 0; int num_keys = 0;
int64_t key_rand = 0; int64_t key_rand = 0;
ReadOptions options(FLAGS_verify_checksum, true); ReadOptions options = read_options_;
std::unique_ptr<const char[]> key_guard; std::unique_ptr<const char[]> key_guard;
Slice key = AllocateKey(&key_guard); Slice key = AllocateKey(&key_guard);
PinnableSlice pinnable_val; PinnableSlice pinnable_val;
@ -5788,7 +5796,7 @@ class Benchmark {
int64_t bytes = 0; int64_t bytes = 0;
int64_t num_multireads = 0; int64_t num_multireads = 0;
int64_t found = 0; int64_t found = 0;
ReadOptions options(FLAGS_verify_checksum, true); ReadOptions options = read_options_;
std::vector<Slice> keys; std::vector<Slice> keys;
std::vector<std::unique_ptr<const char[]> > key_guards; std::vector<std::unique_ptr<const char[]> > key_guards;
std::vector<std::string> values(entries_per_batch_); std::vector<std::string> values(entries_per_batch_);
@ -6170,7 +6178,6 @@ class Benchmark {
value_max = FLAGS_mix_max_value_size; value_max = FLAGS_mix_max_value_size;
} }
ReadOptions options(FLAGS_verify_checksum, true);
std::unique_ptr<const char[]> key_guard; std::unique_ptr<const char[]> key_guard;
Slice key = AllocateKey(&key_guard); Slice key = AllocateKey(&key_guard);
PinnableSlice pinnable_val; PinnableSlice pinnable_val;
@ -6253,11 +6260,11 @@ class Benchmark {
gets++; gets++;
read++; read++;
if (FLAGS_num_column_families > 1) { if (FLAGS_num_column_families > 1) {
s = db_with_cfh->db->Get(options, db_with_cfh->GetCfh(key_rand), key, s = db_with_cfh->db->Get(read_options_, db_with_cfh->GetCfh(key_rand),
&pinnable_val); key, &pinnable_val);
} else { } else {
pinnable_val.Reset(); pinnable_val.Reset();
s = db_with_cfh->db->Get(options, s = db_with_cfh->db->Get(read_options_,
db_with_cfh->db->DefaultColumnFamily(), key, db_with_cfh->db->DefaultColumnFamily(), key,
&pinnable_val); &pinnable_val);
} }
@ -6302,7 +6309,7 @@ class Benchmark {
// Seek query // Seek query
if (db_with_cfh->db != nullptr) { if (db_with_cfh->db != nullptr) {
Iterator* single_iter = nullptr; Iterator* single_iter = nullptr;
single_iter = db_with_cfh->db->NewIterator(options); single_iter = db_with_cfh->db->NewIterator(read_options_);
if (single_iter != nullptr) { if (single_iter != nullptr) {
single_iter->Seek(key); single_iter->Seek(key);
seek++; seek++;
@ -6345,7 +6352,7 @@ class Benchmark {
void IteratorCreation(ThreadState* thread) { void IteratorCreation(ThreadState* thread) {
Duration duration(FLAGS_duration, reads_); Duration duration(FLAGS_duration, reads_);
ReadOptions options(FLAGS_verify_checksum, true); ReadOptions options = read_options_;
std::unique_ptr<char[]> ts_guard; std::unique_ptr<char[]> ts_guard;
if (user_timestamp_size_ > 0) { if (user_timestamp_size_ > 0) {
ts_guard.reset(new char[user_timestamp_size_]); ts_guard.reset(new char[user_timestamp_size_]);
@ -6375,12 +6382,7 @@ class Benchmark {
int64_t read = 0; int64_t read = 0;
int64_t found = 0; int64_t found = 0;
int64_t bytes = 0; int64_t bytes = 0;
ReadOptions options(FLAGS_verify_checksum, true); ReadOptions options = read_options_;
options.total_order_seek = FLAGS_total_order_seek;
options.prefix_same_as_start = FLAGS_prefix_same_as_start;
options.tailing = FLAGS_use_tailing_iterator;
options.readahead_size = FLAGS_readahead_size;
options.adaptive_readahead = FLAGS_adaptive_readahead;
std::unique_ptr<char[]> ts_guard; std::unique_ptr<char[]> ts_guard;
Slice ts; Slice ts;
if (user_timestamp_size_ > 0) { if (user_timestamp_size_ > 0) {
@ -6669,7 +6671,7 @@ class Benchmark {
abort(); abort();
} }
assert(db_.db != nullptr); assert(db_.db != nullptr);
ReadOptions read_options; ReadOptions read_options = read_options_;
std::unique_ptr<char[]> ts_guard; std::unique_ptr<char[]> ts_guard;
Slice ts; Slice ts;
if (user_timestamp_size_ > 0) { if (user_timestamp_size_ > 0) {
@ -6677,7 +6679,6 @@ class Benchmark {
ts = mock_app_clock_->GetTimestampForRead(thread->rand, ts_guard.get()); ts = mock_app_clock_->GetTimestampForRead(thread->rand, ts_guard.get());
read_options.timestamp = &ts; read_options.timestamp = &ts;
} }
read_options.adaptive_readahead = FLAGS_adaptive_readahead;
Iterator* iter = db_.db->NewIterator(read_options); Iterator* iter = db_.db->NewIterator(read_options);
fprintf(stderr, "num reads to do %" PRIu64 "\n", reads_); fprintf(stderr, "num reads to do %" PRIu64 "\n", reads_);
@ -6767,13 +6768,12 @@ class Benchmark {
// Given a key K and value V, this gets values for K+"0", K+"1" and K+"2" // Given a key K and value V, this gets values for K+"0", K+"1" and K+"2"
// in the same snapshot, and verifies that all the values are identical. // in the same snapshot, and verifies that all the values are identical.
// ASSUMES that PutMany was used to put (K, V) into the DB. // ASSUMES that PutMany was used to put (K, V) into the DB.
Status GetMany(DB* db, const ReadOptions& readoptions, const Slice& key, Status GetMany(DB* db, const Slice& key, std::string* value) {
std::string* value) {
std::string suffixes[3] = {"0", "1", "2"}; std::string suffixes[3] = {"0", "1", "2"};
std::string keys[3]; std::string keys[3];
Slice key_slices[3]; Slice key_slices[3];
std::string values[3]; std::string values[3];
ReadOptions readoptionscopy = readoptions; ReadOptions readoptionscopy = read_options_;
std::unique_ptr<char[]> ts_guard; std::unique_ptr<char[]> ts_guard;
Slice ts; Slice ts;
@ -6821,7 +6821,6 @@ class Benchmark {
// FLAGS_numdistinct distinct keys instead of FLAGS_num distinct keys. // FLAGS_numdistinct distinct keys instead of FLAGS_num distinct keys.
// (d) Does not have a MultiGet option. // (d) Does not have a MultiGet option.
void RandomWithVerify(ThreadState* thread) { void RandomWithVerify(ThreadState* thread) {
ReadOptions options(FLAGS_verify_checksum, true);
RandomGenerator gen; RandomGenerator gen;
std::string value; std::string value;
int64_t found = 0; int64_t found = 0;
@ -6848,7 +6847,7 @@ class Benchmark {
FLAGS_numdistinct, &key); FLAGS_numdistinct, &key);
if (get_weight > 0) { if (get_weight > 0) {
// do all the gets first // do all the gets first
Status s = GetMany(db, options, key, &value); Status s = GetMany(db, key, &value);
if (!s.ok() && !s.IsNotFound()) { if (!s.ok() && !s.IsNotFound()) {
fprintf(stderr, "getmany error: %s\n", s.ToString().c_str()); fprintf(stderr, "getmany error: %s\n", s.ToString().c_str());
// we continue after error rather than exiting so that we can // we continue after error rather than exiting so that we can
@ -6892,7 +6891,7 @@ class Benchmark {
// This is different from ReadWhileWriting because it does not use // This is different from ReadWhileWriting because it does not use
// an extra thread. // an extra thread.
void ReadRandomWriteRandom(ThreadState* thread) { void ReadRandomWriteRandom(ThreadState* thread) {
ReadOptions options(FLAGS_verify_checksum, true); ReadOptions options = read_options_;
RandomGenerator gen; RandomGenerator gen;
std::string value; std::string value;
int64_t found = 0; int64_t found = 0;
@ -6966,7 +6965,7 @@ class Benchmark {
// //
// Read-modify-write for random keys // Read-modify-write for random keys
void UpdateRandom(ThreadState* thread) { void UpdateRandom(ThreadState* thread) {
ReadOptions options(FLAGS_verify_checksum, true); ReadOptions options = read_options_;
RandomGenerator gen; RandomGenerator gen;
std::string value; std::string value;
int64_t found = 0; int64_t found = 0;
@ -7031,7 +7030,7 @@ class Benchmark {
// representing the existing value, we generate an array B of the same size, // representing the existing value, we generate an array B of the same size,
// then compute C = A^B as C[i]=A[i]^B[i], and store C // then compute C = A^B as C[i]=A[i]^B[i], and store C
void XORUpdateRandom(ThreadState* thread) { void XORUpdateRandom(ThreadState* thread) {
ReadOptions options(FLAGS_verify_checksum, true); ReadOptions options = read_options_;
RandomGenerator gen; RandomGenerator gen;
std::string existing_value; std::string existing_value;
int64_t found = 0; int64_t found = 0;
@ -7096,7 +7095,7 @@ class Benchmark {
// Each operation causes the key grow by value_size (simulating an append). // Each operation causes the key grow by value_size (simulating an append).
// Generally used for benchmarking against merges of similar type // Generally used for benchmarking against merges of similar type
void AppendRandom(ThreadState* thread) { void AppendRandom(ThreadState* thread) {
ReadOptions options(FLAGS_verify_checksum, true); ReadOptions options = read_options_;
RandomGenerator gen; RandomGenerator gen;
std::string value; std::string value;
int64_t found = 0; int64_t found = 0;
@ -7219,7 +7218,6 @@ class Benchmark {
// As with MergeRandom, the merge operator to use should be defined by // As with MergeRandom, the merge operator to use should be defined by
// FLAGS_merge_operator. // FLAGS_merge_operator.
void ReadRandomMergeRandom(ThreadState* thread) { void ReadRandomMergeRandom(ThreadState* thread) {
ReadOptions options(FLAGS_verify_checksum, true);
RandomGenerator gen; RandomGenerator gen;
std::string value; std::string value;
int64_t num_hits = 0; int64_t num_hits = 0;
@ -7246,7 +7244,7 @@ class Benchmark {
num_merges++; num_merges++;
thread->stats.FinishedOps(nullptr, db, 1, kMerge); thread->stats.FinishedOps(nullptr, db, 1, kMerge);
} else { } else {
Status s = db->Get(options, key, &value); Status s = db->Get(read_options_, key, &value);
if (value.length() > max_length) if (value.length() > max_length)
max_length = value.length(); max_length = value.length();
@ -7277,8 +7275,7 @@ class Benchmark {
thread->stats.Start(thread->tid); thread->stats.Start(thread->tid);
DB* db = SelectDB(thread); DB* db = SelectDB(thread);
ReadOptions read_opts(FLAGS_verify_checksum, true); ReadOptions read_opts = read_options_;
read_opts.adaptive_readahead = FLAGS_adaptive_readahead;
std::unique_ptr<char[]> ts_guard; std::unique_ptr<char[]> ts_guard;
Slice ts; Slice ts;
if (user_timestamp_size_ > 0) { if (user_timestamp_size_ > 0) {
@ -7418,9 +7415,7 @@ class Benchmark {
// RandomTransactionVerify() will then validate the correctness of the results // RandomTransactionVerify() will then validate the correctness of the results
// by checking if the sum of all keys in each set is the same. // by checking if the sum of all keys in each set is the same.
void RandomTransaction(ThreadState* thread) { void RandomTransaction(ThreadState* thread) {
ReadOptions options(FLAGS_verify_checksum, true);
Duration duration(FLAGS_duration, readwrites_); Duration duration(FLAGS_duration, readwrites_);
ReadOptions read_options(FLAGS_verify_checksum, true);
uint16_t num_prefix_ranges = static_cast<uint16_t>(FLAGS_transaction_sets); uint16_t num_prefix_ranges = static_cast<uint16_t>(FLAGS_transaction_sets);
uint64_t transactions_done = 0; uint64_t transactions_done = 0;
@ -7434,7 +7429,7 @@ class Benchmark {
txn_options.set_snapshot = FLAGS_transaction_set_snapshot; txn_options.set_snapshot = FLAGS_transaction_set_snapshot;
RandomTransactionInserter inserter(&thread->rand, write_options_, RandomTransactionInserter inserter(&thread->rand, write_options_,
read_options, FLAGS_num, read_options_, FLAGS_num,
num_prefix_ranges); num_prefix_ranges);
if (FLAGS_num_multi_db > 1) { if (FLAGS_num_multi_db > 1) {
@ -7585,7 +7580,6 @@ class Benchmark {
} }
void TimeSeriesReadOrDelete(ThreadState* thread, bool do_deletion) { void TimeSeriesReadOrDelete(ThreadState* thread, bool do_deletion) {
ReadOptions options(FLAGS_verify_checksum, true);
int64_t read = 0; int64_t read = 0;
int64_t found = 0; int64_t found = 0;
int64_t bytes = 0; int64_t bytes = 0;
@ -7593,7 +7587,7 @@ class Benchmark {
Iterator* iter = nullptr; Iterator* iter = nullptr;
// Only work on single database // Only work on single database
assert(db_.db != nullptr); assert(db_.db != nullptr);
iter = db_.db->NewIterator(options); iter = db_.db->NewIterator(read_options_);
std::unique_ptr<const char[]> key_guard; std::unique_ptr<const char[]> key_guard;
Slice key = AllocateKey(&key_guard); Slice key = AllocateKey(&key_guard);
@ -7609,7 +7603,7 @@ class Benchmark {
} }
if (!FLAGS_use_tailing_iterator) { if (!FLAGS_use_tailing_iterator) {
delete iter; delete iter;
iter = db_.db->NewIterator(options); iter = db_.db->NewIterator(read_options_);
} }
// Pick a Iterator to use // Pick a Iterator to use

Loading…
Cancel
Save