kill ReadOptions.prefix and .prefix_seek

Summary:
also add an override option total_order_iteration if you want to use full
iterator with prefix_extractor

Test Plan: make all check

Reviewers: igor, haobo, sdong, yhchiang

Reviewed By: haobo

CC: leveldb, dhruba

Differential Revision: https://reviews.facebook.net/D17805
main
Lei Jin 11 years ago
parent 8ce5492623
commit 3995e801ab
  1. 1
      HISTORY.md
  2. 11
      db/c.cc
  3. 2
      db/c_test.c
  4. 2
      db/db_bench.cc
  5. 17
      db/db_impl.cc
  6. 1
      db/db_impl_debug.cc
  7. 103
      db/db_test.cc
  8. 12
      db/memtable.cc
  9. 12
      db/memtable.h
  10. 13
      db/plain_table_db_test.cc
  11. 75
      db/prefix_filter_iterator.h
  12. 159
      db/prefix_test.cc
  13. 3
      db/repair.cc
  14. 6
      db/simple_table_db_test.cc
  15. 8
      db/table_cache.cc
  16. 6
      db/tailing_iter.cc
  17. 5
      db/tailing_iter.h
  18. 20
      db/version_set.cc
  19. 4
      db/write_batch_test.cc
  20. 4
      include/rocksdb/c.h
  21. 7
      include/rocksdb/memtablerep.h
  22. 13
      include/rocksdb/options.h
  23. 4
      include/rocksdb/table.h
  24. 9
      table/block_based_table_reader.cc
  25. 6
      table/plain_table_reader.cc
  26. 5
      table/plain_table_reader.h
  27. 4
      table/table_reader.h
  28. 7
      table/table_reader_bench.cc
  29. 20
      table/table_test.cc
  30. 4
      tools/db_stress.cc
  31. 12
      util/hash_linklist_rep.cc
  32. 11
      util/hash_skiplist_rep.cc

@ -9,6 +9,7 @@
* Column family support
### Public API changes
* Deprecated ReadOptions.prefix and ReadOptions.prefix_seek. Seek() defaults to prefix-based seek when Options.prefix_extractor is supplied. More detail is documented in https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes
## 2.8.0 (04/04/2014)

@ -1230,23 +1230,12 @@ void rocksdb_readoptions_set_fill_cache(
opt->rep.fill_cache = v;
}
void rocksdb_readoptions_set_prefix_seek(
rocksdb_readoptions_t* opt, unsigned char v) {
opt->rep.prefix_seek = v;
}
void rocksdb_readoptions_set_snapshot(
rocksdb_readoptions_t* opt,
const rocksdb_snapshot_t* snap) {
opt->rep.snapshot = (snap ? snap->rep : nullptr);
}
void rocksdb_readoptions_set_prefix(
rocksdb_readoptions_t* opt, const char* key, size_t keylen) {
Slice prefix = Slice(key, keylen);
opt->rep.prefix = &prefix;
}
void rocksdb_readoptions_set_read_tier(
rocksdb_readoptions_t* opt, int v) {
opt->rep.read_tier = static_cast<rocksdb::ReadTier>(v);

@ -461,8 +461,6 @@ int main(int argc, char** argv) {
rocksdb_put(db, woptions, "bar3", 4, "bar", 3, &err);
CheckNoError(err);
rocksdb_readoptions_set_prefix_seek(roptions, 1);
rocksdb_iterator_t* iter = rocksdb_create_iterator(db, roptions);
CheckCondition(!rocksdb_iter_valid(iter));

@ -1944,7 +1944,6 @@ class Benchmark {
void IteratorCreation(ThreadState* thread) {
Duration duration(FLAGS_duration, reads_);
ReadOptions options(FLAGS_verify_checksum, true);
options.prefix_seek = (FLAGS_prefix_size > 0);
while (!duration.Done(1)) {
DB* db = SelectDB(thread);
Iterator* iter = db->NewIterator(options);
@ -1966,7 +1965,6 @@ class Benchmark {
int64_t found = 0;
ReadOptions options(FLAGS_verify_checksum, true);
options.tailing = FLAGS_use_tailing_iterator;
options.prefix_seek = (FLAGS_prefix_size > 0);
Iterator* single_iter = nullptr;
std::vector<Iterator*> multi_iters;

@ -33,7 +33,6 @@
#include "db/memtable_list.h"
#include "db/merge_context.h"
#include "db/merge_helper.h"
#include "db/prefix_filter_iterator.h"
#include "db/table_cache.h"
#include "db/table_properties_collector.h"
#include "db/tailing_iter.h"
@ -1339,7 +1338,7 @@ Status DBImpl::WriteLevel0TableForRecovery(ColumnFamilyData* cfd, MemTable* mem,
FileMetaData meta;
meta.number = versions_->NewFileNumber();
pending_outputs_.insert(meta.number);
Iterator* iter = mem->NewIterator();
Iterator* iter = mem->NewIterator(ReadOptions(), true);
const SequenceNumber newest_snapshot = snapshots_.GetNewest();
const SequenceNumber earliest_seqno_in_memtable =
mem->GetFirstSequenceNumber();
@ -1405,7 +1404,7 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
for (MemTable* m : mems) {
Log(options_.info_log, "[%s] Flushing memtable with next log file: %lu\n",
cfd->GetName().c_str(), (unsigned long)m->GetNextLogNumber());
memtables.push_back(m->NewIterator());
memtables.push_back(m->NewIterator(ReadOptions(), true));
}
Iterator* iter = NewMergingIterator(&cfd->internal_comparator(),
&memtables[0], memtables.size());
@ -3494,12 +3493,6 @@ Iterator* DBImpl::NewIterator(const ReadOptions& options,
cfd->user_comparator(), iter, snapshot);
}
if (options.prefix) {
// use extra wrapper to exclude any keys from the results which
// don't begin with the prefix
iter = new PrefixFilterIterator(iter, *options.prefix,
cfd->options()->prefix_extractor.get());
}
return iter;
}
@ -3507,12 +3500,6 @@ Status DBImpl::NewIterators(
const ReadOptions& options,
const std::vector<ColumnFamilyHandle*>& column_families,
std::vector<Iterator*>* iterators) {
if (options.prefix) {
return Status::NotSupported(
"NewIterators doesn't support ReadOptions::prefix");
}
iterators->clear();
iterators->reserve(column_families.size());
SequenceNumber latest_snapshot = 0;

@ -33,7 +33,6 @@ Iterator* DBImpl::TEST_NewInternalIterator(ColumnFamilyHandle* column_family) {
SuperVersion* super_version = cfd->GetSuperVersion()->Ref();
mutex_.Unlock();
ReadOptions roptions;
roptions.prefix_seek = true;
return NewInternalIterator(roptions, cfd, super_version);
}

@ -306,7 +306,8 @@ class DBTest {
kSkipUniversalCompaction = 2,
kSkipMergePut = 4,
kSkipPlainTable = 8,
kSkipHashIndex = 16
kSkipHashIndex = 16,
kSkipNoSeekToLast = 32
};
DBTest() : option_config_(kDefault),
@ -341,6 +342,11 @@ class DBTest {
if ((skip_mask & kSkipMergePut) && option_config_ == kMergePut) {
continue;
}
if ((skip_mask & kSkipNoSeekToLast) &&
(option_config_ == kHashLinkList ||
option_config_ == kHashSkipList)) {;
continue;
}
if ((skip_mask & kSkipPlainTable)
&& (option_config_ == kPlainTableAllBytesPrefix
|| option_config_ == kPlainTableFirstBytePrefix)) {
@ -862,10 +868,11 @@ class DBTest {
void VerifyIterLast(std::string expected_key, int cf = 0) {
Iterator* iter;
ReadOptions ro;
if (cf == 0) {
iter = db_->NewIterator(ReadOptions());
iter = db_->NewIterator(ro);
} else {
iter = db_->NewIterator(ReadOptions(), handles_[cf]);
iter = db_->NewIterator(ro, handles_[cf]);
}
iter->SeekToLast();
ASSERT_EQ(IterStatus(iter), expected_key);
@ -1463,7 +1470,7 @@ TEST(DBTest, NonBlockingIteration) {
// This test verifies block cache behaviors, which is not used by plain
// table format.
} while (ChangeOptions(kSkipPlainTable));
} while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast));
}
// A delete is skipped for key if KeyMayExist(key) returns False
@ -1907,19 +1914,23 @@ TEST(DBTest, IterSmallAndLargeMix) {
TEST(DBTest, IterMultiWithDelete) {
do {
CreateAndReopenWithCF({"pikachu"});
ASSERT_OK(Put(1, "a", "va"));
ASSERT_OK(Put(1, "b", "vb"));
ASSERT_OK(Put(1, "c", "vc"));
ASSERT_OK(Delete(1, "b"));
ASSERT_EQ("NOT_FOUND", Get(1, "b"));
ASSERT_OK(Put(1, "ka", "va"));
ASSERT_OK(Put(1, "kb", "vb"));
ASSERT_OK(Put(1, "kc", "vc"));
ASSERT_OK(Delete(1, "kb"));
ASSERT_EQ("NOT_FOUND", Get(1, "kb"));
Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
iter->Seek("c");
ASSERT_EQ(IterStatus(iter), "c->vc");
iter->Seek("kc");
ASSERT_EQ(IterStatus(iter), "kc->vc");
if (!CurrentOptions().merge_operator) {
// TODO: merge operator does not support backward iteration yet
iter->Prev();
ASSERT_EQ(IterStatus(iter), "a->va");
if (kPlainTableAllBytesPrefix != option_config_&&
kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
kHashLinkList != option_config_) {
iter->Prev();
ASSERT_EQ(IterStatus(iter), "ka->va");
}
}
delete iter;
} while (ChangeOptions());
@ -1952,7 +1963,7 @@ TEST(DBTest, IterPrevMaxSkip) {
ASSERT_OK(Delete(1, "key1"));
VerifyIterLast("(invalid)", 1);
} while (ChangeOptions(kSkipMergePut));
} while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast));
}
TEST(DBTest, IterWithSnapshot) {
@ -1977,15 +1988,19 @@ TEST(DBTest, IterWithSnapshot) {
ASSERT_EQ(IterStatus(iter), "key5->val5");
if (!CurrentOptions().merge_operator) {
// TODO: merge operator does not support backward iteration yet
iter->Prev();
ASSERT_EQ(IterStatus(iter), "key4->val4");
iter->Prev();
ASSERT_EQ(IterStatus(iter), "key3->val3");
if (kPlainTableAllBytesPrefix != option_config_&&
kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
kHashLinkList != option_config_) {
iter->Prev();
ASSERT_EQ(IterStatus(iter), "key4->val4");
iter->Prev();
ASSERT_EQ(IterStatus(iter), "key3->val3");
iter->Next();
ASSERT_EQ(IterStatus(iter), "key4->val4");
iter->Next();
ASSERT_EQ(IterStatus(iter), "key5->val5");
iter->Next();
ASSERT_EQ(IterStatus(iter), "key4->val4");
iter->Next();
ASSERT_EQ(IterStatus(iter), "key5->val5");
}
iter->Next();
ASSERT_TRUE(!iter->Valid());
}
@ -5944,7 +5959,7 @@ TEST(DBTest, GroupCommitTest) {
ASSERT_TRUE(!itr->Valid());
delete itr;
} while (ChangeOptions());
} while (ChangeOptions(kSkipNoSeekToLast));
}
namespace {
@ -6313,7 +6328,7 @@ TEST(DBTest, Randomized) {
}
if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
} while (ChangeOptions(kSkipDeletesFilterFirst));
} while (ChangeOptions(kSkipDeletesFilterFirst | kSkipNoSeekToLast));
}
TEST(DBTest, MultiGetSimple) {
@ -6429,7 +6444,6 @@ void PrefixScanInit(DBTest *dbtest) {
} // namespace
TEST(DBTest, PrefixScan) {
ReadOptions ro = ReadOptions();
int count;
Slice prefix;
Slice key;
@ -6450,45 +6464,9 @@ TEST(DBTest, PrefixScan) {
options.max_background_compactions = 2;
options.create_if_missing = true;
options.disable_seek_compaction = true;
// Tricky: options.prefix_extractor will be released by
// NewHashSkipListRepFactory after use.
options.memtable_factory.reset(NewHashSkipListRepFactory());
// prefix specified, with blooms: 2 RAND I/Os
// SeekToFirst
DestroyAndReopen(&options);
PrefixScanInit(this);
count = 0;
env_->random_read_counter_.Reset();
ro.prefix = &prefix;
iter = db_->NewIterator(ro);
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
assert(iter->key().starts_with(prefix));
count++;
}
ASSERT_OK(iter->status());
delete iter;
ASSERT_EQ(count, 2);
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
// prefix specified, with blooms: 2 RAND I/Os
// Seek
DestroyAndReopen(&options);
PrefixScanInit(this);
count = 0;
env_->random_read_counter_.Reset();
ro.prefix = &prefix;
iter = db_->NewIterator(ro);
for (iter->Seek(key); iter->Valid(); iter->Next()) {
assert(iter->key().starts_with(prefix));
count++;
}
ASSERT_OK(iter->status());
delete iter;
ASSERT_EQ(count, 2);
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
// no prefix specified: 11 RAND I/Os
// 11 RAND I/Os
DestroyAndReopen(&options);
PrefixScanInit(this);
count = 0;
@ -6652,7 +6630,6 @@ TEST(DBTest, TailingIteratorDeletes) {
TEST(DBTest, TailingIteratorPrefixSeek) {
ReadOptions read_options;
read_options.tailing = true;
read_options.prefix_seek = true;
Options options = CurrentOptions();
options.env = env_;

@ -159,13 +159,12 @@ const char* EncodeKey(std::string* scratch, const Slice& target) {
class MemTableIterator: public Iterator {
public:
MemTableIterator(const MemTable& mem, const ReadOptions& options)
MemTableIterator(const MemTable& mem, const ReadOptions& options,
bool enforce_total_order)
: bloom_(nullptr),
prefix_extractor_(mem.prefix_extractor_),
valid_(false) {
if (options.prefix) {
iter_.reset(mem.table_->GetPrefixIterator(*options.prefix));
} else if (options.prefix_seek) {
if (prefix_extractor_ != nullptr && !enforce_total_order) {
bloom_ = mem.prefix_bloom_.get();
iter_.reset(mem.table_->GetDynamicPrefixIterator());
} else {
@ -224,8 +223,9 @@ class MemTableIterator: public Iterator {
void operator=(const MemTableIterator&);
};
Iterator* MemTable::NewIterator(const ReadOptions& options) {
return new MemTableIterator(*this, options);
Iterator* MemTable::NewIterator(const ReadOptions& options,
bool enforce_total_order) {
return new MemTableIterator(*this, options, enforce_total_order);
}
port::RWMutex* MemTable::GetLock(const Slice& key) {

@ -75,14 +75,10 @@ class MemTable {
// iterator are internal keys encoded by AppendInternalKey in the
// db/dbformat.{h,cc} module.
//
// If options.prefix is supplied, it is passed to the underlying MemTableRep
// as a hint that the iterator only need to support access to keys with that
// specific prefix.
// If options.prefix is not supplied and options.prefix_seek is set, the
// iterator is not bound to a specific prefix. However, the semantics of
// Seek is changed - the result might only include keys with the same prefix
// as the seek-key.
Iterator* NewIterator(const ReadOptions& options = ReadOptions());
// By default, it returns an iterator for prefix seek if prefix_extractor
// is configured in Options.
Iterator* NewIterator(const ReadOptions& options,
bool enforce_total_order = false);
// Add an entry into memtable that maps key to value at the
// specified sequence number and with the specified type.

@ -47,7 +47,6 @@ class PlainTableDBTest {
public:
PlainTableDBTest() : env_(Env::Default()) {
ro_.prefix_seek = true;
dbname_ = test::TmpDir() + "/plain_table_db_test";
ASSERT_OK(DestroyDB(dbname_, Options()));
db_ = nullptr;
@ -59,8 +58,6 @@ class PlainTableDBTest {
ASSERT_OK(DestroyDB(dbname_, Options()));
}
ReadOptions ro_;
// Return the current option configuration.
Options CurrentOptions() {
Options options;
@ -123,7 +120,7 @@ class PlainTableDBTest {
}
std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) {
ReadOptions options = ro_;
ReadOptions options;
options.snapshot = snapshot;
std::string result;
Status s = db_->Get(options, k, &result);
@ -369,7 +366,7 @@ TEST(PlainTableDBTest, Iterator) {
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v1", Get("1000000000foo001"));
ASSERT_EQ("v__3", Get("1000000000foo003"));
Iterator* iter = dbfull()->NewIterator(ro_);
Iterator* iter = dbfull()->NewIterator(ReadOptions());
iter->Seek("1000000000foo000");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo001", iter->key().ToString());
@ -471,7 +468,7 @@ TEST(PlainTableDBTest, IteratorLargeKeys) {
dbfull()->TEST_FlushMemTable();
Iterator* iter = dbfull()->NewIterator(ro_);
Iterator* iter = dbfull()->NewIterator(ReadOptions());
iter->Seek(key_list[0]);
for (size_t i = 0; i < 7; i++) {
@ -535,7 +532,7 @@ TEST(PlainTableDBTest, IteratorReverseSuffixComparator) {
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v1", Get("1000000000foo001"));
ASSERT_EQ("v__3", Get("1000000000foo003"));
Iterator* iter = dbfull()->NewIterator(ro_);
Iterator* iter = dbfull()->NewIterator(ReadOptions());
iter->Seek("1000000000foo009");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo008", iter->key().ToString());
@ -766,7 +763,7 @@ TEST(PlainTableDBTest, NonExistingKeyToNonEmptyBucket) {
ASSERT_EQ("NOT_FOUND", Get("8000000000000bar"));
ASSERT_EQ("NOT_FOUND", Get("1000000000000bar"));
Iterator* iter = dbfull()->NewIterator(ro_);
Iterator* iter = dbfull()->NewIterator(ReadOptions());
iter->Seek("5000000000000bar");
ASSERT_TRUE(iter->Valid());

@ -1,75 +0,0 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Wrap an underlying iterator, but exclude any results not starting
// with a given prefix. Seeking to keys not beginning with the prefix
// is invalid, and SeekToLast is not implemented (that would be
// non-trivial), but otherwise this iterator will behave just like the
// underlying iterator would if there happened to be no non-matching
// keys in the dataset.
#pragma once
#include "rocksdb/iterator.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
namespace rocksdb {
class PrefixFilterIterator : public Iterator {
private:
Iterator* iter_;
const Slice &prefix_;
const SliceTransform *prefix_extractor_;
Status status_;
public:
PrefixFilterIterator(Iterator* iter,
const Slice &prefix,
const SliceTransform* prefix_extractor)
: iter_(iter), prefix_(prefix),
prefix_extractor_(prefix_extractor),
status_(Status::OK()) {
if (prefix_extractor == nullptr) {
status_ = Status::InvalidArgument("A prefix filter may not be used "
"unless a function is also defined "
"for extracting prefixes");
} else if (!prefix_extractor_->InRange(prefix)) {
status_ = Status::InvalidArgument("Must provide a slice for prefix which"
"is a prefix for some key");
}
}
~PrefixFilterIterator() {
delete iter_;
}
Slice key() const { return iter_->key(); }
Slice value() const { return iter_->value(); }
Status status() const {
if (!status_.ok()) {
return status_;
}
return iter_->status();
}
void Next() { iter_->Next(); }
void Prev() { iter_->Prev(); }
void Seek(const Slice& k) {
if (prefix_extractor_->Transform(k) == prefix_) {
iter_->Seek(k);
} else {
status_ = Status::InvalidArgument("Seek must begin with target prefix");
}
}
void SeekToFirst() {
Seek(prefix_);
}
void SeekToLast() {
status_ = Status::NotSupported("SeekToLast is incompatible with prefixes");
}
bool Valid() const {
return (status_.ok() && iter_->Valid() &&
prefix_extractor_->Transform(iter_->key()) == prefix_);
}
};
} // namespace rocksdb

@ -17,7 +17,6 @@
#include "util/stop_watch.h"
#include "util/testharness.h"
DEFINE_bool(use_prefix_hash_memtable, true, "");
DEFINE_bool(trigger_deadlock, false,
"issue delete in range scan to trigger PrefixHashMap deadlock");
DEFINE_uint64(bucket_count, 100000, "number of buckets");
@ -208,7 +207,6 @@ TEST(PrefixTest, TestResult) {
auto db = OpenDb();
WriteOptions write_options;
ReadOptions read_options;
read_options.prefix_seek = true;
// 1. Insert one row.
Slice v16("v16");
@ -371,43 +369,6 @@ TEST(PrefixTest, TestResult) {
}
}
TEST(PrefixTest, FullIterator) {
while (NextOptions(1000000)) {
DestroyDB(kDbName, Options());
auto db = OpenDb();
WriteOptions write_options;
std::vector<uint64_t> prefixes;
for (uint64_t i = 0; i < 100; ++i) {
prefixes.push_back(i);
}
std::random_shuffle(prefixes.begin(), prefixes.end());
for (auto prefix : prefixes) {
for (uint64_t i = 0; i < 200; ++i) {
TestKey test_key(prefix, i);
Slice key = TestKeyToSlice(test_key);
ASSERT_OK(db->Put(write_options, key, Slice("0")));
}
}
auto func = [](void* db_void) {
auto db = reinterpret_cast<DB*>(db_void);
std::unique_ptr<Iterator> iter(db->NewIterator(ReadOptions()));
iter->SeekToFirst();
for (int i = 0; i < 3; ++i) {
iter->Next();
}
};
auto env = Env::Default();
for (int i = 0; i < 16; ++i) {
env->StartThread(func, reinterpret_cast<void*>(db.get()));
}
env->WaitForJoin();
}
}
TEST(PrefixTest, DynamicPrefixIterator) {
while (NextOptions(FLAGS_bucket_count)) {
std::cout << "*** Mem table: " << options.memtable_factory->Name()
@ -452,9 +413,6 @@ TEST(PrefixTest, DynamicPrefixIterator) {
HistogramImpl hist_seek_time;
HistogramImpl hist_seek_comparison;
if (FLAGS_use_prefix_hash_memtable) {
read_options.prefix_seek = true;
}
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
for (auto prefix : prefixes) {
@ -464,14 +422,15 @@ TEST(PrefixTest, DynamicPrefixIterator) {
perf_context.Reset();
StopWatchNano timer(Env::Default(), true);
auto key_prefix = options.prefix_extractor->Transform(key);
uint64_t total_keys = 0;
for (iter->Seek(key); iter->Valid(); iter->Next()) {
for (iter->Seek(key);
iter->Valid() && iter->key().starts_with(key_prefix);
iter->Next()) {
if (FLAGS_trigger_deadlock) {
std::cout << "Behold the deadlock!\n";
db->Delete(write_options, iter->key());
}
auto test_key = SliceToTestKey(iter->key());
if (test_key->prefix != prefix) break;
total_keys++;
}
hist_seek_time.Add(timer.ElapsedNanos());
@ -509,116 +468,6 @@ TEST(PrefixTest, DynamicPrefixIterator) {
}
}
TEST(PrefixTest, PrefixHash) {
while (NextOptions(FLAGS_bucket_count)) {
std::cout << "*** Mem table: " << options.memtable_factory->Name()
<< std::endl;
DestroyDB(kDbName, Options());
auto db = OpenDb();
WriteOptions write_options;
ReadOptions read_options;
std::vector<uint64_t> prefixes;
for (uint64_t i = 0; i < FLAGS_total_prefixes; ++i) {
prefixes.push_back(i);
}
if (FLAGS_random_prefix) {
std::random_shuffle(prefixes.begin(), prefixes.end());
}
// insert x random prefix, each with y continuous element.
HistogramImpl hist_put_time;
HistogramImpl hist_put_comparison;
for (auto prefix : prefixes) {
for (uint64_t sorted = 0; sorted < FLAGS_items_per_prefix; sorted++) {
TestKey test_key(prefix, sorted);
Slice key = TestKeyToSlice(test_key);
std::string value = "v" + std::to_string(sorted);
perf_context.Reset();
StopWatchNano timer(Env::Default(), true);
ASSERT_OK(db->Put(write_options, key, value));
hist_put_time.Add(timer.ElapsedNanos());
hist_put_comparison.Add(perf_context.user_key_comparison_count);
}
}
std::cout << "Put key comparison: \n" << hist_put_comparison.ToString()
<< "Put time: \n" << hist_put_time.ToString();
// test seek existing keys
HistogramImpl hist_seek_time;
HistogramImpl hist_seek_comparison;
for (auto prefix : prefixes) {
TestKey test_key(prefix, 0);
Slice key = TestKeyToSlice(test_key);
std::string value = "v" + std::to_string(0);
Slice key_prefix;
if (FLAGS_use_prefix_hash_memtable) {
key_prefix = options.prefix_extractor->Transform(key);
read_options.prefix = &key_prefix;
}
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
perf_context.Reset();
StopWatchNano timer(Env::Default(), true);
uint64_t total_keys = 0;
for (iter->Seek(key); iter->Valid(); iter->Next()) {
if (FLAGS_trigger_deadlock) {
std::cout << "Behold the deadlock!\n";
db->Delete(write_options, iter->key());
}
auto test_key = SliceToTestKey(iter->key());
if (test_key->prefix != prefix) break;
total_keys++;
}
hist_seek_time.Add(timer.ElapsedNanos());
hist_seek_comparison.Add(perf_context.user_key_comparison_count);
ASSERT_EQ(total_keys, FLAGS_items_per_prefix);
}
std::cout << "Seek key comparison: \n"
<< hist_seek_comparison.ToString()
<< "Seek time: \n"
<< hist_seek_time.ToString();
// test non-existing keys
HistogramImpl hist_no_seek_time;
HistogramImpl hist_no_seek_comparison;
for (auto prefix = FLAGS_total_prefixes;
prefix < FLAGS_total_prefixes + 100;
prefix++) {
TestKey test_key(prefix, 0);
Slice key = TestKeyToSlice(test_key);
if (FLAGS_use_prefix_hash_memtable) {
Slice key_prefix = options.prefix_extractor->Transform(key);
read_options.prefix = &key_prefix;
}
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
perf_context.Reset();
StopWatchNano timer(Env::Default(), true);
iter->Seek(key);
hist_no_seek_time.Add(timer.ElapsedNanos());
hist_no_seek_comparison.Add(perf_context.user_key_comparison_count);
ASSERT_TRUE(!iter->Valid());
}
std::cout << "non-existing Seek key comparison: \n"
<< hist_no_seek_comparison.ToString()
<< "non-existing Seek time: \n"
<< hist_no_seek_time.ToString();
}
}
}
int main(int argc, char** argv) {

@ -231,7 +231,8 @@ class Repairer {
// since ExtractMetaData() will also generate edits.
FileMetaData meta;
meta.number = next_file_number_++;
Iterator* iter = mem->NewIterator();
ReadOptions ro;
Iterator* iter = mem->NewIterator(ro, true /* enforce_total_order */);
status = BuildTable(dbname_, env_, options_, storage_options_, table_cache_,
iter, &meta, icmp_, 0, 0, kNoCompression);
delete iter;

@ -83,8 +83,6 @@ public:
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<TableReader>* table_reader);
bool PrefixMayMatch(const Slice& internal_prefix) override;
Iterator* NewIterator(const ReadOptions&) override;
Status Get(const ReadOptions&, const Slice& key, void* arg,
@ -220,10 +218,6 @@ std::shared_ptr<const TableProperties> SimpleTableReader::GetTableProperties()
return rep_->table_properties;
}
bool SimpleTableReader::PrefixMayMatch(const Slice& internal_prefix) {
return true;
}
Iterator* SimpleTableReader::NewIterator(const ReadOptions& options) {
return new SimpleTableIterator(this);
}

@ -191,16 +191,16 @@ Status TableCache::GetTableProperties(
}
bool TableCache::PrefixMayMatch(const ReadOptions& options,
const InternalKeyComparator& icomparator,
const FileMetaData& file_meta,
const Slice& internal_prefix, bool* table_io) {
const InternalKeyComparator& icomparator,
const FileMetaData& file_meta,
const Slice& internal_prefix, bool* table_io) {
bool may_match = true;
auto table_reader = file_meta.table_reader;
Cache::Handle* table_handle = nullptr;
if (table_reader == nullptr) {
// Need to get table handle from file number
Status s = FindTable(storage_options_, icomparator, file_meta.number,
file_meta.file_size, &table_handle, table_io);
file_meta.file_size, &table_handle, table_io);
if (!s.ok()) {
return may_match;
}

@ -60,8 +60,8 @@ void TailingIterator::Seek(const Slice& target) {
// need to do a seek if 'target' belongs to that interval (i.e. immutable_ is
// already at the correct position)!
//
// If options.prefix_seek is used and immutable_ is not valid, seek if target
// has a different prefix than prev_key.
// If prefix seek is used and immutable_ is not valid, seek if target has a
// different prefix than prev_key.
//
// prev_key_ is updated by Next(). SeekImmutable() sets prev_key_ to
// 'target' -- in this case, prev_key_ is included in the interval, so
@ -70,7 +70,7 @@ void TailingIterator::Seek(const Slice& target) {
const Comparator* cmp = cfd_->user_comparator();
if (!is_prev_set_ || cmp->Compare(prev_key_, target) >= !is_prev_inclusive_ ||
(immutable_->Valid() && cmp->Compare(target, immutable_->key()) > 0) ||
(read_options_.prefix_seek && !IsSamePrefix(target))) {
(cfd_->options()->prefix_extractor != nullptr && !IsSamePrefix(target))) {
SeekImmutable(target);
}

@ -2,9 +2,10 @@
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#ifndef ROCKSDB_LITE
#pragma once
#ifndef ROCKSDB_LITE
#include <string>
#include "rocksdb/db.h"
@ -79,7 +80,7 @@ class TailingIterator : public Iterator {
bool IsCurrentVersion() const;
// check if SeekImmutable() is needed due to target having a different prefix
// than prev_key_ (used when options.prefix_seek is set)
// than prev_key_ (used when in prefix seek mode)
bool IsSamePrefix(const Slice& target) const;
// creates mutable_ and immutable_ iterators and updates version_number_

@ -226,20 +226,11 @@ static Iterator* GetFileIterator(void* arg, const ReadOptions& options,
return NewErrorIterator(
Status::Corruption("FileReader invoked with unexpected value"));
} else {
ReadOptions options_copy;
if (options.prefix) {
// suppress prefix filtering since we have already checked the
// filters once at this point
options_copy = options;
options_copy.prefix = nullptr;
}
const EncodedFileMetaData* encoded_meta =
reinterpret_cast<const EncodedFileMetaData*>(file_value.data());
FileMetaData meta(encoded_meta->number, encoded_meta->file_size);
meta.table_reader = encoded_meta->table_reader;
return cache->NewIterator(
options.prefix ? options_copy : options, soptions, icomparator, meta,
return cache->NewIterator(options, soptions, icomparator, meta,
nullptr /* don't need reference to table*/, for_compaction);
}
}
@ -328,15 +319,6 @@ Iterator* Version::NewConcatenatingIterator(const ReadOptions& options,
int level) const {
Iterator* level_iter =
new LevelFileNumIterator(cfd_->internal_comparator(), &files_[level]);
if (options.prefix) {
InternalKey internal_prefix(*options.prefix, 0, kTypeValue);
if (!PrefixMayMatch(options, soptions,
internal_prefix.Encode(), level_iter)) {
delete level_iter;
// nothing in this level can match the prefix
return NewEmptyIterator();
}
}
return NewTwoLevelIterator(level_iter, &GetFileIterator, cfd_->table_cache(),
options, soptions, cfd_->internal_comparator());
}

@ -31,7 +31,7 @@ static std::string PrintContents(WriteBatch* b) {
ColumnFamilyMemTablesDefault cf_mems_default(mem, &options);
Status s = WriteBatchInternal::InsertInto(b, &cf_mems_default);
int count = 0;
Iterator* iter = mem->NewIterator();
Iterator* iter = mem->NewIterator(ReadOptions());
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
ParsedInternalKey ikey;
memset((void *)&ikey, 0, sizeof(ikey));
@ -283,7 +283,7 @@ TEST(WriteBatchTest, PutGatherSlices) {
namespace {
class ColumnFamilyHandleImplDummy : public ColumnFamilyHandleImpl {
public:
ColumnFamilyHandleImplDummy(int id)
explicit ColumnFamilyHandleImplDummy(int id)
: ColumnFamilyHandleImpl(nullptr, nullptr, nullptr), id_(id) {}
uint32_t GetID() const override { return id_; }

@ -463,13 +463,9 @@ extern void rocksdb_readoptions_set_verify_checksums(
unsigned char);
extern void rocksdb_readoptions_set_fill_cache(
rocksdb_readoptions_t*, unsigned char);
extern void rocksdb_readoptions_set_prefix_seek(
rocksdb_readoptions_t*, unsigned char);
extern void rocksdb_readoptions_set_snapshot(
rocksdb_readoptions_t*,
const rocksdb_snapshot_t*);
extern void rocksdb_readoptions_set_prefix(
rocksdb_readoptions_t*, const char* key, size_t keylen);
extern void rocksdb_readoptions_set_read_tier(
rocksdb_readoptions_t*, int);
extern void rocksdb_readoptions_set_tailing(

@ -148,13 +148,6 @@ class MemTableRep {
// GetIterator().
virtual Iterator* GetIterator(const Slice& user_key) { return GetIterator(); }
// Return an iterator over at least the keys with the specified prefix. The
// iterator may also allow access to other keys, but doesn't have to. Default:
// GetIterator().
virtual Iterator* GetPrefixIterator(const Slice& prefix) {
return GetIterator();
}
// Return an iterator that has a special Seek semantics. The result of
// a Seek might only include keys with the same prefix as the target key.
virtual Iterator* GetDynamicPrefixIterator() { return GetIterator(); }

@ -817,7 +817,10 @@ struct ReadOptions {
// If this option is set and memtable implementation allows, Seek
// might only return keys with the same prefix as the seek-key
bool prefix_seek;
//
// ! DEPRECATED: prefix_seek is on by default when prefix_extractor
// is configured
// bool prefix_seek;
// If "snapshot" is non-nullptr, read as of the supplied snapshot
// (which must belong to the DB that is being read and which must
@ -837,7 +840,9 @@ struct ReadOptions {
// prefix, and SeekToLast() is not supported. prefix filter with this
// option will sometimes reduce the number of read IOPs.
// Default: nullptr
const Slice* prefix;
//
// ! DEPRECATED
// const Slice* prefix;
// Specify if this read request should process data that ALREADY
// resides on a particular cache. If the required data is not
@ -856,17 +861,13 @@ struct ReadOptions {
ReadOptions()
: verify_checksums(true),
fill_cache(true),
prefix_seek(false),
snapshot(nullptr),
prefix(nullptr),
read_tier(kReadAllTier),
tailing(false) {}
ReadOptions(bool cksum, bool cache)
: verify_checksums(cksum),
fill_cache(cache),
prefix_seek(false),
snapshot(nullptr),
prefix(nullptr),
read_tier(kReadAllTier),
tailing(false) {}
};

@ -62,9 +62,7 @@ struct BlockBasedTableOptions {
kBinarySearch,
// The hash index, if enabled, will do the hash lookup when
// `ReadOption.prefix_seek == true`. User should also specify
// `Options.prefix_extractor` to allow the index block to correctly
// extract the prefix of the given key and perform hash table lookup.
// `Options.prefix_extractor` is provided.
kHashSearch,
};

@ -919,15 +919,6 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) {
}
Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) {
if (options.prefix) {
InternalKey internal_prefix(*options.prefix, 0, kTypeValue);
if (!PrefixMayMatch(internal_prefix.Encode())) {
// nothing in this file can match the prefix, so we should not
// bother doing I/O to this file when iterating.
return NewEmptyIterator();
}
}
return NewTwoLevelIterator(NewIndexIterator(options),
&BlockBasedTable::DataBlockReader,
const_cast<BlockBasedTable*>(this), options,

@ -149,12 +149,8 @@ Status PlainTableReader::Open(
void PlainTableReader::SetupForCompaction() {
}
bool PlainTableReader::PrefixMayMatch(const Slice& internal_prefix) {
return true;
}
Iterator* PlainTableReader::NewIterator(const ReadOptions& options) {
return new PlainTableIterator(this, options.prefix_seek);
return new PlainTableIterator(this, options_.prefix_extractor != nullptr);
}
struct PlainTableReader::IndexRecord {

@ -2,8 +2,9 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef ROCKSDB_LITE
#pragma once
#ifndef ROCKSDB_LITE
#include <unordered_map>
#include <memory>
#include <vector>
@ -53,8 +54,6 @@ class PlainTableReader: public TableReader {
const int bloom_bits_per_key, double hash_table_ratio,
size_t index_sparseness);
bool PrefixMayMatch(const Slice& internal_prefix);
Iterator* NewIterator(const ReadOptions&);
Status Get(const ReadOptions&, const Slice& key, void* arg,

@ -29,7 +29,9 @@ class TableReader {
// contains the key a key starting with iternal_prefix. The specific
// table implementation can use bloom filter and/or other heuristic
// to filter out this table as a whole.
virtual bool PrefixMayMatch(const Slice& internal_prefix) = 0;
virtual bool PrefixMayMatch(const Slice& internal_prefix) {
return true;
}
// Returns a new iterator over the table contents.
// The result of NewIterator() is initially invalid (caller must

@ -68,8 +68,6 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options,
bool through_db, bool measured_by_nanosecond) {
rocksdb::InternalKeyComparator ikc(opts.comparator);
Slice prefix = Slice();
std::string file_name = test::TmpDir()
+ "/rocksdb_table_reader_benchmark";
std::string dbname = test::TmpDir() + "/rocksdb_table_reader_bench_db";
@ -156,10 +154,6 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options,
}
std::string start_key = MakeKey(r1, r2, through_db);
std::string end_key = MakeKey(r1, r2 + r2_len, through_db);
if (prefix_len < 16) {
prefix = Slice(start_key.data(), prefix_len);
read_options.prefix = &prefix;
}
uint64_t total_time = 0;
uint64_t start_time = Now(env, measured_by_nanosecond);
port::MemoryBarrier();
@ -254,7 +248,6 @@ int main(int argc, char** argv) {
options.compression = rocksdb::CompressionType::kNoCompression;
if (FLAGS_plain_table) {
ro.prefix_seek = true;
options.allow_mmap_reads = true;
env_options.use_mmap_reads = true;
tf = new rocksdb::PlainTableFactory(16, (FLAGS_prefix_len == 16) ? 0 : 8,

@ -307,11 +307,9 @@ class KeyConvertingIterator: public Iterator {
class TableConstructor: public Constructor {
public:
explicit TableConstructor(const Comparator* cmp,
bool convert_to_internal_key = false,
bool prefix_seek = false)
bool convert_to_internal_key = false)
: Constructor(cmp),
convert_to_internal_key_(convert_to_internal_key),
prefix_seek_(prefix_seek) {}
convert_to_internal_key_(convert_to_internal_key) {}
~TableConstructor() { Reset(); }
virtual Status FinishImpl(const Options& options,
@ -352,9 +350,6 @@ class TableConstructor: public Constructor {
virtual Iterator* NewIterator() const {
ReadOptions ro;
if (prefix_seek_) {
ro.prefix_seek = true;
}
Iterator* iter = table_reader_->NewIterator(ro);
if (convert_to_internal_key_) {
return new KeyConvertingIterator(iter);
@ -388,7 +383,6 @@ class TableConstructor: public Constructor {
source_.reset();
}
bool convert_to_internal_key_;
bool prefix_seek_;
uint64_t uniq_id_;
unique_ptr<StringSink> sink_;
@ -434,7 +428,7 @@ class MemTableConstructor: public Constructor {
return Status::OK();
}
virtual Iterator* NewIterator() const {
return new KeyConvertingIterator(memtable_->NewIterator());
return new KeyConvertingIterator(memtable_->NewIterator(ReadOptions()));
}
private:
@ -699,7 +693,7 @@ class Harness {
options_.prefix_extractor.reset(new FixedOrLessPrefixTransform(2));
options_.allow_mmap_reads = true;
options_.table_factory.reset(NewPlainTableFactory());
constructor_ = new TableConstructor(options_.comparator, true, true);
constructor_ = new TableConstructor(options_.comparator, true);
internal_comparator_.reset(
new InternalKeyComparator(options_.comparator));
break;
@ -709,7 +703,7 @@ class Harness {
options_.prefix_extractor.reset(NewNoopTransform());
options_.allow_mmap_reads = true;
options_.table_factory.reset(NewPlainTableFactory());
constructor_ = new TableConstructor(options_.comparator, true, true);
constructor_ = new TableConstructor(options_.comparator, true);
internal_comparator_.reset(
new InternalKeyComparator(options_.comparator));
break;
@ -719,7 +713,7 @@ class Harness {
options_.prefix_extractor = nullptr;
options_.allow_mmap_reads = true;
options_.table_factory.reset(NewTotalOrderPlainTableFactory());
constructor_ = new TableConstructor(options_.comparator, true, false);
constructor_ = new TableConstructor(options_.comparator, true);
internal_comparator_.reset(
new InternalKeyComparator(options_.comparator));
break;
@ -1667,7 +1661,7 @@ TEST(MemTableTest, Simple) {
ColumnFamilyMemTablesDefault cf_mems_default(memtable, &options);
ASSERT_TRUE(WriteBatchInternal::InsertInto(&batch, &cf_mems_default).ok());
Iterator* iter = memtable->NewIterator();
Iterator* iter = memtable->NewIterator(ReadOptions());
iter->SeekToFirst();
while (iter->Valid()) {
fprintf(stderr, "key: '%s' -> '%s'\n",

@ -1008,7 +1008,6 @@ class StressTest {
prefixes[i].resize(FLAGS_prefix_size);
prefix_slices[i] = Slice(prefixes[i]);
readoptionscopy[i] = readoptions;
readoptionscopy[i].prefix_seek = true;
readoptionscopy[i].snapshot = snapshot;
iters[i] = db_->NewIterator(readoptionscopy[i], column_family);
iters[i]->Seek(prefix_slices[i]);
@ -1074,7 +1073,6 @@ class StressTest {
const Snapshot* snapshot = db_->GetSnapshot();
ReadOptions readoptionscopy = readoptions;
readoptionscopy.snapshot = snapshot;
readoptionscopy.prefix_seek = FLAGS_prefix_size > 0;
unique_ptr<Iterator> iter(db_->NewIterator(readoptionscopy, column_family));
iter->Seek(key);
@ -1198,7 +1196,6 @@ class StressTest {
// prefix
if (!FLAGS_test_batches_snapshots) {
Slice prefix = Slice(key.data(), FLAGS_prefix_size);
read_opts.prefix_seek = true;
Iterator* iter = db_->NewIterator(read_opts, column_family);
int64_t count = 0;
for (iter->Seek(prefix);
@ -1277,7 +1274,6 @@ class StressTest {
}
if (!thread->rand.OneIn(2)) {
// Use iterator to verify this range
options.prefix_seek = FLAGS_prefix_size > 0;
unique_ptr<Iterator> iter(
db_->NewIterator(options, column_families_[cf]));
iter->Seek(Key(start));

@ -73,9 +73,6 @@ class HashLinkListRep : public MemTableRep {
virtual MemTableRep::Iterator* GetIterator(const Slice& slice) override;
virtual MemTableRep::Iterator* GetPrefixIterator(const Slice& prefix)
override;
virtual MemTableRep::Iterator* GetDynamicPrefixIterator() override;
private:
@ -429,19 +426,14 @@ MemTableRep::Iterator* HashLinkListRep::GetIterator() {
return new FullListIterator(list, new_arena);
}
MemTableRep::Iterator* HashLinkListRep::GetPrefixIterator(
const Slice& prefix) {
auto bucket = GetBucket(prefix);
MemTableRep::Iterator* HashLinkListRep::GetIterator(const Slice& slice) {
auto bucket = GetBucket(transform_->Transform(slice));
if (bucket == nullptr) {
return new EmptyIterator();
}
return new Iterator(this, bucket);
}
MemTableRep::Iterator* HashLinkListRep::GetIterator(const Slice& slice) {
return GetPrefixIterator(transform_->Transform(slice));
}
MemTableRep::Iterator* HashLinkListRep::GetDynamicPrefixIterator() {
return new DynamicIterator(*this);
}

@ -42,9 +42,6 @@ class HashSkipListRep : public MemTableRep {
virtual MemTableRep::Iterator* GetIterator(const Slice& slice) override;
virtual MemTableRep::Iterator* GetPrefixIterator(const Slice& prefix)
override;
virtual MemTableRep::Iterator* GetDynamicPrefixIterator() override;
private:
@ -307,18 +304,14 @@ MemTableRep::Iterator* HashSkipListRep::GetIterator() {
return new Iterator(list, true, new_arena);
}
MemTableRep::Iterator* HashSkipListRep::GetPrefixIterator(const Slice& prefix) {
auto bucket = GetBucket(prefix);
MemTableRep::Iterator* HashSkipListRep::GetIterator(const Slice& slice) {
auto bucket = GetBucket(transform_->Transform(slice));
if (bucket == nullptr) {
return new EmptyIterator();
}
return new Iterator(bucket, false);
}
MemTableRep::Iterator* HashSkipListRep::GetIterator(const Slice& slice) {
return GetPrefixIterator(transform_->Transform(slice));
}
MemTableRep::Iterator* HashSkipListRep::GetDynamicPrefixIterator() {
return new DynamicIterator(*this);
}

Loading…
Cancel
Save