Reduce binary search when reseek into the same data block (#5256)

Summary:
Right now, when Seek() is called again, RocksDB always does a binary search against the files and index blocks, even if they end up with the same file/block. Improve it as following:
1. in LevelIterator, reseek first try to check the boundary of the current file. If it falls into the same file, skip the binary search to find the file
2. in block based table iterator, reseek skip to reseek the iterator block if the seek key is larger than the current key and lower than the index key (boundary of the current block and the next block).
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5256

Differential Revision: D15105072

Pulled By: siying

fbshipit-source-id: 39634bdb4a881082451fa39cecd7ecf12160bf80
main
Siying Dong 6 years ago committed by Facebook Github Bot
parent 4e0f2aadb0
commit 4479dff208
  1. 3
      HISTORY.md
  2. 98
      db/db_iterator_test.cc
  3. 18
      db/version_set.cc
  4. 1
      table/block.cc
  5. 26
      table/block_based_table_reader.cc

@ -3,6 +3,9 @@
### Public API Change ### Public API Change
* Now DB::Close() will return Aborted() error when there is unreleased snapshot. Users can retry after all snapshots are released. * Now DB::Close() will return Aborted() error when there is unreleased snapshot. Users can retry after all snapshots are released.
### New Features
* Reduce binary search when iterator reseek into the same data block.
## 6.2.0 (4/30/2019) ## 6.2.0 (4/30/2019)
### New Features ### New Features
* Add an option `strict_bytes_per_sync` that causes a file-writing thread to block rather than exceed the limit on bytes pending writeback specified by `bytes_per_sync` or `wal_bytes_per_sync`. * Add an option `strict_bytes_per_sync` that causes a file-writing thread to block rather than exceed the limit on bytes pending writeback specified by `bytes_per_sync` or `wal_bytes_per_sync`.

@ -2450,6 +2450,104 @@ TEST_P(DBIteratorTest, SeekBackwardAfterOutOfUpperBound) {
ASSERT_EQ("a", it->key().ToString()); ASSERT_EQ("a", it->key().ToString());
} }
TEST_P(DBIteratorTest, AvoidReseekLevelIterator) {
Options options = CurrentOptions();
options.compression = CompressionType::kNoCompression;
BlockBasedTableOptions table_options;
table_options.block_size = 800;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
Reopen(options);
Random rnd(301);
std::string random_str = RandomString(&rnd, 180);
ASSERT_OK(Put("1", random_str));
ASSERT_OK(Put("2", random_str));
ASSERT_OK(Put("3", random_str));
ASSERT_OK(Put("4", random_str));
// A new block
ASSERT_OK(Put("5", random_str));
ASSERT_OK(Put("6", random_str));
ASSERT_OK(Put("7", random_str));
ASSERT_OK(Flush());
ASSERT_OK(Put("8", random_str));
ASSERT_OK(Put("9", random_str));
ASSERT_OK(Flush());
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
int num_find_file_in_level = 0;
int num_idx_blk_seek = 0;
SyncPoint::GetInstance()->SetCallBack(
"LevelIterator::Seek:BeforeFindFile",
[&](void* /*arg*/) { num_find_file_in_level++; });
SyncPoint::GetInstance()->SetCallBack(
"IndexBlockIter::Seek:0", [&](void* /*arg*/) { num_idx_blk_seek++; });
SyncPoint::GetInstance()->EnableProcessing();
{
std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
iter->Seek("1");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(1, num_find_file_in_level);
ASSERT_EQ(1, num_idx_blk_seek);
iter->Seek("2");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(1, num_find_file_in_level);
ASSERT_EQ(1, num_idx_blk_seek);
iter->Seek("3");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(1, num_find_file_in_level);
ASSERT_EQ(1, num_idx_blk_seek);
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(1, num_find_file_in_level);
ASSERT_EQ(1, num_idx_blk_seek);
iter->Seek("5");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(1, num_find_file_in_level);
ASSERT_EQ(2, num_idx_blk_seek);
iter->Seek("6");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(1, num_find_file_in_level);
ASSERT_EQ(2, num_idx_blk_seek);
iter->Seek("7");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(1, num_find_file_in_level);
ASSERT_EQ(3, num_idx_blk_seek);
iter->Seek("8");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(2, num_find_file_in_level);
// Still re-seek because "8" is the boundary key, which has
// the same user key as the seek key.
ASSERT_EQ(4, num_idx_blk_seek);
iter->Seek("5");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(3, num_find_file_in_level);
ASSERT_EQ(5, num_idx_blk_seek);
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(3, num_find_file_in_level);
ASSERT_EQ(5, num_idx_blk_seek);
// Seek backward never triggers the index block seek to be skipped
iter->Seek("5");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(3, num_find_file_in_level);
ASSERT_EQ(6, num_idx_blk_seek);
}
SyncPoint::GetInstance()->DisableProcessing();
}
INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest, INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest,
testing::Values(true, false)); testing::Values(true, false));

@ -1007,9 +1007,25 @@ class LevelIterator final : public InternalIterator {
}; };
void LevelIterator::Seek(const Slice& target) { void LevelIterator::Seek(const Slice& target) {
// Check whether the seek key fall under the same file
bool need_to_reseek = true;
if (file_iter_.iter() != nullptr && file_index_ < flevel_->num_files) {
const FdWithKeyRange& cur_file = flevel_->files[file_index_];
if (icomparator_.InternalKeyComparator::Compare(
target, cur_file.largest_key) <= 0 &&
icomparator_.InternalKeyComparator::Compare(
target, cur_file.smallest_key) >= 0) {
need_to_reseek = false;
assert(static_cast<size_t>(FindFile(icomparator_, *flevel_, target)) ==
file_index_);
}
}
if (need_to_reseek) {
TEST_SYNC_POINT("LevelIterator::Seek:BeforeFindFile");
size_t new_file_index = FindFile(icomparator_, *flevel_, target); size_t new_file_index = FindFile(icomparator_, *flevel_, target);
InitFileIterator(new_file_index); InitFileIterator(new_file_index);
}
if (file_iter_.iter() != nullptr) { if (file_iter_.iter() != nullptr) {
file_iter_.Seek(target); file_iter_.Seek(target);
} }

@ -381,6 +381,7 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
} }
void IndexBlockIter::Seek(const Slice& target) { void IndexBlockIter::Seek(const Slice& target) {
TEST_SYNC_POINT("IndexBlockIter::Seek:0");
Slice seek_key = target; Slice seek_key = target;
if (!key_includes_seq_) { if (!key_includes_seq_) {
seek_key = ExtractUserKey(target); seek_key = ExtractUserKey(target);

@ -2334,16 +2334,34 @@ void BlockBasedTableIterator<TBlockIter, TValue>::Seek(const Slice& target) {
return; return;
} }
SavePrevIndexValue(); bool need_seek_index = true;
if (block_iter_points_to_real_block_) {
// Reseek.
prev_index_value_ = index_iter_->value();
// We can avoid an index seek if:
// 1. The new seek key is larger than the current key
// 2. The new seek key is within the upper bound of the block
// Since we don't necessarily know the internal key for either
// the current key or the upper bound, we check user keys and
// exclude the equality case. Considering internal keys can
// improve for the boundary cases, but it would complicate the
// code.
if (user_comparator_.Compare(ExtractUserKey(target),
block_iter_.user_key()) > 0 &&
user_comparator_.Compare(ExtractUserKey(target),
index_iter_->user_key()) < 0) {
need_seek_index = false;
}
}
if (need_seek_index) {
index_iter_->Seek(target); index_iter_->Seek(target);
if (!index_iter_->Valid()) { if (!index_iter_->Valid()) {
ResetDataIter(); ResetDataIter();
return; return;
} }
InitDataBlock(); InitDataBlock();
}
block_iter_.Seek(target); block_iter_.Seek(target);

Loading…
Cancel
Save