Better support for merge operation with data block hash index (#11356)

Summary:
when data block hash index finds a key of op_type `kTypeMerge`, do not redo data block seek.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11356

Test Plan:
- added new unit test
- crash test: `python3 tools/db_crashtest.py whitebox --simple --use_merge=1 --data_block_index_type=1`
- benchmark see slight improvement in read throughput:
```
TEST_TMPDIR=/dev/shm/hashindex ./db_bench -benchmarks=mergerandom -use_existing_db=false -num=10000000 -compression_type=none -level_compaction_dynamic_level_bytes=1 -merge_operator=PutOperator -write_buffer_size=1000000 --use_data_block_hash_index=1

TEST_TMPDIR=/dev/shm/hashindex ./db_bench -benchmarks=readrandom[-X10] -use_existing_db=true -num=10000000 -merge_operator=PutOperator -readonly=1 -disable_auto_compactions=1 -reads=100000

Main: readrandom [AVG 10 runs] : 29526 (± 1118) ops/sec;    2.1 (± 0.1) MB/sec
Post-PR: readrandom [AVG 10 runs] : 31095 (± 662) ops/sec;    2.2 (± 0.0) MB/sec
```

Reviewed By: pdillinger

Differential Revision: D44759895

Pulled By: cbi42

fbshipit-source-id: 387f0c35938c7e0e96b810ca3babf1967fc68191
oxigraph-8.3.2
Changyu Bi 2 years ago committed by Facebook GitHub Bot
parent 0578d9f951
commit f631138e1c
  1. 42
      db/db_merge_operator_test.cc
  2. 8
      table/block_based/block.cc

@ -358,6 +358,48 @@ TEST_F(DBMergeOperatorTest, MergeOperatorFailsWithMustMerge) {
} }
} }
TEST_F(DBMergeOperatorTest, DataBlockBinaryAndHash) {
// Basic test to check that merge operator works with data block index type
// DataBlockBinaryAndHash.
Options options;
options.create_if_missing = true;
options.merge_operator.reset(new TestPutOperator());
options.env = env_;
BlockBasedTableOptions table_options;
table_options.block_restart_interval = 16;
table_options.data_block_index_type =
BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinaryAndHash;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options);
const int kNumKeys = 100;
for (int i = 0; i < kNumKeys; ++i) {
ASSERT_OK(db_->Merge(WriteOptions(), Key(i), std::to_string(i)));
}
ASSERT_OK(Flush());
std::string value;
for (int i = 0; i < kNumKeys; ++i) {
ASSERT_OK(db_->Get(ReadOptions(), Key(i), &value));
ASSERT_EQ(std::to_string(i), value);
}
std::vector<const Snapshot*> snapshots;
for (int i = 0; i < kNumKeys; ++i) {
ASSERT_OK(db_->Delete(WriteOptions(), Key(i)));
for (int j = 0; j < 3; ++j) {
ASSERT_OK(db_->Merge(WriteOptions(), Key(i), std::to_string(i * 3 + j)));
snapshots.push_back(db_->GetSnapshot());
}
}
ASSERT_OK(Flush());
for (int i = 0; i < kNumKeys; ++i) {
ASSERT_OK(db_->Get(ReadOptions(), Key(i), &value));
ASSERT_EQ(std::to_string(i * 3 + 2), value);
}
for (auto snapshot : snapshots) {
db_->ReleaseSnapshot(snapshot);
}
}
class MergeOperatorPinningTest : public DBMergeOperatorTest, class MergeOperatorPinningTest : public DBMergeOperatorTest,
public testing::WithParamInterface<bool> { public testing::WithParamInterface<bool> {

@ -307,11 +307,11 @@ void MetaBlockIter::SeekImpl(const Slice& target) {
// target = "seek_user_key @ type | seqno". // target = "seek_user_key @ type | seqno".
// //
// For any type other than kTypeValue, kTypeDeletion, kTypeSingleDeletion, // For any type other than kTypeValue, kTypeDeletion, kTypeSingleDeletion,
// kTypeBlobIndex, or kTypeWideColumnEntity, this function behaves identically // kTypeBlobIndex, kTypeWideColumnEntity or kTypeMerge, this function behaves
// to Seek(). // identically to Seek().
// //
// For any type in kTypeValue, kTypeDeletion, kTypeSingleDeletion, // For any type in kTypeValue, kTypeDeletion, kTypeSingleDeletion,
// kTypeBlobIndex, or kTypeWideColumnEntity: // kTypeBlobIndex, kTypeWideColumnEntity, or kTypeMerge:
// //
// If the return value is FALSE, iter location is undefined, and it means: // If the return value is FALSE, iter location is undefined, and it means:
// 1) there is no key in this block falling into the range: // 1) there is no key in this block falling into the range:
@ -412,11 +412,11 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
ValueType value_type = ExtractValueType(raw_key_.GetInternalKey()); ValueType value_type = ExtractValueType(raw_key_.GetInternalKey());
if (value_type != ValueType::kTypeValue && if (value_type != ValueType::kTypeValue &&
value_type != ValueType::kTypeDeletion && value_type != ValueType::kTypeDeletion &&
value_type != ValueType::kTypeMerge &&
value_type != ValueType::kTypeSingleDeletion && value_type != ValueType::kTypeSingleDeletion &&
value_type != ValueType::kTypeBlobIndex && value_type != ValueType::kTypeBlobIndex &&
value_type != ValueType::kTypeWideColumnEntity) { value_type != ValueType::kTypeWideColumnEntity) {
SeekImpl(target); SeekImpl(target);
return true;
} }
// Result found, and the iter is correctly set. // Result found, and the iter is correctly set.

Loading…
Cancel
Save