From f631138e1cf3b44b119911662a6fea0e10772507 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Fri, 7 Apr 2023 10:06:03 -0700 Subject: [PATCH] Better support for merge operation with data block hash index (#11356) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: when data block hash index finds a key of op_type `kTypeMerge`, do not redo data block seek. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11356 Test Plan: - added new unit test - crash test: `python3 tools/db_crashtest.py whitebox --simple --use_merge=1 --data_block_index_type=1` - benchmark see slight improvement in read throughput: ``` TEST_TMPDIR=/dev/shm/hashindex ./db_bench -benchmarks=mergerandom -use_existing_db=false -num=10000000 -compression_type=none -level_compaction_dynamic_level_bytes=1 -merge_operator=PutOperator -write_buffer_size=1000000 --use_data_block_hash_index=1 TEST_TMPDIR=/dev/shm/hashindex ./db_bench -benchmarks=readrandom[-X10] -use_existing_db=true -num=10000000 -merge_operator=PutOperator -readonly=1 -disable_auto_compactions=1 -reads=100000 Main: readrandom [AVG 10 runs] : 29526 (± 1118) ops/sec; 2.1 (± 0.1) MB/sec Post-PR: readrandom [AVG 10 runs] : 31095 (± 662) ops/sec; 2.2 (± 0.0) MB/sec ``` Reviewed By: pdillinger Differential Revision: D44759895 Pulled By: cbi42 fbshipit-source-id: 387f0c35938c7e0e96b810ca3babf1967fc68191 --- db/db_merge_operator_test.cc | 42 ++++++++++++++++++++++++++++++++++++ table/block_based/block.cc | 8 +++---- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/db/db_merge_operator_test.cc b/db/db_merge_operator_test.cc index 19c7bd1e8..0d2687ca5 100644 --- a/db/db_merge_operator_test.cc +++ b/db/db_merge_operator_test.cc @@ -358,6 +358,48 @@ TEST_F(DBMergeOperatorTest, MergeOperatorFailsWithMustMerge) { } } +TEST_F(DBMergeOperatorTest, DataBlockBinaryAndHash) { + // Basic test to check that merge operator works with data block index type + // DataBlockBinaryAndHash. + Options options; + options.create_if_missing = true; + options.merge_operator.reset(new TestPutOperator()); + options.env = env_; + BlockBasedTableOptions table_options; + table_options.block_restart_interval = 16; + table_options.data_block_index_type = + BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinaryAndHash; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + DestroyAndReopen(options); + + const int kNumKeys = 100; + for (int i = 0; i < kNumKeys; ++i) { + ASSERT_OK(db_->Merge(WriteOptions(), Key(i), std::to_string(i))); + } + ASSERT_OK(Flush()); + std::string value; + for (int i = 0; i < kNumKeys; ++i) { + ASSERT_OK(db_->Get(ReadOptions(), Key(i), &value)); + ASSERT_EQ(std::to_string(i), value); + } + + std::vector snapshots; + for (int i = 0; i < kNumKeys; ++i) { + ASSERT_OK(db_->Delete(WriteOptions(), Key(i))); + for (int j = 0; j < 3; ++j) { + ASSERT_OK(db_->Merge(WriteOptions(), Key(i), std::to_string(i * 3 + j))); + snapshots.push_back(db_->GetSnapshot()); + } + } + ASSERT_OK(Flush()); + for (int i = 0; i < kNumKeys; ++i) { + ASSERT_OK(db_->Get(ReadOptions(), Key(i), &value)); + ASSERT_EQ(std::to_string(i * 3 + 2), value); + } + for (auto snapshot : snapshots) { + db_->ReleaseSnapshot(snapshot); + } +} class MergeOperatorPinningTest : public DBMergeOperatorTest, public testing::WithParamInterface { diff --git a/table/block_based/block.cc b/table/block_based/block.cc index 7eb0b010f..b9b5d6e7e 100644 --- a/table/block_based/block.cc +++ b/table/block_based/block.cc @@ -307,11 +307,11 @@ void MetaBlockIter::SeekImpl(const Slice& target) { // target = "seek_user_key @ type | seqno". // // For any type other than kTypeValue, kTypeDeletion, kTypeSingleDeletion, -// kTypeBlobIndex, or kTypeWideColumnEntity, this function behaves identically -// to Seek(). +// kTypeBlobIndex, kTypeWideColumnEntity or kTypeMerge, this function behaves +// identically to Seek(). // // For any type in kTypeValue, kTypeDeletion, kTypeSingleDeletion, -// kTypeBlobIndex, or kTypeWideColumnEntity: +// kTypeBlobIndex, kTypeWideColumnEntity, or kTypeMerge: // // If the return value is FALSE, iter location is undefined, and it means: // 1) there is no key in this block falling into the range: @@ -412,11 +412,11 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) { ValueType value_type = ExtractValueType(raw_key_.GetInternalKey()); if (value_type != ValueType::kTypeValue && value_type != ValueType::kTypeDeletion && + value_type != ValueType::kTypeMerge && value_type != ValueType::kTypeSingleDeletion && value_type != ValueType::kTypeBlobIndex && value_type != ValueType::kTypeWideColumnEntity) { SeekImpl(target); - return true; } // Result found, and the iter is correctly set.