diff --git a/HISTORY.md b/HISTORY.md index 2d3ec6926..df4bc3e4f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -8,6 +8,7 @@ * Fix a bug that can cause unnecessary bg thread to be scheduled(#6104). * Fix a bug in which a snapshot read could be affected by a DeleteRange after the snapshot (#6062). * Fixed two performance issues related to memtable history trimming. First, a new SuperVersion is now created only if some memtables were actually trimmed. Second, trimming is only scheduled if there is at least one flushed memtable that is kept in memory for the purposes of transaction conflict checking. +* Fix a bug in WriteBatchWithIndex::MultiGetFromBatchAndDB, which is called by Transaction::MultiGet, that causes due to stale pointer access when the number of keys is > 32 ## 6.6.0 (11/25/2019) ### Bug Fixes diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index fd8490119..da1edd185 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -2823,6 +2823,104 @@ TEST_P(TransactionTest, MultiGetBatchedTest) { } } +// This test calls WriteBatchWithIndex::MultiGetFromBatchAndDB with a large +// number of keys, i.e greater than MultiGetContext::MAX_BATCH_SIZE, which is +// is 32. This forces autovector allocations in the MultiGet code paths +// to use std::vector in addition to stack allocations. The MultiGet keys +// includes Merges, which are handled specially in MultiGetFromBatchAndDB by +// allocating an autovector of MergeContexts +TEST_P(TransactionTest, MultiGetLargeBatchedTest) { + WriteOptions write_options; + ReadOptions read_options, snapshot_read_options; + string value; + Status s; + + ColumnFamilyHandle* cf; + ColumnFamilyOptions cf_options; + + std::vector key_str; + for (int i = 0; i < 100; ++i) { + key_str.emplace_back(std::to_string(i)); + } + // Create a new column families + s = db->CreateColumnFamily(cf_options, "CF", &cf); + ASSERT_OK(s); + + delete cf; + delete db; + db = nullptr; + + // open DB with three column families + std::vector column_families; + // have to open default column family + column_families.push_back( + ColumnFamilyDescriptor(kDefaultColumnFamilyName, ColumnFamilyOptions())); + // open the new column families + cf_options.merge_operator = MergeOperators::CreateStringAppendOperator(); + column_families.push_back(ColumnFamilyDescriptor("CF", cf_options)); + + std::vector handles; + + options.merge_operator = MergeOperators::CreateStringAppendOperator(); + ASSERT_OK(ReOpenNoDelete(column_families, &handles)); + assert(db != nullptr); + + // Write some data to the db + WriteBatch batch; + for (int i = 0; i < 3 * MultiGetContext::MAX_BATCH_SIZE; ++i) { + std::string val = "val" + std::to_string(i); + batch.Put(handles[1], key_str[i], val); + } + s = db->Write(write_options, &batch); + ASSERT_OK(s); + + WriteBatchWithIndex wb; + // Write some data to the db + s = wb.Delete(handles[1], std::to_string(1)); + ASSERT_OK(s); + s = wb.Put(handles[1], std::to_string(2), "new_val" + std::to_string(2)); + ASSERT_OK(s); + // Write a lot of merges so when we call MultiGetFromBatchAndDB later on, + // it is forced to use std::vector in rocksdb::autovector to allocate + // MergeContexts. The number of merges needs to be > + // MultiGetContext::MAX_BATCH_SIZE + for (int i = 8; i < MultiGetContext::MAX_BATCH_SIZE + 24; ++i) { + s = wb.Merge(handles[1], std::to_string(i), "merge"); + ASSERT_OK(s); + } + + // MultiGet a lot of keys in order to force std::vector reallocations + std::vector keys; + for (int i = 0; i < MultiGetContext::MAX_BATCH_SIZE + 32; ++i) { + keys.emplace_back(key_str[i]); + } + std::vector values(keys.size()); + std::vector statuses(keys.size()); + + wb.MultiGetFromBatchAndDB(db, snapshot_read_options, handles[1], keys.size(), keys.data(), + values.data(), statuses.data(), false); + for (size_t i =0; i < keys.size(); ++i) { + if (i == 1) { + ASSERT_TRUE(statuses[1].IsNotFound()); + } else if (i == 2) { + ASSERT_TRUE(statuses[2].ok()); + ASSERT_EQ(values[2], "new_val" + std::to_string(2)); + } else if (i >= 8 && i < 56) { + ASSERT_TRUE(statuses[i].ok()); + ASSERT_EQ(values[i], "val" + std::to_string(i) + ",merge"); + } else { + ASSERT_TRUE(statuses[i].ok()); + if (values[i] != "val" + std::to_string(i)) { + ASSERT_EQ(values[i], "val" + std::to_string(i)); + } + } + } + + for (auto handle : handles) { + delete handle; + } +} + TEST_P(TransactionTest, ColumnFamiliesTest2) { WriteOptions write_options; ReadOptions read_options, snapshot_read_options; diff --git a/utilities/write_batch_with_index/write_batch_with_index.cc b/utilities/write_batch_with_index/write_batch_with_index.cc index fe99e43e6..54fb2292c 100644 --- a/utilities/write_batch_with_index/write_batch_with_index.cc +++ b/utilities/write_batch_with_index/write_batch_with_index.cc @@ -1004,10 +1004,13 @@ void WriteBatchWithIndex::MultiGetFromBatchAndDB( assert(result == WriteBatchWithIndexInternal::Result::kMergeInProgress || result == WriteBatchWithIndexInternal::Result::kNotFound); key_context.emplace_back(column_family, keys[i], &values[i], &statuses[i]); - sorted_keys.emplace_back(&key_context.back()); merges.emplace_back(result, std::move(merge_context)); } + for (KeyContext& key : key_context) { + sorted_keys.emplace_back(&key); + } + // Did not find key in batch OR could not resolve Merges. Try DB. static_cast_with_check(db->GetRootDB()) ->PrepareMultiGetKeys(key_context.size(), sorted_input, &sorted_keys);