From ae29495e4b8496dc6111d5c95f5c22be2d5c5d62 Mon Sep 17 00:00:00 2001 From: Andres Notzli Date: Tue, 14 Jul 2015 00:21:41 -0700 Subject: [PATCH] Avoid manipulating const char* arrays Summary: We were manipulating `const char*` arrays in CompactionJob to change the sequence number/types of keys. This patch changes UpdateInternalKey() to use string methods to do the manipulation and updates all calls accordingly. Test Plan: Added test case for UpdateInternalKey() in dbformat_test. make && make check Reviewers: sdong, rven, yhchiang, igor Reviewed By: igor Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D41985 --- db/compaction_job.cc | 10 ++++------ db/dbformat.h | 15 +++++++++------ db/dbformat_test.cc | 19 +++++++++++++++++++ db/merge_helper.cc | 10 +++------- 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/db/compaction_job.cc b/db/compaction_job.cc index 722316c6e..c937684f6 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -790,7 +790,7 @@ Status CompactionJob::ProcessKeyValueCompaction(int64_t* imm_micros, // If we have a single key to write, simply write that key. while (true) { // Invariant: key,value,ikey will always be the next entry to write - char* kptr = (char*)key.data(); + Slice newkey(key.data(), key.size()); std::string kstr; // Zeroing out the sequence number leads to better compression. @@ -803,11 +803,10 @@ Status CompactionJob::ProcessKeyValueCompaction(int64_t* imm_micros, // make a copy because updating in place would cause problems // with the priority queue that is managing the input key iterator kstr.assign(key.data(), key.size()); - kptr = (char*)kstr.c_str(); - UpdateInternalKey(kptr, key.size(), (uint64_t)0, ikey.type); + UpdateInternalKey(&kstr, (uint64_t)0, ikey.type); + newkey = Slice(kstr); } - Slice newkey(kptr, key.size()); assert((key.clear(), 1)); // we do not need 'key' anymore // Open output file if necessary @@ -953,8 +952,7 @@ void CompactionJob::CallCompactionFilterV2( if (compact_->to_delete_buf_[i]) { // update the string buffer directly // the Slice buffer points to the updated buffer - UpdateInternalKey(&compact_->key_str_buf_[i][0], - compact_->key_str_buf_[i].size(), ikey_buf[i].sequence, + UpdateInternalKey(&compact_->key_str_buf_[i], ikey_buf[i].sequence, kTypeDeletion); // no value associated with delete diff --git a/db/dbformat.h b/db/dbformat.h index 7696aa012..f54c8e226 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -195,14 +195,17 @@ inline bool ParseInternalKey(const Slice& internal_key, return (c <= static_cast(kValueTypeForSeek)); } -// Update the sequence number in the internal key -inline void UpdateInternalKey(char* internal_key, - const size_t internal_key_size, +// Update the sequence number in the internal key. +// Guarantees not to invalidate ikey.data(). +inline void UpdateInternalKey(std::string* ikey, uint64_t seq, ValueType t) { - assert(internal_key_size >= 8); - char* seqtype = internal_key + internal_key_size - 8; + size_t ikey_sz = ikey->size(); + assert(ikey_sz >= 8); uint64_t newval = (seq << 8) | t; - EncodeFixed64(seqtype, newval); + + // Note: Since C++11, strings are guaranteed to be stored contiguously and + // string::operator[]() is guaranteed not to change ikey.data(). + EncodeFixed64(&(*ikey)[ikey_sz - 8], newval); } // Get the sequence number from the internal key diff --git a/db/dbformat_test.cc b/db/dbformat_test.cc index 56e292742..0273dd062 100644 --- a/db/dbformat_test.cc +++ b/db/dbformat_test.cc @@ -149,6 +149,25 @@ TEST_F(FormatTest, IterKeyOperation) { "abcdefghijklmnopqrstuvwxyz")); } +TEST_F(FormatTest, UpdateInternalKey) { + std::string user_key("abcdefghijklmnopqrstuvwxyz"); + uint64_t new_seq = 0x123456; + ValueType new_val_type = kTypeDeletion; + + std::string ikey; + AppendInternalKey(&ikey, ParsedInternalKey(user_key, 100U, kTypeValue)); + size_t ikey_size = ikey.size(); + UpdateInternalKey(&ikey, new_seq, new_val_type); + ASSERT_EQ(ikey_size, ikey.size()); + + Slice in(ikey); + ParsedInternalKey decoded; + ASSERT_TRUE(ParseInternalKey(in, &decoded)); + ASSERT_EQ(user_key, decoded.user_key.ToString()); + ASSERT_EQ(new_seq, decoded.sequence); + ASSERT_EQ(new_val_type, decoded.type); +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/db/merge_helper.cc b/db/merge_helper.cc index a10d46c4b..bcccf9d3a 100644 --- a/db/merge_helper.cc +++ b/db/merge_helper.cc @@ -128,8 +128,7 @@ void MergeHelper::MergeUntil(Iterator* iter, SequenceNumber stop_before, std::string& original_key = keys_.back(); // The original key encountered orig_ikey.type = kTypeValue; - UpdateInternalKey(&original_key[0], original_key.size(), - orig_ikey.sequence, orig_ikey.type); + UpdateInternalKey(&original_key, orig_ikey.sequence, orig_ikey.type); operands_.back() = std::move(merge_result); } @@ -159,8 +158,7 @@ void MergeHelper::MergeUntil(Iterator* iter, SequenceNumber stop_before, std::string& original_key = keys_.back(); // The original key encountered orig_ikey.type = kTypeValue; - UpdateInternalKey(&original_key[0], original_key.size(), - orig_ikey.sequence, orig_ikey.type); + UpdateInternalKey(&original_key, orig_ikey.sequence, orig_ikey.type); operands_.back() = std::move(merge_result); } @@ -223,9 +221,7 @@ void MergeHelper::MergeUntil(Iterator* iter, SequenceNumber stop_before, if (success_) { std::string& original_key = keys_.back(); // The original key encountered orig_ikey.type = kTypeValue; - UpdateInternalKey(&original_key[0], original_key.size(), - orig_ikey.sequence, orig_ikey.type); - + UpdateInternalKey(&original_key, orig_ikey.sequence, orig_ikey.type); operands_.back() = std::move(merge_result); } else { RecordTick(stats, NUMBER_MERGE_FAILURES);