diff --git a/db/db_test2.cc b/db/db_test2.cc
index 117809ce0..718dbe625 100644
--- a/db/db_test2.cc
+++ b/db/db_test2.cc
@@ -1644,66 +1644,80 @@ size_t GetEncodedEntrySize(size_t key_size, size_t value_size) {
 TEST_F(DBTest2, ReadAmpBitmap) {
   Options options = CurrentOptions();
   BlockBasedTableOptions bbto;
-  // Disable delta encoding to make it easier to calculate read amplification
-  bbto.use_delta_encoding = false;
-  // Huge block cache to make it easier to calculate read amplification
-  bbto.block_cache = NewLRUCache(1024 * 1024 * 1024);
-  bbto.read_amp_bytes_per_bit = 16;
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-  options.statistics = rocksdb::CreateDBStatistics();
-  DestroyAndReopen(options);
-
-  const size_t kNumEntries = 10000;
+  size_t bytes_per_bit[2] = {1, 16};
+  for (size_t k = 0; k < 2; k++) {
+    // Disable delta encoding to make it easier to calculate read amplification
+    bbto.use_delta_encoding = false;
+    // Huge block cache to make it easier to calculate read amplification
+    bbto.block_cache = NewLRUCache(1024 * 1024 * 1024);
+    bbto.read_amp_bytes_per_bit = bytes_per_bit[k];
+    options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+    options.statistics = rocksdb::CreateDBStatistics();
+    DestroyAndReopen(options);
 
-  Random rnd(301);
-  for (size_t i = 0; i < kNumEntries; i++) {
-    ASSERT_OK(Put(Key(static_cast<int>(i)), RandomString(&rnd, 100)));
-  }
-  ASSERT_OK(Flush());
+    const size_t kNumEntries = 10000;
 
-  Close();
-  Reopen(options);
+    Random rnd(301);
+    for (size_t i = 0; i < kNumEntries; i++) {
+      ASSERT_OK(Put(Key(static_cast<int>(i)), RandomString(&rnd, 100)));
+    }
+    ASSERT_OK(Flush());
 
-  // Read keys/values randomly and verify that reported read amp error
-  // is less than 2%
-  uint64_t total_useful_bytes = 0;
-  std::set<int> read_keys;
-  std::string value;
-  for (size_t i = 0; i < kNumEntries * 5; i++) {
-    int key_idx = rnd.Next() % kNumEntries;
-    std::string k = Key(key_idx);
-    ASSERT_OK(db_->Get(ReadOptions(), k, &value));
+    Close();
+    Reopen(options);
+
+    // Read keys/values randomly and verify that reported read amp error
+    // is less than 2%
+    uint64_t total_useful_bytes = 0;
+    std::set<int> read_keys;
+    std::string value;
+    for (size_t i = 0; i < kNumEntries * 5; i++) {
+      int key_idx = rnd.Next() % kNumEntries;
+      std::string key = Key(key_idx);
+      ASSERT_OK(db_->Get(ReadOptions(), key, &value));
+
+      if (read_keys.find(key_idx) == read_keys.end()) {
+        auto internal_key = InternalKey(key, 0, ValueType::kTypeValue);
+        total_useful_bytes +=
+            GetEncodedEntrySize(internal_key.size(), value.size());
+        read_keys.insert(key_idx);
+      }
 
-    if (read_keys.find(key_idx) == read_keys.end()) {
-      auto ik = InternalKey(k, 0, ValueType::kTypeValue);
-      total_useful_bytes += GetEncodedEntrySize(ik.size(), value.size());
-      read_keys.insert(key_idx);
-    }
+      double expected_read_amp =
+          static_cast<double>(total_useful_bytes) /
+          options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES);
 
-    double expected_read_amp =
-        static_cast<double>(total_useful_bytes) /
-        options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES);
+      double read_amp =
+          static_cast<double>(options.statistics->getTickerCount(
+              READ_AMP_ESTIMATE_USEFUL_BYTES)) /
+          options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES);
 
-    double read_amp =
-        static_cast<double>(options.statistics->getTickerCount(
-            READ_AMP_ESTIMATE_USEFUL_BYTES)) /
-        options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES);
+      double error_pct = fabs(expected_read_amp - read_amp) * 100;
+      // Error between reported read amp and real read amp should be less than
+      // 2%
+      EXPECT_LE(error_pct, 2);
+    }
 
-    double error_pct = fabs(expected_read_amp - read_amp) * 100;
-    // Error between reported read amp and real read amp should be less than 2%
-    EXPECT_LE(error_pct, 2);
-  }
+    // Make sure we read everything in the DB (which is smaller than our cache)
+    Iterator* iter = db_->NewIterator(ReadOptions());
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      ASSERT_EQ(iter->value().ToString(), Get(iter->key().ToString()));
+    }
+    delete iter;
 
-  // Make sure we read every thing in the DB (which is smaller than our cache)
-  Iterator* iter = db_->NewIterator(ReadOptions());
-  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-    ASSERT_EQ(iter->value().ToString(), Get(iter->key().ToString()));
+    // Read amp is on average 100% since we read everything we loaded in memory
+    if (k == 0) {
+      ASSERT_EQ(
+          options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES),
+          options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES));
+    } else {
+      ASSERT_NEAR(
+          options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES) *
+              1.0f /
+              options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES),
+          1, .01);
+    }
   }
-  delete iter;
-
-  // Read amp is 100% since we read all what we loaded in memory
-  ASSERT_EQ(options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES),
-            options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES));
 }
 
 #ifndef OS_SOLARIS  // GetUniqueIdFromFile is not implemented
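The reworked test loops over two bitmap granularities. With bytes_per_bit == 1 every byte of the block gets its own bit, so after the full iteration at the end of the loop the two tickers must be exactly equal; with bytes_per_bit == 16 one sampled byte stands in for sixteen, so the ratio is only required to land within 1% of 1. A minimal sketch of the ratio both final assertions check (illustration only, not part of the patch; assumes the statistics object configured in the test above):

  #include "rocksdb/statistics.h"

  double ReportedReadAmp(rocksdb::Statistics* stats) {
    // Exactly 1.0 for bytes_per_bit == 1 (ASSERT_EQ); within 0.01 of 1.0 for
    // bytes_per_bit == 16 (ASSERT_NEAR), since the latter is an estimate.
    return static_cast<double>(stats->getTickerCount(
               rocksdb::READ_AMP_ESTIMATE_USEFUL_BYTES)) /
           stats->getTickerCount(rocksdb::READ_AMP_TOTAL_READ_BYTES);
  }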
diff --git a/table/block.h b/table/block.h
index 9011d4d22..5ebbaeda0 100644
--- a/table/block.h
+++ b/table/block.h
@@ -29,7 +29,8 @@
 #include "rocksdb/statistics.h"
 #include "table/block_prefix_index.h"
 #include "table/internal_iterator.h"
-
+#include "util/random.h"
+#include "util/sync_point.h"
 #include "format.h"
 
 namespace rocksdb {
@@ -46,7 +47,12 @@ class BlockReadAmpBitmap {
  public:
   explicit BlockReadAmpBitmap(size_t block_size, size_t bytes_per_bit,
                               Statistics* statistics)
-      : bitmap_(nullptr), bytes_per_bit_pow_(0), statistics_(statistics) {
+      : bitmap_(nullptr),
+        bytes_per_bit_pow_(0),
+        statistics_(statistics),
+        rnd_(
+            Random::GetTLSInstance()->Uniform(static_cast<int>(bytes_per_bit))) {
+    TEST_SYNC_POINT_CALLBACK("BlockReadAmpBitmap:rnd", &rnd_);
     assert(block_size > 0 && bytes_per_bit > 0);
 
     // convert bytes_per_bit to be a power of 2
@@ -56,62 +62,38 @@ class BlockReadAmpBitmap {
 
     // num_bits_needed = ceil(block_size / bytes_per_bit)
     size_t num_bits_needed =
-        (block_size >> static_cast<size_t>(bytes_per_bit_pow_)) +
-        (block_size % (static_cast<size_t>(1)
-                       << static_cast<size_t>(bytes_per_bit_pow_)) !=
-         0);
+        ((block_size - 1) >> bytes_per_bit_pow_) + 1;
+    assert(num_bits_needed > 0);
 
     // bitmap_size = ceil(num_bits_needed / kBitsPerEntry)
-    size_t bitmap_size = (num_bits_needed / kBitsPerEntry) +
-                         (num_bits_needed % kBitsPerEntry != 0);
+    size_t bitmap_size = (num_bits_needed - 1) / kBitsPerEntry + 1;
 
     // Create bitmap and set all the bits to 0
     bitmap_ = new std::atomic<uint32_t>[bitmap_size];
     memset(bitmap_, 0, bitmap_size * kBytesPersEntry);
 
-    RecordTick(GetStatistics(), READ_AMP_TOTAL_READ_BYTES,
-               num_bits_needed << bytes_per_bit_pow_);
+    RecordTick(GetStatistics(), READ_AMP_TOTAL_READ_BYTES, block_size);
   }
 
   ~BlockReadAmpBitmap() { delete[] bitmap_; }
 
   void Mark(uint32_t start_offset, uint32_t end_offset) {
     assert(end_offset >= start_offset);
-
-    // Every new bit we set will bump this counter
-    uint32_t new_useful_bytes = 0;
-    // Index of first bit in mask (start_offset / bytes_per_bit)
-    uint32_t start_bit = start_offset >> bytes_per_bit_pow_;
-    // Index of last bit in mask (end_offset / bytes_per_bit)
-    uint32_t end_bit = end_offset >> bytes_per_bit_pow_;
-    // Index of middle bit (unique to this range)
-    uint32_t mid_bit = start_bit + 1;
-
-    // It's guaranteed that ranges sent to Mark() wont overlap, this mean that
-    // we dont need to set the middle bits, we can simply set only one bit of
-    // the middle bits, and check this bit if we want to know if the whole
-    // range is set or not.
-    if (mid_bit < end_bit) {
-      if (GetAndSet(mid_bit) == 0) {
-        new_useful_bytes += (end_bit - mid_bit) << bytes_per_bit_pow_;
-      } else {
-        // If the middle bit is set, it's guaranteed that start and end bits
-        // are also set
-        return;
-      }
-    } else {
-      // This range dont have a middle bit, the whole range fall in 1 or 2 bits
+    // Index of first bit in mask
+    uint32_t start_bit =
+        (start_offset + (1 << bytes_per_bit_pow_) - rnd_ - 1) >>
+        bytes_per_bit_pow_;
+    // Index of last bit in mask + 1
+    uint32_t exclusive_end_bit =
+        (end_offset + (1 << bytes_per_bit_pow_) - rnd_) >> bytes_per_bit_pow_;
+    if (start_bit >= exclusive_end_bit) {
+      return;
     }
+    assert(exclusive_end_bit > 0);
 
     if (GetAndSet(start_bit) == 0) {
-      new_useful_bytes += (1 << bytes_per_bit_pow_);
-    }
-
-    if (GetAndSet(end_bit) == 0) {
-      new_useful_bytes += (1 << bytes_per_bit_pow_);
-    }
-
-    if (new_useful_bytes > 0) {
+      uint32_t new_useful_bytes = (exclusive_end_bit - start_bit)
+                                  << bytes_per_bit_pow_;
       RecordTick(GetStatistics(), READ_AMP_ESTIMATE_USEFUL_BYTES,
                  new_useful_bytes);
     }
@@ -148,6 +130,7 @@ class BlockReadAmpBitmap {
   // this pointer maybe invalid, but the DB will update it to a valid pointer
   // by using SetStatistics() before calling Mark()
   std::atomic<Statistics*> statistics_;
+  uint32_t rnd_;
 };
 
 class Block {
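The heart of the change is the per-block random offset rnd_, drawn uniformly from [0, bytes_per_bit). Bit i no longer represents the byte range [i * bytes_per_bit, (i + 1) * bytes_per_bit); it stands for the single sampled byte at offset i * bytes_per_bit + rnd_, and Mark() sets exactly the bits whose sample byte falls inside [start_offset, end_offset]. Each read byte is therefore counted with probability 1 / bytes_per_bit, which makes the useful-bytes estimate unbiased; the old code rounded every partially covered bit up to a full bytes_per_bit bytes and systematically over-counted small reads. A standalone sketch of the same index math (hypothetical names, illustration only, not part of the patch):

  #include <cassert>
  #include <cstdint>

  struct SampleGrid {
    uint32_t pow;  // bytes_per_bit == 1u << pow
    uint32_t rnd;  // drawn uniformly from [0, 1u << pow)

    // First bit whose sample byte (bit * bytes_per_bit + rnd) is at or after
    // start_offset, i.e. ceil((start_offset - rnd) / bytes_per_bit).
    uint32_t StartBit(uint32_t start_offset) const {
      return (start_offset + (1u << pow) - rnd - 1) >> pow;
    }
    // One past the last bit whose sample byte is at or before end_offset.
    uint32_t ExclusiveEndBit(uint32_t end_offset) const {
      return (end_offset + (1u << pow) - rnd) >> pow;
    }
  };

  int main() {
    SampleGrid g{4, 3};  // bytes_per_bit == 16; sample bytes at 3, 19, 35, ...
    // Marking [0, 31] covers sample bytes 3 and 19, i.e. bits 0 and 1, which
    // credits (2 - 0) << 4 == 32 useful bytes: exact for this 32-byte read.
    assert(g.StartBit(0) == 0 && g.ExclusiveEndBit(31) == 2);
    // [4, 18] contains no sample byte, so start_bit >= exclusive_end_bit and
    // Mark() returns early; such short reads are still counted on average,
    // because rnd_ varies from block to block.
    assert(g.StartBit(4) == 1 && g.ExclusiveEndBit(18) == 1);
    return 0;
  }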
diff --git a/table/block_test.cc b/table/block_test.cc
index 778326b6d..bcc2d69e3 100644
--- a/table/block_test.cc
+++ b/table/block_test.cc
@@ -228,18 +228,17 @@ class BlockReadAmpBitmapSlowAndAccurate {
  public:
   void Mark(size_t start_offset, size_t end_offset) {
     assert(end_offset >= start_offset);
-
     marked_ranges_.emplace(end_offset, start_offset);
   }
 
   // Return true if any byte in this range was Marked
-  bool IsAnyInRangeMarked(size_t start_offset, size_t end_offset) {
+  bool IsPinMarked(size_t offset) {
     auto it = marked_ranges_.lower_bound(
-        std::make_pair(start_offset, static_cast<size_t>(0)));
+        std::make_pair(offset, static_cast<size_t>(0)));
     if (it == marked_ranges_.end()) {
       return false;
     }
-    return start_offset <= it->first && end_offset >= it->second;
+    return offset <= it->first && offset >= it->second;
   }
 
  private:
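The oracle keeps every marked range as an (end, start) pair ordered by end offset, so the new single-byte query needs only one lower_bound: the first stored range whose end is at or after the queried offset is the only candidate that can cover it, and it does iff it also starts at or before the offset. This is exact because the ranges the test feeds to Mark() never overlap. A minimal sketch of the same lookup (illustration only, not part of the patch):

  #include <cstddef>
  #include <set>
  #include <utility>

  bool Covered(const std::set<std::pair<size_t, size_t>>& ranges_by_end,
               size_t offset) {
    // First pair with end >= offset; it covers offset iff start <= offset too.
    auto it = ranges_by_end.lower_bound(std::make_pair(offset, size_t{0}));
    return it != ranges_by_end.end() && it->second <= offset;
  }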
@@ -247,6 +246,12 @@ class BlockReadAmpBitmapSlowAndAccurate {
 };
 
 TEST_F(BlockTest, BlockReadAmpBitmap) {
+  uint32_t pin_offset = 0;
+  SyncPoint::GetInstance()->SetCallBack(
+      "BlockReadAmpBitmap:rnd", [&pin_offset](void* arg) {
+        pin_offset = *(static_cast<uint32_t*>(arg));
+      });
+  SyncPoint::GetInstance()->EnableProcessing();
   std::vector<size_t> block_sizes = {
       1,   // 1 byte
       32,  // 32 bytes
@@ -279,10 +284,8 @@
     if (needed_bits % 32 != 0) {
       bitmap_size++;
     }
-    size_t bits_in_bitmap = bitmap_size * 32;
 
-    ASSERT_EQ(stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES),
-              needed_bits * kBytesPerBit);
+    ASSERT_EQ(stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES), block_size);
 
     // Generate some random entries
     std::vector<size_t> random_entry_offsets;
@@ -316,20 +319,18 @@
                                       current_entry.second);
 
       size_t total_bits = 0;
-      for (size_t bit_idx = 0; bit_idx < bits_in_bitmap; bit_idx++) {
-        size_t start_rng = bit_idx * kBytesPerBit;
-        size_t end_rng = (start_rng + kBytesPerBit) - 1;
-
-        total_bits +=
-            read_amp_slow_and_accurate.IsAnyInRangeMarked(start_rng, end_rng);
+      for (size_t bit_idx = 0; bit_idx < needed_bits; bit_idx++) {
+        total_bits += read_amp_slow_and_accurate.IsPinMarked(
+            bit_idx * kBytesPerBit + pin_offset);
       }
       size_t expected_estimate_useful = total_bits * kBytesPerBit;
       size_t got_estimate_useful =
-          stats->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES);
-
+          stats->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES);
       ASSERT_EQ(expected_estimate_useful, got_estimate_useful);
     }
   }
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
 }
 
 TEST_F(BlockTest, BlockWithReadAmpBitmap) {
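Two consequences of the patch drive the assertion changes above. First, READ_AMP_TOTAL_READ_BYTES is now credited with the exact block_size instead of the bit-granularity round-up, so the test can assert equality against block_size directly. Second, the verification loop queries the oracle at the bitmap's actual sample bytes, bit_idx * kBytesPerBit + pin_offset, where pin_offset is the block's random draw captured through the "BlockReadAmpBitmap:rnd" sync point. A worked restatement of the accounting change (illustration only, example numbers):

  #include <cstddef>

  constexpr size_t NumBitsNeeded(size_t block_size, size_t pow) {
    return ((block_size - 1) >> pow) + 1;  // ceil(block_size / 2^pow)
  }
  // block_size = 100, kBytesPerBit = 32: 4 bits are allocated. The old code
  // ticked 4 << 5 == 128 total-read bytes; the new code ticks the true 100,
  // which is what ASSERT_EQ(..., block_size) relies on.
  static_assert(NumBitsNeeded(100, 5) == 4, "4 bits cover 100 bytes");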