From 5cd0576ffeacba2071771282a262d81fdb1e2232 Mon Sep 17 00:00:00 2001 From: Radheshyam Balasundaram Date: Fri, 5 Sep 2014 11:18:01 -0700 Subject: [PATCH] Fix compaction bug in Cuckoo Table Builder. Use kvs_.size() instead of num_entries in FileSize() method. Summary: Fix compaction bug in Cuckoo Table Builder. Use kvs_.size() instead of num_entries in FileSize() method. Also added tests. Test Plan: make check all Also ran db_bench to generate multiple files. Reviewers: sdong, ljin Reviewed By: ljin Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D22743 --- db/cuckoo_table_db_test.cc | 26 ++++++++++++++++++- table/cuckoo_table_builder.cc | 5 ++-- table/cuckoo_table_builder_test.cc | 41 ++++++++++++++++++++++++------ 3 files changed, 60 insertions(+), 12 deletions(-) diff --git a/db/cuckoo_table_db_test.cc b/db/cuckoo_table_db_test.cc index c1e59b1b5..2652d1776 100644 --- a/db/cuckoo_table_db_test.cc +++ b/db/cuckoo_table_db_test.cc @@ -245,14 +245,38 @@ TEST(CuckooTableDBTest, CompactionTrigger) { ASSERT_OK(Put(Key(idx), std::string(10000, 'a' + idx))); } dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_CompactRange(0, nullptr, nullptr); + ASSERT_EQ("2", FilesPerLevel()); + dbfull()->TEST_CompactRange(0, nullptr, nullptr); ASSERT_EQ("0,2", FilesPerLevel()); for (int idx = 0; idx < 22; ++idx) { ASSERT_EQ(std::string(10000, 'a' + idx), Get(Key(idx))); } } +TEST(CuckooTableDBTest, CompactionIntoMultipleFiles) { + // Create a big L0 file and check it compacts into multiple files in L1. + Options options = CurrentOptions(); + options.write_buffer_size = 270 << 10; + // Two SST files should be created, each containing 14 keys. + // Number of buckets will be 16. Total size ~156 KB. + options.target_file_size_base = 160 << 10; + Reopen(&options); + + // Write 28 values, each 10016 B ~ 10KB + for (int idx = 0; idx < 28; ++idx) { + ASSERT_OK(Put(Key(idx), std::string(10000, 'a' + idx))); + } + dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_EQ("1", FilesPerLevel()); + + dbfull()->TEST_CompactRange(0, nullptr, nullptr); + ASSERT_EQ("0,2", FilesPerLevel()); + for (int idx = 0; idx < 28; ++idx) { + ASSERT_EQ(std::string(10000, 'a' + idx), Get(Key(idx))); + } +} + TEST(CuckooTableDBTest, SameKeyInsertedInTwoDifferentFilesAndCompacted) { // Insert same key twice so that they go to different SST files. Then wait for // compaction and check if the latest value is stored and old value removed. diff --git a/table/cuckoo_table_builder.cc b/table/cuckoo_table_builder.cc index 6326d3787..e107071f2 100644 --- a/table/cuckoo_table_builder.cc +++ b/table/cuckoo_table_builder.cc @@ -56,7 +56,6 @@ CuckooTableBuilder::CuckooTableBuilder( ucomp_(user_comparator), get_slice_hash_(get_slice_hash), closed_(false) { - properties_.num_entries = 0; // Data is in a huge block. properties_.num_data_blocks = 1; properties_.index_size = 0; @@ -64,7 +63,7 @@ CuckooTableBuilder::CuckooTableBuilder( } void CuckooTableBuilder::Add(const Slice& key, const Slice& value) { - if (properties_.num_entries >= kMaxVectorIdx - 1) { + if (kvs_.size() >= kMaxVectorIdx - 1) { status_ = Status::NotSupported("Number of keys in a file must be < 2^32-1"); return; } @@ -311,7 +310,7 @@ uint64_t CuckooTableBuilder::NumEntries() const { uint64_t CuckooTableBuilder::FileSize() const { if (closed_) { return file_->GetFileSize(); - } else if (properties_.num_entries == 0) { + } else if (kvs_.size() == 0) { return 0; } diff --git a/table/cuckoo_table_builder_test.cc b/table/cuckoo_table_builder_test.cc index 69647d410..be13dc9a3 100644 --- a/table/cuckoo_table_builder_test.cc +++ b/table/cuckoo_table_builder_test.cc @@ -135,6 +135,7 @@ TEST(CuckooBuilderTest, SuccessWithEmptyFile) { CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, 4, 100, BytewiseComparator(), 1, GetSliceHash); ASSERT_OK(builder.status()); + ASSERT_EQ(0UL, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); CheckFileContents({}, {}, {}, "", 0, 2, false); @@ -155,6 +156,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) { for (auto& user_key : user_keys) { keys.push_back(GetInternalKey(user_key, false)); } + uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio); unique_ptr writable_file; fname = test::TmpDir() + "/NoCollisionFullKey"; @@ -167,10 +169,12 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) { ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } + uint32_t bucket_size = keys[0].size() + values[0].size(); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); - uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio); std::string expected_unused_bucket = GetInternalKey("key00", true); expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(keys, values, expected_locations, @@ -192,6 +196,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { for (auto& user_key : user_keys) { keys.push_back(GetInternalKey(user_key, false)); } + uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio); unique_ptr writable_file; fname = test::TmpDir() + "/WithCollisionFullKey"; @@ -204,10 +209,12 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } + uint32_t bucket_size = keys[0].size() + values[0].size(); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); - uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio); std::string expected_unused_bucket = GetInternalKey("key00", true); expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(keys, values, expected_locations, @@ -229,6 +236,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) { for (auto& user_key : user_keys) { keys.push_back(GetInternalKey(user_key, false)); } + uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio); unique_ptr writable_file; uint32_t cuckoo_block_size = 2; @@ -242,10 +250,12 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) { ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } + uint32_t bucket_size = keys[0].size() + values[0].size(); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); - uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio); std::string expected_unused_bucket = GetInternalKey("key00", true); expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(keys, values, expected_locations, @@ -272,6 +282,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) { for (auto& user_key : user_keys) { keys.push_back(GetInternalKey(user_key, false)); } + uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio); unique_ptr writable_file; fname = test::TmpDir() + "/WithCollisionPathFullKey"; @@ -284,10 +295,12 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) { ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } + uint32_t bucket_size = keys[0].size() + values[0].size(); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); - uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio); std::string expected_unused_bucket = GetInternalKey("key00", true); expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(keys, values, expected_locations, @@ -311,6 +324,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) { for (auto& user_key : user_keys) { keys.push_back(GetInternalKey(user_key, false)); } + uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio); unique_ptr writable_file; fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock"; @@ -323,10 +337,12 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) { ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } + uint32_t bucket_size = keys[0].size() + values[0].size(); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); - uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio); std::string expected_unused_bucket = GetInternalKey("key00", true); expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(keys, values, expected_locations, @@ -344,6 +360,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) { {user_keys[3], {3, 4, 5, 6}} }; std::vector expected_locations = {0, 1, 2, 3}; + uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio); unique_ptr writable_file; fname = test::TmpDir() + "/NoCollisionUserKey"; @@ -356,10 +373,12 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) { ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } + uint32_t bucket_size = user_keys[0].size() + values[0].size(); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); - uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio); std::string expected_unused_bucket = "key00"; expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(user_keys, values, expected_locations, @@ -377,6 +396,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) { {user_keys[3], {0, 1, 2, 3}}, }; std::vector expected_locations = {0, 1, 2, 3}; + uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio); unique_ptr writable_file; fname = test::TmpDir() + "/WithCollisionUserKey"; @@ -389,10 +409,12 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) { ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } + uint32_t bucket_size = user_keys[0].size() + values[0].size(); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); - uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio); std::string expected_unused_bucket = "key00"; expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(user_keys, values, expected_locations, @@ -412,6 +434,7 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) { {user_keys[4], {0, 2}}, }; std::vector expected_locations = {0, 1, 3, 4, 2}; + uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio); unique_ptr writable_file; fname = test::TmpDir() + "/WithCollisionPathUserKey"; @@ -424,10 +447,12 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) { ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } + uint32_t bucket_size = user_keys[0].size() + values[0].size(); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); - uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio); std::string expected_unused_bucket = "key00"; expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(user_keys, values, expected_locations,