Fix compaction bug in Cuckoo Table Builder. Use kvs_.size() instead of num_entries in FileSize() method.

Summary: Fix a compaction bug in Cuckoo Table Builder. Use kvs_.size() instead of properties_.num_entries in the FileSize() method (and in the key-count limit check in Add()). Also added tests.

Test Plan:
make check all
Also ran db_bench to generate multiple files.

Reviewers: sdong, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D22743
main
Radheshyam Balasundaram 10 years ago
parent 0fbb3facc0
commit 5cd0576ffe
  1. 26
      db/cuckoo_table_db_test.cc
  2. 5
      table/cuckoo_table_builder.cc
  3. 41
      table/cuckoo_table_builder_test.cc

@ -245,14 +245,38 @@ TEST(CuckooTableDBTest, CompactionTrigger) {
ASSERT_OK(Put(Key(idx), std::string(10000, 'a' + idx))); ASSERT_OK(Put(Key(idx), std::string(10000, 'a' + idx)));
} }
dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForFlushMemTable();
dbfull()->TEST_CompactRange(0, nullptr, nullptr); ASSERT_EQ("2", FilesPerLevel());
dbfull()->TEST_CompactRange(0, nullptr, nullptr);
ASSERT_EQ("0,2", FilesPerLevel()); ASSERT_EQ("0,2", FilesPerLevel());
for (int idx = 0; idx < 22; ++idx) { for (int idx = 0; idx < 22; ++idx) {
ASSERT_EQ(std::string(10000, 'a' + idx), Get(Key(idx))); ASSERT_EQ(std::string(10000, 'a' + idx), Get(Key(idx)));
} }
} }
TEST(CuckooTableDBTest, CompactionIntoMultipleFiles) {
  // Build one large L0 file, compact it, and verify that the compaction
  // output is split across multiple files in L1.
  const int kNumKeys = 28;
  // Every value is 10000 bytes of a single character chosen by key index.
  const auto value_for = [](int i) { return std::string(10000, 'a' + i); };

  Options opts = CurrentOptions();
  opts.write_buffer_size = 270 << 10;
  // Two SST files are expected, 14 keys in each.
  // The number of buckets will be 16; total size is ~156 KB.
  opts.target_file_size_base = 160 << 10;
  Reopen(&opts);

  // Insert 28 entries of about 10 KB each (10016 B).
  for (int i = 0; i < kNumKeys; ++i) {
    ASSERT_OK(Put(Key(i), value_for(i)));
  }

  // After the flush there is exactly one file, at level 0.
  dbfull()->TEST_WaitForFlushMemTable();
  ASSERT_EQ("1", FilesPerLevel());

  // After the manual compaction level 0 is empty and level 1 holds two files.
  dbfull()->TEST_CompactRange(0, nullptr, nullptr);
  ASSERT_EQ("0,2", FilesPerLevel());

  // Every key must still read back its original value.
  for (int i = 0; i < kNumKeys; ++i) {
    ASSERT_EQ(value_for(i), Get(Key(i)));
  }
}
TEST(CuckooTableDBTest, SameKeyInsertedInTwoDifferentFilesAndCompacted) { TEST(CuckooTableDBTest, SameKeyInsertedInTwoDifferentFilesAndCompacted) {
// Insert same key twice so that they go to different SST files. Then wait for // Insert same key twice so that they go to different SST files. Then wait for
// compaction and check if the latest value is stored and old value removed. // compaction and check if the latest value is stored and old value removed.

@ -56,7 +56,6 @@ CuckooTableBuilder::CuckooTableBuilder(
ucomp_(user_comparator), ucomp_(user_comparator),
get_slice_hash_(get_slice_hash), get_slice_hash_(get_slice_hash),
closed_(false) { closed_(false) {
properties_.num_entries = 0;
// Data is in a huge block. // Data is in a huge block.
properties_.num_data_blocks = 1; properties_.num_data_blocks = 1;
properties_.index_size = 0; properties_.index_size = 0;
@ -64,7 +63,7 @@ CuckooTableBuilder::CuckooTableBuilder(
} }
void CuckooTableBuilder::Add(const Slice& key, const Slice& value) { void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
if (properties_.num_entries >= kMaxVectorIdx - 1) { if (kvs_.size() >= kMaxVectorIdx - 1) {
status_ = Status::NotSupported("Number of keys in a file must be < 2^32-1"); status_ = Status::NotSupported("Number of keys in a file must be < 2^32-1");
return; return;
} }
@ -311,7 +310,7 @@ uint64_t CuckooTableBuilder::NumEntries() const {
uint64_t CuckooTableBuilder::FileSize() const { uint64_t CuckooTableBuilder::FileSize() const {
if (closed_) { if (closed_) {
return file_->GetFileSize(); return file_->GetFileSize();
} else if (properties_.num_entries == 0) { } else if (kvs_.size() == 0) {
return 0; return 0;
} }

@ -135,6 +135,7 @@ TEST(CuckooBuilderTest, SuccessWithEmptyFile) {
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
4, 100, BytewiseComparator(), 1, GetSliceHash); 4, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
ASSERT_EQ(0UL, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
CheckFileContents({}, {}, {}, "", 0, 2, false); CheckFileContents({}, {}, {}, "", 0, 2, false);
@ -155,6 +156,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
for (auto& user_key : user_keys) { for (auto& user_key : user_keys) {
keys.push_back(GetInternalKey(user_key, false)); keys.push_back(GetInternalKey(user_key, false));
} }
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/NoCollisionFullKey"; fname = test::TmpDir() + "/NoCollisionFullKey";
@ -167,10 +169,12 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
} }
uint32_t bucket_size = keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
std::string expected_unused_bucket = GetInternalKey("key00", true); std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a'); expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations, CheckFileContents(keys, values, expected_locations,
@ -192,6 +196,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
for (auto& user_key : user_keys) { for (auto& user_key : user_keys) {
keys.push_back(GetInternalKey(user_key, false)); keys.push_back(GetInternalKey(user_key, false));
} }
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionFullKey"; fname = test::TmpDir() + "/WithCollisionFullKey";
@ -204,10 +209,12 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
} }
uint32_t bucket_size = keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
std::string expected_unused_bucket = GetInternalKey("key00", true); std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a'); expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations, CheckFileContents(keys, values, expected_locations,
@ -229,6 +236,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
for (auto& user_key : user_keys) { for (auto& user_key : user_keys) {
keys.push_back(GetInternalKey(user_key, false)); keys.push_back(GetInternalKey(user_key, false));
} }
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
uint32_t cuckoo_block_size = 2; uint32_t cuckoo_block_size = 2;
@ -242,10 +250,12 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
} }
uint32_t bucket_size = keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
std::string expected_unused_bucket = GetInternalKey("key00", true); std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a'); expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations, CheckFileContents(keys, values, expected_locations,
@ -272,6 +282,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
for (auto& user_key : user_keys) { for (auto& user_key : user_keys) {
keys.push_back(GetInternalKey(user_key, false)); keys.push_back(GetInternalKey(user_key, false));
} }
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionPathFullKey"; fname = test::TmpDir() + "/WithCollisionPathFullKey";
@ -284,10 +295,12 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
} }
uint32_t bucket_size = keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
std::string expected_unused_bucket = GetInternalKey("key00", true); std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a'); expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations, CheckFileContents(keys, values, expected_locations,
@ -311,6 +324,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
for (auto& user_key : user_keys) { for (auto& user_key : user_keys) {
keys.push_back(GetInternalKey(user_key, false)); keys.push_back(GetInternalKey(user_key, false));
} }
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock"; fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock";
@ -323,10 +337,12 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
} }
uint32_t bucket_size = keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
std::string expected_unused_bucket = GetInternalKey("key00", true); std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a'); expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations, CheckFileContents(keys, values, expected_locations,
@ -344,6 +360,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
{user_keys[3], {3, 4, 5, 6}} {user_keys[3], {3, 4, 5, 6}}
}; };
std::vector<uint64_t> expected_locations = {0, 1, 2, 3}; std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/NoCollisionUserKey"; fname = test::TmpDir() + "/NoCollisionUserKey";
@ -356,10 +373,12 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
} }
uint32_t bucket_size = user_keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
std::string expected_unused_bucket = "key00"; std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a'); expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations, CheckFileContents(user_keys, values, expected_locations,
@ -377,6 +396,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
{user_keys[3], {0, 1, 2, 3}}, {user_keys[3], {0, 1, 2, 3}},
}; };
std::vector<uint64_t> expected_locations = {0, 1, 2, 3}; std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionUserKey"; fname = test::TmpDir() + "/WithCollisionUserKey";
@ -389,10 +409,12 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
} }
uint32_t bucket_size = user_keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
std::string expected_unused_bucket = "key00"; std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a'); expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations, CheckFileContents(user_keys, values, expected_locations,
@ -412,6 +434,7 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
{user_keys[4], {0, 2}}, {user_keys[4], {0, 2}},
}; };
std::vector<uint64_t> expected_locations = {0, 1, 3, 4, 2}; std::vector<uint64_t> expected_locations = {0, 1, 3, 4, 2};
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionPathUserKey"; fname = test::TmpDir() + "/WithCollisionPathUserKey";
@ -424,10 +447,12 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
} }
uint32_t bucket_size = user_keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
std::string expected_unused_bucket = "key00"; std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a'); expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations, CheckFileContents(user_keys, values, expected_locations,

Loading…
Cancel
Save