From 9718c790ec286fe2dad70dea491b54c34e5547a7 Mon Sep 17 00:00:00 2001 From: Siying Dong Date: Thu, 12 Dec 2013 18:42:07 -0800 Subject: [PATCH] [Performance Branch] Fix a bug of PlainTable when building indexes Summary: PlainTable now has a bug of the ordering of indexes for the prefixes in the same bucket. I thought std::map guaranteed key order but it didn't, probably because I didn't use it properly. But seems to me that we don't need to make extra sorting as input prefixes are already sorted. Found by problem by running leaf4 against plain table. Replace the map with a vector. It should performs better too. After the fix, leaf4 unit tests are passing. Test Plan: run plain_table_db_test Also going to run db_test with plain table in the uncommitted branch. Reviewers: haobo, kailiu Reviewed By: haobo CC: nkg-, leveldb Differential Revision: https://reviews.facebook.net/D14649 --- table/plain_table_reader.cc | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc index cccaf61c9..e808948ab 100644 --- a/table/plain_table_reader.cc +++ b/table/plain_table_reader.cc @@ -37,14 +37,6 @@ public: return MurmurHash(s.data(), s.size(), 397); } }; - -class slice_comparator { -public: - bool operator()(rocksdb::Slice const& s1, - rocksdb::Slice const& s2) const { - return s1.compare(s2) < 0; - } -}; } namespace rocksdb { @@ -146,8 +138,8 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) { HistogramImpl keys_per_prefix_hist; // Need map to be ordered to make sure sub indexes generated // are in order. - std::map prefix2map; - + std::vector> prefix_index_pairs; + std::string current_prefix_index; while (pos < file_size) { uint32_t key_offset = pos; status_ = Next(pos, &key_slice, &value_slice, pos); @@ -156,6 +148,11 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) { if (first || prev_key_prefix_slice != key_prefix_slice) { if (!first) { keys_per_prefix_hist.Add(key_index_within_prefix); + prefix_index_pairs.push_back( + std::make_pair( + std::move(prev_key_prefix_slice), + std::move(current_prefix_index))); + current_prefix_index.clear(); } key_index_within_prefix = 0; prev_key_prefix_slice = key_prefix_slice; @@ -163,27 +160,30 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) { if (key_index_within_prefix++ % 8 == 0) { // Add an index key for every 8 keys - std::string& prefix_index = prefix2map[key_prefix_slice]; - PutFixed32(&prefix_index, key_offset); + PutFixed32(¤t_prefix_index, key_offset); } first = false; } + prefix_index_pairs.push_back( + std::make_pair(std::move(prev_key_prefix_slice), + std::move(current_prefix_index))); + keys_per_prefix_hist.Add(key_index_within_prefix); if (hash_table_ != nullptr) { delete[] hash_table_; } std::vector filter_entries(0); // for creating bloom filter; if (filter_policy_ != nullptr) { - filter_entries.reserve(prefix2map.size()); + filter_entries.reserve(prefix_index_pairs.size()); } double hash_table_size_multipier = (hash_table_ratio_ > 1.0) ? 1.0 : 1.0 / hash_table_ratio_; - hash_table_size_ = prefix2map.size() * hash_table_size_multipier + 1; + hash_table_size_ = prefix_index_pairs.size() * hash_table_size_multipier + 1; hash_table_ = new uint32_t[hash_table_size_]; std::vector hash2map(hash_table_size_); size_t sub_index_size_needed = 0; - for (auto& p: prefix2map) { + for (auto& p: prefix_index_pairs) { auto& sub_index = hash2map[getBucketId(p.first, key_prefix_len_, hash_table_size_)]; if (sub_index.length() > 0 || p.second.length() > kOffsetLen) {