store file_indexer info in sequential memory

Summary:
  Use an arena to allocate space for next_level_index_ and level_rb_,
  thus increasing data locality and making Version::Get faster.

Benchmark detail
Base version: commit d2a727c182

command used:
./db_bench --db=/mnt/db/rocksdb --num_levels=6 --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --block_size=4096 --cache_size=17179869184 --cache_numshardbits=6 --compression_type=none --compression_ratio=1 --min_level_to_compress=-1 --disable_seek_compaction=1 --hard_rate_limit=2 --write_buffer_size=134217728 --max_write_buffer_number=2 --level0_file_num_compaction_trigger=8 --target_file_size_base=2097152 --max_bytes_for_level_base=1073741824 --disable_wal=0 --sync=0 --disable_data_sync=1 --verify_checksum=1 --delete_obsolete_files_period_micros=314572800 --max_grandparent_overlap_factor=10 --max_background_compactions=4 --max_background_flushes=0 --level0_slowdown_writes_trigger=16 --level0_stop_writes_trigger=24 --statistics=0 --stats_per_interval=0 --stats_interval=1048576 --histogram=0 --use_plain_table=1 --open_files=-1 --mmap_read=1 --mmap_write=0 --memtablerep=prefix_hash --bloom_bits=10 --bloom_locality=1 --perf_level=0 --benchmarks=fillseq,readrandom,readrandom,readrandom --use_existing_db=0 --num=52428800 --threads=1

Result:
CPU running percentage:
Version::Get improved from 7.98% to 7.42%.
FileIndexer::GetNextLevelIndex improved from 1.18% to 0.68%.

Test Plan:
  make all check

Reviewers: ljin, haobo, yhchiang, sdong

Reviewed By: sdong

Subscribers: dhruba, igor

Differential Revision: https://reviews.facebook.net/D19845
main
Feng Zhu 10 years ago
parent 1614284eff
commit c11d604ab3
  1. 75
      db/file_indexer.cc
  2. 30
      db/file_indexer.h
  3. 80
      db/file_indexer_test.cc
  4. 7
      db/version_set.cc
  5. 2
      db/version_set.h

@ -14,21 +14,18 @@
namespace rocksdb {
FileIndexer::FileIndexer(const uint32_t num_levels,
const Comparator* ucmp)
: num_levels_(num_levels),
FileIndexer::FileIndexer(const Comparator* ucmp)
: num_levels_(0),
ucmp_(ucmp),
next_level_index_(num_levels),
level_rb_(num_levels, -1) {
level_rb_(nullptr) {
}
// Returns the number of per-level entries currently held in
// next_level_index_.
uint32_t FileIndexer::NumLevelIndex() {
return next_level_index_.size();
}
uint32_t FileIndexer::LevelIndexSize(uint32_t level) {
return next_level_index_[level].size();
return next_level_index_[level].num_index;
}
void FileIndexer::GetNextLevelIndex(
@ -46,11 +43,12 @@ void FileIndexer::GetNextLevelIndex(
assert(level < num_levels_ - 1);
assert(static_cast<int32_t>(file_index) <= level_rb_[level]);
const auto& index = next_level_index_[level][file_index];
const IndexUnit* index_units = next_level_index_[level].index_units;
const auto& index = index_units[file_index];
if (cmp_smallest < 0) {
*left_bound = (level > 0 && file_index > 0) ?
next_level_index_[level][file_index - 1].largest_lb : 0;
index_units[file_index - 1].largest_lb : 0;
*right_bound = index.smallest_rb;
} else if (cmp_smallest == 0) {
*left_bound = index.smallest_lb;
@ -73,16 +71,26 @@ void FileIndexer::GetNextLevelIndex(
assert(*right_bound <= level_rb_[level + 1]);
}
// Clears the stored index of every level from 1 up to num_levels_ - 1.
// The entry for level 0 is not touched by this loop.
void FileIndexer::ClearIndex() {
for (uint32_t level = 1; level < num_levels_; ++level) {
next_level_index_[level].clear();
}
}
void FileIndexer::UpdateIndex(std::vector<FileMetaData*>* const files) {
void FileIndexer::UpdateIndex(Arena* arena,
const uint32_t num_levels,
std::vector<FileMetaData*>* const files) {
if (files == nullptr) {
return;
}
if (num_levels == 0) { // uint_32 0-1 would cause bad behavior
num_levels_ = num_levels;
return;
}
assert(level_rb_ == nullptr); // level_rb_ should be init here
num_levels_ = num_levels;
next_level_index_.resize(num_levels);
char* mem = arena->AllocateAligned(num_levels_ * sizeof(int32_t));
level_rb_ = new (mem)int32_t[num_levels_];
for (size_t i = 0; i < num_levels_; i++) {
level_rb_[i] = -1;
}
// L1 - Ln-1
for (uint32_t level = 1; level < num_levels_ - 1; ++level) {
@ -93,31 +101,33 @@ void FileIndexer::UpdateIndex(std::vector<FileMetaData*>* const files) {
if (upper_size == 0) {
continue;
}
auto& index = next_level_index_[level];
index.resize(upper_size);
IndexLevel& index_level = next_level_index_[level];
index_level.num_index = upper_size;
char* mem = arena->AllocateAligned(upper_size * sizeof(IndexUnit));
index_level.index_units = new (mem)IndexUnit[upper_size];
CalculateLB(upper_files, lower_files, &index,
CalculateLB(upper_files, lower_files, &index_level,
[this](const FileMetaData* a, const FileMetaData* b) -> int {
return ucmp_->Compare(a->smallest.user_key(), b->largest.user_key());
},
[](IndexUnit* index, int32_t f_idx) {
index->smallest_lb = f_idx;
});
CalculateLB(upper_files, lower_files, &index,
CalculateLB(upper_files, lower_files, &index_level,
[this](const FileMetaData* a, const FileMetaData* b) -> int {
return ucmp_->Compare(a->largest.user_key(), b->largest.user_key());
},
[](IndexUnit* index, int32_t f_idx) {
index->largest_lb = f_idx;
});
CalculateRB(upper_files, lower_files, &index,
CalculateRB(upper_files, lower_files, &index_level,
[this](const FileMetaData* a, const FileMetaData* b) -> int {
return ucmp_->Compare(a->smallest.user_key(), b->smallest.user_key());
},
[](IndexUnit* index, int32_t f_idx) {
index->smallest_rb = f_idx;
});
CalculateRB(upper_files, lower_files, &index,
CalculateRB(upper_files, lower_files, &index_level,
[this](const FileMetaData* a, const FileMetaData* b) -> int {
return ucmp_->Compare(a->largest.user_key(), b->smallest.user_key());
},
@ -125,23 +135,26 @@ void FileIndexer::UpdateIndex(std::vector<FileMetaData*>* const files) {
index->largest_rb = f_idx;
});
}
level_rb_[num_levels_ - 1] = files[num_levels_ - 1].size() - 1;
}
void FileIndexer::CalculateLB(const std::vector<FileMetaData*>& upper_files,
const std::vector<FileMetaData*>& lower_files,
std::vector<IndexUnit>* index,
IndexLevel *index_level,
std::function<int(const FileMetaData*, const FileMetaData*)> cmp_op,
std::function<void(IndexUnit*, int32_t)> set_index) {
const int32_t upper_size = upper_files.size();
const int32_t lower_size = lower_files.size();
int32_t upper_idx = 0;
int32_t lower_idx = 0;
IndexUnit* index = index_level->index_units;
while (upper_idx < upper_size && lower_idx < lower_size) {
int cmp = cmp_op(upper_files[upper_idx], lower_files[lower_idx]);
if (cmp == 0) {
set_index(&(*index)[upper_idx], lower_idx);
set_index(&index[upper_idx], lower_idx);
++upper_idx;
++lower_idx;
} else if (cmp > 0) {
@ -151,7 +164,7 @@ void FileIndexer::CalculateLB(const std::vector<FileMetaData*>& upper_files,
} else {
// Lower level's file becomes larger, update the index, and
// move to the next upper file
set_index(&(*index)[upper_idx], lower_idx);
set_index(&index[upper_idx], lower_idx);
++upper_idx;
}
}
@ -159,25 +172,27 @@ void FileIndexer::CalculateLB(const std::vector<FileMetaData*>& upper_files,
while (upper_idx < upper_size) {
// Lower files are exhausted, that means the remaining upper files are
// greater than any lower files. Set the index to be the lower level size.
set_index(&(*index)[upper_idx], lower_size);
set_index(&index[upper_idx], lower_size);
++upper_idx;
}
}
void FileIndexer::CalculateRB(const std::vector<FileMetaData*>& upper_files,
const std::vector<FileMetaData*>& lower_files,
std::vector<IndexUnit>* index,
IndexLevel *index_level,
std::function<int(const FileMetaData*, const FileMetaData*)> cmp_op,
std::function<void(IndexUnit*, int32_t)> set_index) {
const int32_t upper_size = upper_files.size();
const int32_t lower_size = lower_files.size();
int32_t upper_idx = upper_size - 1;
int32_t lower_idx = lower_size - 1;
IndexUnit* index = index_level->index_units;
while (upper_idx >= 0 && lower_idx >= 0) {
int cmp = cmp_op(upper_files[upper_idx], lower_files[lower_idx]);
if (cmp == 0) {
set_index(&(*index)[upper_idx], lower_idx);
set_index(&index[upper_idx], lower_idx);
--upper_idx;
--lower_idx;
} else if (cmp < 0) {
@ -187,14 +202,14 @@ void FileIndexer::CalculateRB(const std::vector<FileMetaData*>& upper_files,
} else {
// Lower level's file becomes smaller, update the index, and move to
// the next the upper file
set_index(&(*index)[upper_idx], lower_idx);
set_index(&index[upper_idx], lower_idx);
--upper_idx;
}
}
while (upper_idx >= 0) {
// Lower files are exhausted, that means the remaining upper files are
// smaller than any lower files. Set it to -1.
set_index(&(*index)[upper_idx], -1);
set_index(&index[upper_idx], -1);
--upper_idx;
}
}

@ -12,11 +12,15 @@
#include <functional>
#include <limits>
#include <vector>
#include "util/arena.h"
#include "util/autovector.h"
namespace rocksdb {
class Comparator;
struct FileMetaData;
struct FdWithKeyRange;
struct FileLevel;
// The file tree structure in Version is prebuilt and the range of each file
// is known. On Version::Get(), it uses binary search to find a potential file
@ -36,7 +40,7 @@ struct FileMetaData;
// naive approach.
class FileIndexer {
public:
FileIndexer(const uint32_t num_levels, const Comparator* ucmp);
explicit FileIndexer(const Comparator* ucmp);
uint32_t NumLevelIndex();
@ -50,16 +54,16 @@ class FileIndexer {
const uint32_t level, const uint32_t file_index, const int cmp_smallest,
const int cmp_largest, int32_t* left_bound, int32_t* right_bound);
void ClearIndex();
void UpdateIndex(std::vector<FileMetaData*>* const files);
void UpdateIndex(Arena* arena,
const uint32_t num_levels,
std::vector<FileMetaData*>* const files);
enum {
kLevelMaxIndex = std::numeric_limits<int32_t>::max()
};
private:
const uint32_t num_levels_;
uint32_t num_levels_;
const Comparator* ucmp_;
struct IndexUnit {
@ -110,20 +114,28 @@ class FileIndexer {
int32_t largest_rb;
};
// Data structure to store IndexUnits in a whole level
struct IndexLevel {
// Number of IndexUnit entries reachable through index_units.
size_t num_index;
// Array of num_index IndexUnits; stays nullptr until an index is built
// for the level.
IndexUnit* index_units;
// Starts out empty: zero entries and no backing array.
IndexLevel(): num_index(0), index_units(nullptr) {}
};
void CalculateLB(const std::vector<FileMetaData*>& upper_files,
const std::vector<FileMetaData*>& lower_files,
std::vector<IndexUnit>* index,
IndexLevel* index_level,
std::function<int(const FileMetaData*, const FileMetaData*)> cmp_op,
std::function<void(IndexUnit*, int32_t)> set_index);
void CalculateRB(const std::vector<FileMetaData*>& upper_files,
const std::vector<FileMetaData*>& lower_files,
std::vector<IndexUnit>* index,
IndexLevel* index_level,
std::function<int(const FileMetaData*, const FileMetaData*)> cmp_op,
std::function<void(IndexUnit*, int32_t)> set_index);
std::vector<std::vector<IndexUnit>> next_level_index_;
std::vector<int32_t> level_rb_;
autovector<IndexLevel> next_level_index_;
int32_t* level_rb_;
};
} // namespace rocksdb

@ -35,16 +35,15 @@ class IntComparator : public Comparator {
void FindShortSuccessor(std::string* key) const {}
};
struct FileIndexerTest {
public:
FileIndexerTest() :
kNumLevels(4), indexer(kNumLevels, &ucmp),
kNumLevels(4),
files(new std::vector<FileMetaData*>[kNumLevels]) {
}
~FileIndexerTest() {
Reset();
ClearFiles();
delete[] files;
}
@ -59,14 +58,13 @@ struct FileIndexerTest {
return InternalKey(Slice(reinterpret_cast<char*>(&v), 8), 0, kTypeValue);
}
void Reset() {
void ClearFiles() {
for (uint32_t i = 0; i < kNumLevels; ++i) {
for (auto* f : files[i]) {
delete f;
}
files[i].clear();
}
indexer.ClearIndex();
}
void GetNextLevelIndex(const uint32_t level, const uint32_t file_index,
@ -74,23 +72,32 @@ struct FileIndexerTest {
int32_t* right_index) {
*left_index = 100;
*right_index = 100;
indexer.GetNextLevelIndex(level, file_index, cmp_smallest, cmp_largest,
indexer->GetNextLevelIndex(level, file_index, cmp_smallest, cmp_largest,
left_index, right_index);
}
int32_t left = 100;
int32_t right = 100;
const uint32_t kNumLevels;
IntComparator ucmp;
FileIndexer indexer;
FileIndexer* indexer;
std::vector<FileMetaData*>* files;
};
TEST(FileIndexerTest, next_level_hint) {
for (uint32_t i = 0; i < kNumLevels; ++i) {
ASSERT_EQ(0U, indexer.LevelIndexSize(i));
}
// Case 0: Empty
TEST(FileIndexerTest, Empty) {
Arena arena;
indexer = new FileIndexer(&ucmp);
// Passing num_levels == 0 exercises the early-return path of UpdateIndex;
// the test only checks that this call completes.
indexer->UpdateIndex(&arena, 0, files);
delete indexer;
}
// Case 1: no overlap, files are on the left of next level files
// Case 1: no overlap, files are on the left of next level files
TEST(FileIndexerTest, no_overlap_left) {
Arena arena;
uint32_t kNumLevels = 4;
indexer = new FileIndexer(&ucmp);
// level 1
AddFile(1, 100, 200);
AddFile(1, 300, 400);
@ -103,9 +110,7 @@ TEST(FileIndexerTest, next_level_hint) {
AddFile(3, 2500, 2600);
AddFile(3, 2601, 2699);
AddFile(3, 2700, 2800);
indexer.UpdateIndex(files);
int32_t left = 100;
int32_t right = 100;
indexer->UpdateIndex(&arena, kNumLevels, files);
for (uint32_t level = 1; level < 3; ++level) {
for (uint32_t f = 0; f < 3; ++f) {
GetNextLevelIndex(level, f, -1, -1, &left, &right);
@ -125,12 +130,15 @@ TEST(FileIndexerTest, next_level_hint) {
ASSERT_EQ(2, right);
}
}
delete indexer;
ClearFiles();
}
// Case 2: no overlap, files are on the right of next level files
Reset();
for (uint32_t i = 1; i < kNumLevels; ++i) {
ASSERT_EQ(0U, indexer.LevelIndexSize(i));
}
// Case 2: no overlap, files are on the right of next level files
TEST(FileIndexerTest, no_overlap_right) {
Arena arena;
uint32_t kNumLevels = 4;
indexer = new FileIndexer(&ucmp);
// level 1
AddFile(1, 2100, 2200);
AddFile(1, 2300, 2400);
@ -143,7 +151,7 @@ TEST(FileIndexerTest, next_level_hint) {
AddFile(3, 500, 600);
AddFile(3, 501, 699);
AddFile(3, 700, 800);
indexer.UpdateIndex(files);
indexer->UpdateIndex(&arena, kNumLevels, files);
for (uint32_t level = 1; level < 3; ++level) {
for (uint32_t f = 0; f < 3; ++f) {
GetNextLevelIndex(level, f, -1, -1, &left, &right);
@ -166,11 +174,16 @@ TEST(FileIndexerTest, next_level_hint) {
ASSERT_EQ(2, right);
}
}
delete indexer;
}
// Case 3: empty L2
Reset();
// Case 3: empty L2
TEST(FileIndexerTest, empty_L2) {
Arena arena;
uint32_t kNumLevels = 4;
indexer = new FileIndexer(&ucmp);
for (uint32_t i = 1; i < kNumLevels; ++i) {
ASSERT_EQ(0U, indexer.LevelIndexSize(i));
ASSERT_EQ(0U, indexer->LevelIndexSize(i));
}
// level 1
AddFile(1, 2100, 2200);
@ -180,7 +193,7 @@ TEST(FileIndexerTest, next_level_hint) {
AddFile(3, 500, 600);
AddFile(3, 501, 699);
AddFile(3, 700, 800);
indexer.UpdateIndex(files);
indexer->UpdateIndex(&arena, kNumLevels, files);
for (uint32_t f = 0; f < 3; ++f) {
GetNextLevelIndex(1, f, -1, -1, &left, &right);
ASSERT_EQ(0, left);
@ -201,13 +214,14 @@ TEST(FileIndexerTest, next_level_hint) {
ASSERT_EQ(0, left);
ASSERT_EQ(-1, right);
}
delete indexer;
ClearFiles();
}
// Case 4: mixed
Reset();
for (uint32_t i = 1; i < kNumLevels; ++i) {
ASSERT_EQ(0U, indexer.LevelIndexSize(i));
}
// Case 4: mixed
TEST(FileIndexerTest, mixed) {
Arena arena;
indexer = new FileIndexer(&ucmp);
// level 1
AddFile(1, 100, 200);
AddFile(1, 250, 400);
@ -222,7 +236,7 @@ TEST(FileIndexerTest, next_level_hint) {
AddFile(3, 0, 50);
AddFile(3, 100, 200);
AddFile(3, 201, 250);
indexer.UpdateIndex(files);
indexer->UpdateIndex(&arena, kNumLevels, files);
// level 1, 0
GetNextLevelIndex(1, 0, -1, -1, &left, &right);
ASSERT_EQ(0, left);
@ -321,6 +335,8 @@ TEST(FileIndexerTest, next_level_hint) {
ASSERT_EQ(3, left);
ASSERT_EQ(2, right);
}
delete indexer;
ClearFiles();
}
} // namespace rocksdb

@ -546,6 +546,8 @@ Version::Version(ColumnFamilyData* cfd, VersionSet* vset,
// cfd is nullptr if Version is dummy
num_levels_(cfd == nullptr ? 0 : cfd->NumberLevels()),
num_non_empty_levels_(num_levels_),
file_indexer_(cfd == nullptr ? nullptr
: cfd->internal_comparator().user_comparator()),
vset_(vset),
next_(this),
prev_(this),
@ -556,8 +558,6 @@ Version::Version(ColumnFamilyData* cfd, VersionSet* vset,
compaction_score_(num_levels_),
compaction_level_(num_levels_),
version_number_(version_number),
file_indexer_(num_levels_, cfd == nullptr ? nullptr
: cfd->internal_comparator().user_comparator()),
total_file_size_(0),
total_raw_key_size_(0),
total_raw_value_size_(0),
@ -782,6 +782,7 @@ void Version::PrepareApply(std::vector<uint64_t>& size_being_compacted) {
ComputeCompactionScore(size_being_compacted);
UpdateFilesBySize();
UpdateNumNonEmptyLevels();
file_indexer_.UpdateIndex(&arena_, num_non_empty_levels_, files_);
GenerateFileLevels();
}
@ -1599,8 +1600,6 @@ class VersionSet::Builder {
}
CheckConsistency(v);
v->file_indexer_.UpdateIndex(v->files_);
}
void LoadTableHandlers() {

@ -283,6 +283,7 @@ class Version {
int num_levels_; // Number of levels
int num_non_empty_levels_; // Number of levels. Any level larger than it
// is guaranteed to be empty.
FileIndexer file_indexer_;
VersionSet* vset_; // VersionSet to which this Version belongs
Arena arena_; // Used to allocate space for file_levels_
Version* next_; // Next version in linked list
@ -325,7 +326,6 @@ class Version {
uint64_t version_number_;
Version(ColumnFamilyData* cfd, VersionSet* vset, uint64_t version_number = 0);
FileIndexer file_indexer_;
// total file size
uint64_t total_file_size_;

Loading…
Cancel
Save