The method GetOverlappingInputs should use binary search.

Summary:
The method Version::GetOverlappingInputs used a sequential search
to map a key-range to a set of files. But the files are arranged
in ascending order of key, so a binary search is more effective.

This patch implements Version::GetOverlappingInputsBinarySearch
that finds one file that corresponds to the specified key range
and then iterates backwards and forwards to find all overlapping
files.

This patch is critical for making compactions efficient, especially
when there are thousands of files in a single level.

I measured that 1000 iterations of TEST_MaxNextLevelOverlappingBytes
takes 16000 microseconds without this patch. With this patch, the
same method takes about 4600 microseconds.

Test Plan: Almost all unit tests in db_test use this method to look up keys.

Reviewers: heyongqiang

Reviewed By: heyongqiang

CC: MarkCallaghan, emayanke, sheki

Differential Revision: https://reviews.facebook.net/D6465
main
Dhruba Borthakur 12 years ago
parent 5273c81483
commit cb7a00227f
  1. 82
      db/version_set.cc
  2. 13
      db/version_set.h

@ -467,6 +467,10 @@ void Version::GetOverlappingInputs(
user_end = end->user_key();
}
const Comparator* user_cmp = vset_->icmp_.user_comparator();
if (begin != NULL && end != NULL && level > 0) {
GetOverlappingInputsBinarySearch(level, user_begin, user_end, inputs);
return;
}
for (size_t i = 0; i < files_[level].size(); ) {
FileMetaData* f = files_[level][i++];
const Slice file_start = f->smallest.user_key();
@ -494,6 +498,84 @@ void Version::GetOverlappingInputs(
}
}
// Store in "*inputs" all files in "level" that overlap the inclusive
// user-key range [user_begin, user_end].
//
// A binary search over files_[level] locates any one file that overlaps
// the range; ExtendOverlappingInputs() then walks backwards and forwards
// from that file to collect every overlapping neighbour. If no file
// overlaps the range, "*inputs" is left untouched.
//
// REQUIRES: level > 0.
// NOTE(review): the search relies on files_[level] being sorted by key;
// presumably level 0 is excluded because its files are not ordered that
// way -- confirm against the code that populates files_.
void Version::GetOverlappingInputsBinarySearch(
    int level,
    const Slice& user_begin,
    const Slice& user_end,
    std::vector<FileMetaData*>* inputs) {
  assert(level > 0);
  const Comparator* user_cmp = vset_->icmp_.user_comparator();

  int min = 0;
  // Explicit cast: size() is unsigned, and an empty level must yield -1 so
  // that the loop below is skipped.
  int max = static_cast<int>(files_[level].size()) - 1;

  while (min <= max) {
    // Overflow-safe midpoint; (min + max) / 2 can overflow int.
    const int mid = min + (max - min) / 2;
    FileMetaData* f = files_[level][mid];
    const Slice file_start = f->smallest.user_key();
    const Slice file_limit = f->largest.user_key();
    if (user_cmp->Compare(file_limit, user_begin) < 0) {
      // File lies entirely before the range: search the upper half.
      min = mid + 1;
    } else if (user_cmp->Compare(user_end, file_start) < 0) {
      // File lies entirely after the range: search the lower half.
      max = mid - 1;
    } else {
      // files_[level][mid] overlaps the range; gather its overlapping
      // neighbours on both sides and finish.
      ExtendOverlappingInputs(level, user_begin, user_end, inputs, mid);
      return;
    }
  }
  // Search space exhausted: no file overlaps the range.
}
// Store in "*inputs" all files in "level" that overlap the inclusive
// user-key range [user_begin, user_end].
//
// midIndex is the index (into files_[level]) of one file already known to
// overlap the range. Starting there, scan towards lower indices while files
// still reach user_begin, and towards higher indices while files still start
// at or before user_end. The overlapping files at indices <= midIndex are
// placed, in ascending index order, in front of whatever "*inputs" already
// holds; the files at indices > midIndex are appended at the back.
//
// REQUIRES: 0 <= midIndex < files_[level].size(), and the file at midIndex
// overlaps the range.
void Version::ExtendOverlappingInputs(
    int level,
    const Slice& user_begin,
    const Slice& user_end,
    std::vector<FileMetaData*>* inputs,
    int midIndex) {
  const Comparator* user_cmp = vset_->icmp_.user_comparator();
  const int num_files = static_cast<int>(files_[level].size());

  // A file overlaps [user_begin, user_end] only if BOTH conditions hold:
  // it ends at or after user_begin AND it starts at or before user_end.
  assert(midIndex >= 0 && midIndex < num_files);
  if (midIndex < 0 || midIndex >= num_files) {
    // Defensive for builds with asserts disabled: never index out of range.
    return;
  }
  assert((user_cmp->Compare(files_[level][midIndex]->largest.user_key(),
                            user_begin) >= 0) &&
         (user_cmp->Compare(files_[level][midIndex]->smallest.user_key(),
                            user_end) <= 0));

  // Scan backwards from midIndex to find the lowest index whose file still
  // reaches user_begin. A signed index lets the loop stop cleanly at 0
  // instead of depending on unsigned wraparound.
  int first = midIndex + 1;  // one past midIndex == "nothing matched yet"
  for (int i = midIndex; i >= 0; i--) {
    const Slice file_limit = files_[level][i]->largest.user_key();
    if (user_cmp->Compare(file_limit, user_begin) >= 0) {
      first = i;
    } else {
      break;
    }
  }
  // Prepend files [first, midIndex] with a single range insert. This gives
  // the same final order as inserting each file at the front one by one,
  // but shifts the existing elements only once.
  if (first <= midIndex) {
    inputs->insert(inputs->begin(),
                   files_[level].begin() + first,
                   files_[level].begin() + midIndex + 1);
  }

  // Scan forwards from midIndex + 1, appending files until one starts
  // after user_end.
  for (int i = midIndex + 1; i < num_files; i++) {
    FileMetaData* f = files_[level][i];
    const Slice file_start = f->smallest.user_key();
    if (user_cmp->Compare(file_start, user_end) <= 0) {
      inputs->push_back(f);
    } else {
      break;
    }
  }
}
std::string Version::DebugString() const {
std::string r;
for (int level = 0; level < vset_->NumberLevels(); level++) {

@ -89,6 +89,19 @@ class Version {
const InternalKey* end, // NULL means after all keys
std::vector<FileMetaData*>* inputs);
// Store in "*inputs" all files in "level" that overlap [begin,end],
// using a binary search to locate one overlapping file and then
// extending to its overlapping neighbours.
// Both bounds are passed by reference and are compared as user keys, so
// unlike GetOverlappingInputs() there is no NULL "unbounded" form here.
void GetOverlappingInputsBinarySearch(
int level,
const Slice& begin, // user key at the start of the range (inclusive)
const Slice& end, // user key at the end of the range (inclusive)
std::vector<FileMetaData*>* inputs);
// Store in "*inputs" all files in "level" that overlap [begin,end],
// given the index of one file already known to overlap the range.
// Scans backward and forward from that index to collect the neighbours.
// Both bounds are passed by reference and are compared as user keys;
// there is no NULL "unbounded" form.
void ExtendOverlappingInputs(
int level,
const Slice& begin, // user key at the start of the range (inclusive)
const Slice& end, // user key at the end of the range (inclusive)
std::vector<FileMetaData*>* inputs,
int index); // start extending from this index
// Returns true iff some file in the specified level overlaps
// some part of [*smallest_user_key,*largest_user_key].
// smallest_user_key==NULL represents a key smaller than all keys in the DB.

Loading…
Cancel
Save