diff --git a/db/compaction.cc b/db/compaction.cc
index 4ed5374ac..34b79d60e 100644
--- a/db/compaction.cc
+++ b/db/compaction.cc
@@ -75,6 +75,13 @@ Compaction::~Compaction() {
   }
 }
 
+void Compaction::GenerateFileLevels() {
+  input_levels_.resize(2);
+  for (int which = 0; which < 2; which++) {
+    DoGenerateFileLevel(&input_levels_[which], inputs_[which], &arena_);
+  }
+}
+
 bool Compaction::IsTrivialMove() const {
   // Avoid a move if there is lots of overlapping grandparent data.
   // Otherwise, the move could create a parent file that will require
diff --git a/db/compaction.h b/db/compaction.h
index b6677bf09..89d71a41f 100644
--- a/db/compaction.h
+++ b/db/compaction.h
@@ -8,6 +8,8 @@
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 
 #pragma once
+#include "util/arena.h"
+#include "util/autovector.h"
 #include "db/version_set.h"
 
 namespace rocksdb {
@@ -18,6 +20,10 @@ class ColumnFamilyData;
 // A Compaction encapsulates information about a compaction.
 class Compaction {
  public:
+  // No copying allowed
+  Compaction(const Compaction&) = delete;
+  void operator=(const Compaction&) = delete;
+
   ~Compaction();
 
   // Return the level that is being compacted.  Inputs from "level"
@@ -44,6 +50,9 @@ class Compaction {
   std::vector<FileMetaData*>* inputs(int which) { return &inputs_[which]; }
 
+  // Return the FileLevel for the specified input set
+  FileLevel* input_levels(int which) { return &input_levels_[which]; }
+
   // Maximum size of files to build during this compaction.
   uint64_t MaxOutputFileSize() const { return max_output_file_size_; }
 
@@ -53,7 +62,11 @@
   // Whether need to write output file to second DB path.
   uint32_t GetOutputPathId() const { return output_path_id_; }
 
-  // Is this a trivial compaction that can be implemented by just
+  // Generate input_levels_ from inputs_
+  // Should be called when inputs_ is stable
+  void GenerateFileLevels();
+
+  // Is this a trivial compaction that can be implemented by just
   // moving a single input file to the next level (no merging or splitting)
   bool IsTrivialMove() const;
@@ -118,6 +131,7 @@ class Compaction {
   VersionEdit* edit_;
   int number_levels_;
   ColumnFamilyData* cfd_;
+  Arena arena_;       // Arena used to allocate space for input_levels_
 
   uint32_t output_path_id_;
   CompressionType output_compression_;
@@ -128,6 +142,9 @@ class Compaction {
   // Each compaction reads inputs from "level_" and "level_+1"
   std::vector<FileMetaData*> inputs_[2];      // The two sets of inputs
 
+  // A copy of inputs_, organized more closely in memory
+  autovector<FileLevel> input_levels_;
+
   // State used to check for number of of overlapping grandparent files
   // (parent == level_ + 1, grandparent == level_ + 2)
   std::vector<FileMetaData*> grandparents_;
diff --git a/db/db_impl.cc b/db/db_impl.cc
index 2b96cdee5..31da3690b 100644
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@@ -2903,6 +2903,8 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
   compact->CleanupMergedBuffer();
   bool prefix_initialized = false;
 
+  // Generate input_levels_ before making the input iterator
+  compact->compaction->GenerateFileLevels();
   int64_t imm_micros = 0;  // Micros spent doing imm_ compactions
   ColumnFamilyData* cfd = compact->compaction->column_family_data();
   LogToBuffer(
diff --git a/db/version_edit.h b/db/version_edit.h
index ee1f6c437..ee54cb664 100644
--- a/db/version_edit.h
+++ b/db/version_edit.h
@@ -14,6 +14,8 @@
 #include <vector>
 #include "rocksdb/cache.h"
 #include "db/dbformat.h"
+#include "util/arena.h"
+#include "util/autovector.h"
 
 namespace rocksdb {
 
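The version_set.cc changes below revolve around one idea: flatten each level's FileMetaData pointers into a contiguous, arena-backed array of {fd, smallest key, largest key} entries, so key-range checks read sequential memory instead of chasing heap pointers. A minimal standalone sketch of that layout follows; SimpleArena, FileMeta, FileEntry and FlatLevel are invented stand-ins for illustration, not the RocksDB Arena, FileMetaData, FdWithKeyRange and FileLevel types.

```cpp
// Illustrative sketch only: simplified stand-ins for the RocksDB types,
// showing the "copy key bounds into sequential memory" layout.
#include <cstdint>
#include <cstring>
#include <deque>
#include <iostream>
#include <string>
#include <vector>

struct SimpleArena {                       // bump-style allocator; blocks live
  std::deque<std::vector<char>> blocks_;   // until the arena is destroyed
  char* Allocate(size_t n) {
    blocks_.emplace_back(n);
    return blocks_.back().data();
  }
};

struct FileMeta {                          // stand-in for FileMetaData
  uint64_t number;
  std::string smallest, largest;           // encoded key bounds
};

struct FileEntry {                         // stand-in for FdWithKeyRange
  uint64_t number;
  const char* smallest; size_t smallest_size;
  const char* largest;  size_t largest_size;
};

struct FlatLevel {                         // stand-in for FileLevel
  std::vector<FileEntry> files;
};

// Mirrors what DoGenerateFileLevel does: copy each file's key bounds into
// arena memory and record pointers into that memory in a flat entry array.
FlatLevel Flatten(const std::vector<FileMeta>& files, SimpleArena* arena) {
  FlatLevel level;
  level.files.reserve(files.size());
  for (const FileMeta& f : files) {
    char* mem = arena->Allocate(f.smallest.size() + f.largest.size());
    std::memcpy(mem, f.smallest.data(), f.smallest.size());
    std::memcpy(mem + f.smallest.size(), f.largest.data(), f.largest.size());
    level.files.push_back({f.number, mem, f.smallest.size(),
                           mem + f.smallest.size(), f.largest.size()});
  }
  return level;
}

int main() {
  SimpleArena arena;
  std::vector<FileMeta> files = {{1, "150", "200"}, {2, "300", "350"}};
  FlatLevel level = Flatten(files, &arena);
  for (const FileEntry& e : level.files) {
    std::cout << e.number << ": ["
              << std::string(e.smallest, e.smallest_size) << ", "
              << std::string(e.largest, e.largest_size) << "]\n";
  }
  return 0;
}
```

The real DoGenerateFileLevel below does the same with placement new into Arena::AllocateAligned memory, so the entries and their key bytes live as long as the Version or Compaction that owns the arena.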
diff --git a/db/version_set.cc b/db/version_set.cc
index 1ff69e2dc..4960fa21d 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -82,33 +82,6 @@ Version::~Version() {
   delete[] files_;
 }
 
-int FindFileInRange(const InternalKeyComparator& icmp,
-                    const std::vector<FileMetaData*>& files,
-                    const Slice& key,
-                    uint32_t left,
-                    uint32_t right) {
-  while (left < right) {
-    uint32_t mid = (left + right) / 2;
-    const FileMetaData* f = files[mid];
-    if (icmp.InternalKeyComparator::Compare(f->largest.Encode(), key) < 0) {
-      // Key at "mid.largest" is < "target".  Therefore all
-      // files at or before "mid" are uninteresting.
-      left = mid + 1;
-    } else {
-      // Key at "mid.largest" is >= "target".  Therefore all files
-      // after "mid" are uninteresting.
-      right = mid;
-    }
-  }
-  return right;
-}
-
-int FindFile(const InternalKeyComparator& icmp,
-             const std::vector<FileMetaData*>& files,
-             const Slice& key) {
-  return FindFileInRange(icmp, files, key, 0, files.size());
-}
-
 // Find File in FileLevel data structure
 // Within an index range defined by left and right
 int FindFileInRange(const InternalKeyComparator& icmp,
@@ -138,6 +111,36 @@ int FindFile(const InternalKeyComparator& icmp,
   return FindFileInRange(icmp, file_level, key, 0, file_level.num_files);
 }
 
+void DoGenerateFileLevel(FileLevel* file_level,
+                         const std::vector<FileMetaData*>& files,
+                         Arena* arena) {
+  assert(file_level);
+  assert(files.size() >= 0);
+  assert(arena);
+
+  size_t num = files.size();
+  file_level->num_files = num;
+  char* mem = arena->AllocateAligned(num * sizeof(FdWithKeyRange));
+  file_level->files = new (mem)FdWithKeyRange[num];
+
+  for (size_t i = 0; i < num; i++) {
+    Slice smallest_key = files[i]->smallest.Encode();
+    Slice largest_key = files[i]->largest.Encode();
+
+    // Copy key slice to sequential memory
+    size_t smallest_size = smallest_key.size();
+    size_t largest_size = largest_key.size();
+    mem = arena->AllocateAligned(smallest_size + largest_size);
+    memcpy(mem, smallest_key.data(), smallest_size);
+    memcpy(mem + smallest_size, largest_key.data(), largest_size);
+
+    FdWithKeyRange& f = file_level->files[i];
+    f.fd = files[i]->fd;
+    f.smallest_key = Slice(mem, smallest_size);
+    f.largest_key = Slice(mem + smallest_size, largest_size);
+  }
+}
+
 static bool AfterFile(const Comparator* ucmp, const Slice* user_key,
                       const FdWithKeyRange* f) {
   // nullptr user_key occurs before all keys and is therefore never after *f
@@ -152,7 +155,6 @@ static bool BeforeFile(const Comparator* ucmp,
           ucmp->Compare(*user_key, ExtractUserKey(f->smallest_key)) < 0);
 }
 
-
 bool SomeFileOverlapsRange(
     const InternalKeyComparator& icmp,
     bool disjoint_sorted_files,
@@ -198,21 +200,21 @@ bool SomeFileOverlapsRange(
 class Version::LevelFileNumIterator : public Iterator {
  public:
   LevelFileNumIterator(const InternalKeyComparator& icmp,
-                       const std::vector<FileMetaData*>* flist)
+                       const FileLevel* flevel)
       : icmp_(icmp),
-        flist_(flist),
-        index_(flist->size()),
+        flevel_(flevel),
+        index_(flevel->num_files),
         current_value_(0, 0, 0) {  // Marks as invalid
   }
   virtual bool Valid() const {
-    return index_ < flist_->size();
+    return index_ < flevel_->num_files;
   }
   virtual void Seek(const Slice& target) {
-    index_ = FindFile(icmp_, *flist_, target);
+    index_ = FindFile(icmp_, *flevel_, target);
   }
   virtual void SeekToFirst() { index_ = 0; }
   virtual void SeekToLast() {
-    index_ = flist_->empty() ? 0 : flist_->size() - 1;
+    index_ = (flevel_->num_files == 0) ? 0 : flevel_->num_files - 1;
   }
   virtual void Next() {
     assert(Valid());
@@ -221,26 +223,27 @@ class Version::LevelFileNumIterator : public Iterator {
   virtual void Prev() {
     assert(Valid());
     if (index_ == 0) {
-      index_ = flist_->size();  // Marks as invalid
+      index_ = flevel_->num_files;  // Marks as invalid
     } else {
       index_--;
     }
   }
   Slice key() const {
     assert(Valid());
-    return (*flist_)[index_]->largest.Encode();
+    return flevel_->files[index_].largest_key;
   }
   Slice value() const {
     assert(Valid());
-    auto* file_meta = (*flist_)[index_];
-    current_value_ = file_meta->fd;
+
+    auto file_meta = flevel_->files[index_];
+    current_value_ = file_meta.fd;
     return Slice(reinterpret_cast<const char*>(&current_value_),
                  sizeof(FileDescriptor));
   }
   virtual Status status() const { return Status::OK(); }
  private:
   const InternalKeyComparator icmp_;
-  const std::vector<FileMetaData*>* const flist_;
+  const FileLevel* flevel_;
   uint32_t index_;
   mutable FileDescriptor current_value_;
 };
@@ -357,21 +360,23 @@ void Version::AddIterators(const ReadOptions& read_options,
                            const EnvOptions& soptions,
                            std::vector<Iterator*>* iters) {
   // Merge all level zero files together since they may overlap
-  for (const FileMetaData* file : files_[0]) {
+  for (size_t i = 0; i < file_levels_[0].num_files; i++) {
+    const auto& file = file_levels_[0].files[i];
     iters->push_back(cfd_->table_cache()->NewIterator(
-        read_options, soptions, cfd_->internal_comparator(), file->fd));
+        read_options, soptions, cfd_->internal_comparator(), file.fd));
   }
 
   // For levels > 0, we can use a concatenating iterator that sequentially
   // walks through the non-overlapping files in the level, opening them
   // lazily.
   for (int level = 1; level < num_levels_; level++) {
-    if (!files_[level].empty()) {
+    if (file_levels_[level].num_files != 0) {
       iters->push_back(NewTwoLevelIterator(new LevelFileIteratorState(
           cfd_->table_cache(), read_options, soptions,
           cfd_->internal_comparator(), false /* for_compaction */,
           cfd_->options()->prefix_extractor != nullptr),
-        new LevelFileNumIterator(cfd_->internal_comparator(), &files_[level])));
+        new LevelFileNumIterator(cfd_->internal_comparator(),
+                                 &file_levels_[level])));
     }
   }
 }
@@ -380,9 +385,10 @@ void Version::AddIterators(const ReadOptions& read_options,
                            const EnvOptions& soptions,
                            MergeIteratorBuilder* merge_iter_builder) {
   // Merge all level zero files together since they may overlap
-  for (const FileMetaData* file : files_[0]) {
+  for (size_t i = 0; i < file_levels_[0].num_files; i++) {
+    const auto& file = file_levels_[0].files[i];
     merge_iter_builder->AddIterator(cfd_->table_cache()->NewIterator(
-        read_options, soptions, cfd_->internal_comparator(), file->fd, nullptr,
+        read_options, soptions, cfd_->internal_comparator(), file.fd, nullptr,
         false, merge_iter_builder->GetArena()));
   }
 
   // For levels > 0, we can use a concatenating iterator that sequentially
   // walks through the non-overlapping files in the level, opening them
   // lazily.
   for (int level = 1; level < num_levels_; level++) {
-    if (!files_[level].empty()) {
+    if (file_levels_[level].num_files != 0) {
       merge_iter_builder->AddIterator(NewTwoLevelIterator(
           new LevelFileIteratorState(
               cfd_->table_cache(), read_options, soptions,
               cfd_->internal_comparator(), false /* for_compaction */,
               cfd_->options()->prefix_extractor != nullptr),
-          new LevelFileNumIterator(cfd_->internal_comparator(), &files_[level]),
-          merge_iter_builder->GetArena()));
+          new LevelFileNumIterator(cfd_->internal_comparator(),
+              &file_levels_[level]), merge_iter_builder->GetArena()));
     }
   }
 }
@@ -659,6 +665,7 @@ void Version::Get(const ReadOptions& options,
   for (int32_t i = start_index; i < num_files;) {
     FdWithKeyRange* f = &files[i];
+    assert(f->fd.GetNumber() == files_[level][i]->fd.GetNumber());
     int cmp_largest = -1;
 
     // Do key range filtering of files or/and fractional cascading if:
@@ -746,7 +753,6 @@ void Version::Get(const ReadOptions& options,
     }
   }
 
-
   if (kMerge == saver.state) {
     // merge_operands are in saver and we hit the beginning of the key history
     // do a final merge of nullptr and operands;
@@ -767,29 +773,7 @@ void Version::Get(const ReadOptions& options,
 void Version::GenerateFileLevels() {
   file_levels_.resize(num_non_empty_levels_);
   for (int level = 0; level < num_non_empty_levels_; level++) {
-    const auto& files = files_[level];
-    auto& file_level = file_levels_[level];
-
-    size_t num = files.size();
-    file_level.num_files = num;
-    char* mem = arena_.AllocateAligned(num * sizeof(FdWithKeyRange));
-    file_level.files = new (mem)FdWithKeyRange[num];
-
-    for (size_t i = 0; i < files.size(); i++) {
-      Slice smallest_key = files[i]->smallest.Encode();
-      Slice largest_key = files[i]->largest.Encode();
-
-      // Copy key slice to sequential memory
-      size_t smallest_size = smallest_key.size();
-      size_t largest_size = largest_key.size();
-      mem = arena_.AllocateAligned(smallest_size + largest_size);
-      memcpy(mem, smallest_key.data(), smallest_size);
-      memcpy(mem + smallest_size, largest_key.data(), largest_size);
-
-      file_level.files[i].fd = files[i]->fd;
-      file_level.files[i].smallest_key = Slice(mem, smallest_size);
-      file_level.files[i].largest_key = Slice(mem+smallest_size, largest_size);
-    }
+    DoGenerateFileLevel(&file_levels_[level], files_[level], &arena_);
   }
 }
 
@@ -1181,7 +1165,7 @@ void Version::GetOverlappingInputsBinarySearch(
 // The midIndex specifies the index of at least one file that
 // overlaps the specified range. From that file, iterate backward
 // and forward to find all overlapping files.
-// Use compressed file meda data, make search faster
+// Use FileLevel in searching, make it faster
 void Version::ExtendOverlappingInputs(
     int level,
     const Slice& user_begin,
@@ -2764,16 +2748,17 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
   // Level-0 files have to be merged together.  For other levels,
   // we will make a concatenating iterator per level.
   // TODO(opt): use concatenating iterator for level-0 if there is no overlap
-  const int space = (c->level() == 0 ? c->inputs(0)->size() + 1 : 2);
+  const int space = (c->level() == 0 ? c->input_levels(0)->num_files + 1 : 2);
   Iterator** list = new Iterator*[space];
   int num = 0;
   for (int which = 0; which < 2; which++) {
-    if (!c->inputs(which)->empty()) {
+    if (c->input_levels(which)->num_files != 0) {
       if (c->level() + which == 0) {
-        for (const auto& file : *c->inputs(which)) {
+        const FileLevel* flevel = c->input_levels(which);
+        for (size_t i = 0; i < flevel->num_files; i++) {
           list[num++] = cfd->table_cache()->NewIterator(
               read_options, storage_options_compactions_,
-              cfd->internal_comparator(), file->fd, nullptr,
+              cfd->internal_comparator(), flevel->files[i].fd, nullptr,
               true /* for compaction */);
         }
       } else {
@@ -2783,7 +2768,7 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
             cfd->internal_comparator(), true /* for_compaction */,
             false /* prefix enabled */),
             new Version::LevelFileNumIterator(cfd->internal_comparator(),
-                c->inputs(which)));
+                c->input_levels(which)));
       }
     }
   }
diff --git a/db/version_set.h b/db/version_set.h
index 027e5610d..00d40affd 100644
--- a/db/version_set.h
+++ b/db/version_set.h
@@ -53,14 +53,6 @@ class ColumnFamilySet;
 class TableCache;
 class MergeIteratorBuilder;
 
-
-// Return the smallest index i such that files[i]->largest >= key.
-// Return files.size() if there is no such file.
-// REQUIRES: "files" contains a sorted list of non-overlapping files.
-extern int FindFile(const InternalKeyComparator& icmp,
-                    const std::vector<FileMetaData*>& files,
-                    const Slice& key);
-
 // Return the smallest index i such that file_level.files[i]->largest >= key.
 // Return file_level.num_files if there is no such file.
 // REQUIRES: "file_level.files" contains a sorted list of
@@ -82,6 +74,13 @@ extern bool SomeFileOverlapsRange(
     const Slice* smallest_user_key,
     const Slice* largest_user_key);
 
+// Generate FileLevel from vector<FileMetaData*>
+// Would copy smallest_key and largest_key data to sequential memory
+// arena: Arena used to allocate the memory
+extern void DoGenerateFileLevel(FileLevel* file_level,
+                                const std::vector<FileMetaData*>& files,
+                                Arena* arena);
+
 class Version {
  public:
   // Append to *iters a sequence of iterators that will
@@ -294,7 +293,6 @@ class Version {
   // in increasing order of keys
   std::vector<FileMetaData*>* files_;
 
-
   // A list for the same set of files that are stored in files_,
   // but files in each level are now sorted based on file
   // size. The file with the largest size is at the front.
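For reference, a self-contained sketch of the lookup that FindFile/FindFileInRange perform over the flattened entries: a binary search on each file's largest key for the first file whose range could still contain the target. Entry and plain byte-wise string comparison are simplified stand-ins for FdWithKeyRange and the InternalKeyComparator; the expected indices mirror cases from the removed FindFileTest.

```cpp
// Sketch only: Entry and byte-wise comparison stand in for FdWithKeyRange and
// the InternalKeyComparator; indices mirror the removed FindFileTest cases.
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

struct Entry {
  std::string smallest;
  std::string largest;
};

// Return the smallest index whose largest key is >= target
// (files.size() if no such file), as FindFileInRange does.
size_t FindFirstCovering(const std::vector<Entry>& files,
                         const std::string& target) {
  size_t left = 0;
  size_t right = files.size();
  while (left < right) {
    size_t mid = left + (right - left) / 2;
    if (files[mid].largest < target) {
      left = mid + 1;   // files at or before mid end before target
    } else {
      right = mid;      // mid may still cover target
    }
  }
  return right;
}

int main() {
  std::vector<Entry> level = {
      {"150", "200"}, {"200", "250"}, {"300", "350"}, {"400", "450"}};
  assert(FindFirstCovering(level, "100") == 0);
  assert(FindFirstCovering(level, "201") == 1);
  assert(FindFirstCovering(level, "351") == 3);
  assert(FindFirstCovering(level, "451") == 4);
  return 0;
}
```

Because the entries sit in one contiguous array, the probes of this search touch a handful of adjacent cache lines, which is the motivation for building FileLevel at all.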
diff --git a/db/version_set_test.cc b/db/version_set_test.cc
index 2593b63b6..365200610 100644
--- a/db/version_set_test.cc
+++ b/db/version_set_test.cc
@@ -14,14 +14,15 @@
 namespace rocksdb {
 
-class FindFileTest {
+class GenerateFileLevelTest {
  public:
   std::vector<FileMetaData*> files_;
-  bool disjoint_sorted_files_;
+  FileLevel file_level_;
+  Arena arena_;
 
-  FindFileTest() : disjoint_sorted_files_(true) { }
+  GenerateFileLevelTest() { }
 
-  ~FindFileTest() {
+  ~GenerateFileLevelTest() {
     for (unsigned int i = 0; i < files_.size(); i++) {
       delete files_[i];
     }
@@ -37,55 +38,44 @@ class FindFileTest {
     files_.push_back(f);
   }
 
-  int Find(const char* key) {
-    InternalKey target(key, 100, kTypeValue);
-    InternalKeyComparator cmp(BytewiseComparator());
-    return FindFile(cmp, files_, target.Encode());
+  int Compare() {
+    int diff = 0;
+    for (size_t i = 0; i < files_.size(); i++) {
+      if (file_level_.files[i].fd.GetNumber() != files_[i]->fd.GetNumber()) {
+        diff++;
+      }
+    }
+    return diff;
   }
 };
 
-TEST(FindFileTest, Empty) {
-  ASSERT_EQ(0, Find("foo"));
+TEST(GenerateFileLevelTest, Empty) {
+  DoGenerateFileLevel(&file_level_, files_, &arena_);
+  ASSERT_EQ(0, file_level_.num_files);
+  ASSERT_EQ(0, Compare());
 }
 
-TEST(FindFileTest, Single) {
+TEST(GenerateFileLevelTest, Single) {
   Add("p", "q");
-  ASSERT_EQ(0, Find("a"));
-  ASSERT_EQ(0, Find("p"));
-  ASSERT_EQ(0, Find("p1"));
-  ASSERT_EQ(0, Find("q"));
-  ASSERT_EQ(1, Find("q1"));
-  ASSERT_EQ(1, Find("z"));
+  DoGenerateFileLevel(&file_level_, files_, &arena_);
+  ASSERT_EQ(1, file_level_.num_files);
+  ASSERT_EQ(0, Compare());
 }
 
-TEST(FindFileTest, Multiple) {
+TEST(GenerateFileLevelTest, Multiple) {
   Add("150", "200");
   Add("200", "250");
   Add("300", "350");
   Add("400", "450");
-  ASSERT_EQ(0, Find("100"));
-  ASSERT_EQ(0, Find("150"));
-  ASSERT_EQ(0, Find("151"));
-  ASSERT_EQ(0, Find("199"));
-  ASSERT_EQ(0, Find("200"));
-  ASSERT_EQ(1, Find("201"));
-  ASSERT_EQ(1, Find("249"));
-  ASSERT_EQ(1, Find("250"));
-  ASSERT_EQ(2, Find("251"));
-  ASSERT_EQ(2, Find("299"));
-  ASSERT_EQ(2, Find("300"));
-  ASSERT_EQ(2, Find("349"));
-  ASSERT_EQ(2, Find("350"));
-  ASSERT_EQ(3, Find("351"));
-  ASSERT_EQ(3, Find("400"));
-  ASSERT_EQ(3, Find("450"));
-  ASSERT_EQ(4, Find("451"));
+  DoGenerateFileLevel(&file_level_, files_, &arena_);
+  ASSERT_EQ(4, file_level_.num_files);
+  ASSERT_EQ(0, Compare());
 }
 
 class FindLevelFileTest {
  public:
-  FileLevel level_files_;
+  FileLevel file_level_;
   bool disjoint_sorted_files_;
   Arena arena_;
 
@@ -96,8 +86,8 @@ class FindLevelFileTest {
 
   void LevelFileInit(size_t num = 0) {
     char* mem = arena_.AllocateAligned(num * sizeof(FdWithKeyRange));
-    level_files_.files = new (mem)FdWithKeyRange[num];
-    level_files_.num_files = 0;
+    file_level_.files = new (mem)FdWithKeyRange[num];
+    file_level_.num_files = 0;
   }
 
   void Add(const char* smallest, const char* largest,
@@ -115,27 +105,27 @@
     memcpy(mem + smallest_slice.size(), largest_slice.data(),
            largest_slice.size());
 
-    // add compressd_level_
-    size_t num = level_files_.num_files;
-    auto& file = level_files_.files[num];
+    // add to file_level_
+    size_t num = file_level_.num_files;
+    auto& file = file_level_.files[num];
     file.fd = FileDescriptor(num + 1, 0, 0);
     file.smallest_key = Slice(mem, smallest_slice.size());
     file.largest_key = Slice(mem + smallest_slice.size(),
                              largest_slice.size());
-    level_files_.num_files++;
+    file_level_.num_files++;
  }
 
   int Find(const char* key) {
     InternalKey target(key, 100, kTypeValue);
     InternalKeyComparator cmp(BytewiseComparator());
-    return FindFile(cmp, level_files_, target.Encode());
+    return FindFile(cmp, file_level_, target.Encode());
   }
 
   bool Overlaps(const char* smallest, const char* largest) {
     InternalKeyComparator cmp(BytewiseComparator());
     Slice s(smallest != nullptr ? smallest : "");
     Slice l(largest != nullptr ? largest : "");
-    return SomeFileOverlapsRange(cmp, disjoint_sorted_files_, level_files_,
+    return SomeFileOverlapsRange(cmp, disjoint_sorted_files_, file_level_,
                                  (smallest != nullptr ? &s : nullptr),
                                  (largest != nullptr ? &l : nullptr));
   }