diff --git a/db/db_impl.cc b/db/db_impl.cc
index 6752e0d0a..146db6133 100644
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@@ -3587,7 +3587,6 @@ void DBImpl::ReturnAndCleanupSuperVersion(ColumnFamilyData* cfd,
 
 void DBImpl::GetApproximateSizes(ColumnFamilyHandle* column_family,
                                  const Range* range, int n, uint64_t* sizes) {
-  // TODO(opt): better implementation
   Version* v;
   auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
   auto cfd = cfh->cfd();
@@ -3599,12 +3598,16 @@ void DBImpl::GetApproximateSizes(ColumnFamilyHandle* column_family,
 
   for (int i = 0; i < n; i++) {
     // Convert user_key into a corresponding internal key.
-    InternalKey k1, k2;
-    k1.SetMaxPossibleForUserKey(range[i].start);
-    k2.SetMaxPossibleForUserKey(range[i].limit);
-    uint64_t start = versions_->ApproximateOffsetOf(v, k1);
-    uint64_t limit = versions_->ApproximateOffsetOf(v, k2);
-    sizes[i] = (limit >= start ? limit - start : 0);
+    InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek);
+    InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek);
+    if (cfd->internal_comparator().Compare(k1.Encode(), k2.Encode()) > 0) {
+      // Reversed range: report 0 as the previous implementation did.
+      // VersionSet::ApproximateSize() requires start <= end and would
+      // otherwise assert (debug) or underflow its unsigned sums (release).
+      sizes[i] = 0;
+      continue;
+    }
+    sizes[i] = versions_->ApproximateSize(v, k1.Encode(), k2.Encode());
   }
 
   {
diff --git a/db/version_set.cc b/db/version_set.cc
index 95e4b7718..de15c4cef 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -2802,40 +2802,107 @@ bool VersionSet::ManifestContains(uint64_t manifest_file_num,
   return result;
 }
 
+// Sum, over every level of "v" that has files, the approximate number of
+// file bytes between internal keys "start" and "end". Requires start <= end.
+uint64_t VersionSet::ApproximateSize(Version* v, const Slice& start,
+                                     const Slice& end) {
+  // pre-condition
+  assert(v->cfd_->internal_comparator().Compare(start, end) <= 0);
 
-uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
-  uint64_t result = 0;
+  uint64_t size = 0;
   const auto* vstorage = v->storage_info();
-  for (int level = 0; level < vstorage->num_levels(); level++) {
-    const std::vector<FileMetaData*>& files = vstorage->LevelFiles(level);
-    for (size_t i = 0; i < files.size(); i++) {
-      if (v->cfd_->internal_comparator().Compare(files[i]->largest, ikey) <=
-          0) {
-        // Entire file is before "ikey", so just add the file size
-        result += files[i]->fd.GetFileSize();
-      } else if (v->cfd_->internal_comparator().Compare(files[i]->smallest,
-                                                        ikey) > 0) {
-        // Entire file is after "ikey", so ignore
-        if (level > 0) {
-          // Files other than level 0 are sorted by meta->smallest, so
-          // no further files in this level will contain data for
-          // "ikey".
-          break;
-        }
-      } else {
-        // "ikey" falls in the range for this table.  Add the
-        // approximate offset of "ikey" within the table.
-        TableReader* table_reader_ptr;
-        Iterator* iter = v->cfd_->table_cache()->NewIterator(
-            ReadOptions(), env_options_, v->cfd_->internal_comparator(),
-            files[i]->fd, &table_reader_ptr);
-        if (table_reader_ptr != nullptr) {
-          result += table_reader_ptr->ApproximateOffsetOf(ikey.Encode());
-        }
-        delete iter;
+
+  for (int level = 0; level < vstorage->num_non_empty_levels(); level++) {
+    const LevelFilesBrief& files_brief = vstorage->LevelFilesBrief(level);
+    if (!files_brief.num_files) {
+      // empty level, skip exploration
+      continue;
+    }
+
+    if (!level) {
+      // level 0 files are not sorted, handle the use case explicitly
+      size += ApproximateSizeLevel0(v, files_brief, start, end);
+      continue;
+    }
+
+    assert(level > 0);
+    assert(files_brief.num_files > 0);
+
+    // identify the file position for starting key
+    const uint64_t idx_start =
+        FindFileInRange(v->cfd_->internal_comparator(), files_brief, start,
+                        /*start=*/0, files_brief.num_files - 1);
+    assert(idx_start < files_brief.num_files);
+
+    // scan all files from the starting position until the ending position
+    // inferred from the sorted order
+    for (uint64_t i = idx_start; i < files_brief.num_files; i++) {
+      uint64_t val;
+      val = ApproximateSize(v, files_brief.files[i], end);
+      if (!val) {
+        // the files after this will not have the range
+        break;
+      }
+
+      size += val;
+
+      if (i == idx_start) {
+        // subtract the bytes needed to be scanned to get to the starting
+        // key
+        val = ApproximateSize(v, files_brief.files[i], start);
+        assert(size >= val);
+        size -= val;
       }
     }
   }
+
+  return size;
+}
+
+uint64_t VersionSet::ApproximateSizeLevel0(Version* v,
+                                           const LevelFilesBrief& files_brief,
+                                           const Slice& key_start,
+                                           const Slice& key_end) {
+  // level 0 files are not in sorted order, we need to iterate through
+  // the list to compute the total bytes that require scanning
+  uint64_t size = 0;
+  for (size_t i = 0; i < files_brief.num_files; i++) {
+    const uint64_t start = ApproximateSize(v, files_brief.files[i], key_start);
+    const uint64_t end = ApproximateSize(v, files_brief.files[i], key_end);
+    assert(end >= start);
+    size += end - start;
+  }
+  return size;
+}
+
+// Approximate number of bytes of file "f" that precede "key": the whole file
+// size if the file's largest key is <= "key", 0 if its smallest key is >
+// "key", otherwise the table reader's approximate offset of "key" (0 when no
+// table reader is returned).
+uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f,
+                                     const Slice& key) {
+  // pre-condition
+  assert(v);
+
+  uint64_t result = 0;
+  if (v->cfd_->internal_comparator().Compare(f.largest_key, key) <= 0) {
+    // Entire file is before "key", so just add the file size
+    result = f.fd.GetFileSize();
+  } else if (v->cfd_->internal_comparator().Compare(f.smallest_key, key) > 0) {
+    // Entire file is after "key", so ignore
+    result = 0;
+  } else {
+    // "key" falls in the range for this table.  Add the
+    // approximate offset of "key" within the table.
+    TableReader* table_reader_ptr = nullptr;
+    Iterator* iter = v->cfd_->table_cache()->NewIterator(
+        ReadOptions(), env_options_, v->cfd_->internal_comparator(), f.fd,
+        &table_reader_ptr);
+    if (table_reader_ptr != nullptr) {
+      result = table_reader_ptr->ApproximateOffsetOf(key);
+    }
+    delete iter;
+  }
   return result;
 }
 
diff --git a/db/version_set.h b/db/version_set.h
index 267a8ba34..5c5f1fc27 100644
--- a/db/version_set.h
+++ b/db/version_set.h
@@ -618,9 +618,10 @@ class VersionSet {
   // Add all files listed in any live version to *live.
   void AddLiveFiles(std::vector<FileDescriptor>* live_list);
 
-  // Return the approximate offset in the database of the data for
-  // "key" as of version "v".
-  uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);
+  // Return the approximate size of data to be scanned for range [start, end)
+  // as of version "v". "start" and "end" are encoded internal keys and
+  // start <= end is required.
+  uint64_t ApproximateSize(Version* v, const Slice& start, const Slice& end);
 
   // Return the size of the current manifest file
   uint64_t manifest_file_size() const { return manifest_file_size_; }
@@ -657,6 +658,17 @@ class VersionSet {
     }
   };
 
+  // ApproximateSize helpers
+
+  // Approximate bytes within [start, end) summed over the level-0 files in
+  // "files_brief"; every file is examined.
+  uint64_t ApproximateSizeLevel0(Version* v, const LevelFilesBrief& files_brief,
+                                 const Slice& start, const Slice& end);
+
+  // Approximate number of bytes of file "f" that lie before "key".
+  uint64_t ApproximateSize(Version* v, const FdWithKeyRange& f,
+                           const Slice& key);
+
   // Save current contents to *log
   Status WriteSnapshot(log::Writer* log);
 