Include estimated bytes deleted by range tombstones in compensated file size (#10734)

Summary:
Compensate file sizes in compaction picking so that files with range tombstones are preferred and get compacted down earlier, as they tend to delete a lot of data. This PR adds a `compensated_range_deletion_size` field to `FileMetaData` that is computed during Flush/Compaction and persisted in the MANIFEST. This value is added to `compensated_file_size`, which is used for compaction picking. Currently, for a file in level L, `compensated_range_deletion_size` is set to the estimated number of bytes deleted by this file's range tombstones in all levels > L. This helps to reduce space amp when data in older levels is covered by range tombstones in level L.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10734

Test Plan:
- Added unit tests.
- Ran a benchmark to check whether the above definition of `compensated_range_deletion_size` reduces space amp as intended, without affecting write amp too much. The experiment setup is favorable for this optimization: large range tombstones issued infrequently. Command used:
```
./db_bench -benchmarks=fillrandom,waitforcompaction,stats,levelstats -use_existing_db=false -avoid_flush_during_recovery=true -write_buffer_size=33554432 -level_compaction_dynamic_level_bytes=true -max_background_jobs=8 -max_bytes_for_level_base=134217728 -target_file_size_base=33554432 -writes_per_range_tombstone=500000 -range_tombstone_width=5000000 -num=50000000 -benchmark_write_rate_limit=8388608 -threads=16 -duration=1800 --max_num_range_tombstones=1000000000
```

In this experiment, each thread wrote 16 range tombstones over the duration of 30 minutes. Each range tombstone has a width of 5M, which is 10% of the key space width. Results show this PR generates a smaller DB size.

Compaction stats from this PR:
```
Level    Files   Size     Score Read(GB)  Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) CompMergeCPU(sec) Comp(cnt) Avg(sec) KeyIn KeyDrop Rblob(GB) Wblob(GB)
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  L0      2/0   31.54 MB   0.5      0.0     0.0      0.0       8.4      8.4       0.0   1.0      0.0     63.4    135.56            110.94       544    0.249       0      0       0.0       0.0
  L4      3/0   96.55 MB   0.8     18.5     6.7     11.8      18.4      6.6       0.0   2.7     65.3     64.9    290.08            284.03       108    2.686    284M  1957K       0.0       0.0
  L5     15/0   404.41 MB   1.0     19.1     7.7     11.4      18.8      7.4       0.3   2.5     66.6     65.7    292.93            285.34       220    1.332    293M  3808K       0.0       0.0
  L6    143/0    4.12 GB   0.0     45.0     7.5     37.5      41.6      4.1       0.0   5.5     71.2     65.9    647.00            632.66       251    2.578    739M    47M       0.0       0.0
 Sum    163/0    4.64 GB   0.0     82.6    21.9     60.7      87.2     26.5       0.3  10.4     61.9     65.4   1365.58           1312.97      1123    1.216   1318M    52M       0.0       0.0
```

Compaction stats from main:
```
Level    Files   Size     Score Read(GB)  Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) CompMergeCPU(sec) Comp(cnt) Avg(sec) KeyIn KeyDrop Rblob(GB) Wblob(GB)
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  L0      0/0    0.00 KB   0.0      0.0     0.0      0.0       8.4      8.4       0.0   1.0      0.0     60.5    142.12            115.89       569    0.250       0      0       0.0       0.0
  L4      3/0   85.68 MB   1.0     17.7     6.8     10.9      17.6      6.7       0.0   2.6     62.7     62.3    289.05            281.79       112    2.581    272M  2309K       0.0       0.0
  L5     11/0   293.73 MB   1.0     18.8     7.5     11.2      18.5      7.2       0.5   2.5     64.9     63.9    296.07            288.50       220    1.346    288M  4365K       0.0       0.0
  L6    130/0    3.94 GB   0.0     51.5     7.6     43.9      47.9      3.9       0.0   6.3     67.2     62.4    784.95            765.92       258    3.042    848M    51M       0.0       0.0
 Sum    144/0    4.31 GB   0.0     88.0    21.9     66.0      92.3     26.3       0.5  11.0     59.6     62.5   1512.19           1452.09      1159    1.305   1409M    58M       0.0       0.0
```

Reviewed By: ajkr

Differential Revision: D39834713

Pulled By: cbi42

fbshipit-source-id: fe9341040b8704a8fbb10cad5cf5c43e962c7e6b
main
Changyu Bi 2 years ago committed by Facebook GitHub Bot
parent 02f2b20864
commit cc6f323705
  1. 17
      db/builder.cc
  2. 3
      db/builder.h
  3. 3
      db/compaction/compaction_job_test.cc
  4. 24
      db/compaction/compaction_outputs.cc
  5. 3
      db/compaction/compaction_picker_test.cc
  6. 6
      db/db_impl/db_impl_compaction_flush.cc
  7. 3
      db/db_impl/db_impl_experimental.cc
  8. 20
      db/db_impl/db_impl_open.cc
  9. 117
      db/db_range_del_test.cc
  10. 3
      db/experimental.cc
  11. 2
      db/external_sst_file_ingestion_job.cc
  12. 5
      db/flush_job.cc
  13. 2
      db/import_column_family_job.cc
  14. 3
      db/repair.cc
  15. 80
      db/version_builder_test.cc
  16. 12
      db/version_edit.cc
  17. 24
      db/version_edit.h
  18. 18
      db/version_edit_test.cc
  19. 18
      db/version_set.cc
  20. 32
      db/version_set_test.cc

@ -71,8 +71,9 @@ Status BuildTable(
int job_id, const Env::IOPriority io_priority,
TableProperties* table_properties, Env::WriteLifeTimeHint write_hint,
const std::string* full_history_ts_low,
BlobFileCompletionCallback* blob_callback, uint64_t* num_input_entries,
uint64_t* memtable_payload_bytes, uint64_t* memtable_garbage_bytes) {
BlobFileCompletionCallback* blob_callback, Version* version,
uint64_t* num_input_entries, uint64_t* memtable_payload_bytes,
uint64_t* memtable_garbage_bytes) {
assert((tboptions.column_family_id ==
TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) ==
tboptions.column_family_name.empty());
@ -246,9 +247,17 @@ Status BuildTable(
auto tombstone = range_del_it->Tombstone();
auto kv = tombstone.Serialize();
builder->Add(kv.first.Encode(), kv.second);
meta->UpdateBoundariesForRange(kv.first, tombstone.SerializeEndKey(),
tombstone.seq_,
InternalKey tombstone_end = tombstone.SerializeEndKey();
meta->UpdateBoundariesForRange(kv.first, tombstone_end, tombstone.seq_,
tboptions.internal_comparator);
if (version) {
SizeApproximationOptions approx_opts;
approx_opts.files_size_error_margin = 0.1;
meta->compensated_range_deletion_size += versions->ApproximateSize(
approx_opts, version, kv.first.Encode(), tombstone_end.Encode(),
0 /* start_level */, -1 /* end_level */,
TableReaderCaller::kFlush);
}
}
}

@ -13,6 +13,7 @@
#include "db/range_tombstone_fragmenter.h"
#include "db/seqno_to_time_mapping.h"
#include "db/table_properties_collector.h"
#include "db/version_set.h"
#include "logging/event_logger.h"
#include "options/cf_options.h"
#include "rocksdb/comparator.h"
@ -70,7 +71,7 @@ extern Status BuildTable(
Env::WriteLifeTimeHint write_hint = Env::WLTH_NOT_SET,
const std::string* full_history_ts_low = nullptr,
BlobFileCompletionCallback* blob_callback = nullptr,
uint64_t* num_input_entries = nullptr,
Version* version = nullptr, uint64_t* num_input_entries = nullptr,
uint64_t* memtable_payload_bytes = nullptr,
uint64_t* memtable_garbage_bytes = nullptr);

@ -386,7 +386,8 @@ class CompactionJobTestBase : public testing::Test {
oldest_blob_file_number, kUnknownOldestAncesterTime,
kUnknownFileCreationTime,
versions_->GetColumnFamilySet()->GetDefault()->NewEpochNumber(),
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2,
0);
mutex_.Lock();
EXPECT_OK(

@ -525,7 +525,8 @@ Status CompactionOutputs::AddRangeDels(
ucmp->CompareWithoutTimestamp(*lower_bound, kv.second) < 0);
// Range tombstone is not supported by output validator yet.
builder_->Add(kv.first.Encode(), kv.second);
InternalKey smallest_candidate = std::move(kv.first);
InternalKey tombstone_start = std::move(kv.first);
InternalKey smallest_candidate{tombstone_start};
if (lower_bound != nullptr &&
ucmp->CompareWithoutTimestamp(smallest_candidate.user_key(),
*lower_bound) <= 0) {
@ -594,7 +595,8 @@ Status CompactionOutputs::AddRangeDels(
smallest_candidate = InternalKey(*lower_bound, 0, kTypeRangeDeletion);
}
}
InternalKey largest_candidate = tombstone.SerializeEndKey();
InternalKey tombstone_end = tombstone.SerializeEndKey();
InternalKey largest_candidate{tombstone_end};
if (upper_bound != nullptr &&
ucmp->CompareWithoutTimestamp(*upper_bound,
largest_candidate.user_key()) <= 0) {
@ -636,6 +638,24 @@ Status CompactionOutputs::AddRangeDels(
#endif
meta.UpdateBoundariesForRange(smallest_candidate, largest_candidate,
tombstone.seq_, icmp);
if (!bottommost_level) {
// Range tombstones are truncated at file boundaries
if (icmp.Compare(tombstone_start, meta.smallest) < 0) {
tombstone_start = meta.smallest;
}
if (icmp.Compare(tombstone_end, meta.largest) > 0) {
tombstone_end = meta.largest;
}
SizeApproximationOptions approx_opts;
approx_opts.files_size_error_margin = 0.1;
auto approximate_covered_size =
compaction_->input_version()->version_set()->ApproximateSize(
approx_opts, compaction_->input_version(),
tombstone_start.Encode(), tombstone_end.Encode(),
compaction_->output_level() + 1 /* start_level */,
-1 /* end_level */, kCompaction);
meta.compensated_range_deletion_size += approximate_covered_size;
}
// The smallest key in a file is used for range tombstone truncation, so
// it cannot have a seqnum of 0 (unless the smallest data key in a file
// has a seqnum of 0). Otherwise, the truncated tombstone may expose

@ -148,7 +148,7 @@ class CompactionPickerTestBase : public testing::Test {
smallest_seq, largest_seq, marked_for_compact, temperature,
kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
kUnknownFileCreationTime, epoch_number, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
f->compensated_file_size =
(compensated_file_size != 0) ? compensated_file_size : file_size;
f->oldest_ancester_time = oldest_ancestor_time;
@ -2873,7 +2873,6 @@ TEST_F(CompactionPickerTest, IntraL0MaxCompactionBytesHit) {
ASSERT_EQ(0, compaction->output_level());
}
#ifndef ROCKSDB_LITE
TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap) {
const uint64_t kFileSize = 100000;

@ -1747,7 +1747,8 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
f->smallest, f->largest, f->fd.smallest_seqno, f->fd.largest_seqno,
f->marked_for_compaction, f->temperature, f->oldest_blob_file_number,
f->oldest_ancester_time, f->file_creation_time, f->epoch_number,
f->file_checksum, f->file_checksum_func_name, f->unique_id);
f->file_checksum, f->file_checksum_func_name, f->unique_id,
f->compensated_range_deletion_size);
}
ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
"[%s] Apply version edit:\n%s", cfd->GetName().c_str(),
@ -3388,7 +3389,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
f->fd.largest_seqno, f->marked_for_compaction, f->temperature,
f->oldest_blob_file_number, f->oldest_ancester_time,
f->file_creation_time, f->epoch_number, f->file_checksum,
f->file_checksum_func_name, f->unique_id);
f->file_checksum_func_name, f->unique_id,
f->compensated_range_deletion_size);
ROCKS_LOG_BUFFER(
log_buffer,

@ -137,7 +137,8 @@ Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) {
f->marked_for_compaction, f->temperature,
f->oldest_blob_file_number, f->oldest_ancester_time,
f->file_creation_time, f->epoch_number, f->file_checksum,
f->file_checksum_func_name, f->unique_id);
f->file_checksum_func_name, f->unique_id,
f->compensated_range_deletion_size);
}
status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),

@ -1550,6 +1550,8 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
0 /* file_creation_time */, db_id_, db_session_id_,
0 /* target_file_size */, meta.fd.GetNumber());
SeqnoToTimeMapping empty_seqno_time_mapping;
Version* version = cfd->current();
version->Ref();
s = BuildTable(
dbname_, versions_.get(), immutable_db_options_, tboptions,
file_options_for_compaction_, cfd->table_cache(), iter.get(),
@ -1559,7 +1561,8 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
io_tracer_, BlobFileCreationReason::kRecovery,
empty_seqno_time_mapping, &event_logger_, job_id, Env::IO_HIGH,
nullptr /* table_properties */, write_hint,
nullptr /*full_history_ts_low*/, &blob_callback_);
nullptr /*full_history_ts_low*/, &blob_callback_, version);
version->Unref();
LogFlush(immutable_db_options_.info_log);
ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
"[%s] [WriteLevel0TableForRecovery]"
@ -1583,13 +1586,14 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
constexpr int level = 0;
if (s.ok() && has_output) {
edit->AddFile(
level, meta.fd.GetNumber(), meta.fd.GetPathId(), meta.fd.GetFileSize(),
meta.smallest, meta.largest, meta.fd.smallest_seqno,
meta.fd.largest_seqno, meta.marked_for_compaction, meta.temperature,
meta.oldest_blob_file_number, meta.oldest_ancester_time,
meta.file_creation_time, meta.epoch_number, meta.file_checksum,
meta.file_checksum_func_name, meta.unique_id);
edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetPathId(),
meta.fd.GetFileSize(), meta.smallest, meta.largest,
meta.fd.smallest_seqno, meta.fd.largest_seqno,
meta.marked_for_compaction, meta.temperature,
meta.oldest_blob_file_number, meta.oldest_ancester_time,
meta.file_creation_time, meta.epoch_number,
meta.file_checksum, meta.file_checksum_func_name,
meta.unique_id, meta.compensated_range_deletion_size);
for (const auto& blob : blob_file_additions) {
edit->AddBlobFile(blob);

@ -479,7 +479,10 @@ TEST_F(DBRangeDelTest, ValidUniversalSubcompactionBoundaries) {
std::vector<std::string> values;
// Write 100KB (100 values, each 1K)
for (int k = 0; k < kNumPerFile; k++) {
values.push_back(rnd.RandomString(990));
// For the highest level, use smaller value size such that it does not
// prematurely cause auto compaction due to range tombstone adding
// additional compensated file size
values.push_back(rnd.RandomString((i == kNumLevels - 2) ? 600 : 990));
ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k]));
}
// put extra key to trigger flush
@ -492,7 +495,13 @@ TEST_F(DBRangeDelTest, ValidUniversalSubcompactionBoundaries) {
}
ASSERT_OK(dbfull()->TEST_WaitForCompact());
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 1);
if (i == kNumLevels - 2) {
// For the highest level, value size is smaller (see Put() above),
// so output file number is smaller.
ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 2);
} else {
ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 1);
}
}
// Now L1-L3 are full, when we compact L1->L2 we should see (1) subcompactions
// happen since input level > 0; (2) range deletions are not dropped since
@ -3004,6 +3013,110 @@ TEST_F(DBRangeDelTest, RangeTombstoneRespectIterateUpperBound) {
ASSERT_OK(iter->status());
}
// Checks that FileMetaData::compensated_range_deletion_size is filled in for a
// file containing a range tombstone, first when the tombstone is flushed to L0
// and then again after that file is compacted into L1. The expected values are
// the approximate sizes (as reported by Size()) of the data the tombstone
// covers in the levels below the file.
TEST_F(DBRangeDelTest, RangetombesoneCompensateFilesize) {
Options opts = CurrentOptions();
// Auto compaction is disabled; the test places files itself through
// MoveFilesToLevel() and TEST_CompactRange().
opts.disable_auto_compactions = true;
DestroyAndReopen(opts);
std::vector<std::string> values;
Random rnd(301);
// Write keys "a" and "b", flush them into one file and move that file to L2.
values.push_back(rnd.RandomString(1 << 10));
ASSERT_OK(Put("a", values.back()));
values.push_back(rnd.RandomString(1 << 10));
ASSERT_OK(Put("b", values.back()));
ASSERT_OK(Flush());
MoveFilesToLevel(2);
// Record the approximate size reported for the range ["a", "c"), i.e. the
// data that now sits in L2.
uint64_t l2_size = 0;
ASSERT_OK(Size("a", "c", 0 /* cf */, &l2_size));
ASSERT_GT(l2_size, 0);
// Write keys "d" and "e", flush them into one file and move that file to L1.
values.push_back(rnd.RandomString(1 << 10));
ASSERT_OK(Put("d", values.back()));
values.push_back(rnd.RandomString(1 << 10));
ASSERT_OK(Put("e", values.back()));
ASSERT_OK(Flush());
MoveFilesToLevel(1);
// Record the approximate size reported for the range ["d", "f"), i.e. the
// data that now sits in L1.
uint64_t l1_size = 0;
ASSERT_OK(Size("d", "f", 0 /* cf */, &l1_size));
ASSERT_GT(l1_size, 0);
// Issue a range deletion over ["a", "f"), which spans the keys of both files
// above, and flush it so that it ends up in a new L0 file.
ASSERT_OK(
db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "f"));
ASSERT_OK(Flush());
// Range deletion compensated size computed during flush time
std::vector<std::vector<FileMetaData>> level_to_files;
dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
&level_to_files);
// The L0 file carrying the tombstone is expected to be compensated by the
// sizes measured for both the L1 and the L2 data.
ASSERT_EQ(level_to_files[0].size(), 1);
ASSERT_EQ(level_to_files[0][0].compensated_range_deletion_size,
l1_size + l2_size);
// The L1 and L2 files were written before the range deletion was issued, so
// no compensation is expected on them.
ASSERT_EQ(level_to_files[1].size(), 1);
ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, 0);
ASSERT_EQ(level_to_files[2].size(), 1);
ASSERT_EQ(level_to_files[2][0].compensated_range_deletion_size, 0);
// Range deletion compensated size computed during compaction time
// Compact L0 with trivial moves disallowed, so an output file is actually
// written rather than the input file simply being re-linked.
ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
true /* disallow_trivial_move */));
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
ASSERT_EQ(NumTableFilesAtLevel(1), 1);
ASSERT_EQ(NumTableFilesAtLevel(2), 1);
dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
&level_to_files);
// The compaction output in L1 now holds the tombstone; only the data below it
// (the L2 file) is expected to count towards its compensation.
ASSERT_EQ(level_to_files[1].size(), 1);
ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, l2_size);
// The L2 file is untouched and is still expected to carry no compensation.
ASSERT_EQ(level_to_files[2].size(), 1);
ASSERT_EQ(level_to_files[2][0].compensated_range_deletion_size, 0);
}
// Checks that FileMetaData::compensated_range_deletion_size still has the same
// value after the DB is closed and reopened. One tombstone-carrying file is
// set up in L0 and one in L1, above a data file in L2.
TEST_F(DBRangeDelTest, RangetombesoneCompensateFilesizePersistDuringReopen) {
Options opts = CurrentOptions();
// Auto compaction is disabled; the test places files itself through
// MoveFilesToLevel().
opts.disable_auto_compactions = true;
DestroyAndReopen(opts);
std::vector<std::string> values;
Random rnd(301);
// Write keys "a" and "b", flush them into one file and move that file to L2.
values.push_back(rnd.RandomString(1 << 10));
ASSERT_OK(Put("a", values.back()));
values.push_back(rnd.RandomString(1 << 10));
ASSERT_OK(Put("b", values.back()));
ASSERT_OK(Flush());
MoveFilesToLevel(2);
// First range deletion, over ["a", "c"): flushed and then moved to L1.
// NOTE(review): the L1 expectation below presumably relies on the value being
// computed at flush time, when only the L2 file existed -- confirm.
ASSERT_OK(
db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c"));
ASSERT_OK(Flush());
MoveFilesToLevel(1);
// Second range deletion, over ["a", "z"): flushed and left in L0.
ASSERT_OK(
db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
ASSERT_OK(Flush());
std::vector<std::vector<FileMetaData>> level_to_files;
dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
&level_to_files);
ASSERT_EQ(level_to_files[0].size(), 1);
ASSERT_EQ(level_to_files[1].size(), 1);
ASSERT_EQ(level_to_files[2].size(), 1);
// Unlike the approximate Size() query, the on-disk file sizes are used here
// as the expected compensation amounts.
uint64_t l2_size = level_to_files[2][0].fd.GetFileSize();
uint64_t l1_size = level_to_files[1][0].fd.GetFileSize();
ASSERT_GT(l2_size, 0);
ASSERT_GT(l1_size, 0);
// Before reopening: the L0 file is expected to be compensated by the L1 and
// L2 file sizes, and the L1 file by the L2 file size.
ASSERT_EQ(level_to_files[0][0].compensated_range_deletion_size,
l1_size + l2_size);
ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, l2_size);
// Reopen the DB and verify that the same values are reported again.
Reopen(opts);
dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
&level_to_files);
ASSERT_EQ(level_to_files[0].size(), 1);
ASSERT_EQ(level_to_files[0][0].compensated_range_deletion_size,
l1_size + l2_size);
ASSERT_EQ(level_to_files[1].size(), 1);
ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, l2_size);
}
#endif // ROCKSDB_LITE
} // namespace ROCKSDB_NAMESPACE

@ -113,7 +113,8 @@ Status UpdateManifestForFilesState(
lf->fd.largest_seqno, lf->marked_for_compaction, temp,
lf->oldest_blob_file_number, lf->oldest_ancester_time,
lf->file_creation_time, lf->epoch_number, lf->file_checksum,
lf->file_checksum_func_name, lf->unique_id);
lf->file_checksum_func_name, lf->unique_id,
lf->compensated_range_deletion_size);
}
}
} else {

@ -473,7 +473,7 @@ Status ExternalSstFileIngestionJob::Run() {
ingestion_options_.ingest_behind
? kReservedEpochNumberForFileIngestedBehind
: cfd_->NewEpochNumber(),
f.file_checksum, f.file_checksum_func_name, f.unique_id);
f.file_checksum, f.file_checksum_func_name, f.unique_id, 0);
f_metadata.temperature = f.file_temperature;
edit_.AddFile(f.picked_level, f_metadata);
}

@ -941,7 +941,7 @@ Status FlushJob::WriteLevel0Table() {
cfd_->internal_stats(), &io_s, io_tracer_,
BlobFileCreationReason::kFlush, seqno_to_time_mapping_, event_logger_,
job_context_->job_id, io_priority, &table_properties_, write_hint,
full_history_ts_low, blob_callback_, &num_input_entries,
full_history_ts_low, blob_callback_, base_, &num_input_entries,
&memtable_payload_bytes, &memtable_garbage_bytes);
// TODO: Cleanup io_status in BuildTable and table builders
assert(!s.ok() || io_s.ok());
@ -1003,8 +1003,7 @@ Status FlushJob::WriteLevel0Table() {
meta_.oldest_blob_file_number, meta_.oldest_ancester_time,
meta_.file_creation_time, meta_.epoch_number,
meta_.file_checksum, meta_.file_checksum_func_name,
meta_.unique_id);
meta_.unique_id, meta_.compensated_range_deletion_size);
edit_->SetBlobFileAdditions(std::move(blob_file_additions));
}
#ifndef ROCKSDB_LITE

@ -143,7 +143,7 @@ Status ImportColumnFamilyJob::Run() {
file_metadata.smallest_seqno, file_metadata.largest_seqno, false,
file_metadata.temperature, kInvalidBlobFileNumber, oldest_ancester_time,
current_time, file_metadata.epoch_number, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, f.unique_id);
kUnknownFileChecksumFuncName, f.unique_id, 0);
s = dummy_version_builder.Apply(&dummy_version_edit);
}
if (s.ok()) {

@ -665,7 +665,8 @@ class Repairer {
table->meta.temperature, table->meta.oldest_blob_file_number,
table->meta.oldest_ancester_time, table->meta.file_creation_time,
table->meta.epoch_number, table->meta.file_checksum,
table->meta.file_checksum_func_name, table->meta.unique_id);
table->meta.file_checksum_func_name, table->meta.unique_id,
table->meta.compensated_range_deletion_size);
}
s = dummy_version_builder.Apply(&dummy_edit);
if (s.ok()) {

@ -73,7 +73,7 @@ class VersionBuilderTest : public testing::Test {
/* marked_for_compact */ false, Temperature::kUnknown,
oldest_blob_file_number, kUnknownOldestAncesterTime,
kUnknownFileCreationTime, epoch_number, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
f->compensated_file_size = file_size;
f->num_entries = num_entries;
f->num_deletions = num_deletions;
@ -130,12 +130,13 @@ class VersionBuilderTest : public testing::Test {
constexpr SequenceNumber largest_seqno = 300;
constexpr bool marked_for_compaction = false;
edit->AddFile(
level, table_file_number, path_id, file_size, GetInternalKey(smallest),
GetInternalKey(largest), smallest_seqno, largest_seqno,
marked_for_compaction, Temperature::kUnknown, blob_file_number,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, epoch_number,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
edit->AddFile(level, table_file_number, path_id, file_size,
GetInternalKey(smallest), GetInternalKey(largest),
smallest_seqno, largest_seqno, marked_for_compaction,
Temperature::kUnknown, blob_file_number,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
epoch_number, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
}
void UpdateVersionStorageInfo(VersionStorageInfo* vstorage) {
@ -186,7 +187,7 @@ TEST_F(VersionBuilderTest, ApplyAndSaveTo) {
2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
version_edit.DeleteFile(3, 27U);
EnvOptions env_options;
@ -233,7 +234,8 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic) {
3, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
version_edit.DeleteFile(0, 1U);
version_edit.DeleteFile(0, 88U);
@ -283,7 +285,7 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic2) {
4, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
version_edit.DeleteFile(0, 1U);
version_edit.DeleteFile(0, 88U);
version_edit.DeleteFile(4, 6U);
@ -319,27 +321,27 @@ TEST_F(VersionBuilderTest, ApplyMultipleAndSaveTo) {
2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
version_edit.AddFile(
2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
version_edit.AddFile(
2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
version_edit.AddFile(
2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
version_edit.AddFile(
2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
EnvOptions env_options;
constexpr TableCache* table_cache = nullptr;
@ -378,27 +380,27 @@ TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) {
2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
version_edit.AddFile(
2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
version_edit.AddFile(
2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
version_edit.AddFile(
2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
version_edit.AddFile(
2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
ASSERT_OK(version_builder.Apply(&version_edit));
VersionEdit version_edit2;
@ -406,14 +408,14 @@ TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) {
2, 808, 0, 100U, GetInternalKey("901"), GetInternalKey("950"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
version_edit2.DeleteFile(2, 616);
version_edit2.DeleteFile(2, 636);
version_edit.AddFile(
2, 806, 0, 100U, GetInternalKey("801"), GetInternalKey("850"), 200, 200,
false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
ASSERT_OK(version_builder.Apply(&version_edit2));
ASSERT_OK(version_builder.SaveTo(&new_vstorage));
@ -524,7 +526,7 @@ TEST_F(VersionBuilderTest, ApplyFileDeletionAndAddition) {
GetInternalKey(largest, largest_seq), smallest_seqno, largest_seqno,
marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
ASSERT_OK(builder.Apply(&addition));
@ -573,7 +575,7 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyInBase) {
GetInternalKey(largest), smallest_seqno, largest_seqno,
marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
const Status s = builder.Apply(&edit);
ASSERT_TRUE(s.IsCorruption());
@ -609,7 +611,7 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyApplied) {
GetInternalKey(largest), smallest_seqno, largest_seqno,
marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
ASSERT_OK(builder.Apply(&edit));
@ -622,7 +624,7 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyApplied) {
GetInternalKey(largest), smallest_seqno, largest_seqno,
marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
const Status s = builder.Apply(&other_edit);
ASSERT_TRUE(s.IsCorruption());
@ -658,7 +660,7 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAndDeletion) {
GetInternalKey(largest), smallest_seqno, largest_seqno,
marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
ASSERT_OK(builder.Apply(&addition));
@ -1231,7 +1233,7 @@ TEST_F(VersionBuilderTest, SaveBlobFilesToConcurrentJobs) {
GetInternalKey(largest), smallest_seqno, largest_seqno,
marked_for_compaction, Temperature::kUnknown, blob_file_number,
kUnknownOldestAncesterTime, kUnknownFileCreationTime, 2 /*epoch_number*/,
checksum_value, checksum_method, kNullUniqueId64x2);
checksum_value, checksum_method, kNullUniqueId64x2, 0);
edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes,
checksum_method, checksum_value);
@ -1319,7 +1321,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) {
/* oldest_blob_file_number */ 16, kUnknownOldestAncesterTime,
kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
kNullUniqueId64x2);
kNullUniqueId64x2, 0);
edit.AddFile(/* level */ 1, /* file_number */ 700, /* path_id */ 0,
/* file_size */ 100, /* smallest */ GetInternalKey("801"),
@ -1329,7 +1331,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) {
/* oldest_blob_file_number */ 1000, kUnknownOldestAncesterTime,
kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
kNullUniqueId64x2);
kNullUniqueId64x2, 0);
edit.AddBlobFile(/* blob_file_number */ 1000, /* total_blob_count */ 2000,
/* total_blob_bytes */ 200000,
/* checksum_method */ std::string(),
@ -1550,7 +1552,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) {
Temperature::kUnknown,
/* oldest_blob_file_number */ 1, kUnknownOldestAncesterTime,
kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
// Add an SST that does not reference any blob files.
edit.AddFile(
@ -1560,7 +1562,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) {
/* largest_seqno */ 2200, /* marked_for_compaction */ false,
Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
// Delete a file that references a blob file.
edit.DeleteFile(/* level */ 1, /* file_number */ 6);
@ -1583,7 +1585,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) {
/* oldest_blob_file_number */ 3, kUnknownOldestAncesterTime,
kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
kNullUniqueId64x2);
kNullUniqueId64x2, 0);
// Trivially move a file that does not reference any blob files.
edit.DeleteFile(/* level */ 1, /* file_number */ 13);
@ -1595,7 +1597,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) {
Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
kUnknownEpochNumber, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
// Add one more SST file that references a blob file, then promptly
// delete it in a second version edit before the new version gets saved.
@ -1609,7 +1611,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) {
/* oldest_blob_file_number */ 5, kUnknownOldestAncesterTime,
kUnknownFileCreationTime, kUnknownEpochNumber,
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
kNullUniqueId64x2);
kNullUniqueId64x2, 0);
VersionEdit edit2;
@ -1710,7 +1712,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) {
/* oldest_blob_file_number */ kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName,
kNullUniqueId64x2);
kNullUniqueId64x2, 0);
version_edit_1.AddFile(
/* level */ 0, /* file_number */ 2U, /* path_id */ 0,
/* file_size */ 100, /* smallest */ GetInternalKey("b", 2),
@ -1720,7 +1722,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) {
/* oldest_blob_file_number */ kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName,
kNullUniqueId64x2);
kNullUniqueId64x2, 0);
VersionBuilder version_builder_1(EnvOptions(), &ioptions_,
nullptr /* table_cache */, &vstorage_,
@ -1747,7 +1749,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) {
/* oldest_blob_file_number */ kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName,
kNullUniqueId64x2);
kNullUniqueId64x2, 0);
version_edit_2.AddFile(
/* level */ 0, /* file_number */ 2U, /* path_id */ 0,
/* file_size */ 100, /* smallest */ GetInternalKey("b", 2),
@ -1757,7 +1759,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) {
/* oldest_blob_file_number */ kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
2 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName,
kNullUniqueId64x2);
kNullUniqueId64x2, 0);
VersionBuilder version_builder_2(EnvOptions(), &ioptions_,
nullptr /* table_cache */, &vstorage_,

@ -231,6 +231,13 @@ bool VersionEdit::EncodeTo(std::string* dst) const {
std::string unique_id_str = EncodeUniqueIdBytes(&unique_id);
PutLengthPrefixedSlice(dst, Slice(unique_id_str));
}
if (f.compensated_range_deletion_size) {
PutVarint32(dst, kCompensatedRangeDeletionSize);
std::string compensated_range_deletion_size;
PutVarint64(&compensated_range_deletion_size,
f.compensated_range_deletion_size);
PutLengthPrefixedSlice(dst, Slice(compensated_range_deletion_size));
}
TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:NewFile4:CustomizeFields",
dst);
@ -404,6 +411,11 @@ const char* VersionEdit::DecodeNewFile4From(Slice* input) {
return "invalid unique id";
}
break;
case kCompensatedRangeDeletionSize:
if (!GetVarint64(&field, &f.compensated_range_deletion_size)) {
return "Invalid compensated range deletion size";
}
break;
default:
if ((custom_tag & kCustomTagNonSafeIgnoreMask) != 0) {
// Should not proceed if cannot understand it

@ -89,6 +89,7 @@ enum NewFileCustomTag : uint32_t {
kMaxTimestamp = 11,
kUniqueId = 12,
kEpochNumber = 13,
kCompensatedRangeDeletionSize = 14,
// If this bit for the custom tag is set, opening DB should fail if
// we don't know this field.
@ -182,15 +183,22 @@ struct FileMetaData {
// Stats for compensating deletion entries during compaction
// File size compensated by deletion entry.
// This is updated in Version::UpdateAccumulatedStats() first time when the
// file is created or loaded. After it is updated (!= 0), it is immutable.
// This is used to compute a file's compaction priority, and is updated in
// Version::ComputeCompensatedSizes() first time when the file is created or
// loaded. After it is updated (!= 0), it is immutable.
uint64_t compensated_file_size = 0;
// These values can mutate, but they can only be read or written from
// single-threaded LogAndApply thread
uint64_t num_entries = 0; // the number of entries.
uint64_t num_deletions = 0; // the number of deletion entries.
// The number of deletion entries, including range deletions.
uint64_t num_deletions = 0;
uint64_t raw_key_size = 0; // total uncompressed key size.
uint64_t raw_value_size = 0; // total uncompressed value size.
uint64_t num_range_deletions = 0;
// This is computed during Flush/Compaction, and is added to
// `compensated_file_size`. Currently, for a file in level L, this estimates
// the bytes in all levels > L that are deleted by range tombstones in this
// file.
uint64_t compensated_range_deletion_size = 0;
int refs = 0; // Reference count
@ -240,10 +248,12 @@ struct FileMetaData {
uint64_t _oldest_ancester_time, uint64_t _file_creation_time,
uint64_t _epoch_number, const std::string& _file_checksum,
const std::string& _file_checksum_func_name,
UniqueId64x2 _unique_id)
UniqueId64x2 _unique_id,
const uint64_t _compensated_range_deletion_size)
: fd(file, file_path_id, file_size, smallest_seq, largest_seq),
smallest(smallest_key),
largest(largest_key),
compensated_range_deletion_size(_compensated_range_deletion_size),
marked_for_compaction(marked_for_compact),
temperature(_temperature),
oldest_blob_file_number(oldest_blob_file),
@ -434,7 +444,8 @@ class VersionEdit {
uint64_t oldest_ancester_time, uint64_t file_creation_time,
uint64_t epoch_number, const std::string& file_checksum,
const std::string& file_checksum_func_name,
const UniqueId64x2& unique_id) {
const UniqueId64x2& unique_id,
const uint64_t compensated_range_deletion_size) {
assert(smallest_seqno <= largest_seqno);
new_files_.emplace_back(
level,
@ -442,7 +453,8 @@ class VersionEdit {
smallest_seqno, largest_seqno, marked_for_compaction,
temperature, oldest_blob_file_number, oldest_ancester_time,
file_creation_time, epoch_number, file_checksum,
file_checksum_func_name, unique_id));
file_checksum_func_name, unique_id,
compensated_range_deletion_size));
if (!HasLastSequence() || largest_seqno > GetLastSequence()) {
SetLastSequence(largest_seqno);
}

@ -45,7 +45,7 @@ TEST_F(VersionEditTest, EncodeDecode) {
kBig + 500 + i, kBig + 600 + i, false, Temperature::kUnknown,
kInvalidBlobFileNumber, 888, 678,
kBig + 300 + i /* epoch_number */, "234", "crc32c",
kNullUniqueId64x2);
kNullUniqueId64x2, 0);
edit.DeleteFile(4, kBig + 700 + i);
}
@ -65,24 +65,24 @@ TEST_F(VersionEditTest, EncodeDecodeNewFile4) {
kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
300 /* epoch_number */, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue),
InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501,
kBig + 601, false, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
301 /* epoch_number */, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
edit.AddFile(5, 302, 0, 100, InternalKey("foo", kBig + 502, kTypeValue),
InternalKey("zoo", kBig + 602, kTypeDeletion), kBig + 502,
kBig + 602, true, Temperature::kUnknown, kInvalidBlobFileNumber,
666, 888, 302 /* epoch_number */, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
edit.AddFile(5, 303, 0, 100, InternalKey("foo", kBig + 503, kTypeBlobIndex),
InternalKey("zoo", kBig + 603, kTypeBlobIndex), kBig + 503,
kBig + 603, true, Temperature::kUnknown, 1001,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
303 /* epoch_number */, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
edit.DeleteFile(4, 700);
@ -123,12 +123,12 @@ TEST_F(VersionEditTest, ForwardCompatibleNewFile4) {
kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
300 /* epoch_number */, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue),
InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501,
kBig + 601, false, Temperature::kUnknown, kInvalidBlobFileNumber,
686, 868, 301 /* epoch_number */, "234", "crc32c",
kNullUniqueId64x2);
kNullUniqueId64x2, 0);
edit.DeleteFile(4, 700);
edit.SetComparatorName("foo");
@ -177,7 +177,7 @@ TEST_F(VersionEditTest, NewFile4NotSupportedField) {
kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
300 /* epoch_number */, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
edit.SetComparatorName("foo");
edit.SetLogNumber(kBig + 100);
@ -208,7 +208,7 @@ TEST_F(VersionEditTest, EncodeEmptyFile) {
Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
1 /*epoch_number*/, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
std::string buffer;
ASSERT_TRUE(!edit.EncodeTo(&buffer));
}

@ -2960,7 +2960,7 @@ bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) {
file_meta->num_deletions = tp->num_deletions;
file_meta->raw_value_size = tp->raw_value_size;
file_meta->raw_key_size = tp->raw_key_size;
file_meta->num_range_deletions = tp->num_range_deletions;
return true;
}
@ -3062,11 +3062,15 @@ void VersionStorageInfo::ComputeCompensatedSizes() {
// size of deletion entries in a stable workload, the deletion
// compensation logic might introduce unwanted effect which changes the
// shape of LSM tree.
if (file_meta->num_deletions * 2 >= file_meta->num_entries) {
if ((file_meta->num_deletions - file_meta->num_range_deletions) * 2 >=
file_meta->num_entries) {
file_meta->compensated_file_size +=
(file_meta->num_deletions * 2 - file_meta->num_entries) *
((file_meta->num_deletions - file_meta->num_range_deletions) * 2 -
file_meta->num_entries) *
average_value_size * kDeletionWeightOnCompaction;
}
file_meta->compensated_file_size +=
file_meta->compensated_range_deletion_size;
}
}
}
@ -6215,7 +6219,8 @@ Status VersionSet::WriteCurrentStateToManifest(
f->marked_for_compaction, f->temperature,
f->oldest_blob_file_number, f->oldest_ancester_time,
f->file_creation_time, f->epoch_number, f->file_checksum,
f->file_checksum_func_name, f->unique_id);
f->file_checksum_func_name, f->unique_id,
f->compensated_range_deletion_size);
}
}
@ -6293,8 +6298,9 @@ uint64_t VersionSet::ApproximateSize(const SizeApproximationOptions& options,
const int num_non_empty_levels = vstorage->num_non_empty_levels();
end_level = (end_level == -1) ? num_non_empty_levels
: std::min(end_level, num_non_empty_levels);
assert(start_level <= end_level);
if (end_level <= start_level) {
return 0;
}
// Outline of the optimization that uses options.files_size_error_margin.
// When approximating the files total size that is used to store a keys range,

@ -51,7 +51,7 @@ class GenerateLevelFilesBriefTest : public testing::Test {
largest_seq, /* marked_for_compact */ false, Temperature::kUnknown,
kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
files_.push_back(f);
}
@ -143,16 +143,19 @@ class VersionStorageInfoTestBase : public testing::Test {
void Add(int level, uint32_t file_number, const char* smallest,
const char* largest, uint64_t file_size = 0,
uint64_t oldest_blob_file_number = kInvalidBlobFileNumber) {
uint64_t oldest_blob_file_number = kInvalidBlobFileNumber,
uint64_t compensated_range_deletion_size = 0) {
constexpr SequenceNumber dummy_seq = 0;
Add(level, file_number, GetInternalKey(smallest, dummy_seq),
GetInternalKey(largest, dummy_seq), file_size, oldest_blob_file_number);
GetInternalKey(largest, dummy_seq), file_size, oldest_blob_file_number,
compensated_range_deletion_size);
}
void Add(int level, uint32_t file_number, const InternalKey& smallest,
const InternalKey& largest, uint64_t file_size = 0,
uint64_t oldest_blob_file_number = kInvalidBlobFileNumber) {
uint64_t oldest_blob_file_number = kInvalidBlobFileNumber,
uint64_t compensated_range_deletion_size = 0) {
assert(level < vstorage_.num_levels());
FileMetaData* f = new FileMetaData(
file_number, 0, file_size, smallest, largest, /* smallest_seq */ 0,
@ -160,8 +163,7 @@ class VersionStorageInfoTestBase : public testing::Test {
Temperature::kUnknown, oldest_blob_file_number,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
kUnknownEpochNumber, kUnknownFileChecksum, kUnknownFileChecksumFuncName,
kNullUniqueId64x2);
f->compensated_file_size = file_size;
kNullUniqueId64x2, compensated_range_deletion_size);
vstorage_.AddFile(level, f);
}
@ -2136,6 +2138,17 @@ TEST_F(VersionSetTest, AtomicGroupWithWalEdits) {
}
}
TEST_F(VersionStorageInfoTest, AddRangeDeletionCompensatedFileSize) {
  // A file's compensated_range_deletion_size must be included in its
  // compensated_file_size once the version storage info has been updated.
  constexpr uint64_t kFileSize = 100U;
  constexpr uint64_t kRangeDeletionSize = 1000U;

  // Single file 100 on level 4 covering ["1", "2"], with no blob file.
  Add(/* level */ 4, /* file_number */ 100U, /* smallest */ "1",
      /* largest */ "2", kFileSize, kInvalidBlobFileNumber,
      kRangeDeletionSize);
  UpdateVersionStorageInfo();

  auto file_meta = vstorage_.GetFileMetaDataByNumber(100U);
  ASSERT_EQ(file_meta->compensated_file_size, kFileSize + kRangeDeletionSize);
}
class VersionSetWithTimestampTest : public VersionSetTest {
public:
static const std::string kNewCfName;
@ -3242,7 +3255,8 @@ class VersionSetTestMissingFiles : public VersionSetTestBase,
file_metas->emplace_back(file_num, /*file_path_id=*/0, file_size, ikey,
ikey, 0, 0, false, Temperature::kUnknown, 0, 0,
0, info.epoch_number, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2,
0);
}
}
@ -3299,7 +3313,7 @@ TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) {
file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey,
largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0,
file_num /* epoch_number */, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
added_files.emplace_back(0, meta);
}
WriteFileAdditionAndDeletionToManifest(
@ -3360,7 +3374,7 @@ TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) {
file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey,
largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0,
file_num /* epoch_number */, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
added_files.emplace_back(0, meta);
}
WriteFileAdditionAndDeletionToManifest(

Loading…
Cancel
Save