From 7521478b4397ef08a2c790335eff6a042bfc087e Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Wed, 21 Jun 2023 21:49:01 -0700 Subject: [PATCH] Record the `persist_user_defined_timestamps` flag in manifest (#11515) Summary: Start to record the value of the flag `AdvancedColumnFamilyOptions.persist_user_defined_timestamps` in the Manifest and table properties for an SST file when it is created. And use the recorded flag when creating a table reader for the SST file. This flag's default value is true; it is only explicitly recorded if it's false. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11515 Test Plan: ``` make all check ./version_edit_test ``` Reviewed By: ltamasi Differential Revision: D46920386 Pulled By: jowlyzhang fbshipit-source-id: 075c20363d3d2cc1368422ecc805617ed135cc26 --- db/builder.cc | 2 + db/compaction/compaction_job_test.cc | 3 +- db/compaction/compaction_outputs.cc | 2 + db/compaction/compaction_picker_test.cc | 3 +- db/db_impl/db_impl_compaction_flush.cc | 6 +- db/db_impl/db_impl_experimental.cc | 3 +- db/db_impl/db_impl_open.cc | 19 +- db/experimental.cc | 3 +- db/external_sst_file_ingestion_job.cc | 7 +- db/flush_job.cc | 2 +- db/import_column_family_job.cc | 7 +- db/repair.cc | 5 +- db/table_cache.cc | 3 +- db/version_builder_test.cc | 301 +++++++++--------- db/version_edit.cc | 20 +- db/version_edit.h | 16 +- db/version_edit_test.cc | 24 +- db/version_set.cc | 3 +- db/version_set_test.cc | 22 +- include/rocksdb/table_properties.h | 6 + .../block_based/block_based_table_builder.cc | 2 + table/meta_blocks.cc | 6 + table/sst_file_dumper.cc | 8 +- table/table_properties.cc | 2 + .../persist_udt_flag_in_manifest.md | 1 + 25 files changed, 284 insertions(+), 192 deletions(-) create mode 100644 unreleased_history/new_features/persist_udt_flag_in_manifest.md diff --git a/db/builder.cc b/db/builder.cc index a99bb57e8..9c7402a31 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -293,6 +293,8 @@ Status BuildTable( meta->fd.file_size = 
file_size; meta->tail_size = builder->GetTailSize(); meta->marked_for_compaction = builder->NeedCompact(); + meta->user_defined_timestamps_persisted = + ioptions.persist_user_defined_timestamps; assert(meta->fd.GetFileSize() > 0); tp = builder ->GetTableProperties(); // refresh now that builder is finished diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index a7cf65f01..f7fc28c15 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -386,7 +386,8 @@ class CompactionJobTestBase : public testing::Test { kUnknownFileCreationTime, versions_->GetColumnFamilySet()->GetDefault()->NewEpochNumber(), kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, - 0, 0); + /*compensated_range_deletion_size=*/0, /*tail_size=*/0, + /*user_defined_timestamps_persisted=*/true); mutex_.Lock(); EXPECT_OK(versions_->LogAndApply( diff --git a/db/compaction/compaction_outputs.cc b/db/compaction/compaction_outputs.cc index 81323aa3b..0e078ed83 100644 --- a/db/compaction/compaction_outputs.cc +++ b/db/compaction/compaction_outputs.cc @@ -45,6 +45,8 @@ Status CompactionOutputs::Finish(const Status& intput_status, meta->fd.file_size = current_bytes; meta->tail_size = builder_->GetTailSize(); meta->marked_for_compaction = builder_->NeedCompact(); + meta->user_defined_timestamps_persisted = static_cast<bool>( + builder_->GetTableProperties().user_defined_timestamps_persisted); } current_output().finished = true; stats_.bytes_written += current_bytes; diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index 6b04f4295..fd14322b2 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -153,7 +153,8 @@ class CompactionPickerTestBase : public testing::Test { smallest_seq, largest_seq, marked_for_compact, temperature, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, epoch_number, 
kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, + true /* user_defined_timestamps_persisted */); f->compensated_file_size = (compensated_file_size != 0) ? compensated_file_size : file_size; f->oldest_ancester_time = oldest_ancestor_time; diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 2ace6159b..8274dd587 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -1777,7 +1777,8 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) { f->marked_for_compaction, f->temperature, f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, f->epoch_number, f->file_checksum, f->file_checksum_func_name, f->unique_id, - f->compensated_range_deletion_size, f->tail_size); + f->compensated_range_deletion_size, f->tail_size, + f->user_defined_timestamps_persisted); } ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] Apply version edit:\n%s", cfd->GetName().c_str(), @@ -3510,7 +3511,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, f->epoch_number, f->file_checksum, f->file_checksum_func_name, f->unique_id, - f->compensated_range_deletion_size, f->tail_size); + f->compensated_range_deletion_size, f->tail_size, + f->user_defined_timestamps_persisted); ROCKS_LOG_BUFFER( log_buffer, diff --git a/db/db_impl/db_impl_experimental.cc b/db/db_impl/db_impl_experimental.cc index 8d958ffc1..442cb4767 100644 --- a/db/db_impl/db_impl_experimental.cc +++ b/db/db_impl/db_impl_experimental.cc @@ -138,7 +138,8 @@ Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) { f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, f->epoch_number, f->file_checksum, f->file_checksum_func_name, f->unique_id, - f->compensated_range_deletion_size, 
f->tail_size); + f->compensated_range_deletion_size, f->tail_size, + f->user_defined_timestamps_persisted); } status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 2e0c91aba..a7d06724b 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -618,7 +618,7 @@ Status DBImpl::Recover( f->file_creation_time, f->epoch_number, f->file_checksum, f->file_checksum_func_name, f->unique_id, f->compensated_range_deletion_size, - f->tail_size); + f->tail_size, f->user_defined_timestamps_persisted); ROCKS_LOG_WARN(immutable_db_options_.info_log, "[%s] Moving #%" PRIu64 " from from_level-%d to from_level-%d %" PRIu64 @@ -1689,14 +1689,15 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, constexpr int level = 0; if (s.ok() && has_output) { - edit->AddFile( - level, meta.fd.GetNumber(), meta.fd.GetPathId(), meta.fd.GetFileSize(), - meta.smallest, meta.largest, meta.fd.smallest_seqno, - meta.fd.largest_seqno, meta.marked_for_compaction, meta.temperature, - meta.oldest_blob_file_number, meta.oldest_ancester_time, - meta.file_creation_time, meta.epoch_number, meta.file_checksum, - meta.file_checksum_func_name, meta.unique_id, - meta.compensated_range_deletion_size, meta.tail_size); + edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetPathId(), + meta.fd.GetFileSize(), meta.smallest, meta.largest, + meta.fd.smallest_seqno, meta.fd.largest_seqno, + meta.marked_for_compaction, meta.temperature, + meta.oldest_blob_file_number, meta.oldest_ancester_time, + meta.file_creation_time, meta.epoch_number, + meta.file_checksum, meta.file_checksum_func_name, + meta.unique_id, meta.compensated_range_deletion_size, + meta.tail_size, meta.user_defined_timestamps_persisted); for (const auto& blob : blob_file_additions) { edit->AddBlobFile(blob); diff --git a/db/experimental.cc b/db/experimental.cc index cb5fb3179..f6f920b2c 100644 --- a/db/experimental.cc +++ 
b/db/experimental.cc @@ -102,7 +102,8 @@ Status UpdateManifestForFilesState( lf->oldest_blob_file_number, lf->oldest_ancester_time, lf->file_creation_time, lf->epoch_number, lf->file_checksum, lf->file_checksum_func_name, lf->unique_id, - lf->compensated_range_deletion_size, lf->tail_size); + lf->compensated_range_deletion_size, lf->tail_size, + lf->user_defined_timestamps_persisted); } } } else { diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index 2f8c9779a..c665ed83d 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -482,7 +482,9 @@ Status ExternalSstFileIngestionJob::Run() { ingestion_options_.ingest_behind ? kReservedEpochNumberForFileIngestedBehind : cfd_->NewEpochNumber(), - f.file_checksum, f.file_checksum_func_name, f.unique_id, 0, tail_size); + f.file_checksum, f.file_checksum_func_name, f.unique_id, 0, tail_size, + static_cast<bool>( + f.table_properties.user_defined_timestamps_persisted)); f_metadata.temperature = f.file_temperature; edit_.AddFile(f.picked_level, f_metadata); } @@ -684,6 +686,9 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo( sst_file_reader.reset(new RandomAccessFileReader( std::move(sst_file), external_file, nullptr /*Env*/, io_tracer_)); + // TODO(yuzhangyu): User-defined timestamps doesn't support external sst file + // ingestion. Pass in the correct `user_defined_timestamps_persisted` flag + // for creating `TableReaderOptions` when the support is there. 
status = cfd_->ioptions()->table_factory->NewTableReader( TableReaderOptions( *cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor, diff --git a/db/flush_job.cc b/db/flush_job.cc index 4c6b329dc..3854e967a 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -1011,7 +1011,7 @@ Status FlushJob::WriteLevel0Table() { meta_.file_creation_time, meta_.epoch_number, meta_.file_checksum, meta_.file_checksum_func_name, meta_.unique_id, meta_.compensated_range_deletion_size, - meta_.tail_size); + meta_.tail_size, meta_.user_defined_timestamps_persisted); edit_->SetBlobFileAdditions(std::move(blob_file_additions)); } // Piggyback FlushJobInfo on the first first flushed memtable. diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc index c6c9099bc..11c5fd41b 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -211,7 +211,9 @@ Status ImportColumnFamilyJob::Run() { file_metadata.temperature, kInvalidBlobFileNumber, oldest_ancester_time, current_time, file_metadata.epoch_number, kUnknownFileChecksum, kUnknownFileChecksumFuncName, f.unique_id, 0, - tail_size); + tail_size, + static_cast<bool>( + f.table_properties.user_defined_timestamps_persisted)); s = dummy_version_builder.Apply(&dummy_version_edit); } } @@ -318,6 +320,9 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo( sst_file_reader.reset(new RandomAccessFileReader( std::move(sst_file), external_file, nullptr /*Env*/, io_tracer_)); + // TODO(yuzhangyu): User-defined timestamps doesn't support importing column + // family. Pass in the correct `user_defined_timestamps_persisted` flag for + // creating `TableReaderOptions` when the support is there. 
status = cfd_->ioptions()->table_factory->NewTableReader( TableReaderOptions( *cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor, diff --git a/db/repair.cc b/db/repair.cc index 8e0b5cf41..58ada3aeb 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -560,6 +560,8 @@ class Repairer { AddColumnFamily(props->column_family_name, t->column_family_id); } t->meta.oldest_ancester_time = props->creation_time; + t->meta.user_defined_timestamps_persisted = + static_cast<bool>(props->user_defined_timestamps_persisted); } if (status.ok()) { uint64_t tail_size = 0; @@ -703,7 +705,8 @@ class Repairer { table->meta.oldest_ancester_time, table->meta.file_creation_time, table->meta.epoch_number, table->meta.file_checksum, table->meta.file_checksum_func_name, table->meta.unique_id, - table->meta.compensated_range_deletion_size, table->meta.tail_size); + table->meta.compensated_range_deletion_size, table->meta.tail_size, + table->meta.user_defined_timestamps_persisted); } s = dummy_version_builder.Apply(&dummy_edit); if (s.ok()) { diff --git a/db/table_cache.cc b/db/table_cache.cc index 9f41e8555..0bbec70e1 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -146,7 +146,8 @@ Status TableCache::GetTableReader( false /* force_direct_prefetch */, level, block_cache_tracer_, max_file_size_for_l0_meta_pin, db_session_id_, file_meta.fd.GetNumber(), expected_unique_id, - file_meta.fd.largest_seqno, file_meta.tail_size), + file_meta.fd.largest_seqno, file_meta.tail_size, + file_meta.user_defined_timestamps_persisted), std::move(file_reader), file_meta.fd.GetFileSize(), table_reader, prefetch_index_and_filter_in_cache); TEST_SYNC_POINT("TableCache::GetTableReader:0"); diff --git a/db/version_builder_test.cc b/db/version_builder_test.cc index af41f00f6..ac80be7ca 100644 --- a/db/version_builder_test.cc +++ b/db/version_builder_test.cc @@ -73,7 +73,8 @@ class VersionBuilderTest : public testing::Test { /* marked_for_compact */ false, Temperature::kUnknown, oldest_blob_file_number, 
kUnknownOldestAncesterTime, kUnknownFileCreationTime, epoch_number, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, + /* user_defined_timestamps_persisted */ true); f->compensated_file_size = file_size; f->num_entries = num_entries; f->num_deletions = num_deletions; @@ -130,13 +131,13 @@ class VersionBuilderTest : public testing::Test { constexpr SequenceNumber largest_seqno = 300; constexpr bool marked_for_compaction = false; - edit->AddFile(level, table_file_number, path_id, file_size, - GetInternalKey(smallest), GetInternalKey(largest), - smallest_seqno, largest_seqno, marked_for_compaction, - Temperature::kUnknown, blob_file_number, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - epoch_number, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + edit->AddFile( + level, table_file_number, path_id, file_size, GetInternalKey(smallest), + GetInternalKey(largest), smallest_seqno, largest_seqno, + marked_for_compaction, Temperature::kUnknown, blob_file_number, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, epoch_number, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, + 0, 0, /* user_defined_timestamps_persisted */ true); } void UpdateVersionStorageInfo(VersionStorageInfo* vstorage) { @@ -183,12 +184,12 @@ TEST_F(VersionBuilderTest, ApplyAndSaveTo) { UpdateVersionStorageInfo(); VersionEdit version_edit; - version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"), - GetInternalKey("350"), 200, 200, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + version_edit.AddFile( + 2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, 
kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); version_edit.DeleteFile(3, 27U); EnvOptions env_options; @@ -231,12 +232,12 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic) { UpdateVersionStorageInfo(); VersionEdit version_edit; - version_edit.AddFile(3, 666, 0, 100U, GetInternalKey("301"), - GetInternalKey("350"), 200, 200, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + version_edit.AddFile( + 3, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); version_edit.DeleteFile(0, 1U); version_edit.DeleteFile(0, 88U); @@ -283,12 +284,12 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic2) { UpdateVersionStorageInfo(); VersionEdit version_edit; - version_edit.AddFile(4, 666, 0, 100U, GetInternalKey("301"), - GetInternalKey("350"), 200, 200, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + version_edit.AddFile( + 4, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); version_edit.DeleteFile(0, 1U); version_edit.DeleteFile(0, 88U); version_edit.DeleteFile(4, 6U); @@ 
-320,36 +321,36 @@ TEST_F(VersionBuilderTest, ApplyMultipleAndSaveTo) { UpdateVersionStorageInfo(); VersionEdit version_edit; - version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"), - GetInternalKey("350"), 200, 200, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); - version_edit.AddFile(2, 676, 0, 100U, GetInternalKey("401"), - GetInternalKey("450"), 200, 200, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); - version_edit.AddFile(2, 636, 0, 100U, GetInternalKey("601"), - GetInternalKey("650"), 200, 200, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); - version_edit.AddFile(2, 616, 0, 100U, GetInternalKey("501"), - GetInternalKey("550"), 200, 200, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); - version_edit.AddFile(2, 606, 0, 100U, GetInternalKey("701"), - GetInternalKey("750"), 200, 200, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + version_edit.AddFile( + 2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 
+ 0, /* user_defined_timestamps_persisted */ true); + version_edit.AddFile( + 2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); + version_edit.AddFile( + 2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); + version_edit.AddFile( + 2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); + version_edit.AddFile( + 2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; @@ -384,53 +385,53 @@ TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) { kCompactionStyleLevel, nullptr, false); VersionEdit version_edit; - version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"), - GetInternalKey("350"), 200, 200, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, 
kNullUniqueId64x2, 0, 0); - version_edit.AddFile(2, 676, 0, 100U, GetInternalKey("401"), - GetInternalKey("450"), 200, 200, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); - version_edit.AddFile(2, 636, 0, 100U, GetInternalKey("601"), - GetInternalKey("650"), 200, 200, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); - version_edit.AddFile(2, 616, 0, 100U, GetInternalKey("501"), - GetInternalKey("550"), 200, 200, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); - version_edit.AddFile(2, 606, 0, 100U, GetInternalKey("701"), - GetInternalKey("750"), 200, 200, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + version_edit.AddFile( + 2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); + version_edit.AddFile( + 2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); + 
version_edit.AddFile( + 2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); + version_edit.AddFile( + 2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); + version_edit.AddFile( + 2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); ASSERT_OK(version_builder.Apply(&version_edit)); VersionEdit version_edit2; - version_edit.AddFile(2, 808, 0, 100U, GetInternalKey("901"), - GetInternalKey("950"), 200, 200, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + version_edit.AddFile( + 2, 808, 0, 100U, GetInternalKey("901"), GetInternalKey("950"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); version_edit2.DeleteFile(2, 616); version_edit2.DeleteFile(2, 636); - version_edit.AddFile(2, 806, 0, 100U, GetInternalKey("801"), - GetInternalKey("850"), 200, 200, false, 
- Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + version_edit.AddFile( + 2, 806, 0, 100U, GetInternalKey("801"), GetInternalKey("850"), 200, 200, + false, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); ASSERT_OK(version_builder.Apply(&version_edit2)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -535,14 +536,14 @@ TEST_F(VersionBuilderTest, ApplyFileDeletionAndAddition) { constexpr bool marked_for_compaction = false; - addition.AddFile(level, file_number, path_id, file_size, - GetInternalKey(smallest, smallest_seq), - GetInternalKey(largest, largest_seq), smallest_seqno, - largest_seqno, marked_for_compaction, Temperature::kUnknown, - kInvalidBlobFileNumber, kUnknownOldestAncesterTime, - kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0, 0); + addition.AddFile( + level, file_number, path_id, file_size, + GetInternalKey(smallest, smallest_seq), + GetInternalKey(largest, largest_seq), smallest_seqno, largest_seqno, + marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); ASSERT_OK(builder.Apply(&addition)); @@ -586,13 +587,13 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyInBase) { constexpr SequenceNumber largest_seqno = 1000; constexpr bool marked_for_compaction = false; - edit.AddFile(new_level, file_number, path_id, file_size, - GetInternalKey(smallest), GetInternalKey(largest), - smallest_seqno, 
largest_seqno, marked_for_compaction, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + edit.AddFile( + new_level, file_number, path_id, file_size, GetInternalKey(smallest), + GetInternalKey(largest), smallest_seqno, largest_seqno, + marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); const Status s = builder.Apply(&edit); ASSERT_TRUE(s.IsCorruption()); @@ -623,13 +624,13 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyApplied) { constexpr SequenceNumber largest_seqno = 1000; constexpr bool marked_for_compaction = false; - edit.AddFile(level, file_number, path_id, file_size, GetInternalKey(smallest), - GetInternalKey(largest), smallest_seqno, largest_seqno, - marked_for_compaction, Temperature::kUnknown, - kInvalidBlobFileNumber, kUnknownOldestAncesterTime, - kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0, 0); + edit.AddFile( + level, file_number, path_id, file_size, GetInternalKey(smallest), + GetInternalKey(largest), smallest_seqno, largest_seqno, + marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); ASSERT_OK(builder.Apply(&edit)); @@ -637,13 +638,13 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyApplied) { constexpr int new_level = 2; - other_edit.AddFile(new_level, file_number, path_id, file_size, - GetInternalKey(smallest), GetInternalKey(largest), - smallest_seqno, 
largest_seqno, marked_for_compaction, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + other_edit.AddFile( + new_level, file_number, path_id, file_size, GetInternalKey(smallest), + GetInternalKey(largest), smallest_seqno, largest_seqno, + marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); const Status s = builder.Apply(&other_edit); ASSERT_TRUE(s.IsCorruption()); @@ -674,13 +675,13 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAndDeletion) { VersionEdit addition; - addition.AddFile(level, file_number, path_id, file_size, - GetInternalKey(smallest), GetInternalKey(largest), - smallest_seqno, largest_seqno, marked_for_compaction, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + addition.AddFile( + level, file_number, path_id, file_size, GetInternalKey(smallest), + GetInternalKey(largest), smallest_seqno, largest_seqno, + marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, + 0, /* user_defined_timestamps_persisted */ true); ASSERT_OK(builder.Apply(&addition)); @@ -1253,7 +1254,8 @@ TEST_F(VersionBuilderTest, SaveBlobFilesToConcurrentJobs) { GetInternalKey(largest), smallest_seqno, largest_seqno, marked_for_compaction, Temperature::kUnknown, blob_file_number, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 2 /*epoch_number*/, - checksum_value, 
checksum_method, kNullUniqueId64x2, 0, 0); + checksum_value, checksum_method, kNullUniqueId64x2, 0, 0, + /* user_defined_timestamps_persisted */ true); edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); @@ -1341,7 +1343,8 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) { /* oldest_blob_file_number */ 16, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0, 0); + kNullUniqueId64x2, 0, 0, + /* user_defined_timestamps_persisted */ true); edit.AddFile(/* level */ 1, /* file_number */ 700, /* path_id */ 0, /* file_size */ 100, /* smallest */ GetInternalKey("801"), @@ -1351,7 +1354,8 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) { /* oldest_blob_file_number */ 1000, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0, 0); + kNullUniqueId64x2, 0, 0, + /* user_defined_timestamps_persisted */ true); edit.AddBlobFile(/* blob_file_number */ 1000, /* total_blob_count */ 2000, /* total_blob_bytes */ 200000, /* checksum_method */ std::string(), @@ -1572,7 +1576,8 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { Temperature::kUnknown, /* oldest_blob_file_number */ 1, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, + /* user_defined_timestamps_persisted */ true); // Add an SST that does not reference any blob files. 
edit.AddFile( @@ -1582,7 +1587,8 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { /* largest_seqno */ 2200, /* marked_for_compaction */ false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, + /* user_defined_timestamps_persisted */ true); // Delete a file that references a blob file. edit.DeleteFile(/* level */ 1, /* file_number */ 6); @@ -1605,7 +1611,8 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { /* oldest_blob_file_number */ 3, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0, 0); + kNullUniqueId64x2, 0, 0, + /* user_defined_timestamps_persisted */ true); // Trivially move a file that does not reference any blob files. edit.DeleteFile(/* level */ 1, /* file_number */ 13); @@ -1617,7 +1624,8 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, + /* user_defined_timestamps_persisted */ true); // Add one more SST file that references a blob file, then promptly // delete it in a second version edit before the new version gets saved. 
@@ -1631,7 +1639,8 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { /* oldest_blob_file_number */ 5, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0, 0); + kNullUniqueId64x2, 0, 0, + /* user_defined_timestamps_persisted */ true); VersionEdit edit2; @@ -1732,7 +1741,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) { /* oldest_blob_file_number */ kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0, 0); + kNullUniqueId64x2, 0, 0, /* user_defined_timestamps_persisted */ true); version_edit_1.AddFile( /* level */ 0, /* file_number */ 2U, /* path_id */ 0, /* file_size */ 100, /* smallest */ GetInternalKey("b", 2), @@ -1742,7 +1751,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) { /* oldest_blob_file_number */ kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0, 0); + kNullUniqueId64x2, 0, 0, /* user_defined_timestamps_persisted */ true); VersionBuilder version_builder_1(EnvOptions(), &ioptions_, nullptr /* table_cache */, &vstorage_, @@ -1769,7 +1778,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) { /* oldest_blob_file_number */ kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0, 0); + kNullUniqueId64x2, 0, 0, /* user_defined_timestamps_persisted */ true); version_edit_2.AddFile( /* level */ 0, /* file_number */ 2U, /* path_id */ 0, /* file_size */ 100, /* smallest */ GetInternalKey("b", 2), @@ -1779,7 +1788,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) { /* 
oldest_blob_file_number */ kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 2 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0, 0); + kNullUniqueId64x2, 0, 0, /* user_defined_timestamps_persisted */ true); VersionBuilder version_builder_2(EnvOptions(), &ioptions_, nullptr /* table_cache */, &vstorage_, diff --git a/db/version_edit.cc b/db/version_edit.cc index 4f1ae80d2..a9ef6aced 100644 --- a/db/version_edit.cc +++ b/db/version_edit.cc @@ -244,6 +244,14 @@ bool VersionEdit::EncodeTo(std::string* dst) const { PutVarint64(&varint_tail_size, f.tail_size); PutLengthPrefixedSlice(dst, Slice(varint_tail_size)); } + if (!f.user_defined_timestamps_persisted) { + // The default value for the flag is true, it's only explicitly persisted + // when it's false. We are putting 0 as the value here to signal false + // (i.e. UDTS not persisted). + PutVarint32(dst, NewFileCustomTag::kUserDefinedTimestampsPersisted); + char p = static_cast(0); + PutLengthPrefixedSlice(dst, Slice(&p, 1)); + } TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:NewFile4:CustomizeFields", dst); @@ -427,6 +435,12 @@ const char* VersionEdit::DecodeNewFile4From(Slice* input) { return "invalid tail start offset"; } break; + case kUserDefinedTimestampsPersisted: + if (field.size() != 1) { + return "user-defined timestamps persisted field wrong size"; + } + f.user_defined_timestamps_persisted = (field[0] == 1); + break; default: if ((custom_tag & kCustomTagNonSafeIgnoreMask) != 0) { // Should not proceed if cannot understand it @@ -862,8 +876,10 @@ std::string VersionEdit::DebugString(bool hex_key) const { InternalUniqueIdToExternal(&id); r.append(UniqueIdToHumanString(EncodeUniqueIdBytes(&id))); } - r.append(" tail size:"); + r.append(" tail size: "); AppendNumberTo(&r, f.tail_size); + r.append(" User-defined timestamps persisted: "); + r.append(f.user_defined_timestamps_persisted ? 
"true" : "false"); } for (const auto& blob_file_addition : blob_file_additions_) { @@ -980,6 +996,8 @@ std::string VersionEdit::DebugJSON(int edit_num, bool hex_key) const { jw << "Temperature" << static_cast(f.temperature); } jw << "TailSize" << f.tail_size; + jw << "UserDefinedTimestampsPersisted" + << f.user_defined_timestamps_persisted; jw.EndArrayedObject(); } diff --git a/db/version_edit.h b/db/version_edit.h index 07e8f3774..01c013b6c 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -91,6 +91,7 @@ enum NewFileCustomTag : uint32_t { kEpochNumber = 13, kCompensatedRangeDeletionSize = 14, kTailSize = 15, + kUserDefinedTimestampsPersisted = 16, // If this bit for the custom tag is set, opening DB should fail if // we don't know this field. @@ -243,6 +244,11 @@ struct FileMetaData { // "Tail" refers to all blocks after data blocks till the end of the SST file uint64_t tail_size = 0; + // Value of the `AdvancedColumnFamilyOptions.persist_user_defined_timestamps` + // flag when the file is created. Default to true, only when this flag is + // false, it's explicitly written to Manifest. 
+ bool user_defined_timestamps_persisted = true; + FileMetaData() = default; FileMetaData(uint64_t file, uint32_t file_path_id, uint64_t file_size, @@ -255,7 +261,7 @@ struct FileMetaData { const std::string& _file_checksum_func_name, UniqueId64x2 _unique_id, const uint64_t _compensated_range_deletion_size, - uint64_t _tail_size) + uint64_t _tail_size, bool _user_defined_timestamps_persisted) : fd(file, file_path_id, file_size, smallest_seq, largest_seq), smallest(smallest_key), largest(largest_key), @@ -269,7 +275,8 @@ struct FileMetaData { file_checksum(_file_checksum), file_checksum_func_name(_file_checksum_func_name), unique_id(std::move(_unique_id)), - tail_size(_tail_size) { + tail_size(_tail_size), + user_defined_timestamps_persisted(_user_defined_timestamps_persisted) { TEST_SYNC_POINT_CALLBACK("FileMetaData::FileMetaData", this); } @@ -454,7 +461,7 @@ class VersionEdit { const std::string& file_checksum_func_name, const UniqueId64x2& unique_id, const uint64_t compensated_range_deletion_size, - uint64_t tail_size) { + uint64_t tail_size, bool user_defined_timestamps_persisted) { assert(smallest_seqno <= largest_seqno); new_files_.emplace_back( level, @@ -463,7 +470,8 @@ class VersionEdit { temperature, oldest_blob_file_number, oldest_ancester_time, file_creation_time, epoch_number, file_checksum, file_checksum_func_name, unique_id, - compensated_range_deletion_size, tail_size)); + compensated_range_deletion_size, tail_size, + user_defined_timestamps_persisted)); if (!HasLastSequence() || largest_seqno > GetLastSequence()) { SetLastSequence(largest_seqno); } diff --git a/db/version_edit_test.cc b/db/version_edit_test.cc index da1a85999..a3bf6beff 100644 --- a/db/version_edit_test.cc +++ b/db/version_edit_test.cc @@ -45,7 +45,7 @@ TEST_F(VersionEditTest, EncodeDecode) { kBig + 500 + i, kBig + 600 + i, false, Temperature::kUnknown, kInvalidBlobFileNumber, 888, 678, kBig + 300 + i /* epoch_number */, "234", "crc32c", - kNullUniqueId64x2, 0, 0); + 
kNullUniqueId64x2, 0, 0, true); edit.DeleteFile(4, kBig + 700 + i); } @@ -65,24 +65,24 @@ TEST_F(VersionEditTest, EncodeDecodeNewFile4) { kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 300 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, true); edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue), InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501, kBig + 601, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 301 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, false); edit.AddFile(5, 302, 0, 100, InternalKey("foo", kBig + 502, kTypeValue), InternalKey("zoo", kBig + 602, kTypeDeletion), kBig + 502, kBig + 602, true, Temperature::kUnknown, kInvalidBlobFileNumber, 666, 888, 302 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, true); edit.AddFile(5, 303, 0, 100, InternalKey("foo", kBig + 503, kTypeBlobIndex), InternalKey("zoo", kBig + 603, kTypeBlobIndex), kBig + 503, kBig + 603, true, Temperature::kUnknown, 1001, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 303 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, true); edit.DeleteFile(4, 700); @@ -99,7 +99,7 @@ TEST_F(VersionEditTest, EncodeDecodeNewFile4) { ASSERT_TRUE(s.ok()) << s.ToString(); auto& new_files = parsed.GetNewFiles(); ASSERT_TRUE(new_files[0].second.marked_for_compaction); - ASSERT_TRUE(!new_files[1].second.marked_for_compaction); + ASSERT_FALSE(new_files[1].second.marked_for_compaction); 
ASSERT_TRUE(new_files[2].second.marked_for_compaction); ASSERT_TRUE(new_files[3].second.marked_for_compaction); ASSERT_EQ(3u, new_files[0].second.fd.GetPathId()); @@ -113,6 +113,10 @@ TEST_F(VersionEditTest, EncodeDecodeNewFile4) { ASSERT_EQ(kInvalidBlobFileNumber, new_files[2].second.oldest_blob_file_number); ASSERT_EQ(1001, new_files[3].second.oldest_blob_file_number); + ASSERT_TRUE(new_files[0].second.user_defined_timestamps_persisted); + ASSERT_FALSE(new_files[1].second.user_defined_timestamps_persisted); + ASSERT_TRUE(new_files[2].second.user_defined_timestamps_persisted); + ASSERT_TRUE(new_files[3].second.user_defined_timestamps_persisted); } TEST_F(VersionEditTest, ForwardCompatibleNewFile4) { @@ -123,12 +127,12 @@ TEST_F(VersionEditTest, ForwardCompatibleNewFile4) { kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 300 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, true); edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue), InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501, kBig + 601, false, Temperature::kUnknown, kInvalidBlobFileNumber, 686, 868, 301 /* epoch_number */, "234", "crc32c", - kNullUniqueId64x2, 0, 0); + kNullUniqueId64x2, 0, 0, true); edit.DeleteFile(4, 700); edit.SetComparatorName("foo"); @@ -177,7 +181,7 @@ TEST_F(VersionEditTest, NewFile4NotSupportedField) { kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 300 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, true); edit.SetComparatorName("foo"); edit.SetLogNumber(kBig + 100); @@ -208,7 +212,7 @@ TEST_F(VersionEditTest, EncodeEmptyFile) { Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, 
kUnknownFileCreationTime, 1 /*epoch_number*/, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, true); std::string buffer; ASSERT_TRUE(!edit.EncodeTo(&buffer)); } diff --git a/db/version_set.cc b/db/version_set.cc index f8b5ff6ce..8ce2b8277 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -6454,7 +6454,8 @@ Status VersionSet::WriteCurrentStateToManifest( f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, f->epoch_number, f->file_checksum, f->file_checksum_func_name, f->unique_id, - f->compensated_range_deletion_size, f->tail_size); + f->compensated_range_deletion_size, f->tail_size, + f->user_defined_timestamps_persisted); } } diff --git a/db/version_set_test.cc b/db/version_set_test.cc index c7925749b..a16cdc4c1 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -51,7 +51,8 @@ class GenerateLevelFilesBriefTest : public testing::Test { largest_seq, /* marked_for_compact */ false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, + /* user_defined_timestamps_persisted */ true); files_.push_back(f); } @@ -163,7 +164,8 @@ class VersionStorageInfoTestBase : public testing::Test { Temperature::kUnknown, oldest_blob_file_number, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, compensated_range_deletion_size, 0); + kNullUniqueId64x2, compensated_range_deletion_size, 0, + /* user_defined_timestamps_persisted */ true); vstorage_.AddFile(level, f); } @@ -3292,11 +3294,11 @@ class VersionSetTestMissingFiles : public VersionSetTestBase, s = fs_->GetFileSize(fname, IOOptions(), &file_size, nullptr); ASSERT_OK(s); ASSERT_NE(0, file_size); - 
file_metas->emplace_back(file_num, /*file_path_id=*/0, file_size, ikey, - ikey, 0, 0, false, Temperature::kUnknown, 0, 0, - 0, info.epoch_number, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, - 0, 0); + file_metas->emplace_back( + file_num, /*file_path_id=*/0, file_size, ikey, ikey, 0, 0, false, + Temperature::kUnknown, 0, 0, 0, info.epoch_number, + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, + 0, 0, /* user_defined_timestamps_persisted */ true); } } @@ -3353,7 +3355,8 @@ TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) { file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey, largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0, file_num /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, + /* user_defined_timestamps_persisted */ true); added_files.emplace_back(0, meta); } WriteFileAdditionAndDeletionToManifest( @@ -3414,7 +3417,8 @@ TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) { file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey, largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0, file_num /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, + /* user_defined_timestamps_persisted */ true); added_files.emplace_back(0, meta); } WriteFileAdditionAndDeletionToManifest( diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index ab259f930..ebde339dd 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -71,6 +71,7 @@ struct TablePropertiesNames { static const std::string kFastCompressionEstimatedDataSize; static const std::string kSequenceNumberTimeMapping; static const std::string kTailStartOffset; + static const std::string kUserDefinedTimestampsPersisted; }; // `TablePropertiesCollector` 
provides the mechanism for users to collect @@ -244,6 +245,11 @@ struct TableProperties { // "Tail" refers to all blocks after data blocks till the end of the SST file uint64_t tail_start_offset = 0; + // Value of the `AdvancedColumnFamilyOptions.persist_user_defined_timestamps` + // when the file is created. Default to be true, only when this flag is false, + // it's explicitly written to meta properties block. + uint64_t user_defined_timestamps_persisted = 1; + // DB identity // db_id is an identifier generated the first time the DB is created // If DB identity is unset or unassigned, `db_id` will be an empty string. diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index c3e312928..d3c70536d 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -1671,6 +1671,8 @@ void BlockBasedTableBuilder::WritePropertiesBlock( rep_->compressible_input_data_bytes + rep_->uncompressible_input_data_bytes; } + rep_->props.user_defined_timestamps_persisted = + rep_->persist_user_defined_timestamps; // Add basic properties property_block_builder.AddTableProperty(rep_->props); diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index 6fea536d6..cf756cfff 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -116,6 +116,10 @@ void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { props.fast_compression_estimated_data_size); } Add(TablePropertiesNames::kTailStartOffset, props.tail_start_offset); + if (props.user_defined_timestamps_persisted == 0) { + Add(TablePropertiesNames::kUserDefinedTimestampsPersisted, + props.user_defined_timestamps_persisted); + } if (!props.db_id.empty()) { Add(TablePropertiesNames::kDbId, props.db_id); } @@ -310,6 +314,8 @@ Status ReadTablePropertiesHelper( &new_table_properties->fast_compression_estimated_data_size}, {TablePropertiesNames::kTailStartOffset, &new_table_properties->tail_start_offset}, + 
{TablePropertiesNames::kUserDefinedTimestampsPersisted, + &new_table_properties->user_defined_timestamps_persisted}, }; std::string last_key; diff --git a/table/sst_file_dumper.cc b/table/sst_file_dumper.cc index f6d6e195d..e385ab72a 100644 --- a/table/sst_file_dumper.cc +++ b/table/sst_file_dumper.cc @@ -165,10 +165,16 @@ Status SstFileDumper::NewTableReader( const ImmutableOptions& /*ioptions*/, const EnvOptions& /*soptions*/, const InternalKeyComparator& /*internal_comparator*/, uint64_t file_size, std::unique_ptr* /*table_reader*/) { + // TODO(yuzhangyu): full support in sst_dump for SST files generated when + // `user_defined_timestamps_persisted` is false. auto t_opt = TableReaderOptions( ioptions_, moptions_.prefix_extractor, soptions_, internal_comparator_, 0 /* block_protection_bytes_per_key */, false /* skip_filters */, - false /* immortal */, true /* force_direct_prefetch */); + false /* immortal */, true /* force_direct_prefetch */, -1 /* level */, + nullptr /* block_cache_tracer */, 0 /* max_file_size_for_l0_meta_pin */, + "" /* cur_db_session_id */, 0 /* cur_file_num */, {} /* unique_id */, + 0 /* largest_seqno */, 0 /* tail_size */, + static_cast(table_properties_->user_defined_timestamps_persisted)); // Allow open file with global sequence number for backward compatibility. 
t_opt.largest_seqno = kMaxSequenceNumber; diff --git a/table/table_properties.cc b/table/table_properties.cc index 819d4da2a..06ea13f7c 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -305,6 +305,8 @@ const std::string TablePropertiesNames::kSequenceNumberTimeMapping = "rocksdb.seqno.time.map"; const std::string TablePropertiesNames::kTailStartOffset = "rocksdb.tail.start.offset"; +const std::string TablePropertiesNames::kUserDefinedTimestampsPersisted = + "rocksdb.user.defined.timestamps.persisted"; #ifndef NDEBUG // WARNING: TEST_SetRandomTableProperties assumes the following layout of diff --git a/unreleased_history/new_features/persist_udt_flag_in_manifest.md b/unreleased_history/new_features/persist_udt_flag_in_manifest.md new file mode 100644 index 000000000..13c0c0a70 --- /dev/null +++ b/unreleased_history/new_features/persist_udt_flag_in_manifest.md @@ -0,0 +1 @@ +*Start to record the value of the flag `AdvancedColumnFamilyOptions.persist_user_defined_timestamps` in the Manifest and table properties for a SST file when it is created. And use the recorded flag when creating a table reader for the SST file. This flag is only explicitly recorded if it's false. \ No newline at end of file