Use flush time for the props.creation_time for FIFO compaction (#6612)

Summary:
For FIFO compaction, we use flush time instead of oldest key time as the
creation time. This is to prevent FIFO compaction dropping files whose oldest
key time is older than TTL but which has newer keys than TTL.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6612

Test Plan: make check

Reviewed By: siying

Differential Revision: D20748217

Pulled By: riversand963

fbshipit-source-id: 3f7b00a847020760537cdddd12f6fe039e5bc663
main
Yanqin Jin 5 years ago committed by Facebook GitHub Bot
parent eaf95c7d1a
commit 18cf0de640
  1. 4
      HISTORY.md
  2. 18
      db/compaction/compaction_picker_fifo.cc
  3. 25
      db/db_compaction_test.cc
  4. 8
      db/flush_job.cc
  5. 2
      include/rocksdb/advanced_options.h

@ -1,4 +1,8 @@
# Rocksdb Change Log # Rocksdb Change Log
## Unreleased
### Behavior changes
* Since RocksDB 6.8, ttl-based FIFO compaction can drop a file whose oldest key becomes older than options.ttl while others have not. This fix reverts this and makes ttl-based FIFO compaction use the file's flush time as the criterion. This fix also requires that max_open_files = -1 and compaction_options_fifo.allow_compaction = false to function properly.
## 6.9.0 (03/29/2020) ## 6.9.0 (03/29/2020)
### Public API Change ### Public API Change
* Fix spelling so that API now has correctly spelled transaction state name `COMMITTED`, while the old misspelled `COMMITED` is still available as an alias. * Fix spelling so that API now has correctly spelled transaction state name `COMMITTED`, while the old misspelled `COMMITED` is still available as an alias.

@ -71,10 +71,13 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction(
if (current_time > mutable_cf_options.ttl) { if (current_time > mutable_cf_options.ttl) {
for (auto ritr = level_files.rbegin(); ritr != level_files.rend(); ++ritr) { for (auto ritr = level_files.rbegin(); ritr != level_files.rend(); ++ritr) {
FileMetaData* f = *ritr; FileMetaData* f = *ritr;
uint64_t creation_time = f->TryGetFileCreationTime(); if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) {
if (creation_time == kUnknownFileCreationTime || uint64_t creation_time =
creation_time >= (current_time - mutable_cf_options.ttl)) { f->fd.table_reader->GetTableProperties()->creation_time;
break; if (creation_time == 0 ||
creation_time >= (current_time - mutable_cf_options.ttl)) {
break;
}
} }
total_size -= f->compensated_file_size; total_size -= f->compensated_file_size;
inputs[0].files.push_back(f); inputs[0].files.push_back(f);
@ -92,11 +95,14 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction(
} }
for (const auto& f : inputs[0].files) { for (const auto& f : inputs[0].files) {
uint64_t creation_time = 0;
if (f && f->fd.table_reader && f->fd.table_reader->GetTableProperties()) {
creation_time = f->fd.table_reader->GetTableProperties()->creation_time;
}
ROCKS_LOG_BUFFER(log_buffer, ROCKS_LOG_BUFFER(log_buffer,
"[%s] FIFO compaction: picking file %" PRIu64 "[%s] FIFO compaction: picking file %" PRIu64
" with creation time %" PRIu64 " for deletion", " with creation time %" PRIu64 " for deletion",
cf_name.c_str(), f->fd.GetNumber(), cf_name.c_str(), f->fd.GetNumber(), creation_time);
f->TryGetFileCreationTime());
} }
Compaction* c = new Compaction( Compaction* c = new Compaction(

@ -5217,31 +5217,6 @@ TEST_P(DBCompactionTestWithParam,
} }
} }
TEST_F(DBCompactionTest, FifoCompactionGetFileCreationTime) {
MockEnv mock_env(env_);
do {
Options options = CurrentOptions();
options.table_factory.reset(new BlockBasedTableFactory());
options.env = &mock_env;
options.ttl = static_cast<uint64_t>(24) * 3600;
options.compaction_style = kCompactionStyleFIFO;
constexpr size_t kNumFiles = 24;
options.max_open_files = 20;
constexpr size_t kNumKeysPerFile = 10;
DestroyAndReopen(options);
for (size_t i = 0; i < kNumFiles; ++i) {
for (size_t j = 0; j < kNumKeysPerFile; ++j) {
ASSERT_OK(Put(std::to_string(j), "value_" + std::to_string(i)));
}
ASSERT_OK(Flush());
}
mock_env.FakeSleepForMicroseconds(
static_cast<uint64_t>(1000 * 1000 * (1 + options.ttl)));
ASSERT_OK(Put("foo", "value"));
ASSERT_OK(Flush());
} while (ChangeOptions());
}
#endif // !defined(ROCKSDB_LITE) #endif // !defined(ROCKSDB_LITE)
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

@ -375,6 +375,11 @@ Status FlushJob::WriteLevel0Table() {
meta_.oldest_ancester_time = std::min(current_time, oldest_key_time); meta_.oldest_ancester_time = std::min(current_time, oldest_key_time);
meta_.file_creation_time = current_time; meta_.file_creation_time = current_time;
uint64_t creation_time = (cfd_->ioptions()->compaction_style ==
CompactionStyle::kCompactionStyleFIFO)
? current_time
: meta_.oldest_ancester_time;
IOStatus io_s; IOStatus io_s;
s = BuildTable( s = BuildTable(
dbname_, db_options_.env, db_options_.fs.get(), *cfd_->ioptions(), dbname_, db_options_.env, db_options_.fs.get(), *cfd_->ioptions(),
@ -388,8 +393,7 @@ Status FlushJob::WriteLevel0Table() {
mutable_cf_options_.paranoid_file_checks, cfd_->internal_stats(), mutable_cf_options_.paranoid_file_checks, cfd_->internal_stats(),
TableFileCreationReason::kFlush, &io_s, event_logger_, TableFileCreationReason::kFlush, &io_s, event_logger_,
job_context_->job_id, Env::IO_HIGH, &table_properties_, 0 /* level */, job_context_->job_id, Env::IO_HIGH, &table_properties_, 0 /* level */,
meta_.oldest_ancester_time, oldest_key_time, write_hint, creation_time, oldest_key_time, write_hint, current_time);
current_time);
if (!io_s.ok()) { if (!io_s.ok()) {
io_status_ = io_s; io_status_ = io_s;
} }

@ -645,6 +645,7 @@ struct AdvancedColumnFamilyOptions {
bool report_bg_io_stats = false; bool report_bg_io_stats = false;
// Files older than TTL will go through the compaction process. // Files older than TTL will go through the compaction process.
// Pre-req: This needs max_open_files to be set to -1.
// In Level: Non-bottom-level files older than TTL will go through the // In Level: Non-bottom-level files older than TTL will go through the
// compation process. // compation process.
// In FIFO: Files older than TTL will be deleted. // In FIFO: Files older than TTL will be deleted.
@ -672,6 +673,7 @@ struct AdvancedColumnFamilyOptions {
// Supported in Level and FIFO compaction. // Supported in Level and FIFO compaction.
// In FIFO compaction, this option has the same meaning as TTL and whichever // In FIFO compaction, this option has the same meaning as TTL and whichever
// stricter will be used. // stricter will be used.
// Pre-req: max_open_file == -1.
// unit: seconds. Ex: 7 days = 7 * 24 * 60 * 60 // unit: seconds. Ex: 7 days = 7 * 24 * 60 * 60
// //
// Values: // Values:

Loading…
Cancel
Save