|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#include "db/compaction/compaction_picker_fifo.h"
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
|
|
|
|
#include <cinttypes>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
#include "db/column_family.h"
|
|
|
|
#include "logging/log_buffer.h"
|
|
|
|
#include "util/string_util.h"
|
|
|
|
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
namespace {
|
|
|
|
// Sums the on-disk size (`fd.file_size`) of every file in `files`.
// Returns 0 for an empty vector.
uint64_t GetTotalFilesSize(const std::vector<FileMetaData*>& files) {
  uint64_t sum = 0;
  for (auto it = files.begin(); it != files.end(); ++it) {
    const FileMetaData* meta = *it;
    sum += meta->fd.file_size;
  }
  return sum;
}
|
|
|
|
} // anonymous namespace
|
|
|
|
|
|
|
|
bool FIFOCompactionPicker::NeedsCompaction(
|
|
|
|
const VersionStorageInfo* vstorage) const {
|
|
|
|
const int kLevel0 = 0;
|
|
|
|
return vstorage->CompactionScore(kLevel0) >= 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
Compaction* FIFOCompactionPicker::PickTTLCompaction(
|
|
|
|
const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
|
|
|
|
const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
|
|
|
|
LogBuffer* log_buffer) {
|
|
|
|
assert(mutable_cf_options.ttl > 0);
|
|
|
|
|
|
|
|
const int kLevel0 = 0;
|
|
|
|
const std::vector<FileMetaData*>& level_files = vstorage->LevelFiles(kLevel0);
|
|
|
|
uint64_t total_size = GetTotalFilesSize(level_files);
|
|
|
|
|
|
|
|
int64_t _current_time;
|
|
|
|
auto status = ioptions_.clock->GetCurrentTime(&_current_time);
|
|
|
|
if (!status.ok()) {
|
|
|
|
ROCKS_LOG_BUFFER(log_buffer,
|
|
|
|
"[%s] FIFO compaction: Couldn't get current time: %s. "
|
|
|
|
"Not doing compactions based on TTL. ",
|
|
|
|
cf_name.c_str(), status.ToString().c_str());
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
const uint64_t current_time = static_cast<uint64_t>(_current_time);
|
|
|
|
|
Fix assertion failure in FIFO compaction with TTL (#5754)
Summary:
Before this PR, the following sequence of events can cause assertion failure as shown below.
Stack trace (partial):
```
(gdb) bt
2 0x00007f59b350ad15 in __assert_fail_base (fmt=<optimized out>, assertion=assertion@entry=0x9f8390 "mark_as_compacted ? !inputs_[i][j]->being_compacted : inputs_[i][j]->being_compacted", file=file@entry=0x9e347c "db/compaction/compaction.cc", line=line@entry=395, function=function@entry=0xa21ec0 <rocksdb::Compaction::MarkFilesBeingCompacted(bool)::__PRETTY_FUNCTION__> "void rocksdb::Compaction::MarkFilesBeingCompacted(bool)") at assert.c:92
3 0x00007f59b350adc3 in __GI___assert_fail (assertion=assertion@entry=0x9f8390 "mark_as_compacted ? !inputs_[i][j]->being_compacted : inputs_[i][j]->being_compacted", file=file@entry=0x9e347c "db/compaction/compaction.cc", line=line@entry=395, function=function@entry=0xa21ec0 <rocksdb::Compaction::MarkFilesBeingCompacted(bool)::__PRETTY_FUNCTION__> "void rocksdb::Compaction::MarkFilesBeingCompacted(bool)") at assert.c:101
4 0x0000000000492ccd in rocksdb::Compaction::MarkFilesBeingCompacted (this=<optimized out>, mark_as_compacted=<optimized out>) at db/compaction/compaction.cc:394
5 0x000000000049467a in rocksdb::Compaction::Compaction (this=0x7f59af013000, vstorage=0x7f581af53030, _immutable_cf_options=..., _mutable_cf_options=..., _inputs=..., _output_level=<optimized out>, _target_file_size=0, _max_compaction_bytes=0, _output_path_id=0, _compression=<incomplete type>, _compression_opts=..., _max_subcompactions=0, _grandparents=..., _manual_compaction=false, _score=4, _deletion_compaction=true, _compaction_reason=rocksdb::CompactionReason::kFIFOTtl) at db/compaction/compaction.cc:241
6 0x00000000004af9bc in rocksdb::FIFOCompactionPicker::PickTTLCompaction (this=0x7f59b31a6900, cf_name=..., mutable_cf_options=..., vstorage=0x7f581af53030, log_buffer=log_buffer@entry=0x7f59b1bfa930) at db/compaction/compaction_picker_fifo.cc:101
7 0x00000000004b0771 in rocksdb::FIFOCompactionPicker::PickCompaction (this=0x7f59b31a6900, cf_name=..., mutable_cf_options=..., vstorage=0x7f581af53030, log_buffer=0x7f59b1bfa930) at db/compaction/compaction_picker_fifo.cc:201
8 0x00000000004838cc in rocksdb::ColumnFamilyData::PickCompaction (this=this@entry=0x7f59b31b3700, mutable_options=..., log_buffer=log_buffer@entry=0x7f59b1bfa930) at db/column_family.cc:933
9 0x00000000004f3645 in rocksdb::DBImpl::BackgroundCompaction (this=this@entry=0x7f59b3176000, made_progress=made_progress@entry=0x7f59b1bfa6bf, job_context=job_context@entry=0x7f59b1bfa760, log_buffer=log_buffer@entry=0x7f59b1bfa930, prepicked_compaction=prepicked_compaction@entry=0x0, thread_pri=rocksdb::Env::LOW) at db/db_impl/db_impl_compaction_flush.cc:2541
10 0x00000000004f5e2a in rocksdb::DBImpl::BackgroundCallCompaction (this=this@entry=0x7f59b3176000, prepicked_compaction=prepicked_compaction@entry=0x0, bg_thread_pri=bg_thread_pri@entry=rocksdb::Env::LOW) at db/db_impl/db_impl_compaction_flush.cc:2312
11 0x00000000004f648e in rocksdb::DBImpl::BGWorkCompaction (arg=<optimized out>) at db/db_impl/db_impl_compaction_flush.cc:2087
```
This can be caused by the following sequence of events.
```
Time
| thr bg_compact_thr1 bg_compact_thr2
| write
| flush
| mark all l0 as being compacted
| write
| flush
| add cf to queue again
| mark all l0 as being
| compacted, fail the
| assertion
V
```
Test plan (on devserver)
Since bg_compact_thr1 and bg_compact_thr2 are two threads executing the same
code, it is difficult to use sync point dependency to
coordinate their execution. Therefore, I choose to use db_stress.
```
$TEST_TMPDIR=/dev/shm/rocksdb ./db_stress --periodic_compaction_seconds=1 --max_background_compactions=20 --format_version=2 --memtablerep=skip_list --max_write_buffer_number=3 --cache_index_and_filter_blocks=1 --reopen=20 --recycle_log_file_num=0 --acquire_snapshot_one_in=10000 --delpercent=4 --log2_keys_per_lock=22 --compaction_ttl=1 --block_size=16384 --use_multiget=1 --compact_files_one_in=1000000 --target_file_size_multiplier=2 --clear_column_family_one_in=0 --max_bytes_for_level_base=10485760 --use_full_merge_v1=1 --target_file_size_base=2097152 --checkpoint_one_in=1000000 --mmap_read=0 --compression_type=zstd --writepercent=35 --readpercent=45 --subcompactions=4 --use_merge=0 --write_buffer_size=4194304 --test_batches_snapshots=0 --db=/dev/shm/rocksdb/rocksdb_crashtest_whitebox --use_direct_reads=0 --compact_range_one_in=1000000 --open_files=-1 --destroy_db_initially=0 --progress_reports=0 --compression_zstd_max_train_bytes=0 --snapshot_hold_ops=100000 --enable_pipelined_write=0 --nooverwritepercent=1 --compression_max_dict_bytes=0 --max_key=1000000 --prefixpercent=5 --flush_one_in=1000000 --ops_per_thread=40000 --index_block_restart_interval=7 --cache_size=1048576 --compaction_style=2 --verify_checksum=1 --delrangepercent=1 --use_direct_io_for_flush_and_compaction=0
```
This should see no assertion failure.
Last but not least,
```
$COMPILE_WITH_ASAN=1 make -j32 all
$make check
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5754
Differential Revision: D17109791
Pulled By: riversand963
fbshipit-source-id: 25fc46101235add158554e096540b72c324be078
5 years ago
|
|
|
if (!level0_compactions_in_progress_.empty()) {
|
|
|
|
ROCKS_LOG_BUFFER(
|
|
|
|
log_buffer,
|
|
|
|
"[%s] FIFO compaction: Already executing compaction. No need "
|
|
|
|
"to run parallel compactions since compactions are very fast",
|
|
|
|
cf_name.c_str());
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<CompactionInputFiles> inputs;
|
|
|
|
inputs.emplace_back();
|
|
|
|
inputs[0].level = 0;
|
|
|
|
|
|
|
|
// avoid underflow
|
|
|
|
if (current_time > mutable_cf_options.ttl) {
|
|
|
|
for (auto ritr = level_files.rbegin(); ritr != level_files.rend(); ++ritr) {
|
|
|
|
FileMetaData* f = *ritr;
|
|
|
|
assert(f);
|
|
|
|
if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) {
|
|
|
|
uint64_t creation_time =
|
|
|
|
f->fd.table_reader->GetTableProperties()->creation_time;
|
|
|
|
if (creation_time == 0 ||
|
|
|
|
creation_time >= (current_time - mutable_cf_options.ttl)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
total_size -= f->compensated_file_size;
|
|
|
|
inputs[0].files.push_back(f);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return a nullptr and proceed to size-based FIFO compaction if:
|
|
|
|
// 1. there are no files older than ttl OR
|
|
|
|
// 2. there are a few files older than ttl, but deleting them will not bring
|
|
|
|
// the total size to be less than max_table_files_size threshold.
|
|
|
|
if (inputs[0].files.empty() ||
|
|
|
|
total_size >
|
|
|
|
mutable_cf_options.compaction_options_fifo.max_table_files_size) {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (const auto& f : inputs[0].files) {
|
|
|
|
uint64_t creation_time = 0;
|
|
|
|
assert(f);
|
|
|
|
if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) {
|
|
|
|
creation_time = f->fd.table_reader->GetTableProperties()->creation_time;
|
|
|
|
}
|
|
|
|
ROCKS_LOG_BUFFER(log_buffer,
|
|
|
|
"[%s] FIFO compaction: picking file %" PRIu64
|
|
|
|
" with creation time %" PRIu64 " for deletion",
|
|
|
|
cf_name.c_str(), f->fd.GetNumber(), creation_time);
|
|
|
|
}
|
|
|
|
|
|
|
|
Compaction* c = new Compaction(
|
|
|
|
vstorage, ioptions_, mutable_cf_options, mutable_db_options,
|
|
|
|
std::move(inputs), 0, 0, 0, 0, kNoCompression,
|
|
|
|
mutable_cf_options.compression_opts,
|
|
|
|
/* max_subcompactions */ 0, {}, /* is manual */ false,
|
|
|
|
vstorage->CompactionScore(0),
|
|
|
|
/* is deletion compaction */ true, CompactionReason::kFIFOTtl);
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Picks a FIFO compaction based on the total size of the level-0 files.
//
// While the total size is within
// compaction_options_fifo.max_table_files_size (or level 0 is empty):
//   - if compaction_options_fifo.allow_compaction is set and
//     FindIntraL0Compaction() finds suitable inputs, returns an intra-L0
//     compaction with reason kFIFOReduceNumFiles;
//   - otherwise logs "nothing to do" and returns nullptr.
//
// Once the total size exceeds the limit, returns nullptr if another level-0
// compaction is already in progress. Otherwise it picks files from the back of
// the level-0 file list until the remaining total size is within the limit,
// and returns a deletion compaction with reason kFIFOMaxSize.
Compaction* FIFOCompactionPicker::PickSizeCompaction(
    const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
    const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
    LogBuffer* log_buffer) {
  const int kLevel0 = 0;
  const std::vector<FileMetaData*>& level_files = vstorage->LevelFiles(kLevel0);
  // Sum of fd.file_size over all level-0 files.
  uint64_t total_size = GetTotalFilesSize(level_files);

  if (total_size <=
          mutable_cf_options.compaction_options_fifo.max_table_files_size ||
      level_files.empty()) {
    // total size not exceeded
    if (mutable_cf_options.compaction_options_fifo.allow_compaction &&
        !level_files.empty()) {
      CompactionInputFiles comp_inputs;
      // try to prevent same files from being compacted multiple times, which
      // could produce large files that may never TTL-expire. Achieve this by
      // disallowing compactions with files larger than memtable (inflate its
      // size by 10% to account for uncompressed L0 files that may have size
      // slightly greater than memtable size limit).
      size_t max_compact_bytes_per_del_file =
          static_cast<size_t>(MultiplyCheckOverflow(
              static_cast<uint64_t>(mutable_cf_options.write_buffer_size),
              1.1));
      if (FindIntraL0Compaction(
              level_files,
              mutable_cf_options
                  .level0_file_num_compaction_trigger /* min_files_to_compact */
              ,
              max_compact_bytes_per_del_file,
              mutable_cf_options.max_compaction_bytes, &comp_inputs)) {
        Compaction* c = new Compaction(
            vstorage, ioptions_, mutable_cf_options, mutable_db_options,
            {comp_inputs}, 0, 16 * 1024 * 1024 /* output file size limit */,
            0 /* max compaction bytes, not applicable */,
            0 /* output path ID */, mutable_cf_options.compression,
            mutable_cf_options.compression_opts, 0 /* max_subcompactions */, {},
            /* is manual */ false, vstorage->CompactionScore(0),
            /* is deletion compaction */ false,
            CompactionReason::kFIFOReduceNumFiles);
        return c;
      }
    }

    ROCKS_LOG_BUFFER(
        log_buffer,
        "[%s] FIFO compaction: nothing to do. Total size %" PRIu64
        ", max size %" PRIu64 "\n",
        cf_name.c_str(), total_size,
        mutable_cf_options.compaction_options_fifo.max_table_files_size);
    return nullptr;
  }

  // Only one level-0 compaction at a time.
  if (!level0_compactions_in_progress_.empty()) {
    ROCKS_LOG_BUFFER(
        log_buffer,
        "[%s] FIFO compaction: Already executing compaction. No need "
        "to run parallel compactions since compactions are very fast",
        cf_name.c_str());
    return nullptr;
  }

  std::vector<CompactionInputFiles> inputs;
  inputs.emplace_back();
  inputs[0].level = 0;

  for (auto ritr = level_files.rbegin(); ritr != level_files.rend(); ++ritr) {
    auto f = *ritr;
    // Subtract the same quantity GetTotalFilesSize() accumulated
    // (fd.file_size). Subtracting compensated_file_size here mixes two
    // different size measures; if it exceeds the remaining total the unsigned
    // value wraps around, the stop condition below never becomes true and
    // every level-0 file ends up picked for deletion.
    total_size -= f->fd.file_size;
    inputs[0].files.push_back(f);
    char tmp_fsize[16];
    AppendHumanBytes(f->fd.GetFileSize(), tmp_fsize, sizeof(tmp_fsize));
    ROCKS_LOG_BUFFER(log_buffer,
                     "[%s] FIFO compaction: picking file %" PRIu64
                     " with size %s for deletion",
                     cf_name.c_str(), f->fd.GetNumber(), tmp_fsize);
    // Stop as soon as the remaining files fit within the size limit.
    if (total_size <=
        mutable_cf_options.compaction_options_fifo.max_table_files_size) {
      break;
    }
  }

  Compaction* c = new Compaction(
      vstorage, ioptions_, mutable_cf_options, mutable_db_options,
      std::move(inputs), 0, 0, 0, 0, kNoCompression,
      mutable_cf_options.compression_opts,
      /* max_subcompactions */ 0, {}, /* is manual */ false,
      vstorage->CompactionScore(0),
      /* is deletion compaction */ true, CompactionReason::kFIFOMaxSize);
  return c;
}
|
|
|
|
|
|
|
|
// Entry point for automatic FIFO compaction picking.
//
// When a TTL is configured, TTL-based picking is attempted first; if that
// yields nothing (or no TTL is set), size-based picking is used. The result,
// which may be nullptr, is passed to RegisterCompaction() and returned.
Compaction* FIFOCompactionPicker::PickCompaction(
    const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
    const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
    LogBuffer* log_buffer, SequenceNumber /*earliest_memtable_seqno*/) {
  assert(vstorage->num_levels() == 1);

  const bool ttl_enabled = mutable_cf_options.ttl > 0;
  Compaction* picked =
      ttl_enabled
          ? PickTTLCompaction(cf_name, mutable_cf_options, mutable_db_options,
                              vstorage, log_buffer)
          : nullptr;

  if (!picked) {
    picked = PickSizeCompaction(cf_name, mutable_cf_options,
                                mutable_db_options, vstorage, log_buffer);
  }

  RegisterCompaction(picked);
  return picked;
}
|
|
|
|
|
|
|
|
// Manual compaction entry point for FIFO.
//
// The requested key range and range options are ignored: this sets
// `*compaction_end` to nullptr and delegates to PickCompaction(), using a
// local LogBuffer that is flushed to the logger before returning.
// Both `input_level` and `output_level` are asserted to be 0.
Compaction* FIFOCompactionPicker::CompactRange(
    const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
    const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
    int input_level, int output_level,
    const CompactRangeOptions& /*compact_range_options*/,
    const InternalKey* /*begin*/, const InternalKey* /*end*/,
    InternalKey** compaction_end, bool* /*manual_conflict*/,
    uint64_t /*max_file_num_to_ignore*/) {
  // The level arguments are only read by the assertions below; these casts
  // keep release builds (NDEBUG) free of unused-parameter warnings.
  (void)input_level;
  (void)output_level;
  assert(input_level == 0);
  assert(output_level == 0);

  *compaction_end = nullptr;

  LogBuffer local_log(InfoLogLevel::INFO_LEVEL, ioptions_.logger);
  Compaction* const picked = PickCompaction(
      cf_name, mutable_cf_options, mutable_db_options, vstorage, &local_log);
  local_log.FlushBufferToLog();
  return picked;
}
|
|
|
|
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|
|
#endif // !ROCKSDB_LITE
|