@ -20,6 +20,8 @@
# include <utility>
# include <utility>
# include <vector>
# include <vector>
# include "db/blob/blob_file_addition.h"
# include "db/blob/blob_file_builder.h"
# include "db/builder.h"
# include "db/builder.h"
# include "db/db_impl/db_impl.h"
# include "db/db_impl/db_impl.h"
# include "db/db_iter.h"
# include "db/db_iter.h"
@ -138,6 +140,7 @@ struct CompactionJob::SubcompactionState {
// State kept for output being generated
// State kept for output being generated
std : : vector < Output > outputs ;
std : : vector < Output > outputs ;
std : : vector < BlobFileAddition > blob_file_additions ;
std : : unique_ptr < WritableFileWriter > outfile ;
std : : unique_ptr < WritableFileWriter > outfile ;
std : : unique_ptr < TableBuilder > builder ;
std : : unique_ptr < TableBuilder > builder ;
@ -231,21 +234,13 @@ struct CompactionJob::CompactionState {
std : : vector < CompactionJob : : SubcompactionState > sub_compact_states ;
std : : vector < CompactionJob : : SubcompactionState > sub_compact_states ;
Status status ;
Status status ;
uint64_t total_bytes ;
size_t num_output_files = 0 ;
uint64_t num_output_records ;
uint64_t total_bytes = 0 ;
size_t num_blob_output_files = 0 ;
explicit CompactionState ( Compaction * c )
uint64_t total_blob_bytes = 0 ;
: compaction ( c ) ,
uint64_t num_output_records = 0 ;
total_bytes ( 0 ) ,
num_output_records ( 0 ) { }
size_t NumOutputFiles ( ) {
explicit CompactionState ( Compaction * c ) : compaction ( c ) { }
size_t total = 0 ;
for ( auto & s : sub_compact_states ) {
total + = s . outputs . size ( ) ;
}
return total ;
}
Slice SmallestUserKey ( ) {
Slice SmallestUserKey ( ) {
for ( const auto & sub_compact_state : sub_compact_states ) {
for ( const auto & sub_compact_state : sub_compact_states ) {
@ -272,11 +267,29 @@ struct CompactionJob::CompactionState {
} ;
} ;
void CompactionJob : : AggregateStatistics ( ) {
void CompactionJob : : AggregateStatistics ( ) {
assert ( compact_ ) ;
for ( SubcompactionState & sc : compact_ - > sub_compact_states ) {
for ( SubcompactionState & sc : compact_ - > sub_compact_states ) {
auto & outputs = sc . outputs ;
if ( ! outputs . empty ( ) & & ! outputs . back ( ) . meta . fd . file_size ) {
// An error occurred, so ignore the last output.
outputs . pop_back ( ) ;
}
compact_ - > num_output_files + = outputs . size ( ) ;
compact_ - > total_bytes + = sc . total_bytes ;
compact_ - > total_bytes + = sc . total_bytes ;
compact_ - > num_output_records + = sc . num_output_records ;
const auto & blobs = sc . blob_file_additions ;
compact_ - > num_blob_output_files + = blobs . size ( ) ;
for ( const auto & blob : blobs ) {
compact_ - > total_blob_bytes + = blob . GetTotalBlobBytes ( ) ;
}
}
for ( SubcompactionState & sc : compact_ - > sub_compact_states ) {
compact_ - > num_output_records + = sc . num_output_records ;
compaction_job_stats_ - > Add ( sc . compaction_job_stats ) ;
compaction_job_stats_ - > Add ( sc . compaction_job_stats ) ;
}
}
}
}
@ -286,7 +299,8 @@ CompactionJob::CompactionJob(
const FileOptions & file_options , VersionSet * versions ,
const FileOptions & file_options , VersionSet * versions ,
const std : : atomic < bool > * shutting_down ,
const std : : atomic < bool > * shutting_down ,
const SequenceNumber preserve_deletes_seqnum , LogBuffer * log_buffer ,
const SequenceNumber preserve_deletes_seqnum , LogBuffer * log_buffer ,
FSDirectory * db_directory , FSDirectory * output_directory , Statistics * stats ,
FSDirectory * db_directory , FSDirectory * output_directory ,
FSDirectory * blob_output_directory , Statistics * stats ,
InstrumentedMutex * db_mutex , ErrorHandler * db_error_handler ,
InstrumentedMutex * db_mutex , ErrorHandler * db_error_handler ,
std : : vector < SequenceNumber > existing_snapshots ,
std : : vector < SequenceNumber > existing_snapshots ,
SequenceNumber earliest_write_conflict_snapshot ,
SequenceNumber earliest_write_conflict_snapshot ,
@ -317,6 +331,7 @@ CompactionJob::CompactionJob(
log_buffer_ ( log_buffer ) ,
log_buffer_ ( log_buffer ) ,
db_directory_ ( db_directory ) ,
db_directory_ ( db_directory ) ,
output_directory_ ( output_directory ) ,
output_directory_ ( output_directory ) ,
blob_output_directory_ ( blob_output_directory ) ,
stats_ ( stats ) ,
stats_ ( stats ) ,
db_mutex_ ( db_mutex ) ,
db_mutex_ ( db_mutex ) ,
db_error_handler_ ( db_error_handler ) ,
db_error_handler_ ( db_error_handler ) ,
@ -604,18 +619,34 @@ Status CompactionJob::Run() {
// Check if any thread encountered an error during execution
// Check if any thread encountered an error during execution
Status status ;
Status status ;
IOStatus io_s ;
IOStatus io_s ;
bool wrote_new_blob_files = false ;
for ( const auto & state : compact_ - > sub_compact_states ) {
for ( const auto & state : compact_ - > sub_compact_states ) {
if ( ! state . status . ok ( ) ) {
if ( ! state . status . ok ( ) ) {
status = state . status ;
status = state . status ;
io_s = state . io_status ;
io_s = state . io_status ;
break ;
break ;
}
}
if ( ! state . blob_file_additions . empty ( ) ) {
wrote_new_blob_files = true ;
}
}
}
if ( io_status_ . ok ( ) ) {
if ( io_status_ . ok ( ) ) {
io_status_ = io_s ;
io_status_ = io_s ;
}
}
if ( status . ok ( ) & & output_directory_ ) {
if ( status . ok ( ) ) {
io_s = output_directory_ - > Fsync ( IOOptions ( ) , nullptr ) ;
constexpr IODebugContext * dbg = nullptr ;
if ( output_directory_ ) {
io_s = output_directory_ - > Fsync ( IOOptions ( ) , dbg ) ;
}
if ( io_s . ok ( ) & & wrote_new_blob_files & & blob_output_directory_ & &
blob_output_directory_ ! = output_directory_ ) {
io_s = blob_output_directory_ - > Fsync ( IOOptions ( ) , dbg ) ;
}
}
}
if ( io_status_ . ok ( ) ) {
if ( io_status_ . ok ( ) ) {
io_status_ = io_s ;
io_status_ = io_s ;
@ -721,6 +752,7 @@ Status CompactionJob::Run() {
// Finish up all book-keeping to unify the subcompaction results
// Finish up all book-keeping to unify the subcompaction results
AggregateStatistics ( ) ;
AggregateStatistics ( ) ;
UpdateCompactionStats ( ) ;
UpdateCompactionStats ( ) ;
RecordCompactionIOStats ( ) ;
RecordCompactionIOStats ( ) ;
LogFlush ( db_options_ . info_log ) ;
LogFlush ( db_options_ . info_log ) ;
TEST_SYNC_POINT ( " CompactionJob::Run():End " ) ;
TEST_SYNC_POINT ( " CompactionJob::Run():End " ) ;
@ -730,11 +762,16 @@ Status CompactionJob::Run() {
}
}
Status CompactionJob : : Install ( const MutableCFOptions & mutable_cf_options ) {
Status CompactionJob : : Install ( const MutableCFOptions & mutable_cf_options ) {
assert ( compact_ ) ;
AutoThreadOperationStageUpdater stage_updater (
AutoThreadOperationStageUpdater stage_updater (
ThreadStatus : : STAGE_COMPACTION_INSTALL ) ;
ThreadStatus : : STAGE_COMPACTION_INSTALL ) ;
db_mutex_ - > AssertHeld ( ) ;
db_mutex_ - > AssertHeld ( ) ;
Status status = compact_ - > status ;
Status status = compact_ - > status ;
ColumnFamilyData * cfd = compact_ - > compaction - > column_family_data ( ) ;
ColumnFamilyData * cfd = compact_ - > compaction - > column_family_data ( ) ;
assert ( cfd ) ;
cfd - > internal_stats ( ) - > AddCompactionStats (
cfd - > internal_stats ( ) - > AddCompactionStats (
compact_ - > compaction - > output_level ( ) , thread_pri_ , compaction_stats_ ) ;
compact_ - > compaction - > output_level ( ) , thread_pri_ , compaction_stats_ ) ;
@ -744,6 +781,7 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
if ( ! versions_ - > io_status ( ) . ok ( ) ) {
if ( ! versions_ - > io_status ( ) . ok ( ) ) {
io_status_ = versions_ - > io_status ( ) ;
io_status_ = versions_ - > io_status ( ) ;
}
}
VersionStorageInfo : : LevelSummaryStorage tmp ;
VersionStorageInfo : : LevelSummaryStorage tmp ;
auto vstorage = cfd - > current ( ) - > storage_info ( ) ;
auto vstorage = cfd - > current ( ) - > storage_info ( ) ;
const auto & stats = compaction_stats_ ;
const auto & stats = compaction_stats_ ;
@ -768,6 +806,8 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
stats . bytes_written / static_cast < double > ( stats . micros ) ;
stats . bytes_written / static_cast < double > ( stats . micros ) ;
}
}
const std : : string & column_family_name = cfd - > GetName ( ) ;
ROCKS_LOG_BUFFER (
ROCKS_LOG_BUFFER (
log_buffer_ ,
log_buffer_ ,
" [%s] compacted to: %s, MB/sec: %.1f rd, %.1f wr, level %d, "
" [%s] compacted to: %s, MB/sec: %.1f rd, %.1f wr, level %d, "
@ -775,8 +815,9 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
" MB in(%.1f, %.1f) out(%.1f), read-write-amplify(%.1f) "
" MB in(%.1f, %.1f) out(%.1f), read-write-amplify(%.1f) "
" write-amplify(%.1f) %s, records in: % " PRIu64
" write-amplify(%.1f) %s, records in: % " PRIu64
" , records dropped: % " PRIu64 " output_compression: %s \n " ,
" , records dropped: % " PRIu64 " output_compression: %s \n " ,
cfd - > GetName ( ) . c_str ( ) , vstorage - > LevelSummary ( & tmp ) , bytes_read_per_sec ,
column_family_name . c_str ( ) , vstorage - > LevelSummary ( & tmp ) ,
bytes_written_per_sec , compact_ - > compaction - > output_level ( ) ,
bytes_read_per_sec , bytes_written_per_sec ,
compact_ - > compaction - > output_level ( ) ,
stats . num_input_files_in_non_output_levels ,
stats . num_input_files_in_non_output_levels ,
stats . num_input_files_in_output_level , stats . num_output_files ,
stats . num_input_files_in_output_level , stats . num_output_files ,
stats . bytes_read_non_output_levels / 1048576.0 ,
stats . bytes_read_non_output_levels / 1048576.0 ,
@ -787,6 +828,15 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
CompressionTypeToString ( compact_ - > compaction - > output_compression ( ) )
CompressionTypeToString ( compact_ - > compaction - > output_compression ( ) )
. c_str ( ) ) ;
. c_str ( ) ) ;
const auto & blob_files = vstorage - > GetBlobFiles ( ) ;
if ( ! blob_files . empty ( ) ) {
ROCKS_LOG_BUFFER ( log_buffer_ ,
" [%s] Blob file summary: head=% " PRIu64 " , tail=% " PRIu64
" \n " ,
column_family_name . c_str ( ) , blob_files . begin ( ) - > first ,
blob_files . rbegin ( ) - > first ) ;
}
UpdateCompactionJobStats ( stats ) ;
UpdateCompactionJobStats ( stats ) ;
auto stream = event_logger_ - > LogToBuffer ( log_buffer_ ) ;
auto stream = event_logger_ - > LogToBuffer ( log_buffer_ ) ;
@ -795,11 +845,18 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
< < " compaction_time_micros " < < stats . micros
< < " compaction_time_micros " < < stats . micros
< < " compaction_time_cpu_micros " < < stats . cpu_micros < < " output_level "
< < " compaction_time_cpu_micros " < < stats . cpu_micros < < " output_level "
< < compact_ - > compaction - > output_level ( ) < < " num_output_files "
< < compact_ - > compaction - > output_level ( ) < < " num_output_files "
< < compact_ - > NumOutputFiles ( ) < < " total_output_size "
< < compact_ - > num_output_files < < " total_output_size "
< < compact_ - > total_bytes < < " num_input_records "
< < compact_ - > total_bytes ;
< < stats . num_input_records < < " num_output_records "
< < compact_ - > num_output_records < < " num_subcompactions "
if ( compact_ - > num_blob_output_files > 0 ) {
< < compact_ - > sub_compact_states . size ( ) < < " output_compression "
stream < < " num_blob_output_files " < < compact_ - > num_blob_output_files
< < " total_blob_output_size " < < compact_ - > total_blob_bytes ;
}
stream < < " num_input_records " < < stats . num_input_records
< < " num_output_records " < < compact_ - > num_output_records
< < " num_subcompactions " < < compact_ - > sub_compact_states . size ( )
< < " output_compression "
< < CompressionTypeToString ( compact_ - > compaction - > output_compression ( ) ) ;
< < CompressionTypeToString ( compact_ - > compaction - > output_compression ( ) ) ;
stream < < " num_single_delete_mismatches "
stream < < " num_single_delete_mismatches "
@ -823,12 +880,18 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
}
}
stream . EndArray ( ) ;
stream . EndArray ( ) ;
if ( ! blob_files . empty ( ) ) {
stream < < " blob_file_head " < < blob_files . begin ( ) - > first ;
stream < < " blob_file_tail " < < blob_files . rbegin ( ) - > first ;
}
CleanupCompaction ( ) ;
CleanupCompaction ( ) ;
return status ;
return status ;
}
}
void CompactionJob : : ProcessKeyValueCompaction ( SubcompactionState * sub_compact ) {
void CompactionJob : : ProcessKeyValueCompaction ( SubcompactionState * sub_compact ) {
assert ( sub_compact ! = nullptr ) ;
assert ( sub_compact ) ;
assert ( sub_compact - > compaction ) ;
uint64_t prev_cpu_micros = env_ - > NowCPUNanos ( ) / 1000 ;
uint64_t prev_cpu_micros = env_ - > NowCPUNanos ( ) / 1000 ;
@ -899,6 +962,22 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
snapshot_checker_ , compact_ - > compaction - > level ( ) ,
snapshot_checker_ , compact_ - > compaction - > level ( ) ,
db_options_ . statistics . get ( ) ) ;
db_options_ . statistics . get ( ) ) ;
const MutableCFOptions * mutable_cf_options =
sub_compact - > compaction - > mutable_cf_options ( ) ;
assert ( mutable_cf_options ) ;
std : : vector < std : : string > blob_file_paths ;
std : : unique_ptr < BlobFileBuilder > blob_file_builder (
mutable_cf_options - > enable_blob_files
? new BlobFileBuilder (
versions_ , env_ , fs_ . get ( ) ,
sub_compact - > compaction - > immutable_cf_options ( ) ,
mutable_cf_options , & file_options_ , job_id_ , cfd - > GetID ( ) ,
cfd - > GetName ( ) , Env : : IOPriority : : IO_LOW , write_hint_ ,
& blob_file_paths , & sub_compact - > blob_file_additions )
: nullptr ) ;
TEST_SYNC_POINT ( " CompactionJob::Run():Inprogress " ) ;
TEST_SYNC_POINT ( " CompactionJob::Run():Inprogress " ) ;
TEST_SYNC_POINT_CALLBACK (
TEST_SYNC_POINT_CALLBACK (
" CompactionJob::Run():PausingManualCompaction:1 " ,
" CompactionJob::Run():PausingManualCompaction:1 " ,
@ -921,7 +1000,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
& existing_snapshots_ , earliest_write_conflict_snapshot_ ,
& existing_snapshots_ , earliest_write_conflict_snapshot_ ,
snapshot_checker_ , env_ , ShouldReportDetailedTime ( env_ , stats_ ) ,
snapshot_checker_ , env_ , ShouldReportDetailedTime ( env_ , stats_ ) ,
/*expect_valid_internal_key=*/ true , & range_del_agg ,
/*expect_valid_internal_key=*/ true , & range_del_agg ,
/* blob_file_builder */ nullptr , db_options_ . allow_data_in_errors ,
blob_file_builder . get ( ) , db_options_ . allow_data_in_errors ,
sub_compact - > compaction , compaction_filter , shutting_down_ ,
sub_compact - > compaction , compaction_filter , shutting_down_ ,
preserve_deletes_seqnum_ , manual_compaction_paused_ ,
preserve_deletes_seqnum_ , manual_compaction_paused_ ,
db_options_ . info_log ) ) ;
db_options_ . info_log ) ) ;
@ -1093,6 +1172,14 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
RecordDroppedKeys ( range_del_out_stats , & sub_compact - > compaction_job_stats ) ;
RecordDroppedKeys ( range_del_out_stats , & sub_compact - > compaction_job_stats ) ;
}
}
if ( blob_file_builder ) {
if ( status . ok ( ) ) {
status = blob_file_builder - > Finish ( ) ;
}
blob_file_builder . reset ( ) ;
}
sub_compact - > compaction_job_stats . cpu_micros =
sub_compact - > compaction_job_stats . cpu_micros =
env_ - > NowCPUNanos ( ) / 1000 - prev_cpu_micros ;
env_ - > NowCPUNanos ( ) / 1000 - prev_cpu_micros ;
@ -1479,9 +1566,13 @@ Status CompactionJob::FinishCompactionOutputFile(
Status CompactionJob : : InstallCompactionResults (
Status CompactionJob : : InstallCompactionResults (
const MutableCFOptions & mutable_cf_options ) {
const MutableCFOptions & mutable_cf_options ) {
assert ( compact_ ) ;
db_mutex_ - > AssertHeld ( ) ;
db_mutex_ - > AssertHeld ( ) ;
auto * compaction = compact_ - > compaction ;
auto * compaction = compact_ - > compaction ;
assert ( compaction ) ;
// paranoia: verify that the files that we started with
// paranoia: verify that the files that we started with
// still exist in the current version and in the same original level.
// still exist in the current version and in the same original level.
// This ensures that a concurrent compaction did not erroneously
// This ensures that a concurrent compaction did not erroneously
@ -1497,23 +1588,32 @@ Status CompactionJob::InstallCompactionResults(
{
{
Compaction : : InputLevelSummaryBuffer inputs_summary ;
Compaction : : InputLevelSummaryBuffer inputs_summary ;
ROCKS_LOG_INFO (
ROCKS_LOG_INFO ( db_options_ . info_log ,
db_options_ . info_log , " [%s] [JOB %d] Compacted %s => % " PRIu64 " bytes " ,
" [%s] [JOB %d] Compacted %s => % " PRIu64 " bytes " ,
compaction - > column_family_data ( ) - > GetName ( ) . c_str ( ) , job_id_ ,
compaction - > column_family_data ( ) - > GetName ( ) . c_str ( ) , job_id_ ,
compaction - > InputLevelSummary ( & inputs_summary ) , compact_ - > total_bytes ) ;
compaction - > InputLevelSummary ( & inputs_summary ) ,
compact_ - > total_bytes + compact_ - > total_blob_bytes ) ;
}
}
VersionEdit * const edit = compaction - > edit ( ) ;
assert ( edit ) ;
// Add compaction inputs
// Add compaction inputs
compaction - > AddInputDeletions ( compact_ - > compaction - > edit ( ) ) ;
compaction - > AddInputDeletions ( edit ) ;
for ( const auto & sub_compact : compact_ - > sub_compact_states ) {
for ( const auto & sub_compact : compact_ - > sub_compact_states ) {
for ( const auto & out : sub_compact . outputs ) {
for ( const auto & out : sub_compact . outputs ) {
compaction - > edit ( ) - > AddFile ( compaction - > output_level ( ) , out . meta ) ;
edit - > AddFile ( compaction - > output_level ( ) , out . meta ) ;
}
for ( const auto & blob : sub_compact . blob_file_additions ) {
edit - > AddBlobFile ( blob ) ;
}
}
}
}
return versions_ - > LogAndApply ( compaction - > column_family_data ( ) ,
return versions_ - > LogAndApply ( compaction - > column_family_data ( ) ,
mutable_cf_options , compaction - > edit ( ) ,
mutable_cf_options , edit , db_mutex_ ,
db_mutex_ , db_ directory_ ) ;
db_directory_ ) ;
}
}
void CompactionJob : : RecordCompactionIOStats ( ) {
void CompactionJob : : RecordCompactionIOStats ( ) {
@ -1689,6 +1789,8 @@ void CopyPrefix(const Slice& src, size_t prefix_length, std::string* dst) {
# endif // !ROCKSDB_LITE
# endif // !ROCKSDB_LITE
void CompactionJob : : UpdateCompactionStats ( ) {
void CompactionJob : : UpdateCompactionStats ( ) {
assert ( compact_ ) ;
Compaction * compaction = compact_ - > compaction ;
Compaction * compaction = compact_ - > compaction ;
compaction_stats_ . num_input_files_in_non_output_levels = 0 ;
compaction_stats_ . num_input_files_in_non_output_levels = 0 ;
compaction_stats_ . num_input_files_in_output_level = 0 ;
compaction_stats_ . num_input_files_in_output_level = 0 ;
@ -1706,27 +1808,15 @@ void CompactionJob::UpdateCompactionStats() {
}
}
}
}
uint64_t num_output_records = 0 ;
compaction_stats_ . num_output_files =
static_cast < int > ( compact_ - > num_output_files ) +
for ( const auto & sub_compact : compact_ - > sub_compact_states ) {
static_cast < int > ( compact_ - > num_blob_output_files ) ;
size_t num_output_files = sub_compact . outputs . size ( ) ;
compaction_stats_ . bytes_written =
if ( sub_compact . builder ! = nullptr ) {
compact_ - > total_bytes + compact_ - > total_blob_bytes ;
// An error occurred so ignore the last output.
assert ( num_output_files > 0 ) ;
- - num_output_files ;
}
compaction_stats_ . num_output_files + = static_cast < int > ( num_output_files ) ;
num_output_records + = sub_compact . num_output_records ;
for ( const auto & out : sub_compact . outputs ) {
compaction_stats_ . bytes_written + = out . meta . fd . file_size ;
}
}
if ( compaction_stats_ . num_input_records > num_output_records ) {
if ( compaction_stats_ . num_input_records > compact_ - > num_output_records ) {
compaction_stats_ . num_dropped_records =
compaction_stats_ . num_dropped_records =
compaction_stats_ . num_input_records - num_output_records ;
compaction_stats_ . num_input_records - compact_ - > num_output_records ;
}
}
}
}
@ -1765,7 +1855,7 @@ void CompactionJob::UpdateCompactionJobStats(
compaction_job_stats_ - > num_output_records = compact_ - > num_output_records ;
compaction_job_stats_ - > num_output_records = compact_ - > num_output_records ;
compaction_job_stats_ - > num_output_files = stats . num_output_files ;
compaction_job_stats_ - > num_output_files = stats . num_output_files ;
if ( compact_ - > NumOutputFiles ( ) > 0U ) {
if ( stats . num_output_files > 0 ) {
CopyPrefix ( compact_ - > SmallestUserKey ( ) ,
CopyPrefix ( compact_ - > SmallestUserKey ( ) ,
CompactionJobStats : : kMaxPrefixLength ,
CompactionJobStats : : kMaxPrefixLength ,
& compaction_job_stats_ - > smallest_output_key_prefix ) ;
& compaction_job_stats_ - > smallest_output_key_prefix ) ;