@ -47,6 +47,15 @@ static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
return sum ;
}
static uint64_t TotalCompensatedFileSize (
const std : : vector < FileMetaData * > & files ) {
uint64_t sum = 0 ;
for ( size_t i = 0 ; i < files . size ( ) & & files [ i ] ; i + + ) {
sum + = files [ i ] - > compensated_file_size ;
}
return sum ;
}
Version : : ~ Version ( ) {
assert ( refs_ = = 0 ) ;
@ -241,53 +250,69 @@ class Version::LevelFileIteratorState : public TwoLevelIteratorState {
bool for_compaction_ ;
} ;
Status Version : : GetPropertiesOfAllTables ( TablePropertiesCollection * props ) {
Status Version : : GetTableProperties ( std : : shared_ptr < const TableProperties > * tp ,
const FileMetaData * file_meta ,
const std : : string * fname ) {
auto table_cache = cfd_ - > table_cache ( ) ;
auto options = cfd_ - > options ( ) ;
Status s = table_cache - > GetTableProperties (
vset_ - > storage_options_ , cfd_ - > internal_comparator ( ) , file_meta - > fd ,
tp , true /* no io */ ) ;
if ( s . ok ( ) ) {
return s ;
}
// We only ignore error type `Incomplete` since it's by design that we
// disallow table when it's not in table cache.
if ( ! s . IsIncomplete ( ) ) {
return s ;
}
// 2. Table is not present in table cache, we'll read the table properties
// directly from the properties block in the file.
std : : unique_ptr < RandomAccessFile > file ;
if ( fname ! = nullptr ) {
s = options - > env - > NewRandomAccessFile (
* fname , & file , vset_ - > storage_options_ ) ;
} else {
s = options - > env - > NewRandomAccessFile (
TableFileName ( vset_ - > dbname_ , file_meta - > fd . GetNumber ( ) ) ,
& file , vset_ - > storage_options_ ) ;
}
if ( ! s . ok ( ) ) {
return s ;
}
TableProperties * raw_table_properties ;
// By setting the magic number to kInvalidTableMagicNumber, we can by
// pass the magic number check in the footer.
s = ReadTableProperties (
file . get ( ) , file_meta - > fd . GetFileSize ( ) ,
Footer : : kInvalidTableMagicNumber /* table's magic number */ ,
vset_ - > env_ , options - > info_log . get ( ) , & raw_table_properties ) ;
if ( ! s . ok ( ) ) {
return s ;
}
RecordTick ( options - > statistics . get ( ) ,
NUMBER_DIRECT_LOAD_TABLE_PROPERTIES ) ;
* tp = std : : shared_ptr < const TableProperties > ( raw_table_properties ) ;
return s ;
}
Status Version : : GetPropertiesOfAllTables ( TablePropertiesCollection * props ) {
for ( int level = 0 ; level < num_levels_ ; level + + ) {
for ( const auto & file_meta : files_ [ level ] ) {
auto fname = TableFileName ( vset_ - > dbname_ , file_meta - > fd . GetNumber ( ) ) ;
// 1. If the table is already present in table cache, load table
// properties from there.
std : : shared_ptr < const TableProperties > table_properties ;
Status s = table_cache - > GetTableProperties (
vset_ - > storage_options_ , cfd_ - > internal_comparator ( ) , file_meta - > fd ,
& table_properties , true /* no io */ ) ;
Status s = GetTableProperties ( & table_properties , file_meta , & fname ) ;
if ( s . ok ( ) ) {
props - > insert ( { fname , table_properties } ) ;
continue ;
}
// We only ignore error type `Incomplete` since it's by design that we
// disallow table when it's not in table cache.
if ( ! s . IsIncomplete ( ) ) {
return s ;
}
// 2. Table is not present in table cache, we'll read the table properties
// directly from the properties block in the file.
std : : unique_ptr < RandomAccessFile > file ;
s = options - > env - > NewRandomAccessFile ( fname , & file ,
vset_ - > storage_options_ ) ;
if ( ! s . ok ( ) ) {
return s ;
}
TableProperties * raw_table_properties ;
// By setting the magic number to kInvalidTableMagicNumber, we can by
// pass the magic number check in the footer.
s = ReadTableProperties (
file . get ( ) , file_meta - > fd . GetFileSize ( ) ,
Footer : : kInvalidTableMagicNumber /* table's magic number */ ,
vset_ - > env_ , options - > info_log . get ( ) , & raw_table_properties ) ;
if ( ! s . ok ( ) ) {
} else {
return s ;
}
RecordTick ( options - > statistics . get ( ) ,
NUMBER_DIRECT_LOAD_TABLE_PROPERTIES ) ;
props - > insert ( { fname , std : : shared_ptr < const TableProperties > (
raw_table_properties ) } ) ;
}
}
@ -492,7 +517,11 @@ Version::Version(ColumnFamilyData* cfd, VersionSet* vset,
compaction_level_ ( num_levels_ ) ,
version_number_ ( version_number ) ,
file_indexer_ ( num_levels_ , cfd = = nullptr ? nullptr
: cfd - > internal_comparator ( ) . user_comparator ( ) ) {
: cfd - > internal_comparator ( ) . user_comparator ( ) ) ,
total_file_size_ ( 0 ) ,
total_raw_key_size_ ( 0 ) ,
total_raw_value_size_ ( 0 ) ,
num_non_deletions_ ( 0 ) {
}
void Version : : Get ( const ReadOptions & options ,
@ -699,6 +728,58 @@ void Version::PrepareApply(std::vector<uint64_t>& size_being_compacted) {
UpdateNumNonEmptyLevels ( ) ;
}
bool Version : : MaybeInitializeFileMetaData ( FileMetaData * file_meta ) {
if ( file_meta - > num_entries > 0 ) {
return false ;
}
std : : shared_ptr < const TableProperties > tp ;
Status s = GetTableProperties ( & tp , file_meta ) ;
if ( ! s . ok ( ) ) {
return false ;
}
if ( tp . get ( ) = = nullptr ) return false ;
file_meta - > num_entries = tp - > num_entries ;
file_meta - > num_deletions = GetDeletedKeys ( tp - > user_collected_properties ) ;
file_meta - > raw_value_size = tp - > raw_value_size ;
file_meta - > raw_key_size = tp - > raw_key_size ;
return true ;
}
void Version : : UpdateTemporaryStats ( const VersionEdit * edit ) {
static const int kDeletionWeightOnCompaction = 2 ;
// incrementally update the average value size by
// including newly added files into the global stats
int init_count = 0 ;
int total_count = 0 ;
for ( int level = 0 ; level < num_levels_ ; level + + ) {
for ( auto * file_meta : files_ [ level ] ) {
if ( MaybeInitializeFileMetaData ( file_meta ) ) {
// each FileMeta will be initialized only once.
total_file_size_ + = file_meta - > fd . GetFileSize ( ) ;
total_raw_key_size_ + = file_meta - > raw_key_size ;
total_raw_value_size_ + = file_meta - > raw_value_size ;
num_non_deletions_ + =
file_meta - > num_entries - file_meta - > num_deletions ;
init_count + + ;
}
total_count + + ;
}
}
uint64_t average_value_size = GetAverageValueSize ( ) ;
// compute the compensated size
for ( int level = 0 ; level < num_levels_ ; level + + ) {
for ( auto * file_meta : files_ [ level ] ) {
file_meta - > compensated_file_size = file_meta - > fd . GetFileSize ( ) +
file_meta - > num_deletions * average_value_size *
kDeletionWeightOnCompaction ;
}
}
}
void Version : : ComputeCompactionScore (
std : : vector < uint64_t > & size_being_compacted ) {
double max_score = 0 ;
@ -728,7 +809,7 @@ void Version::ComputeCompactionScore(
uint64_t total_size = 0 ;
for ( unsigned int i = 0 ; i < files_ [ level ] . size ( ) ; i + + ) {
if ( ! files_ [ level ] [ i ] - > being_compacted ) {
total_size + = files_ [ level ] [ i ] - > fd . GetFileSize ( ) ;
total_size + = files_ [ level ] [ i ] - > compensated_file_size ;
numfiles + + ;
}
}
@ -747,7 +828,7 @@ void Version::ComputeCompactionScore(
} else {
// Compute the ratio of current size to size limit.
const uint64_t level_bytes =
TotalFileSize ( files_ [ level ] ) - size_being_compacted [ level ] ;
TotalCompensated FileSize ( files_ [ level ] ) - size_being_compacted [ level ] ;
score = static_cast < double > ( level_bytes ) /
cfd_ - > compaction_picker ( ) - > MaxBytesForLevel ( level ) ;
if ( max_score < score ) {
@ -783,9 +864,10 @@ namespace {
// Compator that is used to sort files based on their size
// In normal mode: descending size
bool CompareSizeDescending ( const Version : : Fsize & first ,
const Version : : Fsize & second ) {
return ( first . file - > fd . GetFileSize ( ) > second . file - > fd . GetFileSize ( ) ) ;
bool CompareCompensatedSizeDescending ( const Version : : Fsize & first ,
const Version : : Fsize & second ) {
return ( first . file - > compensated_file_size >
second . file - > compensated_file_size ) ;
}
// A static compator used to sort files based on their seqno
// In universal style : descending seqno
@ -846,7 +928,7 @@ void Version::UpdateFilesBySize() {
num = temp . size ( ) ;
}
std : : partial_sort ( temp . begin ( ) , temp . begin ( ) + num , temp . end ( ) ,
CompareSizeDescending ) ;
CompareCompensated SizeDescending ) ;
}
assert ( temp . size ( ) = = files . size ( ) ) ;
@ -1674,6 +1756,7 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
if ( ! edit - > IsColumnFamilyManipulation ( ) ) {
// This is cpu-heavy operations, which should be called outside mutex.
v - > UpdateTemporaryStats ( edit ) ;
v - > PrepareApply ( size_being_compacted ) ;
}