@ -11,102 +11,279 @@
namespace rocksdb {
namespace rocksdb {
namespace blob_db {
namespace blob_db {
namespace {
CompactionFilter : : Decision BlobIndexCompactionFilterBase : : FilterV2 (
int /*level*/ , const Slice & key , ValueType value_type , const Slice & value ,
// CompactionFilter to delete expired blob index from base DB.
std : : string * /*new_value*/ , std : : string * /*skip_until*/ ) const {
class BlobIndexCompactionFilter : public CompactionFilter {
if ( value_type ! = kBlobIndex ) {
public :
return Decision : : kKeep ;
BlobIndexCompactionFilter ( BlobCompactionContext context ,
}
uint64_t current_time , Statistics * statistics )
BlobIndex blob_index ;
: context_ ( context ) ,
Status s = blob_index . DecodeFrom ( value ) ;
current_time_ ( current_time ) ,
if ( ! s . ok ( ) ) {
statistics_ ( statistics ) { }
// Unable to decode blob index. Keeping the value.
return Decision : : kKeep ;
~ BlobIndexCompactionFilter ( ) override {
}
RecordTick ( statistics_ , BLOB_DB_BLOB_INDEX_EXPIRED_COUNT , expired_count_ ) ;
if ( blob_index . HasTTL ( ) & & blob_index . expiration ( ) < = current_time_ ) {
RecordTick ( statistics_ , BLOB_DB_BLOB_INDEX_EXPIRED_SIZE , expired_size_ ) ;
// Expired
RecordTick ( statistics_ , BLOB_DB_BLOB_INDEX_EVICTED_COUNT , evicted_count_ ) ;
expired_count_ + + ;
RecordTick ( statistics_ , BLOB_DB_BLOB_INDEX_EVICTED_SIZE , evicted_size_ ) ;
expired_size_ + = key . size ( ) + value . size ( ) ;
}
return Decision : : kRemove ;
}
const char * Name ( ) const override { return " BlobIndexCompactionFilter " ; }
if ( ! blob_index . IsInlined ( ) & &
blob_index . file_number ( ) < context_ . next_file_number & &
// Filter expired blob indexes regardless of snapshots.
context_ . current_blob_files . count ( blob_index . file_number ( ) ) = = 0 ) {
bool IgnoreSnapshots ( ) const override { return true ; }
// Corresponding blob file gone. Could have been garbage collected or
// evicted by FIFO eviction.
Decision FilterV2 ( int /*level*/ , const Slice & key , ValueType value_type ,
evicted_count_ + + ;
const Slice & value , std : : string * /*new_value*/ ,
evicted_size_ + = key . size ( ) + value . size ( ) ;
std : : string * /*skip_until*/ ) const override {
return Decision : : kRemove ;
if ( value_type ! = kBlobIndex ) {
}
return Decision : : kKeep ;
if ( context_ . fifo_eviction_seq > 0 & & blob_index . HasTTL ( ) & &
}
blob_index . expiration ( ) < context_ . evict_expiration_up_to ) {
BlobIndex blob_index ;
// Hack: Internal key is passed to BlobIndexCompactionFilter for it to
Status s = blob_index . DecodeFrom ( value ) ;
// get sequence number.
if ( ! s . ok ( ) ) {
ParsedInternalKey ikey ;
// Unable to decode blob index. Keeping the value.
bool ok = ParseInternalKey ( key , & ikey ) ;
return Decision : : kKeep ;
// Remove keys that could have been removed by the last FIFO eviction.
}
// If parsing the key fails, ignore the error and continue.
if ( blob_index . HasTTL ( ) & & blob_index . expiration ( ) < = current_time_ ) {
if ( ok & & ikey . sequence < context_ . fifo_eviction_seq ) {
// Expired
expired_count_ + + ;
expired_size_ + = key . size ( ) + value . size ( ) ;
return Decision : : kRemove ;
}
if ( ! blob_index . IsInlined ( ) & &
blob_index . file_number ( ) < context_ . next_file_number & &
context_ . current_blob_files . count ( blob_index . file_number ( ) ) = = 0 ) {
// Corresponding blob file gone. Could have been garbage collected or
// evicted by FIFO eviction.
evicted_count_ + + ;
evicted_count_ + + ;
evicted_size_ + = key . size ( ) + value . size ( ) ;
evicted_size_ + = key . size ( ) + value . size ( ) ;
return Decision : : kRemove ;
return Decision : : kRemove ;
}
}
if ( context_ . fifo_eviction_seq > 0 & & blob_index . HasTTL ( ) & &
}
blob_index . expiration ( ) < context_ . evict_expiration_up_to ) {
return Decision : : kKeep ;
// Hack: Internal key is passed to BlobIndexCompactionFilter for it to
}
// get sequence number.
ParsedInternalKey ikey ;
CompactionFilter : : BlobDecision BlobIndexCompactionFilterGC : : PrepareBlobOutput (
bool ok = ParseInternalKey ( key , & ikey ) ;
const Slice & key , const Slice & existing_value ,
// Remove keys that could have been removed by the last FIFO eviction.
std : : string * new_value ) const {
// If parsing the key fails, ignore the error and continue.
assert ( new_value ) ;
if ( ok & & ikey . sequence < context_ . fifo_eviction_seq ) {
evicted_count_ + + ;
const BlobDBImpl * const blob_db_impl = context_gc_ . blob_db_impl ;
evicted_size_ + = key . size ( ) + value . size ( ) ;
( void ) blob_db_impl ;
return Decision : : kRemove ;
}
assert ( blob_db_impl ) ;
}
assert ( blob_db_impl - > bdb_options_ . enable_garbage_collection ) ;
return Decision : : kKeep ;
BlobIndex blob_index ;
const Status s = blob_index . DecodeFrom ( existing_value ) ;
if ( ! s . ok ( ) ) {
return BlobDecision : : kCorruption ;
}
if ( blob_index . IsInlined ( ) ) {
return BlobDecision : : kKeep ;
}
if ( blob_index . HasTTL ( ) ) {
return BlobDecision : : kKeep ;
}
if ( blob_index . file_number ( ) > = context_gc_ . cutoff_file_number ) {
return BlobDecision : : kKeep ;
}
// Note: each compaction generates its own blob files, which, depending on the
// workload, might result in many small blob files. The total number of files
// is bounded though (determined by the number of compactions and the blob
// file size option).
if ( ! OpenNewBlobFileIfNeeded ( ) ) {
return BlobDecision : : kIOError ;
}
PinnableSlice blob ;
CompressionType compression_type = kNoCompression ;
if ( ! ReadBlobFromOldFile ( key , blob_index , & blob , & compression_type ) ) {
return BlobDecision : : kIOError ;
}
uint64_t new_blob_file_number = 0 ;
uint64_t new_blob_offset = 0 ;
if ( ! WriteBlobToNewFile ( key , blob , & new_blob_file_number , & new_blob_offset ) ) {
return BlobDecision : : kIOError ;
}
}
private :
if ( ! CloseAndRegisterNewBlobFileIfNeeded ( ) ) {
BlobCompactionContext context_ ;
return BlobDecision : : kIOError ;
const uint64_t current_time_ ;
}
Statistics * statistics_ ;
// It is safe not to use std::atomic since the compaction filter, created
BlobIndex : : EncodeBlob ( new_value , new_blob_file_number , new_blob_offset ,
// from a compaction filter factory, will not be called from multiple threads.
blob . size ( ) , compression_type ) ;
mutable uint64_t expired_count_ = 0 ;
mutable uint64_t expired_size_ = 0 ;
return BlobDecision : : kChangeValue ;
mutable uint64_t evicted_count_ = 0 ;
}
mutable uint64_t evicted_size_ = 0 ;
} ;
// Makes sure there is an open blob file (and a writer for it) to receive
// relocated blobs.
// Returns true if `blob_file_` is already set, or if a new blob file and
// writer were created successfully; logs the status at error level and
// returns false if creation failed.
bool BlobIndexCompactionFilterGC : : OpenNewBlobFileIfNeeded ( ) const {
// Fast path: a file is already open, so a writer must exist as well.
if ( blob_file_ ) {
assert ( writer_ ) ;
return true ;
}
// NOTE(review): the next line does not fit this function's body; it looks
// like a leftover from another revision of the file -- confirm.
} // anonymous namespace
BlobDBImpl * const blob_db_impl = context_gc_ . blob_db_impl ;
assert ( blob_db_impl ) ;
// Create a non-TTL blob file with a default-constructed expiration range;
// on success the call fills in `blob_file_` and `writer_`.
const Status s = blob_db_impl - > CreateBlobFileAndWriter (
/* has_ttl */ false , ExpirationRange ( ) , " GC " , & blob_file_ , & writer_ ) ;
if ( ! s . ok ( ) ) {
ROCKS_LOG_ERROR ( blob_db_impl - > db_options_ . info_log ,
" Error opening new blob file during GC, status: %s " ,
s . ToString ( ) . c_str ( ) ) ;
return false ;
}
// Successful creation is expected to have populated both members.
assert ( blob_file_ ) ;
assert ( writer_ ) ;
return true ;
}
bool BlobIndexCompactionFilterGC : : ReadBlobFromOldFile (
const Slice & key , const BlobIndex & blob_index , PinnableSlice * blob ,
CompressionType * compression_type ) const {
BlobDBImpl * const blob_db_impl = context_gc_ . blob_db_impl ;
assert ( blob_db_impl ) ;
const Status s = blob_db_impl - > GetRawBlobFromFile (
key , blob_index . file_number ( ) , blob_index . offset ( ) , blob_index . size ( ) ,
blob , compression_type ) ;
if ( ! s . ok ( ) ) {
ROCKS_LOG_ERROR ( blob_db_impl - > db_options_ . info_log ,
" Error reading blob during GC, key: %s (%s), status: %s " ,
key . ToString ( /* output_hex */ true ) . c_str ( ) ,
blob_index . DebugString ( /* output_hex */ true ) . c_str ( ) ,
s . ToString ( ) . c_str ( ) ) ;
return false ;
}
return true ;
}
bool BlobIndexCompactionFilterGC : : WriteBlobToNewFile (
const Slice & key , const Slice & blob , uint64_t * new_blob_file_number ,
uint64_t * new_blob_offset ) const {
assert ( new_blob_file_number ) ;
assert ( new_blob_offset ) ;
assert ( blob_file_ ) ;
* new_blob_file_number = blob_file_ - > BlobFileNumber ( ) ;
assert ( writer_ ) ;
uint64_t new_key_offset = 0 ;
const Status s = writer_ - > AddRecord ( key , blob , kNoExpiration , & new_key_offset ,
new_blob_offset ) ;
if ( ! s . ok ( ) ) {
const BlobDBImpl * const blob_db_impl = context_gc_ . blob_db_impl ;
assert ( blob_db_impl ) ;
ROCKS_LOG_ERROR (
blob_db_impl - > db_options_ . info_log ,
" Error writing blob to new file %s during GC, key: %s, status: %s " ,
blob_file_ - > PathName ( ) . c_str ( ) ,
key . ToString ( /* output_hex */ true ) . c_str ( ) , s . ToString ( ) . c_str ( ) ) ;
return false ;
}
const uint64_t new_size =
BlobLogRecord : : kHeaderSize + key . size ( ) + blob . size ( ) ;
blob_file_ - > BlobRecordAdded ( new_size ) ;
BlobDBImpl * const blob_db_impl = context_gc_ . blob_db_impl ;
assert ( blob_db_impl ) ;
blob_db_impl - > total_blob_size_ + = new_size ;
return true ;
}
bool BlobIndexCompactionFilterGC : : CloseAndRegisterNewBlobFileIfNeeded ( ) const {
const BlobDBImpl * const blob_db_impl = context_gc_ . blob_db_impl ;
assert ( blob_db_impl ) ;
assert ( blob_file_ ) ;
if ( blob_file_ - > GetFileSize ( ) < blob_db_impl - > bdb_options_ . blob_file_size ) {
return true ;
}
return CloseAndRegisterNewBlobFile ( ) ;
}
bool BlobIndexCompactionFilterGC : : CloseAndRegisterNewBlobFile ( ) const {
BlobDBImpl * const blob_db_impl = context_gc_ . blob_db_impl ;
assert ( blob_db_impl ) ;
assert ( blob_file_ ) ;
Status s ;
{
WriteLock wl ( & blob_db_impl - > mutex_ ) ;
s = blob_db_impl - > CloseBlobFile ( blob_file_ ) ;
// Note: we delay registering the new blob file until it's closed to
// prevent FIFO eviction from processing it during the GC run.
blob_db_impl - > RegisterBlobFile ( blob_file_ ) ;
}
assert ( blob_file_ - > Immutable ( ) ) ;
blob_file_ . reset ( ) ;
if ( ! s . ok ( ) ) {
ROCKS_LOG_ERROR ( blob_db_impl - > db_options_ . info_log ,
" Error closing new blob file %s during GC, status: %s " ,
blob_file_ - > PathName ( ) . c_str ( ) , s . ToString ( ) . c_str ( ) ) ;
return false ;
}
return true ;
}
std : : unique_ptr < CompactionFilter >
std : : unique_ptr < CompactionFilter >
BlobIndexCompactionFilterFactory : : CreateCompactionFilter (
BlobIndexCompactionFilterFactory : : CreateCompactionFilter (
const CompactionFilter : : Context & /*context*/ ) {
const CompactionFilter : : Context & /*context*/ ) {
assert ( env ( ) ) ;
int64_t current_time = 0 ;
int64_t current_time = 0 ;
Status s = env_ - > GetCurrentTime ( & current_time ) ;
Status s = env ( ) - > GetCurrentTime ( & current_time ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
return nullptr ;
return nullptr ;
}
}
assert ( current_time > = 0 ) ;
assert ( current_time > = 0 ) ;
assert ( blob_db_impl ( ) ) ;
BlobCompactionContext context ;
BlobCompactionContext context ;
blob_db_impl_ - > GetCompactionContext ( & context ) ;
blob_db_impl ( ) - > GetCompactionContext ( & context ) ;
return std : : unique_ptr < CompactionFilter > ( new BlobIndexCompactionFilter (
return std : : unique_ptr < CompactionFilter > ( new BlobIndexCompactionFilter (
context , static_cast < uint64_t > ( current_time ) , statistics_ ) ) ;
std : : move ( context ) , current_time , statistics ( ) ) ) ;
}
std : : unique_ptr < CompactionFilter >
BlobIndexCompactionFilterFactoryGC : : CreateCompactionFilter (
const CompactionFilter : : Context & /*context*/ ) {
assert ( env ( ) ) ;
int64_t current_time = 0 ;
Status s = env ( ) - > GetCurrentTime ( & current_time ) ;
if ( ! s . ok ( ) ) {
return nullptr ;
}
assert ( current_time > = 0 ) ;
assert ( blob_db_impl ( ) ) ;
BlobCompactionContext context ;
BlobCompactionContextGC context_gc ;
blob_db_impl ( ) - > GetCompactionContext ( & context , & context_gc ) ;
return std : : unique_ptr < CompactionFilter > ( new BlobIndexCompactionFilterGC (
std : : move ( context ) , std : : move ( context_gc ) , current_time , statistics ( ) ) ) ;
}
}
} // namespace blob_db
} // namespace blob_db