@@ -33,6 +33,7 @@
#include "util/sync_point.h"
#include "util/timer_queue.h"
#include "utilities/blob_db/blob_db_iterator.h"
#include "utilities/blob_db/blob_index.h"

namespace {
int kBlockBasedTableVersionFormat = 2;
@@ -49,78 +50,8 @@ void extendTimestamps(rocksdb::blob_db::tsrange_t* ts_range, uint64_t ts) {
} // end namespace
namespace rocksdb {
namespace blob_db {
// BlobHandle is a pointer to the blob that is stored in the LSM
class BlobHandle {
 public:
  BlobHandle()
      : file_number_(std::numeric_limits<uint64_t>::max()),
        offset_(std::numeric_limits<uint64_t>::max()),
        size_(std::numeric_limits<uint64_t>::max()),
        compression_(kNoCompression) {}

  uint64_t filenumber() const { return file_number_; }
  void set_filenumber(uint64_t fn) { file_number_ = fn; }

  // The offset of the blob in the file.
  uint64_t offset() const { return offset_; }
  void set_offset(uint64_t _offset) { offset_ = _offset; }

  // The size of the stored blob.
  uint64_t size() const { return size_; }
  void set_size(uint64_t _size) { size_ = _size; }

  CompressionType compression() const { return compression_; }
  void set_compression(CompressionType t) { compression_ = t; }

  void EncodeTo(std::string* dst) const;

  Status DecodeFrom(const Slice& input);

  void clear();

 private:
  uint64_t file_number_;
  uint64_t offset_;
  uint64_t size_;
  CompressionType compression_;
};
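// A sketch of the encoded layout (inferred from EncodeTo below): three
// varint64 fields followed by one compression byte, so a handle occupies at
// most 3 * 10 + 1 = 31 bytes; reserve(30) covers the common case of short
// varints.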
void BlobHandle::EncodeTo(std::string* dst) const {
  // Sanity check that all fields have been set
  assert(offset_ != std::numeric_limits<uint64_t>::max());
  assert(size_ != std::numeric_limits<uint64_t>::max());
  assert(file_number_ != std::numeric_limits<uint64_t>::max());

  dst->reserve(30);
  PutVarint64(dst, file_number_);
  PutVarint64(dst, offset_);
  PutVarint64(dst, size_);
  dst->push_back(static_cast<unsigned char>(compression_));
}

void BlobHandle::clear() {
  file_number_ = std::numeric_limits<uint64_t>::max();
  offset_ = std::numeric_limits<uint64_t>::max();
  size_ = std::numeric_limits<uint64_t>::max();
  compression_ = kNoCompression;
}
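// Note: DecodeFrom below assumes at least one byte remains for the
// compression type once the three varints have parsed; only a varint parse
// failure takes the clear() path.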
Status BlobHandle::DecodeFrom(const Slice& input) {
  Slice s(input);
  Slice* p = &s;
  if (GetVarint64(p, &file_number_) && GetVarint64(p, &offset_) &&
      GetVarint64(p, &size_)) {
    compression_ = static_cast<CompressionType>(p->data()[0]);
    return Status::OK();
  } else {
    clear();
    return Status::Corruption("bad blob handle");
  }
}
Random blob_rgen(static_cast<uint32_t>(time(nullptr)));

void BlobDBFlushBeginListener::OnFlushBegin(DB* db, const FlushJobInfo& info) {
@@ -149,19 +80,20 @@ void EvictAllVersionsCompactionListener::InternalListener::OnCompaction(
  if (!is_new &&
      value_type ==
          CompactionEventListener::CompactionListenerValueType::kValue) {
    BlobHandle handle;
    Status s = handle.DecodeFrom(existing_value);
    BlobIndex blob_index;
    Status s = blob_index.DecodeFrom(existing_value);
    if (s.ok()) {
      if (impl_->debug_level_ >= 3)
        ROCKS_LOG_INFO(impl_->db_options_.info_log,
                       "CALLBACK COMPACTED OUT KEY: %s SN: %d "
                       "NEW: %d FN: %" PRIu64 " OFFSET: %" PRIu64
                       " SIZE: %" PRIu64,
                       key.ToString().c_str(), sn, is_new, handle.filenumber(),
                       handle.offset(), handle.size());
      impl_->override_vals_q_.enqueue({handle.filenumber(), key.size(),
                                       handle.offset(), handle.size(), sn});
        ROCKS_LOG_INFO(
            impl_->db_options_.info_log,
            "CALLBACK COMPACTED OUT KEY: %s SN: %d "
            "NEW: %d FN: %" PRIu64 " OFFSET: %" PRIu64 " SIZE: %" PRIu64,
            key.ToString().c_str(), sn, is_new, blob_index.file_number(),
            blob_index.offset(), blob_index.size());
      impl_->override_vals_q_.enqueue({blob_index.file_number(), key.size(),
                                       blob_index.offset(), blob_index.size(),
                                       sn});
    }
  } else {
    if (impl_->debug_level_ >= 3)
@@ -178,7 +110,6 @@ BlobDBImpl::BlobDBImpl(const std::string& dbname,
      db_impl_(nullptr),
      env_(db_options.env),
      ttl_extractor_(blob_db_options.ttl_extractor.get()),
      wo_set_(false),
      bdb_options_(blob_db_options),
      db_options_(db_options),
      env_options_(db_options),
@@ -235,7 +166,6 @@ BlobDBOptions BlobDBImpl::GetBlobDBOptions() const { return bdb_options_; }
BlobDBImpl::BlobDBImpl(DB* db, const BlobDBOptions& blob_db_options)
    : BlobDB(db),
      db_impl_(static_cast_with_check<DBImpl, DB>(db)),
      wo_set_(false),
      bdb_options_(blob_db_options),
      db_options_(db->GetOptions()),
      env_options_(db_->GetOptions()),
@@ -610,17 +540,6 @@ std::shared_ptr<Writer> BlobDBImpl::CheckOrCreateWriterLocked(
  return writer;
}
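// The check-lock-check sequence below is the double-checked locking pattern
// (the "DCLP" comment): a relaxed load skips the lock on the hot path, and
// the acquire reload under the write lock pairs with the release store so
// the options are captured exactly once.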
void BlobDBImpl::UpdateWriteOptions(const WriteOptions& options) {
  if (!wo_set_.load(std::memory_order_relaxed)) {
    // DCLP
    WriteLock wl(&mutex_);
    if (!wo_set_.load(std::memory_order_acquire)) {
      wo_set_.store(true, std::memory_order_release);
      write_options_ = options;
    }
  }
}
std::shared_ptr<BlobFile> BlobDBImpl::SelectBlobFile() {
  uint32_t val = blob_rgen.Next();
{
@@ -736,14 +655,6 @@ std::shared_ptr<BlobFile> BlobDBImpl::SelectBlobFileTTL(uint64_t expiration) {
  return bfile;
}
Status BlobDBImpl::Put(const WriteOptions& options, const Slice& key,
                       const Slice& value) {
  std::string new_value;
  Slice value_slice;
  uint64_t expiration = ExtractExpiration(key, value, &value_slice, &new_value);
  return PutUntil(options, key, value_slice, expiration);
}
Status BlobDBImpl::Delete(const WriteOptions& options, const Slice& key) {
  SequenceNumber lsn = db_impl_->GetLatestSequenceNumber();
  Status s = db_->Delete(options, key);
@@ -753,141 +664,94 @@ Status BlobDBImpl::Delete(const WriteOptions& options, const Slice& key) {
  return s;
}

Status BlobDBImpl::Write(const WriteOptions& opts, WriteBatch* updates) {
  class BlobInserter : public WriteBatch::Handler {
   private:
    BlobDBImpl* impl_;
    SequenceNumber sequence_;
    WriteBatch updates_blob_;
    std::shared_ptr<BlobFile> last_file_;
    bool has_put_;
    std::string new_value_;
    uint32_t default_cf_id_;

   public:
    BlobInserter(BlobDBImpl* impl, SequenceNumber seq)
        : impl_(impl),
          sequence_(seq),
          has_put_(false),
          default_cf_id_(reinterpret_cast<ColumnFamilyHandleImpl*>(
                             impl_->DefaultColumnFamily())
                             ->cfd()
                             ->GetID()) {}

    SequenceNumber sequence() { return sequence_; }
    WriteBatch* updates_blob() { return &updates_blob_; }
    std::shared_ptr<BlobFile>& last_file() { return last_file_; }
    bool has_put() { return has_put_; }

    virtual Status PutCF(uint32_t column_family_id, const Slice& key,
                         const Slice& value_slice) override {
      if (column_family_id != default_cf_id_) {
        return Status::NotSupported(
            "Blob DB doesn't support non-default column family.");
      }
      Slice value_unc;
      uint64_t expiration =
          impl_->ExtractExpiration(key, value_slice, &value_unc, &new_value_);

      std::shared_ptr<BlobFile> bfile =
          (expiration != kNoExpiration)
              ? impl_->SelectBlobFileTTL(expiration)
              : ((last_file_) ? last_file_ : impl_->SelectBlobFile());
      if (last_file_ && last_file_ != bfile) {
        return Status::NotFound("too many blob files");
      }

      if (!bfile) {
        return Status::NotFound("blob file not found");
      }

      last_file_ = bfile;
      has_put_ = true;

      std::string compression_output;
      Slice value = impl_->GetCompressedSlice(value_unc, &compression_output);

      std::string headerbuf;
      Writer::ConstructBlobHeader(&headerbuf, key, value, expiration, -1);
      std::string index_entry;
      Status s = impl_->AppendBlob(bfile, headerbuf, key, value, &index_entry);
      if (!s.ok()) {
        return s;
      }
      bfile->ExtendSequenceRange(sequence_);
      sequence_++;

      if (expiration != kNoExpiration) {
        extendTTL(&(bfile->ttl_range_), expiration);
      }
class BlobDBImpl::BlobInserter : public WriteBatch::Handler {
 private:
  const WriteOptions& options_;
  BlobDBImpl* blob_db_impl_;
  uint32_t default_cf_id_;
  SequenceNumber sequence_;
  WriteBatch batch_;
      return WriteBatchInternal::PutBlobIndex(&updates_blob_, column_family_id,
                                              key, index_entry);
 public:
  BlobInserter(const WriteOptions& options, BlobDBImpl* blob_db_impl,
               uint32_t default_cf_id, SequenceNumber seq)
      : options_(options),
        blob_db_impl_(blob_db_impl),
        default_cf_id_(default_cf_id),
        sequence_(seq) {}

  SequenceNumber sequence() { return sequence_; }

  WriteBatch* batch() { return &batch_; }

  virtual Status PutCF(uint32_t column_family_id, const Slice& key,
                       const Slice& value) override {
    if (column_family_id != default_cf_id_) {
      return Status::NotSupported(
          "Blob DB doesn't support non-default column family.");
    }
    std::string new_value;
    Slice value_slice;
    uint64_t expiration =
        blob_db_impl_->ExtractExpiration(key, value, &value_slice, &new_value);
    Status s = blob_db_impl_->PutBlobValue(options_, key, value_slice,
                                           expiration, sequence_, &batch_);
    sequence_++;
    return s;
  }
    virtual Status DeleteCF(uint32_t column_family_id,
                            const Slice& key) override {
      if (column_family_id != default_cf_id_) {
        return Status::NotSupported(
            "Blob DB doesn't support non-default column family.");
      }
      WriteBatchInternal::Delete(&updates_blob_, column_family_id, key);
      sequence_++;
      return Status::OK();

  virtual Status DeleteCF(uint32_t column_family_id,
                          const Slice& key) override {
    if (column_family_id != default_cf_id_) {
      return Status::NotSupported(
          "Blob DB doesn't support non-default column family.");
    }
    Status s = WriteBatchInternal::Delete(&batch_, column_family_id, key);
    sequence_++;
    return s;
  }
    virtual Status DeleteRange(uint32_t column_family_id,
                               const Slice& begin_key, const Slice& end_key) {
      if (column_family_id != default_cf_id_) {
        return Status::NotSupported(
            "Blob DB doesn't support non-default column family.");
      }
      WriteBatchInternal::DeleteRange(&updates_blob_, column_family_id,
                                      begin_key, end_key);
      sequence_++;
      return Status::OK();

  virtual Status DeleteRange(uint32_t column_family_id, const Slice& begin_key,
                             const Slice& end_key) {
    if (column_family_id != default_cf_id_) {
      return Status::NotSupported(
          "Blob DB doesn't support non-default column family.");
    }
    Status s = WriteBatchInternal::DeleteRange(&batch_, column_family_id,
                                               begin_key, end_key);
    sequence_++;
    return s;
  }
    virtual Status SingleDeleteCF(uint32_t /*column_family_id*/,
                                  const Slice& /*key*/) override {
      return Status::NotSupported("Not supported operation in blob db.");
    }

  virtual Status SingleDeleteCF(uint32_t /*column_family_id*/,
                                const Slice& /*key*/) override {
    return Status::NotSupported("Not supported operation in blob db.");
  }

    virtual Status MergeCF(uint32_t /*column_family_id*/, const Slice& /*key*/,
                           const Slice& /*value*/) override {
      return Status::NotSupported("Not supported operation in blob db.");
    }

  virtual Status MergeCF(uint32_t /*column_family_id*/, const Slice& /*key*/,
                         const Slice& /*value*/) override {
    return Status::NotSupported("Not supported operation in blob db.");
  }

    virtual void LogData(const Slice& blob) override {
      updates_blob_.PutLogData(blob);
    }
  };

  virtual void LogData(const Slice& blob) override { batch_.PutLogData(blob); }
};
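// Sequence bookkeeping: BlobInserter starts at the next sequence number and
// bumps it once per handled record, so after db_->Write() applies the
// rewritten batch, blob_inserter.sequence() should again equal
// GetLatestSequenceNumber() + 1; the asserts below check exactly that.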
Status BlobDBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
  MutexLock l(&write_mutex_);

  SequenceNumber current_seq = db_impl_->GetLatestSequenceNumber() + 1;
  BlobInserter blob_inserter(this, current_seq);
  uint32_t default_cf_id =
      reinterpret_cast<ColumnFamilyHandleImpl*>(DefaultColumnFamily())->GetID();
  SequenceNumber current_seq = GetLatestSequenceNumber() + 1;
  BlobInserter blob_inserter(options, this, default_cf_id, current_seq);
  Status s = updates->Iterate(&blob_inserter);
  if (!s.ok()) {
    return s;
  }
  s = db_->Write(opts, blob_inserter.updates_blob());
  s = db_->Write(options, blob_inserter.batch());
  if (!s.ok()) {
    return s;
  }
  assert(current_seq ==
         WriteBatchInternal::Sequence(blob_inserter.updates_blob()));
  assert(blob_inserter.sequence() ==
         current_seq + WriteBatchInternal::Count(blob_inserter.updates_blob()));
  if (blob_inserter.has_put()) {
    s = CloseBlobFileIfNeeded(blob_inserter.last_file());
    if (!s.ok()) {
      return s;
    }
  }
  assert(blob_inserter.sequence() == GetLatestSequenceNumber() + 1);
// add deleted key to list of keys that have been deleted for book-keeping
class DeleteBookkeeper : public WriteBatch::Handler {
@@ -956,83 +820,106 @@ void BlobDBImpl::GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) {
}
}
Status BlobDBImpl::Put(const WriteOptions& options, const Slice& key,
                       const Slice& value) {
  std::string new_value;
  Slice value_slice;
  uint64_t expiration = ExtractExpiration(key, value, &value_slice, &new_value);
  return PutUntil(options, key, value_slice, expiration);
}
Status BlobDBImpl::PutWithTTL(const WriteOptions& options,
                              const Slice& key, const Slice& value,
                              uint64_t ttl) {
  uint64_t now = EpochNow();
  assert(std::numeric_limits<uint64_t>::max() - now > ttl);
  return PutUntil(options, key, value, now + ttl);
}

Slice BlobDBImpl::GetCompressedSlice(const Slice& raw,
                                     std::string* compression_output) const {
  if (bdb_options_.compression == kNoCompression) {
    return raw;
  }
  CompressionType ct = bdb_options_.compression;
  CompressionOptions compression_opts;
  CompressBlock(raw, compression_opts, &ct, kBlockBasedTableVersionFormat,
                Slice(), compression_output);
  return *compression_output;
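  // The ternary below replaces the old overflow assert: if now + ttl would
  // wrap around uint64_t (i.e. ttl >= kNoExpiration - now), the entry is
  // treated as having no expiration instead of tripping an assert.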
  uint64_t expiration = kNoExpiration - now > ttl ? now + ttl : kNoExpiration;
  return PutUntil(options, key, value, expiration);
}
Status BlobDBImpl::PutUntil(const WriteOptions& options, const Slice& key,
                            const Slice& value_unc, uint64_t expiration) {
  TEST_SYNC_POINT("BlobDBImpl::PutUntil:Start");
                            const Slice& value, uint64_t expiration) {
  MutexLock l(&write_mutex_);
  UpdateWriteOptions(options);

  std::shared_ptr<BlobFile> bfile = (expiration != kNoExpiration)
                                        ? SelectBlobFileTTL(expiration)
                                        : SelectBlobFile();

  if (!bfile) return Status::NotFound("Blob file not found");

  std::string compression_output;
  Slice value = GetCompressedSlice(value_unc, &compression_output);

  std::string headerbuf;
  Writer::ConstructBlobHeader(&headerbuf, key, value, expiration, -1);

  std::string index_entry;
  Status s = AppendBlob(bfile, headerbuf, key, value, &index_entry);
  if (!s.ok()) {
    ROCKS_LOG_ERROR(db_options_.info_log,
                    "Failed to append blob to FILE: %s: KEY: %s VALSZ: %d "
                    "status: '%s' blob_file: '%s'",
                    bfile->PathName().c_str(), key.ToString().c_str(),
                    value.size(), s.ToString().c_str(),
                    bfile->DumpState().c_str());
    return s;
  SequenceNumber sequence = GetLatestSequenceNumber() + 1;
  WriteBatch batch;
  Status s = PutBlobValue(options, key, value, expiration, sequence, &batch);
  if (s.ok()) {
    s = db_->Write(options, &batch);
  }
  return s;
}
  WriteBatch batch;

Status BlobDBImpl::PutBlobValue(const WriteOptions& options, const Slice& key,
                                const Slice& value, uint64_t expiration,
                                SequenceNumber sequence, WriteBatch* batch) {
  TEST_SYNC_POINT("BlobDBImpl::PutBlobValue:Start");
  Status s;
  std::string index_entry;
  uint32_t column_family_id =
      reinterpret_cast<ColumnFamilyHandleImpl*>(DefaultColumnFamily())->GetID();
  s = WriteBatchInternal::PutBlobIndex(&batch, column_family_id, key,
                                       index_entry);
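  // Decision sketch for the write path below: values shorter than
  // min_blob_size stay in the base DB, either as a plain Put (no TTL) or as
  // an inlined-with-TTL blob index; larger values are appended to a blob
  // file and only the resulting index entry goes into the batch.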
  if (value.size() < bdb_options_.min_blob_size) {
    if (expiration == kNoExpiration) {
      // Put as normal value
      s = batch->Put(key, value);
    } else {
      // Inlined with TTL
      BlobIndex::EncodeInlinedTTL(&index_entry, expiration, value);
      s = WriteBatchInternal::PutBlobIndex(batch, column_family_id, key,
                                           index_entry);
    }
  } else {
    std::shared_ptr<BlobFile> bfile = (expiration != kNoExpiration)
                                          ? SelectBlobFileTTL(expiration)
                                          : SelectBlobFile();
    if (!bfile) {
      return Status::NotFound("Blob file not found");
    }

  // this goes to the base db and can be expensive
  if (s.ok()) {
    s = db_->Write(options, &batch);
  }

    std::string compression_output;
    Slice value_compressed = GetCompressedSlice(value, &compression_output);

  if (s.ok()) {
    // this is the sequence number of the write.
    SequenceNumber sn = WriteBatchInternal::Sequence(&batch);
    bfile->ExtendSequenceRange(sn);

    std::string headerbuf;
    Writer::ConstructBlobHeader(&headerbuf, key, value_compressed, expiration,
                                -1);

    if (expiration != kNoExpiration) {
      extendTTL(&(bfile->ttl_range_), expiration);
    }

    s = AppendBlob(bfile, headerbuf, key, value_compressed, expiration,
                   &index_entry);
    s = CloseBlobFileIfNeeded(bfile);
    if (s.ok()) {
      bfile->ExtendSequenceRange(sequence);
      if (expiration != kNoExpiration) {
        extendTTL(&(bfile->ttl_range_), expiration);
      }
      s = CloseBlobFileIfNeeded(bfile);
      if (s.ok()) {
        s = WriteBatchInternal::PutBlobIndex(batch, column_family_id, key,
                                             index_entry);
      }
    } else {
      ROCKS_LOG_ERROR(db_options_.info_log,
                      "Failed to append blob to FILE: %s: KEY: %s VALSZ: %d "
                      "status: '%s' blob_file: '%s'",
                      bfile->PathName().c_str(), key.ToString().c_str(),
                      value.size(), s.ToString().c_str(),
                      bfile->DumpState().c_str());
    }
  }

  TEST_SYNC_POINT("BlobDBImpl::PutUntil:Finish");
  TEST_SYNC_POINT("BlobDBImpl::PutBlobValue:Finish");
  return s;
}
Slice BlobDBImpl::GetCompressedSlice(const Slice& raw,
                                     std::string* compression_output) const {
  if (bdb_options_.compression == kNoCompression) {
    return raw;
  }
  CompressionType ct = bdb_options_.compression;
  CompressionOptions compression_opts;
  CompressBlock(raw, compression_opts, &ct, kBlockBasedTableVersionFormat,
                Slice(), compression_output);
  return *compression_output;
}
uint64_t BlobDBImpl::ExtractExpiration(const Slice& key, const Slice& value,
                                       Slice* value_slice,
                                       std::string* new_value) {
@@ -1049,7 +936,8 @@ uint64_t BlobDBImpl::ExtractExpiration(const Slice& key, const Slice& value,
Status BlobDBImpl::AppendBlob(const std::shared_ptr<BlobFile>& bfile,
                              const std::string& headerbuf, const Slice& key,
                              const Slice& value, std::string* index_entry) {
                              const Slice& value, uint64_t expiration,
                              std::string* index_entry) {
  auto size_put = BlobLogRecord::kHeaderSize + key.size() + value.size();
  if (bdb_options_.blob_dir_size > 0 &&
      (total_blob_space_.load() + size_put) > bdb_options_.blob_dir_size) {
@@ -1086,18 +974,14 @@ Status BlobDBImpl::AppendBlob(const std::shared_ptr<BlobFile>& bfile,
  last_period_write_ += size_put;
  total_blob_space_ += size_put;

  BlobHandle handle;
  handle.set_filenumber(bfile->BlobFileNumber());
  handle.set_size(value.size());
  handle.set_offset(blob_offset);
  handle.set_compression(bdb_options_.compression);
  handle.EncodeTo(index_entry);

  if (debug_level_ >= 3)
    ROCKS_LOG_INFO(db_options_.info_log,
                   ">Adding KEY FILE: %s: BC: %d OFFSET: %d SZ: %d",
                   bfile->PathName().c_str(), bfile->blob_count_.load(),
                   blob_offset, value.size());
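  // Two index encodings are used below: EncodeBlob for non-expiring entries
  // (file number, offset, size, compression) and EncodeBlobTTL, which
  // additionally records the expiration so readers can drop expired keys
  // without consulting the blob file.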
  if (expiration == kNoExpiration) {
    BlobIndex::EncodeBlob(index_entry, bfile->BlobFileNumber(), blob_offset,
                          value.size(), bdb_options_.compression);
  } else {
    BlobIndex::EncodeBlobTTL(index_entry, expiration, bfile->BlobFileNumber(),
                             blob_offset, value.size(),
                             bdb_options_.compression);
  }

  return s;
}
@@ -1138,29 +1022,45 @@ bool BlobDBImpl::SetSnapshotIfNeeded(ReadOptions* read_options) {
Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry,
                                PinnableSlice* value) {
  assert(value != nullptr);
  BlobHandle handle;
  Status s = handle.DecodeFrom(index_entry);
  if (!s.ok()) return s;
  BlobIndex blob_index;
  Status s = blob_index.DecodeFrom(index_entry);
  if (!s.ok()) {
    return s;
  }
  if (blob_index.HasTTL() && blob_index.expiration() <= EpochNow()) {
    return Status::NotFound("Key expired");
  }
  if (blob_index.IsInlined()) {
    // TODO(yiwu): If index_entry is a PinnableSlice, we can also pin the same
    // memory buffer to avoid extra copy.
    value->PinSelf(blob_index.value());
    return Status::OK();
  }
  if (blob_index.size() == 0) {
    value->PinSelf("");
    return Status::OK();
  }
  // The offset must exceed a certain minimum, since we will later read the
  // CRC that precedes the key in the blob record, and that read also needs a
  // valid offset.
  if (handle.offset() <
  if (blob_index.offset() <
      (BlobLogHeader::kHeaderSize + BlobLogRecord::kHeaderSize + key.size())) {
    if (debug_level_ >= 2) {
      ROCKS_LOG_ERROR(
          db_options_.info_log,
          "Invalid blob handle file_number: %" PRIu64 " blob_offset: %" PRIu64
          " blob_size: %" PRIu64 " key: %s",
          handle.filenumber(), handle.offset(), handle.size(), key.data());
      ROCKS_LOG_ERROR(db_options_.info_log,
                      "Invalid blob index file_number: %" PRIu64
                      " blob_offset: %" PRIu64 " blob_size: %" PRIu64
                      " key: %s",
                      blob_index.file_number(), blob_index.offset(),
                      blob_index.size(), key.data());
    }
    return Status::NotFound("Blob Not Found, although found in LSM");
    return Status::NotFound("Invalid blob offset");
  }
  std::shared_ptr<BlobFile> bfile;
  {
    ReadLock rl(&mutex_);
    auto hitr = blob_files_.find(handle.filenumber());
    auto hitr = blob_files_.find(blob_index.file_number());

    // file was deleted
    if (hitr == blob_files_.end()) {
@@ -1170,7 +1070,7 @@ Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry,
    bfile = hitr->second;
  }

  if (handle.size() == 0 && value != nullptr) {
  if (blob_index.size() == 0 && value != nullptr) {
    value->PinSelf("");
    return Status::OK();
  }
@@ -1186,19 +1086,19 @@ Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry,
  }

  // allocate the buffer. This is safe in C++11
  valueptr->resize(handle.size());
  valueptr->resize(blob_index.size());
  char* buffer = &(*valueptr)[0];

  Slice blob_value;
  s = reader->Read(handle.offset(), handle.size(), &blob_value, buffer);
  if (!s.ok() || blob_value.size() != handle.size()) {
  s = reader->Read(blob_index.offset(), blob_index.size(), &blob_value, buffer);
  if (!s.ok() || blob_value.size() != blob_index.size()) {
    if (debug_level_ >= 2) {
      ROCKS_LOG_ERROR(db_options_.info_log,
                      "Failed to read blob from file: %s blob_offset: %" PRIu64
                      " blob_size: %" PRIu64 " read: %d key: %s status: '%s'",
                      bfile->PathName().c_str(), handle.offset(), handle.size(),
                      static_cast<int>(blob_value.size()), key.data(),
                      s.ToString().c_str());
                      bfile->PathName().c_str(), blob_index.offset(),
                      blob_index.size(), static_cast<int>(blob_value.size()),
                      key.data(), s.ToString().c_str());
    }
    return Status::NotFound("Blob Not Found as couldnt retrieve Blob");
  }
@@ -1208,15 +1108,15 @@ Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry,
  std::string crc_str;
  crc_str.resize(sizeof(uint32_t));
  char* crc_buffer = &(crc_str[0]);
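  // Record layout assumed here: the 4-byte CRC sits immediately before the
  // key, and the key precedes the value, so the CRC is read from
  // offset - (key_size + sizeof(uint32_t)).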
  s = reader->Read(handle.offset() - (key.size() + sizeof(uint32_t)),
  s = reader->Read(blob_index.offset() - (key.size() + sizeof(uint32_t)),
                   sizeof(uint32_t), &crc_slice, crc_buffer);
  if (!s.ok() || !GetFixed32(&crc_slice, &crc_exp)) {
    if (debug_level_ >= 2) {
      ROCKS_LOG_ERROR(db_options_.info_log,
                      "Failed to fetch blob crc file: %s blob_offset: %" PRIu64
                      " blob_size: %" PRIu64 " key: %s status: '%s'",
                      bfile->PathName().c_str(), handle.offset(), handle.size(),
                      key.data(), s.ToString().c_str());
                      bfile->PathName().c_str(), blob_index.offset(),
                      blob_index.size(), key.data(), s.ToString().c_str());
    }
    return Status::NotFound("Blob Not Found as couldnt retrieve CRC");
  }
@@ -1228,8 +1128,8 @@ Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry,
    ROCKS_LOG_ERROR(db_options_.info_log,
                    "Blob crc mismatch file: %s blob_offset: %" PRIu64
                    " blob_size: %" PRIu64 " key: %s status: '%s'",
                    bfile->PathName().c_str(), handle.offset(), handle.size(),
                    key.data(), s.ToString().c_str());
                    bfile->PathName().c_str(), blob_index.offset(),
                    blob_index.size(), key.data(), s.ToString().c_str());
  }
  return Status::Corruption("Corruption. Blob CRC mismatch");
}
@@ -1358,8 +1258,9 @@ bool BlobDBImpl::FileDeleteOk_SnapshotCheckLocked(
  SequenceNumber esn = bfile->GetSNRange().first;

  // this is not correct.
  // you want to check that there are no snapshots in the
  // TODO(yiwu): Here we should instead check whether there is an active
  // snapshot lying between the first sequence in the file and the last
  // sequence by the time the file finished being garbage collected.
  bool notok = db_impl_->HasActiveSnapshotLaterThanSN(esn);
  if (notok) {
    ROCKS_LOG_INFO(db_options_.info_log,
@@ -1399,16 +1300,16 @@ bool BlobDBImpl::FindFileAndEvictABlob(uint64_t file_number, uint64_t key_size,
}
bool BlobDBImpl::MarkBlobDeleted(const Slice& key, const Slice& index_entry) {
  BlobHandle handle;
  Status s = handle.DecodeFrom(index_entry);
  BlobIndex blob_index;
  Status s = blob_index.DecodeFrom(index_entry);
  if (!s.ok()) {
    ROCKS_LOG_INFO(db_options_.info_log,
                   "Could not parse lsm val in MarkBlobDeleted %s",
                   index_entry.ToString().c_str());
    return false;
  }
  bool succ = FindFileAndEvictABlob(handle.filenumber(), key.size(),
                                    handle.offset(), handle.size());
  bool succ = FindFileAndEvictABlob(blob_index.file_number(), key.size(),
                                    blob_index.offset(), blob_index.size());
  return succ;
}
@@ -1757,16 +1658,16 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr<BlobFile>& bfptr,
      continue;
    }

    BlobHandle handle;
    s = handle.DecodeFrom(index_entry);
    BlobIndex blob_index;
    s = blob_index.DecodeFrom(index_entry);
    if (!s.ok()) {
      ROCKS_LOG_ERROR(db_options_.info_log,
                      "Error while decoding index entry: %s",
                      s.ToString().c_str());
      break;
    }
    if (handle.filenumber() != bfptr->BlobFileNumber() ||
        handle.offset() != blob_offset) {
    if (blob_index.file_number() != bfptr->BlobFileNumber() ||
        blob_index.offset() != blob_offset) {
      // Key has been overwritten. Drop the blob record.
      continue;
    }
@@ -1843,12 +1744,9 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr<BlobFile>& bfptr,
    s = new_writer->AddRecord(record.Key(), record.Blob(), &new_key_offset,
                              &new_blob_offset, record.GetTTL());

    BlobHandle new_handle;
    new_handle.set_filenumber(newfile->BlobFileNumber());
    new_handle.set_size(record.Blob().size());
    new_handle.set_offset(new_blob_offset);
    new_handle.set_compression(bdb_options_.compression);
    new_handle.EncodeTo(&new_index_entry);
    BlobIndex::EncodeBlob(&new_index_entry, newfile->BlobFileNumber(),
                          new_blob_offset, record.Blob().size(),
                          bdb_options_.compression);

    newfile->blob_count_++;
    newfile->file_size_ +=
@@ -2269,6 +2167,11 @@ Status DestroyBlobDB(const std::string& dbname, const Options& options,
}
#ifndef NDEBUG
Status BlobDBImpl::TEST_GetBlobValue(const Slice& key, const Slice& index_entry,
                                     PinnableSlice* value) {
  return GetBlobValue(key, index_entry, value);
}

std::vector<std::shared_ptr<BlobFile>> BlobDBImpl::TEST_GetBlobFiles() const {
  ReadLock l(&mutex_);
  std::vector<std::shared_ptr<BlobFile>> blob_files;