@ -46,6 +46,22 @@
namespace ROCKSDB_NAMESPACE {
namespace ROCKSDB_NAMESPACE {
namespace {
inline uint32_t ChecksumHexToInt32 ( const std : : string & checksum_hex ) {
std : : string checksum_str ;
Slice ( checksum_hex ) . DecodeHex ( & checksum_str ) ;
return EndianSwapValue ( DecodeFixed32 ( checksum_str . c_str ( ) ) ) ;
}
inline std : : string ChecksumStrToHex ( const std : : string & checksum_str ) {
return Slice ( checksum_str ) . ToString ( true ) ;
}
inline std : : string ChecksumInt32ToHex ( const uint32_t & checksum_value ) {
std : : string checksum_str ;
PutFixed32 ( & checksum_str , EndianSwapValue ( checksum_value ) ) ;
return ChecksumStrToHex ( checksum_str ) ;
}
} // namespace
void BackupStatistics : : IncrementNumberSuccessBackup ( ) {
void BackupStatistics : : IncrementNumberSuccessBackup ( ) {
number_success_backup + + ;
number_success_backup + + ;
}
}
@ -149,12 +165,12 @@ class BackupEngineImpl : public BackupEngine {
std : : unordered_map < std : : string , uint64_t > * result ) ;
std : : unordered_map < std : : string , uint64_t > * result ) ;
struct FileInfo {
struct FileInfo {
FileInfo ( const std : : string & fname , uint64_t sz , uint32_t checksum ,
FileInfo ( const std : : string & fname , uint64_t sz , const std : : string & checksum ,
const std : : string & id = " " , const std : : string & sid = " " )
const std : : string & id = " " , const std : : string & sid = " " )
: refs ( 0 ) ,
: refs ( 0 ) ,
filename ( fname ) ,
filename ( fname ) ,
size ( sz ) ,
size ( sz ) ,
checksum_value ( checksum ) ,
checksum_hex ( checksum ) ,
db_id ( id ) ,
db_id ( id ) ,
db_session_id ( sid ) { }
db_session_id ( sid ) { }
@ -164,11 +180,13 @@ class BackupEngineImpl : public BackupEngine {
int refs ;
int refs ;
const std : : string filename ;
const std : : string filename ;
const uint64_t size ;
const uint64_t size ;
const uint32_t checksum_value ;
const std : : string checksum_hex ;
// DB identities
// DB identities
// db_id is obtained for potential usage in the future but not used
// db_id is obtained for potential usage in the future but not used
// currently; db_session_id appears in the backup SST filename
// currently
const std : : string db_id ;
const std : : string db_id ;
// db_session_id appears in the backup SST filename if the table naming
// option is kOptionalChecksumAndDbSessionId
const std : : string db_session_id ;
const std : : string db_session_id ;
} ;
} ;
@ -279,6 +297,7 @@ class BackupEngineImpl : public BackupEngine {
inline std : : string GetPrivateDirRel ( ) const {
inline std : : string GetPrivateDirRel ( ) const {
return " private " ;
return " private " ;
}
}
inline std : : string GetSharedDirRel ( ) const { return " shared " ; }
inline std : : string GetSharedChecksumDirRel ( ) const {
inline std : : string GetSharedChecksumDirRel ( ) const {
return " shared_checksum " ;
return " shared_checksum " ;
}
}
@ -292,7 +311,7 @@ class BackupEngineImpl : public BackupEngine {
inline std : : string GetSharedFileRel ( const std : : string & file = " " ,
inline std : : string GetSharedFileRel ( const std : : string & file = " " ,
bool tmp = false ) const {
bool tmp = false ) const {
assert ( file . size ( ) = = 0 | | file [ 0 ] ! = ' / ' ) ;
assert ( file . size ( ) = = 0 | | file [ 0 ] ! = ' / ' ) ;
return std : : string ( " shared /" ) + ( tmp ? " . " : " " ) + file +
return GetSharedDirRel ( ) + " / " + ( tmp ? " . " : " " ) + file +
( tmp ? " .tmp " : " " ) ;
( tmp ? " .tmp " : " " ) ;
}
}
inline std : : string GetSharedFileWithChecksumRel ( const std : : string & file = " " ,
inline std : : string GetSharedFileWithChecksumRel ( const std : : string & file = " " ,
@ -301,22 +320,29 @@ class BackupEngineImpl : public BackupEngine {
return GetSharedChecksumDirRel ( ) + " / " + ( tmp ? " . " : " " ) + file +
return GetSharedChecksumDirRel ( ) + " / " + ( tmp ? " . " : " " ) + file +
( tmp ? " .tmp " : " " ) ;
( tmp ? " .tmp " : " " ) ;
}
}
// If kChecksumAndDbSessionId is the naming option and db_session_id is not
inline bool UseSessionId ( const std : : string & sid ) const {
// empty, backup SST filenames consist of file_number, crc32c, db_session_id.
return GetTableNamingOption ( ) = = kOptionalChecksumAndDbSessionId & &
// Otherwise, backup SST filenames consist of file_number, crc32c, file_size.
! sid . empty ( ) ;
}
inline std : : string GetSharedFileWithChecksum (
inline std : : string GetSharedFileWithChecksum (
const std : : string & file , const uint32_t checksum_value ,
const std : : string & file , bool has_checksum ,
const uint64_t file_size , const std : : string & db_session_id ) const {
const std : : string & checksum_hex , const uint64_t file_size ,
const std : : string & db_session_id ) const {
assert ( file . size ( ) = = 0 | | file [ 0 ] ! = ' / ' ) ;
assert ( file . size ( ) = = 0 | | file [ 0 ] ! = ' / ' ) ;
std : : string file_copy = file ;
std : : string file_copy = file ;
const std : : string suffix =
if ( UseSessionId ( db_session_id ) ) {
GetTableNamingOption ( ) = = kChecksumAndDbSessionId & &
if ( has_checksum ) {
! db_session_id . empty ( )
return file_copy . insert ( file_copy . find_last_of ( ' . ' ) ,
? db_session_id
" _ " + checksum_hex + " _ " + db_session_id ) ;
: ROCKSDB_NAMESPACE : : ToString ( file_size ) ;
} else {
return file_copy . insert (
return file_copy . insert ( file_copy . find_last_of ( ' . ' ) ,
file_copy . find_last_of ( ' . ' ) ,
" _ " + db_session_id ) ;
" _ " + ROCKSDB_NAMESPACE : : ToString ( checksum_value ) + " _ " + suffix ) ;
}
} else {
return file_copy . insert ( file_copy . find_last_of ( ' . ' ) ,
" _ " + ToString ( ChecksumHexToInt32 ( checksum_hex ) ) +
" _ " + ToString ( file_size ) ) ;
}
}
}
inline std : : string GetFileFromChecksumFile ( const std : : string & file ) const {
inline std : : string GetFileFromChecksumFile ( const std : : string & file ) const {
assert ( file . size ( ) = = 0 | | file [ 0 ] ! = ' / ' ) ;
assert ( file . size ( ) = = 0 | | file [ 0 ] ! = ' / ' ) ;
@ -332,17 +358,6 @@ class BackupEngineImpl : public BackupEngine {
return GetBackupMetaDir ( ) + " / " + ( tmp ? " . " : " " ) +
return GetBackupMetaDir ( ) + " / " + ( tmp ? " . " : " " ) +
ROCKSDB_NAMESPACE : : ToString ( backup_id ) + ( tmp ? " .tmp " : " " ) ;
ROCKSDB_NAMESPACE : : ToString ( backup_id ) + ( tmp ? " .tmp " : " " ) ;
}
}
inline bool IsSstFile ( const std : : string & fname ) const {
return fname . length ( ) > 4 & & fname . rfind ( " .sst " ) = = fname . length ( ) - 4 ;
}
inline std : : string ChecksumInt32ToStr ( const uint32_t & checksum_int ) {
std : : string checksum_str ;
PutFixed32 ( & checksum_str , EndianSwapValue ( checksum_int ) ) ;
return checksum_str ;
}
inline uint32_t ChecksumStrToInt32 ( const std : : string & checksum_str ) {
return EndianSwapValue ( DecodeFixed32 ( checksum_str . c_str ( ) ) ) ;
}
// If size_limit == 0, there is no size limit, copy everything.
// If size_limit == 0, there is no size limit, copy everything.
//
//
@ -354,14 +369,13 @@ class BackupEngineImpl : public BackupEngine {
const std : : string & src , const std : : string & dst ,
const std : : string & src , const std : : string & dst ,
const std : : string & contents , Env * src_env , Env * dst_env ,
const std : : string & contents , Env * src_env , Env * dst_env ,
const EnvOptions & src_env_options , bool sync , RateLimiter * rate_limiter ,
const EnvOptions & src_env_options , bool sync , RateLimiter * rate_limiter ,
uint64_t * size = nullptr , uint32_t * checksum_value = nullptr ,
uint64_t * size = nullptr , std : : string * checksum_hex = nullptr ,
uint64_t size_limit = 0 ,
uint64_t size_limit = 0 ,
std : : function < void ( ) > progress_callback = [ ] ( ) { } ,
std : : function < void ( ) > progress_callback = [ ] ( ) { } ) ;
std : : string * db_id = nullptr , std : : string * db_session_id = nullptr ) ;
Status CalculateChecksum ( const std : : string & src , Env * src_env ,
Status CalculateChecksum ( const std : : string & src , Env * src_env ,
const EnvOptions & src_env_options ,
const EnvOptions & src_env_options ,
uint64_t size_limit , uint32_t * checksum_value ) ;
uint64_t size_limit , std : : string * checksum_hex ) ;
// Obtain db_id and db_session_id from the table properties of file_path
// Obtain db_id and db_session_id from the table properties of file_path
Status GetFileDbIdentities ( Env * src_env , const EnvOptions & src_env_options ,
Status GetFileDbIdentities ( Env * src_env , const EnvOptions & src_env_options ,
@ -370,7 +384,7 @@ class BackupEngineImpl : public BackupEngine {
struct CopyOrCreateResult {
struct CopyOrCreateResult {
uint64_t size ;
uint64_t size ;
uint32_t checksum_value ;
std : : string checksum_hex ;
std : : string db_id ;
std : : string db_id ;
std : : string db_session_id ;
std : : string db_session_id ;
Status status ;
Status status ;
@ -393,7 +407,9 @@ class BackupEngineImpl : public BackupEngine {
std : : function < void ( ) > progress_callback ;
std : : function < void ( ) > progress_callback ;
bool verify_checksum_after_work ;
bool verify_checksum_after_work ;
std : : string src_checksum_func_name ;
std : : string src_checksum_func_name ;
std : : string src_checksum_str ;
std : : string src_checksum_hex ;
std : : string db_id ;
std : : string db_session_id ;
CopyOrCreateWorkItem ( )
CopyOrCreateWorkItem ( )
: src_path ( " " ) ,
: src_path ( " " ) ,
@ -407,7 +423,9 @@ class BackupEngineImpl : public BackupEngine {
size_limit ( 0 ) ,
size_limit ( 0 ) ,
verify_checksum_after_work ( false ) ,
verify_checksum_after_work ( false ) ,
src_checksum_func_name ( kUnknownFileChecksumFuncName ) ,
src_checksum_func_name ( kUnknownFileChecksumFuncName ) ,
src_checksum_str ( kUnknownFileChecksum ) { }
src_checksum_hex ( " " ) ,
db_id ( " " ) ,
db_session_id ( " " ) { }
CopyOrCreateWorkItem ( const CopyOrCreateWorkItem & ) = delete ;
CopyOrCreateWorkItem ( const CopyOrCreateWorkItem & ) = delete ;
CopyOrCreateWorkItem & operator = ( const CopyOrCreateWorkItem & ) = delete ;
CopyOrCreateWorkItem & operator = ( const CopyOrCreateWorkItem & ) = delete ;
@ -429,8 +447,10 @@ class BackupEngineImpl : public BackupEngine {
result = std : : move ( o . result ) ;
result = std : : move ( o . result ) ;
progress_callback = std : : move ( o . progress_callback ) ;
progress_callback = std : : move ( o . progress_callback ) ;
verify_checksum_after_work = o . verify_checksum_after_work ;
verify_checksum_after_work = o . verify_checksum_after_work ;
src_checksum_func_name = o . src_checksum_func_name ;
src_checksum_func_name = std : : move ( o . src_checksum_func_name ) ;
src_checksum_str = o . src_checksum_str ;
src_checksum_hex = std : : move ( o . src_checksum_hex ) ;
db_id = std : : move ( o . db_id ) ;
db_session_id = std : : move ( o . db_session_id ) ;
return * this ;
return * this ;
}
}
@ -439,10 +459,11 @@ class BackupEngineImpl : public BackupEngine {
Env * _src_env , Env * _dst_env , EnvOptions _src_env_options , bool _sync ,
Env * _src_env , Env * _dst_env , EnvOptions _src_env_options , bool _sync ,
RateLimiter * _rate_limiter , uint64_t _size_limit ,
RateLimiter * _rate_limiter , uint64_t _size_limit ,
std : : function < void ( ) > _progress_callback = [ ] ( ) { } ,
std : : function < void ( ) > _progress_callback = [ ] ( ) { } ,
const bool & _verify_checksum_after_work = false ,
bool _verify_checksum_after_work = false ,
const std : : string & _src_checksum_func_name =
const std : : string & _src_checksum_func_name =
kUnknownFileChecksumFuncName ,
kUnknownFileChecksumFuncName ,
const std : : string & _src_checksum_str = kUnknownFileChecksum )
const std : : string & _src_checksum_hex = " " ,
const std : : string & _db_id = " " , const std : : string & _db_session_id = " " )
: src_path ( std : : move ( _src_path ) ) ,
: src_path ( std : : move ( _src_path ) ) ,
dst_path ( std : : move ( _dst_path ) ) ,
dst_path ( std : : move ( _dst_path ) ) ,
contents ( std : : move ( _contents ) ) ,
contents ( std : : move ( _contents ) ) ,
@ -455,7 +476,9 @@ class BackupEngineImpl : public BackupEngine {
progress_callback ( _progress_callback ) ,
progress_callback ( _progress_callback ) ,
verify_checksum_after_work ( _verify_checksum_after_work ) ,
verify_checksum_after_work ( _verify_checksum_after_work ) ,
src_checksum_func_name ( _src_checksum_func_name ) ,
src_checksum_func_name ( _src_checksum_func_name ) ,
src_checksum_str ( _src_checksum_str ) { }
src_checksum_hex ( _src_checksum_hex ) ,
db_id ( _db_id ) ,
db_session_id ( _db_session_id ) { }
} ;
} ;
struct BackupAfterCopyOrCreateWorkItem {
struct BackupAfterCopyOrCreateWorkItem {
@ -507,12 +530,11 @@ class BackupEngineImpl : public BackupEngine {
struct RestoreAfterCopyOrCreateWorkItem {
struct RestoreAfterCopyOrCreateWorkItem {
std : : future < CopyOrCreateResult > result ;
std : : future < CopyOrCreateResult > result ;
uint32_t checksum_value ;
std : : string checksum_hex ;
RestoreAfterCopyOrCreateWorkItem ( )
RestoreAfterCopyOrCreateWorkItem ( ) : checksum_hex ( " " ) { }
: checksum_value ( 0 ) { }
RestoreAfterCopyOrCreateWorkItem ( std : : future < CopyOrCreateResult > & & _result ,
RestoreAfterCopyOrCreateWorkItem ( std : : future < CopyOrCreateResult > & & _result ,
uint32_t _checksum_value )
const std : : string & _checksum_hex )
: result ( std : : move ( _result ) ) , checksum_value ( _checksum_value ) { }
: result ( std : : move ( _result ) ) , checksum_hex ( _checksum_hex ) { }
RestoreAfterCopyOrCreateWorkItem ( RestoreAfterCopyOrCreateWorkItem & & o )
RestoreAfterCopyOrCreateWorkItem ( RestoreAfterCopyOrCreateWorkItem & & o )
ROCKSDB_NOEXCEPT {
ROCKSDB_NOEXCEPT {
* this = std : : move ( o ) ;
* this = std : : move ( o ) ;
@ -521,7 +543,7 @@ class BackupEngineImpl : public BackupEngine {
RestoreAfterCopyOrCreateWorkItem & operator = (
RestoreAfterCopyOrCreateWorkItem & operator = (
RestoreAfterCopyOrCreateWorkItem & & o ) ROCKSDB_NOEXCEPT {
RestoreAfterCopyOrCreateWorkItem & & o ) ROCKSDB_NOEXCEPT {
result = std : : move ( o . result ) ;
result = std : : move ( o . result ) ;
checksum_value = o . checksum_value ;
checksum_hex = std : : move ( o . checksum_hex ) ;
return * this ;
return * this ;
}
}
} ;
} ;
@ -837,8 +859,10 @@ Status BackupEngineImpl::Initialize() {
work_item . src_path , work_item . dst_path , work_item . contents ,
work_item . src_path , work_item . dst_path , work_item . contents ,
work_item . src_env , work_item . dst_env , work_item . src_env_options ,
work_item . src_env , work_item . dst_env , work_item . src_env_options ,
work_item . sync , work_item . rate_limiter , & result . size ,
work_item . sync , work_item . rate_limiter , & result . size ,
& result . checksum_value , work_item . size_limit ,
& result . checksum_hex , work_item . size_limit ,
work_item . progress_callback , & result . db_id , & result . db_session_id ) ;
work_item . progress_callback ) ;
result . db_id = work_item . db_id ;
result . db_session_id = work_item . db_session_id ;
if ( result . status . ok ( ) & & work_item . verify_checksum_after_work ) {
if ( result . status . ok ( ) & & work_item . verify_checksum_after_work ) {
// unknown checksum function name implies no db table file checksum in
// unknown checksum function name implies no db table file checksum in
// db manifest; work_item.verify_checksum_after_work being true means
// db manifest; work_item.verify_checksum_after_work being true means
@ -847,13 +871,10 @@ Status BackupEngineImpl::Initialize() {
if ( work_item . src_checksum_func_name = =
if ( work_item . src_checksum_func_name = =
kUnknownFileChecksumFuncName | |
kUnknownFileChecksumFuncName | |
work_item . src_checksum_func_name = = kDbFileChecksumFuncName ) {
work_item . src_checksum_func_name = = kDbFileChecksumFuncName ) {
uint32_t src_checksum_int =
if ( work_item . src_checksum_hex ! = result . checksum_hex ) {
ChecksumStrToInt32 ( work_item . src_checksum_str ) ;
std : : string checksum_info (
if ( src_checksum_int ! = result . checksum_value ) {
" Expected checksum is " + work_item . src_checksum_hex +
std : : string checksum_info ( " Expected checksum is " +
" while computed checksum is " + result . checksum_hex ) ;
ToString ( src_checksum_int ) +
" while computed checksum is " +
ToString ( result . checksum_value ) ) ;
result . status =
result . status =
Status : : Corruption ( " Checksum mismatch after copying to " +
Status : : Corruption ( " Checksum mismatch after copying to " +
work_item . dst_path + " : " + checksum_info ) ;
work_item . dst_path + " : " + checksum_info ) ;
@ -1037,12 +1058,12 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
auto result = item . result . get ( ) ;
auto result = item . result . get ( ) ;
item_status = result . status ;
item_status = result . status ;
if ( item_status . ok ( ) & & item . shared & & item . needed_to_copy ) {
if ( item_status . ok ( ) & & item . shared & & item . needed_to_copy ) {
item_status = item . backup_env - > RenameFile ( item . dst_path_tmp ,
item_status =
item . dst_path ) ;
item . backup_env - > RenameFile ( item . dst_path_tmp , item . dst_path ) ;
}
}
if ( item_status . ok ( ) ) {
if ( item_status . ok ( ) ) {
item_status = new_backup . get ( ) - > AddFile ( std : : make_shared < FileInfo > (
item_status = new_backup . get ( ) - > AddFile ( std : : make_shared < FileInfo > (
item . dst_relative , result . size , result . checksum_value , result . db_id ,
item . dst_relative , result . size , result . checksum_hex , result . db_id ,
result . db_session_id ) ) ;
result . db_session_id ) ) ;
}
}
if ( ! item_status . ok ( ) ) {
if ( ! item_status . ok ( ) ) {
@ -1310,18 +1331,19 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options,
Status s ;
Status s ;
std : : vector < RestoreAfterCopyOrCreateWorkItem > restore_items_to_finish ;
std : : vector < RestoreAfterCopyOrCreateWorkItem > restore_items_to_finish ;
for ( const auto & file_info : backup - > GetFiles ( ) ) {
for ( const auto & file_info : backup - > GetFiles ( ) ) {
const std : : string & file = file_info - > filename ;
const std : : string & file = file_info - > filename ;
std : : string dst ;
std : : string dst ;
// 1. extract the filename
// 1. extract the filename
size_t slash = file . find_last_of ( ' / ' ) ;
size_t slash = file . find_last_of ( ' / ' ) ;
// file will either be shared/<file>, shared_checksum/<file_crc32c_size>
// file will either be shared/<file>, shared_checksum/<file_crc32c_size>,
// shared_checksum/<file_crc32c_session>, or private/<number>/<file>
// shared_checksum/<file_session>, shared_checksum/<file_crc32c_session>,
// or private/<number>/<file>
assert ( slash ! = std : : string : : npos ) ;
assert ( slash ! = std : : string : : npos ) ;
dst = file . substr ( slash + 1 ) ;
dst = file . substr ( slash + 1 ) ;
// if the file was in shared_checksum, extract the real file name
// if the file was in shared_checksum, extract the real file name
// in this case the file is <number>_<checksum>_<size>.<type>
// in this case the file is <number>_<checksum>_<size>.<type>,
// or <number>_<checksum>_<session>.<type> if new naming is used
// <number>_<session>.<type>, or <number>_<checksum>_<session>.<type>
if ( file . substr ( 0 , slash ) = = GetSharedChecksumDirRel ( ) ) {
if ( file . substr ( 0 , slash ) = = GetSharedChecksumDirRel ( ) ) {
dst = GetFileFromChecksumFile ( dst ) ;
dst = GetFileFromChecksumFile ( dst ) ;
}
}
@ -1346,8 +1368,7 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options,
EnvOptions ( ) /* src_env_options */ , false , rate_limiter ,
EnvOptions ( ) /* src_env_options */ , false , rate_limiter ,
0 /* size_limit */ ) ;
0 /* size_limit */ ) ;
RestoreAfterCopyOrCreateWorkItem after_copy_or_create_work_item (
RestoreAfterCopyOrCreateWorkItem after_copy_or_create_work_item (
copy_or_create_work_item . result . get_future ( ) ,
copy_or_create_work_item . result . get_future ( ) , file_info - > checksum_hex ) ;
file_info - > checksum_value ) ;
files_to_copy_or_create_ . write ( std : : move ( copy_or_create_work_item ) ) ;
files_to_copy_or_create_ . write ( std : : move ( copy_or_create_work_item ) ) ;
restore_items_to_finish . push_back (
restore_items_to_finish . push_back (
std : : move ( after_copy_or_create_work_item ) ) ;
std : : move ( after_copy_or_create_work_item ) ) ;
@ -1362,7 +1383,7 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options,
if ( ! item_status . ok ( ) ) {
if ( ! item_status . ok ( ) ) {
s = item_status ;
s = item_status ;
break ;
break ;
} else if ( item . checksum_value ! = result . checksum_value ) {
} else if ( item . checksum_hex ! = result . checksum_hex ) {
s = Status : : Corruption ( " Checksum check failed " ) ;
s = Status : : Corruption ( " Checksum check failed " ) ;
break ;
break ;
}
}
@ -1420,15 +1441,15 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id,
}
}
if ( verify_with_checksum ) {
if ( verify_with_checksum ) {
// verify file checksum
// verify file checksum
uint32_t checksum_value = 0 ;
std : : string checksum_hex ;
ROCKS_LOG_INFO ( options_ . info_log , " Verifying %s checksum... \n " ,
ROCKS_LOG_INFO ( options_ . info_log , " Verifying %s checksum... \n " ,
abs_path . c_str ( ) ) ;
abs_path . c_str ( ) ) ;
CalculateChecksum ( abs_path , backup_env_ , EnvOptions ( ) , 0 /* size_limit */ ,
CalculateChecksum ( abs_path , backup_env_ , EnvOptions ( ) , 0 /* size_limit */ ,
& checksum_value ) ;
& checksum_hex ) ;
if ( file_info - > checksum_value ! = checksum_value ) {
if ( file_info - > checksum_hex ! = checksum_hex ) {
std : : string checksum_info (
std : : string checksum_info (
" Expected checksum is " + ToString ( file_info - > checksum_value ) +
" Expected checksum is " + file_info - > checksum_hex +
" while computed checksum is " + ToString ( checksum_value ) ) ;
" while computed checksum is " + checksum_hex ) ;
return Status : : Corruption ( " File corrupted: Checksum mismatch for " +
return Status : : Corruption ( " File corrupted: Checksum mismatch for " +
abs_path + " : " + checksum_info ) ;
abs_path + " : " + checksum_info ) ;
}
}
@ -1440,9 +1461,8 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id,
Status BackupEngineImpl : : CopyOrCreateFile (
Status BackupEngineImpl : : CopyOrCreateFile (
const std : : string & src , const std : : string & dst , const std : : string & contents ,
const std : : string & src , const std : : string & dst , const std : : string & contents ,
Env * src_env , Env * dst_env , const EnvOptions & src_env_options , bool sync ,
Env * src_env , Env * dst_env , const EnvOptions & src_env_options , bool sync ,
RateLimiter * rate_limiter , uint64_t * size , uint32_t * checksum_value ,
RateLimiter * rate_limiter , uint64_t * size , std : : string * checksum_hex ,
uint64_t size_limit , std : : function < void ( ) > progress_callback ,
uint64_t size_limit , std : : function < void ( ) > progress_callback ) {
std : : string * db_id , std : : string * db_session_id ) {
assert ( src . empty ( ) ! = contents . empty ( ) ) ;
assert ( src . empty ( ) ! = contents . empty ( ) ) ;
Status s ;
Status s ;
std : : unique_ptr < WritableFile > dst_file ;
std : : unique_ptr < WritableFile > dst_file ;
@ -1453,15 +1473,7 @@ Status BackupEngineImpl::CopyOrCreateFile(
if ( size ! = nullptr ) {
if ( size ! = nullptr ) {
* size = 0 ;
* size = 0 ;
}
}
if ( checksum_value ! = nullptr ) {
uint32_t checksum_value = 0 ;
* checksum_value = 0 ;
}
if ( db_id ! = nullptr ) {
* db_id = " " ;
}
if ( db_session_id ! = nullptr ) {
* db_session_id = " " ;
}
// Check if size limit is set. if not, set it to very big number
// Check if size limit is set. if not, set it to very big number
if ( size_limit = = 0 ) {
if ( size_limit = = 0 ) {
@ -1504,7 +1516,8 @@ Status BackupEngineImpl::CopyOrCreateFile(
size_limit - = data . size ( ) ;
size_limit - = data . size ( ) ;
TEST_SYNC_POINT_CALLBACK (
TEST_SYNC_POINT_CALLBACK (
" BackupEngineImpl::CopyOrCreateFile:CorruptionDuringBackup " ,
" BackupEngineImpl::CopyOrCreateFile:CorruptionDuringBackup " ,
IsSstFile ( src ) ? & data : nullptr ) ;
( src . length ( ) > 4 & & src . rfind ( " .sst " ) = = src . length ( ) - 4 ) ? & data
: nullptr ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
return s ;
return s ;
@ -1513,9 +1526,8 @@ Status BackupEngineImpl::CopyOrCreateFile(
if ( size ! = nullptr ) {
if ( size ! = nullptr ) {
* size + = data . size ( ) ;
* size + = data . size ( ) ;
}
}
if ( checksum_value ! = nullptr ) {
if ( checksum_hex ! = nullptr ) {
* checksum_value =
checksum_value = crc32c : : Extend ( checksum_value , data . data ( ) , data . size ( ) ) ;
crc32c : : Extend ( * checksum_value , data . data ( ) , data . size ( ) ) ;
}
}
s = dest_writer - > Append ( data ) ;
s = dest_writer - > Append ( data ) ;
if ( rate_limiter ! = nullptr ) {
if ( rate_limiter ! = nullptr ) {
@ -1529,29 +1541,17 @@ Status BackupEngineImpl::CopyOrCreateFile(
}
}
} while ( s . ok ( ) & & contents . empty ( ) & & data . size ( ) > 0 & & size_limit > 0 ) ;
} while ( s . ok ( ) & & contents . empty ( ) & & data . size ( ) > 0 & & size_limit > 0 ) ;
// Convert uint32_t checksum to hex checksum
if ( checksum_hex ! = nullptr ) {
checksum_hex - > assign ( ChecksumInt32ToHex ( checksum_value ) ) ;
}
if ( s . ok ( ) & & sync ) {
if ( s . ok ( ) & & sync ) {
s = dest_writer - > Sync ( false ) ;
s = dest_writer - > Sync ( false ) ;
}
}
if ( s . ok ( ) ) {
if ( s . ok ( ) ) {
s = dest_writer - > Close ( ) ;
s = dest_writer - > Close ( ) ;
}
}
if ( s . ok ( ) & & GetTableNamingOption ( ) = = kChecksumAndDbSessionId ) {
// When copying SST files and using db_session_id in the name,
// try to get DB identities
// Note that when CopyOrCreateFile() is called while restoring, we still
// try obtaining the ids but as for now we do not use ids to verify
// the restored file
if ( ! src . empty ( ) ) {
// copying
if ( IsSstFile ( src ) & & ( db_id ! = nullptr | | db_session_id ! = nullptr ) ) {
// SST file
// Ignore the returned status
// In the failed cases, db_id and db_session_id will be empty
GetFileDbIdentities ( src_env , src_env_options , src , db_id ,
db_session_id ) ;
}
}
}
return s ;
return s ;
}
}
@ -1571,65 +1571,67 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
std : : string dst_relative = fname . substr ( 1 ) ;
std : : string dst_relative = fname . substr ( 1 ) ;
std : : string dst_relative_tmp ;
std : : string dst_relative_tmp ;
Status s ;
Status s ;
uint32_t checksum_value = 0 ;
std : : string checksum_hex ;
std : : string db_id ;
std : : string db_id ;
std : : string db_session_id ;
std : : string db_session_id ;
// whether the checksum for a table file has been computed
// whether the checksum for a table file is available
bool has_checksum = false ;
bool has_checksum = false ;
// Whenever a default checksum function name is passed in, we will verify it
// Whenever a default checksum function name is passed in, we will compares
// before copying. Note that only table files may have a known checksum name
// the corresponding checksum values after copying. Note that only table files
// passed in.
// may have a known checksum function name passed in.
//
//
// If no default checksum function name is passed in, we will calculate the
// If no default checksum function name is passed in and db session id is not
// checksum *before* copying in two cases (we always calcuate checksums when
// available, we will calculate the checksum *before* copying in two cases
// copying or creating for any file types):
// (we always calcuate checksums when copying or creating for any file types):
// a) share_files_with_checksum is true and file type is table;
// a) share_files_with_checksum is true and file type is table;
// b) share_table_files is true and the file exists already.
// b) share_table_files is true and the file exists already.
//
// Step 0: Check if default checksum function name is passed in
if ( kDbFileChecksumFuncName = = src_checksum_func_name ) {
if ( kDbFileChecksumFuncName = = src_checksum_func_name ) {
if ( src_checksum_str = = kUnknownFileChecksum ) {
if ( src_checksum_str = = kUnknownFileChecksum ) {
return Status : : Aborted ( " Unknown checksum value for " + fname ) ;
return Status : : Aborted ( " Unknown checksum value for " + fname ) ;
}
}
s = CalculateChecksum ( src_dir + fname , db_env_ , src_env_options , size_limit ,
checksum_hex = ChecksumStrToHex ( src_checksum_str ) ;
& checksum_value ) ;
if ( ! s . ok ( ) ) {
return s ;
}
// Convert src_checksum_str to uint32_t and compare
uint32_t src_checksum_int = ChecksumStrToInt32 ( src_checksum_str ) ;
if ( src_checksum_int ! = checksum_value ) {
std : : string checksum_info (
" Expected checksum is " + ToString ( src_checksum_int ) +
" while computed checksum is " + ToString ( checksum_value ) ) ;
return Status : : Corruption ( " Checksum mismatch before copying from " +
fname + " : " + checksum_info ) ;
}
has_checksum = true ;
has_checksum = true ;
}
}
// Step 1: Prepare the relative path to destination
// Step 1: Prepare the relative path to destination
if ( shared & & shared_checksum ) {
if ( shared & & shared_checksum ) {
// add checksum and file length to the file name
if ( GetTableNamingOption ( ) = = kOptionalChecksumAndDbSessionId ) {
if ( ! has_checksum ) {
s = CalculateChecksum ( src_dir + fname , db_env_ , src_env_options ,
size_limit , & checksum_value ) ;
if ( ! s . ok ( ) ) {
return s ;
}
has_checksum = true ;
}
if ( GetTableNamingOption ( ) = = kChecksumAndDbSessionId ) {
// Prepare db_session_id to add to the file name
// Prepare db_session_id to add to the file name
// Ignore the returned status
// Ignore the returned status
// In the failed cases, db_id and db_session_id will be empty
// In the failed cases, db_id and db_session_id will be empty
GetFileDbIdentities ( db_env_ , src_env_options , src_dir + fname , & db_id ,
GetFileDbIdentities ( db_env_ , src_env_options , src_dir + fname , & db_id ,
& db_session_id ) ;
& db_session_id ) ;
}
}
// Calculate checksum if checksum and db session id are not available.
// If db session id is available, we will not calculate the checksum
// since the session id should suffice to avoid file name collision in
// the shared_checksum directory.
if ( ! has_checksum & & db_session_id . empty ( ) ) {
s = CalculateChecksum ( src_dir + fname , db_env_ , src_env_options ,
size_limit , & checksum_hex ) ;
if ( ! s . ok ( ) ) {
return s ;
}
has_checksum = true ;
}
if ( size_bytes = = port : : kMaxUint64 ) {
if ( size_bytes = = port : : kMaxUint64 ) {
return Status : : NotFound ( " File missing: " + src_dir + fname ) ;
return Status : : NotFound ( " File missing: " + src_dir + fname ) ;
}
}
dst_relative = GetSharedFileWithChecksum ( dst_relative , checksum_value ,
// dst_relative depends on the following conditions:
size_bytes , db_session_id ) ;
// 1) the naming scheme is kOptionalChecksumAndDbSessionId,
// 2) db_session_id is not empty,
// 3) checksum is available in the DB manifest.
// If 1,2,3) are satisfied, then dst_relative will be of the form:
// shared_checksum/<file_number>_<checksum>_<db_session_id>.sst
// If 1,2) are satisfied, then dst_relative will be of the form:
// shared_checksum/<file_number>_<db_session_id>.sst
// Otherwise, dst_relative is of the form
// shared_checksum/<file_number>_<checksum>_<size>.sst
dst_relative = GetSharedFileWithChecksum (
dst_relative , has_checksum , checksum_hex , size_bytes , db_session_id ) ;
dst_relative_tmp = GetSharedFileWithChecksumRel ( dst_relative , true ) ;
dst_relative_tmp = GetSharedFileWithChecksumRel ( dst_relative , true ) ;
dst_relative = GetSharedFileWithChecksumRel ( dst_relative , false ) ;
dst_relative = GetSharedFileWithChecksumRel ( dst_relative , false ) ;
} else if ( shared ) {
} else if ( shared ) {
@ -1681,20 +1683,43 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
} else if ( shared & & ( same_path | | file_exists ) ) {
} else if ( shared & & ( same_path | | file_exists ) ) {
need_to_copy = false ;
need_to_copy = false ;
if ( shared_checksum ) {
if ( shared_checksum ) {
if ( GetTableNamingOption ( ) = = kChecksumAndDbSessionId & &
if ( backuped_file_infos_ . find ( dst_relative ) = =
! db_session_id . empty ( ) ) {
backuped_file_infos_ . end ( ) & &
ROCKS_LOG_INFO ( options_ . info_log ,
! same_path ) {
" %s already present, with checksum %u, size % " PRIu64
// file exists but not referenced
" and DB session identity %s " ,
ROCKS_LOG_INFO (
fname . c_str ( ) , checksum_value , size_bytes ,
options_ . info_log ,
db_session_id . c_str ( ) ) ;
" %s already present, but not referenced by any backup. We will "
" overwrite the file. " ,
fname . c_str ( ) ) ;
need_to_copy = true ;
backup_env_ - > DeleteFile ( final_dest_path ) ;
} else {
} else {
ROCKS_LOG_INFO ( options_ . info_log ,
// file exists and referenced
" %s already present, with checksum %u and size % " PRIu64 ,
if ( ! has_checksum ) {
fname . c_str ( ) , checksum_value , size_bytes ) ;
s = CalculateChecksum ( src_dir + fname , db_env_ , src_env_options ,
size_limit , & checksum_hex ) ;
if ( ! s . ok ( ) ) {
return s ;
}
has_checksum = true ;
}
if ( UseSessionId ( db_session_id ) ) {
ROCKS_LOG_INFO ( options_ . info_log ,
" %s already present, with checksum %s, size % " PRIu64
" and DB session identity %s " ,
fname . c_str ( ) , checksum_hex . c_str ( ) , size_bytes ,
db_session_id . c_str ( ) ) ;
} else {
ROCKS_LOG_INFO (
options_ . info_log ,
" %s already present, with checksum %s and size % " PRIu64 ,
fname . c_str ( ) , checksum_hex . c_str ( ) , size_bytes ) ;
}
}
}
} else if ( backuped_file_infos_ . find ( dst_relative ) = =
} else if ( backuped_file_infos_ . find ( dst_relative ) = =
backuped_file_infos_ . end ( ) & & ! same_path ) {
backuped_file_infos_ . end ( ) & &
! same_path ) {
// file already exists, but it's not referenced by any backup. overwrite
// file already exists, but it's not referenced by any backup. overwrite
// the file
// the file
ROCKS_LOG_INFO (
ROCKS_LOG_INFO (
@ -1710,23 +1735,12 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
" %s already present, calculate checksum " , fname . c_str ( ) ) ;
" %s already present, calculate checksum " , fname . c_str ( ) ) ;
if ( ! has_checksum ) {
if ( ! has_checksum ) {
s = CalculateChecksum ( src_dir + fname , db_env_ , src_env_options ,
s = CalculateChecksum ( src_dir + fname , db_env_ , src_env_options ,
size_limit , & checksum_value ) ;
size_limit , & checksum_hex ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
return s ;
return s ;
}
}
has_checksum = true ;
has_checksum = true ;
}
}
// try to get the db identities as they are also members of
// the class CopyOrCreateResult
if ( GetTableNamingOption ( ) = = kChecksumAndDbSessionId ) {
assert ( IsSstFile ( fname ) ) ;
ROCKS_LOG_INFO ( options_ . info_log ,
" %s checksum checksum calculated, try to obtain DB "
" session identity " ,
fname . c_str ( ) ) ;
GetFileDbIdentities ( db_env_ , src_env_options , src_dir + fname , & db_id ,
& db_session_id ) ;
}
}
}
}
}
live_dst_paths . insert ( final_dest_path ) ;
live_dst_paths . insert ( final_dest_path ) ;
@ -1739,7 +1753,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
src_dir . empty ( ) ? " " : src_dir + fname , * copy_dest_path , contents ,
src_dir . empty ( ) ? " " : src_dir + fname , * copy_dest_path , contents ,
db_env_ , backup_env_ , src_env_options , options_ . sync , rate_limiter ,
db_env_ , backup_env_ , src_env_options , options_ . sync , rate_limiter ,
size_limit , progress_callback , has_checksum , src_checksum_func_name ,
size_limit , progress_callback , has_checksum , src_checksum_func_name ,
ChecksumInt32ToStr ( checksum_value ) ) ;
checksum_hex , db_id , db_session_id ) ;
BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item (
BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item (
copy_or_create_work_item . result . get_future ( ) , shared , need_to_copy ,
copy_or_create_work_item . result . get_future ( ) , shared , need_to_copy ,
backup_env_ , temp_dest_path , final_dest_path , dst_relative ) ;
backup_env_ , temp_dest_path , final_dest_path , dst_relative ) ;
@ -1754,9 +1768,9 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
CopyOrCreateResult result ;
CopyOrCreateResult result ;
result . status = s ;
result . status = s ;
result . size = size_bytes ;
result . size = size_bytes ;
result . checksum_value = checksum_value ;
result . checksum_hex = std : : move ( checksum_hex ) ;
result . db_id = db_id ;
result . db_id = std : : move ( db_id ) ;
result . db_session_id = db_session_id ;
result . db_session_id = std : : move ( db_session_id ) ;
promise_result . set_value ( std : : move ( result ) ) ;
promise_result . set_value ( std : : move ( result ) ) ;
}
}
return s ;
return s ;
@ -1765,8 +1779,11 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
Status BackupEngineImpl : : CalculateChecksum ( const std : : string & src , Env * src_env ,
Status BackupEngineImpl : : CalculateChecksum ( const std : : string & src , Env * src_env ,
const EnvOptions & src_env_options ,
const EnvOptions & src_env_options ,
uint64_t size_limit ,
uint64_t size_limit ,
uint32_t * checksum_value ) {
std : : string * checksum_hex ) {
* checksum_value = 0 ;
if ( checksum_hex = = nullptr ) {
return Status : : Aborted ( " Checksum pointer is null " ) ;
}
uint32_t checksum_value = 0 ;
if ( size_limit = = 0 ) {
if ( size_limit = = 0 ) {
size_limit = std : : numeric_limits < uint64_t > : : max ( ) ;
size_limit = std : : numeric_limits < uint64_t > : : max ( ) ;
}
}
@ -1795,9 +1812,11 @@ Status BackupEngineImpl::CalculateChecksum(const std::string& src, Env* src_env,
}
}
size_limit - = data . size ( ) ;
size_limit - = data . size ( ) ;
* checksum_value = crc32c : : Extend ( * checksum_value , data . data ( ) , data . size ( ) ) ;
checksum_value = crc32c : : Extend ( checksum_value , data . data ( ) , data . size ( ) ) ;
} while ( data . size ( ) > 0 & & size_limit > 0 ) ;
} while ( data . size ( ) > 0 & & size_limit > 0 ) ;
checksum_hex - > assign ( ChecksumInt32ToHex ( checksum_value ) ) ;
return s ;
return s ;
}
}
@ -2024,7 +2043,7 @@ Status BackupEngineImpl::BackupMeta::AddFile(
return Status : : Corruption ( " In memory metadata insertion error " ) ;
return Status : : Corruption ( " In memory metadata insertion error " ) ;
}
}
} else {
} else {
if ( itr - > second - > checksum_value ! = file_info - > checksum_value ) {
if ( itr - > second - > checksum_hex ! = file_info - > checksum_hex ) {
return Status : : Corruption (
return Status : : Corruption (
" Checksum mismatch for existing backup file. Delete old backups and "
" Checksum mismatch for existing backup file. Delete old backups and "
" try again. " ) ;
" try again. " ) ;
@ -2154,7 +2173,8 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(
" in " + meta_filename_ ) ;
" in " + meta_filename_ ) ;
}
}
files . emplace_back ( new FileInfo ( filename , size , checksum_value ) ) ;
files . emplace_back (
new FileInfo ( filename , size , ChecksumInt32ToHex ( checksum_value ) ) ) ;
}
}
if ( s . ok ( ) & & data . size ( ) > 0 ) {
if ( s . ok ( ) & & data . size ( ) > 0 ) {
@ -2197,7 +2217,8 @@ Status BackupEngineImpl::BackupMeta::StoreToFile(bool sync) {
Slice ( app_metadata_ ) . ToString ( /* hex */ true ) ;
Slice ( app_metadata_ ) . ToString ( /* hex */ true ) ;
// +1 to accommodate newline character
// +1 to accommodate newline character
size_t hex_meta_strlen = kMetaDataPrefix . ToString ( ) . length ( ) + hex_encoded_metadata . length ( ) + 1 ;
size_t hex_meta_strlen =
kMetaDataPrefix . ToString ( ) . length ( ) + hex_encoded_metadata . length ( ) + 1 ;
if ( hex_meta_strlen > = buf_size ) {
if ( hex_meta_strlen > = buf_size ) {
return Status : : Corruption ( " Buffer too small to fit backup metadata " ) ;
return Status : : Corruption ( " Buffer too small to fit backup metadata " ) ;
}
}
@ -2232,8 +2253,10 @@ Status BackupEngineImpl::BackupMeta::StoreToFile(bool sync) {
// use crc32c for now, switch to something else if needed
// use crc32c for now, switch to something else if needed
// WART: The checksums are crc32c, not original crc32
// WART: The checksums are crc32c, not original crc32
size_t newlen = len + file - > filename . length ( ) + snprintf ( writelen_temp ,
size_t newlen =
sizeof ( writelen_temp ) , " crc32 %u \n " , file - > checksum_value ) ;
len + file - > filename . length ( ) +
snprintf ( writelen_temp , sizeof ( writelen_temp ) , " crc32 %u \n " ,
ChecksumHexToInt32 ( file - > checksum_hex ) ) ;
const char * const_write = writelen_temp ;
const char * const_write = writelen_temp ;
if ( newlen > = buf_size ) {
if ( newlen > = buf_size ) {
backup_meta_file - > Append ( Slice ( buf . get ( ) , len ) ) ;
backup_meta_file - > Append ( Slice ( buf . get ( ) , len ) ) ;