@ -16,6 +16,7 @@
# include "monitoring/histogram.h"
# include "monitoring/histogram.h"
# include "monitoring/iostats_context_imp.h"
# include "monitoring/iostats_context_imp.h"
# include "port/port.h"
# include "port/port.h"
# include "rocksdb/io_status.h"
# include "rocksdb/system_clock.h"
# include "rocksdb/system_clock.h"
# include "test_util/sync_point.h"
# include "test_util/sync_point.h"
# include "util/crc32c.h"
# include "util/crc32c.h"
@ -23,6 +24,13 @@
# include "util/rate_limiter.h"
# include "util/rate_limiter.h"
namespace ROCKSDB_NAMESPACE {
namespace ROCKSDB_NAMESPACE {
namespace {
IOStatus AssertFalseAndGetStatusForPrevError ( ) {
assert ( false ) ;
return IOStatus : : IOError ( " Writer has previous error. " ) ;
}
} // namespace
IOStatus WritableFileWriter : : Create ( const std : : shared_ptr < FileSystem > & fs ,
IOStatus WritableFileWriter : : Create ( const std : : shared_ptr < FileSystem > & fs ,
const std : : string & fname ,
const std : : string & fname ,
const FileOptions & file_opts ,
const FileOptions & file_opts ,
@ -43,6 +51,10 @@ IOStatus WritableFileWriter::Create(const std::shared_ptr<FileSystem>& fs,
IOStatus WritableFileWriter : : Append ( const Slice & data , uint32_t crc32c_checksum ,
IOStatus WritableFileWriter : : Append ( const Slice & data , uint32_t crc32c_checksum ,
Env : : IOPriority op_rate_limiter_priority ) {
Env : : IOPriority op_rate_limiter_priority ) {
if ( seen_error_ ) {
return AssertFalseAndGetStatusForPrevError ( ) ;
}
const char * src = data . data ( ) ;
const char * src = data . data ( ) ;
size_t left = data . size ( ) ;
size_t left = data . size ( ) ;
IOStatus s ;
IOStatus s ;
@ -85,6 +97,7 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
if ( buf_ . CurrentSize ( ) > 0 ) {
if ( buf_ . CurrentSize ( ) > 0 ) {
s = Flush ( op_rate_limiter_priority ) ;
s = Flush ( op_rate_limiter_priority ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
seen_error_ = true ;
return s ;
return s ;
}
}
}
}
@ -165,12 +178,17 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
if ( s . ok ( ) ) {
if ( s . ok ( ) ) {
uint64_t cur_size = filesize_ . load ( std : : memory_order_acquire ) ;
uint64_t cur_size = filesize_ . load ( std : : memory_order_acquire ) ;
filesize_ . store ( cur_size + data . size ( ) , std : : memory_order_release ) ;
filesize_ . store ( cur_size + data . size ( ) , std : : memory_order_release ) ;
} else {
seen_error_ = true ;
}
}
return s ;
return s ;
}
}
IOStatus WritableFileWriter : : Pad ( const size_t pad_bytes ,
IOStatus WritableFileWriter : : Pad ( const size_t pad_bytes ,
Env : : IOPriority op_rate_limiter_priority ) {
Env : : IOPriority op_rate_limiter_priority ) {
if ( seen_error_ ) {
return AssertFalseAndGetStatusForPrevError ( ) ;
}
assert ( pad_bytes < kDefaultPageSize ) ;
assert ( pad_bytes < kDefaultPageSize ) ;
size_t left = pad_bytes ;
size_t left = pad_bytes ;
size_t cap = buf_ . Capacity ( ) - buf_ . CurrentSize ( ) ;
size_t cap = buf_ . Capacity ( ) - buf_ . CurrentSize ( ) ;
@ -186,6 +204,7 @@ IOStatus WritableFileWriter::Pad(const size_t pad_bytes,
if ( left > 0 ) {
if ( left > 0 ) {
IOStatus s = Flush ( op_rate_limiter_priority ) ;
IOStatus s = Flush ( op_rate_limiter_priority ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
seen_error_ = true ;
return s ;
return s ;
}
}
}
}
@ -203,17 +222,31 @@ IOStatus WritableFileWriter::Pad(const size_t pad_bytes,
}
}
IOStatus WritableFileWriter : : Close ( ) {
IOStatus WritableFileWriter : : Close ( ) {
if ( seen_error_ ) {
IOStatus interim ;
if ( writable_file_ . get ( ) ! = nullptr ) {
interim = writable_file_ - > Close ( IOOptions ( ) , nullptr ) ;
writable_file_ . reset ( ) ;
}
if ( interim . ok ( ) ) {
return IOStatus : : IOError (
" File is closed but data not flushed as writer has previous error. " ) ;
} else {
return interim ;
}
}
// Do not quit immediately on failure the file MUST be closed
// Do not quit immediately on failure the file MUST be closed
IOStatus s ;
// Possible to close it twice now as we MUST close
// Possible to close it twice now as we MUST close
// in __dtor, simply flushing is not enough
// in __dtor, simply flushing is not enough
// Windows when pre-allocating does not fill with zeros
// Windows when pre-allocating does not fill with zeros
// also with unbuffered access we also set the end of data.
// also with unbuffered access we also set the end of data.
if ( writable_file_ . get ( ) = = nullptr ) {
if ( writable_file_ . get ( ) = = nullptr ) {
return s ;
return IOStatu s: : OK ( ) ;
}
}
IOStatus s ;
s = Flush ( ) ; // flush cache to OS
s = Flush ( ) ; // flush cache to OS
IOStatus interim ;
IOStatus interim ;
@ -294,9 +327,13 @@ IOStatus WritableFileWriter::Close() {
writable_file_ . reset ( ) ;
writable_file_ . reset ( ) ;
TEST_KILL_RANDOM ( " WritableFileWriter::Close:1 " ) ;
TEST_KILL_RANDOM ( " WritableFileWriter::Close:1 " ) ;
if ( s . ok ( ) & & checksum_generator_ ! = nullptr & & ! checksum_finalized_ ) {
if ( s . ok ( ) ) {
checksum_generator_ - > Finalize ( ) ;
if ( checksum_generator_ ! = nullptr & & ! checksum_finalized_ ) {
checksum_finalized_ = true ;
checksum_generator_ - > Finalize ( ) ;
checksum_finalized_ = true ;
}
} else {
seen_error_ = true ;
}
}
return s ;
return s ;
@ -305,6 +342,10 @@ IOStatus WritableFileWriter::Close() {
// write out the cached data to the OS cache or storage if direct I/O
// write out the cached data to the OS cache or storage if direct I/O
// enabled
// enabled
IOStatus WritableFileWriter : : Flush ( Env : : IOPriority op_rate_limiter_priority ) {
IOStatus WritableFileWriter : : Flush ( Env : : IOPriority op_rate_limiter_priority ) {
if ( seen_error_ ) {
return AssertFalseAndGetStatusForPrevError ( ) ;
}
IOStatus s ;
IOStatus s ;
TEST_KILL_RANDOM_WITH_WEIGHT ( " WritableFileWriter::Flush:0 " , REDUCE_ODDS2 ) ;
TEST_KILL_RANDOM_WITH_WEIGHT ( " WritableFileWriter::Flush:0 " , REDUCE_ODDS2 ) ;
@ -329,6 +370,7 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
}
}
}
}
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
seen_error_ = true ;
return s ;
return s ;
}
}
}
}
@ -357,6 +399,7 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
}
}
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
seen_error_ = true ;
return s ;
return s ;
}
}
@ -383,6 +426,9 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
if ( offset_sync_to > 0 & &
if ( offset_sync_to > 0 & &
offset_sync_to - last_sync_size_ > = bytes_per_sync_ ) {
offset_sync_to - last_sync_size_ > = bytes_per_sync_ ) {
s = RangeSync ( last_sync_size_ , offset_sync_to - last_sync_size_ ) ;
s = RangeSync ( last_sync_size_ , offset_sync_to - last_sync_size_ ) ;
if ( ! s . ok ( ) ) {
seen_error_ = true ;
}
last_sync_size_ = offset_sync_to ;
last_sync_size_ = offset_sync_to ;
}
}
}
}
@ -409,14 +455,20 @@ const char* WritableFileWriter::GetFileChecksumFuncName() const {
}
}
IOStatus WritableFileWriter : : Sync ( bool use_fsync ) {
IOStatus WritableFileWriter : : Sync ( bool use_fsync ) {
if ( seen_error_ ) {
return AssertFalseAndGetStatusForPrevError ( ) ;
}
IOStatus s = Flush ( ) ;
IOStatus s = Flush ( ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
seen_error_ = true ;
return s ;
return s ;
}
}
TEST_KILL_RANDOM ( " WritableFileWriter::Sync:0 " ) ;
TEST_KILL_RANDOM ( " WritableFileWriter::Sync:0 " ) ;
if ( ! use_direct_io ( ) & & pending_sync_ ) {
if ( ! use_direct_io ( ) & & pending_sync_ ) {
s = SyncInternal ( use_fsync ) ;
s = SyncInternal ( use_fsync ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
seen_error_ = true ;
return s ;
return s ;
}
}
}
}
@ -426,6 +478,10 @@ IOStatus WritableFileWriter::Sync(bool use_fsync) {
}
}
IOStatus WritableFileWriter : : SyncWithoutFlush ( bool use_fsync ) {
IOStatus WritableFileWriter : : SyncWithoutFlush ( bool use_fsync ) {
if ( seen_error_ ) {
return AssertFalseAndGetStatusForPrevError ( ) ;
}
if ( ! writable_file_ - > IsSyncThreadSafe ( ) ) {
if ( ! writable_file_ - > IsSyncThreadSafe ( ) ) {
return IOStatus : : NotSupported (
return IOStatus : : NotSupported (
" Can't WritableFileWriter::SyncWithoutFlush() because "
" Can't WritableFileWriter::SyncWithoutFlush() because "
@ -434,10 +490,17 @@ IOStatus WritableFileWriter::SyncWithoutFlush(bool use_fsync) {
TEST_SYNC_POINT ( " WritableFileWriter::SyncWithoutFlush:1 " ) ;
TEST_SYNC_POINT ( " WritableFileWriter::SyncWithoutFlush:1 " ) ;
IOStatus s = SyncInternal ( use_fsync ) ;
IOStatus s = SyncInternal ( use_fsync ) ;
TEST_SYNC_POINT ( " WritableFileWriter::SyncWithoutFlush:2 " ) ;
TEST_SYNC_POINT ( " WritableFileWriter::SyncWithoutFlush:2 " ) ;
if ( ! s . ok ( ) ) {
seen_error_ = true ;
}
return s ;
return s ;
}
}
IOStatus WritableFileWriter : : SyncInternal ( bool use_fsync ) {
IOStatus WritableFileWriter : : SyncInternal ( bool use_fsync ) {
if ( seen_error_ ) {
return AssertFalseAndGetStatusForPrevError ( ) ;
}
IOStatus s ;
IOStatus s ;
IOSTATS_TIMER_GUARD ( fsync_nanos ) ;
IOSTATS_TIMER_GUARD ( fsync_nanos ) ;
TEST_SYNC_POINT ( " WritableFileWriter::SyncInternal:0 " ) ;
TEST_SYNC_POINT ( " WritableFileWriter::SyncInternal:0 " ) ;
@ -473,10 +536,17 @@ IOStatus WritableFileWriter::SyncInternal(bool use_fsync) {
}
}
# endif
# endif
SetPerfLevel ( prev_perf_level ) ;
SetPerfLevel ( prev_perf_level ) ;
if ( ! s . ok ( ) ) {
seen_error_ = true ;
}
return s ;
return s ;
}
}
IOStatus WritableFileWriter : : RangeSync ( uint64_t offset , uint64_t nbytes ) {
IOStatus WritableFileWriter : : RangeSync ( uint64_t offset , uint64_t nbytes ) {
if ( seen_error_ ) {
return AssertFalseAndGetStatusForPrevError ( ) ;
}
IOSTATS_TIMER_GUARD ( range_sync_nanos ) ;
IOSTATS_TIMER_GUARD ( range_sync_nanos ) ;
TEST_SYNC_POINT ( " WritableFileWriter::RangeSync:0 " ) ;
TEST_SYNC_POINT ( " WritableFileWriter::RangeSync:0 " ) ;
# ifndef ROCKSDB_LITE
# ifndef ROCKSDB_LITE
@ -488,6 +558,9 @@ IOStatus WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) {
IOOptions io_options ;
IOOptions io_options ;
io_options . rate_limiter_priority = writable_file_ - > GetIOPriority ( ) ;
io_options . rate_limiter_priority = writable_file_ - > GetIOPriority ( ) ;
IOStatus s = writable_file_ - > RangeSync ( offset , nbytes , io_options , nullptr ) ;
IOStatus s = writable_file_ - > RangeSync ( offset , nbytes , io_options , nullptr ) ;
if ( ! s . ok ( ) ) {
seen_error_ = true ;
}
# ifndef ROCKSDB_LITE
# ifndef ROCKSDB_LITE
if ( ShouldNotifyListeners ( ) ) {
if ( ShouldNotifyListeners ( ) ) {
auto finish_ts = std : : chrono : : steady_clock : : now ( ) ;
auto finish_ts = std : : chrono : : steady_clock : : now ( ) ;
@ -505,6 +578,10 @@ IOStatus WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) {
// limiter if available
// limiter if available
IOStatus WritableFileWriter : : WriteBuffered (
IOStatus WritableFileWriter : : WriteBuffered (
const char * data , size_t size , Env : : IOPriority op_rate_limiter_priority ) {
const char * data , size_t size , Env : : IOPriority op_rate_limiter_priority ) {
if ( seen_error_ ) {
return AssertFalseAndGetStatusForPrevError ( ) ;
}
IOStatus s ;
IOStatus s ;
assert ( ! use_direct_io ( ) ) ;
assert ( ! use_direct_io ( ) ) ;
const char * src = data ;
const char * src = data ;
@ -576,6 +653,7 @@ IOStatus WritableFileWriter::WriteBuffered(
}
}
# endif
# endif
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
seen_error_ = true ;
return s ;
return s ;
}
}
}
}
@ -590,11 +668,18 @@ IOStatus WritableFileWriter::WriteBuffered(
}
}
buf_ . Size ( 0 ) ;
buf_ . Size ( 0 ) ;
buffered_data_crc32c_checksum_ = 0 ;
buffered_data_crc32c_checksum_ = 0 ;
if ( ! s . ok ( ) ) {
seen_error_ = true ;
}
return s ;
return s ;
}
}
IOStatus WritableFileWriter : : WriteBufferedWithChecksum (
IOStatus WritableFileWriter : : WriteBufferedWithChecksum (
const char * data , size_t size , Env : : IOPriority op_rate_limiter_priority ) {
const char * data , size_t size , Env : : IOPriority op_rate_limiter_priority ) {
if ( seen_error_ ) {
return AssertFalseAndGetStatusForPrevError ( ) ;
}
IOStatus s ;
IOStatus s ;
assert ( ! use_direct_io ( ) ) ;
assert ( ! use_direct_io ( ) ) ;
assert ( perform_data_verification_ & & buffered_data_with_checksum_ ) ;
assert ( perform_data_verification_ & & buffered_data_with_checksum_ ) ;
@ -666,6 +751,7 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum(
// and let caller determine error handling.
// and let caller determine error handling.
buf_ . Size ( 0 ) ;
buf_ . Size ( 0 ) ;
buffered_data_crc32c_checksum_ = 0 ;
buffered_data_crc32c_checksum_ = 0 ;
seen_error_ = true ;
return s ;
return s ;
}
}
}
}
@ -679,6 +765,9 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum(
buffered_data_crc32c_checksum_ = 0 ;
buffered_data_crc32c_checksum_ = 0 ;
uint64_t cur_size = flushed_size_ . load ( std : : memory_order_acquire ) ;
uint64_t cur_size = flushed_size_ . load ( std : : memory_order_acquire ) ;
flushed_size_ . store ( cur_size + left , std : : memory_order_release ) ;
flushed_size_ . store ( cur_size + left , std : : memory_order_release ) ;
if ( ! s . ok ( ) ) {
seen_error_ = true ;
}
return s ;
return s ;
}
}
@ -712,6 +801,12 @@ void WritableFileWriter::Crc32cHandoffChecksumCalculation(const char* data,
# ifndef ROCKSDB_LITE
# ifndef ROCKSDB_LITE
IOStatus WritableFileWriter : : WriteDirect (
IOStatus WritableFileWriter : : WriteDirect (
Env : : IOPriority op_rate_limiter_priority ) {
Env : : IOPriority op_rate_limiter_priority ) {
if ( seen_error_ ) {
assert ( false ) ;
return IOStatus : : IOError ( " Writer has previous error. " ) ;
}
assert ( use_direct_io ( ) ) ;
assert ( use_direct_io ( ) ) ;
IOStatus s ;
IOStatus s ;
const size_t alignment = buf_ . Alignment ( ) ;
const size_t alignment = buf_ . Alignment ( ) ;
@ -778,6 +873,7 @@ IOStatus WritableFileWriter::WriteDirect(
}
}
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
buf_ . Size ( file_advance + leftover_tail ) ;
buf_ . Size ( file_advance + leftover_tail ) ;
seen_error_ = true ;
return s ;
return s ;
}
}
}
}
@ -801,12 +897,18 @@ IOStatus WritableFileWriter::WriteDirect(
// is a multiple of whole pages otherwise filesize_ is leftover_tail
// is a multiple of whole pages otherwise filesize_ is leftover_tail
// behind
// behind
next_write_offset_ + = file_advance ;
next_write_offset_ + = file_advance ;
} else {
seen_error_ = true ;
}
}
return s ;
return s ;
}
}
IOStatus WritableFileWriter : : WriteDirectWithChecksum (
IOStatus WritableFileWriter : : WriteDirectWithChecksum (
Env : : IOPriority op_rate_limiter_priority ) {
Env : : IOPriority op_rate_limiter_priority ) {
if ( seen_error_ ) {
return AssertFalseAndGetStatusForPrevError ( ) ;
}
assert ( use_direct_io ( ) ) ;
assert ( use_direct_io ( ) ) ;
assert ( perform_data_verification_ & & buffered_data_with_checksum_ ) ;
assert ( perform_data_verification_ & & buffered_data_with_checksum_ ) ;
IOStatus s ;
IOStatus s ;
@ -884,6 +986,7 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
buf_ . Size ( file_advance + leftover_tail ) ;
buf_ . Size ( file_advance + leftover_tail ) ;
buffered_data_crc32c_checksum_ =
buffered_data_crc32c_checksum_ =
crc32c : : Value ( buf_ . BufferStart ( ) , buf_ . CurrentSize ( ) ) ;
crc32c : : Value ( buf_ . BufferStart ( ) , buf_ . CurrentSize ( ) ) ;
seen_error_ = true ;
return s ;
return s ;
}
}
}
}
@ -907,6 +1010,8 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
// is a multiple of whole pages otherwise filesize_ is leftover_tail
// is a multiple of whole pages otherwise filesize_ is leftover_tail
// behind
// behind
next_write_offset_ + = file_advance ;
next_write_offset_ + = file_advance ;
} else {
seen_error_ = true ;
}
}
return s ;
return s ;
}
}