@ -34,6 +34,7 @@
# include "port/port.h"
# include "port/port.h"
# include "rocksdb/slice.h"
# include "rocksdb/slice.h"
# include "test_util/sync_point.h"
# include "test_util/sync_point.h"
# include "util/autovector.h"
# include "util/coding.h"
# include "util/coding.h"
# include "util/string_util.h"
# include "util/string_util.h"
@ -409,12 +410,22 @@ size_t PosixHelper::GetUniqueIdFromFile(int fd, char* id, size_t max_size) {
*
*
* pread ( ) based random - access
* pread ( ) based random - access
*/
*/
// Constructor: records the file name, the descriptor `fd`, whether direct
// I/O is requested (options.use_direct_reads) and the logical sector size
// reported by GetLogicalBufferSize() for that descriptor. When
// ROCKSDB_IOURING_PRESENT is defined it additionally takes and stores the
// `thread_local_io_urings` pointer.
PosixRandomAccessFile : : PosixRandomAccessFile ( const std : : string & fname , int fd ,
PosixRandomAccessFile : : PosixRandomAccessFile (
const EnvOptions & options )
const std : : string & fname , int fd , const EnvOptions & options
# if defined(ROCKSDB_IOURING_PRESENT)
,
ThreadLocalPtr * thread_local_io_urings
# endif
)
: filename_ ( fname ) ,
: filename_ ( fname ) ,
fd_ ( fd ) ,
fd_ ( fd ) ,
use_direct_io_ ( options . use_direct_reads ) ,
use_direct_io_ ( options . use_direct_reads ) ,
logical_sector_size_ ( GetLogicalBufferSize ( fd_ ) ) {
logical_sector_size_ ( GetLogicalBufferSize ( fd_ ) )
# if defined(ROCKSDB_IOURING_PRESENT)
,
thread_local_io_urings_ ( thread_local_io_urings )
# endif
{
// Direct reads and mmap reads must not both be requested.
assert ( ! options . use_direct_reads | | ! options . use_mmap_reads ) ;
assert ( ! options . use_direct_reads | | ! options . use_mmap_reads ) ;
// mmap reads are only accepted here when pointers are narrower than 8 bytes.
assert ( ! options . use_mmap_reads | | sizeof ( void * ) < 8 ) ;
assert ( ! options . use_mmap_reads | | sizeof ( void * ) < 8 ) ;
}
}
@ -460,6 +471,96 @@ Status PosixRandomAccessFile::Read(uint64_t offset, size_t n, Slice* result,
return s ;
return s ;
}
}
// Serves `num_reqs` read requests from `reqs` in a single call.
//
// When ROCKSDB_IOURING_PRESENT is defined and a per-thread io_uring can be
// fetched from (or created and stored into) thread_local_io_urings_, the
// requests are pushed through that ring in batches of at most kIoUringDepth
// and every request's `result` / `status` is filled in from its completion.
// In all other cases the call is forwarded to RandomAccessFile::MultiRead().
//
// For each request, `scratch` is the destination buffer and `len` the number
// of bytes asked for at `offset`.
//
// Returns Status::OK() when every batch was submitted and reaped; failures of
// individual reads are reported through ReadRequest::status. Returns an
// IOError status when the ring itself cannot accept, submit or complete the
// batch.
Status PosixRandomAccessFile::MultiRead(ReadRequest* reqs, size_t num_reqs) {
#if defined(ROCKSDB_IOURING_PRESENT)
  struct io_uring* iu = nullptr;
  if (thread_local_io_urings_) {
    iu = static_cast<struct io_uring*>(thread_local_io_urings_->Get());
    if (iu == nullptr) {
      iu = CreateIOUring();
      if (iu != nullptr) {
        thread_local_io_urings_->Reset(iu);
      }
    }
  }

  // Init failed, platform doesn't support io_uring. Fall back to
  // serialized reads
  if (iu == nullptr) {
    return RandomAccessFile::MultiRead(reqs, num_reqs);
  }

  // Pairs a request with the iovec handed to the kernel so the completion
  // can be mapped back to its request.
  struct WrappedReadRequest {
    ReadRequest* req;
    struct iovec iov;
    explicit WrappedReadRequest(ReadRequest* r) : req(r) {}
  };

  autovector<WrappedReadRequest, 32> req_wraps;
  for (size_t i = 0; i < num_reqs; i++) {
    req_wraps.emplace_back(&reqs[i]);
  }

  // Marks requests [first, first + count) as failed with no data.
  auto fail_requests = [reqs](size_t first, size_t count, const Status& s) {
    for (size_t i = first; i < first + count; i++) {
      reqs[i].result = Slice(reqs[i].scratch, 0);
      reqs[i].status = s;
    }
  };

  // Translates one completion into the result/status of its request.
  auto finish_request = [this](struct io_uring_cqe* cqe) {
    WrappedReadRequest* req_wrap =
        static_cast<WrappedReadRequest*>(io_uring_cqe_get_data(cqe));
    ReadRequest* req = req_wrap->req;
    if (cqe->res < 0) {
      // io_uring reports failures as a negated errno value, so flip the sign
      // before handing it over.
      // NOTE(review): IOError() presumably expects a positive errno, as for
      // the pread()-based paths -- confirm.
      req->result = Slice(req->scratch, 0);
      req->status = IOError(" Req failed ", filename_, -cqe->res);
    } else {
      // cqe->res is the number of bytes actually read. A short read exposes
      // exactly those bytes; it is not retried here.
      req->result = Slice(req->scratch, static_cast<size_t>(cqe->res));
      req->status = Status::OK();
    }
  };

  size_t reqs_off = 0;
  while (num_reqs) {
    size_t this_reqs = num_reqs;
    // If requests exceed depth, split it into batches
    if (this_reqs > kIoUringDepth) this_reqs = kIoUringDepth;

    size_t prepared = 0;
    for (; prepared < this_reqs; prepared++) {
      const size_t index = prepared + reqs_off;
      struct io_uring_sqe* sqe = io_uring_get_sqe(iu);
      assert(sqe != nullptr);
      if (sqe == nullptr) {
        // Submission queue is full: submit what has been prepared so far and
        // pick up the rest in the next batch.
        break;
      }
      req_wraps[index].iov.iov_base = reqs[index].scratch;
      req_wraps[index].iov.iov_len = reqs[index].len;
      // `result` is intentionally not pre-assigned from `scratch` here: that
      // would build a Slice from a raw pointer to a buffer that has not been
      // written yet. NOTE(review): Slice(const char*) presumably sizes itself
      // with strlen() -- confirm. The completion handler sets `result`.
      io_uring_prep_readv(sqe, fd_, &req_wraps[index].iov, 1,
                          reqs[index].offset);
      io_uring_sqe_set_data(sqe, &req_wraps[index]);
    }
    if (prepared == 0) {
      const Status s =
          Status::IOError("io_uring submission queue is full", filename_);
      fail_requests(reqs_off, num_reqs, s);
      return s;
    }
    this_reqs = prepared;

    const ssize_t submitted =
        io_uring_submit_and_wait(iu, static_cast<unsigned int>(this_reqs));
    const bool submit_ok =
        submitted >= 0 && static_cast<size_t>(submitted) == this_reqs;
    if (!submit_ok) {
      fprintf(stderr, " ret = %ld this_reqs: %ld \n ",
              static_cast<long>(submitted), static_cast<long>(this_reqs));
    }
    assert(submit_ok);

    size_t to_reap = this_reqs;
    Status submit_status;
    if (!submit_ok) {
      // Only wait for what the kernel actually accepted; waiting for the
      // whole batch could block forever. Everything still outstanding is
      // pre-marked as failed; completions reaped below overwrite the
      // entries that did run.
      submit_status =
          Status::IOError("io_uring_submit_and_wait() failed", filename_);
      fail_requests(reqs_off, num_reqs, submit_status);
      to_reap = submitted > 0 ? static_cast<size_t>(submitted) : 0;
      if (to_reap > this_reqs) to_reap = this_reqs;
    }

    for (size_t i = 0; i < to_reap; i++) {
      struct io_uring_cqe* cqe = nullptr;
      // We could use the peek variant here, but this seems safer in terms
      // of our initial wait not reaping all completions
      const int wait_ret = io_uring_wait_cqe(iu, &cqe);
      assert(!wait_ret);
      if (wait_ret != 0) {
        // `cqe` is not valid on failure; do not touch it.
        return IOError("io_uring_wait_cqe() failed", filename_, -wait_ret);
      }
      finish_request(cqe);
      io_uring_cqe_seen(iu, cqe);
    }

    if (!submit_ok) {
      return submit_status;
    }
    num_reqs -= this_reqs;
    reqs_off += this_reqs;
  }
  return Status::OK();
#else
  return RandomAccessFile::MultiRead(reqs, num_reqs);
#endif
}
Status PosixRandomAccessFile : : Prefetch ( uint64_t offset , size_t n ) {
Status PosixRandomAccessFile : : Prefetch ( uint64_t offset , size_t n ) {
Status s ;
Status s ;
if ( ! use_direct_io ( ) ) {
if ( ! use_direct_io ( ) ) {