@ -9,6 +9,7 @@
# include "db/table_cache.h"
# include "db/table_cache.h"
# include "db/dbformat.h"
# include "db/filename.h"
# include "db/filename.h"
# include "db/version_edit.h"
# include "db/version_edit.h"
@ -21,9 +22,12 @@
namespace rocksdb {
namespace rocksdb {
namespace {
// Cache deleter callback: destroys the cached object that `value` points to.
//
// T is the concrete type that was stored in the cache; the call sites in this
// file instantiate it as DeleteEntry<TableReader> and
// DeleteEntry<std::string>. `value` must have been allocated with `new T`.
// `key` is unused; it is present only because the cache's deleter signature
// passes it.
template <class T>
static void DeleteEntry(const Slice& key, void* value) {
  // Delete exactly once, through the real type, so that T's destructor runs.
  T* typed_value = static_cast<T*>(value);
  delete typed_value;
}
static void UnrefEntry ( void * arg1 , void * arg2 ) {
static void UnrefEntry ( void * arg1 , void * arg2 ) {
@ -37,11 +41,27 @@ static Slice GetSliceForFileNumber(const uint64_t* file_number) {
sizeof ( * file_number ) ) ;
sizeof ( * file_number ) ) ;
}
}
# ifndef ROCKSDB_LITE
// Encodes `v` as a varint64 and appends the encoded bytes to `key`.
void AppendVarint64(IterKey* key, uint64_t v) {
  // Scratch space for the encoding: 64 bits at 7 payload bits per byte need
  // at most 10 bytes.
  char scratch[10];
  const auto encoded_end = EncodeVarint64(scratch, v);
  const auto encoded_len = encoded_end - scratch;
  // NOTE(review): passing the current size as the first argument presumably
  // keeps all existing bytes and appends after them -- confirm against
  // IterKey::TrimAppend.
  key->TrimAppend(key->Size(), scratch, encoded_len);
}
# endif // ROCKSDB_LITE
} // namespace
// Initializes ioptions_, env_options_ and cache_ from the arguments.
//
// When ioptions_.row_cache is set, a fresh id obtained from that cache is
// varint-encoded into row_cache_id_. TableCache::Get() prefixes every row
// cache key with it.
TableCache::TableCache(const ImmutableCFOptions& ioptions,
                       const EnvOptions& env_options, Cache* const cache)
    : ioptions_(ioptions), env_options_(env_options), cache_(cache) {
  if (ioptions_.row_cache) {
    // If the same cache is shared by multiple instances, we need to
    // disambiguate its entries.
    PutVarint64(&row_cache_id_, ioptions_.row_cache->NewId());
  }
}
// Nothing to do explicitly here: the destructor body is empty.
TableCache::~TableCache() {}
@ -88,7 +108,8 @@ Status TableCache::FindTable(const EnvOptions& env_options,
// We do not cache error results so that if the error is transient,
// We do not cache error results so that if the error is transient,
// or somebody repairs the file, we recover automatically.
// or somebody repairs the file, we recover automatically.
} else {
} else {
* handle = cache_ - > Insert ( key , table_reader . release ( ) , 1 , & DeleteEntry ) ;
* handle = cache_ - > Insert ( key , table_reader . release ( ) , 1 ,
& DeleteEntry < TableReader > ) ;
}
}
}
}
return s ;
return s ;
@ -137,6 +158,46 @@ Status TableCache::Get(const ReadOptions& options,
TableReader * t = fd . table_reader ;
TableReader * t = fd . table_reader ;
Status s ;
Status s ;
Cache : : Handle * handle = nullptr ;
Cache : : Handle * handle = nullptr ;
std : : string * row_cache_entry = nullptr ;
# ifndef ROCKSDB_LITE
IterKey row_cache_key ;
std : : string row_cache_entry_buffer ;
if ( ioptions_ . row_cache ) {
uint64_t fd_number = fd . GetNumber ( ) ;
auto user_key = ExtractUserKey ( k ) ;
// We use the user key as cache key instead of the internal key,
// otherwise the whole cache would be invalidated every time the
// sequence number increases. However, to support caching snapshot
// reads, we append the sequence number (incremented by 1 to
// distinguish it from 0) only when reading from a snapshot.
uint64_t seq_no =
options . snapshot = = nullptr ? 0 : 1 + GetInternalKeySeqno ( k ) ;
// Compute row cache key.
row_cache_key . TrimAppend ( row_cache_key . Size ( ) , row_cache_id_ . data ( ) ,
row_cache_id_ . size ( ) ) ;
AppendVarint64 ( & row_cache_key , fd_number ) ;
AppendVarint64 ( & row_cache_key , seq_no ) ;
row_cache_key . TrimAppend ( row_cache_key . Size ( ) , user_key . data ( ) ,
user_key . size ( ) ) ;
if ( auto row_handle = ioptions_ . row_cache - > Lookup ( row_cache_key . GetKey ( ) ) ) {
auto found_row_cache_entry = static_cast < const std : : string * > (
ioptions_ . row_cache - > Value ( row_handle ) ) ;
replayGetContextLog ( * found_row_cache_entry , user_key , get_context ) ;
ioptions_ . row_cache - > Release ( row_handle ) ;
RecordTick ( ioptions_ . statistics , ROW_CACHE_HIT ) ;
return Status : : OK ( ) ;
}
// Not found, setting up the replay log.
RecordTick ( ioptions_ . statistics , ROW_CACHE_MISS ) ;
row_cache_entry = & row_cache_entry_buffer ;
}
# endif // ROCKSDB_LITE
if ( ! t ) {
if ( ! t ) {
s = FindTable ( env_options_ , internal_comparator , fd , & handle ,
s = FindTable ( env_options_ , internal_comparator , fd , & handle ,
options . read_tier = = kBlockCacheTier ) ;
options . read_tier = = kBlockCacheTier ) ;
@ -145,15 +206,30 @@ Status TableCache::Get(const ReadOptions& options,
}
}
}
}
if ( s . ok ( ) ) {
if ( s . ok ( ) ) {
get_context - > SetReplayLog ( row_cache_entry ) ; // nullptr if no cache.
s = t - > Get ( options , k , get_context ) ;
s = t - > Get ( options , k , get_context ) ;
get_context - > SetReplayLog ( nullptr ) ;
if ( handle ! = nullptr ) {
if ( handle ! = nullptr ) {
ReleaseHandle ( handle ) ;
ReleaseHandle ( handle ) ;
}
}
} else if ( options . read_tier & & s . IsIncomplete ( ) ) {
} else if ( options . read_tier & & s . IsIncomplete ( ) ) {
// Couldn't find Table in cache but treat as kFound if no_io set
// Couldn't find Table in cache but treat as kFound if no_io set
get_context - > MarkKeyMayExist ( ) ;
get_context - > MarkKeyMayExist ( ) ;
return Status : : OK ( ) ;
return Status : : OK ( ) ;
}
}
# ifndef ROCKSDB_LITE
// Put the replay log in row cache only if something was found.
if ( s . ok ( ) & & row_cache_entry & & ! row_cache_entry - > empty ( ) ) {
size_t charge =
row_cache_key . Size ( ) + row_cache_entry - > size ( ) + sizeof ( std : : string ) ;
void * row_ptr = new std : : string ( std : : move ( * row_cache_entry ) ) ;
auto row_handle = ioptions_ . row_cache - > Insert (
row_cache_key . GetKey ( ) , row_ptr , charge , & DeleteEntry < std : : string > ) ;
ioptions_ . row_cache - > Release ( row_handle ) ;
}
# endif // ROCKSDB_LITE
return s ;
return s ;
}
}