|
|
|
@ -277,25 +277,12 @@ Status TableCache::GetRangeTombstoneIterator( |
|
|
|
|
return s; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
Status TableCache::Get(const ReadOptions& options, |
|
|
|
|
const InternalKeyComparator& internal_comparator, |
|
|
|
|
const FileMetaData& file_meta, const Slice& k, |
|
|
|
|
GetContext* get_context, |
|
|
|
|
const SliceTransform* prefix_extractor, |
|
|
|
|
HistogramImpl* file_read_hist, bool skip_filters, |
|
|
|
|
int level) { |
|
|
|
|
auto& fd = file_meta.fd; |
|
|
|
|
std::string* row_cache_entry = nullptr; |
|
|
|
|
bool done = false; |
|
|
|
|
#ifndef ROCKSDB_LITE |
|
|
|
|
IterKey row_cache_key; |
|
|
|
|
std::string row_cache_entry_buffer; |
|
|
|
|
|
|
|
|
|
// Check row cache if enabled. Since row cache does not currently store
|
|
|
|
|
// sequence numbers, we cannot use it if we need to fetch the sequence.
|
|
|
|
|
if (ioptions_.row_cache && !get_context->NeedToReadSequence()) { |
|
|
|
|
void TableCache::CreateRowCacheKeyPrefix( |
|
|
|
|
const ReadOptions& options, |
|
|
|
|
const FileDescriptor& fd, const Slice& internal_key, |
|
|
|
|
GetContext* get_context, IterKey& row_cache_key) { |
|
|
|
|
uint64_t fd_number = fd.GetNumber(); |
|
|
|
|
auto user_key = ExtractUserKey(k); |
|
|
|
|
// We use the user key as cache key instead of the internal key,
|
|
|
|
|
// otherwise the whole cache would be invalidated every time the
|
|
|
|
|
// sequence key increases. However, to support caching snapshot
|
|
|
|
@ -316,7 +303,7 @@ Status TableCache::Get(const ReadOptions& options, |
|
|
|
|
// We should consider using options.snapshot->GetSequenceNumber()
|
|
|
|
|
// instead of GetInternalKeySeqno(k), which will make the code
|
|
|
|
|
// easier to understand.
|
|
|
|
|
seq_no = 1 + GetInternalKeySeqno(k); |
|
|
|
|
seq_no = 1 + GetInternalKeySeqno(internal_key); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Compute row cache key.
|
|
|
|
@ -324,9 +311,15 @@ Status TableCache::Get(const ReadOptions& options, |
|
|
|
|
row_cache_id_.size()); |
|
|
|
|
AppendVarint64(&row_cache_key, fd_number); |
|
|
|
|
AppendVarint64(&row_cache_key, seq_no); |
|
|
|
|
row_cache_key.TrimAppend(row_cache_key.Size(), user_key.data(), |
|
|
|
|
user_key.size()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
bool TableCache::GetFromRowCache( |
|
|
|
|
const Slice& user_key, IterKey& row_cache_key, |
|
|
|
|
size_t prefix_size, GetContext* get_context) { |
|
|
|
|
bool found = false; |
|
|
|
|
|
|
|
|
|
row_cache_key.TrimAppend(prefix_size, user_key.data(), |
|
|
|
|
user_key.size()); |
|
|
|
|
if (auto row_handle = |
|
|
|
|
ioptions_.row_cache->Lookup(row_cache_key.GetUserKey())) { |
|
|
|
|
// Cleanable routine to release the cache entry
|
|
|
|
@ -350,10 +343,36 @@ Status TableCache::Get(const ReadOptions& options, |
|
|
|
|
replayGetContextLog(*found_row_cache_entry, user_key, get_context, |
|
|
|
|
&value_pinner); |
|
|
|
|
RecordTick(ioptions_.statistics, ROW_CACHE_HIT); |
|
|
|
|
done = true; |
|
|
|
|
found = true; |
|
|
|
|
} else { |
|
|
|
|
// Not found, setting up the replay log.
|
|
|
|
|
RecordTick(ioptions_.statistics, ROW_CACHE_MISS); |
|
|
|
|
} |
|
|
|
|
return found; |
|
|
|
|
} |
|
|
|
|
#endif // ROCKSDB_LITE
|
|
|
|
|
|
|
|
|
|
Status TableCache::Get(const ReadOptions& options, |
|
|
|
|
const InternalKeyComparator& internal_comparator, |
|
|
|
|
const FileMetaData& file_meta, const Slice& k, |
|
|
|
|
GetContext* get_context, |
|
|
|
|
const SliceTransform* prefix_extractor, |
|
|
|
|
HistogramImpl* file_read_hist, bool skip_filters, |
|
|
|
|
int level) { |
|
|
|
|
auto& fd = file_meta.fd; |
|
|
|
|
std::string* row_cache_entry = nullptr; |
|
|
|
|
bool done = false; |
|
|
|
|
#ifndef ROCKSDB_LITE |
|
|
|
|
IterKey row_cache_key; |
|
|
|
|
std::string row_cache_entry_buffer; |
|
|
|
|
|
|
|
|
|
// Check row cache if enabled. Since row cache does not currently store
|
|
|
|
|
// sequence numbers, we cannot use it if we need to fetch the sequence.
|
|
|
|
|
if (ioptions_.row_cache && !get_context->NeedToReadSequence()) { |
|
|
|
|
auto user_key = ExtractUserKey(k); |
|
|
|
|
CreateRowCacheKeyPrefix(options, fd, k, get_context, row_cache_key); |
|
|
|
|
done = GetFromRowCache(user_key, row_cache_key, row_cache_key.Size(), |
|
|
|
|
get_context); |
|
|
|
|
if (!done) { |
|
|
|
|
row_cache_entry = &row_cache_entry_buffer; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -413,8 +432,6 @@ Status TableCache::Get(const ReadOptions& options, |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Batched version of TableCache::MultiGet.
|
|
|
|
|
// TODO: Add support for row cache. As of now, this ignores the row cache
|
|
|
|
|
// and directly looks up in the table files
|
|
|
|
|
Status TableCache::MultiGet(const ReadOptions& options, |
|
|
|
|
const InternalKeyComparator& internal_comparator, |
|
|
|
|
const FileMetaData& file_meta, |
|
|
|
@ -426,7 +443,41 @@ Status TableCache::MultiGet(const ReadOptions& options, |
|
|
|
|
Status s; |
|
|
|
|
TableReader* t = fd.table_reader; |
|
|
|
|
Cache::Handle* handle = nullptr; |
|
|
|
|
if (s.ok()) { |
|
|
|
|
MultiGetRange table_range(*mget_range, mget_range->begin(), mget_range->end()); |
|
|
|
|
#ifndef ROCKSDB_LITE |
|
|
|
|
autovector<std::string, MultiGetContext::MAX_BATCH_SIZE> row_cache_entries; |
|
|
|
|
IterKey row_cache_key; |
|
|
|
|
size_t row_cache_key_prefix_size = 0; |
|
|
|
|
KeyContext& first_key = *table_range.begin(); |
|
|
|
|
bool lookup_row_cache = ioptions_.row_cache && |
|
|
|
|
!first_key.get_context->NeedToReadSequence(); |
|
|
|
|
|
|
|
|
|
// Check row cache if enabled. Since row cache does not currently store
|
|
|
|
|
// sequence numbers, we cannot use it if we need to fetch the sequence.
|
|
|
|
|
if (lookup_row_cache) { |
|
|
|
|
GetContext* first_context = first_key.get_context; |
|
|
|
|
CreateRowCacheKeyPrefix(options, fd, first_key.ikey, first_context, |
|
|
|
|
row_cache_key); |
|
|
|
|
row_cache_key_prefix_size = row_cache_key.Size(); |
|
|
|
|
|
|
|
|
|
for (auto miter = table_range.begin(); miter != table_range.end(); ++miter) { |
|
|
|
|
const Slice& user_key = miter->ukey;; |
|
|
|
|
GetContext* get_context = miter->get_context; |
|
|
|
|
|
|
|
|
|
if (GetFromRowCache(user_key, row_cache_key, row_cache_key_prefix_size, |
|
|
|
|
get_context)) { |
|
|
|
|
table_range.SkipKey(miter); |
|
|
|
|
} else { |
|
|
|
|
row_cache_entries.emplace_back(); |
|
|
|
|
get_context->SetReplayLog(&(row_cache_entries.back())); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#endif // ROCKSDB_LITE
|
|
|
|
|
|
|
|
|
|
// Check that table_range is not empty. It's possible all keys may have been
|
|
|
|
|
// found in the row cache and thus the range may now be empty
|
|
|
|
|
if (s.ok() && !table_range.empty()) { |
|
|
|
|
if (t == nullptr) { |
|
|
|
|
s = FindTable( |
|
|
|
|
env_options_, internal_comparator, fd, &handle, prefix_extractor, |
|
|
|
@ -441,21 +492,20 @@ Status TableCache::MultiGet(const ReadOptions& options, |
|
|
|
|
std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter( |
|
|
|
|
t->NewRangeTombstoneIterator(options)); |
|
|
|
|
if (range_del_iter != nullptr) { |
|
|
|
|
for (auto iter = mget_range->begin(); iter != mget_range->end(); |
|
|
|
|
for (auto iter = table_range.begin(); iter != table_range.end(); |
|
|
|
|
++iter) { |
|
|
|
|
const Slice& k = iter->ikey; |
|
|
|
|
SequenceNumber* max_covering_tombstone_seq = |
|
|
|
|
iter->get_context->max_covering_tombstone_seq(); |
|
|
|
|
*max_covering_tombstone_seq = std::max( |
|
|
|
|
*max_covering_tombstone_seq, |
|
|
|
|
range_del_iter->MaxCoveringTombstoneSeqnum(ExtractUserKey(k))); |
|
|
|
|
*max_covering_tombstone_seq = |
|
|
|
|
std::max(*max_covering_tombstone_seq, |
|
|
|
|
range_del_iter->MaxCoveringTombstoneSeqnum(iter->ukey)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
if (s.ok()) { |
|
|
|
|
t->MultiGet(options, mget_range, prefix_extractor, skip_filters); |
|
|
|
|
t->MultiGet(options, &table_range, prefix_extractor, skip_filters); |
|
|
|
|
} else if (options.read_tier == kBlockCacheTier && s.IsIncomplete()) { |
|
|
|
|
for (auto iter = mget_range->begin(); iter != mget_range->end(); ++iter) { |
|
|
|
|
for (auto iter = table_range.begin(); iter != table_range.end(); ++iter) { |
|
|
|
|
Status* status = iter->s; |
|
|
|
|
if (status->IsIncomplete()) { |
|
|
|
|
// Couldn't find Table in cache but treat as kFound if no_io set
|
|
|
|
@ -466,6 +516,31 @@ Status TableCache::MultiGet(const ReadOptions& options, |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#ifndef ROCKSDB_LITE |
|
|
|
|
if (lookup_row_cache) { |
|
|
|
|
size_t row_idx = 0; |
|
|
|
|
|
|
|
|
|
for (auto miter = table_range.begin(); miter != table_range.end(); ++miter) { |
|
|
|
|
std::string& row_cache_entry = row_cache_entries[row_idx++]; |
|
|
|
|
const Slice& user_key = miter->ukey;; |
|
|
|
|
GetContext* get_context = miter->get_context; |
|
|
|
|
|
|
|
|
|
get_context->SetReplayLog(nullptr); |
|
|
|
|
// Compute row cache key.
|
|
|
|
|
row_cache_key.TrimAppend(row_cache_key_prefix_size, user_key.data(), |
|
|
|
|
user_key.size()); |
|
|
|
|
// Put the replay log in row cache only if something was found.
|
|
|
|
|
if (s.ok() && !row_cache_entry.empty()) { |
|
|
|
|
size_t charge = |
|
|
|
|
row_cache_key.Size() + row_cache_entry.size() + sizeof(std::string); |
|
|
|
|
void* row_ptr = new std::string(std::move(row_cache_entry)); |
|
|
|
|
ioptions_.row_cache->Insert(row_cache_key.GetUserKey(), row_ptr, charge, |
|
|
|
|
&DeleteEntry<std::string>); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#endif // ROCKSDB_LITE
|
|
|
|
|
|
|
|
|
|
if (handle != nullptr) { |
|
|
|
|
ReleaseHandle(handle); |
|
|
|
|
} |
|
|
|
|