plain table reader: avoid re-reading the same position for index and data in non-mmap mode

Summary: In non-mmap mode, the plain table reader can issue two pread() calls, one for checking the index and one for reading the actual data, even though both target the same location. By reusing the key decoder, we reuse the buffer shared by the two steps and avoid the repeated read.

Test Plan: Run unit tests. Run table_reader_bench and confirm from strace that the repeated reads disappear.

Reviewers: anthony, yhchiang, rven, kradhakrishnan, IslamAbdelRahman

Reviewed By: IslamAbdelRahman

Subscribers: leveldb, dhruba

Differential Revision: https://reviews.facebook.net/D50949
main
sdong 9 years ago
parent d5239f8709
commit 9d0b8f19d9
  1. 22
      table/plain_table_reader.cc
  2. 6
      table/plain_table_reader.h
  3. 9
      table/table_reader_bench.cc

@ -409,7 +409,8 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
return Status::OK(); return Status::OK();
} }
Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, Status PlainTableReader::GetOffset(PlainTableKeyDecoder* decoder,
const Slice& target, const Slice& prefix,
uint32_t prefix_hash, bool& prefix_matched, uint32_t prefix_hash, bool& prefix_matched,
uint32_t* offset) const { uint32_t* offset) const {
prefix_matched = false; prefix_matched = false;
@ -435,15 +436,12 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix,
return Status::Corruption(Slice()); return Status::Corruption(Slice());
} }
PlainTableKeyDecoder decoder(&file_info_, encoding_type_, user_key_len_,
ioptions_.prefix_extractor);
// The key is between [low, high). Do a binary search between it. // The key is between [low, high). Do a binary search between it.
while (high - low > 1) { while (high - low > 1) {
uint32_t mid = (high + low) / 2; uint32_t mid = (high + low) / 2;
uint32_t file_offset = GetFixed32Element(base_ptr, mid); uint32_t file_offset = GetFixed32Element(base_ptr, mid);
uint32_t tmp; uint32_t tmp;
Status s = decoder.NextKeyNoValue(file_offset, &mid_key, nullptr, &tmp); Status s = decoder->NextKeyNoValue(file_offset, &mid_key, nullptr, &tmp);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -468,7 +466,7 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix,
ParsedInternalKey low_key; ParsedInternalKey low_key;
uint32_t tmp; uint32_t tmp;
uint32_t low_key_offset = GetFixed32Element(base_ptr, low); uint32_t low_key_offset = GetFixed32Element(base_ptr, low);
Status s = decoder.NextKeyNoValue(low_key_offset, &low_key, nullptr, &tmp); Status s = decoder->NextKeyNoValue(low_key_offset, &low_key, nullptr, &tmp);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -559,8 +557,10 @@ Status PlainTableReader::Get(const ReadOptions& ro, const Slice& target,
} }
uint32_t offset; uint32_t offset;
bool prefix_match; bool prefix_match;
Status s = PlainTableKeyDecoder decoder(&file_info_, encoding_type_, user_key_len_,
GetOffset(target, prefix_slice, prefix_hash, prefix_match, &offset); ioptions_.prefix_extractor);
Status s = GetOffset(&decoder, target, prefix_slice, prefix_hash,
prefix_match, &offset);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
@ -571,8 +571,6 @@ Status PlainTableReader::Get(const ReadOptions& ro, const Slice& target,
return Status::Corruption(Slice()); return Status::Corruption(Slice());
} }
Slice found_value; Slice found_value;
PlainTableKeyDecoder decoder(&file_info_, encoding_type_, user_key_len_,
ioptions_.prefix_extractor);
while (offset < file_info_.data_end_offset) { while (offset < file_info_.data_end_offset) {
s = Next(&decoder, &offset, &found_key, nullptr, &found_value); s = Next(&decoder, &offset, &found_key, nullptr, &found_value);
if (!s.ok()) { if (!s.ok()) {
@ -662,8 +660,8 @@ void PlainTableIterator::Seek(const Slice& target) {
} }
} }
bool prefix_match; bool prefix_match;
status_ = table_->GetOffset(target, prefix_slice, prefix_hash, prefix_match, status_ = table_->GetOffset(&decoder_, target, prefix_slice, prefix_hash,
&next_offset_); prefix_match, &next_offset_);
if (!status_.ok()) { if (!status_.ok()) {
offset_ = next_offset_ = table_->file_info_.data_end_offset; offset_ = next_offset_ = table_->file_info_.data_end_offset;
return; return;

@ -219,9 +219,9 @@ class PlainTableReader: public TableReader {
// Get file offset for key target. // Get file offset for key target.
// return value prefix_matched is set to true if the offset is confirmed // return value prefix_matched is set to true if the offset is confirmed
// for a key with the same prefix as target. // for a key with the same prefix as target.
Status GetOffset(const Slice& target, const Slice& prefix, Status GetOffset(PlainTableKeyDecoder* decoder, const Slice& target,
uint32_t prefix_hash, bool& prefix_matched, const Slice& prefix, uint32_t prefix_hash,
uint32_t* offset) const; bool& prefix_matched, uint32_t* offset) const;
bool IsTotalOrderMode() const { return (prefix_extractor_ == nullptr); } bool IsTotalOrderMode() const { return (prefix_extractor_ == nullptr); }

@ -258,6 +258,7 @@ DEFINE_bool(iterator, false, "For test iterator");
DEFINE_bool(through_db, false, "If enable, a DB instance will be created and " DEFINE_bool(through_db, false, "If enable, a DB instance will be created and "
"the query will be against DB. Otherwise, will be directly against " "the query will be against DB. Otherwise, will be directly against "
"a table reader."); "a table reader.");
DEFINE_bool(mmap_read, true, "Whether use mmap read");
DEFINE_string(table_factory, "block_based", DEFINE_string(table_factory, "block_based",
"Table factory to use: `block_based` (default), `plain_table` or " "Table factory to use: `block_based` (default), `plain_table` or "
"`cuckoo_hash`."); "`cuckoo_hash`.");
@ -283,8 +284,8 @@ int main(int argc, char** argv) {
if (FLAGS_table_factory == "cuckoo_hash") { if (FLAGS_table_factory == "cuckoo_hash") {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
options.allow_mmap_reads = true; options.allow_mmap_reads = FLAGS_mmap_read;
env_options.use_mmap_reads = true; env_options.use_mmap_reads = FLAGS_mmap_read;
rocksdb::CuckooTableOptions table_options; rocksdb::CuckooTableOptions table_options;
table_options.hash_table_ratio = 0.75; table_options.hash_table_ratio = 0.75;
tf.reset(rocksdb::NewCuckooTableFactory(table_options)); tf.reset(rocksdb::NewCuckooTableFactory(table_options));
@ -294,8 +295,8 @@ int main(int argc, char** argv) {
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
} else if (FLAGS_table_factory == "plain_table") { } else if (FLAGS_table_factory == "plain_table") {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
options.allow_mmap_reads = true; options.allow_mmap_reads = FLAGS_mmap_read;
env_options.use_mmap_reads = true; env_options.use_mmap_reads = FLAGS_mmap_read;
rocksdb::PlainTableOptions plain_table_options; rocksdb::PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16; plain_table_options.user_key_len = 16;

Loading…
Cancel
Save