PlainTable should avoid copying Get() results from immortal source. (#4924)

Summary:
https://github.com/facebook/rocksdb/pull/4053 avoids memcopy for Get() results if files are immortable
(read-only DB, max_open_files=-1) and the file is ammaped. The same optimization is being applied to PlainTable
here.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4924

Differential Revision: D13827749

Pulled By: siying

fbshipit-source-id: 1f2cbfc530b40ce08ccd53f95f6e78de4d1c2f96
main
Siying Dong 6 years ago committed by Facebook Github Bot
parent e1de88c8c7
commit f184bee77b
  1. 52
      db/plain_table_db_test.cc
  2. 2
      table/get_context.cc
  3. 3
      table/plain_table_factory.cc
  4. 10
      table/plain_table_reader.cc
  5. 3
      table/plain_table_reader.h

@ -158,6 +158,8 @@ class PlainTableDBTest : public testing::Test,
db_ = nullptr;
}
bool mmap_mode() const { return mmap_mode_; }
void DestroyAndReopen(Options* options = nullptr) {
//Destroy using last options
Destroy(&last_options_);
@ -174,6 +176,12 @@ class PlainTableDBTest : public testing::Test,
return DB::Open(*options, dbname_, db);
}
Status ReopenForReadOnly(Options* options) {
delete db_;
db_ = nullptr;
return DB::OpenForReadOnly(*options, dbname_, &db_);
}
Status TryReopen(Options* options = nullptr) {
delete db_;
db_ = nullptr;
@ -547,6 +555,50 @@ TEST_P(PlainTableDBTest, Flush2) {
}
}
TEST_P(PlainTableDBTest, Immortable) {
for (EncodingType encoding_type : {kPlain, kPrefix}) {
Options options = CurrentOptions();
options.create_if_missing = true;
options.max_open_files = -1;
// Set only one bucket to force bucket conflict.
// Test index interval for the same prefix to be 1, 2 and 4
PlainTableOptions plain_table_options;
plain_table_options.hash_table_ratio = 0.75;
plain_table_options.index_sparseness = 16;
plain_table_options.user_key_len = kPlainTableVariableLength;
plain_table_options.bloom_bits_per_key = 10;
plain_table_options.encoding_type = encoding_type;
options.table_factory.reset(NewPlainTableFactory(plain_table_options));
DestroyAndReopen(&options);
ASSERT_OK(Put("0000000000000bar", "b"));
ASSERT_OK(Put("1000000000000foo", "v1"));
dbfull()->TEST_FlushMemTable();
int copied = 0;
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"GetContext::SaveValue::PinSelf", [&](void* /*arg*/) { copied++; });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
ASSERT_EQ("b", Get("0000000000000bar"));
ASSERT_EQ("v1", Get("1000000000000foo"));
ASSERT_EQ(2, copied);
copied = 0;
Close();
ASSERT_OK(ReopenForReadOnly(&options));
ASSERT_EQ("b", Get("0000000000000bar"));
ASSERT_EQ("v1", Get("1000000000000foo"));
ASSERT_EQ("NOT_FOUND", Get("1000000000000bar"));
if (mmap_mode()) {
ASSERT_EQ(0, copied);
} else {
ASSERT_EQ(2, copied);
}
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}
}
TEST_P(PlainTableDBTest, Iterator) {
for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
huge_page_tlb_size += 2 * 1024 * 1024) {

@ -221,6 +221,8 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
// If the backing resources for the value are provided, pin them
pinnable_val_->PinSlice(value, value_pinner);
} else {
TEST_SYNC_POINT_CALLBACK("GetContext::SaveValue::PinSelf", this);
// Otherwise copy the value
pinnable_val_->PinSelf(value);
}

@ -27,7 +27,8 @@ Status PlainTableFactory::NewTableReader(
table_reader_options.internal_comparator, std::move(file), file_size,
table, table_options_.bloom_bits_per_key, table_options_.hash_table_ratio,
table_options_.index_sparseness, table_options_.huge_page_tlb_size,
table_options_.full_scan_mode, table_reader_options.prefix_extractor);
table_options_.full_scan_mode, table_reader_options.immortal,
table_reader_options.prefix_extractor);
}
TableBuilder* PlainTableFactory::NewTableBuilder(

@ -120,7 +120,8 @@ Status PlainTableReader::Open(
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
std::unique_ptr<TableReader>* table_reader, const int bloom_bits_per_key,
double hash_table_ratio, size_t index_sparseness, size_t huge_page_tlb_size,
bool full_scan_mode, const SliceTransform* prefix_extractor) {
bool full_scan_mode, const bool immortal_table,
const SliceTransform* prefix_extractor) {
if (file_size > PlainTableIndex::kMaxFileSize) {
return Status::NotSupported("File is too large for PlainTableReader!");
}
@ -181,6 +182,10 @@ Status PlainTableReader::Open(
new_reader->full_scan_mode_ = true;
}
if (immortal_table && new_reader->file_info_.is_mmap_mode) {
new_reader->dummy_cleanable_.reset(new Cleanable());
}
*table_reader = std::move(new_reader);
return s;
}
@ -598,7 +603,8 @@ Status PlainTableReader::Get(const ReadOptions& /*ro*/, const Slice& target,
// can we enable the fast path?
if (internal_comparator_.Compare(found_key, parsed_target) >= 0) {
bool dont_care __attribute__((__unused__));
if (!get_context->SaveValue(found_key, found_value, &dont_care)) {
if (!get_context->SaveValue(found_key, found_value, &dont_care,
dummy_cleanable_.get())) {
break;
}
}

@ -75,7 +75,7 @@ class PlainTableReader: public TableReader {
uint64_t file_size, std::unique_ptr<TableReader>* table,
const int bloom_bits_per_key, double hash_table_ratio,
size_t index_sparseness, size_t huge_page_tlb_size,
bool full_scan_mode,
bool full_scan_mode, const bool immortal_table = false,
const SliceTransform* prefix_extractor = nullptr);
InternalIterator* NewIterator(const ReadOptions&,
@ -157,6 +157,7 @@ class PlainTableReader: public TableReader {
CacheAllocationPtr bloom_block_alloc_;
const ImmutableCFOptions& ioptions_;
std::unique_ptr<Cleanable> dummy_cleanable_;
uint64_t file_size_;
std::shared_ptr<const TableProperties> table_properties_;

Loading…
Cancel
Save