Add functionality to pre-fetch blocks specified by a key range to BlockBasedTable implementation.

Summary:
Pre-fetching is a common operation performed by data stores for
disk/flash based systems as part of database startup.

This is part of task 5197184.

Test Plan: Run the newly added unit test

Reviewers: rven, igor, sdong

Reviewed By: sdong

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D33933
main
krad 10 years ago
parent c4bd03a97e
commit f29b33c73b
  1. 46
      table/block_based_table_reader.cc
  2. 5
      table/block_based_table_reader.h
  3. 12
      table/table_reader.h
  4. 128
      table/table_test.cc

@ -1240,6 +1240,52 @@ Status BlockBasedTable::Get(
return s;
}
Status BlockBasedTable::Prefetch(const Slice* const begin,
const Slice* const end) {
auto& comparator = rep_->internal_comparator;
// pre-condition
if (begin && end && comparator.Compare(*begin, *end) > 0) {
return Status::InvalidArgument(*begin, *end);
}
BlockIter iiter;
NewIndexIterator(ReadOptions(), &iiter);
if (!iiter.status().ok()) {
// error opening index iterator
return iiter.status();
}
// indicates if we are on the last page that need to be pre-fetched
bool prefetching_boundary_page = false;
for (begin ? iiter.Seek(*begin) : iiter.SeekToFirst(); iiter.Valid();
iiter.Next()) {
Slice block_handle = iiter.value();
if (end && comparator.Compare(iiter.key(), *end) >= 0) {
if (prefetching_boundary_page) {
break;
}
// The index entry represents the last key in the data block.
// We should load this page into memory as well, but no more
prefetching_boundary_page = true;
}
// Load the block specified by the block_handle into the block cache
BlockIter biter;
NewDataBlockIterator(rep_, ReadOptions(), block_handle, &biter);
if (!biter.status().ok()) {
// there was an unexpected error while pre-fetching
return biter.status();
}
}
return Status::OK();
}
bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
const Slice& key) {
std::unique_ptr<Iterator> iiter(NewIndexIterator(options));

@ -83,6 +83,11 @@ class BlockBasedTable : public TableReader {
Status Get(const ReadOptions& readOptions, const Slice& key,
GetContext* get_context) override;
// Pre-fetch the disk blocks that correspond to the key range specified by
// (kbegin, kend). The call will return return error status in the event of
// IO or iteration error.
Status Prefetch(const Slice* begin, const Slice* end) override;
// Given a key, return an approximate byte offset in the file where
// the data for that key begins (or would begin if the key were
// present in the file). The returned value is in terms of file

@ -68,6 +68,18 @@ class TableReader {
virtual Status Get(const ReadOptions& readOptions, const Slice& key,
GetContext* get_context) = 0;
// Prefetch data corresponding to a give range of keys
// Typically this functionality is required for table implementations that
// persists the data on a non volatile storage medium like disk/SSD
virtual Status Prefetch(const Slice* begin = nullptr,
const Slice* end = nullptr) {
(void) begin;
(void) end;
// Default implementation is NOOP.
// The child class should implement functionality when applicable
return Status::OK();
}
// convert db file to a human readable form
virtual Status DumpTable(WritableFile* out_file) {
return Status::NotSupported("DumpTable() not supported");

@ -47,6 +47,9 @@
#include "util/testutil.h"
#include "util/scoped_arena_iterator.h"
using std::vector;
using std::string;
namespace rocksdb {
extern const uint64_t kLegacyBlockBasedTableMagicNumber;
@ -1125,6 +1128,131 @@ TEST(BlockBasedTableTest, FilterPolicyNameProperties) {
ASSERT_EQ("rocksdb.BuiltinBloomFilter", props.filter_policy_name);
}
//
// BlockBasedTableTest::PrefetchTest
//
void AssertKeysInCache(BlockBasedTable* table_reader,
const vector<string>& keys_in_cache,
const vector<string>& keys_not_in_cache) {
for (auto key : keys_in_cache) {
ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), key));
}
for (auto key : keys_not_in_cache) {
ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), key));
}
}
void PrefetchRange(TableConstructor* c, Options* opt,
BlockBasedTableOptions* table_options,
const vector<std::string>& keys,
const char* key_begin, const char* key_end,
const vector<string>& keys_in_cache,
const vector<string>& keys_not_in_cache,
const Status expected_status = Status::OK()) {
// reset the cache and reopen the table
table_options->block_cache = NewLRUCache(16 * 1024 * 1024);
opt->table_factory.reset(NewBlockBasedTableFactory(*table_options));
const ImmutableCFOptions ioptions2(*opt);
ASSERT_OK(c->Reopen(ioptions2));
// prefetch
auto* table_reader = dynamic_cast<BlockBasedTable*>(c->GetTableReader());
// empty string replacement is a trick so we don't crash the test
Slice begin(key_begin ? key_begin : "");
Slice end(key_end ? key_end : "");
Status s = table_reader->Prefetch(key_begin ? &begin : nullptr,
key_end ? &end : nullptr);
ASSERT_TRUE(s.code() == expected_status.code());
// assert our expectation in cache warmup
AssertKeysInCache(table_reader, keys_in_cache, keys_not_in_cache);
}
TEST(BlockBasedTableTest, PrefetchTest) {
// The purpose of this test is to test the prefetching operation built into
// BlockBasedTable.
Options opt;
unique_ptr<InternalKeyComparator> ikc;
ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
opt.compression = kNoCompression;
BlockBasedTableOptions table_options;
table_options.block_size = 1024;
// big enough so we don't ever lose cached values.
table_options.block_cache = NewLRUCache(16 * 1024 * 1024);
opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
TableConstructor c(BytewiseComparator());
c.Add("k01", "hello");
c.Add("k02", "hello2");
c.Add("k03", std::string(10000, 'x'));
c.Add("k04", std::string(200000, 'x'));
c.Add("k05", std::string(300000, 'x'));
c.Add("k06", "hello3");
c.Add("k07", std::string(100000, 'x'));
std::vector<std::string> keys;
KVMap kvmap;
const ImmutableCFOptions ioptions(opt);
c.Finish(opt, ioptions, table_options, *ikc, &keys, &kvmap);
// We get the following data spread :
//
// Data block Index
// ========================
// [ k01 k02 k03 ] k03
// [ k04 ] k04
// [ k05 ] k05
// [ k06 k07 ] k07
// Simple
PrefetchRange(&c, &opt, &table_options, keys,
/*key_range=*/ "k01", "k05",
/*keys_in_cache=*/ {"k01", "k02", "k03", "k04", "k05"},
/*keys_not_in_cache=*/ {"k06", "k07"});
PrefetchRange(&c, &opt, &table_options, keys,
"k01", "k01",
{"k01", "k02", "k03"},
{"k04", "k05", "k06", "k07"});
// odd
PrefetchRange(&c, &opt, &table_options, keys,
"a", "z",
{"k01", "k02", "k03", "k04", "k05", "k06", "k07"},
{});
PrefetchRange(&c, &opt, &table_options, keys,
"k00", "k00",
{"k01", "k02", "k03"},
{"k04", "k05", "k06", "k07"});
// Edge cases
PrefetchRange(&c, &opt, &table_options, keys,
"k00", "k06",
{"k01", "k02", "k03", "k04", "k05", "k06", "k07"},
{});
PrefetchRange(&c, &opt, &table_options, keys,
"k00", "zzz",
{"k01", "k02", "k03", "k04", "k05", "k06", "k07"},
{});
// null keys
PrefetchRange(&c, &opt, &table_options, keys,
nullptr, nullptr,
{"k01", "k02", "k03", "k04", "k05", "k06", "k07"},
{});
PrefetchRange(&c, &opt, &table_options, keys,
"k04", nullptr,
{"k04", "k05", "k06", "k07"},
{"k01", "k02", "k03"});
PrefetchRange(&c, &opt, &table_options, keys,
nullptr, "k05",
{"k01", "k02", "k03", "k04", "k05"},
{"k06", "k07"});
// invalid
PrefetchRange(&c, &opt, &table_options, keys,
"k06", "k00", {}, {},
Status::InvalidArgument(Slice("k06 "), Slice("k07")));
}
TEST(BlockBasedTableTest, TotalOrderSeekOnHashIndex) {
BlockBasedTableOptions table_options;
for (int i = 0; i < 4; ++i) {

Loading…
Cancel
Save