Extend Get/MultiGet deadline support to table open (#6982)

Summary:
The current implementation of the ```read_options.deadline``` option only checks the deadline for random file reads during point lookups. This PR extends the checks to file opens, prefetches, and preloads done as part of table open.

The main changes are in ```BlockBasedTable```, the partitioned index and filter readers, and ```TableCache```, which now take ```ReadOptions``` as an additional parameter. In ```BlockBasedTable::Open```, in order to retain existing behavior w.r.t. checksum verification and block cache usage, we filter out most of the options in ```ReadOptions``` except ```deadline```. However, having the ```ReadOptions``` gives us more flexibility to honor other options such as ```verify_checksums``` and ```fill_cache``` in the future.

There are additional changes at call sites due to the function signature changes in ```NewTableReader()``` and ```FilePrefetchBuffer```.
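
For context, the sketch below shows how a caller would exercise the deadline from the public API; the helper name and key are illustrative and not part of this change. With this PR, the deadline also bounds any table-open work (file open, footer read, index/filter prefetch) triggered by the lookup.

```cpp
#include <chrono>

#include "rocksdb/db.h"
#include "rocksdb/env.h"

// Illustrative helper: perform a point lookup with a ~10ms deadline.
rocksdb::Status GetWithDeadline(rocksdb::DB* db, const rocksdb::Slice& key,
                                std::string* value) {
  rocksdb::ReadOptions ro;
  // The deadline is an absolute time point in microseconds (Env clock),
  // not a relative duration.
  ro.deadline = std::chrono::microseconds(db->GetEnv()->NowMicros() + 10000);
  rocksdb::Status s = db->Get(ro, key, value);
  // With this change, s may be TimedOut() even when the lookup first had to
  // open the SST file (e.g. after the table cache entry was evicted).
  return s;
}
```
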
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6982

Test Plan: Add new unit tests in db_basic_test

Reviewed By: riversand963

Differential Revision: D22219515

Pulled By: anand1976

fbshipit-source-id: 8a3b92f4a889808013838603aa3ca35229cd501b
Branch: main
Author: Anand Ananthabhotla (committed by Facebook GitHub Bot)
Parent: d809ae9a2d
Commit: 9a5886bd8c
48 changed files (number of changed lines per file):

 1. db/db_basic_test.cc (392)
 2. db/db_impl/db_impl.cc (2)
 3. db/plain_table_db_test.cc (3)
 4. db/table_cache.cc (58)
 5. db/table_cache.h (4)
 6. db/version_builder.cc (7)
 7. file/file_prefetch_buffer.cc (13)
 8. file/file_prefetch_buffer.h (10)
 9. file/file_util.cc (3)
10. file/random_access_file_reader.h (2)
11. include/rocksdb/table.h (14)
12. table/adaptive/adaptive_table_factory.cc (10)
13. table/adaptive/adaptive_table_factory.h (3)
14. table/block_based/binary_search_index_reader.cc (8)
15. table/block_based/binary_search_index_reader.h (2)
16. table/block_based/block_based_filter_block.cc (12)
17. table/block_based/block_based_filter_block.h (6)
18. table/block_based/block_based_table_factory.cc (4)
19. table/block_based/block_based_table_factory.h (3)
20. table/block_based/block_based_table_reader.cc (128)
21. table/block_based/block_based_table_reader.h (34)
22. table/block_based/block_based_table_reader_test.cc (3)
23. table/block_based/filter_block.h (2)
24. table/block_based/full_filter_block.cc (12)
25. table/block_based/full_filter_block.h (6)
26. table/block_based/hash_index_reader.cc (3)
27. table/block_based/hash_index_reader.h (2)
28. table/block_based/partitioned_filter_block.cc (31)
29. table/block_based/partitioned_filter_block.h (8)
30. table/block_based/partitioned_index_reader.cc (22)
31. table/block_based/partitioned_index_reader.h (4)
32. table/block_based/uncompression_dict_reader.cc (10)
33. table/block_based/uncompression_dict_reader.h (6)
34. table/block_fetcher.cc (21)
35. table/block_fetcher_test.cc (11)
36. table/cuckoo/cuckoo_table_factory.cc (2)
37. table/cuckoo/cuckoo_table_factory.h (3)
38. table/format.cc (15)
39. table/format.h (2)
40. table/meta_blocks.cc (40)
41. table/meta_blocks.h (3)
42. table/mock_table.cc (1)
43. table/mock_table.h (3)
44. table/plain/plain_table_factory.cc (2)
45. table/plain/plain_table_factory.h (6)
46. table/sst_file_dumper.cc (6)
47. table/table_test.cc (16)
48. utilities/options/options_util_test.cc (2)

db/db_basic_test.cc
@@ -7,6 +7,8 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
+#include <cstring>
+
 #include "db/db_test_util.h"
 #include "port/stack_trace.h"
 #include "rocksdb/merge_operator.h"
@@ -2801,132 +2803,176 @@ INSTANTIATE_TEST_CASE_P(ParallelIO, DBBasicTestWithParallelIO,
                                            ::testing::Bool(), ::testing::Bool(),
                                            ::testing::Values(1, 4)));

-// A test class for intercepting random reads and injecting artificial
-// delays. Used for testing the deadline/timeout feature
-class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet {
- public:
-  DBBasicTestMultiGetDeadline()
-      : DBBasicTestMultiGet("db_basic_test_multiget_deadline" /*Test dir*/,
-                            10 /*# of column families*/,
-                            false /*compressed cache enabled*/,
-                            true /*uncompressed cache enabled*/,
-                            true /*compression enabled*/,
-                            true /*ReadOptions.fill_cache*/,
-                            1 /*# of parallel compression threads*/) {}
-
-  // Forward declaration
-  class DeadlineFS;
-  class DeadlineRandomAccessFile : public FSRandomAccessFileWrapper {
-   public:
-    DeadlineRandomAccessFile(DeadlineFS& fs, SpecialEnv* env,
-                             std::unique_ptr<FSRandomAccessFile>& file)
-        : FSRandomAccessFileWrapper(file.get()),
-          fs_(fs),
-          file_(std::move(file)),
-          env_(env) {}
-
-    IOStatus Read(uint64_t offset, size_t len, const IOOptions& opts,
-                  Slice* result, char* scratch, IODebugContext* dbg) const override {
-      int delay;
-      const std::chrono::microseconds deadline = fs_.GetDeadline();
-      if (deadline.count()) {
-        AssertDeadline(deadline, opts);
-      }
-      if (fs_.ShouldDelay(&delay)) {
-        env_->SleepForMicroseconds(delay);
-      }
-      return FSRandomAccessFileWrapper::Read(offset, len, opts, result, scratch,
-                                             dbg);
-    }
-
-    IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs,
-                       const IOOptions& options, IODebugContext* dbg) override {
-      int delay;
-      const std::chrono::microseconds deadline = fs_.GetDeadline();
-      if (deadline.count()) {
-        AssertDeadline(deadline, options);
-      }
-      if (fs_.ShouldDelay(&delay)) {
-        env_->SleepForMicroseconds(delay);
-      }
-      return FSRandomAccessFileWrapper::MultiRead(reqs, num_reqs, options, dbg);
-    }
-
-   private:
-    void AssertDeadline(const std::chrono::microseconds deadline,
-                        const IOOptions& opts) const {
-      // Give a leeway of +- 10us as it can take some time for the Get/
-      // MultiGet call to reach here, in order to avoid false alarms
-      std::chrono::microseconds now =
-          std::chrono::microseconds(env_->NowMicros());
-      ASSERT_EQ(deadline - now, opts.timeout);
-    }
-    DeadlineFS& fs_;
-    std::unique_ptr<FSRandomAccessFile> file_;
-    SpecialEnv* env_;
-  };
-
-  class DeadlineFS : public FileSystemWrapper {
-   public:
-    DeadlineFS(SpecialEnv* env)
-        : FileSystemWrapper(FileSystem::Default()),
-          delay_idx_(0),
-          deadline_(std::chrono::microseconds::zero()),
-          env_(env) {}
-    ~DeadlineFS() = default;
-
-    IOStatus NewRandomAccessFile(const std::string& fname,
-                                 const FileOptions& opts,
-                                 std::unique_ptr<FSRandomAccessFile>* result,
-                                 IODebugContext* dbg) override {
-      std::unique_ptr<FSRandomAccessFile> file;
-      IOStatus s;
-
-      s = target()->NewRandomAccessFile(fname, opts, &file, dbg);
-      result->reset(new DeadlineRandomAccessFile(*this, env_, file));
-      return s;
-    }
-
-    // Set a vector of {IO counter, delay in microseconds} pairs that control
-    // when to inject a delay and duration of the delay
-    void SetDelaySequence(const std::chrono::microseconds deadline,
-                          const std::vector<std::pair<int, int>>&& seq) {
-      int total_delay = 0;
-      for (auto& seq_iter : seq) {
-        // Ensure no individual delay is > 500ms
-        ASSERT_LT(seq_iter.second, 500000);
-        total_delay += seq_iter.second;
-      }
-      // ASSERT total delay is < 1s. This is mainly to keep the test from
-      // timing out in CI test frameworks
-      ASSERT_LT(total_delay, 1000000);
-      delay_seq_ = seq;
-      delay_idx_ = 0;
-      io_count_ = 0;
-      deadline_ = deadline;
-    }
-
-    // Increment the IO counter and return a delay in microseconds
-    bool ShouldDelay(int* delay) {
-      if (delay_idx_ < delay_seq_.size() &&
-          delay_seq_[delay_idx_].first == io_count_++) {
-        *delay = delay_seq_[delay_idx_].second;
-        delay_idx_++;
-        return true;
-      }
-      return false;
-    }
-
-    const std::chrono::microseconds GetDeadline() { return deadline_; }
-
-   private:
-    std::vector<std::pair<int, int>> delay_seq_;
-    size_t delay_idx_;
-    int io_count_;
-    std::chrono::microseconds deadline_;
-    SpecialEnv* env_;
-  };
+// Forward declaration
+class DeadlineFS;
+
+class DeadlineRandomAccessFile : public FSRandomAccessFileWrapper {
+ public:
+  DeadlineRandomAccessFile(DeadlineFS& fs, SpecialEnv* env,
+                           std::unique_ptr<FSRandomAccessFile>& file)
+      : FSRandomAccessFileWrapper(file.get()),
+        fs_(fs),
+        file_(std::move(file)),
+        env_(env) {}
+
+  IOStatus Read(uint64_t offset, size_t len, const IOOptions& opts,
+                Slice* result, char* scratch,
+                IODebugContext* dbg) const override;
+
+  IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs,
+                     const IOOptions& options, IODebugContext* dbg) override;
+
+ private:
+  DeadlineFS& fs_;
+  std::unique_ptr<FSRandomAccessFile> file_;
+  SpecialEnv* env_;
+};
+
+class DeadlineFS : public FileSystemWrapper {
+ public:
+  explicit DeadlineFS(SpecialEnv* env)
+      : FileSystemWrapper(FileSystem::Default()),
+        delay_idx_(0),
+        deadline_(std::chrono::microseconds::zero()),
+        env_(env),
+        timedout_(false),
+        ignore_deadline_(false) {}
+
+  IOStatus NewRandomAccessFile(const std::string& fname,
+                               const FileOptions& opts,
+                               std::unique_ptr<FSRandomAccessFile>* result,
+                               IODebugContext* dbg) override {
+    std::unique_ptr<FSRandomAccessFile> file;
+    IOStatus s;
+
+    s = target()->NewRandomAccessFile(fname, opts, &file, dbg);
+    result->reset(new DeadlineRandomAccessFile(*this, env_, file));
+
+    int delay;
+    const std::chrono::microseconds deadline = GetDeadline();
+    if (deadline.count()) {
+      AssertDeadline(deadline, opts.io_options);
+    }
+    if (ShouldDelay(&delay, &s)) {
+      env_->SleepForMicroseconds(delay);
+    }
+    return s;
+  }
+
+  // Set a vector of {IO counter, delay in microseconds, return status} tuples
+  // that control when to inject a delay and duration of the delay
+  void SetDelaySequence(
+      const std::chrono::microseconds deadline,
+      const std::vector<std::tuple<int, int, IOStatus>>&& seq) {
+    int total_delay = 0;
+    for (auto& seq_iter : seq) {
+      // Ensure no individual delay is > 500ms
+      ASSERT_LT(std::get<1>(seq_iter), 500000);
+      total_delay += std::get<1>(seq_iter);
+    }
+    // ASSERT total delay is < 1s. This is mainly to keep the test from
+    // timing out in CI test frameworks
+    ASSERT_LT(total_delay, 1000000);
+    delay_seq_ = seq;
+    delay_idx_ = 0;
+    io_count_ = 0;
+    deadline_ = deadline;
+    timedout_ = false;
+  }
+
+  // Increment the IO counter and return a delay in microseconds
+  bool ShouldDelay(int* delay, IOStatus* s) {
+    if (!ignore_deadline_ && delay_idx_ < delay_seq_.size() &&
+        std::get<0>(delay_seq_[delay_idx_]) == io_count_++) {
+      *delay = std::get<1>(delay_seq_[delay_idx_]);
+      *s = std::get<2>(delay_seq_[delay_idx_]);
+      delay_idx_++;
+      timedout_ = true;
+      return true;
+    }
+    *s = IOStatus::OK();
+    return false;
+  }
+
+  const std::chrono::microseconds GetDeadline() {
+    return ignore_deadline_ ? std::chrono::microseconds::zero() : deadline_;
+  }
+
+  bool TimedOut() { return timedout_; }
+
+  void IgnoreDeadline(bool ignore) { ignore_deadline_ = ignore; }
+
+  void AssertDeadline(const std::chrono::microseconds deadline,
+                      const IOOptions& opts) const {
+    // Give a leeway of +- 10us as it can take some time for the Get/
+    // MultiGet call to reach here, in order to avoid false alarms
+    std::chrono::microseconds now =
+        std::chrono::microseconds(env_->NowMicros());
+    if (deadline - now != opts.timeout) {
+      ASSERT_EQ(deadline - now, opts.timeout);
+    }
+  }
+
+ private:
+  std::vector<std::tuple<int, int, IOStatus>> delay_seq_;
+  size_t delay_idx_;
+  int io_count_;
+  std::chrono::microseconds deadline_;
+  SpecialEnv* env_;
+  bool timedout_;
+  bool ignore_deadline_;
+};
+
+IOStatus DeadlineRandomAccessFile::Read(uint64_t offset, size_t len,
+                                        const IOOptions& opts, Slice* result,
+                                        char* scratch,
+                                        IODebugContext* dbg) const {
+  int delay;
+  const std::chrono::microseconds deadline = fs_.GetDeadline();
+  IOStatus s;
+  if (deadline.count()) {
+    fs_.AssertDeadline(deadline, opts);
+  }
+  if (fs_.ShouldDelay(&delay, &s)) {
+    env_->SleepForMicroseconds(delay);
+  }
+  if (s.ok()) {
+    s = FSRandomAccessFileWrapper::Read(offset, len, opts, result, scratch,
+                                        dbg);
+  }
+  return s;
+}
+
+IOStatus DeadlineRandomAccessFile::MultiRead(FSReadRequest* reqs,
+                                             size_t num_reqs,
+                                             const IOOptions& options,
+                                             IODebugContext* dbg) {
+  int delay;
+  const std::chrono::microseconds deadline = fs_.GetDeadline();
+  IOStatus s;
+  if (deadline.count()) {
+    fs_.AssertDeadline(deadline, options);
+  }
+  if (fs_.ShouldDelay(&delay, &s)) {
+    env_->SleepForMicroseconds(delay);
+  }
+  if (s.ok()) {
+    s = FSRandomAccessFileWrapper::MultiRead(reqs, num_reqs, options, dbg);
+  }
+  return s;
+}
+
+// A test class for intercepting random reads and injecting artificial
+// delays. Used for testing the deadline/timeout feature
+class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet {
+ public:
+  DBBasicTestMultiGetDeadline()
+      : DBBasicTestMultiGet(
+            "db_basic_test_multiget_deadline" /*Test dir*/,
+            10 /*# of column families*/, false /*compressed cache enabled*/,
+            true /*uncompressed cache enabled*/, true /*compression enabled*/,
+            true /*ReadOptions.fill_cache*/,
+            1 /*# of parallel compression threads*/) {}

   inline void CheckStatus(std::vector<Status>& statuses, size_t num_ok) {
     for (size_t i = 0; i < statuses.size(); ++i) {
@@ -2940,8 +2986,7 @@ class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet {
 };

 TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
-  std::shared_ptr<DBBasicTestMultiGetDeadline::DeadlineFS> fs(
-      new DBBasicTestMultiGetDeadline::DeadlineFS(env_));
+  std::shared_ptr<DeadlineFS> fs = std::make_shared<DeadlineFS>(env_);
   std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, fs));
   Options options = CurrentOptions();
   env_->SetTimeElapseOnlySleep(&options);
@@ -2972,7 +3017,8 @@ TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
   ReadOptions ro;
   ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
   // Delay the first IO by 200ms
-  fs->SetDelaySequence(ro.deadline, {{0, 20000}});
+  fs->SetDelaySequence(
+      ro.deadline, {std::tuple<int, int, IOStatus>{0, 20000, IOStatus::OK()}});

   std::vector<Status> statuses = dbfull()->MultiGet(ro, cfs, keys, &values);
   // The first key is successful because we check after the lookup, but
@@ -2997,7 +3043,8 @@ TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
     keys[i] = Slice(key_str[i].data(), key_str[i].size());
   }
   ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
-  fs->SetDelaySequence(ro.deadline, {{1, 20000}});
+  fs->SetDelaySequence(
+      ro.deadline, {std::tuple<int, int, IOStatus>{1, 20000, IOStatus::OK()}});

   statuses = dbfull()->MultiGet(ro, cfs, keys, &values);
   CheckStatus(statuses, 3);
@@ -3011,7 +3058,8 @@ TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
   statuses.clear();
   statuses.resize(keys.size());
   ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
-  fs->SetDelaySequence(ro.deadline, {{0, 20000}});
+  fs->SetDelaySequence(
+      ro.deadline, {std::tuple<int, int, IOStatus>{0, 20000, IOStatus::OK()}});
   dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(),
                      pin_values.data(), statuses.data());
   CheckStatus(statuses, 2);
@@ -3026,7 +3074,8 @@ TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
   statuses.clear();
   statuses.resize(keys.size());
   ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
-  fs->SetDelaySequence(ro.deadline, {{2, 20000}});
+  fs->SetDelaySequence(
+      ro.deadline, {std::tuple<int, int, IOStatus>{2, 20000, IOStatus::OK()}});
   dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(),
                      pin_values.data(), statuses.data());
   CheckStatus(statuses, 6);
@@ -3040,7 +3089,8 @@ TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
   statuses.clear();
   statuses.resize(keys.size());
   ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
-  fs->SetDelaySequence(ro.deadline, {{3, 20000}});
+  fs->SetDelaySequence(
+      ro.deadline, {std::tuple<int, int, IOStatus>{3, 20000, IOStatus::OK()}});
   dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(),
                      pin_values.data(), statuses.data());
   CheckStatus(statuses, 8);
@@ -3066,7 +3116,8 @@ TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
   statuses.clear();
   statuses.resize(keys.size());
   ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
-  fs->SetDelaySequence(ro.deadline, {{1, 20000}});
+  fs->SetDelaySequence(
+      ro.deadline, {std::tuple<int, int, IOStatus>{1, 20000, IOStatus::OK()}});
   dbfull()->MultiGet(ro, handles_[0], keys.size(), keys.data(),
                      pin_values.data(), statuses.data());
   CheckStatus(statuses, 64);
@@ -3100,6 +3151,99 @@ TEST_F(DBBasicTest, ManifestWriteFailure) {
   Reopen(options);
 }

+TEST_F(DBBasicTest, PointLookupDeadline) {
+  std::shared_ptr<DeadlineFS> fs = std::make_shared<DeadlineFS>(env_);
+  std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, fs));
+
+  // Since we call SetTimeElapseOnlySleep, Close() later on may not work
+  // properly for the DB that's opened by the DBTestBase constructor.
+  Close();
+  for (int option_config = kDefault; option_config < kEnd; ++option_config) {
+    if (ShouldSkipOptions(option_config, kSkipPlainTable | kSkipMmapReads)) {
+      continue;
+    }
+    option_config_ = option_config;
+    Options options = CurrentOptions();
+    if (options.use_direct_reads) {
+      continue;
+    }
+    options.env = env.get();
+    options.disable_auto_compactions = true;
+    Cache* block_cache = nullptr;
+    env_->SetTimeElapseOnlySleep(&options);
+    // Fileter block reads currently don't cause the request to get
+    // aborted on a read timeout, so its possible those block reads
+    // may get issued even if the deadline is past
+    SyncPoint::GetInstance()->SetCallBack(
+        "BlockBasedTable::Get:BeforeFilterMatch",
+        [&](void* /*arg*/) { fs->IgnoreDeadline(true); });
+    SyncPoint::GetInstance()->SetCallBack(
+        "BlockBasedTable::Get:AfterFilterMatch",
+        [&](void* /*arg*/) { fs->IgnoreDeadline(false); });
+    // DB open will create table readers unless we reduce the table cache
+    // capacity.
+    // SanitizeOptions will set max_open_files to minimum of 20. Table cache
+    // is allocated with max_open_files - 10 as capacity. So override
+    // max_open_files to 11 so table cache capacity will become 1. This will
+    // prevent file open during DB open and force the file to be opened
+    // during MultiGet
+    SyncPoint::GetInstance()->SetCallBack(
+        "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) {
+          int* max_open_files = (int*)arg;
+          *max_open_files = 11;
+        });
+    SyncPoint::GetInstance()->EnableProcessing();
+    Reopen(options);
+
+    if (options.table_factory &&
+        !strcmp(options.table_factory->Name(),
+                BlockBasedTableFactory::kName.c_str())) {
+      BlockBasedTableFactory* bbtf =
+          static_cast<BlockBasedTableFactory*>(options.table_factory.get());
+      block_cache = bbtf->table_options().block_cache.get();
+    }
+
+    Random rnd(301);
+    for (int i = 0; i < 400; ++i) {
+      std::string key = "k" + ToString(i);
+      Put(key, RandomString(&rnd, 100));
+    }
+    Flush();
+
+    bool timedout = true;
+    // A timeout will be forced when the IO counter reaches this value
+    int io_deadline_trigger = 0;
+    // Keep incrementing io_deadline_trigger and call Get() until there is an
+    // iteration that doesn't cause a timeout. This ensures that we cover
+    // all file reads in the point lookup path that can potentially timeout
+    // and cause the Get() to fail.
+    while (timedout) {
+      ReadOptions ro;
+      ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
+      fs->SetDelaySequence(
+          ro.deadline, {std::tuple<int, int, IOStatus>{
+                           io_deadline_trigger, 20000, IOStatus::TimedOut()}});
+      block_cache->SetCapacity(0);
+      block_cache->SetCapacity(1048576);
+
+      std::string value;
+      Status s = dbfull()->Get(ro, "k50", &value);
+      if (fs->TimedOut()) {
+        ASSERT_EQ(s, Status::TimedOut());
+      } else {
+        timedout = false;
+        ASSERT_OK(s);
+      }
+      io_deadline_trigger++;
+    }
+    // Reset the delay sequence in order to avoid false alarms during Reopen
+    fs->SetDelaySequence(std::chrono::microseconds::zero(), {});
+  }
+  Close();
+}
+
 }  // namespace ROCKSDB_NAMESPACE

 #ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS

db/db_impl/db_impl.cc
@@ -2685,8 +2685,6 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options,
     return NewErrorIterator(
         Status::NotSupported("Managed iterator is not supported anymore."));
   }
-  // We will eventually support deadline for iterators too, but safeguard
-  // for now
   if (read_options.deadline != std::chrono::microseconds::zero()) {
     return NewErrorIterator(
         Status::NotSupported("ReadOptions deadline is not supported"));

db/plain_table_db_test.cc
@@ -336,8 +336,9 @@ class TestPlainTableFactory : public PlainTableFactory {
         column_family_id_(column_family_id),
         column_family_name_(std::move(column_family_name)) {}

+  using PlainTableFactory::NewTableReader;
   Status NewTableReader(
-      const TableReaderOptions& table_reader_options,
+      const ReadOptions& /*ro*/, const TableReaderOptions& table_reader_options,
       std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
       std::unique_ptr<TableReader>* table,
       bool /*prefetch_index_and_filter_in_cache*/) const override {

db/table_cache.cc
@@ -13,6 +13,7 @@
 #include "db/range_tombstone_fragmenter.h"
 #include "db/snapshot_impl.h"
 #include "db/version_edit.h"
+#include "file/file_util.h"
 #include "file/filename.h"
 #include "file/random_access_file_reader.h"
 #include "monitoring/perf_context_imp.h"
@@ -92,7 +93,7 @@ void TableCache::ReleaseHandle(Cache::Handle* handle) {
 }

 Status TableCache::GetTableReader(
-    const FileOptions& file_options,
+    const ReadOptions& ro, const FileOptions& file_options,
     const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
     bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist,
     std::unique_ptr<TableReader>* table_reader,
@@ -102,12 +103,19 @@ Status TableCache::GetTableReader(
   std::string fname =
       TableFileName(ioptions_.cf_paths, fd.GetNumber(), fd.GetPathId());
   std::unique_ptr<FSRandomAccessFile> file;
-  Status s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file,
-                                               nullptr);
+  FileOptions fopts = file_options;
+  Status s = PrepareIOFromReadOptions(ro, ioptions_.env, fopts.io_options);
+  if (s.ok()) {
+    s = ioptions_.fs->NewRandomAccessFile(fname, fopts, &file, nullptr);
+  }
   RecordTick(ioptions_.statistics, NO_FILE_OPENS);
   if (s.IsPathNotFound()) {
     fname = Rocks2LevelTableFileName(fname);
-    s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file, nullptr);
+    s = PrepareIOFromReadOptions(ro, ioptions_.env, fopts.io_options);
+    if (s.ok()) {
+      s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file,
+                                            nullptr);
+    }
     RecordTick(ioptions_.statistics, NO_FILE_OPENS);
   }
@@ -122,6 +130,7 @@ Status TableCache::GetTableReader(
         record_read_stats ? ioptions_.statistics : nullptr, SST_READ_MICROS,
         file_read_hist, ioptions_.rate_limiter, ioptions_.listeners));
     s = ioptions_.table_factory->NewTableReader(
+        ro,
         TableReaderOptions(ioptions_, prefix_extractor, file_options,
                            internal_comparator, skip_filters, immortal_tables_,
                            false /* force_direct_prefetch */, level,
@@ -141,7 +150,8 @@ void TableCache::EraseHandle(const FileDescriptor& fd, Cache::Handle* handle) {
   cache_->Erase(key);
 }

-Status TableCache::FindTable(const FileOptions& file_options,
+Status TableCache::FindTable(const ReadOptions& ro,
+                             const FileOptions& file_options,
                              const InternalKeyComparator& internal_comparator,
                              const FileDescriptor& fd, Cache::Handle** handle,
                              const SliceTransform* prefix_extractor,
@@ -169,7 +179,7 @@ Status TableCache::FindTable(const FileOptions& file_options,
     }
     std::unique_ptr<TableReader> table_reader;
-    s = GetTableReader(file_options, internal_comparator, fd,
+    s = GetTableReader(ro, file_options, internal_comparator, fd,
                        false /* sequential mode */, record_read_stats,
                        file_read_hist, &table_reader, prefix_extractor,
                        skip_filters, level, prefetch_index_and_filter_in_cache,
@@ -212,12 +222,12 @@ InternalIterator* TableCache::NewIterator(
   auto& fd = file_meta.fd;
   table_reader = fd.table_reader;
   if (table_reader == nullptr) {
-    s = FindTable(file_options, icomparator, fd, &handle, prefix_extractor,
-                  options.read_tier == kBlockCacheTier /* no_io */,
-                  !for_compaction /* record_read_stats */, file_read_hist,
-                  skip_filters, level,
-                  true /* prefetch_index_and_filter_in_cache */,
-                  max_file_size_for_l0_meta_pin);
+    s = FindTable(
+        options, file_options, icomparator, fd, &handle, prefix_extractor,
+        options.read_tier == kBlockCacheTier /* no_io */,
+        !for_compaction /* record_read_stats */, file_read_hist, skip_filters,
+        level, true /* prefetch_index_and_filter_in_cache */,
+        max_file_size_for_l0_meta_pin);
     if (s.ok()) {
       table_reader = GetTableReaderFromHandle(handle);
     }
@@ -288,7 +298,7 @@ Status TableCache::GetRangeTombstoneIterator(
   TableReader* t = fd.table_reader;
   Cache::Handle* handle = nullptr;
   if (t == nullptr) {
-    s = FindTable(file_options_, internal_comparator, fd, &handle);
+    s = FindTable(options, file_options_, internal_comparator, fd, &handle);
     if (s.ok()) {
       t = GetTableReaderFromHandle(handle);
     }
@@ -403,7 +413,7 @@ Status TableCache::Get(const ReadOptions& options,
   Cache::Handle* handle = nullptr;
   if (!done && s.ok()) {
     if (t == nullptr) {
-      s = FindTable(file_options_, internal_comparator, fd, &handle,
+      s = FindTable(options, file_options_, internal_comparator, fd, &handle,
                     prefix_extractor,
                     options.read_tier == kBlockCacheTier /* no_io */,
                     true /* record_read_stats */, file_read_hist, skip_filters,
@@ -506,8 +516,8 @@ Status TableCache::MultiGet(const ReadOptions& options,
     if (s.ok() && !table_range.empty()) {
       if (t == nullptr) {
         s = FindTable(
-            file_options_, internal_comparator, fd, &handle, prefix_extractor,
-            options.read_tier == kBlockCacheTier /* no_io */,
+            options, file_options_, internal_comparator, fd, &handle,
+            prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */,
             true /* record_read_stats */, file_read_hist, skip_filters, level);
         TEST_SYNC_POINT_CALLBACK("TableCache::MultiGet:FindTable", &s);
         if (s.ok()) {
@@ -591,8 +601,8 @@ Status TableCache::GetTableProperties(
   }

   Cache::Handle* table_handle = nullptr;
-  s = FindTable(file_options, internal_comparator, fd, &table_handle,
-                prefix_extractor, no_io);
+  s = FindTable(ReadOptions(), file_options, internal_comparator, fd,
+                &table_handle, prefix_extractor, no_io);
   if (!s.ok()) {
     return s;
   }
@@ -615,8 +625,8 @@ size_t TableCache::GetMemoryUsageByTableReader(
   }

   Cache::Handle* table_handle = nullptr;
-  s = FindTable(file_options, internal_comparator, fd, &table_handle,
-                prefix_extractor, true);
+  s = FindTable(ReadOptions(), file_options, internal_comparator, fd,
+                &table_handle, prefix_extractor, true);
   if (!s.ok()) {
     return 0;
   }
@@ -640,8 +650,8 @@ uint64_t TableCache::ApproximateOffsetOf(
   Cache::Handle* table_handle = nullptr;
   if (table_reader == nullptr) {
     const bool for_compaction = (caller == TableReaderCaller::kCompaction);
-    Status s = FindTable(file_options_, internal_comparator, fd, &table_handle,
-                         prefix_extractor, false /* no_io */,
-                         !for_compaction /* record_read_stats */);
+    Status s = FindTable(ReadOptions(), file_options_, internal_comparator, fd,
+                         &table_handle, prefix_extractor, false /* no_io */,
+                         !for_compaction /* record_read_stats */);
     if (s.ok()) {
       table_reader = GetTableReaderFromHandle(table_handle);
@@ -667,8 +677,8 @@ uint64_t TableCache::ApproximateSize(
   Cache::Handle* table_handle = nullptr;
   if (table_reader == nullptr) {
     const bool for_compaction = (caller == TableReaderCaller::kCompaction);
-    Status s = FindTable(file_options_, internal_comparator, fd, &table_handle,
-                         prefix_extractor, false /* no_io */,
-                         !for_compaction /* record_read_stats */);
+    Status s = FindTable(ReadOptions(), file_options_, internal_comparator, fd,
+                         &table_handle, prefix_extractor, false /* no_io */,
+                         !for_compaction /* record_read_stats */);
     if (s.ok()) {
       table_reader = GetTableReaderFromHandle(table_handle);
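
The key piece above is ```PrepareIOFromReadOptions()```, which turns the caller's absolute deadline into a per-IO timeout before the file is opened. Below is a hedged sketch of that conversion; the real helper lives in ```file/file_util.h```, and this simplified version only illustrates the deadline-to-timeout logic that the new ```DeadlineFS``` test asserts on.

```cpp
#include <chrono>

#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/options.h"

namespace ROCKSDB_NAMESPACE {
// Simplified sketch (not the actual helper): derive an IOOptions timeout
// from ReadOptions::deadline, or fail immediately if the deadline passed.
IOStatus PrepareIOFromReadOptionsSketch(const ReadOptions& ro, Env* env,
                                        IOOptions& opts) {
  if (ro.deadline.count() > 0) {
    std::chrono::microseconds now(env->NowMicros());
    if (now >= ro.deadline) {
      // The deadline was already exceeded before issuing any IO.
      return IOStatus::TimedOut("Deadline exceeded");
    }
    // Pass the remaining budget down to the FileSystem as a per-IO timeout.
    opts.timeout = ro.deadline - now;
  }
  return IOStatus::OK();
}
}  // namespace ROCKSDB_NAMESPACE
```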

db/table_cache.h
@@ -129,7 +129,7 @@ class TableCache {
   // Find table reader
   // @param skip_filters Disables loading/accessing the filter block
   // @param level == -1 means not specified
-  Status FindTable(const FileOptions& toptions,
+  Status FindTable(const ReadOptions& ro, const FileOptions& toptions,
                    const InternalKeyComparator& internal_comparator,
                    const FileDescriptor& file_fd, Cache::Handle**,
                    const SliceTransform* prefix_extractor = nullptr,
@@ -195,7 +195,7 @@ class TableCache {
  private:
   // Build a table reader
-  Status GetTableReader(const FileOptions& file_options,
+  Status GetTableReader(const ReadOptions& ro, const FileOptions& file_options,
                         const InternalKeyComparator& internal_comparator,
                         const FileDescriptor& fd, bool sequential_mode,
                         bool record_read_stats, HistogramImpl* file_read_hist,

db/version_builder.cc
@@ -931,9 +931,10 @@ class VersionBuilder::Rep {
         auto* file_meta = files_meta[file_idx].first;
         int level = files_meta[file_idx].second;
         statuses[file_idx] = table_cache_->FindTable(
-            file_options_, *(base_vstorage_->InternalComparator()),
-            file_meta->fd, &file_meta->table_reader_handle, prefix_extractor,
-            false /*no_io */, true /* record_read_stats */,
+            ReadOptions(), file_options_,
+            *(base_vstorage_->InternalComparator()), file_meta->fd,
+            &file_meta->table_reader_handle, prefix_extractor, false /*no_io */,
+            true /* record_read_stats */,
             internal_stats->GetFileReadHist(level), false, level,
             prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin);

         if (file_meta->table_reader_handle != nullptr) {

file/file_prefetch_buffer.cc
@@ -21,7 +21,8 @@
 #include "util/rate_limiter.h"

 namespace ROCKSDB_NAMESPACE {
-Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
+Status FilePrefetchBuffer::Prefetch(const IOOptions& opts,
+                                    RandomAccessFileReader* reader,
                                     uint64_t offset, size_t n,
                                     bool for_compaction) {
   if (!enable_ || reader == nullptr) {
@@ -87,7 +88,7 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,

   Slice result;
   size_t read_len = static_cast<size_t>(roundup_len - chunk_len);
-  s = reader->Read(IOOptions(), rounddown_offset + chunk_len, read_len, &result,
+  s = reader->Read(opts, rounddown_offset + chunk_len, read_len, &result,
                    buffer_.BufferStart() + chunk_len, nullptr, for_compaction);
 #ifndef NDEBUG
   if (!s.ok() || result.size() < read_len) {
@@ -103,7 +104,8 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
   return s;
 }

-bool FilePrefetchBuffer::TryReadFromCache(uint64_t offset, size_t n,
+bool FilePrefetchBuffer::TryReadFromCache(const IOOptions& opts,
+                                          uint64_t offset, size_t n,
                                           Slice* result, bool for_compaction) {
   if (track_min_offset_ && offset < min_offset_read_) {
     min_offset_read_ = static_cast<size_t>(offset);
@@ -122,10 +124,11 @@ bool FilePrefetchBuffer::TryReadFromCache(uint64_t offset, size_t n,
     assert(max_readahead_size_ >= readahead_size_);
     Status s;
     if (for_compaction) {
-      s = Prefetch(file_reader_, offset, std::max(n, readahead_size_),
-                   for_compaction);
+      s = Prefetch(opts, file_reader_, offset, std::max(n, readahead_size_),
+                   for_compaction);
     } else {
-      s = Prefetch(file_reader_, offset, n + readahead_size_, for_compaction);
+      s = Prefetch(opts, file_reader_, offset, n + readahead_size_,
+                   for_compaction);
     }
     if (!s.ok()) {
       return false;

file/file_prefetch_buffer.h
@@ -11,9 +11,11 @@
 #include <atomic>
 #include <sstream>
 #include <string>

 #include "file/random_access_file_reader.h"
 #include "port/port.h"
 #include "rocksdb/env.h"
+#include "rocksdb/options.h"
 #include "util/aligned_buffer.h"

 namespace ROCKSDB_NAMESPACE {
@@ -59,8 +61,8 @@ class FilePrefetchBuffer {
   // offset : the file offset to start reading from.
   // n : the number of bytes to read.
   // for_compaction : if prefetch is done for compaction read.
-  Status Prefetch(RandomAccessFileReader* reader, uint64_t offset, size_t n,
-                  bool for_compaction = false);
+  Status Prefetch(const IOOptions& opts, RandomAccessFileReader* reader,
+                  uint64_t offset, size_t n, bool for_compaction = false);

   // Tries returning the data for a file raed from this buffer, if that data is
   // in the buffer.
@@ -72,8 +74,8 @@ class FilePrefetchBuffer {
   // n : the number of bytes.
   // result : output buffer to put the data into.
   // for_compaction : if cache read is done for compaction read.
-  bool TryReadFromCache(uint64_t offset, size_t n, Slice* result,
-                        bool for_compaction = false);
+  bool TryReadFromCache(const IOOptions& opts, uint64_t offset, size_t n,
+                        Slice* result, bool for_compaction = false);

   // The minimum `offset` ever passed to TryReadFromCache(). This will nly be
   // tracked if track_min_offset = true.
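
Since ```FilePrefetchBuffer``` is an internal class, the sketch below only illustrates the new calling convention introduced here: callers thread an ```IOOptions``` through, so a deadline-derived ```opts.timeout``` also applies to any read the buffer issues on a cache miss. The function and variable names are illustrative.

```cpp
#include <chrono>

#include "file/file_prefetch_buffer.h"
#include "rocksdb/file_system.h"
#include "rocksdb/slice.h"

namespace ROCKSDB_NAMESPACE {
// Illustrative caller of the updated interface.
bool ReadThroughPrefetchBuffer(FilePrefetchBuffer* prefetch_buffer,
                               uint64_t offset, size_t len, Slice* result) {
  IOOptions opts;
  // e.g. a 10ms budget, typically derived upstream from ReadOptions::deadline.
  opts.timeout = std::chrono::microseconds(10000);
  // On a miss, the buffer prefetches with `opts`, so the timeout is honored
  // by the underlying file reads.
  return prefetch_buffer->TryReadFromCache(opts, offset, len, result);
}
}  // namespace ROCKSDB_NAMESPACE
```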

file/file_util.cc
@@ -166,10 +166,11 @@ IOStatus GenerateOneFileChecksum(FileSystem* fs, const std::string& file_path,
   Slice slice;
   uint64_t offset = 0;
+  IOOptions opts;
   while (size > 0) {
     size_t bytes_to_read =
         static_cast<size_t>(std::min(uint64_t{readahead_size}, size));
-    if (!prefetch_buffer.TryReadFromCache(offset, bytes_to_read, &slice,
+    if (!prefetch_buffer.TryReadFromCache(opts, offset, bytes_to_read, &slice,
                                           false)) {
       return IOStatus::Corruption("file read failed");
     }

file/random_access_file_reader.h
@@ -11,10 +11,12 @@
 #include <atomic>
 #include <sstream>
 #include <string>

 #include "port/port.h"
 #include "rocksdb/env.h"
 #include "rocksdb/file_system.h"
 #include "rocksdb/listener.h"
+#include "rocksdb/options.h"
 #include "rocksdb/rate_limiter.h"
 #include "util/aligned_buffer.h"

include/rocksdb/table.h
@@ -556,7 +556,19 @@ class TableFactory {
       const TableReaderOptions& table_reader_options,
       std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
       std::unique_ptr<TableReader>* table_reader,
-      bool prefetch_index_and_filter_in_cache = true) const = 0;
+      bool prefetch_index_and_filter_in_cache = true) const {
+    ReadOptions ro;
+    return NewTableReader(ro, table_reader_options, std::move(file), file_size,
+                          table_reader, prefetch_index_and_filter_in_cache);
+  }
+
+  // Overload of the above function that allows the caller to pass in a
+  // ReadOptions
+  virtual Status NewTableReader(
+      const ReadOptions& ro, const TableReaderOptions& table_reader_options,
+      std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
+      std::unique_ptr<TableReader>* table_reader,
+      bool prefetch_index_and_filter_in_cache) const = 0;

   // Return a table builder to write to a file for this table type.
   //
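
The ```TableFactory``` change above uses a standard C++ evolution pattern: the old pure-virtual entry point becomes a non-virtual default that forwards to a new pure-virtual overload carrying the extra ```ReadOptions``` argument, and implementations pull the forwarding overload back into scope with a ```using``` declaration (as the in-tree factories below do). A minimal, self-contained illustration of the pattern follows; the names here are not RocksDB API.

```cpp
#include <iostream>
#include <string>

struct Options {
  int deadline_us = 0;
};

class Factory {
 public:
  virtual ~Factory() = default;
  // Old entry point: still compiles for existing callers and forwards a
  // default-constructed Options to the new overload.
  int Open(const std::string& fname) const { return Open(Options(), fname); }
  // New overload: implementations receive the options explicitly.
  virtual int Open(const Options& opts, const std::string& fname) const = 0;
};

class MyFactory : public Factory {
 public:
  using Factory::Open;  // keep the non-virtual forwarding overload visible
  int Open(const Options& opts, const std::string& fname) const override {
    std::cout << "open " << fname << " deadline_us=" << opts.deadline_us
              << "\n";
    return 0;
  }
};

int main() {
  MyFactory f;
  f.Open("table.sst");  // old-style call, default Options forwarded
  Options opts;
  opts.deadline_us = 10000;
  f.Open(opts, "table.sst");  // new-style call with explicit options
  return 0;
}
```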

table/adaptive/adaptive_table_factory.cc
@@ -42,12 +42,13 @@ extern const uint64_t kLegacyBlockBasedTableMagicNumber;
 extern const uint64_t kCuckooTableMagicNumber;

 Status AdaptiveTableFactory::NewTableReader(
-    const TableReaderOptions& table_reader_options,
+    const ReadOptions& ro, const TableReaderOptions& table_reader_options,
     std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
     std::unique_ptr<TableReader>* table,
-    bool /*prefetch_index_and_filter_in_cache*/) const {
+    bool prefetch_index_and_filter_in_cache) const {
   Footer footer;
-  auto s = ReadFooterFromFile(file.get(), nullptr /* prefetch_buffer */,
+  IOOptions opts;
+  auto s = ReadFooterFromFile(opts, file.get(), nullptr /* prefetch_buffer */,
                               file_size, &footer);
   if (!s.ok()) {
     return s;
@@ -59,7 +60,8 @@ Status AdaptiveTableFactory::NewTableReader(
   } else if (footer.table_magic_number() == kBlockBasedTableMagicNumber ||
              footer.table_magic_number() == kLegacyBlockBasedTableMagicNumber) {
     return block_based_table_factory_->NewTableReader(
-        table_reader_options, std::move(file), file_size, table);
+        ro, table_reader_options, std::move(file), file_size, table,
+        prefetch_index_and_filter_in_cache);
   } else if (footer.table_magic_number() == kCuckooTableMagicNumber) {
     return cuckoo_table_factory_->NewTableReader(
         table_reader_options, std::move(file), file_size, table);

table/adaptive/adaptive_table_factory.h
@@ -33,8 +33,9 @@ class AdaptiveTableFactory : public TableFactory {

   const char* Name() const override { return "AdaptiveTableFactory"; }

+  using TableFactory::NewTableReader;
   Status NewTableReader(
-      const TableReaderOptions& table_reader_options,
+      const ReadOptions& ro, const TableReaderOptions& table_reader_options,
       std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
       std::unique_ptr<TableReader>* table,
       bool prefetch_index_and_filter_in_cache = true) const override;

table/block_based/binary_search_index_reader.cc
@@ -10,9 +10,9 @@
 namespace ROCKSDB_NAMESPACE {
 Status BinarySearchIndexReader::Create(
-    const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
-    bool use_cache, bool prefetch, bool pin,
-    BlockCacheLookupContext* lookup_context,
+    const BlockBasedTable* table, const ReadOptions& ro,
+    FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+    bool pin, BlockCacheLookupContext* lookup_context,
     std::unique_ptr<IndexReader>* index_reader) {
   assert(table != nullptr);
   assert(table->get_rep());
@@ -22,7 +22,7 @@ Status BinarySearchIndexReader::Create(
   CachableEntry<Block> index_block;
   if (prefetch || !use_cache) {
     const Status s =
-        ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache,
+        ReadIndexBlock(table, prefetch_buffer, ro, use_cache,
                        /*get_context=*/nullptr, lookup_context, &index_block);
     if (!s.ok()) {
       return s;

table/block_based/binary_search_index_reader.h
@@ -19,7 +19,7 @@ class BinarySearchIndexReader : public BlockBasedTable::IndexReaderCommon {
   // `BinarySearchIndexReader`.
   // On success, index_reader will be populated; otherwise it will remain
   // unmodified.
-  static Status Create(const BlockBasedTable* table,
+  static Status Create(const BlockBasedTable* table, const ReadOptions& ro,
                        FilePrefetchBuffer* prefetch_buffer, bool use_cache,
                        bool prefetch, bool pin,
                        BlockCacheLookupContext* lookup_context,

table/block_based/block_based_filter_block.cc
@@ -171,18 +171,18 @@ BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
 }

 std::unique_ptr<FilterBlockReader> BlockBasedFilterBlockReader::Create(
-    const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
-    bool use_cache, bool prefetch, bool pin,
-    BlockCacheLookupContext* lookup_context) {
+    const BlockBasedTable* table, const ReadOptions& ro,
+    FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+    bool pin, BlockCacheLookupContext* lookup_context) {
   assert(table);
   assert(table->get_rep());
   assert(!pin || prefetch);

   CachableEntry<BlockContents> filter_block;
   if (prefetch || !use_cache) {
-    const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(),
-                                     use_cache, nullptr /* get_context */,
-                                     lookup_context, &filter_block);
+    const Status s = ReadFilterBlock(table, prefetch_buffer, ro, use_cache,
+                                     nullptr /* get_context */, lookup_context,
+                                     &filter_block);
     if (!s.ok()) {
       IGNORE_STATUS_IF_ERROR(s);
       return std::unique_ptr<FilterBlockReader>();

table/block_based/block_based_filter_block.h
@@ -85,9 +85,9 @@ class BlockBasedFilterBlockReader
   void operator=(const BlockBasedFilterBlockReader&) = delete;

   static std::unique_ptr<FilterBlockReader> Create(
-      const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
-      bool use_cache, bool prefetch, bool pin,
-      BlockCacheLookupContext* lookup_context);
+      const BlockBasedTable* table, const ReadOptions& ro,
+      FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+      bool pin, BlockCacheLookupContext* lookup_context);

   bool IsBlockBased() override { return true; }

table/block_based/block_based_table_factory.cc
@@ -412,12 +412,12 @@ BlockBasedTableFactory::BlockBasedTableFactory(
 }

 Status BlockBasedTableFactory::NewTableReader(
-    const TableReaderOptions& table_reader_options,
+    const ReadOptions& ro, const TableReaderOptions& table_reader_options,
     std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
     std::unique_ptr<TableReader>* table_reader,
     bool prefetch_index_and_filter_in_cache) const {
   return BlockBasedTable::Open(
-      table_reader_options.ioptions, table_reader_options.env_options,
+      ro, table_reader_options.ioptions, table_reader_options.env_options,
       table_options_, table_reader_options.internal_comparator, std::move(file),
       file_size, table_reader, table_reader_options.prefix_extractor,
       prefetch_index_and_filter_in_cache, table_reader_options.skip_filters,

table/block_based/block_based_table_factory.h
@@ -48,8 +48,9 @@ class BlockBasedTableFactory : public TableFactory {

   const char* Name() const override { return kName.c_str(); }

+  using TableFactory::NewTableReader;
   Status NewTableReader(
-      const TableReaderOptions& table_reader_options,
+      const ReadOptions& ro, const TableReaderOptions& table_reader_options,
       std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
       std::unique_ptr<TableReader>* table_reader,
       bool prefetch_index_and_filter_in_cache = true) const override;

table/block_based/block_based_table_reader.cc
@@ -577,8 +577,8 @@ Slice BlockBasedTable::GetCacheKey(const char* cache_key_prefix,
 }

 Status BlockBasedTable::Open(
-    const ImmutableCFOptions& ioptions, const EnvOptions& env_options,
-    const BlockBasedTableOptions& table_options,
+    const ReadOptions& read_options, const ImmutableCFOptions& ioptions,
+    const EnvOptions& env_options, const BlockBasedTableOptions& table_options,
     const InternalKeyComparator& internal_comparator,
     std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
     std::unique_ptr<TableReader>* table_reader,
@@ -595,12 +595,19 @@ Status BlockBasedTable::Open(
   Footer footer;
   std::unique_ptr<FilePrefetchBuffer> prefetch_buffer;

+  // Only retain read_options.deadline. In future, we may retain more
+  // options. Specifically, w ignore verify_checksums and default to
+  // checksum verification anyway when creating the index and filter
+  // readers.
+  ReadOptions ro;
+  ro.deadline = read_options.deadline;
+
   // prefetch both index and filters, down to all partitions
   const bool prefetch_all = prefetch_index_and_filter_in_cache || level == 0;
   const bool preload_all = !table_options.cache_index_and_filter_blocks;

   if (!ioptions.allow_mmap_reads) {
-    s = PrefetchTail(file.get(), file_size, force_direct_prefetch,
+    s = PrefetchTail(ro, file.get(), file_size, force_direct_prefetch,
                      tail_prefetch_stats, prefetch_all, preload_all,
                      &prefetch_buffer);
   } else {
@@ -617,8 +624,12 @@ Status BlockBasedTable::Open(
   // 5. [meta block: compression dictionary]
   // 6. [meta block: index]
   // 7. [meta block: filter]
-  s = ReadFooterFromFile(file.get(), prefetch_buffer.get(), file_size, &footer,
-                         kBlockBasedTableMagicNumber);
+  IOOptions opts;
+  s = PrepareIOFromReadOptions(ro, file->env(), opts);
+  if (s.ok()) {
+    s = ReadFooterFromFile(opts, file.get(), prefetch_buffer.get(), file_size,
+                           &footer, kBlockBasedTableMagicNumber);
+  }
   if (!s.ok()) {
     return s;
   }
@@ -664,7 +675,7 @@ Status BlockBasedTable::Open(
   // Read metaindex
   std::unique_ptr<Block> metaindex;
   std::unique_ptr<InternalIterator> metaindex_iter;
-  s = new_table->ReadMetaIndexBlock(prefetch_buffer.get(), &metaindex,
+  s = new_table->ReadMetaIndexBlock(ro, prefetch_buffer.get(), &metaindex,
                                     &metaindex_iter);
   if (!s.ok()) {
     return s;
@@ -672,18 +683,19 @@ Status BlockBasedTable::Open(

   // Populates table_properties and some fields that depend on it,
   // such as index_type.
-  s = new_table->ReadPropertiesBlock(prefetch_buffer.get(),
+  s = new_table->ReadPropertiesBlock(ro, prefetch_buffer.get(),
                                      metaindex_iter.get(), largest_seqno);
   if (!s.ok()) {
     return s;
   }
-  s = new_table->ReadRangeDelBlock(prefetch_buffer.get(), metaindex_iter.get(),
-                                   internal_comparator, &lookup_context);
+  s = new_table->ReadRangeDelBlock(ro, prefetch_buffer.get(),
+                                   metaindex_iter.get(), internal_comparator,
+                                   &lookup_context);
   if (!s.ok()) {
     return s;
   }
   s = new_table->PrefetchIndexAndFilterBlocks(
-      prefetch_buffer.get(), metaindex_iter.get(), new_table.get(),
+      ro, prefetch_buffer.get(), metaindex_iter.get(), new_table.get(),
       prefetch_all, table_options, level, file_size,
       max_file_size_for_l0_meta_pin, &lookup_context);
@@ -703,7 +715,7 @@ Status BlockBasedTable::Open(
 }

 Status BlockBasedTable::PrefetchTail(
-    RandomAccessFileReader* file, uint64_t file_size,
+    const ReadOptions& ro, RandomAccessFileReader* file, uint64_t file_size,
     bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats,
     const bool prefetch_all, const bool preload_all,
     std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer) {
@@ -742,15 +754,19 @@ Status BlockBasedTable::PrefetchTail(
   } else {
     prefetch_buffer->reset(new FilePrefetchBuffer(
         nullptr, 0, 0, true /* enable */, true /* track_min_offset */));
-    s = (*prefetch_buffer)->Prefetch(file, prefetch_off, prefetch_len);
+    IOOptions opts;
+    s = PrepareIOFromReadOptions(ro, file->env(), opts);
+    if (s.ok()) {
+      s = (*prefetch_buffer)->Prefetch(opts, file, prefetch_off, prefetch_len);
+    }
   }
   return s;
 }

 Status BlockBasedTable::TryReadPropertiesWithGlobalSeqno(
-    FilePrefetchBuffer* prefetch_buffer, const Slice& handle_value,
-    TableProperties** table_properties) {
+    const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
+    const Slice& handle_value, TableProperties** table_properties) {
   assert(table_properties != nullptr);

   // If this is an external SST file ingested with write_global_seqno set to
   // true, then we expect the checksum mismatch because checksum was written
@@ -760,7 +776,7 @@ Status BlockBasedTable::TryReadPropertiesWithGlobalSeqno(
   // original value, i.e. 0, and verify the checksum again.
   BlockHandle props_block_handle;
   CacheAllocationPtr tmp_buf;
-  Status s = ReadProperties(handle_value, rep_->file.get(), prefetch_buffer,
+  Status s = ReadProperties(ro, handle_value, rep_->file.get(), prefetch_buffer,
                             rep_->footer, rep_->ioptions, table_properties,
                             false /* verify_checksum */, &props_block_handle,
                             &tmp_buf, false /* compression_type_missing */,
@@ -784,8 +800,8 @@ Status BlockBasedTable::TryReadPropertiesWithGlobalSeqno(
 }

 Status BlockBasedTable::ReadPropertiesBlock(
-    FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter,
-    const SequenceNumber largest_seqno) {
+    const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
+    InternalIterator* meta_iter, const SequenceNumber largest_seqno) {
   bool found_properties_block = true;
   Status s;
   s = SeekToPropertiesBlock(meta_iter, &found_properties_block);
@@ -799,16 +815,17 @@ Status BlockBasedTable::ReadPropertiesBlock(
     TableProperties* table_properties = nullptr;
     if (s.ok()) {
       s = ReadProperties(
-          meta_iter->value(), rep_->file.get(), prefetch_buffer, rep_->footer,
-          rep_->ioptions, &table_properties, true /* verify_checksum */,
-          nullptr /* ret_block_handle */, nullptr /* ret_block_contents */,
+          ro, meta_iter->value(), rep_->file.get(), prefetch_buffer,
+          rep_->footer, rep_->ioptions, &table_properties,
+          true /* verify_checksum */, nullptr /* ret_block_handle */,
+          nullptr /* ret_block_contents */,
           false /* compression_type_missing */, nullptr /* memory_allocator */);
     }
     IGNORE_STATUS_IF_ERROR(s);

     if (s.IsCorruption()) {
-      s = TryReadPropertiesWithGlobalSeqno(prefetch_buffer, meta_iter->value(),
-                                           &table_properties);
+      s = TryReadPropertiesWithGlobalSeqno(
+          ro, prefetch_buffer, meta_iter->value(), &table_properties);
       IGNORE_STATUS_IF_ERROR(s);
     }
     std::unique_ptr<TableProperties> props_guard;
@@ -883,7 +900,8 @@ Status BlockBasedTable::ReadPropertiesBlock(
 }

 Status BlockBasedTable::ReadRangeDelBlock(
-    FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter,
+    const ReadOptions& read_options, FilePrefetchBuffer* prefetch_buffer,
+    InternalIterator* meta_iter,
     const InternalKeyComparator& internal_comparator,
     BlockCacheLookupContext* lookup_context) {
   Status s;
@@ -896,7 +914,6 @@ Status BlockBasedTable::ReadRangeDelBlock(
         "Error when seeking to range delete tombstones block from file: %s",
         s.ToString().c_str());
   } else if (found_range_del_block && !range_del_handle.IsNull()) {
-    ReadOptions read_options;
     std::unique_ptr<InternalIterator> iter(NewDataBlockIterator<DataBlockIter>(
         read_options, range_del_handle,
         /*input_iter=*/nullptr, BlockType::kRangeDeletion,
@@ -919,8 +936,8 @@ Status BlockBasedTable::ReadRangeDelBlock(
 }

 Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
-    FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter,
-    BlockBasedTable* new_table, bool prefetch_all,
+    const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
+    InternalIterator* meta_iter, BlockBasedTable* new_table, bool prefetch_all,
     const BlockBasedTableOptions& table_options, const int level,
     size_t file_size, size_t max_file_size_for_l0_meta_pin,
     BlockCacheLookupContext* lookup_context) {
@@ -983,7 +1000,7 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
            index_type == BlockBasedTableOptions::kTwoLevelIndexSearch);

     std::unique_ptr<IndexReader> index_reader;
-    s = new_table->CreateIndexReader(prefetch_buffer, meta_iter, use_cache,
+    s = new_table->CreateIndexReader(ro, prefetch_buffer, meta_iter, use_cache,
prefetch_index, pin_index, lookup_context, prefetch_index, pin_index, lookup_context,
&index_reader); &index_reader);
if (!s.ok()) { if (!s.ok()) {
@ -996,7 +1013,7 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
// are hence follow the configuration for pin and prefetch regardless of // are hence follow the configuration for pin and prefetch regardless of
// the value of cache_index_and_filter_blocks // the value of cache_index_and_filter_blocks
if (prefetch_all) { if (prefetch_all) {
rep_->index_reader->CacheDependencies(pin_all); rep_->index_reader->CacheDependencies(ro, pin_all);
} }
// prefetch the first level of filter // prefetch the first level of filter
@ -1013,12 +1030,12 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
if (rep_->filter_policy) { if (rep_->filter_policy) {
auto filter = new_table->CreateFilterBlockReader( auto filter = new_table->CreateFilterBlockReader(
prefetch_buffer, use_cache, prefetch_filter, pin_filter, ro, prefetch_buffer, use_cache, prefetch_filter, pin_filter,
lookup_context); lookup_context);
if (filter) { if (filter) {
// Refer to the comment above about paritioned indexes always being cached // Refer to the comment above about paritioned indexes always being cached
if (prefetch_all) { if (prefetch_all) {
filter->CacheDependencies(pin_all); filter->CacheDependencies(ro, pin_all);
} }
rep_->filter = std::move(filter); rep_->filter = std::move(filter);
@ -1027,7 +1044,7 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
if (!rep_->compression_dict_handle.IsNull()) { if (!rep_->compression_dict_handle.IsNull()) {
std::unique_ptr<UncompressionDictReader> uncompression_dict_reader; std::unique_ptr<UncompressionDictReader> uncompression_dict_reader;
s = UncompressionDictReader::Create(this, prefetch_buffer, use_cache, s = UncompressionDictReader::Create(this, ro, prefetch_buffer, use_cache,
prefetch_all, pin_all, lookup_context, prefetch_all, pin_all, lookup_context,
&uncompression_dict_reader); &uncompression_dict_reader);
if (!s.ok()) { if (!s.ok()) {
@ -1082,14 +1099,14 @@ size_t BlockBasedTable::ApproximateMemoryUsage() const {
// metaindex // metaindex
// block and its iterator. // block and its iterator.
Status BlockBasedTable::ReadMetaIndexBlock( Status BlockBasedTable::ReadMetaIndexBlock(
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
std::unique_ptr<Block>* metaindex_block, std::unique_ptr<Block>* metaindex_block,
std::unique_ptr<InternalIterator>* iter) { std::unique_ptr<InternalIterator>* iter) {
// TODO(sanjay): Skip this if footer.metaindex_handle() size indicates // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
// it is an empty block. // it is an empty block.
std::unique_ptr<Block> metaindex; std::unique_ptr<Block> metaindex;
Status s = ReadBlockFromFile( Status s = ReadBlockFromFile(
rep_->file.get(), prefetch_buffer, rep_->footer, ReadOptions(), rep_->file.get(), prefetch_buffer, rep_->footer, ro,
rep_->footer.metaindex_handle(), &metaindex, rep_->ioptions, rep_->footer.metaindex_handle(), &metaindex, rep_->ioptions,
true /* decompress */, true /*maybe_compressed*/, BlockType::kMetaIndex, true /* decompress */, true /*maybe_compressed*/, BlockType::kMetaIndex,
UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options, UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options,
@ -1317,8 +1334,8 @@ Status BlockBasedTable::PutDataBlockToCache(
} }
std::unique_ptr<FilterBlockReader> BlockBasedTable::CreateFilterBlockReader( std::unique_ptr<FilterBlockReader> BlockBasedTable::CreateFilterBlockReader(
FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, bool use_cache,
bool pin, BlockCacheLookupContext* lookup_context) { bool prefetch, bool pin, BlockCacheLookupContext* lookup_context) {
auto& rep = rep_; auto& rep = rep_;
auto filter_type = rep->filter_type; auto filter_type = rep->filter_type;
if (filter_type == Rep::FilterType::kNoFilter) { if (filter_type == Rep::FilterType::kNoFilter) {
@ -1330,14 +1347,14 @@ std::unique_ptr<FilterBlockReader> BlockBasedTable::CreateFilterBlockReader(
switch (filter_type) { switch (filter_type) {
case Rep::FilterType::kPartitionedFilter: case Rep::FilterType::kPartitionedFilter:
return PartitionedFilterBlockReader::Create( return PartitionedFilterBlockReader::Create(
this, prefetch_buffer, use_cache, prefetch, pin, lookup_context); this, ro, prefetch_buffer, use_cache, prefetch, pin, lookup_context);
case Rep::FilterType::kBlockFilter: case Rep::FilterType::kBlockFilter:
return BlockBasedFilterBlockReader::Create( return BlockBasedFilterBlockReader::Create(
this, prefetch_buffer, use_cache, prefetch, pin, lookup_context); this, ro, prefetch_buffer, use_cache, prefetch, pin, lookup_context);
case Rep::FilterType::kFullFilter: case Rep::FilterType::kFullFilter:
return FullFilterBlockReader::Create(this, prefetch_buffer, use_cache, return FullFilterBlockReader::Create(this, ro, prefetch_buffer, use_cache,
prefetch, pin, lookup_context); prefetch, pin, lookup_context);
default: default:
@ -2205,9 +2222,11 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
lookup_context.get_from_user_specified_snapshot = lookup_context.get_from_user_specified_snapshot =
read_options.snapshot != nullptr; read_options.snapshot != nullptr;
} }
TEST_SYNC_POINT("BlockBasedTable::Get:BeforeFilterMatch");
const bool may_match = const bool may_match =
FullFilterKeyMayMatch(read_options, filter, key, no_io, prefix_extractor, FullFilterKeyMayMatch(read_options, filter, key, no_io, prefix_extractor,
get_context, &lookup_context); get_context, &lookup_context);
TEST_SYNC_POINT("BlockBasedTable::Get:AfterFilterMatch");
if (!may_match) { if (!may_match) {
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL); RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level);
@ -2754,7 +2773,8 @@ Status BlockBasedTable::VerifyChecksum(const ReadOptions& read_options,
// Check Meta blocks // Check Meta blocks
std::unique_ptr<Block> metaindex; std::unique_ptr<Block> metaindex;
std::unique_ptr<InternalIterator> metaindex_iter; std::unique_ptr<InternalIterator> metaindex_iter;
s = ReadMetaIndexBlock(nullptr /* prefetch buffer */, &metaindex, ReadOptions ro;
s = ReadMetaIndexBlock(ro, nullptr /* prefetch buffer */, &metaindex,
&metaindex_iter); &metaindex_iter);
if (s.ok()) { if (s.ok()) {
s = VerifyChecksumInMetaBlocks(metaindex_iter.get()); s = VerifyChecksumInMetaBlocks(metaindex_iter.get());
@ -2878,7 +2898,8 @@ Status BlockBasedTable::VerifyChecksumInMetaBlocks(
s = block_fetcher.ReadBlockContents(); s = block_fetcher.ReadBlockContents();
if (s.IsCorruption() && meta_block_name == kPropertiesBlock) { if (s.IsCorruption() && meta_block_name == kPropertiesBlock) {
TableProperties* table_properties; TableProperties* table_properties;
s = TryReadPropertiesWithGlobalSeqno(nullptr /* prefetch_buffer */, ReadOptions ro;
s = TryReadPropertiesWithGlobalSeqno(ro, nullptr /* prefetch_buffer */,
index_iter->value(), index_iter->value(),
&table_properties); &table_properties);
delete table_properties; delete table_properties;
@ -2931,7 +2952,7 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
// 4. internal_comparator // 4. internal_comparator
// 5. index_type // 5. index_type
Status BlockBasedTable::CreateIndexReader( Status BlockBasedTable::CreateIndexReader(
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
InternalIterator* preloaded_meta_index_iter, bool use_cache, bool prefetch, InternalIterator* preloaded_meta_index_iter, bool use_cache, bool prefetch,
bool pin, BlockCacheLookupContext* lookup_context, bool pin, BlockCacheLookupContext* lookup_context,
std::unique_ptr<IndexReader>* index_reader) { std::unique_ptr<IndexReader>* index_reader) {
@ -2943,16 +2964,16 @@ Status BlockBasedTable::CreateIndexReader(
switch (rep_->index_type) { switch (rep_->index_type) {
case BlockBasedTableOptions::kTwoLevelIndexSearch: { case BlockBasedTableOptions::kTwoLevelIndexSearch: {
return PartitionIndexReader::Create(this, prefetch_buffer, use_cache, return PartitionIndexReader::Create(this, ro, prefetch_buffer, use_cache,
prefetch, pin, lookup_context, prefetch, pin, lookup_context,
index_reader); index_reader);
} }
case BlockBasedTableOptions::kBinarySearch: case BlockBasedTableOptions::kBinarySearch:
FALLTHROUGH_INTENDED; FALLTHROUGH_INTENDED;
case BlockBasedTableOptions::kBinarySearchWithFirstKey: { case BlockBasedTableOptions::kBinarySearchWithFirstKey: {
return BinarySearchIndexReader::Create(this, prefetch_buffer, use_cache, return BinarySearchIndexReader::Create(this, ro, prefetch_buffer,
prefetch, pin, lookup_context, use_cache, prefetch, pin,
index_reader); lookup_context, index_reader);
} }
case BlockBasedTableOptions::kHashSearch: { case BlockBasedTableOptions::kHashSearch: {
std::unique_ptr<Block> metaindex_guard; std::unique_ptr<Block> metaindex_guard;
@ -2965,7 +2986,7 @@ Status BlockBasedTable::CreateIndexReader(
" search index."); " search index.");
should_fallback = true; should_fallback = true;
} else if (meta_index_iter == nullptr) { } else if (meta_index_iter == nullptr) {
auto s = ReadMetaIndexBlock(prefetch_buffer, &metaindex_guard, auto s = ReadMetaIndexBlock(ro, prefetch_buffer, &metaindex_guard,
&metaindex_iter_guard); &metaindex_iter_guard);
if (!s.ok()) { if (!s.ok()) {
// we simply fall back to binary search in case there is any // we simply fall back to binary search in case there is any
@ -2979,13 +3000,13 @@ Status BlockBasedTable::CreateIndexReader(
} }
if (should_fallback) { if (should_fallback) {
return BinarySearchIndexReader::Create(this, prefetch_buffer, use_cache, return BinarySearchIndexReader::Create(this, ro, prefetch_buffer,
prefetch, pin, lookup_context, use_cache, prefetch, pin,
index_reader); lookup_context, index_reader);
} else { } else {
return HashIndexReader::Create(this, prefetch_buffer, meta_index_iter, return HashIndexReader::Create(this, ro, prefetch_buffer,
use_cache, prefetch, pin, lookup_context, meta_index_iter, use_cache, prefetch,
index_reader); pin, lookup_context, index_reader);
} }
} }
default: { default: {
@ -3170,7 +3191,8 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
"--------------------------------------\n"); "--------------------------------------\n");
std::unique_ptr<Block> metaindex; std::unique_ptr<Block> metaindex;
std::unique_ptr<InternalIterator> metaindex_iter; std::unique_ptr<InternalIterator> metaindex_iter;
Status s = ReadMetaIndexBlock(nullptr /* prefetch_buffer */, &metaindex, ReadOptions ro;
Status s = ReadMetaIndexBlock(ro, nullptr /* prefetch_buffer */, &metaindex,
&metaindex_iter); &metaindex_iter);
if (s.ok()) { if (s.ok()) {
for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid(); for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid();
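
Note: the deadline that ```PrefetchTail```, ```ReadMetaIndexBlock``` and the other helpers above now receive comes straight from the caller's ```ReadOptions```. A minimal caller-side sketch of the end-to-end effect (illustrative only, not code from this PR; it assumes the deadline is an absolute timestamp in the ```Env::NowMicros()``` time base, which is how the existing Get/MultiGet deadline checks interpret it):

```
#include <chrono>
#include <string>

#include "rocksdb/db.h"

// Illustrative sketch: bound a point lookup, including any table open,
// prefetch or preload it triggers, by an absolute deadline.
rocksdb::Status GetWithDeadline(rocksdb::DB* db, const rocksdb::Slice& key,
                                std::string* value) {
  rocksdb::ReadOptions ro;
  // 10ms budget, expressed as an absolute time in microseconds.
  ro.deadline = std::chrono::microseconds(db->GetEnv()->NowMicros() + 10000);
  rocksdb::Status s = db->Get(ro, key, value);
  // s.IsTimedOut() indicates the deadline expired somewhere on the read path,
  // which after this change includes file opens done as part of table open.
  return s;
}
```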

@@ -87,7 +87,7 @@ class BlockBasedTable : public TableReader {
   // are set.
   // @param force_direct_prefetch if true, always prefetching to RocksDB
   // buffer, rather than calling RandomAccessFile::Prefetch().
-  static Status Open(const ImmutableCFOptions& ioptions,
+  static Status Open(const ReadOptions& ro, const ImmutableCFOptions& ioptions,
                      const EnvOptions& env_options,
                      const BlockBasedTableOptions& table_options,
                      const InternalKeyComparator& internal_key_comparator,
@@ -205,7 +205,7 @@ class BlockBasedTable : public TableReader {
     virtual size_t ApproximateMemoryUsage() const = 0;

     // Cache the dependencies of the index reader (e.g. the partitions
     // of a partitioned index).
-    virtual void CacheDependencies(bool /* pin */) {}
+    virtual void CacheDependencies(const ReadOptions& /*ro*/, bool /* pin */) {}
   };

   class IndexReaderCommon;
@@ -379,7 +379,8 @@ class BlockBasedTable : public TableReader {
   // Optionally, user can pass a preloaded meta_index_iter for the index that
   // need to access extra meta blocks for index construction. This parameter
   // helps avoid re-reading meta index block if caller already created one.
-  Status CreateIndexReader(FilePrefetchBuffer* prefetch_buffer,
+  Status CreateIndexReader(const ReadOptions& ro,
+                           FilePrefetchBuffer* prefetch_buffer,
                            InternalIterator* preloaded_meta_index_iter,
                            bool use_cache, bool prefetch, bool pin,
                            BlockCacheLookupContext* lookup_context,
@@ -401,28 +402,32 @@ class BlockBasedTable : public TableReader {
   // If force_direct_prefetch is true, always prefetching to RocksDB
   // buffer, rather than calling RandomAccessFile::Prefetch().
   static Status PrefetchTail(
-      RandomAccessFileReader* file, uint64_t file_size,
+      const ReadOptions& ro, RandomAccessFileReader* file, uint64_t file_size,
       bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats,
       const bool prefetch_all, const bool preload_all,
       std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer);
-  Status ReadMetaIndexBlock(FilePrefetchBuffer* prefetch_buffer,
+  Status ReadMetaIndexBlock(const ReadOptions& ro,
+                            FilePrefetchBuffer* prefetch_buffer,
                             std::unique_ptr<Block>* metaindex_block,
                             std::unique_ptr<InternalIterator>* iter);
-  Status TryReadPropertiesWithGlobalSeqno(FilePrefetchBuffer* prefetch_buffer,
+  Status TryReadPropertiesWithGlobalSeqno(const ReadOptions& ro,
+                                          FilePrefetchBuffer* prefetch_buffer,
                                           const Slice& handle_value,
                                           TableProperties** table_properties);
-  Status ReadPropertiesBlock(FilePrefetchBuffer* prefetch_buffer,
+  Status ReadPropertiesBlock(const ReadOptions& ro,
+                             FilePrefetchBuffer* prefetch_buffer,
                              InternalIterator* meta_iter,
                              const SequenceNumber largest_seqno);
-  Status ReadRangeDelBlock(FilePrefetchBuffer* prefetch_buffer,
+  Status ReadRangeDelBlock(const ReadOptions& ro,
+                           FilePrefetchBuffer* prefetch_buffer,
                            InternalIterator* meta_iter,
                            const InternalKeyComparator& internal_comparator,
                            BlockCacheLookupContext* lookup_context);
   Status PrefetchIndexAndFilterBlocks(
-      FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter,
-      BlockBasedTable* new_table, bool prefetch_all,
-      const BlockBasedTableOptions& table_options, const int level,
-      size_t file_size, size_t max_file_size_for_l0_meta_pin,
+      const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
+      InternalIterator* meta_iter, BlockBasedTable* new_table,
+      bool prefetch_all, const BlockBasedTableOptions& table_options,
+      const int level, size_t file_size, size_t max_file_size_for_l0_meta_pin,
       BlockCacheLookupContext* lookup_context);

   static BlockType GetBlockTypeForMetaBlockByName(const Slice& meta_block_name);
@@ -433,8 +438,9 @@ class BlockBasedTable : public TableReader {
   // Create the filter from the filter block.
   std::unique_ptr<FilterBlockReader> CreateFilterBlockReader(
-      FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
-      bool pin, BlockCacheLookupContext* lookup_context);
+      const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
+      bool use_cache, bool prefetch, bool pin,
+      BlockCacheLookupContext* lookup_context);

   static void SetupCacheKeyPrefix(Rep* rep);

@@ -90,7 +90,8 @@ class BlockBasedTableReaderTest
     ASSERT_OK(env_->GetFileSize(Path(table_name), &file_size));

     std::unique_ptr<TableReader> table_reader;
-    ASSERT_OK(BlockBasedTable::Open(ioptions, EnvOptions(),
+    ReadOptions ro;
+    ASSERT_OK(BlockBasedTable::Open(ro, ioptions, EnvOptions(),
                                     table_factory_->table_options(), comparator,
                                     std::move(file), file_size, &table_reader));

@@ -153,7 +153,7 @@ class FilterBlockReader {
     return error_msg;
   }

-  virtual void CacheDependencies(bool /*pin*/) {}
+  virtual void CacheDependencies(const ReadOptions& /*ro*/, bool /*pin*/) {}

   virtual bool RangeMayExist(const Slice* /*iterate_upper_bound*/,
                              const Slice& user_key,

@@ -119,18 +119,18 @@ bool FullFilterBlockReader::KeyMayMatch(
 }

 std::unique_ptr<FilterBlockReader> FullFilterBlockReader::Create(
-    const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
-    bool use_cache, bool prefetch, bool pin,
-    BlockCacheLookupContext* lookup_context) {
+    const BlockBasedTable* table, const ReadOptions& ro,
+    FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+    bool pin, BlockCacheLookupContext* lookup_context) {
   assert(table);
   assert(table->get_rep());
   assert(!pin || prefetch);

   CachableEntry<ParsedFullFilterBlock> filter_block;
   if (prefetch || !use_cache) {
-    const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(),
-                                     use_cache, nullptr /* get_context */,
-                                     lookup_context, &filter_block);
+    const Status s = ReadFilterBlock(table, prefetch_buffer, ro, use_cache,
+                                     nullptr /* get_context */, lookup_context,
+                                     &filter_block);
     if (!s.ok()) {
       IGNORE_STATUS_IF_ERROR(s);
       return std::unique_ptr<FilterBlockReader>();

@@ -87,9 +87,9 @@ class FullFilterBlockReader
       CachableEntry<ParsedFullFilterBlock>&& filter_block);

   static std::unique_ptr<FilterBlockReader> Create(
-      const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
-      bool use_cache, bool prefetch, bool pin,
-      BlockCacheLookupContext* lookup_context);
+      const BlockBasedTable* table, const ReadOptions& ro,
+      FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+      bool pin, BlockCacheLookupContext* lookup_context);

   bool IsBlockBased() override { return false; }

@@ -13,6 +13,7 @@
 namespace ROCKSDB_NAMESPACE {

 Status HashIndexReader::Create(const BlockBasedTable* table,
+                               const ReadOptions& ro,
                                FilePrefetchBuffer* prefetch_buffer,
                                InternalIterator* meta_index_iter,
                                bool use_cache, bool prefetch, bool pin,
@@ -28,7 +29,7 @@ Status HashIndexReader::Create(const BlockBasedTable* table,
   CachableEntry<Block> index_block;
   if (prefetch || !use_cache) {
     const Status s =
-        ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache,
+        ReadIndexBlock(table, prefetch_buffer, ro, use_cache,
                        /*get_context=*/nullptr, lookup_context, &index_block);
     if (!s.ok()) {
       return s;

@@ -15,7 +15,7 @@ namespace ROCKSDB_NAMESPACE {
 // key.
 class HashIndexReader : public BlockBasedTable::IndexReaderCommon {
  public:
-  static Status Create(const BlockBasedTable* table,
+  static Status Create(const BlockBasedTable* table, const ReadOptions& ro,
                        FilePrefetchBuffer* prefetch_buffer,
                        InternalIterator* meta_index_iter, bool use_cache,
                        bool prefetch, bool pin,

@@ -7,6 +7,7 @@
 #include <utility>

+#include "file/file_util.h"
 #include "monitoring/perf_context_imp.h"
 #include "port/malloc.h"
 #include "port/port.h"
@@ -149,18 +150,18 @@ PartitionedFilterBlockReader::PartitionedFilterBlockReader(
     : FilterBlockReaderCommon(t, std::move(filter_block)) {}

 std::unique_ptr<FilterBlockReader> PartitionedFilterBlockReader::Create(
-    const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
-    bool use_cache, bool prefetch, bool pin,
-    BlockCacheLookupContext* lookup_context) {
+    const BlockBasedTable* table, const ReadOptions& ro,
+    FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+    bool pin, BlockCacheLookupContext* lookup_context) {
   assert(table);
   assert(table->get_rep());
   assert(!pin || prefetch);

   CachableEntry<Block> filter_block;
   if (prefetch || !use_cache) {
-    const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(),
-                                     use_cache, nullptr /* get_context */,
-                                     lookup_context, &filter_block);
+    const Status s = ReadFilterBlock(table, prefetch_buffer, ro, use_cache,
+                                     nullptr /* get_context */, lookup_context,
+                                     &filter_block);
     if (!s.ok()) {
       IGNORE_STATUS_IF_ERROR(s);
       return std::unique_ptr<FilterBlockReader>();
@@ -411,7 +412,8 @@ size_t PartitionedFilterBlockReader::ApproximateMemoryUsage() const {
 }

 // TODO(myabandeh): merge this with the same function in IndexReader
-void PartitionedFilterBlockReader::CacheDependencies(bool pin) {
+void PartitionedFilterBlockReader::CacheDependencies(const ReadOptions& ro,
+                                                     bool pin) {
   assert(table());

   const BlockBasedTable::Rep* const rep = table()->get_rep();
@@ -457,11 +459,14 @@ void PartitionedFilterBlockReader::CacheDependencies(bool pin) {

   std::unique_ptr<FilePrefetchBuffer> prefetch_buffer;
   prefetch_buffer.reset(new FilePrefetchBuffer());
-  s = prefetch_buffer->Prefetch(rep->file.get(), prefetch_off,
-                                static_cast<size_t>(prefetch_len));
+  IOOptions opts;
+  s = PrepareIOFromReadOptions(ro, rep->file->env(), opts);
+  if (s.ok()) {
+    s = prefetch_buffer->Prefetch(opts, rep->file.get(), prefetch_off,
+                                  static_cast<size_t>(prefetch_len));
+  }

   // After prefetch, read the partitions one by one
-  ReadOptions read_options;
   for (biter.SeekToFirst(); biter.Valid(); biter.Next()) {
     handle = biter.value().handle;

@@ -469,9 +474,9 @@ void PartitionedFilterBlockReader::CacheDependencies(bool pin) {
     // TODO: Support counter batch update for partitioned index and
     // filter blocks
     s = table()->MaybeReadBlockAndLoadToCache(
-        prefetch_buffer.get(), read_options, handle,
-        UncompressionDict::GetEmptyDict(), &block, BlockType::kFilter,
-        nullptr /* get_context */, &lookup_context, nullptr /* contents */);
+        prefetch_buffer.get(), ro, handle, UncompressionDict::GetEmptyDict(),
+        &block, BlockType::kFilter, nullptr /* get_context */, &lookup_context,
+        nullptr /* contents */);

     assert(s.ok() || block.GetValue() == nullptr);
     if (s.ok() && block.GetValue() != nullptr) {

@@ -71,9 +71,9 @@ class PartitionedFilterBlockReader : public FilterBlockReaderCommon<Block> {
       CachableEntry<Block>&& filter_block);

   static std::unique_ptr<FilterBlockReader> Create(
-      const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
-      bool use_cache, bool prefetch, bool pin,
-      BlockCacheLookupContext* lookup_context);
+      const BlockBasedTable* table, const ReadOptions& ro,
+      FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+      bool pin, BlockCacheLookupContext* lookup_context);

   bool IsBlockBased() override { return false; }
   bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor,
@@ -130,7 +130,7 @@ class PartitionedFilterBlockReader : public FilterBlockReaderCommon<Block> {
                             uint64_t block_offset, BlockHandle filter_handle,
                             bool no_io, BlockCacheLookupContext* lookup_context,
                             FilterManyFunction filter_function) const;
-  void CacheDependencies(bool pin) override;
+  void CacheDependencies(const ReadOptions& ro, bool pin) override;

   const InternalKeyComparator* internal_comparator() const;
   bool index_key_includes_seq() const;

@@ -7,13 +7,15 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 #include "table/block_based/partitioned_index_reader.h"

+#include "file/file_util.h"
 #include "table/block_based/partitioned_index_iterator.h"

 namespace ROCKSDB_NAMESPACE {

 Status PartitionIndexReader::Create(
-    const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
-    bool use_cache, bool prefetch, bool pin,
-    BlockCacheLookupContext* lookup_context,
+    const BlockBasedTable* table, const ReadOptions& ro,
+    FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+    bool pin, BlockCacheLookupContext* lookup_context,
     std::unique_ptr<IndexReader>* index_reader) {
   assert(table != nullptr);
   assert(table->get_rep());
@@ -23,7 +25,7 @@ Status PartitionIndexReader::Create(
   CachableEntry<Block> index_block;
   if (prefetch || !use_cache) {
     const Status s =
-        ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache,
+        ReadIndexBlock(table, prefetch_buffer, ro, use_cache,
                        /*get_context=*/nullptr, lookup_context, &index_block);
     if (!s.ok()) {
       return s;
@@ -75,6 +77,7 @@ InternalIteratorBase<IndexValue>* PartitionIndexReader::NewIterator(
   } else {
     ReadOptions ro;
     ro.fill_cache = read_options.fill_cache;
+    ro.deadline = read_options.deadline;
     // We don't return pinned data from index blocks, so no need
     // to set `block_contents_pinned`.
     std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter(
@@ -100,7 +103,7 @@ InternalIteratorBase<IndexValue>* PartitionIndexReader::NewIterator(
   // the first level iter is always on heap and will attempt to delete it
   // in its destructor.
 }

-void PartitionIndexReader::CacheDependencies(bool pin) {
+void PartitionIndexReader::CacheDependencies(const ReadOptions& ro, bool pin) {
   // Before read partitions, prefetch them to avoid lots of IOs
   BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch};
   const BlockBasedTable::Rep* rep = table()->rep_;
@@ -147,12 +150,15 @@ void PartitionIndexReader::CacheDependencies(bool pin) {
   uint64_t prefetch_len = last_off - prefetch_off;
   std::unique_ptr<FilePrefetchBuffer> prefetch_buffer;
   rep->CreateFilePrefetchBuffer(0, 0, &prefetch_buffer);
-  s = prefetch_buffer->Prefetch(rep->file.get(), prefetch_off,
-                                static_cast<size_t>(prefetch_len));
+  IOOptions opts;
+  s = PrepareIOFromReadOptions(ro, rep->file->env(), opts);
+  if (s.ok()) {
+    s = prefetch_buffer->Prefetch(opts, rep->file.get(), prefetch_off,
+                                  static_cast<size_t>(prefetch_len));
+  }

   // After prefetch, read the partitions one by one
   biter.SeekToFirst();
-  auto ro = ReadOptions();
   for (; biter.Valid(); biter.Next()) {
     handle = biter.value().handle;
     CachableEntry<Block> block;

@@ -17,7 +17,7 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon {
   // `PartitionIndexReader`.
   // On success, index_reader will be populated; otherwise it will remain
   // unmodified.
-  static Status Create(const BlockBasedTable* table,
+  static Status Create(const BlockBasedTable* table, const ReadOptions& ro,
                        FilePrefetchBuffer* prefetch_buffer, bool use_cache,
                        bool prefetch, bool pin,
                        BlockCacheLookupContext* lookup_context,
@@ -29,7 +29,7 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon {
                        IndexBlockIter* iter, GetContext* get_context,
                        BlockCacheLookupContext* lookup_context) override;

-  void CacheDependencies(bool pin) override;
+  void CacheDependencies(const ReadOptions& ro, bool pin) override;

   size_t ApproximateMemoryUsage() const override {
     size_t usage = ApproximateIndexBlockMemoryUsage();
 #ifdef ROCKSDB_MALLOC_USABLE_SIZE

@@ -12,9 +12,9 @@
 namespace ROCKSDB_NAMESPACE {

 Status UncompressionDictReader::Create(
-    const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
-    bool use_cache, bool prefetch, bool pin,
-    BlockCacheLookupContext* lookup_context,
+    const BlockBasedTable* table, const ReadOptions& ro,
+    FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+    bool pin, BlockCacheLookupContext* lookup_context,
     std::unique_ptr<UncompressionDictReader>* uncompression_dict_reader) {
   assert(table);
   assert(table->get_rep());
@@ -24,8 +24,8 @@ Status UncompressionDictReader::Create(
   CachableEntry<UncompressionDict> uncompression_dict;
   if (prefetch || !use_cache) {
     const Status s = ReadUncompressionDictionary(
-        table, prefetch_buffer, ReadOptions(), use_cache,
-        nullptr /* get_context */, lookup_context, &uncompression_dict);
+        table, prefetch_buffer, ro, use_cache, nullptr /* get_context */,
+        lookup_context, &uncompression_dict);
     if (!s.ok()) {
       return s;
     }

@@ -25,9 +25,9 @@ struct UncompressionDict;
 class UncompressionDictReader {
  public:
   static Status Create(
-      const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
-      bool use_cache, bool prefetch, bool pin,
-      BlockCacheLookupContext* lookup_context,
+      const BlockBasedTable* table, const ReadOptions& ro,
+      FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+      bool pin, BlockCacheLookupContext* lookup_context,
       std::unique_ptr<UncompressionDictReader>* uncompression_dict_reader);

   Status GetOrReadUncompressionDictionary(

@@ -58,16 +58,19 @@ inline bool BlockFetcher::TryGetUncompressBlockFromPersistentCache() {
 }

 inline bool BlockFetcher::TryGetFromPrefetchBuffer() {
-  if (prefetch_buffer_ != nullptr &&
-      prefetch_buffer_->TryReadFromCache(
-          handle_.offset(), block_size_with_trailer_, &slice_,
-          for_compaction_)) {
-    CheckBlockChecksum();
-    if (!status_.ok()) {
-      return true;
+  if (prefetch_buffer_ != nullptr) {
+    IOOptions opts;
+    Status s = PrepareIOFromReadOptions(read_options_, file_->env(), opts);
+    if (s.ok() && prefetch_buffer_->TryReadFromCache(
+                      opts, handle_.offset(), block_size_with_trailer_, &slice_,
+                      for_compaction_)) {
+      CheckBlockChecksum();
+      if (!status_.ok()) {
+        return true;
+      }
+      got_from_prefetch_buffer_ = true;
+      used_buf_ = const_cast<char*>(slice_.data());
     }
-    got_from_prefetch_buffer_ = true;
-    used_buf_ = const_cast<char*>(slice_.data());
   }
   return got_from_prefetch_buffer_;
 }
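
Note: ```PrepareIOFromReadOptions``` (declared in ```file/file_util.h```, which the hunks above now include) is the bridge between the ```ReadOptions``` deadline and the ```IOOptions``` handed to the file layer. The sketch below is a hypothetical stand-in showing the general shape of such a conversion, under the assumption that an absolute deadline is translated into a remaining-time ```IOOptions::timeout```; the real helper may differ in naming, signature and edge-case handling:

```
#include <chrono>

#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/options.h"
#include "rocksdb/status.h"

// Hypothetical sketch of a ReadOptions -> IOOptions conversion: turn an
// absolute deadline into a remaining-time budget for the next IO and fail
// fast if the deadline has already passed.
rocksdb::Status DeadlineToIOOptions(const rocksdb::ReadOptions& ro,
                                    rocksdb::Env* env,
                                    rocksdb::IOOptions* opts) {
  if (ro.deadline.count() == 0) {
    return rocksdb::Status::OK();  // No deadline was requested.
  }
  const std::chrono::microseconds now(env->NowMicros());
  if (now >= ro.deadline) {
    return rocksdb::Status::TimedOut("Deadline exceeded before issuing IO");
  }
  opts->timeout = ro.deadline - now;  // Remaining budget for this IO.
  return rocksdb::Status::OK();
}
```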

@@ -244,7 +244,8 @@ class BlockFetcherTest : public testing::Test {
     ASSERT_OK(env_->GetFileSize(Path(table_name), &file_size));

     std::unique_ptr<TableReader> table_reader;
-    ASSERT_OK(BlockBasedTable::Open(ioptions, EnvOptions(),
+    ReadOptions ro;
+    ASSERT_OK(BlockBasedTable::Open(ro, ioptions, EnvOptions(),
                                     table_factory_.table_options(), comparator,
                                     std::move(file), file_size, &table_reader));
@@ -259,8 +260,9 @@ class BlockFetcherTest : public testing::Test {
   void ReadFooter(RandomAccessFileReader* file, Footer* footer) {
     uint64_t file_size = 0;
     ASSERT_OK(env_->GetFileSize(file->file_name(), &file_size));
-    ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size, footer,
-                       kBlockBasedTableMagicNumber);
+    IOOptions opts;
+    ReadFooterFromFile(opts, file, nullptr /* prefetch_buffer */, file_size,
+                       footer, kBlockBasedTableMagicNumber);
   }

   // NOTE: compression_type returns the compression type of the fetched block
@@ -315,8 +317,9 @@ class BlockFetcherTest : public testing::Test {
     NewTableReader(ioptions, foptions, comparator, table_name, &table);

     std::unique_ptr<BlockBasedTable::IndexReader> index_reader;
+    ReadOptions ro;
     ASSERT_OK(BinarySearchIndexReader::Create(
-        table.get(), nullptr /* prefetch_buffer */, false /* use_cache */,
+        table.get(), ro, nullptr /* prefetch_buffer */, false /* use_cache */,
         false /* prefetch */, false /* pin */, nullptr /* lookup_context */,
         &index_reader));

@@ -13,7 +13,7 @@
 namespace ROCKSDB_NAMESPACE {

 Status CuckooTableFactory::NewTableReader(
-    const TableReaderOptions& table_reader_options,
+    const ReadOptions& /*ro*/, const TableReaderOptions& table_reader_options,
     std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
     std::unique_ptr<TableReader>* table,
     bool /*prefetch_index_and_filter_in_cache*/) const {

@@ -58,8 +58,9 @@ class CuckooTableFactory : public TableFactory {
   const char* Name() const override { return "CuckooTable"; }

+  using TableFactory::NewTableReader;
   Status NewTableReader(
-      const TableReaderOptions& table_reader_options,
+      const ReadOptions& ro, const TableReaderOptions& table_reader_options,
       std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
       std::unique_ptr<TableReader>* table,
       bool prefetch_index_and_filter_in_cache = true) const override;

@@ -281,7 +281,7 @@ std::string Footer::ToString() const {
   return result;
 }

-Status ReadFooterFromFile(RandomAccessFileReader* file,
+Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file,
                           FilePrefetchBuffer* prefetch_buffer,
                           uint64_t file_size, Footer* footer,
                           uint64_t enforce_table_magic_number) {
@@ -300,15 +300,20 @@ Status ReadFooterFromFile(RandomAccessFileReader* file,
           ? static_cast<size_t>(file_size - Footer::kMaxEncodedLength)
           : 0;
   Status s;
+  // TODO: Need to pass appropriate deadline to TryReadFromCache(). Right now,
+  // there is no readahead for point lookups, so TryReadFromCache will fail if
+  // the required data is not in the prefetch buffer. Once deadline is enabled
+  // for iterator, TryReadFromCache might do a readahead. Revisit to see if we
+  // need to pass a timeout at that point
   if (prefetch_buffer == nullptr ||
-      !prefetch_buffer->TryReadFromCache(read_offset, Footer::kMaxEncodedLength,
-                                         &footer_input)) {
+      !prefetch_buffer->TryReadFromCache(
+          IOOptions(), read_offset, Footer::kMaxEncodedLength, &footer_input)) {
     if (file->use_direct_io()) {
-      s = file->Read(IOOptions(), read_offset, Footer::kMaxEncodedLength,
+      s = file->Read(opts, read_offset, Footer::kMaxEncodedLength,
                      &footer_input, nullptr, &internal_buf);
     } else {
       footer_buf.reserve(Footer::kMaxEncodedLength);
-      s = file->Read(IOOptions(), read_offset, Footer::kMaxEncodedLength,
+      s = file->Read(opts, read_offset, Footer::kMaxEncodedLength,
                      &footer_input, &footer_buf[0], nullptr);
     }
     if (!s.ok()) return s;

@@ -215,7 +215,7 @@ class Footer {
 // Read the footer from file
 // If enforce_table_magic_number != 0, ReadFooterFromFile() will return
 // corruption if table_magic number is not equal to enforce_table_magic_number
-Status ReadFooterFromFile(RandomAccessFileReader* file,
+Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file,
                           FilePrefetchBuffer* prefetch_buffer,
                           uint64_t file_size, Footer* footer,
                           uint64_t enforce_table_magic_number = 0);

@@ -193,7 +193,8 @@ bool NotifyCollectTableCollectorsOnFinish(
   return all_succeeded;
 }

-Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file,
+Status ReadProperties(const ReadOptions& read_options,
+                      const Slice& handle_value, RandomAccessFileReader* file,
                       FilePrefetchBuffer* prefetch_buffer, const Footer& footer,
                       const ImmutableCFOptions& ioptions,
                       TableProperties** table_properties, bool verify_checksum,
@@ -210,16 +211,16 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file,
   }

   BlockContents block_contents;
-  ReadOptions read_options;
-  read_options.verify_checksums = verify_checksum;
   Status s;
   PersistentCacheOptions cache_options;
+  ReadOptions ro = read_options;
+  ro.verify_checksums = verify_checksum;

-  BlockFetcher block_fetcher(
-      file, prefetch_buffer, footer, read_options, handle, &block_contents,
-      ioptions, false /* decompress */, false /*maybe_compressed*/,
-      BlockType::kProperties, UncompressionDict::GetEmptyDict(), cache_options,
-      memory_allocator);
+  BlockFetcher block_fetcher(file, prefetch_buffer, footer, ro, handle,
+                             &block_contents, ioptions, false /* decompress */,
+                             false /*maybe_compressed*/, BlockType::kProperties,
+                             UncompressionDict::GetEmptyDict(), cache_options,
+                             memory_allocator);
   s = block_fetcher.ReadBlockContents();
   // property block is never compressed. Need to add uncompress logic if we are
   // to compress it..
@@ -368,7 +369,8 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
                            FilePrefetchBuffer* prefetch_buffer) {
   // -- Read metaindex block
   Footer footer;
-  auto s = ReadFooterFromFile(file, prefetch_buffer, file_size, &footer,
+  IOOptions opts;
+  auto s = ReadFooterFromFile(opts, file, prefetch_buffer, file_size, &footer,
                               table_magic_number);
   if (!s.ok()) {
     return s;
@@ -405,11 +407,11 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
   TableProperties table_properties;
   if (found_properties_block == true) {
-    s = ReadProperties(meta_iter->value(), file, prefetch_buffer, footer,
-                       ioptions, properties, false /* verify_checksum */,
-                       nullptr /* ret_block_hanel */,
-                       nullptr /* ret_block_contents */,
+    s = ReadProperties(
+        read_options, meta_iter->value(), file, prefetch_buffer, footer,
+        ioptions, properties, false /* verify_checksum */,
+        nullptr /* ret_block_hanel */, nullptr /* ret_block_contents */,
                        compression_type_missing, memory_allocator);
   } else {
     s = Status::NotFound();
   }
@@ -438,8 +440,9 @@ Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size,
                      bool /*compression_type_missing*/,
                      MemoryAllocator* memory_allocator) {
   Footer footer;
-  auto s = ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size,
-                              &footer, table_magic_number);
+  IOOptions opts;
+  auto s = ReadFooterFromFile(opts, file, nullptr /* prefetch_buffer */,
+                              file_size, &footer, table_magic_number);
   if (!s.ok()) {
     return s;
   }
@@ -480,7 +483,8 @@ Status ReadMetaBlock(RandomAccessFileReader* file,
                      MemoryAllocator* memory_allocator) {
   Status status;
   Footer footer;
-  status = ReadFooterFromFile(file, prefetch_buffer, file_size, &footer,
+  IOOptions opts;
+  status = ReadFooterFromFile(opts, file, prefetch_buffer, file_size, &footer,
                               table_magic_number);
   if (!status.ok()) {
     return status;

@@ -99,7 +99,8 @@ bool NotifyCollectTableCollectorsOnFinish(
 // @returns a status to indicate if the operation succeeded. On success,
 //          *table_properties will point to a heap-allocated TableProperties
 //          object, otherwise value of `table_properties` will not be modified.
-Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file,
+Status ReadProperties(const ReadOptions& ro, const Slice& handle_value,
+                      RandomAccessFileReader* file,
                       FilePrefetchBuffer* prefetch_buffer, const Footer& footer,
                       const ImmutableCFOptions& ioptions,
                       TableProperties** table_properties, bool verify_checksum,

@@ -61,6 +61,7 @@ std::shared_ptr<const TableProperties> MockTableReader::GetTableProperties()
 MockTableFactory::MockTableFactory() : next_id_(1) {}

 Status MockTableFactory::NewTableReader(
+    const ReadOptions& /*ro*/,
     const TableReaderOptions& /*table_reader_options*/,
     std::unique_ptr<RandomAccessFileReader>&& file, uint64_t /*file_size*/,
     std::unique_ptr<TableReader>* table_reader,

@@ -176,8 +176,9 @@ class MockTableFactory : public TableFactory {
  public:
   MockTableFactory();
   const char* Name() const override { return "MockTable"; }
+  using TableFactory::NewTableReader;
   Status NewTableReader(
-      const TableReaderOptions& table_reader_options,
+      const ReadOptions& ro, const TableReaderOptions& table_reader_options,
       std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
       std::unique_ptr<TableReader>* table_reader,
       bool prefetch_index_and_filter_in_cache = true) const override;

@@ -49,7 +49,7 @@ static std::unordered_map<std::string, OptionTypeInfo> plain_table_type_info = {
          OptionTypeFlags::kNone, 0}}};

 Status PlainTableFactory::NewTableReader(
-    const TableReaderOptions& table_reader_options,
+    const ReadOptions& /*ro*/, const TableReaderOptions& table_reader_options,
     std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
     std::unique_ptr<TableReader>* table,
     bool /*prefetch_index_and_filter_in_cache*/) const {

@@ -35,7 +35,7 @@ class TableBuilder;
 // 1. Data compression is not supported.
 // 2. Data is not checksumed.
 // it is not recommended to use this format on other type of file systems.
-//
+//
 // PlainTable requires fixed length key, configured as a constructor
 // parameter of the factory class. Output file format:
 // +-------------+-----------------+
@@ -160,7 +160,9 @@ class PlainTableFactory : public TableFactory {
       : table_options_(_table_options) {}

   const char* Name() const override { return "PlainTable"; }
-  Status NewTableReader(const TableReaderOptions& table_reader_options,
+  using TableFactory::NewTableReader;
+  Status NewTableReader(const ReadOptions& ro,
+                        const TableReaderOptions& table_reader_options,
                         std::unique_ptr<RandomAccessFileReader>&& file,
                         uint64_t file_size, std::unique_ptr<TableReader>* table,
                         bool prefetch_index_and_filter_in_cache) const override;
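
Note on the ```using TableFactory::NewTableReader;``` lines added here and in the Cuckoo/Mock/Dummy factories: once a derived factory declares the new ```NewTableReader``` overload taking ```ReadOptions```, C++ name hiding makes the base class's other ```NewTableReader``` overload invisible through the derived type, and the using-declaration re-exposes it. A stand-alone illustration of the language rule (generic C++, not RocksDB code):

```
#include <iostream>

struct Base {
  virtual ~Base() = default;
  // A convenience overload plus the "real" virtual, mirroring a factory that
  // offers both an old-style and a new-style entry point.
  void Create() { Create(42); }
  virtual void Create(int x) { std::cout << "Base::Create(" << x << ")\n"; }
};

struct Derived : Base {
  using Base::Create;  // Without this, Create(int) would hide Create().
  void Create(int x) override {
    std::cout << "Derived::Create(" << x << ")\n";
  }
};

int main() {
  Derived d;
  d.Create();   // Compiles thanks to the using-declaration; Base::Create()
                // dispatches virtually, printing Derived::Create(42).
  d.Create(7);  // Calls the override directly.
  return 0;
}
```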

@@ -103,10 +103,12 @@ Status SstFileDumper::GetTableReader(const std::string& file_path) {
                              ? kSstDumpTailPrefetchSize
                              : file_size;
     uint64_t prefetch_off = file_size - prefetch_size;
-    prefetch_buffer.Prefetch(file_.get(), prefetch_off,
+    IOOptions opts;
+    prefetch_buffer.Prefetch(opts, file_.get(), prefetch_off,
                              static_cast<size_t>(prefetch_size));
-    s = ReadFooterFromFile(file_.get(), &prefetch_buffer, file_size, &footer);
+    s = ReadFooterFromFile(opts, file_.get(), &prefetch_buffer, file_size,
+                           &footer);
   }
   if (s.ok()) {
     magic_number = footer.table_magic_number();

@@ -4321,8 +4321,10 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
   uint64_t file_size = ss_rw.contents().size();

   Footer footer;
-  ASSERT_OK(ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size,
-                               &footer, kBlockBasedTableMagicNumber));
+  IOOptions opts;
+  ASSERT_OK(ReadFooterFromFile(opts, file, nullptr /* prefetch_buffer */,
+                               file_size, &footer,
+                               kBlockBasedTableMagicNumber));

   auto BlockFetchHelper = [&](const BlockHandle& handle, BlockType block_type,
                               BlockContents* contents) {
@@ -4408,7 +4410,8 @@ TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) {
   // read footer
   Footer footer;
-  ASSERT_OK(ReadFooterFromFile(table_reader.get(),
+  IOOptions opts;
+  ASSERT_OK(ReadFooterFromFile(opts, table_reader.get(),
                                nullptr /* prefetch_buffer */, table_size,
                                &footer, kBlockBasedTableMagicNumber));
@@ -4505,9 +4508,10 @@ TEST_F(BBTTailPrefetchTest, TestTailPrefetchStats) {
 TEST_F(BBTTailPrefetchTest, FilePrefetchBufferMinOffset) {
   TailPrefetchStats tpstats;
   FilePrefetchBuffer buffer(nullptr, 0, 0, false, true);
-  buffer.TryReadFromCache(500, 10, nullptr);
-  buffer.TryReadFromCache(480, 10, nullptr);
-  buffer.TryReadFromCache(490, 10, nullptr);
+  IOOptions opts;
+  buffer.TryReadFromCache(opts, 500, 10, nullptr);
+  buffer.TryReadFromCache(opts, 480, 10, nullptr);
+  buffer.TryReadFromCache(opts, 490, 10, nullptr);
   ASSERT_EQ(480, buffer.min_offset_read());
 }

@@ -170,7 +170,9 @@ class DummyTableFactory : public TableFactory {
   const char* Name() const override { return "DummyTableFactory"; }

+  using TableFactory::NewTableReader;
   Status NewTableReader(
+      const ReadOptions& /*ro*/,
       const TableReaderOptions& /*table_reader_options*/,
       std::unique_ptr<RandomAccessFileReader>&& /*file*/,
       uint64_t /*file_size*/, std::unique_ptr<TableReader>* /*table_reader*/,
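
Note: the two ```TEST_SYNC_POINT```s added around the filter check in ```BlockBasedTable::Get``` (the ```@@ -2205,9 +2222,11 @@``` hunk earlier) give unit tests a deterministic hook in the read path. A schematic of how a test could latch onto them with RocksDB's internal sync-point facility (debug builds only; the callback body is a placeholder and this is not the actual db_basic_test code):

```
#include "test_util/sync_point.h"

// Schematic test hook: run a callback once a Get has finished the filter
// match step, e.g. to advance a mock clock so that a deadline expires before
// the subsequent index/data block reads.
void ArmGetDeadlineSyncPoint() {
  auto* sp = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sp->SetCallBack("BlockBasedTable::Get:AfterFilterMatch", [](void* /*arg*/) {
    // Placeholder: e.g. advance a mock clock or record timing here.
  });
  sp->EnableProcessing();
}
```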
