Add blob cache option in the column family options (#10155)

Summary:
There is currently no caching mechanism for blobs, which is not ideal, especially when the database resides on remote storage (where we cannot rely on the OS page cache). As part of this task, we would like to make it possible for the application to configure a blob cache.
This PR is a part of https://github.com/facebook/rocksdb/issues/10156

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10155

Reviewed By: ltamasi

Differential Revision: D37150819

Pulled By: gangliao

fbshipit-source-id: b807c7916ea5d411588128f8e22a49f171388fe2
main
Gang Liao 2 years ago committed by Facebook GitHub Bot
parent 1d2950b8dd
commit cba398df8a
  1. 5
      db/c.cc
  2. 1
      db/db_options_test.cc
  3. 10
      include/rocksdb/advanced_options.h
  4. 3
      include/rocksdb/c.h
  5. 2
      include/rocksdb/file_system.h
  6. 1
      include/rocksdb/options.h
  7. 13
      options/cf_options.cc
  8. 2
      options/cf_options.h
  9. 9
      options/options.cc
  10. 1
      options/options_helper.cc
  11. 9
      options/options_settable_test.cc
  12. 32
      options/options_test.cc
  13. 2
      table/block_based/block_based_table_reader.cc

@ -3048,6 +3048,11 @@ int rocksdb_options_get_blob_file_starting_level(rocksdb_options_t* opt) {
return opt->rep.blob_file_starting_level;
}
// Sets the `blob_cache` option on `opt` from the C cache handle `blob_cache`.
//
// A NULL `blob_cache` clears the option (stores a null cache pointer) instead
// of dereferencing the null handle.
void rocksdb_options_set_blob_cache(rocksdb_options_t* opt,
                                    rocksdb_cache_t* blob_cache) {
  if (blob_cache != nullptr) {
    opt->rep.blob_cache = blob_cache->rep;
  } else {
    // No handle supplied: leave the options without a blob cache.
    opt->rep.blob_cache = nullptr;
  }
}
// Stores `n` into the `num_levels` field of the options wrapped by `opt`.
void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) {
  auto& wrapped_options = opt->rep;
  wrapped_options.num_levels = n;
}

@ -220,6 +220,7 @@ TEST_F(DBOptionsTest, SetMutableTableOptions) {
ColumnFamilyHandle* cfh = dbfull()->DefaultColumnFamily();
Options c_opts = dbfull()->GetOptions(cfh);
const auto* c_bbto =
c_opts.table_factory->GetOptions<BlockBasedTableOptions>();
ASSERT_NE(c_bbto, nullptr);

@ -10,6 +10,7 @@
#include <memory>
#include "rocksdb/cache.h"
#include "rocksdb/compression_type.h"
#include "rocksdb/memtablerep.h"
#include "rocksdb/universal_compaction.h"
@ -227,7 +228,7 @@ enum class Temperature : uint8_t {
};
// The control option of how the cache tiers will be used. Currently rocksdb
// support block cahe (volatile tier), secondary cache (non-volatile tier).
// support block cache (volatile tier), secondary cache (non-volatile tier).
// In the future, we may add more caching layers.
enum class CacheTier : uint8_t {
kVolatileTier = 0,
@ -953,6 +954,13 @@ struct AdvancedColumnFamilyOptions {
// Dynamically changeable through the SetOptions() API
int blob_file_starting_level = 0;
// This feature is WORK IN PROGRESS
// If non-NULL, use the specified cache for blobs.
// If NULL, rocksdb will not use a blob cache.
//
// Default: nullptr (disabled)
std::shared_ptr<Cache> blob_cache = nullptr;
// Create ColumnFamilyOptions with default values for all fields
AdvancedColumnFamilyOptions();
// Create ColumnFamilyOptions from Options

@ -1264,6 +1264,9 @@ extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_file_starting_level(
extern ROCKSDB_LIBRARY_API int rocksdb_options_get_blob_file_starting_level(
rocksdb_options_t* opt);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_cache(
rocksdb_options_t* opt, rocksdb_cache_t* blob_cache);
/* returns a pointer to a malloc()-ed, null terminated string */
extern ROCKSDB_LIBRARY_API char* rocksdb_options_statistics_get_string(
rocksdb_options_t* opt);

@ -762,7 +762,7 @@ struct FSReadRequest {
// returns fewer bytes if end of file is hit (or `status` is not OK).
size_t len;
// A buffer that MultiRead() can optionally place data in. It can
// A buffer that MultiRead() can optionally place data in. It can
// ignore this and allocate its own buffer.
// The lifecycle of scratch will be until IO is completed.
//

@ -1412,7 +1412,6 @@ struct Options : public DBOptions, public ColumnFamilyOptions {
Options* DisableExtraChecks();
};
//
// An application can issue a read request (via Get/Iterators) and specify
// if that read should process data that ALREADY resides on a specified cache
// level. For example, if an application specifies kBlockCacheTier then the

@ -732,6 +732,16 @@ static std::unordered_map<std::string, OptionTypeInfo>
OptionTypeInfo::AsCustomSharedPtr<SstPartitionerFactory>(
offsetof(struct ImmutableCFOptions, sst_partitioner_factory),
OptionVerificationType::kByName, OptionTypeFlags::kAllowNull)},
{"blob_cache",
{offsetof(struct ImmutableCFOptions, blob_cache), OptionType::kUnknown,
OptionVerificationType::kNormal,
(OptionTypeFlags::kCompareNever | OptionTypeFlags::kDontSerialize),
// Parses the input value as a Cache
[](const ConfigOptions& opts, const std::string&,
const std::string& value, void* addr) {
auto* cache = static_cast<std::shared_ptr<Cache>*>(addr);
return Cache::CreateFromString(opts, value, cache);
}}},
};
const std::string OptionsHelper::kCFOptionsName = "ColumnFamilyOptions";
@ -870,7 +880,8 @@ ImmutableCFOptions::ImmutableCFOptions(const ColumnFamilyOptions& cf_options)
cf_options.memtable_insert_with_hint_prefix_extractor),
cf_paths(cf_options.cf_paths),
compaction_thread_limiter(cf_options.compaction_thread_limiter),
sst_partitioner_factory(cf_options.sst_partitioner_factory) {}
sst_partitioner_factory(cf_options.sst_partitioner_factory),
blob_cache(cf_options.blob_cache) {}
ImmutableOptions::ImmutableOptions() : ImmutableOptions(Options()) {}

@ -78,6 +78,8 @@ struct ImmutableCFOptions {
std::shared_ptr<ConcurrentTaskLimiter> compaction_thread_limiter;
std::shared_ptr<SstPartitionerFactory> sst_partitioner_factory;
std::shared_ptr<Cache> blob_cache;
};
struct ImmutableOptions : public ImmutableDBOptions, public ImmutableCFOptions {

@ -101,7 +101,8 @@ AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
blob_garbage_collection_force_threshold(
options.blob_garbage_collection_force_threshold),
blob_compaction_readahead_size(options.blob_compaction_readahead_size),
blob_file_starting_level(options.blob_file_starting_level) {
blob_file_starting_level(options.blob_file_starting_level),
blob_cache(options.blob_cache) {
assert(memtable_factory.get() != nullptr);
if (max_bytes_for_level_multiplier_additional.size() <
static_cast<unsigned int>(num_levels)) {
@ -417,6 +418,12 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
blob_compaction_readahead_size);
ROCKS_LOG_HEADER(log, " Options.blob_file_starting_level: %d",
blob_file_starting_level);
if (blob_cache) {
ROCKS_LOG_HEADER(log, " Options.blob_cache: %s",
blob_cache->Name());
ROCKS_LOG_HEADER(log, " blob_cache options: %s",
blob_cache->GetPrintableOptions().c_str());
}
} // ColumnFamilyOptions::Dump
void Options::Dump(Logger* log) const {

@ -303,6 +303,7 @@ void UpdateColumnFamilyOptions(const ImmutableCFOptions& ioptions,
cf_opts->cf_paths = ioptions.cf_paths;
cf_opts->compaction_thread_limiter = ioptions.compaction_thread_limiter;
cf_opts->sst_partitioner_factory = ioptions.sst_partitioner_factory;
cf_opts->blob_cache = ioptions.blob_cache;
// TODO(yhchiang): find some way to handle the following derived options
// * max_file_size

@ -377,7 +377,7 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
// test is not updated accordingly.
// After adding an option, we need to make sure it is settable by
// GetColumnFamilyOptionsFromString() and add the option to the input
// string passed to GetColumnFamilyOptionsFromString()in this test.
// string passed to GetColumnFamilyOptionsFromString() in this test.
// If it is a complicated type, you also need to add the field to
// kColumnFamilyOptionsExcluded, and maybe add customized verification
// for it.
@ -400,6 +400,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
{offsetof(struct ColumnFamilyOptions,
table_properties_collector_factories),
sizeof(ColumnFamilyOptions::TablePropertiesCollectorFactories)},
{offsetof(struct ColumnFamilyOptions, blob_cache),
sizeof(std::shared_ptr<Cache>)},
{offsetof(struct ColumnFamilyOptions, comparator), sizeof(Comparator*)},
{offsetof(struct ColumnFamilyOptions, merge_operator),
sizeof(std::shared_ptr<MergeOperator>)},
@ -523,9 +525,12 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
"blob_file_starting_level=1;"
"bottommost_temperature=kWarm;"
"compaction_options_fifo={max_table_files_size=3;allow_"
"compaction=false;age_for_warm=1;};",
"compaction=false;age_for_warm=1;};"
"blob_cache=1M;",
new_options));
ASSERT_NE(new_options->blob_cache.get(), nullptr);
ASSERT_EQ(unset_bytes_base,
NumUnsetBytes(new_options_ptr, sizeof(ColumnFamilyOptions),
kColumnFamilyOptionsExcluded));

@ -601,6 +601,22 @@ TEST_F(OptionsTest, GetColumnFamilyOptionsFromStringTest) {
ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr);
ASSERT_EQ(std::string(new_cf_opt.memtable_factory->Name()), "SkipListFactory");
ASSERT_TRUE(new_cf_opt.memtable_factory->IsInstanceOf("SkipListFactory"));
// blob cache
ASSERT_OK(GetColumnFamilyOptionsFromString(
config_options, base_cf_opt,
"blob_cache={capacity=1M;num_shard_bits=4;"
"strict_capacity_limit=true;high_pri_pool_ratio=0.5;};",
&new_cf_opt));
ASSERT_NE(new_cf_opt.blob_cache, nullptr);
ASSERT_EQ(new_cf_opt.blob_cache->GetCapacity(), 1024UL * 1024UL);
ASSERT_EQ(static_cast<ShardedCache*>(new_cf_opt.blob_cache.get())
->GetNumShardBits(),
4);
ASSERT_EQ(new_cf_opt.blob_cache->HasStrictCapacityLimit(), true);
ASSERT_EQ(static_cast<LRUCache*>(new_cf_opt.blob_cache.get())
->GetHighPriPoolRatio(),
0.5);
}
TEST_F(OptionsTest, CompressionOptionsFromString) {
@ -2767,6 +2783,22 @@ TEST_F(OptionsOldApiTest, GetColumnFamilyOptionsFromStringTest) {
&new_cf_opt));
ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr);
ASSERT_TRUE(new_cf_opt.memtable_factory->IsInstanceOf("SkipListFactory"));
// blob cache
ASSERT_OK(GetColumnFamilyOptionsFromString(
base_cf_opt,
"blob_cache={capacity=1M;num_shard_bits=4;"
"strict_capacity_limit=true;high_pri_pool_ratio=0.5;};",
&new_cf_opt));
ASSERT_NE(new_cf_opt.blob_cache, nullptr);
ASSERT_EQ(new_cf_opt.blob_cache->GetCapacity(), 1024UL * 1024UL);
ASSERT_EQ(static_cast<ShardedCache*>(new_cf_opt.blob_cache.get())
->GetNumShardBits(),
4);
ASSERT_EQ(new_cf_opt.blob_cache->HasStrictCapacityLimit(), true);
ASSERT_EQ(static_cast<LRUCache*>(new_cf_opt.blob_cache.get())
->GetHighPriPoolRatio(),
0.5);
}
TEST_F(OptionsTest, SliceTransformCreateFromString) {

@ -563,7 +563,7 @@ void BlockBasedTable::SetupBaseCacheKey(const TableProperties* properties,
// assert(!db_id.empty());
// Minimum block size is 5 bytes; therefore we can trim off two lower bits
// from offets. See GetCacheKey.
// from offsets. See GetCacheKey.
*out_base_cache_key = OffsetableCacheKey(db_id, db_session_id, file_num,
/*max_offset*/ file_size >> 2);
}

Loading…
Cancel
Save