Rename option memtable_prefix_bloom_huge_page_tlb_size => memtable_huge_page_size and extend huge page allocation to the memtable itself

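Summary: Extend the option memtable_prefix_bloom_huge_page_tlb_size so that it no longer places only the memtable bloom filter in huge pages, but the memtable itself as well. The option is renamed to memtable_huge_page_size to reflect the broader scope.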

Test Plan: Run all existing tests.

Reviewers: IslamAbdelRahman, yhchiang, andrewkr

Reviewed By: andrewkr

Subscribers: leveldb, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D60513
main
sdong 9 years ago
parent 0ce258f9b3
commit e5b5f12b81
  1. 1
      HISTORY.md
  2. 6
      db/c.cc
  3. 8
      db/memtable.cc
  4. 2
      db/memtable.h
  5. 5
      db/prefix_test.cc
  6. 4
      include/rocksdb/c.h
  7. 11
      include/rocksdb/options.h
  8. 3
      tools/db_bench_tool.cc
  9. 2
      tools/db_bench_tool_test.cc
  10. 3
      tools/db_stress.cc
  11. 4
      util/mutable_cf_options.cc
  12. 7
      util/mutable_cf_options.h
  13. 10
      util/options.cc
  14. 8
      util/options_helper.cc
  15. 7
      util/options_helper.h
  16. 2
      util/options_settable_test.cc
  17. 4
      util/options_test.cc
  18. 2
      util/testutil.cc

@ -4,6 +4,7 @@
* options.memtable_prefix_bloom_bits changes to options.memtable_prefix_bloom_bits_ratio and deprecate options.memtable_prefix_bloom_probes * options.memtable_prefix_bloom_bits changes to options.memtable_prefix_bloom_bits_ratio and deprecate options.memtable_prefix_bloom_probes
* enum type CompressionType and PerfLevel changes from char to unsigned char. Value of all PerfLevel shift by one. * enum type CompressionType and PerfLevel changes from char to unsigned char. Value of all PerfLevel shift by one.
* Deprecate options.filter_deletes. * Deprecate options.filter_deletes.
* options.memtable_prefix_bloom_huge_page_tlb_size => memtable_huge_page_size. When it is set, RocksDB will try to allocate memory from huge page for memtable too, rather than just memtable bloom filter.
### New Features ### New Features
* Add avoid_flush_during_recovery option. * Add avoid_flush_during_recovery option.

@ -1797,9 +1797,9 @@ void rocksdb_options_set_memtable_prefix_bloom_size_ratio(
opt->rep.memtable_prefix_bloom_size_ratio = v; opt->rep.memtable_prefix_bloom_size_ratio = v;
} }
void rocksdb_options_set_memtable_prefix_bloom_huge_page_tlb_size( void rocksdb_options_set_memtable_huge_page_size(rocksdb_options_t* opt,
rocksdb_options_t* opt, size_t v) { size_t v) {
opt->rep.memtable_prefix_bloom_huge_page_tlb_size = v; opt->rep.memtable_huge_page_size = v;
} }
void rocksdb_options_set_hash_skip_list_rep( void rocksdb_options_set_hash_skip_list_rep(

@ -44,8 +44,7 @@ MemTableOptions::MemTableOptions(const ImmutableCFOptions& ioptions,
static_cast<double>(mutable_cf_options.write_buffer_size) * static_cast<double>(mutable_cf_options.write_buffer_size) *
mutable_cf_options.memtable_prefix_bloom_size_ratio) * mutable_cf_options.memtable_prefix_bloom_size_ratio) *
8u), 8u),
memtable_prefix_bloom_huge_page_tlb_size( memtable_huge_page_size(mutable_cf_options.memtable_huge_page_size),
mutable_cf_options.memtable_prefix_bloom_huge_page_tlb_size),
inplace_update_support(ioptions.inplace_update_support), inplace_update_support(ioptions.inplace_update_support),
inplace_update_num_locks(mutable_cf_options.inplace_update_num_locks), inplace_update_num_locks(mutable_cf_options.inplace_update_num_locks),
inplace_callback(ioptions.inplace_callback), inplace_callback(ioptions.inplace_callback),
@ -63,7 +62,8 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
moptions_(ioptions, mutable_cf_options), moptions_(ioptions, mutable_cf_options),
refs_(0), refs_(0),
kArenaBlockSize(OptimizeBlockSize(moptions_.arena_block_size)), kArenaBlockSize(OptimizeBlockSize(moptions_.arena_block_size)),
arena_(moptions_.arena_block_size, 0), arena_(moptions_.arena_block_size,
mutable_cf_options.memtable_huge_page_size),
allocator_(&arena_, write_buffer_manager), allocator_(&arena_, write_buffer_manager),
table_(ioptions.memtable_factory->CreateMemTableRep( table_(ioptions.memtable_factory->CreateMemTableRep(
comparator_, &allocator_, ioptions.prefix_extractor, comparator_, &allocator_, ioptions.prefix_extractor,
@ -92,7 +92,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
prefix_bloom_.reset(new DynamicBloom( prefix_bloom_.reset(new DynamicBloom(
&allocator_, moptions_.memtable_prefix_bloom_bits, &allocator_, moptions_.memtable_prefix_bloom_bits,
ioptions.bloom_locality, 6 /* hard coded 6 probes */, nullptr, ioptions.bloom_locality, 6 /* hard coded 6 probes */, nullptr,
moptions_.memtable_prefix_bloom_huge_page_tlb_size, ioptions.info_log)); moptions_.memtable_huge_page_size, ioptions.info_log));
} }
} }

@ -41,7 +41,7 @@ struct MemTableOptions {
size_t write_buffer_size; size_t write_buffer_size;
size_t arena_block_size; size_t arena_block_size;
uint32_t memtable_prefix_bloom_bits; uint32_t memtable_prefix_bloom_bits;
size_t memtable_prefix_bloom_huge_page_tlb_size; size_t memtable_huge_page_size;
bool inplace_update_support; bool inplace_update_support;
size_t inplace_update_num_locks; size_t inplace_update_num_locks;
UpdateStatus (*inplace_callback)(char* existing_value, UpdateStatus (*inplace_callback)(char* existing_value,

@ -44,7 +44,7 @@ DEFINE_int32(max_write_buffer_number, 2, "");
DEFINE_int32(min_write_buffer_number_to_merge, 1, ""); DEFINE_int32(min_write_buffer_number_to_merge, 1, "");
DEFINE_int32(skiplist_height, 4, ""); DEFINE_int32(skiplist_height, 4, "");
DEFINE_double(memtable_prefix_bloom_size_ratio, 0.1, ""); DEFINE_double(memtable_prefix_bloom_size_ratio, 0.1, "");
DEFINE_int32(memtable_prefix_bloom_huge_page_tlb_size, 2 * 1024 * 1024, ""); DEFINE_int32(memtable_huge_page_size, 2 * 1024 * 1024, "");
DEFINE_int32(value_size, 40, ""); DEFINE_int32(value_size, 40, "");
// Path to the database on file system // Path to the database on file system
@ -161,8 +161,7 @@ class PrefixTest : public testing::Test {
options.memtable_prefix_bloom_size_ratio = options.memtable_prefix_bloom_size_ratio =
FLAGS_memtable_prefix_bloom_size_ratio; FLAGS_memtable_prefix_bloom_size_ratio;
options.memtable_prefix_bloom_huge_page_tlb_size = options.memtable_huge_page_size = FLAGS_memtable_huge_page_size;
FLAGS_memtable_prefix_bloom_huge_page_tlb_size;
options.prefix_extractor.reset(NewFixedPrefixTransform(8)); options.prefix_extractor.reset(NewFixedPrefixTransform(8));
BlockBasedTableOptions bbto; BlockBasedTableOptions bbto;

@ -678,8 +678,8 @@ extern ROCKSDB_LIBRARY_API void rocksdb_options_set_memtable_prefix_bloom_bits(
rocksdb_options_t*, uint32_t); rocksdb_options_t*, uint32_t);
extern ROCKSDB_LIBRARY_API void extern ROCKSDB_LIBRARY_API void
rocksdb_options_set_memtable_prefix_bloom_probes(rocksdb_options_t*, uint32_t); rocksdb_options_set_memtable_prefix_bloom_probes(rocksdb_options_t*, uint32_t);
extern ROCKSDB_LIBRARY_API void extern ROCKSDB_LIBRARY_API void rocksdb_options_set_memtable_huge_page_size(
rocksdb_options_set_memtable_prefix_bloom_huge_page_tlb_size(rocksdb_options_t*, size_t); rocksdb_options_t*, size_t);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_successive_merges( extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_successive_merges(
rocksdb_options_t*, size_t); rocksdb_options_t*, size_t);

@ -746,14 +746,17 @@ struct ColumnFamilyOptions {
// Dynamically changeable through SetOptions() API // Dynamically changeable through SetOptions() API
double memtable_prefix_bloom_size_ratio; double memtable_prefix_bloom_size_ratio;
// Page size for huge page TLB for bloom in memtable. If <=0, not allocate // Page size for huge page for the arena used by the memtable. If <=0, it
// from huge page TLB but from malloc. // won't allocate from huge page but from malloc.
// Need to reserve huge pages for it to be allocated. For example: // Users are responsible to reserve huge pages for it to be allocated. For
// example:
// sysctl -w vm.nr_hugepages=20 // sysctl -w vm.nr_hugepages=20
// See linux doc Documentation/vm/hugetlbpage.txt // See linux doc Documentation/vm/hugetlbpage.txt
// If there isn't enough free huge page available, it will fall back to
// malloc.
// //
// Dynamically changeable through SetOptions() API // Dynamically changeable through SetOptions() API
size_t memtable_prefix_bloom_huge_page_tlb_size; size_t memtable_huge_page_size;
// Control locality of bloom filter probes to improve cache miss rate. // Control locality of bloom filter probes to improve cache miss rate.
// This option only applies to memtable prefix bloom and plaintable // This option only applies to memtable prefix bloom and plaintable

@ -402,6 +402,8 @@ DEFINE_int32(bloom_bits, -1, "Bloom filter bits per key. Negative means"
DEFINE_double(memtable_bloom_size_ratio, 0, DEFINE_double(memtable_bloom_size_ratio, 0,
"Ratio of memtable size used for bloom filter. 0 means no bloom " "Ratio of memtable size used for bloom filter. 0 means no bloom "
"filter."); "filter.");
DEFINE_bool(memtable_use_huge_page, false,
"Try to use huge page in memtables.");
DEFINE_bool(use_existing_db, false, "If true, do not destroy the existing" DEFINE_bool(use_existing_db, false, "If true, do not destroy the existing"
" database. If you set this flag and also specify a benchmark that" " database. If you set this flag and also specify a benchmark that"
@ -2464,6 +2466,7 @@ class Benchmark {
exit(1); exit(1);
} }
} }
options.memtable_huge_page_size = FLAGS_memtable_use_huge_page ? 2048 : 0;
options.memtable_prefix_bloom_size_ratio = FLAGS_memtable_bloom_size_ratio; options.memtable_prefix_bloom_size_ratio = FLAGS_memtable_bloom_size_ratio;
options.bloom_locality = FLAGS_bloom_locality; options.bloom_locality = FLAGS_bloom_locality;
options.max_file_opening_threads = FLAGS_file_opening_threads; options.max_file_opening_threads = FLAGS_file_opening_threads;

@ -229,7 +229,7 @@ const std::string options_file_content = R"OPTIONS_FILE(
max_bytes_for_level_base=104857600 max_bytes_for_level_base=104857600
bloom_locality=0 bloom_locality=0
target_file_size_base=10485760 target_file_size_base=10485760
memtable_prefix_bloom_huge_page_tlb_size=0 memtable_huge_page_size=0
max_successive_merges=1000 max_successive_merges=1000
max_sequential_skip_in_iterations=8 max_sequential_skip_in_iterations=8
arena_block_size=52428800 arena_block_size=52428800

@ -1045,8 +1045,7 @@ class StressTest {
}}, }},
{"memtable_prefix_bloom_bits", {"0", "8", "10"}}, {"memtable_prefix_bloom_bits", {"0", "8", "10"}},
{"memtable_prefix_bloom_probes", {"4", "5", "6"}}, {"memtable_prefix_bloom_probes", {"4", "5", "6"}},
{"memtable_prefix_bloom_huge_page_tlb_size", {"memtable_huge_page_size", {"0", ToString(2 * 1024 * 1024)}},
{"0", ToString(2 * 1024 * 1024)}},
{"max_successive_merges", {"0", "2", "4"}}, {"max_successive_merges", {"0", "2", "4"}},
{"inplace_update_num_locks", {"100", "200", "300"}}, {"inplace_update_num_locks", {"100", "200", "300"}},
// TODO(ljin): enable test for this option // TODO(ljin): enable test for this option

@ -72,8 +72,8 @@ void MutableCFOptions::Dump(Logger* log) const {
arena_block_size); arena_block_size);
Log(log, " memtable_prefix_bloom_ratio: %f", Log(log, " memtable_prefix_bloom_ratio: %f",
memtable_prefix_bloom_size_ratio); memtable_prefix_bloom_size_ratio);
Log(log, " memtable_prefix_bloom_huge_page_tlb_size: %" ROCKSDB_PRIszt, Log(log, " memtable_huge_page_size: %" ROCKSDB_PRIszt,
memtable_prefix_bloom_huge_page_tlb_size); memtable_huge_page_size);
Log(log, " max_successive_merges: %" ROCKSDB_PRIszt, Log(log, " max_successive_merges: %" ROCKSDB_PRIszt,
max_successive_merges); max_successive_merges);
Log(log, " disable_auto_compactions: %d", Log(log, " disable_auto_compactions: %d",

@ -19,8 +19,7 @@ struct MutableCFOptions {
arena_block_size(options.arena_block_size), arena_block_size(options.arena_block_size),
memtable_prefix_bloom_size_ratio( memtable_prefix_bloom_size_ratio(
options.memtable_prefix_bloom_size_ratio), options.memtable_prefix_bloom_size_ratio),
memtable_prefix_bloom_huge_page_tlb_size( memtable_huge_page_size(options.memtable_huge_page_size),
options.memtable_prefix_bloom_huge_page_tlb_size),
max_successive_merges(options.max_successive_merges), max_successive_merges(options.max_successive_merges),
inplace_update_num_locks(options.inplace_update_num_locks), inplace_update_num_locks(options.inplace_update_num_locks),
disable_auto_compactions(options.disable_auto_compactions), disable_auto_compactions(options.disable_auto_compactions),
@ -58,7 +57,7 @@ struct MutableCFOptions {
max_write_buffer_number(0), max_write_buffer_number(0),
arena_block_size(0), arena_block_size(0),
memtable_prefix_bloom_size_ratio(0), memtable_prefix_bloom_size_ratio(0),
memtable_prefix_bloom_huge_page_tlb_size(0), memtable_huge_page_size(0),
max_successive_merges(0), max_successive_merges(0),
inplace_update_num_locks(0), inplace_update_num_locks(0),
disable_auto_compactions(false), disable_auto_compactions(false),
@ -108,7 +107,7 @@ struct MutableCFOptions {
int max_write_buffer_number; int max_write_buffer_number;
size_t arena_block_size; size_t arena_block_size;
double memtable_prefix_bloom_size_ratio; double memtable_prefix_bloom_size_ratio;
size_t memtable_prefix_bloom_huge_page_tlb_size; size_t memtable_huge_page_size;
size_t max_successive_merges; size_t max_successive_merges;
size_t inplace_update_num_locks; size_t inplace_update_num_locks;

@ -121,7 +121,7 @@ ColumnFamilyOptions::ColumnFamilyOptions()
inplace_update_num_locks(10000), inplace_update_num_locks(10000),
inplace_callback(nullptr), inplace_callback(nullptr),
memtable_prefix_bloom_size_ratio(0.0), memtable_prefix_bloom_size_ratio(0.0),
memtable_prefix_bloom_huge_page_tlb_size(0), memtable_huge_page_size(0),
bloom_locality(0), bloom_locality(0),
max_successive_merges(0), max_successive_merges(0),
min_partial_merge_operands(2), min_partial_merge_operands(2),
@ -189,8 +189,7 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
inplace_callback(options.inplace_callback), inplace_callback(options.inplace_callback),
memtable_prefix_bloom_size_ratio( memtable_prefix_bloom_size_ratio(
options.memtable_prefix_bloom_size_ratio), options.memtable_prefix_bloom_size_ratio),
memtable_prefix_bloom_huge_page_tlb_size( memtable_huge_page_size(options.memtable_huge_page_size),
options.memtable_prefix_bloom_huge_page_tlb_size),
bloom_locality(options.bloom_locality), bloom_locality(options.bloom_locality),
max_successive_merges(options.max_successive_merges), max_successive_merges(options.max_successive_merges),
min_partial_merge_operands(options.min_partial_merge_operands), min_partial_merge_operands(options.min_partial_merge_operands),
@ -599,9 +598,8 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
Header(log, " Options.memtable_prefix_bloom_size_ratio: %f", Header(log, " Options.memtable_prefix_bloom_size_ratio: %f",
memtable_prefix_bloom_size_ratio); memtable_prefix_bloom_size_ratio);
Header(log, Header(log, " Options.memtable_huge_page_size: %" ROCKSDB_PRIszt,
" Options.memtable_prefix_bloom_huge_page_tlb_size: %" ROCKSDB_PRIszt, memtable_huge_page_size);
memtable_prefix_bloom_huge_page_tlb_size);
Header(log, " Options.bloom_locality: %d", Header(log, " Options.bloom_locality: %d",
bloom_locality); bloom_locality);

@ -543,8 +543,9 @@ bool ParseMemtableOptions(const std::string& name, const std::string& value,
} else if (name == "memtable_prefix_bloom_probes") { } else if (name == "memtable_prefix_bloom_probes") {
// Deprecated // Deprecated
} else if (name == "memtable_prefix_bloom_huge_page_tlb_size") { } else if (name == "memtable_prefix_bloom_huge_page_tlb_size") {
new_options->memtable_prefix_bloom_huge_page_tlb_size = // Deprecated
ParseSizeT(value); } else if (name == "memtable_huge_page_size") {
new_options->memtable_huge_page_size = ParseSizeT(value);
} else if (name == "max_successive_merges") { } else if (name == "max_successive_merges") {
new_options->max_successive_merges = ParseSizeT(value); new_options->max_successive_merges = ParseSizeT(value);
} else if (name == "filter_deletes") { } else if (name == "filter_deletes") {
@ -1443,8 +1444,7 @@ ColumnFamilyOptions BuildColumnFamilyOptions(
cf_opts.arena_block_size = mutable_cf_options.arena_block_size; cf_opts.arena_block_size = mutable_cf_options.arena_block_size;
cf_opts.memtable_prefix_bloom_size_ratio = cf_opts.memtable_prefix_bloom_size_ratio =
mutable_cf_options.memtable_prefix_bloom_size_ratio; mutable_cf_options.memtable_prefix_bloom_size_ratio;
cf_opts.memtable_prefix_bloom_huge_page_tlb_size = cf_opts.memtable_huge_page_size = mutable_cf_options.memtable_huge_page_size;
mutable_cf_options.memtable_prefix_bloom_huge_page_tlb_size;
cf_opts.max_successive_merges = mutable_cf_options.max_successive_merges; cf_opts.max_successive_merges = mutable_cf_options.max_successive_merges;
cf_opts.inplace_update_num_locks = cf_opts.inplace_update_num_locks =
mutable_cf_options.inplace_update_num_locks; mutable_cf_options.inplace_update_num_locks;

@ -428,10 +428,11 @@ static std::unordered_map<std::string, OptionTypeInfo> cf_options_type_info = {
{"max_successive_merges", {"max_successive_merges",
{offsetof(struct ColumnFamilyOptions, max_successive_merges), {offsetof(struct ColumnFamilyOptions, max_successive_merges),
OptionType::kSizeT, OptionVerificationType::kNormal}}, OptionType::kSizeT, OptionVerificationType::kNormal}},
{"memtable_prefix_bloom_huge_page_tlb_size", {"memtable_huge_page_size",
{offsetof(struct ColumnFamilyOptions, {offsetof(struct ColumnFamilyOptions, memtable_huge_page_size),
memtable_prefix_bloom_huge_page_tlb_size),
OptionType::kSizeT, OptionVerificationType::kNormal}}, OptionType::kSizeT, OptionVerificationType::kNormal}},
{"memtable_prefix_bloom_huge_page_tlb_size",
{0, OptionType::kSizeT, OptionVerificationType::kDeprecated}},
{"write_buffer_size", {"write_buffer_size",
{offsetof(struct ColumnFamilyOptions, write_buffer_size), {offsetof(struct ColumnFamilyOptions, write_buffer_size),
OptionType::kSizeT, OptionVerificationType::kNormal}}, OptionType::kSizeT, OptionVerificationType::kNormal}},

@ -391,7 +391,7 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
"max_bytes_for_level_base=986;" "max_bytes_for_level_base=986;"
"bloom_locality=8016;" "bloom_locality=8016;"
"target_file_size_base=4294976376;" "target_file_size_base=4294976376;"
"memtable_prefix_bloom_huge_page_tlb_size=2557;" "memtable_huge_page_size=2557;"
"max_successive_merges=5497;" "max_successive_merges=5497;"
"max_sequential_skip_in_iterations=4294971408;" "max_sequential_skip_in_iterations=4294971408;"
"arena_block_size=1893;" "arena_block_size=1893;"

@ -86,7 +86,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
{"compaction_measure_io_stats", "false"}, {"compaction_measure_io_stats", "false"},
{"inplace_update_num_locks", "25"}, {"inplace_update_num_locks", "25"},
{"memtable_prefix_bloom_size_ratio", "0.26"}, {"memtable_prefix_bloom_size_ratio", "0.26"},
{"memtable_prefix_bloom_huge_page_tlb_size", "28"}, {"memtable_huge_page_size", "28"},
{"bloom_locality", "29"}, {"bloom_locality", "29"},
{"max_successive_merges", "30"}, {"max_successive_merges", "30"},
{"min_partial_merge_operands", "31"}, {"min_partial_merge_operands", "31"},
@ -185,7 +185,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
ASSERT_EQ(new_cf_opt.inplace_update_support, true); ASSERT_EQ(new_cf_opt.inplace_update_support, true);
ASSERT_EQ(new_cf_opt.inplace_update_num_locks, 25U); ASSERT_EQ(new_cf_opt.inplace_update_num_locks, 25U);
ASSERT_EQ(new_cf_opt.memtable_prefix_bloom_size_ratio, 0.26); ASSERT_EQ(new_cf_opt.memtable_prefix_bloom_size_ratio, 0.26);
ASSERT_EQ(new_cf_opt.memtable_prefix_bloom_huge_page_tlb_size, 28U); ASSERT_EQ(new_cf_opt.memtable_huge_page_size, 28U);
ASSERT_EQ(new_cf_opt.bloom_locality, 29U); ASSERT_EQ(new_cf_opt.bloom_locality, 29U);
ASSERT_EQ(new_cf_opt.max_successive_merges, 30U); ASSERT_EQ(new_cf_opt.max_successive_merges, 30U);
ASSERT_EQ(new_cf_opt.min_partial_merge_operands, 31U); ASSERT_EQ(new_cf_opt.min_partial_merge_operands, 31U);

@ -328,7 +328,7 @@ void RandomInitCFOptions(ColumnFamilyOptions* cf_opt, Random* rnd) {
cf_opt->arena_block_size = rnd->Uniform(10000); cf_opt->arena_block_size = rnd->Uniform(10000);
cf_opt->inplace_update_num_locks = rnd->Uniform(10000); cf_opt->inplace_update_num_locks = rnd->Uniform(10000);
cf_opt->max_successive_merges = rnd->Uniform(10000); cf_opt->max_successive_merges = rnd->Uniform(10000);
cf_opt->memtable_prefix_bloom_huge_page_tlb_size = rnd->Uniform(10000); cf_opt->memtable_huge_page_size = rnd->Uniform(10000);
cf_opt->write_buffer_size = rnd->Uniform(10000); cf_opt->write_buffer_size = rnd->Uniform(10000);
// uint32_t options // uint32_t options

Loading…
Cancel
Save