add block deviation option to terminate a block before it exceeds block_size

Summary: a new option block_size_deviation is added.

Test Plan: run db_test and db_bench

Reviewers: dhruba, haobo

Reviewed By: haobo

Differential Revision: https://reviews.facebook.net/D10821
main
heyongqiang 12 years ago
parent ef15b9d178
commit 4b29651206
  1. 3
      db/db_impl.cc
  2. 8
      include/leveldb/options.h
  3. 15
      table/block_builder.cc
  4. 3
      table/block_builder.h
  5. 21
      table/table_builder.cc
  6. 8
      util/options.cc

@ -138,6 +138,9 @@ Options SanitizeOptions(const std::string& dbname,
result.block_cache = NewLRUCache(8 << 20); result.block_cache = NewLRUCache(8 << 20);
} }
result.compression_per_level = src.compression_per_level; result.compression_per_level = src.compression_per_level;
if (result.block_size_deviation < 0 || result.block_size_deviation > 100) {
result.block_size_deviation = 0;
}
return result; return result;
} }

@ -433,6 +433,14 @@ struct Options {
// if not zero, dump leveldb.stats to LOG every stats_dump_period_sec // if not zero, dump leveldb.stats to LOG every stats_dump_period_sec
// Default: 3600 (1 hour) // Default: 3600 (1 hour)
unsigned int stats_dump_period_sec; unsigned int stats_dump_period_sec;
// This is used to close a block before it reaches the configured
// 'block_size'. If the percentage of free space in the current block is less
// than this specified number and adding a new record to the block will
// exceed the configured block size, then this block will be closed and the
// new record will be written to the next block.
// Default is 10.
int block_size_deviation;
}; };
// Options that control read operations // Options that control read operations

@ -60,6 +60,21 @@ size_t BlockBuilder::CurrentSizeEstimate() const {
sizeof(uint32_t)); // Restart array length sizeof(uint32_t)); // Restart array length
} }
// Predicts what the block size estimate would be if the given key/value pair
// were appended next. This is a const query: the builder is not modified.
// The key is charged at its full length (no prefix sharing is assumed).
size_t BlockBuilder::EstimateSizeAfterKV(const Slice& key,
                                         const Slice& value) const {
  // A restart-array slot is charged once the running entry counter has
  // reached the configured restart interval.
  const bool needs_restart_slot = counter_ >= options_->block_restart_interval;

  size_t projected = CurrentSizeEstimate();
  if (needs_restart_slot) {
    projected += sizeof(uint32_t);
  }

  // Entry header: shared prefix length (budgeted at a fixed 4 bytes),
  // followed by the varint-encoded key length and value length.
  projected += sizeof(int32_t);
  projected += VarintLength(key.size());
  projected += VarintLength(value.size());

  // Entry payload: raw key bytes and raw value bytes.
  projected += key.size();
  projected += value.size();
  return projected;
}
Slice BlockBuilder::Finish() { Slice BlockBuilder::Finish() {
// Append restart array // Append restart array
for (size_t i = 0; i < restarts_.size(); i++) { for (size_t i = 0; i < restarts_.size(); i++) {

@ -34,6 +34,9 @@ class BlockBuilder {
// we are building. // we are building.
size_t CurrentSizeEstimate() const; size_t CurrentSizeEstimate() const;
// Returns an estimated block size after appending key and value.
size_t EstimateSizeAfterKV(const Slice& key, const Slice& value) const;
// Return true iff no entries have been added since the last Reset() // Return true iff no entries have been added since the last Reset()
bool empty() const { bool empty() const {
return buffer_.empty(); return buffer_.empty();

@ -98,6 +98,22 @@ void TableBuilder::Add(const Slice& key, const Slice& value) {
assert(r->options.comparator->Compare(key, Slice(r->last_key)) > 0); assert(r->options.comparator->Compare(key, Slice(r->last_key)) > 0);
} }
const size_t curr_size = r->data_block.CurrentSizeEstimate();
const size_t estimated_size_after = r->data_block.EstimateSizeAfterKV(key,
value);
// Do flush if one of the below two conditions is true:
// 1) if the current estimated size already exceeds the block size,
// 2) block_size_deviation is set, the estimated size after appending
// the kv would exceed the block size, and the free space left in the
// current block is already below the deviation percentage.
if (curr_size >= r->options.block_size ||
(estimated_size_after > r->options.block_size &&
r->options.block_size_deviation > 0 &&
(curr_size * 100) >
r->options.block_size * (100 - r->options.block_size_deviation))) {
Flush();
}
if (r->pending_index_entry) { if (r->pending_index_entry) {
assert(r->data_block.empty()); assert(r->data_block.empty());
r->options.comparator->FindShortestSeparator(&r->last_key, key); r->options.comparator->FindShortestSeparator(&r->last_key, key);
@ -114,11 +130,6 @@ void TableBuilder::Add(const Slice& key, const Slice& value) {
r->last_key.assign(key.data(), key.size()); r->last_key.assign(key.data(), key.size());
r->num_entries++; r->num_entries++;
r->data_block.Add(key, value); r->data_block.Add(key, value);
const size_t estimated_block_size = r->data_block.CurrentSizeEstimate();
if (estimated_block_size >= r->options.block_size) {
Flush();
}
} }
void TableBuilder::Flush() { void TableBuilder::Flush() {

@ -70,7 +70,8 @@ Options::Options()
allow_mmap_writes(true), allow_mmap_writes(true),
is_fd_close_on_exec(true), is_fd_close_on_exec(true),
skip_log_error_on_recovery(false), skip_log_error_on_recovery(false),
stats_dump_period_sec(3600) { stats_dump_period_sec(3600),
block_size_deviation (10) {
} }
void void
@ -191,10 +192,15 @@ Options::Dump(Logger* log) const
allow_mmap_writes); allow_mmap_writes);
Log(log," Options.is_fd_close_on_exec: %d", Log(log," Options.is_fd_close_on_exec: %d",
is_fd_close_on_exec); is_fd_close_on_exec);
<<<<<<< HEAD
Log(log," Options.skip_log_error_on_recovery: %d", Log(log," Options.skip_log_error_on_recovery: %d",
skip_log_error_on_recovery); skip_log_error_on_recovery);
Log(log," Options.stats_dump_period_sec: %d", Log(log," Options.stats_dump_period_sec: %d",
stats_dump_period_sec); stats_dump_period_sec);
=======
Log(log," Options.block_size_deviation: %d",
block_size_deviation);
>>>>>>> add block deviation option to terminate a block before it exceeds block_size
} // Options::Dump } // Options::Dump
// //

Loading…
Cancel
Save