From 0f0a24e2981ba428cd5971bb960c8f80498aaa0a Mon Sep 17 00:00:00 2001 From: Xing Jin Date: Wed, 31 Jul 2013 12:42:23 -0700 Subject: [PATCH] Make arena block size configurable Summary: Add an option for arena block size, default value 4096 bytes. Arena will allocate blocks with such size. I am not sure about passing parameter to skiplist in the new virtualized framework, though I talked to Jim a bit. So add Jim as reviewer. Test Plan: new unit test, I am running db_test. For passing paramter from configured option to Arena, I tried tests like: TEST(DBTest, Arena_Option) { std::string dbname = test::TmpDir() + "/db_arena_option_test"; DestroyDB(dbname, Options()); DB* db = nullptr; Options opts; opts.create_if_missing = true; opts.arena_block_size = 1000000; // tested 99, 999999 Status s = DB::Open(opts, dbname, &db); db->Put(WriteOptions(), "a", "123"); } and printed some debug info. The results look good. Any suggestion for such a unit-test? Reviewers: haobo, dhruba, emayanke, jpaton Reviewed By: dhruba CC: leveldb, zshao Differential Revision: https://reviews.facebook.net/D11799 --- db/corruption_test.cc | 1 + db/db_impl.cc | 18 ++++++---- db/memtable.cc | 12 +++---- db/memtable.h | 11 +++--- db/memtablelist.h | 1 - db/skiplist.h | 3 -- db/skiplist_test.cc | 14 ++++---- db/skiplistrep.h | 14 +++----- include/leveldb/arena.h | 38 ++++++++++++++++++++ include/leveldb/memtablerep.h | 7 ++-- include/leveldb/options.h | 7 ++++ util/{arena.cc => arena_impl.cc} | 27 +++++++++------ util/{arena.h => arena_impl.h} | 43 +++++++++++++++-------- util/arena_test.cc | 59 ++++++++++++++++++++++++++------ util/options.cc | 3 ++ 15 files changed, 181 insertions(+), 77 deletions(-) create mode 100644 include/leveldb/arena.h rename util/{arena.cc => arena_impl.cc} (74%) rename util/{arena.h => arena_impl.h} (55%) diff --git a/db/corruption_test.cc b/db/corruption_test.cc index 7d9b4b068..4ed04c0d1 100644 --- a/db/corruption_test.cc +++ b/db/corruption_test.cc @@ -57,6 +57,7 @@ class CorruptionTest { opt.env = &env_; opt.block_cache = tiny_cache_; opt.block_size_deviation = 0; + opt.arena_block_size = 4096; return DB::Open(opt, dbname_, &db_); } diff --git a/db/db_impl.cc b/db/db_impl.cc index dbc6f71d3..7bc46c2b4 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -128,6 +128,12 @@ Options SanitizeOptions(const std::string& dbname, ((size_t)64)<<30); ClipToRange(&result.block_size, 1<<10, 4<<20); + // if user sets arena_block_size, we trust user to use this value. Otherwise, + // calculate a proper value from writer_buffer_size; + if (result.arena_block_size <= 0) { + result.arena_block_size = result.write_buffer_size / 10; + } + result.min_write_buffer_number_to_merge = std::min( result.min_write_buffer_number_to_merge, result.max_write_buffer_number-1); if (result.info_log == nullptr) { @@ -164,8 +170,8 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname) shutting_down_(nullptr), bg_cv_(&mutex_), mem_rep_factory_(options_.memtable_factory), - mem_(new MemTable(internal_comparator_, - mem_rep_factory_, NumberLevels())), + mem_(new MemTable(internal_comparator_, mem_rep_factory_, + NumberLevels(), options_)), logfile_number_(0), tmp_batch_(), bg_compaction_scheduled_(0), @@ -696,8 +702,8 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, WriteBatchInternal::SetContents(&batch, record); if (mem == nullptr) { - mem = new MemTable(internal_comparator_, - mem_rep_factory_, NumberLevels()); + mem = new MemTable(internal_comparator_, mem_rep_factory_, + NumberLevels(), options_); mem->Ref(); } status = WriteBatchInternal::InsertInto(&batch, mem, &options_); @@ -2545,8 +2551,8 @@ Status DBImpl::MakeRoomForWrite(bool force) { log_.reset(new log::Writer(std::move(lfile))); mem_->SetLogNumber(logfile_number_); imm_.Add(mem_); - mem_ = new MemTable(internal_comparator_, - mem_rep_factory_, NumberLevels()); + mem_ = new MemTable(internal_comparator_, mem_rep_factory_, + NumberLevels(), options_); mem_->Ref(); force = false; // Do not force another compaction if have room MaybeScheduleCompaction(); diff --git a/db/memtable.cc b/db/memtable.cc index 8ccf49df0..fb3f4f1f7 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -24,10 +24,12 @@ static Slice GetLengthPrefixedSlice(const char* data) { MemTable::MemTable(const InternalKeyComparator& cmp, std::shared_ptr table_factory, - int numlevel) + int numlevel, + const Options& options) : comparator_(cmp), refs_(0), - table_(table_factory->CreateMemTableRep(comparator_)), + arena_impl_(options.arena_block_size), + table_(table_factory->CreateMemTableRep(comparator_, &arena_impl_)), flush_in_progress_(false), flush_completed_(false), file_number_(0), @@ -40,9 +42,7 @@ MemTable::~MemTable() { } size_t MemTable::ApproximateMemoryUsage() { - // The first term is the amount of memory used by the memtable and - // the second term is the amount of memory used by the backing store - return arena_.MemoryUsage() + table_->ApproximateMemoryUsage(); + return arena_impl_.ApproximateMemoryUsage(); } int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr) @@ -111,7 +111,7 @@ void MemTable::Add(SequenceNumber s, ValueType type, const size_t encoded_len = VarintLength(internal_key_size) + internal_key_size + VarintLength(val_size) + val_size; - char* buf = arena_.Allocate(encoded_len); + char* buf = arena_impl_.Allocate(encoded_len); char* p = EncodeVarint32(buf, internal_key_size); memcpy(p, key.data(), key_size); p += key_size; diff --git a/db/memtable.h b/db/memtable.h index 2ffe4b913..6a3c7fcfd 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -11,8 +11,8 @@ #include "db/dbformat.h" #include "db/skiplist.h" #include "db/version_set.h" -#include "util/arena.h" #include "leveldb/memtablerep.h" +#include "util/arena_impl.h" namespace leveldb { @@ -29,8 +29,11 @@ class MemTable { // MemTables are reference counted. The initial reference count // is zero and the caller must call Ref() at least once. - explicit MemTable(const InternalKeyComparator& comparator, - std::shared_ptr table_factory, int numlevel = 7); + explicit MemTable( + const InternalKeyComparator& comparator, + std::shared_ptr table_factory, + int numlevel = 7, + const Options& options = Options()); // Increase reference count. void Ref() { ++refs_; } @@ -101,7 +104,7 @@ class MemTable { KeyComparator comparator_; int refs_; - Arena arena_; + ArenaImpl arena_impl_; shared_ptr table_; // These are used to manage memtable flushes to storage diff --git a/db/memtablelist.h b/db/memtablelist.h index 40419e56f..d741a6630 100644 --- a/db/memtablelist.h +++ b/db/memtablelist.h @@ -8,7 +8,6 @@ #include "leveldb/db.h" #include "db/dbformat.h" #include "db/skiplist.h" -#include "util/arena.h" #include "memtable.h" namespace leveldb { diff --git a/db/skiplist.h b/db/skiplist.h index 1c7b4dd71..a3fe05dbb 100644 --- a/db/skiplist.h +++ b/db/skiplist.h @@ -31,13 +31,10 @@ #include #include #include "port/port.h" -#include "util/arena.h" #include "util/random.h" namespace leveldb { -class Arena; - template class SkipList { private: diff --git a/db/skiplist_test.cc b/db/skiplist_test.cc index fa8a21a31..3542183f1 100644 --- a/db/skiplist_test.cc +++ b/db/skiplist_test.cc @@ -5,7 +5,7 @@ #include "db/skiplist.h" #include #include "leveldb/env.h" -#include "util/arena.h" +#include "util/arena_impl.h" #include "util/hash.h" #include "util/random.h" #include "util/testharness.h" @@ -29,9 +29,9 @@ struct TestComparator { class SkipTest { }; TEST(SkipTest, Empty) { - Arena arena; + ArenaImpl arena_impl; TestComparator cmp; - SkipList list(cmp, &arena); + SkipList list(cmp, &arena_impl); ASSERT_TRUE(!list.Contains(10)); SkipList::Iterator iter(&list); @@ -49,9 +49,9 @@ TEST(SkipTest, InsertAndLookup) { const int R = 5000; Random rnd(1000); std::set keys; - Arena arena; + ArenaImpl arena_impl; TestComparator cmp; - SkipList list(cmp, &arena); + SkipList list(cmp, &arena_impl); for (int i = 0; i < N; i++) { Key key = rnd.Next() % R; if (keys.insert(key).second) { @@ -204,14 +204,14 @@ class ConcurrentTest { // Current state of the test State current_; - Arena arena_; + ArenaImpl arena_impl_; // SkipList is not protected by mu_. We just use a single writer // thread to modify it. SkipList list_; public: - ConcurrentTest() : list_(TestComparator(), &arena_) { } + ConcurrentTest() : list_(TestComparator(), &arena_impl_) { } // REQUIRES: External synchronization void WriteStep(Random* rnd) { diff --git a/db/skiplistrep.h b/db/skiplistrep.h index 0f7523b6e..d22768f4d 100644 --- a/db/skiplistrep.h +++ b/db/skiplistrep.h @@ -10,11 +10,11 @@ namespace leveldb { class Arena; class SkipListRep : public MemTableRep { - Arena arena_; SkipList skip_list_; public: - explicit SkipListRep(MemTableRep::KeyComparator& compare) - : skip_list_(compare, &arena_) { } + explicit SkipListRep(MemTableRep::KeyComparator& compare, Arena* arena) + : skip_list_(compare, arena) { +} // Insert key into the list. // REQUIRES: nothing that compares equal to key is currently in the list. @@ -27,10 +27,6 @@ public: return skip_list_.Contains(key); } - virtual size_t ApproximateMemoryUsage() { - return arena_.MemoryUsage(); - } - virtual ~SkipListRep() { } // Iteration over the contents of a skip list @@ -96,8 +92,8 @@ public: class SkipListFactory : public MemTableRepFactory { public: virtual std::shared_ptr CreateMemTableRep ( - MemTableRep::KeyComparator& compare) { - return std::shared_ptr(new SkipListRep(compare)); + MemTableRep::KeyComparator& compare, Arena* arena) { + return std::shared_ptr(new SkipListRep(compare, arena)); } }; diff --git a/include/leveldb/arena.h b/include/leveldb/arena.h new file mode 100644 index 000000000..6e3a1f00b --- /dev/null +++ b/include/leveldb/arena.h @@ -0,0 +1,38 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Arena class defines memory allocation methods. It's used by memtable and +// skiplist. + +#ifndef STORAGE_LEVELDB_INCLUDE_ARENA_H_ +#define STORAGE_LEVELDB_INCLUDE_ARENA_H_ + +namespace leveldb { + +class Arena { + public: + Arena() {}; + virtual ~Arena() {}; + + // Return a pointer to a newly allocated memory block of "bytes" bytes. + virtual char* Allocate(size_t bytes) = 0; + + // Allocate memory with the normal alignment guarantees provided by malloc. + virtual char* AllocateAligned(size_t bytes) = 0; + + // Returns an estimate of the total memory used by arena. + virtual const size_t ApproximateMemoryUsage() = 0; + + // Returns the total number of bytes in all blocks allocated so far. + virtual const size_t MemoryAllocatedBytes() = 0; + + private: + // No copying allowed + Arena(const Arena&); + void operator=(const Arena&); +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_INCLUDE_ARENA_H_ diff --git a/include/leveldb/memtablerep.h b/include/leveldb/memtablerep.h index cb5a6ed35..bf769f543 100644 --- a/include/leveldb/memtablerep.h +++ b/include/leveldb/memtablerep.h @@ -13,6 +13,7 @@ #define STORAGE_LEVELDB_DB_TABLE_H_ #include +#include "leveldb/arena.h" namespace leveldb { @@ -35,10 +36,6 @@ class MemTableRep { // Returns true iff an entry that compares equal to key is in the collection. virtual bool Contains(const char* key) const = 0; - // Returns an estimate of the number of bytes of data in use by this - // data structure. - virtual size_t ApproximateMemoryUsage() = 0; - virtual ~MemTableRep() { } // Iteration over the contents of a skip collection @@ -83,7 +80,7 @@ class MemTableRepFactory { public: virtual ~MemTableRepFactory() { }; virtual std::shared_ptr CreateMemTableRep( - MemTableRep::KeyComparator&) = 0; + MemTableRep::KeyComparator&, Arena* arena) = 0; }; } diff --git a/include/leveldb/options.h b/include/leveldb/options.h index f754fd3bf..6252a1eb0 100644 --- a/include/leveldb/options.h +++ b/include/leveldb/options.h @@ -375,6 +375,13 @@ struct Options { // Number of shards used for table cache. int table_cache_numshardbits; + // size of one block in arena memory allocation. + // If <= 0, a proper value is automatically calculated (usually 1/10 of + // writer_buffer_size). + // + // Default: 0 + size_t arena_block_size; + // Create an Options object with default values for all fields. Options(); diff --git a/util/arena.cc b/util/arena_impl.cc similarity index 74% rename from util/arena.cc rename to util/arena_impl.cc index a339f4055..6d39a80d7 100644 --- a/util/arena.cc +++ b/util/arena_impl.cc @@ -2,27 +2,32 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#include "util/arena.h" -#include +#include "util/arena_impl.h" namespace leveldb { -static const int kBlockSize = 4096; +ArenaImpl::ArenaImpl(size_t block_size) { + if (block_size < kMinBlockSize) { + block_size_ = kMinBlockSize; + } else if (block_size > kMaxBlockSize) { + block_size_ = kMaxBlockSize; + } else { + block_size_ = block_size; + } -Arena::Arena() { blocks_memory_ = 0; alloc_ptr_ = nullptr; // First allocation will allocate a block alloc_bytes_remaining_ = 0; } -Arena::~Arena() { +ArenaImpl::~ArenaImpl() { for (size_t i = 0; i < blocks_.size(); i++) { delete[] blocks_[i]; } } -char* Arena::AllocateFallback(size_t bytes) { - if (bytes > kBlockSize / 4) { +char* ArenaImpl::AllocateFallback(size_t bytes) { + if (bytes > block_size_ / 4) { // Object is more than a quarter of our block size. Allocate it separately // to avoid wasting too much space in leftover bytes. char* result = AllocateNewBlock(bytes); @@ -30,8 +35,8 @@ char* Arena::AllocateFallback(size_t bytes) { } // We waste the remaining space in the current block. - alloc_ptr_ = AllocateNewBlock(kBlockSize); - alloc_bytes_remaining_ = kBlockSize; + alloc_ptr_ = AllocateNewBlock(block_size_); + alloc_bytes_remaining_ = block_size_; char* result = alloc_ptr_; alloc_ptr_ += bytes; @@ -39,7 +44,7 @@ char* Arena::AllocateFallback(size_t bytes) { return result; } -char* Arena::AllocateAligned(size_t bytes) { +char* ArenaImpl::AllocateAligned(size_t bytes) { const int align = sizeof(void*); // We'll align to pointer size assert((align & (align-1)) == 0); // Pointer size should be a power of 2 size_t current_mod = reinterpret_cast(alloc_ptr_) & (align-1); @@ -58,7 +63,7 @@ char* Arena::AllocateAligned(size_t bytes) { return result; } -char* Arena::AllocateNewBlock(size_t block_bytes) { +char* ArenaImpl::AllocateNewBlock(size_t block_bytes) { char* result = new char[block_bytes]; blocks_memory_ += block_bytes; blocks_.push_back(result); diff --git a/util/arena.h b/util/arena_impl.h similarity index 55% rename from util/arena.h rename to util/arena_impl.h index 8f7dde226..a5425e87a 100644 --- a/util/arena.h +++ b/util/arena_impl.h @@ -2,38 +2,53 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#ifndef STORAGE_LEVELDB_UTIL_ARENA_H_ -#define STORAGE_LEVELDB_UTIL_ARENA_H_ +// ArenaImpl is an implementation of Arena class. For a request of small size, +// it allocates a block with pre-defined block size. For a request of big +// size, it uses malloc to directly get the requested size. + +#ifndef STORAGE_LEVELDB_UTIL_ARENA_IMPL_H_ +#define STORAGE_LEVELDB_UTIL_ARENA_IMPL_H_ #include #include #include #include +#include "leveldb/arena.h" namespace leveldb { -class Arena { +class ArenaImpl : public Arena { public: - Arena(); - ~Arena(); + explicit ArenaImpl(size_t block_size = kMinBlockSize); + virtual ~ArenaImpl(); - // Return a pointer to a newly allocated memory block of "bytes" bytes. - char* Allocate(size_t bytes); + virtual char* Allocate(size_t bytes); - // Allocate memory with the normal alignment guarantees provided by malloc - char* AllocateAligned(size_t bytes); + virtual char* AllocateAligned(size_t bytes); // Returns an estimate of the total memory usage of data allocated // by the arena (including space allocated but not yet used for user // allocations). - size_t MemoryUsage() const { + // + // TODO: Do we need to exclude space allocated but not used? + virtual const size_t ApproximateMemoryUsage() { return blocks_memory_ + blocks_.capacity() * sizeof(char*); } + virtual const size_t MemoryAllocatedBytes() { + return blocks_memory_; + } + private: char* AllocateFallback(size_t bytes); char* AllocateNewBlock(size_t block_bytes); + static const size_t kMinBlockSize = 4096; + static const size_t kMaxBlockSize = 2 << 30; + + // Number of bytes allocated in one block + size_t block_size_; + // Allocation state char* alloc_ptr_; size_t alloc_bytes_remaining_; @@ -45,11 +60,11 @@ class Arena { size_t blocks_memory_; // No copying allowed - Arena(const Arena&); - void operator=(const Arena&); + ArenaImpl(const ArenaImpl&); + void operator=(const ArenaImpl&); }; -inline char* Arena::Allocate(size_t bytes) { +inline char* ArenaImpl::Allocate(size_t bytes) { // The semantics of what to return are a bit messy if we allow // 0-byte allocations, so we disallow them here (we don't need // them for our internal use). @@ -65,4 +80,4 @@ inline char* Arena::Allocate(size_t bytes) { } // namespace leveldb -#endif // STORAGE_LEVELDB_UTIL_ARENA_H_ +#endif // STORAGE_LEVELDB_UTIL_ARENA_IMPL_H_ diff --git a/util/arena_test.cc b/util/arena_test.cc index d5c33d75b..13c6e9391 100644 --- a/util/arena_test.cc +++ b/util/arena_test.cc @@ -2,22 +2,59 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#include "util/arena.h" - +#include "util/arena_impl.h" #include "util/random.h" #include "util/testharness.h" namespace leveldb { -class ArenaTest { }; +class ArenaImplTest { }; + +TEST(ArenaImplTest, Empty) { + ArenaImpl arena0; +} + +TEST(ArenaImplTest, MemoryAllocatedBytes) { + const int N = 17; + size_t req_sz; //requested size + size_t bsz = 8192; // block size + size_t expected_memory_allocated; -TEST(ArenaTest, Empty) { - Arena arena; + ArenaImpl arena_impl(bsz); + + // requested size > quarter of a block: + // allocate requested size separately + req_sz = 3001; + for (int i = 0; i < N; i++) { + arena_impl.Allocate(req_sz); + } + expected_memory_allocated = req_sz * N; + ASSERT_EQ(arena_impl.MemoryAllocatedBytes(), expected_memory_allocated); + + // requested size < quarter of a block: + // allocate a block with the default size, then try to use unused part + // of the block. So one new block will be allocated for the first + // Allocate(99) call. All the remaining calls won't lead to new allocation. + req_sz = 99; + for (int i = 0; i < N; i++) { + arena_impl.Allocate(req_sz); + } + expected_memory_allocated += bsz; + ASSERT_EQ(arena_impl.MemoryAllocatedBytes(), expected_memory_allocated); + + // requested size > quarter of a block: + // allocate requested size separately + req_sz = 99999999; + for (int i = 0; i < N; i++) { + arena_impl.Allocate(req_sz); + } + expected_memory_allocated += req_sz * N; + ASSERT_EQ(arena_impl.MemoryAllocatedBytes(), expected_memory_allocated); } -TEST(ArenaTest, Simple) { +TEST(ArenaImplTest, Simple) { std::vector > allocated; - Arena arena; + ArenaImpl arena_impl; const int N = 100000; size_t bytes = 0; Random rnd(301); @@ -35,9 +72,9 @@ TEST(ArenaTest, Simple) { } char* r; if (rnd.OneIn(10)) { - r = arena.AllocateAligned(s); + r = arena_impl.AllocateAligned(s); } else { - r = arena.Allocate(s); + r = arena_impl.Allocate(s); } for (unsigned int b = 0; b < s; b++) { @@ -46,9 +83,9 @@ TEST(ArenaTest, Simple) { } bytes += s; allocated.push_back(std::make_pair(s, r)); - ASSERT_GE(arena.MemoryUsage(), bytes); + ASSERT_GE(arena_impl.ApproximateMemoryUsage(), bytes); if (i > N/10) { - ASSERT_LE(arena.MemoryUsage(), bytes * 1.10); + ASSERT_LE(arena_impl.ApproximateMemoryUsage(), bytes * 1.10); } } for (unsigned int i = 0; i < allocated.size(); i++) { diff --git a/util/options.cc b/util/options.cc index 912c53923..99cd1b094 100644 --- a/util/options.cc +++ b/util/options.cc @@ -61,6 +61,7 @@ Options::Options() max_manifest_file_size(std::numeric_limits::max()), no_block_cache(false), table_cache_numshardbits(4), + arena_block_size(0), disable_auto_compactions(false), WAL_ttl_seconds(0), manifest_preallocation_size(4 * 1024 * 1024), @@ -174,6 +175,8 @@ Options::Dump(Logger* log) const no_block_cache); Log(log," Options.table_cache_numshardbits: %d", table_cache_numshardbits); + Log(log," Options.arena_block_size: %ld", + arena_block_size); Log(log," Options.delete_obsolete_files_period_micros: %ld", delete_obsolete_files_period_micros); Log(log," Options.max_background_compactions: %d",