|
|
|
//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
//  This source code is licensed under the BSD-style license found in the
//  LICENSE file in the root directory of this source tree. An additional grant
//  of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#include "util/arena.h"
|
|
|
|
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
|
|
|
|
#include <malloc.h>
|
|
|
|
#endif
|
|
|
|
#ifndef OS_WIN
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#endif
|
|
|
|
#include "port/port.h"
|
|
|
|
#include <algorithm>
|
|
|
|
#include "rocksdb/env.h"
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
// MSVC complains that it is already defined since it is static in the header.
|
|
|
|
#ifndef OS_WIN
|
|
|
|
const size_t Arena::kInlineSize;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
const size_t Arena::kMinBlockSize = 4096;
|
|
|
|
const size_t Arena::kMaxBlockSize = 2 << 30;
|
|
|
|
static const int kAlignUnit = sizeof(void*);
|
|
|
|
|
|
|
|
size_t OptimizeBlockSize(size_t block_size) {
|
|
|
|
// Make sure block_size is in optimal range
|
|
|
|
block_size = std::max(Arena::kMinBlockSize, block_size);
|
|
|
|
block_size = std::min(Arena::kMaxBlockSize, block_size);
|
|
|
|
|
|
|
|
// make sure block_size is the multiple of kAlignUnit
|
|
|
|
if (block_size % kAlignUnit != 0) {
|
|
|
|
block_size = (1 + block_size / kAlignUnit) * kAlignUnit;
|
Make arena block size configurable
Summary:
Add an option for arena block size, default value 4096 bytes. Arena will allocate blocks with such size.
I am not sure about passing parameter to skiplist in the new virtualized framework, though I talked to Jim a bit. So add Jim as reviewer.
Test Plan:
new unit test, I am running db_test.
For passing paramter from configured option to Arena, I tried tests like:
TEST(DBTest, Arena_Option) {
std::string dbname = test::TmpDir() + "/db_arena_option_test";
DestroyDB(dbname, Options());
DB* db = nullptr;
Options opts;
opts.create_if_missing = true;
opts.arena_block_size = 1000000; // tested 99, 999999
Status s = DB::Open(opts, dbname, &db);
db->Put(WriteOptions(), "a", "123");
}
and printed some debug info. The results look good. Any suggestion for such a unit-test?
Reviewers: haobo, dhruba, emayanke, jpaton
Reviewed By: dhruba
CC: leveldb, zshao
Differential Revision: https://reviews.facebook.net/D11799
11 years ago
|
|
|
}
|
|
|
|
|
|
|
|
return block_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Builds an arena whose regular block size is OptimizeBlockSize(block_size).
//
// Allocation starts out of the embedded inline_block_: the aligned cursor is
// placed at its beginning and the unaligned cursor at its end, and its size
// is counted in blocks_memory_.
//
// When MAP_HUGETLB is available, huge_page_size is remembered in
// hugetlb_size_; if it is non-zero but smaller than the block size it is
// rounded up to the smallest multiple of itself that covers one block.
Arena::Arena(size_t block_size, size_t huge_page_size)
    : kBlockSize(OptimizeBlockSize(block_size)) {
  // Sanity-check what OptimizeBlockSize() produced.
  assert(kBlockSize >= kMinBlockSize);
  assert(kBlockSize <= kMaxBlockSize);
  assert(kBlockSize % kAlignUnit == 0);

  // Serve the first allocations from the inline buffer.
  alloc_bytes_remaining_ = sizeof(inline_block_);
  aligned_alloc_ptr_ = inline_block_;
  unaligned_alloc_ptr_ = inline_block_ + alloc_bytes_remaining_;
  blocks_memory_ += alloc_bytes_remaining_;

#ifdef MAP_HUGETLB
  hugetlb_size_ = huge_page_size;
  if (hugetlb_size_ != 0 && kBlockSize > hugetlb_size_) {
    // Number of huge pages needed to hold one regular block.
    const auto pages_per_block = (kBlockSize - 1U) / hugetlb_size_ + 1U;
    hugetlb_size_ = pages_per_block * hugetlb_size_;
  }
#endif
}
|
|
|
|
|
|
|
|
// Frees everything the arena handed out: the heap blocks recorded in blocks_
// and, when MAP_HUGETLB is available, the mappings recorded in huge_blocks_.
Arena::~Arena() {
  // Heap blocks were created with new[] in AllocateNewBlock().
  for (const auto& heap_block : blocks_) {
    delete[] heap_block;
  }

#ifdef MAP_HUGETLB
  // Huge-page blocks were created with mmap() in AllocateFromHugePage().
  for (const auto& mapping : huge_blocks_) {
    const auto rc = munmap(mapping.addr_, mapping.length_);
    if (rc != 0) {
      // TODO(sdong): Better handling
    }
  }
#endif
}
|
|
|
|
|
|
|
|
char* Arena::AllocateFallback(size_t bytes, bool aligned) {
|
|
|
|
if (bytes > kBlockSize / 4) {
|
|
|
|
++irregular_block_num;
|
|
|
|
// Object is more than a quarter of our block size. Allocate it separately
|
|
|
|
// to avoid wasting too much space in leftover bytes.
|
|
|
|
return AllocateNewBlock(bytes);
|
|
|
|
}
|
|
|
|
|
|
|
|
// We waste the remaining space in the current block.
|
|
|
|
size_t size = 0;
|
|
|
|
char* block_head = nullptr;
|
|
|
|
#ifdef MAP_HUGETLB
|
|
|
|
if (hugetlb_size_) {
|
|
|
|
size = hugetlb_size_;
|
|
|
|
block_head = AllocateFromHugePage(size);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
if (size == 0) {
|
|
|
|
assert(block_head == nullptr);
|
|
|
|
size = kBlockSize;
|
|
|
|
block_head = AllocateNewBlock(size);
|
|
|
|
}
|
|
|
|
alloc_bytes_remaining_ = size - bytes;
|
|
|
|
|
|
|
|
if (aligned) {
|
|
|
|
aligned_alloc_ptr_ = block_head + bytes;
|
|
|
|
unaligned_alloc_ptr_ = block_head + size;
|
|
|
|
return block_head;
|
|
|
|
} else {
|
|
|
|
aligned_alloc_ptr_ = block_head;
|
|
|
|
unaligned_alloc_ptr_ = block_head + size - bytes;
|
|
|
|
return unaligned_alloc_ptr_;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Maps `bytes` bytes of anonymous huge-page memory and records the mapping in
// huge_blocks_ so the destructor can unmap it.
//
// @param bytes  length of the mapping to create
// @return       start of the mapping, or nullptr when huge pages are disabled
//               (hugetlb_size_ == 0), when mmap() fails, or when the build
//               has no MAP_HUGETLB
char* Arena::AllocateFromHugePage(size_t bytes) {
#ifdef MAP_HUGETLB
  if (hugetlb_size_ == 0) {
    return nullptr;
  }
  // already reserve space in huge_blocks_ before calling mmap().
  // this way the insertion into the vector below will not throw and we
  // won't leak the mapping in that case. if reserve() throws, we
  // won't leak either
  huge_blocks_.reserve(huge_blocks_.size() + 1);

  // Anonymous mappings are not backed by a file; pass fd == -1, which is what
  // portable code is expected to use together with MAP_ANONYMOUS.
  void* addr = mmap(nullptr, bytes, (PROT_READ | PROT_WRITE),
                    (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), -1, 0);

  if (addr == MAP_FAILED) {
    return nullptr;
  }
  // the following shouldn't throw because of the above reserve()
  huge_blocks_.emplace_back(MmapInfo(addr, bytes));
  blocks_memory_ += bytes;
  return reinterpret_cast<char*>(addr);
#else
  return nullptr;
#endif
}
|
|
|
|
|
|
|
|
char* Arena::AllocateAligned(size_t bytes, size_t huge_page_size,
|
|
|
|
Logger* logger) {
|
|
|
|
assert((kAlignUnit & (kAlignUnit - 1)) ==
|
|
|
|
0); // Pointer size should be a power of 2
|
|
|
|
|
|
|
|
#ifdef MAP_HUGETLB
|
|
|
|
if (huge_page_size > 0 && bytes > 0) {
|
|
|
|
// Allocate from a huge page TBL table.
|
|
|
|
assert(logger != nullptr); // logger need to be passed in.
|
|
|
|
size_t reserved_size =
|
|
|
|
((bytes - 1U) / huge_page_size + 1U) * huge_page_size;
|
|
|
|
assert(reserved_size >= bytes);
|
|
|
|
|
|
|
|
char* addr = AllocateFromHugePage(reserved_size);
|
|
|
|
if (addr == nullptr) {
|
|
|
|
Warn(logger, "AllocateAligned fail to allocate huge TLB pages: %s",
|
|
|
|
strerror(errno));
|
|
|
|
// fail back to malloc
|
|
|
|
} else {
|
|
|
|
return addr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
size_t current_mod =
|
|
|
|
reinterpret_cast<uintptr_t>(aligned_alloc_ptr_) & (kAlignUnit - 1);
|
|
|
|
size_t slop = (current_mod == 0 ? 0 : kAlignUnit - current_mod);
|
|
|
|
size_t needed = bytes + slop;
|
|
|
|
char* result;
|
|
|
|
if (needed <= alloc_bytes_remaining_) {
|
|
|
|
result = aligned_alloc_ptr_ + slop;
|
|
|
|
aligned_alloc_ptr_ += needed;
|
|
|
|
alloc_bytes_remaining_ -= needed;
|
|
|
|
} else {
|
|
|
|
// AllocateFallback always returned aligned memory
|
|
|
|
result = AllocateFallback(bytes, true /* aligned */);
|
|
|
|
}
|
|
|
|
assert((reinterpret_cast<uintptr_t>(result) & (kAlignUnit - 1)) == 0);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Allocates a heap block of block_bytes bytes with new[], adds its size to
// blocks_memory_ and records it in blocks_ so the destructor can free it.
//
// @param block_bytes  size of the block to allocate
// @return             pointer to the start of the new block
char* Arena::AllocateNewBlock(size_t block_bytes) {
  // Grow blocks_ up front: if reserve() throws nothing has been allocated
  // yet, and the push_back() further down is then not expected to throw, so
  // the freshly allocated block cannot be leaked.
  blocks_.reserve(blocks_.size() + 1);

  char* const fresh_block = new char[block_bytes];

  // Account for what the allocator reports as usable when that is available,
  // otherwise for the requested size.
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
  const size_t accounted_bytes = malloc_usable_size(fresh_block);
#else
  const size_t accounted_bytes = block_bytes;
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
  blocks_memory_ += accounted_bytes;

  blocks_.push_back(fresh_block);
  return fresh_block;
}
|
|
|
|
|
|
|
|
} // namespace rocksdb
|