port folly::JemallocNodumpAllocator (#4534)

Summary:
Introduce `JemallocNodumpAllocator`, which allows excluding block cache memory from core dumps. It uses the custom extent hooks of a jemalloc arena: when the arena requests memory from the system, the allocator's hook sets `MADV_DONTDUMP` on that memory. The implementation is basically the same as `folly::JemallocNodumpAllocator`, except for some minor differences:
1. It only supports jemalloc >= 5.0.
2. When the allocator is destructed, it explicitly destroys the corresponding arena by calling `arena.<i>.destroy` via `mallctl`.

Depending on #4502.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4534

Differential Revision: D10435474

Pulled By: yiwu-arbug

fbshipit-source-id: e80edea755d3853182485d2be710376384ce0bb4
main
Yi Wu 6 years ago committed by Facebook Github Bot
parent 5b4c709fad
commit 5f5fddabc7
  1. 1
      CMakeLists.txt
  2. 6
      TARGETS
  3. 15
      include/rocksdb/memory_allocator.h
  4. 1
      src.mk
  5. 126
      util/jemalloc_nodump_allocator.cc
  6. 56
      util/jemalloc_nodump_allocator.h

@ -603,6 +603,7 @@ set(SOURCES
util/filename.cc
util/filter_policy.cc
util/hash.cc
util/jemalloc_nodump_allocator.cc
util/log_buffer.cc
util/murmurhash.cc
util/random.cc

@ -226,6 +226,7 @@ cpp_library(
"util/filename.cc",
"util/filter_policy.cc",
"util/hash.cc",
"util/jemalloc_nodump_allocator.cc",
"util/log_buffer.cc",
"util/murmurhash.cc",
"util/random.cc",
@ -931,6 +932,11 @@ ROCKS_TESTS = [
"db/range_del_aggregator_test.cc",
"serial",
],
[
"range_tombstone_fragmenter_test",
"db/range_tombstone_fragmenter_test.cc",
"serial",
],
[
"rate_limiter_test",
"util/rate_limiter_test.cc",

@ -5,6 +5,10 @@
#pragma once
#include "rocksdb/status.h"
#include <memory>
namespace rocksdb {
// MemoryAllocator is an interface that a client can implement to supply custom
@ -18,10 +22,12 @@ class MemoryAllocator {
// Name of the cache allocator, printed in the log
virtual const char* Name() const = 0;
// Allocate a block of at least size size
// Allocate a block of at least size. Has to be thread-safe.
virtual void* Allocate(size_t size) = 0;
// Deallocate previously allocated block
// Deallocate previously allocated block. Has to be thread-safe.
virtual void Deallocate(void* p) = 0;
// Returns the memory size of the block allocated at p. The default
// implementation that just returns the original allocation_size is fine.
virtual size_t UsableSize(void* /*p*/, size_t allocation_size) const {
@ -30,4 +36,9 @@ class MemoryAllocator {
}
};
// Creates a MemoryAllocator that allocates through jemalloc and uses
// madvise(MADV_DONTDUMP) to exclude the memory it hands out (e.g. cache
// items) from core dumps. On success the new allocator is stored in
// *memory_allocator. Returns Status::NotSupported when the build does not
// have jemalloc version >= 5 and MADV_DONTDUMP available.
extern Status NewJemallocNodumpAllocator(
std::shared_ptr<MemoryAllocator>* memory_allocator);
} // namespace rocksdb

@ -143,6 +143,7 @@ LIB_SOURCES = \
util/filename.cc \
util/filter_policy.cc \
util/hash.cc \
util/jemalloc_nodump_allocator.cc \
util/log_buffer.cc \
util/murmurhash.cc \
util/random.cc \

@ -0,0 +1,126 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "util/jemalloc_nodump_allocator.h"

#include <cassert>
#include <cerrno>
#include <cstdio>
#include <string>

#include "util/string_util.h"
namespace rocksdb {
#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
// Out-of-class definition of the static slot holding jemalloc's original
// extent alloc hook. It starts out as nullptr; NewJemallocNodumpAllocator()
// stores the hook it reads from the arena here, and Alloc() loads it to
// forward each allocation request.
std::atomic<extent_alloc_t*> JemallocNodumpAllocator::original_alloc_{nullptr};
JemallocNodumpAllocator::JemallocNodumpAllocator(
unsigned arena_index, int flags, std::unique_ptr<extent_hooks_t>&& hooks)
: arena_index_(arena_index), flags_(flags), hooks_(std::move(hooks)) {
assert(arena_index != 0);
}
// Allocates `size` bytes with mallocx() using the flags stored at
// construction, and returns whatever mallocx() returns.
// NOTE(review): jemalloc documents mallocx() with size == 0 as undefined
// behavior; callers are presumably expected to pass a non-zero size.
void* JemallocNodumpAllocator::Allocate(size_t size) {
  void* const block = mallocx(size, flags_);
  return block;
}
// Releases a block previously returned by Allocate(), passing dallocx() the
// same flags that were used for the allocation.
void JemallocNodumpAllocator::Deallocate(void* p) {
  dallocx(p, flags_);
}
// Custom extent alloc hook installed on the allocator's arena.
//
// Forwards the request, with all arguments unchanged, to the original jemalloc
// alloc hook saved in original_alloc_. If that hook returns memory, the range
// [result, result + size) is marked with MADV_DONTDUMP so that it is excluded
// from core dumps.
//
// Returns the pointer produced by the original hook (nullptr on failure). A
// madvise() failure is reported on stderr and trips an assert in debug
// builds; the memory is still returned.
void* JemallocNodumpAllocator::Alloc(extent_hooks_t* extent, void* new_addr,
                                     size_t size, size_t alignment, bool* zero,
                                     bool* commit, unsigned arena_ind) {
  extent_alloc_t* original_alloc =
      original_alloc_.load(std::memory_order_relaxed);
  assert(original_alloc != nullptr);
  void* result = original_alloc(extent, new_addr, size, alignment, zero, commit,
                                arena_ind);
  if (result != nullptr) {
    int ret = madvise(result, size, MADV_DONTDUMP);
    if (ret != 0) {
      // madvise() only ever returns -1 on failure; the actual cause is in
      // errno. Capture it before fprintf() has a chance to overwrite it.
      const int err = errno;
      fprintf(stderr,
              "JemallocNodumpAllocator failed to set MADV_DONTDUMP, error "
              "code: %d\n",
              err);
      assert(false);
    }
  }
  return result;
}
// Tears down the arena this allocator was bound to by issuing the
// "arena.<index>.destroy" mallctl. A failure is only logged to stderr. The
// hooks_ member is released after this body has run, i.e. after the destroy
// call has been issued.
JemallocNodumpAllocator::~JemallocNodumpAllocator() {
  assert(arena_index_ != 0);
  const std::string destroy_key =
      "arena." + ToString(arena_index_) + ".destroy";
  const int rc = mallctl(destroy_key.c_str(), nullptr, nullptr, nullptr, 0);
  if (rc == 0) {
    return;
  }
  fprintf(stderr, "Failed to destroy jemalloc arena, error code: %d\n", rc);
}
// Reports the usable size of the block at `p` as given by
// malloc_usable_size(); the caller-supplied allocation size is ignored.
size_t JemallocNodumpAllocator::UsableSize(void* p,
                                           size_t /*allocation_size*/) const {
  const size_t usable_bytes = malloc_usable_size(p);
  return usable_bytes;
}
#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
// Creates a JemallocNodumpAllocator backed by a freshly created jemalloc
// arena whose extent alloc hook marks new memory with MADV_DONTDUMP.
//
// memory_allocator: out-parameter that receives the new allocator; must be
//   non-null.
//
// Returns:
//   - InvalidArgument if memory_allocator is null.
//   - NotSupported (and sets *memory_allocator to nullptr) when
//     ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR is not defined.
//   - Incomplete if creating the arena, reading its hooks, or installing the
//     custom hooks fails. The arena is destroyed again on the latter two
//     failures, and *memory_allocator is left untouched.
//   - OK on success.
Status NewJemallocNodumpAllocator(
    std::shared_ptr<MemoryAllocator>* memory_allocator) {
  // Validate the out-parameter before either build configuration touches it;
  // the unsupported branch below writes through the pointer.
  if (memory_allocator == nullptr) {
    return Status::InvalidArgument("memory_allocator must be non-null.");
  }
#ifndef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
  *memory_allocator = nullptr;
  return Status::NotSupported(
      "JemallocNodumpAllocator only available with jemalloc version >= 5 "
      "and MADV_DONTDUMP is available.");
#else
  // Create arena.
  unsigned arena_index = 0;
  size_t arena_index_size = sizeof(arena_index);
  int ret =
      mallctl("arenas.create", &arena_index, &arena_index_size, nullptr, 0);
  if (ret != 0) {
    return Status::Incomplete("Failed to create jemalloc arena, error code: " +
                              ToString(ret));
  }
  assert(arena_index != 0);
  int flags = MALLOCX_ARENA(arena_index) | MALLOCX_TCACHE_NONE;
  const std::string arena_prefix = "arena." + ToString(arena_index);
  const std::string key = arena_prefix + ".extent_hooks";

  // Releases the arena created above. Called on every failure path below so
  // that an unsuccessful setup does not leak the arena; on success the
  // allocator's destructor takes over this responsibility.
  auto destroy_arena = [&arena_prefix]() {
    const std::string destroy_key = arena_prefix + ".destroy";
    int destroy_ret =
        mallctl(destroy_key.c_str(), nullptr, nullptr, nullptr, 0);
    if (destroy_ret != 0) {
      fprintf(stderr, "Failed to destroy jemalloc arena, error code: %d\n",
              destroy_ret);
    }
  };

  // Read existing hooks.
  extent_hooks_t* hooks = nullptr;
  size_t hooks_size = sizeof(hooks);
  ret = mallctl(key.c_str(), &hooks, &hooks_size, nullptr, 0);
  if (ret != 0) {
    destroy_arena();
    return Status::Incomplete("Failed to read existing hooks, error code: " +
                              ToString(ret));
  }

  // Store existing alloc. Only the first caller installs the pointer; later
  // callers are expected to observe the same original hook.
  extent_alloc_t* original_alloc = hooks->alloc;
  extent_alloc_t* expected = nullptr;
  bool success __attribute__((__unused__)) =
      JemallocNodumpAllocator::original_alloc_.compare_exchange_strong(
          expected, original_alloc);
  assert(success || original_alloc == expected);

  // Set the custom hook: a copy of the existing hooks with only `alloc`
  // replaced. The copy is owned by the allocator object created below.
  std::unique_ptr<extent_hooks_t> new_hooks(new extent_hooks_t(*hooks));
  new_hooks->alloc = &JemallocNodumpAllocator::Alloc;
  extent_hooks_t* hooks_ptr = new_hooks.get();
  ret = mallctl(key.c_str(), nullptr, nullptr, &hooks_ptr, sizeof(hooks_ptr));
  if (ret != 0) {
    destroy_arena();
    return Status::Incomplete("Failed to set custom hook, error code: " +
                              ToString(ret));
  }

  // Create cache allocator.
  memory_allocator->reset(
      new JemallocNodumpAllocator(arena_index, flags, std::move(new_hooks)));
  return Status::OK();
#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
}
} // namespace rocksdb

@ -0,0 +1,56 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <atomic>
#include "rocksdb/memory_allocator.h"
#if defined(ROCKSDB_JEMALLOC) && defined(ROCKSDB_PLATFORM_POSIX)
#include <jemalloc/jemalloc.h>
#include <sys/mman.h>
#if (JEMALLOC_VERSION_MAJOR >= 5) && defined(MADV_DONTDUMP)
#define ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
namespace rocksdb {
// MemoryAllocator implementation that serves allocations from a dedicated
// jemalloc arena. The arena's extent alloc hook is replaced with Alloc(),
// which applies MADV_DONTDUMP to memory obtained for the arena so that it is
// left out of core dumps. Instances are created through
// NewJemallocNodumpAllocator().
class JemallocNodumpAllocator : public MemoryAllocator {
public:
// Binds the allocator to arena `arena_index`, stores the mallocx/dallocx
// `flags` to use, and takes ownership of the custom extent `hooks`.
JemallocNodumpAllocator(unsigned arena_index, int flags,
std::unique_ptr<extent_hooks_t>&& hooks);
// Destroys the arena through the "arena.<index>.destroy" mallctl.
~JemallocNodumpAllocator();
// Name of the allocator, as required by the MemoryAllocator interface.
const char* Name() const override { return "JemallocNodumpAllocator"; }
// Allocates `size` bytes via mallocx() with the stored flags.
void* Allocate(size_t size) override;
// Frees a block via dallocx() with the stored flags.
void Deallocate(void* p) override;
// Returns malloc_usable_size(p); `allocation_size` is ignored.
size_t UsableSize(void* p, size_t allocation_size) const override;
private:
// The factory needs access to original_alloc_ and Alloc() to install the
// custom hook.
friend Status NewJemallocNodumpAllocator(
std::shared_ptr<MemoryAllocator>* memory_allocator);
// Custom alloc hook to replace jemalloc default alloc. Forwards to the
// original hook, then calls madvise(MADV_DONTDUMP) on the returned memory.
static void* Alloc(extent_hooks_t* extent, void* new_addr, size_t size,
size_t alignment, bool* zero, bool* commit,
unsigned arena_ind);
// A function pointer to jemalloc default alloc. Use atomic to make sure
// NewJemallocNodumpAllocator is thread-safe.
//
// Hack: original_alloc_ needs to be static for Alloc() to access it.
// Alloc() needs to be static to pass to jemalloc as function pointer.
static std::atomic<extent_alloc_t*> original_alloc_;
// Index of the jemalloc arena owned by this allocator; asserted non-zero.
unsigned arena_index_;
// Flags passed to mallocx()/dallocx(); the factory builds them from
// MALLOCX_ARENA(arena_index) | MALLOCX_TCACHE_NONE.
int flags_;
// Owns the extent hooks structure registered with the arena.
const std::unique_ptr<extent_hooks_t> hooks_;
};
} // namespace rocksdb
#endif // (JEMALLOC_VERSION_MAJOR >= 5) && MADV_DONTDUMP
#endif // ROCKSDB_JEMALLOC && ROCKSDB_PLATFORM_POSIX
Loading…
Cancel
Save