From 5f5fddabc78d18d71eb711db12421fdfff866665 Mon Sep 17 00:00:00 2001 From: Yi Wu Date: Fri, 26 Oct 2018 17:27:13 -0700 Subject: [PATCH] port folly::JemallocNodumpAllocator (#4534) Summary: Introduce `JemallocNodumpAllocator`, which allows exclusion of block cache usage from core dumps. It utilizes the custom hook of a jemalloc arena: when the arena requests memory from the system, the allocator uses the hook to set `MADV_DONTDUMP` on that memory. The implementation is basically the same as `folly::JemallocNodumpAllocator`, except for some minor differences: 1. It only supports jemalloc >= 5.0. 2. When the allocator is destructed, it explicitly destroys the corresponding arena by calling `mallctl` with `arena.<i>.destroy`. Depends on #4502. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4534 Differential Revision: D10435474 Pulled By: yiwu-arbug fbshipit-source-id: e80edea755d3853182485d2be710376384ce0bb4 --- CMakeLists.txt | 1 + TARGETS | 6 ++ include/rocksdb/memory_allocator.h | 15 +++- src.mk | 1 + util/jemalloc_nodump_allocator.cc | 126 +++++++++++++++++++++++++++++ util/jemalloc_nodump_allocator.h | 56 +++++++++++++ 6 files changed, 203 insertions(+), 2 deletions(-) create mode 100644 util/jemalloc_nodump_allocator.cc create mode 100644 util/jemalloc_nodump_allocator.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ccbe14a00..6cb80cd10 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -603,6 +603,7 @@ set(SOURCES util/filename.cc util/filter_policy.cc util/hash.cc + util/jemalloc_nodump_allocator.cc util/log_buffer.cc util/murmurhash.cc util/random.cc diff --git a/TARGETS b/TARGETS index 68a8e6f7f..5040ff153 100644 --- a/TARGETS +++ b/TARGETS @@ -226,6 +226,7 @@ cpp_library( "util/filename.cc", "util/filter_policy.cc", "util/hash.cc", + "util/jemalloc_nodump_allocator.cc", "util/log_buffer.cc", "util/murmurhash.cc", "util/random.cc", @@ -931,6 +932,11 @@ ROCKS_TESTS = [ "db/range_del_aggregator_test.cc", "serial", ], + [ + "range_tombstone_fragmenter_test",
"db/range_tombstone_fragmenter_test.cc", + "serial", + ], [ "rate_limiter_test", "util/rate_limiter_test.cc", diff --git a/include/rocksdb/memory_allocator.h b/include/rocksdb/memory_allocator.h index 30b77dfdf..15aab65fc 100644 --- a/include/rocksdb/memory_allocator.h +++ b/include/rocksdb/memory_allocator.h @@ -5,6 +5,10 @@ #pragma once +#include "rocksdb/status.h" + +#include + namespace rocksdb { // MemoryAllocator is an interface that a client can implement to supply custom @@ -18,10 +22,12 @@ class MemoryAllocator { // Name of the cache allocator, printed in the log virtual const char* Name() const = 0; - // Allocate a block of at least size size + // Allocate a block of at least size. Has to be thread-safe. virtual void* Allocate(size_t size) = 0; - // Deallocate previously allocated block + + // Deallocate previously allocated block. Has to be thread-safe. virtual void Deallocate(void* p) = 0; + // Returns the memory size of the block allocated at p. The default // implementation that just returns the original allocation_size is fine. virtual size_t UsableSize(void* /*p*/, size_t allocation_size) const { @@ -30,4 +36,9 @@ class MemoryAllocator { } }; +// Generate cache allocators which allocates through Jemalloc and utilize +// MADV_DONTDUMP through madvice to exclude cache items from core dump. +extern Status NewJemallocNodumpAllocator( + std::shared_ptr* memory_allocator); + } // namespace rocksdb diff --git a/src.mk b/src.mk index 7ebd93a15..cfe9dcd2f 100644 --- a/src.mk +++ b/src.mk @@ -143,6 +143,7 @@ LIB_SOURCES = \ util/filename.cc \ util/filter_policy.cc \ util/hash.cc \ + util/jemalloc_nodump_allocator.cc \ util/log_buffer.cc \ util/murmurhash.cc \ util/random.cc \ diff --git a/util/jemalloc_nodump_allocator.cc b/util/jemalloc_nodump_allocator.cc new file mode 100644 index 000000000..c1391649f --- /dev/null +++ b/util/jemalloc_nodump_allocator.cc @@ -0,0 +1,126 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "util/jemalloc_nodump_allocator.h" + +#include + +#include "util/string_util.h" + +namespace rocksdb { + +#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR + +std::atomic JemallocNodumpAllocator::original_alloc_{nullptr}; + +JemallocNodumpAllocator::JemallocNodumpAllocator( + unsigned arena_index, int flags, std::unique_ptr&& hooks) + : arena_index_(arena_index), flags_(flags), hooks_(std::move(hooks)) { + assert(arena_index != 0); +} + +void* JemallocNodumpAllocator::Allocate(size_t size) { + return mallocx(size, flags_); +} + +void JemallocNodumpAllocator::Deallocate(void* p) { dallocx(p, flags_); } + +void* JemallocNodumpAllocator::Alloc(extent_hooks_t* extent, void* new_addr, + size_t size, size_t alignment, bool* zero, + bool* commit, unsigned arena_ind) { + extent_alloc_t* original_alloc = + original_alloc_.load(std::memory_order_relaxed); + assert(original_alloc != nullptr); + void* result = original_alloc(extent, new_addr, size, alignment, zero, commit, + arena_ind); + if (result != nullptr) { + int ret = madvise(result, size, MADV_DONTDUMP); + if (ret != 0) { + fprintf( + stderr, + "JemallocNodumpAllocator failed to set MADV_DONTDUMP, error code: %d", + ret); + assert(false); + } + } + return result; +} + +JemallocNodumpAllocator::~JemallocNodumpAllocator() { + assert(arena_index_ != 0); + std::string key = "arena." 
+ ToString(arena_index_) + ".destroy"; + int ret = mallctl(key.c_str(), nullptr, 0, nullptr, 0); + if (ret != 0) { + fprintf(stderr, "Failed to destroy jemalloc arena, error code: %d\n", ret); + } +} + +size_t JemallocNodumpAllocator::UsableSize(void* p, + size_t /*allocation_size*/) const { + return malloc_usable_size(static_cast(p)); +} +#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR + +Status NewJemallocNodumpAllocator( + std::shared_ptr* memory_allocator) { +#ifndef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR + *memory_allocator = nullptr; + return Status::NotSupported( + "JemallocNodumpAllocator only available with jemalloc version >= 5 " + "and MADV_DONTDUMP is available."); +#else + if (memory_allocator == nullptr) { + return Status::InvalidArgument("memory_allocator must be non-null."); + } + // Create arena. + unsigned arena_index = 0; + size_t arena_index_size = sizeof(arena_index); + int ret = + mallctl("arenas.create", &arena_index, &arena_index_size, nullptr, 0); + if (ret != 0) { + return Status::Incomplete("Failed to create jemalloc arena, error code: " + + ToString(ret)); + } + assert(arena_index != 0); + int flags = MALLOCX_ARENA(arena_index) | MALLOCX_TCACHE_NONE; + std::string key = "arena." + ToString(arena_index) + ".extent_hooks"; + + // Read existing hooks. + extent_hooks_t* hooks; + size_t hooks_size = sizeof(hooks); + ret = mallctl(key.c_str(), &hooks, &hooks_size, nullptr, 0); + if (ret != 0) { + std::string msg = + "Failed to read existing hooks, error code: " + ToString(ret); + return Status::Incomplete("Failed to read existing hooks, error code: " + + ToString(ret)); + } + + // Store existing alloc. + extent_alloc_t* original_alloc = hooks->alloc; + extent_alloc_t* expected = nullptr; + bool success __attribute__((__unused__)) = + JemallocNodumpAllocator::original_alloc_.compare_exchange_strong( + expected, original_alloc); + assert(success || original_alloc == expected); + + // Set the custom hook. 
+ std::unique_ptr new_hooks(new extent_hooks_t(*hooks)); + new_hooks->alloc = &JemallocNodumpAllocator::Alloc; + extent_hooks_t* hooks_ptr = new_hooks.get(); + ret = mallctl(key.c_str(), nullptr, nullptr, &hooks_ptr, sizeof(hooks_ptr)); + if (ret != 0) { + return Status::Incomplete("Failed to set custom hook, error code: " + + ToString(ret)); + } + + // Create cache allocator. + memory_allocator->reset( + new JemallocNodumpAllocator(arena_index, flags, std::move(new_hooks))); + return Status::OK(); +#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR +} + +} // namespace rocksdb diff --git a/util/jemalloc_nodump_allocator.h b/util/jemalloc_nodump_allocator.h new file mode 100644 index 000000000..69826fafe --- /dev/null +++ b/util/jemalloc_nodump_allocator.h @@ -0,0 +1,56 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "rocksdb/memory_allocator.h" + +#if defined(ROCKSDB_JEMALLOC) && defined(ROCKSDB_PLATFORM_POSIX) + +#include +#include + +#if (JEMALLOC_VERSION_MAJOR >= 5) && defined(MADV_DONTDUMP) +#define ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR + +namespace rocksdb { + +class JemallocNodumpAllocator : public MemoryAllocator { + public: + JemallocNodumpAllocator(unsigned arena_index, int flags, + std::unique_ptr&& hooks); + ~JemallocNodumpAllocator(); + + const char* Name() const override { return "JemallocNodumpAllocator"; } + void* Allocate(size_t size) override; + void Deallocate(void* p) override; + size_t UsableSize(void* p, size_t allocation_size) const override; + + private: + friend Status NewJemallocNodumpAllocator( + std::shared_ptr* memory_allocator); + + // Custom alloc hook to replace jemalloc default alloc. 
+ static void* Alloc(extent_hooks_t* extent, void* new_addr, size_t size, + size_t alignment, bool* zero, bool* commit, + unsigned arena_ind); + + // A function pointer to jemalloc default alloc. Use atomic to make sure + // NewJemallocNodumpAllocator is thread-safe. + // + // Hack: original_alloc_ needs to be static for Alloc() to access it. + // alloc needs to be static to pass to jemalloc as function pointer. + static std::atomic original_alloc_; + + unsigned arena_index_; + int flags_; + const std::unique_ptr hooks_; +}; + +} // namespace rocksdb +#endif // (JEMALLOC_VERSION_MAJOR >= 5) && MADV_DONTDUMP +#endif // ROCKSDB_JEMALLOC && ROCKSDB_PLATFORM_POSIX