JemallocNodumpAllocator: option to limit tcache memory usage (#4736)

Summary:
Add option to limit tcache usage by allocation size. This is to reduce total tcache size in case there are many user threads accessing the allocator and incur non-trivial memory usage.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4736

Differential Revision: D13269305

Pulled By: yiwu-arbug

fbshipit-source-id: 95a9b7fc67facd66837c849137e30e137112e19d
main
Yi Wu 6 years ago committed by Facebook Github Bot
parent 70645355ad
commit cf1df5d3cb
  1. 4
      include/rocksdb/cache.h
  2. 35
      include/rocksdb/memory_allocator.h
  3. 26
      util/jemalloc_nodump_allocator.cc
  4. 8
      util/jemalloc_nodump_allocator.h

@ -62,6 +62,10 @@ struct LRUCacheOptions {
// If non-nullptr will use this allocator instead of system allocator when
// allocating memory for cache blocks. Call this method before you start using
// the cache!
//
// Caveat: when the cache is used as block cache, the memory allocator is
// ignored when dealing with compression libraries that allocate memory
// internally (currently only XPRESS).
std::shared_ptr<MemoryAllocator> memory_allocator;
LRUCacheOptions() {}

@ -36,9 +36,42 @@ class MemoryAllocator {
}
};
// Generate cache allocators which allocates through Jemalloc and utilize
struct JemallocAllocatorOptions {
// Jemalloc tcache caches allocations by size class. For each size class,
// it caches between 20 (for large size classes) and 200 (for small size
// classes) allocations. To reduce tcache memory usage in case the allocator
// is accessed by a large number of threads, we can control whether to cache
// an allocation by its size.
bool limit_tcache_size = false;
// Lower bound of allocation size to use tcache, if limit_tcache_size=true.
// When used with block cache, it is recommended to set it to block_size/4.
size_t tcache_size_lower_bound = 1024;
// Upper bound of allocation size to use tcache, if limit_tcache_size=true.
// When used with block cache, it is recommended to set it to block_size.
size_t tcache_size_upper_bound = 16 * 1024;
};
// Generate memory allocators which allocate through Jemalloc and utilize
// MADV_DONTDUMP through madvise to exclude cache items from core dump.
// Applications can use the allocator with block cache to exclude block cache
// usage from core dump.
//
// Implementation details:
// The JemallocNodumpAllocator creates a dedicated jemalloc arena, and all
// allocations of the JemallocNodumpAllocator are through the same arena.
// The memory allocator hooks memory allocation of the arena, and calls
// madvise() with the MADV_DONTDUMP flag to exclude the piece of memory from
// core dump. A side benefit of using a single arena is reduced jemalloc
// metadata for some workloads.
//
// To mitigate mutex contention for using one single arena, jemalloc tcache
// (thread-local cache) is enabled to cache unused allocations for future use.
// The tcache normally incurs 0.5M extra memory usage per thread. The usage
// can be reduced by limiting allocation sizes to cache.
extern Status NewJemallocNodumpAllocator(
JemallocAllocatorOptions& options,
std::shared_ptr<MemoryAllocator>* memory_allocator);
} // namespace rocksdb

@ -19,15 +19,21 @@ namespace rocksdb {
std::atomic<extent_alloc_t*> JemallocNodumpAllocator::original_alloc_{nullptr};
JemallocNodumpAllocator::JemallocNodumpAllocator(
JemallocAllocatorOptions& options,
std::unique_ptr<extent_hooks_t>&& arena_hooks, unsigned arena_index)
: arena_hooks_(std::move(arena_hooks)),
: options_(options),
arena_hooks_(std::move(arena_hooks)),
arena_index_(arena_index),
tcache_(&JemallocNodumpAllocator::DestroyThreadSpecificCache) {}
int JemallocNodumpAllocator::GetThreadSpecificCache() {
int JemallocNodumpAllocator::GetThreadSpecificCache(size_t size) {
// We always enable tcache. The only corner case is when there are a ton of
// threads accessing with low frequency, then it could consume a lot of
// memory (may reach # threads * ~1MB) without bringing too much benefit.
if (options_.limit_tcache_size && (size <= options_.tcache_size_lower_bound ||
size > options_.tcache_size_upper_bound)) {
return MALLOCX_TCACHE_NONE;
}
unsigned* tcache_index = reinterpret_cast<unsigned*>(tcache_.Get());
if (UNLIKELY(tcache_index == nullptr)) {
// Instantiate tcache.
@ -46,13 +52,17 @@ int JemallocNodumpAllocator::GetThreadSpecificCache() {
}
void* JemallocNodumpAllocator::Allocate(size_t size) {
int tcache_flag = GetThreadSpecificCache();
int tcache_flag = GetThreadSpecificCache(size);
return mallocx(size, MALLOCX_ARENA(arena_index_) | tcache_flag);
}
void JemallocNodumpAllocator::Deallocate(void* p) {
// Obtain tcache.
int tcache_flag = GetThreadSpecificCache();
size_t size = 0;
if (options_.limit_tcache_size) {
size = malloc_usable_size(p);
}
int tcache_flag = GetThreadSpecificCache(size);
// No need to pass arena index to dallocx(). Jemalloc will find arena index
// from its own metadata.
dallocx(p, tcache_flag);
@ -120,6 +130,7 @@ size_t JemallocNodumpAllocator::UsableSize(void* p,
#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
Status NewJemallocNodumpAllocator(
JemallocAllocatorOptions& options,
std::shared_ptr<MemoryAllocator>* memory_allocator) {
*memory_allocator = nullptr;
#ifndef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
@ -130,6 +141,11 @@ Status NewJemallocNodumpAllocator(
if (memory_allocator == nullptr) {
return Status::InvalidArgument("memory_allocator must be non-null.");
}
if (options.limit_tcache_size &&
options.tcache_size_lower_bound >= options.tcache_size_upper_bound) {
return Status::InvalidArgument(
"tcache_size_lower_bound larger or equal to tcache_size_upper_bound.");
}
// Create arena.
unsigned arena_index = 0;
@ -177,7 +193,7 @@ Status NewJemallocNodumpAllocator(
// Create cache allocator.
memory_allocator->reset(
new JemallocNodumpAllocator(std::move(new_hooks), arena_index));
new JemallocNodumpAllocator(options, std::move(new_hooks), arena_index));
return Status::OK();
#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
}

@ -25,7 +25,8 @@ namespace rocksdb {
class JemallocNodumpAllocator : public MemoryAllocator {
public:
JemallocNodumpAllocator(std::unique_ptr<extent_hooks_t>&& arena_hooks,
JemallocNodumpAllocator(JemallocAllocatorOptions& options,
std::unique_ptr<extent_hooks_t>&& arena_hooks,
unsigned arena_index);
~JemallocNodumpAllocator();
@ -36,6 +37,7 @@ class JemallocNodumpAllocator : public MemoryAllocator {
private:
friend Status NewJemallocNodumpAllocator(
JemallocAllocatorOptions& options,
std::shared_ptr<MemoryAllocator>* memory_allocator);
// Custom alloc hook to replace jemalloc default alloc.
@ -51,7 +53,7 @@ class JemallocNodumpAllocator : public MemoryAllocator {
// Get or create tcache. Return flag suitable to use with `mallocx`:
// either MALLOCX_TCACHE_NONE or MALLOCX_TCACHE(tc).
int GetThreadSpecificCache();
int GetThreadSpecificCache(size_t size);
// A function pointer to jemalloc default alloc. Use atomic to make sure
// NewJemallocNodumpAllocator is thread-safe.
@ -60,6 +62,8 @@ class JemallocNodumpAllocator : public MemoryAllocator {
// alloc needs to be static to pass to jemalloc as function pointer.
static std::atomic<extent_alloc_t*> original_alloc_;
const JemallocAllocatorOptions options_;
// Custom hooks has to outlive corresponding arena.
const std::unique_ptr<extent_hooks_t> arena_hooks_;

Loading…
Cancel
Save