Pass logger to memtable rep and log TLB page allocation errors to info logs

Summary:
TLB page allocation errors are now logged to info logs, instead of stderr.
To do that, the memtable rep factory functions now take an info logger.

Test Plan: make all check

Reviewers: haobo, igor, yhchiang

Reviewed By: yhchiang

CC: leveldb, yhchiang, dhruba

Differential Revision: https://reviews.facebook.net/D18471
main
sdong 11 years ago
parent 044af85847
commit 3a171dcb51
  1. 1
      HISTORY.md
  2. 6
      db/memtable.cc
  3. 14
      include/rocksdb/memtablerep.h
  4. 10
      table/plain_table_reader.cc
  5. 8
      util/arena.cc
  6. 9
      util/arena.h
  7. 4
      util/dynamic_bloom.cc
  8. 4
      util/dynamic_bloom.h
  9. 5
      util/hash_cuckoo_rep.cc
  10. 2
      util/hash_cuckoo_rep.h
  11. 11
      util/hash_linklist_rep.cc
  12. 2
      util/hash_linklist_rep.h
  13. 2
      util/hash_skiplist_rep.cc
  14. 2
      util/hash_skiplist_rep.h
  15. 2
      util/skiplistrep.cc
  16. 2
      util/vectorrep.cc

@ -5,6 +5,7 @@
### Public API changes ### Public API changes
* Added _LEVEL to all InfoLogLevel enums * Added _LEVEL to all InfoLogLevel enums
* Deprecated ReadOptions.prefix and ReadOptions.prefix_seek. Seek() defaults to prefix-based seek when Options.prefix_extractor is supplied. More detail is documented in https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes * Deprecated ReadOptions.prefix and ReadOptions.prefix_seek. Seek() defaults to prefix-based seek when Options.prefix_extractor is supplied. More detail is documented in https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes
* MemTableRepFactory::CreateMemTableRep() takes info logger as an extra parameter.
### New Features ### New Features
* Column family support * Column family support

@ -37,7 +37,8 @@ MemTable::MemTable(const InternalKeyComparator& cmp, const Options& options)
kWriteBufferSize(options.write_buffer_size), kWriteBufferSize(options.write_buffer_size),
arena_(options.arena_block_size), arena_(options.arena_block_size),
table_(options.memtable_factory->CreateMemTableRep( table_(options.memtable_factory->CreateMemTableRep(
comparator_, &arena_, options.prefix_extractor.get())), comparator_, &arena_, options.prefix_extractor.get(),
options.info_log.get())),
num_entries_(0), num_entries_(0),
flush_in_progress_(false), flush_in_progress_(false),
flush_completed_(false), flush_completed_(false),
@ -55,7 +56,8 @@ MemTable::MemTable(const InternalKeyComparator& cmp, const Options& options)
prefix_bloom_.reset(new DynamicBloom( prefix_bloom_.reset(new DynamicBloom(
options.memtable_prefix_bloom_bits, options.bloom_locality, options.memtable_prefix_bloom_bits, options.bloom_locality,
options.memtable_prefix_bloom_probes, nullptr, options.memtable_prefix_bloom_probes, nullptr,
options.memtable_prefix_bloom_huge_page_tlb_size)); options.memtable_prefix_bloom_huge_page_tlb_size,
options.info_log.get()));
} }
} }

@ -44,6 +44,7 @@ class Arena;
class LookupKey; class LookupKey;
class Slice; class Slice;
class SliceTransform; class SliceTransform;
class Logger;
typedef void* KeyHandle; typedef void* KeyHandle;
@ -174,7 +175,8 @@ class MemTableRepFactory {
public: public:
virtual ~MemTableRepFactory() {} virtual ~MemTableRepFactory() {}
virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&, virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
Arena*, const SliceTransform*) = 0; Arena*, const SliceTransform*,
Logger* logger) = 0;
virtual const char* Name() const = 0; virtual const char* Name() const = 0;
}; };
@ -182,8 +184,8 @@ class MemTableRepFactory {
class SkipListFactory : public MemTableRepFactory { class SkipListFactory : public MemTableRepFactory {
public: public:
virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&, virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
Arena*, Arena*, const SliceTransform*,
const SliceTransform*) override; Logger* logger) override;
virtual const char* Name() const override { return "SkipListFactory"; } virtual const char* Name() const override { return "SkipListFactory"; }
}; };
@ -201,9 +203,9 @@ class VectorRepFactory : public MemTableRepFactory {
public: public:
explicit VectorRepFactory(size_t count = 0) : count_(count) { } explicit VectorRepFactory(size_t count = 0) : count_(count) { }
virtual MemTableRep* CreateMemTableRep( virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
const MemTableRep::KeyComparator&, Arena*, Arena*, const SliceTransform*,
const SliceTransform*) override; Logger* logger) override;
virtual const char* Name() const override { virtual const char* Name() const override {
return "VectorRepFactory"; return "VectorRepFactory";
} }

@ -272,7 +272,8 @@ void PlainTableReader::AllocateIndexAndBloom(int num_prefixes) {
uint32_t bloom_total_bits = num_prefixes * kBloomBitsPerKey; uint32_t bloom_total_bits = num_prefixes * kBloomBitsPerKey;
if (bloom_total_bits > 0) { if (bloom_total_bits > 0) {
bloom_.reset(new DynamicBloom(bloom_total_bits, options_.bloom_locality, bloom_.reset(new DynamicBloom(bloom_total_bits, options_.bloom_locality,
6, nullptr, huge_page_tlb_size_)); 6, nullptr, huge_page_tlb_size_,
options_.info_log.get()));
} }
} }
@ -328,8 +329,8 @@ void PlainTableReader::FillIndexes(
Log(options_.info_log, "Reserving %zu bytes for plain table's sub_index", Log(options_.info_log, "Reserving %zu bytes for plain table's sub_index",
kSubIndexSize); kSubIndexSize);
auto total_allocate_size = sizeof(uint32_t) * index_size_ + kSubIndexSize; auto total_allocate_size = sizeof(uint32_t) * index_size_ + kSubIndexSize;
char* allocated = char* allocated = arena_.AllocateAligned(
arena_.AllocateAligned(total_allocate_size, huge_page_tlb_size_); total_allocate_size, huge_page_tlb_size_, options_.info_log.get());
index_ = reinterpret_cast<uint32_t*>(allocated); index_ = reinterpret_cast<uint32_t*>(allocated);
sub_index_ = allocated + sizeof(uint32_t) * index_size_; sub_index_ = allocated + sizeof(uint32_t) * index_size_;
@ -398,7 +399,8 @@ Status PlainTableReader::PopulateIndex(TableProperties* props) {
uint32_t num_bloom_bits = table_properties_->num_entries * kBloomBitsPerKey; uint32_t num_bloom_bits = table_properties_->num_entries * kBloomBitsPerKey;
if (num_bloom_bits > 0) { if (num_bloom_bits > 0) {
bloom_.reset(new DynamicBloom(num_bloom_bits, options_.bloom_locality, 6, bloom_.reset(new DynamicBloom(num_bloom_bits, options_.bloom_locality, 6,
nullptr, huge_page_tlb_size_)); nullptr, huge_page_tlb_size_,
options_.info_log.get()));
} }
} }

@ -10,6 +10,7 @@
#include "util/arena.h" #include "util/arena.h"
#include <sys/mman.h> #include <sys/mman.h>
#include <algorithm> #include <algorithm>
#include "rocksdb/env.h"
namespace rocksdb { namespace rocksdb {
@ -70,20 +71,23 @@ char* Arena::AllocateFallback(size_t bytes, bool aligned) {
} }
} }
char* Arena::AllocateAligned(size_t bytes, size_t huge_page_tlb_size) { char* Arena::AllocateAligned(size_t bytes, size_t huge_page_tlb_size,
Logger* logger) {
assert((kAlignUnit & (kAlignUnit - 1)) == assert((kAlignUnit & (kAlignUnit - 1)) ==
0); // Pointer size should be a power of 2 0); // Pointer size should be a power of 2
#ifdef OS_LINUX #ifdef OS_LINUX
if (huge_page_tlb_size > 0 && bytes > 0) { if (huge_page_tlb_size > 0 && bytes > 0) {
// Allocate from a huge page TBL table. // Allocate from a huge page TBL table.
assert(logger != nullptr); // logger need to be passed in.
size_t reserved_size = size_t reserved_size =
((bytes - 1U) / huge_page_tlb_size + 1U) * huge_page_tlb_size; ((bytes - 1U) / huge_page_tlb_size + 1U) * huge_page_tlb_size;
assert(reserved_size >= bytes); assert(reserved_size >= bytes);
void* addr = mmap(nullptr, reserved_size, (PROT_READ | PROT_WRITE), void* addr = mmap(nullptr, reserved_size, (PROT_READ | PROT_WRITE),
(MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), 0, 0); (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), 0, 0);
if (addr == MAP_FAILED) { if (addr == MAP_FAILED) {
// TODO(sdong): Better handling Warn(logger, "AllocateAligned fail to allocate huge TLB pages: %s",
strerror(errno));
// fail back to malloc // fail back to malloc
} else { } else {
blocks_memory_ += reserved_size; blocks_memory_ += reserved_size;

@ -20,6 +20,8 @@
namespace rocksdb { namespace rocksdb {
class Logger;
class Arena { class Arena {
public: public:
// No copying allowed // No copying allowed
@ -41,7 +43,12 @@ class Arena {
// huge pages for it to be allocated, like: // huge pages for it to be allocated, like:
// sysctl -w vm.nr_hugepages=20 // sysctl -w vm.nr_hugepages=20
// See linux doc Documentation/vm/hugetlbpage.txt for details. // See linux doc Documentation/vm/hugetlbpage.txt for details.
char* AllocateAligned(size_t bytes, size_t huge_page_tlb_size = 0); // huge page allocation can fail. In this case it will fail back to
// normal cases. The messages will be logged to logger. So when calling with
// huge_page_tlb_size > 0, we highly recommend a logger is passed in.
// Otherwise, the error message will be printed out to stderr directly.
char* AllocateAligned(size_t bytes, size_t huge_page_tlb_size = 0,
Logger* logger = nullptr);
// Returns an estimate of the total memory usage of data allocated // Returns an estimate of the total memory usage of data allocated
// by the arena (exclude the space allocated but not yet used for future // by the arena (exclude the space allocated but not yet used for future

@ -22,7 +22,7 @@ static uint32_t BloomHash(const Slice& key) {
DynamicBloom::DynamicBloom(uint32_t total_bits, uint32_t cl_per_block, DynamicBloom::DynamicBloom(uint32_t total_bits, uint32_t cl_per_block,
uint32_t num_probes, uint32_t num_probes,
uint32_t (*hash_func)(const Slice& key), uint32_t (*hash_func)(const Slice& key),
size_t huge_page_tlb_size) size_t huge_page_tlb_size, Logger* logger)
: kBlocked(cl_per_block > 0), : kBlocked(cl_per_block > 0),
kBitsPerBlock(std::min(cl_per_block, num_probes) * CACHE_LINE_SIZE * 8), kBitsPerBlock(std::min(cl_per_block, num_probes) * CACHE_LINE_SIZE * 8),
kTotalBits((kBlocked ? (total_bits + kBitsPerBlock - 1) / kBitsPerBlock * kTotalBits((kBlocked ? (total_bits + kBitsPerBlock - 1) / kBitsPerBlock *
@ -40,7 +40,7 @@ DynamicBloom::DynamicBloom(uint32_t total_bits, uint32_t cl_per_block,
sz += CACHE_LINE_SIZE - 1; sz += CACHE_LINE_SIZE - 1;
} }
raw_ = reinterpret_cast<unsigned char*>( raw_ = reinterpret_cast<unsigned char*>(
arena_.AllocateAligned(sz, huge_page_tlb_size)); arena_.AllocateAligned(sz, huge_page_tlb_size, logger));
memset(raw_, 0, sz); memset(raw_, 0, sz);
if (kBlocked && (reinterpret_cast<uint64_t>(raw_) % CACHE_LINE_SIZE)) { if (kBlocked && (reinterpret_cast<uint64_t>(raw_) % CACHE_LINE_SIZE)) {
data_ = raw_ + CACHE_LINE_SIZE - data_ = raw_ + CACHE_LINE_SIZE -

@ -13,6 +13,7 @@
namespace rocksdb { namespace rocksdb {
class Slice; class Slice;
class Logger;
class DynamicBloom { class DynamicBloom {
public: public:
@ -29,7 +30,8 @@ class DynamicBloom {
explicit DynamicBloom(uint32_t total_bits, uint32_t cl_per_block = 0, explicit DynamicBloom(uint32_t total_bits, uint32_t cl_per_block = 0,
uint32_t num_probes = 6, uint32_t num_probes = 6,
uint32_t (*hash_func)(const Slice& key) = nullptr, uint32_t (*hash_func)(const Slice& key) = nullptr,
size_t huge_page_tlb_size = 0); size_t huge_page_tlb_size = 0,
Logger* logger = nullptr);
~DynamicBloom() {} ~DynamicBloom() {}

@ -314,7 +314,8 @@ void HashCuckooRep::Insert(KeyHandle handle) {
// immutable. // immutable.
if (backup_table_.get() == nullptr) { if (backup_table_.get() == nullptr) {
VectorRepFactory factory(10); VectorRepFactory factory(10);
backup_table_.reset(factory.CreateMemTableRep(compare_, arena_, nullptr)); backup_table_.reset(
factory.CreateMemTableRep(compare_, arena_, nullptr, nullptr));
is_nearly_full_ = true; is_nearly_full_ = true;
} }
backup_table_->Insert(key); backup_table_->Insert(key);
@ -595,7 +596,7 @@ void HashCuckooRep::Iterator::SeekToLast() {
MemTableRep* HashCuckooRepFactory::CreateMemTableRep( MemTableRep* HashCuckooRepFactory::CreateMemTableRep(
const MemTableRep::KeyComparator& compare, Arena* arena, const MemTableRep::KeyComparator& compare, Arena* arena,
const SliceTransform* transform) { const SliceTransform* transform, Logger* logger) {
// The estimated average fullness. The write performance of any close hash // The estimated average fullness. The write performance of any close hash
// degrades as the fullness of the mem-table increases. Setting kFullness // degrades as the fullness of the mem-table increases. Setting kFullness
// to a value around 0.7 can better avoid write performance degradation while // to a value around 0.7 can better avoid write performance degradation while

@ -29,7 +29,7 @@ class HashCuckooRepFactory : public MemTableRepFactory {
virtual MemTableRep* CreateMemTableRep( virtual MemTableRep* CreateMemTableRep(
const MemTableRep::KeyComparator& compare, Arena* arena, const MemTableRep::KeyComparator& compare, Arena* arena,
const SliceTransform* transform) override; const SliceTransform* transform, Logger* logger) override;
virtual const char* Name() const override { return "HashCuckooRepFactory"; } virtual const char* Name() const override { return "HashCuckooRepFactory"; }

@ -54,7 +54,7 @@ class HashLinkListRep : public MemTableRep {
public: public:
HashLinkListRep(const MemTableRep::KeyComparator& compare, Arena* arena, HashLinkListRep(const MemTableRep::KeyComparator& compare, Arena* arena,
const SliceTransform* transform, size_t bucket_size, const SliceTransform* transform, size_t bucket_size,
size_t huge_page_tlb_size); size_t huge_page_tlb_size, Logger* logger);
virtual KeyHandle Allocate(const size_t len, char** buf) override; virtual KeyHandle Allocate(const size_t len, char** buf) override;
@ -307,13 +307,14 @@ class HashLinkListRep : public MemTableRep {
HashLinkListRep::HashLinkListRep(const MemTableRep::KeyComparator& compare, HashLinkListRep::HashLinkListRep(const MemTableRep::KeyComparator& compare,
Arena* arena, const SliceTransform* transform, Arena* arena, const SliceTransform* transform,
size_t bucket_size, size_t huge_page_tlb_size) size_t bucket_size, size_t huge_page_tlb_size,
Logger* logger)
: MemTableRep(arena), : MemTableRep(arena),
bucket_size_(bucket_size), bucket_size_(bucket_size),
transform_(transform), transform_(transform),
compare_(compare) { compare_(compare) {
char* mem = arena_->AllocateAligned(sizeof(port::AtomicPointer) * bucket_size, char* mem = arena_->AllocateAligned(sizeof(port::AtomicPointer) * bucket_size,
huge_page_tlb_size); huge_page_tlb_size, logger);
buckets_ = new (mem) port::AtomicPointer[bucket_size]; buckets_ = new (mem) port::AtomicPointer[bucket_size];
@ -469,9 +470,9 @@ Node* HashLinkListRep::FindGreaterOrEqualInBucket(Node* head,
MemTableRep* HashLinkListRepFactory::CreateMemTableRep( MemTableRep* HashLinkListRepFactory::CreateMemTableRep(
const MemTableRep::KeyComparator& compare, Arena* arena, const MemTableRep::KeyComparator& compare, Arena* arena,
const SliceTransform* transform) { const SliceTransform* transform, Logger* logger) {
return new HashLinkListRep(compare, arena, transform, bucket_count_, return new HashLinkListRep(compare, arena, transform, bucket_count_,
huge_page_tlb_size_); huge_page_tlb_size_, logger);
} }
MemTableRepFactory* NewHashLinkListRepFactory(size_t bucket_count, MemTableRepFactory* NewHashLinkListRepFactory(size_t bucket_count,

@ -23,7 +23,7 @@ class HashLinkListRepFactory : public MemTableRepFactory {
virtual MemTableRep* CreateMemTableRep( virtual MemTableRep* CreateMemTableRep(
const MemTableRep::KeyComparator& compare, Arena* arena, const MemTableRep::KeyComparator& compare, Arena* arena,
const SliceTransform* transform) override; const SliceTransform* transform, Logger* logger) override;
virtual const char* Name() const override { virtual const char* Name() const override {
return "HashLinkListRepFactory"; return "HashLinkListRepFactory";

@ -320,7 +320,7 @@ MemTableRep::Iterator* HashSkipListRep::GetDynamicPrefixIterator() {
MemTableRep* HashSkipListRepFactory::CreateMemTableRep( MemTableRep* HashSkipListRepFactory::CreateMemTableRep(
const MemTableRep::KeyComparator& compare, Arena* arena, const MemTableRep::KeyComparator& compare, Arena* arena,
const SliceTransform* transform) { const SliceTransform* transform, Logger* logger) {
return new HashSkipListRep(compare, arena, transform, bucket_count_, return new HashSkipListRep(compare, arena, transform, bucket_count_,
skiplist_height_, skiplist_branching_factor_); skiplist_height_, skiplist_branching_factor_);
} }

@ -27,7 +27,7 @@ class HashSkipListRepFactory : public MemTableRepFactory {
virtual MemTableRep* CreateMemTableRep( virtual MemTableRep* CreateMemTableRep(
const MemTableRep::KeyComparator& compare, Arena* arena, const MemTableRep::KeyComparator& compare, Arena* arena,
const SliceTransform* transform) override; const SliceTransform* transform, Logger* logger) override;
virtual const char* Name() const override { virtual const char* Name() const override {
return "HashSkipListRepFactory"; return "HashSkipListRepFactory";

@ -116,7 +116,7 @@ public:
MemTableRep* SkipListFactory::CreateMemTableRep( MemTableRep* SkipListFactory::CreateMemTableRep(
const MemTableRep::KeyComparator& compare, Arena* arena, const MemTableRep::KeyComparator& compare, Arena* arena,
const SliceTransform*) { const SliceTransform*, Logger* logger) {
return new SkipListRep(compare, arena); return new SkipListRep(compare, arena);
} }

@ -275,7 +275,7 @@ MemTableRep::Iterator* VectorRep::GetIterator() {
MemTableRep* VectorRepFactory::CreateMemTableRep( MemTableRep* VectorRepFactory::CreateMemTableRep(
const MemTableRep::KeyComparator& compare, Arena* arena, const MemTableRep::KeyComparator& compare, Arena* arena,
const SliceTransform*) { const SliceTransform*, Logger* logger) {
return new VectorRep(compare, arena, count_); return new VectorRep(compare, arena, count_);
} }
} // namespace rocksdb } // namespace rocksdb

Loading…
Cancel
Save