|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
#ifndef STORAGE_ROCKSDB_INCLUDE_STATISTICS_H_
|
|
|
|
#define STORAGE_ROCKSDB_INCLUDE_STATISTICS_H_
|
|
|
|
|
|
|
|
#include <atomic>
|
|
|
|
#include <cstddef>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <string>
|
|
|
|
#include <memory>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Keep adding ticker's here.
|
|
|
|
* 1. Any ticker should be added before TICKER_ENUM_MAX.
|
|
|
|
* 2. Add a readable string in TickersNameMap below for the newly added ticker.
|
|
|
|
*/
|
|
|
|
enum Tickers : uint32_t {
  // total block cache misses
  // REQUIRES: BLOCK_CACHE_MISS == BLOCK_CACHE_INDEX_MISS +
  //                               BLOCK_CACHE_FILTER_MISS +
  //                               BLOCK_CACHE_DATA_MISS;
  BLOCK_CACHE_MISS = 0,
  // total block cache hit
  // REQUIRES: BLOCK_CACHE_HIT == BLOCK_CACHE_INDEX_HIT +
  //                              BLOCK_CACHE_FILTER_HIT +
  //                              BLOCK_CACHE_DATA_HIT;
  BLOCK_CACHE_HIT,
  // # of blocks added to block cache.
  BLOCK_CACHE_ADD,
  // # of times cache miss when accessing index block from block cache.
  BLOCK_CACHE_INDEX_MISS,
  // # of times cache hit when accessing index block from block cache.
  BLOCK_CACHE_INDEX_HIT,
  // # of times cache miss when accessing filter block from block cache.
  BLOCK_CACHE_FILTER_MISS,
  // # of times cache hit when accessing filter block from block cache.
  BLOCK_CACHE_FILTER_HIT,
  // # of times cache miss when accessing data block from block cache.
  BLOCK_CACHE_DATA_MISS,
  // # of times cache hit when accessing data block from block cache.
  BLOCK_CACHE_DATA_HIT,
  // # of times bloom filter has avoided file reads.
  BLOOM_FILTER_USEFUL,

  // # of memtable hits.
  MEMTABLE_HIT,
  // # of memtable misses.
  MEMTABLE_MISS,

  // # of Get() queries served by L0
  GET_HIT_L0,
  // # of Get() queries served by L1
  GET_HIT_L1,
  // # of Get() queries served by L2 and up
  GET_HIT_L2_AND_UP,

  /**
   * COMPACTION_KEY_DROP_* count the reasons for key drop during compaction
   * There are 3 reasons currently.
   */
  COMPACTION_KEY_DROP_NEWER_ENTRY,  // key was written with a newer value.
  COMPACTION_KEY_DROP_OBSOLETE,     // The key is obsolete.
  COMPACTION_KEY_DROP_USER,  // user compaction function has dropped the key.

  // Number of keys written to the database via the Put and Write calls
  NUMBER_KEYS_WRITTEN,
  // Number of Keys read,
  NUMBER_KEYS_READ,
  // Number keys updated, if inplace update is enabled
  NUMBER_KEYS_UPDATED,
  // The number of uncompressed bytes issued by DB::Put(), DB::Delete(),
  // DB::Merge(), and DB::Write().
  BYTES_WRITTEN,
  // The number of uncompressed bytes read from DB::Get(). It could be
  // either from memtables, cache, or table files.
  // For the number of logical bytes read from DB::MultiGet(),
  // please use NUMBER_MULTIGET_BYTES_READ.
  BYTES_READ,
  NO_FILE_CLOSES,
  NO_FILE_OPENS,
  NO_FILE_ERRORS,
  // DEPRECATED Time system had to wait to do LO-L1 compactions
  STALL_L0_SLOWDOWN_MICROS,
  // DEPRECATED Time system had to wait to move memtable to L1.
  STALL_MEMTABLE_COMPACTION_MICROS,
  // DEPRECATED write throttle because of too many files in L0
  STALL_L0_NUM_FILES_MICROS,
  // Writer has to wait for compaction or flush to finish.
  STALL_MICROS,
  // The wait time for db mutex.
  DB_MUTEX_WAIT_MICROS,
  RATE_LIMIT_DELAY_MILLIS,
  NO_ITERATORS,  // number of iterators currently open

  // Number of MultiGet calls, keys read, and bytes read
  NUMBER_MULTIGET_CALLS,
  NUMBER_MULTIGET_KEYS_READ,
  NUMBER_MULTIGET_BYTES_READ,

  // Number of deletes records that were not required to be
  // written to storage because key does not exist
  NUMBER_FILTERED_DELETES,
  NUMBER_MERGE_FAILURES,
  SEQUENCE_NUMBER,

  // number of times bloom was checked before creating iterator on a
  // file, and the number of times the check was useful in avoiding
  // iterator creation (and thus likely IOPs).
  BLOOM_FILTER_PREFIX_CHECKED,
  BLOOM_FILTER_PREFIX_USEFUL,

  // Number of times we had to reseek inside an iteration to skip
  // over large number of keys with same userkey.
  NUMBER_OF_RESEEKS_IN_ITERATION,

  // Record the number of calls to GetUpdatesSince. Useful to keep track of
  // transaction log iterator refreshes
  GET_UPDATES_SINCE_CALLS,
  BLOCK_CACHE_COMPRESSED_MISS,  // miss in the compressed block cache
  BLOCK_CACHE_COMPRESSED_HIT,   // hit in the compressed block cache
  WAL_FILE_SYNCED,              // Number of times WAL sync is done
  WAL_FILE_BYTES,               // Number of bytes written to WAL

  // Writes can be processed by requesting thread or by the thread at the
  // head of the writers queue.
  WRITE_DONE_BY_SELF,
  WRITE_DONE_BY_OTHER,
  WRITE_TIMEDOUT,       // Number of writes ending up with timed-out.
  WRITE_WITH_WAL,       // Number of Write calls that request WAL
  COMPACT_READ_BYTES,   // Bytes read during compaction
  COMPACT_WRITE_BYTES,  // Bytes written during compaction
  FLUSH_WRITE_BYTES,    // Bytes written during flush

  // Number of table's properties loaded directly from file, without creating
  // table reader object.
  NUMBER_DIRECT_LOAD_TABLE_PROPERTIES,
  NUMBER_SUPERVERSION_ACQUIRES,
  NUMBER_SUPERVERSION_RELEASES,
  NUMBER_SUPERVERSION_CLEANUPS,
  NUMBER_BLOCK_NOT_COMPRESSED,
  MERGE_OPERATION_TOTAL_TIME,
  FILTER_OPERATION_TOTAL_TIME,

  // Row cache.
  ROW_CACHE_HIT,
  ROW_CACHE_MISS,

  TICKER_ENUM_MAX
};
|
|
|
|
|
|
|
|
// The order of items listed in Tickers should be the same as
|
|
|
|
// the order listed in TickersNameMap
|
|
|
|
const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
|
|
|
{BLOCK_CACHE_MISS, "rocksdb.block.cache.miss"},
|
|
|
|
{BLOCK_CACHE_HIT, "rocksdb.block.cache.hit"},
|
|
|
|
{BLOCK_CACHE_ADD, "rocksdb.block.cache.add"},
|
|
|
|
{BLOCK_CACHE_INDEX_MISS, "rocksdb.block.cache.index.miss"},
|
|
|
|
{BLOCK_CACHE_INDEX_HIT, "rocksdb.block.cache.index.hit"},
|
|
|
|
{BLOCK_CACHE_FILTER_MISS, "rocksdb.block.cache.filter.miss"},
|
|
|
|
{BLOCK_CACHE_FILTER_HIT, "rocksdb.block.cache.filter.hit"},
|
|
|
|
{BLOCK_CACHE_DATA_MISS, "rocksdb.block.cache.data.miss"},
|
|
|
|
{BLOCK_CACHE_DATA_HIT, "rocksdb.block.cache.data.hit"},
|
|
|
|
{BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful"},
|
|
|
|
{MEMTABLE_HIT, "rocksdb.memtable.hit"},
|
|
|
|
{MEMTABLE_MISS, "rocksdb.memtable.miss"},
|
|
|
|
{GET_HIT_L0, "rocksdb.l0.hit"},
|
|
|
|
{GET_HIT_L1, "rocksdb.l1.hit"},
|
|
|
|
{GET_HIT_L2_AND_UP, "rocksdb.l2andup.hit"},
|
|
|
|
{COMPACTION_KEY_DROP_NEWER_ENTRY, "rocksdb.compaction.key.drop.new"},
|
|
|
|
{COMPACTION_KEY_DROP_OBSOLETE, "rocksdb.compaction.key.drop.obsolete"},
|
|
|
|
{COMPACTION_KEY_DROP_USER, "rocksdb.compaction.key.drop.user"},
|
|
|
|
{NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written"},
|
|
|
|
{NUMBER_KEYS_READ, "rocksdb.number.keys.read"},
|
|
|
|
{NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated"},
|
|
|
|
{BYTES_WRITTEN, "rocksdb.bytes.written"},
|
|
|
|
{BYTES_READ, "rocksdb.bytes.read"},
|
|
|
|
{NO_FILE_CLOSES, "rocksdb.no.file.closes"},
|
|
|
|
{NO_FILE_OPENS, "rocksdb.no.file.opens"},
|
|
|
|
{NO_FILE_ERRORS, "rocksdb.no.file.errors"},
|
|
|
|
{STALL_L0_SLOWDOWN_MICROS, "rocksdb.l0.slowdown.micros"},
|
|
|
|
{STALL_MEMTABLE_COMPACTION_MICROS, "rocksdb.memtable.compaction.micros"},
|
|
|
|
{STALL_L0_NUM_FILES_MICROS, "rocksdb.l0.num.files.stall.micros"},
|
DB Stats Dump to print total stall time
Summary:
Add printing of stall time in DB Stats:
Sample outputs:
** DB Stats **
Uptime(secs): 53.2 total, 1.7 interval
Cumulative writes: 625940 writes, 625939 keys, 625940 batches, 1.0 writes per batch, 0.49 GB user ingest, stall micros: 50691070
Cumulative WAL: 625940 writes, 625939 syncs, 1.00 writes per sync, 0.49 GB written
Interval writes: 10859 writes, 10859 keys, 10859 batches, 1.0 writes per batch, 8.7 MB user ingest, stall micros: 1692319
Interval WAL: 10859 writes, 10859 syncs, 1.00 writes per sync, 0.01 MB written
Test Plan:
make all check
verify printing using db_bench
Reviewers: igor, yhchiang, rven, MarkCallaghan
Reviewed By: MarkCallaghan
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D31239
10 years ago
|
|
|
{STALL_MICROS, "rocksdb.stall.micros"},
|
|
|
|
{DB_MUTEX_WAIT_MICROS, "rocksdb.db.mutex.wait.micros"},
|
|
|
|
{RATE_LIMIT_DELAY_MILLIS, "rocksdb.rate.limit.delay.millis"},
|
|
|
|
{NO_ITERATORS, "rocksdb.num.iterators"},
|
|
|
|
{NUMBER_MULTIGET_CALLS, "rocksdb.number.multiget.get"},
|
|
|
|
{NUMBER_MULTIGET_KEYS_READ, "rocksdb.number.multiget.keys.read"},
|
|
|
|
{NUMBER_MULTIGET_BYTES_READ, "rocksdb.number.multiget.bytes.read"},
|
|
|
|
{NUMBER_FILTERED_DELETES, "rocksdb.number.deletes.filtered"},
|
|
|
|
{NUMBER_MERGE_FAILURES, "rocksdb.number.merge.failures"},
|
|
|
|
{SEQUENCE_NUMBER, "rocksdb.sequence.number"},
|
|
|
|
{BLOOM_FILTER_PREFIX_CHECKED, "rocksdb.bloom.filter.prefix.checked"},
|
|
|
|
{BLOOM_FILTER_PREFIX_USEFUL, "rocksdb.bloom.filter.prefix.useful"},
|
|
|
|
{NUMBER_OF_RESEEKS_IN_ITERATION, "rocksdb.number.reseeks.iteration"},
|
|
|
|
{GET_UPDATES_SINCE_CALLS, "rocksdb.getupdatessince.calls"},
|
|
|
|
{BLOCK_CACHE_COMPRESSED_MISS, "rocksdb.block.cachecompressed.miss"},
|
|
|
|
{BLOCK_CACHE_COMPRESSED_HIT, "rocksdb.block.cachecompressed.hit"},
|
|
|
|
{WAL_FILE_SYNCED, "rocksdb.wal.synced"},
|
|
|
|
{WAL_FILE_BYTES, "rocksdb.wal.bytes"},
|
|
|
|
{WRITE_DONE_BY_SELF, "rocksdb.write.self"},
|
|
|
|
{WRITE_DONE_BY_OTHER, "rocksdb.write.other"},
|
|
|
|
{WRITE_WITH_WAL, "rocksdb.write.wal"},
|
|
|
|
{FLUSH_WRITE_BYTES, "rocksdb.flush.write.bytes"},
|
|
|
|
{COMPACT_READ_BYTES, "rocksdb.compact.read.bytes"},
|
|
|
|
{COMPACT_WRITE_BYTES, "rocksdb.compact.write.bytes"},
|
|
|
|
{NUMBER_DIRECT_LOAD_TABLE_PROPERTIES,
|
|
|
|
"rocksdb.number.direct.load.table.properties"},
|
|
|
|
{NUMBER_SUPERVERSION_ACQUIRES, "rocksdb.number.superversion_acquires"},
|
|
|
|
{NUMBER_SUPERVERSION_RELEASES, "rocksdb.number.superversion_releases"},
|
|
|
|
{NUMBER_SUPERVERSION_CLEANUPS, "rocksdb.number.superversion_cleanups"},
|
|
|
|
{NUMBER_BLOCK_NOT_COMPRESSED, "rocksdb.number.block.not_compressed"},
|
|
|
|
{MERGE_OPERATION_TOTAL_TIME, "rocksdb.merge.operation.time.nanos"},
|
|
|
|
{FILTER_OPERATION_TOTAL_TIME, "rocksdb.filter.operation.time.nanos"},
|
|
|
|
{ROW_CACHE_HIT, "rocksdb.row.cache.hit"},
|
|
|
|
{ROW_CACHE_MISS, "rocksdb.row.cache.miss"},
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Keep adding histogram's here.
|
|
|
|
* Any histogram whould have value less than HISTOGRAM_ENUM_MAX
|
|
|
|
* Add a new Histogram by assigning it the current value of HISTOGRAM_ENUM_MAX
|
|
|
|
* Add a string representation in HistogramsNameMap below
|
|
|
|
* And increment HISTOGRAM_ENUM_MAX
|
|
|
|
*/
|
|
|
|
enum Histograms : uint32_t {
  DB_GET = 0,
  DB_WRITE,
  COMPACTION_TIME,
  TABLE_SYNC_MICROS,
  COMPACTION_OUTFILE_SYNC_MICROS,
  WAL_FILE_SYNC_MICROS,
  MANIFEST_FILE_SYNC_MICROS,
  // TIME SPENT IN IO DURING TABLE OPEN
  TABLE_OPEN_IO_MICROS,
  DB_MULTIGET,
  READ_BLOCK_COMPACTION_MICROS,
  READ_BLOCK_GET_MICROS,
  WRITE_RAW_BLOCK_MICROS,
  STALL_L0_SLOWDOWN_COUNT,
  STALL_MEMTABLE_COMPACTION_COUNT,
  STALL_L0_NUM_FILES_COUNT,
  HARD_RATE_LIMIT_DELAY_COUNT,
  SOFT_RATE_LIMIT_DELAY_COUNT,
  NUM_FILES_IN_SINGLE_COMPACTION,
  DB_SEEK,
  WRITE_STALL,
  SST_READ_MICROS,
  HISTOGRAM_ENUM_MAX,  // TODO(ldemailly): enforce HistogramsNameMap match
};
|
|
|
|
|
|
|
|
// One human-readable name per histogram, listed in the same order as the
// Histograms enum above.
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
    {DB_GET, "rocksdb.db.get.micros"},
    {DB_WRITE, "rocksdb.db.write.micros"},
    {COMPACTION_TIME, "rocksdb.compaction.times.micros"},
    {TABLE_SYNC_MICROS, "rocksdb.table.sync.micros"},
    {COMPACTION_OUTFILE_SYNC_MICROS, "rocksdb.compaction.outfile.sync.micros"},
    {WAL_FILE_SYNC_MICROS, "rocksdb.wal.file.sync.micros"},
    {MANIFEST_FILE_SYNC_MICROS, "rocksdb.manifest.file.sync.micros"},
    {TABLE_OPEN_IO_MICROS, "rocksdb.table.open.io.micros"},
    {DB_MULTIGET, "rocksdb.db.multiget.micros"},
    {READ_BLOCK_COMPACTION_MICROS, "rocksdb.read.block.compaction.micros"},
    {READ_BLOCK_GET_MICROS, "rocksdb.read.block.get.micros"},
    {WRITE_RAW_BLOCK_MICROS, "rocksdb.write.raw.block.micros"},
    {STALL_L0_SLOWDOWN_COUNT, "rocksdb.l0.slowdown.count"},
    {STALL_MEMTABLE_COMPACTION_COUNT, "rocksdb.memtable.compaction.count"},
    {STALL_L0_NUM_FILES_COUNT, "rocksdb.num.files.stall.count"},
    {HARD_RATE_LIMIT_DELAY_COUNT, "rocksdb.hard.rate.limit.delay.count"},
    {SOFT_RATE_LIMIT_DELAY_COUNT, "rocksdb.soft.rate.limit.delay.count"},
    {NUM_FILES_IN_SINGLE_COMPACTION, "rocksdb.numfiles.in.singlecompaction"},
    {DB_SEEK, "rocksdb.db.seek.micros"},
    {WRITE_STALL, "rocksdb.db.write.stall"},
    {SST_READ_MICROS, "rocksdb.sst.read.micros"},
};
|
|
|
|
|
|
|
|
// Snapshot of one histogram's distribution, filled in by
// Statistics::histogramData().  Units are whatever the histogram records
// (most histograms in this file record microseconds).
struct HistogramData {
  double median;              // 50th percentile
  double percentile95;        // 95th percentile
  double percentile99;        // 99th percentile
  double average;             // arithmetic mean of recorded values
  double standard_deviation;  // standard deviation of recorded values
};
|
|
|
|
|
|
|
|
// Analyze the performance of a db
|
|
|
|
class Statistics {
|
|
|
|
public:
|
|
|
|
virtual ~Statistics() {}
|
|
|
|
|
|
|
|
virtual uint64_t getTickerCount(uint32_t tickerType) const = 0;
|
|
|
|
virtual void histogramData(uint32_t type,
|
|
|
|
HistogramData* const data) const = 0;
|
Add Statistics.getHistogramString() to print more detailed outputs of a histogram
Summary:
Provide a way for users to know more detailed ditribution of a histogram metrics. Example outputs:
Manually add statement
fprintf(stdout, "%s\n", dbstats->getHistogramString(SST_READ_MICROS).c_str());
Will print out something like:
Count: 989151 Average: 1.7659 StdDev: 1.52
Min: 0.0000 Median: 1.2071 Max: 860.0000
Percentiles: P50: 1.21 P75: 1.70 P99: 5.12 P99.9: 13.67 P99.99: 21.70
------------------------------------------------------
[ 0, 1 ) 390839 39.513% 39.513% ########
[ 1, 2 ) 500918 50.641% 90.154% ##########
[ 2, 3 ) 79358 8.023% 98.177% ##
[ 3, 4 ) 6297 0.637% 98.813%
[ 4, 5 ) 1712 0.173% 98.986%
[ 5, 6 ) 1134 0.115% 99.101%
[ 6, 7 ) 1222 0.124% 99.224%
[ 7, 8 ) 1529 0.155% 99.379%
[ 8, 9 ) 1264 0.128% 99.507%
[ 9, 10 ) 988 0.100% 99.607%
[ 10, 12 ) 1378 0.139% 99.746%
[ 12, 14 ) 1828 0.185% 99.931%
[ 14, 16 ) 410 0.041% 99.972%
[ 16, 18 ) 72 0.007% 99.980%
[ 18, 20 ) 67 0.007% 99.986%
[ 20, 25 ) 106 0.011% 99.997%
[ 25, 30 ) 24 0.002% 99.999%
[ 30, 35 ) 1 0.000% 100.000%
[ 250, 300 ) 2 0.000% 100.000%
[ 300, 350 ) 1 0.000% 100.000%
[ 800, 900 ) 1 0.000% 100.000%
Test Plan: Manually add a print in db_bench and make sure it prints out as expected. Will add some codes to cover the function
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D43611
9 years ago
|
|
|
virtual std::string getHistogramString(uint32_t type) const { return ""; }
|
|
|
|
virtual void recordTick(uint32_t tickerType, uint64_t count = 0) = 0;
|
|
|
|
virtual void setTickerCount(uint32_t tickerType, uint64_t count) = 0;
|
|
|
|
virtual void measureTime(uint32_t histogramType, uint64_t time) = 0;
|
|
|
|
|
|
|
|
// String representation of the statistic object.
|
|
|
|
virtual std::string ToString() const {
|
|
|
|
// Do nothing by default
|
|
|
|
return std::string("ToString(): not implemented");
|
|
|
|
}
|
|
|
|
|
|
|
|
// Override this function to disable particular histogram collection
|
|
|
|
virtual bool HistEnabledForType(uint32_t type) const {
|
|
|
|
return type < HISTOGRAM_ENUM_MAX;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// Create a concrete DBStatistics object (the library-provided Statistics
// implementation).  Returned via shared_ptr so it can outlive — and be
// shared by — multiple consumers.
std::shared_ptr<Statistics> CreateDBStatistics();
|
|
|
|
|
|
|
|
} // namespace rocksdb
|
|
|
|
|
|
|
|
#endif // STORAGE_ROCKSDB_INCLUDE_STATISTICS_H_
|