You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
rocksdb/db/internal_stats.cc

612 lines
24 KiB

// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/internal_stats.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <algorithm>
#include <vector>
#include "db/column_family.h"
#include "db/db_impl.h"
#include "util/string_util.h"
namespace rocksdb {
#ifndef ROCKSDB_LITE
namespace {
const double kMB = 1048576.0;
const double kGB = kMB * 1024;
void PrintLevelStatsHeader(char* buf, size_t len, const std::string& cf_name) {
snprintf(
buf, len,
"\n** Compaction Stats [%s] **\n"
"Level Files Size(MB) Score Read(GB) Rn(GB) Rnp1(GB) "
"Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) "
"Comp(sec) Comp(cnt) Avg(sec) "
"Stall(cnt) KeyIn KeyDrop\n"
"--------------------------------------------------------------------"
"-----------------------------------------------------------"
"--------------------------------------\n",
cf_name.c_str());
}
void PrintLevelStats(char* buf, size_t len, const std::string& name,
int num_files, int being_compacted, double total_file_size, double score,
double w_amp, uint64_t stalls,
const InternalStats::CompactionStats& stats) {
uint64_t bytes_read = stats.bytes_readn + stats.bytes_readnp1;
int64_t bytes_new = stats.bytes_written - stats.bytes_readnp1;
double elapsed = (stats.micros + 1) / 1000000.0;
std::string num_input_records = NumberToHumanString(stats.num_input_records);
std::string num_dropped_records =
NumberToHumanString(stats.num_dropped_records);
snprintf(buf, len,
"%4s %6d/%-3d %8.0f %5.1f " /* Level, Files, Size(MB), Score */
"%8.1f " /* Read(GB) */
"%7.1f " /* Rn(GB) */
"%8.1f " /* Rnp1(GB) */
"%9.1f " /* Write(GB) */
"%8.1f " /* Wnew(GB) */
"%9.1f " /* Moved(GB) */
"%5.1f " /* W-Amp */
"%8.1f " /* Rd(MB/s) */
"%8.1f " /* Wr(MB/s) */
"%9.0f " /* Comp(sec) */
"%9d " /* Comp(cnt) */
"%8.3f " /* Avg(sec) */
"%10" PRIu64
" " /* Stall(cnt) */
"%7s " /* KeyIn */
"%6s\n", /* KeyDrop */
name.c_str(), num_files, being_compacted, total_file_size / kMB,
score, bytes_read / kGB, stats.bytes_readn / kGB,
stats.bytes_readnp1 / kGB, stats.bytes_written / kGB,
bytes_new / kGB, stats.bytes_moved / kGB,
w_amp, bytes_read / kMB / elapsed,
stats.bytes_written / kMB / elapsed, stats.micros / 1000000.0,
stats.count,
stats.count == 0 ? 0 : stats.micros / 1000000.0 / stats.count,
stalls,
num_input_records.c_str(), num_dropped_records.c_str());
}
}
static const std::string rocksdb_prefix = "rocksdb.";
static const std::string num_files_at_level_prefix = "num-files-at-level";
static const std::string allstats = "stats";
static const std::string sstables = "sstables";
static const std::string cfstats = "cfstats";
static const std::string dbstats = "dbstats";
static const std::string levelstats = "levelstats";
static const std::string num_immutable_mem_table = "num-immutable-mem-table";
static const std::string mem_table_flush_pending = "mem-table-flush-pending";
static const std::string compaction_pending = "compaction-pending";
static const std::string background_errors = "background-errors";
static const std::string cur_size_active_mem_table =
"cur-size-active-mem-table";
static const std::string cur_size_all_mem_tables = "cur-size-all-mem-tables";
static const std::string num_entries_active_mem_table =
"num-entries-active-mem-table";
static const std::string num_entries_imm_mem_tables =
"num-entries-imm-mem-tables";
static const std::string num_deletes_active_mem_table =
"num-deletes-active-mem-table";
static const std::string num_deletes_imm_mem_tables =
"num-deletes-imm-mem-tables";
static const std::string estimate_num_keys = "estimate-num-keys";
static const std::string estimate_table_readers_mem =
"estimate-table-readers-mem";
static const std::string is_file_deletions_enabled =
"is-file-deletions-enabled";
static const std::string num_snapshots = "num-snapshots";
static const std::string oldest_snapshot_time = "oldest-snapshot-time";
static const std::string num_live_versions = "num-live-versions";
static const std::string base_level = "base-level";
const std::string DB::Properties::kNumFilesAtLevelPrefix =
rocksdb_prefix + num_files_at_level_prefix;
const std::string DB::Properties::kStats = rocksdb_prefix + allstats;
const std::string DB::Properties::kSSTables = rocksdb_prefix + sstables;
const std::string DB::Properties::kCFStats = rocksdb_prefix + cfstats;
const std::string DB::Properties::kDBStats = rocksdb_prefix + dbstats;
const std::string DB::Properties::kNumImmutableMemTable =
rocksdb_prefix + num_immutable_mem_table;
const std::string DB::Properties::kMemTableFlushPending =
rocksdb_prefix + mem_table_flush_pending;
const std::string DB::Properties::kCompactionPending =
rocksdb_prefix + compaction_pending;
const std::string DB::Properties::kBackgroundErrors =
rocksdb_prefix + background_errors;
const std::string DB::Properties::kCurSizeActiveMemTable =
rocksdb_prefix + cur_size_active_mem_table;
const std::string DB::Properties::kCurSizeAllMemTables =
rocksdb_prefix + cur_size_all_mem_tables;
const std::string DB::Properties::kNumEntriesActiveMemTable =
rocksdb_prefix + num_entries_active_mem_table;
const std::string DB::Properties::kNumEntriesImmMemTables =
rocksdb_prefix + num_entries_imm_mem_tables;
const std::string DB::Properties::kNumDeletesActiveMemTable =
rocksdb_prefix + num_deletes_active_mem_table;
const std::string DB::Properties::kNumDeletesImmMemTables =
rocksdb_prefix + num_deletes_imm_mem_tables;
const std::string DB::Properties::kEstimateNumKeys =
rocksdb_prefix + estimate_num_keys;
const std::string DB::Properties::kEstimateTableReadersMem =
rocksdb_prefix + estimate_table_readers_mem;
const std::string DB::Properties::kIsFileDeletionsEnabled =
rocksdb_prefix + is_file_deletions_enabled;
const std::string DB::Properties::kNumSnapshots =
rocksdb_prefix + num_snapshots;
const std::string DB::Properties::kOldestSnapshotTime =
rocksdb_prefix + oldest_snapshot_time;
const std::string DB::Properties::kNumLiveVersions =
rocksdb_prefix + num_live_versions;
DBPropertyType GetPropertyType(const Slice& property, bool* is_int_property,
bool* need_out_of_mutex) {
assert(is_int_property != nullptr);
assert(need_out_of_mutex != nullptr);
Slice in = property;
Slice prefix(rocksdb_prefix);
*need_out_of_mutex = false;
*is_int_property = false;
if (!in.starts_with(prefix)) {
return kUnknown;
}
in.remove_prefix(prefix.size());
if (in.starts_with(num_files_at_level_prefix)) {
return kNumFilesAtLevel;
} else if (in == levelstats) {
return kLevelStats;
} else if (in == allstats) {
return kStats;
} else if (in == cfstats) {
return kCFStats;
} else if (in == dbstats) {
return kDBStats;
} else if (in == sstables) {
return kSsTables;
}
*is_int_property = true;
if (in == num_immutable_mem_table) {
return kNumImmutableMemTable;
} else if (in == mem_table_flush_pending) {
return kMemtableFlushPending;
} else if (in == compaction_pending) {
return kCompactionPending;
} else if (in == background_errors) {
return kBackgroundErrors;
} else if (in == cur_size_active_mem_table) {
return kCurSizeActiveMemTable;
} else if (in == cur_size_all_mem_tables) {
return kCurSizeAllMemTables;
} else if (in == num_entries_active_mem_table) {
return kNumEntriesInMutableMemtable;
} else if (in == num_entries_imm_mem_tables) {
return kNumEntriesInImmutableMemtable;
} else if (in == num_deletes_active_mem_table) {
return kNumDeletesInMutableMemtable;
} else if (in == num_deletes_imm_mem_tables) {
return kNumDeletesInImmutableMemtable;
} else if (in == estimate_num_keys) {
return kEstimatedNumKeys;
} else if (in == estimate_table_readers_mem) {
*need_out_of_mutex = true;
return kEstimatedUsageByTableReaders;
} else if (in == is_file_deletions_enabled) {
return kIsFileDeletionEnabled;
} else if (in == num_snapshots) {
return kNumSnapshots;
} else if (in == oldest_snapshot_time) {
return kOldestSnapshotTime;
} else if (in == num_live_versions) {
return kNumLiveVersions;
} else if (in == base_level) {
return kBaseLevel;
}
return kUnknown;
}
bool InternalStats::GetIntPropertyOutOfMutex(DBPropertyType property_type,
Version* version,
uint64_t* value) const {
assert(value != nullptr);
if (property_type != kEstimatedUsageByTableReaders) {
return false;
}
if (version == nullptr) {
*value = 0;
} else {
*value = version->GetMemoryUsageByTableReaders();
}
return true;
}
bool InternalStats::GetStringProperty(DBPropertyType property_type,
const Slice& property,
std::string* value) {
assert(value != nullptr);
auto* current = cfd_->current();
const auto* vstorage = current->storage_info();
Slice in = property;
switch (property_type) {
case kNumFilesAtLevel: {
in.remove_prefix(strlen("rocksdb.num-files-at-level"));
uint64_t level;
bool ok = ConsumeDecimalNumber(&in, &level) && in.empty();
if (!ok || (int)level >= number_levels_) {
return false;
} else {
char buf[100];
snprintf(buf, sizeof(buf), "%d",
vstorage->NumLevelFiles(static_cast<int>(level)));
*value = buf;
return true;
}
}
case kLevelStats: {
char buf[1000];
snprintf(buf, sizeof(buf),
"Level Files Size(MB)\n"
"--------------------\n");
value->append(buf);
for (int level = 0; level < number_levels_; level++) {
snprintf(buf, sizeof(buf), "%3d %8d %8.0f\n", level,
vstorage->NumLevelFiles(level),
vstorage->NumLevelBytes(level) / kMB);
value->append(buf);
}
return true;
}
case kStats: {
if (!GetStringProperty(kCFStats, DB::Properties::kCFStats, value)) {
return false;
}
if (!GetStringProperty(kDBStats, DB::Properties::kDBStats, value)) {
return false;
}
return true;
}
case kCFStats: {
DumpCFStats(value);
return true;
}
case kDBStats: {
DumpDBStats(value);
return true;
}
case kSsTables:
*value = current->DebugString();
return true;
default:
return false;
}
}
bool InternalStats::GetIntProperty(DBPropertyType property_type,
uint64_t* value, DBImpl* db) const {
db->mutex_.AssertHeld();
const auto* vstorage = cfd_->current()->storage_info();
switch (property_type) {
case kNumImmutableMemTable:
*value = cfd_->imm()->size();
return true;
case kMemtableFlushPending:
// Return number of mem tables that are ready to flush (made immutable)
*value = (cfd_->imm()->IsFlushPending() ? 1 : 0);
return true;
case kCompactionPending:
// 1 if the system already determines at least one compacdtion is needed.
// 0 otherwise,
*value = (cfd_->compaction_picker()->NeedsCompaction(vstorage) ? 1 : 0);
return true;
case kBackgroundErrors:
// Accumulated number of errors in background flushes or compactions.
*value = GetBackgroundErrorCount();
return true;
case kCurSizeActiveMemTable:
// Current size of the active memtable
*value = cfd_->mem()->ApproximateMemoryUsage();
return true;
case kCurSizeAllMemTables:
// Current size of the active memtable + immutable memtables
*value = cfd_->mem()->ApproximateMemoryUsage() +
cfd_->imm()->ApproximateMemoryUsage();
return true;
case kNumEntriesInMutableMemtable:
// Current number of entires in the active memtable
*value = cfd_->mem()->num_entries();
return true;
case kNumEntriesInImmutableMemtable:
// Current number of entries in the immutable memtables
*value = cfd_->imm()->current()->GetTotalNumEntries();
return true;
case kNumDeletesInMutableMemtable:
// Current number of entires in the active memtable
*value = cfd_->mem()->num_deletes();
return true;
case kNumDeletesInImmutableMemtable:
// Current number of entries in the immutable memtables
*value = cfd_->imm()->current()->GetTotalNumDeletes();
return true;
case kEstimatedNumKeys:
// Estimate number of entries in the column family:
// Use estimated entries in tables + total entries in memtables.
*value = cfd_->mem()->num_entries() +
cfd_->imm()->current()->GetTotalNumEntries() -
(cfd_->mem()->num_deletes() +
cfd_->imm()->current()->GetTotalNumDeletes()) *
2 +
vstorage->GetEstimatedActiveKeys();
return true;
case kNumSnapshots:
*value = db->snapshots().count();
return true;
case kOldestSnapshotTime:
*value = static_cast<uint64_t>(db->snapshots().GetOldestSnapshotTime());
return true;
case kNumLiveVersions:
*value = cfd_->GetNumLiveVersions();
return true;
#ifndef ROCKSDB_LITE
case kIsFileDeletionEnabled:
*value = db->IsFileDeletionsEnabled();
return true;
#endif
case kBaseLevel:
*value = vstorage->base_level();
return true;
default:
return false;
}
}
void InternalStats::DumpDBStats(std::string* value) {
char buf[1000];
// DB-level stats, only available from default column family
double seconds_up = (env_->NowMicros() - started_at_ + 1) / 1000000.0;
double interval_seconds_up = seconds_up - db_stats_snapshot_.seconds_up;
snprintf(buf, sizeof(buf),
"\n** DB Stats **\nUptime(secs): %.1f total, %.1f interval\n",
seconds_up, interval_seconds_up);
value->append(buf);
// Cumulative
uint64_t user_bytes_written = db_stats_[InternalStats::BYTES_WRITTEN];
uint64_t num_keys_written = db_stats_[InternalStats::NUMBER_KEYS_WRITTEN];
uint64_t write_other = db_stats_[InternalStats::WRITE_DONE_BY_OTHER];
uint64_t write_self = db_stats_[InternalStats::WRITE_DONE_BY_SELF];
uint64_t wal_bytes = db_stats_[InternalStats::WAL_FILE_BYTES];
uint64_t wal_synced = db_stats_[InternalStats::WAL_FILE_SYNCED];
uint64_t write_with_wal = db_stats_[InternalStats::WRITE_WITH_WAL];
uint64_t write_stall_micros = db_stats_[InternalStats::WRITE_STALL_MICROS];
const int kHumanMicrosLen = 32;
char human_micros[kHumanMicrosLen];
// Data
// writes: total number of write requests.
// keys: total number of key updates issued by all the write requests
// batches: number of group commits issued to the DB. Each group can contain
// one or more writes.
// so writes/keys is the average number of put in multi-put or put
// writes/batches is the average group commit size.
//
// The format is the same for interval stats.
snprintf(buf, sizeof(buf),
"Cumulative writes: %s writes, %s keys, %s batches, "
"%.1f writes per batch, ingest: %.2f GB, %.2f MB/s\n",
NumberToHumanString(write_other + write_self).c_str(),
NumberToHumanString(num_keys_written).c_str(),
NumberToHumanString(write_self).c_str(),
(write_other + write_self) / static_cast<double>(write_self + 1),
user_bytes_written / kGB, user_bytes_written / kMB / seconds_up);
value->append(buf);
// WAL
snprintf(buf, sizeof(buf),
"Cumulative WAL: %s writes, %s syncs, "
"%.2f writes per sync, written: %.2f GB, %.2f MB/s\n",
NumberToHumanString(write_with_wal).c_str(),
NumberToHumanString(wal_synced).c_str(),
write_with_wal / static_cast<double>(wal_synced + 1),
wal_bytes / kGB, wal_bytes / kMB / seconds_up);
value->append(buf);
// Stall
AppendHumanMicros(write_stall_micros, human_micros, kHumanMicrosLen, true);
snprintf(buf, sizeof(buf),
"Cumulative stall: %s, %.1f percent\n",
human_micros,
// 10000 = divide by 1M to get secs, then multiply by 100 for pct
write_stall_micros / 10000.0 / std::max(seconds_up, 0.001));
value->append(buf);
// Interval
uint64_t interval_write_other = write_other - db_stats_snapshot_.write_other;
uint64_t interval_write_self = write_self - db_stats_snapshot_.write_self;
uint64_t interval_num_keys_written =
num_keys_written - db_stats_snapshot_.num_keys_written;
snprintf(buf, sizeof(buf),
"Interval writes: %s writes, %s keys, %s batches, "
"%.1f writes per batch, ingest: %.2f MB, %.2f MB/s\n",
NumberToHumanString(
interval_write_other + interval_write_self).c_str(),
NumberToHumanString(interval_num_keys_written).c_str(),
NumberToHumanString(interval_write_self).c_str(),
static_cast<double>(interval_write_other + interval_write_self) /
(interval_write_self + 1),
(user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB,
(user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB /
std::max(interval_seconds_up, 0.001)),
value->append(buf);
uint64_t interval_write_with_wal =
write_with_wal - db_stats_snapshot_.write_with_wal;
uint64_t interval_wal_synced = wal_synced - db_stats_snapshot_.wal_synced;
uint64_t interval_wal_bytes = wal_bytes - db_stats_snapshot_.wal_bytes;
snprintf(buf, sizeof(buf),
"Interval WAL: %s writes, %s syncs, "
"%.2f writes per sync, written: %.2f MB, %.2f MB/s\n",
NumberToHumanString(interval_write_with_wal).c_str(),
NumberToHumanString(interval_wal_synced).c_str(),
interval_write_with_wal /
static_cast<double>(interval_wal_synced + 1),
interval_wal_bytes / kGB,
interval_wal_bytes / kMB / std::max(interval_seconds_up, 0.001));
value->append(buf);
// Stall
AppendHumanMicros(
write_stall_micros - db_stats_snapshot_.write_stall_micros,
human_micros, kHumanMicrosLen, true);
snprintf(buf, sizeof(buf),
"Interval stall: %s, %.1f percent\n",
human_micros,
// 10000 = divide by 1M to get secs, then multiply by 100 for pct
(write_stall_micros - db_stats_snapshot_.write_stall_micros) /
10000.0 / std::max(interval_seconds_up, 0.001));
value->append(buf);
db_stats_snapshot_.seconds_up = seconds_up;
db_stats_snapshot_.ingest_bytes = user_bytes_written;
db_stats_snapshot_.write_other = write_other;
db_stats_snapshot_.write_self = write_self;
db_stats_snapshot_.num_keys_written = num_keys_written;
db_stats_snapshot_.wal_bytes = wal_bytes;
db_stats_snapshot_.wal_synced = wal_synced;
db_stats_snapshot_.write_with_wal = write_with_wal;
db_stats_snapshot_.write_stall_micros = write_stall_micros;
}
void InternalStats::DumpCFStats(std::string* value) {
const VersionStorageInfo* vstorage = cfd_->current()->storage_info();
int num_levels_to_check =
(cfd_->ioptions()->compaction_style != kCompactionStyleUniversal &&
cfd_->ioptions()->compaction_style != kCompactionStyleFIFO)
? vstorage->num_levels() - 1
: 1;
// Compaction scores are sorted base on its value. Restore them to the
// level order
std::vector<double> compaction_score(number_levels_, 0);
for (int i = 0; i < num_levels_to_check; ++i) {
compaction_score[vstorage->CompactionScoreLevel(i)] =
vstorage->CompactionScore(i);
}
// Count # of files being compacted for each level
std::vector<int> files_being_compacted(number_levels_, 0);
for (int level = 0; level < num_levels_to_check; ++level) {
for (auto* f : vstorage->LevelFiles(level)) {
if (f->being_compacted) {
++files_being_compacted[level];
}
}
}
char buf[1000];
// Per-ColumnFamily stats
PrintLevelStatsHeader(buf, sizeof(buf), cfd_->GetName());
value->append(buf);
CompactionStats stats_sum(0);
int total_files = 0;
int total_files_being_compacted = 0;
double total_file_size = 0;
uint64_t total_slowdown_count_soft = 0;
uint64_t total_slowdown_count_hard = 0;
uint64_t total_stall_count = 0;
for (int level = 0; level < number_levels_; level++) {
int files = vstorage->NumLevelFiles(level);
total_files += files;
total_files_being_compacted += files_being_compacted[level];
if (comp_stats_[level].micros > 0 || files > 0) {
uint64_t stalls = level == 0 ?
(cf_stats_count_[LEVEL0_SLOWDOWN] +
cf_stats_count_[LEVEL0_NUM_FILES] +
cf_stats_count_[MEMTABLE_COMPACTION])
: (stall_leveln_slowdown_count_soft_[level] +
stall_leveln_slowdown_count_hard_[level]);
stats_sum.Add(comp_stats_[level]);
total_file_size += vstorage->NumLevelBytes(level);
total_stall_count += stalls;
total_slowdown_count_soft += stall_leveln_slowdown_count_soft_[level];
total_slowdown_count_hard += stall_leveln_slowdown_count_hard_[level];
double w_amp = (comp_stats_[level].bytes_readn == 0) ? 0.0
: comp_stats_[level].bytes_written /
static_cast<double>(comp_stats_[level].bytes_readn);
PrintLevelStats(buf, sizeof(buf), "L" + ToString(level), files,
files_being_compacted[level],
vstorage->NumLevelBytes(level), compaction_score[level],
w_amp, stalls, comp_stats_[level]);
value->append(buf);
}
}
uint64_t curr_ingest = cf_stats_value_[BYTES_FLUSHED];
// Cumulative summary
double w_amp = stats_sum.bytes_written / static_cast<double>(curr_ingest + 1);
// Stats summary across levels
PrintLevelStats(buf, sizeof(buf), "Sum", total_files,
total_files_being_compacted, total_file_size, 0, w_amp,
total_stall_count, stats_sum);
value->append(buf);
// Interval summary
uint64_t interval_ingest =
curr_ingest - cf_stats_snapshot_.ingest_bytes + 1;
CompactionStats interval_stats(stats_sum);
interval_stats.Subtract(cf_stats_snapshot_.comp_stats);
w_amp = interval_stats.bytes_written / static_cast<double>(interval_ingest);
PrintLevelStats(buf, sizeof(buf), "Int", 0, 0, 0, 0,
w_amp, total_stall_count - cf_stats_snapshot_.stall_count,
interval_stats);
value->append(buf);
snprintf(buf, sizeof(buf),
"Flush(GB): cumulative %.3f, interval %.3f\n",
curr_ingest / kGB, interval_ingest / kGB);
value->append(buf);
snprintf(buf, sizeof(buf),
"Stalls(count): %" PRIu64 " level0_slowdown, "
"%" PRIu64 " level0_numfiles, %" PRIu64 " memtable_compaction, "
"%" PRIu64 " leveln_slowdown_soft, "
"%" PRIu64 " leveln_slowdown_hard\n",
cf_stats_count_[LEVEL0_SLOWDOWN],
cf_stats_count_[LEVEL0_NUM_FILES],
cf_stats_count_[MEMTABLE_COMPACTION],
total_slowdown_count_soft, total_slowdown_count_hard);
value->append(buf);
cf_stats_snapshot_.ingest_bytes = curr_ingest;
cf_stats_snapshot_.comp_stats = stats_sum;
cf_stats_snapshot_.stall_count = total_stall_count;
}
#else
DBPropertyType GetPropertyType(const Slice& property, bool* is_int_property,
bool* need_out_of_mutex) {
return kUnknown;
}
#endif // !ROCKSDB_LITE
} // namespace rocksdb