Blob DB: miscellaneous changes

Summary:
* Expose garbage-collection-related options.
* Minor logging and counter name updates.
* Remove unused constants.
Closes https://github.com/facebook/rocksdb/pull/3451

Differential Revision: D6867077

Pulled By: yiwu-arbug

fbshipit-source-id: 6c3272a9c9d78b125a0bd6b2e56d00d087cdd6c8
main
Yi Wu 7 years ago committed by Facebook Github Bot
parent 1edac32b77
commit e62a763752
  1. 2
      include/rocksdb/statistics.h
  2. 7
      utilities/blob_db/blob_db.h
  3. 21
      utilities/blob_db/blob_db_impl.cc
  4. 14
      utilities/blob_db/blob_db_impl.h

@ -415,7 +415,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{BLOB_DB_WRITE_BLOB, "rocksdb.blobdb.write.blob"},
{BLOB_DB_WRITE_BLOB_TTL, "rocksdb.blobdb.write.blob.ttl"},
{BLOB_DB_BLOB_FILE_BYTES_WRITTEN, "rocksdb.blobdb.blob.file.bytes.written"},
{BLOB_DB_BLOB_FILE_BYTES_READ, "rocksdb.blobdb.blob.file,bytes.read"},
{BLOB_DB_BLOB_FILE_BYTES_READ, "rocksdb.blobdb.blob.file.bytes.read"},
{BLOB_DB_BLOB_FILE_SYNCED, "rocksdb.blobdb.blob.file.synced"},
{BLOB_DB_BLOB_INDEX_EXPIRED, "rocksdb.blobdb.blob.index.expired"},
{BLOB_DB_GC_NUM_FILES, "rocksdb.blobdb.gc.num.files"},

@ -78,6 +78,13 @@ struct BlobDBOptions {
// blob files will be cleaned up based on TTL.
bool enable_garbage_collection = false;
// Time interval to trigger garbage collection, in seconds.
uint64_t garbage_collection_interval_secs = 60;
// If garbage collection is enabled, blob files with deleted size no less
// than this ratio will become candidates to be cleaned up.
double garbage_collection_deletion_size_threshold = 0.75;
// Disable all background jobs. Used for tests only.
bool disable_background_tasks = false;

@ -211,7 +211,8 @@ void BlobDBImpl::StartBackgroundTasks() {
tqueue_.add(
kReclaimOpenFilesPeriodMillisecs,
std::bind(&BlobDBImpl::ReclaimOpenFiles, this, std::placeholders::_1));
tqueue_.add(kGCCheckPeriodMillisecs,
tqueue_.add(static_cast<int64_t>(
bdb_options_.garbage_collection_interval_secs * 1000),
std::bind(&BlobDBImpl::RunGC, this, std::placeholders::_1));
if (bdb_options_.enable_garbage_collection) {
tqueue_.add(
@ -1773,8 +1774,9 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr<BlobFile>& bfptr,
ROCKS_LOG_INFO(
db_options_.info_log,
"%s blob file %" PRIu64 ". Total blob records: %" PRIu64
", Expired: %" PRIu64 " keys/%" PRIu64 " bytes, Overwritten: %" PRIu64
" keys/%" PRIu64 " bytes.",
", expired: %" PRIu64 " keys/%" PRIu64
" bytes, updated or deleted by user: %" PRIu64 " keys/%" PRIu64
" bytes, rewrite to new file: %" PRIu64 " keys/%" PRIu64 " bytes.",
s.ok() ? "Successfully garbage collected" : "Failed to garbage collect",
bfptr->BlobFileNumber(), gc_stats->blob_count, gc_stats->num_keys_expired,
gc_stats->bytes_expired, gc_stats->num_keys_overwritten,
@ -1828,14 +1830,9 @@ bool BlobDBImpl::ShouldGCFile(std::shared_ptr<BlobFile> bfile, uint64_t now,
return true;
}
if (bdb_options_.ttl_range_secs < kPartialExpirationGCRangeSecs) {
*reason = "has ttl but partial expiration not turned on";
return false;
}
ReadLock lockbfile_r(&bfile->mutex_);
bool ret = ((bfile->deleted_size_ * 100.0 / bfile->file_size_.load()) >
kPartialExpirationPercentage);
bool ret = ((bfile->deleted_size_ / bfile->file_size_.load()) >
bdb_options_.garbage_collection_deletion_size_threshold);
if (ret) {
*reason = "deleted blobs beyond threshold";
} else {
@ -1854,8 +1851,8 @@ bool BlobDBImpl::ShouldGCFile(std::shared_ptr<BlobFile> bfile, uint64_t now,
ReadLock lockbfile_r(&bfile->mutex_);
if (bdb_options_.enable_garbage_collection) {
if ((bfile->deleted_size_ * 100.0 / bfile->file_size_.load()) >
kPartialExpirationPercentage) {
if ((bfile->deleted_size_ / bfile->file_size_.load()) >
bdb_options_.garbage_collection_deletion_size_threshold) {
*reason = "deleted simple blobs beyond threshold";
return true;
}

@ -144,20 +144,6 @@ class BlobDBImpl : public BlobDB {
// how many random access open files can we tolerate
static constexpr uint32_t kOpenFilesTrigger = 100;
// how many periods of stats do we keep.
static constexpr uint32_t kWriteAmplificationStatsPeriods = 24;
// we will garbage collect blob files in
// which entire files have expired. However if the
// ttl_range of files is very large say a day, we
// would have to wait for the entire day, before we
// recover most of the space.
static constexpr uint32_t kPartialExpirationGCRangeSecs = 4 * 3600;
// this should be based on allowed Write Amplification
// if 50% of the space of a blob file has been deleted/expired,
static constexpr uint32_t kPartialExpirationPercentage = 75;
// how often to schedule reclaim open files.
static constexpr uint32_t kReclaimOpenFilesPeriodMillisecs = 1 * 1000;

Loading…
Cancel
Save