From e62a7637520fe6185f4dd575743057c516c9fe69 Mon Sep 17 00:00:00 2001 From: Yi Wu Date: Wed, 31 Jan 2018 17:59:32 -0800 Subject: [PATCH] Blob DB: miscellaneous changes Summary: * Expose garbage collection related options * Minor logging and counter name update * Remove unused constants. Closes https://github.com/facebook/rocksdb/pull/3451 Differential Revision: D6867077 Pulled By: yiwu-arbug fbshipit-source-id: 6c3272a9c9d78b125a0bd6b2e56d00d087cdd6c8 --- include/rocksdb/statistics.h | 2 +- utilities/blob_db/blob_db.h | 7 +++++++ utilities/blob_db/blob_db_impl.cc | 21 +++++++++------------ utilities/blob_db/blob_db_impl.h | 14 -------------- 4 files changed, 17 insertions(+), 27 deletions(-) diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index e5f0bf61e..2dd0db5ca 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -415,7 +415,7 @@ const std::vector> TickersNameMap = { {BLOB_DB_WRITE_BLOB, "rocksdb.blobdb.write.blob"}, {BLOB_DB_WRITE_BLOB_TTL, "rocksdb.blobdb.write.blob.ttl"}, {BLOB_DB_BLOB_FILE_BYTES_WRITTEN, "rocksdb.blobdb.blob.file.bytes.written"}, - {BLOB_DB_BLOB_FILE_BYTES_READ, "rocksdb.blobdb.blob.file,bytes.read"}, + {BLOB_DB_BLOB_FILE_BYTES_READ, "rocksdb.blobdb.blob.file.bytes.read"}, {BLOB_DB_BLOB_FILE_SYNCED, "rocksdb.blobdb.blob.file.synced"}, {BLOB_DB_BLOB_INDEX_EXPIRED, "rocksdb.blobdb.blob.index.expired"}, {BLOB_DB_GC_NUM_FILES, "rocksdb.blobdb.gc.num.files"}, diff --git a/utilities/blob_db/blob_db.h b/utilities/blob_db/blob_db.h index 43dffc340..961f1728b 100644 --- a/utilities/blob_db/blob_db.h +++ b/utilities/blob_db/blob_db.h @@ -78,6 +78,13 @@ struct BlobDBOptions { // blob files will be cleanup based on TTL. bool enable_garbage_collection = false; + // Time interval to trigger garbage collection, in seconds. 
+ uint64_t garbage_collection_interval_secs = 60; + + // If garbage collection is enabled, blob files with deleted size no less + // than this ratio will become candidates for cleanup. + double garbage_collection_deletion_size_threshold = 0.75; + // Disable all background job. Used for test only. bool disable_background_tasks = false; diff --git a/utilities/blob_db/blob_db_impl.cc b/utilities/blob_db/blob_db_impl.cc index 4e0352614..f22a79205 100644 --- a/utilities/blob_db/blob_db_impl.cc +++ b/utilities/blob_db/blob_db_impl.cc @@ -211,7 +211,8 @@ void BlobDBImpl::StartBackgroundTasks() { tqueue_.add( kReclaimOpenFilesPeriodMillisecs, std::bind(&BlobDBImpl::ReclaimOpenFiles, this, std::placeholders::_1)); - tqueue_.add(kGCCheckPeriodMillisecs, + tqueue_.add(static_cast( + bdb_options_.garbage_collection_interval_secs * 1000), std::bind(&BlobDBImpl::RunGC, this, std::placeholders::_1)); if (bdb_options_.enable_garbage_collection) { tqueue_.add( @@ -1773,8 +1774,9 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr& bfptr, ROCKS_LOG_INFO( db_options_.info_log, "%s blob file %" PRIu64 ". Total blob records: %" PRIu64 - ", Expired: %" PRIu64 " keys/%" PRIu64 " bytes, Overwritten: %" PRIu64 - " keys/%" PRIu64 " bytes.", + ", expired: %" PRIu64 " keys/%" PRIu64 + " bytes, updated or deleted by user: %" PRIu64 " keys/%" PRIu64 + " bytes, rewrite to new file: %" PRIu64 " keys/%" PRIu64 " bytes.", s.ok() ? 
"Successfully garbage collected" : "Failed to garbage collect", bfptr->BlobFileNumber(), gc_stats->blob_count, gc_stats->num_keys_expired, gc_stats->bytes_expired, gc_stats->num_keys_overwritten, @@ -1828,14 +1830,9 @@ bool BlobDBImpl::ShouldGCFile(std::shared_ptr bfile, uint64_t now, return true; } - if (bdb_options_.ttl_range_secs < kPartialExpirationGCRangeSecs) { - *reason = "has ttl but partial expiration not turned on"; - return false; - } - ReadLock lockbfile_r(&bfile->mutex_); - bool ret = ((bfile->deleted_size_ * 100.0 / bfile->file_size_.load()) > - kPartialExpirationPercentage); + bool ret = ((bfile->deleted_size_ / bfile->file_size_.load()) > + bdb_options_.garbage_collection_deletion_size_threshold); if (ret) { *reason = "deleted blobs beyond threshold"; } else { @@ -1854,8 +1851,8 @@ bool BlobDBImpl::ShouldGCFile(std::shared_ptr bfile, uint64_t now, ReadLock lockbfile_r(&bfile->mutex_); if (bdb_options_.enable_garbage_collection) { - if ((bfile->deleted_size_ * 100.0 / bfile->file_size_.load()) > - kPartialExpirationPercentage) { + if ((bfile->deleted_size_ / bfile->file_size_.load()) > + bdb_options_.garbage_collection_deletion_size_threshold) { *reason = "deleted simple blobs beyond threshold"; return true; } diff --git a/utilities/blob_db/blob_db_impl.h b/utilities/blob_db/blob_db_impl.h index 75acc8267..328087c9c 100644 --- a/utilities/blob_db/blob_db_impl.h +++ b/utilities/blob_db/blob_db_impl.h @@ -144,20 +144,6 @@ class BlobDBImpl : public BlobDB { // how many random access open files can we tolerate static constexpr uint32_t kOpenFilesTrigger = 100; - // how many periods of stats do we keep. - static constexpr uint32_t kWriteAmplificationStatsPeriods = 24; - - // we will garbage collect blob files in - // which entire files have expired. However if the - // ttl_range of files is very large say a day, we - // would have to wait for the entire day, before we - // recover most of the space. 
- static constexpr uint32_t kPartialExpirationGCRangeSecs = 4 * 3600; - - // this should be based on allowed Write Amplification - // if 50% of the space of a blob file has been deleted/expired, - static constexpr uint32_t kPartialExpirationPercentage = 75; - // how often to schedule reclaim open files. static constexpr uint32_t kReclaimOpenFilesPeriodMillisecs = 1 * 1000;