From b858da709a904eefbce23fd197f48f7bb6bf4f7e Mon Sep 17 00:00:00 2001
From: Mayank Agarwal
Date: Thu, 20 Jun 2013 11:50:33 -0700
Subject: [PATCH] Simplify bucketing logic in ldb-ttl

Summary:
[start_time, end_time) is what I'm following for the buckets and the whole
time-range.
Also cleaned up some code in db_ttl.*
Not correcting the spacing/indenting convention for util/ldb_cmd.cc in this
diff.

Test Plan:
python ldb_test.py, make ttl_test, Run mcrocksdb-backup tool,
Run the ldb tool on 2 mcrocksdb production backups from sigmafio033.prn1

Reviewers: vamsi, haobo

Reviewed By: vamsi

Differential Revision: https://reviews.facebook.net/D11433
---
 util/ldb_cmd.cc         | 61 ++++++++++++++++-------------------------
 utilities/ttl/db_ttl.cc |  6 ++--
 utilities/ttl/db_ttl.h  | 27 ++++++------------
 3 files changed, 35 insertions(+), 59 deletions(-)

diff --git a/util/ldb_cmd.cc b/util/ldb_cmd.cc
index 8476f3e8e..780d3d2a4 100644
--- a/util/ldb_cmd.cc
+++ b/util/ldb_cmd.cc
@@ -538,20 +538,16 @@ string ReadableTime(int unixtime) {
 
 // This function only called when it's the sane case of >1 buckets in time-range
 // Also called only when timekv falls between ttl_start and ttl_end provided
-void IncBucketCounts(uint64_t* bucket_counts, int ttl_start, int time_range,
-                     int bucket_size, int timekv, int num_buckets) {
-  if (time_range <= 0 || timekv < ttl_start || timekv > (ttl_start + time_range)
-      || bucket_size <= 0 || num_buckets < 2) {
-    fprintf(stderr, "Error: bucketizing\n");
-    return;
-  }
-  int bucket = (timekv - ttl_start);
-  bucket = (bucket == 0) ? 1 : ceil(bucket / (double)bucket_size);
-  bucket_counts[bucket - 1]++;
+void IncBucketCounts(vector<uint64_t>& bucket_counts, int ttl_start,
+                     int time_range, int bucket_size, int timekv, int num_buckets) {
+  assert(time_range > 0 && timekv >= ttl_start && bucket_size > 0 &&
+         timekv < (ttl_start + time_range) && num_buckets > 1);
+  int bucket = (timekv - ttl_start) / bucket_size;
+  bucket_counts[bucket]++;
 }
 
-void PrintBucketCounts(uint64_t* bucket_counts, int ttl_start, int ttl_end,
-                       int bucket_size, int num_buckets) {
+void PrintBucketCounts(const vector<uint64_t>& bucket_counts, int ttl_start,
+                       int ttl_end, int bucket_size, int num_buckets) {
   int time_point = ttl_start;
   for(int i = 0; i < num_buckets - 1; i++, time_point += bucket_size) {
     fprintf(stdout, "Keys in range %s to %s : %lu\n",
@@ -629,8 +625,8 @@ void DBDumperCommand::Help(string& ret) {
   ret.append(" [--" + ARG_COUNT_ONLY + "]");
   ret.append(" [--" + ARG_STATS + "]");
   ret.append(" [--" + ARG_TTL_BUCKET + "=<N>]");
-  ret.append(" [--" + ARG_TTL_START + "=<N>]");
-  ret.append(" [--" + ARG_TTL_END + "=<N>]");
+  ret.append(" [--" + ARG_TTL_START + "=<N>:- is inclusive]");
+  ret.append(" [--" + ARG_TTL_END + "=<N>:- is exclusive]");
   ret.append("\n");
 }
 
@@ -682,10 +678,9 @@ void DBDumperCommand::DoCommand() {
     bucket_size = time_range; // Will have just 1 bucket by default
   }
   // At this point, bucket_size=0 => time_range=0
-  uint64_t num_buckets =
-    bucket_size >= time_range ? 1 : ceil((double)time_range/bucket_size);
-  unique_ptr<uint64_t[]> bucket_counts(new uint64_t[num_buckets]);
-  fill(bucket_counts.get(), bucket_counts.get() + num_buckets, 0);
+  uint64_t num_buckets = (bucket_size >= time_range) ? 1 :
+    ((time_range + bucket_size - 1) / bucket_size);
+  vector<uint64_t> bucket_counts(num_buckets, 0);
   if (is_db_ttl_ && !count_only_ && timestamp_) {
     fprintf(stdout, "Dumping key-values from %s to %s\n",
             ReadableTime(ttl_start).c_str(), ReadableTime(ttl_end).c_str());
@@ -693,7 +688,6 @@ void DBDumperCommand::DoCommand() {
 
   for (; iter->Valid(); iter->Next()) {
     int rawtime = 0;
-    string value;
     // If end marker was specified, we stop before it
     if (!null_to_ && (iter->key().ToString() >= to_))
       break;
@@ -702,20 +696,16 @@ void DBDumperCommand::DoCommand() {
       break;
     if (is_db_ttl_) {
       TtlIterator* it_ttl = (TtlIterator*)iter;
-      struct ValueAndTimestamp val_ts = it_ttl->ValueWithTimestamp();
-      value = val_ts.value.ToString();
-      rawtime = val_ts.timestamp;
-      if (rawtime < ttl_start || rawtime > ttl_end) {
+      rawtime = it_ttl->timestamp();
+      if (rawtime < ttl_start || rawtime >= ttl_end) {
        continue;
      }
-    } else {
-      value = iter->value().ToString();
     }
     if (max_keys > 0) {
       --max_keys;
     }
     if (is_db_ttl_ && num_buckets > 1) {
-      IncBucketCounts(bucket_counts.get(), ttl_start, time_range, bucket_size,
+      IncBucketCounts(bucket_counts, ttl_start, time_range, bucket_size,
                       rawtime, num_buckets);
     }
     ++count;
@@ -724,12 +714,13 @@ void DBDumperCommand::DoCommand() {
        fprintf(stdout, "%s ", ReadableTime(rawtime).c_str());
      }
      string str = PrintKeyValue(iter->key().ToString(),
-                                value, is_key_hex_, is_value_hex_);
+                                iter->value().ToString(), is_key_hex_,
+                                is_value_hex_);
      fprintf(stdout, "%s\n", str.c_str());
    }
  }
  if (num_buckets > 1 && is_db_ttl_) {
-    PrintBucketCounts(bucket_counts.get(), ttl_start, ttl_end, bucket_size,
+    PrintBucketCounts(bucket_counts, ttl_start, ttl_end, bucket_size,
                       num_buckets);
  } else {
    fprintf(stdout, "Keys in range: %lld\n", (long long) count);
@@ -1189,8 +1180,8 @@ void ScanCommand::Help(string& ret) {
   ret.append(" [--" + ARG_TTL + "]");
   ret.append(" [--" + ARG_TIMESTAMP + "]");
   ret.append(" [--" + ARG_MAX_KEYS + "=<N>q] ");
-  ret.append(" [--" + ARG_TTL_START + "=<N>]");
-  ret.append(" [--" + ARG_TTL_END + "=<N>]");
+  ret.append(" [--" + ARG_TTL_START + "=<N>:- is inclusive]");
+  ret.append(" [--" + ARG_TTL_END + "=<N>:- is exclusive]");
   ret.append("\n");
 }
 
@@ -1224,21 +1215,17 @@ void ScanCommand::DoCommand() {
        it->Valid() && (!end_key_specified_ || it->key().ToString() < end_key_);
        it->Next()) {
     string key = it->key().ToString();
-    string value;
     if (is_db_ttl_) {
       TtlIterator* it_ttl = (TtlIterator*)it;
-      struct ValueAndTimestamp val_ts = it_ttl->ValueWithTimestamp();
-      int rawtime = val_ts.timestamp;
-      value = val_ts.value.ToString();
-      if (rawtime < ttl_start || rawtime > ttl_end) {
+      int rawtime = it_ttl->timestamp();
+      if (rawtime < ttl_start || rawtime >= ttl_end) {
        continue;
      }
      if (timestamp_) {
        fprintf(stdout, "%s ", ReadableTime(rawtime).c_str());
      }
-    } else {
-      value = it->value().ToString();
    }
+    string value = it->value().ToString();
    fprintf(stdout, "%s : %s\n",
            (is_key_hex_ ? StringToHex(key) : key).c_str(),
            (is_value_hex_ ? StringToHex(value) : value).c_str()
diff --git a/utilities/ttl/db_ttl.cc b/utilities/ttl/db_ttl.cc
index af99f80e4..d201208d3 100644
--- a/utilities/ttl/db_ttl.cc
+++ b/utilities/ttl/db_ttl.cc
@@ -84,8 +84,8 @@ Status DBWithTTL::AppendTS(const Slice& val, std::string& val_with_ts) {
 
 // Returns corruption if the length of the string is lesser than timestamp, or
 // timestamp refers to a time lesser than ttl-feature release time
-Status DBWithTTL::SanityCheckTimestamp(const std::string& str) {
-  if (str.length() < (unsigned)kTSLength) {
+Status DBWithTTL::SanityCheckTimestamp(const Slice& str) {
+  if (str.size() < (unsigned)kTSLength) {
     return Status::Corruption("Error: value's length less than timestamp's\n");
   }
   // Checks that TS is not lesser than kMinTimestamp
@@ -200,7 +200,7 @@ Status DBWithTTL::Write(const WriteOptions& opts, WriteBatch* updates) {
 }
 
 Iterator* DBWithTTL::NewIterator(const ReadOptions& opts) {
-  return new TtlIterator(db_->NewIterator(opts), kTSLength);
+  return new TtlIterator(db_->NewIterator(opts));
 }
 
 const Snapshot* DBWithTTL::GetSnapshot() {
diff --git a/utilities/ttl/db_ttl.h b/utilities/ttl/db_ttl.h
index b7533cd49..2e01c1e3d 100644
--- a/utilities/ttl/db_ttl.h
+++ b/utilities/ttl/db_ttl.h
@@ -89,7 +89,7 @@ class DBWithTTL : public DB, CompactionFilter {
 
   static Status AppendTS(const Slice& val, std::string& val_with_ts);
 
-  static Status SanityCheckTimestamp(const std::string& str);
+  static Status SanityCheckTimestamp(const Slice& str);
 
   static Status StripTS(std::string* str);
 
@@ -106,17 +106,11 @@ class DBWithTTL : public DB, CompactionFilter {
   int32_t ttl_;
 };
 
-struct ValueAndTimestamp {
-  Slice value;
-  int32_t timestamp;
-};
-
 class TtlIterator : public Iterator {
 
  public:
-  TtlIterator(Iterator* iter, int32_t ts_len)
-    : iter_(iter),
-      ts_len_(ts_len) {
+  explicit TtlIterator(Iterator* iter)
+    : iter_(iter) {
     assert(iter_);
   }
 
@@ -152,20 +146,16 @@ class TtlIterator : public Iterator {
     return iter_->key();
   }
 
-  struct ValueAndTimestamp ValueWithTimestamp() const {
-    assert(DBWithTTL::SanityCheckTimestamp(iter_->value().ToString()).ok());
-    struct ValueAndTimestamp val_ts;
-    val_ts.timestamp = DecodeFixed32(
+  int32_t timestamp() const {
+    return DecodeFixed32(
       iter_->value().data() + iter_->value().size() - DBWithTTL::kTSLength);
-    val_ts.value = iter_->value();
-    val_ts.value.size_ -= ts_len_;
-    return val_ts;
   }
 
   Slice value() const {
-    assert(DBWithTTL::SanityCheckTimestamp(iter_->value().ToString()).ok());
+    //TODO: handle timestamp corruption like in general iterator semantics
+    assert(DBWithTTL::SanityCheckTimestamp(iter_->value()).ok());
     Slice trimmed_value = iter_->value();
-    trimmed_value.size_ -= ts_len_;
+    trimmed_value.size_ -= DBWithTTL::kTSLength;
     return trimmed_value;
   }
 
@@ -175,7 +165,6 @@ class TtlIterator : public Iterator {
 
  private:
   Iterator* iter_;
-  int32_t ts_len_;
 };
 
 }
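
For reference, a minimal standalone sketch of the half-open [start_time, end_time)
bucketing this patch switches to. It is not part of the diff itself; kTtlStart,
kTtlEnd, kBucketSize and the sample timestamps below are made-up illustration
values, not RocksDB defaults, but the arithmetic mirrors IncBucketCounts and the
num_buckets computation above.

// Illustration only: mirrors the [start_time, end_time) bucketing above.
// kTtlStart, kTtlEnd, kBucketSize and the sample timestamps are made up.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const int kTtlStart = 1371700000;   // --ttl_start (inclusive), sample value
  const int kTtlEnd = 1371700100;     // --ttl_end (exclusive), sample value
  const int kBucketSize = 30;         // --ttl_bucket, in seconds, sample value
  const int time_range = kTtlEnd - kTtlStart;

  // Same rounding-up as the patch: ceil(time_range / bucket_size) in integers.
  const int num_buckets = (kBucketSize >= time_range)
                              ? 1
                              : (time_range + kBucketSize - 1) / kBucketSize;
  std::vector<uint64_t> bucket_counts(num_buckets, 0);

  // Sample key timestamps; only those inside [kTtlStart, kTtlEnd) are counted.
  const int timestamps[] = {1371700000, 1371700029, 1371700030,
                            1371700095, 1371700100};
  for (int timekv : timestamps) {
    if (timekv < kTtlStart || timekv >= kTtlEnd) {
      continue;  // same filter the dump/scan loops apply before IncBucketCounts
    }
    int bucket = (timekv - kTtlStart) / kBucketSize;  // 0-based bucket index
    assert(bucket >= 0 && bucket < num_buckets);
    bucket_counts[bucket]++;
  }

  for (int i = 0; i < num_buckets; i++) {
    int lo = kTtlStart + i * kBucketSize;
    int hi = std::min(kTtlStart + (i + 1) * kBucketSize, kTtlEnd);
    std::printf("bucket %d [%d, %d): %llu keys\n", i, lo, hi,
                (unsigned long long)bucket_counts[i]);
  }
  return 0;
}

The integer expression (time_range + bucket_size - 1) / bucket_size rounds up
exactly like the old ceil((double)time_range / bucket_size) without going
through floating point, and the 0-based bucket index removes the old code's
special-casing of timekv == ttl_start.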