Implementation for GetPropertiesOfTablesInRange

Summary: In MyRocks, it is sometimes important to get propeties only for the subset of the database. This diff implements the API in RocksDB.

Test Plan: ran the GetPropertiesOfTablesInRange

Reviewers: rven, sdong

Reviewed By: sdong

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D48651
main
Alexey Maykov 9 years ago
parent ad471453e8
commit e1a09a7703
  1. 1
      CMakeLists.txt
  2. 4
      Makefile
  3. 23
      db/db_impl.cc
  4. 4
      db/db_impl.h
  5. 217
      db/db_table_properties_test.cc
  6. 60
      db/db_test.cc
  7. 44
      db/version_set.cc
  8. 18
      db/version_set.h
  9. 3
      include/rocksdb/db.h
  10. 7
      include/rocksdb/utilities/stackable_db.h
  11. 1
      src.mk
  12. 19
      util/testutil.cc
  13. 4
      util/testutil.h

@ -294,6 +294,7 @@ set(TESTS
db/db_universal_compaction_test.cc
db/db_wal_test.cc
db/db_tailing_iter_test.cc
db/db_table_properties_test.cc
db/dbformat_test.cc
db/deletefile_test.cc
db/fault_injection_test.cc

@ -230,6 +230,7 @@ TESTS = \
db_tailing_iter_test \
db_universal_compaction_test \
db_wal_test \
db_table_properties_test \
block_hash_index_test \
autovector_test \
column_family_test \
@ -741,6 +742,9 @@ db_universal_compaction_test: db/db_universal_compaction_test.o db/db_test_util.
db_wal_test: db/db_wal_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
db_table_properties_test: db/db_table_properties_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
log_write_bench: util/log_write_bench.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK) $(pg)

@ -4161,6 +4161,29 @@ Status DBImpl::GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
return s;
}
Status DBImpl::GetPropertiesOfTablesInRange(ColumnFamilyHandle* column_family,
const Range* range, int n,
TablePropertiesCollection* props) {
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
auto cfd = cfh->cfd();
// Increment the ref count
mutex_.Lock();
auto version = cfd->current();
version->Ref();
mutex_.Unlock();
auto s = version->GetPropertiesOfTablesInRange(range, n, props);
// Decrement the ref count
mutex_.Lock();
version->Unref();
mutex_.Unlock();
return s;
}
#endif // ROCKSDB_LITE
const std::string& DBImpl::GetName() const {

@ -810,6 +810,10 @@ class DBImpl : public DB {
virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
TablePropertiesCollection* props)
override;
virtual Status GetPropertiesOfTablesInRange(
ColumnFamilyHandle* column_family, const Range* range, int n,
TablePropertiesCollection* props) override;
#endif // ROCKSDB_LITE
// Function that Get and KeyMayExist call with no_io true or false

@ -0,0 +1,217 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <unordered_set>
#include <vector>
#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "rocksdb/db.h"
#include "util/testharness.h"
#include "util/testutil.h"
#ifndef ROCKSDB_LITE
namespace rocksdb {
// A helper function that ensures the table properties returned in
// `GetPropertiesOfAllTablesTest` is correct.
// This test assumes entries size is different for each of the tables.
namespace {
void VerifyTableProperties(DB* db, uint64_t expected_entries_size) {
TablePropertiesCollection props;
ASSERT_OK(db->GetPropertiesOfAllTables(&props));
ASSERT_EQ(4U, props.size());
std::unordered_set<uint64_t> unique_entries;
// Indirect test
uint64_t sum = 0;
for (const auto& item : props) {
unique_entries.insert(item.second->num_entries);
sum += item.second->num_entries;
}
ASSERT_EQ(props.size(), unique_entries.size());
ASSERT_EQ(expected_entries_size, sum);
}
} // namespace
class DBTablePropertiesTest : public DBTestBase {
public:
DBTablePropertiesTest() : DBTestBase("/db_table_properties_test") {}
TablePropertiesCollection TestGetPropertiesOfTablesInRange(
std::vector<Range> ranges, std::size_t* num_properties = nullptr,
std::size_t* num_files = nullptr);
};
TEST_F(DBTablePropertiesTest, GetPropertiesOfAllTablesTest) {
Options options = CurrentOptions();
options.level0_file_num_compaction_trigger = 8;
Reopen(options);
// Create 4 tables
for (int table = 0; table < 4; ++table) {
for (int i = 0; i < 10 + table; ++i) {
db_->Put(WriteOptions(), ToString(table * 100 + i), "val");
}
db_->Flush(FlushOptions());
}
// 1. Read table properties directly from file
Reopen(options);
VerifyTableProperties(db_, 10 + 11 + 12 + 13);
// 2. Put two tables to table cache and
Reopen(options);
// fetch key from 1st and 2nd table, which will internally place that table to
// the table cache.
for (int i = 0; i < 2; ++i) {
Get(ToString(i * 100 + 0));
}
VerifyTableProperties(db_, 10 + 11 + 12 + 13);
// 3. Put all tables to table cache
Reopen(options);
// fetch key from 1st and 2nd table, which will internally place that table to
// the table cache.
for (int i = 0; i < 4; ++i) {
Get(ToString(i * 100 + 0));
}
VerifyTableProperties(db_, 10 + 11 + 12 + 13);
}
TablePropertiesCollection
DBTablePropertiesTest::TestGetPropertiesOfTablesInRange(
std::vector<Range> ranges, std::size_t* num_properties,
std::size_t* num_files) {
// run the query
TablePropertiesCollection props;
EXPECT_OK(db_->GetPropertiesOfTablesInRange(
db_->DefaultColumnFamily(), &ranges[0], ranges.size(), &props));
// Make sure that we've received properties for those and for those files
// only which fall within requested ranges
std::vector<LiveFileMetaData> vmd;
db_->GetLiveFilesMetaData(&vmd);
for (auto md : vmd) {
std::string fn = md.db_path + md.name;
bool in_range = false;
for (auto r : ranges) {
// smallestkey < limit && largestkey >= start
if (r.limit.compare(md.smallestkey) >= 0 &&
r.start.compare(md.largestkey) <= 0) {
in_range = true;
EXPECT_GT(props.count(fn), 0);
}
}
if (!in_range) {
EXPECT_EQ(props.count(fn), 0);
}
}
if (num_properties) {
*num_properties = props.size();
}
if (num_files) {
*num_files = vmd.size();
}
return props;
}
TEST_F(DBTablePropertiesTest, GetPropertiesOfTablesInRange) {
// Fixed random sead
Random rnd(301);
Options options;
options.create_if_missing = true;
options.write_buffer_size = 4096;
options.max_write_buffer_number = 8;
options.level0_file_num_compaction_trigger = 2;
options.level0_slowdown_writes_trigger = 2;
options.level0_stop_writes_trigger = 4;
options.target_file_size_base = 2048;
options.max_bytes_for_level_base = 10240;
options.max_bytes_for_level_multiplier = 4;
options.soft_rate_limit = 1.1;
options.num_levels = 8;
DestroyAndReopen(options);
// build a decent LSM
for (int i = 0; i < 10000; i++) {
ASSERT_OK(Put(test::RandomKey(&rnd, 5), RandomString(&rnd, 102)));
}
Flush();
db_->PauseBackgroundWork();
// Ensure that we have at least L0, L1 and L2
ASSERT_GT(NumTableFilesAtLevel(0), 0);
ASSERT_GT(NumTableFilesAtLevel(1), 0);
ASSERT_GT(NumTableFilesAtLevel(2), 0);
// Query the largest range
std::size_t num_properties, num_files;
TestGetPropertiesOfTablesInRange(
{Range(test::RandomKey(&rnd, 5, test::RandomKeyType::SMALLEST),
test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST))},
&num_properties, &num_files);
ASSERT_EQ(num_properties, num_files);
// Query the empty range
TestGetPropertiesOfTablesInRange(
{Range(test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST),
test::RandomKey(&rnd, 5, test::RandomKeyType::SMALLEST))},
&num_properties, &num_files);
ASSERT_GT(num_files, 0);
ASSERT_EQ(num_properties, 0);
// Query the middle rangee
TestGetPropertiesOfTablesInRange(
{Range(test::RandomKey(&rnd, 5, test::RandomKeyType::MIDDLE),
test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST))},
&num_properties, &num_files);
ASSERT_GT(num_files, 0);
ASSERT_GT(num_files, num_properties);
ASSERT_GT(num_properties, 0);
// Query a bunch of random ranges
for (int j = 0; j < 100; j++) {
// create a bunch of ranges
std::vector<std::string> random_keys;
auto n = 2 * rnd.Uniform(50);
for (uint i = 0; i < n; ++i) {
random_keys.push_back(test::RandomKey(&rnd, 5));
}
std::vector<Range> ranges;
auto it = random_keys.begin();
while (it != random_keys.end()) {
ranges.push_back(Range(*it, *(it + 1)));
it += 2;
}
TestGetPropertiesOfTablesInRange(std::move(ranges));
}
}
} // namespace rocksdb
#endif // ROCKSDB_LITE
int main(int argc, char** argv) {
#if !(defined NDEBUG) || !defined(OS_WIN)
rocksdb::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
#else
return 0;
#endif
}

@ -84,24 +84,6 @@ static long TestGetTickerCount(const Options& options, Tickers ticker_type) {
// This test assumes entries size is different for each of the tables.
namespace {
void VerifyTableProperties(DB* db, uint64_t expected_entries_size) {
TablePropertiesCollection props;
ASSERT_OK(db->GetPropertiesOfAllTables(&props));
ASSERT_EQ(4U, props.size());
std::unordered_set<uint64_t> unique_entries;
// Indirect test
uint64_t sum = 0;
for (const auto& item : props) {
unique_entries.insert(item.second->num_entries);
sum += item.second->num_entries;
}
ASSERT_EQ(props.size(), unique_entries.size());
ASSERT_EQ(expected_entries_size, sum);
}
uint64_t GetNumberOfSstFilesForColumnFamily(DB* db,
std::string column_family_name) {
std::vector<LiveFileMetaData> metadata;
@ -440,42 +422,6 @@ TEST_F(DBTest, ParanoidFileChecks) {
TestGetTickerCount(options, BLOCK_CACHE_ADD));
}
TEST_F(DBTest, GetPropertiesOfAllTablesTest) {
Options options = CurrentOptions();
options.level0_file_num_compaction_trigger = 8;
Reopen(options);
// Create 4 tables
for (int table = 0; table < 4; ++table) {
for (int i = 0; i < 10 + table; ++i) {
db_->Put(WriteOptions(), ToString(table * 100 + i), "val");
}
db_->Flush(FlushOptions());
}
// 1. Read table properties directly from file
Reopen(options);
VerifyTableProperties(db_, 10 + 11 + 12 + 13);
// 2. Put two tables to table cache and
Reopen(options);
// fetch key from 1st and 2nd table, which will internally place that table to
// the table cache.
for (int i = 0; i < 2; ++i) {
Get(ToString(i * 100 + 0));
}
VerifyTableProperties(db_, 10 + 11 + 12 + 13);
// 3. Put all tables to table cache
Reopen(options);
// fetch key from 1st and 2nd table, which will internally place that table to
// the table cache.
for (int i = 0; i < 4; ++i) {
Get(ToString(i * 100 + 0));
}
VerifyTableProperties(db_, 10 + 11 + 12 + 13);
}
namespace {
void ResetTableProperties(TableProperties* tp) {
tp->data_size = 0;
@ -5692,6 +5638,12 @@ class ModelDB: public DB {
TablePropertiesCollection* props) override {
return Status();
}
virtual Status GetPropertiesOfTablesInRange(
ColumnFamilyHandle* column_family, const Range* range, int n,
TablePropertiesCollection* props) override {
return Status();
}
#endif // ROCKSDB_LITE
using DB::KeyMayExist;

@ -542,7 +542,7 @@ class BaseReferencedVersionBuilder {
Status Version::GetTableProperties(std::shared_ptr<const TableProperties>* tp,
const FileMetaData* file_meta,
const std::string* fname) {
const std::string* fname) const {
auto table_cache = cfd_->table_cache();
auto ioptions = cfd_->ioptions();
Status s = table_cache->GetTableProperties(
@ -624,6 +624,38 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props,
return Status::OK();
}
Status Version::GetPropertiesOfTablesInRange(
const Range* range, int n, TablePropertiesCollection* props) const {
for (int level = 0; level < storage_info_.num_non_empty_levels(); level++) {
for (int i = 0; i < n; i++) {
// Convert user_key into a corresponding internal key.
InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek);
InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek);
std::vector<FileMetaData*> files;
storage_info_.GetOverlappingInputs(level, &k1, &k2, &files, -1, nullptr,
false);
for (const auto& file_meta : files) {
auto fname =
TableFileName(vset_->db_options_->db_paths,
file_meta->fd.GetNumber(), file_meta->fd.GetPathId());
if (props->count(fname) == 0) {
// 1. If the table is already present in table cache, load table
// properties from there.
std::shared_ptr<const TableProperties> table_properties;
Status s = GetTableProperties(&table_properties, file_meta, &fname);
if (s.ok()) {
props->insert({fname, table_properties});
} else {
return s;
}
}
}
}
}
return Status::OK();
}
Status Version::GetAggregatedTableProperties(
std::shared_ptr<const TableProperties>* tp, int level) {
TablePropertiesCollection props;
@ -1406,7 +1438,8 @@ bool VersionStorageInfo::OverlapInLevel(int level,
// The file_index returns a pointer to any file in an overlapping range.
void VersionStorageInfo::GetOverlappingInputs(
int level, const InternalKey* begin, const InternalKey* end,
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index) {
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
bool expand_range) const {
if (level >= num_non_empty_levels_) {
// this level is empty, no overlapping inputs
return;
@ -1439,7 +1472,7 @@ void VersionStorageInfo::GetOverlappingInputs(
// "f" is completely after specified range; skip it
} else {
inputs->push_back(files_[level][i-1]);
if (level == 0) {
if (level == 0 && expand_range) {
// Level-0 files may overlap each other. So check if the newly
// added file has expanded the range. If so, restart search.
if (begin != nullptr && user_cmp->Compare(file_start, user_begin) < 0) {
@ -1465,7 +1498,7 @@ void VersionStorageInfo::GetOverlappingInputs(
// forwards to find all overlapping files.
void VersionStorageInfo::GetOverlappingInputsBinarySearch(
int level, const Slice& user_begin, const Slice& user_end,
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index) {
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index) const {
assert(level > 0);
int min = 0;
int mid = 0;
@ -1513,8 +1546,7 @@ void VersionStorageInfo::GetOverlappingInputsBinarySearch(
// Use FileLevel in searching, make it faster
void VersionStorageInfo::ExtendOverlappingInputs(
int level, const Slice& user_begin, const Slice& user_end,
std::vector<FileMetaData*>* inputs, unsigned int midIndex) {
std::vector<FileMetaData*>* inputs, unsigned int midIndex) const {
const Comparator* user_cmp = user_comparator_;
const FdWithKeyRange* files = level_files_brief_[level].files;
#ifndef NDEBUG

@ -159,23 +159,26 @@ class VersionStorageInfo {
int level, const InternalKey* begin, // nullptr means before all keys
const InternalKey* end, // nullptr means after all keys
std::vector<FileMetaData*>* inputs,
int hint_index = -1, // index of overlap file
int* file_index = nullptr); // return index of overlap file
int hint_index = -1, // index of overlap file
int* file_index = nullptr, // return index of overlap file
bool expand_range = true) // if set, returns files which overlap the
const; // range and overlap each other. If false,
// then just files intersecting the range
void GetOverlappingInputsBinarySearch(
int level,
const Slice& begin, // nullptr means before all keys
const Slice& end, // nullptr means after all keys
std::vector<FileMetaData*>* inputs,
int hint_index, // index of overlap file
int* file_index); // return index of overlap file
int hint_index, // index of overlap file
int* file_index) const; // return index of overlap file
void ExtendOverlappingInputs(
int level,
const Slice& begin, // nullptr means before all keys
const Slice& end, // nullptr means after all keys
std::vector<FileMetaData*>* inputs,
unsigned int index); // start extending from this index
unsigned int index) const; // start extending from this index
// Returns true iff some file in the specified level overlaps
// some part of [*smallest_user_key,*largest_user_key].
@ -456,15 +459,16 @@ class Version {
// file-name conversion.
Status GetTableProperties(std::shared_ptr<const TableProperties>* tp,
const FileMetaData* file_meta,
const std::string* fname = nullptr);
const std::string* fname = nullptr) const;
// REQUIRES: lock is held
// On success, *props will be populated with all SSTables' table properties.
// The keys of `props` are the sst file name, the values of `props` are the
// tables' propertis, represented as shared_ptr.
Status GetPropertiesOfAllTables(TablePropertiesCollection* props);
Status GetPropertiesOfAllTables(TablePropertiesCollection* props, int level);
Status GetPropertiesOfTablesInRange(const Range* range, int n,
TablePropertiesCollection* props) const;
// REQUIRES: lock is held
// On success, "tp" will contains the aggregated table property amoug

@ -716,6 +716,9 @@ class DB {
virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
return GetPropertiesOfAllTables(DefaultColumnFamily(), props);
}
virtual Status GetPropertiesOfTablesInRange(
ColumnFamilyHandle* column_family, const Range* range, int n,
TablePropertiesCollection* props) = 0;
#endif // ROCKSDB_LITE
// Needed for StackableDB

@ -279,6 +279,13 @@ class StackableDB : public DB {
return db_->GetPropertiesOfAllTables(column_family, props);
}
using DB::GetPropertiesOfTablesInRange;
virtual Status GetPropertiesOfTablesInRange(
ColumnFamilyHandle* column_family, const Range* range, int n,
TablePropertiesCollection* props) override {
return db_->GetPropertiesOfTablesInRange(column_family, range, n, props);
}
virtual Status GetUpdatesSince(
SequenceNumber seq_number, unique_ptr<TransactionLogIterator>* iter,
const TransactionLogIterator::ReadOptions& read_options) override {

@ -190,6 +190,7 @@ TEST_BENCH_SOURCES = \
db/db_universal_compaction_test.cc \
db/db_tailing_iter_test.cc \
db/db_wal_test.cc \
db/db_table_properties_test.cc \
db/deletefile_test.cc \
db/fault_injection_test.cc \
db/file_indexer_test.cc \

@ -33,7 +33,7 @@ extern std::string RandomHumanReadableString(Random* rnd, int len) {
return ret;
}
std::string RandomKey(Random* rnd, int len) {
std::string RandomKey(Random* rnd, int len, RandomKeyType type) {
// Make sure to generate a wide variety of characters so we
// test the boundary conditions for short-key optimizations.
static const char kTestChars[] = {
@ -41,7 +41,22 @@ std::string RandomKey(Random* rnd, int len) {
};
std::string result;
for (int i = 0; i < len; i++) {
result += kTestChars[rnd->Uniform(sizeof(kTestChars))];
std::size_t indx = 0;
switch (type) {
case RandomKeyType::RANDOM:
indx = rnd->Uniform(sizeof(kTestChars));
break;
case RandomKeyType::LARGEST:
indx = sizeof(kTestChars) - 1;
break;
case RandomKeyType::MIDDLE:
indx = sizeof(kTestChars) / 2;
break;
case RandomKeyType::SMALLEST:
indx = 0;
break;
}
result += kTestChars[indx];
}
return result;
}

@ -35,7 +35,9 @@ extern std::string RandomHumanReadableString(Random* rnd, int len);
// Return a random key with the specified length that may contain interesting
// characters (e.g. \x00, \xff, etc.).
extern std::string RandomKey(Random* rnd, int len);
enum RandomKeyType : char { RANDOM, LARGEST, SMALLEST, MIDDLE };
extern std::string RandomKey(Random* rnd, int len,
RandomKeyType type = RandomKeyType::RANDOM);
// Store in *dst a string of length "len" that will compress to
// "N*compressed_fraction" bytes and return a Slice that references

Loading…
Cancel
Save