Add PlainTableOptions

Summary:
Since we have a lot of options for PlainTable, add a struct PlainTableOptions
to manage them

Test Plan: make all check

Reviewers: sdong

Reviewed By: sdong

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D20175
main
Stanislau Hlebik 11 years ago
parent 052ddbe0e2
commit 92d73cbe78
  1. 1
      HISTORY.md
  2. 10
      db/c.cc
  3. 9
      db/db_bench.cc
  4. 150
      db/plain_table_db_test.cc
  5. 21
      db/table_properties_collector_test.cc
  6. 38
      include/rocksdb/table.h
  7. 9
      table/plain_table_factory.cc
  8. 23
      table/plain_table_factory.h
  9. 3
      table/plain_table_reader.cc
  10. 9
      table/table_reader_bench.cc
  11. 17
      table/table_test.cc
  12. 13
      tools/sst_dump.cc

@ -6,6 +6,7 @@
### Public API changes
* DBOptions.db_paths now is a vector of a DBPath structure which indicates both of path and target size
* NewPlainTableFactory instead of bunch of parameters now accepts PlainTableOptions, which is defined in include/rocksdb/table.h
## 3.3.0 (7/10/2014)

@ -1428,9 +1428,13 @@ void rocksdb_options_set_plain_table_factory(
double hash_table_ratio, size_t index_sparseness) {
static rocksdb::TableFactory* factory = 0;
if (!factory) {
factory = rocksdb::NewPlainTableFactory(
user_key_len, bloom_bits_per_key,
hash_table_ratio, index_sparseness);
rocksdb::PlainTableOptions options;
options.user_key_len = user_key_len;
options.bloom_bits_per_key = bloom_bits_per_key;
options.hash_table_ratio = hash_table_ratio;
options.index_sparseness = index_sparseness;
factory = rocksdb::NewPlainTableFactory(options);
}
opt->rep.table_factory.reset(factory);
}

@ -1688,8 +1688,13 @@ class Benchmark {
if (bloom_bits_per_key < 0) {
bloom_bits_per_key = 0;
}
options.table_factory.reset(
NewPlainTableFactory(FLAGS_key_size, bloom_bits_per_key, 0.75));
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = FLAGS_key_size;
plain_table_options.bloom_bits_per_key = bloom_bits_per_key;
plain_table_options.hash_table_ratio = 0.75;
options.table_factory = std::shared_ptr<TableFactory>(
NewPlainTableFactory(plain_table_options));
} else {
BlockBasedTableOptions block_based_options;
if (FLAGS_use_hash_search) {

@ -61,7 +61,18 @@ class PlainTableDBTest {
// Return the current option configuration.
Options CurrentOptions() {
Options options;
options.table_factory.reset(NewPlainTableFactory(0, 2, 0.8, 3, 0, kPrefix));
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 0;
plain_table_options.bloom_bits_per_key = 2;
plain_table_options.hash_table_ratio = 0.8;
plain_table_options.index_sparseness = 3;
plain_table_options.huge_page_tlb_size = 0;
plain_table_options.encoding_type = kPrefix;
plain_table_options.full_scan_mode = false;
options.table_factory.reset(NewPlainTableFactory(plain_table_options));
options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true, 3));
options.prefix_extractor.reset(NewFixedPrefixTransform(8));
options.allow_mmap_reads = true;
@ -212,16 +223,11 @@ extern const uint64_t kPlainTableMagicNumber;
class TestPlainTableFactory : public PlainTableFactory {
public:
explicit TestPlainTableFactory(bool* expect_bloom_not_match,
uint32_t user_key_len, int bloom_bits_per_key,
double hash_table_ratio,
size_t index_sparseness,
size_t huge_page_tlb_size,
EncodingType encoding_type)
: PlainTableFactory(user_key_len, bloom_bits_per_key, hash_table_ratio,
index_sparseness, huge_page_tlb_size, encoding_type),
bloom_bits_per_key_(bloom_bits_per_key),
hash_table_ratio_(hash_table_ratio),
index_sparseness_(index_sparseness),
const PlainTableOptions& options)
: PlainTableFactory(options),
bloom_bits_per_key_(options.bloom_bits_per_key),
hash_table_ratio_(options.hash_table_ratio),
index_sparseness_(options.index_sparseness),
expect_bloom_not_match_(expect_bloom_not_match) {}
Status NewTableReader(const Options& options, const EnvOptions& soptions,
@ -268,11 +274,30 @@ TEST(PlainTableDBTest, Flush) {
// Test index interval for the same prefix to be 1, 2 and 4
if (total_order) {
options.prefix_extractor.reset();
options.table_factory.reset(NewPlainTableFactory(
0, bloom_bits, 0, 2, huge_page_tlb_size, encoding_type));
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 0;
plain_table_options.bloom_bits_per_key = bloom_bits;
plain_table_options.hash_table_ratio = 0;
plain_table_options.index_sparseness = 2;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
plain_table_options.encoding_type = encoding_type;
plain_table_options.full_scan_mode = false;
options.table_factory.reset(
NewPlainTableFactory(plain_table_options));
} else {
options.table_factory.reset(NewPlainTableFactory(
0, bloom_bits, 0.75, 16, huge_page_tlb_size, encoding_type));
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 0;
plain_table_options.bloom_bits_per_key = bloom_bits;
plain_table_options.hash_table_ratio = 0.75;
plain_table_options.index_sparseness = 16;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
plain_table_options.encoding_type = encoding_type;
plain_table_options.full_scan_mode = false;
options.table_factory.reset(
NewPlainTableFactory(plain_table_options));
}
DestroyAndReopen(&options);
@ -315,13 +340,27 @@ TEST(PlainTableDBTest, Flush2) {
// Test index interval for the same prefix to be 1, 2 and 4
if (total_order) {
options.prefix_extractor = nullptr;
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 0;
plain_table_options.bloom_bits_per_key = bloom_bits;
plain_table_options.hash_table_ratio = 0;
plain_table_options.index_sparseness = 2;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
plain_table_options.encoding_type = encoding_type;
options.table_factory.reset(new TestPlainTableFactory(
&expect_bloom_not_match, 0, bloom_bits, 0, 2, huge_page_tlb_size,
encoding_type));
&expect_bloom_not_match, plain_table_options));
} else {
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 0;
plain_table_options.bloom_bits_per_key = bloom_bits;
plain_table_options.hash_table_ratio = 0.75;
plain_table_options.index_sparseness = 16;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
plain_table_options.encoding_type = encoding_type;
options.table_factory.reset(new TestPlainTableFactory(
&expect_bloom_not_match, 0, bloom_bits, 0.75, 16,
huge_page_tlb_size, encoding_type));
&expect_bloom_not_match, plain_table_options));
}
DestroyAndReopen(&options);
ASSERT_OK(Put("0000000000000bar", "b"));
@ -380,13 +419,28 @@ TEST(PlainTableDBTest, Iterator) {
// Test index interval for the same prefix to be 1, 2 and 4
if (total_order) {
options.prefix_extractor = nullptr;
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = bloom_bits;
plain_table_options.hash_table_ratio = 0;
plain_table_options.index_sparseness = 2;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
plain_table_options.encoding_type = encoding_type;
options.table_factory.reset(new TestPlainTableFactory(
&expect_bloom_not_match, 16, bloom_bits, 0, 2, huge_page_tlb_size,
encoding_type));
&expect_bloom_not_match, plain_table_options));
} else {
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = bloom_bits;
plain_table_options.hash_table_ratio = 0.75;
plain_table_options.index_sparseness = 16;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
plain_table_options.encoding_type = encoding_type;
options.table_factory.reset(new TestPlainTableFactory(
&expect_bloom_not_match, 16, bloom_bits, 0.75, 16,
huge_page_tlb_size, encoding_type));
&expect_bloom_not_match, plain_table_options));
}
DestroyAndReopen(&options);
@ -485,7 +539,13 @@ std::string MakeLongKey(size_t length, char c) {
TEST(PlainTableDBTest, IteratorLargeKeys) {
Options options = CurrentOptions();
options.table_factory.reset(NewPlainTableFactory(0, 0, 0));
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 0;
plain_table_options.bloom_bits_per_key = 0;
plain_table_options.hash_table_ratio = 0;
options.table_factory.reset(NewPlainTableFactory(plain_table_options));
options.create_if_missing = true;
options.prefix_extractor.reset();
DestroyAndReopen(&options);
@ -529,7 +589,16 @@ std::string MakeLongKeyWithPrefix(size_t length, char c) {
TEST(PlainTableDBTest, IteratorLargeKeysWithPrefix) {
Options options = CurrentOptions();
options.table_factory.reset(NewPlainTableFactory(16, 0, 0.8, 3, 0, kPrefix));
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = 0;
plain_table_options.hash_table_ratio = 0.8;
plain_table_options.index_sparseness = 3;
plain_table_options.huge_page_tlb_size = 0;
plain_table_options.encoding_type = kPrefix;
options.table_factory.reset(NewPlainTableFactory(plain_table_options));
options.create_if_missing = true;
DestroyAndReopen(&options);
@ -665,8 +734,16 @@ TEST(PlainTableDBTest, HashBucketConflict) {
options.create_if_missing = true;
// Set only one bucket to force bucket conflict.
// Test index interval for the same prefix to be 1, 2 and 4
options.table_factory.reset(
NewPlainTableFactory(16, 0, 0, 2 ^ i, huge_page_tlb_size));
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = 0;
plain_table_options.hash_table_ratio = 0;
plain_table_options.index_sparseness = 2 ^ i;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
options.table_factory.reset(NewPlainTableFactory(plain_table_options));
DestroyAndReopen(&options);
ASSERT_OK(Put("5000000000000fo0", "v1"));
ASSERT_OK(Put("5000000000000fo1", "v2"));
@ -752,8 +829,15 @@ TEST(PlainTableDBTest, HashBucketConflictReverseSuffixComparator) {
options.comparator = &comp;
// Set only one bucket to force bucket conflict.
// Test index interval for the same prefix to be 1, 2 and 4
options.table_factory.reset(
NewPlainTableFactory(16, 0, 0, 2 ^ i, huge_page_tlb_size));
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = 0;
plain_table_options.hash_table_ratio = 0;
plain_table_options.index_sparseness = 2 ^ i;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
options.table_factory.reset(NewPlainTableFactory(plain_table_options));
DestroyAndReopen(&options);
ASSERT_OK(Put("5000000000000fo0", "v1"));
ASSERT_OK(Put("5000000000000fo1", "v2"));
@ -833,7 +917,13 @@ TEST(PlainTableDBTest, NonExistingKeyToNonEmptyBucket) {
options.create_if_missing = true;
// Set only one bucket to force bucket conflict.
// Test index interval for the same prefix to be 1, 2 and 4
options.table_factory.reset(NewPlainTableFactory(16, 0, 0, 5));
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = 0;
plain_table_options.hash_table_ratio = 0;
plain_table_options.index_sparseness = 5;
options.table_factory.reset(NewPlainTableFactory(plain_table_options));
DestroyAndReopen(&options);
ASSERT_OK(Put("5000000000000fo0", "v1"));
ASSERT_OK(Put("5000000000000fo1", "v2"));

@ -213,7 +213,14 @@ TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) {
Options options;
options.table_properties_collector_factories.emplace_back(
new RegularKeysStartWithAFactory());
options.table_factory = std::make_shared<PlainTableFactory>(8, 8, 0);
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 8;
plain_table_options.bloom_bits_per_key = 8;
plain_table_options.hash_table_ratio = 0;
options.table_factory =
std::make_shared<PlainTableFactory>(plain_table_options);
test::PlainInternalKeyComparator ikc(options.comparator);
TestCustomizedTablePropertiesCollector(kPlainTableMagicNumber, true, options,
ikc);
@ -299,11 +306,15 @@ TEST(TablePropertiesTest, InternalKeyPropertiesCollector) {
true /* not sanitize */,
std::make_shared<BlockBasedTableFactory>()
);
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 8;
plain_table_options.bloom_bits_per_key = 8;
plain_table_options.hash_table_ratio = 0;
TestInternalKeyPropertiesCollector(
kPlainTableMagicNumber,
false /* not sanitize */,
std::make_shared<PlainTableFactory>(8, 8, 0)
);
kPlainTableMagicNumber, false /* not sanitize */,
std::make_shared<PlainTableFactory>(plain_table_options));
}
} // namespace rocksdb

@ -121,31 +121,39 @@ struct PlainTablePropertyNames {
static const std::string kEncodingType;
};
// -- Plain Table with prefix-only seek
// For this factory, you need to set Options.prefix_extrator properly to make it
// work. Look-up will starts with prefix hash lookup for key prefix. Inside the
// hash bucket found, a binary search is executed for hash conflicts. Finally,
// a linear search is used.
const uint32_t kPlainTableVariableLength = 0;
struct PlainTableOptions {
// @user_key_len: plain table has optimization for fix-sized keys, which can be
// specified via user_key_len. Alternatively, you can pass
// `kPlainTableVariableLength` if your keys have variable
// lengths.
uint32_t user_key_len = kPlainTableVariableLength;
// @bloom_bits_per_key: the number of bits used for bloom filer per prefix. You
// may disable it by passing a zero.
int bloom_bits_per_key = 10;
// @hash_table_ratio: the desired utilization of the hash table used for prefix
// hashing. hash_table_ratio = number of prefixes / #buckets
// in the hash table
double hash_table_ratio = 0.75;
// @index_sparseness: inside each prefix, need to build one index record for how
// many keys for binary search inside each hash bucket.
// For encoding type kPrefix, the value will be used when
// writing to determine an interval to rewrite the full key.
// It will also be used as a suggestion and satisfied when
// possible.
size_t index_sparseness = 16;
// @huge_page_tlb_size: if <=0, allocate hash indexes and blooms from malloc.
// Otherwise from huge page TLB. The user needs to reserve
// huge pages for it to be allocated, like:
// sysctl -w vm.nr_hugepages=20
// See linux doc Documentation/vm/hugetlbpage.txt
size_t huge_page_tlb_size = 0;
// @encoding_type: how to encode the keys. See enum EncodingType above for
// the choices. The value will determine how to encode keys
// when writing to a new SST file. This value will be stored
@ -153,13 +161,21 @@ struct PlainTablePropertyNames {
// file, which makes it possible for users to choose different
// encoding type when reopening a DB. Files with different
// encoding types can co-exist in the same DB and can be read.
EncodingType encoding_type = kPlain;
const uint32_t kPlainTableVariableLength = 0;
extern TableFactory* NewPlainTableFactory(
uint32_t user_key_len = kPlainTableVariableLength,
int bloom_bits_per_prefix = 10, double hash_table_ratio = 0.75,
size_t index_sparseness = 16, size_t huge_page_tlb_size = 0,
EncodingType encoding_type = kPlain, bool full_scan_mode = false);
// @full_scan_mode: mode for reading the whole file one record by one without
// using the index.
bool full_scan_mode = false;
};
// -- Plain Table with prefix-only seek
// For this factory, you need to set Options.prefix_extrator properly to make it
// work. Look-up will starts with prefix hash lookup for key prefix. Inside the
// hash bucket found, a binary search is executed for hash conflicts. Finally,
// a linear search is used.
extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options =
PlainTableOptions());
#endif // ROCKSDB_LITE

@ -33,13 +33,8 @@ TableBuilder* PlainTableFactory::NewTableBuilder(
index_sparseness_);
}
extern TableFactory* NewPlainTableFactory(
uint32_t user_key_len, int bloom_bits_per_key, double hash_table_ratio,
size_t index_sparseness, size_t huge_page_tlb_size,
EncodingType encoding_type, bool full_scan_mode) {
return new PlainTableFactory(
user_key_len, bloom_bits_per_key, hash_table_ratio, index_sparseness,
huge_page_tlb_size, encoding_type, full_scan_mode);
extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options) {
return new PlainTableFactory(options);
}
const std::string PlainTablePropertyNames::kPrefixExtractorName =

@ -143,20 +143,15 @@ class PlainTableFactory : public TableFactory {
// huge_page_tlb_size determines whether to allocate hash indexes from huge
// page TLB and the page size if allocating from there. See comments of
// Arena::AllocateAligned() for details.
explicit PlainTableFactory(uint32_t user_key_len = kPlainTableVariableLength,
int bloom_bits_per_key = 0,
double hash_table_ratio = 0.75,
size_t index_sparseness = 16,
size_t huge_page_tlb_size = 0,
EncodingType encoding_type = kPlain,
bool full_scan_mode = false)
: user_key_len_(user_key_len),
bloom_bits_per_key_(bloom_bits_per_key),
hash_table_ratio_(hash_table_ratio),
index_sparseness_(index_sparseness),
huge_page_tlb_size_(huge_page_tlb_size),
encoding_type_(encoding_type),
full_scan_mode_(full_scan_mode) {}
explicit PlainTableFactory(const PlainTableOptions& options =
PlainTableOptions())
: user_key_len_(options.user_key_len),
bloom_bits_per_key_(options.bloom_bits_per_key),
hash_table_ratio_(options.hash_table_ratio),
index_sparseness_(options.index_sparseness),
huge_page_tlb_size_(options.huge_page_tlb_size),
encoding_type_(options.encoding_type),
full_scan_mode_(options.full_scan_mode) {}
const char* Name() const override { return "PlainTable"; }
Status NewTableReader(const Options& options, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator,

@ -445,9 +445,10 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
size_t huge_page_tlb_size) {
assert(props != nullptr);
table_properties_.reset(props);
// options.prefix_extractor is requried for a hash-based look-up.
if ((options_.prefix_extractor.get() == nullptr) && (hash_table_ratio != 0)) {
// options.prefix_extractor is requried for a hash-based look-up.
if (options_.prefix_extractor.get() == nullptr && hash_table_ratio != 0) {
return Status::NotSupported(
"PlainTable requires a prefix extractor enable prefix hash mode.");
}

@ -261,8 +261,13 @@ int main(int argc, char** argv) {
if (FLAGS_plain_table) {
options.allow_mmap_reads = true;
env_options.use_mmap_reads = true;
tf = new rocksdb::PlainTableFactory(16, (FLAGS_prefix_len == 16) ? 0 : 8,
0.75);
rocksdb::PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = (FLAGS_prefix_len == 16) ? 0 : 8;
plain_table_options.hash_table_ratio = 0.75;
tf = new rocksdb::PlainTableFactory(plain_table_options);
options.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(
FLAGS_prefix_len));
} else {

@ -718,8 +718,16 @@ class Harness {
only_support_prefix_seek_ = false;
options_.prefix_extractor = nullptr;
options_.allow_mmap_reads = true;
{
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = kPlainTableVariableLength;
plain_table_options.bloom_bits_per_key = 0;
plain_table_options.hash_table_ratio = 0;
options_.table_factory.reset(
NewPlainTableFactory(kPlainTableVariableLength, 0, 0));
NewPlainTableFactory(plain_table_options));
}
constructor_ = new TableConstructor(options_.comparator, true);
internal_comparator_.reset(
new InternalKeyComparator(options_.comparator));
@ -1493,7 +1501,12 @@ TEST(BlockBasedTableTest, BlockCacheLeak) {
}
TEST(PlainTableTest, BasicPlainTableProperties) {
PlainTableFactory factory(8, 8, 0);
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 8;
plain_table_options.bloom_bits_per_key = 8;
plain_table_options.hash_table_ratio = 0;
PlainTableFactory factory(plain_table_options);
StringSink sink;
Options options;
InternalKeyComparator ikc(options.comparator);

@ -157,8 +157,17 @@ Status SstFileReader::SetTableOptionsByMagicNumber(
} else if (table_magic_number == kPlainTableMagicNumber ||
table_magic_number == kLegacyPlainTableMagicNumber) {
options_.allow_mmap_reads = true;
options_.table_factory.reset(NewPlainTableFactory(
kPlainTableVariableLength, 0, 0, 1, 0, kPlain, true));
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = kPlainTableVariableLength;
plain_table_options.bloom_bits_per_key = 0;
plain_table_options.hash_table_ratio = 0;
plain_table_options.index_sparseness = 1;
plain_table_options.huge_page_tlb_size = 0;
plain_table_options.encoding_type = kPlain;
plain_table_options.full_scan_mode = true;
options_.table_factory.reset(NewPlainTableFactory(plain_table_options));
fprintf(stdout, "Sst file format: plain table\n");
} else {
char error_msg_buffer[80];

Loading…
Cancel
Save