Revert "Remove PlainTable's feature store_index_in_file (#4914)" (#5034)

Summary:
This reverts commit ee1818081f.

We are not ready to deprecate this feature. Revert it for now.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5034

Differential Revision: D14287246

Pulled By: siying

fbshipit-source-id: e4beafdeaee1c94364fdaa6ba198218d158339f7
main
Siying Dong 6 years ago committed by Facebook Github Bot
parent 18d2e4beb7
commit 0920bf4e68
  1. 1
      HISTORY.md
  2. 62
      db/plain_table_db_test.cc
  3. 3
      include/rocksdb/table.h
  4. 4
      java/rocksjni/table.cc
  5. 24
      java/src/main/java/org/rocksdb/PlainTableConfig.java
  6. 8
      java/src/test/java/org/rocksdb/PlainTableConfigTest.java
  7. 1
      options/options_test.cc
  8. 75
      table/plain_table_builder.cc
  9. 16
      table/plain_table_builder.h
  10. 18
      table/plain_table_factory.cc
  11. 3
      table/plain_table_factory.h
  12. 86
      table/plain_table_reader.cc

@ -31,7 +31,6 @@
* With "ldb ----try_load_options", when wal_dir specified by the option file doesn't exist, ignore it. * With "ldb ----try_load_options", when wal_dir specified by the option file doesn't exist, ignore it.
* Change time resolution in FileOperationInfo. * Change time resolution in FileOperationInfo.
* Deleting Blob files also go through SStFileManager. * Deleting Blob files also go through SStFileManager.
* Remove PlainTable's store_index_in_file feature. When opening an existing DB with index in SST files, the index and bloom filter will still be rebuilt while SST files are opened, in the same way as when there is no index in the file.
* Remove CuckooHash memtable. * Remove CuckooHash memtable.
* The counter stat `number.block.not_compressed` now also counts blocks not compressed due to poor compression ratio. * The counter stat `number.block.not_compressed` now also counts blocks not compressed due to poor compression ratio.
* Remove ttl option from `CompactionOptionsFIFO`. The option has been deprecated and ttl in `ColumnFamilyOptions` is used instead. * Remove ttl option from `CompactionOptionsFIFO`. The option has been deprecated and ttl in `ColumnFamilyOptions` is used instead.

@ -134,6 +134,7 @@ class PlainTableDBTest : public testing::Test,
plain_table_options.huge_page_tlb_size = 0; plain_table_options.huge_page_tlb_size = 0;
plain_table_options.encoding_type = kPrefix; plain_table_options.encoding_type = kPrefix;
plain_table_options.full_scan_mode = false; plain_table_options.full_scan_mode = false;
plain_table_options.store_index_in_file = false;
options.table_factory.reset(NewPlainTableFactory(plain_table_options)); options.table_factory.reset(NewPlainTableFactory(plain_table_options));
options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true)); options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true));
@ -271,7 +272,8 @@ class TestPlainTableReader : public PlainTableReader {
std::unique_ptr<RandomAccessFileReader>&& file, std::unique_ptr<RandomAccessFileReader>&& file,
const ImmutableCFOptions& ioptions, const ImmutableCFOptions& ioptions,
const SliceTransform* prefix_extractor, const SliceTransform* prefix_extractor,
bool* expect_bloom_not_match, uint32_t column_family_id, bool* expect_bloom_not_match, bool store_index_in_file,
uint32_t column_family_id,
const std::string& column_family_name) const std::string& column_family_name)
: PlainTableReader(ioptions, std::move(file), env_options, icomparator, : PlainTableReader(ioptions, std::move(file), env_options, icomparator,
encoding_type, file_size, table_properties, encoding_type, file_size, table_properties,
@ -288,6 +290,17 @@ class TestPlainTableReader : public PlainTableReader {
TableProperties* props = const_cast<TableProperties*>(table_properties); TableProperties* props = const_cast<TableProperties*>(table_properties);
EXPECT_EQ(column_family_id, static_cast<uint32_t>(props->column_family_id)); EXPECT_EQ(column_family_id, static_cast<uint32_t>(props->column_family_id));
EXPECT_EQ(column_family_name, props->column_family_name); EXPECT_EQ(column_family_name, props->column_family_name);
if (store_index_in_file) {
auto bloom_version_ptr = props->user_collected_properties.find(
PlainTablePropertyNames::kBloomVersion);
EXPECT_TRUE(bloom_version_ptr != props->user_collected_properties.end());
EXPECT_EQ(bloom_version_ptr->second, std::string("1"));
if (ioptions.bloom_locality > 0) {
auto num_blocks_ptr = props->user_collected_properties.find(
PlainTablePropertyNames::kNumBloomBlocks);
EXPECT_TRUE(num_blocks_ptr != props->user_collected_properties.end());
}
}
} }
~TestPlainTableReader() override {} ~TestPlainTableReader() override {}
@ -316,6 +329,7 @@ class TestPlainTableFactory : public PlainTableFactory {
bloom_bits_per_key_(options.bloom_bits_per_key), bloom_bits_per_key_(options.bloom_bits_per_key),
hash_table_ratio_(options.hash_table_ratio), hash_table_ratio_(options.hash_table_ratio),
index_sparseness_(options.index_sparseness), index_sparseness_(options.index_sparseness),
store_index_in_file_(options.store_index_in_file),
expect_bloom_not_match_(expect_bloom_not_match), expect_bloom_not_match_(expect_bloom_not_match),
column_family_id_(column_family_id), column_family_id_(column_family_id),
column_family_name_(std::move(column_family_name)) {} column_family_name_(std::move(column_family_name)) {}
@ -332,6 +346,22 @@ class TestPlainTableFactory : public PlainTableFactory {
true /* compression_type_missing */); true /* compression_type_missing */);
EXPECT_TRUE(s.ok()); EXPECT_TRUE(s.ok());
if (store_index_in_file_) {
BlockHandle bloom_block_handle;
s = FindMetaBlock(file.get(), file_size, kPlainTableMagicNumber,
table_reader_options.ioptions,
BloomBlockBuilder::kBloomBlock, &bloom_block_handle,
/* compression_type_missing */ true);
EXPECT_TRUE(s.ok());
BlockHandle index_block_handle;
s = FindMetaBlock(file.get(), file_size, kPlainTableMagicNumber,
table_reader_options.ioptions,
PlainTableIndexBuilder::kPlainTableIndexBlock,
&index_block_handle, /* compression_type_missing */ true);
EXPECT_TRUE(s.ok());
}
auto& user_props = props->user_collected_properties; auto& user_props = props->user_collected_properties;
auto encoding_type_prop = auto encoding_type_prop =
user_props.find(PlainTablePropertyNames::kEncodingType); user_props.find(PlainTablePropertyNames::kEncodingType);
@ -345,7 +375,7 @@ class TestPlainTableFactory : public PlainTableFactory {
bloom_bits_per_key_, hash_table_ratio_, index_sparseness_, props, bloom_bits_per_key_, hash_table_ratio_, index_sparseness_, props,
std::move(file), table_reader_options.ioptions, std::move(file), table_reader_options.ioptions,
table_reader_options.prefix_extractor, expect_bloom_not_match_, table_reader_options.prefix_extractor, expect_bloom_not_match_,
column_family_id_, column_family_name_)); store_index_in_file_, column_family_id_, column_family_name_));
*table = std::move(new_reader); *table = std::move(new_reader);
return s; return s;
@ -355,6 +385,7 @@ class TestPlainTableFactory : public PlainTableFactory {
int bloom_bits_per_key_; int bloom_bits_per_key_;
double hash_table_ratio_; double hash_table_ratio_;
size_t index_sparseness_; size_t index_sparseness_;
bool store_index_in_file_;
bool* expect_bloom_not_match_; bool* expect_bloom_not_match_;
const uint32_t column_family_id_; const uint32_t column_family_id_;
const std::string column_family_name_; const std::string column_family_name_;
@ -366,6 +397,8 @@ TEST_P(PlainTableDBTest, Flush) {
for (EncodingType encoding_type : {kPlain, kPrefix}) { for (EncodingType encoding_type : {kPlain, kPrefix}) {
for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) { for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) {
for (int total_order = 0; total_order <= 1; total_order++) { for (int total_order = 0; total_order <= 1; total_order++) {
for (int store_index_in_file = 0; store_index_in_file <= 1;
++store_index_in_file) {
Options options = CurrentOptions(); Options options = CurrentOptions();
options.create_if_missing = true; options.create_if_missing = true;
// Set only one bucket to force bucket conflict. // Set only one bucket to force bucket conflict.
@ -381,6 +414,7 @@ TEST_P(PlainTableDBTest, Flush) {
plain_table_options.huge_page_tlb_size = huge_page_tlb_size; plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
plain_table_options.encoding_type = encoding_type; plain_table_options.encoding_type = encoding_type;
plain_table_options.full_scan_mode = false; plain_table_options.full_scan_mode = false;
plain_table_options.store_index_in_file = store_index_in_file;
options.table_factory.reset( options.table_factory.reset(
NewPlainTableFactory(plain_table_options)); NewPlainTableFactory(plain_table_options));
@ -393,6 +427,7 @@ TEST_P(PlainTableDBTest, Flush) {
plain_table_options.huge_page_tlb_size = huge_page_tlb_size; plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
plain_table_options.encoding_type = encoding_type; plain_table_options.encoding_type = encoding_type;
plain_table_options.full_scan_mode = false; plain_table_options.full_scan_mode = false;
plain_table_options.store_index_in_file = store_index_in_file;
options.table_factory.reset( options.table_factory.reset(
NewPlainTableFactory(plain_table_options)); NewPlainTableFactory(plain_table_options));
@ -418,12 +453,18 @@ TEST_P(PlainTableDBTest, Flush) {
auto row = ptc.begin(); auto row = ptc.begin();
auto tp = row->second; auto tp = row->second;
if (!store_index_in_file) {
ASSERT_EQ(total_order ? "4" : "12", ASSERT_EQ(total_order ? "4" : "12",
(tp->user_collected_properties) (tp->user_collected_properties)
.at("plain_table_hash_table_size")); .at("plain_table_hash_table_size"));
ASSERT_EQ( ASSERT_EQ("0", (tp->user_collected_properties)
"0", .at("plain_table_sub_index_size"));
(tp->user_collected_properties).at("plain_table_sub_index_size")); } else {
ASSERT_EQ("0", (tp->user_collected_properties)
.at("plain_table_hash_table_size"));
ASSERT_EQ("0", (tp->user_collected_properties)
.at("plain_table_sub_index_size"));
}
ASSERT_EQ("v3", Get("1000000000000foo")); ASSERT_EQ("v3", Get("1000000000000foo"));
ASSERT_EQ("v2", Get("0000000000000bar")); ASSERT_EQ("v2", Get("0000000000000bar"));
} }
@ -431,6 +472,7 @@ TEST_P(PlainTableDBTest, Flush) {
} }
} }
} }
}
TEST_P(PlainTableDBTest, Flush2) { TEST_P(PlainTableDBTest, Flush2) {
for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024; for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
@ -438,9 +480,17 @@ TEST_P(PlainTableDBTest, Flush2) {
for (EncodingType encoding_type : {kPlain, kPrefix}) { for (EncodingType encoding_type : {kPlain, kPrefix}) {
for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) { for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) {
for (int total_order = 0; total_order <= 1; total_order++) { for (int total_order = 0; total_order <= 1; total_order++) {
for (int store_index_in_file = 0; store_index_in_file <= 1;
++store_index_in_file) {
if (encoding_type == kPrefix && total_order) { if (encoding_type == kPrefix && total_order) {
continue; continue;
} }
if (!bloom_bits && store_index_in_file) {
continue;
}
if (total_order && store_index_in_file) {
continue;
}
bool expect_bloom_not_match = false; bool expect_bloom_not_match = false;
Options options = CurrentOptions(); Options options = CurrentOptions();
options.create_if_missing = true; options.create_if_missing = true;
@ -459,6 +509,7 @@ TEST_P(PlainTableDBTest, Flush2) {
plain_table_options.bloom_bits_per_key = bloom_bits; plain_table_options.bloom_bits_per_key = bloom_bits;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size; plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
plain_table_options.encoding_type = encoding_type; plain_table_options.encoding_type = encoding_type;
plain_table_options.store_index_in_file = store_index_in_file;
options.table_factory.reset(new TestPlainTableFactory( options.table_factory.reset(new TestPlainTableFactory(
&expect_bloom_not_match, plain_table_options, &expect_bloom_not_match, plain_table_options,
0 /* column_family_id */, kDefaultColumnFamilyName)); 0 /* column_family_id */, kDefaultColumnFamilyName));
@ -502,6 +553,7 @@ TEST_P(PlainTableDBTest, Flush2) {
} }
} }
} }
}
TEST_P(PlainTableDBTest, Immortal) { TEST_P(PlainTableDBTest, Immortal) {
for (EncodingType encoding_type : {kPlain, kPrefix}) { for (EncodingType encoding_type : {kPlain, kPrefix}) {

@ -351,11 +351,10 @@ struct PlainTableOptions {
// using the index. // using the index.
bool full_scan_mode = false; bool full_scan_mode = false;
// THIS FEATURE IS REMOVED.
// @store_index_in_file: compute plain table index and bloom filter during // @store_index_in_file: compute plain table index and bloom filter during
// file building and store it in file. When reading // file building and store it in file. When reading
// file, index will be mmaped instead of recomputation. // file, index will be mmaped instead of recomputation.
// bool store_index_in_file = false; bool store_index_in_file = false;
}; };
// -- Plain Table with prefix-only seek // -- Plain Table with prefix-only seek

@ -21,7 +21,8 @@
jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle( jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle(
JNIEnv * /*env*/, jobject /*jobj*/, jint jkey_size, JNIEnv * /*env*/, jobject /*jobj*/, jint jkey_size,
jint jbloom_bits_per_key, jdouble jhash_table_ratio, jint jindex_sparseness, jint jbloom_bits_per_key, jdouble jhash_table_ratio, jint jindex_sparseness,
jint jhuge_page_tlb_size, jbyte jencoding_type, jboolean jfull_scan_mode) { jint jhuge_page_tlb_size, jbyte jencoding_type, jboolean jfull_scan_mode,
jboolean jstore_index_in_file) {
rocksdb::PlainTableOptions options = rocksdb::PlainTableOptions(); rocksdb::PlainTableOptions options = rocksdb::PlainTableOptions();
options.user_key_len = jkey_size; options.user_key_len = jkey_size;
options.bloom_bits_per_key = jbloom_bits_per_key; options.bloom_bits_per_key = jbloom_bits_per_key;
@ -30,6 +31,7 @@ jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle(
options.huge_page_tlb_size = jhuge_page_tlb_size; options.huge_page_tlb_size = jhuge_page_tlb_size;
options.encoding_type = static_cast<rocksdb::EncodingType>(jencoding_type); options.encoding_type = static_cast<rocksdb::EncodingType>(jencoding_type);
options.full_scan_mode = jfull_scan_mode; options.full_scan_mode = jfull_scan_mode;
options.store_index_in_file = jstore_index_in_file;
return reinterpret_cast<jlong>(rocksdb::NewPlainTableFactory(options)); return reinterpret_cast<jlong>(rocksdb::NewPlainTableFactory(options));
} }

@ -21,6 +21,8 @@ public class PlainTableConfig extends TableFormatConfig {
public static final EncodingType DEFAULT_ENCODING_TYPE = public static final EncodingType DEFAULT_ENCODING_TYPE =
EncodingType.kPlain; EncodingType.kPlain;
public static final boolean DEFAULT_FULL_SCAN_MODE = false; public static final boolean DEFAULT_FULL_SCAN_MODE = false;
public static final boolean DEFAULT_STORE_INDEX_IN_FILE
= false;
public PlainTableConfig() { public PlainTableConfig() {
keySize_ = VARIABLE_LENGTH; keySize_ = VARIABLE_LENGTH;
@ -30,6 +32,7 @@ public class PlainTableConfig extends TableFormatConfig {
hugePageTlbSize_ = DEFAULT_HUGE_TLB_SIZE; hugePageTlbSize_ = DEFAULT_HUGE_TLB_SIZE;
encodingType_ = DEFAULT_ENCODING_TYPE; encodingType_ = DEFAULT_ENCODING_TYPE;
fullScanMode_ = DEFAULT_FULL_SCAN_MODE; fullScanMode_ = DEFAULT_FULL_SCAN_MODE;
storeIndexInFile_ = DEFAULT_STORE_INDEX_IN_FILE;
} }
/** /**
@ -208,10 +211,9 @@ public class PlainTableConfig extends TableFormatConfig {
* @param storeIndexInFile value indicating if index shall * @param storeIndexInFile value indicating if index shall
* be stored in a file * be stored in a file
* @return the reference to the current config. * @return the reference to the current config.
* @deprecated
*/ */
@Deprecated
public PlainTableConfig setStoreIndexInFile(boolean storeIndexInFile) { public PlainTableConfig setStoreIndexInFile(boolean storeIndexInFile) {
this.storeIndexInFile_ = storeIndexInFile;
return this; return this;
} }
@ -220,20 +222,23 @@ public class PlainTableConfig extends TableFormatConfig {
* in a file. * in a file.
* *
* @return currently set value for store index in file. * @return currently set value for store index in file.
* @deprecated
*/ */
@Deprecated
public boolean storeIndexInFile() { public boolean storeIndexInFile() {
return false; return storeIndexInFile_;
} }
@Override protected long newTableFactoryHandle() { @Override protected long newTableFactoryHandle() {
return newTableFactoryHandle(keySize_, bloomBitsPerKey_, hashTableRatio_, indexSparseness_, return newTableFactoryHandle(keySize_, bloomBitsPerKey_,
hugePageTlbSize_, encodingType_.getValue(), fullScanMode_); hashTableRatio_, indexSparseness_, hugePageTlbSize_,
encodingType_.getValue(), fullScanMode_,
storeIndexInFile_);
} }
private native long newTableFactoryHandle(int keySize, int bloomBitsPerKey, double hashTableRatio, private native long newTableFactoryHandle(
int indexSparseness, int hugePageTlbSize, byte encodingType, boolean fullScanMode); int keySize, int bloomBitsPerKey,
double hashTableRatio, int indexSparseness,
int hugePageTlbSize, byte encodingType,
boolean fullScanMode, boolean storeIndexInFile);
private int keySize_; private int keySize_;
private int bloomBitsPerKey_; private int bloomBitsPerKey_;
@ -242,4 +247,5 @@ public class PlainTableConfig extends TableFormatConfig {
private int hugePageTlbSize_; private int hugePageTlbSize_;
private EncodingType encodingType_; private EncodingType encodingType_;
private boolean fullScanMode_; private boolean fullScanMode_;
private boolean storeIndexInFile_;
} }

@ -70,6 +70,14 @@ public class PlainTableConfigTest {
plainTableConfig.setFullScanMode(true); plainTableConfig.setFullScanMode(true);
assertThat(plainTableConfig.fullScanMode()).isTrue(); } assertThat(plainTableConfig.fullScanMode()).isTrue(); }
@Test
public void storeIndexInFile() {
PlainTableConfig plainTableConfig = new PlainTableConfig();
plainTableConfig.setStoreIndexInFile(true);
assertThat(plainTableConfig.storeIndexInFile()).
isTrue();
}
@Test @Test
public void plainTableConfig() { public void plainTableConfig() {
try(final Options opt = new Options()) { try(final Options opt = new Options()) {

@ -662,6 +662,7 @@ TEST_F(OptionsTest, GetPlainTableOptionsFromString) {
ASSERT_EQ(new_opt.huge_page_tlb_size, 4); ASSERT_EQ(new_opt.huge_page_tlb_size, 4);
ASSERT_EQ(new_opt.encoding_type, EncodingType::kPrefix); ASSERT_EQ(new_opt.encoding_type, EncodingType::kPrefix);
ASSERT_TRUE(new_opt.full_scan_mode); ASSERT_TRUE(new_opt.full_scan_mode);
ASSERT_TRUE(new_opt.store_index_in_file);
// unknown option // unknown option
ASSERT_NOK(GetPlainTableOptionsFromString(table_opt, ASSERT_NOK(GetPlainTableOptionsFromString(table_opt,

@ -20,6 +20,7 @@
#include "table/plain_table_factory.h" #include "table/plain_table_factory.h"
#include "db/dbformat.h" #include "db/dbformat.h"
#include "table/block_builder.h" #include "table/block_builder.h"
#include "table/bloom_block.h"
#include "table/plain_table_index.h" #include "table/plain_table_index.h"
#include "table/format.h" #include "table/format.h"
#include "table/meta_blocks.h" #include "table/meta_blocks.h"
@ -61,17 +62,34 @@ PlainTableBuilder::PlainTableBuilder(
int_tbl_prop_collector_factories, int_tbl_prop_collector_factories,
uint32_t column_family_id, WritableFileWriter* file, uint32_t user_key_len, uint32_t column_family_id, WritableFileWriter* file, uint32_t user_key_len,
EncodingType encoding_type, size_t index_sparseness, EncodingType encoding_type, size_t index_sparseness,
const std::string& column_family_name) uint32_t bloom_bits_per_key, const std::string& column_family_name,
uint32_t num_probes, size_t huge_page_tlb_size, double hash_table_ratio,
bool store_index_in_file)
: ioptions_(ioptions), : ioptions_(ioptions),
moptions_(moptions), moptions_(moptions),
bloom_block_(num_probes),
file_(file), file_(file),
bloom_bits_per_key_(bloom_bits_per_key),
huge_page_tlb_size_(huge_page_tlb_size),
encoder_(encoding_type, user_key_len, moptions.prefix_extractor.get(), encoder_(encoding_type, user_key_len, moptions.prefix_extractor.get(),
index_sparseness), index_sparseness),
store_index_in_file_(store_index_in_file),
prefix_extractor_(moptions.prefix_extractor.get()) { prefix_extractor_(moptions.prefix_extractor.get()) {
// Build index block and save it in the file if hash_table_ratio > 0
if (store_index_in_file_) {
assert(hash_table_ratio > 0 || IsTotalOrderMode());
index_builder_.reset(new PlainTableIndexBuilder(
&arena_, ioptions, moptions.prefix_extractor.get(), index_sparseness,
hash_table_ratio, huge_page_tlb_size_));
properties_.user_collected_properties
[PlainTablePropertyNames::kBloomVersion] = "1"; // For future use
}
properties_.fixed_key_len = user_key_len; properties_.fixed_key_len = user_key_len;
// for plain table, we put all the data in a big chuck. // for plain table, we put all the data in a big chuck.
properties_.num_data_blocks = 1; properties_.num_data_blocks = 1;
// Fill it later if store_index_in_file_ == true
properties_.index_size = 0; properties_.index_size = 0;
properties_.filter_size = 0; properties_.filter_size = 0;
// To support roll-back to previous version, now still use version 0 for // To support roll-back to previous version, now still use version 0 for
@ -112,11 +130,26 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
return; return;
} }
// Store key hash
if (store_index_in_file_) {
if (moptions_.prefix_extractor == nullptr) {
keys_or_prefixes_hashes_.push_back(GetSliceHash(internal_key.user_key));
} else {
Slice prefix =
moptions_.prefix_extractor->Transform(internal_key.user_key);
keys_or_prefixes_hashes_.push_back(GetSliceHash(prefix));
}
}
// Write value // Write value
assert(offset_ <= std::numeric_limits<uint32_t>::max()); assert(offset_ <= std::numeric_limits<uint32_t>::max());
auto prev_offset = static_cast<uint32_t>(offset_);
// Write out the key // Write out the key
encoder_.AppendKey(key, file_, &offset_, meta_bytes_buf, encoder_.AppendKey(key, file_, &offset_, meta_bytes_buf,
&meta_bytes_buf_size); &meta_bytes_buf_size);
if (SaveIndexInFile()) {
index_builder_->AddKeyPrefix(GetPrefix(internal_key), prev_offset);
}
// Write value length // Write value length
uint32_t value_size = static_cast<uint32_t>(value.size()); uint32_t value_size = static_cast<uint32_t>(value.size());
@ -162,6 +195,46 @@ Status PlainTableBuilder::Finish() {
MetaIndexBuilder meta_index_builer; MetaIndexBuilder meta_index_builer;
if (store_index_in_file_ && (properties_.num_entries > 0)) {
assert(properties_.num_entries <= std::numeric_limits<uint32_t>::max());
Status s;
BlockHandle bloom_block_handle;
if (bloom_bits_per_key_ > 0) {
bloom_block_.SetTotalBits(
&arena_,
static_cast<uint32_t>(properties_.num_entries) * bloom_bits_per_key_,
ioptions_.bloom_locality, huge_page_tlb_size_, ioptions_.info_log);
PutVarint32(&properties_.user_collected_properties
[PlainTablePropertyNames::kNumBloomBlocks],
bloom_block_.GetNumBlocks());
bloom_block_.AddKeysHashes(keys_or_prefixes_hashes_);
Slice bloom_finish_result = bloom_block_.Finish();
properties_.filter_size = bloom_finish_result.size();
s = WriteBlock(bloom_finish_result, file_, &offset_, &bloom_block_handle);
if (!s.ok()) {
return s;
}
meta_index_builer.Add(BloomBlockBuilder::kBloomBlock, bloom_block_handle);
}
BlockHandle index_block_handle;
Slice index_finish_result = index_builder_->Finish();
properties_.index_size = index_finish_result.size();
s = WriteBlock(index_finish_result, file_, &offset_, &index_block_handle);
if (!s.ok()) {
return s;
}
meta_index_builer.Add(PlainTableIndexBuilder::kPlainTableIndexBlock,
index_block_handle);
}
// Calculate bloom block size and index block size // Calculate bloom block size and index block size
PropertyBlockBuilder property_block_builder; PropertyBlockBuilder property_block_builder;
// -- Add basic properties // -- Add basic properties

@ -12,6 +12,8 @@
#include "rocksdb/status.h" #include "rocksdb/status.h"
#include "rocksdb/table.h" #include "rocksdb/table.h"
#include "rocksdb/table_properties.h" #include "rocksdb/table_properties.h"
#include "table/bloom_block.h"
#include "table/plain_table_index.h"
#include "table/plain_table_key_coding.h" #include "table/plain_table_key_coding.h"
#include "table/table_builder.h" #include "table/table_builder.h"
@ -35,7 +37,10 @@ class PlainTableBuilder: public TableBuilder {
int_tbl_prop_collector_factories, int_tbl_prop_collector_factories,
uint32_t column_family_id, WritableFileWriter* file, uint32_t column_family_id, WritableFileWriter* file,
uint32_t user_key_size, EncodingType encoding_type, uint32_t user_key_size, EncodingType encoding_type,
size_t index_sparseness, const std::string& column_family_name); size_t index_sparseness, uint32_t bloom_bits_per_key,
const std::string& column_family_name, uint32_t num_probes = 6,
size_t huge_page_tlb_size = 0, double hash_table_ratio = 0,
bool store_index_in_file = false);
// REQUIRES: Either Finish() or Abandon() has been called. // REQUIRES: Either Finish() or Abandon() has been called.
~PlainTableBuilder(); ~PlainTableBuilder();
@ -69,6 +74,8 @@ class PlainTableBuilder: public TableBuilder {
TableProperties GetTableProperties() const override { return properties_; } TableProperties GetTableProperties() const override { return properties_; }
bool SaveIndexInFile() const { return store_index_in_file_; }
private: private:
Arena arena_; Arena arena_;
const ImmutableCFOptions& ioptions_; const ImmutableCFOptions& ioptions_;
@ -76,12 +83,19 @@ class PlainTableBuilder: public TableBuilder {
std::vector<std::unique_ptr<IntTblPropCollector>> std::vector<std::unique_ptr<IntTblPropCollector>>
table_properties_collectors_; table_properties_collectors_;
BloomBlockBuilder bloom_block_;
std::unique_ptr<PlainTableIndexBuilder> index_builder_;
WritableFileWriter* file_; WritableFileWriter* file_;
uint64_t offset_ = 0; uint64_t offset_ = 0;
uint32_t bloom_bits_per_key_;
size_t huge_page_tlb_size_;
Status status_; Status status_;
TableProperties properties_; TableProperties properties_;
PlainTableKeyEncoder encoder_; PlainTableKeyEncoder encoder_;
bool store_index_in_file_;
std::vector<uint32_t> keys_or_prefixes_hashes_; std::vector<uint32_t> keys_or_prefixes_hashes_;
bool closed_ = false; // Either Finish() or Abandon() has been called. bool closed_ = false; // Either Finish() or Abandon() has been called.

@ -42,8 +42,10 @@ TableBuilder* PlainTableFactory::NewTableBuilder(
table_builder_options.ioptions, table_builder_options.moptions, table_builder_options.ioptions, table_builder_options.moptions,
table_builder_options.int_tbl_prop_collector_factories, column_family_id, table_builder_options.int_tbl_prop_collector_factories, column_family_id,
file, table_options_.user_key_len, table_options_.encoding_type, file, table_options_.user_key_len, table_options_.encoding_type,
table_options_.index_sparseness, table_options_.index_sparseness, table_options_.bloom_bits_per_key,
table_builder_options.column_family_name); table_builder_options.column_family_name, 6,
table_options_.huge_page_tlb_size, table_options_.hash_table_ratio,
table_options_.store_index_in_file);
} }
std::string PlainTableFactory::GetPrintableTableOptions() const { std::string PlainTableFactory::GetPrintableTableOptions() const {
@ -55,15 +57,27 @@ std::string PlainTableFactory::GetPrintableTableOptions() const {
snprintf(buffer, kBufferSize, " user_key_len: %u\n", snprintf(buffer, kBufferSize, " user_key_len: %u\n",
table_options_.user_key_len); table_options_.user_key_len);
ret.append(buffer); ret.append(buffer);
snprintf(buffer, kBufferSize, " bloom_bits_per_key: %d\n",
table_options_.bloom_bits_per_key);
ret.append(buffer);
snprintf(buffer, kBufferSize, " hash_table_ratio: %lf\n",
table_options_.hash_table_ratio);
ret.append(buffer);
snprintf(buffer, kBufferSize, " index_sparseness: %" ROCKSDB_PRIszt "\n", snprintf(buffer, kBufferSize, " index_sparseness: %" ROCKSDB_PRIszt "\n",
table_options_.index_sparseness); table_options_.index_sparseness);
ret.append(buffer); ret.append(buffer);
snprintf(buffer, kBufferSize, " huge_page_tlb_size: %" ROCKSDB_PRIszt "\n",
table_options_.huge_page_tlb_size);
ret.append(buffer);
snprintf(buffer, kBufferSize, " encoding_type: %d\n", snprintf(buffer, kBufferSize, " encoding_type: %d\n",
table_options_.encoding_type); table_options_.encoding_type);
ret.append(buffer); ret.append(buffer);
snprintf(buffer, kBufferSize, " full_scan_mode: %d\n", snprintf(buffer, kBufferSize, " full_scan_mode: %d\n",
table_options_.full_scan_mode); table_options_.full_scan_mode);
ret.append(buffer); ret.append(buffer);
snprintf(buffer, kBufferSize, " store_index_in_file: %d\n",
table_options_.store_index_in_file);
ret.append(buffer);
return ret; return ret;
} }

@ -204,7 +204,8 @@ static std::unordered_map<std::string, OptionTypeInfo> plain_table_type_info = {
{offsetof(struct PlainTableOptions, full_scan_mode), OptionType::kBoolean, {offsetof(struct PlainTableOptions, full_scan_mode), OptionType::kBoolean,
OptionVerificationType::kNormal, false, 0}}, OptionVerificationType::kNormal, false, 0}},
{"store_index_in_file", {"store_index_in_file",
{0, OptionType::kBoolean, OptionVerificationType::kDeprecated, false, 0}}}; {offsetof(struct PlainTableOptions, store_index_in_file),
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}}};
} // namespace rocksdb } // namespace rocksdb
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE

@ -294,8 +294,47 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
assert(props != nullptr); assert(props != nullptr);
table_properties_.reset(props); table_properties_.reset(props);
// index_in_file and bloom_in_file features are deprecated. BlockContents index_block_contents;
// Even if they exist in file, ignore them and always reconstruct. Status s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */,
file_size_, kPlainTableMagicNumber, ioptions_,
PlainTableIndexBuilder::kPlainTableIndexBlock,
&index_block_contents,
true /* compression_type_missing */);
bool index_in_file = s.ok();
BlockContents bloom_block_contents;
bool bloom_in_file = false;
// We only need to read the bloom block if index block is in file.
if (index_in_file) {
s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */,
file_size_, kPlainTableMagicNumber, ioptions_,
BloomBlockBuilder::kBloomBlock, &bloom_block_contents,
true /* compression_type_missing */);
bloom_in_file = s.ok() && bloom_block_contents.data.size() > 0;
}
Slice* bloom_block;
if (bloom_in_file) {
// If bloom_block_contents.allocation is not empty (which will be the case
// for non-mmap mode), it holds the allocated memory for the bloom block.
// It needs to be kept alive to keep `bloom_block` valid.
bloom_block_alloc_ = std::move(bloom_block_contents.allocation);
bloom_block = &bloom_block_contents.data;
} else {
bloom_block = nullptr;
}
Slice* index_block;
if (index_in_file) {
// If index_block_contents.allocation is not empty (which will be the case
// for non-mmap mode), it holds the allocated memory for the index block.
// It needs to be kept alive to keep `index_block` valid.
index_block_alloc_ = std::move(index_block_contents.allocation);
index_block = &index_block_contents.data;
} else {
index_block = nullptr;
}
if ((prefix_extractor_ == nullptr) && (hash_table_ratio != 0)) { if ((prefix_extractor_ == nullptr) && (hash_table_ratio != 0)) {
// moptions.prefix_extractor is requried for a hash-based look-up. // moptions.prefix_extractor is requried for a hash-based look-up.
@ -308,6 +347,7 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
// offset) and append it to IndexRecordList, which is a data structure created // offset) and append it to IndexRecordList, which is a data structure created
// to store them. // to store them.
if (!index_in_file) {
// Allocate bloom filter here for total order mode. // Allocate bloom filter here for total order mode.
if (IsTotalOrderMode()) { if (IsTotalOrderMode()) {
uint32_t num_bloom_bits = uint32_t num_bloom_bits =
@ -319,25 +359,65 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
huge_page_tlb_size, ioptions_.info_log); huge_page_tlb_size, ioptions_.info_log);
} }
} }
} else if (bloom_in_file) {
enable_bloom_ = true;
auto num_blocks_property = props->user_collected_properties.find(
PlainTablePropertyNames::kNumBloomBlocks);
uint32_t num_blocks = 0;
if (num_blocks_property != props->user_collected_properties.end()) {
Slice temp_slice(num_blocks_property->second);
if (!GetVarint32(&temp_slice, &num_blocks)) {
num_blocks = 0;
}
}
// cast away const qualifier, because bloom_ won't be changed
bloom_.SetRawData(
const_cast<unsigned char*>(
reinterpret_cast<const unsigned char*>(bloom_block->data())),
static_cast<uint32_t>(bloom_block->size()) * 8, num_blocks);
} else {
// Index in file but no bloom in file. Disable bloom filter in this case.
enable_bloom_ = false;
bloom_bits_per_key = 0;
}
PlainTableIndexBuilder index_builder(&arena_, ioptions_, prefix_extractor_, PlainTableIndexBuilder index_builder(&arena_, ioptions_, prefix_extractor_,
index_sparseness, hash_table_ratio, index_sparseness, hash_table_ratio,
huge_page_tlb_size); huge_page_tlb_size);
std::vector<uint32_t> prefix_hashes; std::vector<uint32_t> prefix_hashes;
Status s = PopulateIndexRecordList(&index_builder, &prefix_hashes); if (!index_in_file) {
s = PopulateIndexRecordList(&index_builder, &prefix_hashes);
if (!s.ok()) {
return s;
}
} else {
s = index_.InitFromRawData(*index_block);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
}
if (!index_in_file) {
// Calculated bloom filter size and allocate memory for // Calculated bloom filter size and allocate memory for
// bloom filter based on the number of prefixes, then fill it. // bloom filter based on the number of prefixes, then fill it.
AllocateAndFillBloom(bloom_bits_per_key, index_.GetNumPrefixes(), AllocateAndFillBloom(bloom_bits_per_key, index_.GetNumPrefixes(),
huge_page_tlb_size, &prefix_hashes); huge_page_tlb_size, &prefix_hashes);
}
// Fill two table properties. // Fill two table properties.
if (!index_in_file) {
props->user_collected_properties["plain_table_hash_table_size"] = props->user_collected_properties["plain_table_hash_table_size"] =
ToString(index_.GetIndexSize() * PlainTableIndex::kOffsetLen); ToString(index_.GetIndexSize() * PlainTableIndex::kOffsetLen);
props->user_collected_properties["plain_table_sub_index_size"] = props->user_collected_properties["plain_table_sub_index_size"] =
ToString(index_.GetSubIndexSize()); ToString(index_.GetSubIndexSize());
} else {
props->user_collected_properties["plain_table_hash_table_size"] =
ToString(0);
props->user_collected_properties["plain_table_sub_index_size"] =
ToString(0);
}
return Status::OK(); return Status::OK();
} }

Loading…
Cancel
Save