From 2c1bd8846f2abea1375d37fbb24e64c811b30e0f Mon Sep 17 00:00:00 2001 From: fyrz Date: Mon, 20 Oct 2014 22:42:32 +0200 Subject: [PATCH] BlockBasedTableConfig & PlainTableConfig enhancements Summary: BlockBasedTableConfig - ported Checksum - ported IndexType PlainTableConfig - added missing options - added EncodingType Test Plan: make rocksdbjava make jtest Differential Revision: https://reviews.facebook.net/D26595 --- java/org/rocksdb/BlockBasedTableConfig.java | 61 ++++++-- java/org/rocksdb/ChecksumType.java | 39 ++++++ java/org/rocksdb/EncodingType.java | 55 ++++++++ java/org/rocksdb/IndexType.java | 37 +++++ java/org/rocksdb/PlainTableConfig.java | 148 ++++++++++++++++++-- java/rocksjni/table.cc | 19 ++- 6 files changed, 336 insertions(+), 23 deletions(-) create mode 100644 java/org/rocksdb/ChecksumType.java create mode 100644 java/org/rocksdb/EncodingType.java create mode 100644 java/org/rocksdb/IndexType.java diff --git a/java/org/rocksdb/BlockBasedTableConfig.java b/java/org/rocksdb/BlockBasedTableConfig.java index 2f9f0ac64..d236b1a39 100644 --- a/java/org/rocksdb/BlockBasedTableConfig.java +++ b/java/org/rocksdb/BlockBasedTableConfig.java @@ -22,6 +22,8 @@ public class BlockBasedTableConfig extends TableFormatConfig { cacheIndexAndFilterBlocks_ = false; hashIndexAllowCollision_ = true; blockCacheCompressedSize_ = 0; + checksumType_ = ChecksumType.kCRC32c; + indexType_ = IndexType.kBinarySearch; } /** @@ -293,6 +295,44 @@ public class BlockBasedTableConfig extends TableFormatConfig { return this; } + /** + * Sets the checksum type to be used with this table. + * + * @param checksumType {@link org.rocksdb.ChecksumType} value. + * @return the reference to the current option. 
+ */ + public BlockBasedTableConfig setChecksumType(ChecksumType checksumType) { + checksumType_ = checksumType; + return this; + } + + /** + * + * @return the currently set checksum type + */ + public ChecksumType checksumType() { + return checksumType_; + } + + /** + * Sets the index type to used with this table. + * + * @param indexType {@link org.rocksdb.IndexType} value + * @return the reference to the current option. + */ + public BlockBasedTableConfig setIndexType(IndexType indexType) { + indexType_ = indexType; + return this; + } + + /** + * + * @return the currently set index type + */ + public IndexType indexType() { + return indexType_; + } + @Override protected long newTableFactoryHandle() { long filterHandle = 0; if (filter_ != null) { @@ -304,7 +344,8 @@ public class BlockBasedTableConfig extends TableFormatConfig { blockRestartInterval_, wholeKeyFiltering_, filterHandle, cacheIndexAndFilterBlocks_, hashIndexAllowCollision_, blockCacheCompressedSize_, - blockCacheCompressedNumShardBits_); + blockCacheCompressedNumShardBits_, + checksumType_.getValue(), indexType_.getValue()); } private native long newTableFactoryHandle( @@ -312,19 +353,21 @@ public class BlockBasedTableConfig extends TableFormatConfig { long blockSize, int blockSizeDeviation, int blockRestartInterval, boolean wholeKeyFiltering, long filterPolicyHandle, boolean cacheIndexAndFilterBlocks, boolean hashIndexAllowCollision, - long blockCacheCompressedSize, int blockCacheCompressedNumShardBits); + long blockCacheCompressedSize, int blockCacheCompressedNumShardBits, + byte checkSumType, byte indexType); + private boolean cacheIndexAndFilterBlocks_; + private IndexType indexType_; + private boolean hashIndexAllowCollision_; + private ChecksumType checksumType_; private boolean noBlockCache_; + private long blockSize_; private long blockCacheSize_; private int blockCacheNumShardBits_; - private long shard; - private long blockSize_; + private long blockCacheCompressedSize_; + private int 
blockCacheCompressedNumShardBits_; private int blockSizeDeviation_; private int blockRestartInterval_; - private boolean wholeKeyFiltering_; private Filter filter_; - private boolean cacheIndexAndFilterBlocks_; - private boolean hashIndexAllowCollision_; - private long blockCacheCompressedSize_; - private int blockCacheCompressedNumShardBits_; + private boolean wholeKeyFiltering_; } diff --git a/java/org/rocksdb/ChecksumType.java b/java/org/rocksdb/ChecksumType.java new file mode 100644 index 000000000..40ba032b3 --- /dev/null +++ b/java/org/rocksdb/ChecksumType.java @@ -0,0 +1,39 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb; + +/** + * Checksum types used in conjunction with BlockBasedTable.. + */ +public enum ChecksumType { + /** + * Not implemented yet. + */ + kNoChecksum((byte) 0), + /** + * CRC32 Checksum + */ + kCRC32c((byte)1), + /** + * XX Hash + */ + kxxHash((byte)2); + + private final byte value_; + + private ChecksumType(byte value) { + value_ = value; + } + + /** + * Returns the byte value of the enumerations value + * + * @return byte representation + */ + public byte getValue() { + return value_; + } +} diff --git a/java/org/rocksdb/EncodingType.java b/java/org/rocksdb/EncodingType.java new file mode 100644 index 000000000..1d0a36c37 --- /dev/null +++ b/java/org/rocksdb/EncodingType.java @@ -0,0 +1,55 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb; + +/** + * EncodingType + * + *

The value will determine how to encode keys + * when writing to a new SST file.

+ * + *

This value will be stored + * inside the SST file and used when reading from + * the file, which makes it possible for users to choose + * a different encoding type when reopening a DB. Files with + * different encoding types can co-exist in the same DB and + * can be read.

+ */ +public enum EncodingType { + /** + * Always write full keys without any special encoding. + */ + kPlain((byte)0), + /** + *

Find opportunity to write the same prefix once for multiple rows. + * In some cases, when a key follows a previous key with the same prefix, + * instead of writing out the full key, it just writes out the size of the + * shared prefix, as well as other bytes, to save some bytes.

+ * + *

When using this option, the user is required to use the same prefix + * extractor to make sure the same prefix will be extracted from the same key. + * The Name() value of the prefix extractor will be stored in the file. When + * reopening the file, the name of the options.prefix_extractor given will be + * bitwise compared to the prefix extractors stored in the file. An error + * will be returned if the two don't match.

+ */ + kPrefix((byte)1); + + private final byte value_; + + private EncodingType(byte value) { + value_ = value; + } + + /** + * Returns the byte value of the enumerations value + * + * @return byte representation + */ + public byte getValue() { + return value_; + } +} diff --git a/java/org/rocksdb/IndexType.java b/java/org/rocksdb/IndexType.java new file mode 100644 index 000000000..47912f7b6 --- /dev/null +++ b/java/org/rocksdb/IndexType.java @@ -0,0 +1,37 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb; + +/** + * IndexType used in conjunction with BlockBasedTable. + */ +public enum IndexType { + /** + * A space efficient index block that is optimized for + * binary-search-based index. + */ + kBinarySearch((byte) 0), + /** + * The hash index, if enabled, will do the hash lookup when + * {@code Options.prefix_extractor} is provided. + */ + kHashSearch((byte)1); + + private final byte value_; + + private IndexType(byte value) { + value_ = value; + } + + /** + * Returns the byte value of the enumerations value + * + * @return byte representation + */ + public byte getValue() { + return value_; + } +} diff --git a/java/org/rocksdb/PlainTableConfig.java b/java/org/rocksdb/PlainTableConfig.java index 554ce3840..bb44e1ac1 100644 --- a/java/org/rocksdb/PlainTableConfig.java +++ b/java/org/rocksdb/PlainTableConfig.java @@ -7,28 +7,43 @@ package org.rocksdb; /** * The config for plain table sst format. * - * PlainTable is a RocksDB's SST file format optimized for low query latency - * on pure-memory or really low-latency media. It also support prefix - * hash feature. + *

PlainTable is RocksDB's SST file format optimized for low query + * latency on pure-memory or really low-latency media.

+ * + *

It also supports the prefix hash feature.

*/ public class PlainTableConfig extends TableFormatConfig { public static final int VARIABLE_LENGTH = 0; public static final int DEFAULT_BLOOM_BITS_PER_KEY = 10; public static final double DEFAULT_HASH_TABLE_RATIO = 0.75; public static final int DEFAULT_INDEX_SPARSENESS = 16; + public static final int DEFAULT_HUGE_TLB_SIZE = 0; + public static final EncodingType DEFAULT_ENCODING_TYPE = + EncodingType.kPlain; + public static final boolean DEFAULT_FULL_SCAN_MODE = false; + public static final boolean DEFAULT_STORE_INDEX_IN_FILE + = false; public PlainTableConfig() { keySize_ = VARIABLE_LENGTH; bloomBitsPerKey_ = DEFAULT_BLOOM_BITS_PER_KEY; hashTableRatio_ = DEFAULT_HASH_TABLE_RATIO; indexSparseness_ = DEFAULT_INDEX_SPARSENESS; + hugePageTlbSize_ = DEFAULT_HUGE_TLB_SIZE; + encodingType_ = DEFAULT_ENCODING_TYPE; + fullScanMode_ = DEFAULT_FULL_SCAN_MODE; + storeIndexInFile_ = DEFAULT_STORE_INDEX_IN_FILE; } /** - * Set the length of the user key. If it is set to be VARIABLE_LENGTH, - * then it indicates the user keys are variable-lengthed. Otherwise, - * all the keys need to have the same length in byte. - * DEFAULT: VARIABLE_LENGTH + *

Set the length of the user key. If it is set to be + * VARIABLE_LENGTH, then it indicates the user keys are + * of variable length.

+ * + *

Otherwise, all the keys need to have the same length + * in bytes.

+ * + *

DEFAULT: VARIABLE_LENGTH

* * @param keySize the length of the user key. * @return the reference to the current config. @@ -103,21 +118,134 @@ public class PlainTableConfig extends TableFormatConfig { /** * @return the index sparseness. */ - public int indexSparseness() { + public long indexSparseness() { return indexSparseness_; } + /** + *

huge_page_tlb_size: if <=0, allocate hash indexes and blooms + * from malloc; otherwise, allocate them from huge page TLB.

+ * + *

The user needs to reserve huge pages for it to be allocated, + * like: {@code sysctl -w vm.nr_hugepages=20}

+ * + *

See the Linux kernel documentation file Documentation/vm/hugetlbpage.txt

+ * + * @param hugePageTlbSize_ + * @return the reference to the current config. + */ + public PlainTableConfig setHugePageTlbSize_(int hugePageTlbSize_) { + this.hugePageTlbSize_ = hugePageTlbSize_; + return this; + } + + /** + * Returns the value for huge page tlb size + * + * @return hugePageTlbSize + */ + public int hugePageTlbSize() { + return hugePageTlbSize_; + } + + /** + * Sets the encoding type. + * + *

This setting determines how to encode + * the keys. See enum {@link EncodingType} for + * the choices.

+ * + *

The value will determine how to encode keys + * when writing to a new SST file. This value will be stored + * inside the SST file and used when reading from + * the file, which makes it possible for users to choose + * a different encoding type when reopening a DB. Files with + * different encoding types can co-exist in the same DB and + * can be read.

+ * + * @param encodingType {@link org.rocksdb.EncodingType} value. + * @return the reference to the current config. + */ + public PlainTableConfig setEncodingType(EncodingType encodingType) { + this.encodingType_ = encodingType; + return this; + } + + /** + * Returns the active EncodingType + * + * @return currently set encoding type + */ + public EncodingType encodingType() { + return encodingType_; + } + + /** + * Set full scan mode, if true the whole file will be read + * one record by one without using the index. + * + * @param fullScanMode boolean value indicating if full + * scan mode shall be enabled. + * @return the reference to the current config. + */ + public PlainTableConfig setFullScanMode(boolean fullScanMode) { + this.fullScanMode_ = fullScanMode; + return this; + } + + /** + * Return if full scan mode is active + * @return boolean value indicating if the full scan mode is + * enabled. + */ + public boolean fullScanMode() { + return fullScanMode_; + } + + /** + *

If set to true: compute the plain table index and bloom + * filter during file building and store them in the file. + * When reading the file, the index will be mmapped instead + * of being recomputed.

+ * + * @param storeIndexInFile value indicating if index shall + * be stored in a file + * @return the reference to the current config. + */ + public PlainTableConfig setStoreIndexInFile(boolean storeIndexInFile) { + this.storeIndexInFile_ = storeIndexInFile; + return this; + } + + /** + * Return a boolean value indicating if index shall be stored + * in a file. + * + * @return currently set value for store index in file. + */ + public boolean storeIndexInFile() { + return storeIndexInFile_; + } + @Override protected long newTableFactoryHandle() { return newTableFactoryHandle(keySize_, bloomBitsPerKey_, - hashTableRatio_, indexSparseness_); + hashTableRatio_, indexSparseness_, hugePageTlbSize_, + encodingType_.getValue(), fullScanMode_, + storeIndexInFile_); } private native long newTableFactoryHandle( int keySize, int bloomBitsPerKey, - double hashTableRatio, int indexSparseness); + double hashTableRatio, int indexSparseness, + int hugePageTlbSize, byte encodingType, + boolean fullScanMode, boolean storeIndexInFile); private int keySize_; private int bloomBitsPerKey_; private double hashTableRatio_; private int indexSparseness_; + private int hugePageTlbSize_; + private EncodingType encodingType_; + private boolean fullScanMode_; + private boolean storeIndexInFile_; } diff --git a/java/rocksjni/table.cc b/java/rocksjni/table.cc index 1582900f3..1b576a754 100644 --- a/java/rocksjni/table.cc +++ b/java/rocksjni/table.cc @@ -15,23 +15,30 @@ /* * Class: org_rocksdb_PlainTableConfig * Method: newTableFactoryHandle - * Signature: (IIDI)J + * Signature: (IIDIIBZZ)J */ jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle( JNIEnv* env, jobject jobj, jint jkey_size, jint jbloom_bits_per_key, - jdouble jhash_table_ratio, jint jindex_sparseness) { + jdouble jhash_table_ratio, jint jindex_sparseness, + jint jhuge_page_tlb_size, jbyte jencoding_type, + jboolean jfull_scan_mode, jboolean jstore_index_in_file) { rocksdb::PlainTableOptions options = 
rocksdb::PlainTableOptions(); options.user_key_len = jkey_size; options.bloom_bits_per_key = jbloom_bits_per_key; options.hash_table_ratio = jhash_table_ratio; options.index_sparseness = jindex_sparseness; + options.huge_page_tlb_size = jhuge_page_tlb_size; + options.encoding_type = static_cast( + jencoding_type); + options.full_scan_mode = jfull_scan_mode; + options.store_index_in_file = jstore_index_in_file; return reinterpret_cast(rocksdb::NewPlainTableFactory(options)); } /* * Class: org_rocksdb_BlockBasedTableConfig * Method: newTableFactoryHandle - * Signature: (ZJIJIIZIZZJI)J + * Signature: (ZJIJIIZIZZJIBB)J */ jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle( JNIEnv* env, jobject jobj, jboolean no_block_cache, jlong block_cache_size, @@ -39,7 +46,8 @@ jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle( jint block_restart_interval, jboolean whole_key_filtering, jlong jfilterPolicy, jboolean cache_index_and_filter_blocks, jboolean hash_index_allow_collision, jlong block_cache_compressed_size, - jint block_cache_compressd_num_shard_bits) { + jint block_cache_compressd_num_shard_bits, jbyte jchecksum_type, + jbyte jindex_type) { rocksdb::BlockBasedTableOptions options; options.no_block_cache = no_block_cache; @@ -72,6 +80,9 @@ jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle( options.block_cache = rocksdb::NewLRUCache(block_cache_compressed_size); } } + options.checksum = static_cast(jchecksum_type); + options.index_type = static_cast< + rocksdb::BlockBasedTableOptions::IndexType>(jindex_type); return reinterpret_cast(rocksdb::NewBlockBasedTableFactory(options)); }