SST Partitioner interface that allows to split SST files (#6957)
Summary: SST Partitioner interface that allows splitting SST files during compactions. It basically instructs the compaction to create a new file when needed. When one is using well-defined prefixes and a prefixed way of defining tables, it is good to also define partitioning so that promotion of some SST file does not cover a huge key space on the next level (worst case: the complete key space). Pull Request resolved: https://github.com/facebook/rocksdb/pull/6957 Reviewed By: ajkr Differential Revision: D22461239 fbshipit-source-id: 9ce07bba08b3ba89c2d45630520368f704d1316e main
parent
954ee56571
commit
cd4592c220
@ -0,0 +1,44 @@ |
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "rocksdb/sst_partitioner.h" |
||||||
|
|
||||||
|
#include <algorithm> |
||||||
|
|
||||||
|
namespace ROCKSDB_NAMESPACE { |
||||||
|
|
||||||
|
// Decides whether a new output file must start between the previous and the
// current user key. Both keys are cut down to at most len_ bytes; a partition
// is required exactly when the resulting prefixes differ.
PartitionerResult SstPartitionerFixedPrefix::ShouldPartition(
    const PartitionerRequest& request) {
  const Slice& prev_key = *request.prev_user_key;
  const Slice& cur_key = *request.current_user_key;
  // Keys shorter than len_ take part in the comparison in full.
  const Slice prev_prefix(prev_key.data(), std::min(prev_key.size(), len_));
  const Slice cur_prefix(cur_key.data(), std::min(cur_key.size(), len_));
  if (prev_prefix.compare(cur_prefix) == 0) {
    return kNotRequired;
  }
  return kRequired;
}
||||||
|
|
||||||
|
bool SstPartitionerFixedPrefix::CanDoTrivialMove( |
||||||
|
const Slice& smallest_user_key, const Slice& largest_user_key) { |
||||||
|
return ShouldPartition(PartitionerRequest(smallest_user_key, largest_user_key, |
||||||
|
0)) == kNotRequired; |
||||||
|
} |
||||||
|
|
||||||
|
std::unique_ptr<SstPartitioner> |
||||||
|
SstPartitionerFixedPrefixFactory::CreatePartitioner( |
||||||
|
const SstPartitioner::Context& /* context */) const { |
||||||
|
return std::unique_ptr<SstPartitioner>(new SstPartitionerFixedPrefix(len_)); |
||||||
|
} |
||||||
|
|
||||||
|
std::shared_ptr<SstPartitionerFactory> NewSstPartitionerFixedPrefixFactory( |
||||||
|
size_t prefix_len) { |
||||||
|
return std::make_shared<SstPartitionerFixedPrefixFactory>(prefix_len); |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace ROCKSDB_NAMESPACE
|
@ -0,0 +1,135 @@ |
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <memory> |
||||||
|
#include <string> |
||||||
|
|
||||||
|
#include "rocksdb/rocksdb_namespace.h" |
||||||
|
#include "rocksdb/slice.h" |
||||||
|
|
||||||
|
namespace ROCKSDB_NAMESPACE { |
||||||
|
|
||||||
|
class Slice; |
||||||
|
|
||||||
|
// Verdict a partitioner returns for a pair of adjacent keys.
enum PartitionerResult : char {
  // The partitioner does not require a new file to be created
  kNotRequired = 0,
  // The partitioner forcefully requests that a new file be created
  kRequired = 1
  // Additional constants can be added
};
||||||
|
|
||||||
|
struct PartitionerRequest { |
||||||
|
PartitionerRequest(const Slice& prev_user_key_, |
||||||
|
const Slice& current_user_key_, |
||||||
|
uint64_t current_output_file_size_) |
||||||
|
: prev_user_key(&prev_user_key_), |
||||||
|
current_user_key(¤t_user_key_), |
||||||
|
current_output_file_size(current_output_file_size_) {} |
||||||
|
const Slice* prev_user_key; |
||||||
|
const Slice* current_user_key; |
||||||
|
uint64_t current_output_file_size; |
||||||
|
}; |
||||||
|
|
||||||
|
/*
|
||||||
|
* A SstPartitioner is a generic pluggable way of defining the partition |
||||||
|
* of SST files. Compaction job will split the SST files on partition boundary |
||||||
|
* to lower the write amplification during SST file promote to higher level. |
||||||
|
*/ |
||||||
|
class SstPartitioner { |
||||||
|
public: |
||||||
|
virtual ~SstPartitioner() {} |
||||||
|
|
||||||
|
// Return the name of this partitioner.
|
||||||
|
virtual const char* Name() const = 0; |
||||||
|
|
||||||
|
// It is called for all keys in compaction. When partitioner want to create
|
||||||
|
// new SST file it needs to return true. It means compaction job will finish
|
||||||
|
// current SST file where last key is "prev_user_key" parameter and start new
|
||||||
|
// SST file where first key is "current_user_key". Returns decission if
|
||||||
|
// partition boundary was detected and compaction should create new file.
|
||||||
|
virtual PartitionerResult ShouldPartition( |
||||||
|
const PartitionerRequest& request) = 0; |
||||||
|
|
||||||
|
// Called with smallest and largest keys in SST file when compation try to do
|
||||||
|
// trivial move. Returns true is partitioner allows to do trivial move.
|
||||||
|
virtual bool CanDoTrivialMove(const Slice& smallest_user_key, |
||||||
|
const Slice& largest_user_key) = 0; |
||||||
|
|
||||||
|
// Context information of a compaction run
|
||||||
|
struct Context { |
||||||
|
// Does this compaction run include all data files
|
||||||
|
bool is_full_compaction; |
||||||
|
// Is this compaction requested by the client (true),
|
||||||
|
// or is it occurring as an automatic compaction process
|
||||||
|
bool is_manual_compaction; |
||||||
|
// Output level for this compaction
|
||||||
|
int output_level; |
||||||
|
// Smallest key for compaction
|
||||||
|
Slice smallest_user_key; |
||||||
|
// Largest key for compaction
|
||||||
|
Slice largest_user_key; |
||||||
|
}; |
||||||
|
}; |
||||||
|
|
||||||
|
class SstPartitionerFactory { |
||||||
|
public: |
||||||
|
virtual ~SstPartitionerFactory() {} |
||||||
|
|
||||||
|
virtual std::unique_ptr<SstPartitioner> CreatePartitioner( |
||||||
|
const SstPartitioner::Context& context) const = 0; |
||||||
|
|
||||||
|
// Returns a name that identifies this partitioner factory.
|
||||||
|
virtual const char* Name() const = 0; |
||||||
|
}; |
||||||
|
|
||||||
|
/*
|
||||||
|
* Fixed key prefix partitioner. It splits the output SST files when prefix |
||||||
|
* defined by size changes. |
||||||
|
*/ |
||||||
|
class SstPartitionerFixedPrefix : public SstPartitioner { |
||||||
|
public: |
||||||
|
explicit SstPartitionerFixedPrefix(size_t len) : len_(len) {} |
||||||
|
|
||||||
|
virtual ~SstPartitionerFixedPrefix() override {} |
||||||
|
|
||||||
|
const char* Name() const override { return "SstPartitionerFixedPrefix"; } |
||||||
|
|
||||||
|
PartitionerResult ShouldPartition(const PartitionerRequest& request) override; |
||||||
|
|
||||||
|
bool CanDoTrivialMove(const Slice& smallest_user_key, |
||||||
|
const Slice& largest_user_key) override; |
||||||
|
|
||||||
|
private: |
||||||
|
size_t len_; |
||||||
|
}; |
||||||
|
|
||||||
|
/*
|
||||||
|
* Factory for fixed prefix partitioner. |
||||||
|
*/ |
||||||
|
class SstPartitionerFixedPrefixFactory : public SstPartitionerFactory { |
||||||
|
public: |
||||||
|
explicit SstPartitionerFixedPrefixFactory(size_t len) : len_(len) {} |
||||||
|
|
||||||
|
virtual ~SstPartitionerFixedPrefixFactory() {} |
||||||
|
|
||||||
|
const char* Name() const override { |
||||||
|
return "SstPartitionerFixedPrefixFactory"; |
||||||
|
} |
||||||
|
|
||||||
|
std::unique_ptr<SstPartitioner> CreatePartitioner( |
||||||
|
const SstPartitioner::Context& /* context */) const override; |
||||||
|
|
||||||
|
private: |
||||||
|
size_t len_; |
||||||
|
}; |
||||||
|
|
||||||
|
// Returns a shared factory for fixed-prefix partitioners that use the first
// prefix_len bytes of the key as the prefix.
std::shared_ptr<SstPartitionerFactory> NewSstPartitionerFixedPrefixFactory(
    size_t prefix_len);
||||||
|
|
||||||
|
} // namespace ROCKSDB_NAMESPACE
|
@ -0,0 +1,42 @@ |
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
//
|
||||||
|
// This file implements the "bridge" between Java and C++ and enables
|
||||||
|
// calling C++ ROCKSDB_NAMESPACE::SstPartitionerFactory methods
|
||||||
|
// from Java side.
|
||||||
|
|
||||||
|
#include "rocksdb/sst_partitioner.h" |
||||||
|
|
||||||
|
#include <jni.h> |
||||||
|
|
||||||
|
#include <memory> |
||||||
|
|
||||||
|
#include "include/org_rocksdb_SstPartitionerFixedPrefixFactory.h" |
||||||
|
#include "rocksdb/sst_file_manager.h" |
||||||
|
#include "rocksjni/portal.h" |
||||||
|
|
||||||
|
/*
|
||||||
|
* Class: org_rocksdb_SstPartitionerFixedPrefixFactory |
||||||
|
* Method: newSstPartitionerFixedPrefixFactory0 |
||||||
|
* Signature: (J)J |
||||||
|
*/ |
||||||
|
jlong Java_org_rocksdb_SstPartitionerFixedPrefixFactory_newSstPartitionerFixedPrefixFactory0( |
||||||
|
JNIEnv*, jclass, jlong prefix_len) { |
||||||
|
auto* ptr = new std::shared_ptr<ROCKSDB_NAMESPACE::SstPartitionerFactory>( |
||||||
|
ROCKSDB_NAMESPACE::NewSstPartitionerFixedPrefixFactory(prefix_len)); |
||||||
|
return reinterpret_cast<jlong>(ptr); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Class: org_rocksdb_SstPartitionerFixedPrefixFactory |
||||||
|
* Method: disposeInternal |
||||||
|
* Signature: (J)V |
||||||
|
*/ |
||||||
|
void Java_org_rocksdb_SstPartitionerFixedPrefixFactory_disposeInternal( |
||||||
|
JNIEnv*, jobject, jlong jhandle) { |
||||||
|
auto* ptr = reinterpret_cast< |
||||||
|
std::shared_ptr<ROCKSDB_NAMESPACE::SstPartitionerFactory>*>(jhandle); |
||||||
|
delete ptr; // delete std::shared_ptr
|
||||||
|
} |
@ -0,0 +1,15 @@ |
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
package org.rocksdb; |
||||||
|
|
||||||
|
/** |
||||||
|
* Handle to factory for SstPartitioner. It is used in {@link ColumnFamilyOptions} |
||||||
|
*/ |
||||||
|
public abstract class SstPartitionerFactory extends RocksObject { |
||||||
|
protected SstPartitionerFactory(final long nativeHandle) { |
||||||
|
super(nativeHandle); |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,19 @@ |
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
package org.rocksdb; |
||||||
|
|
||||||
|
/** |
||||||
|
* Fixed prefix factory. It partitions SST files using fixed prefix of the key. |
||||||
|
*/ |
||||||
|
public class SstPartitionerFixedPrefixFactory extends SstPartitionerFactory { |
||||||
|
public SstPartitionerFixedPrefixFactory(long prefixLength) { |
||||||
|
super(newSstPartitionerFixedPrefixFactory0(prefixLength)); |
||||||
|
} |
||||||
|
|
||||||
|
private native static long newSstPartitionerFixedPrefixFactory0(long prefixLength); |
||||||
|
|
||||||
|
@Override protected final native void disposeInternal(final long handle); |
||||||
|
} |
@ -0,0 +1,43 @@ |
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
package org.rocksdb; |
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat; |
||||||
|
|
||||||
|
import java.util.List; |
||||||
|
import org.junit.ClassRule; |
||||||
|
import org.junit.Rule; |
||||||
|
import org.junit.Test; |
||||||
|
import org.junit.rules.TemporaryFolder; |
||||||
|
|
||||||
|
public class SstPartitionerTest { |
||||||
|
@ClassRule |
||||||
|
public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = |
||||||
|
new RocksNativeLibraryResource(); |
||||||
|
|
||||||
|
@Rule public TemporaryFolder dbFolder = new TemporaryFolder(); |
||||||
|
|
||||||
|
@Test |
||||||
|
public void sstFixedPrefix() throws InterruptedException, RocksDBException { |
||||||
|
try (SstPartitionerFixedPrefixFactory factory = new SstPartitionerFixedPrefixFactory(4); |
||||||
|
final Options opt = |
||||||
|
new Options().setCreateIfMissing(true).setSstPartitionerFactory(factory); |
||||||
|
final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { |
||||||
|
// writing (long)100 under key
|
||||||
|
db.put("aaaa1".getBytes(), "A".getBytes()); |
||||||
|
db.put("bbbb1".getBytes(), "B".getBytes()); |
||||||
|
db.flush(new FlushOptions()); |
||||||
|
|
||||||
|
db.put("aaaa1".getBytes(), "A2".getBytes()); |
||||||
|
db.flush(new FlushOptions()); |
||||||
|
|
||||||
|
db.compactRange(); |
||||||
|
|
||||||
|
List<LiveFileMetaData> metadata = db.getLiveFilesMetaData(); |
||||||
|
assertThat(metadata.size()).isEqualTo(2); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
Loading…
Reference in new issue