From bcb128723578aa02f599973a2090b9c88667eb5c Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Thu, 12 May 2022 18:17:36 -0700 Subject: [PATCH] Port the batched version of MultiGet() to RocksDB's C API (#9952) Summary: The batched version of MultiGet() is not available in RocksDB's C API. This PR implements rocksdb_batched_multi_get_cf which is a C wrapper function that invokes the batched version of MultiGet() which takes one single column family. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9952 Test Plan: Added a new test case under "columnfamilies" test case in c_test.cc Reviewed By: riversand963 Differential Revision: D36302888 Pulled By: ajkr fbshipit-source-id: fa134c4a1c8e7d72dd4ae8649a74e3797b5cf4e6 --- db/c.cc | 37 +++++++++++++++++++++++++++++++++++++ db/c_test.c | 27 +++++++++++++++++++++++++-- include/rocksdb/c.h | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 2 deletions(-) diff --git a/db/c.cc b/db/c.cc index 0a6ae8562..a10a0c843 100644 --- a/db/c.cc +++ b/db/c.cc @@ -1163,6 +1163,43 @@ void rocksdb_multi_get_cf( } } +void rocksdb_batched_multi_get_cf(rocksdb_t* db, + const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, + size_t num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, + rocksdb_pinnableslice_t** values, char** errs, + const bool sorted_input) { + Slice* key_slices = new Slice[num_keys]; + PinnableSlice* value_slices = new PinnableSlice[num_keys]; + Status* statuses = new Status[num_keys]; + for (size_t i = 0; i < num_keys; ++i) { + key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); + } + + db->rep->MultiGet(options->rep, column_family->rep, num_keys, key_slices, + value_slices, statuses, sorted_input); + + for (size_t i = 0; i < num_keys; ++i) { + if (statuses[i].ok()) { + values[i] = new (rocksdb_pinnableslice_t); + values[i]->rep = std::move(value_slices[i]); + errs[i] = nullptr; + } else { + values[i] = nullptr; + if 
(!statuses[i].IsNotFound()) { + errs[i] = strdup(statuses[i].ToString().c_str()); + } else { + errs[i] = nullptr; + } + } + } + + delete[] key_slices; + delete[] value_slices; + delete[] statuses; +} + unsigned char rocksdb_key_may_exist(rocksdb_t* db, const rocksdb_readoptions_t* options, const char* key, size_t key_len, diff --git a/db/c_test.c b/db/c_test.c index 2a0bc8d5e..53c312a98 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -1260,15 +1260,18 @@ int main(int argc, char** argv) { rocksdb_writebatch_clear(wb); rocksdb_writebatch_put_cf(wb, handles[1], "bar", 3, "b", 1); rocksdb_writebatch_put_cf(wb, handles[1], "box", 3, "c", 1); + rocksdb_writebatch_put_cf(wb, handles[1], "buff", 4, "rocksdb", 7); rocksdb_writebatch_delete_cf(wb, handles[1], "bar", 3); rocksdb_write(db, woptions, wb, &err); CheckNoError(err); CheckGetCF(db, roptions, handles[1], "baz", NULL); CheckGetCF(db, roptions, handles[1], "bar", NULL); CheckGetCF(db, roptions, handles[1], "box", "c"); + CheckGetCF(db, roptions, handles[1], "buff", "rocksdb"); CheckPinGetCF(db, roptions, handles[1], "baz", NULL); CheckPinGetCF(db, roptions, handles[1], "bar", NULL); CheckPinGetCF(db, roptions, handles[1], "box", "c"); + CheckPinGetCF(db, roptions, handles[1], "buff", "rocksdb"); rocksdb_writebatch_destroy(wb); rocksdb_flush_wal(db, 1, &err); @@ -1299,6 +1302,26 @@ int main(int argc, char** argv) { Free(&vals[i]); } + { + const char* batched_keys[4] = {"box", "buff", "barfooxx", "box"}; + const size_t batched_keys_sizes[4] = {3, 4, 8, 3}; + const char* expected_value[4] = {"c", "rocksdb", NULL, "c"}; + char* batched_errs[4]; + + rocksdb_pinnableslice_t* pvals[4]; + rocksdb_batched_multi_get_cf(db, roptions, handles[1], 4, batched_keys, + batched_keys_sizes, pvals, batched_errs, + false); + const char* val; + size_t val_len; + for (i = 0; i < 4; ++i) { + val = rocksdb_pinnableslice_value(pvals[i], &val_len); + CheckNoError(batched_errs[i]); + CheckEqual(expected_value[i], val, val_len); + 
rocksdb_pinnableslice_destroy(pvals[i]); + } + } + { unsigned char value_found = 0; @@ -1330,7 +1353,7 @@ int main(int argc, char** argv) { for (i = 0; rocksdb_iter_valid(iter) != 0; rocksdb_iter_next(iter)) { i++; } - CheckCondition(i == 3); + CheckCondition(i == 4); rocksdb_iter_get_error(iter, &err); CheckNoError(err); rocksdb_iter_destroy(iter); @@ -1354,7 +1377,7 @@ int main(int argc, char** argv) { for (i = 0; rocksdb_iter_valid(iter) != 0; rocksdb_iter_next(iter)) { i++; } - CheckCondition(i == 3); + CheckCondition(i == 4); rocksdb_iter_get_error(iter, &err); CheckNoError(err); rocksdb_iter_destroy(iter); diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 1456c15a6..05a300277 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -63,6 +63,7 @@ extern "C" { #endif #include <stdarg.h> +#include <stdbool.h> #include <stddef.h> #include <stdint.h> @@ -438,6 +439,38 @@ extern ROCKSDB_LIBRARY_API void rocksdb_multi_get_cf( const size_t* keys_list_sizes, char** values_list, size_t* values_list_sizes, char** errs); +// The MultiGet API that improves performance by batching operations +// in the read path for greater efficiency. Currently, only the block based +// table format with full filters is supported. Other table formats such +// as plain table, block based table with block based filters and +// partitioned indexes will still work, but will not get any performance +// benefits. +// +// Note that all the keys passed to this API are restricted to a single +// column family. +// +// Parameters - +// db - the RocksDB instance. +// options - ReadOptions +// column_family - ColumnFamilyHandle* that the keys belong to. All the keys +// passed to the API are restricted to a single column family +// num_keys - Number of keys to lookup +// keys_list - Pointer to C style array of keys with num_keys elements +// keys_list_sizes - Pointer to C style array of the size of corresponding key +// in keys_list with num_keys elements.
+// values - Pointer to C style array of PinnableSlices with num_keys elements +// errs - Pointer to C style array of error strings with num_keys elements +// sorted_input - If true, it means the input keys are already sorted by key +// order, so the MultiGet() API doesn't have to sort them +// again. If false, the keys will be copied and sorted +// internally by the API - the input array will not be +// modified +extern ROCKSDB_LIBRARY_API void rocksdb_batched_multi_get_cf( + rocksdb_t* db, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, size_t num_keys, + const char* const* keys_list, const size_t* keys_list_sizes, + rocksdb_pinnableslice_t** values, char** errs, const bool sorted_input); + // The value is only allocated (using malloc) and returned if it is found and // value_found isn't NULL. In that case the user is responsible for freeing it. extern ROCKSDB_LIBRARY_API unsigned char rocksdb_key_may_exist(