updated merge_operator framework to support merge types other than associative (support a partial merge and full merges that may or may not have an existing value)

master
Rick Richardson 7 years ago
parent 963b8cba51
commit d31e2bb88e
  1. 2
      librocksdb-sys/Cargo.toml
  2. 12
      src/db_options.rs
  3. 1
      src/lib.rs
  4. 258
      src/merge_operator.rs
  5. 12
      tests/test_column_family.rs

@ -22,6 +22,6 @@ libc = "0.2"
const-cstr = "0.2" const-cstr = "0.2"
[build-dependencies] [build-dependencies]
cc = { version = "^1.0", features = ["parallel"] } cc = { git = "https://github.com/alexcrichton/cc-rs", features = ["parallel"] }
make-cmd = "0.1" make-cmd = "0.1"
bindgen = "0.29" bindgen = "0.29"

@ -200,10 +200,16 @@ impl Options {
} }
} }
pub fn set_merge_operator(&mut self, name: &str, merge_fn: MergeFn) { pub fn set_merge_operator(&mut self, name: &str, full_merge_fn: MergeFn, partial_merge_fn: Option<MergeFn>) {
if partial_merge_fn.is_none() {
println!("partial_merge not supplied, defaulting partial merge to full");
} else {
println!("using supplied partial_merge function");
}
let cb = Box::new(MergeOperatorCallback { let cb = Box::new(MergeOperatorCallback {
name: CString::new(name.as_bytes()).unwrap(), name: CString::new(name.as_bytes()).unwrap(),
merge_fn: merge_fn, full_merge_fn: full_merge_fn,
partial_merge_fn: partial_merge_fn.unwrap_or(full_merge_fn),
}); });
unsafe { unsafe {
@ -222,7 +228,7 @@ impl Options {
#[deprecated(since = "0.5.0", #[deprecated(since = "0.5.0",
note = "add_merge_operator has been renamed to set_merge_operator")] note = "add_merge_operator has been renamed to set_merge_operator")]
pub fn add_merge_operator(&mut self, name: &str, merge_fn: MergeFn) { pub fn add_merge_operator(&mut self, name: &str, merge_fn: MergeFn) {
self.set_merge_operator(name, merge_fn); self.set_merge_operator(name, merge_fn, None);
} }
/// Sets a compaction filter used to determine if entries should be kept, changed, /// Sets a compaction filter used to determine if entries should be kept, changed,

@ -173,6 +173,7 @@ pub struct WriteOptions {
inner: *mut ffi::rocksdb_writeoptions_t, inner: *mut ffi::rocksdb_writeoptions_t,
} }
/// An opaque type used to represent a column family. Returned from some functions, and used /// An opaque type used to represent a column family. Returned from some functions, and used
/// in others /// in others
#[derive(Copy, Clone)] #[derive(Copy, Clone)]

@ -21,7 +21,7 @@
//! fn concat_merge(new_key: &[u8], //! fn concat_merge(new_key: &[u8],
//! existing_val: Option<&[u8]>, //! existing_val: Option<&[u8]>,
//! operands: &mut MergeOperands) //! operands: &mut MergeOperands)
//! -> Vec<u8> { //! -> Option<Vec<u8>> {
//! //!
//! let mut result: Vec<u8> = Vec::with_capacity(operands.size_hint().0); //! let mut result: Vec<u8> = Vec::with_capacity(operands.size_hint().0);
//! existing_val.map(|v| { //! existing_val.map(|v| {
@ -34,14 +34,14 @@
//! result.push(*e) //! result.push(*e)
//! } //! }
//! } //! }
//! result //! Some(result)
//! } //! }
//! //!
//! fn main() { //! fn main() {
//! let path = "path/to/rocksdb"; //! let path = "path/to/rocksdb";
//! let mut opts = Options::default(); //! let mut opts = Options::default();
//! opts.create_if_missing(true); //! opts.create_if_missing(true);
//! opts.add_merge_operator("test operator", concat_merge); //! opts.set_merge_operator("test operator", concat_merge, None);
//! let db = DB::open(&opts, path).unwrap(); //! let db = DB::open(&opts, path).unwrap();
//! let p = db.put(b"k1", b"a"); //! let p = db.put(b"k1", b"a");
//! db.merge(b"k1", b"b"); //! db.merge(b"k1", b"b");
@ -60,11 +60,12 @@ use std::mem;
use std::ptr; use std::ptr;
use std::slice; use std::slice;
pub type MergeFn = fn(&[u8], Option<&[u8]>, &mut MergeOperands) -> Vec<u8>; pub type MergeFn = fn(&[u8], Option<&[u8]>, &mut MergeOperands) -> Option<Vec<u8>>;
pub struct MergeOperatorCallback { pub struct MergeOperatorCallback {
pub name: CString, pub name: CString,
pub merge_fn: MergeFn, pub full_merge_fn: MergeFn,
pub partial_merge_fn: MergeFn,
} }
pub unsafe extern "C" fn destructor_callback(raw_cb: *mut c_void) { pub unsafe extern "C" fn destructor_callback(raw_cb: *mut c_void) {
@ -87,12 +88,17 @@ pub unsafe extern "C" fn full_merge_callback(
num_operands: c_int, num_operands: c_int,
success: *mut u8, success: *mut u8,
new_value_length: *mut size_t, new_value_length: *mut size_t,
) -> *mut c_char { ) -> *mut c_char {
let cb = &mut *(raw_cb as *mut MergeOperatorCallback); let cb = &mut *(raw_cb as *mut MergeOperatorCallback);
let operands = &mut MergeOperands::new(operands_list, operands_list_len, num_operands); let operands = &mut MergeOperands::new(operands_list, operands_list_len, num_operands);
let key = slice::from_raw_parts(raw_key as *const u8, key_len as usize); let key = slice::from_raw_parts(raw_key as *const u8, key_len as usize);
let oldval = slice::from_raw_parts(existing_value as *const u8, existing_value_len as usize); let oldval =
let mut result = (cb.merge_fn)(key, Some(oldval), operands); if existing_value == ptr::null() {
None
} else {
Some(slice::from_raw_parts(existing_value as *const u8, existing_value_len as usize))
};
if let Some(mut result) = (cb.full_merge_fn)(key, oldval, operands) {
result.shrink_to_fit(); result.shrink_to_fit();
// TODO(tan) investigate zero-copy techniques to improve performance // TODO(tan) investigate zero-copy techniques to improve performance
let buf = libc::malloc(result.len() as size_t); let buf = libc::malloc(result.len() as size_t);
@ -101,6 +107,10 @@ pub unsafe extern "C" fn full_merge_callback(
*success = 1 as u8; *success = 1 as u8;
ptr::copy(result.as_ptr() as *mut c_void, &mut *buf, result.len()); ptr::copy(result.as_ptr() as *mut c_void, &mut *buf, result.len());
buf as *mut c_char buf as *mut c_char
} else {
*success = 0 as u8;
ptr::null_mut() as *mut c_char
}
} }
pub unsafe extern "C" fn partial_merge_callback( pub unsafe extern "C" fn partial_merge_callback(
@ -112,11 +122,12 @@ pub unsafe extern "C" fn partial_merge_callback(
num_operands: c_int, num_operands: c_int,
success: *mut u8, success: *mut u8,
new_value_length: *mut size_t, new_value_length: *mut size_t,
) -> *mut c_char { ) -> *mut c_char {
println!("In partial_merge_callback");
let cb = &mut *(raw_cb as *mut MergeOperatorCallback); let cb = &mut *(raw_cb as *mut MergeOperatorCallback);
let operands = &mut MergeOperands::new(operands_list, operands_list_len, num_operands); let operands = &mut MergeOperands::new(operands_list, operands_list_len, num_operands);
let key = slice::from_raw_parts(raw_key as *const u8, key_len as usize); let key = slice::from_raw_parts(raw_key as *const u8, key_len as usize);
let mut result = (cb.merge_fn)(key, None, operands); if let Some(mut result) = (cb.partial_merge_fn)(key, None, operands) {
result.shrink_to_fit(); result.shrink_to_fit();
// TODO(tan) investigate zero-copy techniques to improve performance // TODO(tan) investigate zero-copy techniques to improve performance
let buf = libc::malloc(result.len() as size_t); let buf = libc::malloc(result.len() as size_t);
@ -125,6 +136,10 @@ pub unsafe extern "C" fn partial_merge_callback(
*success = 1 as u8; *success = 1 as u8;
ptr::copy(result.as_ptr() as *mut c_void, &mut *buf, result.len()); ptr::copy(result.as_ptr() as *mut c_void, &mut *buf, result.len());
buf as *mut c_char buf as *mut c_char
} else {
*success = 0 as u8;
ptr::null_mut::<c_char>()
}
} }
@ -182,12 +197,15 @@ impl<'a> Iterator for &'a mut MergeOperands {
} }
#[cfg(test)] #[cfg(test)]
#[allow(unused_variables)] mod test {
fn test_provided_merge(
new_key: &[u8], use super::*;
fn test_provided_merge(
_new_key: &[u8],
existing_val: Option<&[u8]>, existing_val: Option<&[u8]>,
operands: &mut MergeOperands, operands: &mut MergeOperands,
) -> Vec<u8> { ) -> Option<Vec<u8>> {
let nops = operands.size_hint().0; let nops = operands.size_hint().0;
let mut result: Vec<u8> = Vec::with_capacity(nops); let mut result: Vec<u8> = Vec::with_capacity(nops);
if let Some(v) = existing_val { if let Some(v) = existing_val {
@ -200,17 +218,18 @@ fn test_provided_merge(
result.push(*e); result.push(*e);
} }
} }
result Some(result)
} }
#[ignore]
#[test] #[test]
fn mergetest() { fn mergetest() {
use {DB, Options}; use {DB, Options};
let path = "_rust_rocksdb_mergetest"; let path = "_rust_rocksdb_mergetest";
let mut opts = Options::default(); let mut opts = Options::default();
opts.create_if_missing(true); opts.create_if_missing(true);
opts.set_merge_operator("test operator", test_provided_merge); opts.set_merge_operator("test operator", test_provided_merge, None);
{ {
let db = DB::open(&opts, path).unwrap(); let db = DB::open(&opts, path).unwrap();
let p = db.put(b"k1", b"a"); let p = db.put(b"k1", b"a");
@ -239,4 +258,207 @@ fn mergetest() {
assert!(db.get(b"k1").unwrap().is_none()); assert!(db.get(b"k1").unwrap().is_none());
} }
assert!(DB::destroy(&opts, path).is_ok()); assert!(DB::destroy(&opts, path).is_ok());
}
unsafe fn to_slice<T: Sized>(p: &T) -> &[u8] {
::std::slice::from_raw_parts(
(p as *const T) as *const u8,
::std::mem::size_of::<T>(),
)
}
fn from_slice<T: Sized>(s: &[u8]) -> Option<&T> {
if ::std::mem::size_of::<T>() != s.len() {
println!("slice {:?} is len {}, but T is size {}", s, s.len(), ::std::mem::size_of::<T>());
None
} else {
unsafe {
Some(::std::mem::transmute(s.as_ptr()))
}
}
}
#[repr(packed)]
#[derive(Clone, Debug)]
struct ValueCounts {
num_a: u32,
num_b: u32,
num_c: u32,
num_d: u32,
}
fn test_counting_partial_merge(
_new_key: &[u8],
existing_val: Option<&[u8]>,
operands: &mut MergeOperands,
) -> Option<Vec<u8>> {
let nops = operands.size_hint().0;
let mut result: Vec<u8> = Vec::with_capacity(nops);
println!("Partial merge operands size hint {}", nops);
for op in operands {
for e in op {
result.push(*e);
}
}
Some(result)
}
fn test_counting_full_merge(
_new_key: &[u8],
existing_val: Option<&[u8]>,
operands: &mut MergeOperands,
) -> Option<Vec<u8>> {
let nops = operands.size_hint().0;
println!("Full merge operands size hint {}", nops);
let mut counts : ValueCounts =
if let Some(v) = existing_val {
println!("Full merge unpacking ValueCounts from {:?}", v);
from_slice::<ValueCounts>(v).unwrap().clone()
} else {
println!("Full merge creating new ValueCounts");
ValueCounts {
num_a: 0,
num_b: 0,
num_c: 0,
num_d: 0 }
};
for op in operands {
for e in op {
match *e {
b'a' => counts.num_a += 1,
b'b' => counts.num_b += 1,
b'c' => counts.num_c += 1,
b'd' => counts.num_d += 1,
_ => {}
}
}
}
let slc = unsafe { to_slice(&counts) };
Some(slc.to_vec())
}
#[test]
fn counting_mergetest() {
use std::thread;
use std::time::Duration;
use std::sync::Arc;
use {DB, Options, DBCompactionStyle};
let path = "_rust_rocksdb_partial_mergetest";
let mut opts = Options::default();
opts.create_if_missing(true);
opts.set_compaction_style(DBCompactionStyle::Universal);
opts.set_min_write_buffer_number_to_merge(10);
opts.set_merge_operator("sort operator", test_counting_full_merge, Some(test_counting_partial_merge));
{
let db = Arc::new(DB::open(&opts, path).unwrap());
let _ = db.delete(b"k1");
let _ = db.delete(b"k2");
let _ = db.merge(b"k1", b"a");
let _ = db.merge(b"k1", b"b");
let _ = db.merge(b"k1", b"d");
let _ = db.merge(b"k1", b"a");
let _ = db.merge(b"k1", b"a");
let _ = db.merge(b"k1", b"efg");
for i in 0..500 {
let _ = db.merge(b"k2", b"c");
if i % 20 == 0 {
let _ = db.get(b"k2");
}
}
for i in 0..500 {
let _ = db.merge(b"k2", b"c");
if i % 20 == 0 {
let _ = db.get(b"k2");
}
}
db.compact_range(None, None);
let d1 = db.clone();
let d2 = db.clone();
let d3 = db.clone();
let h1 = thread::spawn(move || {
for i in 0..500 {
let _ = d1.merge(b"k2", b"c");
if i % 20 == 0 {
let _ = d1.get(b"k2");
}
}
for i in 0..500 {
let _ = d1.merge(b"k2", b"a");
if i % 20 == 0 {
let _ = d1.get(b"k2");
}
}
});
let h2 = thread::spawn(move || {
for i in 0..500 {
let _ = d2.merge(b"k2", b"b");
if i % 20 == 0 {
let _ = d2.get(b"k2");
}
}
for i in 0..500 {
let _ = d2.merge(b"k2", b"d");
if i % 20 == 0 {
let _ = d2.get(b"k2");
}
}
d2.compact_range(None, None);
});
h2.join();
let h3 = thread::spawn(move || {
for i in 0..500 {
let _ = d3.merge(b"k2", b"a");
if i % 20 == 0 {
let _ = d3.get(b"k2");
}
}
for i in 0..500 {
let _ = d3.merge(b"k2", b"c");
if i % 20 == 0 {
let _ = d3.get(b"k2");
}
}
});
let m = db.merge(b"k1", b"b");
assert!(m.is_ok());
h3.join();
h1.join();
match db.get(b"k2") {
Ok(Some(value)) => {
match from_slice::<ValueCounts>(&*value) {
Some(v) => {
assert_eq!(v.num_a, 1000);
assert_eq!(v.num_b, 500);
assert_eq!(v.num_c, 2000);
assert_eq!(v.num_d, 500);
},
None => panic!("Failed to get ValueCounts from db"),
}
}
Err(_) => println!("error reading value"),
_ => panic!("value not present"),
}
match db.get(b"k1") {
Ok(Some(value)) => {
match from_slice::<ValueCounts>(&*value) {
Some(v) => {
assert_eq!(v.num_a, 3);
assert_eq!(v.num_b, 2);
assert_eq!(v.num_c, 0);
assert_eq!(v.num_d, 1);
},
None => panic!("Failed to get ValueCounts from db"),
}
}
Err(_) => println!("error reading value"),
_ => panic!("value not present"),
}
}
assert!(DB::destroy(&opts, path).is_ok());
}
} }

@ -24,7 +24,7 @@ pub fn test_column_family() {
{ {
let mut opts = Options::default(); let mut opts = Options::default();
opts.create_if_missing(true); opts.create_if_missing(true);
opts.set_merge_operator("test operator", test_provided_merge); opts.set_merge_operator("test operator", test_provided_merge, None);
let mut db = DB::open(&opts, path).unwrap(); let mut db = DB::open(&opts, path).unwrap();
let opts = Options::default(); let opts = Options::default();
match db.create_cf("cf1", &opts) { match db.create_cf("cf1", &opts) {
@ -38,7 +38,7 @@ pub fn test_column_family() {
// should fail to open db without specifying same column families // should fail to open db without specifying same column families
{ {
let mut opts = Options::default(); let mut opts = Options::default();
opts.set_merge_operator("test operator", test_provided_merge); opts.set_merge_operator("test operator", test_provided_merge, None);
match DB::open(&opts, path) { match DB::open(&opts, path) {
Ok(_) => { Ok(_) => {
panic!("should not have opened DB successfully without \ panic!("should not have opened DB successfully without \
@ -56,7 +56,7 @@ pub fn test_column_family() {
// should properly open db when specyfing all column families // should properly open db when specyfing all column families
{ {
let mut opts = Options::default(); let mut opts = Options::default();
opts.set_merge_operator("test operator", test_provided_merge); opts.set_merge_operator("test operator", test_provided_merge, None);
match DB::open_cf(&opts, path, &["cf1"]) { match DB::open_cf(&opts, path, &["cf1"]) {
Ok(_) => println!("successfully opened db with column family"), Ok(_) => println!("successfully opened db with column family"),
Err(e) => panic!("failed to open db with column family: {}", e), Err(e) => panic!("failed to open db with column family: {}", e),
@ -98,7 +98,7 @@ fn test_merge_operator() {
// TODO should be able to write, read, merge, batch, and iterate over a cf // TODO should be able to write, read, merge, batch, and iterate over a cf
{ {
let mut opts = Options::default(); let mut opts = Options::default();
opts.set_merge_operator("test operator", test_provided_merge); opts.set_merge_operator("test operator", test_provided_merge, None);
let db = match DB::open_cf(&opts, path, &["cf1"]) { let db = match DB::open_cf(&opts, path, &["cf1"]) {
Ok(db) => { Ok(db) => {
println!("successfully opened db with column family"); println!("successfully opened db with column family");
@ -140,7 +140,7 @@ fn test_merge_operator() {
fn test_provided_merge(_: &[u8], fn test_provided_merge(_: &[u8],
existing_val: Option<&[u8]>, existing_val: Option<&[u8]>,
operands: &mut MergeOperands) operands: &mut MergeOperands)
-> Vec<u8> { -> Option<Vec<u8>> {
let nops = operands.size_hint().0; let nops = operands.size_hint().0;
let mut result: Vec<u8> = Vec::with_capacity(nops); let mut result: Vec<u8> = Vec::with_capacity(nops);
match existing_val { match existing_val {
@ -156,5 +156,5 @@ fn test_provided_merge(_: &[u8],
result.push(*e); result.push(*e);
} }
} }
result Some(result)
} }

Loading…
Cancel
Save