|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#ifndef STORAGE_ROCKSDB_INCLUDE_DB_H_
|
|
|
|
#define STORAGE_ROCKSDB_INCLUDE_DB_H_
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <memory>
|
|
|
|
#include <vector>
|
|
|
|
#include <string>
|
|
|
|
#include <unordered_map>
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
10 years ago
|
|
|
#include "rocksdb/metadata.h"
|
|
|
|
#include "rocksdb/version.h"
|
|
|
|
#include "rocksdb/iterator.h"
|
|
|
|
#include "rocksdb/options.h"
|
|
|
|
#include "rocksdb/types.h"
|
|
|
|
#include "rocksdb/transaction_log.h"
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
10 years ago
|
|
|
#include "rocksdb/listener.h"
|
|
|
|
#include "rocksdb/snapshot.h"
|
|
|
|
#include "rocksdb/thread_status.h"
|
|
|
|
|
|
|
|
#ifdef _WIN32
|
|
|
|
// Windows API macro interference
|
|
|
|
#undef DeleteFile
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
10 years ago
|
|
|
// Forward declarations of the option structs and classes named by the
// declarations in this header.
struct Options;
struct DBOptions;
struct ColumnFamilyOptions;
struct ReadOptions;
struct WriteOptions;
struct FlushOptions;
struct CompactionOptions;
struct CompactRangeOptions;
struct TableProperties;
struct ExternalSstFileInfo;
class WriteBatch;
class Env;
class EventListener;
|
|
|
|
|
|
|
|
using std::unique_ptr;
|
|
|
|
|
|
|
|
// Abstract handle type. Concrete handles implement the two accessors below;
// the destructor is virtual so a handle can be destroyed through a
// ColumnFamilyHandle pointer.
class ColumnFamilyHandle {
 public:
  virtual ~ColumnFamilyHandle() {}

  // Name associated with this handle (presumably the column family's name).
  // The returned reference is owned by the implementation.
  virtual const std::string& GetName() const = 0;

  // Numeric ID associated with this handle (presumably the column family's
  // ID).
  virtual uint32_t GetID() const = 0;
};
|
|
|
|
extern const std::string kDefaultColumnFamilyName;
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
|
|
|
|
struct ColumnFamilyDescriptor {
|
|
|
|
std::string name;
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
ColumnFamilyOptions options;
|
|
|
|
ColumnFamilyDescriptor()
|
|
|
|
: name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {}
|
|
|
|
ColumnFamilyDescriptor(const std::string& _name,
|
|
|
|
const ColumnFamilyOptions& _options)
|
|
|
|
: name(_name), options(_options) {}
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
};
|
|
|
|
|
|
|
|
static const int kMajorVersion = __ROCKSDB_MAJOR__;
|
|
|
|
static const int kMinorVersion = __ROCKSDB_MINOR__;
|
|
|
|
|
|
|
|
// A range of keys
|
|
|
|
struct Range {
|
|
|
|
Slice start; // Included in the range
|
|
|
|
Slice limit; // Not included in the range
|
|
|
|
|
|
|
|
Range() { }
|
|
|
|
Range(const Slice& s, const Slice& l) : start(s), limit(l) { }
|
|
|
|
};
|
|
|
|
|
|
|
|
// A collection of table properties objects, where
|
|
|
|
// key: is the table's file name.
|
|
|
|
// value: the table properties object of the given table.
|
|
|
|
typedef std::unordered_map<std::string, std::shared_ptr<const TableProperties>>
    TablePropertiesCollection;
|
|
|
|
|
|
|
|
// A DB is a persistent ordered map from keys to values.
|
|
|
|
// A DB is safe for concurrent access from multiple threads without
|
|
|
|
// any external synchronization.
|
|
|
|
class DB {
|
|
|
|
public:
|
|
|
|
// Open the database with the specified "name".
|
|
|
|
// Stores a pointer to a heap-allocated database in *dbptr and returns
|
|
|
|
// OK on success.
|
|
|
|
// Stores nullptr in *dbptr and returns a non-OK status on error.
|
|
|
|
// Caller should delete *dbptr when it is no longer needed.
|
|
|
|
static Status Open(const Options& options,
                   const std::string& name,
                   DB** dbptr);
|
|
|
|
|
|
|
|
// Open the database for read only. All DB interfaces
|
|
|
|
// that modify data, like put/delete, will return error.
|
|
|
|
// If the db is opened in read only mode, then no compactions
|
|
|
|
// will happen.
|
|
|
|
//
|
|
|
|
// Not supported in ROCKSDB_LITE, in which case the function will
|
|
|
|
// return Status::NotSupported.
|
|
|
|
static Status OpenForReadOnly(const Options& options,
                              const std::string& name, DB** dbptr,
                              bool error_if_log_file_exist = false);
|
|
|
|
|
|
|
|
// Open the database for read only with column families. When opening DB with
|
|
|
|
// read only, you can specify only a subset of column families in the
|
|
|
|
// database that should be opened. However, you always need to specify default
|
|
|
|
// column family. The default column family name is 'default' and it's stored
|
|
|
|
// in rocksdb::kDefaultColumnFamilyName
|
|
|
|
//
|
|
|
|
// Not supported in ROCKSDB_LITE, in which case the function will
|
|
|
|
// return Status::NotSupported.
|
|
|
|
static Status OpenForReadOnly(
    const DBOptions& db_options, const std::string& name,
    const std::vector<ColumnFamilyDescriptor>& column_families,
    std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
    bool error_if_log_file_exist = false);
|
|
|
|
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
// Open DB with column families.
|
|
|
|
// db_options specifies database-specific options
|
|
|
|
// column_families is the vector of all column families in the database,
|
|
|
|
// containing column family name and options. You need to open ALL column
|
|
|
|
// families in the database. To get the list of column families, you can use
|
|
|
|
// ListColumnFamilies(). Also, you can open only a subset of column families
|
|
|
|
// for read-only access.
|
|
|
|
// The default column family name is 'default' and it's stored
|
|
|
|
// in rocksdb::kDefaultColumnFamilyName.
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
// If everything is OK, handles will on return be the same size
|
|
|
|
// as column_families --- handles[i] will be a handle that you
|
|
|
|
// will use to operate on column family column_families[i]
|
|
|
|
static Status Open(const DBOptions& db_options, const std::string& name,
                   const std::vector<ColumnFamilyDescriptor>& column_families,
                   std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
|
|
|
|
// ListColumnFamilies will open the DB specified by argument name
|
|
|
|
// and return the list of all column families in that DB
|
|
|
|
// through column_families argument. The ordering of
|
|
|
|
// column families in column_families is unspecified.
|
|
|
|
static Status ListColumnFamilies(const DBOptions& db_options,
                                 const std::string& name,
                                 std::vector<std::string>* column_families);
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
|
|
|
|
// Default constructor; its body is empty.
DB() { }
|
|
|
|
// Virtual destructor; only declared here, so the definition is out of line.
virtual ~DB();
|
|
|
|
|
|
|
|
// Create a column_family and return the handle of column family
|
|
|
|
// through the argument handle.
|
|
|
|
virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
                                  const std::string& column_family_name,
                                  ColumnFamilyHandle** handle);
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
|
|
|
|
// Drop a column family specified by column_family handle. This call
|
|
|
|
// only records a drop record in the manifest and prevents the column
|
|
|
|
// family from flushing and compacting.
|
|
|
|
// Non-pure virtual: declared here without a body, so the base-class
// definition is out of line.
virtual Status DropColumnFamily(ColumnFamilyHandle* column_family);
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
|
In-place updates for equal keys and similar sized values
Summary:
Currently for each put, a fresh memory is allocated, and a new entry is added to the memtable with a new sequence number irrespective of whether the key already exists in the memtable. This diff is an attempt to update the value inplace for existing keys. It currently handles a very simple case:
1. Key already exists in the current memtable. Does not inplace update values in immutable memtable or snapshot
2. Latest value type is a 'put' ie kTypeValue
3. New value size is less than existing value, to avoid reallocating memory
TODO: For a put of an existing key, deallocate memory take by values, for other value types till a kTypeValue is found, ie. remove kTypeMerge.
TODO: Update the transaction log, to allow consistent reload of the memtable.
Test Plan: Added a unit test verifying the inplace update. But some other unit tests broken due to invalid sequence number checks. WIll fix them next.
Reviewers: xinyaohu, sumeet, haobo, dhruba
CC: leveldb
Differential Revision: https://reviews.facebook.net/D12423
Automatic commit by arc
11 years ago
|
|
|
// Set the database entry for "key" to "value".
|
|
|
|
// If "key" already exists, it will be overwritten.
|
In-place updates for equal keys and similar sized values
Summary:
Currently for each put, a fresh memory is allocated, and a new entry is added to the memtable with a new sequence number irrespective of whether the key already exists in the memtable. This diff is an attempt to update the value inplace for existing keys. It currently handles a very simple case:
1. Key already exists in the current memtable. Does not inplace update values in immutable memtable or snapshot
2. Latest value type is a 'put' ie kTypeValue
3. New value size is less than existing value, to avoid reallocating memory
TODO: For a put of an existing key, deallocate memory take by values, for other value types till a kTypeValue is found, ie. remove kTypeMerge.
TODO: Update the transaction log, to allow consistent reload of the memtable.
Test Plan: Added a unit test verifying the inplace update. But some other unit tests broken due to invalid sequence number checks. WIll fix them next.
Reviewers: xinyaohu, sumeet, haobo, dhruba
CC: leveldb
Differential Revision: https://reviews.facebook.net/D12423
Automatic commit by arc
11 years ago
|
|
|
// Returns OK on success, and a non-OK status on error.
|
|
|
|
// Note: consider setting options.sync = true.
|
|
|
|
virtual Status Put(const WriteOptions& options,
                   ColumnFamilyHandle* column_family, const Slice& key,
                   const Slice& value) = 0;
|
|
|
|
// Convenience overload: forwards to the column-family Put, using the handle
// returned by DefaultColumnFamily().
virtual Status Put(const WriteOptions& options, const Slice& key,
                   const Slice& value) {
  return Put(options, DefaultColumnFamily(), key, value);
}
|
|
|
|
|
|
|
|
// Remove the database entry (if any) for "key". Returns OK on
|
|
|
|
// success, and a non-OK status on error. It is not an error if "key"
|
|
|
|
// did not exist in the database.
|
|
|
|
// Note: consider setting options.sync = true.
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
virtual Status Delete(const WriteOptions& options,
                      ColumnFamilyHandle* column_family,
                      const Slice& key) = 0;
|
|
|
|
// Convenience overload without a column family argument: forwards to the
// Delete() overload declared above, passing the handle returned by
// DefaultColumnFamily().
// NOTE(review): the non-code text inside this body looks like version-control
// annotation captured together with the code - confirm and strip it.
virtual Status Delete(const WriteOptions& options, const Slice& key) {
|
|
|
|
return Delete(options, DefaultColumnFamily(), key);
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
}
|
|
|
|
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
9 years ago
|
|
|
// Remove the database entry for "key". Requires that the key exists
|
|
|
|
// and was not overwritten. Returns OK on success, and a non-OK status
|
|
|
|
// on error. It is not an error if "key" did not exist in the database.
|
|
|
|
// Note: consider setting options.sync = true.
|
|
|
|
virtual Status SingleDelete(const WriteOptions& options,
|
|
|
|
ColumnFamilyHandle* column_family,
|
|
|
|
const Slice& key) = 0;
|
|
|
|
// Convenience overload without a column family argument. It obtains the
// handle from DefaultColumnFamily() and hands the request to the
// three-argument SingleDelete() declared above.
virtual Status SingleDelete(const WriteOptions& options, const Slice& key) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return SingleDelete(options, default_cf, key);
}
|
|
|
|
|
|
|
|
// Merge the database entry for "key" with "value". Returns OK on success,
|
|
|
|
// and a non-OK status on error. The semantics of this operation is
|
|
|
|
// determined by the user provided merge_operator when opening DB.
|
|
|
|
// Note: consider setting options.sync = true.
|
|
|
|
virtual Status Merge(const WriteOptions& options,
|
|
|
|
ColumnFamilyHandle* column_family, const Slice& key,
|
|
|
|
const Slice& value) = 0;
|
|
|
|
// Convenience overload without a column family argument: forwards to the
// Merge() overload declared above, passing the handle returned by
// DefaultColumnFamily().
// NOTE(review): the non-code text inside this body looks like version-control
// annotation captured together with the code - confirm and strip it.
virtual Status Merge(const WriteOptions& options, const Slice& key,
|
|
|
|
const Slice& value) {
|
|
|
|
return Merge(options, DefaultColumnFamily(), key, value);
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
}
|
|
|
|
|
|
|
|
// Apply the specified updates to the database.
|
|
|
|
// If `updates` contains no update, WAL will still be synced if
|
|
|
|
// options.sync=true.
|
|
|
|
// Returns OK on success, non-OK on failure.
|
|
|
|
// Note: consider setting options.sync = true.
|
|
|
|
virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0;
|
|
|
|
|
|
|
|
// If the database contains an entry for "key" store the
|
|
|
|
// corresponding value in *value and return OK.
|
|
|
|
//
|
|
|
|
// If there is no entry for "key" leave *value unchanged and return
|
|
|
|
// a status for which Status::IsNotFound() returns true.
|
|
|
|
//
|
|
|
|
// May return some other Status on an error.
|
|
|
|
virtual Status Get(const ReadOptions& options,
|
|
|
|
ColumnFamilyHandle* column_family, const Slice& key,
|
|
|
|
std::string* value) = 0;
|
|
|
|
// Convenience overload without a column family argument: forwards to the
// Get() overload declared above, passing the handle returned by
// DefaultColumnFamily().
// NOTE(review): the non-code text inside this body looks like version-control
// annotation captured together with the code - confirm and strip it.
virtual Status Get(const ReadOptions& options, const Slice& key, std::string* value) {
|
|
|
|
return Get(options, DefaultColumnFamily(), key, value);
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
}
|
|
|
|
|
|
|
|
// If keys[i] does not exist in the database, then the i'th returned
|
|
|
|
// status will be one for which Status::IsNotFound() is true, and
|
|
|
|
// (*values)[i] will be set to some arbitrary value (often ""). Otherwise,
|
|
|
|
// the i'th returned status will have Status::ok() true, and (*values)[i]
|
|
|
|
// will store the value associated with keys[i].
|
|
|
|
//
|
|
|
|
// (*values) will always be resized to be the same size as (keys).
|
|
|
|
// Similarly, the number of returned statuses will be the number of keys.
|
|
|
|
// Note: keys will not be "de-duplicated". Duplicate keys will return
|
|
|
|
// duplicate values in order.
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
virtual std::vector<Status> MultiGet(
|
|
|
|
const ReadOptions& options,
|
|
|
|
const std::vector<ColumnFamilyHandle*>& column_family,
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;
|
|
|
|
// Convenience overload without a column family argument: builds a vector
// holding keys.size() copies of the handle returned by DefaultColumnFamily()
// and forwards it, together with keys and values, to the MultiGet() overload
// declared above.
// NOTE(review): the non-code text inside this body looks like version-control
// annotation captured together with the code - confirm and strip it.
virtual std::vector<Status> MultiGet(const ReadOptions& options,
|
|
|
|
const std::vector<Slice>& keys,
|
|
|
|
std::vector<std::string>* values) {
|
|
|
|
return MultiGet(options, std::vector<ColumnFamilyHandle*>(
|
|
|
|
keys.size(), DefaultColumnFamily()),
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
keys, values);
|
|
|
|
}
|
|
|
|
|
|
|
|
// If the key definitely does not exist in the database, then this method
|
|
|
|
// returns false, else true. If the caller wants to obtain value when the key
|
|
|
|
// is found in memory, a bool for 'value_found' must be passed. 'value_found'
|
|
|
|
// will be true on return if value has been set properly.
|
|
|
|
// This check is potentially lighter-weight than invoking DB::Get(). One way
|
|
|
|
// to make this lighter weight is to avoid doing any IOs.
|
|
|
|
// Default implementation here returns true and sets 'value_found' to false
|
|
|
|
virtual bool KeyMayExist(const ReadOptions& options,
|
|
|
|
ColumnFamilyHandle* column_family, const Slice& key,
|
|
|
|
std::string* value, bool* value_found = nullptr) {
|
|
|
|
if (value_found != nullptr) {
|
|
|
|
*value_found = false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
// Convenience overload without a column family argument: forwards to the
// KeyMayExist() overload defined above, passing the handle returned by
// DefaultColumnFamily() and handing value and value_found through unchanged.
// NOTE(review): the non-code text inside this body looks like version-control
// annotation captured together with the code - confirm and strip it.
virtual bool KeyMayExist(const ReadOptions& options, const Slice& key,
|
|
|
|
std::string* value, bool* value_found = nullptr) {
|
|
|
|
return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found);
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
}
|
|
|
|
|
|
|
|
// Return a heap-allocated iterator over the contents of the database.
|
|
|
|
// The result of NewIterator() is initially invalid (caller must
|
|
|
|
// call one of the Seek methods on the iterator before using it).
|
|
|
|
//
|
|
|
|
// Caller should delete the iterator when it is no longer needed.
|
|
|
|
// The returned iterator should be deleted before this db is deleted.
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
virtual Iterator* NewIterator(const ReadOptions& options,
|
|
|
|
ColumnFamilyHandle* column_family) = 0;
|
|
|
|
// Convenience overload without a column family argument: forwards to the
// NewIterator() overload declared above, passing the handle returned by
// DefaultColumnFamily(), and returns that overload's result.
// NOTE(review): the non-code text inside this body looks like version-control
// annotation captured together with the code - confirm and strip it.
virtual Iterator* NewIterator(const ReadOptions& options) {
|
|
|
|
return NewIterator(options, DefaultColumnFamily());
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
}
|
|
|
|
// Returns iterators from a consistent database state across multiple
|
|
|
|
// column families. Iterators are heap allocated and need to be deleted
|
|
|
|
// before the db is deleted.
|
|
|
|
virtual Status NewIterators(
|
|
|
|
const ReadOptions& options,
|
|
|
|
const std::vector<ColumnFamilyHandle*>& column_families,
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
std::vector<Iterator*>* iterators) = 0;
|
|
|
|
|
|
|
|
// Return a handle to the current DB state. Iterators created with
|
|
|
|
// this handle will all observe a stable snapshot of the current DB
|
|
|
|
// state. The caller must call ReleaseSnapshot(result) when the
|
|
|
|
// snapshot is no longer needed.
|
Add a new mem-table representation based on cuckoo hash.
Summary:
= Major Changes =
* Add a new mem-table representation, HashCuckooRep, which is based cuckoo hash.
Cuckoo hash uses multiple hash functions. This allows each key to have multiple
possible locations in the mem-table.
- Put: When insert a key, it will try to find whether one of its possible
locations is vacant and store the key. If none of its possible
locations are available, then it will kick out a victim key and
store at that location. The kicked-out victim key will then be
stored at a vacant space of its possible locations or kick-out
another victim. In this diff, the kick-out path (known as
cuckoo-path) is found using BFS, which guarantees to be the shortest.
- Get: Simply tries all possible locations of a key --- this guarantees
worst-case constant time complexity.
- Time complexity: O(1) for Get, and average O(1) for Put if the
fullness of the mem-table is below 80%.
- Default using two hash functions, the number of hash functions used
by the cuckoo-hash may dynamically increase if it fails to find a
short-enough kick-out path.
- Currently, HashCuckooRep does not support iteration and snapshots,
as our current main purpose of this is to optimize point access.
= Minor Changes =
* Add IsSnapshotSupported() to DB to indicate whether the current DB
supports snapshots. If it returns false, then DB::GetSnapshot() will
always return nullptr.
Test Plan:
Run existing tests. Will develop a test specifically for cuckoo hash in
the next diff.
Reviewers: sdong, haobo
Reviewed By: sdong
CC: leveldb, dhruba, igor
Differential Revision: https://reviews.facebook.net/D16155
11 years ago
|
|
|
//
|
|
|
|
// nullptr will be returned if the DB fails to take a snapshot or does
|
|
|
|
// not support snapshot.
|
|
|
|
virtual const Snapshot* GetSnapshot() = 0;
|
|
|
|
|
|
|
|
// Release a previously acquired snapshot. The caller must not
|
|
|
|
// use "snapshot" after this call.
|
|
|
|
virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;
|
|
|
|
|
|
|
|
// DB implementations can export properties about their state
|
|
|
|
// via this method. If "property" is a valid property understood by this
|
|
|
|
// DB implementation, fills "*value" with its current value and returns
|
|
|
|
// true. Otherwise returns false.
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// Valid property names include:
|
|
|
|
//
|
|
|
|
// "rocksdb.num-files-at-level<N>" - return the number of files at level <N>,
|
|
|
|
// where <N> is an ASCII representation of a level number (e.g. "0").
|
|
|
|
// "rocksdb.stats" - returns a multi-line string that describes statistics
|
|
|
|
// about the internal operation of the DB.
|
|
|
|
// "rocksdb.sstables" - returns a multi-line string that describes all
|
|
|
|
// of the sstables that make up the db contents.
|
|
|
|
// "rocksdb.cfstats"
|
|
|
|
// "rocksdb.dbstats"
|
|
|
|
// "rocksdb.num-immutable-mem-table"
|
|
|
|
// "rocksdb.mem-table-flush-pending"
|
|
|
|
// "rocksdb.compaction-pending" - 1 if at least one compaction is pending
|
|
|
|
// "rocksdb.background-errors" - accumulated number of background errors
|
|
|
|
// "rocksdb.cur-size-active-mem-table"
|
|
|
|
// "rocksdb.cur-size-all-mem-tables"
// "rocksdb.size-all-mem-tables"
|
|
|
|
// "rocksdb.num-entries-active-mem-table"
|
|
|
|
// "rocksdb.num-entries-imm-mem-tables"
|
|
|
|
// "rocksdb.num-deletes-active-mem-table"
|
|
|
|
// "rocksdb.num-deletes-imm-mem-tables"
|
|
|
|
// "rocksdb.estimate-num-keys" - estimated keys in the column family
|
|
|
|
// "rocksdb.estimate-table-readers-mem" - estimated memory used for reading
|
|
|
|
// SST tables, that is not counted as a part of block cache.
|
|
|
|
// "rocksdb.is-file-deletions-enabled"
|
|
|
|
// "rocksdb.num-snapshots"
|
|
|
|
// "rocksdb.oldest-snapshot-time"
|
|
|
|
// "rocksdb.num-live-versions" - `version` is an internal data structure.
|
|
|
|
// See version_set.h for details. More live versions often mean more SST
|
|
|
|
// files are held from being deleted, by iterators or unfinished
|
|
|
|
// compactions.
|
|
|
|
// "rocksdb.estimate-live-data-size"
|
|
|
|
// "rocksdb.total-sst-files-size" - total size of all used sst files, this
|
|
|
|
// may slow down online queries if there are too many files.
|
|
|
|
// "rocksdb.base-level"
|
|
|
|
// "rocksdb.estimate-pending-compaction-bytes" - estimated total number of
|
|
|
|
// bytes compaction needs to rewrite the data to get all levels down
|
|
|
|
// to under target size. Not valid for other compactions than
|
|
|
|
// level-based.
|
|
|
|
// "rocksdb.aggregated-table-properties" - returns a string representation
|
|
|
|
// of the aggregated table properties of the target column family.
|
|
|
|
// "rocksdb.aggregated-table-properties-at-level<N>", same as the previous
|
|
|
|
// one but only returns the aggregated table properties of the specified
|
|
|
|
// level "N" at the target column family.
|
|
|
|
// "rocksdb.num-running-compactions" - the number of currently running
|
|
|
|
// compactions.
|
|
|
|
// "rocksdb.num-running-flushes" - the number of currently running flushes.
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
// Groups std::string constants for DB property names. Only declarations
// appear here (no initializers), so the string values are defined elsewhere.
// The surrounding #ifndef excludes this struct when ROCKSDB_LITE is defined.
// NOTE(review): presumably each constant holds the matching "rocksdb.*" name
// from the property list in the comment above (e.g. kStats for
// "rocksdb.stats") - confirm against the definitions.
// NOTE(review): that list documents "rocksdb.base-level", but no
// corresponding constant is declared below - confirm whether one is intended.
struct Properties {
|
|
|
|
// NOTE(review): presumably a prefix to which the level number <N> is
// appended - confirm.
static const std::string kNumFilesAtLevelPrefix;
|
|
|
|
static const std::string kStats;
|
|
|
|
static const std::string kSSTables;
|
|
|
|
static const std::string kCFStats;
|
|
|
|
static const std::string kDBStats;
|
|
|
|
static const std::string kNumImmutableMemTable;
|
|
|
|
static const std::string kMemTableFlushPending;
|
|
|
|
static const std::string kNumRunningFlushes;
|
|
|
|
static const std::string kCompactionPending;
|
|
|
|
static const std::string kNumRunningCompactions;
|
|
|
|
static const std::string kBackgroundErrors;
|
|
|
|
static const std::string kCurSizeActiveMemTable;
|
|
|
|
static const std::string kCurSizeAllMemTables;
|
|
|
|
static const std::string kSizeAllMemTables;
|
|
|
|
static const std::string kNumEntriesActiveMemTable;
|
|
|
|
static const std::string kNumEntriesImmMemTables;
|
|
|
|
static const std::string kNumDeletesActiveMemTable;
|
|
|
|
static const std::string kNumDeletesImmMemTables;
|
|
|
|
static const std::string kEstimateNumKeys;
|
|
|
|
static const std::string kEstimateTableReadersMem;
|
|
|
|
static const std::string kIsFileDeletionsEnabled;
|
|
|
|
static const std::string kNumSnapshots;
|
|
|
|
static const std::string kOldestSnapshotTime;
|
|
|
|
static const std::string kNumLiveVersions;
|
|
|
|
static const std::string kEstimateLiveDataSize;
|
|
|
|
static const std::string kTotalSstFilesSize;
|
|
|
|
static const std::string kEstimatePendingCompactionBytes;
|
|
|
|
static const std::string kAggregatedTableProperties;
|
|
|
|
// NOTE(review): presumably a prefix to which the level number <N> is
// appended - confirm.
static const std::string kAggregatedTablePropertiesAtLevel;
|
|
|
|
};
|
|
|
|
#endif /* ROCKSDB_LITE */
|
|
|
|
|
|
|
|
virtual bool GetProperty(ColumnFamilyHandle* column_family,
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
const Slice& property, std::string* value) = 0;
|
|
|
|
// Convenience overload without a column family argument: forwards to the
// GetProperty() overload declared above, passing the handle returned by
// DefaultColumnFamily(), and returns that overload's result.
// NOTE(review): the non-code text inside this body looks like version-control
// annotation captured together with the code - confirm and strip it.
virtual bool GetProperty(const Slice& property, std::string* value) {
|
|
|
|
return GetProperty(DefaultColumnFamily(), property, value);
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
}
|
|
|
|
|
|
|
|
// Similar to GetProperty(), but only works for a subset of properties whose
|
|
|
|
// return value is an integer. Return the value by integer. Supported
|
|
|
|
// properties:
|
|
|
|
// "rocksdb.num-immutable-mem-table"
|
|
|
|
// "rocksdb.mem-table-flush-pending"
|
|
|
|
// "rocksdb.compaction-pending"
|
|
|
|
// "rocksdb.background-errors"
|
|
|
|
// "rocksdb.cur-size-active-mem-table"
|
|
|
|
// "rocksdb.cur-size-all-mem-tables"
|
|
|
|
// "rocksdb.size-all-mem-tables"
|
|
|
|
// "rocksdb.num-entries-active-mem-table"
|
|
|
|
// "rocksdb.num-entries-imm-mem-tables"
|
|
|
|
// "rocksdb.num-deletes-active-mem-table"
|
|
|
|
// "rocksdb.num-deletes-imm-mem-tables"
|
|
|
|
// "rocksdb.estimate-num-keys"
|
|
|
|
// "rocksdb.estimate-table-readers-mem"
|
|
|
|
// "rocksdb.is-file-deletions-enabled"
|
|
|
|
// "rocksdb.num-snapshots"
|
|
|
|
// "rocksdb.oldest-snapshot-time"
|
|
|
|
// "rocksdb.num-live-versions"
|
|
|
|
// "rocksdb.estimate-live-data-size"
|
|
|
|
// "rocksdb.total-sst-files-size"
|
|
|
|
// "rocksdb.base-level"
|
|
|
|
// "rocksdb.estimate-pending-compaction-bytes"
|
|
|
|
// "rocksdb.num-running-compactions"
|
|
|
|
// "rocksdb.num-running-flushes"
|
|
|
|
virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
|
|
|
|
const Slice& property, uint64_t* value) = 0;
|
|
|
|
virtual bool GetIntProperty(const Slice& property, uint64_t* value) {
|
|
|
|
return GetIntProperty(DefaultColumnFamily(), property, value);
|
|
|
|
}
|
|
|
|
|
|
|
|
// For each i in [0,n-1], store in "sizes[i]", the approximate
|
|
|
|
// file system space used by keys in "[range[i].start .. range[i].limit)".
|
|
|
|
//
|
|
|
|
// Note that the returned sizes measure file system space usage, so
|
|
|
|
// if the user data compresses by a factor of ten, the returned
|
|
|
|
// sizes will be one-tenth the size of the corresponding user data size.
|
|
|
|
//
|
|
|
|
// If include_memtable is set to true, then the result will also
|
|
|
|
// include those recently written data in the mem-tables if
|
|
|
|
// the mem-table type supports it.
|
|
|
|
virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
|
|
|
|
const Range* range, int n, uint64_t* sizes,
|
|
|
|
bool include_memtable = false) = 0;
|
|
|
|
// Convenience overload: same as the column-family-aware
// GetApproximateSizes(), applied to the default column family.
virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes,
                                 bool include_memtable = false) {
  auto* const default_cf = DefaultColumnFamily();
  GetApproximateSizes(default_cf, range, n, sizes, include_memtable);
}
|
|
|
|
|
|
|
|
// Compact the underlying storage for the key range [*begin,*end].
|
|
|
|
// The actual compaction interval might be a superset of [*begin, *end].
|
|
|
|
// In particular, deleted and overwritten versions are discarded,
|
|
|
|
// and the data is rearranged to reduce the cost of operations
|
|
|
|
// needed to access the data. This operation should typically only
|
|
|
|
// be invoked by users who understand the underlying implementation.
|
|
|
|
//
|
|
|
|
// begin==nullptr is treated as a key before all keys in the database.
|
|
|
|
// end==nullptr is treated as a key after all keys in the database.
|
|
|
|
// Therefore the following call will compact the entire database:
|
|
|
|
// db->CompactRange(options, nullptr, nullptr);
|
|
|
|
// Note that after the entire database is compacted, all data are pushed
|
|
|
|
// down to the last level containing any data. If the total data size after
|
|
|
|
// compaction is reduced, that level might not be appropriate for hosting all
|
|
|
|
// the files. In this case, client could set options.change_level to true, to
|
|
|
|
// move the files back to the minimum level capable of holding the data set
|
|
|
|
// or a given level (specified by non-negative options.target_level).
|
|
|
|
virtual Status CompactRange(const CompactRangeOptions& options,
|
|
|
|
ColumnFamilyHandle* column_family,
|
|
|
|
const Slice* begin, const Slice* end) = 0;
|
|
|
|
// Convenience overload: compacts the key range [*begin, *end] of the default
// column family by delegating to the column-family-aware CompactRange().
virtual Status CompactRange(const CompactRangeOptions& options,
                            const Slice* begin, const Slice* end) {
  auto* const default_cf = DefaultColumnFamily();
  return CompactRange(options, default_cf, begin, end);
}
|
|
|
|
|
|
|
|
#if defined(__GNUC__) || defined(__clang__)
|
|
|
|
__attribute__((deprecated))
|
|
|
|
#elif _WIN32
|
|
|
|
__declspec(deprecated)
|
|
|
|
#endif
|
|
|
|
virtual Status
|
|
|
|
CompactRange(ColumnFamilyHandle* column_family, const Slice* begin,
|
|
|
|
const Slice* end, bool change_level = false,
|
|
|
|
int target_level = -1, uint32_t target_path_id = 0) {
|
|
|
|
CompactRangeOptions options;
|
|
|
|
options.change_level = change_level;
|
|
|
|
options.target_level = target_level;
|
|
|
|
options.target_path_id = target_path_id;
|
|
|
|
return CompactRange(options, column_family, begin, end);
|
|
|
|
}
|
|
|
|
#if defined(__GNUC__) || defined(__clang__)
|
|
|
|
__attribute__((deprecated))
|
|
|
|
#elif _WIN32
|
|
|
|
__declspec(deprecated)
|
|
|
|
#endif
|
|
|
|
virtual Status
|
|
|
|
CompactRange(const Slice* begin, const Slice* end,
|
|
|
|
bool change_level = false, int target_level = -1,
|
|
|
|
uint32_t target_path_id = 0) {
|
|
|
|
CompactRangeOptions options;
|
|
|
|
options.change_level = change_level;
|
|
|
|
options.target_level = target_level;
|
|
|
|
options.target_path_id = target_path_id;
|
|
|
|
return CompactRange(options, DefaultColumnFamily(), begin, end);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Default implementation: ignores both arguments and always returns
// Status::NotSupported("Not implemented").
// NOTE(review): presumably an overriding implementation applies the
// name -> value pairs in new_options to column_family -- confirm.
virtual Status SetOptions(ColumnFamilyHandle* column_family,
    const std::unordered_map<std::string, std::string>& new_options) {
  // Explicitly mark the parameters as unused so that including this header
  // does not trigger unused-parameter warnings.
  (void)column_family;
  (void)new_options;
  return Status::NotSupported("Not implemented");
}
|
|
|
|
// Convenience overload: forwards new_options to the column-family-aware
// SetOptions() using the default column family.
virtual Status SetOptions(
    const std::unordered_map<std::string, std::string>& new_options) {
  auto* const default_cf = DefaultColumnFamily();
  return SetOptions(default_cf, new_options);
}
|
|
|
|
|
|
|
|
// CompactFiles() inputs a list of files specified by file names and
|
|
|
|
// compacts them to the specified level. Note that the behavior is different
|
|
|
|
// from CompactRange() in that CompactFiles() performs the compaction job
|
|
|
|
// using the CURRENT thread.
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
10 years ago
|
|
|
//
|
|
|
|
// @see GetDataBaseMetaData
|
|
|
|
// @see GetColumnFamilyMetaData
|
|
|
|
virtual Status CompactFiles(
|
|
|
|
const CompactionOptions& compact_options,
|
|
|
|
ColumnFamilyHandle* column_family,
|
|
|
|
const std::vector<std::string>& input_file_names,
|
|
|
|
const int output_level, const int output_path_id = -1) = 0;
|
|
|
|
|
|
|
|
// Convenience overload: compacts the given files of the default column
// family by delegating to the column-family-aware CompactFiles().
virtual Status CompactFiles(
    const CompactionOptions& compact_options,
    const std::vector<std::string>& input_file_names,
    const int output_level, const int output_path_id = -1) {
  auto* const default_cf = DefaultColumnFamily();
  return CompactFiles(compact_options, default_cf, input_file_names,
                      output_level, output_path_id);
}
|
|
|
|
|
|
|
|
// This function will wait until all currently running background processes
|
|
|
|
// finish. After it returns, no background process will be run until
|
|
|
|
// ContinueBackgroundWork is called.
|
|
|
|
virtual Status PauseBackgroundWork() = 0;
|
|
|
|
// Counterpart of PauseBackgroundWork().
// NOTE(review): presumably allows background work to run again after a
// pause -- confirm against the implementation.
virtual Status ContinueBackgroundWork() = 0;
|
|
|
|
|
|
|
|
// Number of levels used for this DB.
|
|
|
|
virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0;
|
|
|
|
// Convenience overload: number of levels of the default column family.
virtual int NumberLevels() {
  auto* const default_cf = DefaultColumnFamily();
  return NumberLevels(default_cf);
}
|
|
|
|
|
|
|
|
// Maximum level to which a new compacted memtable is pushed if it
|
|
|
|
// does not create overlap.
|
|
|
|
virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0;
|
|
|
|
virtual int MaxMemCompactionLevel() {
|
|
|
|
return MaxMemCompactionLevel(DefaultColumnFamily());
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
}
|
|
|
|
|
|
|
|
// Number of files in level-0 that would stop writes.
|
|
|
|
virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0;
|
|
|
|
virtual int Level0StopWriteTrigger() {
|
|
|
|
return Level0StopWriteTrigger(DefaultColumnFamily());
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
}
|
|
|
|
|
[RocksDB] BackupableDB
Summary:
In this diff I present you BackupableDB v1. You can easily use it to backup your DB and it will do incremental snapshots for you.
Let's first describe how you would use BackupableDB. It's inheriting StackableDB interface so you can easily construct it with your DB object -- it will add a method RollTheSnapshot() to the DB object. When you call RollTheSnapshot(), current snapshot of the DB will be stored in the backup dir. To restore, you can just call RestoreDBFromBackup() on a BackupableDB (which is a static method) and it will restore all files from the backup dir. In the next version, it will even support automatic backuping every X minutes.
There are multiple things you can configure:
1. backup_env and db_env can be different, which is awesome because then you can easily backup to HDFS or wherever you feel like.
2. sync - if true, it *guarantees* backup consistency on machine reboot
3. number of snapshots to keep - this will keep last N snapshots around if you want, for some reason, be able to restore from an earlier snapshot. All the backuping is done in incremental fashion - if we already have 00010.sst, we will not copy it again. *IMPORTANT* -- This is based on assumption that 00010.sst never changes - two files named 00010.sst from the same DB will always be exactly the same. Is this true? I always copy manifest, current and log files.
4. You can decide if you want to flush the memtables before you backup, or you're fine with backing up the log files -- either way, you get a complete and consistent view of the database at a time of backup.
5. More things you can find in BackupableDBOptions
Here is the directory structure I use:
backup_dir/CURRENT_SNAPSHOT - just 4 bytes holding the latest snapshot
0, 1, 2, ... - files containing serialized version of each snapshot - containing a list of files
files/*.sst - sst files shared between snapshots - if one snapshot references 00010.sst and another one needs to backup it from the DB, it will just reference the same file
files/ 0/, 1/, 2/, ... - snapshot directories containing private snapshot files - current, manifest and log files
All the files are ref counted and deleted immediatelly when they get out of scope.
Some other stuff in this diff:
1. Added GetEnv() method to the DB. Discussed with @haobo and we agreed that it seems right thing to do.
2. Fixed StackableDB interface. The way it was set up before, I was not able to implement BackupableDB.
Test Plan:
I have a unittest, but please don't look at this yet. I just hacked it up to help me with debugging. I will write a lot of good tests and update the diff.
Also, `make asan_check`
Reviewers: dhruba, haobo, emayanke
Reviewed By: dhruba
CC: leveldb, haobo
Differential Revision: https://reviews.facebook.net/D14295
11 years ago
|
|
|
// Get DB name -- the exact same name that was provided as an argument to
|
|
|
|
// DB::Open()
|
|
|
|
virtual const std::string& GetName() const = 0;
|
|
|
|
|
|
|
|
// Get Env object from the DB
|
|
|
|
virtual Env* GetEnv() const = 0;
|
|
|
|
|
|
|
|
// Get DB Options that we use. During the process of opening the
|
|
|
|
// column family, the options provided when calling DB::Open() or
|
|
|
|
// DB::CreateColumnFamily() will have been "sanitized" and transformed
|
|
|
|
// in an implementation-defined manner.
|
|
|
|
virtual const Options& GetOptions(ColumnFamilyHandle* column_family)
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
const = 0;
|
|
|
|
// Convenience overload: returns the options of the default column family.
virtual const Options& GetOptions() const {
  auto* const default_cf = DefaultColumnFamily();
  return GetOptions(default_cf);
}
|
|
|
|
|
|
|
|
// Returns a reference to the DBOptions of this DB.
// NOTE(review): presumably these are the sanitized DB-wide options, by
// analogy with GetOptions() -- confirm against the implementation.
virtual const DBOptions& GetDBOptions() const = 0;
|
|
|
|
|
|
|
|
// Flush all mem-table data.
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
virtual Status Flush(const FlushOptions& options,
|
|
|
|
ColumnFamilyHandle* column_family) = 0;
|
|
|
|
// Convenience overload: flushes the default column family by delegating to
// the column-family-aware Flush().
virtual Status Flush(const FlushOptions& options) {
  auto* const default_cf = DefaultColumnFamily();
  return Flush(options, default_cf);
}
|
|
|
|
|
[wal changes 3/3] method in DB to sync WAL without blocking writers
Summary:
Subj. We really need this feature.
Previous diff D40899 has most of the changes to make this possible, this diff just adds the method.
Test Plan: `make check`, the new test fails without this diff; ran with ASAN, TSAN and valgrind.
Reviewers: igor, rven, IslamAbdelRahman, anthony, kradhakrishnan, tnovak, yhchiang, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, maykov, hermanlee4, yoshinorim, tnovak, dhruba
Differential Revision: https://reviews.facebook.net/D40905
9 years ago
|
|
|
// Sync the wal. Note that Write() followed by SyncWAL() is not exactly the
|
|
|
|
// same as Write() with sync=true: in the latter case the changes won't be
|
|
|
|
// visible until the sync is done.
|
|
|
|
// Currently only works if allow_mmap_writes = false in Options.
|
|
|
|
virtual Status SyncWAL() = 0;
|
|
|
|
|
|
|
|
// The sequence number of the most recent transaction.
|
|
|
|
virtual SequenceNumber GetLatestSequenceNumber() const = 0;
|
|
|
|
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
|
|
|
|
// Prevent file deletions. Compactions will continue to occur,
|
|
|
|
// but no obsolete files will be deleted. Calling this multiple
|
|
|
|
// times has the same effect as calling it once.
|
|
|
|
virtual Status DisableFileDeletions() = 0;
|
|
|
|
|
|
|
|
// Allow compactions to delete obsolete files.
|
|
|
|
// If force == true, the call to EnableFileDeletions() will guarantee that
|
|
|
|
// file deletions are enabled after the call, even if DisableFileDeletions()
|
|
|
|
// was called multiple times before.
|
|
|
|
// If force == false, EnableFileDeletions will only enable file deletion
|
|
|
|
// after it's been called at least as many times as DisableFileDeletions(),
|
|
|
|
// enabling the two methods to be called by two threads concurrently without
|
|
|
|
// synchronization -- i.e., file deletions will be enabled only after both
|
|
|
|
// threads call EnableFileDeletions()
|
|
|
|
virtual Status EnableFileDeletions(bool force = true) = 0;
|
|
|
|
|
|
|
|
// GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup
|
|
|
|
|
|
|
|
// Retrieve the list of all files in the database. The files are
|
|
|
|
// relative to the dbname and are not absolute paths. The valid size of the
|
|
|
|
// manifest file is returned in manifest_file_size. The manifest file is an
|
|
|
|
// ever growing file, but only the portion specified by manifest_file_size is
|
|
|
|
// valid for this snapshot.
|
|
|
|
// Setting flush_memtable to true does Flush before recording the live files.
|
|
|
|
// Setting flush_memtable to false is useful when we don't want to wait for
|
|
|
|
// flush which may have to wait for compaction to complete taking an
|
|
|
|
// indeterminate time.
|
|
|
|
//
|
|
|
|
// In case you have multiple column families, even if flush_memtable is true,
|
|
|
|
// you still need to call GetSortedWalFiles after GetLiveFiles to compensate
|
|
|
|
// for new data that arrived to already-flushed column families while other
|
|
|
|
// column families were flushing
|
|
|
|
virtual Status GetLiveFiles(std::vector<std::string>&,
|
|
|
|
uint64_t* manifest_file_size,
|
|
|
|
bool flush_memtable = true) = 0;
|
|
|
|
|
|
|
|
// Retrieve the sorted list of all wal files with earliest file first
|
|
|
|
virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
|
|
|
|
|
|
|
|
// Sets iter to an iterator that is positioned at a write-batch containing
|
|
|
|
// seq_number. If the sequence number is non existent, it returns an iterator
|
|
|
|
// at the first available seq_no after the requested seq_no
|
|
|
|
// Returns Status::OK if iterator is valid
|
|
|
|
// Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to
|
|
|
|
// use this api, else the WAL files will get
|
|
|
|
// cleared aggressively and the iterator might keep getting invalid before
|
|
|
|
// an update is read.
|
|
|
|
virtual Status GetUpdatesSince(
|
|
|
|
SequenceNumber seq_number, unique_ptr<TransactionLogIterator>* iter,
|
|
|
|
const TransactionLogIterator::ReadOptions&
|
|
|
|
read_options = TransactionLogIterator::ReadOptions()) = 0;
|
|
|
|
|
|
|
|
// Windows API macro interference
|
|
|
|
#undef DeleteFile
|
|
|
|
// Delete the file name from the db directory and update the internal state to
|
|
|
|
// reflect that. Supports deletion of sst and log files only. 'name' must be
|
|
|
|
// path relative to the db directory. eg. 000001.sst, /archive/000003.log
|
|
|
|
virtual Status DeleteFile(std::string name) = 0;
|
|
|
|
|
|
|
|
// Returns a list of all table files with their level, start key
|
|
|
|
// and end key
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
11 years ago
|
|
|
// The default implementation is a no-op: *metadata is left untouched.
virtual void GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) {
  // Explicitly mark the parameter as unused so that including this header
  // does not trigger unused-parameter warnings.
  (void)metadata;
}
|
|
|
|
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
10 years ago
|
|
|
// Obtains the meta data of the specified column family of the DB.
|
|
|
|
// NOTE(review): this method returns void, so a missing column family
// cannot be reported as Status::NotFound() -- confirm the intended behavior.
|
|
|
|
//
|
|
|
|
// If column_family is not specified, then the metadata of the default
|
|
|
|
// column family will be returned.
|
|
|
|
// The default implementation is a no-op: *metadata is left untouched.
virtual void GetColumnFamilyMetaData(
    ColumnFamilyHandle* column_family,
    ColumnFamilyMetaData* metadata) {
  // Explicitly mark the parameters as unused so that including this header
  // does not trigger unused-parameter warnings.
  (void)column_family;
  (void)metadata;
}
|
|
|
|
|
|
|
|
// Get the metadata of the default column family.
|
|
|
|
void GetColumnFamilyMetaData(
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
10 years ago
|
|
|
ColumnFamilyMetaData* metadata) {
|
|
|
|
GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load table file located at "file_path" into "column_family", a pointer to
|
|
|
|
// ExternalSstFileInfo can be used instead of "file_path" to do a blind add
|
|
|
|
// that won't need to read the file; move_file can be set to true to
|
|
|
|
// move the file instead of copying it.
|
|
|
|
//
|
|
|
|
// Current Requirements:
|
|
|
|
// (1) Memtable is empty.
|
|
|
|
// (2) All existing files (if any) have sequence number = 0.
|
|
|
|
// (3) Key range in loaded table file doesn't overlap with existing
|
|
|
|
// files key ranges.
|
|
|
|
// (4) No other writes happen during AddFile call, otherwise
|
|
|
|
// DB may get corrupted.
|
|
|
|
// (5) Database has at least 2 levels.
|
|
|
|
virtual Status AddFile(ColumnFamilyHandle* column_family,
|
|
|
|
const std::string& file_path,
|
|
|
|
bool move_file = false) = 0;
|
|
|
|
// Convenience overload: loads the table file at "file_path" into the default
// column family by delegating to the column-family-aware AddFile().
virtual Status AddFile(const std::string& file_path, bool move_file = false) {
  auto* const default_cf = DefaultColumnFamily();
  return AddFile(default_cf, file_path, move_file);
}
|
|
|
|
|
|
|
|
// Load the table file described by "file_info" into "column_family".
// Pure virtual, so concrete DB implementations must provide it; "move_file"
// defaults to false.
|
|
|
|
virtual Status AddFile(ColumnFamilyHandle* column_family,
|
|
|
|
const ExternalSstFileInfo* file_info,
|
|
|
|
bool move_file = false) = 0;
|
|
|
|
// Convenience overload: loads the table file described by "file_info" by
// forwarding to the column-family-aware AddFile() with the handle returned by
// DefaultColumnFamily().
virtual Status AddFile(const ExternalSstFileInfo* file_info,
                       bool move_file = false) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return AddFile(default_cf, file_info, move_file);
}
|
|
|
|
|
|
|
|
#endif // ROCKSDB_LITE
|
|
|
|
|
|
|
|
// Stores the database's globally unique ID in "identity". The ID is the one
|
|
|
|
// created at database creation time by invoking Env::GenerateUniqueId().
|
|
|
|
// Returns Status::OK if identity could be set properly.
|
|
|
|
virtual Status GetDbIdentity(std::string& identity) const = 0;
|
|
|
|
|
|
|
|
// Returns the default column family handle. Pure virtual, so every concrete
// DB must provide it. The overloads in this interface that omit a
// column-family argument forward to the handle returned here.
|
|
|
|
virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0;
|
|
|
|
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
// Column-family-aware overload; pure virtual, so concrete DB implementations
// must provide it.
// NOTE(review): presumably fills "*props" with the table properties of every
// table file in "column_family" -- confirm against the implementation.
virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
|
|
|
|
TablePropertiesCollection* props) = 0;
|
|
|
|
// Convenience overload: forwards to the column-family-aware
// GetPropertiesOfAllTables() with the handle returned by
// DefaultColumnFamily().
virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return GetPropertiesOfAllTables(default_cf, props);
}
|
|
|
|
#endif // ROCKSDB_LITE
|
|
|
|
|
|
|
|
// Needed for StackableDB
|
|
|
|
virtual DB* GetRootDB() { return this; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
// No copying allowed
|
|
|
|
DB(const DB&);
|
|
|
|
void operator=(const DB&);
|
|
|
|
};
|
|
|
|
|
|
|
|
// Destroy the contents of the database specified by "name".
// Returns a Status; how "options" is used is not visible from this
// declaration.
|
|
|
|
// Be very careful using this method.
|
|
|
|
Status DestroyDB(const std::string& name, const Options& options);
|
|
|
|
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
// If the DB named "dbname" cannot be opened, you may attempt to call this
|
|
|
|
// method to resurrect as much of the contents of the database as possible.
|
|
|
|
// Some data may be lost, so be careful when calling this function
|
|
|
|
// on a database that contains important information. Returns a Status.
|
|
|
|
Status RepairDB(const std::string& dbname, const Options& options);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
} // namespace rocksdb
|
|
|
|
|
|
|
|
#endif // STORAGE_ROCKSDB_INCLUDE_DB_H_
|