Summary: This is a rough sketch of our new document API. Would like to get some thoughts and comments about the high-level architecture and API. I didn't optimize for performance at all. Leaving some low-hanging fruit so that we can be happy when we fix them! :) Currently, a bunch of features are not supported at all. Indexes can only be specified when creating the database. There is no query planner whatsoever. This will all be added in due time. Test Plan: Added a simple unit test Reviewers: haobo, yhchiang, dhruba, sdong, ljin Reviewed By: ljin Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D18747
main
parent
222cf2555a
commit
f0a8be253e
@ -0,0 +1,149 @@ |
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#pragma once |
||||
#ifndef ROCKSDB_LITE |
||||
|
||||
#include <string> |
||||
#include <vector> |
||||
|
||||
#include "utilities/stackable_db.h" |
||||
#include "utilities/json_document.h" |
||||
#include "rocksdb/db.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
// IMPORTANT: DocumentDB is a work in progress. It is unstable and we might
|
||||
// change the API without warning. Talk to RocksDB team before using this in
|
||||
// production ;)
|
||||
|
||||
// DocumentDB is a layer on top of RocksDB that provides a very simple JSON API.
|
||||
// When creating a DB, you specify a list of indexes you want to keep on your
|
||||
// data. You can insert a JSON document to the DB, which is automatically
|
||||
// indexed. Every document added to the DB needs to have an "_id" field, which is
|
||||
// automatically indexed and is a unique primary key. All other indexes are
|
||||
// non-unique.
|
||||
|
||||
// NOTE: field names in the JSON are NOT allowed to start with '$' or
|
||||
// contain '.'. We don't currently enforce that rule, but the DB will start behaving
|
||||
// badly.
|
||||
|
||||
// Cursor is what you get as a result of executing query. To get all
|
||||
// results from a query, call Next() on a Cursor while Valid() returns true
|
||||
class Cursor { |
||||
public: |
||||
Cursor() = default; |
||||
virtual ~Cursor() {} |
||||
|
||||
virtual bool Valid() const = 0; |
||||
virtual void Next() = 0; |
||||
// Lifecycle of the returned JSONDocument is until the next Next() call
|
||||
virtual const JSONDocument& document() const = 0; |
||||
virtual Status status() const = 0; |
||||
|
||||
private: |
||||
// No copying allowed
|
||||
Cursor(const Cursor&); |
||||
void operator=(const Cursor&); |
||||
}; |
||||
|
||||
// Tuning knobs passed to DocumentDB::Open().
//
// The size defaults are computed with 64-bit unsigned arithmetic. The previous
// form multiplied plain `int` literals; the 1 GB default is exactly 2^30, so
// bumping it to 2 GB or more would have overflowed `int` before the result was
// widened to uint64_t.
struct DocumentDBOptions {
  int background_threads = 4;
  uint64_t memtable_size = 128ULL * 1024 * 1024;    // 128 MB
  uint64_t cache_size = 1ULL * 1024 * 1024 * 1024;  // 1 GB
};
||||
|
||||
// TODO(icanadi) Add `JSONDocument* info` parameter to all calls that can be
|
||||
// used by the caller to get more information about the call execution (number
|
||||
// of dropped records, number of updated records, etc.)
|
||||
class DocumentDB : public StackableDB { |
||||
public: |
||||
struct IndexDescriptor { |
||||
// Currently, you can only define an index on a single field. To specify an
|
||||
// index on a field X, set index description to JSON "{X: 1}"
|
||||
// Currently the value needs to be 1, which means ascending.
|
||||
// In the future, we plan to also support indexes on multiple keys, where
|
||||
// you could mix ascending sorting (1) with descending sorting indexes (-1)
|
||||
JSONDocument* description; |
||||
std::string name; |
||||
}; |
||||
|
||||
// Open DocumentDB with specified indexes. The list of indexes has to be
|
||||
// complete, i.e. include all indexes present in the DB, except the primary
|
||||
// key index.
|
||||
// Otherwise, Open() will return an error
|
||||
static Status Open(const DocumentDBOptions& options, const std::string& name, |
||||
const std::vector<IndexDescriptor>& indexes, |
||||
DocumentDB** db, bool read_only = false); |
||||
|
||||
explicit DocumentDB(DB* db) : StackableDB(db) {} |
||||
|
||||
// Create a new index. It will stop all writes for the duration of the call.
|
||||
// All current documents in the DB are scanned and corresponding index entries
|
||||
// are created
|
||||
virtual Status CreateIndex(const WriteOptions& write_options, |
||||
const IndexDescriptor& index) = 0; |
||||
|
||||
// Drop an index. Client is responsible to make sure that index is not being
|
||||
// used by currently executing queries
|
||||
virtual Status DropIndex(const std::string& name) = 0; |
||||
|
||||
// Insert a document to the DB. The document needs to have a primary key "_id"
|
||||
// which can either be a string or an integer. Otherwise the write will fail
|
||||
// with InvalidArgument.
|
||||
virtual Status Insert(const WriteOptions& options, |
||||
const JSONDocument& document) = 0; |
||||
|
||||
// Deletes all documents matching a filter atomically
|
||||
virtual Status Remove(const ReadOptions& read_options, |
||||
const WriteOptions& write_options, |
||||
const JSONDocument& query) = 0; |
||||
|
||||
// Does this sequence of operations:
|
||||
// 1. Find all documents matching a filter
|
||||
// 2. For all documents, atomically:
|
||||
// 2.1. apply the update operators
|
||||
// 2.2. update the secondary indexes
|
||||
//
|
||||
// Currently only $set update operator is supported.
|
||||
// Syntax is: {$set: {key1: value1, key2: value2, etc...}}
|
||||
// This operator will change a document's key1 field to value1, key2 to
|
||||
// value2, etc. New values will be set even if a document didn't have an entry
|
||||
// for the specified key.
|
||||
//
|
||||
// You can not change a primary key of a document.
|
||||
//
|
||||
// Update example: Update({id: {$gt: 5}, $index: id}, {$set: {enabled: true}})
|
||||
virtual Status Update(const ReadOptions& read_options, |
||||
const WriteOptions& write_options, |
||||
const JSONDocument& filter, |
||||
const JSONDocument& updates) = 0; |
||||
|
||||
// query has to be an array in which every element is an operator. Currently
|
||||
// only $filter operator is supported. Syntax of $filter operator is:
|
||||
// {$filter: {key1: condition1, key2: condition2, etc.}} where conditions can
|
||||
// be either:
|
||||
// 1) a single value in which case the condition is equality condition, or
|
||||
// 2) a defined operators, like {$gt: 4}, which will match all documents that
|
||||
// have key greater than 4.
|
||||
//
|
||||
// Supported operators are:
|
||||
// 1) $gt -- greater than
|
||||
// 2) $gte -- greater than or equal
|
||||
// 3) $lt -- less than
|
||||
// 4) $lte -- less than or equal
|
||||
// If you want the filter to use an index, you need to specify it like this:
|
||||
// {$filter: {...(conditions)..., $index: index_name}}
|
||||
//
|
||||
// Example query:
|
||||
// * [{$filter: {name: John, age: {$gte: 18}, $index: age}}]
|
||||
// will return all Johns whose age is greater or equal to 18 and it will use
|
||||
// index "age" to satisfy the query.
|
||||
virtual Cursor* Query(const ReadOptions& read_options, |
||||
const JSONDocument& query) = 0; |
||||
}; |
||||
|
||||
} // namespace rocksdb
|
||||
#endif // ROCKSDB_LITE
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,262 @@ |
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#include <algorithm> |
||||
|
||||
#include "utilities/json_document.h" |
||||
#include "utilities/document_db.h" |
||||
#include "util/testharness.h" |
||||
#include "util/testutil.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
class DocumentDBTest { |
||||
public: |
||||
DocumentDBTest() { |
||||
dbname_ = test::TmpDir() + "/document_db_test"; |
||||
DestroyDB(dbname_, Options()); |
||||
} |
||||
~DocumentDBTest() { |
||||
delete db_; |
||||
DestroyDB(dbname_, Options()); |
||||
} |
||||
|
||||
void AssertCursorIDs(Cursor* cursor, std::vector<int64_t> expected) { |
||||
std::vector<int64_t> got; |
||||
while (cursor->Valid()) { |
||||
ASSERT_TRUE(cursor->Valid()); |
||||
ASSERT_TRUE(cursor->document().Contains("_id")); |
||||
got.push_back(cursor->document()["_id"].GetInt64()); |
||||
cursor->Next(); |
||||
} |
||||
std::sort(expected.begin(), expected.end()); |
||||
std::sort(got.begin(), got.end()); |
||||
ASSERT_TRUE(got == expected); |
||||
} |
||||
|
||||
// converts ' to ", so that we don't have to escape " all over the place
|
||||
std::string ConvertQuotes(const std::string& input) { |
||||
std::string output; |
||||
for (auto x : input) { |
||||
if (x == '\'') { |
||||
output.push_back('\"'); |
||||
} else { |
||||
output.push_back(x); |
||||
} |
||||
} |
||||
return output; |
||||
} |
||||
|
||||
void CreateIndexes(std::vector<DocumentDB::IndexDescriptor> indexes) { |
||||
for (auto i : indexes) { |
||||
ASSERT_OK(db_->CreateIndex(WriteOptions(), i)); |
||||
} |
||||
} |
||||
|
||||
JSONDocument* Parse(const std::string doc) { |
||||
return JSONDocument::ParseJSON(ConvertQuotes(doc).c_str()); |
||||
} |
||||
|
||||
std::string dbname_; |
||||
DocumentDB* db_; |
||||
}; |
||||
|
||||
// Exercises insert, query (with and without an index) and remove on a tiny
// data set with a single secondary index on "name".
TEST(DocumentDBTest, SimpleQueryTest) {
  // Runs `query_json` through Query() and checks the "_id" values returned.
  auto expect_ids = [this](const std::string& query_json,
                           const std::vector<int64_t>& expected_ids) {
    std::unique_ptr<JSONDocument> parsed(Parse(query_json));
    std::unique_ptr<Cursor> results(db_->Query(ReadOptions(), *parsed));
    AssertCursorIDs(results.get(), expected_ids);
  };

  DocumentDBOptions options;
  DocumentDB::IndexDescriptor name_index;
  name_index.description = Parse("{'name': 1}");
  name_index.name = "name_index";

  // Open without any index, create the index, then close the DB again.
  ASSERT_OK(DocumentDB::Open(options, dbname_, {}, &db_));
  CreateIndexes({name_index});
  delete db_;
  // The index exists now, so it has to be listed when reopening.
  ASSERT_OK(DocumentDB::Open(options, dbname_, {name_index}, &db_));
  delete name_index.description;

  std::vector<std::string> docs = {
      "{'_id': 1, 'name': 'One'}", "{'_id': 2, 'name': 'Two'}",
      "{'_id': 3, 'name': 'Three'}", "{'_id': 4, 'name': 'Four'}"};

  for (const auto& doc_json : docs) {
    std::unique_ptr<JSONDocument> doc(Parse(doc_json));
    ASSERT_TRUE(doc.get() != nullptr);
    ASSERT_OK(db_->Insert(WriteOptions(), *doc));
  }

  // Inserting a document whose primary key already exists must fail.
  {
    std::unique_ptr<JSONDocument> duplicate(Parse(docs[0]));
    ASSERT_TRUE(duplicate.get() != nullptr);
    ASSERT_TRUE(db_->Insert(WriteOptions(), *duplicate).IsInvalidArgument());
  }

  // name == "Two"
  expect_ids("[{'$filter': {'name': 'Two', '$index': 'name_index'}}]", {2});

  // name < "Three", via the index
  expect_ids(
      "[{'$filter': {'name': {'$lt': 'Three'}, '$index': "
      "'name_index'}}]",
      {1, 4});

  // name < "Three", without an index
  expect_ids("[{'$filter': {'name': {'$lt': 'Three'} }}]", {1, 4});

  // Remove everything with name <= "Three".
  {
    std::unique_ptr<JSONDocument> filter(
        Parse("{'name': {'$lte': 'Three'}, '$index': 'name_index'}"));
    ASSERT_OK(db_->Remove(ReadOptions(), WriteOptions(), *filter));
  }

  // Query everything: only "Two" should be left.
  expect_ids("[]", {2});
}
||||
|
||||
// Exercises multi-condition filters over three secondary indexes, followed by
// remove, update and index removal.
TEST(DocumentDBTest, ComplexQueryTest) {
  // Runs `query_json` through Query() and checks the "_id" values returned.
  auto expect_ids = [this](const std::string& query_json,
                           const std::vector<int64_t>& expected_ids) {
    std::unique_ptr<JSONDocument> parsed(Parse(query_json));
    std::unique_ptr<Cursor> results(db_->Query(ReadOptions(), *parsed));
    AssertCursorIDs(results.get(), expected_ids);
  };

  DocumentDBOptions options;

  DocumentDB::IndexDescriptor by_priority;
  by_priority.description = Parse("{'priority': 1}");
  by_priority.name = "priority";

  DocumentDB::IndexDescriptor by_job_name;
  by_job_name.description = Parse("{'job_name': 1}");
  by_job_name.name = "job_name";

  DocumentDB::IndexDescriptor by_progress;
  by_progress.description = Parse("{'progress': 1}");
  by_progress.name = "progress";

  ASSERT_OK(DocumentDB::Open(options, dbname_, {}, &db_));
  CreateIndexes({by_priority, by_progress});
  delete by_priority.description;
  delete by_progress.description;

  std::vector<std::string> docs = {
      "{'_id': 1, 'job_name': 'play', 'priority': 10, 'progress': 14.2}",
      "{'_id': 2, 'job_name': 'white', 'priority': 2, 'progress': 45.1}",
      "{'_id': 3, 'job_name': 'straw', 'priority': 5, 'progress': 83.2}",
      "{'_id': 4, 'job_name': 'temporary', 'priority': 3, 'progress': 14.9}",
      "{'_id': 5, 'job_name': 'white', 'priority': 4, 'progress': 44.2}",
      "{'_id': 6, 'job_name': 'tea', 'priority': 1, 'progress': 12.4}",
      "{'_id': 7, 'job_name': 'delete', 'priority': 2, 'progress': 77.54}",
      "{'_id': 8, 'job_name': 'rock', 'priority': 3, 'progress': 93.24}",
      "{'_id': 9, 'job_name': 'steady', 'priority': 3, 'progress': 9.1}",
      "{'_id': 10, 'job_name': 'white', 'priority': 1, 'progress': 61.4}",
      "{'_id': 11, 'job_name': 'who', 'priority': 4, 'progress': 39.41}", };

  // The third index is added on an already open DB.
  CreateIndexes({by_job_name});
  delete by_job_name.description;

  for (const auto& doc_json : docs) {
    std::unique_ptr<JSONDocument> doc(Parse(doc_json));
    ASSERT_TRUE(doc != nullptr);
    ASSERT_OK(db_->Insert(WriteOptions(), *doc));
  }

  // 2 < priority < 4 AND progress > 10.0, using index "priority"
  expect_ids(
      "[{'$filter': {'priority': {'$lt': 4, '$gt': 2}, 'progress': {'$gt': "
      "10.0}, '$index': 'priority'}}]",
      {4, 8});

  // 2 < priority < 4 AND progress > 10.0, using index "progress"
  expect_ids(
      "[{'$filter': {'priority': {'$lt': 4, '$gt': 2}, 'progress': {'$gt': "
      "10.0}, '$index': 'progress'}}]",
      {4, 8});

  // job_name == 'white' AND priority >= 2, using index "job_name"
  expect_ids(
      "[{'$filter': {'job_name': 'white', 'priority': {'$gte': "
      "2}, '$index': 'job_name'}}]",
      {2, 5});

  // 35.0 <= progress < 65.5, using index "progress"
  expect_ids(
      "[{'$filter': {'progress': {'$gt': 5.0, '$gte': 35.0, '$lt': 65.5}, "
      "'$index': 'progress'}}]",
      {2, 5, 10, 11});

  // 2 < priority <= 4, using index "priority"
  expect_ids(
      "[{'$filter': {'priority': {'$gt': 2, '$lt': 8, '$lte': 4}, "
      "'$index': 'priority'}}]",
      {4, 5, 8, 9, 11});

  // Delete every document whose progress is above 50%.
  {
    std::unique_ptr<JSONDocument> filter(
        Parse("{'progress': {'$gt': 50.0}, '$index': 'progress'}"));
    ASSERT_OK(db_->Remove(ReadOptions(), WriteOptions(), *filter));
  }

  // 2 < priority < 6, using index "priority"
  expect_ids(
      "[{'$filter': {'priority': {'$gt': 2, '$lt': 6}, "
      "'$index': 'priority'}}]",
      {4, 5, 9, 11});

  // Set priority to 10 wherever job_name is 'white'.
  {
    std::unique_ptr<JSONDocument> filter(Parse("{'job_name': 'white'}"));
    std::unique_ptr<JSONDocument> changes(Parse("{'$set': {'priority': 10}}"));
    ASSERT_OK(db_->Update(ReadOptions(), WriteOptions(), *filter, *changes));
  }

  // 4 < priority; this one also checks the cursor status before iterating.
  {
    std::unique_ptr<JSONDocument> parsed(
        Parse("[{'$filter': {'priority': {'$gt': 4}, '$index': 'priority'}}]"));
    std::unique_ptr<Cursor> results(db_->Query(ReadOptions(), *parsed));
    ASSERT_OK(results->status());
    AssertCursorIDs(results.get(), {1, 2, 5});
  }

  // Dropping an unknown index must fail; dropping an existing one succeeds.
  Status drop_missing = db_->DropIndex("doesnt-exist");
  ASSERT_TRUE(!drop_missing.ok());
  ASSERT_OK(db_->DropIndex("priority"));
}
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
// Test binary entry point: delegates to rocksdb::test::RunAllTests() and
// returns its result. The command-line arguments are not used.
int main(int /*argc*/, char** /*argv*/) {
  return rocksdb::test::RunAllTests();
}
Loading…
Reference in new issue