Move RADOS support to separate repo (#9206)
	
		
	
				
					
				
			Summary: This PR moves RADOS support from RocksDB repo to a separate repo. The new (temporary?) repo in this PR serves as an example before we finalize the decision on where and who to host RADOS support. At this point, people can start from the example repo and fork. The goal is to include this commit in RocksDB 7.0 release. Reference: https://github.com/ajkr/dedupfs by ajkr Pull Request resolved: https://github.com/facebook/rocksdb/pull/9206 Test Plan: Follow instructions in https://github.com/riversand963/rocksdb-rados-env/blob/main/README.md and build test binary `env_librados_test` and run it. Also, make check Reviewed By: ajkr Differential Revision: D33751690 Pulled By: riversand963 fbshipit-source-id: 30466c62afa9e4619847a48567ed158e62835e35main
							parent
							
								
									5d30668cab
								
							
						
					
					
						commit
						fa52376117
					
				| @ -1,174 +0,0 @@ | |||||||
| // Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
 |  | ||||||
| //  This source code is licensed under both the GPLv2 (found in the
 |  | ||||||
| //  COPYING file in the root directory) and Apache 2.0 License
 |  | ||||||
| //  (found in the LICENSE.Apache file in the root directory).
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <memory> |  | ||||||
| #include <string> |  | ||||||
| 
 |  | ||||||
| #include "rocksdb/status.h" |  | ||||||
| #include "rocksdb/utilities/env_mirror.h" |  | ||||||
| 
 |  | ||||||
| #include <rados/librados.hpp> |  | ||||||
| 
 |  | ||||||
| namespace ROCKSDB_NAMESPACE { |  | ||||||
| class LibradosWritableFile; |  | ||||||
| 
 |  | ||||||
| // EnvLibrados: an Env declared on top of EnvWrapper that owns a
 |  | ||||||
| // librados::Rados client together with two librados::IoCtx handles, one
 |  | ||||||
| // for the db pool and one for the WAL pool.
 |  | ||||||
| //
 |  | ||||||
| // NOTE(review): only declarations are visible in this header; the
 |  | ||||||
| // per-method comments describe the declared contract, not the
 |  | ||||||
| // implementation.
 |  | ||||||
| class EnvLibrados : public EnvWrapper { |  | ||||||
|  public: |  | ||||||
|   // Create a brand new sequentially-readable file with the specified name.
 |  | ||||||
|   // On success, stores a pointer to the new file in *result and returns OK.
 |  | ||||||
|   // On failure stores nullptr in *result and returns non-OK.  If the file does
 |  | ||||||
|   // not exist, returns a non-OK status.
 |  | ||||||
|   //
 |  | ||||||
|   // The returned file will only be accessed by one thread at a time.
 |  | ||||||
|   Status NewSequentialFile(const std::string& fname, |  | ||||||
|                            std::unique_ptr<SequentialFile>* result, |  | ||||||
|                            const EnvOptions& options) override; |  | ||||||
| 
 |  | ||||||
|   // Create a brand new random access read-only file with the
 |  | ||||||
|   // specified name.  On success, stores a pointer to the new file in
 |  | ||||||
|   // *result and returns OK.  On failure stores nullptr in *result and
 |  | ||||||
|   // returns non-OK.  If the file does not exist, returns a non-OK
 |  | ||||||
|   // status.
 |  | ||||||
|   //
 |  | ||||||
|   // The returned file may be concurrently accessed by multiple threads.
 |  | ||||||
|   Status NewRandomAccessFile(const std::string& fname, |  | ||||||
|                              std::unique_ptr<RandomAccessFile>* result, |  | ||||||
|                              const EnvOptions& options) override; |  | ||||||
| 
 |  | ||||||
|   // Create an object that writes to a new file with the specified
 |  | ||||||
|   // name.  Deletes any existing file with the same name and creates a
 |  | ||||||
|   // new file.  On success, stores a pointer to the new file in
 |  | ||||||
|   // *result and returns OK.  On failure stores nullptr in *result and
 |  | ||||||
|   // returns non-OK.
 |  | ||||||
|   //
 |  | ||||||
|   // The returned file will only be accessed by one thread at a time.
 |  | ||||||
|   Status NewWritableFile(const std::string& fname, |  | ||||||
|                          std::unique_ptr<WritableFile>* result, |  | ||||||
|                          const EnvOptions& options) override; |  | ||||||
| 
 |  | ||||||
|   // Reuse an existing file by renaming it and opening it as writable.
 |  | ||||||
|   Status ReuseWritableFile(const std::string& fname, |  | ||||||
|                            const std::string& old_fname, |  | ||||||
|                            std::unique_ptr<WritableFile>* result, |  | ||||||
|                            const EnvOptions& options) override; |  | ||||||
| 
 |  | ||||||
|   // Create an object that represents a directory. Will fail if directory
 |  | ||||||
|   // doesn't exist. If the directory exists, it will open the directory
 |  | ||||||
|   // and create a new Directory object.
 |  | ||||||
|   //
 |  | ||||||
|   // On success, stores a pointer to the new Directory in
 |  | ||||||
|   // *result and returns OK. On failure stores nullptr in *result and
 |  | ||||||
|   // returns non-OK.
 |  | ||||||
|   Status NewDirectory(const std::string& name, |  | ||||||
|                       std::unique_ptr<Directory>* result) override; |  | ||||||
| 
 |  | ||||||
|   // Returns OK if the named file exists.
 |  | ||||||
|   //         NotFound if the named file does not exist,
 |  | ||||||
|   //                  the calling process does not have permission to determine
 |  | ||||||
|   //                  whether this file exists, or if the path is invalid.
 |  | ||||||
|   //         IOError if an IO Error was encountered
 |  | ||||||
|   Status FileExists(const std::string& fname) override; |  | ||||||
| 
 |  | ||||||
|   // Store in *result the names of the children of the specified directory.
 |  | ||||||
|   // The names are relative to "dir".
 |  | ||||||
|   // Original contents of *results are dropped.
 |  | ||||||
|   Status GetChildren(const std::string& dir, |  | ||||||
|                      std::vector<std::string>* result) override; |  | ||||||
| 
 |  | ||||||
|   // Delete the named file.
 |  | ||||||
|   Status DeleteFile(const std::string& fname) override; |  | ||||||
| 
 |  | ||||||
|   // Create the specified directory. Returns error if directory exists.
 |  | ||||||
|   Status CreateDir(const std::string& dirname) override; |  | ||||||
| 
 |  | ||||||
|   // Creates directory if missing. Return Ok if it exists, or successful in
 |  | ||||||
|   // Creating.
 |  | ||||||
|   Status CreateDirIfMissing(const std::string& dirname) override; |  | ||||||
| 
 |  | ||||||
|   // Delete the specified directory.
 |  | ||||||
|   Status DeleteDir(const std::string& dirname) override; |  | ||||||
| 
 |  | ||||||
|   // Store the size of fname in *file_size.
 |  | ||||||
|   Status GetFileSize(const std::string& fname, uint64_t* file_size) override; |  | ||||||
| 
 |  | ||||||
|   // Store the last modification time of fname in *file_mtime.
 |  | ||||||
|   Status GetFileModificationTime(const std::string& fname, |  | ||||||
|                                  uint64_t* file_mtime) override; |  | ||||||
|   // Rename file src to target.
 |  | ||||||
|   Status RenameFile(const std::string& src, const std::string& target) override; |  | ||||||
|   // Hard Link file src to target.
 |  | ||||||
|   Status LinkFile(const std::string& src, const std::string& target) override; |  | ||||||
| 
 |  | ||||||
|   // Lock the specified file.  Used to prevent concurrent access to
 |  | ||||||
|   // the same db by multiple processes.  On failure, stores nullptr in
 |  | ||||||
|   // *lock and returns non-OK.
 |  | ||||||
|   //
 |  | ||||||
|   // On success, stores a pointer to the object that represents the
 |  | ||||||
|   // acquired lock in *lock and returns OK.  The caller should call
 |  | ||||||
|   // UnlockFile(*lock) to release the lock.  If the process exits,
 |  | ||||||
|   // the lock will be automatically released.
 |  | ||||||
|   //
 |  | ||||||
|   // If somebody else already holds the lock, finishes immediately
 |  | ||||||
|   // with a failure.  I.e., this call does not wait for existing locks
 |  | ||||||
|   // to go away.
 |  | ||||||
|   //
 |  | ||||||
|   // May create the named file if it does not already exist.
 |  | ||||||
|   Status LockFile(const std::string& fname, FileLock** lock) override; |  | ||||||
| 
 |  | ||||||
|   // Release the lock acquired by a previous successful call to LockFile.
 |  | ||||||
|   // REQUIRES: lock was returned by a successful LockFile() call
 |  | ||||||
|   // REQUIRES: lock has not already been unlocked.
 |  | ||||||
|   Status UnlockFile(FileLock* lock) override; |  | ||||||
| 
 |  | ||||||
|   // Get full directory name for this db.
 |  | ||||||
|   Status GetAbsolutePath(const std::string& db_path, |  | ||||||
|                          std::string* output_path) override; |  | ||||||
| 
 |  | ||||||
|   // Get default EnvLibrados
 |  | ||||||
|   static EnvLibrados* Default(); |  | ||||||
| 
 |  | ||||||
|   // Construct from a db name, a config file path and a db pool name.
 |  | ||||||
|   // NOTE(review): how the remaining members (client/cluster name, flags,
 |  | ||||||
|   // WAL dir/pool, write buffer size) are initialized by this overload is
 |  | ||||||
|   // not visible in this header -- confirm in the implementation.
 |  | ||||||
|   explicit EnvLibrados(const std::string& db_name, |  | ||||||
|                        const std::string& config_path, |  | ||||||
|                        const std::string& db_pool); |  | ||||||
| 
 |  | ||||||
|   // Fully parameterized constructor: RADOS client settings followed by
 |  | ||||||
|   // the db name, config path, db pool, WAL dir, WAL pool and the
 |  | ||||||
|   // WritableFile buffer size.
 |  | ||||||
|   explicit EnvLibrados( |  | ||||||
|       const std::string& client_name,  // first 3 parameters are
 |  | ||||||
|                                        // for RADOS client init
 |  | ||||||
|       const std::string& cluster_name, const uint64_t flags, |  | ||||||
|       const std::string& db_name, const std::string& config_path, |  | ||||||
|       const std::string& db_pool, const std::string& wal_dir, |  | ||||||
|       const std::string& wal_pool, const uint64_t write_buffer_size); |  | ||||||
|   // Calls shutdown() on the owned RADOS client when the Env is destroyed.
 |  | ||||||
|   ~EnvLibrados() { _rados.shutdown(); } |  | ||||||
| 
 |  | ||||||
|  private: |  | ||||||
|   // _client_name, _cluster_name and _flags correspond to the constructor
 |  | ||||||
|   // parameters described there as being for RADOS client init.
 |  | ||||||
|   std::string _client_name; |  | ||||||
|   std::string _cluster_name; |  | ||||||
|   uint64_t _flags; |  | ||||||
|   std::string _db_name;  // get from user, readable string; Also used as db_id
 |  | ||||||
|                          // for db metadata
 |  | ||||||
|   std::string _config_path; |  | ||||||
|   librados::Rados _rados;  // RADOS client
 |  | ||||||
|   std::string _db_pool_name; |  | ||||||
|   librados::IoCtx _db_pool_ioctx;  // IoCtx for connecting db_pool
 |  | ||||||
|   std::string _wal_dir;            // WAL dir path
 |  | ||||||
|   std::string _wal_pool_name; |  | ||||||
|   librados::IoCtx _wal_pool_ioctx;  // IoCtx for connecting wal_pool
 |  | ||||||
|   uint64_t _write_buffer_size;      // WritableFile buffer max size
 |  | ||||||
| 
 |  | ||||||
|   // NOTE(review): the helpers below are declared only; their definitions
 |  | ||||||
|   // are not visible here. "fid" presumably denotes an internal file id
 |  | ||||||
|   // that a file name maps to, and _GetIoctx presumably selects between
 |  | ||||||
|   // the db-pool and WAL-pool IoCtx by path prefix -- confirm against the
 |  | ||||||
|   // implementation before relying on either reading.
 |  | ||||||
|   /* private function to communicate with rados */ |  | ||||||
|   std::string _CreateFid(); |  | ||||||
|   Status _GetFid(const std::string& fname, std::string& fid); |  | ||||||
|   Status _GetFid(const std::string& fname, std::string& fid, int fid_len); |  | ||||||
|   Status _RenameFid(const std::string& old_fname, const std::string& new_fname); |  | ||||||
|   Status _AddFid(const std::string& fname, const std::string& fid); |  | ||||||
|   Status _DelFid(const std::string& fname); |  | ||||||
|   Status _GetSubFnames(const std::string& dirname, |  | ||||||
|                        std::vector<std::string>* result); |  | ||||||
|   librados::IoCtx* _GetIoctx(const std::string& prefix); |  | ||||||
|   // LibradosWritableFile is granted access to the private members above.
 |  | ||||||
|   friend class LibradosWritableFile; |  | ||||||
| }; |  | ||||||
| }  // namespace ROCKSDB_NAMESPACE
 |  | ||||||
									
										
											File diff suppressed because it is too large
											Load Diff
										
									
								
							
						| @ -1,122 +0,0 @@ | |||||||
| # Introduction to EnvLibrados |  | ||||||
| EnvLibrados is a customized RocksDB Env that uses RADOS as the backend file system of RocksDB. It overrides all file-system-related APIs of the default Env. The easiest way to use it is as follows: |  | ||||||
| ```c++ |  | ||||||
| std::string db_name = "test_db"; |  | ||||||
| std::string config_path = "path/to/ceph/config"; |  | ||||||
| DB* db; |  | ||||||
| Options options; |  | ||||||
| options.env = EnvLibrados(db_name, config_path); |  | ||||||
| Status s = DB::Open(options, kDBPath, &db); |  | ||||||
| ... |  | ||||||
| ``` |  | ||||||
| EnvLibrados will then forward all file read/write operations to the RADOS cluster specified by config_path. The default pool is db_name+"_pool". |  | ||||||
| 
 |  | ||||||
| # Options for EnvLibrados |  | ||||||
| There are some options that users could set for EnvLibrados. |  | ||||||
| - write_buffer_size. This is the maximum buffer size for a WritableFile. Once the buffer reaches write_buffer_size, EnvLibrados syncs the buffer content to RADOS and then clears the buffer. |  | ||||||
| - db_pool. Rather than using the default pool, users can set their own db pool name. |  | ||||||
| - wal_dir. The dir for WAL files. Because RocksDB only has a 2-level structure (dir_name/file_name), the format of wal_dir is "/dir_name" (it CAN'T be "/dir1/dir2"). The default wal_dir is "/wal". |  | ||||||
| - wal_pool. The corresponding pool name for WAL files. The default value is db_name+"_wal_pool". |  | ||||||
| 
 |  | ||||||
| An example of setting these options looks like the following: |  | ||||||
| ```c++ |  | ||||||
| db_name = "test_db"; |  | ||||||
| db_pool = db_name+"_pool"; |  | ||||||
| wal_dir = "/wal"; |  | ||||||
| wal_pool = db_name+"_wal_pool"; |  | ||||||
| write_buffer_size = 1 << 20; |  | ||||||
| env_ = new EnvLibrados(db_name, config, db_pool, wal_dir, wal_pool, write_buffer_size); |  | ||||||
| 
 |  | ||||||
| DB* db; |  | ||||||
| Options options; |  | ||||||
| options.env = env_; |  | ||||||
| // The last level dir name should match the dir name in prefix_pool_map |  | ||||||
| options.wal_dir = "/tmp/wal";                     |  | ||||||
| 
 |  | ||||||
| // open DB |  | ||||||
| Status s = DB::Open(options, kDBPath, &db); |  | ||||||
| ... |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| # Performance Test |  | ||||||
| ## Compile |  | ||||||
| Check this [link](https://github.com/facebook/rocksdb/blob/main/INSTALL.md) to install the dependencies of RocksDB. Then you can compile it by running `$ make env_librados_test ROCKSDB_USE_LIBRADOS=1` under `rocksdb\`. The configuration file used by env_librados_test is `../ceph/src/ceph.conf`. For Ubuntu 14.04, just run the following commands: |  | ||||||
| ```bash |  | ||||||
| $ sudo apt-get install libgflags-dev |  | ||||||
| $ sudo apt-get install libsnappy-dev |  | ||||||
| $ sudo apt-get install zlib1g-dev |  | ||||||
| $ sudo apt-get install libbz2-dev |  | ||||||
| $ make env_librados_test ROCKSDB_USE_LIBRADOS=1 |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| ## Test Result |  | ||||||
| My test environment is Ubuntu 14.04 in VirtualBox with 8 cores and 8G RAM. The test results are as follows. |  | ||||||
| 
 |  | ||||||
| 1. Write (1<<20) keys in random order. The time of writing under the default env is around 10s, while the time of writing under EnvLibrados varies from 10s to 30s. |  | ||||||
| 
 |  | ||||||
| 2. Write (1<<20) keys in sequential order. The time of writing under the default env drops to around 1s, but the time of writing under EnvLibrados does not change. |  | ||||||
| 
 |  | ||||||
| 3. Read (1<<16) keys from (1<<20) keys in random order. The time of reading under both Envs is roughly the same, around 1.8s. |  | ||||||
| 
 |  | ||||||
| # MyRocks Test |  | ||||||
| ## Compile Ceph |  | ||||||
| See [link](http://docs.ceph.com/docs/master/install/build-ceph/) |  | ||||||
| 
 |  | ||||||
| ## Start RADOS |  | ||||||
| 
 |  | ||||||
| ```bash |  | ||||||
| cd ceph-path/src |  | ||||||
| ( ( ./stop.sh; rm -rf dev/*; CEPH_NUM_OSD=3 ./vstart.sh --short --localhost -n |  | ||||||
| -x -d ; ) ) 2>&1 |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| ## Compile MySQL |  | ||||||
| 
 |  | ||||||
| ```bash |  | ||||||
| sudo apt-get update |  | ||||||
| sudo apt-get install g++ cmake libbz2-dev libaio-dev bison \ |  | ||||||
| zlib1g-dev libsnappy-dev  |  | ||||||
| sudo apt-get install libgflags-dev libreadline6-dev libncurses5-dev \ |  | ||||||
| libssl-dev liblz4-dev gdb git |  | ||||||
| 
 |  | ||||||
| git clone https://github.com/facebook/mysql-5.6.git |  | ||||||
| cd mysql-5.6 |  | ||||||
| git submodule init |  | ||||||
| git submodule update |  | ||||||
| cmake . -DCMAKE_BUILD_TYPE=RelWithDebInfo -DWITH_SSL=system \ |  | ||||||
| -DWITH_ZLIB=bundled -DMYSQL_MAINTAINER_MODE=0 -DENABLED_LOCAL_INFILE=1 -DROCKSDB_USE_LIBRADOS=1 |  | ||||||
| make install -j8 |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| Check this [link](https://github.com/facebook/mysql-5.6/wiki/Build-Steps) for the latest compile steps. |  | ||||||
| 
 |  | ||||||
| ## Configure MySQL |  | ||||||
| The following are the steps to configure MySQL. |  | ||||||
| 
 |  | ||||||
| ```bash |  | ||||||
| mkdir -p /etc/mysql |  | ||||||
| mkdir -p /var/lib/mysql |  | ||||||
| mkdir -p /etc/mysql/conf.d |  | ||||||
| echo -e '[mysqld_safe]\nsyslog' > /etc/mysql/conf.d/mysqld_safe_syslog.cnf |  | ||||||
| cp /usr/share/mysql/my-medium.cnf /etc/mysql/my.cnf |  | ||||||
| sed -i 's#.*datadir.*#datadir = /var/lib/mysql#g' /etc/mysql/my.cnf |  | ||||||
| chown mysql:mysql -R /var/lib/mysql |  | ||||||
| 
 |  | ||||||
| mysql_install_db --user=mysql --ldata=/var/lib/mysql/ |  | ||||||
| export CEPH_CONFIG_PATH="path/of/ceph/config/file" |  | ||||||
| mysqld_safe --user=mysql --skip-innodb --rocksdb --default-storage-engine=rocksdb --default-tmp-storage-engine=MyISAM & |  | ||||||
| mysqladmin -u root password |  | ||||||
| mysql -u root -p |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| Check this [link](https://gist.github.com/shichao-an/f5639ecd551496ac2d70) for detailed information. |  | ||||||
| 
 |  | ||||||
| ```sql |  | ||||||
| show databases; |  | ||||||
| create database testdb; |  | ||||||
| use testdb; |  | ||||||
| show tables; |  | ||||||
| CREATE TABLE tbl (id INT AUTO_INCREMENT primary key, str VARCHAR(32)); |  | ||||||
| insert into tbl values (1, "val2"); |  | ||||||
| select * from tbl; |  | ||||||
| ``` |  | ||||||
									
										
											File diff suppressed because it is too large
											Load Diff
										
									
								
							
						
					Loading…
					
					
				
		Reference in new issue
	
	 Yanqin Jin
						Yanqin Jin