|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
//
|
|
|
|
// An Env is an interface used by the rocksdb implementation to access
|
|
|
|
// operating system functionality like the filesystem etc. Callers
|
|
|
|
// may wish to provide a custom Env object when opening a database to
|
|
|
|
// get fine gain control; e.g., to rate limit file system operations.
|
|
|
|
//
|
|
|
|
// All Env implementations are safe for concurrent access from
|
|
|
|
// multiple threads without any external synchronization.
|
|
|
|
|
|
|
|
#ifndef STORAGE_ROCKSDB_INCLUDE_ENV_H_
|
|
|
|
#define STORAGE_ROCKSDB_INCLUDE_ENV_H_
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <cstdarg>
|
|
|
|
#include <limits>
|
|
|
|
#include <memory>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
#include "rocksdb/status.h"
|
|
|
|
#include "rocksdb/thread_status.h"
|
|
|
|
|
|
|
|
#ifdef _WIN32
|
|
|
|
// Windows API macro interference
|
|
|
|
#undef DeleteFile
|
|
|
|
#undef GetCurrentTime
|
|
|
|
#endif
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
class FileLock;
|
|
|
|
class Logger;
|
|
|
|
class RandomAccessFile;
|
|
|
|
class SequentialFile;
|
|
|
|
class Slice;
|
|
|
|
class WritableFile;
|
|
|
|
class Directory;
|
|
|
|
struct DBOptions;
|
|
|
|
class RateLimiter;
|
|
|
|
class ThreadStatusUpdater;
|
|
|
|
struct ThreadStatus;
|
|
|
|
|
|
|
|
using std::unique_ptr;
|
|
|
|
using std::shared_ptr;
|
|
|
|
|
|
|
|
|
|
|
|
// Options while opening a file to read/write
|
|
|
|
struct EnvOptions {
|
|
|
|
|
|
|
|
// construct with default Options
|
|
|
|
EnvOptions();
|
|
|
|
|
|
|
|
// construct from Options
|
|
|
|
explicit EnvOptions(const DBOptions& options);
|
|
|
|
|
|
|
|
// If true, then allow caching of data in environment buffers
|
|
|
|
bool use_os_buffer = true;
|
|
|
|
|
|
|
|
// If true, then use mmap to read data
|
|
|
|
bool use_mmap_reads = false;
|
|
|
|
|
|
|
|
// If true, then use mmap to write data
|
|
|
|
bool use_mmap_writes = true;
|
|
|
|
|
|
|
|
// If false, fallocate() calls are bypassed
|
|
|
|
bool allow_fallocate = true;
|
|
|
|
|
|
|
|
// If true, set the FD_CLOEXEC on open fd.
|
|
|
|
bool set_fd_cloexec = true;
|
|
|
|
|
|
|
|
// Allows OS to incrementally sync files to disk while they are being
|
|
|
|
// written, in the background. Issue one request for every bytes_per_sync
|
|
|
|
// written. 0 turns it off.
|
|
|
|
// Default: 0
|
|
|
|
uint64_t bytes_per_sync = 0;
|
|
|
|
|
|
|
|
// If true, we will preallocate the file with FALLOC_FL_KEEP_SIZE flag, which
|
|
|
|
// means that file size won't change as part of preallocation.
|
|
|
|
// If false, preallocation will also change the file size. This option will
|
|
|
|
// improve the performance in workloads where you sync the data on every
|
|
|
|
// write. By default, we set it to true for MANIFEST writes and false for
|
|
|
|
// WAL writes
|
|
|
|
bool fallocate_with_keep_size = true;
|
|
|
|
|
|
|
|
// See DBOPtions doc
|
|
|
|
size_t compaction_readahead_size;
|
|
|
|
|
|
|
|
// See DBOPtions doc
|
|
|
|
size_t random_access_max_buffer_size;
|
|
|
|
|
|
|
|
// See DBOptions doc
|
|
|
|
size_t writable_file_max_buffer_size = 1024 * 1024;
|
|
|
|
|
|
|
|
// If not nullptr, write rate limiting is enabled for flush and compaction
|
|
|
|
RateLimiter* rate_limiter = nullptr;
|
|
|
|
};
|
|
|
|
|
|
|
|
class Env {
|
|
|
|
public:
|
|
|
|
struct FileAttributes {
|
|
|
|
// File name
|
|
|
|
std::string name;
|
|
|
|
|
|
|
|
// Size of file in bytes
|
|
|
|
uint64_t size_bytes;
|
|
|
|
};
|
|
|
|
|
|
|
|
Env() : thread_status_updater_(nullptr) {}
|
|
|
|
|
|
|
|
virtual ~Env();
|
|
|
|
|
|
|
|
// Return a default environment suitable for the current operating
|
|
|
|
// system. Sophisticated users may wish to provide their own Env
|
|
|
|
// implementation instead of relying on this default environment.
|
|
|
|
//
|
|
|
|
// The result of Default() belongs to rocksdb and must never be deleted.
|
|
|
|
static Env* Default();
|
|
|
|
|
|
|
|
// Create a brand new sequentially-readable file with the specified name.
|
|
|
|
// On success, stores a pointer to the new file in *result and returns OK.
|
|
|
|
// On failure stores nullptr in *result and returns non-OK. If the file does
|
|
|
|
// not exist, returns a non-OK status.
|
|
|
|
//
|
|
|
|
// The returned file will only be accessed by one thread at a time.
|
|
|
|
virtual Status NewSequentialFile(const std::string& fname,
|
|
|
|
unique_ptr<SequentialFile>* result,
|
|
|
|
const EnvOptions& options)
|
|
|
|
= 0;
|
|
|
|
|
|
|
|
// Create a brand new random access read-only file with the
|
|
|
|
// specified name. On success, stores a pointer to the new file in
|
|
|
|
// *result and returns OK. On failure stores nullptr in *result and
|
|
|
|
// returns non-OK. If the file does not exist, returns a non-OK
|
|
|
|
// status.
|
|
|
|
//
|
|
|
|
// The returned file may be concurrently accessed by multiple threads.
|
|
|
|
virtual Status NewRandomAccessFile(const std::string& fname,
|
|
|
|
unique_ptr<RandomAccessFile>* result,
|
|
|
|
const EnvOptions& options)
|
|
|
|
= 0;
|
|
|
|
|
|
|
|
// Create an object that writes to a new file with the specified
|
|
|
|
// name. Deletes any existing file with the same name and creates a
|
|
|
|
// new file. On success, stores a pointer to the new file in
|
|
|
|
// *result and returns OK. On failure stores nullptr in *result and
|
|
|
|
// returns non-OK.
|
|
|
|
//
|
|
|
|
// The returned file will only be accessed by one thread at a time.
|
|
|
|
virtual Status NewWritableFile(const std::string& fname,
|
|
|
|
unique_ptr<WritableFile>* result,
|
|
|
|
const EnvOptions& options) = 0;
|
|
|
|
|
|
|
|
// Reuse an existing file by renaming it and opening it as writable.
|
|
|
|
virtual Status ReuseWritableFile(const std::string& fname,
|
|
|
|
const std::string& old_fname,
|
|
|
|
unique_ptr<WritableFile>* result,
|
|
|
|
const EnvOptions& options);
|
|
|
|
|
|
|
|
// Create an object that represents a directory. Will fail if directory
|
|
|
|
// doesn't exist. If the directory exists, it will open the directory
|
|
|
|
// and create a new Directory object.
|
|
|
|
//
|
|
|
|
// On success, stores a pointer to the new Directory in
|
|
|
|
// *result and returns OK. On failure stores nullptr in *result and
|
|
|
|
// returns non-OK.
|
|
|
|
virtual Status NewDirectory(const std::string& name,
|
|
|
|
unique_ptr<Directory>* result) = 0;
|
|
|
|
|
|
|
|
// Returns OK if the named file exists.
|
|
|
|
// NotFound if the named file does not exist,
|
|
|
|
// the calling process does not have permission to determine
|
|
|
|
// whether this file exists, or if the path is invalid.
|
|
|
|
// IOError if an IO Error was encountered
|
|
|
|
virtual Status FileExists(const std::string& fname) = 0;
|
|
|
|
|
|
|
|
// Store in *result the names of the children of the specified directory.
|
|
|
|
// The names are relative to "dir".
|
|
|
|
// Original contents of *results are dropped.
|
|
|
|
virtual Status GetChildren(const std::string& dir,
|
|
|
|
std::vector<std::string>* result) = 0;
|
|
|
|
|
|
|
|
// Store in *result the attributes of the children of the specified directory.
|
|
|
|
// In case the implementation lists the directory prior to iterating the files
|
|
|
|
// and files are concurrently deleted, the deleted files will be omitted from
|
|
|
|
// result.
|
|
|
|
// The name attributes are relative to "dir".
|
|
|
|
// Original contents of *results are dropped.
|
|
|
|
virtual Status GetChildrenFileAttributes(const std::string& dir,
|
|
|
|
std::vector<FileAttributes>* result);
|
|
|
|
|
|
|
|
// Delete the named file.
|
|
|
|
virtual Status DeleteFile(const std::string& fname) = 0;
|
|
|
|
|
|
|
|
// Create the specified directory. Returns error if directory exists.
|
|
|
|
virtual Status CreateDir(const std::string& dirname) = 0;
|
|
|
|
|
|
|
|
// Creates directory if missing. Return Ok if it exists, or successful in
|
|
|
|
// Creating.
|
|
|
|
virtual Status CreateDirIfMissing(const std::string& dirname) = 0;
|
|
|
|
|
|
|
|
// Delete the specified directory.
|
|
|
|
virtual Status DeleteDir(const std::string& dirname) = 0;
|
|
|
|
|
|
|
|
// Store the size of fname in *file_size.
|
|
|
|
virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) = 0;
|
|
|
|
|
|
|
|
// Store the last modification time of fname in *file_mtime.
|
|
|
|
virtual Status GetFileModificationTime(const std::string& fname,
|
|
|
|
uint64_t* file_mtime) = 0;
|
|
|
|
// Rename file src to target.
|
|
|
|
virtual Status RenameFile(const std::string& src,
|
|
|
|
const std::string& target) = 0;
|
|
|
|
|
|
|
|
// Hard Link file src to target.
|
|
|
|
virtual Status LinkFile(const std::string& src, const std::string& target) {
|
|
|
|
return Status::NotSupported("LinkFile is not supported for this Env");
|
|
|
|
}
|
|
|
|
|
|
|
|
// Lock the specified file. Used to prevent concurrent access to
|
|
|
|
// the same db by multiple processes. On failure, stores nullptr in
|
|
|
|
// *lock and returns non-OK.
|
|
|
|
//
|
|
|
|
// On success, stores a pointer to the object that represents the
|
|
|
|
// acquired lock in *lock and returns OK. The caller should call
|
|
|
|
// UnlockFile(*lock) to release the lock. If the process exits,
|
|
|
|
// the lock will be automatically released.
|
|
|
|
//
|
|
|
|
// If somebody else already holds the lock, finishes immediately
|
|
|
|
// with a failure. I.e., this call does not wait for existing locks
|
|
|
|
// to go away.
|
|
|
|
//
|
|
|
|
// May create the named file if it does not already exist.
|
|
|
|
virtual Status LockFile(const std::string& fname, FileLock** lock) = 0;
|
|
|
|
|
|
|
|
// Release the lock acquired by a previous successful call to LockFile.
|
|
|
|
// REQUIRES: lock was returned by a successful LockFile() call
|
|
|
|
// REQUIRES: lock has not already been unlocked.
|
|
|
|
virtual Status UnlockFile(FileLock* lock) = 0;
|
|
|
|
|
generic rate limiter
Summary:
A generic rate limiter that can be shared by threads and rocksdb
instances. Will use this to smooth out write traffic generated by
compaction and flush. This will help us get better p99 behavior on flash
storage.
Test Plan:
unit test output
==== Test RateLimiterTest.Rate
request size [1 - 1023], limit 10 KB/sec, actual rate: 10.374969 KB/sec, elapsed 2002265
request size [1 - 2047], limit 20 KB/sec, actual rate: 20.771242 KB/sec, elapsed 2002139
request size [1 - 4095], limit 40 KB/sec, actual rate: 41.285299 KB/sec, elapsed 2202424
request size [1 - 8191], limit 80 KB/sec, actual rate: 81.371605 KB/sec, elapsed 2402558
request size [1 - 16383], limit 160 KB/sec, actual rate: 162.541268 KB/sec, elapsed 3303500
Reviewers: yhchiang, igor, sdong
Reviewed By: sdong
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D19359
11 years ago
|
|
|
// Priority for scheduling job in thread pool
|
|
|
|
enum Priority { LOW, HIGH, TOTAL };
|
|
|
|
|
|
|
|
// Priority for requesting bytes in rate limiter scheduler
|
generic rate limiter
Summary:
A generic rate limiter that can be shared by threads and rocksdb
instances. Will use this to smooth out write traffic generated by
compaction and flush. This will help us get better p99 behavior on flash
storage.
Test Plan:
unit test output
==== Test RateLimiterTest.Rate
request size [1 - 1023], limit 10 KB/sec, actual rate: 10.374969 KB/sec, elapsed 2002265
request size [1 - 2047], limit 20 KB/sec, actual rate: 20.771242 KB/sec, elapsed 2002139
request size [1 - 4095], limit 40 KB/sec, actual rate: 41.285299 KB/sec, elapsed 2202424
request size [1 - 8191], limit 80 KB/sec, actual rate: 81.371605 KB/sec, elapsed 2402558
request size [1 - 16383], limit 160 KB/sec, actual rate: 162.541268 KB/sec, elapsed 3303500
Reviewers: yhchiang, igor, sdong
Reviewed By: sdong
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D19359
11 years ago
|
|
|
enum IOPriority {
|
|
|
|
IO_LOW = 0,
|
|
|
|
IO_HIGH = 1,
|
|
|
|
IO_TOTAL = 2
|
|
|
|
};
|
|
|
|
|
|
|
|
// Arrange to run "(*function)(arg)" once in a background thread, in
|
|
|
|
// the thread pool specified by pri. By default, jobs go to the 'LOW'
|
|
|
|
// priority thread pool.
|
|
|
|
|
|
|
|
// "function" may run in an unspecified thread. Multiple functions
|
|
|
|
// added to the same Env may run concurrently in different threads.
|
|
|
|
// I.e., the caller may not assume that background work items are
|
|
|
|
// serialized.
|
|
|
|
// When the UnSchedule function is called, the unschedFunction
|
|
|
|
// registered at the time of Schedule is invoked with arg as a parameter.
|
|
|
|
virtual void Schedule(void (*function)(void* arg), void* arg,
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
9 years ago
|
|
|
Priority pri = LOW, void* tag = nullptr,
|
|
|
|
void (*unschedFunction)(void* arg) = 0) = 0;
|
|
|
|
|
|
|
|
// Arrange to remove jobs for given arg from the queue_ if they are not
|
|
|
|
// already scheduled. Caller is expected to have exclusive lock on arg.
|
|
|
|
virtual int UnSchedule(void* arg, Priority pri) { return 0; }
|
|
|
|
|
|
|
|
// Start a new thread, invoking "function(arg)" within the new thread.
|
|
|
|
// When "function(arg)" returns, the thread will be destroyed.
|
|
|
|
virtual void StartThread(void (*function)(void* arg), void* arg) = 0;
|
|
|
|
|
|
|
|
// Wait for all threads started by StartThread to terminate.
|
|
|
|
virtual void WaitForJoin() {}
|
|
|
|
|
|
|
|
// Get thread pool queue length for specific thrad pool.
|
|
|
|
virtual unsigned int GetThreadPoolQueueLen(Priority pri = LOW) const {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// *path is set to a temporary directory that can be used for testing. It may
|
|
|
|
// or many not have just been created. The directory may or may not differ
|
|
|
|
// between runs of the same process, but subsequent calls will return the
|
|
|
|
// same directory.
|
|
|
|
virtual Status GetTestDirectory(std::string* path) = 0;
|
|
|
|
|
|
|
|
// Create and return a log file for storing informational messages.
|
|
|
|
virtual Status NewLogger(const std::string& fname,
|
|
|
|
shared_ptr<Logger>* result) = 0;
|
|
|
|
|
|
|
|
// Returns the number of micro-seconds since some fixed point in time. Only
|
|
|
|
// useful for computing deltas of time.
|
|
|
|
// However, it is often used as system time such as in GenericRateLimiter
|
|
|
|
// and other places so a port needs to return system time in order to work.
|
|
|
|
virtual uint64_t NowMicros() = 0;
|
|
|
|
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
12 years ago
|
|
|
// Returns the number of nano-seconds since some fixed point in time. Only
|
|
|
|
// useful for computing deltas of time in one run.
|
|
|
|
// Default implementation simply relies on NowMicros
|
|
|
|
virtual uint64_t NowNanos() {
|
|
|
|
return NowMicros() * 1000;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sleep/delay the thread for the perscribed number of micro-seconds.
|
|
|
|
virtual void SleepForMicroseconds(int micros) = 0;
|
|
|
|
|
|
|
|
// Get the current host name.
|
|
|
|
virtual Status GetHostName(char* name, uint64_t len) = 0;
|
|
|
|
|
|
|
|
// Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC).
|
|
|
|
virtual Status GetCurrentTime(int64_t* unix_time) = 0;
|
|
|
|
|
|
|
|
// Get full directory name for this db.
|
|
|
|
virtual Status GetAbsolutePath(const std::string& db_path,
|
|
|
|
std::string* output_path) = 0;
|
|
|
|
|
|
|
|
// The number of background worker threads of a specific thread pool
|
|
|
|
// for this environment. 'LOW' is the default pool.
|
|
|
|
// default number: 1
|
|
|
|
virtual void SetBackgroundThreads(int number, Priority pri = LOW) = 0;
|
|
|
|
|
|
|
|
// Enlarge number of background worker threads of a specific thread pool
|
|
|
|
// for this environment if it is smaller than specified. 'LOW' is the default
|
|
|
|
// pool.
|
|
|
|
virtual void IncBackgroundThreadsIfNeeded(int number, Priority pri) = 0;
|
|
|
|
|
|
|
|
// Lower IO priority for threads from the specified pool.
|
|
|
|
virtual void LowerThreadPoolIOPriority(Priority pool = LOW) {}
|
|
|
|
|
|
|
|
// Converts seconds-since-Jan-01-1970 to a printable string
|
|
|
|
virtual std::string TimeToString(uint64_t time) = 0;
|
|
|
|
|
|
|
|
// Generates a unique id that can be used to identify a db
|
|
|
|
virtual std::string GenerateUniqueId();
|
|
|
|
|
|
|
|
// OptimizeForLogWrite will create a new EnvOptions object that is a copy of
|
|
|
|
// the EnvOptions in the parameters, but is optimized for writing log files.
|
|
|
|
// Default implementation returns the copy of the same object.
|
|
|
|
virtual EnvOptions OptimizeForLogWrite(const EnvOptions& env_options,
|
|
|
|
const DBOptions& db_options) const;
|
|
|
|
// OptimizeForManifestWrite will create a new EnvOptions object that is a copy
|
|
|
|
// of the EnvOptions in the parameters, but is optimized for writing manifest
|
|
|
|
// files. Default implementation returns the copy of the same object.
|
|
|
|
virtual EnvOptions OptimizeForManifestWrite(const EnvOptions& env_options)
|
|
|
|
const;
|
|
|
|
|
|
|
|
// Returns the status of all threads that belong to the current Env.
|
|
|
|
virtual Status GetThreadList(std::vector<ThreadStatus>* thread_list) {
|
|
|
|
return Status::NotSupported("Not supported.");
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the pointer to ThreadStatusUpdater. This function will be
|
|
|
|
// used in RocksDB internally to update thread status and supports
|
|
|
|
// GetThreadList().
|
|
|
|
virtual ThreadStatusUpdater* GetThreadStatusUpdater() const {
|
|
|
|
return thread_status_updater_;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the ID of the current thread.
|
|
|
|
virtual uint64_t GetThreadID() const;
|
|
|
|
|
|
|
|
protected:
|
|
|
|
// The pointer to an internal structure that will update the
|
|
|
|
// status of each thread.
|
|
|
|
ThreadStatusUpdater* thread_status_updater_;
|
|
|
|
|
|
|
|
private:
|
|
|
|
// No copying allowed
|
|
|
|
Env(const Env&);
|
|
|
|
void operator=(const Env&);
|
|
|
|
};
|
|
|
|
|
|
|
|
// The factory function to construct a ThreadStatusUpdater. Any Env
|
|
|
|
// that supports GetThreadList() feature should call this function in its
|
|
|
|
// constructor to initialize thread_status_updater_.
|
|
|
|
ThreadStatusUpdater* CreateThreadStatusUpdater();
|
|
|
|
|
|
|
|
// A file abstraction for reading sequentially through a file
|
|
|
|
class SequentialFile {
|
|
|
|
public:
|
|
|
|
SequentialFile() { }
|
|
|
|
virtual ~SequentialFile();
|
|
|
|
|
|
|
|
// Read up to "n" bytes from the file. "scratch[0..n-1]" may be
|
|
|
|
// written by this routine. Sets "*result" to the data that was
|
|
|
|
// read (including if fewer than "n" bytes were successfully read).
|
|
|
|
// May set "*result" to point at data in "scratch[0..n-1]", so
|
|
|
|
// "scratch[0..n-1]" must be live when "*result" is used.
|
|
|
|
// If an error was encountered, returns a non-OK status.
|
|
|
|
//
|
|
|
|
// REQUIRES: External synchronization
|
|
|
|
virtual Status Read(size_t n, Slice* result, char* scratch) = 0;
|
|
|
|
|
|
|
|
// Skip "n" bytes from the file. This is guaranteed to be no
|
|
|
|
// slower that reading the same data, but may be faster.
|
|
|
|
//
|
|
|
|
// If end of file is reached, skipping will stop at the end of the
|
|
|
|
// file, and Skip will return OK.
|
|
|
|
//
|
|
|
|
// REQUIRES: External synchronization
|
|
|
|
virtual Status Skip(uint64_t n) = 0;
|
|
|
|
|
|
|
|
// Remove any kind of caching of data from the offset to offset+length
|
|
|
|
// of this file. If the length is 0, then it refers to the end of file.
|
|
|
|
// If the system is not caching the file contents, then this is a noop.
|
|
|
|
virtual Status InvalidateCache(size_t offset, size_t length) {
|
|
|
|
return Status::NotSupported("InvalidateCache not supported.");
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// A file abstraction for randomly reading the contents of a file.
|
|
|
|
class RandomAccessFile {
|
|
|
|
public:
|
|
|
|
RandomAccessFile() { }
|
|
|
|
virtual ~RandomAccessFile();
|
|
|
|
|
|
|
|
// Read up to "n" bytes from the file starting at "offset".
|
|
|
|
// "scratch[0..n-1]" may be written by this routine. Sets "*result"
|
|
|
|
// to the data that was read (including if fewer than "n" bytes were
|
|
|
|
// successfully read). May set "*result" to point at data in
|
|
|
|
// "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
|
|
|
|
// "*result" is used. If an error was encountered, returns a non-OK
|
|
|
|
// status.
|
|
|
|
//
|
|
|
|
// Safe for concurrent use by multiple threads.
|
|
|
|
virtual Status Read(uint64_t offset, size_t n, Slice* result,
|
|
|
|
char* scratch) const = 0;
|
|
|
|
|
|
|
|
// Used by the file_reader_writer to decide if the ReadAhead wrapper
|
|
|
|
// should simply forward the call and do not enact buffering or locking.
|
|
|
|
virtual bool ShouldForwardRawRequest() const {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// For cases when read-ahead is implemented in the platform dependent
|
|
|
|
// layer
|
|
|
|
virtual void EnableReadAhead() {}
|
|
|
|
|
|
|
|
// Tries to get an unique ID for this file that will be the same each time
|
|
|
|
// the file is opened (and will stay the same while the file is open).
|
|
|
|
// Furthermore, it tries to make this ID at most "max_size" bytes. If such an
|
|
|
|
// ID can be created this function returns the length of the ID and places it
|
|
|
|
// in "id"; otherwise, this function returns 0, in which case "id"
|
|
|
|
// may not have been modified.
|
|
|
|
//
|
|
|
|
// This function guarantees, for IDs from a given environment, two unique ids
|
|
|
|
// cannot be made equal to eachother by adding arbitrary bytes to one of
|
|
|
|
// them. That is, no unique ID is the prefix of another.
|
|
|
|
//
|
|
|
|
// This function guarantees that the returned ID will not be interpretable as
|
|
|
|
// a single varint.
|
|
|
|
//
|
|
|
|
// Note: these IDs are only valid for the duration of the process.
|
|
|
|
virtual size_t GetUniqueId(char* id, size_t max_size) const {
|
|
|
|
return 0; // Default implementation to prevent issues with backwards
|
|
|
|
// compatibility.
|
|
|
|
};
|
|
|
|
|
|
|
|
enum AccessPattern { NORMAL, RANDOM, SEQUENTIAL, WILLNEED, DONTNEED };
|
|
|
|
|
|
|
|
virtual void Hint(AccessPattern pattern) {}
|
|
|
|
|
|
|
|
// Remove any kind of caching of data from the offset to offset+length
|
|
|
|
// of this file. If the length is 0, then it refers to the end of file.
|
|
|
|
// If the system is not caching the file contents, then this is a noop.
|
|
|
|
virtual Status InvalidateCache(size_t offset, size_t length) {
|
|
|
|
return Status::NotSupported("InvalidateCache not supported.");
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// A file abstraction for sequential writing. The implementation
|
|
|
|
// must provide buffering since callers may append small fragments
|
|
|
|
// at a time to the file.
|
|
|
|
class WritableFile {
|
|
|
|
public:
|
|
|
|
WritableFile()
|
|
|
|
: last_preallocated_block_(0),
|
|
|
|
preallocation_block_size_(0),
|
|
|
|
io_priority_(Env::IO_TOTAL) {
|
|
|
|
}
|
|
|
|
virtual ~WritableFile();
|
|
|
|
|
|
|
|
// Indicates if the class makes use of unbuffered I/O
|
|
|
|
virtual bool UseOSBuffer() const {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
const size_t c_DefaultPageSize = 4 * 1024;
|
|
|
|
|
|
|
|
// This is needed when you want to allocate
|
|
|
|
// AlignedBuffer for use with file I/O classes
|
|
|
|
// Used for unbuffered file I/O when UseOSBuffer() returns false
|
|
|
|
virtual size_t GetRequiredBufferAlignment() const {
|
|
|
|
return c_DefaultPageSize;
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual Status Append(const Slice& data) = 0;
|
|
|
|
|
|
|
|
// Positioned write for unbuffered access default forward
|
|
|
|
// to simple append as most of the tests are buffered by default
|
|
|
|
virtual Status PositionedAppend(const Slice& /* data */, uint64_t /* offset */) {
|
|
|
|
return Status::NotSupported();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Truncate is necessary to trim the file to the correct size
|
|
|
|
// before closing. It is not always possible to keep track of the file
|
|
|
|
// size due to whole pages writes. The behavior is undefined if called
|
|
|
|
// with other writes to follow.
|
|
|
|
virtual Status Truncate(uint64_t size) {
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
virtual Status Close() = 0;
|
|
|
|
virtual Status Flush() = 0;
|
|
|
|
virtual Status Sync() = 0; // sync data
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Sync data and/or metadata as well.
|
|
|
|
* By default, sync only data.
|
|
|
|
* Override this method for environments where we need to sync
|
|
|
|
* metadata as well.
|
|
|
|
*/
|
|
|
|
virtual Status Fsync() {
|
|
|
|
return Sync();
|
|
|
|
}
|
|
|
|
|
[wal changes 3/3] method in DB to sync WAL without blocking writers
Summary:
Subj. We really need this feature.
Previous diff D40899 has most of the changes to make this possible, this diff just adds the method.
Test Plan: `make check`, the new test fails without this diff; ran with ASAN, TSAN and valgrind.
Reviewers: igor, rven, IslamAbdelRahman, anthony, kradhakrishnan, tnovak, yhchiang, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, maykov, hermanlee4, yoshinorim, tnovak, dhruba
Differential Revision: https://reviews.facebook.net/D40905
9 years ago
|
|
|
// true if Sync() and Fsync() are safe to call concurrently with Append()
|
|
|
|
// and Flush().
|
|
|
|
virtual bool IsSyncThreadSafe() const {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Indicates the upper layers if the current WritableFile implementation
|
|
|
|
// uses direct IO.
|
|
|
|
virtual bool UseDirectIO() const { return false; }
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Change the priority in rate limiter if rate limiting is enabled.
|
|
|
|
* If rate limiting is not enabled, this call has no effect.
|
|
|
|
*/
|
|
|
|
virtual void SetIOPriority(Env::IOPriority pri) {
|
|
|
|
io_priority_ = pri;
|
|
|
|
}
|
|
|
|
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
9 years ago
|
|
|
virtual Env::IOPriority GetIOPriority() { return io_priority_; }
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the size of valid data in the file.
|
|
|
|
*/
|
|
|
|
virtual uint64_t GetFileSize() {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get and set the default pre-allocation block size for writes to
|
|
|
|
* this file. If non-zero, then Allocate will be used to extend the
|
|
|
|
* underlying storage of a file (generally via fallocate) if the Env
|
|
|
|
* instance supports it.
|
|
|
|
*/
|
|
|
|
void SetPreallocationBlockSize(size_t size) {
|
|
|
|
preallocation_block_size_ = size;
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual void GetPreallocationStatus(size_t* block_size,
|
|
|
|
size_t* last_allocated_block) {
|
|
|
|
*last_allocated_block = last_preallocated_block_;
|
|
|
|
*block_size = preallocation_block_size_;
|
|
|
|
}
|
|
|
|
|
|
|
|
// For documentation, refer to RandomAccessFile::GetUniqueId()
|
|
|
|
virtual size_t GetUniqueId(char* id, size_t max_size) const {
|
|
|
|
return 0; // Default implementation to prevent issues with backwards
|
|
|
|
}
|
|
|
|
|
|
|
|
// Remove any kind of caching of data from the offset to offset+length
|
|
|
|
// of this file. If the length is 0, then it refers to the end of file.
|
|
|
|
// If the system is not caching the file contents, then this is a noop.
|
|
|
|
// This call has no effect on dirty pages in the cache.
|
|
|
|
virtual Status InvalidateCache(size_t offset, size_t length) {
|
|
|
|
return Status::NotSupported("InvalidateCache not supported.");
|
|
|
|
}
|
|
|
|
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
9 years ago
|
|
|
// Sync a file range with disk.
|
|
|
|
// offset is the starting byte of the file range to be synchronized.
|
|
|
|
// nbytes specifies the length of the range to be synchronized.
|
|
|
|
// This asks the OS to initiate flushing the cached data to disk,
|
|
|
|
// without waiting for completion.
|
|
|
|
// Default implementation does nothing.
|
|
|
|
virtual Status RangeSync(uint64_t offset, uint64_t nbytes) { return Status::OK(); }
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
9 years ago
|
|
|
|
|
|
|
// PrepareWrite performs any necessary preparation for a write
|
|
|
|
// before the write actually occurs. This allows for pre-allocation
|
|
|
|
// of space on devices where it can result in less file
|
|
|
|
// fragmentation and/or less waste from over-zealous filesystem
|
|
|
|
// pre-allocation.
|
|
|
|
void PrepareWrite(size_t offset, size_t len) {
|
|
|
|
if (preallocation_block_size_ == 0) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// If this write would cross one or more preallocation blocks,
|
|
|
|
// determine what the last preallocation block necesessary to
|
|
|
|
// cover this write would be and Allocate to that point.
|
|
|
|
const auto block_size = preallocation_block_size_;
|
|
|
|
size_t new_last_preallocated_block =
|
|
|
|
(offset + len + block_size - 1) / block_size;
|
|
|
|
if (new_last_preallocated_block > last_preallocated_block_) {
|
|
|
|
size_t num_spanned_blocks =
|
|
|
|
new_last_preallocated_block - last_preallocated_block_;
|
|
|
|
Allocate(block_size * last_preallocated_block_,
|
|
|
|
block_size * num_spanned_blocks);
|
|
|
|
last_preallocated_block_ = new_last_preallocated_block;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
9 years ago
|
|
|
protected:
|
|
|
|
/*
|
|
|
|
* Pre-allocate space for a file.
|
|
|
|
*/
|
|
|
|
virtual Status Allocate(uint64_t offset, uint64_t len) {
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t preallocation_block_size() { return preallocation_block_size_; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
size_t last_preallocated_block_;
|
|
|
|
size_t preallocation_block_size_;
|
|
|
|
// No copying allowed
|
|
|
|
WritableFile(const WritableFile&);
|
|
|
|
void operator=(const WritableFile&);
|
|
|
|
|
|
|
|
protected:
|
|
|
|
friend class WritableFileWrapper;
|
|
|
|
friend class WritableFileMirror;
|
|
|
|
|
|
|
|
Env::IOPriority io_priority_;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Directory object represents collection of files and implements
|
|
|
|
// filesystem operations that can be executed on directories.
|
|
|
|
class Directory {
|
|
|
|
public:
|
|
|
|
virtual ~Directory() {}
|
[wal changes 3/3] method in DB to sync WAL without blocking writers
Summary:
Subj. We really need this feature.
Previous diff D40899 has most of the changes to make this possible, this diff just adds the method.
Test Plan: `make check`, the new test fails without this diff; ran with ASAN, TSAN and valgrind.
Reviewers: igor, rven, IslamAbdelRahman, anthony, kradhakrishnan, tnovak, yhchiang, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, maykov, hermanlee4, yoshinorim, tnovak, dhruba
Differential Revision: https://reviews.facebook.net/D40905
9 years ago
|
|
|
// Fsync directory. Can be called concurrently from multiple threads.
|
|
|
|
virtual Status Fsync() = 0;
|
|
|
|
};
|
|
|
|
|
|
|
|
enum InfoLogLevel : unsigned char {
|
|
|
|
DEBUG_LEVEL = 0,
|
|
|
|
INFO_LEVEL,
|
|
|
|
WARN_LEVEL,
|
|
|
|
ERROR_LEVEL,
|
|
|
|
FATAL_LEVEL,
|
|
|
|
HEADER_LEVEL,
|
|
|
|
NUM_INFO_LOG_LEVELS,
|
|
|
|
};
|
|
|
|
|
|
|
|
// An interface for writing log messages.
|
|
|
|
class Logger {
|
|
|
|
public:
|
|
|
|
size_t kDoNotSupportGetLogFileSize = std::numeric_limits<size_t>::max();
|
|
|
|
|
|
|
|
explicit Logger(const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL)
|
|
|
|
: log_level_(log_level) {}
|
|
|
|
virtual ~Logger();
|
|
|
|
|
|
|
|
// Write a header to the log file with the specified format
|
|
|
|
// It is recommended that you log all header information at the start of the
|
|
|
|
// application. But it is not enforced.
|
|
|
|
virtual void LogHeader(const char* format, va_list ap) {
|
|
|
|
// Default implementation does a simple INFO level log write.
|
|
|
|
// Please override as per the logger class requirement.
|
|
|
|
Logv(format, ap);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Write an entry to the log file with the specified format.
|
|
|
|
virtual void Logv(const char* format, va_list ap) = 0;
|
|
|
|
|
|
|
|
// Write an entry to the log file with the specified log level
|
|
|
|
// and format. Any log with level under the internal log level
|
|
|
|
// of *this (see @SetInfoLogLevel and @GetInfoLogLevel) will not be
|
|
|
|
// printed.
|
|
|
|
virtual void Logv(const InfoLogLevel log_level, const char* format, va_list ap);
|
|
|
|
|
|
|
|
virtual size_t GetLogFileSize() const { return kDoNotSupportGetLogFileSize; }
|
|
|
|
// Flush to the OS buffers
|
|
|
|
virtual void Flush() {}
|
|
|
|
virtual InfoLogLevel GetInfoLogLevel() const { return log_level_; }
|
|
|
|
virtual void SetInfoLogLevel(const InfoLogLevel log_level) {
|
|
|
|
log_level_ = log_level;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
// No copying allowed
|
|
|
|
Logger(const Logger&);
|
|
|
|
void operator=(const Logger&);
|
|
|
|
InfoLogLevel log_level_;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// Identifies a locked file.
|
|
|
|
class FileLock {
|
|
|
|
public:
|
|
|
|
FileLock() { }
|
|
|
|
virtual ~FileLock();
|
|
|
|
private:
|
|
|
|
// No copying allowed
|
|
|
|
FileLock(const FileLock&);
|
|
|
|
void operator=(const FileLock&);
|
|
|
|
};
|
|
|
|
|
|
|
|
extern void LogFlush(const shared_ptr<Logger>& info_log);
|
|
|
|
|
|
|
|
extern void Log(const InfoLogLevel log_level,
|
|
|
|
const shared_ptr<Logger>& info_log, const char* format, ...);
|
|
|
|
|
|
|
|
// a set of log functions with different log levels.
|
|
|
|
extern void Header(const shared_ptr<Logger>& info_log, const char* format, ...);
|
|
|
|
extern void Debug(const shared_ptr<Logger>& info_log, const char* format, ...);
|
|
|
|
extern void Info(const shared_ptr<Logger>& info_log, const char* format, ...);
|
|
|
|
extern void Warn(const shared_ptr<Logger>& info_log, const char* format, ...);
|
|
|
|
extern void Error(const shared_ptr<Logger>& info_log, const char* format, ...);
|
|
|
|
extern void Fatal(const shared_ptr<Logger>& info_log, const char* format, ...);
|
|
|
|
|
|
|
|
// Log the specified data to *info_log if info_log is non-nullptr.
|
|
|
|
// The default info log level is InfoLogLevel::ERROR.
|
|
|
|
extern void Log(const shared_ptr<Logger>& info_log, const char* format, ...)
|
|
|
|
# if defined(__GNUC__) || defined(__clang__)
|
|
|
|
__attribute__((__format__ (__printf__, 2, 3)))
|
|
|
|
# endif
|
|
|
|
;
|
|
|
|
|
|
|
|
extern void LogFlush(Logger *info_log);
|
|
|
|
|
|
|
|
extern void Log(const InfoLogLevel log_level, Logger* info_log,
|
|
|
|
const char* format, ...);
|
|
|
|
|
|
|
|
// The default info log level is InfoLogLevel::ERROR.
|
|
|
|
extern void Log(Logger* info_log, const char* format, ...)
|
|
|
|
# if defined(__GNUC__) || defined(__clang__)
|
|
|
|
__attribute__((__format__ (__printf__, 2, 3)))
|
|
|
|
# endif
|
|
|
|
;
|
|
|
|
|
|
|
|
// a set of log functions with different log levels.
|
|
|
|
extern void Header(Logger* info_log, const char* format, ...);
|
|
|
|
extern void Debug(Logger* info_log, const char* format, ...);
|
|
|
|
extern void Info(Logger* info_log, const char* format, ...);
|
|
|
|
extern void Warn(Logger* info_log, const char* format, ...);
|
|
|
|
extern void Error(Logger* info_log, const char* format, ...);
|
|
|
|
extern void Fatal(Logger* info_log, const char* format, ...);
|
|
|
|
|
|
|
|
// A utility routine: write "data" to the named file.
|
|
|
|
extern Status WriteStringToFile(Env* env, const Slice& data,
|
|
|
|
const std::string& fname,
|
|
|
|
bool should_sync = false);
|
|
|
|
|
|
|
|
// A utility routine: read contents of named file into *data
|
|
|
|
extern Status ReadFileToString(Env* env, const std::string& fname,
|
|
|
|
std::string* data);
|
|
|
|
|
|
|
|
// An implementation of Env that forwards all calls to another Env.
|
|
|
|
// May be useful to clients who wish to override just part of the
|
|
|
|
// functionality of another Env.
|
|
|
|
class EnvWrapper : public Env {
|
|
|
|
public:
|
|
|
|
// Initialize an EnvWrapper that delegates all calls to *t
|
|
|
|
explicit EnvWrapper(Env* t) : target_(t) { }
|
|
|
|
virtual ~EnvWrapper();
|
|
|
|
|
|
|
|
// Return the target to which this Env forwards all calls
|
|
|
|
Env* target() const { return target_; }
|
|
|
|
|
|
|
|
// The following text is boilerplate that forwards all methods to target()
|
|
|
|
Status NewSequentialFile(const std::string& f, unique_ptr<SequentialFile>* r,
|
|
|
|
const EnvOptions& options) override {
|
|
|
|
return target_->NewSequentialFile(f, r, options);
|
|
|
|
}
|
|
|
|
Status NewRandomAccessFile(const std::string& f,
|
|
|
|
unique_ptr<RandomAccessFile>* r,
|
|
|
|
const EnvOptions& options) override {
|
|
|
|
return target_->NewRandomAccessFile(f, r, options);
|
|
|
|
}
|
|
|
|
Status NewWritableFile(const std::string& f, unique_ptr<WritableFile>* r,
|
|
|
|
const EnvOptions& options) override {
|
|
|
|
return target_->NewWritableFile(f, r, options);
|
|
|
|
}
|
|
|
|
Status ReuseWritableFile(const std::string& fname,
|
|
|
|
const std::string& old_fname,
|
|
|
|
unique_ptr<WritableFile>* r,
|
|
|
|
const EnvOptions& options) override {
|
|
|
|
return target_->ReuseWritableFile(fname, old_fname, r, options);
|
|
|
|
}
|
|
|
|
virtual Status NewDirectory(const std::string& name,
|
|
|
|
unique_ptr<Directory>* result) override {
|
|
|
|
return target_->NewDirectory(name, result);
|
|
|
|
}
|
|
|
|
Status FileExists(const std::string& f) override {
|
|
|
|
return target_->FileExists(f);
|
|
|
|
}
|
|
|
|
Status GetChildren(const std::string& dir,
|
|
|
|
std::vector<std::string>* r) override {
|
|
|
|
return target_->GetChildren(dir, r);
|
|
|
|
}
|
|
|
|
Status GetChildrenFileAttributes(
|
|
|
|
const std::string& dir, std::vector<FileAttributes>* result) override {
|
|
|
|
return target_->GetChildrenFileAttributes(dir, result);
|
|
|
|
}
|
|
|
|
Status DeleteFile(const std::string& f) override {
|
|
|
|
return target_->DeleteFile(f);
|
|
|
|
}
|
|
|
|
Status CreateDir(const std::string& d) override {
|
|
|
|
return target_->CreateDir(d);
|
|
|
|
}
|
|
|
|
Status CreateDirIfMissing(const std::string& d) override {
|
|
|
|
return target_->CreateDirIfMissing(d);
|
|
|
|
}
|
|
|
|
Status DeleteDir(const std::string& d) override {
|
|
|
|
return target_->DeleteDir(d);
|
|
|
|
}
|
|
|
|
Status GetFileSize(const std::string& f, uint64_t* s) override {
|
|
|
|
return target_->GetFileSize(f, s);
|
|
|
|
}
|
|
|
|
|
|
|
|
Status GetFileModificationTime(const std::string& fname,
|
|
|
|
uint64_t* file_mtime) override {
|
|
|
|
return target_->GetFileModificationTime(fname, file_mtime);
|
|
|
|
}
|
|
|
|
|
|
|
|
Status RenameFile(const std::string& s, const std::string& t) override {
|
|
|
|
return target_->RenameFile(s, t);
|
|
|
|
}
|
|
|
|
|
|
|
|
Status LinkFile(const std::string& s, const std::string& t) override {
|
|
|
|
return target_->LinkFile(s, t);
|
|
|
|
}
|
|
|
|
|
|
|
|
Status LockFile(const std::string& f, FileLock** l) override {
|
|
|
|
return target_->LockFile(f, l);
|
|
|
|
}
|
|
|
|
|
|
|
|
Status UnlockFile(FileLock* l) override { return target_->UnlockFile(l); }
|
|
|
|
|
|
|
|
void Schedule(void (*f)(void* arg), void* a, Priority pri,
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
9 years ago
|
|
|
void* tag = nullptr, void (*u)(void* arg) = 0) override {
|
|
|
|
return target_->Schedule(f, a, pri, tag, u);
|
|
|
|
}
|
|
|
|
|
|
|
|
int UnSchedule(void* tag, Priority pri) override {
|
|
|
|
return target_->UnSchedule(tag, pri);
|
|
|
|
}
|
|
|
|
|
|
|
|
void StartThread(void (*f)(void*), void* a) override {
|
|
|
|
return target_->StartThread(f, a);
|
|
|
|
}
|
|
|
|
void WaitForJoin() override { return target_->WaitForJoin(); }
|
|
|
|
virtual unsigned int GetThreadPoolQueueLen(
|
|
|
|
Priority pri = LOW) const override {
|
|
|
|
return target_->GetThreadPoolQueueLen(pri);
|
|
|
|
}
|
|
|
|
virtual Status GetTestDirectory(std::string* path) override {
|
|
|
|
return target_->GetTestDirectory(path);
|
|
|
|
}
|
|
|
|
virtual Status NewLogger(const std::string& fname,
|
|
|
|
shared_ptr<Logger>* result) override {
|
|
|
|
return target_->NewLogger(fname, result);
|
|
|
|
}
|
|
|
|
uint64_t NowMicros() override { return target_->NowMicros(); }
|
|
|
|
void SleepForMicroseconds(int micros) override {
|
|
|
|
target_->SleepForMicroseconds(micros);
|
|
|
|
}
|
|
|
|
Status GetHostName(char* name, uint64_t len) override {
|
|
|
|
return target_->GetHostName(name, len);
|
|
|
|
}
|
|
|
|
Status GetCurrentTime(int64_t* unix_time) override {
|
|
|
|
return target_->GetCurrentTime(unix_time);
|
|
|
|
}
|
|
|
|
Status GetAbsolutePath(const std::string& db_path,
|
|
|
|
std::string* output_path) override {
|
|
|
|
return target_->GetAbsolutePath(db_path, output_path);
|
|
|
|
}
|
|
|
|
void SetBackgroundThreads(int num, Priority pri) override {
|
|
|
|
return target_->SetBackgroundThreads(num, pri);
|
|
|
|
}
|
|
|
|
|
|
|
|
void IncBackgroundThreadsIfNeeded(int num, Priority pri) override {
|
|
|
|
return target_->IncBackgroundThreadsIfNeeded(num, pri);
|
|
|
|
}
|
|
|
|
|
|
|
|
void LowerThreadPoolIOPriority(Priority pool = LOW) override {
|
|
|
|
target_->LowerThreadPoolIOPriority(pool);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string TimeToString(uint64_t time) override {
|
|
|
|
return target_->TimeToString(time);
|
|
|
|
}
|
|
|
|
|
|
|
|
Status GetThreadList(std::vector<ThreadStatus>* thread_list) override {
|
|
|
|
return target_->GetThreadList(thread_list);
|
|
|
|
}
|
|
|
|
|
|
|
|
ThreadStatusUpdater* GetThreadStatusUpdater() const override {
|
|
|
|
return target_->GetThreadStatusUpdater();
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t GetThreadID() const override {
|
|
|
|
return target_->GetThreadID();
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
Env* target_;
|
|
|
|
};
|
|
|
|
|
|
|
|
// An implementation of WritableFile that forwards all calls to another
|
|
|
|
// WritableFile. May be useful to clients who wish to override just part of the
|
|
|
|
// functionality of another WritableFile.
|
|
|
|
// It's declared as friend of WritableFile to allow forwarding calls to
|
|
|
|
// protected virtual methods.
|
|
|
|
class WritableFileWrapper : public WritableFile {
|
|
|
|
public:
|
|
|
|
explicit WritableFileWrapper(WritableFile* t) : target_(t) { }
|
|
|
|
|
|
|
|
Status Append(const Slice& data) override { return target_->Append(data); }
|
|
|
|
Status PositionedAppend(const Slice& data, uint64_t offset) override {
|
|
|
|
return target_->PositionedAppend(data, offset);
|
|
|
|
}
|
|
|
|
Status Truncate(uint64_t size) override { return target_->Truncate(size); }
|
|
|
|
Status Close() override { return target_->Close(); }
|
|
|
|
Status Flush() override { return target_->Flush(); }
|
|
|
|
Status Sync() override { return target_->Sync(); }
|
|
|
|
Status Fsync() override { return target_->Fsync(); }
|
[wal changes 3/3] method in DB to sync WAL without blocking writers
Summary:
Subj. We really need this feature.
Previous diff D40899 has most of the changes to make this possible, this diff just adds the method.
Test Plan: `make check`, the new test fails without this diff; ran with ASAN, TSAN and valgrind.
Reviewers: igor, rven, IslamAbdelRahman, anthony, kradhakrishnan, tnovak, yhchiang, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, maykov, hermanlee4, yoshinorim, tnovak, dhruba
Differential Revision: https://reviews.facebook.net/D40905
9 years ago
|
|
|
bool IsSyncThreadSafe() const override { return target_->IsSyncThreadSafe(); }
|
|
|
|
void SetIOPriority(Env::IOPriority pri) override {
|
|
|
|
target_->SetIOPriority(pri);
|
|
|
|
}
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
9 years ago
|
|
|
Env::IOPriority GetIOPriority() override { return target_->GetIOPriority(); }
|
|
|
|
uint64_t GetFileSize() override { return target_->GetFileSize(); }
|
|
|
|
void GetPreallocationStatus(size_t* block_size,
|
|
|
|
size_t* last_allocated_block) override {
|
|
|
|
target_->GetPreallocationStatus(block_size, last_allocated_block);
|
|
|
|
}
|
|
|
|
size_t GetUniqueId(char* id, size_t max_size) const override {
|
|
|
|
return target_->GetUniqueId(id, max_size);
|
|
|
|
}
|
|
|
|
Status InvalidateCache(size_t offset, size_t length) override {
|
|
|
|
return target_->InvalidateCache(offset, length);
|
|
|
|
}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
Status Allocate(uint64_t offset, uint64_t len) override {
|
|
|
|
return target_->Allocate(offset, len);
|
|
|
|
}
|
|
|
|
Status RangeSync(uint64_t offset, uint64_t nbytes) override {
|
|
|
|
return target_->RangeSync(offset, nbytes);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
WritableFile* target_;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Returns a new environment that stores its data in memory and delegates
|
|
|
|
// all non-file-storage tasks to base_env. The caller must delete the result
|
|
|
|
// when it is no longer needed.
|
|
|
|
// *base_env must remain live while the result is in use.
|
|
|
|
Env* NewMemEnv(Env* base_env);
|
|
|
|
|
|
|
|
// Returns a new environment that is used for HDFS environment.
|
|
|
|
// This is a factory method for HdfsEnv declared in hdfs/env_hdfs.h
|
|
|
|
Status NewHdfsEnv(Env** hdfs_env, const std::string& fsname);
|
|
|
|
|
|
|
|
} // namespace rocksdb
|
|
|
|
|
|
|
|
#endif // STORAGE_ROCKSDB_INCLUDE_ENV_H_
|