|  |  |  | @ -6,6 +6,7 @@ | 
			
		
	
		
			
				
					|  |  |  |  | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 | 
			
		
	
		
			
				
					|  |  |  |  | // Use of this source code is governed by a BSD-style license that can be
 | 
			
		
	
		
			
				
					|  |  |  |  | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | #pragma once | 
			
		
	
		
			
				
					|  |  |  |  | #include <atomic> | 
			
		
	
		
			
				
					|  |  |  |  | #include <sstream> | 
			
		
	
	
		
			
				
					|  |  |  | @ -22,9 +23,22 @@ namespace rocksdb { | 
			
		
	
		
			
				
					|  |  |  |  | class Statistics; | 
			
		
	
		
			
				
					|  |  |  |  | class HistogramImpl; | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | // This file provides the following main abstractions:
 | 
			
		
	
		
			
				
					|  |  |  |  | // SequentialFileReader : wrapper over Env::SequentialFile
 | 
			
		
	
		
			
				
					|  |  |  |  | // RandomAccessFileReader : wrapper over Env::RandomAccessFile
 | 
			
		
	
		
			
				
					|  |  |  |  | // WritableFileWriter : wrapper over Env::WritableFile
 | 
			
		
	
		
			
				
					|  |  |  |  | // In addition, it also exposed NewReadaheadRandomAccessFile, NewWritableFile,
 | 
			
		
	
		
			
				
					|  |  |  |  | // and ReadOneLine primitives.
 | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | // NewReadaheadRandomAccessFile provides a wrapper over RandomAccessFile to
 | 
			
		
	
		
			
				
					|  |  |  |  | // always prefetch additional data with every read. This is mainly used in
 | 
			
		
	
		
			
				
					|  |  |  |  | // Compaction Table Readers.
 | 
			
		
	
		
			
				
					|  |  |  |  | std::unique_ptr<RandomAccessFile> NewReadaheadRandomAccessFile( | 
			
		
	
		
			
				
					|  |  |  |  |   std::unique_ptr<RandomAccessFile>&& file, size_t readahead_size); | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | // SequentialFileReader is a wrapper on top of Env::SequentialFile. It handles
 | 
			
		
	
		
			
				
					|  |  |  |  | // Buffered (i.e when page cache is enabled) and Direct (with O_DIRECT / page
 | 
			
		
	
		
			
				
					|  |  |  |  | // cache disabled) reads appropriately, and also updates the IO stats.
 | 
			
		
	
		
			
				
					|  |  |  |  | class SequentialFileReader { | 
			
		
	
		
			
				
					|  |  |  |  |  private: | 
			
		
	
		
			
				
					|  |  |  |  |   std::unique_ptr<SequentialFile> file_; | 
			
		
	
	
		
			
				
					|  |  |  | @ -61,6 +75,12 @@ class SequentialFileReader { | 
			
		
	
		
			
				
					|  |  |  |  |   bool use_direct_io() const { return file_->use_direct_io(); } | 
			
		
	
		
			
				
					|  |  |  |  | }; | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | // RandomAccessFileReader is a wrapper on top of Env::RnadomAccessFile. It is
 | 
			
		
	
		
			
				
					|  |  |  |  | // responsible for:
 | 
			
		
	
		
			
				
					|  |  |  |  | // - Handling Buffered and Direct reads appropriately.
 | 
			
		
	
		
			
				
					|  |  |  |  | // - Rate limiting compaction reads.
 | 
			
		
	
		
			
				
					|  |  |  |  | // - Notifying any interested listeners on the completion of a read.
 | 
			
		
	
		
			
				
					|  |  |  |  | // - Updating IO stats.
 | 
			
		
	
		
			
				
					|  |  |  |  | class RandomAccessFileReader { | 
			
		
	
		
			
				
					|  |  |  |  |  private: | 
			
		
	
		
			
				
					|  |  |  |  | #ifndef ROCKSDB_LITE | 
			
		
	
	
		
			
				
					|  |  |  | @ -151,7 +171,13 @@ class RandomAccessFileReader { | 
			
		
	
		
			
				
					|  |  |  |  |   bool use_direct_io() const { return file_->use_direct_io(); } | 
			
		
	
		
			
				
					|  |  |  |  | }; | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | // Use posix write to write data to a file.
 | 
			
		
	
		
			
				
					|  |  |  |  | // WritableFileWriter is a wrapper on top of Env::WritableFile. It provides
 | 
			
		
	
		
			
				
					|  |  |  |  | // facilities to:
 | 
			
		
	
		
			
				
					|  |  |  |  | // - Handle Buffered and Direct writes.
 | 
			
		
	
		
			
				
					|  |  |  |  | // - Rate limit writes.
 | 
			
		
	
		
			
				
					|  |  |  |  | // - Flush and Sync the data to the underlying filesystem.
 | 
			
		
	
		
			
				
					|  |  |  |  | // - Notify any interested listeners on the completion of a write.
 | 
			
		
	
		
			
				
					|  |  |  |  | // - Update IO stats.
 | 
			
		
	
		
			
				
					|  |  |  |  | class WritableFileWriter { | 
			
		
	
		
			
				
					|  |  |  |  |  private: | 
			
		
	
		
			
				
					|  |  |  |  | #ifndef ROCKSDB_LITE | 
			
		
	
	
		
			
				
					|  |  |  | @ -277,13 +303,31 @@ class WritableFileWriter { | 
			
		
	
		
			
				
					|  |  |  |  |   Status SyncInternal(bool use_fsync); | 
			
		
	
		
			
				
					|  |  |  |  | }; | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | // FilePrefetchBuffer can automatically do the readahead if file_reader,
 | 
			
		
	
		
			
				
					|  |  |  |  | // readahead_size, and max_readahead_size are passed in.
 | 
			
		
	
		
			
				
					|  |  |  |  | // max_readahead_size should be greater than or equal to readahead_size.
 | 
			
		
	
		
			
				
					|  |  |  |  | // readahead_size will be doubled on every IO, until max_readahead_size.
 | 
			
		
	
		
			
				
					|  |  |  |  | // FilePrefetchBuffer is a smart buffer to store and read data from a file.
 | 
			
		
	
		
			
				
					|  |  |  |  | class FilePrefetchBuffer { | 
			
		
	
		
			
				
					|  |  |  |  |  public: | 
			
		
	
		
			
				
					|  |  |  |  |   // If `track_min_offset` is true, track minimum offset ever read.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // Constructor.
 | 
			
		
	
		
			
				
					|  |  |  |  |   //
 | 
			
		
	
		
			
				
					|  |  |  |  |   // All arguments are optional.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // file_reader        : the file reader to use. Can be a nullptr.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // readahead_size     : the initial readahead size.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // max_readahead_size : the maximum readahead size.
 | 
			
		
	
		
			
				
					|  |  |  |  |   //   If max_readahead_size > readahead_size, the readahead size will be
 | 
			
		
	
		
			
				
					|  |  |  |  |   //   doubled on every IO until max_readahead_size is hit.
 | 
			
		
	
		
			
				
					|  |  |  |  |   //   Typically this is set as a multiple of readahead_size.
 | 
			
		
	
		
			
				
					|  |  |  |  |   //   max_readahead_size should be greater than equal to readahead_size.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // enable : controls whether reading from the buffer is enabled.
 | 
			
		
	
		
			
				
					|  |  |  |  |   //   If false, TryReadFromCache() always return false, and we only take stats
 | 
			
		
	
		
			
				
					|  |  |  |  |   //   for the minimum offset if track_min_offset = true.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // track_min_offset : Track the minimum offset ever read and collect stats on
 | 
			
		
	
		
			
				
					|  |  |  |  |   //   it. Used for adaptable readahead of the file footer/metadata.
 | 
			
		
	
		
			
				
					|  |  |  |  |   //
 | 
			
		
	
		
			
				
					|  |  |  |  |   // Automatic readhead is enabled for a file if file_reader, readahead_size,
 | 
			
		
	
		
			
				
					|  |  |  |  |   // and max_readahead_size are passed in.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // If file_reader is a nullptr, setting readadhead_size and max_readahead_size
 | 
			
		
	
		
			
				
					|  |  |  |  |   // does not make any sense. So it does nothing.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // A user can construct a FilePrefetchBuffer without any arguments, but use
 | 
			
		
	
		
			
				
					|  |  |  |  |   // `Prefetch` to load data into the buffer.
 | 
			
		
	
		
			
				
					|  |  |  |  |   FilePrefetchBuffer(RandomAccessFileReader* file_reader = nullptr, | 
			
		
	
		
			
				
					|  |  |  |  |                      size_t readadhead_size = 0, size_t max_readahead_size = 0, | 
			
		
	
		
			
				
					|  |  |  |  |                      bool enable = true, bool track_min_offset = false) | 
			
		
	
	
		
			
				
					|  |  |  | @ -294,11 +338,26 @@ class FilePrefetchBuffer { | 
			
		
	
		
			
				
					|  |  |  |  |         min_offset_read_(port::kMaxSizet), | 
			
		
	
		
			
				
					|  |  |  |  |         enable_(enable), | 
			
		
	
		
			
				
					|  |  |  |  |         track_min_offset_(track_min_offset) {} | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |   // Load data into the buffer from a file.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // reader : the file reader.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // offset : the file offset to start reading from.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // n      : the number of bytes to read.
 | 
			
		
	
		
			
				
					|  |  |  |  |   Status Prefetch(RandomAccessFileReader* reader, uint64_t offset, size_t n); | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |   // Tries returning the data for a file raed from this buffer, if that data is
 | 
			
		
	
		
			
				
					|  |  |  |  |   // in the buffer.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // It handles tracking the minimum read offset if track_min_offset = true.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // It also does the exponential readahead when readadhead_size is set as part
 | 
			
		
	
		
			
				
					|  |  |  |  |   // of the constructor.
 | 
			
		
	
		
			
				
					|  |  |  |  |   //
 | 
			
		
	
		
			
				
					|  |  |  |  |   // offset : the file offset.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // n      : the number of bytes.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // result : output buffer to put the data into.
 | 
			
		
	
		
			
				
					|  |  |  |  |   bool TryReadFromCache(uint64_t offset, size_t n, Slice* result); | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |   // The minimum `offset` ever passed to TryReadFromCache(). Only be tracked
 | 
			
		
	
		
			
				
					|  |  |  |  |   // if track_min_offset = true.
 | 
			
		
	
		
			
				
					|  |  |  |  |   // The minimum `offset` ever passed to TryReadFromCache(). This will nly be
 | 
			
		
	
		
			
				
					|  |  |  |  |   // tracked if track_min_offset = true.
 | 
			
		
	
		
			
				
					|  |  |  |  |   size_t min_offset_read() const { return min_offset_read_; } | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |  private: | 
			
		
	
	
		
			
				
					|  |  |  | @ -317,9 +376,17 @@ class FilePrefetchBuffer { | 
			
		
	
		
			
				
					|  |  |  |  |   bool track_min_offset_; | 
			
		
	
		
			
				
					|  |  |  |  | }; | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | // Returns a WritableFile.
 | 
			
		
	
		
			
				
					|  |  |  |  | //
 | 
			
		
	
		
			
				
					|  |  |  |  | // env     : the Env.
 | 
			
		
	
		
			
				
					|  |  |  |  | // fname   : the file name.
 | 
			
		
	
		
			
				
					|  |  |  |  | // result  : output arg. A WritableFile based on `fname` returned.
 | 
			
		
	
		
			
				
					|  |  |  |  | // options : the Env Options.
 | 
			
		
	
		
			
				
					|  |  |  |  | extern Status NewWritableFile(Env* env, const std::string& fname, | 
			
		
	
		
			
				
					|  |  |  |  |                               std::unique_ptr<WritableFile>* result, | 
			
		
	
		
			
				
					|  |  |  |  |                               const EnvOptions& options); | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | // Read a single line from a file.
 | 
			
		
	
		
			
				
					|  |  |  |  | bool ReadOneLine(std::istringstream* iss, SequentialFile* seq_file, | 
			
		
	
		
			
				
					|  |  |  |  |                  std::string* output, bool* has_data, Status* result); | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
	
		
			
				
					|  |  |  | 
 |