Make use of the portable `uint64_t` type to enable 64-bit file access.

  Currently, the signed `off_t` type is used for both the offset and
  the length in bytes in the following interfaces:
  * `Allocate`
  * `RangeSync`

  On Linux, `off_t` is either 32-bit or 64-bit depending on
  the platform. On Windows, it is always a 32-bit signed long, which
  limits file access, and in particular space pre-allocation,
  to effectively 2 GB.

  The proposal is to replace `off_t` with `uint64_t` as a portable type
  so that files are always accessed through 64-bit interfaces.

  The POSIX code may need to be modified as well, but we lack the resources to test it.
main
Dmitri Smirnov 9 years ago
parent 75a8bad2ab
commit 5421c9728b
  1. 12
      include/rocksdb/env.h
  2. 10
      port/win/env_win.cc
  3. 4
      util/env_test.cc
  4. 2
      util/file_reader_writer.cc
  5. 2
      util/file_reader_writer.h
  6. 4
      util/file_reader_writer_test.cc
  7. 21
      util/io_posix.cc
  8. 6
      util/io_posix.h

@ -570,7 +570,7 @@ class WritableFile {
// This asks the OS to initiate flushing the cached data to disk, // This asks the OS to initiate flushing the cached data to disk,
// without waiting for completion. // without waiting for completion.
// Default implementation does nothing. // Default implementation does nothing.
virtual Status RangeSync(off_t offset, off_t nbytes) { return Status::OK(); } virtual Status RangeSync(uint64_t offset, uint64_t nbytes) { return Status::OK(); }
// PrepareWrite performs any necessary preparation for a write // PrepareWrite performs any necessary preparation for a write
// before the write actually occurs. This allows for pre-allocation // before the write actually occurs. This allows for pre-allocation
@ -590,8 +590,8 @@ class WritableFile {
if (new_last_preallocated_block > last_preallocated_block_) { if (new_last_preallocated_block > last_preallocated_block_) {
size_t num_spanned_blocks = size_t num_spanned_blocks =
new_last_preallocated_block - last_preallocated_block_; new_last_preallocated_block - last_preallocated_block_;
Allocate(static_cast<off_t>(block_size * last_preallocated_block_), Allocate(block_size * last_preallocated_block_,
static_cast<off_t>(block_size * num_spanned_blocks)); block_size * num_spanned_blocks);
last_preallocated_block_ = new_last_preallocated_block; last_preallocated_block_ = new_last_preallocated_block;
} }
} }
@ -600,7 +600,7 @@ class WritableFile {
/* /*
* Pre-allocate space for a file. * Pre-allocate space for a file.
*/ */
virtual Status Allocate(off_t offset, off_t len) { virtual Status Allocate(uint64_t offset, uint64_t len) {
return Status::OK(); return Status::OK();
} }
@ -920,10 +920,10 @@ class WritableFileWrapper : public WritableFile {
} }
protected: protected:
Status Allocate(off_t offset, off_t len) override { Status Allocate(uint64_t offset, uint64_t len) override {
return target_->Allocate(offset, len); return target_->Allocate(offset, len);
} }
Status RangeSync(off_t offset, off_t nbytes) override { Status RangeSync(uint64_t offset, uint64_t nbytes) override {
return target_->RangeSync(offset, nbytes); return target_->RangeSync(offset, nbytes);
} }

@ -61,12 +61,6 @@ ThreadStatusUpdater* CreateThreadStatusUpdater() {
return new ThreadStatusUpdater(); return new ThreadStatusUpdater();
} }
// A wrapper for fadvise, if the platform doesn't support fadvise,
// it will simply return Status::NotSupport.
int Fadvise(int fd, off_t offset, size_t len, int advice) {
return 0; // simply do nothing.
}
inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) { inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) {
return Status::IOError(context, GetWindowsErrSz(err)); return Status::IOError(context, GetWindowsErrSz(err));
} }
@ -605,7 +599,7 @@ class WinMmapFile : public WritableFile {
return Status::OK(); return Status::OK();
} }
virtual Status Allocate(off_t offset, off_t len) override { virtual Status Allocate(uint64_t offset, uint64_t len) override {
return Status::OK(); return Status::OK();
} }
}; };
@ -1053,7 +1047,7 @@ class WinWritableFile : public WritableFile {
return filesize_; return filesize_;
} }
virtual Status Allocate(off_t offset, off_t len) override { virtual Status Allocate(uint64_t offset, uint64_t len) override {
Status status; Status status;
TEST_KILL_RANDOM("WinWritableFile::Allocate", rocksdb_kill_odds); TEST_KILL_RANDOM("WinWritableFile::Allocate", rocksdb_kill_odds);

@ -971,11 +971,11 @@ TEST_F(EnvPosixTest, WritableFileWrapper) {
} }
protected: protected:
Status Allocate(off_t offset, off_t len) override { Status Allocate(uint64_t offset, uint64_t len) override {
inc(11); inc(11);
return Status::OK(); return Status::OK();
} }
Status RangeSync(off_t offset, off_t nbytes) override { Status RangeSync(uint64_t offset, uint64_t nbytes) override {
inc(12); inc(12);
return Status::OK(); return Status::OK();
} }

@ -248,7 +248,7 @@ Status WritableFileWriter::SyncInternal(bool use_fsync) {
return s; return s;
} }
Status WritableFileWriter::RangeSync(off_t offset, off_t nbytes) { Status WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) {
IOSTATS_TIMER_GUARD(range_sync_nanos); IOSTATS_TIMER_GUARD(range_sync_nanos);
TEST_SYNC_POINT("WritableFileWriter::RangeSync:0"); TEST_SYNC_POINT("WritableFileWriter::RangeSync:0");
return writable_file_->RangeSync(offset, nbytes); return writable_file_->RangeSync(offset, nbytes);

@ -162,7 +162,7 @@ class WritableFileWriter {
Status WriteUnbuffered(); Status WriteUnbuffered();
// Normal write // Normal write
Status WriteBuffered(const char* data, size_t size); Status WriteBuffered(const char* data, size_t size);
Status RangeSync(off_t offset, off_t nbytes); Status RangeSync(uint64_t offset, uint64_t nbytes);
size_t RequestToken(size_t bytes, bool align); size_t RequestToken(size_t bytes, bool align);
Status SyncInternal(bool use_fsync); Status SyncInternal(bool use_fsync);
}; };

@ -47,8 +47,8 @@ TEST_F(WritableFileWriterTest, RangeSync) {
} }
protected: protected:
Status Allocate(off_t offset, off_t len) override { return Status::OK(); } Status Allocate(uint64_t offset, uint64_t len) override { return Status::OK(); }
Status RangeSync(off_t offset, off_t nbytes) override { Status RangeSync(uint64_t offset, uint64_t nbytes) override {
EXPECT_EQ(offset % 4096, 0u); EXPECT_EQ(offset % 4096, 0u);
EXPECT_EQ(nbytes % 4096, 0u); EXPECT_EQ(nbytes % 4096, 0u);

@ -478,12 +478,15 @@ Status PosixMmapFile::InvalidateCache(size_t offset, size_t length) {
} }
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
Status PosixMmapFile::Allocate(off_t offset, off_t len) { Status PosixMmapFile::Allocate(uint64_t offset, uint64_t len) {
assert(offset <= std::numeric_limits<off_t>::max());
assert(len <= std::numeric_limits<off_t>::max());
TEST_KILL_RANDOM("PosixMmapFile::Allocate:0", rocksdb_kill_odds); TEST_KILL_RANDOM("PosixMmapFile::Allocate:0", rocksdb_kill_odds);
int alloc_status = 0; int alloc_status = 0;
if (allow_fallocate_) { if (allow_fallocate_) {
alloc_status = fallocate( alloc_status = fallocate(
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len); fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0,
static_cast<off_t>(offset), static_cast<off_t>(len));
} }
if (alloc_status == 0) { if (alloc_status == 0) {
return Status::OK(); return Status::OK();
@ -606,13 +609,16 @@ Status PosixWritableFile::InvalidateCache(size_t offset, size_t length) {
} }
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
Status PosixWritableFile::Allocate(off_t offset, off_t len) { Status PosixWritableFile::Allocate(uint64_t offset, uint64_t len) {
assert(offset <= std::numeric_limits<off_t>::max());
assert(len <= std::numeric_limits<off_t>::max());
TEST_KILL_RANDOM("PosixWritableFile::Allocate:0", rocksdb_kill_odds); TEST_KILL_RANDOM("PosixWritableFile::Allocate:0", rocksdb_kill_odds);
IOSTATS_TIMER_GUARD(allocate_nanos); IOSTATS_TIMER_GUARD(allocate_nanos);
int alloc_status = 0; int alloc_status = 0;
if (allow_fallocate_) { if (allow_fallocate_) {
alloc_status = fallocate( alloc_status = fallocate(
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len); fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0,
static_cast<off_t>(offset), static_cast<off_t>(len));
} }
if (alloc_status == 0) { if (alloc_status == 0) {
return Status::OK(); return Status::OK();
@ -621,8 +627,11 @@ Status PosixWritableFile::Allocate(off_t offset, off_t len) {
} }
} }
Status PosixWritableFile::RangeSync(off_t offset, off_t nbytes) { Status PosixWritableFile::RangeSync(uint64_t offset, uint64_t nbytes) {
if (sync_file_range(fd_, offset, nbytes, SYNC_FILE_RANGE_WRITE) == 0) { assert(offset <= std::numeric_limits<off_t>::max());
assert(nbytes <= std::numeric_limits<off_t>::max());
if (sync_file_range(fd_, static_cast<off_t>(offset),
static_cast<off_t>(nbytes), SYNC_FILE_RANGE_WRITE) == 0) {
return Status::OK(); return Status::OK();
} else { } else {
return IOError(filename_, errno); return IOError(filename_, errno);

@ -90,8 +90,8 @@ class PosixWritableFile : public WritableFile {
virtual uint64_t GetFileSize() override; virtual uint64_t GetFileSize() override;
virtual Status InvalidateCache(size_t offset, size_t length) override; virtual Status InvalidateCache(size_t offset, size_t length) override;
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
virtual Status Allocate(off_t offset, off_t len) override; virtual Status Allocate(uint64_t offset, uint64_t len) override;
virtual Status RangeSync(off_t offset, off_t nbytes) override; virtual Status RangeSync(uint64_t offset, uint64_t nbytes) override;
virtual size_t GetUniqueId(char* id, size_t max_size) const override; virtual size_t GetUniqueId(char* id, size_t max_size) const override;
#endif #endif
}; };
@ -157,7 +157,7 @@ class PosixMmapFile : public WritableFile {
virtual uint64_t GetFileSize() override; virtual uint64_t GetFileSize() override;
virtual Status InvalidateCache(size_t offset, size_t length) override; virtual Status InvalidateCache(size_t offset, size_t length) override;
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
virtual Status Allocate(off_t offset, off_t len) override; virtual Status Allocate(uint64_t offset, uint64_t len) override;
#endif #endif
}; };

Loading…
Cancel
Save