Make use of portable `uint64_t` type to make possible file access

in 64-bit.

  Currently, a signed off_t type is being used for the following
  interfaces for both offset and the length in bytes:
  * `Allocate`
  * `RangeSync`

  On Linux `off_t` is automatically either 32 or 64-bit depending on
  the platform. On Windows it is always a 32-bit signed long which
  limits file access and in particular space pre-allocation
  to effectively 2 Gb.

  Proposal is to replace off_t with uint64_t as a portable type
  always access files with 64-bit interfaces.

  May need to modify posix code but lack resources to test it.
main
Dmitri Smirnov 9 years ago
parent 75a8bad2ab
commit 5421c9728b
  1. 12
      include/rocksdb/env.h
  2. 10
      port/win/env_win.cc
  3. 4
      util/env_test.cc
  4. 2
      util/file_reader_writer.cc
  5. 2
      util/file_reader_writer.h
  6. 4
      util/file_reader_writer_test.cc
  7. 21
      util/io_posix.cc
  8. 6
      util/io_posix.h

@ -570,7 +570,7 @@ class WritableFile {
// This asks the OS to initiate flushing the cached data to disk,
// without waiting for completion.
// Default implementation does nothing.
virtual Status RangeSync(off_t offset, off_t nbytes) { return Status::OK(); }
virtual Status RangeSync(uint64_t offset, uint64_t nbytes) { return Status::OK(); }
// PrepareWrite performs any necessary preparation for a write
// before the write actually occurs. This allows for pre-allocation
@ -590,8 +590,8 @@ class WritableFile {
if (new_last_preallocated_block > last_preallocated_block_) {
size_t num_spanned_blocks =
new_last_preallocated_block - last_preallocated_block_;
Allocate(static_cast<off_t>(block_size * last_preallocated_block_),
static_cast<off_t>(block_size * num_spanned_blocks));
Allocate(block_size * last_preallocated_block_,
block_size * num_spanned_blocks);
last_preallocated_block_ = new_last_preallocated_block;
}
}
@ -600,7 +600,7 @@ class WritableFile {
/*
* Pre-allocate space for a file.
*/
virtual Status Allocate(off_t offset, off_t len) {
virtual Status Allocate(uint64_t offset, uint64_t len) {
return Status::OK();
}
@ -920,10 +920,10 @@ class WritableFileWrapper : public WritableFile {
}
protected:
Status Allocate(off_t offset, off_t len) override {
Status Allocate(uint64_t offset, uint64_t len) override {
return target_->Allocate(offset, len);
}
Status RangeSync(off_t offset, off_t nbytes) override {
Status RangeSync(uint64_t offset, uint64_t nbytes) override {
return target_->RangeSync(offset, nbytes);
}

@ -61,12 +61,6 @@ ThreadStatusUpdater* CreateThreadStatusUpdater() {
return new ThreadStatusUpdater();
}
// A wrapper for fadvise, if the platform doesn't support fadvise,
// it will simply return Status::NotSupport.
int Fadvise(int fd, off_t offset, size_t len, int advice) {
return 0; // simply do nothing.
}
inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) {
return Status::IOError(context, GetWindowsErrSz(err));
}
@ -605,7 +599,7 @@ class WinMmapFile : public WritableFile {
return Status::OK();
}
virtual Status Allocate(off_t offset, off_t len) override {
virtual Status Allocate(uint64_t offset, uint64_t len) override {
return Status::OK();
}
};
@ -1053,7 +1047,7 @@ class WinWritableFile : public WritableFile {
return filesize_;
}
virtual Status Allocate(off_t offset, off_t len) override {
virtual Status Allocate(uint64_t offset, uint64_t len) override {
Status status;
TEST_KILL_RANDOM("WinWritableFile::Allocate", rocksdb_kill_odds);

@ -971,11 +971,11 @@ TEST_F(EnvPosixTest, WritableFileWrapper) {
}
protected:
Status Allocate(off_t offset, off_t len) override {
Status Allocate(uint64_t offset, uint64_t len) override {
inc(11);
return Status::OK();
}
Status RangeSync(off_t offset, off_t nbytes) override {
Status RangeSync(uint64_t offset, uint64_t nbytes) override {
inc(12);
return Status::OK();
}

@ -248,7 +248,7 @@ Status WritableFileWriter::SyncInternal(bool use_fsync) {
return s;
}
Status WritableFileWriter::RangeSync(off_t offset, off_t nbytes) {
Status WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) {
IOSTATS_TIMER_GUARD(range_sync_nanos);
TEST_SYNC_POINT("WritableFileWriter::RangeSync:0");
return writable_file_->RangeSync(offset, nbytes);

@ -162,7 +162,7 @@ class WritableFileWriter {
Status WriteUnbuffered();
// Normal write
Status WriteBuffered(const char* data, size_t size);
Status RangeSync(off_t offset, off_t nbytes);
Status RangeSync(uint64_t offset, uint64_t nbytes);
size_t RequestToken(size_t bytes, bool align);
Status SyncInternal(bool use_fsync);
};

@ -47,8 +47,8 @@ TEST_F(WritableFileWriterTest, RangeSync) {
}
protected:
Status Allocate(off_t offset, off_t len) override { return Status::OK(); }
Status RangeSync(off_t offset, off_t nbytes) override {
Status Allocate(uint64_t offset, uint64_t len) override { return Status::OK(); }
Status RangeSync(uint64_t offset, uint64_t nbytes) override {
EXPECT_EQ(offset % 4096, 0u);
EXPECT_EQ(nbytes % 4096, 0u);

@ -478,12 +478,15 @@ Status PosixMmapFile::InvalidateCache(size_t offset, size_t length) {
}
#ifdef ROCKSDB_FALLOCATE_PRESENT
Status PosixMmapFile::Allocate(off_t offset, off_t len) {
Status PosixMmapFile::Allocate(uint64_t offset, uint64_t len) {
assert(offset <= std::numeric_limits<off_t>::max());
assert(len <= std::numeric_limits<off_t>::max());
TEST_KILL_RANDOM("PosixMmapFile::Allocate:0", rocksdb_kill_odds);
int alloc_status = 0;
if (allow_fallocate_) {
alloc_status = fallocate(
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0,
static_cast<off_t>(offset), static_cast<off_t>(len));
}
if (alloc_status == 0) {
return Status::OK();
@ -606,13 +609,16 @@ Status PosixWritableFile::InvalidateCache(size_t offset, size_t length) {
}
#ifdef ROCKSDB_FALLOCATE_PRESENT
Status PosixWritableFile::Allocate(off_t offset, off_t len) {
Status PosixWritableFile::Allocate(uint64_t offset, uint64_t len) {
assert(offset <= std::numeric_limits<off_t>::max());
assert(len <= std::numeric_limits<off_t>::max());
TEST_KILL_RANDOM("PosixWritableFile::Allocate:0", rocksdb_kill_odds);
IOSTATS_TIMER_GUARD(allocate_nanos);
int alloc_status = 0;
if (allow_fallocate_) {
alloc_status = fallocate(
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0,
static_cast<off_t>(offset), static_cast<off_t>(len));
}
if (alloc_status == 0) {
return Status::OK();
@ -621,8 +627,11 @@ Status PosixWritableFile::Allocate(off_t offset, off_t len) {
}
}
Status PosixWritableFile::RangeSync(off_t offset, off_t nbytes) {
if (sync_file_range(fd_, offset, nbytes, SYNC_FILE_RANGE_WRITE) == 0) {
Status PosixWritableFile::RangeSync(uint64_t offset, uint64_t nbytes) {
assert(offset <= std::numeric_limits<off_t>::max());
assert(nbytes <= std::numeric_limits<off_t>::max());
if (sync_file_range(fd_, static_cast<off_t>(offset),
static_cast<off_t>(nbytes), SYNC_FILE_RANGE_WRITE) == 0) {
return Status::OK();
} else {
return IOError(filename_, errno);

@ -90,8 +90,8 @@ class PosixWritableFile : public WritableFile {
virtual uint64_t GetFileSize() override;
virtual Status InvalidateCache(size_t offset, size_t length) override;
#ifdef ROCKSDB_FALLOCATE_PRESENT
virtual Status Allocate(off_t offset, off_t len) override;
virtual Status RangeSync(off_t offset, off_t nbytes) override;
virtual Status Allocate(uint64_t offset, uint64_t len) override;
virtual Status RangeSync(uint64_t offset, uint64_t nbytes) override;
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
#endif
};
@ -157,7 +157,7 @@ class PosixMmapFile : public WritableFile {
virtual uint64_t GetFileSize() override;
virtual Status InvalidateCache(size_t offset, size_t length) override;
#ifdef ROCKSDB_FALLOCATE_PRESENT
virtual Status Allocate(off_t offset, off_t len) override;
virtual Status Allocate(uint64_t offset, uint64_t len) override;
#endif
};

Loading…
Cancel
Save