//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#if defined(OS_WIN)

#include "port/win/io_win.h"

#include "env_win.h"
#include "monitoring/iostats_context_imp.h"
#include "test_util/sync_point.h"
#include "util/aligned_buffer.h"
#include "util/coding.h"

namespace ROCKSDB_NAMESPACE {
namespace port {

/*
 * DirectIOHelper
 */
namespace {

// Sector size in bytes. NOTE(review): not referenced in the code visible
// here; presumably a default for direct I/O alignment -- confirm before use.
const size_t kSectorSize = 512;

// Returns true iff `alignment` is a power of two.
// Zero is explicitly rejected: the bit trick below would otherwise report it
// as a power of two.
inline bool IsPowerOfTwo(const size_t alignment) {
  return alignment != 0 && (alignment & (alignment - 1)) == 0;
}

// Returns true iff the address held by `ptr` is a multiple of `alignment`.
// The mask test is only meaningful when `alignment` is a power of two.
inline bool IsAligned(size_t alignment, const void* ptr) {
  return (reinterpret_cast<uintptr_t>(ptr) & (alignment - 1)) == 0;
}
}  // namespace

// Translates a Windows error code into the system-provided message text.
// Returns an empty string when the system cannot format the code, instead of
// reading from an uninitialized buffer pointer.
std::string GetWindowsErrSz(DWORD err) {
  std::string Err;

  // Must start out null: FormatMessageA leaves it untouched on failure.
  LPSTR lpMsgBuf = nullptr;
  const DWORD len = FormatMessageA(
      FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
          FORMAT_MESSAGE_IGNORE_INSERTS,
      NULL, err,
      0,  // Default language
      // With FORMAT_MESSAGE_ALLOCATE_BUFFER the API expects the address of
      // the pointer, disguised as LPSTR.
      reinterpret_cast<LPSTR>(&lpMsgBuf), 0, NULL);

  if (lpMsgBuf != nullptr) {
    if (len != 0) {
      Err.assign(lpMsgBuf, len);
    }
    // The buffer was allocated by the system on our behalf.
    LocalFree(lpMsgBuf);
  }
  return Err;
}

// Positioned write. The POSIX name is kept to convey the original intent of
// this interface.
// The target position is passed explicitly through an OVERLAPPED structure.
// POSIX pwrite() leaves the file pointer where it was; WriteFile() has no such
// capability, so after this call the pointer is advanced past the written
// data. That is fine for writes because they are (should be) sequential, and
// since every read/write here names its offset, callers should not rely on
// the current file position anyway.
//
// On success `bytes_written` receives the number of bytes written; it is
// reset to 0 on entry and stays 0 on failure.
IOStatus pwrite(const WinFileData* file_data, const Slice& data,
                uint64_t offset, size_t& bytes_written) {
  bytes_written = 0;

  const size_t to_write = data.size();
  if (to_write > std::numeric_limits<DWORD>::max()) {
    // May happen in 64-bit builds where size_t is 64-bits but
    // long is still 32-bit, but that's the API here at the moment
    return IOStatus::InvalidArgument(
        "num_bytes is too large for a single write: " + file_data->GetName());
  }

  // Split the 64-bit offset into the two 32-bit halves OVERLAPPED wants.
  ULARGE_INTEGER position;
  position.QuadPart = offset;

  OVERLAPPED request = {0};
  request.Offset = position.LowPart;
  request.OffsetHigh = position.HighPart;

  DWORD written = 0;
  const BOOL succeeded =
      WriteFile(file_data->GetFileHandle(), data.data(),
                static_cast<DWORD>(to_write), &written, &request);
  if (succeeded == FALSE) {
    const DWORD last_error = GetLastError();
    return IOErrorFromWindowsError("WriteFile failed: " + file_data->GetName(),
                                   last_error);
  }

  bytes_written = written;
  return IOStatus::OK();
}

// Positioned read; see the comments on pwrite() above regarding the file
// pointer.
// Reads up to `num_bytes` bytes at `offset` into `src`. `bytes_read` is reset
// to 0 on entry and receives the byte count only when ReadFile() succeeds.
// Reaching end of file is reported as success with zero bytes read.
IOStatus pread(const WinFileData* file_data, char* src, size_t num_bytes,
               uint64_t offset, size_t& bytes_read) {
  bytes_read = 0;

  if (num_bytes > std::numeric_limits<DWORD>::max()) {
    return IOStatus::InvalidArgument(
        "num_bytes is too large for a single read: " + file_data->GetName());
  }

  // Split the 64-bit offset into the two 32-bit halves OVERLAPPED wants.
  ULARGE_INTEGER position;
  position.QuadPart = offset;

  OVERLAPPED request = {0};
  request.Offset = position.LowPart;
  request.OffsetHigh = position.HighPart;

  DWORD received = 0;
  const BOOL succeeded =
      ReadFile(file_data->GetFileHandle(), src, static_cast<DWORD>(num_bytes),
               &received, &request);
  if (succeeded != FALSE) {
    bytes_read = received;
    return IOStatus::OK();
  }

  const DWORD last_error = GetLastError();
  if (last_error == ERROR_HANDLE_EOF) {
    // EOF is OK with zero bytes read
    return IOStatus::OK();
  }
  return IOErrorFromWindowsError("ReadFile failed: " + file_data->GetName(),
                                 last_error);
}

// Pre-allocates disk space for `hFile` up to `to_size` bytes using
// SetFileInformationByHandle(FileAllocationInfo), which is capable of fast
// pre-allocation. This does not move the end-of-file position unless the file
// is truncated, and the pre-allocated space is not considered zero-filled.
// `filename` is used only to build the error message.
IOStatus fallocate(const std::string& filename, HANDLE hFile,
                   uint64_t to_size) {
  FILE_ALLOCATION_INFO request;
  request.AllocationSize.QuadPart = to_size;

  const BOOL succeeded = SetFileInformationByHandle(
      hFile, FileAllocationInfo, &request, sizeof(FILE_ALLOCATION_INFO));
  if (succeeded) {
    return IOStatus::OK();
  }

  const DWORD last_error = GetLastError();
  return IOErrorFromWindowsError("Failed to pre-allocate space: " + filename,
                                 last_error);
}

// Sets the end-of-file position of `hFile` to `toSize` bytes via
// SetFileInformationByHandle(FileEndOfFileInfo).
// `filename` is used only to build the error message.
IOStatus ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize) {
  FILE_END_OF_FILE_INFO request;
  request.EndOfFile.QuadPart = toSize;

  const BOOL succeeded = SetFileInformationByHandle(
      hFile, FileEndOfFileInfo, &request, sizeof(FILE_END_OF_FILE_INFO));
  if (succeeded) {
    return IOStatus::OK();
  }

  const DWORD last_error = GetLastError();
  return IOErrorFromWindowsError("Failed to Set end of file: " + filename,
                                 last_error);
}

// Would write a unique file identifier into `id`; currently writes nothing
// and reports an ID length of zero for every handle.
size_t GetUniqueIdFromFile(HANDLE /*hFile*/, char* /*id*/,
                           size_t /*max_size*/) {
  // A zero length is safe: it causes the table reader to generate a unique ID
  // on its own. It is suboptimal for performance because it prevents multiple
  // table readers for the same file from sharing cached blocks. For example,
  // with a low `max_open_files` there can be many table readers opened for
  // the same file.
  //
  // TODO: this is a temporary solution, safe but not optimal for performance.
  // For more details see the discussion in
  // https://github.com/facebook/rocksdb/pull/5844.
  const size_t no_unique_id = 0;
  return no_unique_id;
}

// Stores the file name, its handle and the direct I/O flag, and looks up the
// sector size for `filename` through WinFileSystem::GetSectorSize().
WinFileData::WinFileData(const std::string& filename, HANDLE hFile,
                         bool direct_io)
    : filename_(filename),
      hFile_(hFile),
      use_direct_io_(direct_io),
      sector_size_(WinFileSystem::GetSectorSize(filename)) {}

// Returns true iff `off` is a multiple of this file's sector size.
// The mask test assumes the sector size is a power of two.
bool WinFileData::IsSectorAligned(const size_t off) const {
  const auto sector_mask = sector_size_ - 1;
  return (off & sector_mask) == 0;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
// WinMmapReadableFile

// Wraps an already established read mapping: `hMap` is the mapping handle and
// `mapped_region` the start of a view that is `length` bytes long. Both are
// released in the destructor. Direct I/O is always off for this file type.
WinMmapReadableFile::WinMmapReadableFile(const std::string& fileName,
                                         HANDLE hFile, HANDLE hMap,
                                         const void* mapped_region,
                                         size_t length)
    : WinFileData(fileName, hFile, false /* use_direct_io */),
      hMap_(hMap),
      mapped_region_(mapped_region),
      length_(length) {}

// Releases the mapped view first and then the mapping handle. Failures are
// only detected through assertions.
WinMmapReadableFile::~WinMmapReadableFile() {
  BOOL unmapped __attribute__((__unused__));
  unmapped = ::UnmapViewOfFile(mapped_region_);
  assert(unmapped);

  BOOL mapping_closed __attribute__((__unused__));
  mapping_closed = ::CloseHandle(hMap_);
  assert(mapping_closed);
}

// Returns a slice that points directly into the mapped view; nothing is
// copied and `scratch` is not used.
// An `offset` past the end of the mapping yields an empty slice and an
// EINVAL I/O error. A request that runs past the end is clamped to the bytes
// that remain.
IOStatus WinMmapReadableFile::Read(uint64_t offset, size_t n,
                                   const IOOptions& /*options*/, Slice* result,
                                   char* /*scratch*/,
                                   IODebugContext* /*dbg*/) const {
  if (offset > length_) {
    *result = Slice();
    return IOError(filename_, EINVAL);
  }

  // offset <= length_ here, so the subtraction cannot underflow. Comparing
  // against the remainder (rather than computing offset + n) keeps a huge `n`
  // from wrapping around and bypassing the clamp.
  const size_t remaining = length_ - static_cast<size_t>(offset);
  if (n > remaining) {
    n = remaining;
  }

  *result = Slice(reinterpret_cast<const char*>(mapped_region_) + offset, n);
  return IOStatus::OK();
}

// No-op for this file type: both arguments are ignored and OK is returned.
// Parameter names are commented out, as elsewhere in this file, so that the
// unused arguments do not trigger compiler warnings.
IOStatus WinMmapReadableFile::InvalidateCache(size_t /*offset*/,
                                              size_t /*length*/) {
  return IOStatus::OK();
}

// Forwards to GetUniqueIdFromFile() with this file's handle and returns the
// length it reports.
size_t WinMmapReadableFile::GetUniqueId(char* id, size_t max_size) const {
  const size_t id_length = GetUniqueIdFromFile(hFile_, id, max_size);
  return id_length;
}

///////////////////////////////////////////////////////////////////////////////
/// WinMmapFile

// Sets the end of file to `toSize`.
// Note: on files opened with unbuffered I/O, truncation or reservation is
// only possible to a sector-size-aligned value.
IOStatus WinMmapFile::TruncateFile(uint64_t toSize) {
  IOStatus truncate_status = ftruncate(filename_, hFile_, toSize);
  return truncate_status;
}

// Unmaps the currently mapped view, if any, and advances `file_offset_` by
// one view so that the next mapping covers the following portion of the file.
// The view bookkeeping is reset even when UnmapViewOfFile() fails; the
// failure is reported through the returned status.
IOStatus WinMmapFile::UnmapCurrentRegion() {
  IOStatus status;

  if (mapped_begin_ != nullptr) {
    if (!::UnmapViewOfFile(mapped_begin_)) {
      // Capture the error code before building the message string, as the
      // rest of this file does, so nothing can disturb it in between.
      auto lastError = GetLastError();
      status = IOErrorFromWindowsError(
          "Failed to unmap file view: " + filename_, lastError);
    }

    // Move on to the next portion of the file
    file_offset_ += view_size_;

    // UnmapView automatically sends data to disk but not the metadata
    // which is good and provides some equivalent of fdatasync() on Linux
    // therefore, we do not need a separate flag for metadata
    mapped_begin_ = nullptr;
    mapped_end_ = nullptr;
    dst_ = nullptr;

    last_sync_ = nullptr;
    pending_sync_ = false;
  }

  return status;
}

// Maps the next `view_size_` bytes of the file, starting at `file_offset_`.
// Steps:
//   1. reserve more disk space through Allocate() if the view would extend
//      past `reserved_size_`;
//   2. (re)create the file mapping object when none exists or the
//      reservation has outgrown it;
//   3. map the view and reset the write cursor and sync bookkeeping.
// Must only be called while no view is mapped.
IOStatus WinMmapFile::MapNewRegion(const IOOptions& options,
                                   IODebugContext* dbg) {
  IOStatus status;

  assert(mapped_begin_ == nullptr);

  size_t minDiskSize = static_cast<size_t>(file_offset_) + view_size_;

  if (minDiskSize > reserved_size_) {
    status = Allocate(file_offset_, view_size_, options, dbg);
    if (!status.ok()) {
      return status;
    }
  }

  // Need to remap
  if (hMap_ == NULL || reserved_size_ > mapping_size_) {
    if (hMap_ != NULL) {
      // Unmap the previous one
      BOOL ret __attribute__((__unused__));
      ret = ::CloseHandle(hMap_);
      assert(ret);
      hMap_ = NULL;
    }

    ULARGE_INTEGER mappingSize;
    mappingSize.QuadPart = reserved_size_;

    hMap_ = CreateFileMappingA(
        hFile_,
        NULL,                  // Security attributes
        PAGE_READWRITE,        // There is not a write only mode for mapping
        mappingSize.HighPart,  // Enable mapping the whole file but the actual
        // amount mapped is determined by MapViewOfFile
        mappingSize.LowPart,
        NULL);  // Mapping name

    if (NULL == hMap_) {
      // Capture the error code before building the message string, as the
      // rest of this file does, so nothing can disturb it in between.
      auto lastError = GetLastError();
      return IOErrorFromWindowsError(
          "WindowsMmapFile failed to create file mapping for: " + filename_,
          lastError);
    }

    mapping_size_ = reserved_size_;
  }

  ULARGE_INTEGER offset;
  offset.QuadPart = file_offset_;

  // View must begin at the granularity aligned offset
  mapped_begin_ = reinterpret_cast<char*>(
      MapViewOfFileEx(hMap_, FILE_MAP_WRITE, offset.HighPart, offset.LowPart,
                      view_size_, NULL));

  if (!mapped_begin_) {
    auto lastError = GetLastError();
    status = IOErrorFromWindowsError(
        "WindowsMmapFile failed to map file view: " + filename_, lastError);
  } else {
    // Fresh view: writing starts at its beginning and nothing is pending.
    mapped_end_ = mapped_begin_ + view_size_;
    dst_ = mapped_begin_;
    last_sync_ = mapped_begin_;
    pending_sync_ = false;
  }
  return status;
}

// Asks the file system to reserve `spaceToReserve` bytes for this file by
// forwarding to fallocate().
IOStatus WinMmapFile::PreallocateInternal(uint64_t spaceToReserve) {
  IOStatus reserve_status = fallocate(filename_, hFile_, spaceToReserve);
  return reserve_status;
}

// Creates a memory-mapped writable file over `hFile`. No mapping is created
// here; all mapping state starts out empty.
// `page_size` and `allocation_granularity` must both be non-zero powers of
// two (the granularity must come from GetSystemInfo()), and `options` must
// have mmap writes enabled; these are checked with assertions only.
WinMmapFile::WinMmapFile(const std::string& fname, HANDLE hFile,
                         size_t page_size, size_t allocation_granularity,
                         const FileOptions& options)
    : WinFileData(fname, hFile, false),
      FSWritableFile(options),
      hMap_(NULL),
      page_size_(page_size),
      allocation_granularity_(allocation_granularity),
      reserved_size_(0),
      mapping_size_(0),
      view_size_(0),
      mapped_begin_(nullptr),
      mapped_end_(nullptr),
      dst_(nullptr),
      last_sync_(nullptr),
      file_offset_(0),
      pending_sync_(false) {
  // Allocation granularity: non-zero and a single bit set.
  assert(allocation_granularity != 0);
  assert(((allocation_granularity - 1) & allocation_granularity) == 0);

  // Page size: non-zero and a single bit set.
  assert(page_size != 0);
  assert(((page_size - 1) & page_size) == 0);

  // This class exists only for memory mapped writes.
  assert(options.use_mmap_writes);

  // The view size must be a multiple of both the allocation granularity and
  // the page size; the granularity is usually a multiple of the page size.
  // Start from 32Kb, similar to the Windows File Cache in buffered mode.
  const size_t base_view_size = 32 * 1024;
  view_size_ = Roundup(base_view_size, allocation_granularity_);
}

// Closes the file if the handle is still set, i.e. Close() has not already
// cleared it. The status returned by Close() is discarded.
WinMmapFile::~WinMmapFile() {
  if (hFile_ != NULL) {
    Close(IOOptions(), nullptr);
  }
}

// Copies `data` into the mapped view at the write cursor, moving on to a
// fresh view (unmap + map) whenever the current one is full or absent.
// After the copy, the rest of the last partially written page is zeroed.
// Returns the first mapping error encountered, otherwise OK.
IOStatus WinMmapFile::Append(const Slice& data, const IOOptions& options,
                             IODebugContext* dbg) {
  const char* cursor = data.data();
  size_t remaining = data.size();

  while (remaining > 0) {
    assert(mapped_begin_ <= dst_);
    const size_t room = mapped_end_ - dst_;

    if (room != 0) {
      // Copy as much as fits into the current view.
      const size_t chunk = std::min(remaining, room);
      memcpy(dst_, cursor, chunk);
      dst_ += chunk;
      cursor += chunk;
      remaining -= chunk;
      pending_sync_ = true;
      continue;
    }

    // Current view exhausted (or none mapped yet): advance to the next one.
    IOStatus remap_status = UnmapCurrentRegion();
    if (remap_status.ok()) {
      remap_status = MapNewRegion(options, dbg);
    }
    if (!remap_status.ok()) {
      return remap_status;
    }
  }

  // Now make sure that the last partial page is padded with zeros if needed
  const size_t cursor_address = reinterpret_cast<size_t>(dst_);
  const size_t pad_bytes = Roundup(cursor_address, page_size_) - cursor_address;
  if (pad_bytes > 0) {
    memset(dst_, 0, pad_bytes);
  }

  return IOStatus::OK();
}

// Intentionally a no-op: Close() takes care of truncating the file to the
// size of the valid data and needs no additional information, so the
// requested size is ignored and OK is returned.
// The parameter name is commented out, as elsewhere in this file, so that the
// unused argument does not trigger compiler warnings.
IOStatus WinMmapFile::Truncate(uint64_t /*size*/, const IOOptions& /*options*/,
                               IODebugContext* /*dbg*/) {
  return IOStatus::OK();
}

// Flushes and unmaps the current view, closes the mapping handle, truncates
// the file to the size of the valid data and closes the file handle.
// Every cleanup step is attempted regardless of earlier failures. The first
// error encountered is returned; this includes failures of Sync() and
// TruncateFile(), which must not be silently dropped because they mean the
// on-disk contents or size may be wrong.
IOStatus WinMmapFile::Close(const IOOptions& options, IODebugContext* dbg) {
  IOStatus s;

  assert(NULL != hFile_);

  // We truncate to the precise size so no
  // uninitialized data at the end. SetEndOfFile
  // which we use does not write zeros and it is good.
  // The size must be computed before unmapping, which resets the cursor.
  uint64_t targetSize = GetFileSize(options, dbg);

  if (mapped_begin_ != nullptr) {
    // Sync before unmapping to make sure everything
    // is on disk and there is not a lazy writing
    // so we are deterministic with the tests
    s = Sync(options, dbg);

    IOStatus unmap_status = UnmapCurrentRegion();
    if (s.ok()) {
      s = unmap_status;
    }
  }

  if (NULL != hMap_) {
    BOOL ret = ::CloseHandle(hMap_);
    if (!ret && s.ok()) {
      auto lastError = GetLastError();
      s = IOErrorFromWindowsError(
          "Failed to Close mapping for file: " + filename_, lastError);
    }

    hMap_ = NULL;
  }

  if (hFile_ != NULL) {
    IOStatus truncate_status = TruncateFile(targetSize);
    if (s.ok()) {
      s = truncate_status;
    }

    BOOL ret = ::CloseHandle(hFile_);
    // Cleared unconditionally so the destructor does not close again.
    hFile_ = NULL;

    if (!ret && s.ok()) {
      auto lastError = GetLastError();
      s = IOErrorFromWindowsError(
          "Failed to close file map handle: " + filename_, lastError);
    }
  }

  return s;
}

// No-op: does nothing and always returns OK.
IOStatus WinMmapFile::Flush(const IOOptions& /*options*/,
                            IODebugContext* /*dbg*/) {
  return IOStatus::OK();
}

// Flush only data.
// Writes the pages of the current view that were touched since the last
// successful sync to disk with FlushViewOfFile(). Does nothing when no bytes
// were written since then. `last_sync_` advances only when the flush succeeds.
IOStatus WinMmapFile::Sync(const IOOptions& /*options*/,
                           IODebugContext* /*dbg*/) {
  IOStatus s;

  // Some writes occurred since last sync
  if (dst_ > last_sync_) {
    assert(mapped_begin_);
    assert(dst_);
    assert(dst_ > mapped_begin_);
    // The cursor sits exactly at the end of the view when the view has been
    // filled completely, which is a valid state to sync from.
    assert(dst_ <= mapped_end_);

    // First page containing unsynced bytes ...
    size_t page_begin =
        TruncateToPageBoundary(page_size_, last_sync_ - mapped_begin_);
    // ... and the page containing the last written byte.
    size_t page_end =
        TruncateToPageBoundary(page_size_, dst_ - mapped_begin_ - 1);

    // Flush only the amount of that is a multiple of pages
    if (!::FlushViewOfFile(mapped_begin_ + page_begin,
                           (page_end - page_begin) + page_size_)) {
      // Capture the error code before building the message string, as the
      // rest of this file does, so nothing can disturb it in between.
      auto lastError = GetLastError();
      s = IOErrorFromWindowsError("Failed to FlushViewOfFile: " + filename_,
                                  lastError);
    } else {
      last_sync_ = dst_;
    }
  }

  return s;
}

/**
 * Flush data as well as metadata to stable storage.
 *
 * Runs Sync() first. If that succeeds and writes are pending, calls
 * FlushFileBuffers() on the file handle. The pending flag is cleared after
 * the attempt whether or not FlushFileBuffers() succeeds.
 */
IOStatus WinMmapFile::Fsync(const IOOptions& options, IODebugContext* dbg) {
  IOStatus s = Sync(options, dbg);

  // Flush metadata
  if (s.ok() && pending_sync_) {
    if (!::FlushFileBuffers(hFile_)) {
      // Capture the error code before building the message string, as the
      // rest of this file does, so nothing can disturb it in between.
      auto lastError = GetLastError();
      s = IOErrorFromWindowsError("Failed to FlushFileBuffers: " + filename_,
                                  lastError);
    }
    pending_sync_ = false;
  }

  return s;
}

/**
 * Returns the number of valid data bytes in the file: the offset of the
 * current view plus the bytes written into it so far. This will not match
 * the size reported by the filesystem, because the file is extended by a
 * whole view at a time.
 */
uint64_t WinMmapFile::GetFileSize(const IOOptions& /*options*/,
                                  IODebugContext* /*dbg*/) {
  const size_t bytes_in_view = dst_ - mapped_begin_;
  const uint64_t valid_bytes = file_offset_ + bytes_in_view;
  return valid_bytes;
}

// No-op for this file type: both arguments are ignored and OK is returned.
// Parameter names are commented out, as elsewhere in this file, so that the
// unused arguments do not trigger compiler warnings.
IOStatus WinMmapFile::InvalidateCache(size_t /*offset*/, size_t /*length*/) {
  return IOStatus::OK();
}

// Makes sure disk space is reserved for the range ending at `offset + len`.
// The requested end is rounded up to a multiple of the view size. Nothing
// happens if that much is already reserved. `reserved_size_` is updated only
// when the pre-allocation succeeds.
IOStatus WinMmapFile::Allocate(uint64_t offset, uint64_t len,
                               const IOOptions& /*options*/,
                               IODebugContext* /*dbg*/) {
  IOStatus result;
  TEST_KILL_RANDOM("WinMmapFile::Allocate");

  // The reservation block size is driven from outside, so align the request
  // ourselves and then check whether the existing reservation covers it.
  const size_t wanted = Roundup(static_cast<size_t>(offset + len), view_size_);
  if (wanted <= reserved_size_) {
    // Nothing to do
    return result;
  }

  IOSTATS_TIMER_GUARD(allocate_nanos);
  result = PreallocateInternal(wanted);
  if (result.ok()) {
    reserved_size_ = wanted;
  }
  return result;
}

// Forwards to GetUniqueIdFromFile() with this file's handle and returns the
// length it reports.
size_t WinMmapFile::GetUniqueId(char* id, size_t max_size) const {
  const size_t id_length = GetUniqueIdFromFile(hFile_, id, max_size);
  return id_length;
}

//////////////////////////////////////////////////////////////////////////////////
// WinSequentialFile

// Wraps the file handle `f`; direct I/O is enabled when
// `options.use_direct_reads` is set.
WinSequentialFile::WinSequentialFile(const std::string& fname, HANDLE f,
                                     const FileOptions& options)
    : WinFileData(fname, f, options.use_direct_reads) {}

// Only verifies (in debug builds) that the handle is not the invalid-handle
// sentinel; nothing is released here.
WinSequentialFile::~WinSequentialFile() {
  assert(INVALID_HANDLE_VALUE != hFile_);
}

// Reads up to `n` bytes from the current file position into `scratch` and
// points `*result` at the bytes actually read.
// Not supported in direct I/O mode. End of file is reported as OK with an
// empty result. On a read error `*result` is set to an empty slice and the
// error is returned.
IOStatus WinSequentialFile::Read(size_t n, const IOOptions& /*opts*/,
                                 Slice* result, char* scratch,
                                 IODebugContext* /*dbg*/) {
  assert(result != nullptr);
  if (WinFileData::use_direct_io()) {
    return IOStatus::NotSupported("Read() does not support direct_io");
  }

  // Windows ReadFile API accepts a DWORD.
  // While it is possible to read in a loop if n is too big
  // it is an unlikely case.
  if (n > std::numeric_limits<DWORD>::max()) {
    return IOStatus::InvalidArgument("n is too big for a single ReadFile: " +
                                     filename_);
  }

  // cast is safe due to the check above
  const DWORD requested = static_cast<DWORD>(n);
  DWORD received = 0;

  IOStatus read_status;
  size_t usable = 0;
  if (ReadFile(hFile_, scratch, requested, &received, NULL) == FALSE) {
    const DWORD last_error = GetLastError();
    if (last_error != ERROR_HANDLE_EOF) {
      read_status =
          IOErrorFromWindowsError("ReadFile failed: " + filename_, last_error);
    }
  } else {
    usable = received;
  }

  *result = Slice(scratch, usable);
  return read_status;
}

// Reads `numBytes` bytes at `offset` into `src` by forwarding to pread();
// `bytes_read` receives the count pread() reports.
IOStatus WinSequentialFile::PositionedReadInternal(char* src, size_t numBytes,
                                                   uint64_t offset,
                                                   size_t& bytes_read) const {
  IOStatus read_status = pread(this, src, numBytes, offset, bytes_read);
  return read_status;
}

// Direct I/O only: reads `n` bytes at `offset` into `scratch` and points
// `*result` at the bytes actually read. Returns NotSupported when the file
// was not opened for direct I/O. `offset` and `n` are expected to be sector
// aligned (checked with assertions only).
IOStatus WinSequentialFile::PositionedRead(uint64_t offset, size_t n,
                                           const IOOptions& /*opts*/,
                                           Slice* result, char* scratch,
                                           IODebugContext* /*dbg*/) {
  const bool direct = WinFileData::use_direct_io();
  if (!direct) {
    return IOStatus::NotSupported("This function is only used for direct_io");
  }

  assert(IsSectorAligned(static_cast<size_t>(offset)));
  assert(IsSectorAligned(n));

  size_t received = 0;  // filled in by the read below
  IOStatus read_status = PositionedReadInternal(scratch, n, offset, received);
  *result = Slice(scratch, received);
  return read_status;
}

// Advances the file pointer by `n` bytes relative to its current position.
// Returns InvalidArgument if `n` does not fit in the signed 64-bit distance
// SetFilePointerEx() accepts, or an I/O error if the call fails.
IOStatus WinSequentialFile::Skip(uint64_t n) {
  // Can't handle more than signed max as SetFilePointerEx accepts a signed
  // 64-bit integer. As such it is a highly unlikely case to have n so large.
  if (n > static_cast<uint64_t>(std::numeric_limits<LONGLONG>::max())) {
    // The ": " separator keeps the file name from running into the message
    // text.
    return IOStatus::InvalidArgument(
        "n is too large for a single SetFilePointerEx() call: " + filename_);
  }

  LARGE_INTEGER li;
  li.QuadPart = static_cast<LONGLONG>(n);  // cast is safe due to the check
                                           // above
  BOOL ret = SetFilePointerEx(hFile_, li, NULL, FILE_CURRENT);
  if (ret == FALSE) {
    auto lastError = GetLastError();
    return IOErrorFromWindowsError("Skip SetFilePointerEx():" + filename_,
                                   lastError);
  }
  return IOStatus::OK();
}

// No-op for this file type: both arguments are ignored and OK is returned.
// Parameter names are commented out, as elsewhere in this file, so that the
// unused arguments do not trigger compiler warnings.
IOStatus WinSequentialFile::InvalidateCache(size_t /*offset*/,
                                            size_t /*length*/) {
  return IOStatus::OK();
}

//////////////////////////////////////////////////////////////////////////////////////////////////
/// WinRandomAccessImpl

// Reads `numBytes` bytes at `offset` into `src` by forwarding to pread() on
// the underlying file; `bytes_read` receives the count pread() reports.
inline IOStatus WinRandomAccessImpl::PositionedReadInternal(
    char* src, size_t numBytes, uint64_t offset, size_t& bytes_read) const {
  IOStatus read_status = pread(file_base_, src, numBytes, offset, bytes_read);
  return read_status;
}

// Binds the implementation to `file_base`. The effective alignment is the
// larger of the requested `alignment` and the file's sector size.
// Memory mapped reads must not be enabled in `options` (asserted).
inline WinRandomAccessImpl::WinRandomAccessImpl(WinFileData* file_base,
                                                size_t alignment,
                                                const FileOptions& options)
    : file_base_(file_base),
      alignment_(std::max(alignment, file_base->GetSectorSize())) {
  assert(!options.use_mmap_reads);
}

// Reads up to `n` bytes at `offset` into `scratch` and points `*result` at
// the bytes actually read. A zero-length request succeeds immediately with an
// empty slice. In direct I/O mode `offset` must be sector aligned and
// `scratch` aligned to `alignment_` (checked with assertions only).
inline IOStatus WinRandomAccessImpl::ReadImpl(uint64_t offset, size_t n,
                                              Slice* result,
                                              char* scratch) const {
  const bool direct = file_base_->use_direct_io();
  if (direct) {
    // Check offset and buffer alignment
    assert(file_base_->IsSectorAligned(static_cast<size_t>(offset)));
    assert(IsAligned(alignment_, scratch));
  }

  if (n != 0) {
    size_t received = 0;
    IOStatus read_status = PositionedReadInternal(scratch, n, offset, received);
    *result = Slice(scratch, received);
    return read_status;
  }

  *result = Slice(scratch, 0);
  return IOStatus::OK();
}

///////////////////////////////////////////////////////////////////////////////////////////////////
/// WinRandomAccessFile

// Wraps `hFile` for random reads; direct I/O is enabled when
// `options.use_direct_reads` is set. `alignment` and `options` are passed on
// to the WinRandomAccessImpl base.
WinRandomAccessFile::WinRandomAccessFile(const std::string& fname, HANDLE hFile,
                                         size_t alignment,
                                         const FileOptions& options)
    : WinFileData(fname, hFile, options.use_direct_reads),
      WinRandomAccessImpl(this, alignment, options) {}

// Nothing to release in this class itself.
WinRandomAccessFile::~WinRandomAccessFile() = default;

// Reads up to `n` bytes at `offset` by forwarding to ReadImpl(); `options`
// and `dbg` are not used.
IOStatus WinRandomAccessFile::Read(uint64_t offset, size_t n,
                                   const IOOptions& /*options*/, Slice* result,
                                   char* scratch,
                                   IODebugContext* /*dbg*/) const {
  IOStatus read_status = ReadImpl(offset, n, result, scratch);
  return read_status;
}

// No-op for this file type: both arguments are ignored and OK is returned.
// Parameter names are commented out, as elsewhere in this file, so that the
// unused arguments do not trigger compiler warnings.
IOStatus WinRandomAccessFile::InvalidateCache(size_t /*offset*/,
                                              size_t /*length*/) {
  return IOStatus::OK();
}

// Forwards to GetUniqueIdFromFile() with this file's handle and returns the
// length it reports.
size_t WinRandomAccessFile::GetUniqueId(char* id, size_t max_size) const {
  const size_t id_length = GetUniqueIdFromFile(GetFileHandle(), id, max_size);
  return id_length;
}

// Reports the buffer alignment callers must honor, which is the alignment
// held by the WinRandomAccessImpl base.
size_t WinRandomAccessFile::GetRequiredBufferAlignment() const {
  const size_t required = GetAlignment();
  return required;
}

/////////////////////////////////////////////////////////////////////////////
// WinWritableImpl
//

// Asks the file system to reserve `spaceToReserve` bytes for the underlying
// file by forwarding to fallocate().
inline IOStatus WinWritableImpl::PreallocateInternal(uint64_t spaceToReserve) {
  IOStatus reserve_status = fallocate(
      file_data_->GetName(), file_data_->GetFileHandle(), spaceToReserve);
  return reserve_status;
}

// Binds the implementation to `file_data`. The effective alignment is the
// larger of the requested `alignment` and the file's sector size.
// `next_write_offset_` is seeded from the handle's current file position.
inline WinWritableImpl::WinWritableImpl(WinFileData* file_data,
                                        size_t alignment)
    : file_data_(file_data),
      alignment_(std::max(alignment, file_data->GetSectorSize())),
      next_write_offset_(0),
      reservedsize_(0) {
  // Query the current position in case ReopenWritableFile is called.
  // This position is only important for buffered writes;
  // for unbuffered writes we explicitly specify the position.
  LARGE_INTEGER no_movement;
  no_movement.QuadPart = 0;  // Do not move
  LARGE_INTEGER current_position;
  current_position.QuadPart = 0;

  const BOOL queried =
      SetFilePointerEx(file_data_->GetFileHandle(), no_movement,
                       &current_position, FILE_CURRENT);

  // Querying is not supposed to fail
  if (queried == 0) {
    assert(false);
  } else {
    next_write_offset_ = current_position.QuadPart;
  }
}

// Appends `data` to the file.
// In direct I/O mode the write goes through pwrite() at `next_write_offset_`;
// the offset, the size and the buffer must all be suitably aligned (checked
// with assertions only). Otherwise a plain WriteFile() at the current file
// position is used.
// `next_write_offset_` advances only when every byte was written; a short
// write is reported as an I/O error.
inline IOStatus WinWritableImpl::AppendImpl(const Slice& data) {
  IOStatus s;

  if (data.size() > std::numeric_limits<DWORD>::max()) {
    // The ": " separator keeps the file name from running into the message
    // text.
    return IOStatus::InvalidArgument("data is too long for a single write: " +
                                     file_data_->GetName());
  }

  size_t bytes_written = 0;  // out param

  if (file_data_->use_direct_io()) {
    // With no offset specified we are appending
    // to the end of the file
    assert(file_data_->IsSectorAligned(next_write_offset_));
    assert(file_data_->IsSectorAligned(data.size()));
    assert(IsAligned(static_cast<size_t>(GetAlignment()), data.data()));
    s = pwrite(file_data_, data, next_write_offset_, bytes_written);
  } else {
    DWORD bytesWritten = 0;
    if (!WriteFile(file_data_->GetFileHandle(), data.data(),
                   static_cast<DWORD>(data.size()), &bytesWritten, NULL)) {
      auto lastError = GetLastError();
      s = IOErrorFromWindowsError(
          "Failed to WriteFile: " + file_data_->GetName(), lastError);
    } else {
      bytes_written = bytesWritten;
    }
  }

  if (s.ok()) {
    if (bytes_written == data.size()) {
      // This matters for direct_io cases where
      // we rely on the fact that next_write_offset_
      // is sector aligned
      next_write_offset_ += bytes_written;
    } else {
      s = IOStatus::IOError("Failed to write all bytes: " +
                            file_data_->GetName());
    }
  }

  return s;
}

// Writes `data` at the explicit `offset` through pwrite().
// In direct I/O mode the offset, the size and the buffer must be suitably
// aligned (checked with assertions only). After a complete write,
// `next_write_offset_` is moved to the end of the write if that end is at or
// beyond its current value. A short write is reported as an I/O error.
inline IOStatus WinWritableImpl::PositionedAppendImpl(const Slice& data,
                                                      uint64_t offset) {
  if (file_data_->use_direct_io()) {
    assert(file_data_->IsSectorAligned(static_cast<size_t>(offset)));
    assert(file_data_->IsSectorAligned(data.size()));
    assert(IsAligned(static_cast<size_t>(GetAlignment()), data.data()));
  }

  size_t written = 0;
  IOStatus write_status = pwrite(file_data_, data, offset, written);
  if (!write_status.ok()) {
    return write_status;
  }

  if (written != data.size()) {
    write_status = IOStatus::IOError(
        "Failed to write all of the requested data: " + file_data_->GetName());
    return write_status;
  }

  // For sequential write this would be simple
  // size extension by data.size()
  const uint64_t end_of_write = offset + written;
  if (end_of_write >= next_write_offset_) {
    next_write_offset_ = end_of_write;
  }
  return write_status;
}

// Truncates the underlying file to `size` bytes and, on success, moves
// `next_write_offset_` to that size.
inline IOStatus WinWritableImpl::TruncateImpl(uint64_t size) {
  // It is tempting to require `size` to be sector aligned, but truncation
  // may come at the very end and there is no such requirement as long as we
  // do not attempt to write after that. The interface docs state that the
  // behavior is undefined in that case.
  IOStatus truncate_status =
      ftruncate(file_data_->GetName(), file_data_->GetFileHandle(), size);
  if (!truncate_status.ok()) {
    return truncate_status;
  }

  next_write_offset_ = size;
  return truncate_status;
}

// Flushes the file buffers and then closes the file. The close is attempted
// even when the flush fails; the first error encountered is returned.
inline IOStatus WinWritableImpl::CloseImpl() {
  IOStatus close_status;

  const auto handle = file_data_->GetFileHandle();
  assert(handle != INVALID_HANDLE_VALUE);

  if (!::FlushFileBuffers(handle)) {
    const auto flush_error = GetLastError();
    close_status = IOErrorFromWindowsError(
        "FlushFileBuffers failed at Close() for: " + file_data_->GetName(),
        flush_error);
  }

  const bool closed = file_data_->CloseFile();
  if (!closed && close_status.ok()) {
    const auto close_error = GetLastError();
    close_status = IOErrorFromWindowsError(
        "CloseHandle failed for: " + file_data_->GetName(), close_error);
  }
  return close_status;
}

// Flushes the file's buffers with FlushFileBuffers(); `options` and `dbg`
// are not used.
inline IOStatus WinWritableImpl::SyncImpl(const IOOptions& /*options*/,
                                          IODebugContext* /*dbg*/) {
  const BOOL flushed = ::FlushFileBuffers(file_data_->GetFileHandle());
  if (flushed) {
    return IOStatus::OK();
  }

  const auto last_error = GetLastError();
  return IOErrorFromWindowsError(
      "FlushFileBuffers failed at Sync() for: " + file_data_->GetName(),
      last_error);
}

// Makes sure disk space is reserved for the range ending at `offset + len`.
// The requested end is rounded up to a multiple of the alignment. Nothing
// happens if that much is already reserved. `reservedsize_` is updated only
// when the pre-allocation succeeds.
inline IOStatus WinWritableImpl::AllocateImpl(uint64_t offset, uint64_t len) {
  IOStatus result;
  TEST_KILL_RANDOM("WinWritableFile::Allocate");

  // The reservation block size is driven from outside, so align the request
  // ourselves and then check whether the existing reservation covers it.
  const size_t wanted = Roundup(static_cast<size_t>(offset + len),
                                static_cast<size_t>(alignment_));
  if (wanted <= reservedsize_) {
    // Nothing to do
    return result;
  }

  IOSTATS_TIMER_GUARD(allocate_nanos);
  result = PreallocateInternal(wanted);
  if (result.ok()) {
    reservedsize_ = wanted;
  }
  return result;
}

////////////////////////////////////////////////////////////////////////////////
/// WinWritableFile

// Wraps `hFile` for writing; direct I/O is enabled when
// `options.use_direct_writes` is set. The capacity argument is ignored.
// Memory mapped writes must not be enabled in `options` (asserted).
WinWritableFile::WinWritableFile(const std::string& fname, HANDLE hFile,
                                 size_t alignment, size_t /* capacity */,
                                 const FileOptions& options)
    : WinFileData(fname, hFile, options.use_direct_writes),
      WinWritableImpl(this, alignment),
      FSWritableFile(options) {
  assert(!options.use_mmap_writes);
}

// Nothing to release in this class itself.
WinWritableFile::~WinWritableFile() = default;

// Reports whether this file uses direct I/O, as recorded in the WinFileData
// base.
bool WinWritableFile::use_direct_io() const {
  const bool direct = WinFileData::use_direct_io();
  return direct;
}

// Reports the buffer alignment callers must honor, which is the alignment
// held by the WinWritableImpl base converted to size_t.
size_t WinWritableFile::GetRequiredBufferAlignment() const {
  const size_t required = static_cast<size_t>(GetAlignment());
  return required;
}

// Appends `data` by forwarding to AppendImpl(); `options` and `dbg` are not
// used.
IOStatus WinWritableFile::Append(const Slice& data,
                                 const IOOptions& /*options*/,
                                 IODebugContext* /*dbg*/) {
  IOStatus append_status = AppendImpl(data);
  return append_status;
}

// Writes `data` at `offset` by forwarding to PositionedAppendImpl();
// `options` and `dbg` are not used.
IOStatus WinWritableFile::PositionedAppend(const Slice& data, uint64_t offset,
                                           const IOOptions& /*options*/,
                                           IODebugContext* /*dbg*/) {
  IOStatus append_status = PositionedAppendImpl(data, offset);
  return append_status;
}

// Truncates the file to `size` bytes by forwarding to TruncateImpl().
// This must be implemented so that the file is truncated correctly in both
// buffered and unbuffered mode.
IOStatus WinWritableFile::Truncate(uint64_t size, const IOOptions& /*options*/,
                                   IODebugContext* /*dbg*/) {
  IOStatus truncate_status = TruncateImpl(size);
  return truncate_status;
}

// Closes the file by forwarding to CloseImpl(); `options` and `dbg` are not
// used.
IOStatus WinWritableFile::Close(const IOOptions& /*options*/,
                                IODebugContext* /*dbg*/) {
  IOStatus close_status = CloseImpl();
  return close_status;
}

// Would write out cached data to the OS cache. That is now taken care of by
// the WritableFileWriter, so this is a no-op that always returns OK.
IOStatus WinWritableFile::Flush(const IOOptions& /*options*/,
                                IODebugContext* /*dbg*/) {
  return IOStatus::OK();
}

// Syncs the file by forwarding to SyncImpl().
IOStatus WinWritableFile::Sync(const IOOptions& options, IODebugContext* dbg) {
  IOStatus sync_status = SyncImpl(options, dbg);
  return sync_status;
}

// Same as Sync(): forwards to SyncImpl().
IOStatus WinWritableFile::Fsync(const IOOptions& options, IODebugContext* dbg) {
  IOStatus sync_status = SyncImpl(options, dbg);
  return sync_status;
}

// Always reports true for this file type.
bool WinWritableFile::IsSyncThreadSafe() const { return true; }

// Reports the file size as the next write offset tracked by the writable
// implementation; `options` and `dbg` are not used.
uint64_t WinWritableFile::GetFileSize(const IOOptions& /*options*/,
                                      IODebugContext* /*dbg*/) {
  const uint64_t next_offset = GetFileNextWriteOffset();
  return next_offset;
}

// Reserves disk space for the given range by forwarding to AllocateImpl();
// `options` and `dbg` are not used.
IOStatus WinWritableFile::Allocate(uint64_t offset, uint64_t len,
                                   const IOOptions& /*options*/,
                                   IODebugContext* /*dbg*/) {
  IOStatus allocate_status = AllocateImpl(offset, len);
  return allocate_status;
}

// Forwards to GetUniqueIdFromFile() with this file's handle and returns the
// length it reports.
size_t WinWritableFile::GetUniqueId(char* id, size_t max_size) const {
  const size_t id_length = GetUniqueIdFromFile(GetFileHandle(), id, max_size);
  return id_length;
}

/////////////////////////////////////////////////////////////////////////
/// WinRandomRWFile

// Wraps `hFile` for random reads and writes. Direct I/O is enabled only when
// both `options.use_direct_reads` and `options.use_direct_writes` are set.
// The same `alignment` is handed to the read and the write implementation.
WinRandomRWFile::WinRandomRWFile(const std::string& fname, HANDLE hFile,
                                 size_t alignment, const FileOptions& options)
    : WinFileData(fname, hFile,
                  options.use_direct_reads && options.use_direct_writes),
      WinRandomAccessImpl(this, alignment, options),
      WinWritableImpl(this, alignment) {}

// Reports whether this file uses direct I/O, as recorded in the WinFileData
// base.
bool WinRandomRWFile::use_direct_io() const {
  const bool direct = WinFileData::use_direct_io();
  return direct;
}

// Reports the buffer alignment callers must honor. The read and the write
// implementation are expected to agree on it (asserted); the read side's
// value is the one returned.
size_t WinRandomRWFile::GetRequiredBufferAlignment() const {
  const auto read_alignment = WinRandomAccessImpl::GetAlignment();
  assert(read_alignment == WinWritableImpl::GetAlignment());
  return static_cast<size_t>(read_alignment);
}

// Writes `data` at `offset` by forwarding to PositionedAppendImpl();
// `options` and `dbg` are not used.
IOStatus WinRandomRWFile::Write(uint64_t offset, const Slice& data,
                                const IOOptions& /*options*/,
                                IODebugContext* /*dbg*/) {
  IOStatus write_status = PositionedAppendImpl(data, offset);
  return write_status;
}

// Reads up to `n` bytes at `offset` by forwarding to ReadImpl(); `options`
// and `dbg` are not used.
IOStatus WinRandomRWFile::Read(uint64_t offset, size_t n,
                               const IOOptions& /*options*/, Slice* result,
                               char* scratch, IODebugContext* /*dbg*/) const {
  IOStatus read_status = ReadImpl(offset, n, result, scratch);
  return read_status;
}

// No-op: does nothing and always returns OK.
IOStatus WinRandomRWFile::Flush(const IOOptions& /*options*/,
                                IODebugContext* /*dbg*/) {
  return IOStatus::OK();
}

// Syncs the file by forwarding to SyncImpl().
IOStatus WinRandomRWFile::Sync(const IOOptions& options, IODebugContext* dbg) {
  IOStatus sync_status = SyncImpl(options, dbg);
  return sync_status;
}

// Closes the file by forwarding to CloseImpl(); `options` and `dbg` are not
// used.
IOStatus WinRandomRWFile::Close(const IOOptions& /*options*/,
                                IODebugContext* /*dbg*/) {
  IOStatus close_status = CloseImpl();
  return close_status;
}

//////////////////////////////////////////////////////////////////////////
/// WinMemoryMappedBuffer
// Releases, in this order, the mapped view, the mapping handle and the file
// handle. Each one is skipped when it is not set, and each member is cleared
// after release. Failures are only detected through assertions.
WinMemoryMappedBuffer::~WinMemoryMappedBuffer() {
  // `ret` is only read by the asserts below. The declaration differs per
  // compiler: it is initialized under MSVC and tagged as unused elsewhere.
  BOOL ret
#if defined(_MSC_VER)
      = FALSE;
#else
      __attribute__((__unused__));
#endif
  if (base_ != nullptr) {
    ret = ::UnmapViewOfFile(base_);
    assert(ret);
    base_ = nullptr;
  }
  // Both NULL and INVALID_HANDLE_VALUE are treated as "no handle".
  if (map_handle_ != NULL && map_handle_ != INVALID_HANDLE_VALUE) {
    ret = ::CloseHandle(map_handle_);
    assert(ret);
    map_handle_ = NULL;
  }
  if (file_handle_ != NULL && file_handle_ != INVALID_HANDLE_VALUE) {
    ret = ::CloseHandle(file_handle_);
    assert(ret);
    file_handle_ = NULL;
  }
}

//////////////////////////////////////////////////////////////////////////
/// WinDirectory

// No-op: does nothing and always returns OK.
IOStatus WinDirectory::Fsync(const IOOptions& /*options*/,
                             IODebugContext* /*dbg*/) {
  return IOStatus::OK();
}

// Forwards to GetUniqueIdFromFile() with the directory handle and returns the
// length it reports.
size_t WinDirectory::GetUniqueId(char* id, size_t max_size) const {
  const size_t id_length = GetUniqueIdFromFile(handle_, id, max_size);
  return id_length;
}
//////////////////////////////////////////////////////////////////////////
/// WinFileLock

// Closes the lock file's handle. A failure is only detected through the
// assertion.
WinFileLock::~WinFileLock() {
  BOOL handle_closed __attribute__((__unused__));
  handle_closed = ::CloseHandle(hFile_);
  assert(handle_closed);
}

}  // namespace port
}  // namespace ROCKSDB_NAMESPACE

#endif