diff --git a/CMakeLists.txt b/CMakeLists.txt index 615457c0a..35d427dfe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -659,8 +659,12 @@ if(WIN32) port/win/env_default.cc port/win/port_win.cc port/win/win_logger.cc - port/win/win_thread.cc + port/win/win_thread.cc) + +if(WITH_XPRESS) + list(APPEND SOURCES port/win/xpress_win.cc) +endif() if(WITH_JEMALLOC) list(APPEND SOURCES diff --git a/port/win/io_win.cc b/port/win/io_win.cc index 549cc3a86..9321438d7 100644 --- a/port/win/io_win.cc +++ b/port/win/io_win.cc @@ -30,7 +30,7 @@ bool IsPowerOfTwo(const size_t alignment) { } inline -bool IsSectorAligned(const size_t off) { +bool IsSectorAligned(const size_t off) { return (off & (kSectorSize - 1)) == 0; } @@ -67,9 +67,20 @@ std::string GetWindowsErrSz(DWORD err) { // Because all the reads/writes happen by the specified offset, the caller in // theory should not // rely on the current file offset. -SSIZE_T pwrite(HANDLE hFile, const char* src, size_t numBytes, - uint64_t offset) { - assert(numBytes <= std::numeric_limits::max()); +Status pwrite(const WinFileData* file_data, const Slice& data, + uint64_t offset, size_t& bytes_written) { + + Status s; + bytes_written = 0; + + size_t num_bytes = data.size(); + if (num_bytes > std::numeric_limits::max()) { + // May happen in 64-bit builds where size_t is 64-bits but + // long is still 32-bit, but that's the API here at the moment + return Status::InvalidArgument("num_bytes is too large for a single write: " + + file_data->GetName()); + } + OVERLAPPED overlapped = { 0 }; ULARGE_INTEGER offsetUnion; offsetUnion.QuadPart = offset; @@ -77,23 +88,32 @@ SSIZE_T pwrite(HANDLE hFile, const char* src, size_t numBytes, overlapped.Offset = offsetUnion.LowPart; overlapped.OffsetHigh = offsetUnion.HighPart; - SSIZE_T result = 0; - - unsigned long bytesWritten = 0; + DWORD bytesWritten = 0; - if (FALSE == WriteFile(hFile, src, static_cast(numBytes), &bytesWritten, - &overlapped)) { - result = -1; + if (FALSE == WriteFile(file_data->GetFileHandle(), data.data(), static_cast(num_bytes), + &bytesWritten, &overlapped)) { + auto lastError = GetLastError(); + s = IOErrorFromWindowsError("WriteFile failed: " + file_data->GetName(), + lastError); } else { - result = bytesWritten; + bytes_written = bytesWritten; } - return result; + return s; } // See comments for pwrite above -SSIZE_T pread(HANDLE hFile, char* src, size_t numBytes, uint64_t offset) { - assert(numBytes <= std::numeric_limits::max()); +Status pread(const WinFileData* file_data, char* src, size_t num_bytes, + uint64_t offset, size_t& bytes_read) { + + Status s; + bytes_read = 0; + + if (num_bytes > std::numeric_limits::max()) { + return Status::InvalidArgument("num_bytes is too large for a single read: " + + file_data->GetName()); + } + OVERLAPPED overlapped = { 0 }; ULARGE_INTEGER offsetUnion; offsetUnion.QuadPart = offset; @@ -101,18 +121,21 @@ SSIZE_T pread(HANDLE hFile, char* src, size_t numBytes, uint64_t offset) { overlapped.Offset = offsetUnion.LowPart; overlapped.OffsetHigh = offsetUnion.HighPart; - SSIZE_T result = 0; - - unsigned long bytesRead = 0; + DWORD bytesRead = 0; - if (FALSE == ReadFile(hFile, src, static_cast(numBytes), &bytesRead, - &overlapped)) { - return -1; + if (FALSE == ReadFile(file_data->GetFileHandle(), src, static_cast(num_bytes), + &bytesRead, &overlapped)) { + auto lastError = GetLastError(); + // EOF is OK with zero bytes read + if (lastError != ERROR_HANDLE_EOF) { + s = IOErrorFromWindowsError("ReadFile failed: " + file_data->GetName(), + lastError); + } } else { - result = bytesRead; + bytes_read = bytesRead; } - return result; + return s; } // SetFileInformationByHandle() is capable of fast pre-allocates. @@ -586,34 +609,42 @@ WinSequentialFile::~WinSequentialFile() { } Status WinSequentialFile::Read(size_t n, Slice* result, char* scratch) { - assert(result != nullptr && !WinFileData::use_direct_io()); Status s; size_t r = 0; + assert(result != nullptr); + if (WinFileData::use_direct_io()) { + return Status::NotSupported("Read() does not support direct_io"); + } + // Windows ReadFile API accepts a DWORD. - // While it is possible to read in a loop if n is > UINT_MAX - // it is a highly unlikely case. - if (n > UINT_MAX) { - return IOErrorFromWindowsError(filename_, ERROR_INVALID_PARAMETER); + // While it is possible to read in a loop if n is too big + // it is an unlikely case. + if (n > std::numeric_limits::max()) { + return Status::InvalidArgument("n is too big for a single ReadFile: " + + filename_); } DWORD bytesToRead = static_cast(n); //cast is safe due to the check above DWORD bytesRead = 0; BOOL ret = ReadFile(hFile_, scratch, bytesToRead, &bytesRead, NULL); - if (ret == TRUE) { + if (ret != FALSE) { r = bytesRead; } else { - return IOErrorFromWindowsError(filename_, GetLastError()); + auto lastError = GetLastError(); + if (lastError != ERROR_HANDLE_EOF) { + s = IOErrorFromWindowsError("ReadFile failed: " + filename_, + lastError); + } } *result = Slice(scratch, r); - return s; } -SSIZE_T WinSequentialFile::PositionedReadInternal(char* src, size_t numBytes, - uint64_t offset) const { - return pread(GetFileHandle(), src, numBytes, offset); +Status WinSequentialFile::PositionedReadInternal(char* src, size_t numBytes, + uint64_t offset, size_t& bytes_read) const { + return pread(this, src, numBytes, offset, bytes_read); } Status WinSequentialFile::PositionedRead(uint64_t offset, size_t n, Slice* result, @@ -621,27 +652,19 @@ Status WinSequentialFile::PositionedRead(uint64_t offset, size_t n, Slice* resul Status s; - assert(WinFileData::use_direct_io()); - - // Windows ReadFile API accepts a DWORD. - // While it is possible to read in a loop if n is > UINT_MAX - // it is a highly unlikely case. - if (n > UINT_MAX) { - return IOErrorFromWindowsError(GetName(), ERROR_INVALID_PARAMETER); + if (!WinFileData::use_direct_io()) { + return Status::NotSupported("This function is only used for direct_io"); } - auto r = PositionedReadInternal(scratch, n, offset); - - if (r < 0) { - auto lastError = GetLastError(); - // Posix impl wants to treat reads from beyond - // of the file as OK. - if (lastError != ERROR_HANDLE_EOF) { - s = IOErrorFromWindowsError(GetName(), lastError); - } + if (!IsSectorAligned(offset) || + !IsSectorAligned(n)) { + return Status::InvalidArgument( + "WinSequentialFile::PositionedRead: offset is not properly aligned"); } - *result = Slice(scratch, (r < 0) ? 0 : size_t(r)); + size_t bytes_read = 0; // out param + s = PositionedReadInternal(scratch, n, offset, bytes_read); + *result = Slice(scratch, bytes_read); return s; } @@ -649,15 +672,18 @@ Status WinSequentialFile::PositionedRead(uint64_t offset, size_t n, Slice* resul Status WinSequentialFile::Skip(uint64_t n) { // Can't handle more than signed max as SetFilePointerEx accepts a signed 64-bit // integer. As such it is a highly unlikley case to have n so large. - if (n > _I64_MAX) { - return IOErrorFromWindowsError(filename_, ERROR_INVALID_PARAMETER); + if (n > static_cast(std::numeric_limits::max())) { + return Status::InvalidArgument("n is too large for a single SetFilePointerEx() call" + + filename_); } LARGE_INTEGER li; - li.QuadPart = static_cast(n); //cast is safe due to the check above + li.QuadPart = static_cast(n); //cast is safe due to the check above BOOL ret = SetFilePointerEx(hFile_, li, NULL, FILE_CURRENT); if (ret == FALSE) { - return IOErrorFromWindowsError(filename_, GetLastError()); + auto lastError = GetLastError(); + return IOErrorFromWindowsError("Skip SetFilePointerEx():" + filename_, + lastError); } return Status::OK(); } @@ -670,10 +696,11 @@ Status WinSequentialFile::InvalidateCache(size_t offset, size_t length) { /// WinRandomAccessBase inline -SSIZE_T WinRandomAccessImpl::PositionedReadInternal(char* src, +Status WinRandomAccessImpl::PositionedReadInternal(char* src, size_t numBytes, - uint64_t offset) const { - return pread(file_base_->GetFileHandle(), src, numBytes, offset); + uint64_t offset, + size_t& bytes_read) const { + return pread(file_base_, src, numBytes, offset, bytes_read); } inline @@ -694,8 +721,10 @@ Status WinRandomAccessImpl::ReadImpl(uint64_t offset, size_t n, Slice* result, // Check buffer alignment if (file_base_->use_direct_io()) { - if (!IsAligned(alignment_, scratch)) { - return Status::InvalidArgument("WinRandomAccessImpl::ReadImpl: scratch is not properly aligned"); + if (!IsSectorAligned(offset) || + !IsAligned(alignment_, scratch)) { + return Status::InvalidArgument( + "WinRandomAccessImpl::ReadImpl: offset or scratch is not properly aligned"); } } @@ -704,22 +733,9 @@ Status WinRandomAccessImpl::ReadImpl(uint64_t offset, size_t n, Slice* result, return s; } - size_t left = n; - - SSIZE_T r = PositionedReadInternal(scratch, left, offset); - if (r > 0) { - left -= r; - } else if (r < 0) { - auto lastError = GetLastError(); - // Posix impl wants to treat reads from beyond - // of the file as OK. - if(lastError != ERROR_HANDLE_EOF) { - s = IOErrorFromWindowsError(file_base_->GetName(), lastError); - } - } - - *result = Slice(scratch, (r < 0) ? 0 : n - left); - + size_t bytes_read = 0; + s = PositionedReadInternal(scratch, n, offset, bytes_read); + *result = Slice(scratch, bytes_read); return s; } @@ -778,7 +794,7 @@ WinWritableImpl::WinWritableImpl(WinFileData* file_data, size_t alignment) BOOL ret = SetFilePointerEx(file_data_->GetFileHandle(), zero_move, &pos, FILE_CURRENT); // Querying no supped to fail - if (ret) { + if (ret != 0) { next_write_offset_ = pos.QuadPart; } else { assert(false); @@ -790,31 +806,24 @@ Status WinWritableImpl::AppendImpl(const Slice& data) { Status s; - assert(data.size() < std::numeric_limits::max()); + if (data.size() > std::numeric_limits::max()) { + return Status::InvalidArgument("data is too long for a single write" + + file_data_->GetName()); + } - uint64_t written = 0; - (void)written; + size_t bytes_written = 0; // out param if (file_data_->use_direct_io()) { - // With no offset specified we are appending // to the end of the file - assert(IsSectorAligned(next_write_offset_)); - assert(IsSectorAligned(data.size())); - assert(IsAligned(GetAlignement(), data.data())); - - SSIZE_T ret = pwrite(file_data_->GetFileHandle(), data.data(), - data.size(), next_write_offset_); - - if (ret < 0) { - auto lastError = GetLastError(); - s = IOErrorFromWindowsError( - "Failed to pwrite for: " + file_data_->GetName(), lastError); + if (!IsSectorAligned(data.size()) || + !IsAligned(GetAlignement(), data.data())) { + s = Status::InvalidArgument( + "WriteData must be page aligned, size must be sector aligned"); } else { - written = ret; + s = pwrite(file_data_, data, next_write_offset_, bytes_written); } - } else { DWORD bytesWritten = 0; @@ -824,15 +833,21 @@ Status WinWritableImpl::AppendImpl(const Slice& data) { s = IOErrorFromWindowsError( "Failed to WriteFile: " + file_data_->GetName(), lastError); - } - else { - written = bytesWritten; + } else { + bytes_written = bytesWritten; } } if(s.ok()) { - assert(written == data.size()); - next_write_offset_ += data.size(); + if (bytes_written == data.size()) { + // This matters for direct_io cases where + // we rely on the fact that next_write_offset_ + // is sector aligned + next_write_offset_ += bytes_written; + } else { + s = Status::IOError("Failed to write all bytes: " + + file_data_->GetName()); + } } return s; @@ -842,38 +857,44 @@ inline Status WinWritableImpl::PositionedAppendImpl(const Slice& data, uint64_t offset) { if(file_data_->use_direct_io()) { - assert(IsSectorAligned(offset)); - assert(IsSectorAligned(data.size())); - assert(IsAligned(GetAlignement(), data.data())); + if (!IsSectorAligned(offset) || + !IsSectorAligned(data.size()) || + !IsAligned(GetAlignement(), data.data())) { + return Status::InvalidArgument( + "Data and offset must be page aligned, size must be sector aligned"); + } } - Status s; - - SSIZE_T ret = pwrite(file_data_->GetFileHandle(), data.data(), data.size(), offset); + size_t bytes_written = 0; + Status s = pwrite(file_data_, data, offset, bytes_written); - // Error break - if (ret < 0) { - auto lastError = GetLastError(); - s = IOErrorFromWindowsError( - "Failed to pwrite for: " + file_data_->GetName(), lastError); - } else { - assert(size_t(ret) == data.size()); - // For sequential write this would be simple - // size extension by data.size() - uint64_t write_end = offset + data.size(); - if (write_end >= next_write_offset_) { - next_write_offset_ = write_end; + if(s.ok()) { + if (bytes_written == data.size()) { + // For sequential write this would be simple + // size extension by data.size() + uint64_t write_end = offset + bytes_written; + if (write_end >= next_write_offset_) { + next_write_offset_ = write_end; + } + } else { + s = Status::IOError("Failed to write all of the requested data: " + + file_data_->GetName()); } } return s; } -// Need to implement this so the file is truncated correctly -// when buffered and unbuffered mode inline Status WinWritableImpl::TruncateImpl(uint64_t size) { + + // It is tempting to check for the size for sector alignment + // but truncation may come at the end and there is not a requirement + // for this to be sector aligned so long as we do not attempt to write + // after that. The interface docs state that the behavior is undefined + // in that case. Status s = ftruncate(file_data_->GetName(), file_data_->GetFileHandle(), size); + if (s.ok()) { next_write_offset_ = size; } @@ -888,14 +909,14 @@ Status WinWritableImpl::CloseImpl() { auto hFile = file_data_->GetFileHandle(); assert(INVALID_HANDLE_VALUE != hFile); - if (fsync(hFile) < 0) { + if (!::FlushFileBuffers(hFile)) { auto lastError = GetLastError(); - s = IOErrorFromWindowsError("fsync failed at Close() for: " + + s = IOErrorFromWindowsError("FlushFileBuffers failed at Close() for: " + file_data_->GetName(), lastError); } - if(!file_data_->CloseFile()) { + if(!file_data_->CloseFile() && s.ok()) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("CloseHandle failed for: " + file_data_->GetName(), lastError); @@ -906,11 +927,10 @@ Status WinWritableImpl::CloseImpl() { inline Status WinWritableImpl::SyncImpl() { Status s; - // Calls flush buffers - if (!file_data_->use_direct_io() && fsync(file_data_->GetFileHandle()) < 0) { + if (!::FlushFileBuffers (file_data_->GetFileHandle())) { auto lastError = GetLastError(); s = IOErrorFromWindowsError( - "fsync failed at Sync() for: " + file_data_->GetName(), lastError); + "FlushFileBuffers failed at Sync() for: " + file_data_->GetName(), lastError); } return s; } diff --git a/port/win/io_win.h b/port/win/io_win.h index 2b9a75642..e177a2e3e 100644 --- a/port/win/io_win.h +++ b/port/win/io_win.h @@ -40,22 +40,13 @@ inline Status IOError(const std::string& context, int err_number) { : Status::IOError(context, strerror(err_number)); } -// Note the below two do not set errno because they are used only here in this -// file -// on a Windows handle and, therefore, not necessary. Translating GetLastError() -// to errno -// is a sad business -inline int fsync(HANDLE hFile) { - if (!FlushFileBuffers(hFile)) { - return -1; - } - - return 0; -} +class WinFileData; -SSIZE_T pwrite(HANDLE hFile, const char* src, size_t numBytes, uint64_t offset); +Status pwrite(const WinFileData* file_data, const Slice& data, + uint64_t offset, size_t& bytes_written); -SSIZE_T pread(HANDLE hFile, char* src, size_t numBytes, uint64_t offset); +Status pread(const WinFileData* file_data, char* src, size_t num_bytes, + uint64_t offset, size_t& bytes_read); Status fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size); @@ -104,8 +95,8 @@ class WinFileData { class WinSequentialFile : protected WinFileData, public SequentialFile { // Override for behavior change when creating a custom env - virtual SSIZE_T PositionedReadInternal(char* src, size_t numBytes, - uint64_t offset) const; + virtual Status PositionedReadInternal(char* src, size_t numBytes, + uint64_t offset, size_t& bytes_read) const; public: WinSequentialFile(const std::string& fname, HANDLE f, @@ -240,8 +231,8 @@ class WinRandomAccessImpl { size_t alignment_; // Override for behavior change when creating a custom env - virtual SSIZE_T PositionedReadInternal(char* src, size_t numBytes, - uint64_t offset) const; + virtual Status PositionedReadInternal(char* src, size_t numBytes, + uint64_t offset, size_t& bytes_read) const; WinRandomAccessImpl(WinFileData* file_base, size_t alignment, const EnvOptions& options); diff --git a/tools/db_stress.cc b/tools/db_stress.cc index 64e7b2a05..ce4128052 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -857,7 +857,7 @@ class SharedState { "Cannot use --expected_values_path on when " "--clear_column_family_one_in is greater than zero."); } - size_t size; + size_t size = 0; if (status.ok()) { status = FLAGS_env->GetFileSize(FLAGS_expected_values_path, &size); }