// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #if defined(OS_WIN) #include "port/win/env_win.h" #include // _rmdir, _mkdir, _getcwd #include #include // _access #include // for uuid generation #include #include #include #include #include #include #include #include "monitoring/iostats_context_imp.h" #include "monitoring/thread_status_updater.h" #include "monitoring/thread_status_util.h" #include "port/port.h" #include "port/port_dirent.h" #include "port/win/io_win.h" #include "port/win/win_logger.h" #include "port/win/win_thread.h" #include "rocksdb/env.h" #include "rocksdb/slice.h" #include "strsafe.h" // Undefine the functions windows might use (again)... #undef GetCurrentTime #undef DeleteFile #undef LoadLibrary namespace ROCKSDB_NAMESPACE { ThreadStatusUpdater* CreateThreadStatusUpdater() { return new ThreadStatusUpdater(); } namespace { // Sector size used when physical sector size cannot be obtained from device. static const size_t kSectorSize = 512; // RAII helpers for HANDLEs const auto CloseHandleFunc = [](HANDLE h) { ::CloseHandle(h); }; typedef std::unique_ptr UniqueCloseHandlePtr; const auto FindCloseFunc = [](HANDLE h) { ::FindClose(h); }; typedef std::unique_ptr UniqueFindClosePtr; void WinthreadCall(const char* label, std::error_code result) { if (0 != result.value()) { fprintf(stderr, "pthread %s: %s\n", label, strerror(result.value())); abort(); } } } // namespace namespace port { WinClock::WinClock() : perf_counter_frequency_(0), nano_seconds_per_period_(0), GetSystemTimePreciseAsFileTime_(NULL) { { LARGE_INTEGER qpf; BOOL ret __attribute__((__unused__)); ret = QueryPerformanceFrequency(&qpf); assert(ret == TRUE); perf_counter_frequency_ = qpf.QuadPart; if (std::nano::den % perf_counter_frequency_ == 0) { nano_seconds_per_period_ = std::nano::den / perf_counter_frequency_; } } HMODULE module = GetModuleHandle("kernel32.dll"); if (module != NULL) { GetSystemTimePreciseAsFileTime_ = (FnGetSystemTimePreciseAsFileTime)GetProcAddress( module, "GetSystemTimePreciseAsFileTime"); } } const std::shared_ptr& WinClock::Default() { static std::shared_ptr clock = std::make_shared(); return clock; } void WinClock::SleepForMicroseconds(int micros) { std::this_thread::sleep_for(std::chrono::microseconds(micros)); } std::string WinClock::TimeToString(uint64_t secondsSince1970) { std::string result; const time_t seconds = secondsSince1970; const int maxsize = 64; struct tm t; errno_t ret = localtime_s(&t, &seconds); if (ret) { result = std::to_string(seconds); } else { result.resize(maxsize); char* p = &result[0]; int len = snprintf(p, maxsize, "%04d/%02d/%02d-%02d:%02d:%02d ", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec); assert(len > 0); result.resize(len); } return result; } uint64_t WinClock::NowMicros() { if (GetSystemTimePreciseAsFileTime_ != NULL) { // all std::chrono clocks on windows proved to return // values that may repeat that is not good enough for some uses. const int64_t c_UnixEpochStartTicks = 116444736000000000LL; const int64_t c_FtToMicroSec = 10; // This interface needs to return system time and not // just any microseconds because it is often used as an argument // to TimedWait() on condition variable FILETIME ftSystemTime; GetSystemTimePreciseAsFileTime_(&ftSystemTime); LARGE_INTEGER li; li.LowPart = ftSystemTime.dwLowDateTime; li.HighPart = ftSystemTime.dwHighDateTime; // Subtract unix epoch start li.QuadPart -= c_UnixEpochStartTicks; // Convert to microsecs li.QuadPart /= c_FtToMicroSec; return li.QuadPart; } using namespace std::chrono; return duration_cast(system_clock::now().time_since_epoch()) .count(); } uint64_t WinClock::NowNanos() { if (nano_seconds_per_period_ != 0) { // all std::chrono clocks on windows have the same resolution that is only // good enough for microseconds but not nanoseconds // On Windows 8 and Windows 2012 Server // GetSystemTimePreciseAsFileTime(¤t_time) can be used LARGE_INTEGER li; QueryPerformanceCounter(&li); // Convert performance counter to nanoseconds by precomputed ratio. // Directly multiply nano::den with li.QuadPart causes overflow. // Only do this when nano::den is divisible by perf_counter_frequency_, // which most likely is the case in reality. If it's not, fall back to // high_resolution_clock, which may be less precise under old compilers. li.QuadPart *= nano_seconds_per_period_; return li.QuadPart; } using namespace std::chrono; return duration_cast( high_resolution_clock::now().time_since_epoch()) .count(); } Status WinClock::GetCurrentTime(int64_t* unix_time) { time_t time = std::time(nullptr); if (time == (time_t)(-1)) { return Status::NotSupported("Failed to get time"); } *unix_time = time; return Status::OK(); } WinFileSystem::WinFileSystem(const std::shared_ptr& clock) : clock_(clock), page_size_(4 * 1024), allocation_granularity_(page_size_) { SYSTEM_INFO sinfo; GetSystemInfo(&sinfo); page_size_ = sinfo.dwPageSize; allocation_granularity_ = sinfo.dwAllocationGranularity; } const std::shared_ptr& WinFileSystem::Default() { static std::shared_ptr fs = std::make_shared(WinClock::Default()); return fs; } WinEnvIO::WinEnvIO(Env* hosted_env) : hosted_env_(hosted_env) {} WinEnvIO::~WinEnvIO() {} IOStatus WinFileSystem::DeleteFile(const std::string& fname, const IOOptions& /*options*/, IODebugContext* /*dbg*/) { IOStatus result; BOOL ret = RX_DeleteFile(RX_FN(fname).c_str()); if (!ret) { auto lastError = GetLastError(); result = IOErrorFromWindowsError("Failed to delete: " + fname, lastError); } return result; } IOStatus WinFileSystem::Truncate(const std::string& fname, size_t size, const IOOptions& /*options*/, IODebugContext* /*dbg*/) { IOStatus s; int result = ROCKSDB_NAMESPACE::port::Truncate(fname, size); if (result != 0) { s = IOError("Failed to truncate: " + fname, errno); } return s; } IOStatus WinFileSystem::NewSequentialFile( const std::string& fname, const FileOptions& options, std::unique_ptr* result, IODebugContext* /*dbg*/) { IOStatus s; result->reset(); // Corruption test needs to rename and delete files of these kind // while they are still open with another handle. For that reason we // allow share_write and delete(allows rename). HANDLE hFile = INVALID_HANDLE_VALUE; DWORD fileFlags = FILE_ATTRIBUTE_READONLY; if (options.use_direct_reads && !options.use_mmap_reads) { fileFlags |= FILE_FLAG_NO_BUFFERING; } { IOSTATS_TIMER_GUARD(open_nanos); hFile = RX_CreateFile( RX_FN(fname).c_str(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, // Original fopen mode is "rb" fileFlags, NULL); } if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("Failed to open NewSequentialFile" + fname, lastError); } else { result->reset(new WinSequentialFile(fname, hFile, options)); } return s; } IOStatus WinFileSystem::NewRandomAccessFile( const std::string& fname, const FileOptions& options, std::unique_ptr* result, IODebugContext* dbg) { result->reset(); IOStatus s; // Open the file for read-only random access // Random access is to disable read-ahead as the system reads too much data DWORD fileFlags = FILE_ATTRIBUTE_READONLY; if (options.use_direct_reads && !options.use_mmap_reads) { fileFlags |= FILE_FLAG_NO_BUFFERING; } else { fileFlags |= FILE_FLAG_RANDOM_ACCESS; } /// Shared access is necessary for corruption test to pass // almost all tests would work with a possible exception of fault_injection HANDLE hFile = 0; { IOSTATS_TIMER_GUARD(open_nanos); hFile = RX_CreateFile(RX_FN(fname).c_str(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, fileFlags, NULL); } if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "NewRandomAccessFile failed to Create/Open: " + fname, lastError); } UniqueCloseHandlePtr fileGuard(hFile, CloseHandleFunc); // CAUTION! This will map the entire file into the process address space if (options.use_mmap_reads && sizeof(void*) >= 8) { // Use mmap when virtual address-space is plentiful. uint64_t fileSize; s = GetFileSize(fname, IOOptions(), &fileSize, dbg); if (s.ok()) { // Will not map empty files if (fileSize == 0) { return IOError("NewRandomAccessFile failed to map empty file: " + fname, EINVAL); } HANDLE hMap = RX_CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, // At its present length 0, NULL); // Mapping name if (!hMap) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "Failed to create file mapping for NewRandomAccessFile: " + fname, lastError); } UniqueCloseHandlePtr mapGuard(hMap, CloseHandleFunc); const void* mapped_region = MapViewOfFileEx(hMap, FILE_MAP_READ, 0, // High DWORD of access start 0, // Low DWORD static_cast(fileSize), NULL); // Let the OS choose the mapping if (!mapped_region) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "Failed to MapViewOfFile for NewRandomAccessFile: " + fname, lastError); } result->reset(new WinMmapReadableFile(fname, hFile, hMap, mapped_region, static_cast(fileSize))); mapGuard.release(); fileGuard.release(); } } else { result->reset(new WinRandomAccessFile( fname, hFile, std::max(GetSectorSize(fname), page_size_), options)); fileGuard.release(); } return s; } IOStatus WinFileSystem::OpenWritableFile( const std::string& fname, const FileOptions& options, std::unique_ptr* result, bool reopen) { const size_t c_BufferCapacity = 64 * 1024; EnvOptions local_options(options); result->reset(); IOStatus s; DWORD fileFlags = FILE_ATTRIBUTE_NORMAL; if (local_options.use_direct_writes && !local_options.use_mmap_writes) { fileFlags = FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; } // Desired access. We are want to write only here but if we want to memory // map // the file then there is no write only mode so we have to create it // Read/Write // However, MapViewOfFile specifies only Write only DWORD desired_access = GENERIC_WRITE; DWORD shared_mode = FILE_SHARE_READ; if (local_options.use_mmap_writes) { desired_access |= GENERIC_READ; } else { // Adding this solely for tests to pass (fault_injection_test, // wal_manager_test). shared_mode |= (FILE_SHARE_WRITE | FILE_SHARE_DELETE); } // This will always truncate the file DWORD creation_disposition = CREATE_ALWAYS; if (reopen) { creation_disposition = OPEN_ALWAYS; } HANDLE hFile = 0; { IOSTATS_TIMER_GUARD(open_nanos); hFile = RX_CreateFile( RX_FN(fname).c_str(), desired_access, // Access desired shared_mode, NULL, // Security attributes // Posix env says (reopen) ? (O_CREATE | O_APPEND) : O_CREAT | O_TRUNC creation_disposition, fileFlags, // Flags NULL); // Template File } if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "Failed to create a NewWriteableFile: " + fname, lastError); } // We will start writing at the end, appending if (reopen) { LARGE_INTEGER zero_move; zero_move.QuadPart = 0; BOOL ret = SetFilePointerEx(hFile, zero_move, NULL, FILE_END); if (!ret) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "Failed to create a ReopenWritableFile move to the end: " + fname, lastError); } } if (options.use_mmap_writes) { // We usually do not use mmmapping on SSD and thus we pass memory // page_size result->reset(new WinMmapFile(fname, hFile, page_size_, allocation_granularity_, local_options)); } else { // Here we want the buffer allocation to be aligned by the SSD page size // and to be a multiple of it result->reset(new WinWritableFile( fname, hFile, std::max(GetSectorSize(fname), GetPageSize()), c_BufferCapacity, local_options)); } return s; } IOStatus WinFileSystem::NewWritableFile(const std::string& fname, const FileOptions& options, std::unique_ptr* result, IODebugContext* /*dbg*/) { return OpenWritableFile(fname, options, result, false); } IOStatus WinFileSystem::ReopenWritableFile( const std::string& fname, const FileOptions& options, std::unique_ptr* result, IODebugContext* /*dbg*/) { return OpenWritableFile(fname, options, result, true); } IOStatus WinFileSystem::NewRandomRWFile(const std::string& fname, const FileOptions& options, std::unique_ptr* result, IODebugContext* /*dbg*/) { IOStatus s; // Open the file for read-only random access // Random access is to disable read-ahead as the system reads too much data DWORD desired_access = GENERIC_READ | GENERIC_WRITE; DWORD shared_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE; DWORD creation_disposition = OPEN_EXISTING; // Fail if file does not exist DWORD file_flags = FILE_FLAG_RANDOM_ACCESS; if (options.use_direct_reads && options.use_direct_writes) { file_flags |= FILE_FLAG_NO_BUFFERING; } /// Shared access is necessary for corruption test to pass // almost all tests would work with a possible exception of fault_injection HANDLE hFile = 0; { IOSTATS_TIMER_GUARD(open_nanos); hFile = RX_CreateFile(RX_FN(fname).c_str(), desired_access, shared_mode, NULL, // Security attributes creation_disposition, file_flags, NULL); } if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "NewRandomRWFile failed to Create/Open: " + fname, lastError); } UniqueCloseHandlePtr fileGuard(hFile, CloseHandleFunc); result->reset(new WinRandomRWFile( fname, hFile, std::max(GetSectorSize(fname), GetPageSize()), options)); fileGuard.release(); return s; } IOStatus WinFileSystem::NewMemoryMappedFileBuffer( const std::string& fname, std::unique_ptr* result) { IOStatus s; result->reset(); DWORD fileFlags = FILE_ATTRIBUTE_READONLY; HANDLE hFile = INVALID_HANDLE_VALUE; { IOSTATS_TIMER_GUARD(open_nanos); hFile = RX_CreateFile( RX_FN(fname).c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, // Open only if it exists fileFlags, NULL); } if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); s = IOErrorFromWindowsError( "Failed to open NewMemoryMappedFileBuffer: " + fname, lastError); return s; } UniqueCloseHandlePtr fileGuard(hFile, CloseHandleFunc); uint64_t fileSize = 0; s = GetFileSize(fname, IOOptions(), &fileSize, nullptr); if (!s.ok()) { return s; } // Will not map empty files if (fileSize == 0) { return IOStatus::NotSupported( "NewMemoryMappedFileBuffer can not map zero length files: " + fname); } // size_t is 32-bit with 32-bit builds if (fileSize > std::numeric_limits::max()) { return IOStatus::NotSupported( "The specified file size does not fit into 32-bit memory addressing: " + fname); } HANDLE hMap = RX_CreateFileMapping(hFile, NULL, PAGE_READWRITE, 0, // Whole file at its present length 0, NULL); // Mapping name if (!hMap) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "Failed to create file mapping for: " + fname, lastError); } UniqueCloseHandlePtr mapGuard(hMap, CloseHandleFunc); void* base = MapViewOfFileEx(hMap, FILE_MAP_WRITE, 0, // High DWORD of access start 0, // Low DWORD static_cast(fileSize), NULL); // Let the OS choose the mapping if (!base) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "Failed to MapViewOfFile for NewMemoryMappedFileBuffer: " + fname, lastError); } result->reset(new WinMemoryMappedBuffer(hFile, hMap, base, static_cast(fileSize))); mapGuard.release(); fileGuard.release(); return s; } IOStatus WinFileSystem::NewDirectory(const std::string& name, const IOOptions& /*options*/, std::unique_ptr* result, IODebugContext* /*dbg*/) { IOStatus s; // Must be nullptr on failure result->reset(); if (!DirExists(name)) { s = IOErrorFromWindowsError("open folder: " + name, ERROR_DIRECTORY); return s; } HANDLE handle = INVALID_HANDLE_VALUE; // 0 - for access means read metadata { IOSTATS_TIMER_GUARD(open_nanos); handle = RX_CreateFile( RX_FN(name).c_str(), 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, // make opening folders possible NULL); } if (INVALID_HANDLE_VALUE == handle) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("open folder: " + name, lastError); return s; } result->reset(new WinDirectory(handle)); return s; } IOStatus WinFileSystem::FileExists(const std::string& fname, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { IOStatus s; // TODO: This does not follow symbolic links at this point // which is consistent with _access() impl on windows // but can be added WIN32_FILE_ATTRIBUTE_DATA attrs; if (FALSE == RX_GetFileAttributesEx(RX_FN(fname).c_str(), GetFileExInfoStandard, &attrs)) { auto lastError = GetLastError(); switch (lastError) { case ERROR_ACCESS_DENIED: case ERROR_NOT_FOUND: case ERROR_FILE_NOT_FOUND: case ERROR_PATH_NOT_FOUND: s = IOStatus::NotFound(); break; default: s = IOErrorFromWindowsError("Unexpected error for: " + fname, lastError); break; } } return s; } IOStatus WinFileSystem::GetChildren(const std::string& dir, const IOOptions& /*opts*/, std::vector* result, IODebugContext* /*dbg*/) { IOStatus status; result->clear(); std::vector output; RX_WIN32_FIND_DATA data; memset(&data, 0, sizeof(data)); std::string pattern(dir); pattern.append("\\").append("*"); HANDLE handle = RX_FindFirstFileEx(RX_FN(pattern).c_str(), // Do not want alternative name FindExInfoBasic, &data, FindExSearchNameMatch, NULL, // lpSearchFilter 0); if (handle == INVALID_HANDLE_VALUE) { auto lastError = GetLastError(); switch (lastError) { case ERROR_NOT_FOUND: case ERROR_ACCESS_DENIED: case ERROR_FILE_NOT_FOUND: case ERROR_PATH_NOT_FOUND: status = IOStatus::NotFound(); break; default: status = IOErrorFromWindowsError("Failed to GetChhildren for: " + dir, lastError); } return status; } UniqueFindClosePtr fc(handle, FindCloseFunc); if (result->capacity() > 0) { output.reserve(result->capacity()); } // For safety data.cFileName[MAX_PATH - 1] = 0; while (true) { auto x = RX_FILESTRING(data.cFileName, RX_FNLEN(data.cFileName)); output.emplace_back(FN_TO_RX(x)); BOOL ret = -RX_FindNextFile(handle, &data); // If the function fails the return value is zero // and non-zero otherwise. Not TRUE or FALSE. if (ret == FALSE) { // Posix does not care why we stopped break; } data.cFileName[MAX_PATH - 1] = 0; } output.swap(*result); return status; } IOStatus WinFileSystem::CreateDir(const std::string& name, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { IOStatus result; BOOL ret = RX_CreateDirectory(RX_FN(name).c_str(), NULL); if (!ret) { auto lastError = GetLastError(); result = IOErrorFromWindowsError("Failed to create a directory: " + name, lastError); } return result; } IOStatus WinFileSystem::CreateDirIfMissing(const std::string& name, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { IOStatus result; if (DirExists(name)) { return result; } BOOL ret = RX_CreateDirectory(RX_FN(name).c_str(), NULL); if (!ret) { auto lastError = GetLastError(); if (lastError != ERROR_ALREADY_EXISTS) { result = IOErrorFromWindowsError("Failed to create a directory: " + name, lastError); } else { result = IOStatus::IOError(name + ": exists but is not a directory"); } } return result; } IOStatus WinFileSystem::DeleteDir(const std::string& name, const IOOptions& /*options*/, IODebugContext* /*dbg*/) { IOStatus result; BOOL ret = RX_RemoveDirectory(RX_FN(name).c_str()); if (!ret) { auto lastError = GetLastError(); result = IOErrorFromWindowsError("Failed to remove dir: " + name, lastError); } return result; } IOStatus WinFileSystem::GetFileSize(const std::string& fname, const IOOptions& /*opts*/, uint64_t* size, IODebugContext* /*dbg*/) { IOStatus s; WIN32_FILE_ATTRIBUTE_DATA attrs; if (RX_GetFileAttributesEx(RX_FN(fname).c_str(), GetFileExInfoStandard, &attrs)) { ULARGE_INTEGER file_size; file_size.HighPart = attrs.nFileSizeHigh; file_size.LowPart = attrs.nFileSizeLow; *size = file_size.QuadPart; } else { auto lastError = GetLastError(); s = IOErrorFromWindowsError("Can not get size for: " + fname, lastError); } return s; } uint64_t WinFileSystem::FileTimeToUnixTime(const FILETIME& ftTime) { const uint64_t c_FileTimePerSecond = 10000000U; // UNIX epoch starts on 1970-01-01T00:00:00Z // Windows FILETIME starts on 1601-01-01T00:00:00Z // Therefore, we need to subtract the below number of seconds from // the seconds that we obtain from FILETIME with an obvious loss of // precision const uint64_t c_SecondBeforeUnixEpoch = 11644473600U; ULARGE_INTEGER li; li.HighPart = ftTime.dwHighDateTime; li.LowPart = ftTime.dwLowDateTime; uint64_t result = (li.QuadPart / c_FileTimePerSecond) - c_SecondBeforeUnixEpoch; return result; } IOStatus WinFileSystem::GetFileModificationTime(const std::string& fname, const IOOptions& /*opts*/, uint64_t* file_mtime, IODebugContext* /*dbg*/) { IOStatus s; WIN32_FILE_ATTRIBUTE_DATA attrs; if (RX_GetFileAttributesEx(RX_FN(fname).c_str(), GetFileExInfoStandard, &attrs)) { *file_mtime = FileTimeToUnixTime(attrs.ftLastWriteTime); } else { auto lastError = GetLastError(); s = IOErrorFromWindowsError( "Can not get file modification time for: " + fname, lastError); *file_mtime = 0; } return s; } IOStatus WinFileSystem::RenameFile(const std::string& src, const std::string& target, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { IOStatus result; // rename() is not capable of replacing the existing file as on Linux // so use OS API directly if (!RX_MoveFileEx(RX_FN(src).c_str(), RX_FN(target).c_str(), MOVEFILE_REPLACE_EXISTING)) { DWORD lastError = GetLastError(); std::string text("Failed to rename: "); text.append(src).append(" to: ").append(target); result = IOErrorFromWindowsError(text, lastError); } return result; } IOStatus WinFileSystem::LinkFile(const std::string& src, const std::string& target, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { IOStatus result; if (!RX_CreateHardLink(RX_FN(target).c_str(), RX_FN(src).c_str(), NULL)) { DWORD lastError = GetLastError(); if (lastError == ERROR_NOT_SAME_DEVICE) { return IOStatus::NotSupported("No cross FS links allowed"); } std::string text("Failed to link: "); text.append(src).append(" to: ").append(target); result = IOErrorFromWindowsError(text, lastError); } return result; } IOStatus WinFileSystem::NumFileLinks(const std::string& fname, const IOOptions& /*opts*/, uint64_t* count, IODebugContext* /*dbg*/) { IOStatus s; HANDLE handle = RX_CreateFile(RX_FN(fname).c_str(), 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); if (INVALID_HANDLE_VALUE == handle) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("NumFileLinks: " + fname, lastError); return s; } UniqueCloseHandlePtr handle_guard(handle, CloseHandleFunc); FILE_STANDARD_INFO standard_info; if (0 != GetFileInformationByHandleEx(handle, FileStandardInfo, &standard_info, sizeof(standard_info))) { *count = standard_info.NumberOfLinks; } else { auto lastError = GetLastError(); s = IOErrorFromWindowsError("GetFileInformationByHandleEx: " + fname, lastError); } return s; } IOStatus WinFileSystem::AreFilesSame(const std::string& first, const std::string& second, const IOOptions& /*opts*/, bool* res, IODebugContext* /*dbg*/) { // For MinGW builds #if (_WIN32_WINNT == _WIN32_WINNT_VISTA) IOStatus s = IOStatus::NotSupported(); #else assert(res != nullptr); IOStatus s; if (res == nullptr) { s = IOStatus::InvalidArgument("res"); return s; } // 0 - for access means read metadata HANDLE file_1 = RX_CreateFile( RX_FN(first).c_str(), 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, // make opening folders possible NULL); if (INVALID_HANDLE_VALUE == file_1) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("open file: " + first, lastError); return s; } UniqueCloseHandlePtr g_1(file_1, CloseHandleFunc); HANDLE file_2 = RX_CreateFile( RX_FN(second).c_str(), 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, // make opening folders possible NULL); if (INVALID_HANDLE_VALUE == file_2) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("open file: " + second, lastError); return s; } UniqueCloseHandlePtr g_2(file_2, CloseHandleFunc); FILE_ID_INFO FileInfo_1; BOOL result = GetFileInformationByHandleEx(file_1, FileIdInfo, &FileInfo_1, sizeof(FileInfo_1)); if (!result) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("stat file: " + first, lastError); return s; } FILE_ID_INFO FileInfo_2; result = GetFileInformationByHandleEx(file_2, FileIdInfo, &FileInfo_2, sizeof(FileInfo_2)); if (!result) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("stat file: " + second, lastError); return s; } if (FileInfo_1.VolumeSerialNumber == FileInfo_2.VolumeSerialNumber) { *res = (0 == memcmp(FileInfo_1.FileId.Identifier, FileInfo_2.FileId.Identifier, sizeof(FileInfo_1.FileId.Identifier))); } else { *res = false; } #endif return s; } IOStatus WinFileSystem::LockFile(const std::string& lockFname, const IOOptions& /*opts*/, FileLock** lock, IODebugContext* /*dbg*/) { assert(lock != nullptr); *lock = NULL; IOStatus result; // No-sharing, this is a LOCK file const DWORD ExclusiveAccessON = 0; // Obtain exclusive access to the LOCK file // Previously, instead of NORMAL attr we set DELETE on close and that worked // well except with fault_injection test that insists on deleting it. HANDLE hFile = 0; { IOSTATS_TIMER_GUARD(open_nanos); hFile = RX_CreateFile(RX_FN(lockFname).c_str(), (GENERIC_READ | GENERIC_WRITE), ExclusiveAccessON, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); } if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); result = IOErrorFromWindowsError("Failed to create lock file: " + lockFname, lastError); } else { *lock = new WinFileLock(hFile); } return result; } IOStatus WinFileSystem::UnlockFile(FileLock* lock, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { IOStatus result; assert(lock != nullptr); delete lock; return result; } IOStatus WinFileSystem::GetTestDirectory(const IOOptions& opts, std::string* result, IODebugContext* dbg) { std::string output; const char* env = getenv("TEST_TMPDIR"); if (env && env[0] != '\0') { output = env; } else { env = getenv("TMP"); if (env && env[0] != '\0') { output = env; } else { output = "c:\\tmp"; } } CreateDir(output, opts, dbg); output.append("\\testrocksdb-"); output.append(std::to_string(GetCurrentProcessId())); CreateDir(output, opts, dbg); output.swap(*result); return IOStatus::OK(); } IOStatus WinFileSystem::NewLogger(const std::string& fname, const IOOptions& /*opts*/, std::shared_ptr* result, IODebugContext* /*dbg*/) { IOStatus s; result->reset(); HANDLE hFile = 0; { IOSTATS_TIMER_GUARD(open_nanos); hFile = RX_CreateFile( RX_FN(fname).c_str(), GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_DELETE, // In RocksDb log files are // renamed and deleted before // they are closed. This enables // doing so. NULL, CREATE_ALWAYS, // Original fopen mode is "w" FILE_ATTRIBUTE_NORMAL, NULL); } if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("Failed to open LogFile" + fname, lastError); } else { { // With log files we want to set the true creation time as of now // because the system // for some reason caches the attributes of the previous file that just // been renamed from // this name so auto_roll_logger_test fails FILETIME ft; GetSystemTimeAsFileTime(&ft); // Set creation, last access and last write time to the same value SetFileTime(hFile, &ft, &ft, &ft); } result->reset(new WinLogger(&WinEnvThreads::gettid, clock_, hFile)); } return s; } IOStatus WinFileSystem::IsDirectory(const std::string& path, const IOOptions& /*opts*/, bool* is_dir, IODebugContext* /*dbg*/) { BOOL ret = RX_PathIsDirectory(RX_FN(path).c_str()); if (is_dir) { *is_dir = ret ? true : false; } return IOStatus::OK(); } Status WinEnvIO::GetHostName(char* name, uint64_t len) { Status s; DWORD nSize = static_cast( std::min(len, std::numeric_limits::max())); if (!::GetComputerNameA(name, &nSize)) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("GetHostName", lastError); } else { name[nSize] = 0; } return s; } IOStatus WinFileSystem::GetAbsolutePath(const std::string& db_path, const IOOptions& /*options*/, std::string* output_path, IODebugContext* dbg) { // Check if we already have an absolute path // For test compatibility we will consider starting slash as an // absolute path if ((!db_path.empty() && (db_path[0] == '\\' || db_path[0] == '/')) || !RX_PathIsRelative(RX_FN(db_path).c_str())) { *output_path = db_path; return IOStatus::OK(); } RX_FILESTRING result; result.resize(MAX_PATH); // Hopefully no changes the current directory while we do this // however _getcwd also suffers from the same limitation DWORD len = RX_GetCurrentDirectory(MAX_PATH, &result[0]); if (len == 0) { auto lastError = GetLastError(); return IOErrorFromWindowsError("Failed to get current working directory", lastError); } result.resize(len); std::string res = FN_TO_RX(result); res.swap(*output_path); return IOStatus::OK(); } IOStatus WinFileSystem::GetFreeSpace(const std::string& path, const IOOptions& /*options*/, uint64_t* diskfree, IODebugContext* /*dbg*/) { assert(diskfree != nullptr); ULARGE_INTEGER freeBytes; BOOL f = RX_GetDiskFreeSpaceEx(RX_FN(path).c_str(), &freeBytes, NULL, NULL); if (f) { *diskfree = freeBytes.QuadPart; return IOStatus::OK(); } else { DWORD lastError = GetLastError(); return IOErrorFromWindowsError("Failed to get free space: " + path, lastError); } } FileOptions WinFileSystem::OptimizeForLogWrite( const FileOptions& file_options, const DBOptions& db_options) const { FileOptions optimized(file_options); // These two the same as default optimizations optimized.bytes_per_sync = db_options.wal_bytes_per_sync; optimized.writable_file_max_buffer_size = db_options.writable_file_max_buffer_size; // This adversely affects %999 on windows optimized.use_mmap_writes = false; // Direct writes will produce a huge perf impact on // Windows. Pre-allocate space for WAL. optimized.use_direct_writes = false; return optimized; } FileOptions WinFileSystem::OptimizeForManifestWrite( const FileOptions& options) const { FileOptions optimized(options); optimized.use_mmap_writes = false; optimized.use_direct_reads = false; return optimized; } FileOptions WinFileSystem::OptimizeForManifestRead( const FileOptions& file_options) const { FileOptions optimized(file_options); optimized.use_mmap_writes = false; optimized.use_direct_reads = false; return optimized; } // Returns true iff the named directory exists and is a directory. bool WinFileSystem::DirExists(const std::string& dname) { WIN32_FILE_ATTRIBUTE_DATA attrs; if (RX_GetFileAttributesEx(RX_FN(dname).c_str(), GetFileExInfoStandard, &attrs)) { return 0 != (attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY); } return false; } size_t WinFileSystem::GetSectorSize(const std::string& fname) { size_t sector_size = kSectorSize; if (RX_PathIsRelative(RX_FN(fname).c_str())) { return sector_size; } // obtain device handle char devicename[7] = "\\\\.\\"; int erresult = strncat_s(devicename, sizeof(devicename), fname.c_str(), 2); if (erresult) { assert(false); return sector_size; } HANDLE hDevice = CreateFile(devicename, 0, 0, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); if (hDevice == INVALID_HANDLE_VALUE) { return sector_size; } STORAGE_PROPERTY_QUERY spropertyquery; spropertyquery.PropertyId = StorageAccessAlignmentProperty; spropertyquery.QueryType = PropertyStandardQuery; BYTE output_buffer[sizeof(STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR)]; DWORD output_bytes = 0; BOOL ret = DeviceIoControl( hDevice, IOCTL_STORAGE_QUERY_PROPERTY, &spropertyquery, sizeof(spropertyquery), output_buffer, sizeof(STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR), &output_bytes, nullptr); if (ret) { sector_size = ((STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR*)output_buffer) ->BytesPerLogicalSector; } else { // many devices do not support StorageProcessAlignmentProperty. Any failure // here and we fall back to logical alignment DISK_GEOMETRY_EX geometry = {0}; ret = DeviceIoControl(hDevice, IOCTL_DISK_GET_DRIVE_GEOMETRY, nullptr, 0, &geometry, sizeof(geometry), &output_bytes, nullptr); if (ret) { sector_size = geometry.Geometry.BytesPerSector; } } if (hDevice != INVALID_HANDLE_VALUE) { CloseHandle(hDevice); } return sector_size; } //////////////////////////////////////////////////////////////////////// // WinEnvThreads WinEnvThreads::WinEnvThreads(Env* hosted_env) : hosted_env_(hosted_env), thread_pools_(Env::Priority::TOTAL) { for (int pool_id = 0; pool_id < Env::Priority::TOTAL; ++pool_id) { thread_pools_[pool_id].SetThreadPriority( static_cast(pool_id)); // This allows later initializing the thread-local-env of each thread. thread_pools_[pool_id].SetHostEnv(hosted_env); } } WinEnvThreads::~WinEnvThreads() { WaitForJoin(); for (auto& thpool : thread_pools_) { thpool.JoinAllThreads(); } } void WinEnvThreads::Schedule(void (*function)(void*), void* arg, Env::Priority pri, void* tag, void (*unschedFunction)(void* arg)) { assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH); thread_pools_[pri].Schedule(function, arg, tag, unschedFunction); } int WinEnvThreads::UnSchedule(void* arg, Env::Priority pri) { return thread_pools_[pri].UnSchedule(arg); } namespace { struct StartThreadState { void (*user_function)(void*); void* arg; }; void* StartThreadWrapper(void* arg) { std::unique_ptr state( reinterpret_cast(arg)); state->user_function(state->arg); return nullptr; } } // namespace void WinEnvThreads::StartThread(void (*function)(void* arg), void* arg) { std::unique_ptr state(new StartThreadState); state->user_function = function; state->arg = arg; try { ROCKSDB_NAMESPACE::port::WindowsThread th(&StartThreadWrapper, state.get()); state.release(); std::lock_guard lg(mu_); threads_to_join_.push_back(std::move(th)); } catch (const std::system_error& ex) { WinthreadCall("start thread", ex.code()); } } void WinEnvThreads::WaitForJoin() { for (auto& th : threads_to_join_) { th.join(); } threads_to_join_.clear(); } unsigned int WinEnvThreads::GetThreadPoolQueueLen(Env::Priority pri) const { assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH); return thread_pools_[pri].GetQueueLen(); } uint64_t WinEnvThreads::gettid() { uint64_t thread_id = GetCurrentThreadId(); return thread_id; } uint64_t WinEnvThreads::GetThreadID() const { return gettid(); } void WinEnvThreads::SetBackgroundThreads(int num, Env::Priority pri) { assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH); thread_pools_[pri].SetBackgroundThreads(num); } int WinEnvThreads::GetBackgroundThreads(Env::Priority pri) { assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH); return thread_pools_[pri].GetBackgroundThreads(); } void WinEnvThreads::IncBackgroundThreadsIfNeeded(int num, Env::Priority pri) { assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH); thread_pools_[pri].IncBackgroundThreadsIfNeeded(num); } ///////////////////////////////////////////////////////////////////////// // WinEnv WinEnv::WinEnv() : CompositeEnv(WinFileSystem::Default()), winenv_io_(this), winenv_threads_(this) { // Protected member of the base class clock_ = WinClock::Default(); thread_status_updater_ = CreateThreadStatusUpdater(); } WinEnv::~WinEnv() { // All threads must be joined before the deletion of // thread_status_updater_. delete thread_status_updater_; } Status WinEnv::GetThreadList(std::vector* thread_list) { assert(thread_status_updater_); return thread_status_updater_->GetThreadList(thread_list); } Status WinEnv::GetCurrentTime(int64_t* unix_time) { return clock_->GetCurrentTime(unix_time); } uint64_t WinEnv::NowMicros() { return clock_->NowMicros(); } uint64_t WinEnv::NowNanos() { return clock_->NowNanos(); } Status WinEnv::GetHostName(char* name, uint64_t len) { return winenv_io_.GetHostName(name, len); } std::string WinEnv::TimeToString(uint64_t secondsSince1970) { return clock_->TimeToString(secondsSince1970); } void WinEnv::Schedule(void (*function)(void*), void* arg, Env::Priority pri, void* tag, void (*unschedFunction)(void* arg)) { return winenv_threads_.Schedule(function, arg, pri, tag, unschedFunction); } int WinEnv::UnSchedule(void* arg, Env::Priority pri) { return winenv_threads_.UnSchedule(arg, pri); } void WinEnv::StartThread(void (*function)(void* arg), void* arg) { return winenv_threads_.StartThread(function, arg); } void WinEnv::WaitForJoin() { return winenv_threads_.WaitForJoin(); } unsigned int WinEnv::GetThreadPoolQueueLen(Env::Priority pri) const { return winenv_threads_.GetThreadPoolQueueLen(pri); } uint64_t WinEnv::GetThreadID() const { return winenv_threads_.GetThreadID(); } void WinEnv::SleepForMicroseconds(int micros) { return clock_->SleepForMicroseconds(micros); } // Allow increasing the number of worker threads. void WinEnv::SetBackgroundThreads(int num, Env::Priority pri) { return winenv_threads_.SetBackgroundThreads(num, pri); } int WinEnv::GetBackgroundThreads(Env::Priority pri) { return winenv_threads_.GetBackgroundThreads(pri); } void WinEnv::IncBackgroundThreadsIfNeeded(int num, Env::Priority pri) { return winenv_threads_.IncBackgroundThreadsIfNeeded(num, pri); } } // namespace port std::string Env::GenerateUniqueId() { std::string result; UUID uuid; UuidCreateSequential(&uuid); RPC_CSTR rpc_str; auto status = UuidToStringA(&uuid, &rpc_str); (void)status; assert(status == RPC_S_OK); result = reinterpret_cast(rpc_str); status = RpcStringFreeA(&rpc_str); assert(status == RPC_S_OK); return result; } std::shared_ptr FileSystem::Default() { return port::WinFileSystem::Default(); } std::unique_ptr NewCompositeEnv(const std::shared_ptr& fs) { return std::unique_ptr(new CompositeEnvWrapper(Env::Default(), fs)); } } // namespace ROCKSDB_NAMESPACE #endif