diff --git a/.circleci/config.yml b/.circleci/config.yml index 12b5b7f08..4ddb651b4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -516,7 +516,7 @@ jobs: resource_class: large steps: - pre-steps - - run: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS='--duration=960 --max_key=2500000' blackbox_crash_test_with_atomic_flush + - run: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS='--duration=960 --max_key=2500000 --async_io=0' blackbox_crash_test_with_atomic_flush - post-steps build-linux-crashtest-tiered-storage-bb: @@ -526,7 +526,7 @@ jobs: - pre-steps - run: name: "run crashtest" - command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS=--duration=10800 blackbox_crash_test_with_tiered_storage + command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS='--duration=10800 --async_io=0' blackbox_crash_test_with_tiered_storage no_output_timeout: 100m - post-steps @@ -537,7 +537,7 @@ jobs: - pre-steps - run: name: "run crashtest" - command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS=--duration=10800 whitebox_crash_test_with_tiered_storage + command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS='--duration=10800 --async_io=0' whitebox_crash_test_with_tiered_storage no_output_timeout: 100m - post-steps diff --git a/HISTORY.md b/HISTORY.md index bb6833091..15e755088 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,6 +3,7 @@ ### Behavior changes * `ReadOptions::verify_checksums=false` disables checksum verification for more reads of non-`CacheEntryRole::kDataBlock` blocks. +* During a scan with async_io enabled, if the posix filesystem doesn't support IOUring, a Status::NotSupported error is now returned to users. Previously, that error was swallowed and reads silently fell back to synchronous reads. ### Bug Fixes * Fixed a data race on `ColumnFamilyData::flush_reason` caused by concurrent flushes. 
diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index eda58c0b3..4043c7b54 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -1073,4 +1073,6 @@ DEFINE_uint64(stats_dump_period_sec, ROCKSDB_NAMESPACE::Options().stats_dump_period_sec, "Gap between printing stats to log in seconds"); +extern "C" bool RocksDbIOUringEnable() { return true; } + #endif // GFLAGS diff --git a/db_stress_tool/db_stress_tool.cc b/db_stress_tool/db_stress_tool.cc index fd28856b7..c41c5051f 100644 --- a/db_stress_tool/db_stress_tool.cc +++ b/db_stress_tool/db_stress_tool.cc @@ -99,13 +99,6 @@ int db_stress_tool(int argc, char** argv) { env_wrapper_guard = std::make_shared( raw_env, std::make_shared(raw_env->GetFileSystem())); - if (!env_opts) { - // If using the default Env (Posix), wrap DbStressEnvWrapper with the - // legacy EnvWrapper. This is a temporary fix for the ReadAsync interface - // not being properly supported with Posix and db_stress. The EnvWrapper - // has a default implementation of ReadAsync that redirects to Read. 
- env_wrapper_guard = std::make_shared(env_wrapper_guard); - } db_stress_env = env_wrapper_guard.get(); FLAGS_rep_factory = StringToRepFactory(FLAGS_memtablerep.c_str()); diff --git a/file/file_prefetch_buffer.cc b/file/file_prefetch_buffer.cc index f7d4c9591..d16b937df 100644 --- a/file/file_prefetch_buffer.cc +++ b/file/file_prefetch_buffer.cc @@ -569,10 +569,11 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal( bufs_[second].offset_ = rounddown_start2; assert(roundup_len2 >= chunk_len2); uint64_t read_len2 = static_cast(roundup_len2 - chunk_len2); - Status tmp_s = ReadAsync(opts, reader, read_len2, rounddown_start2, second); - if (!tmp_s.ok()) { + s = ReadAsync(opts, reader, read_len2, rounddown_start2, second); + if (!s.ok()) { DestroyAndClearIOHandle(second); bufs_[second].buffer_.Clear(); + return s; } } diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index 20e3a15b3..f1ae762c5 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -1609,27 +1609,23 @@ TEST_P(PrefetchTest, ReadAsyncWithPosixFS) { num_keys++; } - ASSERT_EQ(num_keys, total_keys); - ASSERT_GT(buff_prefetch_count, 0); - - // Check stats to make sure async prefetch is done. - { + if (read_async_called) { + ASSERT_EQ(num_keys, total_keys); + ASSERT_GT(buff_prefetch_count, 0); + // Check stats to make sure async prefetch is done. HistogramData async_read_bytes; options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); HistogramData prefetched_bytes_discarded; options.statistics->histogramData(PREFETCHED_BYTES_DISCARDED, &prefetched_bytes_discarded); - + ASSERT_GT(async_read_bytes.count, 0); + ASSERT_GT(prefetched_bytes_discarded.count, 0); + ASSERT_EQ(get_perf_context()->number_async_seek, 0); + } else { // Not all platforms support iouring. In that case, ReadAsync in posix // won't submit async requests. 
- if (read_async_called) { - ASSERT_GT(async_read_bytes.count, 0); - } else { - ASSERT_EQ(async_read_bytes.count, 0); - } - ASSERT_GT(prefetched_bytes_discarded.count, 0); + ASSERT_EQ(iter->status(), Status::NotSupported()); } - ASSERT_EQ(get_perf_context()->number_async_seek, 0); } SyncPoint::GetInstance()->DisableProcessing(); @@ -1740,22 +1736,19 @@ TEST_P(PrefetchTest, MultipleSeekWithPosixFS) { num_keys++; iter->Next(); } - ASSERT_OK(iter->status()); - ASSERT_EQ(num_keys, num_keys_first_batch); - // Check stats to make sure async prefetch is done. - { + + if (read_async_called) { + ASSERT_OK(iter->status()); + ASSERT_EQ(num_keys, num_keys_first_batch); + // Check stats to make sure async prefetch is done. HistogramData async_read_bytes; options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); - + ASSERT_GT(async_read_bytes.count, 0); + ASSERT_GT(get_perf_context()->number_async_seek, 0); + } else { // Not all platforms support iouring. In that case, ReadAsync in posix // won't submit async requests. - if (read_async_called) { - ASSERT_GT(async_read_bytes.count, 0); - ASSERT_GT(get_perf_context()->number_async_seek, 0); - } else { - ASSERT_EQ(async_read_bytes.count, 0); - ASSERT_EQ(get_perf_context()->number_async_seek, 0); - } + ASSERT_EQ(iter->status(), Status::NotSupported()); } } @@ -1771,29 +1764,26 @@ TEST_P(PrefetchTest, MultipleSeekWithPosixFS) { num_keys++; iter->Next(); } - ASSERT_OK(iter->status()); - ASSERT_EQ(num_keys, num_keys_second_batch); - ASSERT_GT(buff_prefetch_count, 0); + if (read_async_called) { + ASSERT_OK(iter->status()); + ASSERT_EQ(num_keys, num_keys_second_batch); - // Check stats to make sure async prefetch is done. - { + ASSERT_GT(buff_prefetch_count, 0); + + // Check stats to make sure async prefetch is done. 
HistogramData async_read_bytes; options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); HistogramData prefetched_bytes_discarded; options.statistics->histogramData(PREFETCHED_BYTES_DISCARDED, &prefetched_bytes_discarded); - + ASSERT_GT(async_read_bytes.count, 0); + ASSERT_GT(get_perf_context()->number_async_seek, 0); + ASSERT_GT(prefetched_bytes_discarded.count, 0); + } else { // Not all platforms support iouring. In that case, ReadAsync in posix // won't submit async requests. - if (read_async_called) { - ASSERT_GT(async_read_bytes.count, 0); - ASSERT_GT(get_perf_context()->number_async_seek, 0); - } else { - ASSERT_EQ(async_read_bytes.count, 0); - ASSERT_EQ(get_perf_context()->number_async_seek, 0); - } - ASSERT_GT(prefetched_bytes_discarded.count, 0); + ASSERT_EQ(iter->status(), Status::NotSupported()); } } } @@ -1872,51 +1862,52 @@ TEST_P(PrefetchTest, SeekParallelizationTestWithPosix) { // Each block contains around 4 keys. auto iter = std::unique_ptr(db_->NewIterator(ro)); iter->Seek(BuildKey(0)); // Prefetch data because of seek parallelization. - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); + if (std::get<1>(GetParam()) && !read_async_called) { + ASSERT_EQ(iter->status(), Status::NotSupported()); + } else { + ASSERT_TRUE(iter->Valid()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); - // New data block. Since num_file_reads in FilePrefetch after this read is - // 2, it won't go for prefetching. - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); + // New data block. Since num_file_reads in FilePrefetch after this read is + // 2, it won't go for prefetching. 
+ iter->Next(); + ASSERT_TRUE(iter->Valid()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); - // Prefetch data. - iter->Next(); - ASSERT_TRUE(iter->Valid()); + // Prefetch data. + iter->Next(); - // Check stats to make sure async prefetch is done. - { - HistogramData async_read_bytes; - options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); - // Not all platforms support iouring. In that case, ReadAsync in posix - // won't submit async requests. if (read_async_called) { - ASSERT_GT(async_read_bytes.count, 0); - ASSERT_GT(get_perf_context()->number_async_seek, 0); - if (std::get<1>(GetParam())) { - ASSERT_EQ(buff_prefetch_count, 1); - } else { - ASSERT_EQ(buff_prefetch_count, 2); + ASSERT_TRUE(iter->Valid()); + // Check stats to make sure async prefetch is done. + { + HistogramData async_read_bytes; + options.statistics->histogramData(ASYNC_READ_BYTES, + &async_read_bytes); + ASSERT_GT(async_read_bytes.count, 0); + ASSERT_GT(get_perf_context()->number_async_seek, 0); + if (std::get<1>(GetParam())) { + ASSERT_EQ(buff_prefetch_count, 1); + } else { + ASSERT_EQ(buff_prefetch_count, 2); + } } } else { - ASSERT_EQ(async_read_bytes.count, 0); - ASSERT_EQ(get_perf_context()->number_async_seek, 0); - ASSERT_EQ(buff_prefetch_count, 1); + // Not all platforms support iouring. In that case, ReadAsync in posix + // won't submit async requests. + ASSERT_EQ(iter->status(), Status::NotSupported()); } } - - buff_prefetch_count = 0; } Close(); } @@ -2009,20 +2000,17 @@ TEST_P(PrefetchTest, TraceReadAsyncWithCallbackWrapper) { ASSERT_OK(db_->EndIOTrace()); ASSERT_OK(env_->FileExists(trace_file_path)); - ASSERT_EQ(num_keys, total_keys); - ASSERT_GT(buff_prefetch_count, 0); - - // Check stats to make sure async prefetch is done. 
- { + if (read_async_called) { + ASSERT_EQ(num_keys, total_keys); + ASSERT_GT(buff_prefetch_count, 0); + // Check stats to make sure async prefetch is done. HistogramData async_read_bytes; options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); + ASSERT_GT(async_read_bytes.count, 0); + } else { // Not all platforms support iouring. In that case, ReadAsync in posix // won't submit async requests. - if (read_async_called) { - ASSERT_GT(async_read_bytes.count, 0); - } else { - ASSERT_EQ(async_read_bytes.count, 0); - } + ASSERT_EQ(iter->status(), Status::NotSupported()); } // Check the file to see if ReadAsync is logged. @@ -2102,8 +2090,13 @@ TEST_F(FilePrefetchBufferTest, SeekWithBlockCacheHit) { // Simulate a seek of 4096 bytes at offset 0. Due to the readahead settings, // it will do two reads of 4096+8192 and 8192 Status s = fpb.PrefetchAsync(IOOptions(), r.get(), 0, 4096, &result); - // Platforms that don't have IO uring may not support async IO - ASSERT_TRUE(s.IsTryAgain() || s.IsNotSupported()); + + // Platforms that don't have IO uring may not support async IO. + if (s.IsNotSupported()) { + return; + } + + ASSERT_TRUE(s.IsTryAgain()); // Simulate a block cache hit fpb.UpdateReadPattern(0, 4096, false); // Now read some data that straddles the two prefetch buffers - offset 8192 to @@ -2137,9 +2130,13 @@ TEST_F(FilePrefetchBufferTest, NoSyncWithAsyncIO) { // Simulate a seek of 4000 bytes at offset 3000. Due to the readahead // settings, it will do two reads of 4000+4096 and 4096 Status s = fpb.PrefetchAsync(IOOptions(), r.get(), 3000, 4000, &async_result); + // Platforms that don't have IO uring may not support async IO - ASSERT_TRUE(s.IsTryAgain() || s.IsNotSupported()); + if (s.IsNotSupported()) { + return; + } + ASSERT_TRUE(s.IsTryAgain()); ASSERT_TRUE(fpb.TryReadFromCacheAsync(IOOptions(), r.get(), /*offset=*/3000, /*length=*/4000, &async_result, &s, Env::IOPriority::IO_LOW));