From 8fc9aa56d5052229616c629eb8e03f9338829cb4 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Fri, 27 Sep 2024 11:35:36 -0700 Subject: [PATCH] [SYCL] Fix a thread pool data race during shutdown (#15535) The flag that kills the threads waiting for work was an atomic bool, but that is not enough: any modification of a condition must be made under the mutex associated with that condition variable. Otherwise, the bool value flip and the call to notify might happen after the condition check in the other thread, but before it starts waiting for notifications. --- sycl/source/detail/thread_pool.hpp | 14 ++++++++------ .../Regression/unwaited_for_host_task.cpp | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 sycl/test-e2e/Regression/unwaited_for_host_task.cpp diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 304045389b53..50240e0a98b0 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -29,17 +29,17 @@ class ThreadPool { std::queue> MJobQueue; std::mutex MJobQueueMutex; std::condition_variable MDoSmthOrStop; - std::atomic_bool MStop; + bool MStop = false; std::atomic_uint MJobsInPool; void worker() { GlobalHandler::instance().registerSchedulerUsage(/*ModifyCounter*/ false); std::unique_lock Lock(MJobQueueMutex); while (true) { - MDoSmthOrStop.wait( - Lock, [this]() { return !MJobQueue.empty() || MStop.load(); }); + MDoSmthOrStop.wait(Lock, + [this]() { return !MJobQueue.empty() || MStop; }); - if (MStop.load()) + if (MStop) break; std::function Job = std::move(MJobQueue.front()); @@ -57,7 +57,6 @@ class ThreadPool { void start() { MLaunchedThreads.reserve(MThreadCount); - MStop.store(false); MJobsInPool.store(0); for (size_t Idx = 0; Idx < MThreadCount; ++Idx) @@ -83,7 +82,10 @@ class ThreadPool { } void finishAndWait() { - MStop.store(true); + { + std::lock_guard Lock(MJobQueueMutex); + MStop = true; + } MDoSmthOrStop.notify_all(); diff --git a/sycl/test-e2e/Regression/unwaited_for_host_task.cpp b/sycl/test-e2e/Regression/unwaited_for_host_task.cpp new file mode 100644 index 000000000000..6f6ae3090c1e --- /dev/null +++ b/sycl/test-e2e/Regression/unwaited_for_host_task.cpp @@ -0,0 +1,16 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include + +using namespace sycl; + +// This test checks that host tasks that haven't been waited for do not cause +// issues during shutdown. +int main(int argc, char *argv[]) { + queue q; + + q.submit([&](handler &h) { h.host_task([=]() {}); }); + + return 0; +}