Skip to content

Commit

Permalink
[SYCL] Fix a thread pool data race during shutdown (#15535)
Browse files Browse the repository at this point in the history
The flag that kills the threads waiting for work was an atomic bool, but
that is not enough: any modification of a condition must be made under
the mutex associated with that condition variable. Otherwise, the bool
value flip and the call to notify might happen after the condition check
in the other thread, but before it starts waiting for notifications.
  • Loading branch information
sergey-semenov authored Sep 27, 2024
1 parent 23fed07 commit 8fc9aa5
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 6 deletions.
14 changes: 8 additions & 6 deletions sycl/source/detail/thread_pool.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,17 @@ class ThreadPool {
std::queue<std::function<void()>> MJobQueue;
std::mutex MJobQueueMutex;
std::condition_variable MDoSmthOrStop;
std::atomic_bool MStop;
bool MStop = false;
std::atomic_uint MJobsInPool;

void worker() {
GlobalHandler::instance().registerSchedulerUsage(/*ModifyCounter*/ false);
std::unique_lock<std::mutex> Lock(MJobQueueMutex);
while (true) {
MDoSmthOrStop.wait(
Lock, [this]() { return !MJobQueue.empty() || MStop.load(); });
MDoSmthOrStop.wait(Lock,
[this]() { return !MJobQueue.empty() || MStop; });

if (MStop.load())
if (MStop)
break;

std::function<void()> Job = std::move(MJobQueue.front());
Expand All @@ -57,7 +57,6 @@ class ThreadPool {
void start() {
MLaunchedThreads.reserve(MThreadCount);

MStop.store(false);
MJobsInPool.store(0);

for (size_t Idx = 0; Idx < MThreadCount; ++Idx)
Expand All @@ -83,7 +82,10 @@ class ThreadPool {
}

void finishAndWait() {
MStop.store(true);
{
std::lock_guard<std::mutex> Lock(MJobQueueMutex);
MStop = true;
}

MDoSmthOrStop.notify_all();

Expand Down
16 changes: 16 additions & 0 deletions sycl/test-e2e/Regression/unwaited_for_host_task.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include <sycl/detail/core.hpp>

using namespace sycl;

// This test checks that host tasks that haven't been waited for do not cause
// issues during shutdown.
int main(int argc, char *argv[]) {
queue q;

q.submit([&](handler &h) { h.host_task([=]() {}); });

return 0;
}

0 comments on commit 8fc9aa5

Please sign in to comment.