From 1d1c6d8fe8bc557d020c38a997eb5dccfe6e594e Mon Sep 17 00:00:00 2001 From: Jemale Lockett Date: Sun, 13 Oct 2024 15:23:30 -0400 Subject: [PATCH] Update Alternating Interrupt/Resume Test Updates the test to retry interrupting threads in case the threads were unavailable or not yet resumed when the initial interrupt request was sent Signed-off-by: Jemale Lockett --- .../tools/debug/src/test_debug.cpp | 13 +++--- .../tools/debug/src/test_debug_utils.cpp | 45 ++++++++++++++++++- .../tools/include/test_harness_debug.hpp | 7 +-- .../tools/src/test_harness_debug.cpp | 20 +++++++-- 4 files changed, 71 insertions(+), 14 deletions(-) diff --git a/conformance_tests/tools/debug/src/test_debug.cpp b/conformance_tests/tools/debug/src/test_debug.cpp index 47335878..1c7c6453 100644 --- a/conformance_tests/tools/debug/src/test_debug.cpp +++ b/conformance_tests/tools/debug/src/test_debug.cpp @@ -1440,6 +1440,7 @@ void zetDebugThreadControlTest::SetUpThreadControl(ze_device_handle_t &device, smaller_thread_functor()); } +#define INTERRUPT_TEST_SLEEP std::chrono::seconds(2) void zetDebugThreadControlTest::run_alternate_stop_resume_test( std::vector &devices, bool use_sub_devices) { for (auto &device : devices) { @@ -1467,7 +1468,7 @@ void zetDebugThreadControlTest::run_alternate_stop_resume_test( } i++; } - std::this_thread::sleep_for(std::chrono::seconds(2)); + std::this_thread::sleep_for(INTERRUPT_TEST_SLEEP); LOG_INFO << "[Debugger] ######### Interrupting Odd AND resumming Even threads " @@ -1494,7 +1495,7 @@ void zetDebugThreadControlTest::run_alternate_stop_resume_test( } i++; } - std::this_thread::sleep_for(std::chrono::seconds(2)); + std::this_thread::sleep_for(INTERRUPT_TEST_SLEEP); LOG_INFO << "[Debugger] ######### Interrupting Even threads AND resumming Odd " @@ -1521,7 +1522,7 @@ void zetDebugThreadControlTest::run_alternate_stop_resume_test( } i++; } - std::this_thread::sleep_for(std::chrono::seconds(2)); + std::this_thread::sleep_for(INTERRUPT_TEST_SLEEP); LOG_INFO << "[Debugger] 
######### Interrupting Odd threads ##########"; threadsToCheck.clear(); @@ -1559,7 +1560,7 @@ void zetDebugThreadControlTest::run_alternate_stop_resume_test( i++; } - std::this_thread::sleep_for(std::chrono::seconds(2)); + std::this_thread::sleep_for(INTERRUPT_TEST_SLEEP); EXPECT_EQ(debugHelper.running(), true); LOG_INFO << "[Debugger] ######### Ressuming Odd threads ##########"; @@ -1573,7 +1574,7 @@ void zetDebugThreadControlTest::run_alternate_stop_resume_test( } LOG_INFO << "[Debugger] ######### Checking ALL threads are running ######"; - std::this_thread::sleep_for(std::chrono::seconds(2)); + std::this_thread::sleep_for(INTERRUPT_TEST_SLEEP); stoppedThreadsCheck.clear(); stoppedThreadsCheck = get_stopped_threads(debugSession, device); EXPECT_EQ(stoppedThreadsCheck.size(), 0); @@ -1608,7 +1609,7 @@ void zetDebugThreadControlTest::run_alternate_stop_resume_test( LOG_INFO << "[Debugger] ######### Checking ALL threads are running ##########"; - std::this_thread::sleep_for(std::chrono::seconds(2)); + std::this_thread::sleep_for(INTERRUPT_TEST_SLEEP); stoppedThreadsCheck.clear(); stoppedThreadsCheck = get_stopped_threads(debugSession, device); EXPECT_EQ(stoppedThreadsCheck.size(), 0); diff --git a/conformance_tests/tools/debug/src/test_debug_utils.cpp b/conformance_tests/tools/debug/src/test_debug_utils.cpp index 7a8f0a45..e6c975a2 100644 --- a/conformance_tests/tools/debug/src/test_debug_utils.cpp +++ b/conformance_tests/tools/debug/src/test_debug_utils.cpp @@ -577,18 +577,25 @@ bool find_multi_event_stopped_threads( uint8_t attempts = 0; uint16_t numEventsReceived = 0; uint16_t numEventsExpected = threadsToCheck.size(); + uint8_t retryAttempts = 0; zet_debug_event_t debugEvent = {}; stoppedThreadsFound.clear(); bool foundAll = true; + std::vector stoppedThreadsFromEvent; LOG_DEBUG << "[Debugger] Expecting " << threadsToCheck.size() << " events."; - for (auto threadToCheck : threadsToCheck) { + auto sleepTime = 30; + int numEvents = 0; + int 
numEventsExpectedDuringRetry = numEventsExpected; + std::vector threadsToRetry; + while ((numEventsReceived < numEventsExpected) && (retryAttempts <= 5)) { do { lzt::debug_read_event(debugSession, debugEvent, eventsTimeoutMS / 10, true); LOG_INFO << "[Debugger] received event: " << lzt::debuggerEventTypeString[debugEvent.type]; + numEvents++; if (debugEvent.type == ZET_DEBUG_EVENT_TYPE_THREAD_STOPPED) { print_thread("[Debugger] Stopped thread event for ", @@ -598,11 +605,47 @@ bool find_multi_event_stopped_threads( EXPECT_TRUE(is_thread_in_vector(debugEvent.info.thread.thread, threadsToCheck)); } + stoppedThreadsFromEvent.push_back(debugEvent.info.thread.thread); numEventsReceived++; break; + } else if (debugEvent.type == ZET_DEBUG_EVENT_TYPE_THREAD_UNAVAILABLE) { + // there is a thread we need to retry interrupting + print_thread("[Debugger] Thread unavailable event for ", + debugEvent.info.thread.thread, DEBUG); + + } else { + LOG_WARNING << "[Debugger] Unexpected event received: " + << lzt::debuggerEventTypeString[debugEvent.type]; } attempts++; } while (attempts < 5); + attempts = 0; + + if (numEvents >= numEventsExpectedDuringRetry && retryAttempts < 5) { + numEvents = 0; + for (auto &thread : threadsToCheck) { + if (is_thread_in_vector(thread, stoppedThreadsFromEvent)) + continue; + threadsToRetry.push_back(thread); + } + if (threadsToRetry.empty()) { + break; + } + numEventsExpectedDuringRetry = threadsToRetry.size(); + LOG_INFO << "[Debugger] Sleeping for " << sleepTime + << " seconds before retrying to interrupt threads"; + std::this_thread::sleep_for(std::chrono::seconds(sleepTime)); + LOG_INFO << "[Debugger] Trying to interrupt threads again"; + for (auto &thread : threadsToRetry) { + print_thread("[Debugger] \t\tThread to interrupt: ", thread, INFO); + } + for (auto &thread : threadsToRetry) { + lzt::debug_interrupt(debugSession, thread, true); + } + retryAttempts++; + sleepTime += 10; + threadsToRetry.clear(); + } } EXPECT_EQ(numEventsReceived, 
numEventsExpected); diff --git a/utils/test_harness/tools/include/test_harness_debug.hpp b/utils/test_harness/tools/include/test_harness_debug.hpp index 483bcb53..0af1b696 100644 --- a/utils/test_harness/tools/include/test_harness_debug.hpp +++ b/utils/test_harness/tools/include/test_harness_debug.hpp @@ -37,14 +37,15 @@ void debug_ack_event(const zet_debug_session_handle_t &debug_session, const zet_debug_event_t *debug_event); void debug_interrupt(const zet_debug_session_handle_t &debug_session, - const ze_device_thread_t &device_thread); + const ze_device_thread_t &device_thread, + bool retry = false); void debug_resume(const zet_debug_session_handle_t &debug_session, const ze_device_thread_t &device_thread); void clear_exceptions(const ze_device_handle_t &device, - const zet_debug_session_handle_t &debug_session, - const ze_device_thread_t &device_thread); + const zet_debug_session_handle_t &debug_session, + const ze_device_thread_t &device_thread); void debug_read_memory(const zet_debug_session_handle_t &debug_session, const ze_device_thread_t &device_thread, diff --git a/utils/test_harness/tools/src/test_harness_debug.cpp b/utils/test_harness/tools/src/test_harness_debug.cpp index ed1838b8..b93fd3a7 100644 --- a/utils/test_harness/tools/src/test_harness_debug.cpp +++ b/utils/test_harness/tools/src/test_harness_debug.cpp @@ -131,9 +131,21 @@ void debug_ack_event(const zet_debug_session_handle_t &debug_session, } void debug_interrupt(const zet_debug_session_handle_t &debug_session, - const ze_device_thread_t &device_thread) { + const ze_device_thread_t &device_thread, bool retry) { - EXPECT_EQ(ZE_RESULT_SUCCESS, zetDebugInterrupt(debug_session, device_thread)); + if (!retry) { + EXPECT_EQ(ZE_RESULT_SUCCESS, + zetDebugInterrupt(debug_session, device_thread)); + } else { + auto result = zetDebugInterrupt(debug_session, device_thread); + if (result != ZE_RESULT_SUCCESS) { + LOG_WARNING << "[Debugger] Interrupt failed: " << result + << " SLICE:" << 
device_thread.slice + << " SUBSLICE: " << device_thread.subslice + << " EU: " << device_thread.eu + << " THREAD: " << device_thread.thread; + } + } } void debug_resume(const zet_debug_session_handle_t &debug_session, @@ -181,8 +193,8 @@ bool get_register_set_props(ze_device_handle_t device, } void clear_exceptions(const ze_device_handle_t &device, - const zet_debug_session_handle_t &debug_session, - const ze_device_thread_t &device_thread) { + const zet_debug_session_handle_t &debug_session, + const ze_device_thread_t &device_thread) { size_t reg_size_in_bytes = 0; zet_debug_regset_properties_t cr_reg_prop;