From 0094d30dbe33099e9f70fc7fdd35b7927fcc04ce Mon Sep 17 00:00:00 2001 From: Matthew Kotila Date: Thu, 10 Aug 2023 00:53:00 +0000 Subject: [PATCH 1/3] Fix bug where null response were accidentally excluded from profile export --- src/c++/perf_analyzer/infer_context.cc | 8 ++++---- src/c++/perf_analyzer/inference_profiler.cc | 14 ++++++++++++-- src/c++/perf_analyzer/request_record.h | 8 ++++++-- src/c++/perf_analyzer/test_inference_profiler.cc | 16 ++++++++-------- src/c++/perf_analyzer/test_load_manager.cc | 16 ++++++++-------- 5 files changed, 38 insertions(+), 24 deletions(-) diff --git a/src/c++/perf_analyzer/infer_context.cc b/src/c++/perf_analyzer/infer_context.cc index f020cd8fa..f5381e89a 100644 --- a/src/c++/perf_analyzer/infer_context.cc +++ b/src/c++/perf_analyzer/infer_context.cc @@ -167,7 +167,7 @@ InferContext::SendRequest( auto total = end_time_sync - start_time_sync; thread_stat_->request_records_.emplace_back(RequestRecord( start_time_sync, std::move(end_time_syncs), - infer_data_.options_->sequence_end_, delayed, sequence_id)); + infer_data_.options_->sequence_end_, delayed, sequence_id, false)); thread_stat_->status_ = infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_])); if (!thread_stat_->status_.IsOk()) { @@ -257,9 +257,9 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result) if (thread_stat_->cb_status_.IsOk() == false) { return; } + it->second.response_times_.push_back(std::chrono::system_clock::now()); if (is_null_response == false) { - it->second.response_times_.push_back( - std::chrono::system_clock::now()); + it->second.has_null_last_response_ = true; } thread_stat_->cb_status_ = result_ptr->IsFinalResponse(&is_final_response); @@ -270,7 +270,7 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result) thread_stat_->request_records_.emplace_back( it->second.start_time_, it->second.response_times_, it->second.sequence_end_, it->second.delayed_, - it->second.sequence_id_); + it->second.sequence_id_, it->second.has_null_last_response_); infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_])); thread_stat_->cb_status_ = ValidateOutputs(result); async_req_map_.erase(request_id); diff --git a/src/c++/perf_analyzer/inference_profiler.cc b/src/c++/perf_analyzer/inference_profiler.cc index a0b0103d3..cb870f6f7 100644 --- a/src/c++/perf_analyzer/inference_profiler.cc +++ b/src/c++/perf_analyzer/inference_profiler.cc @@ -1279,8 +1279,18 @@ InferenceProfiler::ValidLatencyMeasurement( for (size_t i = 0; i < all_request_records_.size(); i++) { const auto& request_record = all_request_records_[i]; uint64_t request_start_ns = CHRONO_TO_NANOS(request_record.start_time_); - uint64_t request_end_ns = - CHRONO_TO_NANOS(request_record.response_times_.back()); + uint64_t request_end_ns; + + if (request_record.has_null_last_response_ == false) { + request_end_ns = CHRONO_TO_NANOS(request_record.response_times_.back()); + } else if (request_record.response_times_.size() > 1) { + size_t last_response_idx{request_record.response_times_.size() - 2}; + request_end_ns = + CHRONO_TO_NANOS(request_record.response_times_[last_response_idx]); + } else { + erase_indices.push_back(i); + continue; + } if (request_start_ns <= request_end_ns) { // Only counting requests that end within the time interval diff --git a/src/c++/perf_analyzer/request_record.h b/src/c++/perf_analyzer/request_record.h index ba441549a..0c9ca9263 100644 --- a/src/c++/perf_analyzer/request_record.h +++ b/src/c++/perf_analyzer/request_record.h @@ -39,10 +39,12 @@ struct RequestRecord { std::chrono::time_point start_time, std::vector> response_times, - bool sequence_end, bool delayed, uint64_t sequence_id) + bool sequence_end, bool delayed, uint64_t sequence_id, + bool has_null_last_response) : start_time_(start_time), response_times_(response_times), sequence_end_(sequence_end), delayed_(delayed), - sequence_id_(sequence_id) + sequence_id_(sequence_id), + has_null_last_response_(has_null_last_response) { } // The timestamp of when the request was started. @@ -56,6 +58,8 @@ struct RequestRecord { bool delayed_; // Sequence ID of the request uint64_t sequence_id_; + // Whether the last response is null + bool has_null_last_response_; }; }} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_inference_profiler.cc b/src/c++/perf_analyzer/test_inference_profiler.cc index c72445d72..2c4a47d20 100644 --- a/src/c++/perf_analyzer/test_inference_profiler.cc +++ b/src/c++/perf_analyzer/test_inference_profiler.cc @@ -179,32 +179,32 @@ TEST_CASE("testing the ValidLatencyMeasurement function") // current window RequestRecord( time_point(ns(1)), std::vector{time_point(ns(2))}, 0, - false, 0), + false, 0, false), // request starts before window starts and ends inside window: included in // current window RequestRecord( time_point(ns(3)), std::vector{time_point(ns(5))}, 0, - false, 0), + false, 0, false), // requests start and end inside window: included in current window RequestRecord( time_point(ns(6)), std::vector{time_point(ns(9))}, 0, - false, 0), + false, 0, false), RequestRecord( time_point(ns(10)), std::vector{time_point(ns(14))}, 0, - false, 0), + false, 0, false), // request starts before window ends and ends after window ends: not // included in current window RequestRecord( time_point(ns(15)), std::vector{time_point(ns(20))}, 0, - false, 0), + false, 0, false), // request starts after window ends: not included in current window RequestRecord( time_point(ns(21)), std::vector{time_point(ns(27))}, 0, - false, 0)}; + false, 0, false)}; TestInferenceProfiler::ValidLatencyMeasurement( window, valid_sequence_count, delayed_request_count, &latencies, @@ -882,7 +882,7 @@ TEST_CASE( request1_timestamp, std::vector>{ response1_timestamp, response2_timestamp}, - 0, false, 0)}; + 0, false, 0, false)}; auto request2_timestamp{clock_epoch + std::chrono::nanoseconds(4)}; auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)}; @@ -892,7 +892,7 @@ TEST_CASE( request2_timestamp, std::vector>{ response3_timestamp, response4_timestamp, response5_timestamp}, - 0, false, 0)}; + 0, false, 0, false)}; mock_inference_profiler.all_request_records_ = { request_record1, request_record2}; diff --git a/src/c++/perf_analyzer/test_load_manager.cc b/src/c++/perf_analyzer/test_load_manager.cc index ada49e25b..2a11e0749 100644 --- a/src/c++/perf_analyzer/test_load_manager.cc +++ b/src/c++/perf_analyzer/test_load_manager.cc @@ -40,10 +40,10 @@ operator==(const RequestRecord& lhs, const RequestRecord& rhs) { return std::tie( lhs.start_time_, lhs.response_times_, lhs.sequence_end_, - lhs.delayed_, lhs.sequence_id_) == + lhs.delayed_, lhs.sequence_id_, lhs.has_null_last_response_) == std::tie( rhs.start_time_, rhs.response_times_, rhs.sequence_end_, - rhs.delayed_, rhs.sequence_id_); + rhs.delayed_, rhs.sequence_id_, rhs.has_null_last_response_); } } // namespace @@ -134,13 +134,13 @@ class TestLoadManager : public TestLoadManagerBase, public LoadManager { using ns = std::chrono::nanoseconds; auto request_record1 = RequestRecord( time_point(ns(1)), std::vector{time_point(ns(2))}, 0, false, - 0); + 0, false); auto request_record2 = RequestRecord( time_point(ns(3)), std::vector{time_point(ns(4))}, 0, false, - 0); + 0, false); auto request_record3 = RequestRecord( time_point(ns(5)), std::vector{time_point(ns(6))}, 0, false, - 0); + 0, false); std::vector source_request_records; @@ -295,13 +295,13 @@ class TestLoadManager : public TestLoadManagerBase, public LoadManager { using ns = std::chrono::nanoseconds; auto request_record1 = RequestRecord( time_point(ns(1)), std::vector{time_point(ns(2))}, 0, false, - 0); + 0, false); auto request_record2 = RequestRecord( time_point(ns(3)), std::vector{time_point(ns(4))}, 0, false, - 0); + 0, false); auto request_record3 = RequestRecord( time_point(ns(5)), std::vector{time_point(ns(6))}, 0, false, - 0); + 0, false); SUBCASE("No threads") { From a704f59f9158c6203a1b2ae513f1c4e9e06c13df Mon Sep 17 00:00:00 2001 From: Matthew Kotila Date: Thu, 10 Aug 2023 06:58:13 +0000 Subject: [PATCH 2/3] Fix bug --- src/c++/perf_analyzer/infer_context.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c++/perf_analyzer/infer_context.cc b/src/c++/perf_analyzer/infer_context.cc index f5381e89a..16855ca53 100644 --- a/src/c++/perf_analyzer/infer_context.cc +++ b/src/c++/perf_analyzer/infer_context.cc @@ -258,7 +258,7 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result) return; } it->second.response_times_.push_back(std::chrono::system_clock::now()); - if (is_null_response == false) { + if (is_null_response == true) { it->second.has_null_last_response_ = true; } thread_stat_->cb_status_ = From c1902bb558132cfa8cfdeea7265fde454ff55042 Mon Sep 17 00:00:00 2001 From: Matthew Kotila Date: Thu, 10 Aug 2023 22:31:22 +0000 Subject: [PATCH 3/3] Address feedback --- src/c++/perf_analyzer/inference_profiler.cc | 3 ++ .../perf_analyzer/test_inference_profiler.cc | 51 ++++++++++++++----- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/src/c++/perf_analyzer/inference_profiler.cc b/src/c++/perf_analyzer/inference_profiler.cc index cb870f6f7..da5acf255 100644 --- a/src/c++/perf_analyzer/inference_profiler.cc +++ b/src/c++/perf_analyzer/inference_profiler.cc @@ -1298,6 +1298,9 @@ InferenceProfiler::ValidLatencyMeasurement( (request_end_ns <= valid_range.second)) { valid_latencies->push_back(request_end_ns - request_start_ns); response_count += request_record.response_times_.size(); + if (request_record.has_null_last_response_) { + response_count--; + } erase_indices.push_back(i); if (request_record.sequence_end_) { valid_sequence_count++; diff --git a/src/c++/perf_analyzer/test_inference_profiler.cc b/src/c++/perf_analyzer/test_inference_profiler.cc index 2c4a47d20..5a41319bd 100644 --- a/src/c++/perf_analyzer/test_inference_profiler.cc +++ b/src/c++/perf_analyzer/test_inference_profiler.cc @@ -885,14 +885,41 @@ TEST_CASE( 0, false, 0, false)}; auto request2_timestamp{clock_epoch + std::chrono::nanoseconds(4)}; - auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)}; - auto response4_timestamp{clock_epoch + std::chrono::nanoseconds(6)}; - auto response5_timestamp{clock_epoch + std::chrono::nanoseconds(7)}; - auto request_record2{RequestRecord( - request2_timestamp, - std::vector>{ - response3_timestamp, response4_timestamp, response5_timestamp}, - 0, false, 0, false)}; + RequestRecord request_record2{}; + size_t expected_response_count{0}; + + SUBCASE("second request has three data responses") + { + auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)}; + auto response4_timestamp{clock_epoch + std::chrono::nanoseconds(6)}; + auto response5_timestamp{clock_epoch + std::chrono::nanoseconds(7)}; + request_record2 = RequestRecord( + request2_timestamp, + std::vector>{ + response3_timestamp, response4_timestamp, response5_timestamp}, + 0, false, 0, false); + expected_response_count = 5; + } + SUBCASE("second request has two data responses and one null response") + { + auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)}; + auto response4_timestamp{clock_epoch + std::chrono::nanoseconds(6)}; + auto response5_timestamp{clock_epoch + std::chrono::nanoseconds(7)}; + request_record2 = RequestRecord( + request2_timestamp, + std::vector>{ + response3_timestamp, response4_timestamp, response5_timestamp}, + 0, false, 0, true); + expected_response_count = 4; + } + SUBCASE("second request has one null response") + { + request_record2 = RequestRecord( + request2_timestamp, + std::vector>{}, 0, + false, 0, true); + expected_response_count = 2; + } mock_inference_profiler.all_request_records_ = { request_record1, request_record2}; @@ -909,7 +936,7 @@ TEST_CASE( valid_range, valid_sequence_count, delayed_request_count, &valid_latencies, response_count, valid_requests); - CHECK(response_count == 5); + CHECK(response_count == expected_response_count); } SUBCASE("testing logic relevant to valid request output") { @@ -921,7 +948,7 @@ TEST_CASE( request1_timestamp, std::vector>{ response1_timestamp}, - 0, false, 0)}; + 0, false, 0, false)}; auto request2_timestamp{clock_epoch + std::chrono::nanoseconds(3)}; auto response2_timestamp{clock_epoch + std::chrono::nanoseconds(4)}; @@ -929,7 +956,7 @@ TEST_CASE( request2_timestamp, std::vector>{ response2_timestamp}, - 0, false, 0)}; + 0, false, 0, false)}; auto request3_timestamp{clock_epoch + std::chrono::nanoseconds(5)}; auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(6)}; @@ -937,7 +964,7 @@ TEST_CASE( request3_timestamp, std::vector>{ response3_timestamp}, - 0, false, 0)}; + 0, false, 0, false)}; mock_inference_profiler.all_request_records_ = { request_record1, request_record2, request_record3};