Skip to content

Commit

Permalink
Fix bug where null responses were accidentally excluded from profile e…
Browse files Browse the repository at this point in the history
…xport (#379)

* Fix bug where null responses were accidentally excluded from profile export

* Fix bug

* Address feedback
  • Loading branch information
matthewkotila committed Aug 15, 2023
1 parent 54e3036 commit 2b87928
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 36 deletions.
10 changes: 5 additions & 5 deletions src/c++/perf_analyzer/infer_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ InferContext::SendRequest(
auto total = end_time_sync - start_time_sync;
thread_stat_->request_records_.emplace_back(RequestRecord(
start_time_sync, std::move(end_time_syncs),
infer_data_.options_->sequence_end_, delayed, sequence_id));
infer_data_.options_->sequence_end_, delayed, sequence_id, false));
thread_stat_->status_ =
infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_]));
if (!thread_stat_->status_.IsOk()) {
Expand Down Expand Up @@ -257,9 +257,9 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
if (thread_stat_->cb_status_.IsOk() == false) {
return;
}
if (is_null_response == false) {
it->second.response_times_.push_back(
std::chrono::system_clock::now());
it->second.response_times_.push_back(std::chrono::system_clock::now());
if (is_null_response == true) {
it->second.has_null_last_response_ = true;
}
thread_stat_->cb_status_ =
result_ptr->IsFinalResponse(&is_final_response);
Expand All @@ -270,7 +270,7 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
thread_stat_->request_records_.emplace_back(
it->second.start_time_, it->second.response_times_,
it->second.sequence_end_, it->second.delayed_,
it->second.sequence_id_);
it->second.sequence_id_, it->second.has_null_last_response_);
infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_]));
thread_stat_->cb_status_ = ValidateOutputs(result);
async_req_map_.erase(request_id);
Expand Down
17 changes: 15 additions & 2 deletions src/c++/perf_analyzer/inference_profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1279,15 +1279,28 @@ InferenceProfiler::ValidLatencyMeasurement(
for (size_t i = 0; i < all_request_records_.size(); i++) {
const auto& request_record = all_request_records_[i];
uint64_t request_start_ns = CHRONO_TO_NANOS(request_record.start_time_);
uint64_t request_end_ns =
CHRONO_TO_NANOS(request_record.response_times_.back());
uint64_t request_end_ns;

if (request_record.has_null_last_response_ == false) {
request_end_ns = CHRONO_TO_NANOS(request_record.response_times_.back());
} else if (request_record.response_times_.size() > 1) {
size_t last_response_idx{request_record.response_times_.size() - 2};
request_end_ns =
CHRONO_TO_NANOS(request_record.response_times_[last_response_idx]);
} else {
erase_indices.push_back(i);
continue;
}

if (request_start_ns <= request_end_ns) {
// Only counting requests that end within the time interval
if ((request_end_ns >= valid_range.first) &&
(request_end_ns <= valid_range.second)) {
valid_latencies->push_back(request_end_ns - request_start_ns);
response_count += request_record.response_times_.size();
if (request_record.has_null_last_response_) {
response_count--;
}
erase_indices.push_back(i);
if (request_record.sequence_end_) {
valid_sequence_count++;
Expand Down
8 changes: 6 additions & 2 deletions src/c++/perf_analyzer/request_record.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@ struct RequestRecord {
std::chrono::time_point<std::chrono::system_clock> start_time,
std::vector<std::chrono::time_point<std::chrono::system_clock>>
response_times,
bool sequence_end, bool delayed, uint64_t sequence_id)
bool sequence_end, bool delayed, uint64_t sequence_id,
bool has_null_last_response)
: start_time_(start_time), response_times_(response_times),
sequence_end_(sequence_end), delayed_(delayed),
sequence_id_(sequence_id)
sequence_id_(sequence_id),
has_null_last_response_(has_null_last_response)
{
}
// The timestamp of when the request was started.
Expand All @@ -56,6 +58,8 @@ struct RequestRecord {
bool delayed_;
// Sequence ID of the request
uint64_t sequence_id_;
// Whether the last response is null
bool has_null_last_response_;
};

}} // namespace triton::perfanalyzer
65 changes: 46 additions & 19 deletions src/c++/perf_analyzer/test_inference_profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -179,32 +179,32 @@ TEST_CASE("testing the ValidLatencyMeasurement function")
// current window
RequestRecord(
time_point(ns(1)), std::vector<time_point>{time_point(ns(2))}, 0,
false, 0),
false, 0, false),

// request starts before window starts and ends inside window: included in
// current window
RequestRecord(
time_point(ns(3)), std::vector<time_point>{time_point(ns(5))}, 0,
false, 0),
false, 0, false),

// requests start and end inside window: included in current window
RequestRecord(
time_point(ns(6)), std::vector<time_point>{time_point(ns(9))}, 0,
false, 0),
false, 0, false),
RequestRecord(
time_point(ns(10)), std::vector<time_point>{time_point(ns(14))}, 0,
false, 0),
false, 0, false),

// request starts before window ends and ends after window ends: not
// included in current window
RequestRecord(
time_point(ns(15)), std::vector<time_point>{time_point(ns(20))}, 0,
false, 0),
false, 0, false),

// request starts after window ends: not included in current window
RequestRecord(
time_point(ns(21)), std::vector<time_point>{time_point(ns(27))}, 0,
false, 0)};
false, 0, false)};

TestInferenceProfiler::ValidLatencyMeasurement(
window, valid_sequence_count, delayed_request_count, &latencies,
Expand Down Expand Up @@ -882,17 +882,44 @@ TEST_CASE(
request1_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{
response1_timestamp, response2_timestamp},
0, false, 0)};
0, false, 0, false)};

auto request2_timestamp{clock_epoch + std::chrono::nanoseconds(4)};
auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)};
auto response4_timestamp{clock_epoch + std::chrono::nanoseconds(6)};
auto response5_timestamp{clock_epoch + std::chrono::nanoseconds(7)};
auto request_record2{RequestRecord(
request2_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{
response3_timestamp, response4_timestamp, response5_timestamp},
0, false, 0)};
RequestRecord request_record2{};
size_t expected_response_count{0};

SUBCASE("second request has three data responses")
{
auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)};
auto response4_timestamp{clock_epoch + std::chrono::nanoseconds(6)};
auto response5_timestamp{clock_epoch + std::chrono::nanoseconds(7)};
request_record2 = RequestRecord(
request2_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{
response3_timestamp, response4_timestamp, response5_timestamp},
0, false, 0, false);
expected_response_count = 5;
}
SUBCASE("second request has two data responses and one null response")
{
auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)};
auto response4_timestamp{clock_epoch + std::chrono::nanoseconds(6)};
auto response5_timestamp{clock_epoch + std::chrono::nanoseconds(7)};
request_record2 = RequestRecord(
request2_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{
response3_timestamp, response4_timestamp, response5_timestamp},
0, false, 0, true);
expected_response_count = 4;
}
SUBCASE("second request has one null response")
{
request_record2 = RequestRecord(
request2_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{}, 0,
false, 0, true);
expected_response_count = 2;
}

mock_inference_profiler.all_request_records_ = {
request_record1, request_record2};
Expand All @@ -909,7 +936,7 @@ TEST_CASE(
valid_range, valid_sequence_count, delayed_request_count,
&valid_latencies, response_count, valid_requests);

CHECK(response_count == 5);
CHECK(response_count == expected_response_count);
}
SUBCASE("testing logic relevant to valid request output")
{
Expand All @@ -921,23 +948,23 @@ TEST_CASE(
request1_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{
response1_timestamp},
0, false, 0)};
0, false, 0, false)};

auto request2_timestamp{clock_epoch + std::chrono::nanoseconds(3)};
auto response2_timestamp{clock_epoch + std::chrono::nanoseconds(4)};
auto request_record2{RequestRecord(
request2_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{
response2_timestamp},
0, false, 0)};
0, false, 0, false)};

auto request3_timestamp{clock_epoch + std::chrono::nanoseconds(5)};
auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(6)};
auto request_record3{RequestRecord(
request3_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{
response3_timestamp},
0, false, 0)};
0, false, 0, false)};

mock_inference_profiler.all_request_records_ = {
request_record1, request_record2, request_record3};
Expand Down
16 changes: 8 additions & 8 deletions src/c++/perf_analyzer/test_load_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ operator==(const RequestRecord& lhs, const RequestRecord& rhs)
{
return std::tie(
lhs.start_time_, lhs.response_times_, lhs.sequence_end_,
lhs.delayed_, lhs.sequence_id_) ==
lhs.delayed_, lhs.sequence_id_, lhs.has_null_last_response_) ==
std::tie(
rhs.start_time_, rhs.response_times_, rhs.sequence_end_,
rhs.delayed_, rhs.sequence_id_);
rhs.delayed_, rhs.sequence_id_, rhs.has_null_last_response_);
}

} // namespace
Expand Down Expand Up @@ -134,13 +134,13 @@ class TestLoadManager : public TestLoadManagerBase, public LoadManager {
using ns = std::chrono::nanoseconds;
auto request_record1 = RequestRecord(
time_point(ns(1)), std::vector<time_point>{time_point(ns(2))}, 0, false,
0);
0, false);
auto request_record2 = RequestRecord(
time_point(ns(3)), std::vector<time_point>{time_point(ns(4))}, 0, false,
0);
0, false);
auto request_record3 = RequestRecord(
time_point(ns(5)), std::vector<time_point>{time_point(ns(6))}, 0, false,
0);
0, false);

std::vector<RequestRecord> source_request_records;

Expand Down Expand Up @@ -295,13 +295,13 @@ class TestLoadManager : public TestLoadManagerBase, public LoadManager {
using ns = std::chrono::nanoseconds;
auto request_record1 = RequestRecord(
time_point(ns(1)), std::vector<time_point>{time_point(ns(2))}, 0, false,
0);
0, false);
auto request_record2 = RequestRecord(
time_point(ns(3)), std::vector<time_point>{time_point(ns(4))}, 0, false,
0);
0, false);
auto request_record3 = RequestRecord(
time_point(ns(5)), std::vector<time_point>{time_point(ns(6))}, 0, false,
0);
0, false);

SUBCASE("No threads")
{
Expand Down

0 comments on commit 2b87928

Please sign in to comment.