Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug where null responses were accidentally excluded from profile export #379

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/c++/perf_analyzer/infer_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ InferContext::SendRequest(
auto total = end_time_sync - start_time_sync;
thread_stat_->request_records_.emplace_back(RequestRecord(
start_time_sync, std::move(end_time_syncs),
infer_data_.options_->sequence_end_, delayed, sequence_id));
infer_data_.options_->sequence_end_, delayed, sequence_id, false));
thread_stat_->status_ =
infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_]));
if (!thread_stat_->status_.IsOk()) {
Expand Down Expand Up @@ -257,9 +257,9 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
if (thread_stat_->cb_status_.IsOk() == false) {
return;
}
if (is_null_response == false) {
it->second.response_times_.push_back(
std::chrono::system_clock::now());
it->second.response_times_.push_back(std::chrono::system_clock::now());
if (is_null_response == true) {
it->second.has_null_last_response_ = true;
}
thread_stat_->cb_status_ =
result_ptr->IsFinalResponse(&is_final_response);
Expand All @@ -270,7 +270,7 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
thread_stat_->request_records_.emplace_back(
it->second.start_time_, it->second.response_times_,
it->second.sequence_end_, it->second.delayed_,
it->second.sequence_id_);
it->second.sequence_id_, it->second.has_null_last_response_);
infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_]));
thread_stat_->cb_status_ = ValidateOutputs(result);
async_req_map_.erase(request_id);
Expand Down
17 changes: 15 additions & 2 deletions src/c++/perf_analyzer/inference_profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1279,15 +1279,28 @@ InferenceProfiler::ValidLatencyMeasurement(
for (size_t i = 0; i < all_request_records_.size(); i++) {
const auto& request_record = all_request_records_[i];
uint64_t request_start_ns = CHRONO_TO_NANOS(request_record.start_time_);
uint64_t request_end_ns =
CHRONO_TO_NANOS(request_record.response_times_.back());
uint64_t request_end_ns;

if (request_record.has_null_last_response_ == false) {
matthewkotila marked this conversation as resolved.
Show resolved Hide resolved
request_end_ns = CHRONO_TO_NANOS(request_record.response_times_.back());
} else if (request_record.response_times_.size() > 1) {
size_t last_response_idx{request_record.response_times_.size() - 2};
request_end_ns =
CHRONO_TO_NANOS(request_record.response_times_[last_response_idx]);
} else {
erase_indices.push_back(i);
continue;
}

if (request_start_ns <= request_end_ns) {
// Only counting requests that end within the time interval
if ((request_end_ns >= valid_range.first) &&
(request_end_ns <= valid_range.second)) {
valid_latencies->push_back(request_end_ns - request_start_ns);
response_count += request_record.response_times_.size();
if (request_record.has_null_last_response_) {
response_count--;
}
erase_indices.push_back(i);
if (request_record.sequence_end_) {
valid_sequence_count++;
Expand Down
8 changes: 6 additions & 2 deletions src/c++/perf_analyzer/request_record.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@ struct RequestRecord {
std::chrono::time_point<std::chrono::system_clock> start_time,
std::vector<std::chrono::time_point<std::chrono::system_clock>>
response_times,
bool sequence_end, bool delayed, uint64_t sequence_id)
bool sequence_end, bool delayed, uint64_t sequence_id,
bool has_null_last_response)
: start_time_(start_time), response_times_(response_times),
sequence_end_(sequence_end), delayed_(delayed),
sequence_id_(sequence_id)
sequence_id_(sequence_id),
has_null_last_response_(has_null_last_response)
{
}
// The timestamp of when the request was started.
Expand All @@ -56,6 +58,8 @@ struct RequestRecord {
bool delayed_;
// Sequence ID of the request
uint64_t sequence_id_;
// Whether the last response is null
bool has_null_last_response_;
};

}} // namespace triton::perfanalyzer
65 changes: 46 additions & 19 deletions src/c++/perf_analyzer/test_inference_profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -179,32 +179,32 @@ TEST_CASE("testing the ValidLatencyMeasurement function")
// current window
RequestRecord(
time_point(ns(1)), std::vector<time_point>{time_point(ns(2))}, 0,
false, 0),
false, 0, false),

// request starts before window starts and ends inside window: included in
// current window
RequestRecord(
time_point(ns(3)), std::vector<time_point>{time_point(ns(5))}, 0,
false, 0),
false, 0, false),

// requests start and end inside window: included in current window
RequestRecord(
time_point(ns(6)), std::vector<time_point>{time_point(ns(9))}, 0,
false, 0),
false, 0, false),
RequestRecord(
time_point(ns(10)), std::vector<time_point>{time_point(ns(14))}, 0,
false, 0),
false, 0, false),

// request starts before window ends and ends after window ends: not
// included in current window
RequestRecord(
time_point(ns(15)), std::vector<time_point>{time_point(ns(20))}, 0,
false, 0),
false, 0, false),

// request starts after window ends: not included in current window
RequestRecord(
time_point(ns(21)), std::vector<time_point>{time_point(ns(27))}, 0,
false, 0)};
false, 0, false)};

TestInferenceProfiler::ValidLatencyMeasurement(
window, valid_sequence_count, delayed_request_count, &latencies,
Expand Down Expand Up @@ -882,17 +882,44 @@ TEST_CASE(
request1_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{
response1_timestamp, response2_timestamp},
0, false, 0)};
0, false, 0, false)};

auto request2_timestamp{clock_epoch + std::chrono::nanoseconds(4)};
auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)};
auto response4_timestamp{clock_epoch + std::chrono::nanoseconds(6)};
auto response5_timestamp{clock_epoch + std::chrono::nanoseconds(7)};
auto request_record2{RequestRecord(
request2_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{
response3_timestamp, response4_timestamp, response5_timestamp},
0, false, 0)};
RequestRecord request_record2{};
size_t expected_response_count{0};

SUBCASE("second request has three data responses")
{
auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)};
auto response4_timestamp{clock_epoch + std::chrono::nanoseconds(6)};
auto response5_timestamp{clock_epoch + std::chrono::nanoseconds(7)};
request_record2 = RequestRecord(
request2_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{
response3_timestamp, response4_timestamp, response5_timestamp},
0, false, 0, false);
expected_response_count = 5;
}
SUBCASE("second request has two data responses and one null response")
{
auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)};
auto response4_timestamp{clock_epoch + std::chrono::nanoseconds(6)};
auto response5_timestamp{clock_epoch + std::chrono::nanoseconds(7)};
request_record2 = RequestRecord(
request2_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{
response3_timestamp, response4_timestamp, response5_timestamp},
0, false, 0, true);
expected_response_count = 4;
}
SUBCASE("second request has one null response")
{
request_record2 = RequestRecord(
request2_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{}, 0,
false, 0, true);
expected_response_count = 2;
}

mock_inference_profiler.all_request_records_ = {
request_record1, request_record2};
Expand All @@ -909,7 +936,7 @@ TEST_CASE(
valid_range, valid_sequence_count, delayed_request_count,
&valid_latencies, response_count, valid_requests);

CHECK(response_count == 5);
CHECK(response_count == expected_response_count);
}
SUBCASE("testing logic relevant to valid request output")
{
Expand All @@ -921,23 +948,23 @@ TEST_CASE(
request1_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{
response1_timestamp},
0, false, 0)};
0, false, 0, false)};

auto request2_timestamp{clock_epoch + std::chrono::nanoseconds(3)};
auto response2_timestamp{clock_epoch + std::chrono::nanoseconds(4)};
auto request_record2{RequestRecord(
request2_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{
response2_timestamp},
0, false, 0)};
0, false, 0, false)};

auto request3_timestamp{clock_epoch + std::chrono::nanoseconds(5)};
auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(6)};
auto request_record3{RequestRecord(
request3_timestamp,
std::vector<std::chrono::time_point<std::chrono::system_clock>>{
response3_timestamp},
0, false, 0)};
0, false, 0, false)};

mock_inference_profiler.all_request_records_ = {
request_record1, request_record2, request_record3};
Expand Down
16 changes: 8 additions & 8 deletions src/c++/perf_analyzer/test_load_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ operator==(const RequestRecord& lhs, const RequestRecord& rhs)
{
return std::tie(
lhs.start_time_, lhs.response_times_, lhs.sequence_end_,
lhs.delayed_, lhs.sequence_id_) ==
lhs.delayed_, lhs.sequence_id_, lhs.has_null_last_response_) ==
std::tie(
rhs.start_time_, rhs.response_times_, rhs.sequence_end_,
rhs.delayed_, rhs.sequence_id_);
rhs.delayed_, rhs.sequence_id_, rhs.has_null_last_response_);
}

} // namespace
Expand Down Expand Up @@ -134,13 +134,13 @@ class TestLoadManager : public TestLoadManagerBase, public LoadManager {
using ns = std::chrono::nanoseconds;
auto request_record1 = RequestRecord(
time_point(ns(1)), std::vector<time_point>{time_point(ns(2))}, 0, false,
0);
0, false);
auto request_record2 = RequestRecord(
time_point(ns(3)), std::vector<time_point>{time_point(ns(4))}, 0, false,
0);
0, false);
auto request_record3 = RequestRecord(
time_point(ns(5)), std::vector<time_point>{time_point(ns(6))}, 0, false,
0);
0, false);

std::vector<RequestRecord> source_request_records;

Expand Down Expand Up @@ -295,13 +295,13 @@ class TestLoadManager : public TestLoadManagerBase, public LoadManager {
using ns = std::chrono::nanoseconds;
auto request_record1 = RequestRecord(
time_point(ns(1)), std::vector<time_point>{time_point(ns(2))}, 0, false,
0);
0, false);
auto request_record2 = RequestRecord(
time_point(ns(3)), std::vector<time_point>{time_point(ns(4))}, 0, false,
0);
0, false);
auto request_record3 = RequestRecord(
time_point(ns(5)), std::vector<time_point>{time_point(ns(6))}, 0, false,
0);
0, false);

SUBCASE("No threads")
{
Expand Down
Loading