diff --git a/src/c++/perf_analyzer/command_line_parser.h b/src/c++/perf_analyzer/command_line_parser.h index 4d09f0fd8..a0706525c 100644 --- a/src/c++/perf_analyzer/command_line_parser.h +++ b/src/c++/perf_analyzer/command_line_parser.h @@ -131,7 +131,7 @@ struct PerfAnalyzerParameters { return ( using_concurrency_range || using_old_options || !(using_request_rate_range || using_custom_intervals || - using_periodic_concurrency_mode)); + is_using_periodic_concurrency_mode)); } // Sets the threshold for PA client overhead. @@ -150,8 +150,7 @@ struct PerfAnalyzerParameters { // The profile export file path. std::string profile_export_file{""}; - // Whether periodic concurrency mode is being used - bool using_periodic_concurrency_mode{false}; + bool is_using_periodic_concurrency_mode{false}; Range periodic_concurrency_range{1, 1, 1}; uint64_t periodic_concurrency_request_period{10}; diff --git a/src/c++/perf_analyzer/concurrency_worker.cc b/src/c++/perf_analyzer/concurrency_worker.cc index 8b76835d5..37a562f76 100644 --- a/src/c++/perf_analyzer/concurrency_worker.cc +++ b/src/c++/perf_analyzer/concurrency_worker.cc @@ -56,29 +56,21 @@ bool ConcurrencyWorker::RunInference() { HandleExecuteOff(); - if (HandleNoConcurrency()) { return true; } - CreateContextsAsNecessary(); - if (HandleExitConditions()) { return true; } - SendInferRequests(); - if (HandleExitConditions()) { return true; } - WaitForResponses(); - if (HandleExitConditions()) { return true; } - return false; } diff --git a/src/c++/perf_analyzer/inference_profiler.h b/src/c++/perf_analyzer/inference_profiler.h index ac5dd7d79..913b23ded 100644 --- a/src/c++/perf_analyzer/inference_profiler.h +++ b/src/c++/perf_analyzer/inference_profiler.h @@ -311,11 +311,11 @@ class InferenceProfiler { { auto& manager{dynamic_cast(*manager_)}; std::vector request_records{manager.RunExperiment()}; - + // FIXME - Refactor collector class to not need ID or window in the case of + // periodic concurrency mode InferenceLoadMode id{1, 0.0}; collector_->AddWindow(id, 0, UINT64_MAX); collector_->AddData(id, std::move(request_records)); - return cb::Error::Success; } diff --git a/src/c++/perf_analyzer/perf_analyzer.cc b/src/c++/perf_analyzer/perf_analyzer.cc index 24bab4893..44ec520f2 100644 --- a/src/c++/perf_analyzer/perf_analyzer.cc +++ b/src/c++/perf_analyzer/perf_analyzer.cc @@ -160,7 +160,7 @@ PerfAnalyzer::CreateAnalyzerObjects() } std::unique_ptr manager; - params_->using_periodic_concurrency_mode = true; + params_->is_using_periodic_concurrency_mode = true; params_->periodic_concurrency_range = { std::stoi(std::getenv("MY_START")), std::stoi(std::getenv("MY_END")), std::stoi(std::getenv("MY_STEP"))}; @@ -216,7 +216,7 @@ PerfAnalyzer::CreateAnalyzerObjects() factory, &manager), "failed to create concurrency manager"); - } else if (params_->using_periodic_concurrency_mode) { + } else if (params_->is_using_periodic_concurrency_mode) { manager = std::make_unique( params_->async, params_->streaming, params_->batch_size, params_->max_threads, params_->max_concurrency, @@ -384,7 +384,7 @@ PerfAnalyzer::Profile() err = profiler_->Profile( params_->concurrency_range.start, params_->concurrency_range.end, params_->concurrency_range.step, params_->search_mode, perf_statuses_); - } else if (params_->using_periodic_concurrency_mode) { + } else if (params_->is_using_periodic_concurrency_mode) { err = profiler_->ProfilePeriodicConcurrencyMode(); } else { err = profiler_->Profile( @@ -409,7 +409,7 @@ PerfAnalyzer::Profile() void PerfAnalyzer::WriteReport() { - if (!perf_statuses_.size() || params_->using_periodic_concurrency_mode) { + if (!perf_statuses_.size() || params_->is_using_periodic_concurrency_mode) { return; } diff --git a/src/c++/perf_analyzer/periodic_concurrency_manager.cc b/src/c++/perf_analyzer/periodic_concurrency_manager.cc index d3859f40e..526e9806e 100644 --- a/src/c++/perf_analyzer/periodic_concurrency_manager.cc +++ b/src/c++/perf_analyzer/periodic_concurrency_manager.cc @@ -42,7 +42,6 @@ PeriodicConcurrencyManager::MakeWorker( std::shared_ptr thread_config) { uint32_t id = workers_.size(); - auto worker = std::make_shared( id, thread_stat, thread_config, parser_, data_loader_, factory_, on_sequence_model_, async_, max_concurrency_, using_json_data_, @@ -52,35 +51,45 @@ PeriodicConcurrencyManager::MakeWorker( return worker; }; +void +PeriodicConcurrencyManager::MaybeAddConcurrentRequests() +{ + if (steps_completed_ * concurrency_range_.step < concurrency_range_.end) { + AddConcurrentRequests(concurrency_range_.step); + } +} + void PeriodicConcurrencyManager::AddConcurrentRequests( uint64_t num_concurrent_requests) { for (size_t i = 0; i < num_concurrent_requests; i++) { - threads_stat_.emplace_back(std::make_shared()); - threads_config_.emplace_back( - std::make_shared( - threads_config_.size(), 1, i)); - workers_.emplace_back( - MakeWorker(threads_stat_.back(), threads_config_.back())); - threads_.emplace_back(&IWorker::Infer, workers_.back()); - active_threads_++; + AddConcurrentRequest(i); } num_incomplete_periods_ = num_concurrent_requests; } +void +PeriodicConcurrencyManager::AddConcurrentRequest(size_t seq_stat_index_offset) +{ + threads_stat_.emplace_back(std::make_shared()); + threads_config_.emplace_back( + std::make_shared( + threads_config_.size(), 1, seq_stat_index_offset)); + workers_.emplace_back( + MakeWorker(threads_stat_.back(), threads_config_.back())); + threads_.emplace_back(&IWorker::Infer, workers_.back()); + active_threads_++; +} + void PeriodicConcurrencyManager::PeriodCompletedCallback() { std::lock_guard lock(period_completed_callback_mutex_); - num_incomplete_periods_--; - if (num_incomplete_periods_ == 0) { steps_completed_++; - if (steps_completed_ * concurrency_range_.step < concurrency_range_.end) { - AddConcurrentRequests(concurrency_range_.step); - } + MaybeAddConcurrentRequests(); } } @@ -88,9 +97,7 @@ void PeriodicConcurrencyManager::RequestCompletedCallback() { std::lock_guard lock(request_completed_callback_mutex_); - num_completed_requests_++; - if (num_completed_requests_ == concurrency_range_.end) { all_requests_completed_promise_.set_value(true); } @@ -108,13 +115,11 @@ std::vector PeriodicConcurrencyManager::GetRequestRecords() { std::vector request_records{}; - for (const auto& thread_stat : threads_stat_) { request_records.insert( request_records.end(), thread_stat->request_records_.cbegin(), thread_stat->request_records_.cend()); } - return request_records; } diff --git a/src/c++/perf_analyzer/periodic_concurrency_manager.h b/src/c++/perf_analyzer/periodic_concurrency_manager.h index 27c701f09..38c515875 100644 --- a/src/c++/perf_analyzer/periodic_concurrency_manager.h +++ b/src/c++/perf_analyzer/periodic_concurrency_manager.h @@ -60,8 +60,12 @@ class PeriodicConcurrencyManager : public ConcurrencyManager { std::shared_ptr thread_config) override; + void MaybeAddConcurrentRequests(); + void AddConcurrentRequests(uint64_t num_concurrent_requests); + void AddConcurrentRequest(size_t seq_stat_index_offset); + void PeriodCompletedCallback(); void RequestCompletedCallback(); @@ -82,7 +86,6 @@ class PeriodicConcurrencyManager : public ConcurrencyManager { std::bind(&PeriodicConcurrencyManager::PeriodCompletedCallback, this)}; std::function request_completed_callback_{ std::bind(&PeriodicConcurrencyManager::RequestCompletedCallback, this)}; - std::function&&)> finalize_callback_{nullptr}; }; }} // namespace triton::perfanalyzer