From 2064297a36fc4a2fb14449d527151d62ad00faac Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Tue, 26 Sep 2023 17:35:30 -0700 Subject: [PATCH] Remove uint type and rebase --- src/c++/perf_analyzer/command_line_parser.cc | 15 ++++++--------- src/c++/perf_analyzer/command_line_parser.h | 6 +----- src/c++/perf_analyzer/docs/cli.md | 6 +++--- src/c++/perf_analyzer/perf_analyzer.cc | 10 +--------- src/c++/perf_analyzer/perf_utils.h | 3 +-- src/c++/perf_analyzer/test_command_line_parser.cc | 15 +++++++-------- 6 files changed, 19 insertions(+), 36 deletions(-) diff --git a/src/c++/perf_analyzer/command_line_parser.cc b/src/c++/perf_analyzer/command_line_parser.cc index 25b5f9af5..5a73d5927 100644 --- a/src/c++/perf_analyzer/command_line_parser.cc +++ b/src/c++/perf_analyzer/command_line_parser.cc @@ -334,8 +334,8 @@ CLParser::Usage(const std::string& msg) "--request-parameter : Specifies a custom " "parameter that can be sent to a Triton backend as part of the " "request. For example, providing '--request-parameter " - "max_tokens:256:uint' to the command line will set an additional " - "parameter 'max_tokens' of type 'uint' to 256 as part of the " + "max_tokens:256:int' to the command line will set an additional " + "parameter 'max_tokens' of type 'int' to 256 as part of the " "request. The --request-parameter may be specified multiple times " "for different custom parameters.", 18) @@ -1538,7 +1538,7 @@ CLParser::ParseCommandLine(int argc, char** argv) break; } case 59: { - params_->using_periodic_concurrency_range = true; + params_->is_using_periodic_concurrency_mode = true; std::string arg = optarg; std::vector values{SplitString(arg)}; if (values.size() < 2) { @@ -1605,9 +1605,6 @@ CLParser::ParseCommandLine(int argc, char** argv) if (type == "bool") { param.type = RequestParameterType::BOOL; param.bool_value = value == "true" ? true : false; - } else if (type == "uint") { - param.type = RequestParameterType::UINT; - param.uint_value = std::stoull(value); } else if (type == "int") { param.type = RequestParameterType::INT; param.int_value = std::stoll(value); @@ -1780,7 +1777,7 @@ CLParser::VerifyOptions() } std::vector load_modes{ - params_->using_periodic_concurrency_range, + params_->is_using_periodic_concurrency_mode, params_->using_concurrency_range, params_->using_request_rate_range, params_->using_custom_intervals}; if (std::count(load_modes.begin(), load_modes.end(), true) > 1) { @@ -1791,13 +1788,13 @@ CLParser::VerifyOptions() "--request-intervals."); } - if (params_->using_periodic_concurrency_range && !params_->streaming) { + if (params_->is_using_periodic_concurrency_mode && !params_->streaming) { Usage( "The --periodic-concurrency-range option requires bi-directional gRPC " "streaming."); } - if (params_->using_periodic_concurrency_range && + if (params_->is_using_periodic_concurrency_mode && (params_->profile_export_file == "")) { Usage( "Must provide --profile-export-file when using the " diff --git a/src/c++/perf_analyzer/command_line_parser.h b/src/c++/perf_analyzer/command_line_parser.h index 2dfcf8d43..518e7b2cf 100644 --- a/src/c++/perf_analyzer/command_line_parser.h +++ b/src/c++/perf_analyzer/command_line_parser.h @@ -58,9 +58,6 @@ struct PerfAnalyzerParameters { uint64_t measurement_window_ms = 5000; bool using_concurrency_range = false; Range concurrency_range{1, 1, 1}; - bool using_periodic_concurrency_range = false; - Range periodic_concurrency_range{1, 1, 1}; - uint64_t request_period = 10; std::unordered_map request_parameters; uint64_t latency_threshold_ms = NO_LIMIT; double stability_threshold = 0.1; @@ -155,9 +152,8 @@ struct PerfAnalyzerParameters { std::string profile_export_file{""}; bool is_using_periodic_concurrency_mode{false}; - Range periodic_concurrency_range{1, 1, 1}; - uint64_t periodic_concurrency_request_period{10}; + uint64_t request_period{10}; }; using PAParamsPtr = std::shared_ptr; diff --git a/src/c++/perf_analyzer/docs/cli.md b/src/c++/perf_analyzer/docs/cli.md index 6e5dd953e..5961224c8 100644 --- a/src/c++/perf_analyzer/docs/cli.md +++ b/src/c++/perf_analyzer/docs/cli.md @@ -207,12 +207,12 @@ Default value is `10`. #### `--request-parameter=` Specifies a custom parameter that can be sent to a Triton backend as part of -the request. For example, providing '--request-parameter max_tokens:256:uint' +the request. For example, providing '--request-parameter max_tokens:256:int' to the command line will set an additional parameter 'max_tokens' of type -'uint' to 256 as part of the request. The --request-parameter may be specified +'int' to 256 as part of the request. The --request-parameter may be specified multiple times for different custom parameters. -Valid `type` values are: `bool`, `int`, `uint`, and `string`. +Valid `type` values are: `bool`, `int`, and `string`. > **NOTE** > diff --git a/src/c++/perf_analyzer/perf_analyzer.cc b/src/c++/perf_analyzer/perf_analyzer.cc index 44ec520f2..c3e5e5f90 100644 --- a/src/c++/perf_analyzer/perf_analyzer.cc +++ b/src/c++/perf_analyzer/perf_analyzer.cc @@ -160,13 +160,6 @@ PerfAnalyzer::CreateAnalyzerObjects() } std::unique_ptr manager; - params_->is_using_periodic_concurrency_mode = true; - params_->periodic_concurrency_range = { - std::stoi(std::getenv("MY_START")), std::stoi(std::getenv("MY_END")), - std::stoi(std::getenv("MY_STEP"))}; - params_->periodic_concurrency_request_period = - std::stoi(std::getenv("MY_REQUEST_PERIOD")); - if (params_->targeting_concurrency()) { if ((parser_->SchedulerType() == pa::ModelParser::SEQUENCE) || (parser_->SchedulerType() == pa::ModelParser::ENSEMBLE_SEQUENCE)) { @@ -221,8 +214,7 @@ PerfAnalyzer::CreateAnalyzerObjects() params_->async, params_->streaming, params_->batch_size, params_->max_threads, params_->max_concurrency, params_->shared_memory_type, params_->output_shm_size, parser_, factory, - params_->periodic_concurrency_range, - params_->periodic_concurrency_request_period); + params_->periodic_concurrency_range, params_->request_period); } else if (params_->using_request_rate_range) { if ((params_->sequence_id_range != 0) && (params_->sequence_id_range < params_->num_of_sequences)) { diff --git a/src/c++/perf_analyzer/perf_utils.h b/src/c++/perf_analyzer/perf_utils.h index 887d38077..7be051a68 100644 --- a/src/c++/perf_analyzer/perf_utils.h +++ b/src/c++/perf_analyzer/perf_utils.h @@ -83,12 +83,11 @@ class Range { T step; }; -enum RequestParameterType { STRING = 0, INT = 1, UINT = 2, BOOL = 3 }; +enum RequestParameterType { STRING = 0, INT = 1, BOOL = 3 }; struct RequestParameter { std::string str_value; int64_t int_value; - uint64_t uint_value; bool bool_value; RequestParameterType type; }; diff --git a/src/c++/perf_analyzer/test_command_line_parser.cc b/src/c++/perf_analyzer/test_command_line_parser.cc index 2e666e9a2..bdf819ff8 100644 --- a/src/c++/perf_analyzer/test_command_line_parser.cc +++ b/src/c++/perf_analyzer/test_command_line_parser.cc @@ -176,8 +176,8 @@ CHECK_PARAMS(PAParamsPtr act, PAParamsPtr exp) CHECK(act->mpi_driver != nullptr); CHECK_STRING(act->memory_type, exp->memory_type); CHECK( - act->using_periodic_concurrency_range == - exp->using_periodic_concurrency_range); + act->is_using_periodic_concurrency_mode == + exp->is_using_periodic_concurrency_mode); CHECK( act->periodic_concurrency_range.start == exp->periodic_concurrency_range.start); @@ -200,8 +200,6 @@ CHECK_PARAMS(PAParamsPtr act, PAParamsPtr exp) CHECK(act_param.second.str_value == exp_param->second.str_value); } else if (act_param.second.type == RequestParameterType::INT) { CHECK(act_param.second.int_value == exp_param->second.int_value); - } else if (act_param.second.type == RequestParameterType::UINT) { - CHECK(act_param.second.uint_value == exp_param->second.uint_value); } else if (act_param.second.type == RequestParameterType::BOOL) { CHECK(act_param.second.bool_value == exp_param->second.bool_value); } @@ -1232,6 +1230,7 @@ TEST_CASE("Testing Command Line Parser") exp->async = true; exp->streaming = true; exp->url = "localhost:8001"; // gRPC url + exp->max_threads = 4; // not targeting concurrency SUBCASE("start provided") { @@ -1254,7 +1253,7 @@ TEST_CASE("Testing Command Line Parser") } CheckValidRange( - args, option_name, parser, act, exp->using_periodic_concurrency_range, + args, option_name, parser, act, exp->is_using_periodic_concurrency_mode, exp->periodic_concurrency_range); CheckInvalidRange(args, option_name, parser, act, check_params); @@ -1384,7 +1383,7 @@ TEST_CASE("Testing Command Line Parser") SUBCASE("valid parameter") { args.push_back(option_name); - args.push_back("max_tokens:256:uint"); + args.push_back("max_tokens:256:int"); int argc = args.size(); char* argv[argc]; @@ -1394,8 +1393,8 @@ TEST_CASE("Testing Command Line Parser") CHECK(!parser.UsageCalled()); RequestParameter param; - param.uint_value = 256; - param.type = RequestParameterType::UINT; + param.int_value = 256; + param.type = RequestParameterType::INT; exp->request_parameters["max_tokens"] = param; }