From 50a3edb798d91a1ef73270598cc4c8159d434807 Mon Sep 17 00:00:00 2001 From: Hayden Laccabue Date: Fri, 13 Sep 2024 14:47:10 -0700 Subject: [PATCH] Creating initial support for NPU3 no-op tests throughput and latency Signed-off-by: Hayden Laccabue --- src/runtime_src/core/common/query_requests.h | 5 +- .../tools/common/tests/TestNPULatency.cpp | 113 +++++++++++------- .../core/tools/xbutil2/SubCmdValidate.cpp | 8 +- 3 files changed, 76 insertions(+), 50 deletions(-) diff --git a/src/runtime_src/core/common/query_requests.h b/src/runtime_src/core/common/query_requests.h index 26e596da1d..001cea9e24 100644 --- a/src/runtime_src/core/common/query_requests.h +++ b/src/runtime_src/core/common/query_requests.h @@ -570,7 +570,8 @@ struct sequence_name : request tct_one_column, tct_all_column, gemm_int8, - aie_reconfig_overhead + aie_reconfig_overhead, + nop }; static std::string @@ -587,6 +588,8 @@ struct sequence_name : request return "gemm_int8"; case type::aie_reconfig_overhead: return "aie_reconfig_overhead"; + case type::nop: + return "nop"; } return "unknown"; } diff --git a/src/runtime_src/core/tools/common/tests/TestNPULatency.cpp b/src/runtime_src/core/tools/common/tests/TestNPULatency.cpp index aabc4d2939..5c7b08476f 100644 --- a/src/runtime_src/core/tools/common/tests/TestNPULatency.cpp +++ b/src/runtime_src/core/tools/common/tests/TestNPULatency.cpp @@ -10,7 +10,11 @@ #include "xrt/xrt_hw_context.h" #include "xrt/xrt_kernel.h" #include +#include "experimental/xrt_elf.h" +#include "experimental/xrt_ext.h" +#include "experimental/xrt_module.h" namespace XBU = XBUtilities; +namespace xq = xrt_core::query; #include @@ -29,17 +33,6 @@ TestNPULatency::run(std::shared_ptr dev) boost::property_tree::ptree ptree = get_test_header(); ptree.erase("xclbin"); - try { - set_threshold(dev, ptree); - if(XBU::getVerbose()) - logger(ptree, "Details", boost::str(boost::format("Threshold is %.1f us") % get_threshold())); - } - catch (const std::runtime_error& ex) { - logger(ptree, "Details", ex.what()); - ptree.put("status", test_token_skipped); - return ptree; - } - const auto xclbin_name = xrt_core::device_query(dev, xrt_core::query::xclbin_name::type::validate); auto xclbin_path = findPlatformFile(xclbin_name, ptree); if (!std::filesystem::exists(xclbin_path)){ @@ -59,41 +52,74 @@ TestNPULatency::run(std::shared_ptr dev) return ptree; } - // Determine The DPU Kernel Name - auto xkernels = xclbin.get_kernels(); - - auto itr = std::find_if(xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel& k) { - auto name = k.get_name(); - return name.rfind("DPU",0) == 0; // Starts with "DPU" - }); - - xrt::xclbin::kernel xkernel; - if (itr!=xkernels.end()) - xkernel = *itr; - else { - logger(ptree, "Error", "No kernel with `DPU` found in the xclbin"); - ptree.put("status", test_token_failed); - return ptree; - } - auto kernelName = xkernel.get_name(); - if(XBU::getVerbose()) - logger(ptree, "Details", boost::str(boost::format("Kernel name is '%s'") % kernelName)); - auto working_dev = xrt::device(dev); working_dev.register_xclbin(xclbin); xrt::hw_context hwctx; xrt::kernel testker; - try { - hwctx = xrt::hw_context(working_dev, xclbin.get_uuid()); - testker = xrt::kernel(hwctx, kernelName); + + if(xrt_core::device_query(dev).find("npu3") == std::string::npos) { + // Determine The DPU Kernel Name + auto xkernels = xclbin.get_kernels(); + + auto itr = std::find_if(xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel& k) { + auto name = k.get_name(); + return name.rfind("DPU",0) == 0; // Starts with "DPU" + }); + + xrt::xclbin::kernel xkernel; + if (itr!=xkernels.end()) + xkernel = *itr; + else { + logger(ptree, "Error", "No kernel with `DPU` found in the xclbin"); + ptree.put("status", test_token_failed); + return ptree; + } + auto kernelName = xkernel.get_name(); + if(XBU::getVerbose()) + logger(ptree, "Details", boost::str(boost::format("Kernel name is '%s'") % kernelName)); + + try { + hwctx = xrt::hw_context(working_dev, xclbin.get_uuid()); + testker = xrt::kernel(hwctx, kernelName); + } + catch (const std::exception& ex){ + logger(ptree, "Error", ex.what()); + ptree.put("status", test_token_failed); + return ptree; + } } - catch (const std::exception& ) - { - logger (ptree, "Error", "Not enough columns available. Please make sure no other workload is running on the device."); - ptree.put("status", test_token_failed);ptree.put("status", test_token_failed); - return ptree; + else { + //Elf flow + const auto elf_name = xrt_core::device_query(dev, xrt_core::query::sequence_name::type::nop); + auto elf_path = findPlatformFile(elf_name, ptree); + if (!std::filesystem::exists(elf_path)) + return ptree; + + logger(ptree, "Elf", elf_path); + + xrt::elf elf; + try { + elf = xrt::elf(elf_path); + } + catch (const std::runtime_error& ex) { + logger(ptree, "Error", ex.what()); + ptree.put("status", test_token_failed); + return ptree; + } + + xrt::module mod{elf}; + try{ + hwctx = xrt::hw_context{working_dev, xclbin.get_uuid()}; + testker = xrt::ext::kernel{hwctx, mod, "dpu:{nop}"}; + } + catch (const std::exception& ex){ + logger(ptree, "Error", ex.what()); + ptree.put("status", test_token_failed); + return ptree; + } } + xrt::xclbin::ip cu; for (const auto& ip : xclbin.get_ips()) { if (ip.get_type() != xrt::xclbin::ip::ip_type::ps) @@ -117,14 +143,14 @@ TestNPULatency::run(std::shared_ptr dev) if (arg.get_name() == "instruct") bo = xrt::bo(hwctx, arg.get_size(), xrt::bo::flags::cacheable, testker.group_id(arg_idx)); - else + else bo = xrt::bo(working_dev, arg.get_size(), xrt::bo::flags::host_only, testker.group_id(arg_idx)); bo.sync(XCL_BO_SYNC_BO_TO_DEVICE); global_args.push_back(bo); run.set_arg(arg_idx, bo); } - } + } //Log if(XBU::getVerbose()) { @@ -153,10 +179,7 @@ TestNPULatency::run(std::shared_ptr dev) // Calculate end-to-end latency of one job execution const double latency = (elapsed_secs / itr_count) * 1000000; //convert s to us - - //check if the value is in range - result_in_range(latency, get_threshold(), ptree); - logger(ptree, "Details", boost::str(boost::format("Average latency: %.1f us") % latency)); + ptree.put("status", test_token_passed); return ptree; } diff --git a/src/runtime_src/core/tools/xbutil2/SubCmdValidate.cpp b/src/runtime_src/core/tools/xbutil2/SubCmdValidate.cpp index 4668530d42..ab3a8f4ca2 100644 --- a/src/runtime_src/core/tools/xbutil2/SubCmdValidate.cpp +++ b/src/runtime_src/core/tools/xbutil2/SubCmdValidate.cpp @@ -229,7 +229,7 @@ print_status(test_status status, std::ostream & _ostream) _ostream << ". Please run the command '--verbose' option for more details"; _ostream << std::endl; } - +/* static void get_alveo_platform_info(const std::shared_ptr& device, boost::property_tree::ptree& ptTree) @@ -264,7 +264,7 @@ get_ryzen_platform_info(const std::shared_ptr& device, static void get_platform_info(const std::shared_ptr& device, boost::property_tree::ptree& ptTree, - Report::SchemaVersion /*schemaVersion*/, + Report::SchemaVersion, std::ostream& oStream) { auto bdf = xrt_core::device_query(device); @@ -296,7 +296,7 @@ get_platform_info(const std::shared_ptr& device, if (!boost::starts_with(power, "")) oStream << boost::format(" %-22s: %s Watts\n") % "Power" % power; } - +*/ static test_status run_test_suite_device( const std::shared_ptr& device, Report::SchemaVersion schemaVersion, @@ -310,7 +310,7 @@ run_test_suite_device( const std::shared_ptr& device, if (testObjectsToRun.empty()) throw std::runtime_error("No test given to validate against."); - get_platform_info(device, ptDeviceInfo, schemaVersion, std::cout); + //get_platform_info(device, ptDeviceInfo, schemaVersion, std::cout); std::cout << "-------------------------------------------------------------------------------" << std::endl; int test_idx = 0;