Skip to content

Commit

Permalink
Create initial support for NPU3 no-op throughput and latency tests
Browse files Browse the repository at this point in the history
Signed-off-by: Hayden Laccabue <[email protected]>
  • Loading branch information
hlaccabu committed Oct 15, 2024
1 parent ffe9e25 commit 50a3edb
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 50 deletions.
5 changes: 4 additions & 1 deletion src/runtime_src/core/common/query_requests.h
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,8 @@ struct sequence_name : request
tct_one_column,
tct_all_column,
gemm_int8,
aie_reconfig_overhead
aie_reconfig_overhead,
nop
};

static std::string
Expand All @@ -587,6 +588,8 @@ struct sequence_name : request
return "gemm_int8";
case type::aie_reconfig_overhead:
return "aie_reconfig_overhead";
case type::nop:
return "nop";
}
return "unknown";
}
Expand Down
113 changes: 68 additions & 45 deletions src/runtime_src/core/tools/common/tests/TestNPULatency.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
#include "xrt/xrt_hw_context.h"
#include "xrt/xrt_kernel.h"
#include <experimental/xrt_kernel.h>
#include "experimental/xrt_elf.h"
#include "experimental/xrt_ext.h"
#include "experimental/xrt_module.h"
namespace XBU = XBUtilities;
namespace xq = xrt_core::query;

#include <filesystem>

Expand All @@ -29,17 +33,6 @@ TestNPULatency::run(std::shared_ptr<xrt_core::device> dev)
boost::property_tree::ptree ptree = get_test_header();
ptree.erase("xclbin");

try {
set_threshold(dev, ptree);
if(XBU::getVerbose())
logger(ptree, "Details", boost::str(boost::format("Threshold is %.1f us") % get_threshold()));
}
catch (const std::runtime_error& ex) {
logger(ptree, "Details", ex.what());
ptree.put("status", test_token_skipped);
return ptree;
}

const auto xclbin_name = xrt_core::device_query<xrt_core::query::xclbin_name>(dev, xrt_core::query::xclbin_name::type::validate);
auto xclbin_path = findPlatformFile(xclbin_name, ptree);
if (!std::filesystem::exists(xclbin_path)){
Expand All @@ -59,41 +52,74 @@ TestNPULatency::run(std::shared_ptr<xrt_core::device> dev)
return ptree;
}

// Determine The DPU Kernel Name
auto xkernels = xclbin.get_kernels();

auto itr = std::find_if(xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel& k) {
auto name = k.get_name();
return name.rfind("DPU",0) == 0; // Starts with "DPU"
});

xrt::xclbin::kernel xkernel;
if (itr!=xkernels.end())
xkernel = *itr;
else {
logger(ptree, "Error", "No kernel with `DPU` found in the xclbin");
ptree.put("status", test_token_failed);
return ptree;
}
auto kernelName = xkernel.get_name();
if(XBU::getVerbose())
logger(ptree, "Details", boost::str(boost::format("Kernel name is '%s'") % kernelName));

auto working_dev = xrt::device(dev);
working_dev.register_xclbin(xclbin);

xrt::hw_context hwctx;
xrt::kernel testker;
try {
hwctx = xrt::hw_context(working_dev, xclbin.get_uuid());
testker = xrt::kernel(hwctx, kernelName);

if(xrt_core::device_query<xq::rom_vbnv>(dev).find("npu3") == std::string::npos) {
// Determine The DPU Kernel Name
auto xkernels = xclbin.get_kernels();

auto itr = std::find_if(xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel& k) {
auto name = k.get_name();
return name.rfind("DPU",0) == 0; // Starts with "DPU"
});

xrt::xclbin::kernel xkernel;
if (itr!=xkernels.end())
xkernel = *itr;
else {
logger(ptree, "Error", "No kernel with `DPU` found in the xclbin");
ptree.put("status", test_token_failed);
return ptree;
}
auto kernelName = xkernel.get_name();
if(XBU::getVerbose())
logger(ptree, "Details", boost::str(boost::format("Kernel name is '%s'") % kernelName));

try {
hwctx = xrt::hw_context(working_dev, xclbin.get_uuid());
testker = xrt::kernel(hwctx, kernelName);
}
catch (const std::exception& ex){
logger(ptree, "Error", ex.what());
ptree.put("status", test_token_failed);
return ptree;
}
}
catch (const std::exception& )
{
logger (ptree, "Error", "Not enough columns available. Please make sure no other workload is running on the device.");
ptree.put("status", test_token_failed);ptree.put("status", test_token_failed);
return ptree;
else {
//Elf flow
const auto elf_name = xrt_core::device_query<xrt_core::query::sequence_name>(dev, xrt_core::query::sequence_name::type::nop);
auto elf_path = findPlatformFile(elf_name, ptree);
if (!std::filesystem::exists(elf_path))
return ptree;

logger(ptree, "Elf", elf_path);

xrt::elf elf;
try {
elf = xrt::elf(elf_path);
}
catch (const std::runtime_error& ex) {
logger(ptree, "Error", ex.what());
ptree.put("status", test_token_failed);
return ptree;
}

xrt::module mod{elf};
try{
hwctx = xrt::hw_context{working_dev, xclbin.get_uuid()};
testker = xrt::ext::kernel{hwctx, mod, "dpu:{nop}"};
}
catch (const std::exception& ex){
logger(ptree, "Error", ex.what());
ptree.put("status", test_token_failed);
return ptree;
}
}

xrt::xclbin::ip cu;
for (const auto& ip : xclbin.get_ips()) {
if (ip.get_type() != xrt::xclbin::ip::ip_type::ps)
Expand All @@ -117,14 +143,14 @@ TestNPULatency::run(std::shared_ptr<xrt_core::device> dev)

if (arg.get_name() == "instruct")
bo = xrt::bo(hwctx, arg.get_size(), xrt::bo::flags::cacheable, testker.group_id(arg_idx));
else
else
bo = xrt::bo(working_dev, arg.get_size(), xrt::bo::flags::host_only, testker.group_id(arg_idx));

bo.sync(XCL_BO_SYNC_BO_TO_DEVICE);
global_args.push_back(bo);
run.set_arg(arg_idx, bo);
}
}
}

//Log
if(XBU::getVerbose()) {
Expand Down Expand Up @@ -153,10 +179,7 @@ TestNPULatency::run(std::shared_ptr<xrt_core::device> dev)

// Calculate end-to-end latency of one job execution
const double latency = (elapsed_secs / itr_count) * 1000000; //convert s to us

//check if the value is in range
result_in_range(latency, get_threshold(), ptree);

logger(ptree, "Details", boost::str(boost::format("Average latency: %.1f us") % latency));
ptree.put("status", test_token_passed);
return ptree;
}
8 changes: 4 additions & 4 deletions src/runtime_src/core/tools/xbutil2/SubCmdValidate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ print_status(test_status status, std::ostream & _ostream)
_ostream << ". Please run the command '--verbose' option for more details";
_ostream << std::endl;
}

/*
static void
get_alveo_platform_info(const std::shared_ptr<xrt_core::device>& device,
boost::property_tree::ptree& ptTree)
Expand Down Expand Up @@ -264,7 +264,7 @@ get_ryzen_platform_info(const std::shared_ptr<xrt_core::device>& device,
static void
get_platform_info(const std::shared_ptr<xrt_core::device>& device,
boost::property_tree::ptree& ptTree,
Report::SchemaVersion /*schemaVersion*/,
Report::SchemaVersion,
std::ostream& oStream)
{
auto bdf = xrt_core::device_query<xrt_core::query::pcie_bdf>(device);
Expand Down Expand Up @@ -296,7 +296,7 @@ get_platform_info(const std::shared_ptr<xrt_core::device>& device,
if (!boost::starts_with(power, ""))
oStream << boost::format(" %-22s: %s Watts\n") % "Power" % power;
}

*/
static test_status
run_test_suite_device( const std::shared_ptr<xrt_core::device>& device,
Report::SchemaVersion schemaVersion,
Expand All @@ -310,7 +310,7 @@ run_test_suite_device( const std::shared_ptr<xrt_core::device>& device,
if (testObjectsToRun.empty())
throw std::runtime_error("No test given to validate against.");

get_platform_info(device, ptDeviceInfo, schemaVersion, std::cout);
//get_platform_info(device, ptDeviceInfo, schemaVersion, std::cout);
std::cout << "-------------------------------------------------------------------------------" << std::endl;

int test_idx = 0;
Expand Down

0 comments on commit 50a3edb

Please sign in to comment.