diff --git a/src/runtime_src/core/common/api/module_int.h b/src/runtime_src/core/common/api/module_int.h index 08909c7da88..75e95e723c4 100644 --- a/src/runtime_src/core/common/api/module_int.h +++ b/src/runtime_src/core/common/api/module_int.h @@ -50,6 +50,10 @@ sync(const xrt::module&); ert_cmd_opcode get_ert_opcode(const xrt::module& module); +// Dump scratch pad mem buffer +void +dump_scratchpad_mem(const xrt::module& module); + } // xrt_core::module_int #endif diff --git a/src/runtime_src/core/common/api/xrt_kernel.cpp b/src/runtime_src/core/common/api/xrt_kernel.cpp index c44028ed25d..ac792483ce6 100644 --- a/src/runtime_src/core/common/api/xrt_kernel.cpp +++ b/src/runtime_src/core/common/api/xrt_kernel.cpp @@ -2478,6 +2478,9 @@ class run_impl } m_usage_logger->log_kernel_run_info(kernel.get(), this, state); + static bool dump = xrt_core::config::get_feature_toggle("Debug.dump_scratchpad_mem"); + if (dump) + xrt_core::module_int::dump_scratchpad_mem(m_module); return state; } @@ -2504,6 +2507,10 @@ class run_impl if (state == ERT_CMD_STATE_COMPLETED) { m_usage_logger->log_kernel_run_info(kernel.get(), this, state); + static bool dump = xrt_core::config::get_feature_toggle("Debug.dump_scratchpad_mem"); + if (dump) + xrt_core::module_int::dump_scratchpad_mem(m_module); + return std::cv_status::no_timeout; } diff --git a/src/runtime_src/core/common/api/xrt_module.cpp b/src/runtime_src/core/common/api/xrt_module.cpp index 371b21980ad..7c3e7539b19 100644 --- a/src/runtime_src/core/common/api/xrt_module.cpp +++ b/src/runtime_src/core/common/api/xrt_module.cpp @@ -186,11 +186,13 @@ struct patcher void patch57_aie4(uint32_t* bd_data_ptr, uint64_t patch) { + constexpr uint64_t ddr_aie_addr_offset = 0x80000000; + uint64_t base_address = ((static_cast(bd_data_ptr[0]) & 0x1FFFFFF) << 32) | // NOLINT bd_data_ptr[1]; - base_address += patch; + base_address += patch + ddr_aie_addr_offset; //2G offset bd_data_ptr[1] = (uint32_t)(base_address & 0xFFFFFFFF); // NOLINT 
bd_data_ptr[0] = (bd_data_ptr[0] & 0xFE000000) | ((base_address >> 32) & 0x1FFFFFF);// NOLINT } @@ -1392,6 +1394,26 @@ class module_sram : public module_impl { return m_scratch_pad_mem; } + + void + dump_scratchpad_mem() + { + if (m_scratch_pad_mem.size() == 0) { + xrt_core::message::send(xrt_core::message::severity_level::debug, "xrt_module", + "preemption scratchpad memory is not available"); + return; + } + + // sync data from device before dumping into file + m_scratch_pad_mem.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + std::string dump_file_name = "preemption_scratchpad_mem" + std::to_string(get_id()) + ".bin"; + dump_bo(m_scratch_pad_mem, dump_file_name); + + std::string msg {"dumped file "}; + msg.append(dump_file_name); + xrt_core::message::send(xrt_core::message::severity_level::debug, "xrt_module", msg); + } }; } // namespace xrt @@ -1468,6 +1490,16 @@ get_ert_opcode(const xrt::module& module) return module.get_handle()->get_ert_opcode(); } +void +dump_scratchpad_mem(const xrt::module& module) +{ + auto module_sram = std::dynamic_pointer_cast(module.get_handle()); + if (!module_sram) + throw std::runtime_error("Getting module_sram failed, wrong module object passed\n"); + + module_sram->dump_scratchpad_mem(); +} + } // xrt_core::module_int //////////////////////////////////////////////////////////////// diff --git a/src/runtime_src/core/common/config_reader.h b/src/runtime_src/core/common/config_reader.h index 1aa477337cf..bbbfbe82636 100644 --- a/src/runtime_src/core/common/config_reader.h +++ b/src/runtime_src/core/common/config_reader.h @@ -254,6 +254,20 @@ get_ml_timeline_buffer_size() return value; } +inline bool +get_aie_pc() +{ + static bool value = detail::get_bool_value("Debug.aie_pc",false); + return value; +} + +inline std::string +get_aie_pc_settings() +{ + static std::string value = detail::get_string_value("AIE_pc_settings.addresses", ""); + return value; +} + inline bool get_aie_halt() { diff --git a/src/runtime_src/core/common/info_platform.cpp 
b/src/runtime_src/core/common/info_platform.cpp index 76452f7dd37..117b3e6c705 100644 --- a/src/runtime_src/core/common/info_platform.cpp +++ b/src/runtime_src/core/common/info_platform.cpp @@ -359,6 +359,35 @@ add_clock_info(const xrt_core::device* device, ptree_type& pt) } } +void +add_tops_info(const xrt_core::device* device, ptree_type& pt) +{ + ptree_type pt_tops_array; + + try + { + auto res_info = xrt_core::device_query(device); + if (res_info.empty()) + return; + + for (auto &res : res_info) + { + if (res.type != xrt_core::query::xrt_resource_raw::resource_type::ipu_tops_max) + continue; + + ptree_type pt_tops; + pt_tops.add("id", xq::xrt_resource_raw::get_name(res.type)); + pt_tops.add("value", res.data_double); + pt_tops_array.push_back(std::make_pair("", pt_tops)); + } + pt.put_child("tops", pt_tops_array); + } + catch (const xq::no_such_key &) + { + // ignoring if not available: Edge Case + } +} + void add_electrical_info(const xrt_core::device* device, ptree_type& pt) { @@ -421,6 +450,7 @@ add_platform_info(const xrt_core::device* device, ptree_type& pt_platform_array) ptree_type pt_platforms; add_static_region_info(device, pt_platform); + add_tops_info(device, pt_platform); add_status_info(device, pt_platform); const auto device_class = xrt_core::device_query_default(device, xrt_core::query::device_class::type::alveo); diff --git a/src/runtime_src/core/common/query_requests.h b/src/runtime_src/core/common/query_requests.h index 90b9eb8db05..26e596da1d0 100644 --- a/src/runtime_src/core/common/query_requests.h +++ b/src/runtime_src/core/common/query_requests.h @@ -77,6 +77,7 @@ enum class key_type ip_layout_raw, debug_ip_layout_raw, clock_freq_topology_raw, + xrt_resource_raw, dma_stream, device_status, kds_cu_info, @@ -1185,6 +1186,63 @@ struct clock_freq_topology_raw : request get(const device*) const = 0; }; +/** + * Return some of the resouces within a NPU device + */ +struct xrt_resource_raw : request +{ + /** + * enum class resource_type - 
represents the different types of resources + */ + enum class resource_type + { + ipu_clk_max, // Max H-Clocks, query returns uint64 value + ipu_tops_max, // Max TOPs, query returns double value + ipu_task_max, // Max Tasks, query returns uint64 value + ipu_tops_curr, // Current TOPs, query returns double value + ipu_task_curr // Current Tasks, query returns uint64 value + }; + + /** + * The buffer that holds the resource query data + */ + struct xrt_resource_query + { + resource_type type; + union + { + uint64_t data_uint64; // holds the value represented as uint64 + double data_double; // holds the value represented as double + }; + }; + + using result_type = std::vector; // get value type + static const key_type key = key_type::xrt_resource_raw; + + static std::string + get_name(xrt_core::query::xrt_resource_raw::resource_type type) + { + switch (type) + { + case resource_type::ipu_clk_max: + return "Max Supported H-Clocks"; + case resource_type::ipu_tops_max: + return "Max Supported TOPs"; + case resource_type::ipu_task_max: + return "Max Supported Tasks"; + case resource_type::ipu_tops_curr: + return "Current TOPs"; + case resource_type::ipu_task_curr: + return "Current Tasks"; + default: + throw xrt_core::internal_error("enum value does not exists"); + } + } + + virtual std::any + get(const device *) const = 0; +}; + struct xmc_version : request { using result_type = std::string; @@ -3750,7 +3808,7 @@ struct performance_mode : request /* * this request force enables or disables pre-emption globally - * 0: enable; 1: disable + * 1: enable; 0: disable */ struct preemption : request { diff --git a/src/runtime_src/core/common/xdp/profile.cpp b/src/runtime_src/core/common/xdp/profile.cpp index 1af0b4bf288..6932782dff2 100644 --- a/src/runtime_src/core/common/xdp/profile.cpp +++ b/src/runtime_src/core/common/xdp/profile.cpp @@ -181,6 +181,55 @@ finish_flush_device(void* handle) } // end namespace xrt_core::xdp::ml_timeline +namespace xrt_core::xdp::aie_pc { + 
+std::function update_device_cb; +std::function finish_flush_device_cb; + +void +register_callbacks(void* handle) +{ + #ifdef XDP_CLIENT_BUILD + using ftype = void (*)(void*); + + update_device_cb = reinterpret_cast(xrt_core::dlsym(handle, "updateDeviceAIEPC")); + finish_flush_device_cb = reinterpret_cast(xrt_core::dlsym(handle, "finishflushDeviceAIEPC")); + #else + (void)handle; + #endif + +} + +void +warning_callbacks() +{ +} + +void +load() +{ + static xrt_core::module_loader xdp_loader("xdp_aie_pc_plugin", + register_callbacks, + warning_callbacks); +} + +// Make connections +void +update_device(void* handle) +{ + if (update_device_cb) + update_device_cb(handle); +} + +void +finish_flush_device(void* handle) +{ + if (finish_flush_device_cb) + finish_flush_device_cb(handle); +} + +} // end namespace xrt_core::xdp::aie_pc + namespace xrt_core::xdp::pl_deadlock { std::function update_device_cb; @@ -338,7 +387,8 @@ update_device(void* handle) || xrt_core::config::get_aie_profile() || xrt_core::config::get_aie_trace() || xrt_core::config::get_aie_debug() - || xrt_core::config::get_aie_halt()) { + || xrt_core::config::get_aie_halt() + || xrt_core::config::get_aie_pc()) { /* All the above plugins are dependent on xdp_core library. So, * explicitly load it to avoid library search issue in implicit loading. */ @@ -400,6 +450,16 @@ update_device(void* handle) xrt_core::xdp::ml_timeline::update_device(handle); } + if (xrt_core::config::get_aie_pc()) { + try { + xrt_core::xdp::aie_pc::load(); + } + catch (...) 
{ + return; + } + xrt_core::xdp::aie_pc::update_device(handle); + } + #else if (xrt_core::config::get_pl_deadlock_detection() @@ -431,6 +491,8 @@ finish_flush_device(void* handle) xrt_core::xdp::aie::debug::end_debug(handle); if (xrt_core::config::get_ml_timeline()) xrt_core::xdp::ml_timeline::finish_flush_device(handle); + if (xrt_core::config::get_aie_pc()) + xrt_core::xdp::aie_pc::finish_flush_device(handle); #else diff --git a/src/runtime_src/core/edge/drm/zocl/common/zocl_bo.c b/src/runtime_src/core/edge/drm/zocl/common/zocl_bo.c index 62e2452c335..10561dd3522 100644 --- a/src/runtime_src/core/edge/drm/zocl/common/zocl_bo.c +++ b/src/runtime_src/core/edge/drm/zocl/common/zocl_bo.c @@ -400,7 +400,7 @@ zocl_create_bo(struct drm_device *dev, uint64_t unaligned_size, u32 user_flags) struct sg_table *zocl_gem_prime_get_sg_table(struct drm_gem_object *obj) { struct drm_zocl_bo *zocl_obj = to_zocl_bo(obj); - if (zocl_obj && (zocl_obj->flags & ZOCL_BO_FLAGS_CMA)) { + if (zocl_obj && !(zocl_obj->mm_node)) { return drm_gem_dma_get_sg_table(&zocl_obj->cma_base); } struct drm_device *drm = obj->dev; diff --git a/src/runtime_src/core/edge/drm/zocl/common/zocl_drv.c b/src/runtime_src/core/edge/drm/zocl/common/zocl_drv.c index dbe08502925..1f0ff207f95 100644 --- a/src/runtime_src/core/edge/drm/zocl/common/zocl_drv.c +++ b/src/runtime_src/core/edge/drm/zocl/common/zocl_drv.c @@ -586,7 +586,7 @@ void zocl_free_bo(struct drm_gem_object *obj) zocl_free_userptr_bo(obj); else if (zocl_obj->flags & ZOCL_BO_FLAGS_HOST_BO) zocl_free_host_bo(obj); - else if (zocl_obj->flags & ZOCL_BO_FLAGS_CMA) { + else if (!zocl_obj->mm_node) { /* Update memory usage statistics */ zocl_update_mem_stat(zdev, obj->size, -1, zocl_obj->mem_index); diff --git a/src/runtime_src/core/edge/user/aie/aie.cpp b/src/runtime_src/core/edge/user/aie/aie.cpp index dcb7fbfba76..15fd090334b 100755 --- a/src/runtime_src/core/edge/user/aie/aie.cpp +++ b/src/runtime_src/core/edge/user/aie/aie.cpp @@ -94,7 +94,7 @@ 
aie_array(const std::shared_ptr& device, const zynqaie::hwctx_ { dev_inst_obj = {0}; dev_inst = nullptr; - adf::driver_config driver_config = xrt_core::edge::aie::get_driver_config(device.get()); + adf::driver_config driver_config = xrt_core::edge::aie::get_driver_config(device.get(), hwctx_obj); XAie_SetupConfig(ConfigPtr, driver_config.hw_gen, @@ -136,23 +136,23 @@ aie_array(const std::shared_ptr& device, const zynqaie::hwctx_ dev_inst = &dev_inst_obj; - adf::aiecompiler_options aiecompiler_options = xrt_core::edge::aie::get_aiecompiler_options(device.get()); + adf::aiecompiler_options aiecompiler_options = xrt_core::edge::aie::get_aiecompiler_options(device.get(), hwctx_obj); adf::config_manager::initialize(dev_inst, driver_config.mem_num_rows, aiecompiler_options.broadcast_enable_core); fal_util::initialize(dev_inst); //resource manager initialization /* Initialize PLIO metadata */ - plio_configs = xrt_core::edge::aie::get_plios(device.get()); + plio_configs = xrt_core::edge::aie::get_plios(device.get(), hwctx_obj); /* Initialize gmio api instances */ - gmio_configs = xrt_core::edge::aie::get_gmios(device.get()); + gmio_configs = xrt_core::edge::aie::get_gmios(device.get(), hwctx_obj); for (auto config_itr = gmio_configs.begin(); config_itr != gmio_configs.end(); config_itr++) { auto p_gmio_api = std::make_shared(&config_itr->second); p_gmio_api->configure(); gmio_apis[config_itr->first] = p_gmio_api; } - external_buffer_configs = xrt_core::edge::aie::get_external_buffers(device.get()); + external_buffer_configs = xrt_core::edge::aie::get_external_buffers(device.get(), hwctx_obj); } aie_array:: diff --git a/src/runtime_src/core/tools/common/XBUtilities.cpp b/src/runtime_src/core/tools/common/XBUtilities.cpp index 131594e3827..2fe35bbdabc 100755 --- a/src/runtime_src/core/tools/common/XBUtilities.cpp +++ b/src/runtime_src/core/tools/common/XBUtilities.cpp @@ -762,11 +762,17 @@ fill_xrt_versions(const boost::property_tree::ptree& pt_xrt, if 
(boost::iequals(drv_name, "xclmgmt") && boost::iequals(driver.get("version", "N/A"), "unknown")) output << "WARNING: xclmgmt version is unknown. Is xclmgmt driver loaded? Or is MSD/MPD running?" << std::endl; } - if (!available_devices.empty()) { - const boost::property_tree::ptree& dev = available_devices.begin()->second; - if (dev.get("device_class") == xrt_core::query::device_class::enum_to_str(xrt_core::query::device_class::type::ryzen)) - output << boost::format(" %-20s : %s\n") % "NPU Firmware Version" % available_devices.begin()->second.get("firmware_version"); - else - output << boost::format(" %-20s : %s\n") % "Firmware Version" % available_devices.begin()->second.get("firmware_version"); + + try { + if (!available_devices.empty()) { + const boost::property_tree::ptree& dev = available_devices.begin()->second; + if (dev.get("device_class") == xrt_core::query::device_class::enum_to_str(xrt_core::query::device_class::type::ryzen)) + output << boost::format(" %-20s : %s\n") % "NPU Firmware Version" % available_devices.begin()->second.get("firmware_version"); + else + output << boost::format(" %-20s : %s\n") % "Firmware Version" % available_devices.begin()->second.get("firmware_version"); + } + } + catch (...) 
{ + //no device available } } diff --git a/src/runtime_src/core/tools/common/reports/platform/ReportRyzenPlatform.cpp b/src/runtime_src/core/tools/common/reports/platform/ReportRyzenPlatform.cpp index f962d31999f..cfbf38f76b1 100644 --- a/src/runtime_src/core/tools/common/reports/platform/ReportRyzenPlatform.cpp +++ b/src/runtime_src/core/tools/common/reports/platform/ReportRyzenPlatform.cpp @@ -63,6 +63,16 @@ ReportRyzenPlatform::writeReport(const xrt_core::device* /*_pDevice*/, } } + const boost::property_tree::ptree& tops = pt_platform.get_child("tops", empty_ptree); + if (!tops.empty()) { + _output << "\nTOPs\n"; + for (const auto& kt : tops) { + const boost::property_tree::ptree& pt_tops = kt.second; + std::string tops_name_type = pt_tops.get("id"); + _output << boost::format(" %-23s: %s Terabyte ops/second\n") % tops_name_type % pt_tops.get("value"); + } + } + auto watts = pt_platform.get("electrical.power_consumption_watts", "N/A"); if (watts != "N/A") _output << std::endl << boost::format("%-23s : %s Watts\n") % "Power" % watts; diff --git a/src/runtime_src/core/tools/common/tests/TestGemm.cpp b/src/runtime_src/core/tools/common/tests/TestGemm.cpp index 4c97aaac159..ab8bcc6b3b0 100644 --- a/src/runtime_src/core/tools/common/tests/TestGemm.cpp +++ b/src/runtime_src/core/tools/common/tests/TestGemm.cpp @@ -22,13 +22,9 @@ static constexpr size_t host_app = 1; //opcode static constexpr uint32_t num_of_cores = 32; /* -* Essentially, we are doing 4 unrolled loop of 8x8_8x8 matmult. -* Each 8x8_8x8 matmult involves 8x8x8=512 MAC or 512*2 OP=1024 OPs. -* Total inner*outer loop count= 2*2*12*4 (4 for unrolled loop)=192. -* Total OPs= 192*1024= 192K OPs. +* Total OPs= = 196K OPs. 
*/ -static constexpr uint32_t total_ops = ((8*8*8)*2)*(2*2*12*4); //192K OPs - +static constexpr uint32_t total_ops = 196608; //192K OPs // ----- C L A S S M E T H O D S ------------------------------------------- TestGemm::TestGemm() @@ -133,11 +129,13 @@ TestGemm::run(std::shared_ptr dev) // Create 128KB Debug BO to capture TOPS data xrt::bo bo_result = xrt_core::bo_int::create_debug_bo(hwctx, 0x20000); - // wait until clock reaches the targeted frequency - auto const target_h_clock_freq = 1810; + // wait until clock reaches the max frequency + int ipu_hclock_pre = 0; int ipu_hclock = 0; - while (ipu_hclock < target_h_clock_freq) { - //get h-clock + auto hclock_steady_counter = 0; + auto first_steady_state = -1, second_steady_state = -1;; + + for(int i=0; i<100;i++){ auto raw = xrt_core::device_query(dev); auto clock_topology = reinterpret_cast(raw.data()); for (int c = 0; c < clock_topology->m_count; c++) { @@ -145,6 +143,27 @@ TestGemm::run(std::shared_ptr dev) ipu_hclock = clock_topology->m_clock_freq[c].m_freq_Mhz; } std::this_thread::sleep_for(std::chrono::milliseconds(50)); + //std::cout << "NPU clock: " << ipu_hclock <= 1810) { + //break; + first_steady_state = ipu_hclock_pre; + hclock_steady_counter = 0; + } + + if(hclock_steady_counter == 8 && first_steady_state != -1 && second_steady_state == -1 && ipu_hclock > first_steady_state) { + //break; + second_steady_state = ipu_hclock; + hclock_steady_counter = 0; + } + + if (hclock_steady_counter == 8 && second_steady_state != -1 && ipu_hclock > second_steady_state) { + break; + } + + ipu_hclock_pre = ipu_hclock; // Update hclk with hclk_pre + } try { diff --git a/src/runtime_src/core/tools/xbutil2/OO_Preemption.cpp b/src/runtime_src/core/tools/xbutil2/OO_Preemption.cpp index 5d3b837a779..b50db4a15c2 100644 --- a/src/runtime_src/core/tools/xbutil2/OO_Preemption.cpp +++ b/src/runtime_src/core/tools/xbutil2/OO_Preemption.cpp @@ -85,10 +85,10 @@ OO_Preemption::execute(const SubCmdOptions& _options) const try { 
if (boost::iequals(m_action, "enable")) { - xrt_core::device_update(device.get(), 0); // default + xrt_core::device_update(device.get(), static_cast(1)); // default } else if (boost::iequals(m_action, "disable")) { - xrt_core::device_update(device.get(), 1); + xrt_core::device_update(device.get(), static_cast(0)); } else { throw xrt_core::error(boost::str(boost::format("Invalid force-preemption value: '%s'\n") % m_action)); diff --git a/src/runtime_src/tools/scripts/setup.csh b/src/runtime_src/tools/scripts/setup.csh index 1228dd6983c..44d8d19acaf 100644 --- a/src/runtime_src/tools/scripts/setup.csh +++ b/src/runtime_src/tools/scripts/setup.csh @@ -4,23 +4,27 @@ # Copyright (C) 2019-2021 Xilinx, Inc. All rights reserved. # -set called=($_) +#set called=($_) set script_path="" set xrt_dir="" -# check if script is sourced or executed -if ("$called" != "") then -# sourced - set script=$called[2] -else -# executed - set script=$0 -endif - -set script_rel_rootdir = `dirname $script` -set script_path = `cd $script_rel_rootdir && pwd` - -set xrt_dir = $script_path +# revisit if there is a better way than lsof to obtain the script path +# in non-interactive mode. If lsof is needed, then revisit why +# why sbin need to be prepended looks like some environment issue in +# user shell, e.g. /usr/local/bin/mis_env: No such file or directory. +# is because user path contain bad directories that are searched when +# looking of lsof. 
+set path=(/usr/sbin $path) +set called=(`\lsof +p $$ |\grep setup.csh`) + +# look for the right cmd component that contains setup.csh +foreach x ($called) + if ( "$x" =~ *setup.csh ) then + set script_path=`readlink -f $x` + set xrt_dir=`dirname $script_path` + endif + if ( $xrt_dir =~ */xrt ) break +end if ( $xrt_dir !~ */xrt ) then echo "Invalid location: $xrt_dir" diff --git a/src/runtime_src/xdp/profile/plugin/CMakeLists.txt b/src/runtime_src/xdp/profile/plugin/CMakeLists.txt index f289cf3c091..efd3a4cdaed 100644 --- a/src/runtime_src/xdp/profile/plugin/CMakeLists.txt +++ b/src/runtime_src/xdp/profile/plugin/CMakeLists.txt @@ -12,6 +12,7 @@ if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") add_subdirectory(aie_trace) add_subdirectory(aie_debug) add_subdirectory(aie_halt) + add_subdirectory(aie_pc) add_subdirectory(ml_timeline) endif() else() diff --git a/src/runtime_src/xdp/profile/plugin/aie_pc/CMakeLists.txt b/src/runtime_src/xdp/profile/plugin/aie_pc/CMakeLists.txt new file mode 100644 index 00000000000..e62bbf2c485 --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_pc/CMakeLists.txt @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. +# + +# ======================================================================== +# This builds the AIE PC Plugin which can configure Perf Counter registers +# to count cycles between core function start and end. +# It is currently built on Client Windows Only. 
+# ========================================================================= + +if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") + set(IMPL_DIR "${PROFILE_DIR}/plugin/aie_pc/clientDev") +endif() + + +file(GLOB XDP_AIE_PC_PLUGIN_FILES + "${PROFILE_DIR}/plugin/aie_pc/*.h" + "${PROFILE_DIR}/plugin/aie_pc/*.cpp" + "${IMPL_DIR}/*.h" + "${IMPL_DIR}/*.cpp" +) + +file(GLOB XDP_DEVICE_COMMON_FILES + "${PROFILE_DIR}/device/common/*.h" + "${PROFILE_DIR}/device/common/*.cpp" +) + +if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") + add_library(xdp_aie_pc_plugin MODULE ${XDP_AIE_PC_PLUGIN_FILES} ${XDP_DEVICE_COMMON_FILES}) + add_dependencies(xdp_aie_pc_plugin xdp_core xrt_coreutil) + target_link_libraries(xdp_aie_pc_plugin PRIVATE xdp_core xrt_coreutil xaiengine) + target_compile_definitions(xdp_aie_pc_plugin PRIVATE XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) + target_include_directories(xdp_aie_pc_plugin PRIVATE ${AIERT_DIR}/include) + set_target_properties(xdp_aie_pc_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) + + install (TARGETS xdp_aie_pc_plugin + LIBRARY DESTINATION ${XDP_PLUGIN_INSTALL_DIR} + ) + +# Else, on edge-aarch64 don't build at all + +endif() \ No newline at end of file diff --git a/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_cb.cpp b/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_cb.cpp new file mode 100644 index 00000000000..6d7dc29bb09 --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_cb.cpp @@ -0,0 +1,52 @@ +/** + * Copyright (C) 2024 Advanced Micro Devices, Inc. - All rights reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. 
A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +#define XDP_PLUGIN_SOURCE + +#include "xdp/profile/plugin/aie_pc/aie_pc_cb.h" +#include "xdp/profile/plugin/aie_pc/aie_pc_plugin.h" + +namespace xdp { + + static AIEPCPlugin aiePCPluginInstance; + + static void updateDeviceAIEPC(void* hwCtxImpl) + { + if (AIEPCPlugin::alive()) { + aiePCPluginInstance.updateDevice(hwCtxImpl); + } + } + + static void finishflushDeviceAIEPC(void* hwCtxImpl) + { + if (AIEPCPlugin::alive()) { + aiePCPluginInstance.finishflushDevice(hwCtxImpl); + } + } + +} // end namespace xdp + +extern "C" +void updateDeviceAIEPC(void* hwCtxImpl) +{ + xdp::updateDeviceAIEPC(hwCtxImpl); +} + +extern "C" +void finishflushDeviceAIEPC(void* hwCtxImpl) +{ + xdp::finishflushDeviceAIEPC(hwCtxImpl); +} \ No newline at end of file diff --git a/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_cb.h b/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_cb.h new file mode 100644 index 00000000000..549fe38112c --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_cb.h @@ -0,0 +1,28 @@ +/** + * Copyright (C) 2024 Advanced Micro Devices, Inc. - All rights reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +#ifndef XDP_PLUGIN_AIE_PC_CB_H +#define XDP_PLUGIN_AIE_PC_CB_H + +#include "xdp/config.h" + +extern "C" { + + XDP_PLUGIN_EXPORT void updateDeviceAIEPC(void* hwCtxImpl); + XDP_PLUGIN_EXPORT void finishflushDeviceAIEPC(void* hwCtxImpl); + +} +#endif diff --git a/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_impl.h b/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_impl.h new file mode 100644 index 00000000000..459fdf4d8b6 --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_impl.h @@ -0,0 +1,51 @@ +/** + * Copyright (C) 2024 Advanced Micro Devices, Inc. - All rights reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +#ifndef XDP_PLUGIN_AIE_PC_IMPL_H +#define XDP_PLUGIN_AIE_PC_IMPL_H + +#include "core/include/xrt/xrt_hw_context.h" + +namespace xdp { + + class VPDatabase; + + class AIEPCImpl + { + protected : + VPDatabase* db = nullptr; + xrt::hw_context mHwContext; + + public: + AIEPCImpl(VPDatabase* dB) + : db(dB) + {} + + AIEPCImpl() = delete; + + virtual ~AIEPCImpl() {} + + virtual void updateDevice(void*) = 0; + virtual void finishflushDevice(void*) = 0; + + void setHwContext(xrt::hw_context ctx) + { + mHwContext = std::move(ctx); + } + }; + +} +#endif \ No newline at end of file diff --git a/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_plugin.cpp b/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_plugin.cpp new file mode 100644 index 00000000000..e189a3d0077 --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_plugin.cpp @@ -0,0 +1,123 @@ +/** + * Copyright (C) 2024 Advanced Micro Devices, Inc. - All rights reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +#define XDP_PLUGIN_SOURCE + +#include +#include + +#include "core/common/device.h" +#include "core/common/message.h" +#include "core/common/api/hw_context_int.h" + +#include "xdp/profile/plugin/aie_pc/aie_pc_plugin.h" +#include "xdp/profile/plugin/vp_base/info.h" +#include "xdp/profile/plugin/vp_base/utility.h" + +#ifdef XDP_CLIENT_BUILD +#include "xdp/profile/plugin/aie_pc/clientDev/aie_pc.h" +#endif + +namespace xdp { + + bool AIEPCPlugin::live = false; + + AIEPCPlugin::AIEPCPlugin() + : XDPPlugin() + { + AIEPCPlugin::live = true; + + db->registerPlugin(this); + db->registerInfo(info::aie_pc); + } + + AIEPCPlugin::~AIEPCPlugin() + { + if (VPDatabase::alive()) { + try { + writeAll(false); + } + catch (...) { + } + db->unregisterPlugin(this); + } + + AIEPCPlugin::live = false; + } + + bool AIEPCPlugin::alive() + { + return AIEPCPlugin::live; + } + + void AIEPCPlugin::updateDevice(void* hwCtxImpl) + { +#ifdef XDP_CLIENT_BUILD + if (mHwCtxImpl) { + // For client device flow, only 1 device and xclbin is supported now. 
+ return; + } + mHwCtxImpl = hwCtxImpl; + + xrt::hw_context hwContext = xrt_core::hw_context_int::create_hw_context_from_implementation(mHwCtxImpl); + std::shared_ptr coreDevice = xrt_core::hw_context_int::get_core_device(hwContext); + + // Only one device for Client Device flow + uint64_t deviceId = db->addDevice("win_device"); + (db->getStaticInfo()).updateDeviceClient(deviceId, coreDevice); + (db->getStaticInfo()).setDeviceName(deviceId, "win_device"); + + DeviceDataEntry.valid = true; + DeviceDataEntry.implementation = std::make_unique(db); + DeviceDataEntry.implementation->setHwContext(hwContext); + DeviceDataEntry.implementation->updateDevice(mHwCtxImpl); +#endif + } + + void AIEPCPlugin::finishflushDevice(void* hwCtxImpl) + { +#ifdef XDP_CLIENT_BUILD + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "AIE PC Plugin Finish Flush"); + if (!mHwCtxImpl || !DeviceDataEntry.valid) { + return; + } + + if (hwCtxImpl != mHwCtxImpl) { + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "New Hw Context Impl passed in AIE PC Plugin."); + return; + } + + DeviceDataEntry.valid = false; + DeviceDataEntry.implementation->finishflushDevice(mHwCtxImpl); +#endif + } + + void AIEPCPlugin::writeAll(bool /*openNewFiles*/) + { +#ifdef XDP_CLIENT_BUILD + + if (!mHwCtxImpl || !DeviceDataEntry.valid) { + return; + } + + // For client device flow, only 1 device and xclbin is supported now. + DeviceDataEntry.valid = false; + DeviceDataEntry.implementation->finishflushDevice(mHwCtxImpl); +#endif + } + +} \ No newline at end of file diff --git a/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_plugin.h b/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_plugin.h new file mode 100644 index 00000000000..c9a32002cf0 --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_pc/aie_pc_plugin.h @@ -0,0 +1,54 @@ +/** + * Copyright (C) 2024 Advanced Micro Devices, Inc. 
- All rights reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +#ifndef XDP_AIE_PC_PLUGIN_H +#define XDP_AIE_PC_PLUGIN_H + +#include "xdp/profile/plugin/aie_pc/aie_pc_impl.h" +#include "xdp/profile/plugin/vp_base/vp_base_plugin.h" + + +namespace xdp { + + class AIEPCPlugin : public XDPPlugin + { + public: + + AIEPCPlugin(); + ~AIEPCPlugin(); + + void updateDevice(void* hwCtxImpl); + void finishflushDevice(void* hwCtxImpl); + + void writeAll(bool); + + static bool alive(); + + private: + static bool live; + + struct DeviceData { + bool valid; + std::unique_ptr implementation; + } DeviceDataEntry; + + void* mHwCtxImpl = nullptr; + + }; + +} // end namespace xdp + +#endif diff --git a/src/runtime_src/xdp/profile/plugin/aie_pc/clientDev/aie_pc.cpp b/src/runtime_src/xdp/profile/plugin/aie_pc/clientDev/aie_pc.cpp new file mode 100644 index 00000000000..ac82cbaa831 --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_pc/clientDev/aie_pc.cpp @@ -0,0 +1,392 @@ +/** + * Copyright (C) 2024 Advanced Micro Devices, Inc. - All rights reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. 
A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +#define XDP_PLUGIN_SOURCE + +#include +#include +#include + +#include "core/common/device.h" +#include "core/common/message.h" +#include "core/common/api/hw_context_int.h" + +#include "xdp/profile/database/database.h" +#include "xdp/profile/database/static_info/aie_util.h" +#include "xdp/profile/database/static_info/aie_constructs.h" +#include "xdp/profile/device/common/client_transaction.h" +#include "xdp/profile/plugin/aie_pc/clientDev/aie_pc.h" +#include "xdp/profile/plugin/vp_base/utility.h" +#include "xdp/profile/plugin/vp_base/info.h" + +#include "core/common/api/bo_int.h" +#include "xrt/xrt_bo.h" +#include "core/common/api/xclbin_int.h" +#include "core/include/xclbin.h" + +extern "C" { + #include + #include + #include +} + +namespace xdp { + + struct PCInfo { + uint64_t startPC; + uint64_t endPC; + XAie_Events startPCEvent; + XAie_Events endPCEvent; + uint64_t perfCounterOffset; + uint8_t perfCounterId; + }; + + struct TilePCInfo { + std::unique_ptr eventsCorePC_0_1; + std::unique_ptr eventsCorePC_2_3; + }; + + AIEPCClientDevImpl::AIEPCClientDevImpl(VPDatabase*dB) + : AIEPCImpl(dB) + { + } + + void AIEPCClientDevImpl::updateDevice(void* hwCtxImpl) + { + (void)hwCtxImpl; + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "In AIEPCClientDevImpl::updateDevice"); + + std::unique_ptr txnHandler + = std::make_unique(mHwContext, "AIE PC"); + + if (!txnHandler->initializeKernel("XDP_KERNEL")) + return; + + boost::property_tree::ptree aieMetadata; + try { + auto device = 
xrt_core::hw_context_int::get_core_device(mHwContext); + xrt::xclbin xrtXclbin = device.get()->get_xclbin(device.get()->get_xclbin_uuid()); + auto data = xrt_core::xclbin_int::get_axlf_section(xrtXclbin, AIE_METADATA); + + if (!data.first || !data.second) { + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", "Empty AIE Metadata in xclbin"); + return; + } + + std::stringstream ss; + ss.write(data.first,data.second); + + boost::property_tree::read_json(ss, aieMetadata); + } catch (const std::exception& e) { + std::string msg("AIE Metadata could not be read/processed from xclbin: "); + msg += e.what(); + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", msg); + return; + } + + std::string str = xrt_core::config::get_aie_pc_settings(); + + std::vector addresses; + + std::map>>::iterator itrSpec; + std::map>::iterator itrTileInfo; + + uint32_t nEntries = 0; + + boost::split(addresses, str, boost::is_any_of(";")); + // Format : col, row:start_pc:end_pc ; col,row:start_pc:end_pc + for (uint32_t i = 0; i < addresses.size(); i++) { + std::vector address; + boost::split(address, addresses[i], boost::is_any_of(":")); + if (3 != address.size()) { + continue; + } + std::vector loc; + boost::split(loc, address[0], boost::is_any_of(",")); + if (2 != loc.size()) { + continue; + } + uint64_t col = 0, row = 0; + col = std::stoul(loc[0], nullptr, 10); + row = std::stoul(loc[1], nullptr, 10); + + itrSpec = spec.find(col); + // No entries added for current column + if (itrSpec == spec.end()) { + // Populate info + std::unique_ptr info = std::make_unique(); + info->startPC = std::stoul(address[1], nullptr, 10); + info->endPC = std::stoul(address[2], nullptr, 10); + info->startPCEvent = (XAie_Events)XAIE_EVENT_PC_0_CORE; + info->endPCEvent = (XAie_Events)XAIE_EVENT_PC_1_CORE; + info->perfCounterId = 0; + info->perfCounterOffset = 0x0031520; + + std::stringstream msg; + msg << "Configure PC event for Core " + << col << ", " << row << " 
Start PC " << info->startPC << " End PC " << info->endPC + << " using perf counter id " << std::to_string(info->perfCounterId) + << " perf counter address " << std::hex << info->perfCounterOffset << std::dec; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", msg.str()); + + // Add new map for current column + spec[col] = std::map>(); + + // Add TilePCInfo entry for current column, row + spec[col][row] = std::make_unique(); + spec[col][row]->eventsCorePC_0_1 = std::move(info); + + nEntries++; + continue; + } + + // Entry found for current column + if (itrSpec != spec.end()) { + itrTileInfo = itrSpec->second.find(row); + + // No entry found for current column,row core tile + if (itrTileInfo == itrSpec->second.end()) { + // Populate info + std::unique_ptr info = std::make_unique(); + info->startPC = std::stoul(address[1], nullptr, 10); + info->endPC = std::stoul(address[2], nullptr, 10); + info->startPCEvent = (XAie_Events)XAIE_EVENT_PC_0_CORE; + info->endPCEvent = (XAie_Events)XAIE_EVENT_PC_1_CORE; + info->perfCounterId = 0; + info->perfCounterOffset = 0x0031520; + + std::stringstream msg; + msg << "Configure PC event for Core " + << col << ", " << row << " Start PC " << info->startPC << " End PC " << info->endPC + << " using perf counter id " << std::to_string(info->perfCounterId) + << " perf counter address " << std::hex << info->perfCounterOffset << std::dec; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", msg.str()); + + // Add TilePCInfo entry for current column, row + itrSpec->second.emplace(row, std::make_unique()); + spec[col][row]->eventsCorePC_0_1 = std::move(info); + + nEntries++; + continue; + } + + // Entry found for current column,row core tile + if (itrTileInfo != itrSpec->second.end()) { + + // Check whether XAIE_EVENT_PC_2_CORE, XAIE_EVENT_PC_3_CORE configured for current column,row core tile + if (nullptr == itrTileInfo->second->eventsCorePC_2_3) { + + // Populate info + std::unique_ptr info = 
std::make_unique(); + info->startPC = std::stoul(address[1], nullptr, 10); + info->endPC = std::stoul(address[2], nullptr, 10); + info->startPCEvent = (XAie_Events)XAIE_EVENT_PC_2_CORE; + info->endPCEvent = (XAie_Events)XAIE_EVENT_PC_3_CORE; + info->perfCounterId = 1; + info->perfCounterOffset = 0x0031524; + + std::stringstream msg; + msg << "Configure PC event for Core " + << col << ", " << row << " Start PC " << info->startPC << " End PC " << info->endPC + << " using perf counter id " << std::to_string(info->perfCounterId) + << " perf counter address " << std::hex << info->perfCounterOffset << std::dec; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", msg.str()); + + itrTileInfo->second->eventsCorePC_2_3 = std::move(info); + nEntries++; + continue; + } else { + std::string msg; + msg += "Core PC Events for tile in settings " + addresses[i] + + " are already used up. So, it is ignored. Please use a different core for this Start/End PC addresses.\n"; + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", msg); + continue; + } + } + } + } + // end parsing settings and populating desired configurations + + xdp::aie::driver_config meta_config = xdp::aie::getDriverConfig(aieMetadata, "aie_metadata.driver_config"); + + XAie_Config cfg { + meta_config.hw_gen, + meta_config.base_address, + meta_config.column_shift, + meta_config.row_shift, + meta_config.num_rows, + meta_config.num_columns, + meta_config.shim_row, + meta_config.mem_row_start, + meta_config.mem_num_rows, + meta_config.aie_tile_row_start, + meta_config.aie_tile_num_rows, + {0} + }; + + auto RC = XAie_CfgInitialize(&aieDevInst, &cfg); + if (RC != XAIE_OK) { + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", "AIE Driver Initialization Failed."); + return; + } + + XAie_StartTransaction(&aieDevInst, XAIE_TRANSACTION_DISABLE_AUTO_FLUSH); + + for (auto const &specEntry : spec) { + for (auto const &rowEntry : specEntry.second) { + auto 
coreTile = XAie_TileLoc(static_cast(specEntry.first), static_cast(rowEntry.first)); + + if (rowEntry.second->eventsCorePC_0_1) { + XAie_EventPCEnable(&aieDevInst, coreTile, 0, static_cast(rowEntry.second->eventsCorePC_0_1->startPC)); + XAie_EventPCEnable(&aieDevInst, coreTile, 1, static_cast(rowEntry.second->eventsCorePC_0_1->endPC)); + + // Reset Perf Counter + RC = XAie_PerfCounterReset(&aieDevInst, coreTile, XAIE_CORE_MOD, rowEntry.second->eventsCorePC_0_1->perfCounterId); + if(RC != XAIE_OK) { + xrt_core::message::send(xrt_core::message::severity_level::error, "XRT", "AIE Performance Counter Reset Failed."); + return; + } + + RC = XAie_PerfCounterControlSet(&aieDevInst, coreTile, XAIE_CORE_MOD, rowEntry.second->eventsCorePC_0_1->perfCounterId, + rowEntry.second->eventsCorePC_0_1->startPCEvent, rowEntry.second->eventsCorePC_0_1->endPCEvent); + + if(RC != XAIE_OK) { + xrt_core::message::send(xrt_core::message::severity_level::error, "XRT", "AIE Performance Counter Set with Function Call and Return Failed."); + return; + } + } + if (rowEntry.second->eventsCorePC_2_3) { + XAie_EventPCEnable(&aieDevInst, coreTile, 2, static_cast(rowEntry.second->eventsCorePC_2_3->startPC)); + XAie_EventPCEnable(&aieDevInst, coreTile, 3, static_cast(rowEntry.second->eventsCorePC_2_3->endPC)); + + // Reset Perf Counter + RC = XAie_PerfCounterReset(&aieDevInst, coreTile, XAIE_CORE_MOD, rowEntry.second->eventsCorePC_2_3->perfCounterId); + if(RC != XAIE_OK) { + xrt_core::message::send(xrt_core::message::severity_level::error, "XRT", "AIE Performance Counter Reset Failed."); + return; + } + RC = XAie_PerfCounterControlSet(&aieDevInst, coreTile, XAIE_CORE_MOD, rowEntry.second->eventsCorePC_2_3->perfCounterId, + rowEntry.second->eventsCorePC_2_3->startPCEvent, rowEntry.second->eventsCorePC_2_3->endPCEvent); + if(RC != XAIE_OK) { + xrt_core::message::send(xrt_core::message::severity_level::error, "XRT", "AIE Performance Counter Set with Function Call and Return Failed."); + return; + } + } + 
} + } + + std::stringstream msg1; + msg1 << "Configuration completed for " << nEntries << " entries. " << std::endl; + xrt_core::message::send(xrt_core::message::severity_level::info, "XRT", msg1.str()); + + uint8_t* txnBin = XAie_ExportSerializedTransaction(&aieDevInst, 1, 0); + if (!txnHandler->submitTransaction(txnBin)) + return; + XAie_ClearTransaction(&aieDevInst); + + sz = sizeof(read_register_op_t) + sizeof(register_data_t) * (nEntries - 1); + op = (read_register_op_t*)malloc(sz); + op->count = nEntries; + + uint32_t idx = 0; + for (auto const &specEntry : spec) { + for (auto const &rowEntry : specEntry.second) { + if (rowEntry.second->eventsCorePC_0_1) { + op->data[idx].address = ((specEntry.first) << 25) /*col*/ + ((rowEntry.first) << 20) /*row*/ + rowEntry.second->eventsCorePC_0_1->perfCounterOffset; + idx++; + } + if (rowEntry.second->eventsCorePC_2_3) { + op->data[idx].address = ((specEntry.first) << 25) /*col*/ + ((rowEntry.first) << 20) /*row*/ + rowEntry.second->eventsCorePC_2_3->perfCounterOffset; + idx++; + } + } + } + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "AIE PC txn to configure counter completed"); + } + + void AIEPCClientDevImpl::finishflushDevice(void* hwCtxImpl) + { + (void)hwCtxImpl; + + if (db->infoAvailable(xdp::info::ml_timeline)) { + db->broadcast(VPDatabase::MessageType::READ_RECORD_TIMESTAMPS, nullptr); + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Done reading recorded timestamps."); + } + + xrt::bo resultBO; + try { + resultBO = xrt_core::bo_int::create_debug_bo(mHwContext, 0x20000); + } catch (std::exception& e) { + std::stringstream msg; + msg << "Unable to create 128KB buffer for AIE PC Profile results. Cannot get AIE PC Profile info. 
" << e.what() << std::endl; + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", msg.str()); + return; + } + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "AIE PC Finish Flush "); + std::unique_ptr txnHandler + = std::make_unique(mHwContext, "AIE PC Handler"); + + if (!txnHandler->initializeKernel("XDP_KERNEL")) + return; + XAie_StartTransaction(&aieDevInst, XAIE_TRANSACTION_DISABLE_AUTO_FLUSH); + + XAie_AddCustomTxnOp(&aieDevInst, XAIE_IO_CUSTOM_OP_READ_REGS, (void*)(op), sz); + uint8_t *txn_ptr = XAie_ExportSerializedTransaction(&aieDevInst, 1, 0); + + if (!txnHandler) + return; + txnHandler->setTransactionName("AIE PC Profile Read"); + if (!txnHandler->submitTransaction(txn_ptr)) + return; + + XAie_ClearTransaction(&aieDevInst); + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "AIE PC txn to read perf counter completed"); + + resultBO.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + auto resultBOMap = resultBO.map(); + uint32_t* output = reinterpret_cast(resultBOMap); + + // Process output + uint32_t idx = 0; + for (auto const &specEntry : spec) { + for (auto const &rowEntry : specEntry.second) { + + if (rowEntry.second->eventsCorePC_0_1) { + std::stringstream msg; + msg << "Core " << specEntry.first << ", " << rowEntry.first + << " PC " << rowEntry.second->eventsCorePC_0_1->startPC << ":" << rowEntry.second->eventsCorePC_0_1->endPC + << " Counter address/values: 0x" << std::hex << op->data[idx].address << ": " << std::dec << output[idx]; + xrt_core::message::send(xrt_core::message::severity_level::info, "XRT", msg.str()); + idx++; + } + if (rowEntry.second->eventsCorePC_2_3) { + std::stringstream msg; + msg << "Core " << specEntry.first << ", " << rowEntry.first + << " PC " << rowEntry.second->eventsCorePC_2_3->startPC << ":" << rowEntry.second->eventsCorePC_2_3->endPC + << " Counter address/values: 0x" << std::hex << op->data[idx].address << ": " << std::dec << output[idx]; + 
xrt_core::message::send(xrt_core::message::severity_level::info, "XRT", msg.str()); + idx++; + } + } + } + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "AIE PC Finish Flush Done"); + } +} \ No newline at end of file diff --git a/src/runtime_src/xdp/profile/plugin/aie_pc/clientDev/aie_pc.h b/src/runtime_src/xdp/profile/plugin/aie_pc/clientDev/aie_pc.h new file mode 100644 index 00000000000..f4d36ace2d4 --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_pc/clientDev/aie_pc.h @@ -0,0 +1,54 @@ +/** + * Copyright (C) 2024 Advanced Micro Devices, Inc. - All rights reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +#ifndef XDP_PLUGIN_AIE_PC_CLIENTDEV_IMPL_H +#define XDP_PLUGIN_AIE_PC_CLIENTDEV_IMPL_H + +#include "xdp/config.h" +#include "xdp/profile/plugin/aie_pc/aie_pc_impl.h" + +extern "C" { + #include + #include +} + +#include + +namespace xdp { + + struct TilePCInfo; + + class AIEPCClientDevImpl : public AIEPCImpl + { + XAie_DevInst aieDevInst = {0}; + + std::size_t sz; + read_register_op_t* op; + + std::map>> spec; + + public : + AIEPCClientDevImpl(VPDatabase* dB); + + ~AIEPCClientDevImpl() = default; + + virtual void updateDevice(void* hwCtxImpl); + virtual void finishflushDevice(void* hwCtxImpl); + }; + +} + +#endif \ No newline at end of file diff --git a/src/runtime_src/xdp/profile/plugin/vp_base/info.h b/src/runtime_src/xdp/profile/plugin/vp_base/info.h index 7ac93724cc3..9fbd5f0b744 100644 --- a/src/runtime_src/xdp/profile/plugin/vp_base/info.h +++ b/src/runtime_src/xdp/profile/plugin/vp_base/info.h @@ -1,5 +1,6 @@ /** * Copyright (C) 2016-2021 Xilinx, Inc + * Copyright (C) 2022-2024 Advanced Micro Devices, Inc. - All rights reserved * * Licensed under the Apache License, Version 2.0 (the "License"). You may * not use this file except in compliance with the License. 
namespace xdp {
  namespace info {

    // Single-bit identifiers, one per plugin / information source. Each
    // constant occupies its own bit so several can be combined in one
    // 64-bit mask; the bit position is spelled out on every line.
    const uint64_t aie_profile     = uint64_t{1} <<  0 ;
    const uint64_t aie_trace       = uint64_t{1} <<  1 ;
    const uint64_t device_offload  = uint64_t{1} <<  2 ;
    const uint64_t hal             = uint64_t{1} <<  3 ;
    const uint64_t lop             = uint64_t{1} <<  4 ;
    const uint64_t native          = uint64_t{1} <<  5 ;
    const uint64_t noc             = uint64_t{1} <<  6 ;
    const uint64_t opencl_counters = uint64_t{1} <<  7 ;
    const uint64_t opencl_trace    = uint64_t{1} <<  8 ;
    const uint64_t power           = uint64_t{1} <<  9 ;
    const uint64_t system_compiler = uint64_t{1} << 10 ;
    const uint64_t user            = uint64_t{1} << 11 ;
    const uint64_t vart            = uint64_t{1} << 12 ;
    const uint64_t aie_status      = uint64_t{1} << 13 ;
    const uint64_t ml_timeline     = uint64_t{1} << 14 ;
    const uint64_t aie_halt        = uint64_t{1} << 15 ;
    const uint64_t aie_pc          = uint64_t{1} << 16 ;

  } // end namespace info
} // end namespace xdp