Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for CR-1167717, CR-1173167, and CR-1173061 #7681

Merged
merged 5 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ namespace xdp {
// Options recorded by aiecompiler in the AIE metadata.
// The boolean flags default to false so that a default-constructed
// instance never carries indeterminate values; event_trace starts empty
// and is expected to be filled in by whoever parses the metadata.
struct aiecompiler_options
{
  bool broadcast_enable_core = false;
  bool graph_iterator_event = false;
  std::string event_trace;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,12 @@ namespace aie {
// Extract the aiecompiler options from the parsed AIE metadata tree.
// Each option is looked up under "aie_metadata.aiecompiler_options" and
// is read exactly once; the second argument to get() is the value used
// when the key cannot be retrieved (false for both flags, "runtime" for
// the event trace setting).
aiecompiler_options getAIECompilerOptions(const boost::property_tree::ptree& aie_meta)
{
  aiecompiler_options options;
  options.broadcast_enable_core =
    aie_meta.get("aie_metadata.aiecompiler_options.broadcast_enable_core", false);
  options.graph_iterator_event =
    aie_meta.get("aie_metadata.aiecompiler_options.graph_iterator_event", false);
  options.event_trace =
    aie_meta.get("aie_metadata.aiecompiler_options.event_trace", "runtime");
  return options;
}

Expand Down
146 changes: 91 additions & 55 deletions src/runtime_src/xdp/profile/plugin/aie_profile/edge/aie_profile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <cmath>
#include <memory>
#include <cstring>
#include <map>

#include "core/common/message.h"
#include "core/common/time.h"
Expand Down Expand Up @@ -271,60 +272,96 @@ namespace xdp {
return (runningEvents.find(event) != runningEvents.end());
}

// Map a stream switch port event to the monitor port index it refers to.
// Returns 1 for the three port-1 events listed below and 0 for every
// other event.
// NOTE(review): any port-1 event not listed here also maps to 0 - confirm
// that no other port-1 variants are passed in by callers.
uint8_t AieProfile_EdgeImpl::getPortNumberFromEvent(XAie_Events event)
{
  const bool isPortOneEvent = (event == XAIE_EVENT_PORT_RUNNING_1_CORE)
                           || (event == XAIE_EVENT_PORT_STALLED_1_CORE)
                           || (event == XAIE_EVENT_PORT_TLAST_1_PL);
  if (isPortOneEvent)
    return 1;
  return 0;
}

// Configure stream switch ports for monitoring purposes
// NOTE: Used to monitor streams: trace, interfaces, and memory tiles
XAie_Events
void
AieProfile_EdgeImpl::configStreamSwitchPorts(XAie_DevInst* aieDevInst, const tile_type& tile,
xaiefal::XAieTile& xaieTile, const XAie_LocType loc,
const module_type type, const XAie_Events event,
const std::string metricSet, const uint8_t channel)
const module_type type, const uint32_t numCounters,
const std::string metricSet, const uint8_t channel0,
const uint8_t channel1, std::vector<XAie_Events>& startEvents,
std::vector<XAie_Events>& endEvents)
{
// Only configure as needed: must be applicable event and only need at most two
if (!isStreamSwitchPortEvent(event))
return event;

auto switchPortRsc = xaieTile.sswitchPort();
auto ret = switchPortRsc->reserve();
if (ret != AieRC::XAIE_OK)
return event;

if (type == module_type::core) {
// AIE Tiles (e.g., trace streams)
// Define stream switch port to monitor core or memory trace
uint8_t traceSelect = (event == XAIE_EVENT_PORT_RUNNING_0_CORE) ? 0 : 1;
switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, traceSelect);
}
else if (type == module_type::shim) {
// Interface tiles (e.g., PLIO, GMIO)
// Grab slave/master and stream ID
// NOTE: stored in getTilesForProfiling() above
auto slaveOrMaster = (tile.itr_mem_col == 0) ? XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER;
auto streamPortId = static_cast<uint8_t>(tile.itr_mem_row);
switchPortRsc->setPortToSelect(slaveOrMaster, SOUTH, streamPortId);
}
else {
// Memory tiles
if (metricSet.find("trace") != std::string::npos) {
switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, 0);
std::map<uint8_t, std::shared_ptr<xaiefal::XAieStreamPortSelect>> switchPortMap;

// Traverse all counters and request monitor ports as needed
for (int i=0; i < numCounters; ++i) {
// Ensure applicable event
auto startEvent = startEvents.at(i);
auto endEvent = endEvents.at(i);
if (!isStreamSwitchPortEvent(startEvent))
continue;

bool newPort = false;
auto portnum = getPortNumberFromEvent(startEvent);

// New port needed: reserve, configure, and store
if (switchPortMap.find(portnum) == switchPortMap.end()) {
auto switchPortRsc = xaieTile.sswitchPort();
if (switchPortRsc->reserve() != AieRC::XAIE_OK)
continue;
newPort = true;
switchPortMap[portnum] = switchPortRsc;

if (type == module_type::core) {
// AIE Tiles (e.g., trace streams)
// Define stream switch port to monitor core or memory trace
uint8_t traceSelect = (startEvent == XAIE_EVENT_PORT_RUNNING_0_CORE) ? 0 : 1;
switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, traceSelect);
}
else if (type == module_type::shim) {
// Interface tiles (e.g., PLIO, GMIO)
// Grab slave/master and stream ID
// NOTE: stored in getTilesForProfiling() above
auto slaveOrMaster = (tile.itr_mem_col == 0) ? XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER;
auto streamPortId = static_cast<uint8_t>(tile.itr_mem_row);
switchPortRsc->setPortToSelect(slaveOrMaster, SOUTH, streamPortId);
}
else {
// Memory tiles
if (metricSet.find("trace") != std::string::npos) {
switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, 0);
}
else {
uint8_t channel = (portnum == 0) ? channel0 : channel1;
auto slaveOrMaster = (metricSet.find("output") != std::string::npos) ?
XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER;
switchPortRsc->setPortToSelect(slaveOrMaster, DMA, channel);
}
}
}
else {
auto slaveOrMaster = (metricSet.find("output") != std::string::npos) ?
XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER;
switchPortRsc->setPortToSelect(slaveOrMaster, DMA, channel);

auto switchPortRsc = switchPortMap[portnum];

// Event options:
// getSSIdleEvent, getSSRunningEvent, getSSStalledEvent, & getSSTlastEvent
XAie_Events ssEvent;
if (isPortRunningEvent(startEvent))
switchPortRsc->getSSRunningEvent(ssEvent);
else
switchPortRsc->getSSStalledEvent(ssEvent);
startEvents.at(i) = ssEvent;
endEvents.at(i) = ssEvent;

if (newPort) {
switchPortRsc->start();
mStreamPorts.push_back(switchPortRsc);
}
}

// Event options:
// getSSIdleEvent, getSSRunningEvent, getSSStalledEvent, & getSSTlastEvent
XAie_Events ssEvent;
if (isPortRunningEvent(event))
switchPortRsc->getSSRunningEvent(ssEvent);
else
switchPortRsc->getSSStalledEvent(ssEvent);

switchPortRsc->start();
mStreamPorts.push_back(switchPortRsc);
return ssEvent;
switchPortMap.clear();
}

void
Expand Down Expand Up @@ -530,24 +567,19 @@ namespace xdp {
auto iter1 = configChannel1.find(tile);
uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second;
uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second;

configEventSelections(aieDevInst, loc, XAIE_MEM_MOD, type, metricSet, channel0, channel1);
configStreamSwitchPorts(aieDevInst, tileMetric.first, xaieTile, loc, type, numFreeCtr,
metricSet, channel0, channel1, startEvents, endEvents);

// Request and configure all available counters for this tile
for (int i=0; i < numFreeCtr; ++i) {
auto startEvent = startEvents.at(i);
auto endEvent = endEvents.at(i);
uint8_t resetEvent = 0;

// Channel number is based on monitoring port 0 or 1
auto channel = (startEvent <= XAIE_EVENT_PORT_TLAST_0_MEM_TILE) ? channel0 : channel1;

// Configure group event before reserving and starting counter
configGroupEvents(aieDevInst, loc, mod, startEvent, metricSet);
auto event = configStreamSwitchPorts(aieDevInst, tileMetric.first, xaieTile, loc, type,
startEvent, metricSet, channel);
if (event != startEvent) {
endEvent = (endEvent == startEvent) ? event : endEvent;
startEvent = event;
}

// Request counter from resource manager
auto perfCounter = xaieModule.perfCounter();
Expand All @@ -556,7 +588,7 @@ namespace xdp {
ret = perfCounter->reserve();
if (ret != XAIE_OK) break;

// Start the counters after group events have been configured
// Start the counter
ret = perfCounter->start();
if (ret != XAIE_OK) break;
mPerfCounters.push_back(perfCounter);
Expand All @@ -568,6 +600,10 @@ namespace xdp {
XAie_EventLogicalToPhysicalConv(aieDevInst, loc, mod, endEvent, &tmpEnd);
uint16_t phyStartEvent = tmpStart + mCounterBases[type];
uint16_t phyEndEvent = tmpEnd + mCounterBases[type];

// Get payload for reporting purposes
auto portnum = getPortNumberFromEvent(startEvent);
uint8_t channel = (portnum == 0) ? channel0 : channel1;
auto payload = getCounterPayload(aieDevInst, tileMetric.first, type, col, row,
startEvent, metricSet, channel);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ namespace xdp {
bool isValidType(module_type type, XAie_ModuleType mod);
bool isStreamSwitchPortEvent(const XAie_Events event);
bool isPortRunningEvent(const XAie_Events event);
uint8_t getPortNumberFromEvent(XAie_Events event);
void printTileModStats(xaiefal::XAieDev* aieDevice,
const tile_type& tile,
const XAie_ModuleType mod);
Expand All @@ -58,14 +59,17 @@ namespace xdp {
const XAie_ModuleType mod,
const XAie_Events event,
const std::string metricSet);
XAie_Events configStreamSwitchPorts(XAie_DevInst* aieDevInst,
const tile_type& tile,
xaiefal::XAieTile& xaieTile,
const XAie_LocType loc,
const module_type type,
const XAie_Events event,
const std::string metricSet,
const uint8_t channel);
void configStreamSwitchPorts(XAie_DevInst* aieDevInst,
const tile_type& tile,
xaiefal::XAieTile& xaieTile,
const XAie_LocType loc,
const module_type type,
const uint32_t numCounters,
const std::string metricSet,
const uint8_t channel0,
const uint8_t channel1,
std::vector<XAie_Events>& startEvents,
std::vector<XAie_Events>& endEvents);
void configEventSelections(XAie_DevInst* aieDevInst,
const XAie_LocType loc,
const XAie_ModuleType mod,
Expand All @@ -81,6 +85,7 @@ namespace xdp {
uint16_t startEvent,
const std::string metricSet,
const uint8_t channel);

private:
XAie_DevInst* aieDevInst = nullptr;
xaiefal::XAieDev* aieDevice = nullptr;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,16 +100,13 @@ namespace xdp {
}

// Get tiles to status
void AIEStatusPlugin::getTilesForStatus(void* handle)
void AIEStatusPlugin::getTilesForStatus()
{
std::shared_ptr<xrt_core::device> device = xrt_core::get_userpf_device(handle);

// Capture all tiles across all graphs
// Note: in the future, we could support user-defined tile sets
auto graphs = xrt_core::edge::aie::get_graphs(device.get());
auto graphs = aie::getValidGraphs(mAieMeta);
for (auto& graph : graphs) {
mGraphCoreTilesMap[graph] = xrt_core::edge::aie::get_event_tiles(device.get(), graph,
xrt_core::edge::aie::module_type::core);
mGraphCoreTilesMap[graph] = aie::getEventTiles(mAieMeta, graph, module_type::core);
}

// Report tiles (debug only)
Expand Down Expand Up @@ -189,7 +186,8 @@ namespace xdp {

// AIE core register offsets
constexpr uint64_t AIE_OFFSET_CORE_STATUS = 0x32004;
auto offset = getAIETileRowOffset(handle);
auto offset = aie::getAIETileRowOffset(mAieMeta);
auto hwGen = aie::getHardwareGeneration(mAieMeta);

// This mask check for following states
// ECC_Scrubbing_Stall
Expand Down Expand Up @@ -220,6 +218,7 @@ namespace xdp {
// Reset values
constexpr uint32_t CORE_RESET_STATUS = 0x2;
constexpr uint32_t CORE_ENABLE_MASK = 0x1;

// Tiles already reported with error(s)
std::set<tile_type> errorTileSet;
// Graph -> total stuck core cycles
Expand Down Expand Up @@ -298,18 +297,26 @@ namespace xdp {
// Check for errors in tile
// NOTE: warning is only issued once per tile
if (errorTileSet.find(tile) == errorTileSet.end()) {
uint8_t coreErrors0 = 0;
uint8_t coreErrors1 = 0;
uint8_t memErrors = 0;
auto loc = XAie_TileLoc(tile.col, tile.row + offset);
XAie_EventReadStatus(aieDevInst, loc, XAIE_CORE_MOD,
XAIE_EVENT_GROUP_ERRORS_0_CORE, &coreErrors0);
XAie_EventReadStatus(aieDevInst, loc, XAIE_CORE_MOD,
XAIE_EVENT_GROUP_ERRORS_1_CORE, &coreErrors1);

// Memory module
uint8_t memErrors = 0;
XAie_EventReadStatus(aieDevInst, loc, XAIE_MEM_MOD,
XAIE_EVENT_GROUP_ERRORS_MEM, &memErrors);
XAIE_EVENT_GROUP_ERRORS_MEM, &memErrors);

if (coreErrors0 || coreErrors1 || memErrors) {
// Core module
// NOTE: Per CR-1167717, ignore group errors on AIE1 devices
// since instruction event 2 is used as DONE bit.
uint8_t coreErrors0 = 0;
uint8_t coreErrors1 = 0;
if (hwGen > 1) {
XAie_EventReadStatus(aieDevInst, loc, XAIE_CORE_MOD,
XAIE_EVENT_GROUP_ERRORS_0_CORE, &coreErrors0);
XAie_EventReadStatus(aieDevInst, loc, XAIE_CORE_MOD,
XAIE_EVENT_GROUP_ERRORS_1_CORE, &coreErrors1);
}

if (memErrors || coreErrors0 || coreErrors1) {
std::stringstream errorMessage;
errorMessage << "Error(s) found in tile (" << tile.col << "," << tile.row
<< "). Please view status in Vitis Analyzer for specifics.";
Expand Down Expand Up @@ -402,8 +409,13 @@ namespace xdp {
}
}

// Grab AIE metadata
auto device = xrt_core::get_userpf_device(handle);
auto data = device->get_axlf_section(AIE_METADATA);
aie::readAIEMetadata(data.first, data.second, mAieMeta);

// Update list of tiles to debug
getTilesForStatus(handle);
getTilesForStatus();

// Open the writer for this device
struct xclDeviceInfo2 info;
Expand Down Expand Up @@ -477,30 +489,4 @@ namespace xdp {
mStatusThreadMap.clear();
}

uint16_t AIEStatusPlugin::getAIETileRowOffset(void* handle)
{
static uint16_t rowOffset = 1;
static bool gotValue = false;
if (!gotValue) {
auto device = xrt_core::get_userpf_device(handle);
auto data = device->get_axlf_section(AIE_METADATA);
if (!data.first || !data.second) {
rowOffset = 1;
} else {
boost::property_tree::ptree aie_meta;
read_aie_metadata(data.first, data.second, aie_meta);
rowOffset = aie_meta.get_child("aie_metadata.driver_config.aie_tile_row_start").get_value<uint16_t>();
}
gotValue = true;
}
return rowOffset;
}

void AIEStatusPlugin::read_aie_metadata(const char* data, size_t size, boost::property_tree::ptree& aie_project)
{
std::stringstream aie_stream;
aie_stream.write(data,size);
boost::property_tree::read_json(aie_stream,aie_project);
}

} // end namespace xdp
Loading