Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/runtime_src/core/common/config_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,17 @@ get_qdma_aio_enable()
return value;
}

/**
 * get_io_uring() - Report whether the "Runtime.io_uring" option is enabled.
 *
 * Enabled:  AIE DMA work is submitted as batched io_uring requests
 *           (DmaWriteBdAsync / PushBdToQueueAsync followed by one wait).
 * Disabled: the synchronous AIE DMA APIs are used throughout.
 *
 * The option defaults to false. The lookup runs once; the result is held in
 * a function-local static and returned unchanged on every later call.
 */
inline bool
get_io_uring()
{
  static const bool io_uring_enabled = detail::get_bool_value("Runtime.io_uring", false);
  return io_uring_enabled;
}

inline std::string
get_hw_em_driver()
{
Expand Down
7 changes: 5 additions & 2 deletions src/runtime_src/core/edge/user/aie/aie.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/

#include "aie.h"
#include "core/common/config_reader.h"
#include "core/common/error.h"
#include "common_layer/fal_util.h"
#include "core/common/message.h"
Expand Down Expand Up @@ -70,7 +71,8 @@ aie_array(const std::shared_ptr<xrt_core::device>& device)
dev_inst = &dev_inst_obj;

adf::aiecompiler_options aiecompiler_options = xrt_core::edge::aie::get_aiecompiler_options(device.get());
m_config = std::make_shared<adf::config_manager>(dev_inst, driver_config.mem_num_rows, aiecompiler_options.broadcast_enable_core);
m_config = std::make_shared<adf::config_manager>(dev_inst, driver_config.mem_num_rows, aiecompiler_options.broadcast_enable_core,
xrt_core::config::get_io_uring());

fal_util::initialize(dev_inst); //resource manager initialization

Expand Down Expand Up @@ -144,7 +146,8 @@ aie_array(const std::shared_ptr<xrt_core::device>& device, const zynqaie::hwctx_
dev_inst = &dev_inst_obj;

adf::aiecompiler_options aiecompiler_options = xrt_core::edge::aie::get_aiecompiler_options(device.get(), hwctx_obj);
m_config = std::make_shared<adf::config_manager>(dev_inst, driver_config.mem_num_rows, aiecompiler_options.broadcast_enable_core);
m_config = std::make_shared<adf::config_manager>(dev_inst, driver_config.mem_num_rows, aiecompiler_options.broadcast_enable_core,
xrt_core::config::get_io_uring());

fal_util::initialize(dev_inst); //resource manager initialization

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ namespace adf
class config_manager
{
public:
config_manager(XAie_DevInst* dev_inst, size_t num_reserved_rows, bool broadcast_enable_core);
config_manager(XAie_DevInst* dev_inst, size_t num_reserved_rows, bool broadcast_enable_core,
bool io_uring = false);
XAie_DevInst*
get_dev()
{
Expand All @@ -45,10 +46,17 @@ class config_manager
return m_broadcast_enable_core;
}

bool
get_io_uring() const
{
return m_io_uring;
}

private:
XAie_DevInst* m_aie_dev;
size_t m_num_reserved_rows;
bool m_broadcast_enable_core;
bool m_io_uring;
};

class dma_api
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,24 @@
extern "C"
{
#include "xaiengine.h"
#include <xaiengine/xaie_helper.h>
}

namespace adf
{

namespace {

/**
 * Call XAie_AsyncWaitNr() for @nr entries on @dev and turn a failure into
 * an exception.
 *
 * @dev: AIE device instance handed to XAie_AsyncWaitNr().
 * @nr:  Count handed to XAie_AsyncWaitNr(); when it is zero or negative the
 *       driver is not called and the function returns immediately.
 *
 * Throws xrt_core::error with code -EIO when XAie_AsyncWaitNr() returns
 * anything other than XAIE_OK; the driver return code is appended to the
 * message text.
 */
void
aie_async_wait_nr_throw(XAie_DevInst* dev, int nr)
{
  // Only a positive count is forwarded to the driver.
  if (nr > 0) {
    const AieRC wait_rc = XAie_AsyncWaitNr(dev, static_cast<u32>(nr));
    if (wait_rc == XAIE_OK)
      return;

    throw xrt_core::error(-EIO, "XAie_AsyncWaitNr failed: " + std::to_string(wait_rc));
  }
}

} // namespace

/********************************* Statics & Constants *********************************/

static constexpr short INVALID_TILE_COORD = 0xFF;
Expand All @@ -48,10 +61,12 @@ static constexpr unsigned LOCK_TIMEOUT = 0x7FFFFFFF;
/********************************* config_manager *************************************/

config_manager::
config_manager(XAie_DevInst* dev_inst, size_t num_reserved_rows, bool broadcast_enable_core)
config_manager(XAie_DevInst* dev_inst, size_t num_reserved_rows, bool broadcast_enable_core,
bool io_uring)
: m_aie_dev(dev_inst)
, m_num_reserved_rows(num_reserved_rows)
, m_broadcast_enable_core(broadcast_enable_core)
, m_io_uring(io_uring)
{}

/************************************ graph_api ************************************/
Expand Down Expand Up @@ -784,21 +799,54 @@ std::pair<size_t, size_t> gmio_api::enqueueBD(XAie_MemInst *memInst, uint64_t of
//get an available BD
uint16_t bdNumber = frontAndPop(availableBDs);

//set up BD
driverStatus |= XAie_DmaSetAddrOffsetLen(&shimDmaInst, memInst, offset, (u32)size);
if (config->get_io_uring()) {
driverStatus |= XAie_DmaSetAddrOffsetLen(&shimDmaInst, memInst, offset, static_cast<u32>(size));

if (config->get_dev()->DevProp.DevGen == XAIE_DEV_GEN_AIEML || config->get_dev()->DevProp.DevGen == XAIE_DEV_GEN_AIE2PS) // AIEML (note AIE1 XAIE_LOCK_WITH_NO_VALUE is -1, which does not work for AIEML)
XAie_AsyncRes ares[2]{};
int n = 0;

if (config->get_dev()->DevProp.DevGen == XAIE_DEV_GEN_AIEML || config->get_dev()->DevProp.DevGen == XAIE_DEV_GEN_AIE2PS)
driverStatus |= XAie_DmaSetLock(&shimDmaInst, XAie_LockInit(bdNumber, 0), XAie_LockInit(bdNumber, 0));
else
else
driverStatus |= XAie_DmaSetLock(&shimDmaInst, XAie_LockInit(bdNumber, XAIE_LOCK_WITH_NO_VALUE), XAie_LockInit(bdNumber, XAIE_LOCK_WITH_NO_VALUE));

driverStatus |= XAie_DmaEnableBd(&shimDmaInst);
driverStatus |= XAie_DmaEnableBd(&shimDmaInst);

int r = XAie_DmaWriteBdAsync(config->get_dev(), &shimDmaInst, gmioTileLoc, bdNumber, &ares[0]);
if (r == 0) {
aie_async_wait_nr_throw(config->get_dev(), n);
throw xrt_core::error(ares[0].res ? ares[0].res : -EIO, "XAie_DmaWriteBdAsync submit failed");
}
n += r;

r = XAie_DmaChannelPushBdToQueueAsync(config->get_dev(), gmioTileLoc, pGMIOConfig->channelNum,
(pGMIOConfig->type == gmio_config::gm2aie ? DMA_MM2S : DMA_S2MM), bdNumber, &ares[1]);
if (r == 0) {
aie_async_wait_nr_throw(config->get_dev(), n);
throw xrt_core::error(ares[1].res ? ares[1].res : -EIO, "XAie_DmaChannelPushBdToQueueAsync submit failed");
}
n += r;

aie_async_wait_nr_throw(config->get_dev(), n);
for (int i = 0; i < 2; ++i) {
if (ares[i].res != 0)
throw xrt_core::error(ares[i].res, "GMIO enqueueBD async completion failed");
}
}
else {
driverStatus |= XAie_DmaSetAddrOffsetLen(&shimDmaInst, memInst, offset, static_cast<u32>(size));

//write BD
driverStatus |= XAie_DmaWriteBd(config->get_dev(), &shimDmaInst, gmioTileLoc, bdNumber);
if (config->get_dev()->DevProp.DevGen == XAIE_DEV_GEN_AIEML || config->get_dev()->DevProp.DevGen == XAIE_DEV_GEN_AIE2PS)
driverStatus |= XAie_DmaSetLock(&shimDmaInst, XAie_LockInit(bdNumber, 0), XAie_LockInit(bdNumber, 0));
else
driverStatus |= XAie_DmaSetLock(&shimDmaInst, XAie_LockInit(bdNumber, XAIE_LOCK_WITH_NO_VALUE), XAie_LockInit(bdNumber, XAIE_LOCK_WITH_NO_VALUE));

driverStatus |= XAie_DmaEnableBd(&shimDmaInst);
driverStatus |= XAie_DmaWriteBd(config->get_dev(), &shimDmaInst, gmioTileLoc, bdNumber);
driverStatus |= XAie_DmaChannelPushBdToQueue(config->get_dev(), gmioTileLoc, pGMIOConfig->channelNum,
(pGMIOConfig->type == gmio_config::gm2aie ? DMA_MM2S : DMA_S2MM), bdNumber);
}

//enqueue BD
driverStatus |= XAie_DmaChannelPushBdToQueue(config->get_dev(), gmioTileLoc, pGMIOConfig->channelNum, (pGMIOConfig->type == gmio_config::gm2aie ? DMA_MM2S : DMA_S2MM), bdNumber);
enqueuedBDs.push(bdNumber);

/* Commenting out as this is increasing overhead of the performance */
Expand Down Expand Up @@ -967,7 +1015,6 @@ err_code dma_api::configureBD(int tileType, uint8_t column, uint8_t row, uint16_
//valid bd
driverStatus |= XAie_DmaEnableBd(&dmaInst);

//write bd
driverStatus |= XAie_DmaWriteBd(config->get_dev(), &dmaInst, tileLoc, bdId);
debugMsg(static_cast<std::stringstream &&>(std::stringstream() << "XAie_DmaWriteBd " << (uint16_t)bdId << std::endl).str());

Expand Down
Loading