Skip to content

Commit

Permalink
Merge branch 'main' into taplib-docs
Browse files Browse the repository at this point in the history
  • Loading branch information
hunhoffe authored Dec 12, 2024
2 parents 0c86a8f + b992976 commit ad0205f
Show file tree
Hide file tree
Showing 19 changed files with 669 additions and 231 deletions.
4 changes: 0 additions & 4 deletions include/aie/Dialect/AIE/IR/AIEOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -856,10 +856,6 @@ def AIE_DMABDOp: AIE_Op<"dma_bd", []> {
// access/store element at/to index (i * 16 /*stride_2*/ + j * 1 /*stride_1*/ + k * 2 /*stride_0*/)
```

Note that an additional dimension of sizes/strides is accepted (5th dimension for memtiles, 4th otherwise);
the additional size value is interpreted as a repeat count whereas the additional stride value is
interpreted as an iteration stride.

#### Important gotcha regarding strides

All strides are expressed in multiples of the element width (just like `len` and `offset`)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ void generateXAieDmaSetMultiDimAddr(llvm::raw_ostream &output, int ndims,
int elementWidthInBytes,
const char *errorRet);

/// Returns the blocks of `region` that can be reached from its entry block by
/// following terminator successors. The entry block is always the first
/// element; every other block appears in the order in which it is first
/// discovered during the traversal.
llvm::SetVector<mlir::Block *> getOrderedChainOfBlocks(mlir::Region *region);

} // namespace AIE
} // namespace xilinx

Expand Down
122 changes: 23 additions & 99 deletions lib/Dialect/AIE/IR/AIEDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,21 +455,35 @@ LogicalResult HasValidBDs<ConcreteType>::verifyTrait(Operation *op) {
// has valid DMA channels.
template <typename ConcreteType>
LogicalResult HasValidDMAChannels<ConcreteType>::verifyTrait(Operation *op) {
DenseSet<DMAChannel> inputChannels;
DenseSet<DMAChannel> outputChannels;
auto element = cast<ConcreteType>(op);
DenseSet<DMAChannel> usedChannels;
for (auto &bodyOp : element.getBody().getOps()) {
// check for duplicate DMA channels within the same MemTileDMAOp
Region &body = element.getBody();
if (body.empty())
return op->emitOpError("should have non-empty body");
for (auto &bodyOp : body.getOps()) {
// check for duplicate DMA channels within the same DMA op
if (auto dmaStart = dyn_cast<DMAStartOp>(bodyOp)) {
DMAChannel dmaChan = {dmaStart.getChannelDir(),
dmaStart.getChannelIndex()};
if (usedChannels.count(dmaChan))
return dmaStart.emitOpError()
<< "duplicate DMA channel "
<< stringifyDMAChannelDir(dmaChan.direction) << dmaChan.channel
<< " not allowed";
usedChannels.insert(dmaChan);
// check if number of input and output channels is more than available
// hardware
if (dmaChan.direction == DMAChannelDir::S2MM)
inputChannels.insert(dmaChan);
else
outputChannels.insert(dmaChan);
}
}

if (inputChannels.size() >
element.getTileOp().getNumSourceConnections(WireBundle::DMA))
return op->emitOpError(
"uses more input channels than available on this tile");

if (outputChannels.size() >
element.getTileOp().getNumDestConnections(WireBundle::DMA))
return op->emitOpError(
"uses more output channels than available on this tile");
return success();
}

Expand Down Expand Up @@ -1335,49 +1349,12 @@ int ShimMuxOp::rowIndex() { return getTileOp().rowIndex(); }
//===----------------------------------------------------------------------===//

LogicalResult ShimDMAOp::verify() {
Region &body = getBody();
DenseSet<DMAChannel> usedChannels;
std::vector<DMAChannel> inputChannels;
std::vector<DMAChannel> outputChannels;

if (getBody().empty())
return emitOpError("should have non-empty body");

if (!getTileOp().isShimNOCTile())
return emitOpError("must be in a ShimTile with a NOC connection");

if (HasSomeTerminator<DMAStartOp, NextBDOp, EndOp>::verifyTrait(*this)
.failed())
return failure();

for (auto &bodyOp : body.getOps()) {
// check for duplicate DMA channels within the same ShimDMAOp
if (auto dmaStart = dyn_cast<DMAStartOp>(bodyOp)) {
DMAChannel dmaChan = {dmaStart.getChannelDir(),
dmaStart.getChannelIndex()};
if (usedChannels.count(dmaChan))
return dmaStart.emitOpError()
<< "duplicate DMA channel "
<< stringifyDMAChannelDir(dmaChan.direction) << dmaChan.channel
<< " in MemOp";
usedChannels.insert(dmaChan);
// check if number of input and output channels is more than available
// hardware
if (dmaChan.direction == DMAChannelDir::S2MM)
inputChannels.push_back(dmaChan);
else
outputChannels.push_back(dmaChan);
}
}

if (inputChannels.size() >
getTileOp().getNumSourceConnections(WireBundle::DMA))
return emitOpError("uses more input channels than available on this tile");

if (outputChannels.size() >
getTileOp().getNumDestConnections(WireBundle::DMA))
return emitOpError("uses more output channels than available on this tile");

return success();
}

Expand Down Expand Up @@ -1509,49 +1486,16 @@ static ParseResult parseBufferInitialValue(OpAsmParser &parser, Type &type,

LogicalResult MemOp::verify() {
Region &body = getBody();
DenseSet<DMAChannel> usedChannels;
std::vector<DMAChannel> inputChannels;
std::vector<DMAChannel> outputChannels;
if (body.empty())
return emitOpError("should have non-empty body");

if (HasSomeTerminator<DMAStartOp, NextBDOp, EndOp>::verifyTrait(*this)
.failed())
return failure();

for (auto &bodyOp : body.getOps()) {
// check for duplicate DMA channels within the same MemOp
if (auto dmaStart = dyn_cast<DMAStartOp>(bodyOp)) {
DMAChannel dmaChan = {dmaStart.getChannelDir(),
dmaStart.getChannelIndex()};
if (usedChannels.count(dmaChan))
return dmaStart.emitOpError()
<< "duplicate DMA channel "
<< stringifyDMAChannelDir(dmaChan.direction) << dmaChan.channel
<< " in MemOp";
usedChannels.insert(dmaChan);
// check if number of input and output channels is more than available
// hardware
if (dmaChan.direction == DMAChannelDir::S2MM)
inputChannels.push_back(dmaChan);
else
outputChannels.push_back(dmaChan);
}

if (auto allocOp = dyn_cast<memref::AllocOp>(bodyOp))
if (!allocOp->getAttr("id"))
return allocOp.emitOpError()
<< "allocOp in MemOp region should have an id attribute";
}

if (inputChannels.size() >
getTileOp().getNumSourceConnections(WireBundle::DMA))
return emitOpError("uses more input channels than available on this tile");

if (outputChannels.size() >
getTileOp().getNumDestConnections(WireBundle::DMA))
return emitOpError("uses more output channels than available on this tile");

return success();
}

Expand All @@ -1566,12 +1510,8 @@ int MemOp::rowIndex() { return getTileOp().rowIndex(); }
//===----------------------------------------------------------------------===//

LogicalResult MemTileDMAOp::verify() {
std::vector<DMAChannel> inputChannels;
std::vector<DMAChannel> outputChannels;

assert(getOperation()->getNumRegions() == 1 &&
"MemTileDMAOp has zero region!");
assert(!getBody().empty() && "MemTileDMAOp should have non-empty body");

if (HasSomeTerminator<DMAStartOp, NextBDOp, EndOp>::verifyTrait(*this)
.failed())
Expand All @@ -1584,14 +1524,6 @@ LogicalResult MemTileDMAOp::verify() {
<< "allocOp in MemTileDMAOp region should have an id attribute";
}
if (auto startOp = dyn_cast<DMAStartOp>(bodyOp)) {
// check if number of input and output channels is more than available
// hardware
DMAChannel dmaChan = {startOp.getChannelDir(), startOp.getChannelIndex()};
if (dmaChan.direction == DMAChannelDir::S2MM)
inputChannels.push_back(dmaChan);
else
outputChannels.push_back(dmaChan);

if (startOp.getChannelIndex() > 3) {
// Channels 4 and 5 in a memtile are restricted to only access local
// buffers and locks.
Expand Down Expand Up @@ -1649,14 +1581,6 @@ LogicalResult MemTileDMAOp::verify() {
}
}

if (inputChannels.size() >
getTileOp().getNumSourceConnections(WireBundle::DMA))
return emitOpError("uses more input channels than available on this tile");

if (outputChannels.size() >
getTileOp().getNumDestConnections(WireBundle::DMA))
return emitOpError("uses more output channels than available on this tile");

return success();
}

Expand Down
16 changes: 11 additions & 5 deletions lib/Targets/AIERT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
//===----------------------------------------------------------------------===//

#include "aie/Targets/AIERT.h"
#include "aie/Targets/AIETargetShared.h"

#include "mlir/Support/LogicalResult.h"

Expand Down Expand Up @@ -607,6 +608,11 @@ LogicalResult AIERTControl::addInitConfig(DeviceOp &targetOp) {
int row = memOp.getTileID().row;
XAie_LocType tileLoc = XAie_TileLoc(col, row);

// Get the region's entry block, then start traversing through the chain of
// blocks.
llvm::SetVector<Block *> blockVector =
getOrderedChainOfBlocks(&memOp.getOperation()->getRegion(0));

// handle DMA ops separately
auto dmaOps = llvm::to_vector_of<DMAOp>(
memOp.getOperation()->getRegion(0).getOps<DMAOp>());
Expand All @@ -618,10 +624,10 @@ LogicalResult AIERTControl::addInitConfig(DeviceOp &targetOp) {
return failure();
}
} else {
for (Block &block : memOp.getOperation()->getRegion(0)) {
if (block.getOps<DMABDOp>().empty())
for (Block *block : blockVector) {
if (block->getOps<DMABDOp>().empty())
continue;
if (failed(configureLocksAndBd(block, tileLoc)))
if (failed(configureLocksAndBd(*block, tileLoc)))
return failure();
}
}
Expand All @@ -637,8 +643,8 @@ LogicalResult AIERTControl::addInitConfig(DeviceOp &targetOp) {
return failure();
}
else
for (Block &block : memOp.getOperation()->getRegion(0)) {
for (auto op : block.getOps<DMAStartOp>()) {
for (Block *block : blockVector) {
for (auto op : block->getOps<DMAStartOp>()) {
DMABDOp bd = *op.getDest()->getOps<DMABDOp>().begin();
int chNum = op.getChannelIndex();
auto channelDir = op.getChannelDir();
Expand Down
2 changes: 1 addition & 1 deletion lib/Targets/AIETargetHSA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// (c) Copyright 2021-2023, Advanced Micro Devices, Inc.
//
//===----------------------------------------------------------------------===//
#include "AIETargetShared.h"
#include "aie/Targets/AIETargetShared.h"

#include "aie/Dialect/AIE/IR/AIEDialect.h"
#include "aie/Dialect/AIEX/IR/AIEXDialect.h"
Expand Down
26 changes: 25 additions & 1 deletion lib/Targets/AIETargetShared.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//

#include "AIETargetShared.h"
#include "aie/Targets/AIETargetShared.h"

#include "aie/Dialect/AIE/IR/AIEDialect.h"
#include "aie/Dialect/AIEX/IR/AIEXDialect.h"
Expand Down Expand Up @@ -132,4 +132,28 @@ void generateXAieDmaSetMultiDimAddr(raw_ostream &output, int ndims,
<< " /* len */ " << lenA << "));\n";
}

/// Collects the blocks of `region` that are reachable from its entry block by
/// following terminator successors.
///
/// The result is ordered by discovery: the entry block comes first, and every
/// further block is appended the first time it shows up as a successor of an
/// already-visited block. Blocks of the region that cannot be reached from the
/// entry block are not part of the result.
///
/// \param region the region whose block chain is traversed.
/// \returns the reachable blocks in discovery order, or an empty set when
///          `region` contains no blocks.
llvm::SetVector<Block *> getOrderedChainOfBlocks(Region *region) {
  llvm::SetVector<Block *> blockVector;
  // front() must not be called on a region without blocks, so bail out early.
  if (region->empty())
    return blockVector;

  // Start at the region's entry block and walk the successor chain.
  SmallVector<Block *, 16> worklist;
  Block *entryBlock = &region->front();
  blockVector.insert(entryBlock);
  worklist.push_back(entryBlock);
  while (!worklist.empty()) {
    Block *block = worklist.pop_back_val();
    // A block without operations has no terminator to ask for successors.
    if (block->empty())
      continue;
    for (Block *successor : block->getTerminator()->getSuccessors()) {
      // insert() returns true only for blocks not seen before, so each block
      // is queued at most once and cycles in the chain terminate.
      if (blockVector.insert(successor))
        worklist.push_back(successor);
    }
  }
  return blockVector;
}

} // namespace xilinx::AIE
27 changes: 16 additions & 11 deletions lib/Targets/AIETargetXAIEV2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// (c) Copyright 2021-2023, Advanced Micro Devices, Inc.
//
//===----------------------------------------------------------------------===//
#include "AIETargetShared.h"
#include "aie/Targets/AIETargetShared.h"

#include "aie/Dialect/AIE/IR/AIEDialect.h"
#include "aie/Dialect/AIEX/IR/AIEXDialect.h"
Expand Down Expand Up @@ -83,7 +83,12 @@ mlir::LogicalResult generateDMAConfig(OpType memOp, raw_ostream &output,
int col = memOp.colIndex();
int row = memOp.rowIndex();

for (auto &block : memOp.getBody()) {
// Get the region's entry block, then start traversing through the chain of
// blocks.
llvm::SetVector<Block *> blockVector =
getOrderedChainOfBlocks(&memOp.getBody());

for (auto block : blockVector) {
bool foundBdPacket = false;
int packetType = 0;
int packetID = 0;
Expand All @@ -95,7 +100,7 @@ mlir::LogicalResult generateDMAConfig(OpType memOp, raw_ostream &output,
int ndims = 0;
ArrayRef<BDDimLayoutAttr> dims;
// StringRef FifoMode = disable; // FIXME: when to enable FIFO mode?
for (auto op : block.template getOps<DMABDOp>()) {
for (auto op : block->getOps<DMABDOp>()) {
foundBd = true;
if (!targetModel.isShimNOCTile(col, row)) {
assert(op.getBufferOp().getAddress() &&
Expand Down Expand Up @@ -133,7 +138,7 @@ mlir::LogicalResult generateDMAConfig(OpType memOp, raw_ostream &output,
int acqValue = 0, relValue = 0;
bool hasAcq = false, hasRel = false;
int acqLockID = 0, relLockID = 0;
for (auto op : block.template getOps<UseLockOp>()) {
for (auto op : block->getOps<UseLockOp>()) {
LockOp lock = cast<LockOp>(op.getLock().getDefiningOp());
int lockCol = lock.colIndex();
int lockRow = lock.rowIndex();
Expand Down Expand Up @@ -164,13 +169,13 @@ mlir::LogicalResult generateDMAConfig(OpType memOp, raw_ostream &output,
}
}

for (auto op : block.template getOps<DMABDPACKETOp>()) {
for (auto op : block->getOps<DMABDPACKETOp>()) {
foundBdPacket = true;
packetType = op.getPacketType();
packetID = op.getPacketID();
}

int bdNum = blockMap[&block];
int bdNum = blockMap[block];
if (foundBd) {
// TODO For now, we are going to name each dma desc with loc and bd
// which we assume is unique. This is strictly not enforced but in
Expand Down Expand Up @@ -219,9 +224,9 @@ mlir::LogicalResult generateDMAConfig(OpType memOp, raw_ostream &output,
BaseAddrA, offsetA, lenA,
elementWidthInBytes, "1");

if (block.getNumSuccessors() > 0) {
Block *nextBlock = block.getSuccessors()[0]; // should have only one
// successor block
if (block->getNumSuccessors() > 0) {
Block *nextBlock = block->getSuccessors()[0]; // should have only one
// successor block

int enableNextBd = 1;
if (!nextBlock->getOps<EndOp>().empty())
Expand All @@ -248,8 +253,8 @@ mlir::LogicalResult generateDMAConfig(OpType memOp, raw_ostream &output,
}
}

for (auto &block : memOp.getBody()) {
for (auto op : block.template getOps<DMAStartOp>()) {
for (auto block : blockVector) {
for (auto op : block->getOps<DMAStartOp>()) {
int bdNum = blockMap[op.getDest()];
StringRef dmaDir = stringifyDMAChannelDir(op.getChannelDir());
int chNum = op.getChannelIndex();
Expand Down
1 change: 1 addition & 0 deletions lib/Targets/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ set(BOOTGEN_SOURCE_DIR ${PROJECT_SOURCE_DIR}/third_party/bootgen)

add_mlir_library(AIERT
AIERT.cpp
AIETargetShared.cpp

PARTIAL_SOURCES_INTENDED
ENABLE_AGGREGATION
Expand Down
Loading

0 comments on commit ad0205f

Please sign in to comment.