Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Zero padding on MemTiles #1874

Merged
merged 82 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
82 commits
Select commit Hold shift + click to select a range
7ed306d
Zero Padding python binding
pvasireddy-amd Sep 25, 2024
266b50b
Padding at python level
pvasireddy-amd Sep 25, 2024
47dfc0a
Connecting padding from fifo to dmabd
pvasireddy-amd Sep 25, 2024
221507e
Padding
pvasireddy-amd Sep 25, 2024
d7585b8
Slight adjustment
pvasireddy-amd Sep 26, 2024
ae7dc3c
Test cases
pvasireddy-amd Sep 26, 2024
c020094
Small changes
pvasireddy-amd Oct 1, 2024
31a8a3a
Merge branch 'main' of https://github.com/Xilinx/mlir-aie into zero_pad
pvasireddy-amd Oct 1, 2024
13ebe8b
Revert changes
pvasireddy-amd Oct 1, 2024
5b39c9e
Runtime zero padding on MemTile
pvasireddy-amd Oct 1, 2024
496d3a2
Padding through dma-tasks-to-npu
pvasireddy-amd Oct 9, 2024
468c195
ObjectFifo example
pvasireddy-amd Oct 9, 2024
28e5ea0
Example code and DmaTasksToNpu
pvasireddy-amd Oct 16, 2024
fced7a9
Removed comment
pvasireddy-amd Oct 16, 2024
8867b5c
Merging main
pvasireddy-amd Oct 23, 2024
613617f
clang format
pvasireddy-amd Oct 23, 2024
e05429b
Python format
pvasireddy-amd Oct 23, 2024
6e40239
Remove unnecessary changes
pvasireddy-amd Oct 23, 2024
58686f5
Clang format
pvasireddy-amd Oct 23, 2024
6302047
Missing comma
pvasireddy-amd Oct 23, 2024
30b4f9c
Python format
pvasireddy-amd Oct 23, 2024
e8b3bbb
Python format
pvasireddy-amd Oct 23, 2024
6b5e0d4
Python format
pvasireddy-amd Oct 23, 2024
79348c3
Extra case
pvasireddy-amd Oct 25, 2024
af6ab71
Merge branch 'main' into zero_pad
pvasireddy-amd Oct 25, 2024
7db1506
Run command
pvasireddy-amd Oct 25, 2024
7c0a256
Merge branch 'zero_pad' of https://github.com/Xilinx/mlir-aie into ze…
pvasireddy-amd Oct 25, 2024
7975a17
Example code
pvasireddy-amd Oct 25, 2024
5f791fd
Push the new change
pvasireddy-amd Oct 25, 2024
6e13cbb
Add zero padding to writebd in trace
pvasireddy-amd Oct 25, 2024
5855171
Update writebd in tests
pvasireddy-amd Oct 25, 2024
f6ce5c7
Test case
pvasireddy-amd Oct 25, 2024
6ac9524
Message error
pvasireddy-amd Oct 25, 2024
ac5b356
Adding D2Size parameter
pvasireddy-amd Oct 31, 2024
7b76da3
Empty padDims on MemTile MM2S channel
pvasireddy-amd Oct 31, 2024
743cbe2
clang-format
pvasireddy-amd Oct 31, 2024
5adf8bf
clang-format
pvasireddy-amd Oct 31, 2024
e75688d
Revert changes
pvasireddy-amd Oct 31, 2024
94229b9
Changes to tests with D2Size
pvasireddy-amd Oct 31, 2024
1a4ddb2
Merge branch 'main' of https://github.com/Xilinx/mlir-aie into zero_pad
pvasireddy-amd Oct 31, 2024
71722af
D2Size
pvasireddy-amd Oct 31, 2024
5e12917
Checking syntax
pvasireddy-amd Nov 1, 2024
e5dd194
Revert change
pvasireddy-amd Nov 1, 2024
e872b36
Merge branch 'main' into zero_pad
pvasireddy-amd Nov 1, 2024
9bc6d07
Checking the pad dimensions:
pvasireddy-amd Nov 5, 2024
c048721
Update lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp
pvasireddy-amd Nov 5, 2024
98feeff
Merge branch 'main' into zero_pad
pvasireddy-amd Nov 6, 2024
b14062f
padDimensions check
pvasireddy-amd Nov 12, 2024
feb0b72
Merge branch 'main' of https://github.com/Xilinx/mlir-aie into zero_pad
pvasireddy-amd Nov 12, 2024
404fe65
Adding empty padDimensions
pvasireddy-amd Nov 12, 2024
cb789f3
Adding empty padDimensions
pvasireddy-amd Nov 12, 2024
f8ade08
Missed change
pvasireddy-amd Nov 12, 2024
42a8190
Change in AIEDialect
pvasireddy-amd Nov 12, 2024
844f68c
Revert "Change in AIEDialect"
pvasireddy-amd Nov 12, 2024
270009a
Attempt #1
pvasireddy-amd Nov 12, 2024
cbab8e9
Removing empty padDims
pvasireddy-amd Nov 12, 2024
970841d
Change order
pvasireddy-amd Nov 12, 2024
162851e
Formatting
pvasireddy-amd Nov 12, 2024
8f2fd4f
Formatting
pvasireddy-amd Nov 12, 2024
7e113c7
Number of dimensions greater than padDims
pvasireddy-amd Nov 12, 2024
7033cff
Adding missing parameters to writebd in test cases
pvasireddy-amd Nov 12, 2024
3b32d85
Remove duplicate check
pvasireddy-amd Nov 12, 2024
c6531ca
More changes
pvasireddy-amd Nov 12, 2024
c68fa02
Attempt 1
pvasireddy-amd Nov 12, 2024
f198ee2
Attempt 2
pvasireddy-amd Nov 12, 2024
4b1c3d4
Attempt 3
pvasireddy-amd Nov 12, 2024
e1888e0
Attempt 4
pvasireddy-amd Nov 12, 2024
6b883b3
Change to error message
pvasireddy-amd Nov 12, 2024
3c2f0a8
Small change
pvasireddy-amd Nov 12, 2024
9b2f703
Remove unnecessary
pvasireddy-amd Nov 12, 2024
2ef6ad5
Attempt 5
pvasireddy-amd Nov 13, 2024
f57558c
Small change
pvasireddy-amd Nov 13, 2024
6be2718
Attempt 6
pvasireddy-amd Nov 13, 2024
250fa6b
Passed locally
pvasireddy-amd Nov 13, 2024
27c3bd5
Missed few zero parameters in test case
pvasireddy-amd Nov 13, 2024
5ac99f4
Merge branch 'main' of https://github.com/Xilinx/mlir-aie into zero_pad
pvasireddy-amd Nov 13, 2024
048d58c
Merging main into current branch
pvasireddy-amd Nov 14, 2024
a90f17f
Cleanup
pvasireddy-amd Nov 15, 2024
3116714
Formatting
pvasireddy-amd Nov 15, 2024
8bfc435
Resolving conflicts
pvasireddy-amd Nov 15, 2024
ce5380e
Removing comments
pvasireddy-amd Nov 15, 2024
75cd604
Merge branch 'main' into zero_pad
pvasireddy-amd Nov 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions include/aie/Dialect/AIE/IR/AIEOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1677,7 +1677,8 @@ def AIE_ObjectFifoCreateOp: AIE_Op<"objectfifo", [HasParent<"DeviceOp">, Symbol]
OptionalAttr<AIEI32Attr>:$via_shared_mem,
// repeat_count==1 means "do it once"
OptionalAttr<ConfinedAttr<AIEI32Attr, [IntMinValue<1>]>>:$repeat_count,
InitValuesArrayAttr:$initValues
InitValuesArrayAttr:$initValues,
OptionalAttr<BDPadLayoutArrayAttr>:$padDimensions
);

let assemblyFormat = [{
Expand Down Expand Up @@ -1717,7 +1718,8 @@ def AIE_ObjectFifoCreateOp: AIE_Op<"objectfifo", [HasParent<"DeviceOp">, Symbol]
OpBuilder<(ins "mlir::StringAttr":$sym_name, "mlir::Value":$producerTile,
"mlir::ValueRange":$consumerTiles, "mlir::Attribute":$elemNumber, "mlir::Type":$elem_type,
CArg<"llvm::ArrayRef<AIE::BDDimLayoutAttr>", "{}">:$dimensionsToStream,
CArg<"llvm::ArrayRef<AIE::BDDimLayoutArrayAttr>", "{}">:$dimensionsFromStreamPerConsumer), [{
CArg<"llvm::ArrayRef<AIE::BDDimLayoutArrayAttr>", "{}">:$dimensionsFromStreamPerConsumer,
CArg<"llvm::ArrayRef<AIE::BDPadLayoutArrayAttr>", "{}">:$padDimensions), [{
odsState.addOperands(producerTile);
odsState.addOperands(consumerTiles);
odsState.addAttribute(getSymNameAttrName(odsState.name), sym_name);
Expand Down
17 changes: 15 additions & 2 deletions include/aie/Dialect/AIEX/IR/AIEX.td
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,13 @@ def AIE_NpuDmaMemcpyNdOp: AIEX_Op<"npu.dma_memcpy_nd", [
OptionalAttr<PacketInfoAttr>:$packet,
FlatSymbolRefAttr:$metadata,
I64Attr:$id,
DefaultValuedOptionalAttr<BoolAttr, "false">:$issue_token
DefaultValuedOptionalAttr<BoolAttr, "false">:$issue_token,
DefaultValuedOptionalAttr<I64Attr, "0">:$d0_zero_before,
DefaultValuedOptionalAttr<I64Attr, "0">:$d1_zero_before,
DefaultValuedOptionalAttr<I64Attr, "0">:$d2_zero_before,
DefaultValuedOptionalAttr<I64Attr, "0">:$d0_zero_after,
DefaultValuedOptionalAttr<I64Attr, "0">:$d1_zero_after,
DefaultValuedOptionalAttr<I64Attr, "0">:$d2_zero_after
);

let assemblyFormat = [{
Expand Down Expand Up @@ -828,6 +834,7 @@ def AIE_NpuWriteBdOp: AIEX_Op<"npu.writebd", []> {
I32Attr:$d0_stride,
I32Attr:$d1_size,
I32Attr:$d1_stride,
I32Attr:$d2_size,
I32Attr:$d2_stride,
I32Attr:$iteration_current,
I32Attr:$iteration_size,
Expand All @@ -840,7 +847,13 @@ def AIE_NpuWriteBdOp: AIEX_Op<"npu.writebd", []> {
I32Attr:$lock_rel_id,
I32Attr:$lock_acq_enable,
I32Attr:$lock_acq_val,
I32Attr:$lock_acq_id
I32Attr:$lock_acq_id,
I32Attr:$d0_zero_before,
I32Attr:$d1_zero_before,
I32Attr:$d2_zero_before,
I32Attr:$d0_zero_after,
I32Attr:$d1_zero_after,
I32Attr:$d2_zero_after
);
let results = (outs );
let assemblyFormat = [{ attr-dict }];
Expand Down
4 changes: 2 additions & 2 deletions lib/Dialect/AIE/IR/AIEDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1941,11 +1941,11 @@ LogicalResult DMABDOp::verify() {
if (!dims.has_value())
return emitOpError() << "Padding requires n-d data layouts expressed as"
<< " wrap(s) and stride(s).";
if (!targetModel.isMemTile(parentTileId.col, parentTileId.row))
return emitOpError() << "Padding is only supported by memtile dma bds.";
if (dims->size() != paddims->size())
return emitOpError() << "Mismatch number of dimensions between padding(s)"
<< " and wrap(s) and stride(s).";
if (!targetModel.isMemTile(parentTileId.col, parentTileId.row))
return emitOpError() << "Padding is only supported by memtile dma bds.";
int actuallen = 1;
for (unsigned i = 0; i < paddims->size(); i++) {
auto dim = (*dims)[i];
Expand Down
39 changes: 25 additions & 14 deletions lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,14 +513,19 @@ struct AIEObjectFifoStatefulTransformPass
void createBd(OpBuilder &builder, LockOp acqLock, int acqMode,
LockAction acqLockAction, LockOp relLock, int relMode,
MyOp buff, int offset, int len, Block *succ,
BDDimLayoutArrayAttr dims) {
BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions) {
if (acqLock)
builder.create<UseLockOp>(builder.getUnknownLoc(), acqLock, acqLockAction,
acqMode);
if (!dims.getValue().empty())

if (!dims.getValue().empty() && padDimensions) {
builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len, dims,
padDimensions);
} else if (!dims.getValue().empty()) {
builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len, dims);
else
} else {
builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len);
}
if (acqLock)
builder.create<UseLockOp>(builder.getUnknownLoc(), relLock,
LockAction::Release, relMode);
Expand All @@ -534,7 +539,8 @@ struct AIEObjectFifoStatefulTransformPass
void createBdBlock(OpBuilder &builder, ObjectFifoCreateOp op, int lockMode,
int acqNum, int relNum, MyOp buff, int offset, int len,
DMAChannelDir channelDir, size_t blockIndex, Block *succ,
BDDimLayoutArrayAttr dims) {
BDDimLayoutArrayAttr dims,
BDPadLayoutArrayAttr padDimensions) {
LockOp acqLock;
LockOp relLock;
int acqMode = 1;
Expand All @@ -559,20 +565,23 @@ struct AIEObjectFifoStatefulTransformPass
}
}
createBd(builder, acqLock, acqMode, acqLockAction, relLock, relMode, buff,
offset, len, succ, dims);
offset, len, succ, dims, padDimensions);
}

/// Function that either calls createAIETileDMA(), createShimDMA() or
/// createMemTileDMA() based on op tile row value.
void createDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op,
DMAChannelDir channelDir, int channelIndex, int lockMode,
BDDimLayoutArrayAttr dims) {
BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr pad_dims) {
if (op.getProducerTileOp().isShimTile()) {
createShimDMA(device, builder, op, channelDir, channelIndex, lockMode,
dims);
} else if (op.getProducerTileOp().isMemTile()) {
BDPadLayoutArrayAttr padDims = nullptr;
if (channelDir == DMAChannelDir::MM2S && pad_dims)
padDims = pad_dims;
createMemTileDMA(device, builder, op, channelDir, channelIndex, lockMode,
dims);
dims, padDims);
} else {
createAIETileDMA(device, builder, op, channelDir, channelIndex, lockMode,
dims);
Expand Down Expand Up @@ -669,7 +678,7 @@ struct AIEObjectFifoStatefulTransformPass
builder.setInsertionPointToStart(curr);
createBdBlock<BufferOp>(builder, target, lockMode, acqNum, relNum,
buffersPerFifo[target][blockIndex], /*offset*/ 0,
len, channelDir, blockIndex, succ, dims);
len, channelDir, blockIndex, succ, dims, nullptr);
curr = succ;
blockIndex++;
}
Expand Down Expand Up @@ -745,7 +754,7 @@ struct AIEObjectFifoStatefulTransformPass
createBdBlock<ExternalBufferOp>(builder, op, lockMode, acqNum, relNum,
externalBuffersPerFifo[op][blockIndex],
/*offset*/ 0, len, channelDir, blockIndex,
succ, dims);
succ, dims, nullptr);
curr = succ;
blockIndex++;
}
Expand All @@ -756,7 +765,8 @@ struct AIEObjectFifoStatefulTransformPass
void createMemTileDMA(DeviceOp &device, OpBuilder &builder,
ObjectFifoCreateOp op, DMAChannelDir channelDir,
int channelIndex, int lockMode,
BDDimLayoutArrayAttr dims) {
BDDimLayoutArrayAttr dims,
BDPadLayoutArrayAttr padDimensions) {
size_t numBlocks = op.size();
if (numBlocks == 0)
return;
Expand Down Expand Up @@ -898,7 +908,8 @@ struct AIEObjectFifoStatefulTransformPass
offset = extraOffset;
createBdBlock<BufferOp>(builder, target, lockMode, acqNum, relNum,
buffersPerFifo[target][blockIndex], offset,
lenOut, channelDir, blockIndex, succ, dims);
lenOut, channelDir, blockIndex, succ, dims,
padDimensions);
curr = succ;
blockIndex++;
}
Expand Down Expand Up @@ -1361,7 +1372,6 @@ struct AIEObjectFifoStatefulTransformPass
auto consumerWireType = WireBundle::DMA;
std::set<TileOp>
objectFifoTiles; // track cores to check for loops during unrolling

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo?

//===------------------------------------------------------------------===//
// Split objectFifos into a consumer end and producer end if needed
//===------------------------------------------------------------------===//
Expand Down Expand Up @@ -1511,7 +1521,8 @@ struct AIEObjectFifoStatefulTransformPass
DMAChannel producerChan =
dmaAnalysis.getMasterDMAChannel(producer.getProducerTile());
createDMA(device, builder, producer, producerChan.direction,
producerChan.channel, 0, producer.getDimensionsToStreamAttr());
producerChan.channel, 0, producer.getDimensionsToStreamAttr(),
producer.getPadDimensionsAttr());
// generate objectFifo allocation info
builder.setInsertionPoint(&device.getBody()->back());

Expand All @@ -1529,7 +1540,7 @@ struct AIEObjectFifoStatefulTransformPass
BDDimLayoutArrayAttr consumerDims =
consumer.getDimensionsFromStreamPerConsumer()[0];
createDMA(device, builder, consumer, consumerChan.direction,
consumerChan.channel, 1, consumerDims);
consumerChan.channel, 1, consumerDims, nullptr);
// generate objectFifo allocation info
builder.setInsertionPoint(&device.getBody()->back());

Expand Down
7 changes: 7 additions & 0 deletions lib/Dialect/AIEX/IR/AIEXDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,13 @@ LogicalResult AIEX::NpuWriteBdOp::verify() {
return emitOpError("Iteration Size exceeds the [0:63] range.");
if (getIterationStride() > 0xFFFFF)
return emitOpError("Iteration Stride exceeds the [0:1M-1] range.");
if (targetModel.isShimNOCTile(getColumn(), getRow()) && getD2Size() != 0)
return emitOpError("ShimTile only supports 3 dimensions of sizes.");
if (targetModel.isShimNOCTile(getColumn(), getRow()) &&
(getD0ZeroBefore() != 0 || getD0ZeroAfter() != 0 ||
getD1ZeroBefore() != 0 || getD1ZeroAfter() != 0 ||
getD2ZeroBefore() != 0 || getD2ZeroAfter() != 0))
return emitOpError("ShimTile doesn't support zero padding.");
return success();
}

Expand Down
2 changes: 1 addition & 1 deletion lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ struct AIECtrlPacketToDmaPass : AIECtrlPacketToDmaBase<AIECtrlPacketToDmaPass> {
SmallVector<Value>{}, SmallVector<Value>{},
SmallVector<Value>{}, ArrayRef(staticOffsets),
ArrayRef(staticSizes), ArrayRef(staticStrides),
controllerIdPkt, metadata, 0, true);
controllerIdPkt, metadata, 0, true, 0, 0, 0, 0, 0, 0);

auto shimRow = builder.getI32IntegerAttr(0);
auto shimCol = builder.getI32IntegerAttr(col);
Expand Down
65 changes: 59 additions & 6 deletions lib/Dialect/AIEX/Transforms/AIEDMATasksToNPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,8 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
}

LogicalResult rewriteSingleBD(OpBuilder &builder, Block &block,
AIE::TileOp &tile) {
AIE::TileOp &tile,
AIE::DMAChannelDir channelDir) {
AIE::DMABDOp bd_op = getBdForBlock(block);
const auto &target_model = AIE::getTargetModel(bd_op);
MemRefType buffer_type = bd_op.getBuffer().getType();
Expand All @@ -237,12 +238,23 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
<< len << " bytes falls below minimum hardware transfer unit of "
<< (addr_granularity / 8) << " bytes.";
}

// Process strides/wraps
std::optional<llvm::ArrayRef<AIE::BDDimLayoutAttr>> dims =
bd_op.getDimensions();
llvm::SmallVector<int64_t, 4> sizes = llvm::SmallVector<int64_t, 4>(4, 0);
llvm::SmallVector<int64_t, 4> strides = llvm::SmallVector<int64_t, 4>(4, 0);

// Padding
std::optional<llvm::ArrayRef<AIE::BDPadLayoutAttr>> padDims =
bd_op.getPadDimensions();
llvm::SmallVector<int64_t, 4> padBefore =
llvm::SmallVector<int64_t, 4>(4, 0);
llvm::SmallVector<int64_t, 4> padAfter =
llvm::SmallVector<int64_t, 4>(4, 0);
std::fill(padBefore.begin(), padBefore.end(), 0);
std::fill(padAfter.begin(), padAfter.end(), 0);
int d2size = 0;

if (dims && dims->size() > 0) {
llvm::SmallVector<int64_t, 4> input_sizes =
llvm::SmallVector<int64_t, 4>(4, 1);
Expand All @@ -252,6 +264,7 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
return bd_op->emitOpError("At most four data layout transformation "
"dimensions may be provided.");
}

for (size_t i = 0; i < dims->size(); i++) {
// Pass down dimensions in reverse order; in the MLIR, this allows
// us to specify step sizes/wraps in the same order as we would
Expand All @@ -260,6 +273,33 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
input_sizes[i] = (*dims)[j].getSize();
input_strides[i] = (*dims)[j].getStride();
}
if (dims->size() > 2) {
d2size = (target_model.isMemTile(tile.getCol(), tile.getRow()))
? (*dims)[2].getSize()
: 0;
}
if (padDims.has_value()) {
if (!target_model.isMemTile(tile.getCol(), tile.getRow()))
return bd_op->emitOpError()
<< "Padding is only supported by memtile dma bds.";
if (padDims->size() > dims->size())
return bd_op->emitOpError()
<< "Mismatch number of dimensions between padding(s)"
<< " and wrap(s) and stride(s).";
if (channelDir == AIE::DMAChannelDir::MM2S) {
for (size_t i = 0; i < padDims->size(); i++) {
int j = padDims->size() - i - 1;
padBefore[i] = (*padDims)[j].getConstPadBefore();
padAfter[i] = (*padDims)[j].getConstPadAfter();
}
for (size_t i = padDims->size(); i < dims->size(); i++) {
padBefore[i] = 0;
padAfter[i] = 0;
}
} else
return bd_op->emitOpError()
<< "supports padding only for MM2S direction on MemTiles.";
}
getHardwareStridesWraps(target_model, buffer_type, input_sizes,
input_strides, sizes, strides);
if (failed(verifyStridesWraps(bd_op, buffer_type, tile.getCol(),
Expand Down Expand Up @@ -290,8 +330,16 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
"transfer length, as this is the BD repeat count.";
return failure();
}
} else {
if (padDims && target_model.isMemTile(tile.getCol(), tile.getRow()) &&
channelDir == AIE::DMAChannelDir::MM2S) {
return bd_op->emitOpError()
<< "Padding requires n-d data layouts expressed as "
<< "wrap(s) and stride(s).";
} else if (padDims) {
return bd_op->emitOpError() << "Padding is supported only on MemTiles.";
}
}

// find next BD ID, if any
uint32_t use_next_bd = 0;
uint32_t next_bd_id = 0;
Expand All @@ -306,7 +354,7 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
/* TODO: Strides/Wraps */
/*d0_size=*/sizes[0], /*d0_stride=*/strides[0],
/*d1_size=*/sizes[1], /*d1_stride=*/strides[1],
/*d2_stride=*/strides[2],
/*d2_size=*/d2size, /*d2_stride=*/strides[2],
/*iteration_current=*/0, /*iteration_size=*/sizes[3],
/*iteration_stride=*/strides[3],
/* TODO: Next BD */
Expand All @@ -316,7 +364,10 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
/*valid_bd=*/1,
/* TODO: Locks */
/*lock_rel_val=*/0, /*lock_rel_id=*/0, /*lock_acq_enable=*/0,
/*lock_acq_val=*/0, /*lock_ackq_id=*/0);
/*lock_acq_val=*/0, /*lock_ackq_id=*/0, /*d0_zero_before=*/padBefore[0],
/*d1_zero_before=*/padBefore[1], /*d2_zero_before=*/padBefore[2],
/*d0_zero_after=*/padAfter[0], /*d1_zero_after=*/padAfter[1],
/*d2_zero_after=*/padAfter[2]);

return setAddressForSingleBD(builder, bd_op, tile);
}
Expand Down Expand Up @@ -392,13 +443,15 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
return failure();
}

auto channelDir = op.getDirection();

// Lower all BDs
for (auto it = body.begin(); it != body.end(); ++it) {
Block &block = *it;
if (shouldSkipBlock(block)) {
continue;
}
if (failed(rewriteSingleBD(builder, block, tile))) {
if (failed(rewriteSingleBD(builder, block, tile, channelDir))) {
return failure();
}
}
Expand Down
Loading
Loading