Skip to content

Commit

Permalink
Add eltwise add_tiles and mul_tiles ttkernel dialect
Browse files Browse the repository at this point in the history
This also includes relevant setup HLK calls like:
- TileRegsAcquireOp
- TileRegsCommitOp
- TileRegsWaitOp
- TileRegsReleaseOp
- PackTileOp
- BinaryOpInitCommonOp
- AddTilesInitOp
- MulTilesInitOp
- AddTilesOp
- MulTilesOp

Also added support for lowering SCF through the ttkernel emitc flow.
  • Loading branch information
nsmithtt committed Aug 22, 2024
1 parent 0380447 commit 66b7a20
Show file tree
Hide file tree
Showing 2 changed files with 139 additions and 0 deletions.
111 changes: 111 additions & 0 deletions include/ttmlir/Dialect/TTKernel/IR/TTKernelOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,66 @@ def TTKernel_ReleaseDstOp : TTKernel_Op<"release_dst"> {
}];
}

// `tile_regs_acquire`: MATH-thread side of the DST register handshake.
// This record declares only a summary and a description (no `arguments`).
// Per the descriptions in this file, the matching release for this lock is
// `tile_regs_commit`.
def TTKernel_TileRegsAcquireOp : TTKernel_Op<"tile_regs_acquire"> {
let summary = "tile_regs_acquire";
let description = [{
Acquire an exclusive lock on the DST register for the MATH thread.
This register is an array of 16 tiles of 32x32 elements each.
This is a blocking function, i.e. this function will wait until the lock is acquired.
}];
}

// `tile_regs_commit`: MATH-thread release of the DST register lock that was
// taken with `tile_regs_acquire`. This record declares only a summary and a
// description (no `arguments`).
def TTKernel_TileRegsCommitOp : TTKernel_Op<"tile_regs_commit"> {
let summary = "tile_regs_commit";
let description = [{
Release lock on DST register by MATH thread. The lock had to be previously acquired with tile_regs_acquire.
}];
}

// `tile_regs_wait`: PACK-thread side of the DST register handshake; it blocks
// until the MATH thread has committed. This record declares only a summary and
// a description (no `arguments`). Per the descriptions in this file, the
// matching release for this lock is `tile_regs_release`.
def TTKernel_TileRegsWaitOp : TTKernel_Op<"tile_regs_wait"> {
let summary = "tile_regs_wait";
let description = [{
Acquire an exclusive lock on the DST register for the PACK thread.
It waits for the MATH thread to commit the DST register.
This is a blocking function, i.e. this function will wait until the lock is acquired.
}];
}

// `tile_regs_release`: PACK-thread release of the DST register lock that was
// taken with `tile_regs_wait`. This record declares only a summary and a
// description (no `arguments`).
def TTKernel_TileRegsReleaseOp : TTKernel_Op<"tile_regs_release"> {
let summary = "tile_regs_release";
let description = [{
Release lock on DST register by PACK thread. The lock had to be previously acquired with tile_regs_wait.
}];
}

// `pack_tile`: copies one tile from the DST register buffer into a circular
// buffer (CB).
//
// Operands, in order:
//   $dst_index - I32 index of the source tile in the DST register buffer.
//   $out_cb    - TTKernel_CB destination circular buffer.
//   $out_index - I32 tile index within the reserved section of $out_cb.
//
// NOTE(review): the description below calls the last operand `out_tile_index`,
// while the operand declared here is `$out_index`; presumably they refer to the
// same value -- confirm before relying on the name in generated docs.
def TTKernel_PackTileOp : TTKernel_Op<"pack_tile"> {
let summary = "PackTile op.";
let description = [{
Copies a single tile from the DST register buffer at a specified index to a
specified CB at a given index. For the out_tile_index to be valid for this
call, cb_reserve_back(n) has to be called first to reserve at least some
number n > 0 of tiles in the output CB. out_tile_index = 0 then references
the first tile in the reserved section of the CB, up to index n - 1, which will
then be visible to the consumer in the same order after a cb_push_back call.
The DST register buffer must be in acquired state via *acquire_dst* call.
This call is blocking and is only available on the compute engine.

Each subsequent pack call will increment the write pointer in the cb by single
tile size. The pointer is then again set to a valid position with space for n
reserved tiles by another cb_reserve_back call.

Operates in tandem with functions cb_reserve_back and cb_push_back.

A typical use case is first the producer ensures that there is a number of
tiles available in the buffer via cb_reserve_back, then the producer uses
the pack_tile call to copy a tile from one of DST slots to a slot in
reserved space and finally cb_push_back is called to announce visibility of
the reserved section of the circular buffer to the consumer.
}];

let arguments = (ins I32:$dst_index, TTKernel_CB:$out_cb, I32:$out_index);
}

//===----------------------------------------------------------------------===//
// TTKernel FPU operations
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -123,6 +183,57 @@ def TTKernel_MatmulOp : TTKernel_Op<"matmul"> {
let arguments = (ins I32:$dst_index);
}

// `binary_op_init_common`: common initialisation shared by all binary tile ops.
//
// Operands, in order:
//   $in0_cb - TTKernel_CB first input circular buffer.
//   $in1_cb - TTKernel_CB second input circular buffer.
//   $out_cb - TTKernel_CB output circular buffer.
//
// Per the description, an op-specific init (e.g. `add_tiles_init` or
// `mul_tiles_init`) is expected to follow this op.
def TTKernel_BinaryOpInitCommonOp : TTKernel_Op<"binary_op_init_common"> {
let summary = "Init function for all binary ops";
let description = [{
Followed by the specific init required with an opcode (binary_op_specific_init).
}];

let arguments = (ins TTKernel_CB:$in0_cb, TTKernel_CB:$in1_cb, TTKernel_CB:$out_cb);
}

// `add_tiles_init`: short init that must run before `add_tiles`.
//
// Operands, in order:
//   $in0_cb - TTKernel_CB first input circular buffer.
//   $in1_cb - TTKernel_CB second input circular buffer.
//
// The trailing FIXME on the `arguments` line records a boolean `acc_to_dst`
// operand that is not modelled by this op yet.
def TTKernel_AddTilesInitOp : TTKernel_Op<"add_tiles_init"> {
let summary = "Short init function";
let description = [{
Must be run before add_tiles.
}];

let arguments = (ins TTKernel_CB:$in0_cb, TTKernel_CB:$in1_cb); // FIXME: , BOOL:$acc_to_dst);
}

// `add_tiles`: element-wise addition of one tile from each of two CBs, with
// the result written to the DST register.
//
// Operands, in order:
//   $in0_cb         - TTKernel_CB holding operand A.
//   $in1_cb         - TTKernel_CB holding operand B.
//   $in0_tile_index - I32 tile index into $in0_cb.
//   $in1_tile_index - I32 tile index into $in1_cb.
//   $dst_index      - I32 index of the destination tile in the DST register.
//
// NOTE(review): the description below names the destination `dst_tile_index`,
// while the operand declared here is `$dst_index`; presumably the same value.
def TTKernel_AddTilesOp : TTKernel_Op<"add_tiles"> {
let summary = "Add operation";
let description = [{
Performs element-wise addition C=A+B of tiles in two CBs at given indices
and writes the result to the DST register at index dst_tile_index. The DST
register buffer must be in acquired state via *acquire_dst* call. This call
is blocking and is only available on the compute engine.
}];

let arguments = (ins TTKernel_CB:$in0_cb, TTKernel_CB:$in1_cb, I32:$in0_tile_index, I32:$in1_tile_index, I32:$dst_index);
}

// `mul_tiles_init`: short init that must run before `mul_tiles`.
//
// Operands, in order:
//   $in0_cb - TTKernel_CB first input circular buffer.
//   $in1_cb - TTKernel_CB second input circular buffer.
def TTKernel_MulTilesInitOp : TTKernel_Op<"mul_tiles_init"> {
let summary = "Short init function";
let description = [{
Must be run before mul_tiles.
}];

let arguments = (ins TTKernel_CB:$in0_cb, TTKernel_CB:$in1_cb);
}

// `mul_tiles`: element-wise multiplication of one tile from each of two CBs,
// with the result written to the DST register.
//
// Operands, in order:
//   $in0_cb         - TTKernel_CB holding operand A.
//   $in1_cb         - TTKernel_CB holding operand B.
//   $in0_tile_index - I32 tile index into $in0_cb.
//   $in1_tile_index - I32 tile index into $in1_cb.
//   $dst_index      - I32 index of the destination tile in the DST register.
//
// NOTE(review): the description below names the destination `dst_tile_index`,
// while the operand declared here is `$dst_index`; presumably the same value.
def TTKernel_MulTilesOp : TTKernel_Op<"mul_tiles"> {
let summary = "Mul operation";
let description = [{
Performs element-wise multiplication C=A*B of tiles in two CBs at given
indices and writes the result to the DST register at index dst_tile_index.
The DST register buffer must be in acquired state via *acquire_dst* call.
This call is blocking and is only available on the compute engine.
}];

let arguments = (ins TTKernel_CB:$in0_cb, TTKernel_CB:$in1_cb, I32:$in0_tile_index, I32:$in1_tile_index, I32:$dst_index);
}

//===----------------------------------------------------------------------===//
// TTKernel CB operations
//===----------------------------------------------------------------------===//
Expand Down
28 changes: 28 additions & 0 deletions lib/Dialect/TTMetal/Transforms/KernelsToCpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
#include "llvm/ADT/ScopeExit.h"

#include "mlir/Conversion/ArithToEmitC/ArithToEmitC.h"
#include "mlir/Conversion/SCFToEmitC/SCFToEmitC.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/EmitC/IR/EmitC.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Rewrite/FrozenRewritePatternSet.h"
#include "mlir/Support/LogicalResult.h"
Expand Down Expand Up @@ -228,6 +230,9 @@ LogicalResult emitDispatchOpRegionAsCpp(DispatchOp origOp,
builder.create<emitc::IncludeOp>(module.getLoc(),
"compute_kernel_api/untilize.h",
/*isStandard=*/false);
builder.create<emitc::IncludeOp>(module.getLoc(),
"compute_kernel_api/eltwise_binary.h",
/*isStandard=*/false);
}

if (threadTypeAttr.getValue() == ttkernel::ThreadType::Tensix) {
Expand Down Expand Up @@ -264,11 +269,29 @@ LogicalResult emitDispatchOpRegionAsCpp(DispatchOp origOp,
}
}

// Apply scf to emitc conversion next
{
ConversionTarget target(*module.getContext());
target.addLegalDialect<emitc::EmitCDialect>();
target.addIllegalDialect<scf::SCFDialect>();
RewritePatternSet scfPatterns(module.getContext());
populateSCFToEmitCConversionPatterns(scfPatterns);
if (failed(
applyPartialConversion(module, target, std::move(scfPatterns)))) {
return failure();
}
}

TTKernelToEmitCTypeConverter typeConverter(module.getContext());
RewritePatternSet patterns(module.getContext());

patterns.add<TTMetalToEmitCFuncArgsRewriter, TTMetalToEmitCReturnRewriter,
TTMetalToEmitCOpaqueRewriter<ttkernel::BuiltinOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::TileRegsAcquireOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::TileRegsCommitOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::TileRegsWaitOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::TileRegsReleaseOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::PackTileOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::CBPushBackOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::CBPopFrontOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::CBReserveBackOp>,
Expand All @@ -277,6 +300,11 @@ LogicalResult emitDispatchOpRegionAsCpp(DispatchOp origOp,
TTMetalToEmitCOpaqueRewriter<ttkernel::UntilizeInitOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::TilizeBlockOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::UntilizeBlockOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::BinaryOpInitCommonOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::AddTilesInitOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::MulTilesInitOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::AddTilesOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::MulTilesOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::GetNocAddrOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::NocAsyncReadOp>,
TTMetalToEmitCOpaqueRewriter<ttkernel::NocAsyncReadBarrierOp>,
Expand Down

0 comments on commit 66b7a20

Please sign in to comment.