Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#1851: Add options to ttmlir-translate to output C++ from TTKernel dialect #1602

Merged
merged 28 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e5406a7
initial check-in
TT-billteng Dec 15, 2024
74e12fd
use existing passes to translate kernel to C++
TT-billteng Dec 16, 2024
e48c6c1
fix typo in filename
TT-billteng Dec 16, 2024
850d371
fix compile error
TT-billteng Dec 16, 2024
92267fb
CMake setup and fixing errors for TTKernelToCpp
TT-billteng Dec 16, 2024
aa1133e
remove unused function parameter and fix error msg
TT-billteng Dec 16, 2024
f178591
add TTKernel translate MLIR testcases
TT-billteng Dec 18, 2024
947004e
rename emitDispatchOpRegionAsCpp function as it's not specific to Dis…
TT-billteng Dec 19, 2024
4e6ebf3
register TT dialect
TT-billteng Dec 20, 2024
c83e19c
emit C++ with metal kernel boilerplate
TT-billteng Dec 20, 2024
cf0474e
cleanup
TT-billteng Dec 21, 2024
ee4776c
add python bindings to generate C++ for Noc and Tensix kernels
TT-billteng Dec 28, 2024
ae8a70a
comment out printer
TT-billteng Dec 28, 2024
1a27f16
fix linker error
TT-billteng Dec 28, 2024
63b6978
update ttmlir-translate options in mlir test files
TT-billteng Dec 28, 2024
e23a520
remove KernelConfigInterface member variable
TT-billteng Dec 28, 2024
1b5033a
remove debug code
TT-billteng Dec 28, 2024
4f96d52
remove python pass, change deferred to a later PR
TT-billteng Dec 28, 2024
c8729d8
cleanup
TT-billteng Dec 28, 2024
726d1da
clang format
TT-billteng Jan 1, 2025
052025d
use smallvector
TT-billteng Jan 13, 2025
970fefb
change name of mlir translate options
TT-billteng Jan 13, 2025
dcf1a7e
add filecheck statements
TT-billteng Jan 14, 2025
1e0a8c6
include header rather than fwd declare
TT-billteng Jan 14, 2025
63d5cbb
fix header order and add bug # to TODO
TT-billteng Jan 16, 2025
d6cba0b
reorder headers
TT-billteng Jan 16, 2025
cf69651
reorder headers
TT-billteng Jan 16, 2025
d81f756
fix headers
TT-billteng Jan 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,25 @@ namespace mlir::tt {
// Runs a conversion pass to EmitC dialect on a func op containing given
// region's body. Also, it adds boilerplate code such as includes and namespace
// declarations.
LogicalResult convertTTKernelRegionToEmitC(
OpBuilder &builder, Region *region,
const ttkernel::KernelConfigInterface &kernelConfig);
LogicalResult
convertTTKernelRegionToEmitC(OpBuilder &builder, Region *region,
const ttkernel::ThreadType &threadType);

// Converts given region to EmitC dialect and translates it to C++ code.
LogicalResult
emitDispatchOpRegionAsCpp(Region *region, std::string &regionCpp,
const ttkernel::KernelConfigInterface &kernelConfig);
LogicalResult emitOpRegionAsCpp(Region *region, std::string &regionCpp,
const ttkernel::ThreadType &threadType);

LogicalResult emitOpRegionAsCpp(Region *region, llvm::raw_ostream &os,
const ttkernel::ThreadType &threadType);

// Converts dispatch op's regions to C++ code.
LogicalResult
emitDispatchOpRegionsAsCpp(ttmetal::DispatchOp dispatchOp,
llvm::SmallVector<std::string> &cppStrings);

LogicalResult emitKernelAsCpp(mlir::ModuleOp op, llvm::raw_ostream &os,
const ttkernel::ThreadType &threadType);

} // namespace mlir::tt

#endif
21 changes: 21 additions & 0 deletions include/ttmlir/Target/TTKernel/TTKernelToCpp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
//
// SPDX-License-Identifier: Apache-2.0

#ifndef TTMLIR_TARGET_TTKERNEL_TTKERNELTOCPP_H
#define TTMLIR_TARGET_TTKERNEL_TTKERNELTOCPP_H

#include "ttmlir/Dialect/TTKernel/IR/TTKernelOpsTypes.h"
#include <mlir/IR/Operation.h>
#include <mlir/Support/LogicalResult.h>

namespace mlir::tt::ttkernel {

// Translates a TTKernel operation to C++ and writes it to the given
// stream.
LogicalResult translateTTKernelToCpp(Operation *op, llvm::raw_ostream &os,
const ThreadType &threadType);

} // namespace mlir::tt::ttkernel

#endif
1 change: 1 addition & 0 deletions lib/Conversion/TTKernelToEmitC/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ add_mlir_conversion_library(TTMLIRTTKernelToEmitC
MLIRIR
MLIRPass
MLIRArithToEmitC
MLIRSCFToEmitC
MLIREmitCDialect
MLIRTargetCpp
MLIRTransformUtils
Expand Down
69 changes: 44 additions & 25 deletions lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -460,16 +460,16 @@ std::unique_ptr<::mlir::Pass> createConvertTTKernelToEmitC() {
class ThreadConfigHelper {
public:
ThreadConfigHelper(OpBuilder *builder, Location loc,
ttkernel::KernelConfigInterface kernelConfig)
: builder(builder), loc(loc), kernelConfig(kernelConfig) {
ttkernel::ThreadType threadType)
: builder(builder), loc(loc), threadType(threadType) {
builder->create<emitc::IncludeOp>(loc, "cstdint",
/*isStandard=*/true);
if (kernelConfig.getThreadType() == ttkernel::ThreadType::Noc) {
if (threadType == ttkernel::ThreadType::Noc) {

builder->create<emitc::IncludeOp>(loc, "dataflow_api.h",
/*isStandard=*/false);
}
if (kernelConfig.getThreadType() == ttkernel::ThreadType::Tensix) {
if (threadType == ttkernel::ThreadType::Tensix) {
builder->create<emitc::IncludeOp>(loc, "llk_defs.h",
/*isStandard=*/false);
builder->create<emitc::IncludeOp>(loc, "compute_kernel_api/common.h",
Expand Down Expand Up @@ -514,7 +514,7 @@ class ThreadConfigHelper {
}

~ThreadConfigHelper() {
if (kernelConfig.getThreadType() == ttkernel::ThreadType::Tensix) {
if (threadType == ttkernel::ThreadType::Tensix) {
builder->create<emitc::VerbatimOp>(loc, "void MAIN { kernel_main(); }");
builder->create<emitc::VerbatimOp>(loc,
"}"); // close namespace NAMESPACE
Expand All @@ -524,14 +524,13 @@ class ThreadConfigHelper {
private:
OpBuilder *builder;
Location loc;
ttkernel::KernelConfigInterface kernelConfig;
ttkernel::ThreadType threadType;
};

LogicalResult convertTTKernelRegionToEmitC(
OpBuilder &builder, Region *region,
const ttkernel::KernelConfigInterface &kernelConfig) {
ThreadConfigHelper threadConfigHelper(&builder, region->getLoc(),
kernelConfig);
LogicalResult
convertTTKernelRegionToEmitC(OpBuilder &builder, Region *region,
const ttkernel::ThreadType &threadType) {
ThreadConfigHelper threadConfigHelper(&builder, region->getLoc(), threadType);

auto funcOp = builder.create<func::FuncOp>(
region->getLoc(), "kernel_main",
Expand All @@ -550,22 +549,33 @@ LogicalResult convertTTKernelRegionToEmitC(
return success();
}

LogicalResult
emitDispatchOpRegionAsCpp(Region *region, std::string &regionCpp,
const ttkernel::KernelConfigInterface &kernelConfig) {
OpBuilder builder(region->getContext());
LogicalResult emitOpRegionAsCpp(Region *region, std::string &regionCpp,
const ttkernel::ThreadType &threadType) {

llvm::raw_string_ostream os(regionCpp);
return emitOpRegionAsCpp(region, os, threadType);
}

LogicalResult emitOpRegionAsCpp(Region *region, llvm::raw_ostream &os,
const ttkernel::ThreadType &threadType) {

// We must load the EmitC dialect before we can emit any EmitC code. This
// dialect won't be loaded by MLIR until pass manager starts a pass that
// depends on it. Because we want to emit EmitC code before that, we need to
// load it here.
region->getContext()->getOrLoadDialect<emitc::EmitCDialect>();

OpBuilder builder(region->getContext());
// We will wrap everything in a module op so that we can run the
// translation.
auto moduleWrapper =
builder.create<mlir::ModuleOp>(region->getLoc(), "module_wrapper");
builder.setInsertionPointToStart(moduleWrapper.getBody());

if (convertTTKernelRegionToEmitC(builder, region, kernelConfig).failed()) {
if (convertTTKernelRegionToEmitC(builder, region, threadType).failed()) {
return failure();
}

llvm::raw_string_ostream os(regionCpp);
if (emitc::translateToCpp(moduleWrapper, os).failed()) {
return failure();
}
Expand All @@ -579,17 +589,11 @@ emitDispatchOpRegionsAsCpp(ttmetal::DispatchOp dispatchOp,
assert(cppStrings.size() == dispatchOp.getNumRegions() &&
"cppStrings size must match number of regions");

// We must load the EmitC dialect before we can emit any EmitC code. This
// dialect won't be loaded by MLIR until pass manager starts a pass that
// depends on it. Because we want to emit EmitC code before that, we need to
// load it here.
dispatchOp.getContext()->getOrLoadDialect<emitc::EmitCDialect>();

for (auto &reg : dispatchOp->getRegions()) {
auto kernelConfig = mlir::cast<ttkernel::KernelConfigInterface>(
dispatchOp.getKernelConfigs()[reg.getRegionNumber()]);
if (emitDispatchOpRegionAsCpp(&reg, cppStrings[reg.getRegionNumber()],
kernelConfig)
if (emitOpRegionAsCpp(&reg, cppStrings[reg.getRegionNumber()],
kernelConfig.getThreadType())
.failed()) {
return llvm::failure();
}
Expand All @@ -598,4 +602,19 @@ emitDispatchOpRegionsAsCpp(ttmetal::DispatchOp dispatchOp,
return success();
}

LogicalResult emitKernelAsCpp(mlir::ModuleOp op, llvm::raw_ostream &os,
const ttkernel::ThreadType &threadType) {
llvm::SmallVector<func::FuncOp, 1> ops;
op->walk([&](func::FuncOp entry) { ops.push_back(entry); });

for (const auto &op : ops) {
for (auto &reg : op->getRegions()) {
if (emitOpRegionAsCpp(&reg, os, threadType).failed()) {
return llvm::failure();
}
}
}
return llvm::success();
}

} // namespace mlir::tt
1 change: 1 addition & 0 deletions lib/SharedLib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ endif()
set(TTMLIR_LIBS
TTNNTargetFlatbuffer
TTMetalTargetFlatbuffer
TTKernelTargetCpp
MLIRTTDialect
MLIRTTIRDialect
MLIRTTNNDialect
Expand Down
1 change: 1 addition & 0 deletions lib/Target/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
add_subdirectory(TTMetal)
add_subdirectory(TTNN)
add_subdirectory(TTKernel)
16 changes: 16 additions & 0 deletions lib/Target/TTKernel/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
add_mlir_translation_library(TTKernelTargetCpp
TTKernelToCpp.cpp
TTKernelToCppRegistration.cpp

ADDITIONAL_HEADER_DIRS
${PROJECT_SOURCE_DIR}/include/Target/TTKernel

LINK_LIBS PUBLIC
MLIRTTKernelDialect
TTMLIRTTKernelToEmitC
MLIRFuncToEmitC
MLIRSCFToEmitC
MLIRArithToEmitC
)

target_include_directories(TTKernelTargetCpp PUBLIC ${PROJECT_BINARY_DIR}/include/ttmlir/Target/Common)
31 changes: 31 additions & 0 deletions lib/Target/TTKernel/TTKernelToCpp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
//
// SPDX-License-Identifier: Apache-2.0

#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Target/Cpp/CppEmitter.h"
#include "ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h"
#include "llvm/Support/LogicalResult.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

namespace mlir::tt::ttkernel {

static llvm::LogicalResult
translateModuleToCpp(Operation *op, llvm::raw_ostream &os,
const ttkernel::ThreadType &threadType) {

ModuleOp module = dyn_cast<ModuleOp>(op);
assert(module && "Expected ModuleOp as top level operation");
return mlir::tt::emitKernelAsCpp(module, os, threadType);
}

LogicalResult translateTTKernelToCpp(Operation *op, llvm::raw_ostream &os,
const ttkernel::ThreadType &threadType) {
return translateModuleToCpp(op, os, threadType);
}

} // namespace mlir::tt::ttkernel
49 changes: 49 additions & 0 deletions lib/Target/TTKernel/TTKernelToCppRegistration.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
//
// SPDX-License-Identifier: Apache-2.0

#include "ttmlir/Dialect/TT/IR/TT.h"
#include "ttmlir/Dialect/TTKernel/IR/TTKernel.h"
#include "ttmlir/Dialect/TTKernel/IR/TTKernelOpsTypes.h"
#include "ttmlir/Target/TTKernel/TTKernelToCpp.h"
#include <mlir/Dialect/EmitC/IR/EmitC.h>
#include <mlir/Dialect/Func/IR/FuncOps.h>
#include <mlir/Dialect/MemRef/IR/MemRef.h>
#include <mlir/Dialect/SCF/IR/SCF.h>
#include <mlir/Tools/mlir-translate/Translation.h>
using namespace mlir;

namespace mlir::tt::ttkernel {

// TODO(bug #1874): Should generalize this to read kernel type from Attribute?
void registerTensixKernelToCpp() {
TranslateFromMLIRRegistration reg(
"ttkernel-to-cpp-tensix", "translate tensix kernel to C++",
[](Operation *op, llvm::raw_ostream &os) -> LogicalResult {
return translateTTKernelToCpp(op, os, tt::ttkernel::ThreadType::Tensix);
},
[](DialectRegistry &registry) {
registry
.insert<mlir::scf::SCFDialect, mlir::tt::ttkernel::TTKernelDialect,
mlir::arith::ArithDialect, mlir::emitc::EmitCDialect,
mlir::func::FuncDialect, mlir::tt::TTDialect,
mlir::memref::MemRefDialect>();
});
}

void registerNocKernelToCpp() {
TranslateFromMLIRRegistration reg(
"ttkernel-to-cpp-noc", "translate noc kernel to C++",
[](Operation *op, llvm::raw_ostream &os) -> LogicalResult {
return translateTTKernelToCpp(op, os, tt::ttkernel::ThreadType::Noc);
},
[](DialectRegistry &registry) {
registry
.insert<mlir::scf::SCFDialect, mlir::tt::ttkernel::TTKernelDialect,
mlir::arith::ArithDialect, mlir::emitc::EmitCDialect,
mlir::func::FuncDialect, mlir::tt::TTDialect,
mlir::memref::MemRefDialect>();
});
}

} // namespace mlir::tt::ttkernel
30 changes: 30 additions & 0 deletions test/ttmlir/Translate/TTKernel/ttkernel_noc.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// RUN: ttmlir-translate --ttkernel-to-cpp-noc %s | FileCheck %s

// CHECK: #include "dataflow_api.h"
// CHECK: void kernel_main
func.func @ttkernel_noc() -> () {
// CHECK: int32_t [[B0:.*]] = 262432
%c262432_i32 = arith.constant 262432 : i32
// CHECK: int32_t [[B1:.*]] = 262208
%c262208_i32 = arith.constant 262208 : i32
// CHECK: int32_t [[C0:.*]] = 32
%c32_i32 = arith.constant 32 : i32
// CHECK: int32_t [[C1:.*]] = 262400
%c262400_i32 = arith.constant 262400 : i32
// CHECK: int32_t [[A0:.*]] = 0
%c0_i32 = arith.constant 0 : i32
// CHECK: int32_t [[A1:.*]] = 262144;
%c262144_i32 = arith.constant 262144 : i32
// CHECK: int64_t [[NOCADDR0:.*]] = get_noc_addr([[A0]], [[A0]], [[A1]])
%3 = "ttkernel.get_noc_addr_xy"(%c0_i32, %c0_i32, %c262144_i32) : (i32, i32, i32) -> !ttkernel.noc_addr
// CHECK: noc_async_read([[NOCADDR0]], [[C1]], [[C0]])
"ttkernel.noc_async_read"(%3, %c262400_i32, %c32_i32) : (!ttkernel.noc_addr, i32, i32) -> ()
// CHECK: int64_t [[NOCADDR1:.*]] = get_noc_addr([[A0]], [[A0]], [[B1]])
%4 = "ttkernel.get_noc_addr_xy"(%c0_i32, %c0_i32, %c262208_i32) : (i32, i32, i32) -> !ttkernel.noc_addr
// CHECK: noc_async_read([[NOCADDR1]], [[B0]], [[C0]])
"ttkernel.noc_async_read"(%4, %c262432_i32, %c32_i32) : (!ttkernel.noc_addr, i32, i32) -> ()
// CHECK: noc_async_read_barrier
"ttkernel.noc_async_read_barrier"() : () -> ()
// CHECK: return
"ttkernel.return"() : () -> ()
}
31 changes: 31 additions & 0 deletions test/ttmlir/Translate/TTKernel/ttkernel_tensix.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// RUN: ttmlir-translate --ttkernel-to-cpp-tensix %s | FileCheck %s

#l1_ = #tt.memory_space<l1>

// CHECK: void kernel_main
func.func @ttkernel_tensix(%arg1: !ttkernel.cb<cb_in0, 294912, memref<2x4x!tt.tile<32x32, f32>, #l1_>, 4096, 1>,
%arg2: !ttkernel.cb<cb_out0, 327680, memref<64x128xf32, #l1_>, 4096, 1>) -> () {

// CHECK: ::tt::CB [[CBIN0:.*]] = ::tt::CB::c_in0
// CHECK: ::tt::CB [[CBIN0ARG:.*]] = [[CBIN0]]
// CHECK: ::tt::CB [[CBOUT0:.*]] = ::tt::CB::c_out0
// CHECK: ::tt::CB [[CBOUT0ARG:.*]] = [[CBOUT0]]
// CHECK: int32_t [[C:.*]] = 4
%c4_i32 = arith.constant 4 : i32
// CHECK: untilize_init([[CBIN0ARG]], [[CBOUT0ARG]])
"ttkernel.untilize_init"(%arg1, %arg2) : (!ttkernel.cb<cb_in0, 294912, memref<2x4x!tt.tile<32x32, f32>, #l1_>, 4096, 1>, !ttkernel.cb<cb_out0, 327680, memref<64x128xf32, #l1_>, 4096, 1>) -> ()
// CHECK: untilize_block([[CBIN0ARG]], [[C]], [[CBOUT0ARG]])
"ttkernel.untilize_block"(%arg1, %c4_i32, %arg2) : (!ttkernel.cb<cb_in0, 294912, memref<2x4x!tt.tile<32x32, f32>, #l1_>, 4096, 1>, i32, !ttkernel.cb<cb_out0, 327680, memref<64x128xf32, #l1_>, 4096, 1>) -> ()
// CHECK: cb_pop_front([[CBIN0ARG]], [[C]])
"ttkernel.cb_pop_front"(%arg1, %c4_i32) : (!ttkernel.cb<cb_in0, 294912, memref<2x4x!tt.tile<32x32, f32>, #l1_>, 4096, 1>, i32) -> ()
// CHECK: cb_push_back([[CBOUT0ARG]], [[C]])
"ttkernel.cb_push_back"(%arg2, %c4_i32) : (!ttkernel.cb<cb_out0, 327680, memref<64x128xf32, #l1_>, 4096, 1>, i32) -> ()
// CHECK: untilize_block([[CBIN0ARG]], [[C]], [[CBOUT0ARG]])
"ttkernel.untilize_block"(%arg1, %c4_i32, %arg2) : (!ttkernel.cb<cb_in0, 294912, memref<2x4x!tt.tile<32x32, f32>, #l1_>, 4096, 1>, i32, !ttkernel.cb<cb_out0, 327680, memref<64x128xf32, #l1_>, 4096, 1>) -> ()
// CHECK: cb_pop_front([[CBIN0ARG]], [[C]])
"ttkernel.cb_pop_front"(%arg1, %c4_i32) : (!ttkernel.cb<cb_in0, 294912, memref<2x4x!tt.tile<32x32, f32>, #l1_>, 4096, 1>, i32) -> ()
// CHECK: cb_push_back([[CBOUT0ARG]], [[C]])
"ttkernel.cb_push_back"(%arg2, %c4_i32) : (!ttkernel.cb<cb_out0, 327680, memref<64x128xf32, #l1_>, 4096, 1>, i32) -> ()
// CHECK: return
"ttkernel.return"() : () -> ()
}
8 changes: 8 additions & 0 deletions tools/ttmlir-translate/ttmlir-translate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,19 @@ namespace mlir::tt::ttmetal {
void registerTTMetalToFlatbuffer();
} // namespace mlir::tt::ttmetal

namespace mlir::tt::ttkernel {
void registerTensixKernelToCpp();
void registerNocKernelToCpp();
} // namespace mlir::tt::ttkernel

// Place to register all the custom translations
static void registerCustomTranslations() {
static bool initOnce = []() {
mlir::tt::ttnn::registerTTNNToFlatbuffer();
mlir::tt::ttmetal::registerTTMetalToFlatbuffer();
mlir::tt::ttkernel::registerNocKernelToCpp();
mlir::tt::ttkernel::registerTensixKernelToCpp();

return true;
}();
(void)initOnce;
Expand Down
Loading