From e5406a7bc5c1dcb34a82ef244b9b0a7129089eac Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Sun, 15 Dec 2024 06:23:33 +0000 Subject: [PATCH 01/28] initial check-in --- .../ttmlir/Target/TTKernel/TTKernelToCpp.h | 19 + lib/RegisterAll.cpp | 1 + lib/Target/TTKernel/TTKernelToCpp.cpp | 435 ++++++++++++++++++ .../TTKernel/TTKernelToCppRegristration.cpp | 30 ++ python/Passes.cpp | 18 + tools/ttmlir-translate/ttmlir-translate.cpp | 5 + 6 files changed, 508 insertions(+) create mode 100644 include/ttmlir/Target/TTKernel/TTKernelToCpp.h create mode 100644 lib/Target/TTKernel/TTKernelToCpp.cpp create mode 100644 lib/Target/TTKernel/TTKernelToCppRegristration.cpp diff --git a/include/ttmlir/Target/TTKernel/TTKernelToCpp.h b/include/ttmlir/Target/TTKernel/TTKernelToCpp.h new file mode 100644 index 000000000..665eabdae --- /dev/null +++ b/include/ttmlir/Target/TTKernel/TTKernelToCpp.h @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef TTMLIR_TARGET_TTKERNEL_TTKERNELTOCPP_H +#define TTMLIR_TARGET_TTKERNEL_TTKERNELTOCPP_H + +#include "mlir/IR/Operation.h" +#include "mlir/Support/LogicalResult.h" + +namespace mlir::tt::ttkernel { + +// Translates a TTKernel operation to C++ and writes it to the given +// stream. +LogicalResult translateTTKernelToCpp( + Operation *op, llvm::raw_ostream &os); +} // namespace mlir::tt::ttkernel + +#endif diff --git a/lib/RegisterAll.cpp b/lib/RegisterAll.cpp index db8636f07..4a7156279 100644 --- a/lib/RegisterAll.cpp +++ b/lib/RegisterAll.cpp @@ -14,6 +14,7 @@ #include "ttmlir/Dialect/TTIR/Pipelines/TTIRPipelines.h" #include "ttmlir/Dialect/TTIR/Transforms/Passes.h" #include "ttmlir/Dialect/TTKernel/IR/TTKernel.h" +#include "ttmlir/Dialect/TTKernel/Pipelines/TTKernelPipelines.h" #include "ttmlir/Dialect/TTMetal/Pipelines/TTMetalPipelines.h" #include "ttmlir/Dialect/TTMetal/Transforms/Passes.h" #include "ttmlir/Dialect/TTNN/IR/TTNN.h" diff --git a/lib/Target/TTKernel/TTKernelToCpp.cpp b/lib/Target/TTKernel/TTKernelToCpp.cpp new file mode 100644 index 000000000..dddaac0ce --- /dev/null +++ b/lib/Target/TTKernel/TTKernelToCpp.cpp @@ -0,0 +1,435 @@ +// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include + +#include "mlir/Dialect/EmitC/IR/EmitC.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/LogicalResult.h" +#include "llvm/Support/raw_ostream.h" + +#include "ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h" +#include "ttmlir/Dialect/TT/IR/TT.h" +#include "ttmlir/Dialect/TT/IR/TTOpsTypes.h" +#include "ttmlir/Dialect/TTKernel/IR/TTKernel.h" +#include "ttmlir/Dialect/TTKernel/IR/TTKernelOps.h" +#include "ttmlir/Dialect/TTKernel/IR/TTKernelOpsTypes.h" +#include "ttmlir/Dialect/TTMetal/IR/TTMetalOpsTypes.h" +#include "ttmlir/Target/TTMetal/Target.h" +#include "ttmlir/Target/Utils/FlatbufferObjectCache.h" +#include "ttmlir/Target/Utils/MLIRToFlatbuffer.h" +#include "ttmlir/Version.h" +#include "types_generated.h" + +namespace mlir::tt { +flatbuffers::Offset<::tt::target::MemoryDesc> +memrefAttrToFlatbuffer(FlatbufferObjectCache &cache, MemRefType memref, + ::mlir::tt::TensorMemoryLayout memLayout) { + auto shapeInt64 = memref.getShape(); + std::vector shape(shapeInt64.begin(), shapeInt64.end()); + DataType dtype = DataType::Float32; + ::tt::target::Dim2d 
tileShape(1, 1); + Type elementType = memref.getElementType(); + std::uint64_t elementSize = 0; + if (isa(elementType)) { + auto tileType = mlir::cast(elementType); + dtype = tileType.getDataType(); + tileShape = ::tt::target::Dim2d(tileType.getHeight(), tileType.getWidth()); + elementSize = tileType.getSizeBytes(); + } else { + dtype = elementTypeToDataType(elementType); + elementSize = getElementSizeBytes(dtype); + } + + std::uint64_t size = elementSize; + for (auto dim : shapeInt64) { + size *= dim; + } + + return ::tt::target::CreateMemoryDescDirect( + *cache.fbb, &shape, &tileShape, toFlatbuffer(cache, dtype), + toFlatbuffer( + cache, + mlir::cast(memref.getMemorySpace()).getValue()), + toFlatbuffer(cache, memLayout), size); +} + +flatbuffers::Offset<::tt::target::LayoutDesc> metalLayoutAttrToFlatbuffer( + FlatbufferObjectCache &cache, MetalLayoutAttr metalLayoutAttr, + ArrayRef logicalShape, DeviceAttr deviceAttr) { + auto strideInt64 = metalLayoutAttr.getStride(logicalShape); + std::vector stride(strideInt64.begin(), strideInt64.end()); + auto coreRangeSet = toFlatbuffer(cache, metalLayoutAttr.getGrid(), + deviceAttr.getWorkerGrid()); + return ::tt::target::CreateLayoutDescDirect( + *cache.fbb, &stride, toFlatbuffer(cache, metalLayoutAttr.getOobVal()), + &coreRangeSet, + cache.getOrCreate(metalLayoutAttr.getMemref(), memrefAttrToFlatbuffer, + metalLayoutAttr.getMemLayout())); +} + +} // namespace mlir::tt + +namespace mlir::tt::ttkernel { + +struct CQBuilder { + ::flatbuffers::FlatBufferBuilder *fbb; + const char *name; + std::vector<::flatbuffers::Offset<::tt::target::TensorRef>> inputs; + std::vector<::flatbuffers::Offset<::tt::target::TensorRef>> outputs; + std::vector<::flatbuffers::Offset<::tt::target::metal::Command>> commands; + OpPrintingFlags printFlags; + + CQBuilder(::flatbuffers::FlatBufferBuilder *fbb) : fbb(fbb) { + printFlags = printFlags.elideLargeElementsAttrs() + .elideLargeResourceString() + .skipRegions() + .enableDebugInfo(); + } + + std::string getDebugString(mlir::Operation *op) { + std::string str; + llvm::raw_string_ostream os(str); + op->print(os, printFlags); + return str; + }; + + template + ::flatbuffers::Offset<::tt::target::metal::Command> + appendCommand(::flatbuffers::Offset commandT, mlir::Operation *op) { + auto debugString = getDebugString(op); + commands.push_back(::tt::target::metal::CreateCommandDirect( + *fbb, ::tt::target::metal::CommandTypeTraits::enum_value, + commandT.Union(), debugString.c_str())); + return commands.back(); + } +}; + +::tt::target::MathFidelity toFlatbuffer(ttkernel::MathFidelity mathFidelity) { + switch (mathFidelity) { + case ttkernel::MathFidelity::HiFi4: + return ::tt::target::MathFidelity::HiFi4; + case ttkernel::MathFidelity::HiFi3: + return ::tt::target::MathFidelity::HiFi3; + case ttkernel::MathFidelity::HiFi2: + return ::tt::target::MathFidelity::HiFi2; + case ttkernel::MathFidelity::LoFi: + return ::tt::target::MathFidelity::LoFi; + } + assert(false && "Unsupported MathFidelity"); +} + +std::vector<::tt::target::metal::UnpackToDestMode> +toFlatbuffer(llvm::ArrayRef unpackToDestModes) { + std::vector<::tt::target::metal::UnpackToDestMode> result; + result.reserve(unpackToDestModes.size()); + + for (auto mode : unpackToDestModes) { + switch (mode) { + case ttkernel::UnpackToDestMode::UnpackToDestFp32: + result.push_back(::tt::target::metal::UnpackToDestMode::UnpackToDestFp32); + break; + case ttkernel::UnpackToDestMode::Default: + result.push_back(::tt::target::metal::UnpackToDestMode::Default); + break; + } + } + 
return result; +} + +::tt::target::metal::EthType toFlatbuffer(ttkernel::EthType ethType) { + switch (ethType) { + case ttkernel::EthType::Sender: + return ::tt::target::metal::EthType::Sender; + case ttkernel::EthType::Receiver: + return ::tt::target::metal::EthType::Receiver; + } + assert(false && "Unsupported EthType"); +} + +::tt::target::metal::NocIndex toFlatbuffer(ttkernel::NocIndex nocIndex) { + switch (nocIndex) { + case ttkernel::NocIndex::Noc0: + return ::tt::target::metal::NocIndex::Noc0; + case ttkernel::NocIndex::Noc1: + return ::tt::target::metal::NocIndex::Noc1; + } + assert(false && "Unsupported NocIndex"); +} + +// Take KernelConfig and return pair of its type and variantized config itself +std::pair<::tt::target::metal::KernelConfig, ::flatbuffers::Offset> +toFlatbuffer(::flatbuffers::FlatBufferBuilder &fbb, + ttkernel::KernelConfigInterface kernelConfig) { + ttkernel::ThreadType threadType = kernelConfig.getThreadType(); + + switch (threadType) { + case ttkernel::ThreadType::Noc: { + auto nocConfigAttr = mlir::dyn_cast(kernelConfig); + auto configType = ::tt::target::metal::KernelConfig::NocConfig; + auto config = ::tt::target::metal::CreateNocConfig( + fbb, toFlatbuffer(nocConfigAttr.getNocIndex())); + return std::make_pair(configType, config.Union()); + } + case ttkernel::ThreadType::Tensix: { + auto tensixConfigAttr = + mlir::dyn_cast(kernelConfig); + auto configType = ::tt::target::metal::KernelConfig::TensixConfig; + auto unpackToDestModeVec = + toFlatbuffer(tensixConfigAttr.getUnpackToDestMode()); + auto config = ::tt::target::metal::CreateTensixConfigDirect( + fbb, toFlatbuffer(tensixConfigAttr.getMathFidelity()), + tensixConfigAttr.getFp32DestAccEn(), + tensixConfigAttr.getMathApproxMode(), &unpackToDestModeVec); + return std::make_pair(configType, config.Union()); + } + case ttkernel::ThreadType::Ethernet: { + auto ethernetConfigAttr = + mlir::dyn_cast(kernelConfig); + auto configType = ::tt::target::metal::KernelConfig::EthernetConfig; + auto config = ::tt::target::metal::CreateEthernetConfig( + fbb, toFlatbuffer(ethernetConfigAttr.getEthType()), + toFlatbuffer(ethernetConfigAttr.getNocIndex())); + return std::make_pair(configType, config.Union()); + } + } +} + +::tt::target::Dim2dRange toFlatbuffer(CoreRangeAttr coreRange) { + auto offset = coreRange.getOffset(); + auto size = coreRange.getSize(); + return ::tt::target::Dim2dRange(::tt::target::Dim2d(offset[0], offset[1]), + ::tt::target::Dim2d(size[0], size[1])); +} + +::flatbuffers::Offset<::tt::target::CBDesc> +cbTypeToFlatbuffer(FlatbufferObjectCache &cache, ttkernel::CBType cbType) { + auto memref = cache.getOrCreate(cbType.getMemref(), memrefAttrToFlatbuffer, + ::mlir::tt::TensorMemoryLayout::None); + return ::tt::target::CreateCBDesc( + *cache.fbb, + static_cast>(cbType.getPort()), + memref, cbType.getPageSize(), cbType.getNumBuffers()); +} + +std::pair<::tt::target::metal::HostBuffer, ::flatbuffers::Offset> +hostBufferToFlatbuffer(FlatbufferObjectCache &cache, + ElementsAttr elementsAttr) { + assert(elementsAttr.getElementType().isIntOrIndexOrFloat() && + "unsupported elements attr type"); + assert(elementsAttr.isSplat() && "expected a splat elements attr"); + assert(elementsAttr.getElementType().getIntOrFloatBitWidth() == 32 && + "unsupported elements attr bit width"); + auto vector = toFlatbuffer(cache, elementsAttr); + return std::make_pair( + ::tt::target::metal::HostBuffer::ConstantBuffer32, + ::tt::target::metal::CreateConstantBuffer32(*cache.fbb, vector).Union()); +} + +Value 
getOperandThroughDPSOps(Value value) { + auto *op = value.getDefiningOp(); + if (!op) { + return value; + } + while (isa(op)) { + assert(op->getResults().size() == 1); + auto dps = cast(op); + assert(dps.getNumDpsInits() == 1); + auto *opOperand = dps.getDpsInitOperand(0); + value = opOperand->get(); + op = value.getDefiningOp(); + } + return value; +} + +static std::shared_ptr translateTTKernelToCpp( + Operation *op, llvm::raw_ostream &os) { + + ModuleOp module = dyn_cast(op); + assert(module && "Expected ModuleOp as top level operation"); + + // auto systemDesc = + // mlir::cast(module->getAttr(tt::SystemDescAttr::name)); + // ::ttmlir::Version ttmlirVersion = ::ttmlir::getVersion(); + // ::tt::target::Version binaryVersion(ttmlirVersion.major, ttmlirVersion.minor, + // ttmlirVersion.patch); + // std::vector<::flatbuffers::Offset<::tt::target::metal::Program>> programs; + + module->walk([&](func::FuncOp entry) { + CQBuilder cqBuilder(&fbb); + cqBuilder.name = entry.getSymName().data(); + + auto argumentAllocations = mlir::cast( + entry->getDiscardableAttr(ArgumentAllocationAttr::name)); + assert(argumentAllocations && "expected argument_allocations attribute"); + for (auto &input : entry.getBody().getArguments()) { + auto argAlloc = mlir::cast( + argumentAllocations[input.getArgNumber()]); + assert( + argAlloc.getMemorySpace() == + mlir::cast( + mlir::cast(input.getType()).getEncoding()) + .getMemorySpace() && + "argument allocation memory space does not match tensor type " + "memory " + "space"); + cqBuilder.inputs.push_back( + cache.getOrCreate(input, tensorValueToFlatbuffer, + argAlloc.getAddress(), argAlloc.getSize())); + } + + entry->walk([&](mlir::Operation *op) { + if (auto dispatchOp = dyn_cast_or_null(op); + dispatchOp) { + std::vector<::flatbuffers::Offset<::tt::target::TensorRef>> operands; + for (auto operand : dispatchOp.getOperands()) { + operands.push_back(cache.at<::tt::target::TensorRef>( + getOperandThroughDPSOps(operand))); + } + + std::vector<::flatbuffers::Offset<::tt::target::metal::KernelDesc>> + kernels; + + llvm::SmallVector cppKernels(dispatchOp->getNumRegions()); + llvm::LogicalResult success = + emitDispatchOpRegionsAsCpp(dispatchOp, cppKernels); + assert(success.succeeded() && + "failed to emit dispatch op regions as cpp"); + for (auto ®ion : dispatchOp.getRegions()) { + std::vector<::tt::target::Dim2dRange> coreRangeSet = { + toFlatbuffer(mlir::cast( + dispatchOp.getCoreRanges()[region.getRegionNumber()]))}; + std::vector<::flatbuffers::Offset<::tt::target::CBRef>> cbs; + size_t argNumber = 0; + for (auto arg : region.getArguments()) { + auto cbType = mlir::cast(arg.getType()); + auto cbDesc = cache.getOrCreate(cbType, cbTypeToFlatbuffer); + auto tensorRef = + argNumber >= operands.size() ? 0 : operands[argNumber++]; + cbs.push_back( + ::tt::target::CreateCBRef(fbb, cache.global_id++, tensorRef, + cbType.getAddress(), cbDesc)); + } + + std::string &source = cppKernels[region.getRegionNumber()]; + assert(source.size() > 0 && "empty kernel source"); + + // Get pair of kernel's config type and config itself. 
+ auto kernelConfig = + dispatchOp.getKernelConfigs()[region.getRegionNumber()]; + auto [kernelConfigType, kernelConfigUnion] = toFlatbuffer( + fbb, mlir::cast(kernelConfig)); + + kernels.push_back(::tt::target::metal::CreateKernelDescDirect( + fbb, ::tt::target::metal::Kernel::KernelSource, + ::tt::target::metal::CreateKernelSourceDirect( + fbb, source.c_str(), kernelConfigType, kernelConfigUnion) + .Union(), + &coreRangeSet, &cbs, nullptr, nullptr, /* TODO rtargs*/ + nullptr /*TODO debug info*/)); + } + ::flatbuffers::Offset<::tt::target::metal::ProgramDesc> program = + ::tt::target::metal::CreateProgramDescDirect(fbb, &kernels); + + cqBuilder.appendCommand( + ::tt::target::metal::CreateEnqueueProgramCommandDirect( + fbb, &operands, program), + op); + } else if (auto allocOp = dyn_cast_or_null(op); + allocOp) { + cqBuilder.appendCommand( + ::tt::target::metal::CreateCreateBufferCommand( + fbb, + cache.getOrCreate(allocOp.getResult(), tensorValueToFlatbuffer, + allocOp.getAddress(), allocOp.getSize())), + op); + } else if (auto deallocOp = dyn_cast_or_null(op); + deallocOp) { + cqBuilder.appendCommand( + ::tt::target::metal::CreateDeallocateBufferCommand( + fbb, cache.at<::tt::target::TensorRef>( + getOperandThroughDPSOps(deallocOp.getInput()))), + op); + } else if (auto hostReadOp = + dyn_cast_or_null(op); + hostReadOp) { + cqBuilder.appendCommand( + ::tt::target::metal::CreateEnqueueReadBufferCommand( + fbb, + cache.at<::tt::target::TensorRef>( + getOperandThroughDPSOps(hostReadOp.getInput())), + cache.at<::tt::target::TensorRef>( + getOperandThroughDPSOps(hostReadOp.getOutput()))), + op); + } else if (auto hostWriteOp = + dyn_cast_or_null(op); + hostWriteOp) { + auto [hostBufferType, hostBuffer] = + hostBufferToFlatbuffer(cache, hostWriteOp.getValue()); + cqBuilder.appendCommand( + ::tt::target::metal::CreateEnqueueWriteBufferCommand( + fbb, hostBufferType, hostBuffer, + cache.at<::tt::target::TensorRef>( + getOperandThroughDPSOps(hostWriteOp.getOutput()))), + op); + } else if (auto returnOp = dyn_cast_or_null(op); + returnOp) { + for (auto output : returnOp.getOperands()) { + cqBuilder.outputs.push_back(cache.at<::tt::target::TensorRef>( + getOperandThroughDPSOps(output))); + } + } + }); + + std::vector<::flatbuffers::Offset<::tt::target::metal::CommandQueue>> + commandQueues = { + ::tt::target::metal::CreateCommandQueueDirect(fbb, cqBuilder.name, + &cqBuilder.commands), + }; + + std::vector<::flatbuffers::Offset<::tt::target::metal::DeviceProgram>> + devicePrograms = { + ::tt::target::metal::CreateDeviceProgramDirect( + fbb, &cqBuilder.inputs, &cqBuilder.outputs, &commandQueues), + }; + programs.push_back(::tt::target::metal::CreateProgramDirect( + fbb, cqBuilder.name, &cqBuilder.inputs, &cqBuilder.outputs, + &devicePrograms)); + }); + + auto binary = ::tt::target::metal::CreateTTMetalBinaryDirect( + fbb, &binaryVersion, ::ttmlir::getGitHash(), + toFlatbuffer(cache, systemDesc), &programs); + + FinishSizePrefixedTTMetalBinaryBuffer(fbb, binary); + ::flatbuffers::Verifier verifier(fbb.GetBufferPointer(), fbb.GetSize()); + ::tt::target::metal::VerifySizePrefixedTTMetalBinaryBuffer(verifier); + + uint8_t *buf = fbb.GetBufferPointer(); + auto size = fbb.GetSize(); + + std::shared_ptr serializedBinary = + std::shared_ptr(std::malloc(size), std::free); + std::memcpy(serializedBinary.get(), buf, size); + + return serializedBinary; +} + +LogicalResult translateTTKernelToCpp( + Operation *op, llvm::raw_ostream &os) { + std::shared_ptr data = translateModuleToCpp(op); + // std::size_t size = 
::flatbuffers::GetSizePrefixedBufferLength(
  //     static_cast<const uint8_t *>(data.get()));
  // os.write(reinterpret_cast<char *>(data.get()), size);
  return success();
}

} // namespace mlir::tt::ttmetal
diff --git a/lib/Target/TTKernel/TTKernelToCppRegristration.cpp b/lib/Target/TTKernel/TTKernelToCppRegristration.cpp
new file mode 100644
index 000000000..b86349d57
--- /dev/null
+++ b/lib/Target/TTKernel/TTKernelToCppRegristration.cpp
@@ -0,0 +1,30 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "mlir/Dialect/EmitC/IR/EmitC.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/SCF/SCF.h"
+#include "mlir/Tools/mlir-translate/Translation.h"
+
+#include "ttmlir/Dialect/TTKernel/IR/TTKernel.h"
+#include "ttmlir/Target/TTKernel/TTKernelToCpp.h"
+
+using namespace mlir;
+
+namespace mlir::tt::ttkernel {
+
+void registerTTKernelToCpp() {
+  TranslateFromMLIRRegistration reg(
+      "ttkernel-to-cpp", "translate ttkernel dialect to C++",
+      [](Operation *op, llvm::raw_ostream &os) -> LogicalResult {
+        return translateTTKernelToCpp(op, os);
+      },
+      [](DialectRegistry &registry) {
+        registry.insert();
+      });
+}
+
+} // namespace mlir::tt::ttkernel
diff --git a/python/Passes.cpp b/python/Passes.cpp
index 43709426d..b8323b6d0 100644
--- a/python/Passes.cpp
+++ b/python/Passes.cpp
@@ -7,6 +7,7 @@
 #include "ttmlir/RegisterAll.h"
 #include "ttmlir/Target/TTMetal/TTMetalToFlatbuffer.h"
 #include "ttmlir/Target/TTNN/TTNNToFlatbuffer.h"
+#include "ttmlir/Target/TTKernel/TTKernelToCpp.h"
 #include 

 PYBIND11_MAKE_OPAQUE(std::shared_ptr<void>);

@@ -201,6 +202,23 @@ void populatePassesModule(py::module &m) {
         }
       });

+  m.def("ttkernel_to_cpp_file",
+        [](MlirModule module, std::string &filepath,
+           std::unordered_map goldenMap) {
+          mlir::Operation *moduleOp = unwrap(mlirModuleGetOperation(module));
+          std::error_code fileError;
+          llvm::raw_fd_ostream file(filepath, fileError);
+          if (fileError) {
+            throw std::runtime_error("Failed to open file: " + filepath +
+                                     ". 
Error: " + fileError.message()); + } + if (mlir::failed(mlir::tt::ttkernel::translateTTKernelToCpp( + moduleOp, file))) { + throw std::runtime_error("Failed to write flatbuffer to file: " + + filepath); + } + }); + py::enum_<::tt::target::DataType>(m, "DataType") .value("Float32", ::tt::target::DataType::Float32) .value("Float16", ::tt::target::DataType::Float16); diff --git a/tools/ttmlir-translate/ttmlir-translate.cpp b/tools/ttmlir-translate/ttmlir-translate.cpp index a3ec6154d..7c447fd36 100644 --- a/tools/ttmlir-translate/ttmlir-translate.cpp +++ b/tools/ttmlir-translate/ttmlir-translate.cpp @@ -17,11 +17,16 @@ namespace mlir::tt::ttmetal { void registerTTMetalToFlatbuffer(); } // namespace mlir::tt::ttmetal +namespace mlir::tt::ttkernel { +void registerTTKernelToCpp(); +} // namespace mlir::tt::ttkernel + // Place to register all the custom translations static void registerCustomTranslations() { static bool initOnce = []() { mlir::tt::ttnn::registerTTNNToFlatbuffer(); mlir::tt::ttmetal::registerTTMetalToFlatbuffer(); + mlir::tt::ttkernel::registerTTKernelToCpp(); return true; }(); (void)initOnce; From 74e12fd6569cf45f83e2d4a7c08c3a2b473bccac Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Mon, 16 Dec 2024 09:40:12 +0000 Subject: [PATCH 02/28] use existing passes to translate kernel to C++ --- lib/Target/TTKernel/TTKernelToCpp.cpp | 418 +------------------------- 1 file changed, 16 insertions(+), 402 deletions(-) diff --git a/lib/Target/TTKernel/TTKernelToCpp.cpp b/lib/Target/TTKernel/TTKernelToCpp.cpp index dddaac0ce..f8a44d8ce 100644 --- a/lib/Target/TTKernel/TTKernelToCpp.cpp +++ b/lib/Target/TTKernel/TTKernelToCpp.cpp @@ -4,7 +4,6 @@ #include #include -#include #include #include "mlir/Dialect/EmitC/IR/EmitC.h" @@ -15,421 +14,36 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/LogicalResult.h" #include "llvm/Support/raw_ostream.h" +#include "mlir/Target/Cpp/CppEmitter.h" #include "ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h" -#include "ttmlir/Dialect/TT/IR/TT.h" -#include "ttmlir/Dialect/TT/IR/TTOpsTypes.h" -#include "ttmlir/Dialect/TTKernel/IR/TTKernel.h" -#include "ttmlir/Dialect/TTKernel/IR/TTKernelOps.h" -#include "ttmlir/Dialect/TTKernel/IR/TTKernelOpsTypes.h" -#include "ttmlir/Dialect/TTMetal/IR/TTMetalOpsTypes.h" -#include "ttmlir/Target/TTMetal/Target.h" -#include "ttmlir/Target/Utils/FlatbufferObjectCache.h" -#include "ttmlir/Target/Utils/MLIRToFlatbuffer.h" -#include "ttmlir/Version.h" -#include "types_generated.h" - -namespace mlir::tt { -flatbuffers::Offset<::tt::target::MemoryDesc> -memrefAttrToFlatbuffer(FlatbufferObjectCache &cache, MemRefType memref, - ::mlir::tt::TensorMemoryLayout memLayout) { - auto shapeInt64 = memref.getShape(); - std::vector shape(shapeInt64.begin(), shapeInt64.end()); - DataType dtype = DataType::Float32; - ::tt::target::Dim2d tileShape(1, 1); - Type elementType = memref.getElementType(); - std::uint64_t elementSize = 0; - if (isa(elementType)) { - auto tileType = mlir::cast(elementType); - dtype = tileType.getDataType(); - tileShape = ::tt::target::Dim2d(tileType.getHeight(), tileType.getWidth()); - elementSize = tileType.getSizeBytes(); - } else { - dtype = elementTypeToDataType(elementType); - elementSize = getElementSizeBytes(dtype); - } - - std::uint64_t size = elementSize; - for (auto dim : shapeInt64) { - size *= dim; - } - - return ::tt::target::CreateMemoryDescDirect( - *cache.fbb, &shape, &tileShape, toFlatbuffer(cache, dtype), - toFlatbuffer( - cache, - mlir::cast(memref.getMemorySpace()).getValue()), - 
toFlatbuffer(cache, memLayout), size); -} - -flatbuffers::Offset<::tt::target::LayoutDesc> metalLayoutAttrToFlatbuffer( - FlatbufferObjectCache &cache, MetalLayoutAttr metalLayoutAttr, - ArrayRef logicalShape, DeviceAttr deviceAttr) { - auto strideInt64 = metalLayoutAttr.getStride(logicalShape); - std::vector stride(strideInt64.begin(), strideInt64.end()); - auto coreRangeSet = toFlatbuffer(cache, metalLayoutAttr.getGrid(), - deviceAttr.getWorkerGrid()); - return ::tt::target::CreateLayoutDescDirect( - *cache.fbb, &stride, toFlatbuffer(cache, metalLayoutAttr.getOobVal()), - &coreRangeSet, - cache.getOrCreate(metalLayoutAttr.getMemref(), memrefAttrToFlatbuffer, - metalLayoutAttr.getMemLayout())); -} - -} // namespace mlir::tt namespace mlir::tt::ttkernel { -struct CQBuilder { - ::flatbuffers::FlatBufferBuilder *fbb; - const char *name; - std::vector<::flatbuffers::Offset<::tt::target::TensorRef>> inputs; - std::vector<::flatbuffers::Offset<::tt::target::TensorRef>> outputs; - std::vector<::flatbuffers::Offset<::tt::target::metal::Command>> commands; - OpPrintingFlags printFlags; - - CQBuilder(::flatbuffers::FlatBufferBuilder *fbb) : fbb(fbb) { - printFlags = printFlags.elideLargeElementsAttrs() - .elideLargeResourceString() - .skipRegions() - .enableDebugInfo(); - } - - std::string getDebugString(mlir::Operation *op) { - std::string str; - llvm::raw_string_ostream os(str); - op->print(os, printFlags); - return str; - }; - - template - ::flatbuffers::Offset<::tt::target::metal::Command> - appendCommand(::flatbuffers::Offset commandT, mlir::Operation *op) { - auto debugString = getDebugString(op); - commands.push_back(::tt::target::metal::CreateCommandDirect( - *fbb, ::tt::target::metal::CommandTypeTraits::enum_value, - commandT.Union(), debugString.c_str())); - return commands.back(); - } -}; - -::tt::target::MathFidelity toFlatbuffer(ttkernel::MathFidelity mathFidelity) { - switch (mathFidelity) { - case ttkernel::MathFidelity::HiFi4: - return ::tt::target::MathFidelity::HiFi4; - case ttkernel::MathFidelity::HiFi3: - return ::tt::target::MathFidelity::HiFi3; - case ttkernel::MathFidelity::HiFi2: - return ::tt::target::MathFidelity::HiFi2; - case ttkernel::MathFidelity::LoFi: - return ::tt::target::MathFidelity::LoFi; - } - assert(false && "Unsupported MathFidelity"); -} - -std::vector<::tt::target::metal::UnpackToDestMode> -toFlatbuffer(llvm::ArrayRef unpackToDestModes) { - std::vector<::tt::target::metal::UnpackToDestMode> result; - result.reserve(unpackToDestModes.size()); - - for (auto mode : unpackToDestModes) { - switch (mode) { - case ttkernel::UnpackToDestMode::UnpackToDestFp32: - result.push_back(::tt::target::metal::UnpackToDestMode::UnpackToDestFp32); - break; - case ttkernel::UnpackToDestMode::Default: - result.push_back(::tt::target::metal::UnpackToDestMode::Default); - break; - } - } - return result; -} - -::tt::target::metal::EthType toFlatbuffer(ttkernel::EthType ethType) { - switch (ethType) { - case ttkernel::EthType::Sender: - return ::tt::target::metal::EthType::Sender; - case ttkernel::EthType::Receiver: - return ::tt::target::metal::EthType::Receiver; - } - assert(false && "Unsupported EthType"); -} - -::tt::target::metal::NocIndex toFlatbuffer(ttkernel::NocIndex nocIndex) { - switch (nocIndex) { - case ttkernel::NocIndex::Noc0: - return ::tt::target::metal::NocIndex::Noc0; - case ttkernel::NocIndex::Noc1: - return ::tt::target::metal::NocIndex::Noc1; - } - assert(false && "Unsupported NocIndex"); -} - -// Take KernelConfig and return pair of its type and variantized 
config itself -std::pair<::tt::target::metal::KernelConfig, ::flatbuffers::Offset> -toFlatbuffer(::flatbuffers::FlatBufferBuilder &fbb, - ttkernel::KernelConfigInterface kernelConfig) { - ttkernel::ThreadType threadType = kernelConfig.getThreadType(); - - switch (threadType) { - case ttkernel::ThreadType::Noc: { - auto nocConfigAttr = mlir::dyn_cast(kernelConfig); - auto configType = ::tt::target::metal::KernelConfig::NocConfig; - auto config = ::tt::target::metal::CreateNocConfig( - fbb, toFlatbuffer(nocConfigAttr.getNocIndex())); - return std::make_pair(configType, config.Union()); - } - case ttkernel::ThreadType::Tensix: { - auto tensixConfigAttr = - mlir::dyn_cast(kernelConfig); - auto configType = ::tt::target::metal::KernelConfig::TensixConfig; - auto unpackToDestModeVec = - toFlatbuffer(tensixConfigAttr.getUnpackToDestMode()); - auto config = ::tt::target::metal::CreateTensixConfigDirect( - fbb, toFlatbuffer(tensixConfigAttr.getMathFidelity()), - tensixConfigAttr.getFp32DestAccEn(), - tensixConfigAttr.getMathApproxMode(), &unpackToDestModeVec); - return std::make_pair(configType, config.Union()); - } - case ttkernel::ThreadType::Ethernet: { - auto ethernetConfigAttr = - mlir::dyn_cast(kernelConfig); - auto configType = ::tt::target::metal::KernelConfig::EthernetConfig; - auto config = ::tt::target::metal::CreateEthernetConfig( - fbb, toFlatbuffer(ethernetConfigAttr.getEthType()), - toFlatbuffer(ethernetConfigAttr.getNocIndex())); - return std::make_pair(configType, config.Union()); - } - } -} - -::tt::target::Dim2dRange toFlatbuffer(CoreRangeAttr coreRange) { - auto offset = coreRange.getOffset(); - auto size = coreRange.getSize(); - return ::tt::target::Dim2dRange(::tt::target::Dim2d(offset[0], offset[1]), - ::tt::target::Dim2d(size[0], size[1])); -} - -::flatbuffers::Offset<::tt::target::CBDesc> -cbTypeToFlatbuffer(FlatbufferObjectCache &cache, ttkernel::CBType cbType) { - auto memref = cache.getOrCreate(cbType.getMemref(), memrefAttrToFlatbuffer, - ::mlir::tt::TensorMemoryLayout::None); - return ::tt::target::CreateCBDesc( - *cache.fbb, - static_cast>(cbType.getPort()), - memref, cbType.getPageSize(), cbType.getNumBuffers()); -} - -std::pair<::tt::target::metal::HostBuffer, ::flatbuffers::Offset> -hostBufferToFlatbuffer(FlatbufferObjectCache &cache, - ElementsAttr elementsAttr) { - assert(elementsAttr.getElementType().isIntOrIndexOrFloat() && - "unsupported elements attr type"); - assert(elementsAttr.isSplat() && "expected a splat elements attr"); - assert(elementsAttr.getElementType().getIntOrFloatBitWidth() == 32 && - "unsupported elements attr bit width"); - auto vector = toFlatbuffer(cache, elementsAttr); - return std::make_pair( - ::tt::target::metal::HostBuffer::ConstantBuffer32, - ::tt::target::metal::CreateConstantBuffer32(*cache.fbb, vector).Union()); -} - -Value getOperandThroughDPSOps(Value value) { - auto *op = value.getDefiningOp(); - if (!op) { - return value; - } - while (isa(op)) { - assert(op->getResults().size() == 1); - auto dps = cast(op); - assert(dps.getNumDpsInits() == 1); - auto *opOperand = dps.getDpsInitOperand(0); - value = opOperand->get(); - op = value.getDefiningOp(); - } - return value; -} - -static std::shared_ptr translateTTKernelToCpp( +static void translateModuleToCpp( Operation *op, llvm::raw_ostream &os) { + ModuleOp module = dyn_cast(op); + assert(module && "Expected ModuleOp as top level operation"); + mlir::PassManager pm(op->getContext()); - ModuleOp module = dyn_cast(op); - assert(module && "Expected ModuleOp as top level operation"); - - 
// auto systemDesc = - // mlir::cast(module->getAttr(tt::SystemDescAttr::name)); - // ::ttmlir::Version ttmlirVersion = ::ttmlir::getVersion(); - // ::tt::target::Version binaryVersion(ttmlirVersion.major, ttmlirVersion.minor, - // ttmlirVersion.patch); - // std::vector<::flatbuffers::Offset<::tt::target::metal::Program>> programs; + pm.addPass(mlir::tt::createConvertTTKernelToEmitC()); + pm.addPass(mlir::createConvertArithToEmitC()); + pm.addPass(mlir::createSCFToEmitC()); + pm.addPass(mlir::createConvertFuncToEmitC()); - module->walk([&](func::FuncOp entry) { - CQBuilder cqBuilder(&fbb); - cqBuilder.name = entry.getSymName().data(); - - auto argumentAllocations = mlir::cast( - entry->getDiscardableAttr(ArgumentAllocationAttr::name)); - assert(argumentAllocations && "expected argument_allocations attribute"); - for (auto &input : entry.getBody().getArguments()) { - auto argAlloc = mlir::cast( - argumentAllocations[input.getArgNumber()]); - assert( - argAlloc.getMemorySpace() == - mlir::cast( - mlir::cast(input.getType()).getEncoding()) - .getMemorySpace() && - "argument allocation memory space does not match tensor type " - "memory " - "space"); - cqBuilder.inputs.push_back( - cache.getOrCreate(input, tensorValueToFlatbuffer, - argAlloc.getAddress(), argAlloc.getSize())); + if (mlir::failed(pm.run(op))) { + throw std::runtime_error("Failed to lower MLIR to EmitC"); } - entry->walk([&](mlir::Operation *op) { - if (auto dispatchOp = dyn_cast_or_null(op); - dispatchOp) { - std::vector<::flatbuffers::Offset<::tt::target::TensorRef>> operands; - for (auto operand : dispatchOp.getOperands()) { - operands.push_back(cache.at<::tt::target::TensorRef>( - getOperandThroughDPSOps(operand))); - } - - std::vector<::flatbuffers::Offset<::tt::target::metal::KernelDesc>> - kernels; - - llvm::SmallVector cppKernels(dispatchOp->getNumRegions()); - llvm::LogicalResult success = - emitDispatchOpRegionsAsCpp(dispatchOp, cppKernels); - assert(success.succeeded() && - "failed to emit dispatch op regions as cpp"); - for (auto ®ion : dispatchOp.getRegions()) { - std::vector<::tt::target::Dim2dRange> coreRangeSet = { - toFlatbuffer(mlir::cast( - dispatchOp.getCoreRanges()[region.getRegionNumber()]))}; - std::vector<::flatbuffers::Offset<::tt::target::CBRef>> cbs; - size_t argNumber = 0; - for (auto arg : region.getArguments()) { - auto cbType = mlir::cast(arg.getType()); - auto cbDesc = cache.getOrCreate(cbType, cbTypeToFlatbuffer); - auto tensorRef = - argNumber >= operands.size() ? 0 : operands[argNumber++]; - cbs.push_back( - ::tt::target::CreateCBRef(fbb, cache.global_id++, tensorRef, - cbType.getAddress(), cbDesc)); - } - - std::string &source = cppKernels[region.getRegionNumber()]; - assert(source.size() > 0 && "empty kernel source"); - - // Get pair of kernel's config type and config itself. 
- auto kernelConfig = - dispatchOp.getKernelConfigs()[region.getRegionNumber()]; - auto [kernelConfigType, kernelConfigUnion] = toFlatbuffer( - fbb, mlir::cast(kernelConfig)); - - kernels.push_back(::tt::target::metal::CreateKernelDescDirect( - fbb, ::tt::target::metal::Kernel::KernelSource, - ::tt::target::metal::CreateKernelSourceDirect( - fbb, source.c_str(), kernelConfigType, kernelConfigUnion) - .Union(), - &coreRangeSet, &cbs, nullptr, nullptr, /* TODO rtargs*/ - nullptr /*TODO debug info*/)); - } - ::flatbuffers::Offset<::tt::target::metal::ProgramDesc> program = - ::tt::target::metal::CreateProgramDescDirect(fbb, &kernels); - - cqBuilder.appendCommand( - ::tt::target::metal::CreateEnqueueProgramCommandDirect( - fbb, &operands, program), - op); - } else if (auto allocOp = dyn_cast_or_null(op); - allocOp) { - cqBuilder.appendCommand( - ::tt::target::metal::CreateCreateBufferCommand( - fbb, - cache.getOrCreate(allocOp.getResult(), tensorValueToFlatbuffer, - allocOp.getAddress(), allocOp.getSize())), - op); - } else if (auto deallocOp = dyn_cast_or_null(op); - deallocOp) { - cqBuilder.appendCommand( - ::tt::target::metal::CreateDeallocateBufferCommand( - fbb, cache.at<::tt::target::TensorRef>( - getOperandThroughDPSOps(deallocOp.getInput()))), - op); - } else if (auto hostReadOp = - dyn_cast_or_null(op); - hostReadOp) { - cqBuilder.appendCommand( - ::tt::target::metal::CreateEnqueueReadBufferCommand( - fbb, - cache.at<::tt::target::TensorRef>( - getOperandThroughDPSOps(hostReadOp.getInput())), - cache.at<::tt::target::TensorRef>( - getOperandThroughDPSOps(hostReadOp.getOutput()))), - op); - } else if (auto hostWriteOp = - dyn_cast_or_null(op); - hostWriteOp) { - auto [hostBufferType, hostBuffer] = - hostBufferToFlatbuffer(cache, hostWriteOp.getValue()); - cqBuilder.appendCommand( - ::tt::target::metal::CreateEnqueueWriteBufferCommand( - fbb, hostBufferType, hostBuffer, - cache.at<::tt::target::TensorRef>( - getOperandThroughDPSOps(hostWriteOp.getOutput()))), - op); - } else if (auto returnOp = dyn_cast_or_null(op); - returnOp) { - for (auto output : returnOp.getOperands()) { - cqBuilder.outputs.push_back(cache.at<::tt::target::TensorRef>( - getOperandThroughDPSOps(output))); - } - } - }); - - std::vector<::flatbuffers::Offset<::tt::target::metal::CommandQueue>> - commandQueues = { - ::tt::target::metal::CreateCommandQueueDirect(fbb, cqBuilder.name, - &cqBuilder.commands), - }; - - std::vector<::flatbuffers::Offset<::tt::target::metal::DeviceProgram>> - devicePrograms = { - ::tt::target::metal::CreateDeviceProgramDirect( - fbb, &cqBuilder.inputs, &cqBuilder.outputs, &commandQueues), - }; - programs.push_back(::tt::target::metal::CreateProgramDirect( - fbb, cqBuilder.name, &cqBuilder.inputs, &cqBuilder.outputs, - &devicePrograms)); - }); - - auto binary = ::tt::target::metal::CreateTTMetalBinaryDirect( - fbb, &binaryVersion, ::ttmlir::getGitHash(), - toFlatbuffer(cache, systemDesc), &programs); - - FinishSizePrefixedTTMetalBinaryBuffer(fbb, binary); - ::flatbuffers::Verifier verifier(fbb.GetBufferPointer(), fbb.GetSize()); - ::tt::target::metal::VerifySizePrefixedTTMetalBinaryBuffer(verifier); - - uint8_t *buf = fbb.GetBufferPointer(); - auto size = fbb.GetSize(); - - std::shared_ptr serializedBinary = - std::shared_ptr(std::malloc(size), std::free); - std::memcpy(serializedBinary.get(), buf, size); - - return serializedBinary; + if ( mlir::failed( mlir::emitc::translateToCpp(op, os) ) ) { + throw std::runtime_error("Failed to write C++ code to file"); + } } LogicalResult 
translateTTKernelToCpp(
    Operation *op, llvm::raw_ostream &os) {
-  std::shared_ptr data = translateModuleToCpp(op);
-  // std::size_t size = ::flatbuffers::GetSizePrefixedBufferLength(
-  //     static_cast<const uint8_t *>(data.get()));
-  // os.write(reinterpret_cast<char *>(data.get()), size);
+  translateModuleToCpp(op, os);
  return success();
}
-} // namespace mlir::tt::ttmetal
+} // namespace mlir::tt::ttkernel

From e48c6c1c75e3d6bbc43056e4c3e1eb0cc002bdaa Mon Sep 17 00:00:00 2001
From: Bill Teng
Date: Mon, 16 Dec 2024 22:43:45 +0000
Subject: [PATCH 03/28] fix typo in filename

---
 ...KernelToCppRegristration.cpp => TTKernelToCppRegistration.cpp} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename lib/Target/TTKernel/{TTKernelToCppRegristration.cpp => TTKernelToCppRegistration.cpp} (100%)

diff --git a/lib/Target/TTKernel/TTKernelToCppRegristration.cpp b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp
similarity index 100%
rename from lib/Target/TTKernel/TTKernelToCppRegristration.cpp
rename to lib/Target/TTKernel/TTKernelToCppRegistration.cpp

From 850d371bf8c5b0a64415c3cc324f442c0c86a366 Mon Sep 17 00:00:00 2001
From: Bill Teng
Date: Mon, 16 Dec 2024 22:43:54 +0000
Subject: [PATCH 04/28] fix compile error

---
 lib/RegisterAll.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/RegisterAll.cpp b/lib/RegisterAll.cpp
index 4a7156279..db8636f07 100644
--- a/lib/RegisterAll.cpp
+++ b/lib/RegisterAll.cpp
@@ -14,7 +14,6 @@
 #include "ttmlir/Dialect/TTIR/Pipelines/TTIRPipelines.h"
 #include "ttmlir/Dialect/TTIR/Transforms/Passes.h"
 #include "ttmlir/Dialect/TTKernel/IR/TTKernel.h"
-#include "ttmlir/Dialect/TTKernel/Pipelines/TTKernelPipelines.h"
 #include "ttmlir/Dialect/TTMetal/Pipelines/TTMetalPipelines.h"
 #include "ttmlir/Dialect/TTMetal/Transforms/Passes.h"
 #include "ttmlir/Dialect/TTNN/IR/TTNN.h"

From 92267fb45aee76bb7b1527ab909c82e57e1e7ac3 Mon Sep 17 00:00:00 2001
From: Bill Teng
Date: Mon, 16 Dec 2024 23:34:31 +0000
Subject: [PATCH 05/28] CMake setup and fixing errors for TTKernelToCpp

---
 lib/SharedLib/CMakeLists.txt | 1 +
 lib/Target/CMakeLists.txt | 1 +
 lib/Target/TTKernel/CMakeLists.txt | 16 ++++++++++++++++
 lib/Target/TTKernel/TTKernelToCpp.cpp | 15 +++++++++------
 .../TTKernel/TTKernelToCppRegistration.cpp | 2 +-
 5 files changed, 28 insertions(+), 7 deletions(-)
 create mode 100644 lib/Target/TTKernel/CMakeLists.txt

diff --git a/lib/SharedLib/CMakeLists.txt b/lib/SharedLib/CMakeLists.txt
index 7f32c8aa6..bc3316978 100644
--- a/lib/SharedLib/CMakeLists.txt
+++ b/lib/SharedLib/CMakeLists.txt
@@ -11,6 +11,7 @@ endif()
 set(TTMLIR_LIBS
     TTNNTargetFlatbuffer
     TTMetalTargetFlatbuffer
+    TTKernelTargetCpp
     MLIRTTDialect
     MLIRTTIRDialect
     MLIRTTNNDialect
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 2043ae34f..cdde750ac 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -1,2 +1,3 @@
 add_subdirectory(TTMetal)
 add_subdirectory(TTNN)
+add_subdirectory(TTKernel)
diff --git a/lib/Target/TTKernel/CMakeLists.txt b/lib/Target/TTKernel/CMakeLists.txt
new file mode 100644
index 000000000..bd11e0be2
--- /dev/null
+++ b/lib/Target/TTKernel/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_mlir_translation_library(TTKernelTargetCpp
+  TTKernelToCpp.cpp
+  TTKernelToCppRegistration.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${PROJECT_SOURCE_DIR}/include/Target/TTKernel
+
+  LINK_LIBS PUBLIC
+  MLIRTTKernelDialect
+  TTMLIRTTKernelToEmitC
+  MLIRFuncToEmitC
+  MLIRSCFToEmitC
+  MLIRArithToEmitC
+)
+
+target_include_directories(TTKernelTargetCpp PUBLIC ${PROJECT_BINARY_DIR}/include/ttmlir/Target/Common)
diff --git a/lib/Target/TTKernel/TTKernelToCpp.cpp b/lib/Target/TTKernel/TTKernelToCpp.cpp
index f8a44d8ce..ed75a2ce3 100644
--- a/lib/Target/TTKernel/TTKernelToCpp.cpp
+++ b/lib/Target/TTKernel/TTKernelToCpp.cpp
@@ -15,12 +15,15 @@
 #include "llvm/Support/LogicalResult.h"
 #include "llvm/Support/raw_ostream.h"
 #include "mlir/Target/Cpp/CppEmitter.h"
-
+#include "mlir/Pass/PassManager.h"
 #include "ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h"
+#include "mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h"
+#include "mlir/Conversion/SCFToEmitC/SCFToEmitC.h"
+#include "mlir/Conversion/FuncToEmitC/FuncToEmitCPass.h"

 namespace mlir::tt::ttkernel {

-static void translateModuleToCpp(
+static llvm::LogicalResult translateModuleToCpp(
     Operation *op, llvm::raw_ostream &os) {
   ModuleOp module = dyn_cast(op);
   assert(module && "Expected ModuleOp as top level operation");
@@ -32,18 +35,18 @@ static void translateModuleToCpp(
   pm.addPass(mlir::createConvertFuncToEmitC());

   if (mlir::failed(pm.run(op))) {
-    throw std::runtime_error("Failed to lower MLIR to EmitC");
+    return llvm::failure();
   }

   if ( mlir::failed( mlir::emitc::translateToCpp(op, os) ) ) {
-    throw std::runtime_error("Failed to write C++ code to file");
+    return llvm::failure();
   }
+  return success();
 }

 LogicalResult translateTTKernelToCpp(
     Operation *op, llvm::raw_ostream &os) {
-  translateModuleToCpp(op, os);
-  return success();
+  return translateModuleToCpp(op, os);
 }

 } // namespace mlir::tt::ttkernel
diff --git a/lib/Target/TTKernel/TTKernelToCppRegistration.cpp b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp
index b86349d57..c8737d197 100644
--- a/lib/Target/TTKernel/TTKernelToCppRegistration.cpp
+++ b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp
@@ -4,11 +4,11 @@
 #include "mlir/Dialect/EmitC/IR/EmitC.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
-#include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Tools/mlir-translate/Translation.h"

 #include "ttmlir/Dialect/TTKernel/IR/TTKernel.h"
 #include "ttmlir/Target/TTKernel/TTKernelToCpp.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"

 using namespace mlir;

From aa1133e7ebb266ad30c9f76e9b7ca5b6ded14352 Mon Sep 17 00:00:00 2001
From: Bill Teng
Date: Mon, 16 Dec 2024 23:54:14 +0000
Subject: [PATCH 06/28] remove unused function parameter and fix error msg

---
 python/Passes.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/Passes.cpp b/python/Passes.cpp
index b8323b6d0..5c3edfd9d 100644
--- a/python/Passes.cpp
+++ b/python/Passes.cpp
@@ -203,8 +203,7 @@ void populatePassesModule(py::module &m) {
       });

   m.def("ttkernel_to_cpp_file",
-        [](MlirModule module, std::string &filepath,
-           std::unordered_map goldenMap) {
+        [](MlirModule module, std::string &filepath) {
          mlir::Operation *moduleOp = unwrap(mlirModuleGetOperation(module));
          std::error_code fileError;
          llvm::raw_fd_ostream file(filepath, fileError);
@@ -214,7 +213,7 @@ void populatePassesModule(py::module &m) {
         }
         if (mlir::failed(mlir::tt::ttkernel::translateTTKernelToCpp(
             moduleOp, file))) {
-          throw std::runtime_error("Failed to write flatbuffer to file: " +
+          throw std::runtime_error("Failed to write C++ to file: " +
                                    filepath);
         }
       });

From f1785912bb7c06e6475aab462aa755c84cfd84d5 Mon Sep 17 00:00:00 2001
From: Bill Teng
Date: Wed, 18 Dec 2024 02:51:08 +0000
Subject: [PATCH 07/28] add TTKernel translate MLIR testcases

---
 .../Translate/TTKernel/ttkernel_noc.mlir | 16 +++++++++++++
 .../Translate/TTKernel/ttkernel_tensix.mlir | 23 +++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 test/ttmlir/Translate/TTKernel/ttkernel_noc.mlir
 create mode 100644 test/ttmlir/Translate/TTKernel/ttkernel_tensix.mlir

diff --git a/test/ttmlir/Translate/TTKernel/ttkernel_noc.mlir b/test/ttmlir/Translate/TTKernel/ttkernel_noc.mlir
new file mode 100644
index 000000000..b34d28eb5
--- /dev/null
+++ b/test/ttmlir/Translate/TTKernel/ttkernel_noc.mlir
@@ -0,0 +1,16 @@
+// RUN: ttmlir-translate --ttkernel-to-cpp %s
+
+func.func @ttkernel_noc() -> () {
+  %c262432_i32 = arith.constant 262432 : i32
+  %c262208_i32 = arith.constant 262208 : i32
+  %c32_i32 = arith.constant 32 : i32
+  %c262400_i32 = arith.constant 262400 : i32
+  %c0_i32 = arith.constant 0 : i32
+  %c262144_i32 = arith.constant 262144 : i32
+  %3 = "ttkernel.get_noc_addr_xy"(%c0_i32, %c0_i32, %c262144_i32) : (i32, i32, i32) -> !ttkernel.noc_addr
+  "ttkernel.noc_async_read"(%3, %c262400_i32, %c32_i32) : (!ttkernel.noc_addr, i32, i32) -> ()
+  %4 = "ttkernel.get_noc_addr_xy"(%c0_i32, %c0_i32, %c262208_i32) : (i32, i32, i32) -> !ttkernel.noc_addr
+  "ttkernel.noc_async_read"(%4, %c262432_i32, %c32_i32) : (!ttkernel.noc_addr, i32, i32) -> ()
+  "ttkernel.noc_async_read_barrier"() : () -> ()
+  "ttkernel.return"() : () -> ()
+}
diff --git a/test/ttmlir/Translate/TTKernel/ttkernel_tensix.mlir b/test/ttmlir/Translate/TTKernel/ttkernel_tensix.mlir
new file mode 100644
index 000000000..c53a0e92c
--- /dev/null
+++ b/test/ttmlir/Translate/TTKernel/ttkernel_tensix.mlir
@@ -0,0 +1,23 @@
+// RUN: ttmlir-translate --ttkernel-to-cpp %s
+#l1_ = #tt.memory_space
+
+func.func @ttkernel_tensix(%arg1: !ttkernel.cb, #l1_>, 4096, 1>,
+                           %arg2: !ttkernel.cb, 4096, 1>) -> () {
+  %c4_i32 = arith.constant 4 : i32
+  // CHECK: emitc.call_opaque "untilize_init"[[C:.*]]
+  "ttkernel.untilize_init"(%arg1, %arg2) : (!ttkernel.cb, #l1_>, 4096, 1>, !ttkernel.cb, 4096, 1>) -> ()
+  // CHECK: emitc.call_opaque "untilize_block"[[C:.*]]
+  "ttkernel.untilize_block"(%arg1, %c4_i32, %arg2) : (!ttkernel.cb, #l1_>, 4096, 1>, i32, !ttkernel.cb, 4096, 1>) -> ()
+  // CHECK: emitc.call_opaque "cb_pop_front"[[C:.*]]
+  "ttkernel.cb_pop_front"(%arg1, %c4_i32) : (!ttkernel.cb, #l1_>, 4096, 1>, i32) -> ()
+  // CHECK: emitc.call_opaque "cb_push_back"[[C:.*]]
+  "ttkernel.cb_push_back"(%arg2, %c4_i32) : (!ttkernel.cb, 4096, 1>, i32) -> ()
+  // CHECK: emitc.call_opaque "untilize_block"[[C:.*]]
+  "ttkernel.untilize_block"(%arg1, %c4_i32, %arg2) : (!ttkernel.cb, #l1_>, 4096, 1>, i32, !ttkernel.cb, 4096, 1>) -> ()
+  // CHECK: emitc.call_opaque "cb_pop_front"[[C:.*]]
+  "ttkernel.cb_pop_front"(%arg1, %c4_i32) : (!ttkernel.cb, #l1_>, 4096, 1>, i32) -> ()
+  // CHECK: emitc.call_opaque "cb_push_back"[[C:.*]]
+  "ttkernel.cb_push_back"(%arg2, %c4_i32) : (!ttkernel.cb, 4096, 1>, i32) -> ()
+  // CHECK: return
+  "ttkernel.return"() : () -> ()
+}

From 947004e83e7fc26dbf1b8122b0333b3196825f04 Mon Sep 17 00:00:00 2001
From: Bill Teng
Date: Thu, 19 Dec 2024 03:56:09 +0000
Subject: [PATCH 08/28] rename emitDispatchOpRegionAsCpp function as it's not specific to Dispatch op

---
 include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h | 2 +-
 lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h b/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h
index bd6d98ea8..fd85e0a7f 100644
--- a/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h
+++ b/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h
@@ -25,7 +25,7 @@ LogicalResult convertTTKernelRegionToEmitC(
 // Converts 
given region to EmitC dialect and translates it to C++ code. LogicalResult -emitDispatchOpRegionAsCpp(Region *region, std::string ®ionCpp, +emitOpRegionAsCpp(Region *region, std::string ®ionCpp, const ttkernel::KernelConfigInterface &kernelConfig); // Converts dispatch op's regions to C++ code. diff --git a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp index c265e8928..7f12d9c32 100644 --- a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp +++ b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp @@ -551,7 +551,7 @@ LogicalResult convertTTKernelRegionToEmitC( } LogicalResult -emitDispatchOpRegionAsCpp(Region *region, std::string ®ionCpp, +emitOpRegionAsCpp(Region *region, std::string ®ionCpp, const ttkernel::KernelConfigInterface &kernelConfig) { OpBuilder builder(region->getContext()); From 4e6ebf3d169a07e282f34488c440b08eaa3023ab Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Fri, 20 Dec 2024 02:26:36 +0000 Subject: [PATCH 09/28] register TT dialect --- lib/Target/TTKernel/TTKernelToCppRegistration.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Target/TTKernel/TTKernelToCppRegistration.cpp b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp index c8737d197..46651b8f1 100644 --- a/lib/Target/TTKernel/TTKernelToCppRegistration.cpp +++ b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp @@ -6,6 +6,7 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Tools/mlir-translate/Translation.h" +#include "ttmlir/Dialect/TT/IR/TT.h" #include "ttmlir/Dialect/TTKernel/IR/TTKernel.h" #include "ttmlir/Target/TTKernel/TTKernelToCpp.h" #include "mlir/Dialect/SCF/IR/SCF.h" @@ -23,7 +24,7 @@ void registerTTKernelToCpp() { [](DialectRegistry ®istry) { registry.insert(); + mlir::emitc::EmitCDialect, mlir::func::FuncDialect, mlir::tt::TTDialect>(); }); } From c83e19c7c9a9bbc151501a9378e0c930725225cb Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Fri, 20 Dec 2024 04:32:40 +0000 Subject: [PATCH 10/28] emit C++ with metal kernel boilerplate --- .../TTKernelToEmitC/TTKernelToEmitC.h | 16 +++- lib/Conversion/TTKernelToEmitC/CMakeLists.txt | 1 + .../TTKernelToEmitC/TTKernelToEmitC.cpp | 86 +++++++++++++++---- lib/Target/TTKernel/TTKernelToCpp.cpp | 85 +++++++++++++++--- .../TTKernel/TTKernelToCppRegistration.cpp | 4 +- 5 files changed, 156 insertions(+), 36 deletions(-) diff --git a/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h b/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h index fd85e0a7f..728599538 100644 --- a/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h +++ b/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h @@ -26,13 +26,27 @@ LogicalResult convertTTKernelRegionToEmitC( // Converts given region to EmitC dialect and translates it to C++ code. LogicalResult emitOpRegionAsCpp(Region *region, std::string ®ionCpp, - const ttkernel::KernelConfigInterface &kernelConfig); + const ttkernel::ThreadType &threadType); + +LogicalResult +emitOpRegionAsCpp(Region *region, llvm::raw_ostream &os, + const ttkernel::ThreadType &threadType); // Converts dispatch op's regions to C++ code. 
LogicalResult emitDispatchOpRegionsAsCpp(ttmetal::DispatchOp dispatchOp, llvm::SmallVector &cppStrings); + +LogicalResult +emitNocKernelAsCpp( mlir::ModuleOp op, llvm::raw_ostream &os); + +LogicalResult +emitTensixKernelAsCpp( mlir::ModuleOp op, llvm::raw_ostream &os); + +LogicalResult +emitKernelAsCpp( mlir::ModuleOp op, llvm::raw_ostream &os, const ttkernel::ThreadType &threadType); + } // namespace mlir::tt #endif diff --git a/lib/Conversion/TTKernelToEmitC/CMakeLists.txt b/lib/Conversion/TTKernelToEmitC/CMakeLists.txt index 429a694f3..3a263a6b7 100644 --- a/lib/Conversion/TTKernelToEmitC/CMakeLists.txt +++ b/lib/Conversion/TTKernelToEmitC/CMakeLists.txt @@ -11,6 +11,7 @@ add_mlir_conversion_library(TTMLIRTTKernelToEmitC MLIRIR MLIRPass MLIRArithToEmitC + MLIRSCFToEmitC MLIREmitCDialect MLIRTargetCpp MLIRTransformUtils diff --git a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp index 7f12d9c32..2bc699640 100644 --- a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp +++ b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp @@ -460,16 +460,16 @@ std::unique_ptr<::mlir::Pass> createConvertTTKernelToEmitC() { class ThreadConfigHelper { public: ThreadConfigHelper(OpBuilder *builder, Location loc, - ttkernel::KernelConfigInterface kernelConfig) - : builder(builder), loc(loc), kernelConfig(kernelConfig) { + ttkernel::ThreadType threadType) + : builder(builder), loc(loc), threadType(threadType) { builder->create(loc, "cstdint", /*isStandard=*/true); - if (kernelConfig.getThreadType() == ttkernel::ThreadType::Noc) { + if (threadType == ttkernel::ThreadType::Noc) { builder->create(loc, "dataflow_api.h", /*isStandard=*/false); } - if (kernelConfig.getThreadType() == ttkernel::ThreadType::Tensix) { + if (threadType == ttkernel::ThreadType::Tensix) { builder->create(loc, "llk_defs.h", /*isStandard=*/false); builder->create(loc, "compute_kernel_api/common.h", @@ -510,11 +510,11 @@ class ThreadConfigHelper { builder->create(loc, "compute_kernel_api/reduce.h", /*isStandard=*/false); builder->create(loc, "namespace NAMESPACE {"); - } + } } ~ThreadConfigHelper() { - if (kernelConfig.getThreadType() == ttkernel::ThreadType::Tensix) { + if (threadType == ttkernel::ThreadType::Tensix) { builder->create(loc, "void MAIN { kernel_main(); }"); builder->create(loc, "}"); // close namespace NAMESPACE @@ -522,16 +522,18 @@ class ThreadConfigHelper { } private: + OpBuilder *builder; Location loc; ttkernel::KernelConfigInterface kernelConfig; + ttkernel::ThreadType threadType; }; LogicalResult convertTTKernelRegionToEmitC( OpBuilder &builder, Region *region, - const ttkernel::KernelConfigInterface &kernelConfig) { + const ttkernel::ThreadType &threadType) { ThreadConfigHelper threadConfigHelper(&builder, region->getLoc(), - kernelConfig); + threadType); auto funcOp = builder.create( region->getLoc(), "kernel_main", @@ -552,20 +554,33 @@ LogicalResult convertTTKernelRegionToEmitC( LogicalResult emitOpRegionAsCpp(Region *region, std::string ®ionCpp, - const ttkernel::KernelConfigInterface &kernelConfig) { - OpBuilder builder(region->getContext()); + const ttkernel::ThreadType &threadType) { + + llvm::raw_string_ostream os(regionCpp); + return emitOpRegionAsCpp(region, os, threadType); +} + +LogicalResult +emitOpRegionAsCpp(Region *region, llvm::raw_ostream &os, + const ttkernel::ThreadType &threadType) { + // We must load the EmitC dialect before we can emit any EmitC code. 
This + // dialect won't be loaded by MLIR until pass manager starts a pass that + // depends on it. Because we want to emit EmitC code before that, we need to + // load it here. + region->getContext()->getOrLoadDialect(); + + OpBuilder builder(region->getContext()); // We will wrap everything in a module op so that we can run the // translation. auto moduleWrapper = builder.create(region->getLoc(), "module_wrapper"); builder.setInsertionPointToStart(moduleWrapper.getBody()); - if (convertTTKernelRegionToEmitC(builder, region, kernelConfig).failed()) { + if (convertTTKernelRegionToEmitC(builder, region, threadType).failed()) { return failure(); } - llvm::raw_string_ostream os(regionCpp); if (emitc::translateToCpp(moduleWrapper, os).failed()) { return failure(); } @@ -579,17 +594,13 @@ emitDispatchOpRegionsAsCpp(ttmetal::DispatchOp dispatchOp, assert(cppStrings.size() == dispatchOp.getNumRegions() && "cppStrings size must match number of regions"); - // We must load the EmitC dialect before we can emit any EmitC code. This - // dialect won't be loaded by MLIR until pass manager starts a pass that - // depends on it. Because we want to emit EmitC code before that, we need to - // load it here. - dispatchOp.getContext()->getOrLoadDialect(); + for (auto ® : dispatchOp->getRegions()) { auto kernelConfig = mlir::cast( dispatchOp.getKernelConfigs()[reg.getRegionNumber()]); - if (emitDispatchOpRegionAsCpp(®, cppStrings[reg.getRegionNumber()], - kernelConfig) + if (emitOpRegionAsCpp(®, cppStrings[reg.getRegionNumber()], + kernelConfig.getThreadType()) .failed()) { return llvm::failure(); } @@ -598,4 +609,41 @@ emitDispatchOpRegionsAsCpp(ttmetal::DispatchOp dispatchOp, return success(); } +LogicalResult +emitNocKernelAsCpp(mlir::ModuleOp op, llvm::raw_ostream &os) +{ + return emitKernelAsCpp(op, os, ttkernel::ThreadType::Noc); +} + +LogicalResult +emitTensixKernelAsCpp(mlir::ModuleOp op, llvm::raw_ostream &os) +{ + return emitKernelAsCpp(op, os, ttkernel::ThreadType::Tensix); +} + +LogicalResult +emitKernelAsCpp(mlir::ModuleOp op, llvm::raw_ostream &os, const ttkernel::ThreadType &threadType ) +{ + std::vector ops; + op->walk([&](func::FuncOp entry) { + ops.push_back(entry); + }); + + // PassManager pm(ops[0]->getContext()); + // pm.addPass(createConvertTTKernelToEmitC()); + + // if (pm.run(ops[0]).failed()) { + // return failure(); + // } + + for (auto ® : ops[0]->getRegions()) { + if (emitOpRegionAsCpp(®, os, + threadType) + .failed()) { + return llvm::failure(); + } + } + return llvm::success(); +} + } // namespace mlir::tt diff --git a/lib/Target/TTKernel/TTKernelToCpp.cpp b/lib/Target/TTKernel/TTKernelToCpp.cpp index ed75a2ce3..aef121294 100644 --- a/lib/Target/TTKernel/TTKernelToCpp.cpp +++ b/lib/Target/TTKernel/TTKernelToCpp.cpp @@ -20,30 +20,87 @@ #include "mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h" #include "mlir/Conversion/SCFToEmitC/SCFToEmitC.h" #include "mlir/Conversion/FuncToEmitC/FuncToEmitCPass.h" +#include namespace mlir::tt::ttkernel { +/// This pass illustrates the IR nesting through printing. +struct Printer{ + + /// The three methods below are mutually recursive and follow the nesting of + /// the IR: operation->region->block->operation->... 
+ public: + void printOperation(Operation *op) { + // Print the operation itself and some of its properties + printIndent() << "visiting op: '" << op->getName() << "' with " + << op->getNumOperands() << " operands and " + << op->getNumResults() << " results\n"; + // Print the operation attributes + if (!op->getAttrs().empty()) { + printIndent() << op->getAttrs().size() << " attributes:\n"; + for (NamedAttribute attr : op->getAttrs()) + printIndent() << " - '" << attr.getName().getValue() << "' : '" + << attr.getValue() << "'\n"; + } + + // Recurse into each of the regions attached to the operation. + printIndent() << " " << op->getNumRegions() << " nested regions:\n"; + auto indent = pushIndent(); + for (Region ®ion : op->getRegions()) + printRegion(region); + } + + void printRegion(Region ®ion) { + // A region does not hold anything by itself other than a list of blocks. + printIndent() << "Region with " << region.getBlocks().size() + << " blocks:\n"; + auto indent = pushIndent(); + for (Block &block : region.getBlocks()) + printBlock(block); + } + + void printBlock(Block &block) { + // Print the block intrinsics properties (basically: argument list) + printIndent() + << "Block with " << block.getNumArguments() << " arguments, " + << block.getNumSuccessors() + << " successors, and " + // Note, this `.size()` is traversing a linked-list and is O(n). + << block.getOperations().size() << " operations\n"; + + // Block main role is to hold a list of Operations: let's recurse. + auto indent = pushIndent(); + for (Operation &op : block.getOperations()) + printOperation(&op); + } + + /// Manages the indentation as we traverse the IR nesting. + int indent; + struct IdentRAII { + int &indent; + IdentRAII(int &indent) : indent(indent) {} + ~IdentRAII() { --indent; } + }; + void resetIndent() { indent = 0; } + IdentRAII pushIndent() { return IdentRAII(++indent); } + + llvm::raw_ostream &printIndent() { + for (int i = 0; i < indent; ++i) + llvm::outs() << " "; + return llvm::outs(); + } +}; + static llvm::LogicalResult translateModuleToCpp( Operation *op, llvm::raw_ostream &os) { ModuleOp module = dyn_cast(op); assert(module && "Expected ModuleOp as top level operation"); - mlir::PassManager pm(op->getContext()); + // return mlir::tt::emitTensixKernelAsCpp(module, os); + return mlir::tt::emitNocKernelAsCpp(module, os); - pm.addPass(mlir::tt::createConvertTTKernelToEmitC()); - pm.addPass(mlir::createConvertArithToEmitC()); - pm.addPass(mlir::createSCFToEmitC()); - pm.addPass(mlir::createConvertFuncToEmitC()); - - if (mlir::failed(pm.run(op))) { - return llvm::failure(); - } - - if ( mlir::failed( mlir::emitc::translateToCpp(op, os) ) ) { - return llvm::failure(); - } - return success(); } + LogicalResult translateTTKernelToCpp( Operation *op, llvm::raw_ostream &os) { return translateModuleToCpp(op, os); diff --git a/lib/Target/TTKernel/TTKernelToCppRegistration.cpp b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp index 46651b8f1..525b70339 100644 --- a/lib/Target/TTKernel/TTKernelToCppRegistration.cpp +++ b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp @@ -10,7 +10,7 @@ #include "ttmlir/Dialect/TTKernel/IR/TTKernel.h" #include "ttmlir/Target/TTKernel/TTKernelToCpp.h" #include "mlir/Dialect/SCF/IR/SCF.h" - +#include using namespace mlir; namespace mlir::tt::ttkernel { @@ -24,7 +24,7 @@ void registerTTKernelToCpp() { [](DialectRegistry ®istry) { registry.insert(); + mlir::emitc::EmitCDialect, mlir::func::FuncDialect, mlir::tt::TTDialect, mlir::memref::MemRefDialect>(); }); } From 
cf0474e9c67fdb546163670c9278a685771b8ccb Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Sat, 21 Dec 2024 00:01:54 +0000 Subject: [PATCH 11/28] cleanup --- .../TTKernelToEmitC/TTKernelToEmitC.cpp | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp index 2bc699640..82abd9bd2 100644 --- a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp +++ b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp @@ -624,23 +624,18 @@ emitTensixKernelAsCpp(mlir::ModuleOp op, llvm::raw_ostream &os) LogicalResult emitKernelAsCpp(mlir::ModuleOp op, llvm::raw_ostream &os, const ttkernel::ThreadType &threadType ) { - std::vector ops; + std::vector ops; op->walk([&](func::FuncOp entry) { ops.push_back(entry); }); - // PassManager pm(ops[0]->getContext()); - // pm.addPass(createConvertTTKernelToEmitC()); - - // if (pm.run(ops[0]).failed()) { - // return failure(); - // } - - for (auto ® : ops[0]->getRegions()) { - if (emitOpRegionAsCpp(®, os, - threadType) - .failed()) { - return llvm::failure(); + for (const auto &op : ops){ + for (auto ® : op->getRegions()) { + if (emitOpRegionAsCpp(®, os, + threadType) + .failed()) { + return llvm::failure(); + } } } return llvm::success(); From ee4776ca0971ff8d63f11f606ddce51970b0b83c Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Sat, 28 Dec 2024 05:41:59 +0000 Subject: [PATCH 12/28] add python bindings to generate C++ for Noc and Tensix kernels --- .../TTKernelToEmitC/TTKernelToEmitC.h | 6 ------ .../ttmlir/Target/TTKernel/TTKernelToCpp.h | 7 ++++++- .../TTKernelToEmitC/TTKernelToEmitC.cpp | 12 ----------- lib/Target/TTKernel/TTKernelToCpp.cpp | 10 ++++----- .../TTKernel/TTKernelToCppRegistration.cpp | 21 ++++++++++++++++--- python/Passes.cpp | 21 ++++++++++++++++--- 6 files changed, 47 insertions(+), 30 deletions(-) diff --git a/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h b/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h index 728599538..1cb1dc103 100644 --- a/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h +++ b/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h @@ -38,12 +38,6 @@ emitDispatchOpRegionsAsCpp(ttmetal::DispatchOp dispatchOp, llvm::SmallVector &cppStrings); -LogicalResult -emitNocKernelAsCpp( mlir::ModuleOp op, llvm::raw_ostream &os); - -LogicalResult -emitTensixKernelAsCpp( mlir::ModuleOp op, llvm::raw_ostream &os); - LogicalResult emitKernelAsCpp( mlir::ModuleOp op, llvm::raw_ostream &os, const ttkernel::ThreadType &threadType); diff --git a/include/ttmlir/Target/TTKernel/TTKernelToCpp.h b/include/ttmlir/Target/TTKernel/TTKernelToCpp.h index 665eabdae..914dfed93 100644 --- a/include/ttmlir/Target/TTKernel/TTKernelToCpp.h +++ b/include/ttmlir/Target/TTKernel/TTKernelToCpp.h @@ -10,10 +10,15 @@ namespace mlir::tt::ttkernel { +enum class ThreadType : uint32_t; + // Translates a TTKernel operation to C++ and writes it to the given // stream. 
+ LogicalResult translateTTKernelToCpp( - Operation *op, llvm::raw_ostream &os); + Operation *op, llvm::raw_ostream &os, const ThreadType &threadType); + + } // namespace mlir::tt::ttkernel #endif diff --git a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp index 82abd9bd2..71f0742b4 100644 --- a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp +++ b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp @@ -609,18 +609,6 @@ emitDispatchOpRegionsAsCpp(ttmetal::DispatchOp dispatchOp, return success(); } -LogicalResult -emitNocKernelAsCpp(mlir::ModuleOp op, llvm::raw_ostream &os) -{ - return emitKernelAsCpp(op, os, ttkernel::ThreadType::Noc); -} - -LogicalResult -emitTensixKernelAsCpp(mlir::ModuleOp op, llvm::raw_ostream &os) -{ - return emitKernelAsCpp(op, os, ttkernel::ThreadType::Tensix); -} - LogicalResult emitKernelAsCpp(mlir::ModuleOp op, llvm::raw_ostream &os, const ttkernel::ThreadType &threadType ) { diff --git a/lib/Target/TTKernel/TTKernelToCpp.cpp b/lib/Target/TTKernel/TTKernelToCpp.cpp index aef121294..a828f0917 100644 --- a/lib/Target/TTKernel/TTKernelToCpp.cpp +++ b/lib/Target/TTKernel/TTKernelToCpp.cpp @@ -92,18 +92,18 @@ struct Printer{ }; static llvm::LogicalResult translateModuleToCpp( - Operation *op, llvm::raw_ostream &os) { + Operation *op, llvm::raw_ostream &os, const ttkernel::ThreadType &threadType) { + ModuleOp module = dyn_cast(op); assert(module && "Expected ModuleOp as top level operation"); // return mlir::tt::emitTensixKernelAsCpp(module, os); - return mlir::tt::emitNocKernelAsCpp(module, os); + return mlir::tt::emitKernelAsCpp(module, os, threadType); } - LogicalResult translateTTKernelToCpp( - Operation *op, llvm::raw_ostream &os) { - return translateModuleToCpp(op, os); + Operation *op, llvm::raw_ostream &os, const ttkernel::ThreadType &threadType) { + return translateModuleToCpp(op, os, threadType); } } // namespace mlir::tt::ttkernel diff --git a/lib/Target/TTKernel/TTKernelToCppRegistration.cpp b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp index 525b70339..a255d240e 100644 --- a/lib/Target/TTKernel/TTKernelToCppRegistration.cpp +++ b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp @@ -8,6 +8,7 @@ #include "ttmlir/Dialect/TT/IR/TT.h" #include "ttmlir/Dialect/TTKernel/IR/TTKernel.h" +#include "ttmlir/Dialect/TTKernel/IR/TTKernelOpsTypes.h" #include "ttmlir/Target/TTKernel/TTKernelToCpp.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include @@ -15,11 +16,25 @@ using namespace mlir; namespace mlir::tt::ttkernel { -void registerTTKernelToCpp() { +//TODO: Should generalize this to read kernel type from Attribute? 
+void registerTensixKernelToCpp() { TranslateFromMLIRRegistration reg( - "ttkernel-to-cpp", "translate ttmetal dialect to flatbuffer", + "tensixkernel-to-cpp", "translate tensix kernel to C++", [](Operation *op, llvm::raw_ostream &os) -> LogicalResult { - return translateTTKernelToCpp(op, os); + return translateTTKernelToCpp(op, os, tt::ttkernel::ThreadType::Tensix); + }, + [](DialectRegistry ®istry) { + registry.insert(); + }); +} + +void registerNocKernelToCpp() { + TranslateFromMLIRRegistration reg( + "nockernel-to-cpp", "translate noc kernel to C++", + [](Operation *op, llvm::raw_ostream &os) -> LogicalResult { + return translateTTKernelToCpp(op, os, tt::ttkernel::ThreadType::Noc); }, [](DialectRegistry ®istry) { registry.insert(m, "DataType") .value("Float32", ::tt::target::DataType::Float32) From ae8a70a9353e1ee3453aa2c84d004d441a14e099 Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Sat, 28 Dec 2024 06:21:33 +0000 Subject: [PATCH 13/28] comment out printer --- lib/Target/TTKernel/TTKernelToCpp.cpp | 130 +++++++++++++------------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/lib/Target/TTKernel/TTKernelToCpp.cpp b/lib/Target/TTKernel/TTKernelToCpp.cpp index a828f0917..54bf111db 100644 --- a/lib/Target/TTKernel/TTKernelToCpp.cpp +++ b/lib/Target/TTKernel/TTKernelToCpp.cpp @@ -25,71 +25,71 @@ namespace mlir::tt::ttkernel { /// This pass illustrates the IR nesting through printing. -struct Printer{ - - /// The three methods below are mutually recursive and follow the nesting of - /// the IR: operation->region->block->operation->... - public: - void printOperation(Operation *op) { - // Print the operation itself and some of its properties - printIndent() << "visiting op: '" << op->getName() << "' with " - << op->getNumOperands() << " operands and " - << op->getNumResults() << " results\n"; - // Print the operation attributes - if (!op->getAttrs().empty()) { - printIndent() << op->getAttrs().size() << " attributes:\n"; - for (NamedAttribute attr : op->getAttrs()) - printIndent() << " - '" << attr.getName().getValue() << "' : '" - << attr.getValue() << "'\n"; - } - - // Recurse into each of the regions attached to the operation. - printIndent() << " " << op->getNumRegions() << " nested regions:\n"; - auto indent = pushIndent(); - for (Region ®ion : op->getRegions()) - printRegion(region); - } - - void printRegion(Region ®ion) { - // A region does not hold anything by itself other than a list of blocks. - printIndent() << "Region with " << region.getBlocks().size() - << " blocks:\n"; - auto indent = pushIndent(); - for (Block &block : region.getBlocks()) - printBlock(block); - } - - void printBlock(Block &block) { - // Print the block intrinsics properties (basically: argument list) - printIndent() - << "Block with " << block.getNumArguments() << " arguments, " - << block.getNumSuccessors() - << " successors, and " - // Note, this `.size()` is traversing a linked-list and is O(n). - << block.getOperations().size() << " operations\n"; - - // Block main role is to hold a list of Operations: let's recurse. - auto indent = pushIndent(); - for (Operation &op : block.getOperations()) - printOperation(&op); - } - - /// Manages the indentation as we traverse the IR nesting. 
- int indent; - struct IdentRAII { - int &indent; - IdentRAII(int &indent) : indent(indent) {} - ~IdentRAII() { --indent; } - }; - void resetIndent() { indent = 0; } - IdentRAII pushIndent() { return IdentRAII(++indent); } - - llvm::raw_ostream &printIndent() { - for (int i = 0; i < indent; ++i) - llvm::outs() << " "; - return llvm::outs(); - } -}; +// struct Printer{ + +// /// The three methods below are mutually recursive and follow the nesting of +// /// the IR: operation->region->block->operation->... +// public: +// void printOperation(Operation *op) { +// // Print the operation itself and some of its properties +// printIndent() << "visiting op: '" << op->getName() << "' with " +// << op->getNumOperands() << " operands and " +// << op->getNumResults() << " results\n"; +// // Print the operation attributes +// if (!op->getAttrs().empty()) { +// printIndent() << op->getAttrs().size() << " attributes:\n"; +// for (NamedAttribute attr : op->getAttrs()) +// printIndent() << " - '" << attr.getName().getValue() << "' : '" +// << attr.getValue() << "'\n"; +// } + +// // Recurse into each of the regions attached to the operation. +// printIndent() << " " << op->getNumRegions() << " nested regions:\n"; +// auto indent = pushIndent(); +// for (Region ®ion : op->getRegions()) +// printRegion(region); +// } + +// void printRegion(Region ®ion) { +// // A region does not hold anything by itself other than a list of blocks. +// printIndent() << "Region with " << region.getBlocks().size() +// << " blocks:\n"; +// auto indent = pushIndent(); +// for (Block &block : region.getBlocks()) +// printBlock(block); +// } + +// void printBlock(Block &block) { +// // Print the block intrinsics properties (basically: argument list) +// printIndent() +// << "Block with " << block.getNumArguments() << " arguments, " +// << block.getNumSuccessors() +// << " successors, and " +// // Note, this `.size()` is traversing a linked-list and is O(n). +// << block.getOperations().size() << " operations\n"; + +// // Block main role is to hold a list of Operations: let's recurse. +// auto indent = pushIndent(); +// for (Operation &op : block.getOperations()) +// printOperation(&op); +// } + +// /// Manages the indentation as we traverse the IR nesting. 
+// int indent; +// struct IdentRAII { +// int &indent; +// IdentRAII(int &indent) : indent(indent) {} +// ~IdentRAII() { --indent; } +// }; +// void resetIndent() { indent = 0; } +// IdentRAII pushIndent() { return IdentRAII(++indent); } + +// llvm::raw_ostream &printIndent() { +// for (int i = 0; i < indent; ++i) +// llvm::outs() << " "; +// return llvm::outs(); +// } +// }; static llvm::LogicalResult translateModuleToCpp( Operation *op, llvm::raw_ostream &os, const ttkernel::ThreadType &threadType) { From 1a27f16adc0bfe6a2a876652e709bd5cbcc7c0c9 Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Sat, 28 Dec 2024 06:21:50 +0000 Subject: [PATCH 14/28] fix linker error --- tools/ttmlir-translate/ttmlir-translate.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/ttmlir-translate/ttmlir-translate.cpp b/tools/ttmlir-translate/ttmlir-translate.cpp index 7c447fd36..88e0b317d 100644 --- a/tools/ttmlir-translate/ttmlir-translate.cpp +++ b/tools/ttmlir-translate/ttmlir-translate.cpp @@ -18,7 +18,8 @@ void registerTTMetalToFlatbuffer(); } // namespace mlir::tt::ttmetal namespace mlir::tt::ttkernel { -void registerTTKernelToCpp(); +void registerTensixKernelToCpp(); +void registerNocKernelToCpp(); } // namespace mlir::tt::ttkernel // Place to register all the custom translations @@ -26,7 +27,9 @@ static void registerCustomTranslations() { static bool initOnce = []() { mlir::tt::ttnn::registerTTNNToFlatbuffer(); mlir::tt::ttmetal::registerTTMetalToFlatbuffer(); - mlir::tt::ttkernel::registerTTKernelToCpp(); + mlir::tt::ttkernel::registerNocKernelToCpp(); + mlir::tt::ttkernel::registerTensixKernelToCpp(); + return true; }(); (void)initOnce; From 63b6978ffd4f31791f1daa2be2e5b1249310d319 Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Sat, 28 Dec 2024 06:26:50 +0000 Subject: [PATCH 15/28] update ttmlir-translate options in mlir test files --- test/ttmlir/Translate/TTKernel/ttkernel_noc.mlir | 2 +- test/ttmlir/Translate/TTKernel/ttkernel_tensix.mlir | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/ttmlir/Translate/TTKernel/ttkernel_noc.mlir b/test/ttmlir/Translate/TTKernel/ttkernel_noc.mlir index b34d28eb5..58a46fdf9 100644 --- a/test/ttmlir/Translate/TTKernel/ttkernel_noc.mlir +++ b/test/ttmlir/Translate/TTKernel/ttkernel_noc.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-translate --ttkernel-to-cpp %s +// RUN: ttmlir-translate --nockernel-to-cpp %s func.func @ttkernel_noc() -> () { %c262432_i32 = arith.constant 262432 : i32 diff --git a/test/ttmlir/Translate/TTKernel/ttkernel_tensix.mlir b/test/ttmlir/Translate/TTKernel/ttkernel_tensix.mlir index c53a0e92c..365ca7282 100644 --- a/test/ttmlir/Translate/TTKernel/ttkernel_tensix.mlir +++ b/test/ttmlir/Translate/TTKernel/ttkernel_tensix.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-translate --ttkernel-to-cpp %s +// RUN: ttmlir-translate --tensixkernel-to-cpp %s #l1_ = #tt.memory_space func.func @ttkernel_tensix(%arg1: !ttkernel.cb, #l1_>, 4096, 1>, From e23a52067d948edb04ca1d9db3640662690e3f67 Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Sat, 28 Dec 2024 06:38:27 +0000 Subject: [PATCH 16/28] remove KernelConfigInterface member variable --- lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp index 71f0742b4..e068d58db 100644 --- a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp +++ b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp @@ -525,7 +525,6 @@ class 
ThreadConfigHelper { OpBuilder *builder; Location loc; - ttkernel::KernelConfigInterface kernelConfig; ttkernel::ThreadType threadType; }; From 1b5033a2dab311cf98ec679f09fd5c68c383e3aa Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Sat, 28 Dec 2024 06:57:20 +0000 Subject: [PATCH 17/28] remove debug code --- lib/Target/TTKernel/TTKernelToCpp.cpp | 67 --------------------------- 1 file changed, 67 deletions(-) diff --git a/lib/Target/TTKernel/TTKernelToCpp.cpp b/lib/Target/TTKernel/TTKernelToCpp.cpp index 54bf111db..dfcc2ba49 100644 --- a/lib/Target/TTKernel/TTKernelToCpp.cpp +++ b/lib/Target/TTKernel/TTKernelToCpp.cpp @@ -24,73 +24,6 @@ namespace mlir::tt::ttkernel { -/// This pass illustrates the IR nesting through printing. -// struct Printer{ - -// /// The three methods below are mutually recursive and follow the nesting of -// /// the IR: operation->region->block->operation->... -// public: -// void printOperation(Operation *op) { -// // Print the operation itself and some of its properties -// printIndent() << "visiting op: '" << op->getName() << "' with " -// << op->getNumOperands() << " operands and " -// << op->getNumResults() << " results\n"; -// // Print the operation attributes -// if (!op->getAttrs().empty()) { -// printIndent() << op->getAttrs().size() << " attributes:\n"; -// for (NamedAttribute attr : op->getAttrs()) -// printIndent() << " - '" << attr.getName().getValue() << "' : '" -// << attr.getValue() << "'\n"; -// } - -// // Recurse into each of the regions attached to the operation. -// printIndent() << " " << op->getNumRegions() << " nested regions:\n"; -// auto indent = pushIndent(); -// for (Region ®ion : op->getRegions()) -// printRegion(region); -// } - -// void printRegion(Region ®ion) { -// // A region does not hold anything by itself other than a list of blocks. -// printIndent() << "Region with " << region.getBlocks().size() -// << " blocks:\n"; -// auto indent = pushIndent(); -// for (Block &block : region.getBlocks()) -// printBlock(block); -// } - -// void printBlock(Block &block) { -// // Print the block intrinsics properties (basically: argument list) -// printIndent() -// << "Block with " << block.getNumArguments() << " arguments, " -// << block.getNumSuccessors() -// << " successors, and " -// // Note, this `.size()` is traversing a linked-list and is O(n). -// << block.getOperations().size() << " operations\n"; - -// // Block main role is to hold a list of Operations: let's recurse. -// auto indent = pushIndent(); -// for (Operation &op : block.getOperations()) -// printOperation(&op); -// } - -// /// Manages the indentation as we traverse the IR nesting. 
-// int indent; -// struct IdentRAII { -// int &indent; -// IdentRAII(int &indent) : indent(indent) {} -// ~IdentRAII() { --indent; } -// }; -// void resetIndent() { indent = 0; } -// IdentRAII pushIndent() { return IdentRAII(++indent); } - -// llvm::raw_ostream &printIndent() { -// for (int i = 0; i < indent; ++i) -// llvm::outs() << " "; -// return llvm::outs(); -// } -// }; - static llvm::LogicalResult translateModuleToCpp( Operation *op, llvm::raw_ostream &os, const ttkernel::ThreadType &threadType) { From 4f96d52180c3c5393bcd6d43b1bb2c1635d4dec0 Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Sat, 28 Dec 2024 08:21:17 +0000 Subject: [PATCH 18/28] remove python pass, change deferred to a later PR --- python/Passes.cpp | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/python/Passes.cpp b/python/Passes.cpp index 36b5c9563..43709426d 100644 --- a/python/Passes.cpp +++ b/python/Passes.cpp @@ -4,11 +4,9 @@ #include "mlir/InitAllTranslations.h" #include "ttmlir/Bindings/Python/TTMLIRModule.h" -#include "ttmlir/Dialect/TTKernel/IR/TTKernelOpsTypes.h" #include "ttmlir/RegisterAll.h" #include "ttmlir/Target/TTMetal/TTMetalToFlatbuffer.h" #include "ttmlir/Target/TTNN/TTNNToFlatbuffer.h" -#include "ttmlir/Target/TTKernel/TTKernelToCpp.h" #include PYBIND11_MAKE_OPAQUE(std::shared_ptr); @@ -203,36 +201,6 @@ void populatePassesModule(py::module &m) { } }); - m.def("nockernel_to_cpp", - [](MlirModule module, std::string &filepath) { - mlir::Operation *moduleOp = unwrap(mlirModuleGetOperation(module)); - std::error_code fileError; - llvm::raw_fd_ostream file(filepath, fileError); - if (fileError) { - throw std::runtime_error("Failed to open file: " + filepath + - ". Error: " + fileError.message()); - } - if (mlir::failed(mlir::tt::ttkernel::translateTTKernelToCpp(moduleOp, file, tt::ttkernel::ThreadType::Noc))) { - throw std::runtime_error("Failed to write C++ to file: " + - filepath); - } - }); - m.def("tensixkernel_to_cpp", - [](MlirModule module, std::string &filepath) { - mlir::Operation *moduleOp = unwrap(mlirModuleGetOperation(module)); - std::error_code fileError; - llvm::raw_fd_ostream file(filepath, fileError); - if (fileError) { - throw std::runtime_error("Failed to open file: " + filepath + - ". 
Error: " + fileError.message()); - } - if (mlir::failed(mlir::tt::ttkernel::translateTTKernelToCpp(moduleOp, file, tt::ttkernel::ThreadType::Tensix))) { - throw std::runtime_error("Failed to write C++ to file: " + - filepath); - } - }); - - py::enum_<::tt::target::DataType>(m, "DataType") .value("Float32", ::tt::target::DataType::Float32) .value("Float16", ::tt::target::DataType::Float16); From c8729d80bb1e699d531be96c6f499b2fc96d48f2 Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Sat, 28 Dec 2024 08:29:10 +0000 Subject: [PATCH 19/28] cleanup --- lib/Target/TTKernel/TTKernelToCpp.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Target/TTKernel/TTKernelToCpp.cpp b/lib/Target/TTKernel/TTKernelToCpp.cpp index dfcc2ba49..ce33e790f 100644 --- a/lib/Target/TTKernel/TTKernelToCpp.cpp +++ b/lib/Target/TTKernel/TTKernelToCpp.cpp @@ -29,7 +29,6 @@ static llvm::LogicalResult translateModuleToCpp( ModuleOp module = dyn_cast(op); assert(module && "Expected ModuleOp as top level operation"); - // return mlir::tt::emitTensixKernelAsCpp(module, os); return mlir::tt::emitKernelAsCpp(module, os, threadType); } From 726d1dad470b2d18abc5bae083c701617a86037a Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Wed, 1 Jan 2025 02:51:19 +0000 Subject: [PATCH 20/28] clang format --- .../TTKernelToEmitC/TTKernelToEmitC.h | 21 ++++------ .../ttmlir/Target/TTKernel/TTKernelToCpp.h | 5 +-- .../TTKernelToEmitC/TTKernelToEmitC.cpp | 41 +++++++------------ lib/Target/TTKernel/TTKernelToCpp.cpp | 28 ++++++------- .../TTKernel/TTKernelToCppRegistration.cpp | 20 +++++---- 5 files changed, 52 insertions(+), 63 deletions(-) diff --git a/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h b/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h index 1cb1dc103..7b8a92c37 100644 --- a/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h +++ b/include/ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h @@ -19,27 +19,24 @@ namespace mlir::tt { // Runs a conversion pass to EmitC dialect on a func op containing given // region's body. Also, it adds boilerplate code such as includes and namespace // declarations. -LogicalResult convertTTKernelRegionToEmitC( - OpBuilder &builder, Region *region, - const ttkernel::KernelConfigInterface &kernelConfig); +LogicalResult +convertTTKernelRegionToEmitC(OpBuilder &builder, Region *region, + const ttkernel::ThreadType &threadType); // Converts given region to EmitC dialect and translates it to C++ code. -LogicalResult -emitOpRegionAsCpp(Region *region, std::string ®ionCpp, - const ttkernel::ThreadType &threadType); +LogicalResult emitOpRegionAsCpp(Region *region, std::string ®ionCpp, + const ttkernel::ThreadType &threadType); -LogicalResult -emitOpRegionAsCpp(Region *region, llvm::raw_ostream &os, - const ttkernel::ThreadType &threadType); +LogicalResult emitOpRegionAsCpp(Region *region, llvm::raw_ostream &os, + const ttkernel::ThreadType &threadType); // Converts dispatch op's regions to C++ code. 
LogicalResult emitDispatchOpRegionsAsCpp(ttmetal::DispatchOp dispatchOp, llvm::SmallVector &cppStrings); - -LogicalResult -emitKernelAsCpp( mlir::ModuleOp op, llvm::raw_ostream &os, const ttkernel::ThreadType &threadType); +LogicalResult emitKernelAsCpp(mlir::ModuleOp op, llvm::raw_ostream &os, + const ttkernel::ThreadType &threadType); } // namespace mlir::tt diff --git a/include/ttmlir/Target/TTKernel/TTKernelToCpp.h b/include/ttmlir/Target/TTKernel/TTKernelToCpp.h index 914dfed93..d6b8aa3ec 100644 --- a/include/ttmlir/Target/TTKernel/TTKernelToCpp.h +++ b/include/ttmlir/Target/TTKernel/TTKernelToCpp.h @@ -15,9 +15,8 @@ enum class ThreadType : uint32_t; // Translates a TTKernel operation to C++ and writes it to the given // stream. -LogicalResult translateTTKernelToCpp( - Operation *op, llvm::raw_ostream &os, const ThreadType &threadType); - +LogicalResult translateTTKernelToCpp(Operation *op, llvm::raw_ostream &os, + const ThreadType &threadType); } // namespace mlir::tt::ttkernel diff --git a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp index e068d58db..fff91bc6c 100644 --- a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp +++ b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp @@ -510,7 +510,7 @@ class ThreadConfigHelper { builder->create(loc, "compute_kernel_api/reduce.h", /*isStandard=*/false); builder->create(loc, "namespace NAMESPACE {"); - } + } } ~ThreadConfigHelper() { @@ -522,17 +522,15 @@ class ThreadConfigHelper { } private: - OpBuilder *builder; Location loc; ttkernel::ThreadType threadType; }; -LogicalResult convertTTKernelRegionToEmitC( - OpBuilder &builder, Region *region, - const ttkernel::ThreadType &threadType) { - ThreadConfigHelper threadConfigHelper(&builder, region->getLoc(), - threadType); +LogicalResult +convertTTKernelRegionToEmitC(OpBuilder &builder, Region *region, + const ttkernel::ThreadType &threadType) { + ThreadConfigHelper threadConfigHelper(&builder, region->getLoc(), threadType); auto funcOp = builder.create( region->getLoc(), "kernel_main", @@ -551,17 +549,15 @@ LogicalResult convertTTKernelRegionToEmitC( return success(); } -LogicalResult -emitOpRegionAsCpp(Region *region, std::string ®ionCpp, - const ttkernel::ThreadType &threadType) { +LogicalResult emitOpRegionAsCpp(Region *region, std::string ®ionCpp, + const ttkernel::ThreadType &threadType) { llvm::raw_string_ostream os(regionCpp); return emitOpRegionAsCpp(region, os, threadType); } -LogicalResult -emitOpRegionAsCpp(Region *region, llvm::raw_ostream &os, - const ttkernel::ThreadType &threadType) { +LogicalResult emitOpRegionAsCpp(Region *region, llvm::raw_ostream &os, + const ttkernel::ThreadType &threadType) { // We must load the EmitC dialect before we can emit any EmitC code. 
This // dialect won't be loaded by MLIR until pass manager starts a pass that @@ -593,13 +589,11 @@ emitDispatchOpRegionsAsCpp(ttmetal::DispatchOp dispatchOp, assert(cppStrings.size() == dispatchOp.getNumRegions() && "cppStrings size must match number of regions"); - - for (auto ® : dispatchOp->getRegions()) { auto kernelConfig = mlir::cast( dispatchOp.getKernelConfigs()[reg.getRegionNumber()]); if (emitOpRegionAsCpp(®, cppStrings[reg.getRegionNumber()], - kernelConfig.getThreadType()) + kernelConfig.getThreadType()) .failed()) { return llvm::failure(); } @@ -608,19 +602,14 @@ emitDispatchOpRegionsAsCpp(ttmetal::DispatchOp dispatchOp, return success(); } -LogicalResult -emitKernelAsCpp(mlir::ModuleOp op, llvm::raw_ostream &os, const ttkernel::ThreadType &threadType ) -{ +LogicalResult emitKernelAsCpp(mlir::ModuleOp op, llvm::raw_ostream &os, + const ttkernel::ThreadType &threadType) { std::vector ops; - op->walk([&](func::FuncOp entry) { - ops.push_back(entry); - }); + op->walk([&](func::FuncOp entry) { ops.push_back(entry); }); - for (const auto &op : ops){ + for (const auto &op : ops) { for (auto ® : op->getRegions()) { - if (emitOpRegionAsCpp(®, os, - threadType) - .failed()) { + if (emitOpRegionAsCpp(®, os, threadType).failed()) { return llvm::failure(); } } diff --git a/lib/Target/TTKernel/TTKernelToCpp.cpp b/lib/Target/TTKernel/TTKernelToCpp.cpp index ce33e790f..264113ed9 100644 --- a/lib/Target/TTKernel/TTKernelToCpp.cpp +++ b/lib/Target/TTKernel/TTKernelToCpp.cpp @@ -6,35 +6,35 @@ #include #include +#include "mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h" +#include "mlir/Conversion/FuncToEmitC/FuncToEmitCPass.h" +#include "mlir/Conversion/SCFToEmitC/SCFToEmitC.h" #include "mlir/Dialect/EmitC/IR/EmitC.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/BuiltinOps.h" +#include "mlir/Pass/PassManager.h" #include "mlir/Support/LLVM.h" #include "mlir/Support/LogicalResult.h" +#include "mlir/Target/Cpp/CppEmitter.h" +#include "ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/LogicalResult.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/Target/Cpp/CppEmitter.h" -#include "mlir/Pass/PassManager.h" -#include "ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h" -#include "mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h" -#include "mlir/Conversion/SCFToEmitC/SCFToEmitC.h" -#include "mlir/Conversion/FuncToEmitC/FuncToEmitCPass.h" #include namespace mlir::tt::ttkernel { -static llvm::LogicalResult translateModuleToCpp( - Operation *op, llvm::raw_ostream &os, const ttkernel::ThreadType &threadType) { - - ModuleOp module = dyn_cast(op); - assert(module && "Expected ModuleOp as top level operation"); - return mlir::tt::emitKernelAsCpp(module, os, threadType); +static llvm::LogicalResult +translateModuleToCpp(Operation *op, llvm::raw_ostream &os, + const ttkernel::ThreadType &threadType) { + ModuleOp module = dyn_cast(op); + assert(module && "Expected ModuleOp as top level operation"); + return mlir::tt::emitKernelAsCpp(module, os, threadType); } -LogicalResult translateTTKernelToCpp( - Operation *op, llvm::raw_ostream &os, const ttkernel::ThreadType &threadType) { +LogicalResult translateTTKernelToCpp(Operation *op, llvm::raw_ostream &os, + const ttkernel::ThreadType &threadType) { return translateModuleToCpp(op, os, threadType); } diff --git a/lib/Target/TTKernel/TTKernelToCppRegistration.cpp b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp index a255d240e..0834e6e81 100644 --- 
a/lib/Target/TTKernel/TTKernelToCppRegistration.cpp +++ b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp @@ -6,17 +6,17 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Tools/mlir-translate/Translation.h" +#include "mlir/Dialect/SCF/IR/SCF.h" #include "ttmlir/Dialect/TT/IR/TT.h" #include "ttmlir/Dialect/TTKernel/IR/TTKernel.h" #include "ttmlir/Dialect/TTKernel/IR/TTKernelOpsTypes.h" #include "ttmlir/Target/TTKernel/TTKernelToCpp.h" -#include "mlir/Dialect/SCF/IR/SCF.h" #include using namespace mlir; namespace mlir::tt::ttkernel { -//TODO: Should generalize this to read kernel type from Attribute? +// TODO: Should generalize this to read kernel type from Attribute? void registerTensixKernelToCpp() { TranslateFromMLIRRegistration reg( "tensixkernel-to-cpp", "translate tensix kernel to C++", @@ -24,9 +24,11 @@ void registerTensixKernelToCpp() { return translateTTKernelToCpp(op, os, tt::ttkernel::ThreadType::Tensix); }, [](DialectRegistry ®istry) { - registry.insert(); + registry + .insert(); }); } @@ -37,9 +39,11 @@ void registerNocKernelToCpp() { return translateTTKernelToCpp(op, os, tt::ttkernel::ThreadType::Noc); }, [](DialectRegistry ®istry) { - registry.insert(); + registry + .insert(); }); } From 052025d333e061a211e9f7b7d4db0b72c1f75877 Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Mon, 13 Jan 2025 23:55:18 +0000 Subject: [PATCH 21/28] use smallvector --- lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp index fff91bc6c..227a7b080 100644 --- a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp +++ b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp @@ -604,7 +604,7 @@ emitDispatchOpRegionsAsCpp(ttmetal::DispatchOp dispatchOp, LogicalResult emitKernelAsCpp(mlir::ModuleOp op, llvm::raw_ostream &os, const ttkernel::ThreadType &threadType) { - std::vector ops; + llvm::SmallVector ops; op->walk([&](func::FuncOp entry) { ops.push_back(entry); }); for (const auto &op : ops) { From 970fefb480eaba25885b16c1464683b8935e82f1 Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Mon, 13 Jan 2025 23:57:15 +0000 Subject: [PATCH 22/28] change name of mlir translate options --- lib/Target/TTKernel/TTKernelToCppRegistration.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Target/TTKernel/TTKernelToCppRegistration.cpp b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp index 0834e6e81..8ac56fbff 100644 --- a/lib/Target/TTKernel/TTKernelToCppRegistration.cpp +++ b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp @@ -19,7 +19,7 @@ namespace mlir::tt::ttkernel { // TODO: Should generalize this to read kernel type from Attribute? 
void registerTensixKernelToCpp() {
   TranslateFromMLIRRegistration reg(
-      "tensixkernel-to-cpp", "translate tensix kernel to C++",
+      "ttkernel-to-cpp-tensix", "translate tensix kernel to C++",
       [](Operation *op, llvm::raw_ostream &os) -> LogicalResult {
         return translateTTKernelToCpp(op, os, tt::ttkernel::ThreadType::Tensix);
       },
@@ -34,7 +34,7 @@ void registerTensixKernelToCpp() {
 
 void registerNocKernelToCpp() {
   TranslateFromMLIRRegistration reg(
-      "nockernel-to-cpp", "translate noc kernel to C++",
+      "ttkernel-to-cpp-noc", "translate noc kernel to C++",
       [](Operation *op, llvm::raw_ostream &os) -> LogicalResult {
         return translateTTKernelToCpp(op, os, tt::ttkernel::ThreadType::Noc);
       },

From dcf1a7e12a0b4f51a24886f00e647f6ce96c9bee Mon Sep 17 00:00:00 2001
From: Bill Teng
Date: Tue, 14 Jan 2025 03:14:25 +0000
Subject: [PATCH 23/28] add filecheck statements

---
 .../Translate/TTKernel/ttkernel_noc.mlir      | 16 ++++++++++++-
 .../Translate/TTKernel/ttkernel_tensix.mlir   | 24 ++++++++++++-------
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/test/ttmlir/Translate/TTKernel/ttkernel_noc.mlir b/test/ttmlir/Translate/TTKernel/ttkernel_noc.mlir
index 58a46fdf9..edce3d23b 100644
--- a/test/ttmlir/Translate/TTKernel/ttkernel_noc.mlir
+++ b/test/ttmlir/Translate/TTKernel/ttkernel_noc.mlir
@@ -1,16 +1,30 @@
-// RUN: ttmlir-translate --nockernel-to-cpp %s
+// RUN: ttmlir-translate --ttkernel-to-cpp-noc %s | FileCheck %s
+// CHECK: #include "dataflow_api.h"
+// CHECK: void kernel_main
 func.func @ttkernel_noc() -> () {
+  // CHECK: int32_t [[B0:.*]] = 262432
   %c262432_i32 = arith.constant 262432 : i32
+  // CHECK: int32_t [[B1:.*]] = 262208
   %c262208_i32 = arith.constant 262208 : i32
+  // CHECK: int32_t [[C0:.*]] = 32
   %c32_i32 = arith.constant 32 : i32
+  // CHECK: int32_t [[C1:.*]] = 262400
   %c262400_i32 = arith.constant 262400 : i32
+  // CHECK: int32_t [[A0:.*]] = 0
   %c0_i32 = arith.constant 0 : i32
+  // CHECK: int32_t [[A1:.*]] = 262144;
   %c262144_i32 = arith.constant 262144 : i32
+  // CHECK: int64_t [[NOCADDR0:.*]] = get_noc_addr([[A0]], [[A0]], [[A1]])
   %3 = "ttkernel.get_noc_addr_xy"(%c0_i32, %c0_i32, %c262144_i32) : (i32, i32, i32) -> !ttkernel.noc_addr
+  // CHECK: noc_async_read([[NOCADDR0]], [[C1]], [[C0]])
   "ttkernel.noc_async_read"(%3, %c262400_i32, %c32_i32) : (!ttkernel.noc_addr, i32, i32) -> ()
+  // CHECK: int64_t [[NOCADDR1:.*]] = get_noc_addr([[A0]], [[A0]], [[B1]])
   %4 = "ttkernel.get_noc_addr_xy"(%c0_i32, %c0_i32, %c262208_i32) : (i32, i32, i32) -> !ttkernel.noc_addr
+  // CHECK: noc_async_read([[NOCADDR1]], [[B0]], [[C0]])
   "ttkernel.noc_async_read"(%4, %c262432_i32, %c32_i32) : (!ttkernel.noc_addr, i32, i32) -> ()
+  // CHECK: noc_async_read_barrier
   "ttkernel.noc_async_read_barrier"() : () -> ()
+  // CHECK: return
   "ttkernel.return"() : () -> ()
 }
diff --git a/test/ttmlir/Translate/TTKernel/ttkernel_tensix.mlir b/test/ttmlir/Translate/TTKernel/ttkernel_tensix.mlir
index 365ca7282..9910a3003 100644
--- a/test/ttmlir/Translate/TTKernel/ttkernel_tensix.mlir
+++ b/test/ttmlir/Translate/TTKernel/ttkernel_tensix.mlir
@@ -1,22 +1,30 @@
-// RUN: ttmlir-translate --tensixkernel-to-cpp %s
+// RUN: ttmlir-translate --ttkernel-to-cpp-tensix %s | FileCheck %s
+
 #l1_ = #tt.memory_space<l1>
+// CHECK: void kernel_main
 func.func @ttkernel_tensix(%arg1: !ttkernel.cb, #l1_>, 4096, 1>,
                            %arg2: !ttkernel.cb, 4096, 1>) -> () {
+
+  // CHECK: ::tt::CB [[CBIN0:.*]] = ::tt::CB::c_in0
+  // CHECK: ::tt::CB [[CBIN0ARG:.*]] = [[CBIN0]]
+  // CHECK: ::tt::CB [[CBOUT0:.*]] = ::tt::CB::c_out0
+  // CHECK: ::tt::CB [[CBOUT0ARG:.*]] = [[CBOUT0]]
+  // CHECK: int32_t [[C:.*]] = 4
   %c4_i32 = arith.constant 4 : i32
-  // CHECK: emitc.call_opaque "untilize_init"[[C:.*]]
+  // CHECK: untilize_init([[CBIN0ARG]], [[CBOUT0ARG]])
   "ttkernel.untilize_init"(%arg1, %arg2) : (!ttkernel.cb, #l1_>, 4096, 1>, !ttkernel.cb, 4096, 1>) -> ()
-  // CHECK: emitc.call_opaque "untilize_block"[[C:.*]]
+  // CHECK: untilize_block([[CBIN0ARG]], [[C]], [[CBOUT0ARG]])
   "ttkernel.untilize_block"(%arg1, %c4_i32, %arg2) : (!ttkernel.cb, #l1_>, 4096, 1>, i32, !ttkernel.cb, 4096, 1>) -> ()
-  // CHECK: emitc.call_opaque "cb_pop_front"[[C:.*]]
+  // CHECK: cb_pop_front([[CBIN0ARG]], [[C]])
   "ttkernel.cb_pop_front"(%arg1, %c4_i32) : (!ttkernel.cb, #l1_>, 4096, 1>, i32) -> ()
-  // CHECK: emitc.call_opaque "cb_push_back"[[C:.*]]
+  // CHECK: cb_push_back([[CBOUT0ARG]], [[C]])
   "ttkernel.cb_push_back"(%arg2, %c4_i32) : (!ttkernel.cb, 4096, 1>, i32) -> ()
-  // CHECK: emitc.call_opaque "untilize_block"[[C:.*]]
+  // CHECK: untilize_block([[CBIN0ARG]], [[C]], [[CBOUT0ARG]])
   "ttkernel.untilize_block"(%arg1, %c4_i32, %arg2) : (!ttkernel.cb, #l1_>, 4096, 1>, i32, !ttkernel.cb, 4096, 1>) -> ()
-  // CHECK: emitc.call_opaque "cb_pop_front"[[C:.*]]
+  // CHECK: cb_pop_front([[CBIN0ARG]], [[C]])
   "ttkernel.cb_pop_front"(%arg1, %c4_i32) : (!ttkernel.cb, #l1_>, 4096, 1>, i32) -> ()
-  // CHECK: emitc.call_opaque "cb_push_back"[[C:.*]]
+  // CHECK: cb_push_back([[CBOUT0ARG]], [[C]])
   "ttkernel.cb_push_back"(%arg2, %c4_i32) : (!ttkernel.cb, 4096, 1>, i32) -> ()
   // CHECK: return
   "ttkernel.return"() : () -> ()

From 1e0a8c690ef016d48ab0d7ce1510ae79a96692d0 Mon Sep 17 00:00:00 2001
From: Bill Teng
Date: Tue, 14 Jan 2025 03:19:32 +0000
Subject: [PATCH 24/28] include header rather than fwd declare

---
 include/ttmlir/Target/TTKernel/TTKernelToCpp.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/ttmlir/Target/TTKernel/TTKernelToCpp.h b/include/ttmlir/Target/TTKernel/TTKernelToCpp.h
index d6b8aa3ec..6e96ba68b 100644
--- a/include/ttmlir/Target/TTKernel/TTKernelToCpp.h
+++ b/include/ttmlir/Target/TTKernel/TTKernelToCpp.h
@@ -7,11 +7,10 @@
 
 #include "mlir/IR/Operation.h"
 #include "mlir/Support/LogicalResult.h"
+#include "ttmlir/Dialect/TTKernel/IR/TTKernelOpsTypes.h"
 
 namespace mlir::tt::ttkernel {
 
-enum class ThreadType : uint32_t;
-
 // Translates a TTKernel operation to C++ and writes it to the given
 // stream.

From 63d5cbb365a4acaa3591906c8a0415d960c70c48 Mon Sep 17 00:00:00 2001
From: Bill Teng
Date: Thu, 16 Jan 2025 22:37:08 +0000
Subject: [PATCH 25/28] fix header order and add bug # to TODO

---
 lib/Target/TTKernel/TTKernelToCppRegistration.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/lib/Target/TTKernel/TTKernelToCppRegistration.cpp b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp
index 8ac56fbff..f40a50d6e 100644
--- a/lib/Target/TTKernel/TTKernelToCppRegistration.cpp
+++ b/lib/Target/TTKernel/TTKernelToCppRegistration.cpp
@@ -2,21 +2,20 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
-#include "mlir/Dialect/EmitC/IR/EmitC.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
-#include "mlir/Tools/mlir-translate/Translation.h"
-
-#include "mlir/Dialect/SCF/IR/SCF.h"
 #include "ttmlir/Dialect/TT/IR/TT.h"
 #include "ttmlir/Dialect/TTKernel/IR/TTKernel.h"
 #include "ttmlir/Dialect/TTKernel/IR/TTKernelOpsTypes.h"
 #include "ttmlir/Target/TTKernel/TTKernelToCpp.h"
+#include <mlir/Dialect/EmitC/IR/EmitC.h>
+#include <mlir/Dialect/Func/IR/FuncOps.h>
 #include
+#include <mlir/Dialect/SCF/IR/SCF.h>
+#include <mlir/Tools/mlir-translate/Translation.h>
 using namespace mlir;
 
 namespace mlir::tt::ttkernel {
 
-// TODO: Should generalize this to read kernel type from Attribute?
+// TODO(bug #1874): Should generalize this to read kernel type from Attribute? void registerTensixKernelToCpp() { TranslateFromMLIRRegistration reg( "ttkernel-to-cpp-tensix", "translate tensix kernel to C++", From d6cba0b05a3569b2890f798ff0273d3e6579cc4e Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Thu, 16 Jan 2025 22:38:51 +0000 Subject: [PATCH 26/28] reorder headers --- include/ttmlir/Target/TTKernel/TTKernelToCpp.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/ttmlir/Target/TTKernel/TTKernelToCpp.h b/include/ttmlir/Target/TTKernel/TTKernelToCpp.h index 6e96ba68b..23cf8250f 100644 --- a/include/ttmlir/Target/TTKernel/TTKernelToCpp.h +++ b/include/ttmlir/Target/TTKernel/TTKernelToCpp.h @@ -5,15 +5,14 @@ #ifndef TTMLIR_TARGET_TTKERNEL_TTKERNELTOCPP_H #define TTMLIR_TARGET_TTKERNEL_TTKERNELTOCPP_H -#include "mlir/IR/Operation.h" -#include "mlir/Support/LogicalResult.h" #include "ttmlir/Dialect/TTKernel/IR/TTKernelOpsTypes.h" +#include +#include namespace mlir::tt::ttkernel { // Translates a TTKernel operation to C++ and writes it to the given // stream. - LogicalResult translateTTKernelToCpp(Operation *op, llvm::raw_ostream &os, const ThreadType &threadType); From cf69651ccfc4c94b2efe7334f0bbd1b07e01abba Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Thu, 16 Jan 2025 22:41:22 +0000 Subject: [PATCH 27/28] reorder headers --- lib/Target/TTKernel/TTKernelToCpp.cpp | 30 +++++++++++++-------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/lib/Target/TTKernel/TTKernelToCpp.cpp b/lib/Target/TTKernel/TTKernelToCpp.cpp index 264113ed9..14fa202c9 100644 --- a/lib/Target/TTKernel/TTKernelToCpp.cpp +++ b/lib/Target/TTKernel/TTKernelToCpp.cpp @@ -2,25 +2,23 @@ // // SPDX-License-Identifier: Apache-2.0 +#include "ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h" #include #include -#include - -#include "mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h" -#include "mlir/Conversion/FuncToEmitC/FuncToEmitCPass.h" -#include "mlir/Conversion/SCFToEmitC/SCFToEmitC.h" -#include "mlir/Dialect/EmitC/IR/EmitC.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/IR/BuiltinOps.h" -#include "mlir/Pass/PassManager.h" -#include "mlir/Support/LLVM.h" -#include "mlir/Support/LogicalResult.h" -#include "mlir/Target/Cpp/CppEmitter.h" -#include "ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/LogicalResult.h" -#include "llvm/Support/raw_ostream.h" +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace mlir::tt::ttkernel { From d81f756b0def4429c11bed4a27587df12fa633ca Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Thu, 16 Jan 2025 22:47:41 +0000 Subject: [PATCH 28/28] fix headers --- lib/Target/TTKernel/TTKernelToCpp.cpp | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/lib/Target/TTKernel/TTKernelToCpp.cpp b/lib/Target/TTKernel/TTKernelToCpp.cpp index 14fa202c9..87e6010ae 100644 --- a/lib/Target/TTKernel/TTKernelToCpp.cpp +++ b/lib/Target/TTKernel/TTKernelToCpp.cpp @@ -2,23 +2,15 @@ // // SPDX-License-Identifier: Apache-2.0 +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Target/Cpp/CppEmitter.h" #include "ttmlir/Conversion/TTKernelToEmitC/TTKernelToEmitC.h" +#include "llvm/Support/LogicalResult.h" +#include "llvm/Support/raw_ostream.h" #include -#include 
-#include <llvm/ADT/SmallVector.h>
-#include <llvm/Support/LogicalResult.h>
-#include <llvm/Support/raw_ostream.h>
-#include <mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h>
-#include <mlir/Conversion/FuncToEmitC/FuncToEmitCPass.h>
-#include <mlir/Conversion/SCFToEmitC/SCFToEmitC.h>
-#include <mlir/Dialect/EmitC/IR/EmitC.h>
-#include <mlir/Dialect/Func/IR/FuncOps.h>
-#include <mlir/IR/BuiltinOps.h>
-#include <mlir/Pass/PassManager.h>
-#include <mlir/Support/LLVM.h>
-#include <mlir/Support/LogicalResult.h>
-#include <mlir/Target/Cpp/CppEmitter.h>
-#include

 namespace mlir::tt::ttkernel {
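
A closing note on usage, illustrative only: after this series, the two registered
translations are exercised by the tests as `ttmlir-translate --ttkernel-to-cpp-noc`
and `ttmlir-translate --ttkernel-to-cpp-tensix`, and both funnel into the same C++
entry point with a thread-type argument. A minimal sketch of driving that final API
directly, assuming an already-parsed module; `writeKernelCpp` and the output path are
hypothetical, while `translateTTKernelToCpp` and `ThreadType` come from the patches:

  #include "mlir/IR/BuiltinOps.h"
  #include "ttmlir/Target/TTKernel/TTKernelToCpp.h"
  #include "llvm/Support/raw_ostream.h"

  // Emit one kernel flavor to a file. ThreadType::Noc selects the dataflow
  // (NOC) kernel boilerplate, ThreadType::Tensix the compute kernel one.
  mlir::LogicalResult writeKernelCpp(mlir::ModuleOp module,
                                     mlir::tt::ttkernel::ThreadType type) {
    std::error_code ec;
    llvm::raw_fd_ostream out("kernel.cpp", ec); // hypothetical output path
    if (ec)
      return mlir::failure();
    return mlir::tt::ttkernel::translateTTKernelToCpp(
        module.getOperation(), out, type);
  }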