From 11007aa6736e8ba3acacb0498388f68b4ad5ae18 Mon Sep 17 00:00:00 2001 From: Sasa Vuckovic <134393361+svuckovicTT@users.noreply.github.com> Date: Sat, 14 Dec 2024 15:44:57 +0100 Subject: [PATCH 01/10] Fix ttnn-standalone build (#1596) --- tools/ttnn-standalone/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ttnn-standalone/CMakeLists.txt b/tools/ttnn-standalone/CMakeLists.txt index 23c78c7ca9..0be29d763d 100644 --- a/tools/ttnn-standalone/CMakeLists.txt +++ b/tools/ttnn-standalone/CMakeLists.txt @@ -63,7 +63,7 @@ set(INCLUDE_DIRS $ENV{TT_METAL_HOME} $ENV{TT_METAL_HOME}/tt_metal $ENV{TT_METAL_HOME}/tt_metal/third_party/umd - $ENV{TT_METAL_HOME}/tt_metal/third_party/umd/device + $ENV{TT_METAL_HOME}/tt_metal/third_party/umd/device/api $ENV{TT_METAL_HOME}/tt_metal/third_party/fmt $ENV{TT_METAL_HOME}/tt_metal/hw/inc $ENV{TT_METAL_HOME}/tt_metal/hw/inc/${ARCH_NAME} From 1c1cb182a6306479fc6d59c2e947d635da29a873 Mon Sep 17 00:00:00 2001 From: Vladimir Milosevic <157983820+vmilosevic@users.noreply.github.com> Date: Sat, 14 Dec 2024 23:47:06 +0100 Subject: [PATCH 02/10] Uplift third_party/tt-metal to ed413ee399ad289740105b709c20e9144ec4f035 2024-12-14 (#1572) This PR uplifts the third_party/tt-metal to the ed413ee399ad289740105b709c20e9144ec4f035 - Switch to tt-metal branch that reverts a pair of commits. remainder op w/ float32 broken. - Add cpmcache json to tt-metal include dirs so nlohmann/json.hpp can be found (CPM package now) --------- Co-authored-by: kmitrovicTT <169657397+kmitrovicTT@users.noreply.github.com> Co-authored-by: Kyle Mabee --- third_party/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index 54a1563c4f..0eaec5efc0 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -1,6 +1,6 @@ include(ExternalProject) -set(TT_METAL_VERSION "2f59d5e02ef0da955b99cf0f49da1759c772a67a") +set(TT_METAL_VERSION "5f821d46d6726e1fcf06b0802e2b40aae0af9e38") if ("$ENV{ARCH_NAME}" STREQUAL "grayskull") set(ARCH_NAME "grayskull") @@ -34,6 +34,7 @@ set(TTMETAL_INCLUDE_DIRS ${PROJECT_SOURCE_DIR}/third_party/tt-metal/src/tt-metal/.cpmcache/fmt/73b5ec45edbd92babfd91c3777a9e1ab9cac8238/include ${PROJECT_SOURCE_DIR}/third_party/tt-metal/src/tt-metal/.cpmcache/magic_enum/1e1af177d4ab0ef660f105434fd1017c4d1f8c17/include/magic_enum ${PROJECT_SOURCE_DIR}/third_party/tt-metal/src/tt-metal/.cpmcache/boost_core/e679bef5c160cf29d0f37d549881dc5f5a58c332/include + ${PROJECT_SOURCE_DIR}/third_party/tt-metal/src/tt-metal/.cpmcache/json/230202b6f5267cbf0c8e5a2f17301964d95f83ff/include PARENT_SCOPE ) From 809b126291285915308dbf66f55bc50a4c930df0 Mon Sep 17 00:00:00 2001 From: Jovan Serbedzija Date: Mon, 16 Dec 2024 14:52:03 +0100 Subject: [PATCH 03/10] Remove operand_constraints from generic op (#1587) As described in [Remove TTIR operand constraints](https://github.com/tenstorrent/tt-mlir/pulls?q=is%3Apr+is%3Aclosed+author%3AjserbedzijaTT), the removal of operand_constraints from the generic op was left incomplete. This PR finalizes that task. 
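For illustration, here is a minimal sketch of what the change means at the IR level: a `ttir.generic` op that previously carried an `operand_constraints` array attribute (next to `grid`, `indexing_maps` and `iterator_types`) now simply omits it. The attribute aliases and concrete values below are hypothetical and only meant to show the shape of the change, not copied from a real test.

```mlir
// Before (schematic): a constraint alias plus a per-operand operand_constraints array.
#any_device = #tt.operand_constraint<any_device>
%1 = "ttir.generic"(%arg0, %arg1, %0) <{grid = #tt.grid<1x1>,
      indexing_maps = [#map, #map, #map], iterator_types = [#parallel, #parallel],
      operand_constraints = [#any_device, #any_device, #any_device]}> ({ ... })

// After: the operand_constraints attribute (and its aliases) are simply dropped.
%1 = "ttir.generic"(%arg0, %arg1, %0) <{grid = #tt.grid<1x1>,
      indexing_maps = [#map, #map, #map], iterator_types = [#parallel, #parallel]}> ({ ... })
```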
Closes https://github.com/tenstorrent/tt-mlir/issues/1139 --- docs/src/adding-an-op.md | 7 +- docs/src/overview.md | 3 +- include/ttmlir-c/TTAttrs.h | 7 -- include/ttmlir/Dialect/TT/IR/TTOpsEnums.td | 41 -------- include/ttmlir/Dialect/TT/IR/TTOpsTypes.td | 6 -- .../Dialect/TT/Utils/OperandConstraints.h | 95 ------------------- include/ttmlir/Dialect/TTIR/IR/TTIROps.td | 6 -- include/ttmlir/Utils.h | 3 +- lib/CAPI/TTAttrs.cpp | 22 ----- lib/Dialect/TT/IR/TTDialect.cpp | 15 --- lib/Dialect/TTIR/Transforms/Generic.cpp | 62 +----------- lib/Dialect/TTIR/Transforms/Layout.cpp | 2 +- lib/Dialect/TTNN/Transforms/TTNNLayout.cpp | 3 +- python/TTModule.cpp | 18 ---- test/python/smoketest.py | 1 - .../StableHLOToTTIR/binary/divide_op.mlir | 1 - .../StableHLOToTTIR/binary/multiply_op.mlir | 1 - .../StableHLOToTTIR/binary/remainder_op.mlir | 1 - .../StableHLOToTTIR/binary/subtract_op.mlir | 1 - .../StableHLOToTTIR/convert_op.mlir | 1 - .../StableHLOToTTIR/dynamic_iota_op.mlir | 1 - .../Conversion/StableHLOToTTIR/floor_op.mlir | 1 - .../Conversion/StableHLOToTTIR/gather_op.mlir | 1 - .../get_dimension_size_op.mlir | 1 - .../Conversion/StableHLOToTTIR/iota_op.mlir | 1 - .../StableHLOToTTIR/isfinite_op.mlir | 1 - .../Conversion/StableHLOToTTIR/rsqrt_op.mlir | 1 - .../Conversion/StableHLOToTTIR/slice_op.mlir | 1 - .../StableHLOToTTIR/unary/absolute_op.mlir | 1 - .../StableHLOToTTIR/unary/cbrt_op.mlir | 1 - .../StableHLOToTTIR/unary/exponential_op.mlir | 1 - .../StableHLOToTTIR/unary/negate_op.mlir | 1 - .../StableHLOToTTIR/unary/sqrt_op.mlir | 1 - .../Decomposition/arange_decomposition.mlir | 1 - .../convolution_tests_negative.mlir | 4 - .../TTIR/index/index_tests_negative.mlir | 11 --- .../TTIR/matmul/matmul_tests_negative.mlir | 13 --- .../TTIR/slice/slice_tests_negative.mlir | 15 --- test/ttmlir/Dialect/TTNN/ccl/all_reduce.mlir | 1 - .../TTNN/clamp/clamp_tests_negative.mlir | 2 - test/ttmlir/Silicon/TTNN/ones.mlir | 1 - 41 files changed, 9 insertions(+), 348 deletions(-) delete mode 100644 include/ttmlir/Dialect/TT/Utils/OperandConstraints.h diff --git a/docs/src/adding-an-op.md b/docs/src/adding-an-op.md index 74c713f0c7..cbe4a44c10 100644 --- a/docs/src/adding-an-op.md +++ b/docs/src/adding-an-op.md @@ -53,18 +53,13 @@ There are many things to break down here, starting from the top: be critical for modeling buffer allocation / lifetimes. Note the 3rd argument `AnyRankedTensor:$output`. - Next we have a list of `arguments`. These arguments consist of a mixture of - `Type`s (i.e. `AnyRankedTensor`) and `Attribute`s (i.e. `TT_OperandConstraintArrayAttr`). + `Type`s (i.e. `AnyRankedTensor`) and `Attribute`s. [Read more about Types & Attributes here](https://mlir.llvm.org/docs/DefiningDialects/AttributesAndTypes/#attributes). - `AnyRankedTensor` is part of a tablegen standard library which type aliases to MLIR's builtin Tensor type, with the added constraint that the tensor has a static rank. As much as possible we want to use the builtin types and infrastructure provided by MLIR. - - `TT_OperandConstraintArrayAttr` is a custom attribute that we have defined - in the [`TT`](./autogen/md/Dialect/TTDialect.md) dialect. This attribute is - used to specify constraints on the - operands of the operation. For example, the `TTIR_MatmulOp` requires that - the input tensors be in tile layout, this attribute captures this constraint. - Next we have a list of `results` in this case just 1, which aliases the `output` tensor. 
One drawback of DPS is that the result tensor and the output tensor will appear to have different SSA names in the IR, but they diff --git a/docs/src/overview.md b/docs/src/overview.md index f2e87fa032..b181b822a2 100644 --- a/docs/src/overview.md +++ b/docs/src/overview.md @@ -104,8 +104,7 @@ module attributes {tt.system_desc = #tt.system_desc<[<#tt.arch, #tt defines the type of result - Quotes are added around ttir.multiply since it's part of a - custom dialect, and more custom assembly instructions are - applied to specify operand_constraints. + custom dialect. - Operations typically have operands (arguments) and results which are highlighted with %, these results and operands help to show diff --git a/include/ttmlir-c/TTAttrs.h b/include/ttmlir-c/TTAttrs.h index 263cd1d8e4..6884eb3869 100644 --- a/include/ttmlir-c/TTAttrs.h +++ b/include/ttmlir-c/TTAttrs.h @@ -69,13 +69,6 @@ ttmlirTTIteratorTypeAttrGet(MlirContext ctx, uint32_t iteratorType); MLIR_CAPI_EXPORTED MlirAttribute ttmlirTTIteratorTypeArrayAttrGet( MlirContext ctx, uint32_t *iteratorTypes, size_t iteratorTypesSize); -MLIR_CAPI_EXPORTED MlirAttribute -ttmlirTTOperandConstraintAttrGet(MlirContext ctx, uint32_t OperandConstraint); - -MLIR_CAPI_EXPORTED MlirAttribute ttmlirTTOperandConstraintArrayAttrGet( - MlirContext ctx, uint32_t *OperandConstraints, - size_t OperandConstraintsSize); - MLIR_CAPI_EXPORTED MlirAttribute ttmlirTTTileSizeAttrGet(MlirContext ctx, int64_t y, int64_t x); diff --git a/include/ttmlir/Dialect/TT/IR/TTOpsEnums.td b/include/ttmlir/Dialect/TT/IR/TTOpsEnums.td index aee19f63c6..9ffa63dade 100644 --- a/include/ttmlir/Dialect/TT/IR/TTOpsEnums.td +++ b/include/ttmlir/Dialect/TT/IR/TTOpsEnums.td @@ -126,47 +126,6 @@ def TT_OOBVal : I32EnumAttr<"OOBVal", "TT OOBVal", let cppNamespace = "::mlir::tt"; } -def TT_OperandConstraintSystem : I32BitEnumAttrCaseBit<"System", 0, "system">; -def TT_OperandConstraintDRAM : I32BitEnumAttrCaseBit<"DRAM", 1, "dram">; -def TT_OperandConstraintL1 : I32BitEnumAttrCaseBit<"L1", 2, "l1">; -def TT_OperandConstraintScalar : I32BitEnumAttrCaseBit<"Scalar", 3, "scalar">; -def TT_OperandConstraintTile : I32BitEnumAttrCaseBit<"Tile", 4, "tile">; -def TT_OperandConstraintNone : I32BitEnumAttrCaseBit<"None", 5, "none">; -def TT_OperandConstraintInterleaved : I32BitEnumAttrCaseBit<"Interleaved", 6, "interleaved">; -def TT_OperandConstraintSingleBank : I32BitEnumAttrCaseBit<"SingleBank", 7, "single_bank">; -def TT_OperandConstraintHeightSharded : I32BitEnumAttrCaseBit<"HeightSharded", 8, "height_sharded">; -def TT_OperandConstraintWidthSharded : I32BitEnumAttrCaseBit<"WidthSharded", 9, "width_sharded">; -def TT_OperandConstraintBlockSharded : I32BitEnumAttrCaseBit<"BlockSharded", 10, "block_sharded">; -def TT_OperandConstraintSystemScalar : I32BitEnumAttrCaseGroup<"SystemScalar", [TT_OperandConstraintSystem, TT_OperandConstraintScalar], "system_scalar">; -def TT_OperandConstraintAnyLayout : I32BitEnumAttrCaseGroup<"AnyLayout", [TT_OperandConstraintNone, TT_OperandConstraintInterleaved, TT_OperandConstraintSingleBank, TT_OperandConstraintHeightSharded, TT_OperandConstraintWidthSharded, TT_OperandConstraintBlockSharded], "any_layout">; -def TT_OperandConstraintAny : I32BitEnumAttrCaseGroup<"Any", [TT_OperandConstraintSystem, TT_OperandConstraintDRAM, TT_OperandConstraintL1, TT_OperandConstraintScalar, TT_OperandConstraintTile, TT_OperandConstraintAnyLayout], "any">; -def TT_OperandConstraintAnyDevice : I32BitEnumAttrCaseGroup<"AnyDevice", [TT_OperandConstraintDRAM, TT_OperandConstraintL1, 
TT_OperandConstraintScalar, TT_OperandConstraintTile, TT_OperandConstraintAnyLayout], "any_device">; -def TT_OperandConstraintAnyDeviceTile : I32BitEnumAttrCaseGroup<"AnyDeviceTile", [TT_OperandConstraintDRAM, TT_OperandConstraintL1, TT_OperandConstraintTile, TT_OperandConstraintAnyLayout], "any_device_tile">; -def TT_OperandConstraintL1BlockSharded : I32BitEnumAttrCaseGroup<"L1BlockSharded", [TT_OperandConstraintL1, TT_OperandConstraintScalar, TT_OperandConstraintTile, TT_OperandConstraintBlockSharded], "l1_block_sharded">; -def TT_OperandConstraint : I32BitEnumAttr<"OperandConstraint", "TT Operand Constraints", - [ - TT_OperandConstraintSystem, - TT_OperandConstraintDRAM, - TT_OperandConstraintL1, - TT_OperandConstraintScalar, - TT_OperandConstraintTile, - TT_OperandConstraintNone, - TT_OperandConstraintInterleaved, - TT_OperandConstraintSingleBank, - TT_OperandConstraintHeightSharded, - TT_OperandConstraintWidthSharded, - TT_OperandConstraintBlockSharded, - TT_OperandConstraintSystemScalar, - TT_OperandConstraintAnyLayout, - TT_OperandConstraintAny, - TT_OperandConstraintAnyDevice, - TT_OperandConstraintAnyDeviceTile, - TT_OperandConstraintL1BlockSharded, - ]> { - let genSpecializedAttr = 0; - let cppNamespace = "::mlir::tt"; -} - def TT_ChipCapabilityPCIE : I32BitEnumAttrCaseBit<"PCIE", 0, "pcie">; def TT_ChipCapabilityHostMMIO : I32BitEnumAttrCaseBit<"HostMMIO", 1, "host_mmio">; diff --git a/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td b/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td index d5dc22e28d..c690b8bca8 100644 --- a/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td +++ b/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td @@ -428,12 +428,6 @@ def TT_IteratorTypeAttr : EnumAttr def TT_IteratorTypeArrayAttr : TypedArrayAttrBase; -def TT_OperandConstraintAttr : EnumAttr { - let assemblyFormat = "`<` $value `>`"; -} - -def TT_OperandConstraintArrayAttr : TypedArrayAttrBase; - def TT_ArgumentAllocationAttr : TT_Attr<"ArgumentAllocation", "arg_alloc", []> { let summary = "Argument allocation attribute in TT dialect"; let description = [{ diff --git a/include/ttmlir/Dialect/TT/Utils/OperandConstraints.h b/include/ttmlir/Dialect/TT/Utils/OperandConstraints.h deleted file mode 100644 index b6147102d9..0000000000 --- a/include/ttmlir/Dialect/TT/Utils/OperandConstraints.h +++ /dev/null @@ -1,95 +0,0 @@ -// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef TTMLIR_DIALECT_TT_UTILS_OPERANDCONSTRAINTS_H -#define TTMLIR_DIALECT_TT_UTILS_OPERANDCONSTRAINTS_H - -#include "ttmlir/Dialect/TT/IR/TT.h" -#include "ttmlir/Dialect/TTIR/IR/TTIROps.h" - -namespace mlir::tt { - -inline OperandConstraint -memorySpaceAsOperandConstraint(MemorySpace memorySpace) { - switch (memorySpace) { - case MemorySpace::System: - case MemorySpace::SystemMMIO: - return OperandConstraint::System; - case MemorySpace::DeviceDRAM: - return OperandConstraint::DRAM; - case MemorySpace::DeviceL1: - return OperandConstraint::L1; - } -} - -inline OperandConstraint -memoryLayoutAsOperandConstraint(TensorMemoryLayout memoryLayout) { - switch (memoryLayout) { - case TensorMemoryLayout::None: - return OperandConstraint::None; - case TensorMemoryLayout::Interleaved: - return OperandConstraint::Interleaved; - case TensorMemoryLayout::SingleBank: - return OperandConstraint::SingleBank; - case TensorMemoryLayout::HeightSharded: - return OperandConstraint::HeightSharded; - case TensorMemoryLayout::WidthSharded: - return OperandConstraint::WidthSharded; - case TensorMemoryLayout::BlockSharded: - return 
OperandConstraint::BlockSharded; - } -} - -inline MemorySpace getLegalMemorySpace(OperandConstraint operandConstraint, - MemorySpace defaultMemorySpace) { - if (bitEnumContainsAny(operandConstraint, - memorySpaceAsOperandConstraint(defaultMemorySpace))) { - return defaultMemorySpace; - } - if (bitEnumContainsAny(operandConstraint, OperandConstraint::DRAM)) { - return MemorySpace::DeviceDRAM; - } - if (bitEnumContainsAny(operandConstraint, OperandConstraint::L1)) { - return MemorySpace::DeviceL1; - } - return MemorySpace::System; -} - -inline TensorMemoryLayout -getLegalTensorMemoryLayout(OperandConstraint operandConstraint, - MemorySpace targetMemorySpace, - TensorMemoryLayout defaultDeviceMemLayout) { - if (defaultDeviceMemLayout == TensorMemoryLayout::None) { - return TensorMemoryLayout::None; - } - - if (isSystemMemorySpace(targetMemorySpace)) { - return TensorMemoryLayout::None; - } - - assert(isDeviceMemorySpace(targetMemorySpace)); - if (bitEnumContainsAny(operandConstraint, memoryLayoutAsOperandConstraint( - defaultDeviceMemLayout))) { - return defaultDeviceMemLayout; - } - - std::map validLayoutsMap = { - {OperandConstraint::Interleaved, TensorMemoryLayout::Interleaved}, - {OperandConstraint::SingleBank, TensorMemoryLayout::SingleBank}, - {OperandConstraint::HeightSharded, TensorMemoryLayout::HeightSharded}, - {OperandConstraint::WidthSharded, TensorMemoryLayout::WidthSharded}, - {OperandConstraint::BlockSharded, TensorMemoryLayout::BlockSharded}}; - - for (const auto &[constraintLayout, memLayout] : validLayoutsMap) { - if (bitEnumContainsAny(operandConstraint, constraintLayout)) { - return memLayout; - } - } - - return TensorMemoryLayout::None; -} - -} // namespace mlir::tt - -#endif // TTMLIR_DIALECT_TT_UTILS_OPERANDCONSTRAINTS_H diff --git a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td index ff1cc61bed..e685172c24 100644 --- a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td +++ b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td @@ -62,7 +62,6 @@ def TTIR_GenericOp : TTIR_DPSOp<"generic", [AttrSizedOperandSegments]> { TT_GridAttr:$grid, AffineMapArrayAttr:$indexing_maps, TT_IteratorTypeArrayAttr:$iterator_types, - TT_OperandConstraintArrayAttr:$operand_constraints, DefaultValuedOptionalAttr:$operand_cb_mapping); // index of input operand and index of cb go together let results = (outs Variadic:$results); let regions = (region AnyRegion:$region); @@ -126,11 +125,6 @@ def TTIR_ToLayoutOp : TTIR_Op<"to_layout", [DestinationStyleOpInterface, TTIROpI let extraClassDeclaration = [{ MutableOperandRange getDpsInitsMutable() { return getOutputMutable(); } - ArrayAttr getOperandConstraints() { - return nullptr; - // TODO return below, but we need a way to properly create an ArrayAttr: - // return {OperandConstraint::Any, OperandConstraint::Any}; - } struct CompoundComponents { bool isLayoutChange; diff --git a/include/ttmlir/Utils.h b/include/ttmlir/Utils.h index 49dad79e5e..ec7838b2f3 100644 --- a/include/ttmlir/Utils.h +++ b/include/ttmlir/Utils.h @@ -92,8 +92,7 @@ std::string join(const llvm::SmallVector &vec, // This util function can be used as a helper to create an attribute from an // array of attributes for any type defined like for example: // -// `def TT_OperandConstraintArrayAttr -// : TypedArrayAttrBase;` +// def TT_IteratorTypeArrayAttr : TypedArrayAttrBase; // // since these don't get any special Cpp class generated for them from // tablegen. 
diff --git a/lib/CAPI/TTAttrs.cpp b/lib/CAPI/TTAttrs.cpp index c329f41d56..ae2bb49d55 100644 --- a/lib/CAPI/TTAttrs.cpp +++ b/lib/CAPI/TTAttrs.cpp @@ -167,28 +167,6 @@ MlirAttribute ttmlirTTIteratorTypeArrayAttrGet(MlirContext ctx, return wrap(ArrayAttr::get(unwrap(ctx), iteratorTypesArray)); } -MlirAttribute ttmlirTTOperandConstraintAttrGet(MlirContext ctx, - uint32_t operandConstraint) { - return wrap(OperandConstraintAttr::get( - unwrap(ctx), static_cast(operandConstraint))); -} - -MlirAttribute -ttmlirTTOperandConstraintArrayAttrGet(MlirContext ctx, - uint32_t *operandConstraints, - size_t operandConstraintsSize) { - std::vector operandConstraintsEnumArray( - operandConstraints, operandConstraints + operandConstraintsSize); - std::vector operandConstraintsArray; - - for (auto operandEnum : operandConstraintsEnumArray) { - operandConstraintsArray.push_back(OperandConstraintAttr::get( - unwrap(ctx), static_cast(operandEnum))); - } - - return wrap(ArrayAttr::get(unwrap(ctx), operandConstraintsArray)); -} - MlirAttribute ttmlirTTTileSizeAttrGet(MlirContext ctx, int64_t y, int64_t x) { return wrap(TileSizeAttr::get(unwrap(ctx), y, x)); } diff --git a/lib/Dialect/TT/IR/TTDialect.cpp b/lib/Dialect/TT/IR/TTDialect.cpp index 1ac8a22239..764e951432 100644 --- a/lib/Dialect/TT/IR/TTDialect.cpp +++ b/lib/Dialect/TT/IR/TTDialect.cpp @@ -31,21 +31,6 @@ struct TTOpAsmDialectInterface : public OpAsmDialectInterface { os << mlir::cast(attr).getValue(); return AliasResult::OverridableAlias; } - if (llvm::isa(attr)) { - auto value = mlir::cast(attr).getValue(); - if (value == OperandConstraint::Any) { - os << "any"; - } else if (value == OperandConstraint::AnyDevice) { - os << "any_device"; - } else if (value == OperandConstraint::AnyDeviceTile) { - os << "any_device_tile"; - } else if (value == OperandConstraint::L1BlockSharded) { - os << "l1_block_sharded"; - } else { - os << "operand_constraint"; - } - return AliasResult::OverridableAlias; - } if (llvm::isa(attr)) { os << "device"; return AliasResult::OverridableAlias; diff --git a/lib/Dialect/TTIR/Transforms/Generic.cpp b/lib/Dialect/TTIR/Transforms/Generic.cpp index 15064ed346..6fc3eb83b3 100644 --- a/lib/Dialect/TTIR/Transforms/Generic.cpp +++ b/lib/Dialect/TTIR/Transforms/Generic.cpp @@ -149,23 +149,6 @@ class TTIRKernelGenericRewriter : public OpRewritePattern { llvm_unreachable("Unsupported kernel kind"); } - static ArrayAttr createOperandConstraints(PatternRewriter &rewriter, - StringRef kind, - mlir::OperandRange operands) { - auto numOperands = operands.size(); - if (kind == "eltwise") { - return rewriter.getArrayAttr(SmallVector( - numOperands, rewriter.getAttr( - OperandConstraint::AnyDevice))); - } - if (kind == "matmul") { - return rewriter.getArrayAttr(SmallVector( - numOperands, rewriter.getAttr( - OperandConstraint::AnyDeviceTile))); - } - llvm_unreachable("Unsupported kernel kind"); - } - LogicalResult matchAndRewrite(KernelOp op, PatternRewriter &rewriter) const final { if (mlir::isa(op.getOperation()->getParentOp())) { @@ -175,12 +158,10 @@ class TTIRKernelGenericRewriter : public OpRewritePattern { // Create a dispatch op auto [indexingMaps, iteratorTypes] = createIndexingMaps(rewriter, op.getKind(), op.getOperands()); - auto constraints = - createOperandConstraints(rewriter, op.getKind(), op.getOperands()); auto dispatch = rewriter.create( op.getLoc(), op.getResults().getTypes(), op.getInputs(), ValueRange() /* cbs */, op.getOutputs(), rewriter.getAttr(), - indexingMaps, iteratorTypes, constraints); + indexingMaps, iteratorTypes); 
// Create a new basic block for the dispatch op and create block arguments Block *block = rewriter.createBlock(&dispatch.getRegion()); @@ -246,9 +227,6 @@ class TTIRGenericRegionRewriter // Create a generic op. auto [indexingMaps, iteratorTypes] = op.getIndexingMaps(rewriter); - auto constraints = rewriter.getArrayAttr(SmallVector( - op->getNumOperands(), rewriter.getAttr( - OperandConstraint::AnyDeviceTile))); // For testing purposes try getting grid of the resulting tensor and put the // op in the grid. @@ -264,7 +242,7 @@ class TTIRGenericRegionRewriter auto genericOp = rewriter.create( op.getLoc(), op->getResults().getTypes(), dps.getDpsInputs(), ValueRange() /* cbs */, dps.getDpsInits(), gridAttr, indexingMaps, - iteratorTypes, constraints); + iteratorTypes); // Create a new basic block for the generic op and create block arguments. Block *block = rewriter.createBlock(&genericOp.getRegion()); @@ -439,14 +417,8 @@ class TTIRGenericOpCBsRewriter : public OpRewritePattern { SmallVector cbValues; SmallVector operandCBMapping; - SmallVector oldConstraints; - SmallVector cbConstraints; - size_t i = 0; for (auto operand : generic->getOperands()) { - size_t operandIdx = i++; - oldConstraints.push_back(generic.getOperandConstraints()[operandIdx]); - auto ty = mlir::cast(operand.getType()); // Enforcing tiled layout as in kernel we always want to work with tiles. @@ -472,42 +444,14 @@ class TTIRGenericOpCBsRewriter : public OpRewritePattern { generic->getLoc(), ty.getShape(), ty.getElementType(), desiredLayout); cbValues.push_back(emptyOp.getResult()); operandCBMapping.push_back(cbValues.size() - 1); - - // Inheriting constraints from the original operand. - // OperandConstraint inherittedConstraint = - // mlir::cast( - // generic.getOperandConstraints()[operandIdx]) - // .getValue(); - // inherittedConstraint = - // bitEnumSet(inherittedConstraint, OperandConstraint::L1); - // inherittedConstraint = - // bitEnumClear(inherittedConstraint, OperandConstraint::DRAM); - // inherittedConstraint = - // bitEnumClear(inherittedConstraint, OperandConstraint::System); - - // Fixing constraint to L1 for the CB operand. - // TODO(rpavlovic) remove or use code above when we decide on the operand - // constraints model. 
- cbConstraints.push_back( - rewriter.getAttr(OperandConstraint::L1)); } - SmallVector combinedConstraints; - combinedConstraints.append(oldConstraints.begin(), - oldConstraints.begin() + - generic.getInputs().size()); - combinedConstraints.append(cbConstraints.begin(), cbConstraints.end()); - combinedConstraints.append(oldConstraints.begin() + - generic.getInputs().size(), - oldConstraints.end()); - auto newConstraintsArray = rewriter.getArrayAttr(combinedConstraints); - rewriter.setInsertionPointAfter(generic); auto newGenericOp = rewriter.create( generic->getLoc(), generic.getResultTypes(), generic.getInputs(), cbValues, generic.getOutputs(), generic.getGrid(), generic.getIndexingMaps(), generic.getIteratorTypes(), - newConstraintsArray, operandCBMapping); + operandCBMapping); auto &oldRegion = generic.getRegion(); newGenericOp->getRegion(0).takeBody(oldRegion); diff --git a/lib/Dialect/TTIR/Transforms/Layout.cpp b/lib/Dialect/TTIR/Transforms/Layout.cpp index eca9747304..3dc87f03a2 100644 --- a/lib/Dialect/TTIR/Transforms/Layout.cpp +++ b/lib/Dialect/TTIR/Transforms/Layout.cpp @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "ttmlir/Dialect/TT/IR/TT.h" -#include "ttmlir/Dialect/TT/Utils/OperandConstraints.h" +#include "ttmlir/Dialect/TTIR/IR/TTIROps.h" #include "ttmlir/Dialect/TTIR/Transforms/Passes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" diff --git a/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp b/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp index 80b76d6d45..e148b575fb 100644 --- a/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp +++ b/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp @@ -2,7 +2,8 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "ttmlir/Dialect/TT/Utils/OperandConstraints.h" +#include "ttmlir/Dialect/TT/IR/TT.h" +#include "ttmlir/Dialect/TTIR/IR/TTIROps.h" #include "ttmlir/Dialect/TTNN/Transforms/Passes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" diff --git a/python/TTModule.cpp b/python/TTModule.cpp index 9b4bad5873..f64630bf19 100644 --- a/python/TTModule.cpp +++ b/python/TTModule.cpp @@ -397,24 +397,6 @@ void populateTTModule(py::module &m) { return static_cast(self.getValue()); }); - tt_attribute_class(m, "OperandConstraintAttr") - .def_static("get", - [](MlirContext ctx, uint32_t operandConstraint) { - return wrap(tt::OperandConstraintAttr::get( - unwrap(ctx), - static_cast(operandConstraint))); - }) - .def_static( - "get", - [](MlirContext ctx, std::vector attributesArray) { - return ::ttmlir::utils::wrapArrayOfMlirAttributesAsAttribute( - ctx, attributesArray); - }) - .def_property_readonly("operand_constraint_as_int", - [](tt::OperandConstraintAttr self) { - return static_cast(self.getValue()); - }); - tt_type_class(m, "DeviceType") .def_static( "get", diff --git a/test/python/smoketest.py b/test/python/smoketest.py index dfc324e381..88d871101e 100644 --- a/test/python/smoketest.py +++ b/test/python/smoketest.py @@ -12,7 +12,6 @@ module = Module.parse( """ - #any_device = #tt.operand_constraint %0 = tensor.empty() : tensor<64x128xf32> %1 = tensor.empty() : tensor<64x128xf32> %2 = tensor.empty() : tensor<64x128xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/binary/divide_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/binary/divide_op.mlir index 9505428888..bd8a698c7f 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/binary/divide_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/binary/divide_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = 
#tt.operand_constraint module @jit_eltwise_divice attributes {} { func.func public @test_divide(%arg0: tensor<13x21x3xf32>, %arg1: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.divide %arg0, %arg1 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/binary/multiply_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/binary/multiply_op.mlir index 390fc156b4..63d8773bd7 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/binary/multiply_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/binary/multiply_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_multiply attributes {} { func.func public @test_multiply(%arg0: tensor<13x21x3xf32>, %arg1: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.multiply %arg0, %arg1 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/binary/remainder_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/binary/remainder_op.mlir index bbca3a3f99..1d4a6a20c8 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/binary/remainder_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/binary/remainder_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_remainder attributes {} { func.func public @test_remainder(%arg0: tensor<32x32xf32>, %arg1: tensor<32x32xf32>) -> tensor<32x32xf32> { %0 = stablehlo.remainder %arg0, %arg1 : tensor<32x32xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/binary/subtract_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/binary/subtract_op.mlir index 1be903064c..4003dbf73f 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/binary/subtract_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/binary/subtract_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_subtract attributes {} { func.func public @test_subtract(%arg0: tensor<13x21x3xf32>, %arg1: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.subtract %arg0, %arg1 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/convert_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/convert_op.mlir index 467325566a..ee2862cadd 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/convert_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/convert_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_convert attributes {} { func.func public @test_convert(%arg0: tensor<2x4xf32>) -> tensor<2x4xbf16> { %0 = stablehlo.convert %arg0 : (tensor<2x4xf32>) -> tensor<2x4xbf16> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/dynamic_iota_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/dynamic_iota_op.mlir index 43241ac6f0..f92517600a 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/dynamic_iota_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/dynamic_iota_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_dnamic_iota attributes {} { func.func public @test_dynamic_iota() -> tensor<1x32x128x128xf32> { // CHECK: %[[C:.*]] = "ttir.arange"[[C:.*]] diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/floor_op.mlir 
b/test/ttmlir/Conversion/StableHLOToTTIR/floor_op.mlir index 2df91d6c5e..90e99aca6a 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/floor_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/floor_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_floor attributes {} { func.func public @test_floor(%arg0: tensor<32x32x3xf32>) -> tensor<32x32x3xf32> { %0 = stablehlo.floor %arg0 : tensor<32x32x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/gather_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/gather_op.mlir index e80bb75886..a9b4071edf 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/gather_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/gather_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_gather attributes {} { func.func public @test_gather_0(%operand: tensor<32000x1024xf32>, %start_indices: tensor<1x32xi32>) -> tensor<1x32x1024xf32> { %0 = "stablehlo.gather"(%operand, %start_indices) <{dimension_numbers = #stablehlo.gather, indices_are_sorted = false, slice_sizes = array}> : (tensor<32000x1024xf32>, tensor<1x32xi32>) -> tensor<1x32x1024xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/get_dimension_size_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/get_dimension_size_op.mlir index 5e2a655388..f5df438612 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/get_dimension_size_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/get_dimension_size_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_get_dimension_size attributes {} { func.func public @test_get_dimension_size(%arg0: tensor<13x21x3xf32>) -> tensor { %0 = stablehlo.get_dimension_size %arg0, dim = 1 : (tensor<13x21x3xf32>) -> tensor diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/iota_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/iota_op.mlir index 857a621bb0..d7620cd4a6 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/iota_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/iota_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_iota attributes {} { func.func public @test_iota() -> tensor<1x32x128x128xf32> { // CHECK: %[[C:.*]] = "ttir.arange"[[C:.*]] diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/isfinite_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/isfinite_op.mlir index bdcef7a959..5f7c3d8359 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/isfinite_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/isfinite_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_isfinite attributes {} { func.func public @test_isfinite(%arg0: tensor<32x32x3xf32>) -> tensor<32x32x3xi1> { // CHECK: %[[E:.*]] = tensor.empty() : tensor<32x32x3xbf16> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/rsqrt_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/rsqrt_op.mlir index 13ceed52e9..f52bb72bf0 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/rsqrt_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/rsqrt_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s 
-#any_device = #tt.operand_constraint module @jit_eltwise_rsqrt attributes {} { func.func public @test_rsqrt(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.rsqrt %arg0 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/slice_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/slice_op.mlir index 5839f38652..c94acfb886 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/slice_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/slice_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_subtract attributes {} { func.func @slice_op(%arg0: tensor<32x64xf32>) -> tensor<8x8xf32> { // CHECK: %[[C:.*]] = tensor.empty[[C:.*]] diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/unary/absolute_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/unary/absolute_op.mlir index dbed57cd3b..1d43609f8f 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/unary/absolute_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/unary/absolute_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_abs attributes {} { func.func public @test_abs(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.abs %arg0 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/unary/cbrt_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/unary/cbrt_op.mlir index 49eef5df7d..e754202468 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/unary/cbrt_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/unary/cbrt_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_rsqrt attributes {} { func.func public @test_cbrt(%arg0: tensor<4xf64>) -> tensor<4xf64> { %0 = stablehlo.cbrt %arg0 : tensor<4xf64> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/unary/exponential_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/unary/exponential_op.mlir index 6b93c664c7..975652f49b 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/unary/exponential_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/unary/exponential_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_exp attributes {} { func.func public @test_exp(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.exponential %arg0 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/unary/negate_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/unary/negate_op.mlir index 65f491c038..68cfed1e9e 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/unary/negate_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/unary/negate_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_neg attributes {} { func.func public @test_neg(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.negate %arg0 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/unary/sqrt_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/unary/sqrt_op.mlir index 72d2449b05..99023e472b 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/unary/sqrt_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/unary/sqrt_op.mlir @@ 
-1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_sqrt attributes {} { func.func public @test_sqrt(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.sqrt %arg0 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir b/test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir index 6f72e56f17..6e5ef160c3 100644 --- a/test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir +++ b/test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir @@ -1,5 +1,4 @@ // RUN: ttmlir-opt --ttir-to-ttir-decomposition %s | FileCheck %s -#any_device = #tt.operand_constraint module attributes {} { func.func @forward(%arg0: tensor<1x32x128x128xf32>) -> tensor<1x32x128x128xf32> { // CHECK: %[[C:.*]] = "ttir.arange"[[C:.*]] diff --git a/test/ttmlir/Dialect/TTIR/convolution/convolution_tests_negative.mlir b/test/ttmlir/Dialect/TTIR/convolution/convolution_tests_negative.mlir index 278bb9f211..6f960f0dda 100644 --- a/test/ttmlir/Dialect/TTIR/convolution/convolution_tests_negative.mlir +++ b/test/ttmlir/Dialect/TTIR/convolution/convolution_tests_negative.mlir @@ -28,7 +28,6 @@ module @jit_convolution_bad_spatial_dimensions { } // ----- -#any_device_tile = #tt.operand_constraint module @jit_convolution_bad_stride_dimensions { func.func public @test_illegal_convolution(%arg0: tensor<1x3x100x100xbf16>, %arg1: tensor<7x3x3x3xbf16>) -> tensor<1x7x100x100xbf16> { %0 = tensor.empty() : tensor<1x7x100x100xbf16> @@ -58,7 +57,6 @@ module @jit_convolution_bad_stride_dimensions { } // ----- -#any_device_tile = #tt.operand_constraint module @jit_convolution_bad_input_tensor { func.func public @test_illegal_convolution(%arg0: tensor<1x3x100x100x100xbf16>, %arg1: tensor<7x3x3x3xbf16>) -> tensor<1x7x100x100xbf16> { %0 = tensor.empty() : tensor<1x7x100x100xbf16> @@ -88,7 +86,6 @@ module @jit_convolution_bad_input_tensor { } // ----- -#any_device_tile = #tt.operand_constraint module @jit_convolution_bad_weight_tensor { func.func public @test_illegal_convolution(%arg0: tensor<1x3x100x100xbf16>, %arg1: tensor<20x7x3x3x3xbf16>) -> tensor<1x7x100x100xbf16> { %0 = tensor.empty() : tensor<1x7x100x100xbf16> @@ -118,7 +115,6 @@ module @jit_convolution_bad_weight_tensor { } // ----- -#any_device_tile = #tt.operand_constraint module @jit_convolution_bad_bias_tensor { func.func public @test_illegal_convolution(%arg0: tensor<1x3x100x100xbf16>, %arg1: tensor<7x3x3x3xbf16>, %arg2: tensor<1x1x7xbf16>) -> tensor<1x7x100x100xbf16> { %0 = tensor.empty() : tensor<1x7x100x100xbf16> diff --git a/test/ttmlir/Dialect/TTIR/index/index_tests_negative.mlir b/test/ttmlir/Dialect/TTIR/index/index_tests_negative.mlir index 9f5d8b04ae..6b1d5faee3 100644 --- a/test/ttmlir/Dialect/TTIR/index/index_tests_negative.mlir +++ b/test/ttmlir/Dialect/TTIR/index/index_tests_negative.mlir @@ -13,7 +13,6 @@ module attributes {} { // Verify that the parsing fails if the dim is not in the rank range of the input tensor // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_invalid_begins(%arg0: tensor<3x128x64xbf16>) -> tensor<3x128x64xbf16> { %0 = tensor.empty() : tensor<3x128x64xbf16> @@ -25,7 +24,6 @@ module attributes {} { // Verify that the parsing fails if the output type is not equal to the input tensor type // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_invalid_output_datatype(%arg0: 
tensor<3x128x64xbf16>) -> tensor<3x128x32xf32> { %0 = tensor.empty() : tensor<3x128x32xf32> @@ -37,7 +35,6 @@ module attributes {} { // Verify that the parsing fails if the output rank is not equal to the input tensor rank // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_input_output_rank_missmatch(%arg0: tensor<3x128x64xbf16>) -> tensor<3x64x64x1xbf16> { %0 = tensor.empty() : tensor<3x64x64x1xbf16> @@ -49,7 +46,6 @@ module attributes {} { // Verify that the parsing fails if the begin value exceeds positive limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_invalid_begin_positive(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x1x128x64xbf16> { %0 = tensor.empty() : tensor<10x1x128x64xbf16> @@ -61,7 +57,6 @@ module attributes {} { // Verify that the parsing fails if the begin value exceeds negative limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_invalid_begin_negative(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x3x64x64xbf16> { %0 = tensor.empty() : tensor<10x3x64x64xbf16> @@ -73,7 +68,6 @@ module attributes {} { // Verify that the parsing fails if the end value exceeds positive limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_invalid_end_positive(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x3x128x64xbf16> { %0 = tensor.empty() : tensor<10x3x128x64xbf16> @@ -85,7 +79,6 @@ module attributes {} { // Verify that the parsing fails if the end value exceeds positive limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_invalid_end_negative(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x3x128x64xbf16> { %0 = tensor.empty() : tensor<10x3x128x64xbf16> @@ -97,7 +90,6 @@ module attributes {} { // Verify that the parsing fails if the step value is equal to zero // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_step_is_zero(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x3x128x64xbf16> { %0 = tensor.empty() : tensor<10x3x128x64xbf16> @@ -109,7 +101,6 @@ module attributes {} { // Verify that the parsing fails if the begin index is greater than end and step is positive // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_begin_greater_than_end_positive_step(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x3x128x64xbf16> { %0 = tensor.empty() : tensor<10x3x128x64xbf16> @@ -121,7 +112,6 @@ module attributes {} { // Verify that the parsing fails if the end index is greater than begin and step is negative // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_begin_less_than_end_negative_step(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x3x128x64xbf16> { %0 = tensor.empty() : tensor<10x3x128x64xbf16> @@ -133,7 +123,6 @@ module attributes {} { // Verify that the parsing fails if there is missmatch in output dimension // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_invalid_output_shape(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x3x128x32xbf16> { %0 = tensor.empty() : tensor<10x3x128x32xbf16> diff --git a/test/ttmlir/Dialect/TTIR/matmul/matmul_tests_negative.mlir b/test/ttmlir/Dialect/TTIR/matmul/matmul_tests_negative.mlir index f15379c8f7..d6c20b0ac5 100644 --- a/test/ttmlir/Dialect/TTIR/matmul/matmul_tests_negative.mlir +++ 
b/test/ttmlir/Dialect/TTIR/matmul/matmul_tests_negative.mlir @@ -12,7 +12,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_1d_1d_inner_dimension_missmatch(%arg0: tensor<128xbf16>, %arg1: tensor) -> tensor<1xbf16> { // CHECK: error: 'ttir.matmul' op Input B must be at least a 1D tensor @@ -24,7 +23,6 @@ module attributes {} { // Verify that the parsing fails if the output is a scalar // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_1d_1d_inner_dimension_missmatch(%arg0: tensor<128xbf16>, %arg1: tensor<128xbf16>) -> tensor { // CHECK: error: 'ttir.matmul' op Scalar output is not supported, output must be at least a 1D tensor @@ -35,7 +33,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_1d_1d_inner_dimension_missmatch(%arg0: tensor<128xbf16>, %arg1: tensor<128xbf16>) -> tensor<2xbf16> { // CHECK: error: 'ttir.matmul' op Scalar output must be a 1D tensor of size 1 @@ -47,7 +44,6 @@ module attributes {} { // Inner dimension mismatch tests // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_1d_1d_inner_dimension_missmatch(%arg0: tensor<128xbf16>, %arg1: tensor<64xbf16>) -> tensor<1xbf16> { // CHECK: error: 'ttir.matmul' op Input A[-1](128) and B[-2](64) must have matching inner dimensions @@ -58,7 +54,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_1d_2d_inner_dimension_missmatch(%arg0: tensor<64xbf16>, %arg1: tensor<128x64xbf16>) -> tensor<64xbf16> { // CHECK: error: 'ttir.matmul' op Input A[-1](64) and B[-2](128) must have matching inner dimensions @@ -69,7 +64,6 @@ func.func @matmul_negative_1d_2d_inner_dimension_missmatch(%arg0: tensor<64xbf16 } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_2d_1d_inner_dimension_missmatch(%arg0: tensor<64x128xbf16>, %arg1: tensor<64xbf16>) -> tensor<64xbf16> { // CHECK: error: 'ttir.matmul' op Input A[-1](128) and B[-2](64) must have matching inner dimensions @@ -80,7 +74,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_2d_2d_inner_dimension_missmatch(%arg0: tensor<64x128xbf16>, %arg1: tensor<64x128xbf16>) -> tensor<64x64xbf16> { // CHECK: error: 'ttir.matmul' op Input A[-1](128) and B[-2](64) must have matching inner dimensions @@ -91,7 +84,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_nd_nd_inner_dimension_missmatch(%arg0: tensor<7x64x128xbf16>, %arg1: tensor<1x64x128xbf16>) -> tensor<7x64x64xbf16> { // CHECK: error: 'ttir.matmul' op Input A[-1](128) and B[-2](64) must have matching inner dimensions @@ -103,7 +95,6 @@ module attributes {} { // Batch dimension mismatch tests // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_nd_nd_same_rank_batch_broadcast_incompatible_1(%arg0: tensor<7x64x128xbf16>, %arg1: tensor<2x128x64xbf16>) -> tensor<7x64x64xbf16> { // CHECK: error: 'ttir.matmul' op Batch dimensions of input A(7) and B(2) are not broadcast compatible @@ -114,7 +105,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func 
@matmul_negative_nd_nd_same_rank_batch_broadcast_incompatible_2(%arg0: tensor<2x7x64x128xbf16>, %arg1: tensor<7x1x128x64xbf16>) -> tensor<7x7x64x64xbf16> { // CHECK: error: 'ttir.matmul' op Batch dimensions of input A(2,7) and B(7,1) are not broadcast compatible @@ -125,7 +115,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_nd_nd_different_rank_batch_broadcast_incompatible(%arg0: tensor<12x2x7x64x128xbf16>, %arg1: tensor<7x1x128x64xbf16>) -> tensor<12x7x7x64x64xbf16> { // CHECK: error: 'ttir.matmul' op Batch dimensions of input A(12,2,7) and B(7,1) are not broadcast compatible @@ -137,7 +126,6 @@ module attributes {} { // Output shape mismatch tests // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_2d_2d_inner_dimension_missmatch(%arg0: tensor<64x128xbf16>, %arg1: tensor<128x64xbf16>) -> tensor<64xbf16> { // CHECK: error: 'ttir.matmul' op Output shape rank(1) must match the expected output shape rank(2) @@ -148,7 +136,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_2d_2d_inner_dimension_missmatch(%arg0: tensor<64x128xbf16>, %arg1: tensor<128x64xbf16>) -> tensor<64x128xbf16> { // CHECK: error: 'ttir.matmul' op Output shape dimension[1](128) doesn't match the expected output shape dimension[1](64) diff --git a/test/ttmlir/Dialect/TTIR/slice/slice_tests_negative.mlir b/test/ttmlir/Dialect/TTIR/slice/slice_tests_negative.mlir index db444258ec..5e7eaf43c6 100644 --- a/test/ttmlir/Dialect/TTIR/slice/slice_tests_negative.mlir +++ b/test/ttmlir/Dialect/TTIR/slice/slice_tests_negative.mlir @@ -13,7 +13,6 @@ module attributes {} { // Verify that the parsing fails if the begins size is not equal to the input tensor rank // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_begins(%arg0: tensor<3x128x64xbf16>) -> tensor<1x64x64xbf16> { %0 = tensor.empty() : tensor<1x64x64xbf16> @@ -25,7 +24,6 @@ module attributes {} { // Verify that the parsing fails if the ends size is not equal to the input tensor rank // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_ends(%arg0: tensor<3x128x64xbf16>) -> tensor<1x64x64xbf16> { %0 = tensor.empty() : tensor<1x64x64xbf16> @@ -37,7 +35,6 @@ module attributes {} { // Verify that the parsing fails if the step size is not equal to the input tensor rank // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_step(%arg0: tensor<3x128x64xbf16>) -> tensor<1x64x64xbf16> { %0 = tensor.empty() : tensor<1x64x64xbf16> @@ -49,7 +46,6 @@ module attributes {} { // Verify that the parsing fails if the output type is not equal to the input tensor type // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_output_datatype(%arg0: tensor<3x128x64xbf16>) -> tensor<1x64x64xf32> { %0 = tensor.empty() : tensor<1x64x64xf32> @@ -61,7 +57,6 @@ module attributes {} { // Verify that the parsing fails if the output rank is not equal to the input tensor rank // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_input_output_rank_missmatch(%arg0: tensor<3x128x64xbf16>) -> tensor<1x1x64x64xbf16> { %0 = tensor.empty() : tensor<1x1x64x64xbf16> @@ -73,7 +68,6 @@ module attributes {} { // Verify that the parsing fails if the 
begin value exceeds positive limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_begin_positive(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x8xbf16> { %0 = tensor.empty() : tensor<4x1x16x8xbf16> @@ -85,7 +79,6 @@ module attributes {} { // Verify that the parsing fails if the begin value exceeds negative limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_begin_negative(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x8xbf16> { %0 = tensor.empty() : tensor<4x1x16x8xbf16> @@ -97,7 +90,6 @@ module attributes {} { // Verify that the parsing fails if the end value exceeds positive limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_end_positive(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x8xbf16> { %0 = tensor.empty() : tensor<4x1x16x8xbf16> @@ -109,7 +101,6 @@ module attributes {} { // Verify that the parsing fails if the end value exceeds negative limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_end_negative(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x8xbf16> { %0 = tensor.empty() : tensor<4x1x16x8xbf16> @@ -121,7 +112,6 @@ module attributes {} { // Verify that the parsing fails if the step value is equal to zero // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_step_is_zero(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x8xbf16> { %0 = tensor.empty() : tensor<4x1x16x8xbf16> @@ -133,7 +123,6 @@ module attributes {} { // Verify that the parsing fails if the begin index is greater than end and step is positive // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_begin_greater_than_end_positive_step(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x8xbf16> { %0 = tensor.empty() : tensor<4x1x16x8xbf16> @@ -144,7 +133,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_begin_greater_than_end_positive_step(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x8x8xbf16> { %0 = tensor.empty() : tensor<4x1x8x8xbf16> @@ -156,7 +144,6 @@ module attributes {} { // Verify that the parsing fails if the end index is greater than begin and step is negative // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_begin_less_than_end_negative_step(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x8xbf16> { %0 = tensor.empty() : tensor<4x1x16x8xbf16> @@ -167,7 +154,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_begin_less_than_end_negative_step(%arg0: tensor<10x3x128x64xbf16>) -> tensor<5x1x16x8xbf16> { %0 = tensor.empty() : tensor<5x1x16x8xbf16> @@ -179,7 +165,6 @@ module attributes {} { // Verify that the parsing fails if there is missmatch in output dimension // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_output_shape(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x16xbf16> { %0 = tensor.empty() : tensor<4x1x16x16xbf16> diff --git a/test/ttmlir/Dialect/TTNN/ccl/all_reduce.mlir b/test/ttmlir/Dialect/TTNN/ccl/all_reduce.mlir index 1eaf04df7f..5fa77776be 100644 --- a/test/ttmlir/Dialect/TTNN/ccl/all_reduce.mlir +++ b/test/ttmlir/Dialect/TTNN/ccl/all_reduce.mlir @@ -17,7 +17,6 @@ module 
attributes {} { // ----- // Verify lowering of ttir all_reduce to ttnn ops -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @all_reduce(%arg0: tensor<1x1x4096x16384xf32>) -> tensor<1x1x4096x16384xf32> { %0 = tensor.empty() : tensor<1x1x4096x16384xf32> diff --git a/test/ttmlir/Dialect/TTNN/clamp/clamp_tests_negative.mlir b/test/ttmlir/Dialect/TTNN/clamp/clamp_tests_negative.mlir index f97dde2ba1..67eb3d2116 100644 --- a/test/ttmlir/Dialect/TTNN/clamp/clamp_tests_negative.mlir +++ b/test/ttmlir/Dialect/TTNN/clamp/clamp_tests_negative.mlir @@ -2,7 +2,6 @@ // Negative tests for matmul operation // Verify that the parsing fails if input and output shapes do not match. -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @clamp(%arg0: tensor<64x64xbf16>) -> tensor<64x128xbf16> { // CHECK: error: 'ttnn.clamp' op input and output must have same shape. @@ -14,7 +13,6 @@ module attributes {} { // Verify that parsing fails in case of more than one input. // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @clamp2(%arg0: tensor<64x128xbf16>, %arg1: tensor<64x128xbf16>) -> tensor<64x128xbf16> { // CHECK: error: 'ttnn.clamp' op expects one tensor as input. diff --git a/test/ttmlir/Silicon/TTNN/ones.mlir b/test/ttmlir/Silicon/TTNN/ones.mlir index 249530700b..660de36ae1 100644 --- a/test/ttmlir/Silicon/TTNN/ones.mlir +++ b/test/ttmlir/Silicon/TTNN/ones.mlir @@ -2,7 +2,6 @@ // RUN: FileCheck %s --input-file=%t.mlir // RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn -#any_device = #tt.operand_constraint module { func.func @ones_2d() -> tensor<32x128xbf16> { // CHECK: {{.*}} = "ttnn.ones"() {{.*}} From bc65050dce59ef1c4978bb80372a6bd9c8967cfb Mon Sep 17 00:00:00 2001 From: Ognjen Djuricic <160603639+odjuricicTT@users.noreply.github.com> Date: Mon, 16 Dec 2024 15:54:32 +0100 Subject: [PATCH 04/10] Fix codeowners file (#1580) Every new line in CODEOWNERS overrides all previous owners so child directories must always come after parents. 
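For example, after this change the two relevant entries read as follows (the more specific /tools/explorer/ line comes last, so it is the match that wins for files under that directory; the owner handles are taken verbatim from the diff below):

    /tools/          @svuckovicTT @mtopalovicTT
    /tools/explorer/ @odjuricicTT @nobradovictt @vprajapati-tt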
--- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index f49354540e..8e23597a4e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -28,5 +28,5 @@ /test/ttmlir/Dialect/TTNN/optimizer/ @nobradovictt @odjuricicTT /test/ttmlir/Silicon/TTNN/optimizer/ @nobradovictt @odjuricicTT /test/unittests/Optimizer @nobradovictt @odjuricicTT -/tools/explorer/ @odjuricicTT @nobradovictt @vprajapati-tt /tools/ @svuckovicTT @mtopalovicTT +/tools/explorer/ @odjuricicTT @nobradovictt @vprajapati-tt From fa8ea652c0332fb2d0d4fcba3cc5a86f17c11a4f Mon Sep 17 00:00:00 2001 From: Vladimir Milosevic <157983820+vmilosevic@users.noreply.github.com> Date: Mon, 16 Dec 2024 18:05:03 +0100 Subject: [PATCH 05/10] Uplift third_party/tt-metal to 6d7cc2c38ae3efc36326f5407698b05fb314b5b0 2024-12-16 (#1601) This PR uplifts the third_party/tt-metal to the 6d7cc2c38ae3efc36326f5407698b05fb314b5b0 Co-authored-by: Kyle Mabee --- third_party/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index 0eaec5efc0..e49dc0d8c9 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -1,6 +1,6 @@ include(ExternalProject) -set(TT_METAL_VERSION "5f821d46d6726e1fcf06b0802e2b40aae0af9e38") +set(TT_METAL_VERSION "388e187b27f8924ea09512719f666407c40f6bd9") if ("$ENV{ARCH_NAME}" STREQUAL "grayskull") set(ARCH_NAME "grayskull") From 593e0d88325eeb551c353506bca8b3c6d3319995 Mon Sep 17 00:00:00 2001 From: Sasa Vuckovic <134393361+svuckovicTT@users.noreply.github.com> Date: Mon, 16 Dec 2024 20:19:14 +0100 Subject: [PATCH 06/10] Add ModifySignaturesForDylib pass (#1595) --- include/ttmlir/Dialect/TT/IR/TTOps.h | 1 + include/ttmlir/Dialect/TT/IR/TTOps.td | 25 +++++ include/ttmlir/Dialect/TT/IR/TTOpsTypes.td | 8 ++ .../ttmlir/Dialect/TTNN/Transforms/Passes.td | 32 ++++++ lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp | 49 +++++++++- .../TTNNToEmitC/TTNNToEmitCPass.cpp | 4 + lib/Dialect/TT/IR/TTOps.cpp | 25 +++++ lib/Dialect/TTNN/Transforms/Passes.cpp | 98 ++++++++++++++++++- .../ttnn_modify_signatures_for_dylib_0.mlir | 12 +++ 9 files changed, 249 insertions(+), 5 deletions(-) create mode 100644 test/ttmlir/Dialect/TTNN/Transforms/ttnn_modify_signatures_for_dylib_0.mlir diff --git a/include/ttmlir/Dialect/TT/IR/TTOps.h b/include/ttmlir/Dialect/TT/IR/TTOps.h index 047fc2a3c4..16219c04e3 100644 --- a/include/ttmlir/Dialect/TT/IR/TTOps.h +++ b/include/ttmlir/Dialect/TT/IR/TTOps.h @@ -9,6 +9,7 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" +#include "mlir/IR/TypeUtilities.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/DestinationStyleOpInterface.h" #include "mlir/Interfaces/InferTypeOpInterface.h" diff --git a/include/ttmlir/Dialect/TT/IR/TTOps.td b/include/ttmlir/Dialect/TT/IR/TTOps.td index d3b34fff88..9078028ab1 100644 --- a/include/ttmlir/Dialect/TT/IR/TTOps.td +++ b/include/ttmlir/Dialect/TT/IR/TTOps.td @@ -6,5 +6,30 @@ #define TTMLIR_TTMLIR_TTOPS_TD include "ttmlir/Dialect/TT/IR/TTOpsTypes.td" +include "mlir/Interfaces/InferTypeOpInterface.td" +include "mlir/Interfaces/SideEffectInterfaces.td" +include "mlir/IR/CommonTypeConstraints.td" + +def TT_GetTupleElementOp: TT_Op<"get_tuple_element", [Pure, DeclareOpInterfaceMethods]> { + let summary = "GetTupleElement operation"; + let description = [{ + Extracts element at `index` position of the `operand` tuple and produces a `result`. 
+ + Example: + ```mlir + %result = tt.get_tuple_element %operand[0] : (tuple, tuple>>) -> tensor<32x32xbf16> + ``` + }]; + + let arguments = (ins TT_Tuple:$operand, + ConfinedAttr:$index + ); + + let results = (outs TT_TupleReturnType:$result); + + let assemblyFormat = [{ + $operand `[` $index `]` attr-dict `:` functional-type(operands, results) + }]; +} #endif diff --git a/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td b/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td index c690b8bca8..7472c298b7 100644 --- a/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td +++ b/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td @@ -488,4 +488,12 @@ def TT_Device : TT_Type<"Device", "device", []> { let assemblyFormat = "`<` $desc `>`"; } +//===----------------------------------------------------------------------===// +// Auxiliary type definitions +//===----------------------------------------------------------------------===// + +def TT_Tuple : NestedTupleOf<[AnyRankedTensor]>; + +def TT_TupleReturnType : AnyTypeOf<[AnyRankedTensor]>; + #endif diff --git a/include/ttmlir/Dialect/TTNN/Transforms/Passes.td b/include/ttmlir/Dialect/TTNN/Transforms/Passes.td index 99a9bed24f..4597db87e1 100644 --- a/include/ttmlir/Dialect/TTNN/Transforms/Passes.td +++ b/include/ttmlir/Dialect/TTNN/Transforms/Passes.td @@ -86,4 +86,36 @@ def TTNNCreateInputGenerators: Pass<"ttnn-create-input-gens", "::mlir::ModuleOp" }]; } +def TTNNModifySignaturesForDylib: Pass<"ttnn-modify-signatures-for-dylib", "::mlir::ModuleOp"> { + let summary = "Modify signatures of the functions for dylib path."; + let description = [{ + This pass is intended to be used only when the end result is a dylib! + + It modifies signatures of forward functions so that they take a canonical + form. Essentially, input tensors are packed into a tuple and then accessed + in the function body. This allows for easier interfacing with the generated + dylib as the signatures are then uniform across all forward functions. 
+ + Given a forward function like this: + + ```mlir + func.func @add(%arg0: tensor<32x32xbf16>, %arg1: tensor<32x32xbf16>) -> tensor<32x32xbf16> { + %0 = "ttnn.add"(%arg0, %arg1) : (tensor<32x32xbf16>, tensor<32x32xbf16>) -> tensor<32x32xbf16> + return %0 : tensor<32x32xbf16> + } + ``` + + The pass will modify the signature and prepend unpacking ops like so: + + ```mlir + func.func @add(%arg0: tuple, tensor<32x32xbf16>>) -> tensor<32x32xbf16> { + %0 = tt.get_tuple_element %arg0[0] : (tuple, tensor<32x32xbf16>>) -> tensor<32x32xbf16> + %1 = tt.get_tuple_element %arg0[1] : (tuple, tensor<32x32xbf16>>) -> tensor<32x32xbf16> + %2 = "ttnn.add"(%0, %1) : (tensor<32x32xbf16>, tensor<32x32xbf16>) -> tensor<32x32xbf16> + return %2 : tensor<32x32xbf16> + } + ``` + }]; +} + #endif diff --git a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp index b1d438b905..c1a07b5fcd 100644 --- a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp +++ b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp @@ -5,6 +5,7 @@ #include "ttmlir/Conversion/TTNNToEmitC/TTNNToEmitC.h" #include "ttmlir/Conversion/TTNNToEmitC/Utils.h" +#include "ttmlir/Dialect/TT/IR/TTOps.h" #include "ttmlir/Dialect/TT/IR/TTOpsDialect.h.inc" #include "ttmlir/Dialect/TTNN/IR/TTNN.h" #include "ttmlir/Dialect/TTNN/IR/TTNNOps.h" @@ -576,6 +577,42 @@ class ArithConstantOpConversionPattern } }; +class GetTupleElementOpConversionPattern + : public OpConversionPattern { + +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(tt::GetTupleElementOp getTupleElementOp, + tt::GetTupleElementOp::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + // SubscriptOp requires a Value object as index, which is created by + // invoking the emitc::LiteralOp + // + Value indexAsVal = rewriter.create( + getTupleElementOp->getLoc(), rewriter.getIndexType(), + std::to_string(adaptor.getIndex())); + + // SubscriptOp also returns an emitc::LValueType, so we wrap the OpaqueType + // with LValueType + // + emitc::LValueType lvalueReturnType = emitc::LValueType::get( + emitc::OpaqueType::get(rewriter.getContext(), "ttnn::Tensor")); + Value subscript = rewriter.create( + getTupleElementOp->getLoc(), lvalueReturnType, adaptor.getOperand(), + indexAsVal); + + // As SubscriptOp returns an LValueType, we need to convert it to an + // OpaqueType - this is done by invoking the emitc::LoadOp + // + rewriter.replaceOpWithNewOp( + getTupleElementOp, emitc::OpaqueType::get(getContext(), "ttnn::Tensor"), + subscript); + return success(); + } +}; + // Module Op conversion pattern // // This conversion pattern removes attributes from the ModuleOp. 
Previously, @@ -724,10 +761,6 @@ void populateTTNNToEmitCPatterns(mlir::MLIRContext *ctx, patterns.add>(typeConverter, ctx); - // Module op - // - patterns.add(typeConverter, ctx); - // KV Cache ops // patterns.add>(typeConverter, @@ -738,6 +771,14 @@ void populateTTNNToEmitCPatterns(mlir::MLIRContext *ctx, // Arith ops // patterns.add(typeConverter, ctx); + + // Module op + // + patterns.add(typeConverter, ctx); + + // Tuple ops + // + patterns.add(typeConverter, ctx); } } // namespace mlir::tt diff --git a/lib/Conversion/TTNNToEmitC/TTNNToEmitCPass.cpp b/lib/Conversion/TTNNToEmitC/TTNNToEmitCPass.cpp index bd0c9044fc..95e722d846 100644 --- a/lib/Conversion/TTNNToEmitC/TTNNToEmitCPass.cpp +++ b/lib/Conversion/TTNNToEmitC/TTNNToEmitCPass.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Func/Transforms/FuncConversions.h" #include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/MLIRContext.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Transforms/DialectConversion.h" @@ -40,6 +41,9 @@ class TTNNToEmitCTypeConverter : public TypeConverter { addConversion([ctx](mlir::TensorType type) -> emitc::OpaqueType { return emitc::OpaqueType::get(ctx, "ttnn::Tensor"); }); + addConversion([ctx](mlir::TupleType type) -> emitc::OpaqueType { + return emitc::OpaqueType::get(ctx, "std::vector"); + }); } }; diff --git a/lib/Dialect/TT/IR/TTOps.cpp b/lib/Dialect/TT/IR/TTOps.cpp index 6f15f813ef..b4f3b951d6 100644 --- a/lib/Dialect/TT/IR/TTOps.cpp +++ b/lib/Dialect/TT/IR/TTOps.cpp @@ -7,3 +7,28 @@ #define GET_OP_CLASSES #include "ttmlir/Dialect/TT/IR/TTOps.cpp.inc" + +namespace mlir::tt { + +LogicalResult GetTupleElementOp::inferReturnTypes( + MLIRContext *, std::optional location, ValueRange operands, + DictionaryAttr attributes, OpaqueProperties properties, RegionRange regions, + SmallVectorImpl &inferredReturnTypes) { + + GetTupleElementOp::Adaptor adaptor(operands, attributes, properties, regions); + + auto operandType = dyn_cast(adaptor.getOperand().getType()); + if (!operandType) { + return failure(); + } + if (adaptor.getIndex() >= static_cast(operandType.size())) { + return emitOptionalError(location, "index ", adaptor.getIndex(), + " is out of bounds of operand with size ", + operandType.size()); + } + + inferredReturnTypes.push_back(operandType.getType(adaptor.getIndex())); + return success(); +} + +} // namespace mlir::tt diff --git a/lib/Dialect/TTNN/Transforms/Passes.cpp b/lib/Dialect/TTNN/Transforms/Passes.cpp index 20172f4fd1..f35768d63d 100644 --- a/lib/Dialect/TTNN/Transforms/Passes.cpp +++ b/lib/Dialect/TTNN/Transforms/Passes.cpp @@ -4,6 +4,8 @@ #include "ttmlir/Dialect/TTNN/Transforms/Passes.h" +#include "ttmlir/Dialect/TT/IR/TTOps.h" +#include "ttmlir/Dialect/TT/IR/TTOpsTypes.h" #include "ttmlir/Dialect/TTNN/IR/TTNNOps.h" #include "ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.h" #include "ttmlir/Dialect/TTNN/IR/TTNNOpsTypes.h" @@ -12,6 +14,7 @@ #include "mlir/Analysis/Liveness.h" #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/TypeRange.h" @@ -24,6 +27,7 @@ namespace mlir::tt::ttnn { #define GEN_PASS_DEF_TTNNDEALLOCATE #define GEN_PASS_DEF_TTNNDECOMPOSELAYOUTS #define GEN_PASS_DEF_TTNNCREATEINPUTGENERATORS +#define GEN_PASS_DEF_TTNNMODIFYSIGNATURESFORDYLIB #include "ttmlir/Dialect/TTNN/Transforms/Passes.h.inc" class TTNNDeallocate : public impl::TTNNDeallocateBase { 
@@ -906,7 +910,7 @@ class TTNNCreateInputGenerators // Block *firstBlock = module.getBody(0); - // Find all the func.func ops in the module + // Find all the func.func ops in the module that are "forward" functions // SmallVector forwardFuncOps; for (mlir::Operation &op : firstBlock->getOperations()) { @@ -1065,4 +1069,96 @@ class TTNNCreateInputGenerators } }; +class TTNNModifySignaturesForDylib + : public impl::TTNNModifySignaturesForDylibBase< + TTNNModifySignaturesForDylib> { + +public: + using impl::TTNNModifySignaturesForDylibBase< + TTNNModifySignaturesForDylib>::TTNNModifySignaturesForDylibBase; + + void runOnOperation() final { + ModuleOp module = getOperation(); + IRRewriter rewriter(&getContext()); + + // Ensure that the module has a single region and a single block within that + // region + assert(module->getRegions().size() == 1); + assert(module->getRegion(0).getBlocks().size() == 1); + + // Get the first block of the region at index 0 + // + Block *firstBlock = module.getBody(0); + + // Find all the func.func ops in the module that are "forward" functions + // + SmallVector forwardFuncOps; + for (mlir::Operation &op : firstBlock->getOperations()) { + if (mlir::func::FuncOp funcOp = dyn_cast(op)) { + + // Skip functions that are called elsewhere in the IR + // + // This will skip utility functions that are used by other functions, + // only top-level "forward" functions should be considered + // + if (!funcOp->getUses().empty()) { + continue; + } + + forwardFuncOps.push_back(funcOp); + } + } + + // Iterate over all the func ops and modify the signatures + // + for (mlir::func::FuncOp forwardFuncOp : forwardFuncOps) { + // Replace the signature of the forward function so that all the tensor + // arguments are packed into a single tuple + // + mlir::FunctionType originalFuncType = forwardFuncOp.getFunctionType(); + assert( + std::all_of(originalFuncType.getInputs().begin(), + originalFuncType.getInputs().end(), + [](Type t) { return mlir::isa(t); }) && + "Expected all inputs must be of type RankedTensorType"); + mlir::TupleType inputTupleType = + mlir::TupleType::get(&getContext(), originalFuncType.getInputs()); + FunctionType tuplifiedFuncType = + originalFuncType.clone(inputTupleType, originalFuncType.getResults()); + rewriter.modifyOpInPlace(forwardFuncOp, + [&forwardFuncOp, &tuplifiedFuncType]() { + forwardFuncOp.setType(tuplifiedFuncType); + }); + + // First block of the function (often referred to as "entry block") needs + // its arguments updated as well - the args need to match the containing + // func's arguments; this is implemented here by first inserting the tuple + // as the first argument of the block, inserting GetTupleElementOp ops to + // start of the block in order to unpack tuple elements, and then + // replacing all uses of the original block arguments with the + // GetTupleElementOp results - after this it's finally safe to remove + // original block arguments as they have no live uses anymore + // + Block &entryBlock = forwardFuncOp.getBlocks().front(); + entryBlock.insertArgument(/*index=*/0u, + tuplifiedFuncType.getInputs().front(), + forwardFuncOp.getLoc()); + + rewriter.setInsertionPointToStart(&entryBlock); + for (size_t idx = 0; idx < originalFuncType.getInputs().size(); idx++) { + ::mlir::tt::GetTupleElementOp getTupleElementOp = + rewriter.create( + forwardFuncOp.getLoc(), forwardFuncOp.getArgument(0), idx); + + rewriter.replaceAllUsesWith(entryBlock.getArgument(1 + idx), + getTupleElementOp); + } + + // Erase original arguments + // + 
entryBlock.eraseArguments(1, originalFuncType.getInputs().size()); + } + } +}; + } // namespace mlir::tt::ttnn diff --git a/test/ttmlir/Dialect/TTNN/Transforms/ttnn_modify_signatures_for_dylib_0.mlir b/test/ttmlir/Dialect/TTNN/Transforms/ttnn_modify_signatures_for_dylib_0.mlir new file mode 100644 index 0000000000..f7cab85905 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/Transforms/ttnn_modify_signatures_for_dylib_0.mlir @@ -0,0 +1,12 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" --ttnn-modify-signatures-for-dylib %s | FileCheck %s + +module attributes {} { + // CHECK: func.func @add(%arg0: tuple<[[TENSOR_A:.*>]], [[TENSOR_B:.*>]]>) -> tensor<32x32xbf16, #ttnn_layout> { + func.func @add(%arg0: tensor<32x32xbf16>, %arg1: tensor<32x32xbf16>) -> tensor<32x32xbf16> { + // CHECK-NEXT: %0 = tt.get_tuple_element %arg0[0] : (tuple<[[TENSOR_A]], [[TENSOR_B]]>) -> [[TENSOR_A]] + // CHECK-NEXT: %1 = tt.get_tuple_element %arg0[1] : (tuple<[[TENSOR_A]], [[TENSOR_B]]>) -> [[TENSOR_B]] + %0 = tensor.empty() : tensor<32x32xbf16> + %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array}> : (tensor<32x32xbf16>, tensor<32x32xbf16>, tensor<32x32xbf16>) -> tensor<32x32xbf16> + return %1 : tensor<32x32xbf16> + } +} From ba02a5d2bc3e489e97813761d593bd7bb1057ad9 Mon Sep 17 00:00:00 2001 From: Collin Tod Date: Mon, 16 Dec 2024 15:33:12 -0600 Subject: [PATCH 07/10] Fix golden location bug (#1593) This change fixes a bug introduced by the previous attempt to use real file names into ops built via python bindings. The problem stemmed from using the `Location.file` constructor, instead of manually constructing a location string and using the `Location.name` constructor instead. This is a temporary fix, and in the future, pure locations shouldn't be used as op UUIDs as they can be inherited by their decomposed ops in some circumstances, thus making them non-unique. 
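For reference, a minimal sketch of the two constructors involved, using the upstream MLIR Python bindings that ttmlir re-exports (the file name, line, and id values are invented for illustration, not taken from the patch):

```python
from ttmlir.ir import Context, Location

ctx = Context()

# What the builder previously emitted: a FileLineColLoc.
file_loc = Location.file("test_ttir_ops.py", 42, 0, context=ctx)

# What it emits now: a NameLoc whose string packs file, line, and a per-call
# id, so it can double as a stable key into the golden map.
name_loc = Location.name("test_ttir_ops.py:42:id(0)", context=ctx)
```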
--- python/test_infra/test_utils.py | 3 +- python/test_infra/ttir_builder.py | 77 ++++++++++++++-------- runtime/tools/python/ttrt/common/golden.py | 12 +--- test/python/golden/test_ttir_ops.py | 2 +- 4 files changed, 53 insertions(+), 41 deletions(-) diff --git a/python/test_infra/test_utils.py b/python/test_infra/test_utils.py index 09e86db975..da1957b7f6 100644 --- a/python/test_infra/test_utils.py +++ b/python/test_infra/test_utils.py @@ -4,9 +4,8 @@ import os import inspect -from typing import Callable, Dict, List, Optional +from typing import Callable, List, Optional -import torch from ttmlir.dialects import func from ttmlir.ir import * from ttmlir.passes import ( diff --git a/python/test_infra/ttir_builder.py b/python/test_infra/ttir_builder.py index 9c832d014d..471c07ca7d 100644 --- a/python/test_infra/ttir_builder.py +++ b/python/test_infra/ttir_builder.py @@ -3,12 +3,12 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -import inspect +import inspect from dataclasses import dataclass from typing import List, Optional, Union, Tuple, Callable, Dict from ttmlir.ir import * -from ttmlir.dialects import ttir, tt, func, tensor +from ttmlir.dialects import ttir, tt, tensor from ttmlir.passes import create_golden_tensor, DataType import torch @@ -17,7 +17,50 @@ Operand = Union[Value, OpView, Operation] # Convenience alias for shape -Shape = Union[List[int], Tuple[int]] +Shape = Union[List[int], Tuple[int, ...]] + + +def get_loc_of_extra_file_callee(id: int = 0) -> Location: + """When called, this function returns a `Location` referring to first + callee outside the file of the caller of this function. E.G., if a function + in `foo.py` called a function in `bar.py` that then called this function, + the location would be pointing to the call in `foo.py`. + + NOTE: this location is _NOT_ in the form of + {filename}:{line_number}:{col_number}, but instead in the form: + {filename}:{line_number}:id({id}), where id is supplied to this function as + a disambiguator for calls that happen on the same line + + Arguments + --------- + + id : int + An optional variable that defaults to 0 to be appended to the location, + disambiguating calls on the same line. + + Returns + ------- + + A `Location` referring to the first extra file callee of the caller of this function + + """ + + stack = inspect.stack() + + # find the innermost frame outside of this file + caller_filename = stack[1].filename + + while len(stack) > 0 and stack[0].filename == caller_filename: + stack = stack[1:] + + assert ( + len(stack) > 0 + ), "Top of callstack to builder funcs must be outside the caller's file" + + # FIXME: this should be a `Location.file`, but for some reason it causes + # strange decomposition inheritance behaviour that breaks using this as + # a key into the golden map + return Location.name(f"{stack[0].filename}:{str(stack[0].lineno)}:id({str(id)})") @dataclass(frozen=True) @@ -251,40 +294,20 @@ def eltwise_proxy( inputs: List[Operand], ) -> OpView: - # Snoop the location of the first caller outside of this file to - # annotate the MLIR with. NOTE that this location is _NOT_ row:col, but - # instead row:id, where id is a unique id given to all calls to builder - # funcs. 
See `get_next_global_id` for more details - stack = inspect.stack() - - # find the innermost frame outside of this file - cur_filename = stack[0].filename - - while len(stack) > 0 and stack[0].filename == cur_filename: - stack = stack[1:] - - assert ( - len(stack) > 0 - ), "Top of callstack to builder funcs must be outside this file" + id = self.get_next_global_id() + loc = get_loc_of_extra_file_callee(id=id) with self._ctx, self._loc: output = self.empty(self.get_shape(inputs[0])) - id = self.get_next_global_id() - - op = op_ttir_function( - [self._get_type(output)], - inputs, - [output], - loc=Location.name(str(id)), - ) + op = op_ttir_function([self._get_type(output)], inputs, [output], loc=loc) goldens = [] for input in inputs: goldens.append(self._get_golden_tensor(input)) golden = Golden(op_golden_function(*goldens)) - self.id_golden_map[str(id)] = golden + self.id_golden_map[str(loc)] = golden self._store_golden(op, golden) self._override_golden(output, golden) diff --git a/runtime/tools/python/ttrt/common/golden.py b/runtime/tools/python/ttrt/common/golden.py index 055d4c8249..847942615d 100644 --- a/runtime/tools/python/ttrt/common/golden.py +++ b/runtime/tools/python/ttrt/common/golden.py @@ -117,17 +117,7 @@ def golden_partial_function( print("-----------executing golden comparision-----------") try: - op_debug_str = ttrt.runtime.get_op_debug_str(op_context) - - # find matching golden tensor based on loc in op debug string - match = re.search(r"loc\(([^)]+)\)", op_debug_str) - - if not match: - print(f"debug_str={op_debug_str}") - print("No location found in debug string - skipping golden comparison") - return - - loc = match.group(1).replace('"', "") + loc = ttrt.runtime.get_op_loc_info(op_context) print(f"found location={loc}") op_golden_tensor = binary.get_debug_info_golden(loc) diff --git a/test/python/golden/test_ttir_ops.py b/test/python/golden/test_ttir_ops.py index aa18e10369..e693196f53 100644 --- a/test/python/golden/test_ttir_ops.py +++ b/test/python/golden/test_ttir_ops.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -# RUN: %python %s +# RUN: SYSTEM_DESC_PATH=%system_desc_path% %python %s import inspect From 526919d8dbf2d1550f8b0a34c6a0eec442d66561 Mon Sep 17 00:00:00 2001 From: Vraj Prajapati Date: Mon, 16 Dec 2024 16:14:43 -0600 Subject: [PATCH 08/10] Created Utils module + Pybound Loc Functionality (#1541) - PR closes #1515 - Creates a new `utils` module in `ttmlir` that provides utility functions - Currently implemented - `utils.debug_print_module` Prints module with Debug flag active (prints locs + other data) - `utils.get_loc_name` Casts Loc to NameLoc if possible and returns the name - `utils.get_loc_full` Casts Loc to FileLineColLoc if possible and returns full FileLineCol str. - New namespace parsing method: If module doesn't have location info (loc is `-`) then a full location is passed into the namespace, otherwise the name is used. 
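
A hedged usage sketch of the new helpers, which land under `ttmlir.util` (see `util.py` and the `util` submodule registration in the diff); the module text and location values here are illustrative, not from the patch:

```python
from ttmlir import ir, util

ctx = ir.Context()
named = ir.Location.name("matmul_1", context=ctx)
filed = ir.Location.file("MNISTLinear.py", 4, 10, context=ctx)

print(util.get_loc_name(named))  # "matmul_1"
print(util.get_loc_full(named))  # "-" (not a FileLineColLoc)
print(util.get_loc_full(filed))  # "MNISTLinear.py:4:10"

module = ir.Module.parse("module {}", ctx)
print(util.debug_print_module(module))  # IR text printed with debug locations
```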
--------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- include/ttmlir/Bindings/Python/TTMLIRModule.h | 1 + python/CMakeLists.txt | 7 ++++ python/TTMLIRModule.cpp | 2 + python/Util.cpp | 41 +++++++++++++++++++ python/ttmlir/util.py | 5 +++ .../tt_adapter/src/tt_adapter/mlir.py | 14 ++----- 6 files changed, 60 insertions(+), 10 deletions(-) create mode 100644 python/Util.cpp create mode 100644 python/ttmlir/util.py diff --git a/include/ttmlir/Bindings/Python/TTMLIRModule.h b/include/ttmlir/Bindings/Python/TTMLIRModule.h index d36529e676..49ec0e912b 100644 --- a/include/ttmlir/Bindings/Python/TTMLIRModule.h +++ b/include/ttmlir/Bindings/Python/TTMLIRModule.h @@ -62,6 +62,7 @@ void populateTTNNModule(py::module &m); void populateOverridesModule(py::module &m); void populateOptimizerOverridesModule(py::module &m); void populatePassesModule(py::module &m); +void populateUtilModule(py::module &m); } // namespace mlir::ttmlir::python #endif // TTMLIR_BINDINGS_PYTHON_TTMLIRMODULE_H diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index cbfc3bf95f..6f3c99345e 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -75,6 +75,12 @@ declare_mlir_python_sources(TTMLIRPythonSources.Passes SOURCES passes.py ) +declare_mlir_python_sources(TTMLIRPythonSources.Util + ROOT_DIR "${TTMLIR_PYTHON_ROOT_DIR}" + ADD_TO_PARENT TTMLIRPythonSources + SOURCES util.py +) + declare_mlir_python_sources(TTMLIRPythonTestInfra.TestInfra ROOT_DIR "${TTMLIR_PYTHON_TEST_INFRA_ROOT_DIR}" ADD_TO_PARENT TTMLIRPythonTestInfra @@ -95,6 +101,7 @@ declare_mlir_python_extension(TTMLIRPythonExtensions.Main Overrides.cpp OptimizerOverrides.cpp Passes.cpp + Util.cpp EMBED_CAPI_LINK_LIBS MLIRCAPITransforms TTMLIRCAPI diff --git a/python/TTMLIRModule.cpp b/python/TTMLIRModule.cpp index 0347da75b5..2805399205 100644 --- a/python/TTMLIRModule.cpp +++ b/python/TTMLIRModule.cpp @@ -43,4 +43,6 @@ PYBIND11_MODULE(_ttmlir, m) { auto optimizer_overrides = m.def_submodule( "optimizer_overrides", "Python-Bound Optimizer Overrides"); mlir::ttmlir::python::populateOptimizerOverridesModule(optimizer_overrides); + auto util = m.def_submodule("util", "Python-Bound Utilities & Helpers"); + mlir::ttmlir::python::populateUtilModule(util); } diff --git a/python/Util.cpp b/python/Util.cpp new file mode 100644 index 0000000000..c562306bc3 --- /dev/null +++ b/python/Util.cpp @@ -0,0 +1,41 @@ +// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#include "ttmlir/Bindings/Python/TTMLIRModule.h" + +namespace mlir::ttmlir::python { + +void populateUtilModule(py::module &m) { + m.def("debug_print_module", [](MlirModule module) { + std::string source; + llvm::raw_string_ostream os(source); + mlir::OpPrintingFlags flags; + flags.enableDebugInfo(); // Enable the debug print + auto *op = unwrap(mlirModuleGetOperation(module)); + op->print(os, flags); + return source; + }); + + m.def("get_loc_name", [](MlirLocation _loc) -> std::string { + mlir::Location loc = unwrap(_loc); + if (mlir::isa(loc)) { + mlir::NameLoc nameLoc = mlir::cast(loc); + return nameLoc.getName().str(); + } + return "-"; + }); + + m.def("get_loc_full", [](MlirLocation _loc) -> std::string { + mlir::Location loc = unwrap(_loc); + if (mlir::isa(loc)) { + mlir::FileLineColLoc fileLoc = mlir::cast(loc); + return fileLoc.getFilename().str() + ":" + + std::to_string(fileLoc.getLine()) + ":" + + std::to_string(fileLoc.getColumn()); + } + return "-"; + }); +} + +} // namespace mlir::ttmlir::python diff --git a/python/ttmlir/util.py b/python/ttmlir/util.py new file mode 100644 index 0000000000..9bcbfd0856 --- /dev/null +++ b/python/ttmlir/util.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 + +from ._mlir_libs._ttmlir.util import * diff --git a/tools/explorer/tt_adapter/src/tt_adapter/mlir.py b/tools/explorer/tt_adapter/src/tt_adapter/mlir.py index 843606b06c..eac036a38e 100644 --- a/tools/explorer/tt_adapter/src/tt_adapter/mlir.py +++ b/tools/explorer/tt_adapter/src/tt_adapter/mlir.py @@ -7,20 +7,14 @@ from model_explorer import graph_builder, node_data_builder from ttmlir.dialects import tt, ttnn, ttir -from ttmlir import ir +from ttmlir import ir, util def get_loc_str(loc): try: - # Constant loc( at the start of the location and ) at the end. Can just strip these characters - loc = str(loc) - if loc.startswith("loc(") and loc.endswith(")"): - # Fuzzy parse first string inside location - # 'loc("matmul_1"("MNISTLinear":4294967295:10))' -> matmul_1 - # TODO(odjuricic) Need to have this pybinded. - res = re.search(r'"([^"]+)"', loc).group(1) - else: - res = loc # This is a fallback to just visualize / see what the loc is if not processable. + res = util.get_loc_name(loc) + if res == "-": + res = util.get_loc_full(loc) except: res = "unknown" return res From 05a831ed374b2521b8dc4d64ed1adb75722ca5b3 Mon Sep 17 00:00:00 2001 From: Filip Bajraktari Date: Tue, 17 Dec 2024 17:00:29 +0100 Subject: [PATCH 09/10] [Optimizer] L1 Interleaved policy that solves simple fork-joins (#1501) This PR introduces new MemoryLayoutAnalysis policy as an alternative to the GreedyL1Interleaved policy with the goal of solving simple fork-joins. Fork-join is considered to be simple if there is no need for DRAM spill in its execution. In this policy, we want to make sure that we always solve simple fork-joins. Furthermore, if DRAM spill is necessary, this policy will not produce globally optimal solution. 
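
To make the selection rule concrete before the C++ below, here is an illustrative Python sketch (not the pass itself): at every step, among the currently schedulable ops, the policy picks the one whose scheduling changes resident L1 usage the least, and it only chooses an L1 interleaved output when that output still fits the remaining budget. All names here (`op.producers`, `l1_out_size`, `remaining_users`) are invented for the sketch.

```python
def pick_next_op(schedulable, l1_out_size, remaining_users, current_l1, l1_budget):
    """Return the schedulable op with the smallest change in resident L1 usage."""
    best_op, best_delta = None, None
    for op in schedulable:
        # L1 freed: producers resident in L1 for which op is the last
        # unscheduled user of their output tensor.
        freed = sum(l1_out_size.get(p, 0) for p in op.producers
                    if remaining_users.get(p, 0) == 1)
        # L1 allocated for op's own output, or 0 if it has to stay in DRAM.
        want = l1_out_size.get(op, 0)
        alloc = want if current_l1 + want <= l1_budget else 0
        delta = alloc - freed
        if best_delta is None or delta < best_delta:
            best_op, best_delta = op, delta
    return best_op
```

This is why the simple fork-join in fork_join_01.mlir below stays entirely in L1, while fork_join_02.mlir spills its oversized intermediate to DRAM.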
--- .../TTNN/Analysis/BFInterleavedPolicy.h | 76 +++++++ ...edPolicy.h => GreedyL1InterleavedPolicy.h} | 10 +- .../Dialect/TTNN/Analysis/L1ChainConfig.h | 8 +- .../Analysis/MemoryLayoutAnalysisPolicy.h | 1 + .../TTNN/Utils/MemoryLayoutAnalysisParams.h | 19 +- include/ttmlir/Dialect/TTNN/Utils/Utils.h | 6 + .../TTNN/Analysis/BFInterleavedPolicy.cpp | 206 ++++++++++++++++++ lib/Dialect/TTNN/Analysis/CMakeLists.txt | 3 +- .../TTNN/Analysis/DFShardingPolicy.cpp | 2 +- ...licy.cpp => GreedyL1InterleavedPolicy.cpp} | 61 ++---- lib/Dialect/TTNN/Analysis/L1ChainConfig.cpp | 14 ++ .../TTNN/Analysis/MemoryLayoutAnalysis.cpp | 15 +- lib/Dialect/TTNN/Utils/Utils.cpp | 20 ++ python/OptimizerOverrides.cpp | 6 +- .../all_dram_buffer_type.mlir | 13 ++ .../all_dram_operands_l1_op.mlir | 19 ++ .../all_l1_operands_dram_op.mlir | 19 ++ .../bf_interleaved_policy/fork_join_01.mlir | 35 +++ .../bf_interleaved_policy/fork_join_02.mlir | 42 ++++ .../all_l1_interleaved_policy.mlir | 2 +- .../fork_join.mlir | 2 +- .../mnist_l1_interleaved.mlir | 2 +- .../simple_join_tests/dram_ABC_l1_None.mlir | 2 +- .../simple_join_tests/dram_AB_l1_C.mlir | 2 +- .../simple_join_tests/dram_AC_l1_B.mlir | 2 +- .../simple_join_tests/dram_A_l1_BC.mlir | 2 +- .../simple_join_tests/dram_BC_l1_A.mlir | 2 +- .../simple_join_tests/dram_B_l1_AC.mlir | 2 +- .../simple_join_tests/dram_C_l1_AB.mlir | 2 +- .../simple_join_tests/dram_None_l1_ABC.mlir | 2 +- .../l1_interleaved_policy/single_op.mlir | 9 - .../TTNN/optimizer/simple_fork_join.mlir | 2 +- test/unittests/Optimizer/CMakeLists.txt | 2 +- ....cpp => TestGreedyL1InterleavedPolicy.cpp} | 16 +- .../Optimizer/TestOptimizerOverrides.cpp | 4 +- tools/explorer/test/run_tests.py | 2 +- .../tt_adapter/src/tt_adapter/main.py | 3 +- 37 files changed, 542 insertions(+), 93 deletions(-) create mode 100644 include/ttmlir/Dialect/TTNN/Analysis/BFInterleavedPolicy.h rename include/ttmlir/Dialect/TTNN/Analysis/{L1InterleavedPolicy.h => GreedyL1InterleavedPolicy.h} (93%) create mode 100644 lib/Dialect/TTNN/Analysis/BFInterleavedPolicy.cpp rename lib/Dialect/TTNN/Analysis/{L1InterleavedPolicy.cpp => GreedyL1InterleavedPolicy.cpp} (85%) create mode 100644 test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_dram_buffer_type.mlir create mode 100644 test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_dram_operands_l1_op.mlir create mode 100644 test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_l1_operands_dram_op.mlir create mode 100644 test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/fork_join_01.mlir create mode 100644 test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/fork_join_02.mlir rename test/ttmlir/Dialect/TTNN/optimizer/{l1_interleaved_policy => greedy_l1_interleaved_policy}/all_l1_interleaved_policy.mlir (97%) rename test/ttmlir/Dialect/TTNN/optimizer/{l1_interleaved_policy => greedy_l1_interleaved_policy}/fork_join.mlir (97%) rename test/ttmlir/Dialect/TTNN/optimizer/{l1_interleaved_policy => greedy_l1_interleaved_policy}/mnist_l1_interleaved.mlir (98%) rename test/ttmlir/Dialect/TTNN/optimizer/{l1_interleaved_policy => greedy_l1_interleaved_policy}/simple_join_tests/dram_ABC_l1_None.mlir (96%) rename test/ttmlir/Dialect/TTNN/optimizer/{l1_interleaved_policy => greedy_l1_interleaved_policy}/simple_join_tests/dram_AB_l1_C.mlir (97%) rename test/ttmlir/Dialect/TTNN/optimizer/{l1_interleaved_policy => greedy_l1_interleaved_policy}/simple_join_tests/dram_AC_l1_B.mlir (97%) rename test/ttmlir/Dialect/TTNN/optimizer/{l1_interleaved_policy => 
greedy_l1_interleaved_policy}/simple_join_tests/dram_A_l1_BC.mlir (97%) rename test/ttmlir/Dialect/TTNN/optimizer/{l1_interleaved_policy => greedy_l1_interleaved_policy}/simple_join_tests/dram_BC_l1_A.mlir (97%) rename test/ttmlir/Dialect/TTNN/optimizer/{l1_interleaved_policy => greedy_l1_interleaved_policy}/simple_join_tests/dram_B_l1_AC.mlir (97%) rename test/ttmlir/Dialect/TTNN/optimizer/{l1_interleaved_policy => greedy_l1_interleaved_policy}/simple_join_tests/dram_C_l1_AB.mlir (97%) rename test/ttmlir/Dialect/TTNN/optimizer/{l1_interleaved_policy => greedy_l1_interleaved_policy}/simple_join_tests/dram_None_l1_ABC.mlir (96%) delete mode 100644 test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/single_op.mlir rename test/unittests/Optimizer/{TestL1InterleavedPolicy.cpp => TestGreedyL1InterleavedPolicy.cpp} (93%) diff --git a/include/ttmlir/Dialect/TTNN/Analysis/BFInterleavedPolicy.h b/include/ttmlir/Dialect/TTNN/Analysis/BFInterleavedPolicy.h new file mode 100644 index 0000000000..1744a1d415 --- /dev/null +++ b/include/ttmlir/Dialect/TTNN/Analysis/BFInterleavedPolicy.h @@ -0,0 +1,76 @@ +// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef TTMLIR_DIALECT_TTNN_ANALYSIS_BFINTERLEAVEDPOLICY_H +#define TTMLIR_DIALECT_TTNN_ANALYSIS_BFINTERLEAVEDPOLICY_H + +#include "ttmlir/Dialect/TT/IR/TTOpsTypes.h" +#include "ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h" +#include + +namespace mlir::tt::ttnn { + +// The goal of this policy is to always solve simple fork-joins if that is +// possible. Fork-join is considered to be simple if there is no need for DRAM +// spill in its execution. Furthermore, if DRAM spill is necessary, this policy +// will not produce globally optimal solution. +// +class BFInterleavedPolicy : public MemoryLayoutAnalysisPolicy { +public: + // In order to keep track of the L1 memory usage, we have to know two things + // for each op: + // 1. The L1 memory usage of each op's output tensor. + // 2. The number of op's users currently relying on the op's output tensor. + // This is important for fork ops where the output tensor is used by + // multiple other ops. + // + struct OpL1MemUsage { + uint64_t l1MemUsagePerUser; + uint64_t numOfUnscheduledUsers; + }; + +public: + BFInterleavedPolicy( + Operation *rootOp, std::vector &l1ChainConfigs, + const llvm::DenseMap> + &legalLayouts, + llvm::DenseMap> &schedule, + unsigned usableL1CacheSize) + : MemoryLayoutAnalysisPolicy(rootOp, l1ChainConfigs, legalLayouts, + schedule, usableL1CacheSize) {} + + void run() final; + +private: + // Check if the op is analyzable. Op is analyzable if it has at least one + // legal layout. + bool isAnalyzable(Operation *op); + + // Iterate over all operands of the op that satisfy the analyzability + // criterium defined by the isAnalyzable method. This is an abstraction + // for the boilerplate code used in different places within the policy. + // + void walkOnAnalyzableOperands(Operation *op, + function_ref callback); + + // Fetch op's DRAM layout from legalLayouts. + bool hasDRAMBufferType(Operation *op); + TTNNLayoutAttr getDRAMLayout(Operation *op); + + // Fetch op's L1 Interleaved layout from legalLayouts. + bool hasL1BufferType(Operation *op); + TTNNLayoutAttr getL1InterleavedLayout(Operation *op); + + size_t getAvailableL1CacheSize() const { + // Figure out this const based on exec data, but will be replaced + // with API. 
+ // + constexpr float tensorL1UsageCap = 0.75; + return tensorL1UsageCap * usableL1CacheSize; + } +}; + +} // namespace mlir::tt::ttnn + +#endif // TTMLIR_DIALECT_TTNN_ANALYSIS_BFINTERLEAVEDPOLICY_H diff --git a/include/ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h b/include/ttmlir/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.h similarity index 93% rename from include/ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h rename to include/ttmlir/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.h index 2392cd7c9c..b7325332e9 100644 --- a/include/ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h +++ b/include/ttmlir/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.h @@ -2,8 +2,8 @@ // // SPDX-License-Identifier: Apache-2.0 -#ifndef TTMLIR_DIALECT_TTNN_ANALYSIS_L1INTERLEAVEDPOLICY_H -#define TTMLIR_DIALECT_TTNN_ANALYSIS_L1INTERLEAVEDPOLICY_H +#ifndef TTMLIR_DIALECT_TTNN_ANALYSIS_GREEDYL1INTERLEAVEDPOLICY_H +#define TTMLIR_DIALECT_TTNN_ANALYSIS_GREEDYL1INTERLEAVEDPOLICY_H #include "mlir/Dialect/Func/IR/FuncOps.h" #include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h" @@ -12,7 +12,7 @@ namespace mlir::tt::ttnn { -class L1InterleavedPolicy : public MemoryLayoutAnalysisPolicy { +class GreedyL1InterleavedPolicy : public MemoryLayoutAnalysisPolicy { public: struct OpMemSpec { TTNNLayoutAttr layout; @@ -46,7 +46,7 @@ class L1InterleavedPolicy : public MemoryLayoutAnalysisPolicy { }; public: - L1InterleavedPolicy( + GreedyL1InterleavedPolicy( Operation *rootOp, std::vector &l1ChainConfigs, const llvm::DenseMap> &legalLayouts, @@ -124,4 +124,4 @@ class L1InterleavedPolicy : public MemoryLayoutAnalysisPolicy { } // namespace mlir::tt::ttnn -#endif // TTMLIR_DIALECT_TTNN_ANALYSIS_L1INTERLEAVEDPOLICY_H +#endif // TTMLIR_DIALECT_TTNN_ANALYSIS_GREEDYL1INTERLEAVEDPOLICY_H diff --git a/include/ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h b/include/ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h index b8aee2e4ea..32ab07a958 100644 --- a/include/ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h +++ b/include/ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h @@ -5,9 +5,7 @@ #ifndef TTMLIR_DIALECT_TTNN_ANALYSIS_L1CHAINCONFIG_H #define TTMLIR_DIALECT_TTNN_ANALYSIS_L1CHAINCONFIG_H -#include "ttmlir/Dialect/TT/IR/TTOpsTypes.h" #include "ttmlir/Dialect/TTNN/Analysis/ShardSolver.h" -#include namespace mlir::tt::ttnn { @@ -19,7 +17,7 @@ struct OpL1MemSpec { // Tensor split factor for the output tensor of the op(working with a partial // tensor). // - uint tensorSplitFactor; + uint tensorSplitFactor = 1; // Layout of the output tensor of the op. 
// @@ -56,6 +54,7 @@ class L1ChainConfig { void complete(const llvm::DenseMap &selectedOpLayout, std::unordered_set &memReconfigEdges); + void complete(); bool isEmpty() { return opL1MemSpecs.empty(); } void addOpL1MemSpec(OpL1MemSpec spec) { @@ -70,6 +69,9 @@ class L1ChainConfig { const std::unordered_set &getMemReconfigEdges() const { return memReconfigEdges; } + + uint64_t size() const { return opL1MemSpecs.size(); } + void merge(L1ChainConfig &other); }; } // namespace mlir::tt::ttnn diff --git a/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h b/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h index 4f6fcd8f47..33477a798a 100644 --- a/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h +++ b/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h @@ -17,6 +17,7 @@ class MemoryLayoutAnalysisPolicy { llvm::DenseMap> legalLayouts; llvm::DenseMap> *schedule; unsigned usableL1CacheSize = 0; + DeviceAttr deviceAttr; public: virtual ~MemoryLayoutAnalysisPolicy() {}; diff --git a/include/ttmlir/Dialect/TTNN/Utils/MemoryLayoutAnalysisParams.h b/include/ttmlir/Dialect/TTNN/Utils/MemoryLayoutAnalysisParams.h index 5275e2340d..945afda812 100644 --- a/include/ttmlir/Dialect/TTNN/Utils/MemoryLayoutAnalysisParams.h +++ b/include/ttmlir/Dialect/TTNN/Utils/MemoryLayoutAnalysisParams.h @@ -10,7 +10,11 @@ namespace mlir::tt { -enum class MemoryLayoutAnalysisPolicyType { DFSharding, L1Interleaved }; +enum class MemoryLayoutAnalysisPolicyType { + DFSharding, + GreedyL1Interleaved, + BFInterleaved +}; struct MemoryLayoutAnalysisPolicyTypeParser : public llvm::cl::parser { @@ -22,8 +26,10 @@ struct MemoryLayoutAnalysisPolicyTypeParser llvm::StringRef arg, MemoryLayoutAnalysisPolicyType &value) { value = llvm::StringSwitch(arg) .Case("DFSharding", MemoryLayoutAnalysisPolicyType::DFSharding) - .Case("L1Interleaved", - MemoryLayoutAnalysisPolicyType::L1Interleaved); + .Case("GreedyL1Interleaved", + MemoryLayoutAnalysisPolicyType::GreedyL1Interleaved) + .Case("BFInterleaved", + MemoryLayoutAnalysisPolicyType::BFInterleaved); return false; } @@ -33,8 +39,11 @@ struct MemoryLayoutAnalysisPolicyTypeParser case MemoryLayoutAnalysisPolicyType::DFSharding: res += "DFSharding"; break; - case MemoryLayoutAnalysisPolicyType::L1Interleaved: - res += "L1Interleaved"; + case MemoryLayoutAnalysisPolicyType::GreedyL1Interleaved: + res += "GreedyL1Interleaved"; + break; + case MemoryLayoutAnalysisPolicyType::BFInterleaved: + res += "BFInterleaved"; break; } return res; diff --git a/include/ttmlir/Dialect/TTNN/Utils/Utils.h b/include/ttmlir/Dialect/TTNN/Utils/Utils.h index f214fa793d..d3fb76bda9 100644 --- a/include/ttmlir/Dialect/TTNN/Utils/Utils.h +++ b/include/ttmlir/Dialect/TTNN/Utils/Utils.h @@ -43,6 +43,12 @@ RankedTensorType createRankedTensorTypeWithEncoding(RankedTensorType tensorType, ttnn::TTNNLayoutAttr encoding); +// Return the L1 memory usage of the output tensor of the given op. +// Used within L1 interleaved policies. 
+// +uint64_t getOpOutputL1Usage(Operation *op, TTNNLayoutAttr opLayout, + DeviceAttr &deviceAttr); + } // namespace mlir::tt::ttnn::utils #endif // TTMLIR_DIALECT_TTNN_UTILS_UTILS_H diff --git a/lib/Dialect/TTNN/Analysis/BFInterleavedPolicy.cpp b/lib/Dialect/TTNN/Analysis/BFInterleavedPolicy.cpp new file mode 100644 index 0000000000..4d58d7b5aa --- /dev/null +++ b/lib/Dialect/TTNN/Analysis/BFInterleavedPolicy.cpp @@ -0,0 +1,206 @@ +// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +// +// SPDX-License-Identifier: Apache-2.0 + +#include "ttmlir/Dialect/TTNN/Analysis/BFInterleavedPolicy.h" +#include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h" +#include "ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.h" +#include "ttmlir/Dialect/TTNN/Utils/Utils.h" +#include "ttmlir/Scheduler/Scheduler.h" + +namespace mlir::tt::ttnn { + +void BFInterleavedPolicy::run() { + for (Operation &funcOp : rootOp->getRegion(0).getOps()) { + func::FuncOp func = dyn_cast(funcOp); + mlir::tt::scheduler::Scheduler scheduler(&func); + deviceAttr = getCurrentScopeDevice(func); + + // Initialize the policy. + // + llvm::DenseMap currentL1UsagePerOp; + uint64_t currentL1Usage = 0; + l1ChainConfigs->push_back(L1ChainConfig()); + + while (scheduler.hasUnscheduledOps()) { + uint64_t minimalChangeInL1Usage; + Operation *nextOpForScheduling; + BufferType nextOpForSchedulingBufferType; + + nextOpForScheduling = nullptr; + minimalChangeInL1Usage = std::numeric_limits::max(); + for (Operation *op : scheduler.getScheduleableOps()) { + uint64_t deallocOfL1Mem, allocOfL1Mem, changeInL1Usage; + BufferType opBufferType; + + // Calculate the L1 memory usage of the op's operands. + // + deallocOfL1Mem = 0; + walkOnAnalyzableOperands(op, [&](Operation *operandOp) { + if (currentL1UsagePerOp.count(operandOp)) { + deallocOfL1Mem += + (currentL1UsagePerOp[operandOp].numOfUnscheduledUsers == 1) * + currentL1UsagePerOp[operandOp].l1MemUsagePerUser; + } + }); + + // Default setup for all DRAM buffer type ops. + // + allocOfL1Mem = 0; + opBufferType = BufferType::DRAM; + + // Analyse the possibility of scheduling the op with L1 memory layout. + // + if (hasL1BufferType(op)) { + TTNNLayoutAttr layout = getL1InterleavedLayout(op); + uint64_t opOutputL1Usage = + utils::getOpOutputL1Usage(op, layout, deviceAttr); + + if (currentL1Usage + opOutputL1Usage <= getAvailableL1CacheSize()) { + allocOfL1Mem = opOutputL1Usage; + opBufferType = BufferType::L1; + } + } + + // Check if the scheduling of the op is consuming the least amount of L1 + // memory among all the scheduleable ops. + // + changeInL1Usage = allocOfL1Mem - deallocOfL1Mem; + if (changeInL1Usage < minimalChangeInL1Usage) { + nextOpForScheduling = op; + nextOpForSchedulingBufferType = opBufferType; + minimalChangeInL1Usage = changeInL1Usage; + } + } + + // In case we picked the L1 layout for the nextOpForScheduling, we need + // to add the OpL1MemSpec to the L1ChainConfig and update the state of L1 + // memory. + // + if (nextOpForSchedulingBufferType == BufferType::L1) { + + // Construct OpL1MemSpec for the nextOpForScheduling. + // + OpL1MemSpec opL1MemSpec; + opL1MemSpec.op = nextOpForScheduling; + opL1MemSpec.layout = getL1InterleavedLayout(nextOpForScheduling); + l1ChainConfigs->back().addOpL1MemSpec(opL1MemSpec); + + // Update the state of L1 memory by allocating the nextOpForScheduling's + // output tensor in L1 memory. 
+ // + uint64_t numOfUsers = std::distance(nextOpForScheduling->user_begin(), + nextOpForScheduling->user_end()); + currentL1UsagePerOp[nextOpForScheduling].l1MemUsagePerUser = + utils::getOpOutputL1Usage(nextOpForScheduling, opL1MemSpec.layout, + deviceAttr); + currentL1UsagePerOp[nextOpForScheduling].numOfUnscheduledUsers = + numOfUsers; + currentL1Usage += + currentL1UsagePerOp[nextOpForScheduling].l1MemUsagePerUser; + } + + // Update the state of L1 memory. + // + walkOnAnalyzableOperands(nextOpForScheduling, [&](Operation *operandOp) { + if (currentL1UsagePerOp.count(operandOp)) { + currentL1UsagePerOp[operandOp].numOfUnscheduledUsers -= 1; + if (currentL1UsagePerOp[operandOp].numOfUnscheduledUsers == 0) { + // Only once we scheduled all the users of the operandOp, we can + // free its output tensor from L1 memory. + // + currentL1Usage -= currentL1UsagePerOp[operandOp].l1MemUsagePerUser; + currentL1UsagePerOp.erase(operandOp); + } + } + }); + + // Schedule the nextOpForScheduling and update currentL1Usage. + // + scheduler.scheduleOp(nextOpForScheduling); + } + + assert(currentL1Usage == 0); + assert(currentL1UsagePerOp.size() == 0); + + (*schedule)[func] = scheduler.getSchedule(); + + // Build, Resolve and Complete all L1ChainConfigs. + // + for (L1ChainConfig &l1ChainConfig : *l1ChainConfigs) { + l1ChainConfig.build(); + l1ChainConfig.resolve(); + l1ChainConfig.complete(); + } + } +} + +bool BFInterleavedPolicy::isAnalyzable(Operation *op) { + // Skip operations that are not analyzed by the LegalGridAnalysis. + // + if (legalLayouts.count(op) > 0) { + // Skip operations that are filterd out by the MemoryLayoutAnalysis. + // + return legalLayouts[op].size() > 0; + } + return false; +} + +void BFInterleavedPolicy::walkOnAnalyzableOperands( + Operation *op, function_ref callback) { + for (auto operand : op->getOperands()) { + // Skip block arguments (%arg0, %arg1, ...) + // + if (::llvm::isa(operand)) { + continue; + } + + Operation *operandOp = operand.getDefiningOp(); + + // Skip non-analyzable operands. 
+ // + if (isAnalyzable(operandOp)) { + callback(operandOp); + } + } +} + +bool BFInterleavedPolicy::hasDRAMBufferType(Operation *op) { + if (legalLayouts.count(op)) { + return std::find_if(legalLayouts[op].begin(), legalLayouts[op].end(), + [](TTNNLayoutAttr layout) { + return layout.hasDRAMBufferType(); + }) != legalLayouts[op].end(); + } + return false; +} + +TTNNLayoutAttr BFInterleavedPolicy::getDRAMLayout(Operation *op) { + assert(hasDRAMBufferType(op)); + auto dramLayoutIter = std::find_if( + legalLayouts[op].begin(), legalLayouts[op].end(), + [](TTNNLayoutAttr layout) { return layout.hasDRAMBufferType(); }); + return *dramLayoutIter; +} + +bool BFInterleavedPolicy::hasL1BufferType(Operation *op) { + if (legalLayouts.count(op)) { + return std::find_if(legalLayouts[op].begin(), legalLayouts[op].end(), + [](TTNNLayoutAttr layout) { + return layout.hasInterleavedL1TensorMemoryLayout(); + }) != legalLayouts[op].end(); + } + return false; +} + +TTNNLayoutAttr BFInterleavedPolicy::getL1InterleavedLayout(Operation *op) { + assert(hasL1BufferType(op)); + auto l1InterleaveLayoutIter = + std::find_if(legalLayouts[op].begin(), legalLayouts[op].end(), + [](TTNNLayoutAttr layout) { + return layout.hasInterleavedL1TensorMemoryLayout(); + }); + return *l1InterleaveLayoutIter; +} + +} // namespace mlir::tt::ttnn diff --git a/lib/Dialect/TTNN/Analysis/CMakeLists.txt b/lib/Dialect/TTNN/Analysis/CMakeLists.txt index 4db2d78b9c..04cf9c0d22 100644 --- a/lib/Dialect/TTNN/Analysis/CMakeLists.txt +++ b/lib/Dialect/TTNN/Analysis/CMakeLists.txt @@ -4,7 +4,8 @@ add_mlir_dialect_library(MLIRTTNNAnalysis MemoryLayoutAnalysis.cpp L1ChainConfig.cpp DFShardingPolicy.cpp - L1InterleavedPolicy.cpp + GreedyL1InterleavedPolicy.cpp + BFInterleavedPolicy.cpp ShardSolver.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/Dialect/TTNN/Analysis/DFShardingPolicy.cpp b/lib/Dialect/TTNN/Analysis/DFShardingPolicy.cpp index 8d5f22bfc4..bd74c5a00f 100644 --- a/lib/Dialect/TTNN/Analysis/DFShardingPolicy.cpp +++ b/lib/Dialect/TTNN/Analysis/DFShardingPolicy.cpp @@ -10,7 +10,7 @@ namespace mlir::tt::ttnn { void DFShardingPolicy::run() { rootOp->walk([&](func::FuncOp func) { - DeviceAttr deviceAttr = getCurrentScopeDevice(func); + deviceAttr = getCurrentScopeDevice(func); mlir::tt::scheduler::Scheduler scheduler(&func); l1ChainConfigs->push_back(L1ChainConfig()); llvm::SmallVector scheduleableOps; diff --git a/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp b/lib/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.cpp similarity index 85% rename from lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp rename to lib/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.cpp index 69a07af168..5606132906 100644 --- a/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp +++ b/lib/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.cpp @@ -2,33 +2,14 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h" +#include "ttmlir/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.h" #include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h" +#include "ttmlir/Dialect/TTNN/Utils/Utils.h" #include "ttmlir/Scheduler/Scheduler.h" namespace mlir::tt::ttnn { -uint64_t getOpOutputL1Usage(Operation *op, TTNNLayoutAttr opLayout, - DeviceAttr &deviceAttr) { - // In case the opLayout is not in L1 memory space, L1 memory usage is 0. - // - if (opLayout.hasDRAMBufferType()) { - return 0; - } - - // L1 memory usage of the ops without output tensors cannot be calculated. - // So far, this is only false for ttnn.get_device op. 
- // - assert(mlir::isa(op->getResult(0).getType())); - llvm::ArrayRef opOutputTensorShape = - mlir::cast(op->getResult(0).getType()).getShape(); - - uint64_t opL1OutputUsage = - opLayout.getTensorSizeInBytes(opOutputTensorShape, deviceAttr); - return opL1OutputUsage; -} - -L1InterleavedPolicy::OpConfig L1InterleavedPolicy::getGreedyConfig( +GreedyL1InterleavedPolicy::OpConfig GreedyL1InterleavedPolicy::getGreedyConfig( Operation *baseOp, llvm::DenseMap &opsL1Usage) { uint64_t numOfOps, bitIndex, currentMask; uint64_t currentL1Usage, optimalL1Usage; @@ -146,10 +127,10 @@ L1InterleavedPolicy::OpConfig L1InterleavedPolicy::getGreedyConfig( return optimalConfig; } -void L1InterleavedPolicy::run() { +void GreedyL1InterleavedPolicy::run() { for (Operation &funcOp : rootOp->getRegion(0).getOps()) { func::FuncOp func = dyn_cast(funcOp); - DeviceAttr deviceAttr = getCurrentScopeDevice(func); + deviceAttr = getCurrentScopeDevice(func); // Start the policy. // @@ -185,8 +166,8 @@ void L1InterleavedPolicy::run() { if (op->hasOneUse() && hasL1BufferType(op)) { L1Usage l1Usage; - l1Usage.outputL1Usage = - getOpOutputL1Usage(op, getL1InterleavedLayout(op), deviceAttr); + l1Usage.outputL1Usage = utils::getOpOutputL1Usage( + op, getL1InterleavedLayout(op), deviceAttr); l1Usage.requiredL1Usage = 0; opsL1Usage[op] = l1Usage; } @@ -211,8 +192,8 @@ void L1InterleavedPolicy::run() { // if (operandOpLayout.hasInterleavedL1TensorMemoryLayout()) { L1Usage l1Usage; - l1Usage.outputL1Usage = - getOpOutputL1Usage(operandOp, operandOpLayout, deviceAttr); + l1Usage.outputL1Usage = utils::getOpOutputL1Usage( + operandOp, operandOpLayout, deviceAttr); l1Usage.requiredL1Usage = OpMemSpecMap[operandOp].requiredL1Usage; opsL1Usage[operandOp] = l1Usage; } @@ -271,14 +252,15 @@ void L1InterleavedPolicy::run() { std::max(intermediateRequiredL1Usage, intermediateL1Usage + OpMemSpecMap[operandOp].requiredL1Usage); - intermediateL1Usage += getOpOutputL1Usage( + intermediateL1Usage += utils::getOpOutputL1Usage( operandOp, OpMemSpecMap[operandOp].layout, deviceAttr); } } - OpMemSpecMap[op].requiredL1Usage = std::max( - intermediateRequiredL1Usage, - intermediateL1Usage + - getOpOutputL1Usage(op, OpMemSpecMap[op].layout, deviceAttr)); + OpMemSpecMap[op].requiredL1Usage = + std::max(intermediateRequiredL1Usage, + intermediateL1Usage + + utils::getOpOutputL1Usage( + op, OpMemSpecMap[op].layout, deviceAttr)); } } } @@ -308,8 +290,8 @@ void L1InterleavedPolicy::run() { } } -bool L1InterleavedPolicy::isAnalyzable(Operation *op) { - // Skip operations that are not analyzed by the LegalLayoutAnalysis. +bool GreedyL1InterleavedPolicy::isAnalyzable(Operation *op) { + // Skip operations that are not analyzed by the LegalGridAnalysis. // if (legalLayouts.count(op) > 0) { // Skip operations that are filterd out by the MemoryLayoutAnalysis. 
@@ -319,14 +301,14 @@ bool L1InterleavedPolicy::isAnalyzable(Operation *op) { return false; } -bool L1InterleavedPolicy::hasDRAMBufferType(Operation *op) { +bool GreedyL1InterleavedPolicy::hasDRAMBufferType(Operation *op) { return std::find_if(legalLayouts[op].begin(), legalLayouts[op].end(), [](TTNNLayoutAttr layout) { return layout.hasDRAMBufferType(); }) != legalLayouts[op].end(); } -TTNNLayoutAttr L1InterleavedPolicy::getDRAMLayout(Operation *op) { +TTNNLayoutAttr GreedyL1InterleavedPolicy::getDRAMLayout(Operation *op) { assert(hasDRAMBufferType(op)); auto dramLayoutIter = std::find_if( legalLayouts[op].begin(), legalLayouts[op].end(), @@ -334,14 +316,15 @@ TTNNLayoutAttr L1InterleavedPolicy::getDRAMLayout(Operation *op) { return *dramLayoutIter; } -bool L1InterleavedPolicy::hasL1BufferType(Operation *op) { +bool GreedyL1InterleavedPolicy::hasL1BufferType(Operation *op) { return std::find_if(legalLayouts[op].begin(), legalLayouts[op].end(), [](TTNNLayoutAttr layout) { return layout.hasInterleavedL1TensorMemoryLayout(); }) != legalLayouts[op].end(); } -TTNNLayoutAttr L1InterleavedPolicy::getL1InterleavedLayout(Operation *op) { +TTNNLayoutAttr +GreedyL1InterleavedPolicy::getL1InterleavedLayout(Operation *op) { assert(hasL1BufferType(op)); auto l1InterleaveLayoutIter = std::find_if(legalLayouts[op].begin(), legalLayouts[op].end(), diff --git a/lib/Dialect/TTNN/Analysis/L1ChainConfig.cpp b/lib/Dialect/TTNN/Analysis/L1ChainConfig.cpp index 534f67a5fe..bf2eb9790a 100644 --- a/lib/Dialect/TTNN/Analysis/L1ChainConfig.cpp +++ b/lib/Dialect/TTNN/Analysis/L1ChainConfig.cpp @@ -50,4 +50,18 @@ void L1ChainConfig::complete( state = L1ChainState::Completed; } +void L1ChainConfig::complete() { + assert(state == L1ChainState::Resolved); + state = L1ChainState::Completed; +} + +void L1ChainConfig::merge(L1ChainConfig &other) { + assert(getState() == other.getState()); + opL1MemSpecs.insert(opL1MemSpecs.end(), other.opL1MemSpecs.begin(), + other.opL1MemSpecs.end()); + l1ChainedOps.insert(other.l1ChainedOps.begin(), other.l1ChainedOps.end()); + memReconfigEdges.insert(other.memReconfigEdges.begin(), + other.memReconfigEdges.end()); +} + } // namespace mlir::tt::ttnn diff --git a/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp b/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp index f3db4ed7bf..ef7e3d1c11 100644 --- a/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp +++ b/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp @@ -3,8 +3,9 @@ // SPDX-License-Identifier: Apache-2.0 #include "ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h" +#include "ttmlir/Dialect/TTNN/Analysis/BFInterleavedPolicy.h" #include "ttmlir/Dialect/TTNN/Analysis/DFShardingPolicy.h" -#include "ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h" +#include "ttmlir/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.h" #include "ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.h" namespace mlir::tt::ttnn { @@ -68,14 +69,22 @@ void MemoryLayoutAnalysis::analysisImplementation() { dfShardingPolicy.run(); break; } - case MemoryLayoutAnalysisPolicyType::L1Interleaved: { - L1InterleavedPolicy l1InterleavedPolicy( + case MemoryLayoutAnalysisPolicyType::GreedyL1Interleaved: { + GreedyL1InterleavedPolicy l1InterleavedPolicy( op, l1ChainConfigs, filterDRAMAndL1Interleaved(analysisInput.legalLayouts), analysisResult.schedule, analysisInput.usableL1CacheSize); l1InterleavedPolicy.run(); break; } + case MemoryLayoutAnalysisPolicyType::BFInterleaved: { + BFInterleavedPolicy bfInterleavedPolicy( + op, l1ChainConfigs, + 
filterDRAMAndL1Interleaved(analysisInput.legalLayouts), + analysisResult.schedule, analysisInput.usableL1CacheSize); + bfInterleavedPolicy.run(); + break; + } } // Copy over default legal layouts. diff --git a/lib/Dialect/TTNN/Utils/Utils.cpp b/lib/Dialect/TTNN/Utils/Utils.cpp index 751c714ede..6976dd35f4 100644 --- a/lib/Dialect/TTNN/Utils/Utils.cpp +++ b/lib/Dialect/TTNN/Utils/Utils.cpp @@ -117,4 +117,24 @@ createRankedTensorTypeWithEncoding(RankedTensorType tensorType, tensorType.getElementType(), encoding); } +uint64_t getOpOutputL1Usage(Operation *op, TTNNLayoutAttr opLayout, + DeviceAttr &deviceAttr) { + assert(mlir::isa(op->getResult(0).getType()) && + "L1 memory usage of the ops without output tensors cannot be " + "calculated."); + + // In case the opLayout is not in L1 memory space, L1 memory usage is 0. + // + if (opLayout.hasDRAMBufferType()) { + return 0; + } + + llvm::ArrayRef opOutputTensorShape = + mlir::cast(op->getResult(0).getType()).getShape(); + + uint64_t opL1OutputUsage = + opLayout.getTensorSizeInBytes(opOutputTensorShape, deviceAttr); + return opL1OutputUsage; +} + } // namespace mlir::tt::ttnn::utils diff --git a/python/OptimizerOverrides.cpp b/python/OptimizerOverrides.cpp index bd5ce94f43..18806654c7 100644 --- a/python/OptimizerOverrides.cpp +++ b/python/OptimizerOverrides.cpp @@ -71,8 +71,10 @@ void populateOptimizerOverridesModule(py::module &m) { py::enum_( m, "MemoryLayoutAnalysisPolicyType") .value("DFSharding", mlir::tt::MemoryLayoutAnalysisPolicyType::DFSharding) - .value("L1Interleaved", - mlir::tt::MemoryLayoutAnalysisPolicyType::L1Interleaved); + .value("GreedyL1Interleaved", + mlir::tt::MemoryLayoutAnalysisPolicyType::GreedyL1Interleaved) + .value("BFInterleaved", + mlir::tt::MemoryLayoutAnalysisPolicyType::BFInterleaved); py::enum_(m, "BufferType") .value("DRAM", mlir::tt::ttnn::BufferType::DRAM) diff --git a/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_dram_buffer_type.mlir b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_dram_buffer_type.mlir new file mode 100644 index 0000000000..bdce3fd0f9 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_dram_buffer_type.mlir @@ -0,0 +1,13 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=BFInterleaved" %s | FileCheck %s +// XFAIL: * +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<6144x6144xbf16>, %arg1: tensor<6144x6144xbf16>, %arg2: tensor<6144x6144xbf16>) -> tensor<6144x6144xbf16> { + // CHECK: #[[L1_:.*]] = #ttnn.buffer_type + %0 = tensor.empty() : tensor<6144x6144xbf16> + %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<6144x6144xbf16>, tensor<6144x6144xbf16>, tensor<6144x6144xbf16>) -> tensor<6144x6144xbf16> + %2 = tensor.empty() : tensor<6144x6144xbf16> + %3 = "ttir.add"(%1, %arg2, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<6144x6144xbf16>, tensor<6144x6144xbf16>, tensor<6144x6144xbf16>) -> tensor<6144x6144xbf16> + return %3 : tensor<6144x6144xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_dram_operands_l1_op.mlir b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_dram_operands_l1_op.mlir new file mode 100644 index 0000000000..ec809a60a7 --- /dev/null +++ 
b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_dram_operands_l1_op.mlir @@ -0,0 +1,19 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=BFInterleaved" %s | FileCheck %s +module attributes {} { + func.func @forward(%arg0: tensor<5120x8192xbf16>, %arg1: tensor<8192x5120xbf16>) -> tensor<5120x5120xbf16> { + // CHECK: #[[L1_:.*]] = #ttnn.buffer_type + // CHECK-DAG: #[[LAYOUT_5:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<32x20x!tt.tile<32x32, bf16>, #dram>, > + // CHECK-DAG: #[[LAYOUT_6:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<20x32x!tt.tile<32x32, bf16>, #dram>, > + // CHECK-DAG: #[[LAYOUT_7:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<20x20x!tt.tile<32x32, bf16>, #l1_>, > + %0 = tensor.empty() : tensor<5120x8192xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<5120x8192xbf16, #[[LAYOUT_6]]> + %1 = "ttir.relu"(%arg0, %0) <{operandSegmentSizes = array}> : (tensor<5120x8192xbf16>, tensor<5120x8192xbf16>) -> tensor<5120x8192xbf16> + %2 = tensor.empty() : tensor<8192x5120xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<8192x5120xbf16, #[[LAYOUT_5]]> + %3 = "ttir.relu"(%arg1, %2) <{operandSegmentSizes = array}> : (tensor<8192x5120xbf16>, tensor<8192x5120xbf16>) -> tensor<8192x5120xbf16> + %4 = tensor.empty() : tensor<5120x5120xbf16> + // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<5120x5120xbf16, #[[LAYOUT_7]]> + %5 = "ttir.matmul"(%1, %3, %4) : (tensor<5120x8192xbf16>, tensor<8192x5120xbf16>, tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16> + return %5 : tensor<5120x5120xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_l1_operands_dram_op.mlir b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_l1_operands_dram_op.mlir new file mode 100644 index 0000000000..0460f6ac47 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_l1_operands_dram_op.mlir @@ -0,0 +1,19 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=BFInterleaved" %s | FileCheck %s +module attributes {} { + func.func @forward(%arg0: tensor<6144x1024xbf16>, %arg1: tensor<1024x6144xbf16>) -> tensor<6144x6144xbf16> { + // CHECK: #[[L1_:.*]] = #ttnn.buffer_type + // CHECK-DAG: #[[LAYOUT_5:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<24x4x!tt.tile<32x32, bf16>, #l1_>, > + // CHECK-DAG: #[[LAYOUT_6:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<4x24x!tt.tile<32x32, bf16>, #l1_>, > + // CHECK-DAG: #[[LAYOUT_7:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<24x24x!tt.tile<32x32, bf16>, #dram>, > + %0 = tensor.empty() : tensor<6144x1024xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<6144x1024xbf16, #[[LAYOUT_5]]> + %1 = "ttir.relu"(%arg0, %0) <{operandSegmentSizes = array}> : (tensor<6144x1024xbf16>, tensor<6144x1024xbf16>) -> tensor<6144x1024xbf16> + %2 = tensor.empty() : tensor<1024x6144xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<1024x6144xbf16, #[[LAYOUT_6]]> + %3 = "ttir.relu"(%arg1, %2) <{operandSegmentSizes = array}> : (tensor<1024x6144xbf16>, tensor<1024x6144xbf16>) -> tensor<1024x6144xbf16> + %4 = tensor.empty() : tensor<6144x6144xbf16> + // CHECK: %{{.*}} = 
"ttnn.matmul"{{.*}} -> tensor<6144x6144xbf16, #[[LAYOUT_7]]> + %5 = "ttir.matmul"(%1, %3, %4) : (tensor<6144x1024xbf16>, tensor<1024x6144xbf16>, tensor<6144x6144xbf16>) -> tensor<6144x6144xbf16> + return %5 : tensor<6144x6144xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/fork_join_01.mlir b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/fork_join_01.mlir new file mode 100644 index 0000000000..5446082c75 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/fork_join_01.mlir @@ -0,0 +1,35 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=BFInterleaved" %s | FileCheck %s +// +// A +// | +// B +// / \ +// C D +// \ / +// E +// | +// F +// +// There is enough L1 memory to schedule this fork-join but only if we allocate +// the output tensor of the op B once. +// +module attributes {} { + func.func @forward(%arg0: tensor<4096x5120xbf16>, %arg1: tensor<5120x1024xbf16>, %arg2: tensor<5120x1024xbf16>) -> tensor<4096x1024xbf16> { + // CHECK: #[[L1_:.*]] = #ttnn.buffer_type + // CHECK: #[[LAYOUT_5:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<16x20x!tt.tile<32x32, bf16>, #l1_>, > + // CHECK: #[[LAYOUT_6:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<16x4x!tt.tile<32x32, bf16>, #l1_>, > + %0 = tensor.empty() : tensor<4096x5120xbf16> + // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<4096x5120xbf16, #[[LAYOUT_5]]> + %1 = "ttir.relu"(%arg0, %0) <{operandSegmentSizes = array}> : (tensor<4096x5120xbf16>, tensor<4096x5120xbf16>) -> tensor<4096x5120xbf16> + %2 = tensor.empty() : tensor<4096x1024xbf16> + // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<4096x1024xbf16, #[[LAYOUT_6]]> + %3 = "ttir.matmul"(%1, %arg1, %2) : (tensor<4096x5120xbf16>, tensor<5120x1024xbf16>, tensor<4096x1024xbf16>) -> tensor<4096x1024xbf16> + %4 = tensor.empty() : tensor<4096x1024xbf16> + // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<4096x1024xbf16, #[[LAYOUT_6]]> + %5 = "ttir.matmul"(%1, %arg2, %4) : (tensor<4096x5120xbf16>, tensor<5120x1024xbf16>, tensor<4096x1024xbf16>) -> tensor<4096x1024xbf16> + %6 = tensor.empty() : tensor<4096x1024xbf16> + // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<4096x1024xbf16, #[[LAYOUT_6]]> + %7 = "ttir.add"(%3, %5, %6) <{operandSegmentSizes = array}> : (tensor<4096x1024xbf16>, tensor<4096x1024xbf16>, tensor<4096x1024xbf16>) -> tensor<4096x1024xbf16> + return %7 : tensor<4096x1024xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/fork_join_02.mlir b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/fork_join_02.mlir new file mode 100644 index 0000000000..ee44b78c21 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/fork_join_02.mlir @@ -0,0 +1,42 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=BFInterleaved" %s | FileCheck %s +// +// A +// | +// B +// / \ +// C D +// | | +// E | +// \ / +// F +// | +// G +// +// There is not enough L1 memory to schedule this fork-join even if we allocate +// the output tensor of the op B once becuase the output tensor of the op C is +// too large to fit in L1 on its own. 
+// +module attributes {} { + func.func @forward(%arg0: tensor<4096x5120xbf16>, %arg1: tensor<5120x9216xbf16>, %arg2: tensor<9216x1024xbf16>, %arg3: tensor<5120x1024xbf16>) -> tensor<4096x1024xbf16> { + // CHECK: #[[L1_:.*]] = #ttnn.buffer_type + // CHECK: #[[LAYOUT_9:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<16x20x!tt.tile<32x32, bf16>, #l1_>, > + // CHECK: #[[LAYOUT_10:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<16x36x!tt.tile<32x32, bf16>, #dram>, > + // CHECK: #[[LAYOUT_11:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<16x4x!tt.tile<32x32, bf16>, #l1_>, > + %0 = tensor.empty() : tensor<4096x5120xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<4096x5120xbf16, #[[LAYOUT_9]]> + %1 = "ttir.relu"(%arg0, %0) <{operandSegmentSizes = array}> : (tensor<4096x5120xbf16>, tensor<4096x5120xbf16>) -> tensor<4096x5120xbf16> + %2 = tensor.empty() : tensor<4096x9216xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<4096x9216xbf16, #[[LAYOUT_10]]> + %3 = "ttir.matmul"(%1, %arg1, %2) : (tensor<4096x5120xbf16>, tensor<5120x9216xbf16>, tensor<4096x9216xbf16>) -> tensor<4096x9216xbf16> + %4 = tensor.empty() : tensor<4096x1024xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<4096x1024xbf16, #[[LAYOUT_11]]> + %5 = "ttir.matmul"(%3, %arg2, %4) : (tensor<4096x9216xbf16>, tensor<9216x1024xbf16>, tensor<4096x1024xbf16>) -> tensor<4096x1024xbf16> + %6 = tensor.empty() : tensor<4096x1024xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<4096x1024xbf16, #[[LAYOUT_11]]> + %7 = "ttir.matmul"(%1, %arg3, %6) : (tensor<4096x5120xbf16>, tensor<5120x1024xbf16>, tensor<4096x1024xbf16>) -> tensor<4096x1024xbf16> + %8 = tensor.empty() : tensor<4096x1024xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<4096x1024xbf16, #[[LAYOUT_11]]> + %9 = "ttir.add"(%5, %7, %8) <{operandSegmentSizes = array}> : (tensor<4096x1024xbf16>, tensor<4096x1024xbf16>, tensor<4096x1024xbf16>) -> tensor<4096x1024xbf16> + return %9 : tensor<4096x1024xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/all_l1_interleaved_policy.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/all_l1_interleaved_policy.mlir similarity index 97% rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/all_l1_interleaved_policy.mlir rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/all_l1_interleaved_policy.mlir index a895ca25ef..eaa16fb268 100644 --- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/all_l1_interleaved_policy.mlir +++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/all_l1_interleaved_policy.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s module attributes {} { func.func @forward(%arg0: tensor<64x128xbf16>, %arg1: tensor<128x96xbf16>, %arg2: tensor<64x96xbf16>, %arg3: tensor<96x32xbf16>, %arg4: tensor<64x32xbf16>) -> tensor<64x32xbf16> { // CHECK: #[[L1_:.*]] = #ttnn.buffer_type diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/fork_join.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/fork_join.mlir 
similarity index 97% rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/fork_join.mlir rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/fork_join.mlir index d9336db8a8..657da93390 100644 --- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/fork_join.mlir +++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/fork_join.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s // // A // | diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/mnist_l1_interleaved.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/mnist_l1_interleaved.mlir similarity index 98% rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/mnist_l1_interleaved.mlir rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/mnist_l1_interleaved.mlir index 3d437a74fa..d1a32308ff 100644 --- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/mnist_l1_interleaved.mlir +++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/mnist_l1_interleaved.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s #loc = loc("MNISTLinear":4294967295:0) module @"tt-forge-graph" attributes {} { func.func @main(%arg0: tensor<1x784xf32> loc("MNISTLinear":4294967295:0), %arg1: tensor<1x10xf32> loc("MNISTLinear":4294967295:0), %arg2: tensor<256x10xf32> loc("MNISTLinear":4294967295:0), %arg3: tensor<1x256xf32> loc("MNISTLinear":4294967295:0), %arg4: tensor<784x256xf32> loc("MNISTLinear":4294967295:0)) -> tensor<1x10xf32> { diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir similarity index 96% rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir index ecd90f1abb..436518ce0f 100644 --- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir +++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s // // A B // \ / diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir similarity index 97% rename from 
test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir index 056ded8d35..8f018f9515 100644 --- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir +++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s // // A B // \ / diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir similarity index 97% rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir index caaf3254d8..0791c46295 100644 --- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir +++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s // // A B // \ / diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir similarity index 97% rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir index 63cd3bcaa2..049f8f0b45 100644 --- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir +++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s // // A B // \ / diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir similarity index 97% rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir index 9f12e8b6f6..0a63866a63 100644 --- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir 
+++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s // // A B // \ / diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir similarity index 97% rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir index c594ca4182..c75c2f39c7 100644 --- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir +++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s // // A B // \ / diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir similarity index 97% rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir index eb2a51b174..635540ea61 100644 --- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir +++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s // // A B // \ / diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir similarity index 96% rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir index 883842694f..1d591ef2b2 100644 --- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir +++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s +// RUN: 
ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s // // A B // \ / diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/single_op.mlir b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/single_op.mlir deleted file mode 100644 index 7b8aa07593..0000000000 --- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/single_op.mlir +++ /dev/null @@ -1,9 +0,0 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s -// UNSUPPORTED: true -module attributes {} { - func.func @forward(%arg0: tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16> { - %0 = tensor.empty() : tensor<5120x5120xbf16> - %1 = "ttir.relu"(%arg0, %0) <{operandSegmentSizes = array}> : (tensor<5120x5120xbf16>, tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16> - return %1 : tensor<5120x5120xbf16> - } -} diff --git a/test/ttmlir/Silicon/TTNN/optimizer/simple_fork_join.mlir b/test/ttmlir/Silicon/TTNN/optimizer/simple_fork_join.mlir index e323e10249..b6b3c4d686 100644 --- a/test/ttmlir/Silicon/TTNN/optimizer/simple_fork_join.mlir +++ b/test/ttmlir/Silicon/TTNN/optimizer/simple_fork_join.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s > %t.mlir // RUN: FileCheck %s --input-file=%t.mlir // RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn // UNSUPPORTED: true diff --git a/test/unittests/Optimizer/CMakeLists.txt b/test/unittests/Optimizer/CMakeLists.txt index b05c8ae294..cf11c479fd 100644 --- a/test/unittests/Optimizer/CMakeLists.txt +++ b/test/unittests/Optimizer/CMakeLists.txt @@ -1,7 +1,7 @@ add_mlir_unittest(OptimizerTests TestShardSolver.cpp TestOptimizerOverrides.cpp - TestL1InterleavedPolicy.cpp + TestGreedyL1InterleavedPolicy.cpp ) target_link_libraries(OptimizerTests diff --git a/test/unittests/Optimizer/TestL1InterleavedPolicy.cpp b/test/unittests/Optimizer/TestGreedyL1InterleavedPolicy.cpp similarity index 93% rename from test/unittests/Optimizer/TestL1InterleavedPolicy.cpp rename to test/unittests/Optimizer/TestGreedyL1InterleavedPolicy.cpp index b09b65245d..3bc0c54410 100644 --- a/test/unittests/Optimizer/TestL1InterleavedPolicy.cpp +++ b/test/unittests/Optimizer/TestGreedyL1InterleavedPolicy.cpp @@ -16,14 +16,14 @@ #include "ttmlir/Dialect/TTNN/IR/TTNN.h" #include "ttmlir/Dialect/TTNN/IR/TTNNOps.h" -#include "ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h" +#include "ttmlir/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.h" using namespace mlir::tt::ttnn; constexpr int TensorDimX = 128; constexpr int TensorDimY = 128; -class L1InterleavedPolicyBase : public ::testing::Test { +class GreedyL1InterleavedPolicyBase : public ::testing::Test { public: mlir::MLIRContext context; mlir::OwningOpRef module; @@ -31,9 +31,9 @@ class L1InterleavedPolicyBase : public ::testing::Test { mlir::func::FuncOp func; mlir::tt::DeviceAttr deviceAttr; - using OpMemSpec = L1InterleavedPolicy::OpMemSpec; - using OpConfig = L1InterleavedPolicy::OpConfig; - using L1Usage = L1InterleavedPolicy::L1Usage; 
+ using OpMemSpec = GreedyL1InterleavedPolicy::OpMemSpec; + using OpConfig = GreedyL1InterleavedPolicy::OpConfig; + using L1Usage = GreedyL1InterleavedPolicy::L1Usage; void SetUp() override { context.loadDialect(); @@ -121,7 +121,7 @@ class L1InterleavedPolicyBase : public ::testing::Test { void TearDown() override {} }; -TEST_F(L1InterleavedPolicyBase, VerifyGreedyPolicy) { +TEST_F(GreedyL1InterleavedPolicyBase, VerifyGreedyPolicy) { std::vector l1ChainConfigs; llvm::DenseMap> legalLayouts; llvm::DenseMap> @@ -174,8 +174,8 @@ TEST_F(L1InterleavedPolicyBase, VerifyGreedyPolicy) { legalLayouts, opsL1Usage); // Run greedy config picker policy - L1InterleavedPolicy l1InterleavedPolicy(nullptr, l1ChainConfigs, legalLayouts, - schedule, usableL1CacheSize); + GreedyL1InterleavedPolicy l1InterleavedPolicy( + nullptr, l1ChainConfigs, legalLayouts, schedule, usableL1CacheSize); OpConfig greedyConfig = l1InterleavedPolicy.getGreedyConfig(opD, opsL1Usage); // Sanity checks diff --git a/test/unittests/Optimizer/TestOptimizerOverrides.cpp b/test/unittests/Optimizer/TestOptimizerOverrides.cpp index 31118262f5..fee0796505 100644 --- a/test/unittests/Optimizer/TestOptimizerOverrides.cpp +++ b/test/unittests/Optimizer/TestOptimizerOverrides.cpp @@ -356,9 +356,9 @@ TEST_F(TestOptimizerOverrideHandler, TestSetMemoryLayoutAnalysisPolicy) { mlir::tt::MemoryLayoutAnalysisPolicyType::DFSharding); optimizerOverridesHandler.setMemoryLayoutAnalysisPolicy( - mlir::tt::MemoryLayoutAnalysisPolicyType::L1Interleaved); + mlir::tt::MemoryLayoutAnalysisPolicyType::GreedyL1Interleaved); ASSERT_EQ(optimizerOverridesHandler.getMemoryLayoutAnalysisPolicy(), - mlir::tt::MemoryLayoutAnalysisPolicyType::L1Interleaved); + mlir::tt::MemoryLayoutAnalysisPolicyType::GreedyL1Interleaved); } // Test the setInputLayoutOverrides method diff --git a/tools/explorer/test/run_tests.py b/tools/explorer/test/run_tests.py index 75925a44e2..485104fbbf 100644 --- a/tools/explorer/test/run_tests.py +++ b/tools/explorer/test/run_tests.py @@ -124,7 +124,7 @@ def test_execute_model(model_path): def test_execute_mnist_l1_interleaved(): execute_command_and_wait( "test/ttmlir/Silicon/TTNN/optimizer/mnist_sharding.mlir", - {"optimizationPolicy": "L1 Interleaved"}, + {"optimizationPolicy": "Greedy L1 Interleaved"}, timeout=60, ) diff --git a/tools/explorer/tt_adapter/src/tt_adapter/main.py b/tools/explorer/tt_adapter/src/tt_adapter/main.py index 53ea686691..8e7fa0f02c 100644 --- a/tools/explorer/tt_adapter/src/tt_adapter/main.py +++ b/tools/explorer/tt_adapter/src/tt_adapter/main.py @@ -10,7 +10,8 @@ class OptimizationPolicy(enum.Enum): DFSharding = "DF Sharding" - L1Interleaved = "L1 Interleaved" + GreedyL1Interleaved = "Greedy L1 Interleaved" + BFInterleaved = "BF Interleaved" OptimizerDisabled = "Optimizer Disabled" From 8c37b9ddb71e1a1aa87a5fcfcfdaac3bee472ede Mon Sep 17 00:00:00 2001 From: Vraj Prajapati Date: Tue, 17 Dec 2024 11:44:36 -0600 Subject: [PATCH 10/10] Send Perf Data to Explorer Frontend (#1608) - A few small changes to explorer to send perf data to the frontend. - Changed `to_adapter_format` to allow for multiple objects - Added `add_to_dataclass` to append perf_data to Graph dataclass. 
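A minimal usage sketch of the new helpers (the `tt_adapter.utils` import path, the `Graph` field names, and the perf values here are illustrative assumptions, not part of this change):

    from tt_adapter import utils

    # Build a dataclass graph from a plain dict (fields are illustrative).
    graph = utils.to_dataclass({"nodes": [], "edges": []}, dc_name="Graph")

    # Rebuild the dataclass with an extra attribute carrying perf data.
    perf = {"op_0": {"runtime_ns": 1234}}  # hypothetical per-op timings
    graph = utils.add_to_dataclass(graph, "perf_data", perf)

    # to_adapter_format now accepts any number of graphs and wraps them.
    payload = utils.to_adapter_format(graph)  # -> {"graphs": [graph]}

Carrying perf_data on the Graph dataclass itself matches the updated convert() below, which now returns only the "graphs" list instead of a separate top-level "perf_data" key.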
--- tools/explorer/CMakeLists.txt | 6 ++++-- tools/explorer/tt_adapter/src/tt_adapter/main.py | 4 +++- .../explorer/tt_adapter/src/tt_adapter/utils.py | 16 +++++++++++++--- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/tools/explorer/CMakeLists.txt b/tools/explorer/CMakeLists.txt index e0128691a1..3879558545 100644 --- a/tools/explorer/CMakeLists.txt +++ b/tools/explorer/CMakeLists.txt @@ -3,7 +3,7 @@ include(ExternalProject) set(TT_EXPLORER_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/run.py) set(TTMLIR_BUILD_BIN_DIR ${TTMLIR_BINARY_DIR}/bin) -set(MODEL_EXPLORER_VERSION "d0b53c3b7049fd41ea1caff193706272c399fac9") +set(MODEL_EXPLORER_VERSION "ca884d5eb3291507e7f4e76776957e231b2d9b6d") ExternalProject_Add( model-explorer PREFIX ${CMAKE_CURRENT_SOURCE_DIR}/model-explorer @@ -18,7 +18,9 @@ ExternalProject_Add( add_custom_target(explorer COMMENT "Building tt-explorer... ${TTMLIR_BIN_DIR}" COMMAND pip install $<$:-e> ${CMAKE_CURRENT_SOURCE_DIR}/tt_adapter - COMMAND pip install ${CMAKE_CURRENT_SOURCE_DIR}/model-explorer/src/model-explorer/src/server/package + COMMAND pip uninstall -y ai-edge-model-explorer + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_CURRENT_SOURCE_DIR}/model-explorer/src/model-explorer/src/server/package/build + COMMAND pip install --upgrade ${CMAKE_CURRENT_SOURCE_DIR}/model-explorer/src/model-explorer/src/server/package DEPENDS TTMLIRPythonModules model-explorer ttrt ttmlir-opt ttmlir-translate ) diff --git a/tools/explorer/tt_adapter/src/tt_adapter/main.py b/tools/explorer/tt_adapter/src/tt_adapter/main.py index 8e7fa0f02c..9d0307d113 100644 --- a/tools/explorer/tt_adapter/src/tt_adapter/main.py +++ b/tools/explorer/tt_adapter/src/tt_adapter/main.py @@ -56,7 +56,9 @@ def convert( # Convert TTIR to Model Explorer Graphs and Display/Return graph, perf_data = mlir.build_graph(module, perf_trace) - return {"graphs": [graph], "perf_data": perf_data} + if perf_data: + graph = utils.add_to_dataclass(graph, "perf_data", perf_data.graphsData) + return {"graphs": [graph]} def execute( self, model_path: str, settings: Dict diff --git a/tools/explorer/tt_adapter/src/tt_adapter/utils.py b/tools/explorer/tt_adapter/src/tt_adapter/utils.py index 4b404a204b..6cc32c81ee 100644 --- a/tools/explorer/tt_adapter/src/tt_adapter/utils.py +++ b/tools/explorer/tt_adapter/src/tt_adapter/utils.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 import ttmlir -from dataclasses import make_dataclass +from dataclasses import make_dataclass, is_dataclass, asdict def parse_mlir_file(model_path): @@ -18,5 +18,15 @@ def to_dataclass(obj: dict, dc_name: str = "tempClass"): return make_dataclass(dc_name, ((k, type(v)) for k, v in obj.items()))(**obj) -def to_adapter_format(obj: dict): - return {"graphs": [to_dataclass(obj)]} +def add_to_dataclass(dataclass, new_attr_name: str, new_attr_value): + if not is_dataclass(dataclass): + return None + classname = dataclass.__class__.__name__ + dataclass = asdict(dataclass) + dataclass[new_attr_name] = new_attr_value + return to_dataclass(dataclass, dc_name=classname) + + +def to_adapter_format(*objs): + res = [x if is_dataclass(x) else to_dataclass(x) for x in objs] + return {"graphs": res}