From a93e83a4cb10301504137dd0c3559caf99b0097e Mon Sep 17 00:00:00 2001
From: Filip Bajraktari
Date: Thu, 31 Oct 2024 10:12:57 +0000
Subject: [PATCH] Incorporate memory layout analysis policy flag into both the
 optimizer pass and TTIRToTTNNBackendPipeline

---
 .../TT/Utils/MemoryLayoutAnalysisParams.h     | 47 +++++++++++++++++++
 .../ttmlir/Dialect/TT/Utils/OverrideParams.h  |  1 -
 .../TTNN/Analysis/MemoryLayoutAnalysis.h      | 12 ++---
 .../Dialect/TTNN/Pipelines/TTNNPipelines.h    | 24 +---------
 .../Dialect/TTNN/Transforms/Optimizer.h       |  9 ++++
 .../TTNN/Analysis/L1InterleavedPolicy.cpp     |  4 +-
 .../TTNN/Analysis/MemoryLayoutAnalysis.cpp    | 18 -------
 lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp  |  2 +
 lib/Dialect/TTNN/Transforms/Optimizer.cpp     |  3 +-
 .../TTNN/all_l1_interleaved_policy.mlir       | 12 ++---
 .../Dialect/TTNN/mnist_l1_interleaved.mlir    | 12 ++---
 .../TTNN/all_l1_interleaved_policy.mlir       | 14 +++---
 .../Silicon/TTNN/mnist_l1_interleaved.mlir    | 14 +++---
 13 files changed, 92 insertions(+), 80 deletions(-)
 create mode 100644 include/ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h

diff --git a/include/ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h b/include/ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h
new file mode 100644
index 0000000000..de6b591d1b
--- /dev/null
+++ b/include/ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h
@@ -0,0 +1,47 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef TTMLIR_DIALECT_TT_UTILS_MEMORYLAYOUTANALYSISPARAMS_H
+#define TTMLIR_DIALECT_TT_UTILS_MEMORYLAYOUTANALYSISPARAMS_H
+
+#include <llvm/ADT/StringSwitch.h>
+#include <llvm/Support/CommandLine.h>
+
+namespace mlir::tt {
+
+enum class MemoryLayoutAnalysisPolicyType { DFSharding, L1Interleaved };
+
+struct MemoryLayoutAnalysisPolicyTypeParser
+    : public llvm::cl::parser<MemoryLayoutAnalysisPolicyType> {
+public:
+  MemoryLayoutAnalysisPolicyTypeParser(llvm::cl::Option &opt)
+      : llvm::cl::parser<MemoryLayoutAnalysisPolicyType>(opt) {}
+
+  bool parse(llvm::cl::Option &opt, llvm::StringRef argName,
+             llvm::StringRef arg, MemoryLayoutAnalysisPolicyType &value) {
+    value = llvm::StringSwitch<MemoryLayoutAnalysisPolicyType>(arg)
+                .Case("DFSharding", MemoryLayoutAnalysisPolicyType::DFSharding)
+                .Case("L1Interleaved",
+                      MemoryLayoutAnalysisPolicyType::L1Interleaved);
+    return false;
+  }
+
+  static void print(llvm::raw_ostream &os,
+                    const MemoryLayoutAnalysisPolicyType &value) {
+    llvm::StringRef policy;
+    switch (value) {
+    case MemoryLayoutAnalysisPolicyType::DFSharding:
+      policy = "DFSharding";
+      break;
+    case MemoryLayoutAnalysisPolicyType::L1Interleaved:
+      policy = "L1Interleaved";
+      break;
+    }
+    os << "memory-layout-analysis-policy=" << policy << "\n";
+  }
+};
+
+} // namespace mlir::tt
+
+#endif // TTMLIR_DIALECT_TT_UTILS_MEMORYLAYOUTANALYSISPARAMS_H
diff --git a/include/ttmlir/Dialect/TT/Utils/OverrideParams.h b/include/ttmlir/Dialect/TT/Utils/OverrideParams.h
index b80f73940f..85babb4d3a 100644
--- a/include/ttmlir/Dialect/TT/Utils/OverrideParams.h
+++ b/include/ttmlir/Dialect/TT/Utils/OverrideParams.h
@@ -6,7 +6,6 @@
 #define TTMLIR_DIALECT_TT_UTILS_OVERRIDEPARAMS_H
 
 #include "ttmlir/Dialect/TT/IR/TTOpsTypes.h"
-#include
 #include
 
 namespace mlir::tt {
diff --git a/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h b/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h
index dfb5866b3f..39d0595554 100644
--- a/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h
+++ b/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h
@@ -6,20 +6,13 @@
 #define TTMLIR_DIALECT_TTNN_ANALYSIS_MEMORYLAYOUTANALYSIS_H
 
 #include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h"
 #include "ttmlir/Dialect/TTNN/Analysis/Edge.h"
 #include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h"
 #include "ttmlir/Dialect/TTNN/Analysis/TTNNAnalysis.h"
 
 namespace mlir::tt::ttnn {
 
-enum class MemoryLayoutAnalysisPolicyType { DFSharding, L1Interleaved };
-
-::llvm::StringRef
-stringifyMemoryLayoutAnalysisPolicyType(MemoryLayoutAnalysisPolicyType policy);
-
-MemoryLayoutAnalysisPolicyType
-symbolizeMemoryLayoutAnalysisPolicyType(::llvm::StringRef policy);
-
 struct MemoryLayoutAnalysisInput {
   llvm::DenseMap> legalLayouts;
   unsigned usableL1CacheSize = 0;
@@ -32,7 +25,8 @@ struct MemoryLayoutAnalysisInput {
       const llvm::DenseMap> &legalLayouts,
       unsigned usableL1CacheSize,
-      const std::unordered_set &overrideReshardEdges, MemoryLayoutAnalysisPolicyType policy)
+      const std::unordered_set &overrideReshardEdges,
+      MemoryLayoutAnalysisPolicyType policy)
       : legalLayouts(legalLayouts), usableL1CacheSize(usableL1CacheSize),
         overrideReshardEdges(overrideReshardEdges), policy(policy) {}
diff --git a/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h b/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h
index 31785edfd2..1351e69c36 100644
--- a/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h
+++ b/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h
@@ -6,33 +6,11 @@
 #define TTMLIR_DIALECT_TTNN_PIPELINES_TTNNPIPELINES_H
 
 #include "mlir/Pass/PassOptions.h"
+#include "ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h"
 #include "ttmlir/Dialect/TT/Utils/OverrideParams.h"
-#include "ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h"
 
 namespace mlir::tt::ttnn {
 
-struct MemoryLayoutAnalysisPolicyTypeParser
-    : public llvm::cl::parser<MemoryLayoutAnalysisPolicyType> {
-public:
-  MemoryLayoutAnalysisPolicyTypeParser(llvm::cl::Option &opt)
-      : llvm::cl::parser<MemoryLayoutAnalysisPolicyType>(opt) {}
-
-  bool parse(llvm::cl::Option &opt, StringRef argName, StringRef arg,
-             MemoryLayoutAnalysisPolicyType &value) {
-    MemoryLayoutAnalysisPolicyType policy =
-        symbolizeMemoryLayoutAnalysisPolicyType(arg);
-    value = policy;
-    return true;
-  }
-
-  static void print(llvm::raw_ostream &os,
-                    const MemoryLayoutAnalysisPolicyType &value) {
-    os << "memory-layout-analysis-policy="
-       << stringifyMemoryLayoutAnalysisPolicyType(value);
-    os << "\n";
-  }
-};
-
 // Options for the TTIR to TTNN backend pipeline.
// struct TTIRToTTNNBackendPipelineOptions
diff --git a/include/ttmlir/Dialect/TTNN/Transforms/Optimizer.h b/include/ttmlir/Dialect/TTNN/Transforms/Optimizer.h
index 013b839603..73722f45c4 100644
--- a/include/ttmlir/Dialect/TTNN/Transforms/Optimizer.h
+++ b/include/ttmlir/Dialect/TTNN/Transforms/Optimizer.h
@@ -19,6 +19,8 @@ struct TTNNOptimizerOptions {
   llvm::StringMap overrideOutputLayout = llvm::StringMap();
   bool memoryLayoutAnalysisEnabled = false;
+  MemoryLayoutAnalysisPolicyType memoryLayoutAnalysisPolicy =
+      MemoryLayoutAnalysisPolicyType::DFSharding;
   bool memReconfigEnabled = false;
   int64_t maxLegalLayouts = 64;
 };
@@ -95,6 +97,7 @@ class TTNNOptimizerBase : public ::mlir::OperationPass<::mlir::ModuleOp> {
     memoryLayoutAnalysisEnabled =
         std::move(options.memoryLayoutAnalysisEnabled);
     memReconfigEnabled = std::move(options.memReconfigEnabled);
+    memoryLayoutAnalysisPolicy = std::move(options.memoryLayoutAnalysisPolicy);
     maxLegalLayouts = std::move(options.maxLegalLayouts);
   }
@@ -122,6 +125,12 @@ class TTNNOptimizerBase : public ::mlir::OperationPass<::mlir::ModuleOp> {
                          "we support all "
                          "types of shard specs."),
       ::llvm::cl::init(false)};
+  ::mlir::Pass::Option<MemoryLayoutAnalysisPolicyType,
+                       MemoryLayoutAnalysisPolicyTypeParser>
+      memoryLayoutAnalysisPolicy{
+          *this, "memory-layout-analysis-policy",
+          llvm::cl::desc("Specify policy for memory layout analysis."),
+          llvm::cl::init(MemoryLayoutAnalysisPolicyType::DFSharding)};
   ::mlir::Pass::Option<int64_t> maxLegalLayouts{
       *this, "max-legal-layouts",
       ::llvm::cl::desc(
diff --git a/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp b/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp
index df1089bfda..a98c091a24 100644
--- a/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp
+++ b/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp
@@ -32,7 +32,7 @@ void L1InterleavedPolicy::run() {
       if (legalLayouts.lookup(currentOp).size() > 0) {
         selectedOpLayout[currentOp] = legalLayouts.lookup(currentOp).front();
 
-        // Add currentOp to shard chain config.
+        // Add currentOp to l1 chain config.
         //
         OpL1MemSpec shardSpec;
         shardSpec.op = currentOp;
@@ -53,7 +53,7 @@ void L1InterleavedPolicy::run() {
     //
     (*schedule)[func] = scheduler.getSchedule();
 
-    // Resolve shard chain configs.
+    // Resolve l1 chain configs.
// for (auto &l1ChainConfig : *l1ChainConfigs) { l1ChainConfig.build(); diff --git a/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp b/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp index 11384f2b68..caf0d2b5f8 100644 --- a/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp +++ b/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp @@ -8,24 +8,6 @@ namespace mlir::tt::ttnn { -::llvm::StringRef -stringifyMemoryLayoutAnalysisPolicyType(MemoryLayoutAnalysisPolicyType policy) { - switch (policy) { - case MemoryLayoutAnalysisPolicyType::DFSharding: - return "DFSharding"; - case MemoryLayoutAnalysisPolicyType::L1Interleaved: - return "L1Interleaved"; - } - return ""; -} - -MemoryLayoutAnalysisPolicyType -symbolizeMemoryLayoutAnalysisPolicyType(::llvm::StringRef policy) { - return llvm::StringSwitch(policy) - .Case("DFSharding", MemoryLayoutAnalysisPolicyType::DFSharding) - .Case("L1Interleaved", MemoryLayoutAnalysisPolicyType::L1Interleaved); -} - bool MemoryLayoutAnalysis::applyOverrides() { // TODO(nobradovic): diff --git a/lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp b/lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp index 7f3baaeaf7..772b51b04a 100644 --- a/lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp +++ b/lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp @@ -51,6 +51,8 @@ void createTTNNPipelineAnalysisPasses( optimizerOptions.memoryLayoutAnalysisEnabled = options.memoryLayoutAnalysisEnabled; optimizerOptions.memReconfigEnabled = options.memReconfigEnabled; + optimizerOptions.memoryLayoutAnalysisPolicy = + options.memoryLayoutAnalysisPolicy; optimizerOptions.maxLegalLayouts = options.maxLegalLayouts; pm.addPass(mlir::tt::ttnn::createTTNNOptimizer(optimizerOptions)); } diff --git a/lib/Dialect/TTNN/Transforms/Optimizer.cpp b/lib/Dialect/TTNN/Transforms/Optimizer.cpp index 0fd166f567..e7996cf9c0 100644 --- a/lib/Dialect/TTNN/Transforms/Optimizer.cpp +++ b/lib/Dialect/TTNN/Transforms/Optimizer.cpp @@ -71,7 +71,8 @@ class TTNNOptimizer : public impl::TTNNOptimizerBase { MemoryLayoutAnalysis memoryLayoutAnalysis = getAnalysis(); memoryLayoutAnalysis.init(MemoryLayoutAnalysisInput( - legalLayouts, chipDesc.getUsableL1Size(), overrideReshardEdges, MemoryLayoutAnalysisPolicyType::DFSharding)); + legalLayouts, chipDesc.getUsableL1Size(), overrideReshardEdges, + memoryLayoutAnalysisPolicy)); legalLayouts = memoryLayoutAnalysis.getResult().legalLayouts; opSchedule = memoryLayoutAnalysis.getResult().schedule; memReconfigEdges = memoryLayoutAnalysis.getResult().memReconfigEdges; diff --git a/test/ttmlir/Dialect/TTNN/all_l1_interleaved_policy.mlir b/test/ttmlir/Dialect/TTNN/all_l1_interleaved_policy.mlir index f2bfc6639d..ce3cd0258e 100644 --- a/test/ttmlir/Dialect/TTNN/all_l1_interleaved_policy.mlir +++ b/test/ttmlir/Dialect/TTNN/all_l1_interleaved_policy.mlir @@ -17,30 +17,30 @@ module attributes {tt.device = #device, tt.system_desc = #system_desc} { // CHECK: #[[LAYOUT_6:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <8x8>, memref<8x12xbf16, #l1_>, interleaved> // CHECK: #[[LAYOUT_7:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <8x8>, memref<8x4xbf16, #l1_>, interleaved> %0 = "ttnn.get_device"() <{mesh_shape = #ttnn}> : () -> !tt.device<#device> - %1 = "ttnn.composite_to_layout"(%arg0, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<64x128xbf16, #layout>, !tt.device<#device>) -> tensor<64x128xbf16, #layout5> - %2 = "ttnn.composite_to_layout"(%arg1, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = 
#ttnn.memory_config<, , <<1x1>>>}> : (tensor<128x96xbf16, #layout1>, !tt.device<#device>) -> tensor<128x96xbf16, #layout5> + %1 = "ttnn.to_layout"(%arg0, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<64x128xbf16, #layout>, !tt.device<#device>) -> tensor<64x128xbf16, #layout5> + %2 = "ttnn.to_layout"(%arg1, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<128x96xbf16, #layout1>, !tt.device<#device>) -> tensor<128x96xbf16, #layout5> %3 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x96>>>, shape = #ttnn.shape<64x96>}> : (!tt.device<#device>) -> tensor<64x96xbf16, #layout6> // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_6]]> %4 = "ttnn.matmul"(%1, %2, %3) : (tensor<64x128xbf16, #layout5>, tensor<128x96xbf16, #layout5>, tensor<64x96xbf16, #layout6>) -> tensor<64x96xbf16, #layout6> - %5 = "ttnn.composite_to_layout"(%arg2, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<64x96xbf16, #layout2>, !tt.device<#device>) -> tensor<64x96xbf16, #layout5> + %5 = "ttnn.to_layout"(%arg2, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<64x96xbf16, #layout2>, !tt.device<#device>) -> tensor<64x96xbf16, #layout5> %6 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x96>>>, shape = #ttnn.shape<64x96>}> : (!tt.device<#device>) -> tensor<64x96xbf16, #layout6> // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_6]]> %7 = "ttnn.add"(%4, %5, %6) <{operandSegmentSizes = array}> : (tensor<64x96xbf16, #layout6>, tensor<64x96xbf16, #layout5>, tensor<64x96xbf16, #layout6>) -> tensor<64x96xbf16, #layout6> %8 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x96>>>, shape = #ttnn.shape<64x96>}> : (!tt.device<#device>) -> tensor<64x96xbf16, #layout6> // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_6]]> %9 = "ttnn.relu"(%7, %8) <{operandSegmentSizes = array}> : (tensor<64x96xbf16, #layout6>, tensor<64x96xbf16, #layout6>) -> tensor<64x96xbf16, #layout6> - %10 = "ttnn.composite_to_layout"(%arg3, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<96x32xbf16, #layout3>, !tt.device<#device>) -> tensor<96x32xbf16, #layout5> + %10 = "ttnn.to_layout"(%arg3, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<96x32xbf16, #layout3>, !tt.device<#device>) -> tensor<96x32xbf16, #layout5> %11 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x32>>>, shape = #ttnn.shape<64x32>}> : (!tt.device<#device>) -> tensor<64x32xbf16, #layout7> // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_7]]> %12 = "ttnn.matmul"(%9, %10, %11) : (tensor<64x96xbf16, #layout6>, tensor<96x32xbf16, #layout5>, tensor<64x32xbf16, #layout7>) -> tensor<64x32xbf16, #layout7> - %13 = "ttnn.composite_to_layout"(%arg4, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<64x32xbf16, #layout4>, !tt.device<#device>) -> 
tensor<64x32xbf16, #layout5> + %13 = "ttnn.to_layout"(%arg4, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<64x32xbf16, #layout4>, !tt.device<#device>) -> tensor<64x32xbf16, #layout5> %14 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x32>>>, shape = #ttnn.shape<64x32>}> : (!tt.device<#device>) -> tensor<64x32xbf16, #layout7> // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_7]]> %15 = "ttnn.add"(%12, %13, %14) <{operandSegmentSizes = array}> : (tensor<64x32xbf16, #layout7>, tensor<64x32xbf16, #layout5>, tensor<64x32xbf16, #layout7>) -> tensor<64x32xbf16, #layout7> %16 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x32>>>, shape = #ttnn.shape<64x32>}> : (!tt.device<#device>) -> tensor<64x32xbf16, #layout7> // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_7]]> %17 = "ttnn.relu"(%15, %16) <{operandSegmentSizes = array}> : (tensor<64x32xbf16, #layout7>, tensor<64x32xbf16, #layout7>) -> tensor<64x32xbf16, #layout7> - %18 = "ttnn.composite_to_layout"(%17) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x32>>>}> : (tensor<64x32xbf16, #layout7>) -> tensor<64x32xbf16, #layout4> + %18 = "ttnn.to_layout"(%17) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x32>>>}> : (tensor<64x32xbf16, #layout7>) -> tensor<64x32xbf16, #layout4> return %18 : tensor<64x32xbf16, #layout4> } } diff --git a/test/ttmlir/Dialect/TTNN/mnist_l1_interleaved.mlir b/test/ttmlir/Dialect/TTNN/mnist_l1_interleaved.mlir index 1702331e9f..69eab27588 100644 --- a/test/ttmlir/Dialect/TTNN/mnist_l1_interleaved.mlir +++ b/test/ttmlir/Dialect/TTNN/mnist_l1_interleaved.mlir @@ -17,29 +17,29 @@ module @"tt-forge-graph" attributes {tt.device = #device, tt.system_desc = #syst // CHECK: #[[LAYOUT_6:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <8x8>, memref<1x32xf32, #l1_>, interleaved> // CHECK: #[[LAYOUT_7:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <8x8>, memref<1x2xf32, #l1_>, interleaved> %0 = "ttnn.get_device"() <{mesh_shape = #ttnn}> : () -> !tt.device<#device> - %1 = "ttnn.composite_to_layout"(%arg0, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<1x784xf32, #layout>, !tt.device<#device>) -> tensor<1x784xf32, #layout5> - %2 = "ttnn.composite_to_layout"(%arg4, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<784x256xf32, #layout4>, !tt.device<#device>) -> tensor<784x256xf32, #layout5> + %1 = "ttnn.to_layout"(%arg0, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<1x784xf32, #layout>, !tt.device<#device>) -> tensor<1x784xf32, #layout5> + %2 = "ttnn.to_layout"(%arg4, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<784x256xf32, #layout4>, !tt.device<#device>) -> tensor<784x256xf32, #layout5> %3 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x256>>>, shape = #ttnn.shape<1x256>}> : (!tt.device<#device>) -> tensor<1x256xf32, #layout6> // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<1x256xf32, #[[LAYOUT_6]]> %4 = 
"ttnn.matmul"(%1, %2, %3) : (tensor<1x784xf32, #layout5>, tensor<784x256xf32, #layout5>, tensor<1x256xf32, #layout6>) -> tensor<1x256xf32, #layout6> - %5 = "ttnn.composite_to_layout"(%arg3, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<1x256xf32, #layout3>, !tt.device<#device>) -> tensor<1x256xf32, #layout5> + %5 = "ttnn.to_layout"(%arg3, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<1x256xf32, #layout3>, !tt.device<#device>) -> tensor<1x256xf32, #layout5> %6 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x256>>>, shape = #ttnn.shape<1x256>}> : (!tt.device<#device>) -> tensor<1x256xf32, #layout6> // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<1x256xf32, #[[LAYOUT_6]]> %7 = "ttnn.add"(%4, %5, %6) <{operandSegmentSizes = array}> : (tensor<1x256xf32, #layout6>, tensor<1x256xf32, #layout5>, tensor<1x256xf32, #layout6>) -> tensor<1x256xf32, #layout6> %8 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x256>>>, shape = #ttnn.shape<1x256>}> : (!tt.device<#device>) -> tensor<1x256xf32, #layout6> // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<1x256xf32, #[[LAYOUT_6]]> %9 = "ttnn.relu"(%7, %8) <{operandSegmentSizes = array}> : (tensor<1x256xf32, #layout6>, tensor<1x256xf32, #layout6>) -> tensor<1x256xf32, #layout6> - %10 = "ttnn.composite_to_layout"(%arg2, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<256x10xf32, #layout2>, !tt.device<#device>) -> tensor<256x10xf32, #layout5> + %10 = "ttnn.to_layout"(%arg2, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<256x10xf32, #layout2>, !tt.device<#device>) -> tensor<256x10xf32, #layout5> %11 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x10>>>, shape = #ttnn.shape<1x10>}> : (!tt.device<#device>) -> tensor<1x10xf32, #layout7> // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<1x10xf32, #[[LAYOUT_7]]> %12 = "ttnn.matmul"(%9, %10, %11) : (tensor<1x256xf32, #layout6>, tensor<256x10xf32, #layout5>, tensor<1x10xf32, #layout7>) -> tensor<1x10xf32, #layout7> - %13 = "ttnn.composite_to_layout"(%arg1, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<1x10xf32, #layout1>, !tt.device<#device>) -> tensor<1x10xf32, #layout5> + %13 = "ttnn.to_layout"(%arg1, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<1x10xf32, #layout1>, !tt.device<#device>) -> tensor<1x10xf32, #layout5> %14 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x10>>>, shape = #ttnn.shape<1x10>}> : (!tt.device<#device>) -> tensor<1x10xf32, #layout7> // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<1x10xf32, #[[LAYOUT_7]]> %15 = "ttnn.add"(%12, %13, %14) <{operandSegmentSizes = array}> : (tensor<1x10xf32, #layout7>, tensor<1x10xf32, #layout5>, tensor<1x10xf32, #layout7>) -> tensor<1x10xf32, #layout7> // CHECK: %{{.*}} = "ttnn.softmax"{{.*}} -> tensor<1x10xf32, #[[LAYOUT_7]]> %16 = "ttnn.softmax"(%15) <{dimension = 1 : si32}> : (tensor<1x10xf32, #layout7>) -> tensor<1x10xf32, #layout7> - %17 
= "ttnn.composite_to_layout"(%16) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x10>>>}> : (tensor<1x10xf32, #layout7>) -> tensor<1x10xf32, #layout1> + %17 = "ttnn.to_layout"(%16) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x10>>>}> : (tensor<1x10xf32, #layout7>) -> tensor<1x10xf32, #layout1> return %17 : tensor<1x10xf32, #layout1> } } diff --git a/test/ttmlir/Silicon/TTNN/all_l1_interleaved_policy.mlir b/test/ttmlir/Silicon/TTNN/all_l1_interleaved_policy.mlir index c18937f35e..2671523884 100644 --- a/test/ttmlir/Silicon/TTNN/all_l1_interleaved_policy.mlir +++ b/test/ttmlir/Silicon/TTNN/all_l1_interleaved_policy.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-load-system-desc="path=%system_desc_path%" --ttnn-optimizer="memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" --ttnn-decompose-composite-layouts --ttnn-deallocate %s > %t.mlir +// RUN: ttmlir-opt --ttir-load-system-desc="path=%system_desc_path%" --ttnn-optimizer="memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" --ttnn-decompose-layouts --ttnn-deallocate %s > %t.mlir // RUN: FileCheck %s --input-file=%t.mlir // RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn #device = #tt.device (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]> @@ -19,30 +19,30 @@ module attributes {tt.device = #device, tt.system_desc = #system_desc} { // CHECK: #[[LAYOUT_6:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <8x8>, memref<8x12xbf16, #l1_>, interleaved> // CHECK: #[[LAYOUT_7:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <8x8>, memref<8x4xbf16, #l1_>, interleaved> %0 = "ttnn.get_device"() <{mesh_shape = #ttnn}> : () -> !tt.device<#device> - %1 = "ttnn.composite_to_layout"(%arg0, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<64x128xbf16, #layout>, !tt.device<#device>) -> tensor<64x128xbf16, #layout5> - %2 = "ttnn.composite_to_layout"(%arg1, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<128x96xbf16, #layout1>, !tt.device<#device>) -> tensor<128x96xbf16, #layout5> + %1 = "ttnn.to_layout"(%arg0, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<64x128xbf16, #layout>, !tt.device<#device>) -> tensor<64x128xbf16, #layout5> + %2 = "ttnn.to_layout"(%arg1, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<128x96xbf16, #layout1>, !tt.device<#device>) -> tensor<128x96xbf16, #layout5> %3 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x96>>>, shape = #ttnn.shape<64x96>}> : (!tt.device<#device>) -> tensor<64x96xbf16, #layout6> // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_6]]> %4 = "ttnn.matmul"(%1, %2, %3) : (tensor<64x128xbf16, #layout5>, tensor<128x96xbf16, #layout5>, 
tensor<64x96xbf16, #layout6>) -> tensor<64x96xbf16, #layout6> - %5 = "ttnn.composite_to_layout"(%arg2, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<64x96xbf16, #layout2>, !tt.device<#device>) -> tensor<64x96xbf16, #layout5> + %5 = "ttnn.to_layout"(%arg2, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<64x96xbf16, #layout2>, !tt.device<#device>) -> tensor<64x96xbf16, #layout5> %6 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x96>>>, shape = #ttnn.shape<64x96>}> : (!tt.device<#device>) -> tensor<64x96xbf16, #layout6> // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_6]]> %7 = "ttnn.add"(%4, %5, %6) <{operandSegmentSizes = array}> : (tensor<64x96xbf16, #layout6>, tensor<64x96xbf16, #layout5>, tensor<64x96xbf16, #layout6>) -> tensor<64x96xbf16, #layout6> %8 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x96>>>, shape = #ttnn.shape<64x96>}> : (!tt.device<#device>) -> tensor<64x96xbf16, #layout6> // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_6]]> %9 = "ttnn.relu"(%7, %8) <{operandSegmentSizes = array}> : (tensor<64x96xbf16, #layout6>, tensor<64x96xbf16, #layout6>) -> tensor<64x96xbf16, #layout6> - %10 = "ttnn.composite_to_layout"(%arg3, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<96x32xbf16, #layout3>, !tt.device<#device>) -> tensor<96x32xbf16, #layout5> + %10 = "ttnn.to_layout"(%arg3, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<96x32xbf16, #layout3>, !tt.device<#device>) -> tensor<96x32xbf16, #layout5> %11 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x32>>>, shape = #ttnn.shape<64x32>}> : (!tt.device<#device>) -> tensor<64x32xbf16, #layout7> // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_7]]> %12 = "ttnn.matmul"(%9, %10, %11) : (tensor<64x96xbf16, #layout6>, tensor<96x32xbf16, #layout5>, tensor<64x32xbf16, #layout7>) -> tensor<64x32xbf16, #layout7> - %13 = "ttnn.composite_to_layout"(%arg4, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<64x32xbf16, #layout4>, !tt.device<#device>) -> tensor<64x32xbf16, #layout5> + %13 = "ttnn.to_layout"(%arg4, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<64x32xbf16, #layout4>, !tt.device<#device>) -> tensor<64x32xbf16, #layout5> %14 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x32>>>, shape = #ttnn.shape<64x32>}> : (!tt.device<#device>) -> tensor<64x32xbf16, #layout7> // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_7]]> %15 = "ttnn.add"(%12, %13, %14) <{operandSegmentSizes = array}> : (tensor<64x32xbf16, #layout7>, tensor<64x32xbf16, #layout5>, tensor<64x32xbf16, #layout7>) -> tensor<64x32xbf16, #layout7> %16 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x32>>>, shape = #ttnn.shape<64x32>}> : (!tt.device<#device>) -> tensor<64x32xbf16, #layout7> // CHECK: 
%{{.*}} = "ttnn.relu"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_7]]> %17 = "ttnn.relu"(%15, %16) <{operandSegmentSizes = array}> : (tensor<64x32xbf16, #layout7>, tensor<64x32xbf16, #layout7>) -> tensor<64x32xbf16, #layout7> - %18 = "ttnn.composite_to_layout"(%17) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x32>>>}> : (tensor<64x32xbf16, #layout7>) -> tensor<64x32xbf16, #layout4> + %18 = "ttnn.to_layout"(%17) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<64x32>>>}> : (tensor<64x32xbf16, #layout7>) -> tensor<64x32xbf16, #layout4> return %18 : tensor<64x32xbf16, #layout4> } } diff --git a/test/ttmlir/Silicon/TTNN/mnist_l1_interleaved.mlir b/test/ttmlir/Silicon/TTNN/mnist_l1_interleaved.mlir index 2b74355ac4..616c23f610 100644 --- a/test/ttmlir/Silicon/TTNN/mnist_l1_interleaved.mlir +++ b/test/ttmlir/Silicon/TTNN/mnist_l1_interleaved.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-load-system-desc="path=%system_desc_path%" --ttnn-optimizer="memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" --ttnn-decompose-composite-layouts --ttnn-deallocate %s > %t.mlir +// RUN: ttmlir-opt --ttir-load-system-desc="path=%system_desc_path%" --ttnn-optimizer="memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" --ttnn-decompose-layouts --ttnn-deallocate %s > %t.mlir // RUN: FileCheck %s --input-file=%t.mlir // RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn #device = #tt.device (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]> @@ -19,29 +19,29 @@ module @"tt-forge-graph" attributes {tt.device = #device, tt.system_desc = #syst // CHECK: #[[LAYOUT_6:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <8x8>, memref<1x32xf32, #l1_>, interleaved> // CHECK: #[[LAYOUT_7:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <8x8>, memref<1x2xf32, #l1_>, interleaved> %0 = "ttnn.get_device"() <{mesh_shape = #ttnn}> : () -> !tt.device<#device> - %1 = "ttnn.composite_to_layout"(%arg0, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<1x784xf32, #layout>, !tt.device<#device>) -> tensor<1x784xf32, #layout5> - %2 = "ttnn.composite_to_layout"(%arg4, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<784x256xf32, #layout4>, !tt.device<#device>) -> tensor<784x256xf32, #layout5> + %1 = "ttnn.to_layout"(%arg0, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<1x784xf32, #layout>, !tt.device<#device>) -> tensor<1x784xf32, #layout5> + %2 = "ttnn.to_layout"(%arg4, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<784x256xf32, #layout4>, !tt.device<#device>) -> tensor<784x256xf32, #layout5> %3 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x256>>>, shape = #ttnn.shape<1x256>}> : 
(!tt.device<#device>) -> tensor<1x256xf32, #layout6> // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<1x256xf32, #[[LAYOUT_6]]> %4 = "ttnn.matmul"(%1, %2, %3) : (tensor<1x784xf32, #layout5>, tensor<784x256xf32, #layout5>, tensor<1x256xf32, #layout6>) -> tensor<1x256xf32, #layout6> - %5 = "ttnn.composite_to_layout"(%arg3, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<1x256xf32, #layout3>, !tt.device<#device>) -> tensor<1x256xf32, #layout5> + %5 = "ttnn.to_layout"(%arg3, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<1x256xf32, #layout3>, !tt.device<#device>) -> tensor<1x256xf32, #layout5> %6 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x256>>>, shape = #ttnn.shape<1x256>}> : (!tt.device<#device>) -> tensor<1x256xf32, #layout6> // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<1x256xf32, #[[LAYOUT_6]]> %7 = "ttnn.add"(%4, %5, %6) <{operandSegmentSizes = array}> : (tensor<1x256xf32, #layout6>, tensor<1x256xf32, #layout5>, tensor<1x256xf32, #layout6>) -> tensor<1x256xf32, #layout6> %8 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x256>>>, shape = #ttnn.shape<1x256>}> : (!tt.device<#device>) -> tensor<1x256xf32, #layout6> // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<1x256xf32, #[[LAYOUT_6]]> %9 = "ttnn.relu"(%7, %8) <{operandSegmentSizes = array}> : (tensor<1x256xf32, #layout6>, tensor<1x256xf32, #layout6>) -> tensor<1x256xf32, #layout6> - %10 = "ttnn.composite_to_layout"(%arg2, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<256x10xf32, #layout2>, !tt.device<#device>) -> tensor<256x10xf32, #layout5> + %10 = "ttnn.to_layout"(%arg2, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<256x10xf32, #layout2>, !tt.device<#device>) -> tensor<256x10xf32, #layout5> %11 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x10>>>, shape = #ttnn.shape<1x10>}> : (!tt.device<#device>) -> tensor<1x10xf32, #layout7> // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<1x10xf32, #[[LAYOUT_7]]> %12 = "ttnn.matmul"(%9, %10, %11) : (tensor<1x256xf32, #layout6>, tensor<256x10xf32, #layout5>, tensor<1x10xf32, #layout7>) -> tensor<1x10xf32, #layout7> - %13 = "ttnn.composite_to_layout"(%arg1, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<1x10xf32, #layout1>, !tt.device<#device>) -> tensor<1x10xf32, #layout5> + %13 = "ttnn.to_layout"(%arg1, %0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<1x10xf32, #layout1>, !tt.device<#device>) -> tensor<1x10xf32, #layout5> %14 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x10>>>, shape = #ttnn.shape<1x10>}> : (!tt.device<#device>) -> tensor<1x10xf32, #layout7> // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<1x10xf32, #[[LAYOUT_7]]> %15 = "ttnn.add"(%12, %13, %14) <{operandSegmentSizes = array}> : (tensor<1x10xf32, #layout7>, tensor<1x10xf32, #layout5>, tensor<1x10xf32, #layout7>) -> tensor<1x10xf32, #layout7> // CHECK: %{{.*}} = "ttnn.softmax"{{.*}} -> tensor<1x10xf32, 
#[[LAYOUT_7]]> %16 = "ttnn.softmax"(%15) <{dimension = 1 : si32}> : (tensor<1x10xf32, #layout7>) -> tensor<1x10xf32, #layout7> - %17 = "ttnn.composite_to_layout"(%16) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x10>>>}> : (tensor<1x10xf32, #layout7>) -> tensor<1x10xf32, #layout1> + %17 = "ttnn.to_layout"(%16) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<1x10>>>}> : (tensor<1x10xf32, #layout7>) -> tensor<1x10xf32, #layout1> return %17 : tensor<1x10xf32, #layout1> } }
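
For reference, the MemoryLayoutAnalysisPolicyTypeParser introduced in MemoryLayoutAnalysisParams.h can also back a plain llvm::cl option outside the pass infrastructure. The sketch below is illustrative only; the option variable name and the idea of a standalone registration site are assumptions, not part of this patch:

  #include "ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h"

  #include <llvm/Support/CommandLine.h>

  // Standalone command-line option backed by the custom parser. It accepts
  // "DFSharding" or "L1Interleaved" and defaults to DFSharding, mirroring the
  // pass option added in Optimizer.h.
  static llvm::cl::opt<mlir::tt::MemoryLayoutAnalysisPolicyType, false,
                       mlir::tt::MemoryLayoutAnalysisPolicyTypeParser>
      memoryLayoutAnalysisPolicyOpt(
          "memory-layout-analysis-policy",
          llvm::cl::desc("Specify policy for memory layout analysis."),
          llvm::cl::init(
              mlir::tt::MemoryLayoutAnalysisPolicyType::DFSharding));

Through the pass option itself, the policy is selected exactly as the updated RUN lines do, i.e. --ttnn-optimizer="memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved".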